diff options
author | David S. Miller <davem@davemloft.net> | 2018-03-22 15:29:05 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-03-22 15:29:05 -0400 |
commit | 6686c459e1449a3ee5f3fd313b0a559ace7a700e (patch) | |
tree | d8e84a2ba3f2621b5d67a1d96d6c3a2637ca3d44 /drivers/net/ethernet | |
parent | 1a2e10a24039147047cc21759ec57314c333c1ac (diff) | |
parent | 2bfbd35d8ecd97a4a7f1db1754908b54542fa7aa (diff) | |
download | linux-rt-6686c459e1449a3ee5f3fd313b0a559ace7a700e.tar.gz |
Merge branch 'hns3-VF-reset'
Salil Mehta says:
====================
Add support of VF Reset to HNS3 VF driver
This patch-set adds the support of VF reset to the existing VF driver.
VF Reset can be triggered due to TX watchdog firing as a result of TX
data-path not working. VF reset could also be a result of some internal
configuration changes if that requires reset, or as a result of the
PF/Core/Global/IMP(Integrated Management Processor) reset happened in
the PF.
Summary of Patches:
* Watchdog timer trigger chnages are present in Patch 1.
* Reset Service Task and related Event handling is present in Patches {2,3}
* Changes to send reset request to PF, reset stack and re-initialization
of the hclge device is present in Patches {4,5,6}
* Changes related to ARQ (Asynchronous Receive Queue) and its event handling
are present in Patches {7,8}
* Changes required in PF to handle the VF Reset request and actually perform
hardware VF reset is there in Patch 9.
NOTE: This patch depends upon "[PATCH net-next 00/11] fix some bugs for HNS3 driver"
Link: https://lkml.org/lkml/2018/3/21/72
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet')
11 files changed, 534 insertions, 71 deletions
diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index e6e1d221b5c8..519e2bd6aa60 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -11,6 +11,7 @@ enum HCLGE_MBX_OPCODE { HCLGE_MBX_RESET = 0x01, /* (VF -> PF) assert reset */ + HCLGE_MBX_ASSERTING_RESET, /* (PF -> VF) PF is asserting reset*/ HCLGE_MBX_SET_UNICAST, /* (VF -> PF) set UC addr */ HCLGE_MBX_SET_MULTICAST, /* (VF -> PF) set MC addr */ HCLGE_MBX_SET_VLAN, /* (VF -> PF) set VLAN */ @@ -85,6 +86,21 @@ struct hclge_mbx_pf_to_vf_cmd { u16 msg[8]; }; +/* used by VF to store the received Async responses from PF */ +struct hclgevf_mbx_arq_ring { +#define HCLGE_MBX_MAX_ARQ_MSG_SIZE 8 +#define HCLGE_MBX_MAX_ARQ_MSG_NUM 1024 + struct hclgevf_dev *hdev; + u32 head; + u32 tail; + u32 count; + u16 msg_q[HCLGE_MBX_MAX_ARQ_MSG_NUM][HCLGE_MBX_MAX_ARQ_MSG_SIZE]; +}; + #define hclge_mbx_ring_ptr_move_crq(crq) \ (crq->next_to_use = (crq->next_to_use + 1) % crq->desc_num) +#define hclge_mbx_tail_ptr_move_arq(arq) \ + (arq.tail = (arq.tail + 1) % HCLGE_MBX_MAX_ARQ_MSG_SIZE) +#define hclge_mbx_head_ptr_move_arq(arq) \ + (arq.head = (arq.head + 1) % HCLGE_MBX_MAX_ARQ_MSG_SIZE) #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 9daa88d9041e..37ec1b3286c6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -118,6 +118,8 @@ enum hnae3_reset_notify_type { }; enum hnae3_reset_type { + HNAE3_VF_RESET, + HNAE3_VF_FULL_RESET, HNAE3_FUNC_RESET, HNAE3_CORE_RESET, HNAE3_GLOBAL_RESET, @@ -400,8 +402,7 @@ struct hnae3_ae_ops { int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid, u16 vlan, u8 qos, __be16 proto); int (*enable_hw_strip_rxvtag)(struct hnae3_handle *handle, bool enable); - void (*reset_event)(struct hnae3_handle *handle, - enum hnae3_reset_type reset); + void (*reset_event)(struct hnae3_handle *handle); void (*get_channels)(struct hnae3_handle *handle, struct ethtool_channels *ch); void (*get_tqps_and_rss_info)(struct hnae3_handle *h, @@ -495,6 +496,9 @@ struct hnae3_handle { struct hnae3_ae_algo *ae_algo; /* the class who provides this handle */ u64 flags; /* Indicate the capabilities for this handle*/ + unsigned long last_reset_time; + enum hnae3_reset_type reset_level; + union { struct net_device *netdev; /* first member */ struct hnae3_knic_private_info kinfo; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 0b4a676999ca..40a3eb70629e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -320,7 +320,7 @@ static int hns3_nic_net_open(struct net_device *netdev) return ret; } - priv->last_reset_time = jiffies; + priv->ae_handle->last_reset_time = jiffies; return 0; } @@ -1543,7 +1543,6 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev) static void hns3_nic_net_timeout(struct net_device *ndev) { struct hns3_nic_priv *priv = netdev_priv(ndev); - unsigned long last_reset_time = priv->last_reset_time; struct hnae3_handle *h = priv->ae_handle; if (!hns3_get_tx_timeo_queue_info(ndev)) @@ -1551,24 +1550,12 @@ static void hns3_nic_net_timeout(struct net_device *ndev) priv->tx_timeout_count++; - /* This timeout is far away enough from last timeout, - * if timeout again,set the reset type to PF reset - */ - if (time_after(jiffies, (last_reset_time + 20 * HZ))) - priv->reset_level = HNAE3_FUNC_RESET; - - /* Don't do any new action before the next timeout */ - else if (time_before(jiffies, (last_reset_time + ndev->watchdog_timeo))) + if (time_before(jiffies, (h->last_reset_time + ndev->watchdog_timeo))) return; - priv->last_reset_time = jiffies; - + /* request the reset */ if (h->ae_algo->ops->reset_event) - h->ae_algo->ops->reset_event(h, priv->reset_level); - - priv->reset_level++; - if (priv->reset_level > HNAE3_GLOBAL_RESET) - priv->reset_level = HNAE3_GLOBAL_RESET; + h->ae_algo->ops->reset_event(h); } static const struct net_device_ops hns3_nic_netdev_ops = { @@ -3122,8 +3109,8 @@ static int hns3_client_init(struct hnae3_handle *handle) priv->dev = &pdev->dev; priv->netdev = netdev; priv->ae_handle = handle; - priv->last_reset_time = jiffies; - priv->reset_level = HNAE3_FUNC_RESET; + priv->ae_handle->reset_level = HNAE3_NONE_RESET; + priv->ae_handle->last_reset_time = jiffies; priv->tx_timeout_count = 0; handle->kinfo.netdev = netdev; @@ -3355,7 +3342,6 @@ static int hns3_reset_notify_down_enet(struct hnae3_handle *handle) static int hns3_reset_notify_up_enet(struct hnae3_handle *handle) { struct hnae3_knic_private_info *kinfo = &handle->kinfo; - struct hns3_nic_priv *priv = netdev_priv(kinfo->netdev); int ret = 0; if (netif_running(kinfo->netdev)) { @@ -3365,8 +3351,7 @@ static int hns3_reset_notify_up_enet(struct hnae3_handle *handle) "hns net up fail, ret=%d!\n", ret); return ret; } - - priv->last_reset_time = jiffies; + handle->last_reset_time = jiffies; } return ret; @@ -3378,7 +3363,6 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle) struct hns3_nic_priv *priv = netdev_priv(netdev); int ret; - priv->reset_level = 1; hns3_init_mac_addr(netdev); hns3_nic_set_rx_mode(netdev); hns3_recover_hw_addr(netdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 39daa01f08d5..9e4cfbbf8dcd 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -532,8 +532,6 @@ struct hns3_nic_priv { /* The most recently read link state */ int link; u64 tx_timeout_count; - enum hnae3_reset_type reset_level; - unsigned long last_reset_time; unsigned long state; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 31e90b588e92..bede4117bad9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2749,7 +2749,7 @@ static int hclge_reset_wait(struct hclge_dev *hdev) return 0; } -static int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id) +int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id) { struct hclge_desc desc; struct hclge_reset_cmd *req = (struct hclge_reset_cmd *)desc.data; @@ -2845,27 +2845,31 @@ static void hclge_reset(struct hclge_dev *hdev) hclge_notify_client(hdev, HNAE3_UP_CLIENT); } -static void hclge_reset_event(struct hnae3_handle *handle, - enum hnae3_reset_type reset) +static void hclge_reset_event(struct hnae3_handle *handle) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - dev_info(&hdev->pdev->dev, - "Receive reset event , reset_type is %d", reset); + /* check if this is a new reset request and we are not here just because + * last reset attempt did not succeed and watchdog hit us again. We will + * know this if last reset request did not occur very recently (watchdog + * timer = 5*HZ, let us check after sufficiently large time, say 4*5*Hz) + * In case of new request we reset the "reset level" to PF reset. + */ + if (time_after(jiffies, (handle->last_reset_time + 4 * 5 * HZ))) + handle->reset_level = HNAE3_FUNC_RESET; - switch (reset) { - case HNAE3_FUNC_RESET: - case HNAE3_CORE_RESET: - case HNAE3_GLOBAL_RESET: - /* request reset & schedule reset task */ - set_bit(reset, &hdev->reset_request); - hclge_reset_task_schedule(hdev); - break; - default: - dev_warn(&hdev->pdev->dev, "Unsupported reset event:%d", reset); - break; - } + dev_info(&hdev->pdev->dev, "received reset event , reset type is %d", + handle->reset_level); + + /* request reset & schedule reset task */ + set_bit(handle->reset_level, &hdev->reset_request); + hclge_reset_task_schedule(hdev); + + if (handle->reset_level < HNAE3_GLOBAL_RESET) + handle->reset_level++; + + handle->last_reset_time = jiffies; } static void hclge_reset_subtask(struct hclge_dev *hdev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 8c14d10c88cb..0f4157e71282 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -657,4 +657,5 @@ void hclge_mbx_handler(struct hclge_dev *hdev); void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id); void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id); int hclge_cfg_flowctrl(struct hclge_dev *hdev); +int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 949da0c993cf..39013334a613 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -79,6 +79,18 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len, return status; } +int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport) +{ + u8 msg_data[2]; + u8 dest_vfid; + + dest_vfid = (u8)vport->vport_id; + + /* send this requested info to VF */ + return hclge_send_mbx_msg(vport, msg_data, sizeof(u8), + HCLGE_MBX_ASSERTING_RESET, dest_vfid); +} + static void hclge_free_vector_ring_chain(struct hnae3_ring_chain_node *head) { struct hnae3_ring_chain_node *chain_tmp, *chain; @@ -339,6 +351,33 @@ static void hclge_mbx_reset_vf_queue(struct hclge_vport *vport, hclge_gen_resp_to_vf(vport, mbx_req, 0, NULL, 0); } +static void hclge_reset_vf(struct hclge_vport *vport, + struct hclge_mbx_vf_to_pf_cmd *mbx_req) +{ + struct hclge_dev *hdev = vport->back; + int ret; + + dev_warn(&hdev->pdev->dev, "PF received VF reset request from VF %d!", + mbx_req->mbx_src_vfid); + + /* Acknowledge VF that PF is now about to assert the reset for the VF. + * On receiving this message VF will get into pending state and will + * start polling for the hardware reset completion status. + */ + ret = hclge_inform_reset_assert_to_vf(vport); + if (ret) { + dev_err(&hdev->pdev->dev, + "PF fail(%d) to inform VF(%d)of reset, reset failed!\n", + ret, vport->vport_id); + return; + } + + dev_warn(&hdev->pdev->dev, "PF is now resetting VF %d.\n", + mbx_req->mbx_src_vfid); + /* reset this virtual function */ + hclge_func_reset_cmd(hdev, mbx_req->mbx_src_vfid); +} + void hclge_mbx_handler(struct hclge_dev *hdev) { struct hclge_cmq_ring *crq = &hdev->hw.cmq.crq; @@ -416,6 +455,9 @@ void hclge_mbx_handler(struct hclge_dev *hdev) case HCLGE_MBX_QUEUE_RESET: hclge_mbx_reset_vf_queue(vport, req); break; + case HCLGE_MBX_RESET: + hclge_reset_vf(vport, req); + break; default: dev_err(&hdev->pdev->dev, "un-supported mailbox message, code = %d\n", diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c index 85985e731311..1bbfe131b596 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c @@ -315,6 +315,12 @@ int hclgevf_cmd_init(struct hclgevf_dev *hdev) goto err_csq; } + /* initialize the pointers of async rx queue of mailbox */ + hdev->arq.hdev = hdev; + hdev->arq.head = 0; + hdev->arq.tail = 0; + hdev->arq.count = 0; + /* get firmware version */ ret = hclgevf_cmd_query_firmware_version(&hdev->hw, &version); if (ret) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 906dfa3d0fc6..2b8426412cc9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2,6 +2,7 @@ // Copyright (c) 2016-2017 Hisilicon Limited. #include <linux/etherdevice.h> +#include <net/rtnetlink.h> #include "hclgevf_cmd.h" #include "hclgevf_main.h" #include "hclge_mbx.h" @@ -9,6 +10,8 @@ #define HCLGEVF_NAME "hclgevf" +static int hclgevf_init_hdev(struct hclgevf_dev *hdev); +static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev); static struct hnae3_ae_algo ae_algovf; static const struct pci_device_id ae_algovf_pci_tbl[] = { @@ -208,6 +211,12 @@ static int hclgevf_alloc_tqps(struct hclgevf_dev *hdev) struct hclgevf_tqp *tqp; int i; + /* if this is on going reset then we need to re-allocate the TPQs + * since we cannot assume we would get same number of TPQs back from PF + */ + if (hclgevf_dev_ongoing_reset(hdev)) + devm_kfree(&hdev->pdev->dev, hdev->htqp); + hdev->htqp = devm_kcalloc(&hdev->pdev->dev, hdev->num_tqps, sizeof(struct hclgevf_tqp), GFP_KERNEL); if (!hdev->htqp) @@ -251,6 +260,12 @@ static int hclgevf_knic_setup(struct hclgevf_dev *hdev) new_tqps = kinfo->rss_size * kinfo->num_tc; kinfo->num_tqps = min(new_tqps, hdev->num_tqps); + /* if this is on going reset then we need to re-allocate the hnae queues + * as well since number of TPQs from PF might have changed. + */ + if (hclgevf_dev_ongoing_reset(hdev)) + devm_kfree(&hdev->pdev->dev, kinfo->tqp); + kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, kinfo->num_tqps, sizeof(struct hnae3_queue *), GFP_KERNEL); if (!kinfo->tqp) @@ -832,6 +847,138 @@ static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id) 2, true, NULL, 0); } +static int hclgevf_notify_client(struct hclgevf_dev *hdev, + enum hnae3_reset_notify_type type) +{ + struct hnae3_client *client = hdev->nic_client; + struct hnae3_handle *handle = &hdev->nic; + + if (!client->ops->reset_notify) + return -EOPNOTSUPP; + + return client->ops->reset_notify(handle, type); +} + +static int hclgevf_reset_wait(struct hclgevf_dev *hdev) +{ +#define HCLGEVF_RESET_WAIT_MS 500 +#define HCLGEVF_RESET_WAIT_CNT 20 + u32 val, cnt = 0; + + /* wait to check the hardware reset completion status */ + val = hclgevf_read_dev(&hdev->hw, HCLGEVF_FUN_RST_ING); + while (hnae_get_bit(val, HCLGEVF_FUN_RST_ING_B) && + (cnt < HCLGEVF_RESET_WAIT_CNT)) { + msleep(HCLGEVF_RESET_WAIT_MS); + val = hclgevf_read_dev(&hdev->hw, HCLGEVF_FUN_RST_ING); + cnt++; + } + + /* hardware completion status should be available by this time */ + if (cnt >= HCLGEVF_RESET_WAIT_CNT) { + dev_warn(&hdev->pdev->dev, + "could'nt get reset done status from h/w, timeout!\n"); + return -EBUSY; + } + + /* we will wait a bit more to let reset of the stack to complete. This + * might happen in case reset assertion was made by PF. Yes, this also + * means we might end up waiting bit more even for VF reset. + */ + msleep(5000); + + return 0; +} + +static int hclgevf_reset_stack(struct hclgevf_dev *hdev) +{ + int ret; + + /* uninitialize the nic client */ + hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT); + + /* re-initialize the hclge device */ + ret = hclgevf_init_hdev(hdev); + if (ret) { + dev_err(&hdev->pdev->dev, + "hclge device re-init failed, VF is disabled!\n"); + return ret; + } + + /* bring up the nic client again */ + hclgevf_notify_client(hdev, HNAE3_INIT_CLIENT); + + return 0; +} + +static int hclgevf_reset(struct hclgevf_dev *hdev) +{ + int ret; + + rtnl_lock(); + + /* bring down the nic to stop any ongoing TX/RX */ + hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT); + + /* check if VF could successfully fetch the hardware reset completion + * status from the hardware + */ + ret = hclgevf_reset_wait(hdev); + if (ret) { + /* can't do much in this situation, will disable VF */ + dev_err(&hdev->pdev->dev, + "VF failed(=%d) to fetch H/W reset completion status\n", + ret); + + dev_warn(&hdev->pdev->dev, "VF reset failed, disabling VF!\n"); + hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT); + + rtnl_unlock(); + return ret; + } + + /* now, re-initialize the nic client and ae device*/ + ret = hclgevf_reset_stack(hdev); + if (ret) + dev_err(&hdev->pdev->dev, "failed to reset VF stack\n"); + + /* bring up the nic to enable TX/RX again */ + hclgevf_notify_client(hdev, HNAE3_UP_CLIENT); + + rtnl_unlock(); + + return ret; +} + +static int hclgevf_do_reset(struct hclgevf_dev *hdev) +{ + int status; + u8 respmsg; + + status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_RESET, 0, NULL, + 0, false, &respmsg, sizeof(u8)); + if (status) + dev_err(&hdev->pdev->dev, + "VF reset request to PF failed(=%d)\n", status); + + return status; +} + +static void hclgevf_reset_event(struct hnae3_handle *handle) +{ + struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + + dev_info(&hdev->pdev->dev, "received reset request from VF enet\n"); + + handle->reset_level = HNAE3_VF_RESET; + + /* reset of this VF requested */ + set_bit(HCLGEVF_RESET_REQUESTED, &hdev->reset_state); + hclgevf_reset_task_schedule(hdev); + + handle->last_reset_time = jiffies; +} + static u32 hclgevf_get_fw_version(struct hnae3_handle *handle) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); @@ -854,10 +1001,22 @@ static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev) hdev->num_msi_used += 1; } -static void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev) +void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev) { - if (!test_and_set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state)) + if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state) && + !test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) { + set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state); + schedule_work(&hdev->rst_service_task); + } +} + +void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev) +{ + if (!test_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state) && + !test_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state)) { + set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state); schedule_work(&hdev->mbx_service_task); + } } static void hclgevf_task_schedule(struct hclgevf_dev *hdev) @@ -867,6 +1026,16 @@ static void hclgevf_task_schedule(struct hclgevf_dev *hdev) schedule_work(&hdev->service_task); } +static void hclgevf_deferred_task_schedule(struct hclgevf_dev *hdev) +{ + /* if we have any pending mailbox event then schedule the mbx task */ + if (hdev->mbx_event_pending) + hclgevf_mbx_task_schedule(hdev); + + if (test_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state)) + hclgevf_reset_task_schedule(hdev); +} + static void hclgevf_service_timer(struct timer_list *t) { struct hclgevf_dev *hdev = from_timer(hdev, t, service_timer); @@ -876,6 +1045,75 @@ static void hclgevf_service_timer(struct timer_list *t) hclgevf_task_schedule(hdev); } +static void hclgevf_reset_service_task(struct work_struct *work) +{ + struct hclgevf_dev *hdev = + container_of(work, struct hclgevf_dev, rst_service_task); + int ret; + + if (test_and_set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) + return; + + clear_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state); + + if (test_and_clear_bit(HCLGEVF_RESET_PENDING, + &hdev->reset_state)) { + /* PF has initmated that it is about to reset the hardware. + * We now have to poll & check if harware has actually completed + * the reset sequence. On hardware reset completion, VF needs to + * reset the client and ae device. + */ + hdev->reset_attempts = 0; + + ret = hclgevf_reset(hdev); + if (ret) + dev_err(&hdev->pdev->dev, "VF stack reset failed.\n"); + } else if (test_and_clear_bit(HCLGEVF_RESET_REQUESTED, + &hdev->reset_state)) { + /* we could be here when either of below happens: + * 1. reset was initiated due to watchdog timeout due to + * a. IMP was earlier reset and our TX got choked down and + * which resulted in watchdog reacting and inducing VF + * reset. This also means our cmdq would be unreliable. + * b. problem in TX due to other lower layer(example link + * layer not functioning properly etc.) + * 2. VF reset might have been initiated due to some config + * change. + * + * NOTE: Theres no clear way to detect above cases than to react + * to the response of PF for this reset request. PF will ack the + * 1b and 2. cases but we will not get any intimation about 1a + * from PF as cmdq would be in unreliable state i.e. mailbox + * communication between PF and VF would be broken. + */ + + /* if we are never geting into pending state it means either: + * 1. PF is not receiving our request which could be due to IMP + * reset + * 2. PF is screwed + * We cannot do much for 2. but to check first we can try reset + * our PCIe + stack and see if it alleviates the problem. + */ + if (hdev->reset_attempts > 3) { + /* prepare for full reset of stack + pcie interface */ + hdev->nic.reset_level = HNAE3_VF_FULL_RESET; + + /* "defer" schedule the reset task again */ + set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state); + } else { + hdev->reset_attempts++; + + /* request PF for resetting this VF via mailbox */ + ret = hclgevf_do_reset(hdev); + if (ret) + dev_warn(&hdev->pdev->dev, + "VF rst fail, stack will call\n"); + } + } + + clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); +} + static void hclgevf_mailbox_service_task(struct work_struct *work) { struct hclgevf_dev *hdev; @@ -887,7 +1125,7 @@ static void hclgevf_mailbox_service_task(struct work_struct *work) clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state); - hclgevf_mbx_handler(hdev); + hclgevf_mbx_async_handler(hdev); clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state); } @@ -903,6 +1141,8 @@ static void hclgevf_service_task(struct work_struct *work) */ hclgevf_request_link_info(hdev); + hclgevf_deferred_task_schedule(hdev); + clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state); } @@ -945,8 +1185,7 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data) if (!hclgevf_check_event_cause(hdev, &clearval)) goto skip_sched; - /* schedule the VF mailbox service task, if not already scheduled */ - hclgevf_mbx_task_schedule(hdev); + hclgevf_mbx_handler(hdev); hclgevf_clear_event_cause(hdev, clearval); @@ -968,6 +1207,22 @@ static int hclgevf_configure(struct hclgevf_dev *hdev) return hclgevf_get_tc_info(hdev); } +static int hclgevf_alloc_hdev(struct hnae3_ae_dev *ae_dev) +{ + struct pci_dev *pdev = ae_dev->pdev; + struct hclgevf_dev *hdev = ae_dev->priv; + + hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL); + if (!hdev) + return -ENOMEM; + + hdev->pdev = pdev; + hdev->ae_dev = ae_dev; + ae_dev->priv = hdev; + + return 0; +} + static int hclgevf_init_roce_base_info(struct hclgevf_dev *hdev) { struct hnae3_handle *roce = &hdev->roce; @@ -1073,6 +1328,10 @@ static void hclgevf_ae_stop(struct hnae3_handle *handle) static void hclgevf_state_init(struct hclgevf_dev *hdev) { + /* if this is on going reset then skip this initialization */ + if (hclgevf_dev_ongoing_reset(hdev)) + return; + /* setup tasks for the MBX */ INIT_WORK(&hdev->mbx_service_task, hclgevf_mailbox_service_task); clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state); @@ -1084,6 +1343,8 @@ static void hclgevf_state_init(struct hclgevf_dev *hdev) INIT_WORK(&hdev->service_task, hclgevf_service_task); clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state); + INIT_WORK(&hdev->rst_service_task, hclgevf_reset_service_task); + mutex_init(&hdev->mbx_resp.mbx_mutex); /* bring the device down */ @@ -1100,6 +1361,8 @@ static void hclgevf_state_uninit(struct hclgevf_dev *hdev) cancel_work_sync(&hdev->service_task); if (hdev->mbx_service_task.func) cancel_work_sync(&hdev->mbx_service_task); + if (hdev->rst_service_task.func) + cancel_work_sync(&hdev->rst_service_task); mutex_destroy(&hdev->mbx_resp.mbx_mutex); } @@ -1110,6 +1373,10 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev) int vectors; int i; + /* if this is on going reset then skip this initialization */ + if (hclgevf_dev_ongoing_reset(hdev)) + return 0; + hdev->num_msi = HCLGEVF_MAX_VF_VECTOR_NUM; vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi, @@ -1160,6 +1427,10 @@ static int hclgevf_misc_irq_init(struct hclgevf_dev *hdev) { int ret = 0; + /* if this is on going reset then skip this initialization */ + if (hclgevf_dev_ongoing_reset(hdev)) + return 0; + hclgevf_get_misc_vector(hdev); ret = request_irq(hdev->misc_vector.vector_irq, hclgevf_misc_irq_handle, @@ -1270,6 +1541,14 @@ static int hclgevf_pci_init(struct hclgevf_dev *hdev) struct hclgevf_hw *hw; int ret; + /* check if we need to skip initialization of pci. This will happen if + * device is undergoing VF reset. Otherwise, we would need to + * re-initialize pci interface again i.e. when device is not going + * through *any* reset or actually undergoing full reset. + */ + if (hclgevf_dev_ongoing_reset(hdev)) + return 0; + ret = pci_enable_device(pdev); if (ret) { dev_err(&pdev->dev, "failed to enable PCI device\n"); @@ -1321,19 +1600,16 @@ static void hclgevf_pci_uninit(struct hclgevf_dev *hdev) pci_set_drvdata(pdev, NULL); } -static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev) +static int hclgevf_init_hdev(struct hclgevf_dev *hdev) { - struct pci_dev *pdev = ae_dev->pdev; - struct hclgevf_dev *hdev; + struct pci_dev *pdev = hdev->pdev; int ret; - hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL); - if (!hdev) - return -ENOMEM; - - hdev->pdev = pdev; - hdev->ae_dev = ae_dev; - ae_dev->priv = hdev; + /* check if device is on-going full reset(i.e. pcie as well) */ + if (hclgevf_dev_ongoing_full_reset(hdev)) { + dev_warn(&pdev->dev, "device is going full reset\n"); + hclgevf_uninit_hdev(hdev); + } ret = hclgevf_pci_init(hdev); if (ret) { @@ -1418,15 +1694,38 @@ err_irq_init: return ret; } -static void hclgevf_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) +static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev) { - struct hclgevf_dev *hdev = ae_dev->priv; - hclgevf_cmd_uninit(hdev); hclgevf_misc_irq_uninit(hdev); hclgevf_state_uninit(hdev); hclgevf_uninit_msi(hdev); hclgevf_pci_uninit(hdev); +} + +static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev) +{ + struct pci_dev *pdev = ae_dev->pdev; + int ret; + + ret = hclgevf_alloc_hdev(ae_dev); + if (ret) { + dev_err(&pdev->dev, "hclge device allocation failed\n"); + return ret; + } + + ret = hclgevf_init_hdev(ae_dev->priv); + if (ret) + dev_err(&pdev->dev, "hclge device initialization failed\n"); + + return ret; +} + +static void hclgevf_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) +{ + struct hclgevf_dev *hdev = ae_dev->priv; + + hclgevf_uninit_hdev(hdev); ae_dev->priv = NULL; } @@ -1526,6 +1825,7 @@ static const struct hnae3_ae_ops hclgevf_ops = { .get_tc_size = hclgevf_get_tc_size, .get_fw_version = hclgevf_get_fw_version, .set_vlan_filter = hclgevf_set_vlan_filter, + .reset_event = hclgevf_reset_event, .get_channels = hclgevf_get_channels, .get_tqps_and_rss_info = hclgevf_get_tqps_and_rss_info, .get_status = hclgevf_get_status, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 0eaea063e7c5..a477a7c36bbd 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -34,6 +34,9 @@ #define HCLGEVF_VECTOR0_RX_CMDQ_INT_B 1 #define HCLGEVF_TQP_RESET_TRY_TIMES 10 +/* Reset related Registers */ +#define HCLGEVF_FUN_RST_ING 0x20C00 +#define HCLGEVF_FUN_RST_ING_B 0 #define HCLGEVF_RSS_IND_TBL_SIZE 512 #define HCLGEVF_RSS_SET_BITMAP_MSK 0xffff @@ -52,6 +55,8 @@ enum hclgevf_states { HCLGEVF_STATE_DISABLED, /* task states */ HCLGEVF_STATE_SERVICE_SCHED, + HCLGEVF_STATE_RST_SERVICE_SCHED, + HCLGEVF_STATE_RST_HANDLING, HCLGEVF_STATE_MBX_SERVICE_SCHED, HCLGEVF_STATE_MBX_HANDLING, }; @@ -122,6 +127,11 @@ struct hclgevf_dev { struct hclgevf_rss_cfg rss_cfg; unsigned long state; +#define HCLGEVF_RESET_REQUESTED 0 +#define HCLGEVF_RESET_PENDING 1 + unsigned long reset_state; /* requested, pending */ + u32 reset_attempts; + u32 fw_version; u16 num_tqps; /* num task queue pairs of this PF */ @@ -142,10 +152,13 @@ struct hclgevf_dev { int *vector_irq; bool accept_mta_mc; /* whether to accept mta filter multicast */ + bool mbx_event_pending; struct hclgevf_mbx_resp_status mbx_resp; /* mailbox response */ + struct hclgevf_mbx_arq_ring arq; /* mailbox async rx queue */ struct timer_list service_timer; struct work_struct service_task; + struct work_struct rst_service_task; struct work_struct mbx_service_task; struct hclgevf_tqp *htqp; @@ -158,11 +171,29 @@ struct hclgevf_dev { u32 flag; }; +static inline bool hclgevf_dev_ongoing_reset(struct hclgevf_dev *hdev) +{ + return (hdev && + (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) && + (hdev->nic.reset_level == HNAE3_VF_RESET)); +} + +static inline bool hclgevf_dev_ongoing_full_reset(struct hclgevf_dev *hdev) +{ + return (hdev && + (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) && + (hdev->nic.reset_level == HNAE3_VF_FULL_RESET)); +} + int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, u16 code, u16 subcode, const u8 *msg_data, u8 msg_len, bool need_resp, u8 *resp_data, u16 resp_len); void hclgevf_mbx_handler(struct hclgevf_dev *hdev); +void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev); + void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state); void hclgevf_update_speed_duplex(struct hclgevf_dev *hdev, u32 speed, u8 duplex); +void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev); +void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index a63ed3aa2c00..a28618428338 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -132,9 +132,8 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev) struct hclge_mbx_pf_to_vf_cmd *req; struct hclgevf_cmq_ring *crq; struct hclgevf_desc *desc; - u16 link_status, flag; - u32 speed; - u8 duplex; + u16 *msg_q; + u16 flag; u8 *temp; int i; @@ -146,6 +145,12 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev) desc = &crq->desc[crq->next_to_use]; req = (struct hclge_mbx_pf_to_vf_cmd *)desc->data; + /* synchronous messages are time critical and need preferential + * treatment. Therefore, we need to acknowledge all the sync + * responses as quickly as possible so that waiting tasks do not + * timeout and simultaneously queue the async messages for later + * prcessing in context of mailbox task i.e. the slow path. + */ switch (req->msg[0]) { case HCLGE_MBX_PF_VF_RESP: if (resp->received_resp) @@ -165,13 +170,31 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev) } break; case HCLGE_MBX_LINK_STAT_CHANGE: - link_status = le16_to_cpu(req->msg[1]); - memcpy(&speed, &req->msg[2], sizeof(speed)); - duplex = (u8)le16_to_cpu(req->msg[4]); + case HCLGE_MBX_ASSERTING_RESET: + /* set this mbx event as pending. This is required as we + * might loose interrupt event when mbx task is busy + * handling. This shall be cleared when mbx task just + * enters handling state. + */ + hdev->mbx_event_pending = true; - /* update upper layer with new link link status */ - hclgevf_update_link_status(hdev, link_status); - hclgevf_update_speed_duplex(hdev, speed, duplex); + /* we will drop the async msg if we find ARQ as full + * and continue with next message + */ + if (hdev->arq.count >= HCLGE_MBX_MAX_ARQ_MSG_NUM) { + dev_warn(&hdev->pdev->dev, + "Async Q full, dropping msg(%d)\n", + req->msg[1]); + break; + } + + /* tail the async message in arq */ + msg_q = hdev->arq.msg_q[hdev->arq.tail]; + memcpy(&msg_q[0], req->msg, HCLGE_MBX_MAX_ARQ_MSG_SIZE); + hclge_mbx_tail_ptr_move_arq(hdev->arq); + hdev->arq.count++; + + hclgevf_mbx_task_schedule(hdev); break; default: @@ -189,3 +212,57 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev) hclgevf_write_dev(&hdev->hw, HCLGEVF_NIC_CRQ_HEAD_REG, crq->next_to_use); } + +void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev) +{ + u16 link_status; + u16 *msg_q; + u8 duplex; + u32 speed; + u32 tail; + + /* we can safely clear it now as we are at start of the async message + * processing + */ + hdev->mbx_event_pending = false; + + tail = hdev->arq.tail; + + /* process all the async queue messages */ + while (tail != hdev->arq.head) { + msg_q = hdev->arq.msg_q[hdev->arq.head]; + + switch (msg_q[0]) { + case HCLGE_MBX_LINK_STAT_CHANGE: + link_status = le16_to_cpu(msg_q[1]); + memcpy(&speed, &msg_q[2], sizeof(speed)); + duplex = (u8)le16_to_cpu(msg_q[4]); + + /* update upper layer with new link link status */ + hclgevf_update_link_status(hdev, link_status); + hclgevf_update_speed_duplex(hdev, speed, duplex); + + break; + case HCLGE_MBX_ASSERTING_RESET: + /* PF has asserted reset hence VF should go in pending + * state and poll for the hardware reset status till it + * has been completely reset. After this stack should + * eventually be re-initialized. + */ + hdev->nic.reset_level = HNAE3_VF_RESET; + set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state); + hclgevf_reset_task_schedule(hdev); + + break; + default: + dev_err(&hdev->pdev->dev, + "fetched unsupported(%d) message from arq\n", + msg_q[0]); + break; + } + + hclge_mbx_head_ptr_move_arq(hdev->arq); + hdev->arq.count--; + msg_q = hdev->arq.msg_q[hdev->arq.head]; + } +} |