diff options
Diffstat (limited to 'drivers/infiniband/hw/hfi1')
25 files changed, 2631 insertions, 459 deletions
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 0405d26d0833..2e89ec10efed 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -22,9 +22,13 @@ hfi1-y := \ init.o \ intr.o \ iowait.o \ + ipoib_main.o \ + ipoib_rx.o \ + ipoib_tx.o \ mad.o \ mmu_rb.o \ msix.o \ + netdev_rx.o \ opfn.o \ pcie.o \ pio.o \ diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 1aeea5d65c01..2a91b8d95e12 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -64,6 +64,7 @@ struct hfi1_affinity_node_list node_affinity = { static const char * const irq_type_names[] = { "SDMA", "RCVCTXT", + "NETDEVCTXT", "GENERAL", "OTHER", }; @@ -915,6 +916,11 @@ static int get_irq_affinity(struct hfi1_devdata *dd, set = &entry->rcv_intr; scnprintf(extra, 64, "ctxt %u", rcd->ctxt); break; + case IRQ_NETDEVCTXT: + rcd = (struct hfi1_ctxtdata *)msix->arg; + set = &entry->def_intr; + scnprintf(extra, 64, "ctxt %u", rcd->ctxt); + break; default: dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type); return -EINVAL; @@ -987,6 +993,10 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd, if (rcd->ctxt != HFI1_CTRL_CTXT) set = &entry->rcv_intr; break; + case IRQ_NETDEVCTXT: + rcd = (struct hfi1_ctxtdata *)msix->arg; + set = &entry->def_intr; + break; default: mutex_unlock(&node_affinity.lock); return; diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index 6a7e6ea4e426..f94ed5d7c7a3 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -52,6 +52,7 @@ enum irq_type { IRQ_SDMA, IRQ_RCVCTXT, + IRQ_NETDEVCTXT, IRQ_GENERAL, IRQ_OTHER }; diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index e0b1238d31df..15f9c635f292 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -66,10 +66,7 @@ #include "affinity.h" #include "debugfs.h" #include "fault.h" - -uint kdeth_qp; -module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO); -MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix"); +#include "netdev.h" uint num_vls = HFI1_MAX_VLS_SUPPORTED; module_param(num_vls, uint, S_IRUGO); @@ -128,13 +125,15 @@ struct flag_table { /* * RSM instance allocation - * 0 - Verbs - * 1 - User Fecn Handling - * 2 - Vnic + * 0 - User Fecn Handling + * 1 - Vnic + * 2 - AIP + * 3 - Verbs */ -#define RSM_INS_VERBS 0 -#define RSM_INS_FECN 1 -#define RSM_INS_VNIC 2 +#define RSM_INS_FECN 0 +#define RSM_INS_VNIC 1 +#define RSM_INS_AIP 2 +#define RSM_INS_VERBS 3 /* Bit offset into the GUID which carries HFI id information */ #define GUID_HFI_INDEX_SHIFT 39 @@ -175,6 +174,25 @@ struct flag_table { /* QPN[m+n:1] QW 1, OFFSET 1 */ #define QPN_SELECT_OFFSET ((1ull << QW_SHIFT) | (1ull)) +/* RSM fields for AIP */ +/* LRH.BTH above is reused for this rule */ + +/* BTH.DESTQP: QW 1, OFFSET 16 for match */ +#define BTH_DESTQP_QW 1ull +#define BTH_DESTQP_BIT_OFFSET 16ull +#define BTH_DESTQP_OFFSET(off) ((BTH_DESTQP_QW << QW_SHIFT) | (off)) +#define BTH_DESTQP_MATCH_OFFSET BTH_DESTQP_OFFSET(BTH_DESTQP_BIT_OFFSET) +#define BTH_DESTQP_MASK 0xFFull +#define BTH_DESTQP_VALUE 0x81ull + +/* DETH.SQPN: QW 1 Offset 56 for select */ +/* We use 8 most significant Soure QPN bits as entropy fpr AIP */ +#define DETH_AIP_SQPN_QW 3ull +#define DETH_AIP_SQPN_BIT_OFFSET 56ull +#define DETH_AIP_SQPN_OFFSET(off) ((DETH_AIP_SQPN_QW << QW_SHIFT) | (off)) +#define DETH_AIP_SQPN_SELECT_OFFSET \ + DETH_AIP_SQPN_OFFSET(DETH_AIP_SQPN_BIT_OFFSET) + /* RSM fields for Vnic */ /* L2_TYPE: QW 0, OFFSET 61 - for match */ #define L2_TYPE_QW 0ull @@ -8463,6 +8481,49 @@ static void hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd) local_irq_restore(flags); } +/** + * hfi1_netdev_rx_napi - napi poll function to move eoi inline + * @napi - pointer to napi object + * @budget - netdev budget + */ +int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget) +{ + struct hfi1_netdev_rxq *rxq = container_of(napi, + struct hfi1_netdev_rxq, napi); + struct hfi1_ctxtdata *rcd = rxq->rcd; + int work_done = 0; + + work_done = rcd->do_interrupt(rcd, budget); + + if (work_done < budget) { + napi_complete_done(napi, work_done); + hfi1_rcd_eoi_intr(rcd); + } + + return work_done; +} + +/* Receive packet napi handler for netdevs VNIC and AIP */ +irqreturn_t receive_context_interrupt_napi(int irq, void *data) +{ + struct hfi1_ctxtdata *rcd = data; + + receive_interrupt_common(rcd); + + if (likely(rcd->napi)) { + if (likely(napi_schedule_prep(rcd->napi))) + __napi_schedule_irqoff(rcd->napi); + else + __hfi1_rcd_eoi_intr(rcd); + } else { + WARN_ONCE(1, "Napi IRQ handler without napi set up ctxt=%d\n", + rcd->ctxt); + __hfi1_rcd_eoi_intr(rcd); + } + + return IRQ_HANDLED; +} + /* * Receive packet IRQ handler. This routine expects to be on its own IRQ. * This routine will try to handle packets immediately (latency), but if @@ -13330,13 +13391,12 @@ static int set_up_interrupts(struct hfi1_devdata *dd) * in array of contexts * freectxts - number of free user contexts * num_send_contexts - number of PIO send contexts being used - * num_vnic_contexts - number of contexts reserved for VNIC + * num_netdev_contexts - number of contexts reserved for netdev */ static int set_up_context_variables(struct hfi1_devdata *dd) { unsigned long num_kernel_contexts; - u16 num_vnic_contexts = HFI1_NUM_VNIC_CTXT; - int total_contexts; + u16 num_netdev_contexts; int ret; unsigned ngroups; int rmt_count; @@ -13373,13 +13433,6 @@ static int set_up_context_variables(struct hfi1_devdata *dd) num_kernel_contexts = send_contexts - num_vls - 1; } - /* Accommodate VNIC contexts if possible */ - if ((num_kernel_contexts + num_vnic_contexts) > rcv_contexts) { - dd_dev_err(dd, "No receive contexts available for VNIC\n"); - num_vnic_contexts = 0; - } - total_contexts = num_kernel_contexts + num_vnic_contexts; - /* * User contexts: * - default to 1 user context per real (non-HT) CPU core if @@ -13392,28 +13445,32 @@ static int set_up_context_variables(struct hfi1_devdata *dd) /* * Adjust the counts given a global max. */ - if (total_contexts + n_usr_ctxts > rcv_contexts) { + if (num_kernel_contexts + n_usr_ctxts > rcv_contexts) { dd_dev_err(dd, - "Reducing # user receive contexts to: %d, from %u\n", - rcv_contexts - total_contexts, + "Reducing # user receive contexts to: %u, from %u\n", + (u32)(rcv_contexts - num_kernel_contexts), n_usr_ctxts); /* recalculate */ - n_usr_ctxts = rcv_contexts - total_contexts; + n_usr_ctxts = rcv_contexts - num_kernel_contexts; } + num_netdev_contexts = + hfi1_num_netdev_contexts(dd, rcv_contexts - + (num_kernel_contexts + n_usr_ctxts), + &node_affinity.real_cpu_mask); /* * The RMT entries are currently allocated as shown below: * 1. QOS (0 to 128 entries); * 2. FECN (num_kernel_context - 1 + num_user_contexts + - * num_vnic_contexts); - * 3. VNIC (num_vnic_contexts). - * It should be noted that FECN oversubscribe num_vnic_contexts - * entries of RMT because both VNIC and PSM could allocate any receive + * num_netdev_contexts); + * 3. netdev (num_netdev_contexts). + * It should be noted that FECN oversubscribe num_netdev_contexts + * entries of RMT because both netdev and PSM could allocate any receive * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts, * and PSM FECN must reserve an RMT entry for each possible PSM receive * context. */ - rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2); + rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_netdev_contexts * 2); if (HFI1_CAP_IS_KSET(TID_RDMA)) rmt_count += num_kernel_contexts - 1; if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) { @@ -13426,21 +13483,20 @@ static int set_up_context_variables(struct hfi1_devdata *dd) n_usr_ctxts = user_rmt_reduced; } - total_contexts += n_usr_ctxts; - - /* the first N are kernel contexts, the rest are user/vnic contexts */ - dd->num_rcv_contexts = total_contexts; + /* the first N are kernel contexts, the rest are user/netdev contexts */ + dd->num_rcv_contexts = + num_kernel_contexts + n_usr_ctxts + num_netdev_contexts; dd->n_krcv_queues = num_kernel_contexts; dd->first_dyn_alloc_ctxt = num_kernel_contexts; - dd->num_vnic_contexts = num_vnic_contexts; + dd->num_netdev_contexts = num_netdev_contexts; dd->num_user_contexts = n_usr_ctxts; dd->freectxts = n_usr_ctxts; dd_dev_info(dd, - "rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n", + "rcv contexts: chip %d, used %d (kernel %d, netdev %u, user %u)\n", rcv_contexts, (int)dd->num_rcv_contexts, (int)dd->n_krcv_queues, - dd->num_vnic_contexts, + dd->num_netdev_contexts, dd->num_user_contexts); /* @@ -14119,21 +14175,12 @@ static void init_early_variables(struct hfi1_devdata *dd) static void init_kdeth_qp(struct hfi1_devdata *dd) { - /* user changed the KDETH_QP */ - if (kdeth_qp != 0 && kdeth_qp >= 0xff) { - /* out of range or illegal value */ - dd_dev_err(dd, "Invalid KDETH queue pair prefix, ignoring"); - kdeth_qp = 0; - } - if (kdeth_qp == 0) /* not set, or failed range check */ - kdeth_qp = DEFAULT_KDETH_QP; - write_csr(dd, SEND_BTH_QP, - (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK) << + (RVT_KDETH_QP_PREFIX & SEND_BTH_QP_KDETH_QP_MASK) << SEND_BTH_QP_KDETH_QP_SHIFT); write_csr(dd, RCV_BTH_QP, - (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK) << + (RVT_KDETH_QP_PREFIX & RCV_BTH_QP_KDETH_QP_MASK) << RCV_BTH_QP_KDETH_QP_SHIFT); } @@ -14249,6 +14296,12 @@ static void complete_rsm_map_table(struct hfi1_devdata *dd, } } +/* Is a receive side mapping rule */ +static bool has_rsm_rule(struct hfi1_devdata *dd, u8 rule_index) +{ + return read_csr(dd, RCV_RSM_CFG + (8 * rule_index)) != 0; +} + /* * Add a receive side mapping rule. */ @@ -14485,77 +14538,138 @@ static void init_fecn_handling(struct hfi1_devdata *dd, rmt->used += total_cnt; } -/* Initialize RSM for VNIC */ -void hfi1_init_vnic_rsm(struct hfi1_devdata *dd) +static inline bool hfi1_is_rmt_full(int start, int spare) +{ + return (start + spare) > NUM_MAP_ENTRIES; +} + +static bool hfi1_netdev_update_rmt(struct hfi1_devdata *dd) { u8 i, j; u8 ctx_id = 0; u64 reg; u32 regoff; - struct rsm_rule_data rrd; + int rmt_start = hfi1_netdev_get_free_rmt_idx(dd); + int ctxt_count = hfi1_netdev_ctxt_count(dd); - if (hfi1_vnic_is_rsm_full(dd, NUM_VNIC_MAP_ENTRIES)) { - dd_dev_err(dd, "Vnic RSM disabled, rmt entries used = %d\n", - dd->vnic.rmt_start); - return; + /* We already have contexts mapped in RMT */ + if (has_rsm_rule(dd, RSM_INS_VNIC) || has_rsm_rule(dd, RSM_INS_AIP)) { + dd_dev_info(dd, "Contexts are already mapped in RMT\n"); + return true; + } + + if (hfi1_is_rmt_full(rmt_start, NUM_NETDEV_MAP_ENTRIES)) { + dd_dev_err(dd, "Not enough RMT entries used = %d\n", + rmt_start); + return false; } - dev_dbg(&(dd)->pcidev->dev, "Vnic rsm start = %d, end %d\n", - dd->vnic.rmt_start, - dd->vnic.rmt_start + NUM_VNIC_MAP_ENTRIES); + dev_dbg(&(dd)->pcidev->dev, "RMT start = %d, end %d\n", + rmt_start, + rmt_start + NUM_NETDEV_MAP_ENTRIES); /* Update RSM mapping table, 32 regs, 256 entries - 1 ctx per byte */ - regoff = RCV_RSM_MAP_TABLE + (dd->vnic.rmt_start / 8) * 8; + regoff = RCV_RSM_MAP_TABLE + (rmt_start / 8) * 8; reg = read_csr(dd, regoff); - for (i = 0; i < NUM_VNIC_MAP_ENTRIES; i++) { - /* Update map register with vnic context */ - j = (dd->vnic.rmt_start + i) % 8; + for (i = 0; i < NUM_NETDEV_MAP_ENTRIES; i++) { + /* Update map register with netdev context */ + j = (rmt_start + i) % 8; reg &= ~(0xffllu << (j * 8)); - reg |= (u64)dd->vnic.ctxt[ctx_id++]->ctxt << (j * 8); - /* Wrap up vnic ctx index */ - ctx_id %= dd->vnic.num_ctxt; + reg |= (u64)hfi1_netdev_get_ctxt(dd, ctx_id++)->ctxt << (j * 8); + /* Wrap up netdev ctx index */ + ctx_id %= ctxt_count; /* Write back map register */ - if (j == 7 || ((i + 1) == NUM_VNIC_MAP_ENTRIES)) { + if (j == 7 || ((i + 1) == NUM_NETDEV_MAP_ENTRIES)) { dev_dbg(&(dd)->pcidev->dev, - "Vnic rsm map reg[%d] =0x%llx\n", + "RMT[%d] =0x%llx\n", regoff - RCV_RSM_MAP_TABLE, reg); write_csr(dd, regoff, reg); regoff += 8; - if (i < (NUM_VNIC_MAP_ENTRIES - 1)) + if (i < (NUM_NETDEV_MAP_ENTRIES - 1)) reg = read_csr(dd, regoff); } } - /* Add rule for vnic */ - rrd.offset = dd->vnic.rmt_start; - rrd.pkt_type = 4; - /* Match 16B packets */ - rrd.field1_off = L2_TYPE_MATCH_OFFSET; - rrd.mask1 = L2_TYPE_MASK; - rrd.value1 = L2_16B_VALUE; - /* Match ETH L4 packets */ - rrd.field2_off = L4_TYPE_MATCH_OFFSET; - rrd.mask2 = L4_16B_TYPE_MASK; - rrd.value2 = L4_16B_ETH_VALUE; - /* Calc context from veswid and entropy */ - rrd.index1_off = L4_16B_HDR_VESWID_OFFSET; - rrd.index1_width = ilog2(NUM_VNIC_MAP_ENTRIES); - rrd.index2_off = L2_16B_ENTROPY_OFFSET; - rrd.index2_width = ilog2(NUM_VNIC_MAP_ENTRIES); - add_rsm_rule(dd, RSM_INS_VNIC, &rrd); - - /* Enable RSM if not already enabled */ + return true; +} + +static void hfi1_enable_rsm_rule(struct hfi1_devdata *dd, + int rule, struct rsm_rule_data *rrd) +{ + if (!hfi1_netdev_update_rmt(dd)) { + dd_dev_err(dd, "Failed to update RMT for RSM%d rule\n", rule); + return; + } + + add_rsm_rule(dd, rule, rrd); add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK); } +void hfi1_init_aip_rsm(struct hfi1_devdata *dd) +{ + /* + * go through with the initialisation only if this rule actually doesn't + * exist yet + */ + if (atomic_fetch_inc(&dd->ipoib_rsm_usr_num) == 0) { + int rmt_start = hfi1_netdev_get_free_rmt_idx(dd); + struct rsm_rule_data rrd = { + .offset = rmt_start, + .pkt_type = IB_PACKET_TYPE, + .field1_off = LRH_BTH_MATCH_OFFSET, + .mask1 = LRH_BTH_MASK, + .value1 = LRH_BTH_VALUE, + .field2_off = BTH_DESTQP_MATCH_OFFSET, + .mask2 = BTH_DESTQP_MASK, + .value2 = BTH_DESTQP_VALUE, + .index1_off = DETH_AIP_SQPN_SELECT_OFFSET + + ilog2(NUM_NETDEV_MAP_ENTRIES), + .index1_width = ilog2(NUM_NETDEV_MAP_ENTRIES), + .index2_off = DETH_AIP_SQPN_SELECT_OFFSET, + .index2_width = ilog2(NUM_NETDEV_MAP_ENTRIES) + }; + + hfi1_enable_rsm_rule(dd, RSM_INS_AIP, &rrd); + } +} + +/* Initialize RSM for VNIC */ +void hfi1_init_vnic_rsm(struct hfi1_devdata *dd) +{ + int rmt_start = hfi1_netdev_get_free_rmt_idx(dd); + struct rsm_rule_data rrd = { + /* Add rule for vnic */ + .offset = rmt_start, + .pkt_type = 4, + /* Match 16B packets */ + .field1_off = L2_TYPE_MATCH_OFFSET, + .mask1 = L2_TYPE_MASK, + .value1 = L2_16B_VALUE, + /* Match ETH L4 packets */ + .field2_off = L4_TYPE_MATCH_OFFSET, + .mask2 = L4_16B_TYPE_MASK, + .value2 = L4_16B_ETH_VALUE, + /* Calc context from veswid and entropy */ + .index1_off = L4_16B_HDR_VESWID_OFFSET, + .index1_width = ilog2(NUM_NETDEV_MAP_ENTRIES), + .index2_off = L2_16B_ENTROPY_OFFSET, + .index2_width = ilog2(NUM_NETDEV_MAP_ENTRIES) + }; + + hfi1_enable_rsm_rule(dd, RSM_INS_VNIC, &rrd); +} + void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd) { clear_rsm_rule(dd, RSM_INS_VNIC); +} - /* Disable RSM if used only by vnic */ - if (dd->vnic.rmt_start == 0) - clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK); +void hfi1_deinit_aip_rsm(struct hfi1_devdata *dd) +{ + /* only actually clear the rule if it's the last user asking to do so */ + if (atomic_fetch_add_unless(&dd->ipoib_rsm_usr_num, -1, 0) == 1) + clear_rsm_rule(dd, RSM_INS_AIP); } static int init_rxe(struct hfi1_devdata *dd) @@ -14574,8 +14688,8 @@ static int init_rxe(struct hfi1_devdata *dd) init_qos(dd, rmt); init_fecn_handling(dd, rmt); complete_rsm_map_table(dd, rmt); - /* record number of used rsm map entries for vnic */ - dd->vnic.rmt_start = rmt->used; + /* record number of used rsm map entries for netdev */ + hfi1_netdev_set_free_rmt_idx(dd, rmt->used); kfree(rmt); /* @@ -15129,6 +15243,10 @@ int hfi1_init_dd(struct hfi1_devdata *dd) (dd->revision >> CCE_REVISION_SW_SHIFT) & CCE_REVISION_SW_MASK); + /* alloc netdev data */ + if (hfi1_netdev_alloc(dd)) + goto bail_cleanup; + ret = set_up_context_variables(dd); if (ret) goto bail_cleanup; @@ -15229,6 +15347,7 @@ bail_clear_intr: hfi1_comp_vectors_clean_up(dd); msix_clean_up_interrupts(dd); bail_cleanup: + hfi1_netdev_free(dd); hfi1_pcie_ddcleanup(dd); bail_free: hfi1_free_devdata(dd); diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 725509261016..2c6f2de74d4d 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1,7 +1,7 @@ #ifndef _CHIP_H #define _CHIP_H /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -1447,6 +1447,7 @@ irqreturn_t general_interrupt(int irq, void *data); irqreturn_t sdma_interrupt(int irq, void *data); irqreturn_t receive_context_interrupt(int irq, void *data); irqreturn_t receive_context_thread(int irq, void *data); +irqreturn_t receive_context_interrupt_napi(int irq, void *data); int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set); void init_qsfp_int(struct hfi1_devdata *dd); @@ -1455,6 +1456,8 @@ void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr); void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr); void reset_interrupts(struct hfi1_devdata *dd); u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx); +void hfi1_init_aip_rsm(struct hfi1_devdata *dd); +void hfi1_deinit_aip_rsm(struct hfi1_devdata *dd); /* * Interrupt source table. diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 40a1ff0c8a8e..ff423e546b80 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -72,13 +72,6 @@ * compilation unit */ -/* - * If a packet's QP[23:16] bits match this value, then it is - * a PSM packet and the hardware will expect a KDETH header - * following the BTH. - */ -#define DEFAULT_KDETH_QP 0x80 - /* driver/hw feature set bitmask */ #define HFI1_CAP_USER_SHIFT 24 #define HFI1_CAP_MASK ((1UL << HFI1_CAP_USER_SHIFT) - 1) @@ -149,7 +142,8 @@ HFI1_CAP_NO_INTEGRITY | \ HFI1_CAP_PKEY_CHECK | \ HFI1_CAP_TID_RDMA | \ - HFI1_CAP_OPFN) << \ + HFI1_CAP_OPFN | \ + HFI1_CAP_AIP) << \ HFI1_CAP_USER_SHIFT) /* * Set of capabilities that need to be enabled for kernel context in @@ -166,6 +160,7 @@ HFI1_CAP_PKEY_CHECK | \ HFI1_CAP_MULTI_PKT_EGR | \ HFI1_CAP_EXTENDED_PSN | \ + HFI1_CAP_AIP | \ ((HFI1_CAP_HDRSUPP | \ HFI1_CAP_MULTI_PKT_EGR | \ HFI1_CAP_STATIC_RATE_CTRL | \ diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 049d15befe58..a40701a6e1b6 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015-2018 Intel Corporation. + * Copyright(c) 2015-2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -54,6 +54,7 @@ #include <linux/module.h> #include <linux/prefetch.h> #include <rdma/ib_verbs.h> +#include <linux/etherdevice.h> #include "hfi.h" #include "trace.h" @@ -63,6 +64,9 @@ #include "vnic.h" #include "fault.h" +#include "ipoib.h" +#include "netdev.h" + #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -748,6 +752,39 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread) return ret; } +static void process_rcv_packet_napi(struct hfi1_packet *packet) +{ + packet->etype = rhf_rcv_type(packet->rhf); + + /* total length */ + packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */ + /* retrieve eager buffer details */ + packet->etail = rhf_egr_index(packet->rhf); + packet->ebuf = get_egrbuf(packet->rcd, packet->rhf, + &packet->updegr); + /* + * Prefetch the contents of the eager buffer. It is + * OK to send a negative length to prefetch_range(). + * The +2 is the size of the RHF. + */ + prefetch_range(packet->ebuf, + packet->tlen - ((packet->rcd->rcvhdrqentsize - + (rhf_hdrq_offset(packet->rhf) + + 2)) * 4)); + + packet->rcd->rhf_rcv_function_map[packet->etype](packet); + packet->numpkt++; + + /* Set up for the next packet */ + packet->rhqoff += packet->rsize; + if (packet->rhqoff >= packet->maxcnt) + packet->rhqoff = 0; + + packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff + + packet->rcd->rhf_offset; + packet->rhf = rhf_to_cpu(packet->rhf_addr); +} + static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) { int ret; @@ -827,6 +864,36 @@ static inline void finish_packet(struct hfi1_packet *packet) } /* + * handle_receive_interrupt_napi_fp - receive a packet + * @rcd: the context + * @budget: polling budget + * + * Called from interrupt handler for receive interrupt. + * This is the fast path interrupt handler + * when executing napi soft irq environment. + */ +int handle_receive_interrupt_napi_fp(struct hfi1_ctxtdata *rcd, int budget) +{ + struct hfi1_packet packet; + + init_packet(rcd, &packet); + if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) + goto bail; + + while (packet.numpkt < budget) { + process_rcv_packet_napi(&packet); + if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf))) + break; + + process_rcv_update(0, &packet); + } + hfi1_set_rcd_head(rcd, packet.rhqoff); +bail: + finish_packet(&packet); + return packet.numpkt; +} + +/* * Handle receive interrupts when using the no dma rtail option. */ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread) @@ -1074,6 +1141,63 @@ bail: } /* + * handle_receive_interrupt_napi_sp - receive a packet + * @rcd: the context + * @budget: polling budget + * + * Called from interrupt handler for errors or receive interrupt. + * This is the slow path interrupt handler + * when executing napi soft irq environment. + */ +int handle_receive_interrupt_napi_sp(struct hfi1_ctxtdata *rcd, int budget) +{ + struct hfi1_devdata *dd = rcd->dd; + int last = RCV_PKT_OK; + bool needset = true; + struct hfi1_packet packet; + + init_packet(rcd, &packet); + if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) + goto bail; + + while (last != RCV_PKT_DONE && packet.numpkt < budget) { + if (hfi1_need_drop(dd)) { + /* On to the next packet */ + packet.rhqoff += packet.rsize; + packet.rhf_addr = (__le32 *)rcd->rcvhdrq + + packet.rhqoff + + rcd->rhf_offset; + packet.rhf = rhf_to_cpu(packet.rhf_addr); + + } else { + if (set_armed_to_active(&packet)) + goto bail; + process_rcv_packet_napi(&packet); + } + + if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf))) + last = RCV_PKT_DONE; + + if (needset) { + needset = false; + set_all_fastpath(dd, rcd); + } + + process_rcv_update(last, &packet); + } + + hfi1_set_rcd_head(rcd, packet.rhqoff); + +bail: + /* + * Always write head at end, and setup rcv interrupt, even + * if no packets were processed. + */ + finish_packet(&packet); + return packet.numpkt; +} + +/* * We may discover in the interrupt that the hardware link state has * changed from ARMED to ACTIVE (due to the arrival of a non-SC15 packet), * and we need to update the driver's notion of the link state. We cannot @@ -1550,6 +1674,82 @@ void handle_eflags(struct hfi1_packet *packet) show_eflags_errs(packet); } +static void hfi1_ipoib_ib_rcv(struct hfi1_packet *packet) +{ + struct hfi1_ibport *ibp; + struct net_device *netdev; + struct hfi1_ctxtdata *rcd = packet->rcd; + struct napi_struct *napi = rcd->napi; + struct sk_buff *skb; + struct hfi1_netdev_rxq *rxq = container_of(napi, + struct hfi1_netdev_rxq, napi); + u32 extra_bytes; + u32 tlen, qpnum; + bool do_work, do_cnp; + struct hfi1_ipoib_dev_priv *priv; + + trace_hfi1_rcvhdr(packet); + + hfi1_setup_ib_header(packet); + + packet->ohdr = &((struct ib_header *)packet->hdr)->u.oth; + packet->grh = NULL; + + if (unlikely(rhf_err_flags(packet->rhf))) { + handle_eflags(packet); + return; + } + + qpnum = ib_bth_get_qpn(packet->ohdr); + netdev = hfi1_netdev_get_data(rcd->dd, qpnum); + if (!netdev) + goto drop_no_nd; + + trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf))); + trace_ctxt_rsm_hist(rcd->ctxt); + + /* handle congestion notifications */ + do_work = hfi1_may_ecn(packet); + if (unlikely(do_work)) { + do_cnp = (packet->opcode != IB_OPCODE_CNP); + (void)hfi1_process_ecn_slowpath(hfi1_ipoib_priv(netdev)->qp, + packet, do_cnp); + } + + /* + * We have split point after last byte of DETH + * lets strip padding and CRC and ICRC. + * tlen is whole packet len so we need to + * subtract header size as well. + */ + tlen = packet->tlen; + extra_bytes = ib_bth_get_pad(packet->ohdr) + (SIZE_OF_CRC << 2) + + packet->hlen; + if (unlikely(tlen < extra_bytes)) + goto drop; + + tlen -= extra_bytes; + + skb = hfi1_ipoib_prepare_skb(rxq, tlen, packet->ebuf); + if (unlikely(!skb)) + goto drop; + + priv = hfi1_ipoib_priv(netdev); + hfi1_ipoib_update_rx_netstats(priv, 1, skb->len); + + skb->dev = netdev; + skb->pkt_type = PACKET_HOST; + netif_receive_skb(skb); + + return; + +drop: + ++netdev->stats.rx_dropped; +drop_no_nd: + ibp = rcd_to_iport(packet->rcd); + ++ibp->rvp.n_pkt_drops; +} + /* * The following functions are called by the interrupt handler. They are type * specific handlers for each packet type. @@ -1572,28 +1772,10 @@ static void process_receive_ib(struct hfi1_packet *packet) hfi1_ib_rcv(packet); } -static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet) -{ - /* Packet received in VNIC context via RSM */ - if (packet->rcd->is_vnic) - return true; - - if ((hfi1_16B_get_l2(packet->ebuf) == OPA_16B_L2_TYPE) && - (hfi1_16B_get_l4(packet->ebuf) == OPA_16B_L4_ETHR)) - return true; - - return false; -} - static void process_receive_bypass(struct hfi1_packet *packet) { struct hfi1_devdata *dd = packet->rcd->dd; - if (hfi1_is_vnic_packet(packet)) { - hfi1_vnic_bypass_rcv(packet); - return; - } - if (hfi1_setup_bypass_packet(packet)) return; @@ -1757,3 +1939,14 @@ const rhf_rcv_function_ptr normal_rhf_rcv_functions[] = { [RHF_RCV_TYPE_INVALID6] = process_receive_invalid, [RHF_RCV_TYPE_INVALID7] = process_receive_invalid, }; + +const rhf_rcv_function_ptr netdev_rhf_rcv_functions[] = { + [RHF_RCV_TYPE_EXPECTED] = process_receive_invalid, + [RHF_RCV_TYPE_EAGER] = process_receive_invalid, + [RHF_RCV_TYPE_IB] = hfi1_ipoib_ib_rcv, + [RHF_RCV_TYPE_ERROR] = process_receive_error, + [RHF_RCV_TYPE_BYPASS] = hfi1_vnic_bypass_rcv, + [RHF_RCV_TYPE_INVALID5] = process_receive_invalid, + [RHF_RCV_TYPE_INVALID6] = process_receive_invalid, + [RHF_RCV_TYPE_INVALID7] = process_receive_invalid, +}; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index e7fdd70c6e78..8ca51e43cf53 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015-2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -1264,7 +1264,7 @@ static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len) memset(&binfo, 0, sizeof(binfo)); binfo.hw_version = dd->revision; binfo.sw_version = HFI1_KERN_SWVERSION; - binfo.bthqp = kdeth_qp; + binfo.bthqp = RVT_KDETH_QP_PREFIX; binfo.jkey = uctxt->jkey; /* * If more than 64 contexts are enabled the allocated credit diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index b06c2594105a..b4c6bff60a4e 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1,7 +1,7 @@ #ifndef _HFI1_KERNEL_H #define _HFI1_KERNEL_H /* - * Copyright(c) 2015-2018 Intel Corporation. + * Copyright(c) 2015-2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -233,6 +233,8 @@ struct hfi1_ctxtdata { intr_handler fast_handler; /** slow handler */ intr_handler slow_handler; + /* napi pointer assiociated with netdev */ + struct napi_struct *napi; /* verbs rx_stats per rcd */ struct hfi1_opcode_stats_perctx *opstats; /* clear interrupt mask */ @@ -383,11 +385,11 @@ struct hfi1_packet { u32 rhqoff; u32 dlid; u32 slid; + int numpkt; u16 tlen; s16 etail; u16 pkey; u8 hlen; - u8 numpkt; u8 rsize; u8 updegr; u8 etype; @@ -985,7 +987,7 @@ typedef void (*hfi1_make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct rvt_swqe *wqe); extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[]; - +extern const rhf_rcv_function_ptr netdev_rhf_rcv_functions[]; /* return values for the RHF receive functions */ #define RHF_RCV_CONTINUE 0 /* keep going */ @@ -1045,23 +1047,10 @@ struct hfi1_asic_data { #define NUM_MAP_ENTRIES 256 #define NUM_MAP_REGS 32 -/* - * Number of VNIC contexts used. Ensure it is less than or equal to - * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE). - */ -#define HFI1_NUM_VNIC_CTXT 8 - -/* Number of VNIC RSM entries */ -#define NUM_VNIC_MAP_ENTRIES 8 - /* Virtual NIC information */ struct hfi1_vnic_data { - struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT]; struct kmem_cache *txreq_cache; - struct xarray vesws; u8 num_vports; - u8 rmt_start; - u8 num_ctxt; }; struct hfi1_vnic_vport_info; @@ -1167,8 +1156,8 @@ struct hfi1_devdata { u64 z_send_schedule; u64 __percpu *send_schedule; - /* number of reserved contexts for VNIC usage */ - u16 num_vnic_contexts; + /* number of reserved contexts for netdev usage */ + u16 num_netdev_contexts; /* number of receive contexts in use by the driver */ u32 num_rcv_contexts; /* number of pio send contexts in use by the driver */ @@ -1417,12 +1406,12 @@ struct hfi1_devdata { struct hfi1_vnic_data vnic; /* Lock to protect IRQ SRC register access */ spinlock_t irq_src_lock; -}; + int vnic_num_vports; + struct net_device *dummy_netdev; -static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare) -{ - return (dd->vnic.rmt_start + spare) > NUM_MAP_ENTRIES; -} + /* Keeps track of IPoIB RSM rule users */ + atomic_t ipoib_rsm_usr_num; +}; /* 8051 firmware version helper */ #define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c)) @@ -1500,6 +1489,8 @@ struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt); int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread); +int handle_receive_interrupt_napi_fp(struct hfi1_ctxtdata *rcd, int budget); +int handle_receive_interrupt_napi_sp(struct hfi1_ctxtdata *rcd, int budget); void set_all_slowpath(struct hfi1_devdata *dd); extern const struct pci_device_id hfi1_pci_tbl[]; @@ -2250,7 +2241,6 @@ extern int num_user_contexts; extern unsigned long n_krcvqs; extern uint krcvqs[]; extern int krcvqsset; -extern uint kdeth_qp; extern uint loopback; extern uint quick_linkup; extern uint rcv_intr_timeout; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 3759d9233a1c..5eed4360695f 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -69,6 +69,7 @@ #include "affinity.h" #include "vnic.h" #include "exp_rcv.h" +#include "netdev.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -374,6 +375,7 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, rcd->numa_id = numa; rcd->rcv_array_groups = dd->rcv_entries.ngroups; rcd->rhf_rcv_function_map = normal_rhf_rcv_functions; + rcd->msix_intr = CCE_NUM_MSIX_VECTORS; mutex_init(&rcd->exp_mutex); spin_lock_init(&rcd->exp_lock); @@ -1316,6 +1318,7 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, goto bail; } + atomic_set(&dd->ipoib_rsm_usr_num, 0); return dd; bail: @@ -1663,9 +1666,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* do the generic initialization */ initfail = hfi1_init(dd, 0); - /* setup vnic */ - hfi1_vnic_setup(dd); - ret = hfi1_register_ib_device(dd); /* @@ -1704,7 +1704,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) hfi1_device_remove(dd); if (!ret) hfi1_unregister_ib_device(dd); - hfi1_vnic_cleanup(dd); postinit_cleanup(dd); if (initfail) ret = initfail; @@ -1749,8 +1748,8 @@ static void remove_one(struct pci_dev *pdev) /* unregister from IB core */ hfi1_unregister_ib_device(dd); - /* cleanup vnic */ - hfi1_vnic_cleanup(dd); + /* free netdev data */ + hfi1_netdev_free(dd); /* * Disable the IB link, disable interrupts on the device, diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h new file mode 100644 index 000000000000..185c9b02c974 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/ipoib.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* + * Copyright(c) 2020 Intel Corporation. + * + */ + +/* + * This file contains HFI1 support for IPOIB functionality + */ + +#ifndef HFI1_IPOIB_H +#define HFI1_IPOIB_H + +#include <linux/types.h> +#include <linux/stddef.h> +#include <linux/atomic.h> +#include <linux/netdevice.h> +#include <linux/slab.h> +#include <linux/skbuff.h> +#include <linux/list.h> +#include <linux/if_infiniband.h> + +#include "hfi.h" +#include "iowait.h" +#include "netdev.h" + +#include <rdma/ib_verbs.h> + +#define HFI1_IPOIB_ENTROPY_SHIFT 24 + +#define HFI1_IPOIB_TXREQ_NAME_LEN 32 + +#define HFI1_IPOIB_PSEUDO_LEN 20 +#define HFI1_IPOIB_ENCAP_LEN 4 + +struct hfi1_ipoib_dev_priv; + +union hfi1_ipoib_flow { + u16 as_int; + struct { + u8 tx_queue; + u8 sc5; + } __attribute__((__packed__)); +}; + +/** + * struct hfi1_ipoib_circ_buf - List of items to be processed + * @items: ring of items + * @head: ring head + * @tail: ring tail + * @max_items: max items + 1 that the ring can contain + * @producer_lock: producer sync lock + * @consumer_lock: consumer sync lock + */ +struct hfi1_ipoib_circ_buf { + void **items; + unsigned long head; + unsigned long tail; + unsigned long max_items; + spinlock_t producer_lock; /* head sync lock */ + spinlock_t consumer_lock; /* tail sync lock */ +}; + +/** + * struct hfi1_ipoib_txq - IPOIB per Tx queue information + * @priv: private pointer + * @sde: sdma engine + * @tx_list: tx request list + * @sent_txreqs: count of txreqs posted to sdma + * @flow: tracks when list needs to be flushed for a flow change + * @q_idx: ipoib Tx queue index + * @pkts_sent: indicator packets have been sent from this queue + * @wait: iowait structure + * @complete_txreqs: count of txreqs completed by sdma + * @napi: pointer to tx napi interface + * @tx_ring: ring of ipoib txreqs to be reaped by napi callback + */ +struct hfi1_ipoib_txq { + struct hfi1_ipoib_dev_priv *priv; + struct sdma_engine *sde; + struct list_head tx_list; + u64 sent_txreqs; + union hfi1_ipoib_flow flow; + u8 q_idx; + bool pkts_sent; + struct iowait wait; + + atomic64_t ____cacheline_aligned_in_smp complete_txreqs; + struct napi_struct *napi; + struct hfi1_ipoib_circ_buf tx_ring; +}; + +struct hfi1_ipoib_dev_priv { + struct hfi1_devdata *dd; + struct net_device *netdev; + struct ib_device *device; + struct hfi1_ipoib_txq *txqs; + struct kmem_cache *txreq_cache; + struct napi_struct *tx_napis; + u16 pkey; + u16 pkey_index; + u32 qkey; + u8 port_num; + + const struct net_device_ops *netdev_ops; + struct rvt_qp *qp; + struct pcpu_sw_netstats __percpu *netstats; +}; + +/* hfi1 ipoib rdma netdev's private data structure */ +struct hfi1_ipoib_rdma_netdev { + struct rdma_netdev rn; /* keep this first */ + /* followed by device private data */ + struct hfi1_ipoib_dev_priv dev_priv; +}; + +static inline struct hfi1_ipoib_dev_priv * +hfi1_ipoib_priv(const struct net_device *dev) +{ + return &((struct hfi1_ipoib_rdma_netdev *)netdev_priv(dev))->dev_priv; +} + +static inline void +hfi1_ipoib_update_rx_netstats(struct hfi1_ipoib_dev_priv *priv, + u64 packets, + u64 bytes) +{ + struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats); + + u64_stats_update_begin(&netstats->syncp); + netstats->rx_packets += packets; + netstats->rx_bytes += bytes; + u64_stats_update_end(&netstats->syncp); +} + +static inline void +hfi1_ipoib_update_tx_netstats(struct hfi1_ipoib_dev_priv *priv, + u64 packets, + u64 bytes) +{ + struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats); + + u64_stats_update_begin(&netstats->syncp); + netstats->tx_packets += packets; + netstats->tx_bytes += bytes; + u64_stats_update_end(&netstats->syncp); +} + +int hfi1_ipoib_send_dma(struct net_device *dev, + struct sk_buff *skb, + struct ib_ah *address, + u32 dqpn); + +int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv); +void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv); + +int hfi1_ipoib_rxq_init(struct net_device *dev); +void hfi1_ipoib_rxq_deinit(struct net_device *dev); + +void hfi1_ipoib_napi_tx_enable(struct net_device *dev); +void hfi1_ipoib_napi_tx_disable(struct net_device *dev); + +struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq, + int size, void *data); + +int hfi1_ipoib_rn_get_params(struct ib_device *device, + u8 port_num, + enum rdma_netdev_t type, + struct rdma_netdev_alloc_params *params); + +#endif /* _IPOIB_H */ diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c new file mode 100644 index 000000000000..014351ebbefa --- /dev/null +++ b/drivers/infiniband/hw/hfi1/ipoib_main.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright(c) 2020 Intel Corporation. + * + */ + +/* + * This file contains HFI1 support for ipoib functionality + */ + +#include "ipoib.h" +#include "hfi.h" + +static u32 qpn_from_mac(u8 *mac_arr) +{ + return (u32)mac_arr[1] << 16 | mac_arr[2] << 8 | mac_arr[3]; +} + +static int hfi1_ipoib_dev_init(struct net_device *dev) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + int ret; + + priv->netstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + + ret = priv->netdev_ops->ndo_init(dev); + if (ret) + return ret; + + ret = hfi1_netdev_add_data(priv->dd, + qpn_from_mac(priv->netdev->dev_addr), + dev); + if (ret < 0) { + priv->netdev_ops->ndo_uninit(dev); + return ret; + } + + return 0; +} + +static void hfi1_ipoib_dev_uninit(struct net_device *dev) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + + hfi1_netdev_remove_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr)); + + priv->netdev_ops->ndo_uninit(dev); +} + +static int hfi1_ipoib_dev_open(struct net_device *dev) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + int ret; + + ret = priv->netdev_ops->ndo_open(dev); + if (!ret) { + struct hfi1_ibport *ibp = to_iport(priv->device, + priv->port_num); + struct rvt_qp *qp; + u32 qpn = qpn_from_mac(priv->netdev->dev_addr); + + rcu_read_lock(); + qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn); + if (!qp) { + rcu_read_unlock(); + priv->netdev_ops->ndo_stop(dev); + return -EINVAL; + } + rvt_get_qp(qp); + priv->qp = qp; + rcu_read_unlock(); + + hfi1_netdev_enable_queues(priv->dd); + hfi1_ipoib_napi_tx_enable(dev); + } + + return ret; +} + +static int hfi1_ipoib_dev_stop(struct net_device *dev) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + + if (!priv->qp) + return 0; + + hfi1_ipoib_napi_tx_disable(dev); + hfi1_netdev_disable_queues(priv->dd); + + rvt_put_qp(priv->qp); + priv->qp = NULL; + + return priv->netdev_ops->ndo_stop(dev); +} + +static void hfi1_ipoib_dev_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *storage) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + u64 rx_packets = 0ull; + u64 rx_bytes = 0ull; + u64 tx_packets = 0ull; + u64 tx_bytes = 0ull; + int i; + + netdev_stats_to_stats64(storage, &dev->stats); + + for_each_possible_cpu(i) { + const struct pcpu_sw_netstats *stats; + unsigned int start; + u64 trx_packets; + u64 trx_bytes; + u64 ttx_packets; + u64 ttx_bytes; + + stats = per_cpu_ptr(priv->netstats, i); + do { + start = u64_stats_fetch_begin_irq(&stats->syncp); + trx_packets = stats->rx_packets; + trx_bytes = stats->rx_bytes; + ttx_packets = stats->tx_packets; + ttx_bytes = stats->tx_bytes; + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + + rx_packets += trx_packets; + rx_bytes += trx_bytes; + tx_packets += ttx_packets; + tx_bytes += ttx_bytes; + } + + storage->rx_packets += rx_packets; + storage->rx_bytes += rx_bytes; + storage->tx_packets += tx_packets; + storage->tx_bytes += tx_bytes; +} + +static const struct net_device_ops hfi1_ipoib_netdev_ops = { + .ndo_init = hfi1_ipoib_dev_init, + .ndo_uninit = hfi1_ipoib_dev_uninit, + .ndo_open = hfi1_ipoib_dev_open, + .ndo_stop = hfi1_ipoib_dev_stop, + .ndo_get_stats64 = hfi1_ipoib_dev_get_stats64, +}; + +static int hfi1_ipoib_send(struct net_device *dev, + struct sk_buff *skb, + struct ib_ah *address, + u32 dqpn) +{ + return hfi1_ipoib_send_dma(dev, skb, address, dqpn); +} + +static int hfi1_ipoib_mcast_attach(struct net_device *dev, + struct ib_device *device, + union ib_gid *mgid, + u16 mlid, + int set_qkey, + u32 qkey) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + u32 qpn = (u32)qpn_from_mac(priv->netdev->dev_addr); + struct hfi1_ibport *ibp = to_iport(priv->device, priv->port_num); + struct rvt_qp *qp; + int ret = -EINVAL; + + rcu_read_lock(); + + qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn); + if (qp) { + rvt_get_qp(qp); + rcu_read_unlock(); + if (set_qkey) + priv->qkey = qkey; + + /* attach QP to multicast group */ + ret = ib_attach_mcast(&qp->ibqp, mgid, mlid); + rvt_put_qp(qp); + } else { + rcu_read_unlock(); + } + + return ret; +} + +static int hfi1_ipoib_mcast_detach(struct net_device *dev, + struct ib_device *device, + union ib_gid *mgid, + u16 mlid) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + u32 qpn = (u32)qpn_from_mac(priv->netdev->dev_addr); + struct hfi1_ibport *ibp = to_iport(priv->device, priv->port_num); + struct rvt_qp *qp; + int ret = -EINVAL; + + rcu_read_lock(); + + qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn); + if (qp) { + rvt_get_qp(qp); + rcu_read_unlock(); + ret = ib_detach_mcast(&qp->ibqp, mgid, mlid); + rvt_put_qp(qp); + } else { + rcu_read_unlock(); + } + return ret; +} + +static void hfi1_ipoib_netdev_dtor(struct net_device *dev) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + + hfi1_ipoib_txreq_deinit(priv); + hfi1_ipoib_rxq_deinit(priv->netdev); + + free_percpu(priv->netstats); +} + +static void hfi1_ipoib_free_rdma_netdev(struct net_device *dev) +{ + hfi1_ipoib_netdev_dtor(dev); + free_netdev(dev); +} + +static void hfi1_ipoib_set_id(struct net_device *dev, int id) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + + priv->pkey_index = (u16)id; + ib_query_pkey(priv->device, + priv->port_num, + priv->pkey_index, + &priv->pkey); +} + +static int hfi1_ipoib_setup_rn(struct ib_device *device, + u8 port_num, + struct net_device *netdev, + void *param) +{ + struct hfi1_devdata *dd = dd_from_ibdev(device); + struct rdma_netdev *rn = netdev_priv(netdev); + struct hfi1_ipoib_dev_priv *priv; + int rc; + + rn->send = hfi1_ipoib_send; + rn->attach_mcast = hfi1_ipoib_mcast_attach; + rn->detach_mcast = hfi1_ipoib_mcast_detach; + rn->set_id = hfi1_ipoib_set_id; + rn->hca = device; + rn->port_num = port_num; + rn->mtu = netdev->mtu; + + priv = hfi1_ipoib_priv(netdev); + priv->dd = dd; + priv->netdev = netdev; + priv->device = device; + priv->port_num = port_num; + priv->netdev_ops = netdev->netdev_ops; + + netdev->netdev_ops = &hfi1_ipoib_netdev_ops; + + ib_query_pkey(device, port_num, priv->pkey_index, &priv->pkey); + + rc = hfi1_ipoib_txreq_init(priv); + if (rc) { + dd_dev_err(dd, "IPoIB netdev TX init - failed(%d)\n", rc); + hfi1_ipoib_free_rdma_netdev(netdev); + return rc; + } + + rc = hfi1_ipoib_rxq_init(netdev); + if (rc) { + dd_dev_err(dd, "IPoIB netdev RX init - failed(%d)\n", rc); + hfi1_ipoib_free_rdma_netdev(netdev); + return rc; + } + + netdev->priv_destructor = hfi1_ipoib_netdev_dtor; + netdev->needs_free_netdev = true; + + return 0; +} + +int hfi1_ipoib_rn_get_params(struct ib_device *device, + u8 port_num, + enum rdma_netdev_t type, + struct rdma_netdev_alloc_params *params) +{ + struct hfi1_devdata *dd = dd_from_ibdev(device); + + if (type != RDMA_NETDEV_IPOIB) + return -EOPNOTSUPP; + + if (!HFI1_CAP_IS_KSET(AIP) || !dd->num_netdev_contexts) + return -EOPNOTSUPP; + + if (!port_num || port_num > dd->num_pports) + return -EINVAL; + + params->sizeof_priv = sizeof(struct hfi1_ipoib_rdma_netdev); + params->txqs = dd->num_sdma; + params->rxqs = dd->num_netdev_contexts; + params->param = NULL; + params->initialize_rdma_netdev = hfi1_ipoib_setup_rn; + + return 0; +} diff --git a/drivers/infiniband/hw/hfi1/ipoib_rx.c b/drivers/infiniband/hw/hfi1/ipoib_rx.c new file mode 100644 index 000000000000..3afa7545242c --- /dev/null +++ b/drivers/infiniband/hw/hfi1/ipoib_rx.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright(c) 2020 Intel Corporation. + * + */ + +#include "netdev.h" +#include "ipoib.h" + +#define HFI1_IPOIB_SKB_PAD ((NET_SKB_PAD) + (NET_IP_ALIGN)) + +static void copy_ipoib_buf(struct sk_buff *skb, void *data, int size) +{ + void *dst_data; + + skb_checksum_none_assert(skb); + skb->protocol = *((__be16 *)data); + + dst_data = skb_put(skb, size); + memcpy(dst_data, data, size); + skb->mac_header = HFI1_IPOIB_PSEUDO_LEN; + skb_pull(skb, HFI1_IPOIB_ENCAP_LEN); +} + +static struct sk_buff *prepare_frag_skb(struct napi_struct *napi, int size) +{ + struct sk_buff *skb; + int skb_size = SKB_DATA_ALIGN(size + HFI1_IPOIB_SKB_PAD); + void *frag; + + skb_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + skb_size = SKB_DATA_ALIGN(skb_size); + frag = napi_alloc_frag(skb_size); + + if (unlikely(!frag)) + return napi_alloc_skb(napi, size); + + skb = build_skb(frag, skb_size); + + if (unlikely(!skb)) { + skb_free_frag(frag); + return NULL; + } + + skb_reserve(skb, HFI1_IPOIB_SKB_PAD); + return skb; +} + +struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq, + int size, void *data) +{ + struct napi_struct *napi = &rxq->napi; + int skb_size = size + HFI1_IPOIB_ENCAP_LEN; + struct sk_buff *skb; + + /* + * For smaller(4k + skb overhead) allocations we will go using + * napi cache. Otherwise we will try to use napi frag cache. + */ + if (size <= SKB_WITH_OVERHEAD(PAGE_SIZE)) + skb = napi_alloc_skb(napi, skb_size); + else + skb = prepare_frag_skb(napi, skb_size); + + if (unlikely(!skb)) + return NULL; + + copy_ipoib_buf(skb, data, size); + + return skb; +} + +int hfi1_ipoib_rxq_init(struct net_device *netdev) +{ + struct hfi1_ipoib_dev_priv *ipoib_priv = hfi1_ipoib_priv(netdev); + struct hfi1_devdata *dd = ipoib_priv->dd; + int ret; + + ret = hfi1_netdev_rx_init(dd); + if (ret) + return ret; + + hfi1_init_aip_rsm(dd); + + return ret; +} + +void hfi1_ipoib_rxq_deinit(struct net_device *netdev) +{ + struct hfi1_ipoib_dev_priv *ipoib_priv = hfi1_ipoib_priv(netdev); + struct hfi1_devdata *dd = ipoib_priv->dd; + + hfi1_deinit_aip_rsm(dd); + hfi1_netdev_rx_destroy(dd); +} diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c new file mode 100644 index 000000000000..883cb9d48022 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -0,0 +1,828 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright(c) 2020 Intel Corporation. + * + */ + +/* + * This file contains HFI1 support for IPOIB SDMA functionality + */ + +#include <linux/log2.h> +#include <linux/circ_buf.h> + +#include "sdma.h" +#include "verbs.h" +#include "trace_ibhdrs.h" +#include "ipoib.h" + +/* Add a convenience helper */ +#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1)) +#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size) +#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size) + +/** + * struct ipoib_txreq - IPOIB transmit descriptor + * @txreq: sdma transmit request + * @sdma_hdr: 9b ib headers + * @sdma_status: status returned by sdma engine + * @priv: ipoib netdev private data + * @txq: txq on which skb was output + * @skb: skb to send + */ +struct ipoib_txreq { + struct sdma_txreq txreq; + struct hfi1_sdma_header sdma_hdr; + int sdma_status; + struct hfi1_ipoib_dev_priv *priv; + struct hfi1_ipoib_txq *txq; + struct sk_buff *skb; +}; + +struct ipoib_txparms { + struct hfi1_devdata *dd; + struct rdma_ah_attr *ah_attr; + struct hfi1_ibport *ibp; + struct hfi1_ipoib_txq *txq; + union hfi1_ipoib_flow flow; + u32 dqpn; + u8 hdr_dwords; + u8 entropy; +}; + +static u64 hfi1_ipoib_txreqs(const u64 sent, const u64 completed) +{ + return sent - completed; +} + +static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq) +{ + if (unlikely(hfi1_ipoib_txreqs(++txq->sent_txreqs, + atomic64_read(&txq->complete_txreqs)) >= + min_t(unsigned int, txq->priv->netdev->tx_queue_len, + txq->tx_ring.max_items - 1))) + netif_stop_subqueue(txq->priv->netdev, txq->q_idx); +} + +static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq) +{ + struct net_device *dev = txq->priv->netdev; + + /* If the queue is already running just return */ + if (likely(!__netif_subqueue_stopped(dev, txq->q_idx))) + return; + + /* If shutting down just return as queue state is irrelevant */ + if (unlikely(dev->reg_state != NETREG_REGISTERED)) + return; + + /* + * When the queue has been drained to less than half full it will be + * restarted. + * The size of the txreq ring is fixed at initialization. + * The tx queue len can be adjusted upward while the interface is + * running. + * The tx queue len can be large enough to overflow the txreq_ring. + * Use the minimum of the current tx_queue_len or the rings max txreqs + * to protect against ring overflow. + */ + if (hfi1_ipoib_txreqs(txq->sent_txreqs, + atomic64_read(&txq->complete_txreqs)) + < min_t(unsigned int, dev->tx_queue_len, + txq->tx_ring.max_items) >> 1) + netif_wake_subqueue(dev, txq->q_idx); +} + +static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget) +{ + struct hfi1_ipoib_dev_priv *priv = tx->priv; + + if (likely(!tx->sdma_status)) { + hfi1_ipoib_update_tx_netstats(priv, 1, tx->skb->len); + } else { + ++priv->netdev->stats.tx_errors; + dd_dev_warn(priv->dd, + "%s: Status = 0x%x pbc 0x%llx txq = %d sde = %d\n", + __func__, tx->sdma_status, + le64_to_cpu(tx->sdma_hdr.pbc), tx->txq->q_idx, + tx->txq->sde->this_idx); + } + + napi_consume_skb(tx->skb, budget); + sdma_txclean(priv->dd, &tx->txreq); + kmem_cache_free(priv->txreq_cache, tx); +} + +static int hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq, int budget) +{ + struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring; + unsigned long head; + unsigned long tail; + unsigned int max_tx; + int work_done; + int tx_count; + + spin_lock_bh(&tx_ring->consumer_lock); + + /* Read index before reading contents at that index. */ + head = smp_load_acquire(&tx_ring->head); + tail = tx_ring->tail; + max_tx = tx_ring->max_items; + + work_done = min_t(int, CIRC_CNT(head, tail, max_tx), budget); + + for (tx_count = work_done; tx_count; tx_count--) { + hfi1_ipoib_free_tx(tx_ring->items[tail], budget); + tail = CIRC_NEXT(tail, max_tx); + } + + atomic64_add(work_done, &txq->complete_txreqs); + + /* Finished freeing tx items so store the tail value. */ + smp_store_release(&tx_ring->tail, tail); + + spin_unlock_bh(&tx_ring->consumer_lock); + + hfi1_ipoib_check_queue_stopped(txq); + + return work_done; +} + +static int hfi1_ipoib_process_tx_ring(struct napi_struct *napi, int budget) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(napi->dev); + struct hfi1_ipoib_txq *txq = &priv->txqs[napi - priv->tx_napis]; + + int work_done = hfi1_ipoib_drain_tx_ring(txq, budget); + + if (work_done < budget) + napi_complete_done(napi, work_done); + + return work_done; +} + +static void hfi1_ipoib_add_tx(struct ipoib_txreq *tx) +{ + struct hfi1_ipoib_circ_buf *tx_ring = &tx->txq->tx_ring; + unsigned long head; + unsigned long tail; + size_t max_tx; + + spin_lock(&tx_ring->producer_lock); + + head = tx_ring->head; + tail = READ_ONCE(tx_ring->tail); + max_tx = tx_ring->max_items; + + if (likely(CIRC_SPACE(head, tail, max_tx))) { + tx_ring->items[head] = tx; + + /* Finish storing txreq before incrementing head. */ + smp_store_release(&tx_ring->head, CIRC_ADD(head, 1, max_tx)); + napi_schedule(tx->txq->napi); + } else { + struct hfi1_ipoib_txq *txq = tx->txq; + struct hfi1_ipoib_dev_priv *priv = tx->priv; + + /* Ring was full */ + hfi1_ipoib_free_tx(tx, 0); + atomic64_inc(&txq->complete_txreqs); + dd_dev_dbg(priv->dd, "txq %d full.\n", txq->q_idx); + } + + spin_unlock(&tx_ring->producer_lock); +} + +static void hfi1_ipoib_sdma_complete(struct sdma_txreq *txreq, int status) +{ + struct ipoib_txreq *tx = container_of(txreq, struct ipoib_txreq, txreq); + + tx->sdma_status = status; + + hfi1_ipoib_add_tx(tx); +} + +static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx, + struct ipoib_txparms *txp) +{ + struct hfi1_devdata *dd = txp->dd; + struct sdma_txreq *txreq = &tx->txreq; + struct sk_buff *skb = tx->skb; + int ret = 0; + int i; + + if (skb_headlen(skb)) { + ret = sdma_txadd_kvaddr(dd, txreq, skb->data, skb_headlen(skb)); + if (unlikely(ret)) + return ret; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + ret = sdma_txadd_page(dd, + txreq, + skb_frag_page(frag), + frag->bv_offset, + skb_frag_size(frag)); + if (unlikely(ret)) + break; + } + + return ret; +} + +static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx, + struct ipoib_txparms *txp) +{ + struct hfi1_devdata *dd = txp->dd; + struct sdma_txreq *txreq = &tx->txreq; + struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr; + u16 pkt_bytes = + sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2) + tx->skb->len; + int ret; + + ret = sdma_txinit(txreq, 0, pkt_bytes, hfi1_ipoib_sdma_complete); + if (unlikely(ret)) + return ret; + + /* add pbc + headers */ + ret = sdma_txadd_kvaddr(dd, + txreq, + sdma_hdr, + sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2)); + if (unlikely(ret)) + return ret; + + /* add the ulp payload */ + return hfi1_ipoib_build_ulp_payload(tx, txp); +} + +static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx, + struct ipoib_txparms *txp) +{ + struct hfi1_ipoib_dev_priv *priv = tx->priv; + struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr; + struct sk_buff *skb = tx->skb; + struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp); + struct rdma_ah_attr *ah_attr = txp->ah_attr; + struct ib_other_headers *ohdr; + struct ib_grh *grh; + u16 dwords; + u16 slid; + u16 dlid; + u16 lrh0; + u32 bth0; + u32 sqpn = (u32)(priv->netdev->dev_addr[1] << 16 | + priv->netdev->dev_addr[2] << 8 | + priv->netdev->dev_addr[3]); + u16 payload_dwords; + u8 pad_cnt; + + pad_cnt = -skb->len & 3; + + /* Includes ICRC */ + payload_dwords = ((skb->len + pad_cnt) >> 2) + SIZE_OF_CRC; + + /* header size in dwords LRH+BTH+DETH = (8+12+8)/4. */ + txp->hdr_dwords = 7; + + if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { + grh = &sdma_hdr->hdr.ibh.u.l.grh; + txp->hdr_dwords += + hfi1_make_grh(txp->ibp, + grh, + rdma_ah_read_grh(ah_attr), + txp->hdr_dwords - LRH_9B_DWORDS, + payload_dwords); + lrh0 = HFI1_LRH_GRH; + ohdr = &sdma_hdr->hdr.ibh.u.l.oth; + } else { + lrh0 = HFI1_LRH_BTH; + ohdr = &sdma_hdr->hdr.ibh.u.oth; + } + + lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4; + lrh0 |= (txp->flow.sc5 & 0xf) << 12; + + dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 9B); + if (dlid == be16_to_cpu(IB_LID_PERMISSIVE)) { + slid = be16_to_cpu(IB_LID_PERMISSIVE); + } else { + u16 lid = (u16)ppd->lid; + + if (lid) { + lid |= rdma_ah_get_path_bits(ah_attr) & + ((1 << ppd->lmc) - 1); + slid = lid; + } else { + slid = be16_to_cpu(IB_LID_PERMISSIVE); + } + } + + /* Includes ICRC */ + dwords = txp->hdr_dwords + payload_dwords; + + /* Build the lrh */ + sdma_hdr->hdr.hdr_type = HFI1_PKT_TYPE_9B; + hfi1_make_ib_hdr(&sdma_hdr->hdr.ibh, lrh0, dwords, dlid, slid); + + /* Build the bth */ + bth0 = (IB_OPCODE_UD_SEND_ONLY << 24) | (pad_cnt << 20) | priv->pkey; + + ohdr->bth[0] = cpu_to_be32(bth0); + ohdr->bth[1] = cpu_to_be32(txp->dqpn); + ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->sent_txreqs)); + + /* Build the deth */ + ohdr->u.ud.deth[0] = cpu_to_be32(priv->qkey); + ohdr->u.ud.deth[1] = cpu_to_be32((txp->entropy << + HFI1_IPOIB_ENTROPY_SHIFT) | sqpn); + + /* Construct the pbc. */ + sdma_hdr->pbc = + cpu_to_le64(create_pbc(ppd, + ib_is_sc5(txp->flow.sc5) << + PBC_DC_INFO_SHIFT, + 0, + sc_to_vlt(priv->dd, txp->flow.sc5), + dwords - SIZE_OF_CRC + + (sizeof(sdma_hdr->pbc) >> 2))); +} + +static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev, + struct sk_buff *skb, + struct ipoib_txparms *txp) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + struct ipoib_txreq *tx; + int ret; + + tx = kmem_cache_alloc_node(priv->txreq_cache, + GFP_ATOMIC, + priv->dd->node); + if (unlikely(!tx)) + return ERR_PTR(-ENOMEM); + + /* so that we can test if the sdma decriptors are there */ + tx->txreq.num_desc = 0; + tx->priv = priv; + tx->txq = txp->txq; + tx->skb = skb; + + hfi1_ipoib_build_ib_tx_headers(tx, txp); + + ret = hfi1_ipoib_build_tx_desc(tx, txp); + if (likely(!ret)) { + if (txp->txq->flow.as_int != txp->flow.as_int) { + txp->txq->flow.tx_queue = txp->flow.tx_queue; + txp->txq->flow.sc5 = txp->flow.sc5; + txp->txq->sde = + sdma_select_engine_sc(priv->dd, + txp->flow.tx_queue, + txp->flow.sc5); + } + + return tx; + } + + sdma_txclean(priv->dd, &tx->txreq); + kmem_cache_free(priv->txreq_cache, tx); + + return ERR_PTR(ret); +} + +static int hfi1_ipoib_submit_tx_list(struct net_device *dev, + struct hfi1_ipoib_txq *txq) +{ + int ret; + u16 count_out; + + ret = sdma_send_txlist(txq->sde, + iowait_get_ib_work(&txq->wait), + &txq->tx_list, + &count_out); + if (likely(!ret) || ret == -EBUSY || ret == -ECOMM) + return ret; + + dd_dev_warn(txq->priv->dd, "cannot send skb tx list, err %d.\n", ret); + + return ret; +} + +static int hfi1_ipoib_flush_tx_list(struct net_device *dev, + struct hfi1_ipoib_txq *txq) +{ + int ret = 0; + + if (!list_empty(&txq->tx_list)) { + /* Flush the current list */ + ret = hfi1_ipoib_submit_tx_list(dev, txq); + + if (unlikely(ret)) + if (ret != -EBUSY) + ++dev->stats.tx_carrier_errors; + } + + return ret; +} + +static int hfi1_ipoib_submit_tx(struct hfi1_ipoib_txq *txq, + struct ipoib_txreq *tx) +{ + int ret; + + ret = sdma_send_txreq(txq->sde, + iowait_get_ib_work(&txq->wait), + &tx->txreq, + txq->pkts_sent); + if (likely(!ret)) { + txq->pkts_sent = true; + iowait_starve_clear(txq->pkts_sent, &txq->wait); + } + + return ret; +} + +static int hfi1_ipoib_send_dma_single(struct net_device *dev, + struct sk_buff *skb, + struct ipoib_txparms *txp) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + struct hfi1_ipoib_txq *txq = txp->txq; + struct ipoib_txreq *tx; + int ret; + + tx = hfi1_ipoib_send_dma_common(dev, skb, txp); + if (IS_ERR(tx)) { + int ret = PTR_ERR(tx); + + dev_kfree_skb_any(skb); + + if (ret == -ENOMEM) + ++dev->stats.tx_errors; + else + ++dev->stats.tx_carrier_errors; + + return NETDEV_TX_OK; + } + + ret = hfi1_ipoib_submit_tx(txq, tx); + if (likely(!ret)) { + trace_sdma_output_ibhdr(tx->priv->dd, + &tx->sdma_hdr.hdr, + ib_is_sc5(txp->flow.sc5)); + hfi1_ipoib_check_queue_depth(txq); + return NETDEV_TX_OK; + } + + txq->pkts_sent = false; + + if (ret == -EBUSY) { + list_add_tail(&tx->txreq.list, &txq->tx_list); + + trace_sdma_output_ibhdr(tx->priv->dd, + &tx->sdma_hdr.hdr, + ib_is_sc5(txp->flow.sc5)); + hfi1_ipoib_check_queue_depth(txq); + return NETDEV_TX_OK; + } + + if (ret == -ECOMM) { + hfi1_ipoib_check_queue_depth(txq); + return NETDEV_TX_OK; + } + + sdma_txclean(priv->dd, &tx->txreq); + dev_kfree_skb_any(skb); + kmem_cache_free(priv->txreq_cache, tx); + ++dev->stats.tx_carrier_errors; + + return NETDEV_TX_OK; +} + +static int hfi1_ipoib_send_dma_list(struct net_device *dev, + struct sk_buff *skb, + struct ipoib_txparms *txp) +{ + struct hfi1_ipoib_txq *txq = txp->txq; + struct ipoib_txreq *tx; + + /* Has the flow change ? */ + if (txq->flow.as_int != txp->flow.as_int) + (void)hfi1_ipoib_flush_tx_list(dev, txq); + + tx = hfi1_ipoib_send_dma_common(dev, skb, txp); + if (IS_ERR(tx)) { + int ret = PTR_ERR(tx); + + dev_kfree_skb_any(skb); + + if (ret == -ENOMEM) + ++dev->stats.tx_errors; + else + ++dev->stats.tx_carrier_errors; + + return NETDEV_TX_OK; + } + + list_add_tail(&tx->txreq.list, &txq->tx_list); + + hfi1_ipoib_check_queue_depth(txq); + + trace_sdma_output_ibhdr(tx->priv->dd, + &tx->sdma_hdr.hdr, + ib_is_sc5(txp->flow.sc5)); + + if (!netdev_xmit_more()) + (void)hfi1_ipoib_flush_tx_list(dev, txq); + + return NETDEV_TX_OK; +} + +static u8 hfi1_ipoib_calc_entropy(struct sk_buff *skb) +{ + if (skb_transport_header_was_set(skb)) { + u8 *hdr = (u8 *)skb_transport_header(skb); + + return (hdr[0] ^ hdr[1] ^ hdr[2] ^ hdr[3]); + } + + return (u8)skb_get_queue_mapping(skb); +} + +int hfi1_ipoib_send_dma(struct net_device *dev, + struct sk_buff *skb, + struct ib_ah *address, + u32 dqpn) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + struct ipoib_txparms txp; + struct rdma_netdev *rn = netdev_priv(dev); + + if (unlikely(skb->len > rn->mtu + HFI1_IPOIB_ENCAP_LEN)) { + dd_dev_warn(priv->dd, "packet len %d (> %d) too long to send, dropping\n", + skb->len, + rn->mtu + HFI1_IPOIB_ENCAP_LEN); + ++dev->stats.tx_dropped; + ++dev->stats.tx_errors; + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + txp.dd = priv->dd; + txp.ah_attr = &ibah_to_rvtah(address)->attr; + txp.ibp = to_iport(priv->device, priv->port_num); + txp.txq = &priv->txqs[skb_get_queue_mapping(skb)]; + txp.dqpn = dqpn; + txp.flow.sc5 = txp.ibp->sl_to_sc[rdma_ah_get_sl(txp.ah_attr)]; + txp.flow.tx_queue = (u8)skb_get_queue_mapping(skb); + txp.entropy = hfi1_ipoib_calc_entropy(skb); + + if (netdev_xmit_more() || !list_empty(&txp.txq->tx_list)) + return hfi1_ipoib_send_dma_list(dev, skb, &txp); + + return hfi1_ipoib_send_dma_single(dev, skb, &txp); +} + +/* + * hfi1_ipoib_sdma_sleep - ipoib sdma sleep function + * + * This function gets called from sdma_send_txreq() when there are not enough + * sdma descriptors available to send the packet. It adds Tx queue's wait + * structure to sdma engine's dmawait list to be woken up when descriptors + * become available. + */ +static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde, + struct iowait_work *wait, + struct sdma_txreq *txreq, + uint seq, + bool pkts_sent) +{ + struct hfi1_ipoib_txq *txq = + container_of(wait->iow, struct hfi1_ipoib_txq, wait); + + write_seqlock(&sde->waitlock); + + if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED)) { + if (sdma_progress(sde, seq, txreq)) { + write_sequnlock(&sde->waitlock); + return -EAGAIN; + } + + netif_stop_subqueue(txq->priv->netdev, txq->q_idx); + + if (list_empty(&txq->wait.list)) + iowait_queue(pkts_sent, wait->iow, &sde->dmawait); + + write_sequnlock(&sde->waitlock); + return -EBUSY; + } + + write_sequnlock(&sde->waitlock); + return -EINVAL; +} + +/* + * hfi1_ipoib_sdma_wakeup - ipoib sdma wakeup function + * + * This function gets called when SDMA descriptors becomes available and Tx + * queue's wait structure was previously added to sdma engine's dmawait list. + */ +static void hfi1_ipoib_sdma_wakeup(struct iowait *wait, int reason) +{ + struct hfi1_ipoib_txq *txq = + container_of(wait, struct hfi1_ipoib_txq, wait); + + if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED)) + iowait_schedule(wait, system_highpri_wq, WORK_CPU_UNBOUND); +} + +static void hfi1_ipoib_flush_txq(struct work_struct *work) +{ + struct iowait_work *ioww = + container_of(work, struct iowait_work, iowork); + struct iowait *wait = iowait_ioww_to_iow(ioww); + struct hfi1_ipoib_txq *txq = + container_of(wait, struct hfi1_ipoib_txq, wait); + struct net_device *dev = txq->priv->netdev; + + if (likely(dev->reg_state == NETREG_REGISTERED) && + likely(__netif_subqueue_stopped(dev, txq->q_idx)) && + likely(!hfi1_ipoib_flush_tx_list(dev, txq))) + netif_wake_subqueue(dev, txq->q_idx); +} + +int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) +{ + struct net_device *dev = priv->netdev; + char buf[HFI1_IPOIB_TXREQ_NAME_LEN]; + unsigned long tx_ring_size; + int i; + + /* + * Ring holds 1 less than tx_ring_size + * Round up to next power of 2 in order to hold at least tx_queue_len + */ + tx_ring_size = roundup_pow_of_two((unsigned long)dev->tx_queue_len + 1); + + snprintf(buf, sizeof(buf), "hfi1_%u_ipoib_txreq_cache", priv->dd->unit); + priv->txreq_cache = kmem_cache_create(buf, + sizeof(struct ipoib_txreq), + 0, + 0, + NULL); + if (!priv->txreq_cache) + return -ENOMEM; + + priv->tx_napis = kcalloc_node(dev->num_tx_queues, + sizeof(struct napi_struct), + GFP_ATOMIC, + priv->dd->node); + if (!priv->tx_napis) + goto free_txreq_cache; + + priv->txqs = kcalloc_node(dev->num_tx_queues, + sizeof(struct hfi1_ipoib_txq), + GFP_ATOMIC, + priv->dd->node); + if (!priv->txqs) + goto free_tx_napis; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct hfi1_ipoib_txq *txq = &priv->txqs[i]; + + iowait_init(&txq->wait, + 0, + hfi1_ipoib_flush_txq, + NULL, + hfi1_ipoib_sdma_sleep, + hfi1_ipoib_sdma_wakeup, + NULL, + NULL); + txq->priv = priv; + txq->sde = NULL; + INIT_LIST_HEAD(&txq->tx_list); + atomic64_set(&txq->complete_txreqs, 0); + txq->q_idx = i; + txq->flow.tx_queue = 0xff; + txq->flow.sc5 = 0xff; + txq->pkts_sent = false; + + netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i), + priv->dd->node); + + txq->tx_ring.items = + vzalloc_node(array_size(tx_ring_size, + sizeof(struct ipoib_txreq)), + priv->dd->node); + if (!txq->tx_ring.items) + goto free_txqs; + + spin_lock_init(&txq->tx_ring.producer_lock); + spin_lock_init(&txq->tx_ring.consumer_lock); + txq->tx_ring.max_items = tx_ring_size; + + txq->napi = &priv->tx_napis[i]; + netif_tx_napi_add(dev, txq->napi, + hfi1_ipoib_process_tx_ring, + NAPI_POLL_WEIGHT); + } + + return 0; + +free_txqs: + for (i--; i >= 0; i--) { + struct hfi1_ipoib_txq *txq = &priv->txqs[i]; + + netif_napi_del(txq->napi); + vfree(txq->tx_ring.items); + } + + kfree(priv->txqs); + priv->txqs = NULL; + +free_tx_napis: + kfree(priv->tx_napis); + priv->tx_napis = NULL; + +free_txreq_cache: + kmem_cache_destroy(priv->txreq_cache); + priv->txreq_cache = NULL; + return -ENOMEM; +} + +static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq) +{ + struct sdma_txreq *txreq; + struct sdma_txreq *txreq_tmp; + atomic64_t *complete_txreqs = &txq->complete_txreqs; + + list_for_each_entry_safe(txreq, txreq_tmp, &txq->tx_list, list) { + struct ipoib_txreq *tx = + container_of(txreq, struct ipoib_txreq, txreq); + + list_del(&txreq->list); + sdma_txclean(txq->priv->dd, &tx->txreq); + dev_kfree_skb_any(tx->skb); + kmem_cache_free(txq->priv->txreq_cache, tx); + atomic64_inc(complete_txreqs); + } + + if (hfi1_ipoib_txreqs(txq->sent_txreqs, atomic64_read(complete_txreqs))) + dd_dev_warn(txq->priv->dd, + "txq %d not empty found %llu requests\n", + txq->q_idx, + hfi1_ipoib_txreqs(txq->sent_txreqs, + atomic64_read(complete_txreqs))); +} + +void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv) +{ + int i; + + for (i = 0; i < priv->netdev->num_tx_queues; i++) { + struct hfi1_ipoib_txq *txq = &priv->txqs[i]; + + iowait_cancel_work(&txq->wait); + iowait_sdma_drain(&txq->wait); + hfi1_ipoib_drain_tx_list(txq); + netif_napi_del(txq->napi); + (void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items); + vfree(txq->tx_ring.items); + } + + kfree(priv->txqs); + priv->txqs = NULL; + + kfree(priv->tx_napis); + priv->tx_napis = NULL; + + kmem_cache_destroy(priv->txreq_cache); + priv->txreq_cache = NULL; +} + +void hfi1_ipoib_napi_tx_enable(struct net_device *dev) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct hfi1_ipoib_txq *txq = &priv->txqs[i]; + + napi_enable(txq->napi); + } +} + +void hfi1_ipoib_napi_tx_disable(struct net_device *dev) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct hfi1_ipoib_txq *txq = &priv->txqs[i]; + + napi_disable(txq->napi); + (void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items); + } +} diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c index db82db497b2c..d61ee853d215 100644 --- a/drivers/infiniband/hw/hfi1/msix.c +++ b/drivers/infiniband/hw/hfi1/msix.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* - * Copyright(c) 2018 Intel Corporation. + * Copyright(c) 2018 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -49,6 +49,7 @@ #include "hfi.h" #include "affinity.h" #include "sdma.h" +#include "netdev.h" /** * msix_initialize() - Calculate, request and configure MSIx IRQs @@ -69,7 +70,7 @@ int msix_initialize(struct hfi1_devdata *dd) * one for each VNIC context * ...any new IRQs should be added here. */ - total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts; + total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_netdev_contexts; if (total >= CCE_NUM_MSIX_VECTORS) return -EINVAL; @@ -140,7 +141,7 @@ static int msix_request_irq(struct hfi1_devdata *dd, void *arg, ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name); if (ret) { dd_dev_err(dd, - "%s: request for IRQ %d failed, MSIx %lu, err %d\n", + "%s: request for IRQ %d failed, MSIx %lx, err %d\n", name, irq, nr, ret); spin_lock(&dd->msix_info.msix_lock); __clear_bit(nr, dd->msix_info.in_use_msix); @@ -160,7 +161,7 @@ static int msix_request_irq(struct hfi1_devdata *dd, void *arg, /* This is a request, so a failure is not fatal */ ret = hfi1_get_irq_affinity(dd, me); if (ret) - dd_dev_err(dd, "unable to pin IRQ %d\n", ret); + dd_dev_err(dd, "%s: unable to pin IRQ %d\n", name, ret); return nr; } @@ -171,7 +172,8 @@ static int msix_request_rcd_irq_common(struct hfi1_ctxtdata *rcd, const char *name) { int nr = msix_request_irq(rcd->dd, rcd, handler, thread, - IRQ_RCVCTXT, name); + rcd->is_vnic ? IRQ_NETDEVCTXT : IRQ_RCVCTXT, + name); if (nr < 0) return nr; @@ -204,6 +206,21 @@ int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd) } /** + * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs + * for netdev context + * @rcd: valid netdev contexti + */ +int msix_netdev_request_rcd_irq(struct hfi1_ctxtdata *rcd) +{ + char name[MAX_NAME_SIZE]; + + snprintf(name, sizeof(name), DRIVER_NAME "_%d nd kctxt%d", + rcd->dd->unit, rcd->ctxt); + return msix_request_rcd_irq_common(rcd, receive_context_interrupt_napi, + NULL, name); +} + +/** * msix_request_smda_ira() - Helper for getting SDMA IRQ resources * @sde: valid sdma engine * @@ -355,15 +372,16 @@ void msix_clean_up_interrupts(struct hfi1_devdata *dd) } /** - * msix_vnic_syncrhonize_irq() - Vnic IRQ synchronize + * msix_netdev_syncrhonize_irq() - netdev IRQ synchronize * @dd: valid devdata */ -void msix_vnic_synchronize_irq(struct hfi1_devdata *dd) +void msix_netdev_synchronize_irq(struct hfi1_devdata *dd) { int i; + int ctxt_count = hfi1_netdev_ctxt_count(dd); - for (i = 0; i < dd->vnic.num_ctxt; i++) { - struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i]; + for (i = 0; i < ctxt_count; i++) { + struct hfi1_ctxtdata *rcd = hfi1_netdev_get_ctxt(dd, i); struct hfi1_msix_entry *me; me = &dd->msix_info.msix_entries[rcd->msix_intr]; diff --git a/drivers/infiniband/hw/hfi1/msix.h b/drivers/infiniband/hw/hfi1/msix.h index 1a02ab7971c8..e63e944bf0fc 100644 --- a/drivers/infiniband/hw/hfi1/msix.h +++ b/drivers/infiniband/hw/hfi1/msix.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ /* - * Copyright(c) 2018 Intel Corporation. + * Copyright(c) 2018 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -59,7 +59,8 @@ int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd); int msix_request_sdma_irq(struct sdma_engine *sde); void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr); -/* VNIC interface */ -void msix_vnic_synchronize_irq(struct hfi1_devdata *dd); +/* Netdev interface */ +void msix_netdev_synchronize_irq(struct hfi1_devdata *dd); +int msix_netdev_request_rcd_irq(struct hfi1_ctxtdata *rcd); #endif diff --git a/drivers/infiniband/hw/hfi1/netdev.h b/drivers/infiniband/hw/hfi1/netdev.h new file mode 100644 index 000000000000..947543a3e0c4 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/netdev.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* + * Copyright(c) 2020 Intel Corporation. + * + */ + +#ifndef HFI1_NETDEV_H +#define HFI1_NETDEV_H + +#include "hfi.h" + +#include <linux/netdevice.h> +#include <linux/xarray.h> + +/** + * struct hfi1_netdev_rxq - Receive Queue for HFI + * dummy netdev. Both IPoIB and VNIC netdevices will be working on + * top of this device. + * @napi: napi object + * @priv: ptr to netdev_priv + * @rcd: ptr to receive context data + */ +struct hfi1_netdev_rxq { + struct napi_struct napi; + struct hfi1_netdev_priv *priv; + struct hfi1_ctxtdata *rcd; +}; + +/* + * Number of netdev contexts used. Ensure it is less than or equal to + * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE). + */ +#define HFI1_MAX_NETDEV_CTXTS 8 + +/* Number of NETDEV RSM entries */ +#define NUM_NETDEV_MAP_ENTRIES HFI1_MAX_NETDEV_CTXTS + +/** + * struct hfi1_netdev_priv: data required to setup and run HFI netdev. + * @dd: hfi1_devdata + * @rxq: pointer to dummy netdev receive queues. + * @num_rx_q: number of receive queues + * @rmt_index: first free index in RMT Array + * @msix_start: first free MSI-X interrupt vector. + * @dev_tbl: netdev table for unique identifier VNIC and IPoIb VLANs. + * @enabled: atomic counter of netdevs enabling receive queues. + * When 0 NAPI will be disabled. + * @netdevs: atomic counter of netdevs using dummy netdev. + * When 0 receive queues will be freed. + */ +struct hfi1_netdev_priv { + struct hfi1_devdata *dd; + struct hfi1_netdev_rxq *rxq; + int num_rx_q; + int rmt_start; + struct xarray dev_tbl; + /* count of enabled napi polls */ + atomic_t enabled; + /* count of netdevs on top */ + atomic_t netdevs; +}; + +static inline +struct hfi1_netdev_priv *hfi1_netdev_priv(struct net_device *dev) +{ + return (struct hfi1_netdev_priv *)&dev[1]; +} + +static inline +int hfi1_netdev_ctxt_count(struct hfi1_devdata *dd) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + + return priv->num_rx_q; +} + +static inline +struct hfi1_ctxtdata *hfi1_netdev_get_ctxt(struct hfi1_devdata *dd, int ctxt) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + + return priv->rxq[ctxt].rcd; +} + +static inline +int hfi1_netdev_get_free_rmt_idx(struct hfi1_devdata *dd) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + + return priv->rmt_start; +} + +static inline +void hfi1_netdev_set_free_rmt_idx(struct hfi1_devdata *dd, int rmt_idx) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + + priv->rmt_start = rmt_idx; +} + +u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts, + struct cpumask *cpu_mask); + +void hfi1_netdev_enable_queues(struct hfi1_devdata *dd); +void hfi1_netdev_disable_queues(struct hfi1_devdata *dd); +int hfi1_netdev_rx_init(struct hfi1_devdata *dd); +int hfi1_netdev_rx_destroy(struct hfi1_devdata *dd); +int hfi1_netdev_alloc(struct hfi1_devdata *dd); +void hfi1_netdev_free(struct hfi1_devdata *dd); +int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data); +void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id); +void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id); +void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id); + +/* chip.c */ +int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget); + +#endif /* HFI1_NETDEV_H */ diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c new file mode 100644 index 000000000000..63688e85e8da --- /dev/null +++ b/drivers/infiniband/hw/hfi1/netdev_rx.c @@ -0,0 +1,481 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright(c) 2020 Intel Corporation. + * + */ + +/* + * This file contains HFI1 support for netdev RX functionality + */ + +#include "sdma.h" +#include "verbs.h" +#include "netdev.h" +#include "hfi.h" + +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <rdma/ib_verbs.h> + +static int hfi1_netdev_setup_ctxt(struct hfi1_netdev_priv *priv, + struct hfi1_ctxtdata *uctxt) +{ + unsigned int rcvctrl_ops; + struct hfi1_devdata *dd = priv->dd; + int ret; + + uctxt->rhf_rcv_function_map = netdev_rhf_rcv_functions; + uctxt->do_interrupt = &handle_receive_interrupt_napi_sp; + + /* Now allocate the RcvHdr queue and eager buffers. */ + ret = hfi1_create_rcvhdrq(dd, uctxt); + if (ret) + goto done; + + ret = hfi1_setup_eagerbufs(uctxt); + if (ret) + goto done; + + clear_rcvhdrtail(uctxt); + + rcvctrl_ops = HFI1_RCVCTRL_CTXT_DIS; + rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_DIS; + + if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR)) + rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB; + if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL)) + rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; + if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) + rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; + if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL)) + rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; + + hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt); +done: + return ret; +} + +static int hfi1_netdev_allocate_ctxt(struct hfi1_devdata *dd, + struct hfi1_ctxtdata **ctxt) +{ + struct hfi1_ctxtdata *uctxt; + int ret; + + if (dd->flags & HFI1_FROZEN) + return -EIO; + + ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt); + if (ret < 0) { + dd_dev_err(dd, "Unable to create ctxtdata, failing open\n"); + return -ENOMEM; + } + + uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) | + HFI1_CAP_KGET(NODROP_RHQ_FULL) | + HFI1_CAP_KGET(NODROP_EGR_FULL) | + HFI1_CAP_KGET(DMA_RTAIL); + /* Netdev contexts are always NO_RDMA_RTAIL */ + uctxt->fast_handler = handle_receive_interrupt_napi_fp; + uctxt->slow_handler = handle_receive_interrupt_napi_sp; + hfi1_set_seq_cnt(uctxt, 1); + uctxt->is_vnic = true; + + hfi1_stats.sps_ctxts++; + + dd_dev_info(dd, "created netdev context %d\n", uctxt->ctxt); + *ctxt = uctxt; + + return 0; +} + +static void hfi1_netdev_deallocate_ctxt(struct hfi1_devdata *dd, + struct hfi1_ctxtdata *uctxt) +{ + flush_wc(); + + /* + * Disable receive context and interrupt available, reset all + * RcvCtxtCtrl bits to default values. + */ + hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS | + HFI1_RCVCTRL_TIDFLOW_DIS | + HFI1_RCVCTRL_INTRAVAIL_DIS | + HFI1_RCVCTRL_ONE_PKT_EGR_DIS | + HFI1_RCVCTRL_NO_RHQ_DROP_DIS | + HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); + + if (uctxt->msix_intr != CCE_NUM_MSIX_VECTORS) + msix_free_irq(dd, uctxt->msix_intr); + + uctxt->msix_intr = CCE_NUM_MSIX_VECTORS; + uctxt->event_flags = 0; + + hfi1_clear_tids(uctxt); + hfi1_clear_ctxt_pkey(dd, uctxt); + + hfi1_stats.sps_ctxts--; + + hfi1_free_ctxt(uctxt); +} + +static int hfi1_netdev_allot_ctxt(struct hfi1_netdev_priv *priv, + struct hfi1_ctxtdata **ctxt) +{ + int rc; + struct hfi1_devdata *dd = priv->dd; + + rc = hfi1_netdev_allocate_ctxt(dd, ctxt); + if (rc) { + dd_dev_err(dd, "netdev ctxt alloc failed %d\n", rc); + return rc; + } + + rc = hfi1_netdev_setup_ctxt(priv, *ctxt); + if (rc) { + dd_dev_err(dd, "netdev ctxt setup failed %d\n", rc); + hfi1_netdev_deallocate_ctxt(dd, *ctxt); + *ctxt = NULL; + } + + return rc; +} + +/** + * hfi1_num_netdev_contexts - Count of netdev recv contexts to use. + * @dd: device on which to allocate netdev contexts + * @available_contexts: count of available receive contexts + * @cpu_mask: mask of possible cpus to include for contexts + * + * Return: count of physical cores on a node or the remaining available recv + * contexts for netdev recv context usage up to the maximum of + * HFI1_MAX_NETDEV_CTXTS. + * A value of 0 can be returned when acceleration is explicitly turned off, + * a memory allocation error occurs or when there are no available contexts. + * + */ +u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts, + struct cpumask *cpu_mask) +{ + cpumask_var_t node_cpu_mask; + unsigned int available_cpus; + + if (!HFI1_CAP_IS_KSET(AIP)) + return 0; + + /* Always give user contexts priority over netdev contexts */ + if (available_contexts == 0) { + dd_dev_info(dd, "No receive contexts available for netdevs.\n"); + return 0; + } + + if (!zalloc_cpumask_var(&node_cpu_mask, GFP_KERNEL)) { + dd_dev_err(dd, "Unable to allocate cpu_mask for netdevs.\n"); + return 0; + } + + cpumask_and(node_cpu_mask, cpu_mask, + cpumask_of_node(pcibus_to_node(dd->pcidev->bus))); + + available_cpus = cpumask_weight(node_cpu_mask); + + free_cpumask_var(node_cpu_mask); + + return min3(available_cpus, available_contexts, + (u32)HFI1_MAX_NETDEV_CTXTS); +} + +static int hfi1_netdev_rxq_init(struct net_device *dev) +{ + int i; + int rc; + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dev); + struct hfi1_devdata *dd = priv->dd; + + priv->num_rx_q = dd->num_netdev_contexts; + priv->rxq = kcalloc_node(priv->num_rx_q, sizeof(struct hfi1_netdev_rxq), + GFP_KERNEL, dd->node); + + if (!priv->rxq) { + dd_dev_err(dd, "Unable to allocate netdev queue data\n"); + return (-ENOMEM); + } + + for (i = 0; i < priv->num_rx_q; i++) { + struct hfi1_netdev_rxq *rxq = &priv->rxq[i]; + + rc = hfi1_netdev_allot_ctxt(priv, &rxq->rcd); + if (rc) + goto bail_context_irq_failure; + + hfi1_rcd_get(rxq->rcd); + rxq->priv = priv; + rxq->rcd->napi = &rxq->napi; + dd_dev_info(dd, "Setting rcv queue %d napi to context %d\n", + i, rxq->rcd->ctxt); + /* + * Disable BUSY_POLL on this NAPI as this is not supported + * right now. + */ + set_bit(NAPI_STATE_NO_BUSY_POLL, &rxq->napi.state); + netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi, 64); + rc = msix_netdev_request_rcd_irq(rxq->rcd); + if (rc) + goto bail_context_irq_failure; + } + + return 0; + +bail_context_irq_failure: + dd_dev_err(dd, "Unable to allot receive context\n"); + for (; i >= 0; i--) { + struct hfi1_netdev_rxq *rxq = &priv->rxq[i]; + + if (rxq->rcd) { + hfi1_netdev_deallocate_ctxt(dd, rxq->rcd); + hfi1_rcd_put(rxq->rcd); + rxq->rcd = NULL; + } + } + kfree(priv->rxq); + priv->rxq = NULL; + + return rc; +} + +static void hfi1_netdev_rxq_deinit(struct net_device *dev) +{ + int i; + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dev); + struct hfi1_devdata *dd = priv->dd; + + for (i = 0; i < priv->num_rx_q; i++) { + struct hfi1_netdev_rxq *rxq = &priv->rxq[i]; + + netif_napi_del(&rxq->napi); + hfi1_netdev_deallocate_ctxt(dd, rxq->rcd); + hfi1_rcd_put(rxq->rcd); + rxq->rcd = NULL; + } + + kfree(priv->rxq); + priv->rxq = NULL; + priv->num_rx_q = 0; +} + +static void enable_queues(struct hfi1_netdev_priv *priv) +{ + int i; + + for (i = 0; i < priv->num_rx_q; i++) { + struct hfi1_netdev_rxq *rxq = &priv->rxq[i]; + + dd_dev_info(priv->dd, "enabling queue %d on context %d\n", i, + rxq->rcd->ctxt); + napi_enable(&rxq->napi); + hfi1_rcvctrl(priv->dd, + HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB, + rxq->rcd); + } +} + +static void disable_queues(struct hfi1_netdev_priv *priv) +{ + int i; + + msix_netdev_synchronize_irq(priv->dd); + + for (i = 0; i < priv->num_rx_q; i++) { + struct hfi1_netdev_rxq *rxq = &priv->rxq[i]; + + dd_dev_info(priv->dd, "disabling queue %d on context %d\n", i, + rxq->rcd->ctxt); + + /* wait for napi if it was scheduled */ + hfi1_rcvctrl(priv->dd, + HFI1_RCVCTRL_CTXT_DIS | HFI1_RCVCTRL_INTRAVAIL_DIS, + rxq->rcd); + napi_synchronize(&rxq->napi); + napi_disable(&rxq->napi); + } +} + +/** + * hfi1_netdev_rx_init - Incrememnts netdevs counter. When called first time, + * it allocates receive queue data and calls netif_napi_add + * for each queue. + * + * @dd: hfi1 dev data + */ +int hfi1_netdev_rx_init(struct hfi1_devdata *dd) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + int res; + + if (atomic_fetch_inc(&priv->netdevs)) + return 0; + + mutex_lock(&hfi1_mutex); + init_dummy_netdev(dd->dummy_netdev); + res = hfi1_netdev_rxq_init(dd->dummy_netdev); + mutex_unlock(&hfi1_mutex); + return res; +} + +/** + * hfi1_netdev_rx_destroy - Decrements netdevs counter, when it reaches 0 + * napi is deleted and receive queses memory is freed. + * + * @dd: hfi1 dev data + */ +int hfi1_netdev_rx_destroy(struct hfi1_devdata *dd) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + + /* destroy the RX queues only if it is the last netdev going away */ + if (atomic_fetch_add_unless(&priv->netdevs, -1, 0) == 1) { + mutex_lock(&hfi1_mutex); + hfi1_netdev_rxq_deinit(dd->dummy_netdev); + mutex_unlock(&hfi1_mutex); + } + + return 0; +} + +/** + * hfi1_netdev_alloc - Allocates netdev and private data. It is required + * because RMT index and MSI-X interrupt can be set only + * during driver initialization. + * + * @dd: hfi1 dev data + */ +int hfi1_netdev_alloc(struct hfi1_devdata *dd) +{ + struct hfi1_netdev_priv *priv; + const int netdev_size = sizeof(*dd->dummy_netdev) + + sizeof(struct hfi1_netdev_priv); + + dd_dev_info(dd, "allocating netdev size %d\n", netdev_size); + dd->dummy_netdev = kcalloc_node(1, netdev_size, GFP_KERNEL, dd->node); + + if (!dd->dummy_netdev) + return -ENOMEM; + + priv = hfi1_netdev_priv(dd->dummy_netdev); + priv->dd = dd; + xa_init(&priv->dev_tbl); + atomic_set(&priv->enabled, 0); + atomic_set(&priv->netdevs, 0); + + return 0; +} + +void hfi1_netdev_free(struct hfi1_devdata *dd) +{ + if (dd->dummy_netdev) { + dd_dev_info(dd, "hfi1 netdev freed\n"); + free_netdev(dd->dummy_netdev); + dd->dummy_netdev = NULL; + } +} + +/** + * hfi1_netdev_enable_queues - This is napi enable function. + * It enables napi objects associated with queues. + * When at least one device has called it it increments atomic counter. + * Disable function decrements counter and when it is 0, + * calls napi_disable for every queue. + * + * @dd: hfi1 dev data + */ +void hfi1_netdev_enable_queues(struct hfi1_devdata *dd) +{ + struct hfi1_netdev_priv *priv; + + if (!dd->dummy_netdev) + return; + + priv = hfi1_netdev_priv(dd->dummy_netdev); + if (atomic_fetch_inc(&priv->enabled)) + return; + + mutex_lock(&hfi1_mutex); + enable_queues(priv); + mutex_unlock(&hfi1_mutex); +} + +void hfi1_netdev_disable_queues(struct hfi1_devdata *dd) +{ + struct hfi1_netdev_priv *priv; + + if (!dd->dummy_netdev) + return; + + priv = hfi1_netdev_priv(dd->dummy_netdev); + if (atomic_dec_if_positive(&priv->enabled)) + return; + + mutex_lock(&hfi1_mutex); + disable_queues(priv); + mutex_unlock(&hfi1_mutex); +} + +/** + * hfi1_netdev_add_data - Registers data with unique identifier + * to be requested later this is needed for VNIC and IPoIB VLANs + * implementations. + * This call is protected by mutex idr_lock. + * + * @dd: hfi1 dev data + * @id: requested integer id up to INT_MAX + * @data: data to be associated with index + */ +int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + + return xa_insert(&priv->dev_tbl, id, data, GFP_NOWAIT); +} + +/** + * hfi1_netdev_remove_data - Removes data with previously given id. + * Returns the reference to removed entry. + * + * @dd: hfi1 dev data + * @id: requested integer id up to INT_MAX + */ +void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + + return xa_erase(&priv->dev_tbl, id); +} + +/** + * hfi1_netdev_get_data - Gets data with given id + * + * @dd: hfi1 dev data + * @id: requested integer id up to INT_MAX + */ +void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + + return xa_load(&priv->dev_tbl, id); +} + +/** + * hfi1_netdev_get_first_dat - Gets first entry with greater or equal id. + * + * @dd: hfi1 dev data + * @id: requested integer id up to INT_MAX + */ +void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id) +{ + struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev); + unsigned long index = *start_id; + void *ret; + + ret = xa_find(&priv->dev_tbl, &index, UINT_MAX, XA_PRESENT); + *start_id = (int)index; + return ret; +} diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index f8e733aa3bb8..0c2ae9f7b3e8 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2019 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -186,15 +186,6 @@ static void flush_iowait(struct rvt_qp *qp) write_sequnlock_irqrestore(lock, flags); } -static inline int opa_mtu_enum_to_int(int mtu) -{ - switch (mtu) { - case OPA_MTU_8192: return 8192; - case OPA_MTU_10240: return 10240; - default: return -1; - } -} - /** * This function is what we would push to the core layer if we wanted to be a * "first class citizen". Instead we hide this here and rely on Verbs ULPs @@ -202,15 +193,10 @@ static inline int opa_mtu_enum_to_int(int mtu) */ static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu) { - int val; - /* Constraining 10KB packets to 8KB packets */ if (mtu == (enum ib_mtu)OPA_MTU_10240) mtu = OPA_MTU_8192; - val = opa_mtu_enum_to_int((int)mtu); - if (val > 0) - return val; - return ib_mtu_enum_to_int(mtu); + return opa_mtu_enum_to_int((enum opa_mtu)mtu); } int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 8a2e0d9351e9..243b4ba0b6f6 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* - * Copyright(c) 2018 Intel Corporation. + * Copyright(c) 2018 - 2020 Intel Corporation. * */ @@ -194,7 +194,7 @@ void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p) { struct hfi1_qp_priv *priv = qp->priv; - p->qp = (kdeth_qp << 16) | priv->rcd->ctxt; + p->qp = (RVT_KDETH_QP_PREFIX << 16) | priv->rcd->ctxt; p->max_len = TID_RDMA_MAX_SEGMENT_SIZE; p->jkey = priv->rcd->jkey; p->max_read = TID_RDMA_MAX_READ_SEGS_PER_REQ; diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 9a3d236bcc88..b219ea90fd6f 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -47,6 +47,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" #include "exp_rcv.h" +#include "ipoib.h" static u8 __get_ib_hdr_len(struct ib_header *hdr) { @@ -126,6 +127,7 @@ const char *hfi1_trace_get_packet_l2_str(u8 l2) #define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x" #define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x" #define DETH_PRN "deth qkey:0x%.8x sqpn:0x%.6x" +#define DETH_ENTROPY_PRN "deth qkey:0x%.8x sqpn:0x%.6x entropy:0x%.2x" #define IETH_PRN "ieth rkey:0x%.8x" #define ATOMICACKETH_PRN "origdata:%llx" #define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx" @@ -444,6 +446,12 @@ const char *parse_everbs_hdrs( break; /* deth */ case OP(UD, SEND_ONLY): + trace_seq_printf(p, DETH_ENTROPY_PRN, + be32_to_cpu(eh->ud.deth[0]), + be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK, + be32_to_cpu(eh->ud.deth[1]) >> + HFI1_IPOIB_ENTROPY_SHIFT); + break; case OP(UD, SEND_ONLY_WITH_IMMEDIATE): trace_seq_printf(p, DETH_PRN, be32_to_cpu(eh->ud.deth[0]), @@ -512,6 +520,38 @@ u16 hfi1_trace_get_tid_idx(u32 ent) return EXP_TID_GET(ent, IDX); } +struct hfi1_ctxt_hist { + atomic_t count; + atomic_t data[255]; +}; + +struct hfi1_ctxt_hist hist = { + .count = ATOMIC_INIT(0) +}; + +const char *hfi1_trace_print_rsm_hist(struct trace_seq *p, unsigned int ctxt) +{ + int i, len = ARRAY_SIZE(hist.data); + const char *ret = trace_seq_buffer_ptr(p); + unsigned long packet_count = atomic_fetch_inc(&hist.count); + + trace_seq_printf(p, "packet[%lu]", packet_count); + for (i = 0; i < len; ++i) { + unsigned long val; + atomic_t *count = &hist.data[i]; + + if (ctxt == i) + val = atomic_fetch_inc(count); + else + val = atomic_read(count); + + if (val) + trace_seq_printf(p, "(%d:%lu)", i, val); + } + trace_seq_putc(p, 0); + return ret; +} + __hfi1_trace_fn(AFFINITY); __hfi1_trace_fn(PKT); __hfi1_trace_fn(PROC); diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h index b5fc5c6cd52f..d8c168dc3ea8 100644 --- a/drivers/infiniband/hw/hfi1/trace_ctxts.h +++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h @@ -1,5 +1,5 @@ /* -* Copyright(c) 2015, 2016 Intel Corporation. +* Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -138,6 +138,15 @@ TRACE_EVENT(hfi1_ctxt_info, ) ); +const char *hfi1_trace_print_rsm_hist(struct trace_seq *p, unsigned int ctxt); +TRACE_EVENT(ctxt_rsm_hist, + TP_PROTO(unsigned int ctxt), + TP_ARGS(ctxt), + TP_STRUCT__entry(__field(unsigned int, ctxt)), + TP_fast_assign(__entry->ctxt = ctxt;), + TP_printk("%s", hfi1_trace_print_rsm_hist(p, __entry->ctxt)) +); + #endif /* __HFI1_TRACE_CTXTS_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 2f6323ad9c59..30865635b449 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -66,6 +66,7 @@ #include "vnic.h" #include "fault.h" #include "affinity.h" +#include "ipoib.h" static unsigned int hfi1_lkey_table_size = 16; module_param_named(lkey_table_size, hfi1_lkey_table_size, uint, @@ -1342,7 +1343,7 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd) IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE | IB_DEVICE_MEM_MGT_EXTENSIONS | - IB_DEVICE_RDMA_NETDEV_OPA_VNIC; + IB_DEVICE_RDMA_NETDEV_OPA; rdi->dparms.props.page_size_cap = PAGE_SIZE; rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3; rdi->dparms.props.vendor_part_id = dd->pcidev->device; @@ -1360,7 +1361,6 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd) rdi->dparms.props.max_cq = hfi1_max_cqs; rdi->dparms.props.max_ah = hfi1_max_ahs; rdi->dparms.props.max_cqe = hfi1_max_cqes; - rdi->dparms.props.max_map_per_fmr = 32767; rdi->dparms.props.max_pd = hfi1_max_pds; rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC; rdi->dparms.props.max_qp_init_rd_atom = 255; @@ -1439,6 +1439,8 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, 4096 : hfi1_max_mtu), IB_MTU_4096); props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : mtu_to_enum(ppd->ibmtu, IB_MTU_4096); + props->phys_mtu = HFI1_CAP_IS_KSET(AIP) ? hfi1_max_mtu : + ib_mtu_enum_to_int(props->max_mtu); return 0; } @@ -1793,6 +1795,7 @@ static const struct ib_device_ops hfi1_dev_ops = { .modify_device = modify_device, /* keep process mad in the driver */ .process_mad = hfi1_process_mad, + .rdma_netdev_get_params = hfi1_ipoib_rn_get_params, }; /** @@ -1863,9 +1866,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.dparms.qpn_start = 0; dd->verbs_dev.rdi.dparms.qpn_inc = 1; dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift; - dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16; - dd->verbs_dev.rdi.dparms.qpn_res_end = - dd->verbs_dev.rdi.dparms.qpn_res_start + 65535; + dd->verbs_dev.rdi.dparms.qpn_res_start = RVT_KDETH_QP_BASE; + dd->verbs_dev.rdi.dparms.qpn_res_end = RVT_AIP_QP_MAX; dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC; dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK; dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT; diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h index 5ae781514e32..66150a13f374 100644 --- a/drivers/infiniband/hw/hfi1/vnic.h +++ b/drivers/infiniband/hw/hfi1/vnic.h @@ -1,7 +1,7 @@ #ifndef _HFI1_VNIC_H #define _HFI1_VNIC_H /* - * Copyright(c) 2017 Intel Corporation. + * Copyright(c) 2017 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -69,6 +69,7 @@ #define HFI1_VNIC_SC_SHIFT 4 #define HFI1_VNIC_MAX_QUEUE 16 +#define HFI1_NUM_VNIC_CTXT 8 /** * struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information @@ -104,7 +105,6 @@ struct hfi1_vnic_rx_queue { struct hfi1_vnic_vport_info *vinfo; struct net_device *netdev; struct napi_struct napi; - struct sk_buff_head skbq; }; /** @@ -146,7 +146,6 @@ struct hfi1_vnic_vport_info { /* vnic hfi1 internal functions */ void hfi1_vnic_setup(struct hfi1_devdata *dd); -void hfi1_vnic_cleanup(struct hfi1_devdata *dd); int hfi1_vnic_txreq_init(struct hfi1_devdata *dd); void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 6b14581b9965..a90824de0f57 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2017 - 2018 Intel Corporation. + * Copyright(c) 2017 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -53,6 +53,7 @@ #include <linux/if_vlan.h> #include "vnic.h" +#include "netdev.h" #define HFI_TX_TIMEOUT_MS 1000 @@ -62,114 +63,6 @@ static DEFINE_SPINLOCK(vport_cntr_lock); -static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) -{ - unsigned int rcvctrl_ops = 0; - int ret; - - uctxt->do_interrupt = &handle_receive_interrupt; - - /* Now allocate the RcvHdr queue and eager buffers. */ - ret = hfi1_create_rcvhdrq(dd, uctxt); - if (ret) - goto done; - - ret = hfi1_setup_eagerbufs(uctxt); - if (ret) - goto done; - - if (hfi1_rcvhdrtail_kvaddr(uctxt)) - clear_rcvhdrtail(uctxt); - - rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; - rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB; - - if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR)) - rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB; - if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL)) - rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; - if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) - rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; - if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL)) - rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; - - hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt); -done: - return ret; -} - -static int allocate_vnic_ctxt(struct hfi1_devdata *dd, - struct hfi1_ctxtdata **vnic_ctxt) -{ - struct hfi1_ctxtdata *uctxt; - int ret; - - if (dd->flags & HFI1_FROZEN) - return -EIO; - - ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt); - if (ret < 0) { - dd_dev_err(dd, "Unable to create ctxtdata, failing open\n"); - return -ENOMEM; - } - - uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) | - HFI1_CAP_KGET(NODROP_RHQ_FULL) | - HFI1_CAP_KGET(NODROP_EGR_FULL) | - HFI1_CAP_KGET(DMA_RTAIL); - uctxt->seq_cnt = 1; - uctxt->is_vnic = true; - - msix_request_rcd_irq(uctxt); - - hfi1_stats.sps_ctxts++; - dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); - *vnic_ctxt = uctxt; - - return 0; -} - -static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, - struct hfi1_ctxtdata *uctxt) -{ - dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); - flush_wc(); - - /* - * Disable receive context and interrupt available, reset all - * RcvCtxtCtrl bits to default values. - */ - hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS | - HFI1_RCVCTRL_TIDFLOW_DIS | - HFI1_RCVCTRL_INTRAVAIL_DIS | - HFI1_RCVCTRL_ONE_PKT_EGR_DIS | - HFI1_RCVCTRL_NO_RHQ_DROP_DIS | - HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); - - /* msix_intr will always be > 0, only clean up if this is true */ - if (uctxt->msix_intr) - msix_free_irq(dd, uctxt->msix_intr); - - uctxt->event_flags = 0; - - hfi1_clear_tids(uctxt); - hfi1_clear_ctxt_pkey(dd, uctxt); - - hfi1_stats.sps_ctxts--; - - hfi1_free_ctxt(uctxt); -} - -void hfi1_vnic_setup(struct hfi1_devdata *dd) -{ - xa_init(&dd->vnic.vesws); -} - -void hfi1_vnic_cleanup(struct hfi1_devdata *dd) -{ - WARN_ON(!xa_empty(&dd->vnic.vesws)); -} - #define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \ u64 *src64, *dst64; \ for (src64 = &qstats->x_grp.unicast, \ @@ -179,6 +72,9 @@ void hfi1_vnic_cleanup(struct hfi1_devdata *dd) } \ } while (0) +#define VNIC_MASK (0xFF) +#define VNIC_ID(val) ((1ull << 24) | ((val) & VNIC_MASK)) + /* hfi1_vnic_update_stats - update statistics */ static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo, struct opa_vnic_stats *stats) @@ -454,71 +350,25 @@ static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq, return rc; } -static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq) +static struct hfi1_vnic_vport_info *get_vnic_port(struct hfi1_devdata *dd, + int vesw_id) { - unsigned char *pad_info; - struct sk_buff *skb; + int vnic_id = VNIC_ID(vesw_id); - skb = skb_dequeue(&rxq->skbq); - if (unlikely(!skb)) - return NULL; - - /* remove tail padding and icrc */ - pad_info = skb->data + skb->len - 1; - skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN - - ((*pad_info) & 0x7))); - - return skb; + return hfi1_netdev_get_data(dd, vnic_id); } -/* hfi1_vnic_handle_rx - handle skb receive */ -static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq, - int *work_done, int work_to_do) +static struct hfi1_vnic_vport_info *get_first_vnic_port(struct hfi1_devdata *dd) { - struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; - struct sk_buff *skb; - int rc; - - while (1) { - if (*work_done >= work_to_do) - break; - - skb = hfi1_vnic_get_skb(rxq); - if (unlikely(!skb)) - break; - - rc = hfi1_vnic_decap_skb(rxq, skb); - /* update rx counters */ - hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc); - if (unlikely(rc)) { - dev_kfree_skb_any(skb); - continue; - } - - skb_checksum_none_assert(skb); - skb->protocol = eth_type_trans(skb, rxq->netdev); - - napi_gro_receive(&rxq->napi, skb); - (*work_done)++; - } -} - -/* hfi1_vnic_napi - napi receive polling callback function */ -static int hfi1_vnic_napi(struct napi_struct *napi, int budget) -{ - struct hfi1_vnic_rx_queue *rxq = container_of(napi, - struct hfi1_vnic_rx_queue, napi); - struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; - int work_done = 0; + struct hfi1_vnic_vport_info *vinfo; + int next_id = VNIC_ID(0); - v_dbg("napi %d budget %d\n", rxq->idx, budget); - hfi1_vnic_handle_rx(rxq, &work_done, budget); + vinfo = hfi1_netdev_get_first_data(dd, &next_id); - v_dbg("napi %d work_done %d\n", rxq->idx, work_done); - if (work_done < budget) - napi_complete(napi); + if (next_id > VNIC_ID(VNIC_MASK)) + return NULL; - return work_done; + return vinfo; } void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) @@ -527,13 +377,14 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) struct hfi1_vnic_vport_info *vinfo = NULL; struct hfi1_vnic_rx_queue *rxq; struct sk_buff *skb; - int l4_type, vesw_id = -1; + int l4_type, vesw_id = -1, rc; u8 q_idx; + unsigned char *pad_info; l4_type = hfi1_16B_get_l4(packet->ebuf); if (likely(l4_type == OPA_16B_L4_ETHR)) { vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf); - vinfo = xa_load(&dd->vnic.vesws, vesw_id); + vinfo = get_vnic_port(dd, vesw_id); /* * In case of invalid vesw id, count the error on @@ -541,10 +392,8 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) */ if (unlikely(!vinfo)) { struct hfi1_vnic_vport_info *vinfo_tmp; - unsigned long index = 0; - vinfo_tmp = xa_find(&dd->vnic.vesws, &index, ULONG_MAX, - XA_PRESENT); + vinfo_tmp = get_first_vnic_port(dd); if (vinfo_tmp) { spin_lock(&vport_cntr_lock); vinfo_tmp->stats[0].netstats.rx_nohandler++; @@ -563,12 +412,6 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) rxq = &vinfo->rxq[q_idx]; if (unlikely(!netif_oper_up(vinfo->netdev))) { vinfo->stats[q_idx].rx_drop_state++; - skb_queue_purge(&rxq->skbq); - return; - } - - if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) { - vinfo->stats[q_idx].netstats.rx_fifo_errors++; return; } @@ -580,62 +423,65 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) memcpy(skb->data, packet->ebuf, packet->tlen); skb_put(skb, packet->tlen); - skb_queue_tail(&rxq->skbq, skb); - if (napi_schedule_prep(&rxq->napi)) { - v_dbg("napi %d scheduling\n", q_idx); - __napi_schedule(&rxq->napi); + pad_info = skb->data + skb->len - 1; + skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN - + ((*pad_info) & 0x7))); + + rc = hfi1_vnic_decap_skb(rxq, skb); + + /* update rx counters */ + hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc); + if (unlikely(rc)) { + dev_kfree_skb_any(skb); + return; } + + skb_checksum_none_assert(skb); + skb->protocol = eth_type_trans(skb, rxq->netdev); + + napi_gro_receive(&rxq->napi, skb); } static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo) { struct hfi1_devdata *dd = vinfo->dd; struct net_device *netdev = vinfo->netdev; - int i, rc; + int rc; /* ensure virtual eth switch id is valid */ if (!vinfo->vesw_id) return -EINVAL; - rc = xa_insert(&dd->vnic.vesws, vinfo->vesw_id, vinfo, GFP_KERNEL); + rc = hfi1_netdev_add_data(dd, VNIC_ID(vinfo->vesw_id), vinfo); if (rc < 0) return rc; - for (i = 0; i < vinfo->num_rx_q; i++) { - struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; - - skb_queue_head_init(&rxq->skbq); - napi_enable(&rxq->napi); - } + rc = hfi1_netdev_rx_init(dd); + if (rc) + goto err_remove; netif_carrier_on(netdev); netif_tx_start_all_queues(netdev); set_bit(HFI1_VNIC_UP, &vinfo->flags); return 0; + +err_remove: + hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id)); + return rc; } static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo) { struct hfi1_devdata *dd = vinfo->dd; - u8 i; clear_bit(HFI1_VNIC_UP, &vinfo->flags); netif_carrier_off(vinfo->netdev); netif_tx_disable(vinfo->netdev); - xa_erase(&dd->vnic.vesws, vinfo->vesw_id); - - /* ensure irqs see the change */ - msix_vnic_synchronize_irq(dd); + hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id)); - /* remove unread skbs */ - for (i = 0; i < vinfo->num_rx_q; i++) { - struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; - - napi_disable(&rxq->napi); - skb_queue_purge(&rxq->skbq); - } + hfi1_netdev_rx_destroy(dd); } static int hfi1_netdev_open(struct net_device *netdev) @@ -660,70 +506,31 @@ static int hfi1_netdev_close(struct net_device *netdev) return 0; } -static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd, - struct hfi1_ctxtdata **vnic_ctxt) -{ - int rc; - - rc = allocate_vnic_ctxt(dd, vnic_ctxt); - if (rc) { - dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc); - return rc; - } - - rc = setup_vnic_ctxt(dd, *vnic_ctxt); - if (rc) { - dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc); - deallocate_vnic_ctxt(dd, *vnic_ctxt); - *vnic_ctxt = NULL; - } - - return rc; -} - static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) { struct hfi1_devdata *dd = vinfo->dd; - int i, rc = 0; + int rc = 0; mutex_lock(&hfi1_mutex); - if (!dd->vnic.num_vports) { + if (!dd->vnic_num_vports) { rc = hfi1_vnic_txreq_init(dd); if (rc) goto txreq_fail; } - for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) { - rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]); - if (rc) - break; - hfi1_rcd_get(dd->vnic.ctxt[i]); - dd->vnic.ctxt[i]->vnic_q_idx = i; - } - - if (i < vinfo->num_rx_q) { - /* - * If required amount of contexts is not - * allocated successfully then remaining contexts - * are released. - */ - while (i-- > dd->vnic.num_ctxt) { - deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); - hfi1_rcd_put(dd->vnic.ctxt[i]); - dd->vnic.ctxt[i] = NULL; - } + rc = hfi1_netdev_rx_init(dd); + if (rc) { + dd_dev_err(dd, "Unable to initialize netdev contexts\n"); goto alloc_fail; } - if (dd->vnic.num_ctxt != i) { - dd->vnic.num_ctxt = i; - hfi1_init_vnic_rsm(dd); - } + hfi1_init_vnic_rsm(dd); - dd->vnic.num_vports++; + dd->vnic_num_vports++; hfi1_vnic_sdma_init(vinfo); + alloc_fail: - if (!dd->vnic.num_vports) + if (!dd->vnic_num_vports) hfi1_vnic_txreq_deinit(dd); txreq_fail: mutex_unlock(&hfi1_mutex); @@ -733,20 +540,14 @@ txreq_fail: static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo) { struct hfi1_devdata *dd = vinfo->dd; - int i; mutex_lock(&hfi1_mutex); - if (--dd->vnic.num_vports == 0) { - for (i = 0; i < dd->vnic.num_ctxt; i++) { - deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); - hfi1_rcd_put(dd->vnic.ctxt[i]); - dd->vnic.ctxt[i] = NULL; - } + if (--dd->vnic_num_vports == 0) { hfi1_deinit_vnic_rsm(dd); - dd->vnic.num_ctxt = 0; hfi1_vnic_txreq_deinit(dd); } mutex_unlock(&hfi1_mutex); + hfi1_netdev_rx_destroy(dd); } static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id) @@ -804,7 +605,7 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, struct rdma_netdev *rn; int i, size, rc; - if (!dd->num_vnic_contexts) + if (!dd->num_netdev_contexts) return ERR_PTR(-ENOMEM); if (!port_num || (port_num > dd->num_pports)) @@ -815,15 +616,16 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, - dd->num_sdma, dd->num_vnic_contexts); + chip_sdma_engines(dd), + dd->num_netdev_contexts); if (!netdev) return ERR_PTR(-ENOMEM); rn = netdev_priv(netdev); vinfo = opa_vnic_dev_priv(netdev); vinfo->dd = dd; - vinfo->num_tx_q = dd->num_sdma; - vinfo->num_rx_q = dd->num_vnic_contexts; + vinfo->num_tx_q = chip_sdma_engines(dd); + vinfo->num_rx_q = dd->num_netdev_contexts; vinfo->netdev = netdev; rn->free_rdma_netdev = hfi1_vnic_free_rn; rn->set_id = hfi1_vnic_set_vesw_id; @@ -841,7 +643,6 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, rxq->idx = i; rxq->vinfo = vinfo; rxq->netdev = netdev; - netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64); } rc = hfi1_vnic_init(vinfo); |