25 files changed, 2631 insertions, 459 deletions
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index 0405d26d0833..2e89ec10efed 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -22,9 +22,13 @@ hfi1-y := \
 	init.o \
 	intr.o \
 	iowait.o \
+	ipoib_main.o \
+	ipoib_rx.o \
+	ipoib_tx.o \
 	mad.o \
 	mmu_rb.o \
 	msix.o \
+	netdev_rx.o \
 	opfn.o \
 	pcie.o \
 	pio.o \
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 1aeea5d65c01..2a91b8d95e12 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -64,6 +64,7 @@ struct hfi1_affinity_node_list node_affinity = {
 static const char * const irq_type_names[] = {
 	"SDMA",
 	"RCVCTXT",
+	"NETDEVCTXT",
 	"GENERAL",
 	"OTHER",
 };
@@ -915,6 +916,11 @@ static int get_irq_affinity(struct hfi1_devdata *dd,
 			set = &entry->rcv_intr;
 		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
 		break;
+	case IRQ_NETDEVCTXT:
+		rcd = (struct hfi1_ctxtdata *)msix->arg;
+		set = &entry->def_intr;
+		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
+		break;
 	default:
 		dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
 		return -EINVAL;
@@ -987,6 +993,10 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
 		if (rcd->ctxt != HFI1_CTRL_CTXT)
 			set = &entry->rcv_intr;
 		break;
+	case IRQ_NETDEVCTXT:
+		rcd = (struct hfi1_ctxtdata *)msix->arg;
+		set = &entry->def_intr;
+		break;
 	default:
 		mutex_unlock(&node_affinity.lock);
 		return;
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index 6a7e6ea4e426..f94ed5d7c7a3 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -52,6 +52,7 @@
 enum irq_type {
 	IRQ_SDMA,
 	IRQ_RCVCTXT,
+	IRQ_NETDEVCTXT,
 	IRQ_GENERAL,
 	IRQ_OTHER
 };
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index e0b1238d31df..15f9c635f292 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -66,10 +66,7 @@
 #include "affinity.h"
 #include "debugfs.h"
 #include "fault.h"
-
-uint kdeth_qp;
-module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
-MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
+#include "netdev.h"
 
 uint num_vls = HFI1_MAX_VLS_SUPPORTED;
 module_param(num_vls, uint, S_IRUGO);
@@ -128,13 +125,15 @@ struct flag_table {
 
 /*
  * RSM instance allocation
- *   0 - Verbs
- *   1 - User Fecn Handling
- *   2 - Vnic
+ *   0 - User Fecn Handling
+ *   1 - Vnic
+ *   2 - AIP
+ *   3 - Verbs
  */
-#define RSM_INS_VERBS             0
-#define RSM_INS_FECN              1
-#define RSM_INS_VNIC              2
+#define RSM_INS_FECN              0
+#define RSM_INS_VNIC              1
+#define RSM_INS_AIP               2
+#define RSM_INS_VERBS             3
 
 /* Bit offset into the GUID which carries HFI id information */
 #define GUID_HFI_INDEX_SHIFT     39
@@ -175,6 +174,25 @@ struct flag_table {
 /* QPN[m+n:1] QW 1, OFFSET 1 */
 #define QPN_SELECT_OFFSET      ((1ull << QW_SHIFT) | (1ull))
 
+/* RSM fields for AIP */
+/* LRH.BTH above is reused for this rule */
+
+/* BTH.DESTQP: QW 1, OFFSET 16 for match */
+#define BTH_DESTQP_QW           1ull
+#define BTH_DESTQP_BIT_OFFSET   16ull
+#define BTH_DESTQP_OFFSET(off) ((BTH_DESTQP_QW << QW_SHIFT) | (off))
+#define BTH_DESTQP_MATCH_OFFSET BTH_DESTQP_OFFSET(BTH_DESTQP_BIT_OFFSET)
+#define BTH_DESTQP_MASK         0xFFull
+#define BTH_DESTQP_VALUE        0x81ull
+
+/* DETH.SQPN: QW 1 Offset 56 for select */
+/* We use 8 most significant Soure QPN bits as entropy fpr AIP */
+#define DETH_AIP_SQPN_QW 3ull
+#define DETH_AIP_SQPN_BIT_OFFSET 56ull
+#define DETH_AIP_SQPN_OFFSET(off) ((DETH_AIP_SQPN_QW << QW_SHIFT) | (off))
+#define DETH_AIP_SQPN_SELECT_OFFSET \
+	DETH_AIP_SQPN_OFFSET(DETH_AIP_SQPN_BIT_OFFSET)
+
 /* RSM fields for Vnic */
 /* L2_TYPE: QW 0, OFFSET 61 - for match */
 #define L2_TYPE_QW             0ull
@@ -8463,6 +8481,49 @@ static void hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
 	local_irq_restore(flags);
 }
 
+/**
+ * hfi1_netdev_rx_napi - napi poll function to move eoi inline
+ * @napi - pointer to napi object
+ * @budget - netdev budget
+ */
+int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget)
+{
+	struct hfi1_netdev_rxq *rxq = container_of(napi,
+			struct hfi1_netdev_rxq, napi);
+	struct hfi1_ctxtdata *rcd = rxq->rcd;
+	int work_done = 0;
+
+	work_done = rcd->do_interrupt(rcd, budget);
+
+	if (work_done < budget) {
+		napi_complete_done(napi, work_done);
+		hfi1_rcd_eoi_intr(rcd);
+	}
+
+	return work_done;
+}
+
+/* Receive packet napi handler for netdevs VNIC and AIP  */
+irqreturn_t receive_context_interrupt_napi(int irq, void *data)
+{
+	struct hfi1_ctxtdata *rcd = data;
+
+	receive_interrupt_common(rcd);
+
+	if (likely(rcd->napi)) {
+		if (likely(napi_schedule_prep(rcd->napi)))
+			__napi_schedule_irqoff(rcd->napi);
+		else
+			__hfi1_rcd_eoi_intr(rcd);
+	} else {
+		WARN_ONCE(1, "Napi IRQ handler without napi set up ctxt=%d\n",
+			  rcd->ctxt);
+		__hfi1_rcd_eoi_intr(rcd);
+	}
+
+	return IRQ_HANDLED;
+}
+
 /*
  * Receive packet IRQ handler.  This routine expects to be on its own IRQ.
  * This routine will try to handle packets immediately (latency), but if
@@ -13330,13 +13391,12 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
  *                             in array of contexts
  *	freectxts  - number of free user contexts
  *	num_send_contexts - number of PIO send contexts being used
- *	num_vnic_contexts - number of contexts reserved for VNIC
+ *	num_netdev_contexts - number of contexts reserved for netdev
  */
 static int set_up_context_variables(struct hfi1_devdata *dd)
 {
 	unsigned long num_kernel_contexts;
-	u16 num_vnic_contexts = HFI1_NUM_VNIC_CTXT;
-	int total_contexts;
+	u16 num_netdev_contexts;
 	int ret;
 	unsigned ngroups;
 	int rmt_count;
@@ -13373,13 +13433,6 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
 		num_kernel_contexts = send_contexts - num_vls - 1;
 	}
 
-	/* Accommodate VNIC contexts if possible */
-	if ((num_kernel_contexts + num_vnic_contexts) > rcv_contexts) {
-		dd_dev_err(dd, "No receive contexts available for VNIC\n");
-		num_vnic_contexts = 0;
-	}
-	total_contexts = num_kernel_contexts + num_vnic_contexts;
-
 	/*
 	 * User contexts:
 	 *	- default to 1 user context per real (non-HT) CPU core if
@@ -13392,28 +13445,32 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
 	/*
 	 * Adjust the counts given a global max.
 	 */
-	if (total_contexts + n_usr_ctxts > rcv_contexts) {
+	if (num_kernel_contexts + n_usr_ctxts > rcv_contexts) {
 		dd_dev_err(dd,
-			   "Reducing # user receive contexts to: %d, from %u\n",
-			   rcv_contexts - total_contexts,
+			   "Reducing # user receive contexts to: %u, from %u\n",
+			   (u32)(rcv_contexts - num_kernel_contexts),
 			   n_usr_ctxts);
 		/* recalculate */
-		n_usr_ctxts = rcv_contexts - total_contexts;
+		n_usr_ctxts = rcv_contexts - num_kernel_contexts;
 	}
 
+	num_netdev_contexts =
+		hfi1_num_netdev_contexts(dd, rcv_contexts -
+					 (num_kernel_contexts + n_usr_ctxts),
+					 &node_affinity.real_cpu_mask);
 	/*
 	 * The RMT entries are currently allocated as shown below:
 	 * 1. QOS (0 to 128 entries);
 	 * 2. FECN (num_kernel_context - 1 + num_user_contexts +
-	 *    num_vnic_contexts);
-	 * 3. VNIC (num_vnic_contexts).
-	 * It should be noted that FECN oversubscribe num_vnic_contexts
-	 * entries of RMT because both VNIC and PSM could allocate any receive
+	 *    num_netdev_contexts);
+	 * 3. netdev (num_netdev_contexts).
+	 * It should be noted that FECN oversubscribe num_netdev_contexts
+	 * entries of RMT because both netdev and PSM could allocate any receive
 	 * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts,
 	 * and PSM FECN must reserve an RMT entry for each possible PSM receive
 	 * context.
 	 */
-	rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2);
+	rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_netdev_contexts * 2);
 	if (HFI1_CAP_IS_KSET(TID_RDMA))
 		rmt_count += num_kernel_contexts - 1;
 	if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
@@ -13426,21 +13483,20 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
 		n_usr_ctxts = user_rmt_reduced;
 	}
 
-	total_contexts += n_usr_ctxts;
-
-	/* the first N are kernel contexts, the rest are user/vnic contexts */
-	dd->num_rcv_contexts = total_contexts;
+	/* the first N are kernel contexts, the rest are user/netdev contexts */
+	dd->num_rcv_contexts =
+		num_kernel_contexts + n_usr_ctxts + num_netdev_contexts;
 	dd->n_krcv_queues = num_kernel_contexts;
 	dd->first_dyn_alloc_ctxt = num_kernel_contexts;
-	dd->num_vnic_contexts = num_vnic_contexts;
+	dd->num_netdev_contexts = num_netdev_contexts;
 	dd->num_user_contexts = n_usr_ctxts;
 	dd->freectxts = n_usr_ctxts;
 	dd_dev_info(dd,
-		    "rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n",
+		    "rcv contexts: chip %d, used %d (kernel %d, netdev %u, user %u)\n",
 		    rcv_contexts,
 		    (int)dd->num_rcv_contexts,
 		    (int)dd->n_krcv_queues,
-		    dd->num_vnic_contexts,
+		    dd->num_netdev_contexts,
 		    dd->num_user_contexts);
 
 	/*
@@ -14119,21 +14175,12 @@ static void init_early_variables(struct hfi1_devdata *dd)
 
 static void init_kdeth_qp(struct hfi1_devdata *dd)
 {
-	/* user changed the KDETH_QP */
-	if (kdeth_qp != 0 && kdeth_qp >= 0xff) {
-		/* out of range or illegal value */
-		dd_dev_err(dd, "Invalid KDETH queue pair prefix, ignoring");
-		kdeth_qp = 0;
-	}
-	if (kdeth_qp == 0)	/* not set, or failed range check */
-		kdeth_qp = DEFAULT_KDETH_QP;
-
 	write_csr(dd, SEND_BTH_QP,
-		  (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK) <<
+		  (RVT_KDETH_QP_PREFIX & SEND_BTH_QP_KDETH_QP_MASK) <<
 		  SEND_BTH_QP_KDETH_QP_SHIFT);
 
 	write_csr(dd, RCV_BTH_QP,
-		  (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK) <<
+		  (RVT_KDETH_QP_PREFIX & RCV_BTH_QP_KDETH_QP_MASK) <<
 		  RCV_BTH_QP_KDETH_QP_SHIFT);
 }
 
@@ -14249,6 +14296,12 @@ static void complete_rsm_map_table(struct hfi1_devdata *dd,
 	}
 }
 
+/* Is a receive side mapping rule */
+static bool has_rsm_rule(struct hfi1_devdata *dd, u8 rule_index)
+{
+	return read_csr(dd, RCV_RSM_CFG + (8 * rule_index)) != 0;
+}
+
 /*
  * Add a receive side mapping rule.
  */
@@ -14485,77 +14538,138 @@ static void init_fecn_handling(struct hfi1_devdata *dd,
 	rmt->used += total_cnt;
 }
 
-/* Initialize RSM for VNIC */
-void hfi1_init_vnic_rsm(struct hfi1_devdata *dd)
+static inline bool hfi1_is_rmt_full(int start, int spare)
+{
+	return (start + spare) > NUM_MAP_ENTRIES;
+}
+
+static bool hfi1_netdev_update_rmt(struct hfi1_devdata *dd)
 {
 	u8 i, j;
 	u8 ctx_id = 0;
 	u64 reg;
 	u32 regoff;
-	struct rsm_rule_data rrd;
+	int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
+	int ctxt_count = hfi1_netdev_ctxt_count(dd);
 
-	if (hfi1_vnic_is_rsm_full(dd, NUM_VNIC_MAP_ENTRIES)) {
-		dd_dev_err(dd, "Vnic RSM disabled, rmt entries used = %d\n",
-			   dd->vnic.rmt_start);
-		return;
+	/* We already have contexts mapped in RMT */
+	if (has_rsm_rule(dd, RSM_INS_VNIC) || has_rsm_rule(dd, RSM_INS_AIP)) {
+		dd_dev_info(dd, "Contexts are already mapped in RMT\n");
+		return true;
+	}
+
+	if (hfi1_is_rmt_full(rmt_start, NUM_NETDEV_MAP_ENTRIES)) {
+		dd_dev_err(dd, "Not enough RMT entries used = %d\n",
+			   rmt_start);
+		return false;
 	}
 
-	dev_dbg(&(dd)->pcidev->dev, "Vnic rsm start = %d, end %d\n",
-		dd->vnic.rmt_start,
-		dd->vnic.rmt_start + NUM_VNIC_MAP_ENTRIES);
+	dev_dbg(&(dd)->pcidev->dev, "RMT start = %d, end %d\n",
+		rmt_start,
+		rmt_start + NUM_NETDEV_MAP_ENTRIES);
 
 	/* Update RSM mapping table, 32 regs, 256 entries - 1 ctx per byte */
-	regoff = RCV_RSM_MAP_TABLE + (dd->vnic.rmt_start / 8) * 8;
+	regoff = RCV_RSM_MAP_TABLE + (rmt_start / 8) * 8;
 	reg = read_csr(dd, regoff);
-	for (i = 0; i < NUM_VNIC_MAP_ENTRIES; i++) {
-		/* Update map register with vnic context */
-		j = (dd->vnic.rmt_start + i) % 8;
+	for (i = 0; i < NUM_NETDEV_MAP_ENTRIES; i++) {
+		/* Update map register with netdev context */
+		j = (rmt_start + i) % 8;
 		reg &= ~(0xffllu << (j * 8));
-		reg |= (u64)dd->vnic.ctxt[ctx_id++]->ctxt << (j * 8);
-		/* Wrap up vnic ctx index */
-		ctx_id %= dd->vnic.num_ctxt;
+		reg |= (u64)hfi1_netdev_get_ctxt(dd, ctx_id++)->ctxt << (j * 8);
+		/* Wrap up netdev ctx index */
+		ctx_id %= ctxt_count;
 		/* Write back map register */
-		if (j == 7 || ((i + 1) == NUM_VNIC_MAP_ENTRIES)) {
+		if (j == 7 || ((i + 1) == NUM_NETDEV_MAP_ENTRIES)) {
 			dev_dbg(&(dd)->pcidev->dev,
-				"Vnic rsm map reg[%d] =0x%llx\n",
+				"RMT[%d] =0x%llx\n",
 				regoff - RCV_RSM_MAP_TABLE, reg);
 
 			write_csr(dd, regoff, reg);
 			regoff += 8;
-			if (i < (NUM_VNIC_MAP_ENTRIES - 1))
+			if (i < (NUM_NETDEV_MAP_ENTRIES - 1))
 				reg = read_csr(dd, regoff);
 		}
 	}
 
-	/* Add rule for vnic */
-	rrd.offset = dd->vnic.rmt_start;
-	rrd.pkt_type = 4;
-	/* Match 16B packets */
-	rrd.field1_off = L2_TYPE_MATCH_OFFSET;
-	rrd.mask1 = L2_TYPE_MASK;
-	rrd.value1 = L2_16B_VALUE;
-	/* Match ETH L4 packets */
-	rrd.field2_off = L4_TYPE_MATCH_OFFSET;
-	rrd.mask2 = L4_16B_TYPE_MASK;
-	rrd.value2 = L4_16B_ETH_VALUE;
-	/* Calc context from veswid and entropy */
-	rrd.index1_off = L4_16B_HDR_VESWID_OFFSET;
-	rrd.index1_width = ilog2(NUM_VNIC_MAP_ENTRIES);
-	rrd.index2_off = L2_16B_ENTROPY_OFFSET;
-	rrd.index2_width = ilog2(NUM_VNIC_MAP_ENTRIES);
-	add_rsm_rule(dd, RSM_INS_VNIC, &rrd);
-
-	/* Enable RSM if not already enabled */
+	return true;
+}
+
+static void hfi1_enable_rsm_rule(struct hfi1_devdata *dd,
+				 int rule, struct rsm_rule_data *rrd)
+{
+	if (!hfi1_netdev_update_rmt(dd)) {
+		dd_dev_err(dd, "Failed to update RMT for RSM%d rule\n", rule);
+		return;
+	}
+
+	add_rsm_rule(dd, rule, rrd);
 	add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
 }
 
+void hfi1_init_aip_rsm(struct hfi1_devdata *dd)
+{
+	/*
+	 * go through with the initialisation only if this rule actually doesn't
+	 * exist yet
+	 */
+	if (atomic_fetch_inc(&dd->ipoib_rsm_usr_num) == 0) {
+		int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
+		struct rsm_rule_data rrd = {
+			.offset = rmt_start,
+			.pkt_type = IB_PACKET_TYPE,
+			.field1_off = LRH_BTH_MATCH_OFFSET,
+			.mask1 = LRH_BTH_MASK,
+			.value1 = LRH_BTH_VALUE,
+			.field2_off = BTH_DESTQP_MATCH_OFFSET,
+			.mask2 = BTH_DESTQP_MASK,
+			.value2 = BTH_DESTQP_VALUE,
+			.index1_off = DETH_AIP_SQPN_SELECT_OFFSET +
+					ilog2(NUM_NETDEV_MAP_ENTRIES),
+			.index1_width = ilog2(NUM_NETDEV_MAP_ENTRIES),
+			.index2_off = DETH_AIP_SQPN_SELECT_OFFSET,
+			.index2_width = ilog2(NUM_NETDEV_MAP_ENTRIES)
+		};
+
+		hfi1_enable_rsm_rule(dd, RSM_INS_AIP, &rrd);
+	}
+}
+
+/* Initialize RSM for VNIC */
+void hfi1_init_vnic_rsm(struct hfi1_devdata *dd)
+{
+	int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
+	struct rsm_rule_data rrd = {
+		/* Add rule for vnic */
+		.offset = rmt_start,
+		.pkt_type = 4,
+		/* Match 16B packets */
+		.field1_off = L2_TYPE_MATCH_OFFSET,
+		.mask1 = L2_TYPE_MASK,
+		.value1 = L2_16B_VALUE,
+		/* Match ETH L4 packets */
+		.field2_off = L4_TYPE_MATCH_OFFSET,
+		.mask2 = L4_16B_TYPE_MASK,
+		.value2 = L4_16B_ETH_VALUE,
+		/* Calc context from veswid and entropy */
+		.index1_off = L4_16B_HDR_VESWID_OFFSET,
+		.index1_width = ilog2(NUM_NETDEV_MAP_ENTRIES),
+		.index2_off = L2_16B_ENTROPY_OFFSET,
+		.index2_width = ilog2(NUM_NETDEV_MAP_ENTRIES)
+	};
+
+	hfi1_enable_rsm_rule(dd, RSM_INS_VNIC, &rrd);
+}
+
 void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd)
 {
 	clear_rsm_rule(dd, RSM_INS_VNIC);
+}
 
-	/* Disable RSM if used only by vnic */
-	if (dd->vnic.rmt_start == 0)
-		clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
+void hfi1_deinit_aip_rsm(struct hfi1_devdata *dd)
+{
+	/* only actually clear the rule if it's the last user asking to do so */
+	if (atomic_fetch_add_unless(&dd->ipoib_rsm_usr_num, -1, 0) == 1)
+		clear_rsm_rule(dd, RSM_INS_AIP);
 }
 
 static int init_rxe(struct hfi1_devdata *dd)
@@ -14574,8 +14688,8 @@ static int init_rxe(struct hfi1_devdata *dd)
 	init_qos(dd, rmt);
 	init_fecn_handling(dd, rmt);
 	complete_rsm_map_table(dd, rmt);
-	/* record number of used rsm map entries for vnic */
-	dd->vnic.rmt_start = rmt->used;
+	/* record number of used rsm map entries for netdev */
+	hfi1_netdev_set_free_rmt_idx(dd, rmt->used);
 	kfree(rmt);
 
 	/*
@@ -15129,6 +15243,10 @@ int hfi1_init_dd(struct hfi1_devdata *dd)
 		 (dd->revision >> CCE_REVISION_SW_SHIFT)
 		    & CCE_REVISION_SW_MASK);
 
+	/* alloc netdev data */
+	if (hfi1_netdev_alloc(dd))
+		goto bail_cleanup;
+
 	ret = set_up_context_variables(dd);
 	if (ret)
 		goto bail_cleanup;
@@ -15229,6 +15347,7 @@ bail_clear_intr:
 	hfi1_comp_vectors_clean_up(dd);
 	msix_clean_up_interrupts(dd);
 bail_cleanup:
+	hfi1_netdev_free(dd);
 	hfi1_pcie_ddcleanup(dd);
 bail_free:
 	hfi1_free_devdata(dd);
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 725509261016..2c6f2de74d4d 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -1,7 +1,7 @@
 #ifndef _CHIP_H
 #define _CHIP_H
 /*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -1447,6 +1447,7 @@ irqreturn_t general_interrupt(int irq, void *data);
 irqreturn_t sdma_interrupt(int irq, void *data);
 irqreturn_t receive_context_interrupt(int irq, void *data);
 irqreturn_t receive_context_thread(int irq, void *data);
+irqreturn_t receive_context_interrupt_napi(int irq, void *data);
 
 int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set);
 void init_qsfp_int(struct hfi1_devdata *dd);
@@ -1455,6 +1456,8 @@ void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr);
 void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr);
 void reset_interrupts(struct hfi1_devdata *dd);
 u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx);
+void hfi1_init_aip_rsm(struct hfi1_devdata *dd);
+void hfi1_deinit_aip_rsm(struct hfi1_devdata *dd);
 
 /*
  * Interrupt source table.
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index 40a1ff0c8a8e..ff423e546b80 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -72,13 +72,6 @@
  * compilation unit
  */
 
-/*
- * If a packet's QP[23:16] bits match this value, then it is
- * a PSM packet and the hardware will expect a KDETH header
- * following the BTH.
- */
-#define DEFAULT_KDETH_QP 0x80
-
 /* driver/hw feature set bitmask */
 #define HFI1_CAP_USER_SHIFT      24
 #define HFI1_CAP_MASK            ((1UL << HFI1_CAP_USER_SHIFT) - 1)
@@ -149,7 +142,8 @@
 				   HFI1_CAP_NO_INTEGRITY |		\
 				   HFI1_CAP_PKEY_CHECK |		\
 				   HFI1_CAP_TID_RDMA |			\
-				   HFI1_CAP_OPFN) <<			\
+				   HFI1_CAP_OPFN |			\
+				   HFI1_CAP_AIP) <<			\
 				  HFI1_CAP_USER_SHIFT)
 /*
  * Set of capabilities that need to be enabled for kernel context in
@@ -166,6 +160,7 @@
 				 HFI1_CAP_PKEY_CHECK |			\
 				 HFI1_CAP_MULTI_PKT_EGR |		\
 				 HFI1_CAP_EXTENDED_PSN |		\
+				 HFI1_CAP_AIP |				\
 				 ((HFI1_CAP_HDRSUPP |			\
 				   HFI1_CAP_MULTI_PKT_EGR |		\
 				   HFI1_CAP_STATIC_RATE_CTRL |		\
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 049d15befe58..a40701a6e1b6 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015-2018 Intel Corporation.
+ * Copyright(c) 2015-2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -54,6 +54,7 @@
 #include <linux/module.h>
 #include <linux/prefetch.h>
 #include <rdma/ib_verbs.h>
+#include <linux/etherdevice.h>
 
 #include "hfi.h"
 #include "trace.h"
@@ -63,6 +64,9 @@
 #include "vnic.h"
 #include "fault.h"
 
+#include "ipoib.h"
+#include "netdev.h"
+
 #undef pr_fmt
 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
 
@@ -748,6 +752,39 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
 	return ret;
 }
 
+static void process_rcv_packet_napi(struct hfi1_packet *packet)
+{
+	packet->etype = rhf_rcv_type(packet->rhf);
+
+	/* total length */
+	packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */
+	/* retrieve eager buffer details */
+	packet->etail = rhf_egr_index(packet->rhf);
+	packet->ebuf = get_egrbuf(packet->rcd, packet->rhf,
+				  &packet->updegr);
+	/*
+	 * Prefetch the contents of the eager buffer.  It is
+	 * OK to send a negative length to prefetch_range().
+	 * The +2 is the size of the RHF.
+	 */
+	prefetch_range(packet->ebuf,
+		       packet->tlen - ((packet->rcd->rcvhdrqentsize -
+				       (rhf_hdrq_offset(packet->rhf)
+					+ 2)) * 4));
+
+	packet->rcd->rhf_rcv_function_map[packet->etype](packet);
+	packet->numpkt++;
+
+	/* Set up for the next packet */
+	packet->rhqoff += packet->rsize;
+	if (packet->rhqoff >= packet->maxcnt)
+		packet->rhqoff = 0;
+
+	packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
+				      packet->rcd->rhf_offset;
+	packet->rhf = rhf_to_cpu(packet->rhf_addr);
+}
+
 static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
 {
 	int ret;
@@ -827,6 +864,36 @@ static inline void finish_packet(struct hfi1_packet *packet)
 }
 
 /*
+ * handle_receive_interrupt_napi_fp - receive a packet
+ * @rcd: the context
+ * @budget: polling budget
+ *
+ * Called from interrupt handler for receive interrupt.
+ * This is the fast path interrupt handler
+ * when executing napi soft irq environment.
+ */
+int handle_receive_interrupt_napi_fp(struct hfi1_ctxtdata *rcd, int budget)
+{
+	struct hfi1_packet packet;
+
+	init_packet(rcd, &packet);
+	if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf)))
+		goto bail;
+
+	while (packet.numpkt < budget) {
+		process_rcv_packet_napi(&packet);
+		if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
+			break;
+
+		process_rcv_update(0, &packet);
+	}
+	hfi1_set_rcd_head(rcd, packet.rhqoff);
+bail:
+	finish_packet(&packet);
+	return packet.numpkt;
+}
+
+/*
  * Handle receive interrupts when using the no dma rtail option.
  */
 int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
@@ -1074,6 +1141,63 @@ bail:
 }
 
 /*
+ * handle_receive_interrupt_napi_sp - receive a packet
+ * @rcd: the context
+ * @budget: polling budget
+ *
+ * Called from interrupt handler for errors or receive interrupt.
+ * This is the slow path interrupt handler
+ * when executing napi soft irq environment.
+ */
+int handle_receive_interrupt_napi_sp(struct hfi1_ctxtdata *rcd, int budget)
+{
+	struct hfi1_devdata *dd = rcd->dd;
+	int last = RCV_PKT_OK;
+	bool needset = true;
+	struct hfi1_packet packet;
+
+	init_packet(rcd, &packet);
+	if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf)))
+		goto bail;
+
+	while (last != RCV_PKT_DONE && packet.numpkt < budget) {
+		if (hfi1_need_drop(dd)) {
+			/* On to the next packet */
+			packet.rhqoff += packet.rsize;
+			packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
+					  packet.rhqoff +
+					  rcd->rhf_offset;
+			packet.rhf = rhf_to_cpu(packet.rhf_addr);
+
+		} else {
+			if (set_armed_to_active(&packet))
+				goto bail;
+			process_rcv_packet_napi(&packet);
+		}
+
+		if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
+			last = RCV_PKT_DONE;
+
+		if (needset) {
+			needset = false;
+			set_all_fastpath(dd, rcd);
+		}
+
+		process_rcv_update(last, &packet);
+	}
+
+	hfi1_set_rcd_head(rcd, packet.rhqoff);
+
+bail:
+	/*
+	 * Always write head at end, and setup rcv interrupt, even
+	 * if no packets were processed.
+	 */
+	finish_packet(&packet);
+	return packet.numpkt;
+}
+
+/*
  * We may discover in the interrupt that the hardware link state has
  * changed from ARMED to ACTIVE (due to the arrival of a non-SC15 packet),
  * and we need to update the driver's notion of the link state.  We cannot
@@ -1550,6 +1674,82 @@ void handle_eflags(struct hfi1_packet *packet)
 		show_eflags_errs(packet);
 }
 
+static void hfi1_ipoib_ib_rcv(struct hfi1_packet *packet)
+{
+	struct hfi1_ibport *ibp;
+	struct net_device *netdev;
+	struct hfi1_ctxtdata *rcd = packet->rcd;
+	struct napi_struct *napi = rcd->napi;
+	struct sk_buff *skb;
+	struct hfi1_netdev_rxq *rxq = container_of(napi,
+			struct hfi1_netdev_rxq, napi);
+	u32 extra_bytes;
+	u32 tlen, qpnum;
+	bool do_work, do_cnp;
+	struct hfi1_ipoib_dev_priv *priv;
+
+	trace_hfi1_rcvhdr(packet);
+
+	hfi1_setup_ib_header(packet);
+
+	packet->ohdr = &((struct ib_header *)packet->hdr)->u.oth;
+	packet->grh = NULL;
+
+	if (unlikely(rhf_err_flags(packet->rhf))) {
+		handle_eflags(packet);
+		return;
+	}
+
+	qpnum = ib_bth_get_qpn(packet->ohdr);
+	netdev = hfi1_netdev_get_data(rcd->dd, qpnum);
+	if (!netdev)
+		goto drop_no_nd;
+
+	trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
+	trace_ctxt_rsm_hist(rcd->ctxt);
+
+	/* handle congestion notifications */
+	do_work = hfi1_may_ecn(packet);
+	if (unlikely(do_work)) {
+		do_cnp = (packet->opcode != IB_OPCODE_CNP);
+		(void)hfi1_process_ecn_slowpath(hfi1_ipoib_priv(netdev)->qp,
+						 packet, do_cnp);
+	}
+
+	/*
+	 * We have split point after last byte of DETH
+	 * lets strip padding and CRC and ICRC.
+	 * tlen is whole packet len so we need to
+	 * subtract header size as well.
+	 */
+	tlen = packet->tlen;
+	extra_bytes = ib_bth_get_pad(packet->ohdr) + (SIZE_OF_CRC << 2) +
+			packet->hlen;
+	if (unlikely(tlen < extra_bytes))
+		goto drop;
+
+	tlen -= extra_bytes;
+
+	skb = hfi1_ipoib_prepare_skb(rxq, tlen, packet->ebuf);
+	if (unlikely(!skb))
+		goto drop;
+
+	priv = hfi1_ipoib_priv(netdev);
+	hfi1_ipoib_update_rx_netstats(priv, 1, skb->len);
+
+	skb->dev = netdev;
+	skb->pkt_type = PACKET_HOST;
+	netif_receive_skb(skb);
+
+	return;
+
+drop:
+	++netdev->stats.rx_dropped;
+drop_no_nd:
+	ibp = rcd_to_iport(packet->rcd);
+	++ibp->rvp.n_pkt_drops;
+}
+
 /*
  * The following functions are called by the interrupt handler. They are type
  * specific handlers for each packet type.
@@ -1572,28 +1772,10 @@ static void process_receive_ib(struct hfi1_packet *packet)
 	hfi1_ib_rcv(packet);
 }
 
-static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
-{
-	/* Packet received in VNIC context via RSM */
-	if (packet->rcd->is_vnic)
-		return true;
-
-	if ((hfi1_16B_get_l2(packet->ebuf) == OPA_16B_L2_TYPE) &&
-	    (hfi1_16B_get_l4(packet->ebuf) == OPA_16B_L4_ETHR))
-		return true;
-
-	return false;
-}
-
 static void process_receive_bypass(struct hfi1_packet *packet)
 {
 	struct hfi1_devdata *dd = packet->rcd->dd;
 
-	if (hfi1_is_vnic_packet(packet)) {
-		hfi1_vnic_bypass_rcv(packet);
-		return;
-	}
-
 	if (hfi1_setup_bypass_packet(packet))
 		return;
 
@@ -1757,3 +1939,14 @@ const rhf_rcv_function_ptr normal_rhf_rcv_functions[] = {
 	[RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
 	[RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
 };
+
+const rhf_rcv_function_ptr netdev_rhf_rcv_functions[] = {
+	[RHF_RCV_TYPE_EXPECTED] = process_receive_invalid,
+	[RHF_RCV_TYPE_EAGER] = process_receive_invalid,
+	[RHF_RCV_TYPE_IB] = hfi1_ipoib_ib_rcv,
+	[RHF_RCV_TYPE_ERROR] = process_receive_error,
+	[RHF_RCV_TYPE_BYPASS] = hfi1_vnic_bypass_rcv,
+	[RHF_RCV_TYPE_INVALID5] = process_receive_invalid,
+	[RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
+	[RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
+};
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index e7fdd70c6e78..8ca51e43cf53 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2015-2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -1264,7 +1264,7 @@ static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
 	memset(&binfo, 0, sizeof(binfo));
 	binfo.hw_version = dd->revision;
 	binfo.sw_version = HFI1_KERN_SWVERSION;
-	binfo.bthqp = kdeth_qp;
+	binfo.bthqp = RVT_KDETH_QP_PREFIX;
 	binfo.jkey = uctxt->jkey;
 	/*
 	 * If more than 64 contexts are enabled the allocated credit
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index b06c2594105a..b4c6bff60a4e 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1,7 +1,7 @@
 #ifndef _HFI1_KERNEL_H
 #define _HFI1_KERNEL_H
 /*
- * Copyright(c) 2015-2018 Intel Corporation.
+ * Copyright(c) 2015-2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -233,6 +233,8 @@ struct hfi1_ctxtdata {
 	intr_handler fast_handler;
 	/** slow handler */
 	intr_handler slow_handler;
+	/* napi pointer assiociated with netdev */
+	struct napi_struct *napi;
 	/* verbs rx_stats per rcd */
 	struct hfi1_opcode_stats_perctx *opstats;
 	/* clear interrupt mask */
@@ -383,11 +385,11 @@ struct hfi1_packet {
 	u32 rhqoff;
 	u32 dlid;
 	u32 slid;
+	int numpkt;
 	u16 tlen;
 	s16 etail;
 	u16 pkey;
 	u8 hlen;
-	u8 numpkt;
 	u8 rsize;
 	u8 updegr;
 	u8 etype;
@@ -985,7 +987,7 @@ typedef void (*hfi1_make_req)(struct rvt_qp *qp,
 			      struct hfi1_pkt_state *ps,
 			      struct rvt_swqe *wqe);
 extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[];
-
+extern const rhf_rcv_function_ptr netdev_rhf_rcv_functions[];
 
 /* return values for the RHF receive functions */
 #define RHF_RCV_CONTINUE  0	/* keep going */
@@ -1045,23 +1047,10 @@ struct hfi1_asic_data {
 #define NUM_MAP_ENTRIES	 256
 #define NUM_MAP_REGS      32
 
-/*
- * Number of VNIC contexts used. Ensure it is less than or equal to
- * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
- */
-#define HFI1_NUM_VNIC_CTXT   8
-
-/* Number of VNIC RSM entries */
-#define NUM_VNIC_MAP_ENTRIES 8
-
 /* Virtual NIC information */
 struct hfi1_vnic_data {
-	struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT];
 	struct kmem_cache *txreq_cache;
-	struct xarray vesws;
 	u8 num_vports;
-	u8 rmt_start;
-	u8 num_ctxt;
 };
 
 struct hfi1_vnic_vport_info;
@@ -1167,8 +1156,8 @@ struct hfi1_devdata {
 	u64 z_send_schedule;
 
 	u64 __percpu *send_schedule;
-	/* number of reserved contexts for VNIC usage */
-	u16 num_vnic_contexts;
+	/* number of reserved contexts for netdev usage */
+	u16 num_netdev_contexts;
 	/* number of receive contexts in use by the driver */
 	u32 num_rcv_contexts;
 	/* number of pio send contexts in use by the driver */
@@ -1417,12 +1406,12 @@ struct hfi1_devdata {
 	struct hfi1_vnic_data vnic;
 	/* Lock to protect IRQ SRC register access */
 	spinlock_t irq_src_lock;
-};
+	int vnic_num_vports;
+	struct net_device *dummy_netdev;
 
-static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
-{
-	return (dd->vnic.rmt_start + spare) > NUM_MAP_ENTRIES;
-}
+	/* Keeps track of IPoIB RSM rule users */
+	atomic_t ipoib_rsm_usr_num;
+};
 
 /* 8051 firmware version helper */
 #define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c))
@@ -1500,6 +1489,8 @@ struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt);
 int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
 int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
 int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
+int handle_receive_interrupt_napi_fp(struct hfi1_ctxtdata *rcd, int budget);
+int handle_receive_interrupt_napi_sp(struct hfi1_ctxtdata *rcd, int budget);
 void set_all_slowpath(struct hfi1_devdata *dd);
 
 extern const struct pci_device_id hfi1_pci_tbl[];
@@ -2250,7 +2241,6 @@ extern int num_user_contexts;
 extern unsigned long n_krcvqs;
 extern uint krcvqs[];
 extern int krcvqsset;
-extern uint kdeth_qp;
 extern uint loopback;
 extern uint quick_linkup;
 extern uint rcv_intr_timeout;
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 3759d9233a1c..5eed4360695f 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -69,6 +69,7 @@
 #include "affinity.h"
 #include "vnic.h"
 #include "exp_rcv.h"
+#include "netdev.h"
 
 #undef pr_fmt
 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -374,6 +375,7 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
 		rcd->numa_id = numa;
 		rcd->rcv_array_groups = dd->rcv_entries.ngroups;
 		rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
+		rcd->msix_intr = CCE_NUM_MSIX_VECTORS;
 
 		mutex_init(&rcd->exp_mutex);
 		spin_lock_init(&rcd->exp_lock);
@@ -1316,6 +1318,7 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
 		goto bail;
 	}
 
+	atomic_set(&dd->ipoib_rsm_usr_num, 0);
 	return dd;
 
 bail:
@@ -1663,9 +1666,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	/* do the generic initialization */
 	initfail = hfi1_init(dd, 0);
 
-	/* setup vnic */
-	hfi1_vnic_setup(dd);
-
 	ret = hfi1_register_ib_device(dd);
 
 	/*
@@ -1704,7 +1704,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			hfi1_device_remove(dd);
 		if (!ret)
 			hfi1_unregister_ib_device(dd);
-		hfi1_vnic_cleanup(dd);
 		postinit_cleanup(dd);
 		if (initfail)
 			ret = initfail;
@@ -1749,8 +1748,8 @@ static void remove_one(struct pci_dev *pdev)
 	/* unregister from IB core */
 	hfi1_unregister_ib_device(dd);
 
-	/* cleanup vnic */
-	hfi1_vnic_cleanup(dd);
+	/* free netdev data */
+	hfi1_netdev_free(dd);
 
 	/*
 	 * Disable the IB link, disable interrupts on the device,
diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h
new file mode 100644
index 000000000000..185c9b02c974
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for IPOIB functionality
+ */
+
+#ifndef HFI1_IPOIB_H
+#define HFI1_IPOIB_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/atomic.h>
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/if_infiniband.h>
+
+#include "hfi.h"
+#include "iowait.h"
+#include "netdev.h"
+
+#include <rdma/ib_verbs.h>
+
+#define HFI1_IPOIB_ENTROPY_SHIFT   24
+
+#define HFI1_IPOIB_TXREQ_NAME_LEN   32
+
+#define HFI1_IPOIB_PSEUDO_LEN 20
+#define HFI1_IPOIB_ENCAP_LEN 4
+
+struct hfi1_ipoib_dev_priv;
+
+union hfi1_ipoib_flow {
+	u16 as_int;
+	struct {
+		u8 tx_queue;
+		u8 sc5;
+	} __attribute__((__packed__));
+};
+
+/**
+ * struct hfi1_ipoib_circ_buf - List of items to be processed
+ * @items: ring of items
+ * @head: ring head
+ * @tail: ring tail
+ * @max_items: max items + 1 that the ring can contain
+ * @producer_lock: producer sync lock
+ * @consumer_lock: consumer sync lock
+ */
+struct hfi1_ipoib_circ_buf {
+	void **items;
+	unsigned long head;
+	unsigned long tail;
+	unsigned long max_items;
+	spinlock_t producer_lock; /* head sync lock */
+	spinlock_t consumer_lock; /* tail sync lock */
+};
+
+/**
+ * struct hfi1_ipoib_txq - IPOIB per Tx queue information
+ * @priv: private pointer
+ * @sde: sdma engine
+ * @tx_list: tx request list
+ * @sent_txreqs: count of txreqs posted to sdma
+ * @flow: tracks when list needs to be flushed for a flow change
+ * @q_idx: ipoib Tx queue index
+ * @pkts_sent: indicator packets have been sent from this queue
+ * @wait: iowait structure
+ * @complete_txreqs: count of txreqs completed by sdma
+ * @napi: pointer to tx napi interface
+ * @tx_ring: ring of ipoib txreqs to be reaped by napi callback
+ */
+struct hfi1_ipoib_txq {
+	struct hfi1_ipoib_dev_priv *priv;
+	struct sdma_engine *sde;
+	struct list_head tx_list;
+	u64 sent_txreqs;
+	union hfi1_ipoib_flow flow;
+	u8 q_idx;
+	bool pkts_sent;
+	struct iowait wait;
+
+	atomic64_t ____cacheline_aligned_in_smp complete_txreqs;
+	struct napi_struct *napi;
+	struct hfi1_ipoib_circ_buf tx_ring;
+};
+
+struct hfi1_ipoib_dev_priv {
+	struct hfi1_devdata *dd;
+	struct net_device   *netdev;
+	struct ib_device    *device;
+	struct hfi1_ipoib_txq *txqs;
+	struct kmem_cache *txreq_cache;
+	struct napi_struct *tx_napis;
+	u16 pkey;
+	u16 pkey_index;
+	u32 qkey;
+	u8 port_num;
+
+	const struct net_device_ops *netdev_ops;
+	struct rvt_qp *qp;
+	struct pcpu_sw_netstats __percpu *netstats;
+};
+
+/* hfi1 ipoib rdma netdev's private data structure */
+struct hfi1_ipoib_rdma_netdev {
+	struct rdma_netdev rn;  /* keep this first */
+	/* followed by device private data */
+	struct hfi1_ipoib_dev_priv dev_priv;
+};
+
+static inline struct hfi1_ipoib_dev_priv *
+hfi1_ipoib_priv(const struct net_device *dev)
+{
+	return &((struct hfi1_ipoib_rdma_netdev *)netdev_priv(dev))->dev_priv;
+}
+
+static inline void
+hfi1_ipoib_update_rx_netstats(struct hfi1_ipoib_dev_priv *priv,
+			      u64 packets,
+			      u64 bytes)
+{
+	struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats);
+
+	u64_stats_update_begin(&netstats->syncp);
+	netstats->rx_packets += packets;
+	netstats->rx_bytes += bytes;
+	u64_stats_update_end(&netstats->syncp);
+}
+
+static inline void
+hfi1_ipoib_update_tx_netstats(struct hfi1_ipoib_dev_priv *priv,
+			      u64 packets,
+			      u64 bytes)
+{
+	struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats);
+
+	u64_stats_update_begin(&netstats->syncp);
+	netstats->tx_packets += packets;
+	netstats->tx_bytes += bytes;
+	u64_stats_update_end(&netstats->syncp);
+}
+
+int hfi1_ipoib_send_dma(struct net_device *dev,
+			struct sk_buff *skb,
+			struct ib_ah *address,
+			u32 dqpn);
+
+int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv);
+void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv);
+
+int hfi1_ipoib_rxq_init(struct net_device *dev);
+void hfi1_ipoib_rxq_deinit(struct net_device *dev);
+
+void hfi1_ipoib_napi_tx_enable(struct net_device *dev);
+void hfi1_ipoib_napi_tx_disable(struct net_device *dev);
+
+struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
+				       int size, void *data);
+
+int hfi1_ipoib_rn_get_params(struct ib_device *device,
+			     u8 port_num,
+			     enum rdma_netdev_t type,
+			     struct rdma_netdev_alloc_params *params);
+
+#endif /* _IPOIB_H */
diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c
new file mode 100644
index 000000000000..014351ebbefa
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib_main.c
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for ipoib functionality
+ */
+
+#include "ipoib.h"
+#include "hfi.h"
+
+static u32 qpn_from_mac(u8 *mac_arr)
+{
+	return (u32)mac_arr[1] << 16 | mac_arr[2] << 8 | mac_arr[3];
+}
+
+static int hfi1_ipoib_dev_init(struct net_device *dev)
+{
+	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+	int ret;
+
+	priv->netstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+
+	ret = priv->netdev_ops->ndo_init(dev);
+	if (ret)
+		return ret;
+
+	ret = hfi1_netdev_add_data(priv->dd,
+				   qpn_from_mac(priv->netdev->dev_addr),
+				   dev);
+	if (ret < 0) {
+		priv->netdev_ops->ndo_uninit(dev);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void hfi1_ipoib_dev_uninit(struct net_device *dev)
+{
+	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+	hfi1_netdev_remove_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr));
+
+	priv->netdev_ops->ndo_uninit(dev);
+}
+
+static int hfi1_ipoib_dev_open(struct net_device *dev)
+{
+	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+	int ret;
+
+	ret = priv->netdev_ops->ndo_open(dev);
+	if (!ret) {
+		struct hfi1_ibport *ibp = to_iport(priv->device,
+						   priv->port_num);
+		struct rvt_qp *qp;
+		u32 qpn = qpn_from_mac(priv->netdev->dev_addr);
+
+		rcu_read_lock();
+		qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn);
+		if (!qp) {
+			rcu_read_unlock();
+			priv->netdev_ops->ndo_stop(dev);
+			return -EINVAL;
+		}
+		rvt_get_qp(qp);
+		priv->qp = qp;
+		rcu_read_unlock();
+
+		hfi1_netdev_enable_queues(priv->dd);
+		hfi1_ipoib_napi_tx_enable(dev);
+	}
+
+	return ret;
+}
+
+static int hfi1_ipoib_dev_stop(struct net_device *dev)
+{
+	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+	if (!priv->qp)
+		return 0;
+
+	hfi1_ipoib_napi_tx_disable(dev);
+	hfi1_netdev_disable_queues(priv->dd);
+
+	rvt_put_qp(priv->qp);
+	priv->qp = NULL;
+
+	return priv->netdev_ops->ndo_stop(dev);
+}
+
+static void hfi1_ipoib_dev_get_stats64(struct net_device *dev,
+				       struct rtnl_link_stats64 *storage)
+{
+	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+	u64 rx_packets = 0ull;
+	u64 rx_bytes = 0ull;
+	u64 tx_packets = 0ull;
+	u64 tx_bytes = 0ull;
+	int i;
+
+	netdev_stats_to_stats64(storage, &dev->stats);
+
+	for_each_possible_cpu(i) {
+		const struct pcpu_sw_netstats *stats;
+		unsigned int start;
+		u64 trx_packets;
+		u64 trx_bytes;
+		u64 ttx_packets;
+		u64 ttx_bytes;
+
+		stats = per_cpu_ptr(priv->netstats, i);
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			trx_packets = stats->rx_packets;
+			trx_bytes = stats->rx_bytes;
+			ttx_packets = stats->tx_packets;
+			ttx_bytes = stats->tx_bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		rx_packets += trx_packets;
+		rx_bytes += trx_bytes;
+		tx_packets += ttx_packets;
+		tx_bytes += ttx_bytes;
+	}
+
+	storage->rx_packets += rx_packets;
+	storage->rx_bytes += rx_bytes;
+	storage->tx_packets += tx_packets;
+	storage->tx_bytes += tx_bytes;
+}
+
+static const struct net_device_ops hfi1_ipoib_netdev_ops = {
+	.ndo_init         = hfi1_ipoib_dev_init,
+	.ndo_uninit       = hfi1_ipoib_dev_uninit,
+	.ndo_open         = hfi1_ipoib_dev_open,
+	.ndo_stop         = hfi1_ipoib_dev_stop,
+	.ndo_get_stats64  = hfi1_ipoib_dev_get_stats64,
+};
+
+static int hfi1_ipoib_send(struct net_device *dev,
+			   struct sk_buff *skb,
+			   struct ib_ah *address,
+			   u32 dqpn)
+{
+	return hfi1_ipoib_send_dma(dev, skb, address, dqpn);
+}
+
+static int hfi1_ipoib_mcast_attach(struct net_device *dev,
+				   struct ib_device *device,
+				   union ib_gid *mgid,
+				   u16 mlid,
+				   int set_qkey,
+				   u32 qkey)
+{
+	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+	u32 qpn = (u32)qpn_from_mac(priv->netdev->dev_addr);
+	struct hfi1_ibport *ibp = to_iport(priv->device, priv->port_num);
+	struct rvt_qp *qp;
+	int ret = -EINVAL;
+
+	rcu_read_lock();
+
+	qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn);
+	if (qp) {
+		rvt_get_qp(qp);
+		rcu_read_unlock();
+		if (set_qkey)
+			priv->qkey = qkey;
+
+		/* attach QP to multicast group */
+		ret = ib_attach_mcast(&qp->ibqp, mgid, mlid);
+		rvt_put_qp(qp);
+	} else {
+		rcu_read_unlock();
+	}
+
+	return ret;
+}
+
+static int hfi1_ipoib_mcast_detach(struct net_device *dev,
+				   struct ib_device *device,
+				   union ib_gid *mgid,
+				   u16 mlid)
+{
+	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+	u32 qpn = (u32)qpn_from_mac(priv->netdev->dev_addr);
+	struct hfi1_ibport *ibp = to_iport(priv->device, priv->port_num);
+	struct rvt_qp *qp;
+	int ret = -EINVAL;
+
+	rcu_read_lock();
+
+	qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn);
+	if (qp) {
+		rvt_get_qp(qp);
+		rcu_read_unlock();
+		ret = ib_detach_mcast(&qp->ibqp, mgid, mlid);
+		rvt_put_qp(qp);
+	} else {
+		rcu_read_unlock();
+	}
+	return ret;
+}
+
+static void hfi1_ipoib_netdev_dtor(struct net_device *dev)
+{
+	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+	hfi1_ipoib_txreq_deinit(priv);
+	hfi1_ipoib_rxq_deinit(priv->netdev);
+
+	free_percpu(priv->netstats);
+}
+
+static void hfi1_ipoib_free_rdma_netdev(struct net_device *dev)
+{
+	hfi1_ipoib_netdev_dtor(dev);
+	free_netdev(dev);
+}
+
+static void hfi1_ipoib_set_id(struct net_device *dev, int id)
+{
+	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+	priv->pkey_index = (u16)id;
+	ib_query_pkey(priv->device,
+		      priv->port_num,
+		      priv->pkey_index,
+		      &priv->pkey);
+}
+
+static int hfi1_ipoib_setup_rn(struct ib_device *device,
+			       u8 port_num,
+			       struct net_device *netdev,
+			       void *param)
+{
+	struct hfi1_devdata *dd = dd_from_ibdev(device);
+	struct rdma_netdev *rn = netdev_priv(netdev);
+	struct hfi1_ipoib_dev_priv *priv;
+	int rc;
+
+	rn->send = hfi1_ipoib_send;
+	rn->attach_mcast = hfi1_ipoib_mcast_attach;
+	rn->detach_mcast = hfi1_ipoib_mcast_detach;
+	rn->set_id = hfi1_ipoib_set_id;
+	rn->hca = device;
+	rn->port_num = port_num;
+	rn->mtu = netdev->mtu;
+
+	priv = hfi1_ipoib_priv(netdev);
+	priv->dd = dd;
+	priv->netdev = netdev;
+	priv->device = device;
+	priv->port_num = port_num;
+	priv->netdev_ops = netdev->netdev_ops;
+
+	netdev->netdev_ops = &hfi1_ipoib_netdev_ops;
+
+	ib_query_pkey(device, port_num, priv->pkey_index, &priv->pkey);
+
+	rc = hfi1_ipoib_txreq_init(priv);
+	if (rc) {
+		dd_dev_err(dd, "IPoIB netdev TX init - failed(%d)\n", rc);
+		hfi1_ipoib_free_rdma_netdev(netdev);
+		return rc;
+	}
+
+	rc = hfi1_ipoib_rxq_init(netdev);
+	if (rc) {
+		dd_dev_err(dd, "IPoIB netdev RX init - failed(%d)\n", rc);
+		hfi1_ipoib_free_rdma_netdev(netdev);
+		return rc;
+	}
+
+	netdev->priv_destructor = hfi1_ipoib_netdev_dtor;
+	netdev->needs_free_netdev = true;
+
+	return 0;
+}
+
+int hfi1_ipoib_rn_get_params(struct ib_device *device,
+			     u8 port_num,
+			     enum rdma_netdev_t type,
+			     struct rdma_netdev_alloc_params *params)
+{
+	struct hfi1_devdata *dd = dd_from_ibdev(device);
+
+	if (type != RDMA_NETDEV_IPOIB)
+		return -EOPNOTSUPP;
+
+	if (!HFI1_CAP_IS_KSET(AIP) || !dd->num_netdev_contexts)
+		return -EOPNOTSUPP;
+
+	if (!port_num || port_num > dd->num_pports)
+		return -EINVAL;
+
+	params->sizeof_priv = sizeof(struct hfi1_ipoib_rdma_netdev);
+	params->txqs = dd->num_sdma;
+	params->rxqs = dd->num_netdev_contexts;
+	params->param = NULL;
+	params->initialize_rdma_netdev = hfi1_ipoib_setup_rn;
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/hfi1/ipoib_rx.c b/drivers/infiniband/hw/hfi1/ipoib_rx.c
new file mode 100644
index 000000000000..3afa7545242c
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib_rx.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+#include "netdev.h"
+#include "ipoib.h"
+
+#define HFI1_IPOIB_SKB_PAD ((NET_SKB_PAD) + (NET_IP_ALIGN))
+
+static void copy_ipoib_buf(struct sk_buff *skb, void *data, int size)
+{
+	void *dst_data;
+
+	skb_checksum_none_assert(skb);
+	skb->protocol = *((__be16 *)data);
+
+	dst_data = skb_put(skb, size);
+	memcpy(dst_data, data, size);
+	skb->mac_header = HFI1_IPOIB_PSEUDO_LEN;
+	skb_pull(skb, HFI1_IPOIB_ENCAP_LEN);
+}
+
+static struct sk_buff *prepare_frag_skb(struct napi_struct *napi, int size)
+{
+	struct sk_buff *skb;
+	int skb_size = SKB_DATA_ALIGN(size + HFI1_IPOIB_SKB_PAD);
+	void *frag;
+
+	skb_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	skb_size = SKB_DATA_ALIGN(skb_size);
+	frag = napi_alloc_frag(skb_size);
+
+	if (unlikely(!frag))
+		return napi_alloc_skb(napi, size);
+
+	skb = build_skb(frag, skb_size);
+
+	if (unlikely(!skb)) {
+		skb_free_frag(frag);
+		return NULL;
+	}
+
+	skb_reserve(skb, HFI1_IPOIB_SKB_PAD);
+	return skb;
+}
+
+struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
+				       int size, void *data)
+{
+	struct napi_struct *napi = &rxq->napi;
+	int skb_size = size + HFI1_IPOIB_ENCAP_LEN;
+	struct sk_buff *skb;
+
+	/*
+	 * For smaller(4k + skb overhead) allocations we will go using
+	 * napi cache. Otherwise we will try to use napi frag cache.
+	 */
+	if (size <= SKB_WITH_OVERHEAD(PAGE_SIZE))
+		skb = napi_alloc_skb(napi, skb_size);
+	else
+		skb = prepare_frag_skb(napi, skb_size);
+
+	if (unlikely(!skb))
+		return NULL;
+
+	copy_ipoib_buf(skb, data, size);
+
+	return skb;
+}
+
+int hfi1_ipoib_rxq_init(struct net_device *netdev)
+{
+	struct hfi1_ipoib_dev_priv *ipoib_priv = hfi1_ipoib_priv(netdev);
+	struct hfi1_devdata *dd = ipoib_priv->dd;
+	int ret;
+
+	ret = hfi1_netdev_rx_init(dd);
+	if (ret)
+		return ret;
+
+	hfi1_init_aip_rsm(dd);
+
+	return ret;
+}
+
+void hfi1_ipoib_rxq_deinit(struct net_device *netdev)
+{
+	struct hfi1_ipoib_dev_priv *ipoib_priv = hfi1_ipoib_priv(netdev);
+	struct hfi1_devdata *dd = ipoib_priv->dd;
+
+	hfi1_deinit_aip_rsm(dd);
+	hfi1_netdev_rx_destroy(dd);
+}
diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c
new file mode 100644
index 000000000000..883cb9d48022
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c
@@ -0,0 +1,828 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for IPOIB SDMA functionality
+ */
+
+#include <linux/log2.h>
+#include <linux/circ_buf.h>
+
+#include "sdma.h"
+#include "verbs.h"
+#include "trace_ibhdrs.h"
+#include "ipoib.h"
+
+/* Add a convenience helper */
+#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1))
+#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
+#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)
+
+/**
+ * struct ipoib_txreq - IPOIB transmit descriptor
+ * @txreq: sdma transmit request
+ * @sdma_hdr: 9b ib headers
+ * @sdma_status: status returned by sdma engine
+ * @priv: ipoib netdev private data
+ * @txq: txq on which skb was output
+ * @skb: skb to send
+ */
+struct ipoib_txreq {
+	struct sdma_txreq           txreq;
+	struct hfi1_sdma_header     sdma_hdr;
+	int                         sdma_status;
+	struct hfi1_ipoib_dev_priv *priv;
+	struct hfi1_ipoib_txq      *txq;
+	struct sk_buff             *skb;
+};
+
+struct ipoib_txparms {
+	struct hfi1_devdata        *dd;
+	struct rdma_ah_attr        *ah_attr;
+	struct hfi1_ibport         *ibp;
+	struct hfi1_ipoib_txq      *txq;
+	union hfi1_ipoib_flow       flow;
+	u32                         dqpn;
+	u8                          hdr_dwords;
+	u8                          entropy;
+};
+
+static u64 hfi1_ipoib_txreqs(const u64 sent, const u64 completed)
+{
+	return sent - completed;
+}
+
+static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq)
+{
+	if (unlikely(hfi1_ipoib_txreqs(++txq->sent_txreqs,
+				       atomic64_read(&txq->complete_txreqs)) >=
+	    min_t(unsigned int, txq->priv->netdev->tx_queue_len,
+		  txq->tx_ring.max_items - 1)))
+		netif_stop_subqueue(txq->priv->netdev, txq->q_idx);
+}
+
+static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq)
+{
+	struct net_device *dev = txq->priv->netdev;
+
+	/* If the queue is already running just return */
+	if (likely(!__netif_subqueue_stopped(dev, txq->q_idx)))
+		return;
+
+	/* If shutting down just return as queue state is irrelevant */
+	if (unlikely(dev->reg_state != NETREG_REGISTERED))
+		return;
+
+	/*
+	 * When the queue has been drained to less than half full it will be
+	 * restarted.
+	 * The size of the txreq ring is fixed at initialization.
+	 * The tx queue len can be adjusted upward while the interface is
+	 * running.
+	 * The tx queue len can be large enough to overflow the txreq_ring.
+	 * Use the minimum of the current tx_queue_len or the rings max txreqs
+	 * to protect against ring overflow.
+	 */
+	if (hfi1_ipoib_txreqs(txq->sent_txreqs,
+			      atomic64_read(&txq->complete_txreqs))
+	    < min_t(unsigned int, dev->tx_queue_len,
+		    txq->tx_ring.max_items) >> 1)
+		netif_wake_subqueue(dev, txq->q_idx);
+}
+
+static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget)
+{
+	struct hfi1_ipoib_dev_priv *priv = tx->priv;
+
+	if (likely(!tx->sdma_status)) {
+		hfi1_ipoib_update_tx_netstats(priv, 1, tx->skb->len);
+	} else {
+		++priv->netdev->stats.tx_errors;
+		dd_dev_warn(priv->dd,
+			    "%s: Status = 0x%x pbc 0x%llx txq = %d sde = %d\n",
+			    __func__, tx->sdma_status,
+			    le64_to_cpu(tx->sdma_hdr.pbc), tx->txq->q_idx,
+			    tx->txq->sde->this_idx);
+	}
+
+	napi_consume_skb(tx->skb, budget);
+	sdma_txclean(priv->dd, &tx->txreq);
+	kmem_cache_free(priv->txreq_cache, tx);
+}
+
+static int hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq, int budget)
+{
+	struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
+	unsigned long head;
+	unsigned long tail;
+	unsigned int max_tx;
+	int work_done;
+	int tx_count;
+
+	spin_lock_bh(&tx_ring->consumer_lock);
+
+	/* Read index before reading contents at that index. */
+	head = smp_load_acquire(&tx_ring->head);
+	tail = tx_ring->tail;
+	max_tx = tx_ring->max_items;
+
+	work_done = min_t(int, CIRC_CNT(head, tail, max_tx), budget);
+
+	for (tx_count = work_done; tx_count; tx_count--) {
+		hfi1_ipoib_free_tx(tx_ring->items[tail], budget);
+		tail = CIRC_NEXT(tail, max_tx);
+	}
+
+	atomic64_add(work_done, &txq->complete_txreqs);
+
+	/* Finished freeing tx items so store the tail value. */
+	smp_store_release(&tx_ring->tail, tail);
+
+	spin_unlock_bh(&tx_ring->consumer_lock);
+
+	hfi1_ipoib_check_queue_stopped(txq);
+
+	return work_done;
+}
+
+static int hfi1_ipoib_process_tx_ring(struct napi_struct *napi, int budget)
+{
+	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(napi->dev);
+	struct hfi1_ipoib_txq *txq = &priv->txqs[napi - priv->tx_napis];
+
+	int work_done = hfi1_ipoib_drain_tx_ring(txq, budget);
+
+	if (work_done < budget)
+		napi_complete_done(napi, work_done);
+
+	return work_done;
+}
+
+static void hfi1_ipoib_add_tx(struct ipoib_txreq *tx)
+{
+	struct hfi1_ipoib_circ_buf *tx_ring = &tx->txq->tx_ring;
+	unsigned long head;
+	unsigned long tail;
+	size_t max_tx;
+
+	spin_lock(&tx_ring->producer_lock);
+
+	head = tx_ring->head;
+	tail = READ_ONCE(tx_ring->tail);
+	max_tx = tx_ring->max_items;
+
+	if (likely(CIRC_SPACE(head, tail, max_tx))) {
+		tx_ring->items[head] = tx;
+
+		/* Finish storing txreq before incrementing head. */
+		smp_store_release(&tx_ring->head, CIRC_ADD(head, 1, max_tx));
+		napi_schedule(tx->txq->napi);
+	} else {
+		struct hfi1_ipoib_txq *txq = tx->txq;
+		struct hfi1_ipoib_dev_priv *priv = tx->priv;
+
+		/* Ring was full */
+		hfi1_ipoib_free_tx(tx, 0);
+		atomic64_inc(&txq->complete_txreqs);
+		dd_dev_dbg(priv->dd, "txq %d full.\n", txq->q_idx);
+	}
+
+	spin_unlock(&tx_ring->producer_lock);
+}
+
+static void hfi1_ipoib_sdma_complete(struct sdma_txreq *txreq, int status)
+{
+	struct ipoib_txreq *tx = container_of(txreq, struct ipoib_txreq, txreq);
+
+	tx->sdma_status = status;
+
+	hfi1_ipoib_add_tx(tx);
+}
+
+static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx,
+					struct ipoib_txparms *txp)
+{
+	struct hfi1_devdata *dd = txp->dd;
+	struct sdma_txreq *txreq = &tx->txreq;
+	struct sk_buff *skb = tx->skb;
+	int ret = 0;
+	int i;
+
+	if (skb_headlen(skb)) {
+		ret = sdma_txadd_kvaddr(dd, txreq, skb->data, skb_headlen(skb));
+		if (unlikely(ret))
+			return ret;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+		ret = sdma_txadd_page(dd,
+				      txreq,
+				      skb_frag_page(frag),
+				      frag->bv_offset,
+				      skb_frag_size(frag));
+		if (unlikely(ret))
+			break;
+	}
+
+	return ret;
+}
+
+static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx,
+				    struct ipoib_txparms *txp)
+{
+	struct hfi1_devdata *dd = txp->dd;
+	struct sdma_txreq *txreq = &tx->txreq;
+	struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr;
+	u16 pkt_bytes =
+		sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2) + tx->skb->len;
+	int ret;
+
+	ret = sdma_txinit(txreq, 0, pkt_bytes, hfi1_ipoib_sdma_complete);
+	if (unlikely(ret))
+		return ret;
+
+	/* add pbc + headers */
+	ret = sdma_txadd_kvaddr(dd,
+				txreq,
+				sdma_hdr,
+				sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2));
+	if (unlikely(ret))
+		return ret;
+
+	/* add the ulp payload */
+	return hfi1_ipoib_build_ulp_payload(tx, txp);
+}
+
+static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx,
+					   struct ipoib_txparms *txp)
+{
+	struct hfi1_ipoib_dev_priv *priv = tx->priv;
+	struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr;
+	struct sk_buff *skb = tx->skb;
+	struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp);
+	struct rdma_ah_attr *ah_attr = txp->ah_attr;
+	struct ib_other_headers *ohdr;
+	struct ib_grh *grh;
+	u16 dwords;
+	u16 slid;
+	u16 dlid;
+	u16 lrh0;
+	u32 bth0;
+	u32 sqpn = (u32)(priv->netdev->dev_addr[1] << 16 |
+			 priv->netdev->dev_addr[2] << 8 |
+			 priv->netdev->dev_addr[3]);
+	u16 payload_dwords;
+	u8 pad_cnt;
+
+	pad_cnt = -skb->len & 3;
+
+	/* Includes ICRC */
+	payload_dwords = ((skb->len + pad_cnt) >> 2) + SIZE_OF_CRC;
+
+	/* header size in dwords LRH+BTH+DETH = (8+12+8)/4. */
+	txp->hdr_dwords = 7;
+
+	if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
+		grh = &sdma_hdr->hdr.ibh.u.l.grh;
+		txp->hdr_dwords +=
+			hfi1_make_grh(txp->ibp,
+				      grh,
+				      rdma_ah_read_grh(ah_attr),
+				      txp->hdr_dwords - LRH_9B_DWORDS,
+				      payload_dwords);
+		lrh0 = HFI1_LRH_GRH;
+		ohdr = &sdma_hdr->hdr.ibh.u.l.oth;
+	} else {
+		lrh0 = HFI1_LRH_BTH;
+		ohdr = &sdma_hdr->hdr.ibh.u.oth;
+	}
+
+	lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4;
+	lrh0 |= (txp->flow.sc5 & 0xf) << 12;
+
+	dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 9B);
+	if (dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
+		slid = be16_to_cpu(IB_LID_PERMISSIVE);
+	} else {
+		u16 lid = (u16)ppd->lid;
+
+		if (lid) {
+			lid |= rdma_ah_get_path_bits(ah_attr) &
+				((1 << ppd->lmc) - 1);
+			slid = lid;
+		} else {
+			slid = be16_to_cpu(IB_LID_PERMISSIVE);
+		}
+	}
+
+	/* Includes ICRC */
+	dwords = txp->hdr_dwords + payload_dwords;
+
+	/* Build the lrh */
+	sdma_hdr->hdr.hdr_type = HFI1_PKT_TYPE_9B;
+	hfi1_make_ib_hdr(&sdma_hdr->hdr.ibh, lrh0, dwords, dlid, slid);
+
+	/* Build the bth */
+	bth0 = (IB_OPCODE_UD_SEND_ONLY << 24) | (pad_cnt << 20) | priv->pkey;
+
+	ohdr->bth[0] = cpu_to_be32(bth0);
+	ohdr->bth[1] = cpu_to_be32(txp->dqpn);
+	ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->sent_txreqs));
+
+	/* Build the deth */
+	ohdr->u.ud.deth[0] = cpu_to_be32(priv->qkey);
+	ohdr->u.ud.deth[1] = cpu_to_be32((txp->entropy <<
+					  HFI1_IPOIB_ENTROPY_SHIFT) | sqpn);
+
+	/* Construct the pbc. */
+	sdma_hdr->pbc =
+		cpu_to_le64(create_pbc(ppd,
+				       ib_is_sc5(txp->flow.sc5) <<
+							      PBC_DC_INFO_SHIFT,
+				       0,
+				       sc_to_vlt(priv->dd, txp->flow.sc5),
+				       dwords - SIZE_OF_CRC +
+						(sizeof(sdma_hdr->pbc) >> 2)));
+}
+
+static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev,
+						      struct sk_buff *skb,
+						      struct ipoib_txparms *txp)
+{
+	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+	struct ipoib_txreq *tx;
+	int ret;
+
+	tx = kmem_cache_alloc_node(priv->txreq_cache,
+				   GFP_ATOMIC,
+				   priv->dd->node);
+	if (unlikely(!tx))
+		return ERR_PTR(-ENOMEM);
+
+	/* so that we can test if the sdma decriptors are there */
+	tx->txreq.num_desc = 0;
+	tx->priv = priv;
+	tx->txq = txp->txq;
+	tx->skb = skb;
+
+	hfi1_ipoib_build_ib_tx_headers(tx, txp);
+
+	ret = hfi1_ipoib_build_tx_desc(tx, txp);
+	if (likely(!ret)) {
+		if (txp->txq->flow.as_int != txp->flow.as_int) {
+			txp->txq->flow.tx_queue = txp->flow.tx_queue;
+			txp->txq->flow.sc5 = txp->flow.sc5;
+			txp->txq->sde =
+				sdma_select_engine_sc(priv->dd,
+						      txp->flow.tx_queue,
+						      txp->flow.sc5);
+		}
+
+		return tx;
+	}
+
+	sdma_txclean(priv->dd, &tx->txreq);
+	kmem_cache_free(priv->txreq_cache, tx);
+
+	return ERR_PTR(ret);
+}
+
+static int hfi1_ipoib_submit_tx_list(struct net_device *dev,
+				     struct hfi1_ipoib_txq *txq)
+{
+	int ret;
+	u16 count_out;
+
+	ret = sdma_send_txlist(txq->sde,
+			       iowait_get_ib_work(&txq->wait),
+			       &txq->tx_list,
+			       &count_out);
+	if (likely(!ret) || ret == -EBUSY || ret == -ECOMM)
+		return ret;
+
+	dd_dev_warn(txq->priv->dd, "cannot send skb tx list, err %d.\n", ret);
+
+	return ret;
+}
+
+static int hfi1_ipoib_flush_tx_list(struct net_device *dev,
+				    struct hfi1_ipoib_txq *txq)
+{
+	int ret = 0;
+
+	if (!list_empty(&txq->tx_list)) {
+		/* Flush the current list */
+		ret = hfi1_ipoib_submit_tx_list(dev, txq);
+
+		if (unlikely(ret))
+			if (ret != -EBUSY)
+				++dev->stats.tx_carrier_errors;
+	}
+
+	return ret;
+}
+
+static int hfi1_ipoib_submit_tx(struct hfi1_ipoib_txq *txq,
+				struct ipoib_txreq *tx)
+{
+	int ret;
+
+	ret = sdma_send_txreq(txq->sde,
+			      iowait_get_ib_work(&txq->wait),
+			      &tx->txreq,
+			      txq->pkts_sent);
+	if (likely(!ret)) {
+		txq->pkts_sent = true;
+		iowait_starve_clear(txq->pkts_sent, &txq->wait);
+	}
+
+	return ret;
+}
+
+static int hfi1_ipoib_send_dma_single(struct net_device *dev,
+				      struct sk_buff *skb,
+				      struct ipoib_txparms *txp)
+{
+	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+	struct hfi1_ipoib_txq *txq = txp->txq;
+	struct ipoib_txreq *tx;
+	int ret;
+
+	tx = hfi1_ipoib_send_dma_common(dev, skb, txp);
+	if (IS_ERR(tx)) {
+		int ret = PTR_ERR(tx);
+
+		dev_kfree_skb_any(skb);
+
+		if (ret == -ENOMEM)
+			++dev->stats.tx_errors;
+		else
+			++dev->stats.tx_carrier_errors;
+
+		return NETDEV_TX_OK;
+	}
+
+	ret = hfi1_ipoib_submit_tx(txq, tx);
+	if (likely(!ret)) {
+		trace_sdma_output_ibhdr(tx->priv->dd,
+					&tx->sdma_hdr.hdr,
+					ib_is_sc5(txp->flow.sc5));
+		hfi1_ipoib_check_queue_depth(txq);
+		return NETDEV_TX_OK;
+	}
+
+	txq->pkts_sent = false;
+
+	if (ret == -EBUSY) {
+		list_add_tail(&tx->txreq.list, &txq->tx_list);
+
+		trace_sdma_output_ibhdr(tx->priv->dd,
+					&tx->sdma_hdr.hdr,
+					ib_is_sc5(txp->flow.sc5));
+		hfi1_ipoib_check_queue_depth(txq);
+		return NETDEV_TX_OK;
+	}
+
+	if (ret == -ECOMM) {
+		hfi1_ipoib_check_queue_depth(txq);
+		return NETDEV_TX_OK;
+	}
+
+	sdma_txclean(priv->dd, &tx->txreq);
+	dev_kfree_skb_any(skb);
+	kmem_cache_free(priv->txreq_cache, tx);
+	++dev->stats.tx_carrier_errors;
+
+	return NETDEV_TX_OK;
+}
+
+static int hfi1_ipoib_send_dma_list(struct net_device *dev,
+				    struct sk_buff *skb,
+				    struct ipoib_txparms *txp)
+{
+	struct hfi1_ipoib_txq *txq = txp->txq;
+	struct ipoib_txreq *tx;
+
+	/* Has the flow change ? */
+	if (txq->flow.as_int != txp->flow.as_int)
+		(void)hfi1_ipoib_flush_tx_list(dev, txq);
+
+	tx = hfi1_ipoib_send_dma_common(dev, skb, txp);
+	if (IS_ERR(tx)) {
+		int ret = PTR_ERR(tx);
+
+		dev_kfree_skb_any(skb);
+
+		if (ret == -ENOMEM)
+			++dev->stats.tx_errors;
+		else
+			++dev->stats.tx_carrier_errors;
+
+		return NETDEV_TX_OK;
+	}
+
+	list_add_tail(&tx->txreq.list, &txq->tx_list);
+
+	hfi1_ipoib_check_queue_depth(txq);
+
+	trace_sdma_output_ibhdr(tx->priv->dd,
+				&tx->sdma_hdr.hdr,
+				ib_is_sc5(txp->flow.sc5));
+
+	if (!netdev_xmit_more())
+		(void)hfi1_ipoib_flush_tx_list(dev, txq);
+
+	return NETDEV_TX_OK;
+}
+
+static u8 hfi1_ipoib_calc_entropy(struct sk_buff *skb)
+{
+	if (skb_transport_header_was_set(skb)) {
+		u8 *hdr = (u8 *)skb_transport_header(skb);
+
+		return (hdr[0] ^ hdr[1] ^ hdr[2] ^ hdr[3]);
+	}
+
+	return (u8)skb_get_queue_mapping(skb);
+}
+
+int hfi1_ipoib_send_dma(struct net_device *dev,
+			struct sk_buff *skb,
+			struct ib_ah *address,
+			u32 dqpn)
+{
+	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+	struct ipoib_txparms txp;
+	struct rdma_netdev *rn = netdev_priv(dev);
+
+	if (unlikely(skb->len > rn->mtu + HFI1_IPOIB_ENCAP_LEN)) {
+		dd_dev_warn(priv->dd, "packet len %d (> %d) too long to send, dropping\n",
+			    skb->len,
+			    rn->mtu + HFI1_IPOIB_ENCAP_LEN);
+		++dev->stats.tx_dropped;
+		++dev->stats.tx_errors;
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	txp.dd = priv->dd;
+	txp.ah_attr = &ibah_to_rvtah(address)->attr;
+	txp.ibp = to_iport(priv->device, priv->port_num);
+	txp.txq = &priv->txqs[skb_get_queue_mapping(skb)];
+	txp.dqpn = dqpn;
+	txp.flow.sc5 = txp.ibp->sl_to_sc[rdma_ah_get_sl(txp.ah_attr)];
+	txp.flow.tx_queue = (u8)skb_get_queue_mapping(skb);
+	txp.entropy = hfi1_ipoib_calc_entropy(skb);
+
+	if (netdev_xmit_more() || !list_empty(&txp.txq->tx_list))
+		return hfi1_ipoib_send_dma_list(dev, skb, &txp);
+
+	return hfi1_ipoib_send_dma_single(dev, skb,  &txp);
+}
+
+/*
+ * hfi1_ipoib_sdma_sleep - ipoib sdma sleep function
+ *
+ * This function gets called from sdma_send_txreq() when there are not enough
+ * sdma descriptors available to send the packet. It adds Tx queue's wait
+ * structure to sdma engine's dmawait list to be woken up when descriptors
+ * become available.
+ */
+static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde,
+				 struct iowait_work *wait,
+				 struct sdma_txreq *txreq,
+				 uint seq,
+				 bool pkts_sent)
+{
+	struct hfi1_ipoib_txq *txq =
+		container_of(wait->iow, struct hfi1_ipoib_txq, wait);
+
+	write_seqlock(&sde->waitlock);
+
+	if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED)) {
+		if (sdma_progress(sde, seq, txreq)) {
+			write_sequnlock(&sde->waitlock);
+			return -EAGAIN;
+		}
+
+		netif_stop_subqueue(txq->priv->netdev, txq->q_idx);
+
+		if (list_empty(&txq->wait.list))
+			iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
+
+		write_sequnlock(&sde->waitlock);
+		return -EBUSY;
+	}
+
+	write_sequnlock(&sde->waitlock);
+	return -EINVAL;
+}
+
+/*
+ * hfi1_ipoib_sdma_wakeup - ipoib sdma wakeup function
+ *
+ * This function gets called when SDMA descriptors becomes available and Tx
+ * queue's wait structure was previously added to sdma engine's dmawait list.
+ */
+static void hfi1_ipoib_sdma_wakeup(struct iowait *wait, int reason)
+{
+	struct hfi1_ipoib_txq *txq =
+		container_of(wait, struct hfi1_ipoib_txq, wait);
+
+	if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED))
+		iowait_schedule(wait, system_highpri_wq, WORK_CPU_UNBOUND);
+}
+
+static void hfi1_ipoib_flush_txq(struct work_struct *work)
+{
+	struct iowait_work *ioww =
+		container_of(work, struct iowait_work, iowork);
+	struct iowait *wait = iowait_ioww_to_iow(ioww);
+	struct hfi1_ipoib_txq *txq =
+		container_of(wait, struct hfi1_ipoib_txq, wait);
+	struct net_device *dev = txq->priv->netdev;
+
+	if (likely(dev->reg_state == NETREG_REGISTERED) &&
+	    likely(__netif_subqueue_stopped(dev, txq->q_idx)) &&
+	    likely(!hfi1_ipoib_flush_tx_list(dev, txq)))
+		netif_wake_subqueue(dev, txq->q_idx);
+}
+
+int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
+{
+	struct net_device *dev = priv->netdev;
+	char buf[HFI1_IPOIB_TXREQ_NAME_LEN];
+	unsigned long tx_ring_size;
+	int i;
+
+	/*
+	 * Ring holds 1 less than tx_ring_size
+	 * Round up to next power of 2 in order to hold at least tx_queue_len
+	 */
+	tx_ring_size = roundup_pow_of_two((unsigned long)dev->tx_queue_len + 1);
+
+	snprintf(buf, sizeof(buf), "hfi1_%u_ipoib_txreq_cache", priv->dd->unit);
+	priv->txreq_cache = kmem_cache_create(buf,
+					      sizeof(struct ipoib_txreq),
+					      0,
+					      0,
+					      NULL);
+	if (!priv->txreq_cache)
+		return -ENOMEM;
+
+	priv->tx_napis = kcalloc_node(dev->num_tx_queues,
+				      sizeof(struct napi_struct),
+				      GFP_ATOMIC,
+				      priv->dd->node);
+	if (!priv->tx_napis)
+		goto free_txreq_cache;
+
+	priv->txqs = kcalloc_node(dev->num_tx_queues,
+				  sizeof(struct hfi1_ipoib_txq),
+				  GFP_ATOMIC,
+				  priv->dd->node);
+	if (!priv->txqs)
+		goto free_tx_napis;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+		iowait_init(&txq->wait,
+			    0,
+			    hfi1_ipoib_flush_txq,
+			    NULL,
+			    hfi1_ipoib_sdma_sleep,
+			    hfi1_ipoib_sdma_wakeup,
+			    NULL,
+			    NULL);
+		txq->priv = priv;
+		txq->sde = NULL;
+		INIT_LIST_HEAD(&txq->tx_list);
+		atomic64_set(&txq->complete_txreqs, 0);
+		txq->q_idx = i;
+		txq->flow.tx_queue = 0xff;
+		txq->flow.sc5 = 0xff;
+		txq->pkts_sent = false;
+
+		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
+					     priv->dd->node);
+
+		txq->tx_ring.items =
+			vzalloc_node(array_size(tx_ring_size,
+						sizeof(struct ipoib_txreq)),
+				     priv->dd->node);
+		if (!txq->tx_ring.items)
+			goto free_txqs;
+
+		spin_lock_init(&txq->tx_ring.producer_lock);
+		spin_lock_init(&txq->tx_ring.consumer_lock);
+		txq->tx_ring.max_items = tx_ring_size;
+
+		txq->napi = &priv->tx_napis[i];
+		netif_tx_napi_add(dev, txq->napi,
+				  hfi1_ipoib_process_tx_ring,
+				  NAPI_POLL_WEIGHT);
+	}
+
+	return 0;
+
+free_txqs:
+	for (i--; i >= 0; i--) {
+		struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+		netif_napi_del(txq->napi);
+		vfree(txq->tx_ring.items);
+	}
+
+	kfree(priv->txqs);
+	priv->txqs = NULL;
+
+free_tx_napis:
+	kfree(priv->tx_napis);
+	priv->tx_napis = NULL;
+
+free_txreq_cache:
+	kmem_cache_destroy(priv->txreq_cache);
+	priv->txreq_cache = NULL;
+	return -ENOMEM;
+}
+
+static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq)
+{
+	struct sdma_txreq *txreq;
+	struct sdma_txreq *txreq_tmp;
+	atomic64_t *complete_txreqs = &txq->complete_txreqs;
+
+	list_for_each_entry_safe(txreq, txreq_tmp, &txq->tx_list, list) {
+		struct ipoib_txreq *tx =
+			container_of(txreq, struct ipoib_txreq, txreq);
+
+		list_del(&txreq->list);
+		sdma_txclean(txq->priv->dd, &tx->txreq);
+		dev_kfree_skb_any(tx->skb);
+		kmem_cache_free(txq->priv->txreq_cache, tx);
+		atomic64_inc(complete_txreqs);
+	}
+
+	if (hfi1_ipoib_txreqs(txq->sent_txreqs, atomic64_read(complete_txreqs)))
+		dd_dev_warn(txq->priv->dd,
+			    "txq %d not empty found %llu requests\n",
+			    txq->q_idx,
+			    hfi1_ipoib_txreqs(txq->sent_txreqs,
+					      atomic64_read(complete_txreqs)));
+}
+
+void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < priv->netdev->num_tx_queues; i++) {
+		struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+		iowait_cancel_work(&txq->wait);
+		iowait_sdma_drain(&txq->wait);
+		hfi1_ipoib_drain_tx_list(txq);
+		netif_napi_del(txq->napi);
+		(void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items);
+		vfree(txq->tx_ring.items);
+	}
+
+	kfree(priv->txqs);
+	priv->txqs = NULL;
+
+	kfree(priv->tx_napis);
+	priv->tx_napis = NULL;
+
+	kmem_cache_destroy(priv->txreq_cache);
+	priv->txreq_cache = NULL;
+}
+
+void hfi1_ipoib_napi_tx_enable(struct net_device *dev)
+{
+	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+	int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+		napi_enable(txq->napi);
+	}
+}
+
+void hfi1_ipoib_napi_tx_disable(struct net_device *dev)
+{
+	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+	int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+		napi_disable(txq->napi);
+		(void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items);
+	}
+}
diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c
index db82db497b2c..d61ee853d215 100644
--- a/drivers/infiniband/hw/hfi1/msix.c
+++ b/drivers/infiniband/hw/hfi1/msix.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
 /*
- * Copyright(c) 2018 Intel Corporation.
+ * Copyright(c) 2018 - 2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -49,6 +49,7 @@
 #include "hfi.h"
 #include "affinity.h"
 #include "sdma.h"
+#include "netdev.h"
 
 /**
  * msix_initialize() - Calculate, request and configure MSIx IRQs
@@ -69,7 +70,7 @@ int msix_initialize(struct hfi1_devdata *dd)
 	 *	one for each VNIC context
 	 *      ...any new IRQs should be added here.
 	 */
-	total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
+	total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_netdev_contexts;
 
 	if (total >= CCE_NUM_MSIX_VECTORS)
 		return -EINVAL;
@@ -140,7 +141,7 @@ static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
 	ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name);
 	if (ret) {
 		dd_dev_err(dd,
-			   "%s: request for IRQ %d failed, MSIx %lu, err %d\n",
+			   "%s: request for IRQ %d failed, MSIx %lx, err %d\n",
 			   name, irq, nr, ret);
 		spin_lock(&dd->msix_info.msix_lock);
 		__clear_bit(nr, dd->msix_info.in_use_msix);
@@ -160,7 +161,7 @@ static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
 	/* This is a request, so a failure is not fatal */
 	ret = hfi1_get_irq_affinity(dd, me);
 	if (ret)
-		dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
+		dd_dev_err(dd, "%s: unable to pin IRQ %d\n", name, ret);
 
 	return nr;
 }
@@ -171,7 +172,8 @@ static int msix_request_rcd_irq_common(struct hfi1_ctxtdata *rcd,
 				       const char *name)
 {
 	int nr = msix_request_irq(rcd->dd, rcd, handler, thread,
-				  IRQ_RCVCTXT, name);
+				  rcd->is_vnic ? IRQ_NETDEVCTXT : IRQ_RCVCTXT,
+				  name);
 	if (nr < 0)
 		return nr;
 
@@ -204,6 +206,21 @@ int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
 }
 
 /**
+ * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
+ * for netdev context
+ * @rcd: valid netdev contexti
+ */
+int msix_netdev_request_rcd_irq(struct hfi1_ctxtdata *rcd)
+{
+	char name[MAX_NAME_SIZE];
+
+	snprintf(name, sizeof(name), DRIVER_NAME "_%d nd kctxt%d",
+		 rcd->dd->unit, rcd->ctxt);
+	return msix_request_rcd_irq_common(rcd, receive_context_interrupt_napi,
+					   NULL, name);
+}
+
+/**
  * msix_request_smda_ira() - Helper for getting SDMA IRQ resources
  * @sde: valid sdma engine
  *
@@ -355,15 +372,16 @@ void msix_clean_up_interrupts(struct hfi1_devdata *dd)
 }
 
 /**
- * msix_vnic_syncrhonize_irq() - Vnic IRQ synchronize
+ * msix_netdev_syncrhonize_irq() - netdev IRQ synchronize
  * @dd: valid devdata
  */
-void msix_vnic_synchronize_irq(struct hfi1_devdata *dd)
+void msix_netdev_synchronize_irq(struct hfi1_devdata *dd)
 {
 	int i;
+	int ctxt_count = hfi1_netdev_ctxt_count(dd);
 
-	for (i = 0; i < dd->vnic.num_ctxt; i++) {
-		struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
+	for (i = 0; i < ctxt_count; i++) {
+		struct hfi1_ctxtdata *rcd = hfi1_netdev_get_ctxt(dd, i);
 		struct hfi1_msix_entry *me;
 
 		me = &dd->msix_info.msix_entries[rcd->msix_intr];
diff --git a/drivers/infiniband/hw/hfi1/msix.h b/drivers/infiniband/hw/hfi1/msix.h
index 1a02ab7971c8..e63e944bf0fc 100644
--- a/drivers/infiniband/hw/hfi1/msix.h
+++ b/drivers/infiniband/hw/hfi1/msix.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
 /*
- * Copyright(c) 2018 Intel Corporation.
+ * Copyright(c) 2018 - 2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -59,7 +59,8 @@ int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd);
 int msix_request_sdma_irq(struct sdma_engine *sde);
 void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr);
 
-/* VNIC interface */
-void msix_vnic_synchronize_irq(struct hfi1_devdata *dd);
+/* Netdev interface */
+void msix_netdev_synchronize_irq(struct hfi1_devdata *dd);
+int msix_netdev_request_rcd_irq(struct hfi1_ctxtdata *rcd);
 
 #endif
diff --git a/drivers/infiniband/hw/hfi1/netdev.h b/drivers/infiniband/hw/hfi1/netdev.h
new file mode 100644
index 000000000000..947543a3e0c4
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/netdev.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+#ifndef HFI1_NETDEV_H
+#define HFI1_NETDEV_H
+
+#include "hfi.h"
+
+#include <linux/netdevice.h>
+#include <linux/xarray.h>
+
+/**
+ * struct hfi1_netdev_rxq - Receive Queue for HFI
+ * dummy netdev. Both IPoIB and VNIC netdevices will be working on
+ * top of this device.
+ * @napi: napi object
+ * @priv: ptr to netdev_priv
+ * @rcd:  ptr to receive context data
+ */
+struct hfi1_netdev_rxq {
+	struct napi_struct napi;
+	struct hfi1_netdev_priv *priv;
+	struct hfi1_ctxtdata *rcd;
+};
+
+/*
+ * Number of netdev contexts used. Ensure it is less than or equal to
+ * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
+ */
+#define HFI1_MAX_NETDEV_CTXTS   8
+
+/* Number of NETDEV RSM entries */
+#define NUM_NETDEV_MAP_ENTRIES HFI1_MAX_NETDEV_CTXTS
+
+/**
+ * struct hfi1_netdev_priv: data required to setup and run HFI netdev.
+ * @dd:		hfi1_devdata
+ * @rxq:	pointer to dummy netdev receive queues.
+ * @num_rx_q:	number of receive queues
+ * @rmt_index:	first free index in RMT Array
+ * @msix_start: first free MSI-X interrupt vector.
+ * @dev_tbl:	netdev table for unique identifier VNIC and IPoIb VLANs.
+ * @enabled:	atomic counter of netdevs enabling receive queues.
+ *		When 0 NAPI will be disabled.
+ * @netdevs:	atomic counter of netdevs using dummy netdev.
+ *		When 0 receive queues will be freed.
+ */
+struct hfi1_netdev_priv {
+	struct hfi1_devdata *dd;
+	struct hfi1_netdev_rxq *rxq;
+	int num_rx_q;
+	int rmt_start;
+	struct xarray dev_tbl;
+	/* count of enabled napi polls */
+	atomic_t enabled;
+	/* count of netdevs on top */
+	atomic_t netdevs;
+};
+
+static inline
+struct hfi1_netdev_priv *hfi1_netdev_priv(struct net_device *dev)
+{
+	return (struct hfi1_netdev_priv *)&dev[1];
+}
+
+static inline
+int hfi1_netdev_ctxt_count(struct hfi1_devdata *dd)
+{
+	struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+	return priv->num_rx_q;
+}
+
+static inline
+struct hfi1_ctxtdata *hfi1_netdev_get_ctxt(struct hfi1_devdata *dd, int ctxt)
+{
+	struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+	return priv->rxq[ctxt].rcd;
+}
+
+static inline
+int hfi1_netdev_get_free_rmt_idx(struct hfi1_devdata *dd)
+{
+	struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+	return priv->rmt_start;
+}
+
+static inline
+void hfi1_netdev_set_free_rmt_idx(struct hfi1_devdata *dd, int rmt_idx)
+{
+	struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+	priv->rmt_start = rmt_idx;
+}
+
+u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts,
+			     struct cpumask *cpu_mask);
+
+void hfi1_netdev_enable_queues(struct hfi1_devdata *dd);
+void hfi1_netdev_disable_queues(struct hfi1_devdata *dd);
+int hfi1_netdev_rx_init(struct hfi1_devdata *dd);
+int hfi1_netdev_rx_destroy(struct hfi1_devdata *dd);
+int hfi1_netdev_alloc(struct hfi1_devdata *dd);
+void hfi1_netdev_free(struct hfi1_devdata *dd);
+int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data);
+void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id);
+void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id);
+void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id);
+
+/* chip.c  */
+int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget);
+
+#endif /* HFI1_NETDEV_H */
diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c
new file mode 100644
index 000000000000..63688e85e8da
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/netdev_rx.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for netdev RX functionality
+ */
+
+#include "sdma.h"
+#include "verbs.h"
+#include "netdev.h"
+#include "hfi.h"
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <rdma/ib_verbs.h>
+
+static int hfi1_netdev_setup_ctxt(struct hfi1_netdev_priv *priv,
+				  struct hfi1_ctxtdata *uctxt)
+{
+	unsigned int rcvctrl_ops;
+	struct hfi1_devdata *dd = priv->dd;
+	int ret;
+
+	uctxt->rhf_rcv_function_map = netdev_rhf_rcv_functions;
+	uctxt->do_interrupt = &handle_receive_interrupt_napi_sp;
+
+	/* Now allocate the RcvHdr queue and eager buffers. */
+	ret = hfi1_create_rcvhdrq(dd, uctxt);
+	if (ret)
+		goto done;
+
+	ret = hfi1_setup_eagerbufs(uctxt);
+	if (ret)
+		goto done;
+
+	clear_rcvhdrtail(uctxt);
+
+	rcvctrl_ops = HFI1_RCVCTRL_CTXT_DIS;
+	rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_DIS;
+
+	if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
+		rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
+	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
+		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
+	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
+		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
+	if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
+		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
+
+	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
+done:
+	return ret;
+}
+
+static int hfi1_netdev_allocate_ctxt(struct hfi1_devdata *dd,
+				     struct hfi1_ctxtdata **ctxt)
+{
+	struct hfi1_ctxtdata *uctxt;
+	int ret;
+
+	if (dd->flags & HFI1_FROZEN)
+		return -EIO;
+
+	ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt);
+	if (ret < 0) {
+		dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
+		return -ENOMEM;
+	}
+
+	uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
+		HFI1_CAP_KGET(NODROP_RHQ_FULL) |
+		HFI1_CAP_KGET(NODROP_EGR_FULL) |
+		HFI1_CAP_KGET(DMA_RTAIL);
+	/* Netdev contexts are always NO_RDMA_RTAIL */
+	uctxt->fast_handler = handle_receive_interrupt_napi_fp;
+	uctxt->slow_handler = handle_receive_interrupt_napi_sp;
+	hfi1_set_seq_cnt(uctxt, 1);
+	uctxt->is_vnic = true;
+
+	hfi1_stats.sps_ctxts++;
+
+	dd_dev_info(dd, "created netdev context %d\n", uctxt->ctxt);
+	*ctxt = uctxt;
+
+	return 0;
+}
+
+static void hfi1_netdev_deallocate_ctxt(struct hfi1_devdata *dd,
+					struct hfi1_ctxtdata *uctxt)
+{
+	flush_wc();
+
+	/*
+	 * Disable receive context and interrupt available, reset all
+	 * RcvCtxtCtrl bits to default values.
+	 */
+	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
+		     HFI1_RCVCTRL_TIDFLOW_DIS |
+		     HFI1_RCVCTRL_INTRAVAIL_DIS |
+		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
+		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
+		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
+
+	if (uctxt->msix_intr != CCE_NUM_MSIX_VECTORS)
+		msix_free_irq(dd, uctxt->msix_intr);
+
+	uctxt->msix_intr = CCE_NUM_MSIX_VECTORS;
+	uctxt->event_flags = 0;
+
+	hfi1_clear_tids(uctxt);
+	hfi1_clear_ctxt_pkey(dd, uctxt);
+
+	hfi1_stats.sps_ctxts--;
+
+	hfi1_free_ctxt(uctxt);
+}
+
+static int hfi1_netdev_allot_ctxt(struct hfi1_netdev_priv *priv,
+				  struct hfi1_ctxtdata **ctxt)
+{
+	int rc;
+	struct hfi1_devdata *dd = priv->dd;
+
+	rc = hfi1_netdev_allocate_ctxt(dd, ctxt);
+	if (rc) {
+		dd_dev_err(dd, "netdev ctxt alloc failed %d\n", rc);
+		return rc;
+	}
+
+	rc = hfi1_netdev_setup_ctxt(priv, *ctxt);
+	if (rc) {
+		dd_dev_err(dd, "netdev ctxt setup failed %d\n", rc);
+		hfi1_netdev_deallocate_ctxt(dd, *ctxt);
+		*ctxt = NULL;
+	}
+
+	return rc;
+}
+
+/**
+ * hfi1_num_netdev_contexts - Count of netdev recv contexts to use.
+ * @dd: device on which to allocate netdev contexts
+ * @available_contexts: count of available receive contexts
+ * @cpu_mask: mask of possible cpus to include for contexts
+ *
+ * Return: count of physical cores on a node or the remaining available recv
+ * contexts for netdev recv context usage up to the maximum of
+ * HFI1_MAX_NETDEV_CTXTS.
+ * A value of 0 can be returned when acceleration is explicitly turned off,
+ * a memory allocation error occurs or when there are no available contexts.
+ *
+ */
+u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts,
+			     struct cpumask *cpu_mask)
+{
+	cpumask_var_t node_cpu_mask;
+	unsigned int available_cpus;
+
+	if (!HFI1_CAP_IS_KSET(AIP))
+		return 0;
+
+	/* Always give user contexts priority over netdev contexts */
+	if (available_contexts == 0) {
+		dd_dev_info(dd, "No receive contexts available for netdevs.\n");
+		return 0;
+	}
+
+	if (!zalloc_cpumask_var(&node_cpu_mask, GFP_KERNEL)) {
+		dd_dev_err(dd, "Unable to allocate cpu_mask for netdevs.\n");
+		return 0;
+	}
+
+	cpumask_and(node_cpu_mask, cpu_mask,
+		    cpumask_of_node(pcibus_to_node(dd->pcidev->bus)));
+
+	available_cpus = cpumask_weight(node_cpu_mask);
+
+	free_cpumask_var(node_cpu_mask);
+
+	return min3(available_cpus, available_contexts,
+		    (u32)HFI1_MAX_NETDEV_CTXTS);
+}
+
+static int hfi1_netdev_rxq_init(struct net_device *dev)
+{
+	int i;
+	int rc;
+	struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dev);
+	struct hfi1_devdata *dd = priv->dd;
+
+	priv->num_rx_q = dd->num_netdev_contexts;
+	priv->rxq = kcalloc_node(priv->num_rx_q, sizeof(struct hfi1_netdev_rxq),
+				 GFP_KERNEL, dd->node);
+
+	if (!priv->rxq) {
+		dd_dev_err(dd, "Unable to allocate netdev queue data\n");
+		return (-ENOMEM);
+	}
+
+	for (i = 0; i < priv->num_rx_q; i++) {
+		struct hfi1_netdev_rxq *rxq = &priv->rxq[i];
+
+		rc = hfi1_netdev_allot_ctxt(priv, &rxq->rcd);
+		if (rc)
+			goto bail_context_irq_failure;
+
+		hfi1_rcd_get(rxq->rcd);
+		rxq->priv = priv;
+		rxq->rcd->napi = &rxq->napi;
+		dd_dev_info(dd, "Setting rcv queue %d napi to context %d\n",
+			    i, rxq->rcd->ctxt);
+		/*
+		 * Disable BUSY_POLL on this NAPI as this is not supported
+		 * right now.
+		 */
+		set_bit(NAPI_STATE_NO_BUSY_POLL, &rxq->napi.state);
+		netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi, 64);
+		rc = msix_netdev_request_rcd_irq(rxq->rcd);
+		if (rc)
+			goto bail_context_irq_failure;
+	}
+
+	return 0;
+
+bail_context_irq_failure:
+	dd_dev_err(dd, "Unable to allot receive context\n");
+	for (; i >= 0; i--) {
+		struct hfi1_netdev_rxq *rxq = &priv->rxq[i];
+
+		if (rxq->rcd) {
+			hfi1_netdev_deallocate_ctxt(dd, rxq->rcd);
+			hfi1_rcd_put(rxq->rcd);
+			rxq->rcd = NULL;
+		}
+	}
+	kfree(priv->rxq);
+	priv->rxq = NULL;
+
+	return rc;
+}
+
+static void hfi1_netdev_rxq_deinit(struct net_device *dev)
+{
+	int i;
+	struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dev);
+	struct hfi1_devdata *dd = priv->dd;
+
+	for (i = 0; i < priv->num_rx_q; i++) {
+		struct hfi1_netdev_rxq *rxq = &priv->rxq[i];
+
+		netif_napi_del(&rxq->napi);
+		hfi1_netdev_deallocate_ctxt(dd, rxq->rcd);
+		hfi1_rcd_put(rxq->rcd);
+		rxq->rcd = NULL;
+	}
+
+	kfree(priv->rxq);
+	priv->rxq = NULL;
+	priv->num_rx_q = 0;
+}
+
+static void enable_queues(struct hfi1_netdev_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < priv->num_rx_q; i++) {
+		struct hfi1_netdev_rxq *rxq = &priv->rxq[i];
+
+		dd_dev_info(priv->dd, "enabling queue %d on context %d\n", i,
+			    rxq->rcd->ctxt);
+		napi_enable(&rxq->napi);
+		hfi1_rcvctrl(priv->dd,
+			     HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB,
+			     rxq->rcd);
+	}
+}
+
+static void disable_queues(struct hfi1_netdev_priv *priv)
+{
+	int i;
+
+	msix_netdev_synchronize_irq(priv->dd);
+
+	for (i = 0; i < priv->num_rx_q; i++) {
+		struct hfi1_netdev_rxq *rxq = &priv->rxq[i];
+
+		dd_dev_info(priv->dd, "disabling queue %d on context %d\n", i,
+			    rxq->rcd->ctxt);
+
+		/* wait for napi if it was scheduled */
+		hfi1_rcvctrl(priv->dd,
+			     HFI1_RCVCTRL_CTXT_DIS | HFI1_RCVCTRL_INTRAVAIL_DIS,
+			     rxq->rcd);
+		napi_synchronize(&rxq->napi);
+		napi_disable(&rxq->napi);
+	}
+}
+
+/**
+ * hfi1_netdev_rx_init - Incrememnts netdevs counter. When called first time,
+ * it allocates receive queue data and calls netif_napi_add
+ * for each queue.
+ *
+ * @dd: hfi1 dev data
+ */
+int hfi1_netdev_rx_init(struct hfi1_devdata *dd)
+{
+	struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+	int res;
+
+	if (atomic_fetch_inc(&priv->netdevs))
+		return 0;
+
+	mutex_lock(&hfi1_mutex);
+	init_dummy_netdev(dd->dummy_netdev);
+	res = hfi1_netdev_rxq_init(dd->dummy_netdev);
+	mutex_unlock(&hfi1_mutex);
+	return res;
+}
+
+/**
+ * hfi1_netdev_rx_destroy - Decrements netdevs counter, when it reaches 0
+ * napi is deleted and receive queses memory is freed.
+ *
+ * @dd: hfi1 dev data
+ */
+int hfi1_netdev_rx_destroy(struct hfi1_devdata *dd)
+{
+	struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+	/* destroy the RX queues only if it is the last netdev going away */
+	if (atomic_fetch_add_unless(&priv->netdevs, -1, 0) == 1) {
+		mutex_lock(&hfi1_mutex);
+		hfi1_netdev_rxq_deinit(dd->dummy_netdev);
+		mutex_unlock(&hfi1_mutex);
+	}
+
+	return 0;
+}
+
+/**
+ * hfi1_netdev_alloc - Allocates netdev and private data. It is required
+ * because RMT index and MSI-X interrupt can be set only
+ * during driver initialization.
+ *
+ * @dd: hfi1 dev data
+ */
+int hfi1_netdev_alloc(struct hfi1_devdata *dd)
+{
+	struct hfi1_netdev_priv *priv;
+	const int netdev_size = sizeof(*dd->dummy_netdev) +
+		sizeof(struct hfi1_netdev_priv);
+
+	dd_dev_info(dd, "allocating netdev size %d\n", netdev_size);
+	dd->dummy_netdev = kcalloc_node(1, netdev_size, GFP_KERNEL, dd->node);
+
+	if (!dd->dummy_netdev)
+		return -ENOMEM;
+
+	priv = hfi1_netdev_priv(dd->dummy_netdev);
+	priv->dd = dd;
+	xa_init(&priv->dev_tbl);
+	atomic_set(&priv->enabled, 0);
+	atomic_set(&priv->netdevs, 0);
+
+	return 0;
+}
+
+void hfi1_netdev_free(struct hfi1_devdata *dd)
+{
+	if (dd->dummy_netdev) {
+		dd_dev_info(dd, "hfi1 netdev freed\n");
+		free_netdev(dd->dummy_netdev);
+		dd->dummy_netdev = NULL;
+	}
+}
+
+/**
+ * hfi1_netdev_enable_queues - This is napi enable function.
+ * It enables napi objects associated with queues.
+ * When at least one device has called it it increments atomic counter.
+ * Disable function decrements counter and when it is 0,
+ * calls napi_disable for every queue.
+ *
+ * @dd: hfi1 dev data
+ */
+void hfi1_netdev_enable_queues(struct hfi1_devdata *dd)
+{
+	struct hfi1_netdev_priv *priv;
+
+	if (!dd->dummy_netdev)
+		return;
+
+	priv = hfi1_netdev_priv(dd->dummy_netdev);
+	if (atomic_fetch_inc(&priv->enabled))
+		return;
+
+	mutex_lock(&hfi1_mutex);
+	enable_queues(priv);
+	mutex_unlock(&hfi1_mutex);
+}
+
+void hfi1_netdev_disable_queues(struct hfi1_devdata *dd)
+{
+	struct hfi1_netdev_priv *priv;
+
+	if (!dd->dummy_netdev)
+		return;
+
+	priv = hfi1_netdev_priv(dd->dummy_netdev);
+	if (atomic_dec_if_positive(&priv->enabled))
+		return;
+
+	mutex_lock(&hfi1_mutex);
+	disable_queues(priv);
+	mutex_unlock(&hfi1_mutex);
+}
+
+/**
+ * hfi1_netdev_add_data - Registers data with unique identifier
+ * to be requested later this is needed for VNIC and IPoIB VLANs
+ * implementations.
+ * This call is protected by mutex idr_lock.
+ *
+ * @dd: hfi1 dev data
+ * @id: requested integer id up to INT_MAX
+ * @data: data to be associated with index
+ */
+int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data)
+{
+	struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+	return xa_insert(&priv->dev_tbl, id, data, GFP_NOWAIT);
+}
+
+/**
+ * hfi1_netdev_remove_data - Removes data with previously given id.
+ * Returns the reference to removed entry.
+ *
+ * @dd: hfi1 dev data
+ * @id: requested integer id up to INT_MAX
+ */
+void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id)
+{
+	struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+	return xa_erase(&priv->dev_tbl, id);
+}
+
+/**
+ * hfi1_netdev_get_data - Gets data with given id
+ *
+ * @dd: hfi1 dev data
+ * @id: requested integer id up to INT_MAX
+ */
+void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id)
+{
+	struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+	return xa_load(&priv->dev_tbl, id);
+}
+
+/**
+ * hfi1_netdev_get_first_dat - Gets first entry with greater or equal id.
+ *
+ * @dd: hfi1 dev data
+ * @id: requested integer id up to INT_MAX
+ */
+void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id)
+{
+	struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+	unsigned long index = *start_id;
+	void *ret;
+
+	ret = xa_find(&priv->dev_tbl, &index, UINT_MAX, XA_PRESENT);
+	*start_id = (int)index;
+	return ret;
+}
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index f8e733aa3bb8..0c2ae9f7b3e8 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015 - 2019 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -186,15 +186,6 @@ static void flush_iowait(struct rvt_qp *qp)
 	write_sequnlock_irqrestore(lock, flags);
 }
 
-static inline int opa_mtu_enum_to_int(int mtu)
-{
-	switch (mtu) {
-	case OPA_MTU_8192:  return 8192;
-	case OPA_MTU_10240: return 10240;
-	default:            return -1;
-	}
-}
-
 /**
  * This function is what we would push to the core layer if we wanted to be a
  * "first class citizen".  Instead we hide this here and rely on Verbs ULPs
@@ -202,15 +193,10 @@ static inline int opa_mtu_enum_to_int(int mtu)
  */
 static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
 {
-	int val;
-
 	/* Constraining 10KB packets to 8KB packets */
 	if (mtu == (enum ib_mtu)OPA_MTU_10240)
 		mtu = OPA_MTU_8192;
-	val = opa_mtu_enum_to_int((int)mtu);
-	if (val > 0)
-		return val;
-	return ib_mtu_enum_to_int(mtu);
+	return opa_mtu_enum_to_int((enum opa_mtu)mtu);
 }
 
 int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 8a2e0d9351e9..243b4ba0b6f6 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
 /*
- * Copyright(c) 2018 Intel Corporation.
+ * Copyright(c) 2018 - 2020 Intel Corporation.
  *
  */
 
@@ -194,7 +194,7 @@ void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 
-	p->qp = (kdeth_qp << 16) | priv->rcd->ctxt;
+	p->qp = (RVT_KDETH_QP_PREFIX << 16) | priv->rcd->ctxt;
 	p->max_len = TID_RDMA_MAX_SEGMENT_SIZE;
 	p->jkey = priv->rcd->jkey;
 	p->max_read = TID_RDMA_MAX_READ_SEGS_PER_REQ;
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 9a3d236bcc88..b219ea90fd6f 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -47,6 +47,7 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 #include "exp_rcv.h"
+#include "ipoib.h"
 
 static u8 __get_ib_hdr_len(struct ib_header *hdr)
 {
@@ -126,6 +127,7 @@ const char *hfi1_trace_get_packet_l2_str(u8 l2)
 #define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x"
 #define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x"
 #define DETH_PRN "deth qkey:0x%.8x sqpn:0x%.6x"
+#define DETH_ENTROPY_PRN "deth qkey:0x%.8x sqpn:0x%.6x entropy:0x%.2x"
 #define IETH_PRN "ieth rkey:0x%.8x"
 #define ATOMICACKETH_PRN "origdata:%llx"
 #define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx"
@@ -444,6 +446,12 @@ const char *parse_everbs_hdrs(
 		break;
 	/* deth */
 	case OP(UD, SEND_ONLY):
+		trace_seq_printf(p, DETH_ENTROPY_PRN,
+				 be32_to_cpu(eh->ud.deth[0]),
+				 be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK,
+				 be32_to_cpu(eh->ud.deth[1]) >>
+					     HFI1_IPOIB_ENTROPY_SHIFT);
+		break;
 	case OP(UD, SEND_ONLY_WITH_IMMEDIATE):
 		trace_seq_printf(p, DETH_PRN,
 				 be32_to_cpu(eh->ud.deth[0]),
@@ -512,6 +520,38 @@ u16 hfi1_trace_get_tid_idx(u32 ent)
 	return EXP_TID_GET(ent, IDX);
 }
 
+struct hfi1_ctxt_hist {
+	atomic_t count;
+	atomic_t data[255];
+};
+
+struct hfi1_ctxt_hist hist = {
+	.count = ATOMIC_INIT(0)
+};
+
+const char *hfi1_trace_print_rsm_hist(struct trace_seq *p, unsigned int ctxt)
+{
+	int i, len = ARRAY_SIZE(hist.data);
+	const char *ret = trace_seq_buffer_ptr(p);
+	unsigned long packet_count = atomic_fetch_inc(&hist.count);
+
+	trace_seq_printf(p, "packet[%lu]", packet_count);
+	for (i = 0; i < len; ++i) {
+		unsigned long val;
+		atomic_t *count = &hist.data[i];
+
+		if (ctxt == i)
+			val = atomic_fetch_inc(count);
+		else
+			val = atomic_read(count);
+
+		if (val)
+			trace_seq_printf(p, "(%d:%lu)", i, val);
+	}
+	trace_seq_putc(p, 0);
+	return ret;
+}
+
 __hfi1_trace_fn(AFFINITY);
 __hfi1_trace_fn(PKT);
 __hfi1_trace_fn(PROC);
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
index b5fc5c6cd52f..d8c168dc3ea8 100644
--- a/drivers/infiniband/hw/hfi1/trace_ctxts.h
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -1,5 +1,5 @@
 /*
-* Copyright(c) 2015, 2016 Intel Corporation.
+* Copyright(c) 2015 - 2020 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
@@ -138,6 +138,15 @@ TRACE_EVENT(hfi1_ctxt_info,
 		      )
 );
 
+const char *hfi1_trace_print_rsm_hist(struct trace_seq *p, unsigned int ctxt);
+TRACE_EVENT(ctxt_rsm_hist,
+	    TP_PROTO(unsigned int ctxt),
+	    TP_ARGS(ctxt),
+	    TP_STRUCT__entry(__field(unsigned int, ctxt)),
+	    TP_fast_assign(__entry->ctxt = ctxt;),
+	    TP_printk("%s", hfi1_trace_print_rsm_hist(p, __entry->ctxt))
+);
+
 #endif /* __HFI1_TRACE_CTXTS_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 2f6323ad9c59..30865635b449 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -66,6 +66,7 @@
 #include "vnic.h"
 #include "fault.h"
 #include "affinity.h"
+#include "ipoib.h"
 
 static unsigned int hfi1_lkey_table_size = 16;
 module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
@@ -1342,7 +1343,7 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
 			IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
 			IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE |
 			IB_DEVICE_MEM_MGT_EXTENSIONS |
-			IB_DEVICE_RDMA_NETDEV_OPA_VNIC;
+			IB_DEVICE_RDMA_NETDEV_OPA;
 	rdi->dparms.props.page_size_cap = PAGE_SIZE;
 	rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
 	rdi->dparms.props.vendor_part_id = dd->pcidev->device;
@@ -1360,7 +1361,6 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
 	rdi->dparms.props.max_cq = hfi1_max_cqs;
 	rdi->dparms.props.max_ah = hfi1_max_ahs;
 	rdi->dparms.props.max_cqe = hfi1_max_cqes;
-	rdi->dparms.props.max_map_per_fmr = 32767;
 	rdi->dparms.props.max_pd = hfi1_max_pds;
 	rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
 	rdi->dparms.props.max_qp_init_rd_atom = 255;
@@ -1439,6 +1439,8 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
 				      4096 : hfi1_max_mtu), IB_MTU_4096);
 	props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
 		mtu_to_enum(ppd->ibmtu, IB_MTU_4096);
+	props->phys_mtu = HFI1_CAP_IS_KSET(AIP) ? hfi1_max_mtu :
+				ib_mtu_enum_to_int(props->max_mtu);
 
 	return 0;
 }
@@ -1793,6 +1795,7 @@ static const struct ib_device_ops hfi1_dev_ops = {
 	.modify_device = modify_device,
 	/* keep process mad in the driver */
 	.process_mad = hfi1_process_mad,
+	.rdma_netdev_get_params = hfi1_ipoib_rn_get_params,
 };
 
 /**
@@ -1863,9 +1866,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
 	dd->verbs_dev.rdi.dparms.qpn_start = 0;
 	dd->verbs_dev.rdi.dparms.qpn_inc = 1;
 	dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift;
-	dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16;
-	dd->verbs_dev.rdi.dparms.qpn_res_end =
-	dd->verbs_dev.rdi.dparms.qpn_res_start + 65535;
+	dd->verbs_dev.rdi.dparms.qpn_res_start = RVT_KDETH_QP_BASE;
+	dd->verbs_dev.rdi.dparms.qpn_res_end = RVT_AIP_QP_MAX;
 	dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC;
 	dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK;
 	dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT;
diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h
index 5ae781514e32..66150a13f374 100644
--- a/drivers/infiniband/hw/hfi1/vnic.h
+++ b/drivers/infiniband/hw/hfi1/vnic.h
@@ -1,7 +1,7 @@
 #ifndef _HFI1_VNIC_H
 #define _HFI1_VNIC_H
 /*
- * Copyright(c) 2017 Intel Corporation.
+ * Copyright(c) 2017 - 2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -69,6 +69,7 @@
 #define HFI1_VNIC_SC_SHIFT      4
 
 #define HFI1_VNIC_MAX_QUEUE 16
+#define HFI1_NUM_VNIC_CTXT 8
 
 /**
  * struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information
@@ -104,7 +105,6 @@ struct hfi1_vnic_rx_queue {
 	struct hfi1_vnic_vport_info *vinfo;
 	struct net_device           *netdev;
 	struct napi_struct           napi;
-	struct sk_buff_head          skbq;
 };
 
 /**
@@ -146,7 +146,6 @@ struct hfi1_vnic_vport_info {
 
 /* vnic hfi1 internal functions */
 void hfi1_vnic_setup(struct hfi1_devdata *dd);
-void hfi1_vnic_cleanup(struct hfi1_devdata *dd);
 int hfi1_vnic_txreq_init(struct hfi1_devdata *dd);
 void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd);
 
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index 6b14581b9965..a90824de0f57 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2017 - 2018 Intel Corporation.
+ * Copyright(c) 2017 - 2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -53,6 +53,7 @@
 #include <linux/if_vlan.h>
 
 #include "vnic.h"
+#include "netdev.h"
 
 #define HFI_TX_TIMEOUT_MS 1000
 
@@ -62,114 +63,6 @@
 
 static DEFINE_SPINLOCK(vport_cntr_lock);
 
-static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
-{
-	unsigned int rcvctrl_ops = 0;
-	int ret;
-
-	uctxt->do_interrupt = &handle_receive_interrupt;
-
-	/* Now allocate the RcvHdr queue and eager buffers. */
-	ret = hfi1_create_rcvhdrq(dd, uctxt);
-	if (ret)
-		goto done;
-
-	ret = hfi1_setup_eagerbufs(uctxt);
-	if (ret)
-		goto done;
-
-	if (hfi1_rcvhdrtail_kvaddr(uctxt))
-		clear_rcvhdrtail(uctxt);
-
-	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
-	rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;
-
-	if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
-		rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
-	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
-		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
-	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
-		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
-	if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
-		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
-
-	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
-done:
-	return ret;
-}
-
-static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
-			      struct hfi1_ctxtdata **vnic_ctxt)
-{
-	struct hfi1_ctxtdata *uctxt;
-	int ret;
-
-	if (dd->flags & HFI1_FROZEN)
-		return -EIO;
-
-	ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt);
-	if (ret < 0) {
-		dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
-		return -ENOMEM;
-	}
-
-	uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
-			HFI1_CAP_KGET(NODROP_RHQ_FULL) |
-			HFI1_CAP_KGET(NODROP_EGR_FULL) |
-			HFI1_CAP_KGET(DMA_RTAIL);
-	uctxt->seq_cnt = 1;
-	uctxt->is_vnic = true;
-
-	msix_request_rcd_irq(uctxt);
-
-	hfi1_stats.sps_ctxts++;
-	dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
-	*vnic_ctxt = uctxt;
-
-	return 0;
-}
-
-static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
-				 struct hfi1_ctxtdata *uctxt)
-{
-	dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
-	flush_wc();
-
-	/*
-	 * Disable receive context and interrupt available, reset all
-	 * RcvCtxtCtrl bits to default values.
-	 */
-	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
-		     HFI1_RCVCTRL_TIDFLOW_DIS |
-		     HFI1_RCVCTRL_INTRAVAIL_DIS |
-		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
-		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
-		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
-
-	/* msix_intr will always be > 0, only clean up if this is true */
-	if (uctxt->msix_intr)
-		msix_free_irq(dd, uctxt->msix_intr);
-
-	uctxt->event_flags = 0;
-
-	hfi1_clear_tids(uctxt);
-	hfi1_clear_ctxt_pkey(dd, uctxt);
-
-	hfi1_stats.sps_ctxts--;
-
-	hfi1_free_ctxt(uctxt);
-}
-
-void hfi1_vnic_setup(struct hfi1_devdata *dd)
-{
-	xa_init(&dd->vnic.vesws);
-}
-
-void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
-{
-	WARN_ON(!xa_empty(&dd->vnic.vesws));
-}
-
 #define SUM_GRP_COUNTERS(stats, qstats, x_grp) do {            \
 		u64 *src64, *dst64;                            \
 		for (src64 = &qstats->x_grp.unicast,           \
@@ -179,6 +72,9 @@ void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
 		}                                              \
 	} while (0)
 
+#define VNIC_MASK (0xFF)
+#define VNIC_ID(val) ((1ull << 24) | ((val) & VNIC_MASK))
+
 /* hfi1_vnic_update_stats - update statistics */
 static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
 				   struct opa_vnic_stats *stats)
@@ -454,71 +350,25 @@ static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
 	return rc;
 }
 
-static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
+static struct hfi1_vnic_vport_info *get_vnic_port(struct hfi1_devdata *dd,
+						  int vesw_id)
 {
-	unsigned char *pad_info;
-	struct sk_buff *skb;
+	int vnic_id = VNIC_ID(vesw_id);
 
-	skb = skb_dequeue(&rxq->skbq);
-	if (unlikely(!skb))
-		return NULL;
-
-	/* remove tail padding and icrc */
-	pad_info = skb->data + skb->len - 1;
-	skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
-		       ((*pad_info) & 0x7)));
-
-	return skb;
+	return hfi1_netdev_get_data(dd, vnic_id);
 }
 
-/* hfi1_vnic_handle_rx - handle skb receive */
-static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
-				int *work_done, int work_to_do)
+static struct hfi1_vnic_vport_info *get_first_vnic_port(struct hfi1_devdata *dd)
 {
-	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
-	struct sk_buff *skb;
-	int rc;
-
-	while (1) {
-		if (*work_done >= work_to_do)
-			break;
-
-		skb = hfi1_vnic_get_skb(rxq);
-		if (unlikely(!skb))
-			break;
-
-		rc = hfi1_vnic_decap_skb(rxq, skb);
-		/* update rx counters */
-		hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
-		if (unlikely(rc)) {
-			dev_kfree_skb_any(skb);
-			continue;
-		}
-
-		skb_checksum_none_assert(skb);
-		skb->protocol = eth_type_trans(skb, rxq->netdev);
-
-		napi_gro_receive(&rxq->napi, skb);
-		(*work_done)++;
-	}
-}
-
-/* hfi1_vnic_napi - napi receive polling callback function */
-static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
-{
-	struct hfi1_vnic_rx_queue *rxq = container_of(napi,
-					      struct hfi1_vnic_rx_queue, napi);
-	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
-	int work_done = 0;
+	struct hfi1_vnic_vport_info *vinfo;
+	int next_id = VNIC_ID(0);
 
-	v_dbg("napi %d budget %d\n", rxq->idx, budget);
-	hfi1_vnic_handle_rx(rxq, &work_done, budget);
+	vinfo = hfi1_netdev_get_first_data(dd, &next_id);
 
-	v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
-	if (work_done < budget)
-		napi_complete(napi);
+	if (next_id > VNIC_ID(VNIC_MASK))
+		return NULL;
 
-	return work_done;
+	return vinfo;
 }
 
 void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
@@ -527,13 +377,14 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
 	struct hfi1_vnic_vport_info *vinfo = NULL;
 	struct hfi1_vnic_rx_queue *rxq;
 	struct sk_buff *skb;
-	int l4_type, vesw_id = -1;
+	int l4_type, vesw_id = -1, rc;
 	u8 q_idx;
+	unsigned char *pad_info;
 
 	l4_type = hfi1_16B_get_l4(packet->ebuf);
 	if (likely(l4_type == OPA_16B_L4_ETHR)) {
 		vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
-		vinfo = xa_load(&dd->vnic.vesws, vesw_id);
+		vinfo = get_vnic_port(dd, vesw_id);
 
 		/*
 		 * In case of invalid vesw id, count the error on
@@ -541,10 +392,8 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
 		 */
 		if (unlikely(!vinfo)) {
 			struct hfi1_vnic_vport_info *vinfo_tmp;
-			unsigned long index = 0;
 
-			vinfo_tmp = xa_find(&dd->vnic.vesws, &index, ULONG_MAX,
-					XA_PRESENT);
+			vinfo_tmp = get_first_vnic_port(dd);
 			if (vinfo_tmp) {
 				spin_lock(&vport_cntr_lock);
 				vinfo_tmp->stats[0].netstats.rx_nohandler++;
@@ -563,12 +412,6 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
 	rxq = &vinfo->rxq[q_idx];
 	if (unlikely(!netif_oper_up(vinfo->netdev))) {
 		vinfo->stats[q_idx].rx_drop_state++;
-		skb_queue_purge(&rxq->skbq);
-		return;
-	}
-
-	if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
-		vinfo->stats[q_idx].netstats.rx_fifo_errors++;
 		return;
 	}
 
@@ -580,62 +423,65 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
 
 	memcpy(skb->data, packet->ebuf, packet->tlen);
 	skb_put(skb, packet->tlen);
-	skb_queue_tail(&rxq->skbq, skb);
 
-	if (napi_schedule_prep(&rxq->napi)) {
-		v_dbg("napi %d scheduling\n", q_idx);
-		__napi_schedule(&rxq->napi);
+	pad_info = skb->data + skb->len - 1;
+	skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
+		       ((*pad_info) & 0x7)));
+
+	rc = hfi1_vnic_decap_skb(rxq, skb);
+
+	/* update rx counters */
+	hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
+	if (unlikely(rc)) {
+		dev_kfree_skb_any(skb);
+		return;
 	}
+
+	skb_checksum_none_assert(skb);
+	skb->protocol = eth_type_trans(skb, rxq->netdev);
+
+	napi_gro_receive(&rxq->napi, skb);
 }
 
 static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
 {
 	struct hfi1_devdata *dd = vinfo->dd;
 	struct net_device *netdev = vinfo->netdev;
-	int i, rc;
+	int rc;
 
 	/* ensure virtual eth switch id is valid */
 	if (!vinfo->vesw_id)
 		return -EINVAL;
 
-	rc = xa_insert(&dd->vnic.vesws, vinfo->vesw_id, vinfo, GFP_KERNEL);
+	rc = hfi1_netdev_add_data(dd, VNIC_ID(vinfo->vesw_id), vinfo);
 	if (rc < 0)
 		return rc;
 
-	for (i = 0; i < vinfo->num_rx_q; i++) {
-		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
-
-		skb_queue_head_init(&rxq->skbq);
-		napi_enable(&rxq->napi);
-	}
+	rc = hfi1_netdev_rx_init(dd);
+	if (rc)
+		goto err_remove;
 
 	netif_carrier_on(netdev);
 	netif_tx_start_all_queues(netdev);
 	set_bit(HFI1_VNIC_UP, &vinfo->flags);
 
 	return 0;
+
+err_remove:
+	hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));
+	return rc;
 }
 
 static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
 {
 	struct hfi1_devdata *dd = vinfo->dd;
-	u8 i;
 
 	clear_bit(HFI1_VNIC_UP, &vinfo->flags);
 	netif_carrier_off(vinfo->netdev);
 	netif_tx_disable(vinfo->netdev);
-	xa_erase(&dd->vnic.vesws, vinfo->vesw_id);
-
-	/* ensure irqs see the change */
-	msix_vnic_synchronize_irq(dd);
+	hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));
 
-	/* remove unread skbs */
-	for (i = 0; i < vinfo->num_rx_q; i++) {
-		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
-
-		napi_disable(&rxq->napi);
-		skb_queue_purge(&rxq->skbq);
-	}
+	hfi1_netdev_rx_destroy(dd);
 }
 
 static int hfi1_netdev_open(struct net_device *netdev)
@@ -660,70 +506,31 @@ static int hfi1_netdev_close(struct net_device *netdev)
 	return 0;
 }
 
-static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
-				struct hfi1_ctxtdata **vnic_ctxt)
-{
-	int rc;
-
-	rc = allocate_vnic_ctxt(dd, vnic_ctxt);
-	if (rc) {
-		dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
-		return rc;
-	}
-
-	rc = setup_vnic_ctxt(dd, *vnic_ctxt);
-	if (rc) {
-		dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
-		deallocate_vnic_ctxt(dd, *vnic_ctxt);
-		*vnic_ctxt = NULL;
-	}
-
-	return rc;
-}
-
 static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
 {
 	struct hfi1_devdata *dd = vinfo->dd;
-	int i, rc = 0;
+	int rc = 0;
 
 	mutex_lock(&hfi1_mutex);
-	if (!dd->vnic.num_vports) {
+	if (!dd->vnic_num_vports) {
 		rc = hfi1_vnic_txreq_init(dd);
 		if (rc)
 			goto txreq_fail;
 	}
 
-	for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
-		rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
-		if (rc)
-			break;
-		hfi1_rcd_get(dd->vnic.ctxt[i]);
-		dd->vnic.ctxt[i]->vnic_q_idx = i;
-	}
-
-	if (i < vinfo->num_rx_q) {
-		/*
-		 * If required amount of contexts is not
-		 * allocated successfully then remaining contexts
-		 * are released.
-		 */
-		while (i-- > dd->vnic.num_ctxt) {
-			deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
-			hfi1_rcd_put(dd->vnic.ctxt[i]);
-			dd->vnic.ctxt[i] = NULL;
-		}
+	rc = hfi1_netdev_rx_init(dd);
+	if (rc) {
+		dd_dev_err(dd, "Unable to initialize netdev contexts\n");
 		goto alloc_fail;
 	}
 
-	if (dd->vnic.num_ctxt != i) {
-		dd->vnic.num_ctxt = i;
-		hfi1_init_vnic_rsm(dd);
-	}
+	hfi1_init_vnic_rsm(dd);
 
-	dd->vnic.num_vports++;
+	dd->vnic_num_vports++;
 	hfi1_vnic_sdma_init(vinfo);
+
 alloc_fail:
-	if (!dd->vnic.num_vports)
+	if (!dd->vnic_num_vports)
 		hfi1_vnic_txreq_deinit(dd);
 txreq_fail:
 	mutex_unlock(&hfi1_mutex);
@@ -733,20 +540,14 @@ txreq_fail:
 static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
 {
 	struct hfi1_devdata *dd = vinfo->dd;
-	int i;
 
 	mutex_lock(&hfi1_mutex);
-	if (--dd->vnic.num_vports == 0) {
-		for (i = 0; i < dd->vnic.num_ctxt; i++) {
-			deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
-			hfi1_rcd_put(dd->vnic.ctxt[i]);
-			dd->vnic.ctxt[i] = NULL;
-		}
+	if (--dd->vnic_num_vports == 0) {
 		hfi1_deinit_vnic_rsm(dd);
-		dd->vnic.num_ctxt = 0;
 		hfi1_vnic_txreq_deinit(dd);
 	}
 	mutex_unlock(&hfi1_mutex);
+	hfi1_netdev_rx_destroy(dd);
 }
 
 static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
@@ -804,7 +605,7 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
 	struct rdma_netdev *rn;
 	int i, size, rc;
 
-	if (!dd->num_vnic_contexts)
+	if (!dd->num_netdev_contexts)
 		return ERR_PTR(-ENOMEM);
 
 	if (!port_num || (port_num > dd->num_pports))
@@ -815,15 +616,16 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
 
 	size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
 	netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
-				  dd->num_sdma, dd->num_vnic_contexts);
+				  chip_sdma_engines(dd),
+				  dd->num_netdev_contexts);
 	if (!netdev)
 		return ERR_PTR(-ENOMEM);
 
 	rn = netdev_priv(netdev);
 	vinfo = opa_vnic_dev_priv(netdev);
 	vinfo->dd = dd;
-	vinfo->num_tx_q = dd->num_sdma;
-	vinfo->num_rx_q = dd->num_vnic_contexts;
+	vinfo->num_tx_q = chip_sdma_engines(dd);
+	vinfo->num_rx_q = dd->num_netdev_contexts;
 	vinfo->netdev = netdev;
 	rn->free_rdma_netdev = hfi1_vnic_free_rn;
 	rn->set_id = hfi1_vnic_set_vesw_id;
@@ -841,7 +643,6 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
 		rxq->idx = i;
 		rxq->vinfo = vinfo;
 		rxq->netdev = netdev;
-		netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
 	}
 
 	rc = hfi1_vnic_init(vinfo);