author     Flavio Leitner <fbl@sysclose.org>   2022-04-28 09:39:38 -0400
committer  Ilya Maximets <i.maximets@ovn.org>  2022-05-04 20:01:59 +0200
commit     29b94e12d57db7813d6fea53e3351d82501abcc3 (patch)
tree       d6aa670f97eb388321ab1a4fe5014ba010ffba8b /lib/netdev-dpdk.c
parent     8c506d3725b6bdba5ee192d15378abbc83f41170 (diff)
netdev-dpdk: Refactor the DPDK transmit path.
This patch splits the code common to the vhost and DPDK transmit paths out into shared functions, both to simplify the code and to fix an issue: packets coming from a non-DPDK device and egressing on a DPDK device currently skip the HWOL preparation.

Signed-off-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
Co-authored-by: Mike Pattrick <mkp@redhat.com>
Reviewed-by: David Marchand <david.marchand@redhat.com>
Acked-by: Sunil Pai G <sunil.pai.g@intel.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Diffstat (limited to 'lib/netdev-dpdk.c')
-rw-r--r--  lib/netdev-dpdk.c  335
1 file changed, 140 insertions(+), 195 deletions(-)
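
The new netdev_dpdk_common_send() introduced below funnels every batch through the same sequence of stages (copy to mbufs, MTU filtering, HWOL preparation, QoS), charging whatever each stage drops to a per-stage counter. The following is a minimal standalone sketch of that accounting pattern, with hypothetical stand-in stages in place of the real OVS/DPDK helpers:

#include <stdio.h>
#include <string.h>

/* Hypothetical stand-in stages; each returns how many packets survived.
 * In the real patch these are dpdk_copy_batch_to_mbuf(),
 * netdev_dpdk_filter_packet_len(), netdev_dpdk_prep_hwol_batch() and
 * netdev_dpdk_qos_run(). */
static size_t copy_to_mbuf(size_t cnt) { return cnt ? cnt - 1 : 0; }
static size_t filter_len(size_t cnt)   { return cnt; }
static size_t prep_hwol(size_t cnt)    { return cnt > 2 ? cnt - 2 : 0; }
static size_t qos_run(size_t cnt)      { return cnt; }

struct sw_stats {
    size_t tx_failure_drops;
    size_t tx_mtu_exceeded_drops;
    size_t tx_invalid_hwol_drops;
    size_t tx_qos_drops;
};

/* Mirrors the accounting in netdev_dpdk_common_send(): after each stage
 * the difference between the old and new counts is charged to that
 * stage's drop counter, and the new count feeds the next stage. */
static size_t
common_send(size_t pkt_cnt, struct sw_stats *stats)
{
    size_t cnt;

    memset(stats, 0, sizeof *stats);

    cnt = copy_to_mbuf(pkt_cnt);
    stats->tx_failure_drops += pkt_cnt - cnt;
    pkt_cnt = cnt;

    cnt = filter_len(pkt_cnt);
    stats->tx_mtu_exceeded_drops += pkt_cnt - cnt;
    pkt_cnt = cnt;

    cnt = prep_hwol(pkt_cnt);
    stats->tx_invalid_hwol_drops += pkt_cnt - cnt;
    pkt_cnt = cnt;

    cnt = qos_run(pkt_cnt);
    stats->tx_qos_drops += pkt_cnt - cnt;

    return cnt;
}

int main(void)
{
    struct sw_stats stats;
    size_t sent = common_send(32, &stats);

    printf("sent %zu, copy drops %zu, hwol drops %zu\n",
           sent, stats.tx_failure_drops, stats.tx_invalid_hwol_drops);
    return 0;
}

Because the non-DPDK (copy) case now runs through this same pipeline, copied packets no longer bypass the HWOL preparation step, which is the bug the commit message refers to.
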
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index e0a2dccf5..d70407a16 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -2563,90 +2563,6 @@ netdev_dpdk_vhost_update_tx_counters(struct netdev_dpdk *dev,
}
static void
-__netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
- struct dp_packet **pkts, int cnt)
-{
- struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
- struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
- struct netdev_dpdk_sw_stats sw_stats_add;
- unsigned int n_packets_to_free = cnt;
- unsigned int total_packets = cnt;
- int i, retries = 0;
- int max_retries = VHOST_ENQ_RETRY_MIN;
- int vid = netdev_dpdk_get_vid(dev);
-
- qid = dev->tx_q[qid % netdev->n_txq].map;
-
- if (OVS_UNLIKELY(vid < 0 || !dev->vhost_reconfigured || qid < 0
- || !(dev->flags & NETDEV_UP))) {
- rte_spinlock_lock(&dev->stats_lock);
- dev->stats.tx_dropped+= cnt;
- rte_spinlock_unlock(&dev->stats_lock);
- goto out;
- }
-
- if (OVS_UNLIKELY(!rte_spinlock_trylock(&dev->tx_q[qid].tx_lock))) {
- COVERAGE_INC(vhost_tx_contention);
- rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
- }
-
- sw_stats_add.tx_invalid_hwol_drops = cnt;
- if (userspace_tso_enabled()) {
- cnt = netdev_dpdk_prep_hwol_batch(dev, cur_pkts, cnt);
- }
-
- sw_stats_add.tx_invalid_hwol_drops -= cnt;
- sw_stats_add.tx_mtu_exceeded_drops = cnt;
- cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
- sw_stats_add.tx_mtu_exceeded_drops -= cnt;
-
- /* Check has QoS has been configured for the netdev */
- sw_stats_add.tx_qos_drops = cnt;
- cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
- sw_stats_add.tx_qos_drops -= cnt;
-
- n_packets_to_free = cnt;
-
- do {
- int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
- unsigned int tx_pkts;
-
- tx_pkts = rte_vhost_enqueue_burst(vid, vhost_qid, cur_pkts, cnt);
- if (OVS_LIKELY(tx_pkts)) {
- /* Packets have been sent.*/
- cnt -= tx_pkts;
- /* Prepare for possible retry.*/
- cur_pkts = &cur_pkts[tx_pkts];
- if (OVS_UNLIKELY(cnt && !retries)) {
- /*
- * Read max retries as there are packets not sent
- * and no retries have already occurred.
- */
- atomic_read_relaxed(&dev->vhost_tx_retries_max, &max_retries);
- }
- } else {
- /* No packets sent - do not retry.*/
- break;
- }
- } while (cnt && (retries++ < max_retries));
-
- rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
-
- sw_stats_add.tx_failure_drops = cnt;
- sw_stats_add.tx_retries = MIN(retries, max_retries);
-
- rte_spinlock_lock(&dev->stats_lock);
- netdev_dpdk_vhost_update_tx_counters(dev, pkts, total_packets,
- &sw_stats_add);
- rte_spinlock_unlock(&dev->stats_lock);
-
-out:
- for (i = 0; i < n_packets_to_free; i++) {
- dp_packet_delete(pkts[i]);
- }
-}
-
-static void
netdev_dpdk_extbuf_free(void *addr OVS_UNUSED, void *opaque)
{
rte_free(opaque);
@@ -2750,76 +2666,70 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig)
return pkt_dest;
}
-/* Tx function. Transmit packets indefinitely */
-static void
-dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
+/* Replace packets in a 'batch' with their corresponding copies using
+ * DPDK memory.
+ *
+ * Returns the number of good packets in the batch. */
+static size_t
+dpdk_copy_batch_to_mbuf(struct netdev *netdev, struct dp_packet_batch *batch)
OVS_NO_THREAD_SAFETY_ANALYSIS
{
- const size_t batch_cnt = dp_packet_batch_size(batch);
-#if !defined(__CHECKER__) && !defined(_WIN32)
- const size_t PKT_ARRAY_SIZE = batch_cnt;
-#else
- /* Sparse or MSVC doesn't like variable length array. */
- enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
-#endif
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
- struct dp_packet *pkts[PKT_ARRAY_SIZE];
- struct netdev_dpdk_sw_stats *sw_stats = dev->sw_stats;
- uint32_t cnt = batch_cnt;
- uint32_t dropped = 0;
- uint32_t tx_failure = 0;
- uint32_t mtu_drops = 0;
- uint32_t qos_drops = 0;
-
- if (dev->type != DPDK_DEV_VHOST) {
- /* Check if QoS has been configured for this netdev. */
- cnt = netdev_dpdk_qos_run(dev, (struct rte_mbuf **) batch->packets,
- batch_cnt, false);
- qos_drops = batch_cnt - cnt;
- }
-
- uint32_t txcnt = 0;
-
- for (uint32_t i = 0; i < cnt; i++) {
- struct dp_packet *packet = batch->packets[i];
- uint32_t size = dp_packet_size(packet);
-
- if (size > dev->max_packet_len
- && !(packet->mbuf.ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
- VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d", size,
- dev->max_packet_len);
- mtu_drops++;
- continue;
- }
+ size_t i, size = dp_packet_batch_size(batch);
+ struct dp_packet *packet;
- pkts[txcnt] = dpdk_copy_dp_packet_to_mbuf(dev->dpdk_mp->mp, packet);
- if (OVS_UNLIKELY(!pkts[txcnt])) {
- dropped = cnt - i;
- break;
- }
+ DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) {
+ if (OVS_UNLIKELY(packet->source == DPBUF_DPDK)) {
+ dp_packet_batch_refill(batch, packet, i);
+ } else {
+ struct dp_packet *pktcopy;
- txcnt++;
- }
+ pktcopy = dpdk_copy_dp_packet_to_mbuf(dev->dpdk_mp->mp, packet);
+ if (pktcopy) {
+ dp_packet_batch_refill(batch, pktcopy, i);
+ }
- if (OVS_LIKELY(txcnt)) {
- if (dev->type == DPDK_DEV_VHOST) {
- __netdev_dpdk_vhost_send(netdev, qid, pkts, txcnt);
- } else {
- tx_failure += netdev_dpdk_eth_tx_burst(dev, qid,
- (struct rte_mbuf **)pkts,
- txcnt);
+ dp_packet_delete(packet);
}
}
- dropped += qos_drops + mtu_drops + tx_failure;
- if (OVS_UNLIKELY(dropped)) {
- rte_spinlock_lock(&dev->stats_lock);
- dev->stats.tx_dropped += dropped;
- sw_stats->tx_failure_drops += tx_failure;
- sw_stats->tx_mtu_exceeded_drops += mtu_drops;
- sw_stats->tx_qos_drops += qos_drops;
- rte_spinlock_unlock(&dev->stats_lock);
+ return dp_packet_batch_size(batch);
+}
+
+static size_t
+netdev_dpdk_common_send(struct netdev *netdev, struct dp_packet_batch *batch,
+ struct netdev_dpdk_sw_stats *stats)
+{
+ struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets;
+ struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+ size_t cnt, pkt_cnt = dp_packet_batch_size(batch);
+
+ memset(stats, 0, sizeof *stats);
+
+ /* Copy dp-packets to mbufs. */
+ if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
+ cnt = dpdk_copy_batch_to_mbuf(netdev, batch);
+ stats->tx_failure_drops += pkt_cnt - cnt;
+ pkt_cnt = cnt;
}
+
+ /* Drop oversized packets. */
+ cnt = netdev_dpdk_filter_packet_len(dev, pkts, pkt_cnt);
+ stats->tx_mtu_exceeded_drops += pkt_cnt - cnt;
+ pkt_cnt = cnt;
+
+ /* Prepare each mbuf for hardware offloading. */
+ if (userspace_tso_enabled()) {
+ cnt = netdev_dpdk_prep_hwol_batch(dev, pkts, pkt_cnt);
+ stats->tx_invalid_hwol_drops += pkt_cnt - cnt;
+ pkt_cnt = cnt;
+ }
+
+ /* Apply Quality of Service policy. */
+ cnt = netdev_dpdk_qos_run(dev, pkts, pkt_cnt, true);
+ stats->tx_qos_drops += pkt_cnt - cnt;
+
+ return cnt;
}
static int
@@ -2827,25 +2737,89 @@ netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
struct dp_packet_batch *batch,
bool concurrent_txq OVS_UNUSED)
{
+ struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+ int max_retries = VHOST_ENQ_RETRY_MIN;
+ int cnt, batch_cnt, vhost_batch_cnt;
+ int vid = netdev_dpdk_get_vid(dev);
+ struct netdev_dpdk_sw_stats stats;
+ struct rte_mbuf **pkts;
+ int retries;
- if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
- dpdk_do_tx_copy(netdev, qid, batch);
+ batch_cnt = cnt = dp_packet_batch_size(batch);
+ qid = dev->tx_q[qid % netdev->n_txq].map;
+ if (OVS_UNLIKELY(vid < 0 || !dev->vhost_reconfigured || qid < 0
+ || !(dev->flags & NETDEV_UP))) {
+ rte_spinlock_lock(&dev->stats_lock);
+ dev->stats.tx_dropped += cnt;
+ rte_spinlock_unlock(&dev->stats_lock);
dp_packet_delete_batch(batch, true);
- } else {
- __netdev_dpdk_vhost_send(netdev, qid, batch->packets,
- dp_packet_batch_size(batch));
+ return 0;
+ }
+
+ if (OVS_UNLIKELY(!rte_spinlock_trylock(&dev->tx_q[qid].tx_lock))) {
+ COVERAGE_INC(vhost_tx_contention);
+ rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
}
+
+ cnt = netdev_dpdk_common_send(netdev, batch, &stats);
+
+ pkts = (struct rte_mbuf **) batch->packets;
+ vhost_batch_cnt = cnt;
+ retries = 0;
+ do {
+ int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
+ int tx_pkts;
+
+ tx_pkts = rte_vhost_enqueue_burst(vid, vhost_qid, pkts, cnt);
+ if (OVS_LIKELY(tx_pkts)) {
+ /* Packets have been sent.*/
+ cnt -= tx_pkts;
+ /* Prepare for possible retry.*/
+ pkts = &pkts[tx_pkts];
+ if (OVS_UNLIKELY(cnt && !retries)) {
+ /*
+ * Read max retries as there are packets not sent
+ * and no retries have already occurred.
+ */
+ atomic_read_relaxed(&dev->vhost_tx_retries_max, &max_retries);
+ }
+ } else {
+ /* No packets sent - do not retry.*/
+ break;
+ }
+ } while (cnt && (retries++ < max_retries));
+
+ rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
+
+ stats.tx_failure_drops += cnt;
+ stats.tx_retries = MIN(retries, max_retries);
+
+ rte_spinlock_lock(&dev->stats_lock);
+ netdev_dpdk_vhost_update_tx_counters(dev, batch->packets, batch_cnt,
+ &stats);
+ rte_spinlock_unlock(&dev->stats_lock);
+
+ pkts = (struct rte_mbuf **) batch->packets;
+ for (int i = 0; i < vhost_batch_cnt; i++) {
+ rte_pktmbuf_free(pkts[i]);
+ }
+
return 0;
}
-static inline void
-netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
- struct dp_packet_batch *batch,
- bool concurrent_txq)
+static int
+netdev_dpdk_eth_send(struct netdev *netdev, int qid,
+ struct dp_packet_batch *batch, bool concurrent_txq)
{
+ struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets;
+ struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+ int batch_cnt = dp_packet_batch_size(batch);
+ struct netdev_dpdk_sw_stats stats;
+ int cnt, dropped;
+
if (OVS_UNLIKELY(!(dev->flags & NETDEV_UP))) {
dp_packet_delete_batch(batch, true);
- return;
+ return 0;
}
if (OVS_UNLIKELY(concurrent_txq)) {
@@ -2853,56 +2827,27 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
}
- if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
- struct netdev *netdev = &dev->up;
+ cnt = netdev_dpdk_common_send(netdev, batch, &stats);
- dpdk_do_tx_copy(netdev, qid, batch);
- dp_packet_delete_batch(batch, true);
- } else {
+ dropped = batch_cnt - cnt;
+
+ dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt);
+ if (OVS_UNLIKELY(dropped)) {
struct netdev_dpdk_sw_stats *sw_stats = dev->sw_stats;
- int dropped;
- int tx_failure, mtu_drops, qos_drops, hwol_drops;
- int batch_cnt = dp_packet_batch_size(batch);
- struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets;
- hwol_drops = batch_cnt;
- if (userspace_tso_enabled()) {
- batch_cnt = netdev_dpdk_prep_hwol_batch(dev, pkts, batch_cnt);
- }
- hwol_drops -= batch_cnt;
- mtu_drops = batch_cnt;
- batch_cnt = netdev_dpdk_filter_packet_len(dev, pkts, batch_cnt);
- mtu_drops -= batch_cnt;
- qos_drops = batch_cnt;
- batch_cnt = netdev_dpdk_qos_run(dev, pkts, batch_cnt, true);
- qos_drops -= batch_cnt;
-
- tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, batch_cnt);
-
- dropped = tx_failure + mtu_drops + qos_drops + hwol_drops;
- if (OVS_UNLIKELY(dropped)) {
- rte_spinlock_lock(&dev->stats_lock);
- dev->stats.tx_dropped += dropped;
- sw_stats->tx_failure_drops += tx_failure;
- sw_stats->tx_mtu_exceeded_drops += mtu_drops;
- sw_stats->tx_qos_drops += qos_drops;
- sw_stats->tx_invalid_hwol_drops += hwol_drops;
- rte_spinlock_unlock(&dev->stats_lock);
- }
+ rte_spinlock_lock(&dev->stats_lock);
+ dev->stats.tx_dropped += dropped;
+ sw_stats->tx_failure_drops += stats.tx_failure_drops;
+ sw_stats->tx_mtu_exceeded_drops += stats.tx_mtu_exceeded_drops;
+ sw_stats->tx_qos_drops += stats.tx_qos_drops;
+ sw_stats->tx_invalid_hwol_drops += stats.tx_invalid_hwol_drops;
+ rte_spinlock_unlock(&dev->stats_lock);
}
if (OVS_UNLIKELY(concurrent_txq)) {
rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
}
-}
-
-static int
-netdev_dpdk_eth_send(struct netdev *netdev, int qid,
- struct dp_packet_batch *batch, bool concurrent_txq)
-{
- struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
- netdev_dpdk_send__(dev, qid, batch, concurrent_txq);
return 0;
}
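
For reference, the vhost enqueue retry loop kept by the patch reads the configured retry cap only after the first partial send and gives up as soon as the guest accepts nothing. Below is a small self-contained sketch of that control flow, with a mocked enqueue function standing in for rte_vhost_enqueue_burst():

#include <stdio.h>

/* Mocked enqueue: pretends the guest ring accepts at most 8 packets
 * per call; stands in for rte_vhost_enqueue_burst(). */
static unsigned int
mock_enqueue_burst(unsigned int cnt)
{
    return cnt < 8 ? cnt : 8;
}

int main(void)
{
    unsigned int cnt = 20;   /* Packets left to enqueue. */
    int max_retries = 0;     /* Initial cap before the configured maximum is
                              * read; the patch uses VHOST_ENQ_RETRY_MIN. */
    int configured_max = 8;  /* Stands in for dev->vhost_tx_retries_max. */
    int retries = 0;

    do {
        unsigned int tx_pkts = mock_enqueue_burst(cnt);

        if (tx_pkts) {
            /* Some packets went out; advance past them. */
            cnt -= tx_pkts;
            if (cnt && !retries) {
                /* First partial send: pick up the configured retry cap. */
                max_retries = configured_max;
            }
        } else {
            /* Nothing accepted: give up rather than spin. */
            break;
        }
    } while (cnt && retries++ < max_retries);

    printf("unsent: %u, retries used: %d\n",
           cnt, retries < max_retries ? retries : max_retries);
    return 0;
}

On exit, the packets still counted in cnt become tx_failure_drops and the retry count (capped at the maximum) is recorded as tx_retries, matching the stats handling in netdev_dpdk_vhost_send().
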