-rw-r--r-- | lib/netdev-dpdk.c | 335
1 file changed, 140 insertions, 195 deletions
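
The patch below removes the duplicated transmit preprocessing in the vhost and physical-port send paths and folds it into one helper, netdev_dpdk_common_send(). Every stage of that helper takes a packet count and returns how many packets survived, so each stage's drops fall out as a simple difference. The sketch below models only this accounting pattern in self-contained C; the stage functions, their drop behavior, and the packet numbers are invented for illustration and are not part of the patch.

    /*
     * Standalone model of the drop accounting used by
     * netdev_dpdk_common_send(): each stage takes a packet count, returns
     * how many packets survived, and the difference is charged to that
     * stage's drop counter.  All names and numbers are illustrative only.
     */
    #include <stddef.h>
    #include <stdio.h>

    struct sw_stats {
        size_t mtu_drops;
        size_t qos_drops;
    };

    /* Invented stage: pretend two packets in the batch are oversized. */
    static size_t
    filter_len(size_t cnt)
    {
        return cnt > 2 ? cnt - 2 : cnt;
    }

    /* Invented stage: pretend the QoS policer admits at most 16 packets. */
    static size_t
    qos_run(size_t cnt)
    {
        return cnt < 16 ? cnt : 16;
    }

    static size_t
    common_send(size_t pkt_cnt, struct sw_stats *stats)
    {
        size_t cnt;

        cnt = filter_len(pkt_cnt);
        stats->mtu_drops += pkt_cnt - cnt;   /* Stage delta = stage drops. */
        pkt_cnt = cnt;

        cnt = qos_run(pkt_cnt);
        stats->qos_drops += pkt_cnt - cnt;

        return cnt;                      /* Packets still eligible for tx. */
    }

    int
    main(void)
    {
        struct sw_stats stats = { 0, 0 };
        size_t sent = common_send(32, &stats);

        printf("sent=%zu mtu_drops=%zu qos_drops=%zu\n",
               sent, stats.mtu_drops, stats.qos_drops);
        return 0;
    }

Built with a plain cc invocation, this should print sent=16 mtu_drops=2 qos_drops=14; in the patch the same bookkeeping feeds struct netdev_dpdk_sw_stats and, under dev->stats_lock, dev->stats.tx_dropped.
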
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index e0a2dccf5..d70407a16 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -2563,90 +2563,6 @@ netdev_dpdk_vhost_update_tx_counters(struct netdev_dpdk *dev,
 }
 
 static void
-__netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
-                         struct dp_packet **pkts, int cnt)
-{
-    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-    struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
-    struct netdev_dpdk_sw_stats sw_stats_add;
-    unsigned int n_packets_to_free = cnt;
-    unsigned int total_packets = cnt;
-    int i, retries = 0;
-    int max_retries = VHOST_ENQ_RETRY_MIN;
-    int vid = netdev_dpdk_get_vid(dev);
-
-    qid = dev->tx_q[qid % netdev->n_txq].map;
-
-    if (OVS_UNLIKELY(vid < 0 || !dev->vhost_reconfigured || qid < 0
-                     || !(dev->flags & NETDEV_UP))) {
-        rte_spinlock_lock(&dev->stats_lock);
-        dev->stats.tx_dropped+= cnt;
-        rte_spinlock_unlock(&dev->stats_lock);
-        goto out;
-    }
-
-    if (OVS_UNLIKELY(!rte_spinlock_trylock(&dev->tx_q[qid].tx_lock))) {
-        COVERAGE_INC(vhost_tx_contention);
-        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
-    }
-
-    sw_stats_add.tx_invalid_hwol_drops = cnt;
-    if (userspace_tso_enabled()) {
-        cnt = netdev_dpdk_prep_hwol_batch(dev, cur_pkts, cnt);
-    }
-
-    sw_stats_add.tx_invalid_hwol_drops -= cnt;
-    sw_stats_add.tx_mtu_exceeded_drops = cnt;
-    cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
-    sw_stats_add.tx_mtu_exceeded_drops -= cnt;
-
-    /* Check has QoS has been configured for the netdev */
-    sw_stats_add.tx_qos_drops = cnt;
-    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
-    sw_stats_add.tx_qos_drops -= cnt;
-
-    n_packets_to_free = cnt;
-
-    do {
-        int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
-        unsigned int tx_pkts;
-
-        tx_pkts = rte_vhost_enqueue_burst(vid, vhost_qid, cur_pkts, cnt);
-        if (OVS_LIKELY(tx_pkts)) {
-            /* Packets have been sent.*/
-            cnt -= tx_pkts;
-            /* Prepare for possible retry.*/
-            cur_pkts = &cur_pkts[tx_pkts];
-            if (OVS_UNLIKELY(cnt && !retries)) {
-                /*
-                 * Read max retries as there are packets not sent
-                 * and no retries have already occurred.
-                 */
-                atomic_read_relaxed(&dev->vhost_tx_retries_max, &max_retries);
-            }
-        } else {
-            /* No packets sent - do not retry.*/
-            break;
-        }
-    } while (cnt && (retries++ < max_retries));
-
-    rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
-
-    sw_stats_add.tx_failure_drops = cnt;
-    sw_stats_add.tx_retries = MIN(retries, max_retries);
-
-    rte_spinlock_lock(&dev->stats_lock);
-    netdev_dpdk_vhost_update_tx_counters(dev, pkts, total_packets,
-                                         &sw_stats_add);
-    rte_spinlock_unlock(&dev->stats_lock);
-
-out:
-    for (i = 0; i < n_packets_to_free; i++) {
-        dp_packet_delete(pkts[i]);
-    }
-}
-
-static void
 netdev_dpdk_extbuf_free(void *addr OVS_UNUSED, void *opaque)
 {
     rte_free(opaque);
@@ -2750,76 +2666,70 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig)
     return pkt_dest;
 }
 
-/* Tx function. Transmit packets indefinitely */
-static void
-dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
+/* Replace packets in a 'batch' with their corresponding copies using
+ * DPDK memory.
+ *
+ * Returns the number of good packets in the batch. */
+static size_t
+dpdk_copy_batch_to_mbuf(struct netdev *netdev, struct dp_packet_batch *batch)
     OVS_NO_THREAD_SAFETY_ANALYSIS
 {
-    const size_t batch_cnt = dp_packet_batch_size(batch);
-#if !defined(__CHECKER__) && !defined(_WIN32)
-    const size_t PKT_ARRAY_SIZE = batch_cnt;
-#else
-    /* Sparse or MSVC doesn't like variable length array. */
-    enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
-#endif
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-    struct dp_packet *pkts[PKT_ARRAY_SIZE];
-    struct netdev_dpdk_sw_stats *sw_stats = dev->sw_stats;
-    uint32_t cnt = batch_cnt;
-    uint32_t dropped = 0;
-    uint32_t tx_failure = 0;
-    uint32_t mtu_drops = 0;
-    uint32_t qos_drops = 0;
-
-    if (dev->type != DPDK_DEV_VHOST) {
-        /* Check if QoS has been configured for this netdev. */
-        cnt = netdev_dpdk_qos_run(dev, (struct rte_mbuf **) batch->packets,
-                                  batch_cnt, false);
-        qos_drops = batch_cnt - cnt;
-    }
-
-    uint32_t txcnt = 0;
-
-    for (uint32_t i = 0; i < cnt; i++) {
-        struct dp_packet *packet = batch->packets[i];
-        uint32_t size = dp_packet_size(packet);
-
-        if (size > dev->max_packet_len
-            && !(packet->mbuf.ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
-            VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d", size,
-                         dev->max_packet_len);
-            mtu_drops++;
-            continue;
-        }
+    size_t i, size = dp_packet_batch_size(batch);
+    struct dp_packet *packet;
 
-        pkts[txcnt] = dpdk_copy_dp_packet_to_mbuf(dev->dpdk_mp->mp, packet);
-        if (OVS_UNLIKELY(!pkts[txcnt])) {
-            dropped = cnt - i;
-            break;
-        }
+    DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) {
+        if (OVS_UNLIKELY(packet->source == DPBUF_DPDK)) {
+            dp_packet_batch_refill(batch, packet, i);
+        } else {
+            struct dp_packet *pktcopy;
 
-        txcnt++;
-    }
+            pktcopy = dpdk_copy_dp_packet_to_mbuf(dev->dpdk_mp->mp, packet);
+            if (pktcopy) {
+                dp_packet_batch_refill(batch, pktcopy, i);
+            }
 
-    if (OVS_LIKELY(txcnt)) {
-        if (dev->type == DPDK_DEV_VHOST) {
-            __netdev_dpdk_vhost_send(netdev, qid, pkts, txcnt);
-        } else {
-            tx_failure += netdev_dpdk_eth_tx_burst(dev, qid,
-                                                   (struct rte_mbuf **)pkts,
-                                                   txcnt);
+            dp_packet_delete(packet);
         }
     }
 
-    dropped += qos_drops + mtu_drops + tx_failure;
-    if (OVS_UNLIKELY(dropped)) {
-        rte_spinlock_lock(&dev->stats_lock);
-        dev->stats.tx_dropped += dropped;
-        sw_stats->tx_failure_drops += tx_failure;
-        sw_stats->tx_mtu_exceeded_drops += mtu_drops;
-        sw_stats->tx_qos_drops += qos_drops;
-        rte_spinlock_unlock(&dev->stats_lock);
+    return dp_packet_batch_size(batch);
+}
+
+static size_t
+netdev_dpdk_common_send(struct netdev *netdev, struct dp_packet_batch *batch,
+                        struct netdev_dpdk_sw_stats *stats)
+{
+    struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets;
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+    size_t cnt, pkt_cnt = dp_packet_batch_size(batch);
+
+    memset(stats, 0, sizeof *stats);
+
+    /* Copy dp-packets to mbufs. */
+    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
+        cnt = dpdk_copy_batch_to_mbuf(netdev, batch);
+        stats->tx_failure_drops += pkt_cnt - cnt;
+        pkt_cnt = cnt;
     }
+
+    /* Drop oversized packets. */
+    cnt = netdev_dpdk_filter_packet_len(dev, pkts, pkt_cnt);
+    stats->tx_mtu_exceeded_drops += pkt_cnt - cnt;
+    pkt_cnt = cnt;
+
+    /* Prepare each mbuf for hardware offloading. */
+    if (userspace_tso_enabled()) {
+        cnt = netdev_dpdk_prep_hwol_batch(dev, pkts, pkt_cnt);
+        stats->tx_invalid_hwol_drops += pkt_cnt - cnt;
+        pkt_cnt = cnt;
+    }
+
+    /* Apply Quality of Service policy. */
+    cnt = netdev_dpdk_qos_run(dev, pkts, pkt_cnt, true);
+    stats->tx_qos_drops += pkt_cnt - cnt;
+
+    return cnt;
 }
 
 static int
@@ -2827,25 +2737,89 @@ netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
                        struct dp_packet_batch *batch,
                        bool concurrent_txq OVS_UNUSED)
 {
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+    int max_retries = VHOST_ENQ_RETRY_MIN;
+    int cnt, batch_cnt, vhost_batch_cnt;
+    int vid = netdev_dpdk_get_vid(dev);
+    struct netdev_dpdk_sw_stats stats;
+    struct rte_mbuf **pkts;
+    int retries;
 
-    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
-        dpdk_do_tx_copy(netdev, qid, batch);
+    batch_cnt = cnt = dp_packet_batch_size(batch);
+    qid = dev->tx_q[qid % netdev->n_txq].map;
+    if (OVS_UNLIKELY(vid < 0 || !dev->vhost_reconfigured || qid < 0
+                     || !(dev->flags & NETDEV_UP))) {
+        rte_spinlock_lock(&dev->stats_lock);
+        dev->stats.tx_dropped += cnt;
+        rte_spinlock_unlock(&dev->stats_lock);
         dp_packet_delete_batch(batch, true);
-    } else {
-        __netdev_dpdk_vhost_send(netdev, qid, batch->packets,
-                                 dp_packet_batch_size(batch));
+        return 0;
+    }
+
+    if (OVS_UNLIKELY(!rte_spinlock_trylock(&dev->tx_q[qid].tx_lock))) {
+        COVERAGE_INC(vhost_tx_contention);
+        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
     }
+
+    cnt = netdev_dpdk_common_send(netdev, batch, &stats);
+
+    pkts = (struct rte_mbuf **) batch->packets;
+    vhost_batch_cnt = cnt;
+    retries = 0;
+    do {
+        int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
+        int tx_pkts;
+
+        tx_pkts = rte_vhost_enqueue_burst(vid, vhost_qid, pkts, cnt);
+        if (OVS_LIKELY(tx_pkts)) {
+            /* Packets have been sent.*/
+            cnt -= tx_pkts;
+            /* Prepare for possible retry.*/
+            pkts = &pkts[tx_pkts];
+            if (OVS_UNLIKELY(cnt && !retries)) {
+                /*
+                 * Read max retries as there are packets not sent
+                 * and no retries have already occurred.
+                 */
+                atomic_read_relaxed(&dev->vhost_tx_retries_max, &max_retries);
+            }
+        } else {
+            /* No packets sent - do not retry.*/
+            break;
+        }
+    } while (cnt && (retries++ < max_retries));
+
+    rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
+
+    stats.tx_failure_drops += cnt;
+    stats.tx_retries = MIN(retries, max_retries);
+
+    rte_spinlock_lock(&dev->stats_lock);
+    netdev_dpdk_vhost_update_tx_counters(dev, batch->packets, batch_cnt,
+                                         &stats);
+    rte_spinlock_unlock(&dev->stats_lock);
+
+    pkts = (struct rte_mbuf **) batch->packets;
+    for (int i = 0; i < vhost_batch_cnt; i++) {
+        rte_pktmbuf_free(pkts[i]);
+    }
+
     return 0;
 }
 
-static inline void
-netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
-                   struct dp_packet_batch *batch,
-                   bool concurrent_txq)
+static int
+netdev_dpdk_eth_send(struct netdev *netdev, int qid,
+                     struct dp_packet_batch *batch, bool concurrent_txq)
 {
+    struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets;
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+    int batch_cnt = dp_packet_batch_size(batch);
+    struct netdev_dpdk_sw_stats stats;
+    int cnt, dropped;
+
     if (OVS_UNLIKELY(!(dev->flags & NETDEV_UP))) {
         dp_packet_delete_batch(batch, true);
-        return;
+        return 0;
     }
 
     if (OVS_UNLIKELY(concurrent_txq)) {
@@ -2853,56 +2827,27 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
         rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
     }
 
-    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
-        struct netdev *netdev = &dev->up;
+    cnt = netdev_dpdk_common_send(netdev, batch, &stats);
 
-        dpdk_do_tx_copy(netdev, qid, batch);
-        dp_packet_delete_batch(batch, true);
-    } else {
+    dropped = batch_cnt - cnt;
+
+    dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt);
+    if (OVS_UNLIKELY(dropped)) {
         struct netdev_dpdk_sw_stats *sw_stats = dev->sw_stats;
-        int dropped;
-        int tx_failure, mtu_drops, qos_drops, hwol_drops;
-        int batch_cnt = dp_packet_batch_size(batch);
-        struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets;
 
-        hwol_drops = batch_cnt;
-        if (userspace_tso_enabled()) {
-            batch_cnt = netdev_dpdk_prep_hwol_batch(dev, pkts, batch_cnt);
-        }
-        hwol_drops -= batch_cnt;
-        mtu_drops = batch_cnt;
-        batch_cnt = netdev_dpdk_filter_packet_len(dev, pkts, batch_cnt);
-        mtu_drops -= batch_cnt;
-        qos_drops = batch_cnt;
-        batch_cnt = netdev_dpdk_qos_run(dev, pkts, batch_cnt, true);
-        qos_drops -= batch_cnt;
-
-        tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, batch_cnt);
-
-        dropped = tx_failure + mtu_drops + qos_drops + hwol_drops;
-        if (OVS_UNLIKELY(dropped)) {
-            rte_spinlock_lock(&dev->stats_lock);
-            dev->stats.tx_dropped += dropped;
-            sw_stats->tx_failure_drops += tx_failure;
-            sw_stats->tx_mtu_exceeded_drops += mtu_drops;
-            sw_stats->tx_qos_drops += qos_drops;
-            sw_stats->tx_invalid_hwol_drops += hwol_drops;
-            rte_spinlock_unlock(&dev->stats_lock);
-        }
+        rte_spinlock_lock(&dev->stats_lock);
+        dev->stats.tx_dropped += dropped;
+        sw_stats->tx_failure_drops += stats.tx_failure_drops;
+        sw_stats->tx_mtu_exceeded_drops += stats.tx_mtu_exceeded_drops;
+        sw_stats->tx_qos_drops += stats.tx_qos_drops;
+        sw_stats->tx_invalid_hwol_drops += stats.tx_invalid_hwol_drops;
+        rte_spinlock_unlock(&dev->stats_lock);
     }
 
     if (OVS_UNLIKELY(concurrent_txq)) {
         rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
     }
-}
-
-static int
-netdev_dpdk_eth_send(struct netdev *netdev, int qid,
-                     struct dp_packet_batch *batch, bool concurrent_txq)
-{
-    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-
-    netdev_dpdk_send__(dev, qid, batch, concurrent_txq);
+
     return 0;
 }
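
netdev_dpdk_vhost_send() keeps the bounded retry loop of the removed __netdev_dpdk_vhost_send(): the vhost_tx_retries_max limit is loaded (via atomic_read_relaxed() in the real code) only when the first burst leaves packets behind, so a fully successful first burst never reads it, and a burst that sends nothing breaks out immediately. Below is a self-contained model of that control flow; enqueue_burst() is an invented stand-in for rte_vhost_enqueue_burst() that accepts at most four packets per call, and both retry constants are made up.

    /*
     * Standalone model of the bounded vhost retry loop: the retry limit is
     * fetched only on the first shortfall, and a burst that makes no
     * progress stops the loop.  enqueue_burst() and both constants are
     * invented for illustration.
     */
    #include <stdio.h>

    #define RETRY_MIN 0
    #define RETRY_MAX 8

    /* Invented device: accepts at most four packets per call. */
    static int
    enqueue_burst(int cnt)
    {
        return cnt < 4 ? cnt : 4;
    }

    int
    main(void)
    {
        int cnt = 10;                 /* Packets left to send. */
        int max_retries = RETRY_MIN;  /* No retries unless packets remain. */
        int retries = 0;

        do {
            int tx_pkts = enqueue_burst(cnt);

            if (tx_pkts) {
                cnt -= tx_pkts;
                if (cnt && !retries) {
                    /* First shortfall: fetch the configured limit.  The
                     * real code uses atomic_read_relaxed() here. */
                    max_retries = RETRY_MAX;
                }
            } else {
                break;            /* Ring full and making no progress. */
            }
        } while (cnt && retries++ < max_retries);

        printf("unsent=%d retries=%d\n", cnt,
               retries < max_retries ? retries : max_retries);
        return 0;
    }

With these made-up numbers it should print unsent=0 retries=2: ten packets leave in three bursts, the second and third of which count as retries, matching how the patch reports stats.tx_retries as MIN(retries, max_retries).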