author     Ben Pfaff <blp@ovn.org>    2017-09-12 07:12:53 -0700
committer  Ben Pfaff <blp@ovn.org>    2017-09-12 07:12:53 -0700
commit     a5781d3270e6b94c13a063cead664e5553b6cb70 (patch)
tree       2830592d6baf032b0f902f189863c4782a21af51 /lib
parent     3a0c5805f90f141ec138ec71d4ec94d84db30531 (diff)
parent     3e90f7d753366a27659db71a96798fa89e2a6cd4 (diff)
download   openvswitch-a5781d3270e6b94c13a063cead664e5553b6cb70.tar.gz
Merge branch 'dpdk_merge' of https://github.com/darball/ovs.
Diffstat (limited to 'lib')
-rw-r--r--  lib/dpif-netdev.c    43
-rw-r--r--  lib/netdev-dpdk.c   236
2 files changed, 155 insertions, 124 deletions
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 0ceef9d82..ca74df8c7 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -796,7 +796,7 @@ pmd_info_show_stats(struct ds *reply,
unsigned long long stats[DP_N_STATS],
uint64_t cycles[PMD_N_CYCLES])
{
- unsigned long long total_packets = 0;
+ unsigned long long total_packets;
uint64_t total_cycles = 0;
int i;
@@ -812,13 +812,12 @@ pmd_info_show_stats(struct ds *reply,
} else {
stats[i] = 0;
}
-
- if (i != DP_STAT_LOST) {
- /* Lost packets are already included in DP_STAT_MISS */
- total_packets += stats[i];
- }
}
+ /* Sum of all the matched and not matched packets gives the total. */
+ total_packets = stats[DP_STAT_EXACT_HIT] + stats[DP_STAT_MASKED_HIT]
+ + stats[DP_STAT_MISS];
+
for (i = 0; i < PMD_N_CYCLES; i++) {
if (cycles[i] > pmd->cycles_zero[i]) {
cycles[i] -= pmd->cycles_zero[i];
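
(Not part of the patch: a minimal standalone sketch of the statistics arithmetic in the hunk above, using the DP_STAT_* counters it names; the helper name is hypothetical. DP_STAT_LOST is excluded because lost packets are already counted in DP_STAT_MISS.)

    static unsigned long long
    pmd_total_packets(const unsigned long long stats[DP_N_STATS])
    {
        /* Matched (EMC + megaflow) plus unmatched packets; DP_STAT_LOST is
         * deliberately left out, since lost packets are a subset of the
         * misses. */
        return stats[DP_STAT_EXACT_HIT]
               + stats[DP_STAT_MASKED_HIT]
               + stats[DP_STAT_MISS];
    }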
@@ -3428,8 +3427,8 @@ rr_numa_list_destroy(struct rr_numa_list *rr)
static int
rxq_cycle_sort(const void *a, const void *b)
{
- struct dp_netdev_rxq * qa;
- struct dp_netdev_rxq * qb;
+ struct dp_netdev_rxq *qa;
+ struct dp_netdev_rxq *qb;
uint64_t total_qa, total_qb;
unsigned i;
@@ -3864,9 +3863,10 @@ dpif_netdev_run(struct dpif *dpif)
dp_netdev_process_rxq_port(non_pmd,
port->rxqs[i].rx,
port->port_no);
- cycles_count_intermediate(non_pmd, NULL, process_packets ?
- PMD_CYCLES_PROCESSING
- : PMD_CYCLES_IDLE);
+ cycles_count_intermediate(non_pmd, NULL,
+ process_packets
+ ? PMD_CYCLES_PROCESSING
+ : PMD_CYCLES_IDLE);
}
}
}
@@ -4858,8 +4858,11 @@ dp_netdev_queue_batches(struct dp_packet *pkt,
* The function returns the number of packets that needs to be processed in the
* 'packets' array (they have been moved to the beginning of the vector).
*
- * If 'md_is_valid' is false, the metadata in 'packets' is not valid and must
- * be initialized by this function using 'port_no'.
+ * For performance reasons a caller may choose not to initialize the metadata
+ * in 'packets_'. If 'md_is_valid' is false, the metadata in 'packets'
+ * is not valid and must be initialized by this function using 'port_no'.
+ * If 'md_is_valid' is true, the metadata is already valid and 'port_no'
+ * will be ignored.
*/
static inline size_t
emc_processing(struct dp_netdev_pmd_thread *pmd,
@@ -4872,13 +4875,13 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
struct netdev_flow_key *key = &keys[0];
size_t n_missed = 0, n_dropped = 0;
struct dp_packet *packet;
- const size_t size = dp_packet_batch_size(packets_);
+ const size_t cnt = dp_packet_batch_size(packets_);
uint32_t cur_min;
int i;
atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
- DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) {
+ DP_PACKET_BATCH_REFILL_FOR_EACH (i, cnt, packet, packets_) {
struct dp_netdev_flow *flow;
if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
@@ -4887,7 +4890,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
continue;
}
- if (i != size - 1) {
+ if (i != cnt - 1) {
struct dp_packet **packets = packets_->packets;
/* Prefetch next packet data and metadata. */
OVS_PREFETCH(dp_packet_data(packets[i+1]));
@@ -4918,7 +4921,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
}
dp_netdev_count_packet(pmd, DP_STAT_EXACT_HIT,
- size - n_dropped - n_missed);
+ cnt - n_dropped - n_missed);
return dp_packet_batch_size(packets_);
}
@@ -5092,10 +5095,8 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
/* Packets enter the datapath from a port (or from recirculation) here.
*
- * For performance reasons a caller may choose not to initialize the metadata
- * in 'packets': in this case 'mdinit' is false and this function needs to
- * initialize it using 'port_no'. If the metadata in 'packets' is already
- * valid, 'md_is_valid' must be true and 'port_no' will be ignored. */
+ * When 'md_is_valid' is true the metadata in 'packets' are already valid.
+ * When false the metadata in 'packets' need to be initialized. */
static void
dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
struct dp_packet_batch *packets,
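
(Not part of the patch: both comment updates above describe the same 'md_is_valid' convention. A small sketch of it, using the existing pkt_metadata_init() helper; the wrapper name is hypothetical.)

    static inline void
    init_md_if_needed(struct dp_packet *packet, odp_port_t port_no,
                      bool md_is_valid)
    {
        if (!md_is_valid) {
            /* The caller skipped metadata init for performance; derive it
             * from the ingress port before any flow lookup. */
            pkt_metadata_init(&packet->md, port_no);
        }
        /* Otherwise the metadata is already valid and 'port_no' is ignored. */
    }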
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index f58e9bed1..648d719ba 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -279,7 +279,7 @@ struct dpdk_qos_ops {
* For all QoS implementations it should always be non-null.
*/
int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts,
- int pkt_cnt);
+ int pkt_cnt, bool may_steal);
};
/* dpdk_qos_ops for each type of user space QoS implementation */
@@ -303,14 +303,12 @@ static struct ovs_list dpdk_list OVS_GUARDED_BY(dpdk_mutex)
static struct ovs_mutex dpdk_mp_mutex OVS_ACQ_AFTER(dpdk_mutex)
= OVS_MUTEX_INITIALIZER;
-static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mp_mutex)
- = OVS_LIST_INITIALIZER(&dpdk_mp_list);
-
struct dpdk_mp {
struct rte_mempool *mp;
int mtu;
int socket_id;
- int refcount;
+ char if_name[IFNAMSIZ];
+ unsigned mp_size;
struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex);
};
@@ -492,45 +490,79 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp OVS_UNUSED,
dp_packet_init_dpdk((struct dp_packet *) pkt, pkt->buf_len);
}
+/*
+ * Full DPDK memory pool name must be unique
+ * and cannot be longer than RTE_MEMPOOL_NAMESIZE
+ */
+static char *
+dpdk_mp_name(struct dpdk_mp *dmp)
+{
+ uint32_t h = hash_string(dmp->if_name, 0);
+ char *mp_name = xcalloc(RTE_MEMPOOL_NAMESIZE, sizeof *mp_name);
+ int ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_%x_%d_%u",
+ h, dmp->mtu, dmp->mp_size);
+ if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) {
+ return NULL;
+ }
+ return mp_name;
+}
+
static struct dpdk_mp *
-dpdk_mp_create(int socket_id, int mtu)
+dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
{
- struct dpdk_mp *dmp;
- unsigned mp_size;
- char *mp_name;
-
- dmp = dpdk_rte_mzalloc(sizeof *dmp);
+ struct dpdk_mp *dmp = dpdk_rte_mzalloc(sizeof *dmp);
if (!dmp) {
return NULL;
}
- dmp->socket_id = socket_id;
+ dmp->socket_id = dev->requested_socket_id;
dmp->mtu = mtu;
- dmp->refcount = 1;
- /* XXX: this is a really rough method of provisioning memory.
- * It's impossible to determine what the exact memory requirements are
- * when the number of ports and rxqs that utilize a particular mempool can
- * change dynamically at runtime. For now, use this rough heurisitic.
+ ovs_strzcpy(dmp->if_name, dev->up.name, IFNAMSIZ);
+
+ /*
+ * XXX: rough estimation of memory required for port:
+ * <packets required to fill the device rxqs>
+ * + <packets that could be stuck on other ports txqs>
+ * + <packets in the pmd threads>
+ * + <additional memory for corner cases>
*/
- if (mtu >= ETHER_MTU) {
- mp_size = MAX_NB_MBUF;
- } else {
- mp_size = MIN_NB_MBUF;
- }
+ dmp->mp_size = dev->requested_n_rxq * dev->requested_rxq_size
+ + dev->requested_n_txq * dev->requested_txq_size
+ + MIN(RTE_MAX_LCORE, dev->requested_n_rxq) * NETDEV_MAX_BURST
+ + MIN_NB_MBUF;
+
+ bool mp_exists = false;
do {
- mp_name = xasprintf("ovs_mp_%d_%d_%u", dmp->mtu, dmp->socket_id,
- mp_size);
+ char *mp_name = dpdk_mp_name(dmp);
- dmp->mp = rte_pktmbuf_pool_create(mp_name, mp_size,
+ VLOG_DBG("Requesting a mempool of %u mbufs for netdev %s "
+ "with %d Rx and %d Tx queues.",
+ dmp->mp_size, dev->up.name,
+ dev->requested_n_rxq, dev->requested_n_txq);
+
+ dmp->mp = rte_pktmbuf_pool_create(mp_name, dmp->mp_size,
MP_CACHE_SZ,
sizeof (struct dp_packet)
- sizeof (struct rte_mbuf),
MBUF_SIZE(mtu)
- sizeof(struct dp_packet),
- socket_id);
+ dmp->socket_id);
if (dmp->mp) {
- VLOG_DBG("Allocated \"%s\" mempool with %u mbufs",
- mp_name, mp_size);
+ VLOG_DBG("Allocated \"%s\" mempool with %u mbufs", mp_name,
+ dmp->mp_size);
+ } else if (rte_errno == EEXIST) {
+ /* A mempool with the same name already exists. We just
+ * retrieve its pointer to be returned to the caller. */
+ dmp->mp = rte_mempool_lookup(mp_name);
+ VLOG_DBG("A mempool with name %s already exists at %p.",
+ mp_name, dmp->mp);
+ /* As the mempool create returned EEXIST we can expect the
+ * lookup has returned a valid pointer. If for some reason
+ * that's not the case we keep track of it. */
+ mp_exists = true;
+ } else {
+ VLOG_ERR("Failed mempool \"%s\" create request of %u mbufs",
+ mp_name, dmp->mp_size);
}
free(mp_name);
if (dmp->mp) {
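
(Not part of the patch: a standalone sketch of the per-port sizing heuristic introduced above. The function name is hypothetical; NETDEV_MAX_BURST, MIN_NB_MBUF, RTE_MAX_LCORE and MIN are the existing OVS/DPDK definitions.)

    static unsigned
    estimate_mp_size(const struct netdev_dpdk *dev)
    {
        return dev->requested_n_rxq * dev->requested_rxq_size   /* fill the rxqs */
               + dev->requested_n_txq * dev->requested_txq_size /* stuck on txqs */
               + MIN(RTE_MAX_LCORE, dev->requested_n_rxq)
                 * NETDEV_MAX_BURST                              /* in flight in PMDs */
               + MIN_NB_MBUF;                                    /* corner-case headroom */
    }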
@@ -541,31 +573,20 @@ dpdk_mp_create(int socket_id, int mtu)
rte_mempool_obj_iter(dmp->mp, ovs_rte_pktmbuf_init, NULL);
return dmp;
}
- } while (rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF);
+ } while (!mp_exists &&
+ (rte_errno == ENOMEM && (dmp->mp_size /= 2) >= MIN_NB_MBUF));
rte_free(dmp);
return NULL;
}
static struct dpdk_mp *
-dpdk_mp_get(int socket_id, int mtu)
+dpdk_mp_get(struct netdev_dpdk *dev, int mtu)
{
struct dpdk_mp *dmp;
ovs_mutex_lock(&dpdk_mp_mutex);
- LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
- if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
- dmp->refcount++;
- goto out;
- }
- }
-
- dmp = dpdk_mp_create(socket_id, mtu);
- if (dmp) {
- ovs_list_push_back(&dpdk_mp_list, &dmp->list_node);
- }
-
-out:
+ dmp = dpdk_mp_create(dev, mtu);
ovs_mutex_unlock(&dpdk_mp_mutex);
return dmp;
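
(Not part of the patch: the name built by dpdk_mp_name() is what lets two identical requests share one pool. A second rte_pktmbuf_pool_create() with the same name fails with EEXIST, and the existing pool is retrieved with rte_mempool_lookup(). A rough sketch of that path, with placeholder arguments:)

    struct rte_mempool *mp = rte_pktmbuf_pool_create(mp_name, n_mbufs,
                                                     MP_CACHE_SZ, priv_size,
                                                     data_room, socket_id);
    if (!mp && rte_errno == EEXIST) {
        /* Same name => same port/MTU/size request; reuse the existing pool. */
        mp = rte_mempool_lookup(mp_name);
    }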
@@ -574,18 +595,18 @@ out:
static void
dpdk_mp_put(struct dpdk_mp *dmp)
{
+ char *mp_name;
+
if (!dmp) {
return;
}
ovs_mutex_lock(&dpdk_mp_mutex);
- ovs_assert(dmp->refcount);
-
- if (!--dmp->refcount) {
- ovs_list_remove(&dmp->list_node);
- rte_mempool_free(dmp->mp);
- rte_free(dmp);
- }
+ mp_name = dpdk_mp_name(dmp);
+ VLOG_DBG("Releasing \"%s\" mempool", mp_name);
+ free(mp_name);
+ rte_mempool_free(dmp->mp);
+ rte_free(dmp);
ovs_mutex_unlock(&dpdk_mp_mutex);
}
@@ -600,7 +621,7 @@ netdev_dpdk_mempool_configure(struct netdev_dpdk *dev)
uint32_t buf_size = dpdk_buf_size(dev->requested_mtu);
struct dpdk_mp *mp;
- mp = dpdk_mp_get(dev->requested_socket_id, FRAME_LEN_TO_MTU(buf_size));
+ mp = dpdk_mp_get(dev, FRAME_LEN_TO_MTU(buf_size));
if (!mp) {
VLOG_ERR("Failed to create memory pool for netdev "
"%s, with MTU %d on socket %d: %s\n",
@@ -1501,7 +1522,8 @@ netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
static int
netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
- struct rte_mbuf **pkts, int pkt_cnt)
+ struct rte_mbuf **pkts, int pkt_cnt,
+ bool may_steal)
{
int i = 0;
int cnt = 0;
@@ -1517,7 +1539,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
}
cnt++;
} else {
- rte_pktmbuf_free(pkt);
+ if (may_steal) {
+ rte_pktmbuf_free(pkt);
+ }
}
}
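
(Not part of the patch: a sketch of the 'may_steal' contract threaded through here, in the same compact-and-count shape as netdev_dpdk_policer_run() above; the helper and its callback are hypothetical. Packets over the rate are always removed from the returned count, but are only freed when the caller has handed over ownership of the mbufs.)

    static int
    filter_pkts(struct rte_mbuf **pkts, int pkt_cnt, bool may_steal,
                bool (*admit)(struct rte_mbuf *pkt))
    {
        int cnt = 0;

        for (int i = 0; i < pkt_cnt; i++) {
            struct rte_mbuf *pkt = pkts[i];

            if (admit(pkt)) {
                if (cnt != i) {
                    pkts[cnt] = pkt;       /* compact the admitted packets */
                }
                cnt++;
            } else if (may_steal) {
                rte_pktmbuf_free(pkt);     /* only free mbufs we own */
            }
        }
        return cnt;
    }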
@@ -1526,12 +1550,13 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
static int
ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf **pkts,
- int pkt_cnt)
+ int pkt_cnt, bool may_steal)
{
int cnt = 0;
rte_spinlock_lock(&policer->policer_lock);
- cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);
+ cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
+ pkt_cnt, may_steal);
rte_spinlock_unlock(&policer->policer_lock);
return cnt;
@@ -1635,7 +1660,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
dropped = nb_rx;
nb_rx = ingress_policer_run(policer,
(struct rte_mbuf **) batch->packets,
- nb_rx);
+ nb_rx, true);
dropped -= nb_rx;
}
@@ -1673,7 +1698,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)
dropped = nb_rx;
nb_rx = ingress_policer_run(policer,
(struct rte_mbuf **) batch->packets,
- nb_rx);
+ nb_rx, true);
dropped -= nb_rx;
}
@@ -1692,13 +1717,13 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)
static inline int
netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
- int cnt)
+ int cnt, bool may_steal)
{
struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev->qos_conf);
if (qos_conf) {
rte_spinlock_lock(&qos_conf->lock);
- cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);
+ cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal);
rte_spinlock_unlock(&qos_conf->lock);
}
@@ -1772,7 +1797,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
/* Check has QoS has been configured for the netdev */
- cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
+ cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
dropped = total_pkts - cnt;
do {
@@ -1818,51 +1843,56 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
#endif
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
- int dropped = 0;
- int newcnt = 0;
- int i;
+ uint32_t cnt = batch->count;
+ uint32_t dropped = 0;
+
+ if (dev->type != DPDK_DEV_VHOST) {
+ /* Check if QoS has been configured for this netdev. */
+ cnt = netdev_dpdk_qos_run(dev, (struct rte_mbuf **) batch->packets,
+ cnt, false);
+ dropped += batch->count - cnt;
+ }
dp_packet_batch_apply_cutlen(batch);
- for (i = 0; i < batch->count; i++) {
- int size = dp_packet_size(batch->packets[i]);
+ uint32_t txcnt = 0;
+
+ for (uint32_t i = 0; i < cnt; i++) {
+
+ uint32_t size = dp_packet_size(batch->packets[i]);
if (OVS_UNLIKELY(size > dev->max_packet_len)) {
- VLOG_WARN_RL(&rl, "Too big size %d max_packet_len %d",
- (int) size, dev->max_packet_len);
+ VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
+ size, dev->max_packet_len);
dropped++;
continue;
}
- pkts[newcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
+ pkts[txcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
- if (!pkts[newcnt]) {
- dropped += batch->count - i;
+ if (!pkts[txcnt]) {
+ dropped += cnt - i;
break;
}
/* We have to do a copy for now */
- memcpy(rte_pktmbuf_mtod(pkts[newcnt], void *),
+ memcpy(rte_pktmbuf_mtod(pkts[txcnt], void *),
dp_packet_data(batch->packets[i]), size);
- rte_pktmbuf_data_len(pkts[newcnt]) = size;
- rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
+ rte_pktmbuf_data_len(pkts[txcnt]) = size;
+ rte_pktmbuf_pkt_len(pkts[txcnt]) = size;
- newcnt++;
+ txcnt++;
}
- if (dev->type == DPDK_DEV_VHOST) {
- __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
- newcnt);
- } else {
- unsigned int qos_pkts = newcnt;
-
- /* Check if QoS has been configured for this netdev. */
- newcnt = netdev_dpdk_qos_run(dev, pkts, newcnt);
-
- dropped += qos_pkts - newcnt;
- dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);
+ if (OVS_LIKELY(txcnt)) {
+ if (dev->type == DPDK_DEV_VHOST) {
+ __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
+ txcnt);
+ } else {
+ dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, txcnt);
+ }
}
if (OVS_UNLIKELY(dropped)) {
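
(Not part of the patch: the drop counter in the rewritten copy path above accumulates four independent causes; a trivial sketch of that arithmetic with hypothetical names.)

    static uint32_t
    tx_copy_drops(uint32_t batch_cnt, uint32_t after_qos, uint32_t oversized,
                  uint32_t alloc_failed, uint32_t unsent)
    {
        /* QoS drops are counted but not freed here (may_steal == false);
         * the originals still belong to the caller's batch. */
        return (batch_cnt - after_qos) + oversized + alloc_failed + unsent;
    }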
@@ -1917,7 +1947,7 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
dp_packet_batch_apply_cutlen(batch);
cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
- cnt = netdev_dpdk_qos_run(dev, pkts, cnt);
+ cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);
dropped = batch->count - cnt;
dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt);
@@ -3134,13 +3164,15 @@ egress_policer_qos_is_equal(const struct qos_conf *conf,
}
static int
-egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int pkt_cnt)
+egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int pkt_cnt,
+ bool may_steal)
{
int cnt = 0;
struct egress_policer *policer =
CONTAINER_OF(conf, struct egress_policer, qos_conf);
- cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts, pkt_cnt);
+ cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
+ pkt_cnt, may_steal);
return cnt;
}
@@ -3175,12 +3207,9 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
rte_eth_dev_stop(dev->port_id);
- if (dev->mtu != dev->requested_mtu
- || dev->socket_id != dev->requested_socket_id) {
- err = netdev_dpdk_mempool_configure(dev);
- if (err) {
- goto out;
- }
+ err = netdev_dpdk_mempool_configure(dev);
+ if (err) {
+ goto out;
}
netdev->n_txq = dev->requested_n_txq;
@@ -3218,18 +3247,19 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev)
netdev_dpdk_remap_txqs(dev);
- if (dev->requested_socket_id != dev->socket_id
- || dev->requested_mtu != dev->mtu) {
- err = netdev_dpdk_mempool_configure(dev);
- if (err) {
- return err;
- } else {
- netdev_change_seq_changed(&dev->up);
- }
+ err = netdev_dpdk_mempool_configure(dev);
+ if (err) {
+ return err;
+ } else {
+ netdev_change_seq_changed(&dev->up);
}
if (netdev_dpdk_get_vid(dev) >= 0) {
- dev->vhost_reconfigured = true;
+ if (dev->vhost_reconfigured == false) {
+ dev->vhost_reconfigured = true;
+ /* Carrier status may need updating. */
+ netdev_change_seq_changed(&dev->up);
+ }
}
return 0;