diff options
author | Ben Pfaff <blp@ovn.org> | 2017-09-12 07:12:53 -0700 |
---|---|---|
committer | Ben Pfaff <blp@ovn.org> | 2017-09-12 07:12:53 -0700 |
commit | a5781d3270e6b94c13a063cead664e5553b6cb70 (patch) | |
tree | 2830592d6baf032b0f902f189863c4782a21af51 /lib | |
parent | 3a0c5805f90f141ec138ec71d4ec94d84db30531 (diff) | |
parent | 3e90f7d753366a27659db71a96798fa89e2a6cd4 (diff) | |
download | openvswitch-a5781d3270e6b94c13a063cead664e5553b6cb70.tar.gz |
Merge branch 'dpdk_merge' of https://github.com/darball/ovs.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/dpif-netdev.c | 43 |
-rw-r--r-- | lib/netdev-dpdk.c | 236 |
2 files changed, 155 insertions, 124 deletions
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 0ceef9d82..ca74df8c7 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -796,7 +796,7 @@ pmd_info_show_stats(struct ds *reply, unsigned long long stats[DP_N_STATS], uint64_t cycles[PMD_N_CYCLES]) { - unsigned long long total_packets = 0; + unsigned long long total_packets; uint64_t total_cycles = 0; int i; @@ -812,13 +812,12 @@ pmd_info_show_stats(struct ds *reply, } else { stats[i] = 0; } - - if (i != DP_STAT_LOST) { - /* Lost packets are already included in DP_STAT_MISS */ - total_packets += stats[i]; - } } + /* Sum of all the matched and not matched packets gives the total. */ + total_packets = stats[DP_STAT_EXACT_HIT] + stats[DP_STAT_MASKED_HIT] + + stats[DP_STAT_MISS]; + for (i = 0; i < PMD_N_CYCLES; i++) { if (cycles[i] > pmd->cycles_zero[i]) { cycles[i] -= pmd->cycles_zero[i]; @@ -3428,8 +3427,8 @@ rr_numa_list_destroy(struct rr_numa_list *rr) static int rxq_cycle_sort(const void *a, const void *b) { - struct dp_netdev_rxq * qa; - struct dp_netdev_rxq * qb; + struct dp_netdev_rxq *qa; + struct dp_netdev_rxq *qb; uint64_t total_qa, total_qb; unsigned i; @@ -3864,9 +3863,10 @@ dpif_netdev_run(struct dpif *dpif) dp_netdev_process_rxq_port(non_pmd, port->rxqs[i].rx, port->port_no); - cycles_count_intermediate(non_pmd, NULL, process_packets ? - PMD_CYCLES_PROCESSING - : PMD_CYCLES_IDLE); + cycles_count_intermediate(non_pmd, NULL, + process_packets + ? PMD_CYCLES_PROCESSING + : PMD_CYCLES_IDLE); } } } @@ -4858,8 +4858,11 @@ dp_netdev_queue_batches(struct dp_packet *pkt, * The function returns the number of packets that needs to be processed in the * 'packets' array (they have been moved to the beginning of the vector). * - * If 'md_is_valid' is false, the metadata in 'packets' is not valid and must - * be initialized by this function using 'port_no'. + * For performance reasons a caller may choose not to initialize the metadata + * in 'packets_'. 
If 'md_is_valid' is false, the metadata in 'packets' + * is not valid and must be initialized by this function using 'port_no'. + * If 'md_is_valid' is true, the metadata is already valid and 'port_no' + * will be ignored. */ static inline size_t emc_processing(struct dp_netdev_pmd_thread *pmd, @@ -4872,13 +4875,13 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, struct netdev_flow_key *key = &keys[0]; size_t n_missed = 0, n_dropped = 0; struct dp_packet *packet; - const size_t size = dp_packet_batch_size(packets_); + const size_t cnt = dp_packet_batch_size(packets_); uint32_t cur_min; int i; atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min); - DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) { + DP_PACKET_BATCH_REFILL_FOR_EACH (i, cnt, packet, packets_) { struct dp_netdev_flow *flow; if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) { @@ -4887,7 +4890,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, continue; } - if (i != size - 1) { + if (i != cnt - 1) { struct dp_packet **packets = packets_->packets; /* Prefetch next packet data and metadata. */ OVS_PREFETCH(dp_packet_data(packets[i+1])); @@ -4918,7 +4921,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, } dp_netdev_count_packet(pmd, DP_STAT_EXACT_HIT, - size - n_dropped - n_missed); + cnt - n_dropped - n_missed); return dp_packet_batch_size(packets_); } @@ -5092,10 +5095,8 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd, /* Packets enter the datapath from a port (or from recirculation) here. * - * For performance reasons a caller may choose not to initialize the metadata - * in 'packets': in this case 'mdinit' is false and this function needs to - * initialize it using 'port_no'. If the metadata in 'packets' is already - * valid, 'md_is_valid' must be true and 'port_no' will be ignored. */ + * When 'md_is_valid' is true the metadata in 'packets' are already valid. + * When false the metadata in 'packets' need to be initialized. 
*/ static void dp_netdev_input__(struct dp_netdev_pmd_thread *pmd, struct dp_packet_batch *packets, diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index f58e9bed1..648d719ba 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -279,7 +279,7 @@ struct dpdk_qos_ops { * For all QoS implementations it should always be non-null. */ int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts, - int pkt_cnt); + int pkt_cnt, bool may_steal); }; /* dpdk_qos_ops for each type of user space QoS implementation */ @@ -303,14 +303,12 @@ static struct ovs_list dpdk_list OVS_GUARDED_BY(dpdk_mutex) static struct ovs_mutex dpdk_mp_mutex OVS_ACQ_AFTER(dpdk_mutex) = OVS_MUTEX_INITIALIZER; -static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mp_mutex) - = OVS_LIST_INITIALIZER(&dpdk_mp_list); - struct dpdk_mp { struct rte_mempool *mp; int mtu; int socket_id; - int refcount; + char if_name[IFNAMSIZ]; + unsigned mp_size; struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex); }; @@ -492,45 +490,79 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp OVS_UNUSED, dp_packet_init_dpdk((struct dp_packet *) pkt, pkt->buf_len); } +/* + * Full DPDK memory pool name must be unique + * and cannot be longer than RTE_MEMPOOL_NAMESIZE + */ +static char * +dpdk_mp_name(struct dpdk_mp *dmp) +{ + uint32_t h = hash_string(dmp->if_name, 0); + char *mp_name = xcalloc(RTE_MEMPOOL_NAMESIZE, sizeof *mp_name); + int ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_%x_%d_%u", + h, dmp->mtu, dmp->mp_size); + if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) { + return NULL; + } + return mp_name; +} + static struct dpdk_mp * -dpdk_mp_create(int socket_id, int mtu) +dpdk_mp_create(struct netdev_dpdk *dev, int mtu) { - struct dpdk_mp *dmp; - unsigned mp_size; - char *mp_name; - - dmp = dpdk_rte_mzalloc(sizeof *dmp); + struct dpdk_mp *dmp = dpdk_rte_mzalloc(sizeof *dmp); if (!dmp) { return NULL; } - dmp->socket_id = socket_id; + dmp->socket_id = dev->requested_socket_id; dmp->mtu = mtu; - dmp->refcount = 1; 
- /* XXX: this is a really rough method of provisioning memory. - * It's impossible to determine what the exact memory requirements are - * when the number of ports and rxqs that utilize a particular mempool can - * change dynamically at runtime. For now, use this rough heurisitic. + ovs_strzcpy(dmp->if_name, dev->up.name, IFNAMSIZ); + + /* + * XXX: rough estimation of memory required for port: + * <packets required to fill the device rxqs> + * + <packets that could be stuck on other ports txqs> + * + <packets in the pmd threads> + * + <additional memory for corner cases> */ - if (mtu >= ETHER_MTU) { - mp_size = MAX_NB_MBUF; - } else { - mp_size = MIN_NB_MBUF; - } + dmp->mp_size = dev->requested_n_rxq * dev->requested_rxq_size + + dev->requested_n_txq * dev->requested_txq_size + + MIN(RTE_MAX_LCORE, dev->requested_n_rxq) * NETDEV_MAX_BURST + + MIN_NB_MBUF; + + bool mp_exists = false; do { - mp_name = xasprintf("ovs_mp_%d_%d_%u", dmp->mtu, dmp->socket_id, - mp_size); + char *mp_name = dpdk_mp_name(dmp); - dmp->mp = rte_pktmbuf_pool_create(mp_name, mp_size, + VLOG_DBG("Requesting a mempool of %u mbufs for netdev %s " + "with %d Rx and %d Tx queues.", + dmp->mp_size, dev->up.name, + dev->requested_n_rxq, dev->requested_n_txq); + + dmp->mp = rte_pktmbuf_pool_create(mp_name, dmp->mp_size, MP_CACHE_SZ, sizeof (struct dp_packet) - sizeof (struct rte_mbuf), MBUF_SIZE(mtu) - sizeof(struct dp_packet), - socket_id); + dmp->socket_id); if (dmp->mp) { - VLOG_DBG("Allocated \"%s\" mempool with %u mbufs", - mp_name, mp_size); + VLOG_DBG("Allocated \"%s\" mempool with %u mbufs", mp_name, + dmp->mp_size); + } else if (rte_errno == EEXIST) { + /* A mempool with the same name already exists. We just + * retrieve its pointer to be returned to the caller. */ + dmp->mp = rte_mempool_lookup(mp_name); + VLOG_DBG("A mempool with name %s already exists at %p.", + mp_name, dmp->mp); + /* As the mempool create returned EEXIST we can expect the + * lookup has returned a valid pointer. 
If for some reason + * that's not the case we keep track of it. */ + mp_exists = true; + } else { + VLOG_ERR("Failed mempool \"%s\" create request of %u mbufs", + mp_name, dmp->mp_size); } free(mp_name); if (dmp->mp) { @@ -541,31 +573,20 @@ dpdk_mp_create(int socket_id, int mtu) rte_mempool_obj_iter(dmp->mp, ovs_rte_pktmbuf_init, NULL); return dmp; } - } while (rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF); + } while (!mp_exists && + (rte_errno == ENOMEM && (dmp->mp_size /= 2) >= MIN_NB_MBUF)); rte_free(dmp); return NULL; } static struct dpdk_mp * -dpdk_mp_get(int socket_id, int mtu) +dpdk_mp_get(struct netdev_dpdk *dev, int mtu) { struct dpdk_mp *dmp; ovs_mutex_lock(&dpdk_mp_mutex); - LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) { - if (dmp->socket_id == socket_id && dmp->mtu == mtu) { - dmp->refcount++; - goto out; - } - } - - dmp = dpdk_mp_create(socket_id, mtu); - if (dmp) { - ovs_list_push_back(&dpdk_mp_list, &dmp->list_node); - } - -out: + dmp = dpdk_mp_create(dev, mtu); ovs_mutex_unlock(&dpdk_mp_mutex); return dmp; @@ -574,18 +595,18 @@ out: static void dpdk_mp_put(struct dpdk_mp *dmp) { + char *mp_name; + if (!dmp) { return; } ovs_mutex_lock(&dpdk_mp_mutex); - ovs_assert(dmp->refcount); - - if (!--dmp->refcount) { - ovs_list_remove(&dmp->list_node); - rte_mempool_free(dmp->mp); - rte_free(dmp); - } + mp_name = dpdk_mp_name(dmp); + VLOG_DBG("Releasing \"%s\" mempool", mp_name); + free(mp_name); + rte_mempool_free(dmp->mp); + rte_free(dmp); ovs_mutex_unlock(&dpdk_mp_mutex); } @@ -600,7 +621,7 @@ netdev_dpdk_mempool_configure(struct netdev_dpdk *dev) uint32_t buf_size = dpdk_buf_size(dev->requested_mtu); struct dpdk_mp *mp; - mp = dpdk_mp_get(dev->requested_socket_id, FRAME_LEN_TO_MTU(buf_size)); + mp = dpdk_mp_get(dev, FRAME_LEN_TO_MTU(buf_size)); if (!mp) { VLOG_ERR("Failed to create memory pool for netdev " "%s, with MTU %d on socket %d: %s\n", @@ -1501,7 +1522,8 @@ netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter, static int 
netdev_dpdk_policer_run(struct rte_meter_srtcm *meter, - struct rte_mbuf **pkts, int pkt_cnt) + struct rte_mbuf **pkts, int pkt_cnt, + bool may_steal) { int i = 0; int cnt = 0; @@ -1517,7 +1539,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm *meter, } cnt++; } else { - rte_pktmbuf_free(pkt); + if (may_steal) { + rte_pktmbuf_free(pkt); + } } } @@ -1526,12 +1550,13 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm *meter, static int ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf **pkts, - int pkt_cnt) + int pkt_cnt, bool may_steal) { int cnt = 0; rte_spinlock_lock(&policer->policer_lock); - cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt); + cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, + pkt_cnt, may_steal); rte_spinlock_unlock(&policer->policer_lock); return cnt; @@ -1635,7 +1660,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq, dropped = nb_rx; nb_rx = ingress_policer_run(policer, (struct rte_mbuf **) batch->packets, - nb_rx); + nb_rx, true); dropped -= nb_rx; } @@ -1673,7 +1698,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch) dropped = nb_rx; nb_rx = ingress_policer_run(policer, (struct rte_mbuf **) batch->packets, - nb_rx); + nb_rx, true); dropped -= nb_rx; } @@ -1692,13 +1717,13 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch) static inline int netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts, - int cnt) + int cnt, bool may_steal) { struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev->qos_conf); if (qos_conf) { rte_spinlock_lock(&qos_conf->lock); - cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt); + cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal); rte_spinlock_unlock(&qos_conf->lock); } @@ -1772,7 +1797,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid, cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt); /* Check has QoS has been configured for the netdev */ - cnt = 
netdev_dpdk_qos_run(dev, cur_pkts, cnt); + cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true); dropped = total_pkts - cnt; do { @@ -1818,51 +1843,56 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) #endif struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); struct rte_mbuf *pkts[PKT_ARRAY_SIZE]; - int dropped = 0; - int newcnt = 0; - int i; + uint32_t cnt = batch->count; + uint32_t dropped = 0; + + if (dev->type != DPDK_DEV_VHOST) { + /* Check if QoS has been configured for this netdev. */ + cnt = netdev_dpdk_qos_run(dev, (struct rte_mbuf **) batch->packets, + cnt, false); + dropped += batch->count - cnt; + } dp_packet_batch_apply_cutlen(batch); - for (i = 0; i < batch->count; i++) { - int size = dp_packet_size(batch->packets[i]); + uint32_t txcnt = 0; + + for (uint32_t i = 0; i < cnt; i++) { + + uint32_t size = dp_packet_size(batch->packets[i]); if (OVS_UNLIKELY(size > dev->max_packet_len)) { - VLOG_WARN_RL(&rl, "Too big size %d max_packet_len %d", - (int) size, dev->max_packet_len); + VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d", + size, dev->max_packet_len); dropped++; continue; } - pkts[newcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp); + pkts[txcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp); - if (!pkts[newcnt]) { - dropped += batch->count - i; + if (!pkts[txcnt]) { + dropped += cnt - i; break; } /* We have to do a copy for now */ - memcpy(rte_pktmbuf_mtod(pkts[newcnt], void *), + memcpy(rte_pktmbuf_mtod(pkts[txcnt], void *), dp_packet_data(batch->packets[i]), size); - rte_pktmbuf_data_len(pkts[newcnt]) = size; - rte_pktmbuf_pkt_len(pkts[newcnt]) = size; + rte_pktmbuf_data_len(pkts[txcnt]) = size; + rte_pktmbuf_pkt_len(pkts[txcnt]) = size; - newcnt++; + txcnt++; } - if (dev->type == DPDK_DEV_VHOST) { - __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts, - newcnt); - } else { - unsigned int qos_pkts = newcnt; - - /* Check if QoS has been configured for this netdev. 
*/ - newcnt = netdev_dpdk_qos_run(dev, pkts, newcnt); - - dropped += qos_pkts - newcnt; - dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt); + if (OVS_LIKELY(txcnt)) { + if (dev->type == DPDK_DEV_VHOST) { + __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts, + txcnt); + } else { + dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, txcnt); + } } if (OVS_UNLIKELY(dropped)) { @@ -1917,7 +1947,7 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid, dp_packet_batch_apply_cutlen(batch); cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt); - cnt = netdev_dpdk_qos_run(dev, pkts, cnt); + cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true); dropped = batch->count - cnt; dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt); @@ -3134,13 +3164,15 @@ egress_policer_qos_is_equal(const struct qos_conf *conf, } static int -egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int pkt_cnt) +egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int pkt_cnt, + bool may_steal) { int cnt = 0; struct egress_policer *policer = CONTAINER_OF(conf, struct egress_policer, qos_conf); - cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts, pkt_cnt); + cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts, + pkt_cnt, may_steal); return cnt; } @@ -3175,12 +3207,9 @@ netdev_dpdk_reconfigure(struct netdev *netdev) rte_eth_dev_stop(dev->port_id); - if (dev->mtu != dev->requested_mtu - || dev->socket_id != dev->requested_socket_id) { - err = netdev_dpdk_mempool_configure(dev); - if (err) { - goto out; - } + err = netdev_dpdk_mempool_configure(dev); + if (err) { + goto out; } netdev->n_txq = dev->requested_n_txq; @@ -3218,18 +3247,19 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev) netdev_dpdk_remap_txqs(dev); - if (dev->requested_socket_id != dev->socket_id - || dev->requested_mtu != dev->mtu) { - err = netdev_dpdk_mempool_configure(dev); - if (err) { - return err; - } else { - netdev_change_seq_changed(&dev->up); - } + err = 
netdev_dpdk_mempool_configure(dev); + if (err) { + return err; + } else { + netdev_change_seq_changed(&dev->up); } if (netdev_dpdk_get_vid(dev) >= 0) { - dev->vhost_reconfigured = true; + if (dev->vhost_reconfigured == false) { + dev->vhost_reconfigured = true; + /* Carrier status may need updating. */ + netdev_change_seq_changed(&dev->up); + } } return 0; |