author     Ben Pfaff <blp@ovn.org>    2018-01-02 07:45:17 -0800
committer  Ben Pfaff <blp@ovn.org>    2018-01-02 07:45:17 -0800
commit     34944e81f00053f96112ea07b2cf770dd26fa1e3 (patch)
tree       62e395aa4bbb68d510a360dc5afd5abda2a3055e
parent     3f33aa477256a276fd0985684c9bcb210f6150c1 (diff)
parent     cc4891f39d574986948ac87280cfe9017fe17a39 (diff)
download   openvswitch-34944e81f00053f96112ea07b2cf770dd26fa1e3.tar.gz
Merge branch 'dpdk_merge' of https://github.com/istokes/ovs into HEAD
-rw-r--r--  Documentation/howto/dpdk.rst   22
-rw-r--r--  NEWS                            2
-rw-r--r--  lib/automake.mk                 1
-rw-r--r--  lib/dpif-netdev.c             244
-rw-r--r--  lib/netdev-bsd.c                6
-rw-r--r--  lib/netdev-dpdk-unixctl.man    14
-rw-r--r--  lib/netdev-dpdk.c              84
-rw-r--r--  lib/netdev-dummy.c              6
-rw-r--r--  lib/netdev-linux.c              8
-rw-r--r--  lib/netdev-provider.h           7
-rw-r--r--  lib/netdev.c                   12
-rw-r--r--  lib/netdev.h                    2
-rw-r--r--  manpages.mk                     2
-rw-r--r--  vswitchd/ovs-vswitchd.8.in      1
14 files changed, 290 insertions, 121 deletions
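The centerpiece of this merge is output packet batching in dpif-netdev: instead of calling netdev_send() once per flow batch, the OVS_ACTION_ATTR_OUTPUT handler now queues packets on a per-port 'output_pkts' batch, which is flushed once the whole input batch has been processed. A minimal standalone sketch of that accumulate-then-flush pattern follows; NETDEV_MAX_BURST matches the OVS constant, but the types and helper names (pkt, out_batch, flush_port, output_pkt) are simplified stand-ins, not the actual OVS structures:

#include <stddef.h>
#include <stdio.h>

#define NETDEV_MAX_BURST 32

struct pkt { int id; };

struct out_batch {
    struct pkt *pkts[NETDEV_MAX_BURST];
    size_t count;
};

/* Flush everything queued for a port in one send call (the analogue of
 * dp_netdev_pmd_flush_output_on_port()). */
static void
flush_port(int port_no, struct out_batch *b)
{
    if (b->count) {
        printf("port %d: sending batch of %zu packets\n", port_no, b->count);
        b->count = 0;   /* Ownership of the packets passes to the device. */
    }
}

/* Queue one packet for later transmission, flushing first if the batch
 * would overflow NETDEV_MAX_BURST. */
static void
output_pkt(int port_no, struct out_batch *b, struct pkt *p)
{
    if (b->count >= NETDEV_MAX_BURST) {
        flush_port(port_no, b);
    }
    b->pkts[b->count++] = p;
}

int
main(void)
{
    struct out_batch port0 = { .count = 0 };
    struct pkt pkts[40];

    /* Packets accumulate while an input burst is being processed... */
    for (int i = 0; i < 40; i++) {
        pkts[i].id = i;
        output_pkt(0, &port0, &pkts[i]);
    }

    /* ...and one final flush runs after the whole burst, the analogue of
     * dp_netdev_pmd_flush_output_packets() after dp_netdev_input(). */
    flush_port(0, &port0);
    return 0;
}

The payoff is visible in the new "avg. packets per output batch" statistic added to pmd-stats-show below: fuller batches amortize the per-send cost of the netdev.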
diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index d1238195a..2393c2ff7 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -531,16 +531,18 @@ Add test flows to forward packets betwen DPDK devices and VM ports::
 
 Create a VM using the following configuration:
 
-+----------------------+--------+-----------------+
-| configuration        | values | comments        |
-+----------------------+--------+-----------------+
-| qemu version         | 2.2.0  | n/a             |
-| qemu thread affinity | core 5 | taskset 0x20    |
-| memory               | 4GB    | n/a             |
-| cores                | 2      | n/a             |
-| Qcow2 image          | CentOS7| n/a             |
-| mrg_rxbuf            | off    | n/a             |
-+----------------------+--------+-----------------+
+.. table::
+
+   ===================== ======== ============
+   Configuration         Values   Comments
+   ===================== ======== ============
+   QEMU version          2.2.0    n/a
+   QEMU thread affinity  core 5   taskset 0x20
+   Memory                4GB      n/a
+   Cores                 2        n/a
+   Qcow2 image           CentOS7  n/a
+   mrg_rxbuf             off      n/a
+   ===================== ======== ============
 
 You can do this directly with QEMU via the ``qemu-system-x86_64``
 application::
diff --git a/NEWS b/NEWS
@@ -27,6 +27,8 @@ Post-v2.8.0
    - DPDK:
      * Add support for DPDK v17.11
      * Add support for vHost IOMMU
+     * New debug appctl command 'netdev-dpdk/get-mempool-info'.
+     * All the netdev-dpdk appctl commands described in ovs-vswitchd man page.
    - vswitchd:
      * Datapath IDs may now be specified as 0x1 (etc.) instead of 16 digits.
diff --git a/lib/automake.mk b/lib/automake.mk
index effe5b5c2..4b38a1163 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -465,6 +465,7 @@ MAN_FRAGMENTS += \
 	lib/db-ctl-base.man \
 	lib/dpctl.man \
 	lib/memory-unixctl.man \
+	lib/netdev-dpdk-unixctl.man \
 	lib/ofp-version.man \
 	lib/ovs.tmac \
 	lib/service.man \
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 6ba025b66..349e98a8a 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -338,6 +338,8 @@ enum dp_stat_type {
     DP_STAT_LOST,            /* Packets not passed up to the client. */
     DP_STAT_LOOKUP_HIT,      /* Number of subtable lookups for flow table
                                 hits */
+    DP_STAT_SENT_PKTS,       /* Packets that has been sent. */
+    DP_STAT_SENT_BATCHES,    /* Number of batches sent. */
     DP_N_STATS
 };
@@ -509,6 +511,9 @@ struct dp_netdev_pmd_cycles {
     atomic_ullong n[PMD_N_CYCLES];
 };
 
+static void dp_netdev_count_packet(struct dp_netdev_pmd_thread *,
+                                   enum dp_stat_type type, int cnt);
+
 struct polled_queue {
     struct dp_netdev_rxq *rxq;
     odp_port_t port_no;
@@ -527,6 +532,18 @@ struct tx_port {
     int qid;
     long long last_used;
     struct hmap_node node;
+    struct dp_packet_batch output_pkts;
+};
+
+/* A set of properties for the current processing loop that is not directly
+ * associated with the pmd thread itself, but with the packets being
+ * processed or the short-term system configuration (for example, time).
+ * Contained by struct dp_netdev_pmd_thread's 'ctx' member. */
+struct dp_netdev_pmd_thread_ctx {
+    /* Latest measured time. See 'pmd_thread_ctx_time_update()'. */
+    long long now;
+    /* Used to count cycles. See 'cycles_count_end()' */
+    unsigned long long last_cycles;
 };
 
 /* PMD: Poll modes drivers. PMD accesses devices via polling to eliminate
@@ -584,8 +601,8 @@ struct dp_netdev_pmd_thread {
     /* Cycles counters */
     struct dp_netdev_pmd_cycles cycles;
 
-    /* Used to count cicles. See 'cycles_counter_end()' */
-    unsigned long long last_cycles;
+    /* Current context of the PMD thread. */
+    struct dp_netdev_pmd_thread_ctx ctx;
 
     struct latch exit_latch;        /* For terminating the pmd thread. */
     struct seq *reload_seq;
@@ -658,8 +675,7 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
                                       struct dp_packet_batch *,
                                       bool may_steal, const struct flow *flow,
                                       const struct nlattr *actions,
-                                      size_t actions_len,
-                                      long long now);
+                                      size_t actions_len);
 static void dp_netdev_input(struct dp_netdev_pmd_thread *,
                             struct dp_packet_batch *, odp_port_t port_no);
 static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
@@ -695,6 +711,9 @@ static void dp_netdev_add_rxq_to_pmd(struct dp_netdev_pmd_thread *pmd,
 static void dp_netdev_del_rxq_from_pmd(struct dp_netdev_pmd_thread *pmd,
                                        struct rxq_poll *poll)
     OVS_REQUIRES(pmd->port_mutex);
+static void
+dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd);
+
 static void reconfigure_datapath(struct dp_netdev *dp)
     OVS_REQUIRES(dp->port_mutex);
 static bool dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd);
@@ -719,9 +738,9 @@ static uint64_t dp_netdev_rxq_get_intrvl_cycles(struct dp_netdev_rxq *rx,
                                                 unsigned idx);
 static void
 dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
-                               long long now, bool purge);
+                               bool purge);
 static int dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
-                                      struct tx_port *tx, long long now);
+                                      struct tx_port *tx);
 
 static inline bool emc_entry_alive(struct emc_entry *ce);
 static void emc_clear_entry(struct emc_entry *ce);
@@ -765,6 +784,28 @@ emc_cache_slow_sweep(struct emc_cache *flow_cache)
     flow_cache->sweep_idx = (flow_cache->sweep_idx + 1) & EM_FLOW_HASH_MASK;
 }
 
+/* Updates the time in PMD threads context and should be called in three cases:
+ *
+ *     1. PMD structure initialization:
+ *         - dp_netdev_configure_pmd()
+ *
+ *     2. Before processing of the new packet batch:
+ *         - dpif_netdev_execute()
+ *         - dp_netdev_process_rxq_port()
+ *
+ *     3. At least once per polling iteration in main polling threads if no
+ *        packets received on current iteration:
+ *         - dpif_netdev_run()
+ *         - pmd_thread_main()
+ *
+ * 'pmd->ctx.now' should be used without update in all other cases if possible.
+ */
+static inline void
+pmd_thread_ctx_time_update(struct dp_netdev_pmd_thread *pmd)
+{
+    pmd->ctx.now = time_msec();
+}
+
 /* Returns true if 'dpif' is a netdev or dummy dpif, false otherwise. */
 bool
 dpif_is_netdev(const struct dpif *dpif)
@@ -799,6 +840,7 @@ pmd_info_show_stats(struct ds *reply,
 {
     unsigned long long total_packets;
     uint64_t total_cycles = 0;
+    double lookups_per_hit = 0, packets_per_batch = 0;
     int i;
 
     /* These loops subtracts reference values ('*_zero') from the counters.
@@ -840,15 +882,23 @@ pmd_info_show_stats(struct ds *reply,
     }
     ds_put_cstr(reply, ":\n");
 
+    if (stats[DP_STAT_MASKED_HIT] > 0) {
+        lookups_per_hit = stats[DP_STAT_LOOKUP_HIT]
+                          / (double) stats[DP_STAT_MASKED_HIT];
+    }
+    if (stats[DP_STAT_SENT_BATCHES] > 0) {
+        packets_per_batch = stats[DP_STAT_SENT_PKTS]
+                            / (double) stats[DP_STAT_SENT_BATCHES];
+    }
+
     ds_put_format(reply,
                   "\temc hits:%llu\n\tmegaflow hits:%llu\n"
                   "\tavg. subtable lookups per hit:%.2f\n"
-                  "\tmiss:%llu\n\tlost:%llu\n",
+                  "\tmiss:%llu\n\tlost:%llu\n"
+                  "\tavg. packets per output batch: %.2f\n",
                   stats[DP_STAT_EXACT_HIT], stats[DP_STAT_MASKED_HIT],
-                  stats[DP_STAT_MASKED_HIT] > 0
-                  ? (1.0*stats[DP_STAT_LOOKUP_HIT])/stats[DP_STAT_MASKED_HIT]
-                  : 0,
-                  stats[DP_STAT_MISS], stats[DP_STAT_LOST]);
+                  lookups_per_hit, stats[DP_STAT_MISS], stats[DP_STAT_LOST],
+                  packets_per_batch);
 
     if (total_cycles == 0) {
         return;
@@ -2916,6 +2966,9 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
         ovs_mutex_lock(&dp->non_pmd_mutex);
     }
 
+    /* Update current time in PMD context. */
+    pmd_thread_ctx_time_update(pmd);
+
     /* The action processing expects the RSS hash to be valid, because
      * it's always initialized at the beginning of datapath processing.
      * In this case, though, 'execute->packet' may not have gone through
@@ -2928,8 +2981,8 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
 
     dp_packet_batch_init_packet(&pp, execute->packet);
     dp_netdev_execute_actions(pmd, &pp, false, execute->flow,
-                              execute->actions, execute->actions_len,
-                              time_msec());
+                              execute->actions, execute->actions_len);
+    dp_netdev_pmd_flush_output_packets(pmd);
 
     if (pmd->core_id == NON_PMD_CORE_ID) {
         ovs_mutex_unlock(&dp->non_pmd_mutex);
@@ -3150,7 +3203,7 @@ cycles_count_start(struct dp_netdev_pmd_thread *pmd)
     OVS_ACQUIRES(&cycles_counter_fake_mutex)
     OVS_NO_THREAD_SAFETY_ANALYSIS
 {
-    pmd->last_cycles = cycles_counter();
+    pmd->ctx.last_cycles = cycles_counter();
 }
 
 /* Stop counting cycles and add them to the counter 'type' */
@@ -3160,7 +3213,7 @@ cycles_count_end(struct dp_netdev_pmd_thread *pmd,
     OVS_RELEASES(&cycles_counter_fake_mutex)
     OVS_NO_THREAD_SAFETY_ANALYSIS
 {
-    unsigned long long interval = cycles_counter() - pmd->last_cycles;
+    unsigned long long interval = cycles_counter() - pmd->ctx.last_cycles;
 
     non_atomic_ullong_add(&pmd->cycles.n[type], interval);
 }
@@ -3173,8 +3226,8 @@ cycles_count_intermediate(struct dp_netdev_pmd_thread *pmd,
     OVS_NO_THREAD_SAFETY_ANALYSIS
 {
     unsigned long long new_cycles = cycles_counter();
-    unsigned long long interval = new_cycles - pmd->last_cycles;
-    pmd->last_cycles = new_cycles;
+    unsigned long long interval = new_cycles - pmd->ctx.last_cycles;
+    pmd->ctx.last_cycles = new_cycles;
 
     non_atomic_ullong_add(&pmd->cycles.n[type], interval);
     if (rxq && (type == PMD_CYCLES_PROCESSING)) {
@@ -3216,6 +3269,42 @@ dp_netdev_rxq_get_intrvl_cycles(struct dp_netdev_rxq *rx, unsigned idx)
     return processing_cycles;
 }
 
+static void
+dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
+                                   struct tx_port *p)
+{
+    int tx_qid;
+    int output_cnt;
+    bool dynamic_txqs;
+
+    dynamic_txqs = p->port->dynamic_txqs;
+    if (dynamic_txqs) {
+        tx_qid = dpif_netdev_xps_get_tx_qid(pmd, p);
+    } else {
+        tx_qid = pmd->static_tx_qid;
+    }
+
+    output_cnt = dp_packet_batch_size(&p->output_pkts);
+
+    netdev_send(p->port->netdev, tx_qid, &p->output_pkts, dynamic_txqs);
+    dp_packet_batch_init(&p->output_pkts);
+
+    dp_netdev_count_packet(pmd, DP_STAT_SENT_PKTS, output_cnt);
+    dp_netdev_count_packet(pmd, DP_STAT_SENT_BATCHES, 1);
+}
+
+static void
+dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd)
+{
+    struct tx_port *p;
+
+    HMAP_FOR_EACH (p, node, &pmd->send_port_cache) {
+        if (!dp_packet_batch_is_empty(&p->output_pkts)) {
+            dp_netdev_pmd_flush_output_on_port(pmd, p);
+        }
+    }
+}
+
 static int
 dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
                            struct netdev_rxq *rx,
@@ -3229,9 +3318,11 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
     error = netdev_rxq_recv(rx, &batch);
     if (!error) {
         *recirc_depth_get() = 0;
+        pmd_thread_ctx_time_update(pmd);
 
         batch_cnt = batch.count;
         dp_netdev_input(pmd, &batch, port_no);
+        dp_netdev_pmd_flush_output_packets(pmd);
     } else if (error != EAGAIN && error != EOPNOTSUPP) {
         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
@@ -3886,7 +3977,8 @@ dpif_netdev_run(struct dpif *dpif)
             }
         }
         cycles_count_end(non_pmd, PMD_CYCLES_IDLE);
-        dpif_netdev_xps_revalidate_pmd(non_pmd, time_msec(), false);
+        pmd_thread_ctx_time_update(non_pmd);
+        dpif_netdev_xps_revalidate_pmd(non_pmd, false);
         ovs_mutex_unlock(&dp->non_pmd_mutex);
 
         dp_netdev_pmd_unref(non_pmd);
@@ -3937,7 +4029,7 @@ pmd_free_cached_ports(struct dp_netdev_pmd_thread *pmd)
     struct tx_port *tx_port_cached;
 
     /* Free all used tx queue ids. */
-    dpif_netdev_xps_revalidate_pmd(pmd, 0, true);
+    dpif_netdev_xps_revalidate_pmd(pmd, true);
 
     HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->tnl_port_cache) {
         free(tx_port_cached);
@@ -4079,6 +4171,9 @@ reload:
             lc = 0;
 
             coverage_try_clear();
+            /* It's possible that the time was not updated on current
+             * iteration, if there were no received packets. */
+            pmd_thread_ctx_time_update(pmd);
             dp_netdev_pmd_try_optimize(pmd, poll_list, poll_cnt);
             if (!ovsrcu_try_quiesce()) {
                 emc_cache_slow_sweep(&pmd->flow_cache);
@@ -4530,8 +4625,9 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
     ovs_mutex_init(&pmd->port_mutex);
     cmap_init(&pmd->flow_table);
     cmap_init(&pmd->classifiers);
-    pmd->next_optimization = time_msec() + DPCLS_OPTIMIZATION_INTERVAL;
-    pmd->rxq_next_cycle_store = time_msec() + PMD_RXQ_INTERVAL_LEN;
+    pmd_thread_ctx_time_update(pmd);
+    pmd->next_optimization = pmd->ctx.now + DPCLS_OPTIMIZATION_INTERVAL;
+    pmd->rxq_next_cycle_store = pmd->ctx.now + PMD_RXQ_INTERVAL_LEN;
     hmap_init(&pmd->poll_list);
     hmap_init(&pmd->tx_ports);
     hmap_init(&pmd->tnl_port_cache);
@@ -4704,6 +4800,7 @@ dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
 
     tx->port = port;
     tx->qid = -1;
+    dp_packet_batch_init(&tx->output_pkts);
 
     hmap_insert(&pmd->tx_ports, &tx->node, hash_port_no(tx->port->port_no));
     pmd->need_reload = true;
@@ -4860,19 +4957,18 @@ packet_batch_per_flow_init(struct packet_batch_per_flow *batch,
 
 static inline void
 packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
-                              struct dp_netdev_pmd_thread *pmd,
-                              long long now)
+                              struct dp_netdev_pmd_thread *pmd)
 {
     struct dp_netdev_actions *actions;
     struct dp_netdev_flow *flow = batch->flow;
 
     dp_netdev_flow_used(flow, batch->array.count, batch->byte_count,
-                        batch->tcp_flags, now);
+                        batch->tcp_flags, pmd->ctx.now);
 
     actions = dp_netdev_flow_get_actions(flow);
 
     dp_netdev_execute_actions(pmd, &batch->array, true, &flow->flow,
-                              actions->actions, actions->size, now);
+                              actions->actions, actions->size);
 }
 
 static inline void
@@ -4980,7 +5076,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
                      struct dp_packet *packet,
                      const struct netdev_flow_key *key,
                      struct ofpbuf *actions, struct ofpbuf *put_actions,
-                     int *lost_cnt, long long now)
+                     int *lost_cnt)
 {
     struct ofpbuf *add_actions;
     struct dp_packet_batch b;
@@ -5019,7 +5115,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
          * we'll send the packet up twice. */
         dp_packet_batch_init_packet(&b, packet);
         dp_netdev_execute_actions(pmd, &b, true, &match.flow,
-                                  actions->data, actions->size, now);
+                                  actions->data, actions->size);
 
         add_actions = put_actions->size ? put_actions : actions;
         if (OVS_LIKELY(error != ENOSPC)) {
@@ -5047,9 +5143,9 @@ static inline void
 fast_path_processing(struct dp_netdev_pmd_thread *pmd,
                      struct dp_packet_batch *packets_,
                      struct netdev_flow_key *keys,
-                     struct packet_batch_per_flow batches[], size_t *n_batches,
-                     odp_port_t in_port,
-                     long long now)
+                     struct packet_batch_per_flow batches[],
+                     size_t *n_batches,
+                     odp_port_t in_port)
 {
     const size_t cnt = dp_packet_batch_size(packets_);
 #if !defined(__CHECKER__) && !defined(_WIN32)
@@ -5106,7 +5202,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
 
         miss_cnt++;
         handle_packet_upcall(pmd, packet, &keys[i], &actions,
-                             &put_actions, &lost_cnt, now);
+                             &put_actions, &lost_cnt);
     }
 
     ofpbuf_uninit(&actions);
@@ -5159,7 +5255,6 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
     OVS_ALIGNED_VAR(CACHE_LINE_SIZE)
         struct netdev_flow_key keys[PKT_ARRAY_SIZE];
     struct packet_batch_per_flow batches[PKT_ARRAY_SIZE];
-    long long now = time_msec();
     size_t n_batches;
     odp_port_t in_port;
@@ -5169,8 +5264,8 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
     if (!dp_packet_batch_is_empty(packets)) {
         /* Get ingress port from first packet's metadata. */
         in_port = packets->packets[0]->md.in_port.odp_port;
-        fast_path_processing(pmd, packets, keys, batches, &n_batches,
-                             in_port, now);
+        fast_path_processing(pmd, packets, keys,
+                             batches, &n_batches, in_port);
     }
 
     /* All the flow batches need to be reset before any call to
@@ -5188,7 +5283,7 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
     }
 
     for (i = 0; i < n_batches; i++) {
-        packet_batch_per_flow_execute(&batches[i], pmd, now);
+        packet_batch_per_flow_execute(&batches[i], pmd);
     }
 }
@@ -5209,7 +5304,6 @@ dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
 
 struct dp_netdev_execute_aux {
     struct dp_netdev_pmd_thread *pmd;
-    long long now;
     const struct flow *flow;
 };
@@ -5233,7 +5327,7 @@ dpif_netdev_register_upcall_cb(struct dpif *dpif, upcall_callback *cb,
 
 static void
 dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
-                               long long now, bool purge)
+                               bool purge)
 {
     struct tx_port *tx;
     struct dp_netdev_port *port;
@@ -5243,7 +5337,7 @@ dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
         if (!tx->port->dynamic_txqs) {
             continue;
         }
-        interval = now - tx->last_used;
+        interval = pmd->ctx.now - tx->last_used;
         if (tx->qid >= 0 && (purge || interval >= XPS_TIMEOUT_MS)) {
             port = tx->port;
             ovs_mutex_lock(&port->txq_used_mutex);
@@ -5256,18 +5350,14 @@ dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
 
 static int
 dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
-                           struct tx_port *tx, long long now)
+                           struct tx_port *tx)
 {
     struct dp_netdev_port *port;
     long long interval;
     int i, min_cnt, min_qid;
 
-    if (OVS_UNLIKELY(!now)) {
-        now = time_msec();
-    }
-
-    interval = now - tx->last_used;
-    tx->last_used = now;
+    interval = pmd->ctx.now - tx->last_used;
+    tx->last_used = pmd->ctx.now;
 
     if (OVS_LIKELY(tx->qid >= 0 && interval < XPS_TIMEOUT_MS)) {
         return tx->qid;
@@ -5295,7 +5385,7 @@ dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
 
     ovs_mutex_unlock(&port->txq_used_mutex);
 
-    dpif_netdev_xps_revalidate_pmd(pmd, now, false);
+    dpif_netdev_xps_revalidate_pmd(pmd, false);
 
     VLOG_DBG("Core %d: New TX queue ID %d for port \'%s\'.",
              pmd->core_id, tx->qid, netdev_get_name(tx->port->netdev));
@@ -5346,7 +5436,7 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd,
                             struct dp_packet *packet, bool may_steal,
                             struct flow *flow, ovs_u128 *ufid,
                             struct ofpbuf *actions,
-                            const struct nlattr *userdata, long long now)
+                            const struct nlattr *userdata)
 {
     struct dp_packet_batch b;
     int error;
@@ -5359,7 +5449,7 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd,
     if (!error || error == ENOSPC) {
         dp_packet_batch_init_packet(&b, packet);
         dp_netdev_execute_actions(pmd, &b, may_steal, flow,
-                                  actions->data, actions->size, now);
+                                  actions->data, actions->size);
     } else if (may_steal) {
         dp_packet_delete(packet);
     }
@@ -5375,25 +5465,41 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
     struct dp_netdev_pmd_thread *pmd = aux->pmd;
     struct dp_netdev *dp = pmd->dp;
     int type = nl_attr_type(a);
-    long long now = aux->now;
     struct tx_port *p;
 
     switch ((enum ovs_action_attr)type) {
     case OVS_ACTION_ATTR_OUTPUT:
         p = pmd_send_port_cache_lookup(pmd, nl_attr_get_odp_port(a));
         if (OVS_LIKELY(p)) {
-            int tx_qid;
-            bool dynamic_txqs;
+            struct dp_packet *packet;
+            struct dp_packet_batch out;
 
-            dynamic_txqs = p->port->dynamic_txqs;
-            if (dynamic_txqs) {
-                tx_qid = dpif_netdev_xps_get_tx_qid(pmd, p, now);
-            } else {
-                tx_qid = pmd->static_tx_qid;
+            if (!may_steal) {
+                dp_packet_batch_clone(&out, packets_);
+                dp_packet_batch_reset_cutlen(packets_);
+                packets_ = &out;
             }
+            dp_packet_batch_apply_cutlen(packets_);
 
-            netdev_send(p->port->netdev, tx_qid, packets_, may_steal,
-                        dynamic_txqs);
+#ifdef DPDK_NETDEV
+            if (OVS_UNLIKELY(!dp_packet_batch_is_empty(&p->output_pkts)
+                             && packets_->packets[0]->source
+                                != p->output_pkts.packets[0]->source)) {
+                /* XXX: netdev-dpdk assumes that all packets in a single
+                 *      output batch has the same source. Flush here to
+                 *      avoid memory access issues. */
+                dp_netdev_pmd_flush_output_on_port(pmd, p);
+            }
+#endif
+            if (OVS_UNLIKELY(dp_packet_batch_size(&p->output_pkts)
+                             + dp_packet_batch_size(packets_)
+                               > NETDEV_MAX_BURST)) {
+                /* Some packets was generated while input batch processing.
+                 * Flush here to avoid overflow. */
+                dp_netdev_pmd_flush_output_on_port(pmd, p);
+            }
+            DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
+                dp_packet_batch_add(&p->output_pkts, packet);
+            }
             return;
         }
         break;
@@ -5470,7 +5576,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
             flow_extract(packet, &flow);
             dpif_flow_hash(dp->dpif, &flow, sizeof flow, &ufid);
             dp_execute_userspace_action(pmd, packet, may_steal, &flow,
-                                        &ufid, &actions, userdata, now);
+                                        &ufid, &actions, userdata);
         }
 
         if (clone) {
@@ -5631,13 +5737,14 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
 
         conntrack_execute(&dp->conntrack, packets_, aux->flow->dl_type, force,
                           commit, zone, setmark, setlabel, aux->flow->tp_src,
-                          aux->flow->tp_dst, helper, nat_action_info_ref, now);
+                          aux->flow->tp_dst, helper, nat_action_info_ref,
+                          pmd->ctx.now);
         break;
     }
 
     case OVS_ACTION_ATTR_METER:
         dp_netdev_run_meter(pmd->dp, packets_, nl_attr_get_u32(a),
-                            time_msec());
+                            pmd->ctx.now);
         break;
 
     case OVS_ACTION_ATTR_PUSH_VLAN:
@@ -5666,10 +5773,9 @@ static void
 dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
                           struct dp_packet_batch *packets,
                           bool may_steal, const struct flow *flow,
-                          const struct nlattr *actions, size_t actions_len,
-                          long long now)
+                          const struct nlattr *actions, size_t actions_len)
 {
-    struct dp_netdev_execute_aux aux = { pmd, now, flow };
+    struct dp_netdev_execute_aux aux = { pmd, flow };
 
     odp_execute_actions(&aux, packets, may_steal, actions, actions_len,
                         dp_execute_cb);
@@ -6000,9 +6106,8 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
                            struct polled_queue *poll_list, int poll_cnt)
 {
     struct dpcls *cls;
-    long long int now = time_msec();
 
-    if (now > pmd->rxq_next_cycle_store) {
+    if (pmd->ctx.now > pmd->rxq_next_cycle_store) {
         /* Get the cycles that were used to process each queue and store. */
         for (unsigned i = 0; i < poll_cnt; i++) {
             uint64_t rxq_cyc_curr = dp_netdev_rxq_get_cycles(poll_list[i].rxq,
@@ -6012,10 +6117,10 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
                                      0);
         }
         /* Start new measuring interval */
-        pmd->rxq_next_cycle_store = now + PMD_RXQ_INTERVAL_LEN;
+        pmd->rxq_next_cycle_store = pmd->ctx.now + PMD_RXQ_INTERVAL_LEN;
     }
 
-    if (now > pmd->next_optimization) {
+    if (pmd->ctx.now > pmd->next_optimization) {
         /* Try to obtain the flow lock to block out revalidator threads.
         * If not possible, just try next time. */
        if (!ovs_mutex_trylock(&pmd->flow_mutex)) {
@@ -6025,7 +6130,8 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
         }
         ovs_mutex_unlock(&pmd->flow_mutex);
         /* Start new measuring interval */
-        pmd->next_optimization = now + DPCLS_OPTIMIZATION_INTERVAL;
+        pmd->next_optimization = pmd->ctx.now
+                                 + DPCLS_OPTIMIZATION_INTERVAL;
     }
 }
diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index 65c5674a5..05974c100 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -680,7 +680,7 @@ netdev_bsd_rxq_drain(struct netdev_rxq *rxq_)
  */
 static int
 netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
-                struct dp_packet_batch *batch, bool may_steal,
+                struct dp_packet_batch *batch,
                 bool concurrent_txq OVS_UNUSED)
 {
     struct netdev_bsd *dev = netdev_bsd_cast(netdev_);
@@ -697,7 +697,7 @@ netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
 
     for (i = 0; i < batch->count; i++) {
         const void *data = dp_packet_data(batch->packets[i]);
-        size_t size = dp_packet_get_send_len(batch->packets[i]);
+        size_t size = dp_packet_size(batch->packets[i]);
 
         while (!error) {
             ssize_t retval;
@@ -728,7 +728,7 @@ netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
     }
 
     ovs_mutex_unlock(&dev->mutex);
-    dp_packet_delete_batch(batch, may_steal);
+    dp_packet_delete_batch(batch, true);
 
     return error;
 }
diff --git a/lib/netdev-dpdk-unixctl.man b/lib/netdev-dpdk-unixctl.man
new file mode 100644
index 000000000..ac274cde8
--- /dev/null
+++ b/lib/netdev-dpdk-unixctl.man
@@ -0,0 +1,14 @@
+.SS "NETDEV-DPDK COMMANDS"
+These commands manage DPDK related ports (\fBtype=\fR\fIdpdk*\fR).
+.IP "\fBnetdev-dpdk/set-admin-state\fR [\fIinterface\fR] \fBup\fR | \fBdown\fR"
+Change the admin state for DPDK \fIinterface\fR to \fBup\fR or \fBdown\fR.
+If \fIinterface\fR is not specified, then it applies to all DPDK ports.
+.IP "\fBnetdev-dpdk/detach\fR \fIpci-address\fR"
+Detaches device with corresponding \fIpci-address\fR from DPDK. This command
+can be used to detach device if it wasn't detached automatically after port
+deletion. Refer to the documentation for details and instructions.
+.IP "\fBnetdev-dpdk/get-mempool-info\fR [\fIinterface\fR]"
+Prints the debug information about memory pool used by DPDK \fIinterface\fR.
+If called without arguments, information of all the available mempools will
+be printed. For additional mempool statistics enable
+\fBCONFIG_RTE_LIBRTE_MEMPOOL_DEBUG\fR while building DPDK.
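As a usage illustration of the commands documented in the new man page fragment above (assuming a DPDK port named "dpdk0" exists): "ovs-appctl netdev-dpdk/get-mempool-info dpdk0" dumps the rte_mempool state for that port's memory pool, while invoking the command with no argument dumps all available mempools. Both behaviors follow from the rte_mempool_dump() and rte_mempool_list_dump() calls in the implementation below.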
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 8f22264b3..364f545c4 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1843,8 +1843,6 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
         dropped += batch_cnt - cnt;
     }
 
-    dp_packet_batch_apply_cutlen(batch);
-
     uint32_t txcnt = 0;
 
     for (uint32_t i = 0; i < cnt; i++) {
@@ -1892,14 +1890,13 @@
 static int
 netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
                        struct dp_packet_batch *batch,
-                       bool may_steal, bool concurrent_txq OVS_UNUSED)
+                       bool concurrent_txq OVS_UNUSED)
 {
-    if (OVS_UNLIKELY(!may_steal || batch->packets[0]->source != DPBUF_DPDK)) {
+    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
         dpdk_do_tx_copy(netdev, qid, batch);
-        dp_packet_delete_batch(batch, may_steal);
+        dp_packet_delete_batch(batch, true);
     } else {
-        dp_packet_batch_apply_cutlen(batch);
         __netdev_dpdk_vhost_send(netdev, qid, batch->packets, batch->count);
     }
     return 0;
@@ -1907,11 +1904,11 @@ netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
 
 static inline void
 netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
-                   struct dp_packet_batch *batch, bool may_steal,
+                   struct dp_packet_batch *batch,
                    bool concurrent_txq)
 {
     if (OVS_UNLIKELY(!(dev->flags & NETDEV_UP))) {
-        dp_packet_delete_batch(batch, may_steal);
+        dp_packet_delete_batch(batch, true);
         return;
     }
@@ -1920,19 +1917,16 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
         rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
     }
 
-    if (OVS_UNLIKELY(!may_steal ||
-                     batch->packets[0]->source != DPBUF_DPDK)) {
+    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
         struct netdev *netdev = &dev->up;
 
         dpdk_do_tx_copy(netdev, qid, batch);
-        dp_packet_delete_batch(batch, may_steal);
+        dp_packet_delete_batch(batch, true);
     } else {
         int tx_cnt, dropped;
         int batch_cnt = dp_packet_batch_size(batch);
         struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets;
 
-        dp_packet_batch_apply_cutlen(batch);
-
         tx_cnt = netdev_dpdk_filter_packet_len(dev, pkts, batch_cnt);
         tx_cnt = netdev_dpdk_qos_run(dev, pkts, tx_cnt, true);
         dropped = batch_cnt - tx_cnt;
@@ -1953,12 +1947,11 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
 
 static int
 netdev_dpdk_eth_send(struct netdev *netdev, int qid,
-                     struct dp_packet_batch *batch, bool may_steal,
-                     bool concurrent_txq)
+                     struct dp_packet_batch *batch, bool concurrent_txq)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 
-    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
+    netdev_dpdk_send__(dev, qid, batch, concurrent_txq);
     return 0;
 }
@@ -2586,6 +2579,56 @@ error:
     free(response);
 }
 
+static void
+netdev_dpdk_get_mempool_info(struct unixctl_conn *conn,
+                             int argc, const char *argv[],
+                             void *aux OVS_UNUSED)
+{
+    size_t size;
+    FILE *stream;
+    char *response = NULL;
+    struct netdev *netdev = NULL;
+
+    if (argc == 2) {
+        netdev = netdev_from_name(argv[1]);
+        if (!netdev || !is_dpdk_class(netdev->netdev_class)) {
+            unixctl_command_reply_error(conn, "Not a DPDK Interface");
+            goto out;
+        }
+    }
+
+    stream = open_memstream(&response, &size);
+    if (!stream) {
+        response = xasprintf("Unable to open memstream: %s.",
+                             ovs_strerror(errno));
+        unixctl_command_reply_error(conn, response);
+        goto out;
+    }
+
+    if (netdev) {
+        struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+
+        ovs_mutex_lock(&dev->mutex);
+        ovs_mutex_lock(&dpdk_mp_mutex);
+
+        rte_mempool_dump(stream, dev->mp);
+
+        ovs_mutex_unlock(&dpdk_mp_mutex);
+        ovs_mutex_unlock(&dev->mutex);
+    } else {
+        ovs_mutex_lock(&dpdk_mp_mutex);
+        rte_mempool_list_dump(stream);
+        ovs_mutex_unlock(&dpdk_mp_mutex);
+    }
+
+    fclose(stream);
+
+    unixctl_command_reply(conn, response);
+out:
+    free(response);
+    netdev_close(netdev);
+}
+
 /*
  * Set virtqueue flags so that we do not receive interrupts.
  */
@@ -2842,6 +2885,10 @@ netdev_dpdk_class_init(void)
                                  "pci address of device", 1, 1,
                                  netdev_dpdk_detach, NULL);
+        unixctl_command_register("netdev-dpdk/get-mempool-info",
+                                 "[netdev]", 0, 1,
+                                 netdev_dpdk_get_mempool_info, NULL);
+
         ovsthread_once_done(&once);
     }
@@ -2933,8 +2980,7 @@ dpdk_ring_open(const char dev_name[], dpdk_port_t *eth_port_id)
 
 static int
 netdev_dpdk_ring_send(struct netdev *netdev, int qid,
-                      struct dp_packet_batch *batch, bool may_steal,
-                      bool concurrent_txq)
+                      struct dp_packet_batch *batch, bool concurrent_txq)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     struct dp_packet *packet;
@@ -2947,7 +2993,7 @@ netdev_dpdk_ring_send(struct netdev *netdev, int qid,
         dp_packet_mbuf_rss_flag_reset(packet);
     }
 
-    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
+    netdev_dpdk_send__(dev, qid, batch, concurrent_txq);
     return 0;
 }
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index 246cdf1a4..40086a377 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -1062,7 +1062,7 @@ netdev_dummy_rxq_drain(struct netdev_rxq *rxq_)
 
 static int
 netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
-                  struct dp_packet_batch *batch, bool may_steal,
+                  struct dp_packet_batch *batch,
                   bool concurrent_txq OVS_UNUSED)
 {
     struct netdev_dummy *dev = netdev_dummy_cast(netdev);
@@ -1071,7 +1071,7 @@ netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
     struct dp_packet *packet;
     DP_PACKET_BATCH_FOR_EACH(packet, batch) {
         const void *buffer = dp_packet_data(packet);
-        size_t size = dp_packet_get_send_len(packet);
+        size_t size = dp_packet_size(packet);
 
         if (batch->packets[i]->packet_type != htonl(PT_ETH)) {
             error = EPFNOSUPPORT;
@@ -1132,7 +1132,7 @@ netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
         ovs_mutex_unlock(&dev->mutex);
     }
 
-    dp_packet_delete_batch(batch, may_steal);
+    dp_packet_delete_batch(batch, true);
     return error;
 }
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 40b089094..ccb6def6c 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1198,7 +1198,7 @@ netdev_linux_sock_batch_send(int sock, int ifindex,
     struct dp_packet *packet;
     DP_PACKET_BATCH_FOR_EACH (packet, batch) {
         iov[i].iov_base = dp_packet_data(packet);
-        iov[i].iov_len = dp_packet_get_send_len(packet);
+        iov[i].iov_len = dp_packet_size(packet);
         mmsg[i].msg_hdr = (struct msghdr) { .msg_name = &sll,
                                             .msg_namelen = sizeof sll,
                                             .msg_iov = &iov[i],
@@ -1235,7 +1235,7 @@ netdev_linux_tap_batch_send(struct netdev *netdev_,
     struct netdev_linux *netdev = netdev_linux_cast(netdev_);
     struct dp_packet *packet;
     DP_PACKET_BATCH_FOR_EACH (packet, batch) {
-        size_t size = dp_packet_get_send_len(packet);
+        size_t size = dp_packet_size(packet);
         ssize_t retval;
         int error;
 
@@ -1270,7 +1270,7 @@ netdev_linux_tap_batch_send(struct netdev *netdev_,
  * expected to do additional queuing of packets. */
 static int
 netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
-                  struct dp_packet_batch *batch, bool may_steal,
+                  struct dp_packet_batch *batch,
                   bool concurrent_txq OVS_UNUSED)
 {
     int error = 0;
@@ -1306,7 +1306,7 @@
     }
 
 free_batch:
-    dp_packet_delete_batch(batch, may_steal);
+    dp_packet_delete_batch(batch, true);
     return error;
 }
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 1720deb53..3d9f3365d 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -348,9 +348,8 @@ struct netdev_class {
      * If the function returns a non-zero value, some of the packets might have
      * been sent anyway.
      *
-     * If 'may_steal' is false, the caller retains ownership of all the
-     * packets. If 'may_steal' is true, the caller transfers ownership of all
-     * the packets to the network device, regardless of success.
+     * The caller transfers ownership of all the packets to the network
+     * device, regardless of success.
      *
      * If 'concurrent_txq' is true, the caller may perform concurrent calls
      * to netdev_send() with the same 'qid'. The netdev provider is responsible
@@ -370,7 +369,7 @@
      * datapath". It will also prevent the OVS implementation of bonding from
      * working properly over 'netdev'.) */
     int (*send)(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
-                bool may_steal, bool concurrent_txq);
+                bool concurrent_txq);
 
     /* Registers with the poll loop to wake up from the next call to
      * poll_block() when the packet transmission queue for 'netdev' has
diff --git a/lib/netdev.c b/lib/netdev.c
index cd11281a5..b1b6da4a5 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -771,9 +771,8 @@ netdev_get_pt_mode(const struct netdev *netdev)
  * If the function returns a non-zero value, some of the packets might have
  * been sent anyway.
  *
- * If 'may_steal' is false, the caller retains ownership of all the packets.
- * If 'may_steal' is true, the caller transfers ownership of all the packets
- * to the network device, regardless of success.
+ * The caller transfers ownership of all the packets to the network device,
+ * regardless of success.
 *
 * If 'concurrent_txq' is true, the caller may perform concurrent calls
 * to netdev_send() with the same 'qid'. The netdev provider is responsible
@@ -787,15 +786,12 @@
 * queues. */
 int
 netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
-            bool may_steal, bool concurrent_txq)
+            bool concurrent_txq)
 {
-    int error = netdev->netdev_class->send(netdev, qid, batch, may_steal,
+    int error = netdev->netdev_class->send(netdev, qid, batch,
                                            concurrent_txq);
     if (!error) {
         COVERAGE_INC(netdev_sent);
-        if (!may_steal) {
-            dp_packet_batch_reset_cutlen(batch);
-        }
     }
     return error;
 }
diff --git a/lib/netdev.h b/lib/netdev.h
index 3a545fe06..dc9d9a0f5 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -181,7 +181,7 @@ int netdev_rxq_drain(struct netdev_rxq *);
 
 /* Packet transmission. */
 int netdev_send(struct netdev *, int qid, struct dp_packet_batch *,
-                bool may_steal, bool concurrent_txq);
+                bool concurrent_txq);
 void netdev_send_wait(struct netdev *, int qid);
 
 /* Flow offloading. */
diff --git a/manpages.mk b/manpages.mk
index 7d6a507e0..351155f34 100644
--- a/manpages.mk
+++ b/manpages.mk
@@ -255,6 +255,7 @@ vswitchd/ovs-vswitchd.8: \
 	lib/daemon.man \
 	lib/dpctl.man \
 	lib/memory-unixctl.man \
+	lib/netdev-dpdk-unixctl.man \
 	lib/service.man \
 	lib/ssl-bootstrap.man \
 	lib/ssl.man \
@@ -270,6 +271,7 @@ lib/coverage-unixctl.man:
 lib/daemon.man:
 lib/dpctl.man:
 lib/memory-unixctl.man:
+lib/netdev-dpdk-unixctl.man:
 lib/service.man:
 lib/ssl-bootstrap.man:
 lib/ssl.man:
diff --git a/vswitchd/ovs-vswitchd.8.in b/vswitchd/ovs-vswitchd.8.in
index 478acc52f..80e5f5327 100644
--- a/vswitchd/ovs-vswitchd.8.in
+++ b/vswitchd/ovs-vswitchd.8.in
@@ -282,6 +282,7 @@ port names, which this thread polls.
 .IP "\fBdpif-netdev/pmd-rxq-rebalance\fR [\fIdp\fR]"
 Reassigns rxqs to pmds in the datapath \fIdp\fR based on their current usage.
 .
+.so lib/netdev-dpdk-unixctl.man
 .so ofproto/ofproto-dpif-unixctl.man
 .so ofproto/ofproto-unixctl.man
 .so lib/vlog-unixctl.man
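A closing note on the netdev_send() API change above: with 'may_steal' removed, a send always consumes the batch, so a caller that still needs its packets must clone them first, which is what dp_execute_cb() now does for the non-stealing OVS_ACTION_ATTR_OUTPUT case. A hedged sketch of the new calling convention, using simplified stand-in types and helpers rather than the real OVS API:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Simplified stand-ins for the real OVS types. */
struct dp_packet_batch { int count; };
struct netdev { const char *name; };

/* Post-merge contract: netdev_send() always consumes 'batch',
 * regardless of success; there is no 'may_steal' parameter anymore. */
static int
netdev_send(struct netdev *dev, int qid, struct dp_packet_batch *batch,
            bool concurrent_txq)
{
    (void) concurrent_txq;
    printf("%s/txq%d: sent %d packets\n", dev->name, qid, batch->count);
    free(batch);                /* The device side owns and releases it. */
    return 0;
}

/* A caller that needs to keep its packets clones the batch first. */
static void
send_keeping_copy(struct netdev *dev, int qid, struct dp_packet_batch *batch)
{
    struct dp_packet_batch *clone = malloc(sizeof *clone);
    memcpy(clone, batch, sizeof *clone);
    netdev_send(dev, qid, clone, false);   /* 'clone' is consumed here. */
    /* 'batch' remains owned by the caller. */
}

int
main(void)
{
    struct netdev dev = { .name = "dpdk0" };
    struct dp_packet_batch batch = { .count = 3 };

    send_keeping_copy(&dev, 0, &batch);
    return 0;
}

This simplification is also what lets netdev.c drop the dp_packet_batch_reset_cutlen() call: cutlen handling moved to the datapath side, before packets are queued on the per-port output batch.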