author     Ben Pfaff <blp@ovn.org>    2018-01-02 07:45:17 -0800
committer  Ben Pfaff <blp@ovn.org>    2018-01-02 07:45:17 -0800
commit     34944e81f00053f96112ea07b2cf770dd26fa1e3 (patch)
tree       62e395aa4bbb68d510a360dc5afd5abda2a3055e
parent     3f33aa477256a276fd0985684c9bcb210f6150c1 (diff)
parent     cc4891f39d574986948ac87280cfe9017fe17a39 (diff)
download   openvswitch-34944e81f00053f96112ea07b2cf770dd26fa1e3.tar.gz
Merge branch 'dpdk_merge' of https://github.com/istokes/ovs into HEAD
-rw-r--r--  Documentation/howto/dpdk.rst  |  22
-rw-r--r--  NEWS                          |   2
-rw-r--r--  lib/automake.mk               |   1
-rw-r--r--  lib/dpif-netdev.c             | 244
-rw-r--r--  lib/netdev-bsd.c              |   6
-rw-r--r--  lib/netdev-dpdk-unixctl.man   |  14
-rw-r--r--  lib/netdev-dpdk.c             |  84
-rw-r--r--  lib/netdev-dummy.c            |   6
-rw-r--r--  lib/netdev-linux.c            |   8
-rw-r--r--  lib/netdev-provider.h         |   7
-rw-r--r--  lib/netdev.c                  |  12
-rw-r--r--  lib/netdev.h                  |   2
-rw-r--r--  manpages.mk                   |   2
-rw-r--r--  vswitchd/ovs-vswitchd.8.in    |   1
14 files changed, 290 insertions, 121 deletions
diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index d1238195a..2393c2ff7 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -531,16 +531,18 @@ Add test flows to forward packets between DPDK devices and VM ports::
Create a VM using the following configuration:
-+----------------------+--------+-----------------+
-| configuration | values | comments |
-+----------------------+--------+-----------------+
-| qemu version | 2.2.0 | n/a |
-| qemu thread affinity | core 5 | taskset 0x20 |
-| memory | 4GB | n/a |
-| cores | 2 | n/a |
-| Qcow2 image | CentOS7| n/a |
-| mrg_rxbuf | off | n/a |
-+----------------------+--------+-----------------+
+.. table::
+
+ ===================== ======== ============
+ Configuration Values Comments
+ ===================== ======== ============
+ QEMU version 2.2.0 n/a
+ QEMU thread affinity core 5 taskset 0x20
+ Memory 4GB n/a
+ Cores 2 n/a
+ Qcow2 image CentOS7 n/a
+ mrg_rxbuf off n/a
+ ===================== ======== ============
You can do this directly with QEMU via the ``qemu-system-x86_64`` application::
diff --git a/NEWS b/NEWS
index af98c2f80..a7f2defea 100644
--- a/NEWS
+++ b/NEWS
@@ -27,6 +27,8 @@ Post-v2.8.0
- DPDK:
* Add support for DPDK v17.11
* Add support for vHost IOMMU
+ * New debug appctl command 'netdev-dpdk/get-mempool-info'.
+ * All netdev-dpdk appctl commands are now described in the ovs-vswitchd man page.
- vswitchd:
* Datapath IDs may now be specified as 0x1 (etc.) instead of 16 digits.
diff --git a/lib/automake.mk b/lib/automake.mk
index effe5b5c2..4b38a1163 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -465,6 +465,7 @@ MAN_FRAGMENTS += \
lib/db-ctl-base.man \
lib/dpctl.man \
lib/memory-unixctl.man \
+ lib/netdev-dpdk-unixctl.man \
lib/ofp-version.man \
lib/ovs.tmac \
lib/service.man \
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 6ba025b66..349e98a8a 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -338,6 +338,8 @@ enum dp_stat_type {
DP_STAT_LOST, /* Packets not passed up to the client. */
DP_STAT_LOOKUP_HIT, /* Number of subtable lookups for flow table
hits */
+ DP_STAT_SENT_PKTS, /* Packets that have been sent. */
+ DP_STAT_SENT_BATCHES, /* Number of batches sent. */
DP_N_STATS
};
@@ -509,6 +511,9 @@ struct dp_netdev_pmd_cycles {
atomic_ullong n[PMD_N_CYCLES];
};
+static void dp_netdev_count_packet(struct dp_netdev_pmd_thread *,
+ enum dp_stat_type type, int cnt);
+
struct polled_queue {
struct dp_netdev_rxq *rxq;
odp_port_t port_no;
@@ -527,6 +532,18 @@ struct tx_port {
int qid;
long long last_used;
struct hmap_node node;
+ struct dp_packet_batch output_pkts;
+};
+
+/* A set of properties for the current processing loop that are not directly
+ * associated with the pmd thread itself, but with the packets being
+ * processed or the short-term system configuration (for example, time).
+ * Contained by struct dp_netdev_pmd_thread's 'ctx' member. */
+struct dp_netdev_pmd_thread_ctx {
+ /* Latest measured time. See 'pmd_thread_ctx_time_update()'. */
+ long long now;
+ /* Used to count cycles. See 'cycles_count_end()'. */
+ unsigned long long last_cycles;
};
/* PMD: Poll mode drivers. PMD accesses devices via polling to eliminate
@@ -584,8 +601,8 @@ struct dp_netdev_pmd_thread {
/* Cycles counters */
struct dp_netdev_pmd_cycles cycles;
- /* Used to count cicles. See 'cycles_counter_end()' */
- unsigned long long last_cycles;
+ /* Current context of the PMD thread. */
+ struct dp_netdev_pmd_thread_ctx ctx;
struct latch exit_latch; /* For terminating the pmd thread. */
struct seq *reload_seq;
@@ -658,8 +675,7 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
struct dp_packet_batch *,
bool may_steal, const struct flow *flow,
const struct nlattr *actions,
- size_t actions_len,
- long long now);
+ size_t actions_len);
static void dp_netdev_input(struct dp_netdev_pmd_thread *,
struct dp_packet_batch *, odp_port_t port_no);
static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
@@ -695,6 +711,9 @@ static void dp_netdev_add_rxq_to_pmd(struct dp_netdev_pmd_thread *pmd,
static void dp_netdev_del_rxq_from_pmd(struct dp_netdev_pmd_thread *pmd,
struct rxq_poll *poll)
OVS_REQUIRES(pmd->port_mutex);
+static void
+dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd);
+
static void reconfigure_datapath(struct dp_netdev *dp)
OVS_REQUIRES(dp->port_mutex);
static bool dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd);
@@ -719,9 +738,9 @@ static uint64_t
dp_netdev_rxq_get_intrvl_cycles(struct dp_netdev_rxq *rx, unsigned idx);
static void
dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
- long long now, bool purge);
+ bool purge);
static int dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
- struct tx_port *tx, long long now);
+ struct tx_port *tx);
static inline bool emc_entry_alive(struct emc_entry *ce);
static void emc_clear_entry(struct emc_entry *ce);
@@ -765,6 +784,28 @@ emc_cache_slow_sweep(struct emc_cache *flow_cache)
flow_cache->sweep_idx = (flow_cache->sweep_idx + 1) & EM_FLOW_HASH_MASK;
}
+/* Updates the time in the PMD thread's context and should be called in three cases:
+ *
+ * 1. PMD structure initialization:
+ * - dp_netdev_configure_pmd()
+ *
+ * 2. Before processing of the new packet batch:
+ * - dpif_netdev_execute()
+ * - dp_netdev_process_rxq_port()
+ *
+ * 3. At least once per polling iteration in the main polling threads if no
+ * packets were received on the current iteration:
+ * - dpif_netdev_run()
+ * - pmd_thread_main()
+ *
+ * 'pmd->ctx.now' should be used without update in all other cases if possible.
+ */
+static inline void
+pmd_thread_ctx_time_update(struct dp_netdev_pmd_thread *pmd)
+{
+ pmd->ctx.now = time_msec();
+}
+
/* Returns true if 'dpif' is a netdev or dummy dpif, false otherwise. */
bool
dpif_is_netdev(const struct dpif *dpif)
@@ -799,6 +840,7 @@ pmd_info_show_stats(struct ds *reply,
{
unsigned long long total_packets;
uint64_t total_cycles = 0;
+ double lookups_per_hit = 0, packets_per_batch = 0;
int i;
/* These loops subtract reference values ('*_zero') from the counters.
@@ -840,15 +882,23 @@ pmd_info_show_stats(struct ds *reply,
}
ds_put_cstr(reply, ":\n");
+ if (stats[DP_STAT_MASKED_HIT] > 0) {
+ lookups_per_hit = stats[DP_STAT_LOOKUP_HIT]
+ / (double) stats[DP_STAT_MASKED_HIT];
+ }
+ if (stats[DP_STAT_SENT_BATCHES] > 0) {
+ packets_per_batch = stats[DP_STAT_SENT_PKTS]
+ / (double) stats[DP_STAT_SENT_BATCHES];
+ }
+
ds_put_format(reply,
"\temc hits:%llu\n\tmegaflow hits:%llu\n"
"\tavg. subtable lookups per hit:%.2f\n"
- "\tmiss:%llu\n\tlost:%llu\n",
+ "\tmiss:%llu\n\tlost:%llu\n"
+ "\tavg. packets per output batch: %.2f\n",
stats[DP_STAT_EXACT_HIT], stats[DP_STAT_MASKED_HIT],
- stats[DP_STAT_MASKED_HIT] > 0
- ? (1.0*stats[DP_STAT_LOOKUP_HIT])/stats[DP_STAT_MASKED_HIT]
- : 0,
- stats[DP_STAT_MISS], stats[DP_STAT_LOST]);
+ lookups_per_hit, stats[DP_STAT_MISS], stats[DP_STAT_LOST],
+ packets_per_batch);
if (total_cycles == 0) {
return;
@@ -2916,6 +2966,9 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
ovs_mutex_lock(&dp->non_pmd_mutex);
}
+ /* Update current time in PMD context. */
+ pmd_thread_ctx_time_update(pmd);
+
/* The action processing expects the RSS hash to be valid, because
* it's always initialized at the beginning of datapath processing.
* In this case, though, 'execute->packet' may not have gone through
@@ -2928,8 +2981,8 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
dp_packet_batch_init_packet(&pp, execute->packet);
dp_netdev_execute_actions(pmd, &pp, false, execute->flow,
- execute->actions, execute->actions_len,
- time_msec());
+ execute->actions, execute->actions_len);
+ dp_netdev_pmd_flush_output_packets(pmd);
if (pmd->core_id == NON_PMD_CORE_ID) {
ovs_mutex_unlock(&dp->non_pmd_mutex);
@@ -3150,7 +3203,7 @@ cycles_count_start(struct dp_netdev_pmd_thread *pmd)
OVS_ACQUIRES(&cycles_counter_fake_mutex)
OVS_NO_THREAD_SAFETY_ANALYSIS
{
- pmd->last_cycles = cycles_counter();
+ pmd->ctx.last_cycles = cycles_counter();
}
/* Stop counting cycles and add them to the counter 'type' */
@@ -3160,7 +3213,7 @@ cycles_count_end(struct dp_netdev_pmd_thread *pmd,
OVS_RELEASES(&cycles_counter_fake_mutex)
OVS_NO_THREAD_SAFETY_ANALYSIS
{
- unsigned long long interval = cycles_counter() - pmd->last_cycles;
+ unsigned long long interval = cycles_counter() - pmd->ctx.last_cycles;
non_atomic_ullong_add(&pmd->cycles.n[type], interval);
}
@@ -3173,8 +3226,8 @@ cycles_count_intermediate(struct dp_netdev_pmd_thread *pmd,
OVS_NO_THREAD_SAFETY_ANALYSIS
{
unsigned long long new_cycles = cycles_counter();
- unsigned long long interval = new_cycles - pmd->last_cycles;
- pmd->last_cycles = new_cycles;
+ unsigned long long interval = new_cycles - pmd->ctx.last_cycles;
+ pmd->ctx.last_cycles = new_cycles;
non_atomic_ullong_add(&pmd->cycles.n[type], interval);
if (rxq && (type == PMD_CYCLES_PROCESSING)) {
@@ -3216,6 +3269,42 @@ dp_netdev_rxq_get_intrvl_cycles(struct dp_netdev_rxq *rx, unsigned idx)
return processing_cycles;
}
+static void
+dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
+ struct tx_port *p)
+{
+ int tx_qid;
+ int output_cnt;
+ bool dynamic_txqs;
+
+ dynamic_txqs = p->port->dynamic_txqs;
+ if (dynamic_txqs) {
+ tx_qid = dpif_netdev_xps_get_tx_qid(pmd, p);
+ } else {
+ tx_qid = pmd->static_tx_qid;
+ }
+
+ output_cnt = dp_packet_batch_size(&p->output_pkts);
+
+ netdev_send(p->port->netdev, tx_qid, &p->output_pkts, dynamic_txqs);
+ dp_packet_batch_init(&p->output_pkts);
+
+ dp_netdev_count_packet(pmd, DP_STAT_SENT_PKTS, output_cnt);
+ dp_netdev_count_packet(pmd, DP_STAT_SENT_BATCHES, 1);
+}
+
+static void
+dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd)
+{
+ struct tx_port *p;
+
+ HMAP_FOR_EACH (p, node, &pmd->send_port_cache) {
+ if (!dp_packet_batch_is_empty(&p->output_pkts)) {
+ dp_netdev_pmd_flush_output_on_port(pmd, p);
+ }
+ }
+}
+
static int
dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
struct netdev_rxq *rx,
@@ -3229,9 +3318,11 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
error = netdev_rxq_recv(rx, &batch);
if (!error) {
*recirc_depth_get() = 0;
+ pmd_thread_ctx_time_update(pmd);
batch_cnt = batch.count;
dp_netdev_input(pmd, &batch, port_no);
+ dp_netdev_pmd_flush_output_packets(pmd);
} else if (error != EAGAIN && error != EOPNOTSUPP) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
@@ -3886,7 +3977,8 @@ dpif_netdev_run(struct dpif *dpif)
}
}
cycles_count_end(non_pmd, PMD_CYCLES_IDLE);
- dpif_netdev_xps_revalidate_pmd(non_pmd, time_msec(), false);
+ pmd_thread_ctx_time_update(non_pmd);
+ dpif_netdev_xps_revalidate_pmd(non_pmd, false);
ovs_mutex_unlock(&dp->non_pmd_mutex);
dp_netdev_pmd_unref(non_pmd);
@@ -3937,7 +4029,7 @@ pmd_free_cached_ports(struct dp_netdev_pmd_thread *pmd)
struct tx_port *tx_port_cached;
/* Free all used tx queue ids. */
- dpif_netdev_xps_revalidate_pmd(pmd, 0, true);
+ dpif_netdev_xps_revalidate_pmd(pmd, true);
HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->tnl_port_cache) {
free(tx_port_cached);
@@ -4079,6 +4171,9 @@ reload:
lc = 0;
coverage_try_clear();
+ /* It's possible that the time was not updated on the current
+ * iteration if no packets were received. */
+ pmd_thread_ctx_time_update(pmd);
dp_netdev_pmd_try_optimize(pmd, poll_list, poll_cnt);
if (!ovsrcu_try_quiesce()) {
emc_cache_slow_sweep(&pmd->flow_cache);
@@ -4530,8 +4625,9 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
ovs_mutex_init(&pmd->port_mutex);
cmap_init(&pmd->flow_table);
cmap_init(&pmd->classifiers);
- pmd->next_optimization = time_msec() + DPCLS_OPTIMIZATION_INTERVAL;
- pmd->rxq_next_cycle_store = time_msec() + PMD_RXQ_INTERVAL_LEN;
+ pmd_thread_ctx_time_update(pmd);
+ pmd->next_optimization = pmd->ctx.now + DPCLS_OPTIMIZATION_INTERVAL;
+ pmd->rxq_next_cycle_store = pmd->ctx.now + PMD_RXQ_INTERVAL_LEN;
hmap_init(&pmd->poll_list);
hmap_init(&pmd->tx_ports);
hmap_init(&pmd->tnl_port_cache);
@@ -4704,6 +4800,7 @@ dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
tx->port = port;
tx->qid = -1;
+ dp_packet_batch_init(&tx->output_pkts);
hmap_insert(&pmd->tx_ports, &tx->node, hash_port_no(tx->port->port_no));
pmd->need_reload = true;
@@ -4860,19 +4957,18 @@ packet_batch_per_flow_init(struct packet_batch_per_flow *batch,
static inline void
packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
- struct dp_netdev_pmd_thread *pmd,
- long long now)
+ struct dp_netdev_pmd_thread *pmd)
{
struct dp_netdev_actions *actions;
struct dp_netdev_flow *flow = batch->flow;
dp_netdev_flow_used(flow, batch->array.count, batch->byte_count,
- batch->tcp_flags, now);
+ batch->tcp_flags, pmd->ctx.now);
actions = dp_netdev_flow_get_actions(flow);
dp_netdev_execute_actions(pmd, &batch->array, true, &flow->flow,
- actions->actions, actions->size, now);
+ actions->actions, actions->size);
}
static inline void
@@ -4980,7 +5076,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
struct dp_packet *packet,
const struct netdev_flow_key *key,
struct ofpbuf *actions, struct ofpbuf *put_actions,
- int *lost_cnt, long long now)
+ int *lost_cnt)
{
struct ofpbuf *add_actions;
struct dp_packet_batch b;
@@ -5019,7 +5115,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
* we'll send the packet up twice. */
dp_packet_batch_init_packet(&b, packet);
dp_netdev_execute_actions(pmd, &b, true, &match.flow,
- actions->data, actions->size, now);
+ actions->data, actions->size);
add_actions = put_actions->size ? put_actions : actions;
if (OVS_LIKELY(error != ENOSPC)) {
@@ -5047,9 +5143,9 @@ static inline void
fast_path_processing(struct dp_netdev_pmd_thread *pmd,
struct dp_packet_batch *packets_,
struct netdev_flow_key *keys,
- struct packet_batch_per_flow batches[], size_t *n_batches,
- odp_port_t in_port,
- long long now)
+ struct packet_batch_per_flow batches[],
+ size_t *n_batches,
+ odp_port_t in_port)
{
const size_t cnt = dp_packet_batch_size(packets_);
#if !defined(__CHECKER__) && !defined(_WIN32)
@@ -5106,7 +5202,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
miss_cnt++;
handle_packet_upcall(pmd, packet, &keys[i], &actions,
- &put_actions, &lost_cnt, now);
+ &put_actions, &lost_cnt);
}
ofpbuf_uninit(&actions);
@@ -5159,7 +5255,6 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
OVS_ALIGNED_VAR(CACHE_LINE_SIZE)
struct netdev_flow_key keys[PKT_ARRAY_SIZE];
struct packet_batch_per_flow batches[PKT_ARRAY_SIZE];
- long long now = time_msec();
size_t n_batches;
odp_port_t in_port;
@@ -5169,8 +5264,8 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
if (!dp_packet_batch_is_empty(packets)) {
/* Get ingress port from first packet's metadata. */
in_port = packets->packets[0]->md.in_port.odp_port;
- fast_path_processing(pmd, packets, keys, batches, &n_batches,
- in_port, now);
+ fast_path_processing(pmd, packets, keys,
+ batches, &n_batches, in_port);
}
/* All the flow batches need to be reset before any call to
@@ -5188,7 +5283,7 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
}
for (i = 0; i < n_batches; i++) {
- packet_batch_per_flow_execute(&batches[i], pmd, now);
+ packet_batch_per_flow_execute(&batches[i], pmd);
}
}
@@ -5209,7 +5304,6 @@ dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
struct dp_netdev_execute_aux {
struct dp_netdev_pmd_thread *pmd;
- long long now;
const struct flow *flow;
};
@@ -5233,7 +5327,7 @@ dpif_netdev_register_upcall_cb(struct dpif *dpif, upcall_callback *cb,
static void
dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
- long long now, bool purge)
+ bool purge)
{
struct tx_port *tx;
struct dp_netdev_port *port;
@@ -5243,7 +5337,7 @@ dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
if (!tx->port->dynamic_txqs) {
continue;
}
- interval = now - tx->last_used;
+ interval = pmd->ctx.now - tx->last_used;
if (tx->qid >= 0 && (purge || interval >= XPS_TIMEOUT_MS)) {
port = tx->port;
ovs_mutex_lock(&port->txq_used_mutex);
@@ -5256,18 +5350,14 @@ dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
static int
dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
- struct tx_port *tx, long long now)
+ struct tx_port *tx)
{
struct dp_netdev_port *port;
long long interval;
int i, min_cnt, min_qid;
- if (OVS_UNLIKELY(!now)) {
- now = time_msec();
- }
-
- interval = now - tx->last_used;
- tx->last_used = now;
+ interval = pmd->ctx.now - tx->last_used;
+ tx->last_used = pmd->ctx.now;
if (OVS_LIKELY(tx->qid >= 0 && interval < XPS_TIMEOUT_MS)) {
return tx->qid;
@@ -5295,7 +5385,7 @@ dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
ovs_mutex_unlock(&port->txq_used_mutex);
- dpif_netdev_xps_revalidate_pmd(pmd, now, false);
+ dpif_netdev_xps_revalidate_pmd(pmd, false);
VLOG_DBG("Core %d: New TX queue ID %d for port \'%s\'.",
pmd->core_id, tx->qid, netdev_get_name(tx->port->netdev));
@@ -5346,7 +5436,7 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd,
struct dp_packet *packet, bool may_steal,
struct flow *flow, ovs_u128 *ufid,
struct ofpbuf *actions,
- const struct nlattr *userdata, long long now)
+ const struct nlattr *userdata)
{
struct dp_packet_batch b;
int error;
@@ -5359,7 +5449,7 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd,
if (!error || error == ENOSPC) {
dp_packet_batch_init_packet(&b, packet);
dp_netdev_execute_actions(pmd, &b, may_steal, flow,
- actions->data, actions->size, now);
+ actions->data, actions->size);
} else if (may_steal) {
dp_packet_delete(packet);
}
@@ -5375,25 +5465,41 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
struct dp_netdev_pmd_thread *pmd = aux->pmd;
struct dp_netdev *dp = pmd->dp;
int type = nl_attr_type(a);
- long long now = aux->now;
struct tx_port *p;
switch ((enum ovs_action_attr)type) {
case OVS_ACTION_ATTR_OUTPUT:
p = pmd_send_port_cache_lookup(pmd, nl_attr_get_odp_port(a));
if (OVS_LIKELY(p)) {
- int tx_qid;
- bool dynamic_txqs;
+ struct dp_packet *packet;
+ struct dp_packet_batch out;
- dynamic_txqs = p->port->dynamic_txqs;
- if (dynamic_txqs) {
- tx_qid = dpif_netdev_xps_get_tx_qid(pmd, p, now);
- } else {
- tx_qid = pmd->static_tx_qid;
+ if (!may_steal) {
+ dp_packet_batch_clone(&out, packets_);
+ dp_packet_batch_reset_cutlen(packets_);
+ packets_ = &out;
}
+ dp_packet_batch_apply_cutlen(packets_);
- netdev_send(p->port->netdev, tx_qid, packets_, may_steal,
- dynamic_txqs);
+#ifdef DPDK_NETDEV
+ if (OVS_UNLIKELY(!dp_packet_batch_is_empty(&p->output_pkts)
+ && packets_->packets[0]->source
+ != p->output_pkts.packets[0]->source)) {
+ /* XXX: netdev-dpdk assumes that all packets in a single
+ * output batch have the same source. Flush here to
+ * avoid memory access issues. */
+ dp_netdev_pmd_flush_output_on_port(pmd, p);
+ }
+#endif
+ if (OVS_UNLIKELY(dp_packet_batch_size(&p->output_pkts)
+ + dp_packet_batch_size(packets_) > NETDEV_MAX_BURST)) {
+ /* Some packets were generated during input batch processing.
+ * Flush here to avoid overflow. */
+ dp_netdev_pmd_flush_output_on_port(pmd, p);
+ }
+ DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
+ dp_packet_batch_add(&p->output_pkts, packet);
+ }
return;
}
break;
@@ -5470,7 +5576,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
flow_extract(packet, &flow);
dpif_flow_hash(dp->dpif, &flow, sizeof flow, &ufid);
dp_execute_userspace_action(pmd, packet, may_steal, &flow,
- &ufid, &actions, userdata, now);
+ &ufid, &actions, userdata);
}
if (clone) {
@@ -5631,13 +5737,14 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
conntrack_execute(&dp->conntrack, packets_, aux->flow->dl_type, force,
commit, zone, setmark, setlabel, aux->flow->tp_src,
- aux->flow->tp_dst, helper, nat_action_info_ref, now);
+ aux->flow->tp_dst, helper, nat_action_info_ref,
+ pmd->ctx.now);
break;
}
case OVS_ACTION_ATTR_METER:
dp_netdev_run_meter(pmd->dp, packets_, nl_attr_get_u32(a),
- time_msec());
+ pmd->ctx.now);
break;
case OVS_ACTION_ATTR_PUSH_VLAN:
@@ -5666,10 +5773,9 @@ static void
dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
struct dp_packet_batch *packets,
bool may_steal, const struct flow *flow,
- const struct nlattr *actions, size_t actions_len,
- long long now)
+ const struct nlattr *actions, size_t actions_len)
{
- struct dp_netdev_execute_aux aux = { pmd, now, flow };
+ struct dp_netdev_execute_aux aux = { pmd, flow };
odp_execute_actions(&aux, packets, may_steal, actions,
actions_len, dp_execute_cb);
@@ -6000,9 +6106,8 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
struct polled_queue *poll_list, int poll_cnt)
{
struct dpcls *cls;
- long long int now = time_msec();
- if (now > pmd->rxq_next_cycle_store) {
+ if (pmd->ctx.now > pmd->rxq_next_cycle_store) {
/* Get the cycles that were used to process each queue and store. */
for (unsigned i = 0; i < poll_cnt; i++) {
uint64_t rxq_cyc_curr = dp_netdev_rxq_get_cycles(poll_list[i].rxq,
@@ -6012,10 +6117,10 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
0);
}
/* Start new measuring interval */
- pmd->rxq_next_cycle_store = now + PMD_RXQ_INTERVAL_LEN;
+ pmd->rxq_next_cycle_store = pmd->ctx.now + PMD_RXQ_INTERVAL_LEN;
}
- if (now > pmd->next_optimization) {
+ if (pmd->ctx.now > pmd->next_optimization) {
/* Try to obtain the flow lock to block out revalidator threads.
* If not possible, just try next time. */
if (!ovs_mutex_trylock(&pmd->flow_mutex)) {
@@ -6025,7 +6130,8 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
}
ovs_mutex_unlock(&pmd->flow_mutex);
/* Start new measuring interval */
- pmd->next_optimization = now + DPCLS_OPTIMIZATION_INTERVAL;
+ pmd->next_optimization = pmd->ctx.now
+ + DPCLS_OPTIMIZATION_INTERVAL;
}
}
}
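
To make the dpif-netdev changes above easier to follow, here is a minimal, self-contained sketch of the output-batching pattern this patch introduces: instead of calling netdev_send() once per OVS_ACTION_ATTR_OUTPUT action, packets are queued on a per-port batch and flushed either when adding more would overflow the batch or after the whole input batch has been processed. All names below (struct port, struct out_batch, MAX_BURST) are simplified stand-ins for the real OVS structures (tx_port, dp_packet_batch, NETDEV_MAX_BURST); this is an illustration, not the actual code:

#include <stdio.h>
#include <stddef.h>

#define MAX_BURST 32            /* Stand-in for NETDEV_MAX_BURST. */

struct pkt { int id; };

struct out_batch {
    struct pkt *pkts[MAX_BURST];
    size_t count;
};

struct port {
    const char *name;
    struct out_batch out;       /* Stand-in for tx_port's 'output_pkts'. */
    unsigned long long sent_pkts;
    unsigned long long sent_batches;
};

/* Stand-in for dp_netdev_pmd_flush_output_on_port(): transmit the
 * accumulated batch in one go and reset it. */
static void
flush_output_on_port(struct port *p)
{
    printf("%s: sending batch of %zu packets\n", p->name, p->out.count);
    p->sent_pkts += p->out.count;       /* Cf. DP_STAT_SENT_PKTS. */
    p->sent_batches++;                  /* Cf. DP_STAT_SENT_BATCHES. */
    p->out.count = 0;
}

/* Equivalent of the OVS_ACTION_ATTR_OUTPUT path above: queue, don't send. */
static void
output_packet(struct port *p, struct pkt *pkt)
{
    if (p->out.count + 1 > MAX_BURST) {
        /* Flush early so the per-port batch never overflows. */
        flush_output_on_port(p);
    }
    p->out.pkts[p->out.count++] = pkt;
}

int
main(void)
{
    struct port p = { .name = "port0" };
    struct pkt pkts[40];

    /* "Input batch processing": packets are only queued here... */
    for (int i = 0; i < 40; i++) {
        pkts[i].id = i;
        output_packet(&p, &pkts[i]);
    }
    /* ...and leftovers are flushed once per iteration, as
     * dp_netdev_pmd_flush_output_packets() does after dp_netdev_input(). */
    if (p.out.count) {
        flush_output_on_port(&p);
    }
    printf("avg. packets per output batch: %.2f\n",
           (double) p.sent_pkts / p.sent_batches);
    return 0;
}

The two counters mirror DP_STAT_SENT_PKTS and DP_STAT_SENT_BATCHES, which feed the new "avg. packets per output batch" line in the PMD statistics output.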
diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index 65c5674a5..05974c100 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -680,7 +680,7 @@ netdev_bsd_rxq_drain(struct netdev_rxq *rxq_)
*/
static int
netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
- struct dp_packet_batch *batch, bool may_steal,
+ struct dp_packet_batch *batch,
bool concurrent_txq OVS_UNUSED)
{
struct netdev_bsd *dev = netdev_bsd_cast(netdev_);
@@ -697,7 +697,7 @@ netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
for (i = 0; i < batch->count; i++) {
const void *data = dp_packet_data(batch->packets[i]);
- size_t size = dp_packet_get_send_len(batch->packets[i]);
+ size_t size = dp_packet_size(batch->packets[i]);
while (!error) {
ssize_t retval;
@@ -728,7 +728,7 @@ netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
}
ovs_mutex_unlock(&dev->mutex);
- dp_packet_delete_batch(batch, may_steal);
+ dp_packet_delete_batch(batch, true);
return error;
}
diff --git a/lib/netdev-dpdk-unixctl.man b/lib/netdev-dpdk-unixctl.man
new file mode 100644
index 000000000..ac274cde8
--- /dev/null
+++ b/lib/netdev-dpdk-unixctl.man
@@ -0,0 +1,14 @@
+.SS "NETDEV-DPDK COMMANDS"
These commands manage DPDK-related ports (\fBtype=\fR\fIdpdk*\fR).
+.IP "\fBnetdev-dpdk/set-admin-state\fR [\fIinterface\fR] \fBup\fR | \fBdown\fR"
+Change the admin state for DPDK \fIinterface\fR to \fBup\fR or \fBdown\fR.
If \fIinterface\fR is not specified, the change applies to all DPDK ports.
+.IP "\fBnetdev-dpdk/detach\fR \fIpci-address\fR"
Detaches the device with the given \fIpci-address\fR from DPDK. This command
can be used to detach a device that was not detached automatically after port
deletion. Refer to the documentation for details and instructions.
+.IP "\fBnetdev-dpdk/get-mempool-info\fR [\fIinterface\fR]"
+Prints debug information about the memory pool used by DPDK \fIinterface\fR.
+If called without arguments, information about all available mempools is
+printed. For additional mempool statistics, enable
+\fBCONFIG_RTE_LIBRTE_MEMPOOL_DEBUG\fR when building DPDK.
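
This fragment is wired into the ovs-vswitchd man page by the manpages.mk and ovs-vswitchd.8.in hunks below, and the commands themselves are invoked through ovs-appctl. A hypothetical session, assuming a DPDK port named "dpdk0" (the port name is illustrative; the command names come from the fragment above):

  ovs-appctl netdev-dpdk/set-admin-state dpdk0 up
  ovs-appctl netdev-dpdk/get-mempool-info dpdk0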
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 8f22264b3..364f545c4 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1843,8 +1843,6 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
dropped += batch_cnt - cnt;
}
- dp_packet_batch_apply_cutlen(batch);
-
uint32_t txcnt = 0;
for (uint32_t i = 0; i < cnt; i++) {
@@ -1892,14 +1890,13 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
static int
netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
struct dp_packet_batch *batch,
- bool may_steal, bool concurrent_txq OVS_UNUSED)
+ bool concurrent_txq OVS_UNUSED)
{
- if (OVS_UNLIKELY(!may_steal || batch->packets[0]->source != DPBUF_DPDK)) {
+ if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
dpdk_do_tx_copy(netdev, qid, batch);
- dp_packet_delete_batch(batch, may_steal);
+ dp_packet_delete_batch(batch, true);
} else {
- dp_packet_batch_apply_cutlen(batch);
__netdev_dpdk_vhost_send(netdev, qid, batch->packets, batch->count);
}
return 0;
@@ -1907,11 +1904,11 @@ netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
static inline void
netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
- struct dp_packet_batch *batch, bool may_steal,
+ struct dp_packet_batch *batch,
bool concurrent_txq)
{
if (OVS_UNLIKELY(!(dev->flags & NETDEV_UP))) {
- dp_packet_delete_batch(batch, may_steal);
+ dp_packet_delete_batch(batch, true);
return;
}
@@ -1920,19 +1917,16 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
}
- if (OVS_UNLIKELY(!may_steal ||
- batch->packets[0]->source != DPBUF_DPDK)) {
+ if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
struct netdev *netdev = &dev->up;
dpdk_do_tx_copy(netdev, qid, batch);
- dp_packet_delete_batch(batch, may_steal);
+ dp_packet_delete_batch(batch, true);
} else {
int tx_cnt, dropped;
int batch_cnt = dp_packet_batch_size(batch);
struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets;
- dp_packet_batch_apply_cutlen(batch);
-
tx_cnt = netdev_dpdk_filter_packet_len(dev, pkts, batch_cnt);
tx_cnt = netdev_dpdk_qos_run(dev, pkts, tx_cnt, true);
dropped = batch_cnt - tx_cnt;
@@ -1953,12 +1947,11 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
static int
netdev_dpdk_eth_send(struct netdev *netdev, int qid,
- struct dp_packet_batch *batch, bool may_steal,
- bool concurrent_txq)
+ struct dp_packet_batch *batch, bool concurrent_txq)
{
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
- netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
+ netdev_dpdk_send__(dev, qid, batch, concurrent_txq);
return 0;
}
@@ -2586,6 +2579,56 @@ error:
free(response);
}
+static void
+netdev_dpdk_get_mempool_info(struct unixctl_conn *conn,
+ int argc, const char *argv[],
+ void *aux OVS_UNUSED)
+{
+ size_t size;
+ FILE *stream;
+ char *response = NULL;
+ struct netdev *netdev = NULL;
+
+ if (argc == 2) {
+ netdev = netdev_from_name(argv[1]);
+ if (!netdev || !is_dpdk_class(netdev->netdev_class)) {
+ unixctl_command_reply_error(conn, "Not a DPDK Interface");
+ goto out;
+ }
+ }
+
+ stream = open_memstream(&response, &size);
+ if (!stream) {
+ response = xasprintf("Unable to open memstream: %s.",
+ ovs_strerror(errno));
+ unixctl_command_reply_error(conn, response);
+ goto out;
+ }
+
+ if (netdev) {
+ struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+
+ ovs_mutex_lock(&dev->mutex);
+ ovs_mutex_lock(&dpdk_mp_mutex);
+
+ rte_mempool_dump(stream, dev->mp);
+
+ ovs_mutex_unlock(&dpdk_mp_mutex);
+ ovs_mutex_unlock(&dev->mutex);
+ } else {
+ ovs_mutex_lock(&dpdk_mp_mutex);
+ rte_mempool_list_dump(stream);
+ ovs_mutex_unlock(&dpdk_mp_mutex);
+ }
+
+ fclose(stream);
+
+ unixctl_command_reply(conn, response);
+out:
+ free(response);
+ netdev_close(netdev);
+}
+
/*
* Set virtqueue flags so that we do not receive interrupts.
*/
@@ -2842,6 +2885,10 @@ netdev_dpdk_class_init(void)
"pci address of device", 1, 1,
netdev_dpdk_detach, NULL);
+ unixctl_command_register("netdev-dpdk/get-mempool-info",
+ "[netdev]", 0, 1,
+ netdev_dpdk_get_mempool_info, NULL);
+
ovsthread_once_done(&once);
}
@@ -2933,8 +2980,7 @@ dpdk_ring_open(const char dev_name[], dpdk_port_t *eth_port_id)
static int
netdev_dpdk_ring_send(struct netdev *netdev, int qid,
- struct dp_packet_batch *batch, bool may_steal,
- bool concurrent_txq)
+ struct dp_packet_batch *batch, bool concurrent_txq)
{
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
struct dp_packet *packet;
@@ -2947,7 +2993,7 @@ netdev_dpdk_ring_send(struct netdev *netdev, int qid,
dp_packet_mbuf_rss_flag_reset(packet);
}
- netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
+ netdev_dpdk_send__(dev, qid, batch, concurrent_txq);
return 0;
}
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index 246cdf1a4..40086a377 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -1062,7 +1062,7 @@ netdev_dummy_rxq_drain(struct netdev_rxq *rxq_)
static int
netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
- struct dp_packet_batch *batch, bool may_steal,
+ struct dp_packet_batch *batch,
bool concurrent_txq OVS_UNUSED)
{
struct netdev_dummy *dev = netdev_dummy_cast(netdev);
@@ -1071,7 +1071,7 @@ netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
struct dp_packet *packet;
DP_PACKET_BATCH_FOR_EACH(packet, batch) {
const void *buffer = dp_packet_data(packet);
- size_t size = dp_packet_get_send_len(packet);
+ size_t size = dp_packet_size(packet);
if (batch->packets[i]->packet_type != htonl(PT_ETH)) {
error = EPFNOSUPPORT;
@@ -1132,7 +1132,7 @@ netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
ovs_mutex_unlock(&dev->mutex);
}
- dp_packet_delete_batch(batch, may_steal);
+ dp_packet_delete_batch(batch, true);
return error;
}
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 40b089094..ccb6def6c 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1198,7 +1198,7 @@ netdev_linux_sock_batch_send(int sock, int ifindex,
struct dp_packet *packet;
DP_PACKET_BATCH_FOR_EACH (packet, batch) {
iov[i].iov_base = dp_packet_data(packet);
- iov[i].iov_len = dp_packet_get_send_len(packet);
+ iov[i].iov_len = dp_packet_size(packet);
mmsg[i].msg_hdr = (struct msghdr) { .msg_name = &sll,
.msg_namelen = sizeof sll,
.msg_iov = &iov[i],
@@ -1235,7 +1235,7 @@ netdev_linux_tap_batch_send(struct netdev *netdev_,
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
struct dp_packet *packet;
DP_PACKET_BATCH_FOR_EACH (packet, batch) {
- size_t size = dp_packet_get_send_len(packet);
+ size_t size = dp_packet_size(packet);
ssize_t retval;
int error;
@@ -1270,7 +1270,7 @@ netdev_linux_tap_batch_send(struct netdev *netdev_,
* expected to do additional queuing of packets. */
static int
netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
- struct dp_packet_batch *batch, bool may_steal,
+ struct dp_packet_batch *batch,
bool concurrent_txq OVS_UNUSED)
{
int error = 0;
@@ -1306,7 +1306,7 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
}
free_batch:
- dp_packet_delete_batch(batch, may_steal);
+ dp_packet_delete_batch(batch, true);
return error;
}
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 1720deb53..3d9f3365d 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -348,9 +348,8 @@ struct netdev_class {
* If the function returns a non-zero value, some of the packets might have
* been sent anyway.
*
- * If 'may_steal' is false, the caller retains ownership of all the
- * packets. If 'may_steal' is true, the caller transfers ownership of all
- * the packets to the network device, regardless of success.
+ * The caller transfers ownership of all the packets to the network
+ * device, regardless of success.
*
* If 'concurrent_txq' is true, the caller may perform concurrent calls
* to netdev_send() with the same 'qid'. The netdev provider is responsible
@@ -370,7 +369,7 @@ struct netdev_class {
* datapath". It will also prevent the OVS implementation of bonding from
* working properly over 'netdev'.) */
int (*send)(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
- bool may_steal, bool concurrent_txq);
+ bool concurrent_txq);
/* Registers with the poll loop to wake up from the next call to
* poll_block() when the packet transmission queue for 'netdev' has
diff --git a/lib/netdev.c b/lib/netdev.c
index cd11281a5..b1b6da4a5 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -771,9 +771,8 @@ netdev_get_pt_mode(const struct netdev *netdev)
* If the function returns a non-zero value, some of the packets might have
* been sent anyway.
*
- * If 'may_steal' is false, the caller retains ownership of all the packets.
- * If 'may_steal' is true, the caller transfers ownership of all the packets
- * to the network device, regardless of success.
+ * The caller transfers ownership of all the packets to the network device,
+ * regardless of success.
*
* If 'concurrent_txq' is true, the caller may perform concurrent calls
* to netdev_send() with the same 'qid'. The netdev provider is responsible
@@ -787,15 +786,12 @@ netdev_get_pt_mode(const struct netdev *netdev)
* queues. */
int
netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
- bool may_steal, bool concurrent_txq)
+ bool concurrent_txq)
{
- int error = netdev->netdev_class->send(netdev, qid, batch, may_steal,
+ int error = netdev->netdev_class->send(netdev, qid, batch,
concurrent_txq);
if (!error) {
COVERAGE_INC(netdev_sent);
- if (!may_steal) {
- dp_packet_batch_reset_cutlen(batch);
- }
}
return error;
}
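
With 'may_steal' removed, netdev_send() now unconditionally takes ownership of the batch, so a caller that still needs its packets must clone them first; that is exactly what dp_execute_cb() does with dp_packet_batch_clone() in the dpif-netdev hunk above. A hedged sketch of that idiom follows; the wrapper name is invented for illustration, while the dp_packet_batch_* helpers are the same ones used above:

#include "dp-packet.h"
#include "netdev.h"

/* Hypothetical wrapper: send a copy so the caller keeps 'batch'. */
static int
netdev_send_keep_ownership(struct netdev *netdev, int qid,
                           struct dp_packet_batch *batch,
                           bool concurrent_txq)
{
    struct dp_packet_batch clone;

    dp_packet_batch_clone(&clone, batch);   /* Deep-copies the packets. */
    dp_packet_batch_reset_cutlen(batch);    /* Cutlen moved to the clone. */
    /* netdev_send() consumes 'clone' regardless of the return value. */
    return netdev_send(netdev, qid, &clone, concurrent_txq);
}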
diff --git a/lib/netdev.h b/lib/netdev.h
index 3a545fe06..dc9d9a0f5 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -181,7 +181,7 @@ int netdev_rxq_drain(struct netdev_rxq *);
/* Packet transmission. */
int netdev_send(struct netdev *, int qid, struct dp_packet_batch *,
- bool may_steal, bool concurrent_txq);
+ bool concurrent_txq);
void netdev_send_wait(struct netdev *, int qid);
/* Flow offloading. */
diff --git a/manpages.mk b/manpages.mk
index 7d6a507e0..351155f34 100644
--- a/manpages.mk
+++ b/manpages.mk
@@ -255,6 +255,7 @@ vswitchd/ovs-vswitchd.8: \
lib/daemon.man \
lib/dpctl.man \
lib/memory-unixctl.man \
+ lib/netdev-dpdk-unixctl.man \
lib/service.man \
lib/ssl-bootstrap.man \
lib/ssl.man \
@@ -270,6 +271,7 @@ lib/coverage-unixctl.man:
lib/daemon.man:
lib/dpctl.man:
lib/memory-unixctl.man:
+lib/netdev-dpdk-unixctl.man:
lib/service.man:
lib/ssl-bootstrap.man:
lib/ssl.man:
diff --git a/vswitchd/ovs-vswitchd.8.in b/vswitchd/ovs-vswitchd.8.in
index 478acc52f..80e5f5327 100644
--- a/vswitchd/ovs-vswitchd.8.in
+++ b/vswitchd/ovs-vswitchd.8.in
@@ -282,6 +282,7 @@ port names, which this thread polls.
.IP "\fBdpif-netdev/pmd-rxq-rebalance\fR [\fIdp\fR]"
Reassigns rxqs to pmds in the datapath \fIdp\fR based on their current usage.
.
+.so lib/netdev-dpdk-unixctl.man
.so ofproto/ofproto-dpif-unixctl.man
.so ofproto/ofproto-unixctl.man
.so lib/vlog-unixctl.man