summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorIan Stokes <ian.stokes@intel.com>2016-03-02 20:35:54 +0000
committerDaniele Di Proietto <diproiettod@vmware.com>2016-03-02 17:46:27 -0800
commit0bf765f753fd6cfde4918be49474fcd20659ceaa (patch)
tree75441bac1a48afbdcb418b565b82b93566e8a72e /lib
parentd0a46cb4608e632f5028034762f0adde2ce947a0 (diff)
downloadopenvswitch-0bf765f753fd6cfde4918be49474fcd20659ceaa.tar.gz
netdev_dpdk.c: Add QoS functionality.
This patch provides the modifications required in netdev-dpdk.c and vswitch.xml to allow for a DPDK user space QoS algorithm. This patch adds a QoS configuration structure for netdev-dpdk and expected QoS operations 'dpdk_qos_ops'. Various helper functions are also supplied. Also included are the modifications required for vswitch.xml to allow a new QoS implementation for netdev-dpdk devices. This includes a new QoS type `egress-policer` as well as its expected QoS table entries. The QoS functionality implemented for DPDK devices is `egress-policer`. This can be used to drop egress packets at a configurable rate. The INSTALL.DPDK.md guide has also been modified to provide an example configuration of `egress-policer` QoS. Signed-off-by: Ian Stokes <ian.stokes@intel.com> Acked-by: Flavio Leitner <fbl@sysclose.org> Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
Diffstat (limited to 'lib')
-rw-r--r--lib/netdev-dpdk.c423
1 files changed, 410 insertions, 13 deletions
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index b3518b87d..d21bb4c43 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -45,6 +45,7 @@
#include "ovs-rcu.h"
#include "packets.h"
#include "shash.h"
+#include "smap.h"
#include "sset.h"
#include "unaligned.h"
#include "timeval.h"
@@ -53,6 +54,7 @@
#include "rte_config.h"
#include "rte_mbuf.h"
+#include "rte_meter.h"
#include "rte_virtio_net.h"
VLOG_DEFINE_THIS_MODULE(dpdk);
@@ -145,6 +147,101 @@ static int rte_eal_init_ret = ENODEV;
static struct ovs_mutex dpdk_mutex = OVS_MUTEX_INITIALIZER;
+/* Quality of Service */
+
+/* An instance of a QoS configuration. Always associated with a particular
+ * network device.
+ *
+ * Each QoS implementation subclasses this by embedding it in a larger
+ * structure that holds whatever additional data it needs, and recovers that
+ * structure with CONTAINER_OF() (see 'struct egress_policer').
+ */
+struct qos_conf {
+ const struct dpdk_qos_ops *ops; /* Operations of the owning QoS type. */
+};
+
+/* A particular implementation of dpdk QoS operations.
+ *
+ * The functions below return 0 if successful or a positive errno value on
+ * failure, except where otherwise noted. All of them must be provided, except
+ * where otherwise noted.
+ */
+struct dpdk_qos_ops {
+
+ /* Name of the QoS type */
+ const char *qos_name;
+
+ /* Called to construct the QoS implementation on 'netdev'. The
+ * implementation should make the appropriate calls to configure QoS
+ * according to 'details'. The implementation may assume that any QoS
+ * configuration previously installed on 'netdev' has already been
+ * destroyed before this function is called.
+ *
+ * The contents of 'details' should be documented as valid for 'qos_name'
+ * in the "other_config" column in the "QoS" table in vswitchd/vswitch.xml
+ * (which is built as ovs-vswitchd.conf.db(8)).
+ *
+ * This function must return 0 if and only if it sets 'netdev->qos_conf'
+ * to an initialized 'struct qos_conf'.
+ *
+ * For all QoS implementations it should always be non-null.
+ */
+ int (*qos_construct)(struct netdev *netdev, const struct smap *details);
+
+ /* Destroys the data structures allocated by the implementation as part of
+ * 'qos_conf'.
+ *
+ * For all QoS implementations it should always be non-null.
+ */
+ void (*qos_destruct)(struct netdev *netdev, struct qos_conf *conf);
+
+ /* Retrieves details of 'netdev->qos_conf' configuration into 'details'.
+ *
+ * The contents of 'details' should be documented as valid for 'qos_name'
+ * in the "other_config" column in the "QoS" table in vswitchd/vswitch.xml
+ * (which is built as ovs-vswitchd.conf.db(8)).
+ *
+ * This function may be null, in which case netdev_dpdk_get_qos() reports
+ * the QoS type without adding anything to 'details'.
+ */
+ int (*qos_get)(const struct netdev *netdev, struct smap *details);
+
+ /* Reconfigures 'netdev->qos_conf' according to 'details', performing any
+ * required calls to complete the reconfiguration.
+ *
+ * The contents of 'details' should be documented as valid for 'qos_name'
+ * in the "other_config" column in the "QoS" table in vswitchd/vswitch.xml
+ * (which is built as ovs-vswitchd.conf.db(8)).
+ *
+ * This function may be null if 'qos_conf' is not configurable.
+ */
+ int (*qos_set)(struct netdev *netdev, const struct smap *details);
+
+ /* Modify an array of rte_mbufs. The modification is specific to
+ * each qos implementation.
+ *
+ * The function should take an array of mbufs and an int representing
+ * the current number of mbufs present in the array.
+ *
+ * After the function has performed a qos modification to the array of
+ * mbufs it returns an int representing the number of mbufs now present in
+ * the array. This value can then be passed to the port send function
+ * along with the modified array for transmission.
+ *
+ * Unlike the other operations, the return value is a packet count, not
+ * an errno value.
+ *
+ * For all QoS implementations it should always be non-null.
+ */
+ int (*qos_run)(struct netdev *netdev, struct rte_mbuf **pkts,
+ int pkt_cnt);
+};
+
+/* dpdk_qos_ops for each type of user space QoS implementation. Only declared
+ * here; the definition follows the egress-policer functions. */
+static const struct dpdk_qos_ops egress_policer_ops;
+
+/*
+ * NULL-terminated array of pointers to every supported set of QoS
+ * operations. Walked by qos_lookup_name() and netdev_dpdk_get_qos_types().
+ */
+static const struct dpdk_qos_ops *const qos_confs[] = {
+ &egress_policer_ops,
+ NULL
+};
+
/* Contains all 'struct dpdk_dev's. */
static struct ovs_list dpdk_list OVS_GUARDED_BY(dpdk_mutex)
= OVS_LIST_INITIALIZER(&dpdk_list);
@@ -237,6 +334,11 @@ struct netdev_dpdk {
/* In dpdk_list. */
struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
+
+ /* QoS configuration and lock for the device */
+ struct qos_conf *qos_conf;
+ rte_spinlock_t qos_lock;
+
};
struct netdev_rxq_dpdk {
@@ -612,6 +714,10 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no,
goto unlock;
}
+ /* Initialise QoS configuration to NULL and qos lock to unlocked */
+ netdev->qos_conf = NULL;
+ rte_spinlock_init(&netdev->qos_lock);
+
netdev_->n_txq = NR_QUEUE;
netdev_->n_rxq = NR_QUEUE;
netdev_->requested_n_rxq = NR_QUEUE;
@@ -1107,6 +1213,23 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
return 0;
}
+/* Applies the QoS policy configured on 'dev', if any, to the 'cnt' mbufs in
+ * 'pkts' and returns the number of mbufs that remain in 'pkts' afterwards.
+ * When no QoS is configured, 'pkts' is left untouched and 'cnt' is returned
+ * unchanged. */
+static inline int
+netdev_dpdk_qos_run__(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
+                      int cnt)
+{
+    /* Unlocked peek: devices without QoS never touch the lock. */
+    if (dev->qos_conf == NULL) {
+        return cnt;
+    }
+
+    rte_spinlock_lock(&dev->qos_lock);
+    /* Re-check under the lock; the configuration may have been removed
+     * between the peek above and acquiring the lock. */
+    if (dev->qos_conf != NULL) {
+        cnt = dev->qos_conf->ops->qos_run(&dev->up, pkts, cnt);
+    }
+    rte_spinlock_unlock(&dev->qos_lock);
+
+    return cnt;
+}
+
static inline void
netdev_dpdk_vhost_update_tx_counters(struct netdev_stats *stats,
struct dp_packet **packets,
@@ -1133,6 +1256,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(vhost_dev);
struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
unsigned int total_pkts = cnt;
+ unsigned int qos_pkts = cnt;
uint64_t start = 0;
qid = vhost_dev->tx_q[qid % vhost_dev->real_n_txq].map;
@@ -1146,6 +1270,10 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
rte_spinlock_lock(&vhost_dev->tx_q[qid].tx_lock);
+ /* Check if QoS has been configured for the netdev */
+ cnt = netdev_dpdk_qos_run__(vhost_dev, cur_pkts, cnt);
+ qos_pkts -= cnt;
+
do {
int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
unsigned int tx_pkts;
@@ -1185,6 +1313,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
rte_spinlock_unlock(&vhost_dev->tx_q[qid].tx_lock);
rte_spinlock_lock(&vhost_dev->stats_lock);
+ cnt += qos_pkts;
netdev_dpdk_vhost_update_tx_counters(&vhost_dev->stats, pkts, total_pkts,
cnt);
rte_spinlock_unlock(&vhost_dev->stats_lock);
@@ -1280,19 +1409,25 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
newcnt++;
}
- if (OVS_UNLIKELY(dropped)) {
- rte_spinlock_lock(&dev->stats_lock);
- dev->stats.tx_dropped += dropped;
- rte_spinlock_unlock(&dev->stats_lock);
- }
-
if (dev->type == DPDK_DEV_VHOST) {
__netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) mbufs, newcnt, true);
} else {
+ unsigned int qos_pkts = newcnt;
+
+ /* Check if QoS has been configured for this netdev. */
+ newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);
+
+ dropped += qos_pkts - newcnt;
dpdk_queue_pkts(dev, qid, mbufs, newcnt);
dpdk_queue_flush(dev, qid);
}
+ if (OVS_UNLIKELY(dropped)) {
+ rte_spinlock_lock(&dev->stats_lock);
+ dev->stats.tx_dropped += dropped;
+ rte_spinlock_unlock(&dev->stats_lock);
+ }
+
if (!dpdk_thread_is_pmd()) {
ovs_mutex_unlock(&nonpmd_mempool_mutex);
}
@@ -1342,15 +1477,24 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
} else {
int next_tx_idx = 0;
int dropped = 0;
+ unsigned int qos_pkts = 0;
+ unsigned int temp_cnt = 0;
for (i = 0; i < cnt; i++) {
int size = dp_packet_size(pkts[i]);
if (OVS_UNLIKELY(size > dev->max_packet_len)) {
if (next_tx_idx != i) {
+ temp_cnt = i - next_tx_idx;
+ qos_pkts = temp_cnt;
+
+ temp_cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts,
+ temp_cnt);
+ dropped += qos_pkts - temp_cnt;
dpdk_queue_pkts(dev, qid,
(struct rte_mbuf **)&pkts[next_tx_idx],
- i-next_tx_idx);
+ temp_cnt);
+
}
VLOG_WARN_RL(&rl, "Too big size %d max_packet_len %d",
@@ -1362,9 +1506,13 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
}
}
if (next_tx_idx != cnt) {
- dpdk_queue_pkts(dev, qid,
- (struct rte_mbuf **)&pkts[next_tx_idx],
- cnt-next_tx_idx);
+ cnt -= next_tx_idx;
+ qos_pkts = cnt;
+
+ cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts, cnt);
+ dropped += qos_pkts - cnt;
+ dpdk_queue_pkts(dev, qid, (struct rte_mbuf **)&pkts[next_tx_idx],
+ cnt);
}
if (OVS_UNLIKELY(dropped)) {
@@ -2239,6 +2387,255 @@ unlock_dpdk:
return err;
}
+/* QoS Functions */
+
+/* Binds 'conf' to the QoS implementation described by 'ops'.  This is the
+ * common part of initialization; the implementation that embeds 'conf' is
+ * responsible for its own additional fields. */
+static void
+qos_conf_init(struct qos_conf *conf, const struct dpdk_qos_ops *ops)
+{
+    conf->ops = ops;
+}
+
+/* Returns the entry of 'qos_confs' whose 'qos_name' equals 'name', or NULL
+ * if no supported QoS type has that name. */
+static const struct dpdk_qos_ops *
+qos_lookup_name(const char *name)
+{
+    int i;
+
+    for (i = 0; qos_confs[i] != NULL; i++) {
+        if (strcmp(qos_confs[i]->qos_name, name) == 0) {
+            return qos_confs[i];
+        }
+    }
+
+    return NULL;
+}
+
+/* Removes the QoS configuration of 'netdev_', if there is one: invokes the
+ * implementation's 'qos_destruct' callback (when provided) and then resets
+ * the device's 'qos_conf' to NULL.  Both steps happen under 'qos_lock', the
+ * same lock netdev_dpdk_qos_run__() holds while running QoS. */
+static void
+qos_delete_conf(struct netdev *netdev_)
+{
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev_);
+    struct qos_conf *conf;
+
+    rte_spinlock_lock(&dev->qos_lock);
+    conf = dev->qos_conf;
+    if (conf != NULL) {
+        if (conf->ops->qos_destruct != NULL) {
+            conf->ops->qos_destruct(netdev_, conf);
+        }
+        dev->qos_conf = NULL;
+    }
+    rte_spinlock_unlock(&dev->qos_lock);
+}
+
+/* Adds to 'types' the name of every entry in 'qos_confs' that has a
+ * 'qos_construct' callback and a non-empty name.  Always returns 0. */
+static int
+netdev_dpdk_get_qos_types(const struct netdev *netdev OVS_UNUSED,
+                          struct sset *types)
+{
+    int i;
+
+    for (i = 0; qos_confs[i] != NULL; i++) {
+        const struct dpdk_qos_ops *ops = qos_confs[i];
+
+        /* Skip entries that cannot be constructed or have no name. */
+        if (!ops->qos_construct || ops->qos_name[0] == '\0') {
+            continue;
+        }
+        sset_add(types, ops->qos_name);
+    }
+
+    return 0;
+}
+
+/* Reports the QoS configuration of 'netdev_'.
+ *
+ * When QoS is configured, stores the name of its type in '*typep' and lets
+ * the type's optional 'qos_get' callback add its parameters to 'details'.
+ * When no QoS is configured, stores "" in '*typep' (QoS disabled) and leaves
+ * 'details' untouched, so that the caller never reads an uninitialized type
+ * pointer after a successful return.
+ *
+ * Returns 0 on success, otherwise the value returned by 'qos_get'. */
+static int
+netdev_dpdk_get_qos(const struct netdev *netdev_,
+                    const char **typep, struct smap *details)
+{
+    struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
+    int error = 0;
+
+    ovs_mutex_lock(&netdev->mutex);
+    if (netdev->qos_conf) {
+        const struct dpdk_qos_ops *ops = netdev->qos_conf->ops;
+
+        *typep = ops->qos_name;
+        /* 'qos_get' is optional; without it there are no details. */
+        error = ops->qos_get ? ops->qos_get(netdev_, details) : 0;
+    } else {
+        /* No QoS configured: report it as disabled. */
+        *typep = "";
+    }
+    ovs_mutex_unlock(&netdev->mutex);
+
+    return error;
+}
+
+/* Changes the QoS configuration of 'netdev_' to 'type' with parameters taken
+ * from 'details'.
+ *
+ * - If 'type' is "" the current configuration, if any, is deleted and 0 is
+ *   returned: disabling QoS is a valid request, not an error.
+ * - If 'type' is not a supported, constructible QoS type, the current
+ *   configuration is deleted and EOPNOTSUPP is returned.
+ * - If 'type' matches the installed configuration, it is reconfigured in
+ *   place through the optional 'qos_set' callback.
+ * - Otherwise any installed configuration is deleted and a new one is
+ *   constructed.
+ *
+ * The whole operation runs under 'netdev->mutex', including the deletion
+ * paths, so that it cannot race with netdev_dpdk_get_qos(), which reads
+ * 'qos_conf' under that mutex.
+ *
+ * Returns 0 on success or the error reported by the QoS implementation. */
+static int
+netdev_dpdk_set_qos(struct netdev *netdev_,
+                    const char *type, const struct smap *details)
+{
+    struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
+    const struct dpdk_qos_ops *new_ops = qos_lookup_name(type);
+    int error = 0;
+
+    ovs_mutex_lock(&netdev->mutex);
+
+    if (type[0] == '\0' || !new_ops || !new_ops->qos_construct) {
+        /* If type is empty or unsupported then the current QoS
+         * configuration for the dpdk-netdev can be destroyed. */
+        qos_delete_conf(netdev_);
+        error = type[0] == '\0' ? 0 : EOPNOTSUPP;
+    } else if (netdev->qos_conf && new_ops == netdev->qos_conf->ops) {
+        /* Same QoS type: only the parameters change. */
+        error = new_ops->qos_set ? new_ops->qos_set(netdev_, details) : 0;
+    } else {
+        /* Delete existing QoS configuration (no-op when there is none). */
+        qos_delete_conf(netdev_);
+        ovs_assert(netdev->qos_conf == NULL);
+
+        /* Install new QoS configuration. */
+        error = new_ops->qos_construct(netdev_, details);
+        ovs_assert((error == 0) == (netdev->qos_conf != NULL));
+    }
+
+    ovs_mutex_unlock(&netdev->mutex);
+    return error;
+}
+
+/* egress-policer details
+ *
+ * QoS configuration of type "egress-policer". It embeds 'struct qos_conf' so
+ * that a pointer to the 'qos_conf' member can be converted back to the
+ * policer with CONTAINER_OF(). */
+
+struct egress_policer {
+ struct qos_conf qos_conf; /* Generic part; installed on the netdev. */
+ struct rte_meter_srtcm_params app_srtcm_params; /* cir, cbs and ebs. */
+ struct rte_meter_srtcm egress_meter; /* Meter built from the params. */
+};
+
+/* Returns the egress policer that embeds the 'qos_conf' currently installed
+ * on 'netdev_'.  The result is only meaningful while 'netdev_' has an
+ * egress-policer configuration installed. */
+static struct egress_policer *
+egress_policer_get__(const struct netdev *netdev_)
+{
+    struct qos_conf *conf = netdev_dpdk_cast(netdev_)->qos_conf;
+
+    return CONTAINER_OF(conf, struct egress_policer, qos_conf);
+}
+
+/* Creates an egress policer for 'netdev_' from the "cir" and "cbs" keys of
+ * 'details' (each defaults to 0 when absent; "ebs" is always 0).
+ *
+ * The policer is allocated and its meter configured before anything is
+ * published, so no allocation happens while 'qos_lock' is held.  Only a fully
+ * configured policer is installed as 'netdev->qos_conf', under 'qos_lock'.
+ *
+ * Returns 0 if and only if 'netdev->qos_conf' was set.  If
+ * rte_meter_srtcm_config() rejects the parameters, the policer is freed,
+ * 'netdev->qos_conf' is left untouched and EINVAL is returned; the DPDK
+ * return value itself is not passed on because the 'dpdk_qos_ops' contract
+ * asks for a positive errno value. */
+static int
+egress_policer_qos_construct(struct netdev *netdev_,
+                             const struct smap *details)
+{
+    struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
+    struct egress_policer *policer;
+    const char *cir_s;
+    const char *cbs_s;
+
+    policer = xmalloc(sizeof *policer);
+    qos_conf_init(&policer->qos_conf, &egress_policer_ops);
+    cir_s = smap_get(details, "cir");
+    cbs_s = smap_get(details, "cbs");
+    policer->app_srtcm_params.cir = cir_s ? strtoull(cir_s, NULL, 10) : 0;
+    policer->app_srtcm_params.cbs = cbs_s ? strtoull(cbs_s, NULL, 10) : 0;
+    policer->app_srtcm_params.ebs = 0;
+
+    if (rte_meter_srtcm_config(&policer->egress_meter,
+                               &policer->app_srtcm_params)) {
+        /* Nothing was published yet, so there is nothing to roll back. */
+        free(policer);
+        return EINVAL;
+    }
+
+    rte_spinlock_lock(&netdev->qos_lock);
+    netdev->qos_conf = &policer->qos_conf;
+    rte_spinlock_unlock(&netdev->qos_lock);
+
+    return 0;
+}
+
+/* Frees the egress policer that embeds 'conf'.  'netdev_' is unused. */
+static void
+egress_policer_qos_destruct(struct netdev *netdev_ OVS_UNUSED,
+                            struct qos_conf *conf)
+{
+    free(CONTAINER_OF(conf, struct egress_policer, qos_conf));
+}
+
+/* Adds the "cir" and "cbs" parameters of the egress policer installed on
+ * 'netdev' to 'details' as decimal strings.  Always returns 0. */
+static int
+egress_policer_qos_get(const struct netdev *netdev, struct smap *details)
+{
+    const struct rte_meter_srtcm_params *params;
+
+    params = &egress_policer_get__(netdev)->app_srtcm_params;
+    /* '1ULL *' widens the value to match the "%llu" conversion. */
+    smap_add_format(details, "cir", "%llu", 1ULL * params->cir);
+    smap_add_format(details, "cbs", "%llu", 1ULL * params->cbs);
+
+    return 0;
+}
+
+/* Reconfigures the egress policer installed on 'netdev_' from the "cir" and
+ * "cbs" keys of 'details' (each defaults to 0 when absent; "ebs" is always
+ * 0).
+ *
+ * The new meter is built in local storage first.  Only when
+ * rte_meter_srtcm_config() accepts the parameters are the stored parameters
+ * and the live meter replaced, under 'qos_lock', so that
+ * egress_policer_run(), which uses the meter under the same lock, never sees
+ * a half-updated meter.  On failure the previous configuration is kept
+ * intact.
+ *
+ * Returns 0 on success, or EINVAL if the parameters are rejected; the DPDK
+ * return value itself is not passed on because the 'dpdk_qos_ops' contract
+ * asks for a positive errno value. */
+static int
+egress_policer_qos_set(struct netdev *netdev_, const struct smap *details)
+{
+    struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
+    struct rte_meter_srtcm_params params;
+    struct rte_meter_srtcm meter;
+    struct egress_policer *policer;
+    const char *cir_s = smap_get(details, "cir");
+    const char *cbs_s = smap_get(details, "cbs");
+
+    memset(&params, 0, sizeof params);
+    params.cir = cir_s ? strtoull(cir_s, NULL, 10) : 0;
+    params.cbs = cbs_s ? strtoull(cbs_s, NULL, 10) : 0;
+    params.ebs = 0;
+
+    if (rte_meter_srtcm_config(&meter, &params)) {
+        return EINVAL;
+    }
+
+    rte_spinlock_lock(&netdev->qos_lock);
+    policer = egress_policer_get__(netdev_);
+    policer->app_srtcm_params = params;
+    policer->egress_meter = meter;
+    rte_spinlock_unlock(&netdev->qos_lock);
+
+    return 0;
+}
+
+/* Runs 'pkt' through 'meter' at timestamp 'time' and returns true if the
+ * meter colors it green, false otherwise.  The length presented to the meter
+ * is the packet length minus the size of an Ethernet header. */
+static inline bool
+egress_policer_pkt_handle__(struct rte_meter_srtcm *meter,
+                            struct rte_mbuf *pkt, uint64_t time)
+{
+    uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct ether_hdr);
+
+    if (rte_meter_srtcm_color_blind_check(meter, time, pkt_len)
+        != e_RTE_METER_GREEN) {
+        return false;
+    }
+    return true;
+}
+
+/* Polices the 'pkt_cnt' mbufs in 'pkts' with the egress policer installed on
+ * 'netdev_'.  Packets that the meter does not color green are freed; the
+ * others are compacted to the front of 'pkts' in their original order.  One
+ * timestamp, taken on entry, is used for the whole batch.
+ *
+ * Returns the number of packets left in 'pkts'. */
+static int
+egress_policer_run(struct netdev *netdev_, struct rte_mbuf **pkts,
+                   int pkt_cnt)
+{
+    struct egress_policer *policer = egress_policer_get__(netdev_);
+    uint64_t now = rte_rdtsc();
+    int kept = 0;
+    int i;
+
+    for (i = 0; i < pkt_cnt; i++) {
+        struct rte_mbuf *mbuf = pkts[i];
+
+        if (!egress_policer_pkt_handle__(&policer->egress_meter, mbuf, now)) {
+            /* Over the configured rate: drop. */
+            rte_pktmbuf_free(mbuf);
+            continue;
+        }
+
+        /* Close the gap left by previously dropped packets. */
+        if (kept != i) {
+            pkts[kept] = mbuf;
+        }
+        kept++;
+    }
+
+    return kept;
+}
+
+/* Operations of the "egress-policer" QoS type. */
+static const struct dpdk_qos_ops egress_policer_ops = {
+    .qos_name = "egress-policer",
+    .qos_construct = egress_policer_qos_construct,
+    .qos_destruct = egress_policer_qos_destruct,
+    .qos_get = egress_policer_qos_get,
+    .qos_set = egress_policer_qos_set,
+    .qos_run = egress_policer_run
+};
+
#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, MULTIQ, SEND, \
GET_CARRIER, GET_STATS, GET_FEATURES, GET_STATUS, RXQ_RECV) \
{ \
@@ -2276,10 +2673,10 @@ unlock_dpdk:
NULL, /* set_advertisements */ \
\
NULL, /* set_policing */ \
- NULL, /* get_qos_types */ \
+ netdev_dpdk_get_qos_types, \
NULL, /* get_qos_capabilities */ \
- NULL, /* get_qos */ \
- NULL, /* set_qos */ \
+ netdev_dpdk_get_qos, \
+ netdev_dpdk_set_qos, \
NULL, /* get_queue */ \
NULL, /* set_queue */ \
NULL, /* delete_queue */ \