summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/howto/dpdk.rst25
-rw-r--r--NEWS1
-rw-r--r--lib/dp-packet.h31
-rw-r--r--lib/netdev-dpdk.c50
-rw-r--r--lib/netdev-native-tnl.c35
-rw-r--r--lib/netdev.c4
-rw-r--r--vswitchd/vswitch.xml14
7 files changed, 145 insertions, 15 deletions
diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index e96ce5687..db92a8e1b 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -273,6 +273,31 @@ largest frame size supported by Fortville NIC using the DPDK i40e driver, but
larger frames and other DPDK NIC drivers may be supported. These cases are
common for use cases involving East-West traffic only.
+Rx Checksum Offload
+-------------------
+
+By default, DPDK physical ports are enabled with Rx checksum offload. Rx
+checksum offload can be configured on a DPDK physical port either when adding
+the port or at run time.
+
+To disable Rx checksum offload when adding a DPDK port dpdk0::
+
+ $ ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk \
+ options:rx-checksum-offload=false
+
+Similarly, to disable Rx checksum offloading on an existing DPDK port dpdk0::
+
+ $ ovs-vsctl set Interface dpdk0 type=dpdk options:rx-checksum-offload=false
+
+Rx checksum offload can offer performance improvement only for tunneling
+traffic in OVS-DPDK because the checksum validation of tunnel packets is
+offloaded to the NIC. Also, enabling Rx checksum offload may slightly reduce
+the performance of non-tunnel traffic, specifically for smaller packets.
+DPDK vectorization is disabled when checksum offloading is configured on DPDK
+physical ports, which in turn affects the non-tunnel traffic performance.
+So it is advised to turn off the Rx checksum offload for non-tunnel traffic use
+cases to achieve the best performance.
+
.. _dpdk-ovs-in-guest:
OVS with DPDK Inside VMs
diff --git a/NEWS b/NEWS
index 580a9d7bc..daa9ff519 100644
--- a/NEWS
+++ b/NEWS
@@ -46,6 +46,7 @@ Post-v2.6.0
* New option 'n_rxq_desc' and 'n_txq_desc' fields for DPDK interfaces
which set the number of rx and tx descriptors to use for the given port.
* Support for DPDK v16.11.
+ * Support for Rx checksum offload. Refer to DPDK HOWTO for details.
- Fedora packaging:
* A package upgrade does not automatically restart OVS service.
- ovs-vswitchd/ovs-vsctl:
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 1469864cf..cf7d247f2 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -598,6 +598,37 @@ dp_packet_rss_invalidate(struct dp_packet *p)
#endif
}
+static inline bool
+dp_packet_ip_checksum_valid(struct dp_packet *p)
+{
+#ifdef DPDK_NETDEV
+ return p->mbuf.ol_flags & PKT_RX_IP_CKSUM_GOOD;
+#else
+ return 0 && p;
+#endif
+}
+
+static inline bool
+dp_packet_l4_checksum_valid(struct dp_packet *p)
+{
+#ifdef DPDK_NETDEV
+ return p->mbuf.ol_flags & PKT_RX_L4_CKSUM_GOOD;
+#else
+ return 0 && p;
+#endif
+}
+
+#ifdef DPDK_NETDEV
+static inline void
+reset_dp_packet_checksum_ol_flags(struct dp_packet *p)
+{
+ p->mbuf.ol_flags &= ~(PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD);
+}
+#else
+#define reset_dp_packet_checksum_ol_flags(arg)
+#endif
+
enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */
struct dp_packet_batch {
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 625f425c2..fc478c4be 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -319,6 +319,10 @@ struct ingress_policer {
rte_spinlock_t policer_lock;
};
+enum dpdk_hw_ol_features {
+ NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0,
+};
+
struct netdev_dpdk {
struct netdev up;
int port_id;
@@ -384,6 +388,10 @@ struct netdev_dpdk {
/* DPDK-ETH Flow control */
struct rte_eth_fc_conf fc_conf;
+
+ /* DPDK-ETH hardware offload features,
+ * from the enum set 'dpdk_hw_ol_features' */
+ uint32_t hw_ol_features;
};
struct netdev_rxq_dpdk {
@@ -633,6 +641,8 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
conf.rxmode.jumbo_frame = 0;
conf.rxmode.max_rx_pkt_len = 0;
}
+ conf.rxmode.hw_ip_checksum = (dev->hw_ol_features &
+ NETDEV_RX_CHECKSUM_OFFLOAD) != 0;
/* A device may report more queues than it makes available (this has
* been observed for Intel xl710, which reserves some of them for
* SRIOV): rte_eth_*_queue_setup will fail if a queue is not
@@ -693,6 +703,29 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
}
static void
+dpdk_eth_checksum_offload_configure(struct netdev_dpdk *dev)
+ OVS_REQUIRES(dev->mutex)
+{
+ struct rte_eth_dev_info info;
+ bool rx_csum_ol_flag = false;
+ uint32_t rx_chksm_offload_capa = DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM |
+ DEV_RX_OFFLOAD_IPV4_CKSUM;
+ rte_eth_dev_info_get(dev->port_id, &info);
+ rx_csum_ol_flag = (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD) != 0;
+
+ if (rx_csum_ol_flag &&
+ (info.rx_offload_capa & rx_chksm_offload_capa) !=
+ rx_chksm_offload_capa) {
+ VLOG_WARN_ONCE("Rx checksum offload is not supported on device %d",
+ dev->port_id);
+ dev->hw_ol_features &= ~NETDEV_RX_CHECKSUM_OFFLOAD;
+ return;
+ }
+ netdev_request_reconfigure(&dev->up);
+}
+
+static void
dpdk_eth_flow_ctrl_setup(struct netdev_dpdk *dev) OVS_REQUIRES(dev->mutex)
{
if (rte_eth_dev_flow_ctrl_set(dev->port_id, &dev->fc_conf)) {
@@ -851,6 +884,9 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
/* Initialize the flow control to NULL */
memset(&dev->fc_conf, 0, sizeof dev->fc_conf);
+
+ /* Initialize the hardware offload flags to 0 */
+ dev->hw_ol_features = 0;
if (type == DPDK_DEV_ETH) {
err = dpdk_eth_dev_init(dev);
if (err) {
@@ -1072,6 +1108,9 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
dev->requested_txq_size);
smap_add_format(args, "configured_txq_descriptors", "%d",
dev->txq_size);
+ if (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD) {
+ smap_add(args, "rx_csum_offload", "true");
+ }
}
ovs_mutex_unlock(&dev->mutex);
@@ -1118,6 +1157,8 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args)
{RTE_FC_NONE, RTE_FC_TX_PAUSE},
{RTE_FC_RX_PAUSE, RTE_FC_FULL }
};
+ bool rx_chksm_ofld;
+ bool temp_flag;
ovs_mutex_lock(&dev->mutex);
@@ -1141,6 +1182,15 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args)
dpdk_eth_flow_ctrl_setup(dev);
}
+ /* Rx checksum offload configuration */
+ /* By default the Rx checksum offload is ON */
+ rx_chksm_ofld = smap_get_bool(args, "rx-checksum-offload", true);
+ temp_flag = (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD)
+ != 0;
+ if (temp_flag != rx_chksm_ofld) {
+ dev->hw_ol_features ^= NETDEV_RX_CHECKSUM_OFFLOAD;
+ dpdk_eth_checksum_offload_configure(dev);
+ }
ovs_mutex_unlock(&dev->mutex);
return 0;
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index ce2582f13..c730e7234 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -85,9 +85,11 @@ netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
ovs_be32 ip_src, ip_dst;
- if (csum(ip, IP_IHL(ip->ip_ihl_ver) * 4)) {
- VLOG_WARN_RL(&err_rl, "ip packet has invalid checksum");
- return NULL;
+ if (OVS_UNLIKELY(!dp_packet_ip_checksum_valid(packet))) {
+ if (csum(ip, IP_IHL(ip->ip_ihl_ver) * 4)) {
+ VLOG_WARN_RL(&err_rl, "ip packet has invalid checksum");
+ return NULL;
+ }
}
if (ntohs(ip->ip_tot_len) > l3_size) {
@@ -179,18 +181,21 @@ udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
}
if (udp->udp_csum) {
- uint32_t csum;
- if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
- csum = packet_csum_pseudoheader6(dp_packet_l3(packet));
- } else {
- csum = packet_csum_pseudoheader(dp_packet_l3(packet));
- }
-
- csum = csum_continue(csum, udp, dp_packet_size(packet) -
- ((const unsigned char *)udp -
- (const unsigned char *)dp_packet_l2(packet)));
- if (csum_finish(csum)) {
- return NULL;
+ if (OVS_UNLIKELY(!dp_packet_l4_checksum_valid(packet))) {
+ uint32_t csum;
+ if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
+ csum = packet_csum_pseudoheader6(dp_packet_l3(packet));
+ } else {
+ csum = packet_csum_pseudoheader(dp_packet_l3(packet));
+ }
+
+ csum = csum_continue(csum, udp, dp_packet_size(packet) -
+ ((const unsigned char *)udp -
+ (const unsigned char *)dp_packet_l2(packet)
+ ));
+ if (csum_finish(csum)) {
+ return NULL;
+ }
}
tnl->flags |= FLOW_TNL_F_CSUM;
}
diff --git a/lib/netdev.c b/lib/netdev.c
index 860781ded..f7a1001f2 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -741,6 +741,10 @@ netdev_pop_header(struct netdev *netdev, struct dp_packet_batch *batch)
for (i = 0; i < batch->count; i++) {
buffers[i] = netdev->netdev_class->pop_header(buffers[i]);
if (buffers[i]) {
+ /* Reset the checksum offload flags if present, to avoid wrong
+ * interpretation in the further packet processing when
+ * recirculated.*/
+ reset_dp_packet_checksum_ol_flags(buffers[i]);
buffers[n_cnt++] = buffers[i];
}
}
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index b4af5a58e..e73f1849e 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -3186,6 +3186,20 @@
</column>
</group>
+ <group title="Rx Checksum Offload Configuration">
+ <p>
+ Checksum validation of incoming packets is performed on the NIC using
+ the Rx checksum offload feature. Implemented only for
+ <code>dpdk</code> physical interfaces.
+ </p>
+
+ <column name="options" key="rx-checksum-offload"
+ type='{"type": "boolean"}'>
+ Set to <code>false</code> to disable Rx checksum offloading on
+ <code>dpdk</code> physical ports. By default, Rx checksum offload is enabled.
+ </column>
+ </group>
+
<group title="Common Columns">
The overall purpose of these columns is described under <code>Common
Columns</code> at the beginning of this document.