summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Simon Horman <horms@verge.net.au> 2014-01-15 17:17:02 +0900
committer: Ben Pfaff <blp@nicira.com> 2014-01-16 15:09:14 -0800
commit: b73c85181df9cc38231a42d6f8095dcb604d230a (patch)
tree: 7c3d4f9363ad681e7f001e008cf71a2e661d5725
parent: 1bf02876a46e3e86a59f959fdac57db7f6b0a4ad (diff)
download: openvswitch-b73c85181df9cc38231a42d6f8095dcb604d230a.tar.gz
netdev-linux: Read packet auxdata to obtain vlan_tci
If VLAN acceleration is used when the kernel receives a packet, then the outermost VLAN tag will not be present in the packet when it is received by netdev-linux. Rather, it will be present in auxdata. This patch uses recvmsg() instead of recv() to read auxdata for each packet, and if the VLAN TCI (tp_vlan_tci) is set then the VLAN tag is added back to the packet. Adding the tag makes use of headroom available in the buffer parameter of rx_recv.
Signed-off-by: Simon Horman <horms@verge.net.au>
Co-authored-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
-rw-r--r-- include/sparse/sys/socket.h 1
-rw-r--r-- lib/netdev-linux.c 156
-rw-r--r-- lib/netdev-provider.h 10
-rw-r--r-- lib/netdev.c 34
4 files changed, 184 insertions, 17 deletions
diff --git a/include/sparse/sys/socket.h b/include/sparse/sys/socket.h
index 75ee43c6c..3212bf4b7 100644
--- a/include/sparse/sys/socket.h
+++ b/include/sparse/sys/socket.h
@@ -87,6 +87,7 @@ enum {
};
enum {
+ SOL_PACKET,
SOL_SOCKET
};
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 106c18a3d..9c1a36db1 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -20,11 +20,11 @@
#include <errno.h>
#include <fcntl.h>
-#include <arpa/inet.h>
#include <inttypes.h>
#include <linux/filter.h>
#include <linux/gen_stats.h>
#include <linux/if_ether.h>
+#include <linux/if_packet.h>
#include <linux/if_tun.h>
#include <linux/types.h>
#include <linux/ethtool.h>
@@ -37,10 +37,8 @@
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
-#include <netpacket/packet.h>
#include <net/if.h>
#include <net/if_arp.h>
-#include <net/if_packet.h>
#include <net/route.h>
#include <netinet/in.h>
#include <poll.h>
@@ -109,6 +107,33 @@ COVERAGE_DEFINE(netdev_set_ethtool);
#define TC_RTAB_SIZE 1024
#endif
+/* Linux 2.6.21 introduced struct tpacket_auxdata.
+ * Linux 2.6.27 added the tp_vlan_tci member.
+ * Linux 3.0 defined TP_STATUS_VLAN_VALID.
+ * Linux 3.13 repurposed a padding member for tp_vlan_tpid and defined
+ * TP_STATUS_VLAN_TPID_VALID.
+ *
+ * With all this churn it's easiest to unconditionally define a replacement
+ * structure that has everything we want.
+ */
+#ifndef TP_STATUS_VLAN_VALID
+#define TP_STATUS_VLAN_VALID (1 << 4)
+#endif
+#ifndef TP_STATUS_VLAN_TPID_VALID
+#define TP_STATUS_VLAN_TPID_VALID (1 << 6)
+#endif
+#undef tpacket_auxdata
+#define tpacket_auxdata rpl_tpacket_auxdata
+struct tpacket_auxdata {
+ uint32_t tp_status;
+ uint32_t tp_len;
+ uint32_t tp_snaplen;
+ uint16_t tp_mac;
+ uint16_t tp_net;
+ uint16_t tp_vlan_tci;
+ uint16_t tp_vlan_tpid;
+};
+
enum {
VALID_IFINDEX = 1 << 0,
VALID_ETHERADDR = 1 << 1,
@@ -763,7 +788,7 @@ netdev_linux_rx_construct(struct netdev_rx *rx_)
rx->fd = netdev->tap_fd;
} else {
struct sockaddr_ll sll;
- int ifindex;
+ int ifindex, val;
/* Result of tcpdump -dd inbound */
static const struct sock_filter filt[] = {
{ 0x28, 0, 0, 0xfffff004 }, /* ldh [0] */
@@ -783,6 +808,14 @@ netdev_linux_rx_construct(struct netdev_rx *rx_)
goto error;
}
+ val = 1;
+ if (setsockopt(rx->fd, SOL_PACKET, PACKET_AUXDATA, &val, sizeof val)) {
+ error = errno;
+ VLOG_ERR("%s: failed to mark socket for auxdata (%s)",
+ netdev_get_name(netdev_), ovs_strerror(error));
+ goto error;
+ }
+
/* Set non-blocking mode. */
error = set_nonblocking(rx->fd);
if (error) {
@@ -799,7 +832,7 @@ netdev_linux_rx_construct(struct netdev_rx *rx_)
memset(&sll, 0, sizeof sll);
sll.sll_family = AF_PACKET;
sll.sll_ifindex = ifindex;
- sll.sll_protocol = (OVS_FORCE unsigned short int) htons(ETH_P_ALL);
+ sll.sll_protocol = htons(ETH_P_ALL);
if (bind(rx->fd, (struct sockaddr *) &sll, sizeof sll) < 0) {
error = errno;
VLOG_ERR("%s: failed to bind raw socket (%s)",
@@ -847,31 +880,120 @@ netdev_linux_rx_dealloc(struct netdev_rx *rx_)
free(rx);
}
+static ovs_be16
+auxdata_to_vlan_tpid(const struct tpacket_auxdata *aux)
+{
+ if (aux->tp_status & TP_STATUS_VLAN_TPID_VALID) {
+ return htons(aux->tp_vlan_tpid);
+ } else {
+ return htons(ETH_TYPE_VLAN);
+ }
+}
+
+static bool
+auxdata_has_vlan_tci(const struct tpacket_auxdata *aux)
+{
+ return aux->tp_vlan_tci || aux->tp_status & TP_STATUS_VLAN_VALID;
+}
+
static int
-netdev_linux_rx_recv(struct netdev_rx *rx_, struct ofpbuf *buffer)
+netdev_linux_rx_recv_sock(int fd, struct ofpbuf *buffer)
{
- struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_);
+ size_t size;
ssize_t retval;
- size_t size = ofpbuf_tailroom(buffer);
+ struct iovec iov;
+ struct cmsghdr *cmsg;
+ union {
+ struct cmsghdr cmsg;
+ char buffer[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
+ } cmsg_buffer;
+ struct msghdr msgh;
+
+ /* Reserve headroom for a single VLAN tag */
+ ofpbuf_reserve(buffer, VLAN_HEADER_LEN);
+ size = ofpbuf_tailroom(buffer);
+
+ iov.iov_base = buffer->data;
+ iov.iov_len = size;
+ msgh.msg_name = NULL;
+ msgh.msg_namelen = 0;
+ msgh.msg_iov = &iov;
+ msgh.msg_iovlen = 1;
+ msgh.msg_control = &cmsg_buffer;
+ msgh.msg_controllen = sizeof cmsg_buffer;
+ msgh.msg_flags = 0;
do {
- retval = (rx->is_tap
- ? read(rx->fd, buffer->data, size)
- : recv(rx->fd, buffer->data, size, MSG_TRUNC));
+ retval = recvmsg(fd, &msgh, MSG_TRUNC);
} while (retval < 0 && errno == EINTR);
if (retval < 0) {
- if (errno != EAGAIN) {
- VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
- ovs_strerror(errno), netdev_rx_get_name(rx_));
+ return errno;
+ } else if (retval > size) {
+ return EMSGSIZE;
+ }
+
+ buffer->size += retval;
+
+ for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg; cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
+ const struct tpacket_auxdata *aux;
+
+ if (cmsg->cmsg_level != SOL_PACKET
+ || cmsg->cmsg_type != PACKET_AUXDATA
+ || cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata))) {
+ continue;
+ }
+
+ aux = ALIGNED_CAST(struct tpacket_auxdata *, CMSG_DATA(cmsg));
+ if (auxdata_has_vlan_tci(aux)) {
+ if (retval < ETH_HEADER_LEN) {
+ return EINVAL;
+ }
+
+ eth_push_vlan(buffer, auxdata_to_vlan_tpid(aux),
+ htons(aux->tp_vlan_tci));
+ break;
}
+ }
+
+ return 0;
+}
+
+static int
+netdev_linux_rx_recv_tap(int fd, struct ofpbuf *buffer)
+{
+ ssize_t retval;
+ size_t size = ofpbuf_tailroom(buffer);
+
+ do {
+ retval = read(fd, buffer->data, size);
+ } while (retval < 0 && errno == EINTR);
+
+ if (retval < 0) {
return errno;
} else if (retval > size) {
return EMSGSIZE;
- } else {
- buffer->size += retval;
- return 0;
}
+
+ buffer->size += retval;
+ return 0;
+}
+
+/* Receives one packet from 'rx_' into 'buffer', reading from the tap device
+ * when 'rx_' is a tap and from the packet socket otherwise.  Returns 0 on
+ * success, otherwise the errno value reported by the helper.  Errors other
+ * than EAGAIN and EMSGSIZE are logged through VLOG_WARN_RL. */
+static int
+netdev_linux_rx_recv(struct netdev_rx *rx_, struct ofpbuf *buffer)
+{
+    struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_);
+    int retval;
+
+    retval = (rx->is_tap
+              ? netdev_linux_rx_recv_tap(rx->fd, buffer)
+              : netdev_linux_rx_recv_sock(rx->fd, buffer));
+    if (retval && retval != EAGAIN && retval != EMSGSIZE) {
+        /* Log 'retval' rather than errno: the socket helper can fail with
+         * EINVAL without errno having been set.  The device name fills the
+         * first "%s" and the error string the second. */
+        VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
+                     netdev_rx_get_name(rx_), ovs_strerror(retval));
+    }
+
+    return retval;
+}
static void
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 1dcc1f45f..673d3aba7 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -643,6 +643,16 @@ struct netdev_class {
* Must return EMSGSIZE, and discard the packet, if the received packet
* is longer than 'ofpbuf_tailroom(buffer)'.
*
+ * Implementations may make use of VLAN_HEADER_LEN bytes of tailroom to
+ * add a VLAN header which is obtained out-of-band to the packet. If
+ * this occurs then VLAN_HEADER_LEN bytes of tailroom will no longer be
+ * available for the packet, otherwise it may be used for the packet
+ * itself.
+ *
+ * It is advised that the tailroom of 'buffer' should be
+ * VLAN_HEADER_LEN bytes longer than the MTU to allow space for an
+ * out-of-band VLAN header to be added to the packet.
+ *
* This function may be set to null if it would always return EOPNOTSUPP
* anyhow. */
int (*rx_recv)(struct netdev_rx *rx, struct ofpbuf *buffer);
diff --git a/lib/netdev.c b/lib/netdev.c
index 66b411a44..8e6242170 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -500,6 +500,13 @@ netdev_parse_name(const char *netdev_name_, char **name, char **type)
}
}
+/* Attempts to open a netdev_rx handle for obtaining packets received on
+ * 'netdev'. On success, returns 0 and stores a nonnull 'netdev_rx *' into
+ * '*rxp'. On failure, returns a positive errno value and stores NULL into
+ * '*rxp'.
+ *
+ * Some kinds of network devices might not support receiving packets. This
+ * function returns EOPNOTSUPP in that case.*/
int
netdev_rx_open(struct netdev *netdev, struct netdev_rx **rxp)
OVS_EXCLUDED(netdev_mutex)
@@ -531,6 +538,7 @@ netdev_rx_open(struct netdev *netdev, struct netdev_rx **rxp)
return error;
}
+/* Closes 'rx'. */
void
netdev_rx_close(struct netdev_rx *rx)
OVS_EXCLUDED(netdev_mutex)
@@ -543,6 +551,29 @@ netdev_rx_close(struct netdev_rx *rx)
}
}
+/* Attempts to receive a packet from 'rx' into the tailroom of 'buffer', which
+ * must initially be empty. If successful, returns 0 and increments
+ * 'buffer->size' by the number of bytes in the received packet, otherwise a
+ * positive errno value.
+ *
+ * Returns EAGAIN immediately if no packet is ready to be received.
+ *
+ * Returns EMSGSIZE, and discards the packet, if the received packet is longer
+ * than 'ofpbuf_tailroom(buffer)'.
+ *
+ * Implementations may make use of VLAN_HEADER_LEN bytes of tailroom to
+ * add a VLAN header which is obtained out-of-band to the packet. If
+ * this occurs then VLAN_HEADER_LEN bytes of tailroom will no longer be
+ * available for the packet, otherwise it may be used for the packet
+ * itself.
+ *
+ * It is advised that the tailroom of 'buffer' should be
+ * VLAN_HEADER_LEN bytes longer than the MTU to allow space for an
+ * out-of-band VLAN header to be added to the packet. At the very least,
+ * 'buffer' must have at least ETH_TOTAL_MIN bytes of tailroom.
+ *
+ * A netdev class may leave its 'rx_recv' member null if that function would
+ * always return EOPNOTSUPP anyhow. */
int
netdev_rx_recv(struct netdev_rx *rx, struct ofpbuf *buffer)
{
@@ -563,12 +594,15 @@ netdev_rx_recv(struct netdev_rx *rx, struct ofpbuf *buffer)
}
}
+/* Arranges for poll_block() to wake up when a packet is ready to be received
+ * on 'rx'. */
void
netdev_rx_wait(struct netdev_rx *rx)
{
rx->netdev->netdev_class->rx_wait(rx);
}
+/* Discards any packets ready to be received on 'rx'. */
int
netdev_rx_drain(struct netdev_rx *rx)
{