summaryrefslogtreecommitdiff
path: root/datapath-windows
diff options
context:
space:
mode:
author Paul Boca <pboca@cloudbasesolutions.com> 2016-06-06 16:45:00 +0000
committer Ben Pfaff <blp@ovn.org> 2016-06-07 10:52:05 -0700
commit c3e85147d9067c9c19451ad36505bcf70eb470b9 (patch)
tree 4cae12c3d7784a86e8b4deebff2b52636fddff8a /datapath-windows
parent ab7fc30e13b4f07acb659b6801d4f98ddba2f1a6 (diff)
download openvswitch-c3e85147d9067c9c19451ad36505bcf70eb470b9.tar.gz
datapath-windows: Improved offloading on STT tunnel
* Added OvsExtractLayers, which populates only the layers field without the unnecessary memory operations for the flow part.
* If the flags in the STT header are 0, force packet checksum calculation on receive.
* Ensure the correct pseudo checksum is set for LSO on both send and receive. Linux includes the segment length in the TCP pseudo-checksum, conforming to RFC 793, but in the case of LSO Windows expects the pseudo-checksum to cover only the Source IP Address, Destination IP Address, and Protocol.
* Fragment expiration on the rx side of STT was set to 30 seconds, but the correct timeout is the TTL of the packet.

Signed-off-by: Paul-Daniel Boca <pboca@cloudbasesolutions.com>
Acked-by: Sairam Venugopal <vsairam@vmware.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Diffstat (limited to 'datapath-windows')
-rw-r--r-- datapath-windows/ovsext/Flow.c 243
-rw-r--r-- datapath-windows/ovsext/Flow.h 2
-rw-r--r-- datapath-windows/ovsext/PacketParser.c 97
-rw-r--r-- datapath-windows/ovsext/PacketParser.h 8
-rw-r--r-- datapath-windows/ovsext/Stt.c 124
-rw-r--r-- datapath-windows/ovsext/User.c 17
6 files changed, 377 insertions, 114 deletions
diff --git a/datapath-windows/ovsext/Flow.c b/datapath-windows/ovsext/Flow.c
index c2e02277a..2a918551a 100644
--- a/datapath-windows/ovsext/Flow.c
+++ b/datapath-windows/ovsext/Flow.c
@@ -1570,7 +1570,8 @@ _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs,
ndKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_ND]);
RtlCopyMemory(&icmp6FlowPutKey->ndTarget,
- ndKey->nd_target, sizeof (icmp6FlowPutKey->ndTarget));
+ ndKey->nd_target,
+ sizeof (icmp6FlowPutKey->ndTarget));
RtlCopyMemory(icmp6FlowPutKey->arpSha,
ndKey->nd_sll, ETH_ADDR_LEN);
RtlCopyMemory(icmp6FlowPutKey->arpTha,
@@ -1600,8 +1601,10 @@ _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs,
arpFlowPutKey->nwSrc = arpKey->arp_sip;
arpFlowPutKey->nwDst = arpKey->arp_tip;
- RtlCopyMemory(arpFlowPutKey->arpSha, arpKey->arp_sha, ETH_ADDR_LEN);
- RtlCopyMemory(arpFlowPutKey->arpTha, arpKey->arp_tha, ETH_ADDR_LEN);
+ RtlCopyMemory(arpFlowPutKey->arpSha, arpKey->arp_sha,
+ ETH_ADDR_LEN);
+ RtlCopyMemory(arpFlowPutKey->arpTha, arpKey->arp_tha,
+ ETH_ADDR_LEN);
/* Kernel datapath assumes 'arpFlowPutKey->nwProto' to be in host
* order. */
arpFlowPutKey->nwProto = (UINT8)ntohs((arpKey->arp_op));
@@ -1850,29 +1853,195 @@ OvsGetFlowMetadata(OvsFlowKey *key,
return status;
}
+
/*
- *----------------------------------------------------------------------------
- * Initializes 'flow' members from 'packet', 'skb_priority', 'tun_id', and
- * 'ofp_in_port'.
- *
- * Initializes 'packet' header pointers as follows:
- *
- * - packet->l2 to the start of the Ethernet header.
- *
- * - packet->l3 to just past the Ethernet header, or just past the
- * vlan_header if one is present, to the first byte of the payload of the
- * Ethernet frame.
- *
- * - packet->l4 to just past the IPv4 header, if one is present and has a
- * correct length, and otherwise NULL.
- *
- * - packet->l7 to just past the TCP, UDP, SCTP or ICMP header, if one is
- * present and has a correct length, and otherwise NULL.
- *
- * Returns NDIS_STATUS_SUCCESS normally. Fails only if packet data cannot be accessed
- * (e.g. if Pkt_CopyBytesOut() returns an error).
- *----------------------------------------------------------------------------
- */
+*----------------------------------------------------------------------------
+* Initializes 'layers' members from 'packet'
+*
+* Initializes 'layers' header pointers as follows:
+*
+* - layers->l2 to the start of the Ethernet header.
+*
+* - layers->l3 to just past the Ethernet header, or just past the
+* vlan_header if one is present, to the first byte of the payload of the
+* Ethernet frame.
+*
+* - layers->l4 to just past the IPv4 header, if one is present and has a
+* correct length, and otherwise NULL.
+*
+* - layers->l7 to just past the TCP, UDP, SCTP or ICMP header, if one is
+* present and has a correct length, and otherwise NULL.
+*
+* - layers->isIPv4/isIPv6/isTcp/isUdp/isSctp based on the packet type
+*
+* Returns NDIS_STATUS_SUCCESS normally.
+* Fails only if packet data cannot be accessed.
+* (e.g. if OvsParseIPv6() returns an error).
+*----------------------------------------------------------------------------
+*/
+NDIS_STATUS
+OvsExtractLayers(const NET_BUFFER_LIST *packet,
+ POVS_PACKET_HDR_INFO layers)
+{
+ struct Eth_Header *eth;
+ UINT8 offset = 0; /* size of an in-band 802.1Q tag to step over, if any */
+ PVOID vlanTagValue;
+ ovs_be16 dlType;
+
+ layers->value = 0;
+
+ /* Link layer. */
+ eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet);
+
+ /*
+ * vlan_tci: with no out-of-band 802.1Q info, look for an in-band VLAN tag.
+ */
+ vlanTagValue = NET_BUFFER_LIST_INFO(packet, Ieee8021QNetBufferListInfo);
+ if (!vlanTagValue) {
+ if (eth->dix.typeNBO == ETH_TYPE_802_1PQ_NBO) {
+ offset = sizeof(Eth_802_1pq_Tag);
+ }
+
+ /*
+ * XXX 'eth' is advanced by 'offset' below, so after this point the
+ * src mac and dst mac must not be accessed through eth.
+ */
+ eth = (Eth_Header *)((UINT8 *)eth + offset);
+ }
+
+ /*
+ * dl_type.
+ *
+ * XXX It is assumed that at least the first 12 bytes of received
+ * packets are mapped. This code relies on the stronger assumption
+ * that at least the first 22 bytes of 'packet' are mapped (per the
+ * original author's arithmetic; not re-verified here).
+ */
+ if (ETH_TYPENOT8023(eth->dix.typeNBO)) {
+ dlType = eth->dix.typeNBO;
+ layers->l3Offset = ETH_HEADER_LEN_DIX + offset;
+ } else if (OvsPacketLenNBL(packet) >= ETH_HEADER_LEN_802_3 &&
+ eth->e802_3.llc.dsap == 0xaa &&
+ eth->e802_3.llc.ssap == 0xaa &&
+ eth->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME &&
+ eth->e802_3.snap.snapOrg[0] == 0x00 &&
+ eth->e802_3.snap.snapOrg[1] == 0x00 &&
+ eth->e802_3.snap.snapOrg[2] == 0x00) {
+ dlType = eth->e802_3.snap.snapType.typeNBO;
+ layers->l3Offset = ETH_HEADER_LEN_802_3 + offset;
+ } else {
+ dlType = htons(OVSWIN_DL_TYPE_NONE);
+ layers->l3Offset = ETH_HEADER_LEN_DIX + offset;
+ }
+
+ /* Network layer. */
+ if (dlType == htons(ETH_TYPE_IPV4)) {
+ struct IPHdr ip_storage;
+ const struct IPHdr *nh;
+
+ layers->isIPv4 = 1;
+ nh = OvsGetIp(packet, layers->l3Offset, &ip_storage);
+ if (nh) {
+ layers->l4Offset = layers->l3Offset + nh->ihl * 4;
+
+ if (!(nh->frag_off & htons(IP_OFFSET))) { /* L4 is parsed only when the fragment-offset bits are clear */
+ if (nh->protocol == SOCKET_IPPROTO_TCP) {
+ OvsParseTcp(packet, NULL, layers);
+ } else if (nh->protocol == SOCKET_IPPROTO_UDP) {
+ OvsParseUdp(packet, NULL, layers);
+ } else if (nh->protocol == SOCKET_IPPROTO_SCTP) {
+ OvsParseSctp(packet, NULL, layers);
+ } else if (nh->protocol == SOCKET_IPPROTO_ICMP) {
+ ICMPHdr icmpStorage;
+ const ICMPHdr *icmp;
+
+ icmp = OvsGetIcmp(packet, layers->l4Offset, &icmpStorage);
+ if (icmp) {
+ layers->l7Offset = layers->l4Offset + sizeof *icmp;
+ }
+ }
+ }
+ }
+ } else if (dlType == htons(ETH_TYPE_IPV6)) {
+ NDIS_STATUS status;
+ Ipv6Key ipv6Key; /* local scratch key; it is not handed back to the caller */
+
+ status = OvsParseIPv6(packet, &ipv6Key, layers);
+ if (status != NDIS_STATUS_SUCCESS) {
+ return status;
+ }
+ layers->isIPv6 = 1;
+
+ if (ipv6Key.nwProto == SOCKET_IPPROTO_TCP) {
+ OvsParseTcp(packet, &(ipv6Key.l4), layers);
+ } else if (ipv6Key.nwProto == SOCKET_IPPROTO_UDP) {
+ OvsParseUdp(packet, &(ipv6Key.l4), layers);
+ } else if (ipv6Key.nwProto == SOCKET_IPPROTO_SCTP) {
+ OvsParseSctp(packet, &ipv6Key.l4, layers);
+ } else if (ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) {
+ Icmp6Key icmp6Key; /* local scratch output; its contents are not used afterwards */
+ OvsParseIcmpV6(packet, NULL, &icmp6Key, layers);
+ }
+ } else if (OvsEthertypeIsMpls(dlType)) {
+ MPLSHdr mplsStorage;
+ const MPLSHdr *mpls;
+
+ /*
+ * In the presence of an MPLS label stack the end of the L2
+ * header and the beginning of the L3 header differ.
+ *
+ * A network packet may contain multiple MPLS labels; the loop
+ * below steps over at most FLOW_MAX_MPLS_LABELS of them.
+ *
+ * Each label stack entry read advances both layers->l3Offset and
+ * layers->l4Offset by MPLS_HLEN, starting from the end of the L2 header.
+ */
+ for (UINT32 i = 0; i < FLOW_MAX_MPLS_LABELS; i++) {
+ mpls = OvsGetMpls(packet, layers->l3Offset, &mplsStorage);
+ if (!mpls) {
+ break;
+ }
+
+ layers->l3Offset += MPLS_HLEN;
+ layers->l4Offset += MPLS_HLEN;
+
+ if (mpls->lse & htonl(MPLS_BOS_MASK)) {
+ /*
+ * Bottom of Stack bit is set, which means there are no
+ * remaining MPLS labels in the packet.
+ */
+ break;
+ }
+ }
+ }
+
+ return NDIS_STATUS_SUCCESS;
+}
+
+/*
+*----------------------------------------------------------------------------
+* Initializes 'flow' members from 'packet', 'skb_priority', 'tun_id', and
+* 'ofp_in_port'.
+*
+* Initializes 'packet' header pointers as follows:
+*
+* - packet->l2 to the start of the Ethernet header.
+*
+* - packet->l3 to just past the Ethernet header, or just past the
+* vlan_header if one is present, to the first byte of the payload of the
+* Ethernet frame.
+*
+* - packet->l4 to just past the IPv4 header, if one is present and has a
+* correct length, and otherwise NULL.
+*
+* - packet->l7 to just past the TCP, UDP, SCTP or ICMP header, if one is
+* present and has a correct length, and otherwise NULL.
+*
+* Returns NDIS_STATUS_SUCCESS normally.
+* Fails only if packet data cannot be accessed.
+* (e.g. if Pkt_CopyBytesOut() returns an error).
+*----------------------------------------------------------------------------
+*/
NDIS_STATUS
OvsExtractFlow(const NET_BUFFER_LIST *packet,
UINT32 inPort,
@@ -1904,8 +2073,8 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
/* Link layer. */
eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet);
- memcpy(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH);
- memcpy(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH);
+ RtlCopyMemory(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH);
+ RtlCopyMemory(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH);
/*
* vlan_tci.
@@ -1927,8 +2096,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
flow->l2.vlanTci = 0;
}
/*
- * XXX
- * Please note after this point, src mac and dst mac should
+ * XXX Please note after this point, src mac and dst mac should
* not be accessed through eth
*/
eth = (Eth_Header *)((UINT8 *)eth + offset);
@@ -1959,7 +2127,8 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
layers->l3Offset = ETH_HEADER_LEN_DIX + offset;
}
- flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE - flow->l2.offset;
+ flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE
+ - flow->l2.offset;
/* Network layer. */
if (flow->l2.dlType == htons(ETH_TYPE_IPV4)) {
struct IPHdr ip_storage;
@@ -2016,9 +2185,9 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
} else if (flow->l2.dlType == htons(ETH_TYPE_IPV6)) {
NDIS_STATUS status;
flow->l2.keyLen += OVS_IPV6_KEY_SIZE;
- status = OvsParseIPv6(packet, flow, layers);
+ status = OvsParseIPv6(packet, &flow->ipv6Key, layers);
if (status != NDIS_STATUS_SUCCESS) {
- memset(&flow->ipv6Key, 0, sizeof (Ipv6Key));
+ RtlZeroMemory(&flow->ipv6Key, sizeof (Ipv6Key));
return status;
}
layers->isIPv6 = 1;
@@ -2033,7 +2202,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
} else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_SCTP) {
OvsParseSctp(packet, &flow->ipv6Key.l4, layers);
} else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) {
- OvsParseIcmpV6(packet, flow, layers);
+ OvsParseIcmpV6(packet, &flow->ipv6Key, &flow->icmp6Key, layers);
flow->l2.keyLen += (OVS_ICMPV6_KEY_SIZE - OVS_IPV6_KEY_SIZE);
}
} else if (flow->l2.dlType == htons(ETH_TYPE_ARP)) {
@@ -2055,10 +2224,10 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet,
}
if (arpKey->nwProto == ARPOP_REQUEST
|| arpKey->nwProto == ARPOP_REPLY) {
- memcpy(&arpKey->nwSrc, arp->arp_spa, 4);
- memcpy(&arpKey->nwDst, arp->arp_tpa, 4);
- memcpy(arpKey->arpSha, arp->arp_sha, ETH_ADDR_LENGTH);
- memcpy(arpKey->arpTha, arp->arp_tha, ETH_ADDR_LENGTH);
+ RtlCopyMemory(&arpKey->nwSrc, arp->arp_spa, 4);
+ RtlCopyMemory(&arpKey->nwDst, arp->arp_tpa, 4);
+ RtlCopyMemory(arpKey->arpSha, arp->arp_sha, ETH_ADDR_LENGTH);
+ RtlCopyMemory(arpKey->arpTha, arp->arp_tha, ETH_ADDR_LENGTH);
}
}
} else if (OvsEthertypeIsMpls(flow->l2.dlType)) {
diff --git a/datapath-windows/ovsext/Flow.h b/datapath-windows/ovsext/Flow.h
index fb3fb5984..d39db453c 100644
--- a/datapath-windows/ovsext/Flow.h
+++ b/datapath-windows/ovsext/Flow.h
@@ -53,6 +53,8 @@ NDIS_STATUS OvsAllocateFlowTable(OVS_DATAPATH *datapath,
NDIS_STATUS OvsGetFlowMetadata(OvsFlowKey *key,
PNL_ATTR *keyAttrs);
+NDIS_STATUS OvsExtractLayers(const NET_BUFFER_LIST *packet,
+ POVS_PACKET_HDR_INFO layers);
NDIS_STATUS OvsExtractFlow(const NET_BUFFER_LIST *pkt, UINT32 inPort,
OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers,
OvsIPv4TunnelKey *tunKey);
diff --git a/datapath-windows/ovsext/PacketParser.c b/datapath-windows/ovsext/PacketParser.c
index 93df3424d..c4a04d072 100644
--- a/datapath-windows/ovsext/PacketParser.c
+++ b/datapath-windows/ovsext/PacketParser.c
@@ -84,14 +84,13 @@ OvsGetPacketBytes(const NET_BUFFER_LIST *nbl,
NDIS_STATUS
OvsParseIPv6(const NET_BUFFER_LIST *packet,
- OvsFlowKey *key,
+ Ipv6Key *ipv6Key,
POVS_PACKET_HDR_INFO layers)
{
UINT16 ofs = layers->l3Offset;
IPv6Hdr ipv6HdrStorage;
const IPv6Hdr *nh;
UINT32 nextHdr;
- Ipv6Key *flow= &key->ipv6Key;
nh = OvsGetPacketBytes(packet, sizeof *nh, ofs, &ipv6HdrStorage);
if (!nh) {
@@ -99,15 +98,15 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet,
}
nextHdr = nh->nexthdr;
- memcpy(&flow->ipv6Src, nh->saddr.s6_addr, 16);
- memcpy(&flow->ipv6Dst, nh->daddr.s6_addr, 16);
+ RtlCopyMemory(&ipv6Key->ipv6Src, nh->saddr.s6_addr, 16);
+ RtlCopyMemory(&ipv6Key->ipv6Dst, nh->daddr.s6_addr, 16);
- flow->nwTos = ((nh->flow_lbl[0] & 0xF0) >> 4) | (nh->priority << 4);
- flow->ipv6Label =
+ ipv6Key->nwTos = ((nh->flow_lbl[0] & 0xF0) >> 4) | (nh->priority << 4);
+ ipv6Key->ipv6Label =
((nh->flow_lbl[0] & 0x0F) << 16) | (nh->flow_lbl[1] << 8) | nh->flow_lbl[2];
- flow->nwTtl = nh->hop_limit;
- flow->nwProto = SOCKET_IPPROTO_NONE;
- flow->nwFrag = OVS_FRAG_TYPE_NONE;
+ ipv6Key->nwTtl = nh->hop_limit;
+ ipv6Key->nwProto = SOCKET_IPPROTO_NONE;
+ ipv6Key->nwFrag = OVS_FRAG_TYPE_NONE;
// Parse extended headers and compute L4 offset
ofs += sizeof(IPv6Hdr);
@@ -160,9 +159,9 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet,
/* We only process the first fragment. */
if (fragHdr->offlg != htons(0)) {
if ((fragHdr->offlg & IP6F_OFF_HOST_ORDER_MASK) == htons(0)) {
- flow->nwFrag = OVS_FRAG_TYPE_FIRST;
+ ipv6Key->nwFrag = OVS_FRAG_TYPE_FIRST;
} else {
- flow->nwFrag = OVS_FRAG_TYPE_LATER;
+ ipv6Key->nwFrag = OVS_FRAG_TYPE_LATER;
nextHdr = SOCKET_IPPROTO_FRAGMENT;
break;
}
@@ -170,7 +169,7 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet,
}
}
- flow->nwProto = (UINT8)nextHdr;
+ ipv6Key->nwProto = (UINT8)nextHdr;
layers->l4Offset = ofs;
return NDIS_STATUS_SUCCESS;
}
@@ -183,10 +182,14 @@ OvsParseTcp(const NET_BUFFER_LIST *packet,
TCPHdr tcpStorage;
const TCPHdr *tcp = OvsGetTcp(packet, layers->l4Offset, &tcpStorage);
if (tcp) {
- flow->tpSrc = tcp->source;
- flow->tpDst = tcp->dest;
- layers->isTcp = 1;
- layers->l7Offset = layers->l4Offset + 4 * tcp->doff;
+ if (flow) {
+ flow->tpSrc = tcp->source;
+ flow->tpDst = tcp->dest;
+ }
+ if (layers) {
+ layers->isTcp = 1;
+ layers->l7Offset = layers->l4Offset + 4 * tcp->doff;
+ }
}
}
@@ -198,10 +201,14 @@ OvsParseSctp(const NET_BUFFER_LIST *packet,
SCTPHdr sctpStorage;
const SCTPHdr *sctp = OvsGetSctp(packet, layers->l4Offset, &sctpStorage);
if (sctp) {
- flow->tpSrc = sctp->source;
- flow->tpDst = sctp->dest;
- layers->isSctp = 1;
- layers->l7Offset = layers->l4Offset + sizeof *sctp;
+ if (flow) {
+ flow->tpSrc = sctp->source;
+ flow->tpDst = sctp->dest;
+ }
+ if (layers) {
+ layers->isSctp = 1;
+ layers->l7Offset = layers->l4Offset + sizeof *sctp;
+ }
}
}
@@ -213,29 +220,33 @@ OvsParseUdp(const NET_BUFFER_LIST *packet,
UDPHdr udpStorage;
const UDPHdr *udp = OvsGetUdp(packet, layers->l4Offset, &udpStorage);
if (udp) {
- flow->tpSrc = udp->source;
- flow->tpDst = udp->dest;
- layers->isUdp = 1;
- if (udp->check == 0) {
- layers->udpCsumZero = 1;
+ if (flow) {
+ flow->tpSrc = udp->source;
+ flow->tpDst = udp->dest;
+ }
+ if (layers) {
+ layers->isUdp = 1;
+ if (udp->check == 0) {
+ layers->udpCsumZero = 1;
+ }
+ layers->l7Offset = layers->l4Offset + sizeof *udp;
}
- layers->l7Offset = layers->l4Offset + sizeof *udp;
}
}
NDIS_STATUS
OvsParseIcmpV6(const NET_BUFFER_LIST *packet,
- OvsFlowKey *key,
- POVS_PACKET_HDR_INFO layers)
+ Ipv6Key *ipv6Key,
+ Icmp6Key *icmp6Key,
+ POVS_PACKET_HDR_INFO layers)
{
UINT16 ofs = layers->l4Offset;
ICMPHdr icmpStorage;
const ICMPHdr *icmp;
- Icmp6Key *flow = &key->icmp6Key;
- memset(&flow->ndTarget, 0, sizeof(flow->ndTarget));
- memset(flow->arpSha, 0, sizeof(flow->arpSha));
- memset(flow->arpTha, 0, sizeof(flow->arpTha));
+ memset(&icmp6Key->ndTarget, 0, sizeof(icmp6Key->ndTarget));
+ memset(icmp6Key->arpSha, 0, sizeof(icmp6Key->arpSha));
+ memset(icmp6Key->arpTha, 0, sizeof(icmp6Key->arpTha));
icmp = OvsGetIcmp(packet, ofs, &icmpStorage);
if (!icmp) {
@@ -247,8 +258,10 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet,
* The ICMPv6 type and code fields use the 16-bit transport port
* fields, so we need to store them in 16-bit network byte order.
*/
- key->ipv6Key.l4.tpSrc = htons(icmp->type);
- key->ipv6Key.l4.tpDst = htons(icmp->code);
+ if (ipv6Key) {
+ ipv6Key->l4.tpSrc = htons(icmp->type);
+ ipv6Key->l4.tpDst = htons(icmp->code);
+ }
if (icmp->code == 0 &&
(icmp->type == ND_NEIGHBOR_SOLICIT ||
@@ -261,7 +274,7 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet,
if (!ndTarget) {
return NDIS_STATUS_FAILURE;
}
- flow->ndTarget = *ndTarget;
+ icmp6Key->ndTarget = *ndTarget;
while ((UINT32)(ofs + 8) <= OvsPacketLenNBL(packet)) {
/*
@@ -288,14 +301,14 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet,
* layer option is specified twice.
*/
if (ndOpt->type == ND_OPT_SOURCE_LINKADDR && optLen == 8) {
- if (Eth_IsNullAddr(flow->arpSha)) {
- memcpy(flow->arpSha, ndOpt + 1, ETH_ADDR_LENGTH);
+ if (Eth_IsNullAddr(icmp6Key->arpSha)) {
+ memcpy(icmp6Key->arpSha, ndOpt + 1, ETH_ADDR_LENGTH);
} else {
goto invalid;
}
} else if (ndOpt->type == ND_OPT_TARGET_LINKADDR && optLen == 8) {
- if (Eth_IsNullAddr(flow->arpTha)) {
- memcpy(flow->arpTha, ndOpt + 1, ETH_ADDR_LENGTH);
+ if (Eth_IsNullAddr(icmp6Key->arpTha)) {
+ memcpy(icmp6Key->arpTha, ndOpt + 1, ETH_ADDR_LENGTH);
} else {
goto invalid;
}
@@ -309,9 +322,9 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet,
return NDIS_STATUS_SUCCESS;
invalid:
- memset(&flow->ndTarget, 0, sizeof(flow->ndTarget));
- memset(flow->arpSha, 0, sizeof(flow->arpSha));
- memset(flow->arpTha, 0, sizeof(flow->arpTha));
+ RtlZeroMemory(&icmp6Key->ndTarget, sizeof(icmp6Key->ndTarget));
+ RtlZeroMemory(icmp6Key->arpSha, sizeof(icmp6Key->arpSha));
+ RtlZeroMemory(icmp6Key->arpTha, sizeof(icmp6Key->arpTha));
return NDIS_STATUS_FAILURE;
}
diff --git a/datapath-windows/ovsext/PacketParser.h b/datapath-windows/ovsext/PacketParser.h
index 47d227f59..f1d7f283d 100644
--- a/datapath-windows/ovsext/PacketParser.h
+++ b/datapath-windows/ovsext/PacketParser.h
@@ -22,7 +22,7 @@
const VOID* OvsGetPacketBytes(const NET_BUFFER_LIST *_pNB, UINT32 len,
UINT32 SrcOffset, VOID *storage);
-NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, OvsFlowKey *key,
+NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, Ipv6Key *key,
POVS_PACKET_HDR_INFO layers);
VOID OvsParseTcp(const NET_BUFFER_LIST *packet, L4Key *flow,
POVS_PACKET_HDR_INFO layers);
@@ -30,8 +30,10 @@ VOID OvsParseUdp(const NET_BUFFER_LIST *packet, L4Key *flow,
POVS_PACKET_HDR_INFO layers);
VOID OvsParseSctp(const NET_BUFFER_LIST *packet, L4Key *flow,
POVS_PACKET_HDR_INFO layers);
-NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, OvsFlowKey *key,
- POVS_PACKET_HDR_INFO layers);
+NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet,
+ Ipv6Key *ipv6Key,
+ Icmp6Key *flow,
+ POVS_PACKET_HDR_INFO layers);
static __inline ULONG
OvsPacketLenNBL(const NET_BUFFER_LIST *_pNB)
diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c
index dd7bf9279..c93db7548 100644
--- a/datapath-windows/ovsext/Stt.c
+++ b/datapath-windows/ovsext/Stt.c
@@ -194,7 +194,7 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
if (layers->isIPv4) {
IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset);
if (!ip->tot_len) {
- ip->tot_len = htons(innerFrameLen - sizeof(EthHdr));
+ ip->tot_len = htons(innerFrameLen - layers->l3Offset);
}
if (!ip->check) {
ip->check = IPChecksum((UINT8 *)ip, ip->ihl * 4, 0);
@@ -231,8 +231,8 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
* memory.
*/
curMdl = NET_BUFFER_CURRENT_MDL(curNb);
- ASSERT((int) (MmGetMdlByteCount(curMdl) - NET_BUFFER_CURRENT_MDL_OFFSET(curNb))
- >= (int) headRoom);
+ ASSERT((int) (MmGetMdlByteCount(curMdl) -
+ NET_BUFFER_CURRENT_MDL_OFFSET(curNb)) >= (int) headRoom);
buf = (PUINT8) MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
if (!buf) {
@@ -288,12 +288,12 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
/* Calculate pseudo header chksum */
tcpChksumLen = sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen;
ASSERT(tcpChksumLen < 65535);
- outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr,(uint32 *) &tunKey->dst,
- IPPROTO_TCP, (uint16) tcpChksumLen);
sttHdr->version = 0;
/* Set STT Header */
sttHdr->flags = 0;
+ sttHdr->mss = 0;
+ sttHdr->l4Offset = 0;
if (innerPartialChecksum) {
sttHdr->flags |= STT_CSUM_PARTIAL;
if (layers->isIPv4) {
@@ -327,8 +327,22 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
NET_BUFFER_LIST_INFO(curNbl,
TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
- UINT32 encapMss = OvsGetExternalMtu(switchContext) - sizeof(IPHdr) - sizeof(TCPHdr);
+ UINT32 encapMss = OvsGetExternalMtu(switchContext)
+ - sizeof(IPHdr)
+ - sizeof(TCPHdr);
if (ipTotalLen > encapMss) {
+ /* For Windows LSO, the TCP pseudo checksum must contain Source IP
+ * Address, Destination IP Address, and Protocol; the length of the
+ * payload is excluded because the underlying miniport driver and NIC
+ * generate TCP segments from the large packet that is passed down by
+ * the TCP/IP transport, the transport does not know the size of the
+ * TCP payload for each TCP segment and therefore cannot include the
+ * TCP Length in the pseudo-header.
+ */
+ outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr,
+ (uint32 *) &tunKey->dst,
+ IPPROTO_TCP, (uint16) 0);
+
lsoInfo.Value = 0;
lsoInfo.LsoV2Transmit.TcpHeaderOffset = tcpHeaderOffset;
lsoInfo.LsoV2Transmit.MSS = encapMss;
@@ -336,6 +350,11 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport,
lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4;
NET_BUFFER_LIST_INFO(curNbl,
TcpLargeSendNetBufferListInfo) = lsoInfo.Value;
+ } else {
+ outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr,
+ (uint32 *) &tunKey->dst,
+ IPPROTO_TCP,
+ (uint16) tcpChksumLen);
}
return STATUS_SUCCESS;
@@ -655,7 +674,8 @@ handle_error:
if (lastPacket) {
/* Retrieve the original STT header */
NdisMoveMemory(newSttHdr, &pktFragEntry->sttHdr, sizeof (SttHdr));
- targetPNbl = OvsAllocateNBLFromBuffer(switchContext, pktFragEntry->packetBuf,
+ targetPNbl = OvsAllocateNBLFromBuffer(switchContext,
+ pktFragEntry->packetBuf,
innerPacketLen);
/* Delete this entry and free up the memory. */
@@ -668,16 +688,32 @@ handle_error:
return lastPacket ? targetPNbl : NULL;
}
-VOID
-OvsDecapSetOffloads(PNET_BUFFER_LIST curNbl, SttHdr *sttHdr)
+
+/*
+*----------------------------------------------------------------------------
+* OvsDecapSetOffloads
+* Processes received STT header and sets TcpIpChecksumNetBufferListInfo
+* accordingly.
+* For TCP packets whose STT header carries a non-zero MSS it also rewrites
+* the inner TCP pseudo checksum and populates TcpLargeSendNetBufferListInfo.
+*
+* Returns NDIS_STATUS_SUCCESS normally.
+* Fails only if packet data is invalid.
+* (e.g. if OvsExtractLayers() returns an error).
+*----------------------------------------------------------------------------
+*/
+NDIS_STATUS
+OvsDecapSetOffloads(PNET_BUFFER_LIST *curNbl, SttHdr *sttHdr)
{
if ((sttHdr->flags & STT_CSUM_VERIFIED)
|| !(sttHdr->flags & STT_CSUM_PARTIAL)) {
- return;
+ return NDIS_STATUS_SUCCESS;
}
- UINT8 protoType;
+ NDIS_STATUS status;
NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
+ UINT8 protoType;
+
csumInfo.Value = 0;
csumInfo.Transmit.IpHeaderChecksum = 0;
csumInfo.Transmit.TcpHeaderOffset = sttHdr->l4Offset;
@@ -703,25 +739,66 @@ OvsDecapSetOffloads(PNET_BUFFER_LIST curNbl, SttHdr *sttHdr)
csumInfo.Transmit.IsIPv6 = 1;
csumInfo.Transmit.UdpChecksum = 1;
}
- NET_BUFFER_LIST_INFO(curNbl,
+ NET_BUFFER_LIST_INFO(*curNbl,
TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
- if (sttHdr->mss) {
+ if (sttHdr->mss && (sttHdr->flags & STT_PROTO_TCP)) {
NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
+ PMDL curMdl = NULL;
+ PNET_BUFFER curNb;
+ PUINT8 buf = NULL;
+ OVS_PACKET_HDR_INFO layers;
+
+ status = OvsExtractLayers(*curNbl, &layers);
+ if (status != NDIS_STATUS_SUCCESS) {
+ return status;
+ }
+
+ curNb = NET_BUFFER_LIST_FIRST_NB(*curNbl);
+ curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+
+ buf = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
+ LowPagePriority);
+ buf += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
+
+ // apply pseudo checksum on extracted packet
+ if (sttHdr->flags & STT_PROTO_IPV4) {
+ IPHdr *ipHdr;
+ TCPHdr *tcpHdr;
+
+ ipHdr = (IPHdr *)(buf + layers.l3Offset);
+ tcpHdr = (TCPHdr *)(buf + layers.l4Offset);
+
+ tcpHdr->check = IPPseudoChecksum(&ipHdr->saddr,
+ (uint32 *)&ipHdr->daddr,
+ IPPROTO_TCP, 0);
+ } else {
+ IPv6Hdr *ipHdr;
+ TCPHdr *tcpHdr;
+
+ ipHdr = (IPv6Hdr *)(buf + layers.l3Offset);
+ tcpHdr = (TCPHdr *)(buf + layers.l4Offset);
+
+ tcpHdr->check = IPv6PseudoChecksum((UINT32*)&ipHdr->saddr,
+ (UINT32*)&ipHdr->daddr,
+ IPPROTO_TCP, 0);
+ }
+
+ // setup LSO
lsoInfo.Value = 0;
lsoInfo.LsoV2Transmit.TcpHeaderOffset = sttHdr->l4Offset;
- lsoInfo.LsoV2Transmit.MSS = ETH_DEFAULT_MTU
- - sizeof(IPHdr)
- - sizeof(TCPHdr);
+ lsoInfo.LsoV2Transmit.MSS = ntohs(sttHdr->mss);
lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
if (sttHdr->flags & STT_PROTO_IPV4) {
lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4;
} else {
lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv6;
}
- NET_BUFFER_LIST_INFO(curNbl,
+ NET_BUFFER_LIST_INFO(*curNbl,
TcpLargeSendNetBufferListInfo) = lsoInfo.Value;
}
+
+ return NDIS_STATUS_SUCCESS;
}
/*
@@ -736,15 +813,14 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
OvsIPv4TunnelKey *tunKey,
PNET_BUFFER_LIST *newNbl)
{
- NDIS_STATUS status = NDIS_STATUS_FAILURE;
- PNET_BUFFER curNb, newNb;
+ NDIS_STATUS status;
+ PNET_BUFFER curNb;
IPHdr *ipHdr;
char *ipBuf[sizeof(IPHdr)];
SttHdr stt;
SttHdr *sttHdr;
char *sttBuf[STT_HDR_LEN];
UINT32 advanceCnt, hdrLen;
- BOOLEAN isLsoPacket = FALSE;
curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
@@ -767,7 +843,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
TCPHdr *tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4);
/* Skip IP & TCP headers */
- hdrLen = sizeof(IPHdr) + sizeof(TCPHdr),
+ hdrLen = (ipHdr->ihl * 4) + (tcp->doff * 4);
NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
advanceCnt += hdrLen;
@@ -775,7 +851,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
UINT32 totalLen = (seq >> STT_SEQ_LEN_SHIFT);
UINT16 payloadLen = (UINT16)ntohs(ipHdr->tot_len)
- (ipHdr->ihl * 4)
- - (sizeof * tcp);
+ - (tcp->doff * 4);
/* Check if incoming packet requires reassembly */
if (totalLen != payloadLen) {
@@ -788,7 +864,6 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
}
*newNbl = pNbl;
- isLsoPacket = TRUE;
} else {
/* STT Header */
sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr,
@@ -812,7 +887,6 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
OvsCompleteNBL(switchContext, *newNbl, TRUE);
return NDIS_STATUS_FAILURE;
}
- newNb = NET_BUFFER_LIST_FIRST_NB(*newNbl);
ASSERT(sttHdr);
@@ -826,7 +900,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
tunKey->pad = 0;
/* Set Checksum and LSO offload flags */
- OvsDecapSetOffloads(*newNbl, sttHdr);
+ OvsDecapSetOffloads(newNbl, sttHdr);
return NDIS_STATUS_SUCCESS;
}
diff --git a/datapath-windows/ovsext/User.c b/datapath-windows/ovsext/User.c
index 92a71e171..c7ac28456 100644
--- a/datapath-windows/ovsext/User.c
+++ b/datapath-windows/ovsext/User.c
@@ -768,7 +768,8 @@ OvsCreateAndAddPackets(PVOID userData,
NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
UINT32 packetLength;
- tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo);
+ tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl,
+ TcpLargeSendNetBufferListInfo);
nb = NET_BUFFER_LIST_FIRST_NB(nbl);
packetLength = NET_BUFFER_DATA_LENGTH(nb);
@@ -870,7 +871,8 @@ OvsCompletePacketHeader(UINT8 *packet,
(UINT32 *)&ipHdr->DestinationAddress,
IPPROTO_TCP, hdrInfoOut->l4PayLoad);
} else {
- PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + hdrInfoIn->l3Offset);
+ PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet +
+ hdrInfoIn->l3Offset);
hdrInfoOut->l4PayLoad =
(UINT16)(ntohs(ipv6Hdr->PayloadLength) +
hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
@@ -884,9 +886,9 @@ OvsCompletePacketHeader(UINT8 *packet,
hdrInfoOut->tcpCsumNeeded = 1;
ovsUserStats.recalTcpCsum++;
} else if (!isRecv) {
- if (csumInfo.Transmit.TcpChecksum) {
+ if (hdrInfoIn->isTcp && csumInfo.Transmit.TcpChecksum) {
hdrInfoOut->tcpCsumNeeded = 1;
- } else if (csumInfo.Transmit.UdpChecksum) {
+ } else if (hdrInfoIn->isUdp && csumInfo.Transmit.UdpChecksum) {
hdrInfoOut->udpCsumNeeded = 1;
}
if (hdrInfoOut->tcpCsumNeeded || hdrInfoOut->udpCsumNeeded) {
@@ -896,7 +898,8 @@ OvsCompletePacketHeader(UINT8 *packet,
hdrInfoOut->tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP;
#endif
if (hdrInfoIn->isIPv4) {
- PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
+ PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet +
+ hdrInfoIn->l3Offset);
hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
(ipHdr->HeaderLength << 2));
#ifdef DBG
@@ -1004,8 +1007,8 @@ OvsCreateQueueNlPacket(PVOID userData,
csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo);
if (isRecv && (csumInfo.Receive.TcpChecksumFailed ||
- (csumInfo.Receive.UdpChecksumFailed && !hdrInfo->udpCsumZero) ||
- csumInfo.Receive.IpChecksumFailed)) {
+ (csumInfo.Receive.UdpChecksumFailed && !hdrInfo->udpCsumZero) ||
+ csumInfo.Receive.IpChecksumFailed)) {
OVS_LOG_INFO("Packet dropped due to checksum failure.");
ovsUserStats.dropDuetoChecksum++;
return NULL;