summaryrefslogtreecommitdiff
path: root/datapath-windows/ovsext/Gre.c
diff options
context:
space:
mode:
authorAlin Serdean <aserdean@cloudbasesolutions.com>2015-12-11 19:18:25 +0000
committerJustin Pettit <jpettit@ovn.org>2015-12-11 13:44:20 -0800
commit85571a3daa67603ce4096c12ba0522acded4deb0 (patch)
tree12d1b1f9b75f846350fc1f36398216257f4d6426 /datapath-windows/ovsext/Gre.c
parent3819692e6df7607eb94af8fd29a2976a1fc728ad (diff)
downloadopenvswitch-85571a3daa67603ce4096c12ba0522acded4deb0.tar.gz
datapath-windows: Add GRE TEB support for windows datapath
This patch introduces the support for GRE TEB (trasparent ethernet bridging) for the windows datapath. The GRE support is based on http://tools.ietf.org/html/rfc2890, without taking into account the GRE sequence, and it supports only the GRE protocol type 6558 (trasparent ethernet bridging) like its linux counterpart. Util.h: define the GRE pool tag Vport.c/h: sort the includes alphabetically add the function OvsFindTunnelVportByPortType which searches the tunnelVportsArray for a given port type Actions.c : sort the includes alphabetically call the GRE encapsulation / decapsulation functions when needed Gre.c/h : add GRE type defines add initialization/cleanup functions add encapsulation / decapsulation functions with software offloads (hardware offloads will be added in a separate patch) support Tested using: PSPING (https://technet.microsoft.com/en-us/sysinternals/psping.aspx) (ICMP, TCP, UDP) with various packet lengths IPERF3 (https://iperf.fr/iperf-download.php) (TCP, UDP) with various options Signed-off-by: Alin Gabriel Serdean <aserdean@cloudbasesolutions.com> Acked-by: Nithin Raju <nithin@vmware.com> Acked-by: Sorin Vinturis <svinturis@cloudbasesolutions.com> Signed-off-by: Justin Pettit <jpettit@ovn.org>
Diffstat (limited to 'datapath-windows/ovsext/Gre.c')
-rw-r--r--datapath-windows/ovsext/Gre.c453
1 files changed, 453 insertions, 0 deletions
diff --git a/datapath-windows/ovsext/Gre.c b/datapath-windows/ovsext/Gre.c
new file mode 100644
index 000000000..3ebfda3f8
--- /dev/null
+++ b/datapath-windows/ovsext/Gre.c
@@ -0,0 +1,453 @@
+/*
+ * Copyright (c) 2015 Cloudbase Solutions Srl
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "precomp.h"
+
+#include "Atomic.h"
+#include "Checksum.h"
+#include "Flow.h"
+#include "Gre.h"
+#include "IpHelper.h"
+#include "NetProto.h"
+#include "PacketIO.h"
+#include "PacketParser.h"
+#include "Switch.h"
+#include "User.h"
+#include "Util.h"
+#include "Vport.h"
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_GRE
+#include "Debug.h"
+
+static NDIS_STATUS
+OvsDoEncapGre(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl,
+ const OvsIPv4TunnelKey *tunKey,
+ const POVS_FWD_INFO fwdInfo,
+ POVS_PACKET_HDR_INFO layers,
+ POVS_SWITCH_CONTEXT switchContext,
+ PNET_BUFFER_LIST *newNbl);
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsInitGreTunnel --
+ * Initialize GRE tunnel module.
+ * --------------------------------------------------------------------------
+ */
+NTSTATUS
+OvsInitGreTunnel(POVS_VPORT_ENTRY vport)
+{
+ POVS_GRE_VPORT grePort;
+
+ grePort = (POVS_GRE_VPORT)OvsAllocateMemoryWithTag(sizeof(*grePort),
+ OVS_GRE_POOL_TAG);
+ if (!grePort) {
+ OVS_LOG_ERROR("Insufficient memory, can't allocate OVS_GRE_VPORT");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ RtlZeroMemory(grePort, sizeof(*grePort));
+ vport->priv = (PVOID)grePort;
+ return STATUS_SUCCESS;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsCleanupGreTunnel --
+ * Cleanup GRE Tunnel module.
+ * --------------------------------------------------------------------------
+ */
+void
+OvsCleanupGreTunnel(POVS_VPORT_ENTRY vport)
+{
+ if (vport->ovsType != OVS_VPORT_TYPE_GRE ||
+ vport->priv == NULL) {
+ return;
+ }
+
+ OvsFreeMemoryWithTag(vport->priv, OVS_GRE_POOL_TAG);
+ vport->priv = NULL;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsEncapGre --
+ * Encapsulates a packet with an GRE header.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsEncapGre(POVS_VPORT_ENTRY vport,
+ PNET_BUFFER_LIST curNbl,
+ OvsIPv4TunnelKey *tunKey,
+ POVS_SWITCH_CONTEXT switchContext,
+ POVS_PACKET_HDR_INFO layers,
+ PNET_BUFFER_LIST *newNbl)
+{
+ OVS_FWD_INFO fwdInfo;
+ NDIS_STATUS status;
+
+ status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo);
+ if (status != STATUS_SUCCESS) {
+ OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL);
+ return NDIS_STATUS_FAILURE;
+ }
+
+ status = OvsDoEncapGre(vport, curNbl, tunKey, &fwdInfo, layers,
+ switchContext, newNbl);
+ return status;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsDoEncapGre --
+ * Internal utility function which actually does the GRE encap.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsDoEncapGre(POVS_VPORT_ENTRY vport,
+ PNET_BUFFER_LIST curNbl,
+ const OvsIPv4TunnelKey *tunKey,
+ const POVS_FWD_INFO fwdInfo,
+ POVS_PACKET_HDR_INFO layers,
+ POVS_SWITCH_CONTEXT switchContext,
+ PNET_BUFFER_LIST *newNbl)
+{
+ NDIS_STATUS status;
+ PNET_BUFFER curNb;
+ PMDL curMdl;
+ PUINT8 bufferStart;
+ EthHdr *ethHdr;
+ IPHdr *ipHdr;
+ PGREHdr greHdr;
+ POVS_GRE_VPORT vportGre;
+ UINT32 headRoom = GreTunHdrSize(tunKey->flags);
+#if DBG
+ UINT32 counterHeadRoom;
+#endif
+ UINT32 packetLength;
+ ULONG mss = 0;
+ ASSERT(*newNbl == NULL);
+
+ curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+ packetLength = NET_BUFFER_DATA_LENGTH(curNb);
+
+ if (layers->isTcp) {
+ NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
+
+ tsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
+ TcpLargeSendNetBufferListInfo);
+ switch (tsoInfo.Transmit.Type) {
+ case NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE:
+ mss = tsoInfo.LsoV1Transmit.MSS;
+ break;
+ case NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE:
+ mss = tsoInfo.LsoV2Transmit.MSS;
+ break;
+ default:
+ OVS_LOG_ERROR("Unknown LSO transmit type:%d",
+ tsoInfo.Transmit.Type);
+ return NDIS_STATUS_FAILURE;
+ }
+ OVS_LOG_TRACE("MSS %u packet len %u", mss,
+ packetLength);
+ if (mss) {
+ OVS_LOG_TRACE("l4Offset %d", layers->l4Offset);
+ *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers,
+ mss, headRoom);
+ if (*newNbl == NULL) {
+ OVS_LOG_ERROR("Unable to segment NBL");
+ return NDIS_STATUS_FAILURE;
+ }
+ /* Clear out LSO flags after this point */
+ NET_BUFFER_LIST_INFO(*newNbl, TcpLargeSendNetBufferListInfo) = 0;
+ }
+ }
+
+ vportGre = (POVS_GRE_VPORT)GetOvsVportPriv(vport);
+ ASSERT(vportGre);
+
+ /* If we didn't split the packet above, make a copy now */
+ if (*newNbl == NULL) {
+ *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
+ FALSE /*NBL info*/);
+ if (*newNbl == NULL) {
+ OVS_LOG_ERROR("Unable to copy NBL");
+ return NDIS_STATUS_FAILURE;
+ }
+ /*
+ * To this point we do not have GRE hardware offloading.
+ * Apply defined checksums
+ */
+ curNb = NET_BUFFER_LIST_FIRST_NB(*newNbl);
+ curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+ bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
+ LowPagePriority);
+ if (!bufferStart) {
+ status = NDIS_STATUS_RESOURCES;
+ goto ret_error;
+ }
+
+ NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
+ csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
+ TcpIpChecksumNetBufferListInfo);
+
+ bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
+
+ if (layers->isIPv4) {
+ IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset);
+
+ if (csumInfo.Transmit.IpHeaderChecksum) {
+ ip->check = 0;
+ ip->check = IPChecksum((UINT8 *)ip, 4 * ip->ihl, 0);
+ }
+
+ if (layers->isTcp && csumInfo.Transmit.TcpChecksum) {
+ UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
+ TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset);
+ tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
+ IPPROTO_TCP, csumLength);
+ tcp->check = CalculateChecksumNB(curNb, csumLength,
+ (UINT32)(layers->l4Offset));
+ } else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) {
+ UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
+ UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
+ udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
+ IPPROTO_UDP, csumLength);
+ udp->check = CalculateChecksumNB(curNb, csumLength,
+ (UINT32)(layers->l4Offset));
+ }
+ } else if (layers->isIPv6) {
+ IPv6Hdr *ip = (IPv6Hdr *)(bufferStart + layers->l3Offset);
+
+ if (layers->isTcp && csumInfo.Transmit.TcpChecksum) {
+ UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
+ TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset);
+ tcp->check = IPv6PseudoChecksum((UINT32 *) &ip->saddr,
+ (UINT32 *) &ip->daddr,
+ IPPROTO_TCP, csumLength);
+ tcp->check = CalculateChecksumNB(curNb, csumLength,
+ (UINT32)(layers->l4Offset));
+ } else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) {
+ UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
+ UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
+ udp->check = IPv6PseudoChecksum((UINT32 *) &ip->saddr,
+ (UINT32 *) &ip->daddr,
+ IPPROTO_UDP, csumLength);
+ udp->check = CalculateChecksumNB(curNb, csumLength,
+ (UINT32)(layers->l4Offset));
+ }
+ }
+ /* Clear out TcpIpChecksumNetBufferListInfo flag */
+ NET_BUFFER_LIST_INFO(*newNbl, TcpIpChecksumNetBufferListInfo) = 0;
+ }
+
+ curNbl = *newNbl;
+ for (curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); curNb != NULL;
+ curNb = curNb->Next) {
+#if DBG
+ counterHeadRoom = headRoom;
+#endif
+ status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
+ if (status != NDIS_STATUS_SUCCESS) {
+ goto ret_error;
+ }
+
+ curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+ bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
+ LowPagePriority);
+ if (!bufferStart) {
+ status = NDIS_STATUS_RESOURCES;
+ goto ret_error;
+ }
+
+ bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
+ if (NET_BUFFER_NEXT_NB(curNb)) {
+ OVS_LOG_TRACE("nb length %u next %u",
+ NET_BUFFER_DATA_LENGTH(curNb),
+ NET_BUFFER_DATA_LENGTH(curNb->Next));
+ }
+
+ /* L2 header */
+ ethHdr = (EthHdr *)bufferStart;
+ ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) ==
+ (PCHAR)&fwdInfo->srcMacAddr);
+ NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr,
+ sizeof ethHdr->Destination + sizeof ethHdr->Source);
+ ethHdr->Type = htons(ETH_TYPE_IPV4);
+#if DBG
+ counterHeadRoom -= sizeof *ethHdr;
+#endif
+
+ /* IP header */
+ ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
+
+ ipHdr->ihl = sizeof *ipHdr / 4;
+ ipHdr->version = IPPROTO_IPV4;
+ ipHdr->tos = tunKey->tos;
+ ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr);
+ ipHdr->id = (uint16)atomic_add64(&vportGre->ipId,
+ NET_BUFFER_DATA_LENGTH(curNb));
+ ipHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ?
+ IP_DF_NBO : 0;
+ ipHdr->ttl = tunKey->ttl ? tunKey->ttl : 64;
+ ipHdr->protocol = IPPROTO_GRE;
+ ASSERT(tunKey->dst == fwdInfo->dstIpAddr);
+ ASSERT(tunKey->src == fwdInfo->srcIpAddr || tunKey->src == 0);
+ ipHdr->saddr = fwdInfo->srcIpAddr;
+ ipHdr->daddr = fwdInfo->dstIpAddr;
+
+ ipHdr->check = 0;
+ ipHdr->check = IPChecksum((UINT8 *)ipHdr, sizeof *ipHdr, 0);
+#if DBG
+ counterHeadRoom -= sizeof *ipHdr;
+#endif
+
+ /* GRE header */
+ greHdr = (GREHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
+ greHdr->flags = OvsTunnelFlagsToGreFlags(tunKey->flags);
+ greHdr->protocolType = GRE_NET_TEB;
+#if DBG
+ counterHeadRoom -= sizeof *greHdr;
+#endif
+
+ PCHAR currentOffset = (PCHAR)greHdr + sizeof *greHdr;
+
+ if (tunKey->flags & OVS_TNL_F_CSUM) {
+ RtlZeroMemory(currentOffset, 4);
+ currentOffset += 4;
+#if DBG
+ counterHeadRoom -= 4;
+#endif
+ }
+
+ if (tunKey->flags & OVS_TNL_F_KEY) {
+ RtlZeroMemory(currentOffset, 4);
+ UINT32 key = (tunKey->tunnelId >> 32);
+ RtlCopyMemory(currentOffset, &key, sizeof key);
+ currentOffset += 4;
+#if DBG
+ counterHeadRoom -= 4;
+#endif
+ }
+
+#if DBG
+ ASSERT(counterHeadRoom == 0);
+#endif
+
+ }
+ return STATUS_SUCCESS;
+
+ret_error:
+ OvsCompleteNBL(switchContext, *newNbl, TRUE);
+ *newNbl = NULL;
+ return status;
+}
+
+NDIS_STATUS
+OvsDecapGre(POVS_SWITCH_CONTEXT switchContext,
+ PNET_BUFFER_LIST curNbl,
+ OvsIPv4TunnelKey *tunKey,
+ PNET_BUFFER_LIST *newNbl)
+{
+ PNET_BUFFER curNb;
+ PMDL curMdl;
+ EthHdr *ethHdr;
+ IPHdr *ipHdr;
+ GREHdr *greHdr;
+ UINT32 tunnelSize = 0, packetLength = 0;
+ UINT32 headRoom = 0;
+ PUINT8 bufferStart;
+ NDIS_STATUS status;
+
+ curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+ packetLength = NET_BUFFER_DATA_LENGTH(curNb);
+ tunnelSize = GreTunHdrSize(tunKey->flags);
+ if (packetLength <= tunnelSize) {
+ return NDIS_STATUS_INVALID_LENGTH;
+ }
+
+ /*
+ * Create a copy of the NBL so that we have all the headers in one MDL.
+ */
+ *newNbl = OvsPartialCopyNBL(switchContext, curNbl,
+ tunnelSize + OVS_DEFAULT_COPY_SIZE, 0,
+ TRUE /*copy NBL info */);
+
+ if (*newNbl == NULL) {
+ return NDIS_STATUS_RESOURCES;
+ }
+
+ curNbl = *newNbl;
+ curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+ curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+ bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority) +
+ NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
+ if (!bufferStart) {
+ status = NDIS_STATUS_RESOURCES;
+ goto dropNbl;
+ }
+
+ ethHdr = (EthHdr *)bufferStart;
+ headRoom += sizeof *ethHdr;
+
+ ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
+ tunKey->src = ipHdr->saddr;
+ tunKey->dst = ipHdr->daddr;
+ tunKey->tos = ipHdr->tos;
+ tunKey->ttl = ipHdr->ttl;
+ tunKey->pad = 0;
+ headRoom += sizeof *ipHdr;
+
+ greHdr = (GREHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
+ headRoom += sizeof *greHdr;
+
+ /* Validate if GRE header protocol type. */
+ if (greHdr->protocolType != GRE_NET_TEB) {
+ status = STATUS_NDIS_INVALID_PACKET;
+ goto dropNbl;
+ }
+
+ PCHAR currentOffset = (PCHAR)greHdr + sizeof *greHdr;
+
+ if (greHdr->flags & GRE_CSUM) {
+ tunKey->flags |= OVS_TNL_F_CSUM;
+ currentOffset += 4;
+ headRoom += 4;
+ }
+
+ if (greHdr->flags & GRE_KEY) {
+ tunKey->flags |= OVS_TNL_F_KEY;
+ UINT32 key = 0;
+ RtlCopyMemory(&key, currentOffset, 4);
+ tunKey->tunnelId = (UINT64)key << 32;
+ currentOffset += 4;
+ headRoom += 4;
+ }
+
+ /* Clear out the receive flag for the inner packet. */
+ NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0;
+ NdisAdvanceNetBufferDataStart(curNb, GreTunHdrSize(tunKey->flags), FALSE,
+ NULL);
+ ASSERT(headRoom == GreTunHdrSize(tunKey->flags));
+ return NDIS_STATUS_SUCCESS;
+
+dropNbl:
+ OvsCompleteNBL(switchContext, *newNbl, TRUE);
+ *newNbl = NULL;
+ return status;
+}