summaryrefslogtreecommitdiff
path: root/datapath-windows/ovsext/Geneve.c
diff options
context:
space:
mode:
authorYin Lin <linyi@vmware.com>2016-06-24 14:44:30 -0700
committerGurucharan Shetty <guru@ovn.org>2016-06-24 14:47:15 -0700
commit47c3123da2d0140299e4f1d9fe4a2922a072ec1b (patch)
tree2eabe610529dc85263142387dc1dc89e25e5084a /datapath-windows/ovsext/Geneve.c
parent1f5b157ece72c6ed346a48309b29ea0a2e1c0c63 (diff)
downloadopenvswitch-47c3123da2d0140299e4f1d9fe4a2922a072ec1b.tar.gz
datapath-windows: Add Geneve support
Signed-off-by: Yin Lin <linyi@vmware.com> Acked-by: Nithin Raju <nithin@vmware.com> Signed-off-by: Gurucharan Shetty <guru@ovn.org>
Diffstat (limited to 'datapath-windows/ovsext/Geneve.c')
-rw-r--r--datapath-windows/ovsext/Geneve.c356
1 files changed, 356 insertions, 0 deletions
diff --git a/datapath-windows/ovsext/Geneve.c b/datapath-windows/ovsext/Geneve.c
new file mode 100644
index 000000000..5712e4daa
--- /dev/null
+++ b/datapath-windows/ovsext/Geneve.c
@@ -0,0 +1,356 @@
+/*
+ * Copyright (c) 2016 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "precomp.h"
+
+#include "Atomic.h"
+#include "Debug.h"
+#include "Flow.h"
+#include "IpHelper.h"
+#include "Jhash.h"
+#include "NetProto.h"
+#include "Offload.h"
+#include "PacketIO.h"
+#include "PacketParser.h"
+#include "Geneve.h"
+#include "Switch.h"
+#include "User.h"
+#include "Util.h"
+#include "Vport.h"
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_GENEVE
+
+
+NTSTATUS OvsInitGeneveTunnel(POVS_VPORT_ENTRY vport,
+ UINT16 udpDestPort)
+{
+ POVS_GENEVE_VPORT genevePort;
+
+ genevePort = (POVS_GENEVE_VPORT)
+ OvsAllocateMemoryWithTag(sizeof(*genevePort), OVS_GENEVE_POOL_TAG);
+ if (!genevePort) {
+ OVS_LOG_ERROR("Insufficient memory, can't allocate GENEVE_VPORT");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ RtlZeroMemory(genevePort, sizeof(*genevePort));
+ genevePort->dstPort = udpDestPort;
+ vport->priv = (PVOID) genevePort;
+ return STATUS_SUCCESS;
+}
+
+VOID
+OvsCleanupGeneveTunnel(POVS_VPORT_ENTRY vport)
+{
+ if (vport->ovsType != OVS_VPORT_TYPE_GENEVE ||
+ vport->priv == NULL) {
+ return;
+ }
+
+ OvsFreeMemoryWithTag(vport->priv, OVS_GENEVE_POOL_TAG);
+ vport->priv = NULL;
+}
+
+NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport,
+ PNET_BUFFER_LIST curNbl,
+ OvsIPv4TunnelKey *tunKey,
+ POVS_SWITCH_CONTEXT switchContext,
+ POVS_PACKET_HDR_INFO layers,
+ PNET_BUFFER_LIST *newNbl)
+{
+ NTSTATUS status;
+ OVS_FWD_INFO fwdInfo;
+ PNET_BUFFER curNb;
+ PMDL curMdl;
+ PUINT8 bufferStart;
+ EthHdr *ethHdr;
+ IPHdr *ipHdr;
+ UDPHdr *udpHdr;
+ GeneveHdr *geneveHdr;
+ GeneveOptionHdr *optHdr;
+ POVS_GENEVE_VPORT vportGeneve;
+ UINT32 headRoom = OvsGetGeneveTunHdrMinSize() + tunKey->tunOptLen;
+ UINT32 packetLength;
+ ULONG mss = 0;
+ NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
+
+ status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo);
+ if (status != STATUS_SUCCESS) {
+ OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL);
+ // return NDIS_STATUS_PENDING;
+ /*
+ * XXX: Don't know if the completionList will make any sense when
+ * accessed in the callback. Make sure the caveats are known.
+ *
+ * XXX: This code will work once we are able to grab locks in the
+ * callback.
+ */
+ return NDIS_STATUS_FAILURE;
+ }
+
+ curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+ packetLength = NET_BUFFER_DATA_LENGTH(curNb);
+
+ if (layers->isTcp) {
+ mss = OVSGetTcpMSS(curNbl);
+
+ OVS_LOG_TRACE("MSS %u packet len %u", mss,
+ packetLength);
+ if (mss) {
+ OVS_LOG_TRACE("l4Offset %d", layers->l4Offset);
+ *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers,
+ mss, headRoom);
+ if (*newNbl == NULL) {
+ OVS_LOG_ERROR("Unable to segment NBL");
+ return NDIS_STATUS_FAILURE;
+ }
+ /* Clear out LSO flags after this point */
+ NET_BUFFER_LIST_INFO(*newNbl, TcpLargeSendNetBufferListInfo) = 0;
+ }
+ }
+
+ vportGeneve = (POVS_GENEVE_VPORT) GetOvsVportPriv(vport);
+ ASSERT(vportGeneve != NULL);
+
+ /* If we didn't split the packet above, make a copy now */
+ if (*newNbl == NULL) {
+ *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
+ FALSE /*NBL info*/);
+ if (*newNbl == NULL) {
+ OVS_LOG_ERROR("Unable to copy NBL");
+ return NDIS_STATUS_FAILURE;
+ }
+ csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
+ TcpIpChecksumNetBufferListInfo);
+ status = OvsApplySWChecksumOnNB(layers, *newNbl, &csumInfo);
+
+ if (status != NDIS_STATUS_SUCCESS) {
+ goto ret_error;
+ }
+ }
+
+ curNbl = *newNbl;
+ for (curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); curNb != NULL;
+ curNb = curNb->Next) {
+ status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
+ if (status != NDIS_STATUS_SUCCESS) {
+ goto ret_error;
+ }
+
+ curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+ bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
+ LowPagePriority);
+ if (!bufferStart) {
+ status = NDIS_STATUS_RESOURCES;
+ goto ret_error;
+ }
+
+ bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
+ if (NET_BUFFER_NEXT_NB(curNb)) {
+ OVS_LOG_TRACE("nb length %u next %u",
+ NET_BUFFER_DATA_LENGTH(curNb),
+ NET_BUFFER_DATA_LENGTH(curNb->Next));
+ }
+
+ /* L2 header */
+ ethHdr = (EthHdr *)bufferStart;
+ ASSERT(((PCHAR)&fwdInfo.dstMacAddr + sizeof fwdInfo.dstMacAddr) ==
+ (PCHAR)&fwdInfo.srcMacAddr);
+ NdisMoveMemory(ethHdr->Destination, fwdInfo.dstMacAddr,
+ sizeof ethHdr->Destination + sizeof ethHdr->Source);
+ ethHdr->Type = htons(ETH_TYPE_IPV4);
+
+ /* IP header */
+ ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
+
+ ipHdr->ihl = sizeof *ipHdr / 4;
+ ipHdr->version = IPPROTO_IPV4;
+ ipHdr->tos = tunKey->tos;
+ ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr);
+ ipHdr->id = (uint16)atomic_add64(&vportGeneve->ipId,
+ NET_BUFFER_DATA_LENGTH(curNb));
+ ipHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ?
+ IP_DF_NBO : 0;
+ ipHdr->ttl = tunKey->ttl ? tunKey->ttl : GENEVE_DEFAULT_TTL;
+ ipHdr->protocol = IPPROTO_UDP;
+ ASSERT(tunKey->dst == fwdInfo.dstIpAddr);
+ ASSERT(tunKey->src == fwdInfo.srcIpAddr || tunKey->src == 0);
+ ipHdr->saddr = fwdInfo.srcIpAddr;
+ ipHdr->daddr = fwdInfo.dstIpAddr;
+ ipHdr->check = 0;
+
+ /* UDP header */
+ udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
+ udpHdr->source = htons(tunKey->flow_hash | MAXINT16);
+ udpHdr->dest = htons(vportGeneve->dstPort);
+ udpHdr->len = htons(NET_BUFFER_DATA_LENGTH(curNb) - headRoom +
+ sizeof *udpHdr + sizeof *geneveHdr +
+ tunKey->tunOptLen);
+ if (tunKey->flags & OVS_TNL_F_CSUM) {
+ UINT16 udpChksumLen = (UINT16) NET_BUFFER_DATA_LENGTH(curNb) -
+ sizeof *ipHdr - sizeof *ethHdr;
+ udpHdr->check = IPPseudoChecksum(&ipHdr->saddr, &ipHdr->daddr,
+ IPPROTO_UDP, udpChksumLen);
+ } else {
+ udpHdr->check = 0;
+ }
+ /* Geneve header */
+ geneveHdr = (GeneveHdr *)((PCHAR)udpHdr + sizeof *udpHdr);
+ geneveHdr->version = GENEVE_VER;
+ geneveHdr->optLen = tunKey->tunOptLen / 4;
+ geneveHdr->oam = !!(tunKey->flags & OVS_TNL_F_OAM);
+ geneveHdr->critical = !!(tunKey->flags & OVS_TNL_F_CRT_OPT);
+ geneveHdr->reserved1 = 0;
+ geneveHdr->protocol = ETH_P_TEB_NBO;
+ geneveHdr->vni = GENEVE_TUNNELID_TO_VNI(tunKey->tunnelId);
+ geneveHdr->reserved2 = 0;
+
+ /* Geneve header options */
+ optHdr = (GeneveOptionHdr *)(geneveHdr + 1);
+ memcpy(optHdr, TunnelKeyGetOptions(tunKey), tunKey->tunOptLen);
+
+ csumInfo.Value = 0;
+ csumInfo.Transmit.IpHeaderChecksum = 1;
+ csumInfo.Transmit.IsIPv4 = 1;
+ if (tunKey->flags & OVS_TNL_F_CSUM) {
+ csumInfo.Transmit.UdpChecksum = 1;
+ }
+ NET_BUFFER_LIST_INFO(curNbl,
+ TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
+ }
+ return STATUS_SUCCESS;
+
+ret_error:
+ OvsCompleteNBL(switchContext, *newNbl, TRUE);
+ *newNbl = NULL;
+ return status;
+}
+
+NDIS_STATUS OvsDecapGeneve(POVS_SWITCH_CONTEXT switchContext,
+ PNET_BUFFER_LIST curNbl,
+ OvsIPv4TunnelKey *tunKey,
+ PNET_BUFFER_LIST *newNbl)
+{
+ PNET_BUFFER curNb;
+ PMDL curMdl;
+ EthHdr *ethHdr;
+ IPHdr *ipHdr;
+ UDPHdr *udpHdr;
+ GeneveHdr *geneveHdr;
+ UINT32 tunnelSize;
+ UINT32 packetLength;
+ PUINT8 bufferStart;
+ PVOID optStart;
+ NDIS_STATUS status;
+
+ /* Check the length of the UDP payload */
+ curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+ tunnelSize = OvsGetGeneveTunHdrMinSize();
+ packetLength = NET_BUFFER_DATA_LENGTH(curNb);
+ if (packetLength <= tunnelSize) {
+ return NDIS_STATUS_INVALID_LENGTH;
+ }
+
+ /*
+ * Create a copy of the NBL so that we have all the headers in one MDL.
+ */
+ *newNbl = OvsPartialCopyNBL(switchContext, curNbl,
+ tunnelSize, 0,
+ TRUE /*copy NBL info */);
+
+ if (*newNbl == NULL) {
+ return NDIS_STATUS_RESOURCES;
+ }
+
+ /* XXX: Handle VLAN header. */
+ curNbl = *newNbl;
+ curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+ curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+ bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority)
+ + NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
+ if (!bufferStart) {
+ status = NDIS_STATUS_RESOURCES;
+ goto dropNbl;
+ }
+
+ ethHdr = (EthHdr *)bufferStart;
+ /* XXX: Handle IP options. */
+ ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
+ tunKey->src = ipHdr->saddr;
+ tunKey->dst = ipHdr->daddr;
+ tunKey->tos = ipHdr->tos;
+ tunKey->ttl = ipHdr->ttl;
+ tunKey->pad = 0;
+ udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
+
+ /* Validate if NIC has indicated checksum failure. */
+ status = OvsValidateUDPChecksum(curNbl, udpHdr->check == 0);
+ if (status != NDIS_STATUS_SUCCESS) {
+ goto dropNbl;
+ }
+
+ /* Calculate and verify UDP checksum if NIC didn't do it. */
+ if (udpHdr->check != 0) {
+ status = OvsCalculateUDPChecksum(curNbl, curNb, ipHdr, udpHdr,
+ packetLength);
+ tunKey->flags |= OVS_TNL_F_CSUM;
+ if (status != NDIS_STATUS_SUCCESS) {
+ goto dropNbl;
+ }
+ }
+
+ geneveHdr = (GeneveHdr *)((PCHAR)udpHdr + sizeof *udpHdr);
+ if (geneveHdr->protocol != ETH_P_TEB_NBO) {
+ status = STATUS_NDIS_INVALID_PACKET;
+ goto dropNbl;
+ }
+ tunKey->flags = OVS_TNL_F_KEY;
+ if (geneveHdr->oam) {
+ tunKey->flags |= OVS_TNL_F_OAM;
+ }
+ tunKey->tunnelId = GENEVE_VNI_TO_TUNNELID(geneveHdr->vni);
+ tunKey->tunOptLen = (uint8)geneveHdr->optLen * 4;
+ if (tunKey->tunOptLen > TUN_OPT_MAX_LEN ||
+ packetLength < tunnelSize + tunKey->tunOptLen) {
+ status = NDIS_STATUS_INVALID_LENGTH;
+ goto dropNbl;
+ }
+ /* Clear out the receive flag for the inner packet. */
+ NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0;
+
+ NdisAdvanceNetBufferDataStart(curNb, tunnelSize, FALSE, NULL);
+ if (tunKey->tunOptLen > 0) {
+ optStart = NdisGetDataBuffer(curNb, tunKey->tunOptLen,
+ TunnelKeyGetOptions(tunKey), 1, 0);
+
+ /* If data is contiguous in the buffer, NdisGetDataBuffer will not copy
+ data to the storage. Manual copy is needed. */
+ if (optStart != TunnelKeyGetOptions(tunKey)) {
+ memcpy(TunnelKeyGetOptions(tunKey), optStart, tunKey->tunOptLen);
+ }
+ NdisAdvanceNetBufferDataStart(curNb, tunKey->tunOptLen, FALSE, NULL);
+ }
+
+ return NDIS_STATUS_SUCCESS;
+
+dropNbl:
+ OvsCompleteNBL(switchContext, *newNbl, TRUE);
+ *newNbl = NULL;
+ return status;
+}