diff options
Diffstat (limited to 'datapath-windows/ovsext/Geneve.c')
-rw-r--r-- | datapath-windows/ovsext/Geneve.c | 356 |
1 files changed, 356 insertions, 0 deletions
diff --git a/datapath-windows/ovsext/Geneve.c b/datapath-windows/ovsext/Geneve.c new file mode 100644 index 000000000..5712e4daa --- /dev/null +++ b/datapath-windows/ovsext/Geneve.c @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2016 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "precomp.h" + +#include "Atomic.h" +#include "Debug.h" +#include "Flow.h" +#include "IpHelper.h" +#include "Jhash.h" +#include "NetProto.h" +#include "Offload.h" +#include "PacketIO.h" +#include "PacketParser.h" +#include "Geneve.h" +#include "Switch.h" +#include "User.h" +#include "Util.h" +#include "Vport.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_GENEVE + + +NTSTATUS OvsInitGeneveTunnel(POVS_VPORT_ENTRY vport, + UINT16 udpDestPort) +{ + POVS_GENEVE_VPORT genevePort; + + genevePort = (POVS_GENEVE_VPORT) + OvsAllocateMemoryWithTag(sizeof(*genevePort), OVS_GENEVE_POOL_TAG); + if (!genevePort) { + OVS_LOG_ERROR("Insufficient memory, can't allocate GENEVE_VPORT"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(genevePort, sizeof(*genevePort)); + genevePort->dstPort = udpDestPort; + vport->priv = (PVOID) genevePort; + return STATUS_SUCCESS; +} + +VOID +OvsCleanupGeneveTunnel(POVS_VPORT_ENTRY vport) +{ + if (vport->ovsType != OVS_VPORT_TYPE_GENEVE || + vport->priv == NULL) { + return; + } + + OvsFreeMemoryWithTag(vport->priv, OVS_GENEVE_POOL_TAG); + vport->priv = NULL; +} + +NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + POVS_SWITCH_CONTEXT switchContext, + POVS_PACKET_HDR_INFO layers, + PNET_BUFFER_LIST *newNbl) +{ + NTSTATUS status; + OVS_FWD_INFO fwdInfo; + PNET_BUFFER curNb; + PMDL curMdl; + PUINT8 bufferStart; + EthHdr *ethHdr; + IPHdr *ipHdr; + UDPHdr *udpHdr; + GeneveHdr *geneveHdr; + GeneveOptionHdr *optHdr; + POVS_GENEVE_VPORT vportGeneve; + UINT32 headRoom = OvsGetGeneveTunHdrMinSize() + tunKey->tunOptLen; + UINT32 packetLength; + ULONG mss = 0; + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; + + status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo); + if (status != STATUS_SUCCESS) { + OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL); + // return NDIS_STATUS_PENDING; + /* + * XXX: Don't know if the completionList will make any sense when + * accessed in the callback. Make sure the caveats are known. + * + * XXX: This code will work once we are able to grab locks in the + * callback. + */ + return NDIS_STATUS_FAILURE; + } + + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + packetLength = NET_BUFFER_DATA_LENGTH(curNb); + + if (layers->isTcp) { + mss = OVSGetTcpMSS(curNbl); + + OVS_LOG_TRACE("MSS %u packet len %u", mss, + packetLength); + if (mss) { + OVS_LOG_TRACE("l4Offset %d", layers->l4Offset); + *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers, + mss, headRoom); + if (*newNbl == NULL) { + OVS_LOG_ERROR("Unable to segment NBL"); + return NDIS_STATUS_FAILURE; + } + /* Clear out LSO flags after this point */ + NET_BUFFER_LIST_INFO(*newNbl, TcpLargeSendNetBufferListInfo) = 0; + } + } + + vportGeneve = (POVS_GENEVE_VPORT) GetOvsVportPriv(vport); + ASSERT(vportGeneve != NULL); + + /* If we didn't split the packet above, make a copy now */ + if (*newNbl == NULL) { + *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom, + FALSE /*NBL info*/); + if (*newNbl == NULL) { + OVS_LOG_ERROR("Unable to copy NBL"); + return NDIS_STATUS_FAILURE; + } + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, + TcpIpChecksumNetBufferListInfo); + status = OvsApplySWChecksumOnNB(layers, *newNbl, &csumInfo); + + if (status != NDIS_STATUS_SUCCESS) { + goto ret_error; + } + } + + curNbl = *newNbl; + for (curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); curNb != NULL; + curNb = curNb->Next) { + status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL); + if (status != NDIS_STATUS_SUCCESS) { + goto ret_error; + } + + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, + LowPagePriority); + if (!bufferStart) { + status = NDIS_STATUS_RESOURCES; + goto ret_error; + } + + bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + if (NET_BUFFER_NEXT_NB(curNb)) { + OVS_LOG_TRACE("nb length %u next %u", + NET_BUFFER_DATA_LENGTH(curNb), + NET_BUFFER_DATA_LENGTH(curNb->Next)); + } + + /* L2 header */ + ethHdr = (EthHdr *)bufferStart; + ASSERT(((PCHAR)&fwdInfo.dstMacAddr + sizeof fwdInfo.dstMacAddr) == + (PCHAR)&fwdInfo.srcMacAddr); + NdisMoveMemory(ethHdr->Destination, fwdInfo.dstMacAddr, + sizeof ethHdr->Destination + sizeof ethHdr->Source); + ethHdr->Type = htons(ETH_TYPE_IPV4); + + /* IP header */ + ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); + + ipHdr->ihl = sizeof *ipHdr / 4; + ipHdr->version = IPPROTO_IPV4; + ipHdr->tos = tunKey->tos; + ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr); + ipHdr->id = (uint16)atomic_add64(&vportGeneve->ipId, + NET_BUFFER_DATA_LENGTH(curNb)); + ipHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ? + IP_DF_NBO : 0; + ipHdr->ttl = tunKey->ttl ? tunKey->ttl : GENEVE_DEFAULT_TTL; + ipHdr->protocol = IPPROTO_UDP; + ASSERT(tunKey->dst == fwdInfo.dstIpAddr); + ASSERT(tunKey->src == fwdInfo.srcIpAddr || tunKey->src == 0); + ipHdr->saddr = fwdInfo.srcIpAddr; + ipHdr->daddr = fwdInfo.dstIpAddr; + ipHdr->check = 0; + + /* UDP header */ + udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr); + udpHdr->source = htons(tunKey->flow_hash | MAXINT16); + udpHdr->dest = htons(vportGeneve->dstPort); + udpHdr->len = htons(NET_BUFFER_DATA_LENGTH(curNb) - headRoom + + sizeof *udpHdr + sizeof *geneveHdr + + tunKey->tunOptLen); + if (tunKey->flags & OVS_TNL_F_CSUM) { + UINT16 udpChksumLen = (UINT16) NET_BUFFER_DATA_LENGTH(curNb) - + sizeof *ipHdr - sizeof *ethHdr; + udpHdr->check = IPPseudoChecksum(&ipHdr->saddr, &ipHdr->daddr, + IPPROTO_UDP, udpChksumLen); + } else { + udpHdr->check = 0; + } + /* Geneve header */ + geneveHdr = (GeneveHdr *)((PCHAR)udpHdr + sizeof *udpHdr); + geneveHdr->version = GENEVE_VER; + geneveHdr->optLen = tunKey->tunOptLen / 4; + geneveHdr->oam = !!(tunKey->flags & OVS_TNL_F_OAM); + geneveHdr->critical = !!(tunKey->flags & OVS_TNL_F_CRT_OPT); + geneveHdr->reserved1 = 0; + geneveHdr->protocol = ETH_P_TEB_NBO; + geneveHdr->vni = GENEVE_TUNNELID_TO_VNI(tunKey->tunnelId); + geneveHdr->reserved2 = 0; + + /* Geneve header options */ + optHdr = (GeneveOptionHdr *)(geneveHdr + 1); + memcpy(optHdr, TunnelKeyGetOptions(tunKey), tunKey->tunOptLen); + + csumInfo.Value = 0; + csumInfo.Transmit.IpHeaderChecksum = 1; + csumInfo.Transmit.IsIPv4 = 1; + if (tunKey->flags & OVS_TNL_F_CSUM) { + csumInfo.Transmit.UdpChecksum = 1; + } + NET_BUFFER_LIST_INFO(curNbl, + TcpIpChecksumNetBufferListInfo) = csumInfo.Value; + } + return STATUS_SUCCESS; + +ret_error: + OvsCompleteNBL(switchContext, *newNbl, TRUE); + *newNbl = NULL; + return status; +} + +NDIS_STATUS OvsDecapGeneve(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + PNET_BUFFER_LIST *newNbl) +{ + PNET_BUFFER curNb; + PMDL curMdl; + EthHdr *ethHdr; + IPHdr *ipHdr; + UDPHdr *udpHdr; + GeneveHdr *geneveHdr; + UINT32 tunnelSize; + UINT32 packetLength; + PUINT8 bufferStart; + PVOID optStart; + NDIS_STATUS status; + + /* Check the length of the UDP payload */ + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + tunnelSize = OvsGetGeneveTunHdrMinSize(); + packetLength = NET_BUFFER_DATA_LENGTH(curNb); + if (packetLength <= tunnelSize) { + return NDIS_STATUS_INVALID_LENGTH; + } + + /* + * Create a copy of the NBL so that we have all the headers in one MDL. + */ + *newNbl = OvsPartialCopyNBL(switchContext, curNbl, + tunnelSize, 0, + TRUE /*copy NBL info */); + + if (*newNbl == NULL) { + return NDIS_STATUS_RESOURCES; + } + + /* XXX: Handle VLAN header. */ + curNbl = *newNbl; + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority) + + NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + if (!bufferStart) { + status = NDIS_STATUS_RESOURCES; + goto dropNbl; + } + + ethHdr = (EthHdr *)bufferStart; + /* XXX: Handle IP options. */ + ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); + tunKey->src = ipHdr->saddr; + tunKey->dst = ipHdr->daddr; + tunKey->tos = ipHdr->tos; + tunKey->ttl = ipHdr->ttl; + tunKey->pad = 0; + udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr); + + /* Validate if NIC has indicated checksum failure. */ + status = OvsValidateUDPChecksum(curNbl, udpHdr->check == 0); + if (status != NDIS_STATUS_SUCCESS) { + goto dropNbl; + } + + /* Calculate and verify UDP checksum if NIC didn't do it. */ + if (udpHdr->check != 0) { + status = OvsCalculateUDPChecksum(curNbl, curNb, ipHdr, udpHdr, + packetLength); + tunKey->flags |= OVS_TNL_F_CSUM; + if (status != NDIS_STATUS_SUCCESS) { + goto dropNbl; + } + } + + geneveHdr = (GeneveHdr *)((PCHAR)udpHdr + sizeof *udpHdr); + if (geneveHdr->protocol != ETH_P_TEB_NBO) { + status = STATUS_NDIS_INVALID_PACKET; + goto dropNbl; + } + tunKey->flags = OVS_TNL_F_KEY; + if (geneveHdr->oam) { + tunKey->flags |= OVS_TNL_F_OAM; + } + tunKey->tunnelId = GENEVE_VNI_TO_TUNNELID(geneveHdr->vni); + tunKey->tunOptLen = (uint8)geneveHdr->optLen * 4; + if (tunKey->tunOptLen > TUN_OPT_MAX_LEN || + packetLength < tunnelSize + tunKey->tunOptLen) { + status = NDIS_STATUS_INVALID_LENGTH; + goto dropNbl; + } + /* Clear out the receive flag for the inner packet. */ + NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0; + + NdisAdvanceNetBufferDataStart(curNb, tunnelSize, FALSE, NULL); + if (tunKey->tunOptLen > 0) { + optStart = NdisGetDataBuffer(curNb, tunKey->tunOptLen, + TunnelKeyGetOptions(tunKey), 1, 0); + + /* If data is contiguous in the buffer, NdisGetDataBuffer will not copy + data to the storage. Manual copy is needed. */ + if (optStart != TunnelKeyGetOptions(tunKey)) { + memcpy(TunnelKeyGetOptions(tunKey), optStart, tunKey->tunOptLen); + } + NdisAdvanceNetBufferDataStart(curNb, tunKey->tunOptLen, FALSE, NULL); + } + + return NDIS_STATUS_SUCCESS; + +dropNbl: + OvsCompleteNBL(switchContext, *newNbl, TRUE); + *newNbl = NULL; + return status; +} |