diff options
author | Saurabh Shah <ssaurabh@vmware.com> | 2014-07-27 17:26:58 -0700 |
---|---|---|
committer | Ben Pfaff <blp@nicira.com> | 2014-07-28 10:10:43 -0700 |
commit | c803536e1cf531e29a31edfe740666ee6045d555 (patch) | |
tree | 7fc3591d38d16877cd1370687a1aa2015a0c6b29 /datapath-windows/ovsext | |
parent | 79fe0f4611b60fe0fdf43206bc06201fd724a18d (diff) | |
download | openvswitch-c803536e1cf531e29a31edfe740666ee6045d555.tar.gz |
datapath-windows: Kernel module for HyperV.
The kernel switch extension has support for bridged back forwarding & tunneling
over VXLAN. There is no Netlink integration as it is still being worked out.
Co-Authored-By: Ankur Sharma <ankursharma@vmware.com>
Signed-off-by: Ankur Sharma <ankursharma@vmware.com>
Co-Authored-By: Eitan Eliahu <eliahue@vmware.com>
Signed-off-by: Eitan Eliahu <eliahue@vmware.com>
Co-Authored-By: Guolin Yang <gyang@vmware.com>
Signed-off-by: Guolin Yang <gyang@vmware.com>
Co-Authored-By: Linda Sun <lsun@vmware.com>
Signed-off-by: Linda Sun <lsun@vmware.com>
Co-Authored-By: Nithin Raju <nithin@vmware.com>
Signed-off-by: Nithin Raju <nithin@vmware.com>
Signed-off-by: Saurabh Shah <ssaurabh@vmware.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Diffstat (limited to 'datapath-windows/ovsext')
48 files changed, 16356 insertions, 0 deletions
diff --git a/datapath-windows/ovsext/OvsActions.c b/datapath-windows/ovsext/OvsActions.c
new file mode 100644
index 000000000..79fb50f07
--- /dev/null
+++ b/datapath-windows/ovsext/OvsActions.c
@@ -0,0 +1,1522 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "precomp.h"
+
+#include "OvsIoctl.h"
+#include "OvsSwitch.h"
+#include "OvsVport.h"
+#include "OvsEvent.h"
+#include "OvsUser.h"
+#include "OvsNetProto.h"
+#include "OvsFlow.h"
+#include "OvsVxlan.h"
+#include "OvsChecksum.h"
+#include "OvsPacketIO.h"
+
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_ACTION
+#include "OvsDebug.h"
+
+/*
+ * Event counters for the action-execution pipeline in this file. A counter is
+ * incremented at the point where the corresponding event is detected.
+ */
+typedef struct _OVS_ACTION_STATS {
+    /* Rx packets matched as VXLAN (non-fragmented UDP to the VXLAN port). */
+    UINT64 rxVxlan;
+    /* Packets whose output port turned out to be a tunnel vport. */
+    UINT64 txVxlan;
+    /* Flow-table misses that were successfully queued to userspace. */
+    UINT64 flowMiss;
+    UINT64 flowUserspace;
+    UINT64 txTcp;
+    /* Flow-table misses that could not be queued to userspace. */
+    UINT32 failedFlowMiss;
+    /* Vport lookups that found no vport, or found one that is not connected. */
+    UINT32 noVport;
+    /* OvsExtractFlow() failures. */
+    UINT32 failedFlowExtract;
+    UINT32 noResource;
+    /* OvsPartialCopyNBL() failures. */
+    UINT32 noCopiedNbl;
+    /* Tunnel encapsulation failures. */
+    UINT32 failedEncap;
+    /* Tunnel decapsulation failures. */
+    UINT32 failedDecap;
+    /* Failures to grow or update an NBL's destination-port array. */
+    UINT32 cannotGrowDest;
+    UINT32 zeroActionLen;
+    /* IP checksum validation failures on the tunnel Rx path. */
+    UINT32 failedChecksum;
+} OVS_ACTION_STATS, *POVS_ACTION_STATS;
+
+/* The single instance of the counters used throughout this file. */
+OVS_ACTION_STATS ovsActionStats;
+
+/*
+ * There is a lot of data that needs to be maintained while executing the
+ * pipeline as dictated by the actions of a flow, across different functions at
+ * different
+ * levels. Such data is put together in a 'context' structure.
Care should be
+ * exercised while adding new members to the structure - only add ones that get
+ * used across multiple stages in the pipeline/get used in multiple functions.
+ */
+/*
+ * Number of destination slots requested the first time an NBL without a
+ * destination array needs one (see OvsAddPorts()).
+ */
+#define OVS_DEST_PORTS_ARRAY_MIN_SIZE 2
+typedef struct OvsForwardingContext {
+    /* Switch on which 'curNbl' is being processed. */
+    POVS_SWITCH_CONTEXT switchContext;
+    /* The NBL currently used in the pipeline. */
+    PNET_BUFFER_LIST curNbl;
+    /* NDIS forwarding detail for 'curNbl'. */
+    PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
+    /* Array of destination ports for 'curNbl'; NULL until one is obtained. */
+    PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts;
+    /* Send flags used while sending 'curNbl' into NDIS. */
+    ULONG sendFlags;
+    /* Total number of output ports, used + unused, in 'curNbl'. */
+    UINT32 destPortsSizeIn;
+    /* Total number of used output ports in 'curNbl'. */
+    UINT32 destPortsSizeOut;
+    /*
+     * If 'curNbl' is not owned by OVS, it is added to this list when it needs
+     * to be freed/completed. When this is NULL, 'curNbl' is assumed to have
+     * been created by OVS and is completed directly.
+     */
+    OvsCompletionList *completionList;
+    /*
+     * vport number of 'curNbl' when it is passed from the PIF bridge to the INT
+     * bridge, i.e. during tunneling on the Rx side.
+     */
+    UINT32 srcVportNo;
+
+    /*
+     * Tunnel key:
+     * - specified in actions during tunneling Tx
+     * - extracted from an NBL during tunneling Rx
+     */
+    OvsIPv4TunnelKey tunKey;
+
+    /*
+     * Tunneling - Tx:
+     * To store the output port, when it is a tunneled port. We don't foresee
+     * multiple tunneled ports as outport for any given NBL.
+     */
+    POVS_VPORT_ENTRY tunnelTxNic;
+
+    /*
+     * Tunneling - Rx:
+     * Points to the Internal port on the PIF Bridge, if the packet needs to be
+     * de-tunneled.
+     */
+    POVS_VPORT_ENTRY tunnelRxNic;
+
+    /* Header (layer offset) information for 'curNbl'. */
+    OVS_PACKET_HDR_INFO layers;
+} OvsForwardingContext;
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsInitForwardingCtx --
+ *     Function to init/re-init the 'ovsFwdCtx' context as the actions pipeline
+ *     is being executed.
+ *
+ * Result:
+ *     NDIS_STATUS_SUCCESS on success
+ *     Other NDIS_STATUS upon failure. Upon failure, it is safe to call
+ *     OvsCompleteNBLForwardingCtx(), since 'ovsFwdCtx' has been initialized
+ *     enough for OvsCompleteNBLForwardingCtx() to do its work.
+ *
+ * Notes:
+ *     - 'layers' is copied into the context when non-NULL; otherwise the
+ *       context's header information is zeroed.
+ *     - 'resetTunnelInfo' == TRUE clears 'tunnelTxNic', 'tunnelRxNic' and
+ *       'tunKey'; FALSE leaves whatever the context already holds.
+ *     - As written, every path returns NDIS_STATUS_SUCCESS.
+ * --------------------------------------------------------------------------
+ */
+static __inline NDIS_STATUS
+OvsInitForwardingCtx(OvsForwardingContext *ovsFwdCtx,
+                     POVS_SWITCH_CONTEXT switchContext,
+                     PNET_BUFFER_LIST curNbl,
+                     UINT32 srcVportNo,
+                     ULONG sendFlags,
+                     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail,
+                     OvsCompletionList *completionList,
+                     OVS_PACKET_HDR_INFO *layers,
+                     BOOLEAN resetTunnelInfo)
+{
+    ASSERT(ovsFwdCtx);
+    ASSERT(switchContext);
+    ASSERT(curNbl);
+    ASSERT(fwdDetail);
+
+    /*
+     * Set values for curNbl and switchContext so upon failures, we have enough
+     * information to do cleanup.
+     */
+    ovsFwdCtx->curNbl = curNbl;
+    ovsFwdCtx->switchContext = switchContext;
+    ovsFwdCtx->completionList = completionList;
+    ovsFwdCtx->fwdDetail = fwdDetail;
+
+    if (fwdDetail->NumAvailableDestinations > 0) {
+        /*
+         * XXX: even though MSDN says GetNetBufferListDestinations() returns
+         * NDIS_STATUS, the header files say otherwise.
+         */
+        switchContext->NdisSwitchHandlers.GetNetBufferListDestinations(
+            switchContext->NdisSwitchContext, curNbl,
+            &ovsFwdCtx->destinationPorts);
+
+        ASSERT(ovsFwdCtx->destinationPorts);
+        /* Ensure that none of the elements are consumed yet. */
+        ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
+               fwdDetail->NumAvailableDestinations);
+    } else {
+        /* No slots yet; OvsAddPorts() grows the array on first use. */
+        ovsFwdCtx->destinationPorts = NULL;
+    }
+    ovsFwdCtx->destPortsSizeIn = fwdDetail->NumAvailableDestinations;
+    ovsFwdCtx->destPortsSizeOut = 0;
+    ovsFwdCtx->srcVportNo = srcVportNo;
+    ovsFwdCtx->sendFlags = sendFlags;
+    if (layers) {
+        ovsFwdCtx->layers = *layers;
+    } else {
+        RtlZeroMemory(&ovsFwdCtx->layers, sizeof ovsFwdCtx->layers);
+    }
+    if (resetTunnelInfo) {
+        ovsFwdCtx->tunnelTxNic = NULL;
+        ovsFwdCtx->tunnelRxNic = NULL;
+        RtlZeroMemory(&ovsFwdCtx->tunKey, sizeof ovsFwdCtx->tunKey);
+    }
+
+    return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsDetectTunnelRxPkt --
+ *     Utility function for an RX packet to detect its tunnel type.
+ *
+ *     Only VXLAN is recognized: a non-fragmented UDP packet whose destination
+ *     port equals VXLAN_UDP_PORT_NBO. On a match with an existing VXLAN tunnel
+ *     vport, that vport is stored in 'ovsFwdCtx->tunnelRxNic'.
+ *
+ * Result:
+ *  True  - if the tunnel type was detected.
+ *  False - if not a tunnel packet or tunnel type not supported.
+ * --------------------------------------------------------------------------
+ */
+static __inline BOOLEAN
+OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx,
+                     const OvsFlowKey *flowKey)
+{
+    POVS_VPORT_ENTRY tunnelVport = NULL;
+
+    /* XXX: we should also check for the length of the UDP payload to pick
+     * packets only if they are at least VXLAN header size.
+     */
+    if (!flowKey->ipKey.nwFrag &&
+        flowKey->ipKey.nwProto == IPPROTO_UDP &&
+        flowKey->ipKey.l4.tpDst == VXLAN_UDP_PORT_NBO) {
+        tunnelVport = OvsGetTunnelVport(OVSWIN_VPORT_TYPE_VXLAN);
+        /* Counted even when no tunnel vport exists yet. */
+        ovsActionStats.rxVxlan++;
+    }
+
+    // We might get tunnel packets even before the tunnel gets initialized.
+    if (tunnelVport) {
+        ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
+        ovsFwdCtx->tunnelRxNic = tunnelVport;
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsDetectTunnelPkt --
+ *     Utility function to detect the tunnel type of a TX/RX packet.
+ *
+ * Result:
+ *  True  - if the tunnel type was detected.
+ *  False - if not a tunnel packet or tunnel type not supported.
+ *
+ *  When True is returned, the matching tunnel vport has been recorded in the
+ *  forwarding context ('tunnelRxNic' for Rx, 'tunnelTxNic' for Tx).
+ * --------------------------------------------------------------------------
+ */
+static __inline BOOLEAN
+OvsDetectTunnelPkt(OvsForwardingContext *ovsFwdCtx,
+                   const POVS_VPORT_ENTRY dstVport,
+                   const OvsFlowKey *flowKey)
+{
+    if (OvsIsInternalVportType(dstVport->ovsType)) {
+        /*
+         * Tunneling Rx candidate. The NBL must have come from the external
+         * port or, when executed from userspace, from the default port.
+         */
+        if (ovsFwdCtx->fwdDetail->SourcePortId !=
+                ovsFwdCtx->switchContext->externalPortId &&
+            ovsFwdCtx->fwdDetail->SourcePortId !=
+                NDIS_SWITCH_DEFAULT_PORT_ID) {
+            return FALSE;
+        }
+        if (!OvsDetectTunnelRxPkt(ovsFwdCtx, flowKey)) {
+            return FALSE;
+        }
+
+        ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
+        ASSERT(ovsFwdCtx->tunnelRxNic != NULL);
+        return TRUE;
+    }
+
+    if (!OvsIsTunnelVportType(dstVport->ovsType)) {
+        return FALSE;
+    }
+
+    /* Tunneling Tx: the destination itself is a tunnel vport. */
+    ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
+    ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
+    ASSERT(ovsFwdCtx->tunKey.dst != 0);
+    ovsActionStats.txVxlan++;
+    ovsFwdCtx->tunnelTxNic = dstVport;
+    return TRUE;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsAddPorts --
+ *     Add the specified destination vport into the forwarding context. If the
+ *     vport is a VIF/external port, it is added directly to the NBL. If it is
+ *     a tunneling port, it is NOT added to the NBL.
+ *
+ * Result:
+ *     NDIS_STATUS_SUCCESS on success
+ *     Other NDIS_STATUS upon failure.
+ *
+ * Notes:
+ *     A destination that is missing or not connected is skipped and still
+ *     reported as NDIS_STATUS_SUCCESS (only 'noVport' is incremented).
+ *     When the destination array is full it is grown: from empty to
+ *     OVS_DEST_PORTS_ARRAY_MIN_SIZE slots, otherwise to twice its size.
+ * --------------------------------------------------------------------------
+ */
+static __inline NDIS_STATUS
+OvsAddPorts(OvsForwardingContext *ovsFwdCtx,
+            OvsFlowKey *flowKey,
+            NDIS_SWITCH_PORT_ID dstPortId,
+            BOOLEAN preserveVLAN,
+            BOOLEAN preservePriority)
+{
+    POVS_VPORT_ENTRY vport;
+    PNDIS_SWITCH_PORT_DESTINATION fwdPort;
+    NDIS_STATUS status;
+    POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext;
+
+    /*
+     * We hold the dispatch lock that protects the list of vports, so vports
+     * validated here can be added as destinations safely before we call into
+     * NDIS.
+     *
+     * Some of the vports can be tunnelled ports as well in which case
+     * they should be added to a separate list of tunnelled destination ports
+     * instead of the VIF ports. The context for the tunnel is settable
+     * in OvsForwardingContext.
+     */
+    vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext, dstPortId);
+    if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
+        /*
+         * There may be some latency between a port disappearing, and userspace
+         * updating the recalculated flows. In the meantime, handle invalid
+         * ports gracefully.
+         */
+        ovsActionStats.noVport++;
+        return NDIS_STATUS_SUCCESS;
+    }
+    ASSERT(vport->nicState == NdisSwitchNicStateConnected);
+    /* Tx stats are charged using the length of the first NET_BUFFER only. */
+    vport->stats.txPackets++;
+    vport->stats.txBytes +=
+        NET_BUFFER_DATA_LENGTH(NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl));
+
+    /* Tunnel destinations are remembered in the context, not in the NBL. */
+    if (OvsDetectTunnelPkt(ovsFwdCtx, vport, flowKey)) {
+        ASSERT(ovsFwdCtx->tunnelTxNic || ovsFwdCtx->tunnelRxNic);
+        return NDIS_STATUS_SUCCESS;
+    }
+
+    /* The destination array is full (or absent): make room first. */
+    if (ovsFwdCtx->destPortsSizeOut == ovsFwdCtx->destPortsSizeIn) {
+        if (ovsFwdCtx->destPortsSizeIn == 0) {
+            ASSERT(ovsFwdCtx->destinationPorts == NULL);
+            ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0);
+            status =
+                switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations(
+                    switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
+                    OVS_DEST_PORTS_ARRAY_MIN_SIZE,
+                    &ovsFwdCtx->destinationPorts);
+            if (status != NDIS_STATUS_SUCCESS) {
+                ovsActionStats.cannotGrowDest++;
+                return status;
+            }
+            ovsFwdCtx->destPortsSizeIn =
+                ovsFwdCtx->fwdDetail->NumAvailableDestinations;
+            ASSERT(ovsFwdCtx->destinationPorts);
+        } else {
+            ASSERT(ovsFwdCtx->destinationPorts != NULL);
+            /*
+             * NumElements:
+             * A ULONG value that specifies the total number of
+             * NDIS_SWITCH_PORT_DESTINATION elements in the
+             * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure.
+             *
+             * NumDestinations:
+             * A ULONG value that specifies the number of
+             * NDIS_SWITCH_PORT_DESTINATION elements in the
+             * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure that
+             * specify port destinations.
+             *
+             * NumAvailableDestinations:
+             * A value that specifies the number of unused extensible switch
+             * destination ports elements within an NET_BUFFER_LIST structure.
+             */
+            ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
+                   ovsFwdCtx->destPortsSizeIn);
+            ASSERT(ovsFwdCtx->destinationPorts->NumDestinations ==
+                   ovsFwdCtx->destPortsSizeOut -
+                   ovsFwdCtx->fwdDetail->NumAvailableDestinations);
+            ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations > 0);
+            /*
+             * Before we grow the array of destination ports, the current set
+             * of ports needs to be committed. Only the ports added since the
+             * last commit need to be part of the new update.
+             */
+            status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations(
+                switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
+                ovsFwdCtx->fwdDetail->NumAvailableDestinations,
+                ovsFwdCtx->destinationPorts);
+            if (status != NDIS_STATUS_SUCCESS) {
+                ovsActionStats.cannotGrowDest++;
+                return status;
+            }
+            ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
+                   ovsFwdCtx->destPortsSizeIn);
+            ASSERT(ovsFwdCtx->destinationPorts->NumDestinations ==
+                   ovsFwdCtx->destPortsSizeOut);
+            ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0);
+
+            /* Ask for as many extra slots as we already have ... */
+            status = switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations(
+                switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
+                ovsFwdCtx->destPortsSizeIn, &ovsFwdCtx->destinationPorts);
+            if (status != NDIS_STATUS_SUCCESS) {
+                ovsActionStats.cannotGrowDest++;
+                return status;
+            }
+            ASSERT(ovsFwdCtx->destinationPorts != NULL);
+            /* ... which doubles the capacity tracked in the context. */
+            ovsFwdCtx->destPortsSizeIn <<= 1;
+        }
+    }
+
+    ASSERT(ovsFwdCtx->destPortsSizeOut < ovsFwdCtx->destPortsSizeIn);
+    /* Fill in the next free destination slot. */
+    fwdPort =
+        NDIS_SWITCH_PORT_DESTINATION_AT_ARRAY_INDEX(ovsFwdCtx->destinationPorts,
+                                                    ovsFwdCtx->destPortsSizeOut);
+
+    fwdPort->PortId = vport->portId;
+    fwdPort->NicIndex = vport->nicIndex;
+    fwdPort->IsExcluded = 0;
+    fwdPort->PreserveVLAN = preserveVLAN;
+    fwdPort->PreservePriority = preservePriority;
+    ovsFwdCtx->destPortsSizeOut += 1;
+
+    return NDIS_STATUS_SUCCESS;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsClearTunTxCtx --
+ *     Utility function to
clear tx tunneling context.
+ *     Resets 'tunnelTxNic' and zeroes 'tunKey.dst'.
+ * --------------------------------------------------------------------------
+ */
+static __inline VOID
+OvsClearTunTxCtx(OvsForwardingContext *ovsFwdCtx)
+{
+    ovsFwdCtx->tunnelTxNic = NULL;
+    ovsFwdCtx->tunKey.dst = 0;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsClearTunRxCtx --
+ *     Utility function to clear rx tunneling context.
+ *     Resets 'tunnelRxNic' and zeroes 'tunKey.dst'.
+ * --------------------------------------------------------------------------
+ */
+static __inline VOID
+OvsClearTunRxCtx(OvsForwardingContext *ovsFwdCtx)
+{
+    ovsFwdCtx->tunnelRxNic = NULL;
+    ovsFwdCtx->tunKey.dst = 0;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsCompleteNBLForwardingCtx --
+ *     This utility function is responsible for freeing/completing an NBL - either
+ *     by adding it to a completion list or by freeing it.
+ *
+ *     'dropReason' is passed along as the filter reason when the NBL is added
+ *     to the completion list; it is not used on the direct-completion path.
+ *
+ * Side effects:
+ *     It also resets the necessary fields in 'ovsFwdCtx': 'curNbl' becomes
+ *     NULL, 'destPortsSizeOut' becomes 0 and both tunnel NIC pointers are
+ *     cleared. 'tunKey' is left untouched.
+ * --------------------------------------------------------------------------
+ */
+static __inline VOID
+OvsCompleteNBLForwardingCtx(OvsForwardingContext *ovsFwdCtx,
+                            PCWSTR dropReason)
+{
+    NDIS_STRING filterReason;
+
+    RtlInitUnicodeString(&filterReason, dropReason);
+    if (ovsFwdCtx->completionList) {
+        OvsAddPktCompletionList(ovsFwdCtx->completionList, TRUE,
+                                ovsFwdCtx->fwdDetail->SourcePortId,
+                                ovsFwdCtx->curNbl, 1,
+                                &filterReason);
+        ovsFwdCtx->curNbl = NULL;
+    } else {
+        /* If there is no completionList, we assume this is ovs created NBL */
+        ovsFwdCtx->curNbl = OvsCompleteNBL(ovsFwdCtx->switchContext,
+                                           ovsFwdCtx->curNbl, TRUE);
+        ASSERT(ovsFwdCtx->curNbl == NULL);
+    }
+    /* XXX: these can be made debug only to save cycles. Ideally the pipeline
+     * using these fields should reset the values at the end of the pipeline. */
+    ovsFwdCtx->destPortsSizeOut = 0;
+    ovsFwdCtx->tunnelTxNic = NULL;
+    ovsFwdCtx->tunnelRxNic = NULL;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsDoFlowLookupOutput --
+ *     Function to be used for the second stage of a tunneling workflow, ie.:
+ *     - On the encapsulated packet on Tx path, to do a flow extract, flow
+ *       lookup and executing the actions.
+ *     - On the decapsulated packet on Rx path, to do a flow extract, flow
+ *       lookup and executing the actions.
+ *
+ *     XXX: It is assumed that the NBL in 'ovsFwdCtx' is owned by OVS. This is
+ *     until the new buffer management framework is adopted.
+ *
+ * Result:
+ *     NDIS_STATUS_SUCCESS when the source vport is gone (the NBL is dropped)
+ *     or when a missed packet was queued to userspace; the status of
+ *     OvsExtractFlow()/OvsActionsExecute() when those are reached;
+ *     NDIS_STATUS_FAILURE when queuing a miss to userspace fails.
+ *
+ * Side effects:
+ *     The NBL in 'ovsFwdCtx' is consumed.
+ * --------------------------------------------------------------------------
+ */
+static __inline NDIS_STATUS
+OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
+{
+    OvsFlowKey key;
+    OvsFlow *flow;
+    UINT64 hash;
+    NDIS_STATUS status;
+    POVS_VPORT_ENTRY vport =
+        OvsFindVportByPortNo(ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo);
+    if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
+        ASSERT(FALSE);  // XXX: let's catch this for now
+        OvsCompleteNBLForwardingCtx(ovsFwdCtx,
+            L"OVS-Dropped due to internal/tunnel port removal");
+        ovsActionStats.noVport++;
+        return NDIS_STATUS_SUCCESS;
+    }
+    ASSERT(vport->nicState == NdisSwitchNicStateConnected);
+
+    /* Assert that in the Rx direction, key is always setup. */
+    ASSERT(ovsFwdCtx->tunnelRxNic == NULL || ovsFwdCtx->tunKey.dst != 0);
+    /* The tunnel key is only handed to the extractor when it is populated. */
+    status = OvsExtractFlow(ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo,
+                            &key, &ovsFwdCtx->layers, ovsFwdCtx->tunKey.dst != 0 ?
+                            &ovsFwdCtx->tunKey : NULL);
+    if (status != NDIS_STATUS_SUCCESS) {
+        OvsCompleteNBLForwardingCtx(ovsFwdCtx,
+                                    L"OVS-Flow extract failed");
+        ovsActionStats.failedFlowExtract++;
+        return status;
+    }
+
+    flow = OvsLookupFlow(&ovsFwdCtx->switchContext->datapath, &key, &hash, FALSE);
+    if (flow) {
+        OvsFlowUsed(flow, ovsFwdCtx->curNbl, &ovsFwdCtx->layers);
+        ovsFwdCtx->switchContext->datapath.hits++;
+        status = OvsActionsExecute(ovsFwdCtx->switchContext,
+                                   ovsFwdCtx->completionList, ovsFwdCtx->curNbl,
+                                   ovsFwdCtx->srcVportNo, ovsFwdCtx->sendFlags,
+                                   &key, &hash, &ovsFwdCtx->layers,
+                                   flow->actions, flow->actionsLen);
+        /* The NBL was handed to OvsActionsExecute(); drop our reference. */
+        ovsFwdCtx->curNbl = NULL;
+    } else {
+        LIST_ENTRY missedPackets;
+        UINT32 num = 0;
+        ovsFwdCtx->switchContext->datapath.misses++;
+        InitializeListHead(&missedPackets);
+        status = OvsCreateAndAddPackets(
+                OVS_DEFAULT_PACKET_QUEUE, NULL, 0, OVS_PACKET_CMD_MISS,
+                ovsFwdCtx->srcVportNo,
+                key.tunKey.dst != 0 ?
+                    (OvsIPv4TunnelKey *)&key.tunKey : NULL,
+                ovsFwdCtx->curNbl,
+                ovsFwdCtx->tunnelRxNic != NULL, &ovsFwdCtx->layers,
+                ovsFwdCtx->switchContext, &missedPackets, &num);
+        /* Queue whatever was created, even if the call reported failure. */
+        if (num) {
+            OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, num);
+        }
+        if (status == NDIS_STATUS_SUCCESS) {
+            /* Complete the packet since it was copied to user buffer. */
+            OvsCompleteNBLForwardingCtx(ovsFwdCtx,
+                L"OVS-Dropped since packet was copied to userspace");
+            ovsActionStats.flowMiss++;
+            status = NDIS_STATUS_SUCCESS;
+        } else {
+            OvsCompleteNBLForwardingCtx(ovsFwdCtx,
+                L"OVS-Dropped due to failure to queue to userspace");
+            /* The specific error is collapsed into a generic failure. */
+            status = NDIS_STATUS_FAILURE;
+            ovsActionStats.failedFlowMiss++;
+        }
+    }
+
+    return status;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsTunnelPortTx --
+ *     The start function for Tx tunneling - encapsulates the packet, and
+ *     outputs the packet on the PIF bridge.
+ *
+ * Side effects:
+ *     The NBL in 'ovsFwdCtx' is consumed.
+ * -------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx) +{ + NDIS_STATUS status = NDIS_STATUS_FAILURE; + PNET_BUFFER_LIST newNbl = NULL; + + /* + * Setup the source port to be the internal port to as to facilitate the + * second OvsLookupFlow. + */ + ASSERT(ovsFwdCtx->switchContext->internalVport); + ovsFwdCtx->srcVportNo = + ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->portNo; + + ovsFwdCtx->fwdDetail->SourcePortId = ovsFwdCtx->switchContext->internalPortId; + ovsFwdCtx->fwdDetail->SourceNicIndex = + ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->nicIndex; + + /* Do the encap. Encap function does not consume the NBL. */ + switch(ovsFwdCtx->tunnelTxNic->ovsType) { + case OVSWIN_VPORT_TYPE_VXLAN: + status = OvsEncapVxlan(ovsFwdCtx->curNbl, &ovsFwdCtx->tunKey, + ovsFwdCtx->switchContext, + (VOID *)ovsFwdCtx->completionList, + &ovsFwdCtx->layers, &newNbl); + break; + default: + ASSERT(! "Tx: Unhandled tunnel type"); + } + + /* Reset the tunnel context so that it doesn't get used after this point. */ + OvsClearTunTxCtx(ovsFwdCtx); + + if (status == NDIS_STATUS_SUCCESS) { + ASSERT(newNbl); + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"Complete after cloning NBL for encapsulation"); + ovsFwdCtx->curNbl = newNbl; + status = OvsDoFlowLookupOutput(ovsFwdCtx); + ASSERT(ovsFwdCtx->curNbl == NULL); + } else { + /* + * XXX: Temporary freeing of the packet until we register a + * callback to IP helper. + */ + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"OVS-Dropped due to encap failure"); + ovsActionStats.failedEncap++; + status = NDIS_STATUS_SUCCESS; + } + + return status; +} + +/* + * -------------------------------------------------------------------------- + * OvsTunnelPortRx -- + * Decapsulate the incoming NBL based on the tunnel type and goes through + * the flow lookup for the inner packet. 
+ * + * Note: IP checksum is validate here, but L4 checksum validation needs + * to be done by the corresponding tunnel types. + * + * Side effects: + * The NBL in 'ovsFwdCtx' is consumed. + * -------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + PNET_BUFFER_LIST newNbl = NULL; + POVS_VPORT_ENTRY tunnelRxVport = ovsFwdCtx->tunnelRxNic; + + if (OvsValidateIPChecksum(ovsFwdCtx->curNbl, &ovsFwdCtx->layers) + != NDIS_STATUS_SUCCESS) { + ovsActionStats.failedChecksum++; + OVS_LOG_INFO("Packet dropped due to IP checksum failure."); + goto dropNbl; + } + + switch(tunnelRxVport->ovsType) { + case OVSWIN_VPORT_TYPE_VXLAN: + /* + * OvsDoDecapVxlan should return a new NBL if it was copied, and + * this new NBL should be setup as the ovsFwdCtx->curNbl. + */ + status = OvsDoDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + &ovsFwdCtx->tunKey, &newNbl); + break; + default: + OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n", + tunnelRxVport->ovsType); + ASSERT(! "Rx: Unhandled tunnel type"); + status = NDIS_STATUS_NOT_SUPPORTED; + } + + if (status != NDIS_STATUS_SUCCESS) { + ovsActionStats.failedDecap++; + goto dropNbl; + } + + /* + * tunnelRxNic and other fields will be cleared, re-init the context + * before usage. + */ + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"OVS-dropped due to new decap packet"); + + /* Decapsulated packet is in a new NBL */ + ovsFwdCtx->tunnelRxNic = tunnelRxVport; + OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, + newNbl, tunnelRxVport->portNo, 0, + NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), + ovsFwdCtx->completionList, + &ovsFwdCtx->layers, FALSE); + + /* + * Set the NBL's SourcePortId and SourceNicIndex to default values to + * keep NDIS happy when we forward the packet. 
+ */ + ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; + ovsFwdCtx->fwdDetail->SourceNicIndex = 0; + + status = OvsDoFlowLookupOutput(ovsFwdCtx); + ASSERT(ovsFwdCtx->curNbl == NULL); + OvsClearTunRxCtx(ovsFwdCtx); + + return status; + +dropNbl: + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"OVS-dropped due to decap failure"); + OvsClearTunRxCtx(ovsFwdCtx); + return status; +} + + +/* + * -------------------------------------------------------------------------- + * OvsOutputForwardingCtx -- + * This function outputs an NBL to NDIS or to a tunneling pipeline based on + * the ports added so far into 'ovsFwdCtx'. + * + * Side effects: + * This function consumes the NBL - either by forwarding it successfully to + * NDIS, or adding it to the completion list in 'ovsFwdCtx', or freeing it. + * + * Also makes sure that the list of destination ports - tunnel or otherwise is + * drained. + * -------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsOutputForwardingCtx(OvsForwardingContext *ovsFwdCtx) +{ + NDIS_STATUS status = STATUS_SUCCESS; + POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext; + + /* + * Handle the case where the some of the destination ports are tunneled + * ports - the non-tunneled ports get a unmodified copy of the NBL, and the + * tunneling pipeline starts when we output the packet to tunneled port. + */ + if (ovsFwdCtx->destPortsSizeOut > 0) { + PNET_BUFFER_LIST newNbl = NULL; + PNET_BUFFER nb; + UINT32 portsToUpdate = + ovsFwdCtx->fwdDetail->NumAvailableDestinations - + (ovsFwdCtx->destPortsSizeIn - ovsFwdCtx->destPortsSizeOut); + + ASSERT(ovsFwdCtx->destinationPorts != NULL); + + /* + * Create a copy of the packet in order to do encap on it later. Also, + * don't copy the offload context since the encap'd packet has a + * different set of headers. This will change when we implement offloads + * before doing encapsulation. 
+ */ + if (ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL) { + nb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); + newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + 0, 0, TRUE /*copy NBL info*/); + if (newNbl == NULL) { + status = NDIS_STATUS_RESOURCES; + ovsActionStats.noCopiedNbl++; + goto dropit; + } + } + + /* It does not seem like we'll get here unless 'portsToUpdate' > 0. */ + ASSERT(portsToUpdate > 0); + status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations( + switchContext->NdisSwitchContext, ovsFwdCtx->curNbl, + portsToUpdate, ovsFwdCtx->destinationPorts); + if (status != NDIS_STATUS_SUCCESS) { + OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE); + ovsActionStats.cannotGrowDest++; + goto dropit; + } + + OvsSendNBLIngress(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + ovsFwdCtx->sendFlags); + /* End this pipeline by resetting the corresponding context. */ + ovsFwdCtx->destPortsSizeOut = 0; + ovsFwdCtx->curNbl = NULL; + if (newNbl) { + status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, + newNbl, ovsFwdCtx->srcVportNo, 0, + NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), + ovsFwdCtx->completionList, + &ovsFwdCtx->layers, FALSE); + if (status != NDIS_STATUS_SUCCESS) { + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"Dropped due to resouces"); + goto dropit; + } + } + } + + if (ovsFwdCtx->tunnelTxNic != NULL) { + status = OvsTunnelPortTx(ovsFwdCtx); + ASSERT(ovsFwdCtx->tunnelTxNic == NULL); + ASSERT(ovsFwdCtx->tunKey.dst == 0); + } else if (ovsFwdCtx->tunnelRxNic != NULL) { + status = OvsTunnelPortRx(ovsFwdCtx); + ASSERT(ovsFwdCtx->tunnelRxNic == NULL); + ASSERT(ovsFwdCtx->tunKey.dst == 0); + } + ASSERT(ovsFwdCtx->curNbl == NULL); + + return status; + +dropit: + if (status != NDIS_STATUS_SUCCESS) { + OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"Dropped due to XXX"); + } + + return status; +} + + +/* + * -------------------------------------------------------------------------- + * 
OvsLookupFlowOutput --
+ *     Entry point for external callers: runs flow extract, flow lookup and
+ *     action execution on 'curNbl', using the internal port as the source.
+ *
+ *     Note: If this is being used from a callback function, make sure that the
+ *     arguments specified are still valid in the asynchronous context.
+ *
+ * Side effects:
+ *     This function consumes the NBL.
+ * --------------------------------------------------------------------------
+ */
+VOID
+OvsLookupFlowOutput(POVS_SWITCH_CONTEXT switchContext,
+                    VOID *compList,
+                    PNET_BUFFER_LIST curNbl)
+{
+    OvsForwardingContext fwdCtx;
+    NDIS_STATUS initStatus;
+    POVS_VPORT_ENTRY srcVport = (POVS_VPORT_ENTRY)switchContext->internalVport;
+
+    /*
+     * XXX: can internal port disappear while we are busy doing ARP resolution?
+     * It could, but will we get this callback from IP helper in that case. Need
+     * to check.
+     */
+    ASSERT(switchContext->internalVport);
+
+    /* XXX: make sure comp list was not a stack variable previously. */
+    initStatus = OvsInitForwardingCtx(&fwdCtx, switchContext, curNbl,
+                                      srcVport->portNo, 0,
+                                      NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl),
+                                      (OvsCompletionList *)compList, NULL, TRUE);
+
+    if (initStatus == NDIS_STATUS_SUCCESS) {
+        ASSERT(FALSE);
+        /*
+         * XXX: We need to acquire the dispatch lock and the datapath lock.
+         */
+
+        OvsDoFlowLookupOutput(&fwdCtx);
+    } else {
+        OvsCompleteNBLForwardingCtx(&fwdCtx,
+                                    L"OVS-Dropped due to resources");
+    }
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsOutputBeforeSetAction --
+ *     Function to be called to complete one set of actions on an NBL, before
+ *     we start the next one.
+ * -------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsOutputBeforeSetAction(OvsForwardingContext *ovsFwdCtx) +{ + PNET_BUFFER_LIST newNbl; + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + PNET_BUFFER nb; + + /* + * Create a copy and work on the copy after this point. The original NBL is + * forwarded. One reason to not use the copy for forwarding is that + * ports have already been added to the original NBL, and it might be + * inefficient/impossible to remove/re-add them to the copy. There's no + * notion of removing the ports, the ports need to be marked as + * "isExcluded". There's seems no real advantage to retaining the original + * and sending out the copy instead. + * + * XXX: We are copying the offload context here. This is to handle actions + * such as: + * outport, pop_vlan(), outport, push_vlan(), outport + * + * copy size needs to include inner ether + IP + TCP, need to revisit + * if we support IP options. + * XXX Head room needs to include the additional encap. + * XXX copySize check is not considering multiple NBs. + */ + nb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); + newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + 0, 0, TRUE /*copy NBL info*/); + + ASSERT(ovsFwdCtx->destPortsSizeOut > 0 || + ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL); + + /* Send the original packet out */ + status = OvsOutputForwardingCtx(ovsFwdCtx); + ASSERT(ovsFwdCtx->curNbl == NULL); + ASSERT(ovsFwdCtx->destPortsSizeOut == 0); + ASSERT(ovsFwdCtx->tunnelRxNic == NULL); + ASSERT(ovsFwdCtx->tunnelTxNic == NULL); + + /* If we didn't make a copy, can't continue. 
*/ + if (newNbl == NULL) { + ovsActionStats.noCopiedNbl++; + return NDIS_STATUS_RESOURCES; + } + + /* Finish the remaining actions with the new NBL */ + if (status != NDIS_STATUS_SUCCESS) { + OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE); + } else { + status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, + newNbl, ovsFwdCtx->srcVportNo, 0, + NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), + ovsFwdCtx->completionList, + &ovsFwdCtx->layers, FALSE); + } + + return status; +} + + +/* + * -------------------------------------------------------------------------- + * OvsPopVlanInPktBuf -- + * Function to pop a VLAN tag when the tag is in the packet buffer. + * -------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx) +{ + PNET_BUFFER curNb; + PMDL curMdl; + PUINT8 bufferStart; + ULONG dataLength = sizeof (DL_EUI48) + sizeof (DL_EUI48); + UINT32 packetLen, mdlLen; + PNET_BUFFER_LIST newNbl; + NDIS_STATUS status; + + /* + * Declare a dummy vlanTag structure since we need to compute the size + * of shiftLength. The NDIS one is a unionized structure. + */ + NDIS_PACKET_8021Q_INFO vlanTag = {0}; + ULONG shiftLength = sizeof (vlanTag.TagHeader); + PUINT8 tempBuffer[sizeof (DL_EUI48) + sizeof (DL_EUI48)]; + + newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + 0, 0, TRUE /* copy NBL info */); + if (!newNbl) { + ovsActionStats.noCopiedNbl++; + return NDIS_STATUS_RESOURCES; + } + + /* Complete the original NBL and create a copy to modify. 
*/ + OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"OVS-Dropped due to copy"); + + status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, + newNbl, ovsFwdCtx->srcVportNo, 0, + NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), + NULL, &ovsFwdCtx->layers, FALSE); + if (status != NDIS_STATUS_SUCCESS) { + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"Dropped due to resouces"); + return NDIS_STATUS_RESOURCES; + } + + curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); + packetLen = NET_BUFFER_DATA_LENGTH(curNb); + ASSERT(curNb->Next == NULL); + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority); + if (!bufferStart) { + return NDIS_STATUS_RESOURCES; + } + mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + /* Bail out if L2 + VLAN header is not contiguous in the first buffer. */ + if (MIN(packetLen, mdlLen) < sizeof (EthHdr) + shiftLength) { + ASSERT(FALSE); + return NDIS_STATUS_FAILURE; + } + bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + RtlCopyMemory(tempBuffer, bufferStart, dataLength); + RtlCopyMemory(bufferStart + shiftLength, tempBuffer, dataLength); + NdisAdvanceNetBufferDataStart(curNb, shiftLength, FALSE, NULL); + + return NDIS_STATUS_SUCCESS; +} + +/* + * -------------------------------------------------------------------------- + * OvsTunnelAttrToIPv4TunnelKey -- + * Convert tunnel attribute to OvsIPv4TunnelKey. 
+ * -------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsTunnelAttrToIPv4TunnelKey(struct nlattr *attr, + OvsIPv4TunnelKey *tunKey) +{ + struct nlattr *a; + INT rem; + + tunKey->attr[0] = 0; + tunKey->attr[1] = 0; + tunKey->attr[2] = 0; + ASSERT(nl_attr_type(attr) == OVS_KEY_ATTR_TUNNEL); + + NL_ATTR_FOR_EACH_UNSAFE (a, rem, nl_attr_data(attr), + nl_attr_get_size(attr)) { + switch (nl_attr_type(a)) { + case OVS_TUNNEL_KEY_ATTR_ID: + tunKey->tunnelId = nl_attr_get_be64(a); + tunKey->flags |= OVS_TNL_F_KEY; + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: + tunKey->src = nl_attr_get_be32(a); + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_DST: + tunKey->dst = nl_attr_get_be32(a); + break; + case OVS_TUNNEL_KEY_ATTR_TOS: + tunKey->tos = nl_attr_get_u8(a); + break; + case OVS_TUNNEL_KEY_ATTR_TTL: + tunKey->ttl = nl_attr_get_u8(a); + break; + case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: + tunKey->flags |= OVS_TNL_F_DONT_FRAGMENT; + break; + case OVS_TUNNEL_KEY_ATTR_CSUM: + tunKey->flags |= OVS_TNL_F_CSUM; + break; + default: + ASSERT(0); + } + } + + return NDIS_STATUS_SUCCESS; +} + +/* + *---------------------------------------------------------------------------- + * OvsUpdateEthHeader -- + * Updates the ethernet header in ovsFwdCtx.curNbl inline based on the + * specified key. 
+ *---------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsUpdateEthHeader(OvsForwardingContext *ovsFwdCtx, + const struct ovs_key_ethernet *ethAttr) +{ + PNET_BUFFER curNb; + PMDL curMdl; + PUINT8 bufferStart; + EthHdr *ethHdr; + UINT32 packetLen, mdlLen; + + curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); + ASSERT(curNb->Next == NULL); + packetLen = NET_BUFFER_DATA_LENGTH(curNb); + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority); + if (!bufferStart) { + ovsActionStats.noResource++; + return NDIS_STATUS_RESOURCES; + } + mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + ASSERT(mdlLen > 0); + /* Bail out if the L2 header is not in a contiguous buffer. */ + if (MIN(packetLen, mdlLen) < sizeof *ethHdr) { + ASSERT(FALSE); + return NDIS_STATUS_FAILURE; + } + ethHdr = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(curNb)); + + RtlCopyMemory(ethHdr->Destination, ethAttr->eth_dst, + sizeof ethHdr->Destination); + RtlCopyMemory(ethHdr->Source, ethAttr->eth_src, sizeof ethHdr->Source); + + return NDIS_STATUS_SUCCESS; +} + +/* + *---------------------------------------------------------------------------- + * OvsUpdateIPv4Header -- + * Updates the IPv4 header in ovsFwdCtx.curNbl inline based on the + * specified key. + *---------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, + const struct ovs_key_ipv4 *ipAttr) +{ + PNET_BUFFER curNb; + PMDL curMdl; + ULONG curMdlOffset; + PUINT8 bufferStart; + UINT32 mdlLen, hdrSize, packetLen; + OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers; + NDIS_STATUS status; + IPHdr *ipHdr; + TCPHdr *tcpHdr = NULL; + UDPHdr *udpHdr = NULL; + + ASSERT(layers->value != 0); + + /* + * Peek into the MDL to get a handle to the IP header and if required + * the TCP/UDP header as well. 
We check if the required headers are in one + * contiguous MDL, and if not, we copy them over to one MDL. + */ + curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); + ASSERT(curNb->Next == NULL); + packetLen = NET_BUFFER_DATA_LENGTH(curNb); + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority); + if (!bufferStart) { + ovsActionStats.noResource++; + return NDIS_STATUS_RESOURCES; + } + curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + mdlLen -= curMdlOffset; + ASSERT((INT)mdlLen >= 0); + + if (layers->isTcp || layers->isUdp) { + hdrSize = layers->l4Offset + + layers->isTcp ? sizeof (*tcpHdr) : sizeof (*udpHdr); + } else { + hdrSize = layers->l3Offset + sizeof (*ipHdr); + } + + /* Count of number of bytes of valid data there are in the first MDL. */ + mdlLen = MIN(packetLen, mdlLen); + if (mdlLen < hdrSize) { + PNET_BUFFER_LIST newNbl; + newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + hdrSize, 0, TRUE /*copy NBL info*/); + if (!newNbl) { + ovsActionStats.noCopiedNbl++; + return NDIS_STATUS_RESOURCES; + } + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"Complete after partial copy."); + + status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, + newNbl, ovsFwdCtx->srcVportNo, 0, + NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), + NULL, &ovsFwdCtx->layers, FALSE); + if (status != NDIS_STATUS_SUCCESS) { + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"OVS-Dropped due to resources"); + return NDIS_STATUS_RESOURCES; + } + + curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); + ASSERT(curNb->Next == NULL); + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority); + if (!curMdl) { + ovsActionStats.noResource++; + return NDIS_STATUS_RESOURCES; + } + curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + mdlLen -= curMdlOffset; + ASSERT(mdlLen >= hdrSize); + } + + ipHdr = (IPHdr *)(bufferStart + curMdlOffset + layers->l3Offset); + + if 
(layers->isTcp) { + tcpHdr = (TCPHdr *)(bufferStart + curMdlOffset + layers->l4Offset); + } else if (layers->isUdp) { + udpHdr = (UDPHdr *)(bufferStart + curMdlOffset + layers->l4Offset); + } + + /* + * Adjust the IP header inline as dictated by the action, nad also update + * the IP and the TCP checksum for the data modified. + * + * In the future, this could be optimized to make one call to + * ChecksumUpdate32(). Ignoring this for now, since for the most common + * case, we only update the TTL. + */ + if (ipHdr->saddr != ipAttr->ipv4_src) { + if (tcpHdr) { + tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->saddr, + ipAttr->ipv4_src); + } else if (udpHdr && udpHdr->check) { + udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->saddr, + ipAttr->ipv4_src); + } + + if (ipHdr->check != 0) { + ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->saddr, + ipAttr->ipv4_src); + } + ipHdr->saddr = ipAttr->ipv4_src; + } + if (ipHdr->daddr != ipAttr->ipv4_dst) { + if (tcpHdr) { + tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->daddr, + ipAttr->ipv4_dst); + } else if (udpHdr && udpHdr->check) { + udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->daddr, + ipAttr->ipv4_dst); + } + + if (ipHdr->check != 0) { + ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->daddr, + ipAttr->ipv4_dst); + } + ipHdr->daddr = ipAttr->ipv4_dst; + } + if (ipHdr->protocol != ipAttr->ipv4_proto) { + UINT16 oldProto = (ipHdr->protocol << 16) & 0xff00; + UINT16 newProto = (ipAttr->ipv4_proto << 16) & 0xff00; + if (tcpHdr) { + tcpHdr->check = ChecksumUpdate16(tcpHdr->check, oldProto, newProto); + } else if (udpHdr && udpHdr->check) { + udpHdr->check = ChecksumUpdate16(udpHdr->check, oldProto, newProto); + } + + if (ipHdr->check != 0) { + ipHdr->check = ChecksumUpdate16(ipHdr->check, oldProto, newProto); + } + ipHdr->protocol = ipAttr->ipv4_proto; + } + if (ipHdr->ttl != ipAttr->ipv4_ttl) { + UINT16 oldTtl = (ipHdr->ttl) & 0xff; + UINT16 newTtl = (ipAttr->ipv4_ttl) & 0xff; + if 
(ipHdr->check != 0) { + ipHdr->check = ChecksumUpdate16(ipHdr->check, oldTtl, newTtl); + } + ipHdr->ttl = ipAttr->ipv4_ttl; + } + + return NDIS_STATUS_SUCCESS; +} + +/* + * -------------------------------------------------------------------------- + * OvsExecuteSetAction -- + * Executes a set() action, but storing the actions into 'ovsFwdCtx' + * -------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx, + OvsFlowKey *key, + UINT64 *hash, + const struct nlattr *a) +{ + enum ovs_key_attr type = nl_attr_type(a); + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + + switch (type) { + case OVS_KEY_ATTR_ETHERNET: + status = OvsUpdateEthHeader(ovsFwdCtx, + nl_attr_get_unspec(a, sizeof(struct ovs_key_ethernet))); + break; + + case OVS_KEY_ATTR_IPV4: + status = OvsUpdateIPv4Header(ovsFwdCtx, + nl_attr_get_unspec(a, sizeof(struct ovs_key_ipv4))); + break; + + case OVS_KEY_ATTR_TUNNEL: + { + OvsIPv4TunnelKey tunKey; + + status = OvsTunnelAttrToIPv4TunnelKey((struct nlattr *)a, &tunKey); + ASSERT(status == NDIS_STATUS_SUCCESS); + tunKey.flow_hash = (uint16)(hash ? *hash : OvsHashFlow(key)); + RtlCopyMemory(&ovsFwdCtx->tunKey, &tunKey, sizeof ovsFwdCtx->tunKey); + + break; + } + case OVS_KEY_ATTR_SKB_MARK: + /* XXX: Not relevant to Hyper-V. Return OK */ + break; + case OVS_KEY_ATTR_UNSPEC: + case OVS_KEY_ATTR_ENCAP: + case OVS_KEY_ATTR_ETHERTYPE: + case OVS_KEY_ATTR_IN_PORT: + case OVS_KEY_ATTR_VLAN: + case OVS_KEY_ATTR_ICMP: + case OVS_KEY_ATTR_ICMPV6: + case OVS_KEY_ATTR_ARP: + case OVS_KEY_ATTR_ND: + case __OVS_KEY_ATTR_MAX: + default: + OVS_LOG_INFO("Unhandled attribute %#x", type); + ASSERT(FALSE); + } + return status; +} + +/* + * -------------------------------------------------------------------------- + * OvsActionsExecute -- + * Interpret and execute the specified 'actions' on the specifed packet + * 'curNbl'. 
The expectation is that if the packet needs to be dropped + * (completed) for some reason, it is added to 'completionList' so that the + * caller can complete the packet. If 'completionList' is NULL, the NBL is + * assumed to be generated by OVS and freed up. Otherwise, the function + * consumes the NBL by generating a NDIS send indication for the packet. + * + * There are one or more of "clone" NBLs that may get generated while + * executing the actions. Upon any failures, the "cloned" NBLs are freed up, + * and the caller does not have to worry about them. + * + * Success or failure is returned based on whether the specified actions + * were executed successfully on the packet or not. + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext, + OvsCompletionList *completionList, + PNET_BUFFER_LIST curNbl, + UINT32 portNo, + ULONG sendFlags, + OvsFlowKey *key, + UINT64 *hash, + OVS_PACKET_HDR_INFO *layers, + const struct nlattr *actions, + INT actionsLen) +{ + const struct nlattr *a; + INT rem; + UINT32 dstPortID; + OvsForwardingContext ovsFwdCtx; + PCWSTR dropReason = L""; + NDIS_STATUS status; + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail = + NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); + + /* XXX: ASSERT that the flow table lock is held. 
*/ + status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl, portNo, + sendFlags, fwdDetail, completionList, + layers, TRUE); + if (status != NDIS_STATUS_SUCCESS) { + dropReason = L"OVS-initing destination port list failed"; + goto dropit; + } + + if (actionsLen == 0) { + dropReason = L"OVS-Dropped due to Flow action"; + ovsActionStats.zeroActionLen++; + goto dropit; + } + + NL_ATTR_FOR_EACH_UNSAFE (a, rem, actions, actionsLen) { + switch(nl_attr_type(a)) { + case OVS_ACTION_ATTR_OUTPUT: + dstPortID = nl_attr_get_u32(a); + status = OvsAddPorts(&ovsFwdCtx, key, dstPortID, + TRUE, TRUE); + if (status != NDIS_STATUS_SUCCESS) { + dropReason = L"OVS-adding destination port failed"; + goto dropit; + } + break; + + case OVS_ACTION_ATTR_PUSH_VLAN: + { + struct ovs_action_push_vlan *vlan; + PVOID vlanTagValue; + PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag; + + if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL + || ovsFwdCtx.tunnelRxNic != NULL) { + status = OvsOutputBeforeSetAction(&ovsFwdCtx); + if (status != NDIS_STATUS_SUCCESS) { + dropReason = L"OVS-adding destination failed"; + goto dropit; + } + } + + vlanTagValue = NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl, + Ieee8021QNetBufferListInfo); + if (vlanTagValue != NULL) { + /* + * XXX: We don't support double VLAN tag offload. In such cases, + * we need to insert the existing one into the packet buffer, + * and add the new one as offload. This will take care of + * guest tag-in-tag case as well as OVS rules that specify + * tag-in-tag. 
+ */ + } else { + vlanTagValue = 0; + vlanTag = (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue; + vlan = (struct ovs_action_push_vlan *)nl_attr_get(a); + vlanTag->TagHeader.VlanId = ntohs(vlan->vlan_tci) & 0xfff; + vlanTag->TagHeader.UserPriority = ntohs(vlan->vlan_tci) >> 13; + + NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl, + Ieee8021QNetBufferListInfo) = vlanTagValue; + } + break; + } + + case OVS_ACTION_ATTR_POP_VLAN: + { + if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL + || ovsFwdCtx.tunnelRxNic != NULL) { + status = OvsOutputBeforeSetAction(&ovsFwdCtx); + if (status != NDIS_STATUS_SUCCESS) { + dropReason = L"OVS-adding destination failed"; + goto dropit; + } + } + + if (NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl, + Ieee8021QNetBufferListInfo) != 0) { + NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl, + Ieee8021QNetBufferListInfo) = 0; + } else { + /* + * The VLAN tag is inserted into the packet buffer. Pop the tag + * by packet buffer modification. + */ + status = OvsPopVlanInPktBuf(&ovsFwdCtx); + if (status != NDIS_STATUS_SUCCESS) { + dropReason = L"OVS-pop vlan action failed"; + goto dropit; + } + } + break; + } + + case OVS_ACTION_ATTR_USERSPACE: + { + const struct nlattr *userdata_attr; + const struct nlattr *queue_attr; + POVS_PACKET_QUEUE_ELEM elem; + UINT32 queueId = OVS_DEFAULT_PACKET_QUEUE; + //XXX confusing that portNo is actually portId for external port. 
+ BOOLEAN isRecv = (portNo == switchContext->externalPortId) + || OvsIsTunnelVportNo(portNo); + + queue_attr = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_PID); + userdata_attr = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA); + + elem = OvsCreateQueuePacket(queueId, (PVOID)userdata_attr, + userdata_attr->nla_len, + OVS_PACKET_CMD_ACTION, + portNo, (OvsIPv4TunnelKey *)&key->tunKey, + ovsFwdCtx.curNbl, + NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx.curNbl), + isRecv, + layers); + if (elem) { + LIST_ENTRY missedPackets; + InitializeListHead(&missedPackets); + InsertTailList(&missedPackets, &elem->link); + OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, 1); + dropReason = L"OVS-Completed since packet was copied to " + L"userspace"; + } else { + dropReason = L"OVS-Dropped due to failure to queue to " + L"userspace"; + goto dropit; + } + break; + } + case OVS_ACTION_ATTR_SET: + { + if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL + || ovsFwdCtx.tunnelRxNic != NULL) { + status = OvsOutputBeforeSetAction(&ovsFwdCtx); + if (status != NDIS_STATUS_SUCCESS) { + dropReason = L"OVS-adding destination failed"; + goto dropit; + } + } + + status = OvsExecuteSetAction(&ovsFwdCtx, key, hash, + nl_attr_get(a)); + if (status != NDIS_STATUS_SUCCESS) { + dropReason = L"OVS-set action failed"; + goto dropit; + } + break; + } + case OVS_ACTION_ATTR_SAMPLE: + break; + case OVS_ACTION_ATTR_UNSPEC: + case __OVS_ACTION_ATTR_MAX: + default: + break; + } + } + + if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL + || ovsFwdCtx.tunnelRxNic != NULL) { + status = OvsOutputForwardingCtx(&ovsFwdCtx); + ASSERT(ovsFwdCtx.curNbl == NULL); + } + + ASSERT(ovsFwdCtx.destPortsSizeOut == 0); + ASSERT(ovsFwdCtx.tunnelRxNic == NULL); + ASSERT(ovsFwdCtx.tunnelTxNic == NULL); + +dropit: + /* + * If curNbl != NULL, it implies the NBL has not been not freed up so far. 
+ */ + if (ovsFwdCtx.curNbl) { + OvsCompleteNBLForwardingCtx(&ovsFwdCtx, dropReason); + } + + return status; +} diff --git a/datapath-windows/ovsext/OvsAtomic.h b/datapath-windows/ovsext/OvsAtomic.h new file mode 100644 index 000000000..a94d1fb15 --- /dev/null +++ b/datapath-windows/ovsext/OvsAtomic.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OVS_ATOMIC_H_ +#define __OVS_ATOMIC_H_ 1 + +static __inline UINT64 +atomic_add64(UINT64 *ptr, UINT32 val) +{ + return InterlockedAdd64((LONGLONG volatile *) ptr, (LONGLONG) val); +} + +static __inline UINT64 +atomic_inc64(UINT64 *ptr) +{ + return InterlockedIncrement64((LONGLONG volatile *) ptr); +} + +#endif /* __OVS_ATOMIC_H_ */ diff --git a/datapath-windows/ovsext/OvsBufferMgmt.c b/datapath-windows/ovsext/OvsBufferMgmt.c new file mode 100644 index 000000000..8aa806061 --- /dev/null +++ b/datapath-windows/ovsext/OvsBufferMgmt.c @@ -0,0 +1,1535 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * ****************************************************************************
 *
 *      Simple Buffer Management framework for OVS
 *
 *  It introduces four NDIS buffer pools:
 *     **Fixed size net buffer list pool--this is used for small buffers.
 *     One allocation will include NBL + NB + MDL + Data + CONTEXT.
 *
 *     **Variable size net buffer list pool--this is used for variable size
 *     buffers. The allocation of the net buffer list will include NBL + NB +
 *     CONTEXT; a separate allocation of MDL + data buffer is required.
 *
 *     **NBL only net buffer list pool--this is used for partial copy
 *     (or clone). In this case we cannot allocate the net buffer list and
 *     the net buffer at the same time.
 *
 *     **Net buffer pool--this is required when net buffers need to be
 *     allocated separately.
 *
 *  A buffer context is defined to track the buffer specific information
 *  so that during NBL completion, proper action can be taken. Please see
 *  the code for details.
 *
 *  Here is the usage of the management API:
 *  Every external NBL should have its NBL context initialized by calling
 *     OvsInitExternalNBLContext()
 *
 *  After the external NBL context is initialized, the following APIs can be
 *  called to allocate, copy or partially copy an NBL.
 *
 *     OvsAllocateFixSizeNBL()
 *     OvsAllocateVariableSizeNBL()
 *
 *     OvsPartialCopyNBL()
 *     OvsPartialCopyToMultipleNBLs()
 *
 *     OvsFullCopyNBL()
 *     OvsFullCopyToMultipleNBLs()
 *
 *  See the code comments for a detailed description of the functions.
 *
 *  All NBLs are completed through
 *       OvsCompleteNBL()
 *     If this API returns a non-NULL value, then the returned NBL should be
 *     returned to the upper layer by calling
 *     NdisFSendNetBufferListsComplete() if the buffer is from the upper
 *     layer. In case of WFP, it can call the corresponding completion routine
 *     to return the NBL to the framework.
 *
 *  NOTE:
 *     1.
Copy or partial copy will not copy destination port array + * 2. Copy or partial copy will copy src port id and index + * 3. New Allocated NBL will have src port set to default port id + * 4. If original packet has direction flag set, the copied or partial + * copied NBL will still be in same direction. + * 5. When you advance or retreate the buffer, you may need to update + * relevant meta data to keep it consistent. + * + * **************************************************************************** + */ + +#include "precomp.h" +#include "OvsSwitch.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_BUFMGMT +#include "OvsDebug.h" +#include "OvsNetProto.h" +#include "OvsFlow.h" +#include "OvsChecksum.h" +#include "OvsPacketParser.h" + +/* + * -------------------------------------------------------------------------- + * OvsInitBufferPool -- + * + * Allocate NBL and NB pool + * + * XXX: more optimization may be done for buffer management include local cache + * of NBL, NB, data, context, MDL. + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsInitBufferPool(PVOID ovsContext) +{ + POVS_NBL_POOL ovsPool; + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + NET_BUFFER_LIST_POOL_PARAMETERS nblParam; + NET_BUFFER_POOL_PARAMETERS nbParam; + + C_ASSERT(MEMORY_ALLOCATION_ALIGNMENT >= 8); + + OVS_LOG_TRACE("Enter: context: %p", context); + + ovsPool = &context->ovsPool; + RtlZeroMemory(ovsPool, sizeof (OVS_NBL_POOL)); + ovsPool->ndisHandle = context->NdisFilterHandle; + ovsPool->ndisContext = context->NdisSwitchContext; + /* + * fix size NBL pool includes + * NBL + NB + MDL + DATA + Context + * This is mainly used for Packet execute or slow path when copy is + * required and size is less than OVS_DEFAULT_DATA_SIZE. We expect + * Most of packet from user space will use this Pool. (This is + * true for all bfd and cfm packet. 
+ */ + RtlZeroMemory(&nblParam, sizeof (nblParam)); + OVS_INIT_OBJECT_HEADER(&nblParam.Header, + NDIS_OBJECT_TYPE_DEFAULT, + NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1, + NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1); + nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE; + nblParam.PoolTag = OVS_FIX_SIZE_NBL_POOL_TAG; + nblParam.fAllocateNetBuffer = TRUE; + nblParam.DataSize = OVS_DEFAULT_DATA_SIZE + OVS_DEFAULT_HEADROOM_SIZE; + + ovsPool->fixSizePool = + NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam); + if (ovsPool->fixSizePool == NULL) { + goto pool_cleanup; + } + + /* + * Zero Size NBL Pool includes + * NBL + NB + Context + * This is mainly for packet with large data Size, in this case MDL and + * Data will be allocate separately. + */ + RtlZeroMemory(&nblParam, sizeof (nblParam)); + OVS_INIT_OBJECT_HEADER(&nblParam.Header, + NDIS_OBJECT_TYPE_DEFAULT, + NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1, + NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1); + + nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE; + nblParam.PoolTag = OVS_VARIABLE_SIZE_NBL_POOL_TAG; + nblParam.fAllocateNetBuffer = TRUE; + nblParam.DataSize = 0; + + ovsPool->zeroSizePool = + NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam); + if (ovsPool->zeroSizePool == NULL) { + goto pool_cleanup; + } + + /* + * NBL only pool just includes + * NBL (+ context) + * This is mainly used for clone and partial copy + */ + RtlZeroMemory(&nblParam, sizeof (nblParam)); + OVS_INIT_OBJECT_HEADER(&nblParam.Header, + NDIS_OBJECT_TYPE_DEFAULT, + NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1, + NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1); + + nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE; + nblParam.PoolTag = OVS_NBL_ONLY_POOL_TAG; + nblParam.fAllocateNetBuffer = FALSE; + nblParam.DataSize = 0; + + ovsPool->nblOnlyPool = + NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam); + if (ovsPool->nblOnlyPool == NULL) { + goto 
pool_cleanup; + } + + /* nb Pool + * NB only pool, used for copy + */ + + OVS_INIT_OBJECT_HEADER(&nbParam.Header, + NDIS_OBJECT_TYPE_DEFAULT, + NET_BUFFER_POOL_PARAMETERS_REVISION_1, + NDIS_SIZEOF_NET_BUFFER_POOL_PARAMETERS_REVISION_1); + nbParam.PoolTag = OVS_NET_BUFFER_POOL_TAG; + nbParam.DataSize = 0; + ovsPool->nbPool = + NdisAllocateNetBufferPool(context->NdisSwitchContext, &nbParam); + if (ovsPool->nbPool == NULL) { + goto pool_cleanup; + } + OVS_LOG_TRACE("Exit: fixSizePool: %p zeroSizePool: %p nblOnlyPool: %p" + "nbPool: %p", ovsPool->fixSizePool, ovsPool->zeroSizePool, + ovsPool->nblOnlyPool, ovsPool->nbPool); + return NDIS_STATUS_SUCCESS; + +pool_cleanup: + OvsCleanupBufferPool(context); + OVS_LOG_TRACE("Exit: Fail to initialize ovs buffer pool"); + return NDIS_STATUS_RESOURCES; +} + + +/* + * -------------------------------------------------------------------------- + * OvsCleanupBufferPool -- + * Free Buffer pool for NBL and NB. + * -------------------------------------------------------------------------- + */ +VOID +OvsCleanupBufferPool(PVOID ovsContext) +{ + POVS_NBL_POOL ovsPool; + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + ovsPool = &context->ovsPool; + OVS_LOG_TRACE("Enter: context: %p", context); +#ifdef DBG + ASSERT(ovsPool->fixNBLCount == 0); + ASSERT(ovsPool->zeroNBLCount == 0); + ASSERT(ovsPool->nblOnlyCount == 0); + ASSERT(ovsPool->nbCount == 0); + ASSERT(ovsPool->sysNBLCount == 0); + ASSERT(ovsPool->fragNBLCount == 0); +#endif + + if (ovsPool->fixSizePool) { + NdisFreeNetBufferListPool(ovsPool->fixSizePool); + ovsPool->fixSizePool = NULL; + } + if (ovsPool->zeroSizePool) { + NdisFreeNetBufferListPool(ovsPool->zeroSizePool); + ovsPool->zeroSizePool = NULL; + } + if (ovsPool->nblOnlyPool) { + NdisFreeNetBufferListPool(ovsPool->nblOnlyPool); + ovsPool->nblOnlyPool = NULL; + } + if (ovsPool->nbPool) { + NdisFreeNetBufferPool(ovsPool->nbPool); + ovsPool->nbPool = NULL; + } + OVS_LOG_TRACE("Exit: cleanup OVS Buffer pool"); 
+} + + +static VOID +OvsInitNBLContext(POVS_BUFFER_CONTEXT ctx, + UINT16 flags, + UINT32 origDataLength, + UINT32 srcPortNo) +{ + ctx->magic = OVS_CTX_MAGIC; + ctx->refCount = 1; + ctx->flags = flags; + ctx->srcPortNo = srcPortNo; + ctx->origDataLength = origDataLength; +} + + +static VOID +OvsDumpForwardingDetails(PNET_BUFFER_LIST nbl) +{ + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info; + info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl); + if (info == NULL) { + return; + } + OVS_LOG_INFO("nbl: %p, numAvailableDest: %d, srcId:%d, srcIndex: %d " + "isDataSafe: %s, safeDataSize: %d", + nbl, info->NumAvailableDestinations, info->SourcePortId, + info->SourceNicIndex, + info->IsPacketDataSafe ? "TRUE" : "FALSE", + info->IsPacketDataSafe ? 0 : info->SafePacketDataSize); + +} + +static VOID +OvsDumpNBLContext(PNET_BUFFER_LIST nbl) +{ + PNET_BUFFER_LIST_CONTEXT ctx = nbl->Context; + if (ctx == NULL) { + OVS_LOG_INFO("No Net Buffer List context"); + return; + } + while (ctx) { + OVS_LOG_INFO("nbl: %p, ctx: %p, TotalSize: %d, Offset: %d", + nbl, ctx, ctx->Size, ctx->Offset); + ctx = ctx->Next; + } +} + + +static VOID +OvsDumpMDLChain(PMDL mdl) +{ + PMDL tmp; + tmp = mdl; + while (tmp) { + OVS_LOG_INFO("MDL: %p, Size: %d, MappedSystemVa: %p, StartVa: %p" + " ByteCount: %d, ByteOffset: %d", + tmp, tmp->Size, tmp->MappedSystemVa, + tmp->StartVa, tmp->ByteCount, tmp->ByteOffset); + tmp = tmp->Next; + } +} + + +static VOID +OvsDumpNetBuffer(PNET_BUFFER nb) +{ + OVS_LOG_INFO("NET_BUFFER: %p, ChecksumBias: %d Handle: %p, MDLChain: %p " + "CurrMDL: %p, CurrOffset: %d, DataLen: %d, Offset: %d", + nb, + NET_BUFFER_CHECKSUM_BIAS(nb), nb->NdisPoolHandle, + NET_BUFFER_FIRST_MDL(nb), + NET_BUFFER_CURRENT_MDL(nb), + NET_BUFFER_CURRENT_MDL_OFFSET(nb), + NET_BUFFER_DATA_LENGTH(nb), + NET_BUFFER_DATA_OFFSET(nb)); + OvsDumpMDLChain(NET_BUFFER_FIRST_MDL(nb)); +} + + +static VOID +OvsDumpNetBufferList(PNET_BUFFER_LIST nbl) +{ + PNET_BUFFER nb; + OVS_LOG_INFO("NBL: %p, parent: %p, 
SrcHandle: %p, ChildCount:%d " + "poolHandle: %p", + nbl, nbl->ParentNetBufferList, + nbl->SourceHandle, nbl->ChildRefCount, + nbl->NdisPoolHandle); + OvsDumpNBLContext(nbl); + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + while (nb) { + OvsDumpNetBuffer(nb); + nb = NET_BUFFER_NEXT_NB(nb); + } +} + +/* + * -------------------------------------------------------------------------- + * OvsAllocateFixSizeNBL -- + * + * Allocate fix size NBL which include + * NBL + NB + MBL + Data + Context + * Please note: + * * Forwarding Context is allocated, but forwarding detail information + * is not initailized. + * * The headroom can not be larger than OVS_DEFAULT_HEADROOM_SIZE(128 + * byte). + * -------------------------------------------------------------------------- + */ +PNET_BUFFER_LIST +OvsAllocateFixSizeNBL(PVOID ovsContext, + UINT32 size, + UINT32 headRoom) +{ + PNET_BUFFER_LIST nbl = NULL; + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + POVS_BUFFER_CONTEXT ctx; + POVS_NBL_POOL ovsPool = &context->ovsPool; + NDIS_STATUS status; + UINT32 line; + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info; + + if ((headRoom + size) > OVS_FIX_NBL_DATA_SIZE || size == 0) { + line = __LINE__; + goto allocate_done; + } + + nbl = NdisAllocateNetBufferList(ovsPool->fixSizePool, + (UINT16)sizeof (OVS_BUFFER_CONTEXT), + (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL); + + if (nbl == NULL) { + line = __LINE__; + goto allocate_done; + } + + nbl->SourceHandle = ovsPool->ndisHandle; + status = context->NdisSwitchHandlers. 
+ AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl); + + if (status != NDIS_STATUS_SUCCESS) { + NdisFreeNetBufferList(nbl); + nbl = NULL; + line = __LINE__; + goto allocate_done; + } + info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl); + ASSERT(info); + info->IsPacketDataSafe = TRUE; + info->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; + + status = NdisRetreatNetBufferDataStart(NET_BUFFER_LIST_FIRST_NB(nbl), + size, 0, NULL); + ASSERT(status == NDIS_STATUS_SUCCESS); + +#ifdef DBG + InterlockedIncrement((LONG volatile *)&ovsPool->fixNBLCount); + OvsDumpNetBufferList(nbl); + OvsDumpForwardingDetails(nbl); +#endif + + ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + ASSERT(ctx); + + OvsInitNBLContext(ctx, OVS_BUFFER_FROM_FIX_SIZE_POOL | + OVS_BUFFER_PRIVATE_FORWARD_CONTEXT, size, + OVS_DEFAULT_PORT_NO); + line = __LINE__; +allocate_done: + OVS_LOG_LOUD("Allocate Fix NBL: %p, line: %d", nbl, line); + return nbl; +} + + +static PMDL +OvsAllocateMDLAndData(NDIS_HANDLE ndisHandle, + UINT32 dataSize) +{ + PMDL mdl; + PVOID data; + + data = OvsAllocateMemory(dataSize); + if (data == NULL) { + return NULL; + } + + mdl = NdisAllocateMdl(ndisHandle, data, dataSize); + if (mdl == NULL) { + OvsFreeMemory(data); + } + + return mdl; +} + + +static VOID +OvsFreeMDLAndData(PMDL mdl) +{ + PVOID data; + + data = MmGetMdlVirtualAddress(mdl); + NdisFreeMdl(mdl); + OvsFreeMemory(data); +} + + +/* + * -------------------------------------------------------------------------- + * OvsAllocateVariableSizeNBL -- + * + * Allocate variable size NBL, the NBL looks like + * NBL + NB + Context + * MDL + Data + * -------------------------------------------------------------------------- + */ +PNET_BUFFER_LIST +OvsAllocateVariableSizeNBL(PVOID ovsContext, + UINT32 size, + UINT32 headRoom) +{ + PNET_BUFFER_LIST nbl = NULL; + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + POVS_NBL_POOL ovsPool = &context->ovsPool; + POVS_BUFFER_CONTEXT ctx; + 
UINT32 realSize; + PMDL mdl; + NDIS_STATUS status; + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info; + if (size == 0) { + return NULL; + } + realSize = MEM_ALIGN_SIZE(size + headRoom); + + mdl = OvsAllocateMDLAndData(ovsPool->ndisHandle, realSize); + if (mdl == NULL) { + return NULL; + } + + nbl = NdisAllocateNetBufferAndNetBufferList(ovsPool->zeroSizePool, + (UINT16)sizeof (OVS_BUFFER_CONTEXT), + (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL, + mdl, realSize, 0); + if (nbl == NULL) { + OvsFreeMDLAndData(mdl); + return NULL; + } + + nbl->SourceHandle = ovsPool->ndisHandle; + status = context->NdisSwitchHandlers. + AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl); + + if (status != NDIS_STATUS_SUCCESS) { + /* + * do we need to remove mdl from nbl XXX + */ + OvsFreeMDLAndData(mdl); + NdisFreeNetBufferList(nbl); + return NULL; + } + + info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl); + ASSERT(info); + info->IsPacketDataSafe = TRUE; + info->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; + status = NdisRetreatNetBufferDataStart(NET_BUFFER_LIST_FIRST_NB(nbl), + size, 0, NULL); + ASSERT(status == NDIS_STATUS_SUCCESS); + +#ifdef DBG + InterlockedIncrement((LONG volatile *)&ovsPool->zeroNBLCount); + OvsDumpNetBufferList(nbl); + OvsDumpForwardingDetails(nbl); +#endif + + ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + + OvsInitNBLContext(ctx, OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA | + OVS_BUFFER_PRIVATE_FORWARD_CONTEXT | + OVS_BUFFER_FROM_ZERO_SIZE_POOL, + size, OVS_DEFAULT_PORT_NO); + + OVS_LOG_LOUD("Allocate variable size NBL: %p", nbl); + return nbl; +} + + +/* + * -------------------------------------------------------------------------- + * OvsInitExternalNBLContext -- + * + * For NBL not allocated by OVS, it will allocate and initialize + * the NBL context. 
+ * -------------------------------------------------------------------------- + */ +POVS_BUFFER_CONTEXT +OvsInitExternalNBLContext(PVOID ovsContext, + PNET_BUFFER_LIST nbl, + BOOLEAN isRecv) +{ + NDIS_HANDLE poolHandle; + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + POVS_BUFFER_CONTEXT ctx; + PNET_BUFFER nb; + NDIS_STATUS status; + UINT16 flags; + + poolHandle = NdisGetPoolFromNetBufferList(nbl); + + if (poolHandle == context->ovsPool.ndisHandle) { + return (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + } + status = NdisAllocateNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT), + OVS_DEFAULT_NBL_CONTEXT_FILL, + OVS_OTHER_POOL_TAG); + if (status != NDIS_STATUS_SUCCESS) { + return NULL; + } +#ifdef DBG + OvsDumpNBLContext(nbl); + InterlockedIncrement((LONG volatile *)&context->ovsPool.sysNBLCount); +#endif + flags = isRecv ? OVS_BUFFER_RECV_BUFFER : OVS_BUFFER_SEND_BUFFER; + flags |= OVS_BUFFER_NEED_COMPLETE | OVS_BUFFER_PRIVATE_CONTEXT; + ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + /* + * we use first nb to decide whether we need advance or retreat during + * complete. + */ + OvsInitNBLContext(ctx, flags, NET_BUFFER_DATA_LENGTH(nb), OVS_DEFAULT_PORT_NO); + return ctx; +} + +/* + * -------------------------------------------------------------------------- + * OvsAllocateNBLContext + * + * Create NBL buffer context and forwarding context. 
+ * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsAllocateNBLContext(POVS_SWITCH_CONTEXT context, + PNET_BUFFER_LIST nbl) +{ + POVS_NBL_POOL ovsPool = &context->ovsPool; + NDIS_STATUS status; + + status = NdisAllocateNetBufferListContext(nbl, + sizeof (OVS_BUFFER_CONTEXT), + OVS_DEFAULT_NBL_CONTEXT_FILL, + OVS_OTHER_POOL_TAG); + if (status != NDIS_STATUS_SUCCESS) { + return NDIS_STATUS_FAILURE; + } + + nbl->SourceHandle = ovsPool->ndisHandle; + status = context->NdisSwitchHandlers. + AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl); + + if (status != NDIS_STATUS_SUCCESS) { + NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT)); + return NDIS_STATUS_FAILURE; + } + return status; +} + +/* + * -------------------------------------------------------------------------- + * OvsFreeNBLContext + * + * Free the NBL buffer context and forwarding context. + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsFreeNBLContext(POVS_SWITCH_CONTEXT context, + PNET_BUFFER_LIST nbl) +{ + POVS_NBL_POOL ovsPool = &context->ovsPool; + + context->NdisSwitchHandlers. 
+ FreeNetBufferListForwardingContext(ovsPool->ndisContext, nbl); + NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT)); + + return NDIS_STATUS_SUCCESS; +} + +/* + * -------------------------------------------------------------------------- + * OvsCopyNBLInfo + * + * Copy NBL info from src to dst + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsCopyNBLInfo(PNET_BUFFER_LIST srcNbl, PNET_BUFFER_LIST dstNbl, + POVS_BUFFER_CONTEXT srcCtx, UINT32 copySize, + BOOLEAN copyNblInfo) +{ + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO srcInfo, dstInfo; + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + + srcInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(srcNbl); + dstInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(dstNbl); + if (srcInfo) { +#ifdef OVS_USE_COPY_NET_BUFFER_LIST_INFO + status = context->NdisSwitchHandlers. + CopyNetBufferListInfo(ovsPool->ndisContext, dstNbl, srcNbl, 0); + + if (status != NDIS_STATUS_SUCCESS) { + return status; + } +#else + dstInfo->SourcePortId = srcInfo->SourcePortId; + dstInfo->SourceNicIndex = srcInfo->SourceNicIndex; + if (copyNblInfo) { + if (srcCtx->flags & OVS_BUFFER_RECV_BUFFER) { + NdisCopyReceiveNetBufferListInfo(dstNbl, srcNbl); + } else if (srcCtx->flags & OVS_BUFFER_SEND_BUFFER) { + NdisCopySendNetBufferListInfo(dstNbl, srcNbl); + } + } +#endif + dstInfo->IsPacketDataSafe = srcInfo->IsPacketDataSafe; + if (!srcInfo->IsPacketDataSafe && copySize > + srcInfo->SafePacketDataSize) { + srcInfo->SafePacketDataSize = copySize; + } + } else { + /* + * Assume all data are safe + */ + dstInfo->IsPacketDataSafe = TRUE; + dstInfo->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; + } + return status; +} + +/* + * -------------------------------------------------------------------------- + * OvsPartialCopyNBL -- + * + * Partial copy NBL, if there is multiple NB in NBL, each one will be + * copied. We also reserve headroom for the new NBL. 
+ * + * Please note, + * NBL should have OVS_BUFFER_CONTEXT setup before calling + * this function. + * The NBL should already have ref to itself so that during copy + * it will not be freed. + * -------------------------------------------------------------------------- + */ +PNET_BUFFER_LIST +OvsPartialCopyNBL(PVOID ovsContext, + PNET_BUFFER_LIST nbl, + UINT32 copySize, + UINT32 headRoom, + BOOLEAN copyNblInfo) +{ + PNET_BUFFER_LIST newNbl; + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + NDIS_STATUS status; + PNET_BUFFER srcNb, dstNb; + ULONG byteCopied; + POVS_NBL_POOL ovsPool = &context->ovsPool; + POVS_BUFFER_CONTEXT srcCtx, dstCtx; + UINT16 flags; + + srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) { + OVS_LOG_INFO("src nbl must have ctx initialized"); + ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC); + return NULL; + } + + if (copySize) { + NdisAdvanceNetBufferListDataStart(nbl, copySize, FALSE, NULL); + } + newNbl = NdisAllocateCloneNetBufferList(nbl, ovsPool->nblOnlyPool, + NULL, 0); + if (copySize) { + status = NdisRetreatNetBufferListDataStart(nbl, copySize, 0, + NULL, NULL); + ASSERT(status == NDIS_STATUS_SUCCESS); + } + + if (newNbl == NULL) { + return NULL; + } + + /* + * Allocate private memory for copy + */ + if (copySize + headRoom) { + status = NdisRetreatNetBufferListDataStart(newNbl, copySize + headRoom, + 0, NULL, NULL); + if (status != NDIS_STATUS_SUCCESS) { + goto retreat_error; + } + + if (headRoom) { + NdisAdvanceNetBufferListDataStart(newNbl, headRoom, FALSE, NULL); + } + if (copySize) { + srcNb = NET_BUFFER_LIST_FIRST_NB(nbl); + dstNb = NET_BUFFER_LIST_FIRST_NB(newNbl); + + while (srcNb) { + status = NdisCopyFromNetBufferToNetBuffer(dstNb, 0, copySize, + srcNb, 0, + &byteCopied); + if (status != NDIS_STATUS_SUCCESS || copySize != byteCopied) { + goto nbl_context_error; + } + srcNb = NET_BUFFER_NEXT_NB(srcNb); + dstNb = 
NET_BUFFER_NEXT_NB(dstNb); + } + } + } + + status = OvsAllocateNBLContext(context, newNbl); + if (status != NDIS_STATUS_SUCCESS) { + goto nbl_context_error; + } + + status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, copySize, copyNblInfo); + if (status != NDIS_STATUS_SUCCESS) { + goto copy_list_info_error; + } + +#ifdef DBG + InterlockedIncrement((LONG volatile *)&ovsPool->nblOnlyCount); +#endif + + newNbl->ParentNetBufferList = nbl; + + dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl); + ASSERT(dstCtx != NULL); + + flags = srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | OVS_BUFFER_SEND_BUFFER); + + flags |= OVS_BUFFER_FROM_NBL_ONLY_POOL | OVS_BUFFER_PRIVATE_CONTEXT | + OVS_BUFFER_PRIVATE_FORWARD_CONTEXT; + + srcNb = NET_BUFFER_LIST_FIRST_NB(nbl); + OvsInitNBLContext(dstCtx, flags, NET_BUFFER_DATA_LENGTH(srcNb) - copySize, + OVS_DEFAULT_PORT_NO); + + InterlockedIncrement((LONG volatile *)&srcCtx->refCount); +#ifdef DBG + OvsDumpNetBufferList(nbl); + OvsDumpForwardingDetails(nbl); + + OvsDumpNetBufferList(newNbl); + OvsDumpForwardingDetails(newNbl); +#endif + OVS_LOG_LOUD("Partial Copy new NBL: %p", newNbl); + return newNbl; + +copy_list_info_error: + OvsFreeNBLContext(context, newNbl); +nbl_context_error: + if (copySize) { + NdisAdvanceNetBufferListDataStart(newNbl, copySize, TRUE, NULL); + } +retreat_error: + NdisFreeCloneNetBufferList(newNbl, 0); + return NULL; +} + +/* + * -------------------------------------------------------------------------- + * OvsPartialCopyToMultipleNBLs -- + * + * This is similar to OvsPartialCopyNBL() except that each NB will + * have its own NBL. 
+ * -------------------------------------------------------------------------- + */ +PNET_BUFFER_LIST +OvsPartialCopyToMultipleNBLs(PVOID ovsContext, + PNET_BUFFER_LIST nbl, + UINT32 copySize, + UINT32 headRoom, + BOOLEAN copyNblInfo) +{ + PNET_BUFFER nb, nextNb = NULL, firstNb, prevNb; + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + PNET_BUFFER_LIST firstNbl = NULL, newNbl, prevNbl = NULL; + + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + if (NET_BUFFER_NEXT_NB(nb) == NULL) { + return OvsPartialCopyNBL(context, nbl, copySize, headRoom, copyNblInfo); + } + + firstNb = nb; + prevNb = nb; + + while (nb) { + nextNb = NET_BUFFER_NEXT_NB(nb); + NET_BUFFER_NEXT_NB(nb) = NULL; + + NET_BUFFER_LIST_FIRST_NB(nbl) = nb; + + newNbl = OvsPartialCopyNBL(context, nbl, copySize, headRoom, + copyNblInfo); + if (newNbl == NULL) { + goto cleanup; + } + if (prevNbl == NULL) { + firstNbl = newNbl; + } else { + NET_BUFFER_LIST_NEXT_NBL(prevNbl) = nbl; + NET_BUFFER_NEXT_NB(prevNb) = nb; + } + prevNbl = newNbl; + prevNb = nb; + nb = nextNb; + } + NET_BUFFER_LIST_FIRST_NB(nbl) = firstNb; + return firstNbl; + +cleanup: + NET_BUFFER_NEXT_NB(prevNb) = nb; + NET_BUFFER_NEXT_NB(nb) = nextNb; + NET_BUFFER_LIST_FIRST_NB(nbl) = firstNb; + + newNbl = firstNbl; + while (newNbl) { + firstNbl = NET_BUFFER_LIST_NEXT_NBL(newNbl); + NET_BUFFER_LIST_NEXT_NBL(firstNbl) = NULL; + OvsCompleteNBL(context, newNbl, TRUE); + newNbl = firstNbl; + } + return NULL; +} + + +static PNET_BUFFER_LIST +OvsCopySinglePacketNBL(PVOID ovsContext, + PNET_BUFFER_LIST nbl, + PNET_BUFFER nb, + UINT32 headRoom, + BOOLEAN copyNblInfo) +{ + UINT32 size; + ULONG copiedSize; + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + PNET_BUFFER_LIST newNbl; + PNET_BUFFER newNb; + NDIS_STATUS status; + POVS_BUFFER_CONTEXT srcCtx, dstCtx; + + size = NET_BUFFER_DATA_LENGTH(nb); + if ((size + headRoom) <= OVS_FIX_NBL_DATA_SIZE) { + newNbl = OvsAllocateFixSizeNBL(context, size, headRoom); + } else { + newNbl = 
OvsAllocateVariableSizeNBL(context, size, headRoom); + } + if (newNbl == NULL) { + return NULL; + } + newNb = NET_BUFFER_LIST_FIRST_NB(newNbl); + status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, size, nb, 0, + &copiedSize); + + srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + if (status == NDIS_STATUS_SUCCESS) { + status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, copiedSize, copyNblInfo); + } + + if (status != NDIS_STATUS_SUCCESS || copiedSize != size) { + OvsCompleteNBL(context, newNbl, TRUE); + return NULL; + } + + dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl); + ASSERT(dstCtx && srcCtx); + ASSERT(srcCtx->magic == OVS_CTX_MAGIC && dstCtx->magic == OVS_CTX_MAGIC); + + dstCtx->flags |= srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | + OVS_BUFFER_SEND_BUFFER); +#ifdef DBG + OvsDumpNetBufferList(newNbl); + OvsDumpForwardingDetails(newNbl); +#endif + OVS_LOG_LOUD("Copy single nb to new NBL: %p", newNbl); + return newNbl; +} + +/* + * -------------------------------------------------------------------------- + * OvsFullCopyNBL -- + * + * Copy the NBL to a new NBL including data. + * + * Notes: + * The NBL can have multiple NBs, but the final result is one NBL. 
+ * -------------------------------------------------------------------------- + */ +PNET_BUFFER_LIST +OvsFullCopyNBL(PVOID ovsContext, + PNET_BUFFER_LIST nbl, + UINT32 headRoom, + BOOLEAN copyNblInfo) +{ + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + POVS_NBL_POOL ovsPool = &context->ovsPool; + PNET_BUFFER_LIST newNbl; + PNET_BUFFER nb, newNb, firstNb = NULL, prevNb = NULL; + POVS_BUFFER_CONTEXT dstCtx, srcCtx; + PMDL mdl; + NDIS_STATUS status; + UINT32 size, totalSize; + ULONG copiedSize; + UINT16 flags; + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO dstInfo; + + srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) { + OVS_LOG_INFO("src nbl must have ctx initialized"); + ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC); + return NULL; + } + + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + + if (NET_BUFFER_NEXT_NB(nb) == NULL) { + return OvsCopySinglePacketNBL(context, nbl, nb, headRoom, copyNblInfo); + } + + newNbl = NdisAllocateNetBufferList(ovsPool->nblOnlyPool, + (UINT16)sizeof (OVS_BUFFER_CONTEXT), + (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL); + if (newNbl == NULL) { + return NULL; + } + + while (nb) { + size = NET_BUFFER_DATA_LENGTH(nb); + totalSize = MEM_ALIGN_SIZE(size + headRoom); + mdl = OvsAllocateMDLAndData(ovsPool->ndisHandle, totalSize); + + if (mdl == NULL) { + goto nblcopy_error; + } + newNb = NdisAllocateNetBuffer(ovsPool->nbPool, mdl, totalSize, 0); + if (newNb == NULL) { + OvsFreeMDLAndData(mdl); + goto nblcopy_error; + } + if (firstNb == NULL) { + firstNb = newNb; + } else { + NET_BUFFER_NEXT_NB(prevNb) = newNb; + } + prevNb = newNb; +#ifdef DBG + InterlockedIncrement((LONG volatile *)&ovsPool->nbCount); +#endif + status = NdisRetreatNetBufferDataStart(newNb, size, 0, NULL); + ASSERT(status == NDIS_STATUS_SUCCESS); + + status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, size, nb, 0, + &copiedSize); + if (status != NDIS_STATUS_SUCCESS || size != copiedSize) { 
+ goto nblcopy_error; + } + + nb = NET_BUFFER_NEXT_NB(nb); + } + + NET_BUFFER_LIST_FIRST_NB(newNbl) = firstNb; + + newNbl->SourceHandle = ovsPool->ndisHandle; + status = context->NdisSwitchHandlers. + AllocateNetBufferListForwardingContext(ovsPool->ndisContext, newNbl); + + if (status != NDIS_STATUS_SUCCESS) { + goto nblcopy_error; + } + + status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, 0, copyNblInfo); + if (status != NDIS_STATUS_SUCCESS) { + goto nblcopy_error; + } + + dstInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl); + dstInfo->IsPacketDataSafe = TRUE; + + dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl); + + flags = srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | OVS_BUFFER_SEND_BUFFER); + + flags |= OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA | + OVS_BUFFER_PRIVATE_NET_BUFFER | OVS_BUFFER_FROM_NBL_ONLY_POOL | + OVS_BUFFER_PRIVATE_FORWARD_CONTEXT; + + OvsInitNBLContext(dstCtx, flags, NET_BUFFER_DATA_LENGTH(firstNb), + OVS_DEFAULT_PORT_NO); + +#ifdef DBG + OvsDumpNetBufferList(nbl); + OvsDumpForwardingDetails(nbl); + InterlockedIncrement((LONG volatile *)&ovsPool->nblOnlyCount); +#endif + OVS_LOG_LOUD("newNbl: %p", newNbl); + return newNbl; + +nblcopy_error: + while (firstNb) { +#ifdef DBG + InterlockedDecrement((LONG volatile *)&ovsPool->nbCount); +#endif + prevNb = firstNb; + firstNb = NET_BUFFER_NEXT_NB(prevNb); + mdl = NET_BUFFER_FIRST_MDL(prevNb); + NET_BUFFER_FIRST_MDL(prevNb) = NULL; + NdisFreeNetBuffer(prevNb); + OvsFreeMDLAndData(mdl); + } + NdisFreeNetBufferList(newNbl); + OVS_LOG_ERROR("OvsFullCopyNBL failed"); + return NULL; +} + +/* + * -------------------------------------------------------------------------- + * GetSegmentHeaderInfo + * + * Extract header size and sequence number for the segment. 
+ * -------------------------------------------------------------------------- + */ +static NDIS_STATUS +GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl, + const POVS_PACKET_HDR_INFO hdrInfo, + UINT32 *hdrSize, UINT32 *seqNumber) +{ + TCPHdr tcpStorage; + const TCPHdr *tcp; + + /* Parse the orginal Eth/IP/TCP header */ + tcp = OvsGetPacketBytes(nbl, sizeof *tcp, hdrInfo->l4Offset, &tcpStorage); + if (tcp == NULL) { + return NDIS_STATUS_FAILURE; + } + *seqNumber = ntohl(tcp->seq); + *hdrSize = hdrInfo->l4Offset + TCP_HDR_LEN(tcp); + + return NDIS_STATUS_SUCCESS; +} + + +/* + * -------------------------------------------------------------------------- + * FixSegmentHeader + * + * Fix IP length, IP checksum, TCP sequence number and TCP checksum + * in the segment. + * -------------------------------------------------------------------------- + */ +static NDIS_STATUS +FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber) +{ + EthHdr *dstEth; + IPHdr *dstIP; + TCPHdr *dstTCP; + PMDL mdl; + PUINT8 bufferStart; + + mdl = NET_BUFFER_FIRST_MDL(nb); + + bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority); + if (!bufferStart) { + return NDIS_STATUS_RESOURCES; + } + dstEth = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(nb)); + ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb) + >= sizeof(EthHdr) + sizeof(IPHdr) + sizeof(TCPHdr)); + dstIP = (IPHdr *)((PCHAR)dstEth + sizeof *dstEth); + dstTCP = (TCPHdr *)((PCHAR)dstIP + dstIP->ihl * 4); + ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb) + >= sizeof(EthHdr) + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP)); + + /* Fix IP length and checksum */ + ASSERT(dstIP->protocol == IPPROTO_TCP); + dstIP->tot_len = htons(segmentSize + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP)); + dstIP->check = 0; + dstIP->check = IPChecksum((UINT8 *)dstIP, dstIP->ihl * 4, 0); + + /* Fix TCP checksum */ + dstTCP->seq = htonl(seqNumber); + dstTCP->check = + IPPseudoChecksum((UINT32 
*)&dstIP->saddr, + (UINT32 *)&dstIP->daddr, + IPPROTO_TCP, segmentSize + TCP_HDR_LEN(dstTCP)); + dstTCP->check = CalculateChecksumNB(nb, + (UINT16)(NET_BUFFER_DATA_LENGTH(nb) - sizeof *dstEth - dstIP->ihl * 4), + sizeof *dstEth + dstIP->ihl * 4); + return STATUS_SUCCESS; +} + +/* + * -------------------------------------------------------------------------- + * OvsTcpSegmentyNBL -- + * + * Segment TCP payload, and prepend each segment with ether/IP/TCP header. + * Leave headRoom for additional encap. + * + * Please note, + * NBL should have OVS_BUFFER_CONTEXT setup before calling + * this function. + * The NBL should already have ref to itself so that during copy + * it will not be freed. + * Currently this API assert there is only one NB in an NBL, it needs + * to be fixed if we receive multiple NBs in an NBL. + * -------------------------------------------------------------------------- + */ +PNET_BUFFER_LIST +OvsTcpSegmentNBL(PVOID ovsContext, + PNET_BUFFER_LIST nbl, + POVS_PACKET_HDR_INFO hdrInfo, + UINT32 mss, + UINT32 headRoom) +{ + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; +#ifdef DBG + POVS_NBL_POOL ovsPool = &context->ovsPool; +#endif + POVS_BUFFER_CONTEXT dstCtx, srcCtx; + UINT32 size, hdrSize, seqNumber; + PNET_BUFFER_LIST newNbl; + PNET_BUFFER nb, newNb; + NDIS_STATUS status; + UINT16 segmentSize; + ULONG copiedSize; + + srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) { + OVS_LOG_INFO("src nbl must have ctx initialized"); + ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC); + return NULL; + } + + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + ASSERT(NET_BUFFER_NEXT_NB(nb) == NULL); + + /* Figure out the segment header size */ + status = GetSegmentHeaderInfo(nbl, hdrInfo, &hdrSize, &seqNumber); + if (status != NDIS_STATUS_SUCCESS) { + OVS_LOG_INFO("Cannot parse NBL header"); + return NULL; + } + + size = NET_BUFFER_DATA_LENGTH(nb) - hdrSize; + + /* XXX add to 
ovsPool counters? */ + newNbl = NdisAllocateFragmentNetBufferList(nbl, NULL, + NULL, hdrSize, mss, hdrSize + headRoom , 0, 0); + if (newNbl == NULL) { + return NULL; + } + + /* Now deal with TCP payload */ + for (newNb = NET_BUFFER_LIST_FIRST_NB(newNbl); newNb != NULL; + newNb = NET_BUFFER_NEXT_NB(newNb)) { + segmentSize = (size > mss ? mss : size) & 0xffff; + if (headRoom) { + NdisAdvanceNetBufferDataStart(newNb, headRoom, FALSE, NULL); + } + + /* Now copy the eth/IP/TCP header and fix up */ + status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, hdrSize, nb, 0, + &copiedSize); + if (status != NDIS_STATUS_SUCCESS || hdrSize != copiedSize) { + goto nblcopy_error; + } + + status = FixSegmentHeader(newNb, segmentSize, seqNumber); + if (status != NDIS_STATUS_SUCCESS) { + goto nblcopy_error; + } + + + /* Move on to the next segment */ + size -= segmentSize; + seqNumber += segmentSize; + } + + status = OvsAllocateNBLContext(context, newNbl); + if (status != NDIS_STATUS_SUCCESS) { + goto nblcopy_error; + } + + status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, hdrSize + headRoom, FALSE); + if (status != NDIS_STATUS_SUCCESS) { + goto nbl_context_error; + } + + newNbl->ParentNetBufferList = nbl; + + /* Remember it's a fragment NBL so we can free it properly */ + dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl); + ASSERT(dstCtx != NULL); + dstCtx->flags = OVS_BUFFER_FRAGMENT | OVS_BUFFER_PRIVATE_CONTEXT | + OVS_BUFFER_PRIVATE_FORWARD_CONTEXT | OVS_BUFFER_SEND_BUFFER; + dstCtx->refCount = 1; + dstCtx->magic = OVS_CTX_MAGIC; + dstCtx->dataOffsetDelta = hdrSize + headRoom; + + InterlockedIncrement((LONG volatile *)&srcCtx->refCount); +#ifdef DBG + InterlockedIncrement((LONG volatile *)&ovsPool->fragNBLCount); + + OvsDumpNetBufferList(nbl); + OvsDumpForwardingDetails(nbl); + + OvsDumpNetBufferList(newNbl); + OvsDumpForwardingDetails(newNbl); +#endif + OVS_LOG_TRACE("Segment nbl %p to newNbl: %p", nbl, newNbl); + return newNbl; + +nbl_context_error: + 
OvsFreeNBLContext(context, newNbl); +nblcopy_error: +#ifdef DBG + InterlockedDecrement((LONG volatile *)&ovsPool->fragNBLCount); +#endif + NdisFreeFragmentNetBufferList(newNbl, hdrSize + headRoom, 0); + return NULL; +} + + +/* + * -------------------------------------------------------------------------- + * OvsFullCopyToMultipleNBLs -- + * + * Copy NBL to multiple NBLs, each NB will have its own NBL + * -------------------------------------------------------------------------- + */ +PNET_BUFFER_LIST +OvsFullCopyToMultipleNBLs(PVOID ovsContext, + PNET_BUFFER_LIST nbl, + UINT32 headRoom, + BOOLEAN copyNblInfo) +{ + + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + PNET_BUFFER_LIST firstNbl, currNbl, newNbl; + PNET_BUFFER nb; + POVS_BUFFER_CONTEXT srcCtx; + + srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) { + OVS_LOG_INFO("src nbl must have ctx initialized"); + ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC); + return NULL; + } + + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + newNbl = OvsCopySinglePacketNBL(context, nbl, nb, headRoom, copyNblInfo); + + if (newNbl == NULL || NET_BUFFER_NEXT_NB(nb) == NULL) { + return newNbl; + } else { + firstNbl = newNbl; + currNbl = newNbl; + } + + while (nb) { + newNbl = OvsCopySinglePacketNBL(context, nbl, nb, headRoom, + copyNblInfo); + if (newNbl == NULL) { + goto copymultiple_error; + } + NET_BUFFER_LIST_NEXT_NBL(currNbl) = newNbl; + currNbl = newNbl; + nb = NET_BUFFER_NEXT_NB(nb); + } + return firstNbl; + +copymultiple_error: + while (firstNbl) { + currNbl = firstNbl; + firstNbl = NET_BUFFER_LIST_NEXT_NBL(firstNbl); + NET_BUFFER_LIST_NEXT_NBL(currNbl) = NULL; + OvsCompleteNBL(context, currNbl, TRUE); + } + return NULL; + +} + + +/* + * -------------------------------------------------------------------------- + * OvsCompleteNBL -- + * + * This function tries to free the NBL allocated by OVS buffer + * management module. 
If it trigger the completion of the parent + * NBL, it will recursively call itself. If it trigger the completion + * of external NBL, it will be returned to the caller. The caller + * is responsible to call API to return to upper layer. + * -------------------------------------------------------------------------- + */ +PNET_BUFFER_LIST +OvsCompleteNBL(POVS_SWITCH_CONTEXT context, + PNET_BUFFER_LIST nbl, + BOOLEAN updateRef) +{ + POVS_BUFFER_CONTEXT ctx; + UINT16 flags; + PNET_BUFFER_LIST parent; + NDIS_STATUS status; + NDIS_HANDLE poolHandle; + LONG value; + POVS_NBL_POOL ovsPool = &context->ovsPool; + PNET_BUFFER nb; + + + ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + + ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC); + + OVS_LOG_TRACE("Enter: nbl: %p, ctx: %p, refCount: %d, updateRef:%d", + nbl, ctx, ctx->refCount, updateRef); + + if (updateRef) { + value = InterlockedDecrement((LONG volatile *)&ctx->refCount); + if (value != 0) { + return NULL; + } + } else { + /* + * This is a special case, the refCount must be zero + */ + ASSERT(ctx->refCount == 0); + } + + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + + flags = ctx->flags; + if (!(flags & OVS_BUFFER_FRAGMENT) && + NET_BUFFER_DATA_LENGTH(nb) != ctx->origDataLength) { + UINT32 diff; + if (NET_BUFFER_DATA_LENGTH(nb) < ctx->origDataLength) { + diff = ctx->origDataLength -NET_BUFFER_DATA_LENGTH(nb); + status = NdisRetreatNetBufferListDataStart(nbl, diff, 0, + NULL, NULL); + ASSERT(status == NDIS_STATUS_SUCCESS); + } else { + diff = NET_BUFFER_DATA_LENGTH(nb) - ctx->origDataLength; + NdisAdvanceNetBufferListDataStart(nbl, diff, TRUE, NULL); + } + } + + if (ctx->flags & OVS_BUFFER_PRIVATE_CONTEXT) { + NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT)); + } + + if (flags & OVS_BUFFER_NEED_COMPLETE) { + /* + * return to caller for completion + */ +#ifdef DBG + InterlockedDecrement((LONG volatile *)&ovsPool->sysNBLCount); +#endif + return nbl; + } + + if (flags & 
OVS_BUFFER_PRIVATE_FORWARD_CONTEXT) { + context->NdisSwitchHandlers. + FreeNetBufferListForwardingContext(ovsPool->ndisContext, nbl); + } + + if (flags & (OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA)) { + PNET_BUFFER nb = NET_BUFFER_LIST_FIRST_NB(nbl); + while (nb) { + PMDL mdl = NET_BUFFER_FIRST_MDL(nb); + NET_BUFFER_FIRST_MDL(nb) = NULL; + ASSERT(mdl->Next == NULL); + OvsFreeMDLAndData(mdl); + nb = NET_BUFFER_NEXT_NB(nb); + } + } + + if (flags & OVS_BUFFER_PRIVATE_NET_BUFFER) { + PNET_BUFFER nb, nextNb; + + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + while (nb) { + nextNb = NET_BUFFER_NEXT_NB(nb); + NdisFreeNetBuffer(nb); +#ifdef DBG + InterlockedDecrement((LONG volatile *)&ovsPool->nbCount); +#endif + nb = nextNb; + } + NET_BUFFER_LIST_FIRST_NB(nbl) = NULL; + } + + parent = nbl->ParentNetBufferList; + + poolHandle = NdisGetPoolFromNetBufferList(nbl); + if (flags & OVS_BUFFER_FROM_FIX_SIZE_POOL) { + ASSERT(poolHandle == ovsPool->fixSizePool); +#ifdef DBG + InterlockedDecrement((LONG volatile *)&ovsPool->fixNBLCount); +#endif + NdisFreeNetBufferList(nbl); + } else if (flags & OVS_BUFFER_FROM_ZERO_SIZE_POOL) { + ASSERT(poolHandle == ovsPool->zeroSizePool); +#ifdef DBG + InterlockedDecrement((LONG volatile *)&ovsPool->zeroNBLCount); +#endif + NdisFreeNetBufferList(nbl); + } else if (flags & OVS_BUFFER_FROM_NBL_ONLY_POOL) { + ASSERT(poolHandle == ovsPool->nblOnlyPool); +#ifdef DBG + InterlockedDecrement((LONG volatile *)&ovsPool->nblOnlyCount); +#endif + NdisFreeCloneNetBufferList(nbl, 0); + } else if (flags & OVS_BUFFER_FRAGMENT) { + OVS_LOG_TRACE("Free fragment %p parent %p", nbl, parent); +#ifdef DBG + InterlockedDecrement((LONG volatile *)&ovsPool->fragNBLCount); +#endif + NdisFreeFragmentNetBufferList(nbl, ctx->dataOffsetDelta, 0); + } + + if (parent != NULL) { + ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(parent); + ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC); + value = InterlockedDecrement((LONG volatile *)&ctx->refCount); + if (value == 0) { + 
return OvsCompleteNBL(context, parent, FALSE); + } + } + return NULL; +} + +/* + * -------------------------------------------------------------------------- + * OvsSetCtxSourcePortNo -- + * Setter function which stores the source port of an NBL in the NBL + * Context Info. + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsSetCtxSourcePortNo(PNET_BUFFER_LIST nbl, + UINT32 portNo) +{ + POVS_BUFFER_CONTEXT ctx; + ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + if (ctx == NULL) { + ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC); + return STATUS_INVALID_PARAMETER; + } + + ctx->srcPortNo = portNo; + return NDIS_STATUS_SUCCESS; +} + +/* + * -------------------------------------------------------------------------- + * OvsGetCtxSourcePortNo -- + * Get source port of an NBL from its Context Info. + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsGetCtxSourcePortNo(PNET_BUFFER_LIST nbl, + UINT32 *portNo) +{ + POVS_BUFFER_CONTEXT ctx; + ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + if (ctx == NULL || portNo == NULL) { + ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC); + return STATUS_INVALID_PARAMETER; + } + *portNo = ctx->srcPortNo; + return NDIS_STATUS_SUCCESS; +} diff --git a/datapath-windows/ovsext/OvsBufferMgmt.h b/datapath-windows/ovsext/OvsBufferMgmt.h new file mode 100644 index 000000000..9c00b1b5c --- /dev/null +++ b/datapath-windows/ovsext/OvsBufferMgmt.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OVS_BUFFER_MGMT_H_ +#define __OVS_BUFFER_MGMT_H_ 1 + +#define MEM_ALIGN MEMORY_ALLOCATION_ALIGNMENT +#define MEM_ALIGN_SIZE(_x) ((MEM_ALIGN - 1 + (_x))/MEM_ALIGN * MEM_ALIGN) +#define OVS_CTX_MAGIC 0xabcd + +#define OVS_DEFAULT_NBL_CONTEXT_SIZE MEM_ALIGN_SIZE(64) +#define OVS_DEFAULT_NBL_CONTEXT_FILL \ + (OVS_DEFAULT_NBL_CONTEXT_SIZE - sizeof (OVS_BUFFER_CONTEXT)) + +#define OVS_DEFAULT_DATA_SIZE 256 +#define OVS_DEFAULT_HEADROOM_SIZE 128 +#define OVS_FIX_NBL_DATA_SIZE (OVS_DEFAULT_DATA_SIZE + OVS_DEFAULT_HEADROOM_SIZE) + +/* Default we copy 18 bytes, to make sure ethernet header and vlan is in + * continuous buffer */ +#define OVS_DEFAULT_COPY_SIZE 18 + +enum { + OVS_BUFFER_NEED_COMPLETE = BIT16(0), + OVS_BUFFER_PRIVATE_MDL = BIT16(1), + OVS_BUFFER_PRIVATE_DATA = BIT16(2), + OVS_BUFFER_PRIVATE_NET_BUFFER = BIT16(3), + OVS_BUFFER_PRIVATE_FORWARD_CONTEXT = BIT16(4), + OVS_BUFFER_PRIVATE_CONTEXT = BIT16(5), + OVS_BUFFER_FROM_FIX_SIZE_POOL = BIT16(6), + OVS_BUFFER_FROM_ZERO_SIZE_POOL = BIT16(7), + OVS_BUFFER_FROM_NBL_ONLY_POOL = BIT16(8), + OVS_BUFFER_RECV_BUFFER = BIT16(9), + OVS_BUFFER_SEND_BUFFER = BIT16(10), + OVS_BUFFER_FRAGMENT = BIT16(11), +}; + +typedef union _OVS_BUFFER_CONTEXT { + struct { + UINT16 magic; + UINT16 flags; + UINT32 srcPortNo; + UINT32 refCount; + union { + UINT32 origDataLength; + UINT32 dataOffsetDelta; + }; + }; + + UINT64 value[MEM_ALIGN_SIZE(16) >> 3]; +} OVS_BUFFER_CONTEXT, *POVS_BUFFER_CONTEXT; + + +typedef struct _OVS_NBL_POOL { + NDIS_SWITCH_CONTEXT ndisContext; + NDIS_HANDLE ndisHandle; + 
NDIS_HANDLE fixSizePool; // data size of 256 + NDIS_HANDLE zeroSizePool; // no data, NBL + NB + Context + NDIS_HANDLE nblOnlyPool; // NBL + context for clone + NDIS_HANDLE nbPool; // NB for clone +#ifdef DBG + LONG fixNBLCount; + LONG zeroNBLCount; + LONG nblOnlyCount; + LONG nbCount; + LONG sysNBLCount; + LONG fragNBLCount; +#endif +} OVS_NBL_POOL, *POVS_NBL_POOL; + + +NDIS_STATUS OvsInitBufferPool(PVOID context); +VOID OvsCleanupBufferPool(PVOID context); + +PNET_BUFFER_LIST OvsAllocateFixSizeNBL(PVOID context, + UINT32 size, + UINT32 headRoom); +PNET_BUFFER_LIST OvsAllocateVariableSizeNBL(PVOID context, + UINT32 size, + UINT32 headRoom); + +POVS_BUFFER_CONTEXT OvsInitExternalNBLContext(PVOID context, + PNET_BUFFER_LIST nbl, + BOOLEAN isRecv); + +PNET_BUFFER_LIST OvsPartialCopyNBL(PVOID context, + PNET_BUFFER_LIST nbl, + UINT32 copySize, + UINT32 headRoom, + BOOLEAN copyNblInfo); +PNET_BUFFER_LIST OvsPartialCopyToMultipleNBLs(PVOID context, + PNET_BUFFER_LIST nbl, + UINT32 copySize, + UINT32 headRoom, + BOOLEAN copyNblInfo); +PNET_BUFFER_LIST OvsFullCopyNBL(PVOID context, PNET_BUFFER_LIST nbl, + UINT32 headRoom, BOOLEAN copyNblInfo); +PNET_BUFFER_LIST OvsTcpSegmentNBL(PVOID context, + PNET_BUFFER_LIST nbl, + POVS_PACKET_HDR_INFO hdrInfo, + UINT32 MSS, + UINT32 headRoom); +PNET_BUFFER_LIST OvsFullCopyToMultipleNBLs(PVOID context, + PNET_BUFFER_LIST nbl, UINT32 headRoom, BOOLEAN copyNblInfo); +PNET_BUFFER_LIST OvsCompleteNBL(PVOID context, PNET_BUFFER_LIST nbl, + BOOLEAN updateRef); +NDIS_STATUS OvsSetCtxSourcePortNo(PNET_BUFFER_LIST nbl, UINT32 portNo); + +NDIS_STATUS OvsGetCtxSourcePortNo(PNET_BUFFER_LIST nbl, UINT32 *portNo); + +#endif /* __OVS_BUFFER_MGMT_H_ */ diff --git a/datapath-windows/ovsext/OvsChecksum.c b/datapath-windows/ovsext/OvsChecksum.c new file mode 100644 index 000000000..e19237389 --- /dev/null +++ b/datapath-windows/ovsext/OvsChecksum.c @@ -0,0 +1,578 @@ +/* + * Copyright (c) 2014 VMware, Inc. 
#ifndef htons
/* Swaps the two bytes of a 16-bit value (host <-> network order on a
 * little-endian host). Evaluates its argument twice. */
#define htons(_x)    (((UINT16)(_x) >> 8) + (((UINT16)(_x) << 8) & 0xff00))
#endif

#ifndef swap64
/* Swaps the two bytes inside each of the four 16-bit lanes of a 64-bit
 * value; the lanes themselves stay in place. Used to re-align a partial sum
 * that was computed starting at an odd byte offset. */
#define swap64(_x) ((((UINT64)(_x) >> 8) & 0x00ff00ff00ff00ff) + \
                    (((UINT64)(_x) << 8) & 0xff00ff00ff00ff00))
#endif

/*
 * fold64 / fold32 --
 *     Fold the carries of a 64-bit (resp. 32-bit) accumulator back into its
 *     low 16 bits, in place. '_x' must be a modifiable lvalue and is
 *     evaluated several times.
 *
 *     Each macro is a single comma expression, so it behaves like one
 *     statement and is safe in an unbraced if/else. (The earlier form
 *     expanded to several bare statements.)
 */
#define fold64(_x)                                          \
    ((_x) = ((_x) >> 32) + ((_x) & 0xffffffff),             \
     (_x) = (UINT32)(((_x) >> 32) + (_x)),                  \
     (_x) = ((_x) >> 16) + ((_x) & 0xffff),                 \
     (_x) = (UINT16)(((_x) >> 16) + (_x)))

#define fold32(_x)                                          \
    ((_x) = ((_x) >> 16) + ((_x) & 0xffff),                 \
     (_x) = (UINT16)(((_x) >> 16) + (_x)))
+ * ------------------------------------------------ + * + * Result: + * As name indicate, the final data is not 1's complemnent + *---------------------------------------------------------------------------- + */ +UINT64 +CalculateOnesComplement(UINT8 *start, + UINT16 totalLength, + UINT64 initial, + BOOLEAN isEvenStart) +{ + UINT64 sum = 0, val; + UINT64 *src = (UINT64 *)start; + union { + UINT32 val; + UINT8 b8[4]; + } tmp; + + while (totalLength > 7) { + val = *src; + sum += (val >> 32) + (val & 0xffffffff); + src++; + totalLength -= 8; + } + if (totalLength > 3) { + sum += *(UINT32 *)src; + src = (UINT64 *)((UINT8 *)src + 4); + totalLength -= 4; + } + start = (UINT8 *)src; + tmp.val = 0; + switch (totalLength) { + case 3: + tmp.b8[2] = start[2]; + case 2: + tmp.b8[1] = start[1]; + case 1: + tmp.b8[0] = start[0]; + sum += tmp.val; + } + sum = (isEvenStart ? sum : swap64(sum)) + initial; + return sum; +} + +/* + *---------------------------------------------------------------------------- + * CalculateChecksum -- + * + * Given the start point, and length, calculate the checksum + * as 1's complement of 1's comlement. + * + * This assume the checksum field is initailized properly. + * + * Input Parameter: + * ptr: point to the data to be checksumed + * totalLength: total length of the data + * initial: inital value to remit the checksum. Please note this + * value should be network byte order value. + * + * The last parameter may be useful where you don't want to set + * checksum field to zero, in that case you can pass ~checksum, + * this is equivalent of set checksum field to zero. + * + * Result: + * The result can be assigned to checksum field directly. 
+ *---------------------------------------------------------------------------- + */ +UINT16 +CalculateChecksum(UINT8 *ptr, + UINT16 totalLength, + UINT16 initial) +{ + UINT64 sum = CalculateOnesComplement(ptr, totalLength, initial, TRUE); + fold64(sum); + return (UINT16)~sum; +} + +/* + *---------------------------------------------------------------------------- + * CopyAndCalculateOnesComplement -- + * + * Given the start address and buffer length, calculate the 1's complement + * at same time, copt the data from src to dst. + * + * This routine can be used when multiple buffers are used for a packets. + * + * PLEASE NOTE, even though the last parameter is UINT64, but the assumption + * is it will not overflowed after adding the extra data. + * ------------------------------------------------ + * + * Result: + * As name indicate, the final data is not 1's complemnent + *---------------------------------------------------------------------------- + */ +UINT64 +CopyAndCalculateOnesComplement(UINT8 *dst, + UINT8 *src, + UINT16 length, + UINT64 initial, + BOOLEAN isEvenStart) +{ + UINT64 sum =0, val; + UINT64 *src64, *dst64; + union { + UINT32 val; + UINT8 b8[4]; + } tmp; + + src64 = (UINT64 *)src; + dst64 = (UINT64 *)dst; + + while (length > 7) { + val = *src64; + *dst64 = val; + sum += (val >> 32) + (val & 0xffffffff); + src64++; + dst64++; + length -= 8; + } + + if (length > 3) { + val = *(UINT32 *)src64; + *(UINT32 *)dst64 = (UINT32)val; + sum += (UINT32)val; + dst64 = (UINT64 *)((UINT8 *)dst64 + 4); + src64 = (UINT64 *)((UINT8 *)src64 + 4); + length -= 4; + } + src = (UINT8 *)src64; + dst = (UINT8 *)dst64; + tmp.val = 0; + switch (length) { + case 3: + dst[2] = src[2]; + tmp.b8[2] = src[2]; + case 2: + dst[1] = src[1]; + tmp.b8[1] = src[1]; + case 1: + dst[0] = src[0]; + tmp.b8[0] = src[0]; + sum += tmp.val; + } + sum = (isEvenStart ? 
sum : swap64(sum)) + initial; + return sum; +} + +/* + *---------------------------------------------------------------------------- + * CopyAndCalculateChecksum -- + * + * This is similar to CalculateChecksum, except it will also copy data to + * destination address. + *---------------------------------------------------------------------------- + */ +UINT16 +CopyAndCalculateChecksum(UINT8 *dst, + UINT8 *src, + UINT16 length, + UINT16 initial) +{ + + UINT64 sum = CopyAndCalculateOnesComplement(dst, src, length, initial, + TRUE); + fold64(sum); + return (UINT16)~sum; +} + + +/* + *---------------------------------------------------------------------------- + * IPChecksum -- + * + * Give IP header, calculate the IP checksum. + * We assume IP checksum field is initialized properly + * + * Input Pramater: + * ipHdr: IP header start point + * length: IP header length (potentially include IP options) + * initial: same as CalculateChecksum + * + * Result: + * The result is already 1's complement, so can be assigned + * to checksum field directly + *---------------------------------------------------------------------------- + */ +UINT16 +IPChecksum(UINT8 *ipHdr, + UINT16 length, + UINT16 initial) +{ + UINT32 sum = initial; + UINT16 *ptr = (UINT16 *)ipHdr; + ASSERT((length & 0x3) == 0); + while (length > 1) { + sum += ptr[0]; + ptr++; + length -= 2; + } + fold32(sum); + return (UINT16)~sum; +} + +/* + *---------------------------------------------------------------------------- + * IPPseudoChecksum -- + * + * Give src and dst IP address, protocol value and total + * upper layer length(not include IP header, but include + * upller layer protocol header, for example it include + * TCP header for TCP checksum), calculate the pseudo + * checksum, please note this checksum is just 1's complement + * addition. 
+ * + * Input Parameter: + * src: please note it is in network byte order + * dst: same as src + * protocol: protocol value in IP header + * totalLength: total length of upper layer data including + * header. + * + * Result: + * + * This value should be put in TCP checksum field before + * calculating TCP checksum using CalculateChecksum with + * initial value of 0. + *---------------------------------------------------------------------------- + */ +UINT16 +IPPseudoChecksum(UINT32 *src, + UINT32 *dst, + UINT8 protocol, + UINT16 totalLength) +{ + UINT32 sum = (UINT32)htons(totalLength) + htons(protocol); + sum += (*src >> 16) + (*src & 0xffff); + sum += (*dst >> 16) + (*dst & 0xffff); + fold32(sum); + return (UINT16)sum; +} + +/* + *---------------------------------------------------------------------------- + * IPv6PseudoChecksum -- + * + * Given IPv6 src and dst address, upper layer protocol and total + * upper layer protocol data length including upper layer header + * part, calculate the pseudo checksum for upper layer protocol + * checksum. + * + * please note this checksum is just 1's complement addition. + * + * Input Parameter: + * src: src IPv6 address in network byte order + * dst: dst IPv6 address. + * protocol: upper layer protocol + * totalLength: total length of upper layer data. Please note this is + * in host byte order. + * + * Result: + * + * Place in upper layer checksum field before calculate upper layer + * checksum. 
+ *---------------------------------------------------------------------------- + */ +UINT16 +IPv6PseudoChecksum(UINT32 *src, + UINT32 *dst, + UINT8 protocol, + UINT16 totalLength) +{ + UINT64 sum = (UINT32)htons(totalLength) + htons(protocol); + sum += (UINT64)src[0] + src[1] + src[2] + src[3]; + sum += (UINT64)dst[0] + dst[1] + dst[2] + dst[3]; + fold64(sum); + return (UINT16)sum; +} + +/* + *---------------------------------------------------------------------------- + * ChecksumUpdate32 -- + * + * Given old checksum value (as it is in checksum field), + * prev value of the relevant field in network byte order + * new value of the relevant field in the network byte order + * calculate the new checksum. + * Please check relevant RFC for reference. + * + * Input Pramater: + * oldSum: old checksum value in checksum field + * prev: previous value of relevant 32 bit feld in network + * byte order. + * new: new value of the relevant 32 bit field in network + * byte order. + * + * Result: + * new checksum value to be placed in the checksum field. + *---------------------------------------------------------------------------- + */ +UINT16 +ChecksumUpdate32(UINT16 oldSum, + UINT32 prev, + UINT32 newValue) +{ + UINT32 sum = ~prev; + sum = (sum >> 16) + (sum & 0xffff); + sum += (newValue >> 16) + (newValue & 0xffff); + sum += (UINT16)~oldSum; + fold32(sum); + return (UINT16)~sum; +} + + +/* + *---------------------------------------------------------------------------- + * ChecksumUpdate16 -- + * + * Given old checksum value (as it is in checksum field), + * prev value of the relevant field in network byte order + * new value of the relevant field in the network byte order + * calculate the new checksum. + * Please check relevant RFC for reference. + * + * Input Pramater: + * oldSum: old checksum value in checksum field + * prev: previous value of relevant 32 bit feld in network + * byte order. + * new: new value of the relevant 32 bit field in network + * byte order. 
+ * + * Result: + * new checksum value to be placed in the checksum field. + *---------------------------------------------------------------------------- + */ +UINT16 +ChecksumUpdate16(UINT16 oldSum, + UINT16 prev, + UINT16 newValue) +{ + UINT32 sum = (UINT16)~oldSum; + sum += (UINT32)((UINT16)~prev) + newValue; + fold32(sum); + return (UINT16)~sum; +} + +/* + *---------------------------------------------------------------------------- + * CalculateChecksumNB -- + * + * Calculates checksum over a length of bytes contained in an NB. + * + * nb : NB which contains the packet bytes. + * csumDataLen : Length of bytes to be checksummed. + * offset : offset to the first bytes of the data stream to be + * checksumed. + * + * Result: + * return 0, if there is a failure. + *---------------------------------------------------------------------------- + */ +UINT16 +CalculateChecksumNB(const PNET_BUFFER nb, + UINT16 csumDataLen, + UINT32 offset) +{ + ULONG mdlLen; + UINT16 csLen; + PUCHAR src; + UINT64 csum = 0; + PMDL currentMdl; + ULONG firstMdlLen; + /* Running count of bytes in remainder of the MDLs including current. */ + ULONG packetLen; + + if ((nb == NULL) || (csumDataLen == 0) + || (offset >= NET_BUFFER_DATA_LENGTH(nb)) + || (offset + csumDataLen > NET_BUFFER_DATA_LENGTH(nb))) { + OVS_LOG_ERROR("Invalid parameters - csum length %u, offset %u," + "pkt%s len %u", csumDataLen, offset, nb? "":"(null)", + nb? 
NET_BUFFER_DATA_LENGTH(nb) : 0); + return 0; + } + + currentMdl = NET_BUFFER_CURRENT_MDL(nb); + packetLen = NET_BUFFER_DATA_LENGTH(nb); + firstMdlLen = + MmGetMdlByteCount(currentMdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb); + + firstMdlLen = MIN(firstMdlLen, packetLen); + if (offset < firstMdlLen) { + src = (PUCHAR) MmGetSystemAddressForMdlSafe(currentMdl, LowPagePriority); + if (!src) { + return 0; + } + src += (NET_BUFFER_CURRENT_MDL_OFFSET(nb) + offset); + mdlLen = firstMdlLen - offset; + packetLen -= firstMdlLen; + ASSERT((INT)packetLen >= 0); + } else { + offset -= firstMdlLen; + packetLen -= firstMdlLen; + ASSERT((INT)packetLen >= 0); + currentMdl = NDIS_MDL_LINKAGE(currentMdl); + mdlLen = MmGetMdlByteCount(currentMdl); + mdlLen = MIN(mdlLen, packetLen); + + while (offset >= mdlLen) { + offset -= mdlLen; + packetLen -= mdlLen; + ASSERT((INT)packetLen >= 0); + currentMdl = NDIS_MDL_LINKAGE(currentMdl); + mdlLen = MmGetMdlByteCount(currentMdl); + mdlLen = MIN(mdlLen, packetLen); + } + + src = (PUCHAR)MmGetSystemAddressForMdlSafe(currentMdl, LowPagePriority); + if (!src) { + return 0; + } + + src += offset; + mdlLen -= offset; + } + + while (csumDataLen && (currentMdl != NULL)) { + ASSERT(mdlLen < 65536); + csLen = MIN((UINT16) mdlLen, csumDataLen); + //XXX Not handling odd bytes yet. + ASSERT(((csLen & 0x1) == 0) || csumDataLen <= mdlLen); + + csum = CalculateOnesComplement(src, csLen, csum, TRUE); + fold64(csum); + + csumDataLen -= csLen; + currentMdl = NDIS_MDL_LINKAGE(currentMdl); + if (csumDataLen && currentMdl) { + src = MmGetSystemAddressForMdlSafe(currentMdl, LowPagePriority); + if (!src) { + return 0; + } + + mdlLen = MmGetMdlByteCount(currentMdl); + mdlLen = MIN(mdlLen, packetLen); + /* packetLen does not include the current MDL from here on. 
*/ + packetLen -= mdlLen; + ASSERT((INT)packetLen >= 0); + } + } + + ASSERT(csumDataLen == 0); + ASSERT((csum & ~0xffff) == 0); + return (UINT16) ~csum; +} + +/* + * -------------------------------------------------------------------------- + * OvsValidateIPChecksum + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsValidateIPChecksum(PNET_BUFFER_LIST curNbl, + POVS_PACKET_HDR_INFO hdrInfo) +{ + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; + uint16_t checksum, hdrChecksum; + struct IPHdr ip_storage; + const IPHdr *ipHdr; + + if (!hdrInfo->isIPv4) { + return NDIS_STATUS_SUCCESS; + } + + /* First check if NIC has indicated checksum failure. */ + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, + TcpIpChecksumNetBufferListInfo); + if (csumInfo.Receive.IpChecksumFailed) { + return NDIS_STATUS_FAILURE; + } + + /* Next, check if the NIC did not validate the RX checksum. */ + if (!csumInfo.Receive.IpChecksumSucceeded) { + ipHdr = OvsGetIp(curNbl, hdrInfo->l3Offset, &ip_storage); + if (ipHdr) { + ip_storage = *ipHdr; + hdrChecksum = ipHdr->check; + ip_storage.check = 0; + checksum = IPChecksum((uint8 *)&ip_storage, ipHdr->ihl * 4, 0); + if (checksum != hdrChecksum) { + return NDIS_STATUS_FAILURE; + } + } + } + return NDIS_STATUS_SUCCESS; +} + +/* + *---------------------------------------------------------------------------- + * OvsValidateUDPChecksum + *---------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsValidateUDPChecksum(PNET_BUFFER_LIST curNbl, BOOLEAN udpCsumZero) +{ + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; + + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo); + + if (udpCsumZero) { + /* Zero is valid checksum. 
*/ + csumInfo.Receive.UdpChecksumFailed = 0; + NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value; + return NDIS_STATUS_SUCCESS; + } + + /* First check if NIC has indicated UDP checksum failure. */ + if (csumInfo.Receive.UdpChecksumFailed) { + return NDIS_STATUS_INVALID_PACKET; + } + + return NDIS_STATUS_SUCCESS; +} diff --git a/datapath-windows/ovsext/OvsChecksum.h b/datapath-windows/ovsext/OvsChecksum.h new file mode 100644 index 000000000..d0070d2f5 --- /dev/null +++ b/datapath-windows/ovsext/OvsChecksum.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __OVS_CHECKSUM_H_ +#define __OVS_CHECKSUM_H_ 1 + +typedef union _OVS_PACKET_HDR_INFO *POVS_PACKET_HDR_INFO; + +UINT16 CalculateChecksum(UINT8 *ptr, UINT16 length, UINT16 initial); +UINT16 CopyAndCalculateChecksum(UINT8 *dst, UINT8 *src, UINT16 length, + UINT16 initial); +UINT16 IPChecksum(UINT8 *ipHdr, UINT16 length, UINT16 initial); +UINT16 IPPseudoChecksum(UINT32 *src, UINT32 *dst, UINT8 protocol, + UINT16 totalLength); +UINT16 IPv6PseudoChecksum(UINT32 *src, UINT32 *dst, UINT8 protocol, + UINT16 totalLength); +UINT16 ChecksumUpdate32(UINT16 oldSum, UINT32 prev, UINT32 newValue); +UINT16 ChecksumUpdate16(UINT16 oldSum, UINT16 prev, UINT16 newValue); +UINT16 CalculateChecksumNB(const PNET_BUFFER nb, UINT16 csumDataLen, + UINT32 offset); +NDIS_STATUS OvsValidateIPChecksum(PNET_BUFFER_LIST curNbl, + POVS_PACKET_HDR_INFO hdrInfo); +NDIS_STATUS OvsValidateUDPChecksum(PNET_BUFFER_LIST curNbl, + BOOLEAN udpCsumZero); + +#endif /* __OVS_CHECKSUM_H_ */ diff --git a/datapath-windows/ovsext/OvsDebug.c b/datapath-windows/ovsext/OvsDebug.c new file mode 100644 index 000000000..8610008df --- /dev/null +++ b/datapath-windows/ovsext/OvsDebug.c @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "precomp.h" + +#include "OvsDebug.h" +#ifdef DBG +#define OVS_DBG_DEFAULT OVS_DBG_INFO +#else +#define OVS_DBG_DEFAULT OVS_DBG_ERROR +#endif + +UINT32 ovsLogFlags = 0xffffffff; +UINT32 ovsLogLevel = OVS_DBG_DEFAULT; + +#define OVS_LOG_BUFFER_SIZE 384 + +/* + * -------------------------------------------------------------------------- + * OvsLog -- + * Utility function to log to the Windows debug console. + * -------------------------------------------------------------------------- + */ +VOID +OvsLog(UINT32 level, + UINT32 flag, + CHAR *funcName, + UINT32 line, + CHAR *format, + ...) +{ + va_list args; + CHAR buf[OVS_LOG_BUFFER_SIZE]; + + if (level > ovsLogLevel || (ovsLogFlags & flag) == 0) { + return; + } + + buf[0] = 0; + va_start(args, format); + RtlStringCbVPrintfA(buf, sizeof (buf), format, args); + va_end(args); + + DbgPrintEx(DPFLTR_IHVNETWORK_ID, level, "%s:%lu %s\n", funcName, line, buf); +} diff --git a/datapath-windows/ovsext/OvsDebug.h b/datapath-windows/ovsext/OvsDebug.h new file mode 100644 index 000000000..3705d1e9b --- /dev/null +++ b/datapath-windows/ovsext/OvsDebug.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __OVS_DEBUG_H_ +#define __OVS_DEBUG_H_ 1 + +#define OVS_DBG_INIT BIT32(0) +#define OVS_DBG_SWITCH BIT32(1) +#define OVS_DBG_VPORT BIT32(2) +#define OVS_DBG_FLOW BIT32(3) +#define OVS_DBG_QOS BIT32(4) +#define OVS_DBG_USER BIT32(5) +#define OVS_DBG_EXECUTE BIT32(6) +#define OVS_DBG_EVENT BIT32(7) +#define OVS_DBG_DISPATCH BIT32(8) +#define OVS_DBG_OID BIT32(9) +#define OVS_DBG_STATUS BIT32(10) +#define OVS_DBG_CHECKSUM BIT32(11) +#define OVS_DBG_VXLAN BIT32(12) +#define OVS_DBG_GRE BIT32(13) +#define OVS_DBG_GRE64 BIT32(14) +#define OVS_DBG_ACTION BIT32(15) +#define OVS_DBG_IOCTL BIT32(16) +#define OVS_DBG_PROPERTY BIT32(17) +#define OVS_DBG_IPHELPER BIT32(18) +#define OVS_DBG_BUFMGMT BIT32(19) +#define OVS_DBG_OTHERS BIT32(21) + +#define OVS_DBG_RESERVED BIT32(31) +//Please add above OVS_DBG_RESERVED. + +#define OVS_DBG_ERROR DPFLTR_ERROR_LEVEL +#define OVS_DBG_WARN DPFLTR_WARNING_LEVEL +#define OVS_DBG_TRACE DPFLTR_TRACE_LEVEL +#define OVS_DBG_INFO DPFLTR_INFO_LEVEL +#define OVS_DBG_LOUD (DPFLTR_INFO_LEVEL + 1) + + + +VOID OvsLog(UINT32 level, UINT32 flag, CHAR *funcName, + UINT32 line, CHAR *format, ...); + + +#define OVS_LOG_LOUD(_format, ...) \ + OvsLog(OVS_DBG_LOUD, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__) + +#define OVS_LOG_INFO(_format, ...) \ + OvsLog(OVS_DBG_INFO, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__) + +#define OVS_LOG_TRACE(_format, ...) \ + OvsLog(OVS_DBG_TRACE, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__) + +#define OVS_LOG_ERROR(_format, ...) \ + OvsLog(OVS_DBG_ERROR, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__) + +#define OVS_LOG_WARN(_format, ...) 
\ + OvsLog(OVS_DBG_WARN, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__) + +#if DBG +#define OVS_VERIFY_IRQL(_x) \ + if (KeGetCurrentIrql() != (KIRQL)_x) { \ + OVS_LOG_WARN("expected IRQL %u, actual IRQL: %u", \ + _x, KeGetCurrentIrql()); \ + } + +#define OVS_VERIFY_IRQL_LE(_x) \ + if (KeGetCurrentIrql() > (KIRQL)_x) { \ + OVS_LOG_WARN("expected IRQL <= %u, actual IRQL: %u", \ + _x, KeGetCurrentIrql()); \ + } + +#else +#define OVS_VERIFY_IRQL(_x) +#define OVS_VERIFY_IRQL_LE(_x) +#endif + +#endif /* __OVS_DEBUG_H_ */ diff --git a/datapath-windows/ovsext/OvsDriver.c b/datapath-windows/ovsext/OvsDriver.c new file mode 100644 index 000000000..f027410a3 --- /dev/null +++ b/datapath-windows/ovsext/OvsDriver.c @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "precomp.h" +#include "OvsSwitch.h" +#include "OvsIoctl.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_DRIVER +#include "OvsDebug.h" + +/* Global handles. XXX: Some of them need not be global. */ +/* + * Maps to DriverObject and FilterDriverContext parameters in the NDIS filter + * driver functions. + * DriverObject is specified by NDIS. + * FilterDriverContext is specified by the filter driver. + */ +NDIS_HANDLE gOvsExtDriverObject; + +/* + * Maps to NdisFilterHandle parameter in the NDIS filter driver functions. + * NdisFilterHandle is returned by NDISFRegisterFilterDriver. 
+ */ +NDIS_HANDLE gOvsExtDriverHandle; + +/* + * Maps to FilterModuleContext parameter in the NDIS filter driver functions. + * FilterModuleContext is a allocated by the driver in the FilterAttach + * function. + */ +extern POVS_SWITCH_CONTEXT gOvsSwitchContext; + +static PWCHAR ovsExtFriendlyName = L"VMWare OVS Extension"; +static PWCHAR ovsExtServiceName = L"OVSExt"; +NDIS_STRING ovsExtGuidUC; +NDIS_STRING ovsExtFriendlyNameUC; + +static PWCHAR ovsExtGuidStr = L"{583CC151-73EC-4A6A-8B47-578297AD7623}"; +static const GUID ovsExtGuid = { + 0x583cc151, + 0x73ec, + 0x4a6a, + {0x8b, 0x47, 0x57, 0x82, 0x97, 0xad, 0x76, 0x23} +}; + +/* Declarations of callback functions for the filter driver. */ +DRIVER_UNLOAD OvsExtUnload; +FILTER_NET_PNP_EVENT OvsExtNetPnPEvent; +FILTER_STATUS OvsExtStatus; + +FILTER_ATTACH OvsExtAttach; +FILTER_DETACH OvsExtDetach; +FILTER_RESTART OvsExtRestart; +FILTER_PAUSE OvsExtPause; + +FILTER_SEND_NET_BUFFER_LISTS OvsExtSendNBL; +FILTER_SEND_NET_BUFFER_LISTS_COMPLETE OvsExtSendNBLComplete; +FILTER_CANCEL_SEND_NET_BUFFER_LISTS OvsExtCancelSendNBL; +FILTER_RECEIVE_NET_BUFFER_LISTS OvsExtReceiveNBL; +FILTER_RETURN_NET_BUFFER_LISTS OvsExtReturnNBL; + +FILTER_OID_REQUEST OvsExtOidRequest; +FILTER_OID_REQUEST_COMPLETE OvsExtOidRequestComplete; +FILTER_CANCEL_OID_REQUEST OvsExtCancelOidRequest; + + +/* + * -------------------------------------------------------------------------- + * Init/Load function for the OVSEXT filter Driver. 
+ * -------------------------------------------------------------------------- + */ +NTSTATUS +DriverEntry(PDRIVER_OBJECT driverObject, + PUNICODE_STRING registryPath) +{ + NDIS_STATUS status; + NDIS_FILTER_DRIVER_CHARACTERISTICS driverChars; + + UNREFERENCED_PARAMETER(registryPath); + + gOvsExtDriverObject = driverObject; + + RtlZeroMemory(&driverChars, sizeof driverChars); + driverChars.Header.Type = NDIS_OBJECT_TYPE_FILTER_DRIVER_CHARACTERISTICS; + driverChars.Header.Size = sizeof driverChars; + driverChars.Header.Revision = NDIS_FILTER_CHARACTERISTICS_REVISION_2; + driverChars.MajorNdisVersion = NDIS_FILTER_MAJOR_VERSION; + driverChars.MinorNdisVersion = NDIS_FILTER_MINOR_VERSION; + driverChars.MajorDriverVersion = 1; + driverChars.MinorDriverVersion = 0; + driverChars.Flags = 0; + + RtlInitUnicodeString(&driverChars.ServiceName, ovsExtServiceName); + RtlInitUnicodeString(&ovsExtFriendlyNameUC, ovsExtFriendlyName); + RtlInitUnicodeString(&ovsExtGuidUC, ovsExtGuidStr); + + driverChars.FriendlyName = ovsExtFriendlyNameUC; + driverChars.UniqueName = ovsExtGuidUC; + + driverChars.AttachHandler = OvsExtAttach; + driverChars.DetachHandler = OvsExtDetach; + driverChars.RestartHandler = OvsExtRestart; + driverChars.PauseHandler = OvsExtPause; + + driverChars.SendNetBufferListsHandler = OvsExtSendNBL; + driverChars.SendNetBufferListsCompleteHandler = OvsExtSendNBLComplete; + driverChars.CancelSendNetBufferListsHandler = OvsExtCancelSendNBL; + driverChars.ReceiveNetBufferListsHandler = NULL; + driverChars.ReturnNetBufferListsHandler = NULL; + + driverChars.OidRequestHandler = OvsExtOidRequest; + driverChars.OidRequestCompleteHandler = OvsExtOidRequestComplete; + driverChars.CancelOidRequestHandler = OvsExtCancelOidRequest; + + driverChars.DevicePnPEventNotifyHandler = NULL; + driverChars.NetPnPEventHandler = OvsExtNetPnPEvent; + driverChars.StatusHandler = NULL; + + driverObject->DriverUnload = OvsExtUnload; + + status = NdisFRegisterFilterDriver(driverObject, + 
(NDIS_HANDLE) gOvsExtDriverObject, + &driverChars, &gOvsExtDriverHandle); + if (status != NDIS_STATUS_SUCCESS) { + return status; + } + + /* Create the communication channel for usersapce. */ + status = OvsCreateDeviceObject(gOvsExtDriverHandle); + if (status != NDIS_STATUS_SUCCESS) { + NdisFDeregisterFilterDriver(gOvsExtDriverHandle); + gOvsExtDriverHandle = NULL; + } + + return status; +} + + +/* + * -------------------------------------------------------------------------- + * Un-init/Unload function for the OVS intermediate Driver. + * -------------------------------------------------------------------------- + */ +VOID +OvsExtUnload(struct _DRIVER_OBJECT *driverObject) +{ + UNREFERENCED_PARAMETER(driverObject); + + OvsDeleteDeviceObject(); + NdisFDeregisterFilterDriver(gOvsExtDriverHandle); +} + + +/* + * -------------------------------------------------------------------------- + * Implements filter driver's FilterStatus function. + * -------------------------------------------------------------------------- + */ +VOID +OvsExtStatus(NDIS_HANDLE filterModuleContext, + PNDIS_STATUS_INDICATION statusIndication) +{ + UNREFERENCED_PARAMETER(statusIndication); + POVS_SWITCH_CONTEXT switchObject = (POVS_SWITCH_CONTEXT)filterModuleContext; + + NdisFIndicateStatus(switchObject->NdisFilterHandle, statusIndication); + return; +} diff --git a/datapath-windows/ovsext/OvsEth.h b/datapath-windows/ovsext/OvsEth.h new file mode 100644 index 000000000..271fd85eb --- /dev/null +++ b/datapath-windows/ovsext/OvsEth.h @@ -0,0 +1,450 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/*
 * printf helpers for Ethernet addresses: ETH_ADDR_FMT_STR is the format for
 * six colon-separated hex octets and ETH_ADDR_FMT_ARGS(a) supplies the six
 * matching arguments from the bytes at 'a'.
 *
 * 'a' may be any pointer expression; it is parenthesised before the cast so
 * that e.g. ETH_ADDR_FMT_ARGS(p + 1) casts the whole expression rather than
 * just 'p'. The argument is evaluated six times.
 */
#define ETH_ADDR_FMT_STR     "%02x:%02x:%02x:%02x:%02x:%02x"
#define ETH_ADDR_FMT_ARGS(a) ((UINT8 *)(a))[0], ((UINT8 *)(a))[1], ((UINT8 *)(a))[2], \
                             ((UINT8 *)(a))[3], ((UINT8 *)(a))[4], ((UINT8 *)(a))[5]
" SINK" : "" + +/* Ethernet header type */ +typedef enum { + ETH_HEADER_TYPE_DIX, + ETH_HEADER_TYPE_802_1PQ, + ETH_HEADER_TYPE_802_3, + ETH_HEADER_TYPE_802_1PQ_802_3, +} Eth_HdrType; + +/* DIX type fields we care about */ +typedef enum { + ETH_TYPE_IPV4 = 0x0800, + ETH_TYPE_IPV6 = 0x86DD, + ETH_TYPE_ARP = 0x0806, + ETH_TYPE_RARP = 0x8035, + ETH_TYPE_LLDP = 0x88CC, + ETH_TYPE_CDP = 0x2000, + ETH_TYPE_802_1PQ = 0x8100, // not really a DIX type, but used as such + ETH_TYPE_LLC = 0xFFFF, // 0xFFFF is IANA reserved, used to mark LLC +} Eth_DixType; + +typedef enum { + ETH_TYPE_IPV4_NBO = 0x0008, + ETH_TYPE_IPV6_NBO = 0xDD86, + ETH_TYPE_ARP_NBO = 0x0608, + ETH_TYPE_RARP_NBO = 0x3580, + ETH_TYPE_LLDP_NBO = 0xCC88, + ETH_TYPE_CDP_NBO = 0x0020, + ETH_TYPE_AKIMBI_NBO = 0xDE88, + ETH_TYPE_802_1PQ_NBO = 0x0081, // not really a DIX type, but used as such +} Eth_DixTypeNBO; + +/* low two bits of the LLC control byte */ +typedef enum { + ETH_LLC_CONTROL_IFRAME = 0x0, // both 0x0 and 0x2, only low bit of 0 needed + ETH_LLC_CONTROL_SFRAME = 0x1, + ETH_LLC_CONTROL_UFRAME = 0x3, +} Eth_LLCControlBits; + +#define ETH_LLC_CONTROL_UFRAME_MASK (0x3) + +typedef struct Eth_DIX { + UINT16 typeNBO; // indicates the higher level protocol +} Eth_DIX; + +/* + * LLC header come in two varieties: 8 bit control and 16 bit control. + * when the lower two bits of the first byte's control are '11', this + * indicated the 8 bit control field. 
+ */ +typedef struct Eth_LLC8 { + UINT8 dsap; + UINT8 ssap; + UINT8 control; +} Eth_LLC8; + +typedef struct Eth_LLC16 { + UINT8 dsap; + UINT8 ssap; + UINT16 control; +} Eth_LLC16; + +typedef struct Eth_SNAP { + UINT8 snapOrg[3]; + Eth_DIX snapType; +} Eth_SNAP; + +typedef struct Eth_802_3 { + UINT16 lenNBO; // length of the frame + Eth_LLC8 llc; // LLC header + Eth_SNAP snap; // SNAP header +} Eth_802_3; + +// 802.1p QOS/priority tags +enum { + ETH_802_1_P_BEST_EFFORT = 0, + ETH_802_1_P_BACKGROUND = 1, + ETH_802_1_P_EXCELLENT_EFFORT = 2, + ETH_802_1_P_CRITICAL_APPS = 3, + ETH_802_1_P_VIDEO = 4, + ETH_802_1_P_VOICE = 5, + ETH_802_1_P_INTERNETWORK_CONROL = 6, + ETH_802_1_P_NETWORK_CONTROL = 7 +}; + +typedef struct Eth_802_1pq_Tag { + UINT16 typeNBO; // always ETH_TYPE_802_1PQ + UINT16 vidHi:4, // 802.1q vlan ID high nibble + canonical:1, // bit order? (should always be 0) + priority:3, // 802.1p priority tag + vidLo:8; // 802.1q vlan ID low byte +} Eth_802_1pq_Tag; + +typedef struct Eth_802_1pq { + Eth_802_1pq_Tag tag; // VLAN/QOS tag + union { + Eth_DIX dix; // DIX header follows + Eth_802_3 e802_3; // or 802.3 header follows + }; +} Eth_802_1pq; + +typedef struct Eth_Header { + Eth_Address dst; // all types of ethernet frame have dst first + Eth_Address src; // and the src next (at least all the ones we'll see) + union { + Eth_DIX dix; // followed by a DIX header... + Eth_802_3 e802_3; // ...or an 802.3 header + Eth_802_1pq e802_1pq; // ...or an 802.1[pq] tag and a header + }; +} Eth_Header; + +#define ETH_BROADCAST_ADDRESS { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } + +static Eth_Address netEthBroadcastAddr = ETH_BROADCAST_ADDRESS; + +/* + * simple predicate for 1536 boundary. 
+ * the parameter is a network ordered UINT16, which is compared to 0x06, + * testing for "length" values greater than or equal to 0x0600 (1536) + */ + +#define ETH_TYPENOT8023(x) (((x) & 0xff) >= 0x06) + +/* + * header length macros + * + * first two are typical: ETH_HEADER_LEN_DIX, ETH_HEADER_LEN_802_1PQ + * last two are suspicious, due to 802.3 incompleteness + */ + +#define ETH_HEADER_LEN_DIX (sizeof(Eth_Address) + \ + sizeof(Eth_Address) + \ + sizeof(Eth_DIX)) +#define ETH_HEADER_LEN_802_1PQ (sizeof(Eth_Address) + \ + sizeof(Eth_Address) + \ + sizeof(Eth_802_1pq_Tag) + \ + sizeof(Eth_DIX)) +#define ETH_HEADER_LEN_802_2_LLC (sizeof(Eth_Address) + \ + sizeof(Eth_Address) + \ + sizeof(UINT16) + \ + sizeof(Eth_LLC8)) +#define ETH_HEADER_LEN_802_2_LLC16 (sizeof(Eth_Address) + \ + sizeof(Eth_Address) + \ + sizeof(UINT16) + \ + sizeof(Eth_LLC16)) +#define ETH_HEADER_LEN_802_3 (sizeof(Eth_Address) + \ + sizeof(Eth_Address) + \ + sizeof(Eth_802_3)) +#define ETH_HEADER_LEN_802_1PQ_LLC (sizeof(Eth_Address) + \ + sizeof(Eth_Address) + \ + sizeof(Eth_802_1pq_Tag) + \ + sizeof(UINT16) + \ + sizeof(Eth_LLC8)) +#define ETH_HEADER_LEN_802_1PQ_LLC16 (sizeof(Eth_Address) + \ + sizeof(Eth_Address) + \ + sizeof(Eth_802_1pq_Tag) + \ + sizeof(UINT16) + \ + sizeof(Eth_LLC16)) +#define ETH_HEADER_LEN_802_1PQ_802_3 (sizeof(Eth_Address) + \ + sizeof(Eth_Address) + \ + sizeof(Eth_802_1pq_Tag) + \ + sizeof(Eth_802_3)) + +#define ETH_MIN_HEADER_LEN (ETH_HEADER_LEN_DIX) +#define ETH_MAX_HEADER_LEN (ETH_HEADER_LEN_802_1PQ_802_3) + +#define ETH_MIN_FRAME_LEN 60 +#define ETH_MAX_STD_MTU 1500 +#define ETH_MAX_STD_FRAMELEN (ETH_MAX_STD_MTU + ETH_MAX_HEADER_LEN) +#define ETH_MAX_JUMBO_MTU 9000 +#define ETH_MAX_JUMBO_FRAMELEN (ETH_MAX_JUMBO_MTU + ETH_MAX_HEADER_LEN) + +#define ETH_DEFAULT_MTU 1500 + +#define ETH_FCS_LEN 4 +#define ETH_VLAN_LEN sizeof(Eth_802_1pq_Tag) + + +/* + *---------------------------------------------------------------------------- + * Do the two ethernet addresses match? 
+ *---------------------------------------------------------------------------- + */ +static __inline BOOLEAN +Eth_IsAddrMatch(const Eth_Address addr1, const Eth_Address addr2) +{ + return !memcmp(addr1, addr2, ETH_ADDR_LENGTH); +} + + +/* + *---------------------------------------------------------------------------- + * Is the address the broadcast address? + *---------------------------------------------------------------------------- + */ +static __inline BOOLEAN +Eth_IsBroadcastAddr(const Eth_Address addr) +{ + return Eth_IsAddrMatch(addr, netEthBroadcastAddr); +} + + +/* + *---------------------------------------------------------------------------- + * Is the address a unicast address? + *---------------------------------------------------------------------------- + */ +static __inline BOOLEAN +Eth_IsUnicastAddr(const Eth_Address addr) +{ + // broadcast and multicast frames always have the low bit set in byte 0 + return !(((CHAR *)addr)[0] & 0x1); +} + +/* + *---------------------------------------------------------------------------- + * Is the address the all-zeros address? + *---------------------------------------------------------------------------- + */ +static __inline BOOLEAN +Eth_IsNullAddr(const Eth_Address addr) +{ + return ((addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]) == 0); +} + +/* + *---------------------------------------------------------------------------- + * + * Eth_HeaderType -- + * return an Eth_HdrType depending on the eth header + * contents. will not work in all cases, especially since it + * requres ETH_HEADER_LEN_802_1PQ bytes to determine the type + * + * HeaderType isn't sufficient to determine the length of + * the eth header. 
for 802.3 header, its not clear without + * examination, whether a SNAP is included + * + * returned type: + * + * ETH_HEADER_TYPE_DIX: typical 14 byte eth header + * ETH_HEADER_TYPE_802_1PQ: DIX+vlan tagging + * ETH_HEADER_TYPE_802_3: 802.3 eth header + * ETH_HEADER_TYPE_802_1PQ_802_3: 802.3 + vlan tag + * + * the test for DIX was moved from a 1500 boundary to a 1536 + * boundary, since the vmxnet2 MTU was updated to 1514. when + * W2K8 attempted to send LLC frames, these were interpreted + * as DIX frames instead of the correct 802.3 type + * + * these links may help if they're valid: + * + * http://standards.ieee.org/regauth/ethertype/type-tut.html + * http://standards.ieee.org/regauth/ethertype/type-pub.html + * + * Results: + * Eth_HdrType value + * + *---------------------------------------------------------------------------- + */ +static __inline Eth_HdrType +Eth_HeaderType(const Eth_Header *eh) +{ + /* + * we use 1536 (IEEE 802.3-std mentions 1536, but iana indicates + * type of 0-0x5dc are 802.3) instead of some #def symbol to prevent + * inadvertant reuse of the same macro for buffer size decls. + */ + if (ETH_TYPENOT8023(eh->dix.typeNBO)) { + if (eh->dix.typeNBO != ETH_TYPE_802_1PQ_NBO) { + /* typical case */ + return ETH_HEADER_TYPE_DIX; + } + + /* some type of 802.1pq tagged frame */ + if (ETH_TYPENOT8023(eh->e802_1pq.dix.typeNBO)) { + /* vlan tagging with dix style type */ + return ETH_HEADER_TYPE_802_1PQ; + } + + /* vlan tagging with 802.3 header */ + return ETH_HEADER_TYPE_802_1PQ_802_3; + } + + /* assume 802.3 */ + return ETH_HEADER_TYPE_802_3; +} + + +/* + *---------------------------------------------------------------------------- + * + * Eth_EncapsulatedPktType -- + * Get the encapsulated (layer 3) frame type. + * for LLC frames without SNAP, we don't have + * an encapsulated type, and return ETH_TYPE_LLC. + * + * IANA reserves 0xFFFF, which we reuse to indicate + * ETH_TYPE_LLC. + * + * Results: + * NBO frame type. 
+ * + *---------------------------------------------------------------------------- + */ +static __inline UINT16 +Eth_EncapsulatedPktType(const Eth_Header *eh) +{ + Eth_HdrType type = Eth_HeaderType(eh); + + switch (type) { + case ETH_HEADER_TYPE_DIX: return eh->dix.typeNBO; + case ETH_HEADER_TYPE_802_1PQ: return eh->e802_1pq.dix.typeNBO; + case ETH_HEADER_TYPE_802_3: + /* + * Documentation describes SNAP headers as having ONLY + * 0x03 as the control fields, not just the lower two bits + * This prevents the use of Eth_IsLLCControlUFormat. + */ + if ((eh->e802_3.llc.dsap == 0xaa) && (eh->e802_3.llc.ssap == 0xaa) && + (eh->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME)) { + return eh->e802_3.snap.snapType.typeNBO; + } else { + // LLC, no snap header, then no type + return ETH_TYPE_LLC; + } + + case ETH_HEADER_TYPE_802_1PQ_802_3: + if ((eh->e802_1pq.e802_3.llc.dsap == 0xaa) && + (eh->e802_1pq.e802_3.llc.ssap == 0xaa) && + (eh->e802_1pq.e802_3.llc.control == ETH_LLC_CONTROL_UFRAME)) { + return eh->e802_1pq.e802_3.snap.snapType.typeNBO; + } else { + // tagged LLC, no snap header, then no type + return ETH_TYPE_LLC; + } + } + + ASSERT(FALSE); + return 0; +} + +/* + *---------------------------------------------------------------------------- + * Is the frame of the requested protocol type or is it an 802.1[pq] + * encapsulation of such a frame? + *---------------------------------------------------------------------------- + */ +static __inline BOOLEAN +Eth_IsDixType(const Eth_Header *eh, const Eth_DixTypeNBO type) +{ + return Eth_EncapsulatedPktType(eh) == type; +} + + +/* + *---------------------------------------------------------------------------- + * Is the frame an IPV4 frame? 
+ *---------------------------------------------------------------------------- + */ +static __inline BOOLEAN +Eth_IsIPV4(const Eth_Header *eh) +{ + return Eth_IsDixType(eh, ETH_TYPE_IPV4_NBO); +} + + +/* + *---------------------------------------------------------------------------- + * Is the frame an IPV6 frame? + *---------------------------------------------------------------------------- + */ +static __inline BOOLEAN +Eth_IsIPV6(const Eth_Header *eh) +{ + return Eth_IsDixType(eh, ETH_TYPE_IPV6_NBO); +} + + +/* + *---------------------------------------------------------------------------- + * Is the frame an ARP frame? + *---------------------------------------------------------------------------- + */ +static __inline BOOLEAN +Eth_IsARP(const Eth_Header *eh) +{ + return Eth_IsDixType(eh, ETH_TYPE_ARP_NBO); +} + + +/* + *---------------------------------------------------------------------------- + * Does the frame contain an 802.1[pq] tag? + *---------------------------------------------------------------------------- + */ +static __inline BOOLEAN +Eth_IsFrameTagged(const Eth_Header *eh) +{ + return (eh->dix.typeNBO == ETH_TYPE_802_1PQ_NBO); +} +#endif /* __OVS_ETH_H_ */ diff --git a/datapath-windows/ovsext/OvsEvent.c b/datapath-windows/ovsext/OvsEvent.c new file mode 100644 index 000000000..a75b2bd2a --- /dev/null +++ b/datapath-windows/ovsext/OvsEvent.c @@ -0,0 +1,496 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "precomp.h" + +#include "OvsIoctl.h" +#include "OvsSwitch.h" +#include "OvsVport.h" +#include "OvsEvent.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_EVENT +#include "OvsDebug.h" + +LIST_ENTRY ovsEventQueue; +UINT32 ovsNumEventQueue; +UINT32 ovsNumPollAll; + +extern PNDIS_SPIN_LOCK gOvsCtrlLock; + +NTSTATUS +OvsInitEventQueue() +{ + InitializeListHead(&ovsEventQueue); + return STATUS_SUCCESS; +} + +VOID +OvsCleanupEventQueue() +{ + ASSERT(IsListEmpty(&ovsEventQueue)); + ASSERT(ovsNumEventQueue == 0); +} + +static __inline VOID +OvsAcquireEventQueueLock() +{ + NdisAcquireSpinLock(gOvsCtrlLock); +} + +static __inline VOID +OvsReleaseEventQueueLock() +{ + NdisReleaseSpinLock(gOvsCtrlLock); +} + +/* + * -------------------------------------------------------------------------- + * Cleanup the event queue of the OpenInstance. + * -------------------------------------------------------------------------- + */ +VOID +OvsCleanupEvent(POVS_OPEN_INSTANCE instance) +{ + POVS_EVENT_QUEUE queue; + PIRP irp = NULL; + queue = (POVS_EVENT_QUEUE)instance->eventQueue; + if (queue) { + POVS_EVENT_QUEUE_ELEM elem; + PLIST_ENTRY link, next; + + OvsAcquireEventQueueLock(); + RemoveEntryList(&queue->queueLink); + ovsNumEventQueue--; + if (queue->pendingIrp) { + PDRIVER_CANCEL cancelRoutine; + irp = queue->pendingIrp; + cancelRoutine = IoSetCancelRoutine(irp, NULL); + queue->pendingIrp = NULL; + if (cancelRoutine == NULL) { + irp = NULL; + } + } + instance->eventQueue = NULL; + OvsReleaseEventQueueLock(); + if (irp) { + OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS); + } + + LIST_FORALL_SAFE(&queue->elemList, link, next) { + elem = CONTAINING_RECORD(link, OVS_EVENT_QUEUE_ELEM, link); + OvsFreeMemory(elem); + } + OvsFreeMemory(queue); + } +} + +/* + * -------------------------------------------------------------------------- + * When 
event is generated, we need to post the event to all + * the event queues. If there is pending Irp waiting for event + * complete the Irp to wakeup the user thread. + * + * Side effects: User thread may be woken up. + * -------------------------------------------------------------------------- + */ +VOID +OvsPostEvent(UINT32 portNo, + UINT32 status) +{ + POVS_EVENT_QUEUE_ELEM elem; + POVS_EVENT_QUEUE queue; + PLIST_ENTRY link; + BOOLEAN triggerPollAll = FALSE; + LIST_ENTRY list; + PLIST_ENTRY entry; + PIRP irp; + + InitializeListHead(&list); + + OVS_LOG_TRACE("Enter: portNo: %#x, status: %#x", portNo, status); + + OvsAcquireEventQueueLock(); + + LIST_FORALL(&ovsEventQueue, link) { + queue = CONTAINING_RECORD(link, OVS_EVENT_QUEUE, queueLink); + if ((status & queue->mask) == 0 || + queue->pollAll) { + continue; + } + if (queue->numElems > (OVS_MAX_VPORT_ARRAY_SIZE >> 1) || + portNo == OVS_DEFAULT_PORT_NO) { + queue->pollAll = TRUE; + } else { + elem = (POVS_EVENT_QUEUE_ELEM)OvsAllocateMemory(sizeof(*elem)); + if (elem == NULL) { + queue->pollAll = TRUE; + } else { + elem->portNo = portNo; + elem->status = (status & queue->mask); + InsertTailList(&queue->elemList, &elem->link); + queue->numElems++; + OVS_LOG_INFO("Queue: %p, numElems: %d", + queue, queue->numElems); + } + } + if (queue->pollAll) { + PLIST_ENTRY curr, next; + triggerPollAll = TRUE; + ovsNumPollAll++; + LIST_FORALL_SAFE(&queue->elemList, curr, next) { + RemoveEntryList(curr); + elem = CONTAINING_RECORD(curr, OVS_EVENT_QUEUE_ELEM, link); + OvsFreeMemory(elem); + } + queue->numElems = 0; + } + if (queue->pendingIrp != NULL) { + PDRIVER_CANCEL cancelRoutine; + irp = queue->pendingIrp; + queue->pendingIrp = NULL; + cancelRoutine = IoSetCancelRoutine(irp, NULL); + if (cancelRoutine) { + InsertTailList(&list, &irp->Tail.Overlay.ListEntry); + } + } + } + OvsReleaseEventQueueLock(); + while (!IsListEmpty(&list)) { + entry = RemoveHeadList(&list); + irp = CONTAINING_RECORD(entry, IRP, Tail.Overlay.ListEntry); + 
OVS_LOG_INFO("Wakeup thread with IRP: %p", irp); + OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS); + } + OVS_LOG_TRACE("Exit: triggered pollAll: %s", + (triggerPollAll ? "TRUE" : "FALSE")); +} + + +/* + * -------------------------------------------------------------------------- + * Subscribe for event notification. + * + * Results: + * STATUS_SUCCESS for valid request and enough resource. + * STATUS_NO_RESOURCES for queue allocation failure + * STATUS_INVALID_PARAMETER for invalid request + * + * Side effects: + * Event queue is created for the current open instance. + * -------------------------------------------------------------------------- + */ +NTSTATUS +OvsSubscribeEventIoctl(PFILE_OBJECT fileObject, + PVOID inputBuffer, + UINT32 inputLength) +{ + POVS_EVENT_SUBSCRIBE request = (POVS_EVENT_SUBSCRIBE)inputBuffer; + NTSTATUS status = STATUS_SUCCESS; + POVS_OPEN_INSTANCE instance; + POVS_EVENT_QUEUE queue = NULL; + + OVS_LOG_TRACE("Enter: fileObject: %p, inputLength: %d", fileObject, + inputLength); + + if (inputLength < sizeof (OVS_EVENT_SUBSCRIBE) || + (request->mask & OVS_EVENT_MASK_ALL) == 0) { + OVS_LOG_TRACE("Exit: subscribe failed with invalid request."); + return STATUS_INVALID_PARAMETER; + } + + OvsAcquireEventQueueLock(); + + instance = OvsGetOpenInstance(fileObject, request->dpNo); + + if (instance == NULL) { + status = STATUS_INVALID_PARAMETER; + OVS_LOG_WARN("can not find open instance"); + goto done_event_subscribe; + } + + /* + * XXX for now, we don't allow change mask. 
+ */ + queue = (POVS_EVENT_QUEUE)instance->eventQueue; + if (request->subscribe && queue) { + if (queue->mask != request->mask) { + status = STATUS_INVALID_PARAMETER; + OVS_LOG_WARN("Can not chnage mask when the queue is subscribed"); + } + status = STATUS_SUCCESS; + goto done_event_subscribe; + } else if (!request->subscribe && queue == NULL) { + status = STATUS_SUCCESS; + goto done_event_subscribe; + } + + if (request->subscribe) { + queue = (POVS_EVENT_QUEUE)OvsAllocateMemory(sizeof (OVS_EVENT_QUEUE)); + if (queue == NULL) { + status = STATUS_NO_MEMORY; + OVS_LOG_WARN("Fail to allocate event queue"); + goto done_event_subscribe; + } + InitializeListHead(&queue->elemList); + queue->mask = request->mask; + queue->pendingIrp = NULL; + queue->numElems = 0; + queue->pollAll = TRUE; /* always poll all in the begining */ + InsertHeadList(&ovsEventQueue, &queue->queueLink); + ovsNumEventQueue++; + instance->eventQueue = queue; + queue->instance = instance; + } else { + queue = (POVS_EVENT_QUEUE)instance->eventQueue; + RemoveEntryList(&queue->queueLink); + ovsNumEventQueue--; + instance->eventQueue = NULL; + } +done_event_subscribe: + if (!request->subscribe && queue) { + POVS_EVENT_QUEUE_ELEM elem; + PLIST_ENTRY link, next; + PIRP irp = NULL; + if (queue->pendingIrp) { + PDRIVER_CANCEL cancelRoutine; + irp = queue->pendingIrp; + queue->pendingIrp = NULL; + cancelRoutine = IoSetCancelRoutine(irp, NULL); + if (cancelRoutine == NULL) { + irp = NULL; + } + } + OvsReleaseEventQueueLock(); + if (irp) { + OvsCompleteIrpRequest(queue->pendingIrp, 0, STATUS_SUCCESS); + } + LIST_FORALL_SAFE(&queue->elemList, link, next) { + elem = CONTAINING_RECORD(link, OVS_EVENT_QUEUE_ELEM, link); + OvsFreeMemory(elem); + } + OvsFreeMemory(queue); + } else { + OvsReleaseEventQueueLock(); + } + OVS_LOG_TRACE("Exit: subscribe event with status: %#x.", status); + return status; +} + +/* + * -------------------------------------------------------------------------- + * Poll event queued in the 
event queue. always synchronous. + * + * Results: + * STATUS_SUCCESS for valid request + * STATUS_BUFFER_TOO_SMALL if outputBuffer is too small. + * STATUS_INVALID_PARAMETER for invalid request + * + * Side effects: + * Event will be removed from event queue. + * -------------------------------------------------------------------------- + */ +NTSTATUS +OvsPollEventIoctl(PFILE_OBJECT fileObject, + PVOID inputBuffer, + UINT32 inputLength, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + POVS_EVENT_POLL poll; + POVS_EVENT_STATUS eventStatus; + POVS_EVENT_ENTRY entry; + POVS_EVENT_QUEUE queue; + POVS_EVENT_QUEUE_ELEM elem; + POVS_OPEN_INSTANCE instance; + UINT32 numEntry, i; + + OVS_LOG_TRACE("Enter: inputLength:%d, outputLength: %d", + inputLength, outputLength); + + ASSERT(replyLen); + if (inputLength < sizeof (OVS_EVENT_POLL)) { + OVS_LOG_TRACE("Exit: input buffer too small"); + return STATUS_INVALID_PARAMETER; + } + *replyLen = sizeof (OVS_EVENT_STATUS) + sizeof (OVS_EVENT_ENTRY); + if (outputLength < *replyLen) { + OVS_LOG_TRACE("Exit: output buffer too small"); + return STATUS_BUFFER_TOO_SMALL; + } + poll = (POVS_EVENT_POLL)inputBuffer; + + OvsAcquireEventQueueLock(); + instance = OvsGetOpenInstance(fileObject, poll->dpNo); + if (instance == NULL) { + OvsReleaseEventQueueLock(); + *replyLen = 0; + OVS_LOG_TRACE("Exit: can not find Open instance"); + return STATUS_INVALID_PARAMETER; + } + + eventStatus = (POVS_EVENT_STATUS)outputBuffer; + numEntry = + (outputLength - sizeof (OVS_EVENT_STATUS)) / sizeof (OVS_EVENT_ENTRY); + queue = (POVS_EVENT_QUEUE)instance->eventQueue; + if (queue->pollAll) { + eventStatus->numberEntries = 1; + numEntry = 1; + entry = &eventStatus->eventEntries[0]; + entry->portNo = OVS_DEFAULT_PORT_NO; + entry->status = OVS_DEFAULT_EVENT_STATUS; + queue->pollAll = FALSE; + goto event_poll_done; + } + numEntry = MIN(numEntry, queue->numElems); + eventStatus->numberEntries = numEntry; + + for (i = 0; i < numEntry; i++) { + 
elem = (POVS_EVENT_QUEUE_ELEM)RemoveHeadList(&queue->elemList); + entry = &eventStatus->eventEntries[i]; + entry->portNo = elem->portNo; + entry->status = elem->status; + OvsFreeMemory(elem); + queue->numElems--; + } +event_poll_done: + OvsReleaseEventQueueLock(); + *replyLen = sizeof (OVS_EVENT_STATUS) + + numEntry * sizeof (OVS_EVENT_ENTRY); + OVS_LOG_TRACE("Exit: numEventPolled: %d", numEntry); + return STATUS_SUCCESS; +} + + +/* + * -------------------------------------------------------------------------- + * Cancel wait IRP for event + * + * Please note, when this routine is called, it is always guaranteed that + * IRP is valid. + * + * Side effects: Pending IRP is completed. + * -------------------------------------------------------------------------- + */ +VOID +OvsCancelIrp(PDEVICE_OBJECT deviceObject, + PIRP irp) +{ + PIO_STACK_LOCATION irpSp; + PFILE_OBJECT fileObject; + POVS_EVENT_QUEUE queue; + POVS_OPEN_INSTANCE instance; + + UNREFERENCED_PARAMETER(deviceObject); + + IoReleaseCancelSpinLock(irp->CancelIrql); + + irpSp = IoGetCurrentIrpStackLocation(irp); + fileObject = irpSp->FileObject; + + if (fileObject == NULL) { + goto done; + } + OvsAcquireEventQueueLock(); + instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; + if (instance == NULL || instance->eventQueue == NULL) { + OvsReleaseEventQueueLock(); + goto done; + } + queue = instance->eventQueue; + if (queue->pendingIrp == irp) { + queue->pendingIrp = NULL; + } + OvsReleaseEventQueueLock(); +done: + OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED); +} + +/* + * -------------------------------------------------------------------------- + * Wait for event. + * + * Results: + * STATUS_SUCCESS for valid request + * STATUS_DEVICE_BUSY if already in waiting state. + * STATUS_INVALID_PARAMETER for invalid request + * STATUS_PENDING wait for event + * + * Side effects: + * May return pending to IO manager. 
+ * -------------------------------------------------------------------------- + */ +NTSTATUS +OvsWaitEventIoctl(PIRP irp, + PFILE_OBJECT fileObject, + PVOID inputBuffer, + UINT32 inputLength) +{ + NTSTATUS status; + POVS_EVENT_POLL poll; + POVS_EVENT_QUEUE queue; + POVS_OPEN_INSTANCE instance; + BOOLEAN cancelled = FALSE; + OVS_LOG_TRACE("Enter: inputLength: %u", inputLength); + + if (inputLength < sizeof (OVS_EVENT_POLL)) { + OVS_LOG_TRACE("Exit: Invalid input buffer length."); + return STATUS_INVALID_PARAMETER; + } + poll = (POVS_EVENT_POLL)inputBuffer; + + OvsAcquireEventQueueLock(); + + instance = OvsGetOpenInstance(fileObject, poll->dpNo); + if (instance == NULL) { + OvsReleaseEventQueueLock(); + OVS_LOG_TRACE("Exit: Can not find open instance, dpNo: %d", poll->dpNo); + return STATUS_INVALID_PARAMETER; + } + + queue = (POVS_EVENT_QUEUE)instance->eventQueue; + if (queue->pendingIrp) { + OvsReleaseEventQueueLock(); + OVS_LOG_TRACE("Exit: Event queue already in pending state"); + return STATUS_DEVICE_BUSY; + } + + status = (queue->numElems != 0 || queue->pollAll) ? + STATUS_SUCCESS : STATUS_PENDING; + if (status == STATUS_PENDING) { + PDRIVER_CANCEL cancelRoutine; + IoMarkIrpPending(irp); + IoSetCancelRoutine(irp, OvsCancelIrp); + if (irp->Cancel) { + cancelRoutine = IoSetCancelRoutine(irp, NULL); + if (cancelRoutine) { + cancelled = TRUE; + } + } else { + queue->pendingIrp = irp; + } + } + OvsReleaseEventQueueLock(); + if (cancelled) { + OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED); + OVS_LOG_INFO("Event IRP cancelled: %p", irp); + } + OVS_LOG_TRACE("Exit: return status: %#x", status); + return status; +} diff --git a/datapath-windows/ovsext/OvsEvent.h b/datapath-windows/ovsext/OvsEvent.h new file mode 100644 index 000000000..4ae2ba29d --- /dev/null +++ b/datapath-windows/ovsext/OvsEvent.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2014 VMware, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OVS_EVENT_H_ +#define __OVS_EVENT_H_ 1 + +typedef struct _OVS_EVENT_QUEUE_ELEM { + LIST_ENTRY link; + UINT32 portNo; + UINT32 status; +} OVS_EVENT_QUEUE_ELEM, *POVS_EVENT_QUEUE_ELEM; + +typedef struct _OVS_EVENT_QUEUE { + LIST_ENTRY queueLink; + LIST_ENTRY elemList; + UINT32 mask; + UINT16 numElems; + BOOLEAN pollAll; + PIRP pendingIrp; + PVOID instance; +} OVS_EVENT_QUEUE, *POVS_EVENT_QUEUE; + +NTSTATUS OvsInitEventQueue(VOID); +VOID OvsCleanupEventQueue(VOID); + +struct _OVS_OPEN_INSTANCE; + +VOID OvsCleanupEvent(struct _OVS_OPEN_INSTANCE *instance); +VOID OvsPostEvent(UINT32 portNo, UINT32 status); +NTSTATUS OvsSubscribeEventIoctl(PFILE_OBJECT fileObject, PVOID inputBuffer, + UINT32 inputLength); +NTSTATUS OvsPollEventIoctl(PFILE_OBJECT fileObject, PVOID inputBuffer, + UINT32 inputLength, PVOID outputBuffer, + UINT32 outputLength, UINT32 *replyLen); +NTSTATUS OvsWaitEventIoctl(PIRP irp, PFILE_OBJECT fileObject, + PVOID inputBuffer, UINT32 inputLength); +#endif /* __OVS_EVENT_H_ */ diff --git a/datapath-windows/ovsext/OvsFlow.c b/datapath-windows/ovsext/OvsFlow.c new file mode 100644 index 000000000..daa64e007 --- /dev/null +++ b/datapath-windows/ovsext/OvsFlow.c @@ -0,0 +1,978 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "precomp.h" +#include "OvsNetProto.h" +#include "OvsUtil.h" +#include "OvsJhash.h" +#include "OvsFlow.h" +#include "OvsPacketParser.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_FLOW +#include "OvsDebug.h" + +#pragma warning( push ) +#pragma warning( disable:4127 ) + +extern PNDIS_SPIN_LOCK gOvsCtrlLock; +extern POVS_SWITCH_CONTEXT gOvsSwitchContext; + +static NTSTATUS ReportFlowInfo(OvsFlow *flow, UINT32 getFlags, + UINT32 getActionsLen, OvsFlowInfo *info); +static NTSTATUS HandleFlowPut(OvsFlowPut *put, + OVS_DATAPATH *datapath, + struct OvsFlowStats *stats); +static NTSTATUS OvsPrepareFlow(OvsFlow **flow, const OvsFlowPut *put, + UINT64 hash); +static VOID RemoveFlow(OVS_DATAPATH *datapath, OvsFlow **flow); +static VOID DeleteAllFlows(OVS_DATAPATH *datapath); +static NTSTATUS AddFlow(OVS_DATAPATH *datapath, OvsFlow *flow); +static VOID FreeFlow(OvsFlow *flow); +static VOID __inline *GetStartAddrNBL(const NET_BUFFER_LIST *_pNB); + +#define OVS_FLOW_TABLE_SIZE 2048 +#define OVS_FLOW_TABLE_MASK (OVS_FLOW_TABLE_SIZE -1) +#define HASH_BUCKET(hash) ((hash) & OVS_FLOW_TABLE_MASK) + +/* + *---------------------------------------------------------------------------- + * OvsDeleteFlowTable -- + * Results: + * NDIS_STATUS_SUCCESS always. 
+ *---------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsDeleteFlowTable(OVS_DATAPATH *datapath) +{ + if (datapath == NULL || datapath->flowTable == NULL) { + return NDIS_STATUS_SUCCESS; + } + + DeleteAllFlows(datapath); + OvsFreeMemory(datapath->flowTable); + datapath->flowTable = NULL; + NdisFreeRWLock(datapath->lock); + + return NDIS_STATUS_SUCCESS; +} + +/* + *---------------------------------------------------------------------------- + * OvsAllocateFlowTable -- + * Results: + * NDIS_STATUS_SUCCESS on success. + * NDIS_STATUS_RESOURCES if memory couldn't be allocated + *---------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsAllocateFlowTable(OVS_DATAPATH *datapath, + POVS_SWITCH_CONTEXT switchContext) +{ + PLIST_ENTRY bucket; + int i; + + datapath->flowTable = OvsAllocateMemory(OVS_FLOW_TABLE_SIZE * + sizeof (LIST_ENTRY)); + if (!datapath->flowTable) { + return NDIS_STATUS_RESOURCES; + } + for (i = 0; i < OVS_FLOW_TABLE_SIZE; i++) { + bucket = &(datapath->flowTable[i]); + InitializeListHead(bucket); + } + datapath->lock = NdisAllocateRWLock(switchContext->NdisFilterHandle); + + return NDIS_STATUS_SUCCESS; +} + + +/* + *---------------------------------------------------------------------------- + * GetStartAddrNBL -- + * Get the virtual address of the frame. + * + * Results: + * Virtual address of the frame. + *---------------------------------------------------------------------------- + */ +static __inline VOID * +GetStartAddrNBL(const NET_BUFFER_LIST *_pNB) +{ + PMDL curMdl; + PUINT8 curBuffer; + PEthHdr curHeader; + + ASSERT(_pNB); + + // Ethernet Header is a guaranteed safe access. 
+ curMdl = (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdl; + curBuffer = MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); + if (!curBuffer) { + return NULL; + } + + curHeader = (PEthHdr) + (curBuffer + (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdlOffset); + + return (VOID *) curHeader; +} + +VOID +OvsFlowUsed(OvsFlow *flow, + const NET_BUFFER_LIST *packet, + const POVS_PACKET_HDR_INFO layers) +{ + LARGE_INTEGER tickCount; + + KeQueryTickCount(&tickCount); + flow->used = tickCount.QuadPart * ovsTimeIncrementPerTick; + flow->used += ovsUserTimestampDelta; + flow->packetCount++; + flow->byteCount += OvsPacketLenNBL(packet); + flow->tcpFlags |= OvsGetTcpFlags(packet, &flow->key, layers); +} + + +VOID +DeleteAllFlows(OVS_DATAPATH *datapath) +{ + INT i; + PLIST_ENTRY bucket; + + for (i = 0; i < OVS_FLOW_TABLE_SIZE; i++) { + PLIST_ENTRY next; + bucket = &(datapath->flowTable[i]); + while (!IsListEmpty(bucket)) { + OvsFlow *flow; + next = bucket->Flink; + flow = CONTAINING_RECORD(next, OvsFlow, ListEntry); + RemoveFlow(datapath, &flow); + } + } +} + +/* + *---------------------------------------------------------------------------- + * Initializes 'flow' members from 'packet', 'skb_priority', 'tun_id', and + * 'ofp_in_port'. + * + * Initializes 'packet' header pointers as follows: + * + * - packet->l2 to the start of the Ethernet header. + * + * - packet->l3 to just past the Ethernet header, or just past the + * vlan_header if one is present, to the first byte of the payload of the + * Ethernet frame. + * + * - packet->l4 to just past the IPv4 header, if one is present and has a + * correct length, and otherwise NULL. + * + * - packet->l7 to just past the TCP or UDP or ICMP header, if one is + * present and has a correct length, and otherwise NULL. + * + * Returns NDIS_STATUS_SUCCESS normally. Fails only if packet data cannot be accessed + * (e.g. if Pkt_CopyBytesOut() returns an error). 
+ *---------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsExtractFlow(const NET_BUFFER_LIST *packet, + UINT32 inPort, + OvsFlowKey *flow, + POVS_PACKET_HDR_INFO layers, + OvsIPv4TunnelKey *tunKey) +{ + struct Eth_Header *eth; + UINT8 offset = 0; + PVOID vlanTagValue; + + layers->value = 0; + + if (tunKey) { + ASSERT(tunKey->dst != 0); + RtlMoveMemory(&flow->tunKey, tunKey, sizeof flow->tunKey); + flow->l2.offset = 0; + } else { + flow->tunKey.dst = 0; + flow->l2.offset = OVS_WIN_TUNNEL_KEY_SIZE; + } + + flow->l2.inPort = inPort; + + if ( OvsPacketLenNBL(packet) < ETH_HEADER_LEN_DIX) { + flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + 8 - flow->l2.offset; + return NDIS_STATUS_SUCCESS; + } + + /* Link layer. */ + eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet); + memcpy(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH); + memcpy(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH); + + /* + * vlan_tci. + */ + vlanTagValue = NET_BUFFER_LIST_INFO(packet, Ieee8021QNetBufferListInfo); + if (vlanTagValue) { + PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag = + (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue; + flow->l2.vlanTci = htons(vlanTag->TagHeader.VlanId | OVSWIN_VLAN_CFI | + (vlanTag->TagHeader.UserPriority << 13)); + } else { + if (eth->dix.typeNBO == ETH_TYPE_802_1PQ_NBO) { + Eth_802_1pq_Tag *tag= (Eth_802_1pq_Tag *)ð->dix.typeNBO; + flow->l2.vlanTci = ((UINT16)tag->priority << 13) | + OVSWIN_VLAN_CFI | + ((UINT16)tag->vidHi << 8) | tag->vidLo; + offset = sizeof (Eth_802_1pq_Tag); + } else { + flow->l2.vlanTci = 0; + } + /* + * XXX + * Please note after this point, src mac and dst mac should + * not be accessed through eth + */ + eth = (Eth_Header *)((UINT8 *)eth + offset); + } + + /* + * dl_type. + * + * XXX assume that at least the first + * 12 bytes of received packets are mapped. This code has the stronger + * assumption that at least the first 22 bytes of 'packet' is mapped (if my + * arithmetic is right). 
+ */ + if (ETH_TYPENOT8023(eth->dix.typeNBO)) { + flow->l2.dlType = eth->dix.typeNBO; + layers->l3Offset = ETH_HEADER_LEN_DIX + offset; + } else if (OvsPacketLenNBL(packet) >= ETH_HEADER_LEN_802_3 && + eth->e802_3.llc.dsap == 0xaa && + eth->e802_3.llc.ssap == 0xaa && + eth->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME && + eth->e802_3.snap.snapOrg[0] == 0x00 && + eth->e802_3.snap.snapOrg[1] == 0x00 && + eth->e802_3.snap.snapOrg[2] == 0x00) { + flow->l2.dlType = eth->e802_3.snap.snapType.typeNBO; + layers->l3Offset = ETH_HEADER_LEN_802_3 + offset; + } else { + flow->l2.dlType = htons(OVSWIN_DL_TYPE_NONE); + layers->l3Offset = ETH_HEADER_LEN_DIX + offset; + } + + flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE - flow->l2.offset; + /* Network layer. */ + if (flow->l2.dlType == htons(ETH_TYPE_IPV4)) { + struct IPHdr ip_storage; + const struct IPHdr *nh; + IpKey *ipKey = &flow->ipKey; + + flow->l2.keyLen += OVS_IP_KEY_SIZE; + layers->isIPv4 = 1; + nh = OvsGetIp(packet, layers->l3Offset, &ip_storage); + if (nh) { + layers->l4Offset = layers->l3Offset + nh->ihl * 4; + + ipKey->nwSrc = nh->saddr; + ipKey->nwDst = nh->daddr; + ipKey->nwProto = nh->protocol; + + ipKey->nwTos = nh->tos; + if (nh->frag_off & htons(IP_MF | IP_OFFSET)) { + ipKey->nwFrag = OVSWIN_NW_FRAG_ANY; + if (nh->frag_off & htons(IP_OFFSET)) { + ipKey->nwFrag |= OVSWIN_NW_FRAG_LATER; + } + } else { + ipKey->nwFrag = 0; + } + + ipKey->nwTtl = nh->ttl; + ipKey->l4.tpSrc = 0; + ipKey->l4.tpDst = 0; + + if (!(nh->frag_off & htons(IP_OFFSET))) { + if (ipKey->nwProto == SOCKET_IPPROTO_TCP) { + OvsParseTcp(packet, &ipKey->l4, layers); + } else if (ipKey->nwProto == SOCKET_IPPROTO_UDP) { + OvsParseUdp(packet, &ipKey->l4, layers); + } else if (ipKey->nwProto == SOCKET_IPPROTO_ICMP) { + ICMPHdr icmpStorage; + const ICMPHdr *icmp; + + icmp = OvsGetIcmp(packet, layers->l4Offset, &icmpStorage); + if (icmp) { + ipKey->l4.tpSrc = htons(icmp->type); + ipKey->l4.tpDst = htons(icmp->code); + layers->l7Offset = 
layers->l4Offset + sizeof *icmp; + } + } + } + } else { + ((UINT64 *)ipKey)[0] = 0; + ((UINT64 *)ipKey)[1] = 0; + } + } else if (flow->l2.dlType == htons(ETH_TYPE_IPV6)) { + NDIS_STATUS status; + flow->l2.keyLen += OVS_IPV6_KEY_SIZE; + status = OvsParseIPv6(packet, flow, layers); + if (status != NDIS_STATUS_SUCCESS) { + memset(&flow->ipv6Key, 0, sizeof (Ipv6Key)); + return status; + } + layers->isIPv6 = 1; + flow->ipv6Key.l4.tpSrc = 0; + flow->ipv6Key.l4.tpDst = 0; + flow->ipv6Key.pad = 0; + + if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_TCP) { + OvsParseTcp(packet, &(flow->ipv6Key.l4), layers); + } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_UDP) { + OvsParseUdp(packet, &(flow->ipv6Key.l4), layers); + } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) { + OvsParseIcmpV6(packet, flow, layers); + flow->l2.keyLen += (OVS_ICMPV6_KEY_SIZE - OVS_IPV6_KEY_SIZE); + } + } else if (flow->l2.dlType == htons(ETH_TYPE_ARP)) { + EtherArp arpStorage; + const EtherArp *arp; + ArpKey *arpKey = &flow->arpKey; + ((UINT64 *)arpKey)[0] = 0; + ((UINT64 *)arpKey)[1] = 0; + ((UINT64 *)arpKey)[2] = 0; + flow->l2.keyLen += OVS_ARP_KEY_SIZE; + arp = OvsGetArp(packet, layers->l3Offset, &arpStorage); + if (arp && arp->ea_hdr.ar_hrd == htons(1) && + arp->ea_hdr.ar_pro == htons(ETH_TYPE_IPV4) && + arp->ea_hdr.ar_hln == ETH_ADDR_LENGTH && + arp->ea_hdr.ar_pln == 4) { + /* We only match on the lower 8 bits of the opcode. 
*/ + if (ntohs(arp->ea_hdr.ar_op) <= 0xff) { + arpKey->nwProto = (UINT8)ntohs(arp->ea_hdr.ar_op); + } + if (arpKey->nwProto == ARPOP_REQUEST + || arpKey->nwProto == ARPOP_REPLY) { + memcpy(&arpKey->nwSrc, arp->arp_spa, 4); + memcpy(&arpKey->nwDst, arp->arp_tpa, 4); + memcpy(arpKey->arpSha, arp->arp_sha, ETH_ADDR_LENGTH); + memcpy(arpKey->arpTha, arp->arp_tha, ETH_ADDR_LENGTH); + } + } + } + + return NDIS_STATUS_SUCCESS; +} + +__inline BOOLEAN +FlowEqual(UINT64 *src, UINT64 *dst, UINT32 size) +{ + UINT32 i; + ASSERT((size & 0x7) == 0); + ASSERT(((UINT64)src & 0x7) == 0); + ASSERT(((UINT64)dst & 0x7) == 0); + for (i = 0; i < (size >> 3); i++) { + if (src[i] != dst[i]) { + return FALSE; + } + } + return TRUE; +} + + +/* + * ---------------------------------------------------------------------------- + * AddFlow -- + * Add a flow to flow table. + * + * Results: + * NDIS_STATUS_SUCCESS if no same flow in the flow table. + * ---------------------------------------------------------------------------- + */ +NTSTATUS +AddFlow(OVS_DATAPATH *datapath, OvsFlow *flow) +{ + PLIST_ENTRY head; + + if (OvsLookupFlow(datapath, &flow->key, &flow->hash, TRUE) != NULL) { + return STATUS_INVALID_HANDLE; + } + + head = &(datapath->flowTable[HASH_BUCKET(flow->hash)]); + /* + * We need fence here to make sure flow's nextPtr is updated before + * head->nextPtr is updated. 
+ */ + KeMemoryBarrier(); + + //KeAcquireSpinLock(&FilterDeviceExtension->NblQueueLock, &oldIrql); + InsertTailList(head, &flow->ListEntry); + //KeReleaseSpinLock(&FilterDeviceExtension->NblQueueLock, oldIrql); + + datapath->nFlows++; + + return STATUS_SUCCESS; +} + + +/* ---------------------------------------------------------------------------- + * RemoveFlow -- + * Remove a flow from flow table, and added to wait list + * ---------------------------------------------------------------------------- + */ +VOID +RemoveFlow(OVS_DATAPATH *datapath, + OvsFlow **flow) +{ + OvsFlow *f = *flow; + *flow = NULL; + UNREFERENCED_PARAMETER(datapath); + + ASSERT(datapath->nFlows); + datapath->nFlows--; + // Remove the flow from queue + RemoveEntryList(&f->ListEntry); + FreeFlow(f); +} + + +/* + * ---------------------------------------------------------------------------- + * OvsLookupFlow -- + * + * Find flow from flow table based on flow key. + * Caller should either hold portset handle or should + * have a flowRef in datapath or Acquired datapath. + * + * Results: + * Flow pointer if lookup successful. + * NULL if not exists. 
+ * ---------------------------------------------------------------------------- + */ +OvsFlow * +OvsLookupFlow(OVS_DATAPATH *datapath, + const OvsFlowKey *key, + UINT64 *hash, + BOOLEAN hashValid) +{ + PLIST_ENTRY link, head; + UINT16 offset = key->l2.offset; + UINT16 size = key->l2.keyLen; + UINT8 *start; + + ASSERT(key->tunKey.dst || offset == sizeof (OvsIPv4TunnelKey)); + ASSERT(!key->tunKey.dst || offset == 0); + + start = (UINT8 *)key + offset; + + if (!hashValid) { + *hash = OvsJhashBytes(start, size, 0); + } + + head = &datapath->flowTable[HASH_BUCKET(*hash)]; + link = head->Flink; + while (link != head) { + OvsFlow *flow = CONTAINING_RECORD(link, OvsFlow, ListEntry); + + if (flow->hash == *hash && + flow->key.l2.val == key->l2.val && + FlowEqual((UINT64 *)((uint8 *)&flow->key + offset), + (UINT64 *)start, size)) { + return flow; + } + link = link->Flink; + } + return NULL; +} + + +/* + * ---------------------------------------------------------------------------- + * OvsHashFlow -- + * Calculate the hash for the given flow key. + * ---------------------------------------------------------------------------- + */ +UINT64 +OvsHashFlow(const OvsFlowKey *key) +{ + UINT16 offset = key->l2.offset; + UINT16 size = key->l2.keyLen; + UINT8 *start; + + ASSERT(key->tunKey.dst || offset == sizeof (OvsIPv4TunnelKey)); + ASSERT(!key->tunKey.dst || offset == 0); + start = (UINT8 *)key + offset; + return OvsJhashBytes(start, size, 0); +} + + +/* + * ---------------------------------------------------------------------------- + * FreeFlow -- + * Free a flow and its actions. 
+ * ---------------------------------------------------------------------------- + */ +VOID +FreeFlow(OvsFlow *flow) +{ + ASSERT(flow); + OvsFreeMemory(flow); +} + +NTSTATUS +OvsDoDumpFlows(OvsFlowDumpInput *dumpInput, + OvsFlowDumpOutput *dumpOutput, + UINT32 *replyLen) +{ + UINT32 dpNo; + OVS_DATAPATH *datapath = NULL; + OvsFlow *flow; + PLIST_ENTRY node, head; + UINT32 column = 0; + UINT32 rowIndex, columnIndex; + LOCK_STATE_EX dpLockState; + NTSTATUS status = STATUS_SUCCESS; + BOOLEAN findNextNonEmpty = FALSE; + + dpNo = dumpInput->dpNo; + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != dpNo) { + status = STATUS_INVALID_PARAMETER; + goto unlock; + } + + rowIndex = dumpInput->position[0]; + if (rowIndex >= OVS_FLOW_TABLE_SIZE) { + dumpOutput->n = 0; + *replyLen = sizeof(*dumpOutput); + goto unlock; + } + + columnIndex = dumpInput->position[1]; + + datapath = &gOvsSwitchContext->datapath; + ASSERT(datapath); + OvsAcquireDatapathRead(datapath, &dpLockState, FALSE); + + head = &datapath->flowTable[rowIndex]; + node = head->Flink; + + while (column < columnIndex) { + if (node == head) { + break; + } + node = node->Flink; + column++; + } + + if (node == head) { + findNextNonEmpty = TRUE; + columnIndex = 0; + } + + if (findNextNonEmpty) { + while (head == node) { + if (++rowIndex >= OVS_FLOW_TABLE_SIZE) { + dumpOutput->n = 0; + goto dp_unlock; + } + head = &datapath->flowTable[rowIndex]; + node = head->Flink; + } + } + + ASSERT(node != head); + ASSERT(rowIndex < OVS_FLOW_TABLE_SIZE); + + flow = CONTAINING_RECORD(node, OvsFlow, ListEntry); + status = ReportFlowInfo(flow, dumpInput->getFlags, dumpInput->actionsLen, + &dumpOutput->flow); + + if (status == STATUS_BUFFER_TOO_SMALL) { + dumpOutput->n = sizeof(OvsFlowDumpOutput) + flow->actionsLen; + *replyLen = sizeof(*dumpOutput); + } else { + dumpOutput->n = 1; //one flow reported. 
+ *replyLen = sizeof(*dumpOutput) + dumpOutput->flow.actionsLen; + } + + dumpOutput->position[0] = rowIndex; + dumpOutput->position[1] = ++columnIndex; + +dp_unlock: + OvsReleaseDatapath(datapath, &dpLockState); + +unlock: + NdisReleaseSpinLock(gOvsCtrlLock); + return status; +} + +NTSTATUS +OvsDumpFlowIoctl(PVOID inputBuffer, + UINT32 inputLength, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + OvsFlowDumpOutput *dumpOutput = (OvsFlowDumpOutput *)outputBuffer; + OvsFlowDumpInput *dumpInput = (OvsFlowDumpInput *)inputBuffer; + + if (inputBuffer == NULL || outputBuffer == NULL) { + return STATUS_INVALID_PARAMETER; + } + + if ((inputLength != sizeof(OvsFlowDumpInput)) + || (outputLength != sizeof *dumpOutput + dumpInput->actionsLen)) { + return STATUS_INFO_LENGTH_MISMATCH; + } + + return OvsDoDumpFlows(dumpInput, dumpOutput, replyLen); +} + +static NTSTATUS +ReportFlowInfo(OvsFlow *flow, + UINT32 getFlags, + UINT32 getActionsLen, + OvsFlowInfo *info) +{ + NTSTATUS status = STATUS_SUCCESS; + + if (getFlags & FLOW_GET_KEY) { + // always copy the tunnel key part + RtlCopyMemory(&info->key, &flow->key, + flow->key.l2.keyLen + flow->key.l2.offset); + } + + if (getFlags & FLOW_GET_STATS) { + OvsFlowStats *stats = &info->stats; + stats->packetCount = flow->packetCount; + stats->byteCount = flow->byteCount; + stats->used = (UINT32)flow->used; + stats->tcpFlags = flow->tcpFlags; + } + + if (getFlags & FLOW_GET_ACTIONS) { + if (flow->actionsLen == 0) { + info->actionsLen = 0; + } else if (flow->actionsLen > getActionsLen) { + info->actionsLen = 0; + status = STATUS_BUFFER_TOO_SMALL; + } else { + RtlCopyMemory(info->actions, flow->actions, flow->actionsLen); + info->actionsLen = flow->actionsLen; + } + } + + return status; +} + +NTSTATUS +OvsPutFlowIoctl(PVOID inputBuffer, + UINT32 inputLength, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + NTSTATUS status = STATUS_SUCCESS; + OVS_DATAPATH *datapath = NULL; + struct OvsFlowStats 
stats; + ULONG actionsLen; + OvsFlowPut *put; + UINT32 dpNo; + LOCK_STATE_EX dpLockState; + + if ((inputLength < sizeof(OvsFlowPut)) || (inputBuffer == NULL)) { + return STATUS_INFO_LENGTH_MISMATCH; + } + + if ((outputLength != sizeof(stats)) || (outputBuffer == NULL)) { + return STATUS_INFO_LENGTH_MISMATCH; + } + + put = (OvsFlowPut *)inputBuffer; + if (put->actionsLen > 0) { + actionsLen = put->actionsLen; + } else { + actionsLen = 0; + } + if (inputLength != actionsLen + sizeof(*put)) { + return STATUS_INFO_LENGTH_MISMATCH; + } + + dpNo = put->dpNo; + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != dpNo) { + status = STATUS_INVALID_PARAMETER; + goto unlock; + } + + datapath = &gOvsSwitchContext->datapath; + ASSERT(datapath); + RtlZeroMemory(&stats, sizeof(stats)); + OvsAcquireDatapathWrite(datapath, &dpLockState, FALSE); + status = HandleFlowPut(put, datapath, &stats); + OvsReleaseDatapath(datapath, &dpLockState); + + if (status == STATUS_SUCCESS) { + // Copy stats to User mode app + NdisMoveMemory(outputBuffer, (PVOID)&stats, sizeof(stats)); + *replyLen = sizeof stats; + } + +unlock: + NdisReleaseSpinLock(gOvsCtrlLock); + return status; +} + + +/* Handles flow add, modify as well as delete */ +static NTSTATUS +HandleFlowPut(OvsFlowPut *put, + OVS_DATAPATH *datapath, + struct OvsFlowStats *stats) +{ + BOOLEAN mayCreate, mayModify, mayDelete; + OvsFlow *KernelFlow; + UINT64 hash; + NTSTATUS status = STATUS_SUCCESS; + + mayCreate = (put->flags & OVSWIN_FLOW_PUT_CREATE) != 0; + mayModify = (put->flags & OVSWIN_FLOW_PUT_MODIFY) != 0; + mayDelete = (put->flags & OVSWIN_FLOW_PUT_DELETE) != 0; + + if ((mayCreate || mayModify) == mayDelete) { + return STATUS_INVALID_PARAMETER; + } + + KernelFlow = OvsLookupFlow(datapath, &put->key, &hash, FALSE); + if (!KernelFlow) { + if (!mayCreate) { + return STATUS_INVALID_PARAMETER; + } + + status = OvsPrepareFlow(&KernelFlow, put, hash); + if (status != STATUS_SUCCESS) { + 
FreeFlow(KernelFlow); + return STATUS_UNSUCCESSFUL; + } + + status = AddFlow(datapath, KernelFlow); + if (status != STATUS_SUCCESS) { + FreeFlow(KernelFlow); + return STATUS_UNSUCCESSFUL; + } + + /* Validate the flow addition */ + { + UINT64 newHash; + OvsFlow *flow = OvsLookupFlow(datapath, &put->key, &newHash, + FALSE); + ASSERT(flow); + ASSERT(newHash == hash); + if (!flow || newHash != hash) { + return STATUS_UNSUCCESSFUL; + } + } + } else { + stats->packetCount = KernelFlow->packetCount; + stats->byteCount = KernelFlow->byteCount; + stats->tcpFlags = KernelFlow->tcpFlags; + stats->used = (UINT32)KernelFlow->used; + + if (mayModify) { + OvsFlow *newFlow; + status = OvsPrepareFlow(&newFlow, put, hash); + if (status != STATUS_SUCCESS) { + return STATUS_UNSUCCESSFUL; + } + + KernelFlow = OvsLookupFlow(datapath, &put->key, &hash, TRUE); + if (KernelFlow) { + if ((put->flags & OVSWIN_FLOW_PUT_CLEAR) == 0) { + newFlow->packetCount = KernelFlow->packetCount; + newFlow->byteCount = KernelFlow->byteCount; + newFlow->tcpFlags = KernelFlow->tcpFlags; + } + RemoveFlow(datapath, &KernelFlow); + } else { + if ((put->flags & OVSWIN_FLOW_PUT_CLEAR) == 0) { + newFlow->packetCount = stats->packetCount; + newFlow->byteCount = stats->byteCount; + newFlow->tcpFlags = stats->tcpFlags; + } + } + status = AddFlow(datapath, newFlow); + ASSERT(status == STATUS_SUCCESS); + + /* Validate the flow addition */ + { + UINT64 newHash; + OvsFlow *testflow = OvsLookupFlow(datapath, &put->key, + &newHash, FALSE); + ASSERT(testflow); + ASSERT(newHash == hash); + if (!testflow || newHash != hash) { + FreeFlow(newFlow); + return STATUS_UNSUCCESSFUL; + } + } + } else { + if (mayDelete) { + if (KernelFlow) { + RemoveFlow(datapath, &KernelFlow); + } + } else { + return STATUS_UNSUCCESSFUL; + } + } + } + return STATUS_SUCCESS; +} + +static NTSTATUS +OvsPrepareFlow(OvsFlow **flow, + const OvsFlowPut *put, + UINT64 hash) +{ + OvsFlow *localFlow = *flow; + NTSTATUS status = STATUS_SUCCESS; + + do { + *flow 
= localFlow = + OvsAllocateMemory(sizeof(OvsFlow) + put->actionsLen); + if (localFlow == NULL) { + status = STATUS_NO_MEMORY; + break; + } + + localFlow->key = put->key; + localFlow->actionsLen = put->actionsLen; + if (put->actionsLen) { + NdisMoveMemory((PUCHAR)localFlow->actions, put->actions, + put->actionsLen); + } + localFlow->userActionsLen = 0; // 0 indicate no conversion is made + localFlow->used = 0; + localFlow->packetCount = 0; + localFlow->byteCount = 0; + localFlow->tcpFlags = 0; + localFlow->hash = hash; + } while(FALSE); + + return status; +} + +NTSTATUS +OvsGetFlowIoctl(PVOID inputBuffer, + UINT32 inputLength, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + NTSTATUS status = STATUS_SUCCESS; + OVS_DATAPATH *datapath = NULL; + OvsFlow *flow; + UINT32 getFlags, getActionsLen; + OvsFlowGetInput *getInput; + OvsFlowGetOutput *getOutput; + UINT64 hash; + UINT32 dpNo; + LOCK_STATE_EX dpLockState; + + if (inputLength != sizeof(OvsFlowGetInput) + || inputBuffer == NULL) { + return STATUS_INFO_LENGTH_MISMATCH; + } + + getInput = (OvsFlowGetInput *) inputBuffer; + getFlags = getInput->getFlags; + getActionsLen = getInput->actionsLen; + if (getInput->getFlags & FLOW_GET_KEY) { + return STATUS_INVALID_PARAMETER; + } + + if (outputBuffer == NULL + || outputLength != (sizeof *getOutput + + getInput->actionsLen)) { + return STATUS_INFO_LENGTH_MISMATCH; + } + + dpNo = getInput->dpNo; + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != dpNo) { + status = STATUS_INVALID_PARAMETER; + goto unlock; + } + + datapath = &gOvsSwitchContext->datapath; + ASSERT(datapath); + OvsAcquireDatapathRead(datapath, &dpLockState, FALSE); + flow = OvsLookupFlow(datapath, &getInput->key, &hash, FALSE); + if (!flow) { + status = STATUS_INVALID_PARAMETER; + goto dp_unlock; + } + + // XXX: can be optimized to return only how much is written out + *replyLen = outputLength; + getOutput = (OvsFlowGetOutput *)outputBuffer; + 
ReportFlowInfo(flow, getFlags, getActionsLen, &getOutput->info); + +dp_unlock: + OvsReleaseDatapath(datapath, &dpLockState); +unlock: + NdisReleaseSpinLock(gOvsCtrlLock); + return status; +} + +NTSTATUS +OvsFlushFlowIoctl(PVOID inputBuffer, + UINT32 inputLength) +{ + NTSTATUS status = STATUS_SUCCESS; + OVS_DATAPATH *datapath = NULL; + UINT32 dpNo; + LOCK_STATE_EX dpLockState; + + if (inputLength != sizeof(UINT32) || inputBuffer == NULL) { + return STATUS_INFO_LENGTH_MISMATCH; + } + + dpNo = *(UINT32 *)inputBuffer; + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != dpNo) { + status = STATUS_INVALID_PARAMETER; + goto unlock; + } + + datapath = &gOvsSwitchContext->datapath; + ASSERT(datapath); + OvsAcquireDatapathWrite(datapath, &dpLockState, FALSE); + DeleteAllFlows(datapath); + OvsReleaseDatapath(datapath, &dpLockState); + +unlock: + NdisReleaseSpinLock(gOvsCtrlLock); + return status; +} + +#pragma warning( pop ) diff --git a/datapath-windows/ovsext/OvsFlow.h b/datapath-windows/ovsext/OvsFlow.h new file mode 100644 index 000000000..93368b33b --- /dev/null +++ b/datapath-windows/ovsext/OvsFlow.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __OVS_FLOW_H_ +#define __OVS_FLOW_H_ 1 + +#include "precomp.h" +#include "OvsSwitch.h" +#include "OvsUser.h" +#include "OvsNetProto.h" + +typedef struct _OvsFlow { + LIST_ENTRY ListEntry; // In Datapath's flowTable. + OvsFlowKey key; + UINT64 hash; + UINT32 actionsLen; + UINT8 tcpFlags; + UINT64 used; + UINT64 packetCount; + UINT64 byteCount; + UINT32 userActionsLen; // used for flow query + UINT32 actionBufferLen; // used for flow reuse + struct nlattr actions[1]; +} OvsFlow; + + +typedef struct _OvsLayers { + UINT32 l3Ofs; // IPv4, IPv6, ARP, or other L3 header. + UINT32 l4Ofs; // TCP, UDP, ICMP, ICMPv6, or other L4 header. + UINT32 l7Ofs; // L4 protocol's payload. +} OvsLayers; + +extern UINT64 ovsUserTimestampDelta; +extern UINT64 ovsTimeIncrementPerTick; + +NDIS_STATUS OvsDeleteFlowTable(OVS_DATAPATH *datapath); +NDIS_STATUS OvsAllocateFlowTable(OVS_DATAPATH *datapath, + POVS_SWITCH_CONTEXT switchContext); + +NDIS_STATUS OvsExtractFlow(const NET_BUFFER_LIST *pkt, UINT32 inPort, + OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers, + OvsIPv4TunnelKey *tunKey); +OvsFlow *OvsLookupFlow(OVS_DATAPATH *datapath, const OvsFlowKey *key, + UINT64 *hash, BOOLEAN hashValid); +UINT64 OvsHashFlow(const OvsFlowKey *key); +VOID OvsFlowUsed(OvsFlow *flow, const NET_BUFFER_LIST *pkt, + const POVS_PACKET_HDR_INFO layers); + +NTSTATUS OvsDumpFlowIoctl(PVOID inputBuffer, UINT32 inputLength, + PVOID outputBuffer, UINT32 outputLength, + UINT32 *replyLen); +NTSTATUS OvsPutFlowIoctl(PVOID inputBuffer, UINT32 inputLength, + PVOID outputBuffer, UINT32 outputLength, + UINT32 *replyLen); +NTSTATUS OvsGetFlowIoctl(PVOID inputBuffer, UINT32 inputLength, + PVOID outputBuffer, UINT32 outputLength, + UINT32 *replyLen); +NTSTATUS OvsFlushFlowIoctl(PVOID inputBuffer, UINT32 inputLength); + +/* Flags for tunneling */ +#define OVS_TNL_F_DONT_FRAGMENT (1 << 0) +#define OVS_TNL_F_CSUM (1 << 1) +#define OVS_TNL_F_KEY (1 << 2) + +#endif /* __OVS_FLOW_H_ */ diff --git 
a/datapath-windows/ovsext/OvsIoctl.c b/datapath-windows/ovsext/OvsIoctl.c new file mode 100644 index 000000000..893cbf744 --- /dev/null +++ b/datapath-windows/ovsext/OvsIoctl.c @@ -0,0 +1,758 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "precomp.h" +#include "OvsIoctl.h" +#include "OvsJhash.h" +#include "OvsSwitch.h" +#include "OvsVport.h" +#include "OvsEvent.h" +#include "OvsUser.h" +#include "OvsPacketIO.h" +#include "OvsNetProto.h" +#include "OvsFlow.h" +#include "OvsUser.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_IOCTL +#include "OvsDebug.h" + +/* Handles to the device object for communication with userspace. */ +NDIS_HANDLE gOvsDeviceHandle; +PDEVICE_OBJECT gOvsDeviceObject; + +/* + * There seems to be a skew between the kernel's version of current time and + * the userspace's version of current time. The skew was seen to + * monotonically increase as well. + * + * In order to deal with the situation, we pass down the userspace's version + * of the timestamp to the kernel, and let the kernel calculate the delta. 
+ */ +UINT64 ovsUserTimestampDelta; +UINT64 ovsTimeIncrementPerTick; + +_Dispatch_type_(IRP_MJ_CREATE) +_Dispatch_type_(IRP_MJ_CLOSE) +DRIVER_DISPATCH OvsOpenCloseDevice; + +_Dispatch_type_(IRP_MJ_CLEANUP) +DRIVER_DISPATCH OvsCleanupDevice; + +_Dispatch_type_(IRP_MJ_DEVICE_CONTROL) +DRIVER_DISPATCH OvsDeviceControl; + +#ifdef ALLOC_PRAGMA +#pragma alloc_text(INIT, OvsCreateDeviceObject) +#pragma alloc_text(PAGE, OvsOpenCloseDevice) +#pragma alloc_text(PAGE, OvsCleanupDevice) +#pragma alloc_text(PAGE, OvsDeviceControl) +#endif // ALLOC_PRAGMA + + +#define OVS_MAX_OPEN_INSTANCES 128 + +POVS_OPEN_INSTANCE ovsOpenInstanceArray[OVS_MAX_OPEN_INSTANCES]; +UINT32 ovsNumberOfOpenInstances; +extern POVS_SWITCH_CONTEXT gOvsSwitchContext; + +NDIS_SPIN_LOCK ovsCtrlLockObj; +NDIS_SPIN_LOCK ovsFlowLockObj; +PNDIS_SPIN_LOCK gOvsCtrlLock; +PNDIS_SPIN_LOCK ovsFlowLock; + +VOID +OvsInitIoctl() +{ + gOvsCtrlLock = &ovsCtrlLockObj; + ovsFlowLock = &ovsFlowLockObj; + NdisAllocateSpinLock(ovsFlowLock); + NdisAllocateSpinLock(gOvsCtrlLock); +} + +VOID +OvsCleanupIoctl() +{ + if (ovsFlowLock) { + NdisFreeSpinLock(ovsFlowLock); + NdisFreeSpinLock(gOvsCtrlLock); + gOvsCtrlLock = NULL; + gOvsCtrlLock = NULL; + } +} + +VOID +OvsInit() +{ + OvsInitIoctl(); + OvsInitEventQueue(); + OvsUserInit(); +} + +VOID +OvsCleanup() +{ + OvsCleanupEventQueue(); + OvsCleanupIoctl(); + OvsUserCleanup(); +} + +VOID +OvsAcquireCtrlLock() +{ + NdisAcquireSpinLock(gOvsCtrlLock); +} +VOID +OvsReleaseCtrlLock() +{ + NdisReleaseSpinLock(gOvsCtrlLock); +} + + +/* + * -------------------------------------------------------------------------- + * Creates the communication device between user and kernel, and also + * initializes the data associated data structures. 
+ * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsCreateDeviceObject(NDIS_HANDLE ovsExtDriverHandle) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + UNICODE_STRING deviceName; + UNICODE_STRING symbolicDeviceName; + PDRIVER_DISPATCH dispatchTable[IRP_MJ_MAXIMUM_FUNCTION+1]; + NDIS_DEVICE_OBJECT_ATTRIBUTES deviceAttributes; + OVS_LOG_TRACE("ovsExtDriverHandle: %p", ovsExtDriverHandle); + + RtlZeroMemory(dispatchTable, + (IRP_MJ_MAXIMUM_FUNCTION + 1) * sizeof (PDRIVER_DISPATCH)); + dispatchTable[IRP_MJ_CREATE] = OvsOpenCloseDevice; + dispatchTable[IRP_MJ_CLOSE] = OvsOpenCloseDevice; + dispatchTable[IRP_MJ_CLEANUP] = OvsCleanupDevice; + dispatchTable[IRP_MJ_DEVICE_CONTROL] = OvsDeviceControl; + + NdisInitUnicodeString(&deviceName, OVS_NT_DEVICE_NAME); + NdisInitUnicodeString(&symbolicDeviceName, OVS_DOS_DEVICE_NAME); + + RtlZeroMemory(&deviceAttributes, sizeof (NDIS_DEVICE_OBJECT_ATTRIBUTES)); + + OVS_INIT_OBJECT_HEADER(&deviceAttributes.Header, + NDIS_OBJECT_TYPE_DEVICE_OBJECT_ATTRIBUTES, + NDIS_DEVICE_OBJECT_ATTRIBUTES_REVISION_1, + sizeof (NDIS_DEVICE_OBJECT_ATTRIBUTES)); + + deviceAttributes.DeviceName = &deviceName; + deviceAttributes.SymbolicName = &symbolicDeviceName; + deviceAttributes.MajorFunctions = dispatchTable; + deviceAttributes.ExtensionSize = sizeof (OVS_DEVICE_EXTENSION); + + status = NdisRegisterDeviceEx(ovsExtDriverHandle, + &deviceAttributes, + &gOvsDeviceObject, + &gOvsDeviceHandle); + if (status != NDIS_STATUS_SUCCESS) { + POVS_DEVICE_EXTENSION ovsExt = + (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(gOvsDeviceObject); + ASSERT(gOvsDeviceObject != NULL); + ASSERT(gOvsDeviceHandle != NULL); + + if (ovsExt) { + ovsExt->numberOpenInstance = 0; + } + } else { + /* Initialize the associated data structures. 
*/ + OvsInit(); + } + OVS_LOG_TRACE("DeviceObject: %p", gOvsDeviceObject); + return status; +} + + +VOID +OvsDeleteDeviceObject() +{ + if (gOvsDeviceHandle) { +#ifdef DBG + POVS_DEVICE_EXTENSION ovsExt = (POVS_DEVICE_EXTENSION) + NdisGetDeviceReservedExtension(gOvsDeviceObject); + if (ovsExt) { + ASSERT(ovsExt->numberOpenInstance == 0); + } +#endif + + ASSERT(gOvsDeviceObject); + NdisDeregisterDeviceEx(gOvsDeviceHandle); + gOvsDeviceHandle = NULL; + gOvsDeviceObject = NULL; + } + OvsCleanup(); +} + +POVS_OPEN_INSTANCE +OvsGetOpenInstance(PFILE_OBJECT fileObject, + UINT32 dpNo) +{ + POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; + ASSERT(instance); + ASSERT(instance->fileObject == fileObject); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != dpNo) { + return NULL; + } + return instance; +} + + +POVS_OPEN_INSTANCE +OvsFindOpenInstance(PFILE_OBJECT fileObject) +{ + UINT32 i, j; + for (i = 0, j = 0; i < OVS_MAX_OPEN_INSTANCES && + j < ovsNumberOfOpenInstances; i++) { + if (ovsOpenInstanceArray[i]) { + if (ovsOpenInstanceArray[i]->fileObject == fileObject) { + return ovsOpenInstanceArray[i]; + } + j++; + } + } + return NULL; +} + +NTSTATUS +OvsAddOpenInstance(PFILE_OBJECT fileObject) +{ + POVS_OPEN_INSTANCE instance = + (POVS_OPEN_INSTANCE) OvsAllocateMemory(sizeof (OVS_OPEN_INSTANCE)); + UINT32 i; + + if (instance == NULL) { + return STATUS_NO_MEMORY; + } + OvsAcquireCtrlLock(); + ASSERT(OvsFindOpenInstance(fileObject) == NULL); + + if (ovsNumberOfOpenInstances >= OVS_MAX_OPEN_INSTANCES) { + OvsReleaseCtrlLock(); + OvsFreeMemory(instance); + return STATUS_INSUFFICIENT_RESOURCES; + } + RtlZeroMemory(instance, sizeof (OVS_OPEN_INSTANCE)); + + for (i = 0; i < OVS_MAX_OPEN_INSTANCES; i++) { + if (ovsOpenInstanceArray[i] == NULL) { + ovsOpenInstanceArray[i] = instance; + instance->cookie = i; + break; + } + } + ASSERT(i < OVS_MAX_OPEN_INSTANCES); + instance->fileObject = fileObject; + ASSERT(fileObject->FsContext == NULL); + 
fileObject->FsContext = instance; + OvsReleaseCtrlLock(); + return STATUS_SUCCESS; +} + +static VOID +OvsCleanupOpenInstance(PFILE_OBJECT fileObject) +{ + POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; + ASSERT(instance); + ASSERT(fileObject == instance->fileObject); + OvsCleanupEvent(instance); + OvsCleanupPacketQueue(instance); +} + +VOID +OvsRemoveOpenInstance(PFILE_OBJECT fileObject) +{ + POVS_OPEN_INSTANCE instance; + ASSERT(fileObject->FsContext); + instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; + ASSERT(instance->cookie < OVS_MAX_OPEN_INSTANCES); + + OvsAcquireCtrlLock(); + fileObject->FsContext = NULL; + ASSERT(ovsOpenInstanceArray[instance->cookie] == instance); + ovsOpenInstanceArray[instance->cookie] = NULL; + OvsReleaseCtrlLock(); + ASSERT(instance->eventQueue == NULL); + ASSERT (instance->packetQueue == NULL); + OvsFreeMemory(instance); +} + +NTSTATUS +OvsCompleteIrpRequest(PIRP irp, + ULONG_PTR infoPtr, + NTSTATUS status) +{ + irp->IoStatus.Information = infoPtr; + irp->IoStatus.Status = status; + IoCompleteRequest(irp, IO_NO_INCREMENT); + return status; +} + + +NTSTATUS +OvsOpenCloseDevice(PDEVICE_OBJECT deviceObject, + PIRP irp) +{ + PIO_STACK_LOCATION irpSp; + NTSTATUS status = STATUS_SUCCESS; + PFILE_OBJECT fileObject; + POVS_DEVICE_EXTENSION ovsExt = + (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(deviceObject); + + ASSERT(deviceObject == gOvsDeviceObject); + ASSERT(ovsExt != NULL); + + irpSp = IoGetCurrentIrpStackLocation(irp); + fileObject = irpSp->FileObject; + OVS_LOG_TRACE("DeviceObject: %p, fileObject:%p, instance: %u", + deviceObject, fileObject, + ovsExt->numberOpenInstance); + + switch (irpSp->MajorFunction) { + case IRP_MJ_CREATE: + status = OvsAddOpenInstance(fileObject); + if (STATUS_SUCCESS == status) { + InterlockedIncrement((LONG volatile *)&ovsExt->numberOpenInstance); + } + break; + case IRP_MJ_CLOSE: + ASSERT(ovsExt->numberOpenInstance > 0); + OvsRemoveOpenInstance(fileObject); + 
InterlockedDecrement((LONG volatile *)&ovsExt->numberOpenInstance); + break; + default: + ASSERT(0); + } + return OvsCompleteIrpRequest(irp, (ULONG_PTR)0, status); +} + +_Use_decl_annotations_ +NTSTATUS +OvsCleanupDevice(PDEVICE_OBJECT deviceObject, + PIRP irp) +{ + + PIO_STACK_LOCATION irpSp; + PFILE_OBJECT fileObject; + + NTSTATUS status = STATUS_SUCCESS; +#ifdef DBG + POVS_DEVICE_EXTENSION ovsExt = + (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(deviceObject); + if (ovsExt) { + ASSERT(ovsExt->numberOpenInstance > 0); + } +#else + UNREFERENCED_PARAMETER(deviceObject); +#endif + ASSERT(deviceObject == gOvsDeviceObject); + irpSp = IoGetCurrentIrpStackLocation(irp); + fileObject = irpSp->FileObject; + + ASSERT(irpSp->MajorFunction == IRP_MJ_CLEANUP); + + OvsCleanupOpenInstance(fileObject); + + return OvsCompleteIrpRequest(irp, (ULONG_PTR)0, status); +} + +/* + *---------------------------------------------------------------------------- + * OvsGetVersionIoctl -- + * + * On entry None + * On exit Driver version + * + * Result: + * STATUS_SUCCESS + * STATUS_BUFFER_TOO_SMALL + *---------------------------------------------------------------------------- + */ +NTSTATUS +OvsGetVersionIoctl(PVOID outputBuffer, + uint32 outputLength, + uint32 *replyLen) +{ + POVS_VERSION driverOut = (POVS_VERSION)outputBuffer; + + if (outputLength < sizeof (*driverOut)) { + return STATUS_BUFFER_TOO_SMALL; + } + *replyLen = sizeof (*driverOut); + driverOut->mjrDrvVer = OVS_DRIVER_MAJOR_VER; + driverOut->mnrDrvVer = OVS_DRIVER_MINOR_VER; + + return STATUS_SUCCESS; +} + + +/* + *---------------------------------------------------------------------------- + * OvsDpDumpIoctl -- + * Get All Datapath. For now, we only support one datapath. 
+ * + * Result: + * STATUS_SUCCESS + * STATUS_BUFFER_TOO_SMALL + *---------------------------------------------------------------------------- + */ +NTSTATUS +OvsDpDumpIoctl(PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + *replyLen = sizeof (UINT32); + if (outputLength < sizeof (UINT32)) { + return STATUS_BUFFER_TOO_SMALL; + } + OvsAcquireCtrlLock(); + if (gOvsSwitchContext) { + *(UINT32 *)outputBuffer = gOvsSwitchContext->dpNo; + } else { + *replyLen = 0; + } + OvsReleaseCtrlLock(); + + return STATUS_SUCCESS; +} + + +/* + *---------------------------------------------------------------------------- + * OvsDpGetIoctl -- + * Given dpNo, get all datapath info as defined in OVS_DP_INFO. + * + * Result: + * STATUS_SUCCESS + * STATUS_BUFFER_TOO_SMALL + * STATUS_INVALID_PARAMETER + *---------------------------------------------------------------------------- + */ +NTSTATUS +OvsDpGetIoctl(PVOID inputBuffer, + UINT32 inputLength, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + UINT32 dpNo; + POVS_DP_INFO info; + OVS_DATAPATH *datapath; + + if (inputLength < sizeof (UINT32)) { + return STATUS_INVALID_PARAMETER; + } + + if (outputLength < sizeof (OVS_DP_INFO)) { + *replyLen = sizeof (OVS_DP_INFO); + return STATUS_BUFFER_TOO_SMALL; + } + + dpNo = *(UINT32 *)inputBuffer; + OvsAcquireCtrlLock(); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != dpNo) { + OvsReleaseCtrlLock(); + return STATUS_INVALID_PARAMETER; + } + *replyLen = sizeof (OVS_DP_INFO); + RtlZeroMemory(outputBuffer, sizeof (OVS_DP_INFO)); + info = (POVS_DP_INFO)outputBuffer; + RtlCopyMemory(info->name, "ovs-system", sizeof ("ovs-system")); + datapath = &gOvsSwitchContext->datapath; + info->nMissed = datapath->misses; + info->nHit = datapath->hits; + info->nLost = datapath->lost; + info->nFlows = datapath->nFlows; + OvsReleaseCtrlLock(); + return STATUS_SUCCESS; +} + +NTSTATUS +OvsDeviceControl(PDEVICE_OBJECT deviceObject, + PIRP irp) +{ + + PIO_STACK_LOCATION 
irpSp; + NTSTATUS status = STATUS_SUCCESS; + PFILE_OBJECT fileObject; + PVOID inputBuffer; + PVOID outputBuffer; + UINT32 inputBufferLen, outputBufferLen, mdlBufferLen; + UINT32 code, replyLen = 0; +#ifdef DBG + POVS_DEVICE_EXTENSION ovsExt = + (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(deviceObject); + ASSERT(deviceObject == gOvsDeviceObject); + ASSERT(ovsExt); + ASSERT(ovsExt->numberOpenInstance > 0); +#else + UNREFERENCED_PARAMETER(deviceObject); +#endif + + irpSp = IoGetCurrentIrpStackLocation(irp); + + + ASSERT(irpSp->MajorFunction == IRP_MJ_DEVICE_CONTROL); + ASSERT(irpSp->FileObject != NULL); + + fileObject = irpSp->FileObject; + code = irpSp->Parameters.DeviceIoControl.IoControlCode; + inputBufferLen = irpSp->Parameters.DeviceIoControl.InputBufferLength; + outputBufferLen = irpSp->Parameters.DeviceIoControl.OutputBufferLength; + /* + * In case of an IRP with METHOD_IN_DIRECT or METHOD_OUT_DIRECT, the size + * of the MDL is stored in Parameters.DeviceIoControl.OutputBufferLength. 
+ */ + mdlBufferLen = outputBufferLen; + outputBuffer = inputBuffer = irp->AssociatedIrp.SystemBuffer; + + switch(code) { + case OVS_IOCTL_VERSION_GET: + status = OvsGetVersionIoctl(outputBuffer, outputBufferLen, + &replyLen); + break; + case OVS_IOCTL_DP_DUMP: + status = OvsDpDumpIoctl(outputBuffer, outputBufferLen, &replyLen); + break; + case OVS_IOCTL_DP_GET: + if (irp->MdlAddress == NULL) { + status = STATUS_INVALID_PARAMETER; + break; + } + outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + NormalPagePriority); + if (outputBuffer == NULL) { + status = STATUS_INSUFFICIENT_RESOURCES; + } else { + status = OvsDpGetIoctl(inputBuffer, inputBufferLen, + outputBuffer, outputBufferLen, &replyLen); + } + break; + case OVS_IOCTL_DP_SET: + status = STATUS_NOT_IMPLEMENTED; + break; + case OVS_IOCTL_VPORT_DUMP: + if (irp->MdlAddress == NULL) { + status = STATUS_INVALID_PARAMETER; + break; + } + outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + NormalPagePriority); + if (outputBuffer) { + status = OvsDumpVportIoctl(inputBuffer, inputBufferLen, + outputBuffer, outputBufferLen, + &replyLen); + } else { + status = STATUS_INSUFFICIENT_RESOURCES; + } + break; + case OVS_IOCTL_VPORT_GET: + if (irp->MdlAddress == NULL) { + status = STATUS_INVALID_PARAMETER; + break; + } + outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + NormalPagePriority); + if (outputBuffer) { + status = OvsGetVportIoctl(inputBuffer, inputBufferLen, + outputBuffer, outputBufferLen, + &replyLen); + } else { + status = STATUS_INSUFFICIENT_RESOURCES; + } + break; + case OVS_IOCTL_VPORT_SET: + status = STATUS_NOT_IMPLEMENTED; + break; + case OVS_IOCTL_VPORT_ADD: + if (irp->MdlAddress == NULL) { + status = STATUS_INVALID_PARAMETER; + break; + } + outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + NormalPagePriority); + if (outputBuffer) { + status = OvsAddVportIoctl(inputBuffer, inputBufferLen, + outputBuffer, outputBufferLen, + &replyLen); + } else { + status = 
STATUS_INSUFFICIENT_RESOURCES; + } + break; + case OVS_IOCTL_VPORT_DEL: + status = OvsDelVportIoctl(inputBuffer, inputBufferLen, + &replyLen); + break; + case OVS_IOCTL_VPORT_EXT_INFO: + if (irp->MdlAddress == NULL) { + status = STATUS_INVALID_PARAMETER; + break; + } + outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + NormalPagePriority); + if (outputBuffer) { + status = OvsGetExtInfoIoctl(inputBuffer, inputBufferLen, + outputBuffer, outputBufferLen, + &replyLen); + } else { + OVS_LOG_INFO("ExtInfo: fail to get outputBuffer address"); + status = STATUS_INSUFFICIENT_RESOURCES; + } + break; + case OVS_IOCTL_FLOW_DUMP: + if (irp->MdlAddress == NULL) { + status = STATUS_INVALID_PARAMETER; + break; + } + outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + NormalPagePriority); + if (outputBuffer) { + status = OvsDumpFlowIoctl(inputBuffer, inputBufferLen, + outputBuffer, outputBufferLen, + &replyLen); + } else { + status = STATUS_INSUFFICIENT_RESOURCES; + } + break; + case OVS_IOCTL_FLOW_GET: + if (irp->MdlAddress == NULL) { + status = STATUS_INVALID_PARAMETER; + break; + } + outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + NormalPagePriority); + if (outputBuffer) { + status = OvsGetFlowIoctl(inputBuffer, inputBufferLen, + outputBuffer, outputBufferLen, + &replyLen); + } else { + status = STATUS_INSUFFICIENT_RESOURCES; + } + break; + case OVS_IOCTL_FLOW_PUT: + // XXX: This is not really working - mapping the input buffer + // XXX: inputBufferLen = mdlBufferLen; + // inputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + // NormalPagePriority); + status = OvsPutFlowIoctl(inputBuffer, inputBufferLen, + outputBuffer, outputBufferLen, + &replyLen); + break; + case OVS_IOCTL_FLOW_FLUSH: + status = OvsFlushFlowIoctl(inputBuffer, inputBufferLen); + break; + case OVS_IOCTL_QOS_QUEUE_DUMP: + case OVS_IOCTL_QOS_QUEUE_GET: + case OVS_IOCTL_QOS_QUEUE_SET: + status = STATUS_NOT_IMPLEMENTED; + break; + case OVS_IOCTL_DATAPATH_SUBSCRIBE: + 
status = OvsSubscribeDpIoctl(fileObject, inputBuffer, + inputBufferLen); + break; + case OVS_IOCTL_DATAPATH_READ: + if (irp->MdlAddress == NULL) { + status = STATUS_INVALID_PARAMETER; + break; + } + outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + NormalPagePriority); + if (outputBuffer) { + status = OvsReadDpIoctl(fileObject, outputBuffer, + outputBufferLen, &replyLen); + } else { + status = STATUS_INSUFFICIENT_RESOURCES; + } + break; + case OVS_IOCTL_DATAPATH_OPERATE: + status = STATUS_NOT_IMPLEMENTED; + break; + case OVS_IOCTL_DATAPATH_EXECUTE: + // XXX: need to make the input direct + status = OvsExecuteDpIoctl(inputBuffer, inputBufferLen, + outputBufferLen); + break; + case OVS_IOCTL_DATAPATH_PURGE: + status = OvsPurgeDpIoctl(fileObject); + break; + case OVS_IOCTL_DATAPATH_WAIT: + status = OvsWaitDpIoctl(irp, fileObject); + break; + case OVS_IOCTL_EVENT_SUBSCRIBE: + status = OvsSubscribeEventIoctl(fileObject, inputBuffer, + inputBufferLen); + break; + case OVS_IOCTL_EVENT_POLL: + if (irp->MdlAddress == NULL) { + status = STATUS_INVALID_PARAMETER; + break; + } + outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + NormalPagePriority); + if (outputBuffer == NULL) { + status = STATUS_INSUFFICIENT_RESOURCES; + } else { + status = OvsPollEventIoctl(fileObject, inputBuffer, + inputBufferLen, outputBuffer, + outputBufferLen, &replyLen); + } + break; + case OVS_IOCTL_EVENT_WAIT: + status = OvsWaitEventIoctl(irp, fileObject, + inputBuffer, inputBufferLen); + break; + case OVS_IOCTL_DP_TIMESTAMP_SET: + if (inputBufferLen != sizeof (ovsUserTimestampDelta)) { + status = STATUS_INFO_LENGTH_MISMATCH; + } else { + int64 currentUserTS = *(int64 *)inputBuffer; + LARGE_INTEGER tickCount; + + /* So many ticks since system booted. 
*/ + KeQueryTickCount(&tickCount); + ovsUserTimestampDelta = currentUserTS - + (tickCount.QuadPart * ovsTimeIncrementPerTick); + status = STATUS_SUCCESS; + } + break; + default: + status = STATUS_INVALID_DEVICE_REQUEST; + break; + } + + if (status == STATUS_PENDING) { + return status; + } else { + /* + * When the system-address-space mapping that is returned by + * MmGetSystemAddressForMdlSafe is no longer needed, it must be + * released. + * http://msdn.microsoft.com/en-us/library/windows/hardware/ff554559(v=vs.85).aspx + * + * We might have to release the MDL here. + */ + return OvsCompleteIrpRequest(irp, (ULONG_PTR)replyLen, status); + } +} diff --git a/datapath-windows/ovsext/OvsIoctl.h b/datapath-windows/ovsext/OvsIoctl.h new file mode 100644 index 000000000..9f2bf8599 --- /dev/null +++ b/datapath-windows/ovsext/OvsIoctl.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __OVS_IOCTL_H_ +#define __OVS_IOCTL_H_ 1 + +typedef struct _OVS_DEVICE_EXTENSION { + INT numberOpenInstance; +} OVS_DEVICE_EXTENSION, *POVS_DEVICE_EXTENSION; + + +typedef struct _OVS_OPEN_INSTANCE { + UINT32 cookie; + PFILE_OBJECT fileObject; + PVOID eventQueue; + PVOID packetQueue; +} OVS_OPEN_INSTANCE, *POVS_OPEN_INSTANCE; + +NDIS_STATUS OvsCreateDeviceObject(NDIS_HANDLE ovsExtDriverHandle); +VOID OvsDeleteDeviceObject(); + +POVS_OPEN_INSTANCE OvsGetOpenInstance(PFILE_OBJECT fileObject, + UINT32 dpNo); + +NTSTATUS OvsCompleteIrpRequest(PIRP irp, ULONG_PTR infoPtr, NTSTATUS status); + +#endif /* __OVS_IOCTL_H_ */ diff --git a/datapath-windows/ovsext/OvsIpHelper.c b/datapath-windows/ovsext/OvsIpHelper.c new file mode 100644 index 000000000..cd2625a30 --- /dev/null +++ b/datapath-windows/ovsext/OvsIpHelper.c @@ -0,0 +1,1689 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "precomp.h" +#include "OvsIpHelper.h" +#include "OvsSwitch.h" +#include "OvsJhash.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_IPHELPER +#include "OvsDebug.h" + +/* + * Fow now, we assume only one internal adapter + */ + +KSTART_ROUTINE OvsStartIpHelper; + + +/* + * Only when the internal IP is configured and virtual + * internal port is connected, the IP helper request can be + * queued. 
+ */ +static BOOLEAN ovsInternalIPConfigured; +static UINT32 ovsInternalPortNo; +static GUID ovsInternalNetCfgId; +static MIB_IF_ROW2 ovsInternalRow; +static MIB_IPINTERFACE_ROW ovsInternalIPRow; + +/* we only keep one internal IP for reference, it will not be used for + * determining SRC IP of Tunnel + */ +static UINT32 ovsInternalIP; + + +/* + * FWD_ENTRY --------> IPFORWARD_ENTRY + * | + * |--------------------------------------> IPENIGH_ENTRY + * + * IPFORWARD_ENTRY ------> FWD_ENTRY LIST with same IPFORWARD + * + * IPNEIGH_ENTRY ------> FWD_ENTRY LIST with same IPNEIGH + * + */ + +static PLIST_ENTRY ovsFwdHashTable; // based on DST IP +static PLIST_ENTRY ovsRouteHashTable; // based on DST PREFIX +static PLIST_ENTRY ovsNeighHashTable; // based on DST IP +static LIST_ENTRY ovsSortedIPNeighList; +static UINT32 ovsNumFwdEntries; + + +static PNDIS_RW_LOCK_EX ovsTableLock; +static NDIS_SPIN_LOCK ovsIpHelperLock; + +static LIST_ENTRY ovsIpHelperRequestList; +static UINT32 ovsNumIpHelperRequests; + +static HANDLE ipInterfaceNotificationHandle; +static HANDLE ipRouteNotificationHandle; +static HANDLE unicastIPNotificationHandle; + +static OVS_IP_HELPER_THREAD_CONTEXT ovsIpHelperThreadContext; + +static POVS_IPFORWARD_ENTRY OvsLookupIPForwardEntry(PIP_ADDRESS_PREFIX prefix); +static VOID OvsRemoveIPForwardEntry(POVS_IPFORWARD_ENTRY ipf); +static VOID OvsRemoveAllFwdEntriesWithSrc(UINT32 ipAddr); +static VOID OvsCleanupIpHelperRequestList(VOID); +static VOID OvsCleanupFwdTable(VOID); +static VOID OvsAddToSortedNeighList(POVS_IPNEIGH_ENTRY ipn); + +static VOID +OvsDumpIfRow(PMIB_IF_ROW2 ifRow) +{ + OVS_LOG_INFO("InterfaceLuid: NetLuidIndex: %d, type: %d", + ifRow->InterfaceLuid.Info.NetLuidIndex, + ifRow->InterfaceLuid.Info.IfType); + OVS_LOG_INFO("InterfaceIndex: %d", ifRow->InterfaceIndex); + + OVS_LOG_INFO("Interface GUID: %08x-%04x-%04x-%04x-%02x%02x%02x%02x%02x%02x", + ifRow->InterfaceGuid.Data1, + ifRow->InterfaceGuid.Data2, + ifRow->InterfaceGuid.Data3, + *(UINT16 
*)ifRow->InterfaceGuid.Data4, + ifRow->InterfaceGuid.Data4[2], + ifRow->InterfaceGuid.Data4[3], + ifRow->InterfaceGuid.Data4[4], + ifRow->InterfaceGuid.Data4[5], + ifRow->InterfaceGuid.Data4[6], + ifRow->InterfaceGuid.Data4[7]); + OVS_LOG_INFO("Perm MAC Address: %02x:%02x:%02x:%02x:%02x:%02x", + ifRow->PermanentPhysicalAddress[0], + ifRow->PermanentPhysicalAddress[1], + ifRow->PermanentPhysicalAddress[2], + ifRow->PermanentPhysicalAddress[3], + ifRow->PermanentPhysicalAddress[4], + ifRow->PermanentPhysicalAddress[5]); +} + + +static VOID +OvsDumpIfTable(PMIB_IF_TABLE2 ifTable) +{ + PMIB_IF_ROW2 ifRow; + UINT32 i; + + OVS_LOG_INFO("======Number of entries: %d========", ifTable->NumEntries); + + for (i = 0; i < ifTable->NumEntries; i++) { + ifRow = &ifTable->Table[i]; + OvsDumpIfRow(ifRow); + } +} + + +NTSTATUS +OvsGetIfEntry(GUID *interfaceGuid, PMIB_IF_ROW2 ifEntry) +{ + NTSTATUS status; + PMIB_IF_TABLE2 ifTable; + UINT32 i; + + if (interfaceGuid == NULL || ifEntry == NULL) { + return STATUS_INVALID_PARAMETER; + } + + status = GetIfTable2Ex(MibIfTableNormal, &ifTable); + + if (status != STATUS_SUCCESS) { + OVS_LOG_INFO("Fail to get if table, status: %x", status); + return status; + } + status = STATUS_NOT_FOUND; + + for (i = 0; i < ifTable->NumEntries; i++) { + PMIB_IF_ROW2 ifRow; + + ifRow = &ifTable->Table[i]; + if (!memcmp(interfaceGuid, &ifRow->InterfaceGuid, sizeof (GUID))) { + RtlCopyMemory(ifEntry, ifRow, sizeof (MIB_IF_ROW2)); + status = STATUS_SUCCESS; + OvsDumpIfRow(ifEntry); + break; + } + } + + FreeMibTable(ifTable); + return status; +} + + +static VOID +OvsDumpIPInterfaceEntry(PMIB_IPINTERFACE_ROW ipRow) +{ + OVS_LOG_INFO("InterfaceLuid: NetLuidIndex: %d, type: %d", + ipRow->InterfaceLuid.Info.NetLuidIndex, + ipRow->InterfaceLuid.Info.IfType); + OVS_LOG_INFO("InterfaceIndex: %d", ipRow->InterfaceIndex); + + OVS_LOG_INFO("MaxReassembleSize: %u", ipRow->MaxReassemblySize); +} + + +NTSTATUS +OvsGetIPInterfaceEntry(NET_LUID luid, + PMIB_IPINTERFACE_ROW 
ipRow) +{ + NTSTATUS status; + + if (ipRow == NULL) { + return STATUS_INVALID_PARAMETER; + } + + ipRow->Family = AF_INET; + ipRow->InterfaceLuid.Value = luid.Value; + + status = GetIpInterfaceEntry(ipRow); + + if (status != STATUS_SUCCESS) { + OVS_LOG_INFO("Fail to get internal IP Interface mib row, status: %x", + status); + return status; + } + OvsDumpIPInterfaceEntry(ipRow); + return status; +} + + +static VOID +OvsDumpIPEntry(PMIB_UNICASTIPADDRESS_ROW ipRow) +{ + UINT32 ipAddr; + + OVS_LOG_INFO("InterfaceLuid: NetLuidIndex: %d, type: %d", + ipRow->InterfaceLuid.Info.NetLuidIndex, + ipRow->InterfaceLuid.Info.IfType); + + OVS_LOG_INFO("InterfaceIndex: %d", ipRow->InterfaceIndex); + + ASSERT(ipRow->Address.si_family == AF_INET); + + ipAddr = ipRow->Address.Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("Unicast Address: %d.%d.%d.%d\n", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, ipAddr >> 24); +} + + +NTSTATUS +OvsGetIPEntry(NET_LUID interfaceLuid, + PMIB_UNICASTIPADDRESS_ROW ipEntry) +{ + PMIB_UNICASTIPADDRESS_TABLE ipTable; + NTSTATUS status; + UINT32 i; + + if (ipEntry == NULL || ipEntry == NULL) { + return STATUS_INVALID_PARAMETER; + } + + status = GetUnicastIpAddressTable(AF_INET, &ipTable); + + if (status != STATUS_SUCCESS) { + OVS_LOG_INFO("Fail to get unicast address table, status: %x", status); + return status; + } + + status = STATUS_NOT_FOUND; + + for (i = 0; i < ipTable->NumEntries; i++) { + PMIB_UNICASTIPADDRESS_ROW ipRow; + + ipRow = &ipTable->Table[i]; + if (ipRow->InterfaceLuid.Value == interfaceLuid.Value) { + RtlCopyMemory(ipEntry, ipRow, sizeof (*ipRow)); + OvsDumpIPEntry(ipEntry); + status = STATUS_SUCCESS; + break; + } + } + + FreeMibTable(ipTable); + return status; +} + +#ifdef OVS_ENABLE_IPPATH +static VOID +OvsDumpIPPath(PMIB_IPPATH_ROW ipPath) +{ + UINT32 ipAddr = ipPath->Source.Ipv4.sin_addr.s_addr; + + OVS_LOG_INFO("Source: %d.%d.%d.%d", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); + + 
ipAddr = ipPath->Destination.Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("Destination: %d.%d.%d.%d", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); + + ipAddr = ipPath->CurrentNextHop.Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("NextHop: %d.%d.%d.%d", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); +} + + +NTSTATUS +OvsGetIPPathEntry(PMIB_IPPATH_ROW ipPath) +{ + NTSTATUS status; + UINT32 ipAddr = ipPath->Destination.Ipv4.sin_addr.s_addr; + + status = GetIpPathEntry(ipPath); + + if (status != STATUS_SUCCESS) { + OVS_LOG_INFO("Fail to get IP path to %d.%d.%d.%d, status:%x", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, status); + return status; + } + OvsDumpIPPath(ipPath); + return status; +} +#endif + +static VOID +OvsDumpRoute(const SOCKADDR_INET *sourceAddress, + const SOCKADDR_INET *destinationAddress, + PMIB_IPFORWARD_ROW2 route) +{ + UINT32 ipAddr = destinationAddress->Ipv4.sin_addr.s_addr; + + OVS_LOG_INFO("Destination: %d.%d.%d.%d", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); + + ipAddr = sourceAddress->Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("Source: %d.%d.%d.%d", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); + + ipAddr = route->NextHop.Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("NextHop: %d.%d.%d.%d", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); +} + + +NTSTATUS +OvsGetRoute(NET_LUID interfaceLuid, + const SOCKADDR_INET *destinationAddress, + PMIB_IPFORWARD_ROW2 route, + SOCKADDR_INET *sourceAddress) +{ + NTSTATUS status; + + if (destinationAddress == NULL || route == NULL) { + return STATUS_INVALID_PARAMETER; + } + + status = GetBestRoute2(&interfaceLuid, 0, + NULL, destinationAddress, + 0, route, sourceAddress); + + if (status != STATUS_SUCCESS) { + UINT32 ipAddr = destinationAddress->Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("Fail to 
get route to %d.%d.%d.%d, status: %x", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, status); + return status; + } + + OvsDumpRoute(sourceAddress, destinationAddress, route); + return status; +} + +static VOID +OvsDumpIPNeigh(PMIB_IPNET_ROW2 ipNeigh) +{ + UINT32 ipAddr = ipNeigh->Address.Ipv4.sin_addr.s_addr; + + OVS_LOG_INFO("Neigh: %d.%d.%d.%d", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); + OVS_LOG_INFO("MAC Address: %02x:%02x:%02x:%02x:%02x:%02x", + ipNeigh->PhysicalAddress[0], + ipNeigh->PhysicalAddress[1], + ipNeigh->PhysicalAddress[2], + ipNeigh->PhysicalAddress[3], + ipNeigh->PhysicalAddress[4], + ipNeigh->PhysicalAddress[5]); +} + + +NTSTATUS +OvsGetIPNeighEntry(PMIB_IPNET_ROW2 ipNeigh) +{ + NTSTATUS status; + + ASSERT(ipNeigh); + + status = GetIpNetEntry2(ipNeigh); + + if (status != STATUS_SUCCESS) { + UINT32 ipAddr = ipNeigh->Address.Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("Fail to get ARP entry: %d.%d.%d.%d, status: %x", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, status); + return status; + } + if (ipNeigh->State == NlnsReachable || + ipNeigh->State == NlnsPermanent) { + OvsDumpIPNeigh(ipNeigh); + return STATUS_SUCCESS; + } + return STATUS_FWP_TCPIP_NOT_READY; +} + + +NTSTATUS +OvsResolveIPNeighEntry(PMIB_IPNET_ROW2 ipNeigh) +{ + NTSTATUS status; + + ASSERT(ipNeigh); + status = ResolveIpNetEntry2(ipNeigh, NULL); + + if (status != STATUS_SUCCESS) { + UINT32 ipAddr = ipNeigh->Address.Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("Fail to resolve ARP entry: %d.%d.%d.%d, status: %x", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, status); + return status; + } + + if (ipNeigh->State == NlnsReachable || + ipNeigh->State == NlnsPermanent) { + OvsDumpIPNeigh(ipNeigh); + return STATUS_SUCCESS; + } + return STATUS_FWP_TCPIP_NOT_READY; +} + + +NTSTATUS +OvsGetOrResolveIPNeigh(UINT32 ipAddr, + PMIB_IPNET_ROW2 
ipNeigh) +{ + NTSTATUS status; + + ASSERT(ipNeigh); + + RtlZeroMemory(ipNeigh, sizeof (*ipNeigh)); + ipNeigh->InterfaceLuid.Value = ovsInternalRow.InterfaceLuid.Value; + ipNeigh->InterfaceIndex = ovsInternalRow.InterfaceIndex; + ipNeigh->Address.si_family = AF_INET; + ipNeigh->Address.Ipv4.sin_addr.s_addr = ipAddr; + + status = OvsGetIPNeighEntry(ipNeigh); + + if (status != STATUS_SUCCESS) { + RtlZeroMemory(ipNeigh, sizeof (*ipNeigh)); + ipNeigh->InterfaceLuid.Value = ovsInternalRow.InterfaceLuid.Value; + ipNeigh->InterfaceIndex = ovsInternalRow.InterfaceIndex; + ipNeigh->Address.si_family = AF_INET; + ipNeigh->Address.Ipv4.sin_addr.s_addr = ipAddr; + status = OvsResolveIPNeighEntry(ipNeigh); + } + return status; +} + +

/*
 * IP interface change callback.
 *
 * For an add or parameter-change notification on the interface recorded in
 * 'ovsInternalRow' (same NetLuidIndex, IfType and InterfaceIndex), the
 * cached 'ovsInternalIPRow' is refreshed from 'ipRow' and
 * 'ovsInternalIPConfigured' is set, both under 'ovsIpHelperLock'.
 *
 * For a delete notification on that interface, 'ovsInternalIPConfigured'
 * is cleared under the lock, then the pending IP helper requests and the
 * forwarding table are cleaned up.
 *
 * The initial notification is only logged; 'ipRow' is not dereferenced in
 * that case.  'context' is unused.
 */
static VOID
OvsChangeCallbackIpInterface(PVOID context,
                             PMIB_IPINTERFACE_ROW ipRow,
                             MIB_NOTIFICATION_TYPE notificationType)
{
    UNREFERENCED_PARAMETER(context);
    switch (notificationType) {
    case MibParameterNotification:
    case MibAddInstance:
        if (ipRow->InterfaceLuid.Info.NetLuidIndex ==
            ovsInternalRow.InterfaceLuid.Info.NetLuidIndex &&
            ipRow->InterfaceLuid.Info.IfType ==
            ovsInternalRow.InterfaceLuid.Info.IfType &&
            ipRow->InterfaceIndex == ovsInternalRow.InterfaceIndex) {
            /*
             * Update the IP Interface Row.  Size the copy from the
             * destination object so the whole row is refreshed, not just
             * the first pointer-sized bytes.
             */
            NdisAcquireSpinLock(&ovsIpHelperLock);
            RtlCopyMemory(&ovsInternalIPRow, ipRow,
                          sizeof (ovsInternalIPRow));
            ovsInternalIPConfigured = TRUE;
            NdisReleaseSpinLock(&ovsIpHelperLock);
        }
        OVS_LOG_INFO("IP Interface with NetLuidIndex: %d, type: %d is %s",
                     ipRow->InterfaceLuid.Info.NetLuidIndex,
                     ipRow->InterfaceLuid.Info.IfType,
                     notificationType == MibAddInstance ? "added" : "modified");
        break;
    case MibDeleteInstance:
        OVS_LOG_INFO("IP Interface with NetLuidIndex: %d, type: %d, deleted",
                     ipRow->InterfaceLuid.Info.NetLuidIndex,
                     ipRow->InterfaceLuid.Info.IfType);
        if (ipRow->InterfaceLuid.Info.NetLuidIndex ==
            ovsInternalRow.InterfaceLuid.Info.NetLuidIndex &&
            ipRow->InterfaceLuid.Info.IfType ==
            ovsInternalRow.InterfaceLuid.Info.IfType &&
            ipRow->InterfaceIndex == ovsInternalRow.InterfaceIndex) {

            /* Clear the flag first, then drop the queued work and routes. */
            NdisAcquireSpinLock(&ovsIpHelperLock);
            ovsInternalIPConfigured = FALSE;
            NdisReleaseSpinLock(&ovsIpHelperLock);

            OvsCleanupIpHelperRequestList();

            OvsCleanupFwdTable();
        }

        break;
    case MibInitialNotification:
        OVS_LOG_INFO("Get Initial notification for IP Interface change.");
        /* fall through */
    default:
        return;
    }
}

+static VOID +OvsChangeCallbackIpRoute(PVOID context, + PMIB_IPFORWARD_ROW2 ipRoute, + MIB_NOTIFICATION_TYPE notificationType) +{ + UINT32 ipAddr, nextHop; + + UNREFERENCED_PARAMETER(context); + switch (notificationType) { + case MibAddInstance: + + ASSERT(ipRoute); + ipAddr = ipRoute->DestinationPrefix.Prefix.Ipv4.sin_addr.s_addr; + nextHop = ipRoute->NextHop.Ipv4.sin_addr.s_addr; + + OVS_LOG_INFO("IPRoute: To %d.%d.%d.%d/%d through %d.%d.%d.%d added", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, + ipRoute->DestinationPrefix.PrefixLength, + nextHop & 0xff, (nextHop >> 8) & 0xff, + (nextHop >> 16) & 0xff, (nextHop >> 24) & 0xff); + break; + + case MibParameterNotification: + case MibDeleteInstance: + ASSERT(ipRoute); + ipAddr = ipRoute->DestinationPrefix.Prefix.Ipv4.sin_addr.s_addr; + nextHop = ipRoute->NextHop.Ipv4.sin_addr.s_addr; + + OVS_LOG_INFO("IPRoute: To %d.%d.%d.%d/%d through %d.%d.%d.%d %s.", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, + ipRoute->DestinationPrefix.PrefixLength, + nextHop & 0xff, (nextHop >> 8) & 0xff, + (nextHop >> 16) & 0xff, (nextHop >> 24) & 0xff, + notificationType == 
MibDeleteInstance ? "deleted" : + "modified"); + + if (ipRoute->InterfaceLuid.Info.NetLuidIndex == + ovsInternalRow.InterfaceLuid.Info.NetLuidIndex && + ipRoute->InterfaceLuid.Info.IfType == + ovsInternalRow.InterfaceLuid.Info.IfType && + ipRoute->InterfaceIndex == ovsInternalRow.InterfaceIndex) { + + POVS_IPFORWARD_ENTRY ipf; + LOCK_STATE_EX lockState; + + NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); + ipf = OvsLookupIPForwardEntry(&ipRoute->DestinationPrefix); + if (ipf != NULL) { + OvsRemoveIPForwardEntry(ipf); + } + NdisReleaseRWLock(ovsTableLock, &lockState); + } + break; + + case MibInitialNotification: + OVS_LOG_INFO("Get Initial notification for IP Route change."); + default: + return; + } +} + + +static VOID +OvsChangeCallbackUnicastIpAddress(PVOID context, + PMIB_UNICASTIPADDRESS_ROW unicastRow, + MIB_NOTIFICATION_TYPE notificationType) +{ + UINT32 ipAddr; + + UNREFERENCED_PARAMETER(context); + switch (notificationType) { + case MibParameterNotification: + case MibAddInstance: + ASSERT(unicastRow); + ipAddr = unicastRow->Address.Ipv4.sin_addr.s_addr; + if (unicastRow->InterfaceLuid.Info.NetLuidIndex == + ovsInternalRow.InterfaceLuid.Info.NetLuidIndex && + unicastRow->InterfaceLuid.Info.IfType == + ovsInternalRow.InterfaceLuid.Info.IfType && + unicastRow->InterfaceIndex == ovsInternalRow.InterfaceIndex) { + ovsInternalIP = ipAddr; + } + OVS_LOG_INFO("IP Address: %d.%d.%d.%d is %s", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, + notificationType == MibAddInstance ? 
"added": "modified"); + break; + + case MibDeleteInstance: + ASSERT(unicastRow); + ipAddr = unicastRow->Address.Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("IP Address removed: %d.%d.%d.%d", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); + if (unicastRow->InterfaceLuid.Info.NetLuidIndex == + ovsInternalRow.InterfaceLuid.Info.NetLuidIndex && + unicastRow->InterfaceLuid.Info.IfType == + ovsInternalRow.InterfaceLuid.Info.IfType && + unicastRow->InterfaceIndex == ovsInternalRow.InterfaceIndex) { + + LOCK_STATE_EX lockState; + NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); + OvsRemoveAllFwdEntriesWithSrc(ipAddr); + NdisReleaseRWLock(ovsTableLock, &lockState); + + } + break; + + case MibInitialNotification: + OVS_LOG_INFO("Get Initial notification for Unicast IP Address change."); + default: + return; + } +} + + +static VOID +OvsCancelChangeNotification() +{ + if (ipInterfaceNotificationHandle != NULL) { + CancelMibChangeNotify2(ipInterfaceNotificationHandle); + ipInterfaceNotificationHandle = NULL; + } + if (ipRouteNotificationHandle != NULL) { + CancelMibChangeNotify2(ipRouteNotificationHandle); + ipRouteNotificationHandle = NULL; + } + if (unicastIPNotificationHandle != NULL) { + CancelMibChangeNotify2(unicastIPNotificationHandle); + unicastIPNotificationHandle = NULL; + } +} + + +static NTSTATUS +OvsRegisterChangeNotification() +{ + NTSTATUS status; + + + status = NotifyIpInterfaceChange(AF_INET, OvsChangeCallbackIpInterface, + NULL, TRUE, + &ipInterfaceNotificationHandle); + if (status != STATUS_SUCCESS) { + OVS_LOG_ERROR("Fail to register Notify IP interface change, status:%x.", + status); + return status; + } + + status = NotifyRouteChange2(AF_INET, OvsChangeCallbackIpRoute, NULL, + TRUE, &ipRouteNotificationHandle); + if (status != STATUS_SUCCESS) { + OVS_LOG_ERROR("Fail to regiter ip route change, status: %x.", + status); + goto register_cleanup; + } + status = NotifyUnicastIpAddressChange(AF_INET, + 
OvsChangeCallbackUnicastIpAddress, + NULL, TRUE, + &unicastIPNotificationHandle); + if (status != STATUS_SUCCESS) { + OVS_LOG_ERROR("Fail to regiter unicast ip change, status: %x.", status); + } +register_cleanup: + if (status != STATUS_SUCCESS) { + OvsCancelChangeNotification(); + } + + return status; +} + + +static POVS_IPNEIGH_ENTRY +OvsLookupIPNeighEntry(UINT32 ipAddr) +{ + PLIST_ENTRY link; + POVS_IPNEIGH_ENTRY entry; + UINT32 hash = OvsJhashWords(&ipAddr, 1, OVS_HASH_BASIS); + + LIST_FORALL(&ovsNeighHashTable[hash & OVS_NEIGH_HASH_TABLE_MASK], link) { + entry = CONTAINING_RECORD(link, OVS_IPNEIGH_ENTRY, link); + if (entry->ipAddr == ipAddr) { + return entry; + } + } + return NULL; +} + + +static UINT32 +OvsHashIPPrefix(PIP_ADDRESS_PREFIX prefix) +{ + UINT64 words = (UINT64)prefix->Prefix.Ipv4.sin_addr.s_addr << 32 | + (UINT32)prefix->PrefixLength; + return OvsJhashWords((UINT32 *)&words, 2, OVS_HASH_BASIS); +} + + +static POVS_IPFORWARD_ENTRY +OvsLookupIPForwardEntry(PIP_ADDRESS_PREFIX prefix) +{ + + PLIST_ENTRY link; + POVS_IPFORWARD_ENTRY ipfEntry; + UINT32 hash; + ASSERT(prefix->Prefix.si_family == AF_INET); + + hash = RtlUlongByteSwap(prefix->Prefix.Ipv4.sin_addr.s_addr); + + ASSERT(prefix->PrefixLength >= 32 || + (hash & (((UINT32)1 << (32 - prefix->PrefixLength)) - 1)) == 0); + + hash = OvsHashIPPrefix(prefix); + LIST_FORALL(&ovsRouteHashTable[hash & OVS_ROUTE_HASH_TABLE_MASK], link) { + ipfEntry = CONTAINING_RECORD(link, OVS_IPFORWARD_ENTRY, link); + if (ipfEntry->prefix.PrefixLength == prefix->PrefixLength && + ipfEntry->prefix.Prefix.Ipv4.sin_addr.s_addr == + prefix->Prefix.Ipv4.sin_addr.s_addr) { + return ipfEntry; + } + } + return NULL; +} + + +static POVS_FWD_ENTRY +OvsLookupIPFwdEntry(UINT32 dstIp) +{ + PLIST_ENTRY link; + POVS_FWD_ENTRY entry; + UINT32 hash = OvsJhashWords(&dstIp, 1, OVS_HASH_BASIS); + + LIST_FORALL(&ovsFwdHashTable[hash & OVS_FWD_HASH_TABLE_MASK], link) { + entry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, link); + if 
(entry->info.dstIpAddr == dstIp) { + return entry; + } + } + return NULL; +} + + +NTSTATUS +OvsLookupIPFwdInfo(UINT32 dstIp, + POVS_FWD_INFO info) +{ + POVS_FWD_ENTRY entry; + LOCK_STATE_EX lockState; + NTSTATUS status = STATUS_NOT_FOUND; + + NdisAcquireRWLockRead(ovsTableLock, &lockState, 0); + entry = OvsLookupIPFwdEntry(dstIp); + if (entry) { + info->value[0] = entry->info.value[0]; + info->value[1] = entry->info.value[1]; + info->value[2] = entry->info.value[2]; + status = STATUS_SUCCESS; + } + NdisReleaseRWLock(ovsTableLock, &lockState); + return status; +} + + +static POVS_IPNEIGH_ENTRY +OvsCreateIPNeighEntry(PMIB_IPNET_ROW2 ipNeigh) +{ + + POVS_IPNEIGH_ENTRY entry; + UINT64 timeVal; + + ASSERT(ipNeigh != NULL); + entry = (POVS_IPNEIGH_ENTRY)OvsAllocateMemory(sizeof (OVS_IPNEIGH_ENTRY)); + if (entry == NULL) { + return NULL; + } + + RtlZeroMemory(entry, sizeof (OVS_IPNEIGH_ENTRY)); + entry->ipAddr = ipNeigh->Address.Ipv4.sin_addr.s_addr; + KeQuerySystemTime((LARGE_INTEGER *)&timeVal); + entry->timeout = timeVal + OVS_IPNEIGH_TIMEOUT; + RtlCopyMemory(entry->macAddr, ipNeigh->PhysicalAddress, + MAC_ADDRESS_LEN); + InitializeListHead(&entry->fwdList); + + return entry; +} + + +static POVS_IPFORWARD_ENTRY +OvsCreateIPForwardEntry(PMIB_IPFORWARD_ROW2 ipRoute) +{ + + POVS_IPFORWARD_ENTRY entry; + + ASSERT(ipRoute); + + entry = + (POVS_IPFORWARD_ENTRY)OvsAllocateMemory(sizeof (OVS_IPFORWARD_ENTRY)); + if (entry == NULL) { + return NULL; + } + + RtlZeroMemory(entry, sizeof (OVS_IPFORWARD_ENTRY)); + RtlCopyMemory(&entry->prefix, &ipRoute->DestinationPrefix, + sizeof (IP_ADDRESS_PREFIX)); + entry->nextHop = ipRoute->NextHop.Ipv4.sin_addr.s_addr; + InitializeListHead(&entry->fwdList); + + return entry; +} + + +static POVS_FWD_ENTRY +OvsCreateFwdEntry(POVS_FWD_INFO fwdInfo) +{ + POVS_FWD_ENTRY entry; + + entry = (POVS_FWD_ENTRY)OvsAllocateMemory(sizeof (OVS_FWD_ENTRY)); + if (entry == NULL) { + return NULL; + } + + RtlZeroMemory(entry, sizeof (OVS_FWD_ENTRY)); + 
RtlCopyMemory(&entry->info, fwdInfo, sizeof (OVS_FWD_INFO)); + return entry; +} + + +static VOID +OvsRemoveFwdEntry(POVS_FWD_ENTRY fwdEntry) +{ + POVS_IPFORWARD_ENTRY ipf; + POVS_IPNEIGH_ENTRY ipn; + + ipf = fwdEntry->ipf; + ipn = fwdEntry->ipn; + + RemoveEntryList(&fwdEntry->link); + ovsNumFwdEntries--; + + RemoveEntryList(&fwdEntry->ipfLink); + ipf->refCount--; + + RemoveEntryList(&fwdEntry->ipnLink); + ipn->refCount--; + + if (ipf->refCount == 0) { + ASSERT(IsListEmpty(&ipf->fwdList)); + RemoveEntryList(&ipf->link); + OvsFreeMemory(ipf); + } + + if (ipn->refCount == 0) { + ASSERT(IsListEmpty(&ipn->fwdList)); + RemoveEntryList(&ipn->link); + NdisAcquireSpinLock(&ovsIpHelperLock); + RemoveEntryList(&ipn->slink); + NdisReleaseSpinLock(&ovsIpHelperLock); + OvsFreeMemory(ipn); + } + + OvsFreeMemory(fwdEntry); +} + + +static VOID +OvsRemoveIPForwardEntry(POVS_IPFORWARD_ENTRY ipf) +{ + POVS_FWD_ENTRY fwdEntry; + PLIST_ENTRY link, next; + + ipf->refCount++; + + LIST_FORALL_SAFE(&ipf->fwdList, link, next) { + fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, ipfLink); + OvsRemoveFwdEntry(fwdEntry); + } + ASSERT(ipf->refCount == 1); + + RemoveEntryList(&ipf->link); + OvsFreeMemory(ipf); +} + + +static VOID +OvsRemoveIPNeighEntry(POVS_IPNEIGH_ENTRY ipn) +{ + PLIST_ENTRY link, next; + POVS_FWD_ENTRY fwdEntry; + + ipn->refCount++; + + LIST_FORALL_SAFE(&ipn->fwdList, link, next) { + fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, ipnLink); + OvsRemoveFwdEntry(fwdEntry); + } + + if (ipn->refCount == 1) { + RemoveEntryList(&ipn->link); + NdisAcquireSpinLock(&ovsIpHelperLock); + RemoveEntryList(&ipn->slink); + NdisReleaseSpinLock(&ovsIpHelperLock); + OvsFreeMemory(ipn); + } +} + + +static VOID +OvsAddToSortedNeighList(POVS_IPNEIGH_ENTRY ipn) +{ + PLIST_ENTRY link; + POVS_IPNEIGH_ENTRY entry; + + if (!IsListEmpty(&ovsSortedIPNeighList)) { + link = ovsSortedIPNeighList.Blink; + entry = CONTAINING_RECORD(link, OVS_IPNEIGH_ENTRY, slink); + if (entry->timeout > ipn->timeout) { + 
ipn->timeout++; + } + } + InsertTailList(&ovsSortedIPNeighList, &ipn->slink); +} + + +static VOID +OvsAddIPFwdCache(POVS_FWD_ENTRY fwdEntry, + POVS_IPFORWARD_ENTRY ipf, + POVS_IPNEIGH_ENTRY ipn) + +{ + UINT32 hash; + + if (ipn->refCount == 0) { + NdisAcquireSpinLock(&ovsIpHelperLock); + OvsAddToSortedNeighList(ipn); + NdisReleaseSpinLock(&ovsIpHelperLock); + hash = OvsJhashWords(&ipn->ipAddr, 1, OVS_HASH_BASIS); + InsertHeadList(&ovsNeighHashTable[hash & OVS_NEIGH_HASH_TABLE_MASK], + &ipn->link); + } + if (ipf->refCount == 0) { + hash = OvsHashIPPrefix(&ipf->prefix); + InsertHeadList(&ovsRouteHashTable[hash & OVS_ROUTE_HASH_TABLE_MASK], + &ipf->link); + } + + InsertHeadList(&ipf->fwdList, &fwdEntry->ipfLink); + ipf->refCount++; + fwdEntry->ipf = ipf; + + InsertHeadList(&ipn->fwdList, &fwdEntry->ipnLink); + ipn->refCount++; + fwdEntry->ipn = ipn; + + hash = OvsJhashWords(&fwdEntry->info.dstIpAddr, 1, OVS_HASH_BASIS); + InsertHeadList(&ovsFwdHashTable[hash & OVS_FWD_HASH_TABLE_MASK], + &fwdEntry->link); + ovsNumFwdEntries++; +} + + +static VOID +OvsRemoveAllFwdEntriesWithSrc(UINT32 ipAddr) +{ + UINT32 i; + POVS_FWD_ENTRY fwdEntry; + PLIST_ENTRY link, next; + + for (i = 0; i < OVS_FWD_HASH_TABLE_SIZE; i++) { + LIST_FORALL_SAFE(&ovsFwdHashTable[i], link, next) { + fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, link); + if (fwdEntry->info.srcIpAddr == ipAddr) { + OvsRemoveFwdEntry(fwdEntry); + } + } + } +} + + +static VOID +OvsCleanupFwdTable(VOID) +{ + PLIST_ENTRY link, next; + POVS_IPNEIGH_ENTRY ipn; + UINT32 i; + LOCK_STATE_EX lockState; + + NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); + if (ovsNumFwdEntries) { + LIST_FORALL_SAFE(&ovsSortedIPNeighList, link, next) { + ipn = CONTAINING_RECORD(link, OVS_IPNEIGH_ENTRY, slink); + OvsRemoveIPNeighEntry(ipn); + } + } + for (i = 0; i < OVS_FWD_HASH_TABLE_SIZE; i++) { + ASSERT(IsListEmpty(&ovsFwdHashTable[i])); + } + for (i = 0; i < OVS_ROUTE_HASH_TABLE_SIZE; i++) { + ASSERT(IsListEmpty(&ovsRouteHashTable[i])); + 
} + NdisReleaseRWLock(ovsTableLock, &lockState); +} + + +static VOID +OvsCleanupIpHelperRequestList(VOID) +{ + LIST_ENTRY list; + PLIST_ENTRY next, link; + POVS_IP_HELPER_REQUEST request; + + NdisAcquireSpinLock(&ovsIpHelperLock); + if (ovsNumIpHelperRequests == 0) { + NdisReleaseSpinLock(&ovsIpHelperLock); + return; + } + + InitializeListHead(&list); + OvsAppendList(&list, &ovsIpHelperRequestList); + ovsNumIpHelperRequests = 0; + NdisReleaseSpinLock(&ovsIpHelperLock); + + LIST_FORALL_SAFE(&list, link, next) { + request = CONTAINING_RECORD(link, OVS_IP_HELPER_REQUEST, link); + + if (request->command == OVS_IP_HELPER_FWD_REQUEST && + request->fwdReq.cb) { + request->fwdReq.cb(request->fwdReq.nbl, + request->fwdReq.inPort, + &request->fwdReq.tunnelKey, + request->fwdReq.cbData1, + request->fwdReq.cbData2, + STATUS_DEVICE_NOT_READY, + NULL); + } + OvsFreeMemory(request); + } +} + + + +static VOID +OvsWakeupIPHelper(VOID) +{ + KeSetEvent(&ovsIpHelperThreadContext.event, 0, FALSE); +} + +VOID +OvsInternalAdapterDown(VOID) +{ + NdisAcquireSpinLock(&ovsIpHelperLock); + ovsInternalPortNo = OVS_DEFAULT_PORT_NO; + ovsInternalIPConfigured = FALSE; + NdisReleaseSpinLock(&ovsIpHelperLock); + + OvsCleanupIpHelperRequestList(); + + OvsCleanupFwdTable(); +} + + +VOID +OvsInternalAdapterUp(UINT32 portNo, + GUID *netCfgInstanceId) +{ + POVS_IP_HELPER_REQUEST request; + + RtlCopyMemory(&ovsInternalNetCfgId, netCfgInstanceId, sizeof (GUID)); + RtlZeroMemory(&ovsInternalRow, sizeof (MIB_IF_ROW2)); + + request = + (POVS_IP_HELPER_REQUEST)OvsAllocateMemory(sizeof (OVS_IP_HELPER_REQUEST)); + if (request == NULL) { + OVS_LOG_ERROR("Fail to initialize Internal Adapter"); + return; + } + RtlZeroMemory(request, sizeof (OVS_IP_HELPER_REQUEST)); + request->command = OVS_IP_HELPER_INTERNAL_ADAPTER_UP; + + NdisAcquireSpinLock(&ovsIpHelperLock); + ovsInternalPortNo = portNo; + InsertHeadList(&ovsIpHelperRequestList, &request->link); + ovsNumIpHelperRequests++; + if (ovsNumIpHelperRequests == 1) { 
+ OvsWakeupIPHelper(); + } + NdisReleaseSpinLock(&ovsIpHelperLock); +} + + +static VOID +OvsHandleInternalAdapterUp(POVS_IP_HELPER_REQUEST request) +{ + NTSTATUS status; + MIB_UNICASTIPADDRESS_ROW ipEntry; + GUID *netCfgInstanceId = &ovsInternalNetCfgId; + + OvsFreeMemory(request); + + status = OvsGetIfEntry(&ovsInternalNetCfgId, &ovsInternalRow); + + if (status != STATUS_SUCCESS) { + OVS_LOG_ERROR("Fali to get IF entry for internal port with GUID" + " %08x-%04x-%04x-%04x-%02x%02x%02x%02x%02x%02x", + netCfgInstanceId->Data1, + netCfgInstanceId->Data2, + netCfgInstanceId->Data3, + *(UINT16 *)netCfgInstanceId->Data4, + netCfgInstanceId->Data4[2], + netCfgInstanceId->Data4[3], + netCfgInstanceId->Data4[4], + netCfgInstanceId->Data4[5], + netCfgInstanceId->Data4[6], + netCfgInstanceId->Data4[7]); + return; + } + + status = OvsGetIPInterfaceEntry(ovsInternalRow.InterfaceLuid, + &ovsInternalIPRow); + + if (status == STATUS_SUCCESS) { + NdisAcquireSpinLock(&ovsIpHelperLock); + ovsInternalIPConfigured = TRUE; + NdisReleaseSpinLock(&ovsIpHelperLock); + } else { + return; + } + + status = OvsGetIPEntry(ovsInternalRow.InterfaceLuid, &ipEntry); + if (status != STATUS_SUCCESS) { + OVS_LOG_INFO("Fali to get IP entry for internal port with GUID" + " %08x-%04x-%04x-%04x-%02x%02x%02x%02x%02x%02x", + netCfgInstanceId->Data1, + netCfgInstanceId->Data2, + netCfgInstanceId->Data3, + *(UINT16 *)netCfgInstanceId->Data4, + netCfgInstanceId->Data4[2], + netCfgInstanceId->Data4[3], + netCfgInstanceId->Data4[4], + netCfgInstanceId->Data4[5], + netCfgInstanceId->Data4[6], + netCfgInstanceId->Data4[7]); + } +} + + +static NTSTATUS +OvsEnqueueIpHelperRequest(POVS_IP_HELPER_REQUEST request) +{ + + NdisAcquireSpinLock(&ovsIpHelperLock); + + if (ovsInternalPortNo == OVS_DEFAULT_PORT_NO || + ovsInternalIPConfigured == FALSE) { + NdisReleaseSpinLock(&ovsIpHelperLock); + OvsFreeMemory(request); + return STATUS_NDIS_ADAPTER_NOT_READY; + } else { + InsertHeadList(&ovsIpHelperRequestList, 
&request->link); + ovsNumIpHelperRequests++; + if (ovsNumIpHelperRequests == 1) { + OvsWakeupIPHelper(); + } + NdisReleaseSpinLock(&ovsIpHelperLock); + return STATUS_SUCCESS; + } +} + + +NTSTATUS +OvsFwdIPHelperRequest(PNET_BUFFER_LIST nbl, + UINT32 inPort, + const OvsIPv4TunnelKey *tunnelKey, + OvsIPHelperCallback cb, + PVOID cbData1, + PVOID cbData2) +{ + POVS_IP_HELPER_REQUEST request; + + request = + (POVS_IP_HELPER_REQUEST)OvsAllocateMemory(sizeof (OVS_IP_HELPER_REQUEST)); + + if (request == NULL) { + return STATUS_INSUFFICIENT_RESOURCES; + } + request->command = OVS_IP_HELPER_FWD_REQUEST; + request->fwdReq.nbl = nbl; + request->fwdReq.inPort = inPort; + RtlCopyMemory(&request->fwdReq.tunnelKey, tunnelKey, + sizeof (*tunnelKey)); + request->fwdReq.cb = cb; + request->fwdReq.cbData1 = cbData1; + request->fwdReq.cbData2 = cbData2; + + return OvsEnqueueIpHelperRequest(request); +} + + +static VOID +OvsHandleFwdRequest(POVS_IP_HELPER_REQUEST request) +{ + SOCKADDR_INET dst, src; + NTSTATUS status = STATUS_SUCCESS; + MIB_IPFORWARD_ROW2 ipRoute; + MIB_IPNET_ROW2 ipNeigh; + OVS_FWD_INFO fwdInfo; + UINT32 ipAddr; + UINT32 srcAddr; + POVS_FWD_ENTRY fwdEntry = NULL; + POVS_IPFORWARD_ENTRY ipf = NULL; + POVS_IPNEIGH_ENTRY ipn = NULL; + LOCK_STATE_EX lockState; + BOOLEAN newIPF = FALSE; + BOOLEAN newIPN = FALSE; + BOOLEAN newFWD = FALSE; + + status = OvsLookupIPFwdInfo(request->fwdReq.tunnelKey.dst, + &fwdInfo); + if (status == STATUS_SUCCESS) { + goto fwd_handle_nbl; + } + + /* find IPRoute */ + RtlZeroMemory(&dst, sizeof(dst)); + RtlZeroMemory(&src, sizeof(src)); + RtlZeroMemory(&ipRoute, sizeof (MIB_IPFORWARD_ROW2)); + dst.si_family = AF_INET; + dst.Ipv4.sin_addr.s_addr = request->fwdReq.tunnelKey.dst; + + status = OvsGetRoute(ovsInternalRow.InterfaceLuid, &dst, &ipRoute, &src); + if (status != STATUS_SUCCESS) { + goto fwd_handle_nbl; + } + srcAddr = src.Ipv4.sin_addr.s_addr; + + /* find IPNeigh */ + ipAddr = ipRoute.NextHop.Ipv4.sin_addr.s_addr; + if (ipAddr != 0) { + 
NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); + ipn = OvsLookupIPNeighEntry(ipAddr); + if (ipn) { + goto fwd_request_done; + } + NdisReleaseRWLock(ovsTableLock, &lockState); + } + RtlZeroMemory(&ipNeigh, sizeof (ipNeigh)); + ipNeigh.InterfaceLuid.Value = ovsInternalRow.InterfaceLuid.Value; + if (ipAddr == 0) { + ipAddr = request->fwdReq.tunnelKey.dst; + } + status = OvsGetOrResolveIPNeigh(ipAddr, &ipNeigh); + if (status != STATUS_SUCCESS) { + goto fwd_handle_nbl; + } + + NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); + +fwd_request_done: + + /* + * Initialize ipf + */ + ipf = OvsLookupIPForwardEntry(&ipRoute.DestinationPrefix); + if (ipf == NULL) { + ipf = OvsCreateIPForwardEntry(&ipRoute); + if (ipf == NULL) { + NdisReleaseRWLock(ovsTableLock, &lockState); + status = STATUS_INSUFFICIENT_RESOURCES; + goto fwd_handle_nbl; + } + newIPF = TRUE; + } else { + PLIST_ENTRY link; + link = ipf->fwdList.Flink; + fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, ipfLink); + srcAddr = fwdEntry->info.srcIpAddr; + } + + /* + * initialize ipn + */ + if (ipn == NULL) { + ipn = OvsLookupIPNeighEntry(ipAddr); + if (ipn == NULL) { + ipn = OvsCreateIPNeighEntry(&ipNeigh); + if (ipn == NULL) { + NdisReleaseRWLock(ovsTableLock, &lockState); + status = STATUS_INSUFFICIENT_RESOURCES; + goto fwd_handle_nbl; + } + newIPN = TRUE; + } + } + + /* + * initialize fwdEntry + */ + fwdInfo.dstIpAddr = request->fwdReq.tunnelKey.dst; + fwdInfo.srcIpAddr = srcAddr; + RtlCopyMemory(fwdInfo.dstMacAddr, ipn->macAddr, MAC_ADDRESS_LEN); + RtlCopyMemory(fwdInfo.srcMacAddr, ovsInternalRow.PhysicalAddress, + MAC_ADDRESS_LEN); + fwdInfo.srcPortNo = request->fwdReq.inPort; + + fwdEntry = OvsCreateFwdEntry(&fwdInfo); + if (fwdEntry == NULL) { + NdisReleaseRWLock(ovsTableLock, &lockState); + status = STATUS_INSUFFICIENT_RESOURCES; + goto fwd_handle_nbl; + } + newFWD = TRUE; + /* + * Cache the result + */ + OvsAddIPFwdCache(fwdEntry, ipf, ipn); + NdisReleaseRWLock(ovsTableLock, &lockState); + 
+fwd_handle_nbl: + + if (status != STATUS_SUCCESS) { + if (newFWD) { + ASSERT(fwdEntry != NULL); + OvsFreeMemory(fwdEntry); + } + if (newIPF) { + ASSERT(ipf && ipf->refCount == 0); + OvsFreeMemory(ipf); + } + if (newIPN) { + ASSERT(ipn && ipn->refCount == 0); + OvsFreeMemory(ipn); + } + ipAddr = request->fwdReq.tunnelKey.dst; + OVS_LOG_INFO("Fail to handle IP helper request for dst: %d.%d.%d.%d", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); + } + if (request->fwdReq.cb) { + request->fwdReq.cb(request->fwdReq.nbl, + request->fwdReq.inPort, + &request->fwdReq.tunnelKey, + request->fwdReq.cbData1, + request->fwdReq.cbData2, + status, + status == STATUS_SUCCESS ? &fwdInfo : NULL); + } + OvsFreeMemory(request); +} + + +static VOID +OvsUpdateIPNeighEntry(UINT32 ipAddr, + PMIB_IPNET_ROW2 ipNeigh, + NTSTATUS status) +{ + UINT64 timeVal; + POVS_IPNEIGH_ENTRY ipn; + LOCK_STATE_EX lockState; + KeQuerySystemTime((LARGE_INTEGER *)&timeVal); + /* + * if mac changed, update all relevant fwdEntry + */ + if (status != STATUS_SUCCESS) { + NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); + } else { + NdisAcquireRWLockRead(ovsTableLock, &lockState, 0); + } + ipn = OvsLookupIPNeighEntry(ipAddr); + if (ipn == NULL) { + NdisReleaseRWLock(ovsTableLock, &lockState); + return; + } + if (status != STATUS_SUCCESS) { + OvsRemoveIPNeighEntry(ipn); + NdisReleaseRWLock(ovsTableLock, &lockState); + return; + } + + if (memcmp((const PVOID)ipn->macAddr, + (const PVOID)ipNeigh->PhysicalAddress, + (size_t)MAC_ADDRESS_LEN)) { + PLIST_ENTRY link; + POVS_FWD_ENTRY fwdEntry; + NdisReleaseRWLock(ovsTableLock, &lockState); + /* + * need update, release and acquire write lock + * This is not the common case. 
+ */ + + NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); + ipn = OvsLookupIPNeighEntry(ipAddr); + + if (ipn == NULL) { + NdisReleaseRWLock(ovsTableLock, &lockState); + return; + } + + LIST_FORALL(&ipn->fwdList, link) { + fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, ipnLink); + RtlCopyMemory(fwdEntry->info.dstMacAddr, + ipNeigh->PhysicalAddress, MAC_ADDRESS_LEN); + } + } + /* + * update timeout and move to the end of + * the sorted list + */ + + NdisAcquireSpinLock(&ovsIpHelperLock); + RemoveEntryList(&ipn->slink); + ipn->timeout = timeVal + OVS_IPNEIGH_TIMEOUT; + OvsAddToSortedNeighList(ipn); + NdisReleaseSpinLock(&ovsIpHelperLock); + NdisReleaseRWLock(ovsTableLock, &lockState); +} + + +static VOID +OvsHandleIPNeighTimeout(UINT32 ipAddr) +{ + MIB_IPNET_ROW2 ipNeigh; + NTSTATUS status; + + status = OvsGetOrResolveIPNeigh(ipAddr, &ipNeigh); + + OvsUpdateIPNeighEntry(ipAddr, &ipNeigh, status); +} + + +/* + *---------------------------------------------------------------------------- + * IP Helper system threash handle following request + * 1. Intialize Internal port row when internal port is connected + * 2. Handle FWD request + * 3. Handle IP Neigh timeout + * + * IP Interface, unicast address, and IP route change will be handled + * by the revelant callback. 
+ *---------------------------------------------------------------------------- + */ +VOID +OvsStartIpHelper(PVOID data) +{ + POVS_IP_HELPER_THREAD_CONTEXT context = (POVS_IP_HELPER_THREAD_CONTEXT)data; + POVS_IP_HELPER_REQUEST req; + POVS_IPNEIGH_ENTRY ipn; + PLIST_ENTRY link; + UINT64 timeVal, timeout; + + OVS_LOG_INFO("Start the IP Helper Thread, context: %p", context); + + NdisAcquireSpinLock(&ovsIpHelperLock); + while (!context->exit) { + + timeout = 0; + while (!IsListEmpty(&ovsIpHelperRequestList)) { + if (context->exit) { + goto ip_helper_wait; + } + link = ovsIpHelperRequestList.Flink; + RemoveEntryList(link); + NdisReleaseSpinLock(&ovsIpHelperLock); + req = CONTAINING_RECORD(link, OVS_IP_HELPER_REQUEST, link); + switch (req->command) { + case OVS_IP_HELPER_INTERNAL_ADAPTER_UP: + OvsHandleInternalAdapterUp(req); + break; + case OVS_IP_HELPER_FWD_REQUEST: + OvsHandleFwdRequest(req); + break; + default: + OvsFreeMemory(req); + } + NdisAcquireSpinLock(&ovsIpHelperLock); + } + + /* for now, let us hold the lock here, if this cause any issue + * we will change to use IpHelper lock only to protect + * IPN + */ + while (!IsListEmpty(&ovsSortedIPNeighList)) { + UINT32 ipAddr; + if (context->exit) { + goto ip_helper_wait; + } + link = ovsSortedIPNeighList.Flink; + ipn = CONTAINING_RECORD(link, OVS_IPNEIGH_ENTRY, slink); + KeQuerySystemTime((LARGE_INTEGER *)&timeVal); + if (ipn->timeout > timeVal) { + timeout = ipn->timeout; + break; + } + ipAddr = ipn->ipAddr; + + NdisReleaseSpinLock(&ovsIpHelperLock); + + OvsHandleIPNeighTimeout(ipAddr); + + NdisAcquireSpinLock(&ovsIpHelperLock); + } + if (!IsListEmpty(&ovsIpHelperRequestList)) { + continue; + } + +ip_helper_wait: + if (context->exit) { + break; + } + + KeClearEvent(&context->event); + NdisReleaseSpinLock(&ovsIpHelperLock); + + KeWaitForSingleObject(&context->event, Executive, KernelMode, + FALSE, (LARGE_INTEGER *)&timeout); + NdisAcquireSpinLock(&ovsIpHelperLock); + } + NdisReleaseSpinLock(&ovsIpHelperLock); + 
OvsCleanupFwdTable(); + OvsCleanupIpHelperRequestList(); + + OVS_LOG_INFO("Terminating the OVS IP Helper system thread"); + + PsTerminateSystemThread(STATUS_SUCCESS); +} + + +NTSTATUS +OvsInitIpHelper(NDIS_HANDLE ndisFilterHandle) +{ + NTSTATUS status; + HANDLE threadHandle; + UINT32 i; + + ovsFwdHashTable = (PLIST_ENTRY)OvsAllocateMemory(sizeof(LIST_ENTRY) * + OVS_FWD_HASH_TABLE_SIZE); + + ovsRouteHashTable = (PLIST_ENTRY)OvsAllocateMemory(sizeof(LIST_ENTRY) * + OVS_ROUTE_HASH_TABLE_SIZE); + + ovsNeighHashTable = (PLIST_ENTRY)OvsAllocateMemory(sizeof(LIST_ENTRY) * + OVS_NEIGH_HASH_TABLE_SIZE); + + RtlZeroMemory(&ovsInternalRow, sizeof(MIB_IF_ROW2)); + RtlZeroMemory(&ovsInternalIPRow, sizeof (MIB_IPINTERFACE_ROW)); + ovsInternalIP = 0; + + ovsInternalPortNo = OVS_DEFAULT_PORT_NO; + + InitializeListHead(&ovsSortedIPNeighList); + + ovsTableLock = NdisAllocateRWLock(ndisFilterHandle); + NdisAllocateSpinLock(&ovsIpHelperLock); + + InitializeListHead(&ovsIpHelperRequestList); + ovsNumIpHelperRequests = 0; + ipInterfaceNotificationHandle = NULL; + ipRouteNotificationHandle = NULL; + unicastIPNotificationHandle = NULL; + + if (ovsFwdHashTable == NULL || + ovsRouteHashTable == NULL || + ovsNeighHashTable == NULL || + ovsTableLock == NULL) { + status = STATUS_INSUFFICIENT_RESOURCES; + goto init_cleanup; + } + + for (i = 0; i < OVS_FWD_HASH_TABLE_SIZE; i++) { + InitializeListHead(&ovsFwdHashTable[i]); + } + + for (i = 0; i < OVS_ROUTE_HASH_TABLE_SIZE; i++) { + InitializeListHead(&ovsRouteHashTable[i]); + } + + for (i = 0; i < OVS_NEIGH_HASH_TABLE_SIZE; i++) { + InitializeListHead(&ovsNeighHashTable[i]); + } + + + KeInitializeEvent(&ovsIpHelperThreadContext.event, NotificationEvent, + FALSE); + status = OvsRegisterChangeNotification(); + ovsIpHelperThreadContext.exit = 0; + if (status == STATUS_SUCCESS) { + status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, + NULL, NULL, NULL, OvsStartIpHelper, + &ovsIpHelperThreadContext); + if (status != STATUS_SUCCESS) { + goto 
init_cleanup; + } + ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL, + KernelMode, + &ovsIpHelperThreadContext.threadObject, + NULL); + ZwClose(threadHandle); + } + +init_cleanup: + + if (status != STATUS_SUCCESS) { + OvsCancelChangeNotification(); + if (ovsFwdHashTable) { + OvsFreeMemory(ovsFwdHashTable); + ovsFwdHashTable = NULL; + } + if (ovsRouteHashTable) { + OvsFreeMemory(ovsRouteHashTable); + ovsRouteHashTable = NULL; + } + if (ovsNeighHashTable) { + OvsFreeMemory(ovsNeighHashTable); + ovsNeighHashTable = NULL; + } + if (ovsTableLock) { + NdisFreeRWLock(ovsTableLock); + ovsTableLock = NULL; + } + NdisFreeSpinLock(&ovsIpHelperLock); + } + return STATUS_SUCCESS; +} + + +VOID +OvsCleanupIpHelper(VOID) +{ + OvsCancelChangeNotification(); + + NdisAcquireSpinLock(&ovsIpHelperLock); + ovsIpHelperThreadContext.exit = 1; + OvsWakeupIPHelper(); + NdisReleaseSpinLock(&ovsIpHelperLock); + + KeWaitForSingleObject(ovsIpHelperThreadContext.threadObject, Executive, + KernelMode, FALSE, NULL); + ObDereferenceObject(ovsIpHelperThreadContext.threadObject); + + OvsFreeMemory(ovsFwdHashTable); + OvsFreeMemory(ovsRouteHashTable); + OvsFreeMemory(ovsNeighHashTable); + + NdisFreeRWLock(ovsTableLock); + NdisFreeSpinLock(&ovsIpHelperLock); +} + +VOID +OvsCancelFwdIpHelperRequest(PNET_BUFFER_LIST nbl) +{ + PLIST_ENTRY link, next; + POVS_IP_HELPER_REQUEST req; + LIST_ENTRY list; + InitializeListHead(&list); + + NdisAcquireSpinLock(&ovsIpHelperLock); + LIST_FORALL_SAFE(&ovsIpHelperRequestList, link, next) { + req = CONTAINING_RECORD(link, OVS_IP_HELPER_REQUEST, link); + if (req->command == OVS_IP_HELPER_FWD_REQUEST && + (nbl == NULL || req->fwdReq.nbl == nbl)) { + RemoveEntryList(link); + InsertHeadList(&list, link); + if (nbl != NULL) { + break; + } + } + } + NdisReleaseSpinLock(&ovsIpHelperLock); + + LIST_FORALL_SAFE(&list, link, next) { + req = CONTAINING_RECORD(link, OVS_IP_HELPER_REQUEST, link); + if (req->fwdReq.cb) { + req->fwdReq.cb(req->fwdReq.nbl, req->fwdReq.inPort, 
+ &req->fwdReq.tunnelKey, + req->fwdReq.cbData1, + req->fwdReq.cbData2, + STATUS_DEVICE_NOT_READY, + NULL); + } + OvsFreeMemory(req); + } +} diff --git a/datapath-windows/ovsext/OvsIpHelper.h b/datapath-windows/ovsext/OvsIpHelper.h new file mode 100644 index 000000000..dc2602bde --- /dev/null +++ b/datapath-windows/ovsext/OvsIpHelper.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OVS_IP_HELPER_H_ +#define __OVS_IP_HELPER_H_ 1 + +#include <ntddk.h> +#include <netioapi.h> + +#define OVS_FWD_HASH_TABLE_SIZE ((UINT32)1 << 10) +#define OVS_FWD_HASH_TABLE_MASK (OVS_FWD_HASH_TABLE_SIZE - 1) + +#define OVS_ROUTE_HASH_TABLE_SIZE ((UINT32)1 << 8) +#define OVS_ROUTE_HASH_TABLE_MASK (OVS_ROUTE_HASH_TABLE_SIZE - 1) + +#define OVS_NEIGH_HASH_TABLE_SIZE ((UINT32)1 << 8) +#define OVS_NEIGH_HASH_TABLE_MASK (OVS_NEIGH_HASH_TABLE_SIZE - 1) + +#define OVS_IPNEIGH_TIMEOUT 100000000 // 10 s + + +typedef struct _OVS_IPNEIGH_ENTRY { + UINT8 macAddr[MAC_ADDRESS_LEN]; + UINT16 refCount; + UINT32 ipAddr; + UINT32 pad; + UINT64 timeout; + LIST_ENTRY link; + LIST_ENTRY slink; + LIST_ENTRY fwdList; +} OVS_IPNEIGH_ENTRY, *POVS_IPNEIGH_ENTRY; + +typedef struct _OVS_IPFORWARD_ENTRY { + IP_ADDRESS_PREFIX prefix; + UINT32 nextHop; + UINT16 refCount; + LIST_ENTRY link; + LIST_ENTRY fwdList; +} OVS_IPFORWARD_ENTRY, *POVS_IPFORWARD_ENTRY; + +typedef union _OVS_FWD_INFO { + struct { + UINT32 dstIpAddr; + UINT32 
srcIpAddr; + UINT8 dstMacAddr[MAC_ADDRESS_LEN]; + UINT8 srcMacAddr[MAC_ADDRESS_LEN]; + UINT32 srcPortNo; + }; + UINT64 value[3]; +} OVS_FWD_INFO, *POVS_FWD_INFO; + +typedef struct _OVS_FWD_ENTRY { + OVS_FWD_INFO info; + POVS_IPFORWARD_ENTRY ipf; + POVS_IPNEIGH_ENTRY ipn; + LIST_ENTRY link; + LIST_ENTRY ipfLink; + LIST_ENTRY ipnLink; +} OVS_FWD_ENTRY, *POVS_FWD_ENTRY; + + +enum { + OVS_IP_HELPER_INTERNAL_ADAPTER_UP, + OVS_IP_HELPER_FWD_REQUEST, +}; + +typedef VOID (*OvsIPHelperCallback)(PNET_BUFFER_LIST nbl, + UINT32 inPort, + PVOID tunnelKey, + PVOID cbData1, + PVOID cbData2, + NTSTATUS status, + POVS_FWD_INFO fwdInfo); + +typedef struct _OVS_FWD_REQUEST_INFO { + PNET_BUFFER_LIST nbl; + UINT32 inPort; + OvsIPv4TunnelKey tunnelKey; + OvsIPHelperCallback cb; + PVOID cbData1; + PVOID cbData2; +} OVS_FWD_REQUEST_INFO, *POVS_FWD_REQUEST_INFO; + + +typedef struct _OVS_IP_HELPER_REQUEST { + LIST_ENTRY link; + UINT32 command; + union { + OVS_FWD_REQUEST_INFO fwdReq; + UINT32 dummy; + }; +} OVS_IP_HELPER_REQUEST, *POVS_IP_HELPER_REQUEST; + + +typedef struct _OVS_IP_HELPER_THREAD_CONTEXT { + KEVENT event; + PVOID threadObject; + UINT32 exit; +} OVS_IP_HELPER_THREAD_CONTEXT, *POVS_IP_HELPER_THREAD_CONTEXT; + +NTSTATUS OvsInitIpHelper(NDIS_HANDLE ndisFilterHandle); +VOID OvsCleanupIpHelper(VOID); + +VOID OvsInternalAdapterUp(UINT32 portNo, GUID *netCfgInstanceId); +VOID OvsInternalAdapterDown(VOID); + +NTSTATUS OvsFwdIPHelperRequest(PNET_BUFFER_LIST nbl, UINT32 inPort, + const PVOID tunnelKey, + OvsIPHelperCallback cb, + PVOID cbData1, + PVOID cbData2); +NTSTATUS OvsLookupIPFwdInfo(UINT32 dstIp, POVS_FWD_INFO info); +VOID OvsCancelFwdIpHelperRequest(PNET_BUFFER_LIST nbl); + +#endif /* __OVS_IP_HELPER_H_ */ diff --git a/datapath-windows/ovsext/OvsJhash.c b/datapath-windows/ovsext/OvsJhash.c new file mode 100644 index 000000000..db08d0b46 --- /dev/null +++ b/datapath-windows/ovsext/OvsJhash.c @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2008, 2009, 2010, 2012, 2014 Nicira, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "precomp.h" + +static __inline UINT32 +GetUnalignedU32(const UINT32 *p_) +{ + const UINT8 *p = (const UINT8 *)p_; + return ntohl((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]); +} + +/* This is the public domain lookup3 hash by Bob Jenkins from + * http://burtleburtle.net/bob/c/lookup3.c, modified for style. */ + +static __inline UINT32 +JhashRot(UINT32 x, INT k) +{ + return (x << k) | (x >> (32 - k)); +} + +static __inline VOID +JhashMix(UINT32 *a, UINT32 *b, UINT32 *c) +{ + *a -= *c; *a ^= JhashRot(*c, 4); *c += *b; + *b -= *a; *b ^= JhashRot(*a, 6); *a += *c; + *c -= *b; *c ^= JhashRot(*b, 8); *b += *a; + *a -= *c; *a ^= JhashRot(*c, 16); *c += *b; + *b -= *a; *b ^= JhashRot(*a, 19); *a += *c; + *c -= *b; *c ^= JhashRot(*b, 4); *b += *a; +} + +static __inline VOID +JhashFinal(UINT32 *a, UINT32 *b, UINT32 *c) +{ + *c ^= *b; *c -= JhashRot(*b, 14); + *a ^= *c; *a -= JhashRot(*c, 11); + *b ^= *a; *b -= JhashRot(*a, 25); + *c ^= *b; *c -= JhashRot(*b, 16); + *a ^= *c; *a -= JhashRot(*c, 4); + *b ^= *a; *b -= JhashRot(*a, 14); + *c ^= *b; *c -= JhashRot(*b, 24); +} + +/* Returns the Jenkins hash of the 'n' 32-bit words at 'p', starting from + * 'basis'. 'p' must be properly aligned. + * + * Use hash_words() instead, unless you're computing a hash function whose + * value is exposed "on the wire" so we don't want to change it. 
*/ +UINT32 +OvsJhashWords(const UINT32 *p, SIZE_T n, UINT32 basis) +{ + UINT32 a, b, c; + + a = b = c = 0xdeadbeef + (((UINT32) n) << 2) + basis; + + while (n > 3) { + a += p[0]; + b += p[1]; + c += p[2]; + JhashMix(&a, &b, &c); + n -= 3; + p += 3; + } + + switch (n) { + case 3: + c += p[2]; + /* fall through */ + case 2: + b += p[1]; + /* fall through */ + case 1: + a += p[0]; + JhashFinal(&a, &b, &c); + /* fall through */ + case 0: + break; + } + return c; +} + +/* Returns the Jenkins hash of the 'n' bytes at 'p', starting from 'basis'. + * + * Use hash_bytes() instead, unless you're computing a hash function whose + * value is exposed "on the wire" so we don't want to change it. */ +UINT32 +OvsJhashBytes(const VOID *p_, SIZE_T n, UINT32 basis) +{ + const UINT32 *p = p_; + UINT32 a, b, c; + + a = b = c = 0xdeadbeef + (UINT32)n + basis; + + while (n >= 12) { + a += GetUnalignedU32(p); + b += GetUnalignedU32(p + 1); + c += GetUnalignedU32(p + 2); + JhashMix(&a, &b, &c); + n -= 12; + p += 3; + } + + if (n) { + UINT32 tmp[3]; + + tmp[0] = tmp[1] = tmp[2] = 0; + memcpy(tmp, p, n); + a += tmp[0]; + b += tmp[1]; + c += tmp[2]; + JhashFinal(&a, &b, &c); + } + + return c; +} diff --git a/datapath-windows/ovsext/OvsJhash.h b/datapath-windows/ovsext/OvsJhash.h new file mode 100644 index 000000000..a12be8e27 --- /dev/null +++ b/datapath-windows/ovsext/OvsJhash.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2008, 2009, 2010, 2012, 2014 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OVS_JHASH_H_ +#define __OVS_JHASH_H_ 1 + +/* This is the public domain lookup3 hash by Bob Jenkins from + * http://burtleburtle.net/bob/c/lookup3.c, modified for style. + * + * Use the functions in hash.h instead if you can. These are here just for + * places where we've exposed a hash function "on the wire" and don't want it + * to change. */ + +uint32_t OvsJhashWords(const uint32_t *, size_t n_word, uint32_t basis); +uint32_t OvsJhashBytes(const void *, size_t n_bytes, uint32_t basis); + +#endif /* __OVS_JHASH_H */ diff --git a/datapath-windows/ovsext/OvsNetProto.h b/datapath-windows/ovsext/OvsNetProto.h new file mode 100644 index 000000000..5e98206f5 --- /dev/null +++ b/datapath-windows/ovsext/OvsNetProto.h @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OVS_NET_PROTO_H_ +#define __OVS_NET_PROTO_H_ 1 + +#include "precomp.h" +#include "OvsEth.h" + +#define ETH_ADDR_LENGTH 6 +/* + * There is a more inclusive definition of ethernet header (Eth_Header) in + * OvsEth.h that is used for packet parsing. For simple cases, , use the following definition. 
+ */ +typedef struct EthHdr { + UINT8 Destination[ETH_ADDR_LENGTH]; + UINT8 Source[ETH_ADDR_LENGTH]; + UINT16 Type; +} EthHdr, *PEthHdr; + +#define IPV4 4 +#define IPV6 6 + +#define IP_HDR_MIN_LENGTH 20 +#define TCP_HDR_MIN_LENGTH 20 +#define TCP_CSUM_OFFSET 16 +#define UDP_CSUM_OFFSET 6 +#define ICMP_CSUM_OFFSET 2 +#define INET_CSUM_LENGTH (sizeof(UINT16)) + +#define IP4_UNITS_TO_BYTES(x) ((x) << 2) +#define IP4_BYTES_TO_UNITS(x) ((x) >> 2) + +// length unit for ip->ihl, tcp->doff +typedef UINT32 IP4UnitLength; + +#define IP4_LENGTH_UNIT (sizeof(IP4UnitLength)) +#define IP4_HDR_MIN_LENGTH_IN_UNITS (IP_HDR_MIN_LENGTH / IP4_LENGTH_UNIT) +#define TCP_HDR_MIN_LENGTH_IN_UNITS (TCP_HDR_MIN_LENGTH / IP4_LENGTH_UNIT) + +#define IP4_IHL_NO_OPTIONS IP4_HDR_MIN_LENGTH_IN_UNITS +#define IP4_HDR_LEN(iph) IP4_UNITS_TO_BYTES((iph)->ihl) + +// length unit for ip->frag_off +typedef UINT64 IP4FragUnitLength; + +#define IP4_FRAG_UNIT_LENGTH (sizeof(IP4FragUnitLength)) + +// length UINT for ipv6 header length. +typedef UINT64 IP6UnitLength; + +#define TCP_HDR_LEN(tcph) IP4_UNITS_TO_BYTES((tcph)->doff) +#define TCP_DATA_LENGTH(iph, tcph) (ntohs(iph->tot_len) - \ + IP4_HDR_LEN(iph) - TCP_HDR_LEN(tcph)) + +#define TCP_DATA_OFFSET_NO_OPTIONS TCP_HDR_MIN_LENGTH_IN_UNITS +#define TCP_DATA_OFFSET_WITH_TIMESTAMP 8 + +/* + * This is the maximum value for the length field in the IP header. 
The meaning + * varies with IP protocols: + * IPv4: the total ip length (including ip header and extension) + * IPv6: the IP payload length (including IP extensions) + */ +#define IP_MAX_PACKET 0xFFFF + +#define IPPROTO_ICMP 1 +#define IPPROTO_IGMP 2 +#define IPPROTO_UDP 17 +#define IPPROTO_TCP 6 +#define IPPROTO_RSVD 0xff + +#define IPPROTO_HOPOPTS 0 /* Hop-by-hop option header */ +#define IPPROTO_IPV6 41 /* IPv6 in IPv6 */ +#define IPPROTO_ROUTING 43 /* Routing header */ +#define IPPROTO_FRAGMENT 44 /* Fragmentation/reassembly header */ +#define IPPROTO_GRE 47 /* General Routing Encapsulation */ +#define IPPROTO_ESP 50 /* Encap. Security Payload */ +#define IPPROTO_AH 51 /* Authentication header */ +#define IPPROTO_ICMPV6 58 /* ICMP for IPv6 */ +#define IPPROTO_NONE 59 /* No next header */ +#define IPPROTO_DSTOPTS 60 /* Destination options header */ +#define IPPROTO_ETHERIP 97 /* etherIp tunneled protocol */ + +/* ICMPv6 types. */ +#define ND_NEIGHBOR_SOLICIT 135 /* neighbor solicitation */ +#define ND_NEIGHBOR_ADVERT 136 /* neighbor advertisement */ + +/* IPv6 Neighbor discovery option header. */ +#define ND_OPT_SOURCE_LINKADDR 1 +#define ND_OPT_TARGET_LINKADDR 2 + +/* Collides with MS definition (opposite order) */ +#define IP6F_OFF_HOST_ORDER_MASK 0xfff8 + +#define ARPOP_REQUEST 1 /* ARP request. */ +#define ARPOP_REPLY 2 /* ARP reply. */ +#define RARPOP_REQUEST 3 /* RARP request. */ +#define RARPOP_REPLY 4 /* RARP reply. */ + + /* all ARP NBO's assume short ar_op; each value below is the matching + * 16-bit opcode above with its two bytes swapped */ +#define ARPOP_REQUEST_NBO 0x0100 /* NBO ARP request. */ +#define ARPOP_REPLY_NBO 0x0200 /* NBO ARP reply. */ +#define RARPOP_REQUEST_NBO 0x0300 /* NBO RARP request. */ +#define RARPOP_REPLY_NBO 0x0400 /* NBO RARP reply. 
*/ + +#define ICMP_ECHO 8 /* Echo Request */ +#define ICMP_ECHOREPLY 0 /* Echo Reply */ +#define ICMP_DEST_UNREACH 3 /* Destination Unreachable */ + +/* IGMP related constants */ +#define IGMP_UNKNOWN 0x00 /* For IGMP packets where we don't know the type */ + /* Eg: Fragmented packets without the header */ + +/* Constants from RFC 3376 */ +#define IGMP_QUERY 0x11 /* IGMP Host Membership Query. */ +#define IGMP_V1REPORT 0x12 /* IGMPv1 Host Membership Report. */ +#define IGMP_V2REPORT 0x16 /* IGMPv2 Host Membership Report. */ +#define IGMP_V3REPORT 0x22 /* IGMPv3 Host Membership Report. */ +#define IGMP_V2LEAVE 0x17 /* IGMPv2 Leave. */ + +/* Constants from RFC 2710 and RFC 3810 */ +#define MLD_QUERY 0x82 /* Multicast Listener Query. */ +#define MLD_V1REPORT 0x83 /* Multicast Listener V1 Report. */ +#define MLD_V2REPORT 0x8F /* Multicast Listener V2 Report. */ +#define MLD_DONE 0x84 /* Multicast Listener Done. */ + +/* IPv4 offset flags */ +#define IP_CE 0x8000 /* Flag: "Congestion" */ +#define IP_DF 0x4000 /* Flag: "Don't Fragment" */ +#define IP_MF 0x2000 /* Flag: "More Fragments" */ +#define IP_OFFSET 0x1FFF /* "Fragment Offset" part */ + +#define IP_OFFSET_NBO 0xFF1F /* "Fragment Offset" part, NBO */ +#define IP_DF_NBO 0x0040 /* NBO version of don't fragment */ +#define IP_MF_NBO 0x0020 /* NBO version of more fragments */ + +#define IPOPT_RTRALT 0x94 + +/* IP Explicit Congestion Notification bits (TOS field) */ +#define IP_ECN_NOT_ECT 0 +#define IP_ECN_ECT_1 1 +#define IP_ECN_ECT_0 2 +#define IP_ECN_CE 3 +#define IP_ECN_MASK 3 + +/* TCP options */ +#define TCP_OPT_NOP 1 /* Padding */ +#define TCP_OPT_EOL 0 /* End of options */ +#define TCP_OPT_MSS 2 /* Segment size negotiating */ +#define TCP_OPT_WINDOW 3 /* Window scaling */ +#define TCP_OPT_SACK_PERM 4 /* SACK Permitted */ +#define TCP_OPT_SACK 5 /* SACK Block */ +#define TCP_OPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ +#define TCP_OPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ + +#define TCP_OPT_LEN_MSS 4 
+#define TCP_OPT_LEN_WINDOW 3 +#define TCP_OPT_LEN_SACK_PERM 2 +#define TCP_OPT_LEN_TIMESTAMP 10 +#define TCP_OPT_LEN_MD5SIG 18 + +#define SOCKET_IPPROTO_HOPOPTS IPPROTO_HOPOPTS +#define SOCKET_IPPROTO_ROUTING IPPROTO_ROUTING +#define SOCKET_IPPROTO_FRAGMENT IPPROTO_FRAGMENT +#define SOCKET_IPPROTO_AH IPPROTO_AH +#define SOCKET_IPPROTO_ICMPV6 IPPROTO_ICMPV6 +#define SOCKET_IPPROTO_NONE IPPROTO_NONE +#define SOCKET_IPPROTO_DSTOPTS IPPROTO_DSTOPTS +#define SOCKET_IPPROTO_EON 80 +#define SOCKET_IPPROTO_ETHERIP IPPROTO_ETHERIP +#define SOCKET_IPPROTO_ENCAP 98 +#define SOCKET_IPPROTO_PIM 103 +#define SOCKET_IPPROTO_IPCOMP 108 +#define SOCKET_IPPROTO_CARP 112 +#define SOCKET_IPPROTO_PFSYNC 240 +#define SOCKET_IPPROTO_RAW IPPROTO_RSVD + +typedef union _OVS_PACKET_HDR_INFO { + struct { + UINT16 l3Offset; + UINT16 l4Offset; + union { + UINT16 l7Offset; + UINT16 l4PayLoad; + }; + UINT16 isIPv4:1; + UINT16 isIPv6:1; + UINT16 isTcp:1; + UINT16 isUdp:1; + UINT16 tcpCsumNeeded:1; + UINT16 udpCsumNeeded:1; + UINT16 udpCsumZero:1; + UINT16 pad:9; + } ; + UINT64 value; +} OVS_PACKET_HDR_INFO, *POVS_PACKET_HDR_INFO; + +typedef struct IPHdr { + UINT8 ihl:4, + version:4; + UINT8 tos; + UINT16 tot_len; + UINT16 id; + UINT16 frag_off; + UINT8 ttl; + UINT8 protocol; + UINT16 check; + UINT32 saddr; + UINT32 daddr; +} IPHdr; + + + /* + * IPv6 fixed header + * + * BEWARE, it is incorrect. The first 4 bits of flow_lbl + * are glued to priority now, forming "class". 
+ */ + +typedef struct IPv6Hdr { + UINT8 priority:4, + version:4; + UINT8 flow_lbl[3]; + + UINT16 payload_len; + UINT8 nexthdr; + UINT8 hop_limit; + + struct in6_addr saddr; + struct in6_addr daddr; +} IPv6Hdr; + +// Generic IPv6 extension header +typedef struct IPv6ExtHdr { + UINT8 nextHeader; // type of the next header + UINT8 hdrExtLen; // length of header extensions (beyond 8 bytes) + UINT16 optPad1; + UINT32 optPad2; +} IPv6ExtHdr; + +typedef struct IPv6FragHdr { + UINT8 nextHeader; + UINT8 reserved; + UINT16 offlg; + UINT32 ident; +} IPv6FragHdr; + +typedef struct IPv6NdOptHdr { + UINT8 type; + UINT8 len; +} IPv6NdOptHdr; + +typedef struct ICMPHdr { + UINT8 type; + UINT8 code; + UINT16 checksum; +} ICMPHdr; + +typedef struct ICMPEcho { + UINT16 id; + UINT16 seq; +} ICMPEcho; + +typedef struct UDPHdr { + UINT16 source; + UINT16 dest; + UINT16 len; + UINT16 check; +} UDPHdr; + +typedef struct TCPHdr { + UINT16 source; + UINT16 dest; + UINT32 seq; + UINT32 ack_seq; + UINT16 res1:4, + doff:4, + fin:1, + syn:1, + rst:1, + psh:1, + ack:1, + urg:1, + ece:1, + cwr:1; + UINT16 window; + UINT16 check; + UINT16 urg_ptr; +} TCPHdr; + +typedef struct PseudoHdr { + UINT32 sourceIPAddr; + UINT32 destIPAddr; + UINT8 zero; + UINT8 protocol; + UINT16 length; +} PseudoHdr; + +typedef struct PseudoHdrIPv6 { + UINT8 sourceIPAddr[16]; + UINT8 destIPAddr[16]; + UINT8 zero; + UINT8 protocol; + UINT16 length; +} PseudoHdrIPv6; + + +struct ArpHdr { + UINT16 ar_hrd; /* Format of hardware address. */ + UINT16 ar_pro; /* Format of protocol address. */ + UINT8 ar_hln; /* Length of hardware address. */ + UINT8 ar_pln; /* Length of protocol address. */ + UINT16 ar_op; /* ARP opcode (command). 
*/ +}; + +typedef struct EtherArp { + struct ArpHdr ea_hdr; /* fixed-size header */ + Eth_Address arp_sha; /* sender hardware address */ + UINT8 arp_spa[4]; /* sender protocol address */ + Eth_Address arp_tha; /* target hardware address */ + UINT8 arp_tpa[4]; /* target protocol address */ +} EtherArp; + +typedef struct IGMPHdr { + UINT8 type; + UINT8 maxResponseTime; + UINT16 csum; + UINT8 groupAddr[4]; +} IGMPHdr; + +typedef struct IGMPV3Trailer { + UINT8 qrv:3, + s:1, + resv:4; + UINT8 qqic; + UINT16 numSources; +} IGMPV3Trailer; + +typedef struct IPOpt { + UINT8 type; + UINT8 length; + UINT16 value; +} IPOpt; + +/* + * IP protocol types + */ +#define SOCKET_IPPROTO_IP 0 +#define SOCKET_IPPROTO_ICMP 1 +#define SOCKET_IPPROTO_TCP 6 +#define SOCKET_IPPROTO_UDP 17 +#define SOCKET_IPPROTO_GRE 47 + +#endif /* __OVS_NET_PROTO_H_ */ diff --git a/datapath-windows/ovsext/OvsOid.c b/datapath-windows/ovsext/OvsOid.c new file mode 100644 index 000000000..487191ad1 --- /dev/null +++ b/datapath-windows/ovsext/OvsOid.c @@ -0,0 +1,855 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "precomp.h" +#include "OvsIoctl.h" +#include "OvsSwitch.h" +#include "OvsVport.h" +#include "OvsNetProto.h" +#include "OvsUser.h" +#include "OvsFlow.h" +#include "OvsEvent.h" +#include "OvsUser.h" +#include "OvsOid.h" + +/* Due to an imported header file */ +#pragma warning( disable:4505 ) + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_DISPATCH +#include "OvsDebug.h" + +typedef struct _OVS_OID_CONTEXT { + NDIS_EVENT oidComplete; + NDIS_STATUS status; +} OVS_OID_CONTEXT, *POVS_OID_CONTEXT; + + +VOID +OvsExtOidRequestComplete(NDIS_HANDLE filterModuleContext, + PNDIS_OID_REQUEST oidRequest, + NDIS_STATUS status); +static VOID +OvsOidRequestCompleteMethod(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest, + PNDIS_OID_REQUEST origOidRequest, + NDIS_STATUS status); +static VOID +OvsOidRequestCompleteSetInfo(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest, + PNDIS_OID_REQUEST origOidRequest, + NDIS_STATUS status); +static VOID +OvsOidRequestCompleteQuery(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest, + PNDIS_OID_REQUEST origOidRequest, + NDIS_STATUS status); + +static NDIS_STATUS +OvsProcessSetOidPortProp(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest); +static NDIS_STATUS +OvsProcessSetOidPort(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest); +static NDIS_STATUS +OvsProcessSetOidNic(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest); + +__inline BOOLEAN +OvsCheckOidHeaderFunc(PNDIS_OBJECT_HEADER header, + LONG propRev, + LONG propSize) +{ + return header->Type != NDIS_OBJECT_TYPE_DEFAULT || + header->Revision < propRev || + header->Size < propSize; +} + +#define OvsCheckOidHeader(_hdr, _rev) \ + OvsCheckOidHeaderFunc(_hdr, _rev, ##NDIS_SIZEOF_##_rev) + +static __inline VOID +OvsOidSetOrigRequest(PNDIS_OID_REQUEST clonedRequest, + PNDIS_OID_REQUEST origRequest) +{ + *(PVOID*)(&clonedRequest->SourceReserved[0]) = origRequest; 
+} + +static __inline PNDIS_OID_REQUEST +OvsOidGetOrigRequest(PNDIS_OID_REQUEST clonedRequest) +{ + return *((PVOID*)(&clonedRequest->SourceReserved[0])); +} + +static __inline VOID +OvsOidSetContext(PNDIS_OID_REQUEST clonedRequest, + POVS_OID_CONTEXT origRequest) +{ + *(PVOID*)(&clonedRequest->SourceReserved[8]) = origRequest; +} + +static __inline POVS_OID_CONTEXT +OvsOidGetContext(PNDIS_OID_REQUEST clonedRequest) +{ + return *((PVOID*)(&clonedRequest->SourceReserved[8])); +} + +static NDIS_STATUS +OvsProcessSetOidPortProp(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + struct _SET *setInfo = &(oidRequest->DATA.SET_INFORMATION); + PNDIS_SWITCH_PORT_PROPERTY_PARAMETERS portPropParam = + setInfo->InformationBuffer; + BOOLEAN checkFailed = TRUE; + + UNREFERENCED_PARAMETER(switchObject); + + if (setInfo->Oid == OID_SWITCH_PORT_PROPERTY_DELETE) { + checkFailed = OvsCheckOidHeader( + (PNDIS_OBJECT_HEADER)portPropParam, + NDIS_SWITCH_PORT_PROPERTY_DELETE_PARAMETERS_REVISION_1); + } else { + /* it must be a add or update request */ + checkFailed = OvsCheckOidHeader( + (PNDIS_OBJECT_HEADER)portPropParam, + NDIS_SWITCH_PORT_PROPERTY_PARAMETERS_REVISION_1); + } + + if (checkFailed) { + status = NDIS_STATUS_INVALID_PARAMETER; + goto done; + } + + if (portPropParam->PropertyType == NdisSwitchPortPropertyTypeVlan) { + status = NDIS_STATUS_NOT_SUPPORTED; + goto done; + } + +done: + return status; +} + +static NDIS_STATUS +OvsProcessSetOidPort(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + struct _SET *setInfo = &(oidRequest->DATA.SET_INFORMATION); + PNDIS_SWITCH_PORT_PARAMETERS portParam = setInfo->InformationBuffer; + + if (OvsCheckOidHeader((PNDIS_OBJECT_HEADER)portParam, + NDIS_SWITCH_PORT_PARAMETERS_REVISION_1)) { + status = NDIS_STATUS_NOT_SUPPORTED; + goto done; + } + + switch(setInfo->Oid) { + case OID_SWITCH_PORT_CREATE: + status = 
OvsCreatePort(switchObject, portParam); + break; + case OID_SWITCH_PORT_TEARDOWN: + OvsTeardownPort(switchObject, portParam); + break; + case OID_SWITCH_PORT_DELETE: + OvsDeletePort(switchObject, portParam); + break; + default: + break; + } + +done: + return status; +} + +static NDIS_STATUS +OvsProcessSetOidNic(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + struct _SET *setInfo = &(oidRequest->DATA.SET_INFORMATION); + PNDIS_SWITCH_NIC_PARAMETERS nicParam = setInfo->InformationBuffer; + + if (OvsCheckOidHeader((PNDIS_OBJECT_HEADER)nicParam, + NDIS_SWITCH_NIC_PARAMETERS_REVISION_1)) { + status = NDIS_STATUS_NOT_SUPPORTED; + goto done; + } + + switch(setInfo->Oid) { + case OID_SWITCH_NIC_CREATE: + status = OvsCreateNic(switchObject, nicParam); + break; + case OID_SWITCH_NIC_CONNECT: + OvsConnectNic(switchObject, nicParam); + break; + case OID_SWITCH_NIC_UPDATED: + OvsUpdateNic(switchObject, nicParam); + break; + case OID_SWITCH_NIC_DISCONNECT: + OvsDisconnectNic(switchObject, nicParam); + break; + case OID_SWITCH_NIC_DELETE: + OvsDeleteNic(switchObject, nicParam); + break; + default: + break; + } + +done: + return status; + +} + +static NDIS_STATUS +OvsProcessSetOid(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest, + PBOOLEAN complete) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + struct _SET *setInfo = &(oidRequest->DATA.SET_INFORMATION); + + *complete = FALSE; + + OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu", + oidRequest, setInfo->Oid); + + /* Verify the basic Oid paramters first */ + if (setInfo->InformationBufferLength && + (setInfo->InformationBufferLength < sizeof(NDIS_OBJECT_HEADER))) { + status = NDIS_STATUS_INVALID_OID; + OVS_LOG_INFO("Invalid input %d", setInfo->InformationBufferLength); + goto error; + } + + /* Documentation does not specify what should be done + * if informationBuffer is not present. 
Although it mentions the + * structure type informationBUffer points to for each oid request, + * but it does not explicitly mention that it is a MUST. + * hence we are following this scenario same way as what sample code + * mentions. */ + if (!(setInfo->InformationBufferLength)) { + /* We cannot do anything about this oid request, + * lets just pass it down. */ + OVS_LOG_INFO("Buffer Length Zero"); + goto done; + } + + switch(setInfo->Oid) { + case OID_SWITCH_PORT_PROPERTY_ADD: + case OID_SWITCH_PORT_PROPERTY_UPDATE: + case OID_SWITCH_PORT_PROPERTY_DELETE: + status = OvsProcessSetOidPortProp(switchObject, oidRequest); + break; + + case OID_SWITCH_PORT_CREATE: + case OID_SWITCH_PORT_UPDATED: + case OID_SWITCH_PORT_TEARDOWN: + case OID_SWITCH_PORT_DELETE: + status = OvsProcessSetOidPort(switchObject, oidRequest); + break; + + case OID_SWITCH_NIC_CREATE: + case OID_SWITCH_NIC_CONNECT: + case OID_SWITCH_NIC_UPDATED: + case OID_SWITCH_NIC_DISCONNECT: + case OID_SWITCH_NIC_DELETE: + status = OvsProcessSetOidNic(switchObject, oidRequest); + break; + + default: + /* Non handled OID request */ + break; + } + + if (status != NDIS_STATUS_SUCCESS) { + goto error; + } + + goto done; + +error: + *complete = TRUE; +done: + OVS_LOG_TRACE("Exit: status %8x.", status); + return status; +} + +static NDIS_STATUS +OvsProcessMethodOid(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest, + PBOOLEAN complete, + PULONG bytesNeededParam) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + struct _METHOD *methodInfo = &(oidRequest->DATA.METHOD_INFORMATION); + struct _SET *nicReqSetInfo = NULL; + PNDIS_OBJECT_HEADER header = NULL; + PNDIS_OID_REQUEST nicOidRequest = NULL; + + UNREFERENCED_PARAMETER(switchObject); + + OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu", + oidRequest, methodInfo->Oid); + + *complete = FALSE; + *bytesNeededParam = 0; + header = methodInfo->InformationBuffer; + + switch(methodInfo->Oid) { + /* We deal with only OID_SWITCH_NIC_REQUEST as of now */ + case 
OID_SWITCH_NIC_REQUEST: + if (OvsCheckOidHeader(header, + NDIS_SWITCH_NIC_OID_REQUEST_REVISION_1)) { + OVS_LOG_INFO("Check Header failed"); + status = NDIS_STATUS_NOT_SUPPORTED; + *complete = TRUE; + goto done; + } + + nicOidRequest = (((PNDIS_SWITCH_NIC_OID_REQUEST)header)->OidRequest); + nicReqSetInfo = &(nicOidRequest->DATA.SET_INFORMATION); + + /* Fail the SR-IOV VF case */ + if ((nicOidRequest->RequestType == NdisRequestSetInformation) && + (nicReqSetInfo->Oid == OID_NIC_SWITCH_ALLOCATE_VF)) { + OVS_LOG_INFO("We do not support Oid: " + "OID_NIC_SWITCH_ALLOCATE_VF"); + status = NDIS_STATUS_FAILURE; + *complete = TRUE; + } + break; + default: + /* No op */ + break; + } + +done: + OVS_LOG_TRACE("Exit: status %8x.", status); + return status; +} + +/* + * -------------------------------------------------------------------------- + * Implements filter driver's FilterOidRequest function. + * -------------------------------------------------------------------------- + */ + +NDIS_STATUS +OvsExtOidRequest(NDIS_HANDLE filterModuleContext, + PNDIS_OID_REQUEST oidRequest) +{ + POVS_SWITCH_CONTEXT switchObject = (POVS_SWITCH_CONTEXT)filterModuleContext; + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + PNDIS_OID_REQUEST clonedOidRequest = NULL; + struct _METHOD *methodInfo = &(oidRequest->DATA.METHOD_INFORMATION); + BOOLEAN completeOid = FALSE; + ULONG bytesNeeded = 0; + + OVS_LOG_TRACE("Enter: oidRequest %p, reqType: %d", + oidRequest, oidRequest->RequestType); + status = NdisAllocateCloneOidRequest(switchObject->NdisFilterHandle, + oidRequest, OVS_MEMORY_TAG, + &clonedOidRequest); + if (status != NDIS_STATUS_SUCCESS) { + goto done; + } + + NdisInterlockedIncrement(&(switchObject->pendingOidCount)); + + /* set the original oid request in cloned one. 
*/ + OvsOidSetOrigRequest(clonedOidRequest, oidRequest); + OvsOidSetContext(clonedOidRequest, NULL); + + switch(clonedOidRequest->RequestType) { + case NdisRequestSetInformation: + status = OvsProcessSetOid(switchObject, clonedOidRequest, + &completeOid); + break; + case NdisRequestMethod: + status = OvsProcessMethodOid(switchObject, clonedOidRequest, + &completeOid, &bytesNeeded); + break; + default: + /* We do not handle other request types as of now. + * We are just a passthrough for those. */ + break; + } + + if (completeOid == TRUE) { + /* dont leave any reference back to original request, + * even if we are freeing it up. */ + OVS_LOG_INFO("Complete True oidRequest %p.", oidRequest); + OvsOidSetOrigRequest(clonedOidRequest, NULL); + NdisFreeCloneOidRequest(switchObject->NdisFilterHandle, + clonedOidRequest); + methodInfo->BytesNeeded = bytesNeeded; + NdisInterlockedDecrement(&switchObject->pendingOidCount); + goto done; + } + + /* pass the request down */ + status = NdisFOidRequest(switchObject->NdisFilterHandle, clonedOidRequest); + if (status != NDIS_STATUS_PENDING) { + OvsExtOidRequestComplete(switchObject, clonedOidRequest, status); + /* sample code says so */ + status = NDIS_STATUS_PENDING; + } + +done: + OVS_LOG_TRACE("Exit: status %8x.", status); + return status; +} + +/* + * -------------------------------------------------------------------------- + * Implements filter driver's FilterOidRequestComplete function. 
+ * -------------------------------------------------------------------------- + */ +VOID +OvsExtOidRequestComplete(NDIS_HANDLE filterModuleContext, + PNDIS_OID_REQUEST oidRequest, + NDIS_STATUS status) +{ + POVS_SWITCH_CONTEXT switchObject = (POVS_SWITCH_CONTEXT)filterModuleContext; + PNDIS_OID_REQUEST origReq = OvsOidGetOrigRequest(oidRequest); + POVS_OID_CONTEXT oidContext = OvsOidGetContext(oidRequest); + + /* + * Exactly one of the two must be set: 'origReq' for a cloned request + * passed down by OvsExtOidRequest(), 'oidContext' for a request generated + * internally by OvsIssueOidRequest(). + */ + ASSERT(origReq != NULL || oidContext != NULL); + ASSERT(origReq == NULL || oidContext == NULL); + + OVS_LOG_TRACE("Enter: oidRequest %p, reqType: %d", + oidRequest, oidRequest->RequestType); + + /* Internally generated request: record the status and wake the waiter. */ + if (origReq == NULL) { + NdisInterlockedDecrement(&(switchObject->pendingOidCount)); + oidContext->status = status; + NdisSetEvent(&oidContext->oidComplete); + OVS_LOG_INFO("Internally generated request"); + goto done; + } + + /* Cloned request: copy the results back into the original request. */ + switch(oidRequest->RequestType) { + case NdisRequestMethod: + OvsOidRequestCompleteMethod(switchObject, oidRequest, + origReq, status); + break; + + case NdisRequestSetInformation: + OvsOidRequestCompleteSetInfo(switchObject, oidRequest, + origReq, status); + break; + + case NdisRequestQueryInformation: + case NdisRequestQueryStatistics: + default: + OvsOidRequestCompleteQuery(switchObject, oidRequest, + origReq, status); + break; + } + + OvsOidSetOrigRequest(oidRequest, NULL); + + /* Free the clone, then complete the original request with 'status'. 
*/ + NdisFreeCloneOidRequest(switchObject->NdisFilterHandle, oidRequest); + NdisFOidRequestComplete(switchObject->NdisFilterHandle, origReq, status); + NdisInterlockedDecrement(&(switchObject->pendingOidCount)); + +done: + OVS_LOG_TRACE("Exit"); +} + +static VOID +OvsOidRequestCompleteMethod(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest, + PNDIS_OID_REQUEST origOidRequest, + NDIS_STATUS status) +{ + UNREFERENCED_PARAMETER(status); + UNREFERENCED_PARAMETER(switchObject); + + struct _METHOD *methodInfo = &(oidRequest->DATA.METHOD_INFORMATION); + struct _METHOD *origMethodInfo = &(origOidRequest->DATA. 
+ METHOD_INFORMATION); + + OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu", + oidRequest, methodInfo->Oid); + + origMethodInfo->OutputBufferLength = methodInfo->OutputBufferLength; + origMethodInfo->BytesRead = methodInfo->BytesRead; + origMethodInfo->BytesNeeded = methodInfo->BytesNeeded; + origMethodInfo->BytesWritten = methodInfo->BytesWritten; + + OVS_LOG_TRACE("Exit"); +} + +static VOID +OvsOidRequestCompleteSetInfo(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest, + PNDIS_OID_REQUEST origOidRequest, + NDIS_STATUS status) +{ + struct _SET *setInfo = &(oidRequest->DATA.SET_INFORMATION); + struct _SET *origSetInfo = &(origOidRequest->DATA.SET_INFORMATION); + PNDIS_OBJECT_HEADER origHeader = origSetInfo->InformationBuffer; + + OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu", + oidRequest, setInfo->Oid); + + origSetInfo->BytesRead = setInfo->BytesRead; + origSetInfo->BytesNeeded = setInfo->BytesNeeded; + + if (status != NDIS_STATUS_SUCCESS) { + + switch(setInfo->Oid) { + case OID_SWITCH_PORT_CREATE: + OvsDeletePort(switchObject, + (PNDIS_SWITCH_PORT_PARAMETERS)origHeader); + break; + + case OID_SWITCH_NIC_CREATE: + OvsDeleteNic(switchObject, + (PNDIS_SWITCH_NIC_PARAMETERS)origHeader); + break; + + default: + break; + } + } + + OVS_LOG_TRACE("Exit"); +} + +static VOID +OvsOidRequestCompleteQuery(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest, + PNDIS_OID_REQUEST origOidRequest, + NDIS_STATUS status) +{ + UNREFERENCED_PARAMETER(switchObject); + UNREFERENCED_PARAMETER(status); + + struct _QUERY *queryInfo = &((oidRequest->DATA).QUERY_INFORMATION); + struct _QUERY *origQueryInfo = &((origOidRequest->DATA).QUERY_INFORMATION); + + OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu", + oidRequest, queryInfo->Oid); + + origQueryInfo->BytesWritten = queryInfo->BytesWritten; + origQueryInfo->BytesNeeded = queryInfo->BytesNeeded; + + OVS_LOG_TRACE("Exit"); +} + +/* + * -------------------------------------------------------------------------- + * 
Implements filter driver's FilterCancelOidRequest function. + * -------------------------------------------------------------------------- + */ +VOID +OvsExtCancelOidRequest(NDIS_HANDLE filterModuleContext, + PVOID requestId) +{ + OVS_LOG_TRACE("Enter: requestId: %p", requestId); + + UNREFERENCED_PARAMETER(filterModuleContext); + UNREFERENCED_PARAMETER(requestId); +} + + +/* + * -------------------------------------------------------------------------- + * Utility function to issue the specified OID to the NDIS stack. The OID is + * directed towards the miniport edge of the extensible switch. + * An OID that gets issued may not complete immediately, and in such cases, the + * function waits for the OID to complete. Thus, this function must be + * called at PASSIVE_LEVEL. + * + * Only NdisRequestQueryInformation is supported; any other 'oidType' fails + * with NDIS_STATUS_INVALID_PARAMETER. When the query fails with + * NDIS_STATUS_INVALID_LENGTH, '*outputSizeNeeded' is set to the number of + * bytes required. + * -------------------------------------------------------------------------- + */ +static NDIS_STATUS +OvsIssueOidRequest(POVS_SWITCH_CONTEXT switchContext, + NDIS_REQUEST_TYPE oidType, + UINT32 oidRequestEnum, + PVOID oidInputBuffer, + UINT32 inputSize, + PVOID oidOutputBuffer, + UINT32 outputSize, + UINT32 *outputSizeNeeded) +{ + NDIS_STATUS status; + PNDIS_OID_REQUEST oidRequest; + POVS_OID_CONTEXT oidContext; + ULONG OvsExtOidRequestId = 'ISVO'; + + DBG_UNREFERENCED_PARAMETER(inputSize); + DBG_UNREFERENCED_PARAMETER(oidInputBuffer); + + OVS_LOG_TRACE("Enter: switchContext: %p, oidType: %d", + switchContext, oidType); + + ASSERT(oidInputBuffer == NULL || inputSize != 0); + ASSERT(oidOutputBuffer == NULL || outputSize != 0); + ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); + + oidRequest = OvsAllocateMemory(sizeof *oidRequest); + if (!oidRequest) { + status = NDIS_STATUS_RESOURCES; + goto done; + } + + oidContext = OvsAllocateMemory(sizeof *oidContext); + if 
(!oidContext) { + OvsFreeMemory(oidRequest); + status = NDIS_STATUS_RESOURCES; + goto done; + } + + RtlZeroMemory(oidRequest, sizeof *oidRequest); + RtlZeroMemory(oidContext, sizeof *oidContext); + + oidRequest->Header.Type = 
NDIS_OBJECT_TYPE_OID_REQUEST; + oidRequest->Header.Revision = NDIS_OID_REQUEST_REVISION_1; + oidRequest->Header.Size = NDIS_SIZEOF_OID_REQUEST_REVISION_1; + + oidRequest->RequestType = oidType; + oidRequest->PortNumber = 0; + oidRequest->Timeout = 0; + oidRequest->RequestId = (PVOID)OvsExtOidRequestId; + + switch(oidType) { + case NdisRequestQueryInformation: + oidRequest->DATA.QUERY_INFORMATION.Oid = oidRequestEnum; + oidRequest->DATA.QUERY_INFORMATION.InformationBuffer = oidOutputBuffer; + oidRequest->DATA.QUERY_INFORMATION.InformationBufferLength = outputSize; + break; + default: + ASSERT(FALSE); + /* Unsupported request type: release both allocations and fail here + * rather than issuing a request whose DATA was never filled in. */ + OvsFreeMemory(oidRequest); + OvsFreeMemory(oidContext); + status = NDIS_STATUS_INVALID_PARAMETER; + goto done; + } + + /* + * We make use of the SourceReserved field in the OID request to store + * pointers to the original OID (if any), and also context for completion + * (if any). + */ + oidContext->status = NDIS_STATUS_SUCCESS; + NdisInitializeEvent(&oidContext->oidComplete); + + OvsOidSetOrigRequest(oidRequest, NULL); + OvsOidSetContext(oidRequest, oidContext); + + NdisInterlockedIncrement(&(switchContext->pendingOidCount)); + status = NdisFOidRequest(switchContext->NdisFilterHandle, oidRequest); + if (status == NDIS_STATUS_PENDING) { + NdisWaitEvent(&oidContext->oidComplete, 0); + } else { + /* Completed synchronously: the completion routine, which would have + * recorded the status and dropped the pending count, runs only for + * NDIS_STATUS_PENDING, so do both here. 
*/ + oidContext->status = status; + NdisInterlockedDecrement(&(switchContext->pendingOidCount)); + } + + if (status == NDIS_STATUS_INVALID_LENGTH || + oidContext->status == NDIS_STATUS_INVALID_LENGTH) { + switch(oidType) { + case NdisRequestQueryInformation: + *outputSizeNeeded = oidRequest->DATA.QUERY_INFORMATION.BytesNeeded; + } + } + + status = oidContext->status; + ASSERT(status != NDIS_STATUS_PENDING); + + OvsFreeMemory(oidRequest); + OvsFreeMemory(oidContext); + +done: + OVS_LOG_TRACE("Exit: status %8x.", status); + return status; +} + + +/* + * -------------------------------------------------------------------------- + * Utility function to query if the extensible switch has completed activation + * successfully. 
+ * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsQuerySwitchActivationComplete(POVS_SWITCH_CONTEXT switchContext, + BOOLEAN *switchActive) +{ + NDIS_STATUS status; + PNDIS_SWITCH_PARAMETERS switchParams; + UINT32 outputSizeNeeded; + + OVS_LOG_TRACE("Enter: switchContext: %p, switchActive: %p", + switchContext, switchActive); + + switchParams = OvsAllocateMemory(sizeof *switchParams); + if (!switchParams) { + status = NDIS_STATUS_RESOURCES; + goto done; + } + + /* + * Even though 'switchParms' is supposed to be populated by the OID, it + * needs to be initialized nevertheless. Otherwise, OID returns + * NDIS_STATUS_INVALID_PARAMETER. This is not clear in the documentation. + */ + RtlZeroMemory(switchParams, sizeof *switchParams); + switchParams->Header.Revision = NDIS_SWITCH_PARAMETERS_REVISION_1; + switchParams->Header.Type = NDIS_OBJECT_TYPE_DEFAULT; + switchParams->Header.Size = NDIS_SIZEOF_NDIS_SWITCH_PARAMETERS_REVISION_1; + + status = OvsIssueOidRequest(switchContext, NdisRequestQueryInformation, + OID_SWITCH_PARAMETERS, NULL, 0, + (PVOID)switchParams, sizeof *switchParams, + &outputSizeNeeded); + + ASSERT(status != NDIS_STATUS_INVALID_LENGTH); + ASSERT(status != NDIS_STATUS_PENDING); + if (status == NDIS_STATUS_SUCCESS) { + ASSERT(switchParams->Header.Type == NDIS_OBJECT_TYPE_DEFAULT); + ASSERT(switchParams->Header.Revision == NDIS_SWITCH_PARAMETERS_REVISION_1); + ASSERT(switchParams->Header.Size == + NDIS_SIZEOF_NDIS_SWITCH_PARAMETERS_REVISION_1); + *switchActive = switchParams->IsActive; + } + + OvsFreeMemory(switchParams); + +done: + OVS_LOG_TRACE("Exit: status %8x, switchActive: %d.", + status, *switchActive); + return status; +} + + +/* + * -------------------------------------------------------------------------- + * Utility function to get the array of ports on the extensible switch. Upon + * success, the caller needs to free the returned array. 
+ * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsGetPortsOnSwitch(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_PORT_ARRAY *portArrayOut) +{ + PNDIS_SWITCH_PORT_ARRAY portArray; + UINT32 arraySize = sizeof *portArray; + NDIS_STATUS status = NDIS_STATUS_FAILURE; + + OVS_LOG_TRACE("Enter: switchContext: %p, portArray: %p", + switchContext, portArrayOut); + do { + UINT32 reqdArraySize; + + portArray = OvsAllocateMemory(arraySize); + if (!portArray) { + status = NDIS_STATUS_RESOURCES; + goto done; + } + + /* + * Even though 'portArray' is supposed to be populated by the OID, it + * needs to be initialized nevertheless. Otherwise, OID returns + * NDIS_STATUS_INVALID_PARAMETER. This is not clear in the documentation. + */ + RtlZeroMemory(portArray, sizeof *portArray); + portArray->Header.Revision = NDIS_SWITCH_PORT_ARRAY_REVISION_1; + portArray->Header.Type = NDIS_OBJECT_TYPE_DEFAULT; + portArray->Header.Size = NDIS_SIZEOF_NDIS_SWITCH_PORT_ARRAY_REVISION_1; + + status = OvsIssueOidRequest(switchContext, NdisRequestQueryInformation, + OID_SWITCH_PORT_ARRAY, NULL, 0, + (PVOID)portArray, arraySize, + &reqdArraySize); + if (status == NDIS_STATUS_SUCCESS) { + *portArrayOut = portArray; + break; + } + + OvsFreeMemory(portArray); + arraySize = reqdArraySize; + if (status != NDIS_STATUS_INVALID_LENGTH) { + break; + } + } while(status == NDIS_STATUS_INVALID_LENGTH); + +done: + OVS_LOG_TRACE("Exit: status %8x.", status); + return status; +} + + +/* + * -------------------------------------------------------------------------- + * Utility function to get the array of nics on the extensible switch. Upon + * success, the caller needs to free the returned array. 
+ * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsGetNicsOnSwitch(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_NIC_ARRAY *nicArrayOut) +{ + PNDIS_SWITCH_NIC_ARRAY nicArray; + UINT32 arraySize = sizeof *nicArray; + NDIS_STATUS status = NDIS_STATUS_FAILURE; + + OVS_LOG_TRACE("Enter: switchContext: %p, nicArray: %p", + switchContext, nicArrayOut); + + do { + UINT32 reqdArraySize; + + nicArray = OvsAllocateMemory(arraySize); + if (!nicArray) { + status = NDIS_STATUS_RESOURCES; + goto done; + } + + /* + * Even though 'nicArray' is supposed to be populated by the OID, it + * needs to be initialized nevertheless. Otherwise, OID returns + * NDIS_STATUS_INVALID_PARAMETER. This is not clear in the documentation. + */ + RtlZeroMemory(nicArray, sizeof *nicArray); + nicArray->Header.Revision = NDIS_SWITCH_NIC_ARRAY_REVISION_1; + nicArray->Header.Type = NDIS_OBJECT_TYPE_DEFAULT; + nicArray->Header.Size = NDIS_SIZEOF_NDIS_SWITCH_NIC_ARRAY_REVISION_1; + + status = OvsIssueOidRequest(switchContext, NdisRequestQueryInformation, + OID_SWITCH_NIC_ARRAY, NULL, 0, + (PVOID)nicArray, arraySize, + &reqdArraySize); + if (status == NDIS_STATUS_SUCCESS) { + *nicArrayOut = nicArray; + break; + } + + OvsFreeMemory(nicArray); + arraySize = reqdArraySize; + if (status != NDIS_STATUS_INVALID_LENGTH) { + break; + } + } while(status == NDIS_STATUS_INVALID_LENGTH); + +done: + OVS_LOG_TRACE("Exit: status %8x.", status); + return status; +} diff --git a/datapath-windows/ovsext/OvsOid.h b/datapath-windows/ovsext/OvsOid.h new file mode 100644 index 000000000..40a5ec69a --- /dev/null +++ b/datapath-windows/ovsext/OvsOid.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OVS_OID_H_ +#define __OVS_OID_H_ 1 + +NDIS_STATUS OvsQuerySwitchActivationComplete(POVS_SWITCH_CONTEXT switchContext, + BOOLEAN *switchActive); +NDIS_STATUS OvsGetPortsOnSwitch(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_PORT_ARRAY *portArrayOut); +NDIS_STATUS OvsGetNicsOnSwitch(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_NIC_ARRAY *nicArrayOut); +#endif /* __OVS_OID_H_ */ diff --git a/datapath-windows/ovsext/OvsPacketIO.c b/datapath-windows/ovsext/OvsPacketIO.c new file mode 100644 index 000000000..39e57036e --- /dev/null +++ b/datapath-windows/ovsext/OvsPacketIO.c @@ -0,0 +1,441 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains the implementation of the datapath/forwarding + * functionality of the OVS. 
+ */ + +#include "precomp.h" +#include "OvsIoctl.h" +#include "OvsSwitch.h" +#include "OvsVport.h" +#include "OvsNetProto.h" +#include "OvsUser.h" +#include "OvsPacketIO.h" +#include "OvsFlow.h" +#include "OvsEvent.h" +#include "OvsUser.h" + +/* Due to an imported header file */ +#pragma warning( disable:4505 ) + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_DISPATCH +#include "OvsDebug.h" + +extern NDIS_STRING ovsExtGuidUC; +extern NDIS_STRING ovsExtFriendlyNameUC; + +static VOID OvsFinalizeCompletionList(OvsCompletionList *completionList); +static VOID OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST netBufferLists, ULONG sendCompleteFlags); + +__inline VOID +OvsInitCompletionList(OvsCompletionList *completionList, + POVS_SWITCH_CONTEXT switchContext, + ULONG sendCompleteFlags) +{ + ASSERT(completionList); + completionList->dropNbl = NULL; + completionList->dropNblNext = &completionList->dropNbl; + completionList->switchContext = switchContext; + completionList->sendCompleteFlags = sendCompleteFlags; +} + +/* Utility function used to complete an NBL. */ +__inline VOID +OvsAddPktCompletionList(OvsCompletionList *completionList, + BOOLEAN incoming, + NDIS_SWITCH_PORT_ID sourcePort, + PNET_BUFFER_LIST netBufferList, + UINT32 netBufferListCount, + PNDIS_STRING filterReason) +{ + POVS_BUFFER_CONTEXT ctx; + + /* XXX: We handle one NBL at a time. */ + ASSERT(netBufferList->Next == NULL); + + /* Make sure it has a context. */ + ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(netBufferList); + ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC); + + completionList->switchContext->NdisSwitchHandlers.ReportFilteredNetBufferLists( + completionList->switchContext->NdisSwitchContext, &ovsExtGuidUC, + &ovsExtFriendlyNameUC, sourcePort, + incoming ? 
NDIS_SWITCH_REPORT_FILTERED_NBL_FLAGS_IS_INCOMING : 0, + netBufferListCount, netBufferList, filterReason); + + *completionList->dropNblNext = netBufferList; + completionList->dropNblNext = &netBufferList->Next; + ASSERT(completionList->dropNbl); +} + +static __inline VOID +OvsReportNBLIngressError(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST nblList, + PNDIS_STRING filterReason, + NDIS_STATUS error) +{ + PNET_BUFFER_LIST nbl = nblList; + while (nbl) { + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail; + fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl); + + nbl->Status = error; + + /* This can be optimized by batching NBL's from the same + * SourcePortId. */ + switchContext->NdisSwitchHandlers.ReportFilteredNetBufferLists( + switchContext->NdisSwitchContext, &ovsExtGuidUC, + &ovsExtFriendlyNameUC, fwdDetail->SourcePortId, + NDIS_SWITCH_REPORT_FILTERED_NBL_FLAGS_IS_INCOMING, + 1 /*Nbl count.*/, nbl, filterReason); + + nbl = NET_BUFFER_LIST_NEXT_NBL(nbl); + } +} + +static __inline ULONG +OvsGetSendCompleteFlags(ULONG sendFlags) +{ + BOOLEAN dispatch, sameSource; + ULONG sendCompleteFlags; + + dispatch = NDIS_TEST_SEND_AT_DISPATCH_LEVEL(sendFlags); + sendCompleteFlags = (dispatch ? + NDIS_SEND_COMPLETE_FLAGS_DISPATCH_LEVEL : 0); + sameSource = NDIS_TEST_SEND_FLAG(sendFlags, + NDIS_SEND_FLAGS_SWITCH_SINGLE_SOURCE); + sendCompleteFlags |= (sameSource ? + NDIS_SEND_COMPLETE_FLAGS_SWITCH_SINGLE_SOURCE : 0); + + return sendCompleteFlags; +} + +VOID +OvsSendNBLIngress(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST netBufferLists, + ULONG sendFlags) +{ + if (switchContext->dataFlowState == OvsSwitchPaused) { + /* If a filter module is in the Paused state, the filter driver must not + * originate any send requests for that filter module. If NDIS calls + * FilterSendNetBufferLists, the driver must not call + * NdisFSendNetBufferLists to pass on the data until the driver is + * restarted. 
The driver should call NdisFSendNetBufferListsComplete + * immediately to complete the send operation. It should set the + * complete status in each NET_BUFFER_LIST structure to + * NDIS_STATUS_PAUSED. + * + * http://msdn.microsoft.com/en-us/library/windows/hardware/ + * ff549966(v=vs.85).aspx */ + NDIS_STRING filterReason; + ULONG sendCompleteFlags = OvsGetSendCompleteFlags(sendFlags); + + RtlInitUnicodeString(&filterReason, + L"Switch state PAUSED, drop before FSendNBL."); + OvsReportNBLIngressError(switchContext, netBufferLists, &filterReason, + NDIS_STATUS_PAUSED); + OvsCompleteNBLIngress(switchContext, netBufferLists, + sendCompleteFlags); + return; + } + + ASSERT(switchContext->dataFlowState == OvsSwitchRunning); + + NdisFSendNetBufferLists(switchContext->NdisFilterHandle, netBufferLists, + NDIS_DEFAULT_PORT_NUMBER, sendFlags); +} + +static __inline VOID +OvsStartNBLIngressError(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST nblList, + ULONG sendCompleteFlags, + PNDIS_STRING filterReason, + NDIS_STATUS error) +{ + ASSERT(error); + OvsReportNBLIngressError(switchContext, nblList, filterReason, error); + NdisFSendNetBufferListsComplete(switchContext->NdisFilterHandle, nblList, + sendCompleteFlags); +} + +static VOID +OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST netBufferLists, + ULONG SendFlags) +{ + NDIS_SWITCH_PORT_ID sourcePort = 0; + NDIS_SWITCH_NIC_INDEX sourceIndex = 0; + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail; + PNET_BUFFER_LIST curNbl = NULL, nextNbl = NULL; + ULONG sendCompleteFlags; + UCHAR dispatch; + LOCK_STATE_EX lockState, dpLockState; + NDIS_STATUS status; + NDIS_STRING filterReason; + LIST_ENTRY missedPackets; + UINT32 num = 0; + OvsCompletionList completionList; + + dispatch = NDIS_TEST_SEND_AT_DISPATCH_LEVEL(SendFlags)? 
+ NDIS_RWL_AT_DISPATCH_LEVEL : 0; + sendCompleteFlags = OvsGetSendCompleteFlags(SendFlags); + SendFlags |= NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP; + + InitializeListHead(&missedPackets); + OvsInitCompletionList(&completionList, switchContext, sendCompleteFlags); + + for (curNbl = netBufferLists; curNbl != NULL; curNbl = nextNbl) { + POVS_VPORT_ENTRY vport; + UINT32 portNo; + OVS_DATAPATH *datapath = &switchContext->datapath; + OVS_PACKET_HDR_INFO layers; + OvsFlowKey key; + UINT64 hash; + PNET_BUFFER curNb; + + nextNbl = curNbl->Next; + curNbl->Next = NULL; + + /* Ethernet Header is a guaranteed safe access. */ + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + if (curNb->Next != NULL) { + /* XXX: This case is not handled yet. */ + ASSERT(FALSE); + } else { + POVS_BUFFER_CONTEXT ctx; + OvsFlow *flow; + + fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); + sourcePort = fwdDetail->SourcePortId; + sourceIndex = (NDIS_SWITCH_NIC_INDEX)fwdDetail->SourceNicIndex; + + /* Take the DispatchLock so none of the VPORTs disconnect while + * we are setting destination ports. + * + * XXX: acquire/release the dispatch lock for a "batch" of packets + * rather than for each packet. 
*/ + NdisAcquireRWLockRead(switchContext->dispatchLock, &lockState, + dispatch); + + ctx = OvsInitExternalNBLContext(switchContext, curNbl, + sourcePort == switchContext->externalPortId); + if (ctx == NULL) { + RtlInitUnicodeString(&filterReason, + L"Cannot allocate external NBL context."); + + OvsStartNBLIngressError(switchContext, curNbl, + sendCompleteFlags, &filterReason, + NDIS_STATUS_RESOURCES); + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); + continue; + } + + vport = OvsFindVportByPortIdAndNicIndex(switchContext, sourcePort, + sourceIndex); + if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) { + RtlInitUnicodeString(&filterReason, + L"OVS-Cannot forward packet from unknown source port"); + goto dropit; + } else { + portNo = vport->portNo; + } + + vport->stats.rxPackets++; + vport->stats.rxBytes += NET_BUFFER_DATA_LENGTH(curNb); + + status = OvsExtractFlow(curNbl, vport->portNo, &key, &layers, NULL); + if (status != NDIS_STATUS_SUCCESS) { + RtlInitUnicodeString(&filterReason, L"OVS-Flow extract failed"); + goto dropit; + } + + ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); + OvsAcquireDatapathRead(datapath, &dpLockState, dispatch); + + flow = OvsLookupFlow(datapath, &key, &hash, FALSE); + if (flow) { + OvsFlowUsed(flow, curNbl, &layers); + datapath->hits++; + /* If successful, OvsActionsExecute() consumes the NBL. + * Otherwise, it adds it to the completionList. No need to + * check the return value. */ + OvsActionsExecute(switchContext, &completionList, curNbl, + portNo, SendFlags, &key, &hash, &layers, + flow->actions, flow->actionsLen); + OvsReleaseDatapath(datapath, &dpLockState); + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); + continue; + } else { + OvsReleaseDatapath(datapath, &dpLockState); + + datapath->misses++; + status = OvsCreateAndAddPackets(OVS_DEFAULT_PACKET_QUEUE, + NULL, 0, OVS_PACKET_CMD_MISS, + portNo, + key.tunKey.dst != 0 ? 
+ (OvsIPv4TunnelKey *)&key.tunKey : + NULL, curNbl, + sourcePort == + switchContext->externalPortId, + &layers, switchContext, + &missedPackets, &num); + if (status == NDIS_STATUS_SUCCESS) { + /* Complete the packet since it was copied to user + * buffer. */ + RtlInitUnicodeString(&filterReason, + L"OVS-Dropped since packet was copied to userspace"); + } else { + RtlInitUnicodeString(&filterReason, + L"OVS-Dropped due to failure to queue to userspace"); + } + goto dropit; + } + +dropit: + OvsAddPktCompletionList(&completionList, TRUE, sourcePort, curNbl, 0, + &filterReason); + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); + } + } + + /* Queue the missed packets. */ + OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, num); + OvsFinalizeCompletionList(&completionList); +} + + +/* + * -------------------------------------------------------------------------- + * Implements filter driver's FilterSendNetBufferLists Function. + * -------------------------------------------------------------------------- + */ +VOID +OvsExtSendNBL(NDIS_HANDLE filterModuleContext, + PNET_BUFFER_LIST netBufferLists, + NDIS_PORT_NUMBER portNumber, + ULONG sendFlags) +{ + UNREFERENCED_PARAMETER(portNumber); + + /* 'filterModuleContext' is the switch context that gets created in the + * AttachHandler. 
*/ + POVS_SWITCH_CONTEXT switchContext; + switchContext = (POVS_SWITCH_CONTEXT) filterModuleContext; + + if (switchContext->dataFlowState == OvsSwitchPaused) { + NDIS_STRING filterReason; + ULONG sendCompleteFlags = OvsGetSendCompleteFlags(sendFlags); + + RtlInitUnicodeString(&filterReason, + L"Switch state PAUSED, drop on ingress."); + OvsStartNBLIngressError(switchContext, netBufferLists, + sendCompleteFlags, &filterReason, + NDIS_STATUS_PAUSED); + return; + } + + ASSERT(switchContext->dataFlowState == OvsSwitchRunning); + + OvsStartNBLIngress(switchContext, netBufferLists, sendFlags); +} + +static VOID +OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST netBufferLists, + ULONG sendCompleteFlags) +{ + PNET_BUFFER_LIST curNbl = NULL, nextNbl = NULL; + OvsCompletionList newList; + + newList.dropNbl = NULL; + newList.dropNblNext = &newList.dropNbl; + + for (curNbl = netBufferLists; curNbl != NULL; curNbl = nextNbl) { + nextNbl = curNbl->Next; + curNbl->Next = NULL; + + curNbl = OvsCompleteNBL(switchContext, curNbl, TRUE); + if (curNbl != NULL) { + /* NBL originated from the upper layer. */ + *newList.dropNblNext = curNbl; + newList.dropNblNext = &curNbl->Next; + } + } + + /* Complete the NBL's that were sent by the upper layer. */ + if (newList.dropNbl != NULL) { + NdisFSendNetBufferListsComplete(switchContext->NdisFilterHandle, newList.dropNbl, + sendCompleteFlags); + } +} + + +/* + * -------------------------------------------------------------------------- + * Implements filter driver's FilterSendNetBufferListsComplete function. 
+ * -------------------------------------------------------------------------- + */ +VOID +OvsExtSendNBLComplete(NDIS_HANDLE filterModuleContext, + PNET_BUFFER_LIST netBufferLists, + ULONG sendCompleteFlags) +{ + OvsCompleteNBLIngress((POVS_SWITCH_CONTEXT)filterModuleContext, + netBufferLists, sendCompleteFlags); +} + + +VOID +OvsFinalizeCompletionList(OvsCompletionList *completionList) +{ + if (completionList->dropNbl != NULL) { + OvsCompleteNBLIngress(completionList->switchContext, + completionList->dropNbl, + completionList->sendCompleteFlags); + + completionList->dropNbl = NULL; + completionList->dropNblNext = &completionList->dropNbl; + } +} + +/* + * -------------------------------------------------------------------------- + * Implements filter driver's FilterCancelSendNetBufferLists function. + * + * "If a filter driver specifies a FilterSendNetBufferLists function and it + * queues send requests, it must also specify a + * FilterCancelSendNetBufferLists function." + * + * http://msdn.microsoft.com/en-us/library/windows/hardware/ + * ff549966(v=vs.85).aspx + * -------------------------------------------------------------------------- + */ +VOID +OvsExtCancelSendNBL(NDIS_HANDLE filterModuleContext, + PVOID CancelId) +{ + UNREFERENCED_PARAMETER(filterModuleContext); + UNREFERENCED_PARAMETER(CancelId); + + /* All send requests get completed synchronously, so there is no need to + * implement this callback. */ +} diff --git a/datapath-windows/ovsext/OvsPacketIO.h b/datapath-windows/ovsext/OvsPacketIO.h new file mode 100644 index 000000000..322a8aab7 --- /dev/null +++ b/datapath-windows/ovsext/OvsPacketIO.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OVS_PACKETIO_H_ +#define __OVS_PACKETIO_H_ 1 + +typedef union _OVS_PACKET_HDR_INFO OVS_PACKET_HDR_INFO; + +/* + * Data structures and utility functions to help manage a list of packets to be + * completed (dropped). + */ +typedef struct OvsCompletionList { + PNET_BUFFER_LIST dropNbl; + PNET_BUFFER_LIST *dropNblNext; + POVS_SWITCH_CONTEXT switchContext; + ULONG sendCompleteFlags; +} OvsCompletionList; + +VOID OvsInitCompletionList(OvsCompletionList *completionList, + POVS_SWITCH_CONTEXT switchContext, + ULONG sendCompleteFlags); +VOID OvsAddPktCompletionList(OvsCompletionList *completionList, + BOOLEAN incoming, + NDIS_SWITCH_PORT_ID sourcePort, + PNET_BUFFER_LIST netBufferList, + UINT32 netBufferListCount, + PNDIS_STRING filterReason); + + +/* + * Functions related to packet processing. 
+ */ +VOID OvsSendNBLIngress(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST netBufferLists, + ULONG sendFlags); + +NDIS_STATUS OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext, + OvsCompletionList *completionList, + PNET_BUFFER_LIST curNbl, UINT32 srcVportNo, + ULONG sendFlags, OvsFlowKey *key, UINT64 *hash, + OVS_PACKET_HDR_INFO *layers, + const struct nlattr *actions, int actionsLen); + +VOID OvsLookupFlowOutput(POVS_SWITCH_CONTEXT switchContext, + VOID *compList, PNET_BUFFER_LIST curNbl); + +#endif /* __OVS_PACKETIO_H_ */ diff --git a/datapath-windows/ovsext/OvsPacketParser.c b/datapath-windows/ovsext/OvsPacketParser.c new file mode 100644 index 000000000..0a9343551 --- /dev/null +++ b/datapath-windows/ovsext/OvsPacketParser.c @@ -0,0 +1,303 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "OvsPacketParser.h" + +//XXX consider moving to NdisGetDataBuffer. 
+const VOID * +OvsGetPacketBytes(const NET_BUFFER_LIST *nbl, + UINT32 len, + UINT32 srcOffset, + VOID *storage) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + PNET_BUFFER netBuffer = NET_BUFFER_LIST_FIRST_NB(nbl); + PMDL currentMdl; + BOOLEAN firstMDL = TRUE; + ULONG destOffset = 0; + VOID *dest = storage; + const UINT32 copyLen = len; + ULONG packetLen; + + packetLen = NET_BUFFER_DATA_LENGTH(netBuffer); + // Start copy from current MDL + currentMdl = NET_BUFFER_CURRENT_MDL(netBuffer); + + // Data on current MDL may be offset from start of MDL + while (destOffset < copyLen && currentMdl) { + PUCHAR srcMemory = MmGetSystemAddressForMdlSafe(currentMdl, + LowPagePriority); + ULONG length = MmGetMdlByteCount(currentMdl); + if (!srcMemory) { + status = NDIS_STATUS_RESOURCES; + break; + } + + if (firstMDL) { + ULONG mdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(netBuffer); + srcMemory += mdlOffset; + length -= mdlOffset; + firstMDL = FALSE; + } + length = MIN(length, packetLen); + packetLen -= length; + ASSERT((INT)packetLen >= 0); + + if (srcOffset >= length) { + currentMdl = NDIS_MDL_LINKAGE(currentMdl); + srcOffset -= length; + continue; + } else { + srcMemory += srcOffset; + length -= srcOffset; + srcOffset = 0; + } + + length = min(length, copyLen-destOffset); + + NdisMoveMemory((PUCHAR)dest+destOffset, srcMemory, length); + destOffset += length; + + currentMdl = NDIS_MDL_LINKAGE(currentMdl); + } + + if (destOffset == copyLen) { + ASSERT(status == NDIS_STATUS_SUCCESS); + return storage; + } + + return NULL; +} + +NDIS_STATUS +OvsParseIPv6(const NET_BUFFER_LIST *packet, + OvsFlowKey *key, + POVS_PACKET_HDR_INFO layers) +{ + UINT16 ofs = layers->l3Offset; + IPv6Hdr ipv6HdrStorage; + const IPv6Hdr *nh; + UINT32 nextHdr; + Ipv6Key *flow= &key->ipv6Key; + + ofs = layers->l3Offset; + nh = OvsGetPacketBytes(packet, sizeof *nh, ofs, &ipv6HdrStorage); + if (!nh) { + return NDIS_STATUS_FAILURE; + } + + nextHdr = nh->nexthdr; + memcpy(&flow->ipv6Src, nh->saddr.s6_addr, 16); + 
memcpy(&flow->ipv6Dst, nh->daddr.s6_addr, 16); + + flow->nwTos = ((nh->flow_lbl[0] & 0xF0) >> 4) | (nh->priority << 4); + flow->ipv6Label = + ((nh->flow_lbl[0] & 0x0F) << 16) | (nh->flow_lbl[1] << 8) | nh->flow_lbl[2]; + flow->nwTtl = nh->hop_limit; + flow->nwProto = SOCKET_IPPROTO_NONE; + flow->nwFrag = 0; + + // Parse extended headers and compute L4 offset + ofs += sizeof(IPv6Hdr); + for (;;) { + if ((nextHdr != SOCKET_IPPROTO_HOPOPTS) + && (nextHdr != SOCKET_IPPROTO_ROUTING) + && (nextHdr != SOCKET_IPPROTO_DSTOPTS) + && (nextHdr != SOCKET_IPPROTO_AH) + && (nextHdr != SOCKET_IPPROTO_FRAGMENT)) { + /* + * It's either a terminal header (e.g., TCP, UDP) or one we + * don't understand. In either case, we're done with the + * packet, so use it to fill in 'nw_proto'. + */ + break; + } + + if (nextHdr == SOCKET_IPPROTO_HOPOPTS + || nextHdr == SOCKET_IPPROTO_ROUTING + || nextHdr == SOCKET_IPPROTO_DSTOPTS + || nextHdr == SOCKET_IPPROTO_AH) { + IPv6ExtHdr extHdrStorage; + const IPv6ExtHdr *extHdr; + UINT8 len; + + extHdr = OvsGetPacketBytes(packet, sizeof *extHdr, ofs, &extHdrStorage); + if (!extHdr) { + return NDIS_STATUS_FAILURE; + } + + len = extHdr->hdrExtLen; + ofs += nextHdr == SOCKET_IPPROTO_AH ? (len + 2) * 4 : (len + 1) * 8; + nextHdr = extHdr->nextHeader; + if (OvsPacketLenNBL(packet) < ofs) { + return NDIS_STATUS_FAILURE; + } + } else if (nextHdr == SOCKET_IPPROTO_FRAGMENT) { + IPv6FragHdr fragHdrStorage; + const IPv6FragHdr *fragHdr; + + fragHdr = OvsGetPacketBytes(packet, sizeof *fragHdr, ofs, + &fragHdrStorage); + if (!fragHdr) { + return NDIS_STATUS_FAILURE; + } + + nextHdr = fragHdr->nextHeader; + ofs += sizeof *fragHdr; + + /* We only process the first fragment. 
*/ + if (fragHdr->offlg != htons(0)) { + if ((fragHdr->offlg & IP6F_OFF_HOST_ORDER_MASK) == htons(0)) { + flow->nwFrag = OVSWIN_NW_FRAG_ANY; + } else { + flow->nwFrag |= OVSWIN_NW_FRAG_LATER; + nextHdr = SOCKET_IPPROTO_FRAGMENT; + break; + } + } + } + } + + flow->nwProto = (UINT8)nextHdr; + layers->l4Offset = ofs; + return NDIS_STATUS_SUCCESS; +} + +VOID +OvsParseTcp(const NET_BUFFER_LIST *packet, + L4Key *flow, + POVS_PACKET_HDR_INFO layers) +{ + TCPHdr tcpStorage; + const TCPHdr *tcp = OvsGetTcp(packet, layers->l4Offset, &tcpStorage); + if (tcp) { + flow->tpSrc = tcp->source; + flow->tpDst = tcp->dest; + layers->isTcp = 1; + layers->l7Offset = layers->l4Offset + 4 * tcp->doff; + } +} + +VOID +OvsParseUdp(const NET_BUFFER_LIST *packet, + L4Key *flow, + POVS_PACKET_HDR_INFO layers) +{ + UDPHdr udpStorage; + const UDPHdr *udp = OvsGetUdp(packet, layers->l4Offset, &udpStorage); + if (udp) { + flow->tpSrc = udp->source; + flow->tpDst = udp->dest; + layers->isUdp = 1; + if (udp->check == 0) { + layers->udpCsumZero = 1; + } + layers->l7Offset = layers->l4Offset + sizeof *udp; + } +} + +NDIS_STATUS +OvsParseIcmpV6(const NET_BUFFER_LIST *packet, + OvsFlowKey *key, + POVS_PACKET_HDR_INFO layers) +{ + UINT16 ofs = layers->l4Offset; + ICMPHdr icmpStorage; + const ICMPHdr *icmp; + Icmp6Key *flow = &key->icmp6Key; + + memset(&flow->ndTarget, 0, sizeof(flow->ndTarget)); + memset(flow->arpSha, 0, sizeof(flow->arpSha)); + memset(flow->arpTha, 0, sizeof(flow->arpTha)); + + icmp = OvsGetIcmp(packet, ofs, &icmpStorage); + if (!icmp) { + return NDIS_STATUS_FAILURE; + } + ofs += sizeof *icmp; + + /* + * The ICMPv6 type and code fields use the 16-bit transport port + * fields, so we need to store them in 16-bit network byte order. 
+ */ + key->ipv6Key.l4.tpSrc = htons(icmp->type); + key->ipv6Key.l4.tpDst = htons(icmp->code); + + if (icmp->code == 0 && + (icmp->type == ND_NEIGHBOR_SOLICIT || + icmp->type == ND_NEIGHBOR_ADVERT)) { + struct in6_addr ndTargetStorage; + const struct in6_addr *ndTarget; + + ndTarget = OvsGetPacketBytes(packet, sizeof *ndTarget, ofs, + &ndTargetStorage); + if (!ndTarget) { + return NDIS_STATUS_FAILURE; + } + flow->ndTarget = *ndTarget; + + while ((UINT32)(ofs + 8) <= OvsPacketLenNBL(packet)) { + /* + * The minimum size of an option is 8 bytes, which also is + * the size of Ethernet link-layer options. + */ + IPv6NdOptHdr ndOptStorage; + const IPv6NdOptHdr *ndOpt; + UINT16 optLen; + + ndOpt = OvsGetPacketBytes(packet, sizeof *ndOpt, ofs, &ndOptStorage); + if (!ndOpt) { + return NDIS_STATUS_FAILURE; + } + + optLen = ndOpt->len * 8; + if (!optLen || (UINT32)(ofs + optLen) > OvsPacketLenNBL(packet)) { + goto invalid; + } + + /* + * Store the link layer address if the appropriate option is + * provided. It is considered an error if the same link + * layer option is specified twice. 
+ */ + if (ndOpt->type == ND_OPT_SOURCE_LINKADDR && optLen == 8) { + if (Eth_IsNullAddr(flow->arpSha)) { + memcpy(flow->arpSha, ndOpt + 1, ETH_ADDR_LENGTH); + } else { + goto invalid; + } + } else if (ndOpt->type == ND_OPT_TARGET_LINKADDR && optLen == 8) { + if (Eth_IsNullAddr(flow->arpTha)) { + memcpy(flow->arpTha, ndOpt + 1, ETH_ADDR_LENGTH); + } else { + goto invalid; + } + } + + ofs += optLen; + } + } + + layers->l7Offset = ofs; + return NDIS_STATUS_SUCCESS; + +invalid: + memset(&flow->ndTarget, 0, sizeof(flow->ndTarget)); + memset(flow->arpSha, 0, sizeof(flow->arpSha)); + memset(flow->arpTha, 0, sizeof(flow->arpTha)); + + return NDIS_STATUS_FAILURE; +} diff --git a/datapath-windows/ovsext/OvsPacketParser.h b/datapath-windows/ovsext/OvsPacketParser.h new file mode 100644 index 000000000..ab3c613a7 --- /dev/null +++ b/datapath-windows/ovsext/OvsPacketParser.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __OVS_PACKET_PARSER_H_ +#define __OVS_PACKET_PARSER_H_ 1 + +#include "precomp.h" +#include "OvsNetProto.h" + +const VOID* OvsGetPacketBytes(const NET_BUFFER_LIST *_pNB, UINT32 len, + UINT32 SrcOffset, VOID *storage); +NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, OvsFlowKey *key, + POVS_PACKET_HDR_INFO layers); +VOID OvsParseTcp(const NET_BUFFER_LIST *packet, L4Key *flow, + POVS_PACKET_HDR_INFO layers); +VOID OvsParseUdp(const NET_BUFFER_LIST *packet, L4Key *flow, + POVS_PACKET_HDR_INFO layers); +NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, OvsFlowKey *key, + POVS_PACKET_HDR_INFO layers); + +static __inline ULONG +OvsPacketLenNBL(const NET_BUFFER_LIST *_pNB) +{ + INT length = 0; + NET_BUFFER *nb; + + nb = NET_BUFFER_LIST_FIRST_NB(_pNB); + ASSERT(nb); + while(nb) { + length += NET_BUFFER_DATA_LENGTH(nb); + nb = NET_BUFFER_NEXT_NB(nb); + } + + return length; +} + +/* + * Returns the ctl field from the TCP header in 'packet', or 0 if the field + * can't be read. The caller must have ensured that 'packet' contains a TCP + * header. + * + * We can't just use TCPHdr, from netProto.h, for this because that + * breaks the flags down into individual bit-fields. We can't even use + * offsetof because that will try to take the address of a bit-field, + * which C does not allow. + */ +static UINT16 +OvsGetTcpCtl(const NET_BUFFER_LIST *packet, // IN + const POVS_PACKET_HDR_INFO layers) // IN +{ +#define TCP_CTL_OFS 12 // Offset of "ctl" field in TCP header. +#define TCP_FLAGS(CTL) ((CTL) & 0x3f) // Obtain TCP flags from CTL. + + const UINT16 *ctl; + UINT16 storage; + + ctl = OvsGetPacketBytes(packet, sizeof *ctl, layers->l4Offset + TCP_CTL_OFS, + &storage); + return ctl ? 
*ctl : 0; +} + + +static UINT8 +OvsGetTcpFlags(const NET_BUFFER_LIST *packet, // IN + const OvsFlowKey *key, // IN + const POVS_PACKET_HDR_INFO layers) // IN +{ + UNREFERENCED_PARAMETER(key); // should be removed later + + if (layers->isTcp) { + return TCP_FLAGS(OvsGetTcpCtl(packet, layers)); + } else { + return 0; + } +} + +static const EtherArp * +OvsGetArp(const NET_BUFFER_LIST *packet, + UINT32 ofs, + EtherArp *storage) +{ + return OvsGetPacketBytes(packet, sizeof *storage, ofs, storage); +} + +static const IPHdr * +OvsGetIp(const NET_BUFFER_LIST *packet, + UINT32 ofs, + IPHdr *storage) +{ + const IPHdr *ip = OvsGetPacketBytes(packet, sizeof *ip, ofs, storage); + if (ip) { + int ipLen = ip->ihl * 4; + if (ipLen >= sizeof *ip && OvsPacketLenNBL(packet) >= ofs + ipLen) { + return ip; + } + } + return NULL; +} + +static const TCPHdr * +OvsGetTcp(const NET_BUFFER_LIST *packet, + UINT32 ofs, + TCPHdr *storage) +{ + const TCPHdr *tcp = OvsGetPacketBytes(packet, sizeof *tcp, ofs, storage); + if (tcp) { + int tcpLen = tcp->doff * 4; + if (tcpLen >= sizeof *tcp && OvsPacketLenNBL(packet) >= ofs + tcpLen) { + return tcp; + } + } + return NULL; +} + +static const UDPHdr * +OvsGetUdp(const NET_BUFFER_LIST *packet, + UINT32 ofs, + UDPHdr *storage) +{ + return OvsGetPacketBytes(packet, sizeof *storage, ofs, storage); +} + +static const ICMPHdr * +OvsGetIcmp(const NET_BUFFER_LIST *packet, + UINT32 ofs, + ICMPHdr *storage) +{ + return OvsGetPacketBytes(packet, sizeof *storage, ofs, storage); +} + +#endif /* __OVS_PACKET_PARSER_H_ */ diff --git a/datapath-windows/ovsext/OvsSwitch.c b/datapath-windows/ovsext/OvsSwitch.c new file mode 100644 index 000000000..97ce2aec9 --- /dev/null +++ b/datapath-windows/ovsext/OvsSwitch.c @@ -0,0 +1,529 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains the implementation of the management functionality of the + * OVS. + */ + +#include "precomp.h" + +#include "OvsIoctl.h" +#include "OvsSwitch.h" +#include "OvsVport.h" +#include "OvsEvent.h" +#include "OvsFlow.h" +#include "OvsIpHelper.h" +#include "OvsTunnelIntf.h" +#include "OvsOid.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_SWITCH +#include "OvsDebug.h" + +POVS_SWITCH_CONTEXT gOvsSwitchContext; +BOOLEAN gOvsInAttach; +extern PNDIS_SPIN_LOCK gOvsCtrlLock; +extern NDIS_HANDLE gOvsExtDriverHandle; +extern NDIS_HANDLE gOvsExtDriverObject; + +static NDIS_STATUS OvsCreateSwitch(NDIS_HANDLE ndisFilterHandle, + POVS_SWITCH_CONTEXT *switchContextOut); +static NDIS_STATUS OvsInitSwitchContext(POVS_SWITCH_CONTEXT switchContext); +static VOID OvsDeleteSwitch(POVS_SWITCH_CONTEXT switchContext); +static VOID OvsCleanupSwitchContext(POVS_SWITCH_CONTEXT switchContext); +static NDIS_STATUS OvsActivateSwitch(POVS_SWITCH_CONTEXT switchContext); + + +/* + * -------------------------------------------------------------------------- + * Implements filter driver's FilterAttach function. + * + * This function allocates the switch context, and initializes its necessary + * members. 
+ * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsExtAttach(NDIS_HANDLE ndisFilterHandle, + NDIS_HANDLE filterDriverContext, + PNDIS_FILTER_ATTACH_PARAMETERS attachParameters) +{ + NDIS_STATUS status = NDIS_STATUS_FAILURE; + NDIS_FILTER_ATTRIBUTES ovsExtAttributes; + POVS_SWITCH_CONTEXT switchContext = NULL; + + UNREFERENCED_PARAMETER(filterDriverContext); + + OVS_LOG_TRACE("Enter: ndisFilterHandle %p", ndisFilterHandle); + + ASSERT(filterDriverContext == (NDIS_HANDLE)gOvsExtDriverObject); + if (attachParameters->MiniportMediaType != NdisMedium802_3) { + status = NDIS_STATUS_INVALID_PARAMETER; + goto cleanup; + } + + if (gOvsExtDriverHandle == NULL) { + OVS_LOG_TRACE("Exit: OVSEXT driver is not loaded."); + ASSERT(FALSE); + goto cleanup; + } + + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext) { + NdisReleaseSpinLock(gOvsCtrlLock); + OVS_LOG_TRACE("Exit: Failed to create OVS Switch, only one datapath is" + "supported, %p.", gOvsSwitchContext); + goto cleanup; + } + if (gOvsInAttach) { + NdisReleaseSpinLock(gOvsCtrlLock); + /* Just fail the request. */ + OVS_LOG_TRACE("Exit: Failed to create OVS Switch, since another attach" + "instance is in attach process."); + goto cleanup; + } + gOvsInAttach = TRUE; + NdisReleaseSpinLock(gOvsCtrlLock); + + status = OvsInitIpHelper(ndisFilterHandle); + if (status != STATUS_SUCCESS) { + OVS_LOG_ERROR("Exit: Failed to initialize IP helper."); + goto cleanup; + } + + status = OvsCreateSwitch(ndisFilterHandle, &switchContext); + if (status != NDIS_STATUS_SUCCESS) { + OvsCleanupIpHelper(); + goto cleanup; + } + ASSERT(switchContext); + + /* + * Register the switch context with NDIS so NDIS can pass it back to the + * Filterxxx callback functions as the 'FilterModuleContext' parameter. 
+ */ + RtlZeroMemory(&ovsExtAttributes, sizeof(NDIS_FILTER_ATTRIBUTES)); + ovsExtAttributes.Header.Revision = NDIS_FILTER_ATTRIBUTES_REVISION_1; + ovsExtAttributes.Header.Size = sizeof(NDIS_FILTER_ATTRIBUTES); + ovsExtAttributes.Header.Type = NDIS_OBJECT_TYPE_FILTER_ATTRIBUTES; + ovsExtAttributes.Flags = 0; + + NDIS_DECLARE_FILTER_MODULE_CONTEXT(OVS_SWITCH_CONTEXT); + status = NdisFSetAttributes(ndisFilterHandle, switchContext, &ovsExtAttributes); + if (status != NDIS_STATUS_SUCCESS) { + OVS_LOG_ERROR("Failed to set attributes."); + OvsCleanupIpHelper(); + goto cleanup; + } + + /* Setup the state machine. */ + switchContext->controlFlowState = OvsSwitchAttached; + switchContext->dataFlowState = OvsSwitchPaused; + + gOvsSwitchContext = switchContext; + KeMemoryBarrier(); + +cleanup: + gOvsInAttach = FALSE; + if (status != NDIS_STATUS_SUCCESS) { + if (switchContext != NULL) { + OvsDeleteSwitch(switchContext); + } + } + OVS_LOG_TRACE("Exit: status %x", status); + + return status; +} + + +/* + * -------------------------------------------------------------------------- + * This function allocated the switch context, and initializes its necessary + * members. + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsCreateSwitch(NDIS_HANDLE ndisFilterHandle, + POVS_SWITCH_CONTEXT *switchContextOut) +{ + NDIS_STATUS status; + POVS_SWITCH_CONTEXT switchContext; + NDIS_SWITCH_CONTEXT hostSwitchContext; + NDIS_SWITCH_OPTIONAL_HANDLERS hostSwitchHandler; + + OVS_LOG_TRACE("Enter: Create switch object"); + + switchContext = + (POVS_SWITCH_CONTEXT) OvsAllocateMemory(sizeof(OVS_SWITCH_CONTEXT)); + if (switchContext == NULL) { + status = NDIS_STATUS_RESOURCES; + goto create_switch_done; + } + RtlZeroMemory(switchContext, sizeof(OVS_SWITCH_CONTEXT)); + + /* Initialize the switch. 
*/ + hostSwitchHandler.Header.Type = NDIS_OBJECT_TYPE_SWITCH_OPTIONAL_HANDLERS; + hostSwitchHandler.Header.Size = NDIS_SIZEOF_SWITCH_OPTIONAL_HANDLERS_REVISION_1; + hostSwitchHandler.Header.Revision = NDIS_SWITCH_OPTIONAL_HANDLERS_REVISION_1; + + status = NdisFGetOptionalSwitchHandlers(ndisFilterHandle, + &hostSwitchContext, + &hostSwitchHandler); + if (status != NDIS_STATUS_SUCCESS) { + OVS_LOG_ERROR("OvsExtAttach: Extension is running in " + "non-switch environment."); + OvsFreeMemory(switchContext); + goto create_switch_done; + } + + switchContext->NdisFilterHandle = ndisFilterHandle; + switchContext->NdisSwitchContext = hostSwitchContext; + RtlCopyMemory(&switchContext->NdisSwitchHandlers, &hostSwitchHandler, + sizeof(NDIS_SWITCH_OPTIONAL_HANDLERS)); + + status = OvsInitSwitchContext(switchContext); + if (status != NDIS_STATUS_SUCCESS) { + OvsFreeMemory(switchContext); + goto create_switch_done; + } + + status = OvsTunnelFilterInitialize(gOvsExtDriverObject); + if (status != NDIS_STATUS_SUCCESS) { + OvsFreeMemory(switchContext); + goto create_switch_done; + } + *switchContextOut = switchContext; + +create_switch_done: + OVS_LOG_TRACE("Exit: switchContext: %p status: %#lx", + switchContext, status); + return status; +} + + +/* + * -------------------------------------------------------------------------- + * Implements filter driver's FilterDetach function. 
+ * -------------------------------------------------------------------------- + */ +_Use_decl_annotations_ +VOID +OvsExtDetach(NDIS_HANDLE filterModuleContext) +{ + POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)filterModuleContext; + + OVS_LOG_TRACE("Enter: filterModuleContext %p", filterModuleContext); + + ASSERT(switchContext->dataFlowState == OvsSwitchPaused); + switchContext->controlFlowState = OvsSwitchDetached; + KeMemoryBarrier(); + while(switchContext->pendingOidCount > 0) { + NdisMSleep(1000); + } + OvsDeleteSwitch(switchContext); + OvsCleanupIpHelper(); + gOvsSwitchContext = NULL; + /* This completes the cleanup, and a new attach can be handled now. */ + + OVS_LOG_TRACE("Exit: OvsDetach Successfully"); +} + + +/* + * -------------------------------------------------------------------------- + * This function deletes the switch by freeing all memory previously allocated. + * XXX need synchronization with other path. + * -------------------------------------------------------------------------- + */ +VOID +OvsDeleteSwitch(POVS_SWITCH_CONTEXT switchContext) +{ + UINT32 dpNo = switchContext->dpNo; + + OVS_LOG_TRACE("Enter: switchContext:%p", switchContext); + + OvsTunnelFilterUninitialize(gOvsExtDriverObject); + OvsClearAllSwitchVports(switchContext); + OvsCleanupSwitchContext(switchContext); + OvsFreeMemory(switchContext); + OVS_LOG_TRACE("Exit: deleted switch %p dpNo: %d", switchContext, dpNo); +} + + +/* + * -------------------------------------------------------------------------- + * Implements filter driver's FilterRestart function. 
+ * -------------------------------------------------------------------------- + */ +_Use_decl_annotations_ +NDIS_STATUS +OvsExtRestart(NDIS_HANDLE filterModuleContext, + PNDIS_FILTER_RESTART_PARAMETERS filterRestartParameters) +{ + POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)filterModuleContext; + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + BOOLEAN switchActive; + + UNREFERENCED_PARAMETER(filterRestartParameters); + + OVS_LOG_TRACE("Enter: filterModuleContext %p", + filterModuleContext); + + /* Activate the switch if this is the first restart. */ + if (!switchContext->isActivated && !switchContext->isActivateFailed) { + status = OvsQuerySwitchActivationComplete(switchContext, + &switchActive); + if (status != NDIS_STATUS_SUCCESS) { + switchContext->isActivateFailed = TRUE; + status = NDIS_STATUS_RESOURCES; + goto cleanup; + } + + if (switchActive) { + status = OvsActivateSwitch(switchContext); + + if (status != NDIS_STATUS_SUCCESS) { + OVS_LOG_WARN("Failed to activate switch, dpNo:%d", + switchContext->dpNo); + status = NDIS_STATUS_RESOURCES; + goto cleanup; + } + } + } + + ASSERT(switchContext->dataFlowState == OvsSwitchPaused); + switchContext->dataFlowState = OvsSwitchRunning; + +cleanup: + OVS_LOG_TRACE("Exit: Restart switch:%p, dpNo: %d, status: %#x", + switchContext, switchContext->dpNo, status); + return status; +} + + +/* + * -------------------------------------------------------------------------- + * Implements filter driver's FilterPause function + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsExtPause(NDIS_HANDLE filterModuleContext, + PNDIS_FILTER_PAUSE_PARAMETERS pauseParameters) +{ + POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)filterModuleContext; + + UNREFERENCED_PARAMETER(pauseParameters); + OVS_LOG_TRACE("Enter: filterModuleContext %p", + filterModuleContext); + + ASSERT(switchContext->dataFlowState == OvsSwitchRunning); + switchContext->dataFlowState = 
OvsSwitchPaused; + KeMemoryBarrier(); + while(switchContext->pendingOidCount > 0) { + NdisMSleep(1000); + } + + OVS_LOG_TRACE("Exit: OvsDetach Successfully"); + return NDIS_STATUS_SUCCESS; +} + +static NDIS_STATUS +OvsInitSwitchContext(POVS_SWITCH_CONTEXT switchContext) +{ + int i; + NTSTATUS status; + + OVS_LOG_TRACE("Enter: switchContext: %p", switchContext); + + switchContext->dispatchLock = + NdisAllocateRWLock(switchContext->NdisFilterHandle); + + switchContext->vportArray = + (PVOID *)OvsAllocateMemory(sizeof (PVOID) * OVS_MAX_VPORT_ARRAY_SIZE); + switchContext->nameHashArray = (PLIST_ENTRY) + OvsAllocateMemory(sizeof (LIST_ENTRY) * OVS_MAX_VPORT_ARRAY_SIZE); + switchContext->portHashArray = (PLIST_ENTRY) + OvsAllocateMemory(sizeof (LIST_ENTRY) * OVS_MAX_VPORT_ARRAY_SIZE); + status = OvsAllocateFlowTable(&switchContext->datapath, switchContext); + + if (status == NDIS_STATUS_SUCCESS) { + status = OvsInitBufferPool(switchContext); + } + if (status != NDIS_STATUS_SUCCESS || + switchContext->dispatchLock == NULL || + switchContext->vportArray == NULL || + switchContext->nameHashArray == NULL || + switchContext->portHashArray == NULL) { + if (switchContext->dispatchLock) { + NdisFreeRWLock(switchContext->dispatchLock); + } + if (switchContext->vportArray) { + OvsFreeMemory(switchContext->vportArray); + } + if (switchContext->nameHashArray) { + OvsFreeMemory(switchContext->nameHashArray); + } + if (switchContext->portHashArray) { + OvsFreeMemory(switchContext->portHashArray); + } + OvsDeleteFlowTable(&switchContext->datapath); + OvsCleanupBufferPool(switchContext); + + OVS_LOG_TRACE("Exit: Failed to init switchContext"); + return NDIS_STATUS_RESOURCES; + } + + for (i = 0; i < OVS_MAX_VPORT_ARRAY_SIZE; i++) { + InitializeListHead(&switchContext->nameHashArray[i]); + } + for (i = 0; i < OVS_MAX_VPORT_ARRAY_SIZE; i++) { + InitializeListHead(&switchContext->portHashArray[i]); + } + RtlZeroMemory(switchContext->vportArray, + sizeof (PVOID) * OVS_MAX_VPORT_ARRAY_SIZE); 
+ + switchContext->isActivated = FALSE; + switchContext->isActivateFailed = FALSE; + switchContext->dpNo = OVS_DP_NUMBER; + switchContext->lastPortIndex = OVS_MAX_VPORT_ARRAY_SIZE -1; + ovsTimeIncrementPerTick = KeQueryTimeIncrement() / 10000; + OVS_LOG_TRACE("Exit: Succesfully initialized switchContext: %p", + switchContext); + return NDIS_STATUS_SUCCESS; +} + +static VOID +OvsCleanupSwitchContext(POVS_SWITCH_CONTEXT switchContext) +{ + OVS_LOG_TRACE("Enter: Delete switchContext:%p", switchContext); + + /* We need to do cleanup for tunnel port here. */ + ASSERT(switchContext->numVports == 0); + + NdisFreeRWLock(switchContext->dispatchLock); + OvsFreeMemory(switchContext->nameHashArray); + OvsFreeMemory(switchContext->portHashArray); + OvsFreeMemory(switchContext->vportArray); + OvsDeleteFlowTable(&switchContext->datapath); + OvsCleanupBufferPool(switchContext); + OVS_LOG_TRACE("Exit: Delete switchContext: %p", switchContext); +} + +/* + * -------------------------------------------------------------------------- + * This function activates the switch by initializing it with all the runtime + * state. First it queries all of the MAC addresses set as custom switch policy + * to allow sends from, and adds tme to the property list. Then it queries the + * NIC list and verifies it can support all of the NICs currently connected to + * the switch, and adds the NICs to the NIC list. 
+ * -------------------------------------------------------------------------- + */ +static NDIS_STATUS +OvsActivateSwitch(POVS_SWITCH_CONTEXT switchContext) +{ + NDIS_STATUS status; + + ASSERT(!switchContext->isActivated); + + OVS_LOG_TRACE("Enter: activate switch %p, dpNo: %ld", + switchContext, switchContext->dpNo); + + status = OvsAddConfiguredSwitchPorts(switchContext); + + if (status != NDIS_STATUS_SUCCESS) { + OVS_LOG_WARN("Failed to add configured switch ports"); + goto cleanup; + + } + status = OvsInitConfiguredSwitchNics(switchContext); + + if (status != NDIS_STATUS_SUCCESS) { + OVS_LOG_WARN("Failed to add configured vports"); + OvsClearAllSwitchVports(switchContext); + goto cleanup; + } + switchContext->isActivated = TRUE; + OvsPostEvent(OVS_DEFAULT_PORT_NO, OVS_DEFAULT_EVENT_STATUS); + +cleanup: + OVS_LOG_TRACE("Exit: activate switch:%p, isActivated: %s, status = %lx", + switchContext, + (switchContext->isActivated ? "TRUE" : "FALSE"), status); + return status; +} + +PVOID +OvsGetVportFromIndex(UINT16 index) +{ + if (index < OVS_MAX_VPORT_ARRAY_SIZE && + !OVS_IS_VPORT_ENTRY_NULL(gOvsSwitchContext, index)) { + return gOvsSwitchContext->vportArray[index]; + } + return NULL; +} + +PVOID +OvsGetExternalVport() +{ + return gOvsSwitchContext->externalVport; +} + + +/* + * -------------------------------------------------------------------------- + * Implements filter driver's FilterNetPnPEvent function. + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsExtNetPnPEvent(NDIS_HANDLE filterModuleContext, + PNET_PNP_EVENT_NOTIFICATION netPnPEvent) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)filterModuleContext; + BOOLEAN switchActive; + + OVS_LOG_TRACE("Enter: filterModuleContext: %p, NetEvent: %d", + filterModuleContext, (netPnPEvent->NetPnPEvent).NetEvent); + /* + * The only interesting event is the NetEventSwitchActivate. 
It provides + * an asynchronous notification of the switch completing activation. + */ + if (netPnPEvent->NetPnPEvent.NetEvent == NetEventSwitchActivate) { + status = OvsQuerySwitchActivationComplete(switchContext, &switchActive); + if (status != NDIS_STATUS_SUCCESS) { + switchContext->isActivateFailed = TRUE; + } else { + ASSERT(switchContext->isActivated == FALSE); + ASSERT(switchActive == TRUE); + if (switchContext->isActivated == FALSE && switchActive == TRUE) { + status = OvsActivateSwitch(switchContext); + OVS_LOG_TRACE("OvsExtNetPnPEvent: activated switch: %p " + "status: %s", switchContext, + status ? "TRUE" : "FALSE"); + } + } + } + + if (status == NDIS_STATUS_SUCCESS) { + status = NdisFNetPnPEvent(switchContext->NdisFilterHandle, + netPnPEvent); + } + OVS_LOG_TRACE("Exit: OvsExtNetPnPEvent"); + + return status; +} diff --git a/datapath-windows/ovsext/OvsSwitch.h b/datapath-windows/ovsext/OvsSwitch.h new file mode 100644 index 000000000..d49fe9b70 --- /dev/null +++ b/datapath-windows/ovsext/OvsSwitch.h @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains the definition of the switch object for the OVS. 
+ */ + +#ifndef __OVS_SWITCH_H_ +#define __OVS_SWITCH_H_ 1 + +#include "OvsNetProto.h" +#include "OvsBufferMgmt.h" +#define OVS_MAX_VPORT_ARRAY_SIZE 1024 + +#define OVS_VPORT_MASK (OVS_MAX_VPORT_ARRAY_SIZE - 1) + +#define OVS_INTERNAL_VPORT_DEFAULT_INDEX 0 + +//Tunnel port indicies +#define RESERVED_START_INDEX1 1 +#define OVS_TUNNEL_INDEX_START RESERVED_START_INDEX1 +#define OVS_VXLAN_VPORT_INDEX 2 +#define OVS_GRE_VPORT_INDEX 3 +#define OVS_GRE64_VPORT_INDEX 4 +#define OVS_TUNNEL_INDEX_END OVS_GRE64_VPORT_INDEX + +#define OVS_EXTERNAL_VPORT_START 8 +#define OVS_EXTERNAL_VPORT_END 40 +#define OVS_INTERNAL_VPORT_START 40 +#define OVS_INTERNAL_VPOR_END 72 +#define OVS_VM_VPORT_START 72 +#define OVS_VM_VPORT_MAX 0xffff +#define OVS_VPORT_INDEX(_portNo) ((_portNo) & 0xffffff) +#define OVS_VPORT_PORT_NO(_index, _gen) \ + (((_index) & 0xffffff) | ((UINT32)(_gen) << 24)) +#define OVS_VPORT_GEN(portNo) (portNo >> 24) + +#define OVS_MAX_PHYS_ADAPTERS 32 +#define OVS_MAX_IP_VPOR 32 + +#define OVS_HASH_BASIS 0x13578642 + +typedef struct _OVS_DATAPATH +{ + PLIST_ENTRY flowTable; // Contains OvsFlows. + UINT32 nFlows; // Number of entries in flowTable. + + // List_Links queues[64]; // Hash table of queue IDs. + + /* Statistics. */ + UINT64 hits; // Number of flow table hits. + UINT64 misses; // Number of flow table misses. + UINT64 lost; // Number of dropped misses. + + /* Used to protect the flows in the flowtable. */ + PNDIS_RW_LOCK_EX lock; +} OVS_DATAPATH, *POVS_DATAPATH; + +/* + * OVS_SWITCH_CONTEXT + * + * The context allocated per switch., For OVS, we only + * support one switch which corresponding to one datapath. + * Each datapath can have multiple logical bridges configured + * which is maintained by vswitchd. 
+ */ + +typedef enum OVS_SWITCH_DATAFLOW_STATE +{ + OvsSwitchPaused, + OvsSwitchRunning +} OVS_SWITCH_DATAFLOW_STATE, *POVS_SWITCH_DATAFLOW_STATE; + +typedef enum OVS_SWITCH_CONTROFLOW_STATE +{ + OvsSwitchUnknown, + OvsSwitchAttached, + OvsSwitchDetached +} OVS_SWITCH_CONTROLFLOW_STATE, *POVS_SWITCH_CONTROLFLOW_STATE; + +// XXX: Take care of alignment and grouping members by cacheline +typedef struct _OVS_SWITCH_CONTEXT +{ + /* Coarse and fine-grained switch states. */ + OVS_SWITCH_DATAFLOW_STATE dataFlowState; + OVS_SWITCH_CONTROLFLOW_STATE controlFlowState; + BOOLEAN isActivated; + BOOLEAN isActivateFailed; + + UINT32 dpNo; + + NDIS_SWITCH_PORT_ID externalPortId; + NDIS_SWITCH_PORT_ID internalPortId; + PVOID externalVport; // the virtual adapter vport + PVOID internalVport; + + PVOID *vportArray; + PLIST_ENTRY nameHashArray; // based on ovsName + PLIST_ENTRY portHashArray; // based on portId + + UINT32 numPhysicalNics; + UINT32 numVports; // include validation port + UINT32 lastPortIndex; + + /* Lock taken over the switch. This protects the ports on the switch. */ + PNDIS_RW_LOCK_EX dispatchLock; + + /* The flowtable. */ + OVS_DATAPATH datapath; + + /* Handle to the OVSExt filter driver. Same as 'gOvsExtDriverHandle'. */ + NDIS_HANDLE NdisFilterHandle; + + /* Handle and callbacks exposed by the underlying hyper-v switch. 
*/ + NDIS_SWITCH_CONTEXT NdisSwitchContext; + NDIS_SWITCH_OPTIONAL_HANDLERS NdisSwitchHandlers; + + volatile LONG pendingInjectedNblCount; + volatile LONG pendingOidCount; + + OVS_NBL_POOL ovsPool; +} OVS_SWITCH_CONTEXT, *POVS_SWITCH_CONTEXT; + + +static __inline VOID +OvsAcquireDatapathRead(OVS_DATAPATH *datapath, + LOCK_STATE_EX *lockState, + BOOLEAN dispatch) +{ + ASSERT(datapath); + NdisAcquireRWLockRead(datapath->lock, lockState, dispatch); +} + +static __inline VOID +OvsAcquireDatapathWrite(OVS_DATAPATH *datapath, + LOCK_STATE_EX *lockState, + BOOLEAN dispatch) +{ + ASSERT(datapath); + NdisAcquireRWLockWrite(datapath->lock, lockState, dispatch); +} + + +static __inline VOID +OvsReleaseDatapath(OVS_DATAPATH *datapath, + LOCK_STATE_EX *lockState) +{ + ASSERT(datapath); + NdisReleaseRWLock(datapath->lock, lockState); +} + + +PVOID OvsGetVportFromIndex(UINT16 index); +PVOID OvsGetExternalVport(); + +#endif /* __OVS_SWITCH_H_ */ diff --git a/datapath-windows/ovsext/OvsTunnel.c b/datapath-windows/ovsext/OvsTunnel.c new file mode 100644 index 000000000..b5a369a9d --- /dev/null +++ b/datapath-windows/ovsext/OvsTunnel.c @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * OvsTunnel.c + * WFP Classified callback function and Action code for injecting a packet to the vswitch + */ + +#include "precomp.h" + +#pragma warning(push) +#pragma warning(disable:4201) // unnamed struct/union +#include <fwpsk.h> +#pragma warning(pop) + +#pragma warning( push ) +#pragma warning( disable:4127 ) + +#include <fwpmk.h> +#include "OvsTunnel.h" +#include "OvsIoctl.h" +#include "OvsSwitch.h" +#include "OvsVport.h" +#include "OvsEvent.h" +#include "OvsUser.h" +#include "OvsVxlan.h" +#include "OvsPacketIO.h" +#include "OvsNetProto.h" +#include "OvsFlow.h" + +extern POVS_SWITCH_CONTEXT gOvsSwitchContext; + +static NTSTATUS +OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, + OVS_TUNNEL_PENDED_PACKET *packet); + +VOID OvsAcquireDatapathRead(OVS_DATAPATH *datapath, + LOCK_STATE_EX *lockState, + BOOLEAN dispatch); +VOID OvsAcquireDatapathWrite(OVS_DATAPATH *datapath, + LOCK_STATE_EX *lockState, + BOOLEAN dispatch); +VOID OvsReleaseDatapath(OVS_DATAPATH *datapath, + LOCK_STATE_EX *lockState); + + +NTSTATUS +OvsTunnelNotify(FWPS_CALLOUT_NOTIFY_TYPE notifyType, + const GUID *filterKey, + const FWPS_FILTER *filter) +{ + UNREFERENCED_PARAMETER(notifyType); + UNREFERENCED_PARAMETER(filterKey); + UNREFERENCED_PARAMETER(filter); + + return STATUS_SUCCESS; +} + +static NTSTATUS +OvsTunnelAnalyzePacket(OVS_TUNNEL_PENDED_PACKET *packet) +{ + NTSTATUS status = STATUS_SUCCESS; + UINT32 packetLength = 0; + ULONG bytesCopied = 0; + NET_BUFFER_LIST *copiedNBL = NULL; + NET_BUFFER *netBuffer; + NDIS_STATUS ndisStatus; + + /* + * For inbound net buffer list, we can assume it contains only one + * net buffer (unless it was an re-assembeled fragments). 
in both cases + * the first net buffer should include all headers, we assert if the retreat fails + */ + netBuffer = NET_BUFFER_LIST_FIRST_NB(packet->netBufferList); + + /* Drop the packet from the host stack */ + packet->classifyOut->actionType = FWP_ACTION_BLOCK; + packet->classifyOut->rights &= ~FWPS_RIGHT_ACTION_WRITE; + + /* Adjust the net buffer list offset to the start of the IP header */ + ndisStatus = NdisRetreatNetBufferDataStart(netBuffer, + packet->ipHeaderSize + + packet->transportHeaderSize, + 0, NULL); + ASSERT(ndisStatus == NDIS_STATUS_SUCCESS); + + /* Single NBL element for WFP */ + ASSERT(packet->netBufferList->Next == NULL); + + /* Note that the copy will inherit the original net buffer list's offset */ + packetLength = NET_BUFFER_DATA_LENGTH(netBuffer); + copiedNBL = OvsAllocateVariableSizeNBL(gOvsSwitchContext, packetLength, + OVS_DEFAULT_HEADROOM_SIZE); + + if (copiedNBL == NULL) { + goto analyzeDone; + } + + status = NdisCopyFromNetBufferToNetBuffer(NET_BUFFER_LIST_FIRST_NB(copiedNBL), + 0, packetLength, + netBuffer, 0, &bytesCopied); + if (status != NDIS_STATUS_SUCCESS || packetLength != bytesCopied) { + goto analyzeFreeNBL; + } + + status = OvsInjectPacketThroughActions(copiedNBL, + packet); + goto analyzeDone; + + /* Undo the adjustment on the original net buffer list */ +analyzeFreeNBL: + OvsCompleteNBL(gOvsSwitchContext, copiedNBL, TRUE); +analyzeDone: + NdisAdvanceNetBufferDataStart(netBuffer, + packet->transportHeaderSize + packet->ipHeaderSize, + FALSE, + NULL); + return status; +} + + +/* + * -------------------------------------------------------------------------- + * This is the classifyFn function of the datagram-data callout. It + * allocates a packet structure to store the classify and meta data and + * it references the net buffer list for out-of-band modification and + * re-injection. The packet structure will be queued to the global packet + * queue. The worker thread will then be signaled, if idle, to process + * the queue. 
+ * -------------------------------------------------------------------------- + */ +VOID +OvsTunnelClassify(const FWPS_INCOMING_VALUES *inFixedValues, + const FWPS_INCOMING_METADATA_VALUES *inMetaValues, + VOID *layerData, + const VOID *classifyContext, + const FWPS_FILTER *filter, + UINT64 flowContext, + FWPS_CLASSIFY_OUT *classifyOut) +{ + OVS_TUNNEL_PENDED_PACKET packetStorage; + OVS_TUNNEL_PENDED_PACKET *packet = &packetStorage; + FWP_DIRECTION direction; + + UNREFERENCED_PARAMETER(classifyContext); + UNREFERENCED_PARAMETER(filter); + UNREFERENCED_PARAMETER(flowContext); + + ASSERT(layerData != NULL); + + /* We don't have the necessary right to alter the packet flow */ + if ((classifyOut->rights & FWPS_RIGHT_ACTION_WRITE) == 0) { + /* XXX TBD revisit protect against other filters owning this packet */ + ASSERT(FALSE); + goto Exit; + } + + RtlZeroMemory(packet, sizeof(OVS_TUNNEL_PENDED_PACKET)); + + /* classifyOut cannot be accessed from a different thread context */ + packet->classifyOut = classifyOut; + + if (inFixedValues->layerId == FWPS_LAYER_DATAGRAM_DATA_V4) { + direction = + inFixedValues->incomingValue[FWPS_FIELD_DATAGRAM_DATA_V4_DIRECTION].\ + value.uint32; + } + else { + ASSERT(inFixedValues->layerId == FWPS_LAYER_DATAGRAM_DATA_V6); + direction = + inFixedValues->incomingValue[FWPS_FIELD_DATAGRAM_DATA_V6_DIRECTION].\ + value.uint32; + } + + packet->netBufferList = layerData; + + ASSERT(FWPS_IS_METADATA_FIELD_PRESENT(inMetaValues, + FWPS_METADATA_FIELD_COMPARTMENT_ID)); + + ASSERT(direction == FWP_DIRECTION_INBOUND); + + ASSERT(FWPS_IS_METADATA_FIELD_PRESENT( + inMetaValues, + FWPS_METADATA_FIELD_IP_HEADER_SIZE)); + ASSERT(FWPS_IS_METADATA_FIELD_PRESENT( + inMetaValues, + FWPS_METADATA_FIELD_TRANSPORT_HEADER_SIZE)); + + packet->ipHeaderSize = inMetaValues->ipHeaderSize; + packet->transportHeaderSize = inMetaValues->transportHeaderSize; + + ASSERT(inFixedValues->incomingValue[FWPS_FIELD_DATAGRAM_DATA_V4_IP_PROTOCOL].value.uint8 == IPPROTO_UDP ); + 
OvsTunnelAnalyzePacket(packet); + +Exit: + ; +} + + +static NTSTATUS +OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, + OVS_TUNNEL_PENDED_PACKET *packet) +{ + NTSTATUS status = STATUS_SUCCESS; + OvsIPv4TunnelKey tunnelKey; + NET_BUFFER *pNb; + ULONG sendCompleteFlags = 0; + BOOLEAN dispatch; + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail; + LOCK_STATE_EX lockState, dpLockState; + LIST_ENTRY missedPackets; + OvsCompletionList completionList; + KIRQL irql; + ULONG SendFlags = NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP; + OVS_DATAPATH *datapath = &gOvsSwitchContext->datapath; + + ASSERT(gOvsSwitchContext); + + /* Fill the tunnel key */ + status = OvsSlowPathDecapVxlan(pNbl, &tunnelKey); + + if(!NT_SUCCESS(status)) { + goto dropit; + } + + pNb = NET_BUFFER_LIST_FIRST_NB(pNbl); + + NdisAdvanceNetBufferDataStart(pNb, + packet->transportHeaderSize + packet->ipHeaderSize + + sizeof(VXLANHdr), + FALSE, + NULL); + + /* Most likely (always) dispatch irql */ + irql = KeGetCurrentIrql(); + + /* dispatch is used for datapath lock as well */ + dispatch = (irql == DISPATCH_LEVEL) ? NDIS_RWL_AT_DISPATCH_LEVEL : 0; + if (dispatch) { + sendCompleteFlags |= NDIS_SEND_COMPLETE_FLAGS_DISPATCH_LEVEL; + } + + InitializeListHead(&missedPackets); + OvsInitCompletionList(&completionList, gOvsSwitchContext, + sendCompleteFlags); + + { + POVS_VPORT_ENTRY vport; + UINT32 portNo; + OVS_PACKET_HDR_INFO layers; + OvsFlowKey key; + UINT64 hash; + PNET_BUFFER curNb; + OvsFlow *flow; + + fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl); + + /* + * XXX WFP packets contain a single NBL structure. 
+ * Reassembeled packet "may" have multiple NBs, however, a simple test shows + * that the packet still has a single NB (after reassemble) + * We still need to check if the Ethernet header of the innet packet is in a single MD + */ + + curNb = NET_BUFFER_LIST_FIRST_NB(pNbl); + ASSERT(curNb->Next == NULL); + + NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, dispatch); + + /* Lock the flowtable for the duration of accessing the flow */ + OvsAcquireDatapathRead(datapath, &dpLockState, NDIS_RWL_AT_DISPATCH_LEVEL); + + SendFlags |= NDIS_SEND_FLAGS_DISPATCH_LEVEL; + + vport = OvsGetTunnelVport(OVSWIN_VPORT_TYPE_VXLAN); + + if (vport == NULL){ + status = STATUS_UNSUCCESSFUL; + goto unlockAndDrop; + } + + ASSERT(vport->ovsType == OVSWIN_VPORT_TYPE_VXLAN); + + portNo = vport->portNo; + + status = OvsExtractFlow(pNbl, portNo, &key, &layers, &tunnelKey); + if (status != NDIS_STATUS_SUCCESS) { + goto unlockAndDrop; + } + + flow = OvsLookupFlow(datapath, &key, &hash, FALSE); + if (flow) { + OvsFlowUsed(flow, pNbl, &layers); + datapath->hits++; + + OvsActionsExecute(gOvsSwitchContext, &completionList, pNbl, + portNo, SendFlags, &key, &hash, &layers, + flow->actions, flow->actionsLen); + + OvsReleaseDatapath(datapath, &dpLockState); + } else { + POVS_PACKET_QUEUE_ELEM elem; + + datapath->misses++; + elem = OvsCreateQueuePacket(1, NULL, 0, OVS_PACKET_CMD_MISS, + portNo, &key.tunKey, pNbl, curNb, + TRUE, &layers); + if (elem) { + /* Complete the packet since it was copied to user buffer. 
*/ + InsertTailList(&missedPackets, &elem->link); + OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, 1); + } else { + status = STATUS_INSUFFICIENT_RESOURCES; + } + goto unlockAndDrop; + } + + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + + } + + return status; + +unlockAndDrop: + OvsReleaseDatapath(datapath, &dpLockState); + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); +dropit: + pNbl = OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE); + ASSERT(pNbl == NULL); + return status; +} + +#pragma warning(pop) diff --git a/datapath-windows/ovsext/OvsTunnel.h b/datapath-windows/ovsext/OvsTunnel.h new file mode 100644 index 000000000..110ff747b --- /dev/null +++ b/datapath-windows/ovsext/OvsTunnel.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OVS_TUNNEL_H_ +#define __OVS_TUNNEL_H_ 1 + +// +// OVS_TUNNEL_PENDED_PACKET is the object type we used to store all information +// needed for out-of-band packet modification and re-injection. This type +// also points back to the flow context the packet belongs to. + +typedef struct OVS_TUNNEL_PENDED_PACKET_ +{ + /* Common fields for inbound and outbound traffic */ + NET_BUFFER_LIST *netBufferList; + + UINT32 ipHeaderSize; + UINT32 transportHeaderSize; + FWPS_CLASSIFY_OUT *classifyOut; +} OVS_TUNNEL_PENDED_PACKET; + +/* Shared global data. 
*/ + +extern UINT16 configNewDestPort; + +extern UINT32 gCalloutIdV4; + +// +// Shared function prototypes +// +VOID OvsTunnelClassify(const FWPS_INCOMING_VALUES *inFixedValues, + const FWPS_INCOMING_METADATA_VALUES *inMetaValues, + VOID *layerData, + const VOID *classifyContext, + const FWPS_FILTER *filter, + UINT64 flowContext, + FWPS_CLASSIFY_OUT *classifyOut); + + +NTSTATUS OvsTunnelNotify(FWPS_CALLOUT_NOTIFY_TYPE notifyType, + const GUID *filterKey, + const FWPS_FILTER *filter); + +#endif /* __OVS_TUNNEL_H_ */ diff --git a/datapath-windows/ovsext/OvsTunnelFilter.c b/datapath-windows/ovsext/OvsTunnelFilter.c new file mode 100644 index 000000000..a1de00734 --- /dev/null +++ b/datapath-windows/ovsext/OvsTunnelFilter.c @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "precomp.h" + +#pragma warning(push) +#pragma warning(disable:4201) // unnamed struct/union + + +#include <fwpsk.h> + +#pragma warning(pop) + +#include <fwpmk.h> +#include <ws2ipdef.h> +#include <in6addr.h> +#include <ip2string.h> + +#include "OvsTunnel.h" +#include "OvsSwitch.h" +#include "OvsVport.h" +#include "OvsEvent.h" +#include "OvsUser.h" +#include "OvsVxlan.h" + + +#define INITGUID +#include <guiddef.h> + + +/* Configurable parameters (addresses and ports are in host order) */ +UINT16 configNewDestPort = VXLAN_UDP_PORT; + +/* + * Callout and sublayer GUIDs + */ +// b16b0a6e-2b2a-41a3-8b39-bd3ffc855ff8 +DEFINE_GUID( + OVS_TUNNEL_CALLOUT_V4, + 0xb16b0a6e, + 0x2b2a, + 0x41a3, + 0x8b, 0x39, 0xbd, 0x3f, 0xfc, 0x85, 0x5f, 0xf8 + ); + +/* 0104fd7e-c825-414e-94c9-f0d525bbc169 */ +DEFINE_GUID( + OVS_TUNNEL_SUBLAYER, + 0x0104fd7e, + 0xc825, + 0x414e, + 0x94, 0xc9, 0xf0, 0xd5, 0x25, 0xbb, 0xc1, 0x69 + ); + +/* + * Callout driver global variables + */ +PDEVICE_OBJECT gDeviceObject; + +HANDLE gEngineHandle; +UINT32 gCalloutIdV4; + + +/* Callout driver implementation */ + +NTSTATUS +OvsTunnelAddFilter(PWSTR filterName, + const PWSTR filterDesc, + USHORT remotePort, + FWP_DIRECTION direction, + UINT64 context, + const GUID *layerKey, + const GUID *calloutKey) +{ + NTSTATUS status = STATUS_SUCCESS; + FWPM_FILTER filter = {0}; + FWPM_FILTER_CONDITION filterConditions[3] = {0}; + UINT conditionIndex; + + UNREFERENCED_PARAMETER(remotePort); + UNREFERENCED_PARAMETER(direction); + + filter.layerKey = *layerKey; + filter.displayData.name = (wchar_t*)filterName; + filter.displayData.description = (wchar_t*)filterDesc; + + filter.action.type = FWP_ACTION_CALLOUT_TERMINATING; + filter.action.calloutKey = *calloutKey; + filter.filterCondition = filterConditions; + filter.subLayerKey = OVS_TUNNEL_SUBLAYER; + filter.weight.type = FWP_EMPTY; // auto-weight. 
+ filter.rawContext = context; + + conditionIndex = 0; + + /* Condition 0: match on the datagram direction. */ + filterConditions[conditionIndex].fieldKey = FWPM_CONDITION_DIRECTION; + filterConditions[conditionIndex].matchType = FWP_MATCH_EQUAL; + filterConditions[conditionIndex].conditionValue.type = FWP_UINT32; + filterConditions[conditionIndex].conditionValue.uint32 = direction; + + conditionIndex++; + + /* Condition 1: 'remotePort' is compared against the local port field. */ + filterConditions[conditionIndex].fieldKey = FWPM_CONDITION_IP_LOCAL_PORT; + filterConditions[conditionIndex].matchType = FWP_MATCH_EQUAL; + filterConditions[conditionIndex].conditionValue.type = FWP_UINT16; + filterConditions[conditionIndex].conditionValue.uint16 = remotePort; + + conditionIndex++; + + filter.numFilterConditions = conditionIndex; + + status = FwpmFilterAdd(gEngineHandle, + &filter, + NULL, + NULL); + + return status; +} + +/* + * -------------------------------------------------------------------------- + * This function registers callouts and filters that intercept UDP traffic at + * WFP FWPM_LAYER_DATAGRAM_DATA_V4 + * -------------------------------------------------------------------------- + */ +NTSTATUS +OvsTunnelRegisterDatagramDataCallouts(const GUID *layerKey, + const GUID *calloutKey, + VOID *deviceObject, + UINT32 *calloutId) +{ + NTSTATUS status = STATUS_SUCCESS; + + FWPS_CALLOUT sCallout = {0}; + FWPM_CALLOUT mCallout = {0}; + + FWPM_DISPLAY_DATA displayData = {0}; + + BOOLEAN calloutRegistered = FALSE; + + sCallout.calloutKey = *calloutKey; + sCallout.classifyFn = OvsTunnelClassify; + sCallout.notifyFn = OvsTunnelNotify; +#if FLOW_CONTEXT + /* Currently we don't associate a context with the flow */ + sCallout.flowDeleteFn = OvsTunnelFlowDelete; + sCallout.flags = FWP_CALLOUT_FLAG_CONDITIONAL_ON_FLOW; +#endif + + status = FwpsCalloutRegister(deviceObject, + &sCallout, + calloutId); + + if (!NT_SUCCESS(status)) { + goto Exit; + } + calloutRegistered = TRUE; + + displayData.name = L"Datagram-Data OVS Callout"; + displayData.description = L"Proxies destination address/port for UDP"; + + 
mCallout.calloutKey = *calloutKey; + mCallout.displayData = displayData; + mCallout.applicableLayer = *layerKey; + + status = FwpmCalloutAdd(gEngineHandle, + &mCallout, + NULL, + NULL); + + if (!NT_SUCCESS(status)) { + goto Exit; + } + + status = OvsTunnelAddFilter(L"Datagram-Data OVS Filter (Inbound)", + L"address/port for UDP", + configNewDestPort, + FWP_DIRECTION_INBOUND, + 0, + layerKey, + calloutKey); + +Exit: + + if (!NT_SUCCESS(status)){ + if (calloutRegistered) { + FwpsCalloutUnregisterById(*calloutId); + *calloutId = 0; + } + } + + return status; +} + +/* + * -------------------------------------------------------------------------- + * This function registers dynamic callouts and filters that intercept UDP + * Callouts and filters will be removed during De-Initialize. + * -------------------------------------------------------------------------- + */ +NTSTATUS +OvsTunnelRegisterCallouts(VOID *deviceObject) +{ + NTSTATUS status = STATUS_SUCCESS; + FWPM_SUBLAYER OvsTunnelSubLayer; + + BOOLEAN engineOpened = FALSE; + BOOLEAN inTransaction = FALSE; + + FWPM_SESSION session = {0}; + + session.flags = FWPM_SESSION_FLAG_DYNAMIC; + + status = FwpmEngineOpen(NULL, + RPC_C_AUTHN_WINNT, + NULL, + &session, + &gEngineHandle); + + if (!NT_SUCCESS(status)) { + goto Exit; + } + engineOpened = TRUE; + + status = FwpmTransactionBegin(gEngineHandle, 0); + if (!NT_SUCCESS(status)) { + goto Exit; + } + inTransaction = TRUE; + + RtlZeroMemory(&OvsTunnelSubLayer, sizeof(FWPM_SUBLAYER)); + + OvsTunnelSubLayer.subLayerKey = OVS_TUNNEL_SUBLAYER; + OvsTunnelSubLayer.displayData.name = L"Datagram-Data OVS Sub-Layer"; + OvsTunnelSubLayer.displayData.description = + L"Sub-Layer for use by Datagram-Data OVS callouts"; + OvsTunnelSubLayer.flags = 0; + OvsTunnelSubLayer.weight = FWP_EMPTY; /* auto-weight */ + + status = FwpmSubLayerAdd(gEngineHandle, &OvsTunnelSubLayer, NULL); + if (!NT_SUCCESS(status)) { + goto Exit; + } + + // In order to use this callout a socket must be opened + 
status = OvsTunnelRegisterDatagramDataCallouts(&FWPM_LAYER_DATAGRAM_DATA_V4, + &OVS_TUNNEL_CALLOUT_V4, + deviceObject, + &gCalloutIdV4); + if (!NT_SUCCESS(status)) { + goto Exit; + } + + status = FwpmTransactionCommit(gEngineHandle); + if (!NT_SUCCESS(status)){ + goto Exit; + } + inTransaction = FALSE; + +Exit: + + if (!NT_SUCCESS(status)) { + if (inTransaction) { + FwpmTransactionAbort(gEngineHandle); + } + if (engineOpened) { + FwpmEngineClose(gEngineHandle); + gEngineHandle = NULL; + } + } + + return status; +} + +VOID +OvsTunnelUnregisterCallouts(VOID) +{ + FwpmEngineClose(gEngineHandle); + gEngineHandle = NULL; + FwpsCalloutUnregisterById(gCalloutIdV4); +} + + +VOID +OvsTunnelFilterUninitialize(PDRIVER_OBJECT driverObject) +{ + UNREFERENCED_PARAMETER(driverObject); + + OvsTunnelUnregisterCallouts(); + IoDeleteDevice(gDeviceObject); +} + + +NTSTATUS +OvsTunnelFilterInitialize(PDRIVER_OBJECT driverObject) +{ + NTSTATUS status = STATUS_SUCCESS; + UNICODE_STRING deviceName; + + RtlInitUnicodeString(&deviceName, + L"\\Device\\OvsTunnelFilter"); + + status = IoCreateDevice(driverObject, + 0, + &deviceName, + FILE_DEVICE_NETWORK, + 0, + FALSE, + &gDeviceObject); + + if (!NT_SUCCESS(status)){ + goto Exit; + } + + status = OvsTunnelRegisterCallouts(gDeviceObject); + +Exit: + + if (!NT_SUCCESS(status)){ + if (gEngineHandle != NULL) { + OvsTunnelUnregisterCallouts(); + } + + if (gDeviceObject) { + IoDeleteDevice(gDeviceObject); + } + } + + return status; +} diff --git a/datapath-windows/ovsext/OvsTunnelIntf.h b/datapath-windows/ovsext/OvsTunnelIntf.h new file mode 100644 index 000000000..3543c8a7e --- /dev/null +++ b/datapath-windows/ovsext/OvsTunnelIntf.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OVS_TUNNEL_INTF_H_ +#define __OVS_TUNNEL_INTF_H_ 1 + +/* Tunnel callout driver load/unload functions */ +NTSTATUS OvsTunnelFilterInitialize(PDRIVER_OBJECT driverObject); + +VOID OvsTunnelFilterUninitialize(PDRIVER_OBJECT driverObject); + +#endif /* __OVS_TUNNEL_INTF_H_ */ diff --git a/datapath-windows/ovsext/OvsTypes.h b/datapath-windows/ovsext/OvsTypes.h new file mode 100644 index 000000000..402c39fd8 --- /dev/null +++ b/datapath-windows/ovsext/OvsTypes.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __OVS_TYPES_H_ +#define __OVS_TYPES_H_ 1 + +typedef unsigned long long uint64, uint64_t, ovs_be64, u64; +typedef long long int64, int64_t; +typedef unsigned int uint32, uint32_t, ovs_be32, u32; +typedef unsigned short uint16, uint16_t, ovs_be16, u16; +typedef unsigned char uint8, uint8_t, u8; +typedef uint64 __u64, __be64; +typedef uint32 __u32, __be32; +typedef uint16 __u16, __be16; +typedef uint8 __u8; + +#define ETH_ALEN 6 + +#endif /* __OVS_TYPES_H_ */ diff --git a/datapath-windows/ovsext/OvsUser.c b/datapath-windows/ovsext/OvsUser.c new file mode 100644 index 000000000..8271d52de --- /dev/null +++ b/datapath-windows/ovsext/OvsUser.c @@ -0,0 +1,859 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * OvsUser.c + * Manage packet queue for packet miss for userAction. 
+ */ + + +#include "precomp.h" + +#include "OvsIoctl.h" +#include "OvsSwitch.h" +#include "OvsVport.h" +#include "OvsEvent.h" +#include "OvsUser.h" +#include "OvsPacketIO.h" +#include "OvsChecksum.h" +#include "OvsNetProto.h" +#include "OvsFlow.h" +#include "OvsTunnelIntf.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_USER +#include "OvsDebug.h" + +OVS_USER_PACKET_QUEUE ovsPacketQueues[OVS_MAX_NUM_PACKET_QUEUES]; + +POVS_PACKET_QUEUE_ELEM OvsGetNextPacket(POVS_OPEN_INSTANCE instance); +extern PNDIS_SPIN_LOCK gOvsCtrlLock; +extern POVS_SWITCH_CONTEXT gOvsSwitchContext; +OVS_USER_STATS ovsUserStats; + + +NTSTATUS +OvsUserInit() +{ + UINT32 i; + POVS_USER_PACKET_QUEUE queue; + for (i = 0; i < OVS_MAX_NUM_PACKET_QUEUES; i++) { + queue = &ovsPacketQueues[i]; + RtlZeroMemory(queue, sizeof (*queue)); + InitializeListHead(&queue->packetList); + NdisAllocateSpinLock(&queue->queueLock); + } + return STATUS_SUCCESS; +} + +VOID +OvsUserCleanup() +{ + UINT32 i; + POVS_USER_PACKET_QUEUE queue; + for (i = 0; i < OVS_MAX_NUM_PACKET_QUEUES; i++) { + queue = &ovsPacketQueues[i]; + ASSERT(IsListEmpty(&queue->packetList)); + ASSERT(queue->instance == NULL); + ASSERT(queue->pendingIrp == NULL); + NdisFreeSpinLock(&queue->queueLock); + } +} + +static VOID +OvsPurgePacketQueue(POVS_USER_PACKET_QUEUE queue, + POVS_OPEN_INSTANCE instance) +{ + PLIST_ENTRY link, next; + LIST_ENTRY tmp; + POVS_PACKET_QUEUE_ELEM elem; + + InitializeListHead(&tmp); + NdisAcquireSpinLock(&queue->queueLock); + if (queue->instance != instance) { + NdisReleaseSpinLock(&queue->queueLock); + return; + } + + if (queue->numPackets) { + OvsAppendList(&tmp, &queue->packetList); + queue->numPackets = 0; + } + NdisReleaseSpinLock(&queue->queueLock); + LIST_FORALL_SAFE(&tmp, link, next) { + RemoveEntryList(link); + elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link); + OvsFreeMemory(elem); + } +} + + +VOID +OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance) +{ + 
POVS_USER_PACKET_QUEUE queue; + POVS_PACKET_QUEUE_ELEM elem; + PLIST_ENTRY link, next; + LIST_ENTRY tmp; + PIRP irp = NULL; + + InitializeListHead(&tmp); + queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue; + if (queue) { + PDRIVER_CANCEL cancelRoutine; + NdisAcquireSpinLock(&queue->queueLock); + if (queue->instance != instance) { + NdisReleaseSpinLock(&queue->queueLock); + return; + } + + if (queue->numPackets) { + OvsAppendList(&tmp, &queue->packetList); + queue->numPackets = 0; + } + queue->instance = NULL; + queue->queueId = OVS_MAX_NUM_PACKET_QUEUES; + instance->packetQueue = NULL; + irp = queue->pendingIrp; + queue->pendingIrp = NULL; + if (irp) { + cancelRoutine = IoSetCancelRoutine(irp, NULL); + if (cancelRoutine == NULL) { + irp = NULL; + } + } + NdisReleaseSpinLock(&queue->queueLock); + } + LIST_FORALL_SAFE(&tmp, link, next) { + RemoveEntryList(link); + elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link); + OvsFreeMemory(elem); + } + if (irp) { + OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS); + } +} + +NTSTATUS +OvsSubscribeDpIoctl(PFILE_OBJECT fileObject, + PVOID inputBuffer, + UINT32 inputLength) +{ + POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; + UINT32 queueId; + POVS_USER_PACKET_QUEUE queue; + if (inputLength < sizeof (UINT32)) { + return STATUS_INVALID_PARAMETER; + } + queueId = *(UINT32 *)inputBuffer; + if (instance->packetQueue && queueId >= OVS_MAX_NUM_PACKET_QUEUES) { + /* + * unsubscribe + */ + OvsCleanupPacketQueue(instance); + } else if (instance->packetQueue == NULL && + queueId < OVS_MAX_NUM_PACKET_QUEUES) { + queue = &ovsPacketQueues[queueId]; + NdisAcquireSpinLock(&queue->queueLock); + if (ovsPacketQueues[queueId].instance) { + if (ovsPacketQueues[queueId].instance != instance) { + NdisReleaseSpinLock(&queue->queueLock); + return STATUS_INSUFFICIENT_RESOURCES; + } else { + NdisReleaseSpinLock(&queue->queueLock); + return STATUS_SUCCESS; + } + } + queue->queueId = queueId; + queue->instance = 
instance; + instance->packetQueue = queue; + ASSERT(IsListEmpty(&queue->packetList)); + NdisReleaseSpinLock(&queue->queueLock); + } else { + return STATUS_INVALID_PARAMETER; + } + return STATUS_SUCCESS; +} + + +/* + * -------------------------------------------------------------------------- + * Copies the next packet queued for the caller's subscribed queue into + * 'outputBuffer', truncating it to 'outputLength', and stores the number of + * bytes copied in 'replyLen' (0 when no packet is pending). + * + * Returns STATUS_INVALID_PARAMETER when the instance has no subscribed queue, + * STATUS_BUFFER_TOO_SMALL when 'outputLength' cannot hold a minimum packet, + * and STATUS_SUCCESS otherwise. + * -------------------------------------------------------------------------- + */ +NTSTATUS +OvsReadDpIoctl(PFILE_OBJECT fileObject, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; + POVS_PACKET_QUEUE_ELEM elem; + UINT32 len; + +#define TCP_CSUM_OFFSET 16 +#define UDP_CSUM_OFFSET 6 + ASSERT(instance); + + if (instance->packetQueue == NULL) { + return STATUS_INVALID_PARAMETER; + } + if (outputLength < (sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE)) { + return STATUS_BUFFER_TOO_SMALL; + } + + elem = OvsGetNextPacket(instance); + if (elem) { + UINT32 size, csumOffset; + + /* + * XXX revisit this later + */ + len = elem->packet.totalLen > outputLength ? outputLength : + elem->packet.totalLen; + + /* Offset of the L4 header from the start of the packet info. */ + size = elem->packet.userDataLen + elem->hdrInfo.l4Offset + + (UINT32)sizeof (OVS_PACKET_INFO); + csumOffset = elem->hdrInfo.tcpCsumNeeded ? + TCP_CSUM_OFFSET : UDP_CSUM_OFFSET; + + /* + * l4PayLoad is derived from the length fields of the packet's own IP + * header, so it must be validated at run time (ASSERT is compiled out + * of free builds) before it is used to bound the checksum copy. + * Packets that fail the check are copied without the L4 checksum, + * just like packets that are truncated to the output buffer. + */ + if ((elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) && + len == elem->packet.totalLen && + size <= len && + elem->hdrInfo.l4PayLoad <= len - size && + elem->hdrInfo.l4PayLoad >= csumOffset + sizeof (UINT16)) { + UINT16 sum, *ptr; + + RtlCopyMemory(outputBuffer, &elem->packet, size); + sum = CopyAndCalculateChecksum((UINT8 *)outputBuffer + size, + (UINT8 *)&elem->packet + size, + elem->hdrInfo.l4PayLoad, 0); + ptr =(UINT16 *)((UINT8 *)outputBuffer + size + csumOffset); + *ptr = sum; + ovsUserStats.l4Csum++; + } else { + RtlCopyMemory(outputBuffer, &elem->packet, len); + } + + *replyLen = len; + OvsFreeMemory(elem); + } else { + /* Nothing queued: report an empty reply instead of leaving the + * caller's length untouched. */ + *replyLen = 0; + } + return STATUS_SUCCESS; +} + +/* Helper function to allocate a Forwarding Context for an NBL */ +NTSTATUS +OvsAllocateForwardingContextForNBL(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST nbl) +{ + return switchContext->NdisSwitchHandlers. 
+ AllocateNetBufferListForwardingContext( + switchContext->NdisSwitchContext, nbl); +} + +/* + * -------------------------------------------------------------------------- + * This function allocates all the stuff necessary for creating an NBL from the + * input buffer of specified length, namely, a nonpaged data buffer of size + * length, an MDL from it, and a NB and NBL from it. It does not allocate an NBL + * context yet. It also copies data from the specified buffer to the NBL. + * + * Returns the new NBL, or NULL when the NBL cannot be allocated or its data + * buffer cannot be mapped (the NBL is completed in that case). + * -------------------------------------------------------------------------- + */ +PNET_BUFFER_LIST +OvsAllocateNBLForUserBuffer(POVS_SWITCH_CONTEXT switchContext, + PVOID userBuffer, + ULONG length) +{ + UINT8 *data = NULL; + PNET_BUFFER_LIST nbl = NULL; + PNET_BUFFER nb; + PMDL mdl; + + if (length > OVS_DEFAULT_DATA_SIZE) { + nbl = OvsAllocateVariableSizeNBL(switchContext, length, + OVS_DEFAULT_HEADROOM_SIZE); + + } else { + nbl = OvsAllocateFixSizeNBL(switchContext, length, + OVS_DEFAULT_HEADROOM_SIZE); + } + if (nbl == NULL) { + return NULL; + } + + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + mdl = NET_BUFFER_CURRENT_MDL(nb); + /* + * Check the mapping before applying the MDL offset: NULL plus a non-zero + * offset is no longer NULL and would slip past the check. + */ + data = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority); + if (!data) { + OvsCompleteNBL(switchContext, nbl, TRUE); + return NULL; + } + data += NET_BUFFER_CURRENT_MDL_OFFSET(nb); + + NdisMoveMemory(data, userBuffer, length); + + return nbl; +} + +NTSTATUS +OvsExecuteDpIoctl(PVOID inputBuffer, + UINT32 inputLength, + UINT32 outputLength) +{ + NTSTATUS status = STATUS_SUCCESS; + NTSTATUS ndisStatus; + OvsPacketExecute *execute; + LOCK_STATE_EX lockState; + PNET_BUFFER_LIST pNbl; + struct nlattr *actions; + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail; + OvsFlowKey key; + OVS_PACKET_HDR_INFO layers; + + if (inputLength < sizeof(*execute) || outputLength != 0) { + return STATUS_INFO_LENGTH_MISMATCH; + } + + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL) { + status = STATUS_INVALID_PARAMETER; + goto unlock; + } + + execute = 
(struct OvsPacketExecute *) inputBuffer; + + if (execute->packetLen == 0) { + status = STATUS_INVALID_PARAMETER; + goto unlock; + } + + if (inputLength != sizeof (*execute) + + execute->actionsLen + execute->packetLen) { + status = STATUS_INFO_LENGTH_MISMATCH; + goto unlock; + } + actions = (struct nlattr *)((PCHAR)&execute->actions + execute->packetLen); + + /* + * Allocate the NBL, copy the data from the userspace buffer. Allocate + * also, the forwarding context for the packet. + */ + pNbl = OvsAllocateNBLForUserBuffer(gOvsSwitchContext, &execute->packetBuf, + execute->packetLen); + if (pNbl == NULL) { + status = STATUS_NO_MEMORY; + goto unlock; + } + + fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl); + fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; + fwdDetail->SourceNicIndex = 0; + // XXX: Figure out if any of the other members of fwdDetail need to be set. + + ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers, + NULL); + if (ndisStatus == NDIS_STATUS_SUCCESS) { + ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); + NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, + NDIS_RWL_AT_DISPATCH_LEVEL); + ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl, + 0, // XXX: we are passing 0 for srcVportNo + NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP, + &key, NULL, &layers, actions, + execute->actionsLen); + pNbl = NULL; + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + } + if (ndisStatus != NDIS_STATUS_SUCCESS) { + status = STATUS_UNSUCCESSFUL; + } + + if (pNbl) { + OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE); + } +unlock: + NdisReleaseSpinLock(gOvsCtrlLock); + return status; +} + + +NTSTATUS +OvsPurgeDpIoctl(PFILE_OBJECT fileObject) +{ + POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; + POVS_USER_PACKET_QUEUE queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue; + + if (queue == NULL) { + return STATUS_INVALID_PARAMETER; + } + OvsPurgePacketQueue(queue, instance); + 
return STATUS_SUCCESS; +} + +VOID +OvsCancelIrpDatapath(PDEVICE_OBJECT deviceObject, + PIRP irp) +{ + PIO_STACK_LOCATION irpSp; + PFILE_OBJECT fileObject; + POVS_OPEN_INSTANCE instance; + POVS_USER_PACKET_QUEUE queue = NULL; + + UNREFERENCED_PARAMETER(deviceObject); + + IoReleaseCancelSpinLock(irp->CancelIrql); + irpSp = IoGetCurrentIrpStackLocation(irp); + fileObject = irpSp->FileObject; + + if (fileObject == NULL) { + goto done; + } + NdisAcquireSpinLock(gOvsCtrlLock); + instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; + if (instance) { + queue = instance->packetQueue; + } + if (instance == NULL || queue == NULL) { + NdisReleaseSpinLock(gOvsCtrlLock); + goto done; + } + NdisReleaseSpinLock(gOvsCtrlLock); + NdisAcquireSpinLock(&queue->queueLock); + if (queue->pendingIrp == irp) { + queue->pendingIrp = NULL; + } + NdisReleaseSpinLock(&queue->queueLock); +done: + OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED); +} + + +NTSTATUS +OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject) +{ + POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; + POVS_USER_PACKET_QUEUE queue = + (POVS_USER_PACKET_QUEUE)instance->packetQueue; + NTSTATUS status = STATUS_SUCCESS; + BOOLEAN cancelled = FALSE; + + if (queue == NULL) { + return STATUS_INVALID_PARAMETER; + } + NdisAcquireSpinLock(&queue->queueLock); + if (queue->instance != instance) { + NdisReleaseSpinLock(&queue->queueLock); + return STATUS_INVALID_PARAMETER; + } + if (queue->pendingIrp) { + NdisReleaseSpinLock(&queue->queueLock); + return STATUS_DEVICE_BUSY; + } + if (queue->numPackets == 0) { + PDRIVER_CANCEL cancelRoutine; + IoMarkIrpPending(irp); + IoSetCancelRoutine(irp, OvsCancelIrpDatapath); + if (irp->Cancel) { + cancelRoutine = IoSetCancelRoutine(irp, NULL); + if (cancelRoutine) { + cancelled = TRUE; + } + } else { + queue->pendingIrp = irp; + } + status = STATUS_PENDING; + } + NdisReleaseSpinLock(&queue->queueLock); + if (cancelled) { + OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED); + 
OVS_LOG_INFO("Datapath IRP cancelled: %p", irp); + } + return status; +} + + +/* + * Unlinks and returns the packet at the head of the queue 'instance' is + * subscribed to. Returns NULL when the instance has no queue, the queue is + * no longer owned by the instance, or the queue is empty. + */ +POVS_PACKET_QUEUE_ELEM +OvsGetNextPacket(POVS_OPEN_INSTANCE instance) +{ + POVS_USER_PACKET_QUEUE queue; + PLIST_ENTRY link; + queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue; + if (queue == NULL) { + return NULL; + } + NdisAcquireSpinLock(&queue->queueLock); + if (queue->instance != instance || queue->numPackets == 0) { + NdisReleaseSpinLock(&queue->queueLock); + return NULL; + } + link = RemoveHeadList(&queue->packetList); + queue->numPackets--; + NdisReleaseSpinLock(&queue->queueLock); + return CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link); +} + + +/* + * Returns the packet queue for 'queueId', or NULL when the id is out of range + * or no instance is currently subscribed to that queue. The subscription + * check is made without holding the queue lock. + */ +POVS_USER_PACKET_QUEUE +OvsGetQueue(UINT32 queueId) +{ + POVS_USER_PACKET_QUEUE queue; + if (queueId >= OVS_MAX_NUM_PACKET_QUEUES) { + return NULL; + } + queue = &ovsPacketQueues[queueId]; + return queue->instance != NULL ? queue : NULL; +} + +/* + *---------------------------------------------------------------------------- + * OvsCreateQueuePacket -- + * + * Create a packet which will be forwarded to user space. + * + * InputParameter: + * queueId Identifies the queue the packet is to be inserted into. + * This will be used when multiple queues are supported + * in userspace + * userData: when cmd is user action, this field contains + * user action data. + * userDataLen: length of userData in bytes + * cmd: either miss or user action + * inPort: datapath port id from which the packet is received. + * tunnelKey: tunnelKey for tunneled packet + * nbl: the NET_BUFFER_LIST which contains the packet + * nb: the packet + * isRecv: This is used to decide how to interpret the csum info + * hdrInfo: includes hdr info initialized during flow extraction. 
+ * + * Results: + * NULL if fail to create the packet + * The packet element otherwise + *---------------------------------------------------------------------------- + */ +POVS_PACKET_QUEUE_ELEM +OvsCreateQueuePacket(UINT32 queueId, + PVOID userData, + UINT32 userDataLen, + UINT32 cmd, + UINT32 inPort, + OvsIPv4TunnelKey *tunnelKey, + PNET_BUFFER_LIST nbl, + PNET_BUFFER nb, + BOOLEAN isRecv, + POVS_PACKET_HDR_INFO hdrInfo) +{ +#define VLAN_TAG_SIZE 4 + UINT32 allocLen, dataLen, extraLen = 0; + POVS_PACKET_QUEUE_ELEM elem; + PMDL mdl; + UINT8 *src, *dst; + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; + NDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo; + + csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo); + + if (isRecv && (csumInfo.Receive.TcpChecksumFailed || + (csumInfo.Receive.UdpChecksumFailed && + !hdrInfo->udpCsumZero) || + csumInfo.Receive.IpChecksumFailed)) { + OVS_LOG_INFO("Packet dropped due to checksum failure."); + ovsUserStats.dropDuetoChecksum++; + return NULL; + } + + vlanInfo.Value = NET_BUFFER_LIST_INFO(nbl, Ieee8021QNetBufferListInfo); + if (vlanInfo.TagHeader.VlanId) { + /* + * We may also need to check priority XXX + */ + extraLen = VLAN_TAG_SIZE; + } + + dataLen = NET_BUFFER_DATA_LENGTH(nb); + allocLen = sizeof (OVS_PACKET_QUEUE_ELEM) + userDataLen + dataLen + + extraLen; + + elem = (POVS_PACKET_QUEUE_ELEM)OvsAllocateMemory(allocLen); + if (elem == NULL) { + ovsUserStats.dropDuetoResource++; + return NULL; + } + elem->hdrInfo.value = hdrInfo->value; + elem->packet.totalLen = sizeof (OVS_PACKET_INFO) + userDataLen + dataLen + + extraLen; + elem->packet.queue = queueId; + elem->packet.userDataLen = userDataLen; + elem->packet.inPort = inPort; + elem->packet.cmd = cmd; + if (cmd == (UINT32)OVS_PACKET_CMD_MISS) { + ovsUserStats.miss++; + } else { + ovsUserStats.action++; + } + elem->packet.packetLen = dataLen + extraLen; + if (tunnelKey) { + RtlCopyMemory(&elem->packet.tunnelKey, tunnelKey, + sizeof (*tunnelKey)); + } else 
{ + RtlZeroMemory(&elem->packet.tunnelKey, + sizeof (elem->packet.tunnelKey)); + } + + dst = elem->packet.data; + if (userDataLen) { + RtlCopyMemory(dst, userData, userDataLen); + dst = dst + userDataLen; + } + dst += extraLen; + + mdl = NET_BUFFER_CURRENT_MDL(nb); + src = NdisGetDataBuffer(nb, dataLen, dst, 1, 0); + if (src == NULL) { + OvsFreeMemory(elem); + ovsUserStats.dropDuetoResource++; + return NULL; + } else if (src != dst) { + /* + * The data was not placed in dst by NdisGetDataBuffer; copy it from + * the NDIS buffer to dst. + */ + RtlCopyMemory(dst, src, dataLen); + } + + dst = elem->packet.data + userDataLen + extraLen; + /* + * Fix IP hdr if necessary + */ + if ((isRecv && csumInfo.Receive.IpChecksumValueInvalid) || + (!isRecv && csumInfo.Transmit.IsIPv4 && + csumInfo.Transmit.IpHeaderChecksum)) { + PIPV4_HEADER ipHdr = (PIPV4_HEADER)(dst + hdrInfo->l3Offset); + ASSERT(elem->hdrInfo.isIPv4); + ASSERT(ipHdr->Version == 4); + ipHdr->HeaderChecksum = IPChecksum((UINT8 *)ipHdr, + ipHdr->HeaderLength << 2, + (UINT16)~ipHdr->HeaderChecksum); + ovsUserStats.ipCsum++; + } + ASSERT(elem->hdrInfo.tcpCsumNeeded == 0 && + elem->hdrInfo.udpCsumNeeded == 0); + /* + * For now, we will not do verification + * There is no correctness issue here. + * XXX + */ + /* + * calculate TCP/UDP pseudo checksum + */ + if (isRecv && csumInfo.Receive.TcpChecksumValueInvalid) { + /* + * Only in this case do we need to recalculate the pseudo checksum; + * in all other cases, it is assumed the pseudo checksum is + * filled already. 
+ * + */ + PTCP_HDR tcpHdr = (PTCP_HDR)(dst + hdrInfo->l4Offset); + if (hdrInfo->isIPv4) { + PIPV4_HEADER ipHdr = (PIPV4_HEADER)(dst + hdrInfo->l3Offset); + elem->hdrInfo.l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) - + (ipHdr->HeaderLength << 2)); + tcpHdr->th_sum = + IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress, + (UINT32 *)&ipHdr->DestinationAddress, + IPPROTO_TCP, elem->hdrInfo.l4PayLoad); + } else { + PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(dst + hdrInfo->l3Offset); + elem->hdrInfo.l4PayLoad = + (UINT16)(ntohs(ipv6Hdr->PayloadLength) + + hdrInfo->l3Offset + sizeof(IPV6_HEADER) - + hdrInfo->l4Offset); + ASSERT(hdrInfo->isIPv6); + tcpHdr->th_sum = + IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress, + (UINT32 *)&ipv6Hdr->DestinationAddress, + IPPROTO_TCP, elem->hdrInfo.l4PayLoad); + } + elem->hdrInfo.tcpCsumNeeded = 1; + ovsUserStats.recalTcpCsum++; + } else if (!isRecv) { + if (csumInfo.Transmit.TcpChecksum) { + elem->hdrInfo.tcpCsumNeeded = 1; + } else if (csumInfo.Transmit.UdpChecksum) { + elem->hdrInfo.udpCsumNeeded = 1; + } + if (elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) { +#ifdef DBG + UINT16 sum, *ptr; + UINT8 proto = + elem->hdrInfo.tcpCsumNeeded ? 
IPPROTO_TCP : IPPROTO_UDP; +#endif + if (hdrInfo->isIPv4) { + PIPV4_HEADER ipHdr = (PIPV4_HEADER)(dst + hdrInfo->l3Offset); + elem->hdrInfo.l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) - + (ipHdr->HeaderLength << 2)); +#ifdef DBG + sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress, + (UINT32 *)&ipHdr->DestinationAddress, + proto, elem->hdrInfo.l4PayLoad); +#endif + } else { + PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(dst + + hdrInfo->l3Offset); + elem->hdrInfo.l4PayLoad = + (UINT16)(ntohs(ipv6Hdr->PayloadLength) + + hdrInfo->l3Offset + sizeof(IPV6_HEADER) - + hdrInfo->l4Offset); + ASSERT(hdrInfo->isIPv6); +#ifdef DBG + sum = IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress, + (UINT32 *)&ipv6Hdr->DestinationAddress, + proto, elem->hdrInfo.l4PayLoad); +#endif + } +#ifdef DBG + ptr = (UINT16 *)(dst + hdrInfo->l4Offset + + (elem->hdrInfo.tcpCsumNeeded ? + TCP_CSUM_OFFSET : UDP_CSUM_OFFSET)); + ASSERT(*ptr == sum); +#endif + } + } + /* + * Finally insert VLAN tag + */ + if (extraLen) { + dst = elem->packet.data + userDataLen; + src = dst + extraLen; + ((UINT32 *)dst)[0] = ((UINT32 *)src)[0]; + ((UINT32 *)dst)[1] = ((UINT32 *)src)[1]; + ((UINT32 *)dst)[2] = ((UINT32 *)src)[2]; + dst += 12; + ((UINT16 *)dst)[0] = htons(0x8100); + ((UINT16 *)dst)[1] = htons(vlanInfo.TagHeader.VlanId | + (vlanInfo.TagHeader.UserPriority << 13)); + elem->hdrInfo.l3Offset += VLAN_TAG_SIZE; + elem->hdrInfo.l4Offset += VLAN_TAG_SIZE; + ovsUserStats.vlanInsert++; + } + + return elem; +} + + +VOID +OvsQueuePackets(UINT32 queueId, + PLIST_ENTRY packetList, + UINT32 numElems) +{ + POVS_USER_PACKET_QUEUE queue = OvsGetQueue(queueId); + POVS_PACKET_QUEUE_ELEM elem; + PIRP irp = NULL; + PLIST_ENTRY link; + UINT32 num = 0; + + OVS_LOG_LOUD("Enter: queueId %u, numELems: %u", + queueId, numElems); + if (queue == NULL) { + goto cleanup; + } + + NdisAcquireSpinLock(&queue->queueLock); + if (queue->instance == NULL) { + NdisReleaseSpinLock(&queue->queueLock); + goto cleanup; + } else { + 
OvsAppendList(&queue->packetList, packetList); + queue->numPackets += numElems; + } + if (queue->pendingIrp) { + PDRIVER_CANCEL cancelRoutine; + irp = queue->pendingIrp; + queue->pendingIrp = NULL; + cancelRoutine = IoSetCancelRoutine(irp, NULL); + if (cancelRoutine == NULL) { + irp = NULL; + } + } + NdisReleaseSpinLock(&queue->queueLock); + if (irp) { + OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS); + } + +cleanup: + while (!IsListEmpty(packetList)) { + link = RemoveHeadList(packetList); + elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link); + OvsFreeMemory(elem); + num++; + } + OVS_LOG_LOUD("Exit: drop %u packets", num); +} + + +/* + *---------------------------------------------------------------------------- + * OvsCreateAndAddPackets -- + * + * Create a packet and forwarded to user space. + * + * This function would fragment packet if needed, and queue + * each segment to user space. + *---------------------------------------------------------------------------- + */ +NTSTATUS +OvsCreateAndAddPackets(UINT32 queueId, + PVOID userData, + UINT32 userDataLen, + UINT32 cmd, + UINT32 inPort, + OvsIPv4TunnelKey *tunnelKey, + PNET_BUFFER_LIST nbl, + BOOLEAN isRecv, + POVS_PACKET_HDR_INFO hdrInfo, + POVS_SWITCH_CONTEXT switchContext, + LIST_ENTRY *list, + UINT32 *num) +{ + POVS_PACKET_QUEUE_ELEM elem; + PNET_BUFFER_LIST newNbl = NULL; + PNET_BUFFER nb; + + if (hdrInfo->isTcp) { + NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo; + UINT32 packetLength; + + tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo); + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + packetLength = NET_BUFFER_DATA_LENGTH(nb); + + OVS_LOG_TRACE("MSS %u packet len %u", + tsoInfo.LsoV1Transmit.MSS, packetLength); + if (tsoInfo.LsoV1Transmit.MSS) { + OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset); + newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo, + tsoInfo.LsoV1Transmit.MSS , 0); + if (newNbl == NULL) { + return NDIS_STATUS_FAILURE; + } + nbl = newNbl; + } + } + + 
nb = NET_BUFFER_LIST_FIRST_NB(nbl); + while (nb) { + elem = OvsCreateQueuePacket(queueId, userData, userDataLen, + cmd, inPort, tunnelKey, nbl, nb, + isRecv, hdrInfo); + if (elem) { + InsertTailList(list, &elem->link); + (*num)++; + } + nb = NET_BUFFER_NEXT_NB(nb); + } + if (newNbl) { + OvsCompleteNBL(switchContext, newNbl, TRUE); + } + return NDIS_STATUS_SUCCESS; +} diff --git a/datapath-windows/ovsext/OvsUser.h b/datapath-windows/ovsext/OvsUser.h new file mode 100644 index 000000000..b1e6e1ef0 --- /dev/null +++ b/datapath-windows/ovsext/OvsUser.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This file contains structures and function definitions necessary for + * forwarding packet to user space. 
+ */ + +#ifndef __OVS_USER_H_ +#define __OVS_USER_H_ 1 + +/* + * Even we have more cores, I don't think we need + * more than 32 queues for processing packets to + * userspace + */ +#define OVS_MAX_NUM_PACKET_QUEUES 32 +#define OVS_DEFAULT_PACKET_QUEUE 1 +#define OVS_MAX_PACKET_QUEUE_LEN 4096 + +/* + * Only when OVS_PER_VPORT_QUEUE_CTRL is defined + * we will apply this constraint + */ +#define OVS_MAX_PACKETS_PER_VPORT 128 +#define OVS_MAX_PACKETS_PER_TUNNEL 1024 + +typedef struct _OVS_USER_PACKET_QUEUE { + UINT32 queueId; + UINT32 numPackets; + LIST_ENTRY packetList; + PVOID instance; + PIRP pendingIrp; + NDIS_SPIN_LOCK queueLock; +} OVS_USER_PACKET_QUEUE, *POVS_USER_PACKET_QUEUE; + +typedef struct _OVS_PACKET_QUEUE_ELEM { + LIST_ENTRY link; + OVS_PACKET_HDR_INFO hdrInfo; + OVS_PACKET_INFO packet; +} OVS_PACKET_QUEUE_ELEM, *POVS_PACKET_QUEUE_ELEM; + +struct _OVS_OPEN_INSTANCE; + +typedef struct _OVS_USER_STATS { + UINT64 miss; + UINT64 action; + UINT32 dropDuetoResource; + UINT32 dropDuetoChecksum; + UINT32 ipCsum; + UINT32 recalTcpCsum; + UINT32 vlanInsert; + UINT32 l4Csum; +} OVS_USER_STATS, *POVS_USER_STATS; + + +NTSTATUS OvsUserInit(); +VOID OvsUserCleanup(); + +VOID OvsCleanupPacketQueue(struct _OVS_OPEN_INSTANCE *instance); + +POVS_PACKET_QUEUE_ELEM OvsCreateQueuePacket(UINT32 queueId, + PVOID userData, + UINT32 userDataLen, + UINT32 cmd, UINT32 inPort, + OvsIPv4TunnelKey *tunnelKey, + PNET_BUFFER_LIST nbl, + PNET_BUFFER nb, + BOOLEAN isRecv, + POVS_PACKET_HDR_INFO hdrInfo); + +VOID OvsQueuePackets(UINT32 queueId, PLIST_ENTRY packetList, + UINT32 numElems); +NTSTATUS OvsCreateAndAddPackets(UINT32 queueId, + PVOID userData, + UINT32 userDataLen, + UINT32 cmd, + UINT32 inPort, + OvsIPv4TunnelKey *tunnelKey, + PNET_BUFFER_LIST nbl, + BOOLEAN isRecv, + POVS_PACKET_HDR_INFO hdrInfo, + POVS_SWITCH_CONTEXT switchContext, + LIST_ENTRY *list, + UINT32 *num); + +NTSTATUS OvsSubscribeDpIoctl(PFILE_OBJECT fileObject, + PVOID inputBuffer, + UINT32 inputLength); + 
+NTSTATUS OvsReadDpIoctl(PFILE_OBJECT fileObject, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen); +NTSTATUS OvsExecuteDpIoctl(PVOID inputBuffer, + UINT32 inputLength, + UINT32 outputLength); +NTSTATUS OvsPurgeDpIoctl(PFILE_OBJECT fileObject); + +NTSTATUS OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject); + +#endif /* __OVS_USER_H_ */ diff --git a/datapath-windows/ovsext/OvsUtil.c b/datapath-windows/ovsext/OvsUtil.c new file mode 100644 index 000000000..e70f9a1fc --- /dev/null +++ b/datapath-windows/ovsext/OvsUtil.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "precomp.h" +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_OTHERS + +#include "OvsDebug.h" + +extern NDIS_HANDLE gOvsExtDriverHandle; + +VOID * +OvsAllocateMemory(size_t size) +{ + OVS_VERIFY_IRQL_LE(DISPATCH_LEVEL); + return NdisAllocateMemoryWithTagPriority(gOvsExtDriverHandle, + (UINT32)size, OVS_MEMORY_TAG, NormalPoolPriority); +} + +VOID * +OvsAllocateAlignedMemory(size_t size, UINT16 align) +{ + OVS_VERIFY_IRQL_LE(DISPATCH_LEVEL); + + ASSERT((align == 8) || (align == 16)); + + if ((align == 8) || (align == 16)) { + /* + * XXX: NdisAllocateMemory*() functions don't talk anything about + * alignment. Hence using ExAllocatePool*(); + */ + return (VOID *)ExAllocatePoolWithTagPriority(NonPagedPool, size, + OVS_MEMORY_TAG, + NormalPoolPriority); + } + + /* Invalid user input. 
*/ + return NULL; +} + +VOID +OvsFreeMemory(VOID *ptr) +{ + ASSERT(ptr); + NdisFreeMemoryWithTagPriority(gOvsExtDriverHandle, ptr, OVS_MEMORY_TAG); +} + +VOID +OvsFreeAlignedMemory(VOID *ptr) +{ + ASSERT(ptr); + ExFreePoolWithTag(ptr, OVS_MEMORY_TAG); +} + +VOID +OvsAppendList(PLIST_ENTRY dst, PLIST_ENTRY src) +{ + PLIST_ENTRY srcFirst, srcLast, dstLast; + if (IsListEmpty(src)) { + return; + } + srcFirst = src->Flink; + srcLast = src->Blink; + dstLast = dst->Blink; + + dstLast->Flink = srcFirst; + srcFirst->Blink = dstLast; + + srcLast->Flink = dst; + dst->Blink = srcLast; + + src->Flink = src; + src->Blink = src; +} diff --git a/datapath-windows/ovsext/OvsUtil.h b/datapath-windows/ovsext/OvsUtil.h new file mode 100644 index 000000000..bff06b80d --- /dev/null +++ b/datapath-windows/ovsext/OvsUtil.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __OVS_UTIL_H_ +#define __OVS_UTIL_H_ 1 + +#define OVS_MEMORY_TAG 'TSVO' +#define OVS_FIX_SIZE_NBL_POOL_TAG 'FSVO' +#define OVS_VARIABLE_SIZE_NBL_POOL_TAG 'VSVO' +#define OVS_NBL_ONLY_POOL_TAG 'OSVO' +#define OVS_NET_BUFFER_POOL_TAG 'NSVO' +#define OVS_OTHER_POOL_TAG 'MSVO' + +VOID *OvsAllocateMemory(size_t size); +VOID *OvsAllocateAlignedMemory(size_t size, UINT16 align); +VOID OvsFreeMemory(VOID *ptr); +VOID OvsFreeAlignedMemory(VOID *ptr); + +#define LIST_FORALL(_headPtr, _itemPtr) \ + for (_itemPtr = (_headPtr)->Flink; \ + _itemPtr != _headPtr; \ + _itemPtr = (_itemPtr)->Flink) + +#define LIST_FORALL_SAFE(_headPtr, _itemPtr, _nextPtr) \ + for (_itemPtr = (_headPtr)->Flink, _nextPtr = (_itemPtr)->Flink; \ + _itemPtr != _headPtr; \ + _itemPtr = _nextPtr, _nextPtr = (_itemPtr)->Flink) + +#define LIST_FORALL_REVERSE(_headPtr, _itemPtr) \ + for (_itemPtr = (_headPtr)->Blink; \ + _itemPtr != _headPtr; \ + _itemPtr = (_itemPtr)->Blink) + +#define LIST_FORALL_REVERSE_SAFE(_headPtr, _itemPtr, _nextPtr) \ + for (_itemPtr = (_headPtr)->Blink, _nextPtr = (_itemPtr)->Blink; \ + _itemPtr != _headPtr; \ + _itemPtr = _nextPtr, _nextPtr = (_itemPtr)->Blink) + +VOID OvsAppendList(PLIST_ENTRY dst, PLIST_ENTRY src); + + +#define MIN(_a, _b) (_a) > (_b) ? 
(_b) : (_a) +#define ARRAY_SIZE(_x) ((sizeof(_x))/sizeof (_x)[0]) +#define OVS_SWITCH_PORT_ID_INVALID (NDIS_SWITCH_PORT_ID)(-1) + +#ifndef htons +#define htons(_x) _byteswap_ushort((USHORT)(_x)) +#define ntohs(_x) _byteswap_ushort((USHORT)(_x)) +#define htonl(_x) _byteswap_ulong((ULONG)(_x)) +#define ntohl(_x) _byteswap_ulong((ULONG)(_x)) +#endif + +#define OVS_INIT_OBJECT_HEADER(_obj, _type, _revision, _size) \ + { \ + PNDIS_OBJECT_HEADER hdrp = _obj; \ + hdrp->Type = _type; \ + hdrp->Revision = _revision; \ + hdrp->Size = _size; \ + } + + +#define BIT16(_x) ((UINT16)0x1 << (_x)) +#define BIT32(_x) ((UINT32)0x1 << (_x)) + +#endif /* __OVS_UTIL_H_ */ diff --git a/datapath-windows/ovsext/OvsVport.c b/datapath-windows/ovsext/OvsVport.c new file mode 100644 index 000000000..35bdaea7b --- /dev/null +++ b/datapath-windows/ovsext/OvsVport.c @@ -0,0 +1,1416 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "precomp.h" +#include "OvsIoctl.h" +#include "OvsJhash.h" +#include "OvsSwitch.h" +#include "OvsVport.h" +#include "OvsEvent.h" +#include "OvsUser.h" +#include "OvsVxlan.h" +#include "OvsIpHelper.h" +#include "OvsOid.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_VPORT +#include "OvsDebug.h" + +#define VPORT_NIC_ENTER(_nic) \ + OVS_LOG_TRACE("Enter: PortId: %x, NicIndex: %d", _nic->PortId, \ + _nic->NicIndex) + +#define VPORT_NIC_EXIT(_nic) \ + OVS_LOG_TRACE("Exit: PortId: %x, NicIndex: %d", _nic->PortId, \ + _nic->NicIndex) + +#define VPORT_PORT_ENTER(_port) \ + OVS_LOG_TRACE("Enter: PortId: %x", _port->PortId) + +#define VPORT_PORT_EXIT(_port) \ + OVS_LOG_TRACE("Exit: PortId: %x", _port->PortId) + +#define OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC 100 + +extern POVS_SWITCH_CONTEXT gOvsSwitchContext; +extern PNDIS_SPIN_LOCK gOvsCtrlLock; + +static UINT32 OvsGetVportNo(POVS_SWITCH_CONTEXT switchContext, UINT32 nicIndex, + OVS_VPORT_TYPE ovsType); +static POVS_VPORT_ENTRY OvsAllocateVport(VOID); +static VOID OvsInitVportWithPortParam(POVS_VPORT_ENTRY vport, + PNDIS_SWITCH_PORT_PARAMETERS portParam); +static VOID OvsInitVportWithNicParam(POVS_SWITCH_CONTEXT switchContext, + POVS_VPORT_ENTRY vport, PNDIS_SWITCH_NIC_PARAMETERS nicParam); +static VOID OvsInitPhysNicVport(POVS_VPORT_ENTRY vport, POVS_VPORT_ENTRY + virtVport, UINT32 nicIndex); +static VOID OvsInitPhysNicVport(POVS_VPORT_ENTRY vport, POVS_VPORT_ENTRY + virtVport, UINT32 nicIndex); +static NDIS_STATUS OvsInitVportCommon(POVS_SWITCH_CONTEXT switchContext, + POVS_VPORT_ENTRY vport); +static VOID OvsRemoveAndDeleteVport(POVS_SWITCH_CONTEXT switchContext, + POVS_VPORT_ENTRY vport); +static __inline VOID OvsWaitActivate(POVS_SWITCH_CONTEXT switchContext, + ULONG sleepMicroSec); + +/* + * Functions implemented in relaton to NDIS port manipulation. 
+ */
+
+/*
+ * OvsCreatePort --
+ *
+ *    Allocates a vport for the switch port described by 'portParam',
+ *    initializes it from the port parameters and registers it with
+ *    OvsInitVportCommon(), all under the dispatch write lock.
+ *
+ *    Returns STATUS_DATA_NOT_ACCEPTED if a vport already exists for this
+ *    port id, NDIS_STATUS_RESOURCES if the vport cannot be allocated, or
+ *    the status of OvsInitVportCommon() otherwise.
+ */
+NDIS_STATUS
+OvsCreatePort(POVS_SWITCH_CONTEXT switchContext,
+              PNDIS_SWITCH_PORT_PARAMETERS portParam)
+{
+    POVS_VPORT_ENTRY vport;
+    LOCK_STATE_EX lockState;
+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+
+    VPORT_PORT_ENTER(portParam);
+
+    NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0);
+    vport = OvsFindVportByPortIdAndNicIndex(switchContext,
+                                            portParam->PortId, 0);
+    if (vport != NULL) {
+        status = STATUS_DATA_NOT_ACCEPTED;
+        goto create_port_done;
+    }
+    vport = (POVS_VPORT_ENTRY)OvsAllocateVport();
+    if (vport == NULL) {
+        status = NDIS_STATUS_RESOURCES;
+        goto create_port_done;
+    }
+    OvsInitVportWithPortParam(vport, portParam);
+    status = OvsInitVportCommon(switchContext, vport);
+    if (status != NDIS_STATUS_SUCCESS) {
+        /*
+         * Registration failed, so nothing references the vport; release it
+         * here and report the failure instead of silently leaking it.
+         */
+        OvsFreeMemory(vport);
+    }
+
+create_port_done:
+    NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+    VPORT_PORT_EXIT(portParam);
+    return status;
+}
+
+/*
+ * OvsTeardownPort --
+ *
+ *    Marks the vport that belongs to 'portParam' as being torn down.  Logs
+ *    a warning if no such vport exists.
+ */
+VOID
+OvsTeardownPort(POVS_SWITCH_CONTEXT switchContext,
+                PNDIS_SWITCH_PORT_PARAMETERS portParam)
+{
+    POVS_VPORT_ENTRY vport;
+    LOCK_STATE_EX lockState;
+
+    VPORT_PORT_ENTER(portParam);
+
+    NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0);
+    vport = OvsFindVportByPortIdAndNicIndex(switchContext,
+                                            portParam->PortId, 0);
+    if (vport) {
+        /* add assertion here
+         */
+        vport->portState = NdisSwitchPortStateTeardown;
+        vport->ovsState = OVS_STATE_PORT_TEAR_DOWN;
+    } else {
+        OVS_LOG_WARN("Vport not present.");
+    }
+    NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+
+    VPORT_PORT_EXIT(portParam);
+}
+
+
+
+/*
+ * OvsDeletePort --
+ *
+ *    Removes and deletes the vport that belongs to 'portParam'.  Logs a
+ *    warning if no such vport exists.
+ */
+VOID
+OvsDeletePort(POVS_SWITCH_CONTEXT switchContext,
+              PNDIS_SWITCH_PORT_PARAMETERS portParam)
+{
+    POVS_VPORT_ENTRY vport;
+    LOCK_STATE_EX lockState;
+
+    VPORT_PORT_ENTER(portParam);
+
+    NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0);
+    vport = OvsFindVportByPortIdAndNicIndex(switchContext,
+                                            portParam->PortId, 0);
+    if (vport) {
+        OvsRemoveAndDeleteVport(switchContext, vport);
+    } else {
+        OVS_LOG_WARN("Vport not present.");
+    }
+    NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+
+    VPORT_PORT_EXIT(portParam);
+}
+
+
+/*
+ * Functions implemented in relation to NDIS NIC manipulation.
+ */
+NDIS_STATUS
+OvsCreateNic(POVS_SWITCH_CONTEXT switchContext,
+             PNDIS_SWITCH_NIC_PARAMETERS nicParam)
+{
+    POVS_VPORT_ENTRY vport;
+    UINT32 portNo = 0;
+    UINT32 event = 0;
+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+
+    LOCK_STATE_EX lockState;
+
+    VPORT_NIC_ENTER(nicParam);
+
+    /* Wait for lists to be initialized. */
+    OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC);
+
+    if (!switchContext->isActivated) {
+        OVS_LOG_WARN("Switch is not activated yet.");
+        /* Veto the creation of nic */
+        status = NDIS_STATUS_NOT_SUPPORTED;
+        goto done;
+    }
+
+    NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0);
+    vport = OvsFindVportByPortIdAndNicIndex(switchContext, nicParam->PortId, 0);
+    if (vport == NULL) {
+        OVS_LOG_ERROR("Create NIC without Switch Port,"
+                      " PortId: %x, NicIndex: %d",
+                      nicParam->PortId, nicParam->NicIndex);
+        status = NDIS_STATUS_INVALID_PARAMETER;
+        goto add_nic_done;
+    }
+
+    if (nicParam->NicType == NdisSwitchNicTypeExternal &&
+        nicParam->NicIndex != 0) {
+        POVS_VPORT_ENTRY virtVport =
+            (POVS_VPORT_ENTRY)switchContext->externalVport;
+        vport = (POVS_VPORT_ENTRY)OvsAllocateVport();
+        if (vport == NULL) {
+            status = NDIS_STATUS_RESOURCES;
+            goto add_nic_done;
+        }
+        OvsInitPhysNicVport(vport, virtVport, nicParam->NicIndex);
+        status = OvsInitVportCommon(switchContext, vport);
+        if (status != NDIS_STATUS_SUCCESS) {
+            OvsFreeMemory(vport);
+            goto add_nic_done;
+        }
+    }
+    OvsInitVportWithNicParam(switchContext, vport, nicParam);
+    portNo = vport->portNo;
+    if (vport->ovsState == OVS_STATE_CONNECTED) {
+        event = OVS_EVENT_CONNECT | OVS_EVENT_LINK_UP;
+    } else if (vport->ovsState == OVS_STATE_NIC_CREATED) {
+        event = OVS_EVENT_CONNECT;
+    }
+
+add_nic_done:
+    NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+    if (portNo && event) {
+        OvsPostEvent(portNo, event);
+    }
+
+done:
+
VPORT_NIC_EXIT(nicParam);
+    OVS_LOG_TRACE("Exit: status %8x.\n", status);
+
+    return status;
+}
+
+
+/* Mark already created NIC as connected. */
+VOID
+OvsConnectNic(POVS_SWITCH_CONTEXT switchContext,
+              PNDIS_SWITCH_NIC_PARAMETERS nicParam)
+{
+    LOCK_STATE_EX lockState;
+    POVS_VPORT_ENTRY vport;
+    UINT32 portNo = 0;
+
+    VPORT_NIC_ENTER(nicParam);
+
+    /* Wait for lists to be initialized. */
+    OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC);
+
+    if (!switchContext->isActivated) {
+        OVS_LOG_WARN("Switch is not activated yet.");
+        goto done;
+    }
+
+    NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0);
+    vport = OvsFindVportByPortIdAndNicIndex(switchContext,
+                                            nicParam->PortId,
+                                            nicParam->NicIndex);
+
+    if (!vport) {
+        OVS_LOG_WARN("Vport not present.");
+        NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+        ASSERT(0);
+        goto done;
+    }
+
+    vport->ovsState = OVS_STATE_CONNECTED;
+    vport->nicState = NdisSwitchNicStateConnected;
+    portNo = vport->portNo;
+
+    NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+
+    /* Events are posted only after the dispatch lock has been dropped. */
+    OvsPostEvent(portNo, OVS_EVENT_LINK_UP);
+
+    if (nicParam->NicType == NdisSwitchNicTypeInternal) {
+        OvsInternalAdapterUp(portNo, &nicParam->NetCfgInstanceId);
+    }
+
+done:
+    VPORT_NIC_EXIT(nicParam);
+}
+
+/*
+ * OvsUpdateNic --
+ *
+ *    Refreshes the cached NIC properties of the vport identified by
+ *    'nicParam' (NetCfg instance id or VM MAC depending on the NIC type,
+ *    permanent/current MAC, MTU and NUMA node).  If a MAC address or the
+ *    MTU changed, the corresponding OVS_EVENT_MAC_CHANGE /
+ *    OVS_EVENT_MTU_CHANGE event is posted for the vport once the dispatch
+ *    lock has been released.
+ *
+ *    Does nothing if the switch is not activated or the vport is unknown.
+ */
+VOID
+OvsUpdateNic(POVS_SWITCH_CONTEXT switchContext,
+             PNDIS_SWITCH_NIC_PARAMETERS nicParam)
+{
+    POVS_VPORT_ENTRY vport;
+    LOCK_STATE_EX lockState;
+
+    UINT32 status = 0, portNo = 0;
+
+    VPORT_NIC_ENTER(nicParam);
+
+    /* Wait for lists to be initialized. */
+    OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC);
+
+    if (!switchContext->isActivated) {
+        OVS_LOG_WARN("Switch is not activated yet.");
+        goto update_nic_done;
+    }
+
+    NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0);
+    vport = OvsFindVportByPortIdAndNicIndex(switchContext,
+                                            nicParam->PortId,
+                                            nicParam->NicIndex);
+    if (vport == NULL) {
+        OVS_LOG_WARN("Vport search failed.");
+        /*
+         * The exit label does not release the lock, so drop it here;
+         * otherwise the dispatch write lock would be held forever.
+         */
+        NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+        goto update_nic_done;
+    }
+    switch (nicParam->NicType) {
+    case NdisSwitchNicTypeExternal:
+    case NdisSwitchNicTypeInternal:
+        RtlCopyMemory(&vport->netCfgInstanceId, &nicParam->NetCfgInstanceId,
+                      sizeof (GUID));
+        break;
+    case NdisSwitchNicTypeSynthetic:
+    case NdisSwitchNicTypeEmulated:
+        if (!RtlEqualMemory(vport->vmMacAddress, nicParam->VMMacAddress,
+                            sizeof (vport->vmMacAddress))) {
+            status |= OVS_EVENT_MAC_CHANGE;
+            RtlCopyMemory(vport->vmMacAddress, nicParam->VMMacAddress,
+                          sizeof (vport->vmMacAddress));
+        }
+        break;
+    default:
+        ASSERT(0);
+    }
+    if (!RtlEqualMemory(vport->permMacAddress, nicParam->PermanentMacAddress,
+                        sizeof (vport->permMacAddress))) {
+        RtlCopyMemory(vport->permMacAddress, nicParam->PermanentMacAddress,
+                      sizeof (vport->permMacAddress));
+        status |= OVS_EVENT_MAC_CHANGE;
+    }
+    if (!RtlEqualMemory(vport->currMacAddress, nicParam->CurrentMacAddress,
+                        sizeof (vport->currMacAddress))) {
+        RtlCopyMemory(vport->currMacAddress, nicParam->CurrentMacAddress,
+                      sizeof (vport->currMacAddress));
+        status |= OVS_EVENT_MAC_CHANGE;
+    }
+
+    if (vport->mtu != nicParam->MTU) {
+        vport->mtu = nicParam->MTU;
+        status |= OVS_EVENT_MTU_CHANGE;
+    }
+    vport->numaNodeId = nicParam->NumaNodeId;
+    portNo = vport->portNo;
+
+    NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+    if (status && portNo) {
+        OvsPostEvent(portNo, status);
+    }
+update_nic_done:
+    VPORT_NIC_EXIT(nicParam);
+}
+
+
+VOID
+OvsDisconnectNic(POVS_SWITCH_CONTEXT switchContext,
+                 PNDIS_SWITCH_NIC_PARAMETERS nicParam)
+{
+    POVS_VPORT_ENTRY vport;
+    UINT32 portNo =
0;
+    LOCK_STATE_EX lockState;
+    BOOLEAN isInternalPort = FALSE;
+
+    VPORT_NIC_ENTER(nicParam);
+
+    /* Wait for lists to be initialized. */
+    OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC);
+
+    if (!switchContext->isActivated) {
+        OVS_LOG_WARN("Switch is not activated yet.");
+        goto done;
+    }
+
+    NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0);
+    vport = OvsFindVportByPortIdAndNicIndex(switchContext,
+                                            nicParam->PortId,
+                                            nicParam->NicIndex);
+
+    if (!vport) {
+        OVS_LOG_WARN("Vport not present.");
+        NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+        goto done;
+    }
+
+    vport->nicState = NdisSwitchNicStateDisconnected;
+    vport->ovsState = OVS_STATE_NIC_CREATED;
+    portNo = vport->portNo;
+
+    /* Remember the port type now; 'vport' is not touched after unlock. */
+    if (vport->ovsType == OVSWIN_VPORT_TYPE_INTERNAL) {
+        isInternalPort = TRUE;
+    }
+
+    NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+
+    OvsPostEvent(portNo, OVS_EVENT_LINK_DOWN);
+
+    if (isInternalPort) {
+        OvsInternalAdapterDown();
+    }
+
+done:
+    VPORT_NIC_EXIT(nicParam);
+}
+
+
+/*
+ * OvsDeleteNic --
+ *
+ *    Handles deletion of the NIC identified by 'nicParam'.  A physical NIC
+ *    of the external port (nicIndex != 0) owns its own vport, which is
+ *    removed and freed.  For every other NIC the vport belongs to the
+ *    switch port and is only reset to the "port created" state.  In both
+ *    cases OVS_EVENT_DISCONNECT is posted for the port number once the
+ *    dispatch lock has been released.
+ *
+ *    Does nothing if the switch is not activated or the vport is unknown.
+ */
+VOID
+OvsDeleteNic(POVS_SWITCH_CONTEXT switchContext,
+             PNDIS_SWITCH_NIC_PARAMETERS nicParam)
+{
+    LOCK_STATE_EX lockState;
+    POVS_VPORT_ENTRY vport;
+    UINT32 portNo = 0;
+
+    VPORT_NIC_ENTER(nicParam);
+    /* Wait for lists to be initialized. */
+    OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC);
+
+    if (!switchContext->isActivated) {
+        OVS_LOG_WARN("Switch is not activated yet.");
+        goto done;
+    }
+
+    NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0);
+    vport = OvsFindVportByPortIdAndNicIndex(switchContext,
+                                            nicParam->PortId,
+                                            nicParam->NicIndex);
+
+    if (!vport) {
+        OVS_LOG_WARN("Vport not present.");
+        NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+        goto done;
+    }
+
+    /* Capture the port number first: 'vport' may be freed just below. */
+    portNo = vport->portNo;
+    if (vport->portType == NdisSwitchPortTypeExternal &&
+        vport->nicIndex != 0) {
+        /*
+         * OvsRemoveAndDeleteVport() frees 'vport'; it must not be
+         * dereferenced after this call.
+         */
+        OvsRemoveAndDeleteVport(switchContext, vport);
+    } else {
+        vport->nicState = NdisSwitchNicStateUnknown;
+        vport->ovsState = OVS_STATE_PORT_CREATED;
+    }
+
+    NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+    OvsPostEvent(portNo, OVS_EVENT_DISCONNECT);
+
+done:
+    VPORT_NIC_EXIT(nicParam);
+}
+
+
+/*
+ * OVS Vport related functionality.
+ */
+POVS_VPORT_ENTRY
+OvsFindVportByPortNo(POVS_SWITCH_CONTEXT switchContext,
+                     UINT32 portNo)
+{
+    if (OVS_VPORT_INDEX(portNo) < OVS_MAX_VPORT_ARRAY_SIZE) {
+        if (OVS_IS_VPORT_ENTRY_NULL(switchContext, OVS_VPORT_INDEX(portNo))) {
+            return NULL;
+        } else {
+            POVS_VPORT_ENTRY vport;
+            vport = (POVS_VPORT_ENTRY)
+                     switchContext->vportArray[OVS_VPORT_INDEX(portNo)];
+            return vport->portNo == portNo ?
vport : NULL;
+        }
+    }
+    return NULL;
+}
+
+
+/*
+ * Looks up a vport by its OVS name ('name' of 'length' bytes) in the name
+ * hash table.  Returns NULL if there is no exact match.
+ */
+POVS_VPORT_ENTRY
+OvsFindVportByOvsName(POVS_SWITCH_CONTEXT switchContext,
+                      CHAR *name,
+                      UINT32 length)
+{
+    POVS_VPORT_ENTRY vport;
+    PLIST_ENTRY head, link;
+    UINT32 hash = OvsJhashBytes((const VOID *)name, length, OVS_HASH_BASIS);
+    head = &(switchContext->nameHashArray[hash & OVS_VPORT_MASK]);
+    LIST_FORALL(head, link) {
+        vport = CONTAINING_RECORD(link, OVS_VPORT_ENTRY, nameLink);
+        if (vport->ovsNameLen == length &&
+            RtlEqualMemory(name, vport->ovsName, length)) {
+            return vport;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Looks up a vport by NDIS port id and NIC index.  The external and
+ * internal ports are resolved from the fields cached in the switch
+ * context (physical NICs of the external port via their fixed slots in
+ * vportArray); every other port is searched in the port-id hash table.
+ * Returns NULL if nothing matches.
+ */
+POVS_VPORT_ENTRY
+OvsFindVportByPortIdAndNicIndex(POVS_SWITCH_CONTEXT switchContext,
+                                NDIS_SWITCH_PORT_ID portId,
+                                NDIS_SWITCH_NIC_INDEX index)
+{
+    if (portId == switchContext->externalPortId) {
+        if (index == 0) {
+            return (POVS_VPORT_ENTRY)switchContext->externalVport;
+        } else if (index > OVS_MAX_PHYS_ADAPTERS) {
+            return NULL;
+        }
+        if (OVS_IS_VPORT_ENTRY_NULL(switchContext,
+                                    index + OVS_EXTERNAL_VPORT_START)) {
+            return NULL;
+        } else {
+            return (POVS_VPORT_ENTRY)switchContext->vportArray[
+                        index + OVS_EXTERNAL_VPORT_START];
+        }
+    } else if (switchContext->internalPortId == portId) {
+        return (POVS_VPORT_ENTRY)switchContext->internalVport;
+    } else {
+        PLIST_ENTRY head, link;
+        POVS_VPORT_ENTRY vport;
+        UINT32 hash;
+        hash = OvsJhashWords((UINT32 *)&portId, 1, OVS_HASH_BASIS);
+        head = &(switchContext->portHashArray[hash & OVS_VPORT_MASK]);
+        LIST_FORALL(head, link) {
+            vport = CONTAINING_RECORD(link, OVS_VPORT_ENTRY, portLink);
+            if (portId == vport->portId && index == vport->nicIndex) {
+                return vport;
+            }
+        }
+        return NULL;
+    }
+}
+
+/*
+ * OvsGetVportNo --
+ *
+ *    Picks the vportArray slot for a new vport of type 'ovsType' and
+ *    returns the port number built from that slot index and the slot's
+ *    generation value.  External physical NICs, the internal port and the
+ *    tunnel ports use fixed slots; synthetic/emulated ports take the next
+ *    free slot after 'lastPortIndex', wrapping around to
+ *    OVS_VM_VPORT_START.
+ *
+ *    Returns 0 (not a valid port number) if the arguments are invalid, no
+ *    slot is free, or the chosen slot does not hold a generation value
+ *    (i.e. it holds something larger than 0xff).
+ */
+static UINT32
+OvsGetVportNo(POVS_SWITCH_CONTEXT switchContext,
+              UINT32 nicIndex,
+              OVS_VPORT_TYPE ovsType)
+{
+    UINT32 index = 0xffffff, i = 0;
+    UINT64 gen;
+
+    switch (ovsType) {
+    case OVSWIN_VPORT_TYPE_EXTERNAL:
+        if (nicIndex == 0) {
+            return 0;  // not a valid portNo
+        } else if (nicIndex > OVS_MAX_PHYS_ADAPTERS) {
+            return 0;
+        } else {
+            index = nicIndex + OVS_EXTERNAL_VPORT_START;
+        }
+        break;
+    case OVSWIN_VPORT_TYPE_INTERNAL:
+        index = OVS_INTERNAL_VPORT_DEFAULT_INDEX;
+        break;
+    case OVSWIN_VPORT_TYPE_SYNTHETIC:
+    case OVSWIN_VPORT_TYPE_EMULATED:
+        index = switchContext->lastPortIndex + 1;
+        if (index == OVS_MAX_VPORT_ARRAY_SIZE) {
+            index = OVS_VM_VPORT_START;
+        }
+        /* Probe at most one full lap over the VM slot range. */
+        while (!OVS_IS_VPORT_ENTRY_NULL(switchContext, index) &&
+               i < (OVS_MAX_VPORT_ARRAY_SIZE - OVS_VM_VPORT_START)) {
+            index++;
+            i++;
+            if (index == OVS_MAX_VPORT_ARRAY_SIZE) {
+                index = OVS_VM_VPORT_START;
+            }
+        }
+        if (i == (OVS_MAX_VPORT_ARRAY_SIZE - OVS_VM_VPORT_START)) {
+            return 0; // not available
+        }
+        switchContext->lastPortIndex = index;
+        break;
+    case OVSWIN_VPORT_TYPE_GRE:
+        index = OVS_GRE_VPORT_INDEX;
+        break;
+    case OVSWIN_VPORT_TYPE_GRE64:
+        index = OVS_GRE64_VPORT_INDEX;
+        break;
+    case OVSWIN_VPORT_TYPE_VXLAN:
+        index = OVS_VXLAN_VPORT_INDEX;
+        break;
+    case OVSWIN_VPORT_TYPE_LOCAL:
+    default:
+        ASSERT(0);
+    }
+    /*
+     * Valid slots are 0 .. OVS_MAX_VPORT_ARRAY_SIZE - 1 (the bound used by
+     * every other walker of vportArray), so the size itself must be
+     * rejected too before the array is indexed.
+     */
+    if (index >= OVS_MAX_VPORT_ARRAY_SIZE) {
+        return 0;
+    }
+    gen = (UINT64)switchContext->vportArray[index];
+    if (gen > 0xff) {
+        return 0;
+    } else if (gen == 0) {
+        gen++;
+    }
+    return OVS_VPORT_PORT_NO(index, (UINT32)gen);
+}
+
+
+/*
+ * Allocates a zero-filled vport entry in the OVS_STATE_UNKNOWN state.
+ * Returns NULL if the allocation fails.
+ */
+static POVS_VPORT_ENTRY
+OvsAllocateVport(VOID)
+{
+    POVS_VPORT_ENTRY vport;
+    vport = (POVS_VPORT_ENTRY)OvsAllocateMemory(sizeof (OVS_VPORT_ENTRY));
+    if (vport == NULL) {
+        return NULL;
+    }
+    RtlZeroMemory(vport, sizeof (OVS_VPORT_ENTRY));
+    vport->ovsState = OVS_STATE_UNKNOWN;
+    return vport;
+}
+
+static VOID
+OvsInitVportWithPortParam(POVS_VPORT_ENTRY vport,
+                          PNDIS_SWITCH_PORT_PARAMETERS portParam)
+{
+    vport->isValidationPort = portParam->IsValidationPort;
+    vport->portType = portParam->PortType;
+    vport->portState = portParam->PortState;
+    vport->portId = portParam->PortId;
+    vport->nicState = NdisSwitchNicStateUnknown;
+
+    switch (vport->portType) {
+    case NdisSwitchPortTypeExternal:
+        vport->ovsType = OVSWIN_VPORT_TYPE_EXTERNAL;
+        break;
+    case NdisSwitchPortTypeInternal:
+        vport->ovsType
= OVSWIN_VPORT_TYPE_INTERNAL; + break; + case NdisSwitchPortTypeSynthetic: + vport->ovsType = OVSWIN_VPORT_TYPE_SYNTHETIC; + break; + case NdisSwitchPortTypeEmulated: + vport->ovsType = OVSWIN_VPORT_TYPE_EMULATED; + break; + } + RtlCopyMemory(&vport->portName, &portParam->PortName, + sizeof (NDIS_SWITCH_PORT_NAME)); + switch (vport->portState) { + case NdisSwitchPortStateCreated: + vport->ovsState = OVS_STATE_PORT_CREATED; + break; + case NdisSwitchPortStateTeardown: + vport->ovsState = OVS_STATE_PORT_TEAR_DOWN; + break; + case NdisSwitchPortStateDeleted: + vport->ovsState = OVS_STATE_PORT_DELETED; + break; + } +} + + +static VOID +OvsInitVportWithNicParam(POVS_SWITCH_CONTEXT switchContext, + POVS_VPORT_ENTRY vport, + PNDIS_SWITCH_NIC_PARAMETERS nicParam) +{ + ASSERT(vport->portId == nicParam->PortId); + ASSERT(vport->ovsState == OVS_STATE_PORT_CREATED); + + UNREFERENCED_PARAMETER(switchContext); + + RtlCopyMemory(vport->permMacAddress, nicParam->PermanentMacAddress, + sizeof (nicParam->PermanentMacAddress)); + RtlCopyMemory(vport->currMacAddress, nicParam->CurrentMacAddress, + sizeof (nicParam->CurrentMacAddress)); + + if (nicParam->NicType == NdisSwitchNicTypeSynthetic || + nicParam->NicType == NdisSwitchNicTypeEmulated) { + RtlCopyMemory(vport->vmMacAddress, nicParam->VMMacAddress, + sizeof (nicParam->VMMacAddress)); + RtlCopyMemory(&vport->vmName, &nicParam->VmName, + sizeof (nicParam->VmName)); + } else { + RtlCopyMemory(&vport->netCfgInstanceId, &nicParam->NetCfgInstanceId, + sizeof (nicParam->NetCfgInstanceId)); + } + RtlCopyMemory(&vport->nicName, &nicParam->NicName, + sizeof (nicParam->NicName)); + vport->mtu = nicParam->MTU; + vport->nicState = nicParam->NicState; + vport->nicIndex = nicParam->NicIndex; + vport->numaNodeId = nicParam->NumaNodeId; + + switch (vport->nicState) { + case NdisSwitchNicStateCreated: + vport->ovsState = OVS_STATE_NIC_CREATED; + break; + case NdisSwitchNicStateConnected: + vport->ovsState = OVS_STATE_CONNECTED; + break; + case 
NdisSwitchNicStateDisconnected: + vport->ovsState = OVS_STATE_NIC_CREATED; + break; + case NdisSwitchNicStateDeleted: + vport->ovsState = OVS_STATE_PORT_CREATED; + break; + } +} + +static VOID +OvsInitPhysNicVport(POVS_VPORT_ENTRY vport, + POVS_VPORT_ENTRY virtVport, + UINT32 nicIndex) +{ + vport->isValidationPort = virtVport->isValidationPort; + vport->portType = virtVport->portType; + vport->portState = virtVport->portState; + vport->portId = virtVport->portId; + vport->nicState = NdisSwitchNicStateUnknown; + vport->ovsType = OVSWIN_VPORT_TYPE_EXTERNAL; + vport->nicIndex = (NDIS_SWITCH_NIC_INDEX)nicIndex; + RtlCopyMemory(&vport->portName, &virtVport->portName, + sizeof (NDIS_SWITCH_PORT_NAME)); + vport->ovsState = OVS_STATE_PORT_CREATED; +} +static NDIS_STATUS +OvsInitVportCommon(POVS_SWITCH_CONTEXT switchContext, +POVS_VPORT_ENTRY vport) +{ + UINT32 hash; + size_t len; + if (vport->portType != NdisSwitchPortTypeExternal || + vport->nicIndex != 0) { + vport->portNo = OvsGetVportNo(switchContext, vport->nicIndex, + vport->ovsType); + if (vport->portNo == 0) { + return NDIS_STATUS_RESOURCES; + } + ASSERT(OVS_IS_VPORT_ENTRY_NULL(switchContext, + OVS_VPORT_INDEX(vport->portNo))); + + switchContext->vportArray[OVS_VPORT_INDEX(vport->portNo)] = vport; + } + switch (vport->portType) { + case NdisSwitchPortTypeExternal: + if (vport->nicIndex == 0) { + switchContext->externalPortId = vport->portId; + switchContext->externalVport = vport; + RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, + "external.virtualAdapter"); + } + else { + switchContext->numPhysicalNics++; + RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, + "external.%lu", (UINT32)vport->nicIndex); + } + break; + case NdisSwitchPortTypeInternal: + switchContext->internalPortId = vport->portId; + switchContext->internalVport = vport; + RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, + "internal"); + break; + case NdisSwitchPortTypeSynthetic: + 
RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, + "vmNICSyn.%lx", vport->portNo); + break; + case NdisSwitchPortTypeEmulated: + RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, + "vmNICEmu.%lx", vport->portNo); + break; + } + StringCbLengthA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, &len); + vport->ovsNameLen = (UINT32)len; + if (vport->portType == NdisSwitchPortTypeExternal && + vport->nicIndex == 0) { + return NDIS_STATUS_SUCCESS; + } + hash = OvsJhashBytes(vport->ovsName, vport->ovsNameLen, OVS_HASH_BASIS); + InsertHeadList(&switchContext->nameHashArray[hash & OVS_VPORT_MASK], + &vport->nameLink); + hash = OvsJhashWords(&vport->portId, 1, OVS_HASH_BASIS); + InsertHeadList(&switchContext->portHashArray[hash & OVS_VPORT_MASK], + &vport->portLink); + switchContext->numVports++; + return NDIS_STATUS_SUCCESS; +} + + +static VOID +OvsRemoveAndDeleteVport(POVS_SWITCH_CONTEXT switchContext, + POVS_VPORT_ENTRY vport) +{ + UINT64 gen = vport->portNo >> 24; + switch (vport->ovsType) { + case OVSWIN_VPORT_TYPE_EXTERNAL: + if (vport->nicIndex == 0) { + ASSERT(switchContext->numPhysicalNics == 0); + switchContext->externalPortId = 0; + switchContext->externalVport = NULL; + OvsFreeMemory(vport); + return; + } else { + ASSERT(switchContext->numPhysicalNics); + switchContext->numPhysicalNics--; + } + break; + case OVSWIN_VPORT_TYPE_INTERNAL: + switchContext->internalPortId = 0; + switchContext->internalVport = NULL; + OvsInternalAdapterDown(); + break; + case OVSWIN_VPORT_TYPE_VXLAN: + OvsCleanupVxlanTunnel(vport); + break; + case OVSWIN_VPORT_TYPE_GRE: + case OVSWIN_VPORT_TYPE_GRE64: + break; + case OVSWIN_VPORT_TYPE_EMULATED: + case OVSWIN_VPORT_TYPE_SYNTHETIC: + default: + break; + } + + RemoveEntryList(&vport->nameLink); + RemoveEntryList(&vport->portLink); + gen = (gen + 1) & 0xff; + switchContext->vportArray[OVS_VPORT_INDEX(vport->portNo)] = + (PVOID)(UINT64)gen; + switchContext->numVports--; + OvsFreeMemory(vport); +} + + +NDIS_STATUS 
+OvsAddConfiguredSwitchPorts(POVS_SWITCH_CONTEXT switchContext) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + ULONG arrIndex; + PNDIS_SWITCH_PORT_PARAMETERS portParam; + PNDIS_SWITCH_PORT_ARRAY portArray = NULL; + POVS_VPORT_ENTRY vport; + + OVS_LOG_TRACE("Enter: switchContext:%p", switchContext); + + status = OvsGetPortsOnSwitch(switchContext, &portArray); + if (status != NDIS_STATUS_SUCCESS) { + goto cleanup; + } + + for (arrIndex = 0; arrIndex < portArray->NumElements; arrIndex++) { + portParam = NDIS_SWITCH_PORT_AT_ARRAY_INDEX(portArray, arrIndex); + vport = (POVS_VPORT_ENTRY)OvsAllocateVport(); + if (vport == NULL) { + status = NDIS_STATUS_RESOURCES; + goto cleanup; + } + OvsInitVportWithPortParam(vport, portParam); + status = OvsInitVportCommon(switchContext, vport); + if (status != NDIS_STATUS_SUCCESS) { + OvsFreeMemory(vport); + goto cleanup; + } + } +cleanup: + if (status != NDIS_STATUS_SUCCESS) { + OvsClearAllSwitchVports(switchContext); + } + + if (portArray != NULL) { + OvsFreeMemory(portArray); + } + OVS_LOG_TRACE("Exit: status: %x", status); + return status; +} + + +NDIS_STATUS +OvsInitConfiguredSwitchNics(POVS_SWITCH_CONTEXT switchContext) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + PNDIS_SWITCH_NIC_ARRAY nicArray = NULL; + ULONG arrIndex; + PNDIS_SWITCH_NIC_PARAMETERS nicParam; + POVS_VPORT_ENTRY vport; + + OVS_LOG_TRACE("Enter: switchContext: %p", switchContext); + /* + * Now, get NIC list. + */ + status = OvsGetNicsOnSwitch(switchContext, &nicArray); + if (status != NDIS_STATUS_SUCCESS) { + goto cleanup; + } + for (arrIndex = 0; arrIndex < nicArray->NumElements; ++arrIndex) { + + nicParam = NDIS_SWITCH_NIC_AT_ARRAY_INDEX(nicArray, arrIndex); + + /* + * XXX: Check if the port is configured with a VLAN. Disallow such a + * configuration, since we don't support tag-in-tag. + */ + + /* + * XXX: Check if the port is connected to a VF. Disconnect the VF in + * such a case. 
+ */ + + if (nicParam->NicType == NdisSwitchNicTypeExternal && + nicParam->NicIndex != 0) { + POVS_VPORT_ENTRY virtVport = + (POVS_VPORT_ENTRY)switchContext->externalVport; + vport = OvsAllocateVport(); + if (vport) { + OvsInitPhysNicVport(vport, virtVport, nicParam->NicIndex); + status = OvsInitVportCommon(switchContext, vport); + if (status != NDIS_STATUS_SUCCESS) { + OvsFreeMemory(vport); + vport = NULL; + } + } + } else { + vport = OvsFindVportByPortIdAndNicIndex(switchContext, + nicParam->PortId, + nicParam->NicIndex); + } + if (vport == NULL) { + OVS_LOG_ERROR("Fail to allocate vport"); + continue; + } + OvsInitVportWithNicParam(switchContext, vport, nicParam); + if (nicParam->NicType == NdisSwitchNicTypeInternal) { + OvsInternalAdapterUp(vport->portNo, &nicParam->NetCfgInstanceId); + } + } +cleanup: + + if (nicArray != NULL) { + OvsFreeMemory(nicArray); + } + OVS_LOG_TRACE("Exit: status: %x", status); + return status; +} + +VOID +OvsClearAllSwitchVports(POVS_SWITCH_CONTEXT switchContext) +{ + UINT32 i; + + for (i = 0; i < OVS_MAX_VPORT_ARRAY_SIZE; i++) { + if (!OVS_IS_VPORT_ENTRY_NULL(switchContext, i)) { + OvsRemoveAndDeleteVport(switchContext, + (POVS_VPORT_ENTRY)switchContext->vportArray[i]); + } + } + if (switchContext->externalVport) { + OvsRemoveAndDeleteVport(switchContext, + (POVS_VPORT_ENTRY)switchContext->externalVport); + } +} + +NTSTATUS +OvsDumpVportIoctl(PVOID inputBuffer, + UINT32 inputLength, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + UINT32 numVports, count; + UINT32 dpNo, i; + UINT32 *outPtr; + POVS_VPORT_ENTRY vport; + LOCK_STATE_EX lockState; + + if (inputLength < sizeof (UINT32)) { + return STATUS_INVALID_PARAMETER; + } + dpNo = *(UINT32 *)inputBuffer; + + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != dpNo) { + NdisReleaseSpinLock(gOvsCtrlLock); + return STATUS_INVALID_PARAMETER; + } + /* + * We should hold SwitchContext RW lock + */ + + 
NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, + NDIS_RWL_AT_DISPATCH_LEVEL); + numVports = outputLength/sizeof (UINT32); + numVports = MIN(gOvsSwitchContext->numVports, numVports); + outPtr = (UINT32 *)outputBuffer; + for (i = 0, count = 0; + i < OVS_MAX_VPORT_ARRAY_SIZE && count < numVports; i++) { + vport = (POVS_VPORT_ENTRY)gOvsSwitchContext->vportArray[i]; + if (OVS_IS_VPORT_ENTRY_NULL(gOvsSwitchContext, i)) { + continue; + } + if (vport->ovsState == OVS_STATE_CONNECTED || + vport->ovsState == OVS_STATE_NIC_CREATED) { + *outPtr = vport->portNo; + outPtr++; + count++; + } + } + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + NdisReleaseSpinLock(gOvsCtrlLock); + *replyLen = count * sizeof (UINT32); + return STATUS_SUCCESS; +} + + +NTSTATUS +OvsGetVportIoctl(PVOID inputBuffer, + UINT32 inputLength, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + UINT32 dpNo; + POVS_VPORT_GET get; + POVS_VPORT_INFO info; + POVS_VPORT_ENTRY vport; + size_t len; + LOCK_STATE_EX lockState; + + if (inputLength < sizeof (OVS_VPORT_GET) || + outputLength < sizeof (OVS_VPORT_INFO)) { + return STATUS_INVALID_PARAMETER; + } + get = (POVS_VPORT_GET)inputBuffer; + dpNo = get->dpNo; + info = (POVS_VPORT_INFO)outputBuffer; + RtlZeroMemory(info, sizeof (POVS_VPORT_INFO)); + + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != dpNo) { + NdisReleaseSpinLock(gOvsCtrlLock); + return STATUS_INVALID_PARAMETER; + } + + NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, + NDIS_RWL_AT_DISPATCH_LEVEL); + if (get->portNo == 0) { + StringCbLengthA(get->name, OVS_MAX_PORT_NAME_LENGTH - 1, &len); + vport = OvsFindVportByOvsName(gOvsSwitchContext, get->name, (UINT32)len); + } else { + vport = OvsFindVportByPortNo(gOvsSwitchContext, get->portNo); + } + if (vport == NULL || (vport->ovsState != OVS_STATE_CONNECTED && + vport->ovsState != OVS_STATE_NIC_CREATED)) { + 
NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + NdisReleaseSpinLock(gOvsCtrlLock); + /* + * XXX Change to NO DEVICE + */ + return STATUS_DEVICE_DOES_NOT_EXIST; + } + info->dpNo = dpNo; + info->portNo = vport->portNo; + info->type = vport->ovsType; + RtlCopyMemory(info->macAddress, vport->permMacAddress, + sizeof (vport->permMacAddress)); + RtlCopyMemory(info->name, vport->ovsName, vport->ovsNameLen + 1); + + info->rxPackets = vport->stats.rxPackets; + info->rxBytes = vport->stats.rxBytes; + info->txPackets = vport->stats.txPackets; + info->txBytes = vport->stats.txBytes; + info->rxErrors = vport->errStats.rxErrors; + info->txErrors = vport->errStats.txErrors; + info->rxDropped = vport->errStats.rxDropped; + info->txDropped = vport->errStats.txDropped; + + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + NdisReleaseSpinLock(gOvsCtrlLock); + *replyLen = sizeof (OVS_VPORT_INFO); + return STATUS_SUCCESS; +} + + +NTSTATUS +OvsInitTunnelVport(POVS_VPORT_ENTRY vport, + POVS_VPORT_ADD_REQUEST addReq) +{ + size_t len; + NTSTATUS status = STATUS_SUCCESS; + + vport->isValidationPort = FALSE; + vport->ovsType = addReq->type; + vport->ovsState = OVS_STATE_PORT_CREATED; + RtlCopyMemory(vport->ovsName, addReq->name, OVS_MAX_PORT_NAME_LENGTH); + vport->ovsName[OVS_MAX_PORT_NAME_LENGTH - 1] = 0; + StringCbLengthA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, &len); + vport->ovsNameLen = (UINT32)len; + switch (addReq->type) { + case OVSWIN_VPORT_TYPE_GRE: + break; + case OVSWIN_VPORT_TYPE_GRE64: + break; + case OVSWIN_VPORT_TYPE_VXLAN: + status = OvsInitVxlanTunnel(vport, addReq); + break; + default: + ASSERT(0); + } + return status; +} + +NTSTATUS +OvsAddVportIoctl(PVOID inputBuffer, + UINT32 inputLength, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + NTSTATUS status = STATUS_SUCCESS; + POVS_VPORT_INFO vportInfo; + POVS_VPORT_ADD_REQUEST addReq; + POVS_VPORT_ENTRY vport; + LOCK_STATE_EX lockState; + UINT32 index; + UINT32 
portNo; + + OVS_LOG_TRACE("Enter: inputLength: %u, outputLength: %u", + inputLength, outputLength); + if (inputLength < sizeof (OVS_VPORT_ADD_REQUEST) || + outputLength < sizeof (OVS_VPORT_INFO)) { + status = STATUS_INVALID_PARAMETER; + goto vport_add_done; + } + addReq = (POVS_VPORT_ADD_REQUEST)inputBuffer; + addReq->name[OVS_MAX_PORT_NAME_LENGTH - 1] = 0; + + switch (addReq->type) { + case OVSWIN_VPORT_TYPE_GRE: + index = OVS_GRE_VPORT_INDEX; + break; + case OVSWIN_VPORT_TYPE_GRE64: + index = OVS_GRE64_VPORT_INDEX; + break; + case OVSWIN_VPORT_TYPE_VXLAN: + index = OVS_VXLAN_VPORT_INDEX; + break; + default: + status = STATUS_NOT_SUPPORTED; + goto vport_add_done; + } + + vport = (POVS_VPORT_ENTRY)OvsAllocateVport(); + if (vport == NULL) { + status = STATUS_INSUFFICIENT_RESOURCES; + goto vport_add_done; + } + + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != addReq->dpNo) { + NdisReleaseSpinLock(gOvsCtrlLock); + status = STATUS_INVALID_PARAMETER; + OvsFreeMemory(vport); + goto vport_add_done; + } + NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, + NDIS_RWL_AT_DISPATCH_LEVEL); + if (!OVS_IS_VPORT_ENTRY_NULL(gOvsSwitchContext, index)) { + status = STATUS_DEVICE_BUSY; + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + NdisReleaseSpinLock(gOvsCtrlLock); + OvsFreeMemory(vport); + goto vport_add_done; + } + + status = OvsInitTunnelVport(vport, addReq); + if (status != STATUS_SUCCESS) { + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + NdisReleaseSpinLock(gOvsCtrlLock); + OvsFreeMemory(vport); + goto vport_add_done; + } + + status = OvsInitVportCommon(gOvsSwitchContext, vport); + ASSERT(status == NDIS_STATUS_SUCCESS); + + vport->ovsState = OVS_STATE_CONNECTED; + vport->nicState = NdisSwitchNicStateConnected; + + vportInfo = (POVS_VPORT_INFO)outputBuffer; + + RtlZeroMemory(vportInfo, sizeof (POVS_VPORT_INFO)); + vportInfo->dpNo = gOvsSwitchContext->dpNo; + 
vportInfo->portNo = vport->portNo; + vportInfo->type = vport->ovsType; + RtlCopyMemory(vportInfo->name, vport->ovsName, vport->ovsNameLen + 1); + portNo = vport->portNo; + + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + NdisReleaseSpinLock(gOvsCtrlLock); + OvsPostEvent(portNo, OVS_EVENT_CONNECT | OVS_EVENT_LINK_UP); + *replyLen = sizeof (OVS_VPORT_INFO); + status = STATUS_SUCCESS; +vport_add_done: + OVS_LOG_TRACE("Exit: byteReturned: %u, status: %x", + *replyLen, status); + return status; +} + +NTSTATUS +OvsDelVportIoctl(PVOID inputBuffer, + UINT32 inputLength, + UINT32 *replyLen) +{ + NTSTATUS status = STATUS_SUCCESS; + POVS_VPORT_DELETE_REQUEST delReq; + LOCK_STATE_EX lockState; + POVS_VPORT_ENTRY vport; + size_t len; + UINT32 portNo = 0; + + OVS_LOG_TRACE("Enter: inputLength: %u", inputLength); + + if (inputLength < sizeof (OVS_VPORT_DELETE_REQUEST)) { + status = STATUS_INVALID_PARAMETER; + goto vport_del_done; + } + delReq = (POVS_VPORT_DELETE_REQUEST)inputBuffer; + + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != delReq->dpNo) { + NdisReleaseSpinLock(gOvsCtrlLock); + status = STATUS_INVALID_PARAMETER; + goto vport_del_done; + } + NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, + NDIS_RWL_AT_DISPATCH_LEVEL); + if (delReq->portNo == 0) { + StringCbLengthA(delReq->name, OVS_MAX_PORT_NAME_LENGTH - 1, &len); + vport = OvsFindVportByOvsName(gOvsSwitchContext, delReq->name, + (UINT32)len); + } else { + vport = OvsFindVportByPortNo(gOvsSwitchContext, delReq->portNo); + } + if (vport) { + OVS_LOG_INFO("delete vport: %s, portNo: %x", vport->ovsName, + vport->portNo); + portNo = vport->portNo; + OvsRemoveAndDeleteVport(gOvsSwitchContext, vport); + } else { + status = STATUS_DEVICE_DOES_NOT_EXIST; + } + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + NdisReleaseSpinLock(gOvsCtrlLock); + if (vport) { + OvsPostEvent(portNo, OVS_EVENT_DISCONNECT | 
OVS_EVENT_LINK_DOWN); + } +vport_del_done: + OVS_LOG_TRACE("Exit: byteReturned: %u, status: %x", + *replyLen, status); + return status; +} + +NTSTATUS +OvsConvertIfCountedStrToAnsiStr(PIF_COUNTED_STRING wStr, + CHAR *str, + UINT16 maxStrLen) +{ + ANSI_STRING astr; + UNICODE_STRING ustr; + NTSTATUS status; + UINT32 size; + + ustr.Buffer = wStr->String; + ustr.Length = wStr->Length; + ustr.MaximumLength = IF_MAX_STRING_SIZE; + + astr.Buffer = str; + astr.MaximumLength = maxStrLen; + astr.Length = 0; + + size = RtlUnicodeStringToAnsiSize(&ustr); + if (size > maxStrLen) { + return STATUS_BUFFER_OVERFLOW; + } + + status = RtlUnicodeStringToAnsiString(&astr, &ustr, FALSE); + + ASSERT(status == STATUS_SUCCESS); + if (status != STATUS_SUCCESS) { + return status; + } + ASSERT(astr.Length <= maxStrLen); + str[astr.Length] = 0; + return STATUS_SUCCESS; +} + + +NTSTATUS +OvsGetExtInfoIoctl(PVOID inputBuffer, + UINT32 inputLength, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + POVS_VPORT_GET get; + POVS_VPORT_EXT_INFO info; + POVS_VPORT_ENTRY vport; + size_t len; + LOCK_STATE_EX lockState; + NTSTATUS status = STATUS_SUCCESS; + NDIS_SWITCH_NIC_NAME nicName; + NDIS_VM_NAME vmName; + BOOLEAN doConvert = FALSE; + + OVS_LOG_TRACE("Enter: inputLength: %u, outputLength: %u", + inputLength, outputLength); + + if (inputLength < sizeof (OVS_VPORT_GET) || + outputLength < sizeof (OVS_VPORT_EXT_INFO)) { + status = STATUS_INVALID_PARAMETER; + goto ext_info_done; + } + get = (POVS_VPORT_GET)inputBuffer; + info = (POVS_VPORT_EXT_INFO)outputBuffer; + RtlZeroMemory(info, sizeof (POVS_VPORT_EXT_INFO)); + + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != get->dpNo) { + NdisReleaseSpinLock(gOvsCtrlLock); + status = STATUS_INVALID_PARAMETER; + goto ext_info_done; + } + NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, + NDIS_RWL_AT_DISPATCH_LEVEL); + if (get->portNo == 0) { + StringCbLengthA(get->name, 
OVS_MAX_PORT_NAME_LENGTH - 1, &len); + vport = OvsFindVportByOvsName(gOvsSwitchContext, get->name, + (UINT32)len); + } else { + vport = OvsFindVportByPortNo(gOvsSwitchContext, get->portNo); + } + if (vport == NULL || (vport->ovsState != OVS_STATE_CONNECTED && + vport->ovsState != OVS_STATE_NIC_CREATED)) { + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + NdisReleaseSpinLock(gOvsCtrlLock); + if (get->portNo) { + OVS_LOG_WARN("vport %u does not exist any more", get->portNo); + } else { + OVS_LOG_WARN("vport %s does not exist any more", get->name); + } + status = STATUS_DEVICE_DOES_NOT_EXIST; + goto ext_info_done; + } + info->dpNo = get->dpNo; + info->portNo = vport->portNo; + RtlCopyMemory(info->macAddress, vport->currMacAddress, + sizeof (vport->currMacAddress)); + RtlCopyMemory(info->permMACAddress, vport->permMacAddress, + sizeof (vport->permMacAddress)); + if (vport->ovsType == OVSWIN_VPORT_TYPE_SYNTHETIC || + vport->ovsType == OVSWIN_VPORT_TYPE_EMULATED) { + RtlCopyMemory(info->vmMACAddress, vport->vmMacAddress, + sizeof (vport->vmMacAddress)); + } + info->nicIndex = vport->nicIndex; + info->portId = vport->portId; + info->type = vport->ovsType; + info->mtu = vport->mtu; + /* + * TO be revisit XXX + */ + if (vport->ovsState == OVS_STATE_NIC_CREATED) { + info->status = OVS_EVENT_CONNECT | OVS_EVENT_LINK_DOWN; + } else if (vport->ovsState == OVS_STATE_CONNECTED) { + info->status = OVS_EVENT_CONNECT | OVS_EVENT_LINK_UP; + } else { + info->status = OVS_EVENT_DISCONNECT; + } + if ((info->type == OVSWIN_VPORT_TYPE_SYNTHETIC || + info->type == OVSWIN_VPORT_TYPE_EMULATED) && + (vport->ovsState == OVS_STATE_NIC_CREATED || + vport->ovsState == OVS_STATE_CONNECTED)) { + RtlCopyMemory(&vmName, &vport->vmName, sizeof (NDIS_VM_NAME)); + RtlCopyMemory(&nicName, &vport->nicName, sizeof + (NDIS_SWITCH_NIC_NAME)); + doConvert = TRUE; + } else { + info->vmUUID[0] = 0; + info->vifUUID[0] = 0; + } + + RtlCopyMemory(info->name, vport->ovsName, vport->ovsNameLen + 
1); + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + NdisReleaseSpinLock(gOvsCtrlLock); + if (doConvert) { + status = OvsConvertIfCountedStrToAnsiStr(&vmName, + info->vmUUID, + OVS_MAX_VM_UUID_LEN); + if (status != STATUS_SUCCESS) { + OVS_LOG_INFO("Fail to convert VM name."); + info->vmUUID[0] = 0; + } + + status = OvsConvertIfCountedStrToAnsiStr(&nicName, + info->vifUUID, + OVS_MAX_VIF_UUID_LEN); + if (status != STATUS_SUCCESS) { + OVS_LOG_INFO("Fail to convert nic name"); + info->vifUUID[0] = 0; + } + /* + * for now ignore status + */ + status = STATUS_SUCCESS; + } + *replyLen = sizeof (OVS_VPORT_EXT_INFO); + +ext_info_done: + OVS_LOG_TRACE("Exit: byteReturned: %u, status: %x", + *replyLen, status); + return status; +} + + +static __inline VOID +OvsWaitActivate(POVS_SWITCH_CONTEXT switchContext, ULONG sleepMicroSec) +{ + while ((!switchContext->isActivated) && + (!switchContext->isActivateFailed)) { + /* Wait for the switch to be active and + * the list of ports in OVS to be initialized. */ + NdisMSleep(sleepMicroSec); + } +} diff --git a/datapath-windows/ovsext/OvsVport.h b/datapath-windows/ovsext/OvsVport.h new file mode 100644 index 000000000..8fe23f152 --- /dev/null +++ b/datapath-windows/ovsext/OvsVport.h @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __OVS_VPORT_H_ +#define __OVS_VPORT_H_ 1 + +#include "OvsSwitch.h" + +/* + * A Vport, or Virtual Port, is a port on the OVS. It can be one of the + * following types. Some of the Vports are "real" ports on the hyper-v switch, + * and some are not: + * - VIF port (VM's NIC) + * - External Adapters (physical NIC) + * - Internal Adapter (Virtual adapter exposed on the host). + * - Tunnel ports created by OVS userspace. + */ + +typedef enum { + OVS_STATE_UNKNOWN, + OVS_STATE_PORT_CREATED, + OVS_STATE_NIC_CREATED, + OVS_STATE_CONNECTED, + OVS_STATE_PORT_TEAR_DOWN, + OVS_STATE_PORT_DELETED, +} OVS_VPORT_STATE; + +typedef struct _OVS_VPORT_STATS { + UINT64 rxBytes; + UINT64 rxPackets; + UINT64 txBytes; + UINT64 txPackets; +} OVS_VPORT_STATS; + +typedef struct _OVS_VPORT_ERR_STATS { + UINT64 rxErrors; + UINT64 txErrors; + UINT64 rxDropped; + UINT64 txDropped; +} OVS_VPORT_ERR_STATS; +/* + * Each internal, external adapter or vritual adapter has + * one vport entry. In addition, we have one vport for each + * tunnel type, such as vxlan, gre, gre64 + */ +typedef struct _OVS_VPORT_ENTRY { + LIST_ENTRY nameLink; + LIST_ENTRY portLink; + + OVS_VPORT_STATE ovsState; + OVS_VPORT_TYPE ovsType; + OVS_VPORT_STATS stats; + OVS_VPORT_ERR_STATS errStats; + UINT32 portNo; + UINT32 mtu; + CHAR ovsName[OVS_MAX_PORT_NAME_LENGTH]; + UINT32 ovsNameLen; + + PVOID priv; + NDIS_SWITCH_PORT_ID portId; + NDIS_SWITCH_NIC_INDEX nicIndex; + UINT16 numaNodeId; + NDIS_SWITCH_PORT_STATE portState; + NDIS_SWITCH_NIC_STATE nicState; + NDIS_SWITCH_PORT_TYPE portType; + BOOLEAN isValidationPort; + + UINT8 permMacAddress[MAC_ADDRESS_LEN]; + UINT8 currMacAddress[MAC_ADDRESS_LEN]; + UINT8 vmMacAddress[MAC_ADDRESS_LEN]; + + NDIS_SWITCH_PORT_NAME portName; + NDIS_SWITCH_NIC_NAME nicName; + NDIS_VM_NAME vmName; + GUID netCfgInstanceId; +} OVS_VPORT_ENTRY, *POVS_VPORT_ENTRY; + +struct _OVS_SWITCH_CONTEXT; + +#define OVS_IS_VPORT_ENTRY_NULL(_SwitchContext, _i) \ + 
((UINT64)(_SwitchContext)->vportArray[_i] <= 0xff) + +POVS_VPORT_ENTRY +OvsFindVportByPortNo(struct _OVS_SWITCH_CONTEXT *switchContext, + UINT32 portNo); +POVS_VPORT_ENTRY +OvsFindVportByOvsName(struct _OVS_SWITCH_CONTEXT *switchContext, + CHAR *name, UINT32 length); +POVS_VPORT_ENTRY +OvsFindVportByPortIdAndNicIndex(struct _OVS_SWITCH_CONTEXT *switchContext, + NDIS_SWITCH_PORT_ID portId, + NDIS_SWITCH_NIC_INDEX index); + +NDIS_STATUS OvsAddConfiguredSwitchPorts(struct _OVS_SWITCH_CONTEXT *switchContext); +NDIS_STATUS OvsInitConfiguredSwitchNics(struct _OVS_SWITCH_CONTEXT *switchContext); + +VOID OvsClearAllSwitchVports(struct _OVS_SWITCH_CONTEXT *switchContext); + +NTSTATUS OvsDumpVportIoctl(PVOID inputBuffer, UINT32 inputLength, + PVOID outputBuffer, UINT32 outputLength, + UINT32 *replyLen); +NTSTATUS OvsGetVportIoctl(PVOID inputBuffer, UINT32 inputLength, + PVOID outputBuffer, UINT32 outputLength, + UINT32 *replyLen); +NTSTATUS OvsAddVportIoctl(PVOID inputBuffer, UINT32 inputLength, + PVOID outputBuffer, UINT32 outputLength, + UINT32 *replyLen); +NTSTATUS OvsDelVportIoctl(PVOID inputBuffer, UINT32 inputLength, + UINT32 *replyLen); +NTSTATUS OvsGetExtInfoIoctl(PVOID inputBuffer, UINT32 inputLength, + PVOID outputBuffer, UINT32 outputLength, + UINT32 *replyLen); +NDIS_STATUS OvsCreateNic(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_NIC_PARAMETERS nicParam); +NDIS_STATUS OvsCreatePort(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_PORT_PARAMETERS portParam); +VOID OvsTeardownPort(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_PORT_PARAMETERS portParam); +VOID OvsDeletePort(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_PORT_PARAMETERS portParam); +VOID OvsConnectNic(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_NIC_PARAMETERS nicParam); +VOID OvsUpdateNic(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_NIC_PARAMETERS nicParam); +VOID OvsDeleteNic(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_NIC_PARAMETERS nicParam); +VOID 
OvsDisconnectNic(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_NIC_PARAMETERS nicParam); + +static __inline BOOLEAN +OvsIsTunnelVportType(OVS_VPORT_TYPE ovsType) +{ + return ovsType == OVSWIN_VPORT_TYPE_VXLAN || + ovsType == OVSWIN_VPORT_TYPE_GRE || + ovsType == OVSWIN_VPORT_TYPE_GRE64; +} + +static __inline BOOLEAN +OvsIsInternalVportType(OVS_VPORT_TYPE ovsType) +{ + return ovsType == OVSWIN_VPORT_TYPE_INTERNAL; +} + +static __inline BOOLEAN +OvsIsTunnelVportNo(UINT32 portNo) +{ + UINT32 idx = OVS_VPORT_INDEX(portNo); + return (idx >= OVS_TUNNEL_INDEX_START && idx <= OVS_TUNNEL_INDEX_END); +} + +static __inline POVS_VPORT_ENTRY +OvsGetTunnelVport(OVS_VPORT_TYPE type) +{ + ASSERT(OvsIsTunnelVportType(type)); + switch(type) { + case OVSWIN_VPORT_TYPE_VXLAN: + return (POVS_VPORT_ENTRY) OvsGetVportFromIndex(OVS_VXLAN_VPORT_INDEX); + default: + ASSERT(! "OvsGetTunnelVport not implemented for this tunnel."); + } + + return NULL; +} + +static __inline PVOID +OvsGetVportPriv(OVS_VPORT_TYPE type) +{ + return OvsGetTunnelVport(type)->priv; +} + +static __inline UINT32 +OvsGetExternalMtu() +{ + return ((POVS_VPORT_ENTRY) OvsGetExternalVport())->mtu; +} + +#endif /* __OVS_VPORT_H_ */ diff --git a/datapath-windows/ovsext/OvsVxlan.c b/datapath-windows/ovsext/OvsVxlan.c new file mode 100644 index 000000000..63909aeed --- /dev/null +++ b/datapath-windows/ovsext/OvsVxlan.c @@ -0,0 +1,507 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "precomp.h" +#include "OvsNetProto.h" +#include "OvsIoctl.h" +#include "OvsSwitch.h" +#include "OvsVport.h" +#include "OvsFlow.h" +#include "OvsVxlan.h" +#include "OvsIpHelper.h" +#include "OvsChecksum.h" +#include "OvsUser.h" +#include "OvsPacketIO.h" +#include "OvsFlow.h" +#include "OvsPacketParser.h" +#include "OvsChecksum.h" + +#pragma warning( push ) +#pragma warning( disable:4127 ) + + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_VXLAN +#include "OvsDebug.h" + +/* Helper macro to check if a VXLAN ID is valid. */ +#define VXLAN_ID_IS_VALID(vxlanID) (0 < (vxlanID) && (vxlanID) <= 0xffffff) +#define VXLAN_TUNNELID_TO_VNI(_tID) (UINT32)(((UINT64)(_tID)) >> 40) +#define VXLAN_VNI_TO_TUNNELID(_vni) (((UINT64)(_vni)) << 40) +#define IP_DF_NBO 0x0040 +#define VXLAN_DEFAULT_TTL 64 +#define VXLAN_MULTICAST_TTL 64 +#define VXLAN_DEFAULT_INSTANCE_ID 1 + +/* Move to a header file */ +extern POVS_SWITCH_CONTEXT gOvsSwitchContext; + +NTSTATUS +OvsInitVxlanTunnel(POVS_VPORT_ENTRY vport, + POVS_VPORT_ADD_REQUEST addReq) +{ + POVS_VXLAN_VPORT vxlanPort; + NTSTATUS status = STATUS_SUCCESS; + + ASSERT(addReq->type == OVSWIN_VPORT_TYPE_VXLAN); + + vxlanPort = OvsAllocateMemory(sizeof (*vxlanPort)); + if (vxlanPort == NULL) { + status = STATUS_INSUFFICIENT_RESOURCES; + } else { + RtlZeroMemory(vxlanPort, sizeof (*vxlanPort)); + vxlanPort->dstPort = addReq->dstPort; + /* + * since we are installing the WFP filter before the port is created + * We need to check if it is the same number + * XXX should be removed later + */ + ASSERT(vxlanPort->dstPort == VXLAN_UDP_PORT); + vport->priv = (PVOID)vxlanPort; + } + return status; +} + + +VOID +OvsCleanupVxlanTunnel(POVS_VPORT_ENTRY vport) +{ + if (vport->ovsType != OVSWIN_VPORT_TYPE_VXLAN || + vport->priv == NULL) { + return; + } + + OvsFreeMemory(vport->priv); + vport->priv = NULL; +} 
+ + +/* + *---------------------------------------------------------------------------- + * OvsDoEncapVxlan + * Encapsulates the packet. + *---------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + POVS_FWD_INFO fwdInfo, + POVS_PACKET_HDR_INFO layers, + POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST *newNbl) +{ + NDIS_STATUS status; + PNET_BUFFER curNb; + PMDL curMdl; + PUINT8 bufferStart; + EthHdr *ethHdr; + IPHdr *ipHdr; + UDPHdr *udpHdr; + VXLANHdr *vxlanHdr; + UINT32 headRoom = OvsGetVxlanTunHdrSize(); + UINT32 packetLength; + + /* + * XXX: the assumption currently is that the NBL is owned by OVS, and + * headroom has already been allocated as part of allocating the NBL and + * MDL. + */ + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + packetLength = NET_BUFFER_DATA_LENGTH(curNb); + if (layers->isTcp) { + NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo; + + tsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl, + TcpLargeSendNetBufferListInfo); + OVS_LOG_TRACE("MSS %u packet len %u", tsoInfo.LsoV1Transmit.MSS, packetLength); + if (tsoInfo.LsoV1Transmit.MSS) { + OVS_LOG_TRACE("l4Offset %d", layers->l4Offset); + *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers, + tsoInfo.LsoV1Transmit.MSS, headRoom); + if (*newNbl == NULL) { + OVS_LOG_ERROR("Unable to segment NBL"); + return NDIS_STATUS_FAILURE; + } + } + } + /* If we didn't split the packet above, make a copy now */ + if (*newNbl == NULL) { + *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom, + FALSE /*NBL info*/); + if (*newNbl == NULL) { + OVS_LOG_ERROR("Unable to copy NBL"); + return NDIS_STATUS_FAILURE; + } + } + + curNbl = *newNbl; + for (curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); curNb != NULL; + curNb = curNb->Next) { + status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL); + if (status != NDIS_STATUS_SUCCESS) { + goto ret_error; + } + + curMdl = 
NET_BUFFER_CURRENT_MDL(curNb); + bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); + if (!bufferStart) { + status = NDIS_STATUS_RESOURCES; + goto ret_error; + } + + bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + if (NET_BUFFER_NEXT_NB(curNb)) { + OVS_LOG_TRACE("nb length %u next %u", NET_BUFFER_DATA_LENGTH(curNb), + NET_BUFFER_DATA_LENGTH(curNb->Next)); + } + + /* L2 header */ + ethHdr = (EthHdr *)bufferStart; + NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr, + sizeof ethHdr->Destination + sizeof ethHdr->Source); + ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) == + (PCHAR)&fwdInfo->srcMacAddr); + ethHdr->Type = htons(ETH_TYPE_IPV4); + + // XXX: question: there are fields in the OvsIPv4TunnelKey for ttl and such, + // should we use those values instead? or will they end up being + // uninitialized; + /* IP header */ + ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); + + ipHdr->ihl = sizeof *ipHdr / 4; + ipHdr->version = IPV4; + ipHdr->tos = 0; + ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr); + ipHdr->id = 0; + ipHdr->frag_off = IP_DF_NBO; + ipHdr->ttl = tunKey->ttl ? 
tunKey->ttl : VXLAN_DEFAULT_TTL; + ipHdr->protocol = IPPROTO_UDP; + ASSERT(tunKey->dst == fwdInfo->dstIpAddr); + ASSERT(tunKey->src == fwdInfo->srcIpAddr || tunKey->src == 0); + ipHdr->saddr = fwdInfo->srcIpAddr; + ipHdr->daddr = fwdInfo->dstIpAddr; + ipHdr->check = 0; + ipHdr->check = IPChecksum((UINT8 *)ipHdr, sizeof *ipHdr, 0); + + /* UDP header */ + udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr); + udpHdr->source = htons(tunKey->flow_hash | 32768); + udpHdr->dest = VXLAN_UDP_PORT_NBO; + udpHdr->len = htons(NET_BUFFER_DATA_LENGTH(curNb) - headRoom + + sizeof *udpHdr + sizeof *vxlanHdr); + udpHdr->check = 0; + + /* VXLAN header */ + vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof *udpHdr); + vxlanHdr->flags1 = 0; + vxlanHdr->locallyReplicate = 0; + vxlanHdr->flags2 = 0; + vxlanHdr->reserved1 = 0; + if (tunKey->flags | OVS_TNL_F_KEY) { + vxlanHdr->vxlanID = VXLAN_TUNNELID_TO_VNI(tunKey->tunnelId); + vxlanHdr->instanceID = 1; + } + vxlanHdr->reserved2 = 0; + } + return STATUS_SUCCESS; + +ret_error: + OvsCompleteNBL(switchContext, *newNbl, TRUE); + *newNbl = NULL; + return status; +} + + +/* + *---------------------------------------------------------------------------- + * OvsEncapVxlan -- + * Encapsulates the packet if L2/L3 for destination resolves. Otherwise, + * enqueues a callback that does encapsulatation after resolution. 
+ *---------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsEncapVxlan(PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + POVS_SWITCH_CONTEXT switchContext, + VOID *completionList, + POVS_PACKET_HDR_INFO layers, + PNET_BUFFER_LIST *newNbl) +{ + NTSTATUS status; + OVS_FWD_INFO fwdInfo; + UNREFERENCED_PARAMETER(completionList); + + status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo); + if (status != STATUS_SUCCESS) { + OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL); + // return NDIS_STATUS_PENDING; + /* + * XXX: Don't know if the completionList will make any sense when + * accessed in the callback. Make sure the caveats are known. + * + * XXX: This code will work once we are able to grab locks in the + * callback. + */ + return NDIS_STATUS_FAILURE; + } + + return OvsDoEncapVxlan(curNbl, tunKey, &fwdInfo, layers, + switchContext, newNbl); +} + + +/* + *---------------------------------------------------------------------------- + * OvsIpHlprCbVxlan -- + * Callback function for IP helper. 
+ * XXX: not used currently + *---------------------------------------------------------------------------- + */ +static VOID +OvsIpHlprCbVxlan(PNET_BUFFER_LIST curNbl, + UINT32 inPort, + OvsIPv4TunnelKey *tunKey, + PVOID cbData1, + PVOID cbData2, + NTSTATUS result, + POVS_FWD_INFO fwdInfo) +{ + OVS_PACKET_HDR_INFO layers; + OvsFlowKey key; + NDIS_STATUS status; + UNREFERENCED_PARAMETER(inPort); + + status = OvsExtractFlow(curNbl, inPort, &key, &layers, NULL); + if (result == STATUS_SUCCESS) { + status = OvsDoEncapVxlan(curNbl, tunKey, fwdInfo, &layers, + (POVS_SWITCH_CONTEXT)cbData1, NULL); + } else { + status = NDIS_STATUS_FAILURE; + } + + if (status != NDIS_STATUS_SUCCESS) { + // XXX: Free up the NBL; + return; + } + + OvsLookupFlowOutput((POVS_SWITCH_CONTEXT)cbData1, cbData2, curNbl); +} + +/* + *---------------------------------------------------------------------------- + * OvsCalculateUDPChecksum + * Calculate UDP checksum + *---------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsCalculateUDPChecksum(PNET_BUFFER_LIST curNbl, + PNET_BUFFER curNb, + IPHdr *ipHdr, + UDPHdr *udpHdr, + UINT32 packetLength) +{ + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; + UINT16 checkSum; + + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo); + + /* Next check if UDP checksum has been calculated. 
*/ + if (!csumInfo.Receive.UdpChecksumSucceeded) { + UINT32 l4Payload; + + checkSum = udpHdr->check; + + l4Payload = packetLength - sizeof(EthHdr) - ipHdr->ihl * 4; + udpHdr->check = 0; + udpHdr->check = + IPPseudoChecksum((UINT32 *)&ipHdr->saddr, + (UINT32 *)&ipHdr->daddr, + IPPROTO_UDP, (UINT16)l4Payload); + udpHdr->check = CalculateChecksumNB(curNb, (UINT16)l4Payload, + sizeof(EthHdr) + ipHdr->ihl * 4); + if (checkSum != udpHdr->check) { + OVS_LOG_TRACE("UDP checksum incorrect."); + return NDIS_STATUS_INVALID_PACKET; + } + } + + csumInfo.Receive.UdpChecksumSucceeded = 1; + NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value; + return NDIS_STATUS_SUCCESS; +} + +/* + *---------------------------------------------------------------------------- + * OvsDoDecapVxlan + * Decapsulates to tunnel header in 'curNbl' and puts into 'tunKey'. + *---------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + PNET_BUFFER_LIST *newNbl) +{ + PNET_BUFFER curNb; + PMDL curMdl; + EthHdr *ethHdr; + IPHdr *ipHdr; + UDPHdr *udpHdr; + VXLANHdr *vxlanHdr; + UINT32 tunnelSize = 0, packetLength = 0; + PUINT8 bufferStart; + NDIS_STATUS status; + + /* Check the the length of the UDP payload */ + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + packetLength = NET_BUFFER_DATA_LENGTH(curNb); + tunnelSize = OvsGetVxlanTunHdrSize(); + if (packetLength <= tunnelSize) { + return NDIS_STATUS_INVALID_LENGTH; + } + + /* + * Create a copy of the NBL so that we have all the headers in one MDL. + */ + *newNbl = OvsPartialCopyNBL(switchContext, curNbl, + tunnelSize + OVS_DEFAULT_COPY_SIZE, 0, + TRUE /*copy NBL info */); + + if (*newNbl == NULL) { + return NDIS_STATUS_RESOURCES; + } + + /* XXX: Handle VLAN header. 
*/ + curNbl = *newNbl; + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority) + + NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + if (!bufferStart) { + status = NDIS_STATUS_RESOURCES; + goto dropNbl; + } + + ethHdr = (EthHdr *)bufferStart; + /* XXX: Handle IP options. */ + ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); + tunKey->src = ipHdr->saddr; + tunKey->dst = ipHdr->daddr; + tunKey->tos = ipHdr->tos; + tunKey->ttl = ipHdr->ttl; + tunKey->pad = 0; + udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr); + + /* Validate if NIC has indicated checksum failure. */ + status = OvsValidateUDPChecksum(curNbl, udpHdr->check == 0); + if (status != NDIS_STATUS_SUCCESS) { + goto dropNbl; + } + + /* Calculate and verify UDP checksum if NIC didn't do it. */ + if (udpHdr->check != 0) { + status = OvsCalculateUDPChecksum(curNbl, curNb, ipHdr, udpHdr, packetLength); + if (status != NDIS_STATUS_SUCCESS) { + goto dropNbl; + } + } + + vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof *udpHdr); + if (vxlanHdr->instanceID) { + tunKey->flags = OVS_TNL_F_KEY; + tunKey->tunnelId = VXLAN_VNI_TO_TUNNELID(vxlanHdr->vxlanID); + } else { + tunKey->flags = 0; + tunKey->tunnelId = 0; + } + + /* Clear out the receive flag for the inner packet. 
*/ + NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0; + NdisAdvanceNetBufferDataStart(curNb, tunnelSize, FALSE, NULL); + return NDIS_STATUS_SUCCESS; + +dropNbl: + OvsCompleteNBL(switchContext, *newNbl, TRUE); + *newNbl = NULL; + return status; +} + + +NDIS_STATUS +OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, + OvsIPv4TunnelKey *tunnelKey) +{ + NDIS_STATUS status = NDIS_STATUS_FAILURE; + UDPHdr udpStorage; + const UDPHdr *udp; + VXLANHdr *VxlanHeader; + VXLANHdr VxlanHeaderBuffer; + struct IPHdr ip_storage; + const struct IPHdr *nh; + OVS_PACKET_HDR_INFO layers; + + layers.value = 0; + + do { + nh = OvsGetIp(packet, layers.l3Offset, &ip_storage); + if (nh) { + layers.l4Offset = layers.l3Offset + nh->ihl * 4; + } else { + break; + } + + /* make sure it's a VXLAN packet */ + udp = OvsGetUdp(packet, layers.l4Offset, &udpStorage); + if (udp) { + layers.l7Offset = layers.l4Offset + sizeof *udp; + } else { + break; + } + + /* XXX Should be tested against the dynamic port # in the VXLAN vport */ + ASSERT(udp->dest == RtlUshortByteSwap(VXLAN_UDP_PORT)); + + VxlanHeader = (VXLANHdr *)OvsGetPacketBytes(packet, + sizeof(*VxlanHeader), + layers.l7Offset, + &VxlanHeaderBuffer); + + if (VxlanHeader) { + tunnelKey->src = nh->saddr; + tunnelKey->dst = nh->daddr; + tunnelKey->ttl = nh->ttl; + tunnelKey->tos = nh->tos; + if (VxlanHeader->instanceID) { + tunnelKey->flags = OVS_TNL_F_KEY; + tunnelKey->tunnelId = VXLAN_VNI_TO_TUNNELID(VxlanHeader->vxlanID); + } else { + tunnelKey->flags = 0; + tunnelKey->tunnelId = 0; + } + } else { + break; + } + status = NDIS_STATUS_SUCCESS; + + } while(FALSE); + + return status; +} + +#pragma warning( pop ) diff --git a/datapath-windows/ovsext/OvsVxlan.h b/datapath-windows/ovsext/OvsVxlan.h new file mode 100644 index 000000000..55cfc8203 --- /dev/null +++ b/datapath-windows/ovsext/OvsVxlan.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2014 VMware, Inc. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_VXLAN_H_
+#define __OVS_VXLAN_H_ 1
+
+#include "OvsNetProto.h"
+/*
+ * Per-vport VXLAN state.
+ * NOTE(review): judging by the names, 'dstPort' is the UDP destination port
+ * of the tunnel and the remaining fields are packet counters for the fast
+ * and slow paths -- confirm against the code that updates them.
+ */
+typedef struct _OVS_VXLAN_VPORT {
+    UINT32 dstPort;
+    UINT64 inPkts;
+    UINT64 outPkts;
+    UINT64 slowInPkts;
+    UINT64 slowOutPkts;
+    /*
+     * To be filled
+     */
+} OVS_VXLAN_VPORT, *POVS_VXLAN_VPORT;
+
+/*
+ * VXLAN header.
+ *
+ * Two 32-bit words: the bit-fields up to and including 'reserved1' add up to
+ * 32 bits, and 'vxlanID' plus 'reserved2' add up to another 32. The mapping
+ * of these bit-fields onto the wire format depends on the compiler's
+ * bit-field allocation order.
+ */
+typedef struct VXLANHdr {
+    /* Flags. */
+    UINT32 flags1:2;
+    /* Packet needs replication to multicast group (used for multicast proxy). */
+    UINT32 locallyReplicate:1;
+    /* Instance ID flag, must be set to 1. */
+    UINT32 instanceID:1;
+    /* Flags. */
+    UINT32 flags2:4;
+    /* Reserved. */
+    UINT32 reserved1:24;
+    /* VXLAN ID. */
+    UINT32 vxlanID:24;
+    /* Reserved. */
+    UINT32 reserved2:8;
+} VXLANHdr;
+
+NTSTATUS OvsInitVxlanTunnel(POVS_VPORT_ENTRY vport,
+                            POVS_VPORT_ADD_REQUEST addReq);
+
+VOID OvsCleanupVxlanTunnel(POVS_VPORT_ENTRY vport);
+
+/*
+ * Fills 'tunnelKey' from the IP, UDP and VXLAN headers of 'packet' without
+ * modifying the packet. Returns NDIS_STATUS_SUCCESS if all three headers
+ * could be read, NDIS_STATUS_FAILURE otherwise.
+ */
+NDIS_STATUS OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet,
+                                  OvsIPv4TunnelKey *tunnelKey);
+
+NDIS_STATUS OvsEncapVxlan(PNET_BUFFER_LIST curNbl,
+                          OvsIPv4TunnelKey *tunKey,
+                          POVS_SWITCH_CONTEXT switchContext,
+                          VOID *completionList,
+                          POVS_PACKET_HDR_INFO layers,
+                          PNET_BUFFER_LIST *newNbl);
+
+/*
+ * Fills 'tunKey' from the outer headers of 'curNbl'. On success '*newNbl' is
+ * a copy of 'curNbl' with the tunnel headers stripped; if a failure occurs
+ * after the copy was made, the copy is completed and '*newNbl' is set to
+ * NULL.
+ */
+NDIS_STATUS OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext,
+                            PNET_BUFFER_LIST curNbl,
+                            OvsIPv4TunnelKey *tunKey,
+                            PNET_BUFFER_LIST *newNbl);
+
+/*
+ * Returns the combined size of the Ethernet, IP, UDP and VXLAN header
+ * structures. Neither a VLAN tag nor IP options are accounted for.
+ */
+static __inline UINT32
+OvsGetVxlanTunHdrSize(VOID)
+{
+    /* XXX: Can L2 include VLAN at all? */
+    return sizeof (EthHdr) + sizeof (IPHdr) + sizeof (UDPHdr) +
+           sizeof (VXLANHdr);
+}
+
+/*
+ * VXLAN UDP port as a host-order number, and the same 16-bit value with its
+ * two bytes swapped (4789 == 0x12B5, swapped 0xB512).
+ */
+#define VXLAN_UDP_PORT 4789
+#define VXLAN_UDP_PORT_NBO 0xB512
+
+#endif /* __OVS_VXLAN_H_ */
diff --git a/datapath-windows/ovsext/ovsext.inf b/datapath-windows/ovsext/ovsext.inf
new file mode 100644
index 000000000..d1f68eb6f
--- /dev/null
+++ b/datapath-windows/ovsext/ovsext.inf
@@ -0,0 +1,85 @@
+;
+; Copyright (c) VMWare. All Rights Reserved.
+;
+
+[version]
+Signature = "$Windows NT$"
+Class = NetService
+ClassGUID = {4D36E974-E325-11CE-BFC1-08002BE10318}
+Provider = %VMWare%
+CatalogFile = ovsext.cat
+DriverVer = 10/10/2013,1.0
+
+[Manufacturer]
+%VMWare%=VMWare,NTx86,NTia64,NTamd64
+
+[VMWare.NTx86]
+%OVSExt_Desc%=Install, OVSExt
+
+[VMWare.NTia64]
+%OVSExt_Desc%=Install, OVSExt
+
+[VMWare.NTamd64]
+%OVSExt_Desc%=Install, OVSExt
+
+;-------------------------------------------------------------------------
+; Installation Section
+;-------------------------------------------------------------------------
+[Install]
+AddReg=Inst_Ndi
+Characteristics=0x40000
+NetCfgInstanceId="{583CC151-73EC-4A6A-8B47-578297AD7623}"
+Copyfiles = OVSExt.copyfiles.sys
+
+[SourceDisksNames]
+1=%OVSExt_Desc%,"",,
+
+[SourceDisksFiles]
+OVSExt.sys=1
+
+[DestinationDirs]
+DefaultDestDir=12
+OVSExt.copyfiles.sys=12
+
+[OVSExt.copyfiles.sys]
+OVSExt.sys,,,2
+
+
+;-------------------------------------------------------------------------
+; Ndi installation support
+;-------------------------------------------------------------------------
+[Inst_Ndi]
+HKR, Ndi,Service,,"OVSExt"
+HKR, Ndi,CoServices,0x00010000,"OVSExt"
+HKR, Ndi,HelpText,,%OVSExt_HelpText%
+HKR, Ndi,FilterClass,,"ms_switch_forward"
+HKR, Ndi,FilterType,0x00010001,0x00000002
+HKR, Ndi\Interfaces,UpperRange,,"noupper"
+HKR, Ndi\Interfaces,LowerRange,,"nolower"
+HKR, Ndi\Interfaces, FilterMediaTypes,,"vmnetextension"
+HKR, Ndi,FilterRunType, 0x00010001, 2 ; optional
+
+;------------------------------------------------------------------------- +; Service installation support, common.EventLog here is to demonstrate how to +; write an event log +;------------------------------------------------------------------------- +[Install.Services] +AddService=OVSExt,,OVSExt_Service_Inst;, common.EventLog + +[OVSExt_Service_Inst] +DisplayName = %OVSExt_Desc% +ServiceType = 1 ;SERVICE_KERNEL_DRIVER +StartType = 1 ;SERVICE_SYSTEM_START +ErrorControl = 1 ;SERVICE_ERROR_NORMAL +ServiceBinary = %12%\OVSExt.sys +LoadOrderGroup = NDIS +Description = %OVSExt_Desc% +AddReg = Common.Params.reg + +[Install.Remove.Services] +DelService=OVSExt,0x200 + +[Strings] +VMWare = "VMWare" +OVSExt_Desc = "VMWare OVS Extension" +OVSExt_HelpText = "VMWare OVS forwarding switch extension" diff --git a/datapath-windows/ovsext/ovsext.rc b/datapath-windows/ovsext/ovsext.rc new file mode 100644 index 000000000..8f6e12143 --- /dev/null +++ b/datapath-windows/ovsext/ovsext.rc @@ -0,0 +1,23 @@ +// +// Copyright (c) Microsoft Corporation. All Rights Reserved. +// + +#include <windows.h> +#include <ntverp.h> + +/*-----------------------------------------------*/ +/* the following lines are specific to this file */ +/*-----------------------------------------------*/ + +/* VER_FILETYPE, VER_FILESUBTYPE, VER_FILEDESCRIPTION_STR + * and VER_INTERNALNAME_STR must be defined before including COMMON.VER + * The strings don't need a '\0', since common.ver has them. 
+ */ +#define VER_FILETYPE VFT_DRV +#define VER_FILESUBTYPE VFT2_DRV_NETWORK +#define VER_FILEDESCRIPTION_STR "VMWare OVS Extension" +#define VER_INTERNALNAME_STR "OVSExt.SYS" +#define VER_ORIGINALFILENAME_STR "OVSExt.SYS" +#define VER_LANGNEUTRAL + +#include "common.ver" diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj new file mode 100644 index 000000000..9728f2003 --- /dev/null +++ b/datapath-windows/ovsext/ovsext.vcxproj @@ -0,0 +1,164 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Win8.1 Debug|x64"> + <Configuration>Win8.1 Debug</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Win8 Debug|x64"> + <Configuration>Win8 Debug</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Win8.1 Release|x64"> + <Configuration>Win8.1 Release</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Win8 Release|x64"> + <Configuration>Win8 Release</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="PropertySheets"> + <DriverType>WDM</DriverType> + <ConfigurationType>Driver</ConfigurationType> + </PropertyGroup> + <PropertyGroup Label="Globals"> + <VCTargetsPath Condition="'$(VCTargetsPath11)' != '' and '$(VisualStudioVersion)' == '11.0'">$(VCTargetsPath11)</VCTargetsPath> + <Configuration>Win8 Debug</Configuration> + <Platform Condition="'$(Platform)' == ''">Win32</Platform> + <SampleGuid>{0D37F250-E766-44C7-90B4-D7E07E77D1AA}</SampleGuid> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Label="Globals"> + <ProjectGuid>{63FE215D-98BE-4440-8081-C6160EFB80FA}</ProjectGuid> + 
<RootNamespace>$(MSBuildProjectName)</RootNamespace> + </PropertyGroup> + <PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Win8.1 Debug|x64'"> + <TargetVersion>WindowsV6.3</TargetVersion> + <UseDebugLibraries>True</UseDebugLibraries> + <PlatformToolset>WindowsKernelModeDriver8.1</PlatformToolset> + </PropertyGroup> + <PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Win8 Debug|x64'"> + <TargetVersion>Win8</TargetVersion> + <UseDebugLibraries>True</UseDebugLibraries> + <PlatformToolset>WindowsKernelModeDriver8.1</PlatformToolset> + </PropertyGroup> + <PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Win8.1 Release|x64'"> + <TargetVersion>WindowsV6.3</TargetVersion> + <UseDebugLibraries>False</UseDebugLibraries> + <PlatformToolset>WindowsKernelModeDriver8.1</PlatformToolset> + </PropertyGroup> + <PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Win8 Release|x64'"> + <TargetVersion>Win8</TargetVersion> + <UseDebugLibraries>False</UseDebugLibraries> + <PlatformToolset>WindowsKernelModeDriver8.1</PlatformToolset> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <PropertyGroup> + <OutDir>$(IntDir)</OutDir> + </PropertyGroup> + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Win8 Release|x64'"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" /> + </ImportGroup> + <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Win8.1 Release|x64'" Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" /> + </ImportGroup> + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Win8 Debug|x64'"> + <Import 
Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" /> + </ImportGroup> + <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Win8.1 Debug|x64'" Label="PropertySheets"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" /> + </ImportGroup> + <ItemGroup Label="WrappedTaskItems"> + <ClInclude Include="OvsAtomic.h" /> + <ClInclude Include="OvsBufferMgmt.h" /> + <ClInclude Include="OvsChecksum.h" /> + <ClInclude Include="OvsDebug.h" /> + <ClInclude Include="OvsEth.h" /> + <ClInclude Include="OvsEvent.h" /> + <ClInclude Include="OvsFlow.h" /> + <ClInclude Include="OvsIoctl.h" /> + <ClInclude Include="OvsIpHelper.h" /> + <ClInclude Include="OvsJhash.h" /> + <ClInclude Include="OvsNetProto.h" /> + <ClInclude Include="OvsOid.h" /> + <ClInclude Include="OvsPacketParser.h" /> + <ClInclude Include="OvsSwitch.h" /> + <ClInclude Include="OvsTunnel.h" /> + <ClInclude Include="OvsTunnelIntf.h" /> + <ClInclude Include="OvsTypes.h" /> + <ClInclude Include="OvsUser.h" /> + <ClInclude Include="OvsUtil.h" /> + <ClInclude Include="OvsVport.h" /> + <ClInclude Include="OvsVxlan.h" /> + <ClInclude Include="precomp.h" /> + </ItemGroup> + <PropertyGroup> + <TargetName>OVSExt</TargetName> + </PropertyGroup> + <ItemDefinitionGroup> + <ClCompile> + <PreprocessorDefinitions>%(PreprocessorDefinitions);NDIS_WDM=1;NDIS630=1;OVS_WIN_DP=1</PreprocessorDefinitions> + </ClCompile> + <Midl> + <PreprocessorDefinitions>%(PreprocessorDefinitions);NDIS_WDM=1;NDIS630=1</PreprocessorDefinitions> + </Midl> + <ResourceCompile> + <PreprocessorDefinitions>%(PreprocessorDefinitions);NDIS_WDM=1;NDIS630=1</PreprocessorDefinitions> + </ResourceCompile> + </ItemDefinitionGroup> + <ItemDefinitionGroup> + <Link> + 
<AdditionalDependencies>%(AdditionalDependencies);$(DDK_LIB_PATH)\ndis.lib;$(DDK_LIB_PATH)\fwpkclnt.lib;$(SDK_LIB_PATH)\uuid.lib;$(DDK_LIB_PATH)\netio.lib</AdditionalDependencies> + </Link> + <ClCompile> + <TreatWarningAsError>true</TreatWarningAsError> + <WarningLevel>Level4</WarningLevel> + <ExceptionHandling> + </ExceptionHandling> + <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Win8 Debug|x64'">$(IntDir);%(AdditionalIncludeDirectories);..\..</AdditionalIncludeDirectories> + <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Win8.1 Debug|x64'">$(IntDir);%(AdditionalIncludeDirectories);..\..</AdditionalIncludeDirectories> + <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Win8 Release|x64'">$(IntDir);%(AdditionalIncludeDirectories);..\..</AdditionalIncludeDirectories> + <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Win8.1 Release|x64'">$(IntDir);%(AdditionalIncludeDirectories);..\..</AdditionalIncludeDirectories> + </ClCompile> + </ItemDefinitionGroup> + <ItemGroup> + <ClCompile Include="OvsDriver.c" /> + <ClCompile Include="OvsJhash.c" /> + <ClCompile Include="OvsOid.c" /> + <ClCompile Include="OvsPacketIO.c" /> + <ClCompile Include="OvsPacketParser.c" /> + <ClCompile Include="OvsBufferMgmt.c" /> + <ClCompile Include="OvsChecksum.c" /> + <ClCompile Include="OvsIpHelper.c" /> + <ClCompile Include="OvsTunnel.c" /> + <ClCompile Include="OvsTunnelFilter.c" /> + <ClCompile Include="OvsVxlan.c" /> + <ClCompile Include="OvsActions.c" /> + <ClCompile Include="OvsDebug.c" /> + <ClCompile Include="OvsEvent.c" /> + <ClCompile Include="OvsFlow.c" /> + <ClCompile Include="OvsUser.c" /> + <ClCompile Include="OvsIoctl.c" /> + <ClCompile Include="OvsSwitch.c" /> + <ClCompile Include="OvsUtil.c" /> + <ClCompile Include="OvsVport.c" /> + <ClCompile Include="precompsrc.c"> + <AdditionalIncludeDirectories>;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + 
<PreCompiledHeaderFile>precomp.h</PreCompiledHeaderFile> + <PreCompiledHeader>Create</PreCompiledHeader> + <PreCompiledHeaderOutputFile>$(IntDir)\precomp.h.pch</PreCompiledHeaderOutputFile> + </ClCompile> + <ResourceCompile Include="ovsext.rc" /> + </ItemGroup> + <ItemGroup> + <Inf Exclude="@(Inf)" Include="*.inf" /> + <FilesToPackage Include="$(TargetPath)" /> + </ItemGroup> + <ItemGroup> + <None Exclude="@(None)" Include="*.txt;*.htm;*.html" /> + <None Exclude="@(None)" Include="*.ico;*.cur;*.bmp;*.dlg;*.rct;*.gif;*.jpg;*.jpeg;*.wav;*.jpe;*.tiff;*.tif;*.png;*.rc2" /> + <None Exclude="@(None)" Include="*.def;*.bat;*.hpj;*.asmx" /> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> +</Project>
\ No newline at end of file diff --git a/datapath-windows/ovsext/ovsext.vcxproj.user b/datapath-windows/ovsext/ovsext.vcxproj.user new file mode 100644 index 000000000..7169f02fb --- /dev/null +++ b/datapath-windows/ovsext/ovsext.vcxproj.user @@ -0,0 +1,15 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Win8 Debug|x64'"> + <SignMode>TestSign</SignMode> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Win8.1 Debug|x64'"> + <SignMode>TestSign</SignMode> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Win8 Release|x64'"> + <SignMode>TestSign</SignMode> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Win8.1 Release|x64'"> + <SignMode>TestSign</SignMode> + </PropertyGroup> +</Project> diff --git a/datapath-windows/ovsext/precomp.h b/datapath-windows/ovsext/precomp.h new file mode 100644 index 000000000..45e72de6f --- /dev/null +++ b/datapath-windows/ovsext/precomp.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <ndis.h> +#include <netiodef.h> +#include <intsafe.h> +#include <ntintsafe.h> +#include <ntstrsafe.h> +#include <Strsafe.h> + +#include "OvsTypes.h" +#include "..\include\OvsPub.h" +#include "OvsUtil.h" +/* + * Include openvswitch.h from userspace. Changing the location the file from + * include/linux is pending discussion. + */ +#include "include\linux\openvswitch.h" diff --git a/datapath-windows/ovsext/precompsrc.c b/datapath-windows/ovsext/precompsrc.c new file mode 100644 index 000000000..133b6872d --- /dev/null +++ b/datapath-windows/ovsext/precompsrc.c @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "precomp.h" |