summaryrefslogtreecommitdiff
path: root/datapath-windows/ovsext
diff options
context:
space:
mode:
authorSaurabh Shah <ssaurabh@vmware.com>2014-07-27 17:26:58 -0700
committerBen Pfaff <blp@nicira.com>2014-07-28 10:10:43 -0700
commitc803536e1cf531e29a31edfe740666ee6045d555 (patch)
tree7fc3591d38d16877cd1370687a1aa2015a0c6b29 /datapath-windows/ovsext
parent79fe0f4611b60fe0fdf43206bc06201fd724a18d (diff)
downloadopenvswitch-c803536e1cf531e29a31edfe740666ee6045d555.tar.gz
datapath-windows: Kernel module for HyperV.
The kernel switch extension has support for bridged back forwarding & tunneling over VXLAN. There is no Netlink integration as it is still being worked out. Co-Authored-By: Ankur Sharma <ankursharma@vmware.com> Signed-off-by: Ankur Sharma <ankursharma@vmware.com> Co-Authored-By: Eitan Eliahu <eliahue@vmware.com> Signed-off-by: Eitan Eliahu <eliahue@vmware.com> Co-Authored-By: Guolin Yang <gyang@vmware.com> Signed-off-by: Guolin Yang <gyang@vmware.com> Co-Authored-By: Linda Sun <lsun@vmware.com> Signed-off-by: Linda Sun <lsun@vmware.com> Co-Authored-By: Nithin Raju <nithin@vmware.com> Signed-off-by: Nithin Raju <nithin@vmware.com> Signed-off-by: Saurabh Shah <ssaurabh@vmware.com> Signed-off-by: Ben Pfaff <blp@nicira.com>
Diffstat (limited to 'datapath-windows/ovsext')
-rw-r--r--datapath-windows/ovsext/OvsActions.c1522
-rw-r--r--datapath-windows/ovsext/OvsAtomic.h32
-rw-r--r--datapath-windows/ovsext/OvsBufferMgmt.c1535
-rw-r--r--datapath-windows/ovsext/OvsBufferMgmt.h124
-rw-r--r--datapath-windows/ovsext/OvsChecksum.c578
-rw-r--r--datapath-windows/ovsext/OvsChecksum.h39
-rw-r--r--datapath-windows/ovsext/OvsDebug.c58
-rw-r--r--datapath-windows/ovsext/OvsDebug.h90
-rw-r--r--datapath-windows/ovsext/OvsDriver.c184
-rw-r--r--datapath-windows/ovsext/OvsEth.h450
-rw-r--r--datapath-windows/ovsext/OvsEvent.c496
-rw-r--r--datapath-windows/ovsext/OvsEvent.h50
-rw-r--r--datapath-windows/ovsext/OvsFlow.c978
-rw-r--r--datapath-windows/ovsext/OvsFlow.h78
-rw-r--r--datapath-windows/ovsext/OvsIoctl.c758
-rw-r--r--datapath-windows/ovsext/OvsIoctl.h40
-rw-r--r--datapath-windows/ovsext/OvsIpHelper.c1689
-rw-r--r--datapath-windows/ovsext/OvsIpHelper.h128
-rw-r--r--datapath-windows/ovsext/OvsJhash.c129
-rw-r--r--datapath-windows/ovsext/OvsJhash.h30
-rw-r--r--datapath-windows/ovsext/OvsNetProto.h368
-rw-r--r--datapath-windows/ovsext/OvsOid.c855
-rw-r--r--datapath-windows/ovsext/OvsOid.h26
-rw-r--r--datapath-windows/ovsext/OvsPacketIO.c441
-rw-r--r--datapath-windows/ovsext/OvsPacketIO.h61
-rw-r--r--datapath-windows/ovsext/OvsPacketParser.c303
-rw-r--r--datapath-windows/ovsext/OvsPacketParser.h144
-rw-r--r--datapath-windows/ovsext/OvsSwitch.c529
-rw-r--r--datapath-windows/ovsext/OvsSwitch.h169
-rw-r--r--datapath-windows/ovsext/OvsTunnel.c346
-rw-r--r--datapath-windows/ovsext/OvsTunnel.h57
-rw-r--r--datapath-windows/ovsext/OvsTunnelFilter.c343
-rw-r--r--datapath-windows/ovsext/OvsTunnelIntf.h25
-rw-r--r--datapath-windows/ovsext/OvsTypes.h32
-rw-r--r--datapath-windows/ovsext/OvsUser.c859
-rw-r--r--datapath-windows/ovsext/OvsUser.h114
-rw-r--r--datapath-windows/ovsext/OvsUtil.c89
-rw-r--r--datapath-windows/ovsext/OvsUtil.h78
-rw-r--r--datapath-windows/ovsext/OvsVport.c1416
-rw-r--r--datapath-windows/ovsext/OvsVport.h190
-rw-r--r--datapath-windows/ovsext/OvsVxlan.c507
-rw-r--r--datapath-windows/ovsext/OvsVxlan.h81
-rw-r--r--datapath-windows/ovsext/ovsext.inf85
-rw-r--r--datapath-windows/ovsext/ovsext.rc23
-rw-r--r--datapath-windows/ovsext/ovsext.vcxproj164
-rw-r--r--datapath-windows/ovsext/ovsext.vcxproj.user15
-rw-r--r--datapath-windows/ovsext/precomp.h31
-rw-r--r--datapath-windows/ovsext/precompsrc.c17
48 files changed, 16356 insertions, 0 deletions
diff --git a/datapath-windows/ovsext/OvsActions.c b/datapath-windows/ovsext/OvsActions.c
new file mode 100644
index 000000000..79fb50f07
--- /dev/null
+++ b/datapath-windows/ovsext/OvsActions.c
@@ -0,0 +1,1522 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "precomp.h"
+
+#include "OvsIoctl.h"
+#include "OvsSwitch.h"
+#include "OvsVport.h"
+#include "OvsEvent.h"
+#include "OvsUser.h"
+#include "OvsNetProto.h"
+#include "OvsFlow.h"
+#include "OvsVxlan.h"
+#include "OvsChecksum.h"
+#include "OvsPacketIO.h"
+
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_ACTION
+#include "OvsDebug.h"
+
+typedef struct _OVS_ACTION_STATS { /* Counters bumped by the action-execution code in this file. */
+ UINT64 rxVxlan; /* Rx: non-fragmented UDP packets addressed to the VXLAN port. */
+ UINT64 txVxlan; /* Tx: packets whose destination vport is a tunnel vport. */
+ UINT64 flowMiss; /* Second-stage lookup misses successfully queued to userspace. */
+ UINT64 flowUserspace;
+ UINT64 txTcp;
+ UINT32 failedFlowMiss; /* Second-stage lookup misses that could not be queued. */
+ UINT32 noVport; /* Vport not found, or not in OVS_STATE_CONNECTED. */
+ UINT32 failedFlowExtract; /* OvsExtractFlow() failures. */
+ UINT32 noResource;
+ UINT32 noCopiedNbl; /* OvsPartialCopyNBL() returned NULL. */
+ UINT32 failedEncap; /* Tunnel Tx encapsulation failures. */
+ UINT32 failedDecap; /* Tunnel Rx decapsulation failures. */
+ UINT32 cannotGrowDest; /* Growing/updating the NBL destination array failed. */
+ UINT32 zeroActionLen;
+ UINT32 failedChecksum; /* IP checksum validation failures on tunnel Rx. */
+} OVS_ACTION_STATS, *POVS_ACTION_STATS;
+
+OVS_ACTION_STATS ovsActionStats; /* Global instance of the counters above. */
+
+/*
+ * There is a lot of data that needs to be maintained while executing the
+ * pipeline as dictated by the actions of a flow, across different functions at
+ * different levels. Such data is put together in a 'context' structure. Care
+ * should be exercised while adding new members to the structure - only add ones
+ * that get used across multiple stages in the pipeline/get used in multiple
+ * functions.
+ *
+ * OVS_DEST_PORTS_ARRAY_MIN_SIZE is the number of destination elements
+ * requested from NDIS by OvsAddPorts() when the NBL has none available yet.
+ */
+#define OVS_DEST_PORTS_ARRAY_MIN_SIZE 2
+typedef struct OvsForwardingContext {
+ POVS_SWITCH_CONTEXT switchContext;
+ /* The NBL currently used in the pipeline. */
+ PNET_BUFFER_LIST curNbl;
+ /* NDIS forwarding detail for 'curNbl'. */
+ PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
+ /* Array of destination ports for 'curNbl'. */
+ PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts;
+ /* Send flags used while sending 'curNbl' into NDIS. */
+ ULONG sendFlags;
+ /* Total number of output ports, used + unused, in 'curNbl'. */
+ UINT32 destPortsSizeIn;
+ /* Total number of used output ports in 'curNbl'. */
+ UINT32 destPortsSizeOut;
+ /*
+ * NBLs that are not owned by OVS need to be tracked so that they can be
+ * freed/completed later; when this list is non-NULL, 'curNbl' is added to
+ * it instead of being freed directly.
+ */
+ OvsCompletionList *completionList;
+ /*
+ * vport number of 'curNbl' when it is passed from the PIF bridge to the INT
+ * bridge, i.e. during tunneling on the Rx side.
+ */
+ UINT32 srcVportNo;
+
+ /*
+ * Tunnel key:
+ * - specified in actions during tunneling Tx
+ * - extracted from an NBL during tunneling Rx
+ */
+ OvsIPv4TunnelKey tunKey;
+
+ /*
+ * Tunneling - Tx:
+ * To store the output port, when it is a tunneled port. We don't foresee
+ * multiple tunneled ports as outport for any given NBL.
+ */
+ POVS_VPORT_ENTRY tunnelTxNic;
+
+ /*
+ * Tunneling - Rx:
+ * Points to the Internal port on the PIF Bridge, if the packet needs to be
+ * de-tunneled.
+ */
+ POVS_VPORT_ENTRY tunnelRxNic;
+
+ /* Header information (parsed layer data) for 'curNbl'. */
+ OVS_PACKET_HDR_INFO layers;
+} OvsForwardingContext;
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsInitForwardingCtx --
+ * Function to init/re-init the 'ovsFwdCtx' context as the actions pipeline
+ * is being executed.
+ *
+ * 'layers', if non-NULL, is copied into the context; otherwise the header
+ * information in the context is zeroed. The tunnel fields ('tunnelTxNic',
+ * 'tunnelRxNic' and 'tunKey') are cleared only when 'resetTunnelInfo' is
+ * TRUE; otherwise they are left untouched. 'destPortsSizeOut' is always
+ * reset to 0.
+ *
+ * Result:
+ * NDIS_STATUS_SUCCESS on success
+ * Other NDIS_STATUS upon failure. Upon failure, it is safe to call
+ * OvsCompleteNBLForwardingCtx(), since 'ovsFwdCtx' has been initialized
+ * enough for OvsCompleteNBLForwardingCtx() to do its work.
+ * Note: as currently written there is no failure path; the function always
+ * returns NDIS_STATUS_SUCCESS.
+ * --------------------------------------------------------------------------
+ */
+static __inline NDIS_STATUS
+OvsInitForwardingCtx(OvsForwardingContext *ovsFwdCtx,
+ POVS_SWITCH_CONTEXT switchContext,
+ PNET_BUFFER_LIST curNbl,
+ UINT32 srcVportNo,
+ ULONG sendFlags,
+ PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail,
+ OvsCompletionList *completionList,
+ OVS_PACKET_HDR_INFO *layers,
+ BOOLEAN resetTunnelInfo)
+{
+ ASSERT(ovsFwdCtx);
+ ASSERT(switchContext);
+ ASSERT(curNbl);
+ ASSERT(fwdDetail);
+
+ /*
+ * Set values for curNbl and switchContext so upon failures, we have enough
+ * information to do cleanup.
+ */
+ ovsFwdCtx->curNbl = curNbl;
+ ovsFwdCtx->switchContext = switchContext;
+ ovsFwdCtx->completionList = completionList;
+ ovsFwdCtx->fwdDetail = fwdDetail;
+
+ if (fwdDetail->NumAvailableDestinations > 0) {
+ /*
+ * XXX: even though MSDN says GetNetBufferListDestinations() returns
+ * NDIS_STATUS, the header files say otherwise.
+ */
+ switchContext->NdisSwitchHandlers.GetNetBufferListDestinations(
+ switchContext->NdisSwitchContext, curNbl,
+ &ovsFwdCtx->destinationPorts);
+
+ ASSERT(ovsFwdCtx->destinationPorts);
+ /* Ensure that none of the elements are consumed yet. */
+ ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
+ fwdDetail->NumAvailableDestinations);
+ } else {
+ /* No destination array yet; OvsAddPorts() grows one on demand. */
+ ovsFwdCtx->destinationPorts = NULL;
+ }
+ ovsFwdCtx->destPortsSizeIn = fwdDetail->NumAvailableDestinations;
+ ovsFwdCtx->destPortsSizeOut = 0;
+ ovsFwdCtx->srcVportNo = srcVportNo;
+ ovsFwdCtx->sendFlags = sendFlags;
+ if (layers) {
+ ovsFwdCtx->layers = *layers;
+ } else {
+ RtlZeroMemory(&ovsFwdCtx->layers, sizeof ovsFwdCtx->layers);
+ }
+ if (resetTunnelInfo) {
+ ovsFwdCtx->tunnelTxNic = NULL;
+ ovsFwdCtx->tunnelRxNic = NULL;
+ RtlZeroMemory(&ovsFwdCtx->tunKey, sizeof ovsFwdCtx->tunKey);
+ }
+
+ return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsDetectTunnelRxPkt --
+ * Utility function for an RX packet to detect its tunnel type. A packet is
+ * treated as VXLAN when its flow key describes a non-fragmented UDP packet
+ * whose destination port is VXLAN_UDP_PORT_NBO.
+ *
+ * Result:
+ * True - if the tunnel type was detected.
+ * False - if not a tunnel packet or tunnel type not supported.
+ *
+ * Side effects:
+ * On success, 'ovsFwdCtx->tunnelRxNic' is set to the tunnel vport.
+ * 'ovsActionStats.rxVxlan' is incremented for every packet that matches
+ * the VXLAN criteria, even when no VXLAN tunnel vport exists yet (in which
+ * case False is returned).
+ * --------------------------------------------------------------------------
+ */
+static __inline BOOLEAN
+OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx,
+ const OvsFlowKey *flowKey)
+{
+ POVS_VPORT_ENTRY tunnelVport = NULL;
+
+ /* XXX: we should also check for the length of the UDP payload to pick
+ * packets only if they are at least VXLAN header size.
+ */
+ if (!flowKey->ipKey.nwFrag &&
+ flowKey->ipKey.nwProto == IPPROTO_UDP &&
+ flowKey->ipKey.l4.tpDst == VXLAN_UDP_PORT_NBO) {
+ tunnelVport = OvsGetTunnelVport(OVSWIN_VPORT_TYPE_VXLAN);
+ ovsActionStats.rxVxlan++;
+ }
+
+ // We might get tunnel packets even before the tunnel gets initialized.
+ if (tunnelVport) {
+ ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
+ ovsFwdCtx->tunnelRxNic = tunnelVport;
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsDetectTunnelPkt --
+ * Utility function to detect the tunnel type of a TX/RX packet.
+ *
+ * - Rx: considered only when 'dstVport' is an internal vport and the NBL's
+ * source port is the external port or the default port.
+ * - Tx: 'dstVport' is a tunnel vport; it is recorded in
+ * 'ovsFwdCtx->tunnelTxNic' and 'ovsActionStats.txVxlan' is incremented.
+ *
+ * Result:
+ * True - if the tunnel type was detected.
+ * False - if not a tunnel packet or tunnel type not supported.
+ *
+ * if result==True, the forwarding context gets initialized with the
+ * right tunnel vport.
+ * --------------------------------------------------------------------------
+ */
+static __inline BOOLEAN
+OvsDetectTunnelPkt(OvsForwardingContext *ovsFwdCtx,
+ const POVS_VPORT_ENTRY dstVport,
+ const OvsFlowKey *flowKey)
+{
+ /*
+ * The source of the NBL during tunneling Rx could be the external port or,
+ * if it is being executed from userspace, the default port.
+ */
+
+ if (OvsIsInternalVportType(dstVport->ovsType)) {
+ BOOLEAN validSrcPort = (ovsFwdCtx->fwdDetail->SourcePortId ==
+ ovsFwdCtx->switchContext->externalPortId)
+ || (ovsFwdCtx->fwdDetail->SourcePortId ==
+ NDIS_SWITCH_DEFAULT_PORT_ID);
+
+ if (validSrcPort && OvsDetectTunnelRxPkt(ovsFwdCtx, flowKey)) {
+ ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
+ ASSERT(ovsFwdCtx->tunnelRxNic != NULL);
+ return TRUE;
+ }
+ } else if (OvsIsTunnelVportType(dstVport->ovsType)) {
+ ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
+ ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
+ ASSERT(ovsFwdCtx->tunKey.dst != 0); /* Tx tunnel key must already be set. */
+ ovsActionStats.txVxlan++;
+ ovsFwdCtx->tunnelTxNic = dstVport;
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsAddPorts --
+ * Add the specified destination vport into the forwarding context. If the
+ * vport is a VIF/external port, it is added directly to the NBL. If it is
+ * a tunneling port, it is NOT added to the NBL.
+ *
+ * When the destination array of the NBL is full, it is grown first: by
+ * OVS_DEST_PORTS_ARRAY_MIN_SIZE elements if it was empty, otherwise the
+ * pending ports are committed and 'destPortsSizeIn' is doubled.
+ *
+ * Result:
+ * NDIS_STATUS_SUCCESS on success. This includes the case where the vport
+ * does not exist or is not connected (only 'noVport' is incremented), and
+ * the case where the vport is handled by the tunneling pipeline.
+ * Other NDIS_STATUS upon failure.
+ *
+ * Side effects:
+ * The vport's txPackets/txBytes counters are updated before the port is
+ * actually added, so they are also counted if growing the destination
+ * array fails afterwards.
+ * --------------------------------------------------------------------------
+ */
+static __inline NDIS_STATUS
+OvsAddPorts(OvsForwardingContext *ovsFwdCtx,
+ OvsFlowKey *flowKey,
+ NDIS_SWITCH_PORT_ID dstPortId,
+ BOOLEAN preserveVLAN,
+ BOOLEAN preservePriority)
+{
+ POVS_VPORT_ENTRY vport;
+ PNDIS_SWITCH_PORT_DESTINATION fwdPort;
+ NDIS_STATUS status;
+ POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext;
+
+ /*
+ * We hold the dispatch lock that protects the list of vports, so vports
+ * validated here can be added as destinations safely before we call into
+ * NDIS.
+ *
+ * Some of the vports can be tunnelled ports as well in which case
+ * they should be added to a separate list of tunnelled destination ports
+ * instead of the VIF ports. The context for the tunnel is settable
+ * in OvsForwardingContext.
+ */
+ vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext, dstPortId);
+ if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
+ /*
+ * There may be some latency between a port disappearing, and userspace
+ * updating the recalculated flows. In the meantime, handle invalid
+ * ports gracefully.
+ */
+ ovsActionStats.noVport++;
+ return NDIS_STATUS_SUCCESS;
+ }
+ ASSERT(vport->nicState == NdisSwitchNicStateConnected);
+ vport->stats.txPackets++;
+ vport->stats.txBytes +=
+ NET_BUFFER_DATA_LENGTH(NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl));
+
+ if (OvsDetectTunnelPkt(ovsFwdCtx, vport, flowKey)) {
+ ASSERT(ovsFwdCtx->tunnelTxNic || ovsFwdCtx->tunnelRxNic);
+ return NDIS_STATUS_SUCCESS;
+ }
+
+ if (ovsFwdCtx->destPortsSizeOut == ovsFwdCtx->destPortsSizeIn) {
+ if (ovsFwdCtx->destPortsSizeIn == 0) {
+ ASSERT(ovsFwdCtx->destinationPorts == NULL);
+ ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0);
+ status =
+ switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations(
+ switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
+ OVS_DEST_PORTS_ARRAY_MIN_SIZE,
+ &ovsFwdCtx->destinationPorts);
+ if (status != NDIS_STATUS_SUCCESS) {
+ ovsActionStats.cannotGrowDest++;
+ return status;
+ }
+ ovsFwdCtx->destPortsSizeIn =
+ ovsFwdCtx->fwdDetail->NumAvailableDestinations;
+ ASSERT(ovsFwdCtx->destinationPorts);
+ } else {
+ ASSERT(ovsFwdCtx->destinationPorts != NULL);
+ /*
+ * NumElements:
+ * A ULONG value that specifies the total number of
+ * NDIS_SWITCH_PORT_DESTINATION elements in the
+ * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure.
+ *
+ * NumDestinations:
+ * A ULONG value that specifies the number of
+ * NDIS_SWITCH_PORT_DESTINATION elements in the
+ * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure that
+ * specify port destinations.
+ *
+ * NumAvailableDestinations:
+ * A value that specifies the number of unused extensible switch
+ * destination ports elements within an NET_BUFFER_LIST structure.
+ */
+ ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
+ ovsFwdCtx->destPortsSizeIn);
+ ASSERT(ovsFwdCtx->destinationPorts->NumDestinations ==
+ ovsFwdCtx->destPortsSizeOut -
+ ovsFwdCtx->fwdDetail->NumAvailableDestinations);
+ ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations > 0);
+ /*
+ * Before we grow the array of destination ports, the current set
+ * of ports needs to be committed. Only the ports added since the
+ * last commit need to be part of the new update.
+ */
+ status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations(
+ switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
+ ovsFwdCtx->fwdDetail->NumAvailableDestinations,
+ ovsFwdCtx->destinationPorts);
+ if (status != NDIS_STATUS_SUCCESS) {
+ ovsActionStats.cannotGrowDest++;
+ return status;
+ }
+ ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
+ ovsFwdCtx->destPortsSizeIn);
+ ASSERT(ovsFwdCtx->destinationPorts->NumDestinations ==
+ ovsFwdCtx->destPortsSizeOut);
+ ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0);
+
+ status = switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations(
+ switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
+ ovsFwdCtx->destPortsSizeIn, &ovsFwdCtx->destinationPorts);
+ if (status != NDIS_STATUS_SUCCESS) {
+ ovsActionStats.cannotGrowDest++;
+ return status;
+ }
+ ASSERT(ovsFwdCtx->destinationPorts != NULL);
+ ovsFwdCtx->destPortsSizeIn <<= 1; /* Grown by its own size, i.e. doubled. */
+ }
+ }
+
+ ASSERT(ovsFwdCtx->destPortsSizeOut < ovsFwdCtx->destPortsSizeIn);
+ /* Fill in the next unused element of the destination array. */
+ fwdPort =
+ NDIS_SWITCH_PORT_DESTINATION_AT_ARRAY_INDEX(ovsFwdCtx->destinationPorts,
+ ovsFwdCtx->destPortsSizeOut);
+
+ fwdPort->PortId = vport->portId;
+ fwdPort->NicIndex = vport->nicIndex;
+ fwdPort->IsExcluded = 0;
+ fwdPort->PreserveVLAN = preserveVLAN;
+ fwdPort->PreservePriority = preservePriority;
+ ovsFwdCtx->destPortsSizeOut += 1;
+
+ return NDIS_STATUS_SUCCESS;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsClearTunTxCtx --
+ * Utility function to clear tx tunneling context: resets 'tunnelTxNic' to
+ * NULL and 'tunKey.dst' to 0. The other 'tunKey' fields are left as is.
+ * --------------------------------------------------------------------------
+ */
+static __inline VOID
+OvsClearTunTxCtx(OvsForwardingContext *ovsFwdCtx)
+{
+ ovsFwdCtx->tunnelTxNic = NULL;
+ ovsFwdCtx->tunKey.dst = 0;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsClearTunRxCtx --
+ * Utility function to clear rx tunneling context: resets 'tunnelRxNic' to
+ * NULL and 'tunKey.dst' to 0. The other 'tunKey' fields are left as is.
+ * --------------------------------------------------------------------------
+ */
+static __inline VOID
+OvsClearTunRxCtx(OvsForwardingContext *ovsFwdCtx)
+{
+ ovsFwdCtx->tunnelRxNic = NULL;
+ ovsFwdCtx->tunKey.dst = 0;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsCompleteNBLForwardingCtx --
+ * This utility function is responsible for freeing/completing an NBL - either
+ * by adding it to a completion list or by freeing it.
+ *
+ * 'dropReason' is a wide string that is handed to the completion list as the
+ * filter reason. It is not used when there is no completion list.
+ *
+ * Side effects:
+ * It also resets the necessary fields in 'ovsFwdCtx': 'curNbl',
+ * 'destPortsSizeOut', 'tunnelTxNic' and 'tunnelRxNic'. 'tunKey' is not
+ * touched.
+ * --------------------------------------------------------------------------
+ */
+static __inline VOID
+OvsCompleteNBLForwardingCtx(OvsForwardingContext *ovsFwdCtx,
+ PCWSTR dropReason)
+{
+ NDIS_STRING filterReason;
+
+ RtlInitUnicodeString(&filterReason, dropReason);
+ if (ovsFwdCtx->completionList) {
+ OvsAddPktCompletionList(ovsFwdCtx->completionList, TRUE,
+ ovsFwdCtx->fwdDetail->SourcePortId, ovsFwdCtx->curNbl, 1,
+ &filterReason);
+ ovsFwdCtx->curNbl = NULL;
+ } else {
+ /* If there is no completionList, we assume this is an OVS-created NBL. */
+ ovsFwdCtx->curNbl = OvsCompleteNBL(ovsFwdCtx->switchContext,
+ ovsFwdCtx->curNbl, TRUE);
+ ASSERT(ovsFwdCtx->curNbl == NULL);
+ }
+ /* XXX: these can be made debug only to save cycles. Ideally the pipeline
+ * using these fields should reset the values at the end of the pipeline. */
+ ovsFwdCtx->destPortsSizeOut = 0;
+ ovsFwdCtx->tunnelTxNic = NULL;
+ ovsFwdCtx->tunnelRxNic = NULL;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsDoFlowLookupOutput --
+ * Function to be used for the second stage of a tunneling workflow, i.e.:
+ * - On the encapsulated packet on Tx path, to do a flow extract, flow
+ * lookup and executing the actions.
+ * - On the decapsulated packet on Rx path, to do a flow extract, flow
+ * lookup and executing the actions.
+ *
+ * XXX: It is assumed that the NBL in 'ovsFwdCtx' is owned by OVS. This is
+ * until the new buffer management framework is adopted.
+ *
+ * Result:
+ * - NDIS_STATUS_SUCCESS if the source vport is missing/not connected (the
+ * NBL is dropped).
+ * - The status of OvsExtractFlow() if flow extraction fails.
+ * - The status of OvsActionsExecute() if a flow is found.
+ * - On a flow miss, NDIS_STATUS_SUCCESS if the packet was queued to
+ * userspace, NDIS_STATUS_FAILURE otherwise.
+ *
+ * Side effects:
+ * The NBL in 'ovsFwdCtx' is consumed.
+ * --------------------------------------------------------------------------
+ */
+static __inline NDIS_STATUS
+OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
+{
+ OvsFlowKey key;
+ OvsFlow *flow;
+ UINT64 hash;
+ NDIS_STATUS status;
+ POVS_VPORT_ENTRY vport =
+ OvsFindVportByPortNo(ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo);
+ if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
+ ASSERT(FALSE); // XXX: let's catch this for now
+ OvsCompleteNBLForwardingCtx(ovsFwdCtx,
+ L"OVS-Dropped due to internal/tunnel port removal");
+ ovsActionStats.noVport++;
+ return NDIS_STATUS_SUCCESS;
+ }
+ ASSERT(vport->nicState == NdisSwitchNicStateConnected);
+
+ /* Assert that in the Rx direction, key is always setup. */
+ ASSERT(ovsFwdCtx->tunnelRxNic == NULL || ovsFwdCtx->tunKey.dst != 0);
+ status = OvsExtractFlow(ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo,
+ &key, &ovsFwdCtx->layers, ovsFwdCtx->tunKey.dst != 0 ?
+ &ovsFwdCtx->tunKey : NULL);
+ if (status != NDIS_STATUS_SUCCESS) {
+ OvsCompleteNBLForwardingCtx(ovsFwdCtx,
+ L"OVS-Flow extract failed");
+ ovsActionStats.failedFlowExtract++;
+ return status;
+ }
+
+ flow = OvsLookupFlow(&ovsFwdCtx->switchContext->datapath, &key, &hash, FALSE);
+ if (flow) {
+ OvsFlowUsed(flow, ovsFwdCtx->curNbl, &ovsFwdCtx->layers);
+ ovsFwdCtx->switchContext->datapath.hits++;
+ status = OvsActionsExecute(ovsFwdCtx->switchContext,
+ ovsFwdCtx->completionList, ovsFwdCtx->curNbl,
+ ovsFwdCtx->srcVportNo, ovsFwdCtx->sendFlags,
+ &key, &hash, &ovsFwdCtx->layers,
+ flow->actions, flow->actionsLen);
+ ovsFwdCtx->curNbl = NULL; /* The NBL was handed over to OvsActionsExecute(). */
+ } else {
+ LIST_ENTRY missedPackets;
+ UINT32 num = 0;
+ ovsFwdCtx->switchContext->datapath.misses++;
+ InitializeListHead(&missedPackets);
+ status = OvsCreateAndAddPackets(
+ OVS_DEFAULT_PACKET_QUEUE, NULL, 0, OVS_PACKET_CMD_MISS,
+ ovsFwdCtx->srcVportNo,
+ key.tunKey.dst != 0 ?
+ (OvsIPv4TunnelKey *)&key.tunKey : NULL,
+ ovsFwdCtx->curNbl,
+ ovsFwdCtx->tunnelRxNic != NULL, &ovsFwdCtx->layers,
+ ovsFwdCtx->switchContext, &missedPackets, &num);
+ if (num) {
+ OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, num);
+ }
+ if (status == NDIS_STATUS_SUCCESS) {
+ /* Complete the packet since it was copied to user buffer. */
+ OvsCompleteNBLForwardingCtx(ovsFwdCtx,
+ L"OVS-Dropped since packet was copied to userspace");
+ ovsActionStats.flowMiss++;
+ status = NDIS_STATUS_SUCCESS;
+ } else {
+ OvsCompleteNBLForwardingCtx(ovsFwdCtx,
+ L"OVS-Dropped due to failure to queue to userspace");
+ status = NDIS_STATUS_FAILURE;
+ ovsActionStats.failedFlowMiss++;
+ }
+ }
+
+ return status;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsTunnelPortTx --
+ * The start function for Tx tunneling - encapsulates the packet, and
+ * outputs the packet on the PIF bridge.
+ *
+ * Result:
+ * The status of OvsDoFlowLookupOutput() on the encapsulated packet, or
+ * NDIS_STATUS_SUCCESS if encapsulation failed (the packet is dropped and
+ * 'failedEncap' is incremented). An unhandled tunnel type leaves the
+ * status at its initial failure value and is handled as an encap failure.
+ *
+ * Side effects:
+ * The NBL in 'ovsFwdCtx' is consumed. The NBL's SourcePortId and
+ * SourceNicIndex are rewritten to those of the internal port. The Tx
+ * tunnel context is cleared.
+ * --------------------------------------------------------------------------
+ */
+static __inline NDIS_STATUS
+OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx)
+{
+ NDIS_STATUS status = NDIS_STATUS_FAILURE;
+ PNET_BUFFER_LIST newNbl = NULL;
+
+ /*
+ * Setup the source port to be the internal port so as to facilitate the
+ * second OvsLookupFlow.
+ */
+ ASSERT(ovsFwdCtx->switchContext->internalVport);
+ ovsFwdCtx->srcVportNo =
+ ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->portNo;
+
+ ovsFwdCtx->fwdDetail->SourcePortId = ovsFwdCtx->switchContext->internalPortId;
+ ovsFwdCtx->fwdDetail->SourceNicIndex =
+ ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->nicIndex;
+
+ /* Do the encap. Encap function does not consume the NBL. */
+ switch(ovsFwdCtx->tunnelTxNic->ovsType) {
+ case OVSWIN_VPORT_TYPE_VXLAN:
+ status = OvsEncapVxlan(ovsFwdCtx->curNbl, &ovsFwdCtx->tunKey,
+ ovsFwdCtx->switchContext,
+ (VOID *)ovsFwdCtx->completionList,
+ &ovsFwdCtx->layers, &newNbl);
+ break;
+ default:
+ ASSERT(! "Tx: Unhandled tunnel type");
+ }
+
+ /* Reset the tunnel context so that it doesn't get used after this point. */
+ OvsClearTunTxCtx(ovsFwdCtx);
+
+ if (status == NDIS_STATUS_SUCCESS) {
+ ASSERT(newNbl);
+ /* Release the original NBL and continue the pipeline with the encapsulated one. */
+ OvsCompleteNBLForwardingCtx(ovsFwdCtx,
+ L"Complete after cloning NBL for encapsulation");
+ ovsFwdCtx->curNbl = newNbl;
+ status = OvsDoFlowLookupOutput(ovsFwdCtx);
+ ASSERT(ovsFwdCtx->curNbl == NULL);
+ } else {
+ /*
+ * XXX: Temporary freeing of the packet until we register a
+ * callback to IP helper.
+ */
+ OvsCompleteNBLForwardingCtx(ovsFwdCtx,
+ L"OVS-Dropped due to encap failure");
+ ovsActionStats.failedEncap++;
+ status = NDIS_STATUS_SUCCESS;
+ }
+
+ return status;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsTunnelPortRx --
+ * Decapsulate the incoming NBL based on the tunnel type and goes through
+ * the flow lookup for the inner packet.
+ *
+ * Note: IP checksum is validated here, but L4 checksum validation needs
+ * to be done by the corresponding tunnel types.
+ *
+ * Result:
+ * The status of OvsDoFlowLookupOutput() on the decapsulated packet, or
+ * the decap status if decapsulation failed (NDIS_STATUS_NOT_SUPPORTED for
+ * an unhandled tunnel type). If IP checksum validation fails, the packet
+ * is dropped and NDIS_STATUS_SUCCESS is returned, since 'status' still
+ * holds its initial value at that point.
+ *
+ * Side effects:
+ * The NBL in 'ovsFwdCtx' is consumed. The Rx tunnel context is cleared.
+ * --------------------------------------------------------------------------
+ */
+static __inline NDIS_STATUS
+OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
+{
+ NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+ PNET_BUFFER_LIST newNbl = NULL;
+ POVS_VPORT_ENTRY tunnelRxVport = ovsFwdCtx->tunnelRxNic;
+
+ if (OvsValidateIPChecksum(ovsFwdCtx->curNbl, &ovsFwdCtx->layers)
+ != NDIS_STATUS_SUCCESS) {
+ ovsActionStats.failedChecksum++;
+ OVS_LOG_INFO("Packet dropped due to IP checksum failure.");
+ goto dropNbl;
+ }
+
+ switch(tunnelRxVport->ovsType) {
+ case OVSWIN_VPORT_TYPE_VXLAN:
+ /*
+ * OvsDoDecapVxlan should return a new NBL if it was copied, and
+ * this new NBL should be setup as the ovsFwdCtx->curNbl.
+ */
+ status = OvsDoDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
+ &ovsFwdCtx->tunKey, &newNbl);
+ break;
+ default:
+ OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n",
+ tunnelRxVport->ovsType);
+ ASSERT(! "Rx: Unhandled tunnel type");
+ status = NDIS_STATUS_NOT_SUPPORTED;
+ }
+
+ if (status != NDIS_STATUS_SUCCESS) {
+ ovsActionStats.failedDecap++;
+ goto dropNbl;
+ }
+
+ /*
+ * tunnelRxNic and other fields will be cleared, re-init the context
+ * before usage.
+ */
+ OvsCompleteNBLForwardingCtx(ovsFwdCtx,
+ L"OVS-dropped due to new decap packet");
+
+ /* Decapsulated packet is in a new NBL */
+ ovsFwdCtx->tunnelRxNic = tunnelRxVport;
+ OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
+ newNbl, tunnelRxVport->portNo, 0,
+ NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
+ ovsFwdCtx->completionList,
+ &ovsFwdCtx->layers, FALSE);
+
+ /*
+ * Set the NBL's SourcePortId and SourceNicIndex to default values to
+ * keep NDIS happy when we forward the packet.
+ */
+ ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
+ ovsFwdCtx->fwdDetail->SourceNicIndex = 0;
+
+ status = OvsDoFlowLookupOutput(ovsFwdCtx);
+ ASSERT(ovsFwdCtx->curNbl == NULL);
+ OvsClearTunRxCtx(ovsFwdCtx);
+
+ return status;
+
+dropNbl:
+ OvsCompleteNBLForwardingCtx(ovsFwdCtx,
+ L"OVS-dropped due to decap failure");
+ OvsClearTunRxCtx(ovsFwdCtx);
+ return status;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsOutputForwardingCtx --
+ * This function outputs an NBL to NDIS or to a tunneling pipeline based on
+ * the ports added so far into 'ovsFwdCtx'.
+ *
+ * If non-tunnel destination ports are pending and a tunnel port is also
+ * set, the NBL is copied first: the original is sent to NDIS and the copy
+ * is fed into the tunneling pipeline. If that copy cannot be made, the
+ * original NBL is dropped without being sent.
+ *
+ * Side effects:
+ * This function consumes the NBL - either by forwarding it successfully to
+ * NDIS, or adding it to the completion list in 'ovsFwdCtx', or freeing it.
+ *
+ * Also makes sure that the list of destination ports - tunnel or otherwise is
+ * drained.
+ *
+ * NOTE(review), to be confirmed against the callees:
+ * - 'nb' is assigned but never read in this function.
+ * - If UpdateNetBufferListDestinations() fails when no tunnel copy was made,
+ * OvsCompleteNBL() is called with a NULL 'newNbl'; verify that
+ * OvsCompleteNBL() tolerates NULL.
+ * - If OvsInitForwardingCtx() were ever to fail, the context would be
+ * completed once before 'goto dropit' and then again at 'dropit'.
+ * --------------------------------------------------------------------------
+ */
+static __inline NDIS_STATUS
+OvsOutputForwardingCtx(OvsForwardingContext *ovsFwdCtx)
+{
+ NDIS_STATUS status = STATUS_SUCCESS;
+ POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext;
+
+ /*
+ * Handle the case where some of the destination ports are tunneled
+ * ports - the non-tunneled ports get an unmodified copy of the NBL, and the
+ * tunneling pipeline starts when we output the packet to tunneled port.
+ */
+ if (ovsFwdCtx->destPortsSizeOut > 0) {
+ PNET_BUFFER_LIST newNbl = NULL;
+ PNET_BUFFER nb;
+ UINT32 portsToUpdate =
+ ovsFwdCtx->fwdDetail->NumAvailableDestinations -
+ (ovsFwdCtx->destPortsSizeIn - ovsFwdCtx->destPortsSizeOut);
+
+ ASSERT(ovsFwdCtx->destinationPorts != NULL);
+
+ /*
+ * Create a copy of the packet in order to do encap on it later. Also,
+ * don't copy the offload context since the encap'd packet has a
+ * different set of headers. This will change when we implement offloads
+ * before doing encapsulation.
+ */
+ if (ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL) {
+ nb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
+ newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
+ 0, 0, TRUE /*copy NBL info*/);
+ if (newNbl == NULL) {
+ status = NDIS_STATUS_RESOURCES;
+ ovsActionStats.noCopiedNbl++;
+ goto dropit;
+ }
+ }
+
+ /* It does not seem like we'll get here unless 'portsToUpdate' > 0. */
+ ASSERT(portsToUpdate > 0);
+ status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations(
+ switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
+ portsToUpdate, ovsFwdCtx->destinationPorts);
+ if (status != NDIS_STATUS_SUCCESS) {
+ OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
+ ovsActionStats.cannotGrowDest++;
+ goto dropit;
+ }
+
+ OvsSendNBLIngress(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
+ ovsFwdCtx->sendFlags);
+ /* End this pipeline by resetting the corresponding context. */
+ ovsFwdCtx->destPortsSizeOut = 0;
+ ovsFwdCtx->curNbl = NULL;
+ if (newNbl) {
+ /* Continue with the copy; tunnel info is preserved (resetTunnelInfo is FALSE). */
+ status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
+ newNbl, ovsFwdCtx->srcVportNo, 0,
+ NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
+ ovsFwdCtx->completionList,
+ &ovsFwdCtx->layers, FALSE);
+ if (status != NDIS_STATUS_SUCCESS) {
+ OvsCompleteNBLForwardingCtx(ovsFwdCtx,
+ L"Dropped due to resouces");
+ goto dropit;
+ }
+ }
+ }
+
+ if (ovsFwdCtx->tunnelTxNic != NULL) {
+ status = OvsTunnelPortTx(ovsFwdCtx);
+ ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
+ ASSERT(ovsFwdCtx->tunKey.dst == 0);
+ } else if (ovsFwdCtx->tunnelRxNic != NULL) {
+ status = OvsTunnelPortRx(ovsFwdCtx);
+ ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
+ ASSERT(ovsFwdCtx->tunKey.dst == 0);
+ }
+ ASSERT(ovsFwdCtx->curNbl == NULL);
+
+ return status;
+
+dropit:
+ if (status != NDIS_STATUS_SUCCESS) {
+ OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"Dropped due to XXX");
+ }
+
+ return status;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsLookupFlowOutput --
+ * Utility function for external callers to do flow extract, lookup,
+ * actions execute on a given NBL. The internal vport of 'switchContext'
+ * is used as the source vport, and 'compList' is treated as an
+ * OvsCompletionList pointer.
+ *
+ * Note: If this is being used from a callback function, make sure that the
+ * arguments specified are still valid in the asynchronous context.
+ *
+ * Note: The function is not complete yet - it unconditionally asserts before
+ * the lookup because the required locks are not acquired. The status
+ * returned by OvsDoFlowLookupOutput() is discarded.
+ *
+ * Side effects:
+ * This function consumes the NBL.
+ * --------------------------------------------------------------------------
+ */
+VOID
+OvsLookupFlowOutput(POVS_SWITCH_CONTEXT switchContext,
+ VOID *compList,
+ PNET_BUFFER_LIST curNbl)
+{
+ NDIS_STATUS status;
+ OvsForwardingContext ovsFwdCtx;
+ POVS_VPORT_ENTRY internalVport =
+ (POVS_VPORT_ENTRY)switchContext->internalVport;
+
+ /* XXX: make sure comp list was not a stack variable previously. */
+ OvsCompletionList *completionList = (OvsCompletionList *)compList;
+
+ /*
+ * XXX: can internal port disappear while we are busy doing ARP resolution?
+ * It could, but will we get this callback from IP helper in that case? Need
+ * to check.
+ */
+ ASSERT(switchContext->internalVport);
+ status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl,
+ internalVport->portNo, 0,
+ NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl),
+ completionList, NULL, TRUE);
+ if (status != NDIS_STATUS_SUCCESS) {
+ OvsCompleteNBLForwardingCtx(&ovsFwdCtx,
+ L"OVS-Dropped due to resources");
+ return;
+ }
+
+ ASSERT(FALSE);
+ /*
+ * XXX: We need to acquire the dispatch lock and the datapath lock.
+ */
+
+ OvsDoFlowLookupOutput(&ovsFwdCtx);
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsOutputBeforeSetAction --
+ * Function to be called to complete one set of actions on an NBL, before
+ * we start the next one.
+ * --------------------------------------------------------------------------
+ */
+static __inline NDIS_STATUS
+OvsOutputBeforeSetAction(OvsForwardingContext *ovsFwdCtx)
+{
+ PNET_BUFFER_LIST newNbl;
+ NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+ PNET_BUFFER nb;
+
+ /*
+ * Create a copy and work on the copy after this point. The original NBL is
+ * forwarded. One reason to not use the copy for forwarding is that
+ * ports have already been added to the original NBL, and it might be
+ * inefficient/impossible to remove/re-add them to the copy. There's no
+ * notion of removing the ports, the ports need to be marked as
+ * "isExcluded". There's seems no real advantage to retaining the original
+ * and sending out the copy instead.
+ *
+ * XXX: We are copying the offload context here. This is to handle actions
+ * such as:
+ * outport, pop_vlan(), outport, push_vlan(), outport
+ *
+ * copy size needs to include inner ether + IP + TCP, need to revisit
+ * if we support IP options.
+ * XXX Head room needs to include the additional encap.
+ * XXX copySize check is not considering multiple NBs.
+ */
+ nb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
+ newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
+ 0, 0, TRUE /*copy NBL info*/);
+
+ ASSERT(ovsFwdCtx->destPortsSizeOut > 0 ||
+ ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL);
+
+ /* Send the original packet out */
+ status = OvsOutputForwardingCtx(ovsFwdCtx);
+ ASSERT(ovsFwdCtx->curNbl == NULL);
+ ASSERT(ovsFwdCtx->destPortsSizeOut == 0);
+ ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
+ ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
+
+ /* If we didn't make a copy, can't continue. */
+ if (newNbl == NULL) {
+ ovsActionStats.noCopiedNbl++;
+ return NDIS_STATUS_RESOURCES;
+ }
+
+ /* Finish the remaining actions with the new NBL */
+ if (status != NDIS_STATUS_SUCCESS) {
+ OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
+ } else {
+ status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
+ newNbl, ovsFwdCtx->srcVportNo, 0,
+ NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
+ ovsFwdCtx->completionList,
+ &ovsFwdCtx->layers, FALSE);
+ }
+
+ return status;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsPopVlanInPktBuf --
+ * Function to pop a VLAN tag when the tag is in the packet buffer.
+ * --------------------------------------------------------------------------
+ */
+static __inline NDIS_STATUS
+OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx)
+{
+ PNET_BUFFER curNb;
+ PMDL curMdl;
+ PUINT8 bufferStart;
+ ULONG dataLength = sizeof (DL_EUI48) + sizeof (DL_EUI48);
+ UINT32 packetLen, mdlLen;
+ PNET_BUFFER_LIST newNbl;
+ NDIS_STATUS status;
+
+ /*
+ * Declare a dummy vlanTag structure since we need to compute the size
+ * of shiftLength. The NDIS one is a unionized structure.
+ */
+ NDIS_PACKET_8021Q_INFO vlanTag = {0};
+ ULONG shiftLength = sizeof (vlanTag.TagHeader);
+ PUINT8 tempBuffer[sizeof (DL_EUI48) + sizeof (DL_EUI48)];
+
+ newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
+ 0, 0, TRUE /* copy NBL info */);
+ if (!newNbl) {
+ ovsActionStats.noCopiedNbl++;
+ return NDIS_STATUS_RESOURCES;
+ }
+
+ /* Complete the original NBL and create a copy to modify. */
+ OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"OVS-Dropped due to copy");
+
+ status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
+ newNbl, ovsFwdCtx->srcVportNo, 0,
+ NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
+ NULL, &ovsFwdCtx->layers, FALSE);
+ if (status != NDIS_STATUS_SUCCESS) {
+ OvsCompleteNBLForwardingCtx(ovsFwdCtx,
+ L"Dropped due to resouces");
+ return NDIS_STATUS_RESOURCES;
+ }
+
+ curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
+ packetLen = NET_BUFFER_DATA_LENGTH(curNb);
+ ASSERT(curNb->Next == NULL);
+ curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+ NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
+ if (!bufferStart) {
+ return NDIS_STATUS_RESOURCES;
+ }
+ mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
+ /* Bail out if L2 + VLAN header is not contiguous in the first buffer. */
+ if (MIN(packetLen, mdlLen) < sizeof (EthHdr) + shiftLength) {
+ ASSERT(FALSE);
+ return NDIS_STATUS_FAILURE;
+ }
+ bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
+ RtlCopyMemory(tempBuffer, bufferStart, dataLength);
+ RtlCopyMemory(bufferStart + shiftLength, tempBuffer, dataLength);
+ NdisAdvanceNetBufferDataStart(curNb, shiftLength, FALSE, NULL);
+
+ return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsTunnelAttrToIPv4TunnelKey --
+ * Convert tunnel attribute to OvsIPv4TunnelKey.
+ * --------------------------------------------------------------------------
+ */
+static __inline NDIS_STATUS
+OvsTunnelAttrToIPv4TunnelKey(struct nlattr *attr,
+ OvsIPv4TunnelKey *tunKey)
+{
+ struct nlattr *a;
+ INT rem;
+
+ tunKey->attr[0] = 0;
+ tunKey->attr[1] = 0;
+ tunKey->attr[2] = 0;
+ ASSERT(nl_attr_type(attr) == OVS_KEY_ATTR_TUNNEL);
+
+ NL_ATTR_FOR_EACH_UNSAFE (a, rem, nl_attr_data(attr),
+ nl_attr_get_size(attr)) {
+ switch (nl_attr_type(a)) {
+ case OVS_TUNNEL_KEY_ATTR_ID:
+ tunKey->tunnelId = nl_attr_get_be64(a);
+ tunKey->flags |= OVS_TNL_F_KEY;
+ break;
+ case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
+ tunKey->src = nl_attr_get_be32(a);
+ break;
+ case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
+ tunKey->dst = nl_attr_get_be32(a);
+ break;
+ case OVS_TUNNEL_KEY_ATTR_TOS:
+ tunKey->tos = nl_attr_get_u8(a);
+ break;
+ case OVS_TUNNEL_KEY_ATTR_TTL:
+ tunKey->ttl = nl_attr_get_u8(a);
+ break;
+ case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
+ tunKey->flags |= OVS_TNL_F_DONT_FRAGMENT;
+ break;
+ case OVS_TUNNEL_KEY_ATTR_CSUM:
+ tunKey->flags |= OVS_TNL_F_CSUM;
+ break;
+ default:
+ ASSERT(0);
+ }
+ }
+
+ return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsUpdateEthHeader --
+ * Updates the ethernet header in ovsFwdCtx.curNbl inline based on the
+ * specified key.
+ *----------------------------------------------------------------------------
+ */
+static __inline NDIS_STATUS
+OvsUpdateEthHeader(OvsForwardingContext *ovsFwdCtx,
+ const struct ovs_key_ethernet *ethAttr)
+{
+ PNET_BUFFER curNb;
+ PMDL curMdl;
+ PUINT8 bufferStart;
+ EthHdr *ethHdr;
+ UINT32 packetLen, mdlLen;
+
+ curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
+ ASSERT(curNb->Next == NULL);
+ packetLen = NET_BUFFER_DATA_LENGTH(curNb);
+ curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+ NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
+ if (!bufferStart) {
+ ovsActionStats.noResource++;
+ return NDIS_STATUS_RESOURCES;
+ }
+ mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
+ ASSERT(mdlLen > 0);
+ /* Bail out if the L2 header is not in a contiguous buffer. */
+ if (MIN(packetLen, mdlLen) < sizeof *ethHdr) {
+ ASSERT(FALSE);
+ return NDIS_STATUS_FAILURE;
+ }
+ ethHdr = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(curNb));
+
+ RtlCopyMemory(ethHdr->Destination, ethAttr->eth_dst,
+ sizeof ethHdr->Destination);
+ RtlCopyMemory(ethHdr->Source, ethAttr->eth_src, sizeof ethHdr->Source);
+
+ return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsUpdateIPv4Header --
+ * Updates the IPv4 header in ovsFwdCtx.curNbl inline based on the
+ * specified key.
+ *----------------------------------------------------------------------------
+ */
+static __inline NDIS_STATUS
+OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
+ const struct ovs_key_ipv4 *ipAttr)
+{
+ PNET_BUFFER curNb;
+ PMDL curMdl;
+ ULONG curMdlOffset;
+ PUINT8 bufferStart;
+ UINT32 mdlLen, hdrSize, packetLen;
+ OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers;
+ NDIS_STATUS status;
+ IPHdr *ipHdr;
+ TCPHdr *tcpHdr = NULL;
+ UDPHdr *udpHdr = NULL;
+
+ ASSERT(layers->value != 0);
+
+ /*
+ * Peek into the MDL to get a handle to the IP header and if required
+ * the TCP/UDP header as well. We check if the required headers are in one
+ * contiguous MDL, and if not, we copy them over to one MDL.
+ */
+ curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
+ ASSERT(curNb->Next == NULL);
+ packetLen = NET_BUFFER_DATA_LENGTH(curNb);
+ curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+ NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
+ if (!bufferStart) {
+ ovsActionStats.noResource++;
+ return NDIS_STATUS_RESOURCES;
+ }
+ curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
+ mdlLen -= curMdlOffset;
+ ASSERT((INT)mdlLen >= 0);
+
+ if (layers->isTcp || layers->isUdp) {
+ hdrSize = layers->l4Offset +
+ layers->isTcp ? sizeof (*tcpHdr) : sizeof (*udpHdr);
+ } else {
+ hdrSize = layers->l3Offset + sizeof (*ipHdr);
+ }
+
+ /* Count of number of bytes of valid data there are in the first MDL. */
+ mdlLen = MIN(packetLen, mdlLen);
+ if (mdlLen < hdrSize) {
+ PNET_BUFFER_LIST newNbl;
+ newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
+ hdrSize, 0, TRUE /*copy NBL info*/);
+ if (!newNbl) {
+ ovsActionStats.noCopiedNbl++;
+ return NDIS_STATUS_RESOURCES;
+ }
+ OvsCompleteNBLForwardingCtx(ovsFwdCtx,
+ L"Complete after partial copy.");
+
+ status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
+ newNbl, ovsFwdCtx->srcVportNo, 0,
+ NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
+ NULL, &ovsFwdCtx->layers, FALSE);
+ if (status != NDIS_STATUS_SUCCESS) {
+ OvsCompleteNBLForwardingCtx(ovsFwdCtx,
+ L"OVS-Dropped due to resources");
+ return NDIS_STATUS_RESOURCES;
+ }
+
+ curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
+ ASSERT(curNb->Next == NULL);
+ curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+ NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
+ if (!curMdl) {
+ ovsActionStats.noResource++;
+ return NDIS_STATUS_RESOURCES;
+ }
+ curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
+ mdlLen -= curMdlOffset;
+ ASSERT(mdlLen >= hdrSize);
+ }
+
+ ipHdr = (IPHdr *)(bufferStart + curMdlOffset + layers->l3Offset);
+
+ if (layers->isTcp) {
+ tcpHdr = (TCPHdr *)(bufferStart + curMdlOffset + layers->l4Offset);
+ } else if (layers->isUdp) {
+ udpHdr = (UDPHdr *)(bufferStart + curMdlOffset + layers->l4Offset);
+ }
+
+ /*
+ * Adjust the IP header inline as dictated by the action, nad also update
+ * the IP and the TCP checksum for the data modified.
+ *
+ * In the future, this could be optimized to make one call to
+ * ChecksumUpdate32(). Ignoring this for now, since for the most common
+ * case, we only update the TTL.
+ */
+ if (ipHdr->saddr != ipAttr->ipv4_src) {
+ if (tcpHdr) {
+ tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->saddr,
+ ipAttr->ipv4_src);
+ } else if (udpHdr && udpHdr->check) {
+ udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->saddr,
+ ipAttr->ipv4_src);
+ }
+
+ if (ipHdr->check != 0) {
+ ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->saddr,
+ ipAttr->ipv4_src);
+ }
+ ipHdr->saddr = ipAttr->ipv4_src;
+ }
+ if (ipHdr->daddr != ipAttr->ipv4_dst) {
+ if (tcpHdr) {
+ tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->daddr,
+ ipAttr->ipv4_dst);
+ } else if (udpHdr && udpHdr->check) {
+ udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->daddr,
+ ipAttr->ipv4_dst);
+ }
+
+ if (ipHdr->check != 0) {
+ ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->daddr,
+ ipAttr->ipv4_dst);
+ }
+ ipHdr->daddr = ipAttr->ipv4_dst;
+ }
+ if (ipHdr->protocol != ipAttr->ipv4_proto) {
+ UINT16 oldProto = (ipHdr->protocol << 16) & 0xff00;
+ UINT16 newProto = (ipAttr->ipv4_proto << 16) & 0xff00;
+ if (tcpHdr) {
+ tcpHdr->check = ChecksumUpdate16(tcpHdr->check, oldProto, newProto);
+ } else if (udpHdr && udpHdr->check) {
+ udpHdr->check = ChecksumUpdate16(udpHdr->check, oldProto, newProto);
+ }
+
+ if (ipHdr->check != 0) {
+ ipHdr->check = ChecksumUpdate16(ipHdr->check, oldProto, newProto);
+ }
+ ipHdr->protocol = ipAttr->ipv4_proto;
+ }
+ if (ipHdr->ttl != ipAttr->ipv4_ttl) {
+ UINT16 oldTtl = (ipHdr->ttl) & 0xff;
+ UINT16 newTtl = (ipAttr->ipv4_ttl) & 0xff;
+ if (ipHdr->check != 0) {
+ ipHdr->check = ChecksumUpdate16(ipHdr->check, oldTtl, newTtl);
+ }
+ ipHdr->ttl = ipAttr->ipv4_ttl;
+ }
+
+ return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsExecuteSetAction --
+ * Executes a set() action, but storing the actions into 'ovsFwdCtx'
+ * --------------------------------------------------------------------------
+ */
+static __inline NDIS_STATUS
+OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx,
+ OvsFlowKey *key,
+ UINT64 *hash,
+ const struct nlattr *a)
+{
+ enum ovs_key_attr type = nl_attr_type(a);
+ NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+
+ switch (type) {
+ case OVS_KEY_ATTR_ETHERNET:
+ status = OvsUpdateEthHeader(ovsFwdCtx,
+ nl_attr_get_unspec(a, sizeof(struct ovs_key_ethernet)));
+ break;
+
+ case OVS_KEY_ATTR_IPV4:
+ status = OvsUpdateIPv4Header(ovsFwdCtx,
+ nl_attr_get_unspec(a, sizeof(struct ovs_key_ipv4)));
+ break;
+
+ case OVS_KEY_ATTR_TUNNEL:
+ {
+ OvsIPv4TunnelKey tunKey;
+
+ status = OvsTunnelAttrToIPv4TunnelKey((struct nlattr *)a, &tunKey);
+ ASSERT(status == NDIS_STATUS_SUCCESS);
+ tunKey.flow_hash = (uint16)(hash ? *hash : OvsHashFlow(key));
+ RtlCopyMemory(&ovsFwdCtx->tunKey, &tunKey, sizeof ovsFwdCtx->tunKey);
+
+ break;
+ }
+ case OVS_KEY_ATTR_SKB_MARK:
+ /* XXX: Not relevant to Hyper-V. Return OK */
+ break;
+ case OVS_KEY_ATTR_UNSPEC:
+ case OVS_KEY_ATTR_ENCAP:
+ case OVS_KEY_ATTR_ETHERTYPE:
+ case OVS_KEY_ATTR_IN_PORT:
+ case OVS_KEY_ATTR_VLAN:
+ case OVS_KEY_ATTR_ICMP:
+ case OVS_KEY_ATTR_ICMPV6:
+ case OVS_KEY_ATTR_ARP:
+ case OVS_KEY_ATTR_ND:
+ case __OVS_KEY_ATTR_MAX:
+ default:
+ OVS_LOG_INFO("Unhandled attribute %#x", type);
+ ASSERT(FALSE);
+ }
+ return status;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsActionsExecute --
+ * Interpret and execute the specified 'actions' on the specifed packet
+ * 'curNbl'. The expectation is that if the packet needs to be dropped
+ * (completed) for some reason, it is added to 'completionList' so that the
+ * caller can complete the packet. If 'completionList' is NULL, the NBL is
+ * assumed to be generated by OVS and freed up. Otherwise, the function
+ * consumes the NBL by generating a NDIS send indication for the packet.
+ *
+ * There are one or more of "clone" NBLs that may get generated while
+ * executing the actions. Upon any failures, the "cloned" NBLs are freed up,
+ * and the caller does not have to worry about them.
+ *
+ * Success or failure is returned based on whether the specified actions
+ * were executed successfully on the packet or not.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext,
+ OvsCompletionList *completionList,
+ PNET_BUFFER_LIST curNbl,
+ UINT32 portNo,
+ ULONG sendFlags,
+ OvsFlowKey *key,
+ UINT64 *hash,
+ OVS_PACKET_HDR_INFO *layers,
+ const struct nlattr *actions,
+ INT actionsLen)
+{
+ const struct nlattr *a;
+ INT rem;
+ UINT32 dstPortID;
+ OvsForwardingContext ovsFwdCtx;
+ PCWSTR dropReason = L"";
+ NDIS_STATUS status;
+ PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail =
+ NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl);
+
+ /* XXX: ASSERT that the flow table lock is held. */
+ status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl, portNo,
+ sendFlags, fwdDetail, completionList,
+ layers, TRUE);
+ if (status != NDIS_STATUS_SUCCESS) {
+ dropReason = L"OVS-initing destination port list failed";
+ goto dropit;
+ }
+
+ if (actionsLen == 0) {
+ dropReason = L"OVS-Dropped due to Flow action";
+ ovsActionStats.zeroActionLen++;
+ goto dropit;
+ }
+
+ NL_ATTR_FOR_EACH_UNSAFE (a, rem, actions, actionsLen) {
+ switch(nl_attr_type(a)) {
+ case OVS_ACTION_ATTR_OUTPUT:
+ dstPortID = nl_attr_get_u32(a);
+ status = OvsAddPorts(&ovsFwdCtx, key, dstPortID,
+ TRUE, TRUE);
+ if (status != NDIS_STATUS_SUCCESS) {
+ dropReason = L"OVS-adding destination port failed";
+ goto dropit;
+ }
+ break;
+
+ case OVS_ACTION_ATTR_PUSH_VLAN:
+ {
+ struct ovs_action_push_vlan *vlan;
+ PVOID vlanTagValue;
+ PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag;
+
+ if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
+ || ovsFwdCtx.tunnelRxNic != NULL) {
+ status = OvsOutputBeforeSetAction(&ovsFwdCtx);
+ if (status != NDIS_STATUS_SUCCESS) {
+ dropReason = L"OVS-adding destination failed";
+ goto dropit;
+ }
+ }
+
+ vlanTagValue = NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
+ Ieee8021QNetBufferListInfo);
+ if (vlanTagValue != NULL) {
+ /*
+ * XXX: We don't support double VLAN tag offload. In such cases,
+ * we need to insert the existing one into the packet buffer,
+ * and add the new one as offload. This will take care of
+ * guest tag-in-tag case as well as OVS rules that specify
+ * tag-in-tag.
+ */
+ } else {
+ vlanTagValue = 0;
+ vlanTag = (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue;
+ vlan = (struct ovs_action_push_vlan *)nl_attr_get(a);
+ vlanTag->TagHeader.VlanId = ntohs(vlan->vlan_tci) & 0xfff;
+ vlanTag->TagHeader.UserPriority = ntohs(vlan->vlan_tci) >> 13;
+
+ NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
+ Ieee8021QNetBufferListInfo) = vlanTagValue;
+ }
+ break;
+ }
+
+ case OVS_ACTION_ATTR_POP_VLAN:
+ {
+ if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
+ || ovsFwdCtx.tunnelRxNic != NULL) {
+ status = OvsOutputBeforeSetAction(&ovsFwdCtx);
+ if (status != NDIS_STATUS_SUCCESS) {
+ dropReason = L"OVS-adding destination failed";
+ goto dropit;
+ }
+ }
+
+ if (NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
+ Ieee8021QNetBufferListInfo) != 0) {
+ NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
+ Ieee8021QNetBufferListInfo) = 0;
+ } else {
+ /*
+ * The VLAN tag is inserted into the packet buffer. Pop the tag
+ * by packet buffer modification.
+ */
+ status = OvsPopVlanInPktBuf(&ovsFwdCtx);
+ if (status != NDIS_STATUS_SUCCESS) {
+ dropReason = L"OVS-pop vlan action failed";
+ goto dropit;
+ }
+ }
+ break;
+ }
+
+ case OVS_ACTION_ATTR_USERSPACE:
+ {
+ const struct nlattr *userdata_attr;
+ const struct nlattr *queue_attr;
+ POVS_PACKET_QUEUE_ELEM elem;
+ UINT32 queueId = OVS_DEFAULT_PACKET_QUEUE;
+ //XXX confusing that portNo is actually portId for external port.
+ BOOLEAN isRecv = (portNo == switchContext->externalPortId)
+ || OvsIsTunnelVportNo(portNo);
+
+ queue_attr = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_PID);
+ userdata_attr = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
+
+ elem = OvsCreateQueuePacket(queueId, (PVOID)userdata_attr,
+ userdata_attr->nla_len,
+ OVS_PACKET_CMD_ACTION,
+ portNo, (OvsIPv4TunnelKey *)&key->tunKey,
+ ovsFwdCtx.curNbl,
+ NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx.curNbl),
+ isRecv,
+ layers);
+ if (elem) {
+ LIST_ENTRY missedPackets;
+ InitializeListHead(&missedPackets);
+ InsertTailList(&missedPackets, &elem->link);
+ OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, 1);
+ dropReason = L"OVS-Completed since packet was copied to "
+ L"userspace";
+ } else {
+ dropReason = L"OVS-Dropped due to failure to queue to "
+ L"userspace";
+ goto dropit;
+ }
+ break;
+ }
+ case OVS_ACTION_ATTR_SET:
+ {
+ if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
+ || ovsFwdCtx.tunnelRxNic != NULL) {
+ status = OvsOutputBeforeSetAction(&ovsFwdCtx);
+ if (status != NDIS_STATUS_SUCCESS) {
+ dropReason = L"OVS-adding destination failed";
+ goto dropit;
+ }
+ }
+
+ status = OvsExecuteSetAction(&ovsFwdCtx, key, hash,
+ nl_attr_get(a));
+ if (status != NDIS_STATUS_SUCCESS) {
+ dropReason = L"OVS-set action failed";
+ goto dropit;
+ }
+ break;
+ }
+ case OVS_ACTION_ATTR_SAMPLE:
+ break;
+ case OVS_ACTION_ATTR_UNSPEC:
+ case __OVS_ACTION_ATTR_MAX:
+ default:
+ break;
+ }
+ }
+
+ if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
+ || ovsFwdCtx.tunnelRxNic != NULL) {
+ status = OvsOutputForwardingCtx(&ovsFwdCtx);
+ ASSERT(ovsFwdCtx.curNbl == NULL);
+ }
+
+ ASSERT(ovsFwdCtx.destPortsSizeOut == 0);
+ ASSERT(ovsFwdCtx.tunnelRxNic == NULL);
+ ASSERT(ovsFwdCtx.tunnelTxNic == NULL);
+
+dropit:
+ /*
+ * If curNbl != NULL, it implies the NBL has not been not freed up so far.
+ */
+ if (ovsFwdCtx.curNbl) {
+ OvsCompleteNBLForwardingCtx(&ovsFwdCtx, dropReason);
+ }
+
+ return status;
+}
diff --git a/datapath-windows/ovsext/OvsAtomic.h b/datapath-windows/ovsext/OvsAtomic.h
new file mode 100644
index 000000000..a94d1fb15
--- /dev/null
+++ b/datapath-windows/ovsext/OvsAtomic.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_ATOMIC_H_
+#define __OVS_ATOMIC_H_ 1
+
+ /*
+  * Adds 'val' to the 64-bit value at 'ptr' via InterlockedAdd64() and returns
+  * that call's result converted to UINT64.
+  */
+ static __inline UINT64
+ atomic_add64(UINT64 *ptr, UINT32 val)
+ {
+     LONGLONG volatile *target = (LONGLONG volatile *) ptr;
+     LONGLONG addend = (LONGLONG) val;
+
+     return InterlockedAdd64(target, addend);
+ }
+
+ /*
+  * Increments the 64-bit value at 'ptr' via InterlockedIncrement64() and
+  * returns that call's result converted to UINT64.
+  */
+ static __inline UINT64
+ atomic_inc64(UINT64 *ptr)
+ {
+     LONGLONG volatile *target = (LONGLONG volatile *) ptr;
+
+     return InterlockedIncrement64(target);
+ }
+
+#endif /* __OVS_ATOMIC_H_ */
diff --git a/datapath-windows/ovsext/OvsBufferMgmt.c b/datapath-windows/ovsext/OvsBufferMgmt.c
new file mode 100644
index 000000000..8aa806061
--- /dev/null
+++ b/datapath-windows/ovsext/OvsBufferMgmt.c
@@ -0,0 +1,1535 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * ****************************************************************************
+ *
+ * Simple Buffer Management framework for OVS
+ *
+ * It introduces four NDIS buffer pools
+ * **Fix size net buffer list pool--this is used for small buffer
+ * One allocation will include NBL + NB + MDL + Data + CONTEXT.
+ *
+ * **Variable size net buffer list pool--this is used for variable size
+ * buffer. The allocation of net buffer list will include NBL + NB +
+ * CONTEXT, a separate allocation of MDL + data buffer is required.
+ *
+ * **NBL only net buffer list pool-- this is used for partial copy
+ * (or clone). In this case we can not allocate net buffer list and
+ * net buffer at the same time.
+ *
+ * **Net buffer pool-- this is required when net buffer need to be
+ * allocated separately.
+ *
+ * A Buffer context is defined to track the buffer specific information
+ * so that during NBL completion, proper action can be taken. Please see
+ * code for details.
+ *
+ * Here is how the management API is used:
+ * Every external NBL must first have its NBL context initialized by calling
+ * OvsInitExternalNBLContext()
+ *
+ * After the external NBL context is initialized, it can call the following
+ * API to allocate, copy or partial copy NBL.
+ *
+ * OvsAllocateFixSizeNBL()
+ * OvsAllocateVariableSizeNBL()
+ *
+ * OvsPartialCopyNBL()
+ * OvsPartialCopyToMultipleNBLs()
+ *
+ * OvsFullCopyNBL()
+ * OvsFullCopyToMultipleNBLs()
+ *
+ * See code comments for detail description of the functions.
+ *
+ * All NBLs are completed through
+ * OvsCompleteNBL()
+ * If this API returns a non-NULL value, then the returned NBL should be
+ * returned to the upper layer by calling
+ * NdisFSendNetBufferListsComplete() if the buffer is from the upper
+ * layer. In case of WFP, it can call the corresponding completion routine
+ * to return the NBL to the framework.
+ *
+ * NOTE:
+ * 1. Copy or partial copy will not copy destination port array
+ * 2. Copy or partial copy will copy src port id and index
+ * 3. New Allocated NBL will have src port set to default port id
+ * 4. If original packet has direction flag set, the copied or partial
+ * copied NBL will still be in same direction.
+ * 5. When you advance or retreat the buffer, you may need to update the
+ * relevant metadata to keep it consistent.
+ *
+ * ****************************************************************************
+ */
+
+#include "precomp.h"
+#include "OvsSwitch.h"
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_BUFMGMT
+#include "OvsDebug.h"
+#include "OvsNetProto.h"
+#include "OvsFlow.h"
+#include "OvsChecksum.h"
+#include "OvsPacketParser.h"
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsInitBufferPool --
+ *
+ * Allocate NBL and NB pool
+ *
+ * XXX: more optimization may be done for buffer management include local cache
+ * of NBL, NB, data, context, MDL.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsInitBufferPool(PVOID ovsContext)
+{
+ POVS_NBL_POOL ovsPool;
+ POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
+ NET_BUFFER_LIST_POOL_PARAMETERS nblParam;
+ NET_BUFFER_POOL_PARAMETERS nbParam;
+
+ C_ASSERT(MEMORY_ALLOCATION_ALIGNMENT >= 8);
+
+ OVS_LOG_TRACE("Enter: context: %p", context);
+
+ ovsPool = &context->ovsPool;
+ RtlZeroMemory(ovsPool, sizeof (OVS_NBL_POOL));
+ ovsPool->ndisHandle = context->NdisFilterHandle;
+ ovsPool->ndisContext = context->NdisSwitchContext;
+ /*
+ * fix size NBL pool includes
+ * NBL + NB + MDL + DATA + Context
+ * This is mainly used for Packet execute or slow path when copy is
+ * required and size is less than OVS_DEFAULT_DATA_SIZE. We expect
+ * Most of packet from user space will use this Pool. (This is
+ * true for all bfd and cfm packet.
+ */
+ RtlZeroMemory(&nblParam, sizeof (nblParam));
+ OVS_INIT_OBJECT_HEADER(&nblParam.Header,
+ NDIS_OBJECT_TYPE_DEFAULT,
+ NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1,
+ NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1);
+ nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE;
+ nblParam.PoolTag = OVS_FIX_SIZE_NBL_POOL_TAG;
+ nblParam.fAllocateNetBuffer = TRUE;
+ nblParam.DataSize = OVS_DEFAULT_DATA_SIZE + OVS_DEFAULT_HEADROOM_SIZE;
+
+ ovsPool->fixSizePool =
+ NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam);
+ if (ovsPool->fixSizePool == NULL) {
+ goto pool_cleanup;
+ }
+
+ /*
+ * Zero Size NBL Pool includes
+ * NBL + NB + Context
+ * This is mainly for packet with large data Size, in this case MDL and
+ * Data will be allocate separately.
+ */
+ RtlZeroMemory(&nblParam, sizeof (nblParam));
+ OVS_INIT_OBJECT_HEADER(&nblParam.Header,
+ NDIS_OBJECT_TYPE_DEFAULT,
+ NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1,
+ NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1);
+
+ nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE;
+ nblParam.PoolTag = OVS_VARIABLE_SIZE_NBL_POOL_TAG;
+ nblParam.fAllocateNetBuffer = TRUE;
+ nblParam.DataSize = 0;
+
+ ovsPool->zeroSizePool =
+ NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam);
+ if (ovsPool->zeroSizePool == NULL) {
+ goto pool_cleanup;
+ }
+
+ /*
+ * NBL only pool just includes
+ * NBL (+ context)
+ * This is mainly used for clone and partial copy
+ */
+ RtlZeroMemory(&nblParam, sizeof (nblParam));
+ OVS_INIT_OBJECT_HEADER(&nblParam.Header,
+ NDIS_OBJECT_TYPE_DEFAULT,
+ NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1,
+ NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1);
+
+ nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE;
+ nblParam.PoolTag = OVS_NBL_ONLY_POOL_TAG;
+ nblParam.fAllocateNetBuffer = FALSE;
+ nblParam.DataSize = 0;
+
+ ovsPool->nblOnlyPool =
+ NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam);
+ if (ovsPool->nblOnlyPool == NULL) {
+ goto pool_cleanup;
+ }
+
+ /* nb Pool
+ * NB only pool, used for copy
+ */
+
+ OVS_INIT_OBJECT_HEADER(&nbParam.Header,
+ NDIS_OBJECT_TYPE_DEFAULT,
+ NET_BUFFER_POOL_PARAMETERS_REVISION_1,
+ NDIS_SIZEOF_NET_BUFFER_POOL_PARAMETERS_REVISION_1);
+ nbParam.PoolTag = OVS_NET_BUFFER_POOL_TAG;
+ nbParam.DataSize = 0;
+ ovsPool->nbPool =
+ NdisAllocateNetBufferPool(context->NdisSwitchContext, &nbParam);
+ if (ovsPool->nbPool == NULL) {
+ goto pool_cleanup;
+ }
+ OVS_LOG_TRACE("Exit: fixSizePool: %p zeroSizePool: %p nblOnlyPool: %p"
+ "nbPool: %p", ovsPool->fixSizePool, ovsPool->zeroSizePool,
+ ovsPool->nblOnlyPool, ovsPool->nbPool);
+ return NDIS_STATUS_SUCCESS;
+
+pool_cleanup:
+ OvsCleanupBufferPool(context);
+ OVS_LOG_TRACE("Exit: Fail to initialize ovs buffer pool");
+ return NDIS_STATUS_RESOURCES;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsCleanupBufferPool --
+ * Free Buffer pool for NBL and NB.
+ * --------------------------------------------------------------------------
+ */
+VOID
+OvsCleanupBufferPool(PVOID ovsContext)
+{
+ POVS_NBL_POOL ovsPool;
+ POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
+ ovsPool = &context->ovsPool;
+ OVS_LOG_TRACE("Enter: context: %p", context);
+#ifdef DBG
+ ASSERT(ovsPool->fixNBLCount == 0);
+ ASSERT(ovsPool->zeroNBLCount == 0);
+ ASSERT(ovsPool->nblOnlyCount == 0);
+ ASSERT(ovsPool->nbCount == 0);
+ ASSERT(ovsPool->sysNBLCount == 0);
+ ASSERT(ovsPool->fragNBLCount == 0);
+#endif
+
+ if (ovsPool->fixSizePool) {
+ NdisFreeNetBufferListPool(ovsPool->fixSizePool);
+ ovsPool->fixSizePool = NULL;
+ }
+ if (ovsPool->zeroSizePool) {
+ NdisFreeNetBufferListPool(ovsPool->zeroSizePool);
+ ovsPool->zeroSizePool = NULL;
+ }
+ if (ovsPool->nblOnlyPool) {
+ NdisFreeNetBufferListPool(ovsPool->nblOnlyPool);
+ ovsPool->nblOnlyPool = NULL;
+ }
+ if (ovsPool->nbPool) {
+ NdisFreeNetBufferPool(ovsPool->nbPool);
+ ovsPool->nbPool = NULL;
+ }
+ OVS_LOG_TRACE("Exit: cleanup OVS Buffer pool");
+}
+
+
+ /*
+  * Fills in a buffer context: stores the caller-supplied flags, original data
+  * length and source port number, stamps the magic value and sets the
+  * reference count to 1.
+  */
+ static VOID
+ OvsInitNBLContext(POVS_BUFFER_CONTEXT ctx,
+                   UINT16 flags,
+                   UINT32 origDataLength,
+                   UINT32 srcPortNo)
+ {
+     /* Caller-supplied values. */
+     ctx->flags = flags;
+     ctx->origDataLength = origDataLength;
+     ctx->srcPortNo = srcPortNo;
+
+     /* Fixed initial state. */
+     ctx->refCount = 1;
+     ctx->magic = OVS_CTX_MAGIC;
+ }
+
+
+ /*
+  * Logs the switch forwarding detail of 'nbl' (available destinations, source
+  * port id / NIC index and the data-safety fields). Does nothing when the NBL
+  * has no forwarding detail.
+  */
+ static VOID
+ OvsDumpForwardingDetails(PNET_BUFFER_LIST nbl)
+ {
+     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO detail =
+         NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl);
+
+     if (detail != NULL) {
+         /* The safe data size is reported as 0 when the data is safe. */
+         OVS_LOG_INFO("nbl: %p, numAvailableDest: %d, srcId:%d, srcIndex: %d "
+                      "isDataSafe: %s, safeDataSize: %d",
+                      nbl, detail->NumAvailableDestinations,
+                      detail->SourcePortId, detail->SourceNicIndex,
+                      detail->IsPacketDataSafe ? "TRUE" : "FALSE",
+                      detail->IsPacketDataSafe ? 0 : detail->SafePacketDataSize);
+     }
+ }
+
+ /*
+  * Logs size and offset of every entry in the context chain of 'nbl', or a
+  * single message when the NBL has no context at all.
+  */
+ static VOID
+ OvsDumpNBLContext(PNET_BUFFER_LIST nbl)
+ {
+     PNET_BUFFER_LIST_CONTEXT cur = nbl->Context;
+
+     if (cur == NULL) {
+         OVS_LOG_INFO("No Net Buffer List context");
+         return;
+     }
+
+     for (; cur != NULL; cur = cur->Next) {
+         OVS_LOG_INFO("nbl: %p, ctx: %p, TotalSize: %d, Offset: %d",
+                      nbl, cur, cur->Size, cur->Offset);
+     }
+ }
+
+
+ /*
+  * Logs the fields of every MDL in the chain starting at 'mdl'. A NULL 'mdl'
+  * logs nothing.
+  */
+ static VOID
+ OvsDumpMDLChain(PMDL mdl)
+ {
+     PMDL cur;
+
+     for (cur = mdl; cur != NULL; cur = cur->Next) {
+         OVS_LOG_INFO("MDL: %p, Size: %d, MappedSystemVa: %p, StartVa: %p"
+                      " ByteCount: %d, ByteOffset: %d",
+                      cur, cur->Size, cur->MappedSystemVa,
+                      cur->StartVa, cur->ByteCount, cur->ByteOffset);
+     }
+ }
+
+
+ /*
+  * Logs the bookkeeping fields of a single net buffer and then dumps the MDL
+  * chain that starts at its first MDL.
+  */
+ static VOID
+ OvsDumpNetBuffer(PNET_BUFFER nb)
+ {
+     PMDL firstMdl = NET_BUFFER_FIRST_MDL(nb);
+
+     OVS_LOG_INFO("NET_BUFFER: %p, ChecksumBias: %d Handle: %p, MDLChain: %p "
+                  "CurrMDL: %p, CurrOffset: %d, DataLen: %d, Offset: %d",
+                  nb,
+                  NET_BUFFER_CHECKSUM_BIAS(nb), nb->NdisPoolHandle,
+                  firstMdl,
+                  NET_BUFFER_CURRENT_MDL(nb),
+                  NET_BUFFER_CURRENT_MDL_OFFSET(nb),
+                  NET_BUFFER_DATA_LENGTH(nb),
+                  NET_BUFFER_DATA_OFFSET(nb));
+     OvsDumpMDLChain(firstMdl);
+ }
+
+
+/*
+ * Log the NBL header fields, its context chain, and every NET_BUFFER it
+ * contains.
+ */
+static VOID
+OvsDumpNetBufferList(PNET_BUFFER_LIST nbl)
+{
+    PNET_BUFFER cur;
+
+    OVS_LOG_INFO("NBL: %p, parent: %p, SrcHandle: %p, ChildCount:%d "
+                 "poolHandle: %p",
+                 nbl, nbl->ParentNetBufferList,
+                 nbl->SourceHandle, nbl->ChildRefCount,
+                 nbl->NdisPoolHandle);
+    OvsDumpNBLContext(nbl);
+
+    for (cur = NET_BUFFER_LIST_FIRST_NB(nbl); cur != NULL;
+         cur = NET_BUFFER_NEXT_NB(cur)) {
+        OvsDumpNetBuffer(cur);
+    }
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsAllocateFixSizeNBL --
+ *
+ *    Allocate a fixed-size NBL from the fix-size pool, which includes
+ *    NBL + NB + MDL + Data + Context.
+ *
+ *    ovsContext: switch context (POVS_SWITCH_CONTEXT) that owns the pool.
+ *    size:       number of data bytes to expose in the NB; must be non-zero.
+ *    headRoom:   headroom requested by the caller. Here it is only used to
+ *                validate that size + headRoom fits in
+ *                OVS_FIX_NBL_DATA_SIZE.
+ *
+ *    Returns the new NBL, or NULL when the size check fails, the NBL cannot
+ *    be allocated, or the forwarding context cannot be allocated.
+ *
+ *    Please note:
+ *    * Forwarding Context is allocated, but the forwarding detail
+ *      information is not fully initialized: only IsPacketDataSafe and
+ *      SourcePortId are set.
+ *    * size + headRoom can not be larger than OVS_FIX_NBL_DATA_SIZE.
+ * --------------------------------------------------------------------------
+ */
+PNET_BUFFER_LIST
+OvsAllocateFixSizeNBL(PVOID ovsContext,
+                      UINT32 size,
+                      UINT32 headRoom)
+{
+    PNET_BUFFER_LIST nbl = NULL;
+    POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
+    POVS_BUFFER_CONTEXT ctx;
+    POVS_NBL_POOL ovsPool = &context->ovsPool;
+    NDIS_STATUS status;
+    UINT32 line;
+    PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info;
+
+    /*
+     * Validate size and headRoom individually instead of testing their sum:
+     * the UINT32 addition could wrap around and let an oversized request
+     * slip past the limit.
+     */
+    if (size == 0 || size > OVS_FIX_NBL_DATA_SIZE ||
+        headRoom > OVS_FIX_NBL_DATA_SIZE - size) {
+        line = __LINE__;
+        goto allocate_done;
+    }
+
+    nbl = NdisAllocateNetBufferList(ovsPool->fixSizePool,
+                                    (UINT16)sizeof (OVS_BUFFER_CONTEXT),
+                                    (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL);
+
+    if (nbl == NULL) {
+        line = __LINE__;
+        goto allocate_done;
+    }
+
+    nbl->SourceHandle = ovsPool->ndisHandle;
+    status = context->NdisSwitchHandlers.
+             AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
+
+    if (status != NDIS_STATUS_SUCCESS) {
+        NdisFreeNetBufferList(nbl);
+        nbl = NULL;
+        line = __LINE__;
+        goto allocate_done;
+    }
+    info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl);
+    ASSERT(info);
+    info->IsPacketDataSafe = TRUE;
+    info->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
+
+    /* Expose 'size' bytes of data in the NB. */
+    status = NdisRetreatNetBufferDataStart(NET_BUFFER_LIST_FIRST_NB(nbl),
+                                           size, 0, NULL);
+    ASSERT(status == NDIS_STATUS_SUCCESS);
+
+#ifdef DBG
+    InterlockedIncrement((LONG volatile *)&ovsPool->fixNBLCount);
+    OvsDumpNetBufferList(nbl);
+    OvsDumpForwardingDetails(nbl);
+#endif
+
+    ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
+    ASSERT(ctx);
+
+    OvsInitNBLContext(ctx, OVS_BUFFER_FROM_FIX_SIZE_POOL |
+                      OVS_BUFFER_PRIVATE_FORWARD_CONTEXT, size,
+                      OVS_DEFAULT_PORT_NO);
+    line = __LINE__;
+allocate_done:
+    /* 'line' records which exit path was taken, for the log only. */
+    OVS_LOG_LOUD("Allocate Fix NBL: %p, line: %d", nbl, line);
+    return nbl;
+}
+
+
+/*
+ * Allocate a 'dataSize'-byte buffer together with an MDL describing it.
+ * Returns the MDL, or NULL if either allocation fails; on MDL failure the
+ * buffer is released again.
+ */
+static PMDL
+OvsAllocateMDLAndData(NDIS_HANDLE ndisHandle,
+                      UINT32 dataSize)
+{
+    PMDL mdl = NULL;
+    PVOID buffer = OvsAllocateMemory(dataSize);
+
+    if (buffer != NULL) {
+        mdl = NdisAllocateMdl(ndisHandle, buffer, dataSize);
+        if (mdl == NULL) {
+            OvsFreeMemory(buffer);
+        }
+    }
+
+    return mdl;
+}
+
+
+/*
+ * Release an MDL and the data buffer it describes.
+ */
+static VOID
+OvsFreeMDLAndData(PMDL mdl)
+{
+    /* Read the buffer address out of the MDL before the MDL is freed. */
+    PVOID buffer = MmGetMdlVirtualAddress(mdl);
+
+    NdisFreeMdl(mdl);
+    OvsFreeMemory(buffer);
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsAllocateVariableSizeNBL --
+ *
+ *    Allocate variable size NBL, the NBL looks like
+ *      NBL + NB + Context
+ *      MDL + Data
+ *
+ *    ovsContext: switch context (POVS_SWITCH_CONTEXT) that owns the pools.
+ *    size:       number of data bytes to expose in the NB; must be non-zero.
+ *    headRoom:   extra bytes allocated in the data buffer in addition to
+ *                'size'.
+ *
+ *    Returns the new NBL, or NULL when size is zero, size + headRoom does
+ *    not fit in a UINT32 after alignment, or any allocation fails.
+ * --------------------------------------------------------------------------
+ */
+PNET_BUFFER_LIST
+OvsAllocateVariableSizeNBL(PVOID ovsContext,
+                           UINT32 size,
+                           UINT32 headRoom)
+{
+    PNET_BUFFER_LIST nbl = NULL;
+    POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
+    POVS_NBL_POOL ovsPool = &context->ovsPool;
+    POVS_BUFFER_CONTEXT ctx;
+    UINT32 totalSize;
+    UINT32 realSize;
+    PMDL mdl;
+    NDIS_STATUS status;
+    PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info;
+
+    if (size == 0) {
+        return NULL;
+    }
+
+    /*
+     * Reject requests whose total wraps around in UINT32, either in the
+     * addition or in the alignment round-up. Otherwise the data buffer
+     * would be smaller than the 'size' bytes exposed below.
+     */
+    totalSize = size + headRoom;
+    if (totalSize < size) {
+        return NULL;
+    }
+    realSize = MEM_ALIGN_SIZE(totalSize);
+    if (realSize < totalSize) {
+        return NULL;
+    }
+
+    mdl = OvsAllocateMDLAndData(ovsPool->ndisHandle, realSize);
+    if (mdl == NULL) {
+        return NULL;
+    }
+
+    /* The NB starts empty, with its data offset at the end of the buffer. */
+    nbl = NdisAllocateNetBufferAndNetBufferList(ovsPool->zeroSizePool,
+                                         (UINT16)sizeof (OVS_BUFFER_CONTEXT),
+                                         (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL,
+                                         mdl, realSize, 0);
+    if (nbl == NULL) {
+        OvsFreeMDLAndData(mdl);
+        return NULL;
+    }
+
+    nbl->SourceHandle = ovsPool->ndisHandle;
+    status = context->NdisSwitchHandlers.
+             AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
+
+    if (status != NDIS_STATUS_SUCCESS) {
+        /*
+         * Detach the private MDL from the NB before freeing it, the same
+         * way OvsCompleteNBL() does, so the NB never holds a pointer to
+         * freed memory while the NBL is being released.
+         */
+        NET_BUFFER_FIRST_MDL(NET_BUFFER_LIST_FIRST_NB(nbl)) = NULL;
+        OvsFreeMDLAndData(mdl);
+        NdisFreeNetBufferList(nbl);
+        return NULL;
+    }
+
+    info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl);
+    ASSERT(info);
+    info->IsPacketDataSafe = TRUE;
+    info->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
+
+    /* Expose 'size' bytes of data in the NB. */
+    status = NdisRetreatNetBufferDataStart(NET_BUFFER_LIST_FIRST_NB(nbl),
+                                           size, 0, NULL);
+    ASSERT(status == NDIS_STATUS_SUCCESS);
+
+#ifdef DBG
+    InterlockedIncrement((LONG volatile *)&ovsPool->zeroNBLCount);
+    OvsDumpNetBufferList(nbl);
+    OvsDumpForwardingDetails(nbl);
+#endif
+
+    ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
+
+    OvsInitNBLContext(ctx, OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA |
+                      OVS_BUFFER_PRIVATE_FORWARD_CONTEXT |
+                      OVS_BUFFER_FROM_ZERO_SIZE_POOL,
+                      size, OVS_DEFAULT_PORT_NO);
+
+    OVS_LOG_LOUD("Allocate variable size NBL: %p", nbl);
+    return nbl;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsInitExternalNBLContext --
+ *
+ *    Attach and initialize an OVS buffer context on an NBL that was not
+ *    allocated by OVS. If the NBL's pool handle matches the OVS pool's NDIS
+ *    handle, the existing context is returned unchanged.
+ *
+ *    isRecv selects OVS_BUFFER_RECV_BUFFER (TRUE) or OVS_BUFFER_SEND_BUFFER
+ *    (FALSE) in the new context's flags.
+ *
+ *    Returns the context, or NULL if the context cannot be allocated.
+ * --------------------------------------------------------------------------
+ */
+POVS_BUFFER_CONTEXT
+OvsInitExternalNBLContext(PVOID ovsContext,
+                          PNET_BUFFER_LIST nbl,
+                          BOOLEAN isRecv)
+{
+    POVS_SWITCH_CONTEXT switchCtx = (POVS_SWITCH_CONTEXT)ovsContext;
+    POVS_BUFFER_CONTEXT bufCtx;
+    UINT16 ctxFlags;
+
+    if (NdisGetPoolFromNetBufferList(nbl) == switchCtx->ovsPool.ndisHandle) {
+        return (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
+    }
+
+    if (NdisAllocateNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT),
+                                         OVS_DEFAULT_NBL_CONTEXT_FILL,
+                                         OVS_OTHER_POOL_TAG)
+        != NDIS_STATUS_SUCCESS) {
+        return NULL;
+    }
+#ifdef DBG
+    OvsDumpNBLContext(nbl);
+    InterlockedIncrement((LONG volatile *)&switchCtx->ovsPool.sysNBLCount);
+#endif
+
+    if (isRecv) {
+        ctxFlags = OVS_BUFFER_RECV_BUFFER;
+    } else {
+        ctxFlags = OVS_BUFFER_SEND_BUFFER;
+    }
+    ctxFlags |= OVS_BUFFER_NEED_COMPLETE | OVS_BUFFER_PRIVATE_CONTEXT;
+
+    bufCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
+
+    /*
+     * The data length of the first NB is recorded so that completion can
+     * decide whether an advance or a retreat is needed.
+     */
+    OvsInitNBLContext(bufCtx, ctxFlags,
+                      NET_BUFFER_DATA_LENGTH(NET_BUFFER_LIST_FIRST_NB(nbl)),
+                      OVS_DEFAULT_PORT_NO);
+    return bufCtx;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsAllocateNBLContext
+ *
+ *    Allocate both the OVS buffer context and the switch forwarding context
+ *    for 'nbl'. Returns NDIS_STATUS_SUCCESS when both exist; otherwise
+ *    NDIS_STATUS_FAILURE, with the buffer context released again if it was
+ *    the forwarding context that failed.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsAllocateNBLContext(POVS_SWITCH_CONTEXT context,
+                      PNET_BUFFER_LIST nbl)
+{
+    POVS_NBL_POOL pool = &context->ovsPool;
+
+    if (NdisAllocateNetBufferListContext(nbl,
+                                         sizeof (OVS_BUFFER_CONTEXT),
+                                         OVS_DEFAULT_NBL_CONTEXT_FILL,
+                                         OVS_OTHER_POOL_TAG)
+        != NDIS_STATUS_SUCCESS) {
+        return NDIS_STATUS_FAILURE;
+    }
+
+    nbl->SourceHandle = pool->ndisHandle;
+    if (context->NdisSwitchHandlers.
+        AllocateNetBufferListForwardingContext(pool->ndisContext, nbl)
+        == NDIS_STATUS_SUCCESS) {
+        return NDIS_STATUS_SUCCESS;
+    }
+
+    /* Undo the buffer context allocation made above. */
+    NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT));
+    return NDIS_STATUS_FAILURE;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsFreeNBLContext
+ *
+ *    Release the switch forwarding context and then the OVS buffer context
+ *    of 'nbl'. Always returns NDIS_STATUS_SUCCESS.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsFreeNBLContext(POVS_SWITCH_CONTEXT context,
+                  PNET_BUFFER_LIST nbl)
+{
+    context->NdisSwitchHandlers.
+        FreeNetBufferListForwardingContext(context->ovsPool.ndisContext, nbl);
+    NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT));
+
+    return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsCopyNBLInfo
+ *
+ * Copy NBL forwarding detail (and optionally the NDIS send/receive NBL
+ * info) from srcNbl to dstNbl.
+ *
+ * srcCtx: OVS buffer context of srcNbl; its RECV/SEND flag selects which
+ * NDIS copy routine is used when copyNblInfo is TRUE.
+ * copySize: number of bytes the caller copied out of srcNbl; used to raise
+ * the source's SafePacketDataSize when the source data is not safe.
+ * copyNblInfo: when TRUE, also copy the NDIS NBL info.
+ *
+ * Returns NDIS_STATUS_SUCCESS, or the failure status of
+ * CopyNetBufferListInfo when OVS_USE_COPY_NET_BUFFER_LIST_INFO is defined.
+ *
+ * dstInfo is dereferenced without a NULL check, so dstNbl must already
+ * have a forwarding context.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsCopyNBLInfo(PNET_BUFFER_LIST srcNbl, PNET_BUFFER_LIST dstNbl,
+ POVS_BUFFER_CONTEXT srcCtx, UINT32 copySize,
+ BOOLEAN copyNblInfo)
+{
+ PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO srcInfo, dstInfo;
+ NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+
+ srcInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(srcNbl);
+ dstInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(dstNbl);
+ if (srcInfo) {
+#ifdef OVS_USE_COPY_NET_BUFFER_LIST_INFO
+ /*
+ * NOTE(review): 'context' and 'ovsPool' are neither parameters nor
+ * locals of this function, so this branch cannot compile as written if
+ * OVS_USE_COPY_NET_BUFFER_LIST_INFO is ever defined.
+ */
+ status = context->NdisSwitchHandlers.
+ CopyNetBufferListInfo(ovsPool->ndisContext, dstNbl, srcNbl, 0);
+
+ if (status != NDIS_STATUS_SUCCESS) {
+ return status;
+ }
+#else
+ /* Manual copy of the source port identity. */
+ dstInfo->SourcePortId = srcInfo->SourcePortId;
+ dstInfo->SourceNicIndex = srcInfo->SourceNicIndex;
+ if (copyNblInfo) {
+ /* Nothing is copied if the source is flagged neither RECV nor SEND. */
+ if (srcCtx->flags & OVS_BUFFER_RECV_BUFFER) {
+ NdisCopyReceiveNetBufferListInfo(dstNbl, srcNbl);
+ } else if (srcCtx->flags & OVS_BUFFER_SEND_BUFFER) {
+ NdisCopySendNetBufferListInfo(dstNbl, srcNbl);
+ }
+ }
+#endif
+ dstInfo->IsPacketDataSafe = srcInfo->IsPacketDataSafe;
+ /*
+ * Note that this updates the SOURCE forwarding detail: when the source
+ * data is not safe, its SafePacketDataSize is raised to copySize.
+ */
+ if (!srcInfo->IsPacketDataSafe && copySize >
+ srcInfo->SafePacketDataSize) {
+ srcInfo->SafePacketDataSize = copySize;
+ }
+ } else {
+ /*
+ * Assume all data are safe
+ */
+ dstInfo->IsPacketDataSafe = TRUE;
+ dstInfo->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
+ }
+ return status;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsPartialCopyNBL --
+ *
+ * Partial copy NBL, if there are multiple NBs in the NBL, each one will be
+ * copied. We also reserve headroom for the new NBL.
+ *
+ * The new NBL is a clone of 'nbl' taken past the first copySize bytes,
+ * with a private copy of those copySize bytes (and headRoom bytes of
+ * headroom) placed in front of it.
+ *
+ * ovsContext: switch context (POVS_SWITCH_CONTEXT).
+ * nbl: source NBL.
+ * copySize: number of leading bytes of each NB to copy privately.
+ * headRoom: number of bytes of headroom reserved in the new NBL.
+ * copyNblInfo: forwarded to OvsCopyNBLInfo().
+ *
+ * Returns the new NBL, whose ParentNetBufferList is 'nbl' and which holds
+ * one additional reference on the source context, or NULL on failure.
+ *
+ * Please note,
+ * NBL should have OVS_BUFFER_CONTEXT setup before calling
+ * this function.
+ * The NBL should already have ref to itself so that during copy
+ * it will not be freed.
+ * --------------------------------------------------------------------------
+ */
+PNET_BUFFER_LIST
+OvsPartialCopyNBL(PVOID ovsContext,
+ PNET_BUFFER_LIST nbl,
+ UINT32 copySize,
+ UINT32 headRoom,
+ BOOLEAN copyNblInfo)
+{
+ PNET_BUFFER_LIST newNbl;
+ POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
+ NDIS_STATUS status;
+ PNET_BUFFER srcNb, dstNb;
+ ULONG byteCopied;
+ POVS_NBL_POOL ovsPool = &context->ovsPool;
+ POVS_BUFFER_CONTEXT srcCtx, dstCtx;
+ UINT16 flags;
+
+ srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
+ if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
+ OVS_LOG_INFO("src nbl must have ctx initialized");
+ ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
+ return NULL;
+ }
+
+ /*
+ * Temporarily advance the source past copySize bytes while the clone is
+ * taken, then retreat it back to its original data start.
+ */
+ if (copySize) {
+ NdisAdvanceNetBufferListDataStart(nbl, copySize, FALSE, NULL);
+ }
+ newNbl = NdisAllocateCloneNetBufferList(nbl, ovsPool->nblOnlyPool,
+ NULL, 0);
+ if (copySize) {
+ status = NdisRetreatNetBufferListDataStart(nbl, copySize, 0,
+ NULL, NULL);
+ ASSERT(status == NDIS_STATUS_SUCCESS);
+ }
+
+ if (newNbl == NULL) {
+ return NULL;
+ }
+
+ /*
+ * Allocate private memory for copy
+ */
+ if (copySize + headRoom) {
+ status = NdisRetreatNetBufferListDataStart(newNbl, copySize + headRoom,
+ 0, NULL, NULL);
+ if (status != NDIS_STATUS_SUCCESS) {
+ goto retreat_error;
+ }
+
+ /* Give the headroom back so the data start sits at the copied bytes. */
+ if (headRoom) {
+ NdisAdvanceNetBufferListDataStart(newNbl, headRoom, FALSE, NULL);
+ }
+ if (copySize) {
+ srcNb = NET_BUFFER_LIST_FIRST_NB(nbl);
+ dstNb = NET_BUFFER_LIST_FIRST_NB(newNbl);
+
+ /* Walk source and clone NB chains in lockstep. */
+ while (srcNb) {
+ status = NdisCopyFromNetBufferToNetBuffer(dstNb, 0, copySize,
+ srcNb, 0,
+ &byteCopied);
+ if (status != NDIS_STATUS_SUCCESS || copySize != byteCopied) {
+ goto nbl_context_error;
+ }
+ srcNb = NET_BUFFER_NEXT_NB(srcNb);
+ dstNb = NET_BUFFER_NEXT_NB(dstNb);
+ }
+ }
+ }
+
+ status = OvsAllocateNBLContext(context, newNbl);
+ if (status != NDIS_STATUS_SUCCESS) {
+ goto nbl_context_error;
+ }
+
+ status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, copySize, copyNblInfo);
+ if (status != NDIS_STATUS_SUCCESS) {
+ goto copy_list_info_error;
+ }
+
+#ifdef DBG
+ InterlockedIncrement((LONG volatile *)&ovsPool->nblOnlyCount);
+#endif
+
+ newNbl->ParentNetBufferList = nbl;
+
+ dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl);
+ ASSERT(dstCtx != NULL);
+
+ /* Inherit only the direction (RECV/SEND) flags from the source. */
+ flags = srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | OVS_BUFFER_SEND_BUFFER);
+
+ flags |= OVS_BUFFER_FROM_NBL_ONLY_POOL | OVS_BUFFER_PRIVATE_CONTEXT |
+ OVS_BUFFER_PRIVATE_FORWARD_CONTEXT;
+
+ /*
+ * The recorded original data length is the source length minus copySize.
+ */
+ srcNb = NET_BUFFER_LIST_FIRST_NB(nbl);
+ OvsInitNBLContext(dstCtx, flags, NET_BUFFER_DATA_LENGTH(srcNb) - copySize,
+ OVS_DEFAULT_PORT_NO);
+
+ /* The new NBL holds a reference on its parent's context. */
+ InterlockedIncrement((LONG volatile *)&srcCtx->refCount);
+#ifdef DBG
+ OvsDumpNetBufferList(nbl);
+ OvsDumpForwardingDetails(nbl);
+
+ OvsDumpNetBufferList(newNbl);
+ OvsDumpForwardingDetails(newNbl);
+#endif
+ OVS_LOG_LOUD("Partial Copy new NBL: %p", newNbl);
+ return newNbl;
+
+ /* Error unwinding: each label undoes one more step, then falls through. */
+copy_list_info_error:
+ OvsFreeNBLContext(context, newNbl);
+nbl_context_error:
+ if (copySize) {
+ NdisAdvanceNetBufferListDataStart(newNbl, copySize, TRUE, NULL);
+ }
+retreat_error:
+ NdisFreeCloneNetBufferList(newNbl, 0);
+ return NULL;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsPartialCopyToMultipleNBLs --
+ *
+ *    This is similar to OvsPartialCopyNBL() except that each NB will
+ *    have its own NBL.
+ *
+ *    The source NBL is temporarily rewritten to contain one NB at a time
+ *    while each partial copy is made; its NB chain is restored before
+ *    returning, on both the success and the failure path.
+ *
+ *    Returns the head of a chain of new NBLs linked through
+ *    NET_BUFFER_LIST_NEXT_NBL, one per source NB and in source order, or
+ *    NULL on failure, in which case every NBL created so far is completed.
+ * --------------------------------------------------------------------------
+ */
+PNET_BUFFER_LIST
+OvsPartialCopyToMultipleNBLs(PVOID ovsContext,
+                             PNET_BUFFER_LIST nbl,
+                             UINT32 copySize,
+                             UINT32 headRoom,
+                             BOOLEAN copyNblInfo)
+{
+    PNET_BUFFER nb, nextNb = NULL, firstNb, prevNb;
+    POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
+    PNET_BUFFER_LIST firstNbl = NULL, newNbl, prevNbl = NULL;
+
+    nb = NET_BUFFER_LIST_FIRST_NB(nbl);
+    if (NET_BUFFER_NEXT_NB(nb) == NULL) {
+        /* Single NB: the plain partial copy already does the job. */
+        return OvsPartialCopyNBL(context, nbl, copySize, headRoom, copyNblInfo);
+    }
+
+    firstNb = nb;
+    prevNb = nb;
+
+    while (nb) {
+        /* Isolate 'nb' so that the source NBL contains just this one NB. */
+        nextNb = NET_BUFFER_NEXT_NB(nb);
+        NET_BUFFER_NEXT_NB(nb) = NULL;
+
+        NET_BUFFER_LIST_FIRST_NB(nbl) = nb;
+
+        newNbl = OvsPartialCopyNBL(context, nbl, copySize, headRoom,
+                                   copyNblInfo);
+        if (newNbl == NULL) {
+            goto cleanup;
+        }
+        if (prevNbl == NULL) {
+            firstNbl = newNbl;
+        } else {
+            /*
+             * Append the new NBL to the result chain (not the source NBL),
+             * and re-link the previous source NB to the current one.
+             */
+            NET_BUFFER_LIST_NEXT_NBL(prevNbl) = newNbl;
+            NET_BUFFER_NEXT_NB(prevNb) = nb;
+        }
+        prevNbl = newNbl;
+        prevNb = nb;
+        nb = nextNb;
+    }
+    NET_BUFFER_LIST_FIRST_NB(nbl) = firstNb;
+    return firstNbl;
+
+cleanup:
+    /* Restore the source NB chain exactly as it was on entry. */
+    NET_BUFFER_NEXT_NB(prevNb) = nb;
+    NET_BUFFER_NEXT_NB(nb) = nextNb;
+    NET_BUFFER_LIST_FIRST_NB(nbl) = firstNb;
+
+    /*
+     * Complete every NBL created so far. Each one is unlinked from the
+     * chain first; the successor is saved beforehand and may be NULL.
+     */
+    newNbl = firstNbl;
+    while (newNbl) {
+        firstNbl = NET_BUFFER_LIST_NEXT_NBL(newNbl);
+        NET_BUFFER_LIST_NEXT_NBL(newNbl) = NULL;
+        OvsCompleteNBL(context, newNbl, TRUE);
+        newNbl = firstNbl;
+    }
+    return NULL;
+}
+
+
+/*
+ * Copy one NET_BUFFER ('nb', belonging to 'nbl') into a freshly allocated
+ * NBL. The fix-size pool is used when the data plus headRoom fits in
+ * OVS_FIX_NBL_DATA_SIZE, the variable-size allocator otherwise. Forwarding
+ * detail is copied through OvsCopyNBLInfo() and the RECV/SEND flag of the
+ * source context is inherited. Returns the new NBL, or NULL on failure.
+ */
+static PNET_BUFFER_LIST
+OvsCopySinglePacketNBL(PVOID ovsContext,
+                       PNET_BUFFER_LIST nbl,
+                       PNET_BUFFER nb,
+                       UINT32 headRoom,
+                       BOOLEAN copyNblInfo)
+{
+    POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
+    POVS_BUFFER_CONTEXT srcCtx, dstCtx;
+    PNET_BUFFER_LIST copyNbl;
+    NDIS_STATUS status;
+    ULONG bytesCopied;
+    UINT32 pktLen = NET_BUFFER_DATA_LENGTH(nb);
+
+    copyNbl = ((pktLen + headRoom) <= OVS_FIX_NBL_DATA_SIZE)
+              ? OvsAllocateFixSizeNBL(context, pktLen, headRoom)
+              : OvsAllocateVariableSizeNBL(context, pktLen, headRoom);
+    if (copyNbl == NULL) {
+        return NULL;
+    }
+
+    status = NdisCopyFromNetBufferToNetBuffer(NET_BUFFER_LIST_FIRST_NB(copyNbl),
+                                              0, pktLen, nb, 0,
+                                              &bytesCopied);
+
+    srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
+    if (status == NDIS_STATUS_SUCCESS) {
+        status = OvsCopyNBLInfo(nbl, copyNbl, srcCtx, bytesCopied,
+                                copyNblInfo);
+    }
+
+    /* A short copy is treated as a failure even if the status is success. */
+    if (status != NDIS_STATUS_SUCCESS || bytesCopied != pktLen) {
+        OvsCompleteNBL(context, copyNbl, TRUE);
+        return NULL;
+    }
+
+    dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(copyNbl);
+    ASSERT(dstCtx && srcCtx);
+    ASSERT(srcCtx->magic == OVS_CTX_MAGIC && dstCtx->magic == OVS_CTX_MAGIC);
+
+    dstCtx->flags |= srcCtx->flags & (OVS_BUFFER_RECV_BUFFER |
+                                      OVS_BUFFER_SEND_BUFFER);
+#ifdef DBG
+    OvsDumpNetBufferList(copyNbl);
+    OvsDumpForwardingDetails(copyNbl);
+#endif
+    OVS_LOG_LOUD("Copy single nb to new NBL: %p", copyNbl);
+    return copyNbl;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsFullCopyNBL --
+ *
+ *    Copy the NBL to a new NBL including data.
+ *
+ *    ovsContext:  switch context (POVS_SWITCH_CONTEXT).
+ *    nbl:         source NBL; must carry an initialized OVS buffer context.
+ *    headRoom:    extra bytes allocated in each new data buffer in addition
+ *                 to the NB's data length.
+ *    copyNblInfo: forwarded to OvsCopyNBLInfo().
+ *
+ *    Returns the new NBL, or NULL on failure.
+ *
+ * Notes:
+ *    The NBL can have multiple NBs, but the final result is one NBL.
+ *    A source with a single NB is handled by OvsCopySinglePacketNBL().
+ * --------------------------------------------------------------------------
+ */
+PNET_BUFFER_LIST
+OvsFullCopyNBL(PVOID ovsContext,
+               PNET_BUFFER_LIST nbl,
+               UINT32 headRoom,
+               BOOLEAN copyNblInfo)
+{
+    POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
+    POVS_NBL_POOL ovsPool = &context->ovsPool;
+    PNET_BUFFER_LIST newNbl;
+    PNET_BUFFER nb, newNb, firstNb = NULL, prevNb = NULL;
+    POVS_BUFFER_CONTEXT dstCtx, srcCtx;
+    PMDL mdl;
+    NDIS_STATUS status;
+    UINT32 size, totalSize;
+    ULONG copiedSize;
+    UINT16 flags;
+    PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO dstInfo;
+
+    srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
+    if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
+        OVS_LOG_INFO("src nbl must have ctx initialized");
+        ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
+        return NULL;
+    }
+
+    nb = NET_BUFFER_LIST_FIRST_NB(nbl);
+
+    if (NET_BUFFER_NEXT_NB(nb) == NULL) {
+        return OvsCopySinglePacketNBL(context, nbl, nb, headRoom, copyNblInfo);
+    }
+
+    newNbl = NdisAllocateNetBufferList(ovsPool->nblOnlyPool,
+                                       (UINT16)sizeof (OVS_BUFFER_CONTEXT),
+                                       (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL);
+    if (newNbl == NULL) {
+        return NULL;
+    }
+
+    /* Build a private NB + MDL + data copy for every source NB. */
+    while (nb) {
+        size = NET_BUFFER_DATA_LENGTH(nb);
+        totalSize = MEM_ALIGN_SIZE(size + headRoom);
+        mdl = OvsAllocateMDLAndData(ovsPool->ndisHandle, totalSize);
+
+        if (mdl == NULL) {
+            goto nblcopy_error;
+        }
+        newNb = NdisAllocateNetBuffer(ovsPool->nbPool, mdl, totalSize, 0);
+        if (newNb == NULL) {
+            OvsFreeMDLAndData(mdl);
+            goto nblcopy_error;
+        }
+        /* Link the new NB in first so the error path can find and free it. */
+        if (firstNb == NULL) {
+            firstNb = newNb;
+        } else {
+            NET_BUFFER_NEXT_NB(prevNb) = newNb;
+        }
+        prevNb = newNb;
+#ifdef DBG
+        InterlockedIncrement((LONG volatile *)&ovsPool->nbCount);
+#endif
+        status = NdisRetreatNetBufferDataStart(newNb, size, 0, NULL);
+        ASSERT(status == NDIS_STATUS_SUCCESS);
+
+        status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, size, nb, 0,
+                                                  &copiedSize);
+        if (status != NDIS_STATUS_SUCCESS || size != copiedSize) {
+            goto nblcopy_error;
+        }
+
+        nb = NET_BUFFER_NEXT_NB(nb);
+    }
+
+    NET_BUFFER_LIST_FIRST_NB(newNbl) = firstNb;
+
+    newNbl->SourceHandle = ovsPool->ndisHandle;
+    status = context->NdisSwitchHandlers.
+             AllocateNetBufferListForwardingContext(ovsPool->ndisContext,
+                                                    newNbl);
+
+    if (status != NDIS_STATUS_SUCCESS) {
+        goto nblcopy_error;
+    }
+
+    status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, 0, copyNblInfo);
+    if (status != NDIS_STATUS_SUCCESS) {
+        /* The forwarding context exists by now and must be released too. */
+        goto copy_info_error;
+    }
+
+    /* All data in the copy is private, so it is always safe. */
+    dstInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl);
+    dstInfo->IsPacketDataSafe = TRUE;
+
+    dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl);
+
+    /* Inherit only the direction (RECV/SEND) flags from the source. */
+    flags = srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | OVS_BUFFER_SEND_BUFFER);
+
+    flags |= OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA |
+             OVS_BUFFER_PRIVATE_NET_BUFFER | OVS_BUFFER_FROM_NBL_ONLY_POOL |
+             OVS_BUFFER_PRIVATE_FORWARD_CONTEXT;
+
+    OvsInitNBLContext(dstCtx, flags, NET_BUFFER_DATA_LENGTH(firstNb),
+                      OVS_DEFAULT_PORT_NO);
+
+#ifdef DBG
+    OvsDumpNetBufferList(nbl);
+    OvsDumpForwardingDetails(nbl);
+    InterlockedIncrement((LONG volatile *)&ovsPool->nblOnlyCount);
+#endif
+    OVS_LOG_LOUD("newNbl: %p", newNbl);
+    return newNbl;
+
+copy_info_error:
+    context->NdisSwitchHandlers.
+        FreeNetBufferListForwardingContext(ovsPool->ndisContext, newNbl);
+nblcopy_error:
+    /* Free every NB built so far together with its MDL and data. */
+    while (firstNb) {
+#ifdef DBG
+        InterlockedDecrement((LONG volatile *)&ovsPool->nbCount);
+#endif
+        prevNb = firstNb;
+        firstNb = NET_BUFFER_NEXT_NB(prevNb);
+        mdl = NET_BUFFER_FIRST_MDL(prevNb);
+        NET_BUFFER_FIRST_MDL(prevNb) = NULL;
+        NdisFreeNetBuffer(prevNb);
+        OvsFreeMDLAndData(mdl);
+    }
+    /* Do not leave the NBL pointing at the NBs that were just freed. */
+    NET_BUFFER_LIST_FIRST_NB(newNbl) = NULL;
+    NdisFreeNetBufferList(newNbl);
+    OVS_LOG_ERROR("OvsFullCopyNBL failed");
+    return NULL;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * GetSegmentHeaderInfo
+ *
+ *    Read the TCP header found at hdrInfo->l4Offset in 'nbl' and report
+ *    the combined header size (l4Offset plus the TCP header length) in
+ *    *hdrSize and the host-order TCP sequence number in *seqNumber.
+ *
+ *    Returns NDIS_STATUS_FAILURE when the TCP header cannot be read.
+ * --------------------------------------------------------------------------
+ */
+static NDIS_STATUS
+GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl,
+                     const POVS_PACKET_HDR_INFO hdrInfo,
+                     UINT32 *hdrSize, UINT32 *seqNumber)
+{
+    TCPHdr scratch;
+    const TCPHdr *tcpHdr;
+
+    /* Parse the original Eth/IP/TCP header */
+    tcpHdr = OvsGetPacketBytes(nbl, sizeof scratch, hdrInfo->l4Offset,
+                               &scratch);
+    if (tcpHdr != NULL) {
+        *seqNumber = ntohl(tcpHdr->seq);
+        *hdrSize = hdrInfo->l4Offset + TCP_HDR_LEN(tcpHdr);
+        return NDIS_STATUS_SUCCESS;
+    }
+
+    return NDIS_STATUS_FAILURE;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * FixSegmentHeader
+ *
+ * Fix IP length, IP checksum, TCP sequence number and TCP checksum
+ * in the segment.
+ *
+ * nb: NET_BUFFER holding one segment, starting with the Ethernet header.
+ * segmentSize: TCP payload size of this segment, in bytes.
+ * seqNumber: TCP sequence number (host order) to store in the segment.
+ *
+ * Returns NDIS_STATUS_RESOURCES if the first MDL cannot be mapped,
+ * STATUS_SUCCESS otherwise.
+ *
+ * The Ethernet, IP and TCP headers are all accessed through the first
+ * MDL's mapping; the ASSERTs below check that they fit inside it.
+ * --------------------------------------------------------------------------
+ */
+static NDIS_STATUS
+FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber)
+{
+ EthHdr *dstEth;
+ IPHdr *dstIP;
+ TCPHdr *dstTCP;
+ PMDL mdl;
+ PUINT8 bufferStart;
+
+ mdl = NET_BUFFER_FIRST_MDL(nb);
+
+ bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority);
+ if (!bufferStart) {
+ return NDIS_STATUS_RESOURCES;
+ }
+ /*
+ * NOTE(review): the current-MDL offset is applied to the FIRST MDL here,
+ * which presumably relies on the current MDL being the first one --
+ * confirm against the caller.
+ */
+ dstEth = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(nb));
+ ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb)
+ >= sizeof(EthHdr) + sizeof(IPHdr) + sizeof(TCPHdr));
+ /* The IP header is taken to follow the Ethernet header directly. */
+ dstIP = (IPHdr *)((PCHAR)dstEth + sizeof *dstEth);
+ dstTCP = (TCPHdr *)((PCHAR)dstIP + dstIP->ihl * 4);
+ ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb)
+ >= sizeof(EthHdr) + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP));
+
+ /* Fix IP length and checksum */
+ ASSERT(dstIP->protocol == IPPROTO_TCP);
+ dstIP->tot_len = htons(segmentSize + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP));
+ dstIP->check = 0;
+ dstIP->check = IPChecksum((UINT8 *)dstIP, dstIP->ihl * 4, 0);
+
+ /* Fix TCP checksum */
+ dstTCP->seq = htonl(seqNumber);
+ /*
+ * The pseudo-header checksum is stored in the check field first, and the
+ * field is then overwritten with the checksum computed over the NB data
+ * from the start of the TCP header onwards.
+ */
+ dstTCP->check =
+ IPPseudoChecksum((UINT32 *)&dstIP->saddr,
+ (UINT32 *)&dstIP->daddr,
+ IPPROTO_TCP, segmentSize + TCP_HDR_LEN(dstTCP));
+ dstTCP->check = CalculateChecksumNB(nb,
+ (UINT16)(NET_BUFFER_DATA_LENGTH(nb) - sizeof *dstEth - dstIP->ihl * 4),
+ sizeof *dstEth + dstIP->ihl * 4);
+ return STATUS_SUCCESS;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsTcpSegmentNBL --
+ *
+ *    Segment TCP payload, and prepend each segment with ether/IP/TCP header.
+ *    Leave headRoom for additional encap.
+ *
+ *    ovsContext: switch context (POVS_SWITCH_CONTEXT).
+ *    nbl:        source NBL holding one TCP packet.
+ *    hdrInfo:    parsed header info; l4Offset locates the TCP header.
+ *    mss:        maximum TCP payload size of each segment.
+ *    headRoom:   number of bytes of headroom left in front of each segment.
+ *
+ *    Returns a fragment NBL with one NB per segment, whose
+ *    ParentNetBufferList is 'nbl' and which holds one additional reference
+ *    on the source context, or NULL on failure.
+ *
+ *    Please note,
+ *       NBL should have OVS_BUFFER_CONTEXT setup before calling
+ *       this function.
+ *       The NBL should already have ref to itself so that during copy
+ *       it will not be freed.
+ *       Currently this API assert there is only one NB in an NBL, it needs
+ *       to be fixed if we receive multiple NBs in an NBL.
+ * --------------------------------------------------------------------------
+ */
+PNET_BUFFER_LIST
+OvsTcpSegmentNBL(PVOID ovsContext,
+                 PNET_BUFFER_LIST nbl,
+                 POVS_PACKET_HDR_INFO hdrInfo,
+                 UINT32 mss,
+                 UINT32 headRoom)
+{
+    POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
+#ifdef DBG
+    POVS_NBL_POOL ovsPool = &context->ovsPool;
+#endif
+    POVS_BUFFER_CONTEXT dstCtx, srcCtx;
+    UINT32 size, hdrSize, seqNumber;
+    PNET_BUFFER_LIST newNbl;
+    PNET_BUFFER nb, newNb;
+    NDIS_STATUS status;
+    UINT16 segmentSize;
+    ULONG copiedSize;
+
+    srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
+    if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
+        OVS_LOG_INFO("src nbl must have ctx initialized");
+        ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
+        return NULL;
+    }
+
+    nb = NET_BUFFER_LIST_FIRST_NB(nbl);
+    ASSERT(NET_BUFFER_NEXT_NB(nb) == NULL);
+
+    /* Figure out the segment header size */
+    status = GetSegmentHeaderInfo(nbl, hdrInfo, &hdrSize, &seqNumber);
+    if (status != NDIS_STATUS_SUCCESS) {
+        OVS_LOG_INFO("Cannot parse NBL header");
+        return NULL;
+    }
+
+    /*
+     * The header size comes from packet contents (the TCP data offset), so
+     * make sure it does not exceed the data actually present; otherwise the
+     * unsigned subtraction below would wrap around.
+     */
+    if (NET_BUFFER_DATA_LENGTH(nb) < hdrSize) {
+        OVS_LOG_INFO("NBL data is shorter than its headers");
+        return NULL;
+    }
+    size = NET_BUFFER_DATA_LENGTH(nb) - hdrSize;
+
+    /* XXX add to ovsPool counters? */
+    newNbl = NdisAllocateFragmentNetBufferList(nbl, NULL,
+            NULL, hdrSize, mss, hdrSize + headRoom , 0, 0);
+    if (newNbl == NULL) {
+        return NULL;
+    }
+
+    /* Now deal with TCP payload */
+    for (newNb = NET_BUFFER_LIST_FIRST_NB(newNbl); newNb != NULL;
+            newNb = NET_BUFFER_NEXT_NB(newNb)) {
+        segmentSize = (size > mss ? mss : size) & 0xffff;
+        if (headRoom) {
+            NdisAdvanceNetBufferDataStart(newNb, headRoom, FALSE, NULL);
+        }
+
+        /* Now copy the eth/IP/TCP header and fix up */
+        status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, hdrSize, nb, 0,
+                                                  &copiedSize);
+        if (status != NDIS_STATUS_SUCCESS || hdrSize != copiedSize) {
+            goto nblcopy_error;
+        }
+
+        status = FixSegmentHeader(newNb, segmentSize, seqNumber);
+        if (status != NDIS_STATUS_SUCCESS) {
+            goto nblcopy_error;
+        }
+
+
+        /* Move on to the next segment */
+        size -= segmentSize;
+        seqNumber += segmentSize;
+    }
+
+    status = OvsAllocateNBLContext(context, newNbl);
+    if (status != NDIS_STATUS_SUCCESS) {
+        goto nblcopy_error;
+    }
+
+    status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, hdrSize + headRoom, FALSE);
+    if (status != NDIS_STATUS_SUCCESS) {
+        goto nbl_context_error;
+    }
+
+    newNbl->ParentNetBufferList = nbl;
+
+    /* Remember it's a fragment NBL so we can free it properly */
+    dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl);
+    ASSERT(dstCtx != NULL);
+    dstCtx->flags = OVS_BUFFER_FRAGMENT | OVS_BUFFER_PRIVATE_CONTEXT |
+        OVS_BUFFER_PRIVATE_FORWARD_CONTEXT | OVS_BUFFER_SEND_BUFFER;
+    dstCtx->refCount = 1;
+    dstCtx->magic = OVS_CTX_MAGIC;
+    dstCtx->dataOffsetDelta = hdrSize + headRoom;
+    /* Give the source port a defined value, as OvsInitNBLContext() does. */
+    dstCtx->srcPortNo = OVS_DEFAULT_PORT_NO;
+
+    /* The fragment NBL holds a reference on its parent's context. */
+    InterlockedIncrement((LONG volatile *)&srcCtx->refCount);
+#ifdef DBG
+    InterlockedIncrement((LONG volatile *)&ovsPool->fragNBLCount);
+
+    OvsDumpNetBufferList(nbl);
+    OvsDumpForwardingDetails(nbl);
+
+    OvsDumpNetBufferList(newNbl);
+    OvsDumpForwardingDetails(newNbl);
+#endif
+    OVS_LOG_TRACE("Segment nbl %p to newNbl: %p", nbl, newNbl);
+    return newNbl;
+
+nbl_context_error:
+    OvsFreeNBLContext(context, newNbl);
+nblcopy_error:
+    /*
+     * fragNBLCount is only incremented on the success path above, so it
+     * must not be decremented here.
+     */
+    NdisFreeFragmentNetBufferList(newNbl, hdrSize + headRoom, 0);
+    return NULL;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsFullCopyToMultipleNBLs --
+ *
+ *    Copy NBL to multiple NBLs, each NB will have its own NBL.
+ *
+ *    ovsContext:  switch context (POVS_SWITCH_CONTEXT).
+ *    nbl:         source NBL; must carry an initialized OVS buffer context.
+ *    headRoom, copyNblInfo: forwarded to OvsCopySinglePacketNBL().
+ *
+ *    Returns the head of a chain of new NBLs linked through
+ *    NET_BUFFER_LIST_NEXT_NBL, one per source NB and in source order, or
+ *    NULL on failure, in which case every NBL created so far is completed.
+ * --------------------------------------------------------------------------
+ */
+PNET_BUFFER_LIST
+OvsFullCopyToMultipleNBLs(PVOID ovsContext,
+                          PNET_BUFFER_LIST nbl,
+                          UINT32 headRoom,
+                          BOOLEAN copyNblInfo)
+{
+
+    POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
+    PNET_BUFFER_LIST firstNbl, currNbl, newNbl;
+    PNET_BUFFER nb;
+    POVS_BUFFER_CONTEXT srcCtx;
+
+    srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
+    if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
+        OVS_LOG_INFO("src nbl must have ctx initialized");
+        ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
+        return NULL;
+    }
+
+    nb = NET_BUFFER_LIST_FIRST_NB(nbl);
+    newNbl = OvsCopySinglePacketNBL(context, nbl, nb, headRoom, copyNblInfo);
+
+    if (newNbl == NULL || NET_BUFFER_NEXT_NB(nb) == NULL) {
+        return newNbl;
+    } else {
+        firstNbl = newNbl;
+        currNbl = newNbl;
+    }
+
+    /*
+     * The first NB has already been copied above; continue with the second
+     * one so that it is not duplicated in the result chain.
+     */
+    nb = NET_BUFFER_NEXT_NB(nb);
+
+    while (nb) {
+        newNbl = OvsCopySinglePacketNBL(context, nbl, nb, headRoom,
+                                        copyNblInfo);
+        if (newNbl == NULL) {
+            goto copymultiple_error;
+        }
+        NET_BUFFER_LIST_NEXT_NBL(currNbl) = newNbl;
+        currNbl = newNbl;
+        nb = NET_BUFFER_NEXT_NB(nb);
+    }
+    return firstNbl;
+
+copymultiple_error:
+    /* Unlink and complete every NBL created so far. */
+    while (firstNbl) {
+        currNbl = firstNbl;
+        firstNbl = NET_BUFFER_LIST_NEXT_NBL(firstNbl);
+        NET_BUFFER_LIST_NEXT_NBL(currNbl) = NULL;
+        OvsCompleteNBL(context, currNbl, TRUE);
+    }
+    return NULL;
+
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsCompleteNBL --
+ *
+ *    This function tries to free the NBL allocated by OVS buffer
+ *    management module. If it triggers the completion of the parent
+ *    NBL, it will recursively call itself. If it triggers the completion
+ *    of an external NBL, that NBL will be returned to the caller. The caller
+ *    is responsible to call API to return to upper layer.
+ *
+ *    context:   switch context that owns the pools.
+ *    nbl:       NBL to complete; must carry an OVS buffer context.
+ *    updateRef: when TRUE, drop one reference first and return NULL unless
+ *               that was the last one. When FALSE, the reference count is
+ *               expected to be zero already.
+ *
+ *    Returns an external NBL (flagged OVS_BUFFER_NEED_COMPLETE) that the
+ *    caller has to complete, or NULL when there is nothing to hand back.
+ * --------------------------------------------------------------------------
+ */
+PNET_BUFFER_LIST
+OvsCompleteNBL(POVS_SWITCH_CONTEXT context,
+               PNET_BUFFER_LIST nbl,
+               BOOLEAN updateRef)
+{
+    POVS_BUFFER_CONTEXT ctx;
+    UINT16 flags;
+    UINT32 dataOffsetDelta;
+    PNET_BUFFER_LIST parent;
+    NDIS_STATUS status;
+    NDIS_HANDLE poolHandle;
+    LONG value;
+    POVS_NBL_POOL ovsPool = &context->ovsPool;
+    PNET_BUFFER nb, nextNb;
+
+
+    ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
+
+    ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC);
+
+    OVS_LOG_TRACE("Enter: nbl: %p, ctx: %p, refCount: %d, updateRef:%d",
+                  nbl, ctx, ctx->refCount, updateRef);
+
+    if (updateRef) {
+        value = InterlockedDecrement((LONG volatile *)&ctx->refCount);
+        if (value != 0) {
+            return NULL;
+        }
+    } else {
+        /*
+         * This is a special case, the refCount must be zero
+         */
+        ASSERT(ctx->refCount == 0);
+    }
+
+    nb = NET_BUFFER_LIST_FIRST_NB(nbl);
+
+    /*
+     * Snapshot everything needed from the context now: the context may be
+     * released below (OVS_BUFFER_PRIVATE_CONTEXT) and must not be read
+     * after that point.
+     */
+    flags = ctx->flags;
+    dataOffsetDelta = ctx->dataOffsetDelta;
+
+    /* Restore the data start of non-fragment NBLs to its original place. */
+    if (!(flags & OVS_BUFFER_FRAGMENT) &&
+        NET_BUFFER_DATA_LENGTH(nb) != ctx->origDataLength) {
+        UINT32 diff;
+        if (NET_BUFFER_DATA_LENGTH(nb) < ctx->origDataLength) {
+            diff = ctx->origDataLength -NET_BUFFER_DATA_LENGTH(nb);
+            status = NdisRetreatNetBufferListDataStart(nbl, diff, 0,
+                                                       NULL, NULL);
+            ASSERT(status == NDIS_STATUS_SUCCESS);
+        } else {
+            diff = NET_BUFFER_DATA_LENGTH(nb) - ctx->origDataLength;
+            NdisAdvanceNetBufferListDataStart(nbl, diff, TRUE, NULL);
+        }
+    }
+
+    if (flags & OVS_BUFFER_PRIVATE_CONTEXT) {
+        NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT));
+    }
+
+    if (flags & OVS_BUFFER_NEED_COMPLETE) {
+        /*
+         * return to caller for completion
+         */
+#ifdef DBG
+        InterlockedDecrement((LONG volatile *)&ovsPool->sysNBLCount);
+#endif
+        return nbl;
+    }
+
+    if (flags & OVS_BUFFER_PRIVATE_FORWARD_CONTEXT) {
+        context->NdisSwitchHandlers.
+            FreeNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
+    }
+
+    if (flags & (OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA)) {
+        /* Detach each NB's MDL before freeing the MDL and its data. */
+        nb = NET_BUFFER_LIST_FIRST_NB(nbl);
+        while (nb) {
+            PMDL mdl = NET_BUFFER_FIRST_MDL(nb);
+            NET_BUFFER_FIRST_MDL(nb) = NULL;
+            ASSERT(mdl->Next == NULL);
+            OvsFreeMDLAndData(mdl);
+            nb = NET_BUFFER_NEXT_NB(nb);
+        }
+    }
+
+    if (flags & OVS_BUFFER_PRIVATE_NET_BUFFER) {
+        nb = NET_BUFFER_LIST_FIRST_NB(nbl);
+        while (nb) {
+            nextNb = NET_BUFFER_NEXT_NB(nb);
+            NdisFreeNetBuffer(nb);
+#ifdef DBG
+            InterlockedDecrement((LONG volatile *)&ovsPool->nbCount);
+#endif
+            nb = nextNb;
+        }
+        NET_BUFFER_LIST_FIRST_NB(nbl) = NULL;
+    }
+
+    /* Remember the parent before the NBL itself goes away. */
+    parent = nbl->ParentNetBufferList;
+
+    poolHandle = NdisGetPoolFromNetBufferList(nbl);
+    if (flags & OVS_BUFFER_FROM_FIX_SIZE_POOL) {
+        ASSERT(poolHandle == ovsPool->fixSizePool);
+#ifdef DBG
+        InterlockedDecrement((LONG volatile *)&ovsPool->fixNBLCount);
+#endif
+        NdisFreeNetBufferList(nbl);
+    } else if (flags & OVS_BUFFER_FROM_ZERO_SIZE_POOL) {
+        ASSERT(poolHandle == ovsPool->zeroSizePool);
+#ifdef DBG
+        InterlockedDecrement((LONG volatile *)&ovsPool->zeroNBLCount);
+#endif
+        NdisFreeNetBufferList(nbl);
+    } else if (flags & OVS_BUFFER_FROM_NBL_ONLY_POOL) {
+        ASSERT(poolHandle == ovsPool->nblOnlyPool);
+#ifdef DBG
+        InterlockedDecrement((LONG volatile *)&ovsPool->nblOnlyCount);
+#endif
+        NdisFreeCloneNetBufferList(nbl, 0);
+    } else if (flags & OVS_BUFFER_FRAGMENT) {
+        OVS_LOG_TRACE("Free fragment %p parent %p", nbl, parent);
+#ifdef DBG
+        InterlockedDecrement((LONG volatile *)&ovsPool->fragNBLCount);
+#endif
+        /* Use the value saved before the context was released. */
+        NdisFreeFragmentNetBufferList(nbl, dataOffsetDelta, 0);
+    }
+
+    if (parent != NULL) {
+        /* Drop the reference this NBL held on its parent. */
+        ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(parent);
+        ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC);
+        value = InterlockedDecrement((LONG volatile *)&ctx->refCount);
+        if (value == 0) {
+            return OvsCompleteNBL(context, parent, FALSE);
+        }
+    }
+    return NULL;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsSetCtxSourcePortNo --
+ *      Store 'portNo' as the source port in the OVS buffer context of
+ *      'nbl'. Returns STATUS_INVALID_PARAMETER (after asserting) when the
+ *      NBL has no context, NDIS_STATUS_SUCCESS otherwise.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsSetCtxSourcePortNo(PNET_BUFFER_LIST nbl,
+                      UINT32 portNo)
+{
+    POVS_BUFFER_CONTEXT bufCtx =
+        (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
+
+    if (bufCtx != NULL) {
+        bufCtx->srcPortNo = portNo;
+        return NDIS_STATUS_SUCCESS;
+    }
+
+    ASSERT(bufCtx && bufCtx->magic == OVS_CTX_MAGIC);
+    return STATUS_INVALID_PARAMETER;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsGetCtxSourcePortNo --
+ *      Read the source port stored in the OVS buffer context of 'nbl' into
+ *      *portNo. Returns STATUS_INVALID_PARAMETER when the NBL has no
+ *      context or 'portNo' is NULL, NDIS_STATUS_SUCCESS otherwise.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsGetCtxSourcePortNo(PNET_BUFFER_LIST nbl,
+                      UINT32 *portNo)
+{
+    POVS_BUFFER_CONTEXT bufCtx =
+        (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
+
+    if (bufCtx != NULL && portNo != NULL) {
+        *portNo = bufCtx->srcPortNo;
+        return NDIS_STATUS_SUCCESS;
+    }
+
+    ASSERT(bufCtx && bufCtx->magic == OVS_CTX_MAGIC);
+    return STATUS_INVALID_PARAMETER;
+}
diff --git a/datapath-windows/ovsext/OvsBufferMgmt.h b/datapath-windows/ovsext/OvsBufferMgmt.h
new file mode 100644
index 000000000..9c00b1b5c
--- /dev/null
+++ b/datapath-windows/ovsext/OvsBufferMgmt.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_BUFFER_MGMT_H_
+#define __OVS_BUFFER_MGMT_H_ 1
+
+#define MEM_ALIGN MEMORY_ALLOCATION_ALIGNMENT
+#define MEM_ALIGN_SIZE(_x) ((MEM_ALIGN - 1 + (_x))/MEM_ALIGN * MEM_ALIGN) /* Rounds _x up to a multiple of MEM_ALIGN. */
+#define OVS_CTX_MAGIC 0xabcd /* Value the 'magic' field of OVS_BUFFER_CONTEXT is asserted against. */
+
+#define OVS_DEFAULT_NBL_CONTEXT_SIZE MEM_ALIGN_SIZE(64)
+#define OVS_DEFAULT_NBL_CONTEXT_FILL \
+ (OVS_DEFAULT_NBL_CONTEXT_SIZE - sizeof (OVS_BUFFER_CONTEXT))
+
+#define OVS_DEFAULT_DATA_SIZE 256
+#define OVS_DEFAULT_HEADROOM_SIZE 128
+#define OVS_FIX_NBL_DATA_SIZE (OVS_DEFAULT_DATA_SIZE + OVS_DEFAULT_HEADROOM_SIZE) /* Data size plus headroom. */
+
+/* By default we copy 18 bytes, so that the Ethernet header and the VLAN tag
+ * end up in one contiguous buffer. */
+#define OVS_DEFAULT_COPY_SIZE 18
+
+enum { /* Single-bit flags; presumably stored in OVS_BUFFER_CONTEXT.flags (UINT16) - verify. */
+ OVS_BUFFER_NEED_COMPLETE = BIT16(0),
+ OVS_BUFFER_PRIVATE_MDL = BIT16(1),
+ OVS_BUFFER_PRIVATE_DATA = BIT16(2),
+ OVS_BUFFER_PRIVATE_NET_BUFFER = BIT16(3),
+ OVS_BUFFER_PRIVATE_FORWARD_CONTEXT = BIT16(4),
+ OVS_BUFFER_PRIVATE_CONTEXT = BIT16(5),
+ OVS_BUFFER_FROM_FIX_SIZE_POOL = BIT16(6),
+ OVS_BUFFER_FROM_ZERO_SIZE_POOL = BIT16(7),
+ OVS_BUFFER_FROM_NBL_ONLY_POOL = BIT16(8),
+ OVS_BUFFER_RECV_BUFFER = BIT16(9),
+ OVS_BUFFER_SEND_BUFFER = BIT16(10),
+ OVS_BUFFER_FRAGMENT = BIT16(11), /* NBLs with this flag are released via NdisFreeFragmentNetBufferList on completion. */
+};
+
+typedef union _OVS_BUFFER_CONTEXT { /* Per-NBL context, read from NET_BUFFER_LIST_CONTEXT_DATA_START(nbl). */
+ struct {
+ UINT16 magic; /* Asserted to equal OVS_CTX_MAGIC before the context is used. */
+ UINT16 flags; /* Presumably a combination of the OVS_BUFFER_* bits - verify. */
+ UINT32 srcPortNo; /* Accessed through OvsSetCtxSourcePortNo / OvsGetCtxSourcePortNo. */
+ UINT32 refCount; /* Decremented with InterlockedDecrement; the parent NBL is completed when it reaches 0. */
+ union {
+ UINT32 origDataLength;
+ UINT32 dataOffsetDelta; /* Passed to NdisFreeFragmentNetBufferList when a fragment NBL is freed. */
+ };
+ };
+
+ UINT64 value[MEM_ALIGN_SIZE(16) >> 3]; /* Overlays the fields above with MEM_ALIGN_SIZE(16) / 8 64-bit words. */
+} OVS_BUFFER_CONTEXT, *POVS_BUFFER_CONTEXT;
+
+
+typedef struct _OVS_NBL_POOL { /* Handles of the NBL/NB pools, plus per-pool counters when DBG is defined. */
+ NDIS_SWITCH_CONTEXT ndisContext;
+ NDIS_HANDLE ndisHandle;
+ NDIS_HANDLE fixSizePool; // data size of 256
+ NDIS_HANDLE zeroSizePool; // no data, NBL + NB + Context
+ NDIS_HANDLE nblOnlyPool; // NBL + context for clone
+ NDIS_HANDLE nbPool; // NB for clone
+#ifdef DBG /* Counters below exist only when DBG is defined. */
+ LONG fixNBLCount;
+ LONG zeroNBLCount;
+ LONG nblOnlyCount;
+ LONG nbCount;
+ LONG sysNBLCount;
+ LONG fragNBLCount; // decremented with InterlockedDecrement when a fragment NBL is freed
+#endif
+} OVS_NBL_POOL, *POVS_NBL_POOL;
+
+
+NDIS_STATUS OvsInitBufferPool(PVOID context);
+VOID OvsCleanupBufferPool(PVOID context);
+
+PNET_BUFFER_LIST OvsAllocateFixSizeNBL(PVOID context,
+ UINT32 size,
+ UINT32 headRoom);
+PNET_BUFFER_LIST OvsAllocateVariableSizeNBL(PVOID context,
+ UINT32 size,
+ UINT32 headRoom);
+
+POVS_BUFFER_CONTEXT OvsInitExternalNBLContext(PVOID context,
+ PNET_BUFFER_LIST nbl,
+ BOOLEAN isRecv);
+
+PNET_BUFFER_LIST OvsPartialCopyNBL(PVOID context,
+ PNET_BUFFER_LIST nbl,
+ UINT32 copySize,
+ UINT32 headRoom,
+ BOOLEAN copyNblInfo);
+PNET_BUFFER_LIST OvsPartialCopyToMultipleNBLs(PVOID context,
+ PNET_BUFFER_LIST nbl,
+ UINT32 copySize,
+ UINT32 headRoom,
+ BOOLEAN copyNblInfo);
+PNET_BUFFER_LIST OvsFullCopyNBL(PVOID context, PNET_BUFFER_LIST nbl,
+ UINT32 headRoom, BOOLEAN copyNblInfo);
+PNET_BUFFER_LIST OvsTcpSegmentNBL(PVOID context,
+ PNET_BUFFER_LIST nbl,
+ POVS_PACKET_HDR_INFO hdrInfo,
+ UINT32 MSS,
+ UINT32 headRoom);
+PNET_BUFFER_LIST OvsFullCopyToMultipleNBLs(PVOID context,
+ PNET_BUFFER_LIST nbl, UINT32 headRoom, BOOLEAN copyNblInfo);
+PNET_BUFFER_LIST OvsCompleteNBL(PVOID context, PNET_BUFFER_LIST nbl,
+ BOOLEAN updateRef);
+NDIS_STATUS OvsSetCtxSourcePortNo(PNET_BUFFER_LIST nbl, UINT32 portNo); /* Stores portNo in the NBL's OVS context; STATUS_INVALID_PARAMETER if the context is NULL. */
+
+NDIS_STATUS OvsGetCtxSourcePortNo(PNET_BUFFER_LIST nbl, UINT32 *portNo); /* Reads the stored port into *portNo; STATUS_INVALID_PARAMETER if the context or portNo is NULL. */
+
+#endif /* __OVS_BUFFER_MGMT_H_ */
diff --git a/datapath-windows/ovsext/OvsChecksum.c b/datapath-windows/ovsext/OvsChecksum.c
new file mode 100644
index 000000000..e19237389
--- /dev/null
+++ b/datapath-windows/ovsext/OvsChecksum.c
@@ -0,0 +1,578 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "precomp.h"
+#include "OvsChecksum.h"
+#include "OvsFlow.h"
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_CHECKSUM
+#include "OvsDebug.h"
+#include "OvsPacketParser.h"
+
+#ifndef htons
+#define htons(_x) (((UINT16)(_x) >> 8) + (((UINT16)(_x) << 8) & 0xff00))
+#endif
+
+#ifndef swap64
+#define swap64(_x) ((((UINT64)(_x) >> 8) & 0x00ff00ff00ff00ff) + \
+ (((UINT64)(_x) << 8) & 0xff00ff00ff00ff00))
+#endif
+
+#define fold64(_x) \
+ _x = ((_x) >> 32) + ((_x) & 0xffffffff); \
+ _x = (UINT32)(((_x) >> 32) + (_x)); \
+ _x = ((_x) >> 16) + ((_x) & 0xffff); \
+ _x = (UINT16)(((_x) >> 16) + (_x))
+
+#define fold32(_x) \
+ _x = ((_x) >> 16) + ((_x) & 0xffff); \
+ _x = (UINT16)(((_x) >> 16) + (_x))
+
+
+/*
+ *----------------------------------------------------------------------------
+ * CalculateOnesComplement --
+ *
+ *    Adds up 'totalLength' bytes starting at 'start' as a one's complement
+ *    style sum and adds 'initial' to it.  Intended for packets whose data is
+ *    spread over several buffers: call once per buffer and chain the result
+ *    through 'initial'.
+ *
+ *    When 'isEvenStart' is FALSE the bytes of every 16-bit lane of this
+ *    buffer's sum are swapped before 'initial' is added.
+ *
+ *    PLEASE NOTE: although the accumulator is a UINT64, the caller is
+ *    assumed to keep it from overflowing when more data is added.
+ *
+ * Result:
+ *    The running sum.  It is neither folded to 16 bits nor complemented.
+ *----------------------------------------------------------------------------
+ */
+UINT64
+CalculateOnesComplement(UINT8 *start,
+                        UINT16 totalLength,
+                        UINT64 initial,
+                        BOOLEAN isEvenStart)
+{
+    UINT64 sum = 0;
+    UINT8 *cursor = start;
+    UINT16 remaining = totalLength;
+    union {
+        UINT32 val;
+        UINT8 b8[4];
+    } tail;
+
+    /* Bulk: 8 bytes per step, adding the two 32-bit halves separately. */
+    for (; remaining >= 8; remaining -= 8, cursor += 8) {
+        UINT64 word = *(UINT64 *)cursor;
+
+        sum += (word >> 32) + (word & 0xffffffff);
+    }
+
+    /* One optional 4-byte step. */
+    if (remaining >= 4) {
+        sum += *(UINT32 *)cursor;
+        cursor += 4;
+        remaining -= 4;
+    }
+
+    /* Up to 3 trailing bytes, zero padded into a 32-bit value. */
+    tail.val = 0;
+    switch (remaining) {
+    case 3:
+        tail.b8[2] = cursor[2];
+        /* fall through */
+    case 2:
+        tail.b8[1] = cursor[1];
+        /* fall through */
+    case 1:
+        tail.b8[0] = cursor[0];
+        sum += tail.val;
+    }
+
+    if (!isEvenStart) {
+        sum = swap64(sum);
+    }
+    return sum + initial;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * CalculateChecksum --
+ *
+ *    Computes the checksum of 'totalLength' bytes at 'ptr': the one's
+ *    complement of the folded one's complement sum.
+ *
+ *    The checksum field inside the data is assumed to be initialized
+ *    properly by the caller.
+ *
+ * Input Parameter:
+ *    ptr:         points to the data to be checksummed
+ *    totalLength: total length of the data
+ *    initial:     initial value added to the sum.  Please note this value
+ *                 should be in network byte order.
+ *
+ *    'initial' is useful when you do not want to set the checksum field to
+ *    zero first: passing ~checksum is equivalent to zeroing the field.
+ *
+ * Result:
+ *    The result can be assigned to the checksum field directly.
+ *----------------------------------------------------------------------------
+ */
+UINT16
+CalculateChecksum(UINT8 *ptr,
+                  UINT16 totalLength,
+                  UINT16 initial)
+{
+    UINT64 acc;
+
+    acc = CalculateOnesComplement(ptr, totalLength, initial, TRUE);
+    fold64(acc);
+
+    return (UINT16)~acc;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * CopyAndCalculateOnesComplement --
+ *
+ *    Copies 'length' bytes from 'src' to 'dst' and, in the same pass, adds
+ *    them up as a one's complement style sum, then adds 'initial'.
+ *
+ *    Intended for packets whose data is spread over several buffers: call
+ *    once per buffer and chain the result through 'initial'.  When
+ *    'isEvenStart' is FALSE the bytes of every 16-bit lane of this buffer's
+ *    sum are swapped before 'initial' is added.
+ *
+ *    PLEASE NOTE: although the accumulator is a UINT64, the caller is
+ *    assumed to keep it from overflowing when more data is added.
+ *
+ * Result:
+ *    The running sum.  It is neither folded to 16 bits nor complemented.
+ *----------------------------------------------------------------------------
+ */
+UINT64
+CopyAndCalculateOnesComplement(UINT8 *dst,
+                               UINT8 *src,
+                               UINT16 length,
+                               UINT64 initial,
+                               BOOLEAN isEvenStart)
+{
+    UINT64 sum = 0;
+    UINT8 *in = src;
+    UINT8 *out = dst;
+    UINT16 left = length;
+    union {
+        UINT32 val;
+        UINT8 b8[4];
+    } tail;
+
+    /* Bulk: copy and sum 8 bytes per step. */
+    for (; left >= 8; left -= 8, in += 8, out += 8) {
+        UINT64 word = *(UINT64 *)in;
+
+        *(UINT64 *)out = word;
+        sum += (word >> 32) + (word & 0xffffffff);
+    }
+
+    /* One optional 4-byte step. */
+    if (left >= 4) {
+        UINT32 half = *(UINT32 *)in;
+
+        *(UINT32 *)out = half;
+        sum += half;
+        in += 4;
+        out += 4;
+        left -= 4;
+    }
+
+    /* Up to 3 trailing bytes: copied one by one and zero padded into a
+     * 32-bit value for the sum. */
+    tail.val = 0;
+    switch (left) {
+    case 3:
+        out[2] = in[2];
+        tail.b8[2] = in[2];
+        /* fall through */
+    case 2:
+        out[1] = in[1];
+        tail.b8[1] = in[1];
+        /* fall through */
+    case 1:
+        out[0] = in[0];
+        tail.b8[0] = in[0];
+        sum += tail.val;
+    }
+
+    if (!isEvenStart) {
+        sum = swap64(sum);
+    }
+    return sum + initial;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * CopyAndCalculateChecksum --
+ *
+ *    Same as CalculateChecksum, except that the 'length' bytes at 'src' are
+ *    also copied to 'dst' while they are being summed.
+ *
+ * Result:
+ *    The complemented, folded 16-bit checksum.
+ *----------------------------------------------------------------------------
+ */
+UINT16
+CopyAndCalculateChecksum(UINT8 *dst,
+                         UINT8 *src,
+                         UINT16 length,
+                         UINT16 initial)
+{
+    UINT64 acc;
+
+    acc = CopyAndCalculateOnesComplement(dst, src, length, initial, TRUE);
+    fold64(acc);
+
+    return (UINT16)~acc;
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * IPChecksum --
+ *
+ *    Given an IP header, calculates the IP checksum.
+ *    The IP checksum field is assumed to be initialized properly.
+ *
+ * Input Parameter:
+ *    ipHdr:   start of the IP header
+ *    length:  IP header length in bytes (may include IP options); asserted
+ *             to be a multiple of 4
+ *    initial: same as for CalculateChecksum
+ *
+ * Result:
+ *    The result is already complemented, so it can be assigned to the
+ *    checksum field directly.
+ *----------------------------------------------------------------------------
+ */
+UINT16
+IPChecksum(UINT8 *ipHdr,
+           UINT16 length,
+           UINT16 initial)
+{
+    UINT16 *word = (UINT16 *)ipHdr;
+    UINT32 sum = initial;
+
+    ASSERT((length & 0x3) == 0);
+
+    /* Add the header up one 16-bit word at a time. */
+    for (; length > 1; length -= 2) {
+        sum += *word++;
+    }
+
+    fold32(sum);
+    return (UINT16)~sum;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * IPPseudoChecksum --
+ *
+ *    Given the source and destination IP addresses, the protocol value and
+ *    the total upper layer length (not including the IP header, but
+ *    including the upper layer protocol header, e.g. the TCP header for a
+ *    TCP checksum), calculates the pseudo header checksum.  Please note
+ *    that this is just the one's complement addition; the result is not
+ *    complemented.
+ *
+ * Input Parameter:
+ *    src:         source address, in network byte order
+ *    dst:         destination address, same as src
+ *    protocol:    protocol value from the IP header
+ *    totalLength: total length of the upper layer data including its
+ *                 header
+ *
+ * Result:
+ *    This value should be put in the TCP checksum field before calculating
+ *    the TCP checksum using CalculateChecksum with an initial value of 0.
+ *----------------------------------------------------------------------------
+ */
+UINT16
+IPPseudoChecksum(UINT32 *src,
+                 UINT32 *dst,
+                 UINT8 protocol,
+                 UINT16 totalLength)
+{
+    UINT32 srcAddr = *src;
+    UINT32 dstAddr = *dst;
+    UINT32 sum = (UINT32)htons(totalLength) + htons(protocol);
+
+    /* Each address contributes its two 16-bit halves. */
+    sum += (srcAddr >> 16) + (srcAddr & 0xffff);
+    sum += (dstAddr >> 16) + (dstAddr & 0xffff);
+
+    fold32(sum);
+    return (UINT16)sum;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * IPv6PseudoChecksum --
+ *
+ *    Given the IPv6 source and destination addresses, the upper layer
+ *    protocol and the total upper layer data length (including the upper
+ *    layer header), calculates the pseudo header checksum used for the
+ *    upper layer protocol checksum.
+ *
+ *    Please note that this is just the one's complement addition; the
+ *    result is not complemented.
+ *
+ * Input Parameter:
+ *    src:         source IPv6 address (four 32-bit words), in network byte
+ *                 order
+ *    dst:         destination IPv6 address
+ *    protocol:    upper layer protocol
+ *    totalLength: total length of the upper layer data.  Please note this
+ *                 is in host byte order.
+ *
+ * Result:
+ *    Place it in the upper layer checksum field before calculating the
+ *    upper layer checksum.
+ *----------------------------------------------------------------------------
+ */
+UINT16
+IPv6PseudoChecksum(UINT32 *src,
+                   UINT32 *dst,
+                   UINT8 protocol,
+                   UINT16 totalLength)
+{
+    UINT64 sum = (UINT32)htons(totalLength) + htons(protocol);
+    UINT32 i;
+
+    /* Both addresses are four 32-bit words each. */
+    for (i = 0; i < 4; i++) {
+        sum += (UINT64)src[i] + dst[i];
+    }
+
+    fold64(sum);
+    return (UINT16)sum;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * ChecksumUpdate32 --
+ *
+ *    Incrementally updates a checksum after a 32-bit field covered by it
+ *    has changed.  Please check the relevant RFC for reference.
+ *
+ * Input Parameter:
+ *    oldSum:   old checksum value, as found in the checksum field
+ *    prev:     previous value of the relevant 32-bit field, in network
+ *              byte order
+ *    newValue: new value of the relevant 32-bit field, in network byte
+ *              order
+ *
+ * Result:
+ *    New checksum value to be placed in the checksum field.
+ *----------------------------------------------------------------------------
+ */
+UINT16
+ChecksumUpdate32(UINT16 oldSum,
+                 UINT32 prev,
+                 UINT32 newValue)
+{
+    UINT32 notPrev = ~prev;
+    UINT32 sum = (UINT16)~oldSum;
+
+    /* ~oldSum + ~prev + newValue, with the 32-bit values split into their
+     * 16-bit halves. */
+    sum += (notPrev >> 16) + (notPrev & 0xffff);
+    sum += (newValue >> 16) + (newValue & 0xffff);
+
+    fold32(sum);
+    return (UINT16)~sum;
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * ChecksumUpdate16 --
+ *
+ *    Incrementally updates a checksum after a 16-bit field covered by it
+ *    has changed.  Please check the relevant RFC for reference.
+ *
+ * Input Parameter:
+ *    oldSum:   old checksum value, as found in the checksum field
+ *    prev:     previous value of the relevant 16-bit field, in network
+ *              byte order
+ *    newValue: new value of the relevant 16-bit field, in network byte
+ *              order
+ *
+ * Result:
+ *    New checksum value to be placed in the checksum field.
+ *----------------------------------------------------------------------------
+ */
+UINT16
+ChecksumUpdate16(UINT16 oldSum,
+                 UINT16 prev,
+                 UINT16 newValue)
+{
+    /* ~oldSum + ~prev + newValue, accumulated in 32 bits. */
+    UINT32 sum = (UINT32)(UINT16)~oldSum
+                 + (UINT32)(UINT16)~prev
+                 + (UINT32)newValue;
+
+    fold32(sum);
+    return (UINT16)~sum;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * CalculateChecksumNB --
+ *
+ * Calculates checksum over a length of bytes contained in an NB.
+ *
+ * nb : NB which contains the packet bytes.
+ * csumDataLen : Length of bytes to be checksummed.
+ * offset : offset to the first bytes of the data stream to be
+ * checksummed.
+ *
+ * The bytes may span several MDLs: the MDL chain is walked to the MDL that
+ * holds 'offset', then each MDL's share of the data is summed and folded
+ * in turn.
+ *
+ * Result:
+ * return 0, if there is a failure (invalid parameters, or an MDL that
+ * cannot be mapped).
+ * NOTE(review): 0 is also what is returned when the complemented sum
+ * happens to be 0, so the return value alone does not identify a failure.
+ *----------------------------------------------------------------------------
+ */
+UINT16
+CalculateChecksumNB(const PNET_BUFFER nb,
+ UINT16 csumDataLen,
+ UINT32 offset)
+{
+ ULONG mdlLen;
+ UINT16 csLen;
+ PUCHAR src;
+ UINT64 csum = 0;
+ PMDL currentMdl;
+ ULONG firstMdlLen;
+ /* Running count of bytes in remainder of the MDLs including current. */
+ ULONG packetLen;
+
+ if ((nb == NULL) || (csumDataLen == 0)
+ || (offset >= NET_BUFFER_DATA_LENGTH(nb))
+ || (offset + csumDataLen > NET_BUFFER_DATA_LENGTH(nb))) { /* Requested range must lie inside the NB data. */
+ OVS_LOG_ERROR("Invalid parameters - csum length %u, offset %u,"
+ "pkt%s len %u", csumDataLen, offset, nb? "":"(null)",
+ nb? NET_BUFFER_DATA_LENGTH(nb) : 0);
+ return 0;
+ }
+
+ currentMdl = NET_BUFFER_CURRENT_MDL(nb);
+ packetLen = NET_BUFFER_DATA_LENGTH(nb);
+ firstMdlLen =
+ MmGetMdlByteCount(currentMdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb);
+
+ firstMdlLen = MIN(firstMdlLen, packetLen); /* Never count more bytes than the NB holds. */
+ if (offset < firstMdlLen) { /* 'offset' falls inside the first MDL. */
+ src = (PUCHAR) MmGetSystemAddressForMdlSafe(currentMdl, LowPagePriority);
+ if (!src) {
+ return 0;
+ }
+ src += (NET_BUFFER_CURRENT_MDL_OFFSET(nb) + offset);
+ mdlLen = firstMdlLen - offset;
+ packetLen -= firstMdlLen;
+ ASSERT((INT)packetLen >= 0);
+ } else { /* Skip whole MDLs until the one containing 'offset' is reached. */
+ offset -= firstMdlLen;
+ packetLen -= firstMdlLen;
+ ASSERT((INT)packetLen >= 0);
+ currentMdl = NDIS_MDL_LINKAGE(currentMdl);
+ mdlLen = MmGetMdlByteCount(currentMdl);
+ mdlLen = MIN(mdlLen, packetLen);
+
+ while (offset >= mdlLen) {
+ offset -= mdlLen;
+ packetLen -= mdlLen;
+ ASSERT((INT)packetLen >= 0);
+ currentMdl = NDIS_MDL_LINKAGE(currentMdl);
+ mdlLen = MmGetMdlByteCount(currentMdl);
+ mdlLen = MIN(mdlLen, packetLen);
+ }
+
+ src = (PUCHAR)MmGetSystemAddressForMdlSafe(currentMdl, LowPagePriority);
+ if (!src) {
+ return 0;
+ }
+
+ src += offset;
+ mdlLen -= offset; /* NOTE(review): on this path packetLen still includes the current MDL, unlike the branch above - confirm that is intended. */
+ }
+
+ while (csumDataLen && (currentMdl != NULL)) { /* Sum one MDL's share per iteration. */
+ ASSERT(mdlLen < 65536);
+ csLen = MIN((UINT16) mdlLen, csumDataLen);
+ //XXX Not handling odd bytes yet: an odd-length chunk is only accepted when it is the last one.
+ ASSERT(((csLen & 0x1) == 0) || csumDataLen <= mdlLen);
+
+ csum = CalculateOnesComplement(src, csLen, csum, TRUE);
+ fold64(csum);
+
+ csumDataLen -= csLen;
+ currentMdl = NDIS_MDL_LINKAGE(currentMdl);
+ if (csumDataLen && currentMdl) {
+ src = MmGetSystemAddressForMdlSafe(currentMdl, LowPagePriority);
+ if (!src) {
+ return 0;
+ }
+
+ mdlLen = MmGetMdlByteCount(currentMdl);
+ mdlLen = MIN(mdlLen, packetLen);
+ /* packetLen does not include the current MDL from here on. */
+ packetLen -= mdlLen;
+ ASSERT((INT)packetLen >= 0);
+ }
+ }
+
+ ASSERT(csumDataLen == 0);
+ ASSERT((csum & ~0xffff) == 0);
+ return (UINT16) ~csum;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsValidateIPChecksum --
+ *
+ *    Validates the IPv4 header checksum of the packet in 'curNbl'.
+ *
+ *    The checksum status reported in the NBL's
+ *    TcpIpChecksumNetBufferListInfo is used when present; only when neither
+ *    "failed" nor "succeeded" is indicated there is the checksum recomputed
+ *    in software from a local copy of the header.
+ *
+ * Result:
+ *    NDIS_STATUS_SUCCESS for non-IPv4 packets, for packets already marked
+ *    as validated, when the header cannot be obtained, or when the
+ *    recomputed checksum matches.
+ *    NDIS_STATUS_FAILURE when a failure is indicated in the NBL info, when
+ *    the recomputed checksum differs, or when the header length exceeds
+ *    the local copy (see the note in the body).
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsValidateIPChecksum(PNET_BUFFER_LIST curNbl,
+                      POVS_PACKET_HDR_INFO hdrInfo)
+{
+    NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
+    uint16_t checksum, hdrChecksum;
+    struct IPHdr ip_storage;
+    const IPHdr *ipHdr;
+    UINT32 hdrLen;
+
+    if (!hdrInfo->isIPv4) {
+        return NDIS_STATUS_SUCCESS;
+    }
+
+    /* First check if NIC has indicated checksum failure. */
+    csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
+                                          TcpIpChecksumNetBufferListInfo);
+    if (csumInfo.Receive.IpChecksumFailed) {
+        return NDIS_STATUS_FAILURE;
+    }
+
+    /* Next, check if the NIC did not validate the RX checksum. */
+    if (!csumInfo.Receive.IpChecksumSucceeded) {
+        ipHdr = OvsGetIp(curNbl, hdrInfo->l3Offset, &ip_storage);
+        if (ipHdr) {
+            hdrLen = ipHdr->ihl * 4;
+
+            /*
+             * The checksum is computed over the local copy 'ip_storage'.
+             * A header longer than that copy (IP options) would make
+             * IPChecksum read past the end of 'ip_storage' on the stack,
+             * so such packets are rejected here instead.
+             * TODO: checksum the full header, options included, straight
+             * from the packet data so that these packets can be accepted.
+             */
+            if (hdrLen > sizeof ip_storage) {
+                return NDIS_STATUS_FAILURE;
+            }
+
+            ip_storage = *ipHdr;
+            hdrChecksum = ipHdr->check;
+            /* Recompute with the checksum field zeroed. */
+            ip_storage.check = 0;
+            checksum = IPChecksum((uint8 *)&ip_storage, (UINT16)hdrLen, 0);
+            if (checksum != hdrChecksum) {
+                return NDIS_STATUS_FAILURE;
+            }
+        }
+    }
+    return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsValidateUDPChecksum --
+ *
+ *    Checks the UDP checksum status recorded in the
+ *    TcpIpChecksumNetBufferListInfo of 'curNbl'.
+ *
+ *    When 'udpCsumZero' is TRUE the checksum is treated as valid: the
+ *    UdpChecksumFailed bit is cleared and the info is written back to the
+ *    NBL.
+ *
+ * Result:
+ *    NDIS_STATUS_SUCCESS, or NDIS_STATUS_INVALID_PACKET when the checksum
+ *    is not zero and the UdpChecksumFailed bit is set.
+ *----------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsValidateUDPChecksum(PNET_BUFFER_LIST curNbl, BOOLEAN udpCsumZero)
+{
+    NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
+
+    csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo);
+
+    if (!udpCsumZero) {
+        /* Report a failure only if one has been indicated in the NBL. */
+        return csumInfo.Receive.UdpChecksumFailed ? NDIS_STATUS_INVALID_PACKET
+                                                  : NDIS_STATUS_SUCCESS;
+    }
+
+    /* Zero is valid checksum: clear any failure indication in the NBL. */
+    csumInfo.Receive.UdpChecksumFailed = 0;
+    NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
+    return NDIS_STATUS_SUCCESS;
+}
diff --git a/datapath-windows/ovsext/OvsChecksum.h b/datapath-windows/ovsext/OvsChecksum.h
new file mode 100644
index 000000000..d0070d2f5
--- /dev/null
+++ b/datapath-windows/ovsext/OvsChecksum.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_CHECKSUM_H_
+#define __OVS_CHECKSUM_H_ 1
+
+typedef union _OVS_PACKET_HDR_INFO *POVS_PACKET_HDR_INFO; /* Forward declaration; the union itself is defined elsewhere. */
+
+UINT16 CalculateChecksum(UINT8 *ptr, UINT16 length, UINT16 initial); /* Folded and complemented; ready for the checksum field. */
+UINT16 CopyAndCalculateChecksum(UINT8 *dst, UINT8 *src, UINT16 length,
+ UINT16 initial); /* As CalculateChecksum, also copying src to dst. */
+UINT16 IPChecksum(UINT8 *ipHdr, UINT16 length, UINT16 initial); /* length is asserted to be a multiple of 4. */
+UINT16 IPPseudoChecksum(UINT32 *src, UINT32 *dst, UINT8 protocol,
+ UINT16 totalLength); /* Folded sum, not complemented. */
+UINT16 IPv6PseudoChecksum(UINT32 *src, UINT32 *dst, UINT8 protocol,
+ UINT16 totalLength); /* Folded sum, not complemented. */
+UINT16 ChecksumUpdate32(UINT16 oldSum, UINT32 prev, UINT32 newValue);
+UINT16 ChecksumUpdate16(UINT16 oldSum, UINT16 prev, UINT16 newValue);
+UINT16 CalculateChecksumNB(const PNET_BUFFER nb, UINT16 csumDataLen,
+ UINT32 offset); /* Returns 0 on failure. */
+NDIS_STATUS OvsValidateIPChecksum(PNET_BUFFER_LIST curNbl,
+ POVS_PACKET_HDR_INFO hdrInfo);
+NDIS_STATUS OvsValidateUDPChecksum(PNET_BUFFER_LIST curNbl,
+ BOOLEAN udpCsumZero);
+
+#endif /* __OVS_CHECKSUM_H_ */
diff --git a/datapath-windows/ovsext/OvsDebug.c b/datapath-windows/ovsext/OvsDebug.c
new file mode 100644
index 000000000..8610008df
--- /dev/null
+++ b/datapath-windows/ovsext/OvsDebug.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "precomp.h"
+
+#include "OvsDebug.h"
+/*
+ * Default log level: more verbose when DBG is non-zero.  Tested with
+ * '#if DBG' (as OvsDebug.h does for OVS_VERIFY_IRQL) rather than '#ifdef',
+ * so that a build which defines DBG as 0 gets the non-debug default.
+ */
+#if DBG
+#define OVS_DBG_DEFAULT OVS_DBG_INFO
+#else
+#define OVS_DBG_DEFAULT OVS_DBG_ERROR
+#endif
+
+/* Bitmask of OVS_DBG_* module flags whose messages are logged (all). */
+UINT32 ovsLogFlags = 0xffffffff;
+/* Messages with a level numerically above this one are dropped. */
+UINT32 ovsLogLevel = OVS_DBG_DEFAULT;
+
+/* Size in bytes of the stack buffer OvsLog formats a message into. */
+#define OVS_LOG_BUFFER_SIZE 384
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsLog --
+ *     Formats a message and prints it, prefixed with the calling function
+ *     name and line number, through DbgPrintEx.
+ *
+ *     Nothing is printed when 'level' is above ovsLogLevel or when 'flag'
+ *     is not set in ovsLogFlags.  The message is formatted into a buffer of
+ *     OVS_LOG_BUFFER_SIZE bytes.
+ * --------------------------------------------------------------------------
+ */
+VOID
+OvsLog(UINT32 level,
+       UINT32 flag,
+       CHAR *funcName,
+       UINT32 line,
+       CHAR *format,
+       ...)
+{
+    CHAR msg[OVS_LOG_BUFFER_SIZE];
+    va_list ap;
+    BOOLEAN levelEnabled = (level <= ovsLogLevel);
+    BOOLEAN moduleEnabled = ((ovsLogFlags & flag) != 0);
+
+    if (!(levelEnabled && moduleEnabled)) {
+        return;
+    }
+
+    msg[0] = 0;
+    va_start(ap, format);
+    RtlStringCbVPrintfA(msg, sizeof (msg), format, ap);
+    va_end(ap);
+
+    DbgPrintEx(DPFLTR_IHVNETWORK_ID, level, "%s:%lu %s\n", funcName, line, msg);
+}
diff --git a/datapath-windows/ovsext/OvsDebug.h b/datapath-windows/ovsext/OvsDebug.h
new file mode 100644
index 000000000..3705d1e9b
--- /dev/null
+++ b/datapath-windows/ovsext/OvsDebug.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_DEBUG_H_
+#define __OVS_DEBUG_H_ 1
+
+/*
+ * Per-module log flags.  A source file selects its module by defining
+ * OVS_DBG_MOD to one of these; OvsLog tests that bit against ovsLogFlags.
+ */
+#define OVS_DBG_INIT     BIT32(0)
+#define OVS_DBG_SWITCH   BIT32(1)
+#define OVS_DBG_VPORT    BIT32(2)
+#define OVS_DBG_FLOW     BIT32(3)
+#define OVS_DBG_QOS      BIT32(4)
+#define OVS_DBG_USER     BIT32(5)
+#define OVS_DBG_EXECUTE  BIT32(6)
+#define OVS_DBG_EVENT    BIT32(7)
+#define OVS_DBG_DISPATCH BIT32(8)
+#define OVS_DBG_OID      BIT32(9)
+#define OVS_DBG_STATUS   BIT32(10)
+#define OVS_DBG_CHECKSUM BIT32(11)
+#define OVS_DBG_VXLAN    BIT32(12)
+#define OVS_DBG_GRE      BIT32(13)
+#define OVS_DBG_GRE64    BIT32(14)
+#define OVS_DBG_ACTION   BIT32(15)
+#define OVS_DBG_IOCTL    BIT32(16)
+#define OVS_DBG_PROPERTY BIT32(17)
+#define OVS_DBG_IPHELPER BIT32(18)
+#define OVS_DBG_BUFMGMT  BIT32(19)
+/* OvsDriver.c sets OVS_DBG_MOD to OVS_DBG_DRIVER; without this definition
+ * the first OVS_LOG_* call added to that file would not compile. */
+#define OVS_DBG_DRIVER   BIT32(20)
+#define OVS_DBG_OTHERS   BIT32(21)
+
+#define OVS_DBG_RESERVED BIT32(31)
+//Please add above OVS_DBG_RESERVED.
+
+/* Log levels, mapped onto the DPFLTR_* levels passed to DbgPrintEx. */
+#define OVS_DBG_ERROR DPFLTR_ERROR_LEVEL
+#define OVS_DBG_WARN  DPFLTR_WARNING_LEVEL
+#define OVS_DBG_TRACE DPFLTR_TRACE_LEVEL
+#define OVS_DBG_INFO  DPFLTR_INFO_LEVEL
+#define OVS_DBG_LOUD  (DPFLTR_INFO_LEVEL + 1)
+
+
+
+VOID OvsLog(UINT32 level, UINT32 flag, CHAR *funcName,
+ UINT32 line, CHAR *format, ...); /* printf-style; defined in OvsDebug.c. */
+
+
+#define OVS_LOG_LOUD(_format, ...) \
+ OvsLog(OVS_DBG_LOUD, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__) /* The including file must define OVS_DBG_MOD. */
+
+#define OVS_LOG_INFO(_format, ...) \
+ OvsLog(OVS_DBG_INFO, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__)
+
+#define OVS_LOG_TRACE(_format, ...) \
+ OvsLog(OVS_DBG_TRACE, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__)
+
+#define OVS_LOG_ERROR(_format, ...) \
+ OvsLog(OVS_DBG_ERROR, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__)
+
+#define OVS_LOG_WARN(_format, ...) \
+ OvsLog(OVS_DBG_WARN, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__)
+
+#if DBG
+/*
+ * Log a warning when the current IRQL differs from '_x'.  The argument is
+ * parenthesized so that an expression such as 'a + b' is cast as a whole.
+ * NOTE: this expands to a bare 'if' statement, so do not use it as the
+ * unbraced body of an 'if' that has an 'else'.
+ */
+#define OVS_VERIFY_IRQL(_x)                                      \
+    if (KeGetCurrentIrql() != (KIRQL)(_x)) {                     \
+        OVS_LOG_WARN("expected IRQL %u, actual IRQL: %u",        \
+                     (_x), KeGetCurrentIrql());                  \
+    }
+
+/* Log a warning when the current IRQL is above '_x'.  Same caveat as
+ * OVS_VERIFY_IRQL. */
+#define OVS_VERIFY_IRQL_LE(_x)                                   \
+    if (KeGetCurrentIrql() > (KIRQL)(_x)) {                      \
+        OVS_LOG_WARN("expected IRQL <= %u, actual IRQL: %u",     \
+                     (_x), KeGetCurrentIrql());                  \
+    }
+
+#else
+/* The checks compile to nothing when DBG is zero or undefined. */
+#define OVS_VERIFY_IRQL(_x)
+#define OVS_VERIFY_IRQL_LE(_x)
+#endif
+
+#endif /* __OVS_DEBUG_H_ */
diff --git a/datapath-windows/ovsext/OvsDriver.c b/datapath-windows/ovsext/OvsDriver.c
new file mode 100644
index 000000000..f027410a3
--- /dev/null
+++ b/datapath-windows/ovsext/OvsDriver.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "precomp.h"
+#include "OvsSwitch.h"
+#include "OvsIoctl.h"
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_DRIVER
+#include "OvsDebug.h"
+
+/* Global handles. XXX: Some of them need not be global. */
+/*
+ * Maps to DriverObject and FilterDriverContext parameters in the NDIS filter
+ * driver functions.
+ * DriverObject is specified by NDIS.
+ * FilterDriverContext is specified by the filter driver.
+ * DriverEntry stores its driverObject argument here and also passes it to
+ * NdisFRegisterFilterDriver as the filter driver context.
+ */
+NDIS_HANDLE gOvsExtDriverObject;
+
+/*
+ * Maps to NdisFilterHandle parameter in the NDIS filter driver functions.
+ * It is filled in by NdisFRegisterFilterDriver in DriverEntry and reset to
+ * NULL there if creating the device object fails.
+ */
+NDIS_HANDLE gOvsExtDriverHandle;
+
+/*
+ * Maps to FilterModuleContext parameter in the NDIS filter driver functions.
+ * FilterModuleContext is allocated by the driver in the FilterAttach
+ * function.
+ */
+extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
+
+static PWCHAR ovsExtFriendlyName = L"VMWare OVS Extension";
+static PWCHAR ovsExtServiceName = L"OVSExt";
+NDIS_STRING ovsExtGuidUC; /* Initialized from ovsExtGuidStr in DriverEntry. */
+NDIS_STRING ovsExtFriendlyNameUC; /* Initialized from ovsExtFriendlyName in DriverEntry. */
+
+static PWCHAR ovsExtGuidStr = L"{583CC151-73EC-4A6A-8B47-578297AD7623}";
+static const GUID ovsExtGuid = { /* Same GUID as ovsExtGuidStr, in binary form. */
+ 0x583cc151,
+ 0x73ec,
+ 0x4a6a,
+ {0x8b, 0x47, 0x57, 0x82, 0x97, 0xad, 0x76, 0x23}
+};
+
+/* Declarations of callback functions for the filter driver. Not every
+ * function declared here is registered: DriverEntry leaves the status,
+ * receive and return handlers NULL. */
+DRIVER_UNLOAD OvsExtUnload;
+FILTER_NET_PNP_EVENT OvsExtNetPnPEvent;
+FILTER_STATUS OvsExtStatus;
+
+FILTER_ATTACH OvsExtAttach;
+FILTER_DETACH OvsExtDetach;
+FILTER_RESTART OvsExtRestart;
+FILTER_PAUSE OvsExtPause;
+
+FILTER_SEND_NET_BUFFER_LISTS OvsExtSendNBL;
+FILTER_SEND_NET_BUFFER_LISTS_COMPLETE OvsExtSendNBLComplete;
+FILTER_CANCEL_SEND_NET_BUFFER_LISTS OvsExtCancelSendNBL;
+FILTER_RECEIVE_NET_BUFFER_LISTS OvsExtReceiveNBL;
+FILTER_RETURN_NET_BUFFER_LISTS OvsExtReturnNBL;
+
+FILTER_OID_REQUEST OvsExtOidRequest;
+FILTER_OID_REQUEST_COMPLETE OvsExtOidRequestComplete;
+FILTER_CANCEL_OID_REQUEST OvsExtCancelOidRequest;
+
+
+/*
+ * --------------------------------------------------------------------------
+ * DriverEntry --
+ *     Init/Load function for the OVSEXT filter driver.
+ *
+ *     Fills in the filter driver characteristics, registers the driver with
+ *     NdisFRegisterFilterDriver and then creates the device object used to
+ *     communicate with userspace.  If the device object cannot be created
+ *     the filter driver is deregistered again.
+ *
+ * Result:
+ *     Status of the registration, or of the device object creation when the
+ *     registration succeeded.
+ * --------------------------------------------------------------------------
+ */
+NTSTATUS
+DriverEntry(PDRIVER_OBJECT driverObject,
+            PUNICODE_STRING registryPath)
+{
+    NDIS_FILTER_DRIVER_CHARACTERISTICS chars;
+    NDIS_STATUS status;
+
+    UNREFERENCED_PARAMETER(registryPath);
+
+    gOvsExtDriverObject = driverObject;
+    driverObject->DriverUnload = OvsExtUnload;
+
+    /* Names under which the filter is registered. */
+    RtlInitUnicodeString(&ovsExtFriendlyNameUC, ovsExtFriendlyName);
+    RtlInitUnicodeString(&ovsExtGuidUC, ovsExtGuidStr);
+
+    RtlZeroMemory(&chars, sizeof chars);
+
+    /* Object header and versions. */
+    chars.Header.Type = NDIS_OBJECT_TYPE_FILTER_DRIVER_CHARACTERISTICS;
+    chars.Header.Size = sizeof chars;
+    chars.Header.Revision = NDIS_FILTER_CHARACTERISTICS_REVISION_2;
+    chars.MajorNdisVersion = NDIS_FILTER_MAJOR_VERSION;
+    chars.MinorNdisVersion = NDIS_FILTER_MINOR_VERSION;
+    chars.MajorDriverVersion = 1;
+    chars.MinorDriverVersion = 0;
+    chars.Flags = 0;
+
+    RtlInitUnicodeString(&chars.ServiceName, ovsExtServiceName);
+    chars.FriendlyName = ovsExtFriendlyNameUC;
+    chars.UniqueName = ovsExtGuidUC;
+
+    /* Filter module life cycle. */
+    chars.AttachHandler = OvsExtAttach;
+    chars.DetachHandler = OvsExtDetach;
+    chars.RestartHandler = OvsExtRestart;
+    chars.PauseHandler = OvsExtPause;
+
+    /* Data path: only the send side handlers are installed. */
+    chars.SendNetBufferListsHandler = OvsExtSendNBL;
+    chars.SendNetBufferListsCompleteHandler = OvsExtSendNBLComplete;
+    chars.CancelSendNetBufferListsHandler = OvsExtCancelSendNBL;
+    chars.ReceiveNetBufferListsHandler = NULL;
+    chars.ReturnNetBufferListsHandler = NULL;
+
+    /* OID requests. */
+    chars.OidRequestHandler = OvsExtOidRequest;
+    chars.OidRequestCompleteHandler = OvsExtOidRequestComplete;
+    chars.CancelOidRequestHandler = OvsExtCancelOidRequest;
+
+    /* Events and status indications. */
+    chars.DevicePnPEventNotifyHandler = NULL;
+    chars.NetPnPEventHandler = OvsExtNetPnPEvent;
+    chars.StatusHandler = NULL;
+
+    status = NdisFRegisterFilterDriver(driverObject,
+                                       (NDIS_HANDLE) gOvsExtDriverObject,
+                                       &chars, &gOvsExtDriverHandle);
+    if (status == NDIS_STATUS_SUCCESS) {
+        /* Create the communication channel for userspace. */
+        status = OvsCreateDeviceObject(gOvsExtDriverHandle);
+        if (status != NDIS_STATUS_SUCCESS) {
+            NdisFDeregisterFilterDriver(gOvsExtDriverHandle);
+            gOvsExtDriverHandle = NULL;
+        }
+    }
+
+    return status;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsExtUnload --
+ *     Un-init/Unload function for the OVS filter driver.  Undoes what
+ *     DriverEntry set up, in reverse order: the device object is deleted
+ *     first, then the filter driver is deregistered.
+ * --------------------------------------------------------------------------
+ */
+VOID
+OvsExtUnload(struct _DRIVER_OBJECT *driverObject)
+{
+    UNREFERENCED_PARAMETER(driverObject);
+
+    /* Remove the userspace communication channel. */
+    OvsDeleteDeviceObject();
+
+    /* Drop the registration obtained in DriverEntry. */
+    NdisFDeregisterFilterDriver(gOvsExtDriverHandle);
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * OvsExtStatus --
+ *     Implements filter driver's FilterStatus function: forwards the status
+ *     indication unchanged through NdisFIndicateStatus, using the filter
+ *     handle stored in the switch context.
+ *
+ *     filterModuleContext: the switch context (POVS_SWITCH_CONTEXT) of the
+ *                          filter module.
+ *     statusIndication:    the indication to pass on.
+ *
+ *     Note that DriverEntry in this file registers a NULL StatusHandler.
+ * --------------------------------------------------------------------------
+ */
+VOID
+OvsExtStatus(NDIS_HANDLE filterModuleContext,
+             PNDIS_STATUS_INDICATION statusIndication)
+{
+    POVS_SWITCH_CONTEXT switchObject = (POVS_SWITCH_CONTEXT)filterModuleContext;
+
+    /* statusIndication is used below, so it must not be marked as
+     * unreferenced. */
+    NdisFIndicateStatus(switchObject->NdisFilterHandle, statusIndication);
+}
diff --git a/datapath-windows/ovsext/OvsEth.h b/datapath-windows/ovsext/OvsEth.h
new file mode 100644
index 000000000..271fd85eb
--- /dev/null
+++ b/datapath-windows/ovsext/OvsEth.h
@@ -0,0 +1,450 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_ETH_H_
+#define __OVS_ETH_H_ 1
+
+#define ETH_LADRF_LEN 2
+#define ETH_ADDR_LENGTH 6
+
+typedef UINT8 Eth_Address[ETH_ADDR_LENGTH];
+
+/*
+ * printf helpers for an ethernet address. ETH_ADDR_FMT_ARGS expands to six
+ * comma-separated byte arguments matching ETH_ADDR_FMT_STR. The argument is
+ * parenthesized before the cast so that an expression such as 'base + off'
+ * is cast as a whole; note that it is evaluated six times.
+ */
+#define ETH_ADDR_FMT_STR "%02x:%02x:%02x:%02x:%02x:%02x"
+#define ETH_ADDR_FMT_ARGS(a) ((UINT8 *)(a))[0], ((UINT8 *)(a))[1], \
+                             ((UINT8 *)(a))[2], ((UINT8 *)(a))[3], \
+                             ((UINT8 *)(a))[4], ((UINT8 *)(a))[5]
+
+#define ETH_MAX_EXACT_MULTICAST_ADDRS 32
+
+typedef enum Eth_RxMode { /* receive-filter bit flags; each value is a distinct bit */
+ ETH_FILTER_UNICAST = 0x0001, /* pass unicast (directed) frames */
+ ETH_FILTER_MULTICAST = 0x0002, /* pass some multicast frames */
+ ETH_FILTER_ALLMULTI = 0x0004, /* pass *all* multicast frames */
+ ETH_FILTER_BROADCAST = 0x0008, /* pass broadcast frames */
+ ETH_FILTER_PROMISC = 0x0010, /* pass all frames (ie no filter) */
+ ETH_FILTER_USE_LADRF = 0x0020, /* use the LADRF for multicast filtering */
+ ETH_FILTER_SINK = 0x10000 /* pass not-matched unicast frames */
+} Eth_RxMode;
+
+/*
+ * filter flags printf helpers: ETH_FILTER_FLAG_FMT_ARGS(f) expands to seven
+ * string arguments, one per Eth_RxMode bit (the flag name when that bit is
+ * set in f, "" otherwise), matching the seven %s in ETH_FILTER_FLAG_FMT_STR.
+ * f is evaluated seven times.
+ */
+#define ETH_FILTER_FLAG_FMT_STR "%s%s%s%s%s%s%s"
+#define ETH_FILTER_FLAG_FMT_ARGS(f) (f) & ETH_FILTER_UNICAST ? " UNICAST" : "", \
+ (f) & ETH_FILTER_MULTICAST ? " MULTICAST" : "", \
+ (f) & ETH_FILTER_ALLMULTI ? " ALLMULTI" : "", \
+ (f) & ETH_FILTER_BROADCAST ? " BROADCAST" : "", \
+ (f) & ETH_FILTER_PROMISC ? " PROMISC" : "", \
+ (f) & ETH_FILTER_USE_LADRF ? " USE_LADRF" : "", \
+ (f) & ETH_FILTER_SINK ? " SINK" : ""
+
+/* Ethernet header type, as classified by Eth_HeaderType() */
+typedef enum {
+ ETH_HEADER_TYPE_DIX,
+ ETH_HEADER_TYPE_802_1PQ,
+ ETH_HEADER_TYPE_802_3,
+ ETH_HEADER_TYPE_802_1PQ_802_3,
+} Eth_HdrType;
+
+/*
+ * DIX type fields we care about, in host byte order. Eth_DixTypeNBO, defined
+ * right after this enum, holds the byte-swapped form of most of these values,
+ * i.e. what a little-endian host reads directly from a typeNBO field.
+ */
+typedef enum {
+ ETH_TYPE_IPV4 = 0x0800,
+ ETH_TYPE_IPV6 = 0x86DD,
+ ETH_TYPE_ARP = 0x0806,
+ ETH_TYPE_RARP = 0x8035,
+ ETH_TYPE_LLDP = 0x88CC,
+ ETH_TYPE_CDP = 0x2000,
+ ETH_TYPE_802_1PQ = 0x8100, // not really a DIX type, but used as such
+ ETH_TYPE_LLC = 0xFFFF, // 0xFFFF is IANA reserved, used to mark LLC
+} Eth_DixType;
+
+typedef enum {
+ ETH_TYPE_IPV4_NBO = 0x0008,
+ ETH_TYPE_IPV6_NBO = 0xDD86,
+ ETH_TYPE_ARP_NBO = 0x0608,
+ ETH_TYPE_RARP_NBO = 0x3580,
+ ETH_TYPE_LLDP_NBO = 0xCC88,
+ ETH_TYPE_CDP_NBO = 0x0020,
+ ETH_TYPE_AKIMBI_NBO = 0xDE88,
+ ETH_TYPE_802_1PQ_NBO = 0x0081, // not really a DIX type, but used as such
+} Eth_DixTypeNBO;
+
+/*
+ * low two bits of the LLC control byte; ETH_LLC_CONTROL_UFRAME_MASK selects
+ * those two bits.
+ */
+typedef enum {
+ ETH_LLC_CONTROL_IFRAME = 0x0, // both 0x0 and 0x2, only low bit of 0 needed
+ ETH_LLC_CONTROL_SFRAME = 0x1,
+ ETH_LLC_CONTROL_UFRAME = 0x3,
+} Eth_LLCControlBits;
+
+#define ETH_LLC_CONTROL_UFRAME_MASK (0x3)
+
+typedef struct Eth_DIX {
+ UINT16 typeNBO; // indicates the higher level protocol
+} Eth_DIX;
+
+/*
+ * LLC headers come in two varieties: 8 bit control and 16 bit control.
+ * When the lower two bits of the first control byte are '11', this
+ * indicates the 8 bit control field.
+ *
+ * NOTE(review): these structures appear intended to be overlaid on packet
+ * data, but no packing pragma is visible in this header. Confirm that the
+ * layout (e.g. Eth_802_3, where a UINT16-aligned Eth_SNAP follows a 3-byte
+ * Eth_LLC8) matches the wire format under the build's packing settings.
+ */
+typedef struct Eth_LLC8 {
+ UINT8 dsap;
+ UINT8 ssap;
+ UINT8 control;
+} Eth_LLC8;
+
+typedef struct Eth_LLC16 {
+ UINT8 dsap;
+ UINT8 ssap;
+ UINT16 control;
+} Eth_LLC16;
+
+typedef struct Eth_SNAP {
+ UINT8 snapOrg[3];
+ Eth_DIX snapType;
+} Eth_SNAP;
+
+typedef struct Eth_802_3 {
+ UINT16 lenNBO; // length of the frame
+ Eth_LLC8 llc; // LLC header
+ Eth_SNAP snap; // SNAP header
+} Eth_802_3;
+
+// 802.1p QOS/priority tags
+enum {
+ ETH_802_1_P_BEST_EFFORT = 0,
+ ETH_802_1_P_BACKGROUND = 1,
+ ETH_802_1_P_EXCELLENT_EFFORT = 2,
+ ETH_802_1_P_CRITICAL_APPS = 3,
+ ETH_802_1_P_VIDEO = 4,
+ ETH_802_1_P_VOICE = 5,
+ ETH_802_1_P_INTERNETWORK_CONROL = 6,
+ ETH_802_1_P_NETWORK_CONTROL = 7
+};
+
+typedef struct Eth_802_1pq_Tag {
+ UINT16 typeNBO; // always ETH_TYPE_802_1PQ
+ UINT16 vidHi:4, // 802.1q vlan ID high nibble
+ canonical:1, // bit order? (should always be 0)
+ priority:3, // 802.1p priority tag
+ vidLo:8; // 802.1q vlan ID low byte
+} Eth_802_1pq_Tag;
+
+typedef struct Eth_802_1pq {
+ Eth_802_1pq_Tag tag; // VLAN/QOS tag
+ union {
+ Eth_DIX dix; // DIX header follows
+ Eth_802_3 e802_3; // or 802.3 header follows
+ };
+} Eth_802_1pq;
+
+typedef struct Eth_Header {
+ Eth_Address dst; // all types of ethernet frame have dst first
+ Eth_Address src; // and the src next (at least all the ones we'll see)
+ union {
+ Eth_DIX dix; // followed by a DIX header...
+ Eth_802_3 e802_3; // ...or an 802.3 header
+ Eth_802_1pq e802_1pq; // ...or an 802.1[pq] tag and a header
+ };
+} Eth_Header;
+
+#define ETH_BROADCAST_ADDRESS { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }
+
+static Eth_Address netEthBroadcastAddr = ETH_BROADCAST_ADDRESS; /* 'static' in a header: one copy per including file */
+
+/*
+ * simple predicate for the 1536 boundary.
+ * the parameter is a network ordered UINT16 as read on a little-endian host,
+ * so its low byte is the first (most significant) byte on the wire. that
+ * byte is compared to 0x06, testing for type/length values greater than or
+ * equal to 0x0600 (1536); anything smaller is treated as an 802.3 length.
+ */
+
+#define ETH_TYPENOT8023(x) (((x) & 0xff) >= 0x06)
+
+/*
+ * header length macros
+ *
+ * each one sums the sizes of the two addresses and of whatever follows them
+ * for that header type.
+ *
+ * ETH_HEADER_LEN_DIX and ETH_HEADER_LEN_802_1PQ are typical; the 802.3-based
+ * ones are suspicious, due to 802.3 incompleteness.
+ * NOTE(review): the original text said "last two are suspicious" although
+ * eight macros follow -- confirm which ones were meant.
+ */
+
+#define ETH_HEADER_LEN_DIX (sizeof(Eth_Address) + \
+ sizeof(Eth_Address) + \
+ sizeof(Eth_DIX))
+#define ETH_HEADER_LEN_802_1PQ (sizeof(Eth_Address) + \
+ sizeof(Eth_Address) + \
+ sizeof(Eth_802_1pq_Tag) + \
+ sizeof(Eth_DIX))
+#define ETH_HEADER_LEN_802_2_LLC (sizeof(Eth_Address) + \
+ sizeof(Eth_Address) + \
+ sizeof(UINT16) + \
+ sizeof(Eth_LLC8))
+#define ETH_HEADER_LEN_802_2_LLC16 (sizeof(Eth_Address) + \
+ sizeof(Eth_Address) + \
+ sizeof(UINT16) + \
+ sizeof(Eth_LLC16))
+#define ETH_HEADER_LEN_802_3 (sizeof(Eth_Address) + \
+ sizeof(Eth_Address) + \
+ sizeof(Eth_802_3))
+#define ETH_HEADER_LEN_802_1PQ_LLC (sizeof(Eth_Address) + \
+ sizeof(Eth_Address) + \
+ sizeof(Eth_802_1pq_Tag) + \
+ sizeof(UINT16) + \
+ sizeof(Eth_LLC8))
+#define ETH_HEADER_LEN_802_1PQ_LLC16 (sizeof(Eth_Address) + \
+ sizeof(Eth_Address) + \
+ sizeof(Eth_802_1pq_Tag) + \
+ sizeof(UINT16) + \
+ sizeof(Eth_LLC16))
+#define ETH_HEADER_LEN_802_1PQ_802_3 (sizeof(Eth_Address) + \
+ sizeof(Eth_Address) + \
+ sizeof(Eth_802_1pq_Tag) + \
+ sizeof(Eth_802_3))
+
+#define ETH_MIN_HEADER_LEN (ETH_HEADER_LEN_DIX)
+#define ETH_MAX_HEADER_LEN (ETH_HEADER_LEN_802_1PQ_802_3)
+
+#define ETH_MIN_FRAME_LEN 60
+#define ETH_MAX_STD_MTU 1500
+#define ETH_MAX_STD_FRAMELEN (ETH_MAX_STD_MTU + ETH_MAX_HEADER_LEN)
+#define ETH_MAX_JUMBO_MTU 9000
+#define ETH_MAX_JUMBO_FRAMELEN (ETH_MAX_JUMBO_MTU + ETH_MAX_HEADER_LEN)
+
+#define ETH_DEFAULT_MTU 1500
+
+#define ETH_FCS_LEN 4
+#define ETH_VLAN_LEN sizeof(Eth_802_1pq_Tag)
+
+
+/*
+ *----------------------------------------------------------------------------
+ * Eth_IsAddrMatch --
+ *      Compare two ethernet addresses byte for byte.
+ *
+ * Results:
+ *      TRUE when all ETH_ADDR_LENGTH bytes are equal, FALSE otherwise.
+ *----------------------------------------------------------------------------
+ */
+static __inline BOOLEAN
+Eth_IsAddrMatch(const Eth_Address addr1, const Eth_Address addr2)
+{
+    BOOLEAN same = (memcmp(addr1, addr2, ETH_ADDR_LENGTH) == 0);
+
+    return same;
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * Eth_IsBroadcastAddr --
+ *      Check an ethernet address against netEthBroadcastAddr.
+ *
+ * Results:
+ *      TRUE when addr equals the broadcast address, FALSE otherwise.
+ *----------------------------------------------------------------------------
+ */
+static __inline BOOLEAN
+Eth_IsBroadcastAddr(const Eth_Address addr)
+{
+    /* Same comparison Eth_IsAddrMatch() performs, written out in place. */
+    return memcmp(addr, netEthBroadcastAddr, ETH_ADDR_LENGTH) == 0;
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * Eth_IsUnicastAddr --
+ *      Check whether an ethernet address is a unicast address.
+ *
+ * Results:
+ *      TRUE when the low bit of byte 0 is clear, FALSE otherwise.
+ *----------------------------------------------------------------------------
+ */
+static __inline BOOLEAN
+Eth_IsUnicastAddr(const Eth_Address addr)
+{
+    /* Broadcast and multicast addresses always have the low bit of byte 0 set. */
+    return (addr[0] & 0x1) == 0;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * Eth_IsNullAddr --
+ *      Check whether an ethernet address is the all-zeros address.
+ *
+ * Results:
+ *      TRUE when every byte of addr is zero, FALSE otherwise.
+ *----------------------------------------------------------------------------
+ */
+static __inline BOOLEAN
+Eth_IsNullAddr(const Eth_Address addr)
+{
+    UINT32 bits = 0;
+    UINT32 i;
+
+    /* OR all bytes together; the result is zero only if each byte is zero. */
+    for (i = 0; i < ETH_ADDR_LENGTH; i++) {
+        bits |= addr[i];
+    }
+    return (bits == 0);
+}
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * Eth_HeaderType --
+ *      Classify an ethernet header by looking at its type/length field
+ *      and, for 802.1[pq] tagged frames, at the type/length field that
+ *      follows the tag. This will not work in all cases, especially
+ *      since it requires ETH_HEADER_LEN_802_1PQ bytes to determine the
+ *      type.
+ *
+ *      The header type alone is not sufficient to determine the length
+ *      of the eth header: for an 802.3 header it is not clear, without
+ *      further examination, whether a SNAP header is included.
+ *
+ *      Returned type:
+ *
+ *      ETH_HEADER_TYPE_DIX: typical 14 byte eth header
+ *      ETH_HEADER_TYPE_802_1PQ: DIX+vlan tagging
+ *      ETH_HEADER_TYPE_802_3: 802.3 eth header
+ *      ETH_HEADER_TYPE_802_1PQ_802_3: 802.3 + vlan tag
+ *
+ *      The test for DIX was moved from a 1500 boundary to a 1536
+ *      boundary, since the vmxnet2 MTU was updated to 1514. When W2K8
+ *      attempted to send LLC frames, these were interpreted as DIX
+ *      frames instead of the correct 802.3 type.
+ *
+ *      These links may help if they're valid:
+ *
+ *      http://standards.ieee.org/regauth/ethertype/type-tut.html
+ *      http://standards.ieee.org/regauth/ethertype/type-pub.html
+ *
+ * Results:
+ *      Eth_HdrType value
+ *
+ *----------------------------------------------------------------------------
+ */
+static __inline Eth_HdrType
+Eth_HeaderType(const Eth_Header *eh)
+{
+    /*
+     * The boundary used is 1536 (IEEE 802.3-std mentions 1536, but IANA
+     * indicates types 0-0x5dc are 802.3). It lives in ETH_TYPENOT8023
+     * rather than in some #def symbol to prevent inadvertent reuse of the
+     * same macro for buffer size declarations.
+     */
+    if (!ETH_TYPENOT8023(eh->dix.typeNBO)) {
+        /* A length, not a type: assume 802.3. */
+        return ETH_HEADER_TYPE_802_3;
+    }
+
+    if (eh->dix.typeNBO != ETH_TYPE_802_1PQ_NBO) {
+        /* Typical case: untagged DIX frame. */
+        return ETH_HEADER_TYPE_DIX;
+    }
+
+    /* Some type of 802.1pq tagged frame; look past the tag. */
+    return ETH_TYPENOT8023(eh->e802_1pq.dix.typeNBO)
+               ? ETH_HEADER_TYPE_802_1PQ          /* tag + DIX style type */
+               : ETH_HEADER_TYPE_802_1PQ_802_3;   /* tag + 802.3 header */
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * Eth_EncapsulatedPktType --
+ *      Get the encapsulated (layer 3) frame type. DIX and 802.1[pq]+DIX
+ *      frames carry it directly. For 802.3 frames it is taken from the
+ *      SNAP header when one is present; LLC frames without SNAP have no
+ *      encapsulated type and yield ETH_TYPE_LLC.
+ *
+ *      IANA reserves 0xFFFF, which we reuse to indicate ETH_TYPE_LLC.
+ *
+ * Results:
+ *      NBO frame type.
+ *
+ *----------------------------------------------------------------------------
+ */
+static __inline UINT16
+Eth_EncapsulatedPktType(const Eth_Header *eh)
+{
+    const Eth_802_3 *llcHdr;
+
+    switch (Eth_HeaderType(eh)) {
+    case ETH_HEADER_TYPE_DIX:
+        return eh->dix.typeNBO;
+
+    case ETH_HEADER_TYPE_802_1PQ:
+        return eh->e802_1pq.dix.typeNBO;
+
+    case ETH_HEADER_TYPE_802_3:
+        llcHdr = &eh->e802_3;
+        break;
+
+    case ETH_HEADER_TYPE_802_1PQ_802_3:
+        llcHdr = &eh->e802_1pq.e802_3;
+        break;
+
+    default:
+        ASSERT(FALSE);
+        return 0;
+    }
+
+    /*
+     * Documentation describes SNAP headers as having ONLY 0x03 as the
+     * control field, not just the lower two bits, so the whole control
+     * byte is compared rather than masked.
+     */
+    if (llcHdr->llc.dsap == 0xaa && llcHdr->llc.ssap == 0xaa &&
+        llcHdr->llc.control == ETH_LLC_CONTROL_UFRAME) {
+        return llcHdr->snap.snapType.typeNBO;
+    }
+
+    /* (Possibly tagged) LLC with no SNAP header, hence no type. */
+    return ETH_TYPE_LLC;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * Is the frame of the requested protocol type or is it an 802.1[pq]
+ * encapsulation of such a frame?
+ *
+ * 'type' is compared with the value returned by Eth_EncapsulatedPktType(),
+ * so it must be one of the network byte order (_NBO) constants.
+ *----------------------------------------------------------------------------
+ */
+static __inline BOOLEAN
+Eth_IsDixType(const Eth_Header *eh, const Eth_DixTypeNBO type)
+{
+ return Eth_EncapsulatedPktType(eh) == type;
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * Is the frame an IPV4 frame?
+ * TRUE when Eth_IsDixType() matches ETH_TYPE_IPV4_NBO.
+ *----------------------------------------------------------------------------
+ */
+static __inline BOOLEAN
+Eth_IsIPV4(const Eth_Header *eh)
+{
+ return Eth_IsDixType(eh, ETH_TYPE_IPV4_NBO);
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * Is the frame an IPV6 frame?
+ * TRUE when Eth_IsDixType() matches ETH_TYPE_IPV6_NBO.
+ *----------------------------------------------------------------------------
+ */
+static __inline BOOLEAN
+Eth_IsIPV6(const Eth_Header *eh)
+{
+ return Eth_IsDixType(eh, ETH_TYPE_IPV6_NBO);
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * Is the frame an ARP frame?
+ * TRUE when Eth_IsDixType() matches ETH_TYPE_ARP_NBO.
+ *----------------------------------------------------------------------------
+ */
+static __inline BOOLEAN
+Eth_IsARP(const Eth_Header *eh)
+{
+ return Eth_IsDixType(eh, ETH_TYPE_ARP_NBO);
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * Does the frame contain an 802.1[pq] tag?
+ * Only the type field that directly follows the source address is examined.
+ *----------------------------------------------------------------------------
+ */
+static __inline BOOLEAN
+Eth_IsFrameTagged(const Eth_Header *eh)
+{
+ return (eh->dix.typeNBO == ETH_TYPE_802_1PQ_NBO);
+}
+#endif /* __OVS_ETH_H_ */
diff --git a/datapath-windows/ovsext/OvsEvent.c b/datapath-windows/ovsext/OvsEvent.c
new file mode 100644
index 000000000..a75b2bd2a
--- /dev/null
+++ b/datapath-windows/ovsext/OvsEvent.c
@@ -0,0 +1,496 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "precomp.h"
+
+#include "OvsIoctl.h"
+#include "OvsSwitch.h"
+#include "OvsVport.h"
+#include "OvsEvent.h"
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_EVENT
+#include "OvsDebug.h"
+
+LIST_ENTRY ovsEventQueue; /* all subscribed OVS_EVENT_QUEUEs, linked through queueLink */
+UINT32 ovsNumEventQueue; /* number of queues currently on ovsEventQueue */
+UINT32 ovsNumPollAll; /* incremented by OvsPostEvent() each time it puts a queue into pollAll state */
+
+extern PNDIS_SPIN_LOCK gOvsCtrlLock; /* control lock; also serves as the event queue lock */
+
+NTSTATUS
+OvsInitEventQueue()
+{ /* Initializes the global list of event queues as empty; always succeeds. */
+ InitializeListHead(&ovsEventQueue);
+ return STATUS_SUCCESS;
+}
+
+VOID
+OvsCleanupEventQueue()
+{ /* Frees nothing; only asserts that every event queue is already gone. */
+ ASSERT(IsListEmpty(&ovsEventQueue));
+ ASSERT(ovsNumEventQueue == 0);
+}
+
+static __inline VOID
+OvsAcquireEventQueueLock()
+{ /* The event queue state is protected by gOvsCtrlLock. */
+ NdisAcquireSpinLock(gOvsCtrlLock);
+}
+
+static __inline VOID
+OvsReleaseEventQueueLock()
+{ /* Counterpart of OvsAcquireEventQueueLock(). */
+ NdisReleaseSpinLock(gOvsCtrlLock);
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * Cleanup the event queue of the OpenInstance.
+ *
+ * Does nothing when the instance has no event queue. Otherwise the queue is
+ * unlinked from the global list under the event queue lock, a still-pending
+ * wait IRP (if any) is completed with STATUS_SUCCESS, and the queue together
+ * with all of its queued elements is freed.
+ * --------------------------------------------------------------------------
+ */
+VOID
+OvsCleanupEvent(POVS_OPEN_INSTANCE instance)
+{
+    POVS_EVENT_QUEUE evQueue = (POVS_EVENT_QUEUE)instance->eventQueue;
+    POVS_EVENT_QUEUE_ELEM evElem;
+    PLIST_ENTRY cur, nxt;
+    PIRP waiter = NULL;
+
+    if (evQueue == NULL) {
+        return;
+    }
+
+    OvsAcquireEventQueueLock();
+    RemoveEntryList(&evQueue->queueLink);
+    ovsNumEventQueue--;
+    if (evQueue->pendingIrp) {
+        waiter = evQueue->pendingIrp;
+        /*
+         * A NULL previous cancel routine means the cancel routine has
+         * already been taken off the IRP, so it is not completed here.
+         */
+        if (IoSetCancelRoutine(waiter, NULL) == NULL) {
+            waiter = NULL;
+        }
+        evQueue->pendingIrp = NULL;
+    }
+    instance->eventQueue = NULL;
+    OvsReleaseEventQueueLock();
+
+    /* Complete the IRP only after the lock has been dropped. */
+    if (waiter) {
+        OvsCompleteIrpRequest(waiter, 0, STATUS_SUCCESS);
+    }
+
+    /* The queue is unlinked by now, so it can be torn down without the lock. */
+    LIST_FORALL_SAFE(&evQueue->elemList, cur, nxt) {
+        evElem = CONTAINING_RECORD(cur, OVS_EVENT_QUEUE_ELEM, link);
+        OvsFreeMemory(evElem);
+    }
+    OvsFreeMemory(evQueue);
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * When event is generated, we need to post the event to all
+ * the event queues. If there is pending Irp waiting for event
+ * complete the Irp to wakeup the user thread.
+ *
+ * portNo - port the event refers to; OVS_DEFAULT_PORT_NO forces the
+ *          receiving queues into pollAll state.
+ * status - event bits; only the bits also present in a queue's mask are
+ *          recorded for that queue.
+ *
+ * A queue that is switched to pollAll state has its individual elements
+ * discarded.
+ *
+ * Side effects: User thread may be woken up.
+ * --------------------------------------------------------------------------
+ */
+VOID
+OvsPostEvent(UINT32 portNo,
+ UINT32 status)
+{
+ POVS_EVENT_QUEUE_ELEM elem;
+ POVS_EVENT_QUEUE queue;
+ PLIST_ENTRY link;
+ BOOLEAN triggerPollAll = FALSE;
+ LIST_ENTRY list; /* IRPs to complete once the lock is released */
+ PLIST_ENTRY entry;
+ PIRP irp;
+
+ InitializeListHead(&list);
+
+ OVS_LOG_TRACE("Enter: portNo: %#x, status: %#x", portNo, status);
+
+ OvsAcquireEventQueueLock();
+
+ LIST_FORALL(&ovsEventQueue, link) {
+ queue = CONTAINING_RECORD(link, OVS_EVENT_QUEUE, queueLink);
+ if ((status & queue->mask) == 0 ||
+ queue->pollAll) {
+ continue; /* not subscribed to this event, or already in pollAll state */
+ }
+ if (queue->numElems > (OVS_MAX_VPORT_ARRAY_SIZE >> 1) ||
+ portNo == OVS_DEFAULT_PORT_NO) {
+ queue->pollAll = TRUE; /* queue too long or no specific port: fall back to pollAll */
+ } else {
+ elem = (POVS_EVENT_QUEUE_ELEM)OvsAllocateMemory(sizeof(*elem));
+ if (elem == NULL) {
+ queue->pollAll = TRUE; /* cannot record the event individually */
+ } else {
+ elem->portNo = portNo;
+ elem->status = (status & queue->mask);
+ InsertTailList(&queue->elemList, &elem->link);
+ queue->numElems++;
+ OVS_LOG_INFO("Queue: %p, numElems: %d",
+ queue, queue->numElems);
+ }
+ }
+ if (queue->pollAll) {
+ PLIST_ENTRY curr, next;
+ triggerPollAll = TRUE;
+ ovsNumPollAll++;
+ LIST_FORALL_SAFE(&queue->elemList, curr, next) { /* drop the individually queued events */
+ RemoveEntryList(curr);
+ elem = CONTAINING_RECORD(curr, OVS_EVENT_QUEUE_ELEM, link);
+ OvsFreeMemory(elem);
+ }
+ queue->numElems = 0;
+ }
+ if (queue->pendingIrp != NULL) {
+ PDRIVER_CANCEL cancelRoutine;
+ irp = queue->pendingIrp;
+ queue->pendingIrp = NULL;
+ cancelRoutine = IoSetCancelRoutine(irp, NULL);
+ if (cancelRoutine) { /* NULL: the cancel routine was already taken off the IRP; leave it alone */
+ InsertTailList(&list, &irp->Tail.Overlay.ListEntry);
+ }
+ }
+ }
+ OvsReleaseEventQueueLock();
+ while (!IsListEmpty(&list)) { /* complete the collected IRPs outside the lock */
+ entry = RemoveHeadList(&list);
+ irp = CONTAINING_RECORD(entry, IRP, Tail.Overlay.ListEntry);
+ OVS_LOG_INFO("Wakeup thread with IRP: %p", irp);
+ OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
+ }
+ OVS_LOG_TRACE("Exit: triggered pollAll: %s",
+ (triggerPollAll ? "TRUE" : "FALSE"));
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * Subscribe for, or unsubscribe from, event notification.
+ *
+ * fileObject  - file object identifying the open instance.
+ * inputBuffer - an OVS_EVENT_SUBSCRIBE request (dpNo, mask, subscribe).
+ * inputLength - size of inputBuffer in bytes.
+ *
+ * Results:
+ *    STATUS_SUCCESS for valid request and enough resource. Subscribing
+ *        again with the same mask, and unsubscribing when not subscribed,
+ *        both succeed without doing anything.
+ *    STATUS_NO_MEMORY for queue allocation failure
+ *    STATUS_INVALID_PARAMETER for invalid request: short input buffer, no
+ *        valid event bit in the mask, unknown open instance, or an attempt
+ *        to change the mask of an existing subscription.
+ *
+ * Side effects:
+ *    On subscribe, an event queue is created for the current open instance.
+ *    On unsubscribe, the queue is destroyed and a pending wait IRP, if any,
+ *    is completed with STATUS_SUCCESS.
+ * --------------------------------------------------------------------------
+ */
+NTSTATUS
+OvsSubscribeEventIoctl(PFILE_OBJECT fileObject,
+                       PVOID inputBuffer,
+                       UINT32 inputLength)
+{
+    POVS_EVENT_SUBSCRIBE request = (POVS_EVENT_SUBSCRIBE)inputBuffer;
+    NTSTATUS status = STATUS_SUCCESS;
+    POVS_OPEN_INSTANCE instance;
+    POVS_EVENT_QUEUE queue = NULL;
+    POVS_EVENT_QUEUE deadQueue = NULL;   /* queue to free after unlocking */
+    PIRP irp = NULL;                     /* IRP to complete after unlocking */
+
+    OVS_LOG_TRACE("Enter: fileObject: %p, inputLength: %d", fileObject,
+                  inputLength);
+
+    if (inputLength < sizeof (OVS_EVENT_SUBSCRIBE) ||
+        (request->mask & OVS_EVENT_MASK_ALL) == 0) {
+        OVS_LOG_TRACE("Exit: subscribe failed with invalid request.");
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    OvsAcquireEventQueueLock();
+
+    instance = OvsGetOpenInstance(fileObject, request->dpNo);
+
+    if (instance == NULL) {
+        status = STATUS_INVALID_PARAMETER;
+        OVS_LOG_WARN("can not find open instance");
+        goto done_event_subscribe;
+    }
+
+    /*
+     * XXX for now, we don't allow change mask.
+     */
+    queue = (POVS_EVENT_QUEUE)instance->eventQueue;
+    if (request->subscribe && queue) {
+        if (queue->mask != request->mask) {
+            /* Report the rejected mask change instead of masking it. */
+            status = STATUS_INVALID_PARAMETER;
+            OVS_LOG_WARN("Can not chnage mask when the queue is subscribed");
+        }
+        goto done_event_subscribe;
+    } else if (!request->subscribe && queue == NULL) {
+        /* Nothing to unsubscribe from. */
+        goto done_event_subscribe;
+    }
+
+    if (request->subscribe) {
+        queue = (POVS_EVENT_QUEUE)OvsAllocateMemory(sizeof (OVS_EVENT_QUEUE));
+        if (queue == NULL) {
+            status = STATUS_NO_MEMORY;
+            OVS_LOG_WARN("Fail to allocate event queue");
+            goto done_event_subscribe;
+        }
+        InitializeListHead(&queue->elemList);
+        queue->mask = request->mask;
+        queue->pendingIrp = NULL;
+        queue->numElems = 0;
+        queue->pollAll = TRUE; /* always poll all in the beginning */
+        InsertHeadList(&ovsEventQueue, &queue->queueLink);
+        ovsNumEventQueue++;
+        instance->eventQueue = queue;
+        queue->instance = instance;
+    } else {
+        /* Unlink the queue under the lock; it is freed once unlocked. */
+        RemoveEntryList(&queue->queueLink);
+        ovsNumEventQueue--;
+        instance->eventQueue = NULL;
+        if (queue->pendingIrp) {
+            irp = queue->pendingIrp;
+            queue->pendingIrp = NULL;
+            /*
+             * A NULL previous cancel routine means the cancel routine has
+             * already been taken off the IRP, so it is not completed here.
+             */
+            if (IoSetCancelRoutine(irp, NULL) == NULL) {
+                irp = NULL;
+            }
+        }
+        deadQueue = queue;
+    }
+
+done_event_subscribe:
+    OvsReleaseEventQueueLock();
+
+    if (irp) {
+        /* Complete the saved IRP; queue->pendingIrp is already NULL. */
+        OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
+    }
+    if (deadQueue) {
+        POVS_EVENT_QUEUE_ELEM elem;
+        PLIST_ENTRY link, next;
+
+        LIST_FORALL_SAFE(&deadQueue->elemList, link, next) {
+            elem = CONTAINING_RECORD(link, OVS_EVENT_QUEUE_ELEM, link);
+            OvsFreeMemory(elem);
+        }
+        OvsFreeMemory(deadQueue);
+    }
+    OVS_LOG_TRACE("Exit: subscribe event with status: %#x.", status);
+    return status;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * Poll event queued in the event queue. always synchronous.
+ *
+ * fileObject   - file object identifying the open instance.
+ * inputBuffer  - an OVS_EVENT_POLL request.
+ * inputLength  - size of inputBuffer in bytes.
+ * outputBuffer - receives an OVS_EVENT_STATUS followed by its entries.
+ * outputLength - size of outputBuffer in bytes; must hold at least one
+ *                entry.
+ * replyLen     - set to the number of bytes written, to the minimum
+ *                required size when the output buffer is too small, or to
+ *                0 when the open instance or its event queue is missing.
+ *
+ * Results:
+ *    STATUS_SUCCESS for valid request
+ *    STATUS_BUFFER_TOO_SMALL if outputBuffer is too small.
+ *    STATUS_INVALID_PARAMETER for invalid request, including polling an
+ *        open instance that has not subscribed to events.
+ *
+ * Side effects:
+ *    Event will be removed from event queue.
+ * --------------------------------------------------------------------------
+ */
+NTSTATUS
+OvsPollEventIoctl(PFILE_OBJECT fileObject,
+                  PVOID inputBuffer,
+                  UINT32 inputLength,
+                  PVOID outputBuffer,
+                  UINT32 outputLength,
+                  UINT32 *replyLen)
+{
+    POVS_EVENT_POLL poll;
+    POVS_EVENT_STATUS eventStatus;
+    POVS_EVENT_ENTRY entry;
+    POVS_EVENT_QUEUE queue;
+    POVS_EVENT_QUEUE_ELEM elem;
+    POVS_OPEN_INSTANCE instance;
+    PLIST_ENTRY link;
+    UINT32 numEntry, i;
+
+    OVS_LOG_TRACE("Enter: inputLength:%d, outputLength: %d",
+                  inputLength, outputLength);
+
+    ASSERT(replyLen);
+    if (inputLength < sizeof (OVS_EVENT_POLL)) {
+        OVS_LOG_TRACE("Exit: input buffer too small");
+        return STATUS_INVALID_PARAMETER;
+    }
+    *replyLen = sizeof (OVS_EVENT_STATUS) + sizeof (OVS_EVENT_ENTRY);
+    if (outputLength < *replyLen) {
+        OVS_LOG_TRACE("Exit: output buffer too small");
+        return STATUS_BUFFER_TOO_SMALL;
+    }
+    poll = (POVS_EVENT_POLL)inputBuffer;
+
+    OvsAcquireEventQueueLock();
+    instance = OvsGetOpenInstance(fileObject, poll->dpNo);
+    if (instance == NULL) {
+        OvsReleaseEventQueueLock();
+        *replyLen = 0;
+        OVS_LOG_TRACE("Exit: can not find Open instance");
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    queue = (POVS_EVENT_QUEUE)instance->eventQueue;
+    if (queue == NULL) {
+        /* The instance never subscribed (or has unsubscribed). */
+        OvsReleaseEventQueueLock();
+        *replyLen = 0;
+        OVS_LOG_TRACE("Exit: Event queue does not exist");
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    eventStatus = (POVS_EVENT_STATUS)outputBuffer;
+    numEntry =
+        (outputLength - sizeof (OVS_EVENT_STATUS)) / sizeof (OVS_EVENT_ENTRY);
+    if (queue->pollAll) {
+        /* Report one catch-all entry instead of individual events. */
+        eventStatus->numberEntries = 1;
+        numEntry = 1;
+        entry = &eventStatus->eventEntries[0];
+        entry->portNo = OVS_DEFAULT_PORT_NO;
+        entry->status = OVS_DEFAULT_EVENT_STATUS;
+        queue->pollAll = FALSE;
+        goto event_poll_done;
+    }
+    numEntry = MIN(numEntry, queue->numElems);
+    eventStatus->numberEntries = numEntry;
+
+    for (i = 0; i < numEntry; i++) {
+        link = RemoveHeadList(&queue->elemList);
+        elem = CONTAINING_RECORD(link, OVS_EVENT_QUEUE_ELEM, link);
+        entry = &eventStatus->eventEntries[i];
+        entry->portNo = elem->portNo;
+        entry->status = elem->status;
+        OvsFreeMemory(elem);
+        queue->numElems--;
+    }
+event_poll_done:
+    OvsReleaseEventQueueLock();
+    *replyLen = sizeof (OVS_EVENT_STATUS) +
+                numEntry * sizeof (OVS_EVENT_ENTRY);
+    OVS_LOG_TRACE("Exit: numEventPolled: %d", numEntry);
+    return STATUS_SUCCESS;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * Cancel wait IRP for event
+ *
+ * Installed as the IRP's cancel routine by OvsWaitEventIoctl(). If the IRP
+ * is still recorded as the pending IRP of the file object's event queue, it
+ * is detached from that queue. The IRP is then completed with
+ * STATUS_CANCELLED in every case.
+ *
+ * Please note, when this routine is called, it is always guaranteed that
+ * IRP is valid.
+ *
+ * Side effects: Pending IRP is completed.
+ * --------------------------------------------------------------------------
+ */
+VOID
+OvsCancelIrp(PDEVICE_OBJECT deviceObject,
+ PIRP irp)
+{
+ PIO_STACK_LOCATION irpSp;
+ PFILE_OBJECT fileObject;
+ POVS_EVENT_QUEUE queue;
+ POVS_OPEN_INSTANCE instance;
+
+ UNREFERENCED_PARAMETER(deviceObject);
+
+ IoReleaseCancelSpinLock(irp->CancelIrql); /* released before the event queue lock is taken */
+
+ irpSp = IoGetCurrentIrpStackLocation(irp);
+ fileObject = irpSp->FileObject;
+
+ if (fileObject == NULL) {
+ goto done;
+ }
+ OvsAcquireEventQueueLock();
+ instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
+ if (instance == NULL || instance->eventQueue == NULL) {
+ OvsReleaseEventQueueLock();
+ goto done;
+ }
+ queue = instance->eventQueue;
+ if (queue->pendingIrp == irp) { /* detach only if this IRP is still the parked one */
+ queue->pendingIrp = NULL;
+ }
+ OvsReleaseEventQueueLock();
+done:
+ OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * Wait for event.
+ *
+ * irp         - the wait request; parked on the event queue when no event
+ *               is available yet.
+ * fileObject  - file object identifying the open instance.
+ * inputBuffer - an OVS_EVENT_POLL request.
+ * inputLength - size of inputBuffer in bytes.
+ *
+ * Results:
+ *    STATUS_SUCCESS for valid request (events are already available)
+ *    STATUS_DEVICE_BUSY if already in waiting state.
+ *    STATUS_INVALID_PARAMETER for invalid request, including waiting on an
+ *        open instance that has not subscribed to events.
+ *    STATUS_PENDING wait for event
+ *
+ * Side effects:
+ *    May return pending to IO manager.
+ * --------------------------------------------------------------------------
+ */
+NTSTATUS
+OvsWaitEventIoctl(PIRP irp,
+                  PFILE_OBJECT fileObject,
+                  PVOID inputBuffer,
+                  UINT32 inputLength)
+{
+    NTSTATUS status;
+    POVS_EVENT_POLL poll;
+    POVS_EVENT_QUEUE queue;
+    POVS_OPEN_INSTANCE instance;
+    BOOLEAN cancelled = FALSE;
+
+    OVS_LOG_TRACE("Enter: inputLength: %u", inputLength);
+
+    if (inputLength < sizeof (OVS_EVENT_POLL)) {
+        OVS_LOG_TRACE("Exit: Invalid input buffer length.");
+        return STATUS_INVALID_PARAMETER;
+    }
+    poll = (POVS_EVENT_POLL)inputBuffer;
+
+    OvsAcquireEventQueueLock();
+
+    instance = OvsGetOpenInstance(fileObject, poll->dpNo);
+    if (instance == NULL) {
+        OvsReleaseEventQueueLock();
+        OVS_LOG_TRACE("Exit: Can not find open instance, dpNo: %d", poll->dpNo);
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    queue = (POVS_EVENT_QUEUE)instance->eventQueue;
+    if (queue == NULL) {
+        /* The instance never subscribed (or has unsubscribed). */
+        OvsReleaseEventQueueLock();
+        OVS_LOG_TRACE("Exit: Event queue does not exist");
+        return STATUS_INVALID_PARAMETER;
+    }
+    if (queue->pendingIrp) {
+        OvsReleaseEventQueueLock();
+        OVS_LOG_TRACE("Exit: Event queue already in pending state");
+        return STATUS_DEVICE_BUSY;
+    }
+
+    status = (queue->numElems != 0 || queue->pollAll) ?
+             STATUS_SUCCESS : STATUS_PENDING;
+    if (status == STATUS_PENDING) {
+        PDRIVER_CANCEL cancelRoutine;
+
+        IoMarkIrpPending(irp);
+        IoSetCancelRoutine(irp, OvsCancelIrp);
+        if (irp->Cancel) {
+            /*
+             * The IRP was cancelled before the cancel routine was set.
+             * If the routine can still be taken back it has not been
+             * called, so the IRP is completed below; otherwise
+             * OvsCancelIrp() completes it.
+             */
+            cancelRoutine = IoSetCancelRoutine(irp, NULL);
+            if (cancelRoutine) {
+                cancelled = TRUE;
+            }
+        } else {
+            queue->pendingIrp = irp;
+        }
+    }
+    OvsReleaseEventQueueLock();
+    if (cancelled) {
+        OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
+        OVS_LOG_INFO("Event IRP cancelled: %p", irp);
+    }
+    OVS_LOG_TRACE("Exit: return status: %#x", status);
+    return status;
+}
diff --git a/datapath-windows/ovsext/OvsEvent.h b/datapath-windows/ovsext/OvsEvent.h
new file mode 100644
index 000000000..4ae2ba29d
--- /dev/null
+++ b/datapath-windows/ovsext/OvsEvent.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_EVENT_H_
+#define __OVS_EVENT_H_ 1
+
+typedef struct _OVS_EVENT_QUEUE_ELEM { /* one queued event */
+ LIST_ENTRY link; /* linkage on OVS_EVENT_QUEUE.elemList */
+ UINT32 portNo; /* port the event refers to */
+ UINT32 status; /* event bits, already filtered by the queue's mask */
+} OVS_EVENT_QUEUE_ELEM, *POVS_EVENT_QUEUE_ELEM;
+
+typedef struct _OVS_EVENT_QUEUE { /* event queue of one open instance */
+ LIST_ENTRY queueLink; /* linkage on the global ovsEventQueue list */
+ LIST_ENTRY elemList; /* queued OVS_EVENT_QUEUE_ELEMs */
+ UINT32 mask; /* event bits this queue is subscribed to */
+ UINT16 numElems; /* number of entries on elemList */
+ BOOLEAN pollAll; /* TRUE: next poll returns one OVS_DEFAULT_PORT_NO entry instead of elemList */
+ PIRP pendingIrp; /* IRP parked by OvsWaitEventIoctl(), or NULL */
+ PVOID instance; /* the open instance this queue was created for */
+} OVS_EVENT_QUEUE, *POVS_EVENT_QUEUE;
+
+NTSTATUS OvsInitEventQueue(VOID);
+VOID OvsCleanupEventQueue(VOID);
+
+struct _OVS_OPEN_INSTANCE;
+
+VOID OvsCleanupEvent(struct _OVS_OPEN_INSTANCE *instance);
+VOID OvsPostEvent(UINT32 portNo, UINT32 status);
+NTSTATUS OvsSubscribeEventIoctl(PFILE_OBJECT fileObject, PVOID inputBuffer,
+ UINT32 inputLength);
+NTSTATUS OvsPollEventIoctl(PFILE_OBJECT fileObject, PVOID inputBuffer,
+ UINT32 inputLength, PVOID outputBuffer,
+ UINT32 outputLength, UINT32 *replyLen);
+NTSTATUS OvsWaitEventIoctl(PIRP irp, PFILE_OBJECT fileObject,
+ PVOID inputBuffer, UINT32 inputLength);
+#endif /* __OVS_EVENT_H_ */
diff --git a/datapath-windows/ovsext/OvsFlow.c b/datapath-windows/ovsext/OvsFlow.c
new file mode 100644
index 000000000..daa64e007
--- /dev/null
+++ b/datapath-windows/ovsext/OvsFlow.c
@@ -0,0 +1,978 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "precomp.h"
+#include "OvsNetProto.h"
+#include "OvsUtil.h"
+#include "OvsJhash.h"
+#include "OvsFlow.h"
+#include "OvsPacketParser.h"
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_FLOW
+#include "OvsDebug.h"
+
+#pragma warning( push )
+#pragma warning( disable:4127 )
+
+extern PNDIS_SPIN_LOCK gOvsCtrlLock;
+extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
+
+/* Forward declarations of the file-local helpers defined further down. */
+static NTSTATUS ReportFlowInfo(OvsFlow *flow, UINT32 getFlags,
+ UINT32 getActionsLen, OvsFlowInfo *info);
+static NTSTATUS HandleFlowPut(OvsFlowPut *put,
+ OVS_DATAPATH *datapath,
+ struct OvsFlowStats *stats);
+static NTSTATUS OvsPrepareFlow(OvsFlow **flow, const OvsFlowPut *put,
+ UINT64 hash);
+static VOID RemoveFlow(OVS_DATAPATH *datapath, OvsFlow **flow);
+static VOID DeleteAllFlows(OVS_DATAPATH *datapath);
+static NTSTATUS AddFlow(OVS_DATAPATH *datapath, OvsFlow *flow);
+static VOID FreeFlow(OvsFlow *flow);
+static VOID __inline *GetStartAddrNBL(const NET_BUFFER_LIST *_pNB);
+
+/*
+ * The flow table is an array of OVS_FLOW_TABLE_SIZE list heads (see
+ * OvsAllocateFlowTable).  Because the size is a power of two, masking a
+ * hash with OVS_FLOW_TABLE_MASK always yields a valid bucket index.
+ */
+#define OVS_FLOW_TABLE_SIZE 2048
+#define OVS_FLOW_TABLE_MASK (OVS_FLOW_TABLE_SIZE -1)
+#define HASH_BUCKET(hash) ((hash) & OVS_FLOW_TABLE_MASK)
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsDeleteFlowTable --
+ *     Tears down the flow table of 'datapath': every flow is removed, the
+ *     bucket array is released and the datapath lock is freed.  A NULL
+ *     datapath, or one that has no flow table, is left untouched.
+ *
+ * Results:
+ *     NDIS_STATUS_SUCCESS always.
+ *----------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsDeleteFlowTable(OVS_DATAPATH *datapath)
+{
+    if (datapath != NULL && datapath->flowTable != NULL) {
+        /* Flows must go first: they are linked through the bucket array. */
+        DeleteAllFlows(datapath);
+
+        OvsFreeMemory(datapath->flowTable);
+        datapath->flowTable = NULL;
+
+        NdisFreeRWLock(datapath->lock);
+    }
+
+    return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsAllocateFlowTable --
+ *     Allocates the OVS_FLOW_TABLE_SIZE hash buckets of 'datapath',
+ *     initializes each one as an empty list, and allocates the datapath
+ *     read/write lock using the filter handle of 'switchContext'.
+ *
+ *     If the lock cannot be allocated the bucket array is released again so
+ *     that a failed call leaves no allocation behind and
+ *     datapath->flowTable is NULL.
+ *
+ * Results:
+ *     NDIS_STATUS_SUCCESS on success.
+ *     NDIS_STATUS_RESOURCES if the table or the lock couldn't be allocated.
+ *----------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsAllocateFlowTable(OVS_DATAPATH *datapath,
+                     POVS_SWITCH_CONTEXT switchContext)
+{
+    PLIST_ENTRY bucket;
+    int i;
+
+    datapath->flowTable = OvsAllocateMemory(OVS_FLOW_TABLE_SIZE *
+                                            sizeof (LIST_ENTRY));
+    if (!datapath->flowTable) {
+        return NDIS_STATUS_RESOURCES;
+    }
+    for (i = 0; i < OVS_FLOW_TABLE_SIZE; i++) {
+        bucket = &(datapath->flowTable[i]);
+        InitializeListHead(bucket);
+    }
+
+    datapath->lock = NdisAllocateRWLock(switchContext->NdisFilterHandle);
+    if (!datapath->lock) {
+        /* Undo the table allocation; do not report success without a lock. */
+        OvsFreeMemory(datapath->flowTable);
+        datapath->flowTable = NULL;
+        return NDIS_STATUS_RESOURCES;
+    }
+
+    return NDIS_STATUS_SUCCESS;
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * GetStartAddrNBL --
+ *     Maps the current MDL of the first NET_BUFFER of '_pNB' and returns the
+ *     address of the frame data, i.e. the mapped base plus the NET_BUFFER's
+ *     CurrentMdlOffset.
+ *
+ * Results:
+ *     Virtual address of the frame, or NULL if the MDL could not be mapped.
+ *----------------------------------------------------------------------------
+ */
+static __inline VOID *
+GetStartAddrNBL(const NET_BUFFER_LIST *_pNB)
+{
+    PUINT8 mappedBase;
+
+    ASSERT(_pNB);
+
+    /* Per the original author, the Ethernet header is always safe to access. */
+    mappedBase = MmGetSystemAddressForMdlSafe(
+        (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdl, LowPagePriority);
+    if (mappedBase == NULL) {
+        return NULL;
+    }
+
+    return (VOID *)(PEthHdr)
+        (mappedBase + (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdlOffset);
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsFlowUsed --
+ *     Updates the statistics of 'flow' for one matched 'packet': the
+ *     last-used timestamp, the packet and byte counters, and the accumulated
+ *     TCP flags.
+ *
+ *     The timestamp is the kernel tick count scaled by
+ *     ovsTimeIncrementPerTick and shifted by ovsUserTimestampDelta.
+ *----------------------------------------------------------------------------
+ */
+VOID
+OvsFlowUsed(OvsFlow *flow,
+            const NET_BUFFER_LIST *packet,
+            const POVS_PACKET_HDR_INFO layers)
+{
+    LARGE_INTEGER ticks;
+
+    KeQueryTickCount(&ticks);
+    flow->used = ticks.QuadPart * ovsTimeIncrementPerTick
+                 + ovsUserTimestampDelta;
+
+    flow->packetCount += 1;
+    flow->byteCount += OvsPacketLenNBL(packet);
+    flow->tcpFlags |= OvsGetTcpFlags(packet, &flow->key, layers);
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * DeleteAllFlows --
+ *     Empties every bucket of the datapath's flow table by repeatedly
+ *     removing the first flow of the bucket until the bucket is empty.
+ *----------------------------------------------------------------------------
+ */
+VOID
+DeleteAllFlows(OVS_DATAPATH *datapath)
+{
+    INT row;
+
+    for (row = 0; row < OVS_FLOW_TABLE_SIZE; row++) {
+        PLIST_ENTRY head = &(datapath->flowTable[row]);
+
+        while (!IsListEmpty(head)) {
+            OvsFlow *victim = CONTAINING_RECORD(head->Flink, OvsFlow,
+                                                ListEntry);
+            /* RemoveFlow unlinks and frees the flow, and NULLs 'victim'. */
+            RemoveFlow(datapath, &victim);
+        }
+    }
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsExtractFlow --
+ *     Builds the flow key 'flow' for 'packet', received on 'inPort', and
+ *     records header offsets in 'layers'.
+ *
+ *     - If 'tunKey' is non-NULL it is copied into flow->tunKey and
+ *       flow->l2.offset is 0; otherwise flow->tunKey.dst is cleared and
+ *       flow->l2.offset is OVS_WIN_TUNNEL_KEY_SIZE.
+ *
+ *     - flow->l2 receives the input port, source/destination MAC, VLAN TCI
+ *       and Ethernet type.  flow->l2.keyLen starts at the L2 key size and
+ *       grows by the size of the IPv4, IPv6 (or ICMPv6) or ARP key section
+ *       that applies.
+ *
+ *     - layers->l3Offset is set to just past the Ethernet header (and the
+ *       in-frame VLAN tag, if one was parsed here).
+ *
+ *     - For IPv4, layers->l4Offset is set to just past the IP header, and
+ *       for ICMP layers->l7Offset is set to just past the ICMP header.  TCP,
+ *       UDP, IPv6 and ICMPv6 parsing is delegated to OvsParseTcp(),
+ *       OvsParseUdp(), OvsParseIPv6() and OvsParseIcmpV6().
+ *
+ * Results:
+ *     NDIS_STATUS_SUCCESS normally, including for frames shorter than
+ *     ETH_HEADER_LEN_DIX (whose key carries no parsed headers).
+ *     NDIS_STATUS_RESOURCES if the frame data cannot be mapped.
+ *     The status returned by OvsParseIPv6() if IPv6 parsing fails.
+ *----------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsExtractFlow(const NET_BUFFER_LIST *packet,
+               UINT32 inPort,
+               OvsFlowKey *flow,
+               POVS_PACKET_HDR_INFO layers,
+               OvsIPv4TunnelKey *tunKey)
+{
+    struct Eth_Header *eth;
+    UINT8 offset = 0;
+    PVOID vlanTagValue;
+
+    layers->value = 0;
+
+    if (tunKey) {
+        ASSERT(tunKey->dst != 0);
+        RtlMoveMemory(&flow->tunKey, tunKey, sizeof flow->tunKey);
+        flow->l2.offset = 0;
+    } else {
+        flow->tunKey.dst = 0;
+        flow->l2.offset = OVS_WIN_TUNNEL_KEY_SIZE;
+    }
+
+    flow->l2.inPort = inPort;
+
+    if (OvsPacketLenNBL(packet) < ETH_HEADER_LEN_DIX) {
+        /*
+         * Too short for an Ethernet header: no header fields are parsed.
+         * NOTE(review): the literal 8 is presumably the size of the leading
+         * part of the L2 key (offset/keyLen/inPort) -- confirm against the
+         * OvsFlowKey definition.
+         */
+        flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + 8 - flow->l2.offset;
+        return NDIS_STATUS_SUCCESS;
+    }
+
+    /* Link layer. */
+    eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet);
+    if (eth == NULL) {
+        /* GetStartAddrNBL returns NULL when the MDL cannot be mapped. */
+        return NDIS_STATUS_RESOURCES;
+    }
+    memcpy(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH);
+    memcpy(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH);
+
+    /*
+     * vlan_tci: prefer the out-of-band 802.1Q information attached to the
+     * NET_BUFFER_LIST; otherwise look for a tag inside the frame.
+     */
+    vlanTagValue = NET_BUFFER_LIST_INFO(packet, Ieee8021QNetBufferListInfo);
+    if (vlanTagValue) {
+        PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag =
+            (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue;
+        flow->l2.vlanTci = htons(vlanTag->TagHeader.VlanId | OVSWIN_VLAN_CFI |
+                                 (vlanTag->TagHeader.UserPriority << 13));
+    } else {
+        if (eth->dix.typeNBO == ETH_TYPE_802_1PQ_NBO) {
+            Eth_802_1pq_Tag *tag= (Eth_802_1pq_Tag *)&eth->dix.typeNBO;
+            /*
+             * NOTE(review): unlike the out-of-band branch above, this value
+             * is not passed through htons() -- confirm that is intended.
+             */
+            flow->l2.vlanTci = ((UINT16)tag->priority << 13) |
+                               OVSWIN_VLAN_CFI |
+                               ((UINT16)tag->vidHi << 8) | tag->vidLo;
+            offset = sizeof (Eth_802_1pq_Tag);
+        } else {
+            flow->l2.vlanTci = 0;
+        }
+        /*
+         * XXX
+         * Please note after this point, src mac and dst mac should
+         * not be accessed through eth
+         */
+        eth = (Eth_Header *)((UINT8 *)eth + offset);
+    }
+
+    /*
+     * dl_type.
+     *
+     * XXX assume that at least the first
+     * 12 bytes of received packets are mapped.  This code has the stronger
+     * assumption that at least the first 22 bytes of 'packet' is mapped (if my
+     * arithmetic is right).
+     */
+    if (ETH_TYPENOT8023(eth->dix.typeNBO)) {
+        flow->l2.dlType = eth->dix.typeNBO;
+        layers->l3Offset = ETH_HEADER_LEN_DIX + offset;
+    } else if (OvsPacketLenNBL(packet) >= ETH_HEADER_LEN_802_3 &&
+               eth->e802_3.llc.dsap == 0xaa &&
+               eth->e802_3.llc.ssap == 0xaa &&
+               eth->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME &&
+               eth->e802_3.snap.snapOrg[0] == 0x00 &&
+               eth->e802_3.snap.snapOrg[1] == 0x00 &&
+               eth->e802_3.snap.snapOrg[2] == 0x00) {
+        /* 802.3 frame with an LLC/SNAP header: use the SNAP type. */
+        flow->l2.dlType = eth->e802_3.snap.snapType.typeNBO;
+        layers->l3Offset = ETH_HEADER_LEN_802_3 + offset;
+    } else {
+        flow->l2.dlType = htons(OVSWIN_DL_TYPE_NONE);
+        layers->l3Offset = ETH_HEADER_LEN_DIX + offset;
+    }
+
+    flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE
+                      - flow->l2.offset;
+    /* Network layer. */
+    if (flow->l2.dlType == htons(ETH_TYPE_IPV4)) {
+        struct IPHdr ip_storage;
+        const struct IPHdr *nh;
+        IpKey *ipKey = &flow->ipKey;
+
+        flow->l2.keyLen += OVS_IP_KEY_SIZE;
+        layers->isIPv4 = 1;
+        nh = OvsGetIp(packet, layers->l3Offset, &ip_storage);
+        if (nh) {
+            layers->l4Offset = layers->l3Offset + nh->ihl * 4;
+
+            ipKey->nwSrc = nh->saddr;
+            ipKey->nwDst = nh->daddr;
+            ipKey->nwProto = nh->protocol;
+
+            ipKey->nwTos = nh->tos;
+            if (nh->frag_off & htons(IP_MF | IP_OFFSET)) {
+                ipKey->nwFrag = OVSWIN_NW_FRAG_ANY;
+                if (nh->frag_off & htons(IP_OFFSET)) {
+                    ipKey->nwFrag |= OVSWIN_NW_FRAG_LATER;
+                }
+            } else {
+                ipKey->nwFrag = 0;
+            }
+
+            ipKey->nwTtl = nh->ttl;
+            ipKey->l4.tpSrc = 0;
+            ipKey->l4.tpDst = 0;
+
+            /* Only a packet with fragment offset 0 has its L4 header parsed. */
+            if (!(nh->frag_off & htons(IP_OFFSET))) {
+                if (ipKey->nwProto == SOCKET_IPPROTO_TCP) {
+                    OvsParseTcp(packet, &ipKey->l4, layers);
+                } else if (ipKey->nwProto == SOCKET_IPPROTO_UDP) {
+                    OvsParseUdp(packet, &ipKey->l4, layers);
+                } else if (ipKey->nwProto == SOCKET_IPPROTO_ICMP) {
+                    ICMPHdr icmpStorage;
+                    const ICMPHdr *icmp;
+
+                    icmp = OvsGetIcmp(packet, layers->l4Offset, &icmpStorage);
+                    if (icmp) {
+                        /* ICMP type and code are carried in the port fields. */
+                        ipKey->l4.tpSrc = htons(icmp->type);
+                        ipKey->l4.tpDst = htons(icmp->code);
+                        layers->l7Offset = layers->l4Offset + sizeof *icmp;
+                    }
+                }
+            }
+        } else {
+            /* IP header unavailable: clear the first 16 bytes of the key. */
+            ((UINT64 *)ipKey)[0] = 0;
+            ((UINT64 *)ipKey)[1] = 0;
+        }
+    } else if (flow->l2.dlType == htons(ETH_TYPE_IPV6)) {
+        NDIS_STATUS status;
+        flow->l2.keyLen += OVS_IPV6_KEY_SIZE;
+        status = OvsParseIPv6(packet, flow, layers);
+        if (status != NDIS_STATUS_SUCCESS) {
+            memset(&flow->ipv6Key, 0, sizeof (Ipv6Key));
+            return status;
+        }
+        layers->isIPv6 = 1;
+        flow->ipv6Key.l4.tpSrc = 0;
+        flow->ipv6Key.l4.tpDst = 0;
+        flow->ipv6Key.pad = 0;
+
+        if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_TCP) {
+            OvsParseTcp(packet, &(flow->ipv6Key.l4), layers);
+        } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_UDP) {
+            OvsParseUdp(packet, &(flow->ipv6Key.l4), layers);
+        } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) {
+            OvsParseIcmpV6(packet, flow, layers);
+            /* The ICMPv6 key replaces the plain IPv6 key size. */
+            flow->l2.keyLen += (OVS_ICMPV6_KEY_SIZE - OVS_IPV6_KEY_SIZE);
+        }
+    } else if (flow->l2.dlType == htons(ETH_TYPE_ARP)) {
+        EtherArp arpStorage;
+        const EtherArp *arp;
+        ArpKey *arpKey = &flow->arpKey;
+        /* Clear the first 24 bytes of the ARP key before filling it in. */
+        ((UINT64 *)arpKey)[0] = 0;
+        ((UINT64 *)arpKey)[1] = 0;
+        ((UINT64 *)arpKey)[2] = 0;
+        flow->l2.keyLen += OVS_ARP_KEY_SIZE;
+        arp = OvsGetArp(packet, layers->l3Offset, &arpStorage);
+        if (arp && arp->ea_hdr.ar_hrd == htons(1) &&
+            arp->ea_hdr.ar_pro == htons(ETH_TYPE_IPV4) &&
+            arp->ea_hdr.ar_hln == ETH_ADDR_LENGTH &&
+            arp->ea_hdr.ar_pln == 4) {
+            /* We only match on the lower 8 bits of the opcode. */
+            if (ntohs(arp->ea_hdr.ar_op) <= 0xff) {
+                arpKey->nwProto = (UINT8)ntohs(arp->ea_hdr.ar_op);
+            }
+            if (arpKey->nwProto == ARPOP_REQUEST
+                || arpKey->nwProto == ARPOP_REPLY) {
+                memcpy(&arpKey->nwSrc, arp->arp_spa, 4);
+                memcpy(&arpKey->nwDst, arp->arp_tpa, 4);
+                memcpy(arpKey->arpSha, arp->arp_sha, ETH_ADDR_LENGTH);
+                memcpy(arpKey->arpTha, arp->arp_tha, ETH_ADDR_LENGTH);
+            }
+        }
+    }
+
+    return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ * Compares 'size' bytes at 'src' and 'dst' one 64-bit word at a time.
+ * 'size' must be a multiple of 8 and both pointers must be 8-byte aligned
+ * (checked by ASSERT only).  Returns TRUE when all words are equal.
+ */
+__inline BOOLEAN
+FlowEqual(UINT64 *src, UINT64 *dst, UINT32 size)
+{
+    UINT32 wordCount = size >> 3;
+    UINT32 idx = 0;
+
+    ASSERT((size & 0x7) == 0);
+    ASSERT(((UINT64)src & 0x7) == 0);
+    ASSERT(((UINT64)dst & 0x7) == 0);
+
+    while (idx < wordCount) {
+        if (src[idx] != dst[idx]) {
+            return FALSE;
+        }
+        idx++;
+    }
+    return TRUE;
+}
+
+
+/*
+ * ----------------------------------------------------------------------------
+ * AddFlow --
+ *     Links 'flow' at the tail of the bucket selected by flow->hash and
+ *     increments the datapath's flow count.  flow->hash must already be
+ *     valid; it is used as-is for the duplicate lookup.
+ *
+ * Results:
+ *     STATUS_SUCCESS if the flow was inserted.
+ *     STATUS_INVALID_HANDLE if a flow with the same key is already present.
+ * ----------------------------------------------------------------------------
+ */
+NTSTATUS
+AddFlow(OVS_DATAPATH *datapath, OvsFlow *flow)
+{
+    PLIST_ENTRY bucket;
+
+    if (OvsLookupFlow(datapath, &flow->key, &flow->hash, TRUE)) {
+        return STATUS_INVALID_HANDLE;
+    }
+
+    bucket = &(datapath->flowTable[HASH_BUCKET(flow->hash)]);
+
+    /* A memory fence is issued before the entry becomes reachable. */
+    KeMemoryBarrier();
+    InsertTailList(bucket, &flow->ListEntry);
+
+    datapath->nFlows += 1;
+
+    return STATUS_SUCCESS;
+}
+
+
+/* ----------------------------------------------------------------------------
+ * RemoveFlow --
+ *     Unlinks '*flow' from the flow table, decrements the datapath's flow
+ *     count and frees the flow immediately.  '*flow' is set to NULL so the
+ *     caller cannot keep using the freed pointer.
+ * ----------------------------------------------------------------------------
+ */
+VOID
+RemoveFlow(OVS_DATAPATH *datapath,
+           OvsFlow **flow)
+{
+    OvsFlow *f = *flow;
+    *flow = NULL;
+
+    ASSERT(datapath->nFlows);
+    datapath->nFlows--;
+    // Remove the flow from its hash bucket, then release it.
+    RemoveEntryList(&f->ListEntry);
+    FreeFlow(f);
+}
+
+
+/*
+ * ----------------------------------------------------------------------------
+ * OvsLookupFlow --
+ *     Searches the flow table for a flow whose key matches 'key'.
+ *
+ *     Only the bytes [key->l2.offset, key->l2.offset + key->l2.keyLen) of
+ *     the key take part in hashing and comparison.  When 'hashValid' is
+ *     FALSE the hash is computed and stored in '*hash'; otherwise '*hash'
+ *     is used as supplied.
+ *
+ *     The callers visible in this file invoke it with the datapath lock
+ *     held.
+ *
+ * Results:
+ *     Flow pointer if lookup successful.
+ *     NULL if no matching flow exists.
+ * ----------------------------------------------------------------------------
+ */
+OvsFlow *
+OvsLookupFlow(OVS_DATAPATH *datapath,
+              const OvsFlowKey *key,
+              UINT64 *hash,
+              BOOLEAN hashValid)
+{
+    PLIST_ENTRY bucket, cursor;
+    UINT16 keyOffset = key->l2.offset;
+    UINT16 keyLen = key->l2.keyLen;
+    UINT8 *keyBytes;
+
+    /* A tunnel destination is present exactly when the offset is 0. */
+    ASSERT(key->tunKey.dst || keyOffset == sizeof (OvsIPv4TunnelKey));
+    ASSERT(!key->tunKey.dst || keyOffset == 0);
+
+    keyBytes = (UINT8 *)key + keyOffset;
+
+    if (!hashValid) {
+        *hash = OvsJhashBytes(keyBytes, keyLen, 0);
+    }
+
+    bucket = &datapath->flowTable[HASH_BUCKET(*hash)];
+    for (cursor = bucket->Flink; cursor != bucket; cursor = cursor->Flink) {
+        OvsFlow *candidate = CONTAINING_RECORD(cursor, OvsFlow, ListEntry);
+
+        /* Cheap rejections first, then the word-wise key comparison. */
+        if (candidate->hash != *hash) {
+            continue;
+        }
+        if (candidate->key.l2.val != key->l2.val) {
+            continue;
+        }
+        if (FlowEqual((UINT64 *)((uint8 *)&candidate->key + keyOffset),
+                      (UINT64 *)keyBytes, keyLen)) {
+            return candidate;
+        }
+    }
+    return NULL;
+}
+
+
+/*
+ * ----------------------------------------------------------------------------
+ * OvsHashFlow --
+ *     Returns the hash of 'key': OvsJhashBytes() over key->l2.keyLen bytes
+ *     starting key->l2.offset bytes into the key, with a basis of 0.
+ * ----------------------------------------------------------------------------
+ */
+UINT64
+OvsHashFlow(const OvsFlowKey *key)
+{
+    UINT16 skip = key->l2.offset;
+    UINT16 length = key->l2.keyLen;
+
+    ASSERT(key->tunKey.dst || skip == sizeof (OvsIPv4TunnelKey));
+    ASSERT(!key->tunKey.dst || skip == 0);
+
+    return OvsJhashBytes((UINT8 *)key + skip, length, 0);
+}
+
+
+/*
+ * ----------------------------------------------------------------------------
+ * FreeFlow --
+ *     Releases the memory of 'flow'.  The actions are stored in the same
+ *     allocation as the flow (see OvsPrepareFlow), so one free covers both.
+ * ----------------------------------------------------------------------------
+ */
+VOID
+FreeFlow(OvsFlow *flow)
+{
+    ASSERT(flow != NULL);
+    OvsFreeMemory(flow);
+}
+
+/*
+ * ----------------------------------------------------------------------------
+ * OvsDoDumpFlows --
+ *     Reports one flow per call, resuming from the cursor in
+ *     dumpInput->position: position[0] is the bucket (row) and position[1]
+ *     the index within that bucket (column).  If the requested column is
+ *     past the end of its bucket, the first flow of the next non-empty
+ *     bucket is reported instead.
+ *
+ *     On return dumpOutput->n is:
+ *       - 1 when a flow was written, with dumpOutput->position advanced to
+ *         the next cursor;
+ *       - 0 when there are no more flows;
+ *       - the required output size when the actions did not fit.
+ *     '*replyLen' is set whenever dumpOutput was written.
+ *
+ *     NOTE(review): every field of 'dumpInput' is read before the first
+ *     write to 'dumpOutput' on each path; presumably the two may share one
+ *     buffer -- confirm before reordering.
+ *
+ * Results:
+ *     STATUS_SUCCESS, STATUS_INVALID_PARAMETER if dumpInput->dpNo does not
+ *     match the active switch, or STATUS_BUFFER_TOO_SMALL from
+ *     ReportFlowInfo().
+ * ----------------------------------------------------------------------------
+ */
+NTSTATUS
+OvsDoDumpFlows(OvsFlowDumpInput *dumpInput,
+               OvsFlowDumpOutput *dumpOutput,
+               UINT32 *replyLen)
+{
+    UINT32 dpNo;
+    OVS_DATAPATH *datapath = NULL;
+    OvsFlow *flow;
+    PLIST_ENTRY node, head;
+    UINT32 column = 0;
+    UINT32 rowIndex, columnIndex;
+    LOCK_STATE_EX dpLockState;
+    NTSTATUS status = STATUS_SUCCESS;
+
+    dpNo = dumpInput->dpNo;
+    NdisAcquireSpinLock(gOvsCtrlLock);
+    if (gOvsSwitchContext == NULL ||
+        gOvsSwitchContext->dpNo != dpNo) {
+        status = STATUS_INVALID_PARAMETER;
+        goto unlock;
+    }
+
+    rowIndex = dumpInput->position[0];
+    if (rowIndex >= OVS_FLOW_TABLE_SIZE) {
+        /* Cursor is already past the table: nothing left to dump. */
+        dumpOutput->n = 0;
+        *replyLen = sizeof(*dumpOutput);
+        goto unlock;
+    }
+
+    columnIndex = dumpInput->position[1];
+
+    datapath = &gOvsSwitchContext->datapath;
+    ASSERT(datapath);
+    OvsAcquireDatapathRead(datapath, &dpLockState, FALSE);
+
+    /* Walk to the requested column, stopping early at the end of the row. */
+    head = &datapath->flowTable[rowIndex];
+    node = head->Flink;
+    while (column < columnIndex && node != head) {
+        node = node->Flink;
+        column++;
+    }
+
+    if (node == head) {
+        /* Row exhausted: continue from column 0 of the next non-empty row. */
+        columnIndex = 0;
+        do {
+            if (++rowIndex >= OVS_FLOW_TABLE_SIZE) {
+                /*
+                 * End of table.  Report the reply length here too, exactly
+                 * as the early "cursor past the table" exit above does.
+                 */
+                dumpOutput->n = 0;
+                *replyLen = sizeof(*dumpOutput);
+                goto dp_unlock;
+            }
+            head = &datapath->flowTable[rowIndex];
+            node = head->Flink;
+        } while (node == head);
+    }
+
+    ASSERT(node != head);
+    ASSERT(rowIndex < OVS_FLOW_TABLE_SIZE);
+
+    flow = CONTAINING_RECORD(node, OvsFlow, ListEntry);
+    status = ReportFlowInfo(flow, dumpInput->getFlags, dumpInput->actionsLen,
+                            &dumpOutput->flow);
+
+    if (status == STATUS_BUFFER_TOO_SMALL) {
+        /* Tell the caller how large the output buffer needs to be. */
+        dumpOutput->n = sizeof(OvsFlowDumpOutput) + flow->actionsLen;
+        *replyLen = sizeof(*dumpOutput);
+    } else {
+        dumpOutput->n = 1; //one flow reported.
+        *replyLen = sizeof(*dumpOutput) + dumpOutput->flow.actionsLen;
+    }
+
+    dumpOutput->position[0] = rowIndex;
+    dumpOutput->position[1] = ++columnIndex;
+
+dp_unlock:
+    OvsReleaseDatapath(datapath, &dpLockState);
+
+unlock:
+    NdisReleaseSpinLock(gOvsCtrlLock);
+    return status;
+}
+
+/*
+ * Validates the buffers of a flow-dump request and forwards it to
+ * OvsDoDumpFlows().  The input must be exactly one OvsFlowDumpInput and the
+ * output exactly one OvsFlowDumpOutput plus dumpInput->actionsLen bytes.
+ *
+ * Returns STATUS_INVALID_PARAMETER for a NULL buffer,
+ * STATUS_INFO_LENGTH_MISMATCH for a wrong length, otherwise the status of
+ * OvsDoDumpFlows().
+ */
+NTSTATUS
+OvsDumpFlowIoctl(PVOID inputBuffer,
+                 UINT32 inputLength,
+                 PVOID outputBuffer,
+                 UINT32 outputLength,
+                 UINT32 *replyLen)
+{
+    OvsFlowDumpInput *dumpInput = (OvsFlowDumpInput *)inputBuffer;
+    OvsFlowDumpOutput *dumpOutput = (OvsFlowDumpOutput *)outputBuffer;
+
+    if (!inputBuffer || !outputBuffer) {
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    /* The input length is verified before dumpInput is dereferenced. */
+    if (inputLength != sizeof(OvsFlowDumpInput)) {
+        return STATUS_INFO_LENGTH_MISMATCH;
+    }
+    if (outputLength != sizeof *dumpOutput + dumpInput->actionsLen) {
+        return STATUS_INFO_LENGTH_MISMATCH;
+    }
+
+    return OvsDoDumpFlows(dumpInput, dumpOutput, replyLen);
+}
+
+/*
+ * Copies the parts of 'flow' selected by 'getFlags' into 'info':
+ *
+ *   FLOW_GET_KEY      the key, from its start through the end of the used
+ *                     portion (l2.offset + l2.keyLen bytes).
+ *   FLOW_GET_STATS    packet/byte counters, TCP flags and the last-used
+ *                     time (truncated to 32 bits).
+ *   FLOW_GET_ACTIONS  the actions, provided they fit in 'getActionsLen'
+ *                     bytes.
+ *
+ * info->actionsLen is always written: it is the number of action bytes
+ * copied, and 0 when actions were not requested or did not fit.  Callers
+ * such as OvsDoDumpFlows() derive the reply length from it.
+ *
+ * Returns STATUS_SUCCESS, or STATUS_BUFFER_TOO_SMALL when the actions were
+ * requested but are longer than 'getActionsLen'.
+ */
+static NTSTATUS
+ReportFlowInfo(OvsFlow *flow,
+               UINT32 getFlags,
+               UINT32 getActionsLen,
+               OvsFlowInfo *info)
+{
+    NTSTATUS status = STATUS_SUCCESS;
+
+    if (getFlags & FLOW_GET_KEY) {
+        // always copy the tunnel key part
+        RtlCopyMemory(&info->key, &flow->key,
+                      flow->key.l2.keyLen + flow->key.l2.offset);
+    }
+
+    if (getFlags & FLOW_GET_STATS) {
+        OvsFlowStats *stats = &info->stats;
+        stats->packetCount = flow->packetCount;
+        stats->byteCount = flow->byteCount;
+        stats->used = (UINT32)flow->used;
+        stats->tcpFlags = flow->tcpFlags;
+    }
+
+    if (getFlags & FLOW_GET_ACTIONS) {
+        if (flow->actionsLen == 0) {
+            info->actionsLen = 0;
+        } else if (flow->actionsLen > getActionsLen) {
+            info->actionsLen = 0;
+            status = STATUS_BUFFER_TOO_SMALL;
+        } else {
+            RtlCopyMemory(info->actions, flow->actions, flow->actionsLen);
+            info->actionsLen = flow->actionsLen;
+        }
+    } else {
+        /* No actions are reported; never leave the length stale. */
+        info->actionsLen = 0;
+    }
+
+    return status;
+}
+
+/*
+ * Handles a flow put (create / modify / delete) request.
+ *
+ * The input must be one OvsFlowPut followed by put->actionsLen bytes, and
+ * the output buffer must be exactly one OvsFlowStats.  The request is
+ * applied by HandleFlowPut() with the control spin lock and the datapath
+ * write lock held; on success the resulting statistics are copied to the
+ * output buffer and '*replyLen' is set.
+ *
+ * Returns STATUS_INFO_LENGTH_MISMATCH for bad buffers or lengths,
+ * STATUS_INVALID_PARAMETER if put->dpNo does not match the active switch,
+ * otherwise the status of HandleFlowPut().
+ */
+NTSTATUS
+OvsPutFlowIoctl(PVOID inputBuffer,
+                UINT32 inputLength,
+                PVOID outputBuffer,
+                UINT32 outputLength,
+                UINT32 *replyLen)
+{
+    NTSTATUS status = STATUS_SUCCESS;
+    OVS_DATAPATH *datapath = NULL;
+    struct OvsFlowStats stats;
+    ULONG actionsLen;
+    OvsFlowPut *put;
+    UINT32 dpNo;
+    LOCK_STATE_EX dpLockState;
+
+    if ((inputBuffer == NULL) || (inputLength < sizeof(OvsFlowPut))) {
+        return STATUS_INFO_LENGTH_MISMATCH;
+    }
+
+    if ((outputBuffer == NULL) || (outputLength != sizeof(stats))) {
+        return STATUS_INFO_LENGTH_MISMATCH;
+    }
+
+    put = (OvsFlowPut *)inputBuffer;
+    actionsLen = (put->actionsLen > 0) ? put->actionsLen : 0;
+    if (inputLength != actionsLen + sizeof(*put)) {
+        return STATUS_INFO_LENGTH_MISMATCH;
+    }
+
+    dpNo = put->dpNo;
+    NdisAcquireSpinLock(gOvsCtrlLock);
+    if (gOvsSwitchContext != NULL && gOvsSwitchContext->dpNo == dpNo) {
+        datapath = &gOvsSwitchContext->datapath;
+        ASSERT(datapath);
+        RtlZeroMemory(&stats, sizeof(stats));
+
+        OvsAcquireDatapathWrite(datapath, &dpLockState, FALSE);
+        status = HandleFlowPut(put, datapath, &stats);
+        OvsReleaseDatapath(datapath, &dpLockState);
+
+        if (status == STATUS_SUCCESS) {
+            // Hand the statistics back to the user mode caller.
+            NdisMoveMemory(outputBuffer, (PVOID)&stats, sizeof(stats));
+            *replyLen = sizeof stats;
+        }
+    } else {
+        status = STATUS_INVALID_PARAMETER;
+    }
+
+    NdisReleaseSpinLock(gOvsCtrlLock);
+    return status;
+}
+
+
+/*
+ * Handles flow add, modify as well as delete.
+ *
+ * put->flags must request either deletion or creation/modification, never
+ * both and never neither.
+ *
+ *   - No flow with put->key exists: the flow is created if
+ *     OVSWIN_FLOW_PUT_CREATE is set, otherwise the call fails.
+ *   - A flow exists: its current statistics are copied to 'stats'.  With
+ *     OVSWIN_FLOW_PUT_MODIFY it is replaced by a new flow built from 'put'
+ *     (keeping the counters unless OVSWIN_FLOW_PUT_CLEAR is set); with
+ *     OVSWIN_FLOW_PUT_DELETE it is removed; a create-only request fails.
+ *
+ * 'stats' is written only when a flow already existed.  OvsPutFlowIoctl()
+ * calls this with the datapath write lock held.
+ *
+ * Returns STATUS_SUCCESS, STATUS_INVALID_PARAMETER for an invalid flag
+ * combination or a missing flow that may not be created, and
+ * STATUS_UNSUCCESSFUL for every other failure.
+ */
+static NTSTATUS
+HandleFlowPut(OvsFlowPut *put,
+              OVS_DATAPATH *datapath,
+              struct OvsFlowStats *stats)
+{
+    BOOLEAN mayCreate, mayModify, mayDelete;
+    OvsFlow *KernelFlow;
+    UINT64 hash;
+    NTSTATUS status = STATUS_SUCCESS;
+
+    mayCreate = (put->flags & OVSWIN_FLOW_PUT_CREATE) != 0;
+    mayModify = (put->flags & OVSWIN_FLOW_PUT_MODIFY) != 0;
+    mayDelete = (put->flags & OVSWIN_FLOW_PUT_DELETE) != 0;
+
+    /* Exactly one of "create or modify" and "delete" must be requested. */
+    if ((mayCreate || mayModify) == mayDelete) {
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    KernelFlow = OvsLookupFlow(datapath, &put->key, &hash, FALSE);
+    if (!KernelFlow) {
+        if (!mayCreate) {
+            return STATUS_INVALID_PARAMETER;
+        }
+
+        status = OvsPrepareFlow(&KernelFlow, put, hash);
+        if (status != STATUS_SUCCESS) {
+            /*
+             * OvsPrepareFlow leaves NULL behind when the allocation fails,
+             * so there is normally nothing to release here.
+             */
+            if (KernelFlow) {
+                FreeFlow(KernelFlow);
+            }
+            return STATUS_UNSUCCESSFUL;
+        }
+
+        status = AddFlow(datapath, KernelFlow);
+        if (status != STATUS_SUCCESS) {
+            /* Not linked into the table, so it is still ours to free. */
+            FreeFlow(KernelFlow);
+            return STATUS_UNSUCCESSFUL;
+        }
+
+        /* Validate the flow addition */
+        {
+            UINT64 newHash;
+            OvsFlow *flow = OvsLookupFlow(datapath, &put->key, &newHash,
+                                          FALSE);
+            ASSERT(flow);
+            ASSERT(newHash == hash);
+            if (!flow || newHash != hash) {
+                return STATUS_UNSUCCESSFUL;
+            }
+        }
+    } else {
+        stats->packetCount = KernelFlow->packetCount;
+        stats->byteCount = KernelFlow->byteCount;
+        stats->tcpFlags = KernelFlow->tcpFlags;
+        stats->used = (UINT32)KernelFlow->used;
+
+        if (mayModify) {
+            OvsFlow *newFlow = NULL;
+            status = OvsPrepareFlow(&newFlow, put, hash);
+            if (status != STATUS_SUCCESS) {
+                return STATUS_UNSUCCESSFUL;
+            }
+
+            KernelFlow = OvsLookupFlow(datapath, &put->key, &hash, TRUE);
+            if (KernelFlow) {
+                if ((put->flags & OVSWIN_FLOW_PUT_CLEAR) == 0) {
+                    newFlow->packetCount = KernelFlow->packetCount;
+                    newFlow->byteCount = KernelFlow->byteCount;
+                    newFlow->tcpFlags = KernelFlow->tcpFlags;
+                }
+                RemoveFlow(datapath, &KernelFlow);
+            } else {
+                if ((put->flags & OVSWIN_FLOW_PUT_CLEAR) == 0) {
+                    newFlow->packetCount = stats->packetCount;
+                    newFlow->byteCount = stats->byteCount;
+                    newFlow->tcpFlags = stats->tcpFlags;
+                }
+            }
+            status = AddFlow(datapath, newFlow);
+            ASSERT(status == STATUS_SUCCESS);
+            if (status != STATUS_SUCCESS) {
+                /*
+                 * AddFlow did not link the flow; release it here, because
+                 * in builds without ASSERT this would otherwise leak.
+                 */
+                FreeFlow(newFlow);
+                return STATUS_UNSUCCESSFUL;
+            }
+
+            /* Validate the flow addition */
+            {
+                UINT64 newHash;
+                OvsFlow *testflow = OvsLookupFlow(datapath, &put->key,
+                                                  &newHash, FALSE);
+                ASSERT(testflow);
+                ASSERT(newHash == hash);
+                if (!testflow || newHash != hash) {
+                    /*
+                     * newFlow is linked into the table at this point, so it
+                     * must not be freed behind the table's back.
+                     */
+                    return STATUS_UNSUCCESSFUL;
+                }
+            }
+        } else {
+            if (mayDelete) {
+                if (KernelFlow) {
+                    RemoveFlow(datapath, &KernelFlow);
+                }
+            } else {
+                /* Create-only request for a flow that already exists. */
+                return STATUS_UNSUCCESSFUL;
+            }
+        }
+    }
+    return STATUS_SUCCESS;
+}
+
+/*
+ * Allocates a new flow and initializes it from 'put'.
+ *
+ * The flow and its actions share one allocation of
+ * sizeof(OvsFlow) + put->actionsLen bytes.  The key, actions and 'hash' are
+ * copied in and the statistics start at zero.  The flow is not linked into
+ * any table; that is the job of AddFlow().
+ *
+ * '*flow' is an output only: it receives the new flow, or NULL when the
+ * allocation fails.  Its previous value is never read, so the caller may
+ * pass an uninitialized pointer.
+ *
+ * NOTE(review): actionBufferLen and ListEntry are left uninitialized here --
+ * confirm nothing reads actionBufferLen before it is set.
+ *
+ * Returns STATUS_SUCCESS or STATUS_NO_MEMORY.
+ */
+static NTSTATUS
+OvsPrepareFlow(OvsFlow **flow,
+               const OvsFlowPut *put,
+               UINT64 hash)
+{
+    OvsFlow *localFlow;
+
+    localFlow = OvsAllocateMemory(sizeof(OvsFlow) + put->actionsLen);
+    *flow = localFlow;
+    if (localFlow == NULL) {
+        return STATUS_NO_MEMORY;
+    }
+
+    localFlow->key = put->key;
+    localFlow->actionsLen = put->actionsLen;
+    if (put->actionsLen) {
+        NdisMoveMemory((PUCHAR)localFlow->actions, put->actions,
+                       put->actionsLen);
+    }
+    localFlow->userActionsLen = 0;  // 0 indicate no conversion is made
+    localFlow->used = 0;
+    localFlow->packetCount = 0;
+    localFlow->byteCount = 0;
+    localFlow->tcpFlags = 0;
+    localFlow->hash = hash;
+
+    return STATUS_SUCCESS;
+}
+
+/*
+ * Looks up the flow matching getInput->key and reports the requested parts
+ * of it through ReportFlowInfo().
+ *
+ * The input must be exactly one OvsFlowGetInput and must not request
+ * FLOW_GET_KEY; the output must be exactly one OvsFlowGetOutput plus
+ * getInput->actionsLen bytes.  On success '*replyLen' is the full output
+ * length.  The status of ReportFlowInfo() is not propagated.
+ *
+ * NOTE(review): getFlags and actionsLen are copied to locals, and the
+ * lookup on getInput->key is done, before the output buffer is written;
+ * presumably the two buffers may be the same memory -- keep that order.
+ *
+ * Returns STATUS_INFO_LENGTH_MISMATCH for bad buffers or lengths and
+ * STATUS_INVALID_PARAMETER for FLOW_GET_KEY, an unknown datapath number or
+ * an unknown flow.
+ */
+NTSTATUS
+OvsGetFlowIoctl(PVOID inputBuffer,
+                UINT32 inputLength,
+                PVOID outputBuffer,
+                UINT32 outputLength,
+                UINT32 *replyLen)
+{
+    NTSTATUS status = STATUS_SUCCESS;
+    OVS_DATAPATH *datapath = NULL;
+    OvsFlow *flow;
+    UINT32 getFlags, getActionsLen;
+    OvsFlowGetInput *getInput;
+    OvsFlowGetOutput *getOutput;
+    UINT64 hash;
+    UINT32 dpNo;
+    LOCK_STATE_EX dpLockState;
+
+    if (inputBuffer == NULL
+        || inputLength != sizeof(OvsFlowGetInput)) {
+        return STATUS_INFO_LENGTH_MISMATCH;
+    }
+
+    getInput = (OvsFlowGetInput *) inputBuffer;
+    getFlags = getInput->getFlags;
+    getActionsLen = getInput->actionsLen;
+    if (getInput->getFlags & FLOW_GET_KEY) {
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    if (outputBuffer == NULL
+        || outputLength != (sizeof *getOutput +
+                            getInput->actionsLen)) {
+        return STATUS_INFO_LENGTH_MISMATCH;
+    }
+
+    dpNo = getInput->dpNo;
+    NdisAcquireSpinLock(gOvsCtrlLock);
+    if (gOvsSwitchContext == NULL ||
+        gOvsSwitchContext->dpNo != dpNo) {
+        status = STATUS_INVALID_PARAMETER;
+    } else {
+        datapath = &gOvsSwitchContext->datapath;
+        ASSERT(datapath);
+        OvsAcquireDatapathRead(datapath, &dpLockState, FALSE);
+
+        flow = OvsLookupFlow(datapath, &getInput->key, &hash, FALSE);
+        if (flow) {
+            // XXX: can be optimized to return only how much is written out
+            *replyLen = outputLength;
+            getOutput = (OvsFlowGetOutput *)outputBuffer;
+            ReportFlowInfo(flow, getFlags, getActionsLen, &getOutput->info);
+        } else {
+            status = STATUS_INVALID_PARAMETER;
+        }
+
+        OvsReleaseDatapath(datapath, &dpLockState);
+    }
+
+    NdisReleaseSpinLock(gOvsCtrlLock);
+    return status;
+}
+
+/*
+ * Removes every flow from the datapath whose number is passed as a single
+ * UINT32 in 'inputBuffer'.  The flush runs with the control spin lock and
+ * the datapath write lock held.
+ *
+ * Returns STATUS_INFO_LENGTH_MISMATCH if the input is not exactly one
+ * UINT32, STATUS_INVALID_PARAMETER if the number does not match the active
+ * switch, and STATUS_SUCCESS otherwise.
+ */
+NTSTATUS
+OvsFlushFlowIoctl(PVOID inputBuffer,
+                  UINT32 inputLength)
+{
+    NTSTATUS status = STATUS_SUCCESS;
+    OVS_DATAPATH *datapath = NULL;
+    UINT32 dpNo;
+    LOCK_STATE_EX dpLockState;
+
+    if (inputBuffer == NULL || inputLength != sizeof(UINT32)) {
+        return STATUS_INFO_LENGTH_MISMATCH;
+    }
+
+    dpNo = *(UINT32 *)inputBuffer;
+    NdisAcquireSpinLock(gOvsCtrlLock);
+    if (gOvsSwitchContext != NULL &&
+        gOvsSwitchContext->dpNo == dpNo) {
+        datapath = &gOvsSwitchContext->datapath;
+        ASSERT(datapath);
+
+        OvsAcquireDatapathWrite(datapath, &dpLockState, FALSE);
+        DeleteAllFlows(datapath);
+        OvsReleaseDatapath(datapath, &dpLockState);
+    } else {
+        status = STATUS_INVALID_PARAMETER;
+    }
+
+    NdisReleaseSpinLock(gOvsCtrlLock);
+    return status;
+}
+
+#pragma warning( pop )
diff --git a/datapath-windows/ovsext/OvsFlow.h b/datapath-windows/ovsext/OvsFlow.h
new file mode 100644
index 000000000..93368b33b
--- /dev/null
+++ b/datapath-windows/ovsext/OvsFlow.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_FLOW_H_
+#define __OVS_FLOW_H_ 1
+
+#include "precomp.h"
+#include "OvsSwitch.h"
+#include "OvsUser.h"
+#include "OvsNetProto.h"
+
+/*
+ * One entry of the datapath flow table: a key, the actions to apply and the
+ * usage statistics.  OvsPrepareFlow() in OvsFlow.c allocates it as
+ * sizeof(OvsFlow) + actionsLen bytes, so the actions follow the structure
+ * in the same allocation.
+ */
+typedef struct _OvsFlow {
+ LIST_ENTRY ListEntry; // In Datapath's flowTable.
+ OvsFlowKey key;
+ // Hash of the used portion of 'key'; selects the bucket (see OvsLookupFlow).
+ UINT64 hash;
+ // Number of bytes stored in 'actions'.
+ UINT32 actionsLen;
+ // Accumulated by OvsFlowUsed(): tcpFlags is OR-ed, used is the last-use
+ // timestamp, packetCount/byteCount are running totals.
+ UINT8 tcpFlags;
+ UINT64 used;
+ UINT64 packetCount;
+ UINT64 byteCount;
+ UINT32 userActionsLen; // used for flow query
+ UINT32 actionBufferLen; // used for flow reuse
+ // First element of the trailing, variable-length action data.
+ struct nlattr actions[1];
+} OvsFlow;
+
+
+/*
+ * Per-layer header positions of a packet.
+ * NOTE(review): presumably byte offsets from the start of the frame; the
+ * flow code in OvsFlow.c takes a POVS_PACKET_HDR_INFO instead of this
+ * structure -- confirm whether it is still used.
+ */
+typedef struct _OvsLayers {
+ UINT32 l3Ofs; // IPv4, IPv6, ARP, or other L3 header.
+ UINT32 l4Ofs; // TCP, UDP, ICMP, ICMPv6, or other L4 header.
+ UINT32 l7Ofs; // L4 protocol's payload.
+} OvsLayers;
+
+/* Timestamp conversion values; OvsFlowUsed() combines them with the tick count. */
+extern UINT64 ovsUserTimestampDelta;
+extern UINT64 ovsTimeIncrementPerTick;
+
+/* Flow table life cycle. */
+NDIS_STATUS OvsDeleteFlowTable(OVS_DATAPATH *datapath);
+NDIS_STATUS OvsAllocateFlowTable(OVS_DATAPATH *datapath,
+ POVS_SWITCH_CONTEXT switchContext);
+
+/* Key extraction, lookup, hashing and statistics update. */
+NDIS_STATUS OvsExtractFlow(const NET_BUFFER_LIST *pkt, UINT32 inPort,
+ OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers,
+ OvsIPv4TunnelKey *tunKey);
+OvsFlow *OvsLookupFlow(OVS_DATAPATH *datapath, const OvsFlowKey *key,
+ UINT64 *hash, BOOLEAN hashValid);
+UINT64 OvsHashFlow(const OvsFlowKey *key);
+VOID OvsFlowUsed(OvsFlow *flow, const NET_BUFFER_LIST *pkt,
+ const POVS_PACKET_HDR_INFO layers);
+
+/* Flow IOCTL handlers: dump, put (create/modify/delete), get and flush. */
+NTSTATUS OvsDumpFlowIoctl(PVOID inputBuffer, UINT32 inputLength,
+ PVOID outputBuffer, UINT32 outputLength,
+ UINT32 *replyLen);
+NTSTATUS OvsPutFlowIoctl(PVOID inputBuffer, UINT32 inputLength,
+ PVOID outputBuffer, UINT32 outputLength,
+ UINT32 *replyLen);
+NTSTATUS OvsGetFlowIoctl(PVOID inputBuffer, UINT32 inputLength,
+ PVOID outputBuffer, UINT32 outputLength,
+ UINT32 *replyLen);
+NTSTATUS OvsFlushFlowIoctl(PVOID inputBuffer, UINT32 inputLength);
+
+/* Flags for tunneling (one bit each, so they can be OR-ed together). */
+#define OVS_TNL_F_DONT_FRAGMENT (1 << 0)
+#define OVS_TNL_F_CSUM (1 << 1)
+#define OVS_TNL_F_KEY (1 << 2)
+
+#endif /* __OVS_FLOW_H_ */
diff --git a/datapath-windows/ovsext/OvsIoctl.c b/datapath-windows/ovsext/OvsIoctl.c
new file mode 100644
index 000000000..893cbf744
--- /dev/null
+++ b/datapath-windows/ovsext/OvsIoctl.c
@@ -0,0 +1,758 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "precomp.h"
+#include "OvsIoctl.h"
+#include "OvsJhash.h"
+#include "OvsSwitch.h"
+#include "OvsVport.h"
+#include "OvsEvent.h"
+#include "OvsUser.h"
+#include "OvsPacketIO.h"
+#include "OvsNetProto.h"
+#include "OvsFlow.h"
+#include "OvsUser.h"
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_IOCTL
+#include "OvsDebug.h"
+
+/* Handles to the device object for communication with userspace. */
+NDIS_HANDLE gOvsDeviceHandle;
+PDEVICE_OBJECT gOvsDeviceObject;
+
+/*
+ * There seems to be a skew between the kernel's version of current time and
+ * the userspace's version of current time. The skew was seen to
+ * monotonically increase as well.
+ *
+ * In order to deal with the situation, we pass down the userspace's version
+ * of the timestamp to the kernel, and let the kernel calculate the delta.
+ */
+UINT64 ovsUserTimestampDelta;
+UINT64 ovsTimeIncrementPerTick;
+
+_Dispatch_type_(IRP_MJ_CREATE)
+_Dispatch_type_(IRP_MJ_CLOSE)
+DRIVER_DISPATCH OvsOpenCloseDevice;
+
+_Dispatch_type_(IRP_MJ_CLEANUP)
+DRIVER_DISPATCH OvsCleanupDevice;
+
+_Dispatch_type_(IRP_MJ_DEVICE_CONTROL)
+DRIVER_DISPATCH OvsDeviceControl;
+
+#ifdef ALLOC_PRAGMA
+#pragma alloc_text(INIT, OvsCreateDeviceObject)
+#pragma alloc_text(PAGE, OvsOpenCloseDevice)
+#pragma alloc_text(PAGE, OvsCleanupDevice)
+#pragma alloc_text(PAGE, OvsDeviceControl)
+#endif // ALLOC_PRAGMA
+
+
+/* Capacity of the table of open handles to the control device. */
+#define OVS_MAX_OPEN_INSTANCES 128
+
+/*
+ * Table of open instances, indexed by the instance's 'cookie'. Slots that
+ * are not in use are NULL. OvsAddOpenInstance() and OvsRemoveOpenInstance()
+ * modify the table while holding the control lock.
+ */
+POVS_OPEN_INSTANCE ovsOpenInstanceArray[OVS_MAX_OPEN_INSTANCES];
+UINT32 ovsNumberOfOpenInstances;
+extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
+
+/*
+ * Backing storage for the two spin locks, and the pointers through which
+ * they are used. The pointers are set up by OvsInitIoctl().
+ */
+NDIS_SPIN_LOCK ovsCtrlLockObj;
+NDIS_SPIN_LOCK ovsFlowLockObj;
+PNDIS_SPIN_LOCK gOvsCtrlLock;
+PNDIS_SPIN_LOCK ovsFlowLock;
+
+/*
+ * Binds the global lock pointers to their backing objects and initializes
+ * both spin locks (flow lock first, then control lock).
+ */
+VOID
+OvsInitIoctl()
+{
+    ovsFlowLock = &ovsFlowLockObj;
+    NdisAllocateSpinLock(ovsFlowLock);
+
+    gOvsCtrlLock = &ovsCtrlLockObj;
+    NdisAllocateSpinLock(gOvsCtrlLock);
+}
+
+/*
+ * Releases the spin locks set up by OvsInitIoctl() and clears both global
+ * lock pointers. 'ovsFlowLock' doubles as the "initialized" marker, so it
+ * must be reset here; otherwise a second call would free the locks again.
+ */
+VOID
+OvsCleanupIoctl()
+{
+    if (ovsFlowLock) {
+        NdisFreeSpinLock(ovsFlowLock);
+        NdisFreeSpinLock(gOvsCtrlLock);
+        ovsFlowLock = NULL;
+        gOvsCtrlLock = NULL;
+    }
+}
+
+/*
+ * Initializes the ioctl locks, the event queue and the user (packet)
+ * subsystem, in that order.
+ */
+VOID
+OvsInit()
+{
+ OvsInitIoctl();
+ OvsInitEventQueue();
+ OvsUserInit();
+}
+
+/*
+ * Tears down what OvsInit() set up.
+ * NOTE(review): the order here is not the reverse of OvsInit(): the ioctl
+ * locks are freed before OvsUserCleanup() runs. Confirm that
+ * OvsUserCleanup() does not need those locks.
+ */
+VOID
+OvsCleanup()
+{
+ OvsCleanupEventQueue();
+ OvsCleanupIoctl();
+ OvsUserCleanup();
+}
+
+/* Acquires the global control spin lock. */
+VOID
+OvsAcquireCtrlLock()
+{
+ NdisAcquireSpinLock(gOvsCtrlLock);
+}
+/* Releases the global control spin lock. */
+VOID
+OvsReleaseCtrlLock()
+{
+ NdisReleaseSpinLock(gOvsCtrlLock);
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * Creates the communication device between user and kernel, and also
+ * initializes the associated data structures.
+ *
+ * Registers the device with NDIS with dispatch routines for create, close,
+ * cleanup and device-control. Only when registration succeeds is the device
+ * extension reset and OvsInit() called; on failure nothing is initialized
+ * and the NDIS status is returned to the caller.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsCreateDeviceObject(NDIS_HANDLE ovsExtDriverHandle)
+{
+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+    UNICODE_STRING deviceName;
+    UNICODE_STRING symbolicDeviceName;
+    PDRIVER_DISPATCH dispatchTable[IRP_MJ_MAXIMUM_FUNCTION+1];
+    NDIS_DEVICE_OBJECT_ATTRIBUTES deviceAttributes;
+    OVS_LOG_TRACE("ovsExtDriverHandle: %p", ovsExtDriverHandle);
+
+    RtlZeroMemory(dispatchTable,
+                  (IRP_MJ_MAXIMUM_FUNCTION + 1) * sizeof (PDRIVER_DISPATCH));
+    dispatchTable[IRP_MJ_CREATE] = OvsOpenCloseDevice;
+    dispatchTable[IRP_MJ_CLOSE] = OvsOpenCloseDevice;
+    dispatchTable[IRP_MJ_CLEANUP] = OvsCleanupDevice;
+    dispatchTable[IRP_MJ_DEVICE_CONTROL] = OvsDeviceControl;
+
+    NdisInitUnicodeString(&deviceName, OVS_NT_DEVICE_NAME);
+    NdisInitUnicodeString(&symbolicDeviceName, OVS_DOS_DEVICE_NAME);
+
+    RtlZeroMemory(&deviceAttributes, sizeof (NDIS_DEVICE_OBJECT_ATTRIBUTES));
+
+    OVS_INIT_OBJECT_HEADER(&deviceAttributes.Header,
+                           NDIS_OBJECT_TYPE_DEVICE_OBJECT_ATTRIBUTES,
+                           NDIS_DEVICE_OBJECT_ATTRIBUTES_REVISION_1,
+                           sizeof (NDIS_DEVICE_OBJECT_ATTRIBUTES));
+
+    deviceAttributes.DeviceName = &deviceName;
+    deviceAttributes.SymbolicName = &symbolicDeviceName;
+    deviceAttributes.MajorFunctions = dispatchTable;
+    deviceAttributes.ExtensionSize = sizeof (OVS_DEVICE_EXTENSION);
+
+    status = NdisRegisterDeviceEx(ovsExtDriverHandle,
+                                  &deviceAttributes,
+                                  &gOvsDeviceObject,
+                                  &gOvsDeviceHandle);
+    if (status == NDIS_STATUS_SUCCESS) {
+        POVS_DEVICE_EXTENSION ovsExt;
+
+        /* The device object is only valid once registration succeeded. */
+        ASSERT(gOvsDeviceObject != NULL);
+        ASSERT(gOvsDeviceHandle != NULL);
+
+        ovsExt = (POVS_DEVICE_EXTENSION)
+            NdisGetDeviceReservedExtension(gOvsDeviceObject);
+        if (ovsExt) {
+            ovsExt->numberOpenInstance = 0;
+        }
+
+        /* Initialize the associated data structures. */
+        OvsInit();
+    }
+    OVS_LOG_TRACE("DeviceObject: %p", gOvsDeviceObject);
+    return status;
+}
+
+
+/*
+ * Deregisters the control device, if it was registered, and then runs the
+ * global cleanup. OvsCleanup() is called whether or not a device handle
+ * existed.
+ */
+VOID
+OvsDeleteDeviceObject()
+{
+    if (gOvsDeviceHandle != NULL) {
+#ifdef DBG
+        POVS_DEVICE_EXTENSION devExt = (POVS_DEVICE_EXTENSION)
+            NdisGetDeviceReservedExtension(gOvsDeviceObject);
+
+        /* All handles are expected to be closed by now. */
+        ASSERT(devExt == NULL || devExt->numberOpenInstance == 0);
+#endif
+
+        ASSERT(gOvsDeviceObject);
+        NdisDeregisterDeviceEx(gOvsDeviceHandle);
+        gOvsDeviceObject = NULL;
+        gOvsDeviceHandle = NULL;
+    }
+
+    OvsCleanup();
+}
+
+/*
+ * Returns the open instance stored in 'fileObject' if a switch context
+ * exists and its datapath number equals 'dpNo'; otherwise returns NULL.
+ */
+POVS_OPEN_INSTANCE
+OvsGetOpenInstance(PFILE_OBJECT fileObject,
+                   UINT32 dpNo)
+{
+    POVS_OPEN_INSTANCE openInst = (POVS_OPEN_INSTANCE)fileObject->FsContext;
+
+    ASSERT(openInst);
+    ASSERT(openInst->fileObject == fileObject);
+
+    if (gOvsSwitchContext != NULL && gOvsSwitchContext->dpNo == dpNo) {
+        return openInst;
+    }
+    return NULL;
+}
+
+
+/*
+ * Searches the open-instance table for the entry that belongs to
+ * 'fileObject'. The scan stops once 'ovsNumberOfOpenInstances' occupied
+ * slots have been examined. Returns NULL when no entry matches.
+ */
+POVS_OPEN_INSTANCE
+OvsFindOpenInstance(PFILE_OBJECT fileObject)
+{
+    UINT32 slot;
+    UINT32 seen = 0;
+
+    for (slot = 0; slot < OVS_MAX_OPEN_INSTANCES; slot++) {
+        POVS_OPEN_INSTANCE candidate;
+
+        if (seen >= ovsNumberOfOpenInstances) {
+            break;
+        }
+        candidate = ovsOpenInstanceArray[slot];
+        if (candidate == NULL) {
+            continue;
+        }
+        if (candidate->fileObject == fileObject) {
+            return candidate;
+        }
+        seen++;
+    }
+    return NULL;
+}
+
+/*
+ * Allocates an open instance for 'fileObject', stores it in the first free
+ * slot of the open-instance table and links it to the file object through
+ * FsContext.
+ *
+ * The table and 'ovsNumberOfOpenInstances' are updated together under the
+ * control lock, so that the capacity check below and the bounded scan in
+ * OvsFindOpenInstance() see an accurate count.
+ *
+ * Returns STATUS_NO_MEMORY if the allocation fails,
+ * STATUS_INSUFFICIENT_RESOURCES if the table is full, STATUS_SUCCESS
+ * otherwise.
+ */
+NTSTATUS
+OvsAddOpenInstance(PFILE_OBJECT fileObject)
+{
+    POVS_OPEN_INSTANCE instance =
+        (POVS_OPEN_INSTANCE) OvsAllocateMemory(sizeof (OVS_OPEN_INSTANCE));
+    UINT32 i;
+
+    if (instance == NULL) {
+        return STATUS_NO_MEMORY;
+    }
+    OvsAcquireCtrlLock();
+    ASSERT(OvsFindOpenInstance(fileObject) == NULL);
+
+    if (ovsNumberOfOpenInstances >= OVS_MAX_OPEN_INSTANCES) {
+        OvsReleaseCtrlLock();
+        OvsFreeMemory(instance);
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    RtlZeroMemory(instance, sizeof (OVS_OPEN_INSTANCE));
+
+    for (i = 0; i < OVS_MAX_OPEN_INSTANCES; i++) {
+        if (ovsOpenInstanceArray[i] == NULL) {
+            ovsOpenInstanceArray[i] = instance;
+            ovsNumberOfOpenInstances++;
+            instance->cookie = i;
+            break;
+        }
+    }
+    /* A free slot must exist because the count was below the capacity. */
+    ASSERT(i < OVS_MAX_OPEN_INSTANCES);
+    instance->fileObject = fileObject;
+    ASSERT(fileObject->FsContext == NULL);
+    fileObject->FsContext = instance;
+    OvsReleaseCtrlLock();
+    return STATUS_SUCCESS;
+}
+
+/*
+ * Releases the event and packet queue state of the open instance attached
+ * to 'fileObject'. The instance itself stays in the table until
+ * OvsRemoveOpenInstance() is called.
+ */
+static VOID
+OvsCleanupOpenInstance(PFILE_OBJECT fileObject)
+{
+    POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
+    ASSERT(instance);
+    ASSERT(fileObject == instance->fileObject);
+    OvsCleanupEvent(instance);
+    OvsCleanupPacketQueue(instance);
+}
+
+/*
+ * Detaches the open instance from 'fileObject', removes it from the
+ * open-instance table (decrementing the instance count under the control
+ * lock) and frees it. The event and packet queues are expected to have been
+ * released already.
+ */
+VOID
+OvsRemoveOpenInstance(PFILE_OBJECT fileObject)
+{
+    POVS_OPEN_INSTANCE instance;
+    ASSERT(fileObject->FsContext);
+    instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
+    ASSERT(instance->cookie < OVS_MAX_OPEN_INSTANCES);
+
+    OvsAcquireCtrlLock();
+    fileObject->FsContext = NULL;
+    ASSERT(ovsOpenInstanceArray[instance->cookie] == instance);
+    ovsOpenInstanceArray[instance->cookie] = NULL;
+    ASSERT(ovsNumberOfOpenInstances > 0);
+    ovsNumberOfOpenInstances--;
+    OvsReleaseCtrlLock();
+    ASSERT(instance->eventQueue == NULL);
+    ASSERT (instance->packetQueue == NULL);
+    OvsFreeMemory(instance);
+}
+
+/*
+ * Stores 'status' and 'infoPtr' in the IRP's I/O status block, completes
+ * the IRP without a priority boost and returns 'status'.
+ */
+NTSTATUS
+OvsCompleteIrpRequest(PIRP irp,
+                      ULONG_PTR infoPtr,
+                      NTSTATUS status)
+{
+    irp->IoStatus.Status = status;
+    irp->IoStatus.Information = infoPtr;
+
+    IoCompleteRequest(irp, IO_NO_INCREMENT);
+
+    /* Return the parameter: the IRP is not touched after completion. */
+    return status;
+}
+
+
+/*
+ * Dispatch routine for IRP_MJ_CREATE and IRP_MJ_CLOSE.
+ *
+ * On create, an open instance is added for the file object and, if that
+ * succeeds, the device's open-instance counter is incremented. On close,
+ * the open instance is removed and the counter decremented. The IRP is
+ * always completed here with the resulting status.
+ */
+NTSTATUS
+OvsOpenCloseDevice(PDEVICE_OBJECT deviceObject,
+                   PIRP irp)
+{
+    POVS_DEVICE_EXTENSION devExt = (POVS_DEVICE_EXTENSION)
+        NdisGetDeviceReservedExtension(deviceObject);
+    PIO_STACK_LOCATION stackLoc;
+    PFILE_OBJECT fileObj;
+    NTSTATUS result = STATUS_SUCCESS;
+
+    ASSERT(deviceObject == gOvsDeviceObject);
+    ASSERT(devExt != NULL);
+
+    stackLoc = IoGetCurrentIrpStackLocation(irp);
+    fileObj = stackLoc->FileObject;
+    OVS_LOG_TRACE("DeviceObject: %p, fileObject:%p, instance: %u",
+                  deviceObject, fileObj,
+                  devExt->numberOpenInstance);
+
+    if (stackLoc->MajorFunction == IRP_MJ_CREATE) {
+        result = OvsAddOpenInstance(fileObj);
+        if (result == STATUS_SUCCESS) {
+            InterlockedIncrement((LONG volatile *)&devExt->numberOpenInstance);
+        }
+    } else if (stackLoc->MajorFunction == IRP_MJ_CLOSE) {
+        ASSERT(devExt->numberOpenInstance > 0);
+        OvsRemoveOpenInstance(fileObj);
+        InterlockedDecrement((LONG volatile *)&devExt->numberOpenInstance);
+    } else {
+        /* Only create and close are routed to this handler. */
+        ASSERT(0);
+    }
+
+    return OvsCompleteIrpRequest(irp, (ULONG_PTR)0, result);
+}
+
+/*
+ * Dispatch routine for IRP_MJ_CLEANUP: releases the event and packet queue
+ * state of the file object's open instance and completes the IRP with
+ * STATUS_SUCCESS.
+ */
+_Use_decl_annotations_
+NTSTATUS
+OvsCleanupDevice(PDEVICE_OBJECT deviceObject,
+                 PIRP irp)
+{
+    PIO_STACK_LOCATION stackLoc;
+#ifdef DBG
+    POVS_DEVICE_EXTENSION devExt = (POVS_DEVICE_EXTENSION)
+        NdisGetDeviceReservedExtension(deviceObject);
+
+    ASSERT(devExt == NULL || devExt->numberOpenInstance > 0);
+#else
+    UNREFERENCED_PARAMETER(deviceObject);
+#endif
+    ASSERT(deviceObject == gOvsDeviceObject);
+
+    stackLoc = IoGetCurrentIrpStackLocation(irp);
+    ASSERT(stackLoc->MajorFunction == IRP_MJ_CLEANUP);
+
+    OvsCleanupOpenInstance(stackLoc->FileObject);
+
+    return OvsCompleteIrpRequest(irp, (ULONG_PTR)0, STATUS_SUCCESS);
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsGetVersionIoctl --
+ *    Writes the driver's major and minor version into 'outputBuffer' and
+ *    sets '*replyLen' to the size of the version structure. '*replyLen' is
+ *    left untouched when the buffer is too small.
+ *
+ * Result:
+ *    STATUS_SUCCESS
+ *    STATUS_BUFFER_TOO_SMALL
+ *----------------------------------------------------------------------------
+ */
+NTSTATUS
+OvsGetVersionIoctl(PVOID outputBuffer,
+                   uint32 outputLength,
+                   uint32 *replyLen)
+{
+    POVS_VERSION version = (POVS_VERSION)outputBuffer;
+
+    if (outputLength >= sizeof (*version)) {
+        *replyLen = sizeof (*version);
+        version->mjrDrvVer = OVS_DRIVER_MAJOR_VER;
+        version->mnrDrvVer = OVS_DRIVER_MINOR_VER;
+        return STATUS_SUCCESS;
+    }
+
+    return STATUS_BUFFER_TOO_SMALL;
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsDpDumpIoctl --
+ *    Get All Datapath. For now, we only support one datapath.
+ *
+ *    '*replyLen' is set to sizeof (UINT32) up front, so it reports the
+ *    required size when the buffer is too small. When no switch context
+ *    exists, nothing is written and '*replyLen' is reset to 0.
+ *
+ * Result:
+ *    STATUS_SUCCESS
+ *    STATUS_BUFFER_TOO_SMALL
+ *----------------------------------------------------------------------------
+ */
+NTSTATUS
+OvsDpDumpIoctl(PVOID outputBuffer,
+               UINT32 outputLength,
+               UINT32 *replyLen)
+{
+    *replyLen = sizeof (UINT32);
+    if (outputLength < sizeof (UINT32)) {
+        return STATUS_BUFFER_TOO_SMALL;
+    }
+
+    OvsAcquireCtrlLock();
+    if (gOvsSwitchContext == NULL) {
+        *replyLen = 0;
+    } else {
+        *(UINT32 *)outputBuffer = gOvsSwitchContext->dpNo;
+    }
+    OvsReleaseCtrlLock();
+
+    return STATUS_SUCCESS;
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsDpGetIoctl --
+ *    Given dpNo (read from 'inputBuffer'), fill 'outputBuffer' with the
+ *    datapath info defined in OVS_DP_INFO: the name "ovs-system" and the
+ *    missed/hit/lost/flow counters, read under the control lock.
+ *
+ * Result:
+ *    STATUS_SUCCESS
+ *    STATUS_BUFFER_TOO_SMALL     ('*replyLen' holds the required size)
+ *    STATUS_INVALID_PARAMETER    (short input, or unknown datapath number)
+ *----------------------------------------------------------------------------
+ */
+NTSTATUS
+OvsDpGetIoctl(PVOID inputBuffer,
+              UINT32 inputLength,
+              PVOID outputBuffer,
+              UINT32 outputLength,
+              UINT32 *replyLen)
+{
+    POVS_DP_INFO dpInfo = (POVS_DP_INFO)outputBuffer;
+    OVS_DATAPATH *dp;
+    UINT32 requestedDpNo;
+
+    if (inputLength < sizeof (UINT32)) {
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    if (outputLength < sizeof (OVS_DP_INFO)) {
+        *replyLen = sizeof (OVS_DP_INFO);
+        return STATUS_BUFFER_TOO_SMALL;
+    }
+
+    requestedDpNo = *(UINT32 *)inputBuffer;
+
+    OvsAcquireCtrlLock();
+    if (gOvsSwitchContext == NULL ||
+        gOvsSwitchContext->dpNo != requestedDpNo) {
+        OvsReleaseCtrlLock();
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    *replyLen = sizeof (OVS_DP_INFO);
+    RtlZeroMemory(outputBuffer, sizeof (OVS_DP_INFO));
+    RtlCopyMemory(dpInfo->name, "ovs-system", sizeof ("ovs-system"));
+
+    dp = &gOvsSwitchContext->datapath;
+    dpInfo->nMissed = dp->misses;
+    dpInfo->nHit = dp->hits;
+    dpInfo->nLost = dp->lost;
+    dpInfo->nFlows = dp->nFlows;
+    OvsReleaseCtrlLock();
+
+    return STATUS_SUCCESS;
+}
+
+/*
+ * Maps the output MDL of a direct-I/O IRP into system address space.
+ *
+ * Returns STATUS_INVALID_PARAMETER (leaving '*outputBuffer' untouched) when
+ * the IRP carries no MDL, STATUS_INSUFFICIENT_RESOURCES (with
+ * '*outputBuffer' set to NULL) when the mapping fails, and STATUS_SUCCESS
+ * with '*outputBuffer' pointing at the mapping otherwise.
+ */
+static NTSTATUS
+OvsMapIrpOutputBuffer(PIRP irp,
+                      PVOID *outputBuffer)
+{
+    if (irp->MdlAddress == NULL) {
+        return STATUS_INVALID_PARAMETER;
+    }
+    *outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress,
+                                                 NormalPagePriority);
+    if (*outputBuffer == NULL) {
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    return STATUS_SUCCESS;
+}
+
+/*
+ * Dispatch routine for IRP_MJ_DEVICE_CONTROL.
+ *
+ * Decodes the I/O control code and forwards the request to the matching
+ * handler. By default both the input and the output buffer are the IRP's
+ * system buffer; requests that return data through an MDL map it first
+ * with OvsMapIrpOutputBuffer(). Unless a handler returns STATUS_PENDING,
+ * the IRP is completed here with the handler's status and 'replyLen' as
+ * the information value.
+ */
+NTSTATUS
+OvsDeviceControl(PDEVICE_OBJECT deviceObject,
+                 PIRP irp)
+{
+
+    PIO_STACK_LOCATION irpSp;
+    NTSTATUS status = STATUS_SUCCESS;
+    PFILE_OBJECT fileObject;
+    PVOID inputBuffer;
+    PVOID outputBuffer;
+    UINT32 inputBufferLen, outputBufferLen, mdlBufferLen;
+    UINT32 code, replyLen = 0;
+#ifdef DBG
+    POVS_DEVICE_EXTENSION ovsExt =
+        (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(deviceObject);
+    ASSERT(deviceObject == gOvsDeviceObject);
+    ASSERT(ovsExt);
+    ASSERT(ovsExt->numberOpenInstance > 0);
+#else
+    UNREFERENCED_PARAMETER(deviceObject);
+#endif
+
+    irpSp = IoGetCurrentIrpStackLocation(irp);
+
+
+    ASSERT(irpSp->MajorFunction == IRP_MJ_DEVICE_CONTROL);
+    ASSERT(irpSp->FileObject != NULL);
+
+    fileObject = irpSp->FileObject;
+    code = irpSp->Parameters.DeviceIoControl.IoControlCode;
+    inputBufferLen = irpSp->Parameters.DeviceIoControl.InputBufferLength;
+    outputBufferLen = irpSp->Parameters.DeviceIoControl.OutputBufferLength;
+    /*
+     * In case of an IRP with METHOD_IN_DIRECT or METHOD_OUT_DIRECT, the size
+     * of the MDL is stored in Parameters.DeviceIoControl.OutputBufferLength.
+     */
+    mdlBufferLen = outputBufferLen;
+    outputBuffer = inputBuffer = irp->AssociatedIrp.SystemBuffer;
+
+    switch(code) {
+    case OVS_IOCTL_VERSION_GET:
+        status = OvsGetVersionIoctl(outputBuffer, outputBufferLen,
+                                    &replyLen);
+        break;
+    case OVS_IOCTL_DP_DUMP:
+        status = OvsDpDumpIoctl(outputBuffer, outputBufferLen, &replyLen);
+        break;
+    case OVS_IOCTL_DP_GET:
+        status = OvsMapIrpOutputBuffer(irp, &outputBuffer);
+        if (status == STATUS_SUCCESS) {
+            status = OvsDpGetIoctl(inputBuffer, inputBufferLen,
+                                   outputBuffer, outputBufferLen, &replyLen);
+        }
+        break;
+    case OVS_IOCTL_DP_SET:
+        status = STATUS_NOT_IMPLEMENTED;
+        break;
+    case OVS_IOCTL_VPORT_DUMP:
+        status = OvsMapIrpOutputBuffer(irp, &outputBuffer);
+        if (status == STATUS_SUCCESS) {
+            status = OvsDumpVportIoctl(inputBuffer, inputBufferLen,
+                                       outputBuffer, outputBufferLen,
+                                       &replyLen);
+        }
+        break;
+    case OVS_IOCTL_VPORT_GET:
+        status = OvsMapIrpOutputBuffer(irp, &outputBuffer);
+        if (status == STATUS_SUCCESS) {
+            status = OvsGetVportIoctl(inputBuffer, inputBufferLen,
+                                      outputBuffer, outputBufferLen,
+                                      &replyLen);
+        }
+        break;
+    case OVS_IOCTL_VPORT_SET:
+        status = STATUS_NOT_IMPLEMENTED;
+        break;
+    case OVS_IOCTL_VPORT_ADD:
+        status = OvsMapIrpOutputBuffer(irp, &outputBuffer);
+        if (status == STATUS_SUCCESS) {
+            status = OvsAddVportIoctl(inputBuffer, inputBufferLen,
+                                      outputBuffer, outputBufferLen,
+                                      &replyLen);
+        }
+        break;
+    case OVS_IOCTL_VPORT_DEL:
+        status = OvsDelVportIoctl(inputBuffer, inputBufferLen,
+                                  &replyLen);
+        break;
+    case OVS_IOCTL_VPORT_EXT_INFO:
+        status = OvsMapIrpOutputBuffer(irp, &outputBuffer);
+        if (status == STATUS_SUCCESS) {
+            status = OvsGetExtInfoIoctl(inputBuffer, inputBufferLen,
+                                        outputBuffer, outputBufferLen,
+                                        &replyLen);
+        } else if (status == STATUS_INSUFFICIENT_RESOURCES) {
+            /* Only a failed mapping is logged, not a missing MDL. */
+            OVS_LOG_INFO("ExtInfo: fail to get outputBuffer address");
+        }
+        break;
+    case OVS_IOCTL_FLOW_DUMP:
+        status = OvsMapIrpOutputBuffer(irp, &outputBuffer);
+        if (status == STATUS_SUCCESS) {
+            status = OvsDumpFlowIoctl(inputBuffer, inputBufferLen,
+                                      outputBuffer, outputBufferLen,
+                                      &replyLen);
+        }
+        break;
+    case OVS_IOCTL_FLOW_GET:
+        status = OvsMapIrpOutputBuffer(irp, &outputBuffer);
+        if (status == STATUS_SUCCESS) {
+            status = OvsGetFlowIoctl(inputBuffer, inputBufferLen,
+                                     outputBuffer, outputBufferLen,
+                                     &replyLen);
+        }
+        break;
+    case OVS_IOCTL_FLOW_PUT:
+        // XXX: This is not really working - mapping the input buffer
+        // XXX: inputBufferLen = mdlBufferLen;
+        // inputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress,
+        //                                            NormalPagePriority);
+        status = OvsPutFlowIoctl(inputBuffer, inputBufferLen,
+                                 outputBuffer, outputBufferLen,
+                                 &replyLen);
+        break;
+    case OVS_IOCTL_FLOW_FLUSH:
+        status = OvsFlushFlowIoctl(inputBuffer, inputBufferLen);
+        break;
+    case OVS_IOCTL_QOS_QUEUE_DUMP:
+    case OVS_IOCTL_QOS_QUEUE_GET:
+    case OVS_IOCTL_QOS_QUEUE_SET:
+        status = STATUS_NOT_IMPLEMENTED;
+        break;
+    case OVS_IOCTL_DATAPATH_SUBSCRIBE:
+        status = OvsSubscribeDpIoctl(fileObject, inputBuffer,
+                                     inputBufferLen);
+        break;
+    case OVS_IOCTL_DATAPATH_READ:
+        status = OvsMapIrpOutputBuffer(irp, &outputBuffer);
+        if (status == STATUS_SUCCESS) {
+            status = OvsReadDpIoctl(fileObject, outputBuffer,
+                                    outputBufferLen, &replyLen);
+        }
+        break;
+    case OVS_IOCTL_DATAPATH_OPERATE:
+        status = STATUS_NOT_IMPLEMENTED;
+        break;
+    case OVS_IOCTL_DATAPATH_EXECUTE:
+        // XXX: need to make the input direct
+        status = OvsExecuteDpIoctl(inputBuffer, inputBufferLen,
+                                   outputBufferLen);
+        break;
+    case OVS_IOCTL_DATAPATH_PURGE:
+        status = OvsPurgeDpIoctl(fileObject);
+        break;
+    case OVS_IOCTL_DATAPATH_WAIT:
+        status = OvsWaitDpIoctl(irp, fileObject);
+        break;
+    case OVS_IOCTL_EVENT_SUBSCRIBE:
+        status = OvsSubscribeEventIoctl(fileObject, inputBuffer,
+                                        inputBufferLen);
+        break;
+    case OVS_IOCTL_EVENT_POLL:
+        status = OvsMapIrpOutputBuffer(irp, &outputBuffer);
+        if (status == STATUS_SUCCESS) {
+            status = OvsPollEventIoctl(fileObject, inputBuffer,
+                                       inputBufferLen, outputBuffer,
+                                       outputBufferLen, &replyLen);
+        }
+        break;
+    case OVS_IOCTL_EVENT_WAIT:
+        status = OvsWaitEventIoctl(irp, fileObject,
+                                   inputBuffer, inputBufferLen);
+        break;
+    case OVS_IOCTL_DP_TIMESTAMP_SET:
+        if (inputBufferLen != sizeof (ovsUserTimestampDelta)) {
+            status = STATUS_INFO_LENGTH_MISMATCH;
+        } else {
+            int64 currentUserTS = *(int64 *)inputBuffer;
+            LARGE_INTEGER tickCount;
+
+            /* So many ticks since system booted. */
+            KeQueryTickCount(&tickCount);
+            ovsUserTimestampDelta = currentUserTS -
+                (tickCount.QuadPart * ovsTimeIncrementPerTick);
+            status = STATUS_SUCCESS;
+        }
+        break;
+    default:
+        status = STATUS_INVALID_DEVICE_REQUEST;
+        break;
+    }
+
+    if (status == STATUS_PENDING) {
+        /* The handler kept the IRP; it is completed elsewhere. */
+        return status;
+    } else {
+        /*
+         * When the system-address-space mapping that is returned by
+         * MmGetSystemAddressForMdlSafe is no longer needed, it must be
+         * released.
+         * http://msdn.microsoft.com/en-us/library/windows/hardware/ff554559(v=vs.85).aspx
+         *
+         * We might have to release the MDL here.
+         */
+        return OvsCompleteIrpRequest(irp, (ULONG_PTR)replyLen, status);
+    }
+}
diff --git a/datapath-windows/ovsext/OvsIoctl.h b/datapath-windows/ovsext/OvsIoctl.h
new file mode 100644
index 000000000..9f2bf8599
--- /dev/null
+++ b/datapath-windows/ovsext/OvsIoctl.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_IOCTL_H_
+#define __OVS_IOCTL_H_ 1
+
+/*
+ * Per-device extension of the control device. 'numberOpenInstance' counts
+ * the currently open handles; it is updated with interlocked operations on
+ * create and close.
+ */
+typedef struct _OVS_DEVICE_EXTENSION {
+ INT numberOpenInstance;
+} OVS_DEVICE_EXTENSION, *POVS_DEVICE_EXTENSION;
+
+
+/*
+ * State kept for one open handle to the control device. It is stored in
+ * the file object's FsContext.
+ */
+typedef struct _OVS_OPEN_INSTANCE {
+ UINT32 cookie; /* Index of this instance in the open-instance table. */
+ PFILE_OBJECT fileObject; /* File object that owns this instance. */
+ PVOID eventQueue; /* Expected to be NULL by the time the instance is freed. */
+ PVOID packetQueue; /* Expected to be NULL by the time the instance is freed. */
+} OVS_OPEN_INSTANCE, *POVS_OPEN_INSTANCE;
+
+NDIS_STATUS OvsCreateDeviceObject(NDIS_HANDLE ovsExtDriverHandle);
+VOID OvsDeleteDeviceObject();
+
+POVS_OPEN_INSTANCE OvsGetOpenInstance(PFILE_OBJECT fileObject,
+ UINT32 dpNo);
+
+NTSTATUS OvsCompleteIrpRequest(PIRP irp, ULONG_PTR infoPtr, NTSTATUS status);
+
+#endif /* __OVS_IOCTL_H_ */
diff --git a/datapath-windows/ovsext/OvsIpHelper.c b/datapath-windows/ovsext/OvsIpHelper.c
new file mode 100644
index 000000000..cd2625a30
--- /dev/null
+++ b/datapath-windows/ovsext/OvsIpHelper.c
@@ -0,0 +1,1689 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "precomp.h"
+#include "OvsIpHelper.h"
+#include "OvsSwitch.h"
+#include "OvsJhash.h"
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_IPHELPER
+#include "OvsDebug.h"
+
+/*
+ * For now, we assume only one internal adapter
+ */
+
+KSTART_ROUTINE OvsStartIpHelper;
+
+
+/*
+ * Only when the internal IP is configured and virtual
+ * internal port is connected, the IP helper request can be
+ * queued.
+ */
+static BOOLEAN ovsInternalIPConfigured;
+static UINT32 ovsInternalPortNo;
+static GUID ovsInternalNetCfgId;
+static MIB_IF_ROW2 ovsInternalRow;
+static MIB_IPINTERFACE_ROW ovsInternalIPRow;
+
+/* we only keep one internal IP for reference, it will not be used for
+ * determining SRC IP of Tunnel
+ */
+static UINT32 ovsInternalIP;
+
+
+/*
+ * FWD_ENTRY --------> IPFORWARD_ENTRY
+ * |
+ * |--------------------------------------> IPNEIGH_ENTRY
+ *
+ * IPFORWARD_ENTRY ------> FWD_ENTRY LIST with same IPFORWARD
+ *
+ * IPNEIGH_ENTRY ------> FWD_ENTRY LIST with same IPNEIGH
+ *
+ */
+
+static PLIST_ENTRY ovsFwdHashTable; // based on DST IP
+static PLIST_ENTRY ovsRouteHashTable; // based on DST PREFIX
+static PLIST_ENTRY ovsNeighHashTable; // based on DST IP
+static LIST_ENTRY ovsSortedIPNeighList;
+static UINT32 ovsNumFwdEntries;
+
+
+static PNDIS_RW_LOCK_EX ovsTableLock;
+static NDIS_SPIN_LOCK ovsIpHelperLock;
+
+static LIST_ENTRY ovsIpHelperRequestList;
+static UINT32 ovsNumIpHelperRequests;
+
+static HANDLE ipInterfaceNotificationHandle;
+static HANDLE ipRouteNotificationHandle;
+static HANDLE unicastIPNotificationHandle;
+
+static OVS_IP_HELPER_THREAD_CONTEXT ovsIpHelperThreadContext;
+
+static POVS_IPFORWARD_ENTRY OvsLookupIPForwardEntry(PIP_ADDRESS_PREFIX prefix);
+static VOID OvsRemoveIPForwardEntry(POVS_IPFORWARD_ENTRY ipf);
+static VOID OvsRemoveAllFwdEntriesWithSrc(UINT32 ipAddr);
+static VOID OvsCleanupIpHelperRequestList(VOID);
+static VOID OvsCleanupFwdTable(VOID);
+static VOID OvsAddToSortedNeighList(POVS_IPNEIGH_ENTRY ipn);
+
+/*
+ * Logs the LUID, interface index, GUID and permanent MAC address of an
+ * interface row.
+ *
+ * The first two bytes of GUID.Data4 are printed byte by byte: reading them
+ * through a UINT16 pointer would print them swapped on a little-endian
+ * host, so the logged GUID would not match its canonical text form.
+ */
+static VOID
+OvsDumpIfRow(PMIB_IF_ROW2 ifRow)
+{
+    OVS_LOG_INFO("InterfaceLuid: NetLuidIndex: %d, type: %d",
+                 ifRow->InterfaceLuid.Info.NetLuidIndex,
+                 ifRow->InterfaceLuid.Info.IfType);
+    OVS_LOG_INFO("InterfaceIndex: %d", ifRow->InterfaceIndex);
+
+    OVS_LOG_INFO("Interface GUID: %08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+                 ifRow->InterfaceGuid.Data1,
+                 ifRow->InterfaceGuid.Data2,
+                 ifRow->InterfaceGuid.Data3,
+                 ifRow->InterfaceGuid.Data4[0],
+                 ifRow->InterfaceGuid.Data4[1],
+                 ifRow->InterfaceGuid.Data4[2],
+                 ifRow->InterfaceGuid.Data4[3],
+                 ifRow->InterfaceGuid.Data4[4],
+                 ifRow->InterfaceGuid.Data4[5],
+                 ifRow->InterfaceGuid.Data4[6],
+                 ifRow->InterfaceGuid.Data4[7]);
+    OVS_LOG_INFO("Perm MAC Address: %02x:%02x:%02x:%02x:%02x:%02x",
+                 ifRow->PermanentPhysicalAddress[0],
+                 ifRow->PermanentPhysicalAddress[1],
+                 ifRow->PermanentPhysicalAddress[2],
+                 ifRow->PermanentPhysicalAddress[3],
+                 ifRow->PermanentPhysicalAddress[4],
+                 ifRow->PermanentPhysicalAddress[5]);
+}
+
+
+/* Logs the number of rows in an interface table, then every row. */
+static VOID
+OvsDumpIfTable(PMIB_IF_TABLE2 ifTable)
+{
+    UINT32 idx;
+
+    OVS_LOG_INFO("======Number of entries: %d========", ifTable->NumEntries);
+
+    for (idx = 0; idx < ifTable->NumEntries; idx++) {
+        OvsDumpIfRow(&ifTable->Table[idx]);
+    }
+}
+
+
+/*
+ * Looks up the interface whose GUID equals '*interfaceGuid' in the system
+ * interface table and copies its row into '*ifEntry'.
+ *
+ * Returns STATUS_INVALID_PARAMETER for a NULL argument, the failure status
+ * of GetIfTable2Ex() if the table cannot be read, STATUS_NOT_FOUND when no
+ * row matches, and STATUS_SUCCESS otherwise.
+ */
+NTSTATUS
+OvsGetIfEntry(GUID *interfaceGuid, PMIB_IF_ROW2 ifEntry)
+{
+    NTSTATUS result;
+    PMIB_IF_TABLE2 table;
+    UINT32 idx;
+
+    if (interfaceGuid == NULL || ifEntry == NULL) {
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    result = GetIfTable2Ex(MibIfTableNormal, &table);
+    if (result != STATUS_SUCCESS) {
+        OVS_LOG_INFO("Fail to get if table, status: %x", result);
+        return result;
+    }
+
+    result = STATUS_NOT_FOUND;
+    for (idx = 0; idx < table->NumEntries; idx++) {
+        PMIB_IF_ROW2 candidate = &table->Table[idx];
+
+        if (memcmp(interfaceGuid, &candidate->InterfaceGuid,
+                   sizeof (GUID)) != 0) {
+            continue;
+        }
+        RtlCopyMemory(ifEntry, candidate, sizeof (MIB_IF_ROW2));
+        result = STATUS_SUCCESS;
+        OvsDumpIfRow(ifEntry);
+        break;
+    }
+
+    /* The table is released on both the found and not-found paths. */
+    FreeMibTable(table);
+    return result;
+}
+
+
+/* Logs the LUID, interface index and reassembly size of an IP interface row. */
+static VOID
+OvsDumpIPInterfaceEntry(PMIB_IPINTERFACE_ROW ipRow)
+{
+    OVS_LOG_INFO("InterfaceLuid: NetLuidIndex: %d, type: %d",
+                 ipRow->InterfaceLuid.Info.NetLuidIndex,
+                 ipRow->InterfaceLuid.Info.IfType);
+
+    OVS_LOG_INFO("InterfaceIndex: %d", ipRow->InterfaceIndex);
+    OVS_LOG_INFO("MaxReassembleSize: %u", ipRow->MaxReassemblySize);
+}
+
+
+/*
+ * Fetches the IPv4 interface row for the interface identified by 'luid'
+ * into '*ipRow' and logs it.
+ *
+ * Returns STATUS_INVALID_PARAMETER when 'ipRow' is NULL, otherwise the
+ * status of GetIpInterfaceEntry().
+ */
+NTSTATUS
+OvsGetIPInterfaceEntry(NET_LUID luid,
+                       PMIB_IPINTERFACE_ROW ipRow)
+{
+    NTSTATUS result;
+
+    if (ipRow == NULL) {
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    /* Family and LUID select the row to query. */
+    ipRow->Family = AF_INET;
+    ipRow->InterfaceLuid.Value = luid.Value;
+
+    result = GetIpInterfaceEntry(ipRow);
+    if (result == STATUS_SUCCESS) {
+        OvsDumpIPInterfaceEntry(ipRow);
+    } else {
+        OVS_LOG_INFO("Fail to get internal IP Interface mib row, status: %x",
+                     result);
+    }
+    return result;
+}
+
+
+/*
+ * Logs the LUID, interface index and IPv4 address of a unicast address row.
+ * The row is expected to hold an AF_INET address.
+ */
+static VOID
+OvsDumpIPEntry(PMIB_UNICASTIPADDRESS_ROW ipRow)
+{
+    UINT32 addr;
+
+    OVS_LOG_INFO("InterfaceLuid: NetLuidIndex: %d, type: %d",
+                 ipRow->InterfaceLuid.Info.NetLuidIndex,
+                 ipRow->InterfaceLuid.Info.IfType);
+
+    OVS_LOG_INFO("InterfaceIndex: %d", ipRow->InterfaceIndex);
+
+    ASSERT(ipRow->Address.si_family == AF_INET);
+
+    /* The address is printed lowest byte first. */
+    addr = ipRow->Address.Ipv4.sin_addr.s_addr;
+    OVS_LOG_INFO("Unicast Address: %d.%d.%d.%d\n",
+                 addr & 0xff, (addr >> 8) & 0xff,
+                 (addr >> 16) & 0xff, addr >> 24);
+}
+
+
+/*
+ * Copies the first IPv4 unicast address row that belongs to the interface
+ * 'interfaceLuid' into '*ipEntry' and logs it.
+ *
+ * Returns STATUS_INVALID_PARAMETER when 'ipEntry' is NULL, the failure
+ * status of GetUnicastIpAddressTable() if the table cannot be read,
+ * STATUS_NOT_FOUND when the interface has no IPv4 address in the table,
+ * and STATUS_SUCCESS otherwise.
+ */
+NTSTATUS
+OvsGetIPEntry(NET_LUID interfaceLuid,
+              PMIB_UNICASTIPADDRESS_ROW ipEntry)
+{
+    PMIB_UNICASTIPADDRESS_TABLE ipTable;
+    NTSTATUS status;
+    UINT32 i;
+
+    if (ipEntry == NULL) {
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    status = GetUnicastIpAddressTable(AF_INET, &ipTable);
+
+    if (status != STATUS_SUCCESS) {
+        OVS_LOG_INFO("Fail to get unicast address table, status: %x", status);
+        return status;
+    }
+
+    status = STATUS_NOT_FOUND;
+
+    for (i = 0; i < ipTable->NumEntries; i++) {
+        PMIB_UNICASTIPADDRESS_ROW ipRow;
+
+        ipRow = &ipTable->Table[i];
+        if (ipRow->InterfaceLuid.Value == interfaceLuid.Value) {
+            RtlCopyMemory(ipEntry, ipRow, sizeof (*ipRow));
+            OvsDumpIPEntry(ipEntry);
+            status = STATUS_SUCCESS;
+            break;
+        }
+    }
+
+    /* The table is released on both the found and not-found paths. */
+    FreeMibTable(ipTable);
+    return status;
+}
+
+#ifdef OVS_ENABLE_IPPATH
+/* Logs the source, destination and current next hop of an IP path row. */
+static VOID
+OvsDumpIPPath(PMIB_IPPATH_ROW ipPath)
+{
+    UINT32 addr;
+
+    addr = ipPath->Source.Ipv4.sin_addr.s_addr;
+    OVS_LOG_INFO("Source: %d.%d.%d.%d",
+                 addr & 0xff, (addr >> 8) & 0xff,
+                 (addr >> 16) & 0xff, (addr >> 24) & 0xff);
+
+    addr = ipPath->Destination.Ipv4.sin_addr.s_addr;
+    OVS_LOG_INFO("Destination: %d.%d.%d.%d",
+                 addr & 0xff, (addr >> 8) & 0xff,
+                 (addr >> 16) & 0xff, (addr >> 24) & 0xff);
+
+    addr = ipPath->CurrentNextHop.Ipv4.sin_addr.s_addr;
+    OVS_LOG_INFO("NextHop: %d.%d.%d.%d",
+                 addr & 0xff, (addr >> 8) & 0xff,
+                 (addr >> 16) & 0xff, (addr >> 24) & 0xff);
+}
+
+
+/*
+ * Queries the IP path entry described by '*ipPath' and logs the result.
+ * Returns the status of GetIpPathEntry().
+ */
+NTSTATUS
+OvsGetIPPathEntry(PMIB_IPPATH_ROW ipPath)
+{
+    /* Captured before the query so the failure log shows the request. */
+    UINT32 dstAddr = ipPath->Destination.Ipv4.sin_addr.s_addr;
+    NTSTATUS result = GetIpPathEntry(ipPath);
+
+    if (result == STATUS_SUCCESS) {
+        OvsDumpIPPath(ipPath);
+    } else {
+        OVS_LOG_INFO("Fail to get IP path to %d.%d.%d.%d, status:%x",
+                     dstAddr & 0xff, (dstAddr >> 8) & 0xff,
+                     (dstAddr >> 16) & 0xff, (dstAddr >> 24) & 0xff, result);
+    }
+    return result;
+}
+#endif
+
+/* Logs the destination, source and next-hop IPv4 addresses of a route. */
+static VOID
+OvsDumpRoute(const SOCKADDR_INET *sourceAddress,
+             const SOCKADDR_INET *destinationAddress,
+             PMIB_IPFORWARD_ROW2 route)
+{
+    UINT32 addr;
+
+    addr = destinationAddress->Ipv4.sin_addr.s_addr;
+    OVS_LOG_INFO("Destination: %d.%d.%d.%d",
+                 addr & 0xff, (addr >> 8) & 0xff,
+                 (addr >> 16) & 0xff, (addr >> 24) & 0xff);
+
+    addr = sourceAddress->Ipv4.sin_addr.s_addr;
+    OVS_LOG_INFO("Source: %d.%d.%d.%d",
+                 addr & 0xff, (addr >> 8) & 0xff,
+                 (addr >> 16) & 0xff, (addr >> 24) & 0xff);
+
+    addr = route->NextHop.Ipv4.sin_addr.s_addr;
+    OVS_LOG_INFO("NextHop: %d.%d.%d.%d",
+                 addr & 0xff, (addr >> 8) & 0xff,
+                 (addr >> 16) & 0xff, (addr >> 24) & 0xff);
+}
+
+
+/*
+ * Asks the IP stack for the best route to '*destinationAddress' on the
+ * interface 'interfaceLuid'. On success '*route' holds the route and
+ * '*sourceAddress' the source address chosen for it; both are logged.
+ *
+ * Returns STATUS_INVALID_PARAMETER when any pointer argument is NULL
+ * ('sourceAddress' is written by GetBestRoute2() and read by the logging
+ * helper, so it is required as well), otherwise the status of
+ * GetBestRoute2().
+ */
+NTSTATUS
+OvsGetRoute(NET_LUID interfaceLuid,
+            const SOCKADDR_INET *destinationAddress,
+            PMIB_IPFORWARD_ROW2 route,
+            SOCKADDR_INET *sourceAddress)
+{
+    NTSTATUS status;
+
+    if (destinationAddress == NULL || route == NULL ||
+        sourceAddress == NULL) {
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    status = GetBestRoute2(&interfaceLuid, 0,
+                           NULL, destinationAddress,
+                           0, route, sourceAddress);
+
+    if (status != STATUS_SUCCESS) {
+        UINT32 ipAddr = destinationAddress->Ipv4.sin_addr.s_addr;
+        OVS_LOG_INFO("Fail to get route to %d.%d.%d.%d, status: %x",
+                     ipAddr & 0xff, (ipAddr >> 8) & 0xff,
+                     (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, status);
+        return status;
+    }
+
+    OvsDumpRoute(sourceAddress, destinationAddress, route);
+    return status;
+}
+
+/* Logs the IPv4 address and the MAC address of a neighbor row. */
+static VOID
+OvsDumpIPNeigh(PMIB_IPNET_ROW2 ipNeigh)
+{
+    UINT32 addr = ipNeigh->Address.Ipv4.sin_addr.s_addr;
+
+    OVS_LOG_INFO("Neigh: %d.%d.%d.%d",
+                 addr & 0xff, (addr >> 8) & 0xff,
+                 (addr >> 16) & 0xff, (addr >> 24) & 0xff);
+
+    OVS_LOG_INFO("MAC Address: %02x:%02x:%02x:%02x:%02x:%02x",
+                 ipNeigh->PhysicalAddress[0],
+                 ipNeigh->PhysicalAddress[1],
+                 ipNeigh->PhysicalAddress[2],
+                 ipNeigh->PhysicalAddress[3],
+                 ipNeigh->PhysicalAddress[4],
+                 ipNeigh->PhysicalAddress[5]);
+}
+
+
+/*
+ * Reads the neighbor entry described by '*ipNeigh' from the IP stack.
+ *
+ * Returns the failure status of GetIpNetEntry2() if the lookup fails,
+ * STATUS_SUCCESS when the entry is in the reachable or permanent state,
+ * and STATUS_FWP_TCPIP_NOT_READY for any other state.
+ */
+NTSTATUS
+OvsGetIPNeighEntry(PMIB_IPNET_ROW2 ipNeigh)
+{
+    NTSTATUS result;
+
+    ASSERT(ipNeigh);
+
+    result = GetIpNetEntry2(ipNeigh);
+    if (result != STATUS_SUCCESS) {
+        UINT32 addr = ipNeigh->Address.Ipv4.sin_addr.s_addr;
+
+        OVS_LOG_INFO("Fail to get ARP entry: %d.%d.%d.%d, status: %x",
+                     addr & 0xff, (addr >> 8) & 0xff,
+                     (addr >> 16) & 0xff, (addr >> 24) & 0xff, result);
+        return result;
+    }
+
+    /* Only entries in these two states are treated as usable. */
+    if (ipNeigh->State != NlnsReachable &&
+        ipNeigh->State != NlnsPermanent) {
+        return STATUS_FWP_TCPIP_NOT_READY;
+    }
+
+    OvsDumpIPNeigh(ipNeigh);
+    return STATUS_SUCCESS;
+}
+
+
+/*
+ * Resolves the neighbor entry described by 'ipNeigh' with
+ * ResolveIpNetEntry2().
+ *
+ * Returns the failing status of ResolveIpNetEntry2() (after logging it),
+ * STATUS_SUCCESS when the resolved entry is in the NlnsReachable or
+ * NlnsPermanent state, and STATUS_FWP_TCPIP_NOT_READY for any other state.
+ */
+NTSTATUS
+OvsResolveIPNeighEntry(PMIB_IPNET_ROW2 ipNeigh)
+{
+    NTSTATUS rc;
+
+    ASSERT(ipNeigh);
+
+    rc = ResolveIpNetEntry2(ipNeigh, NULL);
+    if (rc != STATUS_SUCCESS) {
+        UINT32 addr = ipNeigh->Address.Ipv4.sin_addr.s_addr;
+
+        OVS_LOG_INFO("Fail to resolve ARP entry: %d.%d.%d.%d, status: %x",
+                     addr & 0xff, (addr >> 8) & 0xff,
+                     (addr >> 16) & 0xff, (addr >> 24) & 0xff, rc);
+        return rc;
+    }
+
+    if (ipNeigh->State != NlnsReachable &&
+        ipNeigh->State != NlnsPermanent) {
+        return STATUS_FWP_TCPIP_NOT_READY;
+    }
+
+    OvsDumpIPNeigh(ipNeigh);
+    return STATUS_SUCCESS;
+}
+
+
+/*
+ * Obtains the neighbor entry for IPv4 address 'ipAddr' on the internal
+ * adapter (ovsInternalRow).  The existing entry is tried first with
+ * OvsGetIPNeighEntry(); if that does not succeed 'ipNeigh' is rebuilt from
+ * scratch and OvsResolveIPNeighEntry() is tried.
+ *
+ * Returns STATUS_SUCCESS from the first step that succeeds, otherwise the
+ * status of the resolve step.
+ */
+NTSTATUS
+OvsGetOrResolveIPNeigh(UINT32 ipAddr,
+                       PMIB_IPNET_ROW2 ipNeigh)
+{
+    NTSTATUS rc;
+
+    ASSERT(ipNeigh);
+
+    /* First attempt: plain lookup of an already known neighbor. */
+    RtlZeroMemory(ipNeigh, sizeof (*ipNeigh));
+    ipNeigh->InterfaceLuid.Value = ovsInternalRow.InterfaceLuid.Value;
+    ipNeigh->InterfaceIndex = ovsInternalRow.InterfaceIndex;
+    ipNeigh->Address.si_family = AF_INET;
+    ipNeigh->Address.Ipv4.sin_addr.s_addr = ipAddr;
+
+    rc = OvsGetIPNeighEntry(ipNeigh);
+    if (rc == STATUS_SUCCESS) {
+        return rc;
+    }
+
+    /* Second attempt: start from a clean row and resolve the address. */
+    RtlZeroMemory(ipNeigh, sizeof (*ipNeigh));
+    ipNeigh->InterfaceLuid.Value = ovsInternalRow.InterfaceLuid.Value;
+    ipNeigh->InterfaceIndex = ovsInternalRow.InterfaceIndex;
+    ipNeigh->Address.si_family = AF_INET;
+    ipNeigh->Address.Ipv4.sin_addr.s_addr = ipAddr;
+
+    return OvsResolveIPNeighEntry(ipNeigh);
+}
+
+
+/*
+ * IP interface change callback registered with NotifyIpInterfaceChange().
+ *
+ * For add/modify notifications that refer to the internal adapter
+ * (same NetLuidIndex, IfType and InterfaceIndex as ovsInternalRow) the
+ * cached interface row ovsInternalIPRow is refreshed and the adapter is
+ * marked as IP-configured.  For a delete notification on the internal
+ * adapter the configured flag is cleared and the pending helper requests
+ * and the forwarding cache are flushed.  Every add/modify/delete is logged.
+ *
+ * 'context' is unused.  'ipRow' is only dereferenced for add, modify and
+ * delete notifications.
+ */
+static VOID
+OvsChangeCallbackIpInterface(PVOID context,
+                             PMIB_IPINTERFACE_ROW ipRow,
+                             MIB_NOTIFICATION_TYPE notificationType)
+{
+    UNREFERENCED_PARAMETER(context);
+    switch (notificationType) {
+    case MibParameterNotification:
+    case MibAddInstance:
+        if (ipRow->InterfaceLuid.Info.NetLuidIndex ==
+            ovsInternalRow.InterfaceLuid.Info.NetLuidIndex &&
+            ipRow->InterfaceLuid.Info.IfType ==
+            ovsInternalRow.InterfaceLuid.Info.IfType &&
+            ipRow->InterfaceIndex == ovsInternalRow.InterfaceIndex) {
+            /*
+             * Update the IP Interface Row.  Copy the whole row: the size
+             * must be that of the structure, not of a pointer to it.
+             */
+            NdisAcquireSpinLock(&ovsIpHelperLock);
+            RtlCopyMemory(&ovsInternalIPRow, ipRow,
+                          sizeof (ovsInternalIPRow));
+            ovsInternalIPConfigured = TRUE;
+            NdisReleaseSpinLock(&ovsIpHelperLock);
+        }
+        OVS_LOG_INFO("IP Interface with NetLuidIndex: %d, type: %d is %s",
+                     ipRow->InterfaceLuid.Info.NetLuidIndex,
+                     ipRow->InterfaceLuid.Info.IfType,
+                     notificationType == MibAddInstance ? "added" : "modified");
+        break;
+    case MibDeleteInstance:
+        OVS_LOG_INFO("IP Interface with NetLuidIndex: %d, type: %d, deleted",
+                     ipRow->InterfaceLuid.Info.NetLuidIndex,
+                     ipRow->InterfaceLuid.Info.IfType);
+        if (ipRow->InterfaceLuid.Info.NetLuidIndex ==
+            ovsInternalRow.InterfaceLuid.Info.NetLuidIndex &&
+            ipRow->InterfaceLuid.Info.IfType ==
+            ovsInternalRow.InterfaceLuid.Info.IfType &&
+            ipRow->InterfaceIndex == ovsInternalRow.InterfaceIndex) {
+
+            NdisAcquireSpinLock(&ovsIpHelperLock);
+            ovsInternalIPConfigured = FALSE;
+            NdisReleaseSpinLock(&ovsIpHelperLock);
+
+            /* Nothing can be forwarded any more: fail what is queued and
+             * drop everything that was cached. */
+            OvsCleanupIpHelperRequestList();
+
+            OvsCleanupFwdTable();
+        }
+
+        break;
+    case MibInitialNotification:
+        OVS_LOG_INFO("Get Initial notification for IP Interface change.");
+        /* fall through */
+    default:
+        return;
+    }
+}
+
+
+/*
+ * Route change callback registered with NotifyRouteChange2().
+ *
+ * Added routes are only logged.  Modified or deleted routes are logged and,
+ * when they belong to the internal adapter (same NetLuidIndex, IfType and
+ * InterfaceIndex as ovsInternalRow), the cached route entry with the same
+ * destination prefix is removed under the table write lock.
+ *
+ * 'context' is unused.
+ */
+static VOID
+OvsChangeCallbackIpRoute(PVOID context,
+                         PMIB_IPFORWARD_ROW2 ipRoute,
+                         MIB_NOTIFICATION_TYPE notificationType)
+{
+    UINT32 dstIp, hopIp;
+
+    UNREFERENCED_PARAMETER(context);
+
+    if (notificationType == MibAddInstance) {
+        ASSERT(ipRoute);
+        dstIp = ipRoute->DestinationPrefix.Prefix.Ipv4.sin_addr.s_addr;
+        hopIp = ipRoute->NextHop.Ipv4.sin_addr.s_addr;
+
+        OVS_LOG_INFO("IPRoute: To %d.%d.%d.%d/%d through %d.%d.%d.%d added",
+                     dstIp & 0xff, (dstIp >> 8) & 0xff,
+                     (dstIp >> 16) & 0xff, (dstIp >> 24) & 0xff,
+                     ipRoute->DestinationPrefix.PrefixLength,
+                     hopIp & 0xff, (hopIp >> 8) & 0xff,
+                     (hopIp >> 16) & 0xff, (hopIp >> 24) & 0xff);
+    } else if (notificationType == MibParameterNotification ||
+               notificationType == MibDeleteInstance) {
+        ASSERT(ipRoute);
+        dstIp = ipRoute->DestinationPrefix.Prefix.Ipv4.sin_addr.s_addr;
+        hopIp = ipRoute->NextHop.Ipv4.sin_addr.s_addr;
+
+        OVS_LOG_INFO("IPRoute: To %d.%d.%d.%d/%d through %d.%d.%d.%d %s.",
+                     dstIp & 0xff, (dstIp >> 8) & 0xff,
+                     (dstIp >> 16) & 0xff, (dstIp >> 24) & 0xff,
+                     ipRoute->DestinationPrefix.PrefixLength,
+                     hopIp & 0xff, (hopIp >> 8) & 0xff,
+                     (hopIp >> 16) & 0xff, (hopIp >> 24) & 0xff,
+                     notificationType == MibDeleteInstance ? "deleted" :
+                     "modified");
+
+        /* Only routes of the internal adapter are ever cached. */
+        if (ipRoute->InterfaceLuid.Info.NetLuidIndex ==
+            ovsInternalRow.InterfaceLuid.Info.NetLuidIndex &&
+            ipRoute->InterfaceLuid.Info.IfType ==
+            ovsInternalRow.InterfaceLuid.Info.IfType &&
+            ipRoute->InterfaceIndex == ovsInternalRow.InterfaceIndex) {
+
+            LOCK_STATE_EX lockState;
+            POVS_IPFORWARD_ENTRY cached;
+
+            NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0);
+            cached = OvsLookupIPForwardEntry(&ipRoute->DestinationPrefix);
+            if (cached != NULL) {
+                OvsRemoveIPForwardEntry(cached);
+            }
+            NdisReleaseRWLock(ovsTableLock, &lockState);
+        }
+    } else if (notificationType == MibInitialNotification) {
+        OVS_LOG_INFO("Get Initial notification for IP Route change.");
+    }
+}
+
+
+/*
+ * Unicast address change callback registered with
+ * NotifyUnicastIpAddressChange().
+ *
+ * An added or modified address on the internal adapter is recorded in
+ * ovsInternalIP.  When an address is removed from the internal adapter all
+ * forwarding entries that use it as source address are dropped under the
+ * table write lock.  Every add/modify/delete is logged.
+ *
+ * 'context' is unused.
+ */
+static VOID
+OvsChangeCallbackUnicastIpAddress(PVOID context,
+                                  PMIB_UNICASTIPADDRESS_ROW unicastRow,
+                                  MIB_NOTIFICATION_TYPE notificationType)
+{
+    UINT32 addr;
+
+    UNREFERENCED_PARAMETER(context);
+
+    if (notificationType == MibParameterNotification ||
+        notificationType == MibAddInstance) {
+        ASSERT(unicastRow);
+        addr = unicastRow->Address.Ipv4.sin_addr.s_addr;
+        if (unicastRow->InterfaceLuid.Info.NetLuidIndex ==
+            ovsInternalRow.InterfaceLuid.Info.NetLuidIndex &&
+            unicastRow->InterfaceLuid.Info.IfType ==
+            ovsInternalRow.InterfaceLuid.Info.IfType &&
+            unicastRow->InterfaceIndex == ovsInternalRow.InterfaceIndex) {
+            ovsInternalIP = addr;
+        }
+        OVS_LOG_INFO("IP Address: %d.%d.%d.%d is %s",
+                     addr & 0xff, (addr >> 8) & 0xff,
+                     (addr >> 16) & 0xff, (addr >> 24) & 0xff,
+                     notificationType == MibAddInstance ? "added": "modified");
+    } else if (notificationType == MibDeleteInstance) {
+        ASSERT(unicastRow);
+        addr = unicastRow->Address.Ipv4.sin_addr.s_addr;
+        OVS_LOG_INFO("IP Address removed: %d.%d.%d.%d",
+                     addr & 0xff, (addr >> 8) & 0xff,
+                     (addr >> 16) & 0xff, (addr >> 24) & 0xff);
+        if (unicastRow->InterfaceLuid.Info.NetLuidIndex ==
+            ovsInternalRow.InterfaceLuid.Info.NetLuidIndex &&
+            unicastRow->InterfaceLuid.Info.IfType ==
+            ovsInternalRow.InterfaceLuid.Info.IfType &&
+            unicastRow->InterfaceIndex == ovsInternalRow.InterfaceIndex) {
+
+            LOCK_STATE_EX lockState;
+
+            /* The address is gone; entries sourced from it are stale. */
+            NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0);
+            OvsRemoveAllFwdEntriesWithSrc(addr);
+            NdisReleaseRWLock(ovsTableLock, &lockState);
+        }
+    } else if (notificationType == MibInitialNotification) {
+        OVS_LOG_INFO("Get Initial notification for Unicast IP Address change.");
+    }
+}
+
+
+/*
+ * Cancels each of the three MIB change notifications that is currently
+ * registered (non-NULL handle) and resets its handle to NULL, so calling
+ * this function again is harmless.
+ */
+static VOID
+OvsCancelChangeNotification()
+{
+    /* IP interface notifications. */
+    if (ipInterfaceNotificationHandle) {
+        CancelMibChangeNotify2(ipInterfaceNotificationHandle);
+        ipInterfaceNotificationHandle = NULL;
+    }
+
+    /* Route notifications. */
+    if (ipRouteNotificationHandle) {
+        CancelMibChangeNotify2(ipRouteNotificationHandle);
+        ipRouteNotificationHandle = NULL;
+    }
+
+    /* Unicast address notifications. */
+    if (unicastIPNotificationHandle) {
+        CancelMibChangeNotify2(unicastIPNotificationHandle);
+        unicastIPNotificationHandle = NULL;
+    }
+}
+
+
+/*
+ * Registers the IPv4 interface, route and unicast address change callbacks,
+ * in that order.  If the route or the unicast registration fails, everything
+ * registered so far is cancelled again.
+ *
+ * Returns STATUS_SUCCESS when all three registrations succeeded, otherwise
+ * the status of the registration that failed.
+ */
+static NTSTATUS
+OvsRegisterChangeNotification()
+{
+    NTSTATUS rc;
+
+    rc = NotifyIpInterfaceChange(AF_INET, OvsChangeCallbackIpInterface,
+                                 NULL, TRUE,
+                                 &ipInterfaceNotificationHandle);
+    if (rc != STATUS_SUCCESS) {
+        /* Nothing has been registered yet, so there is nothing to undo. */
+        OVS_LOG_ERROR("Fail to register Notify IP interface change, status:%x.",
+                      rc);
+        return rc;
+    }
+
+    rc = NotifyRouteChange2(AF_INET, OvsChangeCallbackIpRoute, NULL,
+                            TRUE, &ipRouteNotificationHandle);
+    if (rc != STATUS_SUCCESS) {
+        OVS_LOG_ERROR("Fail to regiter ip route change, status: %x.",
+                      rc);
+    } else {
+        rc = NotifyUnicastIpAddressChange(AF_INET,
+                                          OvsChangeCallbackUnicastIpAddress,
+                                          NULL, TRUE,
+                                          &unicastIPNotificationHandle);
+        if (rc != STATUS_SUCCESS) {
+            OVS_LOG_ERROR("Fail to regiter unicast ip change, status: %x.", rc);
+        }
+    }
+
+    if (rc != STATUS_SUCCESS) {
+        OvsCancelChangeNotification();
+    }
+    return rc;
+}
+
+
+/*
+ * Searches the neighbor hash table for the entry whose IPv4 address equals
+ * 'ipAddr'.  Returns the entry, or NULL when there is none.
+ */
+static POVS_IPNEIGH_ENTRY
+OvsLookupIPNeighEntry(UINT32 ipAddr)
+{
+    PLIST_ENTRY bucket, cur;
+    POVS_IPNEIGH_ENTRY candidate;
+
+    bucket = &ovsNeighHashTable[OvsJhashWords(&ipAddr, 1, OVS_HASH_BASIS) &
+                                OVS_NEIGH_HASH_TABLE_MASK];
+
+    LIST_FORALL(bucket, cur) {
+        candidate = CONTAINING_RECORD(cur, OVS_IPNEIGH_ENTRY, link);
+        if (candidate->ipAddr == ipAddr) {
+            return candidate;
+        }
+    }
+    return NULL;
+}
+
+
+/*
+ * Hashes an IPv4 prefix: the address goes into the upper and the prefix
+ * length into the lower 32 bits of a 64-bit key, which is then hashed as
+ * two 32-bit words.
+ */
+static UINT32
+OvsHashIPPrefix(PIP_ADDRESS_PREFIX prefix)
+{
+    UINT64 key;
+
+    key = (UINT64)prefix->Prefix.Ipv4.sin_addr.s_addr << 32;
+    key |= (UINT32)prefix->PrefixLength;
+
+    return OvsJhashWords((UINT32 *)&key, 2, OVS_HASH_BASIS);
+}
+
+
+/*
+ * Searches the route hash table for the entry whose prefix has the same
+ * IPv4 address and the same prefix length as 'prefix'.
+ *
+ * 'prefix' must be an AF_INET prefix.  Debug builds additionally assert
+ * that no address bit beyond the prefix length is set.
+ *
+ * Returns the matching entry, or NULL when there is none.
+ */
+static POVS_IPFORWARD_ENTRY
+OvsLookupIPForwardEntry(PIP_ADDRESS_PREFIX prefix)
+{
+
+    PLIST_ENTRY link;
+    POVS_IPFORWARD_ENTRY ipfEntry;
+    UINT32 hash;
+    ASSERT(prefix->Prefix.si_family == AF_INET);
+
+    /* Address in host byte order, used only by the sanity check below. */
+    hash = RtlUlongByteSwap(prefix->Prefix.Ipv4.sin_addr.s_addr);
+
+    /*
+     * Host bits must be zero.  A prefix length of 0 (default route) is
+     * handled separately: computing the mask would shift a 32-bit value
+     * by 32, which is undefined.
+     */
+    ASSERT(prefix->PrefixLength >= 32 ||
+           (prefix->PrefixLength == 0 ? hash == 0 :
+            (hash & (((UINT32)1 << (32 - prefix->PrefixLength)) - 1)) == 0));
+
+    hash = OvsHashIPPrefix(prefix);
+    LIST_FORALL(&ovsRouteHashTable[hash & OVS_ROUTE_HASH_TABLE_MASK], link) {
+        ipfEntry = CONTAINING_RECORD(link, OVS_IPFORWARD_ENTRY, link);
+        if (ipfEntry->prefix.PrefixLength == prefix->PrefixLength &&
+            ipfEntry->prefix.Prefix.Ipv4.sin_addr.s_addr ==
+            prefix->Prefix.Ipv4.sin_addr.s_addr) {
+            return ipfEntry;
+        }
+    }
+    return NULL;
+}
+
+
+/*
+ * Searches the forwarding hash table for the entry whose destination IPv4
+ * address equals 'dstIp'.  Returns the entry, or NULL when there is none.
+ */
+static POVS_FWD_ENTRY
+OvsLookupIPFwdEntry(UINT32 dstIp)
+{
+    PLIST_ENTRY bucket, cur;
+    POVS_FWD_ENTRY candidate;
+
+    bucket = &ovsFwdHashTable[OvsJhashWords(&dstIp, 1, OVS_HASH_BASIS) &
+                              OVS_FWD_HASH_TABLE_MASK];
+
+    LIST_FORALL(bucket, cur) {
+        candidate = CONTAINING_RECORD(cur, OVS_FWD_ENTRY, link);
+        if (candidate->info.dstIpAddr == dstIp) {
+            return candidate;
+        }
+    }
+    return NULL;
+}
+
+
+/*
+ * Copies the cached forwarding information for destination 'dstIp' into
+ * 'info'.  The lookup and the copy run under the table read lock.
+ *
+ * Returns STATUS_SUCCESS when an entry was found, STATUS_NOT_FOUND
+ * otherwise ('info' is left untouched in that case).
+ */
+NTSTATUS
+OvsLookupIPFwdInfo(UINT32 dstIp,
+                   POVS_FWD_INFO info)
+{
+    LOCK_STATE_EX lockState;
+    POVS_FWD_ENTRY hit;
+    NTSTATUS rc;
+
+    NdisAcquireRWLockRead(ovsTableLock, &lockState, 0);
+    hit = OvsLookupIPFwdEntry(dstIp);
+    if (hit == NULL) {
+        rc = STATUS_NOT_FOUND;
+    } else {
+        UINT32 i;
+
+        /* Copy the three 64-bit words of the union. */
+        for (i = 0; i < 3; i++) {
+            info->value[i] = hit->info.value[i];
+        }
+        rc = STATUS_SUCCESS;
+    }
+    NdisReleaseRWLock(ovsTableLock, &lockState);
+
+    return rc;
+}
+
+
+/*
+ * Allocates a zeroed neighbor cache entry and fills it from 'ipNeigh':
+ * IPv4 address, MAC address, and an expiry of "now + OVS_IPNEIGH_TIMEOUT".
+ * The entry is not linked into any table by this function.
+ *
+ * Returns the new entry, or NULL when the allocation failed.
+ */
+static POVS_IPNEIGH_ENTRY
+OvsCreateIPNeighEntry(PMIB_IPNET_ROW2 ipNeigh)
+{
+    POVS_IPNEIGH_ENTRY neigh;
+    UINT64 now;
+
+    ASSERT(ipNeigh != NULL);
+
+    neigh = (POVS_IPNEIGH_ENTRY)OvsAllocateMemory(sizeof (OVS_IPNEIGH_ENTRY));
+    if (neigh != NULL) {
+        RtlZeroMemory(neigh, sizeof (OVS_IPNEIGH_ENTRY));
+        neigh->ipAddr = ipNeigh->Address.Ipv4.sin_addr.s_addr;
+
+        KeQuerySystemTime((LARGE_INTEGER *)&now);
+        neigh->timeout = now + OVS_IPNEIGH_TIMEOUT;
+
+        RtlCopyMemory(neigh->macAddr, ipNeigh->PhysicalAddress,
+                      MAC_ADDRESS_LEN);
+        InitializeListHead(&neigh->fwdList);
+    }
+    return neigh;
+}
+
+
+/*
+ * Allocates a zeroed route cache entry and fills it with the destination
+ * prefix and the IPv4 next hop of 'ipRoute'.  The entry is not linked into
+ * any table by this function.
+ *
+ * Returns the new entry, or NULL when the allocation failed.
+ */
+static POVS_IPFORWARD_ENTRY
+OvsCreateIPForwardEntry(PMIB_IPFORWARD_ROW2 ipRoute)
+{
+    POVS_IPFORWARD_ENTRY route;
+
+    ASSERT(ipRoute);
+
+    route =
+       (POVS_IPFORWARD_ENTRY)OvsAllocateMemory(sizeof (OVS_IPFORWARD_ENTRY));
+    if (route != NULL) {
+        RtlZeroMemory(route, sizeof (OVS_IPFORWARD_ENTRY));
+        RtlCopyMemory(&route->prefix, &ipRoute->DestinationPrefix,
+                      sizeof (IP_ADDRESS_PREFIX));
+        route->nextHop = ipRoute->NextHop.Ipv4.sin_addr.s_addr;
+        InitializeListHead(&route->fwdList);
+    }
+    return route;
+}
+
+
+/*
+ * Allocates a zeroed forwarding cache entry holding a copy of 'fwdInfo'.
+ * Returns the new, unlinked entry, or NULL when the allocation failed.
+ */
+static POVS_FWD_ENTRY
+OvsCreateFwdEntry(POVS_FWD_INFO fwdInfo)
+{
+    POVS_FWD_ENTRY fwd;
+
+    fwd = (POVS_FWD_ENTRY)OvsAllocateMemory(sizeof (OVS_FWD_ENTRY));
+    if (fwd != NULL) {
+        RtlZeroMemory(fwd, sizeof (OVS_FWD_ENTRY));
+        RtlCopyMemory(&fwd->info, fwdInfo, sizeof (OVS_FWD_INFO));
+    }
+    return fwd;
+}
+
+
+/*
+ * Unlinks 'fwdEntry' from the forwarding hash table and from the lists of
+ * its route and neighbor entries, drops the reference it held on each of
+ * them, and frees it.  A route or neighbor entry whose reference count
+ * reaches zero is unlinked and freed as well; the neighbor's position in
+ * the sorted list is released under ovsIpHelperLock.
+ */
+static VOID
+OvsRemoveFwdEntry(POVS_FWD_ENTRY fwdEntry)
+{
+    POVS_IPFORWARD_ENTRY route = fwdEntry->ipf;
+    POVS_IPNEIGH_ENTRY neigh = fwdEntry->ipn;
+
+    /* Detach the entry from all three lists it lives on. */
+    RemoveEntryList(&fwdEntry->link);
+    RemoveEntryList(&fwdEntry->ipfLink);
+    RemoveEntryList(&fwdEntry->ipnLink);
+    ovsNumFwdEntries--;
+
+    route->refCount--;
+    neigh->refCount--;
+
+    if (route->refCount == 0) {
+        ASSERT(IsListEmpty(&route->fwdList));
+        RemoveEntryList(&route->link);
+        OvsFreeMemory(route);
+    }
+
+    if (neigh->refCount == 0) {
+        ASSERT(IsListEmpty(&neigh->fwdList));
+        RemoveEntryList(&neigh->link);
+        NdisAcquireSpinLock(&ovsIpHelperLock);
+        RemoveEntryList(&neigh->slink);
+        NdisReleaseSpinLock(&ovsIpHelperLock);
+        OvsFreeMemory(neigh);
+    }
+
+    OvsFreeMemory(fwdEntry);
+}
+
+
+/*
+ * Removes route entry 'ipf' together with every forwarding entry that
+ * references it, then frees 'ipf'.
+ */
+static VOID
+OvsRemoveIPForwardEntry(POVS_IPFORWARD_ENTRY ipf)
+{
+    PLIST_ENTRY cur, nxt;
+
+    /*
+     * Extra reference so that removing the last forwarding entry does not
+     * free 'ipf' while its list is still being walked.
+     */
+    ipf->refCount++;
+
+    LIST_FORALL_SAFE(&ipf->fwdList, cur, nxt) {
+        OvsRemoveFwdEntry(CONTAINING_RECORD(cur, OVS_FWD_ENTRY, ipfLink));
+    }
+    ASSERT(ipf->refCount == 1);
+
+    RemoveEntryList(&ipf->link);
+    OvsFreeMemory(ipf);
+}
+
+
+/*
+ * Removes neighbor entry 'ipn' together with every forwarding entry that
+ * references it.  'ipn' itself is unlinked from the hash table and from the
+ * sorted list and freed once only the temporary reference taken here is
+ * left.
+ */
+static VOID
+OvsRemoveIPNeighEntry(POVS_IPNEIGH_ENTRY ipn)
+{
+    PLIST_ENTRY cur, nxt;
+
+    /* Keep 'ipn' alive while its forwarding list is being emptied. */
+    ipn->refCount++;
+
+    LIST_FORALL_SAFE(&ipn->fwdList, cur, nxt) {
+        OvsRemoveFwdEntry(CONTAINING_RECORD(cur, OVS_FWD_ENTRY, ipnLink));
+    }
+
+    if (ipn->refCount != 1) {
+        return;
+    }
+
+    RemoveEntryList(&ipn->link);
+    NdisAcquireSpinLock(&ovsIpHelperLock);
+    RemoveEntryList(&ipn->slink);
+    NdisReleaseSpinLock(&ovsIpHelperLock);
+    OvsFreeMemory(ipn);
+}
+
+
+/*
+ * Appends 'ipn' to the tail of ovsSortedIPNeighList.  If the current tail
+ * expires later than 'ipn', the timeout of 'ipn' is bumped by one first.
+ *
+ * NOTE(review): a bump of one does not by itself guarantee that the list
+ * stays ordered by timeout -- confirm whether that is intended.
+ */
+static VOID
+OvsAddToSortedNeighList(POVS_IPNEIGH_ENTRY ipn)
+{
+    if (!IsListEmpty(&ovsSortedIPNeighList)) {
+        POVS_IPNEIGH_ENTRY tail =
+            CONTAINING_RECORD(ovsSortedIPNeighList.Blink,
+                              OVS_IPNEIGH_ENTRY, slink);
+
+        if (ipn->timeout < tail->timeout) {
+            ipn->timeout++;
+        }
+    }
+    InsertTailList(&ovsSortedIPNeighList, &ipn->slink);
+}
+
+
+/*
+ * Inserts 'fwdEntry' into the forwarding hash table and ties it to route
+ * entry 'ipf' and neighbor entry 'ipn', taking one reference on each.
+ * A route or neighbor entry that had no references yet is first inserted
+ * into its own hash table (and, for the neighbor, into the sorted timeout
+ * list under ovsIpHelperLock).
+ */
+static VOID
+OvsAddIPFwdCache(POVS_FWD_ENTRY fwdEntry,
+                 POVS_IPFORWARD_ENTRY ipf,
+                 POVS_IPNEIGH_ENTRY ipn)
+
+{
+    UINT32 bucket;
+
+    /* Publish a brand-new neighbor entry. */
+    if (ipn->refCount == 0) {
+        NdisAcquireSpinLock(&ovsIpHelperLock);
+        OvsAddToSortedNeighList(ipn);
+        NdisReleaseSpinLock(&ovsIpHelperLock);
+
+        bucket = OvsJhashWords(&ipn->ipAddr, 1, OVS_HASH_BASIS) &
+                 OVS_NEIGH_HASH_TABLE_MASK;
+        InsertHeadList(&ovsNeighHashTable[bucket], &ipn->link);
+    }
+
+    /* Publish a brand-new route entry. */
+    if (ipf->refCount == 0) {
+        bucket = OvsHashIPPrefix(&ipf->prefix) & OVS_ROUTE_HASH_TABLE_MASK;
+        InsertHeadList(&ovsRouteHashTable[bucket], &ipf->link);
+    }
+
+    /* Tie the forwarding entry to its route ... */
+    fwdEntry->ipf = ipf;
+    InsertHeadList(&ipf->fwdList, &fwdEntry->ipfLink);
+    ipf->refCount++;
+
+    /* ... and to its neighbor. */
+    fwdEntry->ipn = ipn;
+    InsertHeadList(&ipn->fwdList, &fwdEntry->ipnLink);
+    ipn->refCount++;
+
+    bucket = OvsJhashWords(&fwdEntry->info.dstIpAddr, 1, OVS_HASH_BASIS) &
+             OVS_FWD_HASH_TABLE_MASK;
+    InsertHeadList(&ovsFwdHashTable[bucket], &fwdEntry->link);
+    ovsNumFwdEntries++;
+}
+
+
+/*
+ * Walks every bucket of the forwarding hash table and removes each entry
+ * whose source IPv4 address equals 'ipAddr'.
+ */
+static VOID
+OvsRemoveAllFwdEntriesWithSrc(UINT32 ipAddr)
+{
+    UINT32 bucket;
+    PLIST_ENTRY cur, nxt;
+
+    for (bucket = 0; bucket < OVS_FWD_HASH_TABLE_SIZE; bucket++) {
+        LIST_FORALL_SAFE(&ovsFwdHashTable[bucket], cur, nxt) {
+            POVS_FWD_ENTRY fwd =
+                CONTAINING_RECORD(cur, OVS_FWD_ENTRY, link);
+
+            if (fwd->info.srcIpAddr == ipAddr) {
+                OvsRemoveFwdEntry(fwd);
+            }
+        }
+    }
+}
+
+
+/*
+ * Empties the forwarding cache under the table write lock by removing
+ * every neighbor entry on the sorted list (which also removes the
+ * forwarding entries hanging off it).  Debug builds then assert that the
+ * forwarding and route hash tables are empty.
+ */
+static VOID
+OvsCleanupFwdTable(VOID)
+{
+    LOCK_STATE_EX lockState;
+    PLIST_ENTRY cur, nxt;
+    UINT32 bucket;
+
+    NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0);
+
+    if (ovsNumFwdEntries) {
+        LIST_FORALL_SAFE(&ovsSortedIPNeighList, cur, nxt) {
+            OvsRemoveIPNeighEntry(CONTAINING_RECORD(cur, OVS_IPNEIGH_ENTRY,
+                                                    slink));
+        }
+    }
+
+    for (bucket = 0; bucket < OVS_FWD_HASH_TABLE_SIZE; bucket++) {
+        ASSERT(IsListEmpty(&ovsFwdHashTable[bucket]));
+    }
+    for (bucket = 0; bucket < OVS_ROUTE_HASH_TABLE_SIZE; bucket++) {
+        ASSERT(IsListEmpty(&ovsRouteHashTable[bucket]));
+    }
+
+    NdisReleaseRWLock(ovsTableLock, &lockState);
+}
+
+
+/*
+ * Fails every queued IP helper request.  The queue is moved to a private
+ * list under ovsIpHelperLock; afterwards, outside the lock, the callback
+ * of each forwarding request is invoked with STATUS_DEVICE_NOT_READY and
+ * each request is freed.
+ */
+static VOID
+OvsCleanupIpHelperRequestList(VOID)
+{
+    LIST_ENTRY pending;
+    PLIST_ENTRY cur, nxt;
+    POVS_IP_HELPER_REQUEST req;
+
+    InitializeListHead(&pending);
+
+    NdisAcquireSpinLock(&ovsIpHelperLock);
+    if (ovsNumIpHelperRequests == 0) {
+        NdisReleaseSpinLock(&ovsIpHelperLock);
+        return;
+    }
+    OvsAppendList(&pending, &ovsIpHelperRequestList);
+    ovsNumIpHelperRequests = 0;
+    NdisReleaseSpinLock(&ovsIpHelperLock);
+
+    LIST_FORALL_SAFE(&pending, cur, nxt) {
+        req = CONTAINING_RECORD(cur, OVS_IP_HELPER_REQUEST, link);
+
+        /* Only forwarding requests carry a completion callback. */
+        if (req->command == OVS_IP_HELPER_FWD_REQUEST &&
+            req->fwdReq.cb) {
+            req->fwdReq.cb(req->fwdReq.nbl,
+                           req->fwdReq.inPort,
+                           &req->fwdReq.tunnelKey,
+                           req->fwdReq.cbData1,
+                           req->fwdReq.cbData2,
+                           STATUS_DEVICE_NOT_READY,
+                           NULL);
+        }
+        OvsFreeMemory(req);
+    }
+}
+
+
+
+/* Signals the event the IP helper thread waits on. */
+static VOID
+OvsWakeupIPHelper(VOID)
+{
+    KeSetEvent(&ovsIpHelperThreadContext.event,
+               0,       /* no priority increment */
+               FALSE);  /* not followed by a wait */
+}
+
+/*
+ * Called when the internal adapter goes away: resets the internal port
+ * number and the IP-configured flag under ovsIpHelperLock, then fails all
+ * queued requests and empties the forwarding cache.
+ */
+VOID
+OvsInternalAdapterDown(VOID)
+{
+    NdisAcquireSpinLock(&ovsIpHelperLock);
+    ovsInternalIPConfigured = FALSE;
+    ovsInternalPortNo = OVS_DEFAULT_PORT_NO;
+    NdisReleaseSpinLock(&ovsIpHelperLock);
+
+    OvsCleanupIpHelperRequestList();
+    OvsCleanupFwdTable();
+}
+
+
+/*
+ * Called when the internal adapter comes up.  Remembers its NetCfg instance
+ * GUID, clears the cached interface row and queues an
+ * OVS_IP_HELPER_INTERNAL_ADAPTER_UP request for the helper thread, waking
+ * the thread when this is the only counted request.  'portNo' becomes the
+ * internal port number.
+ *
+ * If the request cannot be allocated an error is logged and nothing is
+ * queued.
+ */
+VOID
+OvsInternalAdapterUp(UINT32 portNo,
+                     GUID *netCfgInstanceId)
+{
+    POVS_IP_HELPER_REQUEST req;
+
+    RtlCopyMemory(&ovsInternalNetCfgId, netCfgInstanceId, sizeof (GUID));
+    RtlZeroMemory(&ovsInternalRow, sizeof (MIB_IF_ROW2));
+
+    req = (POVS_IP_HELPER_REQUEST)
+          OvsAllocateMemory(sizeof (OVS_IP_HELPER_REQUEST));
+    if (!req) {
+        OVS_LOG_ERROR("Fail to initialize Internal Adapter");
+        return;
+    }
+    RtlZeroMemory(req, sizeof (OVS_IP_HELPER_REQUEST));
+    req->command = OVS_IP_HELPER_INTERNAL_ADAPTER_UP;
+
+    NdisAcquireSpinLock(&ovsIpHelperLock);
+    ovsInternalPortNo = portNo;
+    InsertHeadList(&ovsIpHelperRequestList, &req->link);
+    if (++ovsNumIpHelperRequests == 1) {
+        OvsWakeupIPHelper();
+    }
+    NdisReleaseSpinLock(&ovsIpHelperLock);
+}
+
+
+/*
+ * Helper-thread handler for OVS_IP_HELPER_INTERNAL_ADAPTER_UP.
+ *
+ * Frees 'request', then loads the interface row of the internal adapter
+ * (looked up by ovsInternalNetCfgId) and its IP interface row.  Once both
+ * are available the adapter is marked IP-configured.  Finally the unicast
+ * address entry is queried; a failure there is only logged.
+ */
+static VOID
+OvsHandleInternalAdapterUp(POVS_IP_HELPER_REQUEST request)
+{
+    GUID *guid = &ovsInternalNetCfgId;
+    MIB_UNICASTIPADDRESS_ROW ipEntry;
+    NTSTATUS rc;
+
+    /* The request carries no payload; release it right away. */
+    OvsFreeMemory(request);
+
+    rc = OvsGetIfEntry(&ovsInternalNetCfgId, &ovsInternalRow);
+    if (rc != STATUS_SUCCESS) {
+        OVS_LOG_ERROR("Fali to get IF entry for internal port with GUID"
+                      "  %08x-%04x-%04x-%04x-%02x%02x%02x%02x%02x%02x",
+                      guid->Data1, guid->Data2, guid->Data3,
+                      *(UINT16 *)guid->Data4,
+                      guid->Data4[2], guid->Data4[3], guid->Data4[4],
+                      guid->Data4[5], guid->Data4[6], guid->Data4[7]);
+        return;
+    }
+
+    rc = OvsGetIPInterfaceEntry(ovsInternalRow.InterfaceLuid,
+                                &ovsInternalIPRow);
+    if (rc != STATUS_SUCCESS) {
+        return;
+    }
+
+    NdisAcquireSpinLock(&ovsIpHelperLock);
+    ovsInternalIPConfigured = TRUE;
+    NdisReleaseSpinLock(&ovsIpHelperLock);
+
+    rc = OvsGetIPEntry(ovsInternalRow.InterfaceLuid, &ipEntry);
+    if (rc != STATUS_SUCCESS) {
+        OVS_LOG_INFO("Fali to get IP entry for internal port with GUID"
+                     "  %08x-%04x-%04x-%04x-%02x%02x%02x%02x%02x%02x",
+                     guid->Data1, guid->Data2, guid->Data3,
+                     *(UINT16 *)guid->Data4,
+                     guid->Data4[2], guid->Data4[3], guid->Data4[4],
+                     guid->Data4[5], guid->Data4[6], guid->Data4[7]);
+    }
+}
+
+
+/*
+ * Queues 'request' for the helper thread and wakes the thread when this is
+ * the only counted request.
+ *
+ * When the internal port is not set up or has no IP configuration the
+ * request is freed instead and STATUS_NDIS_ADAPTER_NOT_READY is returned;
+ * otherwise STATUS_SUCCESS is returned.
+ */
+static NTSTATUS
+OvsEnqueueIpHelperRequest(POVS_IP_HELPER_REQUEST request)
+{
+    BOOLEAN ready;
+
+    NdisAcquireSpinLock(&ovsIpHelperLock);
+
+    ready = (ovsInternalPortNo != OVS_DEFAULT_PORT_NO &&
+             ovsInternalIPConfigured != FALSE);
+    if (ready) {
+        InsertHeadList(&ovsIpHelperRequestList, &request->link);
+        ovsNumIpHelperRequests++;
+        if (ovsNumIpHelperRequests == 1) {
+            OvsWakeupIPHelper();
+        }
+    }
+
+    NdisReleaseSpinLock(&ovsIpHelperLock);
+
+    if (!ready) {
+        OvsFreeMemory(request);
+        return STATUS_NDIS_ADAPTER_NOT_READY;
+    }
+    return STATUS_SUCCESS;
+}
+
+
+/*
+ * Builds an OVS_IP_HELPER_FWD_REQUEST from the arguments ('tunnelKey' is
+ * copied) and hands it to OvsEnqueueIpHelperRequest().
+ *
+ * Returns STATUS_INSUFFICIENT_RESOURCES when the request cannot be
+ * allocated, otherwise the result of the enqueue.
+ */
+NTSTATUS
+OvsFwdIPHelperRequest(PNET_BUFFER_LIST nbl,
+                      UINT32 inPort,
+                      const OvsIPv4TunnelKey *tunnelKey,
+                      OvsIPHelperCallback cb,
+                      PVOID cbData1,
+                      PVOID cbData2)
+{
+    POVS_IP_HELPER_REQUEST req;
+
+    req = (POVS_IP_HELPER_REQUEST)
+          OvsAllocateMemory(sizeof (OVS_IP_HELPER_REQUEST));
+    if (!req) {
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    req->command = OVS_IP_HELPER_FWD_REQUEST;
+
+    /* What to forward ... */
+    req->fwdReq.nbl = nbl;
+    req->fwdReq.inPort = inPort;
+    RtlCopyMemory(&req->fwdReq.tunnelKey, tunnelKey,
+                  sizeof (*tunnelKey));
+
+    /* ... and whom to tell once the lookup is done. */
+    req->fwdReq.cb = cb;
+    req->fwdReq.cbData1 = cbData1;
+    req->fwdReq.cbData2 = cbData2;
+
+    return OvsEnqueueIpHelperRequest(req);
+}
+
+
+static VOID /*
+ * Helper-thread handler for OVS_IP_HELPER_FWD_REQUEST.
+ *
+ * Produces the forwarding information (source/destination IP and MAC,
+ * source port) for the tunnel destination in 'request': first from the
+ * forwarding cache, otherwise by querying the route and the neighbor and
+ * caching the result.  The request callback, if any, is then invoked with
+ * the final status and, on success, the forwarding information.
+ * 'request' is always freed before returning.
+ */
+OvsHandleFwdRequest(POVS_IP_HELPER_REQUEST request)
+{
+ SOCKADDR_INET dst, src;
+ NTSTATUS status = STATUS_SUCCESS;
+ MIB_IPFORWARD_ROW2 ipRoute;
+ MIB_IPNET_ROW2 ipNeigh;
+ OVS_FWD_INFO fwdInfo;
+ UINT32 ipAddr;
+ UINT32 srcAddr;
+ POVS_FWD_ENTRY fwdEntry = NULL;
+ POVS_IPFORWARD_ENTRY ipf = NULL;
+ POVS_IPNEIGH_ENTRY ipn = NULL;
+ LOCK_STATE_EX lockState;
+ BOOLEAN newIPF = FALSE; /* TRUE once 'ipf' was allocated here */
+ BOOLEAN newIPN = FALSE; /* TRUE once 'ipn' was allocated here */
+ BOOLEAN newFWD = FALSE; /* TRUE once 'fwdEntry' was allocated here */
+
+ status = OvsLookupIPFwdInfo(request->fwdReq.tunnelKey.dst,
+ &fwdInfo);
+ if (status == STATUS_SUCCESS) {
+ goto fwd_handle_nbl; /* cache hit: 'fwdInfo' is already filled in */
+ }
+
+ /* find IPRoute */
+ RtlZeroMemory(&dst, sizeof(dst));
+ RtlZeroMemory(&src, sizeof(src));
+ RtlZeroMemory(&ipRoute, sizeof (MIB_IPFORWARD_ROW2));
+ dst.si_family = AF_INET;
+ dst.Ipv4.sin_addr.s_addr = request->fwdReq.tunnelKey.dst;
+
+ status = OvsGetRoute(ovsInternalRow.InterfaceLuid, &dst, &ipRoute, &src);
+ if (status != STATUS_SUCCESS) {
+ goto fwd_handle_nbl;
+ }
+ srcAddr = src.Ipv4.sin_addr.s_addr;
+
+ /* find IPNeigh */
+ ipAddr = ipRoute.NextHop.Ipv4.sin_addr.s_addr;
+ if (ipAddr != 0) {
+ NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0);
+ ipn = OvsLookupIPNeighEntry(ipAddr);
+ if (ipn) {
+ goto fwd_request_done; /* jumps with the write lock still held */
+ }
+ NdisReleaseRWLock(ovsTableLock, &lockState);
+ }
+ RtlZeroMemory(&ipNeigh, sizeof (ipNeigh));
+ ipNeigh.InterfaceLuid.Value = ovsInternalRow.InterfaceLuid.Value;
+ if (ipAddr == 0) { /* no next hop in the route: resolve the destination itself (presumably on-link -- confirm) */
+ ipAddr = request->fwdReq.tunnelKey.dst;
+ }
+ status = OvsGetOrResolveIPNeigh(ipAddr, &ipNeigh);
+ if (status != STATUS_SUCCESS) {
+ goto fwd_handle_nbl;
+ }
+
+ NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0);
+
+fwd_request_done: /* the table write lock is held on every path reaching this label */
+
+ /*
+ * Initialize ipf
+ */
+ ipf = OvsLookupIPForwardEntry(&ipRoute.DestinationPrefix);
+ if (ipf == NULL) {
+ ipf = OvsCreateIPForwardEntry(&ipRoute);
+ if (ipf == NULL) {
+ NdisReleaseRWLock(ovsTableLock, &lockState);
+ status = STATUS_INSUFFICIENT_RESOURCES;
+ goto fwd_handle_nbl;
+ }
+ newIPF = TRUE;
+ } else { /* route already cached: reuse the source address of the first forwarding entry on its list */
+ PLIST_ENTRY link;
+ link = ipf->fwdList.Flink;
+ fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, ipfLink);
+ srcAddr = fwdEntry->info.srcIpAddr;
+ }
+
+ /*
+ * initialize ipn
+ */
+ if (ipn == NULL) {
+ ipn = OvsLookupIPNeighEntry(ipAddr); /* looked up again: the lock was not held while resolving */
+ if (ipn == NULL) {
+ ipn = OvsCreateIPNeighEntry(&ipNeigh);
+ if (ipn == NULL) {
+ NdisReleaseRWLock(ovsTableLock, &lockState);
+ status = STATUS_INSUFFICIENT_RESOURCES;
+ goto fwd_handle_nbl;
+ }
+ newIPN = TRUE;
+ }
+ }
+
+ /*
+ * initialize fwdEntry
+ */
+ fwdInfo.dstIpAddr = request->fwdReq.tunnelKey.dst;
+ fwdInfo.srcIpAddr = srcAddr;
+ RtlCopyMemory(fwdInfo.dstMacAddr, ipn->macAddr, MAC_ADDRESS_LEN);
+ RtlCopyMemory(fwdInfo.srcMacAddr, ovsInternalRow.PhysicalAddress,
+ MAC_ADDRESS_LEN);
+ fwdInfo.srcPortNo = request->fwdReq.inPort;
+
+ fwdEntry = OvsCreateFwdEntry(&fwdInfo);
+ if (fwdEntry == NULL) {
+ NdisReleaseRWLock(ovsTableLock, &lockState);
+ status = STATUS_INSUFFICIENT_RESOURCES;
+ goto fwd_handle_nbl;
+ }
+ newFWD = TRUE;
+ /*
+ * Cache the result
+ */
+ OvsAddIPFwdCache(fwdEntry, ipf, ipn);
+ NdisReleaseRWLock(ovsTableLock, &lockState);
+
+fwd_handle_nbl: /* reached with the table lock released */
+
+ if (status != STATUS_SUCCESS) { /* free only what this call allocated */
+ if (newFWD) {
+ ASSERT(fwdEntry != NULL);
+ OvsFreeMemory(fwdEntry);
+ }
+ if (newIPF) {
+ ASSERT(ipf && ipf->refCount == 0);
+ OvsFreeMemory(ipf);
+ }
+ if (newIPN) {
+ ASSERT(ipn && ipn->refCount == 0);
+ OvsFreeMemory(ipn);
+ }
+ ipAddr = request->fwdReq.tunnelKey.dst;
+ OVS_LOG_INFO("Fail to handle IP helper request for dst: %d.%d.%d.%d",
+ ipAddr & 0xff, (ipAddr >> 8) & 0xff,
+ (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff);
+ }
+ if (request->fwdReq.cb) { /* report the outcome; 'fwdInfo' is passed only on success */
+ request->fwdReq.cb(request->fwdReq.nbl,
+ request->fwdReq.inPort,
+ &request->fwdReq.tunnelKey,
+ request->fwdReq.cbData1,
+ request->fwdReq.cbData2,
+ status,
+ status == STATUS_SUCCESS ? &fwdInfo : NULL);
+ }
+ OvsFreeMemory(request);
+}
+
+
+/*
+ * Applies the result of a neighbor refresh to the cached entry for
+ * 'ipAddr'.
+ *
+ * 'status' is the outcome of the refresh and 'ipNeigh' the refreshed row.
+ * When the refresh failed the cached entry (and every forwarding entry
+ * using it) is removed.  When it succeeded, a changed MAC address is
+ * propagated to the neighbor entry and to all forwarding entries that
+ * reference it, the timeout is renewed and the entry is moved to the tail
+ * of the sorted list.  Nothing happens when no entry is cached for
+ * 'ipAddr'.
+ */
+static VOID
+OvsUpdateIPNeighEntry(UINT32 ipAddr,
+                      PMIB_IPNET_ROW2 ipNeigh,
+                      NTSTATUS status)
+{
+    UINT64 timeVal;
+    POVS_IPNEIGH_ENTRY ipn;
+    LOCK_STATE_EX lockState;
+    KeQuerySystemTime((LARGE_INTEGER *)&timeVal);
+    /*
+     * Removing the entry needs the write lock; the common refresh path
+     * starts with the read lock only.
+     */
+    if (status != STATUS_SUCCESS) {
+        NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0);
+    } else {
+        NdisAcquireRWLockRead(ovsTableLock, &lockState, 0);
+    }
+    ipn = OvsLookupIPNeighEntry(ipAddr);
+    if (ipn == NULL) {
+        NdisReleaseRWLock(ovsTableLock, &lockState);
+        return;
+    }
+    if (status != STATUS_SUCCESS) {
+        OvsRemoveIPNeighEntry(ipn);
+        NdisReleaseRWLock(ovsTableLock, &lockState);
+        return;
+    }
+
+    /*
+     * if mac changed, update all relevant fwdEntry
+     */
+    if (memcmp((const PVOID)ipn->macAddr,
+               (const PVOID)ipNeigh->PhysicalAddress,
+               (size_t)MAC_ADDRESS_LEN)) {
+        PLIST_ENTRY link;
+        POVS_FWD_ENTRY fwdEntry;
+        NdisReleaseRWLock(ovsTableLock, &lockState);
+        /*
+         * need update, release and acquire write lock
+         * This is not the common case.
+         */
+
+        NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0);
+        /* The entry may have gone while no lock was held. */
+        ipn = OvsLookupIPNeighEntry(ipAddr);
+
+        if (ipn == NULL) {
+            NdisReleaseRWLock(ovsTableLock, &lockState);
+            return;
+        }
+
+        /*
+         * Remember the new MAC in the neighbor entry itself as well:
+         * forwarding entries created later copy it from there, and the
+         * comparison above would otherwise keep failing on every refresh.
+         */
+        RtlCopyMemory(ipn->macAddr, ipNeigh->PhysicalAddress,
+                      MAC_ADDRESS_LEN);
+
+        LIST_FORALL(&ipn->fwdList, link) {
+            fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, ipnLink);
+            RtlCopyMemory(fwdEntry->info.dstMacAddr,
+                          ipNeigh->PhysicalAddress, MAC_ADDRESS_LEN);
+        }
+    }
+    /*
+     * update timeout and move to the end of
+     * the sorted list
+     */
+
+    NdisAcquireSpinLock(&ovsIpHelperLock);
+    RemoveEntryList(&ipn->slink);
+    ipn->timeout = timeVal + OVS_IPNEIGH_TIMEOUT;
+    OvsAddToSortedNeighList(ipn);
+    NdisReleaseSpinLock(&ovsIpHelperLock);
+    NdisReleaseRWLock(ovsTableLock, &lockState);
+}
+
+
+/*
+ * Refreshes the neighbor information for 'ipAddr' after its cache entry
+ * timed out and feeds the result, successful or not, to
+ * OvsUpdateIPNeighEntry().
+ */
+static VOID
+OvsHandleIPNeighTimeout(UINT32 ipAddr)
+{
+    MIB_IPNET_ROW2 refreshed;
+    NTSTATUS rc = OvsGetOrResolveIPNeigh(ipAddr, &refreshed);
+
+    OvsUpdateIPNeighEntry(ipAddr, &refreshed, rc);
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * The IP Helper system thread handles the following requests:
+ * 1. Initialize the internal port row when the internal port is connected
+ * 2. Handle FWD request
+ * 3. Handle IP Neigh timeout
+ *
+ * IP Interface, unicast address, and IP route changes are handled
+ * by the relevant callbacks.
+ *----------------------------------------------------------------------------
+ */
+/*
+ * Entry point of the IP helper system thread.
+ *
+ * 'data' is the thread context (exit flag and wake-up event).  Until the
+ * exit flag is set the thread alternates between draining the request
+ * queue, refreshing neighbor entries whose timeout has passed, and
+ * sleeping until it is woken up or the next neighbor timeout is due.
+ * ovsIpHelperLock is held while the queues are inspected and released
+ * around every handler call and around the wait.  On exit the forwarding
+ * cache and the request queue are cleaned up and the thread terminates
+ * itself.
+ */
+VOID
+OvsStartIpHelper(PVOID data)
+{
+    POVS_IP_HELPER_THREAD_CONTEXT context = (POVS_IP_HELPER_THREAD_CONTEXT)data;
+    POVS_IP_HELPER_REQUEST req;
+    POVS_IPNEIGH_ENTRY ipn;
+    PLIST_ENTRY link;
+    UINT64 timeVal, timeout;
+
+    OVS_LOG_INFO("Start the IP Helper Thread, context: %p", context);
+
+    NdisAcquireSpinLock(&ovsIpHelperLock);
+    while (!context->exit) {
+
+        /* 0 means: no neighbor timeout pending, sleep until woken up. */
+        timeout = 0;
+        while (!IsListEmpty(&ovsIpHelperRequestList)) {
+            if (context->exit) {
+                goto ip_helper_wait;
+            }
+            link = ovsIpHelperRequestList.Flink;
+            RemoveEntryList(link);
+            /*
+             * Keep the counter in step with the queue; the enqueue paths
+             * wake this thread only when the counter goes from 0 to 1.
+             */
+            if (ovsNumIpHelperRequests > 0) {
+                ovsNumIpHelperRequests--;
+            }
+            NdisReleaseSpinLock(&ovsIpHelperLock);
+            req = CONTAINING_RECORD(link, OVS_IP_HELPER_REQUEST, link);
+            switch (req->command) {
+            case OVS_IP_HELPER_INTERNAL_ADAPTER_UP:
+                OvsHandleInternalAdapterUp(req);
+                break;
+            case OVS_IP_HELPER_FWD_REQUEST:
+                OvsHandleFwdRequest(req);
+                break;
+            default:
+                OvsFreeMemory(req);
+            }
+            NdisAcquireSpinLock(&ovsIpHelperLock);
+        }
+
+        /* for now, let us hold the lock here, if this cause any issue
+         * we will change to use IpHelper lock only to protect
+         * IPN
+         */
+        while (!IsListEmpty(&ovsSortedIPNeighList)) {
+            UINT32 ipAddr;
+            if (context->exit) {
+                goto ip_helper_wait;
+            }
+            link = ovsSortedIPNeighList.Flink;
+            ipn = CONTAINING_RECORD(link, OVS_IPNEIGH_ENTRY, slink);
+            KeQuerySystemTime((LARGE_INTEGER *)&timeVal);
+            if (ipn->timeout > timeVal) {
+                /* Head entry not due yet: sleep until it is. */
+                timeout = ipn->timeout;
+                break;
+            }
+            ipAddr = ipn->ipAddr;
+
+            NdisReleaseSpinLock(&ovsIpHelperLock);
+
+            OvsHandleIPNeighTimeout(ipAddr);
+
+            NdisAcquireSpinLock(&ovsIpHelperLock);
+        }
+        /* Requests may have arrived while the lock was released. */
+        if (!IsListEmpty(&ovsIpHelperRequestList)) {
+            continue;
+        }
+
+ip_helper_wait:
+        if (context->exit) {
+            break;
+        }
+
+        /*
+         * The request queue is empty here (checked above under the lock),
+         * so resynchronize the counter in case another dequeue path did
+         * not account for what it removed.
+         */
+        ovsNumIpHelperRequests = 0;
+
+        KeClearEvent(&context->event);
+        NdisReleaseSpinLock(&ovsIpHelperLock);
+
+        /*
+         * A pointer to a zero timeout makes KeWaitForSingleObject return
+         * immediately, which would turn this loop into a busy loop.  Pass
+         * NULL (wait indefinitely) when no neighbor timeout is pending,
+         * otherwise the absolute time of the next timeout.
+         */
+        KeWaitForSingleObject(&context->event, Executive, KernelMode,
+                              FALSE,
+                              timeout == 0 ? NULL : (LARGE_INTEGER *)&timeout);
+        NdisAcquireSpinLock(&ovsIpHelperLock);
+    }
+    NdisReleaseSpinLock(&ovsIpHelperLock);
+    OvsCleanupFwdTable();
+    OvsCleanupIpHelperRequestList();
+
+    OVS_LOG_INFO("Terminating the OVS IP Helper system thread");
+
+    PsTerminateSystemThread(STATUS_SUCCESS);
+}
+
+
+/*
+ * Sets up the IP helper: allocates and initializes the three hash tables,
+ * the table read/write lock, the helper spin lock and the request queue,
+ * registers the MIB change notifications and starts the helper thread.
+ *
+ * 'ndisFilterHandle' is used to allocate the read/write lock.
+ *
+ * Returns STATUS_SUCCESS when everything was set up.  On any failure all
+ * resources acquired so far are released again and the failing status is
+ * returned.
+ */
+NTSTATUS
+OvsInitIpHelper(NDIS_HANDLE ndisFilterHandle)
+{
+    NTSTATUS status;
+    HANDLE threadHandle;
+    UINT32 i;
+
+    ovsFwdHashTable = (PLIST_ENTRY)OvsAllocateMemory(sizeof(LIST_ENTRY) *
+                                                     OVS_FWD_HASH_TABLE_SIZE);
+
+    ovsRouteHashTable = (PLIST_ENTRY)OvsAllocateMemory(sizeof(LIST_ENTRY) *
+                                                       OVS_ROUTE_HASH_TABLE_SIZE);
+
+    ovsNeighHashTable = (PLIST_ENTRY)OvsAllocateMemory(sizeof(LIST_ENTRY) *
+                                                       OVS_NEIGH_HASH_TABLE_SIZE);
+
+    RtlZeroMemory(&ovsInternalRow, sizeof(MIB_IF_ROW2));
+    RtlZeroMemory(&ovsInternalIPRow, sizeof (MIB_IPINTERFACE_ROW));
+    ovsInternalIP = 0;
+
+    ovsInternalPortNo = OVS_DEFAULT_PORT_NO;
+
+    InitializeListHead(&ovsSortedIPNeighList);
+
+    ovsTableLock = NdisAllocateRWLock(ndisFilterHandle);
+    NdisAllocateSpinLock(&ovsIpHelperLock);
+
+    InitializeListHead(&ovsIpHelperRequestList);
+    ovsNumIpHelperRequests = 0;
+    ipInterfaceNotificationHandle = NULL;
+    ipRouteNotificationHandle = NULL;
+    unicastIPNotificationHandle = NULL;
+
+    /* The allocations above are checked together, after all globals are
+     * in a defined state for the cleanup path. */
+    if (ovsFwdHashTable == NULL ||
+        ovsRouteHashTable == NULL ||
+        ovsNeighHashTable == NULL ||
+        ovsTableLock == NULL) {
+        status = STATUS_INSUFFICIENT_RESOURCES;
+        goto init_cleanup;
+    }
+
+    for (i = 0; i < OVS_FWD_HASH_TABLE_SIZE; i++) {
+        InitializeListHead(&ovsFwdHashTable[i]);
+    }
+
+    for (i = 0; i < OVS_ROUTE_HASH_TABLE_SIZE; i++) {
+        InitializeListHead(&ovsRouteHashTable[i]);
+    }
+
+    for (i = 0; i < OVS_NEIGH_HASH_TABLE_SIZE; i++) {
+        InitializeListHead(&ovsNeighHashTable[i]);
+    }
+
+
+    KeInitializeEvent(&ovsIpHelperThreadContext.event, NotificationEvent,
+                      FALSE);
+    status = OvsRegisterChangeNotification();
+    ovsIpHelperThreadContext.exit = 0;
+    if (status == STATUS_SUCCESS) {
+        status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE,
+                                      NULL, NULL, NULL, OvsStartIpHelper,
+                                      &ovsIpHelperThreadContext);
+        if (status != STATUS_SUCCESS) {
+            goto init_cleanup;
+        }
+        /* Keep a reference to the thread object so that cleanup can wait
+         * for the thread to terminate; the handle is not needed. */
+        ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL,
+                                  KernelMode,
+                                  &ovsIpHelperThreadContext.threadObject,
+                                  NULL);
+        ZwClose(threadHandle);
+    }
+
+init_cleanup:
+
+    if (status != STATUS_SUCCESS) {
+        OvsCancelChangeNotification();
+        if (ovsFwdHashTable) {
+            OvsFreeMemory(ovsFwdHashTable);
+            ovsFwdHashTable = NULL;
+        }
+        if (ovsRouteHashTable) {
+            OvsFreeMemory(ovsRouteHashTable);
+            ovsRouteHashTable = NULL;
+        }
+        if (ovsNeighHashTable) {
+            OvsFreeMemory(ovsNeighHashTable);
+            ovsNeighHashTable = NULL;
+        }
+        if (ovsTableLock) {
+            NdisFreeRWLock(ovsTableLock);
+            ovsTableLock = NULL;
+        }
+        NdisFreeSpinLock(&ovsIpHelperLock);
+    }
+    /* Report the real outcome: after a failure everything has been torn
+     * down again and the caller must not treat the helper as usable. */
+    return status;
+}
+
+
+/*
+ * Shuts the IP helper down: cancels the change notifications, asks the
+ * helper thread to exit and waits for it, then releases the hash tables
+ * and both locks.
+ */
+VOID
+OvsCleanupIpHelper(VOID)
+{
+    OvsCancelChangeNotification();
+
+    /* Request thread exit and make sure the thread notices it. */
+    NdisAcquireSpinLock(&ovsIpHelperLock);
+    ovsIpHelperThreadContext.exit = 1;
+    OvsWakeupIPHelper();
+    NdisReleaseSpinLock(&ovsIpHelperLock);
+
+    /* Block until the thread has terminated, then drop our reference. */
+    KeWaitForSingleObject(ovsIpHelperThreadContext.threadObject,
+                          Executive,
+                          KernelMode,
+                          FALSE,
+                          NULL);
+    ObDereferenceObject(ovsIpHelperThreadContext.threadObject);
+
+    /* Hash tables. */
+    OvsFreeMemory(ovsFwdHashTable);
+    OvsFreeMemory(ovsRouteHashTable);
+    OvsFreeMemory(ovsNeighHashTable);
+
+    /* Locks. */
+    NdisFreeRWLock(ovsTableLock);
+    NdisFreeSpinLock(&ovsIpHelperLock);
+}
+
+/*
+ * Cancels queued forwarding requests.  With a non-NULL 'nbl' the first
+ * queued forwarding request for that NBL is cancelled; with a NULL 'nbl'
+ * every queued forwarding request is cancelled.
+ *
+ * Matching requests are moved to a private list under ovsIpHelperLock.
+ * Outside the lock the callback of each of them, if any, is invoked with
+ * STATUS_DEVICE_NOT_READY and the request is freed.
+ */
+VOID
+OvsCancelFwdIpHelperRequest(PNET_BUFFER_LIST nbl)
+{
+    PLIST_ENTRY link, next;
+    POVS_IP_HELPER_REQUEST req;
+    LIST_ENTRY list;
+    InitializeListHead(&list);
+
+    NdisAcquireSpinLock(&ovsIpHelperLock);
+    LIST_FORALL_SAFE(&ovsIpHelperRequestList, link, next) {
+        req = CONTAINING_RECORD(link, OVS_IP_HELPER_REQUEST, link);
+        if (req->command == OVS_IP_HELPER_FWD_REQUEST &&
+            (nbl == NULL || req->fwdReq.nbl == nbl)) {
+            RemoveEntryList(link);
+            /*
+             * The request leaves the queue, so it must no longer be
+             * counted; the enqueue paths rely on the counter to decide
+             * whether the helper thread needs a wake-up.
+             */
+            if (ovsNumIpHelperRequests > 0) {
+                ovsNumIpHelperRequests--;
+            }
+            InsertHeadList(&list, link);
+            if (nbl != NULL) {
+                break;
+            }
+        }
+    }
+    NdisReleaseSpinLock(&ovsIpHelperLock);
+
+    LIST_FORALL_SAFE(&list, link, next) {
+        req = CONTAINING_RECORD(link, OVS_IP_HELPER_REQUEST, link);
+        if (req->fwdReq.cb) {
+            req->fwdReq.cb(req->fwdReq.nbl, req->fwdReq.inPort,
+                           &req->fwdReq.tunnelKey,
+                           req->fwdReq.cbData1,
+                           req->fwdReq.cbData2,
+                           STATUS_DEVICE_NOT_READY,
+                           NULL);
+        }
+        OvsFreeMemory(req);
+    }
+}
diff --git a/datapath-windows/ovsext/OvsIpHelper.h b/datapath-windows/ovsext/OvsIpHelper.h
new file mode 100644
index 000000000..dc2602bde
--- /dev/null
+++ b/datapath-windows/ovsext/OvsIpHelper.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_IP_HELPER_H_
+#define __OVS_IP_HELPER_H_ 1
+
+#include <ntddk.h>
+#include <netioapi.h>
+
+/*
+ * Sizes of the forwarding, route and neighbor hash tables. Each size is a
+ * power of two and each *_MASK is (size - 1), presumably so a hash value
+ * can be reduced to a bucket index with a bitwise AND.
+ */
+#define OVS_FWD_HASH_TABLE_SIZE ((UINT32)1 << 10)
+#define OVS_FWD_HASH_TABLE_MASK (OVS_FWD_HASH_TABLE_SIZE - 1)
+
+#define OVS_ROUTE_HASH_TABLE_SIZE ((UINT32)1 << 8)
+#define OVS_ROUTE_HASH_TABLE_MASK (OVS_ROUTE_HASH_TABLE_SIZE - 1)
+
+#define OVS_NEIGH_HASH_TABLE_SIZE ((UINT32)1 << 8)
+#define OVS_NEIGH_HASH_TABLE_MASK (OVS_NEIGH_HASH_TABLE_SIZE - 1)
+
+/* NOTE(review): 100000000 equals 10 s only if the unit is 100 ns; confirm
+ * against the code that consumes this constant. */
+#define OVS_IPNEIGH_TIMEOUT 100000000 // 10 s
+
+
+/*
+ * Neighbor entry: pairs an IPv4 address ('ipAddr') with a MAC address
+ * ('macAddr') and carries a reference count and a timeout value.
+ * NOTE(review): the roles of the three list links ('link', 'slink',
+ * 'fwdList') are not visible in this header; confirm in OvsIpHelper.c.
+ */
+typedef struct _OVS_IPNEIGH_ENTRY {
+ UINT8 macAddr[MAC_ADDRESS_LEN];
+ UINT16 refCount;
+ UINT32 ipAddr;
+ UINT32 pad;
+ UINT64 timeout;
+ LIST_ENTRY link;
+ LIST_ENTRY slink;
+ LIST_ENTRY fwdList;
+} OVS_IPNEIGH_ENTRY, *POVS_IPNEIGH_ENTRY;
+
+/*
+ * Route entry: an address prefix together with a next-hop address and a
+ * reference count. 'link' and 'fwdList' are list linkage; presumably
+ * 'fwdList' heads the forwarding entries using this route (TODO confirm).
+ */
+typedef struct _OVS_IPFORWARD_ENTRY {
+ IP_ADDRESS_PREFIX prefix;
+ UINT32 nextHop;
+ UINT16 refCount;
+ LIST_ENTRY link;
+ LIST_ENTRY fwdList;
+} OVS_IPFORWARD_ENTRY, *POVS_IPFORWARD_ENTRY;
+
+/*
+ * Forwarding information: destination/source IPv4 addresses,
+ * destination/source MAC addresses and a source port number. The union
+ * overlays the fields with three 64-bit words ('value') so the whole
+ * record can also be accessed as raw words.
+ */
+typedef union _OVS_FWD_INFO {
+ struct {
+ UINT32 dstIpAddr;
+ UINT32 srcIpAddr;
+ UINT8 dstMacAddr[MAC_ADDRESS_LEN];
+ UINT8 srcMacAddr[MAC_ADDRESS_LEN];
+ UINT32 srcPortNo;
+ };
+ UINT64 value[3];
+} OVS_FWD_INFO, *POVS_FWD_INFO;
+
+/*
+ * Forwarding entry: an OVS_FWD_INFO plus pointers to the route entry
+ * ('ipf') and neighbor entry ('ipn') it refers to. 'link', 'ipfLink' and
+ * 'ipnLink' are list linkage; presumably one per owning list (the hash
+ * table, the route's list, the neighbor's list) - TODO confirm.
+ */
+typedef struct _OVS_FWD_ENTRY {
+ OVS_FWD_INFO info;
+ POVS_IPFORWARD_ENTRY ipf;
+ POVS_IPNEIGH_ENTRY ipn;
+ LIST_ENTRY link;
+ LIST_ENTRY ipfLink;
+ LIST_ENTRY ipnLink;
+} OVS_FWD_ENTRY, *POVS_FWD_ENTRY;
+
+
+/* Values stored in the 'command' field of OVS_IP_HELPER_REQUEST. */
+enum {
+ OVS_IP_HELPER_INTERNAL_ADAPTER_UP,
+ OVS_IP_HELPER_FWD_REQUEST,
+};
+
+/*
+ * Completion callback attached to a forwarding request. It receives the
+ * values stored in the request (NBL, input port, tunnel key, two opaque
+ * caller pointers), a completion status and the forwarding info.
+ * OvsCancelFwdIpHelperRequest() invokes it with STATUS_DEVICE_NOT_READY
+ * and a NULL 'fwdInfo'.
+ */
+typedef VOID (*OvsIPHelperCallback)(PNET_BUFFER_LIST nbl,
+ UINT32 inPort,
+ PVOID tunnelKey,
+ PVOID cbData1,
+ PVOID cbData2,
+ NTSTATUS status,
+ POVS_FWD_INFO fwdInfo);
+
+/*
+ * Payload of a forwarding request: the NBL, its input port, a copy of the
+ * tunnel key, and the callback with its two opaque arguments.
+ */
+typedef struct _OVS_FWD_REQUEST_INFO {
+ PNET_BUFFER_LIST nbl;
+ UINT32 inPort;
+ OvsIPv4TunnelKey tunnelKey;
+ OvsIPHelperCallback cb;
+ PVOID cbData1;
+ PVOID cbData2;
+} OVS_FWD_REQUEST_INFO, *POVS_FWD_REQUEST_INFO;
+
+
+/*
+ * One queued IP helper request. 'link' chains it on a request list,
+ * 'command' selects the request kind, and 'fwdReq' is the payload used
+ * when 'command' is OVS_IP_HELPER_FWD_REQUEST.
+ */
+typedef struct _OVS_IP_HELPER_REQUEST {
+ LIST_ENTRY link;
+ UINT32 command;
+ union {
+ OVS_FWD_REQUEST_INFO fwdReq;
+ UINT32 dummy;
+ };
+} OVS_IP_HELPER_REQUEST, *POVS_IP_HELPER_REQUEST;
+
+
+/*
+ * State of the IP helper thread. OvsCleanupIpHelper() sets 'exit' to 1
+ * and then waits on and dereferences 'threadObject'. 'event' is
+ * presumably what wakes the thread (TODO confirm in OvsIpHelper.c).
+ */
+typedef struct _OVS_IP_HELPER_THREAD_CONTEXT {
+ KEVENT event;
+ PVOID threadObject;
+ UINT32 exit;
+} OVS_IP_HELPER_THREAD_CONTEXT, *POVS_IP_HELPER_THREAD_CONTEXT;
+
+/* Set up / tear down the IP helper (tables, locks and helper thread). */
+NTSTATUS OvsInitIpHelper(NDIS_HANDLE ndisFilterHandle);
+VOID OvsCleanupIpHelper(VOID);
+
+/* Notifications about the internal adapter going up or down. */
+VOID OvsInternalAdapterUp(UINT32 portNo, GUID *netCfgInstanceId);
+VOID OvsInternalAdapterDown(VOID);
+
+/* Queues a forwarding request; 'cb' is the completion callback and
+ * 'cbData1'/'cbData2' are handed back to it unchanged. */
+NTSTATUS OvsFwdIPHelperRequest(PNET_BUFFER_LIST nbl, UINT32 inPort,
+ const PVOID tunnelKey,
+ OvsIPHelperCallback cb,
+ PVOID cbData1,
+ PVOID cbData2);
+NTSTATUS OvsLookupIPFwdInfo(UINT32 dstIp, POVS_FWD_INFO info);
+/* Cancels queued forwarding requests: all of them when 'nbl' is NULL,
+ * otherwise the first one queued for 'nbl'. */
+VOID OvsCancelFwdIpHelperRequest(PNET_BUFFER_LIST nbl);
+
+#endif /* __OVS_IP_HELPER_H_ */
diff --git a/datapath-windows/ovsext/OvsJhash.c b/datapath-windows/ovsext/OvsJhash.c
new file mode 100644
index 000000000..db08d0b46
--- /dev/null
+++ b/datapath-windows/ovsext/OvsJhash.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2008, 2009, 2010, 2012, 2014 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "precomp.h"
+
+/*
+ * Loads a 32-bit value from 'p_', which need not be aligned, by reading it
+ * one byte at a time. The bytes are assembled most-significant first and
+ * the result is passed through ntohl().
+ */
+static __inline UINT32
+GetUnalignedU32(const UINT32 *p_)
+{
+    const UINT8 *p = (const UINT8 *)p_;
+
+    /* Widen to UINT32 before shifting: a UINT8 promotes to a signed int,
+     * and shifting a byte >= 0x80 left by 24 would overflow it. */
+    return ntohl(((UINT32)p[0] << 24) | ((UINT32)p[1] << 16) |
+                 ((UINT32)p[2] << 8) | (UINT32)p[3]);
+}
+
+/* This is the public domain lookup3 hash by Bob Jenkins from
+ * http://burtleburtle.net/bob/c/lookup3.c, modified for style. */
+
+/* Rotates the 32-bit value 'x' left by 'k' bits. The callers in this file
+ * only pass constants between 4 and 25. */
+static __inline UINT32
+JhashRot(UINT32 x, INT k)
+{
+    UINT32 upper = x << k;
+    UINT32 lower = x >> (32 - k);
+
+    return upper | lower;
+}
+
+/*
+ * Mixing step: updates the three 32-bit state words in place through six
+ * rounds of subtract / xor-with-rotation / add. The statement order and
+ * the rotation constants define the hash and must not be changed.
+ */
+static __inline VOID
+JhashMix(UINT32 *a, UINT32 *b, UINT32 *c)
+{
+ *a -= *c; *a ^= JhashRot(*c, 4); *c += *b;
+ *b -= *a; *b ^= JhashRot(*a, 6); *a += *c;
+ *c -= *b; *c ^= JhashRot(*b, 8); *b += *a;
+ *a -= *c; *a ^= JhashRot(*c, 16); *c += *b;
+ *b -= *a; *b ^= JhashRot(*a, 19); *a += *c;
+ *c -= *b; *c ^= JhashRot(*b, 4); *b += *a;
+}
+
+/*
+ * Final step: seven xor / subtract-rotation rounds over the three state
+ * words, in place. Both public hash functions return 'c' after it runs.
+ */
+static __inline VOID
+JhashFinal(UINT32 *a, UINT32 *b, UINT32 *c)
+{
+ *c ^= *b; *c -= JhashRot(*b, 14);
+ *a ^= *c; *a -= JhashRot(*c, 11);
+ *b ^= *a; *b -= JhashRot(*a, 25);
+ *c ^= *b; *c -= JhashRot(*b, 16);
+ *a ^= *c; *a -= JhashRot(*c, 4);
+ *b ^= *a; *b -= JhashRot(*a, 14);
+ *c ^= *b; *c -= JhashRot(*b, 24);
+}
+
+/* Computes the Jenkins hash of the 'n' 32-bit words starting at 'p',
+ * seeded with 'basis'. 'p' must be properly aligned.
+ *
+ * Words are consumed three at a time while more than three remain; the
+ * last one to three words are added and finalized. With 'n' == 0 the
+ * initial state is returned without finalization. */
+UINT32
+OvsJhashWords(const UINT32 *p, SIZE_T n, UINT32 basis)
+{
+    const UINT32 *cur = p;
+    SIZE_T left = n;
+    UINT32 a, b, c;
+
+    a = b = c = 0xdeadbeef + (((UINT32) n) << 2) + basis;
+
+    for (; left > 3; left -= 3, cur += 3) {
+        a += cur[0];
+        b += cur[1];
+        c += cur[2];
+        JhashMix(&a, &b, &c);
+    }
+
+    /* Here 'left' is in the range 0..3. */
+    if (left == 0) {
+        return c;
+    }
+    if (left >= 3) {
+        c += cur[2];
+    }
+    if (left >= 2) {
+        b += cur[1];
+    }
+    a += cur[0];
+    JhashFinal(&a, &b, &c);
+
+    return c;
+}
+
+/* Computes the Jenkins hash of the 'n' bytes starting at 'p_', seeded
+ * with 'basis'.
+ *
+ * Full 12-byte groups are read with GetUnalignedU32() and mixed. A
+ * remaining tail of 1..11 bytes is copied into a zeroed three-word buffer
+ * and finalized. */
+UINT32
+OvsJhashBytes(const VOID *p_, SIZE_T n, UINT32 basis)
+{
+    const UINT32 *word = p_;
+    SIZE_T remaining = n;
+    UINT32 a, b, c;
+
+    a = b = c = 0xdeadbeef + (UINT32)n + basis;
+
+    for (; remaining >= 12; remaining -= 12, word += 3) {
+        a += GetUnalignedU32(word);
+        b += GetUnalignedU32(word + 1);
+        c += GetUnalignedU32(word + 2);
+        JhashMix(&a, &b, &c);
+    }
+
+    if (remaining != 0) {
+        UINT32 tail[3] = { 0, 0, 0 };
+
+        memcpy(tail, word, remaining);
+        a += tail[0];
+        b += tail[1];
+        c += tail[2];
+        JhashFinal(&a, &b, &c);
+    }
+
+    return c;
+}
diff --git a/datapath-windows/ovsext/OvsJhash.h b/datapath-windows/ovsext/OvsJhash.h
new file mode 100644
index 000000000..a12be8e27
--- /dev/null
+++ b/datapath-windows/ovsext/OvsJhash.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2008, 2009, 2010, 2012, 2014 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_JHASH_H_
+#define __OVS_JHASH_H_ 1
+
+/* This is the public domain lookup3 hash by Bob Jenkins from
+ * http://burtleburtle.net/bob/c/lookup3.c, modified for style.
+ *
+ * Use the functions in hash.h instead if you can. These are here just for
+ * places where we've exposed a hash function "on the wire" and don't want it
+ * to change. */
+
+/* Hash of 'n_word' 32-bit words, seeded with 'basis'. */
+uint32_t OvsJhashWords(const uint32_t *, size_t n_word, uint32_t basis);
+/* Hash of 'n_bytes' bytes, seeded with 'basis'. */
+uint32_t OvsJhashBytes(const void *, size_t n_bytes, uint32_t basis);
+
+#endif /* __OVS_JHASH_H_ */
diff --git a/datapath-windows/ovsext/OvsNetProto.h b/datapath-windows/ovsext/OvsNetProto.h
new file mode 100644
index 000000000..5e98206f5
--- /dev/null
+++ b/datapath-windows/ovsext/OvsNetProto.h
@@ -0,0 +1,368 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_NET_PROTO_H_
+#define __OVS_NET_PROTO_H_ 1
+
+#include "precomp.h"
+#include "OvsEth.h"
+
+#define ETH_ADDR_LENGTH 6
+/*
+ * There is a more inclusive definition of the Ethernet header (Eth_Header)
+ * in OvsEth.h that is used for packet parsing. For simple cases, use the
+ * following definition.
+ */
+typedef struct EthHdr {
+ UINT8 Destination[ETH_ADDR_LENGTH];
+ UINT8 Source[ETH_ADDR_LENGTH];
+ UINT16 Type;
+} EthHdr, *PEthHdr;
+
+/* IP version numbers. */
+#define IPV4 4
+#define IPV6 6
+
+/* Minimum header lengths and checksum field offsets, in bytes. */
+#define IP_HDR_MIN_LENGTH 20
+#define TCP_HDR_MIN_LENGTH 20
+#define TCP_CSUM_OFFSET 16
+#define UDP_CSUM_OFFSET 6
+#define ICMP_CSUM_OFFSET 2
+#define INET_CSUM_LENGTH (sizeof(UINT16))
+
+/* Convert between 4-byte units and bytes. */
+#define IP4_UNITS_TO_BYTES(x) ((x) << 2)
+#define IP4_BYTES_TO_UNITS(x) ((x) >> 2)
+
+// length unit for ip->ihl, tcp->doff
+typedef UINT32 IP4UnitLength;
+
+#define IP4_LENGTH_UNIT (sizeof(IP4UnitLength))
+#define IP4_HDR_MIN_LENGTH_IN_UNITS (IP_HDR_MIN_LENGTH / IP4_LENGTH_UNIT)
+#define TCP_HDR_MIN_LENGTH_IN_UNITS (TCP_HDR_MIN_LENGTH / IP4_LENGTH_UNIT)
+
+#define IP4_IHL_NO_OPTIONS IP4_HDR_MIN_LENGTH_IN_UNITS
+#define IP4_HDR_LEN(iph) IP4_UNITS_TO_BYTES((iph)->ihl)
+
+// length unit for ip->frag_off
+typedef UINT64 IP4FragUnitLength;
+
+#define IP4_FRAG_UNIT_LENGTH (sizeof(IP4FragUnitLength))
+
+// length unit for ipv6 header length.
+typedef UINT64 IP6UnitLength;
+
+/* TCP header length in bytes, from the 'doff' field (4-byte units). */
+#define TCP_HDR_LEN(tcph) IP4_UNITS_TO_BYTES((tcph)->doff)
+/* TCP payload length in bytes: the IPv4 total length minus the IP and TCP
+ * header lengths. The 'iph' argument is parenthesized so that expression
+ * arguments expand correctly. */
+#define TCP_DATA_LENGTH(iph, tcph) (ntohs((iph)->tot_len) - \
+                                    IP4_HDR_LEN(iph) - TCP_HDR_LEN(tcph))
+
+/* Values of the TCP data offset field, in 4-byte units. */
+#define TCP_DATA_OFFSET_NO_OPTIONS TCP_HDR_MIN_LENGTH_IN_UNITS
+#define TCP_DATA_OFFSET_WITH_TIMESTAMP 8
+
+/*
+ * This is the maximum value for the length field in the IP header. The meaning
+ * varies with IP protocols:
+ * IPv4: the total ip length (including ip header and extension)
+ * IPv6: the IP payload length (including IP extensions)
+ */
+#define IP_MAX_PACKET 0xFFFF
+
+/* IP protocol numbers. */
+#define IPPROTO_ICMP 1
+#define IPPROTO_IGMP 2
+#define IPPROTO_UDP 17
+#define IPPROTO_TCP 6
+#define IPPROTO_RSVD 0xff
+
+#define IPPROTO_HOPOPTS 0 /* Hop-by-hop option header */
+#define IPPROTO_IPV6 41 /* IPv6 in IPv6 */
+#define IPPROTO_ROUTING 43 /* Routing header */
+#define IPPROTO_FRAGMENT 44 /* Fragmentation/reassembly header */
+#define IPPROTO_GRE 47 /* General Routing Encapsulation */
+#define IPPROTO_ESP 50 /* Encap. Security Payload */
+#define IPPROTO_AH 51 /* Authentication header */
+#define IPPROTO_ICMPV6 58 /* ICMP for IPv6 */
+#define IPPROTO_NONE 59 /* No next header */
+#define IPPROTO_DSTOPTS 60 /* Destination options header */
+#define IPPROTO_ETHERIP 97 /* etherIp tunneled protocol */
+
+/* ICMPv6 types. */
+#define ND_NEIGHBOR_SOLICIT 135 /* neighbor solicitation */
+#define ND_NEIGHBOR_ADVERT 136 /* neighbor advertisement */
+
+/* IPv6 Neighbor discovery option header. */
+#define ND_OPT_SOURCE_LINKADDR 1
+#define ND_OPT_TARGET_LINKADDR 2
+
+/* Collides with MS definition (opposite order) */
+#define IP6F_OFF_HOST_ORDER_MASK 0xfff8
+
+/* ARP/RARP opcodes in host byte order. */
+#define ARPOP_REQUEST 1 /* ARP request. */
+#define ARPOP_REPLY 2 /* ARP reply. */
+#define RARPOP_REQUEST 3 /* RARP request. */
+#define RARPOP_REPLY 4 /* RARP reply. */
+
+ /* all ARP NBO's assume short ar_op */
+/* Each *_NBO value is the byte-swapped 16-bit form of the opcode above. */
+#define ARPOP_REQUEST_NBO 0x0100 /* NBO ARP request. */
+#define ARPOP_REPLY_NBO 0x0200 /* NBO ARP reply. */
+#define RARPOP_REQUEST_NBO 0x0300 /* NBO RARP request. */
+#define RARPOP_REPLY_NBO 0x0400 /* NBO RARP reply. */
+
+/* ICMP message types. */
+#define ICMP_ECHO 8 /* Echo Request */
+#define ICMP_ECHOREPLY 0 /* Echo Reply */
+#define ICMP_DEST_UNREACH 3 /* Destination Unreachable */
+
+/* IGMP related constants */
+#define IGMP_UNKNOWN 0x00 /* For IGMP packets where we don't know the type */
+ /* Eg: Fragmented packets without the header */
+
+/* Constants from RFC 3376 */
+#define IGMP_QUERY 0x11 /* IGMP Host Membership Query. */
+#define IGMP_V1REPORT 0x12 /* IGMPv1 Host Membership Report. */
+#define IGMP_V2REPORT 0x16 /* IGMPv2 Host Membership Report. */
+#define IGMP_V3REPORT 0x22 /* IGMPv3 Host Membership Report. */
+#define IGMP_V2LEAVE 0x17 /* IGMPv2 Leave. */
+
+/* Constants from RFC 2710 and RFC 3810 */
+#define MLD_QUERY 0x82 /* Multicast Listener Query. */
+#define MLD_V1REPORT 0x83 /* Multicast Listener V1 Report. */
+#define MLD_V2REPORT 0x8F /* Multicast Listener V2 Report. */
+#define MLD_DONE 0x84 /* Multicast Listener Done. */
+
+/* IPv4 offset flags */
+#define IP_CE 0x8000 /* Flag: "Congestion" */
+#define IP_DF 0x4000 /* Flag: "Don't Fragment" */
+#define IP_MF 0x2000 /* Flag: "More Fragments" */
+#define IP_OFFSET 0x1FFF /* "Fragment Offset" part */
+
+/* Byte-swapped forms of the masks above. */
+#define IP_OFFSET_NBO 0xFF1F /* "Fragment Offset" part, NBO */
+#define IP_DF_NBO 0x0040 /* NBO version of don't fragment */
+#define IP_MF_NBO 0x0020 /* NBO version of more fragments */
+
+#define IPOPT_RTRALT 0x94
+
+/* IP Explicit Congestion Notification bits (TOS field) */
+#define IP_ECN_NOT_ECT 0
+#define IP_ECN_ECT_1 1
+#define IP_ECN_ECT_0 2
+#define IP_ECN_CE 3
+#define IP_ECN_MASK 3
+
+/* TCP options */
+#define TCP_OPT_NOP 1 /* Padding */
+#define TCP_OPT_EOL 0 /* End of options */
+#define TCP_OPT_MSS 2 /* Segment size negotiating */
+#define TCP_OPT_WINDOW 3 /* Window scaling */
+#define TCP_OPT_SACK_PERM 4 /* SACK Permitted */
+#define TCP_OPT_SACK 5 /* SACK Block */
+#define TCP_OPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
+#define TCP_OPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
+
+/* Lengths of the corresponding TCP options. */
+#define TCP_OPT_LEN_MSS 4
+#define TCP_OPT_LEN_WINDOW 3
+#define TCP_OPT_LEN_SACK_PERM 2
+#define TCP_OPT_LEN_TIMESTAMP 10
+#define TCP_OPT_LEN_MD5SIG 18
+
+/* SOCKET_IPPROTO_* names: aliases of the IPPROTO_* values above where one
+ * exists, literal protocol numbers otherwise. */
+#define SOCKET_IPPROTO_HOPOPTS IPPROTO_HOPOPTS
+#define SOCKET_IPPROTO_ROUTING IPPROTO_ROUTING
+#define SOCKET_IPPROTO_FRAGMENT IPPROTO_FRAGMENT
+#define SOCKET_IPPROTO_AH IPPROTO_AH
+#define SOCKET_IPPROTO_ICMPV6 IPPROTO_ICMPV6
+#define SOCKET_IPPROTO_NONE IPPROTO_NONE
+#define SOCKET_IPPROTO_DSTOPTS IPPROTO_DSTOPTS
+#define SOCKET_IPPROTO_EON 80
+#define SOCKET_IPPROTO_ETHERIP IPPROTO_ETHERIP
+#define SOCKET_IPPROTO_ENCAP 98
+#define SOCKET_IPPROTO_PIM 103
+#define SOCKET_IPPROTO_IPCOMP 108
+#define SOCKET_IPPROTO_CARP 112
+#define SOCKET_IPPROTO_PFSYNC 240
+#define SOCKET_IPPROTO_RAW IPPROTO_RSVD
+
+/*
+ * Per-packet header layout summary: three 16-bit offsets followed by
+ * sixteen bits of flags, overlaid with a single 64-bit 'value' so the
+ * whole record can be read or written at once. 'l7Offset' and 'l4PayLoad'
+ * share storage.
+ */
+typedef union _OVS_PACKET_HDR_INFO {
+ struct {
+ UINT16 l3Offset;
+ UINT16 l4Offset;
+ union {
+ UINT16 l7Offset;
+ UINT16 l4PayLoad;
+ };
+ UINT16 isIPv4:1;
+ UINT16 isIPv6:1;
+ UINT16 isTcp:1;
+ UINT16 isUdp:1;
+ UINT16 tcpCsumNeeded:1;
+ UINT16 udpCsumNeeded:1;
+ UINT16 udpCsumZero:1;
+ UINT16 pad:9;
+ } ;
+ UINT64 value;
+} OVS_PACKET_HDR_INFO, *POVS_PACKET_HDR_INFO;
+
+/*
+ * IPv4 header. 'ihl' is the header length in 4-byte units (see
+ * IP4_HDR_LEN). TCP_DATA_LENGTH applies ntohs() to 'tot_len', so
+ * multi-byte fields are held in network byte order.
+ */
+typedef struct IPHdr {
+ UINT8 ihl:4,
+ version:4;
+ UINT8 tos;
+ UINT16 tot_len;
+ UINT16 id;
+ UINT16 frag_off;
+ UINT8 ttl;
+ UINT8 protocol;
+ UINT16 check;
+ UINT32 saddr;
+ UINT32 daddr;
+} IPHdr;
+
+
+ /*
+ * IPv6 fixed header
+ *
+ * BEWARE, it is incorrect. The first 4 bits of flow_lbl
+ * are glued to priority now, forming "class".
+ */
+
+typedef struct IPv6Hdr {
+ UINT8 priority:4,
+ version:4;
+ UINT8 flow_lbl[3];
+
+ UINT16 payload_len;
+ UINT8 nexthdr; /* type of the header that follows */
+ UINT8 hop_limit;
+
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+} IPv6Hdr;
+
+// Generic IPv6 extension header
+typedef struct IPv6ExtHdr {
+ UINT8 nextHeader; // type of the next header
+ UINT8 hdrExtLen; // length of header extensions (beyond 8 bytes)
+ UINT16 optPad1;
+ UINT32 optPad2;
+} IPv6ExtHdr;
+
+// IPv6 fragment header: next header type, offset/flags word and ident.
+typedef struct IPv6FragHdr {
+ UINT8 nextHeader;
+ UINT8 reserved;
+ UINT16 offlg;
+ UINT32 ident;
+} IPv6FragHdr;
+
+// Type/length prefix of an IPv6 neighbor discovery option.
+typedef struct IPv6NdOptHdr {
+ UINT8 type;
+ UINT8 len;
+} IPv6NdOptHdr;
+
+/* ICMP header: type, code and checksum. */
+typedef struct ICMPHdr {
+ UINT8 type;
+ UINT8 code;
+ UINT16 checksum;
+} ICMPHdr;
+
+/* Identifier and sequence number of an ICMP echo message. */
+typedef struct ICMPEcho {
+ UINT16 id;
+ UINT16 seq;
+} ICMPEcho;
+
+/* UDP header: source port, destination port, length and checksum. */
+typedef struct UDPHdr {
+ UINT16 source;
+ UINT16 dest;
+ UINT16 len;
+ UINT16 check;
+} UDPHdr;
+
+/*
+ * TCP header. 'doff' is the header length in 4-byte units (see
+ * TCP_HDR_LEN). The flag bits are declared as single-bit fields packed
+ * into one UINT16 together with 'res1' and 'doff'.
+ */
+typedef struct TCPHdr {
+ UINT16 source;
+ UINT16 dest;
+ UINT32 seq;
+ UINT32 ack_seq;
+ UINT16 res1:4,
+ doff:4,
+ fin:1,
+ syn:1,
+ rst:1,
+ psh:1,
+ ack:1,
+ urg:1,
+ ece:1,
+ cwr:1;
+ UINT16 window;
+ UINT16 check;
+ UINT16 urg_ptr;
+} TCPHdr;
+
+/* IPv4 pseudo header: addresses, a zero byte, protocol and length.
+ * Presumably used for TCP/UDP checksum computation (TODO confirm). */
+typedef struct PseudoHdr {
+ UINT32 sourceIPAddr;
+ UINT32 destIPAddr;
+ UINT8 zero;
+ UINT8 protocol;
+ UINT16 length;
+} PseudoHdr;
+
+/* Same layout with 16-byte (IPv6) addresses. */
+typedef struct PseudoHdrIPv6 {
+ UINT8 sourceIPAddr[16];
+ UINT8 destIPAddr[16];
+ UINT8 zero;
+ UINT8 protocol;
+ UINT16 length;
+} PseudoHdrIPv6;
+
+
+/* Fixed-size part of an ARP packet. */
+struct ArpHdr {
+ UINT16 ar_hrd; /* Format of hardware address. */
+ UINT16 ar_pro; /* Format of protocol address. */
+ UINT8 ar_hln; /* Length of hardware address. */
+ UINT8 ar_pln; /* Length of protocol address. */
+ UINT16 ar_op; /* ARP opcode (command). */
+};
+
+/* ARP packet with Eth_Address hardware addresses and 4-byte protocol
+ * addresses. */
+typedef struct EtherArp {
+ struct ArpHdr ea_hdr; /* fixed-size header */
+ Eth_Address arp_sha; /* sender hardware address */
+ UINT8 arp_spa[4]; /* sender protocol address */
+ Eth_Address arp_tha; /* target hardware address */
+ UINT8 arp_tpa[4]; /* target protocol address */
+} EtherArp;
+
+/* IGMP header: type, max response time, checksum and a 4-byte group
+ * address. */
+typedef struct IGMPHdr {
+ UINT8 type;
+ UINT8 maxResponseTime;
+ UINT16 csum;
+ UINT8 groupAddr[4];
+} IGMPHdr;
+
+/* Extra IGMPv3 fields; presumably they follow IGMPHdr in a v3 query
+ * (TODO confirm against the parser). */
+typedef struct IGMPV3Trailer {
+ UINT8 qrv:3,
+ s:1,
+ resv:4;
+ UINT8 qqic;
+ UINT16 numSources;
+} IGMPV3Trailer;
+
+/* An IP option with one type byte, one length byte and a 16-bit value. */
+typedef struct IPOpt {
+ UINT8 type;
+ UINT8 length;
+ UINT16 value;
+} IPOpt;
+
+/*
+ * IP protocol types
+ * (numeric values; the ICMP, TCP, UDP and GRE entries equal the
+ * IPPROTO_* constants defined earlier in this header)
+ */
+#define SOCKET_IPPROTO_IP 0
+#define SOCKET_IPPROTO_ICMP 1
+#define SOCKET_IPPROTO_TCP 6
+#define SOCKET_IPPROTO_UDP 17
+#define SOCKET_IPPROTO_GRE 47
+
+#endif /* __OVS_NET_PROTO_H_ */
diff --git a/datapath-windows/ovsext/OvsOid.c b/datapath-windows/ovsext/OvsOid.c
new file mode 100644
index 000000000..487191ad1
--- /dev/null
+++ b/datapath-windows/ovsext/OvsOid.c
@@ -0,0 +1,855 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "precomp.h"
+#include "OvsIoctl.h"
+#include "OvsSwitch.h"
+#include "OvsVport.h"
+#include "OvsNetProto.h"
+#include "OvsUser.h"
+#include "OvsFlow.h"
+#include "OvsEvent.h"
+#include "OvsUser.h"
+#include "OvsOid.h"
+
+/* Due to an imported header file */
+#pragma warning( disable:4505 )
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_DISPATCH
+#include "OvsDebug.h"
+
+/*
+ * Completion context for an internally generated OID request. On
+ * completion, OvsExtOidRequestComplete() stores the final status in
+ * 'status' and then sets 'oidComplete'.
+ */
+typedef struct _OVS_OID_CONTEXT {
+ NDIS_EVENT oidComplete;
+ NDIS_STATUS status;
+} OVS_OID_CONTEXT, *POVS_OID_CONTEXT;
+
+
+/* Forward declarations. */
+
+/* FilterOidRequestComplete handler; also called directly by
+ * OvsExtOidRequest() when the lower layer completes synchronously. */
+VOID
+OvsExtOidRequestComplete(NDIS_HANDLE filterModuleContext,
+ PNDIS_OID_REQUEST oidRequest,
+ NDIS_STATUS status);
+/* Per-request-type completion helpers: copy result fields from the cloned
+ * request back into the original request. */
+static VOID
+OvsOidRequestCompleteMethod(POVS_SWITCH_CONTEXT switchObject,
+ PNDIS_OID_REQUEST oidRequest,
+ PNDIS_OID_REQUEST origOidRequest,
+ NDIS_STATUS status);
+static VOID
+OvsOidRequestCompleteSetInfo(POVS_SWITCH_CONTEXT switchObject,
+ PNDIS_OID_REQUEST oidRequest,
+ PNDIS_OID_REQUEST origOidRequest,
+ NDIS_STATUS status);
+static VOID
+OvsOidRequestCompleteQuery(POVS_SWITCH_CONTEXT switchObject,
+ PNDIS_OID_REQUEST oidRequest,
+ PNDIS_OID_REQUEST origOidRequest,
+ NDIS_STATUS status);
+
+/* Handlers for the set-information OID families (port property, port,
+ * NIC). */
+static NDIS_STATUS
+OvsProcessSetOidPortProp(POVS_SWITCH_CONTEXT switchObject,
+ PNDIS_OID_REQUEST oidRequest);
+static NDIS_STATUS
+OvsProcessSetOidPort(POVS_SWITCH_CONTEXT switchObject,
+ PNDIS_OID_REQUEST oidRequest);
+static NDIS_STATUS
+OvsProcessSetOidNic(POVS_SWITCH_CONTEXT switchObject,
+ PNDIS_OID_REQUEST oidRequest);
+
+/*
+ * Validates an NDIS object header. Note the polarity: the result is TRUE
+ * when the header is NOT acceptable, i.e. when its type is not
+ * NDIS_OBJECT_TYPE_DEFAULT, its revision is below 'propRev', or its size
+ * is below 'propSize'.
+ */
+__inline BOOLEAN
+OvsCheckOidHeaderFunc(PNDIS_OBJECT_HEADER header,
+                      LONG propRev,
+                      LONG propSize)
+{
+    if (header->Type != NDIS_OBJECT_TYPE_DEFAULT) {
+        return TRUE;
+    }
+    if (header->Revision < propRev) {
+        return TRUE;
+    }
+    return header->Size < propSize;
+}
+
+/*
+ * Checks '_hdr' against revision '_rev', taking the minimum size from the
+ * matching NDIS_SIZEOF_<_rev> constant. Evaluates to TRUE when the check
+ * fails. The size name is built with a single token paste; there must be
+ * no '##' right after the comma, since pasting ',' with an identifier
+ * does not form a valid token.
+ */
+#define OvsCheckOidHeader(_hdr, _rev) \
+    OvsCheckOidHeaderFunc(_hdr, _rev, NDIS_SIZEOF_##_rev)
+
+/* Stores 'origRequest' in the first pointer-sized slot of the cloned
+ * request's SourceReserved area. */
+static __inline VOID
+OvsOidSetOrigRequest(PNDIS_OID_REQUEST clonedRequest,
+                     PNDIS_OID_REQUEST origRequest)
+{
+    PVOID *slot = (PVOID *)(&clonedRequest->SourceReserved[0]);
+
+    *slot = origRequest;
+}
+
+/* Reads back the pointer stored by OvsOidSetOrigRequest(). */
+static __inline PNDIS_OID_REQUEST
+OvsOidGetOrigRequest(PNDIS_OID_REQUEST clonedRequest)
+{
+    PVOID *slot = (PVOID *)(&clonedRequest->SourceReserved[0]);
+
+    return *slot;
+}
+
+/*
+ * Stores an OVS_OID_CONTEXT pointer in the second pointer-sized slot of
+ * the cloned request's SourceReserved area (the first slot holds the
+ * original request). The offset is sizeof(PVOID) rather than a literal 8
+ * so the slot stays inside the reserved area whatever the pointer size.
+ */
+static __inline VOID
+OvsOidSetContext(PNDIS_OID_REQUEST clonedRequest,
+                 POVS_OID_CONTEXT origRequest)
+{
+    *(PVOID*)(&clonedRequest->SourceReserved[sizeof(PVOID)]) = origRequest;
+}
+
+/* Reads back the pointer stored by OvsOidSetContext(). */
+static __inline POVS_OID_CONTEXT
+OvsOidGetContext(PNDIS_OID_REQUEST clonedRequest)
+{
+    return *((PVOID*)(&clonedRequest->SourceReserved[sizeof(PVOID)]));
+}
+
+/*
+ * Handles the port property add/update/delete OIDs.
+ *
+ * The object header is checked against the delete-parameters revision for
+ * OID_SWITCH_PORT_PROPERTY_DELETE and against the regular parameters
+ * revision otherwise. Returns NDIS_STATUS_INVALID_PARAMETER when the
+ * header check fails, NDIS_STATUS_NOT_SUPPORTED for the VLAN property
+ * type, and NDIS_STATUS_SUCCESS in every other case.
+ */
+static NDIS_STATUS
+OvsProcessSetOidPortProp(POVS_SWITCH_CONTEXT switchObject,
+                         PNDIS_OID_REQUEST oidRequest)
+{
+    struct _SET *setInfo = &(oidRequest->DATA.SET_INFORMATION);
+    PNDIS_SWITCH_PORT_PROPERTY_PARAMETERS portPropParam =
+        setInfo->InformationBuffer;
+    BOOLEAN headerRejected;
+
+    UNREFERENCED_PARAMETER(switchObject);
+
+    if (setInfo->Oid == OID_SWITCH_PORT_PROPERTY_DELETE) {
+        headerRejected = OvsCheckOidHeader(
+            (PNDIS_OBJECT_HEADER)portPropParam,
+            NDIS_SWITCH_PORT_PROPERTY_DELETE_PARAMETERS_REVISION_1);
+    } else {
+        /* Anything else is an add or an update request. */
+        headerRejected = OvsCheckOidHeader(
+            (PNDIS_OBJECT_HEADER)portPropParam,
+            NDIS_SWITCH_PORT_PROPERTY_PARAMETERS_REVISION_1);
+    }
+
+    if (headerRejected) {
+        return NDIS_STATUS_INVALID_PARAMETER;
+    }
+
+    if (portPropParam->PropertyType == NdisSwitchPortPropertyTypeVlan) {
+        return NDIS_STATUS_NOT_SUPPORTED;
+    }
+
+    return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ * Handles the switch port OIDs. After validating the parameter header
+ * (NDIS_STATUS_NOT_SUPPORTED on failure) it dispatches create, teardown
+ * and delete to the vport layer. Only create can change the returned
+ * status; any other OID, OID_SWITCH_PORT_UPDATED included, returns
+ * success without further work.
+ */
+static NDIS_STATUS
+OvsProcessSetOidPort(POVS_SWITCH_CONTEXT switchObject,
+                     PNDIS_OID_REQUEST oidRequest)
+{
+    struct _SET *setInfo = &(oidRequest->DATA.SET_INFORMATION);
+    PNDIS_SWITCH_PORT_PARAMETERS portParam = setInfo->InformationBuffer;
+
+    if (OvsCheckOidHeader((PNDIS_OBJECT_HEADER)portParam,
+                          NDIS_SWITCH_PORT_PARAMETERS_REVISION_1)) {
+        return NDIS_STATUS_NOT_SUPPORTED;
+    }
+
+    if (setInfo->Oid == OID_SWITCH_PORT_CREATE) {
+        return OvsCreatePort(switchObject, portParam);
+    }
+
+    if (setInfo->Oid == OID_SWITCH_PORT_TEARDOWN) {
+        OvsTeardownPort(switchObject, portParam);
+    } else if (setInfo->Oid == OID_SWITCH_PORT_DELETE) {
+        OvsDeletePort(switchObject, portParam);
+    }
+
+    return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ * Handles the switch NIC OIDs. After validating the parameter header
+ * (NDIS_STATUS_NOT_SUPPORTED on failure) it dispatches create, connect,
+ * update, disconnect and delete to the vport layer. Only create can
+ * change the returned status.
+ */
+static NDIS_STATUS
+OvsProcessSetOidNic(POVS_SWITCH_CONTEXT switchObject,
+                    PNDIS_OID_REQUEST oidRequest)
+{
+    struct _SET *setInfo = &(oidRequest->DATA.SET_INFORMATION);
+    PNDIS_SWITCH_NIC_PARAMETERS nicParam = setInfo->InformationBuffer;
+
+    if (OvsCheckOidHeader((PNDIS_OBJECT_HEADER)nicParam,
+                          NDIS_SWITCH_NIC_PARAMETERS_REVISION_1)) {
+        return NDIS_STATUS_NOT_SUPPORTED;
+    }
+
+    if (setInfo->Oid == OID_SWITCH_NIC_CREATE) {
+        return OvsCreateNic(switchObject, nicParam);
+    }
+
+    if (setInfo->Oid == OID_SWITCH_NIC_CONNECT) {
+        OvsConnectNic(switchObject, nicParam);
+    } else if (setInfo->Oid == OID_SWITCH_NIC_UPDATED) {
+        OvsUpdateNic(switchObject, nicParam);
+    } else if (setInfo->Oid == OID_SWITCH_NIC_DISCONNECT) {
+        OvsDisconnectNic(switchObject, nicParam);
+    } else if (setInfo->Oid == OID_SWITCH_NIC_DELETE) {
+        OvsDeleteNic(switchObject, nicParam);
+    }
+
+    return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ * Pre-processes a set-information OID request before it is passed down.
+ *
+ * '*complete' is set to TRUE when the request must be completed here with
+ * the returned (failure) status, and to FALSE when it should be forwarded.
+ */
+static NDIS_STATUS
+OvsProcessSetOid(POVS_SWITCH_CONTEXT switchObject,
+                 PNDIS_OID_REQUEST oidRequest,
+                 PBOOLEAN complete)
+{
+    struct _SET *setInfo = &(oidRequest->DATA.SET_INFORMATION);
+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+
+    *complete = FALSE;
+
+    OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu",
+                  oidRequest, setInfo->Oid);
+
+    if (!(setInfo->InformationBufferLength)) {
+        /* The documentation does not say what to do when there is no
+         * information buffer, so follow the sample code: there is nothing
+         * to inspect, just let the request be passed down. */
+        OVS_LOG_INFO("Buffer Length Zero");
+    } else if (setInfo->InformationBufferLength <
+               sizeof(NDIS_OBJECT_HEADER)) {
+        /* A non-empty buffer must at least hold an object header. */
+        status = NDIS_STATUS_INVALID_OID;
+        OVS_LOG_INFO("Invalid input %d", setInfo->InformationBufferLength);
+    } else {
+        switch (setInfo->Oid) {
+        case OID_SWITCH_PORT_PROPERTY_ADD:
+        case OID_SWITCH_PORT_PROPERTY_UPDATE:
+        case OID_SWITCH_PORT_PROPERTY_DELETE:
+            status = OvsProcessSetOidPortProp(switchObject, oidRequest);
+            break;
+
+        case OID_SWITCH_PORT_CREATE:
+        case OID_SWITCH_PORT_UPDATED:
+        case OID_SWITCH_PORT_TEARDOWN:
+        case OID_SWITCH_PORT_DELETE:
+            status = OvsProcessSetOidPort(switchObject, oidRequest);
+            break;
+
+        case OID_SWITCH_NIC_CREATE:
+        case OID_SWITCH_NIC_CONNECT:
+        case OID_SWITCH_NIC_UPDATED:
+        case OID_SWITCH_NIC_DISCONNECT:
+        case OID_SWITCH_NIC_DELETE:
+            status = OvsProcessSetOidNic(switchObject, oidRequest);
+            break;
+
+        default:
+            /* OIDs we do not handle are forwarded untouched. */
+            break;
+        }
+    }
+
+    /* Any failure means the request is completed here. */
+    if (status != NDIS_STATUS_SUCCESS) {
+        *complete = TRUE;
+    }
+
+    OVS_LOG_TRACE("Exit: status %8x.", status);
+    return status;
+}
+
+/*
+ * Pre-processes a method OID request. Only OID_SWITCH_NIC_REQUEST is
+ * inspected: a bad header yields NDIS_STATUS_NOT_SUPPORTED, and a wrapped
+ * set request for OID_NIC_SWITCH_ALLOCATE_VF yields NDIS_STATUS_FAILURE.
+ * In both cases '*complete' is set to TRUE. '*bytesNeededParam' is always
+ * reset to 0.
+ */
+static NDIS_STATUS
+OvsProcessMethodOid(POVS_SWITCH_CONTEXT switchObject,
+                    PNDIS_OID_REQUEST oidRequest,
+                    PBOOLEAN complete,
+                    PULONG bytesNeededParam)
+{
+    struct _METHOD *methodInfo = &(oidRequest->DATA.METHOD_INFORMATION);
+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+    PNDIS_OBJECT_HEADER header;
+    PNDIS_OID_REQUEST nicOidRequest;
+
+    UNREFERENCED_PARAMETER(switchObject);
+
+    OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu",
+                  oidRequest, methodInfo->Oid);
+
+    *complete = FALSE;
+    *bytesNeededParam = 0;
+    header = methodInfo->InformationBuffer;
+
+    /* OID_SWITCH_NIC_REQUEST is the only method OID handled for now. */
+    if (methodInfo->Oid == OID_SWITCH_NIC_REQUEST) {
+        if (OvsCheckOidHeader(header,
+                              NDIS_SWITCH_NIC_OID_REQUEST_REVISION_1)) {
+            OVS_LOG_INFO("Check Header failed");
+            status = NDIS_STATUS_NOT_SUPPORTED;
+            *complete = TRUE;
+        } else {
+            nicOidRequest =
+                (((PNDIS_SWITCH_NIC_OID_REQUEST)header)->OidRequest);
+
+            /* Fail the SR-IOV VF case */
+            if ((nicOidRequest->RequestType == NdisRequestSetInformation) &&
+                (nicOidRequest->DATA.SET_INFORMATION.Oid ==
+                 OID_NIC_SWITCH_ALLOCATE_VF)) {
+                OVS_LOG_INFO("We do not support Oid: "
+                             "OID_NIC_SWITCH_ALLOCATE_VF");
+                status = NDIS_STATUS_FAILURE;
+                *complete = TRUE;
+            }
+        }
+    }
+
+    OVS_LOG_TRACE("Exit: status %8x.", status);
+    return status;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * Implements filter driver's FilterOidRequest function.
+ *
+ * The incoming request is cloned; the clone is inspected and then either
+ * completed here (clone freed, the processing status returned) or passed
+ * down with NdisFOidRequest(). pendingOidCount is incremented once the
+ * clone exists and decremented when the clone is freed here or completed.
+ * --------------------------------------------------------------------------
+ */
+
+NDIS_STATUS
+OvsExtOidRequest(NDIS_HANDLE filterModuleContext,
+ PNDIS_OID_REQUEST oidRequest)
+{
+ POVS_SWITCH_CONTEXT switchObject = (POVS_SWITCH_CONTEXT)filterModuleContext;
+ NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+ PNDIS_OID_REQUEST clonedOidRequest = NULL;
+ struct _METHOD *methodInfo = &(oidRequest->DATA.METHOD_INFORMATION);
+ BOOLEAN completeOid = FALSE;
+ ULONG bytesNeeded = 0;
+
+ OVS_LOG_TRACE("Enter: oidRequest %p, reqType: %d",
+ oidRequest, oidRequest->RequestType);
+ status = NdisAllocateCloneOidRequest(switchObject->NdisFilterHandle,
+ oidRequest, OVS_MEMORY_TAG,
+ &clonedOidRequest);
+ if (status != NDIS_STATUS_SUCCESS) {
+ /* No clone was made, so there is nothing to undo. */
+ goto done;
+ }
+
+ NdisInterlockedIncrement(&(switchObject->pendingOidCount));
+
+ /* set the original oid request in cloned one. */
+ OvsOidSetOrigRequest(clonedOidRequest, oidRequest);
+ /* A NULL context marks the clone as not internally generated. */
+ OvsOidSetContext(clonedOidRequest, NULL);
+
+ switch(clonedOidRequest->RequestType) {
+ case NdisRequestSetInformation:
+ status = OvsProcessSetOid(switchObject, clonedOidRequest,
+ &completeOid);
+ break;
+ case NdisRequestMethod:
+ status = OvsProcessMethodOid(switchObject, clonedOidRequest,
+ &completeOid, &bytesNeeded);
+ break;
+ default:
+ /* We do not handle other request types as of now.
+ * We are just a passthrough for those. */
+ break;
+ }
+
+ if (completeOid == TRUE) {
+ /* dont leave any reference back to original request,
+ * even if we are freeing it up. */
+ OVS_LOG_INFO("Complete True oidRequest %p.", oidRequest);
+ OvsOidSetOrigRequest(clonedOidRequest, NULL);
+ NdisFreeCloneOidRequest(switchObject->NdisFilterHandle,
+ clonedOidRequest);
+ /* NOTE(review): this writes the METHOD view of the request union even
+ * when the request was a set-information request - confirm intended. */
+ methodInfo->BytesNeeded = bytesNeeded;
+ NdisInterlockedDecrement(&switchObject->pendingOidCount);
+ goto done;
+ }
+
+ /* pass the request down */
+ status = NdisFOidRequest(switchObject->NdisFilterHandle, clonedOidRequest);
+ if (status != NDIS_STATUS_PENDING) {
+ /* Completed synchronously: run our completion handler ourselves. */
+ OvsExtOidRequestComplete(switchObject, clonedOidRequest, status);
+ /* sample code says so */
+ status = NDIS_STATUS_PENDING;
+ }
+
+done:
+ OVS_LOG_TRACE("Exit: status %8x.", status);
+ return status;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * Implements filter driver's FilterOidRequestComplete function.
+ *
+ * 'oidRequest' is either a clone of a request received from above (its
+ * original request pointer is set) or an internally generated request
+ * (its OVS_OID_CONTEXT pointer is set).
+ *
+ * Internally generated: record 'status' in the context, signal its event,
+ * and leave the request itself untouched.
+ * Cloned: copy the result fields back into the original request, free the
+ * clone and complete the original with 'status'.
+ *
+ * pendingOidCount is decremented in both cases.
+ * --------------------------------------------------------------------------
+ */
+VOID
+OvsExtOidRequestComplete(NDIS_HANDLE filterModuleContext,
+                         PNDIS_OID_REQUEST oidRequest,
+                         NDIS_STATUS status)
+{
+    POVS_SWITCH_CONTEXT switchObject = (POVS_SWITCH_CONTEXT)filterModuleContext;
+    PNDIS_OID_REQUEST origReq = OvsOidGetOrigRequest(oidRequest);
+    POVS_OID_CONTEXT oidContext = OvsOidGetContext(oidRequest);
+
+    /* Exactly one of the two must be set: at least one ... */
+    ASSERT(origReq != NULL || oidContext != NULL);
+    /* ... and not both. */
+    ASSERT(origReq == NULL || oidContext == NULL);
+
+    OVS_LOG_TRACE("Enter: oidRequest %p, reqType: %d",
+                  oidRequest, oidRequest->RequestType);
+
+    if (origReq == NULL) {
+        NdisInterlockedDecrement(&(switchObject->pendingOidCount));
+        oidContext->status = status;
+        NdisSetEvent(&oidContext->oidComplete);
+        OVS_LOG_INFO("Internally generated request");
+        goto done;
+    }
+
+    switch(oidRequest->RequestType) {
+    case NdisRequestMethod:
+        OvsOidRequestCompleteMethod(switchObject, oidRequest,
+                                    origReq, status);
+        break;
+
+    case NdisRequestSetInformation:
+        OvsOidRequestCompleteSetInfo(switchObject, oidRequest,
+                                     origReq, status);
+        break;
+
+    case NdisRequestQueryInformation:
+    case NdisRequestQueryStatistics:
+    default:
+        OvsOidRequestCompleteQuery(switchObject, oidRequest,
+                                   origReq, status);
+        break;
+    }
+
+    /* Drop the back reference before the clone is released. */
+    OvsOidSetOrigRequest(oidRequest, NULL);
+
+    NdisFreeCloneOidRequest(switchObject->NdisFilterHandle, oidRequest);
+    NdisFOidRequestComplete(switchObject->NdisFilterHandle, origReq, status);
+    NdisInterlockedDecrement(&(switchObject->pendingOidCount));
+
+done:
+    OVS_LOG_TRACE("Exit");
+}
+
+/*
+ * Completion helper for method requests: copies the output buffer length
+ * and the bytes read / needed / written counters from the cloned request
+ * into the original one. 'status' and 'switchObject' are unused.
+ */
+static VOID
+OvsOidRequestCompleteMethod(POVS_SWITCH_CONTEXT switchObject,
+                            PNDIS_OID_REQUEST oidRequest,
+                            PNDIS_OID_REQUEST origOidRequest,
+                            NDIS_STATUS status)
+{
+    struct _METHOD *cloned = &(oidRequest->DATA.METHOD_INFORMATION);
+    struct _METHOD *original = &(origOidRequest->DATA.METHOD_INFORMATION);
+
+    UNREFERENCED_PARAMETER(switchObject);
+    UNREFERENCED_PARAMETER(status);
+
+    OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu",
+                  oidRequest, cloned->Oid);
+
+    original->BytesWritten = cloned->BytesWritten;
+    original->BytesNeeded = cloned->BytesNeeded;
+    original->BytesRead = cloned->BytesRead;
+    original->OutputBufferLength = cloned->OutputBufferLength;
+
+    OVS_LOG_TRACE("Exit");
+}
+
+/*
+ * Copies BytesRead/BytesNeeded of a completed set request from the cloned
+ * request into the original request. If the request did not succeed and it
+ * was a port or NIC create, the corresponding delete routine is called with
+ * the parameters found in the original request's information buffer.
+ */
+static VOID
+OvsOidRequestCompleteSetInfo(POVS_SWITCH_CONTEXT switchObject,
+                             PNDIS_OID_REQUEST oidRequest,
+                             PNDIS_OID_REQUEST origOidRequest,
+                             NDIS_STATUS status)
+{
+    struct _SET *clonedSet = &oidRequest->DATA.SET_INFORMATION;
+    struct _SET *origSet = &origOidRequest->DATA.SET_INFORMATION;
+    PNDIS_OBJECT_HEADER params = origSet->InformationBuffer;
+
+    OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu",
+                  oidRequest, clonedSet->Oid);
+
+    origSet->BytesNeeded = clonedSet->BytesNeeded;
+    origSet->BytesRead = clonedSet->BytesRead;
+
+    if (status != NDIS_STATUS_SUCCESS) {
+        /* The create failed further down the stack. */
+        if (clonedSet->Oid == OID_SWITCH_PORT_CREATE) {
+            OvsDeletePort(switchObject,
+                          (PNDIS_SWITCH_PORT_PARAMETERS)params);
+        } else if (clonedSet->Oid == OID_SWITCH_NIC_CREATE) {
+            OvsDeleteNic(switchObject,
+                         (PNDIS_SWITCH_NIC_PARAMETERS)params);
+        }
+    }
+
+    OVS_LOG_TRACE("Exit");
+}
+
+/*
+ * Copies BytesWritten/BytesNeeded of a completed query request from the
+ * cloned request 'oidRequest' into the original request 'origOidRequest'.
+ * 'switchObject' and 'status' are unused.
+ */
+static VOID
+OvsOidRequestCompleteQuery(POVS_SWITCH_CONTEXT switchObject,
+                           PNDIS_OID_REQUEST oidRequest,
+                           PNDIS_OID_REQUEST origOidRequest,
+                           NDIS_STATUS status)
+{
+    struct _QUERY *clonedQuery = &oidRequest->DATA.QUERY_INFORMATION;
+    struct _QUERY *origQuery = &origOidRequest->DATA.QUERY_INFORMATION;
+
+    UNREFERENCED_PARAMETER(status);
+    UNREFERENCED_PARAMETER(switchObject);
+
+    OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu",
+                  oidRequest, clonedQuery->Oid);
+
+    origQuery->BytesNeeded = clonedQuery->BytesNeeded;
+    origQuery->BytesWritten = clonedQuery->BytesWritten;
+
+    OVS_LOG_TRACE("Exit");
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * Implements filter driver's FilterCancelOidRequest function.
+ *
+ * Only traces the call; neither argument is otherwise used.
+ * --------------------------------------------------------------------------
+ */
+VOID
+OvsExtCancelOidRequest(NDIS_HANDLE filterModuleContext,
+                       PVOID requestId)
+{
+    UNREFERENCED_PARAMETER(requestId);
+    UNREFERENCED_PARAMETER(filterModuleContext);
+
+    OVS_LOG_TRACE("Enter: requestId: %p", requestId);
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * Utility function to issue the specified OID to the NDIS stack. The OID is
+ * directed towards the miniport edge of the extensible switch.
+ * An OID that gets issued may not complete immediately, and in such cases, the
+ * function waits for the OID to complete. Thus, this function must be called
+ * at PASSIVE_LEVEL.
+ *
+ * Only NdisRequestQueryInformation is supported. Any other 'oidType' fails
+ * with NDIS_STATUS_INVALID_PARAMETER and no request is issued.
+ *
+ * Returns the final status of the request: the value returned by
+ * NdisFOidRequest() if it completed synchronously, or the status recorded by
+ * the completion handler if it was pended. When that status is
+ * NDIS_STATUS_INVALID_LENGTH and 'outputSizeNeeded' is non-NULL,
+ * '*outputSizeNeeded' receives the BytesNeeded value of the query.
+ * --------------------------------------------------------------------------
+ */
+static NDIS_STATUS
+OvsIssueOidRequest(POVS_SWITCH_CONTEXT switchContext,
+                   NDIS_REQUEST_TYPE oidType,
+                   UINT32 oidRequestEnum,
+                   PVOID oidInputBuffer,
+                   UINT32 inputSize,
+                   PVOID oidOutputBuffer,
+                   UINT32 outputSize,
+                   UINT32 *outputSizeNeeded)
+{
+    NDIS_STATUS status;
+    PNDIS_OID_REQUEST oidRequest = NULL;
+    POVS_OID_CONTEXT oidContext = NULL;
+    ULONG OvsExtOidRequestId = 'ISVO';
+
+    DBG_UNREFERENCED_PARAMETER(inputSize);
+    DBG_UNREFERENCED_PARAMETER(oidInputBuffer);
+
+    OVS_LOG_TRACE("Enter: switchContext: %p, oidType: %d",
+                  switchContext, oidType);
+
+    ASSERT(oidInputBuffer == NULL || inputSize != 0);
+    ASSERT(oidOutputBuffer == NULL || outputSize != 0);
+    ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL);
+
+    oidRequest = OvsAllocateMemory(sizeof *oidRequest);
+    if (!oidRequest) {
+        status = NDIS_STATUS_RESOURCES;
+        goto done;
+    }
+
+    oidContext = OvsAllocateMemory(sizeof *oidContext);
+    if (!oidContext) {
+        status = NDIS_STATUS_RESOURCES;
+        goto cleanup;
+    }
+
+    RtlZeroMemory(oidRequest, sizeof *oidRequest);
+    RtlZeroMemory(oidContext, sizeof *oidContext);
+
+    oidRequest->Header.Type = NDIS_OBJECT_TYPE_OID_REQUEST;
+    oidRequest->Header.Revision = NDIS_OID_REQUEST_REVISION_1;
+    oidRequest->Header.Size = NDIS_SIZEOF_OID_REQUEST_REVISION_1;
+
+    oidRequest->RequestType = oidType;
+    oidRequest->PortNumber = 0;
+    oidRequest->Timeout = 0;
+    oidRequest->RequestId = (PVOID)(ULONG_PTR)OvsExtOidRequestId;
+
+    switch(oidType) {
+    case NdisRequestQueryInformation:
+        oidRequest->DATA.QUERY_INFORMATION.Oid = oidRequestEnum;
+        oidRequest->DATA.QUERY_INFORMATION.InformationBuffer = oidOutputBuffer;
+        oidRequest->DATA.QUERY_INFORMATION.InformationBufferLength = outputSize;
+        break;
+    default:
+        /* Do not hand a half-initialized request to NDIS. */
+        ASSERT(FALSE);
+        status = NDIS_STATUS_INVALID_PARAMETER;
+        goto cleanup;
+    }
+
+    /*
+     * We make use of the SourceReserved field in the OID request to store
+     * pointers to the original OID (if any), and also context for completion
+     * (if any).
+     */
+    oidContext->status = NDIS_STATUS_SUCCESS;
+    NdisInitializeEvent(&oidContext->oidComplete);
+
+    OvsOidSetOrigRequest(oidRequest, NULL);
+    OvsOidSetContext(oidRequest, oidContext);
+
+    NdisInterlockedIncrement(&(switchContext->pendingOidCount));
+    status = NdisFOidRequest(switchContext->NdisFilterHandle, oidRequest);
+    if (status == NDIS_STATUS_PENDING) {
+        /* The completion handler stores the final status in the context,
+         * decrements 'pendingOidCount' and signals the event. */
+        NdisWaitEvent(&oidContext->oidComplete, 0);
+        status = oidContext->status;
+    } else {
+        /* Completed synchronously: the completion handler does not run, so
+         * the value returned by NdisFOidRequest() is the final status. */
+        NdisInterlockedDecrement(&(switchContext->pendingOidCount));
+    }
+
+    /* 'oidType' is known to be NdisRequestQueryInformation at this point. */
+    if (status == NDIS_STATUS_INVALID_LENGTH && outputSizeNeeded != NULL) {
+        *outputSizeNeeded = oidRequest->DATA.QUERY_INFORMATION.BytesNeeded;
+    }
+
+    ASSERT(status != NDIS_STATUS_PENDING);
+
+cleanup:
+    if (oidContext) {
+        OvsFreeMemory(oidContext);
+    }
+    OvsFreeMemory(oidRequest);
+
+done:
+    OVS_LOG_TRACE("Exit: status %8x.", status);
+    return status;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * Utility function to query if the extensible switch has completed activation
+ * successfully.
+ *
+ * Issues OID_SWITCH_PARAMETERS and, on success, stores the returned
+ * 'IsActive' value in '*switchActive'. On failure '*switchActive' is left
+ * untouched.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsQuerySwitchActivationComplete(POVS_SWITCH_CONTEXT switchContext,
+                                 BOOLEAN *switchActive)
+{
+    NDIS_STATUS status = NDIS_STATUS_RESOURCES;
+    PNDIS_SWITCH_PARAMETERS params;
+    UINT32 bytesNeeded;
+
+    OVS_LOG_TRACE("Enter: switchContext: %p, switchActive: %p",
+                  switchContext, switchActive);
+
+    params = OvsAllocateMemory(sizeof *params);
+    if (params != NULL) {
+        /*
+         * Even though 'params' is supposed to be populated by the OID, it
+         * needs to be initialized nevertheless. Otherwise, OID returns
+         * NDIS_STATUS_INVALID_PARAMETER. This is not clear in the
+         * documentation.
+         */
+        RtlZeroMemory(params, sizeof *params);
+        params->Header.Type = NDIS_OBJECT_TYPE_DEFAULT;
+        params->Header.Revision = NDIS_SWITCH_PARAMETERS_REVISION_1;
+        params->Header.Size = NDIS_SIZEOF_NDIS_SWITCH_PARAMETERS_REVISION_1;
+
+        status = OvsIssueOidRequest(switchContext,
+                                    NdisRequestQueryInformation,
+                                    OID_SWITCH_PARAMETERS, NULL, 0,
+                                    (PVOID)params, sizeof *params,
+                                    &bytesNeeded);
+
+        ASSERT(status != NDIS_STATUS_INVALID_LENGTH);
+        ASSERT(status != NDIS_STATUS_PENDING);
+        if (status == NDIS_STATUS_SUCCESS) {
+            ASSERT(params->Header.Type == NDIS_OBJECT_TYPE_DEFAULT);
+            ASSERT(params->Header.Revision ==
+                   NDIS_SWITCH_PARAMETERS_REVISION_1);
+            ASSERT(params->Header.Size ==
+                   NDIS_SIZEOF_NDIS_SWITCH_PARAMETERS_REVISION_1);
+            *switchActive = params->IsActive;
+        }
+
+        OvsFreeMemory(params);
+    }
+
+    OVS_LOG_TRACE("Exit: status %8x, switchActive: %d.",
+                  status, *switchActive);
+    return status;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * Utility function to get the array of ports on the extensible switch. Upon
+ * success, the caller needs to free the returned array.
+ *
+ * The query is first issued with a buffer that holds only the array header.
+ * While it fails with NDIS_STATUS_INVALID_LENGTH, the buffer is reallocated
+ * with the size reported by the OID and the query is retried. '*portArrayOut'
+ * is written only on success.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsGetPortsOnSwitch(POVS_SWITCH_CONTEXT switchContext,
+                    PNDIS_SWITCH_PORT_ARRAY *portArrayOut)
+{
+    PNDIS_SWITCH_PORT_ARRAY portArray;
+    UINT32 arraySize = sizeof *portArray;
+    NDIS_STATUS status = NDIS_STATUS_FAILURE;
+
+    OVS_LOG_TRACE("Enter: switchContext: %p, portArray: %p",
+                  switchContext, portArrayOut);
+    for (;;) {
+        /* Initialized because the OID helper writes it only when the
+         * request fails with NDIS_STATUS_INVALID_LENGTH. */
+        UINT32 reqdArraySize = 0;
+
+        portArray = OvsAllocateMemory(arraySize);
+        if (!portArray) {
+            status = NDIS_STATUS_RESOURCES;
+            break;
+        }
+
+        /*
+         * Even though 'portArray' is supposed to be populated by the OID, it
+         * needs to be initialized nevertheless. Otherwise, OID returns
+         * NDIS_STATUS_INVALID_PARAMETER. This is not clear in the
+         * documentation.
+         */
+        RtlZeroMemory(portArray, sizeof *portArray);
+        portArray->Header.Revision = NDIS_SWITCH_PORT_ARRAY_REVISION_1;
+        portArray->Header.Type = NDIS_OBJECT_TYPE_DEFAULT;
+        portArray->Header.Size = NDIS_SIZEOF_NDIS_SWITCH_PORT_ARRAY_REVISION_1;
+
+        status = OvsIssueOidRequest(switchContext, NdisRequestQueryInformation,
+                                    OID_SWITCH_PORT_ARRAY, NULL, 0,
+                                    (PVOID)portArray, arraySize,
+                                    &reqdArraySize);
+        if (status == NDIS_STATUS_SUCCESS) {
+            *portArrayOut = portArray;
+            break;
+        }
+
+        OvsFreeMemory(portArray);
+        if (status != NDIS_STATUS_INVALID_LENGTH) {
+            break;
+        }
+
+        /* Retry only with a strictly larger buffer. This keeps the loop
+         * finite and guarantees the header still fits in the allocation. */
+        if (reqdArraySize <= arraySize) {
+            break;
+        }
+        arraySize = reqdArraySize;
+    }
+
+    OVS_LOG_TRACE("Exit: status %8x.", status);
+    return status;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * Utility function to get the array of nics on the extensible switch. Upon
+ * success, the caller needs to free the returned array.
+ *
+ * The query is first issued with a buffer that holds only the array header.
+ * While it fails with NDIS_STATUS_INVALID_LENGTH, the buffer is reallocated
+ * with the size reported by the OID and the query is retried. '*nicArrayOut'
+ * is written only on success.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsGetNicsOnSwitch(POVS_SWITCH_CONTEXT switchContext,
+                   PNDIS_SWITCH_NIC_ARRAY *nicArrayOut)
+{
+    PNDIS_SWITCH_NIC_ARRAY nicArray;
+    UINT32 arraySize = sizeof *nicArray;
+    NDIS_STATUS status = NDIS_STATUS_FAILURE;
+
+    OVS_LOG_TRACE("Enter: switchContext: %p, nicArray: %p",
+                  switchContext, nicArrayOut);
+
+    for (;;) {
+        /* Initialized because the OID helper writes it only when the
+         * request fails with NDIS_STATUS_INVALID_LENGTH. */
+        UINT32 reqdArraySize = 0;
+
+        nicArray = OvsAllocateMemory(arraySize);
+        if (!nicArray) {
+            status = NDIS_STATUS_RESOURCES;
+            break;
+        }
+
+        /*
+         * Even though 'nicArray' is supposed to be populated by the OID, it
+         * needs to be initialized nevertheless. Otherwise, OID returns
+         * NDIS_STATUS_INVALID_PARAMETER. This is not clear in the
+         * documentation.
+         */
+        RtlZeroMemory(nicArray, sizeof *nicArray);
+        nicArray->Header.Revision = NDIS_SWITCH_NIC_ARRAY_REVISION_1;
+        nicArray->Header.Type = NDIS_OBJECT_TYPE_DEFAULT;
+        nicArray->Header.Size = NDIS_SIZEOF_NDIS_SWITCH_NIC_ARRAY_REVISION_1;
+
+        status = OvsIssueOidRequest(switchContext, NdisRequestQueryInformation,
+                                    OID_SWITCH_NIC_ARRAY, NULL, 0,
+                                    (PVOID)nicArray, arraySize,
+                                    &reqdArraySize);
+        if (status == NDIS_STATUS_SUCCESS) {
+            *nicArrayOut = nicArray;
+            break;
+        }
+
+        OvsFreeMemory(nicArray);
+        if (status != NDIS_STATUS_INVALID_LENGTH) {
+            break;
+        }
+
+        /* Retry only with a strictly larger buffer. This keeps the loop
+         * finite and guarantees the header still fits in the allocation. */
+        if (reqdArraySize <= arraySize) {
+            break;
+        }
+        arraySize = reqdArraySize;
+    }
+
+    OVS_LOG_TRACE("Exit: status %8x.", status);
+    return status;
+}
diff --git a/datapath-windows/ovsext/OvsOid.h b/datapath-windows/ovsext/OvsOid.h
new file mode 100644
index 000000000..40a5ec69a
--- /dev/null
+++ b/datapath-windows/ovsext/OvsOid.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_OID_H_
+#define __OVS_OID_H_ 1
+
+/* Queries OID_SWITCH_PARAMETERS and, on success, stores the switch's
+ * 'IsActive' value in '*switchActive'. */
+NDIS_STATUS OvsQuerySwitchActivationComplete(POVS_SWITCH_CONTEXT switchContext,
+ BOOLEAN *switchActive);
+/* Queries OID_SWITCH_PORT_ARRAY. On success '*portArrayOut' points to an
+ * allocated array that the caller must free. */
+NDIS_STATUS OvsGetPortsOnSwitch(POVS_SWITCH_CONTEXT switchContext,
+ PNDIS_SWITCH_PORT_ARRAY *portArrayOut);
+/* Queries OID_SWITCH_NIC_ARRAY. On success '*nicArrayOut' points to an
+ * allocated array that the caller must free. */
+NDIS_STATUS OvsGetNicsOnSwitch(POVS_SWITCH_CONTEXT switchContext,
+ PNDIS_SWITCH_NIC_ARRAY *nicArrayOut);
+#endif /* __OVS_OID_H_ */
diff --git a/datapath-windows/ovsext/OvsPacketIO.c b/datapath-windows/ovsext/OvsPacketIO.c
new file mode 100644
index 000000000..39e57036e
--- /dev/null
+++ b/datapath-windows/ovsext/OvsPacketIO.c
@@ -0,0 +1,441 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This file contains the implementation of the datapath/forwarding
+ * functionality of the OVS.
+ */
+
+#include "precomp.h"
+#include "OvsIoctl.h"
+#include "OvsSwitch.h"
+#include "OvsVport.h"
+#include "OvsNetProto.h"
+#include "OvsUser.h"
+#include "OvsPacketIO.h"
+#include "OvsFlow.h"
+#include "OvsEvent.h"
+#include "OvsUser.h"
+
+/* Due to an imported header file */
+#pragma warning( disable:4505 )
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_DISPATCH
+#include "OvsDebug.h"
+
+/* Extension GUID and friendly name, defined outside this file. They are
+ * passed to ReportFilteredNetBufferLists() whenever an NBL is reported as
+ * filtered. */
+extern NDIS_STRING ovsExtGuidUC;
+extern NDIS_STRING ovsExtFriendlyNameUC;
+
+/* Forward declarations of helpers that are used before their definition. */
+static VOID OvsFinalizeCompletionList(OvsCompletionList *completionList);
+static VOID OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext,
+ PNET_BUFFER_LIST netBufferLists, ULONG sendCompleteFlags);
+
+/*
+ * Resets 'completionList' to an empty list and records the switch context
+ * and send-complete flags to be used when the list is finalized.
+ */
+__inline VOID
+OvsInitCompletionList(OvsCompletionList *completionList,
+                      POVS_SWITCH_CONTEXT switchContext,
+                      ULONG sendCompleteFlags)
+{
+    ASSERT(completionList);
+
+    completionList->switchContext = switchContext;
+    completionList->sendCompleteFlags = sendCompleteFlags;
+
+    /* Empty list: the tail pointer refers back to the head pointer. */
+    completionList->dropNbl = NULL;
+    completionList->dropNblNext = &completionList->dropNbl;
+}
+
+/*
+ * Reports 'netBufferList' to the extensible switch as filtered, giving
+ * 'filterReason', and appends it to the tail of 'completionList' so that it
+ * gets completed when the list is finalized. The NBL must be unlinked
+ * (Next == NULL) and must carry an OVS buffer context.
+ */
+__inline VOID
+OvsAddPktCompletionList(OvsCompletionList *completionList,
+                        BOOLEAN incoming,
+                        NDIS_SWITCH_PORT_ID sourcePort,
+                        PNET_BUFFER_LIST netBufferList,
+                        UINT32 netBufferListCount,
+                        PNDIS_STRING filterReason)
+{
+    POVS_SWITCH_CONTEXT switchContext;
+    POVS_BUFFER_CONTEXT ctx;
+
+    /* XXX: We handle one NBL at a time. */
+    ASSERT(netBufferList->Next == NULL);
+
+    /* Make sure it has a context. */
+    ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(netBufferList);
+    ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC);
+
+    switchContext = completionList->switchContext;
+    switchContext->NdisSwitchHandlers.ReportFilteredNetBufferLists(
+        switchContext->NdisSwitchContext,
+        &ovsExtGuidUC,
+        &ovsExtFriendlyNameUC,
+        sourcePort,
+        incoming ? NDIS_SWITCH_REPORT_FILTERED_NBL_FLAGS_IS_INCOMING : 0,
+        netBufferListCount,
+        netBufferList,
+        filterReason);
+
+    /* Link the NBL at the tail and advance the tail pointer. */
+    *completionList->dropNblNext = netBufferList;
+    completionList->dropNblNext = &netBufferList->Next;
+    ASSERT(completionList->dropNbl);
+}
+
+/*
+ * Walks the chain 'nblList', sets the status of every NBL to 'error' and
+ * reports each NBL individually to the extensible switch as a filtered
+ * incoming NBL with the given 'filterReason'. The NBLs are not completed
+ * here.
+ */
+static __inline VOID
+OvsReportNBLIngressError(POVS_SWITCH_CONTEXT switchContext,
+                         PNET_BUFFER_LIST nblList,
+                         PNDIS_STRING filterReason,
+                         NDIS_STATUS error)
+{
+    PNET_BUFFER_LIST cur;
+
+    for (cur = nblList; cur != NULL; cur = NET_BUFFER_LIST_NEXT_NBL(cur)) {
+        PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO detail =
+            NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(cur);
+
+        cur->Status = error;
+
+        /* This can be optimized by batching NBL's from the same
+         * SourcePortId. */
+        switchContext->NdisSwitchHandlers.ReportFilteredNetBufferLists(
+            switchContext->NdisSwitchContext,
+            &ovsExtGuidUC,
+            &ovsExtFriendlyNameUC,
+            detail->SourcePortId,
+            NDIS_SWITCH_REPORT_FILTERED_NBL_FLAGS_IS_INCOMING,
+            1 /*Nbl count.*/,
+            cur,
+            filterReason);
+    }
+}
+
+/*
+ * Translates send flags into the matching send-complete flags: the
+ * dispatch-level and single-source bits are carried over, everything else is
+ * dropped.
+ */
+static __inline ULONG
+OvsGetSendCompleteFlags(ULONG sendFlags)
+{
+    ULONG completeFlags = 0;
+
+    if (NDIS_TEST_SEND_AT_DISPATCH_LEVEL(sendFlags)) {
+        completeFlags |= NDIS_SEND_COMPLETE_FLAGS_DISPATCH_LEVEL;
+    }
+    if (NDIS_TEST_SEND_FLAG(sendFlags,
+                            NDIS_SEND_FLAGS_SWITCH_SINGLE_SOURCE)) {
+        completeFlags |= NDIS_SEND_COMPLETE_FLAGS_SWITCH_SINGLE_SOURCE;
+    }
+
+    return completeFlags;
+}
+
+/*
+ * Passes 'netBufferLists' down the stack with NdisFSendNetBufferLists().
+ * If the switch is paused the NBLs are not sent: they are reported as
+ * filtered with status NDIS_STATUS_PAUSED and completed right away.
+ */
+VOID
+OvsSendNBLIngress(POVS_SWITCH_CONTEXT switchContext,
+                  PNET_BUFFER_LIST netBufferLists,
+                  ULONG sendFlags)
+{
+    NDIS_STRING pausedReason;
+    ULONG completeFlags;
+
+    if (switchContext->dataFlowState != OvsSwitchPaused) {
+        ASSERT(switchContext->dataFlowState == OvsSwitchRunning);
+
+        NdisFSendNetBufferLists(switchContext->NdisFilterHandle,
+                                netBufferLists, NDIS_DEFAULT_PORT_NUMBER,
+                                sendFlags);
+        return;
+    }
+
+    /* If a filter module is in the Paused state, the filter driver must not
+     * originate any send requests for that filter module. If NDIS calls
+     * FilterSendNetBufferLists, the driver must not call
+     * NdisFSendNetBufferLists to pass on the data until the driver is
+     * restarted. The driver should call NdisFSendNetBufferListsComplete
+     * immediately to complete the send operation. It should set the
+     * complete status in each NET_BUFFER_LIST structure to
+     * NDIS_STATUS_PAUSED.
+     *
+     * http://msdn.microsoft.com/en-us/library/windows/hardware/
+     * ff549966(v=vs.85).aspx */
+    completeFlags = OvsGetSendCompleteFlags(sendFlags);
+
+    RtlInitUnicodeString(&pausedReason,
+                         L"Switch state PAUSED, drop before FSendNBL.");
+    OvsReportNBLIngressError(switchContext, netBufferLists, &pausedReason,
+                             NDIS_STATUS_PAUSED);
+    OvsCompleteNBLIngress(switchContext, netBufferLists, completeFlags);
+}
+
+/*
+ * Fails the NBL chain 'nblList' on the ingress path: every NBL is marked
+ * with 'error' and reported as filtered, then the whole chain is completed
+ * back to NDIS with NdisFSendNetBufferListsComplete(). 'error' must not be
+ * NDIS_STATUS_SUCCESS.
+ */
+static __inline VOID
+OvsStartNBLIngressError(POVS_SWITCH_CONTEXT switchContext,
+                        PNET_BUFFER_LIST nblList,
+                        ULONG sendCompleteFlags,
+                        PNDIS_STRING filterReason,
+                        NDIS_STATUS error)
+{
+    NDIS_HANDLE filterHandle = switchContext->NdisFilterHandle;
+
+    ASSERT(error);
+
+    OvsReportNBLIngressError(switchContext, nblList, filterReason, error);
+    NdisFSendNetBufferListsComplete(filterHandle, nblList, sendCompleteFlags);
+}
+
+/*
+ * Processes the NBL chain 'netBufferLists' received on the ingress path, one
+ * NBL at a time. For every NBL:
+ *   - an OVS buffer context is attached,
+ *   - the source vport is looked up and its rx counters are updated,
+ *   - a flow key is extracted and looked up in the datapath,
+ *   - on a hit the flow's actions are executed,
+ *   - on a miss the packet is handed to OvsCreateAndAddPackets() for
+ *     userspace and the NBL is put on the completion list.
+ * NBLs that cannot be processed are either completed immediately with an
+ * error or put on the completion list. The missed packets are queued and
+ * the completion list is finalized once the whole chain has been walked.
+ */
+static VOID
+OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext,
+                   PNET_BUFFER_LIST netBufferLists,
+                   ULONG SendFlags)
+{
+    NDIS_SWITCH_PORT_ID sourcePort = 0;
+    NDIS_SWITCH_NIC_INDEX sourceIndex = 0;
+    PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
+    PNET_BUFFER_LIST curNbl = NULL, nextNbl = NULL;
+    ULONG sendCompleteFlags;
+    UCHAR dispatch;
+    LOCK_STATE_EX lockState, dpLockState;
+    NDIS_STATUS status;
+    NDIS_STRING filterReason;
+    LIST_ENTRY missedPackets;
+    UINT32 num = 0;
+    OvsCompletionList completionList;
+
+    dispatch = NDIS_TEST_SEND_AT_DISPATCH_LEVEL(SendFlags)?
+               NDIS_RWL_AT_DISPATCH_LEVEL : 0;
+    sendCompleteFlags = OvsGetSendCompleteFlags(SendFlags);
+    SendFlags |= NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP;
+
+    InitializeListHead(&missedPackets);
+    OvsInitCompletionList(&completionList, switchContext, sendCompleteFlags);
+
+    for (curNbl = netBufferLists; curNbl != NULL; curNbl = nextNbl) {
+        POVS_VPORT_ENTRY vport;
+        UINT32 portNo;
+        OVS_DATAPATH *datapath = &switchContext->datapath;
+        OVS_PACKET_HDR_INFO layers;
+        OvsFlowKey key;
+        UINT64 hash;
+        PNET_BUFFER curNb;
+
+        /* Detach the NBL from the chain; it is handled on its own. */
+        nextNbl = curNbl->Next;
+        curNbl->Next = NULL;
+
+        /* Ethernet Header is a guaranteed safe access. */
+        curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+        if (curNb->Next != NULL) {
+            /* XXX: NBLs with more than one NB are not handled yet. The NBL
+             * has already been unlinked from the chain, so it must be
+             * completed here; otherwise it would never be returned to its
+             * owner. No OVS context has been attached yet, hence it is
+             * completed directly rather than via the completion list. */
+            ASSERT(FALSE);
+            RtlInitUnicodeString(&filterReason,
+                L"OVS-Cannot handle NBL with multiple NBs");
+            OvsStartNBLIngressError(switchContext, curNbl,
+                                    sendCompleteFlags, &filterReason,
+                                    NDIS_STATUS_NOT_SUPPORTED);
+        } else {
+            POVS_BUFFER_CONTEXT ctx;
+            OvsFlow *flow;
+
+            fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl);
+            sourcePort = fwdDetail->SourcePortId;
+            sourceIndex = (NDIS_SWITCH_NIC_INDEX)fwdDetail->SourceNicIndex;
+
+            /* Take the DispatchLock so none of the VPORTs disconnect while
+             * we are setting destination ports.
+             *
+             * XXX: acquire/release the dispatch lock for a "batch" of packets
+             * rather than for each packet. */
+            NdisAcquireRWLockRead(switchContext->dispatchLock, &lockState,
+                                  dispatch);
+
+            ctx = OvsInitExternalNBLContext(switchContext, curNbl,
+                      sourcePort == switchContext->externalPortId);
+            if (ctx == NULL) {
+                RtlInitUnicodeString(&filterReason,
+                    L"Cannot allocate external NBL context.");
+
+                OvsStartNBLIngressError(switchContext, curNbl,
+                                        sendCompleteFlags, &filterReason,
+                                        NDIS_STATUS_RESOURCES);
+                NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+                continue;
+            }
+
+            vport = OvsFindVportByPortIdAndNicIndex(switchContext, sourcePort,
+                                                    sourceIndex);
+            if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
+                RtlInitUnicodeString(&filterReason,
+                    L"OVS-Cannot forward packet from unknown source port");
+                goto dropit;
+            } else {
+                portNo = vport->portNo;
+            }
+
+            vport->stats.rxPackets++;
+            vport->stats.rxBytes += NET_BUFFER_DATA_LENGTH(curNb);
+
+            status = OvsExtractFlow(curNbl, vport->portNo, &key, &layers, NULL);
+            if (status != NDIS_STATUS_SUCCESS) {
+                RtlInitUnicodeString(&filterReason, L"OVS-Flow extract failed");
+                goto dropit;
+            }
+
+            ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
+            OvsAcquireDatapathRead(datapath, &dpLockState, dispatch);
+
+            flow = OvsLookupFlow(datapath, &key, &hash, FALSE);
+            if (flow) {
+                OvsFlowUsed(flow, curNbl, &layers);
+                datapath->hits++;
+                /* If successful, OvsActionsExecute() consumes the NBL.
+                 * Otherwise, it adds it to the completionList. No need to
+                 * check the return value. */
+                OvsActionsExecute(switchContext, &completionList, curNbl,
+                                  portNo, SendFlags, &key, &hash, &layers,
+                                  flow->actions, flow->actionsLen);
+                OvsReleaseDatapath(datapath, &dpLockState);
+                NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+                continue;
+            } else {
+                OvsReleaseDatapath(datapath, &dpLockState);
+
+                datapath->misses++;
+                status = OvsCreateAndAddPackets(OVS_DEFAULT_PACKET_QUEUE,
+                                                NULL, 0, OVS_PACKET_CMD_MISS,
+                                                portNo,
+                                                key.tunKey.dst != 0 ?
+                                                (OvsIPv4TunnelKey *)&key.tunKey :
+                                                NULL, curNbl,
+                                                sourcePort ==
+                                                switchContext->externalPortId,
+                                                &layers, switchContext,
+                                                &missedPackets, &num);
+                if (status == NDIS_STATUS_SUCCESS) {
+                    /* Complete the packet since it was copied to user
+                     * buffer. */
+                    RtlInitUnicodeString(&filterReason,
+                        L"OVS-Dropped since packet was copied to userspace");
+                } else {
+                    RtlInitUnicodeString(&filterReason,
+                        L"OVS-Dropped due to failure to queue to userspace");
+                }
+                goto dropit;
+            }
+
+dropit:
+            /* The NBL carries an OVS context here, so it goes through the
+             * completion list. */
+            OvsAddPktCompletionList(&completionList, TRUE, sourcePort, curNbl, 0,
+                                    &filterReason);
+            NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+        }
+    }
+
+    /* Queue the missed packets. */
+    OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, num);
+    OvsFinalizeCompletionList(&completionList);
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * Implements filter driver's FilterSendNetBufferLists Function.
+ *
+ * While the switch is paused the NBLs are reported as filtered and completed
+ * with NDIS_STATUS_PAUSED; otherwise they are handed to the ingress
+ * processing path. 'portNumber' is unused.
+ * --------------------------------------------------------------------------
+ */
+VOID
+OvsExtSendNBL(NDIS_HANDLE filterModuleContext,
+              PNET_BUFFER_LIST netBufferLists,
+              NDIS_PORT_NUMBER portNumber,
+              ULONG sendFlags)
+{
+    /* 'filterModuleContext' is the switch context that gets created in the
+     * AttachHandler. */
+    POVS_SWITCH_CONTEXT switchContext =
+        (POVS_SWITCH_CONTEXT) filterModuleContext;
+    NDIS_STRING pausedReason;
+    ULONG completeFlags;
+
+    UNREFERENCED_PARAMETER(portNumber);
+
+    if (switchContext->dataFlowState != OvsSwitchPaused) {
+        ASSERT(switchContext->dataFlowState == OvsSwitchRunning);
+
+        OvsStartNBLIngress(switchContext, netBufferLists, sendFlags);
+        return;
+    }
+
+    completeFlags = OvsGetSendCompleteFlags(sendFlags);
+
+    RtlInitUnicodeString(&pausedReason,
+                         L"Switch state PAUSED, drop on ingress.");
+    OvsStartNBLIngressError(switchContext, netBufferLists,
+                            completeFlags, &pausedReason,
+                            NDIS_STATUS_PAUSED);
+}
+
+/*
+ * Completes every NBL of the chain 'netBufferLists'. Each NBL is unlinked
+ * and passed to OvsCompleteNBL(); whatever NBL that call returns is
+ * collected on a new chain, which is finally completed to NDIS with
+ * NdisFSendNetBufferListsComplete().
+ */
+static VOID
+OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext,
+                      PNET_BUFFER_LIST netBufferLists,
+                      ULONG sendCompleteFlags)
+{
+    PNET_BUFFER_LIST upperHead = NULL;
+    PNET_BUFFER_LIST *upperTail = &upperHead;
+    PNET_BUFFER_LIST curNbl = netBufferLists;
+
+    while (curNbl != NULL) {
+        PNET_BUFFER_LIST nextNbl = curNbl->Next;
+        PNET_BUFFER_LIST upperNbl;
+
+        curNbl->Next = NULL;
+
+        upperNbl = OvsCompleteNBL(switchContext, curNbl, TRUE);
+        if (upperNbl != NULL) {
+            /* NBL originated from the upper layer. */
+            *upperTail = upperNbl;
+            upperTail = &upperNbl->Next;
+        }
+
+        curNbl = nextNbl;
+    }
+
+    /* Complete the NBL's that were sent by the upper layer. */
+    if (upperHead != NULL) {
+        NdisFSendNetBufferListsComplete(switchContext->NdisFilterHandle,
+                                        upperHead, sendCompleteFlags);
+    }
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * Implements filter driver's FilterSendNetBufferListsComplete function.
+ *
+ * 'filterModuleContext' is the switch context; the work is delegated to
+ * OvsCompleteNBLIngress().
+ * --------------------------------------------------------------------------
+ */
+VOID
+OvsExtSendNBLComplete(NDIS_HANDLE filterModuleContext,
+                      PNET_BUFFER_LIST netBufferLists,
+                      ULONG sendCompleteFlags)
+{
+    POVS_SWITCH_CONTEXT switchContext =
+        (POVS_SWITCH_CONTEXT)filterModuleContext;
+
+    OvsCompleteNBLIngress(switchContext, netBufferLists, sendCompleteFlags);
+}
+
+
+/*
+ * Completes all NBLs collected on 'completionList' and resets the list to
+ * empty. Does nothing if the list is already empty.
+ */
+VOID
+OvsFinalizeCompletionList(OvsCompletionList *completionList)
+{
+    if (completionList->dropNbl == NULL) {
+        return;
+    }
+
+    OvsCompleteNBLIngress(completionList->switchContext,
+                          completionList->dropNbl,
+                          completionList->sendCompleteFlags);
+
+    /* Back to the empty state: tail pointer refers to the head pointer. */
+    completionList->dropNbl = NULL;
+    completionList->dropNblNext = &completionList->dropNbl;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * Implements filter driver's FilterCancelSendNetBufferLists function.
+ *
+ * "If a filter driver specifies a FilterSendNetBufferLists function and it
+ * queues send requests, it must also specify a
+ * FilterCancelSendNetBufferLists function."
+ *
+ * http://msdn.microsoft.com/en-us/library/windows/hardware/
+ * ff549966(v=vs.85).aspx
+ *
+ * This implementation is intentionally empty; both arguments are unused.
+ * --------------------------------------------------------------------------
+ */
+VOID
+OvsExtCancelSendNBL(NDIS_HANDLE filterModuleContext,
+                    PVOID CancelId)
+{
+    /* All send requests get completed synchronously, so there is no need to
+     * implement this callback. */
+    UNREFERENCED_PARAMETER(CancelId);
+    UNREFERENCED_PARAMETER(filterModuleContext);
+}
diff --git a/datapath-windows/ovsext/OvsPacketIO.h b/datapath-windows/ovsext/OvsPacketIO.h
new file mode 100644
index 000000000..322a8aab7
--- /dev/null
+++ b/datapath-windows/ovsext/OvsPacketIO.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_PACKETIO_H_
+#define __OVS_PACKETIO_H_ 1
+
+typedef union _OVS_PACKET_HDR_INFO OVS_PACKET_HDR_INFO;
+
+/*
+ * Data structures and utility functions to help manage a list of packets to be
+ * completed (dropped).
+ *
+ * The list is a singly linked chain of NBLs (linked through their 'Next'
+ * field) with a tail pointer so that appending does not need a walk.
+ */
+typedef struct OvsCompletionList {
+ /* Head of the chain of NBLs to complete; NULL when the list is empty. */
+ PNET_BUFFER_LIST dropNbl;
+ /* Location where the next NBL gets linked: '&dropNbl' for an empty list,
+ * otherwise the 'Next' field of the last NBL. */
+ PNET_BUFFER_LIST *dropNblNext;
+ /* Switch context used to report and complete the NBLs. */
+ POVS_SWITCH_CONTEXT switchContext;
+ /* Flags passed along when the collected NBLs are completed. */
+ ULONG sendCompleteFlags;
+} OvsCompletionList;
+
+/* Initializes 'completionList' as an empty list bound to 'switchContext'
+ * and 'sendCompleteFlags'. */
+VOID OvsInitCompletionList(OvsCompletionList *completionList,
+ POVS_SWITCH_CONTEXT switchContext,
+ ULONG sendCompleteFlags);
+/* Reports 'netBufferList' as filtered with 'filterReason' and appends it to
+ * 'completionList'. The NBL must not be linked to other NBLs. */
+VOID OvsAddPktCompletionList(OvsCompletionList *completionList,
+ BOOLEAN incoming,
+ NDIS_SWITCH_PORT_ID sourcePort,
+ PNET_BUFFER_LIST netBufferList,
+ UINT32 netBufferListCount,
+ PNDIS_STRING filterReason);
+
+
+/*
+ * Functions related to packet processing.
+ */
+/* Sends 'netBufferLists' down the stack, or completes them with
+ * NDIS_STATUS_PAUSED if the switch is paused. */
+VOID OvsSendNBLIngress(POVS_SWITCH_CONTEXT switchContext,
+ PNET_BUFFER_LIST netBufferLists,
+ ULONG sendFlags);
+
+/* Executes 'actions' on 'curNbl'. Per the comment at its call site in
+ * OvsPacketIO.c: if successful it consumes the NBL, otherwise it adds the
+ * NBL to 'completionList'. Defined outside OvsPacketIO.c. */
+NDIS_STATUS OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext,
+ OvsCompletionList *completionList,
+ PNET_BUFFER_LIST curNbl, UINT32 srcVportNo,
+ ULONG sendFlags, OvsFlowKey *key, UINT64 *hash,
+ OVS_PACKET_HDR_INFO *layers,
+ const struct nlattr *actions, int actionsLen);
+
+/* NOTE(review): not defined in OvsPacketIO.c; 'compList' is presumably an
+ * OvsCompletionList pointer -- confirm against the definition. */
+VOID OvsLookupFlowOutput(POVS_SWITCH_CONTEXT switchContext,
+ VOID *compList, PNET_BUFFER_LIST curNbl);
+
+#endif /* __OVS_PACKETIO_H_ */
diff --git a/datapath-windows/ovsext/OvsPacketParser.c b/datapath-windows/ovsext/OvsPacketParser.c
new file mode 100644
index 000000000..0a9343551
--- /dev/null
+++ b/datapath-windows/ovsext/OvsPacketParser.c
@@ -0,0 +1,303 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OvsPacketParser.h"
+
+/*
+ * Copies 'len' bytes, starting 'srcOffset' bytes into the data of the first
+ * NET_BUFFER of 'nbl', into 'storage'.
+ *
+ * Returns 'storage' when exactly 'len' bytes were copied, and NULL when the
+ * MDL chain or the packet data ran out first or an MDL could not be mapped.
+ *
+ * XXX consider moving to NdisGetDataBuffer.
+ */
+const VOID *
+OvsGetPacketBytes(const NET_BUFFER_LIST *nbl,
+                  UINT32 len,
+                  UINT32 srcOffset,
+                  VOID *storage)
+{
+    PNET_BUFFER nb = NET_BUFFER_LIST_FIRST_NB(nbl);
+    ULONG remaining = NET_BUFFER_DATA_LENGTH(nb);
+    ULONG copied = 0;
+    BOOLEAN atFirstMdl = TRUE;
+    BOOLEAN mapFailed = FALSE;
+    PMDL mdl;
+
+    /* Walk the MDL chain beginning at the NET_BUFFER's current MDL. */
+    for (mdl = NET_BUFFER_CURRENT_MDL(nb);
+         copied < len && mdl;
+         mdl = NDIS_MDL_LINKAGE(mdl)) {
+        PUCHAR src = MmGetSystemAddressForMdlSafe(mdl, LowPagePriority);
+        ULONG avail = MmGetMdlByteCount(mdl);
+
+        if (!src) {
+            mapFailed = TRUE;
+            break;
+        }
+
+        /* Only the first MDL starts part-way in (current MDL offset). */
+        if (atFirstMdl) {
+            ULONG mdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(nb);
+
+            atFirstMdl = FALSE;
+            src += mdlOffset;
+            avail -= mdlOffset;
+        }
+
+        /* Never look beyond the NET_BUFFER's data length. */
+        avail = MIN(avail, remaining);
+        remaining -= avail;
+        ASSERT((INT)remaining >= 0);
+
+        /* This MDL lies entirely before the requested offset: skip it. */
+        if (srcOffset >= avail) {
+            srcOffset -= avail;
+            continue;
+        }
+
+        src += srcOffset;
+        avail -= srcOffset;
+        srcOffset = 0;
+
+        avail = min(avail, len - copied);
+        NdisMoveMemory((PUCHAR)storage + copied, src, avail);
+        copied += avail;
+    }
+
+    if (copied != len) {
+        return NULL;
+    }
+
+    ASSERT(!mapFailed);
+    return storage;
+}
+
+/*
+ * Parses the IPv6 header at layers->l3Offset, plus any hop-by-hop, routing,
+ * destination-options, AH and fragment extension headers that follow it,
+ * into key->ipv6Key.
+ *
+ * On success layers->l4Offset is set to the offset just past the last header
+ * that was walked and NDIS_STATUS_SUCCESS is returned.  NDIS_STATUS_FAILURE
+ * is returned when a header cannot be read or an extension header extends
+ * past the end of the packet.
+ */
+NDIS_STATUS
+OvsParseIPv6(const NET_BUFFER_LIST *packet,
+             OvsFlowKey *key,
+             POVS_PACKET_HDR_INFO layers)
+{
+    UINT16 ofs = layers->l3Offset;
+    IPv6Hdr ipv6HdrStorage;
+    const IPv6Hdr *nh;
+    UINT32 nextHdr;
+    Ipv6Key *flow = &key->ipv6Key;
+
+    nh = OvsGetPacketBytes(packet, sizeof *nh, ofs, &ipv6HdrStorage);
+    if (!nh) {
+        return NDIS_STATUS_FAILURE;
+    }
+
+    nextHdr = nh->nexthdr;
+    memcpy(&flow->ipv6Src, nh->saddr.s6_addr, 16);
+    memcpy(&flow->ipv6Dst, nh->daddr.s6_addr, 16);
+
+    flow->nwTos = ((nh->flow_lbl[0] & 0xF0) >> 4) | (nh->priority << 4);
+    flow->ipv6Label =
+        ((nh->flow_lbl[0] & 0x0F) << 16) | (nh->flow_lbl[1] << 8) | nh->flow_lbl[2];
+    flow->nwTtl = nh->hop_limit;
+    flow->nwProto = SOCKET_IPPROTO_NONE;
+    flow->nwFrag = 0;
+
+    // Parse extended headers and compute L4 offset
+    ofs += sizeof(IPv6Hdr);
+    for (;;) {
+        if ((nextHdr != SOCKET_IPPROTO_HOPOPTS)
+            && (nextHdr != SOCKET_IPPROTO_ROUTING)
+            && (nextHdr != SOCKET_IPPROTO_DSTOPTS)
+            && (nextHdr != SOCKET_IPPROTO_AH)
+            && (nextHdr != SOCKET_IPPROTO_FRAGMENT)) {
+            /*
+             * It's either a terminal header (e.g., TCP, UDP) or one we
+             * don't understand.  In either case, we're done with the
+             * packet, so use it to fill in 'nw_proto'.
+             */
+            break;
+        }
+
+        if (nextHdr == SOCKET_IPPROTO_HOPOPTS
+            || nextHdr == SOCKET_IPPROTO_ROUTING
+            || nextHdr == SOCKET_IPPROTO_DSTOPTS
+            || nextHdr == SOCKET_IPPROTO_AH) {
+            IPv6ExtHdr extHdrStorage;
+            const IPv6ExtHdr *extHdr;
+            UINT8 len;
+
+            extHdr = OvsGetPacketBytes(packet, sizeof *extHdr, ofs, &extHdrStorage);
+            if (!extHdr) {
+                return NDIS_STATUS_FAILURE;
+            }
+
+            /* AH counts its length in 4-byte units, the others in 8-byte units. */
+            len = extHdr->hdrExtLen;
+            ofs += nextHdr == SOCKET_IPPROTO_AH ? (len + 2) * 4 : (len + 1) * 8;
+            nextHdr = extHdr->nextHeader;
+            if (OvsPacketLenNBL(packet) < ofs) {
+                return NDIS_STATUS_FAILURE;
+            }
+        } else if (nextHdr == SOCKET_IPPROTO_FRAGMENT) {
+            IPv6FragHdr fragHdrStorage;
+            const IPv6FragHdr *fragHdr;
+
+            fragHdr = OvsGetPacketBytes(packet, sizeof *fragHdr, ofs,
+                                        &fragHdrStorage);
+            if (!fragHdr) {
+                return NDIS_STATUS_FAILURE;
+            }
+
+            nextHdr = fragHdr->nextHeader;
+            ofs += sizeof *fragHdr;
+
+            /* We only process the first fragment. */
+            if (fragHdr->offlg != htons(0)) {
+                /*
+                 * 'offlg' is compared against htons(0), i.e. it is kept in
+                 * network byte order, so the host-order offset mask must be
+                 * byte-swapped before it is applied.  Without the swap the
+                 * mask covers the M flag on little-endian hosts and a first
+                 * fragment is mistaken for a later one.
+                 */
+                if ((fragHdr->offlg & htons(IP6F_OFF_HOST_ORDER_MASK))
+                    == htons(0)) {
+                    flow->nwFrag = OVSWIN_NW_FRAG_ANY;
+                } else {
+                    flow->nwFrag |= OVSWIN_NW_FRAG_LATER;
+                    nextHdr = SOCKET_IPPROTO_FRAGMENT;
+                    break;
+                }
+            }
+        }
+    }
+
+    flow->nwProto = (UINT8)nextHdr;
+    layers->l4Offset = ofs;
+    return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ * Fills 'flow' with the TCP source/destination ports found at
+ * layers->l4Offset, flags the packet as TCP and records where the payload
+ * starts.  Leaves 'flow' and 'layers' untouched when OvsGetTcp() rejects the
+ * header.
+ */
+VOID
+OvsParseTcp(const NET_BUFFER_LIST *packet,
+            L4Key *flow,
+            POVS_PACKET_HDR_INFO layers)
+{
+    TCPHdr hdrStorage;
+    const TCPHdr *hdr;
+
+    hdr = OvsGetTcp(packet, layers->l4Offset, &hdrStorage);
+    if (!hdr) {
+        return;
+    }
+
+    flow->tpSrc = hdr->source;
+    flow->tpDst = hdr->dest;
+    layers->isTcp = 1;
+    /* The data offset field counts 32-bit words. */
+    layers->l7Offset = layers->l4Offset + 4 * hdr->doff;
+}
+
+/*
+ * Fills 'flow' with the UDP source/destination ports found at
+ * layers->l4Offset, flags the packet as UDP, notes a zero checksum and
+ * records where the payload starts.  Does nothing when the header cannot be
+ * read.
+ */
+VOID
+OvsParseUdp(const NET_BUFFER_LIST *packet,
+            L4Key *flow,
+            POVS_PACKET_HDR_INFO layers)
+{
+    UDPHdr hdrStorage;
+    const UDPHdr *hdr;
+
+    hdr = OvsGetUdp(packet, layers->l4Offset, &hdrStorage);
+    if (!hdr) {
+        return;
+    }
+
+    flow->tpSrc = hdr->source;
+    flow->tpDst = hdr->dest;
+    layers->isUdp = 1;
+    if (hdr->check == 0) {
+        layers->udpCsumZero = 1;
+    }
+    layers->l7Offset = layers->l4Offset + sizeof *hdr;
+}
+
+/*
+ * Parses the ICMPv6 header at layers->l4Offset into 'key'.
+ *
+ * The ICMPv6 type and code are stored in key->ipv6Key.l4.tpSrc/tpDst.  For
+ * neighbor solicitations and advertisements (code 0) the target address and
+ * the source/target link-layer address options are stored in
+ * key->icmp6Key.
+ *
+ * Returns NDIS_STATUS_SUCCESS and sets layers->l7Offset on success.  Returns
+ * NDIS_STATUS_FAILURE when a header cannot be read or when the neighbor
+ * discovery options are malformed; in the malformed case the ND fields of
+ * the key are reset to zero.
+ */
+NDIS_STATUS
+OvsParseIcmpV6(const NET_BUFFER_LIST *packet,
+               OvsFlowKey *key,
+               POVS_PACKET_HDR_INFO layers)
+{
+    UINT16 ofs = layers->l4Offset;
+    ICMPHdr icmpStorage;
+    const ICMPHdr *icmp;
+    Icmp6Key *flow = &key->icmp6Key;
+
+    memset(&flow->ndTarget, 0, sizeof(flow->ndTarget));
+    memset(flow->arpSha, 0, sizeof(flow->arpSha));
+    memset(flow->arpTha, 0, sizeof(flow->arpTha));
+
+    icmp = OvsGetIcmp(packet, ofs, &icmpStorage);
+    if (!icmp) {
+        return NDIS_STATUS_FAILURE;
+    }
+    ofs += sizeof *icmp;
+
+    /*
+     * The ICMPv6 type and code fields use the 16-bit transport port
+     * fields, so we need to store them in 16-bit network byte order.
+     */
+    key->ipv6Key.l4.tpSrc = htons(icmp->type);
+    key->ipv6Key.l4.tpDst = htons(icmp->code);
+
+    if (icmp->code == 0 &&
+        (icmp->type == ND_NEIGHBOR_SOLICIT ||
+         icmp->type == ND_NEIGHBOR_ADVERT)) {
+        struct in6_addr ndTargetStorage;
+        const struct in6_addr *ndTarget;
+
+        ndTarget = OvsGetPacketBytes(packet, sizeof *ndTarget, ofs,
+                                     &ndTargetStorage);
+        if (!ndTarget) {
+            return NDIS_STATUS_FAILURE;
+        }
+        flow->ndTarget = *ndTarget;
+
+        /*
+         * The options follow the target address, so step over it before
+         * walking them; otherwise the target's own bytes are interpreted as
+         * an option header.
+         */
+        ofs += sizeof *ndTarget;
+
+        while ((UINT32)(ofs + 8) <= OvsPacketLenNBL(packet)) {
+            /*
+             * The minimum size of an option is 8 bytes, which also is
+             * the size of Ethernet link-layer options.
+             */
+            IPv6NdOptHdr ndOptStorage;
+            const IPv6NdOptHdr *ndOpt;
+            UINT8 lladdrStorage[ETH_ADDR_LENGTH];
+            const VOID *lladdr;
+            UINT8 *lladdrDst = NULL;
+            UINT16 optLen;
+
+            ndOpt = OvsGetPacketBytes(packet, sizeof *ndOpt, ofs, &ndOptStorage);
+            if (!ndOpt) {
+                return NDIS_STATUS_FAILURE;
+            }
+
+            optLen = ndOpt->len * 8;
+            if (!optLen || (UINT32)(ofs + optLen) > OvsPacketLenNBL(packet)) {
+                goto invalid;
+            }
+
+            /*
+             * Store the link layer address if the appropriate option is
+             * provided.  It is considered an error if the same link
+             * layer option is specified twice.
+             */
+            if (ndOpt->type == ND_OPT_SOURCE_LINKADDR && optLen == 8) {
+                if (!Eth_IsNullAddr(flow->arpSha)) {
+                    goto invalid;
+                }
+                lladdrDst = (UINT8 *)flow->arpSha;
+            } else if (ndOpt->type == ND_OPT_TARGET_LINKADDR && optLen == 8) {
+                if (!Eth_IsNullAddr(flow->arpTha)) {
+                    goto invalid;
+                }
+                lladdrDst = (UINT8 *)flow->arpTha;
+            }
+
+            if (lladdrDst) {
+                /*
+                 * OvsGetPacketBytes() hands back the caller's storage, so
+                 * 'ndOpt + 1' would point past 'ndOptStorage' on the stack
+                 * rather than at packet data.  Fetch the address bytes from
+                 * the packet itself.
+                 */
+                lladdr = OvsGetPacketBytes(packet, ETH_ADDR_LENGTH,
+                                           ofs + sizeof *ndOpt, lladdrStorage);
+                if (!lladdr) {
+                    return NDIS_STATUS_FAILURE;
+                }
+                memcpy(lladdrDst, lladdr, ETH_ADDR_LENGTH);
+            }
+
+            ofs += optLen;
+        }
+    }
+
+    layers->l7Offset = ofs;
+    return NDIS_STATUS_SUCCESS;
+
+invalid:
+    memset(&flow->ndTarget, 0, sizeof(flow->ndTarget));
+    memset(flow->arpSha, 0, sizeof(flow->arpSha));
+    memset(flow->arpTha, 0, sizeof(flow->arpTha));
+
+    return NDIS_STATUS_FAILURE;
+}
diff --git a/datapath-windows/ovsext/OvsPacketParser.h b/datapath-windows/ovsext/OvsPacketParser.h
new file mode 100644
index 000000000..ab3c613a7
--- /dev/null
+++ b/datapath-windows/ovsext/OvsPacketParser.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_PACKET_PARSER_H_
+#define __OVS_PACKET_PARSER_H_ 1
+
+#include "precomp.h"
+#include "OvsNetProto.h"
+
+const VOID* OvsGetPacketBytes(const NET_BUFFER_LIST *_pNB, UINT32 len,
+ UINT32 SrcOffset, VOID *storage);
+NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, OvsFlowKey *key,
+ POVS_PACKET_HDR_INFO layers);
+VOID OvsParseTcp(const NET_BUFFER_LIST *packet, L4Key *flow,
+ POVS_PACKET_HDR_INFO layers);
+VOID OvsParseUdp(const NET_BUFFER_LIST *packet, L4Key *flow,
+ POVS_PACKET_HDR_INFO layers);
+NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, OvsFlowKey *key,
+ POVS_PACKET_HDR_INFO layers);
+
+/*
+ * Returns the sum of the data lengths of every NET_BUFFER chained on
+ * '_pNB'.
+ */
+static __inline ULONG
+OvsPacketLenNBL(const NET_BUFFER_LIST *_pNB)
+{
+    NET_BUFFER *cur = NET_BUFFER_LIST_FIRST_NB(_pNB);
+    INT total = 0;
+
+    ASSERT(cur);
+    for (; cur; cur = NET_BUFFER_NEXT_NB(cur)) {
+        total += NET_BUFFER_DATA_LENGTH(cur);
+    }
+
+    return total;
+}
+
+/*
+ * Returns the ctl field from the TCP header in 'packet', or 0 if the field
+ * can't be read. The caller must have ensured that 'packet' contains a TCP
+ * header.
+ *
+ * The two bytes at layers->l4Offset + TCP_CTL_OFS are returned exactly as
+ * they were copied out of the packet; no byte-order conversion is done
+ * here.
+ *
+ * We can't just use TCPHdr, from netProto.h, for this because that
+ * breaks the flags down into individual bit-fields. We can't even use
+ * offsetof because that will try to take the address of a bit-field,
+ * which C does not allow.
+ */
+static UINT16
+OvsGetTcpCtl(const NET_BUFFER_LIST *packet, // IN
+ const POVS_PACKET_HDR_INFO layers) // IN
+{
+#define TCP_CTL_OFS 12 // Offset of "ctl" field in TCP header.
+#define TCP_FLAGS(CTL) ((CTL) & 0x3f) // Obtain TCP flags from CTL.
+
+ const UINT16 *ctl;
+ UINT16 storage;
+
+ ctl = OvsGetPacketBytes(packet, sizeof *ctl, layers->l4Offset + TCP_CTL_OFS,
+ &storage);
+ return ctl ? *ctl : 0; // 0 doubles as the "could not read" result
+}
+
+
+/*
+ * Returns the low six TCP flag bits of 'packet', or 0 when 'layers' does not
+ * mark the packet as TCP (or the header cannot be read).
+ *
+ * OvsGetTcpCtl() hands back the 16-bit ctl word as it appears on the wire,
+ * so it is converted to host byte order before TCP_FLAGS() masks it;
+ * otherwise the mask selects bits from the data-offset byte instead of the
+ * flag byte on little-endian hosts.
+ */
+static UINT8
+OvsGetTcpFlags(const NET_BUFFER_LIST *packet,    // IN
+               const OvsFlowKey *key,            // IN
+               const POVS_PACKET_HDR_INFO layers) // IN
+{
+    UNREFERENCED_PARAMETER(key); // should be removed later
+
+    if (layers->isTcp) {
+        return TCP_FLAGS(ntohs(OvsGetTcpCtl(packet, layers)));
+    } else {
+        return 0;
+    }
+}
+
+/*
+ * Copies the Ethernet ARP header found 'ofs' bytes into 'packet' into
+ * 'storage'.  Returns NULL when the bytes cannot be read.
+ */
+static const EtherArp *
+OvsGetArp(const NET_BUFFER_LIST *packet,
+          UINT32 ofs,
+          EtherArp *storage)
+{
+    const EtherArp *arp;
+
+    arp = OvsGetPacketBytes(packet, sizeof *storage, ofs, storage);
+    return arp;
+}
+
+/*
+ * Copies the IPv4 header found 'ofs' bytes into 'packet' into 'storage'.
+ * Returns NULL when the fixed header cannot be read, when the header length
+ * field is smaller than the fixed header, or when the full header would run
+ * past the end of the packet.
+ */
+static const IPHdr *
+OvsGetIp(const NET_BUFFER_LIST *packet,
+         UINT32 ofs,
+         IPHdr *storage)
+{
+    const IPHdr *ip = OvsGetPacketBytes(packet, sizeof *ip, ofs, storage);
+    int hdrLen;
+
+    if (!ip) {
+        return NULL;
+    }
+
+    /* 'ihl' counts 32-bit words. */
+    hdrLen = ip->ihl * 4;
+    if (!(hdrLen >= sizeof *ip)) {
+        return NULL;
+    }
+    if (!(OvsPacketLenNBL(packet) >= ofs + hdrLen)) {
+        return NULL;
+    }
+    return ip;
+}
+
+/*
+ * Copies the TCP header found 'ofs' bytes into 'packet' into 'storage'.
+ * Returns NULL when the fixed header cannot be read, when the data offset
+ * field is smaller than the fixed header, or when the full header would run
+ * past the end of the packet.
+ */
+static const TCPHdr *
+OvsGetTcp(const NET_BUFFER_LIST *packet,
+          UINT32 ofs,
+          TCPHdr *storage)
+{
+    const TCPHdr *tcp = OvsGetPacketBytes(packet, sizeof *tcp, ofs, storage);
+    int hdrLen;
+
+    if (!tcp) {
+        return NULL;
+    }
+
+    /* 'doff' counts 32-bit words. */
+    hdrLen = tcp->doff * 4;
+    if (!(hdrLen >= sizeof *tcp)) {
+        return NULL;
+    }
+    if (!(OvsPacketLenNBL(packet) >= ofs + hdrLen)) {
+        return NULL;
+    }
+    return tcp;
+}
+
+/*
+ * Copies the UDP header found 'ofs' bytes into 'packet' into 'storage'.
+ * Returns NULL when the bytes cannot be read.
+ */
+static const UDPHdr *
+OvsGetUdp(const NET_BUFFER_LIST *packet,
+          UINT32 ofs,
+          UDPHdr *storage)
+{
+    const UDPHdr *udp;
+
+    udp = OvsGetPacketBytes(packet, sizeof *storage, ofs, storage);
+    return udp;
+}
+
+/*
+ * Copies the ICMP header found 'ofs' bytes into 'packet' into 'storage'.
+ * Returns NULL when the bytes cannot be read.
+ */
+static const ICMPHdr *
+OvsGetIcmp(const NET_BUFFER_LIST *packet,
+           UINT32 ofs,
+           ICMPHdr *storage)
+{
+    const ICMPHdr *icmp;
+
+    icmp = OvsGetPacketBytes(packet, sizeof *storage, ofs, storage);
+    return icmp;
+}
+
+#endif /* __OVS_PACKET_PARSER_H_ */
diff --git a/datapath-windows/ovsext/OvsSwitch.c b/datapath-windows/ovsext/OvsSwitch.c
new file mode 100644
index 000000000..97ce2aec9
--- /dev/null
+++ b/datapath-windows/ovsext/OvsSwitch.c
@@ -0,0 +1,529 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This file contains the implementation of the management functionality of the
+ * OVS.
+ */
+
+#include "precomp.h"
+
+#include "OvsIoctl.h"
+#include "OvsSwitch.h"
+#include "OvsVport.h"
+#include "OvsEvent.h"
+#include "OvsFlow.h"
+#include "OvsIpHelper.h"
+#include "OvsTunnelIntf.h"
+#include "OvsOid.h"
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_SWITCH
+#include "OvsDebug.h"
+
+POVS_SWITCH_CONTEXT gOvsSwitchContext;
+BOOLEAN gOvsInAttach;
+extern PNDIS_SPIN_LOCK gOvsCtrlLock;
+extern NDIS_HANDLE gOvsExtDriverHandle;
+extern NDIS_HANDLE gOvsExtDriverObject;
+
+static NDIS_STATUS OvsCreateSwitch(NDIS_HANDLE ndisFilterHandle,
+ POVS_SWITCH_CONTEXT *switchContextOut);
+static NDIS_STATUS OvsInitSwitchContext(POVS_SWITCH_CONTEXT switchContext);
+static VOID OvsDeleteSwitch(POVS_SWITCH_CONTEXT switchContext);
+static VOID OvsCleanupSwitchContext(POVS_SWITCH_CONTEXT switchContext);
+static NDIS_STATUS OvsActivateSwitch(POVS_SWITCH_CONTEXT switchContext);
+
+
+/*
+ * --------------------------------------------------------------------------
+ * Implements filter driver's FilterAttach function.
+ *
+ * This function allocates the switch context, and initializes its necessary
+ * members.
+ *
+ * Only one switch context may exist and only one attach may be in progress
+ * at a time; both conditions are checked under gOvsCtrlLock and a violating
+ * request is failed.  On success the new context is registered with NDIS,
+ * published through gOvsSwitchContext and left in the attached/paused
+ * state.  On failure everything created here is torn down again.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsExtAttach(NDIS_HANDLE ndisFilterHandle,
+             NDIS_HANDLE filterDriverContext,
+             PNDIS_FILTER_ATTACH_PARAMETERS attachParameters)
+{
+    NDIS_STATUS status = NDIS_STATUS_FAILURE;
+    NDIS_FILTER_ATTRIBUTES ovsExtAttributes;
+    POVS_SWITCH_CONTEXT switchContext = NULL;
+    /* TRUE only once this invocation has claimed gOvsInAttach. */
+    BOOLEAN ownsAttach = FALSE;
+
+    UNREFERENCED_PARAMETER(filterDriverContext);
+
+    OVS_LOG_TRACE("Enter: ndisFilterHandle %p", ndisFilterHandle);
+
+    ASSERT(filterDriverContext == (NDIS_HANDLE)gOvsExtDriverObject);
+    if (attachParameters->MiniportMediaType != NdisMedium802_3) {
+        status = NDIS_STATUS_INVALID_PARAMETER;
+        goto cleanup;
+    }
+
+    if (gOvsExtDriverHandle == NULL) {
+        OVS_LOG_TRACE("Exit: OVSEXT driver is not loaded.");
+        ASSERT(FALSE);
+        goto cleanup;
+    }
+
+    NdisAcquireSpinLock(gOvsCtrlLock);
+    if (gOvsSwitchContext) {
+        NdisReleaseSpinLock(gOvsCtrlLock);
+        OVS_LOG_TRACE("Exit: Failed to create OVS Switch, only one datapath is"
+                      "supported, %p.", gOvsSwitchContext);
+        goto cleanup;
+    }
+    if (gOvsInAttach) {
+        NdisReleaseSpinLock(gOvsCtrlLock);
+        /* Just fail the request. */
+        OVS_LOG_TRACE("Exit: Failed to create OVS Switch, since another attach"
+                      "instance is in attach process.");
+        goto cleanup;
+    }
+    gOvsInAttach = TRUE;
+    ownsAttach = TRUE;
+    NdisReleaseSpinLock(gOvsCtrlLock);
+
+    status = OvsInitIpHelper(ndisFilterHandle);
+    if (status != STATUS_SUCCESS) {
+        OVS_LOG_ERROR("Exit: Failed to initialize IP helper.");
+        goto cleanup;
+    }
+
+    status = OvsCreateSwitch(ndisFilterHandle, &switchContext);
+    if (status != NDIS_STATUS_SUCCESS) {
+        OvsCleanupIpHelper();
+        goto cleanup;
+    }
+    ASSERT(switchContext);
+
+    /*
+     * Register the switch context with NDIS so NDIS can pass it back to the
+     * Filterxxx callback functions as the 'FilterModuleContext' parameter.
+     */
+    RtlZeroMemory(&ovsExtAttributes, sizeof(NDIS_FILTER_ATTRIBUTES));
+    ovsExtAttributes.Header.Revision = NDIS_FILTER_ATTRIBUTES_REVISION_1;
+    ovsExtAttributes.Header.Size = sizeof(NDIS_FILTER_ATTRIBUTES);
+    ovsExtAttributes.Header.Type = NDIS_OBJECT_TYPE_FILTER_ATTRIBUTES;
+    ovsExtAttributes.Flags = 0;
+
+    NDIS_DECLARE_FILTER_MODULE_CONTEXT(OVS_SWITCH_CONTEXT);
+    status = NdisFSetAttributes(ndisFilterHandle, switchContext, &ovsExtAttributes);
+    if (status != NDIS_STATUS_SUCCESS) {
+        OVS_LOG_ERROR("Failed to set attributes.");
+        OvsCleanupIpHelper();
+        goto cleanup;
+    }
+
+    /* Setup the state machine. */
+    switchContext->controlFlowState = OvsSwitchAttached;
+    switchContext->dataFlowState = OvsSwitchPaused;
+
+    gOvsSwitchContext = switchContext;
+    KeMemoryBarrier();
+
+cleanup:
+    /*
+     * Only drop the in-attach marker if this call set it.  The early-exit
+     * paths above are reached while another attach may still own the flag,
+     * and clearing it for them would let a third attach run concurrently.
+     */
+    if (ownsAttach) {
+        gOvsInAttach = FALSE;
+    }
+    if (status != NDIS_STATUS_SUCCESS) {
+        if (switchContext != NULL) {
+            OvsDeleteSwitch(switchContext);
+        }
+    }
+    OVS_LOG_TRACE("Exit: status %x", status);
+
+    return status;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * This function allocates the switch context, and initializes its necessary
+ * members.
+ *
+ * On success the new context is stored in '*switchContextOut' and
+ * NDIS_STATUS_SUCCESS is returned.  On failure '*switchContextOut' is not
+ * written and everything allocated here has been released again.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsCreateSwitch(NDIS_HANDLE ndisFilterHandle,
+                POVS_SWITCH_CONTEXT *switchContextOut)
+{
+    NDIS_STATUS status;
+    POVS_SWITCH_CONTEXT switchContext;
+    NDIS_SWITCH_CONTEXT hostSwitchContext;
+    NDIS_SWITCH_OPTIONAL_HANDLERS hostSwitchHandler;
+
+    OVS_LOG_TRACE("Enter: Create switch object");
+
+    switchContext =
+        (POVS_SWITCH_CONTEXT) OvsAllocateMemory(sizeof(OVS_SWITCH_CONTEXT));
+    if (switchContext == NULL) {
+        status = NDIS_STATUS_RESOURCES;
+        goto create_switch_done;
+    }
+    RtlZeroMemory(switchContext, sizeof(OVS_SWITCH_CONTEXT));
+
+    /* Initialize the switch. */
+    hostSwitchHandler.Header.Type = NDIS_OBJECT_TYPE_SWITCH_OPTIONAL_HANDLERS;
+    hostSwitchHandler.Header.Size = NDIS_SIZEOF_SWITCH_OPTIONAL_HANDLERS_REVISION_1;
+    hostSwitchHandler.Header.Revision = NDIS_SWITCH_OPTIONAL_HANDLERS_REVISION_1;
+
+    status = NdisFGetOptionalSwitchHandlers(ndisFilterHandle,
+                                            &hostSwitchContext,
+                                            &hostSwitchHandler);
+    if (status != NDIS_STATUS_SUCCESS) {
+        OVS_LOG_ERROR("OvsExtAttach: Extension is running in "
+                      "non-switch environment.");
+        OvsFreeMemory(switchContext);
+        goto create_switch_done;
+    }
+
+    switchContext->NdisFilterHandle = ndisFilterHandle;
+    switchContext->NdisSwitchContext = hostSwitchContext;
+    RtlCopyMemory(&switchContext->NdisSwitchHandlers, &hostSwitchHandler,
+                  sizeof(NDIS_SWITCH_OPTIONAL_HANDLERS));
+
+    /* OvsInitSwitchContext() releases its own allocations when it fails. */
+    status = OvsInitSwitchContext(switchContext);
+    if (status != NDIS_STATUS_SUCCESS) {
+        OvsFreeMemory(switchContext);
+        goto create_switch_done;
+    }
+
+    status = OvsTunnelFilterInitialize(gOvsExtDriverObject);
+    if (status != NDIS_STATUS_SUCCESS) {
+        /*
+         * The context was fully initialized above, so its lock, arrays,
+         * flow table and buffer pool must be released before the context
+         * itself is freed.
+         */
+        OvsCleanupSwitchContext(switchContext);
+        OvsFreeMemory(switchContext);
+        goto create_switch_done;
+    }
+    *switchContextOut = switchContext;
+
+create_switch_done:
+    OVS_LOG_TRACE("Exit: switchContext: %p status: %#lx",
+                  switchContext, status);
+    return status;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * Implements filter driver's FilterDetach function.
+ *
+ * Marks the control flow as detached, polls until no OID requests are
+ * pending, then deletes the switch, cleans up the IP helper and clears the
+ * global switch pointer.  The data flow is expected to be paused already.
+ *
+ * NOTE(review): gOvsSwitchContext keeps pointing at the context while
+ * OvsDeleteSwitch() frees it and is only cleared afterwards.
+ * --------------------------------------------------------------------------
+ */
+_Use_decl_annotations_
+VOID
+OvsExtDetach(NDIS_HANDLE filterModuleContext)
+{
+ POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)filterModuleContext;
+
+ OVS_LOG_TRACE("Enter: filterModuleContext %p", filterModuleContext);
+
+ ASSERT(switchContext->dataFlowState == OvsSwitchPaused);
+ switchContext->controlFlowState = OvsSwitchDetached;
+ KeMemoryBarrier();
+ while(switchContext->pendingOidCount > 0) { /* wait for pending OIDs to drain */
+ NdisMSleep(1000);
+ }
+ OvsDeleteSwitch(switchContext);
+ OvsCleanupIpHelper();
+ gOvsSwitchContext = NULL;
+ /* This completes the cleanup, and a new attach can be handled now. */
+
+ OVS_LOG_TRACE("Exit: OvsDetach Successfully");
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * This function deletes the switch by freeing all memory previously allocated.
+ * It uninitializes the tunnel filter, clears all vports, cleans up the
+ * members of the switch context and finally frees the context itself, so
+ * 'switchContext' must not be dereferenced after this call.
+ * XXX need synchronization with other path.
+ * --------------------------------------------------------------------------
+ */
+VOID
+OvsDeleteSwitch(POVS_SWITCH_CONTEXT switchContext)
+{
+ UINT32 dpNo = switchContext->dpNo; /* saved now: needed for the trace after the free */
+
+ OVS_LOG_TRACE("Enter: switchContext:%p", switchContext);
+
+ OvsTunnelFilterUninitialize(gOvsExtDriverObject);
+ OvsClearAllSwitchVports(switchContext);
+ OvsCleanupSwitchContext(switchContext);
+ OvsFreeMemory(switchContext);
+ OVS_LOG_TRACE("Exit: deleted switch %p dpNo: %d", switchContext, dpNo); /* pointer is only printed, not dereferenced */
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * Implements filter driver's FilterRestart function.
+ *
+ * While the switch has neither been activated nor failed activation, the
+ * activation state is queried and, if the switch is active,
+ * OvsActivateSwitch() is run.  On success the data flow state moves from
+ * OvsSwitchPaused to OvsSwitchRunning.  Both failure paths return
+ * NDIS_STATUS_RESOURCES and leave the data flow state unchanged.
+ *
+ * NOTE(review): isActivateFailed is set only when the query fails, not when
+ * OvsActivateSwitch() fails, so a later restart attempts activation again.
+ * --------------------------------------------------------------------------
+ */
+_Use_decl_annotations_
+NDIS_STATUS
+OvsExtRestart(NDIS_HANDLE filterModuleContext,
+ PNDIS_FILTER_RESTART_PARAMETERS filterRestartParameters)
+{
+ POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)filterModuleContext;
+ NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+ BOOLEAN switchActive;
+
+ UNREFERENCED_PARAMETER(filterRestartParameters);
+
+ OVS_LOG_TRACE("Enter: filterModuleContext %p",
+ filterModuleContext);
+
+ /* Activate the switch if this is the first restart. */
+ if (!switchContext->isActivated && !switchContext->isActivateFailed) {
+ status = OvsQuerySwitchActivationComplete(switchContext,
+ &switchActive);
+ if (status != NDIS_STATUS_SUCCESS) {
+ switchContext->isActivateFailed = TRUE;
+ status = NDIS_STATUS_RESOURCES; /* the original error code is replaced */
+ goto cleanup;
+ }
+
+ if (switchActive) {
+ status = OvsActivateSwitch(switchContext);
+
+ if (status != NDIS_STATUS_SUCCESS) {
+ OVS_LOG_WARN("Failed to activate switch, dpNo:%d",
+ switchContext->dpNo);
+ status = NDIS_STATUS_RESOURCES;
+ goto cleanup;
+ }
+ }
+ }
+
+ ASSERT(switchContext->dataFlowState == OvsSwitchPaused);
+ switchContext->dataFlowState = OvsSwitchRunning;
+
+cleanup:
+ OVS_LOG_TRACE("Exit: Restart switch:%p, dpNo: %d, status: %#x",
+ switchContext, switchContext->dpNo, status);
+ return status;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * Implements filter driver's FilterPause function.
+ *
+ * Moves the data flow state from OvsSwitchRunning to OvsSwitchPaused and
+ * then polls until no OID requests are pending.  Always returns
+ * NDIS_STATUS_SUCCESS.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsExtPause(NDIS_HANDLE filterModuleContext,
+            PNDIS_FILTER_PAUSE_PARAMETERS pauseParameters)
+{
+    POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)filterModuleContext;
+
+    UNREFERENCED_PARAMETER(pauseParameters);
+    OVS_LOG_TRACE("Enter: filterModuleContext %p",
+                  filterModuleContext);
+
+    ASSERT(switchContext->dataFlowState == OvsSwitchRunning);
+    switchContext->dataFlowState = OvsSwitchPaused;
+    /* Make the state change visible before waiting for pending OIDs. */
+    KeMemoryBarrier();
+    while(switchContext->pendingOidCount > 0) {
+        NdisMSleep(1000);
+    }
+
+    /* The trace used to say "OvsDetach", copied from the detach handler. */
+    OVS_LOG_TRACE("Exit: OvsPause Successfully");
+    return NDIS_STATUS_SUCCESS;
+}
+
+static NDIS_STATUS
+OvsInitSwitchContext(POVS_SWITCH_CONTEXT switchContext)
+{ /*
+ * Allocates the dispatch lock, the vport array, the two vport hash
+ * arrays, the flow table and the buffer pool of 'switchContext' and sets
+ * the remaining members to their initial values.  If any step fails,
+ * whatever was allocated is released again and NDIS_STATUS_RESOURCES is
+ * returned; otherwise NDIS_STATUS_SUCCESS is returned.
+ */
+ int i;
+ NTSTATUS status;
+
+ OVS_LOG_TRACE("Enter: switchContext: %p", switchContext);
+
+ switchContext->dispatchLock =
+ NdisAllocateRWLock(switchContext->NdisFilterHandle);
+
+ switchContext->vportArray =
+ (PVOID *)OvsAllocateMemory(sizeof (PVOID) * OVS_MAX_VPORT_ARRAY_SIZE);
+ switchContext->nameHashArray = (PLIST_ENTRY)
+ OvsAllocateMemory(sizeof (LIST_ENTRY) * OVS_MAX_VPORT_ARRAY_SIZE);
+ switchContext->portHashArray = (PLIST_ENTRY)
+ OvsAllocateMemory(sizeof (LIST_ENTRY) * OVS_MAX_VPORT_ARRAY_SIZE);
+ status = OvsAllocateFlowTable(&switchContext->datapath, switchContext);
+
+ if (status == NDIS_STATUS_SUCCESS) {
+ status = OvsInitBufferPool(switchContext);
+ }
+ if (status != NDIS_STATUS_SUCCESS || /* all allocations are attempted first, then checked together */
+ switchContext->dispatchLock == NULL ||
+ switchContext->vportArray == NULL ||
+ switchContext->nameHashArray == NULL ||
+ switchContext->portHashArray == NULL) {
+ if (switchContext->dispatchLock) {
+ NdisFreeRWLock(switchContext->dispatchLock);
+ }
+ if (switchContext->vportArray) {
+ OvsFreeMemory(switchContext->vportArray);
+ }
+ if (switchContext->nameHashArray) {
+ OvsFreeMemory(switchContext->nameHashArray);
+ }
+ if (switchContext->portHashArray) {
+ OvsFreeMemory(switchContext->portHashArray);
+ }
+ OvsDeleteFlowTable(&switchContext->datapath); /* NOTE(review): called even if the flow table allocation failed -- confirm it tolerates that */
+ OvsCleanupBufferPool(switchContext); /* NOTE(review): called even if OvsInitBufferPool() never ran -- confirm it tolerates that */
+
+ OVS_LOG_TRACE("Exit: Failed to init switchContext");
+ return NDIS_STATUS_RESOURCES;
+ }
+
+ for (i = 0; i < OVS_MAX_VPORT_ARRAY_SIZE; i++) {
+ InitializeListHead(&switchContext->nameHashArray[i]);
+ }
+ for (i = 0; i < OVS_MAX_VPORT_ARRAY_SIZE; i++) {
+ InitializeListHead(&switchContext->portHashArray[i]);
+ }
+ RtlZeroMemory(switchContext->vportArray,
+ sizeof (PVOID) * OVS_MAX_VPORT_ARRAY_SIZE);
+
+ switchContext->isActivated = FALSE;
+ switchContext->isActivateFailed = FALSE;
+ switchContext->dpNo = OVS_DP_NUMBER;
+ switchContext->lastPortIndex = OVS_MAX_VPORT_ARRAY_SIZE -1;
+ ovsTimeIncrementPerTick = KeQueryTimeIncrement() / 10000; /* presumably 100ns units -> ms per tick; confirm */
+ OVS_LOG_TRACE("Exit: Succesfully initialized switchContext: %p",
+ switchContext);
+ return NDIS_STATUS_SUCCESS;
+}
+
+static VOID
+OvsCleanupSwitchContext(POVS_SWITCH_CONTEXT switchContext)
+{ /*
+ * Releases the members set up by OvsInitSwitchContext(): the dispatch
+ * lock, the hash arrays, the vport array, the flow table and the buffer
+ * pool.  The context structure itself is not freed here.
+ */
+ OVS_LOG_TRACE("Enter: Delete switchContext:%p", switchContext);
+
+ /* We need to do cleanup for tunnel port here. */
+ ASSERT(switchContext->numVports == 0);
+
+ NdisFreeRWLock(switchContext->dispatchLock);
+ OvsFreeMemory(switchContext->nameHashArray);
+ OvsFreeMemory(switchContext->portHashArray);
+ OvsFreeMemory(switchContext->vportArray);
+ OvsDeleteFlowTable(&switchContext->datapath);
+ OvsCleanupBufferPool(switchContext);
+ OVS_LOG_TRACE("Exit: Delete switchContext: %p", switchContext);
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * This function activates the switch by initializing it with all the runtime
+ * state. First it queries all of the MAC addresses set as custom switch policy
+ * to allow sends from, and adds them to the property list. Then it queries the
+ * NIC list and verifies it can support all of the NICs currently connected to
+ * the switch, and adds the NICs to the NIC list.
+ *
+ * In this implementation that is done by OvsAddConfiguredSwitchPorts()
+ * followed by OvsInitConfiguredSwitchNics().  On success isActivated is set
+ * and a default event is posted; the status of the failing step is returned
+ * otherwise.
+ * --------------------------------------------------------------------------
+ */
+static NDIS_STATUS
+OvsActivateSwitch(POVS_SWITCH_CONTEXT switchContext)
+{
+ NDIS_STATUS status;
+
+ ASSERT(!switchContext->isActivated);
+
+ OVS_LOG_TRACE("Enter: activate switch %p, dpNo: %ld",
+ switchContext, switchContext->dpNo);
+
+ status = OvsAddConfiguredSwitchPorts(switchContext);
+
+ if (status != NDIS_STATUS_SUCCESS) {
+ OVS_LOG_WARN("Failed to add configured switch ports");
+ goto cleanup;
+
+ }
+ status = OvsInitConfiguredSwitchNics(switchContext);
+
+ if (status != NDIS_STATUS_SUCCESS) {
+ OVS_LOG_WARN("Failed to add configured vports");
+ OvsClearAllSwitchVports(switchContext); /* undo the ports added by the first step */
+ goto cleanup;
+ }
+ switchContext->isActivated = TRUE;
+ OvsPostEvent(OVS_DEFAULT_PORT_NO, OVS_DEFAULT_EVENT_STATUS);
+
+cleanup:
+ OVS_LOG_TRACE("Exit: activate switch:%p, isActivated: %s, status = %lx",
+ switchContext,
+ (switchContext->isActivated ? "TRUE" : "FALSE"), status);
+ return status;
+}
+
+/*
+ * Returns the vport stored at 'index' in the global switch context's vport
+ * array, or NULL when 'index' is out of range or the slot is empty.
+ */
+PVOID
+OvsGetVportFromIndex(UINT16 index)
+{
+    if (index >= OVS_MAX_VPORT_ARRAY_SIZE) {
+        return NULL;
+    }
+    if (OVS_IS_VPORT_ENTRY_NULL(gOvsSwitchContext, index)) {
+        return NULL;
+    }
+    return gOvsSwitchContext->vportArray[index];
+}
+
+/*
+ * Returns the external vport recorded in the global switch context.
+ */
+PVOID
+OvsGetExternalVport()
+{
+    PVOID externalVport = gOvsSwitchContext->externalVport;
+
+    return externalVport;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * Implements filter driver's FilterNetPnPEvent function.
+ *
+ * On NetEventSwitchActivate the activation state is queried and, if the
+ * switch is active and not yet activated, OvsActivateSwitch() is run.  The
+ * event is forwarded with NdisFNetPnPEvent() only when the local handling
+ * succeeded; otherwise the local failure status is returned.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsExtNetPnPEvent(NDIS_HANDLE filterModuleContext,
+                  PNET_PNP_EVENT_NOTIFICATION netPnPEvent)
+{
+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+    POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)filterModuleContext;
+    BOOLEAN switchActive;
+
+    OVS_LOG_TRACE("Enter: filterModuleContext: %p, NetEvent: %d",
+                  filterModuleContext, (netPnPEvent->NetPnPEvent).NetEvent);
+    /*
+     * The only interesting event is the NetEventSwitchActivate. It provides
+     * an asynchronous notification of the switch completing activation.
+     */
+    if (netPnPEvent->NetPnPEvent.NetEvent == NetEventSwitchActivate) {
+        status = OvsQuerySwitchActivationComplete(switchContext, &switchActive);
+        if (status != NDIS_STATUS_SUCCESS) {
+            switchContext->isActivateFailed = TRUE;
+        } else {
+            ASSERT(switchContext->isActivated == FALSE);
+            ASSERT(switchActive == TRUE);
+            if (switchContext->isActivated == FALSE && switchActive == TRUE) {
+                status = OvsActivateSwitch(switchContext);
+                /*
+                 * A zero status means success, so the boolean printed here
+                 * has to be derived from a comparison, not from the raw
+                 * status value.
+                 */
+                OVS_LOG_TRACE("OvsExtNetPnPEvent: activated switch: %p "
+                              "status: %s", switchContext,
+                              status == NDIS_STATUS_SUCCESS ? "TRUE" : "FALSE");
+            }
+        }
+    }
+
+    if (status == NDIS_STATUS_SUCCESS) {
+        status = NdisFNetPnPEvent(switchContext->NdisFilterHandle,
+                                  netPnPEvent);
+    }
+    OVS_LOG_TRACE("Exit: OvsExtNetPnPEvent");
+
+    return status;
+}
diff --git a/datapath-windows/ovsext/OvsSwitch.h b/datapath-windows/ovsext/OvsSwitch.h
new file mode 100644
index 000000000..d49fe9b70
--- /dev/null
+++ b/datapath-windows/ovsext/OvsSwitch.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This file contains the definition of the switch object for the OVS.
+ */
+
+#ifndef __OVS_SWITCH_H_
+#define __OVS_SWITCH_H_ 1
+
+#include "OvsNetProto.h"
+#include "OvsBufferMgmt.h"
+#define OVS_MAX_VPORT_ARRAY_SIZE 1024
+
+#define OVS_VPORT_MASK (OVS_MAX_VPORT_ARRAY_SIZE - 1)
+
+#define OVS_INTERNAL_VPORT_DEFAULT_INDEX 0
+
+//Tunnel port indices
+#define RESERVED_START_INDEX1 1
+#define OVS_TUNNEL_INDEX_START RESERVED_START_INDEX1
+#define OVS_VXLAN_VPORT_INDEX 2
+#define OVS_GRE_VPORT_INDEX 3
+#define OVS_GRE64_VPORT_INDEX 4
+#define OVS_TUNNEL_INDEX_END OVS_GRE64_VPORT_INDEX
+
+#define OVS_EXTERNAL_VPORT_START 8
+#define OVS_EXTERNAL_VPORT_END 40
+#define OVS_INTERNAL_VPORT_START 40
+#define OVS_INTERNAL_VPOR_END 72
+#define OVS_VM_VPORT_START 72
+#define OVS_VM_VPORT_MAX 0xffff
+#define OVS_VPORT_INDEX(_portNo) ((_portNo) & 0xffffff)
+#define OVS_VPORT_PORT_NO(_index, _gen) \
+ (((_index) & 0xffffff) | ((UINT32)(_gen) << 24))
+#define OVS_VPORT_GEN(portNo) ((portNo) >> 24) // argument parenthesized so compound expressions shift as a whole
+
+#define OVS_MAX_PHYS_ADAPTERS 32
+#define OVS_MAX_IP_VPOR 32
+
+#define OVS_HASH_BASIS 0x13578642
+
+typedef struct _OVS_DATAPATH // Flow table state; embedded in OVS_SWITCH_CONTEXT as 'datapath'.
+{
+ PLIST_ENTRY flowTable; // Contains OvsFlows.
+ UINT32 nFlows; // Number of entries in flowTable.
+
+ // List_Links queues[64]; // Hash table of queue IDs.
+
+ /* Statistics. */
+ UINT64 hits; // Number of flow table hits.
+ UINT64 misses; // Number of flow table misses.
+ UINT64 lost; // Number of dropped misses.
+
+ /* Used to protect the flows in the flowtable. */
+ PNDIS_RW_LOCK_EX lock; // Taken through OvsAcquireDatapathRead/Write().
+} OVS_DATAPATH, *POVS_DATAPATH;
+
+/*
+ * OVS_SWITCH_CONTEXT
+ *
+ * The context allocated per switch. For OVS, we only
+ * support one switch, which corresponds to one datapath.
+ * Each datapath can have multiple logical bridges configured,
+ * which are maintained by vswitchd.
+ */
+
+typedef enum OVS_SWITCH_DATAFLOW_STATE
+{
+ OvsSwitchPaused, // Set by OvsExtAttach() and OvsExtPause().
+ OvsSwitchRunning // Set by OvsExtRestart().
+} OVS_SWITCH_DATAFLOW_STATE, *POVS_SWITCH_DATAFLOW_STATE;
+
+typedef enum OVS_SWITCH_CONTROFLOW_STATE
+{
+ OvsSwitchUnknown, // Value 0, i.e. the state of a freshly zeroed context.
+ OvsSwitchAttached, // Set by OvsExtAttach().
+ OvsSwitchDetached // Set by OvsExtDetach().
+} OVS_SWITCH_CONTROLFLOW_STATE, *POVS_SWITCH_CONTROLFLOW_STATE;
+
+// XXX: Take care of alignment and grouping members by cacheline
+typedef struct _OVS_SWITCH_CONTEXT
+{
+ /* Coarse and fine-grained switch states. */
+ OVS_SWITCH_DATAFLOW_STATE dataFlowState;
+ OVS_SWITCH_CONTROLFLOW_STATE controlFlowState;
+ BOOLEAN isActivated; // Set once OvsActivateSwitch() succeeded.
+ BOOLEAN isActivateFailed; // Set when the activation query failed.
+
+ UINT32 dpNo;
+
+ NDIS_SWITCH_PORT_ID externalPortId;
+ NDIS_SWITCH_PORT_ID internalPortId;
+ PVOID externalVport; // the virtual adapter vport
+ PVOID internalVport;
+
+ PVOID *vportArray; // OVS_MAX_VPORT_ARRAY_SIZE entries
+ PLIST_ENTRY nameHashArray; // based on ovsName
+ PLIST_ENTRY portHashArray; // based on portId
+
+ UINT32 numPhysicalNics;
+ UINT32 numVports; // include validation port
+ UINT32 lastPortIndex;
+
+ /* Lock taken over the switch. This protects the ports on the switch. */
+ PNDIS_RW_LOCK_EX dispatchLock;
+
+ /* The flowtable. */
+ OVS_DATAPATH datapath;
+
+ /* Handle to the OVSExt filter driver. Same as 'gOvsExtDriverHandle'. */
+ NDIS_HANDLE NdisFilterHandle; // NOTE(review): assigned from the handle passed to FilterAttach -- confirm the comment above
+
+ /* Handle and callbacks exposed by the underlying hyper-v switch. */
+ NDIS_SWITCH_CONTEXT NdisSwitchContext;
+ NDIS_SWITCH_OPTIONAL_HANDLERS NdisSwitchHandlers;
+
+ volatile LONG pendingInjectedNblCount;
+ volatile LONG pendingOidCount; // OvsExtPause()/OvsExtDetach() poll until this is 0
+
+ OVS_NBL_POOL ovsPool;
+} OVS_SWITCH_CONTEXT, *POVS_SWITCH_CONTEXT;
+
+
+static __inline VOID
+OvsAcquireDatapathRead(OVS_DATAPATH *datapath,
+ LOCK_STATE_EX *lockState,
+ BOOLEAN dispatch)
+{ /*
+ * Takes datapath->lock for reading; 'lockState' and 'dispatch' are
+ * passed straight through to NdisAcquireRWLockRead().
+ */
+ ASSERT(datapath);
+ NdisAcquireRWLockRead(datapath->lock, lockState, dispatch);
+}
+
+static __inline VOID
+OvsAcquireDatapathWrite(OVS_DATAPATH *datapath,
+ LOCK_STATE_EX *lockState,
+ BOOLEAN dispatch)
+{ /*
+ * Takes datapath->lock for writing; 'lockState' and 'dispatch' are
+ * passed straight through to NdisAcquireRWLockWrite().
+ */
+ ASSERT(datapath);
+ NdisAcquireRWLockWrite(datapath->lock, lockState, dispatch);
+}
+
+
+static __inline VOID
+OvsReleaseDatapath(OVS_DATAPATH *datapath,
+ LOCK_STATE_EX *lockState)
+{ /*
+ * Releases datapath->lock; 'lockState' is passed straight through to
+ * NdisReleaseRWLock().
+ */
+ ASSERT(datapath);
+ NdisReleaseRWLock(datapath->lock, lockState);
+}
+
+
+PVOID OvsGetVportFromIndex(UINT16 index);
+PVOID OvsGetExternalVport();
+
+#endif /* __OVS_SWITCH_H_ */
diff --git a/datapath-windows/ovsext/OvsTunnel.c b/datapath-windows/ovsext/OvsTunnel.c
new file mode 100644
index 000000000..b5a369a9d
--- /dev/null
+++ b/datapath-windows/ovsext/OvsTunnel.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * OvsTunnel.c
+ * WFP Classified callback function and Action code for injecting a packet to the vswitch
+ */
+
+#include "precomp.h"
+
+#pragma warning(push)
+#pragma warning(disable:4201) // unnamed struct/union
+#include <fwpsk.h>
+#pragma warning(pop)
+
+#pragma warning( push )
+#pragma warning( disable:4127 )
+
+#include <fwpmk.h>
+#include "OvsTunnel.h"
+#include "OvsIoctl.h"
+#include "OvsSwitch.h"
+#include "OvsVport.h"
+#include "OvsEvent.h"
+#include "OvsUser.h"
+#include "OvsVxlan.h"
+#include "OvsPacketIO.h"
+#include "OvsNetProto.h"
+#include "OvsFlow.h"
+
+extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
+
+static NTSTATUS
+OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl,
+ OVS_TUNNEL_PENDED_PACKET *packet);
+
+VOID OvsAcquireDatapathRead(OVS_DATAPATH *datapath,
+ LOCK_STATE_EX *lockState,
+ BOOLEAN dispatch);
+VOID OvsAcquireDatapathWrite(OVS_DATAPATH *datapath,
+ LOCK_STATE_EX *lockState,
+ BOOLEAN dispatch);
+VOID OvsReleaseDatapath(OVS_DATAPATH *datapath,
+ LOCK_STATE_EX *lockState);
+
+
+/*
+ * notifyFn of the tunnel callout. No per-filter bookkeeping is done: all
+ * three arguments are ignored and STATUS_SUCCESS is always returned.
+ */
+NTSTATUS
+OvsTunnelNotify(FWPS_CALLOUT_NOTIFY_TYPE notifyType,
+                const GUID *filterKey,
+                const FWPS_FILTER *filter)
+{
+    UNREFERENCED_PARAMETER(filter);
+    UNREFERENCED_PARAMETER(filterKey);
+    UNREFERENCED_PARAMETER(notifyType);
+
+    return STATUS_SUCCESS;
+}
+
+/*
+ * Takes one inbound datagram captured by the classify callback, blocks the
+ * original from the host stack, copies it (from the IP header onwards) into
+ * a newly allocated NBL and passes the copy to
+ * OvsInjectPacketThroughActions().
+ *
+ * The original net buffer is retreated to the IP header for the duration of
+ * the copy and advanced back before returning.
+ *
+ * Returns the status of OvsInjectPacketThroughActions() when the copy was
+ * made, or a failure status when the retreat, the allocation or the copy
+ * failed.
+ */
+static NTSTATUS
+OvsTunnelAnalyzePacket(OVS_TUNNEL_PENDED_PACKET *packet)
+{
+    NTSTATUS status = STATUS_SUCCESS;
+    UINT32 packetLength = 0;
+    ULONG bytesCopied = 0;
+    ULONG headersSize = packet->ipHeaderSize + packet->transportHeaderSize;
+    NET_BUFFER_LIST *copiedNBL = NULL;
+    NET_BUFFER *netBuffer;
+    NDIS_STATUS ndisStatus;
+
+    /*
+     * For inbound net buffer list, we can assume it contains only one
+     * net buffer (unless it was reassembled from fragments). In both cases
+     * the first net buffer should include all headers.
+     */
+    netBuffer = NET_BUFFER_LIST_FIRST_NB(packet->netBufferList);
+
+    /* Drop the packet from the host stack */
+    packet->classifyOut->actionType = FWP_ACTION_BLOCK;
+    packet->classifyOut->rights &= ~FWPS_RIGHT_ACTION_WRITE;
+
+    /* Adjust the net buffer list offset to the start of the IP header */
+    ndisStatus = NdisRetreatNetBufferDataStart(netBuffer, headersSize,
+                                               0, NULL);
+    ASSERT(ndisStatus == NDIS_STATUS_SUCCESS);
+    if (ndisStatus != NDIS_STATUS_SUCCESS) {
+        /*
+         * The data start was not moved, so it must not be advanced again;
+         * the packet stays blocked.
+         */
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    /* Single NBL element for WFP */
+    ASSERT(packet->netBufferList->Next == NULL);
+
+    /* Note that the copy will inherit the original net buffer list's offset */
+    packetLength = NET_BUFFER_DATA_LENGTH(netBuffer);
+    copiedNBL = OvsAllocateVariableSizeNBL(gOvsSwitchContext, packetLength,
+                                           OVS_DEFAULT_HEADROOM_SIZE);
+    if (copiedNBL == NULL) {
+        status = STATUS_INSUFFICIENT_RESOURCES;
+        goto analyzeDone;
+    }
+
+    ndisStatus = NdisCopyFromNetBufferToNetBuffer(
+                     NET_BUFFER_LIST_FIRST_NB(copiedNBL), 0, packetLength,
+                     netBuffer, 0, &bytesCopied);
+    if (ndisStatus != NDIS_STATUS_SUCCESS || packetLength != bytesCopied) {
+        /* A short copy with a success status is still a failure for us. */
+        status = (ndisStatus != NDIS_STATUS_SUCCESS) ?
+                     ndisStatus : STATUS_UNSUCCESSFUL;
+        goto analyzeFreeNBL;
+    }
+
+    /* From here on the copy is owned by the injection path. */
+    status = OvsInjectPacketThroughActions(copiedNBL, packet);
+    goto analyzeDone;
+
+analyzeFreeNBL:
+    OvsCompleteNBL(gOvsSwitchContext, copiedNBL, TRUE);
+analyzeDone:
+    /* Undo the adjustment on the original net buffer list */
+    NdisAdvanceNetBufferDataStart(netBuffer, headersSize, FALSE, NULL);
+    return status;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * This is the classifyFn function of the datagram-data callout. It fills an
+ * OVS_TUNNEL_PENDED_PACKET on the stack with the classify output, the net
+ * buffer list ('layerData') and the IP/transport header sizes taken from
+ * 'inMetaValues', and processes it synchronously by calling
+ * OvsTunnelAnalyzePacket(). Nothing is allocated or queued in this function.
+ *
+ * If the callout does not hold FWPS_RIGHT_ACTION_WRITE the function returns
+ * without touching the packet.
+ * --------------------------------------------------------------------------
+ */
+VOID
+OvsTunnelClassify(const FWPS_INCOMING_VALUES *inFixedValues,
+ const FWPS_INCOMING_METADATA_VALUES *inMetaValues,
+ VOID *layerData,
+ const VOID *classifyContext,
+ const FWPS_FILTER *filter,
+ UINT64 flowContext,
+ FWPS_CLASSIFY_OUT *classifyOut)
+{
+ OVS_TUNNEL_PENDED_PACKET packetStorage;
+ OVS_TUNNEL_PENDED_PACKET *packet = &packetStorage;
+ /* NOTE(review): 'direction' is only consumed by an ASSERT below. */
+ FWP_DIRECTION direction;
+
+ UNREFERENCED_PARAMETER(classifyContext);
+ UNREFERENCED_PARAMETER(filter);
+ UNREFERENCED_PARAMETER(flowContext);
+
+ ASSERT(layerData != NULL);
+
+ /* We don't have the necessary right to alter the packet flow */
+ if ((classifyOut->rights & FWPS_RIGHT_ACTION_WRITE) == 0) {
+ /* XXX TBD revisit protect against other filters owning this packet */
+ ASSERT(FALSE);
+ goto Exit;
+ }
+
+ RtlZeroMemory(packet, sizeof(OVS_TUNNEL_PENDED_PACKET));
+
+ /* classifyOut cannot be accessed from a different thread context */
+ packet->classifyOut = classifyOut;
+
+ if (inFixedValues->layerId == FWPS_LAYER_DATAGRAM_DATA_V4) {
+ direction =
+ inFixedValues->incomingValue[FWPS_FIELD_DATAGRAM_DATA_V4_DIRECTION].\
+ value.uint32;
+ }
+ else {
+ ASSERT(inFixedValues->layerId == FWPS_LAYER_DATAGRAM_DATA_V6);
+ direction =
+ inFixedValues->incomingValue[FWPS_FIELD_DATAGRAM_DATA_V6_DIRECTION].\
+ value.uint32;
+ }
+
+ packet->netBufferList = layerData;
+
+ ASSERT(FWPS_IS_METADATA_FIELD_PRESENT(inMetaValues,
+ FWPS_METADATA_FIELD_COMPARTMENT_ID));
+
+ ASSERT(direction == FWP_DIRECTION_INBOUND);
+
+ ASSERT(FWPS_IS_METADATA_FIELD_PRESENT(
+ inMetaValues,
+ FWPS_METADATA_FIELD_IP_HEADER_SIZE));
+ ASSERT(FWPS_IS_METADATA_FIELD_PRESENT(
+ inMetaValues,
+ FWPS_METADATA_FIELD_TRANSPORT_HEADER_SIZE));
+
+ /* The header sizes are read even if the ASSERTs above compile away. */
+ packet->ipHeaderSize = inMetaValues->ipHeaderSize;
+ packet->transportHeaderSize = inMetaValues->transportHeaderSize;
+
+ /*
+ * NOTE(review): this indexes incomingValue with the V4 field id even when
+ * layerId is the V6 layer accepted above -- confirm this is intended.
+ */
+ ASSERT(inFixedValues->incomingValue[FWPS_FIELD_DATAGRAM_DATA_V4_IP_PROTOCOL].value.uint8 == IPPROTO_UDP );
+ /* The returned status is not examined. */
+ OvsTunnelAnalyzePacket(packet);
+
+Exit:
+ ;
+}
+
+
+/*
+ * Runs a received tunnel packet through the flow table.
+ *
+ * 'pNbl' is the copy made by OvsTunnelAnalyzePacket(). The tunnel key is
+ * filled in by OvsSlowPathDecapVxlan(), then the data start is advanced past
+ * the IP header, the transport header and the VXLAN header. The flow key is
+ * extracted with the VXLAN tunnel vport as the input port.
+ *  - Flow found: the flow's actions are executed on 'pNbl'.
+ *  - No flow: a miss element is queued to OVS_DEFAULT_PACKET_QUEUE and
+ *    'pNbl' is completed here.
+ * Every path that ends at 'unlockAndDrop' or 'dropit' completes 'pNbl' with
+ * OvsCompleteNBL().
+ *
+ * Lock order: the switch dispatchLock is taken first (read), then the
+ * datapath lock (read); they are released in the opposite order.
+ */
+static NTSTATUS
+OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl,
+ OVS_TUNNEL_PENDED_PACKET *packet)
+{
+ NTSTATUS status = STATUS_SUCCESS;
+ OvsIPv4TunnelKey tunnelKey;
+ NET_BUFFER *pNb;
+ ULONG sendCompleteFlags = 0;
+ BOOLEAN dispatch;
+ PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
+ LOCK_STATE_EX lockState, dpLockState;
+ LIST_ENTRY missedPackets;
+ OvsCompletionList completionList;
+ KIRQL irql;
+ ULONG SendFlags = NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP;
+ /* NOTE(review): gOvsSwitchContext is used here, before the ASSERT below. */
+ OVS_DATAPATH *datapath = &gOvsSwitchContext->datapath;
+
+ ASSERT(gOvsSwitchContext);
+
+ /* Fill the tunnel key */
+ status = OvsSlowPathDecapVxlan(pNbl, &tunnelKey);
+
+ if(!NT_SUCCESS(status)) {
+ goto dropit;
+ }
+
+ pNb = NET_BUFFER_LIST_FIRST_NB(pNbl);
+
+ /* Skip the IP, transport and VXLAN headers of the received packet. */
+ NdisAdvanceNetBufferDataStart(pNb,
+ packet->transportHeaderSize + packet->ipHeaderSize +
+ sizeof(VXLANHdr),
+ FALSE,
+ NULL);
+
+ /* Most likely (always) dispatch irql */
+ irql = KeGetCurrentIrql();
+
+ /* dispatch is used for datapath lock as well */
+ dispatch = (irql == DISPATCH_LEVEL) ? NDIS_RWL_AT_DISPATCH_LEVEL : 0;
+ if (dispatch) {
+ sendCompleteFlags |= NDIS_SEND_COMPLETE_FLAGS_DISPATCH_LEVEL;
+ }
+
+ InitializeListHead(&missedPackets);
+ /*
+ * NOTE(review): the completion list is initialized and handed to
+ * OvsActionsExecute(), but nothing in this function finalizes it --
+ * confirm who does.
+ */
+ OvsInitCompletionList(&completionList, gOvsSwitchContext,
+ sendCompleteFlags);
+
+ {
+ POVS_VPORT_ENTRY vport;
+ UINT32 portNo;
+ OVS_PACKET_HDR_INFO layers;
+ OvsFlowKey key;
+ UINT64 hash;
+ PNET_BUFFER curNb;
+ OvsFlow *flow;
+
+ /* NOTE(review): fwdDetail is assigned but not read in this function. */
+ fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl);
+
+ /*
+ * XXX WFP packets contain a single NBL structure.
+ * Reassembled packet "may" have multiple NBs, however, a simple test shows
+ * that the packet still has a single NB (after reassemble)
+ * We still need to check if the Ethernet header of the inner packet is in a single MD
+ */
+
+ curNb = NET_BUFFER_LIST_FIRST_NB(pNbl);
+ ASSERT(curNb->Next == NULL);
+
+ NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, dispatch);
+
+ /* Lock the flowtable for the duration of accessing the flow */
+ OvsAcquireDatapathRead(datapath, &dpLockState, NDIS_RWL_AT_DISPATCH_LEVEL);
+
+ SendFlags |= NDIS_SEND_FLAGS_DISPATCH_LEVEL;
+
+ vport = OvsGetTunnelVport(OVSWIN_VPORT_TYPE_VXLAN);
+
+ if (vport == NULL){
+ status = STATUS_UNSUCCESSFUL;
+ goto unlockAndDrop;
+ }
+
+ ASSERT(vport->ovsType == OVSWIN_VPORT_TYPE_VXLAN);
+
+ portNo = vport->portNo;
+
+ status = OvsExtractFlow(pNbl, portNo, &key, &layers, &tunnelKey);
+ if (status != NDIS_STATUS_SUCCESS) {
+ goto unlockAndDrop;
+ }
+
+ flow = OvsLookupFlow(datapath, &key, &hash, FALSE);
+ if (flow) {
+ OvsFlowUsed(flow, pNbl, &layers);
+ datapath->hits++;
+
+ /*
+ * pNbl is not completed by this function on this path; presumably
+ * OvsActionsExecute() takes it over -- TODO confirm.
+ */
+ OvsActionsExecute(gOvsSwitchContext, &completionList, pNbl,
+ portNo, SendFlags, &key, &hash, &layers,
+ flow->actions, flow->actionsLen);
+
+ OvsReleaseDatapath(datapath, &dpLockState);
+ } else {
+ POVS_PACKET_QUEUE_ELEM elem;
+
+ datapath->misses++;
+ elem = OvsCreateQueuePacket(1, NULL, 0, OVS_PACKET_CMD_MISS,
+ portNo, &key.tunKey, pNbl, curNb,
+ TRUE, &layers);
+ if (elem) {
+ /* Complete the packet since it was copied to user buffer. */
+ InsertTailList(&missedPackets, &elem->link);
+ OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, 1);
+ } else {
+ status = STATUS_INSUFFICIENT_RESOURCES;
+ }
+ /* Hit or miss on the queue, the NBL itself is completed below. */
+ goto unlockAndDrop;
+ }
+
+ /* Only the flow-hit path gets here; the datapath lock is already released. */
+ NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
+
+ }
+
+ return status;
+
+unlockAndDrop:
+ OvsReleaseDatapath(datapath, &dpLockState);
+ NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
+dropit:
+ pNbl = OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE);
+ ASSERT(pNbl == NULL);
+ return status;
+}
+
+#pragma warning(pop)
diff --git a/datapath-windows/ovsext/OvsTunnel.h b/datapath-windows/ovsext/OvsTunnel.h
new file mode 100644
index 000000000..110ff747b
--- /dev/null
+++ b/datapath-windows/ovsext/OvsTunnel.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_TUNNEL_H_
+#define __OVS_TUNNEL_H_ 1
+
+//
+// OVS_TUNNEL_PENDED_PACKET bundles what the classify callback knows about
+// one datagram: the net buffer list, the sizes of its IP and transport
+// headers, and the classify output the verdict is written to.
+//
+
+typedef struct OVS_TUNNEL_PENDED_PACKET_
+{
+ /* Common fields for inbound and outbound traffic */
+ NET_BUFFER_LIST *netBufferList;
+
+ /* Header sizes as reported in the WFP metadata values. */
+ UINT32 ipHeaderSize;
+ UINT32 transportHeaderSize;
+ /* Classify output of the callout invocation this packet came from. */
+ FWPS_CLASSIFY_OUT *classifyOut;
+} OVS_TUNNEL_PENDED_PACKET;
+
+/* Shared global data. */
+
+extern UINT16 configNewDestPort;
+
+extern UINT32 gCalloutIdV4;
+
+//
+// Shared function prototypes
+//
+VOID OvsTunnelClassify(const FWPS_INCOMING_VALUES *inFixedValues,
+ const FWPS_INCOMING_METADATA_VALUES *inMetaValues,
+ VOID *layerData,
+ const VOID *classifyContext,
+ const FWPS_FILTER *filter,
+ UINT64 flowContext,
+ FWPS_CLASSIFY_OUT *classifyOut);
+
+
+NTSTATUS OvsTunnelNotify(FWPS_CALLOUT_NOTIFY_TYPE notifyType,
+ const GUID *filterKey,
+ const FWPS_FILTER *filter);
+
+#endif /* __OVS_TUNNEL_H_ */
diff --git a/datapath-windows/ovsext/OvsTunnelFilter.c b/datapath-windows/ovsext/OvsTunnelFilter.c
new file mode 100644
index 000000000..a1de00734
--- /dev/null
+++ b/datapath-windows/ovsext/OvsTunnelFilter.c
@@ -0,0 +1,343 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "precomp.h"
+
+#pragma warning(push)
+#pragma warning(disable:4201) // unnamed struct/union
+
+
+#include <fwpsk.h>
+
+#pragma warning(pop)
+
+#include <fwpmk.h>
+#include <ws2ipdef.h>
+#include <in6addr.h>
+#include <ip2string.h>
+
+#include "OvsTunnel.h"
+#include "OvsSwitch.h"
+#include "OvsVport.h"
+#include "OvsEvent.h"
+#include "OvsUser.h"
+#include "OvsVxlan.h"
+
+
+#define INITGUID
+#include <guiddef.h>
+
+
+/* Configurable parameters (addresses and ports are in host order) */
+UINT16 configNewDestPort = VXLAN_UDP_PORT;
+
+/*
+ * Callout and sublayer GUIDs
+ */
+// b16b0a6e-2b2a-41a3-8b39-bd3ffc855ff8
+DEFINE_GUID(
+ OVS_TUNNEL_CALLOUT_V4,
+ 0xb16b0a6e,
+ 0x2b2a,
+ 0x41a3,
+ 0x8b, 0x39, 0xbd, 0x3f, 0xfc, 0x85, 0x5f, 0xf8
+ );
+
+/* 0104fd7e-c825-414e-94c9-f0d525bbc169 */
+DEFINE_GUID(
+ OVS_TUNNEL_SUBLAYER,
+ 0x0104fd7e,
+ 0xc825,
+ 0x414e,
+ 0x94, 0xc9, 0xf0, 0xd5, 0x25, 0xbb, 0xc1, 0x69
+ );
+
+/*
+ * Callout driver global variables
+ */
+PDEVICE_OBJECT gDeviceObject;
+
+HANDLE gEngineHandle;
+UINT32 gCalloutIdV4;
+
+
+/* Callout driver implementation */
+
+/*
+ * Adds one WFP filter to the engine session 'gEngineHandle'.
+ *
+ * The filter lives in layer '*layerKey' and sub-layer OVS_TUNNEL_SUBLAYER,
+ * invokes the terminating callout '*calloutKey', and matches on two
+ * conditions: the traffic direction equals 'direction' and the local IP
+ * port equals 'remotePort'. Despite its name, 'remotePort' is compared
+ * against FWPM_CONDITION_IP_LOCAL_PORT.
+ *
+ * 'filterName' and 'filterDesc' are used as display data, 'context' is
+ * stored as the filter's raw context. Returns the status of FwpmFilterAdd().
+ */
+NTSTATUS
+OvsTunnelAddFilter(PWSTR filterName,
+                   const PWSTR filterDesc,
+                   USHORT remotePort,
+                   FWP_DIRECTION direction,
+                   UINT64 context,
+                   const GUID *layerKey,
+                   const GUID *calloutKey)
+{
+    FWPM_FILTER filter = {0};
+    FWPM_FILTER_CONDITION filterConditions[3] = {0};
+    UINT conditionIndex = 0;
+
+    filter.layerKey = *layerKey;
+    filter.displayData.name = (wchar_t*)filterName;
+    filter.displayData.description = (wchar_t*)filterDesc;
+
+    filter.action.type = FWP_ACTION_CALLOUT_TERMINATING;
+    filter.action.calloutKey = *calloutKey;
+    filter.filterCondition = filterConditions;
+    filter.subLayerKey = OVS_TUNNEL_SUBLAYER;
+    filter.weight.type = FWP_EMPTY; // auto-weight.
+    filter.rawContext = context;
+
+    /* Condition 1: traffic direction. */
+    filterConditions[conditionIndex].fieldKey = FWPM_CONDITION_DIRECTION;
+    filterConditions[conditionIndex].matchType = FWP_MATCH_EQUAL;
+    filterConditions[conditionIndex].conditionValue.type = FWP_UINT32;
+    filterConditions[conditionIndex].conditionValue.uint32 = direction;
+    conditionIndex++;
+
+    /* Condition 2: local port. */
+    filterConditions[conditionIndex].fieldKey = FWPM_CONDITION_IP_LOCAL_PORT;
+    filterConditions[conditionIndex].matchType = FWP_MATCH_EQUAL;
+    filterConditions[conditionIndex].conditionValue.type = FWP_UINT16;
+    filterConditions[conditionIndex].conditionValue.uint16 = remotePort;
+    conditionIndex++;
+
+    /* Only the conditions filled in above are handed to the engine. */
+    filter.numFilterConditions = conditionIndex;
+
+    return FwpmFilterAdd(gEngineHandle,
+                         &filter,
+                         NULL,
+                         NULL);
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * Registers the OVS tunnel callout and its inbound filter for one WFP layer.
+ *
+ * The callout ('*calloutKey', classifyFn OvsTunnelClassify, notifyFn
+ * OvsTunnelNotify) is registered against 'deviceObject', added to the
+ * engine for layer '*layerKey', and an inbound filter for
+ * 'configNewDestPort' is attached to it. '*calloutId' receives the run-time
+ * callout id.
+ *
+ * If anything fails after the callout was registered, the callout is
+ * unregistered again and '*calloutId' is reset to 0.
+ * --------------------------------------------------------------------------
+ */
+NTSTATUS
+OvsTunnelRegisterDatagramDataCallouts(const GUID *layerKey,
+                                      const GUID *calloutKey,
+                                      VOID *deviceObject,
+                                      UINT32 *calloutId)
+{
+    NTSTATUS result;
+    FWPS_CALLOUT runtimeCallout = {0};
+    FWPM_CALLOUT engineCallout = {0};
+
+    runtimeCallout.calloutKey = *calloutKey;
+    runtimeCallout.classifyFn = OvsTunnelClassify;
+    runtimeCallout.notifyFn = OvsTunnelNotify;
+#if FLOW_CONTEXT
+    /* Currently we don't associate a context with the flow */
+    runtimeCallout.flowDeleteFn = OvsTunnelFlowDelete;
+    runtimeCallout.flags = FWP_CALLOUT_FLAG_CONDITIONAL_ON_FLOW;
+#endif
+
+    result = FwpsCalloutRegister(deviceObject, &runtimeCallout, calloutId);
+    if (!NT_SUCCESS(result)) {
+        /* Nothing registered yet, so nothing to roll back. */
+        return result;
+    }
+
+    engineCallout.calloutKey = *calloutKey;
+    engineCallout.displayData.name = L"Datagram-Data OVS Callout";
+    engineCallout.displayData.description =
+        L"Proxies destination address/port for UDP";
+    engineCallout.applicableLayer = *layerKey;
+
+    result = FwpmCalloutAdd(gEngineHandle, &engineCallout, NULL, NULL);
+    if (NT_SUCCESS(result)) {
+        result = OvsTunnelAddFilter(L"Datagram-Data OVS Filter (Inbound)",
+                                    L"address/port for UDP",
+                                    configNewDestPort,
+                                    FWP_DIRECTION_INBOUND,
+                                    0,
+                                    layerKey,
+                                    calloutKey);
+    }
+
+    if (!NT_SUCCESS(result)) {
+        /* Roll back the run-time registration made above. */
+        FwpsCalloutUnregisterById(*calloutId);
+        *calloutId = 0;
+    }
+
+    return result;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * Opens a dynamic WFP engine session (stored in 'gEngineHandle') and, within
+ * a single transaction, adds the OVS sub-layer and registers the callout and
+ * filter that intercept UDP traffic at FWPM_LAYER_DATAGRAM_DATA_V4. The
+ * run-time callout id is stored in 'gCalloutIdV4'.
+ *
+ * On failure everything done so far is rolled back: the transaction is
+ * aborted, the engine session is closed ('gEngineHandle' becomes NULL) and a
+ * callout that was already registered is unregistered again.
+ * --------------------------------------------------------------------------
+ */
+NTSTATUS
+OvsTunnelRegisterCallouts(VOID *deviceObject)
+{
+    NTSTATUS status = STATUS_SUCCESS;
+    FWPM_SUBLAYER OvsTunnelSubLayer;
+
+    BOOLEAN engineOpened = FALSE;
+    BOOLEAN inTransaction = FALSE;
+    BOOLEAN calloutRegistered = FALSE;
+
+    FWPM_SESSION session = {0};
+
+    session.flags = FWPM_SESSION_FLAG_DYNAMIC;
+
+    status = FwpmEngineOpen(NULL,
+                            RPC_C_AUTHN_WINNT,
+                            NULL,
+                            &session,
+                            &gEngineHandle);
+
+    if (!NT_SUCCESS(status)) {
+        goto Exit;
+    }
+    engineOpened = TRUE;
+
+    status = FwpmTransactionBegin(gEngineHandle, 0);
+    if (!NT_SUCCESS(status)) {
+        goto Exit;
+    }
+    inTransaction = TRUE;
+
+    RtlZeroMemory(&OvsTunnelSubLayer, sizeof(FWPM_SUBLAYER));
+
+    OvsTunnelSubLayer.subLayerKey = OVS_TUNNEL_SUBLAYER;
+    OvsTunnelSubLayer.displayData.name = L"Datagram-Data OVS Sub-Layer";
+    OvsTunnelSubLayer.displayData.description =
+        L"Sub-Layer for use by Datagram-Data OVS callouts";
+    OvsTunnelSubLayer.flags = 0;
+    OvsTunnelSubLayer.weight = FWP_EMPTY; /* auto-weight */
+
+    status = FwpmSubLayerAdd(gEngineHandle, &OvsTunnelSubLayer, NULL);
+    if (!NT_SUCCESS(status)) {
+        goto Exit;
+    }
+
+    // In order to use this callout a socket must be opened
+    status = OvsTunnelRegisterDatagramDataCallouts(&FWPM_LAYER_DATAGRAM_DATA_V4,
+                                                   &OVS_TUNNEL_CALLOUT_V4,
+                                                   deviceObject,
+                                                   &gCalloutIdV4);
+    if (!NT_SUCCESS(status)) {
+        /* The helper already unregistered the callout on its own failure. */
+        goto Exit;
+    }
+    calloutRegistered = TRUE;
+
+    status = FwpmTransactionCommit(gEngineHandle);
+    if (!NT_SUCCESS(status)){
+        goto Exit;
+    }
+    inTransaction = FALSE;
+
+Exit:
+
+    if (!NT_SUCCESS(status)) {
+        if (inTransaction) {
+            FwpmTransactionAbort(gEngineHandle);
+        }
+        if (engineOpened) {
+            FwpmEngineClose(gEngineHandle);
+            gEngineHandle = NULL;
+        }
+        /*
+         * The run-time callout registration is not part of the engine
+         * transaction, so it has to be undone explicitly. This is done after
+         * the engine session is closed, in the same order as
+         * OvsTunnelUnregisterCallouts().
+         */
+        if (calloutRegistered) {
+            FwpsCalloutUnregisterById(gCalloutIdV4);
+            gCalloutIdV4 = 0;
+        }
+    }
+
+    return status;
+}
+
+/*
+ * Tears down what OvsTunnelRegisterCallouts() set up: closes the engine
+ * session held in 'gEngineHandle' (and clears the handle), then unregisters
+ * the callout identified by 'gCalloutIdV4'. The session is opened with
+ * FWPM_SESSION_FLAG_DYNAMIC; presumably closing it first removes the filter
+ * that references the callout -- TODO confirm before reordering.
+ */
+VOID
+OvsTunnelUnregisterCallouts(VOID)
+{
+ FwpmEngineClose(gEngineHandle);
+ gEngineHandle = NULL;
+ FwpsCalloutUnregisterById(gCalloutIdV4);
+}
+
+
+/*
+ * Unload counterpart of OvsTunnelFilterInitialize(): unregisters the WFP
+ * callouts and deletes the device object stored in 'gDeviceObject'.
+ * 'driverObject' is not used.
+ */
+VOID
+OvsTunnelFilterUninitialize(PDRIVER_OBJECT driverObject)
+{
+ UNREFERENCED_PARAMETER(driverObject);
+
+ OvsTunnelUnregisterCallouts();
+ IoDeleteDevice(gDeviceObject);
+}
+
+
+/*
+ * Creates the "\Device\OvsTunnelFilter" device object (stored in
+ * 'gDeviceObject') and registers the WFP callouts against it.
+ *
+ * On failure anything that was set up is undone and 'gDeviceObject' is
+ * reset to NULL, so that no stale device pointer is left behind for a later
+ * OvsTunnelFilterUninitialize() or a retry.
+ *
+ * Returns the status of IoCreateDevice() or OvsTunnelRegisterCallouts().
+ */
+NTSTATUS
+OvsTunnelFilterInitialize(PDRIVER_OBJECT driverObject)
+{
+    NTSTATUS status = STATUS_SUCCESS;
+    UNICODE_STRING deviceName;
+
+    RtlInitUnicodeString(&deviceName,
+                         L"\\Device\\OvsTunnelFilter");
+
+    status = IoCreateDevice(driverObject,
+                            0,
+                            &deviceName,
+                            FILE_DEVICE_NETWORK,
+                            0,
+                            FALSE,
+                            &gDeviceObject);
+
+    if (!NT_SUCCESS(status)){
+        goto Exit;
+    }
+
+    status = OvsTunnelRegisterCallouts(gDeviceObject);
+
+Exit:
+
+    if (!NT_SUCCESS(status)){
+        if (gEngineHandle != NULL) {
+            OvsTunnelUnregisterCallouts();
+        }
+
+        if (gDeviceObject) {
+            IoDeleteDevice(gDeviceObject);
+            /* Do not leave a pointer to the deleted device behind. */
+            gDeviceObject = NULL;
+        }
+    }
+
+    return status;
+}
diff --git a/datapath-windows/ovsext/OvsTunnelIntf.h b/datapath-windows/ovsext/OvsTunnelIntf.h
new file mode 100644
index 000000000..3543c8a7e
--- /dev/null
+++ b/datapath-windows/ovsext/OvsTunnelIntf.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_TUNNEL_INTF_H_
+#define __OVS_TUNNEL_INTF_H_ 1
+
+/* Tunnel callout driver load/unload functions */
+NTSTATUS OvsTunnelFilterInitialize(PDRIVER_OBJECT driverObject);
+
+VOID OvsTunnelFilterUninitialize(PDRIVER_OBJECT driverObject);
+
+#endif /* __OVS_TUNNEL_INTF_H_ */
diff --git a/datapath-windows/ovsext/OvsTypes.h b/datapath-windows/ovsext/OvsTypes.h
new file mode 100644
index 000000000..402c39fd8
--- /dev/null
+++ b/datapath-windows/ovsext/OvsTypes.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_TYPES_H_
+#define __OVS_TYPES_H_ 1
+
+/*
+ * Fixed-width integer aliases. The ovs_be* and __be* names are plain
+ * typedefs of the same unsigned types as their host-order siblings; nothing
+ * here enforces a byte order.
+ */
+typedef unsigned long long uint64, uint64_t, ovs_be64, u64;
+typedef long long int64, int64_t;
+typedef unsigned int uint32, uint32_t, ovs_be32, u32;
+typedef unsigned short uint16, uint16_t, ovs_be16, u16;
+typedef unsigned char uint8, uint8_t, u8;
+typedef uint64 __u64, __be64;
+typedef uint32 __u32, __be32;
+typedef uint16 __u16, __be16;
+typedef uint8 __u8;
+
+/* Presumably the length of an Ethernet address in bytes -- TODO confirm. */
+#define ETH_ALEN 6
+
+#endif /* __OVS_TYPES_H_ */
diff --git a/datapath-windows/ovsext/OvsUser.c b/datapath-windows/ovsext/OvsUser.c
new file mode 100644
index 000000000..8271d52de
--- /dev/null
+++ b/datapath-windows/ovsext/OvsUser.c
@@ -0,0 +1,859 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * OvsUser.c
+ * Manage packet queue for packet miss for userAction.
+ */
+
+
+#include "precomp.h"
+
+#include "OvsIoctl.h"
+#include "OvsSwitch.h"
+#include "OvsVport.h"
+#include "OvsEvent.h"
+#include "OvsUser.h"
+#include "OvsPacketIO.h"
+#include "OvsChecksum.h"
+#include "OvsNetProto.h"
+#include "OvsFlow.h"
+#include "OvsTunnelIntf.h"
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_USER
+#include "OvsDebug.h"
+
+OVS_USER_PACKET_QUEUE ovsPacketQueues[OVS_MAX_NUM_PACKET_QUEUES];
+
+POVS_PACKET_QUEUE_ELEM OvsGetNextPacket(POVS_OPEN_INSTANCE instance);
+extern PNDIS_SPIN_LOCK gOvsCtrlLock;
+extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
+OVS_USER_STATS ovsUserStats;
+
+
+/*
+ * Brings every entry of 'ovsPacketQueues' into its initial state: zeroed,
+ * with an empty packet list and an allocated spin lock. Always returns
+ * STATUS_SUCCESS.
+ */
+NTSTATUS
+OvsUserInit()
+{
+    UINT32 idx = 0;
+
+    while (idx < OVS_MAX_NUM_PACKET_QUEUES) {
+        POVS_USER_PACKET_QUEUE slot = &ovsPacketQueues[idx];
+
+        RtlZeroMemory(slot, sizeof (*slot));
+        InitializeListHead(&slot->packetList);
+        NdisAllocateSpinLock(&slot->queueLock);
+        idx++;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+/*
+ * Releases the spin lock of every entry of 'ovsPacketQueues'. Each queue is
+ * expected (ASSERTed) to be empty, unowned and without a pending IRP by the
+ * time this runs.
+ */
+VOID
+OvsUserCleanup()
+{
+    POVS_USER_PACKET_QUEUE slot = &ovsPacketQueues[0];
+    UINT32 remaining;
+
+    for (remaining = OVS_MAX_NUM_PACKET_QUEUES; remaining != 0; remaining--) {
+        ASSERT(IsListEmpty(&slot->packetList));
+        ASSERT(slot->instance == NULL);
+        ASSERT(slot->pendingIrp == NULL);
+        NdisFreeSpinLock(&slot->queueLock);
+        slot++;
+    }
+}
+
+/*
+ * Discards all packets queued on 'queue', provided the queue is currently
+ * owned by 'instance'; otherwise does nothing. The packets are moved to a
+ * private list under the queue lock and freed after the lock is dropped.
+ */
+static VOID
+OvsPurgePacketQueue(POVS_USER_PACKET_QUEUE queue,
+                    POVS_OPEN_INSTANCE instance)
+{
+    LIST_ENTRY detached;
+
+    InitializeListHead(&detached);
+
+    NdisAcquireSpinLock(&queue->queueLock);
+    if (queue->instance == instance) {
+        if (queue->numPackets) {
+            OvsAppendList(&detached, &queue->packetList);
+            queue->numPackets = 0;
+        }
+        NdisReleaseSpinLock(&queue->queueLock);
+    } else {
+        /* Somebody else owns the queue: leave it alone. */
+        NdisReleaseSpinLock(&queue->queueLock);
+        return;
+    }
+
+    /* Free the detached elements outside of the lock. */
+    while (!IsListEmpty(&detached)) {
+        PLIST_ENTRY entry = RemoveHeadList(&detached);
+        POVS_PACKET_QUEUE_ELEM packetElem =
+            CONTAINING_RECORD(entry, OVS_PACKET_QUEUE_ELEM, link);
+
+        OvsFreeMemory(packetElem);
+    }
+}
+
+
+/*
+ * Detaches 'instance' from its packet queue (unsubscribe).
+ *
+ * Under the queue lock: queued packets are moved to a private list, the
+ * queue is marked unowned (instance NULL, queueId set to
+ * OVS_MAX_NUM_PACKET_QUEUES), instance->packetQueue is cleared and a pending
+ * IRP, if any, is taken off the queue. After the lock is dropped the packets
+ * are freed and the IRP, if still ours, is completed with STATUS_SUCCESS.
+ *
+ * Does nothing if the instance has no queue or the queue is owned by a
+ * different instance.
+ */
+VOID
+OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance)
+{
+ POVS_USER_PACKET_QUEUE queue;
+ POVS_PACKET_QUEUE_ELEM elem;
+ PLIST_ENTRY link, next;
+ LIST_ENTRY tmp;
+ PIRP irp = NULL;
+
+ InitializeListHead(&tmp);
+ queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
+ if (queue) {
+ PDRIVER_CANCEL cancelRoutine;
+ NdisAcquireSpinLock(&queue->queueLock);
+ if (queue->instance != instance) {
+ NdisReleaseSpinLock(&queue->queueLock);
+ return;
+ }
+
+ if (queue->numPackets) {
+ OvsAppendList(&tmp, &queue->packetList);
+ queue->numPackets = 0;
+ }
+ queue->instance = NULL;
+ queue->queueId = OVS_MAX_NUM_PACKET_QUEUES;
+ instance->packetQueue = NULL;
+ irp = queue->pendingIrp;
+ queue->pendingIrp = NULL;
+ if (irp) {
+ /*
+ * If the cancel routine was already cleared, the IRP is not completed
+ * here; presumably the cancel path owns it then -- TODO confirm.
+ */
+ cancelRoutine = IoSetCancelRoutine(irp, NULL);
+ if (cancelRoutine == NULL) {
+ irp = NULL;
+ }
+ }
+ NdisReleaseSpinLock(&queue->queueLock);
+ }
+ /* Free the detached packets outside of the queue lock. */
+ LIST_FORALL_SAFE(&tmp, link, next) {
+ RemoveEntryList(link);
+ elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
+ OvsFreeMemory(elem);
+ }
+ if (irp) {
+ OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
+ }
+}
+
+/*
+ * Subscribes the open instance behind 'fileObject' to a packet queue, or
+ * unsubscribes it.
+ *
+ * 'inputBuffer' holds a UINT32 queue id:
+ *  - id >= OVS_MAX_NUM_PACKET_QUEUES while subscribed: unsubscribe.
+ *  - id <  OVS_MAX_NUM_PACKET_QUEUES while not subscribed: subscribe to
+ *    that queue.
+ *  - any other combination: STATUS_INVALID_PARAMETER.
+ *
+ * Returns STATUS_INVALID_PARAMETER when 'inputLength' is too short,
+ * STATUS_INSUFFICIENT_RESOURCES when the queue is owned by another instance,
+ * and STATUS_SUCCESS otherwise.
+ */
+NTSTATUS
+OvsSubscribeDpIoctl(PFILE_OBJECT fileObject,
+                    PVOID inputBuffer,
+                    UINT32 inputLength)
+{
+    POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
+    POVS_USER_PACKET_QUEUE target;
+    NTSTATUS result;
+    UINT32 queueId;
+
+    if (inputLength < sizeof (UINT32)) {
+        return STATUS_INVALID_PARAMETER;
+    }
+    queueId = *(UINT32 *)inputBuffer;
+
+    if (instance->packetQueue && queueId >= OVS_MAX_NUM_PACKET_QUEUES) {
+        /*
+         * unsubscribe
+         */
+        OvsCleanupPacketQueue(instance);
+        return STATUS_SUCCESS;
+    }
+
+    if (!(instance->packetQueue == NULL &&
+          queueId < OVS_MAX_NUM_PACKET_QUEUES)) {
+        /* Already subscribed, or unsubscribing without a subscription. */
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    target = &ovsPacketQueues[queueId];
+    NdisAcquireSpinLock(&target->queueLock);
+    if (target->instance == NULL) {
+        /* Free slot: claim it for this instance. */
+        target->queueId = queueId;
+        target->instance = instance;
+        instance->packetQueue = target;
+        ASSERT(IsListEmpty(&target->packetList));
+        result = STATUS_SUCCESS;
+    } else if (target->instance == instance) {
+        result = STATUS_SUCCESS;
+    } else {
+        result = STATUS_INSUFFICIENT_RESOURCES;
+    }
+    NdisReleaseSpinLock(&target->queueLock);
+
+    return result;
+}
+
+
+/*
+ * Copies the next queued packet of the open instance behind 'fileObject'
+ * into 'outputBuffer' and frees the queue element.
+ *
+ * Returns STATUS_INVALID_PARAMETER if the instance is not subscribed to a
+ * queue, STATUS_BUFFER_TOO_SMALL if 'outputLength' is below
+ * sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE, and STATUS_SUCCESS
+ * otherwise. '*replyLen' is set to the number of bytes copied only when a
+ * packet was available; it is left untouched when the queue was empty.
+ */
+NTSTATUS
+OvsReadDpIoctl(PFILE_OBJECT fileObject,
+ PVOID outputBuffer,
+ UINT32 outputLength,
+ UINT32 *replyLen)
+{
+ POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
+ POVS_PACKET_QUEUE_ELEM elem;
+ UINT32 len;
+
+/* Byte offsets added to the L4 header start to locate the checksum field. */
+#define TCP_CSUM_OFFSET 16
+#define UDP_CSUM_OFFSET 6
+ ASSERT(instance);
+
+ if (instance->packetQueue == NULL) {
+ return STATUS_INVALID_PARAMETER;
+ }
+ if (outputLength < (sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE)) {
+ return STATUS_BUFFER_TOO_SMALL;
+ }
+
+ elem = OvsGetNextPacket(instance);
+ if (elem) {
+ /*
+ * XXX revisit this later
+ */
+ /* Truncate to the caller's buffer if the packet is larger. */
+ len = elem->packet.totalLen > outputLength ? outputLength :
+ elem->packet.totalLen;
+
+ /* The checksum is only filled in when the whole packet fits. */
+ if ((elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) &&
+ len == elem->packet.totalLen) {
+ UINT16 sum, *ptr;
+ /* Everything up to the L4 header is copied verbatim. */
+ UINT16 size = (UINT16)(elem->packet.userDataLen +
+ elem->hdrInfo.l4Offset +
+ (UINT16)sizeof (OVS_PACKET_INFO));
+ RtlCopyMemory(outputBuffer, &elem->packet, size);
+ ASSERT(len - size >= elem->hdrInfo.l4PayLoad);
+ /*
+ * Presumably copies l4PayLoad bytes starting at the L4 header while
+ * computing their checksum -- TODO confirm against OvsChecksum.
+ */
+ sum = CopyAndCalculateChecksum((UINT8 *)outputBuffer + size,
+ (UINT8 *)&elem->packet + size,
+ elem->hdrInfo.l4PayLoad, 0);
+ ptr =(UINT16 *)((UINT8 *)outputBuffer + size +
+ (elem->hdrInfo.tcpCsumNeeded ?
+ TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
+ *ptr = sum;
+ ovsUserStats.l4Csum++;
+ } else {
+ RtlCopyMemory(outputBuffer, &elem->packet, len);
+ }
+
+ *replyLen = len;
+ OvsFreeMemory(elem);
+ }
+ return STATUS_SUCCESS;
+}
+
+/*
+ * Asks the Hyper-V switch, through the AllocateNetBufferListForwardingContext
+ * handler stored in 'switchContext', to attach a forwarding context to 'nbl'.
+ * Returns the handler's status.
+ */
+NTSTATUS
+OvsAllocateForwardingContextForNBL(POVS_SWITCH_CONTEXT switchContext,
+                                   PNET_BUFFER_LIST nbl)
+{
+    NTSTATUS allocStatus;
+
+    allocStatus =
+        switchContext->NdisSwitchHandlers.AllocateNetBufferListForwardingContext(
+            switchContext->NdisSwitchContext, nbl);
+
+    return allocStatus;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * Allocates an NBL able to hold 'length' bytes (variable-size allocation
+ * when 'length' exceeds OVS_DEFAULT_DATA_SIZE, fixed-size otherwise, both
+ * with OVS_DEFAULT_HEADROOM_SIZE of headroom) and copies 'length' bytes from
+ * 'userBuffer' into it. No forwarding context is allocated here.
+ *
+ * Returns the new NBL, or NULL if the allocation failed or the NBL's first
+ * MDL could not be mapped; in the latter case the NBL is completed before
+ * returning.
+ * --------------------------------------------------------------------------
+ */
+PNET_BUFFER_LIST
+OvsAllocateNBLForUserBuffer(POVS_SWITCH_CONTEXT switchContext,
+                            PVOID userBuffer,
+                            ULONG length)
+{
+    UINT8 *mapped = NULL;
+    UINT8 *data = NULL;
+    PNET_BUFFER_LIST nbl = NULL;
+    PNET_BUFFER nb;
+    PMDL mdl;
+
+    if (length > OVS_DEFAULT_DATA_SIZE) {
+        nbl = OvsAllocateVariableSizeNBL(switchContext, length,
+                                         OVS_DEFAULT_HEADROOM_SIZE);
+
+    } else {
+        nbl = OvsAllocateFixSizeNBL(switchContext, length,
+                                    OVS_DEFAULT_HEADROOM_SIZE);
+    }
+    if (nbl == NULL) {
+        return NULL;
+    }
+
+    nb = NET_BUFFER_LIST_FIRST_NB(nbl);
+    mdl = NET_BUFFER_CURRENT_MDL(nb);
+
+    /*
+     * Check the mapping itself before the MDL offset is added: with a
+     * non-zero offset a failed mapping would otherwise look like a valid,
+     * non-NULL pointer.
+     */
+    mapped = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority);
+    if (mapped == NULL) {
+        OvsCompleteNBL(switchContext, nbl, TRUE);
+        return NULL;
+    }
+    data = mapped + NET_BUFFER_CURRENT_MDL_OFFSET(nb);
+
+    /*
+     * NOTE(review): assumes the first MDL holds 'length' contiguous bytes
+     * after the offset -- confirm against the NBL allocators.
+     */
+    NdisMoveMemory(data, userBuffer, length);
+
+    return nbl;
+}
+
+/*
+ * Executes a list of actions on a packet supplied by user space.
+ *
+ * 'inputBuffer' starts with an OvsPacketExecute header; 'inputLength' must
+ * equal sizeof (*execute) + actionsLen + packetLen, and 'outputLength' must
+ * be 0. The packet bytes are copied into a new NBL, a flow key is extracted
+ * from it, and the actions are executed under the switch dispatch lock.
+ * The whole operation runs with 'gOvsCtrlLock' held.
+ *
+ * Returns STATUS_INFO_LENGTH_MISMATCH for inconsistent lengths,
+ * STATUS_INVALID_PARAMETER if there is no switch context or the packet is
+ * empty, STATUS_NO_MEMORY if the NBL cannot be allocated,
+ * STATUS_UNSUCCESSFUL if flow extraction or action execution fails, and
+ * STATUS_SUCCESS otherwise.
+ */
+NTSTATUS
+OvsExecuteDpIoctl(PVOID inputBuffer,
+ UINT32 inputLength,
+ UINT32 outputLength)
+{
+ NTSTATUS status = STATUS_SUCCESS;
+ NTSTATUS ndisStatus;
+ OvsPacketExecute *execute;
+ LOCK_STATE_EX lockState;
+ PNET_BUFFER_LIST pNbl;
+ struct nlattr *actions;
+ PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
+ OvsFlowKey key;
+ OVS_PACKET_HDR_INFO layers;
+
+ if (inputLength < sizeof(*execute) || outputLength != 0) {
+ return STATUS_INFO_LENGTH_MISMATCH;
+ }
+
+ NdisAcquireSpinLock(gOvsCtrlLock);
+ if (gOvsSwitchContext == NULL) {
+ status = STATUS_INVALID_PARAMETER;
+ goto unlock;
+ }
+
+ execute = (struct OvsPacketExecute *) inputBuffer;
+
+ if (execute->packetLen == 0) {
+ status = STATUS_INVALID_PARAMETER;
+ goto unlock;
+ }
+
+ /*
+ * NOTE(review): the field types of actionsLen/packetLen are not visible
+ * here -- confirm this sum cannot wrap for user-controlled values.
+ */
+ if (inputLength != sizeof (*execute) +
+ execute->actionsLen + execute->packetLen) {
+ status = STATUS_INFO_LENGTH_MISMATCH;
+ goto unlock;
+ }
+ /* The actions are located packetLen bytes after the 'actions' member. */
+ actions = (struct nlattr *)((PCHAR)&execute->actions + execute->packetLen);
+
+ /*
+ * Allocate the NBL, copy the data from the userspace buffer. Allocate
+ * also, the forwarding context for the packet.
+ */
+ pNbl = OvsAllocateNBLForUserBuffer(gOvsSwitchContext, &execute->packetBuf,
+ execute->packetLen);
+ if (pNbl == NULL) {
+ status = STATUS_NO_MEMORY;
+ goto unlock;
+ }
+
+ fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl);
+ fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
+ fwdDetail->SourceNicIndex = 0;
+ // XXX: Figure out if any of the other members of fwdDetail need to be set.
+
+ ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers,
+ NULL);
+ if (ndisStatus == NDIS_STATUS_SUCCESS) {
+ ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
+ NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState,
+ NDIS_RWL_AT_DISPATCH_LEVEL);
+ ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl,
+ 0, // XXX: we are passing 0 for srcVportNo
+ NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP,
+ &key, NULL, &layers, actions,
+ execute->actionsLen);
+ /* The NBL is no longer completed by this function from here on. */
+ pNbl = NULL;
+ NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
+ }
+ if (ndisStatus != NDIS_STATUS_SUCCESS) {
+ status = STATUS_UNSUCCESSFUL;
+ }
+
+ /* Still set only when flow extraction failed. */
+ if (pNbl) {
+ OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE);
+ }
+unlock:
+ NdisReleaseSpinLock(gOvsCtrlLock);
+ return status;
+}
+
+
+/*
+ * Purges the packet queue attached to the open instance behind 'fileObject'.
+ * Returns STATUS_INVALID_PARAMETER when the instance has no queue, otherwise
+ * STATUS_SUCCESS.
+ */
+NTSTATUS
+OvsPurgeDpIoctl(PFILE_OBJECT fileObject)
+{
+    POVS_OPEN_INSTANCE openInstance =
+        (POVS_OPEN_INSTANCE)fileObject->FsContext;
+    POVS_USER_PACKET_QUEUE pktQueue =
+        (POVS_USER_PACKET_QUEUE)openInstance->packetQueue;
+
+    if (pktQueue != NULL) {
+        OvsPurgePacketQueue(pktQueue, openInstance);
+        return STATUS_SUCCESS;
+    }
+    return STATUS_INVALID_PARAMETER;
+}
+
+/*
+ * Cancel routine installed by OvsWaitDpIoctl() on a parked IRP.
+ *
+ * Drops the cancel spin lock, detaches the IRP from its packet queue if it
+ * is still the queue's pending IRP, and completes it with STATUS_CANCELLED.
+ */
+VOID
+OvsCancelIrpDatapath(PDEVICE_OBJECT deviceObject,
+                     PIRP irp)
+{
+    PFILE_OBJECT fileObj;
+    POVS_OPEN_INSTANCE openInstance;
+    POVS_USER_PACKET_QUEUE pktQueue = NULL;
+
+    UNREFERENCED_PARAMETER(deviceObject);
+
+    IoReleaseCancelSpinLock(irp->CancelIrql);
+
+    fileObj = IoGetCurrentIrpStackLocation(irp)->FileObject;
+    if (fileObj != NULL) {
+        /* The instance -> queue association is read under the control lock. */
+        NdisAcquireSpinLock(gOvsCtrlLock);
+        openInstance = (POVS_OPEN_INSTANCE)fileObj->FsContext;
+        if (openInstance != NULL) {
+            pktQueue = openInstance->packetQueue;
+        }
+        NdisReleaseSpinLock(gOvsCtrlLock);
+
+        if (pktQueue != NULL) {
+            NdisAcquireSpinLock(&pktQueue->queueLock);
+            if (pktQueue->pendingIrp == irp) {
+                pktQueue->pendingIrp = NULL;
+            }
+            NdisReleaseSpinLock(&pktQueue->queueLock);
+        }
+    }
+
+    OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
+}
+
+
+/*
+ * Parks 'irp' on the caller's packet queue until packets arrive.
+ *
+ * Returns STATUS_INVALID_PARAMETER when the instance has no queue or does not
+ * own it, STATUS_DEVICE_BUSY when another IRP is already parked,
+ * STATUS_SUCCESS when packets are already queued, and STATUS_PENDING when the
+ * IRP was marked pending (it is then either parked on the queue or completed
+ * with STATUS_CANCELLED).
+ */
+NTSTATUS
+OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject)
+{
+    POVS_OPEN_INSTANCE openInstance =
+        (POVS_OPEN_INSTANCE)fileObject->FsContext;
+    POVS_USER_PACKET_QUEUE pktQueue =
+        (POVS_USER_PACKET_QUEUE)openInstance->packetQueue;
+    NTSTATUS result;
+    BOOLEAN completeAsCancelled = FALSE;
+
+    if (pktQueue == NULL) {
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    NdisAcquireSpinLock(&pktQueue->queueLock);
+    if (pktQueue->instance != openInstance) {
+        result = STATUS_INVALID_PARAMETER;
+    } else if (pktQueue->pendingIrp) {
+        result = STATUS_DEVICE_BUSY;
+    } else if (pktQueue->numPackets != 0) {
+        result = STATUS_SUCCESS;
+    } else {
+        IoMarkIrpPending(irp);
+        IoSetCancelRoutine(irp, OvsCancelIrpDatapath);
+        if (!irp->Cancel) {
+            pktQueue->pendingIrp = irp;
+        } else if (IoSetCancelRoutine(irp, NULL) != NULL) {
+            /*
+             * Cancellation was requested and we took the cancel routine back
+             * ourselves, so it will not run; complete the IRP below. If the
+             * routine was already gone, it is left to complete the IRP.
+             */
+            completeAsCancelled = TRUE;
+        }
+        result = STATUS_PENDING;
+    }
+    NdisReleaseSpinLock(&pktQueue->queueLock);
+
+    if (completeAsCancelled) {
+        OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
+        OVS_LOG_INFO("Datapath IRP cancelled: %p", irp);
+    }
+    return result;
+}
+
+
+/*
+ * Dequeues the oldest packet element from the queue owned by 'instance'.
+ * Returns NULL when the instance has no queue, does not own the queue, or the
+ * queue is empty. The element is unlinked from the queue before it is
+ * returned.
+ */
+POVS_PACKET_QUEUE_ELEM
+OvsGetNextPacket(POVS_OPEN_INSTANCE instance)
+{
+    POVS_USER_PACKET_QUEUE pktQueue =
+        (POVS_USER_PACKET_QUEUE)instance->packetQueue;
+    POVS_PACKET_QUEUE_ELEM nextElem = NULL;
+
+    if (pktQueue == NULL) {
+        return NULL;
+    }
+
+    NdisAcquireSpinLock(&pktQueue->queueLock);
+    if (pktQueue->instance == instance && pktQueue->numPackets != 0) {
+        PLIST_ENTRY head = RemoveHeadList(&pktQueue->packetList);
+        pktQueue->numPackets--;
+        nextElem = CONTAINING_RECORD(head, OVS_PACKET_QUEUE_ELEM, link);
+    }
+    NdisReleaseSpinLock(&pktQueue->queueLock);
+
+    return nextElem;
+}
+
+
+/*
+ * Looks up the packet queue with index 'queueId'. Returns NULL when the index
+ * is out of range or the queue currently has no instance attached.
+ */
+POVS_USER_PACKET_QUEUE
+OvsGetQueue(UINT32 queueId)
+{
+    if (queueId < OVS_MAX_NUM_PACKET_QUEUES &&
+        ovsPacketQueues[queueId].instance != NULL) {
+        return &ovsPacketQueues[queueId];
+    }
+    return NULL;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsCreateQueuePacket --
+ *
+ *     Create a packet which will be forwarded to user space.
+ *
+ * InputParameter:
+ *     queueId      Identifies the queue the packet is to be inserted into.
+ *                  This will be used when multiple queues are supported
+ *                  in userspace.
+ *     userData:    when cmd is user action, this field contains
+ *                  the user action data.
+ *     userDataLen: length of userData in bytes.
+ *     cmd:         either miss or user action.
+ *     inPort:      datapath port id from which the packet was received.
+ *     tunnelKey:   tunnel key for a tunneled packet; may be NULL, in which
+ *                  case the key in the element is zeroed.
+ *     nbl:         the NET_BUFFER_LIST which contains the packet.
+ *     nb:          the packet.
+ *     isRecv:      decides how to interpret the checksum info (receive vs.
+ *                  transmit layout).
+ *     hdrInfo:     header info initialized during flow extraction.
+ *
+ * Results:
+ *     NULL if the packet is dropped (checksum failure) or cannot be created.
+ *     Otherwise the packet element, allocated with OvsAllocateMemory(). The
+ *     element layout is: user data, then the frame (with a VLAN tag
+ *     re-inserted after the MAC addresses when the NBL carries a VLAN id).
+ *----------------------------------------------------------------------------
+ */
+POVS_PACKET_QUEUE_ELEM
+OvsCreateQueuePacket(UINT32 queueId,
+                     PVOID userData,
+                     UINT32 userDataLen,
+                     UINT32 cmd,
+                     UINT32 inPort,
+                     OvsIPv4TunnelKey *tunnelKey,
+                     PNET_BUFFER_LIST nbl,
+                     PNET_BUFFER nb,
+                     BOOLEAN isRecv,
+                     POVS_PACKET_HDR_INFO hdrInfo)
+{
+#define VLAN_TAG_SIZE 4
+    UINT32 allocLen, dataLen, extraLen = 0;
+    POVS_PACKET_QUEUE_ELEM elem;
+    UINT8 *src, *dst;
+    NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
+    NDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo;
+
+    csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo);
+
+    /* Received packets flagged with a failed checksum are dropped. */
+    if (isRecv && (csumInfo.Receive.TcpChecksumFailed ||
+                   (csumInfo.Receive.UdpChecksumFailed &&
+                    !hdrInfo->udpCsumZero) ||
+                   csumInfo.Receive.IpChecksumFailed)) {
+        OVS_LOG_INFO("Packet dropped due to checksum failure.");
+        ovsUserStats.dropDuetoChecksum++;
+        return NULL;
+    }
+
+    vlanInfo.Value = NET_BUFFER_LIST_INFO(nbl, Ieee8021QNetBufferListInfo);
+    if (vlanInfo.TagHeader.VlanId) {
+        /*
+         * We may also need to check priority XXX
+         */
+        extraLen = VLAN_TAG_SIZE;
+    }
+
+    dataLen = NET_BUFFER_DATA_LENGTH(nb);
+    allocLen = sizeof (OVS_PACKET_QUEUE_ELEM) + userDataLen + dataLen +
+               extraLen;
+
+    elem = (POVS_PACKET_QUEUE_ELEM)OvsAllocateMemory(allocLen);
+    if (elem == NULL) {
+        ovsUserStats.dropDuetoResource++;
+        return NULL;
+    }
+    elem->hdrInfo.value = hdrInfo->value;
+    elem->packet.totalLen = sizeof (OVS_PACKET_INFO) + userDataLen + dataLen +
+                            extraLen;
+    elem->packet.queue = queueId;
+    elem->packet.userDataLen = userDataLen;
+    elem->packet.inPort = inPort;
+    elem->packet.cmd = cmd;
+    if (cmd == (UINT32)OVS_PACKET_CMD_MISS) {
+        ovsUserStats.miss++;
+    } else {
+        ovsUserStats.action++;
+    }
+    elem->packet.packetLen = dataLen + extraLen;
+    if (tunnelKey) {
+        RtlCopyMemory(&elem->packet.tunnelKey, tunnelKey,
+                      sizeof (*tunnelKey));
+    } else {
+        RtlZeroMemory(&elem->packet.tunnelKey,
+                      sizeof (elem->packet.tunnelKey));
+    }
+
+    dst = elem->packet.data;
+    if (userDataLen) {
+        RtlCopyMemory(dst, userData, userDataLen);
+        dst = dst + userDataLen;
+    }
+    /* Leave room in front of the frame for the VLAN tag inserted below. */
+    dst += extraLen;
+
+    /*
+     * NdisGetDataBuffer() either returns a pointer to the data inside the
+     * NET_BUFFER or copies it into 'dst' and returns 'dst'.
+     */
+    src = NdisGetDataBuffer(nb, dataLen, dst, 1, 0);
+    if (src == NULL) {
+        OvsFreeMemory(elem);
+        ovsUserStats.dropDuetoResource++;
+        return NULL;
+    } else if (src != dst) {
+        /* Copy the data from the NDIS buffer to dst. */
+        RtlCopyMemory(dst, src, dataLen);
+    }
+
+    dst = elem->packet.data + userDataLen + extraLen;
+    /*
+     * Fix IP hdr if necessary
+     */
+    if ((isRecv && csumInfo.Receive.IpChecksumValueInvalid) ||
+        (!isRecv && csumInfo.Transmit.IsIPv4 &&
+         csumInfo.Transmit.IpHeaderChecksum)) {
+        PIPV4_HEADER ipHdr = (PIPV4_HEADER)(dst + hdrInfo->l3Offset);
+        ASSERT(elem->hdrInfo.isIPv4);
+        ASSERT(ipHdr->Version == 4);
+        ipHdr->HeaderChecksum = IPChecksum((UINT8 *)ipHdr,
+                                           ipHdr->HeaderLength << 2,
+                                           (UINT16)~ipHdr->HeaderChecksum);
+        ovsUserStats.ipCsum++;
+    }
+    ASSERT(elem->hdrInfo.tcpCsumNeeded == 0 &&
+           elem->hdrInfo.udpCsumNeeded == 0);
+    /*
+     * For now, we will not do verification.
+     * There is no correctness issue here.
+     * XXX
+     */
+    /*
+     * calculate TCP/UDP pseudo checksum
+     */
+    if (isRecv && csumInfo.Receive.TcpChecksumValueInvalid) {
+        /*
+         * Only in this case do we need to recalculate the pseudo checksum;
+         * in all other cases it is assumed the pseudo checksum is
+         * filled in already.
+         */
+        PTCP_HDR tcpHdr = (PTCP_HDR)(dst + hdrInfo->l4Offset);
+        if (hdrInfo->isIPv4) {
+            PIPV4_HEADER ipHdr = (PIPV4_HEADER)(dst + hdrInfo->l3Offset);
+            elem->hdrInfo.l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
+                                               (ipHdr->HeaderLength << 2));
+            tcpHdr->th_sum =
+                IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
+                                 (UINT32 *)&ipHdr->DestinationAddress,
+                                 IPPROTO_TCP, elem->hdrInfo.l4PayLoad);
+        } else {
+            PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(dst + hdrInfo->l3Offset);
+            elem->hdrInfo.l4PayLoad =
+                (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
+                         hdrInfo->l3Offset + sizeof(IPV6_HEADER) -
+                         hdrInfo->l4Offset);
+            ASSERT(hdrInfo->isIPv6);
+            tcpHdr->th_sum =
+                IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
+                                   (UINT32 *)&ipv6Hdr->DestinationAddress,
+                                   IPPROTO_TCP, elem->hdrInfo.l4PayLoad);
+        }
+        elem->hdrInfo.tcpCsumNeeded = 1;
+        ovsUserStats.recalTcpCsum++;
+    } else if (!isRecv) {
+        if (csumInfo.Transmit.TcpChecksum) {
+            elem->hdrInfo.tcpCsumNeeded = 1;
+        } else if (csumInfo.Transmit.UdpChecksum) {
+            elem->hdrInfo.udpCsumNeeded = 1;
+        }
+        if (elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) {
+#ifdef DBG
+            UINT16 sum, *ptr;
+            UINT8 proto =
+                elem->hdrInfo.tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP;
+#endif
+            if (hdrInfo->isIPv4) {
+                PIPV4_HEADER ipHdr = (PIPV4_HEADER)(dst + hdrInfo->l3Offset);
+                elem->hdrInfo.l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
+                                                   (ipHdr->HeaderLength << 2));
+#ifdef DBG
+                sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
+                                       (UINT32 *)&ipHdr->DestinationAddress,
+                                       proto, elem->hdrInfo.l4PayLoad);
+#endif
+            } else {
+                PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(dst +
+                                                      hdrInfo->l3Offset);
+                elem->hdrInfo.l4PayLoad =
+                    (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
+                             hdrInfo->l3Offset + sizeof(IPV6_HEADER) -
+                             hdrInfo->l4Offset);
+                ASSERT(hdrInfo->isIPv6);
+#ifdef DBG
+                sum = IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
+                                         (UINT32 *)&ipv6Hdr->DestinationAddress,
+                                         proto, elem->hdrInfo.l4PayLoad);
+#endif
+            }
+#ifdef DBG
+            /* Debug builds verify the pseudo checksum already in the packet. */
+            ptr = (UINT16 *)(dst + hdrInfo->l4Offset +
+                             (elem->hdrInfo.tcpCsumNeeded ?
+                              TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
+            ASSERT(*ptr == sum);
+#endif
+        }
+    }
+    /*
+     * Finally insert VLAN tag
+     */
+    if (extraLen) {
+        dst = elem->packet.data + userDataLen;
+        src = dst + extraLen;
+        /* Slide the 12 bytes of MAC addresses forward over the reserved gap. */
+        ((UINT32 *)dst)[0] = ((UINT32 *)src)[0];
+        ((UINT32 *)dst)[1] = ((UINT32 *)src)[1];
+        ((UINT32 *)dst)[2] = ((UINT32 *)src)[2];
+        dst += 12;
+        /* 802.1Q TPID followed by priority (top 3 bits) and VLAN id. */
+        ((UINT16 *)dst)[0] = htons(0x8100);
+        ((UINT16 *)dst)[1] = htons(vlanInfo.TagHeader.VlanId |
+                                   (vlanInfo.TagHeader.UserPriority << 13));
+        elem->hdrInfo.l3Offset += VLAN_TAG_SIZE;
+        elem->hdrInfo.l4Offset += VLAN_TAG_SIZE;
+        ovsUserStats.vlanInsert++;
+    }
+
+    return elem;
+}
+
+
+/*
+ * Moves every element of 'packetList' onto the user-space queue 'queueId' and
+ * completes the IRP parked on that queue, if any. When the queue does not
+ * exist or has no instance attached, the elements are freed instead.
+ * 'packetList' is always empty on return.
+ */
+VOID
+OvsQueuePackets(UINT32 queueId,
+                PLIST_ENTRY packetList,
+                UINT32 numElems)
+{
+    POVS_USER_PACKET_QUEUE pktQueue = OvsGetQueue(queueId);
+    PIRP waiter = NULL;
+    UINT32 dropped = 0;
+
+    OVS_LOG_LOUD("Enter: queueId %u, numELems: %u",
+                 queueId, numElems);
+
+    if (pktQueue != NULL) {
+        NdisAcquireSpinLock(&pktQueue->queueLock);
+        if (pktQueue->instance != NULL) {
+            OvsAppendList(&pktQueue->packetList, packetList);
+            pktQueue->numPackets += numElems;
+
+            waiter = pktQueue->pendingIrp;
+            if (waiter != NULL) {
+                pktQueue->pendingIrp = NULL;
+                /*
+                 * A NULL previous cancel routine means we did not get to
+                 * clear it ourselves, so the IRP must not be completed here.
+                 */
+                if (IoSetCancelRoutine(waiter, NULL) == NULL) {
+                    waiter = NULL;
+                }
+            }
+        }
+        NdisReleaseSpinLock(&pktQueue->queueLock);
+
+        if (waiter != NULL) {
+            OvsCompleteIrpRequest(waiter, 0, STATUS_SUCCESS);
+        }
+    }
+
+    /* Whatever is still on the caller's list was not queued: free it. */
+    while (!IsListEmpty(packetList)) {
+        PLIST_ENTRY entry = RemoveHeadList(packetList);
+        OvsFreeMemory(CONTAINING_RECORD(entry, OVS_PACKET_QUEUE_ELEM, link));
+        dropped++;
+    }
+    OVS_LOG_LOUD("Exit: drop %u packets", dropped);
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsCreateAndAddPackets --
+ *
+ *     Create packet elements destined for user space and append them to
+ *     'list', adding the number appended to '*num'.
+ *
+ *     A TCP packet whose NBL carries a non-zero large-send MSS is segmented
+ *     first, and one element is created per resulting NET_BUFFER.
+ *
+ * Results:
+ *     STATUS_UNSUCCESSFUL if segmentation fails, STATUS_SUCCESS otherwise.
+ *     NET_BUFFERs for which no element could be created are skipped
+ *     silently and do not affect the return value.
+ *----------------------------------------------------------------------------
+ */
+NTSTATUS
+OvsCreateAndAddPackets(UINT32 queueId,
+                       PVOID userData,
+                       UINT32 userDataLen,
+                       UINT32 cmd,
+                       UINT32 inPort,
+                       OvsIPv4TunnelKey *tunnelKey,
+                       PNET_BUFFER_LIST nbl,
+                       BOOLEAN isRecv,
+                       POVS_PACKET_HDR_INFO hdrInfo,
+                       POVS_SWITCH_CONTEXT switchContext,
+                       LIST_ENTRY *list,
+                       UINT32 *num)
+{
+    POVS_PACKET_QUEUE_ELEM elem;
+    PNET_BUFFER_LIST newNbl = NULL;
+    PNET_BUFFER nb;
+
+    if (hdrInfo->isTcp) {
+        NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
+        UINT32 packetLength;
+
+        tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo);
+        nb = NET_BUFFER_LIST_FIRST_NB(nbl);
+        packetLength = NET_BUFFER_DATA_LENGTH(nb);
+
+        OVS_LOG_TRACE("MSS %u packet len %u",
+                      tsoInfo.LsoV1Transmit.MSS, packetLength);
+        if (tsoInfo.LsoV1Transmit.MSS) {
+            OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset);
+            newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo,
+                                      tsoInfo.LsoV1Transmit.MSS , 0);
+            if (newNbl == NULL) {
+                /* Same value as NDIS_STATUS_FAILURE, spelled as an NTSTATUS. */
+                return STATUS_UNSUCCESSFUL;
+            }
+            /* From here on work on the segmented copy, not the original. */
+            nbl = newNbl;
+        }
+    }
+
+    nb = NET_BUFFER_LIST_FIRST_NB(nbl);
+    while (nb) {
+        elem = OvsCreateQueuePacket(queueId, userData, userDataLen,
+                                    cmd, inPort, tunnelKey, nbl, nb,
+                                    isRecv, hdrInfo);
+        if (elem) {
+            InsertTailList(list, &elem->link);
+            (*num)++;
+        }
+        nb = NET_BUFFER_NEXT_NB(nb);
+    }
+    /* The segmented NBL was only a source for the copies; release it. */
+    if (newNbl) {
+        OvsCompleteNBL(switchContext, newNbl, TRUE);
+    }
+    return STATUS_SUCCESS;
+}
diff --git a/datapath-windows/ovsext/OvsUser.h b/datapath-windows/ovsext/OvsUser.h
new file mode 100644
index 000000000..b1e6e1ef0
--- /dev/null
+++ b/datapath-windows/ovsext/OvsUser.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This file contains structures and function definitions necessary for
+ * forwarding packet to user space.
+ */
+
+#ifndef __OVS_USER_H_
+#define __OVS_USER_H_ 1
+
+/*
+ * Number of user-space packet queues. Even on machines with more cores,
+ * more than 32 queues should not be needed for processing packets sent to
+ * userspace.
+ */
+#define OVS_MAX_NUM_PACKET_QUEUES 32
+#define OVS_DEFAULT_PACKET_QUEUE 1
+#define OVS_MAX_PACKET_QUEUE_LEN 4096
+
+/*
+ * These per-vport and per-tunnel limits are applied only when
+ * OVS_PER_VPORT_QUEUE_CTRL is defined.
+ */
+#define OVS_MAX_PACKETS_PER_VPORT 128
+#define OVS_MAX_PACKETS_PER_TUNNEL 1024
+
+/*
+ * State of one queue of packets waiting to be read by user space.
+ * packetList, numPackets, instance and pendingIrp are accessed under
+ * queueLock by the queueing, wait and cancel paths.
+ */
+typedef struct _OVS_USER_PACKET_QUEUE {
+ UINT32 queueId;
+ /* Number of elements currently linked on packetList. */
+ UINT32 numPackets;
+ /* List of OVS_PACKET_QUEUE_ELEM, linked through their 'link' member. */
+ LIST_ENTRY packetList;
+ /* Open instance that owns the queue; NULL means the queue is not in use. */
+ PVOID instance;
+ /* IRP parked by OvsWaitDpIoctl() until packets arrive, or NULL. */
+ PIRP pendingIrp;
+ NDIS_SPIN_LOCK queueLock;
+} OVS_USER_PACKET_QUEUE, *POVS_USER_PACKET_QUEUE;
+
+/*
+ * One queued packet. OvsCreateQueuePacket() allocates it with extra room
+ * after the structure for the user data and the frame bytes, which are
+ * written through packet.data.
+ */
+typedef struct _OVS_PACKET_QUEUE_ELEM {
+ /* Linkage on a packet list (e.g. OVS_USER_PACKET_QUEUE.packetList). */
+ LIST_ENTRY link;
+ /* Copy of the header info from flow extraction, adjusted for VLAN/csum. */
+ OVS_PACKET_HDR_INFO hdrInfo;
+ OVS_PACKET_INFO packet;
+} OVS_PACKET_QUEUE_ELEM, *POVS_PACKET_QUEUE_ELEM;
+
+struct _OVS_OPEN_INSTANCE;
+
+/*
+ * Counters for the packet-to-userspace path; OvsCreateQueuePacket() bumps
+ * them as it builds or drops packets.
+ */
+typedef struct _OVS_USER_STATS {
+ /* Packets created with cmd OVS_PACKET_CMD_MISS. */
+ UINT64 miss;
+ /* Packets created with any other cmd. */
+ UINT64 action;
+ /* Drops because memory or the packet data could not be obtained. */
+ UINT32 dropDuetoResource;
+ /* Received packets dropped because a checksum was flagged as failed. */
+ UINT32 dropDuetoChecksum;
+ /* IPv4 header checksums recomputed. */
+ UINT32 ipCsum;
+ /* TCP pseudo checksums recomputed on receive. */
+ UINT32 recalTcpCsum;
+ /* VLAN tags re-inserted into the copied frame. */
+ UINT32 vlanInsert;
+ UINT32 l4Csum;
+} OVS_USER_STATS, *POVS_USER_STATS;
+
+
+/*
+ * Declared with (VOID) so that these are real prototypes: in C an empty
+ * parameter list leaves the arguments unchecked.
+ */
+NTSTATUS OvsUserInit(VOID);
+VOID OvsUserCleanup(VOID);
+
+VOID OvsCleanupPacketQueue(struct _OVS_OPEN_INSTANCE *instance);
+
+POVS_PACKET_QUEUE_ELEM OvsCreateQueuePacket(UINT32 queueId,
+ PVOID userData,
+ UINT32 userDataLen,
+ UINT32 cmd, UINT32 inPort,
+ OvsIPv4TunnelKey *tunnelKey,
+ PNET_BUFFER_LIST nbl,
+ PNET_BUFFER nb,
+ BOOLEAN isRecv,
+ POVS_PACKET_HDR_INFO hdrInfo);
+
+VOID OvsQueuePackets(UINT32 queueId, PLIST_ENTRY packetList,
+ UINT32 numElems);
+NTSTATUS OvsCreateAndAddPackets(UINT32 queueId,
+ PVOID userData,
+ UINT32 userDataLen,
+ UINT32 cmd,
+ UINT32 inPort,
+ OvsIPv4TunnelKey *tunnelKey,
+ PNET_BUFFER_LIST nbl,
+ BOOLEAN isRecv,
+ POVS_PACKET_HDR_INFO hdrInfo,
+ POVS_SWITCH_CONTEXT switchContext,
+ LIST_ENTRY *list,
+ UINT32 *num);
+
+NTSTATUS OvsSubscribeDpIoctl(PFILE_OBJECT fileObject,
+ PVOID inputBuffer,
+ UINT32 inputLength);
+
+NTSTATUS OvsReadDpIoctl(PFILE_OBJECT fileObject,
+ PVOID outputBuffer,
+ UINT32 outputLength,
+ UINT32 *replyLen);
+NTSTATUS OvsExecuteDpIoctl(PVOID inputBuffer,
+ UINT32 inputLength,
+ UINT32 outputLength);
+NTSTATUS OvsPurgeDpIoctl(PFILE_OBJECT fileObject);
+
+NTSTATUS OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject);
+
+#endif /* __OVS_USER_H_ */
diff --git a/datapath-windows/ovsext/OvsUtil.c b/datapath-windows/ovsext/OvsUtil.c
new file mode 100644
index 000000000..e70f9a1fc
--- /dev/null
+++ b/datapath-windows/ovsext/OvsUtil.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "precomp.h"
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_OTHERS
+
+#include "OvsDebug.h"
+
+extern NDIS_HANDLE gOvsExtDriverHandle;
+
+/*
+ * Allocates 'size' bytes tagged OVS_MEMORY_TAG through
+ * NdisAllocateMemoryWithTagPriority(). Release with OvsFreeMemory().
+ *
+ * Returns NULL when the allocation fails or when 'size' does not fit in the
+ * 32-bit length that is passed to NDIS.
+ */
+VOID *
+OvsAllocateMemory(size_t size)
+{
+    OVS_VERIFY_IRQL_LE(DISPATCH_LEVEL);
+
+    /*
+     * The length is passed to NDIS as a UINT32. Refuse anything that would
+     * be truncated by the cast rather than hand back a buffer that is
+     * smaller than the caller asked for.
+     */
+    if ((size_t)(UINT32)size != size) {
+        return NULL;
+    }
+    return NdisAllocateMemoryWithTagPriority(gOvsExtDriverHandle,
+        (UINT32)size, OVS_MEMORY_TAG, NormalPoolPriority);
+}
+
+/*
+ * Allocates 'size' bytes of non-paged pool for callers that need 8- or
+ * 16-byte alignment. Any other 'align' value asserts and yields NULL.
+ * Release with OvsFreeAlignedMemory().
+ */
+VOID *
+OvsAllocateAlignedMemory(size_t size, UINT16 align)
+{
+    OVS_VERIFY_IRQL_LE(DISPATCH_LEVEL);
+
+    ASSERT((align == 8) || (align == 16));
+
+    if ((align != 8) && (align != 16)) {
+        /* Invalid user input. */
+        return NULL;
+    }
+
+    /*
+     * XXX: NdisAllocateMemory*() functions don't say anything about
+     * alignment, hence ExAllocatePool*() is used here.
+     */
+    return (VOID *)ExAllocatePoolWithTagPriority(NonPagedPool, size,
+                                                 OVS_MEMORY_TAG,
+                                                 NormalPoolPriority);
+}
+
+/*
+ * Frees memory through NdisFreeMemoryWithTagPriority() with OVS_MEMORY_TAG;
+ * the counterpart of OvsAllocateMemory(). 'ptr' must not be NULL (asserted).
+ */
+VOID
+OvsFreeMemory(VOID *ptr)
+{
+ ASSERT(ptr);
+ NdisFreeMemoryWithTagPriority(gOvsExtDriverHandle, ptr, OVS_MEMORY_TAG);
+}
+
+/*
+ * Frees memory through ExFreePoolWithTag() with OVS_MEMORY_TAG; the
+ * counterpart of OvsAllocateAlignedMemory(). 'ptr' must not be NULL
+ * (asserted).
+ */
+VOID
+OvsFreeAlignedMemory(VOID *ptr)
+{
+ ASSERT(ptr);
+ ExFreePoolWithTag(ptr, OVS_MEMORY_TAG);
+}
+
+/*
+ * Moves every entry of list 'src' to the tail of list 'dst', preserving
+ * order, and leaves 'src' as an empty list head. Does nothing when 'src' is
+ * already empty.
+ */
+VOID
+OvsAppendList(PLIST_ENTRY dst, PLIST_ENTRY src)
+{
+    if (!IsListEmpty(src)) {
+        PLIST_ENTRY head = src->Flink;
+        PLIST_ENTRY tail = src->Blink;
+        PLIST_ENTRY oldDstTail = dst->Blink;
+
+        /* Hook the first moved entry after the current tail of dst. */
+        oldDstTail->Flink = head;
+        head->Blink = oldDstTail;
+
+        /* Close the ring: the last moved entry becomes the tail of dst. */
+        tail->Flink = dst;
+        dst->Blink = tail;
+
+        /* Reset src to an empty list head. */
+        src->Flink = src;
+        src->Blink = src;
+    }
+}
diff --git a/datapath-windows/ovsext/OvsUtil.h b/datapath-windows/ovsext/OvsUtil.h
new file mode 100644
index 000000000..bff06b80d
--- /dev/null
+++ b/datapath-windows/ovsext/OvsUtil.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_UTIL_H_
+#define __OVS_UTIL_H_ 1
+
+/*
+ * Pool tags passed to the allocation routines. NOTE(review): the characters
+ * appear to be written in reverse so that the tags read "OVST", "OVSF", ...
+ * in pool dumps -- confirm against the pool tag documentation.
+ */
+#define OVS_MEMORY_TAG 'TSVO'
+#define OVS_FIX_SIZE_NBL_POOL_TAG 'FSVO'
+#define OVS_VARIABLE_SIZE_NBL_POOL_TAG 'VSVO'
+#define OVS_NBL_ONLY_POOL_TAG 'OSVO'
+#define OVS_NET_BUFFER_POOL_TAG 'NSVO'
+#define OVS_OTHER_POOL_TAG 'MSVO'
+
+VOID *OvsAllocateMemory(size_t size);
+VOID *OvsAllocateAlignedMemory(size_t size, UINT16 align);
+VOID OvsFreeMemory(VOID *ptr);
+VOID OvsFreeAlignedMemory(VOID *ptr);
+
+/*
+ * Iteration helpers for LIST_ENTRY based lists. '_headPtr' is the list head,
+ * '_itemPtr' the cursor variable. The *_SAFE variants fetch the following
+ * entry into '_nextPtr' before the loop body runs, so the body may unlink
+ * the current entry. Every argument is parenthesized so that expressions
+ * such as '&ctx->list' expand correctly; '_headPtr' is evaluated on every
+ * iteration and must not have side effects.
+ */
+#define LIST_FORALL(_headPtr, _itemPtr) \
+    for ((_itemPtr) = (_headPtr)->Flink; \
+         (_itemPtr) != (_headPtr); \
+         (_itemPtr) = (_itemPtr)->Flink)
+
+#define LIST_FORALL_SAFE(_headPtr, _itemPtr, _nextPtr) \
+    for ((_itemPtr) = (_headPtr)->Flink, (_nextPtr) = (_itemPtr)->Flink; \
+         (_itemPtr) != (_headPtr); \
+         (_itemPtr) = (_nextPtr), (_nextPtr) = (_itemPtr)->Flink)
+
+#define LIST_FORALL_REVERSE(_headPtr, _itemPtr) \
+    for ((_itemPtr) = (_headPtr)->Blink; \
+         (_itemPtr) != (_headPtr); \
+         (_itemPtr) = (_itemPtr)->Blink)
+
+#define LIST_FORALL_REVERSE_SAFE(_headPtr, _itemPtr, _nextPtr) \
+    for ((_itemPtr) = (_headPtr)->Blink, (_nextPtr) = (_itemPtr)->Blink; \
+         (_itemPtr) != (_headPtr); \
+         (_itemPtr) = (_nextPtr), (_nextPtr) = (_itemPtr)->Blink)
+
+VOID OvsAppendList(PLIST_ENTRY dst, PLIST_ENTRY src);
+
+
+/*
+ * The whole expansion is parenthesized so the macros can be used inside
+ * larger expressions (e.g. 2 * MIN(a, b)). MIN evaluates one of its
+ * arguments twice, so do not pass expressions with side effects.
+ */
+#define MIN(_a, _b) (((_a) > (_b)) ? (_b) : (_a))
+/* Element count of a true array (not of a pointer). */
+#define ARRAY_SIZE(_x) (sizeof(_x) / sizeof((_x)[0]))
+#define OVS_SWITCH_PORT_ID_INVALID ((NDIS_SWITCH_PORT_ID)(-1))
+
+/*
+ * Byte-order helpers, defined only when htons is not already a macro. They
+ * swap bytes unconditionally through the _byteswap_* intrinsics.
+ */
+#ifndef htons
+#define htons(_x) _byteswap_ushort((USHORT)(_x))
+#define ntohs(_x) _byteswap_ushort((USHORT)(_x))
+#define htonl(_x) _byteswap_ulong((ULONG)(_x))
+#define ntohl(_x) _byteswap_ulong((ULONG)(_x))
+#endif
+
+/*
+ * Fills in the Type, Revision and Size of the NDIS_OBJECT_HEADER that '_obj'
+ * points to.
+ * NOTE(review): the expansion is a bare { } block, not do { } while (0), so
+ * "if (x) OVS_INIT_OBJECT_HEADER(...); else ..." does not compile.
+ */
+#define OVS_INIT_OBJECT_HEADER(_obj, _type, _revision, _size) \
+ { \
+ PNDIS_OBJECT_HEADER hdrp = _obj; \
+ hdrp->Type = _type; \
+ hdrp->Revision = _revision; \
+ hdrp->Size = _size; \
+ }
+
+
+/* Single-bit masks: bit '_x' of a 16-bit or 32-bit value. */
+#define BIT16(_x) ((UINT16)0x1 << (_x))
+#define BIT32(_x) ((UINT32)0x1 << (_x))
+
+#endif /* __OVS_UTIL_H_ */
diff --git a/datapath-windows/ovsext/OvsVport.c b/datapath-windows/ovsext/OvsVport.c
new file mode 100644
index 000000000..35bdaea7b
--- /dev/null
+++ b/datapath-windows/ovsext/OvsVport.c
@@ -0,0 +1,1416 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "precomp.h"
+#include "OvsIoctl.h"
+#include "OvsJhash.h"
+#include "OvsSwitch.h"
+#include "OvsVport.h"
+#include "OvsEvent.h"
+#include "OvsUser.h"
+#include "OvsVxlan.h"
+#include "OvsIpHelper.h"
+#include "OvsOid.h"
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_VPORT
+#include "OvsDebug.h"
+
+/*
+ * Trace helpers logging entry to / exit from the NIC and port handlers.
+ * '_nic' and '_port' are pointers to the NDIS parameter structures; the
+ * argument is parenthesized so any pointer expression can be passed.
+ */
+#define VPORT_NIC_ENTER(_nic) \
+    OVS_LOG_TRACE("Enter: PortId: %x, NicIndex: %d", (_nic)->PortId, \
+                  (_nic)->NicIndex)
+
+#define VPORT_NIC_EXIT(_nic) \
+    OVS_LOG_TRACE("Exit: PortId: %x, NicIndex: %d", (_nic)->PortId, \
+                  (_nic)->NicIndex)
+
+#define VPORT_PORT_ENTER(_port) \
+    OVS_LOG_TRACE("Enter: PortId: %x", (_port)->PortId)
+
+#define VPORT_PORT_EXIT(_port) \
+    OVS_LOG_TRACE("Exit: PortId: %x", (_port)->PortId)
+
+#define OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC 100
+
+extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
+extern PNDIS_SPIN_LOCK gOvsCtrlLock;
+
+/* Forward declarations of the file-local helpers. */
+static UINT32 OvsGetVportNo(POVS_SWITCH_CONTEXT switchContext, UINT32 nicIndex,
+                            OVS_VPORT_TYPE ovsType);
+static POVS_VPORT_ENTRY OvsAllocateVport(VOID);
+static VOID OvsInitVportWithPortParam(POVS_VPORT_ENTRY vport,
+                PNDIS_SWITCH_PORT_PARAMETERS portParam);
+static VOID OvsInitVportWithNicParam(POVS_SWITCH_CONTEXT switchContext,
+                POVS_VPORT_ENTRY vport, PNDIS_SWITCH_NIC_PARAMETERS nicParam);
+static VOID OvsInitPhysNicVport(POVS_VPORT_ENTRY vport, POVS_VPORT_ENTRY
+                virtVport, UINT32 nicIndex);
+static NDIS_STATUS OvsInitVportCommon(POVS_SWITCH_CONTEXT switchContext,
+                                      POVS_VPORT_ENTRY vport);
+static VOID OvsRemoveAndDeleteVport(POVS_SWITCH_CONTEXT switchContext,
+                                    POVS_VPORT_ENTRY vport);
+static __inline VOID OvsWaitActivate(POVS_SWITCH_CONTEXT switchContext,
+                                     ULONG sleepMicroSec);
+
+/*
+ * Functions implemented in relation to NDIS port manipulation.
+ */
+
+/*
+ * Creates the vport entry for a newly created switch port.
+ *
+ * Returns STATUS_DATA_NOT_ACCEPTED when a vport already exists for the port
+ * id, NDIS_STATUS_RESOURCES when the vport cannot be allocated, the status of
+ * OvsInitVportCommon() when common initialization fails (the vport is freed
+ * in that case), and NDIS_STATUS_SUCCESS otherwise.
+ */
+NDIS_STATUS
+OvsCreatePort(POVS_SWITCH_CONTEXT switchContext,
+              PNDIS_SWITCH_PORT_PARAMETERS portParam)
+{
+    POVS_VPORT_ENTRY vport;
+    LOCK_STATE_EX lockState;
+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+
+    VPORT_PORT_ENTER(portParam);
+
+    NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0);
+    vport = OvsFindVportByPortIdAndNicIndex(switchContext,
+                                            portParam->PortId, 0);
+    if (vport != NULL) {
+        status = STATUS_DATA_NOT_ACCEPTED;
+        goto create_port_done;
+    }
+    vport = (POVS_VPORT_ENTRY)OvsAllocateVport();
+    if (vport == NULL) {
+        status = NDIS_STATUS_RESOURCES;
+        goto create_port_done;
+    }
+    OvsInitVportWithPortParam(vport, portParam);
+    /*
+     * Propagate an initialization failure instead of reporting success, and
+     * release the vport the same way OvsCreateNic() does on this error.
+     */
+    status = OvsInitVportCommon(switchContext, vport);
+    if (status != NDIS_STATUS_SUCCESS) {
+        OvsFreeMemory(vport);
+    }
+
+create_port_done:
+    NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+    VPORT_PORT_EXIT(portParam);
+    return status;
+}
+
+/*
+ * Marks the vport that belongs to the given switch port as being torn down.
+ * Only logs a warning when no such vport exists.
+ */
+VOID
+OvsTeardownPort(POVS_SWITCH_CONTEXT switchContext,
+                PNDIS_SWITCH_PORT_PARAMETERS portParam)
+{
+    LOCK_STATE_EX dispatchLockState;
+    POVS_VPORT_ENTRY entry;
+
+    VPORT_PORT_ENTER(portParam);
+
+    NdisAcquireRWLockWrite(switchContext->dispatchLock, &dispatchLockState, 0);
+    entry = OvsFindVportByPortIdAndNicIndex(switchContext,
+                                            portParam->PortId, 0);
+    if (entry == NULL) {
+        OVS_LOG_WARN("Vport not present.");
+    } else {
+        /* TODO: add an assertion on the previous state here. */
+        entry->portState = NdisSwitchPortStateTeardown;
+        entry->ovsState = OVS_STATE_PORT_TEAR_DOWN;
+    }
+    NdisReleaseRWLock(switchContext->dispatchLock, &dispatchLockState);
+
+    VPORT_PORT_EXIT(portParam);
+}
+
+
+
+/*
+ * Removes and deletes the vport that belongs to the given switch port.
+ * Only logs a warning when no such vport exists.
+ */
+VOID
+OvsDeletePort(POVS_SWITCH_CONTEXT switchContext,
+              PNDIS_SWITCH_PORT_PARAMETERS portParam)
+{
+    LOCK_STATE_EX dispatchLockState;
+    POVS_VPORT_ENTRY entry;
+
+    VPORT_PORT_ENTER(portParam);
+
+    NdisAcquireRWLockWrite(switchContext->dispatchLock, &dispatchLockState, 0);
+    entry = OvsFindVportByPortIdAndNicIndex(switchContext,
+                                            portParam->PortId, 0);
+    if (entry == NULL) {
+        OVS_LOG_WARN("Vport not present.");
+    } else {
+        OvsRemoveAndDeleteVport(switchContext, entry);
+    }
+    NdisReleaseRWLock(switchContext->dispatchLock, &dispatchLockState);
+
+    VPORT_PORT_EXIT(portParam);
+}
+
+
+/*
+ * Functions implemented in relation to NDIS NIC manipulation.
+ */
+
+/*
+ * Handles creation of a NIC on an existing switch port.
+ *
+ * For an external NIC with a non-zero NIC index a separate vport is
+ * allocated and initialized from the external vport; in every other case the
+ * port's own vport is used. The vport is then filled in from 'nicParam' and,
+ * depending on its resulting state, a connect (and link-up) event is posted.
+ *
+ * Returns NDIS_STATUS_NOT_SUPPORTED when the switch is not activated (this
+ * vetoes the NIC), NDIS_STATUS_INVALID_PARAMETER when the port has no vport,
+ * NDIS_STATUS_RESOURCES when a vport cannot be allocated, or the failure
+ * status of OvsInitVportCommon().
+ */
+NDIS_STATUS
+OvsCreateNic(POVS_SWITCH_CONTEXT switchContext,
+             PNDIS_SWITCH_NIC_PARAMETERS nicParam)
+{
+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+    LOCK_STATE_EX dispatchLockState;
+    POVS_VPORT_ENTRY entry;
+    UINT32 vportNo = 0;
+    UINT32 pendingEvents = 0;
+
+    VPORT_NIC_ENTER(nicParam);
+
+    /* Wait for lists to be initialized. */
+    OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC);
+
+    if (!switchContext->isActivated) {
+        OVS_LOG_WARN("Switch is not activated yet.");
+        /* Veto the creation of nic */
+        status = NDIS_STATUS_NOT_SUPPORTED;
+        goto done;
+    }
+
+    NdisAcquireRWLockWrite(switchContext->dispatchLock, &dispatchLockState, 0);
+
+    entry = OvsFindVportByPortIdAndNicIndex(switchContext,
+                                            nicParam->PortId, 0);
+    if (entry == NULL) {
+        OVS_LOG_ERROR("Create NIC without Switch Port,"
+                      " PortId: %x, NicIndex: %d",
+                      nicParam->PortId, nicParam->NicIndex);
+        status = NDIS_STATUS_INVALID_PARAMETER;
+        goto add_nic_done;
+    }
+
+    if (nicParam->NicType == NdisSwitchNicTypeExternal &&
+        nicParam->NicIndex != 0) {
+        POVS_VPORT_ENTRY externalEntry =
+            (POVS_VPORT_ENTRY)switchContext->externalVport;
+
+        entry = (POVS_VPORT_ENTRY)OvsAllocateVport();
+        if (entry == NULL) {
+            status = NDIS_STATUS_RESOURCES;
+            goto add_nic_done;
+        }
+        OvsInitPhysNicVport(entry, externalEntry, nicParam->NicIndex);
+        status = OvsInitVportCommon(switchContext, entry);
+        if (status != NDIS_STATUS_SUCCESS) {
+            OvsFreeMemory(entry);
+            goto add_nic_done;
+        }
+    }
+
+    OvsInitVportWithNicParam(switchContext, entry, nicParam);
+    vportNo = entry->portNo;
+    if (entry->ovsState == OVS_STATE_CONNECTED) {
+        pendingEvents = OVS_EVENT_CONNECT | OVS_EVENT_LINK_UP;
+    } else if (entry->ovsState == OVS_STATE_NIC_CREATED) {
+        pendingEvents = OVS_EVENT_CONNECT;
+    }
+
+add_nic_done:
+    NdisReleaseRWLock(switchContext->dispatchLock, &dispatchLockState);
+    /* Events are posted only after the dispatch lock has been dropped. */
+    if (vportNo && pendingEvents) {
+        OvsPostEvent(vportNo, pendingEvents);
+    }
+
+done:
+    VPORT_NIC_EXIT(nicParam);
+    OVS_LOG_TRACE("Exit: status %8x.\n", status);
+
+    return status;
+}
+
+
+/*
+ * Marks an already created NIC as connected, posts a link-up event for its
+ * vport and, for the internal NIC, notifies OvsInternalAdapterUp().
+ * Does nothing (apart from logging) when the switch is not activated or the
+ * vport cannot be found.
+ */
+VOID
+OvsConnectNic(POVS_SWITCH_CONTEXT switchContext,
+              PNDIS_SWITCH_NIC_PARAMETERS nicParam)
+{
+    LOCK_STATE_EX dispatchLockState;
+    POVS_VPORT_ENTRY entry;
+    UINT32 vportNo = 0;
+
+    VPORT_NIC_ENTER(nicParam);
+
+    /* Wait for lists to be initialized. */
+    OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC);
+
+    if (!switchContext->isActivated) {
+        OVS_LOG_WARN("Switch is not activated yet.");
+        goto done;
+    }
+
+    NdisAcquireRWLockWrite(switchContext->dispatchLock, &dispatchLockState, 0);
+    entry = OvsFindVportByPortIdAndNicIndex(switchContext,
+                                            nicParam->PortId,
+                                            nicParam->NicIndex);
+    if (entry == NULL) {
+        OVS_LOG_WARN("Vport not present.");
+        NdisReleaseRWLock(switchContext->dispatchLock, &dispatchLockState);
+        ASSERT(0);
+        goto done;
+    }
+
+    entry->ovsState = OVS_STATE_CONNECTED;
+    entry->nicState = NdisSwitchNicStateConnected;
+    vportNo = entry->portNo;
+    NdisReleaseRWLock(switchContext->dispatchLock, &dispatchLockState);
+
+    /* Notifications are sent after the dispatch lock has been dropped. */
+    OvsPostEvent(vportNo, OVS_EVENT_LINK_UP);
+    if (nicParam->NicType == NdisSwitchNicTypeInternal) {
+        OvsInternalAdapterUp(vportNo, &nicParam->NetCfgInstanceId);
+    }
+
+done:
+    VPORT_NIC_EXIT(nicParam);
+}
+
+/*
+ * Refreshes the vport of a NIC from updated NIC parameters: the NetCfg
+ * instance id (external/internal NICs) or the VM MAC address
+ * (synthetic/emulated NICs), the permanent and current MAC addresses, the
+ * MTU and the NUMA node id. Posts OVS_EVENT_MAC_CHANGE and/or
+ * OVS_EVENT_MTU_CHANGE for whatever actually changed.
+ *
+ * Does nothing (apart from logging) when the switch is not activated or the
+ * vport cannot be found.
+ */
+VOID
+OvsUpdateNic(POVS_SWITCH_CONTEXT switchContext,
+             PNDIS_SWITCH_NIC_PARAMETERS nicParam)
+{
+    POVS_VPORT_ENTRY vport;
+    LOCK_STATE_EX lockState;
+
+    /* 'status' accumulates the OVS_EVENT_* bits to post, not an error code. */
+    UINT32 status = 0, portNo = 0;
+
+    VPORT_NIC_ENTER(nicParam);
+
+    /* Wait for lists to be initialized. */
+    OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC);
+
+    if (!switchContext->isActivated) {
+        OVS_LOG_WARN("Switch is not activated yet.");
+        goto update_nic_done;
+    }
+
+    NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0);
+    vport = OvsFindVportByPortIdAndNicIndex(switchContext,
+                                            nicParam->PortId,
+                                            nicParam->NicIndex);
+    if (vport == NULL) {
+        OVS_LOG_WARN("Vport search failed.");
+        /*
+         * The exit label sits after the normal unlock, so the dispatch lock
+         * has to be dropped here; otherwise it would stay held forever.
+         */
+        NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+        goto update_nic_done;
+    }
+    switch (nicParam->NicType) {
+    case NdisSwitchNicTypeExternal:
+    case NdisSwitchNicTypeInternal:
+        RtlCopyMemory(&vport->netCfgInstanceId, &nicParam->NetCfgInstanceId,
+                      sizeof (GUID));
+        break;
+    case NdisSwitchNicTypeSynthetic:
+    case NdisSwitchNicTypeEmulated:
+        if (!RtlEqualMemory(vport->vmMacAddress, nicParam->VMMacAddress,
+                            sizeof (vport->vmMacAddress))) {
+            status |= OVS_EVENT_MAC_CHANGE;
+            RtlCopyMemory(vport->vmMacAddress, nicParam->VMMacAddress,
+                          sizeof (vport->vmMacAddress));
+        }
+        break;
+    default:
+        ASSERT(0);
+    }
+    if (!RtlEqualMemory(vport->permMacAddress, nicParam->PermanentMacAddress,
+                        sizeof (vport->permMacAddress))) {
+        RtlCopyMemory(vport->permMacAddress, nicParam->PermanentMacAddress,
+                      sizeof (vport->permMacAddress));
+        status |= OVS_EVENT_MAC_CHANGE;
+    }
+    if (!RtlEqualMemory(vport->currMacAddress, nicParam->CurrentMacAddress,
+                        sizeof (vport->currMacAddress))) {
+        RtlCopyMemory(vport->currMacAddress, nicParam->CurrentMacAddress,
+                      sizeof (vport->currMacAddress));
+        status |= OVS_EVENT_MAC_CHANGE;
+    }
+
+    if (vport->mtu != nicParam->MTU) {
+        vport->mtu = nicParam->MTU;
+        status |= OVS_EVENT_MTU_CHANGE;
+    }
+    vport->numaNodeId = nicParam->NumaNodeId;
+    portNo = vport->portNo;
+
+    NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+    /* Events are posted only after the dispatch lock has been dropped. */
+    if (status && portNo) {
+        OvsPostEvent(portNo, status);
+    }
+update_nic_done:
+    VPORT_NIC_EXIT(nicParam);
+}
+
+
+/*
+ * Marks the vport that matches 'nicParam' as disconnected, posts
+ * OVS_EVENT_LINK_DOWN for it and, when the vport is the internal port, also
+ * calls OvsInternalAdapterDown().  Only logs a warning when the switch is not
+ * activated or the vport cannot be found.
+ */
+VOID
+OvsDisconnectNic(POVS_SWITCH_CONTEXT switchContext,
+                 PNDIS_SWITCH_NIC_PARAMETERS nicParam)
+{
+    LOCK_STATE_EX rwState;
+    POVS_VPORT_ENTRY entry;
+    UINT32 eventPortNo = 0;
+    BOOLEAN wasInternal = FALSE;
+
+    VPORT_NIC_ENTER(nicParam);
+
+    /* Give the switch a chance to finish (or fail) activation first. */
+    OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC);
+
+    if (switchContext->isActivated) {
+        NdisAcquireRWLockWrite(switchContext->dispatchLock, &rwState, 0);
+        entry = OvsFindVportByPortIdAndNicIndex(switchContext,
+                                                nicParam->PortId,
+                                                nicParam->NicIndex);
+        if (entry == NULL) {
+            OVS_LOG_WARN("Vport not present.");
+            NdisReleaseRWLock(switchContext->dispatchLock, &rwState);
+        } else {
+            entry->nicState = NdisSwitchNicStateDisconnected;
+            entry->ovsState = OVS_STATE_NIC_CREATED;
+
+            /* Snapshot what is needed once the lock has been dropped. */
+            eventPortNo = entry->portNo;
+            if (entry->ovsType == OVSWIN_VPORT_TYPE_INTERNAL) {
+                wasInternal = TRUE;
+            }
+            NdisReleaseRWLock(switchContext->dispatchLock, &rwState);
+
+            OvsPostEvent(eventPortNo, OVS_EVENT_LINK_DOWN);
+            if (wasInternal) {
+                OvsInternalAdapterDown();
+            }
+        }
+    } else {
+        OVS_LOG_WARN("Switch is not activated yet.");
+    }
+
+    VPORT_NIC_EXIT(nicParam);
+}
+
+
+/*
+ * Handles deletion of the NIC described by 'nicParam'.
+ *
+ * A vport on the external port with a non-zero NIC index is removed and
+ * freed.  Any other vport is kept and moved back to OVS_STATE_PORT_CREATED
+ * with an unknown NIC state.  In both cases OVS_EVENT_DISCONNECT is posted
+ * for the port number.  Only logs a warning when the switch is not activated
+ * or the vport cannot be found.
+ */
+VOID
+OvsDeleteNic(POVS_SWITCH_CONTEXT switchContext,
+             PNDIS_SWITCH_NIC_PARAMETERS nicParam)
+{
+    LOCK_STATE_EX lockState;
+    POVS_VPORT_ENTRY vport;
+    UINT32 portNo = 0;
+
+    VPORT_NIC_ENTER(nicParam);
+    /* Wait for lists to be initialized. */
+    OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC);
+
+    if (!switchContext->isActivated) {
+        OVS_LOG_WARN("Switch is not activated yet.");
+        goto done;
+    }
+
+    NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0);
+    vport = OvsFindVportByPortIdAndNicIndex(switchContext,
+                                            nicParam->PortId,
+                                            nicParam->NicIndex);
+
+    if (!vport) {
+        OVS_LOG_WARN("Vport not present.");
+        NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+        goto done;
+    }
+
+    /* Read the port number first: the vport may be freed just below. */
+    portNo = vport->portNo;
+    if (vport->portType == NdisSwitchPortTypeExternal &&
+        vport->nicIndex != 0) {
+        /*
+         * OvsRemoveAndDeleteVport() frees 'vport', so it must not be
+         * touched after this call.
+         */
+        OvsRemoveAndDeleteVport(switchContext, vport);
+    } else {
+        vport->nicState = NdisSwitchNicStateUnknown;
+        vport->ovsState = OVS_STATE_PORT_CREATED;
+    }
+
+    NdisReleaseRWLock(switchContext->dispatchLock, &lockState);
+    OvsPostEvent(portNo, OVS_EVENT_DISCONNECT);
+
+done:
+    VPORT_NIC_EXIT(nicParam);
+}
+
+
+/*
+ * OVS Vport related functionality.
+ */
+/*
+ * Looks up a vport by its full port number.  Returns NULL when the index
+ * part of 'portNo' is out of range, the slot is empty, or the vport stored
+ * in the slot carries a different port number.
+ */
+POVS_VPORT_ENTRY
+OvsFindVportByPortNo(POVS_SWITCH_CONTEXT switchContext,
+                     UINT32 portNo)
+{
+    POVS_VPORT_ENTRY candidate;
+
+    if (!(OVS_VPORT_INDEX(portNo) < OVS_MAX_VPORT_ARRAY_SIZE)) {
+        return NULL;
+    }
+    if (OVS_IS_VPORT_ENTRY_NULL(switchContext, OVS_VPORT_INDEX(portNo))) {
+        return NULL;
+    }
+
+    candidate = (POVS_VPORT_ENTRY)
+        switchContext->vportArray[OVS_VPORT_INDEX(portNo)];
+    if (candidate->portNo != portNo) {
+        /* Same slot, but not the port the caller asked for. */
+        return NULL;
+    }
+    return candidate;
+}
+
+
+/*
+ * Looks up a vport by its OVS name.  'name' is compared as 'length' raw
+ * bytes against each entry in the matching bucket of the name hash table.
+ * Returns the vport, or NULL when no entry matches.
+ */
+POVS_VPORT_ENTRY
+OvsFindVportByOvsName(POVS_SWITCH_CONTEXT switchContext,
+                      CHAR *name,
+                      UINT32 length)
+{
+    PLIST_ENTRY bucket, cursor;
+    POVS_VPORT_ENTRY entry;
+    UINT32 nameHash;
+
+    nameHash = OvsJhashBytes((const VOID *)name, length, OVS_HASH_BASIS);
+    bucket = &(switchContext->nameHashArray[nameHash & OVS_VPORT_MASK]);
+
+    LIST_FORALL(bucket, cursor) {
+        entry = CONTAINING_RECORD(cursor, OVS_VPORT_ENTRY, nameLink);
+        if (entry->ovsNameLen == length) {
+            if (RtlEqualMemory(name, entry->ovsName, length)) {
+                return entry;
+            }
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Looks up a vport by Hyper-V port id and NIC index.
+ *
+ *  - External port, index 0: the cached external vport.
+ *  - External port, index > 0: the slot at index + OVS_EXTERNAL_VPORT_START
+ *    (NULL when index exceeds OVS_MAX_PHYS_ADAPTERS or the slot is empty).
+ *  - Internal port: the cached internal vport.
+ *  - Anything else: a search of the port-id hash table.
+ */
+POVS_VPORT_ENTRY
+OvsFindVportByPortIdAndNicIndex(POVS_SWITCH_CONTEXT switchContext,
+                                NDIS_SWITCH_PORT_ID portId,
+                                NDIS_SWITCH_NIC_INDEX index)
+{
+    PLIST_ENTRY bucket, cursor;
+    POVS_VPORT_ENTRY entry;
+    UINT32 portHash;
+
+    if (portId == switchContext->externalPortId) {
+        if (index == 0) {
+            return (POVS_VPORT_ENTRY)switchContext->externalVport;
+        }
+        if (index > OVS_MAX_PHYS_ADAPTERS) {
+            return NULL;
+        }
+        return OVS_IS_VPORT_ENTRY_NULL(switchContext,
+                                       index + OVS_EXTERNAL_VPORT_START)
+            ? NULL
+            : (POVS_VPORT_ENTRY)switchContext->vportArray[
+                  index + OVS_EXTERNAL_VPORT_START];
+    }
+
+    if (switchContext->internalPortId == portId) {
+        return (POVS_VPORT_ENTRY)switchContext->internalVport;
+    }
+
+    /* Every other port is found through the port-id hash table. */
+    portHash = OvsJhashWords((UINT32 *)&portId, 1, OVS_HASH_BASIS);
+    bucket = &(switchContext->portHashArray[portHash & OVS_VPORT_MASK]);
+    LIST_FORALL(bucket, cursor) {
+        entry = CONTAINING_RECORD(cursor, OVS_VPORT_ENTRY, portLink);
+        if (index == entry->nicIndex && portId == entry->portId) {
+            return entry;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Chooses the vportArray slot for a new vport of type 'ovsType' and returns
+ * the port number built from that slot index and the slot's generation.
+ *
+ * External NICs map to nicIndex + OVS_EXTERNAL_VPORT_START, the internal
+ * port and each tunnel type have a fixed slot, and VM NICs (synthetic or
+ * emulated) take the next free slot after switchContext->lastPortIndex,
+ * wrapping around to OVS_VM_VPORT_START.
+ *
+ * Returns 0 when no port number can be assigned: invalid nicIndex, no free
+ * VM slot, slot index out of range, or the slot is already occupied.
+ */
+static UINT32
+OvsGetVportNo(POVS_SWITCH_CONTEXT switchContext,
+              UINT32 nicIndex,
+              OVS_VPORT_TYPE ovsType)
+{
+    UINT32 index = 0xffffff, i = 0;
+    UINT64 gen;
+
+    switch (ovsType) {
+    case OVSWIN_VPORT_TYPE_EXTERNAL:
+        if (nicIndex == 0) {
+            return 0; // not a valid portNo
+        } else if (nicIndex > OVS_MAX_PHYS_ADAPTERS) {
+            return 0;
+        } else {
+            index = nicIndex + OVS_EXTERNAL_VPORT_START;
+        }
+        break;
+    case OVSWIN_VPORT_TYPE_INTERNAL:
+        index = OVS_INTERNAL_VPORT_DEFAULT_INDEX;
+        break;
+    case OVSWIN_VPORT_TYPE_SYNTHETIC:
+    case OVSWIN_VPORT_TYPE_EMULATED:
+        index = switchContext->lastPortIndex + 1;
+        if (index == OVS_MAX_VPORT_ARRAY_SIZE) {
+            index = OVS_VM_VPORT_START;
+        }
+        /* Probe each VM slot at most once, wrapping at the array end. */
+        while (!OVS_IS_VPORT_ENTRY_NULL(switchContext, index) &&
+               i < (OVS_MAX_VPORT_ARRAY_SIZE - OVS_VM_VPORT_START)) {
+            index++;
+            i++;
+            if (index == OVS_MAX_VPORT_ARRAY_SIZE) {
+                index = OVS_VM_VPORT_START;
+            }
+        }
+        if (i == (OVS_MAX_VPORT_ARRAY_SIZE - OVS_VM_VPORT_START)) {
+            return 0; // not available
+        }
+        switchContext->lastPortIndex = index;
+        break;
+    case OVSWIN_VPORT_TYPE_GRE:
+        index = OVS_GRE_VPORT_INDEX;
+        break;
+    case OVSWIN_VPORT_TYPE_GRE64:
+        index = OVS_GRE64_VPORT_INDEX;
+        break;
+    case OVSWIN_VPORT_TYPE_VXLAN:
+        index = OVS_VXLAN_VPORT_INDEX;
+        break;
+    case OVSWIN_VPORT_TYPE_LOCAL:
+    default:
+        ASSERT(0);
+    }
+    /*
+     * Valid slots are 0 .. OVS_MAX_VPORT_ARRAY_SIZE - 1 (the same bound the
+     * lookup and clear routines use), so the size itself is out of range.
+     */
+    if (index >= OVS_MAX_VPORT_ARRAY_SIZE) {
+        return 0;
+    }
+    /*
+     * A slot holds either a vport pointer or a small generation number.
+     * A value above 0xff is treated as a pointer, i.e. the slot is in use.
+     */
+    gen = (UINT64)switchContext->vportArray[index];
+    if (gen > 0xff) {
+        return 0;
+    } else if (gen == 0) {
+        gen++;
+    }
+    return OVS_VPORT_PORT_NO(index, (UINT32)gen);
+}
+
+
+/*
+ * Allocates a zero-filled vport entry in OVS_STATE_UNKNOWN.
+ * Returns NULL when the allocation fails.
+ */
+static POVS_VPORT_ENTRY
+OvsAllocateVport(VOID)
+{
+    POVS_VPORT_ENTRY entry =
+        (POVS_VPORT_ENTRY)OvsAllocateMemory(sizeof (OVS_VPORT_ENTRY));
+
+    if (entry != NULL) {
+        RtlZeroMemory(entry, sizeof (OVS_VPORT_ENTRY));
+        entry->ovsState = OVS_STATE_UNKNOWN;
+    }
+    return entry;
+}
+
+/*
+ * Fills in the port-level fields of 'vport' from the Hyper-V port
+ * parameters and derives the OVS vport type and state from the port type
+ * and port state.  Port types and states not listed below leave ovsType /
+ * ovsState unchanged.
+ */
+static VOID
+OvsInitVportWithPortParam(POVS_VPORT_ENTRY vport,
+                          PNDIS_SWITCH_PORT_PARAMETERS portParam)
+{
+    vport->portId = portParam->PortId;
+    vport->portType = portParam->PortType;
+    vport->portState = portParam->PortState;
+    vport->isValidationPort = portParam->IsValidationPort;
+    vport->nicState = NdisSwitchNicStateUnknown;
+    RtlCopyMemory(&vport->portName, &portParam->PortName,
+                  sizeof (NDIS_SWITCH_PORT_NAME));
+
+    /* Hyper-V port type -> OVS vport type. */
+    switch (portParam->PortType) {
+    case NdisSwitchPortTypeExternal:
+        vport->ovsType = OVSWIN_VPORT_TYPE_EXTERNAL;
+        break;
+    case NdisSwitchPortTypeInternal:
+        vport->ovsType = OVSWIN_VPORT_TYPE_INTERNAL;
+        break;
+    case NdisSwitchPortTypeSynthetic:
+        vport->ovsType = OVSWIN_VPORT_TYPE_SYNTHETIC;
+        break;
+    case NdisSwitchPortTypeEmulated:
+        vport->ovsType = OVSWIN_VPORT_TYPE_EMULATED;
+        break;
+    }
+
+    /* Hyper-V port state -> OVS vport state. */
+    switch (portParam->PortState) {
+    case NdisSwitchPortStateCreated:
+        vport->ovsState = OVS_STATE_PORT_CREATED;
+        break;
+    case NdisSwitchPortStateTeardown:
+        vport->ovsState = OVS_STATE_PORT_TEAR_DOWN;
+        break;
+    case NdisSwitchPortStateDeleted:
+        vport->ovsState = OVS_STATE_PORT_DELETED;
+        break;
+    }
+}
+
+
+/*
+ * Fills in the NIC-level fields of 'vport' from the Hyper-V NIC parameters
+ * and derives the OVS vport state from the NIC state.  The vport is
+ * expected to belong to the same port and to be in OVS_STATE_PORT_CREATED
+ * (both are asserted).  'switchContext' is unused.
+ */
+static VOID
+OvsInitVportWithNicParam(POVS_SWITCH_CONTEXT switchContext,
+                         POVS_VPORT_ENTRY vport,
+                         PNDIS_SWITCH_NIC_PARAMETERS nicParam)
+{
+    ASSERT(vport->portId == nicParam->PortId);
+    ASSERT(vport->ovsState == OVS_STATE_PORT_CREATED);
+
+    UNREFERENCED_PARAMETER(switchContext);
+
+    vport->mtu = nicParam->MTU;
+    vport->nicState = nicParam->NicState;
+    vport->nicIndex = nicParam->NicIndex;
+    vport->numaNodeId = nicParam->NumaNodeId;
+
+    RtlCopyMemory(vport->permMacAddress, nicParam->PermanentMacAddress,
+                  sizeof (nicParam->PermanentMacAddress));
+    RtlCopyMemory(vport->currMacAddress, nicParam->CurrentMacAddress,
+                  sizeof (nicParam->CurrentMacAddress));
+    RtlCopyMemory(&vport->nicName, &nicParam->NicName,
+                  sizeof (nicParam->NicName));
+
+    /* VM NICs carry a VM MAC and VM name; the others a NetCfg instance id. */
+    switch (nicParam->NicType) {
+    case NdisSwitchNicTypeSynthetic:
+    case NdisSwitchNicTypeEmulated:
+        RtlCopyMemory(vport->vmMacAddress, nicParam->VMMacAddress,
+                      sizeof (nicParam->VMMacAddress));
+        RtlCopyMemory(&vport->vmName, &nicParam->VmName,
+                      sizeof (nicParam->VmName));
+        break;
+    default:
+        RtlCopyMemory(&vport->netCfgInstanceId, &nicParam->NetCfgInstanceId,
+                      sizeof (nicParam->NetCfgInstanceId));
+        break;
+    }
+
+    /* Hyper-V NIC state -> OVS vport state. */
+    switch (vport->nicState) {
+    case NdisSwitchNicStateCreated:
+    case NdisSwitchNicStateDisconnected:
+        vport->ovsState = OVS_STATE_NIC_CREATED;
+        break;
+    case NdisSwitchNicStateConnected:
+        vport->ovsState = OVS_STATE_CONNECTED;
+        break;
+    case NdisSwitchNicStateDeleted:
+        vport->ovsState = OVS_STATE_PORT_CREATED;
+        break;
+    }
+}
+
+/*
+ * Initializes 'vport' as an external vport for NIC index 'nicIndex',
+ * copying the port-level fields (validation flag, port type, state, id and
+ * name) from 'virtVport'.  The new vport starts in OVS_STATE_PORT_CREATED
+ * with an unknown NIC state.
+ */
+static VOID
+OvsInitPhysNicVport(POVS_VPORT_ENTRY vport,
+                    POVS_VPORT_ENTRY virtVport,
+                    UINT32 nicIndex)
+{
+    /* Fields inherited from the vport passed as 'virtVport'. */
+    vport->portId = virtVport->portId;
+    vport->portType = virtVport->portType;
+    vport->portState = virtVport->portState;
+    vport->isValidationPort = virtVport->isValidationPort;
+    RtlCopyMemory(&vport->portName, &virtVport->portName,
+                  sizeof (NDIS_SWITCH_PORT_NAME));
+
+    /* Fields specific to this NIC's vport. */
+    vport->nicIndex = (NDIS_SWITCH_NIC_INDEX)nicIndex;
+    vport->ovsType = OVSWIN_VPORT_TYPE_EXTERNAL;
+    vport->nicState = NdisSwitchNicStateUnknown;
+    vport->ovsState = OVS_STATE_PORT_CREATED;
+}
+/*
+ * Registers an initialized vport with the switch context.
+ *
+ * Every vport except the external port's entry with NIC index 0 gets a port
+ * number from OvsGetVportNo(), is stored in vportArray, is linked into the
+ * name and port-id hash tables, and is counted in numVports.  The external
+ * port with NIC index 0 is only recorded as externalVport / externalPortId.
+ *
+ * For the four Hyper-V port types handled in the switch below, ovsName is
+ * generated here; for any other port type it is left as the caller set it.
+ *
+ * Returns NDIS_STATUS_RESOURCES when no port number is available (nothing
+ * has been registered in that case), otherwise NDIS_STATUS_SUCCESS.
+ */
+static NDIS_STATUS
+OvsInitVportCommon(POVS_SWITCH_CONTEXT switchContext,
+POVS_VPORT_ENTRY vport)
+{
+ UINT32 hash;
+ size_t len;
+ /* Everything but the external port's NIC-index-0 entry gets a slot. */
+ if (vport->portType != NdisSwitchPortTypeExternal ||
+ vport->nicIndex != 0) {
+ vport->portNo = OvsGetVportNo(switchContext, vport->nicIndex,
+ vport->ovsType);
+ if (vport->portNo == 0) {
+ return NDIS_STATUS_RESOURCES;
+ }
+ ASSERT(OVS_IS_VPORT_ENTRY_NULL(switchContext,
+ OVS_VPORT_INDEX(vport->portNo)));
+
+ switchContext->vportArray[OVS_VPORT_INDEX(vport->portNo)] = vport;
+ }
+ /* Per-type bookkeeping and generation of the OVS-visible name. */
+ switch (vport->portType) {
+ case NdisSwitchPortTypeExternal:
+ if (vport->nicIndex == 0) {
+ switchContext->externalPortId = vport->portId;
+ switchContext->externalVport = vport;
+ RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1,
+ "external.virtualAdapter");
+ }
+ else {
+ switchContext->numPhysicalNics++;
+ RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1,
+ "external.%lu", (UINT32)vport->nicIndex);
+ }
+ break;
+ case NdisSwitchPortTypeInternal:
+ switchContext->internalPortId = vport->portId;
+ switchContext->internalVport = vport;
+ RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1,
+ "internal");
+ break;
+ case NdisSwitchPortTypeSynthetic:
+ RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1,
+ "vmNICSyn.%lx", vport->portNo);
+ break;
+ case NdisSwitchPortTypeEmulated:
+ RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1,
+ "vmNICEmu.%lx", vport->portNo);
+ break;
+ }
+ StringCbLengthA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, &len);
+ vport->ovsNameLen = (UINT32)len;
+ /* The external NIC-index-0 entry is not hashed or counted. */
+ if (vport->portType == NdisSwitchPortTypeExternal &&
+ vport->nicIndex == 0) {
+ return NDIS_STATUS_SUCCESS;
+ }
+ /* Make the vport reachable by name and by Hyper-V port id. */
+ hash = OvsJhashBytes(vport->ovsName, vport->ovsNameLen, OVS_HASH_BASIS);
+ InsertHeadList(&switchContext->nameHashArray[hash & OVS_VPORT_MASK],
+ &vport->nameLink);
+ hash = OvsJhashWords(&vport->portId, 1, OVS_HASH_BASIS);
+ InsertHeadList(&switchContext->portHashArray[hash & OVS_VPORT_MASK],
+ &vport->portLink);
+ switchContext->numVports++;
+ return NDIS_STATUS_SUCCESS;
+}
+
+
+/*
+ * Unregisters 'vport' from the switch context and frees it.  The vport must
+ * not be used by the caller afterwards.
+ *
+ * The external port's entry with NIC index 0 is only cleared from
+ * externalVport / externalPortId and freed.  Every other vport is unlinked
+ * from both hash tables, its vportArray slot is replaced by the next
+ * generation number, and numVports is decremented.
+ */
+static VOID
+OvsRemoveAndDeleteVport(POVS_SWITCH_CONTEXT switchContext,
+ POVS_VPORT_ENTRY vport)
+{
+ /* Bits 24 and up of the port number are used as the generation. */
+ UINT64 gen = vport->portNo >> 24;
+ switch (vport->ovsType) {
+ case OVSWIN_VPORT_TYPE_EXTERNAL:
+ if (vport->nicIndex == 0) {
+ ASSERT(switchContext->numPhysicalNics == 0);
+ switchContext->externalPortId = 0;
+ switchContext->externalVport = NULL;
+ OvsFreeMemory(vport);
+ return;
+ } else {
+ ASSERT(switchContext->numPhysicalNics);
+ switchContext->numPhysicalNics--;
+ }
+ break;
+ case OVSWIN_VPORT_TYPE_INTERNAL:
+ switchContext->internalPortId = 0;
+ switchContext->internalVport = NULL;
+ OvsInternalAdapterDown();
+ break;
+ case OVSWIN_VPORT_TYPE_VXLAN:
+ OvsCleanupVxlanTunnel(vport);
+ break;
+ case OVSWIN_VPORT_TYPE_GRE:
+ case OVSWIN_VPORT_TYPE_GRE64:
+ break;
+ case OVSWIN_VPORT_TYPE_EMULATED:
+ case OVSWIN_VPORT_TYPE_SYNTHETIC:
+ default:
+ break;
+ }
+
+ RemoveEntryList(&vport->nameLink);
+ RemoveEntryList(&vport->portLink);
+ /*
+  * Leave the next generation number (kept within 8 bits) in the slot in
+  * place of the pointer; OvsGetVportNo() reads it back when the slot is
+  * reused.
+  */
+ gen = (gen + 1) & 0xff;
+ switchContext->vportArray[OVS_VPORT_INDEX(vport->portNo)] =
+ (PVOID)(UINT64)gen;
+ switchContext->numVports--;
+ OvsFreeMemory(vport);
+}
+
+
+/*
+ * Creates and registers a vport for every port returned by
+ * OvsGetPortsOnSwitch().  On any failure, including failure to obtain the
+ * port list, all vports of the switch are cleared and the failing status is
+ * returned.
+ */
+NDIS_STATUS
+OvsAddConfiguredSwitchPorts(POVS_SWITCH_CONTEXT switchContext)
+{
+    PNDIS_SWITCH_PORT_ARRAY ports = NULL;
+    PNDIS_SWITCH_PORT_PARAMETERS param;
+    POVS_VPORT_ENTRY entry;
+    NDIS_STATUS status;
+    ULONG n;
+
+    OVS_LOG_TRACE("Enter: switchContext:%p", switchContext);
+
+    status = OvsGetPortsOnSwitch(switchContext, &ports);
+    if (status == NDIS_STATUS_SUCCESS) {
+        for (n = 0; n < ports->NumElements; n++) {
+            param = NDIS_SWITCH_PORT_AT_ARRAY_INDEX(ports, n);
+
+            entry = (POVS_VPORT_ENTRY)OvsAllocateVport();
+            if (entry == NULL) {
+                status = NDIS_STATUS_RESOURCES;
+                break;
+            }
+
+            OvsInitVportWithPortParam(entry, param);
+            status = OvsInitVportCommon(switchContext, entry);
+            if (status != NDIS_STATUS_SUCCESS) {
+                /* Not registered anywhere yet, so it is freed directly. */
+                OvsFreeMemory(entry);
+                break;
+            }
+        }
+    }
+
+    if (status != NDIS_STATUS_SUCCESS) {
+        OvsClearAllSwitchVports(switchContext);
+    }
+    if (ports != NULL) {
+        OvsFreeMemory(ports);
+    }
+
+    OVS_LOG_TRACE("Exit: status: %x", status);
+    return status;
+}
+
+
+/*
+ * Applies the parameters of every NIC returned by OvsGetNicsOnSwitch() to
+ * its vport.
+ *
+ * For an external NIC with a non-zero index a new vport is allocated and
+ * registered; for every other NIC the existing vport is looked up by port
+ * id and NIC index.  NICs for which no vport could be obtained are logged
+ * and skipped.  The internal NIC additionally triggers
+ * OvsInternalAdapterUp().
+ *
+ * Returns the status of OvsGetNicsOnSwitch() if that call fails; otherwise
+ * the returned status is whatever the last OvsInitVportCommon() call left
+ * in 'status' (success if none was made).
+ */
+NDIS_STATUS
+OvsInitConfiguredSwitchNics(POVS_SWITCH_CONTEXT switchContext)
+{
+ NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+ PNDIS_SWITCH_NIC_ARRAY nicArray = NULL;
+ ULONG arrIndex;
+ PNDIS_SWITCH_NIC_PARAMETERS nicParam;
+ POVS_VPORT_ENTRY vport;
+
+ OVS_LOG_TRACE("Enter: switchContext: %p", switchContext);
+ /*
+ * Now, get NIC list.
+ */
+ status = OvsGetNicsOnSwitch(switchContext, &nicArray);
+ if (status != NDIS_STATUS_SUCCESS) {
+ goto cleanup;
+ }
+ for (arrIndex = 0; arrIndex < nicArray->NumElements; ++arrIndex) {
+
+ nicParam = NDIS_SWITCH_NIC_AT_ARRAY_INDEX(nicArray, arrIndex);
+
+ /*
+ * XXX: Check if the port is configured with a VLAN. Disallow such a
+ * configuration, since we don't support tag-in-tag.
+ */
+
+ /*
+ * XXX: Check if the port is connected to a VF. Disconnect the VF in
+ * such a case.
+ */
+
+ if (nicParam->NicType == NdisSwitchNicTypeExternal &&
+ nicParam->NicIndex != 0) {
+ /*
+  * NOTE(review): virtVport is dereferenced by OvsInitPhysNicVport()
+  * without a NULL check; this presumably relies on the external port
+  * having been registered beforehand -- confirm.
+  */
+ POVS_VPORT_ENTRY virtVport =
+ (POVS_VPORT_ENTRY)switchContext->externalVport;
+ vport = OvsAllocateVport();
+ if (vport) {
+ OvsInitPhysNicVport(vport, virtVport, nicParam->NicIndex);
+ status = OvsInitVportCommon(switchContext, vport);
+ if (status != NDIS_STATUS_SUCCESS) {
+ OvsFreeMemory(vport);
+ vport = NULL;
+ }
+ }
+ } else {
+ vport = OvsFindVportByPortIdAndNicIndex(switchContext,
+ nicParam->PortId,
+ nicParam->NicIndex);
+ }
+ /* Also reached when the lookup above finds nothing. */
+ if (vport == NULL) {
+ OVS_LOG_ERROR("Fail to allocate vport");
+ continue;
+ }
+ OvsInitVportWithNicParam(switchContext, vport, nicParam);
+ if (nicParam->NicType == NdisSwitchNicTypeInternal) {
+ OvsInternalAdapterUp(vport->portNo, &nicParam->NetCfgInstanceId);
+ }
+ }
+cleanup:
+
+ if (nicArray != NULL) {
+ OvsFreeMemory(nicArray);
+ }
+ OVS_LOG_TRACE("Exit: status: %x", status);
+ return status;
+}
+
+/*
+ * Removes and frees every vport held in vportArray, then the external
+ * vport recorded in switchContext->externalVport, if any.
+ */
+VOID
+OvsClearAllSwitchVports(POVS_SWITCH_CONTEXT switchContext)
+{
+    UINT32 slot = 0;
+
+    while (slot < OVS_MAX_VPORT_ARRAY_SIZE) {
+        if (!OVS_IS_VPORT_ENTRY_NULL(switchContext, slot)) {
+            POVS_VPORT_ENTRY occupant =
+                (POVS_VPORT_ENTRY)switchContext->vportArray[slot];
+            OvsRemoveAndDeleteVport(switchContext, occupant);
+        }
+        slot++;
+    }
+
+    /* This vport is tracked outside the array, so it is handled last. */
+    if (switchContext->externalVport) {
+        OvsRemoveAndDeleteVport(switchContext,
+                        (POVS_VPORT_ENTRY)switchContext->externalVport);
+    }
+}
+
+/*
+ * IOCTL handler: writes the port numbers of the datapath's vports that are
+ * in OVS_STATE_CONNECTED or OVS_STATE_NIC_CREATED into 'outputBuffer' as an
+ * array of UINT32.
+ *
+ * 'inputBuffer' must start with the UINT32 datapath number.  At most
+ * MIN(numVports, outputLength / sizeof (UINT32)) entries are written and
+ * '*replyLen' receives the number of bytes written.
+ *
+ * Returns STATUS_INVALID_PARAMETER when the input is too short or names a
+ * datapath other than the current one, otherwise STATUS_SUCCESS.
+ */
+NTSTATUS
+OvsDumpVportIoctl(PVOID inputBuffer,
+                  UINT32 inputLength,
+                  PVOID outputBuffer,
+                  UINT32 outputLength,
+                  UINT32 *replyLen)
+{
+    UINT32 *portNoOut = (UINT32 *)outputBuffer;
+    UINT32 limit, written = 0;
+    UINT32 dpNo, slot;
+    POVS_VPORT_ENTRY entry;
+    LOCK_STATE_EX lockState;
+
+    if (inputLength < sizeof (UINT32)) {
+        return STATUS_INVALID_PARAMETER;
+    }
+    dpNo = *(UINT32 *)inputBuffer;
+
+    NdisAcquireSpinLock(gOvsCtrlLock);
+    if (gOvsSwitchContext == NULL ||
+        gOvsSwitchContext->dpNo != dpNo) {
+        NdisReleaseSpinLock(gOvsCtrlLock);
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    /* The vport array is walked under the switch context's RW lock. */
+    NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState,
+                          NDIS_RWL_AT_DISPATCH_LEVEL);
+    limit = outputLength/sizeof (UINT32);
+    limit = MIN(gOvsSwitchContext->numVports, limit);
+
+    for (slot = 0; slot < OVS_MAX_VPORT_ARRAY_SIZE; slot++) {
+        if (!(written < limit)) {
+            break;
+        }
+        if (!OVS_IS_VPORT_ENTRY_NULL(gOvsSwitchContext, slot)) {
+            entry = (POVS_VPORT_ENTRY)gOvsSwitchContext->vportArray[slot];
+            if (entry->ovsState == OVS_STATE_CONNECTED ||
+                entry->ovsState == OVS_STATE_NIC_CREATED) {
+                portNoOut[written] = entry->portNo;
+                written++;
+            }
+        }
+    }
+    NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
+    NdisReleaseSpinLock(gOvsCtrlLock);
+
+    *replyLen = written * sizeof (UINT32);
+    return STATUS_SUCCESS;
+}
+
+
+/*
+ * IOCTL handler: returns basic information and statistics for one vport.
+ *
+ * 'inputBuffer' holds an OVS_VPORT_GET; the vport is looked up by
+ * get->portNo, or by get->name when portNo is 0.  On success an
+ * OVS_VPORT_INFO is written to 'outputBuffer' and '*replyLen' is set to its
+ * size.
+ *
+ * Returns STATUS_INVALID_PARAMETER when a buffer is too small or the
+ * datapath number does not match, and STATUS_DEVICE_DOES_NOT_EXIST when the
+ * vport is missing or is neither connected nor NIC-created.
+ */
+NTSTATUS
+OvsGetVportIoctl(PVOID inputBuffer,
+                 UINT32 inputLength,
+                 PVOID outputBuffer,
+                 UINT32 outputLength,
+                 UINT32 *replyLen)
+{
+    UINT32 dpNo;
+    POVS_VPORT_GET get;
+    POVS_VPORT_INFO info;
+    POVS_VPORT_ENTRY vport;
+    size_t len;
+    LOCK_STATE_EX lockState;
+
+    if (inputLength < sizeof (OVS_VPORT_GET) ||
+        outputLength < sizeof (OVS_VPORT_INFO)) {
+        return STATUS_INVALID_PARAMETER;
+    }
+    get = (POVS_VPORT_GET)inputBuffer;
+    dpNo = get->dpNo;
+    info = (POVS_VPORT_INFO)outputBuffer;
+    /*
+     * Clear the whole reply structure.  The size must be that of the
+     * structure, not of the pointer type, or fields that are not set below
+     * are returned to the caller with stale contents.
+     * NOTE(review): assumes outputBuffer does not alias inputBuffer, since
+     * 'get' is still read after this point -- confirm against the IOCTL
+     * dispatch code.
+     */
+    RtlZeroMemory(info, sizeof (OVS_VPORT_INFO));
+
+    NdisAcquireSpinLock(gOvsCtrlLock);
+    if (gOvsSwitchContext == NULL ||
+        gOvsSwitchContext->dpNo != dpNo) {
+        NdisReleaseSpinLock(gOvsCtrlLock);
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState,
+                          NDIS_RWL_AT_DISPATCH_LEVEL);
+    if (get->portNo == 0) {
+        /* No port number given: look the vport up by name instead. */
+        StringCbLengthA(get->name, OVS_MAX_PORT_NAME_LENGTH - 1, &len);
+        vport = OvsFindVportByOvsName(gOvsSwitchContext, get->name,
+                                      (UINT32)len);
+    } else {
+        vport = OvsFindVportByPortNo(gOvsSwitchContext, get->portNo);
+    }
+    if (vport == NULL || (vport->ovsState != OVS_STATE_CONNECTED &&
+                          vport->ovsState != OVS_STATE_NIC_CREATED)) {
+        NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
+        NdisReleaseSpinLock(gOvsCtrlLock);
+        /*
+         * XXX Change to NO DEVICE
+         */
+        return STATUS_DEVICE_DOES_NOT_EXIST;
+    }
+    info->dpNo = dpNo;
+    info->portNo = vport->portNo;
+    info->type = vport->ovsType;
+    RtlCopyMemory(info->macAddress, vport->permMacAddress,
+                  sizeof (vport->permMacAddress));
+    /* +1 copies the terminating NUL as well. */
+    RtlCopyMemory(info->name, vport->ovsName, vport->ovsNameLen + 1);
+
+    info->rxPackets = vport->stats.rxPackets;
+    info->rxBytes = vport->stats.rxBytes;
+    info->txPackets = vport->stats.txPackets;
+    info->txBytes = vport->stats.txBytes;
+    info->rxErrors = vport->errStats.rxErrors;
+    info->txErrors = vport->errStats.txErrors;
+    info->rxDropped = vport->errStats.rxDropped;
+    info->txDropped = vport->errStats.txDropped;
+
+    NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
+    NdisReleaseSpinLock(gOvsCtrlLock);
+    *replyLen = sizeof (OVS_VPORT_INFO);
+    return STATUS_SUCCESS;
+}
+
+
+/*
+ * Initializes 'vport' as a tunnel vport from an add request: takes the type
+ * and (NUL-terminated, possibly truncated) name from 'addReq' and puts the
+ * vport in OVS_STATE_PORT_CREATED.  For VXLAN the result of
+ * OvsInitVxlanTunnel() is returned; GRE and GRE64 need no further setup.
+ * Any other type trips an assertion.
+ */
+NTSTATUS
+OvsInitTunnelVport(POVS_VPORT_ENTRY vport,
+                   POVS_VPORT_ADD_REQUEST addReq)
+{
+    NTSTATUS result = STATUS_SUCCESS;
+    size_t nameBytes;
+
+    vport->isValidationPort = FALSE;
+    vport->ovsType = addReq->type;
+    vport->ovsState = OVS_STATE_PORT_CREATED;
+
+    /* Copy the name and force termination before measuring it. */
+    RtlCopyMemory(vport->ovsName, addReq->name, OVS_MAX_PORT_NAME_LENGTH);
+    vport->ovsName[OVS_MAX_PORT_NAME_LENGTH - 1] = 0;
+    StringCbLengthA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, &nameBytes);
+    vport->ovsNameLen = (UINT32)nameBytes;
+
+    switch (addReq->type) {
+    case OVSWIN_VPORT_TYPE_VXLAN:
+        result = OvsInitVxlanTunnel(vport, addReq);
+        break;
+    case OVSWIN_VPORT_TYPE_GRE:
+    case OVSWIN_VPORT_TYPE_GRE64:
+        break;
+    default:
+        ASSERT(0);
+    }
+    return result;
+}
+
+/*
+ * IOCTL handler: creates a tunnel vport (GRE, GRE64 or VXLAN) on the
+ * current datapath.
+ *
+ * 'inputBuffer' holds an OVS_VPORT_ADD_REQUEST.  On success the new vport
+ * is registered in the connected state, an OVS_VPORT_INFO describing it is
+ * written to 'outputBuffer', '*replyLen' is set to its size and a
+ * connect/link-up event is posted.
+ *
+ * Returns STATUS_INVALID_PARAMETER for short buffers or a mismatching
+ * datapath number, STATUS_NOT_SUPPORTED for other vport types,
+ * STATUS_INSUFFICIENT_RESOURCES when allocation fails, STATUS_DEVICE_BUSY
+ * when the slot of that tunnel type is already in use, or the failure
+ * status of OvsInitTunnelVport().
+ */
+NTSTATUS
+OvsAddVportIoctl(PVOID inputBuffer,
+                 UINT32 inputLength,
+                 PVOID outputBuffer,
+                 UINT32 outputLength,
+                 UINT32 *replyLen)
+{
+    NTSTATUS status = STATUS_SUCCESS;
+    POVS_VPORT_INFO vportInfo;
+    POVS_VPORT_ADD_REQUEST addReq;
+    POVS_VPORT_ENTRY vport;
+    LOCK_STATE_EX lockState;
+    UINT32 index;
+    UINT32 portNo;
+
+    OVS_LOG_TRACE("Enter: inputLength: %u, outputLength: %u",
+                  inputLength, outputLength);
+    if (inputLength < sizeof (OVS_VPORT_ADD_REQUEST) ||
+        outputLength < sizeof (OVS_VPORT_INFO)) {
+        status = STATUS_INVALID_PARAMETER;
+        goto vport_add_done;
+    }
+    addReq = (POVS_VPORT_ADD_REQUEST)inputBuffer;
+    /* Never trust the caller to have terminated the name. */
+    addReq->name[OVS_MAX_PORT_NAME_LENGTH - 1] = 0;
+
+    /* Each supported tunnel type has one fixed slot in vportArray. */
+    switch (addReq->type) {
+    case OVSWIN_VPORT_TYPE_GRE:
+        index = OVS_GRE_VPORT_INDEX;
+        break;
+    case OVSWIN_VPORT_TYPE_GRE64:
+        index = OVS_GRE64_VPORT_INDEX;
+        break;
+    case OVSWIN_VPORT_TYPE_VXLAN:
+        index = OVS_VXLAN_VPORT_INDEX;
+        break;
+    default:
+        status = STATUS_NOT_SUPPORTED;
+        goto vport_add_done;
+    }
+
+    vport = (POVS_VPORT_ENTRY)OvsAllocateVport();
+    if (vport == NULL) {
+        status = STATUS_INSUFFICIENT_RESOURCES;
+        goto vport_add_done;
+    }
+
+    NdisAcquireSpinLock(gOvsCtrlLock);
+    if (gOvsSwitchContext == NULL ||
+        gOvsSwitchContext->dpNo != addReq->dpNo) {
+        NdisReleaseSpinLock(gOvsCtrlLock);
+        status = STATUS_INVALID_PARAMETER;
+        OvsFreeMemory(vport);
+        goto vport_add_done;
+    }
+    /*
+     * Take the dispatch lock for writing: OvsInitVportCommon() below stores
+     * into vportArray and links the vport into both hash tables, which a
+     * read lock would allow to happen concurrently with readers.
+     */
+    NdisAcquireRWLockWrite(gOvsSwitchContext->dispatchLock, &lockState,
+                           NDIS_RWL_AT_DISPATCH_LEVEL);
+    if (!OVS_IS_VPORT_ENTRY_NULL(gOvsSwitchContext, index)) {
+        status = STATUS_DEVICE_BUSY;
+        NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
+        NdisReleaseSpinLock(gOvsCtrlLock);
+        OvsFreeMemory(vport);
+        goto vport_add_done;
+    }
+
+    status = OvsInitTunnelVport(vport, addReq);
+    if (status != STATUS_SUCCESS) {
+        NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
+        NdisReleaseSpinLock(gOvsCtrlLock);
+        OvsFreeMemory(vport);
+        goto vport_add_done;
+    }
+
+    status = OvsInitVportCommon(gOvsSwitchContext, vport);
+    ASSERT(status == NDIS_STATUS_SUCCESS);
+
+    vport->ovsState = OVS_STATE_CONNECTED;
+    vport->nicState = NdisSwitchNicStateConnected;
+
+    vportInfo = (POVS_VPORT_INFO)outputBuffer;
+
+    /*
+     * Clear the whole reply structure (size of the structure, not of the
+     * pointer type) so that fields not set below do not carry stale data.
+     */
+    RtlZeroMemory(vportInfo, sizeof (OVS_VPORT_INFO));
+    vportInfo->dpNo = gOvsSwitchContext->dpNo;
+    vportInfo->portNo = vport->portNo;
+    vportInfo->type = vport->ovsType;
+    RtlCopyMemory(vportInfo->name, vport->ovsName, vport->ovsNameLen + 1);
+    /* Copy the port number so the event can be posted after unlocking. */
+    portNo = vport->portNo;
+
+    NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
+    NdisReleaseSpinLock(gOvsCtrlLock);
+    OvsPostEvent(portNo, OVS_EVENT_CONNECT | OVS_EVENT_LINK_UP);
+    *replyLen = sizeof (OVS_VPORT_INFO);
+    status = STATUS_SUCCESS;
+vport_add_done:
+    OVS_LOG_TRACE("Exit: byteReturned: %u, status: %x",
+                  *replyLen, status);
+    return status;
+}
+
+/*
+ * IOCTL handler: removes and frees a vport of the current datapath.
+ *
+ * 'inputBuffer' holds an OVS_VPORT_DELETE_REQUEST; the vport is looked up
+ * by delReq->portNo, or by delReq->name when portNo is 0.  On success a
+ * disconnect/link-down event is posted for the removed port.  '*replyLen'
+ * is not modified.
+ *
+ * Returns STATUS_INVALID_PARAMETER for a short buffer or a mismatching
+ * datapath number and STATUS_DEVICE_DOES_NOT_EXIST when no vport matches.
+ */
+NTSTATUS
+OvsDelVportIoctl(PVOID inputBuffer,
+                 UINT32 inputLength,
+                 UINT32 *replyLen)
+{
+    NTSTATUS status = STATUS_SUCCESS;
+    POVS_VPORT_DELETE_REQUEST delReq;
+    LOCK_STATE_EX lockState;
+    POVS_VPORT_ENTRY vport;
+    size_t len;
+    UINT32 portNo = 0;
+    BOOLEAN deleted = FALSE;
+
+    OVS_LOG_TRACE("Enter: inputLength: %u", inputLength);
+
+    if (inputLength < sizeof (OVS_VPORT_DELETE_REQUEST)) {
+        status = STATUS_INVALID_PARAMETER;
+        goto vport_del_done;
+    }
+    delReq = (POVS_VPORT_DELETE_REQUEST)inputBuffer;
+
+    NdisAcquireSpinLock(gOvsCtrlLock);
+    if (gOvsSwitchContext == NULL ||
+        gOvsSwitchContext->dpNo != delReq->dpNo) {
+        NdisReleaseSpinLock(gOvsCtrlLock);
+        status = STATUS_INVALID_PARAMETER;
+        goto vport_del_done;
+    }
+    /*
+     * Take the dispatch lock for writing: OvsRemoveAndDeleteVport() unlinks
+     * the vport from the hash tables, rewrites its vportArray slot and
+     * frees it, none of which is safe alongside concurrent readers.
+     */
+    NdisAcquireRWLockWrite(gOvsSwitchContext->dispatchLock, &lockState,
+                           NDIS_RWL_AT_DISPATCH_LEVEL);
+    if (delReq->portNo == 0) {
+        /* No port number given: look the vport up by name instead. */
+        StringCbLengthA(delReq->name, OVS_MAX_PORT_NAME_LENGTH - 1, &len);
+        vport = OvsFindVportByOvsName(gOvsSwitchContext, delReq->name,
+                                      (UINT32)len);
+    } else {
+        vport = OvsFindVportByPortNo(gOvsSwitchContext, delReq->portNo);
+    }
+    if (vport) {
+        OVS_LOG_INFO("delete vport: %s, portNo: %x", vport->ovsName,
+                     vport->portNo);
+        portNo = vport->portNo;
+        OvsRemoveAndDeleteVport(gOvsSwitchContext, vport);
+        /* 'vport' is freed now; remember the outcome in a flag instead. */
+        deleted = TRUE;
+    } else {
+        status = STATUS_DEVICE_DOES_NOT_EXIST;
+    }
+    NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
+    NdisReleaseSpinLock(gOvsCtrlLock);
+    if (deleted) {
+        OvsPostEvent(portNo, OVS_EVENT_DISCONNECT | OVS_EVENT_LINK_DOWN);
+    }
+vport_del_done:
+    OVS_LOG_TRACE("Exit: byteReturned: %u, status: %x",
+                  *replyLen, status);
+    return status;
+}
+
+/*
+ * Converts the counted wide string 'wStr' into a NUL-terminated ANSI string
+ * in 'str', whose capacity is 'maxStrLen' bytes.
+ *
+ * Returns STATUS_BUFFER_OVERFLOW when the converted size reported by
+ * RtlUnicodeStringToAnsiSize() exceeds 'maxStrLen', the failure status of
+ * RtlUnicodeStringToAnsiString() if the conversion fails, and
+ * STATUS_SUCCESS otherwise.
+ */
+NTSTATUS
+OvsConvertIfCountedStrToAnsiStr(PIF_COUNTED_STRING wStr,
+                                CHAR *str,
+                                UINT16 maxStrLen)
+{
+    UNICODE_STRING wide;
+    ANSI_STRING narrow;
+    UINT32 needed;
+    NTSTATUS rc;
+
+    /* Wrap the source without copying it. */
+    wide.Length = wStr->Length;
+    wide.MaximumLength = IF_MAX_STRING_SIZE;
+    wide.Buffer = wStr->String;
+
+    /* Wrap the caller's destination buffer. */
+    narrow.Length = 0;
+    narrow.MaximumLength = maxStrLen;
+    narrow.Buffer = str;
+
+    needed = RtlUnicodeStringToAnsiSize(&wide);
+    if (needed > maxStrLen) {
+        return STATUS_BUFFER_OVERFLOW;
+    }
+
+    /* FALSE: convert into the buffer set up above, do not allocate. */
+    rc = RtlUnicodeStringToAnsiString(&narrow, &wide, FALSE);
+
+    ASSERT(rc == STATUS_SUCCESS);
+    if (rc != STATUS_SUCCESS) {
+        return rc;
+    }
+    ASSERT(narrow.Length <= maxStrLen);
+    str[narrow.Length] = 0;
+    return STATUS_SUCCESS;
+}
+
+
+/*
+ * IOCTL handler: returns extended information for one vport.
+ *
+ * 'inputBuffer' holds an OVS_VPORT_GET; the vport is looked up by
+ * get->portNo, or by get->name when portNo is 0.  On success an
+ * OVS_VPORT_EXT_INFO is written to 'outputBuffer' and '*replyLen' is set to
+ * its size.  For synthetic and emulated vports the VM name and NIC name are
+ * converted to ANSI into vmUUID / vifUUID; a failed conversion leaves the
+ * corresponding field empty and is not reported as an error.
+ *
+ * Returns STATUS_INVALID_PARAMETER for short buffers or a mismatching
+ * datapath number and STATUS_DEVICE_DOES_NOT_EXIST when the vport is
+ * missing or is neither connected nor NIC-created.
+ */
+NTSTATUS
+OvsGetExtInfoIoctl(PVOID inputBuffer,
+                   UINT32 inputLength,
+                   PVOID outputBuffer,
+                   UINT32 outputLength,
+                   UINT32 *replyLen)
+{
+    POVS_VPORT_GET get;
+    POVS_VPORT_EXT_INFO info;
+    POVS_VPORT_ENTRY vport;
+    size_t len;
+    LOCK_STATE_EX lockState;
+    NTSTATUS status = STATUS_SUCCESS;
+    NDIS_SWITCH_NIC_NAME nicName;
+    NDIS_VM_NAME vmName;
+    BOOLEAN doConvert = FALSE;
+
+    OVS_LOG_TRACE("Enter: inputLength: %u, outputLength: %u",
+                  inputLength, outputLength);
+
+    if (inputLength < sizeof (OVS_VPORT_GET) ||
+        outputLength < sizeof (OVS_VPORT_EXT_INFO)) {
+        status = STATUS_INVALID_PARAMETER;
+        goto ext_info_done;
+    }
+    get = (POVS_VPORT_GET)inputBuffer;
+    info = (POVS_VPORT_EXT_INFO)outputBuffer;
+    /*
+     * Clear the whole reply structure.  The size must be that of the
+     * structure, not of the pointer type, or fields that are not set below
+     * are returned to the caller with stale contents.
+     * NOTE(review): assumes outputBuffer does not alias inputBuffer, since
+     * 'get' is still read after this point -- confirm against the IOCTL
+     * dispatch code.
+     */
+    RtlZeroMemory(info, sizeof (OVS_VPORT_EXT_INFO));
+
+    NdisAcquireSpinLock(gOvsCtrlLock);
+    if (gOvsSwitchContext == NULL ||
+        gOvsSwitchContext->dpNo != get->dpNo) {
+        NdisReleaseSpinLock(gOvsCtrlLock);
+        status = STATUS_INVALID_PARAMETER;
+        goto ext_info_done;
+    }
+    NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState,
+                          NDIS_RWL_AT_DISPATCH_LEVEL);
+    if (get->portNo == 0) {
+        /* No port number given: look the vport up by name instead. */
+        StringCbLengthA(get->name, OVS_MAX_PORT_NAME_LENGTH - 1, &len);
+        vport = OvsFindVportByOvsName(gOvsSwitchContext, get->name,
+                                      (UINT32)len);
+    } else {
+        vport = OvsFindVportByPortNo(gOvsSwitchContext, get->portNo);
+    }
+    if (vport == NULL || (vport->ovsState != OVS_STATE_CONNECTED &&
+                          vport->ovsState != OVS_STATE_NIC_CREATED)) {
+        NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
+        NdisReleaseSpinLock(gOvsCtrlLock);
+        if (get->portNo) {
+            OVS_LOG_WARN("vport %u does not exist any more", get->portNo);
+        } else {
+            OVS_LOG_WARN("vport %s does not exist any more", get->name);
+        }
+        status = STATUS_DEVICE_DOES_NOT_EXIST;
+        goto ext_info_done;
+    }
+    info->dpNo = get->dpNo;
+    info->portNo = vport->portNo;
+    RtlCopyMemory(info->macAddress, vport->currMacAddress,
+                  sizeof (vport->currMacAddress));
+    RtlCopyMemory(info->permMACAddress, vport->permMacAddress,
+                  sizeof (vport->permMacAddress));
+    if (vport->ovsType == OVSWIN_VPORT_TYPE_SYNTHETIC ||
+        vport->ovsType == OVSWIN_VPORT_TYPE_EMULATED) {
+        RtlCopyMemory(info->vmMACAddress, vport->vmMacAddress,
+                      sizeof (vport->vmMacAddress));
+    }
+    info->nicIndex = vport->nicIndex;
+    info->portId = vport->portId;
+    info->type = vport->ovsType;
+    info->mtu = vport->mtu;
+    /*
+     * XXX: To be revisited.
+     */
+    if (vport->ovsState == OVS_STATE_NIC_CREATED) {
+        info->status = OVS_EVENT_CONNECT | OVS_EVENT_LINK_DOWN;
+    } else if (vport->ovsState == OVS_STATE_CONNECTED) {
+        info->status = OVS_EVENT_CONNECT | OVS_EVENT_LINK_UP;
+    } else {
+        info->status = OVS_EVENT_DISCONNECT;
+    }
+    if ((info->type == OVSWIN_VPORT_TYPE_SYNTHETIC ||
+         info->type == OVSWIN_VPORT_TYPE_EMULATED) &&
+        (vport->ovsState == OVS_STATE_NIC_CREATED ||
+         vport->ovsState == OVS_STATE_CONNECTED)) {
+        /*
+         * Copy the names out under the lock; the conversion itself is
+         * done after both locks have been released.
+         */
+        RtlCopyMemory(&vmName, &vport->vmName, sizeof (NDIS_VM_NAME));
+        RtlCopyMemory(&nicName, &vport->nicName,
+                      sizeof (NDIS_SWITCH_NIC_NAME));
+        doConvert = TRUE;
+    } else {
+        info->vmUUID[0] = 0;
+        info->vifUUID[0] = 0;
+    }
+
+    RtlCopyMemory(info->name, vport->ovsName, vport->ovsNameLen + 1);
+    NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
+    NdisReleaseSpinLock(gOvsCtrlLock);
+    if (doConvert) {
+        status = OvsConvertIfCountedStrToAnsiStr(&vmName,
+                                                 info->vmUUID,
+                                                 OVS_MAX_VM_UUID_LEN);
+        if (status != STATUS_SUCCESS) {
+            OVS_LOG_INFO("Fail to convert VM name.");
+            info->vmUUID[0] = 0;
+        }
+
+        status = OvsConvertIfCountedStrToAnsiStr(&nicName,
+                                                 info->vifUUID,
+                                                 OVS_MAX_VIF_UUID_LEN);
+        if (status != STATUS_SUCCESS) {
+            OVS_LOG_INFO("Fail to convert nic name");
+            info->vifUUID[0] = 0;
+        }
+        /*
+         * for now ignore status
+         */
+        status = STATUS_SUCCESS;
+    }
+    *replyLen = sizeof (OVS_VPORT_EXT_INFO);
+
+ext_info_done:
+    OVS_LOG_TRACE("Exit: byteReturned: %u, status: %x",
+                  *replyLen, status);
+    return status;
+}
+
+
+/*
+ * Sleeps in steps of 'sleepMicroSec' until the switch is either activated
+ * or has failed to activate, i.e. until the list of ports in OVS has been
+ * initialized or never will be.
+ */
+static __inline VOID
+OvsWaitActivate(POVS_SWITCH_CONTEXT switchContext, ULONG sleepMicroSec)
+{
+    for (;;) {
+        if (switchContext->isActivated) {
+            break;
+        }
+        if (switchContext->isActivateFailed) {
+            break;
+        }
+        NdisMSleep(sleepMicroSec);
+    }
+}
diff --git a/datapath-windows/ovsext/OvsVport.h b/datapath-windows/ovsext/OvsVport.h
new file mode 100644
index 000000000..8fe23f152
--- /dev/null
+++ b/datapath-windows/ovsext/OvsVport.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_VPORT_H_
+#define __OVS_VPORT_H_ 1
+
+#include "OvsSwitch.h"
+
+/*
+ * A Vport, or Virtual Port, is a port on the OVS. It can be one of the
+ * following types. Some of the Vports are "real" ports on the hyper-v switch,
+ * and some are not:
+ * - VIF port (VM's NIC)
+ * - External Adapters (physical NIC)
+ * - Internal Adapter (Virtual adapter exposed on the host).
+ * - Tunnel ports created by OVS userspace.
+ */
+
+typedef enum { /* State of a vport; stored in OVS_VPORT_ENTRY.ovsState. */
+ OVS_STATE_UNKNOWN,
+ OVS_STATE_PORT_CREATED,
+ OVS_STATE_NIC_CREATED, /* ext-info reply reports OVS_EVENT_CONNECT | OVS_EVENT_LINK_DOWN */
+ OVS_STATE_CONNECTED, /* ext-info reply reports OVS_EVENT_CONNECT | OVS_EVENT_LINK_UP */
+ OVS_STATE_PORT_TEAR_DOWN,
+ OVS_STATE_PORT_DELETED,
+} OVS_VPORT_STATE; /* every state other than the two annotated above is reported as OVS_EVENT_DISCONNECT */
+
+typedef struct _OVS_VPORT_STATS { /* Byte/packet counters, embedded in OVS_VPORT_ENTRY as 'stats'. */
+ UINT64 rxBytes;
+ UINT64 rxPackets;
+ UINT64 txBytes;
+ UINT64 txPackets;
+} OVS_VPORT_STATS; /* NOTE(review): rx/tx direction convention (switch vs. port view) is not visible here -- confirm */
+
+typedef struct _OVS_VPORT_ERR_STATS { /* Error/drop counters, embedded in OVS_VPORT_ENTRY as 'errStats'. */
+ UINT64 rxErrors;
+ UINT64 txErrors;
+ UINT64 rxDropped;
+ UINT64 txDropped;
+} OVS_VPORT_ERR_STATS;
+/*
+ * Each internal, external adapter or virtual adapter has
+ * one vport entry. In addition, we have one vport for each
+ * tunnel type, such as vxlan, gre, gre64
+ */
+typedef struct _OVS_VPORT_ENTRY {
+ LIST_ENTRY nameLink;
+ LIST_ENTRY portLink;
+
+ OVS_VPORT_STATE ovsState;
+ OVS_VPORT_TYPE ovsType;
+ OVS_VPORT_STATS stats;
+ OVS_VPORT_ERR_STATS errStats;
+ UINT32 portNo;
+ UINT32 mtu; /* read through OvsGetExternalMtu() for the external vport */
+ CHAR ovsName[OVS_MAX_PORT_NAME_LENGTH];
+ UINT32 ovsNameLen; /* the ext-info reply copies ovsNameLen + 1 bytes of ovsName, so this presumably excludes the NUL -- confirm */
+
+ PVOID priv; /* type-specific state; for VXLAN an OVS_VXLAN_VPORT set by OvsInitVxlanTunnel and freed by OvsCleanupVxlanTunnel */
+ NDIS_SWITCH_PORT_ID portId;
+ NDIS_SWITCH_NIC_INDEX nicIndex;
+ UINT16 numaNodeId;
+ NDIS_SWITCH_PORT_STATE portState;
+ NDIS_SWITCH_NIC_STATE nicState;
+ NDIS_SWITCH_PORT_TYPE portType;
+ BOOLEAN isValidationPort;
+
+ UINT8 permMacAddress[MAC_ADDRESS_LEN];
+ UINT8 currMacAddress[MAC_ADDRESS_LEN];
+ UINT8 vmMacAddress[MAC_ADDRESS_LEN];
+
+ NDIS_SWITCH_PORT_NAME portName;
+ NDIS_SWITCH_NIC_NAME nicName; /* converted to an ANSI string for the ext-info 'vifUUID' field */
+ NDIS_VM_NAME vmName; /* converted to an ANSI string for the ext-info 'vmUUID' field */
+ GUID netCfgInstanceId;
+} OVS_VPORT_ENTRY, *POVS_VPORT_ENTRY;
+
+struct _OVS_SWITCH_CONTEXT;
+
+#define OVS_IS_VPORT_ENTRY_NULL(_SwitchContext, _i) \
+ ((UINT64)(_SwitchContext)->vportArray[_i] <= 0xff) /* true when slot _i holds a value <= 0xff rather than an entry pointer; NOTE(review): presumably small values mark free slots -- confirm against vportArray users */
+
+POVS_VPORT_ENTRY
+OvsFindVportByPortNo(struct _OVS_SWITCH_CONTEXT *switchContext,
+ UINT32 portNo);
+POVS_VPORT_ENTRY
+OvsFindVportByOvsName(struct _OVS_SWITCH_CONTEXT *switchContext,
+ CHAR *name, UINT32 length);
+POVS_VPORT_ENTRY
+OvsFindVportByPortIdAndNicIndex(struct _OVS_SWITCH_CONTEXT *switchContext,
+ NDIS_SWITCH_PORT_ID portId,
+ NDIS_SWITCH_NIC_INDEX index);
+
+NDIS_STATUS OvsAddConfiguredSwitchPorts(struct _OVS_SWITCH_CONTEXT *switchContext);
+NDIS_STATUS OvsInitConfiguredSwitchNics(struct _OVS_SWITCH_CONTEXT *switchContext);
+
+VOID OvsClearAllSwitchVports(struct _OVS_SWITCH_CONTEXT *switchContext);
+
+NTSTATUS OvsDumpVportIoctl(PVOID inputBuffer, UINT32 inputLength,
+ PVOID outputBuffer, UINT32 outputLength,
+ UINT32 *replyLen);
+NTSTATUS OvsGetVportIoctl(PVOID inputBuffer, UINT32 inputLength,
+ PVOID outputBuffer, UINT32 outputLength,
+ UINT32 *replyLen);
+NTSTATUS OvsAddVportIoctl(PVOID inputBuffer, UINT32 inputLength,
+ PVOID outputBuffer, UINT32 outputLength,
+ UINT32 *replyLen);
+NTSTATUS OvsDelVportIoctl(PVOID inputBuffer, UINT32 inputLength,
+ UINT32 *replyLen);
+NTSTATUS OvsGetExtInfoIoctl(PVOID inputBuffer, UINT32 inputLength,
+ PVOID outputBuffer, UINT32 outputLength,
+ UINT32 *replyLen);
+NDIS_STATUS OvsCreateNic(POVS_SWITCH_CONTEXT switchContext,
+ PNDIS_SWITCH_NIC_PARAMETERS nicParam);
+NDIS_STATUS OvsCreatePort(POVS_SWITCH_CONTEXT switchContext,
+ PNDIS_SWITCH_PORT_PARAMETERS portParam);
+VOID OvsTeardownPort(POVS_SWITCH_CONTEXT switchContext,
+ PNDIS_SWITCH_PORT_PARAMETERS portParam);
+VOID OvsDeletePort(POVS_SWITCH_CONTEXT switchContext,
+ PNDIS_SWITCH_PORT_PARAMETERS portParam);
+VOID OvsConnectNic(POVS_SWITCH_CONTEXT switchContext,
+ PNDIS_SWITCH_NIC_PARAMETERS nicParam);
+VOID OvsUpdateNic(POVS_SWITCH_CONTEXT switchContext,
+ PNDIS_SWITCH_NIC_PARAMETERS nicParam);
+VOID OvsDeleteNic(POVS_SWITCH_CONTEXT switchContext,
+ PNDIS_SWITCH_NIC_PARAMETERS nicParam);
+VOID OvsDisconnectNic(POVS_SWITCH_CONTEXT switchContext,
+ PNDIS_SWITCH_NIC_PARAMETERS nicParam);
+
+/*
+ * Returns TRUE when 'ovsType' is one of the tunnel vport types
+ * (VXLAN, GRE or GRE64), FALSE otherwise.
+ */
+static __inline BOOLEAN
+OvsIsTunnelVportType(OVS_VPORT_TYPE ovsType)
+{
+    switch (ovsType) {
+    case OVSWIN_VPORT_TYPE_VXLAN:
+    case OVSWIN_VPORT_TYPE_GRE:
+    case OVSWIN_VPORT_TYPE_GRE64:
+        return TRUE;
+    default:
+        return FALSE;
+    }
+}
+
+/* Returns TRUE when 'ovsType' is the internal vport type, FALSE otherwise. */
+static __inline BOOLEAN
+OvsIsInternalVportType(OVS_VPORT_TYPE ovsType)
+{
+    if (ovsType == OVSWIN_VPORT_TYPE_INTERNAL) {
+        return TRUE;
+    }
+    return FALSE;
+}
+
+/*
+ * Returns TRUE when the vport index derived from 'portNo' lies within
+ * [OVS_TUNNEL_INDEX_START, OVS_TUNNEL_INDEX_END], FALSE otherwise.
+ */
+static __inline BOOLEAN
+OvsIsTunnelVportNo(UINT32 portNo)
+{
+    UINT32 vportIndex = OVS_VPORT_INDEX(portNo);
+
+    if (vportIndex < OVS_TUNNEL_INDEX_START ||
+        vportIndex > OVS_TUNNEL_INDEX_END) {
+        return FALSE;
+    }
+    return TRUE;
+}
+
+/*
+ * Returns the vport entry registered for tunnel type 'type'.  Only VXLAN is
+ * implemented; any other type trips an assertion and yields NULL.
+ */
+static __inline POVS_VPORT_ENTRY
+OvsGetTunnelVport(OVS_VPORT_TYPE type)
+{
+    ASSERT(OvsIsTunnelVportType(type));
+
+    if (type == OVSWIN_VPORT_TYPE_VXLAN) {
+        return (POVS_VPORT_ENTRY) OvsGetVportFromIndex(OVS_VXLAN_VPORT_INDEX);
+    }
+
+    ASSERT(! "OvsGetTunnelVport not implemented for this tunnel.");
+    return NULL;
+}
+
+/*
+ * Returns the type-specific private data ('priv') of the vport registered
+ * for tunnel type 'type', or NULL when no such vport is available.
+ *
+ * OvsGetTunnelVport() returns NULL for tunnel types it does not implement,
+ * so the result must be checked before it is dereferenced.
+ */
+static __inline PVOID
+OvsGetVportPriv(OVS_VPORT_TYPE type)
+{
+    POVS_VPORT_ENTRY vport = OvsGetTunnelVport(type);
+
+    return vport != NULL ? vport->priv : NULL;
+}
+
+/*
+ * Returns the 'mtu' field of the external vport.
+ *
+ * Declared with an explicit VOID parameter list so that the definition is a
+ * real prototype and calls that pass arguments are diagnosed.
+ */
+static __inline UINT32
+OvsGetExternalMtu(VOID)
+{
+    return ((POVS_VPORT_ENTRY) OvsGetExternalVport())->mtu;
+}
+
+#endif /* __OVS_VPORT_H_ */
diff --git a/datapath-windows/ovsext/OvsVxlan.c b/datapath-windows/ovsext/OvsVxlan.c
new file mode 100644
index 000000000..63909aeed
--- /dev/null
+++ b/datapath-windows/ovsext/OvsVxlan.c
@@ -0,0 +1,507 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "precomp.h"
+#include "OvsNetProto.h"
+#include "OvsIoctl.h"
+#include "OvsSwitch.h"
+#include "OvsVport.h"
+#include "OvsFlow.h"
+#include "OvsVxlan.h"
+#include "OvsIpHelper.h"
+#include "OvsChecksum.h"
+#include "OvsUser.h"
+#include "OvsPacketIO.h"
+#include "OvsFlow.h"
+#include "OvsPacketParser.h"
+#include "OvsChecksum.h"
+
+#pragma warning( push )
+#pragma warning( disable:4127 )
+
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_VXLAN
+#include "OvsDebug.h"
+
+/* Helper macro to check if a VXLAN ID is valid (non-zero and at most 24 bits). */
+#define VXLAN_ID_IS_VALID(vxlanID) (0 < (vxlanID) && (vxlanID) <= 0xffffff)
+/*
+ * Conversions between the 64-bit tunnel ID and the 24-bit VNI: the VNI
+ * occupies the bits above bit 39 of the tunnel ID.  Both expansions are
+ * fully parenthesized so they compose safely inside larger expressions.
+ */
+#define VXLAN_TUNNELID_TO_VNI(_tID) ((UINT32)(((UINT64)(_tID)) >> 40))
+#define VXLAN_VNI_TO_TUNNELID(_vni) ((((UINT64)(_vni)) << 40))
+/* IPv4 "don't fragment" bit as it appears in frag_off on a little-endian host. */
+#define IP_DF_NBO 0x0040
+#define VXLAN_DEFAULT_TTL 64
+#define VXLAN_MULTICAST_TTL 64
+#define VXLAN_DEFAULT_INSTANCE_ID 1
+
+/* Move to a header file */
+extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
+
+/*
+ * Allocates and zero-fills the VXLAN-specific state for 'vport', records the
+ * UDP destination port requested in 'addReq' and attaches the state as
+ * vport->priv.
+ *
+ * Returns STATUS_INSUFFICIENT_RESOURCES when the allocation fails (vport->priv
+ * is then left untouched), STATUS_SUCCESS otherwise.
+ */
+NTSTATUS
+OvsInitVxlanTunnel(POVS_VPORT_ENTRY vport,
+                   POVS_VPORT_ADD_REQUEST addReq)
+{
+    POVS_VXLAN_VPORT tunnelState;
+
+    ASSERT(addReq->type == OVSWIN_VPORT_TYPE_VXLAN);
+
+    tunnelState = OvsAllocateMemory(sizeof (*tunnelState));
+    if (tunnelState == NULL) {
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    RtlZeroMemory(tunnelState, sizeof (*tunnelState));
+    tunnelState->dstPort = addReq->dstPort;
+    /*
+     * since we are installing the WFP filter before the port is created
+     * We need to check if it is the same number
+     * XXX should be removed later
+     */
+    ASSERT(tunnelState->dstPort == VXLAN_UDP_PORT);
+    vport->priv = (PVOID)tunnelState;
+
+    return STATUS_SUCCESS;
+}
+
+
+/*
+ * Frees the VXLAN-specific state attached to 'vport' and clears vport->priv.
+ * Does nothing when 'vport' is not a VXLAN vport or has no state attached.
+ */
+VOID
+OvsCleanupVxlanTunnel(POVS_VPORT_ENTRY vport)
+{
+    if (vport->ovsType == OVSWIN_VPORT_TYPE_VXLAN && vport->priv != NULL) {
+        OvsFreeMemory(vport->priv);
+        vport->priv = NULL;
+    }
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsDoEncapVxlan
+ *     Encapsulates the packet.
+ *
+ *     Builds a new NBL from 'curNbl' with room for the tunnel header in
+ *     front of every NET_BUFFER: a TCP-segmented NBL when the packet is TCP
+ *     and carries a non-zero LSO MSS, a plain copy otherwise.  The outer
+ *     Ethernet, IPv4, UDP and VXLAN headers are then written into that room.
+ *
+ *     'fwdInfo' supplies the outer MAC and IP addresses.  'tunKey' supplies
+ *     the TTL (VXLAN_DEFAULT_TTL when zero), the flow hash used for the UDP
+ *     source port and the tunnel ID.
+ *
+ *     '*newNbl' is tested for NULL before this function assigns it on the
+ *     non-segmenting path, so the caller must pass it in as NULL.  On
+ *     success '*newNbl' is the encapsulated NBL.  When writing the headers
+ *     fails, the new NBL is completed and '*newNbl' is reset to NULL.
+ *----------------------------------------------------------------------------
+ */
+static __inline NDIS_STATUS
+OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl,
+                OvsIPv4TunnelKey *tunKey,
+                POVS_FWD_INFO fwdInfo,
+                POVS_PACKET_HDR_INFO layers,
+                POVS_SWITCH_CONTEXT switchContext,
+                PNET_BUFFER_LIST *newNbl)
+{
+    NDIS_STATUS status;
+    PNET_BUFFER curNb;
+    PMDL curMdl;
+    PUINT8 bufferStart;
+    EthHdr *ethHdr;
+    IPHdr *ipHdr;
+    UDPHdr *udpHdr;
+    VXLANHdr *vxlanHdr;
+    UINT32 headRoom = OvsGetVxlanTunHdrSize();
+    UINT32 packetLength;
+
+    /*
+     * XXX: the assumption currently is that the NBL is owned by OVS, and
+     * headroom has already been allocated as part of allocating the NBL and
+     * MDL.
+     */
+    curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+    packetLength = NET_BUFFER_DATA_LENGTH(curNb);
+    if (layers->isTcp) {
+        NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
+
+        tsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
+                                             TcpLargeSendNetBufferListInfo);
+        OVS_LOG_TRACE("MSS %u packet len %u", tsoInfo.LsoV1Transmit.MSS, packetLength);
+        if (tsoInfo.LsoV1Transmit.MSS) {
+            OVS_LOG_TRACE("l4Offset %d", layers->l4Offset);
+            *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers,
+                                       tsoInfo.LsoV1Transmit.MSS, headRoom);
+            if (*newNbl == NULL) {
+                OVS_LOG_ERROR("Unable to segment NBL");
+                return NDIS_STATUS_FAILURE;
+            }
+        }
+    }
+    /* If we didn't split the packet above, make a copy now */
+    if (*newNbl == NULL) {
+        *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
+                                    FALSE /*NBL info*/);
+        if (*newNbl == NULL) {
+            OVS_LOG_ERROR("Unable to copy NBL");
+            return NDIS_STATUS_FAILURE;
+        }
+    }
+
+    curNbl = *newNbl;
+    for (curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); curNb != NULL;
+         curNb = curNb->Next) {
+        /* Expose the reserved headroom so the outer headers can be written. */
+        status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
+        if (status != NDIS_STATUS_SUCCESS) {
+            goto ret_error;
+        }
+
+        curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+        bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
+        if (!bufferStart) {
+            status = NDIS_STATUS_RESOURCES;
+            goto ret_error;
+        }
+
+        bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
+        if (NET_BUFFER_NEXT_NB(curNb)) {
+            OVS_LOG_TRACE("nb length %u next %u", NET_BUFFER_DATA_LENGTH(curNb),
+                          NET_BUFFER_DATA_LENGTH(curNb->Next));
+        }
+
+        /* L2 header */
+        ethHdr = (EthHdr *)bufferStart;
+        /* One copy fills both addresses: it relies on srcMacAddr directly
+         * following dstMacAddr in OVS_FWD_INFO, which the ASSERT verifies. */
+        NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr,
+                       sizeof ethHdr->Destination + sizeof ethHdr->Source);
+        ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) ==
+               (PCHAR)&fwdInfo->srcMacAddr);
+        ethHdr->Type = htons(ETH_TYPE_IPV4);
+
+        // XXX: question: there are fields in the OvsIPv4TunnelKey for ttl and such,
+        // should we use those values instead? or will they end up being
+        // uninitialized;
+        /* IP header */
+        ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
+
+        ipHdr->ihl = sizeof *ipHdr / 4;
+        ipHdr->version = IPV4;
+        ipHdr->tos = 0;
+        ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr);
+        ipHdr->id = 0;
+        ipHdr->frag_off = IP_DF_NBO;
+        ipHdr->ttl = tunKey->ttl ? tunKey->ttl : VXLAN_DEFAULT_TTL;
+        ipHdr->protocol = IPPROTO_UDP;
+        ASSERT(tunKey->dst == fwdInfo->dstIpAddr);
+        ASSERT(tunKey->src == fwdInfo->srcIpAddr || tunKey->src == 0);
+        ipHdr->saddr = fwdInfo->srcIpAddr;
+        ipHdr->daddr = fwdInfo->dstIpAddr;
+        ipHdr->check = 0;
+        ipHdr->check = IPChecksum((UINT8 *)ipHdr, sizeof *ipHdr, 0);
+
+        /* UDP header */
+        udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
+        udpHdr->source = htons(tunKey->flow_hash | 32768);
+        udpHdr->dest = VXLAN_UDP_PORT_NBO;
+        udpHdr->len = htons(NET_BUFFER_DATA_LENGTH(curNb) - headRoom +
+                            sizeof *udpHdr + sizeof *vxlanHdr);
+        udpHdr->check = 0;
+
+        /* VXLAN header */
+        vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof *udpHdr);
+        vxlanHdr->flags1 = 0;
+        vxlanHdr->locallyReplicate = 0;
+        /* The instance-ID flag must be 1 in every VXLAN header. */
+        vxlanHdr->instanceID = 1;
+        vxlanHdr->flags2 = 0;
+        vxlanHdr->reserved1 = 0;
+        /*
+         * Test the key flag with '&': the previous '|' made the condition
+         * always true.  Without a key the VNI is written as 0, so every
+         * header field is always initialized.
+         */
+        vxlanHdr->vxlanID = (tunKey->flags & OVS_TNL_F_KEY) ?
+                            VXLAN_TUNNELID_TO_VNI(tunKey->tunnelId) : 0;
+        vxlanHdr->reserved2 = 0;
+    }
+    return NDIS_STATUS_SUCCESS;
+
+ret_error:
+    OvsCompleteNBL(switchContext, *newNbl, TRUE);
+    *newNbl = NULL;
+    return status;
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsEncapVxlan --
+ *     Encapsulates the packet if L2/L3 for destination resolves. Otherwise,
+ *     asks the IP helper to resolve the destination and fails the packet;
+ *     deferring the encapsulation to a callback is not enabled yet.
+ *----------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsEncapVxlan(PNET_BUFFER_LIST curNbl,
+              OvsIPv4TunnelKey *tunKey,
+              POVS_SWITCH_CONTEXT switchContext,
+              VOID *completionList,
+              POVS_PACKET_HDR_INFO layers,
+              PNET_BUFFER_LIST *newNbl)
+{
+    OVS_FWD_INFO route;
+    NTSTATUS lookupStatus;
+
+    UNREFERENCED_PARAMETER(completionList);
+
+    lookupStatus = OvsLookupIPFwdInfo(tunKey->dst, &route);
+    if (lookupStatus == STATUS_SUCCESS) {
+        return OvsDoEncapVxlan(curNbl, tunKey, &route, layers,
+                               switchContext, newNbl);
+    }
+
+    /* No forwarding info yet: kick off resolution without a callback. */
+    OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL);
+    /*
+     * Returning NDIS_STATUS_PENDING here is the eventual goal.
+     *
+     * XXX: Don't know if the completionList will make any sense when
+     * accessed in the callback. Make sure the caveats are known.
+     *
+     * XXX: This code will work once we are able to grab locks in the
+     * callback.
+     */
+    return NDIS_STATUS_FAILURE;
+}
+
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsIpHlprCbVxlan --
+ * Callback function for IP helper.
+ * XXX: not used currently
+ *
+ * When 'result' is STATUS_SUCCESS, re-extracts the flow from 'curNbl',
+ * encapsulates it using 'fwdInfo' and hands 'curNbl' to
+ * OvsLookupFlowOutput(); 'cbData1' is used as the switch context and
+ * 'cbData2' is forwarded unchanged.
+ *
+ * NOTE(review): issues to resolve before this callback is wired up:
+ * - NULL is passed as 'newNbl' to OvsDoEncapVxlan(), which dereferences it.
+ * - The status returned by OvsExtractFlow() is overwritten unchecked.
+ * - 'inPort' is marked UNREFERENCED_PARAMETER but is in fact used.
+ * - On failure the NBL is not released (see the XXX below).
+ *----------------------------------------------------------------------------
+ */
+static VOID
+OvsIpHlprCbVxlan(PNET_BUFFER_LIST curNbl,
+ UINT32 inPort,
+ OvsIPv4TunnelKey *tunKey,
+ PVOID cbData1,
+ PVOID cbData2,
+ NTSTATUS result,
+ POVS_FWD_INFO fwdInfo)
+{
+ OVS_PACKET_HDR_INFO layers;
+ OvsFlowKey key;
+ NDIS_STATUS status;
+ UNREFERENCED_PARAMETER(inPort);
+
+ status = OvsExtractFlow(curNbl, inPort, &key, &layers, NULL);
+ if (result == STATUS_SUCCESS) {
+ status = OvsDoEncapVxlan(curNbl, tunKey, fwdInfo, &layers,
+ (POVS_SWITCH_CONTEXT)cbData1, NULL);
+ } else {
+ status = NDIS_STATUS_FAILURE;
+ }
+
+ if (status != NDIS_STATUS_SUCCESS) {
+ // XXX: Free up the NBL;
+ return;
+ }
+
+ OvsLookupFlowOutput((POVS_SWITCH_CONTEXT)cbData1, cbData2, curNbl);
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsCalculateUDPChecksum
+ *     Verifies the UDP checksum in software unless the NBL's receive
+ *     checksum info already reports it as verified.
+ *
+ *     On a mismatch returns NDIS_STATUS_INVALID_PACKET; 'udpHdr->check' is
+ *     then left holding the computed value.  Otherwise marks the UDP
+ *     checksum as succeeded in the NBL info and returns NDIS_STATUS_SUCCESS.
+ *----------------------------------------------------------------------------
+ */
+static __inline NDIS_STATUS
+OvsCalculateUDPChecksum(PNET_BUFFER_LIST curNbl,
+                        PNET_BUFFER curNb,
+                        IPHdr *ipHdr,
+                        UDPHdr *udpHdr,
+                        UINT32 packetLength)
+{
+    NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO rxCsumInfo;
+
+    rxCsumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo);
+
+    /* Next check if UDP checksum has been calculated. */
+    if (!rxCsumInfo.Receive.UdpChecksumSucceeded) {
+        UINT16 wireCsum;
+        UINT32 udpLength;
+
+        /* Remember the checksum carried by the packet. */
+        wireCsum = udpHdr->check;
+
+        /* Everything after the Ethernet and IP headers is covered. */
+        udpLength = packetLength - sizeof(EthHdr) - ipHdr->ihl * 4;
+
+        /* Seed the field with the pseudo-header sum, then checksum the UDP
+         * header and payload directly from the NET_BUFFER. */
+        udpHdr->check = 0;
+        udpHdr->check =
+            IPPseudoChecksum((UINT32 *)&ipHdr->saddr,
+                             (UINT32 *)&ipHdr->daddr,
+                             IPPROTO_UDP, (UINT16)udpLength);
+        udpHdr->check = CalculateChecksumNB(curNb, (UINT16)udpLength,
+                                            sizeof(EthHdr) + ipHdr->ihl * 4);
+        if (wireCsum != udpHdr->check) {
+            OVS_LOG_TRACE("UDP checksum incorrect.");
+            return NDIS_STATUS_INVALID_PACKET;
+        }
+    }
+
+    rxCsumInfo.Receive.UdpChecksumSucceeded = 1;
+    NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = rxCsumInfo.Value;
+    return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsDoDecapVxlan
+ *     Decapsulates the tunnel header in 'curNbl' and puts it into 'tunKey'.
+ *
+ *     The first NET_BUFFER must be longer than the VXLAN tunnel header,
+ *     otherwise NDIS_STATUS_INVALID_LENGTH is returned.  A partial copy of
+ *     the NBL is made so that all headers sit in one MDL; the outer IP
+ *     header fills 'tunKey', the UDP checksum is validated, and the copy's
+ *     data start is advanced past the tunnel header.
+ *
+ *     On success '*newNbl' is the decapsulated copy.  If a failure occurs
+ *     after the copy was made, the copy is completed and '*newNbl' is set
+ *     to NULL.
+ *----------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext,
+                PNET_BUFFER_LIST curNbl,
+                OvsIPv4TunnelKey *tunKey,
+                PNET_BUFFER_LIST *newNbl)
+{
+    PNET_BUFFER curNb;
+    PMDL curMdl;
+    EthHdr *ethHdr;
+    IPHdr *ipHdr;
+    UDPHdr *udpHdr;
+    VXLANHdr *vxlanHdr;
+    UINT32 tunnelSize = 0, packetLength = 0;
+    PUINT8 bufferStart;
+    NDIS_STATUS status;
+
+    /* Check the length of the UDP payload */
+    curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+    packetLength = NET_BUFFER_DATA_LENGTH(curNb);
+    tunnelSize = OvsGetVxlanTunHdrSize();
+    if (packetLength <= tunnelSize) {
+        return NDIS_STATUS_INVALID_LENGTH;
+    }
+
+    /*
+     * Create a copy of the NBL so that we have all the headers in one MDL.
+     */
+    *newNbl = OvsPartialCopyNBL(switchContext, curNbl,
+                                tunnelSize + OVS_DEFAULT_COPY_SIZE, 0,
+                                TRUE /*copy NBL info */);
+
+    if (*newNbl == NULL) {
+        return NDIS_STATUS_RESOURCES;
+    }
+
+    /* XXX: Handle VLAN header. */
+    curNbl = *newNbl;
+    curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
+    curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+    /*
+     * Check the mapping result before adding the MDL offset: once a
+     * non-zero offset is added, a failed (NULL) mapping no longer
+     * compares equal to NULL.
+     */
+    bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
+    if (!bufferStart) {
+        status = NDIS_STATUS_RESOURCES;
+        goto dropNbl;
+    }
+    bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
+
+    ethHdr = (EthHdr *)bufferStart;
+    /* XXX: Handle IP options. */
+    ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
+    tunKey->src = ipHdr->saddr;
+    tunKey->dst = ipHdr->daddr;
+    tunKey->tos = ipHdr->tos;
+    tunKey->ttl = ipHdr->ttl;
+    tunKey->pad = 0;
+    udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
+
+    /* Validate if NIC has indicated checksum failure. */
+    status = OvsValidateUDPChecksum(curNbl, udpHdr->check == 0);
+    if (status != NDIS_STATUS_SUCCESS) {
+        goto dropNbl;
+    }
+
+    /* Calculate and verify UDP checksum if NIC didn't do it. */
+    if (udpHdr->check != 0) {
+        status = OvsCalculateUDPChecksum(curNbl, curNb, ipHdr, udpHdr, packetLength);
+        if (status != NDIS_STATUS_SUCCESS) {
+            goto dropNbl;
+        }
+    }
+
+    /* A set instance-ID flag means the header carries a valid VNI. */
+    vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof *udpHdr);
+    if (vxlanHdr->instanceID) {
+        tunKey->flags = OVS_TNL_F_KEY;
+        tunKey->tunnelId = VXLAN_VNI_TO_TUNNELID(vxlanHdr->vxlanID);
+    } else {
+        tunKey->flags = 0;
+        tunKey->tunnelId = 0;
+    }
+
+    /* Clear out the receive flag for the inner packet. */
+    NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0;
+    NdisAdvanceNetBufferDataStart(curNb, tunnelSize, FALSE, NULL);
+    return NDIS_STATUS_SUCCESS;
+
+dropNbl:
+    OvsCompleteNBL(switchContext, *newNbl, TRUE);
+    *newNbl = NULL;
+    return status;
+}
+
+
+/*
+ * Fills 'tunnelKey' (src, dst, ttl, tos, flags, tunnelId) from the IP and
+ * VXLAN headers of 'packet' without modifying the packet.
+ *
+ * Returns NDIS_STATUS_FAILURE when the IP, UDP or VXLAN header cannot be
+ * read, NDIS_STATUS_SUCCESS otherwise.
+ *
+ * NOTE(review): unlike OvsDoDecapVxlan(), this path does not write
+ * tunnelKey->pad.
+ */
+NDIS_STATUS
+OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet,
+                      OvsIPv4TunnelKey *tunnelKey)
+{
+    struct IPHdr ipStorage;
+    const struct IPHdr *ip;
+    UDPHdr udpStorage;
+    const UDPHdr *udp;
+    VXLANHdr vxlanStorage;
+    VXLANHdr *vxlan;
+    OVS_PACKET_HDR_INFO layers;
+
+    layers.value = 0;
+
+    ip = OvsGetIp(packet, layers.l3Offset, &ipStorage);
+    if (!ip) {
+        return NDIS_STATUS_FAILURE;
+    }
+    layers.l4Offset = layers.l3Offset + ip->ihl * 4;
+
+    /* make sure it's a VXLAN packet */
+    udp = OvsGetUdp(packet, layers.l4Offset, &udpStorage);
+    if (!udp) {
+        return NDIS_STATUS_FAILURE;
+    }
+    layers.l7Offset = layers.l4Offset + sizeof *udp;
+
+    /* XXX Should be tested against the dynamic port # in the VXLAN vport */
+    ASSERT(udp->dest == RtlUshortByteSwap(VXLAN_UDP_PORT));
+
+    vxlan = (VXLANHdr *)OvsGetPacketBytes(packet,
+                                          sizeof(*vxlan),
+                                          layers.l7Offset,
+                                          &vxlanStorage);
+    if (!vxlan) {
+        return NDIS_STATUS_FAILURE;
+    }
+
+    tunnelKey->src = ip->saddr;
+    tunnelKey->dst = ip->daddr;
+    tunnelKey->ttl = ip->ttl;
+    tunnelKey->tos = ip->tos;
+    if (vxlan->instanceID) {
+        tunnelKey->flags = OVS_TNL_F_KEY;
+        tunnelKey->tunnelId = VXLAN_VNI_TO_TUNNELID(vxlan->vxlanID);
+    } else {
+        tunnelKey->flags = 0;
+        tunnelKey->tunnelId = 0;
+    }
+
+    return NDIS_STATUS_SUCCESS;
+}
+
+#pragma warning( pop )
diff --git a/datapath-windows/ovsext/OvsVxlan.h b/datapath-windows/ovsext/OvsVxlan.h
new file mode 100644
index 000000000..55cfc8203
--- /dev/null
+++ b/datapath-windows/ovsext/OvsVxlan.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OVS_VXLAN_H_
+#define __OVS_VXLAN_H_ 1
+
+#include "OvsNetProto.h"
+typedef struct _OVS_VXLAN_VPORT { /* VXLAN-specific vport state; hung off OVS_VPORT_ENTRY.priv by OvsInitVxlanTunnel */
+ UINT32 dstPort; /* UDP destination port taken from the add request; currently asserted to equal VXLAN_UDP_PORT */
+ UINT64 inPkts;
+ UINT64 outPkts;
+ UINT64 slowInPkts;
+ UINT64 slowOutPkts;
+ /*
+ * To be filled
+ */
+} OVS_VXLAN_VPORT, *POVS_VXLAN_VPORT;
+
+/*
+ * VXLAN header.
+ *
+ * The declared bit-fields add up to 64 bits.  The encap/decap code overlays
+ * this struct directly on packet memory right after the UDP header.
+ * NOTE(review): that relies on the compiler's bit-field allocation order
+ * and packing matching the wire layout -- confirm for each toolchain.
+ */
+typedef struct VXLANHdr {
+ /* Flags. */
+ UINT32 flags1:2;
+ /* Packet needs replication to multicast group (used for multicast proxy). */
+ UINT32 locallyReplicate:1;
+ /* Instance ID flag, must be set to 1. */
+ UINT32 instanceID:1;
+ /* Flags. */
+ UINT32 flags2:4;
+ /* Reserved. */
+ UINT32 reserved1:24;
+ /* VXLAN ID. */
+ UINT32 vxlanID:24;
+ /* Reserved. */
+ UINT32 reserved2:8;
+} VXLANHdr;
+
+NTSTATUS OvsInitVxlanTunnel(POVS_VPORT_ENTRY vport,
+ POVS_VPORT_ADD_REQUEST addReq);
+
+VOID OvsCleanupVxlanTunnel(POVS_VPORT_ENTRY vport);
+
+NDIS_STATUS OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet,
+ OvsIPv4TunnelKey *tunnelKey);
+
+NDIS_STATUS OvsEncapVxlan(PNET_BUFFER_LIST curNbl,
+ OvsIPv4TunnelKey *tunKey,
+ POVS_SWITCH_CONTEXT switchContext,
+ VOID *completionList,
+ POVS_PACKET_HDR_INFO layers,
+ PNET_BUFFER_LIST *newNbl);
+
+NDIS_STATUS OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext,
+ PNET_BUFFER_LIST curNbl,
+ OvsIPv4TunnelKey *tunKey,
+ PNET_BUFFER_LIST *newNbl);
+
+/*
+ * Returns the number of bytes of outer headers a VXLAN encapsulation adds:
+ * Ethernet + IPv4 + UDP + VXLAN.
+ */
+static __inline UINT32
+OvsGetVxlanTunHdrSize(VOID)
+{
+    /* XXX: Can L2 include VLAN at all? */
+    const UINT32 outerL2L3Size = sizeof (EthHdr) + sizeof (IPHdr);
+    const UINT32 outerL4Size = sizeof (UDPHdr) + sizeof (VXLANHdr);
+
+    return outerL2L3Size + outerL4Size;
+}
+
+#define VXLAN_UDP_PORT 4789 /* 0x12B5, host byte order */
+#define VXLAN_UDP_PORT_NBO 0xB512 /* VXLAN_UDP_PORT with its two bytes swapped, i.e. network byte order as seen on a little-endian host */
+
+#endif /* __OVS_VXLAN_H_ */
diff --git a/datapath-windows/ovsext/ovsext.inf b/datapath-windows/ovsext/ovsext.inf
new file mode 100644
index 000000000..d1f68eb6f
--- /dev/null
+++ b/datapath-windows/ovsext/ovsext.inf
@@ -0,0 +1,85 @@
+;
+; Copyright (c) VMWare. All Rights Reserved.
+;
+
+[version]
+Signature = "$Windows NT$"
+Class = NetService
+ClassGUID = {4D36E974-E325-11CE-BFC1-08002BE10318}
+Provider = %VMWare%
+CatalogFile = ovsext.cat
+DriverVer = 10/10/2013,1.0
+
+[Manufacturer]
+%VMWare%=VMWare,NTx86,NTia64,NTamd64
+
+[VMWare.NTx86]
+%OVSExt_Desc%=Install, OVSExt
+
+[VMWare.NTia64]
+%OVSExt_Desc%=Install, OVSExt
+
+[VMWare.NTamd64]
+%OVSExt_Desc%=Install, OVSExt
+
+;-------------------------------------------------------------------------
+; Installation Section
+;-------------------------------------------------------------------------
+[Install]
+AddReg=Inst_Ndi
+Characteristics=0x40000
+NetCfgInstanceId="{583CC151-73EC-4A6A-8B47-578297AD7623}"
+Copyfiles = OVSExt.copyfiles.sys
+
+[SourceDisksNames]
+1=%OVSExt_Desc%,"",,
+
+[SourceDisksFiles]
+OVSExt.sys=1
+
+[DestinationDirs]
+DefaultDestDir=12
+OVSExt.copyfiles.sys=12
+
+[OVSExt.copyfiles.sys]
+OVSExt.sys,,,2
+
+
+;-------------------------------------------------------------------------
+; Ndi installation support
+;-------------------------------------------------------------------------
+[Inst_Ndi]
+HKR, Ndi,Service,,"OVSExt"
+HKR, Ndi,CoServices,0x00010000,"OVSExt"
+HKR, Ndi,HelpText,,%OVSExt_HelpText%
+HKR, Ndi,FilterClass,,"ms_switch_forward"
+HKR, Ndi,FilterType,0x00010001,0x00000002
+HKR, Ndi\Interfaces,UpperRange,,"noupper"
+HKR, Ndi\Interfaces,LowerRange,,"nolower"
+HKR, Ndi\Interfaces, FilterMediaTypes,,"vmnetextension"
+HKR, Ndi,FilterRunType, 0x00010001, 2 ; optional
+
+;-------------------------------------------------------------------------
+; Service installation support, common.EventLog here is to demonstrate how to
+; write an event log
+;-------------------------------------------------------------------------
+[Install.Services]
+AddService=OVSExt,,OVSExt_Service_Inst;, common.EventLog
+
+[OVSExt_Service_Inst]
+DisplayName = %OVSExt_Desc%
+ServiceType = 1 ;SERVICE_KERNEL_DRIVER
+StartType = 1 ;SERVICE_SYSTEM_START
+ErrorControl = 1 ;SERVICE_ERROR_NORMAL
+ServiceBinary = %12%\OVSExt.sys
+LoadOrderGroup = NDIS
+Description = %OVSExt_Desc%
+AddReg = Common.Params.reg
+
+[Install.Remove.Services]
+DelService=OVSExt,0x200
+
+[Strings]
+VMWare = "VMWare"
+OVSExt_Desc = "VMWare OVS Extension"
+OVSExt_HelpText = "VMWare OVS forwarding switch extension"
diff --git a/datapath-windows/ovsext/ovsext.rc b/datapath-windows/ovsext/ovsext.rc
new file mode 100644
index 000000000..8f6e12143
--- /dev/null
+++ b/datapath-windows/ovsext/ovsext.rc
@@ -0,0 +1,23 @@
+//
+// Copyright (c) Microsoft Corporation. All Rights Reserved.
+//
+
+#include <windows.h>
+#include <ntverp.h>
+
+/*-----------------------------------------------*/
+/* the following lines are specific to this file */
+/*-----------------------------------------------*/
+
+/* VER_FILETYPE, VER_FILESUBTYPE, VER_FILEDESCRIPTION_STR
+ * and VER_INTERNALNAME_STR must be defined before including COMMON.VER
+ * The strings don't need a '\0', since common.ver has them.
+ */
+#define VER_FILETYPE VFT_DRV
+#define VER_FILESUBTYPE VFT2_DRV_NETWORK
+#define VER_FILEDESCRIPTION_STR "VMWare OVS Extension"
+#define VER_INTERNALNAME_STR "OVSExt.SYS"
+#define VER_ORIGINALFILENAME_STR "OVSExt.SYS"
+#define VER_LANGNEUTRAL
+
+#include "common.ver"
diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj
new file mode 100644
index 000000000..9728f2003
--- /dev/null
+++ b/datapath-windows/ovsext/ovsext.vcxproj
@@ -0,0 +1,164 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Win8.1 Debug|x64">
+ <Configuration>Win8.1 Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Win8 Debug|x64">
+ <Configuration>Win8 Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Win8.1 Release|x64">
+ <Configuration>Win8.1 Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Win8 Release|x64">
+ <Configuration>Win8 Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="PropertySheets">
+ <DriverType>WDM</DriverType>
+ <ConfigurationType>Driver</ConfigurationType>
+ </PropertyGroup>
+ <PropertyGroup Label="Globals">
+ <VCTargetsPath Condition="'$(VCTargetsPath11)' != '' and '$(VisualStudioVersion)' == '11.0'">$(VCTargetsPath11)</VCTargetsPath>
+ <Configuration>Win8 Debug</Configuration>
+ <Platform Condition="'$(Platform)' == ''">Win32</Platform>
+ <SampleGuid>{0D37F250-E766-44C7-90B4-D7E07E77D1AA}</SampleGuid>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{63FE215D-98BE-4440-8081-C6160EFB80FA}</ProjectGuid>
+ <RootNamespace>$(MSBuildProjectName)</RootNamespace>
+ </PropertyGroup>
+ <PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Win8.1 Debug|x64'">
+ <TargetVersion>WindowsV6.3</TargetVersion>
+ <UseDebugLibraries>True</UseDebugLibraries>
+ <PlatformToolset>WindowsKernelModeDriver8.1</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Win8 Debug|x64'">
+ <TargetVersion>Win8</TargetVersion>
+ <UseDebugLibraries>True</UseDebugLibraries>
+ <PlatformToolset>WindowsKernelModeDriver8.1</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Win8.1 Release|x64'">
+ <TargetVersion>WindowsV6.3</TargetVersion>
+ <UseDebugLibraries>False</UseDebugLibraries>
+ <PlatformToolset>WindowsKernelModeDriver8.1</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Win8 Release|x64'">
+ <TargetVersion>Win8</TargetVersion>
+ <UseDebugLibraries>False</UseDebugLibraries>
+ <PlatformToolset>WindowsKernelModeDriver8.1</PlatformToolset>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <PropertyGroup>
+ <OutDir>$(IntDir)</OutDir>
+ </PropertyGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Win8 Release|x64'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Win8.1 Release|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Win8 Debug|x64'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Win8.1 Debug|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" />
+ </ImportGroup>
+ <ItemGroup Label="WrappedTaskItems">
+ <ClInclude Include="OvsAtomic.h" />
+ <ClInclude Include="OvsBufferMgmt.h" />
+ <ClInclude Include="OvsChecksum.h" />
+ <ClInclude Include="OvsDebug.h" />
+ <ClInclude Include="OvsEth.h" />
+ <ClInclude Include="OvsEvent.h" />
+ <ClInclude Include="OvsFlow.h" />
+ <ClInclude Include="OvsIoctl.h" />
+ <ClInclude Include="OvsIpHelper.h" />
+ <ClInclude Include="OvsJhash.h" />
+ <ClInclude Include="OvsNetProto.h" />
+ <ClInclude Include="OvsOid.h" />
+ <ClInclude Include="OvsPacketParser.h" />
+ <ClInclude Include="OvsSwitch.h" />
+ <ClInclude Include="OvsTunnel.h" />
+ <ClInclude Include="OvsTunnelIntf.h" />
+ <ClInclude Include="OvsTypes.h" />
+ <ClInclude Include="OvsUser.h" />
+ <ClInclude Include="OvsUtil.h" />
+ <ClInclude Include="OvsVport.h" />
+ <ClInclude Include="OvsVxlan.h" />
+ <ClInclude Include="precomp.h" />
+ </ItemGroup>
+ <PropertyGroup>
+ <TargetName>OVSExt</TargetName>
+ </PropertyGroup>
+ <ItemDefinitionGroup>
+ <ClCompile>
+ <PreprocessorDefinitions>%(PreprocessorDefinitions);NDIS_WDM=1;NDIS630=1;OVS_WIN_DP=1</PreprocessorDefinitions>
+ </ClCompile>
+ <Midl>
+ <PreprocessorDefinitions>%(PreprocessorDefinitions);NDIS_WDM=1;NDIS630=1</PreprocessorDefinitions>
+ </Midl>
+ <ResourceCompile>
+ <PreprocessorDefinitions>%(PreprocessorDefinitions);NDIS_WDM=1;NDIS630=1</PreprocessorDefinitions>
+ </ResourceCompile>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup>
+ <Link>
+ <AdditionalDependencies>%(AdditionalDependencies);$(DDK_LIB_PATH)\ndis.lib;$(DDK_LIB_PATH)\fwpkclnt.lib;$(SDK_LIB_PATH)\uuid.lib;$(DDK_LIB_PATH)\netio.lib</AdditionalDependencies>
+ </Link>
+ <ClCompile>
+ <TreatWarningAsError>true</TreatWarningAsError>
+ <WarningLevel>Level4</WarningLevel>
+ <ExceptionHandling>
+ </ExceptionHandling>
+ <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Win8 Debug|x64'">$(IntDir);%(AdditionalIncludeDirectories);..\..</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Win8.1 Debug|x64'">$(IntDir);%(AdditionalIncludeDirectories);..\..</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Win8 Release|x64'">$(IntDir);%(AdditionalIncludeDirectories);..\..</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Win8.1 Release|x64'">$(IntDir);%(AdditionalIncludeDirectories);..\..</AdditionalIncludeDirectories>
+ </ClCompile>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="OvsDriver.c" />
+ <ClCompile Include="OvsJhash.c" />
+ <ClCompile Include="OvsOid.c" />
+ <ClCompile Include="OvsPacketIO.c" />
+ <ClCompile Include="OvsPacketParser.c" />
+ <ClCompile Include="OvsBufferMgmt.c" />
+ <ClCompile Include="OvsChecksum.c" />
+ <ClCompile Include="OvsIpHelper.c" />
+ <ClCompile Include="OvsTunnel.c" />
+ <ClCompile Include="OvsTunnelFilter.c" />
+ <ClCompile Include="OvsVxlan.c" />
+ <ClCompile Include="OvsActions.c" />
+ <ClCompile Include="OvsDebug.c" />
+ <ClCompile Include="OvsEvent.c" />
+ <ClCompile Include="OvsFlow.c" />
+ <ClCompile Include="OvsUser.c" />
+ <ClCompile Include="OvsIoctl.c" />
+ <ClCompile Include="OvsSwitch.c" />
+ <ClCompile Include="OvsUtil.c" />
+ <ClCompile Include="OvsVport.c" />
+ <ClCompile Include="precompsrc.c">
+ <AdditionalIncludeDirectories>;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreCompiledHeaderFile>precomp.h</PreCompiledHeaderFile>
+ <PreCompiledHeader>Create</PreCompiledHeader>
+ <PreCompiledHeaderOutputFile>$(IntDir)\precomp.h.pch</PreCompiledHeaderOutputFile>
+ </ClCompile>
+ <ResourceCompile Include="ovsext.rc" />
+ </ItemGroup>
+ <ItemGroup>
+ <Inf Exclude="@(Inf)" Include="*.inf" />
+ <FilesToPackage Include="$(TargetPath)" />
+ </ItemGroup>
+ <ItemGroup>
+ <None Exclude="@(None)" Include="*.txt;*.htm;*.html" />
+ <None Exclude="@(None)" Include="*.ico;*.cur;*.bmp;*.dlg;*.rct;*.gif;*.jpg;*.jpeg;*.wav;*.jpe;*.tiff;*.tif;*.png;*.rc2" />
+ <None Exclude="@(None)" Include="*.def;*.bat;*.hpj;*.asmx" />
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+</Project> \ No newline at end of file
diff --git a/datapath-windows/ovsext/ovsext.vcxproj.user b/datapath-windows/ovsext/ovsext.vcxproj.user
new file mode 100644
index 000000000..7169f02fb
--- /dev/null
+++ b/datapath-windows/ovsext/ovsext.vcxproj.user
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Win8 Debug|x64'">
+ <SignMode>TestSign</SignMode>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Win8.1 Debug|x64'">
+ <SignMode>TestSign</SignMode>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Win8 Release|x64'">
+ <SignMode>TestSign</SignMode>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Win8.1 Release|x64'">
+ <SignMode>TestSign</SignMode>
+ </PropertyGroup>
+</Project>
diff --git a/datapath-windows/ovsext/precomp.h b/datapath-windows/ovsext/precomp.h
new file mode 100644
index 000000000..45e72de6f
--- /dev/null
+++ b/datapath-windows/ovsext/precomp.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <ndis.h>
+#include <netiodef.h>
+#include <intsafe.h>
+#include <ntintsafe.h>
+#include <ntstrsafe.h>
+#include <Strsafe.h>
+
+#include "OvsTypes.h"
+#include "..\include\OvsPub.h"
+#include "OvsUtil.h"
+/*
+ * Include openvswitch.h from userspace. Changing the location of the file from
+ * include/linux is pending discussion.
+ */
+#include "include\linux\openvswitch.h"
diff --git a/datapath-windows/ovsext/precompsrc.c b/datapath-windows/ovsext/precompsrc.c
new file mode 100644
index 000000000..133b6872d
--- /dev/null
+++ b/datapath-windows/ovsext/precompsrc.c
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "precomp.h"