From edb2335861d629f791467f77729e743378dac15f Mon Sep 17 00:00:00 2001 From: Wilson Peng Date: Wed, 6 Apr 2022 21:18:19 +0800 Subject: datapath-windows: Add IPv6 Geneve tunnel support in Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a first step, OVS Windows will support IPv6 tunnels (Geneve IPv6 tunnel). Implementation on Windows ------------------------- 1. For IPv6 tunnel support, OvsIPTunnelKey replaces the original OvsIPv4TunnelKey in the related flow context handling. 2. The related src and dst addresses are changed from UINT32 to the SOCKADDR_INET type. 3. With IPv6 tunnel support, a node running OVS-Windows can encapsulate IPv4/IPv6 packets via an IPv6 Geneve tunnel, and the node can also encapsulate IPv4/IPv6 packets via an IPv4 Geneve tunnel. 4. The related IPHelper data structures are adapted to support IPv6 tunnels. In the IPHelper part, the related Windows APIs (such as GetUnicastIpAddressTable/GetBestRoute2/GetIpNetEntry2/ ResolveIpNetEntry2) and Windows data structures (MIB_IPFORWARD_ROW2/MIB_IPNET_ROW2/IP_ADDRESS_PREFIX) already support both IPv4 and IPv6. Some OVS Windows functions and data structures have now been adjusted to support IPv6 tunnels as well. 5. The OVS_TUNNEL_KEY_ATTR_IPV6_SRC and OVS_TUNNEL_KEY_ATTR_IPV6_DST fields are now supported in the OVS-Windows kernel for IPv6 tunnels. Testing done. ------------------------- Test topology: 1 Windows VM (Win2019) and 2 Ubuntu 16.04 servers. Both VMs are running on one ESX host. 1. Set up one IPv6 Geneve tunnel between 1 Windows VM and 1 Ubuntu server. 
Windows VM, vif0(6000::2/40.1.1.10) vif1(5000::2) <-> Ubuntu VM Eth2(5000::9), namespace ns1 with interface ns1_link_peer(6000::9/40.1.1.2) Related tunnel: ovs-vsctl.exe add-port br-int bms-tun0 -- set interface bms-tun0 type=Geneve options:csum=true options:key=flow options:local_ip="5000::2" options:remote_ip=flow In this topology, traffic from Vif0(Win) to ns1_link_peer(Ubuntu) goes through the Geneve tunnel (5000::2-->5000::9) for both IPv4 traffic (40.1.1.10-->40.1.1.2) and IPv6 traffic (6000::2-->6000::9). 2. Set up one IPv4 Geneve tunnel between the Windows VM and 1 Ubuntu server. Windows VM, vif0(6000::2/40.1.1.10) vif1(50.1.1.11) <-> Ubuntu, Eth2(50.1.1.9), namespace ns1 with interface ns1_link_peer(6000::19/40.1.1.9) Related tunnel: ovs-vsctl.exe -- set Interface bms-tun0 type=geneve options:csum=true options:key=flow options:local_ip="50.1.1.11" options:remote_ip=flow In this topology, traffic from Vif0(Win) to ns1_link_peer(Ubuntu) goes through the Geneve tunnel (50.1.1.11-->50.1.1.9) for both IPv4 traffic (40.1.1.10-->40.1.1.9) and IPv6 traffic (6000::2-->6000::19). 3. The IPv4 regression tests in the Antrea project (about 60 test cases) pass. Future Work ----------- Add IPv6 tunnel support for the other tunnel types (GRE/VXLAN/STT). 
Signed-off-by: Wilson Peng Signed-off-by: Alin-Gabriel Serdean --- datapath-windows/ovsext/Actions.c | 62 +++-- datapath-windows/ovsext/Actions.h | 2 +- datapath-windows/ovsext/Conntrack.c | 3 +- datapath-windows/ovsext/DpInternal.h | 25 +- datapath-windows/ovsext/Flow.c | 155 +++++++---- datapath-windows/ovsext/Flow.h | 7 +- datapath-windows/ovsext/Geneve.c | 163 ++++++++---- datapath-windows/ovsext/Geneve.h | 19 +- datapath-windows/ovsext/Gre.c | 37 ++- datapath-windows/ovsext/Gre.h | 4 +- datapath-windows/ovsext/IpHelper.c | 482 +++++++++++++++++++++++------------ datapath-windows/ovsext/IpHelper.h | 91 ++++++- datapath-windows/ovsext/Offload.c | 47 +++- datapath-windows/ovsext/Offload.h | 13 +- datapath-windows/ovsext/Stt.c | 40 ++- datapath-windows/ovsext/Stt.h | 4 +- datapath-windows/ovsext/Tunnel.c | 8 +- datapath-windows/ovsext/User.c | 7 +- datapath-windows/ovsext/User.h | 2 +- datapath-windows/ovsext/Vxlan.c | 166 +++++++----- datapath-windows/ovsext/Vxlan.h | 12 +- 21 files changed, 918 insertions(+), 431 deletions(-) (limited to 'datapath-windows') diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c index 218e7db81..c66ece080 100644 --- a/datapath-windows/ovsext/Actions.c +++ b/datapath-windows/ovsext/Actions.c @@ -186,19 +186,29 @@ OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx, * packets only if they are at least VXLAN header size. */ - /* - * For some of the tunnel types such as GRE, the dstPort is not applicable - * since GRE does not have a L4 port. We use '0' for convenience. - */ - if (!flowKey->ipKey.nwFrag) { - UINT16 dstPort = htons(flowKey->ipKey.l4.tpDst); + /* + * For some of the tunnel types such as GRE, the dstPort is not applicable + * since GRE does not have a L4 port. We use '0' for convenience. 
+ */ - ASSERT(flowKey->ipKey.nwProto != IPPROTO_GRE || dstPort == 0); + if ((flowKey->l2.dlType == htons(ETH_TYPE_IPV4) && + !flowKey->ipKey.nwFrag) || + (flowKey->l2.dlType == htons(ETH_TYPE_IPV6) && + !flowKey->ipv6Key.nwFrag)) { + UINT16 dstPort = 0; + uint8_t nwProto = 0; + if (flowKey->l2.dlType == htons(ETH_TYPE_IPV6)) { + dstPort = htons(flowKey->ipv6Key.l4.tpDst); + nwProto = flowKey->ipv6Key.nwProto; + } else if (flowKey->l2.dlType == htons(ETH_TYPE_IPV4)) { + dstPort = htons(flowKey->ipKey.l4.tpDst); + nwProto = flowKey->ipKey.nwProto; + } + ASSERT(nwProto != IPPROTO_GRE || dstPort == 0); tunnelVport = OvsFindTunnelVportByDstPortAndNWProto(ovsFwdCtx->switchContext, - dstPort, - flowKey->ipKey.nwProto); + dstPort, nwProto); if (tunnelVport) { switch(tunnelVport->ovsType) { case OVS_VPORT_TYPE_STT: @@ -290,12 +300,12 @@ OvsDetectTunnelPkt(OvsForwardingContext *ovsFwdCtx, (vport->ovsType != OVS_VPORT_TYPE_NETDEV && vport->ovsType != OVS_VPORT_TYPE_INTERNAL && !OvsIsTunnelVportType(vport->ovsType))) { - ovsFwdCtx->tunKey.dst = 0; + RtlZeroMemory(&ovsFwdCtx->tunKey.dst, sizeof(ovsFwdCtx->tunKey.dst)); } } /* Tunnel the packet only if tunnel context is set. 
*/ - if (ovsFwdCtx->tunKey.dst != 0) { + if (!OvsIphIsZero(&(ovsFwdCtx->tunKey.dst))) { switch(dstVport->ovsType) { case OVS_VPORT_TYPE_GRE: ovsActionStats.txGre++; @@ -470,7 +480,7 @@ static __inline VOID OvsClearTunTxCtx(OvsForwardingContext *ovsFwdCtx) { ovsFwdCtx->tunnelTxNic = NULL; - ovsFwdCtx->tunKey.dst = 0; + RtlZeroMemory(&ovsFwdCtx->tunKey.dst, sizeof(ovsFwdCtx->tunKey.dst)); } @@ -484,7 +494,7 @@ static __inline VOID OvsClearTunRxCtx(OvsForwardingContext *ovsFwdCtx) { ovsFwdCtx->tunnelRxNic = NULL; - ovsFwdCtx->tunKey.dst = 0; + RtlZeroMemory(&ovsFwdCtx->tunKey.dst, sizeof(ovsFwdCtx->tunKey.dst)); } @@ -540,7 +550,7 @@ OvsCompleteNBLForwardingCtx(OvsForwardingContext *ovsFwdCtx, * -------------------------------------------------------------------------- */ static __inline NDIS_STATUS -OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx) +OvsDoFlowLookupOutput(OvsForwardingContext* ovsFwdCtx) { OvsFlowKey key = { 0 }; OvsFlow *flow = NULL; @@ -557,11 +567,11 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx) ASSERT(vport->nicState == NdisSwitchNicStateConnected); /* Assert that in the Rx direction, key is always setup. */ - ASSERT(ovsFwdCtx->tunnelRxNic == NULL || ovsFwdCtx->tunKey.dst != 0); + ASSERT(ovsFwdCtx->tunnelRxNic == NULL || !OvsIphIsZero(&(ovsFwdCtx->tunKey.dst))); status = OvsExtractFlow(ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo, &key, &ovsFwdCtx->layers, - ovsFwdCtx->tunKey.dst != 0 ? &ovsFwdCtx->tunKey : NULL); + !OvsIphIsZero(&(ovsFwdCtx->tunKey.dst))? &(ovsFwdCtx->tunKey):NULL); if (status != NDIS_STATUS_SUCCESS) { OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"OVS-Flow extract failed"); @@ -647,6 +657,7 @@ OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx) OvsDoFragmentNbl(ovsFwdCtx, ctx->mru); } OVS_FWD_INFO switchFwdInfo = { 0 }; + /* Apply the encapsulation. The encapsulation will not consume the NBL. 
*/ switch(ovsFwdCtx->tunnelTxNic->ovsType) { case OVS_VPORT_TYPE_GRE: @@ -915,11 +926,11 @@ OvsOutputForwardingCtx(OvsForwardingContext *ovsFwdCtx) if (ovsFwdCtx->tunnelTxNic != NULL) { status = OvsTunnelPortTx(ovsFwdCtx); ASSERT(ovsFwdCtx->tunnelTxNic == NULL); - ASSERT(ovsFwdCtx->tunKey.dst == 0); + ASSERT(OvsIphIsZero(&(ovsFwdCtx->tunKey.dst))); } else if (ovsFwdCtx->tunnelRxNic != NULL) { status = OvsTunnelPortRx(ovsFwdCtx); ASSERT(ovsFwdCtx->tunnelRxNic == NULL); - ASSERT(ovsFwdCtx->tunKey.dst == 0); + ASSERT(OvsIphIsZero(&(ovsFwdCtx->tunKey.dst))); } ASSERT(ovsFwdCtx->curNbl == NULL); @@ -1753,10 +1764,10 @@ OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx, case OVS_KEY_ATTR_TUNNEL: { - OvsIPv4TunnelKey tunKey; + OvsIPTunnelKey tunKey = { 0 }; tunKey.flow_hash = (uint16)(hash ? *hash : OvsHashFlow(key)); tunKey.dst_port = key->ipKey.l4.tpDst; - NTSTATUS convertStatus = OvsTunnelAttrToIPv4TunnelKey((PNL_ATTR)a, &tunKey); + NTSTATUS convertStatus = OvsTunnelAttrToIPTunnelKey((PNL_ATTR)a, &tunKey); status = SUCCEEDED(convertStatus) ? 
NDIS_STATUS_SUCCESS : NDIS_STATUS_FAILURE; ASSERT(status == NDIS_STATUS_SUCCESS); RtlCopyMemory(&ovsFwdCtx->tunKey, &tunKey, sizeof ovsFwdCtx->tunKey); @@ -1874,7 +1885,7 @@ OvsOutputUserspaceAction(OvsForwardingContext *ovsFwdCtx, POVS_PACKET_HDR_INFO layers = &ovsFwdCtx->layers; BOOLEAN isRecv = FALSE; OVS_FWD_INFO fwdInfo; - OvsIPv4TunnelKey tunKey; + OvsIPTunnelKey tunKey; POVS_VPORT_ENTRY vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo); @@ -1894,10 +1905,11 @@ OvsOutputUserspaceAction(OvsForwardingContext *ovsFwdCtx, if (egrTunAttr) { RtlZeroMemory(&tunKey, sizeof tunKey); RtlCopyMemory(&tunKey, &ovsFwdCtx->tunKey, sizeof tunKey); - if (!tunKey.src) { - status = OvsLookupIPFwdInfo(tunKey.src, tunKey.dst, &fwdInfo); - if (status == NDIS_STATUS_SUCCESS && tunKey.dst == fwdInfo.dstIpAddr) { - tunKey.src = fwdInfo.srcIpAddr; + if (!OvsIphIsZero(&(tunKey.src))) { + status = OvsLookupIPhFwdInfo(tunKey.src, tunKey.dst, &fwdInfo); + if (status == NDIS_STATUS_SUCCESS && + OvsIphAddrEquals(&(tunKey.dst), &(fwdInfo.dstIphAddr))) { + OvsCopyIphAddress(&(tunKey.src), &fwdInfo.srcIphAddr); } } tunKey.flow_hash = tunKey.flow_hash ? 
tunKey.flow_hash : MAXINT16; diff --git a/datapath-windows/ovsext/Actions.h b/datapath-windows/ovsext/Actions.h index bc12e1166..b374c3a18 100644 --- a/datapath-windows/ovsext/Actions.h +++ b/datapath-windows/ovsext/Actions.h @@ -58,7 +58,7 @@ typedef struct OvsForwardingContext { * - specified in actions during tunneling Tx * - extracted from an NBL during tunneling Rx */ - OvsIPv4TunnelKey tunKey; + OvsIPTunnelKey tunKey; /* * Tunneling - Tx: diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 7f1d2fb41..237efa9e9 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -20,6 +20,7 @@ #include "PacketParser.h" #include "Event.h" #include "Conntrack-nat.h" +#include "IpHelper.h" #pragma warning(push) #pragma warning(disable:4311) @@ -511,7 +512,7 @@ OvsDetectCtPacket(OvsForwardingContext *fwdCtx, status = OvsExtractFlow(fwdCtx->curNbl, fwdCtx->srcVportNo, &newFlowKey, &fwdCtx->layers, - fwdCtx->tunKey.dst != 0 ? &fwdCtx->tunKey : NULL); + !OvsIphIsZero(&(fwdCtx->tunKey.dst)) ? &(fwdCtx->tunKey) : NULL); if (status != NDIS_STATUS_SUCCESS) { OVS_LOG_ERROR("Extract flow failed Nbl %p", fwdCtx->curNbl); return status; diff --git a/datapath-windows/ovsext/DpInternal.h b/datapath-windows/ovsext/DpInternal.h index 58e7ed8e2..b5027e35e 100644 --- a/datapath-windows/ovsext/DpInternal.h +++ b/datapath-windows/ovsext/DpInternal.h @@ -137,7 +137,7 @@ typedef struct L2Key { #define NUM_PKT_ATTR_REQUIRED 35 #define TUN_OPT_MAX_LEN 255 -typedef union OvsIPv4TunnelKey { +typedef union OvsIPTunnelKey { /* Options should always be the first member of tunnel key. * They are stored at the end of the array if they are less than the * maximum size. This allows us to get the benefits of variable length @@ -146,8 +146,9 @@ typedef union OvsIPv4TunnelKey { struct { UINT8 tunOpts[TUN_OPT_MAX_LEN]; /* Tunnel options. */ UINT8 tunOptLen; /* Tunnel option length in byte. 
*/ - ovs_be32 dst; - ovs_be32 src; + + SOCKADDR_INET dst; /* IPv4/6 destination address. */ + SOCKADDR_INET src; /* IPv4/6 source address. */ ovs_be64 tunnelId; uint16_t flags; uint8_t tos; @@ -161,24 +162,24 @@ typedef union OvsIPv4TunnelKey { }; }; uint64_t attr[NUM_PKT_ATTR_REQUIRED]; -} OvsIPv4TunnelKey; /* Size of 280 byte. */ +} OvsIPTunnelKey; /* Size of 280+40-8= 312 byte. */ static __inline uint8_t -TunnelKeyGetOptionsOffset(const OvsIPv4TunnelKey *key) +IPTunnelKeyGetOptionsOffset(const OvsIPTunnelKey *key) { return TUN_OPT_MAX_LEN - key->tunOptLen; } static __inline uint8_t * -TunnelKeyGetOptions(OvsIPv4TunnelKey *key) +IPTunnelKeyGetOptions(OvsIPTunnelKey *key) { - return key->tunOpts + TunnelKeyGetOptionsOffset(key); + return key->tunOpts + IPTunnelKeyGetOptionsOffset(key); } static __inline uint16_t -TunnelKeyGetRealSize(OvsIPv4TunnelKey *key) +IPTunnelKeyGetRealSize(OvsIPTunnelKey *key) { - return sizeof(OvsIPv4TunnelKey) - TunnelKeyGetOptionsOffset(key); + return sizeof(OvsIPTunnelKey) - IPTunnelKeyGetOptionsOffset(key); } typedef struct MplsKey { @@ -187,7 +188,7 @@ typedef struct MplsKey { } MplsKey; /* Size of 8 bytes. */ typedef __declspec(align(8)) struct OvsFlowKey { - OvsIPv4TunnelKey tunKey; /* 280 bytes */ + OvsIPTunnelKey tunKey; /* 280 bytes? */ L2Key l2; /* 32 bytes */ union { /* These headers are mutually exclusive. 
*/ @@ -209,7 +210,7 @@ typedef __declspec(align(8)) struct OvsFlowKey { } ct; /* Connection Tracking Flags */ } OvsFlowKey; -#define OVS_WIN_TUNNEL_KEY_SIZE (sizeof (OvsIPv4TunnelKey)) +#define OVS_WIN_IP_TUNNEL_KEY_SIZE (sizeof (OvsIPTunnelKey)) #define OVS_L2_KEY_SIZE (sizeof (L2Key)) #define OVS_IP_KEY_SIZE (sizeof (IpKey)) #define OVS_IPV6_KEY_SIZE (sizeof (Ipv6Key)) @@ -295,7 +296,7 @@ typedef struct _OVS_PACKET_INFO { uint32_t queue; uint32_t inPort; uint32_t cmd; - OvsIPv4TunnelKey tunnelKey; + OvsIPTunnelKey tunnelKey; uint8_t *payload; /* Includes user data defined as chain of netlink attributes followed by the * packet data. */ diff --git a/datapath-windows/ovsext/Flow.c b/datapath-windows/ovsext/Flow.c index ac0582c18..0a7b2b1d4 100644 --- a/datapath-windows/ovsext/Flow.c +++ b/datapath-windows/ovsext/Flow.c @@ -22,6 +22,8 @@ #include "PacketParser.h" #include "Datapath.h" #include "Geneve.h" +#include "IpHelper.h" +#include #ifdef OVS_DBG_MOD #undef OVS_DBG_MOD @@ -86,7 +88,7 @@ static NTSTATUS OvsDoDumpFlows(OvsFlowDumpInput *dumpInput, UINT32 *replyLen); static NTSTATUS OvsProbeSupportedFeature(POVS_MESSAGE msgIn, PNL_ATTR keyAttr); -static UINT16 OvsGetFlowL2Offset(const OvsIPv4TunnelKey *tunKey); +static UINT16 OvsGetFlowIPL2Offset(const OvsIPTunnelKey *tunKey); #define OVS_FLOW_TABLE_SIZE 2048 #define OVS_FLOW_TABLE_MASK (OVS_FLOW_TABLE_SIZE -1) @@ -195,7 +197,11 @@ const NL_POLICY nlFlowTunnelKeyPolicy[] = { [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = {.type = NL_A_UNSPEC, .minLen = 4, .maxLen = 4, .optional = TRUE}, [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = {.type = NL_A_UNSPEC, .minLen = 4 , - .maxLen = 4, .optional = FALSE}, + .maxLen = 4, .optional = TRUE}, + [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = {.type = NL_A_UNSPEC, .minLen = 16, + .maxLen = 16, .optional = TRUE}, + [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = {.type = NL_A_UNSPEC, .minLen = 16, + .maxLen = 16, .optional = TRUE}, [OVS_TUNNEL_KEY_ATTR_TOS] = {.type = NL_A_UNSPEC, .minLen = 1, .maxLen = 1, .optional = TRUE}, 
[OVS_TUNNEL_KEY_ATTR_TTL] = {.type = NL_A_UNSPEC, .minLen = 1, @@ -1017,7 +1023,7 @@ MapFlowKeyToNlKey(PNL_BUFFER nlBuf, goto done; } - if (flowKey->tunKey.dst) { + if (!OvsIphIsZero(&(flowKey->tunKey.dst))) { rc = MapFlowTunKeyToNlKey(nlBuf, &(flowKey->tunKey), tunKeyType); if (rc != STATUS_SUCCESS) { @@ -1038,12 +1044,12 @@ error_nested_start: /* *---------------------------------------------------------------------------- * MapFlowTunKeyToNlKey -- - * Maps OvsIPv4TunnelKey to OVS_TUNNEL_KEY_ATTR_ID attribute. + * Maps OvsIPTunnelKey to OVS_TUNNEL_KEY_ATTR_ID attribute. *---------------------------------------------------------------------------- */ NTSTATUS MapFlowTunKeyToNlKey(PNL_BUFFER nlBuf, - OvsIPv4TunnelKey *tunKey, + OvsIPTunnelKey*tunKey, UINT16 tunKeyType) { NTSTATUS rc = STATUS_SUCCESS; @@ -1062,16 +1068,39 @@ MapFlowTunKeyToNlKey(PNL_BUFFER nlBuf, goto done; } - if (!NlMsgPutTailU32(nlBuf, OVS_TUNNEL_KEY_ATTR_IPV4_DST, - tunKey->dst)) { - rc = STATUS_UNSUCCESSFUL; - goto done; + + if (!OvsIphIsZero(&tunKey->dst)) { + if (tunKey->dst.si_family == AF_INET) { + if (!NlMsgPutTailU32(nlBuf, OVS_TUNNEL_KEY_ATTR_IPV4_DST, + tunKey->dst.Ipv4.sin_addr.s_addr)) { + rc = STATUS_UNSUCCESSFUL; + goto done; + } + } else if (tunKey->dst.si_family == AF_INET6) { + if (!NlMsgPutTailUnspec(nlBuf, OVS_TUNNEL_KEY_ATTR_IPV6_DST, + (PCHAR)&tunKey->dst.Ipv6.sin6_addr, + sizeof(tunKey->dst.Ipv6.sin6_addr))) { + rc = STATUS_UNSUCCESSFUL; + goto done; + } + } } - if (!NlMsgPutTailU32(nlBuf, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, - tunKey->src)) { - rc = STATUS_UNSUCCESSFUL; - goto done; + if (!OvsIphIsZero(&tunKey->src)) { + if (tunKey->src.si_family == AF_INET) { + if (!NlMsgPutTailU32(nlBuf, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, + tunKey->src.Ipv4.sin_addr.s_addr)) { + rc = STATUS_UNSUCCESSFUL; + goto done; + } + } else if (tunKey->src.si_family == AF_INET6) { + if (!NlMsgPutTailUnspec(nlBuf, OVS_TUNNEL_KEY_ATTR_IPV6_SRC, + (PCHAR)&tunKey->src.Ipv6.sin6_addr, + 
sizeof(tunKey->src.Ipv6.sin6_addr))) { + rc = STATUS_UNSUCCESSFUL; + goto done; + } + } } if (!NlMsgPutTailU8(nlBuf, OVS_TUNNEL_KEY_ATTR_TOS, @@ -1088,7 +1117,7 @@ MapFlowTunKeyToNlKey(PNL_BUFFER nlBuf, if (tunKey->tunOptLen > 0 && !NlMsgPutTailUnspec(nlBuf, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, - (PCHAR)TunnelKeyGetOptions(tunKey), + (PCHAR)IPTunnelKeyGetOptions(tunKey), tunKey->tunOptLen)) { rc = STATUS_UNSUCCESSFUL; goto done; @@ -1581,7 +1610,7 @@ _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs, } /* ==== L3 + L4. ==== */ - destKey->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE + destKey->l2.keyLen = OVS_WIN_IP_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE - destKey->l2.offset; switch (ntohs(destKey->l2.dlType)) { @@ -1760,7 +1789,7 @@ _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs, */ static __inline NTSTATUS OvsTunnelAttrToGeneveOptions(PNL_ATTR attr, - OvsIPv4TunnelKey *tunKey) + OvsIPTunnelKey *tunKey) { UINT32 optLen = NlAttrGetSize(attr); GeneveOptionHdr *option; @@ -1790,30 +1819,29 @@ OvsTunnelAttrToGeneveOptions(PNL_ATTR attr, option = (GeneveOptionHdr *)((UINT8 *)option + len); optLen -= len; } - memcpy(TunnelKeyGetOptions(tunKey), NlAttrData(attr), tunKey->tunOptLen); + memcpy(IPTunnelKeyGetOptions(tunKey), NlAttrData(attr), tunKey->tunOptLen); if (isCritical) { tunKey->flags |= OVS_TNL_F_CRT_OPT; } return STATUS_SUCCESS; } - /* *---------------------------------------------------------------------------- - * OvsTunnelAttrToIPv4TunnelKey -- + * OvsTunnelAttrToIPTunnelKey -- * Converts OVS_KEY_ATTR_TUNNEL attribute to tunKey. 
*---------------------------------------------------------------------------- */ NTSTATUS -OvsTunnelAttrToIPv4TunnelKey(PNL_ATTR attr, - OvsIPv4TunnelKey *tunKey) +OvsTunnelAttrToIPTunnelKey(PNL_ATTR attr, + OvsIPTunnelKey *tunKey) { PNL_ATTR a; INT rem; INT hasOpt = 0; NTSTATUS status; - memset(tunKey, 0, OVS_WIN_TUNNEL_KEY_SIZE); + memset(tunKey, 0, OVS_WIN_IP_TUNNEL_KEY_SIZE); ASSERT(NlAttrType(attr) == OVS_KEY_ATTR_TUNNEL); NL_ATTR_FOR_EACH_UNSAFE(a, rem, NlAttrData(attr), @@ -1824,10 +1852,26 @@ OvsTunnelAttrToIPv4TunnelKey(PNL_ATTR attr, tunKey->flags |= OVS_TNL_F_KEY; break; case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: - tunKey->src = NlAttrGetBe32(a); + tunKey->src.si_family = AF_INET; + tunKey->src.Ipv4.sin_addr.s_addr = NlAttrGetBe32(a); break; case OVS_TUNNEL_KEY_ATTR_IPV4_DST: - tunKey->dst = NlAttrGetBe32(a); + tunKey->dst.si_family = AF_INET; + tunKey->dst.Ipv4.sin_addr.s_addr = NlAttrGetBe32(a); + break; + case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: + tunKey->src.si_family = AF_INET6; + RtlCopyMemory(&tunKey->src.Ipv6.sin6_addr, + NlAttrGetUnspec(a, + sizeof(tunKey->src.Ipv6.sin6_addr)), + sizeof(tunKey->src.Ipv6.sin6_addr)); + break; + case OVS_TUNNEL_KEY_ATTR_IPV6_DST: + tunKey->dst.si_family = AF_INET6; + RtlCopyMemory(&tunKey->dst.Ipv6.sin6_addr, + NlAttrGetUnspec(a, + sizeof(tunKey->dst.Ipv6.sin6_addr)), + sizeof(tunKey->dst.Ipv6.sin6_addr)); break; case OVS_TUNNEL_KEY_ATTR_TOS: tunKey->tos = NlAttrGetU8(a); @@ -1880,7 +1924,7 @@ MapTunAttrToFlowPut(PNL_ATTR *keyAttrs, PNL_ATTR *tunAttrs, OvsFlowKey *destKey) { - memset(&destKey->tunKey, 0, OVS_WIN_TUNNEL_KEY_SIZE); + memset(&destKey->tunKey, 0, OVS_WIN_IP_TUNNEL_KEY_SIZE); if (keyAttrs[OVS_KEY_ATTR_TUNNEL]) { /* XXX: This blocks performs same functionality as OvsTunnelAttrToIPv4TunnelKey. 
Consider refactoring the code.*/ @@ -1891,15 +1935,35 @@ MapTunAttrToFlowPut(PNL_ATTR *keyAttrs, } if (tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV4_DST]) { - destKey->tunKey.dst = + destKey->tunKey.dst.si_family = AF_INET; + destKey->tunKey.dst.Ipv4.sin_addr.s_addr = NlAttrGetU32(tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV4_DST]); } if (tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV4_SRC]) { - destKey->tunKey.src = + destKey->tunKey.src.si_family = AF_INET; + destKey->tunKey.src.Ipv4.sin_addr.s_addr = NlAttrGetU32(tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV4_SRC]); } + if (tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV6_DST]) { + destKey->tunKey.dst.si_family = AF_INET6; + RtlCopyMemory(&destKey->tunKey.dst.Ipv6.sin6_addr, + NlAttrGetUnspec( + tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV6_DST], + sizeof(destKey->tunKey.dst.Ipv6.sin6_addr)), + sizeof(destKey->tunKey.dst.Ipv6.sin6_addr)); + } + + if (tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV6_SRC]) { + destKey->tunKey.src.si_family = AF_INET6; + RtlCopyMemory(&destKey->tunKey.src.Ipv6.sin6_addr, + NlAttrGetUnspec( + tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV6_SRC], + sizeof(destKey->tunKey.src.Ipv6.sin6_addr)), + sizeof(destKey->tunKey.src.Ipv6.sin6_addr)); + } + if (tunAttrs[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT]) { destKey->tunKey.flags |= OVS_TNL_F_DONT_FRAGMENT; } @@ -1935,9 +1999,9 @@ MapTunAttrToFlowPut(PNL_ATTR *keyAttrs, destKey->tunKey.flags |= OVS_TNL_F_GENEVE_OPT; } } - destKey->l2.offset = OvsGetFlowL2Offset(&destKey->tunKey); + destKey->l2.offset = OvsGetFlowIPL2Offset(&(destKey->tunKey)); } else { - destKey->l2.offset = OvsGetFlowL2Offset(NULL); + destKey->l2.offset = OvsGetFlowIPL2Offset(NULL); } } @@ -2110,16 +2174,16 @@ OvsGetFlowMetadata(OvsFlowKey *key, } UINT16 -OvsGetFlowL2Offset(const OvsIPv4TunnelKey *tunKey) +OvsGetFlowIPL2Offset(const OvsIPTunnelKey *tunKey) { if (tunKey != NULL) { // Align with int64 boundary if (tunKey->tunOptLen == 0) { return (TUN_OPT_MAX_LEN + 1) / 8 * 8; } - return TunnelKeyGetOptionsOffset(tunKey) / 8 * 8; + return IPTunnelKeyGetOptionsOffset(tunKey) / 8 * 8; } 
else { - return OVS_WIN_TUNNEL_KEY_SIZE; + return OVS_WIN_IP_TUNNEL_KEY_SIZE; } } @@ -2319,7 +2383,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, UINT32 inPort, OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers, - OvsIPv4TunnelKey *tunKey) + OvsIPTunnelKey *tunKey) { struct Eth_Header *eth; UINT8 offset = 0; @@ -2328,19 +2392,20 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, layers->value = 0; if (tunKey) { - ASSERT(tunKey->dst != 0); - UINT8 optOffset = TunnelKeyGetOptionsOffset(tunKey); + ASSERT(!OvsIphIsZero(&(tunKey->dst))); + UINT8 optOffset = IPTunnelKeyGetOptionsOffset(tunKey); RtlMoveMemory(((UINT8 *)&flow->tunKey) + optOffset, ((UINT8 *)tunKey) + optOffset, - TunnelKeyGetRealSize(tunKey)); + IPTunnelKeyGetRealSize(tunKey)); } else { - flow->tunKey.dst = 0; + RtlZeroMemory(&flow->tunKey.dst, + sizeof(flow->tunKey.dst)); } - flow->l2.offset = OvsGetFlowL2Offset(tunKey); + flow->l2.offset = OvsGetFlowIPL2Offset(tunKey); flow->l2.inPort = inPort; if (OvsPacketLenNBL(packet) < ETH_HEADER_LEN_DIX) { - flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + 8 - flow->l2.offset; + flow->l2.keyLen = OVS_WIN_IP_TUNNEL_KEY_SIZE + 8 - flow->l2.offset; return NDIS_STATUS_SUCCESS; } @@ -2403,7 +2468,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, layers->l3Offset = ETH_HEADER_LEN_DIX + offset; } - flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE + flow->l2.keyLen = OVS_WIN_IP_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE - flow->l2.offset; /* Network layer. 
*/ if (flow->l2.dlType == htons(ETH_TYPE_IPV4)) { @@ -2670,8 +2735,8 @@ OvsLookupFlow(OVS_DATAPATH *datapath, UINT16 size = key->l2.keyLen; UINT8 *start; - ASSERT(key->tunKey.dst || offset == sizeof(OvsIPv4TunnelKey)); - ASSERT(!key->tunKey.dst || offset == OvsGetFlowL2Offset(&key->tunKey)); + ASSERT(!OvsIphIsZero(&(key->tunKey.dst)) || offset == sizeof(OvsIPTunnelKey)); + ASSERT(OvsIphIsZero(&(key->tunKey.dst)) || offset == OvsGetFlowIPL2Offset(&key->tunKey)); start = (UINT8 *)key + offset; @@ -2734,8 +2799,8 @@ OvsHashFlow(const OvsFlowKey *key) UINT16 size = key->l2.keyLen; UINT8 *start; - ASSERT(key->tunKey.dst || offset == sizeof(OvsIPv4TunnelKey)); - ASSERT(!key->tunKey.dst || offset == OvsGetFlowL2Offset(&key->tunKey)); + ASSERT(!OvsIphIsZero(&(key->tunKey.dst)) || offset == sizeof(OvsIPTunnelKey)); + ASSERT(OvsIphIsZero(&(key->tunKey.dst)) || offset == OvsGetFlowIPL2Offset(&key->tunKey)); start = (UINT8 *)key + offset; return OvsJhashBytes(start, size, 0); } @@ -3160,6 +3225,8 @@ OvsTunKeyAttrSize(void) return NlAttrTotalSize(8) /* OVS_TUNNEL_KEY_ATTR_ID */ + NlAttrTotalSize(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ + NlAttrTotalSize(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ + + NlAttrTotalSize(16) /* OVS_TUNNEL_KEY_ATTR_IPV6_SRC */ + + NlAttrTotalSize(16) /* OVS_TUNNEL_KEY_ATTR_IPV6_DST */ + NlAttrTotalSize(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ + NlAttrTotalSize(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ + NlAttrTotalSize(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ diff --git a/datapath-windows/ovsext/Flow.h b/datapath-windows/ovsext/Flow.h index 23d252c37..8f7214124 100644 --- a/datapath-windows/ovsext/Flow.h +++ b/datapath-windows/ovsext/Flow.h @@ -56,7 +56,7 @@ NDIS_STATUS OvsExtractLayers(const NET_BUFFER_LIST *packet, POVS_PACKET_HDR_INFO layers); NDIS_STATUS OvsExtractFlow(const NET_BUFFER_LIST *pkt, UINT32 inPort, OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers, - OvsIPv4TunnelKey *tunKey); + OvsIPTunnelKey *tunKey); OvsFlow* OvsLookupFlow(OVS_DATAPATH *datapath, const 
OvsFlowKey *key, UINT64 *hash, BOOLEAN hashValid); OvsFlow* OvsLookupFlowRecirc(OVS_DATAPATH *datapath, @@ -80,13 +80,14 @@ NTSTATUS OvsFlowNlGetCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx, UINT32 *replyLen); NTSTATUS MapFlowKeyToNlKey(PNL_BUFFER nlBuf, OvsFlowKey *flowKey, UINT16 keyType, UINT16 tunKeyType); -NTSTATUS MapFlowTunKeyToNlKey(PNL_BUFFER nlBuf, OvsIPv4TunnelKey *tunKey, +NTSTATUS MapFlowTunKeyToNlKey(PNL_BUFFER nlBuf, OvsIPTunnelKey *tunKey, UINT16 tunKeyType); VOID MapTunAttrToFlowPut(PNL_ATTR *keyAttrs, PNL_ATTR *tunAttrs, OvsFlowKey *destKey); UINT32 OvsFlowKeyAttrSize(void); UINT32 OvsTunKeyAttrSize(void); -NTSTATUS OvsTunnelAttrToIPv4TunnelKey(PNL_ATTR attr, OvsIPv4TunnelKey *tunKey); +NTSTATUS +OvsTunnelAttrToIPTunnelKey(PNL_ATTR attr, OvsIPTunnelKey *tunKey); /* Flags for tunneling */ #define OVS_TNL_F_DONT_FRAGMENT (1 << 0) diff --git a/datapath-windows/ovsext/Geneve.c b/datapath-windows/ovsext/Geneve.c index 5692b98fb..e4e81c157 100644 --- a/datapath-windows/ovsext/Geneve.c +++ b/datapath-windows/ovsext/Geneve.c @@ -69,7 +69,7 @@ OvsCleanupGeneveTunnel(POVS_VPORT_ENTRY vport) NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, + OvsIPTunnelKey* tunKey, POVS_SWITCH_CONTEXT switchContext, POVS_PACKET_HDR_INFO layers, PNET_BUFFER_LIST *newNbl, @@ -80,18 +80,27 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport, PNET_BUFFER curNb; PMDL curMdl; PUINT8 bufferStart; - EthHdr *ethHdr; - IPHdr *ipHdr; - UDPHdr *udpHdr; - GeneveHdr *geneveHdr; - GeneveOptionHdr *optHdr; + EthHdr* ethHdr; + IPHdr* ipHdr = NULL; + IPv6Hdr* ipv6Hdr = NULL; + UDPHdr* udpHdr = NULL; + GeneveHdr* geneveHdr; + GeneveOptionHdr* optHdr; POVS_GENEVE_VPORT vportGeneve; - UINT32 headRoom = OvsGetGeneveTunHdrMinSize() + tunKey->tunOptLen; + UINT32 headRoom = 0; + UINT32 packetLength; ULONG mss = 0; NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; - status = OvsLookupIPFwdInfo(tunKey->src, tunKey->dst, &fwdInfo); + if 
(tunKey->dst.si_family == AF_INET) { + headRoom = OvsGetGeneveTunHdrMinSize() + tunKey->tunOptLen; + } + else if (tunKey->dst.si_family == AF_INET6) { + headRoom = OvsGetGeneveIPv6TunHdrMinSize() + tunKey->tunOptLen; + } + + status = OvsLookupIPhFwdInfo(tunKey->src, tunKey->dst, &fwdInfo); if (status != STATUS_SUCCESS) { OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL); // return NDIS_STATUS_PENDING; @@ -104,7 +113,6 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport, */ return NDIS_STATUS_FAILURE; } - RtlCopyMemory(switchFwdInfo->value, fwdInfo.value, sizeof fwdInfo.value); curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); @@ -176,29 +184,61 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport, sizeof ethHdr->Destination); NdisMoveMemory(ethHdr->Source, fwdInfo.srcMacAddr, sizeof ethHdr->Source); - ethHdr->Type = htons(ETH_TYPE_IPV4); - /* IP header */ - ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); + if (tunKey->dst.si_family == AF_INET) { + ethHdr->Type = htons(ETH_TYPE_IPV4); + } else if (tunKey->dst.si_family == AF_INET6) { + ethHdr->Type = htons(ETH_TYPE_IPV6); + } + + if (tunKey->dst.si_family == AF_INET) { + /* IP header */ + ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); - ipHdr->ihl = sizeof *ipHdr / 4; - ipHdr->version = IPPROTO_IPV4; - ipHdr->tos = tunKey->tos; - ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr); - ipHdr->id = (uint16)atomic_add64(&vportGeneve->ipId, + ipHdr->ihl = sizeof *ipHdr / 4; + ipHdr->version = IPPROTO_IPV4; + ipHdr->tos = tunKey->tos; + ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr); + ipHdr->id = (uint16)atomic_add64(&vportGeneve->ipId, NET_BUFFER_DATA_LENGTH(curNb)); - ipHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ? + ipHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ? IP_DF_NBO : 0; - ipHdr->ttl = tunKey->ttl ? 
tunKey->ttl : GENEVE_DEFAULT_TTL; - ipHdr->protocol = IPPROTO_UDP; - ASSERT(tunKey->dst == fwdInfo.dstIpAddr); - ASSERT(tunKey->src == fwdInfo.srcIpAddr || tunKey->src == 0); - ipHdr->saddr = fwdInfo.srcIpAddr; - ipHdr->daddr = fwdInfo.dstIpAddr; - ipHdr->check = 0; + ipHdr->ttl = tunKey->ttl ? tunKey->ttl : GENEVE_DEFAULT_TTL; + ipHdr->protocol = IPPROTO_UDP; + ASSERT(OvsIphAddrEquals(&tunKey->dst, &fwdInfo.dstIphAddr)); + ASSERT(OvsIphAddrEquals(&tunKey->src, &fwdInfo.srcIphAddr) || OvsIphIsZero(&tunKey->src)); + ipHdr->saddr = fwdInfo.srcIphAddr.Ipv4.sin_addr.s_addr; + ipHdr->daddr = fwdInfo.dstIphAddr.Ipv4.sin_addr.s_addr; + ipHdr->check = 0; + } else if (tunKey->dst.si_family == AF_INET6) { + /* IPv6 header */ + ipv6Hdr = (IPv6Hdr *)((PCHAR)ethHdr + sizeof *ethHdr); + + ipv6Hdr->version = IPV6; + ipv6Hdr->priority = 0; + ipv6Hdr->flow_lbl[0] = 0; + ipv6Hdr->flow_lbl[1] = 0; + ipv6Hdr->flow_lbl[2] = 0; + ipv6Hdr->payload_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr - sizeof *ipv6Hdr); + ipv6Hdr->hop_limit = tunKey->ttl ? tunKey->ttl : GENEVE_DEFAULT_TTL; + ipv6Hdr->nexthdr = IPPROTO_UDP; + ASSERT(OvsIphAddrEquals(&(tunKey->dst), &(fwdInfo.dstIphAddr))); + ASSERT(OvsIphAddrEquals(&(tunKey->src), &(fwdInfo.srcIphAddr)) || OvsIphIsZero(&(tunKey->src))); + RtlCopyMemory(&ipv6Hdr->saddr, &fwdInfo.srcIphAddr.Ipv6.sin6_addr, + sizeof(ipv6Hdr->saddr)); + RtlCopyMemory(&ipv6Hdr->daddr, &fwdInfo.dstIphAddr.Ipv6.sin6_addr, + sizeof(ipv6Hdr->daddr)); + } /* UDP header */ - udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr); + + if (tunKey->dst.si_family == AF_INET) { + udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr); + } else if (tunKey->dst.si_family == AF_INET6) { + udpHdr = (UDPHdr *)((PCHAR)ipv6Hdr + sizeof *ipv6Hdr); + } + + ASSERT(udpHdr); udpHdr->source = htons(tunKey->flow_hash | MAXINT16); udpHdr->dest = tunKey->dst_port ? 
tunKey->dst_port : htons(vportGeneve->dstPort); @@ -206,10 +246,20 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport, sizeof *udpHdr + sizeof *geneveHdr + tunKey->tunOptLen); if (tunKey->flags & OVS_TNL_F_CSUM) { - UINT16 udpChksumLen = (UINT16) NET_BUFFER_DATA_LENGTH(curNb) - - sizeof *ipHdr - sizeof *ethHdr; - udpHdr->check = IPPseudoChecksum(&ipHdr->saddr, &ipHdr->daddr, - IPPROTO_UDP, udpChksumLen); + UINT16 udpChksumLen = 0; + if (tunKey->dst.si_family == AF_INET) { + udpChksumLen = (UINT16) NET_BUFFER_DATA_LENGTH(curNb) - + sizeof *ipHdr - sizeof *ethHdr; + udpHdr->check = IPPseudoChecksum(&ipHdr->saddr, &ipHdr->daddr, + IPPROTO_UDP, udpChksumLen); + } else if (tunKey->dst.si_family == AF_INET6) { + udpChksumLen = (UINT16) NET_BUFFER_DATA_LENGTH(curNb) - + sizeof *ipv6Hdr - sizeof *ethHdr; + + udpHdr->check = IPv6PseudoChecksum((UINT32*)&ipv6Hdr->saddr, + (UINT32*)&ipv6Hdr->daddr, + IPPROTO_UDP, udpChksumLen); + } } else { udpHdr->check = 0; } @@ -226,17 +276,25 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport, /* Geneve header options */ optHdr = (GeneveOptionHdr *)(geneveHdr + 1); - memcpy(optHdr, TunnelKeyGetOptions(tunKey), tunKey->tunOptLen); + memcpy(optHdr, IPTunnelKeyGetOptions(tunKey), tunKey->tunOptLen); csumInfo.Value = 0; csumInfo.Transmit.IpHeaderChecksum = 1; - csumInfo.Transmit.IsIPv4 = 1; + + if (tunKey->dst.si_family == AF_INET) { + csumInfo.Transmit.IsIPv4 = 1; + } else if (tunKey->dst.si_family == AF_INET6) { + csumInfo.Transmit.IsIPv4 = 0; + csumInfo.Transmit.IsIPv6 = 1; + } + if (tunKey->flags & OVS_TNL_F_CSUM) { csumInfo.Transmit.UdpChecksum = 1; } NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value; } + return STATUS_SUCCESS; ret_error: @@ -247,13 +305,14 @@ ret_error: NDIS_STATUS OvsDecapGeneve(POVS_SWITCH_CONTEXT switchContext, PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, + OvsIPTunnelKey *tunKey, PNET_BUFFER_LIST *newNbl) { PNET_BUFFER curNb; PMDL curMdl; EthHdr *ethHdr; IPHdr *ipHdr; + 
IPv6Hdr *ipv6Hdr; UDPHdr *udpHdr; GeneveHdr *geneveHdr; UINT32 tunnelSize; @@ -300,14 +359,29 @@ NDIS_STATUS OvsDecapGeneve(POVS_SWITCH_CONTEXT switchContext, ethHdr = (EthHdr *)bufferStart; /* XXX: Handle IP options. */ - ipHdr = (IPHdr *)(bufferStart + layers.l3Offset); - tunKey->src = ipHdr->saddr; - tunKey->dst = ipHdr->daddr; - tunKey->tos = ipHdr->tos; - tunKey->ttl = ipHdr->ttl; - tunKey->pad = 0; - udpHdr = (UDPHdr *)(bufferStart + layers.l4Offset); + if (layers.isIPv4) { + ipHdr = (IPHdr *)(bufferStart + layers.l3Offset); + tunKey->src.si_family = AF_INET; + tunKey->src.Ipv4.sin_addr.s_addr = ipHdr->saddr; + tunKey->dst.si_family = AF_INET; + tunKey->dst.Ipv4.sin_addr.s_addr = ipHdr->daddr; + tunKey->tos = ipHdr->tos; + tunKey->ttl = ipHdr->ttl; + tunKey->pad = 0; + } else if (layers.isIPv6) { + ipv6Hdr = (IPv6Hdr *)(bufferStart + layers.l3Offset); + tunKey->src.si_family = AF_INET6; + RtlCopyMemory(&(tunKey->src.Ipv6.sin6_addr), &ipv6Hdr->saddr, + sizeof(tunKey->src.Ipv6.sin6_addr)); + tunKey->dst.si_family = AF_INET6; + RtlCopyMemory(&(tunKey->dst.Ipv6.sin6_addr), &ipv6Hdr->daddr, + sizeof(tunKey->dst.Ipv6.sin6_addr)); + tunKey->tos = 0;/*???*/ + tunKey->ttl = ipv6Hdr->hop_limit; + tunKey->pad = 0; + } + udpHdr = (UDPHdr *)(bufferStart + layers.l4Offset); /* Validate if NIC has indicated checksum failure. */ status = OvsValidateUDPChecksum(curNbl, udpHdr->check == 0); if (status != NDIS_STATUS_SUCCESS) { @@ -316,7 +390,7 @@ NDIS_STATUS OvsDecapGeneve(POVS_SWITCH_CONTEXT switchContext, /* Calculate and verify UDP checksum if NIC didn't do it. 
*/ if (udpHdr->check != 0) { - status = OvsCalculateUDPChecksum(curNbl, curNb, ipHdr, udpHdr, + status = OvsCalculateUDPChecksum(curNbl, curNb, ethHdr, udpHdr, packetLength, &layers); tunKey->flags |= OVS_TNL_F_CSUM; if (status != NDIS_STATUS_SUCCESS) { @@ -346,17 +420,16 @@ NDIS_STATUS OvsDecapGeneve(POVS_SWITCH_CONTEXT switchContext, NdisAdvanceNetBufferDataStart(curNb, tunnelSize, FALSE, NULL); if (tunKey->tunOptLen > 0) { optStart = NdisGetDataBuffer(curNb, tunKey->tunOptLen, - TunnelKeyGetOptions(tunKey), 1, 0); + IPTunnelKeyGetOptions(tunKey), 1, 0); /* If data is contiguous in the buffer, NdisGetDataBuffer will not copy data to the storage. Manual copy is needed. */ - if (optStart != TunnelKeyGetOptions(tunKey)) { - memcpy(TunnelKeyGetOptions(tunKey), optStart, tunKey->tunOptLen); + if (optStart != IPTunnelKeyGetOptions(tunKey)) { + memcpy(IPTunnelKeyGetOptions(tunKey), optStart, tunKey->tunOptLen); } NdisAdvanceNetBufferDataStart(curNb, tunKey->tunOptLen, FALSE, NULL); tunKey->flags |= OVS_TNL_F_GENEVE_OPT; } - return NDIS_STATUS_SUCCESS; dropNbl: diff --git a/datapath-windows/ovsext/Geneve.h b/datapath-windows/ovsext/Geneve.h index db758dd07..6307479d1 100644 --- a/datapath-windows/ovsext/Geneve.h +++ b/datapath-windows/ovsext/Geneve.h @@ -87,7 +87,7 @@ VOID OvsCleanupGeneveTunnel(POVS_VPORT_ENTRY vport); NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, + OvsIPTunnelKey *tunKey, POVS_SWITCH_CONTEXT switchContext, POVS_PACKET_HDR_INFO layers, PNET_BUFFER_LIST *newNbl, @@ -95,7 +95,7 @@ NDIS_STATUS OvsEncapGeneve(POVS_VPORT_ENTRY vport, NDIS_STATUS OvsDecapGeneve(POVS_SWITCH_CONTEXT switchContext, PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, + OvsIPTunnelKey *tunKey, PNET_BUFFER_LIST *newNbl); static __inline UINT32 @@ -113,6 +113,21 @@ OvsGetGeneveTunHdrMaxSize(VOID) return OvsGetGeneveTunHdrMinSize() + TUN_OPT_MAX_LEN; } +static __inline UINT32 +OvsGetGeneveIPv6TunHdrMinSize(VOID) +{ + /* XXX: 
Can L2 include VLAN at all? */ + return sizeof (EthHdr) + sizeof (IPv6Hdr) + sizeof (UDPHdr) + + sizeof (GeneveHdr); +} + +static __inline UINT32 +OvsGetGeneveIPv6TunHdrMaxSize(VOID) +{ + /* XXX: Can L2 include VLAN at all? */ + return OvsGetGeneveIPv6TunHdrMinSize() + TUN_OPT_MAX_LEN; +} + static __inline UINT32 OvsGetGeneveTunHdrSizeFromLayers(POVS_PACKET_HDR_INFO layers) { diff --git a/datapath-windows/ovsext/Gre.c b/datapath-windows/ovsext/Gre.c index afdd5abe3..d87864029 100644 --- a/datapath-windows/ovsext/Gre.c +++ b/datapath-windows/ovsext/Gre.c @@ -37,7 +37,7 @@ static NDIS_STATUS OvsDoEncapGre(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl, - const OvsIPv4TunnelKey *tunKey, + const OvsIPTunnelKey *tunKey, const POVS_FWD_INFO fwdInfo, POVS_PACKET_HDR_INFO layers, POVS_SWITCH_CONTEXT switchContext, @@ -93,7 +93,7 @@ OvsCleanupGreTunnel(POVS_VPORT_ENTRY vport) NDIS_STATUS OvsEncapGre(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, + OvsIPTunnelKey *tunKey, POVS_SWITCH_CONTEXT switchContext, POVS_PACKET_HDR_INFO layers, PNET_BUFFER_LIST *newNbl, @@ -102,7 +102,12 @@ OvsEncapGre(POVS_VPORT_ENTRY vport, OVS_FWD_INFO fwdInfo; NDIS_STATUS status; - status = OvsLookupIPFwdInfo(tunKey->src, tunKey->dst, &fwdInfo); + if (tunKey->dst.si_family != AF_INET) { + /*V6 tunnel support will be supported later*/ + return NDIS_STATUS_FAILURE; + } + + status = OvsLookupIPhFwdInfo(tunKey->src, tunKey->dst, &fwdInfo); if (status != STATUS_SUCCESS) { OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL); return NDIS_STATUS_FAILURE; @@ -124,7 +129,7 @@ OvsEncapGre(POVS_VPORT_ENTRY vport, NDIS_STATUS OvsDoEncapGre(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl, - const OvsIPv4TunnelKey *tunKey, + const OvsIPTunnelKey *tunKey, const POVS_FWD_INFO fwdInfo, POVS_PACKET_HDR_INFO layers, POVS_SWITCH_CONTEXT switchContext, @@ -239,10 +244,11 @@ OvsDoEncapGre(POVS_VPORT_ENTRY vport, IP_DF_NBO : 0; ipHdr->ttl = tunKey->ttl ? 
tunKey->ttl : 64; ipHdr->protocol = IPPROTO_GRE; - ASSERT(tunKey->dst == fwdInfo->dstIpAddr); - ASSERT(tunKey->src == fwdInfo->srcIpAddr || tunKey->src == 0); - ipHdr->saddr = fwdInfo->srcIpAddr; - ipHdr->daddr = fwdInfo->dstIpAddr; + ASSERT(OvsIphAddrEquals(&tunKey->dst, &fwdInfo->dstIphAddr)); + ASSERT(OvsIphAddrEquals(&tunKey->src, &fwdInfo->srcIphAddr) || + OvsIphIsZero(&tunKey->src)); + ipHdr->saddr = fwdInfo->srcIphAddr.Ipv4.sin_addr.s_addr; + ipHdr->daddr = fwdInfo->dstIphAddr.Ipv4.sin_addr.s_addr; ipHdr->check = 0; ipHdr->check = IPChecksum((UINT8 *)ipHdr, sizeof *ipHdr, 0); @@ -306,7 +312,7 @@ ret_error: NDIS_STATUS OvsDecapGre(POVS_SWITCH_CONTEXT switchContext, PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, + OvsIPTunnelKey *tunKey, PNET_BUFFER_LIST *newNbl) { PNET_BUFFER curNb; @@ -325,6 +331,12 @@ OvsDecapGre(POVS_SWITCH_CONTEXT switchContext, ASSERT(*newNbl == NULL); *newNbl = NULL; + + if (tunKey->dst.si_family != AF_INET) { + /*V6 tunnel support will be supported later*/ + return NDIS_STATUS_FAILURE; + } + status = OvsExtractLayers(curNbl, &layers); if (status != NDIS_STATUS_SUCCESS) { return status; @@ -363,8 +375,11 @@ OvsDecapGre(POVS_SWITCH_CONTEXT switchContext, headRoom += layers.l3Offset; ipHdr = (IPHdr *)(bufferStart + layers.l3Offset); - tunKey->src = ipHdr->saddr; - tunKey->dst = ipHdr->daddr; + tunKey->src.Ipv4.sin_addr.s_addr = ipHdr->saddr; + tunKey->src.Ipv4.sin_family = AF_INET; + tunKey->dst.Ipv4.sin_addr.s_addr = ipHdr->daddr; + tunKey->dst.Ipv4.sin_family = AF_INET; + tunKey->tos = ipHdr->tos; tunKey->ttl = ipHdr->ttl; tunKey->pad = 0; diff --git a/datapath-windows/ovsext/Gre.h b/datapath-windows/ovsext/Gre.h index 45f3f59f3..144b6195e 100644 --- a/datapath-windows/ovsext/Gre.h +++ b/datapath-windows/ovsext/Gre.h @@ -66,7 +66,7 @@ void OvsCleanupGreTunnel(POVS_VPORT_ENTRY vport); NDIS_STATUS OvsEncapGre(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, + OvsIPTunnelKey *tunKey, POVS_SWITCH_CONTEXT 
switchContext, POVS_PACKET_HDR_INFO layers, PNET_BUFFER_LIST *newNbl, @@ -74,7 +74,7 @@ NDIS_STATUS OvsEncapGre(POVS_VPORT_ENTRY vport, NDIS_STATUS OvsDecapGre(POVS_SWITCH_CONTEXT switchContext, PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, + OvsIPTunnelKey *tunKey, PNET_BUFFER_LIST *newNbl); static __inline UINT16 diff --git a/datapath-windows/ovsext/IpHelper.c b/datapath-windows/ovsext/IpHelper.c index d7fa2ca84..b8866bed9 100644 --- a/datapath-windows/ovsext/IpHelper.c +++ b/datapath-windows/ovsext/IpHelper.c @@ -18,6 +18,7 @@ #include "IpHelper.h" #include "Switch.h" #include "Jhash.h" +#include extern POVS_SWITCH_CONTEXT gOvsSwitchContext; @@ -71,7 +72,7 @@ static OVS_IP_HELPER_THREAD_CONTEXT ovsIpHelperThreadContext; static POVS_IPFORWARD_ENTRY OvsLookupIPForwardEntry(PIP_ADDRESS_PREFIX prefix); static VOID OvsRemoveIPForwardEntry(POVS_IPFORWARD_ENTRY ipf); -static VOID OvsRemoveAllFwdEntriesWithSrc(UINT32 ipAddr); +static VOID OvsRemoveAllFwdEntriesWithSrc(SOCKADDR_INET ipAddr); static VOID OvsRemoveIPNeighEntriesWithInstance(POVS_IPHELPER_INSTANCE instance); static VOID OvsCleanupIpHelperRequestList(VOID); static VOID OvsCleanupFwdTable(VOID); @@ -196,24 +197,83 @@ OvsGetIPInterfaceEntry(NET_LUID luid, return status; } +static __inline VOID +OvsDumpIpAddrDesc(char *desc, char *andMsg, char *tailMsg, + const SOCKADDR_INET *IpAddress) +{ + if (IpAddress->si_family == AF_INET) { + UINT32 ipAddr = 0; + ipAddr = IpAddress->Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("%s: %d.%d.%d.%d%s%s", desc, + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, ipAddr >> 24, + andMsg?andMsg:"", tailMsg?tailMsg:""); + } else if (IpAddress->si_family == AF_INET6) { + struct in6_addr *pAddr = NULL; + char wszAddr[256] = {0}; + + pAddr = (struct in6_addr *)&(IpAddress->Ipv6.sin6_addr); + if (!RtlIpv6AddressToStringA(pAddr, wszAddr)) { + OVS_LOG_INFO("%s Ipv6 Address got failed\n", desc); + } else { + OVS_LOG_INFO("%s(IPv6): %s%s%s", desc, wszAddr, + andMsg?andMsg:"", 
tailMsg?tailMsg:""); + } + } +} + +static __inline VOID +OvsDumpIpAddrMsg(char *desc, const SOCKADDR_INET *IpAddress) +{ + OvsDumpIpAddrDesc(desc, NULL, NULL, IpAddress); +} + +static __inline VOID +OvsDumpIpAddrMsgStatus(char *descV4, char *descV6, NTSTATUS status, + const SOCKADDR_INET *IpAddress) +{ + if (!descV4 || !descV6 || !IpAddress) { + return; + } + + if (IpAddress->si_family == AF_INET) { + UINT32 ipAddr = IpAddress->Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("%s: %d.%d.%d.%d, status: %x", descV4, + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, status); + } else if (IpAddress->si_family == AF_INET6) { + struct in6_addr *pAddr = NULL; + char wszAddr[256] = {0}; + pAddr = (struct in6_addr*)&(IpAddress->Ipv6.sin6_addr); + if (!RtlIpv6AddressToStringA(pAddr, wszAddr)) { + OVS_LOG_INFO("Ipv6 Address got failed\n"); + } else { + OVS_LOG_INFO("%s: %s, status: %x", + descV6, wszAddr, status); + } + } + +} + +static __inline VOID +OvsDumpIpAddrDescStatus(char *desc, NTSTATUS status, + const SOCKADDR_INET *IpAddress) +{ + OvsDumpIpAddrMsgStatus(desc, desc, status, IpAddress); +} + static VOID OvsDumpIPEntry(PMIB_UNICASTIPADDRESS_ROW ipRow) { - UINT32 ipAddr; - OVS_LOG_INFO("InterfaceLuid: NetLuidIndex: %d, type: %d", ipRow->InterfaceLuid.Info.NetLuidIndex, ipRow->InterfaceLuid.Info.IfType); OVS_LOG_INFO("InterfaceIndex: %d", ipRow->InterfaceIndex); - ASSERT(ipRow->Address.si_family == AF_INET); - - ipAddr = ipRow->Address.Ipv4.sin_addr.s_addr; - OVS_LOG_INFO("Unicast Address: %d.%d.%d.%d\n", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, ipAddr >> 24); + OVS_LOG_INFO("Address.si_family: %d", ipRow->Address.si_family); + OvsDumpIpAddrMsg("Unicast Address", &(ipRow->Address)); } @@ -229,7 +289,7 @@ OvsGetIPEntry(NET_LUID interfaceLuid, return STATUS_INVALID_PARAMETER; } - status = GetUnicastIpAddressTable(AF_INET, &ipTable); + status = GetUnicastIpAddressTable(AF_UNSPEC, &ipTable); if (status != STATUS_SUCCESS) { 
OVS_LOG_INFO("Fail to get unicast address table, status: %x", status); @@ -300,37 +360,25 @@ OvsDumpRoute(const SOCKADDR_INET *sourceAddress, const SOCKADDR_INET *destinationAddress, PMIB_IPFORWARD_ROW2 route) { - UINT32 ipAddr = destinationAddress->Ipv4.sin_addr.s_addr; - - OVS_LOG_INFO("Destination: %d.%d.%d.%d", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); - - ipAddr = sourceAddress->Ipv4.sin_addr.s_addr; - OVS_LOG_INFO("Source: %d.%d.%d.%d", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); - - ipAddr = route->NextHop.Ipv4.sin_addr.s_addr; - OVS_LOG_INFO("NextHop: %d.%d.%d.%d", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); + OvsDumpIpAddrMsg("Destination", destinationAddress); + OvsDumpIpAddrMsg("Source", sourceAddress); + OvsDumpIpAddrMsg("NextHop", &(route->NextHop)); } - NTSTATUS OvsGetRoute(SOCKADDR_INET *destinationAddress, PMIB_IPFORWARD_ROW2 route, SOCKADDR_INET *sourceAddress, POVS_IPHELPER_INSTANCE *instance, POVS_VPORT_ENTRY* vport, - UINT32 srcIp) + SOCKADDR_INET srcIp) { NTSTATUS status = STATUS_NETWORK_UNREACHABLE; NTSTATUS result = STATUS_SUCCESS; PLIST_ENTRY head, link, next; ULONG minMetric = MAXULONG; + if (destinationAddress == NULL || route == NULL) { return STATUS_INVALID_PARAMETER; } @@ -342,7 +390,10 @@ OvsGetRoute(SOCKADDR_INET *destinationAddress, MIB_IPFORWARD_ROW2 crtRoute = { 0 }; POVS_IPHELPER_INSTANCE crtInstance = NULL; WCHAR interfaceName[IF_MAX_STRING_SIZE + 1]; - +#ifdef DBG + char ansiIfname[256] = { 0 }; + size_t strLen = 0; +#endif crtInstance = CONTAINING_RECORD(link, OVS_IPHELPER_INSTANCE, link); ExAcquireResourceExclusiveLite(&crtInstance->lock, TRUE); @@ -355,8 +406,24 @@ OvsGetRoute(SOCKADDR_INET *destinationAddress, continue; } +#ifdef DBG + RtlZeroMemory(ansiIfname, 256); + + status = + ConvertInterfaceLuidToAlias(&crtInstance->internalRow.InterfaceLuid, + interfaceName, + IF_MAX_STRING_SIZE + 1); 
+ + if (NT_SUCCESS(status)) { + status = RtlStringCbLengthW(interfaceName, IF_MAX_STRING_SIZE, + &strLen); + } + + OvsConvertWcharToAnsiStr(interfaceName, strLen, ansiIfname, 256); + OVS_LOG_INFO("the interface name is %s", ansiIfname); +#endif if (minMetric > crtRoute.Metric && - (!srcIp || srcIp == crtSrcAddr.Ipv4.sin_addr.S_un.S_addr)) { + (OvsIphIsZero(&srcIp) || OvsIphAddrEquals(&srcIp, &crtSrcAddr))) { status = STATUS_SUCCESS; size_t len = 0; minMetric = crtRoute.Metric; @@ -374,13 +441,24 @@ OvsGetRoute(SOCKADDR_INET *destinationAddress, status = RtlStringCbLengthW(interfaceName, IF_MAX_STRING_SIZE, &len); } - +#ifdef DBG + RtlZeroMemory(ansiIfname, 256); + OvsConvertWcharToAnsiStr(interfaceName, len, ansiIfname, 256); + OVS_LOG_INFO("the found interface name is %s", ansiIfname); +#endif if (gOvsSwitchContext != NULL && NT_SUCCESS(status)) { NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, - &lockState, 0); + &lockState, 0); *vport = OvsFindVportByHvNameW(gOvsSwitchContext, interfaceName, len); +#ifdef DBG + if (*vport) { + OVS_LOG_INFO("match the ovs port ovsName: %s", (*vport)->ovsName); + } else { + OVS_LOG_INFO("not get the ovs port"); + } +#endif NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); } } @@ -396,11 +474,7 @@ OvsGetRoute(SOCKADDR_INET *destinationAddress, static VOID OvsDumpIPNeigh(PMIB_IPNET_ROW2 ipNeigh) { - UINT32 ipAddr = ipNeigh->Address.Ipv4.sin_addr.s_addr; - - OVS_LOG_INFO("Neigh: %d.%d.%d.%d", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); + OvsDumpIpAddrMsg("Neigh", &(ipNeigh->Address)); OVS_LOG_INFO("MAC Address: %02x:%02x:%02x:%02x:%02x:%02x", ipNeigh->PhysicalAddress[0], ipNeigh->PhysicalAddress[1], @@ -421,11 +495,10 @@ OvsGetIPNeighEntry(PMIB_IPNET_ROW2 ipNeigh) status = GetIpNetEntry2(ipNeigh); if (status != STATUS_SUCCESS) { - UINT32 ipAddr = ipNeigh->Address.Ipv4.sin_addr.s_addr; - OVS_LOG_INFO("Fail to get ARP entry: %d.%d.%d.%d, status: %x", - ipAddr & 0xff, 
(ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, status); - return status; + OvsDumpIpAddrMsgStatus("Fail to get ARP entry", + "Fail to get neighbour entry", + status, &(ipNeigh->Address)); + return status; } if (ipNeigh->State == NlnsReachable || ipNeigh->State == NlnsPermanent) { @@ -445,10 +518,9 @@ OvsResolveIPNeighEntry(PMIB_IPNET_ROW2 ipNeigh) status = ResolveIpNetEntry2(ipNeigh, NULL); if (status != STATUS_SUCCESS) { - UINT32 ipAddr = ipNeigh->Address.Ipv4.sin_addr.s_addr; - OVS_LOG_INFO("Fail to resolve ARP entry: %d.%d.%d.%d, status: %x", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, status); + OvsDumpIpAddrMsgStatus("Fail to get ARP entry", + "Fail to get neighbour entry", + status, &(ipNeigh->Address)); return status; } @@ -463,7 +535,7 @@ OvsResolveIPNeighEntry(PMIB_IPNET_ROW2 ipNeigh) NTSTATUS OvsGetOrResolveIPNeigh(PMIB_IF_ROW2 ipRow, - UINT32 ipAddr, + SOCKADDR_INET ipAddr, PMIB_IPNET_ROW2 ipNeigh) { NTSTATUS status; @@ -473,8 +545,7 @@ OvsGetOrResolveIPNeigh(PMIB_IF_ROW2 ipRow, RtlZeroMemory(ipNeigh, sizeof (*ipNeigh)); ipNeigh->InterfaceLuid.Value = ipRow->InterfaceLuid.Value; ipNeigh->InterfaceIndex = ipRow->InterfaceIndex; - ipNeigh->Address.si_family = AF_INET; - ipNeigh->Address.Ipv4.sin_addr.s_addr = ipAddr; + OvsCopyIphAddress(&(ipNeigh->Address), &ipAddr); status = OvsGetIPNeighEntry(ipNeigh); @@ -482,8 +553,7 @@ OvsGetOrResolveIPNeigh(PMIB_IF_ROW2 ipRow, RtlZeroMemory(ipNeigh, sizeof (*ipNeigh)); ipNeigh->InterfaceLuid.Value = ipRow->InterfaceLuid.Value; ipNeigh->InterfaceIndex = ipRow->InterfaceIndex; - ipNeigh->Address.si_family = AF_INET; - ipNeigh->Address.Ipv4.sin_addr.s_addr = ipAddr; + OvsCopyIphAddress(&(ipNeigh->Address), &ipAddr); status = OvsResolveIPNeighEntry(ipNeigh); } return status; @@ -714,29 +784,45 @@ OvsChangeCallbackIpRoute(PVOID context, MIB_NOTIFICATION_TYPE notificationType) { UINT32 ipAddr, nextHop; + struct in6_addr *pAddr = NULL, *pNextHop = NULL; + 
char wszAddr[256] = { 0 }; + char wszNextHop[256] = { 0 }; UNREFERENCED_PARAMETER(context); switch (notificationType) { case MibAddInstance: ASSERT(ipRoute); - ipAddr = ipRoute->DestinationPrefix.Prefix.Ipv4.sin_addr.s_addr; - nextHop = ipRoute->NextHop.Ipv4.sin_addr.s_addr; - - OVS_LOG_INFO("IPRoute: To %d.%d.%d.%d/%d through %d.%d.%d.%d added", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, - ipRoute->DestinationPrefix.PrefixLength, - nextHop & 0xff, (nextHop >> 8) & 0xff, - (nextHop >> 16) & 0xff, (nextHop >> 24) & 0xff); + if (ipRoute->DestinationPrefix.Prefix.si_family == AF_INET) { + ipAddr = ipRoute->DestinationPrefix.Prefix.Ipv4.sin_addr.s_addr; + nextHop = ipRoute->NextHop.Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("IPRoute: To %d.%d.%d.%d/%d through %d.%d.%d.%d added", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, + ipRoute->DestinationPrefix.PrefixLength, + nextHop & 0xff, (nextHop >> 8) & 0xff, + (nextHop >> 16) & 0xff, (nextHop >> 24) & 0xff); + } else if (ipRoute->DestinationPrefix.Prefix.si_family == AF_INET6) { + pAddr = (struct in6_addr *)&(ipRoute->DestinationPrefix.Prefix.Ipv6.sin6_addr); + if (!RtlIpv6AddressToStringA(pAddr, wszAddr)) { + OVS_LOG_INFO("DestinationPrefix Ipv6 Address got failed\n"); + } else { + pNextHop = (struct in6_addr *)&(ipRoute->NextHop.Ipv6.sin6_addr); + if (!RtlIpv6AddressToStringA(pNextHop, wszNextHop)) { + OVS_LOG_INFO("NextHop Ipv6 Address got failed\n"); + } else { + OVS_LOG_INFO("IPRoute: To %s/%d through %s added", + wszAddr, ipRoute->DestinationPrefix.PrefixLength, + wszNextHop); + } + } + } break; case MibParameterNotification: case MibDeleteInstance: { ASSERT(ipRoute); - ipAddr = ipRoute->DestinationPrefix.Prefix.Ipv4.sin_addr.s_addr; - nextHop = ipRoute->NextHop.Ipv4.sin_addr.s_addr; POVS_IPFORWARD_ENTRY ipf; LOCK_STATE_EX lockState; @@ -748,14 +834,34 @@ OvsChangeCallbackIpRoute(PVOID context, } NdisReleaseRWLock(ovsTableLock, 
&lockState); - OVS_LOG_INFO("IPRoute: To %d.%d.%d.%d/%d through %d.%d.%d.%d %s.", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, - ipRoute->DestinationPrefix.PrefixLength, - nextHop & 0xff, (nextHop >> 8) & 0xff, - (nextHop >> 16) & 0xff, (nextHop >> 24) & 0xff, - notificationType == MibDeleteInstance ? "deleted" : - "modified"); + if (ipRoute->DestinationPrefix.Prefix.si_family == AF_INET) { + ipAddr = ipRoute->DestinationPrefix.Prefix.Ipv4.sin_addr.s_addr; + nextHop = ipRoute->NextHop.Ipv4.sin_addr.s_addr; + + OVS_LOG_INFO("IPRoute: To %d.%d.%d.%d/%d through %d.%d.%d.%d %s.", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, + ipRoute->DestinationPrefix.PrefixLength, + nextHop & 0xff, (nextHop >> 8) & 0xff, + (nextHop >> 16) & 0xff, (nextHop >> 24) & 0xff, + notificationType == MibDeleteInstance ? "deleted" : + "modified"); + } else if (ipRoute->DestinationPrefix.Prefix.si_family == AF_INET6) { + pAddr = (struct in6_addr *)&(ipRoute->DestinationPrefix.Prefix.Ipv6.sin6_addr); + if (!RtlIpv6AddressToStringA(pAddr, wszAddr)) { + OVS_LOG_INFO("DestinationPrefix Ipv6 Address got failed\n"); + } else { + pNextHop = (struct in6_addr *)&(ipRoute->NextHop.Ipv6.sin6_addr); + if (!RtlIpv6AddressToStringA(pNextHop, wszNextHop)) { + OVS_LOG_INFO("NextHop Ipv6 Address got failed\n"); + } else { + OVS_LOG_INFO("IPRoute: To %s/%d through %s %s.", + wszAddr, ipRoute->DestinationPrefix.PrefixLength, + wszNextHop, notificationType == MibDeleteInstance ? 
"deleted" : + "modified"); + } + } + } break; } @@ -772,7 +878,7 @@ OvsChangeCallbackUnicastIpAddress(PVOID context, PMIB_UNICASTIPADDRESS_ROW unicastRow, MIB_NOTIFICATION_TYPE notificationType) { - UINT32 ipAddr; + SOCKADDR_INET iphAddr = { 0 }; UNREFERENCED_PARAMETER(context); switch (notificationType) { @@ -782,7 +888,7 @@ OvsChangeCallbackUnicastIpAddress(PVOID context, PLIST_ENTRY head, link, next; ASSERT(unicastRow); - ipAddr = unicastRow->Address.Ipv4.sin_addr.s_addr; + OvsCopyIphAddress(&iphAddr, &(unicastRow->Address)); ExAcquireResourceExclusiveLite(&ovsInstanceListLock, TRUE); head = &(ovsInstanceList); @@ -796,14 +902,12 @@ OvsChangeCallbackUnicastIpAddress(PVOID context, OvsCheckInstanceRow(&instance->internalRow, &unicastRow->InterfaceLuid, unicastRow->InterfaceIndex)) { + OvsCopyIphAddress(&(instance->ipAddress), &(unicastRow->Address)); - instance->ipAddress = ipAddr; - - OVS_LOG_INFO("IP Address: %d.%d.%d.%d is %s", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, - notificationType == MibAddInstance ? "added": "modified"); - + OvsDumpIpAddrDesc("IP Address", " is ", + notificationType == MibAddInstance ? 
+ "added": "modified", + &(unicastRow->Address)); ExReleaseResourceLite(&instance->lock); break; } @@ -821,7 +925,7 @@ OvsChangeCallbackUnicastIpAddress(PVOID context, BOOLEAN found = FALSE; ASSERT(unicastRow); - ipAddr = unicastRow->Address.Ipv4.sin_addr.s_addr; + OvsCopyIphAddress(&iphAddr, &(unicastRow->Address)); ExAcquireResourceExclusiveLite(&ovsInstanceListLock, TRUE); head = &(ovsInstanceList); @@ -847,12 +951,10 @@ OvsChangeCallbackUnicastIpAddress(PVOID context, if (found) { NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); - OvsRemoveAllFwdEntriesWithSrc(ipAddr); + OvsRemoveAllFwdEntriesWithSrc(iphAddr); NdisReleaseRWLock(ovsTableLock, &lockState); - OVS_LOG_INFO("IP Address removed: %d.%d.%d.%d", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); + OvsDumpIpAddrMsg("IP Address removed", &(unicastRow->Address)); } break; @@ -891,7 +993,7 @@ OvsRegisterChangeNotification() UINT dummy = 0; - status = NotifyIpInterfaceChange(AF_INET, OvsChangeCallbackIpInterface, + status = NotifyIpInterfaceChange(AF_UNSPEC, OvsChangeCallbackIpInterface, NULL, TRUE, &ipInterfaceNotificationHandle); if (status != STATUS_SUCCESS) { @@ -901,14 +1003,14 @@ OvsRegisterChangeNotification() } /* The CallerContext is dummy and should never be used */ - status = NotifyRouteChange2(AF_INET, OvsChangeCallbackIpRoute, &dummy, + status = NotifyRouteChange2(AF_UNSPEC, OvsChangeCallbackIpRoute, &dummy, TRUE, &ipRouteNotificationHandle); if (status != STATUS_SUCCESS) { OVS_LOG_ERROR("Failed to register IP route change, status: %x.", status); goto register_cleanup; } - status = NotifyUnicastIpAddressChange(AF_INET, + status = NotifyUnicastIpAddressChange(AF_UNSPEC, OvsChangeCallbackUnicastIpAddress, NULL, TRUE, &unicastIPNotificationHandle); @@ -926,53 +1028,53 @@ register_cleanup: static POVS_IPNEIGH_ENTRY -OvsLookupIPNeighEntry(UINT32 ipAddr) +OvsLookupIPNeighEntry(SOCKADDR_INET ipAddr) { PLIST_ENTRY link; - UINT32 hash = OvsJhashWords(&ipAddr, 1, 
OVS_HASH_BASIS); + + UINT32 hash = OvsJhashIphHdr(&ipAddr); LIST_FORALL(&ovsNeighHashTable[hash & OVS_NEIGH_HASH_TABLE_MASK], link) { POVS_IPNEIGH_ENTRY entry; entry = CONTAINING_RECORD(link, OVS_IPNEIGH_ENTRY, link); - if (entry->ipAddr == ipAddr) { + if (OvsIphAddrEquals(&(entry->ipAddr), &ipAddr)) { return entry; } } return NULL; } - static UINT32 OvsHashIPPrefix(PIP_ADDRESS_PREFIX prefix) { - UINT64 words = (UINT64)prefix->Prefix.Ipv4.sin_addr.s_addr << 32 | - (UINT32)prefix->PrefixLength; - return OvsJhashWords((UINT32 *)&words, 2, OVS_HASH_BASIS); + UINT32 hash = 0; + + if (prefix->Prefix.si_family == AF_INET) { + UINT64 words = (UINT64)prefix->Prefix.Ipv4.sin_addr.s_addr << 32 | + (UINT32)prefix->PrefixLength; + hash = OvsJhashWords((UINT32 *)&words, 2, OVS_HASH_BASIS); + } else if (prefix->Prefix.si_family == AF_INET6) { + UCHAR words[20] = { 0 }; + RtlCopyMemory(words, prefix->Prefix.Ipv6.sin6_addr.u.Byte, + sizeof(prefix->Prefix.Ipv6.sin6_addr.u.Byte)); + *((UINT32*)(&words[16])) = (UINT32)prefix->PrefixLength; + hash = OvsJhashBytes((UINT32 *)words, 5, OVS_HASH_BASIS); + } + return hash; } - static POVS_IPFORWARD_ENTRY OvsLookupIPForwardEntry(PIP_ADDRESS_PREFIX prefix) { - PLIST_ENTRY link; UINT32 hash; - ASSERT(prefix->Prefix.si_family == AF_INET); - - hash = RtlUlongByteSwap(prefix->Prefix.Ipv4.sin_addr.s_addr); - - ASSERT(prefix->PrefixLength >= 32 || - (hash & (((UINT32)1 << (32 - prefix->PrefixLength)) - 1)) == 0); - hash = OvsHashIPPrefix(prefix); LIST_FORALL(&ovsRouteHashTable[hash & OVS_ROUTE_HASH_TABLE_MASK], link) { POVS_IPFORWARD_ENTRY ipfEntry; ipfEntry = CONTAINING_RECORD(link, OVS_IPFORWARD_ENTRY, link); - if (ipfEntry->prefix.PrefixLength == prefix->PrefixLength && - ipfEntry->prefix.Prefix.Ipv4.sin_addr.s_addr == - prefix->Prefix.Ipv4.sin_addr.s_addr) { + if (OvsIphAddrEquals(&ipfEntry->prefix.Prefix, &prefix->Prefix)) { return ipfEntry; } } @@ -981,17 +1083,19 @@ OvsLookupIPForwardEntry(PIP_ADDRESS_PREFIX prefix) static POVS_FWD_ENTRY 
-OvsLookupIPFwdEntry(UINT32 srcIp, UINT32 dstIp) +OvsLookupIPFwdEntry(SOCKADDR_INET srcIp, SOCKADDR_INET dstIp) { PLIST_ENTRY link; - UINT32 hash = OvsJhashWords(&dstIp, 1, OVS_HASH_BASIS); + UINT32 hash = 0; + hash = OvsJhashIphHdr(&dstIp); LIST_FORALL(&ovsFwdHashTable[hash & OVS_FWD_HASH_TABLE_MASK], link) { POVS_FWD_ENTRY entry; entry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, link); - if (entry->info.dstIpAddr == dstIp && - (!srcIp || entry->info.srcIpAddr == srcIp)) { + + if (OvsIphAddrEquals(&(entry->info.dstIphAddr), &dstIp) && + (OvsIphIsZero(&srcIp) || OvsIphAddrEquals(&(entry->info.srcIphAddr), &srcIp))) { return entry; } } @@ -1000,9 +1104,9 @@ OvsLookupIPFwdEntry(UINT32 srcIp, UINT32 dstIp) NTSTATUS -OvsLookupIPFwdInfo(UINT32 srcIp, - UINT32 dstIp, - POVS_FWD_INFO info) +OvsLookupIPhFwdInfo(SOCKADDR_INET srcIp, + SOCKADDR_INET dstIp, + POVS_FWD_INFO info) { POVS_FWD_ENTRY entry; LOCK_STATE_EX lockState; @@ -1013,13 +1117,13 @@ OvsLookupIPFwdInfo(UINT32 srcIp, if (entry) { RtlCopyMemory(info->value, entry->info.value, sizeof entry->info.value); + OvsCopyIphAddress(&info->dstIphAddr, &(entry->info.dstIphAddr)); status = STATUS_SUCCESS; } NdisReleaseRWLock(ovsTableLock, &lockState); return status; } - static POVS_IPNEIGH_ENTRY OvsCreateIPNeighEntry(PMIB_IPNET_ROW2 ipNeigh, POVS_IPHELPER_INSTANCE instance) @@ -1036,7 +1140,7 @@ OvsCreateIPNeighEntry(PMIB_IPNET_ROW2 ipNeigh, } RtlZeroMemory(entry, sizeof (OVS_IPNEIGH_ENTRY)); - entry->ipAddr = ipNeigh->Address.Ipv4.sin_addr.s_addr; + OvsCopyIphAddress(&(entry->ipAddr), &(ipNeigh->Address)); KeQuerySystemTime((LARGE_INTEGER *)&timeVal); entry->timeout = timeVal + OVS_IPNEIGH_TIMEOUT; RtlCopyMemory(entry->macAddr, ipNeigh->PhysicalAddress, @@ -1064,7 +1168,7 @@ OvsCreateIPForwardEntry(PMIB_IPFORWARD_ROW2 ipRoute) RtlZeroMemory(entry, sizeof (OVS_IPFORWARD_ENTRY)); RtlCopyMemory(&entry->prefix, &ipRoute->DestinationPrefix, sizeof (IP_ADDRESS_PREFIX)); - entry->nextHop = ipRoute->NextHop.Ipv4.sin_addr.s_addr; + 
OvsCopyIphAddress(&(entry->nextHop), &(ipRoute->NextHop)); InitializeListHead(&entry->fwdList); return entry; @@ -1198,7 +1302,8 @@ OvsAddIPFwdCache(POVS_FWD_ENTRY fwdEntry, NdisAcquireSpinLock(&ovsIpHelperLock); OvsAddToSortedNeighList(ipn); NdisReleaseSpinLock(&ovsIpHelperLock); - hash = OvsJhashWords(&ipn->ipAddr, 1, OVS_HASH_BASIS); + hash = OvsJhashIphHdr(&ipn->ipAddr); + InsertHeadList(&ovsNeighHashTable[hash & OVS_NEIGH_HASH_TABLE_MASK], &ipn->link); } @@ -1216,15 +1321,15 @@ OvsAddIPFwdCache(POVS_FWD_ENTRY fwdEntry, ipn->refCount++; fwdEntry->ipn = ipn; - hash = OvsJhashWords(&fwdEntry->info.dstIpAddr, 1, OVS_HASH_BASIS); + hash = OvsJhashIphHdr(&(fwdEntry->info.dstIphAddr)); + InsertHeadList(&ovsFwdHashTable[hash & OVS_FWD_HASH_TABLE_MASK], &fwdEntry->link); ovsNumFwdEntries++; } - static VOID -OvsRemoveAllFwdEntriesWithSrc(UINT32 ipAddr) +OvsRemoveAllFwdEntriesWithSrc(SOCKADDR_INET ipAddr) { UINT32 i; PLIST_ENTRY link, next; @@ -1234,14 +1339,14 @@ OvsRemoveAllFwdEntriesWithSrc(UINT32 ipAddr) POVS_FWD_ENTRY fwdEntry; fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, link); - if (fwdEntry->info.srcIpAddr == ipAddr) { + + if (OvsIphAddrEquals(&(fwdEntry->info.srcIphAddr), &ipAddr)) { OvsRemoveFwdEntry(fwdEntry); } } } } - static VOID OvsRemoveIPNeighEntriesWithInstance(POVS_IPHELPER_INSTANCE instance) { @@ -1519,7 +1624,7 @@ OvsEnqueueIpHelperRequest(POVS_IP_HELPER_REQUEST request) NTSTATUS OvsFwdIPHelperRequest(PNET_BUFFER_LIST nbl, UINT32 inPort, - const OvsIPv4TunnelKey *tunnelKey, + const OvsIPTunnelKey *tunnelKey, OvsIPHelperCallback cb, PVOID cbData1, PVOID cbData2) @@ -1540,21 +1645,18 @@ OvsFwdIPHelperRequest(PNET_BUFFER_LIST nbl, request->fwdReq.cb = cb; request->fwdReq.cbData1 = cbData1; request->fwdReq.cbData2 = cbData2; - return OvsEnqueueIpHelperRequest(request); } - static VOID OvsHandleFwdRequest(POVS_IP_HELPER_REQUEST request) { - SOCKADDR_INET dst, src; NTSTATUS status; MIB_IPFORWARD_ROW2 ipRoute; MIB_IPNET_ROW2 ipNeigh; - OVS_FWD_INFO 
fwdInfo = { 0 }; - UINT32 ipAddr; - UINT32 srcAddr; + OVS_FWD_INFO fwdInfo; + SOCKADDR_INET iphAddr; + SOCKADDR_INET srcAddr; POVS_FWD_ENTRY fwdEntry = NULL; POVS_IPFORWARD_ENTRY ipf = NULL; POVS_IPNEIGH_ENTRY ipn = NULL; @@ -1564,44 +1666,40 @@ OvsHandleFwdRequest(POVS_IP_HELPER_REQUEST request) BOOLEAN newFWD = FALSE; POVS_IPHELPER_INSTANCE instance = NULL; - status = OvsLookupIPFwdInfo(request->fwdReq.tunnelKey.src, - request->fwdReq.tunnelKey.dst, - &fwdInfo); + RtlZeroMemory(&fwdInfo, sizeof(OVS_FWD_INFO)); + + status = OvsLookupIPhFwdInfo(request->fwdReq.tunnelKey.src, + request->fwdReq.tunnelKey.dst, + &fwdInfo); if (status == STATUS_SUCCESS) { goto fwd_handle_nbl; } /* find IPRoute */ - RtlZeroMemory(&dst, sizeof(dst)); - RtlZeroMemory(&src, sizeof(src)); + RtlZeroMemory(&srcAddr, sizeof(srcAddr)); + RtlZeroMemory(&iphAddr, sizeof(iphAddr)); RtlZeroMemory(&ipRoute, sizeof (MIB_IPFORWARD_ROW2)); - dst.si_family = AF_INET; - dst.Ipv4.sin_addr.s_addr = request->fwdReq.tunnelKey.dst; - - status = OvsGetRoute(&dst, &ipRoute, &src, &instance, &fwdInfo.vport, request->fwdReq.tunnelKey.src); - if (request->fwdReq.tunnelKey.src && request->fwdReq.tunnelKey.src != src.Ipv4.sin_addr.s_addr) { - UINT32 tempAddr = dst.Ipv4.sin_addr.s_addr; - OVS_LOG_INFO("Fail to get route to %d.%d.%d.%d, status: %x", - tempAddr & 0xff, (tempAddr >> 8) & 0xff, - (tempAddr >> 16) & 0xff, (tempAddr >> 24) & 0xff, status); + status = OvsGetRoute(&request->fwdReq.tunnelKey.dst, &ipRoute, &srcAddr, + &instance, &fwdInfo.vport, + request->fwdReq.tunnelKey.src); + if (!OvsIphIsZero(&(request->fwdReq.tunnelKey.src)) && + !OvsIphAddrEquals(&(request->fwdReq.tunnelKey.src), &srcAddr)) { + OvsDumpIpAddrDescStatus("Fail to get route to", + status, &(request->fwdReq.tunnelKey.dst)); goto fwd_handle_nbl; } if (status != STATUS_SUCCESS || instance == NULL) { - UINT32 tempAddr = dst.Ipv4.sin_addr.s_addr; - OVS_LOG_INFO("Fail to get route to %d.%d.%d.%d, status: %x", - tempAddr & 0xff, (tempAddr >> 8) & 
0xff, - (tempAddr >> 16) & 0xff, (tempAddr >> 24) & 0xff, status); + OvsDumpIpAddrDescStatus("Fail to get route to", + status, &(request->fwdReq.tunnelKey.dst)); goto fwd_handle_nbl; } ExAcquireResourceExclusiveLite(&instance->lock, TRUE); - srcAddr = src.Ipv4.sin_addr.s_addr; - /* find IPNeigh */ - ipAddr = ipRoute.NextHop.Ipv4.sin_addr.s_addr; - if (ipAddr != 0) { + OvsCopyIphAddress(&iphAddr, &(ipRoute.NextHop)); + if (!OvsIphIsZero(&iphAddr)) { NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); - ipn = OvsLookupIPNeighEntry(ipAddr); + ipn = OvsLookupIPNeighEntry(iphAddr); if (ipn) { goto fwd_request_done; } @@ -1610,11 +1708,11 @@ OvsHandleFwdRequest(POVS_IP_HELPER_REQUEST request) RtlZeroMemory(&ipNeigh, sizeof (ipNeigh)); ipNeigh.InterfaceLuid.Value = instance->internalRow.InterfaceLuid.Value; - if (ipAddr == 0) { - ipAddr = request->fwdReq.tunnelKey.dst; + if (OvsIphIsZero(&iphAddr)) { + OvsCopyIphAddress(&iphAddr, &(request->fwdReq.tunnelKey.dst)); } status = OvsGetOrResolveIPNeigh(&instance->internalRow, - ipAddr, &ipNeigh); + iphAddr, &ipNeigh); if (status != STATUS_SUCCESS) { ExReleaseResourceLite(&instance->lock); goto fwd_handle_nbl; @@ -1641,21 +1739,21 @@ fwd_request_done: PLIST_ENTRY link; link = ipf->fwdList.Flink; fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, ipfLink); - if (fwdEntry->info.srcIpAddr != srcAddr) { + if (!OvsIphAddrEquals(&(fwdEntry->info.srcIphAddr), &srcAddr)) { OvsRemoveFwdEntry(fwdEntry); NdisReleaseRWLock(ovsTableLock, &lockState); ExReleaseResourceLite(&instance->lock); status = STATUS_INSUFFICIENT_RESOURCES; goto fwd_handle_nbl; } - srcAddr = fwdEntry->info.srcIpAddr; + OvsCopyIphAddress(&srcAddr, &(fwdEntry->info.srcIphAddr)); } /* * initialize ipn */ if (ipn == NULL) { - ipn = OvsLookupIPNeighEntry(ipAddr); + ipn = OvsLookupIPNeighEntry(iphAddr); if (ipn == NULL) { ipn = OvsCreateIPNeighEntry(&ipNeigh, instance); if (ipn == NULL) { @@ -1671,8 +1769,8 @@ fwd_request_done: /* * initialize fwdEntry */ - fwdInfo.dstIpAddr 
= request->fwdReq.tunnelKey.dst; - fwdInfo.srcIpAddr = srcAddr; + OvsCopyIphAddress(&fwdInfo.dstIphAddr, &(request->fwdReq.tunnelKey.dst)); + OvsCopyIphAddress(&fwdInfo.srcIphAddr, &srcAddr); RtlCopyMemory(fwdInfo.dstMacAddr, ipn->macAddr, ETH_ADDR_LEN); RtlCopyMemory(fwdInfo.srcMacAddr, instance->internalRow.PhysicalAddress, ETH_ADDR_LEN); @@ -1686,6 +1784,7 @@ fwd_request_done: goto fwd_handle_nbl; } newFWD = TRUE; + if (status == STATUS_SUCCESS) { /* * Cache the result @@ -1710,10 +1809,8 @@ fwd_handle_nbl: ASSERT(ipn && ipn->refCount == 0); OvsFreeMemoryWithTag(ipn, OVS_IPHELPER_POOL_TAG); } - ipAddr = request->fwdReq.tunnelKey.dst; - OVS_LOG_INFO("Fail to handle IP helper request for dst: %d.%d.%d.%d", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); + OvsCopyIphAddress(&iphAddr, &(request->fwdReq.tunnelKey.dst)); + OvsDumpIpAddrMsg("Fail to handle IP helper request for dst", &iphAddr); } if (request->fwdReq.cb) { request->fwdReq.cb(request->fwdReq.nbl, @@ -1727,9 +1824,8 @@ fwd_handle_nbl: OvsFreeMemoryWithTag(request, OVS_IPHELPER_POOL_TAG); } - static VOID -OvsUpdateIPNeighEntry(UINT32 ipAddr, +OvsUpdateIPNeighEntry(SOCKADDR_INET ipAddr, PMIB_IPNET_ROW2 ipNeigh, NTSTATUS status) { @@ -1889,7 +1985,7 @@ OvsStartIpHelper(PVOID data) * IPN */ while (!IsListEmpty(&ovsSortedIPNeighList)) { - UINT32 ipAddr; + SOCKADDR_INET ipAddr; if (context->exit) { goto ip_helper_wait; } @@ -1901,7 +1997,7 @@ OvsStartIpHelper(PVOID data) threadSleepTimeout = (PLARGE_INTEGER)&timeout; break; } - ipAddr = ipn->ipAddr; + RtlCopyMemory(&ipAddr, &ipn->ipAddr, sizeof(ipAddr)); MIB_IPNET_ROW2 ipNeigh; NTSTATUS status; POVS_IPHELPER_INSTANCE instance = ipn->instance; @@ -2107,3 +2203,55 @@ OvsCancelFwdIpHelperRequest(PNET_BUFFER_LIST nbl) OvsFreeMemoryWithTag(req, OVS_IPHELPER_POOL_TAG); } } + +uint32_t +OvsJhashIphHdr(const SOCKADDR_INET *iphAddr) +{ + UINT32 hash = 0; + + if (!iphAddr) return 0; + + if (iphAddr->si_family == AF_INET) { + hash = 
OvsJhashWords((UINT32*)&iphAddr->Ipv4.sin_addr.s_addr, + 1, OVS_HASH_BASIS); + } else if (iphAddr->si_family == AF_INET6) { + hash = OvsJhashWords((UINT32 *)&(iphAddr->Ipv6.sin6_addr.u.Byte), + 4, OVS_HASH_BASIS); + } + + return hash; +} + +NTSTATUS +OvsConvertWcharToAnsiStr(WCHAR *wStr, size_t wlen, + CHAR* str, + size_t maxStrLen) +{ + ANSI_STRING astr; + UNICODE_STRING ustr; + NTSTATUS status; + size_t size; + + ustr.Buffer = wStr; + ustr.Length = (UINT16)wlen; + ustr.MaximumLength = IF_MAX_STRING_SIZE; + + astr.Buffer = str; + astr.MaximumLength = (UINT16)maxStrLen; + astr.Length = 0; + + size = RtlUnicodeStringToAnsiSize(&ustr); + if (size > maxStrLen) { + return STATUS_BUFFER_OVERFLOW; + } + + status = RtlUnicodeStringToAnsiString(&astr, &ustr, FALSE); + + ASSERT(status == STATUS_SUCCESS); + if (status != STATUS_SUCCESS) { + return status; + } + ASSERT(astr.Length <= (UINT16)maxStrLen); + str[astr.Length] = 0; + return STATUS_SUCCESS; +} diff --git a/datapath-windows/ovsext/IpHelper.h b/datapath-windows/ovsext/IpHelper.h index 25adf6ea2..f6955a06e 100644 --- a/datapath-windows/ovsext/IpHelper.h +++ b/datapath-windows/ovsext/IpHelper.h @@ -46,6 +46,7 @@ * The lock must not raise the IRQL higher than PASSIVE_LEVEL in order for the * route manipulation functions, i.e. GetBestRoute, to work. 
*/ + typedef struct _OVS_IPHELPER_INSTANCE { LIST_ENTRY link; @@ -55,15 +56,15 @@ typedef struct _OVS_IPHELPER_INSTANCE GUID netCfgId; MIB_IF_ROW2 internalRow; MIB_IPINTERFACE_ROW internalIPRow; - UINT32 ipAddress; - + SOCKADDR_INET ipAddress; ERESOURCE lock; } OVS_IPHELPER_INSTANCE, *POVS_IPHELPER_INSTANCE; + typedef struct _OVS_IPNEIGH_ENTRY { UINT8 macAddr[ETH_ADDR_LEN]; UINT16 refCount; - UINT32 ipAddr; + SOCKADDR_INET ipAddr; UINT32 pad; UINT64 timeout; LIST_ENTRY link; @@ -74,7 +75,7 @@ typedef struct _OVS_IPNEIGH_ENTRY { typedef struct _OVS_IPFORWARD_ENTRY { IP_ADDRESS_PREFIX prefix; - UINT32 nextHop; + SOCKADDR_INET nextHop; UINT16 refCount; LIST_ENTRY link; LIST_ENTRY fwdList; @@ -82,14 +83,14 @@ typedef struct _OVS_IPFORWARD_ENTRY { typedef union _OVS_FWD_INFO { struct { - UINT32 dstIpAddr; - UINT32 srcIpAddr; + SOCKADDR_INET dstIphAddr; + SOCKADDR_INET srcIphAddr; UINT8 dstMacAddr[ETH_ADDR_LEN]; UINT8 srcMacAddr[ETH_ADDR_LEN]; UINT32 srcPortNo; POVS_VPORT_ENTRY vport; }; - UINT64 value[4]; + UINT64 value[10]; } OVS_FWD_INFO, *POVS_FWD_INFO; typedef struct _OVS_FWD_ENTRY { @@ -101,7 +102,6 @@ typedef struct _OVS_FWD_ENTRY { LIST_ENTRY ipnLink; } OVS_FWD_ENTRY, *POVS_FWD_ENTRY; - enum { OVS_IP_HELPER_INTERNAL_ADAPTER_UP, OVS_IP_HELPER_INTERNAL_ADAPTER_DOWN, @@ -119,7 +119,7 @@ typedef VOID (*OvsIPHelperCallback)(PNET_BUFFER_LIST nbl, typedef struct _OVS_FWD_REQUEST_INFO { PNET_BUFFER_LIST nbl; UINT32 inPort; - OvsIPv4TunnelKey tunnelKey; + OvsIPTunnelKey tunnelKey; OvsIPHelperCallback cb; PVOID cbData1; PVOID cbData2; @@ -157,7 +157,78 @@ NTSTATUS OvsFwdIPHelperRequest(PNET_BUFFER_LIST nbl, UINT32 inPort, OvsIPHelperCallback cb, PVOID cbData1, PVOID cbData2); -NTSTATUS OvsLookupIPFwdInfo(UINT32 srcIp, UINT32 dstIp, POVS_FWD_INFO info); + VOID OvsCancelFwdIpHelperRequest(PNET_BUFFER_LIST nbl); +NTSTATUS +OvsLookupIPhFwdInfo(SOCKADDR_INET srcIp, SOCKADDR_INET dstIp, + POVS_FWD_INFO info); + +static __inline BOOLEAN +OvsIphAddrEquals(const SOCKADDR_INET *src, 
const SOCKADDR_INET *dst) +{ + BOOLEAN addrEqual = FALSE; + if (!src || !dst) return FALSE; + + if (src->si_family == AF_INET && + dst->si_family == AF_INET) { + addrEqual = (src->Ipv4.sin_addr.s_addr == dst->Ipv4.sin_addr.s_addr); + } else if(src->si_family == AF_INET6 && + dst->si_family == AF_INET6) { + if (RtlEqualMemory(&src->Ipv6.sin6_addr, + &dst->Ipv6.sin6_addr, + sizeof(src->Ipv6.sin6_addr))) { + addrEqual = TRUE; + } + } + return addrEqual; +} + +/* check if the pointers to SOCKADDR_INET is zero*/ +static __inline BOOLEAN +OvsIphIsZero(const SOCKADDR_INET *ipAddr) +{ + BOOLEAN isZero = FALSE; + UCHAR zeros[16] = { 0 }; + if (!ipAddr) return FALSE; + + if (ipAddr->si_family == AF_INET || + ipAddr->si_family == AF_UNSPEC) { + isZero = (ipAddr->Ipv4.sin_addr.s_addr == 0); + } else if(ipAddr->si_family == AF_INET6) { + if (RtlEqualMemory(&ipAddr->Ipv6.sin6_addr.u.Byte, + &zeros, + sizeof(ipAddr->Ipv6.sin6_addr))) { + isZero = TRUE; + } + } + return isZero; +} + +/* Copy the content from the pointer to SOCKADDR_INET + * To the pointer to SOCKADDR_INET + */ +static __inline void +OvsCopyIphAddress(SOCKADDR_INET *dstAddr, const SOCKADDR_INET *srcAddr) +{ + if (!srcAddr || !dstAddr) return; + + dstAddr->si_family = srcAddr->si_family; + + if (srcAddr->si_family == AF_INET) { + dstAddr->Ipv4.sin_addr.s_addr = srcAddr->Ipv4.sin_addr.s_addr; + } else if (srcAddr->si_family == AF_INET6) { + RtlCopyMemory(&dstAddr->Ipv6, &srcAddr->Ipv6, + sizeof(srcAddr->Ipv6)); + } + return; +} + +/* compute the hash value based on SOCKADDR_INET*/ +uint32_t +OvsJhashIphHdr(const SOCKADDR_INET *iphAddr); + +NTSTATUS +OvsConvertWcharToAnsiStr(WCHAR* wStr, size_t wlen, + CHAR* str, size_t maxStrLen); #endif /* __IP_HELPER_H_ */ diff --git a/datapath-windows/ovsext/Offload.c b/datapath-windows/ovsext/Offload.c index bdd9a180e..1fd7ebaa6 100644 --- a/datapath-windows/ovsext/Offload.c +++ b/datapath-windows/ovsext/Offload.c @@ -542,6 +542,7 @@ OvsValidateIPChecksum(PNET_BUFFER_LIST curNbl, 
const IPHdr *ipHdr; if (!hdrInfo->isIPv4) { + /*need check add v6 check logic*/ return NDIS_STATUS_SUCCESS; } @@ -610,7 +611,7 @@ OvsValidateUDPChecksum(PNET_BUFFER_LIST curNbl, BOOLEAN udpCsumZero) NDIS_STATUS OvsCalculateUDPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER curNb, - IPHdr *ipHdr, + EthHdr *ethHdr, UDPHdr *udpHdr, UINT32 packetLength, POVS_PACKET_HDR_INFO layers) @@ -622,18 +623,42 @@ OvsCalculateUDPChecksum(PNET_BUFFER_LIST curNbl, /* Next check if UDP checksum has been calculated. */ if (!csumInfo.Receive.UdpChecksumSucceeded) { - UINT32 l4Payload; + UINT32 l4Payload = packetLength - layers->l4Offset; checkSum = udpHdr->check; - l4Payload = packetLength - layers->l4Offset; - udpHdr->check = 0; - udpHdr->check = - IPPseudoChecksum((UINT32 *)&ipHdr->saddr, - (UINT32 *)&ipHdr->daddr, - IPPROTO_UDP, (UINT16)l4Payload); - udpHdr->check = CalculateChecksumNB(curNb, (UINT16)l4Payload, - layers->l4Offset); + switch (ethHdr->Type) { + case ETH_TYPE_IPV4_NBO: { + IPHdr *ipHdr = (IPHdr *)((PCHAR)ethHdr + layers->l3Offset); + + udpHdr->check = 0; + udpHdr->check = + IPPseudoChecksum((UINT32 *)&ipHdr->saddr, + (UINT32 *)&ipHdr->daddr, + IPPROTO_UDP, (UINT16)l4Payload); + udpHdr->check = + CalculateChecksumNB(curNb, (UINT16)l4Payload, + layers->l4Offset); + break; + } + case ETH_TYPE_IPV6_NBO: { + IPv6Hdr *ipv6Hdr = (IPv6Hdr *)((PCHAR)ethHdr + layers->l3Offset); + + udpHdr->check = 0; + udpHdr->check = + IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->saddr, + (UINT32 *)&ipv6Hdr->daddr, + IPPROTO_UDP, (UINT16)l4Payload); + udpHdr->check = + CalculateChecksumNB(curNb, (UINT16)l4Payload, + layers->l4Offset); + break; + } + default: + OVS_LOG_ERROR("Invalid eth type: %d\n", ethHdr->Type); + ASSERT(!"Invalid eth type"); + } + if (checkSum != udpHdr->check) { OVS_LOG_ERROR("UDP checksum incorrect, expected %u, got %u", udpHdr->check, checkSum); @@ -646,8 +671,6 @@ OvsCalculateUDPChecksum(PNET_BUFFER_LIST curNbl, return NDIS_STATUS_SUCCESS; } - - /* * OvsApplySWChecksumOnNB -- * 
diff --git a/datapath-windows/ovsext/Offload.h b/datapath-windows/ovsext/Offload.h index b2a0b6faa..c389e255a 100644 --- a/datapath-windows/ovsext/Offload.h +++ b/datapath-windows/ovsext/Offload.h @@ -35,12 +35,13 @@ NDIS_STATUS OvsValidateIPChecksum(PNET_BUFFER_LIST curNbl, POVS_PACKET_HDR_INFO hdrInfo); NDIS_STATUS OvsValidateUDPChecksum(PNET_BUFFER_LIST curNbl, BOOLEAN udpCsumZero); -NDIS_STATUS OvsCalculateUDPChecksum(PNET_BUFFER_LIST curNbl, - PNET_BUFFER curNb, - IPHdr *ipHdr, - UDPHdr *udpHdr, - UINT32 packetLength, - POVS_PACKET_HDR_INFO layers); +NDIS_STATUS +OvsCalculateUDPChecksum(PNET_BUFFER_LIST curNbl, + PNET_BUFFER curNb, + EthHdr *ethHdr, + UDPHdr *udpHdr, + UINT32 packetLength, + POVS_PACKET_HDR_INFO layers); ULONG OVSGetTcpMSS(PNET_BUFFER_LIST nbl); diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c index 0220a6e80..ebdebb690 100644 --- a/datapath-windows/ovsext/Stt.c +++ b/datapath-windows/ovsext/Stt.c @@ -46,7 +46,7 @@ static OVS_STT_THREAD_CTX sttDefragThreadCtx; static NDIS_STATUS OvsDoEncapStt(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl, - const OvsIPv4TunnelKey *tunKey, + const OvsIPTunnelKey *tunKey, const POVS_FWD_INFO fwdInfo, POVS_PACKET_HDR_INFO layers, POVS_SWITCH_CONTEXT switchContext, @@ -104,7 +104,7 @@ OvsCleanupSttTunnel(POVS_VPORT_ENTRY vport) NDIS_STATUS OvsEncapStt(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, + OvsIPTunnelKey *tunKey, POVS_SWITCH_CONTEXT switchContext, POVS_PACKET_HDR_INFO layers, PNET_BUFFER_LIST *newNbl, @@ -114,7 +114,13 @@ OvsEncapStt(POVS_VPORT_ENTRY vport, NDIS_STATUS status; UNREFERENCED_PARAMETER(switchContext); - status = OvsLookupIPFwdInfo(tunKey->src, tunKey->dst, &fwdInfo); + + if (tunKey->dst.si_family != AF_INET) { + /*V6 tunnel support will be supported later*/ + return NDIS_STATUS_FAILURE; + } + + status = OvsLookupIPhFwdInfo(tunKey->src, tunKey->dst, &fwdInfo); if (status != STATUS_SUCCESS) { OvsFwdIPHelperRequest(NULL, 0, tunKey, 
NULL, NULL, NULL); /* @@ -140,7 +146,7 @@ OvsEncapStt(POVS_VPORT_ENTRY vport, NDIS_STATUS OvsDoEncapStt(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl, - const OvsIPv4TunnelKey *tunKey, + const OvsIPTunnelKey *tunKey, const POVS_FWD_INFO fwdInfo, POVS_PACKET_HDR_INFO layers, POVS_SWITCH_CONTEXT switchContext, @@ -301,8 +307,8 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, outerIpHdr->ttl = tunKey->ttl? tunKey->ttl : 64; outerIpHdr->protocol = IPPROTO_TCP; outerIpHdr->check = 0; - outerIpHdr->saddr = fwdInfo->srcIpAddr; - outerIpHdr->daddr = tunKey->dst; + outerIpHdr->saddr = fwdInfo->srcIphAddr.Ipv4.sin_addr.s_addr; + outerIpHdr->daddr = tunKey->dst.Ipv4.sin_addr.s_addr; /* L4 header */ RtlZeroMemory(outerTcpHdr, sizeof *outerTcpHdr); @@ -370,8 +376,8 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, */ outerIpHdr->check = IPChecksum((UINT8 *)outerIpHdr, sizeof *outerIpHdr, 0); - outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr, - (uint32 *)&tunKey->dst, + outerTcpHdr->check = IPPseudoChecksum((UINT32*)&fwdInfo->srcIphAddr.Ipv4.sin_addr.s_addr, + (UINT32*)&tunKey->dst.Ipv4.sin_addr.s_addr, IPPROTO_TCP, (uint16)0); lsoInfo.Value = 0; @@ -382,8 +388,8 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, NET_BUFFER_LIST_INFO(curNbl, TcpLargeSendNetBufferListInfo) = lsoInfo.Value; } else { - outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr, - (uint32 *) &tunKey->dst, + outerTcpHdr->check = IPPseudoChecksum((UINT32*)&fwdInfo->srcIphAddr.Ipv4.sin_addr.s_addr, + (UINT32*)&tunKey->dst.Ipv4.sin_addr.s_addr, IPPROTO_TCP, (uint16) tcpChksumLen); } @@ -912,7 +918,7 @@ OvsDecapSetOffloads(PNET_BUFFER_LIST *curNbl, NDIS_STATUS OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, + OvsIPTunnelKey *tunKey, PNET_BUFFER_LIST *newNbl) { NDIS_STATUS status; @@ -925,6 +931,12 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, UINT32 advanceCnt, hdrLen; OVS_PACKET_HDR_INFO layers = { 0 }; + + if (tunKey->dst.si_family != AF_INET) { + /*V6 tunnel 
support will be supported later*/ + return NDIS_STATUS_FAILURE; + } + status = OvsExtractLayers(curNbl, &layers); if (status != NDIS_STATUS_SUCCESS) { return status; @@ -1004,8 +1016,10 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, ASSERT(sttHdr); /* Initialize the tunnel key */ - tunKey->dst = ipHdr->daddr; - tunKey->src = ipHdr->saddr; + tunKey->dst.Ipv4.sin_addr.s_addr = ipHdr->daddr; + tunKey->dst.si_family = AF_INET; + tunKey->src.Ipv4.sin_addr.s_addr = ipHdr->saddr; + tunKey->src.si_family = AF_INET; tunKey->tunnelId = sttHdr->key; tunKey->flags = OVS_TNL_F_KEY; tunKey->tos = ipHdr->tos; diff --git a/datapath-windows/ovsext/Stt.h b/datapath-windows/ovsext/Stt.h index 2999dcf99..5e77f96c9 100644 --- a/datapath-windows/ovsext/Stt.h +++ b/datapath-windows/ovsext/Stt.h @@ -92,7 +92,7 @@ VOID OvsCleanupSttTunnel(POVS_VPORT_ENTRY vport); NDIS_STATUS OvsEncapStt(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, + OvsIPTunnelKey *tunKey, POVS_SWITCH_CONTEXT switchContext, POVS_PACKET_HDR_INFO layers, PNET_BUFFER_LIST *newNbl, @@ -101,7 +101,7 @@ NDIS_STATUS OvsEncapStt(POVS_VPORT_ENTRY vport, NDIS_STATUS OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, + OvsIPTunnelKey *tunKey, PNET_BUFFER_LIST *newNbl); NTSTATUS OvsInitSttDefragmentation(); diff --git a/datapath-windows/ovsext/Tunnel.c b/datapath-windows/ovsext/Tunnel.c index 5d1be80f4..5725d275a 100644 --- a/datapath-windows/ovsext/Tunnel.c +++ b/datapath-windows/ovsext/Tunnel.c @@ -205,7 +205,7 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, OVS_TUNNEL_PENDED_PACKET *packet) { NTSTATUS status; - OvsIPv4TunnelKey tunnelKey; + OvsIPTunnelKey tunKey = {0}; NET_BUFFER *pNb; ULONG sendCompleteFlags = 0; BOOLEAN dispatch; @@ -220,7 +220,7 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, ASSERT(gOvsSwitchContext); /* Fill the tunnel key */ - status = OvsSlowPathDecapVxlan(pNbl, &tunnelKey); + status = 
OvsSlowPathDecapVxlan(pNbl, &tunKey); if(!NT_SUCCESS(status)) { goto dropit; @@ -276,7 +276,7 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, SendFlags |= NDIS_SEND_FLAGS_DISPATCH_LEVEL; vport = OvsFindTunnelVportByDstPortAndType(gOvsSwitchContext, - htons(tunnelKey.dst_port), + htons(tunKey.dst_port), OVS_VPORT_TYPE_VXLAN); if (vport == NULL){ @@ -288,7 +288,7 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, portNo = vport->portNo; - status = OvsExtractFlow(pNbl, portNo, &key, &layers, &tunnelKey); + status = OvsExtractFlow(pNbl, portNo, &key, &layers, &tunKey); if (status != NDIS_STATUS_SUCCESS) { goto unlockAndDrop; } diff --git a/datapath-windows/ovsext/User.c b/datapath-windows/ovsext/User.c index ee0e38d99..c4563b28b 100644 --- a/datapath-windows/ovsext/User.c +++ b/datapath-windows/ovsext/User.c @@ -482,7 +482,8 @@ OvsExecuteDpIoctl(OvsPacketExecute *execute) } ndisStatus = OvsExtractFlow(pNbl, execute->inPort, &key, &layers, - tempTunKey.tunKey.dst == 0 ? NULL : &tempTunKey.tunKey); + OvsIphIsZero(&tempTunKey.tunKey.dst)? 
NULL : + &tempTunKey.tunKey); if (ndisStatus != NDIS_STATUS_SUCCESS) { /* Invalid network header */ @@ -847,7 +848,7 @@ OvsCreateAndAddPackets(PVOID userData, static __inline UINT32 OvsGetUpcallMsgSize(PVOID userData, UINT32 userDataLen, - OvsIPv4TunnelKey *tunnelKey, + OvsIPTunnelKey *tunnelKey, UINT32 payload) { UINT32 size = NLMSG_ALIGN(sizeof(struct ovs_header)) + @@ -1013,7 +1014,7 @@ OvsCreateQueueNlPacket(PVOID userData, UINT32 cmd, POVS_VPORT_ENTRY vport, OvsFlowKey *key, - OvsIPv4TunnelKey *tunnelKey, + OvsIPTunnelKey *tunnelKey, PNET_BUFFER_LIST nbl, PNET_BUFFER nb, BOOLEAN isRecv, diff --git a/datapath-windows/ovsext/User.h b/datapath-windows/ovsext/User.h index ccca0ba5f..ca27827d6 100644 --- a/datapath-windows/ovsext/User.h +++ b/datapath-windows/ovsext/User.h @@ -75,7 +75,7 @@ POVS_PACKET_QUEUE_ELEM OvsCreateQueueNlPacket(PVOID userData, UINT32 cmd, POVS_VPORT_ENTRY vport, OvsFlowKey *key, - OvsIPv4TunnelKey *tunnelKey, + OvsIPTunnelKey *tunnelKey, PNET_BUFFER_LIST nbl, PNET_BUFFER nb, BOOLEAN isRecv, diff --git a/datapath-windows/ovsext/Vxlan.c b/datapath-windows/ovsext/Vxlan.c index 04df9f6c9..d2c7a4a46 100644 --- a/datapath-windows/ovsext/Vxlan.c +++ b/datapath-windows/ovsext/Vxlan.c @@ -173,7 +173,7 @@ OvsCleanupVxlanTunnel(PIRP irp, static __inline NDIS_STATUS OvsDoEncapVxlan(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, + OvsIPTunnelKey *tunKey, POVS_FWD_INFO fwdInfo, POVS_PACKET_HDR_INFO layers, POVS_SWITCH_CONTEXT switchContext, @@ -188,10 +188,15 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport, UDPHdr *udpHdr; VXLANHdr *vxlanHdr; POVS_VXLAN_VPORT vportVxlan; - UINT32 headRoom = OvsGetVxlanTunHdrSize(); UINT32 packetLength; ULONG mss = 0; NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; + UINT32 headRoom = + OvsGetVxlanTunHdrSize(fwdInfo->dstIphAddr.si_family == AF_INET ? 
+ TRUE : FALSE); + ASSERT(OvsIphAddrEquals(&tunKey->dst, &fwdInfo->dstIphAddr)); + ASSERT(OvsIphAddrEquals(&tunKey->src, &fwdInfo->srcIphAddr) || + OvsIphIsZero(&tunKey->src)); curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); packetLength = NET_BUFFER_DATA_LENGTH(curNb); @@ -265,54 +270,65 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport, ethHdr->Type = htons(ETH_TYPE_IPV4); /* IP header */ - ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); - - ipHdr->ihl = sizeof *ipHdr / 4; - ipHdr->version = IPPROTO_IPV4; - ipHdr->tos = tunKey->tos; - ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr); - ipHdr->id = (uint16)atomic_add64(&vportVxlan->ipId, - NET_BUFFER_DATA_LENGTH(curNb)); - ipHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ? - IP_DF_NBO : 0; - ipHdr->ttl = tunKey->ttl ? tunKey->ttl : VXLAN_DEFAULT_TTL; - ipHdr->protocol = IPPROTO_UDP; - ASSERT(tunKey->dst == fwdInfo->dstIpAddr); - ASSERT(tunKey->src == fwdInfo->srcIpAddr || tunKey->src == 0); - ipHdr->saddr = fwdInfo->srcIpAddr; - ipHdr->daddr = fwdInfo->dstIpAddr; - - ipHdr->check = 0; - - /* UDP header */ - udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr); - udpHdr->source = htons(tunKey->flow_hash | MAXINT16); - udpHdr->dest = tunKey->dst_port ? 
tunKey->dst_port : - htons(vportVxlan->dstPort); - udpHdr->len = htons(NET_BUFFER_DATA_LENGTH(curNb) - headRoom + - sizeof *udpHdr + sizeof *vxlanHdr); - - if (tunKey->flags & OVS_TNL_F_CSUM) { - udpHdr->check = IPPseudoChecksum(&ipHdr->saddr, &ipHdr->daddr, - IPPROTO_UDP, ntohs(udpHdr->len)); - } else { - udpHdr->check = 0; - } - - /* VXLAN header */ - vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof *udpHdr); - vxlanHdr->flags1 = 0; - vxlanHdr->locallyReplicate = 0; - vxlanHdr->flags2 = 0; - vxlanHdr->reserved1 = 0; - vxlanHdr->vxlanID = VXLAN_TUNNELID_TO_VNI(tunKey->tunnelId); - vxlanHdr->instanceID = 1; - vxlanHdr->reserved2 = 0; + if (fwdInfo->dstIphAddr.si_family == AF_INET) { + ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); + + ipHdr->ihl = sizeof *ipHdr / 4; + ipHdr->version = IPPROTO_IPV4; + ipHdr->tos = tunKey->tos; + ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr); + ipHdr->id = (uint16)atomic_add64(&vportVxlan->ipId, + NET_BUFFER_DATA_LENGTH(curNb)); + ipHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ? + IP_DF_NBO : 0; + ipHdr->ttl = tunKey->ttl ? tunKey->ttl : VXLAN_DEFAULT_TTL; + ipHdr->protocol = IPPROTO_UDP; + ASSERT(OvsIphAddrEquals(&tunKey->dst, &fwdInfo->dstIphAddr)); + ASSERT(OvsIphAddrEquals(&tunKey->src, &fwdInfo->srcIphAddr) || + OvsIphIsZero(&tunKey->src)); + + ipHdr->saddr = fwdInfo->srcIphAddr.Ipv4.sin_addr.s_addr; + ipHdr->daddr = fwdInfo->dstIphAddr.Ipv4.sin_addr.s_addr; + + ipHdr->check = 0; + + /* UDP header */ + udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr); + udpHdr->source = htons(tunKey->flow_hash | MAXINT16); + udpHdr->dest = tunKey->dst_port ? 
tunKey->dst_port : + htons(vportVxlan->dstPort); + udpHdr->len = htons(NET_BUFFER_DATA_LENGTH(curNb) - headRoom + + sizeof *udpHdr + sizeof *vxlanHdr); + + if (tunKey->flags & OVS_TNL_F_CSUM) { + udpHdr->check = IPPseudoChecksum(&ipHdr->saddr, &ipHdr->daddr, + IPPROTO_UDP, ntohs(udpHdr->len)); + } else { + udpHdr->check = 0; + } + + /* VXLAN header */ + vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof *udpHdr); + vxlanHdr->flags1 = 0; + vxlanHdr->locallyReplicate = 0; + vxlanHdr->flags2 = 0; + vxlanHdr->reserved1 = 0; + vxlanHdr->vxlanID = VXLAN_TUNNELID_TO_VNI(tunKey->tunnelId); + vxlanHdr->instanceID = 1; + vxlanHdr->reserved2 = 0; + } else { + status = NDIS_STATUS_FAILURE; + goto ret_error; + } } csumInfo.Value = 0; - csumInfo.Transmit.IpHeaderChecksum = 1; - csumInfo.Transmit.IsIPv4 = 1; + if (fwdInfo->dstIphAddr.si_family == AF_INET) { + csumInfo.Transmit.IpHeaderChecksum = 1; + csumInfo.Transmit.IsIPv4 = 1; + } else { + csumInfo.Transmit.IsIPv6 = 1; + } if (tunKey->flags & OVS_TNL_F_CSUM) { csumInfo.Transmit.UdpChecksum = 1; } @@ -339,7 +355,7 @@ ret_error: NDIS_STATUS OvsEncapVxlan(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, + OvsIPTunnelKey *tunKey, POVS_SWITCH_CONTEXT switchContext, POVS_PACKET_HDR_INFO layers, PNET_BUFFER_LIST *newNbl, @@ -348,7 +364,12 @@ OvsEncapVxlan(POVS_VPORT_ENTRY vport, NTSTATUS status; OVS_FWD_INFO fwdInfo; - status = OvsLookupIPFwdInfo(tunKey->src, tunKey->dst, &fwdInfo); + if (tunKey->dst.si_family != AF_INET) { + /*V6 tunnel support will be supported later*/ + return NDIS_STATUS_FAILURE; + } + + status = OvsLookupIPhFwdInfo(tunKey->src, tunKey->dst, &fwdInfo); if (status != STATUS_SUCCESS) { OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL); /* @@ -377,7 +398,7 @@ OvsEncapVxlan(POVS_VPORT_ENTRY vport, NDIS_STATUS OvsDecapVxlan(POVS_SWITCH_CONTEXT switchContext, PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, + OvsIPTunnelKey *tunKey, PNET_BUFFER_LIST *newNbl) { PNET_BUFFER curNb; @@ 
-391,6 +412,11 @@ OvsDecapVxlan(POVS_SWITCH_CONTEXT switchContext, NDIS_STATUS status; OVS_PACKET_HDR_INFO layers = { 0 }; + if (tunKey->dst.si_family != AF_INET) { + /*V6 tunnel support will be supported later*/ + return NDIS_STATUS_FAILURE; + } + status = OvsExtractLayers(curNbl, &layers); if (status != NDIS_STATUS_SUCCESS) { return status; @@ -427,13 +453,29 @@ OvsDecapVxlan(POVS_SWITCH_CONTEXT switchContext, } ethHdr = (EthHdr *)bufferStart; - /* XXX: Handle IP options. */ - ipHdr = (IPHdr *)(bufferStart + layers.l3Offset); - tunKey->src = ipHdr->saddr; - tunKey->dst = ipHdr->daddr; - tunKey->tos = ipHdr->tos; - tunKey->ttl = ipHdr->ttl; - tunKey->pad = 0; + if (ethHdr->Type == ETH_TYPE_IPV4_NBO) { + /* XXX: Handle IP options. */ + ipHdr = (IPHdr *)(bufferStart + layers.l3Offset); + tunKey->src.Ipv4.sin_addr.s_addr = ipHdr->saddr; + tunKey->src.si_family = AF_INET; + tunKey->dst.Ipv4.sin_addr.s_addr = ipHdr->daddr; + tunKey->dst.si_family = AF_INET; + tunKey->tos = ipHdr->tos; + tunKey->ttl = ipHdr->ttl; + tunKey->pad = 0; + } else { + IPv6Hdr *ipv6Hdr; + ASSERT(ethHdr->Type == ETH_TYPE_IPV6_NBO); + ipv6Hdr = (IPv6Hdr *)((PCHAR)ethHdr + sizeof(*ethHdr)); + RtlCopyMemory(&tunKey->src.Ipv6.sin6_addr, &ipv6Hdr->saddr, sizeof(ipv6Hdr->saddr)); + tunKey->src.si_family = AF_INET6; + RtlCopyMemory(&tunKey->dst.Ipv6.sin6_addr, &ipv6Hdr->daddr, sizeof(ipv6Hdr->saddr)); + tunKey->dst.si_family = AF_INET6; + tunKey->tos = (ipv6Hdr->priority << 4) | + ((ipv6Hdr->flow_lbl[0] & 0xF0) >> 4); + tunKey->ttl = ipv6Hdr->hop_limit; + tunKey->pad = 0; + } udpHdr = (UDPHdr *)(bufferStart + layers.l4Offset); /* Validate if NIC has indicated checksum failure. */ @@ -445,7 +487,7 @@ OvsDecapVxlan(POVS_SWITCH_CONTEXT switchContext, /* Calculate and verify UDP checksum if NIC didn't do it. 
*/ if (udpHdr->check != 0) { tunKey->flags |= OVS_TNL_F_CSUM; - status = OvsCalculateUDPChecksum(curNbl, curNb, ipHdr, udpHdr, + status = OvsCalculateUDPChecksum(curNbl, curNb, ethHdr, udpHdr, packetLength, &layers); if (status != NDIS_STATUS_SUCCESS) { goto dropNbl; @@ -475,7 +517,7 @@ dropNbl: NDIS_STATUS OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, - OvsIPv4TunnelKey *tunnelKey) + OvsIPTunnelKey *tunnelKey) { NDIS_STATUS status = NDIS_STATUS_FAILURE; UDPHdr udpStorage; @@ -514,8 +556,10 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, &VxlanHeaderBuffer); if (VxlanHeader) { - tunnelKey->src = nh->saddr; - tunnelKey->dst = nh->daddr; + tunnelKey->src.Ipv4.sin_addr.s_addr = nh->saddr; + tunnelKey->src.si_family = AF_INET; + tunnelKey->dst.Ipv4.sin_addr.s_addr = nh->daddr; + tunnelKey->dst.si_family = AF_INET; tunnelKey->ttl = nh->ttl; tunnelKey->tos = nh->tos; if (VxlanHeader->instanceID) { diff --git a/datapath-windows/ovsext/Vxlan.h b/datapath-windows/ovsext/Vxlan.h index e2fb2dc08..1cd35f6ae 100644 --- a/datapath-windows/ovsext/Vxlan.h +++ b/datapath-windows/ovsext/Vxlan.h @@ -62,11 +62,11 @@ NTSTATUS OvsCleanupVxlanTunnel(PIRP irp, PVOID tunnelContext); NDIS_STATUS OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, - OvsIPv4TunnelKey *tunnelKey); + OvsIPTunnelKey *tunnelKey); NDIS_STATUS OvsEncapVxlan(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, + OvsIPTunnelKey *tunKey, POVS_SWITCH_CONTEXT switchContext, POVS_PACKET_HDR_INFO layers, PNET_BUFFER_LIST *newNbl, @@ -74,15 +74,15 @@ NDIS_STATUS OvsEncapVxlan(POVS_VPORT_ENTRY vport, NDIS_STATUS OvsDecapVxlan(POVS_SWITCH_CONTEXT switchContext, PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, + OvsIPTunnelKey *tunKey, PNET_BUFFER_LIST *newNbl); static __inline UINT32 -OvsGetVxlanTunHdrSize(VOID) +OvsGetVxlanTunHdrSize(BOOLEAN isIpv4) { /* XXX: Can L2 include VLAN at all? 
*/ - return sizeof (EthHdr) + sizeof (IPHdr) + sizeof (UDPHdr) + - sizeof (VXLANHdr); + return sizeof(EthHdr) + (isIpv4 ? sizeof(IPHdr) : sizeof(IPv6Hdr)) + + sizeof (UDPHdr) + sizeof (VXLANHdr); } static __inline UINT32 -- cgit v1.2.1