summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--  datapath-windows/ovsext/Datapath.c  456
-rw-r--r--  datapath-windows/ovsext/Datapath.h  25
-rw-r--r--  datapath-windows/ovsext/ovsext.vcxproj  2
3 files changed, 476 insertions, 7 deletions
diff --git a/datapath-windows/ovsext/Datapath.c b/datapath-windows/ovsext/Datapath.c
index 3fa56ebcf..3bb2a2a32 100644
--- a/datapath-windows/ovsext/Datapath.c
+++ b/datapath-windows/ovsext/Datapath.c
@@ -23,7 +23,7 @@
#if defined OVS_USE_NL_INTERFACE && OVS_USE_NL_INTERFACE == 1
#include "precomp.h"
-#include "OvsDatapath.h"
+#include "Datapath.h"
#include "OvsJhash.h"
#include "OvsSwitch.h"
#include "OvsVport.h"
@@ -42,6 +42,153 @@
#define NETLINK_FAMILY_NAME_LEN 48
+
+/*
+ * Netlink messages are grouped by family (aka type), and each family supports
+ * a set of commands. Messages can be passed from kernel to userspace or
+ * vice-versa. To call into the kernel, userspace uses a device operation which
+ * is outside of a netlink message.
+ *
+ * Each command results in the invocation of a handler function to implement the
+ * requested functionality.
+ *
+ * As expected, only certain combinations of (device operation, netlink family,
+ * command) are valid.
+ *
+ * Here, we implement the basic infrastructure to perform validation on the
+ * incoming message, version checking, and also to invoke the corresponding
+ * handler to do the heavy-lifting.
+ */
+
+/*
+ * Signature of the handler invoked for a given netlink command. Not all the
+ * parameters are used by all the handlers.
+ *
+ * 'inputBuffer'/'inputLength' and 'outputBuffer'/'outputLength' describe the
+ * request and reply buffers of the device operation; 'outputBuffer' is NULL
+ * when the caller supplied no output buffer. The handler stores the number of
+ * reply bytes it produced in '*replyLen', and the status it returns is the
+ * status the IRP is completed with.
+ */
+typedef NTSTATUS (*NetlinkCmdHandler)(PIRP irp, PFILE_OBJECT fileObject,
+ PVOID inputBuffer, UINT32 inputLength,
+ PVOID outputBuffer, UINT32 outputLength,
+ UINT32 *replyLen);
+
+typedef struct _NETLINK_CMD { /* One command supported by a netlink family. */
+ UINT16 cmd; /* Command id; matched against genlMsg.cmd of a message. */
+ NetlinkCmdHandler handler; /* Function invoked to carry out 'cmd'. */
+ UINT32 supportedDevOp; /* Bitmask of OVS_*_DEV_OP values 'cmd' is valid for. */
+} NETLINK_CMD, *PNETLINK_CMD;
+
+/*
+ * A netlink family is a group of commands. Each family is described by one
+ * statically initialized instance of this structure.
+ */
+typedef struct _NETLINK_FAMILY {
+ CHAR *name; /* Family name, e.g. OVS_DATAPATH_FAMILY. */
+ UINT32 id; /* Family id, e.g. OVS_WIN_NL_DATAPATH_FAMILY_ID. */
+ UINT16 version; /* Messages with a lower genlMsg.version are rejected. */
+ UINT16 maxAttr; /* Initialized from the family's *_ATTR_MAX constant. */
+ NETLINK_CMD *cmds; /* Array of netlink commands and handlers. */
+ UINT16 opsCount; /* Number of entries in 'cmds'. */
+} NETLINK_FAMILY, *PNETLINK_FAMILY;
+
+/*
+ * Device operations to tag netlink commands with. This is a bitmask since it is
+ * possible that a particular command can be invoked via different device
+ * operations. The device control routine derives the operation from the ioctl
+ * code of the request.
+ */
+#define OVS_READ_DEV_OP (1 << 0) /* Request came in via OVS_IOCTL_READ. */
+#define OVS_WRITE_DEV_OP (1 << 1) /* Request came in via OVS_IOCTL_WRITE. */
+#define OVS_TRANSACTION_DEV_OP (1 << 2) /* Request came in via OVS_IOCTL_TRANSACT. */
+
+/*
+ * Handlers for the various netlink commands. They are declared ahead of the
+ * command tables that reference them; the definitions come later in the file.
+ */
+static NTSTATUS OvsGetPidCmdHandler(PIRP irp, PFILE_OBJECT fileObject,
+ PVOID inputBuffer, UINT32 inputLength,
+ PVOID outputBuffer, UINT32 outputLength,
+ UINT32 *replyLen);
+
+/*
+ * The various netlink families, along with the supported commands. Most of
+ * these families and commands are part of the openvswitch specification for a
+ * netlink datapath. In addition, each platform can implement a few families
+ * and commands as extensions.
+ */
+
+/*
+ * Commands of the netlink control family. This family is specific to Windows;
+ * its only command lets userspace query the PID assigned to a device handle
+ * and is valid for transaction operations only.
+ */
+NETLINK_CMD nlControlFamilyCmdOps[] = {
+    {
+        .cmd = OVS_CTRL_CMD_WIN_GET_PID,
+        .handler = OvsGetPidCmdHandler,
+        .supportedDevOp = OVS_TRANSACTION_DEV_OP
+    }
+};
+
+/* Descriptor of the Windows specific netlink control family. */
+NETLINK_FAMILY nlControlFamilyOps = {
+    .name = OVS_WIN_CONTROL_FAMILY,
+    .id = OVS_WIN_NL_CTRL_FAMILY_ID,
+    .version = OVS_WIN_CONTROL_VERSION,
+    .maxAttr = OVS_WIN_CONTROL_ATTR_MAX,
+    .cmds = nlControlFamilyCmdOps,
+    .opsCount = ARRAY_SIZE(nlControlFamilyCmdOps)
+};
+
+
+
+/*
+ * Netlink packet family. No commands are registered yet.
+ * XXX: Add commands here.
+ */
+NETLINK_FAMILY nlPacketFamilyOps = {
+    .name = OVS_PACKET_FAMILY,
+    .id = OVS_WIN_NL_PACKET_FAMILY_ID,
+    .version = OVS_PACKET_VERSION,
+    .maxAttr = OVS_PACKET_ATTR_MAX,
+    .cmds = NULL, /* XXX: placeholder. */
+    .opsCount = 0
+};
+
+/*
+ * Netlink datapath family. No commands are registered yet.
+ * XXX: Add commands here.
+ */
+NETLINK_FAMILY nlDatapathFamilyOps = {
+    .name = OVS_DATAPATH_FAMILY,
+    .id = OVS_WIN_NL_DATAPATH_FAMILY_ID,
+    .version = OVS_DATAPATH_VERSION,
+    .maxAttr = OVS_DP_ATTR_MAX,
+    .cmds = NULL, /* XXX: placeholder. */
+    .opsCount = 0
+};
+
+/*
+ * Netlink vport family. No commands are registered yet.
+ * XXX: Add commands here.
+ */
+NETLINK_FAMILY nlVportFamilyOps = {
+    .name = OVS_VPORT_FAMILY,
+    .id = OVS_WIN_NL_VPORT_FAMILY_ID,
+    .version = OVS_VPORT_VERSION,
+    .maxAttr = OVS_VPORT_ATTR_MAX,
+    .cmds = NULL, /* XXX: placeholder. */
+    .opsCount = 0
+};
+
+/*
+ * Netlink flow family. No commands are registered yet.
+ * XXX: Add commands here.
+ */
+NETLINK_FAMILY nlFLowFamilyOps = {
+    .name = OVS_FLOW_FAMILY,
+    .id = OVS_WIN_NL_FLOW_FAMILY_ID,
+    .version = OVS_FLOW_VERSION,
+    .maxAttr = OVS_FLOW_ATTR_MAX,
+    .cmds = NULL, /* XXX: placeholder. */
+    .opsCount = 0
+};
+
+/*
+ * Forward declarations of the file-local helpers used by the device control
+ * path: output buffer mapping, netlink command validation and handler
+ * dispatch. Parameter names match the definitions further down in the file.
+ */
+static NTSTATUS
+MapIrpOutputBuffer(PIRP irp,
+                   UINT32 bufferLength,
+                   UINT32 requiredLength,
+                   PVOID *buffer);
+static NTSTATUS
+ValidateNetlinkCmd(UINT32 devOp,
+                   POVS_MESSAGE ovsMsg,
+                   NETLINK_FAMILY *nlFamilyOps);
+static NTSTATUS
+InvokeNetlinkCmdHandler(PIRP irp,
+                        PFILE_OBJECT fileObject,
+                        UINT32 devOp,
+                        POVS_MESSAGE ovsMsg,
+                        NETLINK_FAMILY *nlFamilyOps,
+                        PVOID inputBuffer,
+                        UINT32 inputLength,
+                        PVOID outputBuffer,
+                        UINT32 outputLength,
+                        UINT32 *replyLen);
+
+
/* Handles to the device object for communication with userspace. */
NDIS_HANDLE gOvsDeviceHandle;
PDEVICE_OBJECT gOvsDeviceObject;
@@ -63,7 +210,11 @@ DRIVER_DISPATCH OvsDeviceControl;
#pragma alloc_text(PAGE, OvsDeviceControl)
#endif // ALLOC_PRAGMA
-#define OVS_MAX_OPEN_INSTANCES 128
+/*
+ * Maximum number of simultaneously open instances of the device; this is the
+ * size of 'ovsOpenInstanceArray'.
+ *
+ * We might hit this limit easily since userspace opens a netlink descriptor for
+ * each thread, and at least one descriptor per vport. Revisit this later.
+ */
+#define OVS_MAX_OPEN_INSTANCES 512
POVS_OPEN_INSTANCE ovsOpenInstanceArray[OVS_MAX_OPEN_INSTANCES];
UINT32 ovsNumberOfOpenInstances;
@@ -218,7 +369,8 @@ OvsFindOpenInstance(PFILE_OBJECT fileObject)
}
NTSTATUS
-OvsAddOpenInstance(PFILE_OBJECT fileObject)
+OvsAddOpenInstance(POVS_DEVICE_EXTENSION ovsExt,
+ PFILE_OBJECT fileObject)
{
POVS_OPEN_INSTANCE instance =
(POVS_OPEN_INSTANCE) OvsAllocateMemory(sizeof (OVS_OPEN_INSTANCE));
@@ -247,6 +399,10 @@ OvsAddOpenInstance(PFILE_OBJECT fileObject)
ASSERT(i < OVS_MAX_OPEN_INSTANCES);
instance->fileObject = fileObject;
ASSERT(fileObject->FsContext == NULL);
+ instance->pid = (UINT32)InterlockedIncrement((LONG volatile *)&ovsExt->pidCount);
+ if (instance->pid == 0) {
+ /* XXX: check for rollover. */
+ }
fileObject->FsContext = instance;
OvsReleaseCtrlLock();
return STATUS_SUCCESS;
@@ -313,7 +469,7 @@ OvsOpenCloseDevice(PDEVICE_OBJECT deviceObject,
switch (irpSp->MajorFunction) {
case IRP_MJ_CREATE:
- status = OvsAddOpenInstance(fileObject);
+ status = OvsAddOpenInstance(ovsExt, fileObject);
if (STATUS_SUCCESS == status) {
InterlockedIncrement((LONG volatile *)&ovsExt->numberOpenInstance);
}
@@ -374,10 +530,14 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject,
NTSTATUS status = STATUS_SUCCESS;
PFILE_OBJECT fileObject;
PVOID inputBuffer;
- PVOID outputBuffer;
+ PVOID outputBuffer = NULL;
UINT32 inputBufferLen, outputBufferLen;
UINT32 code, replyLen = 0;
POVS_OPEN_INSTANCE instance;
+ UINT32 devOp;
+ OVS_MESSAGE ovsMsgReadOp;
+ POVS_MESSAGE ovsMsg;
+ NETLINK_FAMILY *nlFamilyOps;
#ifdef DBG
POVS_DEVICE_EXTENSION ovsExt =
@@ -399,9 +559,293 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject,
code = irpSp->Parameters.DeviceIoControl.IoControlCode;
inputBufferLen = irpSp->Parameters.DeviceIoControl.InputBufferLength;
outputBufferLen = irpSp->Parameters.DeviceIoControl.OutputBufferLength;
- outputBuffer = inputBuffer = irp->AssociatedIrp.SystemBuffer;
+ inputBuffer = irp->AssociatedIrp.SystemBuffer;
+
+ /* Concurrent netlink operations are not supported. */
+ if (InterlockedCompareExchange((LONG volatile *)&instance->inUse, 1, 0)) {
+ status = STATUS_RESOURCE_IN_USE;
+ goto done;
+ }
+
+ /*
+ * Validate the input/output buffer arguments depending on the type of the
+ * operation.
+ */
+ switch (code) {
+ case OVS_IOCTL_TRANSACT:
+ /* Input buffer is mandatory, output buffer is optional. */
+ if (outputBufferLen != 0) {
+ status = MapIrpOutputBuffer(irp, outputBufferLen,
+ sizeof *ovsMsg, &outputBuffer);
+ if (status != STATUS_SUCCESS) {
+ goto done;
+ }
+ ASSERT(outputBuffer);
+ }
+
+ if (inputBufferLen < sizeof (*ovsMsg)) {
+ status = STATUS_NDIS_INVALID_LENGTH;
+ goto done;
+ }
+ ovsMsg = inputBuffer;
+ devOp = OVS_TRANSACTION_DEV_OP;
+ break;
+
+ case OVS_IOCTL_READ:
+ /* Output buffer is mandatory. */
+ if (outputBufferLen != 0) {
+ status = MapIrpOutputBuffer(irp, outputBufferLen,
+ sizeof *ovsMsg, &outputBuffer);
+ if (status != STATUS_SUCCESS) {
+ goto done;
+ }
+ ASSERT(outputBuffer);
+ } else {
+ status = STATUS_NDIS_INVALID_LENGTH;
+ goto done;
+ }
+
+ /*
+ * Operate in the mode that read ioctl is similar to ReadFile(). This
+ * might change as the userspace code gets implemented.
+ */
+ inputBuffer = NULL;
+ inputBufferLen = 0;
+ /* Create an NL message for consumption. */
+ ovsMsg = &ovsMsgReadOp;
+ devOp = OVS_READ_DEV_OP;
+
+ /*
+ * For implementing read (ioctl or otherwise), we need to store some
+ * state in the instance to indicate the previous command. The state can
+ * setup 'ovsMsgReadOp' appropriately.
+ *
+ * XXX: Support for that will be added as the userspace code evolves.
+ */
+ status = STATUS_NOT_IMPLEMENTED;
+ goto done;
+
+ break;
+
+ case OVS_IOCTL_WRITE:
+ /* Input buffer is mandatory. */
+ if (inputBufferLen < sizeof (*ovsMsg)) {
+ status = STATUS_NDIS_INVALID_LENGTH;
+ goto done;
+ }
+
+ ovsMsg = inputBuffer;
+ devOp = OVS_WRITE_DEV_OP;
+ break;
+
+ default:
+ status = STATUS_INVALID_DEVICE_REQUEST;
+ goto done;
+ }
+
+ ASSERT(ovsMsg);
+ switch (ovsMsg->nlMsg.nlmsg_type) {
+ case OVS_WIN_NL_CTRL_FAMILY_ID:
+ nlFamilyOps = &nlControlFamilyOps;
+ break;
+ case OVS_WIN_NL_PACKET_FAMILY_ID:
+ case OVS_WIN_NL_DATAPATH_FAMILY_ID:
+ case OVS_WIN_NL_FLOW_FAMILY_ID:
+ case OVS_WIN_NL_VPORT_FAMILY_ID:
+ status = STATUS_NOT_IMPLEMENTED;
+ goto done;
+
+ default:
+ status = STATUS_INVALID_PARAMETER;
+ goto done;
+ }
+
+ /*
+ * For read operation, the netlink command has already been validated
+ * previously.
+ */
+ if (devOp != OVS_READ_DEV_OP) {
+ status = ValidateNetlinkCmd(devOp, ovsMsg, nlFamilyOps);
+ if (status != STATUS_SUCCESS) {
+ goto done;
+ }
+ }
+
+ status = InvokeNetlinkCmdHandler(irp, fileObject, devOp,
+ ovsMsg, nlFamilyOps,
+ inputBuffer, inputBufferLen,
+ outputBuffer, outputBufferLen,
+ &replyLen);
+
+done:
+ KeMemoryBarrier();
+ instance->inUse = 0;
return OvsCompleteIrpRequest(irp, (ULONG_PTR)replyLen, status);
}
+
+/*
+ * --------------------------------------------------------------------------
+ * Function to validate a netlink command. Only certain combinations of
+ * (device operation, netlink family, command) are valid.
+ *
+ * The command in 'ovsMsg' is accepted when all of the following hold:
+ *   - the family 'nlFamilyOps' lists the command in its command table,
+ *   - 'devOp' is one of the device operations the command supports,
+ *   - the message version is not lower than the family version, and
+ *   - for every command other than OVS_CTRL_CMD_WIN_GET_PID, a switch
+ *     context exists and the 'dp_ifindex' of the message matches its
+ *     datapath number.
+ *
+ * Returns STATUS_SUCCESS when the command is valid and
+ * STATUS_INVALID_PARAMETER otherwise.
+ * --------------------------------------------------------------------------
+ */
+static NTSTATUS
+ValidateNetlinkCmd(UINT32 devOp,
+                   POVS_MESSAGE ovsMsg,
+                   NETLINK_FAMILY *nlFamilyOps)
+{
+    NTSTATUS status = STATUS_INVALID_PARAMETER;
+    UINT16 i;
+
+    for (i = 0; i < nlFamilyOps->opsCount; i++) {
+        if (nlFamilyOps->cmds[i].cmd == ovsMsg->genlMsg.cmd) {
+            /* Validate if the command is valid for the device operation. */
+            if ((devOp & nlFamilyOps->cmds[i].supportedDevOp) == 0) {
+                status = STATUS_INVALID_PARAMETER;
+                goto done;
+            }
+
+            /* Validate the version. */
+            if (nlFamilyOps->version > ovsMsg->genlMsg.version) {
+                status = STATUS_INVALID_PARAMETER;
+                goto done;
+            }
+
+            /* Validate the DP for commands where the DP is actually set. */
+            if (ovsMsg->genlMsg.cmd != OVS_CTRL_CMD_WIN_GET_PID) {
+                BOOLEAN dpMatches;
+
+                /*
+                 * The switch context is read under the control lock. A
+                 * request is only valid if a switch context exists and the
+                 * message addresses its datapath; a mismatch is an error.
+                 */
+                OvsAcquireCtrlLock();
+                dpMatches = (gOvsSwitchContext != NULL &&
+                             ovsMsg->ovsHdr.dp_ifindex ==
+                                 (INT)gOvsSwitchContext->dpNo);
+                OvsReleaseCtrlLock();
+
+                if (!dpMatches) {
+                    status = STATUS_INVALID_PARAMETER;
+                    goto done;
+                }
+            }
+
+            status = STATUS_SUCCESS;
+            break;
+        }
+    }
+
+done:
+    return status;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * Dispatches a netlink message to its command handler.
+ *
+ * The command table of 'nlFamilyOps' is searched for the first entry whose
+ * command id equals the one in 'ovsMsg'. That entry's handler is called with
+ * the IRP, file object and input/output buffers, and its status is returned.
+ * If the family has no entry for the command, STATUS_INVALID_PARAMETER is
+ * returned and no handler runs. 'devOp' is currently unused.
+ * --------------------------------------------------------------------------
+ */
+static NTSTATUS
+InvokeNetlinkCmdHandler(PIRP irp,
+                        PFILE_OBJECT fileObject,
+                        UINT32 devOp,
+                        OVS_MESSAGE *ovsMsg,
+                        NETLINK_FAMILY *nlFamilyOps,
+                        PVOID inputBuffer,
+                        UINT32 inputLength,
+                        PVOID outputBuffer,
+                        UINT32 outputLength,
+                        UINT32 *replyLen)
+{
+    UINT16 idx = 0;
+
+    UNREFERENCED_PARAMETER(devOp);
+
+    while (idx < nlFamilyOps->opsCount) {
+        NETLINK_CMD *entry = &nlFamilyOps->cmds[idx];
+
+        if (entry->cmd == ovsMsg->genlMsg.cmd) {
+            /* First match wins; hand its status straight back. */
+            return entry->handler(irp, fileObject,
+                                  inputBuffer, inputLength,
+                                  outputBuffer, outputLength,
+                                  replyLen);
+        }
+        idx++;
+    }
+
+    /* No handler registered for this command. */
+    return STATUS_INVALID_PARAMETER;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * Each handle on the device is assigned a unique PID when the handle is
+ * created. On platforms that support netlink natively, the PID is available
+ * to userspace when the netlink socket is created. However, without native
+ * netlink support on Windows, OVS datapath generates the PID and lets the
+ * userspace query it.
+ *
+ * This function implements the query. The reply is a zeroed OVS_MESSAGE
+ * written to 'outputBuffer', carrying the sequence number of the request in
+ * 'inputBuffer' and the PID of the open instance attached to 'fileObject'.
+ * '*replyLen' is set to the size of that message.
+ *
+ * Returns STATUS_SUCCESS on success, or STATUS_NDIS_INVALID_LENGTH (without
+ * touching 'outputBuffer' or '*replyLen') if 'outputLength' is too small to
+ * hold an OVS_MESSAGE. 'irp' and 'inputLength' are unused.
+ * --------------------------------------------------------------------------
+ */
+static NTSTATUS
+OvsGetPidCmdHandler(PIRP irp,
+                    PFILE_OBJECT fileObject,
+                    PVOID inputBuffer,
+                    UINT32 inputLength,
+                    PVOID outputBuffer,
+                    UINT32 outputLength,
+                    UINT32 *replyLen)
+{
+    POVS_MESSAGE msgIn = (POVS_MESSAGE)inputBuffer;
+    POVS_MESSAGE msgOut = (POVS_MESSAGE)outputBuffer;
+    POVS_OPEN_INSTANCE instance;
+
+    /* Only the parameters that are genuinely unused are marked as such. */
+    UNREFERENCED_PARAMETER(irp);
+    UNREFERENCED_PARAMETER(inputLength);
+
+    if (outputLength < sizeof *msgOut) {
+        return STATUS_NDIS_INVALID_LENGTH;
+    }
+
+    instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
+
+    RtlZeroMemory(msgOut, sizeof *msgOut);
+    msgOut->nlMsg.nlmsg_seq = msgIn->nlMsg.nlmsg_seq;
+    msgOut->nlMsg.nlmsg_pid = instance->pid;
+    *replyLen = sizeof *msgOut;
+    /* XXX: We might need to return the DP index as well. */
+
+    return STATUS_SUCCESS;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * Utility function to map the output buffer in an IRP. The buffer is assumed
+ * to have been passed down using METHOD_OUT_DIRECT (Direct I/O).
+ *
+ * On success '*buffer' receives the system address of the IRP's MDL.
+ *
+ * Returns:
+ *   STATUS_INVALID_PARAMETER       if 'irp' or 'buffer' is NULL, either
+ *                                  length is zero, or the IRP has no MDL.
+ *   STATUS_NDIS_INVALID_LENGTH     if 'bufferLength' < 'requiredLength'.
+ *   STATUS_INSUFFICIENT_RESOURCES  if the MDL could not be mapped ('*buffer'
+ *                                  is set to NULL in this case).
+ *   STATUS_SUCCESS                 otherwise.
+ * --------------------------------------------------------------------------
+ */
+static NTSTATUS
+MapIrpOutputBuffer(PIRP irp,
+                   UINT32 bufferLength,
+                   UINT32 requiredLength,
+                   PVOID *buffer)
+{
+    PVOID mapped;
+
+    ASSERT(irp);
+    ASSERT(buffer);
+    ASSERT(bufferLength);
+    ASSERT(requiredLength);
+
+    /* Free builds compile the asserts out, so reject bad arguments here. */
+    if (irp == NULL || buffer == NULL) {
+        return STATUS_INVALID_PARAMETER;
+    }
+    if (bufferLength == 0 || requiredLength == 0) {
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    if (requiredLength > bufferLength) {
+        return STATUS_NDIS_INVALID_LENGTH;
+    }
+
+    if (irp->MdlAddress == NULL) {
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    mapped = MmGetSystemAddressForMdlSafe(irp->MdlAddress,
+                                          NormalPagePriority);
+    *buffer = mapped;
+
+    return (mapped != NULL) ? STATUS_SUCCESS : STATUS_INSUFFICIENT_RESOURCES;
+}
+
#endif /* OVS_USE_NL_INTERFACE */
diff --git a/datapath-windows/ovsext/Datapath.h b/datapath-windows/ovsext/Datapath.h
index b68010bcf..2bea0fd2c 100644
--- a/datapath-windows/ovsext/Datapath.h
+++ b/datapath-windows/ovsext/Datapath.h
@@ -42,6 +42,21 @@ typedef struct _OVS_OPEN_INSTANCE {
PFILE_OBJECT fileObject;
PVOID eventQueue;
PVOID packetQueue;
+ UINT32 pid;
+
+ /*
+ * On platforms that support netlink natively, there's generally some form of
+ * serialization between concurrent calls to netlink sockets. However, OVS
+ * userspace guarantees that a given netlink handle is not concurrently used.
+ * Despite this, we do want to have some basic checks in the kernel to make
+ * sure that things don't break if there are concurrent calls.
+ *
+ * This is generally not an issue since kernel data structure access should
+ * be synchronized anyway. The only reason to have this safeguard is to protect
+ * the state in "state-aware" read calls which rely on previous state. This
+ * restriction might go away as the userspace code gets implemented.
+ */
+ INT inUse;
} OVS_OPEN_INSTANCE, *POVS_OPEN_INSTANCE;
NDIS_STATUS OvsCreateDeviceObject(NDIS_HANDLE ovsExtDriverHandle);
@@ -52,6 +67,16 @@ POVS_OPEN_INSTANCE OvsGetOpenInstance(PFILE_OBJECT fileObject,
NTSTATUS OvsCompleteIrpRequest(PIRP irp, ULONG_PTR infoPtr, NTSTATUS status);
+/*
+ * Structure of any message passed between userspace and kernel: a netlink
+ * header, followed by a generic netlink header and an OVS header.
+ */
+typedef struct _OVS_MESSAGE {
+ struct nlmsghdr nlMsg; /* nlmsg_type selects the netlink family. */
+ struct genlmsghdr genlMsg; /* Carries the command and its version. */
+ struct ovs_header ovsHdr; /* Carries dp_ifindex, the datapath index. */
+ /* Variable length nl_attrs follow. */
+} OVS_MESSAGE, *POVS_MESSAGE;
+
#endif /* __OVS_DATAPATH_H_ */
#endif /* OVS_USE_NL_INTERFACE */
diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj
index 5ecc81a24..2b3e6fd6d 100644
--- a/datapath-windows/ovsext/ovsext.vcxproj
+++ b/datapath-windows/ovsext/ovsext.vcxproj
@@ -100,7 +100,7 @@
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
- <PreprocessorDefinitions>%(PreprocessorDefinitions);NDIS_WDM=1;NDIS630=1;OVS_WIN_DP=1;OVS_USE_NL_INTERFACE=0</PreprocessorDefinitions>
+ <PreprocessorDefinitions>%(PreprocessorDefinitions);NDIS_WDM=1;NDIS630=1;OVS_WIN_DP=1;OVS_USE_NL_INTERFACE=1</PreprocessorDefinitions>
</ClCompile>
<Midl>
<PreprocessorDefinitions>%(PreprocessorDefinitions);NDIS_WDM=1;NDIS630=1</PreprocessorDefinitions>