summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNithin Raju <nithin@vmware.com>2015-04-28 14:35:37 -0700
committerBen Pfaff <blp@nicira.com>2015-04-29 07:35:50 -0700
commita51a50862a9b9933067fef7651783b2a611f524a (patch)
treecf74e348e667a367c09e4b8cf263dc54a0090d28
parent9719aee5c7bf4d3b99a0b25eecf86ea0eef652e8 (diff)
downloadopenvswitch-a51a50862a9b9933067fef7651783b2a611f524a.tar.gz
ovs-hyperv: make kernel return values netlink socket like
In this patch, we make changes to usersapce as well as kernel datapath on hyperv to make it more netlink socket like. Previously, the kernel datapath did not distinguish between "transport errors" and other errors. Netlink semantics dictate that netlink functions should only return an error only in the case of a "transport error" which is generally something fatal. Eg. failure to communicate with the OVS module, or an invalid command altogether. Other errors such as an unsupported action, or an invalid flow key is not considered a "transport error", and in such cases, netlink functions are to return success with a 'struct nlmsgerr' populated in the output buffer. This patch implements these semantics. Signed-off-by: Nithin Raju <nithin@vmware.com> Acked-by: Sorin Vinturis <svinturis@cloudbasesolutions.com> Reported-at: https://github.com/openvswitch/ovs-issues/issues/72 Signed-off-by: Ben Pfaff <blp@nicira.com>
-rw-r--r--datapath-windows/ovsext/Datapath.c61
-rw-r--r--datapath-windows/ovsext/Datapath.h2
-rw-r--r--lib/netlink-socket.c102
3 files changed, 114 insertions, 51 deletions
diff --git a/datapath-windows/ovsext/Datapath.c b/datapath-windows/ovsext/Datapath.c
index 1dead33bf..7646f0a91 100644
--- a/datapath-windows/ovsext/Datapath.c
+++ b/datapath-windows/ovsext/Datapath.c
@@ -78,10 +78,11 @@ typedef struct _NETLINK_CMD {
/* A netlink family is a group of commands. */
typedef struct _NETLINK_FAMILY {
CHAR *name;
- UINT32 id;
+ UINT16 id;
UINT8 version;
- UINT8 pad;
+ UINT8 pad1;
UINT16 maxAttr;
+ UINT16 pad2;
NETLINK_CMD *cmds; /* Array of netlink commands and handlers. */
UINT16 opsCount;
} NETLINK_FAMILY, *PNETLINK_FAMILY;
@@ -143,12 +144,12 @@ NETLINK_CMD nlControlFamilyCmdOps[] = {
},
{ .cmd = OVS_CTRL_CMD_EVENT_NOTIFY,
.handler = OvsReadEventCmdHandler,
- .supportedDevOp = OVS_READ_EVENT_DEV_OP,
+ .supportedDevOp = OVS_READ_DEV_OP,
.validateDpIndex = FALSE,
},
{ .cmd = OVS_CTRL_CMD_READ_NOTIFY,
.handler = OvsReadPacketCmdHandler,
- .supportedDevOp = OVS_READ_PACKET_DEV_OP,
+ .supportedDevOp = OVS_READ_DEV_OP,
.validateDpIndex = FALSE,
}
};
@@ -799,12 +800,17 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject,
inputBufferLen = 0;
ovsMsg = &ovsMsgReadOp;
- ovsMsg->nlMsg.nlmsgType = OVS_WIN_NL_CTRL_FAMILY_ID;
+ RtlZeroMemory(ovsMsg, sizeof *ovsMsg);
+ ovsMsg->nlMsg.nlmsgLen = sizeof *ovsMsg;
+ ovsMsg->nlMsg.nlmsgType = nlControlFamilyOps.id;
ovsMsg->nlMsg.nlmsgPid = instance->pid;
+
/* An "artificial" command so we can use NL family function table*/
ovsMsg->genlMsg.cmd = (code == OVS_IOCTL_READ_EVENT) ?
OVS_CTRL_CMD_EVENT_NOTIFY :
OVS_CTRL_CMD_READ_NOTIFY;
+ ovsMsg->genlMsg.version = nlControlFamilyOps.version;
+
devOp = OVS_READ_DEV_OP;
break;
@@ -895,8 +901,8 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject,
}
/*
- * For read operation, the netlink command has already been validated
- * previously.
+ * For read operation, avoid duplicate validation since 'ovsMsg' is either
+ * "artificial" or was copied from a previously validated 'ovsMsg'.
*/
if (devOp != OVS_READ_DEV_OP) {
status = ValidateNetlinkCmd(devOp, instance, ovsMsg, nlFamilyOps);
@@ -982,7 +988,9 @@ done:
/*
* --------------------------------------------------------------------------
- * Function to invoke the netlink command handler.
+ * Function to invoke the netlink command handler. The function also stores
+ * the return value of the handler function to construct a 'NL_ERROR' message,
+ * and in turn returns success to the caller.
* --------------------------------------------------------------------------
*/
static NTSTATUS
@@ -1004,6 +1012,43 @@ InvokeNetlinkCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
}
}
+ /*
+ * Netlink socket semantics dictate that the return value of the netlink
+ * function should be an error ONLY under fatal conditions. If the message
+ * made it all the way to the handler function, it is not a fatal condition.
+ * Absorb the error returned by the handler function into a 'struct
+ * NL_ERROR' and populate the 'output buffer' to return to userspace.
+ *
+ * This behavior is obviously applicable only to netlink commands that
+ * specify an 'output buffer'. For other commands, we return the error as
+ * is.
+ *
+ * 'STATUS_PENDING' is a special return value and userspace is equipped to
+ * handle it.
+ */
+ if (status != STATUS_SUCCESS && status != STATUS_PENDING) {
+ if (usrParamsCtx->devOp != OVS_WRITE_DEV_OP && *replyLen == 0) {
+ NL_ERROR nlError = NlMapStatusToNlErr(status);
+ POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
+ POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
+ usrParamsCtx->outputBuffer;
+
+ ASSERT(msgError);
+ NlBuildErrorMsg(msgIn, msgError, nlError);
+ *replyLen = msgError->nlMsg.nlmsgLen;
+ }
+
+ if (*replyLen != 0) {
+ status = STATUS_SUCCESS;
+ }
+ }
+
+#ifdef DBG
+ if (usrParamsCtx->devOp != OVS_WRITE_DEV_OP) {
+ ASSERT(status == STATUS_PENDING || *replyLen != 0 || status == STATUS_SUCCESS);
+ }
+#endif
+
return status;
}
diff --git a/datapath-windows/ovsext/Datapath.h b/datapath-windows/ovsext/Datapath.h
index 863afc4ca..dbc9dea58 100644
--- a/datapath-windows/ovsext/Datapath.h
+++ b/datapath-windows/ovsext/Datapath.h
@@ -32,8 +32,6 @@
#define OVS_READ_DEV_OP (1 << 0)
#define OVS_WRITE_DEV_OP (1 << 1)
#define OVS_TRANSACTION_DEV_OP (1 << 2)
-#define OVS_READ_EVENT_DEV_OP (1 << 3)
-#define OVS_READ_PACKET_DEV_OP (1 << 4)
typedef struct _OVS_DEVICE_EXTENSION {
INT numberOpenInstance;
diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
index a809b1e0d..42eb232e5 100644
--- a/lib/netlink-socket.c
+++ b/lib/netlink-socket.c
@@ -475,6 +475,8 @@ nl_sock_send__(struct nl_sock *sock, const struct ofpbuf *msg,
retval = -1;
/* XXX: Map to a more appropriate error based on GetLastError(). */
errno = EINVAL;
+ VLOG_DBG_RL(&rl, "fatal driver failure in write: %s",
+ ovs_lasterror_to_string());
} else {
retval = msg->size;
}
@@ -564,7 +566,10 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
DWORD bytes;
if (!DeviceIoControl(sock->handle, sock->read_ioctl,
NULL, 0, tail, sizeof tail, &bytes, NULL)) {
+ VLOG_DBG_RL(&rl, "fatal driver failure in transact: %s",
+ ovs_lasterror_to_string());
retval = -1;
+ /* XXX: Map to a more appropriate error. */
errno = EINVAL;
} else {
retval = bytes;
@@ -789,14 +794,27 @@ nl_sock_transact_multiple__(struct nl_sock *sock,
uint8_t reply_buf[65536];
for (i = 0; i < n; i++) {
DWORD reply_len;
+ bool ret;
struct nl_transaction *txn = transactions[i];
struct nlmsghdr *request_nlmsg, *reply_nlmsg;
- if (!DeviceIoControl(sock->handle, OVS_IOCTL_TRANSACT,
- txn->request->data,
- txn->request->size,
- reply_buf, sizeof reply_buf,
- &reply_len, NULL)) {
+ ret = DeviceIoControl(sock->handle, OVS_IOCTL_TRANSACT,
+ txn->request->data,
+ txn->request->size,
+ reply_buf, sizeof reply_buf,
+ &reply_len, NULL);
+
+ if (ret && reply_len == 0) {
+ /*
+ * The current transaction did not produce any data to read and that
+ * is not an error as such. Continue with the remainder of the
+ * transactions.
+ */
+ txn->error = 0;
+ if (txn->reply) {
+ ofpbuf_clear(txn->reply);
+ }
+ } else if (!ret) {
/* XXX: Map to a more appropriate error. */
error = EINVAL;
VLOG_DBG_RL(&rl, "fatal driver failure: %s",
@@ -804,48 +822,50 @@ nl_sock_transact_multiple__(struct nl_sock *sock,
break;
}
- if (reply_len < sizeof *reply_nlmsg) {
- nl_sock_record_errors__(transactions, n, 0);
- VLOG_DBG_RL(&rl, "insufficient length of reply %#"PRIu32
- " for seq: %#"PRIx32, reply_len, request_nlmsg->nlmsg_seq);
- break;
- }
-
- /* Validate the sequence number in the reply. */
- request_nlmsg = nl_msg_nlmsghdr(txn->request);
- reply_nlmsg = (struct nlmsghdr *)reply_buf;
+ if (reply_len != 0) {
+ if (reply_len < sizeof *reply_nlmsg) {
+ nl_sock_record_errors__(transactions, n, 0);
+ VLOG_DBG_RL(&rl, "insufficient length of reply %#"PRIu32
+ " for seq: %#"PRIx32, reply_len, request_nlmsg->nlmsg_seq);
+ break;
+ }
- if (request_nlmsg->nlmsg_seq != reply_nlmsg->nlmsg_seq) {
- ovs_assert(request_nlmsg->nlmsg_seq == reply_nlmsg->nlmsg_seq);
- VLOG_DBG_RL(&rl, "mismatched seq request %#"PRIx32
- ", reply %#"PRIx32, request_nlmsg->nlmsg_seq,
- reply_nlmsg->nlmsg_seq);
- break;
- }
+ /* Validate the sequence number in the reply. */
+ request_nlmsg = nl_msg_nlmsghdr(txn->request);
+ reply_nlmsg = (struct nlmsghdr *)reply_buf;
- /* Handle errors embedded within the netlink message. */
- ofpbuf_use_stub(&tmp_reply, reply_buf, sizeof reply_buf);
- tmp_reply.size = sizeof reply_buf;
- if (nl_msg_nlmsgerr(&tmp_reply, &txn->error)) {
- if (txn->reply) {
- ofpbuf_clear(txn->reply);
- }
- if (txn->error) {
- VLOG_DBG_RL(&rl, "received NAK error=%d (%s)",
- error, ovs_strerror(txn->error));
+ if (request_nlmsg->nlmsg_seq != reply_nlmsg->nlmsg_seq) {
+ ovs_assert(request_nlmsg->nlmsg_seq == reply_nlmsg->nlmsg_seq);
+ VLOG_DBG_RL(&rl, "mismatched seq request %#"PRIx32
+ ", reply %#"PRIx32, request_nlmsg->nlmsg_seq,
+ reply_nlmsg->nlmsg_seq);
+ break;
}
- } else {
- txn->error = 0;
- if (txn->reply) {
- /* Copy the reply to the buffer specified by the caller. */
- if (reply_len > txn->reply->allocated) {
- ofpbuf_reinit(txn->reply, reply_len);
+
+ /* Handle errors embedded within the netlink message. */
+ ofpbuf_use_stub(&tmp_reply, reply_buf, sizeof reply_buf);
+ tmp_reply.size = sizeof reply_buf;
+ if (nl_msg_nlmsgerr(&tmp_reply, &txn->error)) {
+ if (txn->reply) {
+ ofpbuf_clear(txn->reply);
+ }
+ if (txn->error) {
+ VLOG_DBG_RL(&rl, "received NAK error=%d (%s)",
+ error, ovs_strerror(txn->error));
+ }
+ } else {
+ txn->error = 0;
+ if (txn->reply) {
+ /* Copy the reply to the buffer specified by the caller. */
+ if (reply_len > txn->reply->allocated) {
+ ofpbuf_reinit(txn->reply, reply_len);
+ }
+ memcpy(txn->reply->data, reply_buf, reply_len);
+ txn->reply->size = reply_len;
}
- memcpy(txn->reply->data, reply_buf, reply_len);
- txn->reply->size = reply_len;
}
+ ofpbuf_uninit(&tmp_reply);
}
- ofpbuf_uninit(&tmp_reply);
/* Count the number of successful transactions. */
(*done)++;