From c259baae45d76592eb14db5abde20aa72e7f2605 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Tue, 31 Aug 2010 10:13:55 +0900
Subject: Debian: Add Joe Perches to debian/copyright

This syncs debian/copyright with the AUTHORS file.

Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 debian/copyright | 1 +
 1 file changed, 1 insertion(+)

diff --git a/debian/copyright b/debian/copyright
index 56c4f2bbe..f4f4df6d5 100644
--- a/debian/copyright
+++ b/debian/copyright
@@ -15,6 +15,7 @@ Upstream Authors (from AUTHORS):
 	Jean Tourrilhes         jt@hpl.hp.com
 	Jeremy Stribling        strib@nicira.com
 	Jesse Gross             jesse@nicira.com
+	Joe Perches             joe@perches.com
 	Jun Nakajima            jun.nakajima@intel.com
 	Justin Pettit           jpettit@nicira.com
 	Keith Amidon            keith@nicira.com
-- 
cgit v1.2.1


From cd8055cce3e9dcd20c6125f064d73ef04b99aee4 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Tue, 31 Aug 2010 11:47:28 +0900
Subject: datapath: Include net/udp.h in vport-capwap.c

net/udp.h is currently included indirectly via linux/ipv6.h which is
in turn included indirectly via linux/ip.h.  However, this breaks down
if CONFIG_IPV6 is not set, leading to a number of build errors.

Signed-off-by: Simon Horman <horms@verge.net.au>
[Jesse: shortened commit message]
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 datapath/vport-capwap.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/datapath/vport-capwap.c b/datapath/vport-capwap.c
index ce8cc43e2..7ae3790d7 100644
--- a/datapath/vport-capwap.c
+++ b/datapath/vport-capwap.c
@@ -21,6 +21,7 @@
 #include <net/inet_frag.h>
 #include <net/ip.h>
 #include <net/protocol.h>
+#include <net/udp.h>
 
 #include "tunnel.h"
 #include "vport.h"
-- 
cgit v1.2.1


From 476d56f51d4935ff437537db151a49c7db8cd897 Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Tue, 31 Aug 2010 14:16:14 -0700
Subject: Release Open vSwitch 1.1.0-pre1

---
 ChangeLog        | 7 +++++++
 configure.ac     | 2 +-
 debian/changelog | 9 +++++++++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/ChangeLog b/ChangeLog
index 29fcd2d8c..153d96695 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+v1.1.0-pre1 - 31 Aug 2010
+-------------------------
+    - OpenFlow 1.0 slicing (QoS) functionality
+    - Python bindings for configuration database (no write support)
+    - Performance and scalability improvements
+    - Bug fixes
+
 v1.0.1 - 31 May 2010
 --------------------
     - New "patch" interface type
diff --git a/configure.ac b/configure.ac
index 3088fa5d8..7d7b4fe6f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 AC_PREREQ(2.64)
-AC_INIT(openvswitch, 1.0.1, ovs-bugs@openvswitch.org)
+AC_INIT(openvswitch, 1.1.0-pre1, ovs-bugs@openvswitch.org)
 NX_BUILDNR
 AC_CONFIG_SRCDIR([datapath/datapath.c])
 AC_CONFIG_MACRO_DIR([m4])
diff --git a/debian/changelog b/debian/changelog
index 216e089e1..972226d58 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,12 @@
+openvswitch (1.1.0-pre1) unstable; urgency=low
+
+  * OpenFlow 1.0 slicing (QoS) functionality
+  * Python bindings for configuration database (no write support)
+  * Performance and scalability improvements
+  * Bug fixes
+
+ -- Open vSwitch team <dev@openvswitch.org>  Tue, 31 Aug 2010 23:20:00 +0000
+
 openvswitch (1.0.1) unstable; urgency=low
 
   * New upstream version.
-- 
cgit v1.2.1


From fdf2037829befd7e7fc337a477950fdc7f1cb09b Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Wed, 1 Sep 2010 02:26:06 -0700
Subject: XenServer builds barf on hyphens in versions.

---
 ChangeLog        | 4 ++--
 configure.ac     | 2 +-
 debian/changelog | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 153d96695..6e7217a5e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,5 @@
-v1.1.0-pre1 - 31 Aug 2010
--------------------------
+v1.1.0pre1 - 31 Aug 2010
+------------------------
     - OpenFlow 1.0 slicing (QoS) functionality
     - Python bindings for configuration database (no write support)
     - Performance and scalability improvements
diff --git a/configure.ac b/configure.ac
index 7d7b4fe6f..21f34739c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 AC_PREREQ(2.64)
-AC_INIT(openvswitch, 1.1.0-pre1, ovs-bugs@openvswitch.org)
+AC_INIT(openvswitch, 1.1.0pre1, ovs-bugs@openvswitch.org)
 NX_BUILDNR
 AC_CONFIG_SRCDIR([datapath/datapath.c])
 AC_CONFIG_MACRO_DIR([m4])
diff --git a/debian/changelog b/debian/changelog
index 972226d58..89eb5176d 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,4 +1,4 @@
-openvswitch (1.1.0-pre1) unstable; urgency=low
+openvswitch (1.1.0pre1) unstable; urgency=low
 
   * OpenFlow 1.0 slicing (QoS) functionality
   * Python bindings for configuration database (no write support)
-- 
cgit v1.2.1


From f79cb67e689566cfbf3071e0ac0e29923ada5a97 Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Tue, 31 Aug 2010 18:41:32 -0700
Subject: netflow: Send multiple records for byte counts > UINT32_MAX

When a NetFlow record is to be sent for a flow that had more than
UINT32_MAX bytes, we used to clamp the byte count to UINT32_MAX.  With this
change, we will send out multiple records to account for all the traffic.
---
 ofproto/netflow.c | 69 ++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 46 insertions(+), 23 deletions(-)

diff --git a/ofproto/netflow.c b/ofproto/netflow.c
index 50ab80a69..015208ac9 100644
--- a/ofproto/netflow.c
+++ b/ofproto/netflow.c
@@ -103,26 +103,19 @@ struct netflow {
     long long int reconfig_time;  /* When we reconfigured the timeouts. */
 };
 
-void
-netflow_expire(struct netflow *nf, struct netflow_flow *nf_flow,
-               struct ofexpired *expired)
+static void
+gen_netflow_rec(struct netflow *nf, struct netflow_flow *nf_flow,
+                struct ofexpired *expired, 
+                uint32_t packet_count, uint32_t byte_count)
 {
     struct netflow_v5_header *nf_hdr;
     struct netflow_v5_record *nf_rec;
-    struct timespec now;
-
-    nf_flow->last_expired += nf->active_timeout;
 
-    /* NetFlow only reports on IP packets and we should only report flows
-     * that actually have traffic. */
-    if (expired->flow.dl_type != htons(ETH_TYPE_IP) ||
-        expired->packet_count - nf_flow->packet_count_off == 0) {
-        return;
-    }
+    if (!nf->packet.size) {
+        struct timespec now;
 
-    time_wall_timespec(&now);
+        time_wall_timespec(&now);
 
-    if (!nf->packet.size) {
         nf_hdr = ofpbuf_put_zeros(&nf->packet, sizeof *nf_hdr);
         nf_hdr->version = htons(NETFLOW_V5_VERSION);
         nf_hdr->count = htons(0);
@@ -150,10 +143,8 @@ netflow_expire(struct netflow *nf, struct netflow_flow *nf_flow,
         nf_rec->input = htons(expired->flow.in_port);
         nf_rec->output = htons(nf_flow->output_iface);
     }
-    nf_rec->packet_count = htonl(MIN(expired->packet_count -
-                                     nf_flow->packet_count_off, UINT32_MAX));
-    nf_rec->byte_count = htonl(MIN(expired->byte_count -
-                                   nf_flow->byte_count_off, UINT32_MAX));
+    nf_rec->packet_count = htonl(packet_count);
+    nf_rec->byte_count = htonl(byte_count);
     nf_rec->init_time = htonl(nf_flow->created - nf->boot_time);
     nf_rec->used_time = htonl(MAX(nf_flow->created, expired->used)
                              - nf->boot_time);
@@ -172,16 +163,48 @@ netflow_expire(struct netflow *nf, struct netflow_flow *nf_flow,
     nf_rec->ip_proto = expired->flow.nw_proto;
     nf_rec->ip_tos = expired->flow.nw_tos;
 
+    /* NetFlow messages are limited to 30 records. */
+    if (ntohs(nf_hdr->count) >= 30) {
+        netflow_run(nf);
+    }
+}
+
+void
+netflow_expire(struct netflow *nf, struct netflow_flow *nf_flow,
+               struct ofexpired *expired)
+{
+    uint64_t pkt_delta = expired->packet_count - nf_flow->packet_count_off;
+    uint64_t byte_delta = expired->byte_count - nf_flow->byte_count_off;
+
+    nf_flow->last_expired += nf->active_timeout;
+
+    /* NetFlow only reports on IP packets and we should only report flows
+     * that actually have traffic. */
+    if (expired->flow.dl_type != htons(ETH_TYPE_IP) || pkt_delta == 0) {
+        return;
+    }
+
+    /* NetFlow v5 records are limited to 32-bit counters.  If we've
+     * wrapped a counter, send as multiple records so we don't lose
+     * track of any traffic.  We try to evenly distribute the packet and
+     * byte counters, so that the bytes-per-packet lengths don't look
+     * wonky across the records. */
+    while (byte_delta) {
+        int n_recs = (byte_delta + UINT32_MAX - 1) / UINT32_MAX;
+        uint32_t pkt_count = pkt_delta / n_recs;
+        uint32_t byte_count = byte_delta / n_recs;
+        
+        gen_netflow_rec(nf, nf_flow, expired, pkt_count, byte_count);
+
+        pkt_delta -= pkt_count;
+        byte_delta -= byte_count;
+    }
+
     /* Update flow tracking data. */
     nf_flow->created = 0;
     nf_flow->packet_count_off = expired->packet_count;
     nf_flow->byte_count_off = expired->byte_count;
     nf_flow->tcp_flags = 0;
-
-    /* NetFlow messages are limited to 30 records. */
-    if (ntohs(nf_hdr->count) >= 30) {
-        netflow_run(nf);
-    }
 }
 
 void
-- 
cgit v1.2.1


From 68efcbec41b0acfd8bb7579a5d38afd71b6daf7c Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 1 Sep 2010 12:55:38 -0700
Subject: ofpbuf: Add ofpbuf_new_with_headroom(), ofpbuf_clone_with_headroom().

These new functions simplify an increasingly common usage pattern.

Suggested-by: Jesse Gross <jesse@nicira.com>
---
 lib/dpif-linux.c  |  3 +--
 lib/dpif-netdev.c |  3 +--
 lib/ofpbuf.c      | 20 ++++++++++++++++++++
 lib/ofpbuf.h      |  3 +++
 ofproto/pktbuf.c  |  4 ++--
 5 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index 52d73c6bb..2c688e3af 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -478,8 +478,7 @@ dpif_linux_recv(struct dpif *dpif_, struct ofpbuf **bufp)
     int retval;
     int error;
 
-    buf = ofpbuf_new(65536 + DPIF_RECV_MSG_PADDING);
-    ofpbuf_reserve(buf, DPIF_RECV_MSG_PADDING);
+    buf = ofpbuf_new_with_headroom(65536, DPIF_RECV_MSG_PADDING);
     retval = read(dpif->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf));
     if (retval < 0) {
         error = errno;
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 08a721340..323f36411 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1262,8 +1262,7 @@ dp_netdev_output_control(struct dp_netdev *dp, const struct ofpbuf *packet,
     }
 
     msg_size = sizeof *header + packet->size;
-    msg = ofpbuf_new(msg_size + DPIF_RECV_MSG_PADDING);
-    ofpbuf_reserve(msg, DPIF_RECV_MSG_PADDING);
+    msg = ofpbuf_new_with_headroom(msg_size, DPIF_RECV_MSG_PADDING);
     header = ofpbuf_put_uninit(msg, sizeof *header);
     header->type = queue_no;
     header->length = msg_size;
diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c
index 5693eefda..bf5567251 100644
--- a/lib/ofpbuf.c
+++ b/lib/ofpbuf.c
@@ -75,12 +75,32 @@ ofpbuf_new(size_t size)
     return b;
 }
 
+/* Creates and returns a new ofpbuf with an initial capacity of 'size +
+ * headroom' bytes, reserving the first 'headroom' bytes as headroom. */
+struct ofpbuf *
+ofpbuf_new_with_headroom(size_t size, size_t headroom)
+{
+    struct ofpbuf *b = ofpbuf_new(size + headroom);
+    ofpbuf_reserve(b, headroom);
+    return b;
+}
+
 struct ofpbuf *
 ofpbuf_clone(const struct ofpbuf *buffer)
 {
     return ofpbuf_clone_data(buffer->data, buffer->size);
 }
 
+/* Creates and returns a new ofpbuf whose data are copied from 'buffer'.   The
+ * returned ofpbuf will additionally have 'headroom' bytes of headroom. */
+struct ofpbuf *
+ofpbuf_clone_with_headroom(const struct ofpbuf *buffer, size_t headroom)
+{
+    struct ofpbuf *b = ofpbuf_new_with_headroom(buffer->size, headroom);
+    ofpbuf_put(b, buffer->data, buffer->size);
+    return b;
+}
+
 struct ofpbuf *
 ofpbuf_clone_data(const void *data, size_t size)
 {
diff --git a/lib/ofpbuf.h b/lib/ofpbuf.h
index 736b8f5e5..5e20aab0b 100644
--- a/lib/ofpbuf.h
+++ b/lib/ofpbuf.h
@@ -48,7 +48,10 @@ void ofpbuf_uninit(struct ofpbuf *);
 void ofpbuf_reinit(struct ofpbuf *, size_t);
 
 struct ofpbuf *ofpbuf_new(size_t);
+struct ofpbuf *ofpbuf_new_with_headroom(size_t, size_t headroom);
 struct ofpbuf *ofpbuf_clone(const struct ofpbuf *);
+struct ofpbuf *ofpbuf_clone_with_headroom(const struct ofpbuf *,
+                                          size_t headroom);
 struct ofpbuf *ofpbuf_clone_data(const void *, size_t);
 void ofpbuf_delete(struct ofpbuf *);
 
diff --git a/ofproto/pktbuf.c b/ofproto/pktbuf.c
index 67adb5606..aa9029542 100644
--- a/ofproto/pktbuf.c
+++ b/ofproto/pktbuf.c
@@ -112,8 +112,8 @@ pktbuf_save(struct pktbuf *pb, struct ofpbuf *buffer, uint16_t in_port)
     if (++p->cookie >= COOKIE_MAX) {
         p->cookie = 0;
     }
-    p->buffer = ofpbuf_new(sizeof(struct ofp_packet_in) + buffer->size);
-    ofpbuf_reserve(p->buffer, sizeof(struct ofp_packet_in));
+    p->buffer = ofpbuf_new_with_headroom(buffer->size,
+                                         sizeof(struct ofp_packet_in));
     ofpbuf_put(p->buffer, buffer->data, buffer->size);
     p->timeout = time_msec() + OVERWRITE_MSECS;
     p->in_port = in_port;
-- 
cgit v1.2.1


From c9b5816cfd8fccc2abedd258d914882033bfcf0a Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 1 Sep 2010 10:17:40 -0700
Subject: ofproto: Get enough headroom in fail-open when connected to
 controller.

Since commit 750638bb "ofproto: Avoid ofpbuf_clone() for OFPAT_CONTROLLER
common case," send_packet_in() needs at least DPIF_RECV_MSG_PADDING bytes
of headroom, which ofpbuf_clone() doesn't supply.  This commit should fix
that.

This fixes an assertion failure in ofpbuf_prealloc_headroom() via
send_packet_in().

Reported-by: Justin Pettit <jpettit@nicira.com>
---
 ofproto/ofproto.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index adc52827c..844083d8b 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -4154,7 +4154,8 @@ handle_odp_miss_msg(struct ofproto *p, struct ofpbuf *packet)
          *
          * See the top-level comment in fail-open.c for more information.
          */
-        send_packet_in(p, ofpbuf_clone(packet));
+        send_packet_in(p, ofpbuf_clone_with_headroom(packet,
+                                                     DPIF_RECV_MSG_PADDING));
     }
 
     ofpbuf_pull(packet, sizeof *msg);
-- 
cgit v1.2.1


From e61070c32030d6d00e2eeae213d219320a7cbd10 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 1 Sep 2010 16:49:19 -0700
Subject: Rename "xs-vif-mac" external_ids key to "attached-mac", for
 generality.

This can be useful on systems other than XenServer so there is no reason
to make it look XenServer-specific.

CC: Jeremy Stribling <strib@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
---
 vswitchd/vswitch.xml                | 15 ++++++++-------
 xenserver/etc_xensource_scripts_vif |  2 +-
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 38dc6a1aa..979fd5dfe 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -665,8 +665,15 @@
         integrators should either use the Open vSwitch development
         mailing list to coordinate on common key-value definitions, or
         choose key names that are likely to be unique.  The currently
-        defined common key-value pair is:
+        defined common key-value pairs are:
         <dl>
+          <dt><code>attached-mac</code></dt>
+          <dd>
+            The MAC address programmed into the ``virtual hardware'' for this
+            interface, in the form
+            <var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>.
+            For Citrix XenServer, this is the value of the <code>MAC</code>
+            field in the VIF record for this interface.</dd>
           <dt><code>iface-id</code></dt>
           <dd>A system-unique identifier for the interface.  On XenServer, 
             this will commonly be the same as <code>xs-vif-uuid</code>.</dd>
@@ -689,12 +696,6 @@
           <dd>The virtual network to which this interface is attached.</dd>
           <dt><code>xs-vm-uuid</code></dt>
           <dd>The VM to which this interface belongs.</dd>
-          <dt><code>xs-vif-mac</code></dt>
-          <dd>The MAC address programmed into the "virtual hardware" for this
-              interface, in the
-              form <var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>.
-              For Citrix XenServer, this is the value of the <code>MAC</code>
-              field in the VIF record for this interface.</dd>
         </dl>
       </column>
 
diff --git a/xenserver/etc_xensource_scripts_vif b/xenserver/etc_xensource_scripts_vif
index 91b75c123..f27ff5b40 100755
--- a/xenserver/etc_xensource_scripts_vif
+++ b/xenserver/etc_xensource_scripts_vif
@@ -114,7 +114,7 @@ handle_vswitch_vif_details()
 
     local address=$(xenstore-read "/local/domain/$DOMID/device/vif/$DEVID/mac" 2>/dev/null)
     if [ -n "${address}" ] ; then
-        set_vif_external_id "xs-vif-mac" "${address}"
+        set_vif_external_id "attached-mac" "${address}"
     fi
 
     if $xs550; then
-- 
cgit v1.2.1


From 82e959195414246dece1e4c636b40d06d65211a7 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 1 Sep 2010 10:50:49 -0700
Subject: debian: Generate authorship in debian/copyright from AUTHORS.

---
 debian/.gitignore   |  1 +
 debian/automake.mk  | 10 ++++++++
 debian/copyright    | 68 -----------------------------------------------------
 debian/copyright.in | 41 ++++++++++++++++++++++++++++++++
 4 files changed, 52 insertions(+), 68 deletions(-)
 delete mode 100644 debian/copyright
 create mode 100644 debian/copyright.in

diff --git a/debian/.gitignore b/debian/.gitignore
index 3beef4405..7f43aa6ed 100644
--- a/debian/.gitignore
+++ b/debian/.gitignore
@@ -3,6 +3,7 @@
 *.substvars
 /control
 /corekeeper
+/copyright
 /files
 /nicira-switch
 /openvswitch
diff --git a/debian/automake.mk b/debian/automake.mk
index 5a23d4632..ba9ea861d 100644
--- a/debian/automake.mk
+++ b/debian/automake.mk
@@ -4,6 +4,7 @@ EXTRA_DIST += \
 	debian/control \
 	debian/control.modules.in \
 	debian/copyright \
+	debian/copyright.in \
 	debian/corekeeper.cron.daily \
 	debian/corekeeper.init \
 	debian/corekeeper.override \
@@ -50,3 +51,12 @@ check-debian-changelog-version:
 	fi
 ALL_LOCAL += check-debian-changelog-version
 DIST_HOOKS += check-debian-changelog-version
+
+$(srcdir)/debian/copyright: AUTHORS debian/copyright.in
+	{ sed -n -e '/%AUTHORS%/q' -e p < $(srcdir)/debian/copyright.in;   \
+	  sed '1,/^$$/d' $(srcdir)/AUTHORS |				   \
+		sed -n -e '/^$$/q' -e 's/^/  /p';			   \
+	  sed -e '1,/%AUTHORS%/d' $(srcdir)/debian/copyright.in;	   \
+	} > $@
+
+DISTCLEANFILES += debian/copyright
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index f4f4df6d5..000000000
--- a/debian/copyright
+++ /dev/null
@@ -1,68 +0,0 @@
-The original sources for this package can be found at:
-
-	http://openvswitch.org/
-
-
-Upstream Authors (from AUTHORS):
-
-	Andy Southgate          andy.southgate@citrix.com
-	Ben Pfaff               blp@nicira.com
-	Bryan Phillippe         bp@toroki.com
-	Dan Wendlandt           dan@nicira.com
-	David Erickson          derickso@stanford.edu
-	Glen Gibb               grg@stanford.edu
-	Ian Campbell            Ian.Campbell@citrix.com
-	Jean Tourrilhes         jt@hpl.hp.com
-	Jeremy Stribling        strib@nicira.com
-	Jesse Gross             jesse@nicira.com
-	Joe Perches             joe@perches.com
-	Jun Nakajima            jun.nakajima@intel.com
-	Justin Pettit           jpettit@nicira.com
-	Keith Amidon            keith@nicira.com
-	Martin Casado           casado@nicira.com
-	Natasha Gude            natasha@nicira.com
-	Neil McKee              neil.mckee@inmon.com
-	Paul Fazzone            pfazzone@nicira.com
-	Reid Price              reid@nicira.com
-	Simon Horman            horms@verge.net.au
-	Tetsuo NAKAGAWA         nakagawa@mxc.nes.nec.co.jp
-	Thomas Lacroix          thomas.lacroix@citrix.com
-	Todd Deshane            deshantm@gmail.com
-	Tom Everman             teverman@google.com
-	Tsvi Slonim             tsvi@toroki.com
-	Wei Yongjun             yjwei@cn.fujitsu.com
-	Yu Zhiguo               yuzg@cn.fujitsu.com
-
-Upstream Copyright Holders:
-
-	Copyright (c) 2007, 2008, 2009, 2010 Nicira Networks.
-	Copyright (c) 2010 Jean Tourrilhes - HP-Labs.
-	Copyright (c) 2008,2009,2010 Citrix Systems, Inc.
-	and authors listed above.
-
-License:
-
-* The following components are licensed under the GNU General Public Licence
-  version 2.
-
-	datapath/
-
-  On Debian systems, the complete text of the GNU General Public Licence
-  version 2 can be found in `/usr/share/common-licenses/GPL-2'
-
-* The following components are dual-licensed under the
-  GNU General Public Licence version 3 and the Apache Licence Version 2.0.
-
-	include/openvswitch/
-
-  On Debian systems, the complete text of the GNU General Public Licence
-  version 2 can be found in `/usr/share/common-licenses/GPL-2'.
-  On Debian systems, the complete text of the Apache License version 2.0
-  can be found in '/usr/share/common-licenses/Apache-2.0'.
-
-* All other components of this package are licensed under
-  The Apache License Version 2.0.
-
-  On Debian systems, the complete text of the Apache License version 2.0
-  can be found in '/usr/share/common-licenses/Apache-2.0'.
-
diff --git a/debian/copyright.in b/debian/copyright.in
new file mode 100644
index 000000000..ae1a78868
--- /dev/null
+++ b/debian/copyright.in
@@ -0,0 +1,41 @@
+The original sources for this package can be found at:
+
+	http://openvswitch.org/
+
+Upstream Authors (from AUTHORS):
+
+%AUTHORS%
+
+Upstream Copyright Holders:
+
+	Copyright (c) 2007, 2008, 2009, 2010 Nicira Networks.
+	Copyright (c) 2010 Jean Tourrilhes - HP-Labs.
+	Copyright (c) 2008,2009,2010 Citrix Systems, Inc.
+	and authors listed above.
+
+License:
+
+* The following components are licensed under the GNU General Public Licence
+  version 2.
+
+	datapath/
+
+  On Debian systems, the complete text of the GNU General Public Licence
+  version 2 can be found in `/usr/share/common-licenses/GPL-2'
+
+* The following components are dual-licensed under the
+  GNU General Public Licence version 3 and the Apache Licence Version 2.0.
+
+	include/openvswitch/
+
+  On Debian systems, the complete text of the GNU General Public Licence
+  version 2 can be found in `/usr/share/common-licenses/GPL-2'.
+  On Debian systems, the complete text of the Apache License version 2.0
+  can be found in '/usr/share/common-licenses/Apache-2.0'.
+
+* All other components of this package are licensed under
+  The Apache License Version 2.0.
+
+  On Debian systems, the complete text of the Apache License version 2.0
+  can be found in '/usr/share/common-licenses/Apache-2.0'.
+
-- 
cgit v1.2.1


From e215ebca67720188724344eb198c5045c02e375f Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Wed, 8 Sep 2010 10:04:47 -0700
Subject: datapath: Check for backported __wsum and __sum16.

Reported-by: Alexey I. Froloff <raorn@altlinux.org>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
---
 acinclude.m4                                         | 2 ++
 datapath/linux-2.6/compat-2.6/include/linux/kernel.h | 1 +
 datapath/linux-2.6/compat-2.6/include/linux/types.h  | 7 ++-----
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/acinclude.m4 b/acinclude.m4
index 80794dac3..f1322fa0e 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -188,6 +188,8 @@ AC_DEFUN([OVS_CHECK_LINUX26_COMPAT], [
 
   OVS_GREP_IFELSE([$KSRC26/include/linux/types.h], [bool],
                   [OVS_DEFINE([HAVE_BOOL_TYPE])])
+  OVS_GREP_IFELSE([$KSRC26/include/linux/types.h], [__wsum],
+                  [OVS_DEFINE([HAVE_CSUM_TYPES])])
 
   OVS_GREP_IFELSE([$KSRC26/include/net/checksum.h], [csum_unfold],
                   [OVS_DEFINE([HAVE_CSUM_UNFOLD])])
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/kernel.h b/datapath/linux-2.6/compat-2.6/include/linux/kernel.h
index 1f65c099a..13361f78d 100644
--- a/datapath/linux-2.6/compat-2.6/include/linux/kernel.h
+++ b/datapath/linux-2.6/compat-2.6/include/linux/kernel.h
@@ -6,6 +6,7 @@
 #include <linux/log2.h>
 #endif
 
+#include <linux/version.h>
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
 #undef pr_emerg
 #define pr_emerg(fmt, ...) \
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/types.h b/datapath/linux-2.6/compat-2.6/include/linux/types.h
index d88baf71c..b989d96c3 100644
--- a/datapath/linux-2.6/compat-2.6/include/linux/types.h
+++ b/datapath/linux-2.6/compat-2.6/include/linux/types.h
@@ -3,13 +3,10 @@
 
 #include_next <linux/types.h>
 
-#include <linux/version.h>
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
-
+#ifndef HAVE_CSUM_TYPES
 typedef __u16 __bitwise __sum16;
 typedef __u32 __bitwise __wsum;
-
-#endif /* linux kernel < 2.6.20 */
+#endif
 
 #ifndef HAVE_BOOL_TYPE
 typedef _Bool bool;
-- 
cgit v1.2.1


From 933df876ffa272d9d5768edf7fc5465261888ad2 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Fri, 10 Sep 2010 09:17:29 -0700
Subject: ovs-ofctl: Add support for drop_spoofed_arp action.

Requested-by: Michael Mao <mmao@nicira.com>
---
 include/openflow/nicira-ext.h | 2 +-
 lib/ofp-parse.c               | 5 +++++
 utilities/ovs-ofctl.8.in      | 9 +++++++++
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h
index 885e01da6..c97478faf 100644
--- a/include/openflow/nicira-ext.h
+++ b/include/openflow/nicira-ext.h
@@ -141,7 +141,7 @@ enum nx_action_subtype {
      *
      * This is useful because OpenFlow does not provide a way to match on the
      * Ethernet addresses inside ARP packets, so there is no other way to drop
-     * spoofed ARPs other than sending every packet up to the controller. */
+     * spoofed ARPs other than sending every ARP packet to a controller. */
     NXAST_DROP_SPOOFED_ARP
 };
 
diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c
index cc1419a0e..06d5bd11d 100644
--- a/lib/ofp-parse.c
+++ b/lib/ofp-parse.c
@@ -263,6 +263,11 @@ str_to_action(char *str, struct ofpbuf *b)
             nast->vendor = htonl(NX_VENDOR_ID);
             nast->subtype = htons(NXAST_SET_TUNNEL);
             nast->tun_id = htonl(str_to_u32(arg));
+        } else if (!strcasecmp(act, "drop_spoofed_arp")) {
+            struct nx_action_header *nah;
+            nah = put_action(b, sizeof *nah, OFPAT_VENDOR);
+            nah->vendor = htonl(NX_VENDOR_ID);
+            nah->subtype = htons(NXAST_DROP_SPOOFED_ARP);
         } else if (!strcasecmp(act, "output")) {
             put_output_action(b, str_to_u32(arg));
         } else if (!strcasecmp(act, "enqueue")) {
diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in
index f51f87a14..7de788e1c 100644
--- a/utilities/ovs-ofctl.8.in
+++ b/utilities/ovs-ofctl.8.in
@@ -451,6 +451,15 @@ addition to any other actions in this flow entry.  Recursive
 If outputting to a port that encapsulates the packet in a tunnel and supports
 an identifier (such as GRE), sets the identifier to \fBid\fR.
 .
+.IP \fBdrop_spoofed_arp\fR
+Stops processing further actions, if the packet being processed is an
+Ethernet+IPv4 ARP packet for which the source Ethernet address inside
+the ARP packet differs from the source Ethernet address in the
+Ethernet header.
+.
+This is useful because OpenFlow does not provide a way to match on the
+Ethernet addresses inside ARP packets, so there is no other way to
+drop spoofed ARPs other than sending every ARP packet to a controller.
 .RE
 .
 .IP
-- 
cgit v1.2.1


From 6784cb57cf432197c497764cdb01cecd68f10362 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Fri, 10 Sep 2010 09:57:01 -0700
Subject: vswitchd: Add some references for the HTB classifier to the
 documentation.

---
 vswitchd/vswitch.xml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 979fd5dfe..b9d8aaa9f 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -775,7 +775,12 @@
         defined types are listed below:</p>
       <dl>
         <dt><code>linux-htb</code></dt>
-        <dd>Linux ``hierarchy token bucket'' classifier.</dd>
+        <dd>
+          Linux ``hierarchy token bucket'' classifier.  See tc-htb(8) (also at
+          <code>http://linux.die.net/man/8/tc-htb</code>) and the HTB manual
+          (<code>http://luxik.cdi.cz/~devik/qos/htb/manual/userg.htm</code>)
+          for information on how this classifier works and how to configure it.
+        </dd>
       </dl>
     </column>
 
-- 
cgit v1.2.1


From 29e21ea26944731f9b9681fecedc81cd5c86f342 Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Fri, 10 Sep 2010 14:20:49 -0700
Subject: xenserver: Don't monitor external-ids until XAPI is up

monitor-external-ids can't complete all its tasks until XAPI is up.  The
daemon is usually started before XAPI, so it can miss events.  This
commit causes the daemon to block until XAPI is finished initializing.
---
 xenserver/usr_share_openvswitch_scripts_monitor-external-ids | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids
index c87171f06..f91801d22 100755
--- a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids
+++ b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids
@@ -21,9 +21,11 @@
 # Bridge table and duplicates its value to the preferred "xs-network-uuids".
 
 import getopt
+import os
 import subprocess
 import sys
 import syslog
+import time
 
 import XenAPI
 
@@ -189,6 +191,11 @@ def main(argv):
     idl = ovs.db.idl.Idl(remote, "Open_vSwitch", monitor_uuid_schema_cb)
 
     ovs.daemon.daemonize()
+
+    # This daemon is usually started before XAPI, but to complete our
+    # tasks, we need it.  Wait here until it's up.
+    while not os.path.exists("/var/run/xapi_init_complete.cookie"):
+        time.sleep(1)
  
     bridges = {}
     interfaces = {}
-- 
cgit v1.2.1


From b14c5fe9bd85b440d1727934c84d5f7fded051f7 Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Fri, 10 Sep 2010 14:32:41 -0700
Subject: xenserver: Don't delete pidfile when stopping monitor-external-ids

It's not necessary to explicitly delete the pidfile when stopping
monitor-external-ids through the init script, since the daemon will take
care of that.
---
 xenserver/etc_init.d_openvswitch | 1 -
 1 file changed, 1 deletion(-)

diff --git a/xenserver/etc_init.d_openvswitch b/xenserver/etc_init.d_openvswitch
index 7b86d4c66..050844570 100755
--- a/xenserver/etc_init.d_openvswitch
+++ b/xenserver/etc_init.d_openvswitch
@@ -358,7 +358,6 @@ function stop {
     stop_daemon OVSDB_SERVER "$ovsdb_server"
     if [ -e /var/run/openvswitch/monitor-external-ids.pid ]; then
         kill `cat /var/run/openvswitch/monitor-external-ids.pid`
-        rm /var/run/openvswitch/monitor-external-ids.pid
     fi
     rm -f /var/lock/subsys/openvswitch
 }
-- 
cgit v1.2.1


From d59051362fa8ac4369f1be69ac942a52c9a424b9 Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Mon, 13 Sep 2010 21:55:56 -0700
Subject: Release Open vSwitch 1.1.0pre2

---
 ChangeLog        | 4 ++++
 configure.ac     | 2 +-
 debian/changelog | 6 ++++++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/ChangeLog b/ChangeLog
index 6e7217a5e..c816ed7c9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+v1.1.0pre2 - 13 Sep 2010
+------------------------
+    - Bug fixes
+
 v1.1.0pre1 - 31 Aug 2010
 ------------------------
     - OpenFlow 1.0 slicing (QoS) functionality
diff --git a/configure.ac b/configure.ac
index 21f34739c..08a6f0fdf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 AC_PREREQ(2.64)
-AC_INIT(openvswitch, 1.1.0pre1, ovs-bugs@openvswitch.org)
+AC_INIT(openvswitch, 1.1.0pre2, ovs-bugs@openvswitch.org)
 NX_BUILDNR
 AC_CONFIG_SRCDIR([datapath/datapath.c])
 AC_CONFIG_MACRO_DIR([m4])
diff --git a/debian/changelog b/debian/changelog
index 89eb5176d..6a8ab5879 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+openvswitch (1.1.0pre2) unstable; urgency=low
+
+  * Bug fixes
+
+ -- Open vSwitch team <dev@openvswitch.org>  Mon, 13 Sep 2010 21:50:00 +0000
+
 openvswitch (1.1.0pre1) unstable; urgency=low
 
   * OpenFlow 1.0 slicing (QoS) functionality
-- 
cgit v1.2.1


From a44be3f0cf616166f550ba65769766a577b4eaf5 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Mon, 13 Sep 2010 09:16:29 -0700
Subject: REPORTING-BUGS: Rewrite based on experience.

Burying the description of the problem, which is usually the most important
part in my experience, at the bottom of the REPORTING-BUGS file might be
the reason why we don't get good descriptions sometimes.  It is certainly
not the reason in other cases, but we might as well prioritize a bit
better.
---
 REPORTING-BUGS | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/REPORTING-BUGS b/REPORTING-BUGS
index 75da3d6eb..812bfba0f 100644
--- a/REPORTING-BUGS
+++ b/REPORTING-BUGS
@@ -5,8 +5,17 @@ We are eager to hear from users about problems that they have
 encountered with Open vSwitch.  This file documents how best to report
 bugs so as to ensure that they can be fixed as quickly as possible.
 
-Please report bugs by sending email to bugs@openvswitch.org.  Include
-as much of the following information as you can in your report:
+Please report bugs by sending email to bugs@openvswitch.org.  
+
+The most important parts of your bug report are the following:
+
+	* What you did that made the problem appear.
+
+	* What you expected to happen.
+
+	* What actually happened.
+
+Please also include the following information:
 
         * The Open vSwitch version number (as output by "ovs-vswitchd
           --version").
@@ -16,6 +25,8 @@ as much of the following information as you can in your report:
 
         * Any local patches or changes you have applied (if any).
 
+The following are also handy sometimes:
+
         * The kernel version on which Open vSwitch is running (from
           /proc/version) and the distribution and version number of
           your OS (e.g. "Centos 5.0").
@@ -28,15 +39,7 @@ as much of the following information as you can in your report:
         * If you have Open vSwitch configured to connect to an
           OpenFlow controller, the output of "ovs-ofctl show <bridge>"
           for each <bridge> configured in the vswitchd configuration
-          file.
-
-        * A description of the problem, which should include:
-
-                - What you did that make the problem appear.
-
-                - What you expected to happen.
-
-                - What actually happened.
+          database.
 
         * A fix or workaround, if you have one.
 
-- 
cgit v1.2.1


From fd2a9392411147654dad43f93741a1aa634187b5 Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Tue, 14 Sep 2010 08:36:55 -0700
Subject: datapath: Increase default MTU on patch ports

The default MTU on patch ports was 1500, which would cause jumbo frames
to get dropped between the ends of the patch.  It also dropped the MTU
of attached bridges to no more than 1500 bytes.  This patch increases
the default MTU to 65535.  Long term, we should eliminate MTU on patch
ports entirely.

Signed-off-by: Justin Pettit <jpettit@nicira.com>
---
 datapath/vport-patch.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/datapath/vport-patch.c b/datapath/vport-patch.c
index d55a1bf2d..62fd71f79 100644
--- a/datapath/vport-patch.c
+++ b/datapath/vport-patch.c
@@ -136,7 +136,10 @@ static struct vport *patch_create(const char *name, const void __user *config)
 	}
 
 	vport_gen_rand_ether_addr(patch_vport->devconf->eth_addr);
-	patch_vport->devconf->mtu = ETH_DATA_LEN;
+
+    /* Make the default MTU fairly large so that it doesn't become the
+     * bottleneck on systems using jumbo frames. */
+	patch_vport->devconf->mtu = 65535;
 
 	return vport;
 
-- 
cgit v1.2.1


From 722d19c504351a3e1a6f64e5a01ff9806eb089a4 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Tue, 14 Sep 2010 13:32:36 -0700
Subject: datapath: Increase maximum number of actions per flow.

Until now the number of actions in a flow has been limited to what fits in
a page.  Each action is 8 bytes, and on 32-bit architectures there is a
12-byte header, so with 4-kB pages that limits flows to 510 actions.  We
and Citrix have noticed that OVS stops working properly after about 509
VIFs are added to a bridge.  According to log messages this is the reason:
at this point it is no longer possible to flood a packet to all ports.

This commit should help, by increasing the maximum number of actions in a
flow.  In the long term, though, we should adopt use of port groups or
otherwise reduce the number of actions needed to flood a packet.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Bug #3573.
NIC-234.
---
 datapath/flow.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/datapath/flow.c b/datapath/flow.c
index dbfe5dd73..7684c061a 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -108,7 +108,10 @@ struct sw_flow_actions *flow_actions_alloc(size_t n_actions)
 {
 	struct sw_flow_actions *sfa;
 
-	if (n_actions > (PAGE_SIZE - sizeof *sfa) / sizeof(union odp_action))
+	/* At least DP_MAX_PORTS actions are required to be able to flood a
+	 * packet to every port.  Factor of 2 allows for setting VLAN tags,
+	 * etc. */
+	if (n_actions > 2 * DP_MAX_PORTS)
 		return ERR_PTR(-EINVAL);
 
 	sfa = kmalloc(sizeof *sfa + n_actions * sizeof(union odp_action),
-- 
cgit v1.2.1


From 60bb134ae3aa7f60d2b20ce007b2a52bd9b14d14 Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Tue, 14 Sep 2010 15:10:46 -0700
Subject: vswitchd: Remove duplicate "external_id" from Interface table

---
 vswitchd/vswitch.ovsschema | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
index c8a4963af..a7d257036 100644
--- a/vswitchd/vswitch.ovsschema
+++ b/vswitchd/vswitch.ovsschema
@@ -139,10 +139,7 @@
          "ephemeral": true},
        "status": {
          "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"},
-         "ephemeral": true},
-       "external_ids": {
-         "type": {"key": "string", "value": "string",
-                  "min": 0, "max": "unlimited"}}}},
+         "ephemeral": true}}},
    "QoS": {
      "columns": {
        "type": {
-- 
cgit v1.2.1


From 24926bc2276a480d5a36667d26020f828a7122c2 Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Tue, 14 Sep 2010 17:57:53 -0700
Subject: ovsdb-tool: Remove reference to non-implemented "extract-schema"
 command

While useful sounding, ovsdb-tool does not actually implement a command
called "extract-schema".
---
 ovsdb/ovsdb-tool.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c
index 0da208590..f4bb701d1 100644
--- a/ovsdb/ovsdb-tool.c
+++ b/ovsdb/ovsdb-tool.c
@@ -110,7 +110,6 @@ usage(void)
            "  create DB SCHEMA   create DB with the given SCHEMA\n"
            "  compact DB [DST]   compact DB in-place (or to DST)\n"
            "  convert DB SCHEMA [DST]   convert DB to SCHEMA (to DST)\n"
-           "  extract-schema DB  print DB's schema on stdout\n"
            "  query DB TRNS      execute read-only transaction on DB\n"
            "  transact DB TRNS   execute read/write transaction on DB\n"
            "  show-log DB        prints information about DB's log entries\n",
-- 
cgit v1.2.1


From 92467099ee05b930b060d34d05b81bb1322e6fbf Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 15 Sep 2010 12:47:10 -0700
Subject: ofp-util: Also accept NXAST_DROP_SPOOFED_ARP.

Reported-by: Bryan Fulton <bryan@nicira.com>
Reported-by: Michael Mao <mmao@nicira.com>
Bug #3655.
---
 AUTHORS        | 1 +
 lib/ofp-util.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/AUTHORS b/AUTHORS
index bf0e34271..9e1dfa0b9 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -33,6 +33,7 @@ The following additional people are mentioned in commit logs as having
 provided helpful bug reports or suggestions.
 
 Brandon Heller          brandonh@stanford.edu
+Bryan Fulton            bryan@nicira.com
 Cedric Hobbs            cedric@nicira.com
 Ghanem Bahri            bahri.ghanem@gmail.com
 Henrik Amren            henrik@nicira.com
diff --git a/lib/ofp-util.c b/lib/ofp-util.c
index 89f368950..5171900a7 100644
--- a/lib/ofp-util.c
+++ b/lib/ofp-util.c
@@ -564,6 +564,7 @@ check_nicira_action(const union ofp_action *a, unsigned int len)
     switch (ntohs(nah->subtype)) {
     case NXAST_RESUBMIT:
     case NXAST_SET_TUNNEL:
+    case NXAST_DROP_SPOOFED_ARP:
         return check_action_exact_len(a, len, 16);
     default:
         return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_BAD_VENDOR_TYPE);
-- 
cgit v1.2.1


From a77d89b84ad05d880f9ad7c5b5bd3f7d221d76f3 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 15 Sep 2010 13:26:08 -0700
Subject: Properly print drop_spoofed_arp actions when decoding OpenFlow and
 ODP.

Also fix formatting of unknown Nicira actions in OpenFlow.
---
 lib/odp-util.c  | 3 +++
 lib/ofp-print.c | 6 +++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/lib/odp-util.c b/lib/odp-util.c
index 442c939a7..798e42540 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -95,6 +95,9 @@ format_odp_action(struct ds *ds, const union odp_action *a)
     case ODPAT_POP_PRIORITY:
         ds_put_cstr(ds, "pop_priority");
         break;
+    case ODPAT_DROP_SPOOFED_ARP:
+        ds_put_cstr(ds, "drop_spoofed_arp");
+        break;
     default:
         ds_put_format(ds, "***bad action 0x%"PRIx16"***", a->type);
         break;
diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index 870487816..78f3649ba 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -200,8 +200,12 @@ ofp_print_nx_action(struct ds *string, const struct nx_action_header *nah)
         break;
     }
 
+    case NXAST_DROP_SPOOFED_ARP:
+        ds_put_cstr(string, "drop_spoofed_arp");
+        break;
+
     default:
-        ds_put_format(string, "***unknown Nicira action:%d***\n",
+        ds_put_format(string, "***unknown Nicira action:%d***",
                       ntohs(nah->subtype));
     }
 }
-- 
cgit v1.2.1


From a154533795474bd8d13a2a935c4b6719215d6907 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 15 Sep 2010 15:21:03 -0700
Subject: ovs-ofctl, ovs-controller: Disable flow idle timeout by default.

Until now, flows set up by ovs-ofctl and by "ovs-controller --with-flows"
by default expired after 60 seconds of inactivity.  This was surprising,
especially in the latter case where one is normally trying to set up
permanent flows.  Even in the former case, however, we can't think of a
good reason that flows added by ovs-ofctl should expire by default.  So
this commit makes flows permanent by default.

Reported-by: Michael Mao <mmao@nicira.com>
---
 lib/ofp-parse.c          | 4 +---
 utilities/ovs-ofctl.8.in | 4 ++--
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c
index 06d5bd11d..405008c67 100644
--- a/lib/ofp-parse.c
+++ b/lib/ofp-parse.c
@@ -33,8 +33,6 @@
 
 VLOG_DEFINE_THIS_MODULE(ofp_parse)
 
-#define DEFAULT_IDLE_TIMEOUT 60
-
 static uint32_t
 str_to_u32(const char *str)
 {
@@ -402,7 +400,7 @@ parse_ofp_str(char *string, struct ofp_match *match, struct ofpbuf *actions,
         *priority = OFP_DEFAULT_PRIORITY;
     }
     if (idle_timeout) {
-        *idle_timeout = DEFAULT_IDLE_TIMEOUT;
+        *idle_timeout = OFP_FLOW_PERMANENT;
     }
     if (hard_timeout) {
         *hard_timeout = OFP_FLOW_PERMANENT;
diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in
index 7de788e1c..bbe747b21 100644
--- a/utilities/ovs-ofctl.8.in
+++ b/utilities/ovs-ofctl.8.in
@@ -495,8 +495,8 @@ optional fields:
 .TP
 \fBidle_timeout=\fIseconds\fR
 Causes the flow to expire after the given number of seconds of
-inactivity.  A value of 0 prevents a flow from expiring due to
-inactivity.  The default is 60 seconds.
+inactivity.  A value of 0 (the default) prevents a flow from expiring due to
+inactivity.
 .
 .IP \fBhard_timeout=\fIseconds\fR
 Causes the flow to expire after the given number of seconds,
-- 
cgit v1.2.1


From 8ba1fd2fb9eb616ec028027e303c1664185c88e7 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Wed, 15 Sep 2010 16:52:48 -0700
Subject: datapath: Check IS_ERR() in do_execute().

flow_actions_alloc() returns an error code in the form of a pointer
but we checked that the pointer was not NULL, which is always true.
This caused oopses on allocation errors when we would write into
an invalid pointer.

NIC-234

Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
---
 datapath/datapath.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 5ee915740..fe37ec1ed 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -1326,10 +1326,11 @@ static int do_execute(struct datapath *dp, const struct odp_execute *execute)
 	if (execute->length < ETH_HLEN || execute->length > 65535)
 		goto error;
 
-	err = -ENOMEM;
 	actions = flow_actions_alloc(execute->n_actions);
-	if (!actions)
+	if (IS_ERR(actions)) {
+		err = PTR_ERR(actions);
 		goto error;
+	}
 
 	err = -EFAULT;
 	if (copy_from_user(actions->actions, execute->actions,
-- 
cgit v1.2.1


From bbf4f269a391724d886f66b3661b10e5a434e2e8 Mon Sep 17 00:00:00 2001
From: Vivien Bernet-Rollande <vbr@soprive.net>
Date: Thu, 16 Sep 2010 10:56:55 -0700
Subject: brcompat_mod: Check if user has CAP_NET_ADMIN in ioctl handler

This patch checks that the user calling ioctl() to create, delete, or
modify bridges has the CAP_NET_ADMIN capability. This prevents
unprivileged users from modifying the bridge configuration through
brcompatd. The checks are actually the same performed in
net/bridge/br_ioctl.c by the Linux kernel.

Signed-off-by: Vivien Bernet-Rollande <vbr@soprive.net>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 datapath/brcompat.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/datapath/brcompat.c b/datapath/brcompat.c
index 3e8401154..2113eae0f 100644
--- a/datapath/brcompat.c
+++ b/datapath/brcompat.c
@@ -84,6 +84,9 @@ static int brc_add_del_bridge(char __user *uname, int add)
 	struct sk_buff *request;
 	char name[IFNAMSIZ];
 
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
 	if (copy_from_user(name, uname, IFNAMSIZ))
 		return -EFAULT;
 
@@ -196,6 +199,9 @@ static int brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
 	struct net_device *port;
 	int err;
 
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
 	port = __dev_get_by_index(&init_net, port_ifindex);
 	if (!port)
 		return -EINVAL;
-- 
cgit v1.2.1


From 8e236e71f2e52ee47b2787fa722f2dfb36c2c284 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Thu, 16 Sep 2010 11:02:15 -0700
Subject: AUTHORS: Add Vivien Bernet-Rollande.

---
 AUTHORS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/AUTHORS b/AUTHORS
index 9e1dfa0b9..a5d042fb3 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -26,6 +26,7 @@ Thomas Lacroix          thomas.lacroix@citrix.com
 Todd Deshane            deshantm@gmail.com
 Tom Everman             teverman@google.com
 Tsvi Slonim             tsvi@toroki.com
+Vivien Bernet-Rollande  vbr@soprive.net
 Wei Yongjun             yjwei@cn.fujitsu.com
 Yu Zhiguo               yuzg@cn.fujitsu.com
 
-- 
cgit v1.2.1


From 5c16362b51eed5eb76c7e92629e2e7900b8c2ca9 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 1 Sep 2010 15:12:23 -0700
Subject: ovsdb-doc: Be less explicit in ovs-vswitchd.conf.db(5).

The documentation doesn't really need to say that a field may be "between
0 and 4294967295 characters long".

This regression was introduced by commit 991559357 "Implement initial
Python bindings for Open vSwitch database."
---
 python/ovs/db/types.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/ovs/db/types.py b/python/ovs/db/types.py
index 6e7ef11db..d42ac7fe8 100644
--- a/python/ovs/db/types.py
+++ b/python/ovs/db/types.py
@@ -290,14 +290,14 @@ class BaseType(object):
                 return 'at most %s' % commafy(self.max)
             else:
                 return 'at most %g' % self.max
-        elif self.min_length is not None and self.max_length is not None:
+        elif self.min_length != 0 and self.max_length != sys.maxint:
             if self.min_length == self.max_length:
                 return 'exactly %d characters long' % (self.min_length)
             else:
                 return 'between %d and %d characters long' % (self.min_length, self.max_length)
-        elif self.min_length is not None:
+        elif self.min_length != 0:
             return 'at least %d characters long' % self.min_length
-        elif self.max_length is not None:
+        elif self.max_length != sys.maxint:
             return 'at most %d characters long' % self.max_length
         else:
             return ''
-- 
cgit v1.2.1


From 7ac60147cdba1af0b066e0e3cb3ca83f9bbf4101 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Wed, 15 Sep 2010 01:51:15 -0700
Subject: debian: Init script should put core dumps in an appropriate place

Before this commit the init script did not change the cwd of
openvswitch processes it started.  Thus, core files were created in
the root directory.  This patch changes the cwd of openvswitch to
a more reasonable location.
---
 AUTHORS                        |  1 +
 debian/openvswitch-switch.init | 12 ++++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/AUTHORS b/AUTHORS
index a5d042fb3..75a6de5c9 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -6,6 +6,7 @@ Ben Pfaff               blp@nicira.com
 Bryan Phillippe         bp@toroki.com
 Dan Wendlandt           dan@nicira.com
 David Erickson          derickso@stanford.edu
+Ethan Jackson           ethan@nicira.com
 Glen Gibb               grg@stanford.edu
 Ian Campbell            Ian.Campbell@citrix.com
 Jean Tourrilhes         jt@hpl.hp.com
diff --git a/debian/openvswitch-switch.init b/debian/openvswitch-switch.init
index a933a21ae..d86063037 100755
--- a/debian/openvswitch-switch.init
+++ b/debian/openvswitch-switch.init
@@ -230,12 +230,16 @@ case "$1" in
             install -d -m 755 -o root -g root /var/log/openvswitch
         fi
 
+        if [ ! -d /var/log/openvswitch/cores ]; then
+            install -d -m 755 -o root -g root /var/log/openvswitch/cores
+        fi
+
         # Start ovsdb-server.
         set --
         set -- "$@" /etc/openvswitch/conf.db
         set -- "$@" --verbose=ANY:console:emer --verbose=ANY:syslog:err
         set -- "$@" --log-file=/var/log/openvswitch/ovsdb-server.log
-        set -- "$@" --detach --pidfile $monitor_opt
+        set -- "$@" --detach --no-chdir --pidfile $monitor_opt
         set -- "$@" --remote punix:/var/run/openvswitch/db.sock
         set -- "$@" --remote db:Open_vSwitch,managers
         set -- "$@" --private-key=db:SSL,private_key
@@ -245,6 +249,7 @@ case "$1" in
         echo -n "Starting ovsdb-server: "
         start-stop-daemon --start --quiet \
             --pidfile /var/run/openvswitch/ovsdb-server.pid \
+            --chdir /var/log/openvswitch/cores              \
             --exec $ovsdb_server -- "$@"
         if running ovsdb-server; then
             echo "ovsdb-server."
@@ -258,12 +263,13 @@ case "$1" in
         set --
         set -- "$@" --verbose=ANY:console:emer --verbose=ANY:syslog:err
         set -- "$@" --log-file=/var/log/openvswitch/ovs-vswitchd.log
-        set -- "$@" --detach --pidfile $monitor_opt
+        set -- "$@" --detach --no-chdir --pidfile $monitor_opt
         set -- "$@" unix:/var/run/openvswitch/db.sock
         set -- "$@" $OVS_VSWITCHD_OPTS
         echo -n "Starting ovs-vswitchd: "
         start-stop-daemon --start --quiet \
             --pidfile /var/run/openvswitch/ovs-vswitchd.pid \
+            --chdir /var/log/openvswitch/cores              \
             --exec $ovs_vswitchd -- "$@"
         if running ovs-vswitchd; then
             echo "ovs-vswitchd."
@@ -275,12 +281,14 @@ case "$1" in
         echo -n "Stopping ovs-vswitchd: "
         start-stop-daemon --stop --quiet --oknodo --retry 5 \
             --pidfile /var/run/openvswitch/ovs-vswitchd.pid \
+            --chdir /var/log/openvswitch/cores              \
             --exec $ovs_vswitchd
         echo "ovs-vswitchd."
 
         echo -n "Stopping ovsdb-server: "
         start-stop-daemon --stop --quiet --oknodo --retry 5 \
             --pidfile /var/run/openvswitch/ovsdb-server.pid \
+            --chdir /var/log/openvswitch/cores              \
             --exec $ovsdb_server
         echo "ovsdb-server."
         ;;
-- 
cgit v1.2.1


From b828c2f5fa580412f7c3afae03862b9dcce6f576 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Wed, 15 Sep 2010 01:51:40 -0700
Subject: debian: Created a debian equivalent to xen-bugtool

ovs-bugtool creates a tarball of useful information which people
can submit with bug reports.  The source is copied from xen-bugtool
with the Xen-specific parts removed or changed.
---
 debian/automake.mk                |    1 +
 debian/control                    |    2 +-
 debian/copyright.in               |   21 +
 debian/openvswitch-common.install |    1 +
 debian/ovs-bugtool                | 1110 +++++++++++++++++++++++++++++++++++++
 5 files changed, 1134 insertions(+), 1 deletion(-)
 create mode 100755 debian/ovs-bugtool

diff --git a/debian/automake.mk b/debian/automake.mk
index ba9ea861d..a5a6e05a3 100644
--- a/debian/automake.mk
+++ b/debian/automake.mk
@@ -38,6 +38,7 @@ EXTRA_DIST += \
 	debian/openvswitch-switch.postinst \
 	debian/openvswitch-switch.postrm \
 	debian/openvswitch-switch.template \
+	debian/ovs-bugtool \
 	debian/rules \
 	debian/rules.modules
 
diff --git a/debian/control b/debian/control
index c07eca08a..edecffda5 100644
--- a/debian/control
+++ b/debian/control
@@ -23,7 +23,7 @@ Description: Source code for Open vSwitch datapath Linux module
 
 Package: openvswitch-common
 Architecture: any
-Depends: ${shlibs:Depends}, openssl, ${misc:Depends}
+Depends: ${shlibs:Depends}, openssl, ${misc:Depends}, python, ethtool
 Description: Open vSwitch common components
  openvswitch-common provides components required by both openvswitch-switch
  and openvswitch-controller.
diff --git a/debian/copyright.in b/debian/copyright.in
index ae1a78868..0cac63471 100644
--- a/debian/copyright.in
+++ b/debian/copyright.in
@@ -33,6 +33,27 @@ License:
   On Debian systems, the complete text of the Apache License version 2.0
   can be found in '/usr/share/common-licenses/Apache-2.0'.
 
+* ovs-bugtool is covered by the following license:
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of version 2.1 of the GNU Lesser General Public
+   License as published by the Free Software Foundation.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+   Copyright (c) 2005, 2007 XenSource Ltd.
+   Copyright (c) 2010, Nicira Networks.
+
+ On Debian systems, the complete text of the GNU Lesser General Public
+ License can be found in `/usr/share/common-licenses/LGPL-2.1'.
+
 * All other components of this package are licensed under
   The Apache License Version 2.0.
 
diff --git a/debian/openvswitch-common.install b/debian/openvswitch-common.install
index fab991666..298f1ad0f 100644
--- a/debian/openvswitch-common.install
+++ b/debian/openvswitch-common.install
@@ -4,4 +4,5 @@ _debian/utilities/ovs-appctl usr/sbin
 _debian/utilities/ovs-ofctl usr/sbin
 _debian/utilities/ovs-parse-leaks usr/bin
 _debian/utilities/ovs-pki usr/sbin
+debian/ovs-bugtool usr/sbin
 vswitchd/vswitch.ovsschema usr/share/openvswitch
diff --git a/debian/ovs-bugtool b/debian/ovs-bugtool
new file mode 100755
index 000000000..f991f27a2
--- /dev/null
+++ b/debian/ovs-bugtool
@@ -0,0 +1,1110 @@
+#!/usr/bin/env python
+
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+# Copyright (c) 2005, 2007 XenSource Ltd.
+# Copyright (c) 2010, Nicira Networks.
+
+#
+# To add new entries to the bugtool, you need to:
+#
+# Create a new capability.  These declare the new entry to the GUI, including
+# the expected size, time to collect, privacy implications, and whether the
+# capability should be selected by default.  One capability may refer to
+# multiple files, assuming that they can be reasonably grouped together, and
+# have the same privacy implications.  You need:
+#
+#   A new CAP_ constant.
+#   A cap() invocation to declare the capability.
+#
+# You then need to add calls to main() to collect the files.  These will
+# typically be calls to the helpers file_output(), tree_output(), cmd_output(),
+# or func_output().
+#
+
+import getopt
+import re
+import os
+import StringIO
+import sys
+import tarfile
+import time
+import commands
+import pprint
+from xml.dom.minidom import parse, getDOMImplementation
+import zipfile
+from subprocess import Popen, PIPE
+from select import select
+from signal import SIGTERM, SIGUSR1
+import md5
+import platform
+import fcntl
+import glob
+import urllib
+import socket
+import base64
+
+sys.path.append('/usr/lib/python')
+sys.path.append('/usr/lib64/python')
+
+OS_RELEASE = platform.release()
+
+#
+# Files & directories
+#
+
+BUG_DIR = "/var/log/openvswitch"
+PLUGIN_DIR = "/etc/openvswitch/bugtool"
+GRUB_CONFIG = '/boot/grub/menu.lst'
+BOOT_KERNEL = '/boot/vmlinuz-' + OS_RELEASE
+BOOT_INITRD = '/boot/initrd-' + OS_RELEASE + '.img'
+PROC_PARTITIONS = '/proc/partitions'
+FSTAB = '/etc/fstab'
+PROC_MOUNTS = '/proc/mounts'
+PROC_CPUINFO = '/proc/cpuinfo'
+PROC_MEMINFO = '/proc/meminfo'
+PROC_IOPORTS = '/proc/ioports'
+PROC_INTERRUPTS = '/proc/interrupts'
+PROC_SCSI = '/proc/scsi/scsi'
+PROC_VERSION = '/proc/version'
+PROC_MODULES = '/proc/modules'
+PROC_DEVICES = '/proc/devices'
+PROC_FILESYSTEMS = '/proc/filesystems'
+PROC_CMDLINE = '/proc/cmdline'
+PROC_CONFIG = '/proc/config.gz'
+PROC_USB_DEV = '/proc/bus/usb/devices'
+PROC_NET_SOFTNET_STAT = '/proc/net/softnet_stat'
+MODPROBE_DIR = '/etc/modprobe.d'
+RESOLV_CONF = '/etc/resolv.conf'
+NSSWITCH_CONF = '/etc/nsswitch.conf'
+NTP_CONF = '/etc/ntp.conf'
+HOSTS = '/etc/hosts'
+HOSTS_ALLOW = '/etc/hosts.allow'
+HOSTS_DENY = '/etc/hosts.deny'
+DHCP_LEASE_DIR = '/var/lib/dhcp3'
+OPENVSWITCH_CORE_DIR = '/var/log/openvswitch/cores'
+OPENVSWITCH_DEFAULT_SWITCH = '/etc/default/openvswitch-switch'
+OPENVSWITCH_DEFAULT_CONTROLLER = '/etc/default/openvswitch-controller'
+OPENVSWITCH_CONF_DB = '/etc/openvswitch/conf.db'
+OPENVSWITCH_VSWITCHD_PID = '/var/run/openvswitch/ovs-vswitchd.pid'
+VAR_LOG_DIR = '/var/log/'
+X11_LOGS_DIR = VAR_LOG_DIR
+X11_LOGS_RE = re.compile(r'.*/Xorg\..*$')
+X11_AUTH_DIR = '/root/'
+X11_AUTH_RE = re.compile(r'.*/\.((Xauthority)|(serverauth\.[0-9]*))$')
+PAM_DIR = '/etc/pam.d'
+
+#
+# External programs
+#
+
+ARP = '/usr/sbin/arp'
+CAT = '/bin/cat'
+DF = '/bin/df'
+DMESG = '/bin/dmesg'
+DMIDECODE = '/usr/sbin/dmidecode'
+ETHTOOL = '/sbin/ethtool'
+FDISK = '/sbin/fdisk'
+FIND = '/usr/bin/find'
+IFCONFIG = '/sbin/ifconfig'
+IPTABLES = '/sbin/iptables'
+LOSETUP = '/sbin/losetup'
+LS = '/bin/ls'
+LSPCI = '/usr/bin/lspci'
+MD5SUM = '/usr/bin/md5sum'
+MODINFO = '/sbin/modinfo'
+NETSTAT = '/bin/netstat'
+OVS_DPCTL = '/usr/sbin/ovs-dpctl'
+OVS_OFCTL = '/usr/sbin/ovs-ofctl'
+OVS_VSCTL = '/usr/sbin/ovs-vsctl'
+OVS_APPCTL = '/usr/sbin/ovs-appctl'
+PS = '/bin/ps'
+ROUTE = '/sbin/route'
+SYSCTL = '/sbin/sysctl'
+TC = '/sbin/tc'
+UPTIME = '/usr/bin/uptime'
+ZCAT = '/bin/zcat'
+
+#
+# PII -- Personally identifiable information.  Of particular concern are
+# things that would identify customers, or their network topology.
+# Passwords are never to be included in any bug report, regardless of any PII
+# declaration.
+#
+# NO            -- No PII will be in these entries.
+# YES           -- PII will likely or certainly be in these entries.
+# MAYBE         -- The user may wish to audit these entries for PII.
+# IF_CUSTOMIZED -- If the files are unmodified, then they will contain no PII,
+# but since we encourage customers to edit these files, PII may have been
+# introduced by the customer.  This is used in particular for the networking
+# scripts in dom0.
+#
+
+PII_NO            = 'no'
+PII_YES           = 'yes'
+PII_MAYBE         = 'maybe'
+PII_IF_CUSTOMIZED = 'if_customized'
+KEY      = 0
+PII      = 1
+MIN_SIZE = 2
+MAX_SIZE = 3
+MIN_TIME = 4
+MAX_TIME = 5
+MIME     = 6
+CHECKED  = 7
+HIDDEN   = 8
+
+MIME_DATA = 'application/data'
+MIME_TEXT = 'text/plain'
+
+INVENTORY_XML_ROOT = "system-status-inventory"
+INVENTORY_XML_SUMMARY = 'system-summary'
+INVENTORY_XML_ELEMENT = 'inventory-entry'
+CAP_XML_ROOT = "system-status-capabilities"
+CAP_XML_ELEMENT = 'capability'
+
+
+CAP_BLOBS                = 'blobs'
+CAP_BOOT_LOADER          = 'boot-loader'
+CAP_DISK_INFO            = 'disk-info'
+CAP_FIRSTBOOT            = 'firstboot'
+CAP_HARDWARE_INFO        = 'hardware-info'
+CAP_HIGH_AVAILABILITY    = 'high-availability'
+CAP_HOST_CRASHDUMP_DUMPS = 'host-crashdump-dumps'
+CAP_HOST_CRASHDUMP_LOGS  = 'host-crashdump-logs'
+CAP_KERNEL_INFO          = 'kernel-info'
+CAP_LOSETUP_A            = 'loopback-devices'
+CAP_NETWORK_CONFIG       = 'network-config'
+CAP_NETWORK_STATUS       = 'network-status'
+CAP_OEM                  = 'oem'
+CAP_PAM                  = 'pam'
+CAP_PROCESS_LIST         = 'process-list'
+CAP_PERSISTENT_STATS     = 'persistent-stats'
+CAP_SYSTEM_LOGS          = 'system-logs'
+CAP_SYSTEM_SERVICES      = 'system-services'
+CAP_VNCTERM              = 'vncterm'
+CAP_WLB                  = 'wlb'
+CAP_X11_LOGS             = 'X11'
+CAP_X11_AUTH             = 'X11-auth'
+
+KB = 1024
+MB = 1024 * 1024
+
+caps = {}
+cap_sizes = {}
+unlimited_data = False
+dbg = False
+
+def cap(key, pii=PII_MAYBE, min_size=-1, max_size=-1, min_time=-1,
+        max_time=-1, mime=MIME_TEXT, checked=True, hidden=False):
+    caps[key] = (key, pii, min_size, max_size, min_time, max_time, mime,
+                 checked, hidden)
+    cap_sizes[key] = 0
+
+
+cap(CAP_BLOBS,               PII_NO,                    max_size=5*MB)  # sizes in bytes
+cap(CAP_BOOT_LOADER,         PII_NO,                    max_size=3*KB,
+    max_time=5)
+cap(CAP_DISK_INFO,           PII_MAYBE,                 max_size=25*KB,
+    max_time=20)
+cap(CAP_FIRSTBOOT,           PII_YES,   min_size=60*KB, max_size=80*KB)
+cap(CAP_HARDWARE_INFO,       PII_MAYBE,                 max_size=30*KB,
+    max_time=20)
+cap(CAP_HIGH_AVAILABILITY,   PII_MAYBE,                 max_size=5*MB)
+cap(CAP_HOST_CRASHDUMP_DUMPS,PII_YES, checked = False)  # unchecked: not collected by default
+cap(CAP_HOST_CRASHDUMP_LOGS, PII_NO)
+cap(CAP_KERNEL_INFO,         PII_MAYBE,                 max_size=120*KB,
+    max_time=5)
+cap(CAP_LOSETUP_A,           PII_MAYBE,                 max_size=KB, max_time=5)
+cap(CAP_NETWORK_CONFIG,      PII_IF_CUSTOMIZED,
+                                        min_size=0,     max_size=20*KB)
+cap(CAP_NETWORK_STATUS,      PII_YES,                   max_size=19*KB,
+    max_time=30)
+cap(CAP_PAM,                 PII_NO,                    max_size=30*KB)
+cap(CAP_PERSISTENT_STATS,    PII_MAYBE,                 max_size=50*MB,
+    max_time=60)
+cap(CAP_PROCESS_LIST,        PII_YES,                   max_size=30*KB,
+    max_time=20)
+cap(CAP_SYSTEM_LOGS,         PII_MAYBE,                 max_size=50*MB,
+    max_time=5)
+cap(CAP_SYSTEM_SERVICES,     PII_NO,                    max_size=5*KB,
+    max_time=20)
+cap(CAP_VNCTERM,             PII_MAYBE, checked = False)
+cap(CAP_WLB,                 PII_NO,                    max_size=3*MB,
+    max_time=20)
+cap(CAP_X11_LOGS,            PII_NO,                    max_size=100*KB)
+cap(CAP_X11_AUTH,            PII_NO,                    max_size=100*KB)
+
+ANSWER_YES_TO_ALL = False   # -y/--yestoall: include every item without prompting
+SILENT_MODE = False         # -s/--silent: output() prints nothing
+entries = None              # list of capability keys to collect; assigned in main()
+data = {}                   # label or path -> dict describing one item to collect
+dev_null = open('/dev/null', 'r+')   # used as stdin and stderr of spawned commands
+
+def output(x):
+    """Print x on stdout, unless SILENT_MODE has been switched on."""
+    if not SILENT_MODE:
+        print x
+
+def output_ts(x):  # output() x prefixed with the current date, time and time zone
+    output("[%s]  %s" % (time.strftime("%x %X %Z"), x))
+
+def cmd_output(cap, args, label = None, filter = None):
+    """Queue a command for collection under capability `cap`, keyed by label."""
+    if cap not in entries:
+        return
+    if not label:
+        label = args
+        if isinstance(args, list):
+            # default label: the command line with the program's directory dropped
+            label = ' '.join([os.path.basename(args[0])] + args[1:])
+    data[label] = {'cap': cap, 'cmd_args': args, 'filter': filter}
+
+def file_output(cap, path_list):
+    """Queue each existing path for `cap` while its size budget is not yet spent."""
+    if cap in entries:
+        for path in path_list:
+            if not os.path.exists(path):
+                continue
+            if not (unlimited_data or caps[cap][MAX_SIZE] == -1 or cap_sizes[cap] < caps[cap][MAX_SIZE]):
+                output("Omitting %s, size constraint of %s exceeded" % (path, cap))
+                continue
+            data[path] = {'cap': cap, 'filename': path}
+            try:
+                cap_sizes[cap] += os.stat(path).st_size
+            except:
+                pass
+
+def tree_output(cap, path, pattern = None, negate = False):
+    """Recursively queue the files under path whose full name satisfies matches()."""
+    if cap not in entries or not os.path.exists(path):
+        return
+    for child in [os.path.join(path, name) for name in os.listdir(path)]:
+        if os.path.isfile(child) and matches(child, pattern, negate):
+            file_output(cap, [child])
+        elif os.path.isdir(child):
+            tree_output(cap, child, pattern, negate)
+
+def func_output(cap, label, func):
+    """Queue func under label; collect_data() later stores the result of func(cap)."""
+    if cap in entries:
+        data[label] = {'cap': cap, 'func': func}
+
+def collect_data():
+    """Fill in v['output'] for every queued command, /proc file and function."""
+    process_lists = {}
+    def has_room(cap):
+        return unlimited_data or caps[cap][MAX_SIZE] == -1 or cap_sizes[cap] < caps[cap][MAX_SIZE]
+
+    for (k, v) in data.items():
+        cap = v['cap']
+        if 'cmd_args' in v:
+            v['output'] = StringIOmtime()
+            proc = ProcOutput(v['cmd_args'], caps[cap][MAX_TIME], v['output'], v['filter'])
+            process_lists.setdefault(cap, []).append(proc)
+        elif 'filename' in v and v['filename'].startswith('/proc/'):
+            # proc files must be read into memory
+            try:
+                f = open(v['filename'], 'r')
+                s = f.read()
+                f.close()
+                if has_room(cap):
+                    v['output'] = StringIOmtime(s)
+                    cap_sizes[cap] += len(s)
+                else:
+                    output("Omitting %s, size constraint of %s exceeded" % (v['filename'], cap))
+            except:
+                pass
+        elif 'func' in v:
+            try:
+                s = v['func'](cap)
+            except Exception, e:
+                s = str(e)
+            if has_room(cap):
+                v['output'] = StringIOmtime(s)
+                cap_sizes[cap] += len(s)
+            else:
+                output("Omitting %s, size constraint of %s exceeded" % (k, cap))
+
+    run_procs(process_lists.values())
+
+
+def main(argv = None):
+    """Parse argv, queue the data to collect, gather it and write the archive.
+    Returns 0 on success, 1 when not run as root, 2 on invalid usage."""
+    global ANSWER_YES_TO_ALL, SILENT_MODE
+    global entries, data, dbg, unlimited_data
+
+    # we need access to privileged files, exit if we are not running as root
+    if os.getuid() != 0:
+        print >>sys.stderr, "Error: ovs-bugtool must be run as root"
+        return 1
+
+    output_type = 'tar.bz2'
+    output_fd = -1
+
+    if argv is None:
+        argv = sys.argv
+
+    try:
+        (options, params) = getopt.gnu_getopt(
+            argv, 'sy', ['capabilities', 'silent', 'yestoall', 'entries=',
+                         'output=', 'outfd=', 'all', 'unlimited', 'debug'])
+    except getopt.GetoptError, opterr:
+        print >>sys.stderr, opterr
+        return 2
+
+    try:
+        load_plugins(True)
+    except:
+        pass
+
+    entries = [e for e in caps.keys() if caps[e][CHECKED]]
+
+    for (k, v) in options:
+        if k == '--capabilities':
+            update_capabilities()
+            print_capabilities()
+            return 0
+
+        if k == '--output':
+            if  v in ['tar', 'tar.bz2', 'zip']:
+                output_type = v
+            else:
+                print >>sys.stderr, "Invalid output format '%s'" % v
+                return 2
+
+        # "-s" or "--silent" means suppress output (except for the final
+        # output filename at the end)
+        if k in ['-s', '--silent']:
+            SILENT_MODE = True
+
+        if k == '--entries' and v != '':
+            entries = v.split(',')
+
+        # If the user runs the script with "-y" or "--yestoall" we don't ask
+        # all the really annoying questions.
+        if k in ['-y', '--yestoall']:
+            ANSWER_YES_TO_ALL = True
+
+        if k == '--outfd':
+            try:
+                output_fd = int(v)
+                old = fcntl.fcntl(output_fd, fcntl.F_GETFD)
+                fcntl.fcntl(output_fd, fcntl.F_SETFD, old | fcntl.FD_CLOEXEC)
+            except:
+                print >>sys.stderr, "Invalid output file descriptor", v
+                return 2
+
+        elif k == '--all':
+            entries = caps.keys()
+        elif k == '--unlimited':
+            unlimited_data = True
+        elif k == '--debug':
+            dbg = True
+            ProcOutput.debug = True
+
+    if len(params) != 1:
+        print >>sys.stderr, "Invalid additional arguments", str(params)
+        return 2
+
+    if output_fd != -1 and output_type != 'tar':
+        print >>sys.stderr, "Option '--outfd' only valid with '--output=tar'"
+        return 2
+
+    if ANSWER_YES_TO_ALL:
+        output("Warning: '--yestoall' argument provided, will not prompt for individual files.")
+
+    output('''
+This application will collate dmesg output, details of the
+hardware configuration of your machine, information about the build of
+openvswitch that you are using, plus, if you allow it, various logs.
+
+The collated information will be saved as a .%s for archiving or
+sending to a Technical Support Representative.
+
+The logs may contain private information, and if you are at all
+worried about that, you should exit now, or you should explicitly
+exclude those logs from the archive.
+
+''' % output_type)
+
+    # assemble potential data
+
+    file_output(CAP_BOOT_LOADER, [GRUB_CONFIG])
+    cmd_output(CAP_BOOT_LOADER, [LS, '-lR', '/boot'])
+    cmd_output(CAP_BOOT_LOADER, [MD5SUM, BOOT_KERNEL, BOOT_INITRD], label='vmlinuz-initrd.md5sum')
+
+    cmd_output(CAP_DISK_INFO, [FDISK, '-l'])
+    file_output(CAP_DISK_INFO, [PROC_PARTITIONS, PROC_MOUNTS])
+    file_output(CAP_DISK_INFO, [FSTAB])
+    cmd_output(CAP_DISK_INFO, [DF, '-alT'])
+    cmd_output(CAP_DISK_INFO, [DF, '-alTi'])
+    cmd_output(CAP_DISK_INFO, [LS, '-R', '/sys/class/scsi_host'])
+    cmd_output(CAP_DISK_INFO, [LS, '-R', '/sys/class/scsi_disk'])
+    cmd_output(CAP_DISK_INFO, [LS, '-R', '/sys/class/fc_transport'])
+    func_output(CAP_DISK_INFO, 'scsi-hosts', dump_scsi_hosts)
+
+    file_output(CAP_HARDWARE_INFO, [PROC_CPUINFO, PROC_MEMINFO, PROC_IOPORTS, PROC_INTERRUPTS])
+    cmd_output(CAP_HARDWARE_INFO, [DMIDECODE])
+    cmd_output(CAP_HARDWARE_INFO, [LSPCI, '-n'])
+    cmd_output(CAP_HARDWARE_INFO, [LSPCI, '-vv'])
+    file_output(CAP_HARDWARE_INFO, [PROC_USB_DEV, PROC_SCSI])
+    cmd_output(CAP_HARDWARE_INFO, [LS, '-lR', '/dev'])
+
+    file_output(CAP_KERNEL_INFO, [PROC_VERSION, PROC_MODULES, PROC_DEVICES,
+                                  PROC_FILESYSTEMS, PROC_CMDLINE])
+    cmd_output(CAP_KERNEL_INFO, [ZCAT, PROC_CONFIG], label='config')
+    cmd_output(CAP_KERNEL_INFO, [SYSCTL, '-A'])
+    tree_output(CAP_KERNEL_INFO, MODPROBE_DIR)
+    func_output(CAP_KERNEL_INFO, 'modinfo', module_info)
+
+    cmd_output(CAP_LOSETUP_A, [LOSETUP, '-a'])
+
+    file_output(CAP_NETWORK_CONFIG, [RESOLV_CONF, NSSWITCH_CONF, HOSTS])
+    file_output(CAP_NETWORK_CONFIG, [NTP_CONF, HOSTS_ALLOW, HOSTS_DENY])
+    file_output(CAP_NETWORK_CONFIG, [OPENVSWITCH_DEFAULT_SWITCH,
+        OPENVSWITCH_DEFAULT_CONTROLLER, OPENVSWITCH_CONF_DB])
+
+    cmd_output(CAP_NETWORK_STATUS, [IFCONFIG, '-a'])
+    cmd_output(CAP_NETWORK_STATUS, [ROUTE, '-n'])
+    cmd_output(CAP_NETWORK_STATUS, [ARP, '-n'])
+    cmd_output(CAP_NETWORK_STATUS, [NETSTAT, '-an'])
+    tree_output(CAP_NETWORK_STATUS, DHCP_LEASE_DIR)
+    cmd_output(CAP_NETWORK_STATUS, [IPTABLES, '-nL'])
+    for p in os.listdir('/sys/class/net/'):
+        try:
+            f = open('/sys/class/net/%s/type' % p, 'r')
+            t = f.readline()
+            f.close()
+            if int(t) == 1:
+                # ARPHRD_ETHER
+                cmd_output(CAP_NETWORK_STATUS, [ETHTOOL, p])
+                cmd_output(CAP_NETWORK_STATUS, [ETHTOOL, '-S', p])
+                cmd_output(CAP_NETWORK_STATUS, [ETHTOOL, '-k', p])
+                cmd_output(CAP_NETWORK_STATUS, [ETHTOOL, '-i', p])
+                cmd_output(CAP_NETWORK_STATUS, [ETHTOOL, '-c', p])
+        except:
+            pass
+    cmd_output(CAP_NETWORK_STATUS, [TC, '-s', 'qdisc'])
+    file_output(CAP_NETWORK_STATUS, [PROC_NET_SOFTNET_STAT])
+    tree_output(CAP_NETWORK_STATUS, OPENVSWITCH_CORE_DIR)
+    if os.path.exists(OPENVSWITCH_VSWITCHD_PID):
+        cmd_output(CAP_NETWORK_STATUS, [OVS_DPCTL, 'show'])
+        for d in dp_list():
+            cmd_output(CAP_NETWORK_STATUS, [OVS_OFCTL, 'show', d])
+            cmd_output(CAP_NETWORK_STATUS, [OVS_OFCTL, 'status', d])
+            cmd_output(CAP_NETWORK_STATUS, [OVS_OFCTL, 'dump-flows', d])
+            cmd_output(CAP_NETWORK_STATUS, [OVS_DPCTL, 'dump-flows', d])
+        try:
+            vspidfile = open(OPENVSWITCH_VSWITCHD_PID)
+            vspid = int(vspidfile.readline().strip())
+            vspidfile.close()
+            for b in bond_list(vspid):
+                cmd_output(CAP_NETWORK_STATUS,
+                           [OVS_APPCTL, '-t', '/var/run/ovs-vswitchd.%s.ctl' % vspid, '-e' 'bond/show %s' % b],
+                           'ovs-appctl-bond-show-%s.out' % b)
+        except Exception:   # bond details are optional; carry on without them
+            pass
+
+    tree_output(CAP_PAM, PAM_DIR)
+
+    cmd_output(CAP_PROCESS_LIST, [PS, 'wwwaxf', '-eo', 'pid,tty,stat,time,nice,psr,pcpu,pmem,nwchan,wchan:25,args'], label='process-tree')
+    func_output(CAP_PROCESS_LIST, 'fd_usage', fd_usage)
+
+    file_output(CAP_SYSTEM_LOGS,
+         [ VAR_LOG_DIR + x for x in
+           [ 'kern.log', 'daemon.log', 'user.log', 'syslog', 'messages',
+             'debug', 'dmesg', 'boot'] +
+           [ f % n for n in range(1, 20) \
+                 for f in ['kern.log.%d', 'kern.log.%d.gz',
+                           'daemon.log.%d', 'daemon.log.%d.gz',
+                           'user.log.%d', 'user.log.%d.gz',
+                           'messages.%d', 'messages.%d.gz']]])
+    if not os.path.exists('/var/log/dmesg') and not os.path.exists('/var/log/boot'):
+        cmd_output(CAP_SYSTEM_LOGS, [DMESG])
+
+    tree_output(CAP_X11_LOGS, X11_LOGS_DIR, X11_LOGS_RE)
+    tree_output(CAP_X11_AUTH, X11_AUTH_DIR, X11_AUTH_RE)
+
+
+    try:
+        load_plugins()
+    except:
+        pass
+
+    # permit the user to filter out data
+    for k in sorted(data.keys()):
+        if not ANSWER_YES_TO_ALL and not yes("Include '%s'? [Y/n]: " % k):
+            del data[k]
+
+    # collect selected data now
+    output_ts('Running commands to collect data')
+    collect_data()
+
+    subdir = "bug-report-%s" % time.strftime("%Y%m%d%H%M%S")
+
+    # include inventory
+    data['inventory.xml'] = {'cap': None, 'output': StringIOmtime(make_inventory(data, subdir))}
+
+    # create archive
+    if output_fd == -1 and not os.path.exists(BUG_DIR):
+        try:
+            os.makedirs(BUG_DIR)
+        except:
+            pass
+
+    if output_fd == -1:
+        output_ts('Creating output file')
+
+    if output_type.startswith('tar'):
+        make_tar(subdir, output_type, output_fd)
+    else:
+        make_zip(subdir)
+
+    clean_tapdisk_logs()
+
+    if dbg:
+        print >>sys.stderr, "Category sizes (max, actual):\n"
+        for c in caps.keys():
+            print >>sys.stderr, "    %s (%d, %d)" % (c, caps[c][MAX_SIZE],
+                                                     cap_sizes[c])
+    return 0
+
+def find_tapdisk_logs():  # list of the paths that match the blktap log glob below
+    return glob.glob('/var/log/blktap/*.log*')
+
+def generate_tapdisk_logs():
+    """Send SIGUSR1 to each tapdisk process (failures ignored), then wait a second."""
+    for pid in pidof('tapdisk'):
+        try:
+            os.kill(pid, SIGUSR1)
+            output_ts("Including logs for tapdisk process %d" % pid)
+        except :
+            pass
+    time.sleep(1)  # give processes a second to write their logs
+
+def clean_tapdisk_logs():  # delete every file returned by find_tapdisk_logs()
+    for filename in find_tapdisk_logs():
+        try:
+            os.remove(filename)
+        except :  # best effort: a file that cannot be removed is left in place
+            pass
+
+def filter_db_pii(str, state):
+    """Line filter that masks value="..." on <row> lines of the "secret" table."""
+    # state persists between calls and records whether we are inside that table
+    state.setdefault('in_secret_table', False)
+    if str.startswith('<table ') and 'name="secret"' in str:
+        state['in_secret_table'] = True
+    elif str.startswith('</table>'):
+        state['in_secret_table'] = False
+
+    if not (state['in_secret_table'] and str.startswith("<row")): # match only on DB rows
+        return str
+    return re.sub(r'(value=")[^"]+(")', r'\1REMOVED\2', str)
+
+def dump_scsi_hosts(cap):
+    """Return a text listing of each SCSI host's proc_name and model_name."""
+    # cap is accepted because collect_data() calls func(cap); it is not used here
+
+    def first_line(host, attr, default):
+        # first line of the host's sysfs attribute, or default if unreadable
+        value = default
+        try:
+            f = open('/sys/class/scsi_host/%s/%s' % (host, attr))
+            value = f.readline().strip("\n")
+            f.close()
+        except:
+            pass
+        return value
+
+    report = []
+    for host in sorted(os.listdir('/sys/class/scsi_host')):
+        procname = first_line(host, 'proc_name', '')
+        modelname = first_line(host, 'model_name', None)
+        report.append("%s:\n" % host)
+        if modelname:
+            report.append("    %s -> %s\n" % (procname, modelname))
+        else:
+            report.append("    %s\n" % procname)
+    return ''.join(report)
+
+def module_info(cap):
+    """Return the combined MODINFO output for each module named in PROC_MODULES."""
+    buf = StringIO.StringIO()
+    modules = open(PROC_MODULES, 'r')
+    # the first whitespace-separated field of every line is taken as the module
+    names = [line.split()[0] for line in modules]
+    modules.close()
+
+    # a single inner list: run_procs() starts its commands one after another
+    procs = [ProcOutput([MODINFO, n], caps[cap][MAX_TIME], buf) for n in names]
+    run_procs([procs])
+
+    return buf.getvalue()
+
+def dp_list():
+    """Return the output lines of `OVS_DPCTL dump-dps`, or [] if it timed out."""
+    buf = StringIO.StringIO()
+    proc = ProcOutput([OVS_DPCTL, 'dump-dps'],
+                      caps[CAP_NETWORK_STATUS][MAX_TIME], buf)
+    run_procs([[proc]])
+    if proc.timed_out:
+        return []
+    return buf.getvalue().splitlines()
+
+def bond_list(pid):
+    """Return the 2nd tab-separated field of each bond/list line after the first."""
+    buf = StringIO.StringIO()
+    proc = ProcOutput([OVS_APPCTL, '-t', '/var/run/ovs-vswitchd.%s.ctl' % pid,
+                       '-ebond/list'],  # '-e' and its argument form one argv word
+                      caps[CAP_NETWORK_STATUS][MAX_TIME], buf)
+    run_procs([[proc]])
+    if proc.timed_out:
+        return []
+    return [row.split('\t')[1] for row in buf.getvalue().splitlines()[1:]]
+
+def fd_usage(cap):
+    """Return "<open fd count>: [<command lines>]" lines, largest count first."""
+    fd_dict = {}
+    for d in [p for p in os.listdir('/proc') if p.isdigit()]:
+        # a process can exit between listdir() and open(); skip it, keep scanning
+        try:
+            fh = open('/proc/'+d+'/cmdline')
+            try:
+                name = fh.readline()
+            finally:
+                fh.close()
+            num_fds = len(os.listdir('/proc/'+d+'/fd'))
+        except (IOError, OSError):
+            continue
+        if num_fds > 0:
+            fd_dict.setdefault(num_fds, []).append(name.replace('\0', ' ').strip())
+
+    keys = sorted(fd_dict.keys(), reverse=True)
+    return ''.join(["%s: %s\n" % (k, str(fd_dict[k])) for k in keys])
+
+def load_plugins(just_capabilities = False):  # register plugin capabilities; queue their items too unless just_capabilities
+    def getText(nodelist):  # concatenated data of the TEXT_NODE nodes, passed through encode()
+        rc = ""
+        for node in nodelist:
+            if node.nodeType == node.TEXT_NODE:
+                rc += node.data
+        return rc.encode()
+
+    def getBoolAttr(el, attr, default = False):  # true/yes -> True, false/no -> False, else default
+        ret = default
+        val = el.getAttribute(attr).lower()
+        if val in ['true', 'false', 'yes', 'no']:
+            ret = val in ['true', 'yes']
+        return ret
+    # every sub-directory of PLUGIN_DIR is treated as one plugin capability
+    for dir in [d for d in os.listdir(PLUGIN_DIR) if os.path.isdir(os.path.join(PLUGIN_DIR, d))]:
+        if not caps.has_key(dir):
+            if not os.path.exists("%s/%s.xml" % (PLUGIN_DIR, dir)):
+                continue
+            xmldoc = parse("%s/%s.xml" % (PLUGIN_DIR, dir))
+            assert xmldoc.documentElement.tagName == "capability"
+            # defaults, kept for any attribute that is absent or not acceptable
+            pii, min_size, max_size, min_time, max_time, mime = \
+                 PII_MAYBE, -1,-1,-1,-1, MIME_TEXT
+
+            if xmldoc.documentElement.getAttribute("pii") in [PII_NO, PII_YES, PII_MAYBE, PII_IF_CUSTOMIZED]:
+                pii = xmldoc.documentElement.getAttribute("pii")
+            if xmldoc.documentElement.getAttribute("min_size") != '':
+                min_size = long(xmldoc.documentElement.getAttribute("min_size"))
+            if xmldoc.documentElement.getAttribute("max_size") != '':
+                max_size = long(xmldoc.documentElement.getAttribute("max_size"))
+            if xmldoc.documentElement.getAttribute("min_time") != '':
+                min_time = int(xmldoc.documentElement.getAttribute("min_time"))
+            if xmldoc.documentElement.getAttribute("max_time") != '':
+                max_time = int(xmldoc.documentElement.getAttribute("max_time"))
+            if xmldoc.documentElement.getAttribute("mime") in [MIME_DATA, MIME_TEXT]:
+                mime = xmldoc.documentElement.getAttribute("mime")
+            checked = getBoolAttr(xmldoc.documentElement, 'checked', True)
+            hidden = getBoolAttr(xmldoc.documentElement, 'hidden', False)
+
+            cap(dir, pii, min_size, max_size, min_time, max_time, mime, checked, hidden)
+
+        if just_capabilities:
+            continue
+        # each *.xml file inside the plugin's directory lists what to collect
+        plugdir = os.path.join(PLUGIN_DIR, dir)
+        for file in [f for f in os.listdir(plugdir) if f.endswith('.xml')]:
+            xmldoc = parse(os.path.join(plugdir, file))
+            assert xmldoc.documentElement.tagName == "collect"
+
+            for el in xmldoc.documentElement.getElementsByTagName("*"):
+                if el.tagName == "files":  # whitespace-separated list of paths
+                    file_output(dir, getText(el.childNodes).split())
+                elif el.tagName == "directory":
+                    pattern = el.getAttribute("pattern")
+                    if pattern == '': pattern = None
+                    negate = getBoolAttr(el, 'negate')
+                    tree_output(dir, getText(el.childNodes), pattern and re.compile(pattern) or None, negate)
+                elif el.tagName == "command":  # passed as one string, which ProcOutput runs with shell=True
+                    label = el.getAttribute("label")
+                    if label == '': label = None
+                    cmd_output(dir, getText(el.childNodes), label)
+
+def make_tar(subdir, suffix, output_fd):
+    """Write the collected data to a tar archive in BUG_DIR, or to fd output_fd.
+    Items that cannot be added are skipped; source files are closed after use.
+    """
+    mode = {'tar.bz2': 'w:bz2'}.get(suffix, 'w')
+    filename = "%s/%s.%s" % (BUG_DIR, subdir, suffix)
+
+    if output_fd == -1:
+        tf = tarfile.open(filename, mode)
+    else:
+        tf = tarfile.open(None, 'w', os.fdopen(output_fd, 'a'))
+
+    try:
+        for (k, v) in data.items():
+            try:
+                tar_filename = os.path.join(subdir, construct_filename(k, v))
+                ti = tarfile.TarInfo(tar_filename)
+                ti.uname = ti.gname = 'root'
+                if v.has_key('output'):
+                    ti.mtime = v['output'].mtime
+                    ti.size = len(v['output'].getvalue())
+                    v['output'].seek(0)
+                    tf.addfile(ti, v['output'])
+                elif v.has_key('filename'):
+                    s = os.stat(v['filename'])
+                    ti.mtime = s.st_mtime
+                    ti.size = s.st_size
+                    fobj = open(v['filename'], 'rb')
+                    try:
+                        tf.addfile(ti, fobj)
+                    finally:
+                        fobj.close()
+            except:
+                pass
+    finally:
+        tf.close()
+
+    if output_fd == -1:
+        output ('Writing tarball %s successful.' % filename)
+        if SILENT_MODE:
+            print filename
+
+
+def make_zip(subdir):
+    """Write every collected item to BUG_DIR/<subdir>.zip and report the file name.
+    An item that cannot be added is silently left out of the archive."""
+
+    filename = "%s/%s.zip" % (BUG_DIR, subdir)
+    zf = zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED)
+
+    try:
+        for (k, v) in data.items():
+            try:
+                dest = os.path.join(subdir, construct_filename(k, v))
+                if 'output' in v:
+                    zf.writestr(dest, v['output'].getvalue())
+                    continue
+                # files shorter than 50 bytes are stored without compression
+                method = zipfile.ZIP_DEFLATED
+                if os.stat(v['filename']).st_size < 50:
+                    method = zipfile.ZIP_STORED
+                zf.write(v['filename'], dest, method)
+            except:
+                pass
+    finally:
+        zf.close()
+
+    output ('Writing archive %s successful.' % filename)
+    if SILENT_MODE:
+        print filename
+
+
+def make_inventory(inventory, subdir):
+    """Return pretty-printed inventory XML: a summary plus one entry per item."""
+    document = getDOMImplementation().createDocument(None, INVENTORY_XML_ROOT, None)
+    root = document.getElementsByTagName(INVENTORY_XML_ROOT)[0]
+
+    # summary element: invoking user, date, host name, uname and uptime
+    summary = document.createElement(INVENTORY_XML_SUMMARY)
+    user = os.getenv('SUDO_USER', os.getenv('USER'))
+    if user:
+        summary.setAttribute('user', user)
+    summary.setAttribute('date', time.strftime('%c'))
+    summary.setAttribute('hostname', platform.node())
+    summary.setAttribute('uname', ' '.join(platform.uname()))
+    summary.setAttribute('uptime', commands.getoutput(UPTIME))
+    root.appendChild(summary)
+    for (k, v) in inventory.items():
+        inventory_entry(document, subdir, k, v)
+    return document.toprettyxml()
+
+def inventory_entry(document, subdir, k, v):  # append one inventory element for item k
+    try:
+        el = document.createElement(INVENTORY_XML_ELEMENT)
+        el.setAttribute('capability', v['cap'])
+        el.setAttribute('filename', os.path.join(subdir, construct_filename(k, v)))
+        el.setAttribute('md5sum', md5sum(v))
+        document.getElementsByTagName(INVENTORY_XML_ROOT)[0].appendChild(el)
+    except:  # any failure while building the element just omits this entry
+        pass
+
+
+def md5sum(d):
+    """Return the hex MD5 digest of d's file contents, else of d's output buffer."""
+    digest = md5.new()
+    if 'filename' in d:
+        f = open(d['filename'])
+        # feed the file in 1024-byte reads; '' signals end of file
+        for chunk in iter(lambda: f.read(1024), ''):
+            digest.update(chunk)
+        f.close()
+    elif 'output' in d:
+        digest.update(d['output'].getvalue())
+    return digest.hexdigest()
+
+
+def construct_filename(k, v):
+    """Return the archive member name for an item: its path minus a leading '/',
+    or a name derived from the label k, with '.out' added if it contains no '.'."""
+    if 'filename' in v:
+        path = v['filename']
+        if path[0] == '/':
+            return path[1:]
+        return path
+    name = k.replace(' ', '-').replace('--', '-').replace('/', '%')
+    if '.' not in name:
+        name += '.out'
+
+    return name
+
+def update_capabilities():  # currently a no-op; main() calls it for --capabilities
+    pass
+
+def update_cap_size(cap, size):  # set cap's min and max size to `size`; CHECKED only if size > 0
+    update_cap(cap, MIN_SIZE, size)
+    update_cap(cap, MAX_SIZE, size)
+    update_cap(cap, CHECKED, size > 0)
+
+
+def update_cap(cap, k, v):
+    """Replace field k of cap's tuple in caps with v (the tuple is rebuilt)."""
+    fields = list(caps[cap])
+    fields[k] = v
+    caps[cap] = tuple(fields)
+
+
+def size_of_dir(d, pattern = None, negate = False):
+    """Total size of the matching files below directory d; 0 if d is no directory."""
+    if not os.path.isdir(d):
+        return 0
+    children = [os.path.join(d, fn) for fn in os.listdir(d)]
+    return size_of_all(children, pattern, negate)
+
+
+def size_of_all(files, pattern = None, negate = False):  # sum of size_of() over files
+    return sum([size_of(f, pattern, negate) for f in files])
+
+
+def matches(f, pattern, negate):
+    """Truthy when f matches pattern (None matches all); negate inverts to a bool."""
+    if not negate:
+        return pattern is None or pattern.match(f)
+    return not (pattern is None or pattern.match(f))
+
+
+def size_of(f, pattern, negate):
+    """Size of f if it is a matching regular file, else size_of_dir() of f."""
+    if not (os.path.isfile(f) and matches(f, pattern, negate)):
+        return size_of_dir(f, pattern, negate)
+    return os.stat(f).st_size
+
+
+def print_capabilities():  # print an XML document listing every capability not marked HIDDEN
+    document = getDOMImplementation().createDocument(
+        "ns", CAP_XML_ROOT, None)
+    map(lambda key: capability(document, key), [k for k in caps.keys() if not caps[k][HIDDEN]])
+    print document.toprettyxml()
+
+def capability(document, key):
+    """Append an element describing caps[key] to the capabilities document root."""
+    c = caps[key]
+    attrs = [('key', c[KEY]), ('pii', c[PII]),
+             ('min-size', str(c[MIN_SIZE])), ('max-size', str(c[MAX_SIZE])),
+             ('min-time', str(c[MIN_TIME])), ('max-time', str(c[MAX_TIME])),
+             ('content-type', c[MIME]),
+             ('default-checked', c[CHECKED] and 'yes' or 'no')]
+    el = document.createElement(CAP_XML_ELEMENT)
+    for (name, value) in attrs:
+        el.setAttribute(name, value)
+    document.getElementsByTagName(CAP_XML_ROOT)[0].appendChild(el)
+
+
+def prettyDict(d):
+    fmt = '%%-%ds: %%s' % max([len(k) for k in d.keys()])  # pad keys to the longest one
+    return '\n'.join([fmt % item for item in d.items()]) + '\n'
+
+
+def yes(prompt):
+    """Prompt on stdin; an empty reply or one starting with y/Y counts as yes."""
+    reply = raw_input(prompt)
+    return reply == '' or reply[0].lower() == 'y'
+
+
+partition_re = re.compile(r'(.*[0-9]+$)|(^xvd)')  # names ending in a digit, or starting with xvd
+
+def disk_list():  # names in /proc/partitions with major < 254 that partition_re does not match
+    disks = []
+    try:
+        f = open('/proc/partitions')
+        f.readline()  # the first two lines are read and discarded
+        f.readline()
+        for line in f.readlines():
+            (major, minor, blocks, name) = line.split()
+            if int(major) < 254 and not partition_re.match(name):
+                disks.append(name)
+        f.close()
+    except:  # on any error, return whatever was gathered so far
+        pass
+    return disks
+
+
+class ProcOutput:  # one external command whose stdout is copied, line by line, into inst
+    debug = False  # when true, run() logs each command it starts
+
+    def __init__(self, command, max_time, inst=None, filter=None):
+        self.command = command  # argument list, or a string (then run through the shell)
+        self.max_time = max_time  # NOTE(review): -1 puts the deadline below in the past - confirm intended
+        self.inst = inst  # object with write(); receives the (filtered) output lines
+        self.running = False
+        self.status = None
+        self.timed_out = False
+        self.failed = False
+        self.timeout = int(time.time()) + self.max_time  # deadline counted from construction, not from run()
+        self.filter = filter  # optional callable(line, state) returning the line to store
+        self.filter_state = {}
+
+    def __del__(self):  # signal a still-running child when the object is destroyed
+        self.terminate()
+
+    def cmdAsStr(self):  # the command as a single string, for log messages
+        return isinstance(self.command, list) and ' '.join(self.command) or self.command
+
+    def run(self):  # start the command; a failure is recorded in self.failed, not raised
+        self.timed_out = False
+        try:
+            if ProcOutput.debug:
+                output_ts("Starting '%s'" % self.cmdAsStr())
+            self.proc = Popen(self.command, bufsize=1, stdin=dev_null, stdout=PIPE, stderr=dev_null, shell=isinstance(self.command, str))
+            old = fcntl.fcntl(self.proc.stdout.fileno(), fcntl.F_GETFD)
+            fcntl.fcntl(self.proc.stdout.fileno(), fcntl.F_SETFD, old | fcntl.FD_CLOEXEC)
+            self.running = True
+            self.failed = False
+        except:
+            output_ts("'%s' failed" % self.cmdAsStr())
+            self.running = False
+            self.failed = True
+
+    def terminate(self):  # send SIGTERM (errors ignored) and mark the command as stopped
+        if self.running:
+            try:
+                os.kill(self.proc.pid, SIGTERM)
+            except:
+                pass
+            self.proc = None
+            self.running = False
+            self.status = SIGTERM
+
+    def read_line(self):  # copy one line of output; an empty read means the process is done
+        assert self.running
+        line = self.proc.stdout.readline()
+        if line == '':
+            # process exited
+            self.status = self.proc.wait()
+            self.proc = None
+            self.running = False
+        else:
+            if self.filter:
+                line = self.filter(line, self.filter_state)
+            if self.inst:
+                self.inst.write(line)
+
+def run_procs(procs):  # procs is a list of lists; at most one command per inner list is active at a time
+    while True:
+        pipes = []
+        active_procs = []
+
+        for pp in procs:
+            for p in pp:
+                if p.running:  # this list already has an active command
+                    active_procs.append(p)
+                    pipes.append(p.proc.stdout)
+                    break
+                elif p.status == None and not p.failed and not p.timed_out:  # not started yet
+                    p.run()
+                    if p.running:
+                        active_procs.append(p)
+                        pipes.append(p.proc.stdout)
+                        break
+
+        if len(pipes) == 0:
+            # all finished
+            break
+
+        (i, o, x) = select(pipes, [], [], 1.0)  # wait up to one second for readable output
+        now = int(time.time())
+
+        # handle process output
+        for p in active_procs:
+            if p.proc.stdout in i:
+                p.read_line()
+
+            # handle timeout
+            if p.running and now > p.timeout:
+                output_ts("'%s' timed out" % p.cmdAsStr())
+                if p.inst:
+                    p.inst.write("\n** timeout **\n")
+                p.timed_out = True
+                p.terminate()
+
+
+def pidof(name):
+    """Return the pids whose /proc/<pid>/exe link has basename `name`."""
+    found = []
+    for entry in [p for p in os.listdir('/proc') if p.isdigit()]:
+        try:
+            exe = os.readlink('/proc/%s/exe' % entry)
+            if os.path.basename(exe) == name:
+                found.append(int(entry))
+        except:
+            pass
+    return found
+
+
+class StringIOmtime(StringIO.StringIO):  # StringIO that records when it was created or last written
+    def __init__(self, buf = ''):
+        StringIO.StringIO.__init__(self, buf)
+        self.mtime = time.time()  # make_tar() copies this into the tar member's mtime
+
+    def write(self, s):
+        StringIO.StringIO.write(self, s)
+        self.mtime = time.time()
+
+
+if __name__ == "__main__":
+    try:
+        sys.exit(main())  # main()'s return value becomes the exit status
+    except KeyboardInterrupt:  # interrupted by the user: report it and exit with status 3
+        print "\nInterrupted."
+        sys.exit(3)
-- 
cgit v1.2.1


From 3273ae634ab473ddd6f8bedea4550e25f8718e77 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Fri, 17 Sep 2010 11:23:19 -0700
Subject: datapath: Remove backported random32().

Nothing uses it anymore and it causes problems when backported on
some distributions.  Kernels we support have net_random(), which
is the same thing so there is no reason to have an entire copy of
the random number generator in our source tree.

Reported-by: Alexey I. Froloff <raorn@altlinux.org>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
---
 datapath/datapath.c                                |   1 -
 datapath/linux-2.6/Modules.mk                      |   2 -
 .../linux-2.6/compat-2.6/include/linux/random.h    |  17 ---
 datapath/linux-2.6/compat-2.6/random32.c           | 144 ---------------------
 4 files changed, 164 deletions(-)
 delete mode 100644 datapath/linux-2.6/compat-2.6/include/linux/random.h
 delete mode 100644 datapath/linux-2.6/compat-2.6/random32.c

diff --git a/datapath/datapath.c b/datapath/datapath.c
index fe37ec1ed..b3f77b36a 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -29,7 +29,6 @@
 #include <linux/udp.h>
 #include <linux/version.h>
 #include <linux/ethtool.h>
-#include <linux/random.h>
 #include <linux/wait.h>
 #include <asm/system.h>
 #include <asm/div64.h>
diff --git a/datapath/linux-2.6/Modules.mk b/datapath/linux-2.6/Modules.mk
index 7f4cae6ea..5a0e9ad47 100644
--- a/datapath/linux-2.6/Modules.mk
+++ b/datapath/linux-2.6/Modules.mk
@@ -4,7 +4,6 @@ openvswitch_sources += \
 	linux-2.6/compat-2.6/genetlink-openvswitch.c \
 	linux-2.6/compat-2.6/ip_output-openvswitch.c \
 	linux-2.6/compat-2.6/kmemdup.c \
-	linux-2.6/compat-2.6/random32.c \
 	linux-2.6/compat-2.6/skbuff-openvswitch.c \
 	linux-2.6/compat-2.6/time.c
 openvswitch_headers += \
@@ -32,7 +31,6 @@ openvswitch_headers += \
 	linux-2.6/compat-2.6/include/linux/netfilter_bridge.h \
 	linux-2.6/compat-2.6/include/linux/netfilter_ipv4.h \
 	linux-2.6/compat-2.6/include/linux/netlink.h \
-	linux-2.6/compat-2.6/include/linux/random.h \
 	linux-2.6/compat-2.6/include/linux/rculist.h \
 	linux-2.6/compat-2.6/include/linux/rtnetlink.h \
 	linux-2.6/compat-2.6/include/linux/skbuff.h \
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/random.h b/datapath/linux-2.6/compat-2.6/include/linux/random.h
deleted file mode 100644
index 4e4932c9c..000000000
--- a/datapath/linux-2.6/compat-2.6/include/linux/random.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef __LINUX_RANDOM_WRAPPER_H
-#define __LINUX_RANDOM_WRAPPER_H 1
-
-#include_next <linux/random.h>
-
-#include <linux/version.h>
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
-
-#ifdef __KERNEL__
-u32 random32(void);
-void srandom32(u32 seed);
-#endif /* __KERNEL__ */
-
-#endif /* linux kernel < 2.6.19 */
-
-
-#endif
diff --git a/datapath/linux-2.6/compat-2.6/random32.c b/datapath/linux-2.6/compat-2.6/random32.c
deleted file mode 100644
index b0dd2a32b..000000000
--- a/datapath/linux-2.6/compat-2.6/random32.c
+++ /dev/null
@@ -1,144 +0,0 @@
-#include <linux/version.h>
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
-
-/*
-  This is a maximally equidistributed combined Tausworthe generator
-  based on code from GNU Scientific Library 1.5 (30 Jun 2004)
-
-   x_n = (s1_n ^ s2_n ^ s3_n)
-
-   s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19))
-   s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25))
-   s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11))
-
-   The period of this generator is about 2^88.
-
-   From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe
-   Generators", Mathematics of Computation, 65, 213 (1996), 203--213.
-
-   This is available on the net from L'Ecuyer's home page,
-
-   http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
-   ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps
-
-   There is an erratum in the paper "Tables of Maximally
-   Equidistributed Combined LFSR Generators", Mathematics of
-   Computation, 68, 225 (1999), 261--269:
-   http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps
-
-        ... the k_j most significant bits of z_j must be non-
-        zero, for each j. (Note: this restriction also applies to the
-        computer code given in [4], but was mistakenly not mentioned in
-        that paper.)
-
-   This affects the seeding procedure by imposing the requirement
-   s1 > 1, s2 > 7, s3 > 15.
-
-*/
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/jiffies.h>
-#include <linux/random.h>
-#include <linux/smp.h>
-
-#include "compat26.h"
-
-struct rnd_state {
-	u32 s1, s2, s3;
-};
-
-static struct rnd_state net_rand_state[NR_CPUS];
-
-static u32 __random32(struct rnd_state *state)
-{
-#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
-
-	state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12);
-	state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4);
-	state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17);
-
-	return (state->s1 ^ state->s2 ^ state->s3);
-}
-
-static void __set_random32(struct rnd_state *state, unsigned long s)
-{
-	if (s == 0)
-		s = 1;      /* default seed is 1 */
-
-#define LCG(n) (69069 * n)
-	state->s1 = LCG(s);
-	state->s2 = LCG(state->s1);
-	state->s3 = LCG(state->s2);
-
-	/* "warm it up" */
-	__random32(state);
-	__random32(state);
-	__random32(state);
-	__random32(state);
-	__random32(state);
-	__random32(state);
-}
-
-/**
- *	random32 - pseudo random number generator
- *
- *	A 32 bit pseudo-random number is generated using a fast
- *	algorithm suitable for simulation. This algorithm is NOT
- *	considered safe for cryptographic use.
- */
-u32 random32(void)
-{
-	return __random32(&net_rand_state[smp_processor_id()]);
-}
-
-/**
- *	srandom32 - add entropy to pseudo random number generator
- *	@seed: seed value
- *
- *	Add some additional seeding to the random32() pool.
- *	Note: this pool is per cpu so it only affects current CPU.
- */
-void srandom32(u32 entropy)
-{
-	struct rnd_state *state = &net_rand_state[smp_processor_id()];
-	__set_random32(state, state->s1 ^ entropy);
-}
-
-static int __init random32_reseed(void);
-
-/*
- *	Generate some initially weak seeding values to allow
- *	to start the random32() engine.
- */
-int __init random32_init(void)
-{
-	int i;
-
-	for (i = 0; i < NR_CPUS; i++) {
-		struct rnd_state *state = &net_rand_state[i];
-		__set_random32(state, i + jiffies);
-	}
-	random32_reseed();
-	return 0;
-}
-
-/*
- *	Generate better values after random number generator
- *	is fully initalized.
- */
-static int __init random32_reseed(void)
-{
-	int i;
-	unsigned long seed;
-
-	for (i = 0; i < NR_CPUS; i++) {
-		struct rnd_state *state = &net_rand_state[i];
-
-		get_random_bytes(&seed, sizeof(seed));
-		__set_random32(state, seed);
-	}
-	return 0;
-}
-
-#endif /* kernel < 2.6.19 */
-- 
cgit v1.2.1


From 4fefc29e00f032fb4187d4af0e05cdd328e76dde Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Fri, 17 Sep 2010 11:47:49 -0700
Subject: AUTHORS: Add Alexey I. Froloff as reporter.

---
 AUTHORS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/AUTHORS b/AUTHORS
index 75a6de5c9..8d2aed834 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -34,6 +34,7 @@ Yu Zhiguo               yuzg@cn.fujitsu.com
 The following additional people are mentioned in commit logs as having
 provided helpful bug reports or suggestions.
 
+Alexey I. Froloff       raorn@altlinux.org
 Brandon Heller          brandonh@stanford.edu
 Bryan Fulton            bryan@nicira.com
 Cedric Hobbs            cedric@nicira.com
-- 
cgit v1.2.1


From 6b7b9d34c0eab9871cf6284c41108a84129817f3 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Fri, 17 Sep 2010 14:37:51 -0700
Subject: ovs-vsctl: Remove default timeout.

On overloaded XenServers the current default timeout of 5 seconds can
occasionally be reached, which causes VM startup to fail.  This commit
fixes the problem by removing the default timeout and changing each
invocation of ovs-vsctl within the tree to specify its own timeout,
if appropriate.

Bug #3573.
---
 debian/openvswitch-switch.init                               |  2 +-
 utilities/ovs-vsctl.8.in                                     | 11 ++++++-----
 utilities/ovs-vsctl.c                                        |  2 +-
 xenserver/etc_init.d_openvswitch                             |  6 +++---
 xenserver/etc_xapi.d_plugins_openvswitch-cfg-update          |  6 +++---
 xenserver/etc_xensource_scripts_vif                          |  4 ++--
 xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py |  2 +-
 xenserver/usr_sbin_brctl                                     |  2 +-
 xenserver/usr_share_openvswitch_scripts_monitor-external-ids |  2 +-
 9 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/debian/openvswitch-switch.init b/debian/openvswitch-switch.init
index d86063037..34b5604dc 100755
--- a/debian/openvswitch-switch.init
+++ b/debian/openvswitch-switch.init
@@ -257,7 +257,7 @@ case "$1" in
             echo " ERROR."
         fi
 
-        ovs-vsctl --no-wait init
+        ovs-vsctl --no-wait --timeout=5 init
 
         # Start ovs-vswitchd.
         set --
diff --git a/utilities/ovs-vsctl.8.in b/utilities/ovs-vsctl.8.in
index 7476777a8..ef080bb41 100644
--- a/utilities/ovs-vsctl.8.in
+++ b/utilities/ovs-vsctl.8.in
@@ -118,11 +118,12 @@ Prevents \fBovs\-vsctl\fR from actually modifying the database.
 .
 .IP "\fB\-t \fIsecs\fR"
 .IQ "\fB\-\-timeout=\fIsecs\fR"
-Limits runtime to approximately \fIsecs\fR seconds.  A value of 
-zero will cause \fBovs\-vsctl\fR to wait forever.  If the timeout expires, 
-\fBovs\-vsctl\fR will exit with a \fBSIGALRM\fR signal.  If this option is
-not used, \fBovs\-vsctl\fR uses a timeout of five seconds.
-(A timeout would normally happen only if the database cannot be contacted.)
+By default, or with a \fIsecs\fR of \fB0\fR, \fBovs\-vsctl\fR waits
+forever for a response from the database.  This option limits runtime
+to approximately \fIsecs\fR seconds.  If the timeout expires,
+\fBovs\-vsctl\fR will exit with a \fBSIGALRM\fR signal.  (A timeout
+would normally happen only if the database cannot be contacted, or if
+the system is overloaded.)
 .
 .SS "Public Key Infrastructure Options"
 .so lib/ssl.man
diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c
index 4d50194aa..043530280 100644
--- a/utilities/ovs-vsctl.c
+++ b/utilities/ovs-vsctl.c
@@ -85,7 +85,7 @@ static bool dry_run;
 static bool wait_for_reload = true;
 
 /* --timeout: Time to wait for a connection to 'db'. */
-static int timeout = 5;
+static int timeout;
 
 /* All supported commands. */
 static const struct vsctl_command_syntax all_commands[];
diff --git a/xenserver/etc_init.d_openvswitch b/xenserver/etc_init.d_openvswitch
index 050844570..68079fcf5 100755
--- a/xenserver/etc_init.d_openvswitch
+++ b/xenserver/etc_init.d_openvswitch
@@ -293,7 +293,7 @@ EOF
 function set_system_ids {
     if [ -f /etc/xensource-inventory ]; then
         action "Configuring Open vSwitch system IDs" true
-        $vsctl --no-wait set Open_vSwitch . \
+        $vsctl --no-wait --timeout=5 set Open_vSwitch . \
             external-ids:system-type="$PRODUCT_BRAND" \
             external-ids:system-version="$PRODUCT_VERSION-$BUILD_NUMBER" \
             external-ids:system-id="$INSTALLATION_UUID" \
@@ -329,11 +329,11 @@ function start {
     fi
 
     start_ovsdb_server
-    $vsctl --no-wait init
+    $vsctl --no-wait --timeout=5 init
     if [ ! -e /var/run/openvswitch.booted ]; then
         touch /var/run/openvswitch.booted
         for bridge in $($vsctl list-br); do
-            $vsctl --no-wait del-br $bridge
+            $vsctl --no-wait --timeout=5 del-br $bridge
         done
     fi
 
diff --git a/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update b/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update
index 6da86d5cd..733301868 100755
--- a/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update
+++ b/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update
@@ -111,7 +111,7 @@ def setControllerCfg(controller):
                    'managers="ssl:' + controller + ':6632"'])
 
 def vswitchCfgQuery(action_args):
-    cmd = [vsctl, "-vANY:console:emer"] + action_args
+    cmd = [vsctl, "--timeout=5", "-vANY:console:emer"] + action_args
     output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()
     if len(output) == 0 or output[0] == None:
         output = ""
@@ -120,14 +120,14 @@ def vswitchCfgQuery(action_args):
     return output
 
 def vswitchCfgMod(action_args):
-    cmd = [vsctl, "-vANY:console:emer"] + action_args
+    cmd = [vsctl, "--timeout=5", "-vANY:console:emer"] + action_args
     exitcode = subprocess.call(cmd)
     if exitcode != 0:
         raise XenAPIPlugin.Failure("VSWITCH_CONFIG_MOD_FAILURE",
                                    [ str(exitcode) , str(action_args) ])
 
 def emergency_reset(session, args):
-    cmd = [vsctl, "emer-reset"]
+    cmd = [vsctl, "--timeout=5", "emer-reset"]
     exitcode = subprocess.call(cmd)
     if exitcode != 0:
         raise XenAPIPlugin.Failure("VSWITCH_EMER_RESET_FAILURE",
diff --git a/xenserver/etc_xensource_scripts_vif b/xenserver/etc_xensource_scripts_vif
index f27ff5b40..88006e2c4 100755
--- a/xenserver/etc_xensource_scripts_vif
+++ b/xenserver/etc_xensource_scripts_vif
@@ -165,7 +165,7 @@ add_to_bridge()
             local vif_details=$(handle_vswitch_vif_details $bridge)
         fi
 
-        $vsctl -- --if-exists del-port $dev -- add-port $bridge $dev $vif_details
+        $vsctl --timeout=30 -- --if-exists del-port $dev -- add-port $bridge $dev $vif_details
         ;;
     esac
         
@@ -182,7 +182,7 @@ remove_from_bridge()
         # If ovs-brcompatd is running, it might already have deleted the
         # port.  Use --if-exists to suppress the error that would otherwise
         # arise in that case.
-        $vsctl -- --if-exists del-port $dev
+        $vsctl --timeout=30 -- --if-exists del-port $dev
         ;;
     esac
 }
diff --git a/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py b/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py
index 6ee4138b2..d2f6a6a3c 100644
--- a/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py
+++ b/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py
@@ -79,7 +79,7 @@ class VSwitchConfig:
     @staticmethod
     def Get(action):
         try:
-            arg = [vsctl, "-vANY:console:emer"] + action.split()
+            arg = [vsctl, "--timeout=30", "-vANY:console:emer"] + action.split()
             output = ShellPipe(arg).Stdout()
         except StandardError, e:
             XSLogError("config retrieval error: " + str(e))
diff --git a/xenserver/usr_sbin_brctl b/xenserver/usr_sbin_brctl
index 7fecc5648..5cf0b88ac 100755
--- a/xenserver/usr_sbin_brctl
+++ b/xenserver/usr_sbin_brctl
@@ -35,7 +35,7 @@ def delegate():
 
 def call_vsctl(cmd, arg=""):
     database = '--db=' + OVSDB_SERVER
-    command = [VSCTL, database, cmd]
+    command = [VSCTL, '--timeout=30', database, cmd]
     if (arg):
         command.append(arg)
     return subprocess.Popen(command, stdout=subprocess.PIPE).communicate()[0].split()
diff --git a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids
index f91801d22..a28ce6090 100755
--- a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids
+++ b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids
@@ -98,7 +98,7 @@ def get_iface_id(if_name, default=None):
 
 def set_external_id(table, record, key, value):
     col = 'external-ids:"' + key + '"="' + value + '"'
-    cmd = [vsctl, "-vANY:console:emer", "set", table, record, col]
+    cmd = [vsctl, "--timeout=30", "-vANY:console:emer", "set", table, record, col]
     exitcode = subprocess.call(cmd)
     if exitcode != 0:
         syslog.syslog(syslog.LOG_WARNING, 
-- 
cgit v1.2.1


From 00456ddda4110f7f75ea9f8a703b31c7f2df7649 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Fri, 17 Sep 2010 08:23:32 -0700
Subject: xenserver: monitor-external-ids remove redundant ovs-vsctl calls

The number of ovs-vsctl calls required to add a new vif in
monitor-external-ids grew linearly with the number of vifs in the
system.  Changed to only do O(1) ovs-vsctl calls per vif addition.
---
 xenserver/usr_share_openvswitch_scripts_monitor-external-ids | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids
index a28ce6090..a0aad7a0e 100755
--- a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids
+++ b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids
@@ -227,13 +227,15 @@ def main(argv):
                 if name not in bridges:
                     update_network_uuids(name, ids)
 
-                update_bridge_id(name, ids)
+                if (name not in bridges) or (bridges[name] != ids):
+                    update_bridge_id(name, ids)
 
             bridges = new_bridges
 
         if interfaces != new_interfaces:
             for name,ids in new_interfaces.items():
-                update_iface_id(name, ids)
+                if (name not in interfaces) or (interfaces[name] != ids):
+                    update_iface_id(name, ids)
             interfaces = new_interfaces
  
 if __name__ == '__main__':
-- 
cgit v1.2.1


From 1089aab7136612acb86cdcd638d7d2261311531a Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 2 Sep 2010 10:06:42 -0700
Subject: ovsdb: Fix bug in "wait" command implementation.

The declaration of "error" that this commit removes shadowed an outer local
declaration of "error", which caused errors detected by this code not to be
propagated up to the outer level.

Found with GCC -Wshadow.
---
 ovsdb/execution.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ovsdb/execution.c b/ovsdb/execution.c
index 5b6762f07..7ce9a3f50 100644
--- a/ovsdb/execution.c
+++ b/ovsdb/execution.c
@@ -629,7 +629,6 @@ ovsdb_execute_wait(struct ovsdb_execution *x, struct ovsdb_parser *parser,
         /* Parse "rows" into 'expected'. */
         ovsdb_row_hash_init(&expected, &columns);
         for (i = 0; i < rows->u.array.n; i++) {
-            struct ovsdb_error *error;
             struct ovsdb_row *row;
 
             row = ovsdb_row_create(table);
-- 
cgit v1.2.1


From 2a022368f4b37559de5d5621a88c648023493f75 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 2 Sep 2010 10:09:09 -0700
Subject: Avoid shadowing local variable names.

All of these changes avoid using the same name for two local variables
within the same function.  None of them are actual bugs as far as I can tell,
but any of them could be confusing to the casual reader.

The one in lib/ovsdb-idl.c is particularly brilliant: inner and outer
loops both using (different) variables named 'i'.

Found with GCC -Wshadow.
---
 lib/dpif-netdev.c          |  1 -
 lib/dynamic-string.c       |  2 --
 lib/json.c                 |  1 -
 lib/learning-switch.c      |  6 +++---
 lib/netdev-linux.c         |  6 +++---
 lib/netlink.c              | 10 +++++-----
 lib/ofp-parse.c            |  6 +++---
 lib/ovsdb-idl.c            |  8 ++++----
 lib/process.c              |  2 +-
 lib/stream-fd.c            |  2 +-
 lib/stream-ssl.c           |  6 +++---
 ofproto/ofproto.c          |  2 --
 ovsdb/execution.c          |  2 --
 tests/test-csum.c          |  3 +--
 tests/test-ovsdb.c         |  6 ++++--
 utilities/ovs-controller.c |  6 ++----
 utilities/ovs-openflowd.c  |  4 ----
 utilities/ovs-vsctl.c      | 24 +++++++++++-------------
 vswitchd/bridge.c          |  4 ----
 19 files changed, 41 insertions(+), 60 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 323f36411..3975b5a8b 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1104,7 +1104,6 @@ dp_netdev_modify_vlan_tci(struct ofpbuf *packet, uint16_t tci, uint16_t mask)
         veh->veth_tci |= htons(tci);
     } else {
         /* Insert new 802.1Q header. */
-        struct eth_header *eh = packet->l2;
         struct vlan_eth_header tmp;
         memcpy(tmp.veth_dst, eh->eth_dst, ETH_ADDR_LEN);
         memcpy(tmp.veth_src, eh->eth_src, ETH_ADDR_LEN);
diff --git a/lib/dynamic-string.c b/lib/dynamic-string.c
index 5f8054a45..3af7fc9f5 100644
--- a/lib/dynamic-string.c
+++ b/lib/dynamic-string.c
@@ -147,8 +147,6 @@ ds_put_format_valist(struct ds *ds, const char *format, va_list args_)
     if (needed < available) {
         ds->length += needed;
     } else {
-        size_t available;
-
         ds_reserve(ds, ds->length + needed);
 
         va_copy(args, args_);
diff --git a/lib/json.c b/lib/json.c
index 3b70e6bdb..5887f677a 100644
--- a/lib/json.c
+++ b/lib/json.c
@@ -705,7 +705,6 @@ json_lex_number(struct json_parser *p)
      *
      * We suppress negative zeros as a matter of policy. */
     if (!significand) {
-        struct json_token token;
         token.type = T_INTEGER;
         token.u.integer = 0;
         json_parser_input(p, &token);
diff --git a/lib/learning-switch.c b/lib/learning-switch.c
index e189f1e44..4e7645d7c 100644
--- a/lib/learning-switch.c
+++ b/lib/learning-switch.c
@@ -220,10 +220,10 @@ lswitch_process_packet(struct lswitch *sw, struct rconn *rconn,
         }
     }
     if (VLOG_IS_DBG_ENABLED()) {
-        char *p = ofp_to_string(msg->data, msg->size, 2);
+        char *s = ofp_to_string(msg->data, msg->size, 2);
         VLOG_DBG_RL(&rl, "%016llx: OpenFlow packet ignored: %s",
-                    sw->datapath_id, p);
-        free(p);
+                    sw->datapath_id, s);
+        free(s);
     }
 }
 
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index e6036bfc5..7227f5dfc 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1779,12 +1779,12 @@ netdev_linux_get_in6(const struct netdev *netdev_, struct in6_addr *in6)
         if (file != NULL) {
             const char *name = netdev_get_name(netdev_);
             while (fgets(line, sizeof line, file)) {
-                struct in6_addr in6;
+                struct in6_addr in6_tmp;
                 char ifname[16 + 1];
-                if (parse_if_inet6_line(line, &in6, ifname)
+                if (parse_if_inet6_line(line, &in6_tmp, ifname)
                     && !strcmp(name, ifname))
                 {
-                    netdev_dev->in6 = in6;
+                    netdev_dev->in6 = in6_tmp;
                     break;
                 }
             }
diff --git a/lib/netlink.c b/lib/netlink.c
index 4e83747cc..66c27b1fb 100644
--- a/lib/netlink.c
+++ b/lib/netlink.c
@@ -1036,19 +1036,19 @@ nl_policy_parse(const struct ofpbuf *msg, size_t nla_offset,
 
         type = nla->nla_type;
         if (type < n_attrs && policy[type].type != NL_A_NO_ATTR) {
-            const struct nl_policy *p = &policy[type];
+            const struct nl_policy *e = &policy[type];
             size_t min_len, max_len;
 
             /* Validate length and content. */
-            min_len = p->min_len ? p->min_len : attr_len_range[p->type][0];
-            max_len = p->max_len ? p->max_len : attr_len_range[p->type][1];
+            min_len = e->min_len ? e->min_len : attr_len_range[e->type][0];
+            max_len = e->max_len ? e->max_len : attr_len_range[e->type][1];
             if (len < min_len || len > max_len) {
                 VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" length %zu not in "
                             "allowed range %zu...%zu",
                             offset, type, len, min_len, max_len);
                 return false;
             }
-            if (p->type == NL_A_STRING) {
+            if (e->type == NL_A_STRING) {
                 if (((char *) nla)[nla->nla_len - 1]) {
                     VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" lacks null at end",
                                 offset, type);
@@ -1060,7 +1060,7 @@ nl_policy_parse(const struct ofpbuf *msg, size_t nla_offset,
                     return false;
                 }
             }
-            if (!p->optional && attrs[type] == NULL) {
+            if (!e->optional && attrs[type] == NULL) {
                 assert(n_required > 0);
                 --n_required;
             }
diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c
index 405008c67..312eaaaaf 100644
--- a/lib/ofp-parse.c
+++ b/lib/ofp-parse.c
@@ -270,12 +270,12 @@ str_to_action(char *str, struct ofpbuf *b)
             put_output_action(b, str_to_u32(arg));
         } else if (!strcasecmp(act, "enqueue")) {
             char *sp = NULL;
-            char *port = strtok_r(arg, ":q", &sp);
+            char *port_s = strtok_r(arg, ":q", &sp);
             char *queue = strtok_r(NULL, "", &sp);
-            if (port == NULL || queue == NULL) {
+            if (port_s == NULL || queue == NULL) {
                 ovs_fatal(0, "\"enqueue\" syntax is \"enqueue:PORT:QUEUE\"");
             }
-            put_enqueue_action(b, str_to_u32(port), str_to_u32(queue));
+            put_enqueue_action(b, str_to_u32(port_s), str_to_u32(queue));
         } else if (!strcasecmp(act, "drop")) {
             /* A drop action in OpenFlow occurs by just not setting
              * an action. */
diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
index 2132f9fef..43ff94714 100644
--- a/lib/ovsdb-idl.c
+++ b/lib/ovsdb-idl.c
@@ -433,13 +433,13 @@ ovsdb_idl_send_monitor_request(struct ovsdb_idl *idl)
         const struct ovsdb_idl_table *table = &idl->tables[i];
         const struct ovsdb_idl_table_class *tc = table->class;
         struct json *monitor_request, *columns;
-        size_t i;
+        size_t j;
 
         monitor_request = json_object_create();
         columns = json_array_create_empty();
-        for (i = 0; i < tc->n_columns; i++) {
-            const struct ovsdb_idl_column *column = &tc->columns[i];
-            if (table->modes[i] != OVSDB_IDL_MODE_NONE) {
+        for (j = 0; j < tc->n_columns; j++) {
+            const struct ovsdb_idl_column *column = &tc->columns[j];
+            if (table->modes[j] != OVSDB_IDL_MODE_NONE) {
                 json_array_add(columns, json_string_create(column->name));
             }
         }
diff --git a/lib/process.c b/lib/process.c
index a201a88f8..377c396b9 100644
--- a/lib/process.c
+++ b/lib/process.c
@@ -517,7 +517,7 @@ process_run_capture(char **argv, char **stdout_log, char **stderr_log,
     block_sigchld(&oldsigs);
     pid = fork();
     if (pid < 0) {
-        int error = errno;
+        error = errno;
 
         unblock_sigchld(&oldsigs);
         VLOG_WARN("fork failed: %s", strerror(error));
diff --git a/lib/stream-fd.c b/lib/stream-fd.c
index 9410009c4..ef4dc8d91 100644
--- a/lib/stream-fd.c
+++ b/lib/stream-fd.c
@@ -214,7 +214,7 @@ pfd_accept(struct pstream *pstream, struct stream **new_streamp)
 
     new_fd = accept(ps->fd, (struct sockaddr *) &ss, &ss_len);
     if (new_fd < 0) {
-        int retval = errno;
+        retval = errno;
         if (retval != EAGAIN) {
             VLOG_DBG_RL(&rl, "accept: %s", strerror(retval));
         }
diff --git a/lib/stream-ssl.c b/lib/stream-ssl.c
index 70b15f0da..9c7533d1e 100644
--- a/lib/stream-ssl.c
+++ b/lib/stream-ssl.c
@@ -385,7 +385,7 @@ do_ca_cert_bootstrap(struct stream *stream)
 
     file = fdopen(fd, "w");
     if (!file) {
-        int error = errno;
+        error = errno;
         VLOG_ERR("could not bootstrap CA cert: fdopen failed: %s",
                  strerror(error));
         unlink(ca_cert.file_name);
@@ -402,7 +402,7 @@ do_ca_cert_bootstrap(struct stream *stream)
     }
 
     if (fclose(file)) {
-        int error = errno;
+        error = errno;
         VLOG_ERR("could not bootstrap CA cert: writing %s failed: %s",
                  ca_cert.file_name, strerror(error));
         unlink(ca_cert.file_name);
@@ -921,7 +921,7 @@ pssl_accept(struct pstream *pstream, struct stream **new_streamp)
 
     new_fd = accept(pssl->fd, &sin, &sin_len);
     if (new_fd < 0) {
-        int error = errno;
+        error = errno;
         if (error != EAGAIN) {
             VLOG_DBG_RL(&rl, "accept: %s", strerror(error));
         }
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 844083d8b..e571bd4e2 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -1071,7 +1071,6 @@ ofproto_run1(struct ofproto *p)
 
     for (i = 0; i < 50; i++) {
         struct ofpbuf *buf;
-        int error;
 
         error = dpif_recv(p->dpif, &buf);
         if (error) {
@@ -1122,7 +1121,6 @@ ofproto_run1(struct ofproto *p)
 
         retval = pvconn_accept(ofservice->pvconn, OFP_VERSION, &vconn);
         if (!retval) {
-            struct ofconn *ofconn;
             struct rconn *rconn;
             char *name;
 
diff --git a/ovsdb/execution.c b/ovsdb/execution.c
index 7ce9a3f50..a96abfcaf 100644
--- a/ovsdb/execution.c
+++ b/ovsdb/execution.c
@@ -103,8 +103,6 @@ ovsdb_execute(struct ovsdb *db, const struct json *params,
         || !params->u.array.n
         || params->u.array.elems[0]->type != JSON_STRING
         || strcmp(params->u.array.elems[0]->u.string, db->schema->name)) {
-        struct ovsdb_error *error;
-
         if (params->type != JSON_ARRAY) {
             error = ovsdb_syntax_error(params, NULL, "array expected");
         } else {
diff --git a/tests/test-csum.c b/tests/test-csum.c
index 8c8545870..eebc8803f 100644
--- a/tests/test-csum.c
+++ b/tests/test-csum.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009 Nicira Networks.
+ * Copyright (c) 2009, 2010 Nicira Networks.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -134,7 +134,6 @@ main(void)
         const uint16_t *data16 = (const uint16_t *) tc->data;
         const uint32_t *data32 = (const uint32_t *) tc->data;
         uint32_t partial;
-        size_t i;
 
         /* Test csum(). */
         assert(ntohs(csum(tc->data, tc->size)) == tc->csum);
diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c
index 18784a52a..04db65421 100644
--- a/tests/test-ovsdb.c
+++ b/tests/test-ovsdb.c
@@ -1095,7 +1095,7 @@ do_query_distinct(int argc OVS_UNUSED, char *argv[])
     size_t n_classes;
     struct json *json;
     int exit_code = 0;
-    size_t i, j, k;
+    size_t i;
 
     /* Parse table schema, create table. */
     json = unbox_json(parse_json(argv[1]));
@@ -1161,6 +1161,7 @@ do_query_distinct(int argc OVS_UNUSED, char *argv[])
     for (i = 0; i < json->u.array.n; i++) {
         struct ovsdb_row_set results;
         struct ovsdb_condition cnd;
+        size_t j;
 
         check_ovsdb_error(ovsdb_condition_from_json(ts, json->u.array.elems[i],
                                                     NULL, &cnd));
@@ -1171,6 +1172,8 @@ do_query_distinct(int argc OVS_UNUSED, char *argv[])
         ovsdb_row_set_init(&results);
         ovsdb_query_distinct(table, &cnd, &columns, &results);
         for (j = 0; j < results.n_rows; j++) {
+            size_t k;
+
             for (k = 0; k < n_rows; k++) {
                 if (uuid_equals(ovsdb_row_get_uuid(results.rows[j]),
                                 &rows[k].uuid)) {
@@ -1833,7 +1836,6 @@ do_idl(int argc, char *argv[])
     for (i = 2; i < argc; i++) {
         char *arg = argv[i];
         struct jsonrpc_msg *request, *reply;
-        int error;
 
         if (*arg == '+') {
             /* The previous transaction didn't change anything. */
diff --git a/utilities/ovs-controller.c b/utilities/ovs-controller.c
index b18959ad6..40e2a801d 100644
--- a/utilities/ovs-controller.c
+++ b/utilities/ovs-controller.c
@@ -107,7 +107,6 @@ main(int argc, char *argv[])
     for (i = optind; i < argc; i++) {
         const char *name = argv[i];
         struct vconn *vconn;
-        int retval;
 
         retval = vconn_open(name, OFP_VERSION, &vconn);
         if (!retval) {
@@ -146,12 +145,10 @@ main(int argc, char *argv[])
 
     while (n_switches > 0 || n_listeners > 0) {
         int iteration;
-        int i;
 
         /* Accept connections on listening vconns. */
         for (i = 0; i < n_listeners && n_switches < MAX_SWITCHES; ) {
             struct vconn *new_vconn;
-            int retval;
 
             retval = pvconn_accept(listeners[i], OFP_VERSION, &new_vconn);
             if (!retval || retval == EAGAIN) {
@@ -171,7 +168,8 @@ main(int argc, char *argv[])
             bool progress = false;
             for (i = 0; i < n_switches; ) {
                 struct switch_ *this = &switches[i];
-                int retval = do_switching(this);
+
+                retval = do_switching(this);
                 if (!retval || retval == EAGAIN) {
                     if (!retval) {
                         progress = true;
diff --git a/utilities/ovs-openflowd.c b/utilities/ovs-openflowd.c
index 8cb50e4b4..945b11d05 100644
--- a/utilities/ovs-openflowd.c
+++ b/utilities/ovs-openflowd.c
@@ -458,8 +458,6 @@ parse_options(int argc, char *argv[], struct ofsettings *s)
     s->n_controllers = controllers.n;
     s->controllers = xmalloc(s->n_controllers * sizeof *s->controllers);
     if (argc > 1) {
-        size_t i;
-
         for (i = 0; i < s->n_controllers; i++) {
             s->controllers[i] = controller_opts;
             s->controllers[i].target = controllers.names[i];
@@ -468,8 +466,6 @@ parse_options(int argc, char *argv[], struct ofsettings *s)
 
     /* Sanity check. */
     if (controller_opts.band == OFPROTO_OUT_OF_BAND) {
-        size_t i;
-
         for (i = 0; i < s->n_controllers; i++) {
             if (!strcmp(s->controllers[i].target, "discover")) {
                 ovs_fatal(0, "Cannot perform discovery with out-of-band "
diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c
index 043530280..884a41faf 100644
--- a/utilities/ovs-vsctl.c
+++ b/utilities/ovs-vsctl.c
@@ -1305,12 +1305,11 @@ add_port(struct vsctl_context *ctx,
 
     get_info(ctx->ovs, &info);
     if (may_exist) {
-        struct vsctl_port *port;
+        struct vsctl_port *vsctl_port;
 
-        port = find_port(&info, port_name, false);
-        if (port) {
+        vsctl_port = find_port(&info, port_name, false);
+        if (vsctl_port) {
             struct svec want_names, have_names;
-            size_t i;
 
             svec_init(&want_names);
             for (i = 0; i < n_ifaces; i++) {
@@ -1319,15 +1318,16 @@ add_port(struct vsctl_context *ctx,
             svec_sort(&want_names);
 
             svec_init(&have_names);
-            for (i = 0; i < port->port_cfg->n_interfaces; i++) {
-                svec_add(&have_names, port->port_cfg->interfaces[i]->name);
+            for (i = 0; i < vsctl_port->port_cfg->n_interfaces; i++) {
+                svec_add(&have_names,
+                         vsctl_port->port_cfg->interfaces[i]->name);
             }
             svec_sort(&have_names);
 
-            if (strcmp(port->bridge->name, br_name)) {
+            if (strcmp(vsctl_port->bridge->name, br_name)) {
                 char *command = vsctl_context_to_string(ctx);
                 vsctl_fatal("\"%s\" but %s is actually attached to bridge %s",
-                            command, port_name, port->bridge->name);
+                            command, port_name, vsctl_port->bridge->name);
             }
 
             if (!svec_equal(&want_names, &have_names)) {
@@ -2767,8 +2767,8 @@ do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands,
 
             ds_chomp(ds, '\n');
             for (j = 0; j < ds->length; j++) {
-                int c = ds->string[j];
-                switch (c) {
+                int ch = ds->string[j];
+                switch (ch) {
                 case '\n':
                     fputs("\\n", stdout);
                     break;
@@ -2778,7 +2778,7 @@ do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands,
                     break;
 
                 default:
-                    putchar(c);
+                    putchar(ch);
                 }
             }
             putchar('\n');
@@ -2796,8 +2796,6 @@ do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands,
 
     if (wait_for_reload && status != TXN_UNCHANGED) {
         for (;;) {
-            const struct ovsrec_open_vswitch *ovs;
-
             ovsdb_idl_run(idl);
             OVSREC_OPEN_VSWITCH_FOR_EACH (ovs, idl) {
                 if (ovs->cur_cfg >= next_cfg) {
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 598b0016d..3f5e3d471 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -632,7 +632,6 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
         struct odp_port *dpif_ports;
         size_t n_dpif_ports;
         struct shash cur_ifaces, want_ifaces;
-        struct shash_node *node;
 
         /* Get the set of interfaces currently in this datapath. */
         dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports);
@@ -765,7 +764,6 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
             struct ovsrec_controller **controllers;
             struct ofproto_sflow_options oso;
             size_t n_controllers;
-            size_t i;
 
             memset(&oso, 0, sizeof oso);
 
@@ -2822,7 +2820,6 @@ bond_rebalance_port(struct port *port)
              * smallest hashes instead of the biggest ones.  There is little
              * reason behind this decision; we could use the opposite sort
              * order to shift away big hashes ahead of small ones. */
-            size_t i;
             bool order_swapped;
 
             for (i = 0; i < from->n_hashes; i++) {
@@ -3407,7 +3404,6 @@ port_reconfigure(struct port *port, const struct ovsrec_port *cfg)
     trunks = NULL;
     if (vlan < 0 && cfg->n_trunks) {
         size_t n_errors;
-        size_t i;
 
         trunks = bitmap_allocate(4096);
         n_errors = 0;
-- 
cgit v1.2.1


From 3f5d8c02b213ea42dcf892635c1db310f00d855e Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Mon, 20 Sep 2010 10:22:19 -0700
Subject: vswitchd: Document policing implementation and caveats.

With some text from Dan Wendlandt <dan@nicira.com> adapted from
http://openvswitch.org/?page_id=267

CC: Keith Amidon <keith@nicira.com>
---
 vswitchd/vswitch.xml | 80 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 67 insertions(+), 13 deletions(-)

diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index b9d8aaa9f..6e255763d 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -637,24 +637,78 @@
     </group>
 
     <group title="Ingress Policing">
+      <p>
+        These settings control ingress policing for packets received on this
+        interface.  On a physical interface, this limits the rate at which
+        traffic is allowed into the system from the outside; on a virtual
+        interface (one connected to a virtual machine), this limits the rate at
+        which the VM is able to transmit.
+      </p>
+      <p>
+        Policing is a simple form of quality-of-service that simply drops
+        packets received in excess of the configured rate.  Due to its
+        simplicity, policing is usually less accurate and less effective than
+        egress QoS (which is configured using the <ref table="QoS"/> and <ref
+        table="Queue"/> tables).
+      </p>
+      <p>
+        Policing is currently implemented only on Linux.  The Linux
+        implementation uses a simple ``token bucket'' approach:
+      </p>
+      <ul>
+        <li>
+          The size of the bucket corresponds to <ref
+          column="ingress_policing_burst"/>.  Initially the bucket is full.
+        </li>
+        <li>
+          Whenever a packet is received, its size (converted to tokens) is
+          compared to the number of tokens currently in the bucket.  If the
+          required number of tokens are available, they are removed and the
+          packet is forwarded.  Otherwise, the packet is dropped.
+        </li>
+        <li>
+          Whenever it is not full, the bucket is refilled with tokens at the
+          rate specified by <ref column="ingress_policing_rate"/>.
+        </li>
+      </ul>
+      <p>
+        Policing interacts badly with some network protocols, and especially
+        with fragmented IP packets.  Suppose that there is enough network
+        activity to keep the bucket nearly empty all the time.  Then this token
+        bucket algorithm will forward a single packet every so often, with the
+        period depending on packet size and on the configured rate.  All of the
+        fragments of an IP packet are normally transmitted back-to-back, as a
+        group.  In such a situation, therefore, only one of these fragments
+        will be forwarded and the rest will be dropped.  IP does not provide
+        any way for the intended recipient to ask for only the remaining
+        fragments.  In such a case there are two likely possibilities for what
+        will happen next: either all of the fragments will eventually be
+        retransmitted (as TCP will do), in which case the same problem will
+        recur, or the sender will not realize that its packet has been dropped
+        and data will simply be lost (as some UDP-based protocols will do).
+        Either way, it is possible that no forward progress will ever occur.
+      </p>
+      <column name="ingress_policing_rate">
+        <p>
+          Maximum rate for data received on this interface, in kbps.  Data
+          received faster than this rate is dropped.  Set to <code>0</code>
+          (the default) to disable policing.
+        </p>
+      </column>
+
       <column name="ingress_policing_burst">
         <p>Maximum burst size for data received on this interface, in kb.  The
           default burst size if set to <code>0</code> is 1000 kb.  This value
           has no effect if <ref column="ingress_policing_rate"/>
           is <code>0</code>.</p>
-        <p>The burst size should be at least the size of the interface's
-          MTU.</p>
-      </column>
-
-      <column name="ingress_policing_rate">
-        <p>Maximum rate for data received on this interface, in kbps.  Data
-          received faster than this rate is dropped.  Set to <code>0</code> to
-          disable policing.</p>
-        <p>The meaning of ``ingress'' is from Open vSwitch's perspective.  If
-          configured on a physical interface, then it limits the rate at which
-          traffic is allowed into the system from the outside.  If configured
-          on a virtual interface that is connected to a virtual machine, then
-          it limits the rate at which the guest is able to transmit.</p>
+        <p>
+          Specifying a larger burst size lets the algorithm be more forgiving,
+          which is important for protocols like TCP that react severely to
+          dropped packets.  The burst size should be at least the size of the
+          interface's MTU.  Specifying a value that is numerically at least as
+          large as 10% of <ref column="ingress_policing_rate"/> helps TCP come
+          closer to achieving the full rate.
+        </p>
       </column>
     </group>
 
-- 
cgit v1.2.1


From c0c2489aabc0d19495909ac7391ea7427d9b458a Mon Sep 17 00:00:00 2001
From: Sajjad Lateef <slateef@nicira.com>
Date: Fri, 17 Sep 2010 14:53:42 -0700
Subject: debian: Add openvswitch-python package

This installs the Python runtime bindings for
Open vSwitch database into
/usr/share/python-support/openvswitch-python/ovs and
/usr/share/python-support/openvswitch-python/ovs/db

Updated FSF address in copyright file

Minor Whitespace re-formatting

Removed prerm, preinst, postinst files for openvswitch-python
---
 debian/automake.mk                |  2 ++
 debian/control                    | 10 +++++++++-
 debian/copyright.in               |  2 +-
 debian/openvswitch-python.dirs    |  2 ++
 debian/openvswitch-python.install |  2 ++
 debian/rules                      |  1 +
 6 files changed, 17 insertions(+), 2 deletions(-)
 create mode 100644 debian/openvswitch-python.dirs
 create mode 100644 debian/openvswitch-python.install

diff --git a/debian/automake.mk b/debian/automake.mk
index a5a6e05a3..dc18961a4 100644
--- a/debian/automake.mk
+++ b/debian/automake.mk
@@ -29,6 +29,8 @@ EXTRA_DIST += \
 	debian/openvswitch-pki-server.install \
 	debian/openvswitch-pki-server.postinst \
 	debian/openvswitch-pki.postinst \
+	debian/openvswitch-python.dirs \
+	debian/openvswitch-python.install \
 	debian/openvswitch-switch.README.Debian \
 	debian/openvswitch-switch.dirs \
 	debian/openvswitch-switch.init \
diff --git a/debian/control b/debian/control
index edecffda5..fde640eac 100644
--- a/debian/control
+++ b/debian/control
@@ -5,7 +5,8 @@ Maintainer: Open vSwitch developers <dev@openvswitch.org>
 Uploaders: Ben Pfaff <pfaffben@debian.org>, Simon Horman <horms@debian.org>
 Build-Depends:
  debhelper (>= 5), autoconf (>= 2.64), automake1.10, libssl-dev,
- pkg-config (>= 0.21), po-debconf, bzip2, openssl, python
+ pkg-config (>= 0.21), po-debconf, bzip2, openssl, python,
+ python-support (>= 0.8.4)
 Standards-Version: 3.9.1
 Homepage: http://openvswitch.org/
 
@@ -94,3 +95,10 @@ Description: Debug symbols for Open vSwitch packages
  This package contains the debug symbols for all the other openvswitch-*
  packages.  Install it to debug one of them or to examine a core dump
  produced by one of them.
+
+Package: openvswitch-python 
+Architecture: all
+Depends: ${python:Depends}, openvswitch-switch (= ${binary:Version})
+Provides: ${python:Provides}
+Description: Python bindings for Open vSwitch
+ This package contains the full Python bindings for Open vSwitch database.
diff --git a/debian/copyright.in b/debian/copyright.in
index 0cac63471..6a7370e39 100644
--- a/debian/copyright.in
+++ b/debian/copyright.in
@@ -46,7 +46,7 @@ License:
 
    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 
    Copyright (c) 2005, 2007 XenSource Ltd.
    Copyright (c) 2010, Nicira Networks.
diff --git a/debian/openvswitch-python.dirs b/debian/openvswitch-python.dirs
new file mode 100644
index 000000000..055838c0b
--- /dev/null
+++ b/debian/openvswitch-python.dirs
@@ -0,0 +1,2 @@
+usr/share/python-support/openvswitch-python/ovs/
+usr/share/python-support/openvswitch-python/ovs/db/
diff --git a/debian/openvswitch-python.install b/debian/openvswitch-python.install
new file mode 100644
index 000000000..35d35e784
--- /dev/null
+++ b/debian/openvswitch-python.install
@@ -0,0 +1,2 @@
+python/ovs/*.py usr/share/python-support/openvswitch-python/ovs/
+python/ovs/db/*.py usr/share/python-support/openvswitch-python/ovs/db/
diff --git a/debian/rules b/debian/rules
index 49d562ad7..8e7ad1e82 100755
--- a/debian/rules
+++ b/debian/rules
@@ -103,6 +103,7 @@ binary-common:
 	dh_strip --dbg-package=openvswitch-dbg
 	dh_compress 
 	dh_fixperms -X var/log/core
+	dh_pysupport
 	dh_perl
 	dh_makeshlibs
 	dh_installdeb
-- 
cgit v1.2.1


From 110b54816bb56b8ba4878812ad300a3a277abb06 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Fri, 17 Sep 2010 15:06:17 -0700
Subject: python: timer_wait_until calculated current time incorrectly

The timer_wait_until function in poller.py was using Time.msec to
figure out the current time.  Unfortunately, Time.msec does not
exist.  Changed to use ovs.timeval.msec.
---
 python/ovs/poller.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/python/ovs/poller.py b/python/ovs/poller.py
index 57417c481..2a0b2ecbb 100644
--- a/python/ovs/poller.py
+++ b/python/ovs/poller.py
@@ -15,6 +15,7 @@
 import errno
 import logging
 import select
+import ovs.timeval
 
 class Poller(object):
     """High-level wrapper around the "poll" system call.
@@ -62,15 +63,15 @@ class Poller(object):
             self.__timer_wait(msec)
 
     def timer_wait_until(self, msec):
-        """Causes the following call to self.block() to wake up when the
-        current time, as returned by Time.msec(), reaches 'msec' or later.  If
+        """Causes the following call to self.block() to wake up when the current
+        time, as returned by ovs.timeval.msec(), reaches 'msec' or later.  If
         'msec' is earlier than the current time, the following call to
         self.block() will not block at all.
 
         The timer registration is one-shot: only the following call to
         self.block() is affected.  The timer will need to be re-registered
         after self.block() is called if it is to persist."""
-        now = Time.msec()
+        now = ovs.timeval.msec()
         if msec <= now:
             self.immediate_wake()
         else:
-- 
cgit v1.2.1


From 2ed7d6e227c009f0f6bdabbb85262d8aac896e00 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Tue, 21 Sep 2010 12:06:12 +0900
Subject: debian: Add override of non-standard-dir-perm to corekeeper

Remove code to restart openvswitch-switch from its postinst script
as this is automatically added by dh_installinit

Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Ben Pfaff <blp@nicira.com>
---
 debian/openvswitch-switch.postinst | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/debian/openvswitch-switch.postinst b/debian/openvswitch-switch.postinst
index 4be5a30c7..74b52ba90 100755
--- a/debian/openvswitch-switch.postinst
+++ b/debian/openvswitch-switch.postinst
@@ -33,17 +33,6 @@ case "$1" in
                 fi
             done
 	fi
-
-        if /etc/init.d/openvswitch-switch status >/dev/null 2>&1; then
-            running=true
-            /etc/init.d/openvswitch-switch stop
-        else
-            running=false
-        fi
-
-        if $running; then
-            /etc/init.d/openvswitch-switch start
-        fi
         ;;
 
     abort-upgrade|abort-remove|abort-deconfigure)
-- 
cgit v1.2.1


From b9ddc6aaaea369faf7787b690de2ae56ffd08ad9 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Tue, 21 Sep 2010 12:06:13 +0900
Subject: debian: Dont fail init if module cant be inserted

The problem here is that the daemon is started/restarted on
package installation, but the module may not be present at that
time and (as far as I know) it's bad form to fail the package installation
in that circumstance.

In keeping with the way ipvsadm handles a similar problem,
exit with a non-error exit status if the module can't be inserted.
The loud error message is still displayed.

Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Ben Pfaff <blp@nicira.com>
---
 debian/openvswitch-switch.init | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/debian/openvswitch-switch.init b/debian/openvswitch-switch.init
index 34b5604dc..600c18564 100755
--- a/debian/openvswitch-switch.init
+++ b/debian/openvswitch-switch.init
@@ -157,7 +157,7 @@ load_module() {
             echo "For instructions, read"
             echo "/usr/share/doc/openvswitch-datapath-source/README.Debian"
         fi
-        exit 1
+        exit 0
     fi
 }
 
-- 
cgit v1.2.1


From cbb863426624fff643b30b22ddb8c524ec7891c4 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Tue, 21 Sep 2010 12:06:14 +0900
Subject: debian: The second argument to m-a should be openvswitch-datapath

Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Ben Pfaff <blp@nicira.com>
---
 debian/openvswitch-datapath-source.README.Debian | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/debian/openvswitch-datapath-source.README.Debian b/debian/openvswitch-datapath-source.README.Debian
index 73bba7a14..a9132c5f3 100644
--- a/debian/openvswitch-datapath-source.README.Debian
+++ b/debian/openvswitch-datapath-source.README.Debian
@@ -5,9 +5,9 @@ Open vSwitch for Debian
 
     - Building with module-assistant:
 
-        $ module-assistant auto-install openvswitch
+        $ module-assistant auto-install openvswitch-datapath
       or
-        $ m-a a-i openvswitch
+        $ m-a a-i openvswitch-datapath
 
       If kernel source or headers are in a non-standard directory, add
       the option -k /path/to/kernel/source with the correct path.
-- 
cgit v1.2.1


From 1d273d6d802e5daeebe551e8ca0c3e99f4dda15e Mon Sep 17 00:00:00 2001
From: Sajjad Lateef <slateef@nicira.com>
Date: Tue, 21 Sep 2010 11:18:26 -0700
Subject: debian: Rename openvswitch-python to python-openvswitch

Package name changed to be consistent with
Debian Python policy.

The python files are installed in
/usr/share/python-support/python-openvswitch/ovs and
/usr/share/python-support/python-openvswitch/ovs/db

Changed Section to python, per lintian
---
 debian/automake.mk                | 4 ++--
 debian/control                    | 3 ++-
 debian/openvswitch-python.dirs    | 2 --
 debian/openvswitch-python.install | 2 --
 debian/python-openvswitch.dirs    | 2 ++
 debian/python-openvswitch.install | 2 ++
 6 files changed, 8 insertions(+), 7 deletions(-)
 delete mode 100644 debian/openvswitch-python.dirs
 delete mode 100644 debian/openvswitch-python.install
 create mode 100644 debian/python-openvswitch.dirs
 create mode 100644 debian/python-openvswitch.install

diff --git a/debian/automake.mk b/debian/automake.mk
index dc18961a4..c768d56b5 100644
--- a/debian/automake.mk
+++ b/debian/automake.mk
@@ -29,8 +29,6 @@ EXTRA_DIST += \
 	debian/openvswitch-pki-server.install \
 	debian/openvswitch-pki-server.postinst \
 	debian/openvswitch-pki.postinst \
-	debian/openvswitch-python.dirs \
-	debian/openvswitch-python.install \
 	debian/openvswitch-switch.README.Debian \
 	debian/openvswitch-switch.dirs \
 	debian/openvswitch-switch.init \
@@ -41,6 +39,8 @@ EXTRA_DIST += \
 	debian/openvswitch-switch.postrm \
 	debian/openvswitch-switch.template \
 	debian/ovs-bugtool \
+	debian/python-openvswitch.dirs \
+	debian/python-openvswitch.install \
 	debian/rules \
 	debian/rules.modules
 
diff --git a/debian/control b/debian/control
index fde640eac..b7f2248f9 100644
--- a/debian/control
+++ b/debian/control
@@ -96,8 +96,9 @@ Description: Debug symbols for Open vSwitch packages
  packages.  Install it to debug one of them or to examine a core dump
  produced by one of them.
 
-Package: openvswitch-python 
+Package: python-openvswitch 
 Architecture: all
+Section: python
 Depends: ${python:Depends}, openvswitch-switch (= ${binary:Version})
 Provides: ${python:Provides}
 Description: Python bindings for Open vSwitch
diff --git a/debian/openvswitch-python.dirs b/debian/openvswitch-python.dirs
deleted file mode 100644
index 055838c0b..000000000
--- a/debian/openvswitch-python.dirs
+++ /dev/null
@@ -1,2 +0,0 @@
-usr/share/python-support/openvswitch-python/ovs/
-usr/share/python-support/openvswitch-python/ovs/db/
diff --git a/debian/openvswitch-python.install b/debian/openvswitch-python.install
deleted file mode 100644
index 35d35e784..000000000
--- a/debian/openvswitch-python.install
+++ /dev/null
@@ -1,2 +0,0 @@
-python/ovs/*.py usr/share/python-support/openvswitch-python/ovs/
-python/ovs/db/*.py usr/share/python-support/openvswitch-python/ovs/db/
diff --git a/debian/python-openvswitch.dirs b/debian/python-openvswitch.dirs
new file mode 100644
index 000000000..9bc6912a3
--- /dev/null
+++ b/debian/python-openvswitch.dirs
@@ -0,0 +1,2 @@
+usr/share/python-support/python-openvswitch/ovs/
+usr/share/python-support/python-openvswitch/ovs/db/
diff --git a/debian/python-openvswitch.install b/debian/python-openvswitch.install
new file mode 100644
index 000000000..211ed300e
--- /dev/null
+++ b/debian/python-openvswitch.install
@@ -0,0 +1,2 @@
+python/ovs/*.py usr/share/python-support/python-openvswitch/ovs/
+python/ovs/db/*.py usr/share/python-support/python-openvswitch/ovs/db/
-- 
cgit v1.2.1


From 9884eaad2b8386aefa036f370891fd1159598994 Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Tue, 21 Sep 2010 11:58:11 -0700
Subject: Indicate that OVS only works with kernels 2.6.18 and greater

At one time, we tested against 2.6.15.  This hasn't been done for a long
time, and it almost certainly no longer works.
---
 README | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README b/README
index 881a11343..55e9d9c9b 100644
--- a/README
+++ b/README
@@ -29,7 +29,7 @@ vSwitch supports the following features:
     * Support for OpenFlow
     * Compatibility layer for the Linux bridging code
 
-The included Linux kernel module supports Linux 2.6.15 and up, with
+The included Linux kernel module supports Linux 2.6.18 and up, with
 testing focused on 2.6.18 with Centos and Xen patches and version
 2.6.26 from kernel.org.  Open vSwitch also has special support for
 Citrix XenServer hosts.
-- 
cgit v1.2.1


From 5fa555b3cf6030bbeda1bbcf169d514f3ae1a077 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Fri, 17 Sep 2010 15:07:02 -0700
Subject: xenserver: monitor-external-ids responds to SIGHUP

When monitor-external-ids receives a SIGHUP it will forget
everything and run as if it was newly started.

Feature #3668.
---
 ..._share_openvswitch_scripts_monitor-external-ids | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids
index a0aad7a0e..45b3dd7bf 100755
--- a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids
+++ b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids
@@ -22,6 +22,7 @@
 
 import getopt
 import os
+import signal
 import subprocess
 import sys
 import syslog
@@ -37,6 +38,7 @@ import ovs.db.idl
 
 vsctl="/usr/bin/ovs-vsctl"
 session = None
+force_run = False
 
 # Set up a session to interact with XAPI.
 #
@@ -163,8 +165,15 @@ def usage():
     print "Other options:"
     print "  -h, --help               display this help message"
     sys.exit(0)
- 
+
+def handler(signum, frame):
+    global force_run
+    if (signum == signal.SIGHUP):
+        force_run = True
+
 def main(argv):
+    global force_run
+
     try:
         options, args = getopt.gnu_getopt(
             argv[1:], 'h', ['help'] + ovs.daemon.LONG_OPTIONS)
@@ -196,16 +205,23 @@ def main(argv):
     # tasks, we need it.  Wait here until it's up.
     while not os.path.exists("/var/run/xapi_init_complete.cookie"):
         time.sleep(1)
+
+    signal.signal(signal.SIGHUP, handler)
  
     bridges = {}
     interfaces = {}
     while True:
-        if not idl.run():
+        if not force_run and not idl.run():
             poller = ovs.poller.Poller()
             idl.wait(poller)
             poller.block()
             continue
- 
+
+        if force_run:
+            bridges    = {}
+            interfaces = {}
+            force_run = False
+
         new_bridges = {}
         for rec in idl.data["Bridge"].itervalues():
             name = rec.name.as_scalar()
-- 
cgit v1.2.1


From 97685b902d63f48bdc9f63e5458a1ac5ae59b0b5 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Fri, 17 Sep 2010 16:05:04 -0700
Subject: xenserver: reload sends SIGHUP to monitor-external-ids

When the init script's reload function is called it will send a
SIGHUP to monitor-external-ids.  This will cause
monitor-external-ids to re-generate everything.

Feature #3668.
---
 xenserver/etc_init.d_openvswitch | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/xenserver/etc_init.d_openvswitch b/xenserver/etc_init.d_openvswitch
index 68079fcf5..282765323 100755
--- a/xenserver/etc_init.d_openvswitch
+++ b/xenserver/etc_init.d_openvswitch
@@ -117,6 +117,12 @@ else
     monitor_opt=
 fi
 
+function hup_monitor_external_ids {
+    if [ -e /var/run/openvswitch/monitor-external-ids.pid ]; then
+        action "Configuring Open vSwitch external IDs" kill -HUP `cat /var/run/openvswitch/monitor-external-ids.pid`
+    fi
+}
+
 function dp_list {
     "$dpctl" show | grep '^dp[0-9]\+:' | cut -d':' -f 1
 }
@@ -380,8 +386,10 @@ case "$1" in
         restart
         ;;
     reload|force-reload)
-	# Nothing to do--ovs-vswitchd and ovsdb-server keep their configuration
-	# up-to-date all the time.
+        # Nothing to do to ovs-vswitchd and ovsdb-server as they keep their
+        # configuration up-to-date all the time.  HUP monitor-external-ids so it
+        # re-runs.
+        hup_monitor_external_ids
 	;;
     strace-vswitchd)
         shift
-- 
cgit v1.2.1


From b524945e78804bf4ef46abb936eba829538f8e29 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Tue, 21 Sep 2010 14:27:51 -0700
Subject: tests: Add 5-second timeout to ovs-vsctl tests.

Otherwise some of the tests may not terminate if something goes wrong.
(Formerly, ovs-vsctl had a default timeout of 5 seconds, so this was not
necessary before.)
---
 tests/ovs-vsctl.at | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/ovs-vsctl.at b/tests/ovs-vsctl.at
index 56fc1ce2d..66ead6dd3 100644
--- a/tests/ovs-vsctl.at
+++ b/tests/ovs-vsctl.at
@@ -15,17 +15,17 @@ dnl RUN_OVS_VSCTL(COMMAND, ...)
 dnl
 dnl Executes each ovs-vsctl COMMAND.
 m4_define([RUN_OVS_VSCTL],
-  [m4_foreach([command], [$@], [ovs-vsctl --no-wait -vreconnect:ANY:emer --db=unix:socket -- command
+  [m4_foreach([command], [$@], [ovs-vsctl --timeout=5 --no-wait -vreconnect:ANY:emer --db=unix:socket -- command
 ])])
 m4_define([RUN_OVS_VSCTL_ONELINE],
-  [m4_foreach([command], [$@], [ovs-vsctl --no-wait -vreconnect:ANY:emer --db=unix:socket --oneline -- command
+  [m4_foreach([command], [$@], [ovs-vsctl --timeout=5 --no-wait -vreconnect:ANY:emer --db=unix:socket --oneline -- command
 ])])
 
 dnl RUN_OVS_VSCTL_TOGETHER(COMMAND, ...)
 dnl
 dnl Executes each ovs-vsctl COMMAND in a single run of ovs-vsctl.
 m4_define([RUN_OVS_VSCTL_TOGETHER],
-  [ovs-vsctl --no-wait -vreconnect:ANY:emer --db=unix:socket --oneline dnl
+  [ovs-vsctl --timeout=5 --no-wait -vreconnect:ANY:emer --db=unix:socket --oneline dnl
 m4_foreach([command], [$@], [ -- command])])
 
 dnl CHECK_BRIDGES([BRIDGE, PARENT, VLAN], ...)
-- 
cgit v1.2.1


From aed133bf9bce8f35b666c3052907f525c803c83b Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Mon, 20 Sep 2010 10:56:15 -0700
Subject: ovs-vsctl: Allow "get" commands to create @names also.

This is useful for adding records that refer to other records by UUID, e.g.
   ovs-vsctl \
     -- set bridge br0 mirrors=@m \
     -- --id=@eth0 get port eth0 \
     -- --id=@eth1 get port eth1 \
     -- --id=@m create mirror name=mymirror select-dst-port=@eth0 \
                              select-src-port=@eth0 output-port=@eth1
---
 tests/ovs-vsctl.at       | 49 ++++++++++++++++++++++++++++++++++++++++++++
 utilities/ovs-vsctl.8.in |  8 ++++++--
 utilities/ovs-vsctl.c    | 53 +++++++++++++++++++++++++++++++-----------------
 3 files changed, 89 insertions(+), 21 deletions(-)

diff --git a/tests/ovs-vsctl.at b/tests/ovs-vsctl.at
index 66ead6dd3..152a6585c 100644
--- a/tests/ovs-vsctl.at
+++ b/tests/ovs-vsctl.at
@@ -705,6 +705,55 @@ AT_CHECK([cat stdout4], [0], [500
 OVS_VSCTL_CLEANUP
 AT_CLEANUP
 
+AT_SETUP([--id option on create, get commands])
+AT_KEYWORDS([ovs-vsctl])
+OVS_VSCTL_SETUP
+AT_CHECK([RUN_OVS_VSCTL([add-br br0],
+                        [add-port br0 eth0],
+                        [add-port br0 eth1])])
+AT_CHECK(
+  [RUN_OVS_VSCTL_TOGETHER(
+    [set bridge br0 mirrors=@m],
+    [--id=@eth0 get port eth0],
+    [--id=@eth1 get port eth1],
+    [--id=@m create mirror name=mymirror select-dst-port=@eth0 select-src-port=@eth0 output-port=@eth1])],
+  [0], [stdout], [], [OVS_VSCTL_CLEANUP])
+AT_CHECK(
+  [perl $srcdir/uuidfilt.pl stdout], [0], [dnl
+
+
+
+<0>
+],
+  [], [OVS_VSCTL_CLEANUP])
+AT_CHECK(
+  [RUN_OVS_VSCTL(
+    [list port eth0 eth1],
+    [list mirror],
+    [list bridge br0])],
+  [0], [stdout], [], [OVS_VSCTL_CLEANUP])
+AT_CHECK(
+  [sed -n -e '/uuid/p' -e '/name/p' -e '/mirrors/p' -e '/select/p' -e '/output/p' < stdout | $srcdir/uuidfilt.pl], [0], [dnl
+[_uuid               : <0>
+name                : "eth0"
+_uuid               : <1>
+name                : "eth1"
+_uuid               : <2>
+name                : mymirror
+output_port         : <1>
+output_vlan         : []
+select_all          : false
+select_dst_port     : [<0>]
+select_src_port     : [<0>]
+select_vlan         : []
+_uuid               : <3>
+mirrors             : [<2>]
+name                : "br0"
+]],
+  [], [OVS_VSCTL_CLEANUP])
+OVS_VSCTL_CLEANUP
+AT_CLEANUP
+
 dnl This test really shows a bug -- "create" followed by "list" in
 dnl the same execution shows the wrong UUID on the "list" command.
 dnl The bug is documented in ovs-vsctl.8.
diff --git a/utilities/ovs-vsctl.8.in b/utilities/ovs-vsctl.8.in
index ef080bb41..cfb911483 100644
--- a/utilities/ovs-vsctl.8.in
+++ b/utilities/ovs-vsctl.8.in
@@ -490,7 +490,7 @@ pair of double quotes (\fB""\fR).
 .IP "UUID"
 Either a universally unique identifier in the style of RFC 4122,
 e.g. \fBf81d4fae\-7dec\-11d0\-a765\-00a0c91e6bf6\fR, or an \fB@\fIname\fR
-defined by the \fBcreate\fR command within the same \fBovs\-vsctl\fR
+defined by a \fBget\fR or \fBcreate\fR command within the same \fBovs\-vsctl\fR
 invocation.
 .PP
 Multiple values in a single column may be separated by spaces or a
@@ -518,7 +518,7 @@ records are specified, lists all the records in \fItable\fR.
 The UUIDs shown for rows created in the same \fBovs\-vsctl\fR
 invocation will be wrong.
 .
-.IP "[\fB\-\-if\-exists\fR] \fBget \fItable record column\fR[\fB:\fIkey\fR]..."
+.IP "[\fB\-\-id=@\fIname\fR] [\fB\-\-if\-exists\fR] \fBget \fItable record \fR[\fIcolumn\fR[\fB:\fIkey\fR]]..."
 Prints the value of each specified \fIcolumn\fR in the given
 \fIrecord\fR in \fItable\fR.  For map columns, a \fIkey\fR may
 optionally be specified, in which case the value associated with
@@ -528,6 +528,10 @@ For a map column, without \fB\-\-if\-exists\fR it is an error if
 \fIkey\fR does not exist; with it, a blank line is printed.  If
 \fIcolumn\fR is not a map column or if \fIkey\fR is not specified,
 \fB\-\-if\-exists\fR has no effect.
+.IP
+If \fB@\fIname\fR is specified, then the UUID for \fIrecord\fR may be
+referred to by that name later in the same \fBovs\-vsctl\fR
+invocation in contexts where a UUID is expected.
 .
 .IP "\fBset \fItable record column\fR[\fB:\fIkey\fR]\fB=\fIvalue\fR..."
 Sets the value of each specified \fIcolumn\fR in the given
diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c
index 884a41faf..d09cf7460 100644
--- a/utilities/ovs-vsctl.c
+++ b/utilities/ovs-vsctl.c
@@ -1975,6 +1975,28 @@ get_column(const struct vsctl_table_class *table, const char *column_name,
     }
 }
 
+static struct uuid *
+create_symbol(struct ovsdb_symbol_table *symtab, const char *id, bool *newp)
+{
+    struct ovsdb_symbol *symbol;
+
+    if (id[0] != '@') {
+        vsctl_fatal("row id \"%s\" does not begin with \"@\"", id);
+    }
+
+    if (newp) {
+        *newp = ovsdb_symbol_table_get(symtab, id) == NULL;
+    }
+
+    symbol = ovsdb_symbol_table_insert(symtab, id);
+    if (symbol->used) {
+        vsctl_fatal("row id \"%s\" may only be specified on one --id option",
+                    id);
+    }
+    symbol->used = true;
+    return &symbol->uuid;
+}
+
 static char *
 missing_operator_error(const char *arg, const char **allowed_operators,
                        size_t n_allowed)
@@ -2142,6 +2164,7 @@ error:
 static void
 cmd_get(struct vsctl_context *ctx)
 {
+    const char *id = shash_find_data(&ctx->options, "--id");
     bool if_exists = shash_find(&ctx->options, "--if-exists");
     const char *table_name = ctx->argv[1];
     const char *record_id = ctx->argv[2];
@@ -2152,6 +2175,15 @@ cmd_get(struct vsctl_context *ctx)
 
     table = get_table(table_name);
     row = must_get_row(ctx, table, record_id);
+    if (id) {
+        bool new;
+
+        *create_symbol(ctx->symtab, id, &new) = row->uuid;
+        if (!new) {
+            vsctl_fatal("row id \"%s\" specified on \"get\" command was used "
+                        "before it was defined", id);
+        }
+    }
     for (i = 3; i < ctx->argc; i++) {
         const struct ovsdb_idl_column *column;
         const struct ovsdb_datum *datum;
@@ -2453,24 +2485,7 @@ cmd_create(struct vsctl_context *ctx)
     const struct uuid *uuid;
     int i;
 
-    if (id) {
-        struct ovsdb_symbol *symbol;
-
-        if (id[0] != '@') {
-            vsctl_fatal("row id \"%s\" does not begin with \"@\"", id);
-        }
-
-        symbol = ovsdb_symbol_table_insert(ctx->symtab, id);
-        if (symbol->used) {
-            vsctl_fatal("row id \"%s\" may only be used to insert a single "
-                        "row", id);
-        }
-        symbol->used = true;
-
-        uuid = &symbol->uuid;
-    } else {
-        uuid = NULL;
-    }
+    uuid = id ? create_symbol(ctx->symtab, id, NULL) : NULL;
 
     table = get_table(table_name);
     row = ovsdb_idl_txn_insert(ctx->txn, table->class, uuid);
@@ -2865,7 +2880,7 @@ static const struct vsctl_command_syntax all_commands[] = {
     {"emer-reset", 0, 0, cmd_emer_reset, NULL, ""},
 
     /* Parameter commands. */
-    {"get", 3, INT_MAX, cmd_get, NULL, "--if-exists"},
+    {"get", 2, INT_MAX, cmd_get, NULL, "--if-exists,--id="},
     {"list", 1, INT_MAX, cmd_list, NULL, ""},
     {"set", 3, INT_MAX, cmd_set, NULL, ""},
     {"add", 4, INT_MAX, cmd_add, NULL, ""},
-- 
cgit v1.2.1


From bad973d7ae0ca5273b46c79b4bd9007e607f277c Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Mon, 20 Sep 2010 11:57:13 -0700
Subject: ovs-vsctl: Add many more usage examples.

---
 utilities/ovs-vsctl.8.in | 90 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)

diff --git a/utilities/ovs-vsctl.8.in b/utilities/ovs-vsctl.8.in
index cfb911483..281756c90 100644
--- a/utilities/ovs-vsctl.8.in
+++ b/utilities/ovs-vsctl.8.in
@@ -631,7 +631,97 @@ point to a new \fBQoS\fR record, which in turn points with its queue 0
 to a new \fBQueue\fR record:
 .IP
 .B "ovs\-vsctl \-\- set port eth0 qos=@newqos \-\- \-\-id=@newqos create qos type=linux\-htb other\-config:max\-rate=1000000 queues:0=@newqueue \-\- \-\-id=@newqueue create queue other\-config:min\-rate=1000000 other\-config:max\-rate=1000000"
+.SH "CONFIGURATION COOKBOOK"
+.SS "Port Configuration"
+.PP
+Add an ``internal port'' \fBvlan10\fR to bridge \fBbr0\fR as a VLAN
+access port for VLAN 10, and configure it with an IP address:
+.IP
+.B "ovs\-vsctl add\-port br0 vlan10 tag=10 \-\- set Interface vlan10 type=internal"
+.IP
+.B "ifconfig vlan10 192.168.0.123"
 .
+.SS "Port Mirroring"
+.PP
+Mirror all packets received or sent on \fBeth0\fR or \fBeth1\fR onto
+\fBeth2\fR, assuming that all of those ports exist on bridge \fBbr0\fR
+(as a side-effect this causes any packets received on \fBeth2\fR to be
+ignored):
+.IP
+.B "ovs\-vsctl \-\- set Bridge br0 mirrors=@m \(rs"
+.IP
+.B "\-\- \-\-id=@eth0 get Port eth0 \(rs"
+.IP
+.B "\-\- \-\-id=@eth1 get Port eth1 \(rs"
+.IP
+.B "\-\- \-\-id=@eth2 get Port eth2 \(rs"
+.IP
+.B "\-\- \-\-id=@m create Mirror name=mymirror select\-dst\-port=@eth0,@eth1 select\-src\-port=@eth0,@eth1 output\-port=@eth2"
+.PP
+Remove the mirror created above from \fBbr0\fR and destroy the Mirror
+record (to avoid having an unreferenced record in the database):
+.IP
+.B "ovs\-vsctl destroy Mirror mymirror \-\- clear Bridge br0 mirrors"
+.SS "Quality of Service (QoS)"
+.PP
+Create a \fBlinux\-htb\fR QoS record that points to a few queues and
+use it on \fBeth0\fR and \fBeth1\fR:
+.IP
+.B "ovs\-vsctl \-\- set Port eth0 qos=@newqos \(rs"
+.IP
+.B "\-\- set Port eth1 qos=@newqos \(rs"
+.IP
+.B "\-\- \-\-id=@newqos create QoS type=linux\-htb other\-config:max\-rate=1000000000 queues=0=@q0,1=@q1 \(rs"
+.IP
+.B "\-\- \-\-id=@q0 create Queue other\-config:min\-rate=100000000 other\-config:max\-rate=100000000 \(rs"
+.IP
+.B "\-\- \-\-id=@q1 create Queue other\-config:min\-rate=500000000"
+.PP
+Deconfigure the QoS record above from \fBeth1\fR only:
+.IP
+.B "ovs\-vsctl clear Port eth1 qos"
+.PP
+To deconfigure the QoS record from both \fBeth0\fR and \fBeth1\fR and
+then delete the QoS record:
+.IP
+.B "ovs\-vsctl \-\- destroy QoS eth0 \-\- clear Port eth0 qos \-\- clear Port eth1 qos"
+.PP
+(This command will leave two unreferenced Queue records in the
+database.  To delete them, use "\fBovs\-vsctl list Queue\fR" to find
+their UUIDs, then "\fBovs\-vsctl destroy Queue \fIuuid1\fR
+\fIuuid2\fR" to destroy each of them.)
+.SS "NetFlow"
+.PP
+Configure bridge \fBbr0\fR to send NetFlow records to UDP port 5566 on
+host 192.168.0.34, with an active timeout of 30 seconds:
+.IP
+.B "ovs\-vsctl \-\- set Bridge br0 netflow=@nf \(rs"
+.IP
+.B "\-\- \-\-id=@nf create NetFlow targets=\(rs\(dq192.168.0.34:5566\(rs\(dq active\-timeout=30"
+.PP
+Update the NetFlow configuration created by the previous command to
+instead use an active timeout of 60 seconds:
+.IP
+.B "ovs\-vsctl set NetFlow br0 active\-timeout=60"
+.PP
+Deconfigure the NetFlow settings from \fBbr0\fR and delete the NetFlow
+record (to avoid having an unreferenced record in the database):
+.IP
+.B "ovs\-vsctl destroy NetFlow br0 \-\- clear Bridge br0 netflow"
+.SS "sFlow"
+.PP
+Configure bridge \fBbr0\fR to send sFlow records to a collector on
+10.0.0.1 at port 6343, using \fBeth1\fR's IP address as the source,
+with specific sampling parameters:
+.IP
+.B "ovs\-vsctl \-\- \-\-id=@s create sFlow agent=eth1 target=\(rs\(dq10.0.0.1:6343\(rs\(dq header=128 sampling=64 polling=10 \(rs"
+.IP
+.B "\-\- set Bridge br0 sflow=@s"
+.PP
+Deconfigure sFlow from \fBbr0\fR and destroy the sFlow record (to avoid
+having an unreferenced record in the database):
+.IP
+.B "ovs\-vsctl \-\- destroy sFlow br0 \-\- clear Bridge br0 sflow"
 .SH "EXIT STATUS"
 .IP "0"
 Successful program execution.
-- 
cgit v1.2.1


From 88ec924aba6e3301b97110994e7da18c2331faa7 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 22 Sep 2010 09:52:27 -0700
Subject: xenserver: Remove LICENSE header that is difficult to keep up to
 date.

I had forgotten that I had added this header.  Let's keep all the
information about licensing in individual files instead.

Signed-off-by: Ben Pfaff <blp@nicira.com>
---
 xenserver/LICENSE | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/xenserver/LICENSE b/xenserver/LICENSE
index ce8949ef1..00fc4d8cb 100644
--- a/xenserver/LICENSE
+++ b/xenserver/LICENSE
@@ -1,8 +1,3 @@
-The files etc_xensource_scripts_vif and
-opt_xensource_libexec_interface-reconfigure are distributed under the
-terms of the GNU Lesser General Public License version 2.1 (included
-below).
-
 As a special exception to the GNU Lesser General Public License, you
 may link, statically or dynamically, a "work that uses the Library"
 with a publicly distributed version of the Library to produce an
-- 
cgit v1.2.1


From 1d7ab9963c34b2f23608195b81312bafa4c01cd6 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 22 Sep 2010 13:14:37 -0700
Subject: xenserver: Add GPLv2 license text.

xsconsole is being relicensed under GPLv2 so we need to include the text.

It would be more usual to name this file COPYING and to name the LGPLv2.1
that is already named LICENSE as COPYING.LIB, but some of the files pulled
in from XenServer say that their license is in a file named LICENSE.  I
don't expect that Citrix would be willing to change that, so it seems
better to keep LGPLv2.1 named LICENSE.

Signed-off-by: Ben Pfaff <blp@nicira.com>
---
 xenserver/GPLv2       | 339 ++++++++++++++++++++++++++++++++++++++++++++++++++
 xenserver/automake.mk |   1 +
 2 files changed, 340 insertions(+)
 create mode 100644 xenserver/GPLv2

diff --git a/xenserver/GPLv2 b/xenserver/GPLv2
new file mode 100644
index 000000000..d511905c1
--- /dev/null
+++ b/xenserver/GPLv2
@@ -0,0 +1,339 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/xenserver/automake.mk b/xenserver/automake.mk
index ffd59960b..85911bb56 100644
--- a/xenserver/automake.mk
+++ b/xenserver/automake.mk
@@ -6,6 +6,7 @@
 # without warranty of any kind.
 
 EXTRA_DIST += \
+	xenserver/GPLv2 \
 	xenserver/LICENSE \
 	xenserver/README \
 	xenserver/automake.mk \
-- 
cgit v1.2.1


From 9b35536a145361285d341f83bc6fc19408bb04e5 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 22 Sep 2010 09:32:58 -0700
Subject: xenserver: Change license of xsconsole plugin to GPLv2.

This file was under a proprietary license because it was derived from
proprietary XenServer code.  That upstream code is now under GPLv2, so
change the downstream code to GPLv2 also.

Acked-by: Ian Campbell <Ian.Campbell@citrix.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
---
 ...r_lib_xsconsole_plugins-base_XSFeatureVSwitch.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py b/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py
index d2f6a6a3c..015f4cc00 100644
--- a/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py
+++ b/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py
@@ -1,11 +1,18 @@
-# Copyright (c) Citrix Systems 2008. All rights reserved.
-# xsconsole is proprietary software.
+# Copyright (c) 2007-2010 Citrix Systems Inc.
+# Copyright (c) 2009,2010 Nicira Networks.
 #
-# Xen, the Xen logo, XenCenter, XenMotion are trademarks or registered
-# trademarks of Citrix Systems, Inc., in the United States and other
-# countries.
-
-# Copyright (c) 2009, 2010 Nicira Networks.
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 only.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
 from XSConsoleLog import *
 
-- 
cgit v1.2.1


From ee5311097049272834308f64931172f1c8210755 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 22 Sep 2010 10:05:29 -0700
Subject: xenserver: Add explicit license to refresh-xs-network-uuids.

I had assumed that a trivial one-line shell script didn't need an explicit
license, but it seems that I was wrong.

Signed-off-by: Ben Pfaff <blp@nicira.com>
---
 xenserver/usr_share_openvswitch_scripts_refresh-xs-network-uuids | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/xenserver/usr_share_openvswitch_scripts_refresh-xs-network-uuids b/xenserver/usr_share_openvswitch_scripts_refresh-xs-network-uuids
index 42ebe06e7..35df06da4 100755
--- a/xenserver/usr_share_openvswitch_scripts_refresh-xs-network-uuids
+++ b/xenserver/usr_share_openvswitch_scripts_refresh-xs-network-uuids
@@ -1,2 +1,9 @@
 #! /bin/sh
+# Copyright (C) 2009, 2010 Nicira Networks, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.  This file is offered as-is,
+# without warranty of any kind.
+
 exec /opt/xensource/libexec/interface-reconfigure rewrite
-- 
cgit v1.2.1


From 6787c8cd1c5a4882e9370d3d0dad4590f781ae42 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 22 Sep 2010 10:03:31 -0700
Subject: xenserver: Add license to uuid.py.

There seemed to be some confusion regarding this file's provenance, so it
is best to clarify.

Signed-off-by: Ben Pfaff <blp@nicira.com>
---
 xenserver/uuid.py | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/xenserver/uuid.py b/xenserver/uuid.py
index ae3da25ca..599ece9ea 100644
--- a/xenserver/uuid.py
+++ b/xenserver/uuid.py
@@ -1,3 +1,56 @@
+# This file is from Python 2.5.  It has been modified by adding this
+# license header, which is copied from the LICENSE file distributed
+# with Python.
+#
+# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
+# --------------------------------------------
+#
+# 1. This LICENSE AGREEMENT is between the Python Software Foundation
+# ("PSF"), and the Individual or Organization ("Licensee") accessing and
+# otherwise using this software ("Python") in source or binary form and
+# its associated documentation.
+#
+# 2. Subject to the terms and conditions of this License Agreement, PSF
+# hereby grants Licensee a nonexclusive, royalty-free, world-wide
+# license to reproduce, analyze, test, perform and/or display publicly,
+# prepare derivative works, distribute, and otherwise use Python
+# alone or in any derivative version, provided, however, that PSF's
+# License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
+# 2001, 2002, 2003, 2004, 2005, 2006, 2007 Python Software Foundation;
+# All Rights Reserved" are retained in Python alone or in any derivative
+# version prepared by Licensee.
+#
+# 3. In the event Licensee prepares a derivative work that is based on
+# or incorporates Python or any part thereof, and wants to make
+# the derivative work available to others as provided herein, then
+# Licensee hereby agrees to include in any such work a brief summary of
+# the changes made to Python.
+#
+# 4. PSF is making Python available to Licensee on an "AS IS"
+# basis.  PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+# IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
+# DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
+# FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
+# INFRINGE ANY THIRD PARTY RIGHTS.
+#
+# 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
+# FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
+# A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
+# OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+#
+# 6. This License Agreement will automatically terminate upon a material
+# breach of its terms and conditions.
+#
+# 7. Nothing in this License Agreement shall be deemed to create any
+# relationship of agency, partnership, or joint venture between PSF and
+# Licensee.  This License Agreement does not grant permission to use PSF
+# trademarks or trade name in a trademark sense to endorse or promote
+# products or services of Licensee, or any third party.
+#
+# 8. By copying, installing or otherwise using Python, Licensee
+# agrees to be bound by the terms and conditions of this License
+# Agreement.
+
 r"""UUID objects (universally unique identifiers) according to RFC 4122.
 
 This module provides immutable UUID objects (class UUID) and the functions
-- 
cgit v1.2.1


From 02892690e6866781a0a6c84e25a2c1d277822788 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 22 Sep 2010 13:16:07 -0700
Subject: xenserver: Now everything is free.

Signed-off-by: Ben Pfaff <blp@nicira.com>
---
 COPYING          | 3 +--
 xenserver/README | 7 ++-----
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/COPYING b/COPYING
index 375efecae..8d1bc9d51 100644
--- a/COPYING
+++ b/COPYING
@@ -20,8 +20,7 @@ Files under the datapath directory are licensed under the GNU General
 Public License, version 2.
 
 Files under the xenserver directory are licensed on a file-by-file
-basis.  Some files are under an uncertain license that may not be
-DFSG-compliant or GPL-compatible.  Refer to each file for details.
+basis.  Refer to each file for details.
 
 Files lib/sflow*.[ch] are licensed under the terms of the InMon sFlow
 licence that is available at:
diff --git a/xenserver/README b/xenserver/README
index 9fcdb1a3d..941c4e1e7 100644
--- a/xenserver/README
+++ b/xenserver/README
@@ -1,11 +1,8 @@
 This directory contains files for seamless integration of Open vSwitch on
 Citrix XenServer hosts managed by the Citrix management tools.
 
-Some of these files are modifications of Citrix's proprietary code.
-Citrix has given permission to distribute these modified files.
-Citrix has not specified a particular license for them.  There is no
-guarantee that, should Citrix specify a license, that it would be
-DFSG-compliant or GPL-compatible.
+Files in this directory are licensed on a file-by-file basis.  Please
+refer to each file for details.
 
 Most of the files in this directory is installed on a XenServer system
 under the same name, if underscores are replaced by slashes.  The
-- 
cgit v1.2.1


From 560e802229f3028c02273435dd1c6efba33e0949 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Mon, 26 Jul 2010 18:46:27 -0700
Subject: datapath: Move flow allocation into a function.

As the process to allocate a flow becomes more involved, it becomes
more cumbersome for the code to be mixed in with the general
datapath code, so split it out into a new function.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Reviewed-by: Ben Pfaff <blp@nicira.com>
---
 datapath/datapath.c | 10 +++++-----
 datapath/flow.c     | 28 +++++++++++++++++++++++-----
 datapath/flow.h     | 14 ++++++++------
 3 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index b3f77b36a..1677927ff 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -1049,12 +1049,12 @@ static int do_put_flow(struct datapath *dp, struct odp_flow_put *uf,
 		}
 
 		/* Allocate flow. */
-		error = -ENOMEM;
-		flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
-		if (flow == NULL)
+		flow = flow_alloc();
+		if (IS_ERR(flow)) {
+			error = PTR_ERR(flow);
 			goto error;
+		}
 		flow->key = uf->flow.key;
-		spin_lock_init(&flow->lock);
 		clear_stats(flow);
 
 		/* Obtain actions. */
@@ -1109,7 +1109,7 @@ static int do_put_flow(struct datapath *dp, struct odp_flow_put *uf,
 error_free_flow_acts:
 	kfree(flow->sf_acts);
 error_free_flow:
-	kmem_cache_free(flow_cache, flow);
+	flow_free(flow);
 error:
 	return error;
 }
diff --git a/datapath/flow.c b/datapath/flow.c
index 7684c061a..1f01166c5 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -123,27 +123,45 @@ struct sw_flow_actions *flow_actions_alloc(size_t n_actions)
 	return sfa;
 }
 
+struct sw_flow *flow_alloc(void)
+{
+	struct sw_flow *flow;
+
+	flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
+	if (!flow)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&flow->lock);
 
-/* Frees 'flow' immediately. */
-static void flow_free(struct sw_flow *flow)
+	return flow;
+}
+
+void flow_free(struct sw_flow *flow)
 {
 	if (unlikely(!flow))
 		return;
-	kfree(flow->sf_acts);
+
 	kmem_cache_free(flow_cache, flow);
 }
 
+/* Frees the entire 'flow' (both base and actions) immediately. */
+static void flow_free_full(struct sw_flow *flow)
+{
+	kfree(flow->sf_acts);
+	flow_free(flow);
+}
+
 void flow_free_tbl(struct tbl_node *node)
 {
 	struct sw_flow *flow = flow_cast(node);
-	flow_free(flow);
+	flow_free_full(flow);
 }
 
 /* RCU callback used by flow_deferred_free. */
 static void rcu_free_flow_callback(struct rcu_head *rcu)
 {
 	struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
-	flow_free(flow);
+	flow_free_full(flow);
 }
 
 /* Schedules 'flow' to be freed after the next RCU grace period.
diff --git a/datapath/flow.h b/datapath/flow.h
index 80a5b66b1..484ca1207 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -58,20 +58,22 @@ struct arp_eth_header
 	unsigned char       ar_tip[4];		/* target IP address        */
 } __attribute__((packed));
 
-extern struct kmem_cache *flow_cache;
+int flow_init(void);
+void flow_exit(void);
 
-struct sw_flow_actions *flow_actions_alloc(size_t n_actions);
+struct sw_flow *flow_alloc(void);
+void flow_free(struct sw_flow *flow);
 void flow_deferred_free(struct sw_flow *);
+void flow_free_tbl(struct tbl_node *);
+
+struct sw_flow_actions *flow_actions_alloc(size_t n_actions);
 void flow_deferred_free_acts(struct sw_flow_actions *);
+
 int flow_extract(struct sk_buff *, u16 in_port, struct odp_flow_key *);
 void flow_used(struct sw_flow *, struct sk_buff *);
 
 u32 flow_hash(const struct odp_flow_key *key);
 int flow_cmp(const struct tbl_node *, void *target);
-void flow_free_tbl(struct tbl_node *);
-
-int flow_init(void);
-void flow_exit(void);
 
 static inline struct sw_flow *flow_cast(const struct tbl_node *node)
 {
-- 
cgit v1.2.1


From fb8c93473efacd67a50117d0f2a3084f2d96ceca Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sun, 29 Aug 2010 09:49:51 -0700
Subject: datapath: Add ref counting for flows.

Currently flows are only used within the confines of one
rcu_read_lock()/rcu_read_unlock() session.  However, with the
addition of header caching we will need to hold references to flows
for longer periods of time.  This adds support for that by adding
refcounts to flows.  RCU is still used for normal packet handling
to avoid a performance impact from constantly updating the refcount.
However, instead of directly freeing the flow after a grace period
we simply decrement the refcount.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Reviewed-by: Ben Pfaff <blp@nicira.com>
---
 datapath/datapath.c |  3 ++-
 datapath/flow.c     | 41 ++++++++++++++++++++++++-----------------
 datapath/flow.h     |  7 ++++++-
 3 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 1677927ff..06e1006a8 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -1109,7 +1109,8 @@ static int do_put_flow(struct datapath *dp, struct odp_flow_put *uf,
 error_free_flow_acts:
 	kfree(flow->sf_acts);
 error_free_flow:
-	flow_free(flow);
+	flow->sf_acts = NULL;
+	flow_put(flow);
 error:
 	return error;
 }
diff --git a/datapath/flow.c b/datapath/flow.c
index 1f01166c5..dfbf76938 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -132,36 +132,27 @@ struct sw_flow *flow_alloc(void)
 		return ERR_PTR(-ENOMEM);
 
 	spin_lock_init(&flow->lock);
+	atomic_set(&flow->refcnt, 1);
+	flow->dead = false;
 
 	return flow;
 }
 
-void flow_free(struct sw_flow *flow)
-{
-	if (unlikely(!flow))
-		return;
-
-	kmem_cache_free(flow_cache, flow);
-}
-
-/* Frees the entire 'flow' (both base and actions) immediately. */
-static void flow_free_full(struct sw_flow *flow)
-{
-	kfree(flow->sf_acts);
-	flow_free(flow);
-}
-
 void flow_free_tbl(struct tbl_node *node)
 {
 	struct sw_flow *flow = flow_cast(node);
-	flow_free_full(flow);
+
+	flow->dead = true;
+	flow_put(flow);
 }
 
 /* RCU callback used by flow_deferred_free. */
 static void rcu_free_flow_callback(struct rcu_head *rcu)
 {
 	struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
-	flow_free_full(flow);
+
+	flow->dead = true;
+	flow_put(flow);
 }
 
 /* Schedules 'flow' to be freed after the next RCU grace period.
@@ -171,6 +162,22 @@ void flow_deferred_free(struct sw_flow *flow)
 	call_rcu(&flow->rcu, rcu_free_flow_callback);
 }
 
+void flow_hold(struct sw_flow *flow)
+{
+	atomic_inc(&flow->refcnt);
+}
+
+void flow_put(struct sw_flow *flow)
+{
+	if (unlikely(!flow))
+		return;
+
+	if (atomic_dec_and_test(&flow->refcnt)) {
+		kfree(flow->sf_acts);
+		kmem_cache_free(flow_cache, flow);
+	}
+}
+
 /* RCU callback used by flow_deferred_free_acts. */
 static void rcu_free_acts_callback(struct rcu_head *rcu)
 {
diff --git a/datapath/flow.h b/datapath/flow.h
index 484ca1207..3f434677b 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -36,6 +36,9 @@ struct sw_flow {
 	struct odp_flow_key key;
 	struct sw_flow_actions *sf_acts;
 
+	atomic_t refcnt;
+	bool dead;
+
 	spinlock_t lock;	/* Lock for values below. */
 	unsigned long used;	/* Last used time (in jiffies). */
 	u64 packet_count;	/* Number of packets matched. */
@@ -62,13 +65,15 @@ int flow_init(void);
 void flow_exit(void);
 
 struct sw_flow *flow_alloc(void);
-void flow_free(struct sw_flow *flow);
 void flow_deferred_free(struct sw_flow *);
 void flow_free_tbl(struct tbl_node *);
 
 struct sw_flow_actions *flow_actions_alloc(size_t n_actions);
 void flow_deferred_free_acts(struct sw_flow_actions *);
 
+void flow_hold(struct sw_flow *);
+void flow_put(struct sw_flow *);
+
 int flow_extract(struct sk_buff *, u16 in_port, struct odp_flow_key *);
 void flow_used(struct sw_flow *, struct sk_buff *);
 
-- 
cgit v1.2.1


From 3976f6d57b1134c5c3ed054c9da4aa6786fbf5bf Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sun, 29 Aug 2010 10:49:11 -0700
Subject: datapath: Enable usage of cached flows.

An upcoming commit will add support for supplying cached flows for
packets entering the datapath.  This adds the code in the datapath
itself to recognize these cached flows and use them instead of
extracting the flow fields and doing a lookup.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Reviewed-by: Ben Pfaff <blp@nicira.com>
---
 datapath/datapath.c           | 53 +++++++++++++++++++++++--------------------
 datapath/datapath.h           |  6 +++--
 datapath/vport-internal_dev.c |  3 ++-
 datapath/vport.c              |  3 +++
 datapath/vport.h              |  3 ++-
 5 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 06e1006a8..390acc8a4 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -543,40 +543,44 @@ void dp_process_received_packet(struct dp_port *p, struct sk_buff *skb)
 	struct datapath *dp = p->dp;
 	struct dp_stats_percpu *stats;
 	int stats_counter_off;
-	struct odp_flow_key key;
-	struct tbl_node *flow_node;
-	struct sw_flow *flow;
 	struct sw_flow_actions *acts;
 	struct loop_counter *loop;
 	int error;
 
 	OVS_CB(skb)->dp_port = p;
 
-	/* Extract flow from 'skb' into 'key'. */
-	error = flow_extract(skb, p ? p->port_no : ODPP_NONE, &key);
-	if (unlikely(error)) {
-		kfree_skb(skb);
-		return;
-	}
+	if (!OVS_CB(skb)->flow) {
+		struct odp_flow_key key;
+		struct tbl_node *flow_node;
 
-	if (OVS_CB(skb)->is_frag && dp->drop_frags) {
-		kfree_skb(skb);
-		stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
-		goto out;
-	}
+		/* Extract flow from 'skb' into 'key'. */
+		error = flow_extract(skb, p ? p->port_no : ODPP_NONE, &key);
+		if (unlikely(error)) {
+			kfree_skb(skb);
+			return;
+		}
 
-	/* Look up flow. */
-	flow_node = tbl_lookup(rcu_dereference(dp->table), &key, flow_hash(&key), flow_cmp);
-	if (unlikely(!flow_node)) {
-		dp_output_control(dp, skb, _ODPL_MISS_NR, OVS_CB(skb)->tun_id);
-		stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
-		goto out;
+		if (OVS_CB(skb)->is_frag && dp->drop_frags) {
+			kfree_skb(skb);
+			stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
+			goto out;
+		}
+
+		/* Look up flow. */
+		flow_node = tbl_lookup(rcu_dereference(dp->table), &key,
+					flow_hash(&key), flow_cmp);
+		if (unlikely(!flow_node)) {
+			dp_output_control(dp, skb, _ODPL_MISS_NR, OVS_CB(skb)->tun_id);
+			stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
+			goto out;
+		}
+
+		OVS_CB(skb)->flow = flow_cast(flow_node);
 	}
 
-	flow = flow_cast(flow_node);
-	flow_used(flow, skb);
+	flow_used(OVS_CB(skb)->flow, skb);
 
-	acts = rcu_dereference(flow->sf_acts);
+	acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
 
 	/* Check whether we've looped too much. */
 	loop = &get_cpu_var(dp_loop_counters).counters[!!in_interrupt()];
@@ -588,7 +592,8 @@ void dp_process_received_packet(struct dp_port *p, struct sk_buff *skb)
 	}
 
 	/* Execute actions. */
-	execute_actions(dp, skb, &key, acts->actions, acts->n_actions, GFP_ATOMIC);
+	execute_actions(dp, skb, &OVS_CB(skb)->flow->key, acts->actions,
+			acts->n_actions, GFP_ATOMIC);
 	stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
 
 	/* Check whether sub-actions looped too much. */
diff --git a/datapath/datapath.h b/datapath/datapath.h
index abc6aeab2..dacc3a42c 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -146,17 +146,19 @@ enum csum_type {
 /**
  * struct ovs_skb_cb - OVS data in skb CB
  * @dp_port: The datapath port on which the skb entered the switch.
+ * @flow: The flow associated with this packet.  May be %NULL if no flow.
+ * @is_frag: %true if this packet is an IPv4 fragment, %false otherwise.
  * @ip_summed: Consistently stores L4 checksumming status across different
  * kernel versions.
  * @tun_id: ID (in network byte order) of the tunnel that encapsulated this
  * packet. It is 0 if the packet was not received on a tunnel.
- * @is_frag: %true if this packet is an IPv4 fragment, %false otherwise.
  */
 struct ovs_skb_cb {
 	struct dp_port		*dp_port;
+	struct sw_flow		*flow;
+	bool			is_frag;
 	enum csum_type		ip_summed;
 	__be32			tun_id;
-	bool			is_frag;
 };
 #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
 
diff --git a/datapath/vport-internal_dev.c b/datapath/vport-internal_dev.c
index 6cbfdf80d..514d00cb3 100644
--- a/datapath/vport-internal_dev.c
+++ b/datapath/vport-internal_dev.c
@@ -82,6 +82,7 @@ static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	skb_reset_mac_header(skb);
 	compute_ip_summed(skb, true);
+	OVS_CB(skb)->flow = NULL;
 
 	vport_receive(vport, skb);
 
@@ -293,7 +294,7 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
 
 struct vport_ops internal_vport_ops = {
 	.type		= "internal",
-	.flags		= VPORT_F_REQUIRED | VPORT_F_GEN_STATS,
+	.flags		= VPORT_F_REQUIRED | VPORT_F_GEN_STATS | VPORT_F_FLOW,
 	.create		= internal_dev_create,
 	.destroy	= internal_dev_destroy,
 	.attach		= internal_dev_attach,
diff --git a/datapath/vport.c b/datapath/vport.c
index 91b650e54..6c8eb0845 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -1217,6 +1217,9 @@ void vport_receive(struct vport *vport, struct sk_buff *skb)
 		local_bh_enable();
 	}
 
+	if (!(vport->ops->flags & VPORT_F_FLOW))
+		OVS_CB(skb)->flow = NULL;
+
 	if (!(vport->ops->flags & VPORT_F_TUN_ID))
 		OVS_CB(skb)->tun_id = 0;
 
diff --git a/datapath/vport.h b/datapath/vport.h
index fca5f1abe..30b0cc6b3 100644
--- a/datapath/vport.h
+++ b/datapath/vport.h
@@ -112,7 +112,8 @@ struct vport {
 
 #define VPORT_F_REQUIRED	(1 << 0) /* If init fails, module loading fails. */
 #define VPORT_F_GEN_STATS	(1 << 1) /* Track stats at the generic layer. */
-#define VPORT_F_TUN_ID		(1 << 2) /* Sets OVS_CB(skb)->tun_id. */
+#define VPORT_F_FLOW		(1 << 2) /* Sets OVS_CB(skb)->flow. */
+#define VPORT_F_TUN_ID		(1 << 3) /* Sets OVS_CB(skb)->tun_id. */
 
 /**
  * struct vport_ops - definition of a type of virtual port
-- 
cgit v1.2.1


From b7a31ec13d0617868378d39a72beb4c4ffcb7e5c Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sun, 29 Aug 2010 14:28:58 -0700
Subject: datapath: Move is_frag out of struct ovs_skb_cb.

is_frag is only used for communication between two functions, which
means that it doesn't really need to be in the SKB CB.  This wouldn't
necessarily be a problem except that there are also a number of other
paths that lead to this being uninitialized.  That doesn't cause any
bug now, but uninitialized memory seems dangerous and there isn't much
upside to keeping the field in the CB.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Reviewed-by: Ben Pfaff <blp@nicira.com>
---
 datapath/datapath.c |  8 +++++---
 datapath/datapath.h |  2 --
 datapath/flow.c     | 14 +++++++-------
 datapath/flow.h     |  2 +-
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 390acc8a4..5996d6ed7 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -552,15 +552,16 @@ void dp_process_received_packet(struct dp_port *p, struct sk_buff *skb)
 	if (!OVS_CB(skb)->flow) {
 		struct odp_flow_key key;
 		struct tbl_node *flow_node;
+		bool is_frag;
 
 		/* Extract flow from 'skb' into 'key'. */
-		error = flow_extract(skb, p ? p->port_no : ODPP_NONE, &key);
+		error = flow_extract(skb, p ? p->port_no : ODPP_NONE, &key, &is_frag);
 		if (unlikely(error)) {
 			kfree_skb(skb);
 			return;
 		}
 
-		if (OVS_CB(skb)->is_frag && dp->drop_frags) {
+		if (is_frag && dp->drop_frags) {
 			kfree_skb(skb);
 			stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
 			goto out;
@@ -1325,6 +1326,7 @@ static int do_execute(struct datapath *dp, const struct odp_execute *execute)
 	struct sk_buff *skb;
 	struct sw_flow_actions *actions;
 	struct ethhdr *eth;
+	bool is_frag;
 	int err;
 
 	err = -EINVAL;
@@ -1372,7 +1374,7 @@ static int do_execute(struct datapath *dp, const struct odp_execute *execute)
 	else
 		skb->protocol = htons(ETH_P_802_2);
 
-	err = flow_extract(skb, execute->in_port, &key);
+	err = flow_extract(skb, execute->in_port, &key, &is_frag);
 	if (err)
 		goto error_free_skb;
 
diff --git a/datapath/datapath.h b/datapath/datapath.h
index dacc3a42c..f28513bb7 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -147,7 +147,6 @@ enum csum_type {
  * struct ovs_skb_cb - OVS data in skb CB
  * @dp_port: The datapath port on which the skb entered the switch.
  * @flow: The flow associated with this packet.  May be %NULL if no flow.
- * @is_frag: %true if this packet is an IPv4 fragment, %false otherwise.
  * @ip_summed: Consistently stores L4 checksumming status across different
  * kernel versions.
  * @tun_id: ID (in network byte order) of the tunnel that encapsulated this
@@ -156,7 +155,6 @@ enum csum_type {
 struct ovs_skb_cb {
 	struct dp_port		*dp_port;
 	struct sw_flow		*flow;
-	bool			is_frag;
 	enum csum_type		ip_summed;
 	__be32			tun_id;
 };
diff --git a/datapath/flow.c b/datapath/flow.c
index dfbf76938..1aa6e291b 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -267,7 +267,8 @@ static __be16 parse_ethertype(struct sk_buff *skb)
  * Sets OVS_CB(skb)->is_frag to %true if @skb is an IPv4 fragment, otherwise to
  * %false.
  */
-int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key)
+int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key,
+		 bool *is_frag)
 {
 	struct ethhdr *eth;
 
@@ -275,7 +276,7 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key)
 	key->tun_id = OVS_CB(skb)->tun_id;
 	key->in_port = in_port;
 	key->dl_vlan = htons(ODP_VLAN_NONE);
-	OVS_CB(skb)->is_frag = false;
+	*is_frag = false;
 
 	/*
 	 * We would really like to pull as many bytes as we could possibly
@@ -356,9 +357,9 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key)
 					key->tp_dst = htons(icmp->code);
 				}
 			}
-		} else {
-			OVS_CB(skb)->is_frag = true;
-		}
+		} else
+			*is_frag = true;
+
 	} else if (key->dl_type == htons(ETH_P_ARP) && arphdr_ok(skb)) {
 		struct arp_eth_header *arp;
 
@@ -370,9 +371,8 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key)
 				&& arp->ar_pln == 4) {
 
 			/* We only match on the lower 8 bits of the opcode. */
-			if (ntohs(arp->ar_op) <= 0xff) {
+			if (ntohs(arp->ar_op) <= 0xff)
 				key->nw_proto = ntohs(arp->ar_op);
-			}
 
 			if (key->nw_proto == ARPOP_REQUEST
 					|| key->nw_proto == ARPOP_REPLY) {
diff --git a/datapath/flow.h b/datapath/flow.h
index 3f434677b..25b720449 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -74,7 +74,7 @@ void flow_deferred_free_acts(struct sw_flow_actions *);
 void flow_hold(struct sw_flow *);
 void flow_put(struct sw_flow *);
 
-int flow_extract(struct sk_buff *, u16 in_port, struct odp_flow_key *);
+int flow_extract(struct sk_buff *, u16 in_port, struct odp_flow_key *, bool *is_frag);
 void flow_used(struct sw_flow *, struct sk_buff *);
 
 u32 flow_hash(const struct odp_flow_key *key);
-- 
cgit v1.2.1


From 7c79397fe8ff52eb9cfe4d32a9d1f8b2f23c45c8 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Tue, 31 Aug 2010 15:38:25 -0700
Subject: datapath: Backport workqueue functions.

An upcoming commit will use some workqueue functions that weren't
available on earlier kernels, so this backports those functions.
The backporting uses timers instead of delayed work queues because
the earlier versions of work queues have some unsafe corner cases.
In addition, this removes some existing work queue backporting code
that is no longer used because it is potentially unsafe.

Note that this commit changes the behavior of work queues: normally
they run in process context but the backported version runs in
softirq context.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Reviewed-by: Ben Pfaff <blp@nicira.com>
---
 datapath/datapath.c                                |  1 -
 .../linux-2.6/compat-2.6/include/linux/workqueue.h | 65 +++++++++++-----------
 2 files changed, 32 insertions(+), 34 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 5996d6ed7..e9f30f8a8 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -39,7 +39,6 @@
 #include <linux/inetdevice.h>
 #include <linux/list.h>
 #include <linux/rculist.h>
-#include <linux/workqueue.h>
 #include <linux/dmi.h>
 #include <net/inet_ecn.h>
 #include <linux/compat.h>
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/workqueue.h b/datapath/linux-2.6/compat-2.6/include/linux/workqueue.h
index 1ac3b6ecb..01c6345e9 100644
--- a/datapath/linux-2.6/compat-2.6/include/linux/workqueue.h
+++ b/datapath/linux-2.6/compat-2.6/include/linux/workqueue.h
@@ -4,39 +4,38 @@
 #include_next <linux/workqueue.h>
 
 #include <linux/version.h>
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
-
-#ifdef __KERNEL__
-/*
- * initialize a work-struct's func and data pointers:
- */
-#undef PREPARE_WORK
-#define PREPARE_WORK(_work, _func)                              \
-        do {                                                    \
-		(_work)->func = (void(*)(void*)) _func;		\
-                (_work)->data = _work;				\
-        } while (0)
-
-/*
- * initialize all of a work-struct:
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
+
+/* Older kernels have an implementation of work queues with some very bad
+ * characteristics when trying to cancel work (potential deadlocks, use after
+ * free, etc.  Here we directly use timers instead for delayed work.  It's not
+ * optimal but it is better than the alternative.  Note that work queues
+ * normally run in process context but this will cause them to operate in
+ * softirq context.
  */
-#undef INIT_WORK
-#define INIT_WORK(_work, _func)                                 \
-        do {                                                    \
-                INIT_LIST_HEAD(&(_work)->entry);                \
-                (_work)->pending = 0;                           \
-                PREPARE_WORK((_work), (_func));                 \
-                init_timer(&(_work)->timer);                    \
-        } while (0)
-
-#endif /* __KERNEL__ */
-
-#endif /* linux kernel < 2.6.20 */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
-/* There is no equivalent to cancel_work_sync() so just flush all
- * pending work. */
-#define cancel_work_sync(_work) flush_scheduled_work()
-#endif
+
+#include <linux/timer.h>
+
+#undef DECLARE_DELAYED_WORK
+#define DECLARE_DELAYED_WORK(n, f) \
+	struct timer_list n = TIMER_INITIALIZER((void (*)(unsigned long))f, 0, 0)
+
+#define schedule_delayed_work rpl_schedule_delayed_work
+static inline int schedule_delayed_work(struct timer_list *timer, unsigned long delay)
+{
+	if (timer_pending(timer))
+		return 0;
+
+	mod_timer(timer, jiffies + delay);
+	return 1;
+}
+
+#define cancel_delayed_work_sync rpl_cancel_delayed_work_sync
+static inline int cancel_delayed_work_sync(struct timer_list *timer)
+{
+	return del_timer_sync(timer);
+}
+
+#endif /* kernel version < 2.6.23 */
 
 #endif
-- 
cgit v1.2.1


From 842cf6f472b236b6e61be04b41970116245b1759 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Fri, 27 Aug 2010 13:55:02 -0700
Subject: datapath: Add tunnel header caching.

On the transmit path we generate essentially the same tunnel header
for every packet to a given destination.  However, for each packet we
must assemble the headers in pieces, look up the destination in the
routing table, and look up the flow in OVS.  This commit avoids that
extra work by caching all of the header and output path information
and only rebuilding it when something actually changes.

This optimization reduces CPU load on transmit by approximately 13%.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Reviewed-by: Ben Pfaff <blp@nicira.com>
---
 datapath/tunnel.c            | 985 ++++++++++++++++++++++++++++++-------------
 datapath/tunnel.h            | 153 +++++--
 datapath/vport-capwap.c      |  68 +--
 datapath/vport-gre.c         |  59 +--
 include/openvswitch/tunnel.h |   1 +
 5 files changed, 899 insertions(+), 367 deletions(-)

diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index 6fa369be0..77f976fdc 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -15,6 +15,7 @@
 #include <linux/jhash.h>
 #include <linux/kernel.h>
 #include <linux/version.h>
+#include <linux/workqueue.h>
 
 #include <net/dsfield.h>
 #include <net/dst.h>
@@ -33,10 +34,45 @@
 #include "tunnel.h"
 #include "vport.h"
 #include "vport-generic.h"
+#include "vport-internal_dev.h"
+
+#ifdef NEED_CACHE_TIMEOUT
+/*
+ * On kernels where we can't quickly detect changes in the rest of the system
+ * we use an expiration time to invalidate the cache.  A shorter expiration
+ * reduces the length of time that we may potentially blackhole packets while
+ * a longer time increases performance by reducing the frequency that the
+ * cache needs to be rebuilt.  A variety of factors may cause the cache to be
+ * invalidated before the expiration time but this is the maximum.  The time
+ * is expressed in jiffies.
+ */
+#define MAX_CACHE_EXP HZ
+#endif
+
+/*
+ * Interval to check for and remove caches that are no longer valid.  Caches
+ * are checked for validity before they are used for packet encapsulation and
+ * old caches are removed at that time.  However, if no packets are sent through
+ * the tunnel then the cache will never be destroyed.  Since it holds
+ * references to a number of system objects, the cache will continue to use
+ * system resources by not allowing those objects to be destroyed.  The cache
+ * cleaner is periodically run to free invalid caches.  It does not
+ * significantly affect system performance.  A lower interval will release
+ * resources faster but will itself consume resources by requiring more frequent
+ * checks.  A longer interval may result in messages being printed to the kernel
+ * message buffer about unreleased resources.  The interval is expressed in
+ * jiffies.
+ */
+#define CACHE_CLEANER_INTERVAL (5 * HZ)
+
+#define CACHE_DATA_ALIGN 16
 
 /* Protected by RCU. */
 static struct tbl *port_table;
 
+static void cache_cleaner(struct work_struct *work);
+DECLARE_DELAYED_WORK(cache_cleaner_wq, cache_cleaner);
+
 /*
  * These are just used as an optimization: they don't require any kind of
  * synchronization because we could have just as easily read the value before
@@ -63,22 +99,54 @@ static inline struct tnl_vport *tnl_vport_table_cast(const struct tbl_node *node
 	return container_of(node, struct tnl_vport, tbl_node);
 }
 
-/* RCU callback. */
-static void free_config(struct rcu_head *rcu)
+static inline void schedule_cache_cleaner(void)
+{
+	schedule_delayed_work(&cache_cleaner_wq, CACHE_CLEANER_INTERVAL);
+}
+
+static void free_cache(struct tnl_cache *cache)
+{
+	if (!cache)
+		return;
+
+	flow_put(cache->flow);
+	ip_rt_put(cache->rt);
+	kfree(cache);
+}
+
+static void free_config_rcu(struct rcu_head *rcu)
 {
 	struct tnl_mutable_config *c = container_of(rcu, struct tnl_mutable_config, rcu);
 	kfree(c);
 }
 
+static void free_cache_rcu(struct rcu_head *rcu)
+{
+	struct tnl_cache *c = container_of(rcu, struct tnl_cache, rcu);
+	free_cache(c);
+}
+
 static void assign_config_rcu(struct vport *vport,
 			      struct tnl_mutable_config *new_config)
 {
 	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
 	struct tnl_mutable_config *old_config;
 
-	old_config = rcu_dereference(tnl_vport->mutable);
+	old_config = tnl_vport->mutable;
 	rcu_assign_pointer(tnl_vport->mutable, new_config);
-	call_rcu(&old_config->rcu, free_config);
+	call_rcu(&old_config->rcu, free_config_rcu);
+}
+
+static void assign_cache_rcu(struct vport *vport, struct tnl_cache *new_cache)
+{
+	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+	struct tnl_cache *old_cache;
+
+	old_cache = tnl_vport->cache;
+	rcu_assign_pointer(tnl_vport->cache, new_cache);
+
+	if (old_cache)
+		call_rcu(&old_cache->rcu, free_cache_rcu);
 }
 
 static unsigned int *find_port_pool(const struct tnl_mutable_config *mutable)
@@ -130,10 +198,32 @@ static u32 port_hash(struct port_lookup_key *lookup)
 	return jhash2(lookup->vals, ARRAY_SIZE(lookup->vals), 0);
 }
 
+static u32 mutable_hash(const struct tnl_mutable_config *mutable)
+{
+	struct port_lookup_key lookup;
+
+	lookup.vals[LOOKUP_SADDR] = mutable->port_config.saddr;
+	lookup.vals[LOOKUP_DADDR] = mutable->port_config.daddr;
+	lookup.vals[LOOKUP_KEY] = mutable->port_config.in_key;
+	lookup.vals[LOOKUP_TUNNEL_TYPE] = mutable->tunnel_type;
+
+	return port_hash(&lookup);
+}
+
+static void check_table_empty(void)
+{
+	if (tbl_count(port_table) == 0) {
+		struct tbl *old_table = port_table;
+
+		cancel_delayed_work_sync(&cache_cleaner_wq);
+		rcu_assign_pointer(port_table, NULL);
+		tbl_deferred_destroy(old_table, NULL);
+	}
+}
+
 static int add_port(struct vport *vport)
 {
 	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
-	struct port_lookup_key lookup;
 	int err;
 
 	if (!port_table) {
@@ -144,6 +234,7 @@ static int add_port(struct vport *vport)
 			return -ENOMEM;
 
 		rcu_assign_pointer(port_table, new_table);
+		schedule_cache_cleaner();
 
 	} else if (tbl_count(port_table) > tbl_n_buckets(port_table)) {
 		struct tbl *old_table = port_table;
@@ -157,16 +248,44 @@ static int add_port(struct vport *vport)
 		tbl_deferred_destroy(old_table, NULL);
 	}
 
-	lookup.vals[LOOKUP_SADDR] = tnl_vport->mutable->port_config.saddr;
-	lookup.vals[LOOKUP_DADDR] = tnl_vport->mutable->port_config.daddr;
-	lookup.vals[LOOKUP_KEY] = tnl_vport->mutable->port_config.in_key;
-	lookup.vals[LOOKUP_TUNNEL_TYPE] = tnl_vport->mutable->tunnel_type;
+	err = tbl_insert(port_table, &tnl_vport->tbl_node, mutable_hash(tnl_vport->mutable));
+	if (err) {
+		check_table_empty();
+		return err;
+	}
+
+	(*find_port_pool(tnl_vport->mutable))++;
+
+	return 0;
+}
+
+static int move_port(struct vport *vport, struct tnl_mutable_config *new_mutable)
+{
+	int err;
+	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+	u32 hash;
+
+	hash = mutable_hash(new_mutable);
+	if (hash == tnl_vport->tbl_node.hash)
+		goto table_updated;
 
-	err = tbl_insert(port_table, &tnl_vport->tbl_node, port_hash(&lookup));
+	/*
+	 * Ideally we should make this move atomic to avoid having gaps in
+	 * finding tunnels or the possibility of failure.  However, if we do
+	 * find a tunnel it will always be consistent.
+	 */
+	err = tbl_remove(port_table, &tnl_vport->tbl_node);
 	if (err)
 		return err;
 
-	(*find_port_pool(tnl_vport->mutable))++;
+	err = tbl_insert(port_table, &tnl_vport->tbl_node, hash);
+	if (err) {
+		check_table_empty();
+		return err;
+	}
+
+table_updated:
+	assign_config_rcu(vport, new_mutable);
 
 	return 0;
 }
@@ -180,6 +299,7 @@ static int del_port(struct vport *vport)
 	if (err)
 		return err;
 
+	check_table_empty();
 	(*find_port_pool(tnl_vport->mutable))--;
 
 	return 0;
@@ -193,7 +313,7 @@ struct vport *tnl_find_port(__be32 saddr, __be32 daddr, __be32 key,
 	struct tbl *table = rcu_dereference(port_table);
 	struct tbl_node *tbl_node;
 
-	if (!table)
+	if (unlikely(!table))
 		return NULL;
 
 	lookup.vals[LOOKUP_SADDR] = saddr;
@@ -246,6 +366,60 @@ found:
 	return tnl_vport_to_vport(tnl_vport_table_cast(tbl_node));
 }
 
+static inline void ecn_decapsulate(struct sk_buff *skb)
+{
+	u8 tos = ip_hdr(skb)->tos;
+
+	if (INET_ECN_is_ce(tos)) {
+		__be16 protocol = skb->protocol;
+		unsigned int nw_header = skb_network_offset(skb);
+
+		if (skb->protocol == htons(ETH_P_8021Q)) {
+			if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
+				return;
+
+			protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
+			nw_header += VLAN_HLEN;
+		}
+
+		if (protocol == htons(ETH_P_IP)) {
+			if (unlikely(!pskb_may_pull(skb, nw_header
+			    + sizeof(struct iphdr))))
+				return;
+
+			IP_ECN_set_ce((struct iphdr *)(skb->data + nw_header));
+		}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		else if (protocol == htons(ETH_P_IPV6)) {
+			if (unlikely(!pskb_may_pull(skb, nw_header
+			    + sizeof(struct ipv6hdr))))
+				return;
+
+			IP6_ECN_set_ce((struct ipv6hdr *)(skb->data + nw_header));
+		}
+#endif
+	}
+}
+
+/* Called with rcu_read_lock. */
+void tnl_rcv(struct vport *vport, struct sk_buff *skb)
+{
+	skb->pkt_type = PACKET_HOST;
+	skb->protocol = eth_type_trans(skb, skb->dev);
+
+	skb_dst_drop(skb);
+	nf_reset(skb);
+	secpath_reset(skb);
+	skb_reset_network_header(skb);
+
+	ecn_decapsulate(skb);
+
+	skb_push(skb, ETH_HLEN);
+	compute_ip_summed(skb, false);
+
+	vport_receive(vport, skb);
+}
+
 static bool check_ipv4_address(__be32 addr)
 {
 	if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr)
@@ -514,179 +688,412 @@ bool tnl_frag_needed(struct vport *vport, const struct tnl_mutable_config *mutab
 	return true;
 }
 
-static struct sk_buff *check_headroom(struct sk_buff *skb, int headroom)
+static bool check_mtu(struct sk_buff *skb,
+		      struct vport *vport,
+		      const struct tnl_mutable_config *mutable,
+		      const struct rtable *rt, __be16 *frag_offp)
 {
-	if (skb_headroom(skb) < headroom || skb_header_cloned(skb)) {
-		struct sk_buff *nskb = skb_realloc_headroom(skb, headroom + 16);
-		if (unlikely(!nskb)) {
-			kfree_skb(skb);
-			return ERR_PTR(-ENOMEM);
+	int mtu;
+	__be16 frag_off;
+
+	frag_off = (mutable->port_config.flags & TNL_F_PMTUD) ? htons(IP_DF) : 0;
+	if (frag_off)
+		mtu = dst_mtu(&rt_dst(rt))
+			- ETH_HLEN
+			- mutable->tunnel_hlen
+			- (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);
+	else
+		mtu = mutable->mtu;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		struct iphdr *old_iph = ip_hdr(skb);
+
+		frag_off |= old_iph->frag_off & htons(IP_DF);
+		mtu = max(mtu, IP_MIN_MTU);
+
+		if ((old_iph->frag_off & htons(IP_DF)) &&
+		    mtu < ntohs(old_iph->tot_len)) {
+			if (tnl_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
+				goto drop;
 		}
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (skb->protocol == htons(ETH_P_IPV6)) {
+		unsigned int packet_length = skb->len - ETH_HLEN
+			- (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);
 
-		set_skb_csum_bits(skb, nskb);
+		mtu = max(mtu, IPV6_MIN_MTU);
 
-		if (skb->sk)
-			skb_set_owner_w(nskb, skb->sk);
+		/* IPv6 requires PMTUD if the packet is above the minimum MTU. */
+		if (packet_length > IPV6_MIN_MTU)
+			frag_off = htons(IP_DF);
 
-		dev_kfree_skb(skb);
-		return nskb;
+		if (mtu < packet_length) {
+			if (tnl_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
+				goto drop;
+		}
 	}
+#endif
 
-	return skb;
+	*frag_offp = frag_off;
+	return true;
+
+drop:
+	*frag_offp = 0;
+	return false;
 }
 
-static inline u8 ecn_encapsulate(u8 tos, struct sk_buff *skb)
+static void create_tunnel_header(const struct vport *vport,
+				 const struct tnl_mutable_config *mutable,
+				 const struct rtable *rt, void *header)
 {
-	u8 inner;
+	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+	struct iphdr *iph = header;
+
+	iph->version	= 4;
+	iph->ihl	= sizeof(struct iphdr) >> 2;
+	iph->frag_off	= htons(IP_DF);
+	iph->protocol	= tnl_vport->tnl_ops->ipproto;
+	iph->tos	= mutable->port_config.tos;
+	iph->daddr	= rt->rt_dst;
+	iph->saddr	= rt->rt_src;
+	iph->ttl	= mutable->port_config.ttl;
+	if (!iph->ttl)
+		iph->ttl = dst_metric(&rt_dst(rt), RTAX_HOPLIMIT);
+
+	tnl_vport->tnl_ops->build_header(vport, mutable, iph + 1);
+}
 
-	if (skb->protocol == htons(ETH_P_IP))
-		inner = ((struct iphdr *)skb_network_header(skb))->tos;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	else if (skb->protocol == htons(ETH_P_IPV6))
-		inner = ipv6_get_dsfield((struct ipv6hdr *)skb_network_header(skb));
-#endif
-	else
-		inner = 0;
+static inline void *get_cached_header(const struct tnl_cache *cache)
+{
+	return (void *)cache + ALIGN(sizeof(struct tnl_cache), CACHE_DATA_ALIGN);
+}
 
-	return INET_ECN_encapsulate(tos, inner);
+static inline bool check_cache_valid(const struct tnl_cache *cache,
+				     const struct tnl_mutable_config *mutable)
+{
+	return cache &&
+#ifdef NEED_CACHE_TIMEOUT
+		time_before(jiffies, cache->expiration) &&
+#endif
+#ifdef HAVE_RT_GENID
+		atomic_read(&init_net.ipv4.rt_genid) == cache->rt->rt_genid &&
+#endif
+#ifdef HAVE_HH_SEQ
+		rt_dst(cache->rt).hh->hh_lock.sequence == cache->hh_seq &&
+#endif
+		mutable->seq == cache->mutable_seq &&
+		(!is_internal_dev(rt_dst(cache->rt).dev) ||
+		(cache->flow && !cache->flow->dead));
 }
 
-static inline void ecn_decapsulate(struct sk_buff *skb)
+static int cache_cleaner_cb(struct tbl_node *tbl_node, void *aux)
 {
-	u8 tos = ip_hdr(skb)->tos;
+	struct tnl_vport *tnl_vport = tnl_vport_table_cast(tbl_node);
+	const struct tnl_mutable_config *mutable = rcu_dereference(tnl_vport->mutable);
+	const struct tnl_cache *cache = rcu_dereference(tnl_vport->cache);
 
-	if (INET_ECN_is_ce(tos)) {
-		__be16 protocol = skb->protocol;
-		unsigned int nw_header = skb_network_header(skb) - skb->data;
+	if (cache && !check_cache_valid(cache, mutable) &&
+	    spin_trylock_bh(&tnl_vport->cache_lock)) {
+		assign_cache_rcu(tnl_vport_to_vport(tnl_vport), NULL);
+		spin_unlock_bh(&tnl_vport->cache_lock);
+	}
 
-		if (skb->protocol == htons(ETH_P_8021Q)) {
-			if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
-				return;
+	return 0;
+}
 
-			protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
-			nw_header += VLAN_HLEN;
-		}
+static void cache_cleaner(struct work_struct *work)
+{
+	schedule_cache_cleaner();
 
-		if (protocol == htons(ETH_P_IP)) {
-			if (unlikely(!pskb_may_pull(skb, nw_header
-			    + sizeof(struct iphdr))))
-				return;
+	rcu_read_lock();
+	tbl_foreach(port_table, cache_cleaner_cb, NULL);
+	rcu_read_unlock();
+}
 
-			IP_ECN_set_ce((struct iphdr *)(nw_header + skb->data));
-		}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-		else if (protocol == htons(ETH_P_IPV6)) {
-			if (unlikely(!pskb_may_pull(skb, nw_header
-			    + sizeof(struct ipv6hdr))))
-				return;
+static inline void create_eth_hdr(struct tnl_cache *cache,
+				  const struct rtable *rt)
+{
+	void *cache_data = get_cached_header(cache);
+	int hh_len = rt_dst(rt).hh->hh_len;
+	int hh_off = HH_DATA_ALIGN(rt_dst(rt).hh->hh_len) - hh_len;
 
-			IP6_ECN_set_ce((struct ipv6hdr *)(nw_header
-							  + skb->data));
-		}
+#ifdef HAVE_HH_SEQ
+	unsigned hh_seq;
+
+	do {
+		hh_seq = read_seqbegin(&rt_dst(rt).hh->hh_lock);
+		memcpy(cache_data, (void *)rt_dst(rt).hh->hh_data + hh_off, hh_len);
+	} while (read_seqretry(&rt_dst(rt).hh->hh_lock, hh_seq));
+
+	cache->hh_seq = hh_seq;
+#else
+	read_lock_bh(&rt_dst(rt).hh->hh_lock);
+	memcpy(cache_data, (void *)rt_dst(rt).hh->hh_data + hh_off, hh_len);
+	read_unlock_bh(&rt_dst(rt).hh->hh_lock);
 #endif
-	}
 }
 
-static struct sk_buff *handle_gso(struct sk_buff *skb)
+static struct tnl_cache *build_cache(struct vport *vport,
+				     const struct tnl_mutable_config *mutable,
+				     struct rtable *rt)
 {
-	if (skb_is_gso(skb)) {
-		struct sk_buff *nskb = skb_gso_segment(skb, 0);
+	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+	struct tnl_cache *cache;
+	void *cache_data;
+	int cache_len;
 
-		dev_kfree_skb(skb);
-		return nskb;
+	if (!(mutable->port_config.flags & TNL_F_HDR_CACHE))
+		return NULL;
+
+	/*
+	 * If there is no entry in the ARP cache or if this device does not
+	 * support hard header caching just fall back to the IP stack.
+	 */
+	if (!rt_dst(rt).hh)
+		return NULL;
+
+	/*
+	 * If lock is contended fall back to directly building the header.
+	 * We're not going to help performance by sitting here spinning.
+	 */
+	if (!spin_trylock_bh(&tnl_vport->cache_lock))
+		return NULL;
+
+	cache = tnl_vport->cache;
+	if (check_cache_valid(cache, mutable))
+		goto unlock;
+	else
+		cache = NULL;
+
+	cache_len = rt_dst(rt).hh->hh_len + mutable->tunnel_hlen;
+
+	cache = kzalloc(ALIGN(sizeof(struct tnl_cache), CACHE_DATA_ALIGN) +
+			cache_len, GFP_ATOMIC);
+	if (!cache)
+		goto unlock;
+
+	cache->len = cache_len;
+
+	create_eth_hdr(cache, rt);
+	cache_data = get_cached_header(cache) + rt_dst(rt).hh->hh_len;
+
+	create_tunnel_header(vport, mutable, rt, cache_data);
+
+	cache->mutable_seq = mutable->seq;
+	cache->rt = rt;
+#ifdef NEED_CACHE_TIMEOUT
+	cache->expiration = jiffies + tnl_vport->cache_exp_interval;
+#endif
+
+	if (is_internal_dev(rt_dst(rt).dev)) {
+		int err;
+		struct vport *vport;
+		struct dp_port *dp_port;
+		struct sk_buff *skb;
+		bool is_frag;
+		struct odp_flow_key flow_key;
+		struct tbl_node *flow_node;
+
+		vport = internal_dev_get_vport(rt_dst(rt).dev);
+		if (!vport)
+			goto done;
+
+		dp_port = vport_get_dp_port(vport);
+		if (!dp_port)
+			goto done;
+
+		skb = alloc_skb(cache->len, GFP_ATOMIC);
+		if (!skb)
+			goto done;
+
+		__skb_put(skb, cache->len);
+		memcpy(skb->data, get_cached_header(cache), cache->len);
+
+		err = flow_extract(skb, dp_port->port_no, &flow_key, &is_frag);
+
+		kfree_skb(skb);
+		if (err || is_frag)
+			goto done;
+
+		flow_node = tbl_lookup(rcu_dereference(dp_port->dp->table),
+				       &flow_key, flow_hash(&flow_key),
+				       flow_cmp);
+		if (flow_node) {
+			struct sw_flow *flow = flow_cast(flow_node);
+
+			cache->flow = flow;
+			flow_hold(flow);
+		}
 	}
 
-	return skb;
+done:
+	assign_cache_rcu(vport, cache);
+
+unlock:
+	spin_unlock_bh(&tnl_vport->cache_lock);
+
+	return cache;
 }
 
-static int handle_csum_offload(struct sk_buff *skb)
+static struct rtable *find_route(struct vport *vport,
+				 const struct tnl_mutable_config *mutable,
+				 u8 tos, struct tnl_cache **cache)
 {
-	if (skb->ip_summed == CHECKSUM_PARTIAL)
-		return skb_checksum_help(skb);
-	else {
-		skb->ip_summed = CHECKSUM_NONE;
-		return 0;
+	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+	struct tnl_cache *cur_cache = rcu_dereference(tnl_vport->cache);
+
+	*cache = NULL;
+	tos = RT_TOS(tos);
+
+	if (likely(tos == mutable->port_config.tos &&
+		   check_cache_valid(cur_cache, mutable))) {
+		*cache = cur_cache;
+		return cur_cache->rt;
+	} else {
+		struct rtable *rt;
+		struct flowi fl = { .nl_u = { .ip4_u =
+					      { .daddr = mutable->port_config.daddr,
+						.saddr = mutable->port_config.saddr,
+						.tos = tos } },
+				    .proto = tnl_vport->tnl_ops->ipproto };
+
+		if (unlikely(ip_route_output_key(&init_net, &rt, &fl)))
+			return NULL;
+
+		if (likely(tos == mutable->port_config.tos))
+			*cache = build_cache(vport, mutable, rt);
+
+		return rt;
 	}
 }
 
-/* Called with rcu_read_lock. */
-void tnl_rcv(struct vport *vport, struct sk_buff *skb)
+static struct sk_buff *check_headroom(struct sk_buff *skb, int headroom)
 {
-	skb->pkt_type = PACKET_HOST;
-	skb->protocol = eth_type_trans(skb, skb->dev);
+	if (skb_headroom(skb) < headroom || skb_header_cloned(skb)) {
+		struct sk_buff *nskb = skb_realloc_headroom(skb, headroom + 16);
+		if (unlikely(!nskb)) {
+			kfree_skb(skb);
+			return ERR_PTR(-ENOMEM);
+		}
 
-	skb_dst_drop(skb);
-	nf_reset(skb);
-	secpath_reset(skb);
-	skb_reset_network_header(skb);
+		set_skb_csum_bits(skb, nskb);
 
-	ecn_decapsulate(skb);
+		if (skb->sk)
+			skb_set_owner_w(nskb, skb->sk);
 
-	skb_push(skb, ETH_HLEN);
-	compute_ip_summed(skb, false);
+		kfree_skb(skb);
+		return nskb;
+	}
 
-	vport_receive(vport, skb);
+	return skb;
 }
 
-static int build_packet(struct vport *vport, const struct tnl_mutable_config *mutable,
-			struct iphdr *iph, struct rtable *rt, int max_headroom,
-			int mtu, struct sk_buff *skb)
+static inline bool need_linearize(const struct sk_buff *skb)
 {
-	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+	int i;
+
+	if (unlikely(skb_shinfo(skb)->frag_list))
+		return true;
+
+	/*
+	 * Paged frags normally force linearization, but a page whose refcount
+	 * is 1 cannot be changed by anyone else underneath us.  Every frag is
+	 * inspected (index i): a single shared page requires linearizing.
+	 */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+		if (unlikely(page_count(skb_shinfo(skb)->frags[i].page) > 1))
+			return true;
+
+	return false;
+}
+
+static struct sk_buff *handle_offloads(struct sk_buff *skb,
+				       const struct tnl_mutable_config *mutable,
+				       const struct rtable *rt)
+{
+	int min_headroom;
 	int err;
-	struct iphdr *new_iph;
-	int orig_len = skb->len;
-	__be16 frag_off = iph->frag_off;
 
-	skb = check_headroom(skb, max_headroom);
-	if (unlikely(IS_ERR(skb)))
-		goto error;
+	forward_ip_summed(skb);
 
-	err = handle_csum_offload(skb);
+	err = vswitch_skb_checksum_setup(skb);
 	if (unlikely(err))
 		goto error_free;
 
-	if (skb->protocol == htons(ETH_P_IP)) {
-		struct iphdr *old_iph = ip_hdr(skb);
+	min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
+			+ mutable->tunnel_hlen;
 
-		if ((old_iph->frag_off & htons(IP_DF)) &&
-		    mtu < ntohs(old_iph->tot_len)) {
-			if (tnl_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
-				goto error_free;
+	if (skb_is_gso(skb)) {
+		struct sk_buff *nskb;
+
+		/*
+		 * If we are doing GSO on a pskb it is better to make sure that
+		 * the headroom is correct now.  We will only have to copy the
+		 * portion in the linear data area and GSO will preserve
+		 * headroom when it creates the segments.  This is particularly
+		 * beneficial on Xen where we get a lot of GSO pskbs.
+		 * Conversely, we avoid copying if it is just to get our own
+		 * writable clone because GSO will do the copy for us.
+		 */
+		if (skb_headroom(skb) < min_headroom) {
+			skb = check_headroom(skb, min_headroom);
+			if (unlikely(IS_ERR(skb))) {
+				err = PTR_ERR(skb);
+				goto error;
+			}
 		}
 
-	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	else if (skb->protocol == htons(ETH_P_IPV6)) {
-		unsigned int packet_length = skb->len - ETH_HLEN
-			- (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);
+		nskb = skb_gso_segment(skb, 0);
+		kfree_skb(skb);
+		if (unlikely(IS_ERR(nskb))) {
+			err = PTR_ERR(nskb);
+			goto error;
+		}
 
-		/* IPv6 requires PMTUD if the packet is above the minimum MTU. */
-		if (packet_length > IPV6_MIN_MTU)
-			frag_off = htons(IP_DF);
+		skb = nskb;
+	} else {
+		skb = check_headroom(skb, min_headroom);
+		if (unlikely(IS_ERR(skb))) {
+			err = PTR_ERR(skb);
+			goto error;
+		}
 
-		if (mtu < packet_length) {
-			if (tnl_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
+			/*
+			 * Pages aren't locked and could change at any time.
+			 * If this happens after we compute the checksum, the
+			 * checksum will be wrong.  We linearize now to avoid
+			 * this problem.
+			 */
+			if (unlikely(need_linearize(skb))) {
+				err = __skb_linearize(skb);
+				if (unlikely(err))
+					goto error_free;
+			}
+
+			err = skb_checksum_help(skb);
+			if (unlikely(err))
 				goto error_free;
-		}
+		} else if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
 	}
-#endif
 
-	new_iph = (struct iphdr *)skb_push(skb, mutable->tunnel_hlen);
-	skb_reset_network_header(skb);
-	skb_set_transport_header(skb, sizeof(struct iphdr));
-
-	memcpy(new_iph, iph, sizeof(struct iphdr));
-	new_iph->frag_off = frag_off;
-	ip_select_ident(new_iph, &rt_dst(rt), NULL);
+	return skb;
 
-	memset(&IPCB(skb)->opt, 0, sizeof(IPCB(skb)->opt));
-	IPCB(skb)->flags = 0;
+error_free:
+	kfree_skb(skb);
+error:
+	return ERR_PTR(err);
+}
 
-	skb = tnl_vport->tnl_ops->build_header(skb, vport, mutable, &rt_dst(rt));
-	if (unlikely(!skb))
-		goto error;
+static int send_frags(struct sk_buff *skb,
+		      const struct tnl_mutable_config *mutable)
+{
+	int sent_len;
+	int err;
 
+	sent_len = 0;
 	while (skb) {
 		struct sk_buff *next = skb->next;
 		int frag_len = skb->len - mutable->tunnel_hlen;
@@ -694,34 +1101,26 @@ static int build_packet(struct vport *vport, const struct tnl_mutable_config *mu
 		skb->next = NULL;
 
 		err = ip_local_out(skb);
-		if (unlikely(net_xmit_eval(err) != 0)) {
-			orig_len -= frag_len;
+		if (likely(net_xmit_eval(err) == 0))
+			sent_len += frag_len;
+		else {
 			skb = next;
 			goto free_frags;
 		}
 
 		skb = next;
-	};
+	}
 
-	return orig_len;
+	return sent_len;
 
-error_free:
-	kfree_skb(skb);
-error:
-	return 0;
 free_frags:
 	/*
 	 * There's no point in continuing to send fragments once one has been
 	 * dropped so just free the rest.  This may help improve the congestion
 	 * that caused the first packet to be dropped.
 	 */
-	while (skb) {
-		struct sk_buff *next = skb->next;
-		orig_len -= skb->len - mutable->tunnel_hlen;
-		kfree_skb(skb);
-		skb = next;
-	};
-	return orig_len;
+	tnl_free_linked_skbs(skb);
+	return sent_len;
 }
 
 int tnl_send(struct vport *vport, struct sk_buff *skb)
@@ -729,12 +1128,15 @@ int tnl_send(struct vport *vport, struct sk_buff *skb)
 	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
 	const struct tnl_mutable_config *mutable = rcu_dereference(tnl_vport->mutable);
 
-	struct iphdr *old_iph;
-	int orig_len;
-	struct iphdr iph;
+	enum vport_err_type err = VPORT_E_TX_ERROR;
 	struct rtable *rt;
-	int max_headroom;
-	int mtu;
+	struct dst_entry *unattached_dst = NULL;
+	struct tnl_cache *cache;
+	int sent_len = 0;
+	__be16 frag_off;
+	u8 ttl;
+	u8 inner_tos;
+	u8 tos;
 
 	/* Validate the protocol headers before we try to use them. */
 	if (skb->protocol == htons(ETH_P_8021Q)) {
@@ -746,147 +1148,164 @@ int tnl_send(struct vport *vport, struct sk_buff *skb)
 	}
 
 	if (skb->protocol == htons(ETH_P_IP)) {
-		if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
-		    + sizeof(struct iphdr) - skb->data)))
+		if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb)
+		    + sizeof(struct iphdr))))
 			skb->protocol = 0;
 	}
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	else if (skb->protocol == htons(ETH_P_IPV6)) {
-		if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
-		    + sizeof(struct ipv6hdr) - skb->data)))
+		if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb)
+		    + sizeof(struct ipv6hdr))))
 			skb->protocol = 0;
 	}
 #endif
-	old_iph = ip_hdr(skb);
 
-	iph.tos = mutable->port_config.tos;
-	if (mutable->port_config.flags & TNL_F_TOS_INHERIT) {
-		if (skb->protocol == htons(ETH_P_IP))
-			iph.tos = old_iph->tos;
+	/* ToS */
+	if (skb->protocol == htons(ETH_P_IP))
+		inner_tos = ip_hdr(skb)->tos;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-		else if (skb->protocol == htons(ETH_P_IPV6))
-			iph.tos = ipv6_get_dsfield(ipv6_hdr(skb));
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		inner_tos = ipv6_get_dsfield(ipv6_hdr(skb));
 #endif
-	}
-	iph.tos = ecn_encapsulate(iph.tos, skb);
+	else
+		inner_tos = 0;
 
-	{
-		struct flowi fl = { .nl_u = { .ip4_u =
-					      { .daddr = mutable->port_config.daddr,
-						.saddr = mutable->port_config.saddr,
-						.tos = RT_TOS(iph.tos) } },
-				    .proto = tnl_vport->tnl_ops->ipproto };
+	if (mutable->port_config.flags & TNL_F_TOS_INHERIT)
+		tos = inner_tos;
+	else
+		tos = mutable->port_config.tos;
 
-		if (unlikely(ip_route_output_key(&init_net, &rt, &fl)))
-			goto error_free;
+	tos = INET_ECN_encapsulate(tos, inner_tos);
+
+	/* Route lookup */
+	rt = find_route(vport, mutable, tos, &cache);
+	if (unlikely(!rt))
+		goto error_free;
+	if (unlikely(!cache))
+		unattached_dst = &rt_dst(rt);
+
+	/* Reset SKB */
+	nf_reset(skb);
+	secpath_reset(skb);
+	skb_dst_drop(skb);
+
+	/* Offloading */
+	skb = handle_offloads(skb, mutable, rt);
+	if (unlikely(IS_ERR(skb)))
+		goto error;
+
+	/* MTU */
+	if (unlikely(!check_mtu(skb, vport, mutable, rt, &frag_off))) {
+		err = VPORT_E_TX_DROPPED;
+		goto error_free;
 	}
 
-	iph.ttl = mutable->port_config.ttl;
+	/*
+	 * If we are over the MTU, allow the IP stack to handle fragmentation.
+	 * Fragmentation is a slow path anyways.
+	 */
+	if (unlikely(skb->len + mutable->tunnel_hlen > dst_mtu(&rt_dst(rt)) &&
+		     cache)) {
+		unattached_dst = &rt_dst(rt);
+		dst_hold(unattached_dst);
+		cache = NULL;
+	}
+
+	/* TTL */
+	ttl = mutable->port_config.ttl;
+	if (!ttl)
+		ttl = dst_metric(&rt_dst(rt), RTAX_HOPLIMIT);
+
 	if (mutable->port_config.flags & TNL_F_TTL_INHERIT) {
 		if (skb->protocol == htons(ETH_P_IP))
-			iph.ttl = old_iph->ttl;
+			ttl = ip_hdr(skb)->ttl;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 		else if (skb->protocol == htons(ETH_P_IPV6))
-			iph.ttl = ipv6_hdr(skb)->hop_limit;
+			ttl = ipv6_hdr(skb)->hop_limit;
 #endif
 	}
-	if (!iph.ttl)
-		iph.ttl = dst_metric(&rt_dst(rt), RTAX_HOPLIMIT);
 
-	iph.frag_off = (mutable->port_config.flags & TNL_F_PMTUD) ? htons(IP_DF) : 0;
-	if (iph.frag_off)
-		mtu = dst_mtu(&rt_dst(rt))
-			- ETH_HLEN
-			- mutable->tunnel_hlen
-			- (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);
-	else
-		mtu = mutable->mtu;
+	while (skb) {
+		struct iphdr *iph;
+		struct sk_buff *next_skb = skb->next;
+		skb->next = NULL;
 
-	if (skb->protocol == htons(ETH_P_IP)) {
-		iph.frag_off |= old_iph->frag_off & htons(IP_DF);
-		mtu = max(mtu, IP_MIN_MTU);
-	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	else if (skb->protocol == htons(ETH_P_IPV6))
-		mtu = max(mtu, IPV6_MIN_MTU);
-#endif
+		if (likely(cache)) {
+			skb_push(skb, cache->len);
+			memcpy(skb->data, get_cached_header(cache), cache->len);
+			skb_reset_mac_header(skb);
+			skb_set_network_header(skb, rt_dst(rt).hh->hh_len);
 
-	iph.version = 4;
-	iph.ihl = sizeof(struct iphdr) >> 2;
-	iph.protocol = tnl_vport->tnl_ops->ipproto;
-	iph.daddr = rt->rt_dst;
-	iph.saddr = rt->rt_src;
+		} else {
+			skb_push(skb, mutable->tunnel_hlen);
+			create_tunnel_header(vport, mutable, rt, skb->data);
+			skb_reset_network_header(skb);
 
-	nf_reset(skb);
-	secpath_reset(skb);
-	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt_dst(rt));
+			if (next_skb)
+				skb_dst_set(skb, dst_clone(unattached_dst));
+			else {
+				skb_dst_set(skb, unattached_dst);
+				unattached_dst = NULL;
+			}
 
-	/*
-	 * If we are doing GSO on a pskb it is better to make sure that the
-	 * headroom is correct now.  We will only have to copy the portion in
-	 * the linear data area and GSO will preserve headroom when it creates
-	 * the segments.  This is particularly beneficial on Xen where we get
-	 * lots of GSO pskbs.  Conversely, we delay copying if it is just to
-	 * get our own writable clone because GSO may do the copy for us.
-	 */
-	max_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
-			+ mutable->tunnel_hlen;
 
-	if (skb_headroom(skb) < max_headroom) {
-		skb = check_headroom(skb, max_headroom);
-		if (unlikely(IS_ERR(skb))) {
-			vport_record_error(vport, VPORT_E_TX_DROPPED);
-			goto error;
+			memset(&IPCB(skb)->opt, 0, sizeof(IPCB(skb)->opt));
+			IPCB(skb)->flags = 0;
 		}
-	}
+		skb_set_transport_header(skb, skb_network_offset(skb) + sizeof(struct iphdr));
 
-	forward_ip_summed(skb);
+		iph = ip_hdr(skb);
+		iph->tos = tos;
+		iph->ttl = ttl;
+		iph->frag_off = frag_off;
+		ip_select_ident(iph, &rt_dst(rt), NULL);
 
-	if (unlikely(vswitch_skb_checksum_setup(skb)))
-		goto error_free;
+		skb = tnl_vport->tnl_ops->update_header(vport, mutable, &rt_dst(rt), skb);
+		if (unlikely(!skb))
+			goto next;
 
-	skb = handle_gso(skb);
-	if (unlikely(IS_ERR(skb))) {
-		vport_record_error(vport, VPORT_E_TX_DROPPED);
-		goto error;
-	}
+		if (likely(cache)) {
+			int orig_len = skb->len - cache->len;
+			struct vport *cache_vport = internal_dev_get_vport(rt_dst(rt).dev);
 
-	/*
-	 * Process GSO segments.  Try to do any work for the entire packet that
-	 * doesn't involve actually writing to it before this point.
-	 */
-	orig_len = 0;
-	do {
-		struct sk_buff *next_skb = skb->next;
-		skb->next = NULL;
+			skb->protocol = htons(ETH_P_IP);
+
+			iph->tot_len = htons(skb->len - skb_network_offset(skb));
+			ip_send_check(iph);
 
-		orig_len += build_packet(vport, mutable, &iph, rt, max_headroom, mtu, skb);
+			if (likely(cache_vport)) {
+				OVS_CB(skb)->flow = cache->flow;
+				compute_ip_summed(skb, true);
+				vport_receive(cache_vport, skb);
+				sent_len += orig_len;
+			} else {
+				int err;
 
+				skb->dev = rt_dst(rt).dev;
+				err = dev_queue_xmit(skb);
+
+				if (likely(net_xmit_eval(err) == 0))
+					sent_len += orig_len;
+			}
+		} else
+			sent_len += send_frags(skb, mutable);
+
+next:
 		skb = next_skb;
-	} while (skb);
+	}
 
-	if (unlikely(orig_len == 0))
+	if (unlikely(sent_len == 0))
 		vport_record_error(vport, VPORT_E_TX_DROPPED);
 
-	return orig_len;
+	goto out;
 
 error_free:
-	kfree_skb(skb);
-	vport_record_error(vport, VPORT_E_TX_ERROR);
+	tnl_free_linked_skbs(skb);
 error:
-	return 0;
-}
-
-int tnl_init(void)
-{
-	return 0;
-}
-
-void tnl_exit(void)
-{
-	tbl_destroy(port_table, NULL);
-	port_table = NULL;
+	dst_release(unattached_dst);
+	vport_record_error(vport, err);
+out:
+	return sent_len;
 }
 
 static int set_config(const void __user *uconfig, const struct tnl_ops *tnl_ops,
@@ -899,15 +1318,18 @@ static int set_config(const void __user *uconfig, const struct tnl_ops *tnl_ops,
 	if (copy_from_user(&mutable->port_config, uconfig, sizeof(struct tnl_port_config)))
 		return -EFAULT;
 
+	if (mutable->port_config.daddr == 0)
+		return -EINVAL;
+
+	if (mutable->port_config.tos != RT_TOS(mutable->port_config.tos))
+		return -EINVAL;
+
 	mutable->tunnel_hlen = tnl_ops->hdr_len(&mutable->port_config);
 	if (mutable->tunnel_hlen < 0)
 		return mutable->tunnel_hlen;
 
 	mutable->tunnel_hlen += sizeof(struct iphdr);
 
-	if (mutable->port_config.daddr == 0)
-		return -EINVAL;
-
 	mutable->tunnel_type = tnl_ops->tunnel_type;
 	if (mutable->port_config.flags & TNL_F_IN_KEY_MATCH) {
 		mutable->tunnel_type |= TNL_T_KEY_MATCH;
@@ -950,7 +1372,7 @@ struct vport *tnl_create(const char *name, const void __user *config,
 	strcpy(tnl_vport->name, name);
 	tnl_vport->tnl_ops = tnl_ops;
 
-	tnl_vport->mutable = kmalloc(sizeof(struct tnl_mutable_config), GFP_KERNEL);
+	tnl_vport->mutable = kzalloc(sizeof(struct tnl_mutable_config), GFP_KERNEL);
 	if (!tnl_vport->mutable) {
 		err = -ENOMEM;
 		goto error_free_vport;
@@ -966,6 +1388,13 @@ struct vport *tnl_create(const char *name, const void __user *config,
 	if (err)
 		goto error_free_mutable;
 
+	spin_lock_init(&tnl_vport->cache_lock);
+
+#ifdef NEED_CACHE_TIMEOUT
+	tnl_vport->cache_exp_interval = MAX_CACHE_EXP -
+					(net_random() % (MAX_CACHE_EXP / 2));
+#endif
+
 	err = add_port(vport);
 	if (err)
 		goto error_free_mutable;
@@ -985,7 +1414,6 @@ int tnl_modify(struct vport *vport, const void __user *config)
 	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
 	struct tnl_mutable_config *mutable;
 	int err;
-	bool update_hash = false;
 
 	mutable = kmemdup(tnl_vport->mutable, sizeof(struct tnl_mutable_config), GFP_KERNEL);
 	if (!mutable) {
@@ -997,35 +1425,11 @@ int tnl_modify(struct vport *vport, const void __user *config)
 	if (err)
 		goto error_free;
 
-	/*
-	 * Only remove the port from the hash table if something that would
-	 * affect the lookup has changed.
-	 */
-	if (tnl_vport->mutable->port_config.saddr != mutable->port_config.saddr ||
-	    tnl_vport->mutable->port_config.daddr != mutable->port_config.daddr ||
-	    tnl_vport->mutable->port_config.in_key != mutable->port_config.in_key ||
-	    (tnl_vport->mutable->port_config.flags & TNL_F_IN_KEY_MATCH) !=
-	    (mutable->port_config.flags & TNL_F_IN_KEY_MATCH))
-		update_hash = true;
-
-
-	/*
-	 * This update is not atomic but the lookup uses the config, which
-	 * serves as an inherent double check.
-	 */
-	if (update_hash) {
-		err = del_port(vport);
-		if (err)
-			goto error_free;
-	}
-
-	assign_config_rcu(vport, mutable);
+	mutable->seq++;
 
-	if (update_hash) {
-		err = add_port(vport);
-		if (err)
-			goto error_free;
-	}
+	err = move_port(vport, mutable);
+	if (err)
+		goto error_free;
 
 	return 0;
 
@@ -1035,10 +1439,14 @@ error:
 	return err;
 }
 
-static void free_port(struct rcu_head *rcu)
+static void free_port_rcu(struct rcu_head *rcu)
 {
 	struct tnl_vport *tnl_vport = container_of(rcu, struct tnl_vport, rcu);
 
+	spin_lock_bh(&tnl_vport->cache_lock);
+	free_cache(tnl_vport->cache);
+	spin_unlock_bh(&tnl_vport->cache_lock);
+
 	kfree(tnl_vport->mutable);
 	vport_free(tnl_vport_to_vport(tnl_vport));
 }
@@ -1055,7 +1463,7 @@ int tnl_destroy(struct vport *vport)
 	    &old_mutable))
 		del_port(vport);
 
-	call_rcu(&tnl_vport->rcu, free_port);
+	call_rcu(&tnl_vport->rcu, free_port_rcu);
 
 	return 0;
 }
@@ -1090,7 +1498,6 @@ int tnl_set_addr(struct vport *vport, const unsigned char *addr)
 	return 0;
 }
 
-
 const char *tnl_get_name(const struct vport *vport)
 {
 	const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
@@ -1108,3 +1515,15 @@ int tnl_get_mtu(const struct vport *vport)
 	const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
 	return rcu_dereference(tnl_vport->mutable)->mtu;
 }
+
+void tnl_free_linked_skbs(struct sk_buff *skb)
+{
+	if (unlikely(!skb))
+		return;
+
+	while (skb) {
+		struct sk_buff *next = skb->next;
+		kfree_skb(skb);
+		skb = next;
+	}
+}
diff --git a/datapath/tunnel.h b/datapath/tunnel.h
index 37874c57c..8ffb7bf54 100644
--- a/datapath/tunnel.h
+++ b/datapath/tunnel.h
@@ -9,6 +9,9 @@
 #ifndef TUNNEL_H
 #define TUNNEL_H 1
 
+#include <linux/version.h>
+
+#include "flow.h"
 #include "openvswitch/tunnel.h"
 #include "table.h"
 #include "vport.h"
@@ -20,14 +23,15 @@
 #define IP_MIN_MTU 68
 
 /*
- * One of these goes in your struct tnl_ops and in tnl_find_port().
+ * One of these goes in struct tnl_ops and in tnl_find_port().
  * These values are in the same namespace as other TNL_T_* values, so
- * you have only the first 10 bits to define protocol identifiers.
+ * only the least significant 10 bits are available to define protocol
+ * identifiers.
  */
 #define TNL_T_PROTO_GRE		0
 #define TNL_T_PROTO_CAPWAP	1
 
-/* You only need these flags when you are calling tnl_find_port(). */
+/* These flags are only needed when calling tnl_find_port(). */
 #define TNL_T_KEY_EXACT		(1 << 10)
 #define TNL_T_KEY_MATCH		(1 << 11)
 #define TNL_T_KEY_EITHER	(TNL_T_KEY_EXACT | TNL_T_KEY_MATCH)
@@ -35,39 +39,119 @@
 struct tnl_mutable_config {
 	struct rcu_head rcu;
 
-	unsigned char eth_addr[ETH_ALEN];
-	unsigned int mtu;
-	struct tnl_port_config port_config;
+	unsigned seq;		/* Sequence number to identify this config. */
 
-	/* Set of TNL_T_* flags that define the category for lookup. */
-	u32 tunnel_type;
+	u32 tunnel_type;	/* Set of TNL_T_* flags that define lookup. */
+	int tunnel_hlen;	/* Tunnel header length or -errno. */
+
+	unsigned char eth_addr[ETH_ALEN];
+	unsigned mtu;
 
-	int tunnel_hlen; /* Tunnel header length. */
+	struct tnl_port_config port_config;
 };
 
 struct tnl_ops {
-	/* Put your TNL_T_PROTO_* type in here. */
-	u32 tunnel_type;
-	u8 ipproto;
+	u32 tunnel_type;	/* Put the TNL_T_PROTO_* type in here. */
+	u8 ipproto;		/* The IP protocol for the tunnel. */
 
 	/*
-	 * Returns the length of the tunnel header you will add in
+	 * Returns the length of the tunnel header that will be added in
 	 * build_header() (i.e. excludes the IP header).  Returns a negative
 	 * error code if the configuration is invalid.
 	 */
 	int (*hdr_len)(const struct tnl_port_config *);
 
 	/*
-	 * Returns a linked list of SKBs with tunnel headers (multiple
-	 * packets may be generated in the event of fragmentation).  Space
-	 * will have already been allocated at the start of the packet equal
-	 * to sizeof(struct iphdr) + value returned by hdr_len().  The IP
-	 * header will have already been constructed.
+	 * Builds the static portion of the tunnel header, which is stored in
+	 * the header cache.  In general the performance of this function is
+	 * not too important as we try to only call it when building the cache
+	 * so it is preferable to shift as much work as possible here.  However,
+	 * in some circumstances caching is disabled and this function will be
+	 * called for every packet, so try not to make it too slow.
+	 */
+	void (*build_header)(const struct vport *,
+			     const struct tnl_mutable_config *, void *header);
+
+	/*
+	 * Updates the cached header of a packet to match the actual packet
+	 * data.  Typical things that might need to be updated are length,
+	 * checksum, etc.  The IP header will have already been updated and this
+	 * is the final step before transmission.  Returns a linked list of
+	 * completed SKBs (multiple packets may be generated in the event
+	 * of fragmentation).
+	 */
+	struct sk_buff *(*update_header)(const struct vport *,
+					 const struct tnl_mutable_config *,
+					 struct dst_entry *, struct sk_buff *);
+};
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
+/*
+ * On these kernels we have a fast mechanism to tell if the ARP cache for a
+ * particular destination has changed.
+ */
+#define HAVE_HH_SEQ
+#endif
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)
+/*
+ * On these kernels we have a fast mechanism to tell if the routing table
+ * has changed.
+ */
+#define HAVE_RT_GENID
+#endif
+#if !defined(HAVE_HH_SEQ) || !defined(HAVE_RT_GENID)
+/* If we can't detect all system changes directly we need to use a timeout. */
+#define NEED_CACHE_TIMEOUT
+#endif
+struct tnl_cache {
+	struct rcu_head rcu;
+
+	int len;		/* Length of data to be memcpy'd from cache. */
+
+	/* Sequence number of mutable->seq from which this cache was generated. */
+	unsigned mutable_seq;
+
+#ifdef HAVE_HH_SEQ
+	/*
+	 * The sequence number from the seqlock protecting the hardware header
+	 * cache (in the ARP cache).  Since every write increments the counter
+	 * this gives us an easy way to tell if it has changed.
+	 */
+	unsigned hh_seq;
+#endif
+
+#ifdef NEED_CACHE_TIMEOUT
+	/*
+	 * If we don't have direct mechanisms to detect all important changes in
+	 * the system, fall back to an expiration time.  This expiration time
+	 * can be relatively short since at high rates there will be millions of
+	 * packets per second, so we'll still get plenty of benefit from the
+	 * cache.  Note that if something changes we may blackhole packets
+	 * until the expiration time (depending on what changed and the kernel
+	 * version we may be able to detect the change sooner).  Expiration is
+	 * expressed as a time in jiffies.
 	 */
-	struct sk_buff *(*build_header)(struct sk_buff *,
-					const struct vport *,
-					const struct tnl_mutable_config *,
-					struct dst_entry *);
+	unsigned long expiration;
+#endif
+
+	/*
+	 * The routing table entry that is the result of looking up the tunnel
+	 * endpoints.  It also contains a sequence number (called a generation
+	 * ID) that can be compared to a global sequence to tell if the routing
+	 * table has changed (and therefore there is a potential that this
+	 * cached route has been invalidated).
+	 */
+	struct rtable *rt;
+
+	/*
+	 * If the output device for tunnel traffic is an OVS internal device,
+	 * the flow of that datapath.  Since all tunnel traffic will have the
+	 * same headers this allows us to cache the flow lookup.  NULL if the
+	 * output device is not OVS or if there is no flow installed.
+	 */
+	struct sw_flow *flow;
+
+	/* The cached header follows after padding for alignment. */
 };
 
 struct tnl_vport {
@@ -77,14 +161,29 @@ struct tnl_vport {
 	char name[IFNAMSIZ];
 	const struct tnl_ops *tnl_ops;
 
-	/* Protected by RCU. */
-	struct tnl_mutable_config *mutable;
+	struct tnl_mutable_config *mutable;	/* Protected by RCU. */
 
+	/*
+	 * ID of last fragment sent (for tunnel protocols with direct support
+	 * for fragmentation).  If the protocol relies on IP fragmentation then
+	 * this is not needed.
+	 */
 	atomic_t frag_id;
+
+	spinlock_t cache_lock;
+	struct tnl_cache *cache;		/* Protected by RCU/cache_lock. */
+
+#ifdef NEED_CACHE_TIMEOUT
+	/*
+	 * If we must rely on expiration time to invalidate the cache, this is
+	 * the interval.  It is randomized within a range (defined by
+	 * MAX_CACHE_EXP in tunnel.c) to avoid synchronized expirations caused
+	 * by creation of a large number of tunnels at one time.
+	 */
+	unsigned long cache_exp_interval;
+#endif
 };
 
-int tnl_init(void);
-void tnl_exit(void);
 struct vport *tnl_create(const char *name, const void __user *config,
 			 const struct vport_ops *,
 			 const struct tnl_ops *);
@@ -104,10 +203,12 @@ struct vport *tnl_find_port(__be32 saddr, __be32 daddr, __be32 key,
 bool tnl_frag_needed(struct vport *vport,
 		     const struct tnl_mutable_config *mutable,
 		     struct sk_buff *skb, unsigned int mtu, __be32 flow_key);
+void tnl_free_linked_skbs(struct sk_buff *skb);
 
 static inline struct tnl_vport *tnl_vport_priv(const struct vport *vport)
 {
 	return vport_priv(vport);
 }
 
+
 #endif /* tunnel.h */
diff --git a/datapath/vport-capwap.c b/datapath/vport-capwap.c
index 7ae3790d7..bf1465fc0 100644
--- a/datapath/vport-capwap.c
+++ b/datapath/vport-capwap.c
@@ -128,24 +128,32 @@ static int capwap_hdr_len(const struct tnl_port_config *port_config)
 	return CAPWAP_HLEN;
 }
 
-static struct sk_buff *capwap_build_header(struct sk_buff *skb,
-					   const struct vport *vport,
-					   const struct tnl_mutable_config *mutable,
-					   struct dst_entry *dst)
+static void capwap_build_header(const struct vport *vport,
+				const struct tnl_mutable_config *mutable,
+				void *header)
 {
-	struct udphdr *udph = udp_hdr(skb);
-	struct capwaphdr *cwh = capwap_hdr(skb);
+	struct udphdr *udph = header;
+	struct capwaphdr *cwh = (struct capwaphdr *)(udph + 1);
 
 	udph->source = htons(CAPWAP_SRC_PORT);
 	udph->dest = htons(CAPWAP_DST_PORT);
-	udph->len = htons(skb->len - sizeof(struct iphdr));
 	udph->check = 0;
 
 	cwh->begin = NO_FRAG_HDR;
 	cwh->frag_id = 0;
 	cwh->frag_off = 0;
+}
+
+static struct sk_buff *capwap_update_header(const struct vport *vport,
+					    const struct tnl_mutable_config *mutable,
+					    struct dst_entry *dst,
+					    struct sk_buff *skb)
+{
+	struct udphdr *udph = udp_hdr(skb);
 
-	if (unlikely(skb->len > dst_mtu(dst)))
+	udph->len = htons(skb->len - skb_transport_offset(skb));
+
+	if (unlikely(skb->len - skb_network_offset(skb) > dst_mtu(dst)))
 		skb = fragment(skb, vport, dst);
 
 	return skb;
@@ -209,6 +217,7 @@ struct tnl_ops capwap_tnl_ops = {
 	.ipproto	= IPPROTO_UDP,
 	.hdr_len	= capwap_hdr_len,
 	.build_header	= capwap_build_header,
+	.update_header	= capwap_update_header,
 };
 
 static struct vport *capwap_create(const char *name, const void __user *config)
@@ -241,7 +250,7 @@ static int capwap_init(void)
 
 	defrag_init();
 
-	return tnl_init();
+	return 0;
 
 error_sock:
 	sock_release(capwap_rcv_socket);
@@ -252,7 +261,6 @@ error:
 
 static void capwap_exit(void)
 {
-	tnl_exit();
 	defrag_exit();
 	sock_release(capwap_rcv_socket);
 }
@@ -282,17 +290,19 @@ static struct sk_buff *fragment(struct sk_buff *skb, const struct vport *vport,
 				struct dst_entry *dst)
 {
 	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
-	unsigned int hlen = sizeof(struct iphdr) + CAPWAP_HLEN;
-	unsigned int headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len;
+	unsigned int hlen = skb_transport_offset(skb) + CAPWAP_HLEN;
+	unsigned int headroom;
+	unsigned int max_frame_len = dst_mtu(dst) + skb_network_offset(skb);
 	struct sk_buff *result = NULL, *list_cur = NULL;
 	unsigned int remaining;
 	unsigned int offset;
 	__be16 frag_id;
 
-	if (hlen + ~FRAG_OFF_MASK + 1 > dst_mtu(dst)) {
+	if (hlen + ~FRAG_OFF_MASK + 1 > max_frame_len) {
 		if (net_ratelimit())
 			pr_warn("capwap link mtu (%d) is less than minimum packet (%d)\n",
-				dst_mtu(dst), hlen + ~FRAG_OFF_MASK + 1);
+				dst_mtu(dst),
+				hlen - skb_network_offset(skb) + ~FRAG_OFF_MASK + 1);
 		goto error;
 	}
 
@@ -300,14 +310,17 @@ static struct sk_buff *fragment(struct sk_buff *skb, const struct vport *vport,
 	offset = 0;
 	frag_id = htons(atomic_inc_return(&tnl_vport->frag_id));
 
+	headroom = dst->header_len + 16;
+	if (!skb_network_offset(skb))
+		headroom += LL_RESERVED_SPACE(dst->dev);
+
 	while (remaining) {
 		struct sk_buff *skb2;
 		int frag_size;
-		struct iphdr *iph;
 		struct udphdr *udph;
 		struct capwaphdr *cwh;
 
-		frag_size = min(remaining, dst_mtu(dst) - hlen);
+		frag_size = min(remaining, max_frame_len - hlen);
 		if (remaining > frag_size)
 			frag_size &= FRAG_OFF_MASK;
 
@@ -317,23 +330,22 @@ static struct sk_buff *fragment(struct sk_buff *skb, const struct vport *vport,
 
 		skb_reserve(skb2, headroom);
 		__skb_put(skb2, hlen + frag_size);
-		skb_reset_network_header(skb2);
-		skb_set_transport_header(skb2, sizeof(struct iphdr));
 
-		/* Copy IP/UDP/CAPWAP header. */
+		if (skb_network_offset(skb))
+			skb_reset_mac_header(skb2);
+		skb_set_network_header(skb2, skb_network_offset(skb));
+		skb_set_transport_header(skb2, skb_transport_offset(skb));
+
+		/* Copy (Ethernet)/IP/UDP/CAPWAP header. */
 		copy_skb_metadata(skb, skb2);
-		skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
+		skb_copy_from_linear_data(skb, skb2->data, hlen);
 
 		/* Copy this data chunk. */
 		if (skb_copy_bits(skb, hlen + offset, skb2->data + hlen, frag_size))
 			BUG();
 
-		iph = ip_hdr(skb2);
-		iph->tot_len = hlen + frag_size;
-		ip_send_check(iph);
-
 		udph = udp_hdr(skb2);
-		udph->len = htons(skb2->len - sizeof(struct iphdr));
+		udph->len = htons(skb2->len - skb_transport_offset(skb2));
 
 		cwh = capwap_hdr(skb2);
 		if (remaining > frag_size)
@@ -356,11 +368,7 @@ static struct sk_buff *fragment(struct sk_buff *skb, const struct vport *vport,
 	goto out;
 
 error:
-	while (result) {
-		list_cur = result->next;
-		kfree_skb(result);
-		result = list_cur;
-	}
+	tnl_free_linked_skbs(result);
 out:
 	kfree_skb(skb);
 	return result;
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index 0a7092f96..be8fb5343 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -50,41 +50,49 @@ static int gre_hdr_len(const struct tnl_port_config *port_config)
 	return len;
 }
 
-static struct sk_buff *gre_build_header(struct sk_buff *skb,
-					const struct vport *vport,
-					const struct tnl_mutable_config *mutable,
-					struct dst_entry *dst)
+static void gre_build_header(const struct vport *vport,
+			     const struct tnl_mutable_config *mutable,
+			     void *header)
 {
-	struct gre_base_hdr *greh = (struct gre_base_hdr *)skb_transport_header(skb);
-	__be32 *options = (__be32 *)(skb_network_header(skb) + mutable->tunnel_hlen
-					       - GRE_HEADER_SECTION);
+	struct gre_base_hdr *greh = header;
+	__be32 *options = (__be32 *)(greh + 1);
 
 	greh->protocol = htons(ETH_P_TEB);
 	greh->flags = 0;
 
-	/* Work backwards over the options so the checksum is last. */
+	if (mutable->port_config.flags & TNL_F_CSUM) {
+		greh->flags |= GRE_CSUM;
+		*options = 0;
+		options++;
+	}
+
 	if (mutable->port_config.out_key ||
-	    mutable->port_config.flags & TNL_F_OUT_KEY_ACTION) {
+	    mutable->port_config.flags & TNL_F_OUT_KEY_ACTION)
 		greh->flags |= GRE_KEY;
 
-		if (mutable->port_config.flags & TNL_F_OUT_KEY_ACTION)
-			*options = OVS_CB(skb)->tun_id;
-		else
-			*options = mutable->port_config.out_key;
+	if (mutable->port_config.out_key)
+		*options = mutable->port_config.out_key;
+}
+
+static struct sk_buff *gre_update_header(const struct vport *vport,
+					 const struct tnl_mutable_config *mutable,
+					 struct dst_entry *dst,
+					 struct sk_buff *skb)
+{
+	__be32 *options = (__be32 *)(skb_network_header(skb) + mutable->tunnel_hlen
+					       - GRE_HEADER_SECTION);
 
+	if (mutable->port_config.flags & TNL_F_OUT_KEY_ACTION)
+		*options = OVS_CB(skb)->tun_id;	/* Key is the last option. */
+	if (mutable->port_config.out_key || mutable->port_config.flags & TNL_F_OUT_KEY_ACTION) {
 		options--;
 	}
 
-	if (mutable->port_config.flags & TNL_F_CSUM) {
-		greh->flags |= GRE_CSUM;
-
-		*options = 0;
+	if (mutable->port_config.flags & TNL_F_CSUM)
 		*(__sum16 *)options = csum_fold(skb_checksum(skb,
-						sizeof(struct iphdr),
-						skb->len - sizeof(struct iphdr),
+						skb_transport_offset(skb),
+						skb->len - skb_transport_offset(skb),
 						0));
-	}
-
 	/*
 	 * Allow our local IP stack to fragment the outer packet even if the
 	 * DF bit is set as a last resort.
@@ -329,6 +337,7 @@ struct tnl_ops gre_tnl_ops = {
 	.ipproto	= IPPROTO_GRE,
 	.hdr_len	= gre_hdr_len,
 	.build_header	= gre_build_header,
+	.update_header	= gre_update_header,
 };
 
 static struct vport *gre_create(const char *name, const void __user *config)
@@ -346,20 +355,14 @@ static int gre_init(void)
 	int err;
 
 	err = inet_add_protocol(&gre_protocol_handlers, IPPROTO_GRE);
-	if (err) {
+	if (err)
 		pr_warn("cannot register gre protocol handler\n");
-		goto out;
-	}
-
-	err = tnl_init();
 
-out:
 	return err;
 }
 
 static void gre_exit(void)
 {
-	tnl_exit();
 	inet_del_protocol(&gre_protocol_handlers, IPPROTO_GRE);
 }
 
diff --git a/include/openvswitch/tunnel.h b/include/openvswitch/tunnel.h
index 373797513..dd700d0dc 100644
--- a/include/openvswitch/tunnel.h
+++ b/include/openvswitch/tunnel.h
@@ -48,6 +48,7 @@
 #define TNL_F_TOS_INHERIT	(1 << 4) /* Inherit the ToS from the inner packet. */
 #define TNL_F_TTL_INHERIT	(1 << 5) /* Inherit the TTL from the inner packet. */
 #define TNL_F_PMTUD		(1 << 6) /* Enable path MTU discovery. */
+#define TNL_F_HDR_CACHE		(1 << 7) /* Enable tunnel header caching. */
 
 struct tnl_port_config {
 	__u32	flags;
-- 
cgit v1.2.1


From dca9309ae888995f13be5e1bfa607214ca531613 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Mon, 30 Aug 2010 15:34:04 -0700
Subject: tunneling: Allow disabling tunnel header caching.

Tunnel header caching significantly improves performance by bypassing
much of the transmit path.  However, in some special cases or for
debugging it may be desirable to traverse the entire IP stack.  This
exposes that as an option (default is to enable header caching).

Signed-off-by: Jesse Gross <jesse@nicira.com>
Reviewed-by: Ben Pfaff <blp@nicira.com>
---
 lib/netdev-tunnel.c  |  5 +++++
 vswitchd/vswitch.xml | 22 ++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/lib/netdev-tunnel.c b/lib/netdev-tunnel.c
index d0ecd98e2..fdc1d976a 100644
--- a/lib/netdev-tunnel.c
+++ b/lib/netdev-tunnel.c
@@ -66,6 +66,7 @@ parse_config(const char *name, const char *type, const struct shash *args,
     memset(config, 0, sizeof *config);
 
     config->flags |= TNL_F_PMTUD;
+    config->flags |= TNL_F_HDR_CACHE;
 
     SHASH_FOR_EACH (node, args) {
         if (!strcmp(node->name, "remote_ip")) {
@@ -121,6 +122,10 @@ parse_config(const char *name, const char *type, const struct shash *args,
             if (!strcmp(node->data, "false")) {
                 config->flags &= ~TNL_F_PMTUD;
             }
+        } else if (!strcmp(node->name, "header_cache")) {
+            if (!strcmp(node->data, "false")) {
+                config->flags &= ~TNL_F_HDR_CACHE;
+            }
         } else {
             VLOG_WARN("%s: unknown %s argument '%s'", name, type, node->name);
         }
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 6e255763d..86fd3f9be 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -539,6 +539,17 @@
                 compliance with the IEEE 802.1D specification for bridges.
                 Default is enabled, set to <code>false</code> to disable.</dd>
             </dl>
+            <dl>
+              <dt><code>header_cache</code></dt>
+              <dd>Optional.  Enable caching of tunnel headers and the output
+		path.  This can lead to a significant performance increase
+		without changing behavior.  In general it should not be
+		necessary to adjust this setting.  However, the caching can
+		bypass certain components of the IP stack (such as IP tables)
+		and it may be useful to disable it if these features are
+		required or as a debugging measure.  Default is enabled, set to
+		<code>false</code> to disable.</dd>
+            </dl>
           </dd>
           <dt><code>capwap</code></dt>
           <dd>Ethernet tunneling over the UDP transport portion of CAPWAP
@@ -594,6 +605,17 @@
                 compliance with the IEEE 802.1D specification for bridges.
                 Default is enabled, set to <code>false</code> to disable.</dd>
             </dl>
+            <dl>
+              <dt><code>header_cache</code></dt>
+              <dd>Optional.  Enable caching of tunnel headers and the output
+		path.  This can lead to a significant performance increase
+		without changing behavior.  In general it should not be
+		necessary to adjust this setting.  However, the caching can
+		bypass certain components of the IP stack (such as IP tables)
+		and it may be useful to disable it if these features are
+		required or as a debugging measure.  Default is enabled, set to
+		<code>false</code> to disable.</dd>
+            </dl>
           </dd>
           <dt><code>patch</code></dt>
           <dd>
-- 
cgit v1.2.1


From f10a03343b5dd77a41dfefad150b65863af38a00 Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Thu, 16 Sep 2010 15:37:16 -0700
Subject: debian: Allow automake versions greater than or equal to 1.10

---
 debian/control | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/debian/control b/debian/control
index b7f2248f9..53e5b98d6 100644
--- a/debian/control
+++ b/debian/control
@@ -4,8 +4,8 @@ Priority: extra
 Maintainer: Open vSwitch developers <dev@openvswitch.org>
 Uploaders: Ben Pfaff <pfaffben@debian.org>, Simon Horman <horms@debian.org>
 Build-Depends:
- debhelper (>= 5), autoconf (>= 2.64), automake1.10, libssl-dev,
- pkg-config (>= 0.21), po-debconf, bzip2, openssl, python,
+ debhelper (>= 5), autoconf (>= 2.64), automake (>= 1.10) | automake1.10, 
+ libssl-dev, pkg-config (>= 0.21), po-debconf, bzip2, openssl, python,
  python-support (>= 0.8.4)
 Standards-Version: 3.9.1
 Homepage: http://openvswitch.org/
-- 
cgit v1.2.1


From a3acf0b0c46a28d6c891086e054d81dd915eea2e Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Thu, 16 Sep 2010 19:19:11 -0700
Subject: debian: Add support for GRE-over-IPsec

The ovs-monitor-ipsec daemon monitors the Interface table for GRE
entries.  If an entry specifies other-config parameters "ipsec-local-ip"
and ("ipsec-psk" or "ipsec-cert"), it will create the appropriate
security associations so that all GRE traffic to the remote host will be
encrypted.  In order for the two GRE tunnels to communicate, both sides
need to be configured for IPsec with appropriate authentication.

Currently, ovs-monitor-ipsec does not support certificate authentication
or ensure that an interface is actually attached to a bridge.  Both of
these issues will be addressed in a forthcoming patch.

NB: While GRE-over-IPsec should work on any system with a relatively
recent racoon and setkey, it has only been tested on Debian.  As such,
only Debian packaging has been provided.
---
 debian/.gitignore                |   1 +
 debian/automake.mk               |   4 +
 debian/control                   |  16 +-
 debian/openvswitch-ipsec.dirs    |   1 +
 debian/openvswitch-ipsec.init    | 184 +++++++++++++++++++++
 debian/openvswitch-ipsec.install |   1 +
 debian/ovs-monitor-ipsec         | 349 +++++++++++++++++++++++++++++++++++++++
 vswitchd/vswitch.ovsschema       |   2 +
 vswitchd/vswitch.xml             |  21 +++
 9 files changed, 578 insertions(+), 1 deletion(-)
 create mode 100644 debian/openvswitch-ipsec.dirs
 create mode 100755 debian/openvswitch-ipsec.init
 create mode 100644 debian/openvswitch-ipsec.install
 create mode 100755 debian/ovs-monitor-ipsec

diff --git a/debian/.gitignore b/debian/.gitignore
index 7f43aa6ed..24e62d94b 100644
--- a/debian/.gitignore
+++ b/debian/.gitignore
@@ -12,6 +12,7 @@
 /openvswitch-controller
 /openvswitch-datapath-source
 /openvswitch-dbg
+/openvswitch-ipsec
 /openvswitch-pki
 /openvswitch-pki-server
 /openvswitch-switch
diff --git a/debian/automake.mk b/debian/automake.mk
index c768d56b5..20432062a 100644
--- a/debian/automake.mk
+++ b/debian/automake.mk
@@ -24,6 +24,9 @@ EXTRA_DIST += \
 	debian/openvswitch-datapath-source.copyright \
 	debian/openvswitch-datapath-source.dirs \
 	debian/openvswitch-datapath-source.install \
+	debian/openvswitch-ipsec.dirs \
+	debian/openvswitch-ipsec.init \
+	debian/openvswitch-ipsec.install \
 	debian/openvswitch-pki-server.apache2 \
 	debian/openvswitch-pki-server.dirs \
 	debian/openvswitch-pki-server.install \
@@ -39,6 +42,7 @@ EXTRA_DIST += \
 	debian/openvswitch-switch.postrm \
 	debian/openvswitch-switch.template \
 	debian/ovs-bugtool \
+	debian/ovs-monitor-ipsec \
 	debian/python-openvswitch.dirs \
 	debian/python-openvswitch.install \
 	debian/rules \
diff --git a/debian/control b/debian/control
index 53e5b98d6..622daeb3b 100644
--- a/debian/control
+++ b/debian/control
@@ -41,6 +41,19 @@ Description: Open vSwitch switch implementations
  .
  Open vSwitch is a full-featured software-based Ethernet switch.
 
+Package: openvswitch-ipsec
+Architecture: any
+Depends:
+ ${shlibs:Depends}, ${misc:Depends}, ${python:Depends}, ipsec-tools, racoon,
+ openvswitch-common (= ${binary:Version}),
+ openvswitch-switch (= ${binary:Version}),
+ python-openvswitch (= ${binary:Version})
+Description: Open vSwitch GRE-over-IPsec support
+ The ovs-monitor-ipsec script provides support for encrypting GRE
+ tunnels with IPsec.
+ .
+ Open vSwitch is a full-featured software-based Ethernet switch.
+
 Package: openvswitch-pki
 Architecture: all
 Depends:
@@ -90,13 +103,14 @@ Depends:
  ${shlibs:Depends}, ${misc:Depends},
  openvswitch-common (= ${binary:Version}),
  openvswitch-controller (= ${binary:Version}),
+ openvswitch-ipsec (= ${binary:Version}),
  openvswitch-switch (= ${binary:Version})
 Description: Debug symbols for Open vSwitch packages
  This package contains the debug symbols for all the other openvswitch-*
  packages.  Install it to debug one of them or to examine a core dump
  produced by one of them.
 
-Package: python-openvswitch 
+Package: python-openvswitch
 Architecture: all
 Section: python
 Depends: ${python:Depends}, openvswitch-switch (= ${binary:Version})
diff --git a/debian/openvswitch-ipsec.dirs b/debian/openvswitch-ipsec.dirs
new file mode 100644
index 000000000..02130d0e9
--- /dev/null
+++ b/debian/openvswitch-ipsec.dirs
@@ -0,0 +1 @@
+usr/share/openvswitch/scripts
diff --git a/debian/openvswitch-ipsec.init b/debian/openvswitch-ipsec.init
new file mode 100755
index 000000000..f3c9a13a0
--- /dev/null
+++ b/debian/openvswitch-ipsec.init
@@ -0,0 +1,184 @@
+#!/bin/sh
+#
+# Copyright (c) 2007, 2009 Javier Fernandez-Sanguino <jfs@debian.org>
+#
+# This is free software; you may redistribute it and/or modify
+# it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2,
+# or (at your option) any later version.
+#
+# This is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License with
+# the Debian operating system, in /usr/share/common-licenses/GPL;  if
+# not, write to the Free Software Foundation, Inc., 59 Temple Place,
+# Suite 330, Boston, MA 02111-1307 USA
+#
+### BEGIN INIT INFO
+# Provides:          openvswitch-ipsec
+# Required-Start:    $network $local_fs $remote_fs
+# Required-Stop:     $remote_fs
+# Default-Start:     2 3 4 5
+# Default-Stop:      0 1 6
+# Short-Description: Open vSwitch GRE-over-IPsec daemon
+### END INIT INFO
+
+PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
+
+DAEMON=/usr/share/openvswitch/scripts/ovs-monitor-ipsec # Daemon's location
+NAME=ovs-monitor-ipsec          # Introduce the short server's name here
+LOGDIR=/var/log/openvswitch     # Log directory to use
+
+PIDFILE=/var/run/openvswitch/$NAME.pid 
+
+test -x $DAEMON || exit 0
+
+. /lib/lsb/init-functions
+
+DIETIME=10              # Time to wait for the server to die, in seconds.
+                        # force_stop and restart read this as $DIETIME;
+                        # if it is set too low some servers may not die
+                        # gracefully and 'restart' will not work.
+                        
+set -e
+
+running_pid() {
+# Check if a given process pid's cmdline matches a given name
+    pid=$1
+    name=$2
+    [ -z "$pid" ] && return 1 
+    [ ! -d /proc/$pid ] &&  return 1
+    cmd=`cat /proc/$pid/cmdline | tr "\000" " "|cut -d " " -f 2`
+    # Is this the expected server
+    [ "$cmd" != "$name" ] &&  return 1
+    return 0
+}
+
+running() {
+# Check if the process is running looking at /proc
+# (works for all users)
+
+    # No pidfile, probably no daemon present
+    [ ! -f "$PIDFILE" ] && return 1
+    pid=`cat $PIDFILE`
+    running_pid $pid $DAEMON || return 1
+    return 0
+}
+
+start_server() {
+    PYTHONPATH=/usr/share/openvswitch/python \
+           /usr/share/openvswitch/scripts/ovs-monitor-ipsec \
+           --pidfile-name=$PIDFILE --detach --monitor \
+           unix:/var/run/openvswitch/db.sock
+
+    return 0
+}
+
+stop_server() {
+    if [ -e $PIDFILE ]; then
+        kill `cat $PIDFILE`
+    fi
+
+    return 0
+}
+
+force_stop() {
+# Force the process to die killing it manually
+    [ ! -e "$PIDFILE" ] && return
+    if running ; then
+        kill -15 $pid
+        # Is it really dead?
+        sleep "$DIETIME"s
+        if running ; then
+            kill -9 $pid
+            sleep "$DIETIME"s
+            if running ; then
+                echo "Cannot kill $NAME (pid=$pid)!"
+                exit 1
+            fi
+        fi
+    fi
+    rm -f $PIDFILE
+}
+
+
+case "$1" in
+  start)
+        log_daemon_msg "Starting $NAME"
+        # Check if it's running first
+        if running ;  then
+            log_progress_msg "apparently already running"
+            log_end_msg 0
+            exit 0
+        fi
+        if start_server && running ;  then
+            # It's ok, the server started and is running
+            log_end_msg 0
+        else
+            # Either we could not start it or it is not running
+            # after we did
+            # NOTE: Some servers might die some time after they start,
+            # this code does not try to detect this and might give
+            # a false positive (use 'status' for that)
+            log_end_msg 1
+        fi
+        ;;
+  stop)
+        log_daemon_msg "Stopping $NAME"
+        if running ; then
+            # Only stop the server if we see it running
+            stop_server
+            log_end_msg $?
+        else
+            # If it's not running don't do anything
+            log_progress_msg "apparently not running"
+            log_end_msg 0
+            exit 0
+        fi
+        ;;
+  force-stop)
+        # First try to stop gracefully the program
+        $0 stop
+        if running; then
+            # If it's still running try to kill it more forcefully
+            log_daemon_msg "Stopping (force) $NAME"
+            force_stop
+            log_end_msg $?
+        fi
+        ;;
+  restart|force-reload)
+        log_daemon_msg "Restarting $NAME"
+        stop_server
+        # Wait some sensible amount, some server need this
+        [ -n "$DIETIME" ] && sleep $DIETIME
+        start_server
+        running
+        log_end_msg $?
+        ;;
+  status)
+        log_daemon_msg "Checking status of $NAME"
+        if running ;  then
+            log_progress_msg "running"
+            log_end_msg 0
+        else
+            log_progress_msg "apparently not running"
+            log_end_msg 1
+            exit 1
+        fi
+        ;;
+  # Use this if the daemon cannot reload
+  reload)
+        log_warning_msg "Reloading $NAME daemon: not implemented, as the daemon"
+        log_warning_msg "cannot re-read the config file (use restart)."
+        ;;
+  *)
+        N=/etc/init.d/openvswitch-ipsec
+        echo "Usage: $N {start|stop|force-stop|restart|force-reload|status}" >&2
+        exit 1
+        ;;
+esac
+
+exit 0
diff --git a/debian/openvswitch-ipsec.install b/debian/openvswitch-ipsec.install
new file mode 100644
index 000000000..72cacfa25
--- /dev/null
+++ b/debian/openvswitch-ipsec.install
@@ -0,0 +1 @@
+debian/ovs-monitor-ipsec usr/share/openvswitch/scripts
diff --git a/debian/ovs-monitor-ipsec b/debian/ovs-monitor-ipsec
new file mode 100755
index 000000000..1caece3a9
--- /dev/null
+++ b/debian/ovs-monitor-ipsec
@@ -0,0 +1,349 @@
+#!/usr/bin/python
+# Copyright (c) 2009, 2010 Nicira Networks
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# A daemon to monitor attempts to create GRE-over-IPsec tunnels.
+# Uses racoon and setkey to support the configuration.  Assumes that
+# OVS has complete control over IPsec configuration for the box.
+
+# xxx To-do:
+#  - Doesn't actually check that Interface is connected to bridge
+#  - Doesn't support cert authentication
+
+
+import getopt
+import logging, logging.handlers
+import os
+import stat
+import subprocess
+import sys
+
+from ovs.db import error
+from ovs.db import types
+import ovs.util
+import ovs.daemon
+import ovs.db.idl
+
+
+# By default log messages as DAEMON into syslog
+s_log = logging.getLogger("ovs-monitor-ipsec")
+l_handler = logging.handlers.SysLogHandler(
+        "/dev/log",
+        facility=logging.handlers.SysLogHandler.LOG_DAEMON)
+l_formatter = logging.Formatter('%(filename)s: %(levelname)s: %(message)s')
+l_handler.setFormatter(l_formatter)
+s_log.addHandler(l_handler)
+
+
+setkey = "/usr/sbin/setkey"
+
+# Class to configure the racoon daemon, which handles IKE negotiation
+class Racoon:
+    # Default locations for files
+    conf_file = "/etc/racoon/racoon.conf"
+    cert_file = "/etc/racoon/certs"
+    psk_file = "/etc/racoon/psk.txt"
+
+    # Default racoon configuration file we use for IKE
+    conf_template = """# Configuration file generated by Open vSwitch
+#
+# Do not modify by hand!
+
+path pre_shared_key "/etc/racoon/psk.txt";
+path certificate "/etc/racoon/certs";
+
+remote anonymous {
+        exchange_mode main;
+        proposal {
+                encryption_algorithm aes;
+                hash_algorithm sha1;
+                authentication_method pre_shared_key;
+                dh_group 2;
+        }
+}
+
+sainfo anonymous {
+        pfs_group 2;
+        lifetime time 1 hour;
+        encryption_algorithm aes;
+        authentication_algorithm hmac_sha1, hmac_md5;
+        compression_algorithm deflate;
+}
+"""
+
+    def __init__(self):
+        self.psk_hosts = {}
+        self.cert_hosts = {}
+
+        # Replace racoon's conf file with our template
+        f = open(Racoon.conf_file, "w")
+        f.write(Racoon.conf_template)
+        f.close()
+
+        # Clear out any pre-shared keys
+        self.commit_psk()
+
+        self.reload()
+
+    def reload(self):
+        exitcode = subprocess.call(["/etc/init.d/racoon", "reload"])
+        if exitcode != 0:
+            s_log.warning("couldn't reload racoon")
+
+    def commit_psk(self):
+        f = open(Racoon.psk_file, 'w')
+ 
+        # The file must only be accessible by root
+        os.chmod(Racoon.psk_file, stat.S_IRUSR | stat.S_IWUSR)
+
+        f.write("# Generated by Open vSwitch...do not modify by hand!\n\n")
+        for host, psk in self.psk_hosts.iteritems():
+            f.write("%s   %s\n" % (host, psk))
+        f.close()
+
+    def add_psk(self, host, psk):
+        self.psk_hosts[host] = psk
+        self.commit_psk()
+
+    def del_psk(self, host):
+        if host in self.psk_hosts:
+            del self.psk_hosts[host]
+            self.commit_psk()
+
+
+# Class to configure IPsec on a system using racoon for IKE and setkey
+# for maintaining the Security Association Database (SAD) and Security
+# Policy Database (SPD).  Only policies for GRE are supported.
+class IPsec:
+    def __init__(self):
+        self.sad_flush()
+        self.spd_flush()
+        self.racoon = Racoon()
+
+    def call_setkey(self, cmds):
+        try:
+            p = subprocess.Popen([setkey, "-c"], stdin=subprocess.PIPE, 
+                    stdout=subprocess.PIPE)
+        except:
+            s_log.error("could not call setkey")
+            sys.exit(1)
+
+        # xxx It is safer to pass the string into the communicate()
+        # xxx method, but it didn't work for slightly longer commands.
+        # xxx An alternative may need to be found.
+        p.stdin.write(cmds)
+        return p.communicate()[0]
+
+    def get_spi(self, local_ip, remote_ip, proto="esp"):
+        # Run the setkey dump command to retrieve the SAD.  Then, parse
+        # the output looking for SPI buried in the output.  Note that
+        # multiple SAD entries can exist for the same "flow", since an
+        # older entry could be in a "dying" state.
+        spi_list = []
+        host_line = "%s %s" % (local_ip, remote_ip)
+        results = self.call_setkey("dump ;").split("\n")
+        for i in range(len(results)):
+            if results[i].strip() == host_line:
+                # The SPI is in the line following the host pair
+                spi_line = results[i+1]
+                if (spi_line[1:4] == proto):
+                    spi = spi_line.split()[2]
+                    spi_list.append(spi.split('(')[1].rstrip(')'))
+        return spi_list
+
+    def sad_flush(self):
+        self.call_setkey("flush;")
+
+    def sad_del(self, local_ip, remote_ip):
+        # To delete all SAD entries, we should be able to use setkey's
+        # "deleteall" command.  Unfortunately, it's fundamentally broken
+        # on Linux and not documented as such.
+        cmds = ""
+
+        # Delete local_ip->remote_ip SAD entries
+        spi_list = self.get_spi(local_ip, remote_ip)
+        for spi in spi_list:
+            cmds += "delete %s %s esp %s;\n" % (local_ip, remote_ip, spi)
+
+        # Delete remote_ip->local_ip SAD entries
+        spi_list = self.get_spi(remote_ip, local_ip)
+        for spi in spi_list:
+            cmds += "delete %s %s esp %s;\n" % (remote_ip, local_ip, spi)
+
+        if cmds:
+            self.call_setkey(cmds)
+
+    def spd_flush(self):
+        self.call_setkey("spdflush;")
+
+    def spd_add(self, local_ip, remote_ip):
+        cmds = ("spdadd %s %s gre -P out ipsec esp/transport//default;" %
+                    (local_ip, remote_ip))
+        cmds += "\n"
+        cmds += ("spdadd %s %s gre -P in ipsec esp/transport//default;" %
+                    (remote_ip, local_ip))
+        self.call_setkey(cmds)
+
+    def spd_del(self, local_ip, remote_ip):
+        cmds = "spddelete %s %s gre -P out;" % (local_ip, remote_ip)
+        cmds += "\n"
+        cmds += "spddelete %s %s gre -P in;" % (remote_ip, local_ip)
+        self.call_setkey(cmds)
+
+    def ipsec_cert_del(self, local_ip, remote_ip):
+        # Need to support cert...right now only PSK supported
+        self.racoon.del_psk(remote_ip)
+        self.spd_del(local_ip, remote_ip)
+        self.sad_del(local_ip, remote_ip)
+
+    def ipsec_cert_update(self, local_ip, remote_ip, cert):
+        # Need to support cert...right now only PSK supported
+        self.racoon.add_psk(remote_ip, "abc12345")
+        self.spd_add(local_ip, remote_ip)
+
+    def ipsec_psk_del(self, local_ip, remote_ip):
+        self.racoon.del_psk(remote_ip)
+        self.spd_del(local_ip, remote_ip)
+        self.sad_del(local_ip, remote_ip)
+
+    def ipsec_psk_update(self, local_ip, remote_ip, psk):
+        self.racoon.add_psk(remote_ip, psk)
+        self.spd_add(local_ip, remote_ip)
+
+
+def keep_table_columns(schema, table_name, column_types):
+    table = schema.tables.get(table_name)
+    if not table:
+        raise error.Error("schema has no %s table" % table_name)
+
+    new_columns = {}
+    for column_name, column_type in column_types.iteritems():
+        column = table.columns.get(column_name)
+        if not column:
+            raise error.Error("%s table schema lacks %s column"
+                              % (table_name, column_name))
+        if column.type != column_type:
+            raise error.Error("%s column in %s table has type \"%s\", "
+                              "expected type \"%s\""
+                              % (column_name, table_name,
+                                 column.type.toEnglish(),
+                                 column_type.toEnglish()))
+        new_columns[column_name] = column
+    table.columns = new_columns
+    return table
+ 
+def monitor_uuid_schema_cb(schema):
+    string_type = types.Type(types.BaseType(types.StringType))
+    string_map_type = types.Type(types.BaseType(types.StringType),
+                                 types.BaseType(types.StringType),
+                                 0, sys.maxint)
+ 
+    new_tables = {}
+    new_tables["Interface"] = keep_table_columns(
+        schema, "Interface", {"name": string_type,
+                              "type": string_type,
+                              "options": string_map_type,
+                              "other_config": string_map_type})
+    schema.tables = new_tables
+
+def usage():
+    print "usage: %s [OPTIONS] DATABASE" % sys.argv[0]
+    print "where DATABASE is a socket on which ovsdb-server is listening."
+    ovs.daemon.usage()
+    print "Other options:"
+    print "  -h, --help               display this help message"
+    sys.exit(0)
+ 
+def main(argv):
+    try:
+        options, args = getopt.gnu_getopt(
+            argv[1:], 'h', ['help'] + ovs.daemon.LONG_OPTIONS)
+    except getopt.GetoptError, geo:
+        sys.stderr.write("%s: %s\n" % (ovs.util.PROGRAM_NAME, geo.msg))
+        sys.exit(1)
+ 
+    for key, value in options:
+        if key in ['-h', '--help']:
+            usage()
+        elif not ovs.daemon.parse_opt(key, value):
+            sys.stderr.write("%s: unhandled option %s\n"
+                             % (ovs.util.PROGRAM_NAME, key))
+            sys.exit(1)
+ 
+    if len(args) != 1:
+        sys.stderr.write("%s: exactly one nonoption argument is required "
+                         "(use --help for help)\n" % ovs.util.PROGRAM_NAME)
+        sys.exit(1)
+
+    ovs.daemon.die_if_already_running()
+ 
+    remote = args[0]
+    idl = ovs.db.idl.Idl(remote, "Open_vSwitch", monitor_uuid_schema_cb)
+
+    ovs.daemon.daemonize()
+
+    ipsec = IPsec()
+
+    interfaces = {}
+    while True:
+        if not idl.run():
+            poller = ovs.poller.Poller()
+            idl.wait(poller)
+            poller.block()
+            continue
+ 
+        new_interfaces = {}
+        for rec in idl.data["Interface"].itervalues():
+            name = rec.name.as_scalar()
+            local_ip = rec.other_config.get("ipsec_local_ip")
+            if rec.type.as_scalar() == "gre" and local_ip:
+                new_interfaces[name] = {
+                        "remote_ip": rec.options.get("remote_ip"),
+                        "local_ip": local_ip,
+                        "ipsec_cert": rec.other_config.get("ipsec_cert"),
+                        "ipsec_psk": rec.other_config.get("ipsec_psk") }
+ 
+        if interfaces != new_interfaces:
+            for name, vals in interfaces.items():
+                if name not in new_interfaces.keys():
+                    ipsec.ipsec_cert_del(vals["local_ip"], vals["remote_ip"])
+            for name, vals in new_interfaces.items():
+                if vals == interfaces.get(name):
+                    s_log.warning(
+                        "configuration changed for %s, need to delete "
+                        "interface first" % name)
+                    continue
+
+                if vals["ipsec_cert"]:
+                    ipsec.ipsec_cert_update(vals["local_ip"],
+                            vals["remote_ip"], vals["ipsec_cert"])
+                elif vals["ipsec_psk"]:
+                    ipsec.ipsec_psk_update(vals["local_ip"], 
+                            vals["remote_ip"], vals["ipsec_psk"])
+                else:
+                    s_log.warning(
+                        "no ipsec_cert or ipsec_psk defined for %s" % name)
+                    continue
+
+            interfaces = new_interfaces
+ 
+if __name__ == '__main__':
+    try:
+        main(sys.argv)
+    except SystemExit:
+        # Let system.exit() calls complete normally
+        raise
+    except:
+        s_log.exception("traceback")
diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
index a7d257036..07dd79fbc 100644
--- a/vswitchd/vswitch.ovsschema
+++ b/vswitchd/vswitch.ovsschema
@@ -134,6 +134,8 @@
        "ofport": {
          "type": {"key": "integer", "min": 0, "max": 1},
          "ephemeral": true},
+       "other_config": {
+         "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}},
        "statistics": {
          "type": {"key": "string", "value": "integer", "min": 0, "max": "unlimited"},
          "ephemeral": true},
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 86fd3f9be..5b5655ddd 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -775,6 +775,27 @@
         </dl>
       </column>
 
+      <column name="other_config">
+        Key-value pairs for rarely used interface features.  Currently,
+        the only keys are for configuring GRE-over-IPsec, which is only
+        available through the <code>openvswitch-ipsec</code> package for
+        Debian.  The currently defined key-value pairs are:
+        <dl>
+          <dt><code>ipsec-local-ip</code></dt>
+          <dd>Required key for GRE-over-IPsec interfaces.  Additionally,
+            the <ref column="type"/> must be <code>gre</code> and the
+            <code>ipsec-psk</code> <ref column="other_config"/> key must
+            be set.  The <code>in_key</code>, <code>out_key</code>, and
+            <code>key</code> <ref column="options"/> must not be
+            set.</dd>
+          <dt><code>ipsec-psk</code></dt>
+          <dd>Required key for GRE-over-IPsec interfaces.  Specifies a
+            pre-shared key for authentication that must be identical on 
+            both sides of the tunnel.  Additionally, the
+            <code>ipsec-local-ip</code> key must also be set.</dd>
+        </dl>
+      </column>
+
       <column name="statistics">
         <p>
           Key-value pairs that report interface statistics.  The current
-- 
cgit v1.2.1


From e6494c64e35f62411f770be086ba6a0914afaf5d Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Wed, 22 Sep 2010 22:52:04 -0700
Subject: vswitch: Reference ipsec_local_ip and ipsec_psk with underscores

The GRE-over-IPsec documentation describes the keys as "ipsec-local-ip"
and "ipsec-psk" when they actually use underscores.
---
 vswitchd/vswitch.xml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 5b5655ddd..bb3f648fb 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -781,18 +781,18 @@
         available through the <code>openvswitch-ipsec</code> package for
         Debian.  The currently defined key-value pairs are:
         <dl>
-          <dt><code>ipsec-local-ip</code></dt>
+          <dt><code>ipsec_local_ip</code></dt>
           <dd>Required key for GRE-over-IPsec interfaces.  Additionally,
             the <ref column="type"/> must be <code>gre</code> and the
-            <code>ipsec-psk</code> <ref column="other_config"/> key must
+            <code>ipsec_psk</code> <ref column="other_config"/> key must
             be set.  The <code>in_key</code>, <code>out_key</code>, and
             <code>key</code> <ref column="options"/> must not be
             set.</dd>
-          <dt><code>ipsec-psk</code></dt>
+          <dt><code>ipsec_psk</code></dt>
           <dd>Required key for GRE-over-IPsec interfaces.  Specifies a
             pre-shared key for authentication that must be identical on 
             both sides of the tunnel.  Additionally, the
-            <code>ipsec-local-ip</code> key must also be set.</dd>
+            <code>ipsec_local_ip</code> key must also be set.</dd>
         </dl>
       </column>
 
-- 
cgit v1.2.1


From b13300c7178527f85937604022a755e174a0e1aa Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Tue, 21 Sep 2010 23:57:13 -0700
Subject: xenserver: Only put the primary XenServer UUID in default bridge-id

This patch defensively guarantees that the first id in
xs-network-uuids will belong to the primary network (as opposed to
a vlan).  Given that the primary network id comes first, it parses
xs-network-uuids and only copies the primary id to bridge-id when
monitor-external-ids is run.

Feature #3647

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Reviewed-by: Ben Pfaff <blp@nicira.com>
---
 tests/interface-reconfigure.at                                 |  6 +++---
 xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py |  7 ++++++-
 xenserver/usr_share_openvswitch_scripts_monitor-external-ids   | 10 ++++++++--
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/tests/interface-reconfigure.at b/tests/interface-reconfigure.at
index 880f4a508..8566102d7 100644
--- a/tests/interface-reconfigure.at
+++ b/tests/interface-reconfigure.at
@@ -802,7 +802,7 @@ Applying changes to /etc/sysconfig/network-scripts/ifcfg-xapi1 configuration
     --fake-iface add-bond xapi1 bond0 eth0 eth1
     set Port bond0 MAC="00:22:19:22:4b:af" bond_downdelay=200 other-config:"bond-miimon"=100 other-config:"bond-use_carrier"=1 other-config:"bond-mode"="balance-slb" bond_updelay=31000
     set Bridge xapi1 other-config:hwaddr="00:22:19:22:4b:af"
-    br-set-external-id xapi1 xs-network-uuids 99be2da4-6c33-6f8e-49ea-3bc592fe3c85;45cbbb43-113d-a712-3231-c6463f253cef
+    br-set-external-id xapi1 xs-network-uuids 45cbbb43-113d-a712-3231-c6463f253cef;99be2da4-6c33-6f8e-49ea-3bc592fe3c85
 /sbin/ifup xapi1
 action_up: bring up bond0
 /sbin/ifconfig bond0 up
@@ -883,10 +883,10 @@ Applying changes to /etc/sysconfig/network-scripts/ifcfg-xapi2 configuration
     --fake-iface add-bond xapi1 bond0 eth0 eth1
     set Port bond0 MAC="00:22:19:22:4b:af" bond_downdelay=200 other-config:"bond-miimon"=100 other-config:"bond-use_carrier"=1 other-config:"bond-mode"="balance-slb" bond_updelay=31000
     set Bridge xapi1 other-config:hwaddr="00:22:19:22:4b:af"
-    br-set-external-id xapi1 xs-network-uuids 99be2da4-6c33-6f8e-49ea-3bc592fe3c85;45cbbb43-113d-a712-3231-c6463f253cef
+    br-set-external-id xapi1 xs-network-uuids 45cbbb43-113d-a712-3231-c6463f253cef;99be2da4-6c33-6f8e-49ea-3bc592fe3c85
     --if-exists del-br xapi2
     --may-exist add-br xapi2 xapi1 4
-    br-set-external-id xapi2 xs-network-uuids 99be2da4-6c33-6f8e-49ea-3bc592fe3c85;45cbbb43-113d-a712-3231-c6463f253cef
+    br-set-external-id xapi2 xs-network-uuids 45cbbb43-113d-a712-3231-c6463f253cef;99be2da4-6c33-6f8e-49ea-3bc592fe3c85
     set Interface xapi2 MAC="00:22:19:22:4b:af"
 /sbin/ifup xapi2
 action_up: bring up bond0
diff --git a/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py b/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py
index bc311f803..c352594ac 100644
--- a/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py
+++ b/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py
@@ -342,7 +342,12 @@ def set_br_external_ids(pif):
         #    log("Network PIF %s not currently attached (%s)" % (rec['uuid'],pifrec['uuid']))
         #    continue
         nwrec = db().get_network_record(rec['network'])
-        xs_network_uuids += [nwrec['uuid']]
+
+        uuid = nwrec['uuid']
+        if pif_is_vlan(nwpif):
+            xs_network_uuids.append(uuid)
+        else:
+            xs_network_uuids.insert(0, uuid)
 
     vsctl_argv = []
     vsctl_argv += ['# configure xs-network-uuids']
diff --git a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids
index 45b3dd7bf..2c2844c63 100755
--- a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids
+++ b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids
@@ -116,8 +116,14 @@ def update_network_uuids(name, ids):
 
 def update_bridge_id(name, ids):
     id = get_bridge_id(name, ids.get("xs-network-uuids"))
-    if ids.get("bridge-id") != id and id:
-        set_external_id("Bridge", name, "bridge-id", id)
+
+    if not id:
+        return
+
+    primary_id = id.split(";")[0]
+
+    if ids.get("bridge-id") != primary_id:
+        set_external_id("Bridge", name, "bridge-id", primary_id)
 
 def update_iface_id(name, ids):
     id = get_iface_id(name, ids.get("xs-vif-uuid"))
-- 
cgit v1.2.1


From 88cd3cc9f41f916fd51ad1cd02cbbfb50d9b476d Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Thu, 23 Sep 2010 10:55:15 +0900
Subject: debian: update copyright for xenserver/

Further to the recent work done by Ben Pfaff and Ian Campbell to
clarify the license of all the files in xenserver/, the debian/copyright.in
file seems to need updating.

Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Ben Pfaff <blp@nicira.com>
---
 debian/copyright.in | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 105 insertions(+), 1 deletion(-)

diff --git a/debian/copyright.in b/debian/copyright.in
index 6a7370e39..6143f1869 100644
--- a/debian/copyright.in
+++ b/debian/copyright.in
@@ -15,7 +15,54 @@ Upstream Copyright Holders:
 
 License:
 
-* The following components are licensed under the GNU General Public Licence
+* The following components are licensed under the
+  GNU Lesser General Public Licence version 2.1 only
+  with the exception clause below as a pre-amble.
+
+	xenserver/etc_xensource_scripts_vif
+	xenserver/opt_xensource_libexec_InterfaceReconfigure.py
+	xenserver/opt_xensource_libexec_InterfaceReconfigureBridge.py
+	xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py
+	xenserver/opt_xensource_libexec_interface-reconfigure
+	xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py
+	xenserver/usr_sbin_xen-bugtool
+
+	* These components are only distributed in the source package.
+	  They do not appear in any binary packages.
+
+  On Debian systems, the complete text of the
+  GNU Lesser General Public Licence version 2.1 can be found in
+  `/usr/share/common-licenses/LGPL-2.1'
+
+  The exception clause pre-amble reads:
+
+  As a special exception to the GNU Lesser General Public License, you
+  may link, statically or dynamically, a "work that uses the Library"
+  with a publicly distributed version of the Library to produce an
+  executable file containing portions of the Library, and distribute
+  that executable file under terms of your choice, without any of the
+  additional requirements listed in clause 6 of the GNU Lesser General
+  Public License.  By "a publicly distributed version of the Library",
+  we mean either the unmodified Library as distributed, or a
+  modified version of the Library that is distributed under the
+  conditions defined in clause 3 of the GNU Library General Public
+  License.  This exception does not however invalidate any other reasons
+  why the executable file might be covered by the GNU Lesser General
+  Public License.
+
+* The following component is licensed under the
+  GNU Lesser General Public Licence version 2.1.
+
+	xenserver/usr_sbin_xen-bugtool
+
+	* This component is only distributed in the source package.
+	  It does not appear in any binary packages.
+
+  On Debian systems, the complete text of the
+  GNU Lesser General Public Licence version 2.1 can be found in
+  `/usr/share/common-licenses/LGPL-2.1'
+
+* The following component is licensed under the GNU General Public Licence
   version 2.
 
 	datapath/
@@ -33,6 +80,63 @@ License:
   On Debian systems, the complete text of the Apache License version 2.0
   can be found in '/usr/share/common-licenses/Apache-2.0'.
 
+* The following component is licenced under the
+  Python Software Foundation License Version 2.
+
+	xenserver/uuid.py
+
+	* This component is only distributed in the source package.
+	  It does not appear in any binary packages.
+
+   PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
+   --------------------------------------------
+
+   1. This LICENSE AGREEMENT is between the Python Software Foundation
+   ("PSF"), and the Individual or Organization ("Licensee") accessing and
+   otherwise using this software ("Python") in source or binary form and
+   its associated documentation.
+
+   2. Subject to the terms and conditions of this License Agreement, PSF
+   hereby grants Licensee a nonexclusive, royalty-free, world-wide
+   license to reproduce, analyze, test, perform and/or display publicly,
+   prepare derivative works, distribute, and otherwise use Python
+   alone or in any derivative version, provided, however, that PSF's
+   License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
+   2001, 2002, 2003, 2004, 2005, 2006, 2007 Python Software Foundation;
+   All Rights Reserved" are retained in Python alone or in any derivative
+   version prepared by Licensee.
+
+   3. In the event Licensee prepares a derivative work that is based on
+   or incorporates Python or any part thereof, and wants to make
+   the derivative work available to others as provided herein, then
+   Licensee hereby agrees to include in any such work a brief summary of
+   the changes made to Python.
+
+   4. PSF is making Python available to Licensee on an "AS IS"
+   basis.  PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+   IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
+   DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
+   FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
+   INFRINGE ANY THIRD PARTY RIGHTS.
+
+   5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
+   FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
+   A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
+   OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+
+   6. This License Agreement will automatically terminate upon a material
+   breach of its terms and conditions.
+
+   7. Nothing in this License Agreement shall be deemed to create any
+   relationship of agency, partnership, or joint venture between PSF and
+   Licensee.  This License Agreement does not grant permission to use PSF
+   trademarks or trade name in a trademark sense to endorse or promote
+   products or services of Licensee, or any third party.
+
+   8. By copying, installing or otherwise using Python, Licensee
+   agrees to be bound by the terms and conditions of this License
+   Agreement.
+
 * ovs-bugtool is covered by the following license:
 
    This library is free software; you can redistribute it and/or
-- 
cgit v1.2.1


From 1095d0996bf83e99eab0be964b66403d3fff76f5 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 23 Sep 2010 11:20:10 -0700
Subject: debian: additional copyright update for xenserver/

Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Ben Pfaff <blp@nicira.com>
---
 debian/copyright.in | 26 +++-----------------------
 1 file changed, 3 insertions(+), 23 deletions(-)

diff --git a/debian/copyright.in b/debian/copyright.in
index 6143f1869..f131ea92d 100644
--- a/debian/copyright.in
+++ b/debian/copyright.in
@@ -50,10 +50,11 @@ License:
   why the executable file might be covered by the GNU Lesser General
   Public License.
 
-* The following component is licensed under the
+* The following components are licensed under the
   GNU Lesser General Public Licence version 2.1.
 
-	xenserver/usr_sbin_xen-bugtool
+	debian/ovs-bugtool
+	xenserver/usr_sbin_xen-bugtool [*]
 
 	* This component is only distributed in the source package.
 	  It does not appear in any binary packages.
@@ -137,27 +138,6 @@ License:
    agrees to be bound by the terms and conditions of this License
    Agreement.
 
-* ovs-bugtool is covered by the following license:
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of version 2.1 of the GNU Lesser General Public
-   License as published by the Free Software Foundation.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
-
-   Copyright (c) 2005, 2007 XenSource Ltd.
-   Copyright (c) 2010, Nicira Networks.
-
- On Debian systems, the complete text of the GNU Lesser General Public
- License can be found in `/usr/share/common-licenses/LGPL-2.1'.
-
 * All other components of this package are licensed under
   The Apache License Version 2.0.
 
-- 
cgit v1.2.1


From 5136364f41b74de00f86873a6f5be4c8a19cb5ad Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Mon, 13 Sep 2010 13:29:57 -0700
Subject: vlog: Add VLOG_WARN_ONCE() and similar macros.

---
 lib/dpif-linux.c |  6 +-----
 lib/vlog.h       | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index 2c688e3af..ec8a952b3 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -731,11 +731,7 @@ get_major(const char *target)
                 return major;
             }
         } else {
-            static bool warned;
-            if (!warned) {
-                VLOG_WARN("%s:%d: syntax error", fn, ln);
-            }
-            warned = true;
+            VLOG_WARN_ONCE("%s:%d: syntax error", fn, ln);
         }
     }
 
diff --git a/lib/vlog.h b/lib/vlog.h
index 03f17ea56..a4e143c1b 100644
--- a/lib/vlog.h
+++ b/lib/vlog.h
@@ -181,6 +181,12 @@ void vlog_rate_limit(const struct vlog_module *, enum vlog_level,
 #define VLOG_DROP_INFO(RL) vlog_should_drop(THIS_MODULE, VLL_INFO, RL)
 #define VLOG_DROP_DBG(RL) vlog_should_drop(THIS_MODULE, VLL_DBG, RL)
 
+/* Macros for logging at most once per execution. */
+#define VLOG_ERR_ONCE(...) VLOG_ONCE(VLL_ERR, __VA_ARGS__)
+#define VLOG_WARN_ONCE(...) VLOG_ONCE(VLL_WARN, __VA_ARGS__)
+#define VLOG_INFO_ONCE(...) VLOG_ONCE(VLL_INFO, __VA_ARGS__)
+#define VLOG_DBG_ONCE(...) VLOG_ONCE(VLL_DBG, __VA_ARGS__)
+
 /* Command line processing. */
 #define VLOG_OPTION_ENUMS OPT_LOG_FILE
 #define VLOG_LONG_OPTIONS                                   \
@@ -208,6 +214,15 @@ void vlog_usage(void);
             vlog_rate_limit(THIS_MODULE, LEVEL, RL, __VA_ARGS__);   \
         }                                                           \
     } while (0)
+#define VLOG_ONCE(LEVEL, ...)                       \
+    do {                                            \
+        static bool already_logged;                 \
+        if (!already_logged) {                      \
+            already_logged = true;                  \
+            vlog(THIS_MODULE, LEVEL, __VA_ARGS__);  \
+        }                                           \
+    } while (0)
+
 #define VLOG_DEFINE_MODULE__(MODULE)                                    \
         struct vlog_module VLM_##MODULE =                               \
         {                                                               \
-- 
cgit v1.2.1


From cbbdf81cf8bfcc87e141f66b93bf3bcf1220bff8 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Tue, 21 Sep 2010 14:27:02 -0700
Subject: daemon: Report number of crashes on monitor process command line.

---
 lib/daemon.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/daemon.c b/lib/daemon.c
index 548650464..6b61879db 100644
--- a/lib/daemon.c
+++ b/lib/daemon.c
@@ -330,11 +330,13 @@ monitor_daemon(pid_t daemon_pid)
     const char *saved_program_name;
     time_t last_restart;
     char *status_msg;
+    int crashes;
 
     saved_program_name = program_name;
     program_name = xasprintf("monitor(%s)", program_name);
     status_msg = xstrdup("healthy");
     last_restart = TIME_MIN;
+    crashes = 0;
     for (;;) {
         int retval;
         int status;
@@ -352,7 +354,8 @@ monitor_daemon(pid_t daemon_pid)
         } else if (retval == daemon_pid) {
             char *s = process_status_msg(status);
             free(status_msg);
-            status_msg = xasprintf("pid %lu died, %s",
+            status_msg = xasprintf("%d crashes: pid %lu died, %s",
+                                   ++crashes,
                                    (unsigned long int) daemon_pid, s);
             free(s);
 
-- 
cgit v1.2.1


From e4bd5e2a6c4223fd9cfb2dd1ad82a4eeb1b1fbe6 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 23 Sep 2010 09:39:47 -0700
Subject: daemon: Fix behavior of read_pidfile() for our own pidfile.

Opening a file descriptor and then closing it always discards any locks
held on the underlying file, even if the file is still open as another file
descriptor.  This meant that calling read_pidfile() on the process's own
pidfile would discard the lock and make other OVS processes think that the
process had died.  This commit fixes the problem.
---
 lib/daemon.c         | 25 +++++++++++++++++++++++++
 python/ovs/daemon.py | 23 ++++++++++++++++++++++-
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/lib/daemon.c b/lib/daemon.c
index 6b61879db..bbcfe6afc 100644
--- a/lib/daemon.c
+++ b/lib/daemon.c
@@ -42,6 +42,10 @@ static bool detach;
 /* --pidfile: Name of pidfile (null if none). */
 static char *pidfile;
 
+/* Device and inode of pidfile, so we can avoid reopening it. */
+static dev_t pidfile_dev;
+static ino_t pidfile_ino;
+
 /* --overwrite-pidfile: Create pidfile even if one already exists and is
    locked? */
 static bool overwrite_pidfile;
@@ -208,6 +212,15 @@ make_pidfile(void)
                         close(fd);
                     } else {
                         /* Keep 'fd' open to retain the lock. */
+                        struct stat s;
+
+                        if (!fstat(fd, &s)) {
+                            pidfile_dev = s.st_dev;
+                            pidfile_ino = s.st_ino;
+                        } else {
+                            VLOG_ERR("%s: fstat failed: %s",
+                                     pidfile, strerror(errno));
+                        }
                     }
                     free(text);
                 } else {
@@ -494,9 +507,21 @@ read_pidfile(const char *pidfile)
 {
     char line[128];
     struct flock lck;
+    struct stat s;
     FILE *file;
     int error;
 
+    if ((pidfile_ino || pidfile_dev)
+        && !stat(pidfile, &s)
+        && s.st_ino == pidfile_ino && s.st_dev == pidfile_dev) {
+        /* It's our own pidfile.  We can't afford to open it, because closing
+         * *any* fd for a file that a process has locked also releases all the
+         * locks on that file.
+         *
+         * Fortunately, we know the associated pid anyhow: */
+        return getpid();
+    }
+
     file = fopen(pidfile, "r");
     if (!file) {
         error = errno;
diff --git a/python/ovs/daemon.py b/python/ovs/daemon.py
index a8373cfd0..eaaaa519b 100644
--- a/python/ovs/daemon.py
+++ b/python/ovs/daemon.py
@@ -35,6 +35,10 @@ _detach = False
 # --pidfile: Name of pidfile (null if none).
 _pidfile = None
 
+# Our pidfile's inode and device, if we have created one.
+_pidfile_dev = None
+_pidfile_ino = None
+
 # --overwrite-pidfile: Create pidfile even if one already exists and is locked?
 _overwrite_pidfile = False
 
@@ -163,7 +167,7 @@ def _make_pidfile():
             logging.error("%s: create failed: %s"
                           % (tmpfile, os.strerror(e.errno)))
             return
-            
+
         try:
             fcntl.lockf(file, fcntl.LOCK_EX | fcntl.LOCK_NB)
         except IOError, e:
@@ -191,6 +195,10 @@ def _make_pidfile():
             file.close()
             return
 
+        global _pidfile_dev, _pidfile_ino  # Rebind the module-level values.
+        s = os.fstat(file.fileno())
+        _pidfile_dev, _pidfile_ino = s.st_dev, s.st_ino
+
 def daemonize():
     """If configured with set_pidfile() or set_detach(), creates the pid file
     and detaches from the foreground session."""
@@ -368,6 +376,19 @@ Daemon options:
 def read_pidfile(pidfile):
     """Opens and reads a PID from 'pidfile'.  Returns the nonnegative PID if
     successful, otherwise a negative errno value."""
+    if _pidfile_dev is not None:
+        try:
+            s = os.stat(pidfile)
+            if s.st_ino == _pidfile_ino and s.st_dev == _pidfile_dev:
+                # It's our own pidfile.  We can't afford to open it,
+                # because closing *any* fd for a file that a process
+                # has locked also releases all the locks on that file.
+                #
+                # Fortunately, we know the associated pid anyhow.
+                return os.getpid()
+        except OSError:
+            pass
+
     try:
         file = open(pidfile, "r")
     except IOError, e:
-- 
cgit v1.2.1


From 4f2226487d3522654876885d769510b835c5f5ee Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 23 Sep 2010 09:42:30 -0700
Subject: shash: New function shash_steal().

---
 lib/dpif-linux.c |  3 +--
 lib/netdev.c     |  3 +--
 lib/shash.c      | 15 ++++++++++++++-
 lib/shash.h      |  1 +
 4 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index ec8a952b3..635fe9411 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -338,8 +338,7 @@ dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep)
         return ENOBUFS;
     } else if (!shash_is_empty(&dpif->changed_ports)) {
         struct shash_node *node = shash_first(&dpif->changed_ports);
-        *devnamep = xstrdup(node->name);
-        shash_delete(&dpif->changed_ports, node);
+        *devnamep = shash_steal(&dpif->changed_ports, node);
         return 0;
     } else {
         return EAGAIN;
diff --git a/lib/netdev.c b/lib/netdev.c
index 24c2a88fa..c1eb5d04f 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -1553,8 +1553,7 @@ netdev_monitor_poll(struct netdev_monitor *monitor, char **devnamep)
         *devnamep = NULL;
         return EAGAIN;
     } else {
-        *devnamep = xstrdup(node->name);
-        shash_delete(&monitor->changed_netdevs, node);
+        *devnamep = shash_steal(&monitor->changed_netdevs, node);
         return 0;
     }
 }
diff --git a/lib/shash.c b/lib/shash.c
index 8fd2eb18f..cc45efb5c 100644
--- a/lib/shash.c
+++ b/lib/shash.c
@@ -167,12 +167,25 @@ shash_replace(struct shash *sh, const char *name, const void *data)
     }
 }
 
+/* Deletes 'node' from 'sh' and frees the node's name.  The caller is still
+ * responsible for freeing the node's data, if necessary. */
 void
 shash_delete(struct shash *sh, struct shash_node *node)
 {
+    free(shash_steal(sh, node));
+}
+
+/* Deletes 'node' from 'sh'.  Neither the node's name nor its data is freed;
+ * instead, ownership is transferred to the caller.  Returns the node's
+ * name. */
+char *
+shash_steal(struct shash *sh, struct shash_node *node)
+{
+    char *name = node->name;
+
     hmap_remove(&sh->map, &node->node);
-    free(node->name);
     free(node);
+    return name;
 }
 
 static struct shash_node *
diff --git a/lib/shash.h b/lib/shash.h
index eab0af45a..8a736e80b 100644
--- a/lib/shash.h
+++ b/lib/shash.h
@@ -57,6 +57,7 @@ bool shash_add_once(struct shash *, const char *, const void *);
 void shash_add_assert(struct shash *, const char *, const void *);
 void *shash_replace(struct shash *, const char *, const void *data);
 void shash_delete(struct shash *, struct shash_node *);
+char *shash_steal(struct shash *, struct shash_node *);
 struct shash_node *shash_find(const struct shash *, const char *);
 void *shash_find_data(const struct shash *, const char *);
 void *shash_find_and_delete(struct shash *, const char *);
-- 
cgit v1.2.1


From e4af561537cfea7d35d2075596b4474847876794 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 23 Sep 2010 09:43:46 -0700
Subject: ovsdb-data: New function ovsdb_datum_from_shash().

---
 lib/ovsdb-data.c | 25 +++++++++++++++++++++++++
 lib/ovsdb-data.h |  3 +++
 2 files changed, 28 insertions(+)

diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c
index 9c54fe81b..492da7fa5 100644
--- a/lib/ovsdb-data.c
+++ b/lib/ovsdb-data.c
@@ -1444,6 +1444,31 @@ ovsdb_datum_to_string(const struct ovsdb_datum *datum,
     }
 }
 
+/* Initializes 'datum' as a string-to-string map whose contents are taken from
+ * 'sh'.  Destroys 'sh'. */
+void
+ovsdb_datum_from_shash(struct ovsdb_datum *datum, struct shash *sh)
+{
+    struct shash_node *node, *next;
+    size_t i;
+
+    datum->n = shash_count(sh);
+    datum->keys = xmalloc(datum->n * sizeof *datum->keys);
+    datum->values = xmalloc(datum->n * sizeof *datum->values);
+
+    i = 0;
+    SHASH_FOR_EACH_SAFE (node, next, sh) {
+        datum->keys[i].string = node->name;
+        datum->values[i].string = node->data;
+        shash_steal(sh, node);
+        i++;
+    }
+    assert(i == datum->n);
+
+    shash_destroy(sh);
+    ovsdb_datum_sort_unique(datum, OVSDB_TYPE_STRING, OVSDB_TYPE_STRING);
+}
+
 static uint32_t
 hash_atoms(enum ovsdb_atomic_type type, const union ovsdb_atom *atoms,
            unsigned int n, uint32_t basis)
diff --git a/lib/ovsdb-data.h b/lib/ovsdb-data.h
index ae0faa26e..f7e98a84b 100644
--- a/lib/ovsdb-data.h
+++ b/lib/ovsdb-data.h
@@ -22,6 +22,7 @@
 
 struct ds;
 struct ovsdb_symbol_table;
+struct shash;
 
 /* One value of an atomic type (given by enum ovs_atomic_type). */
 union ovsdb_atom {
@@ -167,6 +168,8 @@ char *ovsdb_datum_from_string(struct ovsdb_datum *,
 void ovsdb_datum_to_string(const struct ovsdb_datum *,
                            const struct ovsdb_type *, struct ds *);
 
+void ovsdb_datum_from_shash(struct ovsdb_datum *, struct shash *);
+
 /* Comparison. */
 uint32_t ovsdb_datum_hash(const struct ovsdb_datum *,
                           const struct ovsdb_type *, uint32_t basis);
-- 
cgit v1.2.1


From c94238565522c51a117a7f89fecaabd2605b7c66 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 22 Sep 2010 16:34:05 -0700
Subject: ovsdb-doc: Implement new <option>, <ol> tags.

---
 ovsdb/ovsdb-doc.in | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/ovsdb/ovsdb-doc.in b/ovsdb/ovsdb-doc.in
index 90de4521a..9e0a318d2 100755
--- a/ovsdb/ovsdb-doc.in
+++ b/ovsdb/ovsdb-doc.in
@@ -43,7 +43,7 @@ def inlineXmlToNroff(node, font):
     if node.nodeType == node.TEXT_NODE:
         return textToNroff(node.data, font)
     elif node.nodeType == node.ELEMENT_NODE:
-        if node.tagName == 'code' or node.tagName == 'em':
+        if node.tagName in ['code', 'em', 'option']:
             s = r'\fB'
             for child in node.childNodes:
                 s += inlineXmlToNroff(child, r'\fB')
@@ -76,17 +76,23 @@ def blockXmlToNroff(nodes, para='.PP'):
             s += textToNroff(node.data)
             s = s.lstrip()
         elif node.nodeType == node.ELEMENT_NODE:
-            if node.tagName == 'ul':
+            if node.tagName in ['ul', 'ol']:
                 if s != "":
                     s += "\n"
                 s += ".RS\n"
+                i = 0
                 for liNode in node.childNodes:
                     if (liNode.nodeType == node.ELEMENT_NODE
                         and liNode.tagName == 'li'):
-                        s += ".IP \\(bu\n" + blockXmlToNroff(liNode.childNodes, ".IP")
+                        i += 1
+                        if node.tagName == 'ul':
+                            s += ".IP \\(bu\n"  # \(bu: troff bullet glyph
+                        else:
+                            s += ".IP %d. .25in\n" % i
+                        s += blockXmlToNroff(liNode.childNodes, ".IP")
                     elif (liNode.nodeType != node.TEXT_NODE
                           or not liNode.data.isspace()):
-                        raise error.Error("<ul> element may only have <li> children")
+                        raise error.Error("<%s> element may only have <li> children" % node.tagName)
                 s += ".RE\n"
             elif node.tagName == 'dl':
                 if s != "":
-- 
cgit v1.2.1


From ce8876775477a359f3ae14b8cae0ef2212f1681b Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 22 Sep 2010 16:45:30 -0700
Subject: ovs-vswitchd: Export system stats through Open_vSwitch table.

This is intended to provide controllers enough information to determine
whether a switch is overloaded or busted, to enable them to spread load
fairly across a group of switches.

Feature #2421.

CC: Peter Balland <peter@nicira.com>
---
 PORTING                    |   3 +
 configure.ac               |   5 +-
 lib/vlog-modules.def       |   1 +
 vswitchd/automake.mk       |   1 +
 vswitchd/bridge.c          |  42 ++--
 vswitchd/system-stats.c    | 503 +++++++++++++++++++++++++++++++++++++++++++++
 vswitchd/system-stats.h    |  21 ++
 vswitchd/vswitch.ovsschema |   2 +-
 vswitchd/vswitch.xml       | 139 +++++++++++--
 9 files changed, 688 insertions(+), 29 deletions(-)
 create mode 100644 vswitchd/system-stats.c
 create mode 100644 vswitchd/system-stats.h

diff --git a/PORTING b/PORTING
index 5f88c940a..922d6c85b 100644
--- a/PORTING
+++ b/PORTING
@@ -209,6 +209,9 @@ lib/entropy.c assumes that it can obtain high-quality random number
 seeds at startup by reading from /dev/urandom.  You will need to
 modify it if this is not true on your platform.
 
+vswitchd/system-stats.c only knows how to obtain some statistics on
+Linux.  Optionally you may implement them for your platform as well.
+
 Questions
 ---------
 
diff --git a/configure.ac b/configure.ac
index 08a6f0fdf..8a5dc5ce0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -55,7 +55,8 @@ OVS_CHECK_IF_PACKET
 OVS_CHECK_STRTOK_R
 AC_CHECK_MEMBERS([struct stat.st_mtim.tv_nsec, struct stat.st_mtimensec],
   [], [], [[#include <sys/stat.h>]])
-AC_CHECK_FUNCS([mlockall])
+AC_CHECK_FUNCS([mlockall strsignal getloadavg statvfs setmntent])
+AC_CHECK_HEADERS([mntent.h sys/statvfs.h])
 
 OVS_CHECK_PKIDIR
 OVS_CHECK_RUNDIR
@@ -64,8 +65,6 @@ OVS_CHECK_VALGRIND
 OVS_CHECK_SOCKET_LIBS
 OVS_CHECK_LINKER_SECTIONS
 
-AC_CHECK_FUNCS([strsignal])
-
 OVS_ENABLE_OPTION([-Wall])
 OVS_ENABLE_OPTION([-Wno-sign-compare])
 OVS_ENABLE_OPTION([-Wpointer-arith])
diff --git a/lib/vlog-modules.def b/lib/vlog-modules.def
index 03f310375..7d614b412 100644
--- a/lib/vlog-modules.def
+++ b/lib/vlog-modules.def
@@ -78,6 +78,7 @@ VLOG_MODULE(status)
 VLOG_MODULE(svec)
 VLOG_MODULE(timeval)
 VLOG_MODULE(socket_util)
+VLOG_MODULE(system_stats)
 VLOG_MODULE(unixctl)
 VLOG_MODULE(util)
 VLOG_MODULE(vconn_stream)
diff --git a/vswitchd/automake.mk b/vswitchd/automake.mk
index 592be607a..1a4b465f5 100644
--- a/vswitchd/automake.mk
+++ b/vswitchd/automake.mk
@@ -12,6 +12,7 @@ vswitchd_ovs_vswitchd_SOURCES = \
 	vswitchd/proc-net-compat.c \
 	vswitchd/proc-net-compat.h \
 	vswitchd/ovs-vswitchd.c \
+	vswitchd/system-stats.c \
 	vswitchd/vswitch-idl.c \
 	vswitchd/vswitch-idl.h \
 	vswitchd/xenserver.c \
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 3f5e3d471..77eb152ce 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -57,6 +57,7 @@
 #include "socket-util.h"
 #include "stream-ssl.h"
 #include "svec.h"
+#include "system-stats.h"
 #include "timeval.h"
 #include "util.h"
 #include "unixctl.h"
@@ -1096,6 +1097,20 @@ iface_refresh_stats(struct iface *iface)
     ovsrec_interface_set_statistics(iface->cfg, keys, values, n);
 }
 
+static void
+refresh_system_stats(const struct ovsrec_open_vswitch *cfg)
+{
+    struct ovsdb_datum datum;
+    struct shash stats;
+
+    shash_init(&stats);
+    get_system_stats(&stats);
+
+    ovsdb_datum_from_shash(&datum, &stats);
+    ovsdb_idl_txn_write(&cfg->header_, &ovsrec_open_vswitch_col_statistics,
+                        &datum);
+}
+
 void
 bridge_run(void)
 {
@@ -1153,24 +1168,27 @@ bridge_run(void)
 
     /* Refresh interface stats if necessary. */
     if (time_msec() >= iface_stats_timer) {
-        struct ovsdb_idl_txn *txn;
+        if (cfg) {
+            struct ovsdb_idl_txn *txn;
 
-        txn = ovsdb_idl_txn_create(idl);
-        LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
-            size_t i;
+            txn = ovsdb_idl_txn_create(idl);
+            LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+                size_t i;
 
-            for (i = 0; i < br->n_ports; i++) {
-                struct port *port = br->ports[i];
-                size_t j;
+                for (i = 0; i < br->n_ports; i++) {
+                    struct port *port = br->ports[i];
+                    size_t j;
 
-                for (j = 0; j < port->n_ifaces; j++) {
-                    struct iface *iface = port->ifaces[j];
-                    iface_refresh_stats(iface);
+                    for (j = 0; j < port->n_ifaces; j++) {
+                        struct iface *iface = port->ifaces[j];
+                        iface_refresh_stats(iface);
+                    }
                 }
             }
+            refresh_system_stats(cfg);
+            ovsdb_idl_txn_commit(txn);
+            ovsdb_idl_txn_destroy(txn); /* XXX */
         }
-        ovsdb_idl_txn_commit(txn);
-        ovsdb_idl_txn_destroy(txn); /* XXX */
 
         iface_stats_timer = time_msec() + IFACE_STATS_INTERVAL;
     }
diff --git a/vswitchd/system-stats.c b/vswitchd/system-stats.c
new file mode 100644
index 000000000..11b2fbedb
--- /dev/null
+++ b/vswitchd/system-stats.c
@@ -0,0 +1,503 @@
+/* Copyright (c) 2010 Nicira Networks
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#if HAVE_MNTENT_H
+#include <mntent.h>
+#endif
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#if HAVE_SYS_STATVFS_H
+#include <sys/statvfs.h>
+#endif
+#include <unistd.h>
+
+#include "daemon.h"
+#include "dirs.h"
+#include "dynamic-string.h"
+#include "shash.h"
+#include "system-stats.h"
+#include "timeval.h"
+#include "vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(system_stats)
+
+/* #ifdefs make it a pain to maintain code: you have to try to build both ways.
+ * Thus, this file tries to compile as much of the code as possible regardless
+ * of the target, by writing "if (LINUX)" instead of "#ifdef __linux__" where
+ * this is possible. */
+#ifdef __linux__
+#include <asm/param.h>
+#define LINUX 1
+#else
+#define LINUX 0
+#endif
+
+static void
+get_cpu_cores(struct shash *stats)
+{
+    long int n_cores = sysconf(_SC_NPROCESSORS_ONLN);
+    if (n_cores > 0) {
+        shash_add(stats, "cpu", xasprintf("%ld", n_cores));
+    }
+}
+
+static void
+get_load_average(struct shash *stats OVS_UNUSED)
+{
+#if HAVE_GETLOADAVG
+    double loadavg[3];
+
+    if (getloadavg(loadavg, 3) == 3) {
+        shash_add(stats, "load_average",
+                  xasprintf("%.2f,%.2f,%.2f",
+                            loadavg[0], loadavg[1], loadavg[2]));
+    }
+#endif
+}
+
+static unsigned int
+get_page_size(void)
+{
+    static unsigned int cached;
+
+    if (!cached) {
+        long int value = sysconf(_SC_PAGESIZE);
+        if (value >= 0) {
+            cached = value;
+        }
+    }
+
+    return cached;
+}
+
+static void
+get_memory_stats(struct shash *stats)
+{
+    if (!LINUX) {
+        unsigned int pagesize = get_page_size();
+        long int phys_pages = sysconf(_SC_PHYS_PAGES);
+        long int avphys_pages = sysconf(_SC_AVPHYS_PAGES);
+        int mem_total, mem_used;
+
+        if (pagesize <= 0 || phys_pages <= 0 || avphys_pages <= 0) {
+            return;
+        }
+
+        mem_total = phys_pages * (pagesize / 1024);
+        mem_used = (phys_pages - avphys_pages) * (pagesize / 1024);
+        shash_add(stats, "memory", xasprintf("%d,%d", mem_total, mem_used));
+    } else {
+        static const char file_name[] = "/proc/meminfo";
+        int mem_used, mem_cache, swap_used;
+        int mem_free = 0;
+        int buffers = 0;
+        int cached = 0;
+        int swap_free = 0;
+        int mem_total = 0;
+        int swap_total = 0;
+        struct shash dict;
+        char line[128];
+        FILE *stream;
+
+        stream = fopen(file_name, "r");
+        if (!stream) {
+            VLOG_WARN_ONCE("%s: open failed (%s)", file_name, strerror(errno));
+            return;
+        }
+
+        shash_init(&dict);
+        shash_add(&dict, "MemTotal", &mem_total);
+        shash_add(&dict, "MemFree", &mem_free);
+        shash_add(&dict, "Buffers", &buffers);
+        shash_add(&dict, "Cached", &cached);
+        shash_add(&dict, "SwapTotal", &swap_total);
+        shash_add(&dict, "SwapFree", &swap_free);
+        while (fgets(line, sizeof line, stream)) {
+            char key[16];
+            int value;
+
+            if (sscanf(line, "%15[^:]: %d", key, &value) == 2) {
+                int *valuep = shash_find_data(&dict, key);
+                if (valuep) {
+                    *valuep = value;
+                }
+            }
+        }
+        fclose(stream);
+        shash_destroy(&dict);
+
+        mem_used = mem_total - mem_free;
+        mem_cache = buffers + cached;
+        swap_used = swap_total - swap_free;
+        shash_add(stats, "memory",
+                  xasprintf("%d,%d,%d,%d,%d", mem_total, mem_used, mem_cache,
+                            swap_total, swap_used));
+    }
+}
+
+/* Returns the time at which the system booted, as the number of milliseconds
+ * since the epoch, or 0 if the time of boot cannot be determined. */
+static long long int
+get_boot_time(void)
+{
+    static long long int cache_expiration = LLONG_MIN;
+    static long long int boot_time;
+
+    assert(LINUX);
+
+    if (time_msec() >= cache_expiration) {
+        static const char stat_file[] = "/proc/stat";
+        char line[128];
+        FILE *stream;
+
+        cache_expiration = time_msec() + 5 * 1000;
+
+        stream = fopen(stat_file, "r");
+        if (!stream) {
+            VLOG_ERR_ONCE("%s: open failed (%s)", stat_file, strerror(errno));
+            return boot_time;
+        }
+
+        while (fgets(line, sizeof line, stream)) {
+            long long int btime;
+            if (sscanf(line, "btime %lld", &btime) == 1) {
+                boot_time = btime * 1000;
+                goto done;
+            }
+        }
+        VLOG_ERR_ONCE("%s: btime not found", stat_file);
+    done:
+        fclose(stream);
+    }
+    return boot_time;
+}
+
+static unsigned long long int
+ticks_to_ms(unsigned long long int ticks)
+{
+    assert(LINUX);
+
+#ifndef USER_HZ
+#define USER_HZ 100
+#endif
+
+#if USER_HZ == 100              /* Common case. */
+    return ticks * (1000 / USER_HZ);
+#else  /* Alpha and some other architectures.  */
+    double factor = 1000.0 / USER_HZ;
+    return ticks * factor + 0.5;
+#endif
+}
+
+struct raw_process_info {
+    unsigned long int vsz;      /* Virtual size, in kB. */
+    unsigned long int rss;      /* Resident set size, in kB. */
+    long long int uptime;       /* ms since started. */
+    long long int cputime;      /* ms of CPU used during 'uptime'. */
+    pid_t ppid;                 /* Parent. */
+    char name[18];              /* Name (surrounded by parentheses). */
+};
+
+static bool
+get_raw_process_info(pid_t pid, struct raw_process_info *raw)
+{
+    unsigned long long int vsize, rss, start_time, utime, stime;
+    long long int start_msec;
+    unsigned long ppid;
+    char file_name[128];
+    FILE *stream;
+    int n;
+
+    assert(LINUX);
+
+    sprintf(file_name, "/proc/%lu/stat", (unsigned long int) pid);
+    stream = fopen(file_name, "r");
+    if (!stream) {
+        VLOG_ERR_ONCE("%s: open failed (%s)", file_name, strerror(errno));
+        return false;
+    }
+
+    n = fscanf(stream,
+               "%*d "           /* (1. pid) */
+               "%17s "          /* 2. process name */
+               "%*c "           /* (3. state) */
+               "%lu "           /* 4. ppid */
+               "%*d "           /* (5. pgid) */
+               "%*d "           /* (6. sid) */
+               "%*d "           /* (7. tty_nr) */
+               "%*d "           /* (8. tty_pgrp) */
+               "%*u "           /* (9. flags) */
+               "%*u "           /* (10. min_flt) */
+               "%*u "           /* (11. cmin_flt) */
+               "%*u "           /* (12. maj_flt) */
+               "%*u "           /* (13. cmaj_flt) */
+               "%llu "          /* 14. utime */
+               "%llu "          /* 15. stime */
+               "%*d "           /* (16. cutime) */
+               "%*d "           /* (17. cstime) */
+               "%*d "           /* (18. priority) */
+               "%*d "           /* (19. nice) */
+               "%*d "           /* (20. num_threads) */
+               "%*d "           /* (21. always 0) */
+               "%llu "          /* 22. start_time */
+               "%llu "          /* 23. vsize */
+               "%llu "          /* 24. rss */
+#if 0
+               /* These are here for documentation but #if'd out to save
+                * actually parsing them from the stream for no benefit. */
+               "%*lu "          /* (25. rsslim) */
+               "%*lu "          /* (26. start_code) */
+               "%*lu "          /* (27. end_code) */
+               "%*lu "          /* (28. start_stack) */
+               "%*lu "          /* (29. esp) */
+               "%*lu "          /* (30. eip) */
+               "%*lu "          /* (31. pending signals) */
+               "%*lu "          /* (32. blocked signals) */
+               "%*lu "          /* (33. ignored signals) */
+               "%*lu "          /* (34. caught signals) */
+               "%*lu "          /* (35. wchan) */
+               "%*lu "          /* (36. always 0) */
+               "%*lu "          /* (37. always 0) */
+               "%*d "           /* (38. exit_signal) */
+               "%*d "           /* (39. task_cpu) */
+               "%*u "           /* (40. rt_priority) */
+               "%*u "           /* (41. policy) */
+               "%*llu "         /* (42. blkio_ticks) */
+               "%*lu "          /* (43. gtime) */
+               "%*ld"           /* (44. cgtime) */
+#endif
+               , raw->name, &ppid, &utime, &stime, &start_time, &vsize, &rss);
+    fclose(stream);
+    if (n != 7) {
+        VLOG_ERR_ONCE("%s: fscanf failed", file_name);
+        return false;
+    }
+
+    start_msec = get_boot_time() + ticks_to_ms(start_time);
+
+    raw->vsz = vsize / 1024;
+    raw->rss = rss * (getpagesize() / 1024);
+    raw->uptime = time_wall_msec() - start_msec;
+    raw->cputime = ticks_to_ms(utime + stime);
+    raw->ppid = ppid;
+
+    return true;
+}
+
+static int
+count_crashes(pid_t pid)
+{
+    char file_name[128];
+    const char *paren;
+    char line[128];
+    int crashes = 0;
+    FILE *stream;
+
+    assert(LINUX);
+
+    sprintf(file_name, "/proc/%lu/cmdline", (unsigned long int) pid);
+    stream = fopen(file_name, "r");
+    if (!stream) {
+        VLOG_WARN_ONCE("%s: open failed (%s)", file_name, strerror(errno));
+        goto exit;
+    }
+
+    if (!fgets(line, sizeof line, stream)) {
+        VLOG_WARN_ONCE("%s: read failed (%s)", file_name,
+                       feof(stream) ? "end of file" : strerror(errno));
+        goto exit_close;
+    }
+
+    paren = strchr(line, '(');
+    if (paren) {
+        int x;
+        if (sscanf(paren + 1, "%d", &x) == 1) {
+            crashes = x;
+        }
+    }
+
+exit_close:
+    fclose(stream);
+exit:
+    return crashes;
+}
+
+struct process_info {
+    unsigned long int vsz;      /* Virtual size, in kB. */
+    unsigned long int rss;      /* Resident set size, in kB. */
+    long long int booted;       /* ms since monitor started. */
+    int crashes;                /* # of crashes (usually 0). */
+    long long int uptime;       /* ms since last (re)started by monitor. */
+    long long int cputime;      /* ms of CPU used during 'uptime'. */
+};
+
+static bool
+get_process_info(pid_t pid, struct process_info *pinfo)
+{
+    struct raw_process_info child;
+
+    assert(LINUX);
+    if (!get_raw_process_info(pid, &child)) {
+        return false;
+    }
+
+    pinfo->vsz = child.vsz;
+    pinfo->rss = child.rss;
+    pinfo->booted = child.uptime;
+    pinfo->crashes = 0;
+    pinfo->uptime = child.uptime;
+    pinfo->cputime = child.cputime;
+
+    if (child.ppid) {
+        struct raw_process_info parent;
+
+        if (get_raw_process_info(child.ppid, &parent)
+            && !strcmp(child.name, parent.name)) {
+            pinfo->booted = parent.uptime;
+            pinfo->crashes = count_crashes(child.ppid);
+        }
+    }
+
+    return true;
+}
+
+static void
+get_process_stats(struct shash *stats)
+{
+    struct dirent *de;
+    DIR *dir;
+
+    dir = opendir(ovs_rundir);
+    if (!dir) {
+        VLOG_ERR_ONCE("%s: open failed (%s)", ovs_rundir, strerror(errno));
+        return;
+    }
+
+    while ((de = readdir(dir)) != NULL) {
+        struct process_info pinfo;
+        char *key, *value;
+        char *file_name;
+        char *extension;
+        pid_t pid;
+
+#ifdef _DIRENT_HAVE_D_TYPE
+        if (de->d_type != DT_UNKNOWN && de->d_type != DT_REG) {
+            continue;
+        }
+#endif
+
+        extension = strrchr(de->d_name, '.');
+        if (!extension || strcmp(extension, ".pid")) {
+            continue;
+        }
+
+        file_name = xasprintf("%s/%s", ovs_rundir, de->d_name);
+        pid = read_pidfile(file_name);
+        free(file_name);
+        if (pid < 0 || kill(pid, 0)) {
+            continue;
+        }
+
+        key = xasprintf("process_%.*s",
+                        (int) (extension - de->d_name), de->d_name);
+        if (shash_find(stats, key)) {
+            free(key);
+            continue;
+        }
+
+        if (LINUX && get_process_info(pid, &pinfo)) {
+            value = xasprintf("%lu,%lu,%lld,%d,%lld,%lld",
+                              pinfo.vsz, pinfo.rss, pinfo.cputime,
+                              pinfo.crashes, pinfo.booted, pinfo.uptime);
+        } else {
+            value = xstrdup("");
+        }
+
+        shash_add_nocopy(stats, key, value);
+    }
+
+    closedir(dir);
+}
+
+static void
+get_filesys_stats(struct shash *stats OVS_UNUSED)
+{
+#if HAVE_SETMNTENT && HAVE_STATVFS
+    static const char file_name[] = "/etc/mtab";
+    struct mntent *me;
+    FILE *stream;
+    struct ds s;
+
+    stream = setmntent(file_name, "r");
+    if (!stream) {
+        VLOG_ERR_ONCE("%s: open failed (%s)", file_name, strerror(errno));
+        return;
+    }
+
+    ds_init(&s);
+    while ((me = getmntent(stream)) != NULL) {
+        unsigned long long int total, free;
+        struct statvfs vfs;
+        char *p;
+
+        /* Skip non-local and read-only filesystems. */
+        if (strncmp(me->mnt_fsname, "/dev", 4)
+            || !strstr(me->mnt_opts, "rw")) {
+            continue;
+        }
+
+        /* Given the mount point we can stat the file system. */
+        if (statvfs(me->mnt_dir, &vfs) || vfs.f_flag & ST_RDONLY) {
+            /* statvfs() failed or the file system is read-only: skip it. */
+            continue;
+        }
+
+        /* Now format the data. */
+        if (s.length) {
+            ds_put_char(&s, ' ');
+        }
+        for (p = me->mnt_dir; *p != '\0'; p++) {
+            ds_put_char(&s, *p == ' ' || *p == ',' ? '_' : *p);
+        }
+        total = (unsigned long long int) vfs.f_frsize * vfs.f_blocks / 1024;
+        free = (unsigned long long int) vfs.f_frsize * vfs.f_bfree / 1024;
+        ds_put_format(&s, ",%llu,%llu", total, total - free);
+    }
+    endmntent(stream);
+
+    if (s.length) {
+        shash_add(stats, "file_systems", ds_steal_cstr(&s));
+    }
+    ds_destroy(&s);
+#endif  /* HAVE_SETMNTENT && HAVE_STATVFS */
+}
+
+void
+get_system_stats(struct shash *stats)
+{
+    get_cpu_cores(stats);
+    get_load_average(stats);
+    get_memory_stats(stats);
+    get_process_stats(stats);
+    get_filesys_stats(stats);
+}
diff --git a/vswitchd/system-stats.h b/vswitchd/system-stats.h
new file mode 100644
index 000000000..559be692d
--- /dev/null
+++ b/vswitchd/system-stats.h
@@ -0,0 +1,21 @@
+/* Copyright (c) 2010 Nicira Networks
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef VSWITCHD_SYSTEM_STATS
+#define VSWITCHD_SYSTEM_STATS 1
+
+struct shash;                   /* Only used as an opaque pointer here. */
+void get_system_stats(struct shash *);
+#endif /* vswitchd/system-stats.h */
diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
index 07dd79fbc..d872fdc51 100644
--- a/vswitchd/vswitch.ovsschema
+++ b/vswitchd/vswitch.ovsschema
@@ -25,7 +25,7 @@
                             "refTable": "Capability"},
                   "min": 0, "max": "unlimited"}},
        "statistics": {
-         "type": {"key": "string", "value": "integer", "min": 0, "max": "unlimited"},
+         "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"},
          "ephemeral": true}},
      "maxRows": 1},
    "Capability": {
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index bb3f648fb..141c5fe53 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -1,3 +1,4 @@
+<?xml version="1.0" encoding="utf-8"?>
 <database title="Open vSwitch Configuration Database">
   <p>A database with this schema holds the configuration for one Open
     vSwitch daemon.  The root of the configuration for the daemon is
@@ -74,21 +75,133 @@
 
       <column name="statistics">
         <p>
-          Key-value pairs that report statistics about a running Open_vSwitch
-          daemon.  The current implementation updates these counters
-          periodically.  In the future, we plan to, instead, update them only
-          when they are queried (e.g. using an OVSDB <code>select</code>
-          operation) and perhaps at other times, but not on any regular
-          periodic basis.</p>
-        <p>
-          The currently defined key-value pairs are listed below.  Some Open
-          vSwitch implementations may not support some statistics, in which
-          case those key-value pairs are omitted.</p>
+          Key-value pairs that report statistics about a system running Open
+          vSwitch.  These are updated periodically (currently, every 5
+          seconds).  Key-value pairs that cannot be determined or that do not
+          apply to a platform are omitted.
+        </p>
+
         <dl>
-          <dt><code>load-average</code></dt>
+          <dt><code>cpu</code></dt>
+          <dd>
+            <p>
+              Number of CPU processors, threads, or cores currently online and
+              available to the operating system on which Open vSwitch is
+              running, as an integer.  This may be less than the number
+              installed, if some are not online or if they are not available to
+              the operating system.
+            </p>
+            <p>
+              Open vSwitch userspace processes are not multithreaded, but the
+              Linux kernel-based datapath is.
+            </p>
+          </dd>
+
+          <dt><code>load_average</code></dt>
+          <dd>
+            <p>
+              A comma-separated list of three floating-point numbers,
+              representing the system load average over the last 1, 5, and 15
+              minutes, respectively.
+            </p>
+          </dd>
+
+          <dt><code>memory</code></dt>
+          <dd>
+            <p>
+              A comma-separated list of integers, each of which represents a
+              quantity of memory in kilobytes that describes the operating
+              system on which Open vSwitch is running.  In respective order,
+              these values are:
+            </p>
+
+            <ol>
+              <li>Total amount of RAM allocated to the OS.</li>
+              <li>RAM allocated to the OS that is in use.</li>
+              <li>RAM that can be flushed out to disk or otherwise discarded
+              if that space is needed for another purpose.  This number is
+              necessarily less than or equal to the previous value.</li>
+              <li>Total disk space allocated for swap.</li>
+              <li>Swap space currently in use.</li>
+            </ol>
+
+            <p>
+              On Linux, all five values can be determined and are included.  On
+              other operating systems, only the first two values can be
+              determined, so the list will only have two values.
+            </p>
+          </dd>
+
+          <dt><code>process_</code><var>name</var></dt>
           <dd>
-            System load average multiplied by 100 and rounded to the nearest
-            integer.</dd>
+            <p>
+              One such key-value pair will exist for each running Open vSwitch
+              daemon process, with <var>name</var> replaced by the daemon's
+              name (e.g. <code>process_ovs-vswitchd</code>).  The value is a
+              comma-separated list of integers.  The integers represent the
+              following, with memory measured in kilobytes and durations in
+              milliseconds:
+            </p>
+
+            <ol>
+              <li>The process's virtual memory size.</li>
+              <li>The process's resident set size.</li>
+              <li>The amount of user and system CPU time consumed by the
+              process.</li>
+              <li>The number of times that the process has crashed and been
+              automatically restarted by the monitor.</li>
+              <li>The duration since the process was started.</li>
+              <li>The duration for which the process has been running.</li>
+            </ol>
+
+            <p>
+              The interpretation of some of these values depends on whether the
+              process was started with <option>--monitor</option>.  If it
+              was not, then the crash count will always be 0 and the two
+              durations will always be the same.  If <option>--monitor</option>
+              was given, then the crash count may be positive; if it is, the
+              latter duration is the amount of time since the most recent crash
+              and restart.
+            </p>
+
+            <p>
+              There will be one key-value pair for each file in Open vSwitch's
+              ``run directory'' (usually <code>/var/run/openvswitch</code>)
+              whose name ends in <code>.pid</code>, whose contents are a
+              process ID, and which is locked by a running process.  The
+              <var>name</var> is taken from the pidfile's name.
+            </p>
+
+            <p>
+              Currently Open vSwitch is only able to obtain all of the above
+              detail on Linux systems.  On other systems, the same key-value
+              pairs will be present but the values will always be the empty
+              string.
+            </p>
+          </dd>
+
+          <dt><code>file_systems</code></dt>
+          <dd>
+            <p>
+              A space-separated list of information on local, writable file
+              systems.  Each item in the list describes one file system and
+              consists in turn of a comma-separated list of the following:
+            </p>
+
+            <ol>
+              <li>Mount point, e.g. <code>/</code> or <code>/var/log</code>.
+              Any spaces or commas in the mount point are replaced by
+              underscores.</li>
+              <li>Total size, in kilobytes, as an integer.</li>
+              <li>Amount of storage in use, in kilobytes, as an integer.</li>
+            </ol>
+
+            <p>
+              This key-value pair is omitted if there are no local, writable
+              file systems or if Open vSwitch cannot obtain the needed
+              information.
+            </p>
+          </dd>
         </dl>
       </column>
     </group>
-- 
cgit v1.2.1


From cd0cd65fe3721aa575e5eec081783f8047eda545 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 23 Sep 2010 10:21:40 -0700
Subject: vswitchd: Rename IFACE_STATS_INTERVAL, iface_stats_timer.

These are more than interface stats now so the names should be more
generic.

Suggested-by: Justin Pettit <jpettit@nicira.com>
---
 vswitchd/bridge.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 77eb152ce..c86e4d5bc 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -189,10 +189,10 @@ static struct list all_bridges = LIST_INITIALIZER(&all_bridges);
 /* OVSDB IDL used to obtain configuration. */
 static struct ovsdb_idl *idl;
 
-/* Each time this timer expires, the bridge fetches statistics for every
- * interface and pushes them into the database. */
-#define IFACE_STATS_INTERVAL (5 * 1000) /* In milliseconds. */
-static long long int iface_stats_timer = LLONG_MIN;
+/* Each time this timer expires, the bridge fetches system and interface
+ * statistics and pushes them into the database. */
+#define STATS_INTERVAL (5 * 1000) /* In milliseconds. */
+static long long int stats_timer = LLONG_MIN;
 
 static struct bridge *bridge_create(const struct ovsrec_bridge *br_cfg);
 static void bridge_destroy(struct bridge *);
@@ -307,7 +307,7 @@ bridge_configure_once(const struct ovsrec_open_vswitch *cfg)
     }
     already_configured_once = true;
 
-    iface_stats_timer = time_msec() + IFACE_STATS_INTERVAL;
+    stats_timer = time_msec() + STATS_INTERVAL;
 
     /* Get all the configured bridges' names from 'cfg' into 'bridge_names'. */
     svec_init(&bridge_names);
@@ -1166,8 +1166,8 @@ bridge_run(void)
     }
 #endif
 
-    /* Refresh interface stats if necessary. */
-    if (time_msec() >= iface_stats_timer) {
+    /* Refresh system and interface stats if necessary. */
+    if (time_msec() >= stats_timer) {
         if (cfg) {
             struct ovsdb_idl_txn *txn;
 
@@ -1190,7 +1190,7 @@ bridge_run(void)
             ovsdb_idl_txn_destroy(txn); /* XXX */
         }
 
-        iface_stats_timer = time_msec() + IFACE_STATS_INTERVAL;
+        stats_timer = time_msec() + STATS_INTERVAL;
     }
 }
 
@@ -1209,7 +1209,7 @@ bridge_wait(void)
         bond_wait(br);
     }
     ovsdb_idl_wait(idl);
-    poll_timer_wait_until(iface_stats_timer);
+    poll_timer_wait_until(stats_timer);
 }
 
 /* Forces 'br' to revalidate all of its flows.  This is appropriate when 'br''s
-- 
cgit v1.2.1


From fca426efdad5e21a55f4a106ce7b2f0d1c4a5875 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 23 Sep 2010 11:56:36 -0700
Subject: Add missing file to distribution.

---
 vswitchd/automake.mk | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vswitchd/automake.mk b/vswitchd/automake.mk
index 1a4b465f5..93c6f92ea 100644
--- a/vswitchd/automake.mk
+++ b/vswitchd/automake.mk
@@ -13,6 +13,7 @@ vswitchd_ovs_vswitchd_SOURCES = \
 	vswitchd/proc-net-compat.h \
 	vswitchd/ovs-vswitchd.c \
 	vswitchd/system-stats.c \
+	vswitchd/system-stats.h \
 	vswitchd/vswitch-idl.c \
 	vswitchd/vswitch-idl.h \
 	vswitchd/xenserver.c \
-- 
cgit v1.2.1


From 2b8e39ae83c28509accc56597da0fae1032aaaf1 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Wed, 22 Sep 2010 17:29:12 -0700
Subject: debian: Support two locations for ethtool

The ethtool binary recently moved from /usr/sbin to /sbin.  This
change falls back to /usr/sbin/ethtool if /sbin/ethtool does not
exist.
---
 debian/ovs-bugtool | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/debian/ovs-bugtool b/debian/ovs-bugtool
index f991f27a2..6f792eea7 100755
--- a/debian/ovs-bugtool
+++ b/debian/ovs-bugtool
@@ -114,7 +114,6 @@ CAT = '/bin/cat'
 DF = '/bin/df'
 DMESG = '/bin/dmesg'
 DMIDECODE = '/usr/sbin/dmidecode'
-ETHTOOL = '/sbin/ethtool'
 FDISK = '/sbin/fdisk'
 FIND = '/usr/bin/find'
 IFCONFIG = '/sbin/ifconfig'
@@ -136,6 +135,11 @@ TC = '/sbin/tc'
 UPTIME = '/usr/bin/uptime'
 ZCAT = '/bin/zcat'
 
+ETHTOOL = '/sbin/ethtool'
+# The ethtool binary recently moved from /usr/sbin to /sbin in Debian.
+if not os.path.isfile(ETHTOOL):
+    ETHTOOL = '/usr/sbin/ethtool'
+
 #
 # PII -- Personally identifiable information.  Of particular concern are
 # things that would identify customers, or their network topology.
-- 
cgit v1.2.1


From 69c675a07271d6b4b9bfd9ed49ac354f7af24613 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Tue, 21 Sep 2010 18:03:07 -0700
Subject: xenserver: Rename monitor-external-ids -> ovs-external-ids

Renamed the monitor-external-ids script ovs-external-ids.
Hopefully this will make it clearer who owns it when someone does
ps xa.

Also removed trailing whitespace from ovs-external-ids.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
---
 xenserver/README                                   |   2 +-
 xenserver/automake.mk                              |   2 +-
 xenserver/etc_init.d_openvswitch                   |  12 +-
 xenserver/openvswitch-xen.spec                     |   6 +-
 ..._share_openvswitch_scripts_monitor-external-ids | 268 ---------------------
 .../usr_share_openvswitch_scripts_ovs-external-ids | 268 +++++++++++++++++++++
 6 files changed, 279 insertions(+), 279 deletions(-)
 delete mode 100755 xenserver/usr_share_openvswitch_scripts_monitor-external-ids
 create mode 100755 xenserver/usr_share_openvswitch_scripts_ovs-external-ids

diff --git a/xenserver/README b/xenserver/README
index 941c4e1e7..7da3ac266 100644
--- a/xenserver/README
+++ b/xenserver/README
@@ -46,7 +46,7 @@ files are:
 
         Open vSwitch-aware replacement for Citrix script of the same name.
 
-    usr_share_openvswitch_scripts_monitor-external-ids
+    usr_share_openvswitch_scripts_ovs-external-ids
 
         Daemon to monitor the external_ids columns of the Bridge and
         Interface OVSDB tables.
diff --git a/xenserver/automake.mk b/xenserver/automake.mk
index 85911bb56..fc23a7636 100644
--- a/xenserver/automake.mk
+++ b/xenserver/automake.mk
@@ -24,7 +24,7 @@ EXTRA_DIST += \
 	xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py \
 	xenserver/usr_sbin_brctl \
 	xenserver/usr_sbin_xen-bugtool \
-	xenserver/usr_share_openvswitch_scripts_monitor-external-ids \
+	xenserver/usr_share_openvswitch_scripts_ovs-external-ids \
 	xenserver/usr_share_openvswitch_scripts_refresh-xs-network-uuids \
 	xenserver/usr_share_openvswitch_scripts_sysconfig.template \
 	xenserver/uuid.py
diff --git a/xenserver/etc_init.d_openvswitch b/xenserver/etc_init.d_openvswitch
index 282765323..e4641097d 100755
--- a/xenserver/etc_init.d_openvswitch
+++ b/xenserver/etc_init.d_openvswitch
@@ -118,8 +118,8 @@ else
 fi
 
 function hup_monitor_external_ids {
-    if [ -e /var/run/openvswitch/monitor-external-ids.pid ]; then
-        action "Configuring Open vSwitch external IDs" kill -HUP `cat /var/run/openvswitch/monitor-external-ids.pid`
+    if [ -e /var/run/openvswitch/ovs-external-ids.pid ]; then
+        action "Configuring Open vSwitch external IDs" kill -HUP `cat /var/run/openvswitch/ovs-external-ids.pid`
     fi
 }
 
@@ -352,7 +352,7 @@ function start {
 
     # Start daemon to monitor external ids
     PYTHONPATH=/usr/share/openvswitch/python \
-               /usr/share/openvswitch/scripts/monitor-external-ids \
+               /usr/share/openvswitch/scripts/ovs-external-ids \
                --pidfile --detach "$VSWITCHD_OVSDB_SERVER"
 
     touch /var/lock/subsys/openvswitch
@@ -362,8 +362,8 @@ function stop {
     stop_daemon BRCOMPATD "$brcompatd"
     stop_daemon VSWITCHD "$vswitchd"
     stop_daemon OVSDB_SERVER "$ovsdb_server"
-    if [ -e /var/run/openvswitch/monitor-external-ids.pid ]; then
-        kill `cat /var/run/openvswitch/monitor-external-ids.pid`
+    if [ -e /var/run/openvswitch/ovs-external-ids.pid ]; then
+        kill `cat /var/run/openvswitch/ovs-external-ids.pid`
     fi
     rm -f /var/lock/subsys/openvswitch
 }
@@ -387,7 +387,7 @@ case "$1" in
         ;;
     reload|force-reload)
         # Nothing to do to ovs-vswitchd and ovsdb-server as they keep their
-        # configuration up-to-date all the time.  HUP monitor-external-ids so it
+        # configuration up-to-date all the time.  HUP ovs-external-ids so it
         # re-runs.
         hup_monitor_external_ids
 	;;
diff --git a/xenserver/openvswitch-xen.spec b/xenserver/openvswitch-xen.spec
index e420ec69c..90fee2ab0 100644
--- a/xenserver/openvswitch-xen.spec
+++ b/xenserver/openvswitch-xen.spec
@@ -73,8 +73,8 @@ install -m 644 xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py \
              $RPM_BUILD_ROOT/usr/share/openvswitch/scripts/InterfaceReconfigureVswitch.py
 install -m 755 xenserver/etc_xensource_scripts_vif \
              $RPM_BUILD_ROOT/usr/share/openvswitch/scripts/vif
-install -m 755 xenserver/usr_share_openvswitch_scripts_monitor-external-ids \
-               $RPM_BUILD_ROOT/usr/share/openvswitch/scripts/monitor-external-ids
+install -m 755 xenserver/usr_share_openvswitch_scripts_ovs-external-ids \
+               $RPM_BUILD_ROOT/usr/share/openvswitch/scripts/ovs-external-ids
 install -m 755 xenserver/usr_share_openvswitch_scripts_refresh-xs-network-uuids \
                $RPM_BUILD_ROOT/usr/share/openvswitch/scripts/refresh-xs-network-uuids
 install -m 755 xenserver/usr_sbin_xen-bugtool \
@@ -392,7 +392,7 @@ fi
 /usr/share/openvswitch/python/ovs/timeval.py
 /usr/share/openvswitch/python/ovs/util.py
 /usr/share/openvswitch/python/uuid.py
-/usr/share/openvswitch/scripts/monitor-external-ids
+/usr/share/openvswitch/scripts/ovs-external-ids
 /usr/share/openvswitch/scripts/refresh-xs-network-uuids
 /usr/share/openvswitch/scripts/interface-reconfigure
 /usr/share/openvswitch/scripts/InterfaceReconfigure.py
diff --git a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids
deleted file mode 100755
index 2c2844c63..000000000
--- a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids
+++ /dev/null
@@ -1,268 +0,0 @@
-#!/usr/bin/python
-# Copyright (c) 2009, 2010 Nicira Networks
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at:
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# A daemon to monitor the external_ids columns of the Bridge and
-# Interface OVSDB tables.  Its primary responsibility is to set the
-# "bridge-id" and "iface-id" keys in the Bridge and Interface tables,
-# respectively.  It also looks for the use of "network-uuids" in the
-# Bridge table and duplicates its value to the preferred "xs-network-uuids".
-
-import getopt
-import os
-import signal
-import subprocess
-import sys
-import syslog
-import time
-
-import XenAPI
-
-from ovs.db import error
-from ovs.db import types
-import ovs.util
-import ovs.daemon
-import ovs.db.idl
-
-vsctl="/usr/bin/ovs-vsctl"
-session = None
-force_run = False
-
-# Set up a session to interact with XAPI.
-#
-# On system start-up, OVS comes up before XAPI, so we can't log into the
-# session until later.  Try to do this on-demand, since we won't
-# actually do anything interesting until XAPI is up.
-def init_session():
-    global session
-    if session is not None:
-        return True
-
-    try:
-        session = XenAPI.xapi_local()
-        session.xenapi.login_with_password("", "")
-    except:
-        session = None
-        syslog.syslog(syslog.LOG_WARNING, 
-                "monitor-external-ids: Couldn't login to XAPI")
-        return False
-
-    return True
-
-# By default, the "bridge-id" external id in the Bridge table is the 
-# same as "xs-network-uuids".  This may be overridden by defining a
-# "nicira-bridge-id" key in the "other_config" field of the network
-# record of XAPI.
-def get_bridge_id(br_name, default=None):
-    if not init_session():
-        return default
-
-    for n in session.xenapi.network.get_all():
-        rec = session.xenapi.network.get_record(n)
-        if rec['bridge'] != br_name:
-            continue
-        return rec['other_config'].get('nicira-bridge-id', default)
-
-# By default, the "iface-id" external id in the Interface table is the 
-# same as "xs-vif-uuid".  This may be overridden by defining a
-# "nicira-iface-id" key in the "other_config" field of the VIF
-# record of XAPI.
-def get_iface_id(if_name, default=None):
-    if not if_name.startswith("vif"):
-        return default
-
-    domain,device = if_name.strip("vif").split(".")
-
-    if not init_session():
-        return default
-
-    for n in session.xenapi.VM.get_all():
-        if session.xenapi.VM.get_domid(n) == domain:
-            vifs = session.xenapi.VM.get_VIFs(n)
-            for vif in vifs:
-                rec = session.xenapi.VIF.get_record(vif)
-                if rec['device'] == device:
-                    return rec['other_config'].get('nicira-iface-id', default)
-    return None
-
-
-def set_external_id(table, record, key, value):
-    col = 'external-ids:"' + key + '"="' + value + '"'
-    cmd = [vsctl, "--timeout=30", "-vANY:console:emer", "set", table, record, col]
-    exitcode = subprocess.call(cmd)
-    if exitcode != 0:
-        syslog.syslog(syslog.LOG_WARNING, 
-                "monitor-external-ids: Couldn't call ovs-vsctl")
-
-# XAPI on XenServer 5.6 uses the external-id "network-uuids" for internal
-# networks, but we now prefer "xs-network-uuids".  Look for its use and 
-# write our preferred external-id.
-def update_network_uuids(name, ids):
-    if ids["network-uuids"] and not ids["xs-network-uuids"]:
-        set_external_id("Bridge", name, "xs-network-uuids", 
-                ids["network-uuids"])
-
-def update_bridge_id(name, ids):
-    id = get_bridge_id(name, ids.get("xs-network-uuids"))
-
-    if not id:
-        return
-
-    primary_id = id.split(";")[0]
-
-    if ids.get("bridge-id") != primary_id:
-        set_external_id("Bridge", name, "bridge-id", primary_id)
-
-def update_iface_id(name, ids):
-    id = get_iface_id(name, ids.get("xs-vif-uuid"))
-    if ids.get("iface-id") != id and id:
-        set_external_id("Interface", name, "iface-id", id)
-
-def keep_table_columns(schema, table_name, column_types):
-    table = schema.tables.get(table_name)
-    if not table:
-        raise error.Error("schema has no %s table" % table_name)
-
-    new_columns = {}
-    for column_name, column_type in column_types.iteritems():
-        column = table.columns.get(column_name)
-        if not column:
-            raise error.Error("%s table schema lacks %s column"
-                              % (table_name, column_name))
-        if column.type != column_type:
-            raise error.Error("%s column in %s table has type \"%s\", "
-                              "expected type \"%s\""
-                              % (column_name, table_name,
-                                 column.type.toEnglish(),
-                                 column_type.toEnglish()))
-        new_columns[column_name] = column
-    table.columns = new_columns
-    return table
- 
-def monitor_uuid_schema_cb(schema):
-    string_type = types.Type(types.BaseType(types.StringType))
-    string_map_type = types.Type(types.BaseType(types.StringType),
-                                 types.BaseType(types.StringType),
-                                 0, sys.maxint)
- 
-    new_tables = {}
-    for table_name in ("Bridge", "Interface"):
-        new_tables[table_name] = keep_table_columns(
-            schema, table_name, {"name": string_type,
-                                 "external_ids": string_map_type})
-    schema.tables = new_tables
-
-def usage():
-    print "usage: %s [OPTIONS] DATABASE" % sys.argv[0]
-    print "where DATABASE is a socket on which ovsdb-server is listening."
-    ovs.daemon.usage()
-    print "Other options:"
-    print "  -h, --help               display this help message"
-    sys.exit(0)
-
-def handler(signum, frame):
-    global force_run
-    if (signum == signal.SIGHUP):
-        force_run = True
-
-def main(argv):
-    global force_run
-
-    try:
-        options, args = getopt.gnu_getopt(
-            argv[1:], 'h', ['help'] + ovs.daemon.LONG_OPTIONS)
-    except getopt.GetoptError, geo:
-        sys.stderr.write("%s: %s\n" % (ovs.util.PROGRAM_NAME, geo.msg))
-        sys.exit(1)
- 
-    for key, value in options:
-        if key in ['-h', '--help']:
-            usage()
-        elif not ovs.daemon.parse_opt(key, value):
-            sys.stderr.write("%s: unhandled option %s\n"
-                             % (ovs.util.PROGRAM_NAME, key))
-            sys.exit(1)
- 
-    if len(args) != 1:
-        sys.stderr.write("%s: exactly one nonoption argument is required "
-                         "(use --help for help)\n" % ovs.util.PROGRAM_NAME)
-        sys.exit(1)
-
-    ovs.daemon.die_if_already_running()
- 
-    remote = args[0]
-    idl = ovs.db.idl.Idl(remote, "Open_vSwitch", monitor_uuid_schema_cb)
-
-    ovs.daemon.daemonize()
-
-    # This daemon is usually started before XAPI, but to complete our
-    # tasks, we need it.  Wait here until it's up.
-    while not os.path.exists("/var/run/xapi_init_complete.cookie"):
-        time.sleep(1)
-
-    signal.signal(signal.SIGHUP, handler)
- 
-    bridges = {}
-    interfaces = {}
-    while True:
-        if not force_run and not idl.run():
-            poller = ovs.poller.Poller()
-            idl.wait(poller)
-            poller.block()
-            continue
-
-        if force_run:
-            bridges    = {}
-            interfaces = {}
-            force_run = False
-
-        new_bridges = {}
-        for rec in idl.data["Bridge"].itervalues():
-            name = rec.name.as_scalar()
-            xs_network_uuids = rec.external_ids.get("xs-network-uuids")
-            network_uuids = rec.external_ids.get("network-uuids")
-            new_bridges[name] = {"xs-network-uuids": xs_network_uuids,
-                                 "network-uuids": network_uuids}
- 
-        new_interfaces = {}
-        for rec in idl.data["Interface"].itervalues():
-            name = rec.name.as_scalar()
-            xs_vif_uuid = rec.external_ids.get("xs-vif-uuid")
-            new_interfaces[name] = {"xs-vif-uuid": xs_vif_uuid}
- 
-        if bridges != new_bridges:
-            for name,ids in new_bridges.items():
-                # Network uuids shouldn't change in the life of a bridge,
-                # so only check for "network-uuids" on creation.
-                if name not in bridges:
-                    update_network_uuids(name, ids)
-
-                if (name not in bridges) or (bridges[name] != ids):
-                    update_bridge_id(name, ids)
-
-            bridges = new_bridges
-
-        if interfaces != new_interfaces:
-            for name,ids in new_interfaces.items():
-                if (name not in interfaces) or (interfaces[name] != ids):
-                    update_iface_id(name, ids)
-            interfaces = new_interfaces
- 
-if __name__ == '__main__':
-    try:
-        main(sys.argv)
-    except error.Error, e:
-        sys.stderr.write("%s\n" % e)
-        sys.exit(1)
diff --git a/xenserver/usr_share_openvswitch_scripts_ovs-external-ids b/xenserver/usr_share_openvswitch_scripts_ovs-external-ids
new file mode 100755
index 000000000..72ecb403c
--- /dev/null
+++ b/xenserver/usr_share_openvswitch_scripts_ovs-external-ids
@@ -0,0 +1,268 @@
+#!/usr/bin/python
+# Copyright (c) 2009, 2010 Nicira Networks
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# A daemon to monitor the external_ids columns of the Bridge and
+# Interface OVSDB tables.  Its primary responsibility is to set the
+# "bridge-id" and "iface-id" keys in the Bridge and Interface tables,
+# respectively.  It also looks for the use of "network-uuids" in the
+# Bridge table and duplicates its value to the preferred "xs-network-uuids".
+
+import getopt
+import os
+import signal
+import subprocess
+import sys
+import syslog
+import time
+
+import XenAPI
+
+from ovs.db import error
+from ovs.db import types
+import ovs.util
+import ovs.daemon
+import ovs.db.idl
+
+vsctl="/usr/bin/ovs-vsctl"
+session = None
+force_run = False
+
+# Set up a session to interact with XAPI.
+#
+# On system start-up, OVS comes up before XAPI, so we can't log into the
+# session until later.  Try to do this on-demand, since we won't
+# actually do anything interesting until XAPI is up.
+def init_session():
+    global session
+    if session is not None:
+        return True
+
+    try:
+        session = XenAPI.xapi_local()
+        session.xenapi.login_with_password("", "")
+    except:
+        session = None
+        syslog.syslog(syslog.LOG_WARNING,
+                "ovs-external-ids: Couldn't login to XAPI")
+        return False
+
+    return True
+
+# By default, the "bridge-id" external id in the Bridge table is the
+# same as "xs-network-uuids".  This may be overridden by defining a
+# "nicira-bridge-id" key in the "other_config" field of the network
+# record of XAPI.
+def get_bridge_id(br_name, default=None):
+    if not init_session():
+        return default
+
+    for n in session.xenapi.network.get_all():
+        rec = session.xenapi.network.get_record(n)
+        if rec['bridge'] != br_name:
+            continue
+        return rec['other_config'].get('nicira-bridge-id', default)
+
+# By default, the "iface-id" external id in the Interface table is the
+# same as "xs-vif-uuid".  This may be overridden by defining a
+# "nicira-iface-id" key in the "other_config" field of the VIF
+# record of XAPI.
+def get_iface_id(if_name, default=None):
+    if not if_name.startswith("vif"):
+        return default
+
+    domain,device = if_name.strip("vif").split(".")
+
+    if not init_session():
+        return default
+
+    for n in session.xenapi.VM.get_all():
+        if session.xenapi.VM.get_domid(n) == domain:
+            vifs = session.xenapi.VM.get_VIFs(n)
+            for vif in vifs:
+                rec = session.xenapi.VIF.get_record(vif)
+                if rec['device'] == device:
+                    return rec['other_config'].get('nicira-iface-id', default)
+    return None
+
+
+def set_external_id(table, record, key, value):
+    col = 'external-ids:"' + key + '"="' + value + '"'
+    cmd = [vsctl, "--timeout=30", "-vANY:console:emer", "set", table, record, col]
+    exitcode = subprocess.call(cmd)
+    if exitcode != 0:
+        syslog.syslog(syslog.LOG_WARNING,
+                "ovs-external-ids: Couldn't call ovs-vsctl")
+
+# XAPI on XenServer 5.6 uses the external-id "network-uuids" for internal
+# networks, but we now prefer "xs-network-uuids".  Look for its use and
+# write our preferred external-id.
+def update_network_uuids(name, ids):
+    if ids["network-uuids"] and not ids["xs-network-uuids"]:
+        set_external_id("Bridge", name, "xs-network-uuids",
+                ids["network-uuids"])
+
+def update_bridge_id(name, ids):
+    id = get_bridge_id(name, ids.get("xs-network-uuids"))
+
+    if not id:
+        return
+
+    primary_id = id.split(";")[0]
+
+    if ids.get("bridge-id") != primary_id:
+        set_external_id("Bridge", name, "bridge-id", primary_id)
+
+def update_iface_id(name, ids):
+    id = get_iface_id(name, ids.get("xs-vif-uuid"))
+    if ids.get("iface-id") != id and id:
+        set_external_id("Interface", name, "iface-id", id)
+
+def keep_table_columns(schema, table_name, column_types):
+    table = schema.tables.get(table_name)
+    if not table:
+        raise error.Error("schema has no %s table" % table_name)
+
+    new_columns = {}
+    for column_name, column_type in column_types.iteritems():
+        column = table.columns.get(column_name)
+        if not column:
+            raise error.Error("%s table schema lacks %s column"
+                              % (table_name, column_name))
+        if column.type != column_type:
+            raise error.Error("%s column in %s table has type \"%s\", "
+                              "expected type \"%s\""
+                              % (column_name, table_name,
+                                 column.type.toEnglish(),
+                                 column_type.toEnglish()))
+        new_columns[column_name] = column
+    table.columns = new_columns
+    return table
+
+def monitor_uuid_schema_cb(schema):
+    string_type = types.Type(types.BaseType(types.StringType))
+    string_map_type = types.Type(types.BaseType(types.StringType),
+                                 types.BaseType(types.StringType),
+                                 0, sys.maxint)
+
+    new_tables = {}
+    for table_name in ("Bridge", "Interface"):
+        new_tables[table_name] = keep_table_columns(
+            schema, table_name, {"name": string_type,
+                                 "external_ids": string_map_type})
+    schema.tables = new_tables
+
+def usage():
+    print "usage: %s [OPTIONS] DATABASE" % sys.argv[0]
+    print "where DATABASE is a socket on which ovsdb-server is listening."
+    ovs.daemon.usage()
+    print "Other options:"
+    print "  -h, --help               display this help message"
+    sys.exit(0)
+
+def handler(signum, frame):
+    global force_run
+    if (signum == signal.SIGHUP):
+        force_run = True
+
+def main(argv):
+    global force_run
+
+    try:
+        options, args = getopt.gnu_getopt(
+            argv[1:], 'h', ['help'] + ovs.daemon.LONG_OPTIONS)
+    except getopt.GetoptError, geo:
+        sys.stderr.write("%s: %s\n" % (ovs.util.PROGRAM_NAME, geo.msg))
+        sys.exit(1)
+
+    for key, value in options:
+        if key in ['-h', '--help']:
+            usage()
+        elif not ovs.daemon.parse_opt(key, value):
+            sys.stderr.write("%s: unhandled option %s\n"
+                             % (ovs.util.PROGRAM_NAME, key))
+            sys.exit(1)
+
+    if len(args) != 1:
+        sys.stderr.write("%s: exactly one nonoption argument is required "
+                         "(use --help for help)\n" % ovs.util.PROGRAM_NAME)
+        sys.exit(1)
+
+    ovs.daemon.die_if_already_running()
+
+    remote = args[0]
+    idl = ovs.db.idl.Idl(remote, "Open_vSwitch", monitor_uuid_schema_cb)
+
+    ovs.daemon.daemonize()
+
+    # This daemon is usually started before XAPI, but to complete our
+    # tasks, we need it.  Wait here until it's up.
+    while not os.path.exists("/var/run/xapi_init_complete.cookie"):
+        time.sleep(1)
+
+    signal.signal(signal.SIGHUP, handler)
+
+    bridges = {}
+    interfaces = {}
+    while True:
+        if not force_run and not idl.run():
+            poller = ovs.poller.Poller()
+            idl.wait(poller)
+            poller.block()
+            continue
+
+        if force_run:
+            bridges    = {}
+            interfaces = {}
+            force_run  = False
+
+        new_bridges = {}
+        for rec in idl.data["Bridge"].itervalues():
+            name = rec.name.as_scalar()
+            xs_network_uuids = rec.external_ids.get("xs-network-uuids")
+            network_uuids = rec.external_ids.get("network-uuids")
+            new_bridges[name] = {"xs-network-uuids": xs_network_uuids,
+                                 "network-uuids": network_uuids}
+
+        new_interfaces = {}
+        for rec in idl.data["Interface"].itervalues():
+            name = rec.name.as_scalar()
+            xs_vif_uuid = rec.external_ids.get("xs-vif-uuid")
+            new_interfaces[name] = {"xs-vif-uuid": xs_vif_uuid}
+
+        if bridges != new_bridges:
+            for name,ids in new_bridges.items():
+                # Network uuids shouldn't change in the life of a bridge,
+                # so only check for "network-uuids" on creation.
+                if name not in bridges:
+                    update_network_uuids(name, ids)
+
+                if (name not in bridges) or (bridges[name] != ids):
+                    update_bridge_id(name, ids)
+
+            bridges = new_bridges
+
+        if interfaces != new_interfaces:
+            for name,ids in new_interfaces.items():
+                if (name not in interfaces) or (interfaces[name] != ids):
+                    update_iface_id(name, ids)
+            interfaces = new_interfaces
+
+if __name__ == '__main__':
+    try:
+        main(sys.argv)
+    except error.Error, e:
+        sys.stderr.write("%s\n" % e)
+        sys.exit(1)
-- 
cgit v1.2.1


From 9704460403a02554595b5a7ed17914a0b0357cac Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Thu, 23 Sep 2010 13:37:58 -0700
Subject: debian: Use DODTIME instead of DIETIME in init scripts

Brad pointed out that the openvswitch-ipsec init script defined the
variable DIETIME but attempted to use it as DODTIME.  This commit uses
DODTIME, since it's the name used by the openvswitch-switch init script.
The openvswitch-controller init script had the same issue.

As suggested by Ben, the "s" suffixes are removed from sleep commands,
since they are a GNU extension.

Reported-by: Brad Hall <brad@nicira.com>
---
 AUTHORS                            | 1 +
 debian/openvswitch-controller.init | 6 +++---
 debian/openvswitch-ipsec.init      | 6 +++---
 debian/openvswitch-switch.init     | 4 ++--
 4 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/AUTHORS b/AUTHORS
index 8d2aed834..3d9ddd336 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -35,6 +35,7 @@ The following additional people are mentioned in commit logs as having
 provided helpful bug reports or suggestions.
 
 Alexey I. Froloff       raorn@altlinux.org
+Brad Hall               brad@nicira.com
 Brandon Heller          brandonh@stanford.edu
 Bryan Fulton            bryan@nicira.com
 Cedric Hobbs            cedric@nicira.com
diff --git a/debian/openvswitch-controller.init b/debian/openvswitch-controller.init
index d489869ed..4781f83f2 100755
--- a/debian/openvswitch-controller.init
+++ b/debian/openvswitch-controller.init
@@ -175,10 +175,10 @@ force_stop() {
     if running ; then
         kill -15 $pid
         # Is it really dead?
-        sleep "$DIETIME"s
+        sleep "$DODTIME"
         if running ; then
             kill -9 $pid
-            sleep "$DIETIME"s
+            sleep "$DODTIME"
             if running ; then
                 echo "Cannot kill $NAME (pid=$pid)!"
                 exit 1
@@ -237,7 +237,7 @@ case "$1" in
         log_daemon_msg "Restarting $DESC" "$NAME"
         stop_server
         # Wait some sensible amount, some server need this
-        [ -n "$DIETIME" ] && sleep $DIETIME
+        [ -n "$DODTIME" ] && sleep $DODTIME
         start_server
         running
         log_end_msg $?
diff --git a/debian/openvswitch-ipsec.init b/debian/openvswitch-ipsec.init
index f3c9a13a0..5f4240672 100755
--- a/debian/openvswitch-ipsec.init
+++ b/debian/openvswitch-ipsec.init
@@ -91,10 +91,10 @@ force_stop() {
     if running ; then
         kill -15 $pid
         # Is it really dead?
-        sleep "$DIETIME"s
+        sleep "$DODTIME"
         if running ; then
             kill -9 $pid
-            sleep "$DIETIME"s
+            sleep "$DODTIME"
             if running ; then
                 echo "Cannot kill $NAME (pid=$pid)!"
                 exit 1
@@ -153,7 +153,7 @@ case "$1" in
         log_daemon_msg "Restarting $NAME"
         stop_server
         # Wait some sensible amount, some server need this
-        [ -n "$DIETIME" ] && sleep $DIETIME
+        [ -n "$DODTIME" ] && sleep $DODTIME
         start_server
         running
         log_end_msg $?
diff --git a/debian/openvswitch-switch.init b/debian/openvswitch-switch.init
index 600c18564..0907cdf7c 100755
--- a/debian/openvswitch-switch.init
+++ b/debian/openvswitch-switch.init
@@ -93,10 +93,10 @@ force_stop() {
     [ ! -f "$pidfile" ] && return
     if running $name; then
         kill $pid
-        [ -n "$DODTIME" ] && sleep "$DODTIME"s
+        [ -n "$DODTIME" ] && sleep "$DODTIME"
         if running $name; then
             kill -KILL $pid
-            [ -n "$DODTIME" ] && sleep "$DODTIME"s
+            [ -n "$DODTIME" ] && sleep "$DODTIME"
             if running $name; then
                 echo "Cannot kill $name (pid=$pid)!"
                 exit 1
-- 
cgit v1.2.1


From d1984028446c334c33569127f6db98a423569f2d Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Thu, 23 Sep 2010 16:25:25 -0700
Subject: vswitch: Disable header-caching when IPsec is enabled

Header caching speeds up sending tunneled traffic by bypassing the Linux
IP stack.  This also causes it to bypass IPsec processing, which will
break connectivity.  This commit disables header caching when IPsec is
enabled.
---
 lib/netdev-tunnel.c  | 15 +++++++++++++++
 vswitchd/bridge.c    | 14 ++++++++++++++
 vswitchd/vswitch.xml |  4 +++-
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/lib/netdev-tunnel.c b/lib/netdev-tunnel.c
index fdc1d976a..079830e63 100644
--- a/lib/netdev-tunnel.c
+++ b/lib/netdev-tunnel.c
@@ -62,6 +62,8 @@ parse_config(const char *name, const char *type, const struct shash *args,
              struct tnl_port_config *config)
 {
     struct shash_node *node;
+    bool ipsec_ip_set = false;
+    bool ipsec_mech_set = false;
 
     memset(config, 0, sizeof *config);
 
@@ -126,11 +128,24 @@ parse_config(const char *name, const char *type, const struct shash *args,
             if (!strcmp(node->data, "false")) {
                 config->flags &= ~TNL_F_HDR_CACHE;
             }
+        } else if (!strcmp(node->name, "ipsec_local_ip")) {
+            ipsec_ip_set = true;
+        } else if (!strcmp(node->name, "ipsec_cert")
+                   || !strcmp(node->name, "ipsec_psk")) {
+            ipsec_mech_set = true;
         } else {
             VLOG_WARN("%s: unknown %s argument '%s'", name, type, node->name);
         }
     }
 
+    /* IPsec doesn't work when header caching is enabled.  Disable it if
+     * the IPsec local IP address and authentication mechanism have been
+     * defined. */
+    if (ipsec_ip_set && ipsec_mech_set) {
+        VLOG_INFO("%s: header caching disabled due to use of IPsec", name);
+        config->flags &= ~TNL_F_HDR_CACHE;
+    }
+
     if (!config->daddr) {
         VLOG_WARN("%s: %s type requires valid 'remote_ip' argument", name, type);
         return EINVAL;
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index c86e4d5bc..6c271fb7a 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -374,6 +374,20 @@ set_up_iface(const struct ovsrec_interface *iface_cfg, struct iface *iface,
                   xstrdup(iface_cfg->value_options[i]));
     }
 
+    /* Include 'other_config' keys in hash of netdev options.  The
+     * namespace of 'other_config' and 'options' must be disjoint.
+     * Prefer 'options' keys over 'other_config' keys. */
+    for (i = 0; i < iface_cfg->n_other_config; i++) {
+        char *value = xstrdup(iface_cfg->value_other_config[i]);
+        if (!shash_add_once(&options, iface_cfg->key_other_config[i],
+                            value)) {
+            VLOG_WARN("%s: \"other_config\" key %s conflicts with existing "
+                      "\"other_config\" or \"options\" entry...ignoring",
+                      iface_cfg->name, iface_cfg->key_other_config[i]);
+            free(value);
+        }
+    }
+
     if (create) {
         struct netdev_options netdev_options;
 
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 141c5fe53..242106d0d 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -661,7 +661,9 @@
 		bypass certain components of the IP stack (such as IP tables)
 		and it may be useful to disable it if these features are
 		required or as a debugging measure.  Default is enabled, set to
-		<code>false</code> to disable.</dd>
+		<code>false</code> to disable.  If IPsec is enabled through the
+		<ref column="other_config"/> parameters, header caching will be
+		automatically disabled.</dd>
             </dl>
           </dd>
           <dt><code>capwap</code></dt>
-- 
cgit v1.2.1


From c1fdab010642441ca14954383ab351fc5f38465a Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Thu, 23 Sep 2010 15:11:39 -0700
Subject: netdev: Cleanup minor comment formatting issues.

---
 lib/netdev.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/lib/netdev.c b/lib/netdev.c
index c1eb5d04f..d93cabe3b 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -285,13 +285,12 @@ create_device(struct netdev_options *options, struct netdev_dev **netdev_devp)
  * to the new network device, otherwise to null.
  *
  * If this is the first time the device has been opened, then create is called
- * before opening.  The device is  created using the given type and arguments.
+ * before opening.  The device is created using the given type and arguments.
  *
  * 'ethertype' may be a 16-bit Ethernet protocol value in host byte order to
  * capture frames of that type received on the device.  It may also be one of
  * the 'enum netdev_pseudo_ethertype' values to receive frames in one of those
  * categories. */
-
 int
 netdev_open(struct netdev_options *options, struct netdev **netdevp)
 {
@@ -465,8 +464,7 @@ netdev_enumerate(struct svec *svec)
  * be returned.
  *
  * Some network devices may not implement support for this function.  In such
- * cases this function will always return EOPNOTSUPP.
- */
+ * cases this function will always return EOPNOTSUPP. */
 int
 netdev_recv(struct netdev *netdev, struct ofpbuf *buffer)
 {
@@ -632,8 +630,7 @@ netdev_get_ifindex(const struct netdev *netdev)
  * passed-in values are set to 0.
  *
  * Some network devices may not implement support for this function.  In such
- * cases this function will always return EOPNOTSUPP.
- */
+ * cases this function will always return EOPNOTSUPP. */
 int
 netdev_get_features(struct netdev *netdev,
                     uint32_t *current, uint32_t *advertised,
@@ -718,8 +715,8 @@ netdev_set_advertisements(struct netdev *netdev, uint32_t advertise)
  *
  *   - EOPNOTSUPP: No IPv4 network stack attached to 'netdev'.
  *
- * 'address' or 'netmask' or both may be null, in which case the address or netmask
- * is not reported. */
+ * 'address' or 'netmask' or both may be null, in which case the address or 
+ * netmask is not reported. */
 int
 netdev_get_in4(const struct netdev *netdev,
                struct in_addr *address_, struct in_addr *netmask_)
@@ -1150,8 +1147,7 @@ netdev_get_queue(const struct netdev *netdev,
  * the current form of QoS (e.g. as returned by netdev_get_n_queues(netdev)).
  *
  * This function does not modify 'details', and the caller retains ownership of
- * it.
- */
+ * it. */
 int
 netdev_set_queue(struct netdev *netdev,
                  unsigned int queue_id, const struct shash *details)
@@ -1543,8 +1539,7 @@ netdev_monitor_remove(struct netdev_monitor *monitor, struct netdev *netdev)
  * sets '*devnamep' to the name of a device that has changed and returns 0.
  * The caller is responsible for freeing '*devnamep' (with free()).
  *
- * If no devices have changed, sets '*devnamep' to NULL and returns EAGAIN.
- */
+ * If no devices have changed, sets '*devnamep' to NULL and returns EAGAIN. */
 int
 netdev_monitor_poll(struct netdev_monitor *monitor, char **devnamep)
 {
-- 
cgit v1.2.1


From 9a3f4a496f9772b65562044f722536cd6270f698 Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Thu, 23 Sep 2010 16:32:13 -0700
Subject: vswitch: Consistently use spaces instead of tabs in vswitch.xml

---
 vswitchd/vswitch.xml | 96 ++++++++++++++++++++++++++--------------------------
 1 file changed, 48 insertions(+), 48 deletions(-)

diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 242106d0d..8b2221b83 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -655,15 +655,15 @@
             <dl>
               <dt><code>header_cache</code></dt>
               <dd>Optional.  Enable caching of tunnel headers and the output
-		path.  This can lead to a significant performance increase
-		without changing behavior.  In general it should not be
-		necessary to adjust this setting.  However, the caching can
-		bypass certain components of the IP stack (such as IP tables)
-		and it may be useful to disable it if these features are
-		required or as a debugging measure.  Default is enabled, set to
-		<code>false</code> to disable.  If IPsec is enabled through the
-		<ref column="other_config"/> parameters, header caching will be
-		automatically disabled.</dd>
+                path.  This can lead to a significant performance increase
+                without changing behavior.  In general it should not be
+                necessary to adjust this setting.  However, the caching can
+                bypass certain components of the IP stack (such as IP tables)
+                and it may be useful to disable it if these features are
+                required or as a debugging measure.  Default is enabled, set to
+                <code>false</code> to disable.  If IPsec is enabled through the
+                <ref column="other_config"/> parameters, header caching will be
+                automatically disabled.</dd>
             </dl>
           </dd>
           <dt><code>capwap</code></dt>
@@ -723,13 +723,13 @@
             <dl>
               <dt><code>header_cache</code></dt>
               <dd>Optional.  Enable caching of tunnel headers and the output
-		path.  This can lead to a significant performance increase
-		without changing behavior.  In general it should not be
-		necessary to adjust this setting.  However, the caching can
-		bypass certain components of the IP stack (such as IP tables)
-		and it may be useful to disable it if these features are
-		required or as a debugging measure.  Default is enabled, set to
-		<code>false</code> to disable.</dd>
+                path.  This can lead to a significant performance increase
+                without changing behavior.  In general it should not be
+                necessary to adjust this setting.  However, the caching can
+                bypass certain components of the IP stack (such as IP tables)
+                and it may be useful to disable it if these features are
+                required or as a debugging measure.  Default is enabled, set to
+                <code>false</code> to disable.</dd>
             </dl>
           </dd>
           <dt><code>patch</code></dt>
@@ -1300,34 +1300,34 @@
             restricted to the specified local IP address.
           </dd>
         </dl>
-	<p>When multiple controllers are configured for a single bridge, the
-	  <ref column="target"/> values must be unique.  Duplicate
-	  <ref column="target"/> values yield unspecified results.</p>
+        <p>When multiple controllers are configured for a single bridge, the
+          <ref column="target"/> values must be unique.  Duplicate
+          <ref column="target"/> values yield unspecified results.</p>
       </column>
 
       <column name="connection_mode">
-	<p>If it is specified, this setting must be one of the following
-	strings that describes how Open vSwitch contacts this OpenFlow
-	controller over the network:</p>
-
-	<dl>
-	  <dt><code>in-band</code></dt>
-	  <dd>In this mode, this controller's OpenFlow traffic travels over the
-	    bridge associated with the controller.  With this setting, Open
-	    vSwitch allows traffic to and from the controller regardless of the
-	    contents of the OpenFlow flow table.  (Otherwise, Open vSwitch
-	    would never be able to connect to the controller, because it did
-	    not have a flow to enable it.)  This is the most common connection
-	    mode because it is not necessary to maintain two independent
-	    networks.</dd>
-	  <dt><code>out-of-band</code></dt>
-	  <dd>In this mode, OpenFlow traffic uses a control network separate
-	    from the bridge associated with this controller, that is, the
-	    bridge does not use any of its own network devices to communicate
-	    with the controller.  The control network must be configured
-	    separately, before or after <code>ovs-vswitchd</code> is started.
-	  </dd>
-	</dl>
+        <p>If it is specified, this setting must be one of the following
+        strings that describes how Open vSwitch contacts this OpenFlow
+        controller over the network:</p>
+
+        <dl>
+          <dt><code>in-band</code></dt>
+          <dd>In this mode, this controller's OpenFlow traffic travels over the
+            bridge associated with the controller.  With this setting, Open
+            vSwitch allows traffic to and from the controller regardless of the
+            contents of the OpenFlow flow table.  (Otherwise, Open vSwitch
+            would never be able to connect to the controller, because it did
+            not have a flow to enable it.)  This is the most common connection
+            mode because it is not necessary to maintain two independent
+            networks.</dd>
+          <dt><code>out-of-band</code></dt>
+          <dd>In this mode, OpenFlow traffic uses a control network separate
+            from the bridge associated with this controller, that is, the
+            bridge does not use any of its own network devices to communicate
+            with the controller.  The control network must be configured
+            separately, before or after <code>ovs-vswitchd</code> is started.
+          </dd>
+        </dl>
 
         <p>If not specified, the default is implementation-specific.  If
           <ref column="target"/> is <code>discover</code>, the connection mode
@@ -1384,7 +1384,7 @@
 
     <group title="Additional Discovery Configuration">
       <p>These values are considered only when <ref column="target"/>
-	is <code>discover</code>.</p>
+        is <code>discover</code>.</p>
 
       <column name="discover_accept_regex">
         A POSIX
@@ -1406,14 +1406,14 @@
 
     <group title="Additional In-Band Configuration">
       <p>These values are considered only in in-band control mode (see
-	<ref column="connection_mode"/>) and only when <ref column="target"/>
-	is not <code>discover</code>.  (For controller discovery, the network
-	configuration obtained via DHCP is used instead.)</p>
+        <ref column="connection_mode"/>) and only when <ref column="target"/>
+        is not <code>discover</code>.  (For controller discovery, the network
+        configuration obtained via DHCP is used instead.)</p>
 
       <p>When multiple controllers are configured on a single bridge, there
-	should be only one set of unique values in these columns.  If different
-	values are set for these columns in different controllers, the effect
-	is unspecified.</p>
+        should be only one set of unique values in these columns.  If different
+        values are set for these columns in different controllers, the effect
+        is unspecified.</p>
 
       <column name="local_ip">
         The IP address to configure on the local port,
-- 
cgit v1.2.1


From 0d8568c13e6cfd34e0da977279f93beb26e33968 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Wed, 22 Sep 2010 15:38:17 -0700
Subject: xenserver: Added additional logging to ovs-external-ids

Added additional logging to make debugging easier and migrated to the
Python logging module.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
---
 .../usr_share_openvswitch_scripts_ovs-external-ids | 28 ++++++++++++++++------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/xenserver/usr_share_openvswitch_scripts_ovs-external-ids b/xenserver/usr_share_openvswitch_scripts_ovs-external-ids
index 72ecb403c..c69fe3775 100755
--- a/xenserver/usr_share_openvswitch_scripts_ovs-external-ids
+++ b/xenserver/usr_share_openvswitch_scripts_ovs-external-ids
@@ -21,11 +21,11 @@
 # Bridge table and duplicates its value to the preferred "xs-network-uuids".
 
 import getopt
+import logging, logging.handlers
 import os
 import signal
 import subprocess
 import sys
-import syslog
 import time
 
 import XenAPI
@@ -36,6 +36,14 @@ import ovs.util
 import ovs.daemon
 import ovs.db.idl
 
+s_log     = logging.getLogger("ovs-external-ids")
+l_handler = logging.handlers.SysLogHandler(
+        "/dev/log",
+        facility=logging.handlers.SysLogHandler.LOG_DAEMON)
+l_formatter = logging.Formatter('%(filename)s: %(levelname)s: %(message)s')
+l_handler.setFormatter(l_formatter)
+s_log.addHandler(l_handler)
+
 vsctl="/usr/bin/ovs-vsctl"
 session = None
 force_run = False
@@ -55,8 +63,7 @@ def init_session():
         session.xenapi.login_with_password("", "")
     except:
         session = None
-        syslog.syslog(syslog.LOG_WARNING,
-                "ovs-external-ids: Couldn't login to XAPI")
+        s_log.warning("Couldn't login to XAPI")
         return False
 
     return True
@@ -67,6 +74,8 @@ def init_session():
 # record of XAPI.
 def get_bridge_id(br_name, default=None):
     if not init_session():
+        s_log.warning("Failed to get bridge id %s because"
+                " XAPI session could not be initialized" % br_name)
         return default
 
     for n in session.xenapi.network.get_all():
@@ -86,6 +95,8 @@ def get_iface_id(if_name, default=None):
     domain,device = if_name.strip("vif").split(".")
 
     if not init_session():
+        s_log.warning("Failed to get interface id %s because"
+                " XAPI session could not be initialized" % if_name)
         return default
 
     for n in session.xenapi.VM.get_all():
@@ -103,8 +114,7 @@ def set_external_id(table, record, key, value):
     cmd = [vsctl, "--timeout=30", "-vANY:console:emer", "set", table, record, col]
     exitcode = subprocess.call(cmd)
     if exitcode != 0:
-        syslog.syslog(syslog.LOG_WARNING,
-                "ovs-external-ids: Couldn't call ovs-vsctl")
+        s_log.warning("Couldn't call ovs-vsctl")
 
 # XAPI on XenServer 5.6 uses the external-id "network-uuids" for internal
 # networks, but we now prefer "xs-network-uuids".  Look for its use and
@@ -224,6 +234,7 @@ def main(argv):
             continue
 
         if force_run:
+            s_log.info("Forced to re-run as the result of a SIGHUP")
             bridges    = {}
             interfaces = {}
             force_run  = False
@@ -263,6 +274,9 @@ def main(argv):
 if __name__ == '__main__':
     try:
         main(sys.argv)
-    except error.Error, e:
-        sys.stderr.write("%s\n" % e)
+    except SystemExit:
+        # Let system.exit() calls complete normally
+        raise
+    except:
+        s_log.exception("traceback")
         sys.exit(1)
-- 
cgit v1.2.1


From 998bb652ecb494af8aa38ed0ae4e653cd8dff709 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Wed, 22 Sep 2010 12:40:39 -0700
Subject: xenserver: monitor-external-ids should run with --monitor

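The init script starts ovs-external-ids (formerly monitor-external-ids)
with --monitor when configured to do so.  Also made changes to guarantee
that --monitor actually restarts ovs-external-ids: an unhandled exception
now makes the script exit with ovs.daemon.RESTART_EXIT_CODE, which the
monitor treats as a request to restart the daemon.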

Signed-off-by: Ethan Jackson <ethan@nicira.com>
---
 python/ovs/daemon.py                                     | 7 +++++++
 xenserver/etc_init.d_openvswitch                         | 2 +-
 xenserver/usr_share_openvswitch_scripts_ovs-external-ids | 2 +-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/python/ovs/daemon.py b/python/ovs/daemon.py
index eaaaa519b..6dff3a02f 100644
--- a/python/ovs/daemon.py
+++ b/python/ovs/daemon.py
@@ -52,6 +52,8 @@ _monitor = False
 # File descriptor used by daemonize_start() and daemonize_complete().
 _daemonize_fd = None
 
+RESTART_EXIT_CODE = 5
+
 def make_pidfile_name(name):
     """Returns the file name that would be used for a pidfile if 'name' were
     provided to set_pidfile()."""
@@ -266,6 +268,11 @@ def _fork_notify_startup(fd):
         os.close(fd)
 
 def _should_restart(status):
+    global RESTART_EXIT_CODE
+
+    if os.WIFEXITED(status) and os.WEXITSTATUS(status) == RESTART_EXIT_CODE:
+        return True
+
     if os.WIFSIGNALED(status):
         for signame in ("SIGABRT", "SIGALRM", "SIGBUS", "SIGFPE", "SIGILL",
                         "SIGPIPE", "SIGSEGV", "SIGXCPU", "SIGXFSZ"):
diff --git a/xenserver/etc_init.d_openvswitch b/xenserver/etc_init.d_openvswitch
index e4641097d..5f18196cf 100755
--- a/xenserver/etc_init.d_openvswitch
+++ b/xenserver/etc_init.d_openvswitch
@@ -353,7 +353,7 @@ function start {
     # Start daemon to monitor external ids
     PYTHONPATH=/usr/share/openvswitch/python \
                /usr/share/openvswitch/scripts/ovs-external-ids \
-               --pidfile --detach "$VSWITCHD_OVSDB_SERVER"
+               --pidfile --detach $monitor_opt "$VSWITCHD_OVSDB_SERVER"
 
     touch /var/lock/subsys/openvswitch
 }
diff --git a/xenserver/usr_share_openvswitch_scripts_ovs-external-ids b/xenserver/usr_share_openvswitch_scripts_ovs-external-ids
index c69fe3775..fc27aaae1 100755
--- a/xenserver/usr_share_openvswitch_scripts_ovs-external-ids
+++ b/xenserver/usr_share_openvswitch_scripts_ovs-external-ids
@@ -279,4 +279,4 @@ if __name__ == '__main__':
         raise
     except:
         s_log.exception("traceback")
-        sys.exit(1)
+        sys.exit(ovs.daemon.RESTART_EXIT_CODE)
-- 
cgit v1.2.1


From 9c64f2384d850658985d7e18003443c196e89ae1 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Wed, 22 Sep 2010 22:48:42 -0700
Subject: tests: Remove trailing whitespace from python daemon tests

Removes the trailing whitespace from the testing code related to
daemonizing in Python.
---
 tests/daemon-py.at   | 10 +++++-----
 tests/test-daemon.py |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/daemon-py.at b/tests/daemon-py.at
index 7ff376eb7..9e5f2793b 100644
--- a/tests/daemon-py.at
+++ b/tests/daemon-py.at
@@ -33,8 +33,8 @@ AT_CHECK([kill -0 `cat pid`], [0], [], [], [kill `cat parent`])
 AT_CHECK([ps -o ppid= -p `cat pid` > parentpid],
   [0], [], [], [kill `cat parent`])
 AT_CHECK(
-  [parentpid=`cat parentpid` && 
-   parent=`cat parent` && 
+  [parentpid=`cat parentpid` &&
+   parent=`cat parent` &&
    test $parentpid = $parent],
   [0], [], [], [kill `cat parent`])
 # Kill the daemon process, making it look like a segfault,
@@ -50,8 +50,8 @@ AT_CHECK([cp pid newpid], [0], [], [], [kill `cat parent`])
 AT_CHECK([ps -o ppid= -p `cat pid` > parentpid],
   [0], [], [], [kill `cat parent`])
 AT_CHECK(
-  [parentpid=`cat parentpid` && 
-   parent=`cat parent` && 
+  [parentpid=`cat parentpid` &&
+   parent=`cat parent` &&
    test $parentpid = $parent],
   [0], [], [], [kill `cat parent`])
 # Kill the daemon process with SIGTERM, and wait for the daemon
@@ -79,7 +79,7 @@ AT_CLEANUP
 
 AT_SETUP([daemon --detach --monitor - Python])
 AT_SKIP_IF([test $HAVE_PYTHON = no])
-m4_define([CHECK], 
+m4_define([CHECK],
   [AT_CHECK([$1], [$2], [$3], [$4], [kill `cat daemon monitor`])])
 AT_CAPTURE_FILE([daemon])
 AT_CAPTURE_FILE([olddaemon])
diff --git a/tests/test-daemon.py b/tests/test-daemon.py
index 3c757f308..98a516580 100644
--- a/tests/test-daemon.py
+++ b/tests/test-daemon.py
@@ -1,11 +1,11 @@
 # Copyright (c) 2010 Nicira Networks.
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
-# 
+#
 #     http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- 
cgit v1.2.1


From 6793129d369dee182d9260620d1db9188f09d986 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Wed, 22 Sep 2010 21:59:02 -0700
Subject: tests: Test that children restart with special exit code

Added a test which checks that the python daemon code properly
restarts child processes which exit with RESTART_EXIT_CODE.
---
 tests/daemon-py.at   | 43 +++++++++++++++++++++++++++++++++++++++++++
 tests/test-daemon.py | 15 ++++++++++++++-
 2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/tests/daemon-py.at b/tests/daemon-py.at
index 9e5f2793b..9a2549c11 100644
--- a/tests/daemon-py.at
+++ b/tests/daemon-py.at
@@ -61,6 +61,49 @@ OVS_WAIT_WHILE([kill -0 `cat parent` || kill -0 `cat newpid` || test -e pid],
   [kill `cat parent`])
 AT_CLEANUP
 
+AT_SETUP([daemon --monitor restart exit code - Python])
+AT_SKIP_IF([test $HAVE_PYTHON = no])
+AT_CAPTURE_FILE([pid])
+AT_CAPTURE_FILE([parent])
+AT_CAPTURE_FILE([parentpid])
+AT_CAPTURE_FILE([newpid])
+# Start the daemon and wait for the pidfile to get created.
+AT_CHECK([$PYTHON $srcdir/test-daemon.py --pidfile-name=$PWD/pid --monitor& echo $! > parent], [0], [ignore], [ignore])
+OVS_WAIT_UNTIL([test -s pid], [kill `cat parent`])
+# Check that the pidfile names a running process,
+# and that the parent process of that process is our child process.
+AT_CHECK([kill -0 `cat pid`], [0], [], [], [kill `cat parent`])
+AT_CHECK([ps -o ppid= -p `cat pid` > parentpid],
+  [0], [], [], [kill `cat parent`])
+AT_CHECK(
+  [parentpid=`cat parentpid` &&
+   parent=`cat parent` &&
+   test $parentpid = $parent],
+  [0], [], [], [kill `cat parent`])
+# HUP the daemon process causing it to throw an exception,
+# and wait for a new child process to get spawned.
+AT_CHECK([cp pid oldpid], [0], [], [], [kill `cat parent`])
+AT_CHECK([kill -HUP `cat pid`], [0], [], [ignore], [kill `cat parent`])
+OVS_WAIT_WHILE([kill -0 `cat oldpid`], [kill `cat parent`])
+OVS_WAIT_UNTIL([test -s pid && test `cat pid` != `cat oldpid`],
+  [kill `cat parent`])
+AT_CHECK([cp pid newpid], [0], [], [], [kill `cat parent`])
+# Check that the pidfile names a running process,
+# and that the parent process of that process is our child process.
+AT_CHECK([ps -o ppid= -p `cat pid` > parentpid],
+  [0], [], [], [kill `cat parent`])
+AT_CHECK(
+  [parentpid=`cat parentpid` &&
+   parent=`cat parent` &&
+   test $parentpid = $parent],
+  [0], [], [], [kill `cat parent`])
+# Kill the daemon process with SIGTERM, and wait for the daemon
+# and the monitor processes to go away and the pidfile to get deleted.
+AT_CHECK([kill `cat pid`], [0], [], [ignore], [kill `cat parent`])
+OVS_WAIT_WHILE([kill -0 `cat parent` || kill -0 `cat newpid` || test -e pid],
+  [kill `cat parent`])
+AT_CLEANUP
+
 AT_SETUP([daemon --detach - Python])
 AT_SKIP_IF([test $HAVE_PYTHON = no])
 AT_CAPTURE_FILE([pid])
diff --git a/tests/test-daemon.py b/tests/test-daemon.py
index 98a516580..386445d44 100644
--- a/tests/test-daemon.py
+++ b/tests/test-daemon.py
@@ -13,13 +13,20 @@
 # limitations under the License.
 
 import getopt
+import signal
 import sys
 import time
 
 import ovs.daemon
 import ovs.util
 
+def handler(signum, frame):
+    raise Exception("Signal handler called with %d" % signum)
+
 def main(argv):
+
+    signal.signal(signal.SIGHUP, handler)
+
     try:
         options, args = getopt.gnu_getopt(
             argv[1:], 'b', ["bail", "help"] + ovs.daemon.LONG_OPTIONS)
@@ -63,4 +70,10 @@ Other options:
     sys.exit(0)
 
 if __name__ == '__main__':
-    main(sys.argv)
+    try:
+        main(sys.argv)
+    except SystemExit:
+        # Let system.exit() calls complete normally
+        raise
+    except:
+        sys.exit(ovs.daemon.RESTART_EXIT_CODE)
-- 
cgit v1.2.1


From 85d32f1feed0befba10dd9727a99d16b22b3e0b1 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Wed, 22 Sep 2010 23:45:58 -0700
Subject: debian: Added a manpage for ovs-bugtool

Added a manpage for the ovs-bugtool script.
---
 debian/automake.mk                |  1 +
 debian/openvswitch-common.install |  1 +
 debian/ovs-bugtool.8              | 46 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 48 insertions(+)
 create mode 100644 debian/ovs-bugtool.8

diff --git a/debian/automake.mk b/debian/automake.mk
index 20432062a..7c73831a2 100644
--- a/debian/automake.mk
+++ b/debian/automake.mk
@@ -42,6 +42,7 @@ EXTRA_DIST += \
 	debian/openvswitch-switch.postrm \
 	debian/openvswitch-switch.template \
 	debian/ovs-bugtool \
+	debian/ovs-bugtool.8 \
 	debian/ovs-monitor-ipsec \
 	debian/python-openvswitch.dirs \
 	debian/python-openvswitch.install \
diff --git a/debian/openvswitch-common.install b/debian/openvswitch-common.install
index 298f1ad0f..1733612ae 100644
--- a/debian/openvswitch-common.install
+++ b/debian/openvswitch-common.install
@@ -5,4 +5,5 @@ _debian/utilities/ovs-ofctl usr/sbin
 _debian/utilities/ovs-parse-leaks usr/bin
 _debian/utilities/ovs-pki usr/sbin
 debian/ovs-bugtool usr/sbin
+debian/ovs-bugtool.8 usr/share/man/man8
 vswitchd/vswitch.ovsschema usr/share/openvswitch
diff --git a/debian/ovs-bugtool.8 b/debian/ovs-bugtool.8
new file mode 100644
index 000000000..28147e460
--- /dev/null
+++ b/debian/ovs-bugtool.8
@@ -0,0 +1,46 @@
+.\" -*- nroff -*-
+.de IQ
+.  br
+.  ns
+.  IP "\\$1"
+..
+.TH ovs\-bugtool 8 "September 2010" "Open vSwitch" "Open vSwitch Manual"
+.\" This program's name:
+.ds PN ovs\-bugtool
+.
+.SH NAME
+ovs\-bugtool \- Open vSwitch
+.
+.SH SYNOPSIS
+.B ovs\-bugtool
+.
+.SH DESCRIPTION
+Generate a debug bundle with useful information about Open vSwitch on this
+system. The bundle is placed in /var/log/openvswitch.
+.
+.SH OPTIONS
+.
+.IP "\fB\-\-all\fR"
+use all available capabilities.
+.
+.IP "\fB\-\-capabilities\fR"
+list \fBovs\-bugtool\fR capabilities.
+.
+.IP "\fB\-\-debug\fR"
+print verbose debugging output.
+.
+.IP "\fB\-\-entries=\fIlist\fR\fR"
+use the capabilities specified in a comma separated list.
+.
+.IP "\fB\-\-output=\fIfiletype\fR\fR"
+generate a debug bundle with the specified file type.  Options include \fBtar\fR,
+\fBtar.bz2\fR, and \fBzip\fR.
+.
+.IP "\fB\-\-silent\fR"
+suppress output.
+.
+.IP "\fB\-\-unlimited\fR"
+do not exclude files which are too large.
+.
+.IP "\fB\-\-yestoall\fR"
+answer yes to all prompts.
-- 
cgit v1.2.1


From b0e62f3daca16a97541d33798e65894fcfa9623c Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Mon, 27 Sep 2010 18:33:56 -0700
Subject: debian: Create ovs-monitor-ipsec rundir if it doesn't exist

Reported-by: Sajjad Lateef <slateef@nicira.com>
---
 debian/openvswitch-ipsec.init | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/debian/openvswitch-ipsec.init b/debian/openvswitch-ipsec.init
index 5f4240672..ba82f5139 100755
--- a/debian/openvswitch-ipsec.init
+++ b/debian/openvswitch-ipsec.init
@@ -69,6 +69,10 @@ running() {
 }
 
 start_server() {
+    if [ ! -d /var/run/openvswitch ]; then
+        install -d -m 755 -o root -g root /var/run/openvswitch
+    fi
+
     PYTHONPATH=/usr/share/openvswitch/python \
            /usr/share/openvswitch/scripts/ovs-monitor-ipsec \
            --pidfile-name=$PIDFILE --detach --monitor \
-- 
cgit v1.2.1


From 55f8a832c181a047f06f57125a2c0da9c235e030 Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Mon, 27 Sep 2010 18:48:25 -0700
Subject: debian: Restart ovs-monitor-ipsec when --monitor specified

When a process is started with "--monitor", the OVS Python daemon library
restarts it if it exits with RESTART_EXIT_CODE.  Have ovs-monitor-ipsec
exit with this code when an uncaught exception occurs.
---
 debian/ovs-monitor-ipsec | 1 +
 1 file changed, 1 insertion(+)

diff --git a/debian/ovs-monitor-ipsec b/debian/ovs-monitor-ipsec
index 1caece3a9..184b00469 100755
--- a/debian/ovs-monitor-ipsec
+++ b/debian/ovs-monitor-ipsec
@@ -347,3 +347,4 @@ if __name__ == '__main__':
         raise
     except:
         s_log.exception("traceback")
+        sys.exit(ovs.daemon.RESTART_EXIT_CODE)
-- 
cgit v1.2.1


From c84d769c147830be5aa99ed3e6bdc92af15abd5d Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Tue, 28 Sep 2010 11:58:00 -0700
Subject: debian: openvswitch-common ethtool should not be required

The ethtool package is only used to help ovs-bugtool create bug
reports, and the script still runs when ethtool is missing.  Making
it a requirement for installation is a bit draconian.
---
 debian/control | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/debian/control b/debian/control
index 622daeb3b..0efcf925b 100644
--- a/debian/control
+++ b/debian/control
@@ -24,7 +24,8 @@ Description: Source code for Open vSwitch datapath Linux module
 
 Package: openvswitch-common
 Architecture: any
-Depends: ${shlibs:Depends}, openssl, ${misc:Depends}, python, ethtool
+Depends: ${shlibs:Depends}, openssl, ${misc:Depends}, python
+Suggests: ethtool
 Description: Open vSwitch common components
  openvswitch-common provides components required by both openvswitch-switch
  and openvswitch-controller.
-- 
cgit v1.2.1


From 1998cd4d3ef151a57abf9a2e826659f28146362b Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 1 Sep 2010 13:41:09 -0700
Subject: ovs-vsctl: Factor out and optimize searching for a command by name.

The following commit will introduce a new function that wants to do this
a lot, so we might as well do it efficiently.
---
 utilities/ovs-vsctl.c | 100 +++++++++++++++++++++++++++++---------------------
 1 file changed, 58 insertions(+), 42 deletions(-)

diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c
index d09cf7460..24e001706 100644
--- a/utilities/ovs-vsctl.c
+++ b/utilities/ovs-vsctl.c
@@ -105,6 +105,7 @@ static void parse_options(int argc, char *argv[]);
 static struct vsctl_command *parse_commands(int argc, char *argv[],
                                             size_t *n_commandsp);
 static void parse_command(int argc, char *argv[], struct vsctl_command *);
+static const struct vsctl_command_syntax *find_command(const char *name);
 static void do_vsctl(const char *args,
                      struct vsctl_command *, size_t n_commands,
                      struct ovsdb_idl *);
@@ -295,6 +296,8 @@ static void
 parse_command(int argc, char *argv[], struct vsctl_command *command)
 {
     const struct vsctl_command_syntax *p;
+    struct shash_node *node;
+    int n_arg;
     int i;
 
     shash_init(&command->options);
@@ -325,58 +328,71 @@ parse_command(int argc, char *argv[], struct vsctl_command *command)
         vsctl_fatal("missing command name");
     }
 
-    for (p = all_commands; p->name; p++) {
-        if (!strcmp(p->name, argv[i])) {
-            struct shash_node *node;
-            int n_arg;
+    p = find_command(argv[i]);
+    if (!p) {
+        vsctl_fatal("unknown command '%s'; use --help for help", argv[i]);
+    }
 
-            SHASH_FOR_EACH (node, &command->options) {
-                const char *s = strstr(p->options, node->name);
-                int end = s ? s[strlen(node->name)] : EOF;
+    SHASH_FOR_EACH (node, &command->options) {
+        const char *s = strstr(p->options, node->name);
+        int end = s ? s[strlen(node->name)] : EOF;
 
-                if (end != '=' && end != ',' && end != ' ' && end != '\0') {
-                    vsctl_fatal("'%s' command has no '%s' option",
-                                argv[i], node->name);
-                }
-                if ((end == '=') != (node->data != NULL)) {
-                    if (end == '=') {
-                        vsctl_fatal("missing argument to '%s' option on '%s' "
-                                    "command", node->name, argv[i]);
-                    } else {
-                        vsctl_fatal("'%s' option on '%s' does not accept an "
-                                    "argument", node->name, argv[i]);
-                    }
-                }
+        if (end != '=' && end != ',' && end != ' ' && end != '\0') {
+            vsctl_fatal("'%s' command has no '%s' option",
+                        argv[i], node->name);
+        }
+        if ((end == '=') != (node->data != NULL)) {
+            if (end == '=') {
+                vsctl_fatal("missing argument to '%s' option on '%s' "
+                            "command", node->name, argv[i]);
+            } else {
+                vsctl_fatal("'%s' option on '%s' does not accept an "
+                            "argument", node->name, argv[i]);
             }
+        }
+    }
 
-            n_arg = argc - i - 1;
-            if (n_arg < p->min_args) {
-                vsctl_fatal("'%s' command requires at least %d arguments",
-                            p->name, p->min_args);
-            } else if (n_arg > p->max_args) {
-                int j;
-
-                for (j = i + 1; j < argc; j++) {
-                    if (argv[j][0] == '-') {
-                        vsctl_fatal("'%s' command takes at most %d arguments "
-                                    "(note that options must precede command "
-                                    "names and follow a \"--\" argument)",
-                                    p->name, p->max_args);
-                    }
-                }
+    n_arg = argc - i - 1;
+    if (n_arg < p->min_args) {
+        vsctl_fatal("'%s' command requires at least %d arguments",
+                    p->name, p->min_args);
+    } else if (n_arg > p->max_args) {
+        int j;
 
-                vsctl_fatal("'%s' command takes at most %d arguments",
+        for (j = i + 1; j < argc; j++) {
+            if (argv[j][0] == '-') {
+                vsctl_fatal("'%s' command takes at most %d arguments "
+                            "(note that options must precede command "
+                            "names and follow a \"--\" argument)",
                             p->name, p->max_args);
-            } else {
-                command->syntax = p;
-                command->argc = n_arg + 1;
-                command->argv = &argv[i];
-                return;
             }
         }
+
+        vsctl_fatal("'%s' command takes at most %d arguments",
+                    p->name, p->max_args);
+    }
+
+    command->syntax = p;
+    command->argc = n_arg + 1;
+    command->argv = &argv[i];
+}
+
+/* Returns the "struct vsctl_command_syntax" for a given command 'name', or a
+ * null pointer if there is none. */
+static const struct vsctl_command_syntax *
+find_command(const char *name)
+{
+    static struct shash commands = SHASH_INITIALIZER(&commands);
+
+    if (shash_is_empty(&commands)) {
+        const struct vsctl_command_syntax *p;
+
+        for (p = all_commands; p->name; p++) {
+            shash_add_assert(&commands, p->name, p);
+        }
     }
 
-    vsctl_fatal("unknown command '%s'; use --help for help", argv[i]);
+    return shash_find_data(&commands, name);
 }
 
 static void
-- 
cgit v1.2.1


From 0c18b5a065365e79d92d66532e7bdc41c4588f33 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 1 Sep 2010 13:42:33 -0700
Subject: ovs-vsctl: Reduce log level for "Called as..." for read-only
 invocations.

When ovs-vsctl is not actually going to modify the database, it is less
interesting in the log, so we might as well only log it at "debug" level.

Suggested-by: Neil McKee <neil.mckee@inmon.com>
---
 utilities/ovs-vsctl.c | 94 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 55 insertions(+), 39 deletions(-)

diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c
index 24e001706..940866449 100644
--- a/utilities/ovs-vsctl.c
+++ b/utilities/ovs-vsctl.c
@@ -59,6 +59,7 @@ struct vsctl_command_syntax {
     vsctl_handler_func *run;
     vsctl_handler_func *postprocess;
     const char *options;
+    enum { RO, RW } mode;       /* Does this command modify the database? */
 };
 
 struct vsctl_command {
@@ -101,6 +102,7 @@ static void vsctl_fatal(const char *, ...) PRINTF_FORMAT(1, 2) NO_RETURN;
 static char *default_db(void);
 static void usage(void) NO_RETURN;
 static void parse_options(int argc, char *argv[]);
+static bool might_write_to_db(char **argv);
 
 static struct vsctl_command *parse_commands(int argc, char *argv[],
                                             size_t *n_commandsp);
@@ -115,7 +117,6 @@ static void set_column(const struct vsctl_table_class *,
                        const struct ovsdb_idl_row *, const char *arg,
                        struct ovsdb_symbol_table *);
 
-
 int
 main(int argc, char *argv[])
 {
@@ -133,7 +134,7 @@ main(int argc, char *argv[])
 
     /* Log our arguments.  This is often valuable for debugging systems. */
     args = process_escape_args(argv);
-    VLOG_INFO("Called as %s", args);
+    VLOG(might_write_to_db(argv) ? VLL_INFO : VLL_DBG, "Called as %s", args);
 
     /* Parse command line. */
     parse_options(argc, argv);
@@ -510,6 +511,21 @@ default_db(void)
     }
     return def;
 }
+
+/* Returns true if it looks like this set of arguments might modify the
+ * database, otherwise false.  (Not very smart, so it's prone to false
+ * positives.) */
+static bool
+might_write_to_db(char **argv)
+{
+    for (; *argv; argv++) {
+        const struct vsctl_command_syntax *p = find_command(*argv);
+        if (p && p->mode == RW) {
+            return true;
+        }
+    }
+    return false;
+}
 
 struct vsctl_context {
     /* Read-only. */
@@ -2856,56 +2872,56 @@ try_again:
 
 static const struct vsctl_command_syntax all_commands[] = {
     /* Open vSwitch commands. */
-    {"init", 0, 0, cmd_init, NULL, ""},
+    {"init", 0, 0, cmd_init, NULL, "", RW},
 
     /* Bridge commands. */
-    {"add-br", 1, 3, cmd_add_br, NULL, "--may-exist"},
-    {"del-br", 1, 1, cmd_del_br, NULL, "--if-exists"},
-    {"list-br", 0, 0, cmd_list_br, NULL, ""},
-    {"br-exists", 1, 1, cmd_br_exists, NULL, ""},
-    {"br-to-vlan", 1, 1, cmd_br_to_vlan, NULL, ""},
-    {"br-to-parent", 1, 1, cmd_br_to_parent, NULL, ""},
-    {"br-set-external-id", 2, 3, cmd_br_set_external_id, NULL, ""},
-    {"br-get-external-id", 1, 2, cmd_br_get_external_id, NULL, ""},
+    {"add-br", 1, 3, cmd_add_br, NULL, "--may-exist", RW},
+    {"del-br", 1, 1, cmd_del_br, NULL, "--if-exists", RW},
+    {"list-br", 0, 0, cmd_list_br, NULL, "", RO},
+    {"br-exists", 1, 1, cmd_br_exists, NULL, "", RO},
+    {"br-to-vlan", 1, 1, cmd_br_to_vlan, NULL, "", RO},
+    {"br-to-parent", 1, 1, cmd_br_to_parent, NULL, "", RO},
+    {"br-set-external-id", 2, 3, cmd_br_set_external_id, NULL, "", RW},
+    {"br-get-external-id", 1, 2, cmd_br_get_external_id, NULL, "", RO},
 
     /* Port commands. */
-    {"list-ports", 1, 1, cmd_list_ports, NULL, ""},
-    {"add-port", 2, INT_MAX, cmd_add_port, NULL, "--may-exist"},
-    {"add-bond", 4, INT_MAX, cmd_add_bond, NULL, "--may-exist,--fake-iface"},
-    {"del-port", 1, 2, cmd_del_port, NULL, "--if-exists,--with-iface"},
-    {"port-to-br", 1, 1, cmd_port_to_br, NULL, ""},
+    {"list-ports", 1, 1, cmd_list_ports, NULL, "", RO},
+    {"add-port", 2, INT_MAX, cmd_add_port, NULL, "--may-exist", RW},
+    {"add-bond", 4, INT_MAX, cmd_add_bond, NULL, "--may-exist,--fake-iface", RW},
+    {"del-port", 1, 2, cmd_del_port, NULL, "--if-exists,--with-iface", RW},
+    {"port-to-br", 1, 1, cmd_port_to_br, NULL, "", RO},
 
     /* Interface commands. */
-    {"list-ifaces", 1, 1, cmd_list_ifaces, NULL, ""},
-    {"iface-to-br", 1, 1, cmd_iface_to_br, NULL, ""},
+    {"list-ifaces", 1, 1, cmd_list_ifaces, NULL, "", RO},
+    {"iface-to-br", 1, 1, cmd_iface_to_br, NULL, "", RO},
 
     /* Controller commands. */
-    {"get-controller", 1, 1, cmd_get_controller, NULL, ""},
-    {"del-controller", 1, 1, cmd_del_controller, NULL, ""},
-    {"set-controller", 1, INT_MAX, cmd_set_controller, NULL, ""},
-    {"get-fail-mode", 1, 1, cmd_get_fail_mode, NULL, ""},
-    {"del-fail-mode", 1, 1, cmd_del_fail_mode, NULL, ""},
-    {"set-fail-mode", 2, 2, cmd_set_fail_mode, NULL, ""},
+    {"get-controller", 1, 1, cmd_get_controller, NULL, "", RO},
+    {"del-controller", 1, 1, cmd_del_controller, NULL, "", RW},
+    {"set-controller", 1, INT_MAX, cmd_set_controller, NULL, "", RW},
+    {"get-fail-mode", 1, 1, cmd_get_fail_mode, NULL, "", RO},
+    {"del-fail-mode", 1, 1, cmd_del_fail_mode, NULL, "", RW},
+    {"set-fail-mode", 2, 2, cmd_set_fail_mode, NULL, "", RW},
 
     /* SSL commands. */
-    {"get-ssl", 0, 0, cmd_get_ssl, NULL, ""},
-    {"del-ssl", 0, 0, cmd_del_ssl, NULL, ""},
-    {"set-ssl", 3, 3, cmd_set_ssl, NULL, "--bootstrap"},
+    {"get-ssl", 0, 0, cmd_get_ssl, NULL, "", RO},
+    {"del-ssl", 0, 0, cmd_del_ssl, NULL, "", RW},
+    {"set-ssl", 3, 3, cmd_set_ssl, NULL, "--bootstrap", RW},
 
     /* Switch commands. */
-    {"emer-reset", 0, 0, cmd_emer_reset, NULL, ""},
+    {"emer-reset", 0, 0, cmd_emer_reset, NULL, "", RW},
 
     /* Parameter commands. */
-    {"get", 2, INT_MAX, cmd_get, NULL, "--if-exists,--id="},
-    {"list", 1, INT_MAX, cmd_list, NULL, ""},
-    {"set", 3, INT_MAX, cmd_set, NULL, ""},
-    {"add", 4, INT_MAX, cmd_add, NULL, ""},
-    {"remove", 4, INT_MAX, cmd_remove, NULL, ""},
-    {"clear", 3, INT_MAX, cmd_clear, NULL, ""},
-    {"create", 2, INT_MAX, cmd_create, post_create, "--id="},
-    {"destroy", 1, INT_MAX, cmd_destroy, NULL, "--if-exists"},
-    {"wait-until", 2, INT_MAX, cmd_wait_until, NULL, ""},
-
-    {NULL, 0, 0, NULL, NULL, NULL},
+    {"get", 2, INT_MAX, cmd_get, NULL, "--if-exists,--id=", RO},
+    {"list", 1, INT_MAX, cmd_list, NULL, "", RO},
+    {"set", 3, INT_MAX, cmd_set, NULL, "", RW},
+    {"add", 4, INT_MAX, cmd_add, NULL, "", RW},
+    {"remove", 4, INT_MAX, cmd_remove, NULL, "", RW},
+    {"clear", 3, INT_MAX, cmd_clear, NULL, "", RW},
+    {"create", 2, INT_MAX, cmd_create, post_create, "--id=", RW},
+    {"destroy", 1, INT_MAX, cmd_destroy, NULL, "--if-exists", RW},
+    {"wait-until", 2, INT_MAX, cmd_wait_until, NULL, "", RO},
+
+    {NULL, 0, 0, NULL, NULL, NULL, RO},
 };
 
-- 
cgit v1.2.1


From 93b13be8e65455ecf6e568e604cf76fdb20601c9 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Mon, 19 Jul 2010 11:21:59 -0700
Subject: netdev-linux: Use hash table instead of sparse array for QoS classes.

The main advantage of a sparse array over a hash table is that it can be
iterated in numerical order.  But the OVS implementation of sparse arrays
is quite expensive in terms of memory: on a 32-bit system, a sparse array
with exactly 1 nonnull element has 512 bytes of overhead.  In this case,
the sparse array's property of iteration in numerical order is not
important, so this commit converts it to a hash table to save memory.
---
 lib/netdev-linux.c | 171 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 106 insertions(+), 65 deletions(-)

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 7227f5dfc..eecff5034 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -48,6 +48,8 @@
 #include "coverage.h"
 #include "dynamic-string.h"
 #include "fatal-signal.h"
+#include "hash.h"
+#include "hmap.h"
 #include "netdev-provider.h"
 #include "netdev-vport.h"
 #include "netlink.h"
@@ -55,7 +57,6 @@
 #include "openflow/openflow.h"
 #include "packets.h"
 #include "poll-loop.h"
-#include "port-array.h"
 #include "rtnetlink.h"
 #include "socket-util.h"
 #include "shash.h"
@@ -102,19 +103,24 @@ struct tap_state {
 /* Traffic control. */
 
 /* An instance of a traffic control class.  Always associated with a particular
- * network device. */
+ * network device.
+ *
+ * Each TC implementation subclasses this with whatever additional data it
+ * needs. */
 struct tc {
     const struct tc_ops *ops;
+    struct hmap queues;         /* Contains "struct tc_queue"s.
+                                 * Read by generic TC layer.
+                                 * Written only by TC implementation. */
+};
 
-    /* Maps from queue ID to tc-specific data.
-     *
-     * The generic netdev TC layer uses this to the following extent: if an
-     * entry is nonnull, then the queue whose ID is the index is assumed to
-     * exist; if an entry is null, then that queue is assumed not to exist.
-     * Implementations must adhere to this scheme, although they may store
-     * whatever they like as data.
-     */
-    struct port_array queues;
+/* One traffic control queue.
+ *
+ * Each TC implementation subclasses this with whatever additional data it
+ * needs. */
+struct tc_queue {
+    struct hmap_node hmap_node; /* In struct tc's "queues" hmap. */
+    unsigned int queue_id;      /* OpenFlow queue ID. */
 };
 
 /* A particular kind of traffic control.  Each implementation generally maps to
@@ -204,8 +210,8 @@ struct tc_ops {
      */
     int (*qdisc_set)(struct netdev *, const struct shash *details);
 
-    /* Retrieves details of 'queue_id' on 'netdev->tc' into 'details'.  The
-     * caller ensures that 'queues' has a nonnull value for index 'queue_id.
+    /* Retrieves details of 'queue' on 'netdev->tc' into 'details'.  'queue' is
+     * one of the 'struct tc_queue's within 'netdev->tc->queues'.
      *
      * The contents of 'details' should be documented as valid for 'ovs_name'
      * in the "other_config" column in the "Queue" table in
@@ -217,7 +223,7 @@ struct tc_ops {
      *
      * This function may be null if 'tc' does not have queues ('n_queues' is
      * 0). */
-    int (*class_get)(const struct netdev *netdev, unsigned int queue_id,
+    int (*class_get)(const struct netdev *netdev, const struct tc_queue *queue,
                      struct shash *details);
 
     /* Configures or reconfigures 'queue_id' on 'netdev->tc' according to
@@ -234,21 +240,22 @@ struct tc_ops {
     int (*class_set)(struct netdev *, unsigned int queue_id,
                      const struct shash *details);
 
-    /* Deletes 'queue_id' from 'netdev->tc'.  The caller ensures that 'queues'
-     * has a nonnull value for index 'queue_id.
+    /* Deletes 'queue' from 'netdev->tc'.  'queue' is one of the 'struct
+     * tc_queue's within 'netdev->tc->queues'.
      *
      * This function may be null if 'tc' does not have queues or its queues
      * cannot be deleted. */
-    int (*class_delete)(struct netdev *, unsigned int queue_id);
+    int (*class_delete)(struct netdev *, struct tc_queue *queue);
 
-    /* Obtains stats for 'queue' from 'netdev->tc'.  The caller ensures that
-     * 'queues' has a nonnull value for index 'queue_id.
+    /* Obtains stats for 'queue' from 'netdev->tc'.  'queue' is one of the
+     * 'struct tc_queue's within 'netdev->tc->queues'.
      *
      * On success, initializes '*stats'.
      *
      * This function may be null if 'tc' does not have queues or if it cannot
      * report queue statistics. */
-    int (*class_get_stats)(const struct netdev *netdev, unsigned int queue_id,
+    int (*class_get_stats)(const struct netdev *netdev,
+                           const struct tc_queue *queue,
                            struct netdev_queue_stats *stats);
 
     /* Extracts queue stats from 'nlmsg', which is a response to a
@@ -265,13 +272,13 @@ static void
 tc_init(struct tc *tc, const struct tc_ops *ops)
 {
     tc->ops = ops;
-    port_array_init(&tc->queues);
+    hmap_init(&tc->queues);
 }
 
 static void
 tc_destroy(struct tc *tc)
 {
-    port_array_destroy(&tc->queues);
+    hmap_destroy(&tc->queues);
 }
 
 static const struct tc_ops tc_ops_htb;
@@ -1468,6 +1475,29 @@ tc_lookup_linux_name(const char *name)
     return NULL;
 }
 
+static struct tc_queue *
+tc_find_queue__(const struct netdev *netdev, unsigned int queue_id,
+                size_t hash)
+{
+    struct netdev_dev_linux *netdev_dev =
+                                netdev_dev_linux_cast(netdev_get_dev(netdev));
+    struct tc_queue *queue;
+
+    HMAP_FOR_EACH_IN_BUCKET (queue, struct tc_queue, hmap_node,
+                             hash, &netdev_dev->tc->queues) {
+        if (queue->queue_id == queue_id) {
+            return queue;
+        }
+    }
+    return NULL;
+}
+
+static struct tc_queue *
+tc_find_queue(const struct netdev *netdev, unsigned int queue_id)
+{
+    return tc_find_queue__(netdev, queue_id, hash_int(queue_id, 0));
+}
+
 static int
 netdev_linux_get_qos_capabilities(const struct netdev *netdev OVS_UNUSED,
                                   const char *type,
@@ -1548,12 +1578,12 @@ netdev_linux_get_queue(const struct netdev *netdev,
     error = tc_query_qdisc(netdev);
     if (error) {
         return error;
-    } else if (queue_id > UINT16_MAX
-               || !port_array_get(&netdev_dev->tc->queues, queue_id)) {
-        return ENOENT;
+    } else {
+        struct tc_queue *queue = tc_find_queue(netdev, queue_id);
+        return (queue
+                ? netdev_dev->tc->ops->class_get(netdev, queue, details)
+                : ENOENT);
     }
-
-    return netdev_dev->tc->ops->class_get(netdev, queue_id, details);
 }
 
 static int
@@ -1587,12 +1617,12 @@ netdev_linux_delete_queue(struct netdev *netdev, unsigned int queue_id)
         return error;
     } else if (!netdev_dev->tc->ops->class_delete) {
         return EINVAL;
-    } else if (queue_id > UINT16_MAX
-               || !port_array_get(&netdev_dev->tc->queues, queue_id)) {
-        return ENOENT;
+    } else {
+        struct tc_queue *queue = tc_find_queue(netdev, queue_id);
+        return (queue
+                ? netdev_dev->tc->ops->class_delete(netdev, queue)
+                : ENOENT);
     }
-
-    return netdev_dev->tc->ops->class_delete(netdev, queue_id);
 }
 
 static int
@@ -1607,14 +1637,14 @@ netdev_linux_get_queue_stats(const struct netdev *netdev,
     error = tc_query_qdisc(netdev);
     if (error) {
         return error;
-    } else if (queue_id > UINT16_MAX
-               || !port_array_get(&netdev_dev->tc->queues, queue_id)) {
-        return ENOENT;
     } else if (!netdev_dev->tc->ops->class_get_stats) {
         return EOPNOTSUPP;
+    } else {
+        const struct tc_queue *queue = tc_find_queue(netdev, queue_id);
+        return (queue
+                ? netdev_dev->tc->ops->class_get_stats(netdev, queue, stats)
+                : ENOENT);
     }
-
-    return netdev_dev->tc->ops->class_get_stats(netdev, queue_id, stats);
 }
 
 static void
@@ -1635,10 +1665,9 @@ netdev_linux_dump_queues(const struct netdev *netdev,
 {
     struct netdev_dev_linux *netdev_dev =
                                 netdev_dev_linux_cast(netdev_get_dev(netdev));
-    unsigned int queue_id;
+    struct tc_queue *queue;
     struct shash details;
     int last_error;
-    void *queue;
     int error;
 
     error = tc_query_qdisc(netdev);
@@ -1650,12 +1679,13 @@ netdev_linux_dump_queues(const struct netdev *netdev,
 
     last_error = 0;
     shash_init(&details);
-    PORT_ARRAY_FOR_EACH (queue, &netdev_dev->tc->queues, queue_id) {
+    HMAP_FOR_EACH (queue, struct tc_queue, hmap_node,
+                   &netdev_dev->tc->queues) {
         shash_clear(&details);
 
-        error = netdev_dev->tc->ops->class_get(netdev, queue_id, &details);
+        error = netdev_dev->tc->ops->class_get(netdev, queue, &details);
         if (!error) {
-            (*cb)(queue_id, &details, aux);
+            (*cb)(queue->queue_id, &details, aux);
         } else {
             last_error = error;
         }
@@ -2191,6 +2221,7 @@ struct htb {
 };
 
 struct htb_class {
+    struct tc_queue tc_queue;
     unsigned int min_rate;      /* In bytes/s. */
     unsigned int max_rate;      /* In bytes/s. */
     unsigned int burst;         /* In bytes. */
@@ -2454,19 +2485,35 @@ htb_tc_install(struct netdev *netdev, const struct shash *details)
     return error;
 }
 
+static struct htb_class *
+htb_class_cast__(const struct tc_queue *queue)
+{
+    return CONTAINER_OF(queue, struct htb_class, tc_queue);
+}
+
 static void
 htb_update_queue__(struct netdev *netdev, unsigned int queue_id,
                    const struct htb_class *hc)
 {
     struct htb *htb = htb_get__(netdev);
+    size_t hash = hash_int(queue_id, 0);
+    struct tc_queue *queue;
     struct htb_class *hcp;
 
-    hcp = port_array_get(&htb->tc.queues, queue_id);
-    if (!hcp) {
+    queue = tc_find_queue__(netdev, queue_id, hash);
+    if (queue) {
+        hcp = htb_class_cast__(queue);
+    } else {
         hcp = xmalloc(sizeof *hcp);
-        port_array_set(&htb->tc.queues, queue_id, hcp);
+        queue = &hcp->tc_queue;
+        queue->queue_id = queue_id;
+        hmap_insert(&htb->tc.queues, &queue->hmap_node, hash);
     }
-    *hcp = *hc;
+
+    hcp->min_rate = hc->min_rate;
+    hcp->max_rate = hc->max_rate;
+    hcp->burst = hc->burst;
+    hcp->priority = hc->priority;
 }
 
 static int
@@ -2502,10 +2549,11 @@ static void
 htb_tc_destroy(struct tc *tc)
 {
     struct htb *htb = CONTAINER_OF(tc, struct htb, tc);
-    unsigned int queue_id;
-    struct htb_class *hc;
+    struct htb_class *hc, *next;
 
-    PORT_ARRAY_FOR_EACH (hc, &htb->tc.queues, queue_id) {
+    HMAP_FOR_EACH_SAFE (hc, next, struct htb_class, tc_queue.hmap_node,
+                        &htb->tc.queues) {
+        hmap_remove(&htb->tc.queues, &hc->tc_queue.hmap_node);
         free(hc);
     }
     tc_destroy(tc);
@@ -2536,14 +2584,10 @@ htb_qdisc_set(struct netdev *netdev, const struct shash *details)
 }
 
 static int
-htb_class_get(const struct netdev *netdev, unsigned int queue_id,
-              struct shash *details)
+htb_class_get(const struct netdev *netdev OVS_UNUSED,
+              const struct tc_queue *queue, struct shash *details)
 {
-    const struct htb *htb = htb_get__(netdev);
-    const struct htb_class *hc;
-
-    hc = port_array_get(&htb->tc.queues, queue_id);
-    assert(hc != NULL);
+    const struct htb_class *hc = htb_class_cast__(queue);
 
     shash_add(details, "min-rate", xasprintf("%llu", 8ULL * hc->min_rate));
     if (hc->min_rate != hc->max_rate) {
@@ -2579,28 +2623,25 @@ htb_class_set(struct netdev *netdev, unsigned int queue_id,
 }
 
 static int
-htb_class_delete(struct netdev *netdev, unsigned int queue_id)
+htb_class_delete(struct netdev *netdev, struct tc_queue *queue)
 {
+    struct htb_class *hc = htb_class_cast__(queue);
     struct htb *htb = htb_get__(netdev);
-    struct htb_class *hc;
     int error;
 
-    hc = port_array_get(&htb->tc.queues, queue_id);
-    assert(hc != NULL);
-
-    error = tc_delete_class(netdev, tc_make_handle(1, queue_id + 1));
+    error = tc_delete_class(netdev, tc_make_handle(1, queue->queue_id + 1));
     if (!error) {
+        hmap_remove(&htb->tc.queues, &hc->tc_queue.hmap_node);
         free(hc);
-        port_array_delete(&htb->tc.queues, queue_id);
     }
     return error;
 }
 
 static int
-htb_class_get_stats(const struct netdev *netdev, unsigned int queue_id,
+htb_class_get_stats(const struct netdev *netdev, const struct tc_queue *queue,
                     struct netdev_queue_stats *stats)
 {
-    return htb_query_class__(netdev, tc_make_handle(1, queue_id + 1),
+    return htb_query_class__(netdev, tc_make_handle(1, queue->queue_id + 1),
                              tc_make_handle(1, 0xfffe), NULL, stats);
 }
 
-- 
cgit v1.2.1


From f3099647623f2b13ece56cf8cf31761c00c1c297 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Mon, 19 Jul 2010 11:22:10 -0700
Subject: hmap: New function hmap_clear().

---
 lib/hmap.c | 17 +++++++++++++++++
 lib/hmap.h |  1 +
 2 files changed, 18 insertions(+)

diff --git a/lib/hmap.c b/lib/hmap.c
index 1b4816d9d..6bc5ea74b 100644
--- a/lib/hmap.c
+++ b/lib/hmap.c
@@ -18,6 +18,7 @@
 #include "hmap.h"
 #include <assert.h>
 #include <stdint.h>
+#include <string.h>
 #include "coverage.h"
 #include "random.h"
 #include "util.h"
@@ -42,6 +43,22 @@ hmap_destroy(struct hmap *hmap)
     }
 }
 
+/* Removes all nodes from 'hmap', leaving it ready to accept more nodes.  Does
+ * not free memory allocated for 'hmap'.
+ *
+ * This function is appropriate when 'hmap' will soon have about as many
+ * elements as it did before.  If 'hmap' will likely have fewer elements than
+ * before, use hmap_destroy() followed by hmap_init() to save memory and
+ * iteration time. */
+void
+hmap_clear(struct hmap *hmap)
+{
+    if (hmap->n > 0) {
+        hmap->n = 0;
+        memset(hmap->buckets, 0, (hmap->mask + 1) * sizeof *hmap->buckets);
+    }
+}
+
 /* Exchanges hash maps 'a' and 'b'. */
 void
 hmap_swap(struct hmap *a, struct hmap *b)
diff --git a/lib/hmap.h b/lib/hmap.h
index d56749996..92aff7f99 100644
--- a/lib/hmap.h
+++ b/lib/hmap.h
@@ -69,6 +69,7 @@ struct hmap {
 /* Initialization. */
 void hmap_init(struct hmap *);
 void hmap_destroy(struct hmap *);
+void hmap_clear(struct hmap *);
 void hmap_swap(struct hmap *a, struct hmap *b);
 void hmap_moved(struct hmap *hmap);
 static inline size_t hmap_count(const struct hmap *);
-- 
cgit v1.2.1


From d9a8717a004310044b3157f4fb46e8fd00083a73 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Mon, 19 Jul 2010 11:23:05 -0700
Subject: bridge: Use hash table instead of sparse array for bridge ports.

The main advantage of a sparse array over a hash table is that it can be
iterated in numerical order.  But the OVS implementation of sparse arrays
is quite expensive in terms of memory: on a 32-bit system, a sparse array
with exactly 1 nonnull element has 512 bytes of overhead.  In this case,
the sparse array's property of iteration in numerical order is not
important, so this commit converts it to a hash table to save memory.
---
 vswitchd/bridge.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 6c271fb7a..d16f0c339 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -37,6 +37,7 @@
 #include "dynamic-string.h"
 #include "flow.h"
 #include "hash.h"
+#include "hmap.h"
 #include "jsonrpc.h"
 #include "list.h"
 #include "mac-learning.h"
@@ -49,7 +50,6 @@
 #include "ovsdb-data.h"
 #include "packets.h"
 #include "poll-loop.h"
-#include "port-array.h"
 #include "proc-net-compat.h"
 #include "process.h"
 #include "sha1.h"
@@ -85,6 +85,7 @@ struct iface {
 
     /* These members are valid only after bridge_reconfigure() causes them to
      * be initialized. */
+    struct hmap_node dp_ifidx_node; /* In struct bridge's "ifaces" hmap. */
     int dp_ifidx;               /* Index within kernel datapath. */
     struct netdev *netdev;      /* Network device. */
     bool enabled;               /* May be chosen for flows? */
@@ -165,7 +166,7 @@ struct bridge {
 
     /* Kernel datapath information. */
     struct dpif *dpif;          /* Datapath. */
-    struct port_array ifaces;   /* Indexed by kernel datapath port number. */
+    struct hmap ifaces;         /* Contains "struct iface"s. */
 
     /* Bridge ports. */
     struct port **ports;
@@ -1318,7 +1319,7 @@ bridge_create(const struct ovsrec_bridge *br_cfg)
     br->ml = mac_learning_create();
     eth_addr_nicira_random(br->default_ea);
 
-    port_array_init(&br->ifaces);
+    hmap_init(&br->ifaces);
 
     shash_init(&br->port_by_name);
     shash_init(&br->iface_by_name);
@@ -1350,7 +1351,7 @@ bridge_destroy(struct bridge *br)
         dpif_close(br->dpif);
         ofproto_destroy(br->ofproto);
         mac_learning_destroy(br->ml);
-        port_array_destroy(&br->ifaces);
+        hmap_destroy(&br->ifaces);
         shash_destroy(&br->port_by_name);
         shash_destroy(&br->iface_by_name);
         free(br->ports);
@@ -1755,7 +1756,7 @@ bridge_fetch_dp_ifaces(struct bridge *br)
             iface->dp_ifidx = -1;
         }
     }
-    port_array_clear(&br->ifaces);
+    hmap_clear(&br->ifaces);
 
     dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports);
     for (i = 0; i < n_dpif_ports; i++) {
@@ -1769,8 +1770,9 @@ bridge_fetch_dp_ifaces(struct bridge *br)
                 VLOG_WARN("%s reported interface %"PRIu16" twice",
                           dpif_name(br->dpif), p->port);
             } else {
-                port_array_set(&br->ifaces, p->port, iface);
                 iface->dp_ifidx = p->port;
+                hmap_insert(&br->ifaces, &iface->dp_ifidx_node,
+                            hash_int(iface->dp_ifidx, 0));
             }
 
             if (iface->cfg) {
@@ -3734,7 +3736,7 @@ iface_destroy(struct iface *iface)
         shash_find_and_delete_assert(&br->iface_by_name, iface->name);
 
         if (iface->dp_ifidx >= 0) {
-            port_array_set(&br->ifaces, iface->dp_ifidx, NULL);
+            hmap_remove(&br->ifaces, &iface->dp_ifidx_node);
         }
 
         del = port->ifaces[iface->port_ifidx] = port->ifaces[--port->n_ifaces];
@@ -3764,7 +3766,15 @@ iface_lookup(const struct bridge *br, const char *name)
 static struct iface *
 iface_from_dp_ifidx(const struct bridge *br, uint16_t dp_ifidx)
 {
-    return port_array_get(&br->ifaces, dp_ifidx);
+    struct iface *iface;
+
+    HMAP_FOR_EACH_IN_BUCKET (iface, struct iface, dp_ifidx_node,
+                             hash_int(dp_ifidx, 0), &br->ifaces) {
+        if (iface->dp_ifidx == dp_ifidx) {
+            return iface;
+        }
+    }
+    return NULL;
 }
 
 /* Returns true if 'iface' is the name of an "internal" interface on bridge
-- 
cgit v1.2.1


From 0cc96e48ab2fc573c3b7b69fe5a034bb29dde578 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Mon, 19 Jul 2010 11:43:05 -0700
Subject: sflow: Use hash table instead of sparse array for sflow ports.

The main advantage of a sparse array over a hash table is that it can be
iterated in numerical order.  But the OVS implementation of sparse arrays
is quite expensive in terms of memory: on a 32-bit system, a sparse array
with exactly 1 nonnull element has 512 bytes of overhead.  In this case,
the sparse array's property of iteration in numerical order is not
important, so this commit converts it to a hash table to save memory.
---
 ofproto/ofproto-sflow.c | 71 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 49 insertions(+), 22 deletions(-)

diff --git a/ofproto/ofproto-sflow.c b/ofproto/ofproto-sflow.c
index c74c7360e..076bd79c4 100644
--- a/ofproto/ofproto-sflow.c
+++ b/ofproto/ofproto-sflow.c
@@ -22,11 +22,12 @@
 #include "collectors.h"
 #include "dpif.h"
 #include "compiler.h"
+#include "hash.h"
+#include "hmap.h"
 #include "netdev.h"
 #include "ofpbuf.h"
 #include "ofproto.h"
 #include "poll-loop.h"
-#include "port-array.h"
 #include "sflow_api.h"
 #include "socket-util.h"
 #include "timeval.h"
@@ -35,8 +36,10 @@
 VLOG_DEFINE_THIS_MODULE(sflow)
 
 struct ofproto_sflow_port {
+    struct hmap_node hmap_node; /* In struct ofproto_sflow's "ports" hmap. */
     struct netdev *netdev;      /* Underlying network device, for stats. */
     SFLDataSource_instance dsi; /* sFlow library's notion of port number. */
+    uint16_t odp_port;          /* ODP port number. */
 };
 
 struct ofproto_sflow {
@@ -47,9 +50,12 @@ struct ofproto_sflow {
     struct dpif *dpif;
     time_t next_tick;
     size_t n_flood, n_all;
-    struct port_array ports;    /* Indexed by ODP port number. */
+    struct hmap ports;          /* Contains "struct ofproto_sflow_port"s. */
 };
 
+static void ofproto_sflow_del_port__(struct ofproto_sflow *,
+                                     struct ofproto_sflow_port *);
+
 #define RECEIVER_INDEX 1
 
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
@@ -129,6 +135,20 @@ sflow_agent_send_packet_cb(void *os_, SFLAgent *agent OVS_UNUSED,
     collectors_send(os->collectors, pkt, pktLen);
 }
 
+static struct ofproto_sflow_port *
+ofproto_sflow_find_port(const struct ofproto_sflow *os, uint16_t odp_port)
+{
+    struct ofproto_sflow_port *osp;
+
+    HMAP_FOR_EACH_IN_BUCKET (osp, struct ofproto_sflow_port, hmap_node,
+                             hash_int(odp_port, 0), &os->ports) {
+        if (osp->odp_port == odp_port) {
+            return osp;
+        }
+    }
+    return NULL;
+}
+
 static void
 sflow_agent_get_counters(void *os_, SFLPoller *poller,
                          SFL_COUNTERS_SAMPLE_TYPE *cs)
@@ -141,7 +161,7 @@ sflow_agent_get_counters(void *os_, SFLPoller *poller,
     enum netdev_flags flags;
     uint32_t current;
 
-    osp = port_array_get(&os->ports, poller->bridgePort);
+    osp = ofproto_sflow_find_port(os, poller->bridgePort);
     if (!osp) {
         return;
     }
@@ -266,7 +286,7 @@ ofproto_sflow_create(struct dpif *dpif)
     os = xcalloc(1, sizeof *os);
     os->dpif = dpif;
     os->next_tick = time_now() + 1;
-    port_array_init(&os->ports);
+    hmap_init(&os->ports);
     return os;
 }
 
@@ -274,14 +294,14 @@ void
 ofproto_sflow_destroy(struct ofproto_sflow *os)
 {
     if (os) {
-        struct ofproto_sflow_port *osp;
-        unsigned int odp_port;
+        struct ofproto_sflow_port *osp, *next;
 
         ofproto_sflow_clear(os);
-        PORT_ARRAY_FOR_EACH (osp, &os->ports, odp_port) {
-            ofproto_sflow_del_port(os, odp_port);
+        HMAP_FOR_EACH_SAFE (osp, next, struct ofproto_sflow_port, hmap_node,
+                            &os->ports) {
+            ofproto_sflow_del_port__(os, osp);
         }
-        port_array_destroy(&os->ports);
+        hmap_destroy(&os->ports);
         free(os);
     }
 }
@@ -334,7 +354,8 @@ ofproto_sflow_add_port(struct ofproto_sflow *os, uint16_t odp_port,
         ifindex = (os->sflow_agent->subId << 16) + odp_port;
     }
     SFL_DS_SET(osp->dsi, 0, ifindex, 0);
-    port_array_set(&os->ports, odp_port, osp);
+    osp->odp_port = odp_port;
+    hmap_insert(&os->ports, &osp->hmap_node, hash_int(odp_port, 0));
 
     /* Add poller and sampler. */
     if (os->sflow_agent) {
@@ -343,18 +364,25 @@ ofproto_sflow_add_port(struct ofproto_sflow *os, uint16_t odp_port,
     }
 }
 
+static void
+ofproto_sflow_del_port__(struct ofproto_sflow *os,
+                         struct ofproto_sflow_port *osp)
+{
+    if (os->sflow_agent) {
+        sfl_agent_removePoller(os->sflow_agent, &osp->dsi);
+        sfl_agent_removeSampler(os->sflow_agent, &osp->dsi);
+    }
+    netdev_close(osp->netdev);
+    hmap_remove(&os->ports, &osp->hmap_node);
+    free(osp);
+}
+
 void
 ofproto_sflow_del_port(struct ofproto_sflow *os, uint16_t odp_port)
 {
-    struct ofproto_sflow_port *osp = port_array_get(&os->ports, odp_port);
+    struct ofproto_sflow_port *osp = ofproto_sflow_find_port(os, odp_port);
     if (osp) {
-        if (os->sflow_agent) {
-            sfl_agent_removePoller(os->sflow_agent, &osp->dsi);
-            sfl_agent_removeSampler(os->sflow_agent, &osp->dsi);
-        }
-        netdev_close(osp->netdev);
-        free(osp);
-        port_array_delete(&os->ports, odp_port);
+        ofproto_sflow_del_port__(os, osp);
     }
 }
 
@@ -365,7 +393,6 @@ ofproto_sflow_set_options(struct ofproto_sflow *os,
     struct ofproto_sflow_port *osp;
     bool options_changed;
     SFLReceiver *receiver;
-    unsigned int odp_port;
     SFLAddress agentIP;
     time_t now;
 
@@ -436,8 +463,8 @@ ofproto_sflow_set_options(struct ofproto_sflow *os,
                                MAX(1, UINT32_MAX / options->sampling_rate));
 
     /* Add samplers and pollers for the currently known ports. */
-    PORT_ARRAY_FOR_EACH (osp, &os->ports, odp_port) {
-        ofproto_sflow_add_poller(os, osp, odp_port);
+    HMAP_FOR_EACH (osp, struct ofproto_sflow_port, hmap_node, &os->ports) {
+        ofproto_sflow_add_poller(os, osp, osp->odp_port);
         ofproto_sflow_add_sampler(os, osp);
     }
 }
@@ -446,7 +473,7 @@ static int
 ofproto_sflow_odp_port_to_ifindex(const struct ofproto_sflow *os,
                                   uint16_t odp_port)
 {
-    struct ofproto_sflow_port *osp = port_array_get(&os->ports, odp_port);
+    struct ofproto_sflow_port *osp = ofproto_sflow_find_port(os, odp_port);
     return osp ? SFL_DS_INDEX(osp->dsi) : 0;
 }
 
-- 
cgit v1.2.1


From ca0f572cfe87f284018e14fa7f1de58fbaef4c87 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Mon, 19 Jul 2010 12:57:22 -0700
Subject: ofproto: Use hash table instead of sparse array for ofports.

The main advantage of a sparse array over a hash table is that it can be
iterated in numerical order.  But the OVS implementation of sparse arrays
is quite expensive in terms of memory: on a 32-bit system, a sparse array
with exactly 1 nonnull element has 512 bytes of overhead.  In this case,
the sparse array's property of iteration in numerical order is not
important, so this commit converts it to a hash table to save memory.
---
 ofproto/ofproto.c | 111 +++++++++++++++++++++++++++++-------------------------
 1 file changed, 60 insertions(+), 51 deletions(-)

diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index e571bd4e2..68b2493de 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -30,6 +30,8 @@
 #include "dpif.h"
 #include "dynamic-string.h"
 #include "fail-open.h"
+#include "hash.h"
+#include "hmap.h"
 #include "in-band.h"
 #include "mac-learning.h"
 #include "netdev.h"
@@ -46,7 +48,6 @@
 #include "pinsched.h"
 #include "pktbuf.h"
 #include "poll-loop.h"
-#include "port-array.h"
 #include "rconn.h"
 #include "shash.h"
 #include "status.h"
@@ -68,9 +69,12 @@ enum {
     TABLEID_CLASSIFIER = 1
 };
 
+
 struct ofport {
+    struct hmap_node hmap_node; /* In struct ofproto's "ports" hmap. */
     struct netdev *netdev;
     struct ofp_phy_port opp;    /* In host byte order. */
+    uint16_t odp_port;
 };
 
 static void ofport_free(struct ofport *);
@@ -272,8 +276,7 @@ struct ofproto {
     /* Datapath. */
     struct dpif *dpif;
     struct netdev_monitor *netdev_monitor;
-    struct port_array ports;    /* Index is ODP port nr; ofport->opp.port_no is
-                                 * OFP port nr. */
+    struct hmap ports;          /* Contains "struct ofport"s. */
     struct shash port_by_name;
     uint32_t max_ports;
 
@@ -336,6 +339,7 @@ static void handle_openflow(struct ofconn *, struct ofproto *,
 
 static void refresh_port_groups(struct ofproto *);
 
+static struct ofport *get_port(const struct ofproto *, uint16_t odp_port);
 static void update_port(struct ofproto *, const char *devname);
 static int init_ports(struct ofproto *);
 static void reinit_ports(struct ofproto *);
@@ -388,7 +392,7 @@ ofproto_create(const char *datapath, const char *datapath_type,
     /* Initialize datapath. */
     p->dpif = dpif;
     p->netdev_monitor = netdev_monitor_create();
-    port_array_init(&p->ports);
+    hmap_init(&p->ports);
     shash_init(&p->port_by_name);
     p->max_ports = stats.max_ports;
 
@@ -883,12 +887,11 @@ ofproto_set_sflow(struct ofproto *ofproto,
     if (oso) {
         if (!os) {
             struct ofport *ofport;
-            unsigned int odp_port;
 
             os = ofproto->sflow = ofproto_sflow_create(ofproto->dpif);
             refresh_port_groups(ofproto);
-            PORT_ARRAY_FOR_EACH (ofport, &ofproto->ports, odp_port) {
-                ofproto_sflow_add_port(os, odp_port,
+            HMAP_FOR_EACH (ofport, struct ofport, hmap_node, &ofproto->ports) {
+                ofproto_sflow_add_port(os, ofport->odp_port,
                                        netdev_get_name(ofport->netdev));
             }
         }
@@ -932,8 +935,7 @@ ofproto_destroy(struct ofproto *p)
 {
     struct ofservice *ofservice, *next_ofservice;
     struct ofconn *ofconn, *next_ofconn;
-    struct ofport *ofport;
-    unsigned int port_no;
+    struct ofport *ofport, *next_ofport;
     size_t i;
 
     if (!p) {
@@ -959,7 +961,9 @@ ofproto_destroy(struct ofproto *p)
 
     dpif_close(p->dpif);
     netdev_monitor_destroy(p->netdev_monitor);
-    PORT_ARRAY_FOR_EACH (ofport, &p->ports, port_no) {
+    HMAP_FOR_EACH_SAFE (ofport, next_ofport, struct ofport, hmap_node,
+                        &p->ports) {
+        hmap_remove(&p->ports, &ofport->hmap_node);
         ofport_free(ofport);
     }
     shash_destroy(&p->port_by_name);
@@ -987,7 +991,7 @@ ofproto_destroy(struct ofproto *p)
     free(p->serial_desc);
     free(p->dp_desc);
 
-    port_array_destroy(&p->ports);
+    hmap_destroy(&p->ports);
 
     free(p);
 }
@@ -1343,13 +1347,12 @@ reinit_ports(struct ofproto *p)
 {
     struct svec devnames;
     struct ofport *ofport;
-    unsigned int port_no;
     struct odp_port *odp_ports;
     size_t n_odp_ports;
     size_t i;
 
     svec_init(&devnames);
-    PORT_ARRAY_FOR_EACH (ofport, &p->ports, port_no) {
+    HMAP_FOR_EACH (ofport, struct ofport, hmap_node, &p->ports) {
         svec_add (&devnames, (char *) ofport->opp.name);
     }
     dpif_port_list(p->dpif, &odp_ports, &n_odp_ports);
@@ -1371,15 +1374,14 @@ refresh_port_group(struct ofproto *p, unsigned int group)
     uint16_t *ports;
     size_t n_ports;
     struct ofport *port;
-    unsigned int port_no;
 
     assert(group == DP_GROUP_ALL || group == DP_GROUP_FLOOD);
 
-    ports = xmalloc(port_array_count(&p->ports) * sizeof *ports);
+    ports = xmalloc(hmap_count(&p->ports) * sizeof *ports);
     n_ports = 0;
-    PORT_ARRAY_FOR_EACH (port, &p->ports, port_no) {
+    HMAP_FOR_EACH (port, struct ofport, hmap_node, &p->ports) {
         if (group == DP_GROUP_ALL || !(port->opp.config & OFPPC_NO_FLOOD)) {
-            ports[n_ports++] = port_no;
+            ports[n_ports++] = port->odp_port;
         }
     }
     dpif_port_group_set(p->dpif, group, ports, n_ports);
@@ -1423,6 +1425,7 @@ make_ofport(const struct odp_port *odp_port)
 
     ofport = xmalloc(sizeof *ofport);
     ofport->netdev = netdev;
+    ofport->odp_port = odp_port->port;
     ofport->opp.port_no = odp_port_to_ofp_port(odp_port->port);
     netdev_get_etheraddr(netdev, ofport->opp.hw_addr);
     memcpy(ofport->opp.name, odp_port->devname,
@@ -1444,7 +1447,7 @@ make_ofport(const struct odp_port *odp_port)
 static bool
 ofport_conflicts(const struct ofproto *p, const struct odp_port *odp_port)
 {
-    if (port_array_get(&p->ports, odp_port->port)) {
+    if (get_port(p, odp_port->port)) {
         VLOG_WARN_RL(&rl, "ignoring duplicate port %"PRIu16" in datapath",
                      odp_port->port);
         return true;
@@ -1503,28 +1506,25 @@ send_port_status(struct ofproto *p, const struct ofport *ofport,
 static void
 ofport_install(struct ofproto *p, struct ofport *ofport)
 {
-    uint16_t odp_port = ofp_port_to_odp_port(ofport->opp.port_no);
     const char *netdev_name = (const char *) ofport->opp.name;
 
     netdev_monitor_add(p->netdev_monitor, ofport->netdev);
-    port_array_set(&p->ports, odp_port, ofport);
+    hmap_insert(&p->ports, &ofport->hmap_node, hash_int(ofport->odp_port, 0));
     shash_add(&p->port_by_name, netdev_name, ofport);
     if (p->sflow) {
-        ofproto_sflow_add_port(p->sflow, odp_port, netdev_name);
+        ofproto_sflow_add_port(p->sflow, ofport->odp_port, netdev_name);
     }
 }
 
 static void
 ofport_remove(struct ofproto *p, struct ofport *ofport)
 {
-    uint16_t odp_port = ofp_port_to_odp_port(ofport->opp.port_no);
-
     netdev_monitor_remove(p->netdev_monitor, ofport->netdev);
-    port_array_delete(&p->ports, odp_port);
+    hmap_remove(&p->ports, &ofport->hmap_node);
     shash_delete(&p->port_by_name,
                  shash_find(&p->port_by_name, (char *) ofport->opp.name));
     if (p->sflow) {
-        ofproto_sflow_del_port(p->sflow, odp_port);
+        ofproto_sflow_del_port(p->sflow, ofport->odp_port);
     }
 }
 
@@ -1537,6 +1537,20 @@ ofport_free(struct ofport *ofport)
     }
 }
 
+static struct ofport *
+get_port(const struct ofproto *ofproto, uint16_t odp_port)
+{
+    struct ofport *port;
+
+    HMAP_FOR_EACH_IN_BUCKET (port, struct ofport, hmap_node,
+                             hash_int(odp_port, 0), &ofproto->ports) {
+        if (port->odp_port == odp_port) {
+            return port;
+        }
+    }
+    return NULL;
+}
+
 static void
 update_port(struct ofproto *p, const char *devname)
 {
@@ -1564,7 +1578,7 @@ update_port(struct ofproto *p, const char *devname)
              * reliably but more portably by comparing the old port's MAC
              * against the new port's MAC.  However, this code isn't that smart
              * and always sends an OFPPR_MODIFY (XXX). */
-            old_ofport = port_array_get(&p->ports, odp_port.port);
+            old_ofport = get_port(p, odp_port.port);
         }
     } else if (error != ENOENT && error != ENODEV) {
         VLOG_WARN_RL(&rl, "dpif_port_query_by_name returned unexpected error "
@@ -2339,7 +2353,6 @@ handle_features_request(struct ofproto *p, struct ofconn *ofconn,
 {
     struct ofp_switch_features *osf;
     struct ofpbuf *buf;
-    unsigned int port_no;
     struct ofport *port;
 
     osf = make_openflow_xid(sizeof *osf, OFPT_FEATURES_REPLY, oh->xid, &buf);
@@ -2361,7 +2374,7 @@ handle_features_request(struct ofproto *p, struct ofconn *ofconn,
                          (1u << OFPAT_SET_TP_DST) |
                          (1u << OFPAT_ENQUEUE));
 
-    PORT_ARRAY_FOR_EACH (port, &p->ports, port_no) {
+    HMAP_FOR_EACH (port, struct ofport, hmap_node, &p->ports) {
         hton_ofp_phy_port(ofpbuf_put(buf, &port->opp, sizeof port->opp));
     }
 
@@ -2469,7 +2482,7 @@ static void do_xlate_actions(const union ofp_action *in, size_t n_in,
 static void
 add_output_action(struct action_xlate_ctx *ctx, uint16_t port)
 {
-    const struct ofport *ofport = port_array_get(&ctx->ofproto->ports, port);
+    const struct ofport *ofport = get_port(ctx->ofproto, port);
 
     if (ofport) {
         if (ofport->opp.config & OFPPC_NO_FWD) {
@@ -2699,7 +2712,7 @@ do_xlate_actions(const union ofp_action *in, size_t n_in,
     const union ofp_action *ia;
     const struct ofport *port;
 
-    port = port_array_get(&ctx->ofproto->ports, ctx->flow.in_port);
+    port = get_port(ctx->ofproto, ctx->flow.in_port);
     if (port && port->opp.config & (OFPPC_NO_RECV | OFPPC_NO_RECV_STP) &&
         port->opp.config & (eth_addr_equals(ctx->flow.dl_dst, eth_addr_stp)
                             ? OFPPC_NO_RECV_STP : OFPPC_NO_RECV)) {
@@ -2948,8 +2961,7 @@ handle_port_mod(struct ofproto *p, struct ofconn *ofconn,
     }
     opm = (struct ofp_port_mod *) oh;
 
-    port = port_array_get(&p->ports,
-                          ofp_port_to_odp_port(ntohs(opm->port_no)));
+    port = get_port(p, ofp_port_to_odp_port(ntohs(opm->port_no)));
     if (!port) {
         return ofp_mkerr(OFPET_PORT_MOD_FAILED, OFPPMFC_BAD_PORT);
     } else if (memcmp(port->opp.hw_addr, opm->hw_addr, OFP_ETH_ALEN)) {
@@ -3074,7 +3086,7 @@ handle_table_stats_request(struct ofproto *p, struct ofconn *ofconn,
 }
 
 static void
-append_port_stat(struct ofport *port, uint16_t port_no, struct ofconn *ofconn,
+append_port_stat(struct ofport *port, struct ofconn *ofconn,
                  struct ofpbuf **msgp)
 {
     struct netdev_stats stats;
@@ -3086,7 +3098,7 @@ append_port_stat(struct ofport *port, uint16_t port_no, struct ofconn *ofconn,
     netdev_get_stats(port->netdev, &stats);
 
     ops = append_stats_reply(sizeof *ops, ofconn, msgp);
-    ops->port_no = htons(odp_port_to_ofp_port(port_no));
+    ops->port_no = htons(port->opp.port_no);
     memset(ops->pad, 0, sizeof ops->pad);
     ops->rx_packets = htonll(stats.rx_packets);
     ops->tx_packets = htonll(stats.tx_packets);
@@ -3111,7 +3123,6 @@ handle_port_stats_request(struct ofproto *p, struct ofconn *ofconn,
     struct ofp_port_stats *ops;
     struct ofpbuf *msg;
     struct ofport *port;
-    unsigned int port_no;
 
     if (arg_size != sizeof *psr) {
         return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LEN);
@@ -3120,14 +3131,13 @@ handle_port_stats_request(struct ofproto *p, struct ofconn *ofconn,
 
     msg = start_stats_reply(osr, sizeof *ops * 16);
     if (psr->port_no != htons(OFPP_NONE)) {
-        port = port_array_get(&p->ports,
-                ofp_port_to_odp_port(ntohs(psr->port_no)));
+        port = get_port(p, ofp_port_to_odp_port(ntohs(psr->port_no)));
         if (port) {
-            append_port_stat(port, ntohs(psr->port_no), ofconn, &msg);
+            append_port_stat(port, ofconn, &msg);
         }
     } else {
-        PORT_ARRAY_FOR_EACH (port, &p->ports, port_no) {
-            append_port_stat(port, port_no, ofconn, &msg);
+        HMAP_FOR_EACH (port, struct ofport, hmap_node, &p->ports) {
+            append_port_stat(port, ofconn, &msg);
         }
     }
 
@@ -3389,8 +3399,8 @@ handle_aggregate_stats_request(struct ofproto *p, struct ofconn *ofconn,
 
 struct queue_stats_cbdata {
     struct ofconn *ofconn;
+    struct ofport *ofport;
     struct ofpbuf *msg;
-    uint16_t port_no;
 };
 
 static void
@@ -3400,7 +3410,7 @@ put_queue_stats(struct queue_stats_cbdata *cbdata, uint32_t queue_id,
     struct ofp_queue_stats *reply;
 
     reply = append_stats_reply(sizeof *reply, cbdata->ofconn, &cbdata->msg);
-    reply->port_no = htons(cbdata->port_no);
+    reply->port_no = htons(cbdata->ofport->opp.port_no);
     memset(reply->pad, 0, sizeof reply->pad);
     reply->queue_id = htonl(queue_id);
     reply->tx_bytes = htonll(stats->tx_bytes);
@@ -3419,11 +3429,10 @@ handle_queue_stats_dump_cb(uint32_t queue_id,
 }
 
 static void
-handle_queue_stats_for_port(struct ofport *port, uint16_t port_no,
-                            uint32_t queue_id,
+handle_queue_stats_for_port(struct ofport *port, uint32_t queue_id,
                             struct queue_stats_cbdata *cbdata)
 {
-    cbdata->port_no = port_no;
+    cbdata->ofport = port;
     if (queue_id == OFPQ_ALL) {
         netdev_dump_queue_stats(port->netdev,
                                 handle_queue_stats_dump_cb, cbdata);
@@ -3459,13 +3468,13 @@ handle_queue_stats_request(struct ofproto *ofproto, struct ofconn *ofconn,
     port_no = ntohs(qsr->port_no);
     queue_id = ntohl(qsr->queue_id);
     if (port_no == OFPP_ALL) {
-        PORT_ARRAY_FOR_EACH (port, &ofproto->ports, port_no) {
-            handle_queue_stats_for_port(port, port_no, queue_id, &cbdata);
+        HMAP_FOR_EACH (port, struct ofport, hmap_node, &ofproto->ports) {
+            handle_queue_stats_for_port(port, queue_id, &cbdata);
         }
     } else if (port_no < ofproto->max_ports) {
-        port = port_array_get(&ofproto->ports, port_no);
+        port = get_port(ofproto, ofp_port_to_odp_port(port_no));
         if (port) {
-            handle_queue_stats_for_port(port, port_no, queue_id, &cbdata);
+            handle_queue_stats_for_port(port, queue_id, &cbdata);
         }
     } else {
         ofpbuf_delete(cbdata.msg);
@@ -4110,7 +4119,7 @@ handle_odp_miss_msg(struct ofproto *p, struct ofpbuf *packet)
     rule = lookup_valid_rule(p, &flow);
     if (!rule) {
         /* Don't send a packet-in if OFPPC_NO_PACKET_IN asserted. */
-        struct ofport *port = port_array_get(&p->ports, msg->port);
+        struct ofport *port = get_port(p, msg->port);
         if (port) {
             if (port->opp.config & OFPPC_NO_PACKET_IN) {
                 COVERAGE_INC(ofproto_no_packet_in);
@@ -4567,7 +4576,7 @@ pick_datapath_id(const struct ofproto *ofproto)
 {
     const struct ofport *port;
 
-    port = port_array_get(&ofproto->ports, ODPP_LOCAL);
+    port = get_port(ofproto, ODPP_LOCAL);
     if (port) {
         uint8_t ea[ETH_ADDR_LEN];
         int error;
-- 
cgit v1.2.1


From adf7cfd851c3d9d6f1ad74653cbd2fecce8c3ba9 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Mon, 19 Jul 2010 13:46:52 -0700
Subject: util: New macro OBJECT_CONTAINING.

This macro is a variant on CONTAINER_OF that takes an object pointer
instead of a type name as its second argument.  In the following commit
this will simplify many users of CONTAINER_OF.
---
 lib/util.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/lib/util.h b/lib/util.h
index 9eca8ec9a..5147ffc55 100644
--- a/lib/util.h
+++ b/lib/util.h
@@ -82,6 +82,16 @@ extern const char *program_name;
 #define CONTAINER_OF(POINTER, STRUCT, MEMBER)                           \
         ((STRUCT *) (void *) ((char *) (POINTER) - offsetof (STRUCT, MEMBER)))
 
+/* Given POINTER, the address of the given MEMBER within an object of the type
+ * that OBJECT points to, returns the address of that object as a "void *"
+ * pointer.  OBJECT must be an lvalue.
+ *
+ * This is the same as CONTAINER_OF except that it infers the structure type
+ * from the type of '*OBJECT'. */
+#define OBJECT_CONTAINING(POINTER, OBJECT, MEMBER)                      \
+        ((void *) ((char *) (POINTER)                                   \
+                   - ((char *) &(OBJECT)->MEMBER - (char *) (OBJECT))))
+
 #ifdef  __cplusplus
 extern "C" {
 #endif
-- 
cgit v1.2.1


From 4e8e4213a815a30216e855a805a8bcd5b8c5a886 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Fri, 17 Sep 2010 10:33:10 -0700
Subject: Switch many macros from using CONTAINER_OF to using
 OBJECT_CONTAINING.

After the switch, each of these macros takes one fewer argument, which makes
code that uses them shorter and more readable.
---
 lib/classifier.c           | 43 ++++++++++++-----------------
 lib/dpif-netdev.c          | 20 +++++++-------
 lib/hmap.h                 | 49 +++++++++++++++++----------------
 lib/list.h                 | 24 ++++++++---------
 lib/lockfile.c             |  4 +--
 lib/mac-learning.c         |  2 +-
 lib/netdev-linux.c         | 11 +++-----
 lib/netdev-vport.c         |  3 +--
 lib/netdev.c               |  2 +-
 lib/ovsdb-idl.c            | 38 ++++++++++----------------
 lib/poll-loop.c            |  4 +--
 lib/process.c              |  2 +-
 lib/rtnetlink.c            |  6 ++---
 lib/shash.c                |  2 +-
 lib/shash.h                |  9 +++----
 lib/unixctl.c              |  8 +++---
 ofproto/ofproto-sflow.c    |  7 +++--
 ofproto/ofproto.c          | 67 +++++++++++++++++++++-------------------------
 ofproto/status.c           |  5 ++--
 ovsdb/file.c               |  2 +-
 ovsdb/jsonrpc-server.c     | 26 +++++++-----------
 ovsdb/ovsdb-server.c       |  4 +--
 ovsdb/query.c              |  8 +++---
 ovsdb/row.c                | 14 ++++------
 ovsdb/table.c              |  6 ++---
 ovsdb/transaction.c        | 20 ++++++--------
 ovsdb/trigger.c            |  4 +--
 tests/test-classifier.c    |  2 +-
 tests/test-hmap.c          |  9 +++----
 tests/test-list.c          | 10 +++----
 tests/test-ovsdb.c         |  3 +--
 vswitchd/bridge.c          | 39 +++++++++++++--------------
 vswitchd/proc-net-compat.c |  5 ++--
 33 files changed, 197 insertions(+), 261 deletions(-)

diff --git a/lib/classifier.c b/lib/classifier.c
index 378faf8e0..e0c57ebe4 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -177,8 +177,7 @@ classifier_destroy(struct classifier *cls)
         struct hmap *tbl;
 
         for (tbl = &cls->tables[0]; tbl < &cls->tables[CLS_N_FIELDS]; tbl++) {
-            HMAP_FOR_EACH_SAFE (bucket, next_bucket,
-                                struct cls_bucket, hmap_node, tbl) {
+            HMAP_FOR_EACH_SAFE (bucket, next_bucket, hmap_node, tbl) {
                 free(bucket);
             }
             hmap_destroy(tbl);
@@ -335,11 +334,11 @@ classifier_find_rule_exactly(const struct classifier *cls,
     assert(wildcards == (wildcards & OVSFW_ALL));
     table_idx = table_idx_from_wildcards(wildcards);
     hash = hash_fields(target, table_idx);
-    HMAP_FOR_EACH_WITH_HASH (bucket, struct cls_bucket, hmap_node, hash,
+    HMAP_FOR_EACH_WITH_HASH (bucket, hmap_node, hash,
                              &cls->tables[table_idx]) {
         if (equal_fields(&bucket->fixed, target, table_idx)) {
             struct cls_rule *pos;
-            LIST_FOR_EACH (pos, struct cls_rule, node.list, &bucket->rules) {
+            LIST_FOR_EACH (pos, node.list, &bucket->rules) {
                 if (pos->priority < priority) {
                     return NULL;
                 } else if (pos->priority == priority &&
@@ -374,13 +373,12 @@ classifier_rule_overlaps(const struct classifier *cls,
     for (tbl = &cls->tables[0]; tbl < &cls->tables[CLS_N_FIELDS]; tbl++) {
         struct cls_bucket *bucket;
 
-        HMAP_FOR_EACH (bucket, struct cls_bucket, hmap_node, tbl) {
+        HMAP_FOR_EACH (bucket, hmap_node, tbl) {
             struct cls_rule *rule;
 
-            LIST_FOR_EACH (rule, struct cls_rule, node.list,
-                           &bucket->rules) {
+            LIST_FOR_EACH (rule, node.list, &bucket->rules) {
                 if (rule->priority == priority
-                        && rules_match_2wild(rule, &target_rule, 0)) {
+                    && rules_match_2wild(rule, &target_rule, 0)) {
                     return true;
                 }
             }
@@ -409,8 +407,7 @@ classifier_for_each_match(const struct classifier *cls,
              table++) {
             struct cls_bucket *bucket, *next_bucket;
 
-            HMAP_FOR_EACH_SAFE (bucket, next_bucket,
-                                struct cls_bucket, hmap_node, table) {
+            HMAP_FOR_EACH_SAFE (bucket, next_bucket, hmap_node, table) {
                 /* XXX there is a bit of room for optimization here based on
                  * rejecting entire buckets on their fixed fields, but it will
                  * only be worthwhile for big buckets (which we hope we won't
@@ -422,8 +419,7 @@ classifier_for_each_match(const struct classifier *cls,
                  * bucket itself will be destroyed.  The bucket contains the
                  * list head so that's a use-after-free error. */
                 prev_rule = NULL;
-                LIST_FOR_EACH (rule, struct cls_rule, node.list,
-                               &bucket->rules) {
+                LIST_FOR_EACH (rule, node.list, &bucket->rules) {
                     if (rules_match_1wild(rule, target, 0)) {
                         if (prev_rule) {
                             callback(prev_rule, aux);
@@ -442,7 +438,7 @@ classifier_for_each_match(const struct classifier *cls,
         if (target->wc.wildcards) {
             struct cls_rule *rule, *next_rule;
 
-            HMAP_FOR_EACH_SAFE (rule, next_rule, struct cls_rule, node.hmap,
+            HMAP_FOR_EACH_SAFE (rule, next_rule, node.hmap,
                                 &cls->exact_table) {
                 if (rules_match_1wild(rule, target, 0)) {
                     callback(rule, aux);
@@ -477,8 +473,7 @@ classifier_for_each(const struct classifier *cls, int include,
         for (tbl = &cls->tables[0]; tbl < &cls->tables[CLS_N_FIELDS]; tbl++) {
             struct cls_bucket *bucket, *next_bucket;
 
-            HMAP_FOR_EACH_SAFE (bucket, next_bucket,
-                                struct cls_bucket, hmap_node, tbl) {
+            HMAP_FOR_EACH_SAFE (bucket, next_bucket, hmap_node, tbl) {
                 struct cls_rule *prev_rule, *rule;
 
                 /* We can't just use LIST_FOR_EACH_SAFE here because, if the
@@ -486,8 +481,7 @@ classifier_for_each(const struct classifier *cls, int include,
                  * bucket itself will be destroyed.  The bucket contains the
                  * list head so that's a use-after-free error. */
                 prev_rule = NULL;
-                LIST_FOR_EACH (rule, struct cls_rule, node.list,
-                               &bucket->rules) {
+                LIST_FOR_EACH (rule, node.list, &bucket->rules) {
                     if (prev_rule) {
                         callback(prev_rule, aux);
                     }
@@ -503,8 +497,7 @@ classifier_for_each(const struct classifier *cls, int include,
     if (include & CLS_INC_EXACT) {
         struct cls_rule *rule, *next_rule;
 
-        HMAP_FOR_EACH_SAFE (rule, next_rule,
-                            struct cls_rule, node.hmap, &cls->exact_table) {
+        HMAP_FOR_EACH_SAFE (rule, next_rule, node.hmap, &cls->exact_table) {
             callback(rule, aux);
         }
     }
@@ -641,7 +634,7 @@ static struct cls_rule *
 bucket_insert(struct cls_bucket *bucket, struct cls_rule *rule)
 {
     struct cls_rule *pos;
-    LIST_FOR_EACH (pos, struct cls_rule, node.list, &bucket->rules) {
+    LIST_FOR_EACH (pos, node.list, &bucket->rules) {
         if (pos->priority == rule->priority) {
             if (pos->wc.wildcards == rule->wc.wildcards
                 && rules_match_1wild(pos, rule, rule->table_idx))
@@ -679,8 +672,7 @@ static struct cls_bucket *
 find_bucket(struct hmap *table, size_t hash, const struct cls_rule *rule)
 {
     struct cls_bucket *bucket;
-    HMAP_FOR_EACH_WITH_HASH (bucket, struct cls_bucket, hmap_node, hash,
-                             table) {
+    HMAP_FOR_EACH_WITH_HASH (bucket, hmap_node, hash, table) {
         if (equal_fields(&bucket->fixed, &rule->flow, rule->table_idx)) {
             return bucket;
         }
@@ -850,7 +842,7 @@ search_bucket(struct cls_bucket *bucket, int field_idx,
         return NULL;
     }
 
-    LIST_FOR_EACH (pos, struct cls_rule, node.list, &bucket->rules) {
+    LIST_FOR_EACH (pos, node.list, &bucket->rules) {
         if (rules_match_1wild(target, pos, field_idx)) {
             return pos;
         }
@@ -878,7 +870,7 @@ search_table(const struct hmap *table, int field_idx,
         return search_bucket(bucket, field_idx, target);
     }
 
-    HMAP_FOR_EACH_WITH_HASH (bucket, struct cls_bucket, hmap_node,
+    HMAP_FOR_EACH_WITH_HASH (bucket, hmap_node,
                              hash_fields(&target->flow, field_idx), table) {
         struct cls_rule *rule = search_bucket(bucket, field_idx, target);
         if (rule) {
@@ -894,8 +886,7 @@ search_exact_table(const struct classifier *cls, size_t hash,
 {
     struct cls_rule *rule;
 
-    HMAP_FOR_EACH_WITH_HASH (rule, struct cls_rule, node.hmap,
-                             hash, &cls->exact_table) {
+    HMAP_FOR_EACH_WITH_HASH (rule, node.hmap, hash, &cls->exact_table) {
         if (flow_equal(&rule->flow, target)) {
             return rule;
         }
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 3975b5a8b..60094073e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -457,7 +457,7 @@ get_port_by_name(struct dp_netdev *dp,
 {
     struct dp_netdev_port *port;
 
-    LIST_FOR_EACH (port, struct dp_netdev_port, node, &dp->port_list) {
+    LIST_FOR_EACH (port, node, &dp->port_list) {
         if (!strcmp(netdev_get_name(port->netdev), devname)) {
             *portp = port;
             return 0;
@@ -545,8 +545,7 @@ dp_netdev_flow_flush(struct dp_netdev *dp)
 {
     struct dp_netdev_flow *flow, *next;
 
-    HMAP_FOR_EACH_SAFE (flow, next, struct dp_netdev_flow, node,
-                        &dp->flow_table) {
+    HMAP_FOR_EACH_SAFE (flow, next, node, &dp->flow_table) {
         dp_netdev_free_flow(dp, flow);
     }
 }
@@ -567,7 +566,7 @@ dpif_netdev_port_list(const struct dpif *dpif, struct odp_port *ports, int n)
     int i;
 
     i = 0;
-    LIST_FOR_EACH (port, struct dp_netdev_port, node, &dp->port_list) {
+    LIST_FOR_EACH (port, node, &dp->port_list) {
         struct odp_port *odp_port = &ports[i];
         if (i >= n) {
             break;
@@ -661,8 +660,7 @@ dp_netdev_lookup_flow(const struct dp_netdev *dp, const flow_t *key)
     struct dp_netdev_flow *flow;
 
     assert(!key->reserved[0] && !key->reserved[1] && !key->reserved[2]);
-    HMAP_FOR_EACH_WITH_HASH (flow, struct dp_netdev_flow, node,
-                             flow_hash(key, 0), &dp->flow_table) {
+    HMAP_FOR_EACH_WITH_HASH (flow, node, flow_hash(key, 0), &dp->flow_table) {
         if (flow_equal(&flow->key, key)) {
             return flow;
         }
@@ -886,7 +884,7 @@ dpif_netdev_flow_list(const struct dpif *dpif, struct odp_flow flows[], int n)
     int i;
 
     i = 0;
-    HMAP_FOR_EACH (flow, struct dp_netdev_flow, node, &dp->flow_table) {
+    HMAP_FOR_EACH (flow, node, &dp->flow_table) {
         if (i >= n) {
             break;
         }
@@ -1044,10 +1042,10 @@ dp_netdev_run(void)
     struct dp_netdev *dp;
 
     ofpbuf_init(&packet, DP_NETDEV_HEADROOM + max_mtu);
-    LIST_FOR_EACH (dp, struct dp_netdev, node, &dp_netdev_list) {
+    LIST_FOR_EACH (dp, node, &dp_netdev_list) {
         struct dp_netdev_port *port;
 
-        LIST_FOR_EACH (port, struct dp_netdev_port, node, &dp->port_list) {
+        LIST_FOR_EACH (port, node, &dp->port_list) {
             int error;
 
             /* Reset packet contents. */
@@ -1072,9 +1070,9 @@ dp_netdev_wait(void)
 {
     struct dp_netdev *dp;
 
-    LIST_FOR_EACH (dp, struct dp_netdev, node, &dp_netdev_list) {
+    LIST_FOR_EACH (dp, node, &dp_netdev_list) {
         struct dp_netdev_port *port;
-        LIST_FOR_EACH (port, struct dp_netdev_port, node, &dp->port_list) {
+        LIST_FOR_EACH (port, node, &dp->port_list) {
             netdev_recv_wait(port->netdev);
         }
     }
diff --git a/lib/hmap.h b/lib/hmap.h
index 92aff7f99..04e51bc6d 100644
--- a/lib/hmap.h
+++ b/lib/hmap.h
@@ -96,9 +96,8 @@ struct hmap_node *hmap_random_node(const struct hmap *);
  *
  * HMAP_FOR_EACH_WITH_HASH iterates NODE over all of the nodes in HMAP that
  * have hash value equal to HASH.  HMAP_FOR_EACH_IN_BUCKET iterates NODE over
- * all of the nodes in HMAP that would fall in the same bucket as HASH.  STRUCT
- * and MEMBER must be the name of the struct that contains the 'struct
- * hmap_node' and the name of the 'struct hmap_node' member, respectively.
+ * all of the nodes in HMAP that would fall in the same bucket as HASH.  MEMBER
+ * must be the name of the 'struct hmap_node' member within NODE.
  *
  * These macros may be used interchangeably to search for a particular value in
  * an hmap, see, e.g. shash_find() for an example.  Usually, using
@@ -113,18 +112,18 @@ struct hmap_node *hmap_random_node(const struct hmap *);
  *
  * HASH is only evaluated once.
  */
-#define HMAP_FOR_EACH_WITH_HASH(NODE, STRUCT, MEMBER, HASH, HMAP)       \
-    for ((NODE) = CONTAINER_OF(hmap_first_with_hash(HMAP, HASH),        \
-                               STRUCT, MEMBER);                         \
+#define HMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HMAP)               \
+    for ((NODE) = OBJECT_CONTAINING(hmap_first_with_hash(HMAP, HASH),   \
+                                  NODE, MEMBER);                        \
          &(NODE)->MEMBER != NULL;                                       \
-         (NODE) = CONTAINER_OF(hmap_next_with_hash(&(NODE)->MEMBER),    \
-                               STRUCT, MEMBER))
-#define HMAP_FOR_EACH_IN_BUCKET(NODE, STRUCT, MEMBER, HASH, HMAP)       \
-    for ((NODE) = CONTAINER_OF(hmap_first_in_bucket(HMAP, HASH),        \
-                               STRUCT, MEMBER);                         \
+         (NODE) = OBJECT_CONTAINING(hmap_next_with_hash(&(NODE)->MEMBER), \
+                                  NODE, MEMBER))
+#define HMAP_FOR_EACH_IN_BUCKET(NODE, MEMBER, HASH, HMAP)               \
+    for ((NODE) = OBJECT_CONTAINING(hmap_first_in_bucket(HMAP, HASH),   \
+                                  NODE, MEMBER);                        \
          &(NODE)->MEMBER != NULL;                                       \
-         (NODE) = CONTAINER_OF(hmap_next_in_bucket(&(NODE)->MEMBER),    \
-                               STRUCT, MEMBER))
+         (NODE) = OBJECT_CONTAINING(hmap_next_in_bucket(&(NODE)->MEMBER), \
+                               NODE, MEMBER))
 
 static inline struct hmap_node *hmap_first_with_hash(const struct hmap *,
                                                      size_t hash);
@@ -138,18 +137,18 @@ static inline struct hmap_node *hmap_next_in_bucket(const struct hmap_node *);
  * The _SAFE version is needed when NODE may be freed.  It is not needed when
  * NODE may be removed from the hash map but its members remain accessible and
  * intact. */
-#define HMAP_FOR_EACH(NODE, STRUCT, MEMBER, HMAP)                   \
-    for ((NODE) = CONTAINER_OF(hmap_first(HMAP), STRUCT, MEMBER);   \
-         &(NODE)->MEMBER != NULL;                                   \
-         (NODE) = CONTAINER_OF(hmap_next(HMAP, &(NODE)->MEMBER),    \
-                               STRUCT, MEMBER))
-
-#define HMAP_FOR_EACH_SAFE(NODE, NEXT, STRUCT, MEMBER, HMAP)        \
-    for ((NODE) = CONTAINER_OF(hmap_first(HMAP), STRUCT, MEMBER);   \
-         (&(NODE)->MEMBER != NULL                                   \
-          ? (NEXT) = CONTAINER_OF(hmap_next(HMAP, &(NODE)->MEMBER), \
-                                  STRUCT, MEMBER), 1                \
-          : 0);                                                     \
+#define HMAP_FOR_EACH(NODE, MEMBER, HMAP)                               \
+    for ((NODE) = OBJECT_CONTAINING(hmap_first(HMAP), NODE, MEMBER);    \
+         &(NODE)->MEMBER != NULL;                                       \
+         (NODE) = OBJECT_CONTAINING(hmap_next(HMAP, &(NODE)->MEMBER),   \
+                                    NODE, MEMBER))
+
+#define HMAP_FOR_EACH_SAFE(NODE, NEXT, MEMBER, HMAP)                    \
+    for ((NODE) = OBJECT_CONTAINING(hmap_first(HMAP), NODE, MEMBER);    \
+         (&(NODE)->MEMBER != NULL                                       \
+          ? (NEXT) = OBJECT_CONTAINING(hmap_next(HMAP, &(NODE)->MEMBER), \
+                                       NODE, MEMBER), 1                 \
+          : 0);                                                         \
          (NODE) = (NEXT))
 
 static inline struct hmap_node *hmap_first(const struct hmap *);
diff --git a/lib/list.h b/lib/list.h
index 845aab20b..0481477ae 100644
--- a/lib/list.h
+++ b/lib/list.h
@@ -53,18 +53,18 @@ struct list *list_back(struct list *);
 size_t list_size(const struct list *);
 bool list_is_empty(const struct list *);
 
-#define LIST_FOR_EACH(ITER, STRUCT, MEMBER, LIST)                   \
-    for (ITER = CONTAINER_OF((LIST)->next, STRUCT, MEMBER);         \
-         &(ITER)->MEMBER != (LIST);                                 \
-         ITER = CONTAINER_OF((ITER)->MEMBER.next, STRUCT, MEMBER))
-#define LIST_FOR_EACH_REVERSE(ITER, STRUCT, MEMBER, LIST)           \
-    for (ITER = CONTAINER_OF((LIST)->prev, STRUCT, MEMBER);         \
-         &(ITER)->MEMBER != (LIST);                                 \
-         ITER = CONTAINER_OF((ITER)->MEMBER.prev, STRUCT, MEMBER))
-#define LIST_FOR_EACH_SAFE(ITER, NEXT, STRUCT, MEMBER, LIST)        \
-    for (ITER = CONTAINER_OF((LIST)->next, STRUCT, MEMBER);         \
-         (NEXT = CONTAINER_OF((ITER)->MEMBER.next, STRUCT, MEMBER), \
-          &(ITER)->MEMBER != (LIST));                               \
+#define LIST_FOR_EACH(ITER, MEMBER, LIST)                               \
+    for (ITER = OBJECT_CONTAINING((LIST)->next, ITER, MEMBER);          \
+         &(ITER)->MEMBER != (LIST);                                     \
+         ITER = OBJECT_CONTAINING((ITER)->MEMBER.next, ITER, MEMBER))
+#define LIST_FOR_EACH_REVERSE(ITER, MEMBER, LIST)                       \
+    for (ITER = OBJECT_CONTAINING((LIST)->prev, ITER, MEMBER);          \
+         &(ITER)->MEMBER != (LIST);                                     \
+         ITER = OBJECT_CONTAINING((ITER)->MEMBER.prev, ITER, MEMBER))
+#define LIST_FOR_EACH_SAFE(ITER, NEXT, MEMBER, LIST)                    \
+    for (ITER = OBJECT_CONTAINING((LIST)->next, ITER, MEMBER);          \
+         (NEXT = OBJECT_CONTAINING((ITER)->MEMBER.next, ITER, MEMBER),  \
+          &(ITER)->MEMBER != (LIST));                                   \
          ITER = NEXT)
 
 #endif /* list.h */
diff --git a/lib/lockfile.c b/lib/lockfile.c
index 84bfb3c04..690caf9f2 100644
--- a/lib/lockfile.c
+++ b/lib/lockfile.c
@@ -151,7 +151,7 @@ lockfile_postfork(void)
 {
     struct lockfile *lockfile;
 
-    HMAP_FOR_EACH (lockfile, struct lockfile, hmap_node, &lock_table) {
+    HMAP_FOR_EACH (lockfile, hmap_node, &lock_table) {
         if (lockfile->fd >= 0) {
             VLOG_WARN("%s: child does not inherit lock", lockfile->name);
             lockfile_unhash(lockfile);
@@ -171,7 +171,7 @@ lockfile_find(dev_t device, ino_t inode)
 {
     struct lockfile *lockfile;
 
-    HMAP_FOR_EACH_WITH_HASH (lockfile, struct lockfile, hmap_node,
+    HMAP_FOR_EACH_WITH_HASH (lockfile, hmap_node,
                              lockfile_hash(device, inode), &lock_table) {
         if (lockfile->device == device && lockfile->inode == inode) {
             return lockfile;
diff --git a/lib/mac-learning.c b/lib/mac-learning.c
index c8582f312..362347010 100644
--- a/lib/mac-learning.c
+++ b/lib/mac-learning.c
@@ -79,7 +79,7 @@ search_bucket(struct list *bucket, const uint8_t mac[ETH_ADDR_LEN],
               uint16_t vlan)
 {
     struct mac_entry *e;
-    LIST_FOR_EACH (e, struct mac_entry, hash_node, bucket) {
+    LIST_FOR_EACH (e, hash_node, bucket) {
         if (eth_addr_equals(e->mac, mac) && e->vlan == vlan) {
             return e;
         }
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index eecff5034..e1a3c8c46 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1483,8 +1483,7 @@ tc_find_queue__(const struct netdev *netdev, unsigned int queue_id,
                                 netdev_dev_linux_cast(netdev_get_dev(netdev));
     struct tc_queue *queue;
 
-    HMAP_FOR_EACH_IN_BUCKET (queue, struct tc_queue, hmap_node,
-                             hash, &netdev_dev->tc->queues) {
+    HMAP_FOR_EACH_IN_BUCKET (queue, hmap_node, hash, &netdev_dev->tc->queues) {
         if (queue->queue_id == queue_id) {
             return queue;
         }
@@ -1679,8 +1678,7 @@ netdev_linux_dump_queues(const struct netdev *netdev,
 
     last_error = 0;
     shash_init(&details);
-    HMAP_FOR_EACH (queue, struct tc_queue, hmap_node,
-                   &netdev_dev->tc->queues) {
+    HMAP_FOR_EACH (queue, hmap_node, &netdev_dev->tc->queues) {
         shash_clear(&details);
 
         error = netdev_dev->tc->ops->class_get(netdev, queue, &details);
@@ -2012,7 +2010,7 @@ static void
 poll_notify(struct list *list)
 {
     struct netdev_linux_notifier *notifier;
-    LIST_FOR_EACH (notifier, struct netdev_linux_notifier, node, list) {
+    LIST_FOR_EACH (notifier, node, list) {
         struct netdev_notifier *n = &notifier->notifier;
         n->cb(n);
     }
@@ -2551,8 +2549,7 @@ htb_tc_destroy(struct tc *tc)
     struct htb *htb = CONTAINER_OF(tc, struct htb, tc);
     struct htb_class *hc, *next;
 
-    HMAP_FOR_EACH_SAFE (hc, next, struct htb_class, tc_queue.hmap_node,
-                        &htb->tc.queues) {
+    HMAP_FOR_EACH_SAFE (hc, next, tc_queue.hmap_node, &htb->tc.queues) {
         hmap_remove(&htb->tc.queues, &hc->tc_queue.hmap_node);
         free(hc);
     }
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 0153ac78d..880c9cd6c 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -269,8 +269,7 @@ netdev_vport_poll_notify(const struct netdev *netdev)
     if (list) {
         struct netdev_vport_notifier *notifier;
 
-        LIST_FOR_EACH (notifier, struct netdev_vport_notifier,
-                       list_node, list) {
+        LIST_FOR_EACH (notifier, list_node, list) {
             struct netdev_notifier *n = &notifier->notifier;
             n->cb(n);
         }
diff --git a/lib/netdev.c b/lib/netdev.c
index d93cabe3b..d516ff21d 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -1591,7 +1591,7 @@ static void
 close_all_netdevs(void *aux OVS_UNUSED)
 {
     struct netdev *netdev, *next;
-    LIST_FOR_EACH_SAFE(netdev, next, struct netdev, node, &netdev_list) {
+    LIST_FOR_EACH_SAFE(netdev, next, node, &netdev_list) {
         netdev_close(netdev);
     }
 }
diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
index 43ff94714..b7ee0976a 100644
--- a/lib/ovsdb-idl.c
+++ b/lib/ovsdb-idl.c
@@ -215,15 +215,13 @@ ovsdb_idl_clear(struct ovsdb_idl *idl)
         }
 
         changed = true;
-        HMAP_FOR_EACH_SAFE (row, next_row, struct ovsdb_idl_row, hmap_node,
-                            &table->rows) {
+        HMAP_FOR_EACH_SAFE (row, next_row, hmap_node, &table->rows) {
             struct ovsdb_idl_arc *arc, *next_arc;
 
             if (!ovsdb_idl_row_is_orphan(row)) {
                 ovsdb_idl_row_unparse(row);
             }
-            LIST_FOR_EACH_SAFE (arc, next_arc, struct ovsdb_idl_arc, src_node,
-                                &row->src_arcs) {
+            LIST_FOR_EACH_SAFE (arc, next_arc, src_node, &row->src_arcs) {
                 free(arc);
             }
             /* No need to do anything with dst_arcs: some node has those arcs
@@ -553,8 +551,7 @@ ovsdb_idl_get_row(struct ovsdb_idl_table *table, const struct uuid *uuid)
 {
     struct ovsdb_idl_row *row;
 
-    HMAP_FOR_EACH_WITH_HASH (row, struct ovsdb_idl_row, hmap_node,
-                             uuid_hash(uuid), &table->rows) {
+    HMAP_FOR_EACH_WITH_HASH (row, hmap_node, uuid_hash(uuid), &table->rows) {
         if (uuid_equals(&row->uuid, uuid)) {
             return row;
         }
@@ -771,8 +768,7 @@ ovsdb_idl_row_clear_arcs(struct ovsdb_idl_row *row, bool destroy_dsts)
 
     /* Delete all forward arcs.  If 'destroy_dsts', destroy any orphaned rows
      * that this causes to be unreferenced. */
-    LIST_FOR_EACH_SAFE (arc, next, struct ovsdb_idl_arc, src_node,
-                        &row->src_arcs) {
+    LIST_FOR_EACH_SAFE (arc, next, src_node, &row->src_arcs) {
         list_remove(&arc->dst_node);
         if (destroy_dsts
             && ovsdb_idl_row_is_orphan(arc->dst)
@@ -800,8 +796,7 @@ ovsdb_idl_row_reparse_backrefs(struct ovsdb_idl_row *row)
      * (If duplicate arcs were possible then we would need to make sure that
      * 'next' didn't also point into 'arc''s destination, but we forbid
      * duplicate arcs.) */
-    LIST_FOR_EACH_SAFE (arc, next, struct ovsdb_idl_arc, dst_node,
-                        &row->dst_arcs) {
+    LIST_FOR_EACH_SAFE (arc, next, dst_node, &row->dst_arcs) {
         struct ovsdb_idl_row *ref = arc->src;
 
         ovsdb_idl_row_unparse(ref);
@@ -1145,8 +1140,7 @@ ovsdb_idl_txn_destroy(struct ovsdb_idl_txn *txn)
     free(txn->inc_table);
     free(txn->inc_column);
     json_destroy(txn->inc_where);
-    HMAP_FOR_EACH_SAFE (insert, next, struct ovsdb_idl_txn_insert, hmap_node,
-                        &txn->inserted_rows) {
+    HMAP_FOR_EACH_SAFE (insert, next, hmap_node, &txn->inserted_rows) {
         free(insert);
     }
     hmap_destroy(&txn->inserted_rows);
@@ -1196,8 +1190,7 @@ ovsdb_idl_txn_get_row(const struct ovsdb_idl_txn *txn, const struct uuid *uuid)
 {
     const struct ovsdb_idl_row *row;
 
-    HMAP_FOR_EACH_WITH_HASH (row, struct ovsdb_idl_row, txn_node,
-                             uuid_hash(uuid), &txn->txn_rows) {
+    HMAP_FOR_EACH_WITH_HASH (row, txn_node, uuid_hash(uuid), &txn->txn_rows) {
         if (uuid_equals(&row->uuid, uuid)) {
             return row;
         }
@@ -1255,8 +1248,7 @@ ovsdb_idl_txn_disassemble(struct ovsdb_idl_txn *txn)
      * transaction and fail to update the graph.  */
     txn->idl->txn = NULL;
 
-    HMAP_FOR_EACH_SAFE (row, next, struct ovsdb_idl_row, txn_node,
-                        &txn->txn_rows) {
+    HMAP_FOR_EACH_SAFE (row, next, txn_node, &txn->txn_rows) {
         if (row->old) {
             if (row->written) {
                 ovsdb_idl_row_unparse(row);
@@ -1300,7 +1292,7 @@ ovsdb_idl_txn_commit(struct ovsdb_idl_txn *txn)
         json_string_create(txn->idl->class->database));
 
     /* Add prerequisites and declarations of new rows. */
-    HMAP_FOR_EACH (row, struct ovsdb_idl_row, txn_node, &txn->txn_rows) {
+    HMAP_FOR_EACH (row, txn_node, &txn->txn_rows) {
         /* XXX check that deleted rows exist even if no prereqs? */
         if (row->prereqs) {
             const struct ovsdb_idl_table_class *class = row->table->class;
@@ -1332,7 +1324,7 @@ ovsdb_idl_txn_commit(struct ovsdb_idl_txn *txn)
 
     /* Add updates. */
     any_updates = false;
-    HMAP_FOR_EACH (row, struct ovsdb_idl_row, txn_node, &txn->txn_rows) {
+    HMAP_FOR_EACH (row, txn_node, &txn->txn_rows) {
         const struct ovsdb_idl_table_class *class = row->table->class;
 
         if (row->old == row->new) {
@@ -1530,7 +1522,7 @@ ovsdb_idl_txn_get_insert_uuid(const struct ovsdb_idl_txn *txn,
     const struct ovsdb_idl_txn_insert *insert;
 
     assert(txn->status == TXN_SUCCESS || txn->status == TXN_UNCHANGED);
-    HMAP_FOR_EACH_IN_BUCKET (insert, struct ovsdb_idl_txn_insert, hmap_node,
+    HMAP_FOR_EACH_IN_BUCKET (insert, hmap_node,
                              uuid_hash(uuid), &txn->inserted_rows) {
         if (uuid_equals(uuid, &insert->dummy)) {
             return &insert->real;
@@ -1653,8 +1645,7 @@ ovsdb_idl_txn_abort_all(struct ovsdb_idl *idl)
 {
     struct ovsdb_idl_txn *txn;
 
-    HMAP_FOR_EACH (txn, struct ovsdb_idl_txn, hmap_node,
-                   &idl->outstanding_txns) {
+    HMAP_FOR_EACH (txn, hmap_node, &idl->outstanding_txns) {
         ovsdb_idl_txn_complete(txn, TXN_TRY_AGAIN);
     }
 }
@@ -1664,7 +1655,7 @@ ovsdb_idl_txn_find(struct ovsdb_idl *idl, const struct json *id)
 {
     struct ovsdb_idl_txn *txn;
 
-    HMAP_FOR_EACH_WITH_HASH (txn, struct ovsdb_idl_txn, hmap_node,
+    HMAP_FOR_EACH_WITH_HASH (txn, hmap_node,
                              json_hash(id, 0), &idl->outstanding_txns) {
         if (json_equal(id, txn->request_id)) {
             return txn;
@@ -1844,8 +1835,7 @@ ovsdb_idl_txn_process_reply(struct ovsdb_idl *idl,
                 hard_errors++;
             }
 
-            HMAP_FOR_EACH (insert, struct ovsdb_idl_txn_insert, hmap_node,
-                           &txn->inserted_rows) {
+            HMAP_FOR_EACH (insert, hmap_node, &txn->inserted_rows) {
                 if (!ovsdb_idl_txn_process_insert_reply(insert, ops)) {
                     hard_errors++;
                 }
diff --git a/lib/poll-loop.c b/lib/poll-loop.c
index 70360b9b5..6aefc7689 100644
--- a/lib/poll-loop.c
+++ b/lib/poll-loop.c
@@ -171,7 +171,7 @@ poll_block(void)
     }
 
     n_pollfds = 0;
-    LIST_FOR_EACH (pw, struct poll_waiter, node, &waiters) {
+    LIST_FOR_EACH (pw, node, &waiters) {
         pw->pollfd = &pollfds[n_pollfds];
         pollfds[n_pollfds].fd = pw->fd;
         pollfds[n_pollfds].events = pw->events;
@@ -190,7 +190,7 @@ poll_block(void)
         log_wakeup(&timeout_backtrace, "%d-ms timeout", timeout);
     }
 
-    LIST_FOR_EACH_SAFE (pw, next, struct poll_waiter, node, &waiters) {
+    LIST_FOR_EACH_SAFE (pw, next, node, &waiters) {
         if (pw->pollfd->revents && VLOG_IS_DBG_ENABLED()) {
             log_wakeup(pw->backtrace, "%s%s%s%s%s on fd %d",
                        pw->pollfd->revents & POLLIN ? "[POLLIN]" : "",
diff --git a/lib/process.c b/lib/process.c
index 377c396b9..087275b8e 100644
--- a/lib/process.c
+++ b/lib/process.c
@@ -590,7 +590,7 @@ sigchld_handler(int signr OVS_UNUSED)
     struct process *p;
 
     COVERAGE_INC(process_sigchld);
-    LIST_FOR_EACH (p, struct process, node, &all_processes) {
+    LIST_FOR_EACH (p, node, &all_processes) {
         if (!p->exited) {
             int retval, status;
             do {
diff --git a/lib/rtnetlink.c b/lib/rtnetlink.c
index f5a6df8a1..2e1c17330 100644
--- a/lib/rtnetlink.c
+++ b/lib/rtnetlink.c
@@ -166,8 +166,7 @@ rtnetlink_report_change(const struct nlmsghdr *nlmsg,
     change.master_ifindex = (attrs[IFLA_MASTER]
                              ? nl_attr_get_u32(attrs[IFLA_MASTER]) : 0);
 
-    LIST_FOR_EACH (notifier, struct rtnetlink_notifier, node,
-                   &all_notifiers) {
+    LIST_FOR_EACH (notifier, node, &all_notifiers) {
         notifier->cb(&change, notifier->aux);
     }
 }
@@ -177,8 +176,7 @@ rtnetlink_report_notify_error(void)
 {
     struct rtnetlink_notifier *notifier;
 
-    LIST_FOR_EACH (notifier, struct rtnetlink_notifier, node,
-                   &all_notifiers) {
+    LIST_FOR_EACH (notifier, node, &all_notifiers) {
         notifier->cb(NULL, notifier->aux);
     }
 }
diff --git a/lib/shash.c b/lib/shash.c
index cc45efb5c..82791e31f 100644
--- a/lib/shash.c
+++ b/lib/shash.c
@@ -193,7 +193,7 @@ shash_find__(const struct shash *sh, const char *name, size_t hash)
 {
     struct shash_node *node;
 
-    HMAP_FOR_EACH_WITH_HASH (node, struct shash_node, node, hash, &sh->map) {
+    HMAP_FOR_EACH_WITH_HASH (node, node, hash, &sh->map) {
         if (!strcmp(node->name, name)) {
             return node;
         }
diff --git a/lib/shash.h b/lib/shash.h
index 8a736e80b..dfb10e2cc 100644
--- a/lib/shash.h
+++ b/lib/shash.h
@@ -35,12 +35,11 @@ struct shash {
 
 #define SHASH_INITIALIZER(SHASH) { HMAP_INITIALIZER(&(SHASH)->map) }
 
-#define SHASH_FOR_EACH(SHASH_NODE, SHASH)                               \
-    HMAP_FOR_EACH (SHASH_NODE, struct shash_node, node, &(SHASH)->map)
+#define SHASH_FOR_EACH(SHASH_NODE, SHASH) \
+    HMAP_FOR_EACH (SHASH_NODE, node, &(SHASH)->map)
 
-#define SHASH_FOR_EACH_SAFE(SHASH_NODE, NEXT, SHASH)                \
-    HMAP_FOR_EACH_SAFE (SHASH_NODE, NEXT, struct shash_node, node,  \
-                        &(SHASH)->map)
+#define SHASH_FOR_EACH_SAFE(SHASH_NODE, NEXT, SHASH) \
+    HMAP_FOR_EACH_SAFE (SHASH_NODE, NEXT, node, &(SHASH)->map)
 
 void shash_init(struct shash *);
 void shash_destroy(struct shash *);
diff --git a/lib/unixctl.c b/lib/unixctl.c
index ac756a834..706b3e309 100644
--- a/lib/unixctl.c
+++ b/lib/unixctl.c
@@ -411,8 +411,7 @@ unixctl_server_run(struct unixctl_server *server)
         new_connection(server, fd);
     }
 
-    LIST_FOR_EACH_SAFE (conn, next,
-                        struct unixctl_conn, node, &server->conns) {
+    LIST_FOR_EACH_SAFE (conn, next, node, &server->conns) {
         int error = run_connection(conn);
         if (error && error != EAGAIN) {
             kill_connection(conn);
@@ -426,7 +425,7 @@ unixctl_server_wait(struct unixctl_server *server)
     struct unixctl_conn *conn;
 
     poll_fd_wait(server->fd, POLLIN);
-    LIST_FOR_EACH (conn, struct unixctl_conn, node, &server->conns) {
+    LIST_FOR_EACH (conn, node, &server->conns) {
         if (conn->state == S_RECV) {
             poll_fd_wait(conn->fd, POLLIN);
         } else if (conn->state == S_SEND) {
@@ -442,8 +441,7 @@ unixctl_server_destroy(struct unixctl_server *server)
     if (server) {
         struct unixctl_conn *conn, *next;
 
-        LIST_FOR_EACH_SAFE (conn, next,
-                            struct unixctl_conn, node, &server->conns) {
+        LIST_FOR_EACH_SAFE (conn, next, node, &server->conns) {
             kill_connection(conn);
         }
 
diff --git a/ofproto/ofproto-sflow.c b/ofproto/ofproto-sflow.c
index 076bd79c4..f129d38be 100644
--- a/ofproto/ofproto-sflow.c
+++ b/ofproto/ofproto-sflow.c
@@ -140,7 +140,7 @@ ofproto_sflow_find_port(const struct ofproto_sflow *os, uint16_t odp_port)
 {
     struct ofproto_sflow_port *osp;
 
-    HMAP_FOR_EACH_IN_BUCKET (osp, struct ofproto_sflow_port, hmap_node,
+    HMAP_FOR_EACH_IN_BUCKET (osp, hmap_node,
                              hash_int(odp_port, 0), &os->ports) {
         if (osp->odp_port == odp_port) {
             return osp;
@@ -297,8 +297,7 @@ ofproto_sflow_destroy(struct ofproto_sflow *os)
         struct ofproto_sflow_port *osp, *next;
 
         ofproto_sflow_clear(os);
-        HMAP_FOR_EACH_SAFE (osp, next, struct ofproto_sflow_port, hmap_node,
-                            &os->ports) {
+        HMAP_FOR_EACH_SAFE (osp, next, hmap_node, &os->ports) {
             ofproto_sflow_del_port__(os, osp);
         }
         hmap_destroy(&os->ports);
@@ -463,7 +462,7 @@ ofproto_sflow_set_options(struct ofproto_sflow *os,
                                MAX(1, UINT32_MAX / options->sampling_rate));
 
     /* Add samplers and pollers for the currently known ports. */
-    HMAP_FOR_EACH (osp, struct ofproto_sflow_port, hmap_node, &os->ports) {
+    HMAP_FOR_EACH (osp, hmap_node, &os->ports) {
         ofproto_sflow_add_poller(os, osp, osp->odp_port);
         ofproto_sflow_add_sampler(os, osp);
     }
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 68b2493de..c683e3b63 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -532,7 +532,7 @@ find_controller_by_target(struct ofproto *ofproto, const char *target)
 {
     struct ofconn *ofconn;
 
-    HMAP_FOR_EACH_WITH_HASH (ofconn, struct ofconn, hmap_node,
+    HMAP_FOR_EACH_WITH_HASH (ofconn, hmap_node,
                              hash_string(target, 0), &ofproto->controllers) {
         if (!strcmp(ofconn_get_target(ofconn), target)) {
             return ofconn;
@@ -557,7 +557,7 @@ update_in_band_remotes(struct ofproto *ofproto)
 
     /* Add all the remotes. */
     discovery = false;
-    HMAP_FOR_EACH (ofconn, struct ofconn, hmap_node, &ofproto->controllers) {
+    HMAP_FOR_EACH (ofconn, hmap_node, &ofproto->controllers) {
         struct sockaddr_in *sin = &addrs[n_addrs];
 
         if (ofconn->band == OFPROTO_OUT_OF_BAND) {
@@ -616,7 +616,7 @@ update_fail_open(struct ofproto *p)
 
         n = 0;
         rconns = xmalloc(hmap_count(&p->controllers) * sizeof *rconns);
-        HMAP_FOR_EACH (ofconn, struct ofconn, hmap_node, &p->controllers) {
+        HMAP_FOR_EACH (ofconn, hmap_node, &p->controllers) {
             rconns[n++] = ofconn->rconn;
         }
 
@@ -665,8 +665,7 @@ ofproto_set_controllers(struct ofproto *p,
     /* Delete controllers that are no longer configured.
      * Update configuration of all now-existing controllers. */
     ss_exists = false;
-    HMAP_FOR_EACH_SAFE (ofconn, next_ofconn, struct ofconn, hmap_node,
-                        &p->controllers) {
+    HMAP_FOR_EACH_SAFE (ofconn, next_ofconn, hmap_node, &p->controllers) {
         struct ofproto_controller *c;
 
         c = shash_find_data(&new_controllers, ofconn_get_target(ofconn));
@@ -682,8 +681,7 @@ ofproto_set_controllers(struct ofproto *p,
 
     /* Delete services that are no longer configured.
      * Update configuration of all now-existing services. */
-    HMAP_FOR_EACH_SAFE (ofservice, next_ofservice, struct ofservice, node,
-                        &p->services) {
+    HMAP_FOR_EACH_SAFE (ofservice, next_ofservice, node, &p->services) {
         struct ofproto_controller *c;
 
         c = shash_find_data(&new_controllers,
@@ -722,7 +720,7 @@ ofproto_reconnect_controllers(struct ofproto *ofproto)
 {
     struct ofconn *ofconn;
 
-    LIST_FOR_EACH (ofconn, struct ofconn, node, &ofproto->all_conns) {
+    LIST_FOR_EACH (ofconn, node, &ofproto->all_conns) {
         rconn_reconnect(ofconn->rconn);
     }
 }
@@ -890,7 +888,7 @@ ofproto_set_sflow(struct ofproto *ofproto,
 
             os = ofproto->sflow = ofproto_sflow_create(ofproto->dpif);
             refresh_port_groups(ofproto);
-            HMAP_FOR_EACH (ofport, struct ofport, hmap_node, &ofproto->ports) {
+            HMAP_FOR_EACH (ofport, hmap_node, &ofproto->ports) {
                 ofproto_sflow_add_port(os, ofport->odp_port,
                                        netdev_get_name(ofport->netdev));
             }
@@ -953,16 +951,14 @@ ofproto_destroy(struct ofproto *p)
     ofproto_flush_flows(p);
     classifier_destroy(&p->cls);
 
-    LIST_FOR_EACH_SAFE (ofconn, next_ofconn, struct ofconn, node,
-                        &p->all_conns) {
+    LIST_FOR_EACH_SAFE (ofconn, next_ofconn, node, &p->all_conns) {
         ofconn_destroy(ofconn);
     }
     hmap_destroy(&p->controllers);
 
     dpif_close(p->dpif);
     netdev_monitor_destroy(p->netdev_monitor);
-    HMAP_FOR_EACH_SAFE (ofport, next_ofport, struct ofport, hmap_node,
-                        &p->ports) {
+    HMAP_FOR_EACH_SAFE (ofport, next_ofport, hmap_node, &p->ports) {
         hmap_remove(&p->ports, &ofport->hmap_node);
         ofport_free(ofport);
     }
@@ -972,8 +968,7 @@ ofproto_destroy(struct ofproto *p)
     netflow_destroy(p->netflow);
     ofproto_sflow_destroy(p->sflow);
 
-    HMAP_FOR_EACH_SAFE (ofservice, next_ofservice, struct ofservice, node,
-                        &p->services) {
+    HMAP_FOR_EACH_SAFE (ofservice, next_ofservice, node, &p->services) {
         ofservice_destroy(p, ofservice);
     }
     hmap_destroy(&p->services);
@@ -1045,7 +1040,7 @@ add_snooper(struct ofproto *ofproto, struct vconn *vconn)
 
     /* Pick a controller for monitoring. */
     best = NULL;
-    LIST_FOR_EACH (ofconn, struct ofconn, node, &ofproto->all_conns) {
+    LIST_FOR_EACH (ofconn, node, &ofproto->all_conns) {
         if (ofconn->type == OFCONN_PRIMARY
             && (!best || snoop_preference(ofconn) > snoop_preference(best))) {
             best = ofconn;
@@ -1108,8 +1103,7 @@ ofproto_run1(struct ofproto *p)
         in_band_run(p->in_band);
     }
 
-    LIST_FOR_EACH_SAFE (ofconn, next_ofconn, struct ofconn, node,
-                        &p->all_conns) {
+    LIST_FOR_EACH_SAFE (ofconn, next_ofconn, node, &p->all_conns) {
         ofconn_run(ofconn, p);
     }
 
@@ -1119,7 +1113,7 @@ ofproto_run1(struct ofproto *p)
         fail_open_run(p->fail_open);
     }
 
-    HMAP_FOR_EACH (ofservice, struct ofservice, node, &p->services) {
+    HMAP_FOR_EACH (ofservice, node, &p->services) {
         struct vconn *vconn;
         int retval;
 
@@ -1215,7 +1209,7 @@ ofproto_wait(struct ofproto *p)
     dpif_recv_wait(p->dpif);
     dpif_port_poll_wait(p->dpif);
     netdev_monitor_poll_wait(p->netdev_monitor);
-    LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
+    LIST_FOR_EACH (ofconn, node, &p->all_conns) {
         ofconn_wait(ofconn);
     }
     if (p->in_band) {
@@ -1238,7 +1232,7 @@ ofproto_wait(struct ofproto *p)
     } else if (p->next_expiration != LLONG_MAX) {
         poll_timer_wait_until(p->next_expiration);
     }
-    HMAP_FOR_EACH (ofservice, struct ofservice, node, &p->services) {
+    HMAP_FOR_EACH (ofservice, node, &p->services) {
         pvconn_wait(ofservice->pvconn);
     }
     for (i = 0; i < p->n_snoops; i++) {
@@ -1352,7 +1346,7 @@ reinit_ports(struct ofproto *p)
     size_t i;
 
     svec_init(&devnames);
-    HMAP_FOR_EACH (ofport, struct ofport, hmap_node, &p->ports) {
+    HMAP_FOR_EACH (ofport, hmap_node, &p->ports) {
         svec_add (&devnames, (char *) ofport->opp.name);
     }
     dpif_port_list(p->dpif, &odp_ports, &n_odp_ports);
@@ -1379,7 +1373,7 @@ refresh_port_group(struct ofproto *p, unsigned int group)
 
     ports = xmalloc(hmap_count(&p->ports) * sizeof *ports);
     n_ports = 0;
-    HMAP_FOR_EACH (port, struct ofport, hmap_node, &p->ports) {
+    HMAP_FOR_EACH (port, hmap_node, &p->ports) {
         if (group == DP_GROUP_ALL || !(port->opp.config & OFPPC_NO_FLOOD)) {
             ports[n_ports++] = port->odp_port;
         }
@@ -1484,7 +1478,7 @@ send_port_status(struct ofproto *p, const struct ofport *ofport,
 {
     /* XXX Should limit the number of queued port status change messages. */
     struct ofconn *ofconn;
-    LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
+    LIST_FOR_EACH (ofconn, node, &p->all_conns) {
         struct ofp_port_status *ops;
         struct ofpbuf *b;
 
@@ -1542,7 +1536,7 @@ get_port(const struct ofproto *ofproto, uint16_t odp_port)
 {
     struct ofport *port;
 
-    HMAP_FOR_EACH_IN_BUCKET (port, struct ofport, hmap_node,
+    HMAP_FOR_EACH_IN_BUCKET (port, hmap_node,
                              hash_int(odp_port, 0), &ofproto->ports) {
         if (port->odp_port == odp_port) {
             return port;
@@ -1848,8 +1842,8 @@ ofservice_lookup(struct ofproto *ofproto, const char *target)
 {
     struct ofservice *ofservice;
 
-    HMAP_FOR_EACH_WITH_HASH (ofservice, struct ofservice, node,
-                             hash_string(target, 0), &ofproto->services) {
+    HMAP_FOR_EACH_WITH_HASH (ofservice, node, hash_string(target, 0),
+                             &ofproto->services) {
         if (!strcmp(pvconn_get_name(ofservice->pvconn), target)) {
             return ofservice;
         }
@@ -1912,7 +1906,7 @@ rule_destroy(struct ofproto *ofproto, struct rule *rule)
 {
     if (!rule->super) {
         struct rule *subrule, *next;
-        LIST_FOR_EACH_SAFE (subrule, next, struct rule, list, &rule->list) {
+        LIST_FOR_EACH_SAFE (subrule, next, list, &rule->list) {
             revalidate_rule(ofproto, subrule);
         }
     } else {
@@ -2374,7 +2368,7 @@ handle_features_request(struct ofproto *p, struct ofconn *ofconn,
                          (1u << OFPAT_SET_TP_DST) |
                          (1u << OFPAT_ENQUEUE));
 
-    HMAP_FOR_EACH (port, struct ofport, hmap_node, &p->ports) {
+    HMAP_FOR_EACH (port, hmap_node, &p->ports) {
         hton_ofp_phy_port(ofpbuf_put(buf, &port->opp, sizeof port->opp));
     }
 
@@ -3136,7 +3130,7 @@ handle_port_stats_request(struct ofproto *p, struct ofconn *ofconn,
             append_port_stat(port, ofconn, &msg);
         }
     } else {
-        HMAP_FOR_EACH (port, struct ofport, hmap_node, &p->ports) {
+        HMAP_FOR_EACH (port, hmap_node, &p->ports) {
             append_port_stat(port, ofconn, &msg);
         }
     }
@@ -3181,7 +3175,7 @@ query_stats(struct ofproto *p, struct rule *rule,
     odp_flows = xzalloc(n_odp_flows * sizeof *odp_flows);
     if (rule->cr.wc.wildcards) {
         size_t i = 0;
-        LIST_FOR_EACH (subrule, struct rule, list, &rule->list) {
+        LIST_FOR_EACH (subrule, list, &rule->list) {
             odp_flows[i++].key = subrule->cr.flow;
             packet_count += subrule->packet_count;
             byte_count += subrule->byte_count;
@@ -3468,7 +3462,7 @@ handle_queue_stats_request(struct ofproto *ofproto, struct ofconn *ofconn,
     port_no = ntohs(qsr->port_no);
     queue_id = ntohl(qsr->queue_id);
     if (port_no == OFPP_ALL) {
-        HMAP_FOR_EACH (port, struct ofport, hmap_node, &ofproto->ports) {
+        HMAP_FOR_EACH (port, hmap_node, &ofproto->ports) {
             handle_queue_stats_for_port(port, queue_id, &cbdata);
         }
     } else if (port_no < ofproto->max_ports) {
@@ -3955,8 +3949,7 @@ handle_role_request(struct ofproto *ofproto,
     if (role == NX_ROLE_MASTER) {
         struct ofconn *other;
 
-        HMAP_FOR_EACH (other, struct ofconn, hmap_node,
-                       &ofproto->controllers) {
+        HMAP_FOR_EACH (other, hmap_node, &ofproto->controllers) {
             if (other->role == NX_ROLE_MASTER) {
                 other->role = NX_ROLE_SLAVE;
             }
@@ -4293,7 +4286,7 @@ send_flow_removed(struct ofproto *p, struct rule *rule,
      * requests that would not add new flows, so it is imperfect.) */
 
     prev = NULL;
-    LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
+    LIST_FOR_EACH (ofconn, node, &p->all_conns) {
         if (rule->send_flow_removed && rconn_is_connected(ofconn->rconn)
             && ofconn_receives_async_msgs(ofconn)) {
             if (prev) {
@@ -4343,7 +4336,7 @@ expire_rule(struct cls_rule *cls_rule, void *p_)
      * due to an idle timeout. */
     if (rule->cr.wc.wildcards) {
         struct rule *subrule, *next;
-        LIST_FOR_EACH_SAFE (subrule, next, struct rule, list, &rule->list) {
+        LIST_FOR_EACH_SAFE (subrule, next, list, &rule->list) {
             rule_remove(p, subrule);
         }
     } else {
@@ -4556,7 +4549,7 @@ send_packet_in(struct ofproto *ofproto, struct ofpbuf *packet)
     max_len = do_convert_to_packet_in(packet);
 
     prev = NULL;
-    LIST_FOR_EACH (ofconn, struct ofconn, node, &ofproto->all_conns) {
+    LIST_FOR_EACH (ofconn, node, &ofproto->all_conns) {
         if (ofconn_receives_async_msgs(ofconn)) {
             if (prev) {
                 schedule_packet_in(prev, packet, max_len, true);
diff --git a/ofproto/status.c b/ofproto/status.c
index 0df9ce3d9..e4834d84b 100644
--- a/ofproto/status.c
+++ b/ofproto/status.c
@@ -70,7 +70,7 @@ switch_status_handle_request(struct switch_status *ss, struct rconn *rconn,
     sr.request.string = (void *) (request + 1);
     sr.request.length = ntohs(request->header.length) - sizeof *request;
     ds_init(&sr.output);
-    LIST_FOR_EACH (c, struct status_category, node, &ss->categories) {
+    LIST_FOR_EACH (c, node, &ss->categories) {
         if (!memcmp(c->name, sr.request.string,
                     MIN(strlen(c->name), sr.request.length))) {
             sr.category = c;
@@ -170,8 +170,7 @@ switch_status_destroy(struct switch_status *ss)
         /* Orphan any remaining categories, so that unregistering them later
          * won't write to bad memory. */
         struct status_category *c, *next;
-        LIST_FOR_EACH_SAFE (c, next,
-                            struct status_category, node, &ss->categories) {
+        LIST_FOR_EACH_SAFE (c, next, node, &ss->categories) {
             list_init(&c->node);
         }
         switch_status_unregister(ss->config_cat);
diff --git a/ovsdb/file.c b/ovsdb/file.c
index 846f55668..ddb443a11 100644
--- a/ovsdb/file.c
+++ b/ovsdb/file.c
@@ -427,7 +427,7 @@ ovsdb_file_save_copy__(const char *file_name, int locking,
         const struct ovsdb_table *table = node->data;
         const struct ovsdb_row *row;
 
-        HMAP_FOR_EACH (row, struct ovsdb_row, hmap_node, &table->rows) {
+        HMAP_FOR_EACH (row, hmap_node, &table->rows) {
             ovsdb_file_txn_add_row(&ftxn, NULL, row, NULL);
         }
     }
diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c
index 71a44899e..d58f9dc17 100644
--- a/ovsdb/jsonrpc-server.c
+++ b/ovsdb/jsonrpc-server.c
@@ -323,8 +323,7 @@ ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *remote)
 {
     struct ovsdb_jsonrpc_session *s, *next;
 
-    LIST_FOR_EACH_SAFE (s, next, struct ovsdb_jsonrpc_session, node,
-                        &remote->sessions) {
+    LIST_FOR_EACH_SAFE (s, next, node, &remote->sessions) {
         int error = ovsdb_jsonrpc_session_run(s);
         if (error) {
             ovsdb_jsonrpc_session_close(s);
@@ -346,7 +345,7 @@ ovsdb_jsonrpc_session_wait_all(struct ovsdb_jsonrpc_remote *remote)
 {
     struct ovsdb_jsonrpc_session *s;
 
-    LIST_FOR_EACH (s, struct ovsdb_jsonrpc_session, node, &remote->sessions) {
+    LIST_FOR_EACH (s, node, &remote->sessions) {
         ovsdb_jsonrpc_session_wait(s);
     }
 }
@@ -356,8 +355,7 @@ ovsdb_jsonrpc_session_close_all(struct ovsdb_jsonrpc_remote *remote)
 {
     struct ovsdb_jsonrpc_session *s, *next;
 
-    LIST_FOR_EACH_SAFE (s, next, struct ovsdb_jsonrpc_session, node,
-                        &remote->sessions) {
+    LIST_FOR_EACH_SAFE (s, next, node, &remote->sessions) {
         ovsdb_jsonrpc_session_close(s);
     }
 }
@@ -369,8 +367,7 @@ ovsdb_jsonrpc_session_reconnect_all(struct ovsdb_jsonrpc_remote *remote)
 {
     struct ovsdb_jsonrpc_session *s, *next;
 
-    LIST_FOR_EACH_SAFE (s, next, struct ovsdb_jsonrpc_session, node,
-                        &remote->sessions) {
+    LIST_FOR_EACH_SAFE (s, next, node, &remote->sessions) {
         jsonrpc_session_force_reconnect(s->js);
         if (!jsonrpc_session_is_alive(s->js)) {
             ovsdb_jsonrpc_session_close(s);
@@ -554,8 +551,7 @@ ovsdb_jsonrpc_trigger_find(struct ovsdb_jsonrpc_session *s,
 {
     struct ovsdb_jsonrpc_trigger *t;
 
-    HMAP_FOR_EACH_WITH_HASH (t, struct ovsdb_jsonrpc_trigger, hmap_node, hash,
-                             &s->triggers) {
+    HMAP_FOR_EACH_WITH_HASH (t, hmap_node, hash, &s->triggers) {
         if (json_equal(t->id, id)) {
             return t;
         }
@@ -593,8 +589,7 @@ static void
 ovsdb_jsonrpc_trigger_complete_all(struct ovsdb_jsonrpc_session *s)
 {
     struct ovsdb_jsonrpc_trigger *t, *next;
-    HMAP_FOR_EACH_SAFE (t, next, struct ovsdb_jsonrpc_trigger, hmap_node,
-                        &s->triggers) {
+    HMAP_FOR_EACH_SAFE (t, next, hmap_node, &s->triggers) {
         ovsdb_jsonrpc_trigger_complete(t);
     }
 }
@@ -671,8 +666,7 @@ ovsdb_jsonrpc_monitor_find(struct ovsdb_jsonrpc_session *s,
 {
     struct ovsdb_jsonrpc_monitor *m;
 
-    HMAP_FOR_EACH_WITH_HASH (m, struct ovsdb_jsonrpc_monitor, node,
-                             json_hash(monitor_id, 0), &s->monitors) {
+    HMAP_FOR_EACH_WITH_HASH (m, node, json_hash(monitor_id, 0), &s->monitors) {
         if (json_equal(m->monitor_id, monitor_id)) {
             return m;
         }
@@ -919,8 +913,7 @@ ovsdb_jsonrpc_monitor_remove_all(struct ovsdb_jsonrpc_session *s)
 {
     struct ovsdb_jsonrpc_monitor *m, *next;
 
-    HMAP_FOR_EACH_SAFE (m, next,
-                        struct ovsdb_jsonrpc_monitor, node, &s->monitors) {
+    HMAP_FOR_EACH_SAFE (m, next, node, &s->monitors) {
         ovsdb_remove_replica(s->remote->server->db, &m->replica);
     }
 }
@@ -1097,8 +1090,7 @@ ovsdb_jsonrpc_monitor_get_initial(const struct ovsdb_jsonrpc_monitor *m)
         if (mt->select & OJMS_INITIAL) {
             struct ovsdb_row *row;
 
-            HMAP_FOR_EACH (row, struct ovsdb_row, hmap_node,
-                           &mt->table->rows) {
+            HMAP_FOR_EACH (row, hmap_node, &mt->table->rows) {
                 ovsdb_jsonrpc_monitor_change_cb(NULL, row, NULL, &aux);
             }
         }
diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c
index 27db0702c..e0c9690df 100644
--- a/ovsdb/ovsdb-server.c
+++ b/ovsdb/ovsdb-server.c
@@ -222,7 +222,7 @@ query_db_string(const struct ovsdb *db, const char *name)
 
         parse_db_string_column(db, name, &table, &column);
 
-        HMAP_FOR_EACH (row, struct ovsdb_row, hmap_node, &table->rows) {
+        HMAP_FOR_EACH (row, hmap_node, &table->rows) {
             const struct ovsdb_datum *datum;
             size_t i;
 
@@ -248,7 +248,7 @@ query_db_remotes(const char *name, const struct ovsdb *db,
 
     parse_db_string_column(db, name, &table, &column);
 
-    HMAP_FOR_EACH (row, struct ovsdb_row, hmap_node, &table->rows) {
+    HMAP_FOR_EACH (row, hmap_node, &table->rows) {
         const struct ovsdb_datum *datum;
         size_t i;
 
diff --git a/ovsdb/query.c b/ovsdb/query.c
index 878ac5b2d..52eda0a7c 100644
--- a/ovsdb/query.c
+++ b/ovsdb/query.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2009 Nicira Networks
+/* Copyright (c) 2009, 2010 Nicira Networks
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -41,8 +41,7 @@ ovsdb_query(struct ovsdb_table *table, const struct ovsdb_condition *cnd,
         /* Linear scan. */
         const struct ovsdb_row *row, *next;
 
-        HMAP_FOR_EACH_SAFE (row, next, struct ovsdb_row, hmap_node,
-                            &table->rows) {
+        HMAP_FOR_EACH_SAFE (row, next, hmap_node, &table->rows) {
             if (ovsdb_condition_evaluate(row, cnd) && !output_row(row, aux)) {
                 break;
             }
@@ -90,8 +89,7 @@ ovsdb_query_distinct(struct ovsdb_table *table,
 
         ovsdb_row_hash_init(&hash, columns);
         ovsdb_query(table, condition, query_distinct_cb, &hash);
-        HMAP_FOR_EACH (node, struct ovsdb_row_hash_node, hmap_node,
-                       &hash.rows) {
+        HMAP_FOR_EACH (node, hmap_node, &hash.rows) {
             ovsdb_row_set_add_row(results, node->row);
         }
         ovsdb_row_hash_destroy(&hash, false);
diff --git a/ovsdb/row.c b/ovsdb/row.c
index 5043cbc04..ba00bb9f3 100644
--- a/ovsdb/row.c
+++ b/ovsdb/row.c
@@ -82,15 +82,13 @@ ovsdb_row_destroy(struct ovsdb_row *row)
         struct ovsdb_weak_ref *weak, *next;
         const struct shash_node *node;
 
-        LIST_FOR_EACH_SAFE (weak, next, struct ovsdb_weak_ref, dst_node,
-                            &row->dst_refs) {
+        LIST_FOR_EACH_SAFE (weak, next, dst_node, &row->dst_refs) {
             list_remove(&weak->src_node);
             list_remove(&weak->dst_node);
             free(weak);
         }
 
-        LIST_FOR_EACH_SAFE (weak, next, struct ovsdb_weak_ref, src_node,
-                            &row->src_refs) {
+        LIST_FOR_EACH_SAFE (weak, next, src_node, &row->src_refs) {
             list_remove(&weak->src_node);
             list_remove(&weak->dst_node);
             free(weak);
@@ -326,8 +324,7 @@ ovsdb_row_hash_destroy(struct ovsdb_row_hash *rh, bool destroy_rows)
 {
     struct ovsdb_row_hash_node *node, *next;
 
-    HMAP_FOR_EACH_SAFE (node, next, struct ovsdb_row_hash_node, hmap_node,
-                        &rh->rows) {
+    HMAP_FOR_EACH_SAFE (node, next, hmap_node, &rh->rows) {
         hmap_remove(&rh->rows, &node->hmap_node);
         if (destroy_rows) {
             ovsdb_row_destroy((struct ovsdb_row *) node->row);
@@ -360,7 +357,7 @@ ovsdb_row_hash_contains_all(const struct ovsdb_row_hash *a,
     struct ovsdb_row_hash_node *node;
 
     assert(ovsdb_column_set_equals(&a->columns, &b->columns));
-    HMAP_FOR_EACH (node, struct ovsdb_row_hash_node, hmap_node, &b->rows) {
+    HMAP_FOR_EACH (node, hmap_node, &b->rows) {
         if (!ovsdb_row_hash_contains__(a, node->row, node->hmap_node.hash)) {
             return false;
         }
@@ -380,8 +377,7 @@ ovsdb_row_hash_contains__(const struct ovsdb_row_hash *rh,
                           const struct ovsdb_row *row, size_t hash)
 {
     struct ovsdb_row_hash_node *node;
-    HMAP_FOR_EACH_WITH_HASH (node, struct ovsdb_row_hash_node, hmap_node,
-                             hash, &rh->rows) {
+    HMAP_FOR_EACH_WITH_HASH (node, hmap_node, hash, &rh->rows) {
         if (ovsdb_row_equal_columns(row, node->row, &rh->columns)) {
             return true;
         }
diff --git a/ovsdb/table.c b/ovsdb/table.c
index 6a4e7ae2f..5e83683b7 100644
--- a/ovsdb/table.c
+++ b/ovsdb/table.c
@@ -210,8 +210,7 @@ ovsdb_table_destroy(struct ovsdb_table *table)
     if (table) {
         struct ovsdb_row *row, *next;
 
-        HMAP_FOR_EACH_SAFE (row, next, struct ovsdb_row, hmap_node,
-                            &table->rows) {
+        HMAP_FOR_EACH_SAFE (row, next, hmap_node, &table->rows) {
             ovsdb_row_destroy(row);
         }
         hmap_destroy(&table->rows);
@@ -226,8 +225,7 @@ ovsdb_table_get_row(const struct ovsdb_table *table, const struct uuid *uuid)
 {
     struct ovsdb_row *row;
 
-    HMAP_FOR_EACH_WITH_HASH (row, struct ovsdb_row, hmap_node, uuid_hash(uuid),
-                             &table->rows) {
+    HMAP_FOR_EACH_WITH_HASH (row, hmap_node, uuid_hash(uuid), &table->rows) {
         if (uuid_equals(ovsdb_row_get_uuid(row), uuid)) {
             return row;
         }
diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c
index bfa2fcbad..b26705a3a 100644
--- a/ovsdb/transaction.c
+++ b/ovsdb/transaction.c
@@ -138,7 +138,7 @@ find_txn_row(const struct ovsdb_table *table, const struct uuid *uuid)
         return NULL;
     }
 
-    HMAP_FOR_EACH_WITH_HASH (txn_row, struct ovsdb_txn_row, hmap_node,
+    HMAP_FOR_EACH_WITH_HASH (txn_row, hmap_node,
                              uuid_hash(uuid), &table->txn_table->txn_rows) {
         const struct ovsdb_row *row;
 
@@ -315,8 +315,7 @@ assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row)
          * that their weak references will get reassessed. */
         struct ovsdb_weak_ref *weak, *next;
 
-        LIST_FOR_EACH_SAFE (weak, next, struct ovsdb_weak_ref, dst_node,
-                            &txn_row->old->dst_refs) {
+        LIST_FOR_EACH_SAFE (weak, next, dst_node, &txn_row->old->dst_refs) {
             if (!weak->src->txn_row) {
                 ovsdb_txn_row_modify(txn, weak->src);
             }
@@ -451,7 +450,7 @@ check_max_rows(struct ovsdb_txn *txn)
 {
     struct ovsdb_txn_table *t;
 
-    LIST_FOR_EACH (t, struct ovsdb_txn_table, node, &txn->txn_tables) {
+    LIST_FOR_EACH (t, node, &txn->txn_tables) {
         size_t n_rows = hmap_count(&t->table->rows);
         unsigned int max_rows = t->table->schema->max_rows;
 
@@ -508,7 +507,7 @@ ovsdb_txn_commit(struct ovsdb_txn *txn, bool durable)
     }
 
     /* Send the commit to each replica. */
-    LIST_FOR_EACH (replica, struct ovsdb_replica, node, &txn->db->replicas) {
+    LIST_FOR_EACH (replica, node, &txn->db->replicas) {
         error = (replica->class->commit)(replica, txn, durable);
         if (error) {
             /* We don't support two-phase commit so only the first replica is
@@ -535,8 +534,8 @@ ovsdb_txn_for_each_change(const struct ovsdb_txn *txn,
     struct ovsdb_txn_table *t;
     struct ovsdb_txn_row *r;
 
-    LIST_FOR_EACH (t, struct ovsdb_txn_table, node, &txn->txn_tables) {
-        HMAP_FOR_EACH (r, struct ovsdb_txn_row, hmap_node, &t->txn_rows) {
+    LIST_FOR_EACH (t, node, &txn->txn_tables) {
+        HMAP_FOR_EACH (r, hmap_node, &t->txn_rows) {
             if (!cb(r->old, r->new, r->changed, aux)) {
                 break;
             }
@@ -714,8 +713,7 @@ for_each_txn_row(struct ovsdb_txn *txn,
         struct ovsdb_txn_table *t, *next_txn_table;
 
         any_work = false;
-        LIST_FOR_EACH_SAFE (t, next_txn_table, struct ovsdb_txn_table, node,
-                            &txn->txn_tables) {
+        LIST_FOR_EACH_SAFE (t, next_txn_table, node, &txn->txn_tables) {
             if (t->serial != serial) {
                 t->serial = serial;
                 t->n_processed = 0;
@@ -724,9 +722,7 @@ for_each_txn_row(struct ovsdb_txn *txn,
             while (t->n_processed < hmap_count(&t->txn_rows)) {
                 struct ovsdb_txn_row *r, *next_txn_row;
 
-                HMAP_FOR_EACH_SAFE (r, next_txn_row,
-                                    struct ovsdb_txn_row, hmap_node,
-                                    &t->txn_rows) {
+                HMAP_FOR_EACH_SAFE (r, next_txn_row, hmap_node, &t->txn_rows) {
                     if (r->serial != serial) {
                         struct ovsdb_error *error;
 
diff --git a/ovsdb/trigger.c b/ovsdb/trigger.c
index 47719698c..c222d895c 100644
--- a/ovsdb/trigger.c
+++ b/ovsdb/trigger.c
@@ -73,7 +73,7 @@ ovsdb_trigger_run(struct ovsdb *db, long long int now)
 
     run_triggers = db->run_triggers;
     db->run_triggers = false;
-    LIST_FOR_EACH_SAFE (t, next, struct ovsdb_trigger, node, &db->triggers) {
+    LIST_FOR_EACH_SAFE (t, next, node, &db->triggers) {
         if (run_triggers || now - t->created >= t->timeout_msec) {
             ovsdb_trigger_try(db, t, now);
         }
@@ -89,7 +89,7 @@ ovsdb_trigger_wait(struct ovsdb *db, long long int now)
         long long int deadline = LLONG_MAX;
         struct ovsdb_trigger *t;
 
-        LIST_FOR_EACH (t, struct ovsdb_trigger, node, &db->triggers) {
+        LIST_FOR_EACH (t, node, &db->triggers) {
             if (t->created < LLONG_MAX - t->timeout_msec) {
                 long long int t_deadline = t->created + t->timeout_msec;
                 if (deadline > t_deadline) {
diff --git a/tests/test-classifier.c b/tests/test-classifier.c
index 57a1e2c2c..4227c1822 100644
--- a/tests/test-classifier.c
+++ b/tests/test-classifier.c
@@ -430,7 +430,7 @@ check_tables(const struct classifier *cls,
         if (!hmap_is_empty(&cls->tables[i])) {
             found_tables++;
         }
-        HMAP_FOR_EACH (bucket, struct cls_bucket, hmap_node, &cls->tables[i]) {
+        HMAP_FOR_EACH (bucket, hmap_node, &cls->tables[i]) {
             found_buckets++;
             assert(!list_is_empty(&bucket->rules));
             found_rules += list_size(&bucket->rules);
diff --git a/tests/test-hmap.c b/tests/test-hmap.c
index 18d8f461d..be6bcf4a7 100644
--- a/tests/test-hmap.c
+++ b/tests/test-hmap.c
@@ -56,7 +56,7 @@ check_hmap(struct hmap *hmap, const int values[], size_t n,
     hmap_values = xmalloc(sizeof *sort_values * n);
 
     i = 0;
-    HMAP_FOR_EACH (e, struct element, node, hmap) {
+    HMAP_FOR_EACH (e, node, hmap) {
         assert(i < n);
         hmap_values[i++] = e->value;
     }
@@ -77,8 +77,7 @@ check_hmap(struct hmap *hmap, const int values[], size_t n,
     for (i = 0; i < n; i++) {
         size_t count = 0;
 
-        HMAP_FOR_EACH_WITH_HASH (e, struct element, node,
-                                 hash(values[i]), hmap) {
+        HMAP_FOR_EACH_WITH_HASH (e, node, hash(values[i]), hmap) {
             count += e->value == values[i];
         }
         assert(count == 1);
@@ -124,7 +123,7 @@ print_hmap(const char *name, struct hmap *hmap)
     struct element *e;
 
     printf("%s:", name);
-    HMAP_FOR_EACH (e, struct element, node, hmap) {
+    HMAP_FOR_EACH (e, node, hmap) {
         printf(" %d(%zu)", e->value, e->node.hash & hmap->mask);
     }
     printf("\n");
@@ -242,7 +241,7 @@ test_hmap_for_each_safe(hash_func *hash)
 
             i = 0;
             n_remaining = n;
-            HMAP_FOR_EACH_SAFE (e, next, struct element, node, &hmap) {
+            HMAP_FOR_EACH_SAFE (e, next, node, &hmap) {
                 assert(i < n);
                 if (pattern & (1ul << e->value)) {
                     size_t j;
diff --git a/tests/test-list.c b/tests/test-list.c
index 0efdbfce4..5e62e0c00 100644
--- a/tests/test-list.c
+++ b/tests/test-list.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2009 Nicira Networks.
+ * Copyright (c) 2008, 2009, 2010 Nicira Networks.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -55,7 +55,7 @@ check_list(struct list *list, const int values[], size_t n)
     size_t i;
 
     i = 0;
-    LIST_FOR_EACH (e, struct element, node, list) {
+    LIST_FOR_EACH (e, node, list) {
         assert(i < n);
         assert(e->value == values[i]);
         i++;
@@ -64,7 +64,7 @@ check_list(struct list *list, const int values[], size_t n)
     assert(i == n);
 
     i = 0;
-    LIST_FOR_EACH_REVERSE (e, struct element, node, list) {
+    LIST_FOR_EACH_REVERSE (e, node, list) {
         assert(i < n);
         assert(e->value == values[n - i - 1]);
         i++;
@@ -84,7 +84,7 @@ print_list(const char *name, struct list *list)
     struct element *e;
 
     printf("%s:", name);
-    LIST_FOR_EACH (e, struct element, node, list) {
+    LIST_FOR_EACH (e, node, list) {
         printf(" %d", e->value);
     }
     printf("\n");
@@ -131,7 +131,7 @@ test_list_for_each_safe(void)
             i = 0;
             values_idx = 0;
             n_remaining = n;
-            LIST_FOR_EACH_SAFE (e, next, struct element, node, &list) {
+            LIST_FOR_EACH_SAFE (e, next, node, &list) {
                 assert(i < n);
                 if (pattern & (1ul << i)) {
                     list_remove(&e->node);
diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c
index 04db65421..cdc939bb4 100644
--- a/tests/test-ovsdb.c
+++ b/tests/test-ovsdb.c
@@ -1468,8 +1468,7 @@ do_transact_print(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
     n_rows = hmap_count(&do_transact_table->rows);
     rows = xmalloc(n_rows * sizeof *rows);
     i = 0;
-    HMAP_FOR_EACH (row, struct ovsdb_row, hmap_node,
-                   &do_transact_table->rows) {
+    HMAP_FOR_EACH (row, hmap_node, &do_transact_table->rows) {
         rows[i++] = row;
     }
     assert(i == n_rows);
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index d16f0c339..60a0d6e1c 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -575,7 +575,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
     /* Collect old and new bridges. */
     shash_init(&old_br);
     shash_init(&new_br);
-    LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+    LIST_FOR_EACH (br, node, &all_bridges) {
         shash_add(&old_br, br->name, br);
     }
     for (i = 0; i < ovs_cfg->n_bridges; i++) {
@@ -586,7 +586,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
     }
 
     /* Get rid of deleted bridges and add new bridges. */
-    LIST_FOR_EACH_SAFE (br, next, struct bridge, node, &all_bridges) {
+    LIST_FOR_EACH_SAFE (br, next, node, &all_bridges) {
         struct ovsrec_bridge *br_cfg = shash_find_data(&new_br, br->name);
         if (br_cfg) {
             br->cfg = br_cfg;
@@ -613,7 +613,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
     shash_destroy(&new_br);
 
     /* Reconfigure all bridges. */
-    LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+    LIST_FOR_EACH (br, node, &all_bridges) {
         bridge_reconfigure_one(br);
     }
 
@@ -622,7 +622,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
      * The kernel will reject any attempt to add a given port to a datapath if
      * that port already belongs to a different datapath, so we must do all
      * port deletions before any port additions. */
-    LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+    LIST_FOR_EACH (br, node, &all_bridges) {
         struct odp_port *dpif_ports;
         size_t n_dpif_ports;
         struct shash want_ifaces;
@@ -644,7 +644,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
         shash_destroy(&want_ifaces);
         free(dpif_ports);
     }
-    LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+    LIST_FOR_EACH (br, node, &all_bridges) {
         struct odp_port *dpif_ports;
         size_t n_dpif_ports;
         struct shash cur_ifaces, want_ifaces;
@@ -693,7 +693,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
         shash_destroy(&want_ifaces);
     }
     sflow_bridge_number = 0;
-    LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+    LIST_FOR_EACH (br, node, &all_bridges) {
         uint8_t ea[8];
         uint64_t dpid;
         struct iface *local_iface;
@@ -830,7 +830,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
          * the datapath ID before the controller. */
         bridge_reconfigure_remotes(br, managers, n_managers);
     }
-    LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+    LIST_FOR_EACH (br, node, &all_bridges) {
         for (i = 0; i < br->n_ports; i++) {
             struct port *port = br->ports[i];
             int j;
@@ -843,7 +843,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
             }
         }
     }
-    LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+    LIST_FOR_EACH (br, node, &all_bridges) {
         iterate_and_prune_ifaces(br, set_iface_properties, NULL);
     }
 
@@ -1137,7 +1137,7 @@ bridge_run(void)
 
     /* Let each bridge do the work that it needs to do. */
     datapath_destroyed = false;
-    LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+    LIST_FOR_EACH (br, node, &all_bridges) {
         int error = bridge_run_one(br);
         if (error) {
             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
@@ -1187,7 +1187,7 @@ bridge_run(void)
             struct ovsdb_idl_txn *txn;
 
             txn = ovsdb_idl_txn_create(idl);
-            LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+            LIST_FOR_EACH (br, node, &all_bridges) {
                 size_t i;
 
                 for (i = 0; i < br->n_ports; i++) {
@@ -1214,7 +1214,7 @@ bridge_wait(void)
 {
     struct bridge *br;
 
-    LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+    LIST_FOR_EACH (br, node, &all_bridges) {
         ofproto_wait(br->ofproto);
         if (ofproto_has_primary_controller(br->ofproto)) {
             continue;
@@ -1273,7 +1273,7 @@ bridge_unixctl_fdb_show(struct unixctl_conn *conn,
     }
 
     ds_put_cstr(&ds, " port  VLAN  MAC                Age\n");
-    LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) {
+    LIST_FOR_EACH (e, lru_node, &br->ml->lrus) {
         if (e->port < 0 || e->port >= br->n_ports) {
             continue;
         }
@@ -1365,7 +1365,7 @@ bridge_lookup(const char *name)
 {
     struct bridge *br;
 
-    LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+    LIST_FOR_EACH (br, node, &all_bridges) {
         if (!strcmp(br->name, name)) {
             return br;
         }
@@ -1411,7 +1411,7 @@ bridge_unixctl_reconnect(struct unixctl_conn *conn,
         }
         ofproto_reconnect_controllers(br->ofproto);
     } else {
-        LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+        LIST_FOR_EACH (br, node, &all_bridges) {
             ofproto_reconnect_controllers(br->ofproto);
         }
     }
@@ -2934,7 +2934,7 @@ bond_send_learning_packets(struct port *port)
 
     ofpbuf_init(&packet, 128);
     error = n_packets = n_errors = 0;
-    LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) {
+    LIST_FOR_EACH (e, lru_node, &br->ml->lrus) {
         union ofp_action actions[2], *a;
         uint16_t dp_ifidx;
         tag_type tags = 0;
@@ -2996,7 +2996,7 @@ bond_unixctl_list(struct unixctl_conn *conn,
 
     ds_put_cstr(&ds, "bridge\tbond\tslaves\n");
 
-    LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+    LIST_FOR_EACH (br, node, &all_bridges) {
         size_t i;
 
         for (i = 0; i < br->n_ports; i++) {
@@ -3025,7 +3025,7 @@ bond_find(const char *name)
 {
     const struct bridge *br;
 
-    LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+    LIST_FOR_EACH (br, node, &all_bridges) {
         size_t i;
 
         for (i = 0; i < br->n_ports; i++) {
@@ -3085,8 +3085,7 @@ bond_unixctl_show(struct unixctl_conn *conn,
                           hash, be->tx_bytes / 1024);
 
             /* MACs. */
-            LIST_FOR_EACH (me, struct mac_entry, lru_node,
-                           &port->bridge->ml->lrus) {
+            LIST_FOR_EACH (me, lru_node, &port->bridge->ml->lrus) {
                 uint16_t dp_ifidx;
                 tag_type tags = 0;
                 if (bond_hash(me->mac) == hash
@@ -3768,7 +3767,7 @@ iface_from_dp_ifidx(const struct bridge *br, uint16_t dp_ifidx)
 {
     struct iface *iface;
 
-    HMAP_FOR_EACH_IN_BUCKET (iface, struct iface, dp_ifidx_node,
+    HMAP_FOR_EACH_IN_BUCKET (iface, dp_ifidx_node,
                              hash_int(dp_ifidx, 0), &br->ifaces) {
         if (iface->dp_ifidx == dp_ifidx) {
             return iface;
diff --git a/vswitchd/proc-net-compat.c b/vswitchd/proc-net-compat.c
index 6a8f8756e..0eda6d258 100644
--- a/vswitchd/proc-net-compat.c
+++ b/vswitchd/proc-net-compat.c
@@ -248,8 +248,7 @@ proc_net_compat_update_vlan(const char *tagged_dev, const char *trunk_dev,
         /* 'tagged_dev' is not attached to any compat_vlan.  Find the
          * compat_vlan corresponding to (trunk_dev,vid) to attach it to, or
          * create a new compat_vlan if none exists for (trunk_dev,vid). */
-        HMAP_FOR_EACH_WITH_HASH (vlan, struct compat_vlan, trunk_node,
-                                 hash_vlan(trunk_dev, vid),
+        HMAP_FOR_EACH_WITH_HASH (vlan, trunk_node, hash_vlan(trunk_dev, vid),
                                  &vlans_by_trunk) {
             if (!strcmp(trunk_dev, vlan->trunk_dev) && vid == vlan->vid) {
                 break;
@@ -340,7 +339,7 @@ update_vlan_config(void)
     ds_init(&ds);
     ds_put_cstr(&ds, "VLAN Dev name     | VLAN ID\n"
                 "Name-Type: VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD\n");
-    HMAP_FOR_EACH (vlan, struct compat_vlan, trunk_node, &vlans_by_trunk) {
+    HMAP_FOR_EACH (vlan, trunk_node, &vlans_by_trunk) {
         ds_put_format(&ds, "%-15s| %d  | %s\n",
                       vlan->vlan_dev, vlan->vid, vlan->trunk_dev);
     }
-- 
cgit v1.2.1


From d2805da2cb2256e9e2efc5074fbe8df55408213f Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 16 Sep 2010 15:36:57 -0700
Subject: ovs-ofctl: Add "queue-stats" command to print queue stats.

---
 lib/ofp-print.c          | 56 ++++++++++++++++++++++++++++++++++++++++++++++++
 utilities/ovs-ofctl.8.in |  9 ++++++++
 utilities/ovs-ofctl.c    | 26 ++++++++++++++++++++++
 3 files changed, 91 insertions(+)

diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index 78f3649ba..5cbfe6c04 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -38,6 +38,7 @@
 #include "xtoxll.h"
 
 static void ofp_print_port_name(struct ds *string, uint16_t port);
+static void ofp_print_queue_name(struct ds *string, uint32_t port);
 
 /* Returns a string that represents the contents of the Ethernet frame in the
  * 'len' bytes starting at 'data' to 'stream' as output by tcpdump.
@@ -1165,6 +1166,53 @@ ofp_table_stats_reply(struct ds *string, const void *body, size_t len,
      }
 }
 
+static void
+ofp_print_queue_name(struct ds *string, uint32_t queue_id)
+{
+    if (queue_id == OFPQ_ALL) {
+        ds_put_cstr(string, "ALL");
+    } else {
+        ds_put_format(string, "%"PRIu32, queue_id);
+    }
+}
+
+static void
+ofp_queue_stats_request(struct ds *string, const void *body_,
+                       size_t len OVS_UNUSED, int verbosity OVS_UNUSED)
+{
+    const struct ofp_queue_stats_request *qsr = body_;
+
+    ds_put_cstr(string, "port=");
+    ofp_print_port_name(string, ntohs(qsr->port_no));
+
+    ds_put_cstr(string, " queue=");
+    ofp_print_queue_name(string, ntohl(qsr->queue_id));
+}
+
+static void
+ofp_queue_stats_reply(struct ds *string, const void *body, size_t len,
+                     int verbosity)
+{
+    const struct ofp_queue_stats *qs = body;
+    size_t n = len / sizeof *qs;
+    ds_put_format(string, " %zu queues\n", n);
+    if (verbosity < 1) {
+        return;
+    }
+
+    for (; n--; qs++) {
+        ds_put_cstr(string, "  port ");
+        ofp_print_port_name(string, ntohs(qs->port_no));
+        ds_put_cstr(string, " queue ");
+        ofp_print_queue_name(string, ntohl(qs->queue_id));
+        ds_put_cstr(string, ": ");
+
+        print_port_stat(string, "bytes=", ntohll(qs->tx_bytes), 1);
+        print_port_stat(string, "pkts=", ntohll(qs->tx_packets), 1);
+        print_port_stat(string, "errors=", ntohll(qs->tx_errors), 0);
+    }
+}
+
 static void
 vendor_stat(struct ds *string, const void *body, size_t len,
             int verbosity OVS_UNUSED)
@@ -1234,6 +1282,14 @@ print_stats(struct ds *string, int type, const void *body, size_t body_len,
               ofp_port_stats_request },
             { 0, SIZE_MAX, ofp_port_stats_reply },
         },
+        {
+            OFPST_QUEUE,
+            "queue",
+            { sizeof(struct ofp_queue_stats_request),
+              sizeof(struct ofp_queue_stats_request),
+              ofp_queue_stats_request },
+            { 0, SIZE_MAX, ofp_queue_stats_reply },
+        },
         {
             OFPST_VENDOR,
             "vendor-specific",
diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in
index bbe747b21..c12b5f125 100644
--- a/utilities/ovs-ofctl.8.in
+++ b/utilities/ovs-ofctl.8.in
@@ -115,6 +115,15 @@ the statistics are aggregated across all flows in the switch's flow
 tables.  See \fBFlow Syntax\fR, below, for the syntax of \fIflows\fR.
 The output format is descrbed in \fBTable Entry Output\fR.
 .
+.IP "\fBqueue\-stats \fIswitch \fR[\fIport \fR[\fIqueue\fR]]"
+Prints to the console statistics for the specified \fIqueue\fR on
+\fIport\fR within \fIswitch\fR.  Either of \fIport\fR or \fIqueue\fR
+or both may be omitted (or equivalently specified as \fBALL\fR).  If
+both are omitted, statistics are printed for all queues on all ports.
+If only \fIqueue\fR is omitted, then statistics are printed for all
+queues on \fIport\fR; if only \fIport\fR is omitted, then statistics
+are printed for \fIqueue\fR on every port where it exists.
+.
 .TP
 \fBadd\-flow \fIswitch flow\fR
 Add the flow entry as described by \fIflow\fR to the \fIswitch\fR's 
diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c
index dc6d5e31b..55278fb67 100644
--- a/utilities/ovs-ofctl.c
+++ b/utilities/ovs-ofctl.c
@@ -150,6 +150,7 @@ usage(void)
            "  dump-flows SWITCH FLOW      print matching FLOWs\n"
            "  dump-aggregate SWITCH       print aggregate flow statistics\n"
            "  dump-aggregate SWITCH FLOW  print aggregate stats for FLOWs\n"
+           "  queue-stats SWITCH [PORT [QUEUE]]  dump queue stats\n"
            "  add-flow SWITCH FLOW        add flow described by FLOW\n"
            "  add-flows SWITCH FILE       add flows from FILE\n"
            "  mod-flows SWITCH FLOW       modify actions of matching FLOWs\n"
@@ -464,6 +465,30 @@ do_dump_aggregate(int argc, char *argv[])
     dump_stats_transaction(argv[1], request);
 }
 
+static void
+do_queue_stats(int argc, char *argv[])
+{
+    struct ofp_queue_stats_request *req;
+    struct ofpbuf *request;
+
+    req = alloc_stats_request(sizeof *req, OFPST_QUEUE, &request);
+
+    if (argc > 2 && argv[2][0] && strcasecmp(argv[2], "all")) {
+        req->port_no = htons(str_to_port_no(argv[1], argv[2]));
+    } else {
+        req->port_no = htons(OFPP_ALL);
+    }
+    if (argc > 3 && argv[3][0] && strcasecmp(argv[3], "all")) {
+        req->queue_id = htonl(atoi(argv[3]));
+    } else {
+        req->queue_id = htonl(OFPQ_ALL);
+    }
+
+    memset(req->pad, 0, sizeof req->pad);
+
+    dump_stats_transaction(argv[1], request);
+}
+
 static void
 do_add_flow(int argc OVS_UNUSED, char *argv[])
 {
@@ -884,6 +909,7 @@ static const struct command all_commands[] = {
     { "dump-tables", 1, 1, do_dump_tables },
     { "dump-flows", 1, 2, do_dump_flows },
     { "dump-aggregate", 1, 2, do_dump_aggregate },
+    { "queue-stats", 1, 3, do_queue_stats },
     { "add-flow", 2, 2, do_add_flow },
     { "add-flows", 2, 2, do_add_flows },
     { "mod-flows", 2, 2, do_mod_flows },
-- 
cgit v1.2.1


From d5590e7e4114253bc5b84b494dba18be24e6073f Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 16 Sep 2010 15:38:10 -0700
Subject: netdev-linux: Fix off-by-one error dumping queue stats.

Linux kernel queue numbers are one greater than OpenFlow queue numbers, for
HTB anyhow.  The code to dump queues wasn't compensating for this, so this
commit fixes it up.
---
 lib/netdev-linux.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index e1a3c8c46..2ea411dcd 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -2659,7 +2659,7 @@ htb_class_dump_stats(const struct netdev *netdev OVS_UNUSED,
     major = tc_get_major(handle);
     minor = tc_get_minor(handle);
     if (major == 1 && minor > 0 && minor <= HTB_N_QUEUES) {
-        (*cb)(tc_get_minor(handle), &stats, aux);
+        (*cb)(minor - 1, &stats, aux);
     }
     return 0;
 }
-- 
cgit v1.2.1


From 1ac788f67ff614662ce7d9af36d5eb7597f53a3f Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 16 Sep 2010 15:41:14 -0700
Subject: ofproto: Only dump queue statistics if the queue really exists.

Without this commit, "ovs-ofctl queue-stats br0 ALL 1" will print something
like the following if port 3 has queue 1 but none of the other ports do:

    stats_reply (xid=0x7b378): flags=none type=5(queue)
     4 queues
      port 0 queue 1: bytes=?, pkts=?, errors=?
      port 1 queue 1: bytes=?, pkts=?, errors=?
      port 2 queue 1: bytes=?, pkts=?, errors=?
      port 3 queue 1: bytes=0, pkts=0, errors=0

With this commit, it will print the following instead, which seems more
useful:

    stats_reply (xid=0x3ada1): flags=none type=5(queue)
     1 queues
      port 3 queue 1: bytes=0, pkts=0, errors=0
---
 ofproto/ofproto.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index c683e3b63..00cac6e20 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -3433,8 +3433,9 @@ handle_queue_stats_for_port(struct ofport *port, uint32_t queue_id,
     } else {
         struct netdev_queue_stats stats;
 
-        netdev_get_queue_stats(port->netdev, queue_id, &stats);
-        put_queue_stats(cbdata, queue_id, &stats);
+        if (!netdev_get_queue_stats(port->netdev, queue_id, &stats)) {
+            put_queue_stats(cbdata, queue_id, &stats);
+        }
     }
 }
 
-- 
cgit v1.2.1


From bb98f9b951af0f6438d921522a8f54f5ead0d1d0 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Fri, 1 Oct 2010 13:05:23 -0700
Subject: ofp-parse: Properly byteswap in_port.

---
 lib/ofp-parse.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c
index 312eaaaaf..32e790a86 100644
--- a/lib/ofp-parse.c
+++ b/lib/ofp-parse.c
@@ -465,10 +465,12 @@ parse_ofp_str(char *string, struct ofp_match *match, struct ofpbuf *actions,
                 if (!strcmp(value, "*") || !strcmp(value, "ANY")) {
                     wildcards |= f->wildcard;
                 } else {
+                    uint16_t port_no;
+
                     wildcards &= ~f->wildcard;
                     if (f->wildcard == OFPFW_IN_PORT
-                        && parse_port_name(value, (uint16_t *) data)) {
-                        /* Nothing to do. */
+                        && parse_port_name(value, &port_no)) {
+                        match->in_port = htons(port_no);
                     } else if (f->type == F_U8) {
                         *(uint8_t *) data = str_to_u32(value);
                     } else if (f->type == F_U16) {
-- 
cgit v1.2.1


From 15f1f1b63ab5514f8dfafa8f4c90ecee44693e1d Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 23 Sep 2010 13:19:49 -0700
Subject: ofp-parse: Factor out duplicated code into new functions.

---
 lib/learning-switch.c | 40 ++-----------------------------
 lib/ofp-parse.c       | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 lib/ofp-parse.h       |  3 +++
 utilities/ovs-ofctl.c | 41 +++-----------------------------
 4 files changed, 72 insertions(+), 77 deletions(-)

diff --git a/lib/learning-switch.c b/lib/learning-switch.c
index 4e7645d7c..b20506b16 100644
--- a/lib/learning-switch.c
+++ b/lib/learning-switch.c
@@ -252,45 +252,9 @@ static void
 send_default_flows(struct lswitch *sw, struct rconn *rconn,
                    FILE *default_flows)
 {
-    char line[1024];
+    struct ofpbuf *b;
 
-    while (fgets(line, sizeof line, default_flows)) {
-        struct ofpbuf *b;
-        struct ofp_flow_mod *ofm;
-        uint16_t priority, idle_timeout, hard_timeout;
-        uint64_t cookie;
-        struct ofp_match match;
-
-        char *comment;
-
-        /* Delete comments. */
-        comment = strchr(line, '#');
-        if (comment) {
-            *comment = '\0';
-        }
-
-        /* Drop empty lines. */
-        if (line[strspn(line, " \t\n")] == '\0') {
-            continue;
-        }
-
-        /* Parse and send.  str_to_flow() will expand and reallocate the data
-         * in 'buffer', so we can't keep pointers to across the str_to_flow()
-         * call. */
-        make_openflow(sizeof *ofm, OFPT_FLOW_MOD, &b);
-        parse_ofp_str(line, &match, b,
-                      NULL, NULL, &priority, &idle_timeout, &hard_timeout,
-                      &cookie);
-        ofm = b->data;
-        ofm->match = match;
-        ofm->command = htons(OFPFC_ADD);
-        ofm->cookie = htonll(cookie);
-        ofm->idle_timeout = htons(idle_timeout);
-        ofm->hard_timeout = htons(hard_timeout);
-        ofm->buffer_id = htonl(UINT32_MAX);
-        ofm->priority = htons(priority);
-
-        update_openflow_length(b);
+    while ((b = parse_ofp_add_flow_file(default_flows)) != NULL) {
         queue_tx(sw, rconn, b);
     }
 }
diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c
index 32e790a86..069687b15 100644
--- a/lib/ofp-parse.c
+++ b/lib/ofp-parse.c
@@ -21,6 +21,7 @@
 #include <errno.h>
 #include <stdlib.h>
 
+#include "dynamic-string.h"
 #include "netdev.h"
 #include "ofp-util.h"
 #include "ofpbuf.h"
@@ -29,7 +30,7 @@
 #include "socket-util.h"
 #include "vconn.h"
 #include "vlog.h"
-
+#include "xtoxll.h"
 
 VLOG_DEFINE_THIS_MODULE(ofp_parse)
 
@@ -502,3 +503,65 @@ parse_ofp_str(char *string, struct ofp_match *match, struct ofpbuf *actions,
         free(new);
     }
 }
+
+/* Parses 'string' as a OFPT_FLOW_MOD with subtype OFPFC_ADD and returns an
+ * ofpbuf that contains it. */
+struct ofpbuf *
+parse_ofp_add_flow_str(char *string)
+{
+    struct ofpbuf *buffer;
+    struct ofp_flow_mod *ofm;
+    uint16_t priority, idle_timeout, hard_timeout;
+    uint64_t cookie;
+    struct ofp_match match;
+
+    /* parse_ofp_str() will expand and reallocate the data in 'buffer', so we
+     * can't keep pointers to across the parse_ofp_str() call. */
+    make_openflow(sizeof *ofm, OFPT_FLOW_MOD, &buffer);
+    parse_ofp_str(string, &match, buffer,
+                  NULL, NULL, &priority, &idle_timeout, &hard_timeout,
+                  &cookie);
+    ofm = buffer->data;
+    ofm->match = match;
+    ofm->command = htons(OFPFC_ADD);
+    ofm->cookie = htonll(cookie);
+    ofm->idle_timeout = htons(idle_timeout);
+    ofm->hard_timeout = htons(hard_timeout);
+    ofm->buffer_id = htonl(UINT32_MAX);
+    ofm->priority = htons(priority);
+    update_openflow_length(buffer);
+
+    return buffer;
+}
+
+/* Parses an OFPT_FLOW_MOD with subtype OFPFC_ADD from 'stream' and returns an
+ * ofpbuf that contains it.  Returns a null pointer if end-of-file is reached
+ * before reading a flow. */
+struct ofpbuf *
+parse_ofp_add_flow_file(FILE *stream)
+{
+    struct ofpbuf *b = NULL;
+    struct ds s = DS_EMPTY_INITIALIZER;
+
+    while (!ds_get_line(&s, stream)) {
+        char *line = ds_cstr(&s);
+        char *comment;
+
+        /* Delete comments. */
+        comment = strchr(line, '#');
+        if (comment) {
+            *comment = '\0';
+        }
+
+        /* Drop empty lines. */
+        if (line[strspn(line, " \t\n")] == '\0') {
+            continue;
+        }
+
+        b = parse_ofp_add_flow_str(line);
+        break;
+    }
+    ds_destroy(&s);
+
+    return b;
+}
diff --git a/lib/ofp-parse.h b/lib/ofp-parse.h
index aa0489c0c..ac8e6d2f1 100644
--- a/lib/ofp-parse.h
+++ b/lib/ofp-parse.h
@@ -20,6 +20,7 @@
 #define OFP_PARSE_H 1
 
 #include <stdint.h>
+#include <stdio.h>
 
 struct ofp_match;
 struct ofpbuf;
@@ -29,5 +30,7 @@ void parse_ofp_str(char *string, struct ofp_match *match,
                    uint16_t *out_port, uint16_t *priority,
                    uint16_t *idle_timeout, uint16_t *hard_timeout,
                    uint64_t *cookie);
+struct ofpbuf *parse_ofp_add_flow_str(char *string);
+struct ofpbuf *parse_ofp_add_flow_file(FILE *);
 
 #endif /* ofp-parse.h */
diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c
index 55278fb67..c21c4f982 100644
--- a/utilities/ovs-ofctl.c
+++ b/utilities/ovs-ofctl.c
@@ -524,8 +524,8 @@ static void
 do_add_flows(int argc OVS_UNUSED, char *argv[])
 {
     struct vconn *vconn;
+    struct ofpbuf *b;
     FILE *file;
-    char line[1024];
 
     file = fopen(argv[2], "r");
     if (file == NULL) {
@@ -533,43 +533,8 @@ do_add_flows(int argc OVS_UNUSED, char *argv[])
     }
 
     open_vconn(argv[1], &vconn);
-    while (fgets(line, sizeof line, file)) {
-        struct ofpbuf *buffer;
-        struct ofp_flow_mod *ofm;
-        uint16_t priority, idle_timeout, hard_timeout;
-        uint64_t cookie;
-        struct ofp_match match;
-
-        char *comment;
-
-        /* Delete comments. */
-        comment = strchr(line, '#');
-        if (comment) {
-            *comment = '\0';
-        }
-
-        /* Drop empty lines. */
-        if (line[strspn(line, " \t\n")] == '\0') {
-            continue;
-        }
-
-        /* Parse and send.  parse_ofp_str() will expand and reallocate
-         * the data in 'buffer', so we can't keep pointers to across the
-         * parse_ofp_str() call. */
-        make_openflow(sizeof *ofm, OFPT_FLOW_MOD, &buffer);
-        parse_ofp_str(line, &match, buffer,
-                      NULL, NULL, &priority, &idle_timeout, &hard_timeout,
-                      &cookie);
-        ofm = buffer->data;
-        ofm->match = match;
-        ofm->command = htons(OFPFC_ADD);
-        ofm->cookie = htonll(cookie);
-        ofm->idle_timeout = htons(idle_timeout);
-        ofm->hard_timeout = htons(hard_timeout);
-        ofm->buffer_id = htonl(UINT32_MAX);
-        ofm->priority = htons(priority);
-
-        send_openflow_buffer(vconn, buffer);
+    while ((b = parse_ofp_add_flow_file(file)) != NULL) {
+        send_openflow_buffer(vconn, b);
     }
     vconn_close(vconn);
     fclose(file);
-- 
cgit v1.2.1


From 3ff4f87161e4e82eec2393db067cf57c5721ea0b Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Fri, 1 Oct 2010 13:05:59 -0700
Subject: ofp-print: Make output easier to read.

---
 lib/ofp-print.c | 53 ++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 38 insertions(+), 15 deletions(-)

diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index 5cbfe6c04..569a70aba 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -752,31 +752,50 @@ ofp_print_flow_mod(struct ds *string, const void *oh, size_t len,
 {
     const struct ofp_flow_mod *ofm = oh;
 
+    ds_put_char(string, ' ');
     ofp_print_match(string, &ofm->match, verbosity);
+    if (ds_last(string) != ' ') {
+        ds_put_char(string, ' ');
+    }
+
     switch (ntohs(ofm->command)) {
     case OFPFC_ADD:
-        ds_put_cstr(string, " ADD: ");
+        ds_put_cstr(string, "ADD:");
         break;
     case OFPFC_MODIFY:
-        ds_put_cstr(string, " MOD: ");
+        ds_put_cstr(string, "MOD:");
         break;
     case OFPFC_MODIFY_STRICT:
-        ds_put_cstr(string, " MOD_STRICT: ");
+        ds_put_cstr(string, "MOD_STRICT:");
         break;
     case OFPFC_DELETE:
-        ds_put_cstr(string, " DEL: ");
+        ds_put_cstr(string, "DEL:");
         break;
     case OFPFC_DELETE_STRICT:
-        ds_put_cstr(string, " DEL_STRICT: ");
+        ds_put_cstr(string, "DEL_STRICT:");
         break;
     default:
-        ds_put_format(string, " cmd:%d ", ntohs(ofm->command));
+        ds_put_format(string, "cmd:%d", ntohs(ofm->command));
+    }
+    if (ofm->cookie != htonll(0)) {
+        ds_put_format(string, " cookie:0x%"PRIx64, ntohll(ofm->cookie));
+    }
+    if (ofm->idle_timeout != htons(OFP_FLOW_PERMANENT)) {
+        ds_put_format(string, " idle:%d", ntohs(ofm->idle_timeout));
+    }
+    if (ofm->hard_timeout != htons(OFP_FLOW_PERMANENT)) {
+        ds_put_format(string, " hard:%d", ntohs(ofm->hard_timeout));
     }
-    ds_put_format(string, "cookie:0x%"PRIx64" idle:%d hard:%d pri:%d "
-            "buf:%#x flags:%"PRIx16" ", ntohll(ofm->cookie),
-            ntohs(ofm->idle_timeout), ntohs(ofm->hard_timeout),
-            ofm->match.wildcards ? ntohs(ofm->priority) : (uint16_t)-1,
-            ntohl(ofm->buffer_id), ntohs(ofm->flags));
+    if (ofm->priority != htons(32768)) {
+        ds_put_format(string, " pri:%"PRIu16, ntohs(ofm->priority));
+    }
+    if (ofm->buffer_id != htonl(UINT32_MAX)) {
+        ds_put_format(string, " buf:%#"PRIx32, ntohl(ofm->buffer_id));
+    }
+    if (ofm->flags != htons(0)) {
+        ds_put_format(string, " flags:%"PRIx16, ntohs(ofm->flags));
+    }
+    ds_put_cstr(string, " ");
     ofp_print_actions(string, ofm->actions,
                       len - offsetof(struct ofp_flow_mod, actions));
     ds_put_char(string, '\n');
@@ -806,11 +825,15 @@ ofp_print_flow_removed(struct ds *string, const void *oh,
         ds_put_format(string, "**%"PRIu8"**", ofr->reason);
         break;
     }
-    ds_put_format(string,
-         " cookie0x%"PRIx64" pri%"PRIu16" secs%"PRIu32" nsecs%"PRIu32
+
+    if (ofr->cookie != htonll(0)) {
+        ds_put_format(string, " cookie:0x%"PRIx64, ntohll(ofr->cookie));
+    }
+    if (ofr->priority != htons(32768)) {
+        ds_put_format(string, " pri:%"PRIu16, ntohs(ofr->priority));
+    }
+    ds_put_format(string, " secs%"PRIu32" nsecs%"PRIu32
          " idle%"PRIu16" pkts%"PRIu64" bytes%"PRIu64"\n",
-         ntohll(ofr->cookie),
-         ofr->match.wildcards ? ntohs(ofr->priority) : (uint16_t)-1,
          ntohl(ofr->duration_sec), ntohl(ofr->duration_nsec),
          ntohs(ofr->idle_timeout), ntohll(ofr->packet_count),
          ntohll(ofr->byte_count));
-- 
cgit v1.2.1


From 0e581146fe81fce12c9e72cb70e06eba940720b5 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Fri, 1 Oct 2010 13:08:14 -0700
Subject: ofp-parse: Add test.

---
 tests/automake.mk     |  1 +
 tests/ovs-ofctl.at    | 22 ++++++++++++++++++++++
 tests/testsuite.at    |  1 +
 utilities/ovs-ofctl.c | 21 +++++++++++++++++++++
 4 files changed, 45 insertions(+)
 create mode 100644 tests/ovs-ofctl.at

diff --git a/tests/automake.mk b/tests/automake.mk
index e647bbb99..1fac45782 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -12,6 +12,7 @@ TESTSUITE_AT = \
 	tests/check-structs.at \
 	tests/daemon.at \
 	tests/daemon-py.at \
+	tests/ovs-ofctl.at \
 	tests/vconn.at \
 	tests/dir_name.at \
 	tests/aes128.at \
diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at
new file mode 100644
index 000000000..2a0ce2cf9
--- /dev/null
+++ b/tests/ovs-ofctl.at
@@ -0,0 +1,22 @@
+AT_BANNER([ovs-ofctl])
+
+AT_SETUP([ovs-ofctl parse-flows])
+AT_DATA([flows.txt], [
+# comment
+tcp,tp_src=123,actions=flood
+in_port=LOCAL dl_vlan=9 dl_src=00:0A:E4:25:6B:B0 actions=drop
+arp,nw_src=192.168.0.1,actions=drop_spoofed_arp,NORMAL
+udp dl_vlan_pcp=7 idle_timeout=5 actions=strip_vlan output:0
+cookie=0x123456789abcdef hard_timeout=10 priority=60000 actions=controller
+actions=drop
+])
+AT_CHECK([ovs-ofctl parse-flows flows.txt], [0], [stdout])
+AT_CHECK([[sed 's/ (xid=0x[0-9a-fA-F]*)//' stdout]], [0], [dnl
+flow_mod: tcp,tp_src=123, ADD: actions=FLOOD
+flow_mod: in_port=65534,dl_vlan=9,dl_src=00:0a:e4:25:6b:b0, ADD: actions=drop
+flow_mod: arp,nw_src=192.168.0.1, ADD: actions=drop_spoofed_arp,NORMAL
+flow_mod: udp,dl_vlan_pcp=7, ADD: idle:5 actions=strip_vlan,output:0
+flow_mod: ADD: cookie:0x123456789abcdef hard:10 pri:60000 actions=CONTROLLER:65535
+flow_mod: ADD: actions=drop
+])
+AT_CLEANUP
diff --git a/tests/testsuite.at b/tests/testsuite.at
index 42e62dfbe..d66563c8f 100644
--- a/tests/testsuite.at
+++ b/tests/testsuite.at
@@ -42,6 +42,7 @@ m4_include([tests/classifier.at])
 m4_include([tests/check-structs.at])
 m4_include([tests/daemon.at])
 m4_include([tests/daemon-py.at])
+m4_include([tests/ovs-ofctl.at])
 m4_include([tests/vconn.at])
 m4_include([tests/dir_name.at])
 m4_include([tests/aes128.at])
diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c
index c21c4f982..13f583e7e 100644
--- a/utilities/ovs-ofctl.c
+++ b/utilities/ovs-ofctl.c
@@ -859,6 +859,26 @@ do_benchmark(int argc OVS_UNUSED, char *argv[])
            count * message_size / (duration / 1000.0));
 }
 
+/* Parses the flows in the file named argv[1] and prints each one to stdout.
+ * Useful only for testing the flow parser, so it is undocumented. */
+static void
+do_parse_flows(int argc OVS_UNUSED, char *argv[])
+{
+    struct ofpbuf *b;
+    FILE *file;
+
+    file = fopen(argv[1], "r");
+    if (file == NULL) {
+        ovs_fatal(errno, "%s: open", argv[1]);
+    }
+
+    while ((b = parse_ofp_add_flow_file(file)) != NULL) {
+        ofp_print(stdout, b->data, b->size, 0);
+        ofpbuf_delete(b);
+    }
+    fclose(file);
+}
+
 static void
 do_help(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
 {
@@ -885,6 +905,7 @@ static const struct command all_commands[] = {
     { "probe", 1, 1, do_probe },
     { "ping", 1, 2, do_ping },
     { "benchmark", 3, 3, do_benchmark },
+    { "parse-flows", 1, 1, do_parse_flows },
     { "help", 0, INT_MAX, do_help },
     { NULL, 0, 0, NULL },
 };
-- 
cgit v1.2.1


From 09913dfd259ae81dee6c944db1b92e9722f17667 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 23 Sep 2010 14:08:13 -0700
Subject: ovs-controller: Make --with-flows read the file only once, at
 startup.

A couple of people have reported that ovs-controller --with-flows is
confusing.  This seems to be because it doesn't read the file with the
flows until the first connection from a switch.  Then, if the file has a
syntax error, it exits.

This commit changes the behavior so that it reads the file immediately at
startup instead.
---
 lib/learning-switch.c         | 30 +++++++++---------------------
 lib/learning-switch.h         |  3 ++-
 lib/queue.h                   |  5 ++++-
 utilities/ovs-controller.8.in |  2 ++
 utilities/ovs-controller.c    | 35 ++++++++++++++++++++++-------------
 5 files changed, 39 insertions(+), 36 deletions(-)

diff --git a/lib/learning-switch.c b/lib/learning-switch.c
index b20506b16..36594ac63 100644
--- a/lib/learning-switch.c
+++ b/lib/learning-switch.c
@@ -63,8 +63,6 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
 
 static void queue_tx(struct lswitch *, struct rconn *, struct ofpbuf *);
 static void send_features_request(struct lswitch *, struct rconn *);
-static void send_default_flows(struct lswitch *sw, struct rconn *rconn,
-                               FILE *default_flows);
 
 typedef void packet_handler_func(struct lswitch *, struct rconn *, void *);
 static packet_handler_func process_switch_features;
@@ -80,18 +78,17 @@ static packet_handler_func process_echo_request;
  * after the given number of seconds (or never expire, if 'max_idle' is
  * OFP_FLOW_PERMANENT).  Otherwise, the new switch will process every packet.
  *
- * The caller may provide the file stream 'default_flows' that defines
- * default flows that should be pushed when a switch connects.  Each
- * line is a flow entry in the format described for "add-flows" command
- * in the Flow Syntax section of the ovs-ofct(8) man page.  The caller
- * is responsible for closing the stream.
+ * The caller may provide an ofpbuf 'default_flows' that consists of a chain of
+ * one or more OpenFlow messages to send to the switch at time of connection.
+ * Presumably these will be OFPT_FLOW_MOD requests to set up the flow table.
  *
  * 'rconn' is used to send out an OpenFlow features request. */
 struct lswitch *
 lswitch_create(struct rconn *rconn, bool learn_macs,
                bool exact_flows, int max_idle, bool action_normal,
-               FILE *default_flows)
+               const struct ofpbuf *default_flows)
 {
+    const struct ofpbuf *b;
     struct lswitch *sw;
 
     sw = xzalloc(sizeof *sw);
@@ -113,9 +110,11 @@ lswitch_create(struct rconn *rconn, bool learn_macs,
     sw->queue = UINT32_MAX;
     sw->queued = rconn_packet_counter_create();
     send_features_request(sw, rconn);
-    if (default_flows) {
-        send_default_flows(sw, rconn, default_flows);
+
+    for (b = default_flows; b; b = b->next) {
+        queue_tx(sw, rconn, ofpbuf_clone(b));
     }
+
     return sw;
 }
 
@@ -248,17 +247,6 @@ send_features_request(struct lswitch *sw, struct rconn *rconn)
     }
 }
 
-static void
-send_default_flows(struct lswitch *sw, struct rconn *rconn,
-                   FILE *default_flows)
-{
-    struct ofpbuf *b;
-
-    while ((b = parse_ofp_add_flow_file(default_flows)) != NULL) {
-        queue_tx(sw, rconn, b);
-    }
-}
-
 static void
 queue_tx(struct lswitch *sw, struct rconn *rconn, struct ofpbuf *b)
 {
diff --git a/lib/learning-switch.h b/lib/learning-switch.h
index 96707b842..edb31549a 100644
--- a/lib/learning-switch.h
+++ b/lib/learning-switch.h
@@ -26,7 +26,8 @@ struct rconn;
 
 struct lswitch *lswitch_create(struct rconn *, bool learn_macs,
                                bool exact_flows, int max_idle,
-                               bool action_normal, FILE *default_flows);
+                               bool action_normal,
+                               const struct ofpbuf *default_flows);
 void lswitch_set_queue(struct lswitch *sw, uint32_t queue);
 void lswitch_run(struct lswitch *);
 void lswitch_wait(struct lswitch *);
diff --git a/lib/queue.h b/lib/queue.h
index 879f7a2d1..e30b84c54 100644
--- a/lib/queue.h
+++ b/lib/queue.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2009 Nicira Networks.
+ * Copyright (c) 2008, 2009, 2010 Nicira Networks.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
 #define QUEUE_H 1
 
 #include <stdbool.h>
+#include <stddef.h>
 
 /* Packet queue. */
 struct ovs_queue {
@@ -26,6 +27,8 @@ struct ovs_queue {
     struct ofpbuf *tail;        /* Last queued packet, null if n == 0. */
 };
 
+#define OVS_QUEUE_INITIALIZER { 0, NULL, NULL }
+
 void queue_init(struct ovs_queue *);
 void queue_destroy(struct ovs_queue *);
 void queue_clear(struct ovs_queue *);
diff --git a/utilities/ovs-controller.8.in b/utilities/ovs-controller.8.in
index c5954dd00..24f3a5cd7 100644
--- a/utilities/ovs-controller.8.in
+++ b/utilities/ovs-controller.8.in
@@ -105,6 +105,8 @@ When a switch connects, push the flow entries as described in
 \fIfile\fR.  Each line in \fIfile\fR is a flow entry in the format
 described for the \fBadd\-flows\fR command in the \fBFlow Syntax\fR
 section of the \fBovs\-ofctl\fR(8) man page.
+.IP
+Use this option more than once to add flows from multiple files.
 .
 .SS "Public Key Infrastructure Options"
 .so lib/ssl.man
diff --git a/utilities/ovs-controller.c b/utilities/ovs-controller.c
index 40e2a801d..b1b4f0a83 100644
--- a/utilities/ovs-controller.c
+++ b/utilities/ovs-controller.c
@@ -28,6 +28,7 @@
 #include "compiler.h"
 #include "daemon.h"
 #include "learning-switch.h"
+#include "ofp-parse.h"
 #include "ofpbuf.h"
 #include "openflow/openflow.h"
 #include "poll-loop.h"
@@ -73,7 +74,7 @@ static uint32_t queue_id = UINT32_MAX;
 
 /* --with-flows: File with flows to send to switch, or null to not load
  * any default flows. */
-static FILE *flow_file = NULL;
+static struct ovs_queue default_flows = OVS_QUEUE_INITIALIZER;
 
 /* --unixctl: Name of unixctl socket, or null to use the default. */
 static char *unixctl_path = NULL;
@@ -216,16 +217,9 @@ new_switch(struct switch_ *sw, struct vconn *vconn)
 {
     sw->rconn = rconn_create(60, 0);
     rconn_connect_unreliably(sw->rconn, vconn, NULL);
-
-    /* If it was set, rewind 'flow_file' to the beginning, since a
-     * previous call to lswitch_create() will leave the stream at the
-     * end. */
-    if (flow_file) {
-        rewind(flow_file);
-    }
     sw->lswitch = lswitch_create(sw->rconn, learn_macs, exact_flows,
                                  set_up_flows ? max_idle : -1,
-                                 action_normal, flow_file);
+                                 action_normal, default_flows.head);
 
     lswitch_set_queue(sw->lswitch, queue_id);
 }
@@ -252,6 +246,24 @@ do_switching(struct switch_ *sw)
             : EAGAIN);
 }
 
+static void
+read_flow_file(const char *name)
+{
+    struct ofpbuf *b;
+    FILE *stream;
+
+    stream = fopen(name, "r");
+    if (!stream) {
+        ovs_fatal(errno, "%s: open", name);
+    }
+
+    while ((b = parse_ofp_add_flow_file(stream)) != NULL) {
+        queue_push_tail(&default_flows, b);
+    }
+
+    fclose(stream);
+}
+
 static void
 parse_options(int argc, char *argv[])
 {
@@ -332,10 +344,7 @@ parse_options(int argc, char *argv[])
             break;
 
         case OPT_WITH_FLOWS:
-            flow_file = fopen(optarg, "r");
-            if (flow_file == NULL) {
-                ovs_fatal(errno, "%s: open", optarg);
-            }
+            read_flow_file(optarg);
             break;
 
         case OPT_UNIXCTL:
-- 
cgit v1.2.1


From ad67e568887f56d3b70a8225c3a476d94f70c20b Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 23 Sep 2010 14:12:09 -0700
Subject: learning-switch: Introduce struct for configuration.

This should make extensions easier.
---
 lib/learning-switch.c      | 39 +++++++++------------------------------
 lib/learning-switch.h      | 32 ++++++++++++++++++++++++++++----
 utilities/ovs-controller.c | 13 +++++++++----
 3 files changed, 46 insertions(+), 38 deletions(-)

diff --git a/lib/learning-switch.c b/lib/learning-switch.c
index 36594ac63..90749c018 100644
--- a/lib/learning-switch.c
+++ b/lib/learning-switch.c
@@ -69,35 +69,23 @@ static packet_handler_func process_switch_features;
 static packet_handler_func process_packet_in;
 static packet_handler_func process_echo_request;
 
-/* Creates and returns a new learning switch.
- *
- * If 'learn_macs' is true, the new switch will learn the ports on which MAC
- * addresses appear.  Otherwise, the new switch will flood all packets.
- *
- * If 'max_idle' is nonnegative, the new switch will set up flows that expire
- * after the given number of seconds (or never expire, if 'max_idle' is
- * OFP_FLOW_PERMANENT).  Otherwise, the new switch will process every packet.
- *
- * The caller may provide an ofpbuf 'default_flows' that consists of a chain of
- * one or more OpenFlow messages to send to the switch at time of connection.
- * Presumably these will be OFPT_FLOW_MOD requests to set up the flow table.
+/* Creates and returns a new learning switch whose configuration is given by
+ * 'cfg'.
  *
  * 'rconn' is used to send out an OpenFlow features request. */
 struct lswitch *
-lswitch_create(struct rconn *rconn, bool learn_macs,
-               bool exact_flows, int max_idle, bool action_normal,
-               const struct ofpbuf *default_flows)
+lswitch_create(struct rconn *rconn, const struct lswitch_config *cfg)
 {
     const struct ofpbuf *b;
     struct lswitch *sw;
 
     sw = xzalloc(sizeof *sw);
-    sw->max_idle = max_idle;
+    sw->max_idle = cfg->max_idle;
     sw->datapath_id = 0;
     sw->last_features_request = time_now() - 1;
-    sw->ml = learn_macs ? mac_learning_create() : NULL;
-    sw->action_normal = action_normal;
-    if (exact_flows) {
+    sw->ml = cfg->mode == LSW_LEARN ? mac_learning_create() : NULL;
+    sw->action_normal = cfg->mode == LSW_NORMAL;
+    if (cfg->exact_flows) {
         /* Exact match. */
         sw->wildcards = 0;
     } else {
@@ -107,11 +95,11 @@ lswitch_create(struct rconn *rconn, bool learn_macs,
         sw->wildcards = (OFPFW_DL_TYPE | OFPFW_NW_SRC_MASK | OFPFW_NW_DST_MASK
                          | OFPFW_NW_PROTO | OFPFW_TP_SRC | OFPFW_TP_DST);
     }
-    sw->queue = UINT32_MAX;
+    sw->queue = cfg->queue_id;
     sw->queued = rconn_packet_counter_create();
     send_features_request(sw, rconn);
 
-    for (b = default_flows; b; b = b->next) {
+    for (b = cfg->default_flows; b; b = b->next) {
         queue_tx(sw, rconn, ofpbuf_clone(b));
     }
 
@@ -129,15 +117,6 @@ lswitch_destroy(struct lswitch *sw)
     }
 }
 
-/* Sets 'queue' as the OpenFlow queue used by packets and flows set up by 'sw'.
- * Specify UINT32_MAX to avoid specifying a particular queue, which is also the
- * default if this function is never called for 'sw'.  */
-void
-lswitch_set_queue(struct lswitch *sw, uint32_t queue)
-{
-    sw->queue = queue;
-}
-
 /* Takes care of necessary 'sw' activity, except for receiving packets (which
  * the caller must do). */
 void
diff --git a/lib/learning-switch.h b/lib/learning-switch.h
index edb31549a..2ce49e612 100644
--- a/lib/learning-switch.h
+++ b/lib/learning-switch.h
@@ -24,10 +24,34 @@
 struct ofpbuf;
 struct rconn;
 
-struct lswitch *lswitch_create(struct rconn *, bool learn_macs,
-                               bool exact_flows, int max_idle,
-                               bool action_normal,
-                               const struct ofpbuf *default_flows);
+enum lswitch_mode {
+    LSW_NORMAL,                 /* Always use OFPP_NORMAL. */
+    LSW_FLOOD,                  /* Always use OFPP_FLOOD. */
+    LSW_LEARN                   /* Learn MACs at controller. */
+};
+
+struct lswitch_config {
+    enum lswitch_mode mode;
+
+    /* Set up only exact-match flows? */
+    bool exact_flows;
+
+    /* <0: Process every packet at the controller.
+     * >=0: Expire flows after they are unused for 'max_idle' seconds.
+     * OFP_FLOW_PERMANENT: Set up permanent flows. */
+    int max_idle;
+
+    /* Optionally, a chain of one or more OpenFlow messages to send to the
+     * switch at time of connection.  Presumably these will be OFPT_FLOW_MOD
+     * requests to set up the flow table. */
+    const struct ofpbuf *default_flows;
+
+    /* The OpenFlow queue used by packets and flows set up by 'sw'.  Use
+     * UINT32_MAX to avoid specifying a particular queue. */
+    uint32_t queue_id;
+};
+
+struct lswitch *lswitch_create(struct rconn *, const struct lswitch_config *);
 void lswitch_set_queue(struct lswitch *sw, uint32_t queue);
 void lswitch_run(struct lswitch *);
 void lswitch_wait(struct lswitch *);
diff --git a/utilities/ovs-controller.c b/utilities/ovs-controller.c
index b1b4f0a83..9892abe4d 100644
--- a/utilities/ovs-controller.c
+++ b/utilities/ovs-controller.c
@@ -215,13 +215,19 @@ main(int argc, char *argv[])
 static void
 new_switch(struct switch_ *sw, struct vconn *vconn)
 {
+    struct lswitch_config cfg;
+
     sw->rconn = rconn_create(60, 0);
     rconn_connect_unreliably(sw->rconn, vconn, NULL);
-    sw->lswitch = lswitch_create(sw->rconn, learn_macs, exact_flows,
-                                 set_up_flows ? max_idle : -1,
-                                 action_normal, default_flows.head);
 
-    lswitch_set_queue(sw->lswitch, queue_id);
+    cfg.exact_flows = exact_flows;
+    cfg.mode = (action_normal ? LSW_NORMAL
+                : learn_macs ? LSW_LEARN
+                : LSW_FLOOD);
+    cfg.max_idle = set_up_flows ? max_idle : -1;
+    cfg.default_flows = default_flows.head;
+    cfg.queue_id = queue_id;
+    sw->lswitch = lswitch_create(sw->rconn, &cfg);
 }
 
 static int
-- 
cgit v1.2.1


From d4cdc6b4c45e5ca6a44bccf90f856b76ef936fac Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Fri, 1 Oct 2010 13:41:40 -0700
Subject: ovs-controller: Improve QoS abilities.

This makes it a little easier to test Open vSwitch QoS features using
ovs-controller, by making it possible to assign queues on the basis of
input port, instead of just allowing a single queue for a whole switch.

CC: Michael Mao <mmao@nicira.com>
---
 lib/learning-switch.c         | 78 ++++++++++++++++++++++++++++++++++++++++---
 lib/learning-switch.h         |  9 +++--
 utilities/ovs-controller.8.in | 24 ++++++++++++-
 utilities/ovs-controller.c    | 63 +++++++++++++++++++++++++++++-----
 4 files changed, 158 insertions(+), 16 deletions(-)

diff --git a/lib/learning-switch.c b/lib/learning-switch.c
index 90749c018..1e5d25bc2 100644
--- a/lib/learning-switch.c
+++ b/lib/learning-switch.c
@@ -24,6 +24,7 @@
 #include <time.h>
 
 #include "flow.h"
+#include "hmap.h"
 #include "mac-learning.h"
 #include "ofpbuf.h"
 #include "ofp-parse.h"
@@ -33,6 +34,7 @@
 #include "poll-loop.h"
 #include "queue.h"
 #include "rconn.h"
+#include "shash.h"
 #include "timeval.h"
 #include "vconn.h"
 #include "vlog.h"
@@ -40,6 +42,12 @@
 
 VLOG_DEFINE_THIS_MODULE(learning_switch)
 
+struct lswitch_port {
+    struct hmap_node hmap_node; /* Hash node for port number. */
+    uint16_t port_no;           /* OpenFlow port number, in host byte order. */
+    uint32_t queue_id;          /* OpenFlow queue number. */
+};
+
 struct lswitch {
     /* If nonnegative, the switch sets up flows that expire after the given
      * number of seconds (or never expire, if the value is OFP_FLOW_PERMANENT).
@@ -51,7 +59,11 @@ struct lswitch {
     struct mac_learning *ml;    /* NULL to act as hub instead of switch. */
     uint32_t wildcards;         /* Wildcards to apply to flows. */
     bool action_normal;         /* Use OFPP_NORMAL? */
-    uint32_t queue;             /* OpenFlow queue to use, or UINT32_MAX. */
+
+    /* Queue distribution. */
+    uint32_t default_queue;     /* Default OpenFlow queue, or UINT32_MAX. */
+    struct hmap queue_numbers;  /* Map from port number to lswitch_port. */
+    struct shash queue_names;   /* Map from port name to lswitch_port. */
 
     /* Number of outgoing queued packets on the rconn. */
     struct rconn_packet_counter *queued;
@@ -95,7 +107,21 @@ lswitch_create(struct rconn *rconn, const struct lswitch_config *cfg)
         sw->wildcards = (OFPFW_DL_TYPE | OFPFW_NW_SRC_MASK | OFPFW_NW_DST_MASK
                          | OFPFW_NW_PROTO | OFPFW_TP_SRC | OFPFW_TP_DST);
     }
-    sw->queue = cfg->queue_id;
+
+    sw->default_queue = cfg->default_queue;
+    hmap_init(&sw->queue_numbers);
+    shash_init(&sw->queue_names);
+    if (cfg->port_queues) {
+        struct shash_node *node;
+
+        SHASH_FOR_EACH (node, cfg->port_queues) {
+            struct lswitch_port *port = xmalloc(sizeof *port);
+            hmap_node_nullify(&port->hmap_node);
+            port->queue_id = (uintptr_t) node->data;
+            shash_add(&sw->queue_names, node->name, port);
+        }
+    }
+
     sw->queued = rconn_packet_counter_create();
     send_features_request(sw, rconn);
 
@@ -111,6 +137,13 @@ void
 lswitch_destroy(struct lswitch *sw)
 {
     if (sw) {
+        struct lswitch_port *node, *next;
+
+        HMAP_FOR_EACH_SAFE (node, next, hmap_node, &sw->queue_numbers) {
+            hmap_remove(&sw->queue_numbers, &node->hmap_node);
+            free(node);
+        }
+        shash_destroy(&sw->queue_names);
         mac_learning_destroy(sw->ml);
         rconn_packet_counter_destroy(sw->queued);
         free(sw);
@@ -247,8 +280,28 @@ process_switch_features(struct lswitch *sw, struct rconn *rconn OVS_UNUSED,
                         void *osf_)
 {
     struct ofp_switch_features *osf = osf_;
+    size_t n_ports;
+    size_t i;
+
+    if (check_ofp_message_array(&osf->header, OFPT_FEATURES_REPLY,
+                                sizeof *osf, sizeof *osf->ports, &n_ports)) {
+        return;
+    }
 
     sw->datapath_id = ntohll(osf->datapath_id);
+
+    for (i = 0; i < n_ports; i++) {
+        struct ofp_phy_port *opp = &osf->ports[i];
+        struct lswitch_port *lp;
+
+        opp->name[OFP_MAX_PORT_NAME_LEN - 1] = '\0';
+        lp = shash_find_data(&sw->queue_names, (char *) opp->name);
+        if (lp && hmap_node_is_null(&lp->hmap_node)) {
+            lp->port_no = ntohs(opp->port_no);
+            hmap_insert(&sw->queue_numbers, &lp->hmap_node,
+                        hash_int(lp->port_no, 0));
+        }
+    }
 }
 
 static uint16_t
@@ -291,11 +344,27 @@ lswitch_choose_destination(struct lswitch *sw, const flow_t *flow)
     return out_port;
 }
 
+static uint32_t
+get_queue_id(const struct lswitch *sw, uint16_t in_port)
+{
+    const struct lswitch_port *port;
+
+    HMAP_FOR_EACH_WITH_HASH (port, hmap_node, hash_int(in_port, 0),
+                             &sw->queue_numbers) {
+        if (port->port_no == in_port) {
+            return port->queue_id;
+        }
+    }
+
+    return sw->default_queue;
+}
+
 static void
 process_packet_in(struct lswitch *sw, struct rconn *rconn, void *opi_)
 {
     struct ofp_packet_in *opi = opi_;
     uint16_t in_port = ntohs(opi->in_port);
+    uint32_t queue_id;
     uint16_t out_port;
 
     struct ofp_action_header actions[2];
@@ -323,9 +392,10 @@ process_packet_in(struct lswitch *sw, struct rconn *rconn, void *opi_)
     out_port = lswitch_choose_destination(sw, &flow);
 
     /* Make actions. */
+    queue_id = get_queue_id(sw, in_port);
     if (out_port == OFPP_NONE) {
         actions_len = 0;
-    } else if (sw->queue == UINT32_MAX || out_port >= OFPP_MAX) {
+    } else if (queue_id == UINT32_MAX || out_port >= OFPP_MAX) {
         struct ofp_action_output oao;
 
         memset(&oao, 0, sizeof oao);
@@ -342,7 +412,7 @@ process_packet_in(struct lswitch *sw, struct rconn *rconn, void *opi_)
         oae.type = htons(OFPAT_ENQUEUE);
         oae.len = htons(sizeof oae);
         oae.port = htons(out_port);
-        oae.queue_id = htonl(sw->queue);
+        oae.queue_id = htonl(queue_id);
 
         memcpy(actions, &oae, sizeof oae);
         actions_len = sizeof oae;
diff --git a/lib/learning-switch.h b/lib/learning-switch.h
index 2ce49e612..d0892576a 100644
--- a/lib/learning-switch.h
+++ b/lib/learning-switch.h
@@ -46,9 +46,12 @@ struct lswitch_config {
      * requests to set up the flow table. */
     const struct ofpbuf *default_flows;
 
-    /* The OpenFlow queue used by packets and flows set up by 'sw'.  Use
-     * UINT32_MAX to avoid specifying a particular queue. */
-    uint32_t queue_id;
+    /* The OpenFlow queue to use by default.  Use UINT32_MAX to avoid
+     * specifying a particular queue. */
+    uint32_t default_queue;
+
+    /* Maps from a port name to a queue_id (cast to void *). */
+    const struct shash *port_queues;
 };
 
 struct lswitch *lswitch_create(struct rconn *, const struct lswitch_config *);
diff --git a/utilities/ovs-controller.8.in b/utilities/ovs-controller.8.in
index 24f3a5cd7..aa5751f93 100644
--- a/utilities/ovs-controller.8.in
+++ b/utilities/ovs-controller.8.in
@@ -98,7 +98,29 @@ sending packets and setting up flows.  Use one of these options,
 supplying \fIid\fR as an OpenFlow queue ID as a decimal number, to
 instead use that specific queue.
 .IP
-This option may be useful for debugging quality of service setups.
+This option is incompatible with \fB\-N\fR or \fB\-\-normal\fR and
+with \fB\-H\fR or \fB\-\-hub\fR.  If more than one is specified then
+this option takes precedence.
+.IP
+This option may be useful for testing or debugging quality of service
+setups.
+.
+.IP "\fB\-Q \fIport-name\fB:\fIqueue-id\fR"
+.IP "\fB\-\-port\-queue \fIport-name\fB:\fIqueue-id\fR"
+Configures packets received on the port named \fIport-name\fR
+(e.g. \fBeth0\fR) to be output on OpenFlow queue ID \fIqueue-id\fR
+(specified as a decimal number).  For the specified port, this option
+overrides the default specified on \fB\-q\fR or \fB\-\-queue\fR.
+.IP
+This option may be specified any number of times with different
+\fIport-name\fR arguments.
+.IP
+This option is incompatible with \fB\-N\fR or \fB\-\-normal\fR and
+with \fB\-H\fR or \fB\-\-hub\fR.  If more than one is specified then
+this option takes precedence.
+.IP
+This option may be useful for testing or debugging quality of service
+setups.
 .
 .IP "\fB\-\-with\-flows \fIfile\fR"
 When a switch connects, push the flow entries as described in
diff --git a/utilities/ovs-controller.c b/utilities/ovs-controller.c
index 9892abe4d..26a1fc3f5 100644
--- a/utilities/ovs-controller.c
+++ b/utilities/ovs-controller.c
@@ -33,6 +33,7 @@
 #include "openflow/openflow.h"
 #include "poll-loop.h"
 #include "rconn.h"
+#include "shash.h"
 #include "stream-ssl.h"
 #include "timeval.h"
 #include "unixctl.h"
@@ -50,10 +51,11 @@ struct switch_ {
     struct rconn *rconn;
 };
 
-/* Learn the ports on which MAC addresses appear? */
+/* -H, --hub: Learn the ports on which MAC addresses appear? */
 static bool learn_macs = true;
 
-/* Set up flows?  (If not, every packet is processed at the controller.) */
+/* -n, --noflow: Set up flows?  (If not, every packet is processed at the
+ * controller.) */
 static bool set_up_flows = true;
 
 /* -N, --normal: Use "NORMAL" action instead of explicit port? */
@@ -69,8 +71,11 @@ static int max_idle = 60;
  * of their messages (for debugging fail-open mode). */
 static bool mute = false;
 
-/* -q, --queue: OpenFlow queue to use, or the default queue if UINT32_MAX. */
-static uint32_t queue_id = UINT32_MAX;
+/* -q, --queue: default OpenFlow queue, none if UINT32_MAX. */
+static uint32_t default_queue = UINT32_MAX;
+
+/* -Q, --port-queue: map from port name to queue ID (cast to void *). */
+static struct shash port_queues = SHASH_INITIALIZER(&port_queues);
 
 /* --with-flows: File with flows to send to switch, or null to not load
  * any default flows. */
@@ -225,7 +230,8 @@ new_switch(struct switch_ *sw, struct vconn *vconn)
                 : LSW_FLOOD);
     cfg.max_idle = set_up_flows ? max_idle : -1;
     cfg.default_flows = default_flows.head;
-    cfg.queue_id = queue_id;
+    cfg.default_queue = default_queue;
+    cfg.port_queues = &port_queues;
     sw->lswitch = lswitch_create(sw->rconn, &cfg);
 }
 
@@ -269,6 +275,27 @@ read_flow_file(const char *name)
     fclose(stream);
 }
 
+static void
+add_port_queue(char *s)
+{
+    char *save_ptr = NULL;
+    char *port_name;
+    char *queue_id;
+
+    port_name = strtok_r(s, ":", &save_ptr);
+    queue_id = strtok_r(NULL, "", &save_ptr);
+    if (!queue_id) {
+        ovs_fatal(0, "argument to -Q or --port-queue should take the form "
+                  "\"<port-name>:<queue-id>\"");
+    }
+
+    if (!shash_add_once(&port_queues, port_name,
+                        (void *) (uintptr_t) atoi(queue_id))) {
+        ovs_fatal(0, "<port-name> arguments for -Q or --port-queue must "
+                  "be unique");
+    }
+}
+
 static void
 parse_options(int argc, char *argv[])
 {
@@ -288,6 +315,7 @@ parse_options(int argc, char *argv[])
         {"max-idle",    required_argument, 0, OPT_MAX_IDLE},
         {"mute",        no_argument, 0, OPT_MUTE},
         {"queue",       required_argument, 0, 'q'},
+        {"port-queue",  required_argument, 0, 'Q'},
         {"with-flows",  required_argument, 0, OPT_WITH_FLOWS},
         {"unixctl",     required_argument, 0, OPT_UNIXCTL},
         {"help",        no_argument, 0, 'h'},
@@ -345,7 +373,11 @@ parse_options(int argc, char *argv[])
             break;
 
         case 'q':
-            queue_id = atoi(optarg);
+            default_queue = atoi(optarg);
+            break;
+
+        case 'Q':
+            add_port_queue(optarg);
             break;
 
         case OPT_WITH_FLOWS:
@@ -382,6 +414,20 @@ parse_options(int argc, char *argv[])
         }
     }
     free(short_options);
+
+    if (!shash_is_empty(&port_queues) || default_queue != UINT32_MAX) {
+        if (action_normal) {
+            ovs_error(0, "queue IDs are incompatible with -N or --normal; "
+                      "not using OFPP_NORMAL");
+            action_normal = false;
+        }
+
+        if (!learn_macs) {
+            ovs_error(0, "queue IDs are incompatible with -H or --hub; "
+                      "not acting as hub");
+            learn_macs = true;
+        }
+    }
 }
 
 static void
@@ -398,9 +444,10 @@ usage(void)
            "  -H, --hub               act as hub instead of learning switch\n"
            "  -n, --noflow            pass traffic, but don't add flows\n"
            "  --max-idle=SECS         max idle time for new flows\n"
-           "  -N, --normal            use OFPAT_NORMAL action\n"
+           "  -N, --normal            use OFPP_NORMAL action\n"
            "  -w, --wildcard          use wildcards, not exact-match rules\n"
-           "  -q, --queue=QUEUE       OpenFlow queue ID to use for output\n"
+           "  -q, --queue=QUEUE-ID    OpenFlow queue ID to use for output\n"
+           "  -Q PORT-NAME:QUEUE-ID   use QUEUE-ID for frames from PORT-NAME\n"
            "  --with-flows FILE       use the flows from FILE\n"
            "  --unixctl=SOCKET        override default control socket name\n"
            "  -h, --help              display this help message\n"
-- 
cgit v1.2.1


From 8b055d92533271b05843f4b62fa98aba5e7ce90a Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 26 Aug 2010 14:53:04 -0700
Subject: INSTALL.Linux: Describe how to upgrade the Open vSwitch database.

Suggested-by: Parham Kiani <pkiani@essex.ac.uk>
---
 INSTALL.Linux | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/INSTALL.Linux b/INSTALL.Linux
index 8ae7aaca7..d5a3b7d65 100644
--- a/INSTALL.Linux
+++ b/INSTALL.Linux
@@ -247,7 +247,8 @@ configuration in the database:
                       --remote=db:Open_vSwitch,managers \
                       --private-key=db:SSL,private_key \
                       --certificate=db:SSL,certificate \
-                      --bootstrap-ca-cert=db:SSL,ca_cert
+                      --bootstrap-ca-cert=db:SSL,ca_cert \
+                      --pidfile --detach
 
 Then initialize the database using ovs-vsctl.  This is only
 necessary the first time after you create the database with
@@ -258,7 +259,8 @@ ovsdb-tool (but running it at any time is harmless):
 Then start the main Open vSwitch daemon, telling it to connect to the
 same Unix domain socket:
 
-      % ovs-vswitchd unix:/usr/local/var/run/openvswitch/db.sock
+      % ovs-vswitchd unix:/usr/local/var/run/openvswitch/db.sock \
+            --pidfile --detach 
 
 Now you may use ovs-vsctl to set up bridges and other Open vSwitch
 features.  For example, to create a bridge named br0 and add ports
@@ -270,6 +272,33 @@ eth0 and vif1.0 to it:
 
 Please refer to ovs-vsctl(8) for more details.
 
+Upgrading
+=========
+
+When you upgrade Open vSwitch from one version to another, you should
+also upgrade the database schema:
+
+1. Stop the Open vSwitch daemons, e.g.:
+
+      % ovs-kill ovsdb-server.pid ovs-vswitchd.pid
+
+2. Install the new Open vSwitch release.
+
+3. Upgrade the database, in one of the following two ways:
+
+      - If there is no important data in your database, then you may
+        delete the database file and recreate it with ovsdb-tool,
+        following the instructions under "Building and Installing Open
+        vSwitch for Linux".
+
+      - If you want to preserve the contents of your database, back it
+        up first, then use "ovsdb-tool convert" to upgrade it, e.g.:
+
+        % ovsdb-tool convert /usr/local/etc/ovs-vswitchd.conf.db vswitchd/vswitch.ovsschema
+
+4. Start the Open vSwitch daemons as described under "Building and
+   Installing Open vSwitch for Linux" above.
+
 Bug Reporting
 -------------
 
-- 
cgit v1.2.1


From 48f846e66ef06c39228fb1d3f8d8bfd1c695c93d Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 1 Sep 2010 12:43:11 -0700
Subject: netflow: Do 64-bit division less often.

64-bit division is expensive.  Usually we can avoid it entirely, as done by
this patch.
---
 ofproto/netflow.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/ofproto/netflow.c b/ofproto/netflow.c
index 015208ac9..a70b2fce8 100644
--- a/ofproto/netflow.c
+++ b/ofproto/netflow.c
@@ -184,21 +184,24 @@ netflow_expire(struct netflow *nf, struct netflow_flow *nf_flow,
         return;
     }
 
-    /* NetFlow v5 records are limited to 32-bit counters.  If we've
-     * wrapped a counter, send as multiple records so we don't lose
-     * track of any traffic.  We try to evenly distribute the packet and
-     * byte counters, so that the bytes-per-packet lengths don't look
-     * wonky across the records. */
-    while (byte_delta) {
-        int n_recs = (byte_delta + UINT32_MAX - 1) / UINT32_MAX;
+    /* NetFlow v5 records are limited to 32-bit counters.  If we've wrapped
+     * a counter, send as multiple records so we don't lose track of any
+     * traffic.  We try to evenly distribute the packet and byte counters,
+     * so that the bytes-per-packet lengths don't look wonky across the
+     * records. */
+    while (byte_delta > UINT32_MAX) {
+        uint32_t n_recs = byte_delta >> 32;
         uint32_t pkt_count = pkt_delta / n_recs;
         uint32_t byte_count = byte_delta / n_recs;
-        
+
         gen_netflow_rec(nf, nf_flow, expired, pkt_count, byte_count);
 
         pkt_delta -= pkt_count;
         byte_delta -= byte_count;
     }
+    if (byte_delta > 0) {
+        gen_netflow_rec(nf, nf_flow, expired, pkt_delta, byte_delta);
+    }
 
     /* Update flow tracking data. */
     nf_flow->created = 0;
-- 
cgit v1.2.1


From 9ebc44ae8c5940513a8dcc2aab8dcca8aff9d2a2 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 1 Sep 2010 12:45:24 -0700
Subject: netflow: Avoid (theoretically) looping 2**32 times.

If the netflow byte counter is UINT64_MAX, or at any rate much larger than
UINT32_MAX, netflow_expire() could loop for a very long time.  This commit
avoids that case.

This is only a theoretical bug fix.  I don't know of any actual bug that
would cause a counter to be that high.
---
 ofproto/netflow.c | 49 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 32 insertions(+), 17 deletions(-)

diff --git a/ofproto/netflow.c b/ofproto/netflow.c
index a70b2fce8..4881c5fdb 100644
--- a/ofproto/netflow.c
+++ b/ofproto/netflow.c
@@ -184,23 +184,38 @@ netflow_expire(struct netflow *nf, struct netflow_flow *nf_flow,
         return;
     }
 
-    /* NetFlow v5 records are limited to 32-bit counters.  If we've wrapped
-     * a counter, send as multiple records so we don't lose track of any
-     * traffic.  We try to evenly distribute the packet and byte counters,
-     * so that the bytes-per-packet lengths don't look wonky across the
-     * records. */
-    while (byte_delta > UINT32_MAX) {
-        uint32_t n_recs = byte_delta >> 32;
-        uint32_t pkt_count = pkt_delta / n_recs;
-        uint32_t byte_count = byte_delta / n_recs;
-
-        gen_netflow_rec(nf, nf_flow, expired, pkt_count, byte_count);
-
-        pkt_delta -= pkt_count;
-        byte_delta -= byte_count;
-    }
-    if (byte_delta > 0) {
-        gen_netflow_rec(nf, nf_flow, expired, pkt_delta, byte_delta);
+    if ((byte_delta >> 32) <= 175) {
+        /* NetFlow v5 records are limited to 32-bit counters.  If we've wrapped
+         * a counter, send as multiple records so we don't lose track of any
+         * traffic.  We try to evenly distribute the packet and byte counters,
+         * so that the bytes-per-packet lengths don't look wonky across the
+         * records. */
+        while (byte_delta > UINT32_MAX) {
+            uint32_t n_recs = byte_delta >> 32;
+            uint32_t pkt_count = pkt_delta / n_recs;
+            uint32_t byte_count = byte_delta / n_recs;
+
+            gen_netflow_rec(nf, nf_flow, expired, pkt_count, byte_count);
+
+            pkt_delta -= pkt_count;
+            byte_delta -= byte_count;
+        }
+        if (byte_delta > 0) {
+            gen_netflow_rec(nf, nf_flow, expired, pkt_delta, byte_delta);
+        }
+    } else {
+        /* In 600 seconds, a 10GbE link can theoretically transmit 75 * 10**10
+         * ~= 175 * 2**32 bytes.  The byte counter is bigger than that, so it's
+         * probably a bug--for example, the netdev code uses UINT64_MAX to
+         * report "unknown value", and perhaps that has leaked through to here.
+         *
+         * We wouldn't want to hit the loop above in this case, because it
+         * would try to send up to UINT32_MAX netflow records, which would take
+         * a long time.
+         */
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+
+        VLOG_WARN_RL(&rl, "impossible byte counter %"PRIu64, byte_delta);
     }
 
     /* Update flow tracking data. */
-- 
cgit v1.2.1


From eedc0097f475a15297375a2aba39313c0f98f330 Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Sat, 2 Oct 2010 00:27:23 -0700
Subject: Add Nicira extension for modifying queue without transmitting

The OpenFlow OFPAT_ENQUEUE action sets a queue id and outputs the packet
in one shot.  There are times in which the queue should be set, but the
output port is not yet known.  This commit adds the NXAST_SET_QUEUE and
NXAST_POP_QUEUE Nicira extension actions to modify the queue
configuration without requiring a port argument.

CC: Jeremy Stribling <strib@nicira.com>
CC: Keith Amidon <keith@nicira.com>
---
 include/openflow/nicira-ext.h | 23 ++++++++++++++++++++++-
 lib/ofp-parse.c               | 11 +++++++++++
 lib/ofp-print.c               | 11 +++++++++++
 lib/ofp-util.c                |  2 ++
 ofproto/ofproto.c             | 30 ++++++++++++++++++++++++++++++
 tests/ovs-ofctl.at            |  6 +++++-
 utilities/ovs-ofctl.8.in      | 10 ++++++++++
 7 files changed, 91 insertions(+), 2 deletions(-)

diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h
index c97478faf..df2488bd9 100644
--- a/include/openflow/nicira-ext.h
+++ b/include/openflow/nicira-ext.h
@@ -142,7 +142,17 @@ enum nx_action_subtype {
      * This is useful because OpenFlow does not provide a way to match on the
      * Ethernet addresses inside ARP packets, so there is no other way to drop
      * spoofed ARPs other than sending every ARP packet to a controller. */
-    NXAST_DROP_SPOOFED_ARP
+    NXAST_DROP_SPOOFED_ARP,
+
+    /* Set the queue that should be used when packets are output.  This
+     * is similar to the OpenFlow OFPAT_ENQUEUE action, but does not
+     * take the output port as an argument.  This allows the queue
+     * to be defined before the port is known. */
+    NXAST_SET_QUEUE,
+
+    /* Restore the queue to the value it was before any NXAST_SET_QUEUE
+     * actions were used. */
+    NXAST_POP_QUEUE
 };
 
 /* Action structure for NXAST_RESUBMIT. */
@@ -167,6 +177,17 @@ struct nx_action_set_tunnel {
 };
 OFP_ASSERT(sizeof(struct nx_action_set_tunnel) == 16);
 
+/* Action structure for NXAST_SET_QUEUE. */
+struct nx_action_set_queue {
+    uint16_t type;                  /* OFPAT_VENDOR. */
+    uint16_t len;                   /* Length is 16. */
+    uint32_t vendor;                /* NX_VENDOR_ID. */
+    uint16_t subtype;               /* NXAST_SET_QUEUE. */
+    uint8_t pad[2];
+    uint32_t queue_id;              /* Where to enqueue packets. */
+};
+OFP_ASSERT(sizeof(struct nx_action_set_queue) == 16);
+
 /* Header for Nicira-defined actions. */
 struct nx_action_header {
     uint16_t type;                  /* OFPAT_VENDOR. */
diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c
index 069687b15..7a888801e 100644
--- a/lib/ofp-parse.c
+++ b/lib/ofp-parse.c
@@ -267,6 +267,17 @@ str_to_action(char *str, struct ofpbuf *b)
             nah = put_action(b, sizeof *nah, OFPAT_VENDOR);
             nah->vendor = htonl(NX_VENDOR_ID);
             nah->subtype = htons(NXAST_DROP_SPOOFED_ARP);
+        } else if (!strcasecmp(act, "set_queue")) {
+            struct nx_action_set_queue *nasq;
+            nasq = put_action(b, sizeof *nasq, OFPAT_VENDOR);
+            nasq->vendor = htonl(NX_VENDOR_ID);
+            nasq->subtype = htons(NXAST_SET_QUEUE);
+            nasq->queue_id = htonl(str_to_u32(arg));
+        } else if (!strcasecmp(act, "pop_queue")) {
+            struct nx_action_header *nah;
+            nah = put_action(b, sizeof *nah, OFPAT_VENDOR);
+            nah->vendor = htonl(NX_VENDOR_ID);
+            nah->subtype = htons(NXAST_POP_QUEUE);
         } else if (!strcasecmp(act, "output")) {
             put_output_action(b, str_to_u32(arg));
         } else if (!strcasecmp(act, "enqueue")) {
diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index 569a70aba..1eaaa27d8 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -205,6 +205,17 @@ ofp_print_nx_action(struct ds *string, const struct nx_action_header *nah)
         ds_put_cstr(string, "drop_spoofed_arp");
         break;
 
+    case NXAST_SET_QUEUE: {
+        const struct nx_action_set_queue *nasq =
+                                            (struct nx_action_set_queue *)nah;
+        ds_put_format(string, "set_queue:%u", ntohl(nasq->queue_id));
+        break;
+    }
+
+    case NXAST_POP_QUEUE:
+        ds_put_cstr(string, "pop_queue");
+        break;
+
     default:
         ds_put_format(string, "***unknown Nicira action:%d***",
                       ntohs(nah->subtype));
diff --git a/lib/ofp-util.c b/lib/ofp-util.c
index 5171900a7..7a2e17cb2 100644
--- a/lib/ofp-util.c
+++ b/lib/ofp-util.c
@@ -565,6 +565,8 @@ check_nicira_action(const union ofp_action *a, unsigned int len)
     case NXAST_RESUBMIT:
     case NXAST_SET_TUNNEL:
     case NXAST_DROP_SPOOFED_ARP:
+    case NXAST_SET_QUEUE:
+    case NXAST_POP_QUEUE:
         return check_action_exact_len(a, len, 16);
     default:
         return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_BAD_VENDOR_TYPE);
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 00cac6e20..3d2989a62 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -2661,12 +2661,33 @@ xlate_enqueue_action(struct action_xlate_ctx *ctx,
     }
 }
 
+static void
+xlate_set_queue_action(struct action_xlate_ctx *ctx,
+                       const struct nx_action_set_queue *nasq)
+{
+    uint32_t priority;
+    int error;
+
+    error = dpif_queue_to_priority(ctx->ofproto->dpif, ntohl(nasq->queue_id),
+                                   &priority);
+    if (error) {
+        /* Couldn't translate queue to a priority, so ignore.  A warning
+         * has already been logged. */
+        return;
+    }
+
+    remove_pop_action(ctx);
+    odp_actions_add(ctx->out, ODPAT_SET_PRIORITY)->priority.priority
+        = priority;
+}
+
 static void
 xlate_nicira_action(struct action_xlate_ctx *ctx,
                     const struct nx_action_header *nah)
 {
     const struct nx_action_resubmit *nar;
     const struct nx_action_set_tunnel *nast;
+    const struct nx_action_set_queue *nasq;
     union odp_action *oa;
     int subtype = ntohs(nah->subtype);
 
@@ -2689,6 +2710,15 @@ xlate_nicira_action(struct action_xlate_ctx *ctx,
         }
         break;
 
+    case NXAST_SET_QUEUE:
+        nasq = (const struct nx_action_set_queue *) nah;
+        xlate_set_queue_action(ctx, nasq);
+        break;
+
+    case NXAST_POP_QUEUE:
+        odp_actions_add(ctx->out, ODPAT_POP_PRIORITY);
+        break;
+
     /* If you add a new action here that modifies flow data, don't forget to
      * update the flow key in ctx->flow at the same time. */
 
diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at
index 2a0ce2cf9..f6a5cd81e 100644
--- a/tests/ovs-ofctl.at
+++ b/tests/ovs-ofctl.at
@@ -5,8 +5,10 @@ AT_DATA([flows.txt], [
 # comment
 tcp,tp_src=123,actions=flood
 in_port=LOCAL dl_vlan=9 dl_src=00:0A:E4:25:6B:B0 actions=drop
-arp,nw_src=192.168.0.1,actions=drop_spoofed_arp,NORMAL
+arp,nw_src=192.168.0.1 actions=drop_spoofed_arp,NORMAL
 udp dl_vlan_pcp=7 idle_timeout=5 actions=strip_vlan output:0
+tcp,nw_src=192.168.0.3,tp_dst=80 actions=set_queue:37,output:1
+udp,nw_src=192.168.0.3,tp_dst=53 actions=pop_queue,output:1
 cookie=0x123456789abcdef hard_timeout=10 priority=60000 actions=controller
 actions=drop
 ])
@@ -16,6 +18,8 @@ flow_mod: tcp,tp_src=123, ADD: actions=FLOOD
 flow_mod: in_port=65534,dl_vlan=9,dl_src=00:0a:e4:25:6b:b0, ADD: actions=drop
 flow_mod: arp,nw_src=192.168.0.1, ADD: actions=drop_spoofed_arp,NORMAL
 flow_mod: udp,dl_vlan_pcp=7, ADD: idle:5 actions=strip_vlan,output:0
+flow_mod: tcp,nw_src=192.168.0.3,tp_dst=80, ADD: actions=set_queue:37,output:1
+flow_mod: udp,nw_src=192.168.0.3,tp_dst=53, ADD: actions=pop_queue,output:1
 flow_mod: ADD: cookie:0x123456789abcdef hard:10 pri:60000 actions=CONTROLLER:65535
 flow_mod: ADD: actions=drop
 ])
diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in
index c12b5f125..dbcf3a5f2 100644
--- a/utilities/ovs-ofctl.8.in
+++ b/utilities/ovs-ofctl.8.in
@@ -469,6 +469,16 @@ Ethernet header.
 This is useful because OpenFlow does not provide a way to match on the
 Ethernet addresses inside ARP packets, so there is no other way to
 drop spoofed ARPs other than sending every ARP packet to a controller.
+.
+.IP \fBset_queue\fR:\fIqueue\fR
+Sets the queue that should be used to \fIqueue\fR when packets are
+output.  The number of supported queues depends on the switch; some
+OpenFlow implementations do not support queuing at all.
+.
+.IP \fBpop_queue\fR
+Restores the queue to the value it was before any \fBset_queue\fR
+actions were applied.
+.
 .RE
 .
 .IP
-- 
cgit v1.2.1


From 4bee421f3ab28492aebc32b8b13e41ca5d12a936 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Fri, 1 Oct 2010 19:21:08 -0700
Subject: tunnel: Correctly check for internal device.

With header caching we check to see if the next device in the stack
is an OVS device and, if so, cache that flow as well.  However, the
test for this called internal_dev_get_vport() assuming that it would
return NULL if the device is not an internal device.  It doesn't,
however; it just returns the offset from the device where the vport
data structure would be if it were an internal device.  This changes
it to explicitly check for an internal device first to avoid a panic.

Bug #3470

Reported-by: Ram Jothikumar <rjothikumar@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Reviewed-by: Justin Pettit <jpettit@nicira.com>
---
 datapath/tunnel.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index 77f976fdc..ad4522808 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -1266,17 +1266,15 @@ int tnl_send(struct vport *vport, struct sk_buff *skb)
 
 		if (likely(cache)) {
 			int orig_len = skb->len - cache->len;
-			struct vport *cache_vport = internal_dev_get_vport(rt_dst(rt).dev);
 
 			skb->protocol = htons(ETH_P_IP);
-
 			iph->tot_len = htons(skb->len - skb_network_offset(skb));
 			ip_send_check(iph);
 
-			if (likely(cache_vport)) {
+			if (is_internal_dev(rt_dst(rt).dev)) {
 				OVS_CB(skb)->flow = cache->flow;
 				compute_ip_summed(skb, true);
-				vport_receive(cache_vport, skb);
+				vport_receive(internal_dev_get_vport(rt_dst(rt).dev), skb);
 				sent_len += orig_len;
 			} else {
 				int err;
-- 
cgit v1.2.1