From c259baae45d76592eb14db5abde20aa72e7f2605 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 31 Aug 2010 10:13:55 +0900 Subject: Debian: Add Joe Perches to debian/copyright This syncs debian/copyright with the AUTHORS file. Signed-off-by: Simon Horman Signed-off-by: Jesse Gross --- debian/copyright | 1 + 1 file changed, 1 insertion(+) diff --git a/debian/copyright b/debian/copyright index 56c4f2bbe..f4f4df6d5 100644 --- a/debian/copyright +++ b/debian/copyright @@ -15,6 +15,7 @@ Upstream Authors (from AUTHORS): Jean Tourrilhes jt@hpl.hp.com Jeremy Stribling strib@nicira.com Jesse Gross jesse@nicira.com + Joe Perches joe@perches.com Jun Nakajima jun.nakajima@intel.com Justin Pettit jpettit@nicira.com Keith Amidon keith@nicira.com -- cgit v1.2.1 From cd8055cce3e9dcd20c6125f064d73ef04b99aee4 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 31 Aug 2010 11:47:28 +0900 Subject: datapath: Include net/udp.h in vport-capwap.c net/udp.h is currently included indirectly via linux/ipv6.h which is in turn included indirectly via linux/ip.h. However, this breaks down if CONFIG_IPV6 is not set, leading to a number of build errors. 
Signed-off-by: Simon Horman [Jesse: shortened commit message] Signed-off-by: Jesse Gross --- datapath/vport-capwap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/datapath/vport-capwap.c b/datapath/vport-capwap.c index ce8cc43e2..7ae3790d7 100644 --- a/datapath/vport-capwap.c +++ b/datapath/vport-capwap.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "tunnel.h" #include "vport.h" -- cgit v1.2.1 From 476d56f51d4935ff437537db151a49c7db8cd897 Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Tue, 31 Aug 2010 14:16:14 -0700 Subject: Release Open vSwitch 1.1.0-pre1 --- ChangeLog | 7 +++++++ configure.ac | 2 +- debian/changelog | 9 +++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 29fcd2d8c..153d96695 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +v1.1.0-pre1 - 31 Aug 2010 +------------------------- + - OpenFlow 1.0 slicing (QoS) functionality + - Python bindings for configuration database (no write support) + - Performance and scalability improvements + - Bug fixes + v1.0.1 - 31 May 2010 -------------------- - New "patch" interface type diff --git a/configure.ac b/configure.ac index 3088fa5d8..7d7b4fe6f 100644 --- a/configure.ac +++ b/configure.ac @@ -13,7 +13,7 @@ # limitations under the License. AC_PREREQ(2.64) -AC_INIT(openvswitch, 1.0.1, ovs-bugs@openvswitch.org) +AC_INIT(openvswitch, 1.1.0-pre1, ovs-bugs@openvswitch.org) NX_BUILDNR AC_CONFIG_SRCDIR([datapath/datapath.c]) AC_CONFIG_MACRO_DIR([m4]) diff --git a/debian/changelog b/debian/changelog index 216e089e1..972226d58 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,12 @@ +openvswitch (1.1.0-pre1) unstable; urgency=low + + * OpenFlow 1.0 slicing (QoS) functionality + * Python bindings for configuration database (no write support) + * Performance and scalability improvements + * Bug fixes + + -- Open vSwitch team Tue, 31 Aug 2010 23:20:00 +0000 + openvswitch (1.0.1) unstable; urgency=low * New upstream version. 
-- cgit v1.2.1 From fdf2037829befd7e7fc337a477950fdc7f1cb09b Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Wed, 1 Sep 2010 02:26:06 -0700 Subject: XenServer builds barf on hyphens in versions. --- ChangeLog | 4 ++-- configure.ac | 2 +- debian/changelog | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 153d96695..6e7217a5e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,5 @@ -v1.1.0-pre1 - 31 Aug 2010 -------------------------- +v1.1.0pre1 - 31 Aug 2010 +------------------------ - OpenFlow 1.0 slicing (QoS) functionality - Python bindings for configuration database (no write support) - Performance and scalability improvements diff --git a/configure.ac b/configure.ac index 7d7b4fe6f..21f34739c 100644 --- a/configure.ac +++ b/configure.ac @@ -13,7 +13,7 @@ # limitations under the License. AC_PREREQ(2.64) -AC_INIT(openvswitch, 1.1.0-pre1, ovs-bugs@openvswitch.org) +AC_INIT(openvswitch, 1.1.0pre1, ovs-bugs@openvswitch.org) NX_BUILDNR AC_CONFIG_SRCDIR([datapath/datapath.c]) AC_CONFIG_MACRO_DIR([m4]) diff --git a/debian/changelog b/debian/changelog index 972226d58..89eb5176d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -openvswitch (1.1.0-pre1) unstable; urgency=low +openvswitch (1.1.0pre1) unstable; urgency=low * OpenFlow 1.0 slicing (QoS) functionality * Python bindings for configuration database (no write support) -- cgit v1.2.1 From f79cb67e689566cfbf3071e0ac0e29923ada5a97 Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Tue, 31 Aug 2010 18:41:32 -0700 Subject: netflow: Send multiple records for byte counts > UINT32_MAX When a NetFlow record is to be sent for a flow that had more than 2^32 bytes, we used to set the byte count to UINT32_MAX. With this change, we will send out multiple records to account for all the traffic. 
--- ofproto/netflow.c | 69 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 23 deletions(-) diff --git a/ofproto/netflow.c b/ofproto/netflow.c index 50ab80a69..015208ac9 100644 --- a/ofproto/netflow.c +++ b/ofproto/netflow.c @@ -103,26 +103,19 @@ struct netflow { long long int reconfig_time; /* When we reconfigured the timeouts. */ }; -void -netflow_expire(struct netflow *nf, struct netflow_flow *nf_flow, - struct ofexpired *expired) +static void +gen_netflow_rec(struct netflow *nf, struct netflow_flow *nf_flow, + struct ofexpired *expired, + uint32_t packet_count, uint32_t byte_count) { struct netflow_v5_header *nf_hdr; struct netflow_v5_record *nf_rec; - struct timespec now; - - nf_flow->last_expired += nf->active_timeout; - /* NetFlow only reports on IP packets and we should only report flows - * that actually have traffic. */ - if (expired->flow.dl_type != htons(ETH_TYPE_IP) || - expired->packet_count - nf_flow->packet_count_off == 0) { - return; - } + if (!nf->packet.size) { + struct timespec now; - time_wall_timespec(&now); + time_wall_timespec(&now); - if (!nf->packet.size) { nf_hdr = ofpbuf_put_zeros(&nf->packet, sizeof *nf_hdr); nf_hdr->version = htons(NETFLOW_V5_VERSION); nf_hdr->count = htons(0); @@ -150,10 +143,8 @@ netflow_expire(struct netflow *nf, struct netflow_flow *nf_flow, nf_rec->input = htons(expired->flow.in_port); nf_rec->output = htons(nf_flow->output_iface); } - nf_rec->packet_count = htonl(MIN(expired->packet_count - - nf_flow->packet_count_off, UINT32_MAX)); - nf_rec->byte_count = htonl(MIN(expired->byte_count - - nf_flow->byte_count_off, UINT32_MAX)); + nf_rec->packet_count = htonl(packet_count); + nf_rec->byte_count = htonl(byte_count); nf_rec->init_time = htonl(nf_flow->created - nf->boot_time); nf_rec->used_time = htonl(MAX(nf_flow->created, expired->used) - nf->boot_time); @@ -172,16 +163,48 @@ netflow_expire(struct netflow *nf, struct netflow_flow *nf_flow, nf_rec->ip_proto = 
expired->flow.nw_proto; nf_rec->ip_tos = expired->flow.nw_tos; + /* NetFlow messages are limited to 30 records. */ + if (ntohs(nf_hdr->count) >= 30) { + netflow_run(nf); + } +} + +void +netflow_expire(struct netflow *nf, struct netflow_flow *nf_flow, + struct ofexpired *expired) +{ + uint64_t pkt_delta = expired->packet_count - nf_flow->packet_count_off; + uint64_t byte_delta = expired->byte_count - nf_flow->byte_count_off; + + nf_flow->last_expired += nf->active_timeout; + + /* NetFlow only reports on IP packets and we should only report flows + * that actually have traffic. */ + if (expired->flow.dl_type != htons(ETH_TYPE_IP) || pkt_delta == 0) { + return; + } + + /* NetFlow v5 records are limited to 32-bit counters. If we've + * wrapped a counter, send as multiple records so we don't lose + * track of any traffic. We try to evenly distribute the packet and + * byte counters, so that the bytes-per-packet lengths don't look + * wonky across the records. */ + while (byte_delta) { + int n_recs = (byte_delta + UINT32_MAX - 1) / UINT32_MAX; + uint32_t pkt_count = pkt_delta / n_recs; + uint32_t byte_count = byte_delta / n_recs; + + gen_netflow_rec(nf, nf_flow, expired, pkt_count, byte_count); + + pkt_delta -= pkt_count; + byte_delta -= byte_count; + } + /* Update flow tracking data. */ nf_flow->created = 0; nf_flow->packet_count_off = expired->packet_count; nf_flow->byte_count_off = expired->byte_count; nf_flow->tcp_flags = 0; - - /* NetFlow messages are limited to 30 records. */ - if (ntohs(nf_hdr->count) >= 30) { - netflow_run(nf); - } } void -- cgit v1.2.1 From 68efcbec41b0acfd8bb7579a5d38afd71b6daf7c Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 1 Sep 2010 12:55:38 -0700 Subject: ofpbuf: Add ofpbuf_new_with_headroom(), ofpbuf_clone_with_headroom(). These new functions simplify an increasingly common usage pattern. 
Suggested-by: Jesse Gross --- lib/dpif-linux.c | 3 +-- lib/dpif-netdev.c | 3 +-- lib/ofpbuf.c | 20 ++++++++++++++++++++ lib/ofpbuf.h | 3 +++ ofproto/pktbuf.c | 4 ++-- 5 files changed, 27 insertions(+), 6 deletions(-) diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index 52d73c6bb..2c688e3af 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -478,8 +478,7 @@ dpif_linux_recv(struct dpif *dpif_, struct ofpbuf **bufp) int retval; int error; - buf = ofpbuf_new(65536 + DPIF_RECV_MSG_PADDING); - ofpbuf_reserve(buf, DPIF_RECV_MSG_PADDING); + buf = ofpbuf_new_with_headroom(65536, DPIF_RECV_MSG_PADDING); retval = read(dpif->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf)); if (retval < 0) { error = errno; diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 08a721340..323f36411 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1262,8 +1262,7 @@ dp_netdev_output_control(struct dp_netdev *dp, const struct ofpbuf *packet, } msg_size = sizeof *header + packet->size; - msg = ofpbuf_new(msg_size + DPIF_RECV_MSG_PADDING); - ofpbuf_reserve(msg, DPIF_RECV_MSG_PADDING); + msg = ofpbuf_new_with_headroom(msg_size, DPIF_RECV_MSG_PADDING); header = ofpbuf_put_uninit(msg, sizeof *header); header->type = queue_no; header->length = msg_size; diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c index 5693eefda..bf5567251 100644 --- a/lib/ofpbuf.c +++ b/lib/ofpbuf.c @@ -75,12 +75,32 @@ ofpbuf_new(size_t size) return b; } +/* Creates and returns a new ofpbuf with an initial capacity of 'size + + * headroom' bytes, reserving the first 'headroom' bytes as headroom. */ +struct ofpbuf * +ofpbuf_new_with_headroom(size_t size, size_t headroom) +{ + struct ofpbuf *b = ofpbuf_new(size + headroom); + ofpbuf_reserve(b, headroom); + return b; +} + struct ofpbuf * ofpbuf_clone(const struct ofpbuf *buffer) { return ofpbuf_clone_data(buffer->data, buffer->size); } +/* Creates and returns a new ofpbuf whose data are copied from 'buffer'. 
The + * returned ofpbuf will additionally have 'headroom' bytes of headroom. */ +struct ofpbuf * +ofpbuf_clone_with_headroom(const struct ofpbuf *buffer, size_t headroom) +{ + struct ofpbuf *b = ofpbuf_new_with_headroom(buffer->size, headroom); + ofpbuf_put(b, buffer->data, buffer->size); + return b; +} + struct ofpbuf * ofpbuf_clone_data(const void *data, size_t size) { diff --git a/lib/ofpbuf.h b/lib/ofpbuf.h index 736b8f5e5..5e20aab0b 100644 --- a/lib/ofpbuf.h +++ b/lib/ofpbuf.h @@ -48,7 +48,10 @@ void ofpbuf_uninit(struct ofpbuf *); void ofpbuf_reinit(struct ofpbuf *, size_t); struct ofpbuf *ofpbuf_new(size_t); +struct ofpbuf *ofpbuf_new_with_headroom(size_t, size_t headroom); struct ofpbuf *ofpbuf_clone(const struct ofpbuf *); +struct ofpbuf *ofpbuf_clone_with_headroom(const struct ofpbuf *, + size_t headroom); struct ofpbuf *ofpbuf_clone_data(const void *, size_t); void ofpbuf_delete(struct ofpbuf *); diff --git a/ofproto/pktbuf.c b/ofproto/pktbuf.c index 67adb5606..aa9029542 100644 --- a/ofproto/pktbuf.c +++ b/ofproto/pktbuf.c @@ -112,8 +112,8 @@ pktbuf_save(struct pktbuf *pb, struct ofpbuf *buffer, uint16_t in_port) if (++p->cookie >= COOKIE_MAX) { p->cookie = 0; } - p->buffer = ofpbuf_new(sizeof(struct ofp_packet_in) + buffer->size); - ofpbuf_reserve(p->buffer, sizeof(struct ofp_packet_in)); + p->buffer = ofpbuf_new_with_headroom(buffer->size, + sizeof(struct ofp_packet_in)); ofpbuf_put(p->buffer, buffer->data, buffer->size); p->timeout = time_msec() + OVERWRITE_MSECS; p->in_port = in_port; -- cgit v1.2.1 From c9b5816cfd8fccc2abedd258d914882033bfcf0a Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 1 Sep 2010 10:17:40 -0700 Subject: ofproto: Get enough headroom in fail-open when connected to controller. Since commit 750638bb "ofproto: Avoid ofpbuf_clone() for OFPAT_CONTROLLER common case," send_packet_in() needs at least DPIF_RECV_MSG_PADDING bytes of headroom, which ofpbuf_clone() doesn't supply. This commit should fix that. 
This fixes an assertion failure in ofpbuf_prealloc_headroom() via send_packet_in(). Reported-by: Justin Pettit --- ofproto/ofproto.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index adc52827c..844083d8b 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -4154,7 +4154,8 @@ handle_odp_miss_msg(struct ofproto *p, struct ofpbuf *packet) * * See the top-level comment in fail-open.c for more information. */ - send_packet_in(p, ofpbuf_clone(packet)); + send_packet_in(p, ofpbuf_clone_with_headroom(packet, + DPIF_RECV_MSG_PADDING)); } ofpbuf_pull(packet, sizeof *msg); -- cgit v1.2.1 From e61070c32030d6d00e2eeae213d219320a7cbd10 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 1 Sep 2010 16:49:19 -0700 Subject: Rename "xs-vif-mac" external_ids key to "attached-mac", for generality. This can be useful on systems other than XenServer so there is no reason to make it look XenServer-specific. CC: Jeremy Stribling Signed-off-by: Ben Pfaff --- vswitchd/vswitch.xml | 15 ++++++++------- xenserver/etc_xensource_scripts_vif | 2 +- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 38dc6a1aa..979fd5dfe 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -665,8 +665,15 @@ integrators should either use the Open vSwitch development mailing list to coordinate on common key-value definitions, or choose key names that are likely to be unique. The currently - defined common key-value pair is: + defined common key-value pairs are:
+
attached-mac
+
+ The MAC address programmed into the ``virtual hardware'' for this + interface, in the form + xx:xx:xx:xx:xx:xx. + For Citrix XenServer, this is the value of the MAC + field in the VIF record for this interface.
iface-id
A system-unique identifier for the interface. On XenServer, this will commonly be the same as xs-vif-uuid.
@@ -689,12 +696,6 @@
The virtual network to which this interface is attached.
xs-vm-uuid
The VM to which this interface belongs.
-
xs-vif-mac
-
The MAC address programmed into the "virtual hardware" for this - interface, in the - form xx:xx:xx:xx:xx:xx. - For Citrix XenServer, this is the value of the MAC - field in the VIF record for this interface.
diff --git a/xenserver/etc_xensource_scripts_vif b/xenserver/etc_xensource_scripts_vif index 91b75c123..f27ff5b40 100755 --- a/xenserver/etc_xensource_scripts_vif +++ b/xenserver/etc_xensource_scripts_vif @@ -114,7 +114,7 @@ handle_vswitch_vif_details() local address=$(xenstore-read "/local/domain/$DOMID/device/vif/$DEVID/mac" 2>/dev/null) if [ -n "${address}" ] ; then - set_vif_external_id "xs-vif-mac" "${address}" + set_vif_external_id "attached-mac" "${address}" fi if $xs550; then -- cgit v1.2.1 From 82e959195414246dece1e4c636b40d06d65211a7 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 1 Sep 2010 10:50:49 -0700 Subject: debian: Generate authorship in debian/copyright from AUTHORS. --- debian/.gitignore | 1 + debian/automake.mk | 10 ++++++++ debian/copyright | 68 ----------------------------------------------------- debian/copyright.in | 41 ++++++++++++++++++++++++++++++++ 4 files changed, 52 insertions(+), 68 deletions(-) delete mode 100644 debian/copyright create mode 100644 debian/copyright.in diff --git a/debian/.gitignore b/debian/.gitignore index 3beef4405..7f43aa6ed 100644 --- a/debian/.gitignore +++ b/debian/.gitignore @@ -3,6 +3,7 @@ *.substvars /control /corekeeper +/copyright /files /nicira-switch /openvswitch diff --git a/debian/automake.mk b/debian/automake.mk index 5a23d4632..ba9ea861d 100644 --- a/debian/automake.mk +++ b/debian/automake.mk @@ -4,6 +4,7 @@ EXTRA_DIST += \ debian/control \ debian/control.modules.in \ debian/copyright \ + debian/copyright.in \ debian/corekeeper.cron.daily \ debian/corekeeper.init \ debian/corekeeper.override \ @@ -50,3 +51,12 @@ check-debian-changelog-version: fi ALL_LOCAL += check-debian-changelog-version DIST_HOOKS += check-debian-changelog-version + +$(srcdir)/debian/copyright: AUTHORS debian/copyright.in + { sed -n -e '/%AUTHORS%/q' -e p < $(srcdir)/debian/copyright.in; \ + sed '1,/^$$/d' $(srcdir)/AUTHORS | \ + sed -n -e '/^$$/q' -e 's/^/ /p'; \ + sed -e '1,/%AUTHORS%/d' $(srcdir)/debian/copyright.in; \ + 
} > $@ + +DISTCLEANFILES += debian/copyright diff --git a/debian/copyright b/debian/copyright deleted file mode 100644 index f4f4df6d5..000000000 --- a/debian/copyright +++ /dev/null @@ -1,68 +0,0 @@ -The original sources for this package can be found at: - - http://openvswitch.org/ - - -Upstream Authors (from AUTHORS): - - Andy Southgate andy.southgate@citrix.com - Ben Pfaff blp@nicira.com - Bryan Phillippe bp@toroki.com - Dan Wendlandt dan@nicira.com - David Erickson derickso@stanford.edu - Glen Gibb grg@stanford.edu - Ian Campbell Ian.Campbell@citrix.com - Jean Tourrilhes jt@hpl.hp.com - Jeremy Stribling strib@nicira.com - Jesse Gross jesse@nicira.com - Joe Perches joe@perches.com - Jun Nakajima jun.nakajima@intel.com - Justin Pettit jpettit@nicira.com - Keith Amidon keith@nicira.com - Martin Casado casado@nicira.com - Natasha Gude natasha@nicira.com - Neil McKee neil.mckee@inmon.com - Paul Fazzone pfazzone@nicira.com - Reid Price reid@nicira.com - Simon Horman horms@verge.net.au - Tetsuo NAKAGAWA nakagawa@mxc.nes.nec.co.jp - Thomas Lacroix thomas.lacroix@citrix.com - Todd Deshane deshantm@gmail.com - Tom Everman teverman@google.com - Tsvi Slonim tsvi@toroki.com - Wei Yongjun yjwei@cn.fujitsu.com - Yu Zhiguo yuzg@cn.fujitsu.com - -Upstream Copyright Holders: - - Copyright (c) 2007, 2008, 2009, 2010 Nicira Networks. - Copyright (c) 2010 Jean Tourrilhes - HP-Labs. - Copyright (c) 2008,2009,2010 Citrix Systems, Inc. - and authors listed above. - -License: - -* The following components are licensed under the GNU General Public Licence - version 2. - - datapath/ - - On Debian systems, the complete text of the GNU General Public Licence - version 2 can be found in `/usr/share/common-licenses/GPL-2' - -* The following components are dual-licensed under the - GNU General Public Licence version 3 and the Apache Licence Version 2.0. 
- - include/openvswitch/ - - On Debian systems, the complete text of the GNU General Public Licence - version 2 can be found in `/usr/share/common-licenses/GPL-2'. - On Debian systems, the complete text of the Apache License version 2.0 - can be found in '/usr/share/common-licenses/Apache-2.0'. - -* All other components of this package are licensed under - The Apache License Version 2.0. - - On Debian systems, the complete text of the Apache License version 2.0 - can be found in '/usr/share/common-licenses/Apache-2.0'. - diff --git a/debian/copyright.in b/debian/copyright.in new file mode 100644 index 000000000..ae1a78868 --- /dev/null +++ b/debian/copyright.in @@ -0,0 +1,41 @@ +The original sources for this package can be found at: + + http://openvswitch.org/ + +Upstream Authors (from AUTHORS): + +%AUTHORS% + +Upstream Copyright Holders: + + Copyright (c) 2007, 2008, 2009, 2010 Nicira Networks. + Copyright (c) 2010 Jean Tourrilhes - HP-Labs. + Copyright (c) 2008,2009,2010 Citrix Systems, Inc. + and authors listed above. + +License: + +* The following components are licensed under the GNU General Public Licence + version 2. + + datapath/ + + On Debian systems, the complete text of the GNU General Public Licence + version 2 can be found in `/usr/share/common-licenses/GPL-2' + +* The following components are dual-licensed under the + GNU General Public Licence version 3 and the Apache Licence Version 2.0. + + include/openvswitch/ + + On Debian systems, the complete text of the GNU General Public Licence + version 2 can be found in `/usr/share/common-licenses/GPL-2'. + On Debian systems, the complete text of the Apache License version 2.0 + can be found in '/usr/share/common-licenses/Apache-2.0'. + +* All other components of this package are licensed under + The Apache License Version 2.0. + + On Debian systems, the complete text of the Apache License version 2.0 + can be found in '/usr/share/common-licenses/Apache-2.0'. 
+ -- cgit v1.2.1 From e215ebca67720188724344eb198c5045c02e375f Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 8 Sep 2010 10:04:47 -0700 Subject: datapath: Check for backported __wsum and __sum16. Reported-by: Alexey I. Froloff Signed-off-by: Jesse Gross Acked-by: Ben Pfaff --- acinclude.m4 | 2 ++ datapath/linux-2.6/compat-2.6/include/linux/kernel.h | 1 + datapath/linux-2.6/compat-2.6/include/linux/types.h | 7 ++----- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/acinclude.m4 b/acinclude.m4 index 80794dac3..f1322fa0e 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -188,6 +188,8 @@ AC_DEFUN([OVS_CHECK_LINUX26_COMPAT], [ OVS_GREP_IFELSE([$KSRC26/include/linux/types.h], [bool], [OVS_DEFINE([HAVE_BOOL_TYPE])]) + OVS_GREP_IFELSE([$KSRC26/include/linux/types.h], [__wsum], + [OVS_DEFINE([HAVE_CSUM_TYPES])]) OVS_GREP_IFELSE([$KSRC26/include/net/checksum.h], [csum_unfold], [OVS_DEFINE([HAVE_CSUM_UNFOLD])]) diff --git a/datapath/linux-2.6/compat-2.6/include/linux/kernel.h b/datapath/linux-2.6/compat-2.6/include/linux/kernel.h index 1f65c099a..13361f78d 100644 --- a/datapath/linux-2.6/compat-2.6/include/linux/kernel.h +++ b/datapath/linux-2.6/compat-2.6/include/linux/kernel.h @@ -6,6 +6,7 @@ #include #endif +#include #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) #undef pr_emerg #define pr_emerg(fmt, ...) 
\ diff --git a/datapath/linux-2.6/compat-2.6/include/linux/types.h b/datapath/linux-2.6/compat-2.6/include/linux/types.h index d88baf71c..b989d96c3 100644 --- a/datapath/linux-2.6/compat-2.6/include/linux/types.h +++ b/datapath/linux-2.6/compat-2.6/include/linux/types.h @@ -3,13 +3,10 @@ #include_next -#include -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) - +#ifndef HAVE_CSUM_TYPES typedef __u16 __bitwise __sum16; typedef __u32 __bitwise __wsum; - -#endif /* linux kernel < 2.6.20 */ +#endif #ifndef HAVE_BOOL_TYPE typedef _Bool bool; -- cgit v1.2.1 From 933df876ffa272d9d5768edf7fc5465261888ad2 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 10 Sep 2010 09:17:29 -0700 Subject: ovs-ofctl: Add support for drop_spoofed_arp action. Requested-by: Michael Mao --- include/openflow/nicira-ext.h | 2 +- lib/ofp-parse.c | 5 +++++ utilities/ovs-ofctl.8.in | 9 +++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h index 885e01da6..c97478faf 100644 --- a/include/openflow/nicira-ext.h +++ b/include/openflow/nicira-ext.h @@ -141,7 +141,7 @@ enum nx_action_subtype { * * This is useful because OpenFlow does not provide a way to match on the * Ethernet addresses inside ARP packets, so there is no other way to drop - * spoofed ARPs other than sending every packet up to the controller. */ + * spoofed ARPs other than sending every ARP packet to a controller. 
*/ NXAST_DROP_SPOOFED_ARP }; diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c index cc1419a0e..06d5bd11d 100644 --- a/lib/ofp-parse.c +++ b/lib/ofp-parse.c @@ -263,6 +263,11 @@ str_to_action(char *str, struct ofpbuf *b) nast->vendor = htonl(NX_VENDOR_ID); nast->subtype = htons(NXAST_SET_TUNNEL); nast->tun_id = htonl(str_to_u32(arg)); + } else if (!strcasecmp(act, "drop_spoofed_arp")) { + struct nx_action_header *nah; + nah = put_action(b, sizeof *nah, OFPAT_VENDOR); + nah->vendor = htonl(NX_VENDOR_ID); + nah->subtype = htons(NXAST_DROP_SPOOFED_ARP); } else if (!strcasecmp(act, "output")) { put_output_action(b, str_to_u32(arg)); } else if (!strcasecmp(act, "enqueue")) { diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in index f51f87a14..7de788e1c 100644 --- a/utilities/ovs-ofctl.8.in +++ b/utilities/ovs-ofctl.8.in @@ -451,6 +451,15 @@ addition to any other actions in this flow entry. Recursive If outputting to a port that encapsulates the packet in a tunnel and supports an identifier (such as GRE), sets the identifier to \fBid\fR. . +.IP \fBdrop_spoofed_arp\fR +Stops processing further actions, if the packet being processed is an +Ethernet+IPv4 ARP packet for which the source Ethernet address inside +the ARP packet differs from the source Ethernet address in the +Ethernet header. +. +This is useful because OpenFlow does not provide a way to match on the +Ethernet addresses inside ARP packets, so there is no other way to +drop spoofed ARPs other than sending every ARP packet to a controller. .RE . .IP -- cgit v1.2.1 From 6784cb57cf432197c497764cdb01cecd68f10362 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 10 Sep 2010 09:57:01 -0700 Subject: vswitchd: Add some references for the HTB classifier to the documentation. 
--- vswitchd/vswitch.xml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 979fd5dfe..b9d8aaa9f 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -775,7 +775,12 @@ defined types are listed below:

linux-htb
-
Linux ``hierarchy token bucket'' classifier.
+
+ Linux ``hierarchy token bucket'' classifier. See tc-htb(8) (also at + http://linux.die.net/man/8/tc-htb) and the HTB manual + (http://luxik.cdi.cz/~devik/qos/htb/manual/userg.htm) + for information on how this classifier works and how to configure it. +
-- cgit v1.2.1 From 29e21ea26944731f9b9681fecedc81cd5c86f342 Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Fri, 10 Sep 2010 14:20:49 -0700 Subject: xenserver: Don't monitor external-ids until XAPI is up monitor-external-ids can't complete all its tasks until XAPI is up. The daemon is usually started before XAPI, so it can miss events. This commit causes the daemon to block until XAPI is finished initializing. --- xenserver/usr_share_openvswitch_scripts_monitor-external-ids | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids index c87171f06..f91801d22 100755 --- a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids +++ b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids @@ -21,9 +21,11 @@ # Bridge table and duplicates its value to the preferred "xs-network-uuids". import getopt +import os import subprocess import sys import syslog +import time import XenAPI @@ -189,6 +191,11 @@ def main(argv): idl = ovs.db.idl.Idl(remote, "Open_vSwitch", monitor_uuid_schema_cb) ovs.daemon.daemonize() + + # This daemon is usually started before XAPI, but to complete our + # tasks, we need it. Wait here until it's up. + while not os.path.exists("/var/run/xapi_init_complete.cookie"): + time.sleep(1) bridges = {} interfaces = {} -- cgit v1.2.1 From b14c5fe9bd85b440d1727934c84d5f7fded051f7 Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Fri, 10 Sep 2010 14:32:41 -0700 Subject: xenserver: Don't delete pidfile when stopping monitor-external-ids It's not necessary to explicitly delete the pidfile when stopping monitor-external-ids through the init script, since the daemon will take care of that. 
--- xenserver/etc_init.d_openvswitch | 1 - 1 file changed, 1 deletion(-) diff --git a/xenserver/etc_init.d_openvswitch b/xenserver/etc_init.d_openvswitch index 7b86d4c66..050844570 100755 --- a/xenserver/etc_init.d_openvswitch +++ b/xenserver/etc_init.d_openvswitch @@ -358,7 +358,6 @@ function stop { stop_daemon OVSDB_SERVER "$ovsdb_server" if [ -e /var/run/openvswitch/monitor-external-ids.pid ]; then kill `cat /var/run/openvswitch/monitor-external-ids.pid` - rm /var/run/openvswitch/monitor-external-ids.pid fi rm -f /var/lock/subsys/openvswitch } -- cgit v1.2.1 From d59051362fa8ac4369f1be69ac942a52c9a424b9 Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Mon, 13 Sep 2010 21:55:56 -0700 Subject: Release Open vSwitch 1.1.0pre2 --- ChangeLog | 4 ++++ configure.ac | 2 +- debian/changelog | 6 ++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 6e7217a5e..c816ed7c9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +v1.1.0pre2 - 13 Sep 2010 +------------------------ + - Bug fixes + v1.1.0pre1 - 31 Aug 2010 ------------------------ - OpenFlow 1.0 slicing (QoS) functionality diff --git a/configure.ac b/configure.ac index 21f34739c..08a6f0fdf 100644 --- a/configure.ac +++ b/configure.ac @@ -13,7 +13,7 @@ # limitations under the License. 
AC_PREREQ(2.64) -AC_INIT(openvswitch, 1.1.0pre1, ovs-bugs@openvswitch.org) +AC_INIT(openvswitch, 1.1.0pre2, ovs-bugs@openvswitch.org) NX_BUILDNR AC_CONFIG_SRCDIR([datapath/datapath.c]) AC_CONFIG_MACRO_DIR([m4]) diff --git a/debian/changelog b/debian/changelog index 89eb5176d..6a8ab5879 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +openvswitch (1.1.0pre2) unstable; urgency=low + + * Bug fixes + + -- Open vSwitch team Mon, 13 Sep 2010 21:50:00 +0000 + openvswitch (1.1.0pre1) unstable; urgency=low * OpenFlow 1.0 slicing (QoS) functionality -- cgit v1.2.1 From a44be3f0cf616166f550ba65769766a577b4eaf5 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 13 Sep 2010 09:16:29 -0700 Subject: REPORTING-BUGS: Rewrite based on experience. Burying the description of the problem, which is usually the most important part in my experience, at the bottom of the REPORTING-BUGS file might be the reason why we don't get good descriptions sometimes. It is certainly not the reason in other cases, but we might as well prioritize a bit better. --- REPORTING-BUGS | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/REPORTING-BUGS b/REPORTING-BUGS index 75da3d6eb..812bfba0f 100644 --- a/REPORTING-BUGS +++ b/REPORTING-BUGS @@ -5,8 +5,17 @@ We are eager to hear from users about problems that they have encountered with Open vSwitch. This file documents how best to report bugs so as to ensure that they can be fixed as quickly as possible. -Please report bugs by sending email to bugs@openvswitch.org. Include -as much of the following information as you can in your report: +Please report bugs by sending email to bugs@openvswitch.org. + +The most important parts of your bug report are the following: + + * What you did that make the problem appear. + + * What you expected to happen. + + * What actually happened. + +Please also include the following information: * The Open vSwitch version number (as output by "ovs-vswitchd --version"). 
@@ -16,6 +25,8 @@ as much of the following information as you can in your report: * Any local patches or changes you have applied (if any). +The following are also handy sometimes: + * The kernel version on which Open vSwitch is running (from /proc/version) and the distribution and version number of your OS (e.g. "Centos 5.0"). @@ -28,15 +39,7 @@ as much of the following information as you can in your report: * If you have Open vSwitch configured to connect to an OpenFlow controller, the output of "ovs-ofctl show " for each configured in the vswitchd configuration - file. - - * A description of the problem, which should include: - - - What you did that make the problem appear. - - - What you expected to happen. - - - What actually happened. + database. * A fix or workaround, if you have one. -- cgit v1.2.1 From fd2a9392411147654dad43f93741a1aa634187b5 Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Tue, 14 Sep 2010 08:36:55 -0700 Subject: datapath: Increase default MTU on patch ports The default MTU on patch ports was 1500, which would cause jumbo frames to get dropped between the ends of the patch. It also dropped the MTU of attached bridges to no more than 1500 bytes. This patch increases the default MTU to 65535. Long term, we should eliminate MTU on patch ports entirely. Signed-off-by: Justin Pettit --- datapath/vport-patch.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datapath/vport-patch.c b/datapath/vport-patch.c index d55a1bf2d..62fd71f79 100644 --- a/datapath/vport-patch.c +++ b/datapath/vport-patch.c @@ -136,7 +136,10 @@ static struct vport *patch_create(const char *name, const void __user *config) } vport_gen_rand_ether_addr(patch_vport->devconf->eth_addr); - patch_vport->devconf->mtu = ETH_DATA_LEN; + + /* Make the default MTU fairly large so that it doesn't become the + * bottleneck on systems using jumbo frames. 
*/ + patch_vport->devconf->mtu = 65535; return vport; -- cgit v1.2.1 From 722d19c504351a3e1a6f64e5a01ff9806eb089a4 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 14 Sep 2010 13:32:36 -0700 Subject: datapath: Increase maximum number of actions per flow. Until now the number of actions in a flow has been limited to what fits in a page. Each action is 8 bytes, and on 32-bit architectures there is a 12-byte header, so with 4-kB pages that limits flows to 510 actions. We and Citrix have noticed that OVS stops working properly after about 509 VIFs are added to a bridge. According to log messages this is the reason: at this point it is no longer possible to flood a packet to all ports. This commit should help, by increasing the maximum number of actions in a flow. In the long term, though, we should adopt use of port groups or otherwise reduce the number of actions needed to flood a packet. Signed-off-by: Ben Pfaff Bug #3573. NIC-234. --- datapath/flow.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datapath/flow.c b/datapath/flow.c index dbfe5dd73..7684c061a 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -108,7 +108,10 @@ struct sw_flow_actions *flow_actions_alloc(size_t n_actions) { struct sw_flow_actions *sfa; - if (n_actions > (PAGE_SIZE - sizeof *sfa) / sizeof(union odp_action)) + /* At least DP_MAX_PORTS actions are required to be able to flood a + * packet to every port. Factor of 2 allows for setting VLAN tags, + * etc. 
*/ + if (n_actions > 2 * DP_MAX_PORTS) return ERR_PTR(-EINVAL); sfa = kmalloc(sizeof *sfa + n_actions * sizeof(union odp_action), -- cgit v1.2.1 From 60bb134ae3aa7f60d2b20ce007b2a52bd9b14d14 Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Tue, 14 Sep 2010 15:10:46 -0700 Subject: vswitchd: Remove duplicate "external_id" from Interface table --- vswitchd/vswitch.ovsschema | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema index c8a4963af..a7d257036 100644 --- a/vswitchd/vswitch.ovsschema +++ b/vswitchd/vswitch.ovsschema @@ -139,10 +139,7 @@ "ephemeral": true}, "status": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}, - "ephemeral": true}, - "external_ids": { - "type": {"key": "string", "value": "string", - "min": 0, "max": "unlimited"}}}}, + "ephemeral": true}}}, "QoS": { "columns": { "type": { -- cgit v1.2.1 From 24926bc2276a480d5a36667d26020f828a7122c2 Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Tue, 14 Sep 2010 17:57:53 -0700 Subject: ovsdb-tool: Remove reference to non-implemented "extract-schema" command While useful sounding, ovsdb-tool does not actually implement a command called "extract-schema". 
--- ovsdb/ovsdb-tool.c | 1 - 1 file changed, 1 deletion(-) diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c index 0da208590..f4bb701d1 100644 --- a/ovsdb/ovsdb-tool.c +++ b/ovsdb/ovsdb-tool.c @@ -110,7 +110,6 @@ usage(void) " create DB SCHEMA create DB with the given SCHEMA\n" " compact DB [DST] compact DB in-place (or to DST)\n" " convert DB SCHEMA [DST] convert DB to SCHEMA (to DST)\n" - " extract-schema DB print DB's schema on stdout\n" " query DB TRNS execute read-only transaction on DB\n" " transact DB TRNS execute read/write transaction on DB\n" " show-log DB prints information about DB's log entries\n", -- cgit v1.2.1 From 92467099ee05b930b060d34d05b81bb1322e6fbf Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 15 Sep 2010 12:47:10 -0700 Subject: ofp-util: Also accept NXAST_DROP_SPOOFED_ARP. Reported-by: Bryan Fulton Reported-by: Michael Mao Bug #3655. --- AUTHORS | 1 + lib/ofp-util.c | 1 + 2 files changed, 2 insertions(+) diff --git a/AUTHORS b/AUTHORS index bf0e34271..9e1dfa0b9 100644 --- a/AUTHORS +++ b/AUTHORS @@ -33,6 +33,7 @@ The following additional people are mentioned in commit logs as having provided helpful bug reports or suggestions. Brandon Heller brandonh@stanford.edu +Bryan Fulton bryan@nicira.com Cedric Hobbs cedric@nicira.com Ghanem Bahri bahri.ghanem@gmail.com Henrik Amren henrik@nicira.com diff --git a/lib/ofp-util.c b/lib/ofp-util.c index 89f368950..5171900a7 100644 --- a/lib/ofp-util.c +++ b/lib/ofp-util.c @@ -564,6 +564,7 @@ check_nicira_action(const union ofp_action *a, unsigned int len) switch (ntohs(nah->subtype)) { case NXAST_RESUBMIT: case NXAST_SET_TUNNEL: + case NXAST_DROP_SPOOFED_ARP: return check_action_exact_len(a, len, 16); default: return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_BAD_VENDOR_TYPE); -- cgit v1.2.1 From a77d89b84ad05d880f9ad7c5b5bd3f7d221d76f3 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 15 Sep 2010 13:26:08 -0700 Subject: Properly print drop_spoofed_arp actions when decoding OpenFlow and ODP. 
Also fix formatting of unknown Nicira actions in OpenFlow. --- lib/odp-util.c | 3 +++ lib/ofp-print.c | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/odp-util.c b/lib/odp-util.c index 442c939a7..798e42540 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -95,6 +95,9 @@ format_odp_action(struct ds *ds, const union odp_action *a) case ODPAT_POP_PRIORITY: ds_put_cstr(ds, "pop_priority"); break; + case ODPAT_DROP_SPOOFED_ARP: + ds_put_cstr(ds, "drop_spoofed_arp"); + break; default: ds_put_format(ds, "***bad action 0x%"PRIx16"***", a->type); break; diff --git a/lib/ofp-print.c b/lib/ofp-print.c index 870487816..78f3649ba 100644 --- a/lib/ofp-print.c +++ b/lib/ofp-print.c @@ -200,8 +200,12 @@ ofp_print_nx_action(struct ds *string, const struct nx_action_header *nah) break; } + case NXAST_DROP_SPOOFED_ARP: + ds_put_cstr(string, "drop_spoofed_arp"); + break; + default: - ds_put_format(string, "***unknown Nicira action:%d***\n", + ds_put_format(string, "***unknown Nicira action:%d***", ntohs(nah->subtype)); } } -- cgit v1.2.1 From a154533795474bd8d13a2a935c4b6719215d6907 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 15 Sep 2010 15:21:03 -0700 Subject: ovs-ofctl, ovs-controller: Disable flow idle timeout by default. Until now, flows set up by ovs-ofctl and by "ovs-controller --with-flows" by default expired after 60 seconds of inactivity. This was surprising, especially in the latter case where one is normally trying to set up permanent flows. Even in the former case, however, we can't think of a good reason that flows added by ovs-ofctl should expire by default. So this commit makes flows permanent by default. 
Reported-by: Michael Mao --- lib/ofp-parse.c | 4 +--- utilities/ovs-ofctl.8.in | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c index 06d5bd11d..405008c67 100644 --- a/lib/ofp-parse.c +++ b/lib/ofp-parse.c @@ -33,8 +33,6 @@ VLOG_DEFINE_THIS_MODULE(ofp_parse) -#define DEFAULT_IDLE_TIMEOUT 60 - static uint32_t str_to_u32(const char *str) { @@ -402,7 +400,7 @@ parse_ofp_str(char *string, struct ofp_match *match, struct ofpbuf *actions, *priority = OFP_DEFAULT_PRIORITY; } if (idle_timeout) { - *idle_timeout = DEFAULT_IDLE_TIMEOUT; + *idle_timeout = OFP_FLOW_PERMANENT; } if (hard_timeout) { *hard_timeout = OFP_FLOW_PERMANENT; diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in index 7de788e1c..bbe747b21 100644 --- a/utilities/ovs-ofctl.8.in +++ b/utilities/ovs-ofctl.8.in @@ -495,8 +495,8 @@ optional fields: .TP \fBidle_timeout=\fIseconds\fR Causes the flow to expire after the given number of seconds of -inactivity. A value of 0 prevents a flow from expiring due to -inactivity. The default is 60 seconds. +inactivity. A value of 0 (the default) prevents a flow from expiring due to +inactivity. . .IP \fBhard_timeout=\fIseconds\fR Causes the flow to expire after the given number of seconds, -- cgit v1.2.1 From 8ba1fd2fb9eb616ec028027e303c1664185c88e7 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 15 Sep 2010 16:52:48 -0700 Subject: datapath: Check IS_ERR() in do_execute(). flow_actions_alloc() returns an error code in the form of a pointer but we checked that the pointer was not NULL, which is always true. This caused oopses on allocation errors when we would write into an invalid pointer. 
NIC-234 Signed-off-by: Jesse Gross Acked-by: Ben Pfaff --- datapath/datapath.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index 5ee915740..fe37ec1ed 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -1326,10 +1326,11 @@ static int do_execute(struct datapath *dp, const struct odp_execute *execute) if (execute->length < ETH_HLEN || execute->length > 65535) goto error; - err = -ENOMEM; actions = flow_actions_alloc(execute->n_actions); - if (!actions) + if (IS_ERR(actions)) { + err = PTR_ERR(actions); goto error; + } err = -EFAULT; if (copy_from_user(actions->actions, execute->actions, -- cgit v1.2.1 From bbf4f269a391724d886f66b3661b10e5a434e2e8 Mon Sep 17 00:00:00 2001 From: Vivien Bernet-Rollande Date: Thu, 16 Sep 2010 10:56:55 -0700 Subject: brcompat_mod: Check if user has CAP_NET_ADMIN in ioctl handler This patch checks that the user calling ioctl() to create, delete, or modify bridges has the CAP_NET_ADMIN capability. This prevents unprivileged users from modifying the bridge configuration through brcompatd. The checks are actually the same as those performed in net/bridge/br_ioctl.c by the Linux kernel. 
Signed-off-by: Vivien Bernet-Rollande Signed-off-by: Jesse Gross --- datapath/brcompat.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/datapath/brcompat.c b/datapath/brcompat.c index 3e8401154..2113eae0f 100644 --- a/datapath/brcompat.c +++ b/datapath/brcompat.c @@ -84,6 +84,9 @@ static int brc_add_del_bridge(char __user *uname, int add) struct sk_buff *request; char name[IFNAMSIZ]; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (copy_from_user(name, uname, IFNAMSIZ)) return -EFAULT; @@ -196,6 +199,9 @@ static int brc_add_del_port(struct net_device *dev, int port_ifindex, int add) struct net_device *port; int err; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + port = __dev_get_by_index(&init_net, port_ifindex); if (!port) return -EINVAL; -- cgit v1.2.1 From 8e236e71f2e52ee47b2787fa722f2dfb36c2c284 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Thu, 16 Sep 2010 11:02:15 -0700 Subject: AUTHORS: Add Vivien Bernet-Rollande. --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 9e1dfa0b9..a5d042fb3 100644 --- a/AUTHORS +++ b/AUTHORS @@ -26,6 +26,7 @@ Thomas Lacroix thomas.lacroix@citrix.com Todd Deshane deshantm@gmail.com Tom Everman teverman@google.com Tsvi Slonim tsvi@toroki.com +Vivien Bernet-Rollande vbr@soprive.net Wei Yongjun yjwei@cn.fujitsu.com Yu Zhiguo yuzg@cn.fujitsu.com -- cgit v1.2.1 From 5c16362b51eed5eb76c7e92629e2e7900b8c2ca9 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 1 Sep 2010 15:12:23 -0700 Subject: ovsdb-doc: Be less explicit in ovs-vswitchd.conf.db(5). The documentation doesn't really need to say that a field may be "between 0 and 4294967295 characters long". This regression was introduced by commit 991559357 "Implement initial Python bindings for Open vSwitch database." 
--- python/ovs/db/types.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/ovs/db/types.py b/python/ovs/db/types.py index 6e7ef11db..d42ac7fe8 100644 --- a/python/ovs/db/types.py +++ b/python/ovs/db/types.py @@ -290,14 +290,14 @@ class BaseType(object): return 'at most %s' % commafy(self.max) else: return 'at most %g' % self.max - elif self.min_length is not None and self.max_length is not None: + elif self.min_length != 0 and self.max_length != sys.maxint: if self.min_length == self.max_length: return 'exactly %d characters long' % (self.min_length) else: return 'between %d and %d characters long' % (self.min_length, self.max_length) - elif self.min_length is not None: + elif self.min_length != 0: return 'at least %d characters long' % self.min_length - elif self.max_length is not None: + elif self.max_length != sys.maxint: return 'at most %d characters long' % self.max_length else: return '' -- cgit v1.2.1 From 7ac60147cdba1af0b066e0e3cb3ca83f9bbf4101 Mon Sep 17 00:00:00 2001 From: Ethan Jackson Date: Wed, 15 Sep 2010 01:51:15 -0700 Subject: debian: Init script should put core dumps in an appropriate place Before this commit the init script did not change the cwd of openvswitch processes it started. Thus, core files were created in root directory. This patch changes the cwd of openvswitch to a more reasonable location. 
--- AUTHORS | 1 + debian/openvswitch-switch.init | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/AUTHORS b/AUTHORS index a5d042fb3..75a6de5c9 100644 --- a/AUTHORS +++ b/AUTHORS @@ -6,6 +6,7 @@ Ben Pfaff blp@nicira.com Bryan Phillippe bp@toroki.com Dan Wendlandt dan@nicira.com David Erickson derickso@stanford.edu +Ethan Jackson ethan@nicira.com Glen Gibb grg@stanford.edu Ian Campbell Ian.Campbell@citrix.com Jean Tourrilhes jt@hpl.hp.com diff --git a/debian/openvswitch-switch.init b/debian/openvswitch-switch.init index a933a21ae..d86063037 100755 --- a/debian/openvswitch-switch.init +++ b/debian/openvswitch-switch.init @@ -230,12 +230,16 @@ case "$1" in install -d -m 755 -o root -g root /var/log/openvswitch fi + if [ ! -d /var/log/openvswitch/cores ]; then + install -d -m 755 -o root -g root /var/log/openvswitch/cores + fi + # Start ovsdb-server. set -- set -- "$@" /etc/openvswitch/conf.db set -- "$@" --verbose=ANY:console:emer --verbose=ANY:syslog:err set -- "$@" --log-file=/var/log/openvswitch/ovsdb-server.log - set -- "$@" --detach --pidfile $monitor_opt + set -- "$@" --detach --no-chdir --pidfile $monitor_opt set -- "$@" --remote punix:/var/run/openvswitch/db.sock set -- "$@" --remote db:Open_vSwitch,managers set -- "$@" --private-key=db:SSL,private_key @@ -245,6 +249,7 @@ case "$1" in echo -n "Starting ovsdb-server: " start-stop-daemon --start --quiet \ --pidfile /var/run/openvswitch/ovsdb-server.pid \ + --chdir /var/log/openvswitch/cores \ --exec $ovsdb_server -- "$@" if running ovsdb-server; then echo "ovsdb-server." 
@@ -258,12 +263,13 @@ case "$1" in set -- set -- "$@" --verbose=ANY:console:emer --verbose=ANY:syslog:err set -- "$@" --log-file=/var/log/openvswitch/ovs-vswitchd.log - set -- "$@" --detach --pidfile $monitor_opt + set -- "$@" --detach --no-chdir --pidfile $monitor_opt set -- "$@" unix:/var/run/openvswitch/db.sock set -- "$@" $OVS_VSWITCHD_OPTS echo -n "Starting ovs-vswitchd: " start-stop-daemon --start --quiet \ --pidfile /var/run/openvswitch/ovs-vswitchd.pid \ + --chdir /var/log/openvswitch/cores \ --exec $ovs_vswitchd -- "$@" if running ovs-vswitchd; then echo "ovs-vswitchd." @@ -275,12 +281,14 @@ case "$1" in echo -n "Stopping ovs-vswitchd: " start-stop-daemon --stop --quiet --oknodo --retry 5 \ --pidfile /var/run/openvswitch/ovs-vswitchd.pid \ + --chdir /var/log/openvswitch/cores \ --exec $ovs_vswitchd echo "ovs-vswitchd." echo -n "Stopping ovsdb-server: " start-stop-daemon --stop --quiet --oknodo --retry 5 \ --pidfile /var/run/openvswitch/ovsdb-server.pid \ + --chdir /var/log/openvswitch/cores \ --exec $ovsdb_server echo "ovsdb-server." ;; -- cgit v1.2.1 From b828c2f5fa580412f7c3afae03862b9dcce6f576 Mon Sep 17 00:00:00 2001 From: Ethan Jackson Date: Wed, 15 Sep 2010 01:51:40 -0700 Subject: debian: Created a debian equivalent to xen-bugtool ovs-bugtool creates a tarball of useful information which people can submit with bug reports. The source is copied from xen-bugtool with the xen specific removed or changed. 
--- debian/automake.mk | 1 + debian/control | 2 +- debian/copyright.in | 21 + debian/openvswitch-common.install | 1 + debian/ovs-bugtool | 1110 +++++++++++++++++++++++++++++++++++++ 5 files changed, 1134 insertions(+), 1 deletion(-) create mode 100755 debian/ovs-bugtool diff --git a/debian/automake.mk b/debian/automake.mk index ba9ea861d..a5a6e05a3 100644 --- a/debian/automake.mk +++ b/debian/automake.mk @@ -38,6 +38,7 @@ EXTRA_DIST += \ debian/openvswitch-switch.postinst \ debian/openvswitch-switch.postrm \ debian/openvswitch-switch.template \ + debian/ovs-bugtool \ debian/rules \ debian/rules.modules diff --git a/debian/control b/debian/control index c07eca08a..edecffda5 100644 --- a/debian/control +++ b/debian/control @@ -23,7 +23,7 @@ Description: Source code for Open vSwitch datapath Linux module Package: openvswitch-common Architecture: any -Depends: ${shlibs:Depends}, openssl, ${misc:Depends} +Depends: ${shlibs:Depends}, openssl, ${misc:Depends}, python, ethtool Description: Open vSwitch common components openvswitch-common provides components required by both openvswitch-switch and openvswitch-controller. diff --git a/debian/copyright.in b/debian/copyright.in index ae1a78868..0cac63471 100644 --- a/debian/copyright.in +++ b/debian/copyright.in @@ -33,6 +33,27 @@ License: On Debian systems, the complete text of the Apache License version 2.0 can be found in '/usr/share/common-licenses/Apache-2.0'. +* ovs-bugtool is covered by the following license: + + This library is free software; you can redistribute it and/or + modify it under the terms of version 2.1 of the GNU Lesser General Public + License as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + Copyright (c) 2005, 2007 XenSource Ltd. + Copyright (c) 2010, Nicira Networks. + + On Debian systems, the complete text of the GNU Lesser General Public + License can be found in `/usr/share/common-licenses/LGPL-2.1'. + * All other components of this package are licensed under The Apache License Version 2.0. diff --git a/debian/openvswitch-common.install b/debian/openvswitch-common.install index fab991666..298f1ad0f 100644 --- a/debian/openvswitch-common.install +++ b/debian/openvswitch-common.install @@ -4,4 +4,5 @@ _debian/utilities/ovs-appctl usr/sbin _debian/utilities/ovs-ofctl usr/sbin _debian/utilities/ovs-parse-leaks usr/bin _debian/utilities/ovs-pki usr/sbin +debian/ovs-bugtool usr/sbin vswitchd/vswitch.ovsschema usr/share/openvswitch diff --git a/debian/ovs-bugtool b/debian/ovs-bugtool new file mode 100755 index 000000000..f991f27a2 --- /dev/null +++ b/debian/ovs-bugtool @@ -0,0 +1,1110 @@ +#!/usr/bin/env python + +# This library is free software; you can redistribute it and/or +# modify it under the terms of version 2.1 of the GNU Lesser General Public +# License as published by the Free Software Foundation. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# Copyright (c) 2005, 2007 XenSource Ltd. +# Copyright (c) 2010, Nicira Networks. + +# +# To add new entries to the bugtool, you need to: +# +# Create a new capability. 
These declare the new entry to the GUI, including +# the expected size, time to collect, privacy implications, and whether the +# capability should be selected by default. One capability may refer to +# multiple files, assuming that they can be reasonably grouped together, and +# have the same privacy implications. You need: +# +# A new CAP_ constant. +# A cap() invocation to declare the capability. +# +# You then need to add calls to main() to collect the files. These will +# typically be calls to the helpers file_output(), tree_output(), cmd_output(), +# or func_output(). +# + +import getopt +import re +import os +import StringIO +import sys +import tarfile +import time +import commands +import pprint +from xml.dom.minidom import parse, getDOMImplementation +import zipfile +from subprocess import Popen, PIPE +from select import select +from signal import SIGTERM, SIGUSR1 +import md5 +import platform +import fcntl +import glob +import urllib +import socket +import base64 + +sys.path.append('/usr/lib/python') +sys.path.append('/usr/lib64/python') + +OS_RELEASE = platform.release() + +# +# Files & directories +# + +BUG_DIR = "/var/log/openvswitch" +PLUGIN_DIR = "/etc/openvswitch/bugtool" +GRUB_CONFIG = '/boot/grub/menu.lst' +BOOT_KERNEL = '/boot/vmlinuz-' + OS_RELEASE +BOOT_INITRD = '/boot/initrd-' + OS_RELEASE + '.img' +PROC_PARTITIONS = '/proc/partitions' +FSTAB = '/etc/fstab' +PROC_MOUNTS = '/proc/mounts' +PROC_CPUINFO = '/proc/cpuinfo' +PROC_MEMINFO = '/proc/meminfo' +PROC_IOPORTS = '/proc/ioports' +PROC_INTERRUPTS = '/proc/interrupts' +PROC_SCSI = '/proc/scsi/scsi' +PROC_VERSION = '/proc/version' +PROC_MODULES = '/proc/modules' +PROC_DEVICES = '/proc/devices' +PROC_FILESYSTEMS = '/proc/filesystems' +PROC_CMDLINE = '/proc/cmdline' +PROC_CONFIG = '/proc/config.gz' +PROC_USB_DEV = '/proc/bus/usb/devices' +PROC_NET_SOFTNET_STAT = '/proc/net/softnet_stat' +MODPROBE_DIR = '/etc/modprobe.d' +RESOLV_CONF = '/etc/resolv.conf' +NSSWITCH_CONF = '/etc/nsswitch.conf' 
+NTP_CONF = '/etc/ntp.conf' +HOSTS = '/etc/hosts' +HOSTS_ALLOW = '/etc/hosts.allow' +HOSTS_DENY = '/etc/hosts.deny' +DHCP_LEASE_DIR = '/var/lib/dhcp3' +OPENVSWITCH_CORE_DIR = '/var/log/openvswitch/cores' +OPENVSWITCH_DEFAULT_SWITCH = '/etc/default/openvswitch-switch' +OPENVSWITCH_DEFAULT_CONTROLLER = '/etc/default/openvswitch-controller' +OPENVSWITCH_CONF_DB = '/etc/openvswitch/conf.db' +OPENVSWITCH_VSWITCHD_PID = '/var/run/openvswitch/ovs-vswitchd.pid' +VAR_LOG_DIR = '/var/log/' +X11_LOGS_DIR = VAR_LOG_DIR +X11_LOGS_RE = re.compile(r'.*/Xorg\..*$') +X11_AUTH_DIR = '/root/' +X11_AUTH_RE = re.compile(r'.*/\.((Xauthority)|(serverauth\.[0-9]*))$') +PAM_DIR = '/etc/pam.d' + +# +# External programs +# + +ARP = '/usr/sbin/arp' +CAT = '/bin/cat' +DF = '/bin/df' +DMESG = '/bin/dmesg' +DMIDECODE = '/usr/sbin/dmidecode' +ETHTOOL = '/sbin/ethtool' +FDISK = '/sbin/fdisk' +FIND = '/usr/bin/find' +IFCONFIG = '/sbin/ifconfig' +IPTABLES = '/sbin/iptables' +LOSETUP = '/sbin/losetup' +LS = '/bin/ls' +LSPCI = '/usr/bin/lspci' +MD5SUM = '/usr/bin/md5sum' +MODINFO = '/sbin/modinfo' +NETSTAT = '/bin/netstat' +OVS_DPCTL = '/usr/sbin/ovs-dpctl' +OVS_OFCTL = '/usr/sbin/ovs-ofctl' +OVS_VSCTL = '/usr/sbin/ovs-vsctl' +OVS_APPCTL = '/usr/sbin/ovs-appctl' +PS = '/bin/ps' +ROUTE = '/sbin/route' +SYSCTL = '/sbin/sysctl' +TC = '/sbin/tc' +UPTIME = '/usr/bin/uptime' +ZCAT = '/bin/zcat' + +# +# PII -- Personally identifiable information. Of particular concern are +# things that would identify customers, or their network topology. +# Passwords are never to be included in any bug report, regardless of any PII +# declaration. +# +# NO -- No PII will be in these entries. +# YES -- PII will likely or certainly be in these entries. +# MAYBE -- The user may wish to audit these entries for PII. +# IF_CUSTOMIZED -- If the files are unmodified, then they will contain no PII, +# but since we encourage customers to edit these files, PII may have been +# introduced by the customer. 
This is used in particular for the networking +# scripts in dom0. +# + +PII_NO = 'no' +PII_YES = 'yes' +PII_MAYBE = 'maybe' +PII_IF_CUSTOMIZED = 'if_customized' +KEY = 0 +PII = 1 +MIN_SIZE = 2 +MAX_SIZE = 3 +MIN_TIME = 4 +MAX_TIME = 5 +MIME = 6 +CHECKED = 7 +HIDDEN = 8 + +MIME_DATA = 'application/data' +MIME_TEXT = 'text/plain' + +INVENTORY_XML_ROOT = "system-status-inventory" +INVENTORY_XML_SUMMARY = 'system-summary' +INVENTORY_XML_ELEMENT = 'inventory-entry' +CAP_XML_ROOT = "system-status-capabilities" +CAP_XML_ELEMENT = 'capability' + + +CAP_BLOBS = 'blobs' +CAP_BOOT_LOADER = 'boot-loader' +CAP_DISK_INFO = 'disk-info' +CAP_FIRSTBOOT = 'firstboot' +CAP_HARDWARE_INFO = 'hardware-info' +CAP_HIGH_AVAILABILITY = 'high-availability' +CAP_HOST_CRASHDUMP_DUMPS = 'host-crashdump-dumps' +CAP_HOST_CRASHDUMP_LOGS = 'host-crashdump-logs' +CAP_KERNEL_INFO = 'kernel-info' +CAP_LOSETUP_A = 'loopback-devices' +CAP_NETWORK_CONFIG = 'network-config' +CAP_NETWORK_STATUS = 'network-status' +CAP_OEM = 'oem' +CAP_PAM = 'pam' +CAP_PROCESS_LIST = 'process-list' +CAP_PERSISTENT_STATS = 'persistent-stats' +CAP_SYSTEM_LOGS = 'system-logs' +CAP_SYSTEM_SERVICES = 'system-services' +CAP_VNCTERM = 'vncterm' +CAP_WLB = 'wlb' +CAP_X11_LOGS = 'X11' +CAP_X11_AUTH = 'X11-auth' + +KB = 1024 +MB = 1024 * 1024 + +caps = {} +cap_sizes = {} +unlimited_data = False +dbg = False + +def cap(key, pii=PII_MAYBE, min_size=-1, max_size=-1, min_time=-1, + max_time=-1, mime=MIME_TEXT, checked=True, hidden=False): + caps[key] = (key, pii, min_size, max_size, min_time, max_time, mime, + checked, hidden) + cap_sizes[key] = 0 + + +cap(CAP_BLOBS, PII_NO, max_size=5*MB) +cap(CAP_BOOT_LOADER, PII_NO, max_size=3*KB, + max_time=5) +cap(CAP_DISK_INFO, PII_MAYBE, max_size=25*KB, + max_time=20) +cap(CAP_FIRSTBOOT, PII_YES, min_size=60*KB, max_size=80*KB) +cap(CAP_HARDWARE_INFO, PII_MAYBE, max_size=30*KB, + max_time=20) +cap(CAP_HIGH_AVAILABILITY, PII_MAYBE, max_size=5*MB) +cap(CAP_HOST_CRASHDUMP_DUMPS,PII_YES, checked = 
False) +cap(CAP_HOST_CRASHDUMP_LOGS, PII_NO) +cap(CAP_KERNEL_INFO, PII_MAYBE, max_size=120*KB, + max_time=5) +cap(CAP_LOSETUP_A, PII_MAYBE, max_size=KB, max_time=5) +cap(CAP_NETWORK_CONFIG, PII_IF_CUSTOMIZED, + min_size=0, max_size=20*KB) +cap(CAP_NETWORK_STATUS, PII_YES, max_size=19*KB, + max_time=30) +cap(CAP_PAM, PII_NO, max_size=30*KB) +cap(CAP_PERSISTENT_STATS, PII_MAYBE, max_size=50*MB, + max_time=60) +cap(CAP_PROCESS_LIST, PII_YES, max_size=30*KB, + max_time=20) +cap(CAP_SYSTEM_LOGS, PII_MAYBE, max_size=50*MB, + max_time=5) +cap(CAP_SYSTEM_SERVICES, PII_NO, max_size=5*KB, + max_time=20) +cap(CAP_VNCTERM, PII_MAYBE, checked = False) +cap(CAP_WLB, PII_NO, max_size=3*MB, + max_time=20) +cap(CAP_X11_LOGS, PII_NO, max_size=100*KB) +cap(CAP_X11_AUTH, PII_NO, max_size=100*KB) + +ANSWER_YES_TO_ALL = False +SILENT_MODE = False +entries = None +data = {} +dev_null = open('/dev/null', 'r+') + +def output(x): + global SILENT_MODE + if not SILENT_MODE: + print x + +def output_ts(x): + output("[%s] %s" % (time.strftime("%x %X %Z"), x)) + +def cmd_output(cap, args, label = None, filter = None): + if cap in entries: + if not label: + if isinstance(args, list): + a = [aa for aa in args] + a[0] = os.path.basename(a[0]) + label = ' '.join(a) + else: + label = args + data[label] = {'cap': cap, 'cmd_args': args, 'filter': filter} + +def file_output(cap, path_list): + if cap in entries: + for p in path_list: + if os.path.exists(p): + if unlimited_data or caps[cap][MAX_SIZE] == -1 or \ + cap_sizes[cap] < caps[cap][MAX_SIZE]: + data[p] = {'cap': cap, 'filename': p} + try: + s = os.stat(p) + cap_sizes[cap] += s.st_size + except: + pass + else: + output("Omitting %s, size constraint of %s exceeded" % (p, cap)) + +def tree_output(cap, path, pattern = None, negate = False): + if cap in entries: + if os.path.exists(path): + for f in os.listdir(path): + fn = os.path.join(path, f) + if os.path.isfile(fn) and matches(fn, pattern, negate): + file_output(cap, [fn]) + elif os.path.isdir(fn): 
+ tree_output(cap, fn, pattern, negate) + +def func_output(cap, label, func): + if cap in entries: + t = str(func).split() + data[label] = {'cap': cap, 'func': func} + +def collect_data(): + process_lists = {} + + for (k, v) in data.items(): + cap = v['cap'] + if v.has_key('cmd_args'): + v['output'] = StringIOmtime() + if not process_lists.has_key(cap): + process_lists[cap] = [] + process_lists[cap].append(ProcOutput(v['cmd_args'], caps[cap][MAX_TIME], v['output'], v['filter'])) + elif v.has_key('filename') and v['filename'].startswith('/proc/'): + # proc files must be read into memory + try: + f = open(v['filename'], 'r') + s = f.read() + f.close() + if unlimited_data or caps[cap][MAX_SIZE] == -1 or \ + cap_sizes[cap] < caps[cap][MAX_SIZE]: + v['output'] = StringIOmtime(s) + cap_sizes[cap] += len(s) + else: + output("Omitting %s, size constraint of %s exceeded" % (v['filename'], cap)) + except: + pass + elif v.has_key('func'): + try: + s = v['func'](cap) + except Exception, e: + s = str(e) + if unlimited_data or caps[cap][MAX_SIZE] == -1 or \ + cap_sizes[cap] < caps[cap][MAX_SIZE]: + v['output'] = StringIOmtime(s) + cap_sizes[cap] += len(s) + else: + output("Omitting %s, size constraint of %s exceeded" % (k, cap)) + + run_procs(process_lists.values()) + + +def main(argv = None): + global ANSWER_YES_TO_ALL, SILENT_MODE + global entries, data, dbg + + # we need access to privileged files, exit if we are not running as root + if os.getuid() != 0: + print >>sys.stderr, "Error: ovs-bugtool must be run as root" + return 1 + + output_type = 'tar.bz2' + output_fd = -1 + + if argv is None: + argv = sys.argv + + try: + (options, params) = getopt.gnu_getopt( + argv, 'sy', ['capabilities', 'silent', 'yestoall', 'entries=', + 'output=', 'outfd=', 'all', 'unlimited', 'debug']) + except getopt.GetoptError, opterr: + print >>sys.stderr, opterr + return 2 + + try: + load_plugins(True) + except: + pass + + entries = [e for e in caps.keys() if caps[e][CHECKED]] + + for (k, v) in 
options: + if k == '--capabilities': + update_capabilities() + print_capabilities() + return 0 + + if k == '--output': + if v in ['tar', 'tar.bz2', 'zip']: + output_type = v + else: + print >>sys.stderr, "Invalid output format '%s'" % v + return 2 + + # "-s" or "--silent" means suppress output (except for the final + # output filename at the end) + if k in ['-s', '--silent']: + SILENT_MODE = True + + if k == '--entries' and v != '': + entries = v.split(',') + + # If the user runs the script with "-y" or "--yestoall" we don't ask + # all the really annoying questions. + if k in ['-y', '--yestoall']: + ANSWER_YES_TO_ALL = True + + if k == '--outfd': + output_fd = int(v) + try: + old = fcntl.fcntl(output_fd, fcntl.F_GETFD) + fcntl.fcntl(output_fd, fcntl.F_SETFD, old | fcntl.FD_CLOEXEC) + except: + print >>sys.stderr, "Invalid output file descriptor", output_fd + return 2 + + elif k == '--all': + entries = caps.keys() + elif k == '--unlimited': + unlimited_data = True + elif k == '--debug': + dbg = True + ProcOutput.debug = True + + if len(params) != 1: + print >>sys.stderr, "Invalid additional arguments", str(params) + return 2 + + if output_fd != -1 and output_type != 'tar': + print >>sys.stderr, "Option '--outfd' only valid with '--output=tar'" + return 2 + + if ANSWER_YES_TO_ALL: + output("Warning: '--yestoall' argument provided, will not prompt for individual files.") + + output(''' +This application will collate dmesg output, details of the +hardware configuration of your machine, information about the build of +openvswitch that you are using, plus, if you allow it, various logs. + +The collated information will be saved as a .%s for archiving or +sending to a Technical Support Representative. + +The logs may contain private information, and if you are at all +worried about that, you should exit now, or you should explicitly +exclude those logs from the archive. 
+ +''' % output_type) + + # assemble potential data + + file_output(CAP_BOOT_LOADER, [GRUB_CONFIG]) + cmd_output(CAP_BOOT_LOADER, [LS, '-lR', '/boot']) + cmd_output(CAP_BOOT_LOADER, [MD5SUM, BOOT_KERNEL, BOOT_INITRD], label='vmlinuz-initrd.md5sum') + + cmd_output(CAP_DISK_INFO, [FDISK, '-l']) + file_output(CAP_DISK_INFO, [PROC_PARTITIONS, PROC_MOUNTS]) + file_output(CAP_DISK_INFO, [FSTAB]) + cmd_output(CAP_DISK_INFO, [DF, '-alT']) + cmd_output(CAP_DISK_INFO, [DF, '-alTi']) + cmd_output(CAP_DISK_INFO, [LS, '-R', '/sys/class/scsi_host']) + cmd_output(CAP_DISK_INFO, [LS, '-R', '/sys/class/scsi_disk']) + cmd_output(CAP_DISK_INFO, [LS, '-R', '/sys/class/fc_transport']) + func_output(CAP_DISK_INFO, 'scsi-hosts', dump_scsi_hosts) + + + file_output(CAP_HARDWARE_INFO, [PROC_CPUINFO, PROC_MEMINFO, PROC_IOPORTS, PROC_INTERRUPTS]) + cmd_output(CAP_HARDWARE_INFO, [DMIDECODE]) + cmd_output(CAP_HARDWARE_INFO, [LSPCI, '-n']) + cmd_output(CAP_HARDWARE_INFO, [LSPCI, '-vv']) + file_output(CAP_HARDWARE_INFO, [PROC_USB_DEV, PROC_SCSI]) + cmd_output(CAP_HARDWARE_INFO, [LS, '-lR', '/dev']) + + file_output(CAP_KERNEL_INFO, [PROC_VERSION, PROC_MODULES, PROC_DEVICES, + PROC_FILESYSTEMS, PROC_CMDLINE]) + cmd_output(CAP_KERNEL_INFO, [ZCAT, PROC_CONFIG], label='config') + cmd_output(CAP_KERNEL_INFO, [SYSCTL, '-A']) + tree_output(CAP_KERNEL_INFO, MODPROBE_DIR) + func_output(CAP_KERNEL_INFO, 'modinfo', module_info) + + cmd_output(CAP_LOSETUP_A, [LOSETUP, '-a']) + + file_output(CAP_NETWORK_CONFIG, [RESOLV_CONF, NSSWITCH_CONF, HOSTS]) + file_output(CAP_NETWORK_CONFIG, [NTP_CONF, HOSTS_ALLOW, HOSTS_DENY]) + file_output(CAP_NETWORK_CONFIG, [OPENVSWITCH_DEFAULT_SWITCH, + OPENVSWITCH_DEFAULT_CONTROLLER, OPENVSWITCH_CONF_DB]) + + cmd_output(CAP_NETWORK_STATUS, [IFCONFIG, '-a']) + cmd_output(CAP_NETWORK_STATUS, [ROUTE, '-n']) + cmd_output(CAP_NETWORK_STATUS, [ARP, '-n']) + cmd_output(CAP_NETWORK_STATUS, [NETSTAT, '-an']) + tree_output(CAP_NETWORK_STATUS, DHCP_LEASE_DIR) + cmd_output(CAP_NETWORK_STATUS, 
[IPTABLES, '-nL']) + for p in os.listdir('/sys/class/net/'): + try: + f = open('/sys/class/net/%s/type' % p, 'r') + t = f.readline() + f.close() + if int(t) == 1: + # ARPHRD_ETHER + cmd_output(CAP_NETWORK_STATUS, [ETHTOOL, p]) + cmd_output(CAP_NETWORK_STATUS, [ETHTOOL, '-S', p]) + cmd_output(CAP_NETWORK_STATUS, [ETHTOOL, '-k', p]) + cmd_output(CAP_NETWORK_STATUS, [ETHTOOL, '-i', p]) + cmd_output(CAP_NETWORK_STATUS, [ETHTOOL, '-c', p]) + except: + pass + cmd_output(CAP_NETWORK_STATUS, [TC, '-s', 'qdisc']) + file_output(CAP_NETWORK_STATUS, [PROC_NET_SOFTNET_STAT]) + tree_output(CAP_NETWORK_STATUS, OPENVSWITCH_CORE_DIR) + if os.path.exists(OPENVSWITCH_VSWITCHD_PID): + cmd_output(CAP_NETWORK_STATUS, [OVS_DPCTL, 'show']) + for d in dp_list(): + cmd_output(CAP_NETWORK_STATUS, [OVS_OFCTL, 'show', d]) + cmd_output(CAP_NETWORK_STATUS, [OVS_OFCTL, 'status', d]) + cmd_output(CAP_NETWORK_STATUS, [OVS_OFCTL, 'dump-flows', d]) + cmd_output(CAP_NETWORK_STATUS, [OVS_DPCTL, 'dump-flows', d]) + try: + vspidfile = open(OPENVSWITCH_VSWITCHD_PID) + vspid = int(vspidfile.readline().strip()) + vspidfile.close() + for b in bond_list(vspid): + cmd_output(CAP_NETWORK_STATUS, + [OVS_APPCTL, '-t', '/var/run/ovs-vswitchd.%s.ctl' % vspid, '-e' 'bond/show %s' % b], + 'ovs-appctl-bond-show-%s.out' % b) + except e: + pass + + tree_output(CAP_PAM, PAM_DIR) + + cmd_output(CAP_PROCESS_LIST, [PS, 'wwwaxf', '-eo', 'pid,tty,stat,time,nice,psr,pcpu,pmem,nwchan,wchan:25,args'], label='process-tree') + func_output(CAP_PROCESS_LIST, 'fd_usage', fd_usage) + + file_output(CAP_SYSTEM_LOGS, + [ VAR_LOG_DIR + x for x in + [ 'kern.log', 'daemon.log', 'user.log', 'syslog', 'messages', + 'debug', 'dmesg', 'boot'] + + [ f % n for n in range(1, 20) \ + for f in ['kern.log.%d', 'kern.log.%d.gz', + 'daemon.log.%d', 'daemon.log.%d.gz', + 'user.log.%d', 'user.log.%d.gz', + 'messages.%d', 'messages.%d.gz']]]) + if not os.path.exists('/var/log/dmesg') and not os.path.exists('/var/log/boot'): + cmd_output(CAP_SYSTEM_LOGS, 
[DMESG]) + + + tree_output(CAP_X11_LOGS, X11_LOGS_DIR, X11_LOGS_RE) + tree_output(CAP_X11_AUTH, X11_AUTH_DIR, X11_AUTH_RE) + + + try: + load_plugins() + except: + pass + + # permit the user to filter out data + for k in sorted(data.keys()): + if not ANSWER_YES_TO_ALL and not yes("Include '%s'? [Y/n]: " % k): + del data[k] + + # collect selected data now + output_ts('Running commands to collect data') + collect_data() + + subdir = "bug-report-%s" % time.strftime("%Y%m%d%H%M%S") + + # include inventory + data['inventory.xml'] = {'cap': None, 'output': StringIOmtime(make_inventory(data, subdir))} + + # create archive + if output_fd == -1 and not os.path.exists(BUG_DIR): + try: + os.makedirs(BUG_DIR) + except: + pass + + if output_fd == -1: + output_ts('Creating output file') + + if output_type.startswith('tar'): + make_tar(subdir, output_type, output_fd) + else: + make_zip(subdir) + + clean_tapdisk_logs() + + if dbg: + print >>sys.stderr, "Category sizes (max, actual):\n" + for c in caps.keys(): + print >>sys.stderr, " %s (%d, %d)" % (c, caps[c][MAX_SIZE], + cap_sizes[c]) + return 0 + +def find_tapdisk_logs(): + return glob.glob('/var/log/blktap/*.log*') + +def generate_tapdisk_logs(): + for pid in pidof('tapdisk'): + try: + os.kill(pid, SIGUSR1) + output_ts("Including logs for tapdisk process %d" % pid) + except : + pass + # give processes a second to write their logs + time.sleep(1) + +def clean_tapdisk_logs(): + for filename in find_tapdisk_logs(): + try: + os.remove(filename) + except : + pass + +def filter_db_pii(str, state): + if 'in_secret_table' not in state: + state['in_secret_table'] = False + + if str.startswith(''): + state['in_secret_table'] = False + + if state['in_secret_table'] and str.startswith(" %s" % modelname) or '') + + return output + +def module_info(cap): + output = StringIO.StringIO() + modules = open(PROC_MODULES, 'r') + procs = [] + + for line in modules: + module = line.split()[0] + procs.append(ProcOutput([MODINFO, module], 
caps[cap][MAX_TIME], output)) + modules.close() + + run_procs([procs]) + + return output.getvalue() + +def dp_list(): + output = StringIO.StringIO() + procs = [ProcOutput([OVS_DPCTL, 'dump-dps'], caps[CAP_NETWORK_STATUS][MAX_TIME], output)] + + run_procs([procs]) + + if not procs[0].timed_out: + return output.getvalue().splitlines() + return [] + +def bond_list(pid): + output = StringIO.StringIO() + procs = [ProcOutput([OVS_APPCTL, '-t', '/var/run/ovs-vswitchd.%s.ctl' % pid, '-e' 'bond/list'], caps[CAP_NETWORK_STATUS][MAX_TIME], output)] + + run_procs([procs]) + + if not procs[0].timed_out: + bonds = output.getvalue().splitlines()[1:] + return [x.split('\t')[1] for x in bonds] + return [] + +def fd_usage(cap): + output = '' + fd_dict = {} + for d in [p for p in os.listdir('/proc') if p.isdigit()]: + try: + fh = open('/proc/'+d+'/cmdline') + name = fh.readline() + num_fds = len(os.listdir(os.path.join('/proc/'+d+'/fd'))) + if num_fds > 0: + if not num_fds in fd_dict: + fd_dict[num_fds] = [] + fd_dict[num_fds].append(name.replace('\0', ' ').strip()) + finally: + fh.close() + keys = fd_dict.keys() + keys.sort(lambda a, b: int(b) - int(a)) + for k in keys: + output += "%s: %s\n" % (k, str(fd_dict[k])) + return output + +def load_plugins(just_capabilities = False): + def getText(nodelist): + rc = "" + for node in nodelist: + if node.nodeType == node.TEXT_NODE: + rc += node.data + return rc.encode() + + def getBoolAttr(el, attr, default = False): + ret = default + val = el.getAttribute(attr).lower() + if val in ['true', 'false', 'yes', 'no']: + ret = val in ['true', 'yes'] + return ret + + for dir in [d for d in os.listdir(PLUGIN_DIR) if os.path.isdir(os.path.join(PLUGIN_DIR, d))]: + if not caps.has_key(dir): + if not os.path.exists("%s/%s.xml" % (PLUGIN_DIR, dir)): + continue + xmldoc = parse("%s/%s.xml" % (PLUGIN_DIR, dir)) + assert xmldoc.documentElement.tagName == "capability" + + pii, min_size, max_size, min_time, max_time, mime = \ + PII_MAYBE, -1,-1,-1,-1, 
MIME_TEXT + + if xmldoc.documentElement.getAttribute("pii") in [PII_NO, PII_YES, PII_MAYBE, PII_IF_CUSTOMIZED]: + pii = xmldoc.documentElement.getAttribute("pii") + if xmldoc.documentElement.getAttribute("min_size") != '': + min_size = long(xmldoc.documentElement.getAttribute("min_size")) + if xmldoc.documentElement.getAttribute("max_size") != '': + max_size = long(xmldoc.documentElement.getAttribute("max_size")) + if xmldoc.documentElement.getAttribute("min_time") != '': + min_time = int(xmldoc.documentElement.getAttribute("min_time")) + if xmldoc.documentElement.getAttribute("max_time") != '': + max_time = int(xmldoc.documentElement.getAttribute("max_time")) + if xmldoc.documentElement.getAttribute("mime") in [MIME_DATA, MIME_TEXT]: + mime = xmldoc.documentElement.getAttribute("mime") + checked = getBoolAttr(xmldoc.documentElement, 'checked', True) + hidden = getBoolAttr(xmldoc.documentElement, 'hidden', False) + + cap(dir, pii, min_size, max_size, min_time, max_time, mime, checked, hidden) + + if just_capabilities: + continue + + plugdir = os.path.join(PLUGIN_DIR, dir) + for file in [f for f in os.listdir(plugdir) if f.endswith('.xml')]: + xmldoc = parse(os.path.join(plugdir, file)) + assert xmldoc.documentElement.tagName == "collect" + + for el in xmldoc.documentElement.getElementsByTagName("*"): + if el.tagName == "files": + file_output(dir, getText(el.childNodes).split()) + elif el.tagName == "directory": + pattern = el.getAttribute("pattern") + if pattern == '': pattern = None + negate = getBoolAttr(el, 'negate') + tree_output(dir, getText(el.childNodes), pattern and re.compile(pattern) or None, negate) + elif el.tagName == "command": + label = el.getAttribute("label") + if label == '': label = None + cmd_output(dir, getText(el.childNodes), label) + +def make_tar(subdir, suffix, output_fd): + global SILENT_MODE, data + + mode = 'w' + if suffix == 'tar.bz2': + mode = 'w:bz2' + filename = "%s/%s.%s" % (BUG_DIR, subdir, suffix) + + if output_fd == -1: + tf = 
tarfile.open(filename, mode) + else: + tf = tarfile.open(None, 'w', os.fdopen(output_fd, 'a')) + + try: + for (k, v) in data.items(): + try: + tar_filename = os.path.join(subdir, construct_filename(k, v)) + ti = tarfile.TarInfo(tar_filename) + + ti.uname = 'root' + ti.gname = 'root' + + if v.has_key('output'): + ti.mtime = v['output'].mtime + ti.size = len(v['output'].getvalue()) + v['output'].seek(0) + tf.addfile(ti, v['output']) + elif v.has_key('filename'): + s = os.stat(v['filename']) + ti.mtime = s.st_mtime + ti.size = s.st_size + tf.addfile(ti, file(v['filename'])) + except: + pass + finally: + tf.close() + + if output_fd == -1: + output ('Writing tarball %s successful.' % filename) + if SILENT_MODE: + print filename + + +def make_zip(subdir): + global SILENT_MODE, data + + filename = "%s/%s.zip" % (BUG_DIR, subdir) + zf = zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) + + try: + for (k, v) in data.items(): + try: + dest = os.path.join(subdir, construct_filename(k, v)) + + if v.has_key('output'): + zf.writestr(dest, v['output'].getvalue()) + else: + if os.stat(v['filename']).st_size < 50: + compress_type = zipfile.ZIP_STORED + else: + compress_type = zipfile.ZIP_DEFLATED + zf.write(v['filename'], dest, compress_type) + except: + pass + finally: + zf.close() + + output ('Writing archive %s successful.' 
% filename) + if SILENT_MODE: + print filename + + +def make_inventory(inventory, subdir): + document = getDOMImplementation().createDocument( + None, INVENTORY_XML_ROOT, None) + + # create summary entry + s = document.createElement(INVENTORY_XML_SUMMARY) + user = os.getenv('SUDO_USER', os.getenv('USER')) + if user: + s.setAttribute('user', user) + s.setAttribute('date', time.strftime('%c')) + s.setAttribute('hostname', platform.node()) + s.setAttribute('uname', ' '.join(platform.uname())) + s.setAttribute('uptime', commands.getoutput(UPTIME)) + document.getElementsByTagName(INVENTORY_XML_ROOT)[0].appendChild(s) + + map(lambda (k, v): inventory_entry(document, subdir, k, v), + inventory.items()) + return document.toprettyxml() + +def inventory_entry(document, subdir, k, v): + try: + el = document.createElement(INVENTORY_XML_ELEMENT) + el.setAttribute('capability', v['cap']) + el.setAttribute('filename', os.path.join(subdir, construct_filename(k, v))) + el.setAttribute('md5sum', md5sum(v)) + document.getElementsByTagName(INVENTORY_XML_ROOT)[0].appendChild(el) + except: + pass + + +def md5sum(d): + m = md5.new() + if d.has_key('filename'): + f = open(d['filename']) + data = f.read(1024) + while len(data) > 0: + m.update(data) + data = f.read(1024) + f.close() + elif d.has_key('output'): + m.update(d['output'].getvalue()) + return m.hexdigest() + + +def construct_filename(k, v): + if v.has_key('filename'): + if v['filename'][0] == '/': + return v['filename'][1:] + else: + return v['filename'] + s = k.replace(' ', '-') + s = s.replace('--', '-') + s = s.replace('/', '%') + if s.find('.') == -1: + s += '.out' + + return s + +def update_capabilities(): + pass + +def update_cap_size(cap, size): + update_cap(cap, MIN_SIZE, size) + update_cap(cap, MAX_SIZE, size) + update_cap(cap, CHECKED, size > 0) + + +def update_cap(cap, k, v): + global caps + l = list(caps[cap]) + l[k] = v + caps[cap] = tuple(l) + + +def size_of_dir(d, pattern = None, negate = False): + if 
os.path.isdir(d): + return size_of_all([os.path.join(d, fn) for fn in os.listdir(d)], + pattern, negate) + else: + return 0 + + +def size_of_all(files, pattern = None, negate = False): + return sum([size_of(f, pattern, negate) for f in files]) + + +def matches(f, pattern, negate): + if negate: + return not matches(f, pattern, False) + else: + return pattern is None or pattern.match(f) + + +def size_of(f, pattern, negate): + if os.path.isfile(f) and matches(f, pattern, negate): + return os.stat(f)[6] + else: + return size_of_dir(f, pattern, negate) + + +def print_capabilities(): + document = getDOMImplementation().createDocument( + "ns", CAP_XML_ROOT, None) + map(lambda key: capability(document, key), [k for k in caps.keys() if not caps[k][HIDDEN]]) + print document.toprettyxml() + +def capability(document, key): + c = caps[key] + el = document.createElement(CAP_XML_ELEMENT) + el.setAttribute('key', c[KEY]) + el.setAttribute('pii', c[PII]) + el.setAttribute('min-size', str(c[MIN_SIZE])) + el.setAttribute('max-size', str(c[MAX_SIZE])) + el.setAttribute('min-time', str(c[MIN_TIME])) + el.setAttribute('max-time', str(c[MAX_TIME])) + el.setAttribute('content-type', c[MIME]) + el.setAttribute('default-checked', c[CHECKED] and 'yes' or 'no') + document.getElementsByTagName(CAP_XML_ROOT)[0].appendChild(el) + + +def prettyDict(d): + format = '%%-%ds: %%s' % max(map(len, [k for k, _ in d.items()])) + return '\n'.join([format % i for i in d.items()]) + '\n' + + +def yes(prompt): + yn = raw_input(prompt) + + return len(yn) == 0 or yn.lower()[0] == 'y' + + +partition_re = re.compile(r'(.*[0-9]+$)|(^xvd)') + +def disk_list(): + disks = [] + try: + f = open('/proc/partitions') + f.readline() + f.readline() + for line in f.readlines(): + (major, minor, blocks, name) = line.split() + if int(major) < 254 and not partition_re.match(name): + disks.append(name) + f.close() + except: + pass + return disks + + +class ProcOutput: + debug = False + + def __init__(self, command, max_time, 
inst=None, filter=None): + self.command = command + self.max_time = max_time + self.inst = inst + self.running = False + self.status = None + self.timed_out = False + self.failed = False + self.timeout = int(time.time()) + self.max_time + self.filter = filter + self.filter_state = {} + + def __del__(self): + self.terminate() + + def cmdAsStr(self): + return isinstance(self.command, list) and ' '.join(self.command) or self.command + + def run(self): + self.timed_out = False + try: + if ProcOutput.debug: + output_ts("Starting '%s'" % self.cmdAsStr()) + self.proc = Popen(self.command, bufsize=1, stdin=dev_null, stdout=PIPE, stderr=dev_null, shell=isinstance(self.command, str)) + old = fcntl.fcntl(self.proc.stdout.fileno(), fcntl.F_GETFD) + fcntl.fcntl(self.proc.stdout.fileno(), fcntl.F_SETFD, old | fcntl.FD_CLOEXEC) + self.running = True + self.failed = False + except: + output_ts("'%s' failed" % self.cmdAsStr()) + self.running = False + self.failed = True + + def terminate(self): + if self.running: + try: + os.kill(self.proc.pid, SIGTERM) + except: + pass + self.proc = None + self.running = False + self.status = SIGTERM + + def read_line(self): + assert self.running + line = self.proc.stdout.readline() + if line == '': + # process exited + self.status = self.proc.wait() + self.proc = None + self.running = False + else: + if self.filter: + line = self.filter(line, self.filter_state) + if self.inst: + self.inst.write(line) + +def run_procs(procs): + while True: + pipes = [] + active_procs = [] + + for pp in procs: + for p in pp: + if p.running: + active_procs.append(p) + pipes.append(p.proc.stdout) + break + elif p.status == None and not p.failed and not p.timed_out: + p.run() + if p.running: + active_procs.append(p) + pipes.append(p.proc.stdout) + break + + if len(pipes) == 0: + # all finished + break + + (i, o, x) = select(pipes, [], [], 1.0) + now = int(time.time()) + + # handle process output + for p in active_procs: + if p.proc.stdout in i: + p.read_line() + + # 
handle timeout + if p.running and now > p.timeout: + output_ts("'%s' timed out" % p.cmdAsStr()) + if p.inst: + p.inst.write("\n** timeout **\n") + p.timed_out = True + p.terminate() + + +def pidof(name): + pids = [] + + for d in [p for p in os.listdir('/proc') if p.isdigit()]: + try: + if os.path.basename(os.readlink('/proc/%s/exe' % d)) == name: + pids.append(int(d)) + except: + pass + + return pids + + +class StringIOmtime(StringIO.StringIO): + def __init__(self, buf = ''): + StringIO.StringIO.__init__(self, buf) + self.mtime = time.time() + + def write(self, s): + StringIO.StringIO.write(self, s) + self.mtime = time.time() + + +if __name__ == "__main__": + try: + sys.exit(main()) + except KeyboardInterrupt: + print "\nInterrupted." + sys.exit(3) -- cgit v1.2.1 From 3273ae634ab473ddd6f8bedea4550e25f8718e77 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 17 Sep 2010 11:23:19 -0700 Subject: datapath: Remove backported random32(). Nothing uses it anymore and it causes problems when backported on some distributions. Kernels we support have net_random(), which is the same thing so there is no reason to have an entire copy of the random number generator in our source tree. Reported-by: Alexey I. 
Froloff Signed-off-by: Jesse Gross Acked-by: Ben Pfaff --- datapath/datapath.c | 1 - datapath/linux-2.6/Modules.mk | 2 - .../linux-2.6/compat-2.6/include/linux/random.h | 17 --- datapath/linux-2.6/compat-2.6/random32.c | 144 --------------------- 4 files changed, 164 deletions(-) delete mode 100644 datapath/linux-2.6/compat-2.6/include/linux/random.h delete mode 100644 datapath/linux-2.6/compat-2.6/random32.c diff --git a/datapath/datapath.c b/datapath/datapath.c index fe37ec1ed..b3f77b36a 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/datapath/linux-2.6/Modules.mk b/datapath/linux-2.6/Modules.mk index 7f4cae6ea..5a0e9ad47 100644 --- a/datapath/linux-2.6/Modules.mk +++ b/datapath/linux-2.6/Modules.mk @@ -4,7 +4,6 @@ openvswitch_sources += \ linux-2.6/compat-2.6/genetlink-openvswitch.c \ linux-2.6/compat-2.6/ip_output-openvswitch.c \ linux-2.6/compat-2.6/kmemdup.c \ - linux-2.6/compat-2.6/random32.c \ linux-2.6/compat-2.6/skbuff-openvswitch.c \ linux-2.6/compat-2.6/time.c openvswitch_headers += \ @@ -32,7 +31,6 @@ openvswitch_headers += \ linux-2.6/compat-2.6/include/linux/netfilter_bridge.h \ linux-2.6/compat-2.6/include/linux/netfilter_ipv4.h \ linux-2.6/compat-2.6/include/linux/netlink.h \ - linux-2.6/compat-2.6/include/linux/random.h \ linux-2.6/compat-2.6/include/linux/rculist.h \ linux-2.6/compat-2.6/include/linux/rtnetlink.h \ linux-2.6/compat-2.6/include/linux/skbuff.h \ diff --git a/datapath/linux-2.6/compat-2.6/include/linux/random.h b/datapath/linux-2.6/compat-2.6/include/linux/random.h deleted file mode 100644 index 4e4932c9c..000000000 --- a/datapath/linux-2.6/compat-2.6/include/linux/random.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef __LINUX_RANDOM_WRAPPER_H -#define __LINUX_RANDOM_WRAPPER_H 1 - -#include_next - -#include -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) - -#ifdef __KERNEL__ -u32 random32(void); -void srandom32(u32 seed); -#endif /* 
__KERNEL__ */ - -#endif /* linux kernel < 2.6.19 */ - - -#endif diff --git a/datapath/linux-2.6/compat-2.6/random32.c b/datapath/linux-2.6/compat-2.6/random32.c deleted file mode 100644 index b0dd2a32b..000000000 --- a/datapath/linux-2.6/compat-2.6/random32.c +++ /dev/null @@ -1,144 +0,0 @@ -#include -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) - -/* - This is a maximally equidistributed combined Tausworthe generator - based on code from GNU Scientific Library 1.5 (30 Jun 2004) - - x_n = (s1_n ^ s2_n ^ s3_n) - - s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19)) - s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25)) - s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11)) - - The period of this generator is about 2^88. - - From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe - Generators", Mathematics of Computation, 65, 213 (1996), 203--213. - - This is available on the net from L'Ecuyer's home page, - - http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps - ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps - - There is an erratum in the paper "Tables of Maximally - Equidistributed Combined LFSR Generators", Mathematics of - Computation, 68, 225 (1999), 261--269: - http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps - - ... the k_j most significant bits of z_j must be non- - zero, for each j. (Note: this restriction also applies to the - computer code given in [4], but was mistakenly not mentioned in - that paper.) - - This affects the seeding procedure by imposing the requirement - s1 > 1, s2 > 7, s3 > 15. 
- -*/ - -#include -#include -#include -#include -#include - -#include "compat26.h" - -struct rnd_state { - u32 s1, s2, s3; -}; - -static struct rnd_state net_rand_state[NR_CPUS]; - -static u32 __random32(struct rnd_state *state) -{ -#define TAUSWORTHE(s,a,b,c,d) ((s&c)<>b) - - state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12); - state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4); - state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17); - - return (state->s1 ^ state->s2 ^ state->s3); -} - -static void __set_random32(struct rnd_state *state, unsigned long s) -{ - if (s == 0) - s = 1; /* default seed is 1 */ - -#define LCG(n) (69069 * n) - state->s1 = LCG(s); - state->s2 = LCG(state->s1); - state->s3 = LCG(state->s2); - - /* "warm it up" */ - __random32(state); - __random32(state); - __random32(state); - __random32(state); - __random32(state); - __random32(state); -} - -/** - * random32 - pseudo random number generator - * - * A 32 bit pseudo-random number is generated using a fast - * algorithm suitable for simulation. This algorithm is NOT - * considered safe for cryptographic use. - */ -u32 random32(void) -{ - return __random32(&net_rand_state[smp_processor_id()]); -} - -/** - * srandom32 - add entropy to pseudo random number generator - * @seed: seed value - * - * Add some additional seeding to the random32() pool. - * Note: this pool is per cpu so it only affects current CPU. - */ -void srandom32(u32 entropy) -{ - struct rnd_state *state = &net_rand_state[smp_processor_id()]; - __set_random32(state, state->s1 ^ entropy); -} - -static int __init random32_reseed(void); - -/* - * Generate some initially weak seeding values to allow - * to start the random32() engine. 
- */ -int __init random32_init(void) -{ - int i; - - for (i = 0; i < NR_CPUS; i++) { - struct rnd_state *state = &net_rand_state[i]; - __set_random32(state, i + jiffies); - } - random32_reseed(); - return 0; -} - -/* - * Generate better values after random number generator - * is fully initalized. - */ -static int __init random32_reseed(void) -{ - int i; - unsigned long seed; - - for (i = 0; i < NR_CPUS; i++) { - struct rnd_state *state = &net_rand_state[i]; - - get_random_bytes(&seed, sizeof(seed)); - __set_random32(state, seed); - } - return 0; -} - -#endif /* kernel < 2.6.19 */ -- cgit v1.2.1 From 4fefc29e00f032fb4187d4af0e05cdd328e76dde Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 17 Sep 2010 11:47:49 -0700 Subject: AUTHORS: Add Alexey I. Froloff as reporter. --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 75a6de5c9..8d2aed834 100644 --- a/AUTHORS +++ b/AUTHORS @@ -34,6 +34,7 @@ Yu Zhiguo yuzg@cn.fujitsu.com The following additional people are mentioned in commit logs as having provided helpful bug reports or suggestions. +Alexey I. Froloff raorn@altlinux.org Brandon Heller brandonh@stanford.edu Bryan Fulton bryan@nicira.com Cedric Hobbs cedric@nicira.com -- cgit v1.2.1 From 6b7b9d34c0eab9871cf6284c41108a84129817f3 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 17 Sep 2010 14:37:51 -0700 Subject: ovs-vsctl: Remove default timeout. On overloaded XenServers the current default timeout of 5 seconds can occasionally be reached, which causes VM startup to fail. This commit fixes the problem by removing the default timeout and changing each invocation of ovs-vsctl within the tree to specify its own timeout, if appropriate. Bug #3573. 
--- debian/openvswitch-switch.init | 2 +- utilities/ovs-vsctl.8.in | 11 ++++++----- utilities/ovs-vsctl.c | 2 +- xenserver/etc_init.d_openvswitch | 6 +++--- xenserver/etc_xapi.d_plugins_openvswitch-cfg-update | 6 +++--- xenserver/etc_xensource_scripts_vif | 4 ++-- xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py | 2 +- xenserver/usr_sbin_brctl | 2 +- xenserver/usr_share_openvswitch_scripts_monitor-external-ids | 2 +- 9 files changed, 19 insertions(+), 18 deletions(-) diff --git a/debian/openvswitch-switch.init b/debian/openvswitch-switch.init index d86063037..34b5604dc 100755 --- a/debian/openvswitch-switch.init +++ b/debian/openvswitch-switch.init @@ -257,7 +257,7 @@ case "$1" in echo " ERROR." fi - ovs-vsctl --no-wait init + ovs-vsctl --no-wait --timeout=5 init # Start ovs-vswitchd. set -- diff --git a/utilities/ovs-vsctl.8.in b/utilities/ovs-vsctl.8.in index 7476777a8..ef080bb41 100644 --- a/utilities/ovs-vsctl.8.in +++ b/utilities/ovs-vsctl.8.in @@ -118,11 +118,12 @@ Prevents \fBovs\-vsctl\fR from actually modifying the database. . .IP "\fB\-t \fIsecs\fR" .IQ "\fB\-\-timeout=\fIsecs\fR" -Limits runtime to approximately \fIsecs\fR seconds. A value of -zero will cause \fBovs\-vsctl\fR to wait forever. If the timeout expires, -\fBovs\-vsctl\fR will exit with a \fBSIGALRM\fR signal. If this option is -not used, \fBovs\-vsctl\fR uses a timeout of five seconds. -(A timeout would normally happen only if the database cannot be contacted.) +By default, or with a \fIsecs\fR of \fB0\fR, \fBovs\-vsctl\fR waits +forever for a response from the database. This option limits runtime +to approximately \fIsecs\fR seconds. If the timeout expires, +\fBovs\-vsctl\fR will exit with a \fBSIGALRM\fR signal. (A timeout +would normally happen only if the database cannot be contacted, or if +the system is overloaded.) . 
.SS "Public Key Infrastructure Options" .so lib/ssl.man diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c index 4d50194aa..043530280 100644 --- a/utilities/ovs-vsctl.c +++ b/utilities/ovs-vsctl.c @@ -85,7 +85,7 @@ static bool dry_run; static bool wait_for_reload = true; /* --timeout: Time to wait for a connection to 'db'. */ -static int timeout = 5; +static int timeout; /* All supported commands. */ static const struct vsctl_command_syntax all_commands[]; diff --git a/xenserver/etc_init.d_openvswitch b/xenserver/etc_init.d_openvswitch index 050844570..68079fcf5 100755 --- a/xenserver/etc_init.d_openvswitch +++ b/xenserver/etc_init.d_openvswitch @@ -293,7 +293,7 @@ EOF function set_system_ids { if [ -f /etc/xensource-inventory ]; then action "Configuring Open vSwitch system IDs" true - $vsctl --no-wait set Open_vSwitch . \ + $vsctl --no-wait --timeout=5 set Open_vSwitch . \ external-ids:system-type="$PRODUCT_BRAND" \ external-ids:system-version="$PRODUCT_VERSION-$BUILD_NUMBER" \ external-ids:system-id="$INSTALLATION_UUID" \ @@ -329,11 +329,11 @@ function start { fi start_ovsdb_server - $vsctl --no-wait init + $vsctl --no-wait --timeout=5 init if [ ! 
-e /var/run/openvswitch.booted ]; then touch /var/run/openvswitch.booted for bridge in $($vsctl list-br); do - $vsctl --no-wait del-br $bridge + $vsctl --no-wait --timeout=5 del-br $bridge done fi diff --git a/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update b/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update index 6da86d5cd..733301868 100755 --- a/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update +++ b/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update @@ -111,7 +111,7 @@ def setControllerCfg(controller): 'managers="ssl:' + controller + ':6632"']) def vswitchCfgQuery(action_args): - cmd = [vsctl, "-vANY:console:emer"] + action_args + cmd = [vsctl, "--timeout=5", "-vANY:console:emer"] + action_args output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate() if len(output) == 0 or output[0] == None: output = "" @@ -120,14 +120,14 @@ def vswitchCfgQuery(action_args): return output def vswitchCfgMod(action_args): - cmd = [vsctl, "-vANY:console:emer"] + action_args + cmd = [vsctl, "--timeout=5", "-vANY:console:emer"] + action_args exitcode = subprocess.call(cmd) if exitcode != 0: raise XenAPIPlugin.Failure("VSWITCH_CONFIG_MOD_FAILURE", [ str(exitcode) , str(action_args) ]) def emergency_reset(session, args): - cmd = [vsctl, "emer-reset"] + cmd = [vsctl, "--timeout=5", "emer-reset"] exitcode = subprocess.call(cmd) if exitcode != 0: raise XenAPIPlugin.Failure("VSWITCH_EMER_RESET_FAILURE", diff --git a/xenserver/etc_xensource_scripts_vif b/xenserver/etc_xensource_scripts_vif index f27ff5b40..88006e2c4 100755 --- a/xenserver/etc_xensource_scripts_vif +++ b/xenserver/etc_xensource_scripts_vif @@ -165,7 +165,7 @@ add_to_bridge() local vif_details=$(handle_vswitch_vif_details $bridge) fi - $vsctl -- --if-exists del-port $dev -- add-port $bridge $dev $vif_details + $vsctl --timeout=30 -- --if-exists del-port $dev -- add-port $bridge $dev $vif_details ;; esac @@ -182,7 +182,7 @@ remove_from_bridge() # If ovs-brcompatd is running, it might already have deleted 
the # port. Use --if-exists to suppress the error that would otherwise # arise in that case. - $vsctl -- --if-exists del-port $dev + $vsctl --timeout=30 -- --if-exists del-port $dev ;; esac } diff --git a/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py b/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py index 6ee4138b2..d2f6a6a3c 100644 --- a/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py +++ b/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py @@ -79,7 +79,7 @@ class VSwitchConfig: @staticmethod def Get(action): try: - arg = [vsctl, "-vANY:console:emer"] + action.split() + arg = [vsctl, "--timeout=30", "-vANY:console:emer"] + action.split() output = ShellPipe(arg).Stdout() except StandardError, e: XSLogError("config retrieval error: " + str(e)) diff --git a/xenserver/usr_sbin_brctl b/xenserver/usr_sbin_brctl index 7fecc5648..5cf0b88ac 100755 --- a/xenserver/usr_sbin_brctl +++ b/xenserver/usr_sbin_brctl @@ -35,7 +35,7 @@ def delegate(): def call_vsctl(cmd, arg=""): database = '--db=' + OVSDB_SERVER - command = [VSCTL, database, cmd] + command = [VSCTL, '--timeout=30', database, cmd] if (arg): command.append(arg) return subprocess.Popen(command, stdout=subprocess.PIPE).communicate()[0].split() diff --git a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids index f91801d22..a28ce6090 100755 --- a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids +++ b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids @@ -98,7 +98,7 @@ def get_iface_id(if_name, default=None): def set_external_id(table, record, key, value): col = 'external-ids:"' + key + '"="' + value + '"' - cmd = [vsctl, "-vANY:console:emer", "set", table, record, col] + cmd = [vsctl, "--timeout=30", "-vANY:console:emer", "set", table, record, col] exitcode = subprocess.call(cmd) if exitcode != 0: syslog.syslog(syslog.LOG_WARNING, -- cgit v1.2.1 From 
00456ddda4110f7f75ea9f8a703b31c7f2df7649 Mon Sep 17 00:00:00 2001 From: Ethan Jackson Date: Fri, 17 Sep 2010 08:23:32 -0700 Subject: xenserver: monitor-external-ids remove redundant ovs-vsctl calls The number of ovs-vsctl calls required to add a new vif in monitor-external-ids grew linearly with the number of vifs in the system. Changed to only do O(1) ovs-vsctl calls per vif addition. --- xenserver/usr_share_openvswitch_scripts_monitor-external-ids | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids index a28ce6090..a0aad7a0e 100755 --- a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids +++ b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids @@ -227,13 +227,15 @@ def main(argv): if name not in bridges: update_network_uuids(name, ids) - update_bridge_id(name, ids) + if (name not in bridges) or (bridges[name] != ids): + update_bridge_id(name, ids) bridges = new_bridges if interfaces != new_interfaces: for name,ids in new_interfaces.items(): - update_iface_id(name, ids) + if (name not in interfaces) or (interfaces[name] != ids): + update_iface_id(name, ids) interfaces = new_interfaces if __name__ == '__main__': -- cgit v1.2.1 From 1089aab7136612acb86cdcd638d7d2261311531a Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 2 Sep 2010 10:06:42 -0700 Subject: ovsdb: Fix bug in "wait" command implementation. The declaration of "error" that this commit removes shadowed an outer local declaration of "error", which caused errors detected by this code not to be propagated up to the outer level. Found with GCC -Wshadow. 
--- ovsdb/execution.c | 1 - 1 file changed, 1 deletion(-) diff --git a/ovsdb/execution.c b/ovsdb/execution.c index 5b6762f07..7ce9a3f50 100644 --- a/ovsdb/execution.c +++ b/ovsdb/execution.c @@ -629,7 +629,6 @@ ovsdb_execute_wait(struct ovsdb_execution *x, struct ovsdb_parser *parser, /* Parse "rows" into 'expected'. */ ovsdb_row_hash_init(&expected, &columns); for (i = 0; i < rows->u.array.n; i++) { - struct ovsdb_error *error; struct ovsdb_row *row; row = ovsdb_row_create(table); -- cgit v1.2.1 From 2a022368f4b37559de5d5621a88c648023493f75 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 2 Sep 2010 10:09:09 -0700 Subject: Avoid shadowing local variable names. All of these changes avoid using the same name for two local variables within a same function. None of them are actual bugs as far as I can tell, but any of them could be confusing to the casual reader. The one in lib/ovsdb-idl.c is particularly brilliant: inner and outer loops both using (different) variables named 'i'. Found with GCC -Wshadow. --- lib/dpif-netdev.c | 1 - lib/dynamic-string.c | 2 -- lib/json.c | 1 - lib/learning-switch.c | 6 +++--- lib/netdev-linux.c | 6 +++--- lib/netlink.c | 10 +++++----- lib/ofp-parse.c | 6 +++--- lib/ovsdb-idl.c | 8 ++++---- lib/process.c | 2 +- lib/stream-fd.c | 2 +- lib/stream-ssl.c | 6 +++--- ofproto/ofproto.c | 2 -- ovsdb/execution.c | 2 -- tests/test-csum.c | 3 +-- tests/test-ovsdb.c | 6 ++++-- utilities/ovs-controller.c | 6 ++---- utilities/ovs-openflowd.c | 4 ---- utilities/ovs-vsctl.c | 24 +++++++++++------------- vswitchd/bridge.c | 4 ---- 19 files changed, 41 insertions(+), 60 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 323f36411..3975b5a8b 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1104,7 +1104,6 @@ dp_netdev_modify_vlan_tci(struct ofpbuf *packet, uint16_t tci, uint16_t mask) veh->veth_tci |= htons(tci); } else { /* Insert new 802.1Q header. 
*/ - struct eth_header *eh = packet->l2; struct vlan_eth_header tmp; memcpy(tmp.veth_dst, eh->eth_dst, ETH_ADDR_LEN); memcpy(tmp.veth_src, eh->eth_src, ETH_ADDR_LEN); diff --git a/lib/dynamic-string.c b/lib/dynamic-string.c index 5f8054a45..3af7fc9f5 100644 --- a/lib/dynamic-string.c +++ b/lib/dynamic-string.c @@ -147,8 +147,6 @@ ds_put_format_valist(struct ds *ds, const char *format, va_list args_) if (needed < available) { ds->length += needed; } else { - size_t available; - ds_reserve(ds, ds->length + needed); va_copy(args, args_); diff --git a/lib/json.c b/lib/json.c index 3b70e6bdb..5887f677a 100644 --- a/lib/json.c +++ b/lib/json.c @@ -705,7 +705,6 @@ json_lex_number(struct json_parser *p) * * We suppress negative zeros as a matter of policy. */ if (!significand) { - struct json_token token; token.type = T_INTEGER; token.u.integer = 0; json_parser_input(p, &token); diff --git a/lib/learning-switch.c b/lib/learning-switch.c index e189f1e44..4e7645d7c 100644 --- a/lib/learning-switch.c +++ b/lib/learning-switch.c @@ -220,10 +220,10 @@ lswitch_process_packet(struct lswitch *sw, struct rconn *rconn, } } if (VLOG_IS_DBG_ENABLED()) { - char *p = ofp_to_string(msg->data, msg->size, 2); + char *s = ofp_to_string(msg->data, msg->size, 2); VLOG_DBG_RL(&rl, "%016llx: OpenFlow packet ignored: %s", - sw->datapath_id, p); - free(p); + sw->datapath_id, s); + free(s); } } diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index e6036bfc5..7227f5dfc 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -1779,12 +1779,12 @@ netdev_linux_get_in6(const struct netdev *netdev_, struct in6_addr *in6) if (file != NULL) { const char *name = netdev_get_name(netdev_); while (fgets(line, sizeof line, file)) { - struct in6_addr in6; + struct in6_addr in6_tmp; char ifname[16 + 1]; - if (parse_if_inet6_line(line, &in6, ifname) + if (parse_if_inet6_line(line, &in6_tmp, ifname) && !strcmp(name, ifname)) { - netdev_dev->in6 = in6; + netdev_dev->in6 = in6_tmp; break; } } diff --git 
a/lib/netlink.c b/lib/netlink.c index 4e83747cc..66c27b1fb 100644 --- a/lib/netlink.c +++ b/lib/netlink.c @@ -1036,19 +1036,19 @@ nl_policy_parse(const struct ofpbuf *msg, size_t nla_offset, type = nla->nla_type; if (type < n_attrs && policy[type].type != NL_A_NO_ATTR) { - const struct nl_policy *p = &policy[type]; + const struct nl_policy *e = &policy[type]; size_t min_len, max_len; /* Validate length and content. */ - min_len = p->min_len ? p->min_len : attr_len_range[p->type][0]; - max_len = p->max_len ? p->max_len : attr_len_range[p->type][1]; + min_len = e->min_len ? e->min_len : attr_len_range[e->type][0]; + max_len = e->max_len ? e->max_len : attr_len_range[e->type][1]; if (len < min_len || len > max_len) { VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" length %zu not in " "allowed range %zu...%zu", offset, type, len, min_len, max_len); return false; } - if (p->type == NL_A_STRING) { + if (e->type == NL_A_STRING) { if (((char *) nla)[nla->nla_len - 1]) { VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" lacks null at end", offset, type); @@ -1060,7 +1060,7 @@ nl_policy_parse(const struct ofpbuf *msg, size_t nla_offset, return false; } } - if (!p->optional && attrs[type] == NULL) { + if (!e->optional && attrs[type] == NULL) { assert(n_required > 0); --n_required; } diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c index 405008c67..312eaaaaf 100644 --- a/lib/ofp-parse.c +++ b/lib/ofp-parse.c @@ -270,12 +270,12 @@ str_to_action(char *str, struct ofpbuf *b) put_output_action(b, str_to_u32(arg)); } else if (!strcasecmp(act, "enqueue")) { char *sp = NULL; - char *port = strtok_r(arg, ":q", &sp); + char *port_s = strtok_r(arg, ":q", &sp); char *queue = strtok_r(NULL, "", &sp); - if (port == NULL || queue == NULL) { + if (port_s == NULL || queue == NULL) { ovs_fatal(0, "\"enqueue\" syntax is \"enqueue:PORT:QUEUE\""); } - put_enqueue_action(b, str_to_u32(port), str_to_u32(queue)); + put_enqueue_action(b, str_to_u32(port_s), str_to_u32(queue)); } else if (!strcasecmp(act, "drop")) { /* A 
drop action in OpenFlow occurs by just not setting * an action. */ diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index 2132f9fef..43ff94714 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -433,13 +433,13 @@ ovsdb_idl_send_monitor_request(struct ovsdb_idl *idl) const struct ovsdb_idl_table *table = &idl->tables[i]; const struct ovsdb_idl_table_class *tc = table->class; struct json *monitor_request, *columns; - size_t i; + size_t j; monitor_request = json_object_create(); columns = json_array_create_empty(); - for (i = 0; i < tc->n_columns; i++) { - const struct ovsdb_idl_column *column = &tc->columns[i]; - if (table->modes[i] != OVSDB_IDL_MODE_NONE) { + for (j = 0; j < tc->n_columns; j++) { + const struct ovsdb_idl_column *column = &tc->columns[j]; + if (table->modes[j] != OVSDB_IDL_MODE_NONE) { json_array_add(columns, json_string_create(column->name)); } } diff --git a/lib/process.c b/lib/process.c index a201a88f8..377c396b9 100644 --- a/lib/process.c +++ b/lib/process.c @@ -517,7 +517,7 @@ process_run_capture(char **argv, char **stdout_log, char **stderr_log, block_sigchld(&oldsigs); pid = fork(); if (pid < 0) { - int error = errno; + error = errno; unblock_sigchld(&oldsigs); VLOG_WARN("fork failed: %s", strerror(error)); diff --git a/lib/stream-fd.c b/lib/stream-fd.c index 9410009c4..ef4dc8d91 100644 --- a/lib/stream-fd.c +++ b/lib/stream-fd.c @@ -214,7 +214,7 @@ pfd_accept(struct pstream *pstream, struct stream **new_streamp) new_fd = accept(ps->fd, (struct sockaddr *) &ss, &ss_len); if (new_fd < 0) { - int retval = errno; + retval = errno; if (retval != EAGAIN) { VLOG_DBG_RL(&rl, "accept: %s", strerror(retval)); } diff --git a/lib/stream-ssl.c b/lib/stream-ssl.c index 70b15f0da..9c7533d1e 100644 --- a/lib/stream-ssl.c +++ b/lib/stream-ssl.c @@ -385,7 +385,7 @@ do_ca_cert_bootstrap(struct stream *stream) file = fdopen(fd, "w"); if (!file) { - int error = errno; + error = errno; VLOG_ERR("could not bootstrap CA cert: fdopen failed: %s", strerror(error)); 
unlink(ca_cert.file_name); @@ -402,7 +402,7 @@ do_ca_cert_bootstrap(struct stream *stream) } if (fclose(file)) { - int error = errno; + error = errno; VLOG_ERR("could not bootstrap CA cert: writing %s failed: %s", ca_cert.file_name, strerror(error)); unlink(ca_cert.file_name); @@ -921,7 +921,7 @@ pssl_accept(struct pstream *pstream, struct stream **new_streamp) new_fd = accept(pssl->fd, &sin, &sin_len); if (new_fd < 0) { - int error = errno; + error = errno; if (error != EAGAIN) { VLOG_DBG_RL(&rl, "accept: %s", strerror(error)); } diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 844083d8b..e571bd4e2 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -1071,7 +1071,6 @@ ofproto_run1(struct ofproto *p) for (i = 0; i < 50; i++) { struct ofpbuf *buf; - int error; error = dpif_recv(p->dpif, &buf); if (error) { @@ -1122,7 +1121,6 @@ ofproto_run1(struct ofproto *p) retval = pvconn_accept(ofservice->pvconn, OFP_VERSION, &vconn); if (!retval) { - struct ofconn *ofconn; struct rconn *rconn; char *name; diff --git a/ovsdb/execution.c b/ovsdb/execution.c index 7ce9a3f50..a96abfcaf 100644 --- a/ovsdb/execution.c +++ b/ovsdb/execution.c @@ -103,8 +103,6 @@ ovsdb_execute(struct ovsdb *db, const struct json *params, || !params->u.array.n || params->u.array.elems[0]->type != JSON_STRING || strcmp(params->u.array.elems[0]->u.string, db->schema->name)) { - struct ovsdb_error *error; - if (params->type != JSON_ARRAY) { error = ovsdb_syntax_error(params, NULL, "array expected"); } else { diff --git a/tests/test-csum.c b/tests/test-csum.c index 8c8545870..eebc8803f 100644 --- a/tests/test-csum.c +++ b/tests/test-csum.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009 Nicira Networks. + * Copyright (c) 2009, 2010 Nicira Networks. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -134,7 +134,6 @@ main(void) const uint16_t *data16 = (const uint16_t *) tc->data; const uint32_t *data32 = (const uint32_t *) tc->data; uint32_t partial; - size_t i; /* Test csum(). */ assert(ntohs(csum(tc->data, tc->size)) == tc->csum); diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c index 18784a52a..04db65421 100644 --- a/tests/test-ovsdb.c +++ b/tests/test-ovsdb.c @@ -1095,7 +1095,7 @@ do_query_distinct(int argc OVS_UNUSED, char *argv[]) size_t n_classes; struct json *json; int exit_code = 0; - size_t i, j, k; + size_t i; /* Parse table schema, create table. */ json = unbox_json(parse_json(argv[1])); @@ -1161,6 +1161,7 @@ do_query_distinct(int argc OVS_UNUSED, char *argv[]) for (i = 0; i < json->u.array.n; i++) { struct ovsdb_row_set results; struct ovsdb_condition cnd; + size_t j; check_ovsdb_error(ovsdb_condition_from_json(ts, json->u.array.elems[i], NULL, &cnd)); @@ -1171,6 +1172,8 @@ do_query_distinct(int argc OVS_UNUSED, char *argv[]) ovsdb_row_set_init(&results); ovsdb_query_distinct(table, &cnd, &columns, &results); for (j = 0; j < results.n_rows; j++) { + size_t k; + for (k = 0; k < n_rows; k++) { if (uuid_equals(ovsdb_row_get_uuid(results.rows[j]), &rows[k].uuid)) { @@ -1833,7 +1836,6 @@ do_idl(int argc, char *argv[]) for (i = 2; i < argc; i++) { char *arg = argv[i]; struct jsonrpc_msg *request, *reply; - int error; if (*arg == '+') { /* The previous transaction didn't change anything. */ diff --git a/utilities/ovs-controller.c b/utilities/ovs-controller.c index b18959ad6..40e2a801d 100644 --- a/utilities/ovs-controller.c +++ b/utilities/ovs-controller.c @@ -107,7 +107,6 @@ main(int argc, char *argv[]) for (i = optind; i < argc; i++) { const char *name = argv[i]; struct vconn *vconn; - int retval; retval = vconn_open(name, OFP_VERSION, &vconn); if (!retval) { @@ -146,12 +145,10 @@ main(int argc, char *argv[]) while (n_switches > 0 || n_listeners > 0) { int iteration; - int i; /* Accept connections on listening vconns. 
*/ for (i = 0; i < n_listeners && n_switches < MAX_SWITCHES; ) { struct vconn *new_vconn; - int retval; retval = pvconn_accept(listeners[i], OFP_VERSION, &new_vconn); if (!retval || retval == EAGAIN) { @@ -171,7 +168,8 @@ main(int argc, char *argv[]) bool progress = false; for (i = 0; i < n_switches; ) { struct switch_ *this = &switches[i]; - int retval = do_switching(this); + + retval = do_switching(this); if (!retval || retval == EAGAIN) { if (!retval) { progress = true; diff --git a/utilities/ovs-openflowd.c b/utilities/ovs-openflowd.c index 8cb50e4b4..945b11d05 100644 --- a/utilities/ovs-openflowd.c +++ b/utilities/ovs-openflowd.c @@ -458,8 +458,6 @@ parse_options(int argc, char *argv[], struct ofsettings *s) s->n_controllers = controllers.n; s->controllers = xmalloc(s->n_controllers * sizeof *s->controllers); if (argc > 1) { - size_t i; - for (i = 0; i < s->n_controllers; i++) { s->controllers[i] = controller_opts; s->controllers[i].target = controllers.names[i]; @@ -468,8 +466,6 @@ parse_options(int argc, char *argv[], struct ofsettings *s) /* Sanity check. 
*/ if (controller_opts.band == OFPROTO_OUT_OF_BAND) { - size_t i; - for (i = 0; i < s->n_controllers; i++) { if (!strcmp(s->controllers[i].target, "discover")) { ovs_fatal(0, "Cannot perform discovery with out-of-band " diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c index 043530280..884a41faf 100644 --- a/utilities/ovs-vsctl.c +++ b/utilities/ovs-vsctl.c @@ -1305,12 +1305,11 @@ add_port(struct vsctl_context *ctx, get_info(ctx->ovs, &info); if (may_exist) { - struct vsctl_port *port; + struct vsctl_port *vsctl_port; - port = find_port(&info, port_name, false); - if (port) { + vsctl_port = find_port(&info, port_name, false); + if (vsctl_port) { struct svec want_names, have_names; - size_t i; svec_init(&want_names); for (i = 0; i < n_ifaces; i++) { @@ -1319,15 +1318,16 @@ add_port(struct vsctl_context *ctx, svec_sort(&want_names); svec_init(&have_names); - for (i = 0; i < port->port_cfg->n_interfaces; i++) { - svec_add(&have_names, port->port_cfg->interfaces[i]->name); + for (i = 0; i < vsctl_port->port_cfg->n_interfaces; i++) { + svec_add(&have_names, + vsctl_port->port_cfg->interfaces[i]->name); } svec_sort(&have_names); - if (strcmp(port->bridge->name, br_name)) { + if (strcmp(vsctl_port->bridge->name, br_name)) { char *command = vsctl_context_to_string(ctx); vsctl_fatal("\"%s\" but %s is actually attached to bridge %s", - command, port_name, port->bridge->name); + command, port_name, vsctl_port->bridge->name); } if (!svec_equal(&want_names, &have_names)) { @@ -2767,8 +2767,8 @@ do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands, ds_chomp(ds, '\n'); for (j = 0; j < ds->length; j++) { - int c = ds->string[j]; - switch (c) { + int ch = ds->string[j]; + switch (ch) { case '\n': fputs("\\n", stdout); break; @@ -2778,7 +2778,7 @@ do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands, break; default: - putchar(c); + putchar(ch); } } putchar('\n'); @@ -2796,8 +2796,6 @@ do_vsctl(const char *args, struct 
vsctl_command *commands, size_t n_commands, if (wait_for_reload && status != TXN_UNCHANGED) { for (;;) { - const struct ovsrec_open_vswitch *ovs; - ovsdb_idl_run(idl); OVSREC_OPEN_VSWITCH_FOR_EACH (ovs, idl) { if (ovs->cur_cfg >= next_cfg) { diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 598b0016d..3f5e3d471 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -632,7 +632,6 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) struct odp_port *dpif_ports; size_t n_dpif_ports; struct shash cur_ifaces, want_ifaces; - struct shash_node *node; /* Get the set of interfaces currently in this datapath. */ dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports); @@ -765,7 +764,6 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) struct ovsrec_controller **controllers; struct ofproto_sflow_options oso; size_t n_controllers; - size_t i; memset(&oso, 0, sizeof oso); @@ -2822,7 +2820,6 @@ bond_rebalance_port(struct port *port) * smallest hashes instead of the biggest ones. There is little * reason behind this decision; we could use the opposite sort * order to shift away big hashes ahead of small ones. */ - size_t i; bool order_swapped; for (i = 0; i < from->n_hashes; i++) { @@ -3407,7 +3404,6 @@ port_reconfigure(struct port *port, const struct ovsrec_port *cfg) trunks = NULL; if (vlan < 0 && cfg->n_trunks) { size_t n_errors; - size_t i; trunks = bitmap_allocate(4096); n_errors = 0; -- cgit v1.2.1 From 3f5d8c02b213ea42dcf892635c1db310f00d855e Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 20 Sep 2010 10:22:19 -0700 Subject: vswitchd: Document policing implementation and caveats. 
With some text from Dan Wendlandt adapted from http://openvswitch.org/?page_id=267 CC: Keith Amidon --- vswitchd/vswitch.xml | 80 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 67 insertions(+), 13 deletions(-) diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index b9d8aaa9f..6e255763d 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -637,24 +637,78 @@ +

+ These settings control ingress policing for packets received on this + interface. On a physical interface, this limits the rate at which + traffic is allowed into the system from the outside; on a virtual + interface (one connected to a virtual machine), this limits the rate at + which the VM is able to transmit. +

+

+ Policing is a simple form of quality-of-service that simply drops + packets received in excess of the configured rate. Due to its + simplicity, policing is usually less accurate and less effective than + egress QoS (which is configured using the QoS and Queue tables). +

+

+ Policing is currently implemented only on Linux. The Linux + implementation uses a simple ``token bucket'' approach: +

+
    +
  • + The size of the bucket corresponds to ingress_policing_burst. Initially the bucket is full. +
  • +
  • + Whenever a packet is received, its size (converted to tokens) is + compared to the number of tokens currently in the bucket. If the + required number of tokens are available, they are removed and the + packet is forwarded. Otherwise, the packet is dropped. +
  • +
  • + Whenever it is not full, the bucket is refilled with tokens at the + rate specified by ingress_policing_rate. +
  • +
+

+ Policing interacts badly with some network protocols, and especially + with fragmented IP packets. Suppose that there is enough network + activity to keep the bucket nearly empty all the time. Then this token + bucket algorithm will forward a single packet every so often, with the + period depending on packet size and on the configured rate. All of the + fragments of an IP packet are normally transmitted back-to-back, as a + group. In such a situation, therefore, only one of these fragments + will be forwarded and the rest will be dropped. IP does not provide + any way for the intended recipient to ask for only the remaining + fragments. In such a case there are two likely possibilities for what + will happen next: either all of the fragments will eventually be + retransmitted (as TCP will do), in which case the same problem will + recur, or the sender will not realize that its packet has been dropped + and data will simply be lost (as some UDP-based protocols will do). + Either way, it is possible that no forward progress will ever occur. +

+ +

+ Maximum rate for data received on this interface, in kbps. Data + received faster than this rate is dropped. Set to 0 + (the default) to disable policing. +

+
+

Maximum burst size for data received on this interface, in kb. The default burst size if set to 0 is 1000 kb. This value has no effect if ingress_policing_rate is 0.

-

The burst size should be at least the size of the interface's - MTU.

-
- - -

Maximum rate for data received on this interface, in kbps. Data - received faster than this rate is dropped. Set to 0 to - disable policing.

-

The meaning of ``ingress'' is from Open vSwitch's perspective. If - configured on a physical interface, then it limits the rate at which - traffic is allowed into the system from the outside. If configured - on a virtual interface that is connected to a virtual machine, then - it limits the rate at which the guest is able to transmit.

+

+ Specifying a larger burst size lets the algorithm be more forgiving, + which is important for protocols like TCP that react severely to + dropped packets. The burst size should be at least the size of the + interface's MTU. Specifying a value that is numerically at least as + large as 10% of ingress_policing_rate helps TCP come + closer to achieving the full rate. +

-- cgit v1.2.1 From c0c2489aabc0d19495909ac7391ea7427d9b458a Mon Sep 17 00:00:00 2001 From: Sajjad Lateef Date: Fri, 17 Sep 2010 14:53:42 -0700 Subject: debian: Add openvswitch-python package This installs the Python runtime bindings for Open vSwitch database into /usr/share/python-support/openvswitch-python/ovs and /usr/share/python-support/openvswitch-python/ovs/db Updated FSF address in copyright file Minor Whitespace re-formatting Removed prerm, preinst, postinst files for openvswitch-python --- debian/automake.mk | 2 ++ debian/control | 10 +++++++++- debian/copyright.in | 2 +- debian/openvswitch-python.dirs | 2 ++ debian/openvswitch-python.install | 2 ++ debian/rules | 1 + 6 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 debian/openvswitch-python.dirs create mode 100644 debian/openvswitch-python.install diff --git a/debian/automake.mk b/debian/automake.mk index a5a6e05a3..dc18961a4 100644 --- a/debian/automake.mk +++ b/debian/automake.mk @@ -29,6 +29,8 @@ EXTRA_DIST += \ debian/openvswitch-pki-server.install \ debian/openvswitch-pki-server.postinst \ debian/openvswitch-pki.postinst \ + debian/openvswitch-python.dirs \ + debian/openvswitch-python.install \ debian/openvswitch-switch.README.Debian \ debian/openvswitch-switch.dirs \ debian/openvswitch-switch.init \ diff --git a/debian/control b/debian/control index edecffda5..fde640eac 100644 --- a/debian/control +++ b/debian/control @@ -5,7 +5,8 @@ Maintainer: Open vSwitch developers Uploaders: Ben Pfaff , Simon Horman Build-Depends: debhelper (>= 5), autoconf (>= 2.64), automake1.10, libssl-dev, - pkg-config (>= 0.21), po-debconf, bzip2, openssl, python + pkg-config (>= 0.21), po-debconf, bzip2, openssl, python, + python-support (>= 0.8.4) Standards-Version: 3.9.1 Homepage: http://openvswitch.org/ @@ -94,3 +95,10 @@ Description: Debug symbols for Open vSwitch packages This package contains the debug symbols for all the other openvswitch-* packages. 
Install it to debug one of them or to examine a core dump produced by one of them. + +Package: openvswitch-python +Architecture: all +Depends: ${python:Depends}, openvswitch-switch (= ${binary:Version}) +Provides: ${python:Provides} +Description: Python bindings for Open vSwitch + This package contains the full Python bindings for Open vSwitch database. diff --git a/debian/copyright.in b/debian/copyright.in index 0cac63471..6a7370e39 100644 --- a/debian/copyright.in +++ b/debian/copyright.in @@ -46,7 +46,7 @@ License: You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. Copyright (c) 2005, 2007 XenSource Ltd. Copyright (c) 2010, Nicira Networks. diff --git a/debian/openvswitch-python.dirs b/debian/openvswitch-python.dirs new file mode 100644 index 000000000..055838c0b --- /dev/null +++ b/debian/openvswitch-python.dirs @@ -0,0 +1,2 @@ +usr/share/python-support/openvswitch-python/ovs/ +usr/share/python-support/openvswitch-python/ovs/db/ diff --git a/debian/openvswitch-python.install b/debian/openvswitch-python.install new file mode 100644 index 000000000..35d35e784 --- /dev/null +++ b/debian/openvswitch-python.install @@ -0,0 +1,2 @@ +python/ovs/*.py usr/share/python-support/openvswitch-python/ovs/ +python/ovs/db/*.py usr/share/python-support/openvswitch-python/ovs/db/ diff --git a/debian/rules b/debian/rules index 49d562ad7..8e7ad1e82 100755 --- a/debian/rules +++ b/debian/rules @@ -103,6 +103,7 @@ binary-common: dh_strip --dbg-package=openvswitch-dbg dh_compress dh_fixperms -X var/log/core + dh_pysupport dh_perl dh_makeshlibs dh_installdeb -- cgit v1.2.1 From 110b54816bb56b8ba4878812ad300a3a277abb06 Mon Sep 17 00:00:00 2001 From: Ethan Jackson Date: Fri, 17 Sep 2010 15:06:17 -0700 Subject: python: timer_wait_until calculated current time 
incorrectly The timer_wait_until function in poller.py was using Time.msec to figure out the current time. Unfortunately, Time.msec does not exist. Changed to use ovs.timeval.msec . --- python/ovs/poller.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/ovs/poller.py b/python/ovs/poller.py index 57417c481..2a0b2ecbb 100644 --- a/python/ovs/poller.py +++ b/python/ovs/poller.py @@ -15,6 +15,7 @@ import errno import logging import select +import ovs.timeval class Poller(object): """High-level wrapper around the "poll" system call. @@ -62,15 +63,15 @@ class Poller(object): self.__timer_wait(msec) def timer_wait_until(self, msec): - """Causes the following call to self.block() to wake up when the - current time, as returned by Time.msec(), reaches 'msec' or later. If + """Causes the following call to self.block() to wake up when the current + time, as returned by ovs.timeval.msec(), reaches 'msec' or later. If 'msec' is earlier than the current time, the following call to self.block() will not block at all. The timer registration is one-shot: only the following call to self.block() is affected. 
The timer will need to be re-registered after self.block() is called if it is to persist.""" - now = Time.msec() + now = ovs.timeval.msec() if msec <= now: self.immediate_wake() else: -- cgit v1.2.1 From 2ed7d6e227c009f0f6bdabbb85262d8aac896e00 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 21 Sep 2010 12:06:12 +0900 Subject: debian: Add override of non-standard-dir-perm to corekeeper Remove code to restart openvswitch-switch from its postinst script as this is automatically added by dh_installinit Signed-off-by: Simon Horman Signed-off-by: Ben Pfaff --- debian/openvswitch-switch.postinst | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/debian/openvswitch-switch.postinst b/debian/openvswitch-switch.postinst index 4be5a30c7..74b52ba90 100755 --- a/debian/openvswitch-switch.postinst +++ b/debian/openvswitch-switch.postinst @@ -33,17 +33,6 @@ case "$1" in fi done fi - - if /etc/init.d/openvswitch-switch status >/dev/null 2>&1; then - running=true - /etc/init.d/openvswitch-switch stop - else - running=false - fi - - if $running; then - /etc/init.d/openvswitch-switch start - fi ;; abort-upgrade|abort-remove|abort-deconfigure) -- cgit v1.2.1 From b9ddc6aaaea369faf7787b690de2ae56ffd08ad9 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 21 Sep 2010 12:06:13 +0900 Subject: debian: Dont fail init if module cant be inserted The problem here is that the daemon is started/restarted on package installation, but the module may not be present at that time and (as far as I know) its bad form to fail the package installation in that circumstance. In keeping with the way ipvsadm handles a similar problem, exit with a non-error exit status if the module can't be inserted. The loud error message is still displayed. 
Signed-off-by: Simon Horman Signed-off-by: Ben Pfaff --- debian/openvswitch-switch.init | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/openvswitch-switch.init b/debian/openvswitch-switch.init index 34b5604dc..600c18564 100755 --- a/debian/openvswitch-switch.init +++ b/debian/openvswitch-switch.init @@ -157,7 +157,7 @@ load_module() { echo "For instructions, read" echo "/usr/share/doc/openvswitch-datapath-source/README.Debian" fi - exit 1 + exit 0 fi } -- cgit v1.2.1 From cbb863426624fff643b30b22ddb8c524ec7891c4 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 21 Sep 2010 12:06:14 +0900 Subject: debian: The second argument to m-a should be openvswitch-datapath Signed-off-by: Simon Horman Signed-off-by: Ben Pfaff --- debian/openvswitch-datapath-source.README.Debian | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/openvswitch-datapath-source.README.Debian b/debian/openvswitch-datapath-source.README.Debian index 73bba7a14..a9132c5f3 100644 --- a/debian/openvswitch-datapath-source.README.Debian +++ b/debian/openvswitch-datapath-source.README.Debian @@ -5,9 +5,9 @@ Open vSwitch for Debian - Building with module-assistant: - $ module-assistant auto-install openvswitch + $ module-assistant auto-install openvswitch-datapath or - $ m-a a-i openvswitch + $ m-a a-i openvswitch-datapath If kernel source or headers are in a non-standard directory, add the option -k /path/to/kernel/source with the correct path. -- cgit v1.2.1 From 1d273d6d802e5daeebe551e8ca0c3e99f4dda15e Mon Sep 17 00:00:00 2001 From: Sajjad Lateef Date: Tue, 21 Sep 2010 11:18:26 -0700 Subject: debian: Rename openvswitch-python to python-openvswitch Package name changed to be consistent with Debian Python policy. 
The python files are installed in /usr/share/python-support/python-openvswitch/ovs and /usr/share/python-support/python-openvswitch/ovs/db Changed Section to python, per lintian --- debian/automake.mk | 4 ++-- debian/control | 3 ++- debian/openvswitch-python.dirs | 2 -- debian/openvswitch-python.install | 2 -- debian/python-openvswitch.dirs | 2 ++ debian/python-openvswitch.install | 2 ++ 6 files changed, 8 insertions(+), 7 deletions(-) delete mode 100644 debian/openvswitch-python.dirs delete mode 100644 debian/openvswitch-python.install create mode 100644 debian/python-openvswitch.dirs create mode 100644 debian/python-openvswitch.install diff --git a/debian/automake.mk b/debian/automake.mk index dc18961a4..c768d56b5 100644 --- a/debian/automake.mk +++ b/debian/automake.mk @@ -29,8 +29,6 @@ EXTRA_DIST += \ debian/openvswitch-pki-server.install \ debian/openvswitch-pki-server.postinst \ debian/openvswitch-pki.postinst \ - debian/openvswitch-python.dirs \ - debian/openvswitch-python.install \ debian/openvswitch-switch.README.Debian \ debian/openvswitch-switch.dirs \ debian/openvswitch-switch.init \ @@ -41,6 +39,8 @@ EXTRA_DIST += \ debian/openvswitch-switch.postrm \ debian/openvswitch-switch.template \ debian/ovs-bugtool \ + debian/python-openvswitch.dirs \ + debian/python-openvswitch.install \ debian/rules \ debian/rules.modules diff --git a/debian/control b/debian/control index fde640eac..b7f2248f9 100644 --- a/debian/control +++ b/debian/control @@ -96,8 +96,9 @@ Description: Debug symbols for Open vSwitch packages packages. Install it to debug one of them or to examine a core dump produced by one of them. 
-Package: openvswitch-python +Package: python-openvswitch Architecture: all +Section: python Depends: ${python:Depends}, openvswitch-switch (= ${binary:Version}) Provides: ${python:Provides} Description: Python bindings for Open vSwitch diff --git a/debian/openvswitch-python.dirs b/debian/openvswitch-python.dirs deleted file mode 100644 index 055838c0b..000000000 --- a/debian/openvswitch-python.dirs +++ /dev/null @@ -1,2 +0,0 @@ -usr/share/python-support/openvswitch-python/ovs/ -usr/share/python-support/openvswitch-python/ovs/db/ diff --git a/debian/openvswitch-python.install b/debian/openvswitch-python.install deleted file mode 100644 index 35d35e784..000000000 --- a/debian/openvswitch-python.install +++ /dev/null @@ -1,2 +0,0 @@ -python/ovs/*.py usr/share/python-support/openvswitch-python/ovs/ -python/ovs/db/*.py usr/share/python-support/openvswitch-python/ovs/db/ diff --git a/debian/python-openvswitch.dirs b/debian/python-openvswitch.dirs new file mode 100644 index 000000000..9bc6912a3 --- /dev/null +++ b/debian/python-openvswitch.dirs @@ -0,0 +1,2 @@ +usr/share/python-support/python-openvswitch/ovs/ +usr/share/python-support/python-openvswitch/ovs/db/ diff --git a/debian/python-openvswitch.install b/debian/python-openvswitch.install new file mode 100644 index 000000000..211ed300e --- /dev/null +++ b/debian/python-openvswitch.install @@ -0,0 +1,2 @@ +python/ovs/*.py usr/share/python-support/python-openvswitch/ovs/ +python/ovs/db/*.py usr/share/python-support/python-openvswitch/ovs/db/ -- cgit v1.2.1 From 9884eaad2b8386aefa036f370891fd1159598994 Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Tue, 21 Sep 2010 11:58:11 -0700 Subject: Indicate that OVS only works with kernels 2.6.18 and greater At one time, we tested against 2.6.15. This hasn't been done for a long time, and it almost certainly no longer works. 
--- README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README b/README index 881a11343..55e9d9c9b 100644 --- a/README +++ b/README @@ -29,7 +29,7 @@ vSwitch supports the following features: * Support for OpenFlow * Compatibility layer for the Linux bridging code -The included Linux kernel module supports Linux 2.6.15 and up, with +The included Linux kernel module supports Linux 2.6.18 and up, with testing focused on 2.6.18 with Centos and Xen patches and version 2.6.26 from kernel.org. Open vSwitch also has special support for Citrix XenServer hosts. -- cgit v1.2.1 From 5fa555b3cf6030bbeda1bbcf169d514f3ae1a077 Mon Sep 17 00:00:00 2001 From: Ethan Jackson Date: Fri, 17 Sep 2010 15:07:02 -0700 Subject: xenserver: monitor-external-ids responds to SIGHUP When monitor-external-ids receives a SIGHUP it will forget everything and run as if it was newly started. Feature #3668. --- ..._share_openvswitch_scripts_monitor-external-ids | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids index a0aad7a0e..45b3dd7bf 100755 --- a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids +++ b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids @@ -22,6 +22,7 @@ import getopt import os +import signal import subprocess import sys import syslog @@ -37,6 +38,7 @@ import ovs.db.idl vsctl="/usr/bin/ovs-vsctl" session = None +force_run = False # Set up a session to interact with XAPI. # @@ -163,8 +165,15 @@ def usage(): print "Other options:" print " -h, --help display this help message" sys.exit(0) - + +def handler(signum, frame): + global force_run + if (signum == signal.SIGHUP): + force_run = True + def main(argv): + global force_run + try: options, args = getopt.gnu_getopt( argv[1:], 'h', ['help'] + ovs.daemon.LONG_OPTIONS) @@ -196,16 +205,23 @@ def main(argv): # tasks, we need it. 
Wait here until it's up. while not os.path.exists("/var/run/xapi_init_complete.cookie"): time.sleep(1) + + signal.signal(signal.SIGHUP, handler) bridges = {} interfaces = {} while True: - if not idl.run(): + if not force_run and not idl.run(): poller = ovs.poller.Poller() idl.wait(poller) poller.block() continue - + + if force_run: + bridges = {} + interfaces = {} + force_run = False + new_bridges = {} for rec in idl.data["Bridge"].itervalues(): name = rec.name.as_scalar() -- cgit v1.2.1 From 97685b902d63f48bdc9f63e5458a1ac5ae59b0b5 Mon Sep 17 00:00:00 2001 From: Ethan Jackson Date: Fri, 17 Sep 2010 16:05:04 -0700 Subject: xenserver: reload sends SIGHUP to monitor-external-ids When the init script's reload function is called it will send a SIGHUP to monitor-external-ids. This will cause monitor-external-ids to re-generate everything. Feature #3668. --- xenserver/etc_init.d_openvswitch | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/xenserver/etc_init.d_openvswitch b/xenserver/etc_init.d_openvswitch index 68079fcf5..282765323 100755 --- a/xenserver/etc_init.d_openvswitch +++ b/xenserver/etc_init.d_openvswitch @@ -117,6 +117,12 @@ else monitor_opt= fi +function hup_monitor_external_ids { + if [ -e /var/run/openvswitch/monitor-external-ids.pid ]; then + action "Configuring Open vSwitch external IDs" kill -HUP `cat /var/run/openvswitch/monitor-external-ids.pid` + fi +} + function dp_list { "$dpctl" show | grep '^dp[0-9]\+:' | cut -d':' -f 1 } @@ -380,8 +386,10 @@ case "$1" in restart ;; reload|force-reload) - # Nothing to do--ovs-vswitchd and ovsdb-server keep their configuration - # up-to-date all the time. + # Nothing to do to ovs-vswitchd and ovsdb-server as they keep their + # configuration up-to-date all the time. HUP monitor-external-ids so it + # re-runs. 
+ hup_monitor_external_ids ;; strace-vswitchd) shift -- cgit v1.2.1 From b524945e78804bf4ef46abb936eba829538f8e29 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 21 Sep 2010 14:27:51 -0700 Subject: tests: Add 5-second timeout to ovs-vsctl tests. Otherwise some of the tests may not terminate if something goes wrong. (Formerly, ovs-vsctl had a default timeout of 5 seconds, so this was not necessary before.) --- tests/ovs-vsctl.at | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ovs-vsctl.at b/tests/ovs-vsctl.at index 56fc1ce2d..66ead6dd3 100644 --- a/tests/ovs-vsctl.at +++ b/tests/ovs-vsctl.at @@ -15,17 +15,17 @@ dnl RUN_OVS_VSCTL(COMMAND, ...) dnl dnl Executes each ovs-vsctl COMMAND. m4_define([RUN_OVS_VSCTL], - [m4_foreach([command], [$@], [ovs-vsctl --no-wait -vreconnect:ANY:emer --db=unix:socket -- command + [m4_foreach([command], [$@], [ovs-vsctl --timeout=5 --no-wait -vreconnect:ANY:emer --db=unix:socket -- command ])]) m4_define([RUN_OVS_VSCTL_ONELINE], - [m4_foreach([command], [$@], [ovs-vsctl --no-wait -vreconnect:ANY:emer --db=unix:socket --oneline -- command + [m4_foreach([command], [$@], [ovs-vsctl --timeout=5 --no-wait -vreconnect:ANY:emer --db=unix:socket --oneline -- command ])]) dnl RUN_OVS_VSCTL_TOGETHER(COMMAND, ...) dnl dnl Executes each ovs-vsctl COMMAND in a single run of ovs-vsctl. m4_define([RUN_OVS_VSCTL_TOGETHER], - [ovs-vsctl --no-wait -vreconnect:ANY:emer --db=unix:socket --oneline dnl + [ovs-vsctl --timeout=5 --no-wait -vreconnect:ANY:emer --db=unix:socket --oneline dnl m4_foreach([command], [$@], [ -- command])]) dnl CHECK_BRIDGES([BRIDGE, PARENT, VLAN], ...) -- cgit v1.2.1 From aed133bf9bce8f35b666c3052907f525c803c83b Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 20 Sep 2010 10:56:15 -0700 Subject: ovs-vsctl: Allow "get" commands to create @names also. This is useful for adding records that refer to other records by UUID, e.g. 
ovs-vsctl \ -- set bridge br0 mirrors=@m \ -- --id=@eth0 get port eth0 \ -- --id=@eth1 get port eth1 \ -- --id=@m create mirror name=mymirror select-dst-port=@eth0 \ select-src-port=@eth0 output-port=@eth1 --- tests/ovs-vsctl.at | 49 ++++++++++++++++++++++++++++++++++++++++++++ utilities/ovs-vsctl.8.in | 8 ++++++-- utilities/ovs-vsctl.c | 53 +++++++++++++++++++++++++++++++----------------- 3 files changed, 89 insertions(+), 21 deletions(-) diff --git a/tests/ovs-vsctl.at b/tests/ovs-vsctl.at index 66ead6dd3..152a6585c 100644 --- a/tests/ovs-vsctl.at +++ b/tests/ovs-vsctl.at @@ -705,6 +705,55 @@ AT_CHECK([cat stdout4], [0], [500 OVS_VSCTL_CLEANUP AT_CLEANUP +AT_SETUP([--id option on create, get commands]) +AT_KEYWORDS([ovs-vsctl]) +OVS_VSCTL_SETUP +AT_CHECK([RUN_OVS_VSCTL([add-br br0], + [add-port br0 eth0], + [add-port br0 eth1])]) +AT_CHECK( + [RUN_OVS_VSCTL_TOGETHER( + [set bridge br0 mirrors=@m], + [--id=@eth0 get port eth0], + [--id=@eth1 get port eth1], + [--id=@m create mirror name=mymirror select-dst-port=@eth0 select-src-port=@eth0 output-port=@eth1])], + [0], [stdout], [], [OVS_VSCTL_CLEANUP]) +AT_CHECK( + [perl $srcdir/uuidfilt.pl stdout], [0], [dnl + + + +<0> +], + [], [OVS_VSCTL_CLEANUP]) +AT_CHECK( + [RUN_OVS_VSCTL( + [list port eth0 eth1], + [list mirror], + [list bridge br0])], + [0], [stdout], [], [OVS_VSCTL_CLEANUP]) +AT_CHECK( + [sed -n -e '/uuid/p' -e '/name/p' -e '/mirrors/p' -e '/select/p' -e '/output/p' < stdout | $srcdir/uuidfilt.pl], [0], [dnl +[_uuid : <0> +name : "eth0" +_uuid : <1> +name : "eth1" +_uuid : <2> +name : mymirror +output_port : <1> +output_vlan : [] +select_all : false +select_dst_port : [<0>] +select_src_port : [<0>] +select_vlan : [] +_uuid : <3> +mirrors : [<2>] +name : "br0" +]], + [], [OVS_VSCTL_CLEANUP]) +OVS_VSCTL_CLEANUP +AT_CLEANUP + dnl This test really shows a bug -- "create" followed by "list" in dnl the same execution shows the wrong UUID on the "list" command. dnl The bug is documented in ovs-vsctl.8.
diff --git a/utilities/ovs-vsctl.8.in b/utilities/ovs-vsctl.8.in index ef080bb41..cfb911483 100644 --- a/utilities/ovs-vsctl.8.in +++ b/utilities/ovs-vsctl.8.in @@ -490,7 +490,7 @@ pair of double quotes (\fB""\fR). .IP "UUID" Either a universally unique identifier in the style of RFC 4122, e.g. \fBf81d4fae\-7dec\-11d0\-a765\-00a0c91e6bf6\fR, or an \fB@\fIname\fR -defined by the \fBcreate\fR command within the same \fBovs\-vsctl\fR +defined by a \fBget\fR or \fBcreate\fR command within the same \fBovs\-vsctl\fR invocation. .PP Multiple values in a single column may be separated by spaces or a @@ -518,7 +518,7 @@ records are specified, lists all the records in \fItable\fR. The UUIDs shown for rows created in the same \fBovs\-vsctl\fR invocation will be wrong. . -.IP "[\fB\-\-if\-exists\fR] \fBget \fItable record column\fR[\fB:\fIkey\fR]..." +.IP "[\fB\-\-id=@\fIname\fR] [\fB\-\-if\-exists\fR] \fBget \fItable record \fR[\fIcolumn\fR[\fB:\fIkey\fR]]..." Prints the value of each specified \fIcolumn\fR in the given \fIrecord\fR in \fItable\fR. For map columns, a \fIkey\fR may optionally be specified, in which case the value associated with @@ -528,6 +528,10 @@ For a map column, without \fB\-\-if\-exists\fR it is an error if \fIkey\fR does not exist; with it, a blank line is printed. If \fIcolumn\fR is not a map column or if \fIkey\fR is not specified, \fB\-\-if\-exists\fR has no effect. +.IP +If \fB@\fIname\fR is specified, then the UUID for \fIrecord\fR may be +referred to by that name later in the same \fBovs\-vsctl\fR +invocation in contexts where a UUID is expected. . .IP "\fBset \fItable record column\fR[\fB:\fIkey\fR]\fB=\fIvalue\fR..." 
Sets the value of each specified \fIcolumn\fR in the given diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c index 884a41faf..d09cf7460 100644 --- a/utilities/ovs-vsctl.c +++ b/utilities/ovs-vsctl.c @@ -1975,6 +1975,28 @@ get_column(const struct vsctl_table_class *table, const char *column_name, } } +static struct uuid * +create_symbol(struct ovsdb_symbol_table *symtab, const char *id, bool *newp) +{ + struct ovsdb_symbol *symbol; + + if (id[0] != '@') { + vsctl_fatal("row id \"%s\" does not begin with \"@\"", id); + } + + if (newp) { + *newp = ovsdb_symbol_table_get(symtab, id) == NULL; + } + + symbol = ovsdb_symbol_table_insert(symtab, id); + if (symbol->used) { + vsctl_fatal("row id \"%s\" may only be specified on one --id option", + id); + } + symbol->used = true; + return &symbol->uuid; +} + static char * missing_operator_error(const char *arg, const char **allowed_operators, size_t n_allowed) @@ -2142,6 +2164,7 @@ error: static void cmd_get(struct vsctl_context *ctx) { + const char *id = shash_find_data(&ctx->options, "--id"); bool if_exists = shash_find(&ctx->options, "--if-exists"); const char *table_name = ctx->argv[1]; const char *record_id = ctx->argv[2]; @@ -2152,6 +2175,15 @@ cmd_get(struct vsctl_context *ctx) table = get_table(table_name); row = must_get_row(ctx, table, record_id); + if (id) { + bool new; + + *create_symbol(ctx->symtab, id, &new) = row->uuid; + if (!new) { + vsctl_fatal("row id \"%s\" specified on \"get\" command was used " + "before it was defined", id); + } + } for (i = 3; i < ctx->argc; i++) { const struct ovsdb_idl_column *column; const struct ovsdb_datum *datum; @@ -2453,24 +2485,7 @@ cmd_create(struct vsctl_context *ctx) const struct uuid *uuid; int i; - if (id) { - struct ovsdb_symbol *symbol; - - if (id[0] != '@') { - vsctl_fatal("row id \"%s\" does not begin with \"@\"", id); - } - - symbol = ovsdb_symbol_table_insert(ctx->symtab, id); - if (symbol->used) { - vsctl_fatal("row id \"%s\" may only be used to insert a 
single " - "row", id); - } - symbol->used = true; - - uuid = &symbol->uuid; - } else { - uuid = NULL; - } + uuid = id ? create_symbol(ctx->symtab, id, NULL) : NULL; table = get_table(table_name); row = ovsdb_idl_txn_insert(ctx->txn, table->class, uuid); @@ -2865,7 +2880,7 @@ static const struct vsctl_command_syntax all_commands[] = { {"emer-reset", 0, 0, cmd_emer_reset, NULL, ""}, /* Parameter commands. */ - {"get", 3, INT_MAX, cmd_get, NULL, "--if-exists"}, + {"get", 2, INT_MAX, cmd_get, NULL, "--if-exists,--id="}, {"list", 1, INT_MAX, cmd_list, NULL, ""}, {"set", 3, INT_MAX, cmd_set, NULL, ""}, {"add", 4, INT_MAX, cmd_add, NULL, ""}, -- cgit v1.2.1 From bad973d7ae0ca5273b46c79b4bd9007e607f277c Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 20 Sep 2010 11:57:13 -0700 Subject: ovs-vsctl: Add many more usage examples. --- utilities/ovs-vsctl.8.in | 90 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/utilities/ovs-vsctl.8.in b/utilities/ovs-vsctl.8.in index cfb911483..281756c90 100644 --- a/utilities/ovs-vsctl.8.in +++ b/utilities/ovs-vsctl.8.in @@ -631,7 +631,97 @@ point to a new \fBQoS\fR record, which in turn points with its queue 0 to a new \fBQueue\fR record: .IP .B "ovs\-vsctl \-\- set port eth0 qos=@newqos \-\- \-\-id=@newqos create qos type=linux\-htb other\-config:max\-rate=1000000 queues:0=@newqueue \-\- \-\-id=@newqueue create queue other\-config:min\-rate=1000000 other\-config:max\-rate=1000000" +.SH "CONFIGURATION COOKBOOK" +.SS "Port Configuration" +.PP +Add an ``internal port'' \fBvlan10\fR to bridge \fBbr0\fR as a VLAN +access port for VLAN 10, and configure it with an IP address: +.IP +.B "ovs\-vsctl add\-port br0 vlan10 tag=10 \-\- set Interface vlan10 type=internal" +.IP +.B "ifconfig vlan10 192.168.0.123" . 
+.SS "Port Mirroring" +.PP +Mirror all packets received or sent on \fBeth0\fR or \fBeth1\fR onto +\fBeth2\fR, assuming that all of those ports exist on bridge \fBbr0\fR +(as a side-effect this causes any packets received on \fBeth2\fR to be +ignored): +.IP +.B "ovs\-vsctl \-\- set Bridge br0 mirrors=@m \(rs" +.IP +.B "\-\- \-\-id=@eth0 get Port eth0 \(rs" +.IP +.B "\-\- \-\-id=@eth1 get Port eth1 \(rs" +.IP +.B "\-\- \-\-id=@eth2 get Port eth2 \(rs" +.IP +.B "\-\- \-\-id=@m create Mirror name=mymirror select-dst-port=@eth0,@eth1 select-src-port=@eth0,@eth1 output-port=@eth2" +.PP +Remove the mirror created above from \fBbr0\fR and destroy the Mirror +record (to avoid having an unreferenced record in the database): +.IP +.B "ovs\-vsctl destroy Mirror mymirror \-\- clear Bridge br0 mirrors" +.SS "Quality of Service (QoS)" +.PP +Create a \fBlinux\-htb\fR QoS record that points to a few queues and +use it on \fBeth0\fR and \fBeth1\fR: +.IP +.B "ovs\-vsctl \-\- set Port eth0 qos=@newqos \(rs" +.IP +.B "\-\- set Port eth1 qos=@newqos \(rs" +.IP +.B "\-\- \-\-id=@newqos create QoS type=linux\-htb other\-config:max\-rate=1000000000 queues=0=@q0,1=@q1 \(rs" +.IP +.B "\-\- \-\-id=@q0 create Queue other\-config:min\-rate=100000000 other\-config:max\-rate=100000000 \(rs" +.IP +.B "\-\- \-\-id=@q1 create Queue other\-config:min\-rate=500000000" +.PP +Deconfigure the QoS record above from \fBeth1\fR only: +.IP +.B "ovs\-vsctl clear Port eth1 qos" +.PP +To deconfigure the QoS record from both \fBeth0\fR and \fBeth1\fR and +then delete the QoS record: +.IP +.B "ovs\-vsctl \-\- destroy QoS eth0 \-\- clear Port eth0 qos \-\- clear Port eth1 qos" +.PP +(This command will leave two unreferenced Queue records in the +database. To delete them, use "\fBovs\-vsctl list Queue\fR" to find +their UUIDs, then "\fBovs\-vsctl destroy Queue \fIuuid1\fR +\fIuuid2\fR" to destroy each of them.) 
+.SS "NetFlow" +.PP +Configure bridge \fBbr0\fR to send NetFlow records to UDP port 5566 on +host 192.168.0.34, with an active timeout of 30 seconds: +.IP +.B "ovs\-vsctl \-\- set Bridge br0 netflow=@nf \(rs" +.IP +.B "\-\- \-\-id=@nf create NetFlow targets=\(rs\(dq192.168.0.34:5566\(rs\(dq active\-timeout=30" +.PP +Update the NetFlow configuration created by the previous command to +instead use an active timeout of 60 seconds: +.IP +.B "ovs\-vsctl set NetFlow br0 active_timeout=60" +.PP +Deconfigure the NetFlow settings from \fBbr0\fR and delete the NetFlow +record (to avoid having an unreferenced record in the database): +.IP +.B "ovs\-vsctl destroy NetFlow br0 \-\- clear Bridge br0 netflow" +.SS "sFlow" +.PP +Configure bridge \fBbr0\fR to send sFlow records to a collector on +10.0.0.1 at port 6343, using \fBeth1\fR\'s IP address as the source, +with specific sampling parameters: +.IP +.B "ovs\-vsctl \-\- \-\-id=@s create sFlow agent=eth1 target=\(rs\(dq10.0.0.1:6343\(rs\(dq header=128 sampling=64 polling=10 \(rs" +.IP +.B "\-\- set Bridge br0 sflow=@s" +.PP +Deconfigure sFlow from br0 and destroy the sFlow record (to avoid +having an unreferenced record in the database): +.IP +.B "ovs\-vsctl \-\- destroy sFlow br0 \-\- clear Bridge br0 sflow" .SH "EXIT STATUS" .IP "0" Successful program execution. -- cgit v1.2.1 From 88ec924aba6e3301b97110994e7da18c2331faa7 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 22 Sep 2010 09:52:27 -0700 Subject: xenserver: Remove LICENSE header that is difficult to keep up to date. I had forgotten that I had added this header. Let's keep all the information about licensing in individual files instead. 
Signed-off-by: Ben Pfaff --- xenserver/LICENSE | 5 ----- 1 file changed, 5 deletions(-) diff --git a/xenserver/LICENSE b/xenserver/LICENSE index ce8949ef1..00fc4d8cb 100644 --- a/xenserver/LICENSE +++ b/xenserver/LICENSE @@ -1,8 +1,3 @@ -The files etc_xensource_scripts_vif and -opt_xensource_libexec_interface-reconfigure are distributed under the -terms of the GNU Lesser General Public License version 2.1 (included -below). - As a special exception to the GNU Lesser General Public License, you may link, statically or dynamically, a "work that uses the Library" with a publicly distributed version of the Library to produce an -- cgit v1.2.1 From 1d7ab9963c34b2f23608195b81312bafa4c01cd6 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 22 Sep 2010 13:14:37 -0700 Subject: xenserver: Add GPLv2 license text. xsconsole is being relicensed under GPLv2 so we need to include the text. It would be more usual to name this file COPYING and to name the LGPLv2.1 that is already named LICENSE as COPYING.LIB, but some of the files pulled in from XenServer say that their license is in a file named LICENSE. I don't expect that Citrix would be willing to change that, so it seems better to keep LGPLv2.1 named LICENSE. Signed-off-by: Ben Pfaff --- xenserver/GPLv2 | 339 ++++++++++++++++++++++++++++++++++++++++++++++++++ xenserver/automake.mk | 1 + 2 files changed, 340 insertions(+) create mode 100644 xenserver/GPLv2 diff --git a/xenserver/GPLv2 b/xenserver/GPLv2 new file mode 100644 index 000000000..d511905c1 --- /dev/null +++ b/xenserver/GPLv2 @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. 
By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. 
If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. 
You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. 
(This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License.
Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/xenserver/automake.mk b/xenserver/automake.mk index ffd59960b..85911bb56 100644 --- a/xenserver/automake.mk +++ b/xenserver/automake.mk @@ -6,6 +6,7 @@ # without warranty of any kind. EXTRA_DIST += \ + xenserver/GPLv2 \ xenserver/LICENSE \ xenserver/README \ xenserver/automake.mk \ -- cgit v1.2.1 From 9b35536a145361285d341f83bc6fc19408bb04e5 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 22 Sep 2010 09:32:58 -0700 Subject: xenserver: Change license of xsconsole plugin to GPLv2. This file was under a proprietary license because it was derived from proprietary XenServer code. That upstream code is now under GPLv2, so change the downstream code to GPLv2 also.
Acked-by: Ian Campbell Signed-off-by: Ben Pfaff --- ...r_lib_xsconsole_plugins-base_XSFeatureVSwitch.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py b/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py index d2f6a6a3c..015f4cc00 100644 --- a/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py +++ b/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py @@ -1,11 +1,18 @@ -# Copyright (c) Citrix Systems 2008. All rights reserved. -# xsconsole is proprietary software. +# Copyright (c) 2007-2010 Citrix Systems Inc. +# Copyright (c) 2009,2010 Nicira Networks. # -# Xen, the Xen logo, XenCenter, XenMotion are trademarks or registered -# trademarks of Citrix Systems, Inc., in the United States and other -# countries. - -# Copyright (c) 2009, 2010 Nicira Networks. +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 only. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. from XSConsoleLog import * -- cgit v1.2.1 From ee5311097049272834308f64931172f1c8210755 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 22 Sep 2010 10:05:29 -0700 Subject: xenserver: Add explicit license to refresh-xs-network-uuids. I had assumed that a trivial one-line shell script didn't need an explicit license, but it seems that I was wrong. 
Signed-off-by: Ben Pfaff --- xenserver/usr_share_openvswitch_scripts_refresh-xs-network-uuids | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/xenserver/usr_share_openvswitch_scripts_refresh-xs-network-uuids b/xenserver/usr_share_openvswitch_scripts_refresh-xs-network-uuids index 42ebe06e7..35df06da4 100755 --- a/xenserver/usr_share_openvswitch_scripts_refresh-xs-network-uuids +++ b/xenserver/usr_share_openvswitch_scripts_refresh-xs-network-uuids @@ -1,2 +1,9 @@ #! /bin/sh +# Copyright (C) 2009, 2010 Nicira Networks, Inc. +# +# Copying and distribution of this file, with or without modification, +# are permitted in any medium without royalty provided the copyright +# notice and this notice are preserved. This file is offered as-is, +# without warranty of any kind. + exec /opt/xensource/libexec/interface-reconfigure rewrite -- cgit v1.2.1 From 6787c8cd1c5a4882e9370d3d0dad4590f781ae42 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 22 Sep 2010 10:03:31 -0700 Subject: xenserver: Add license to uuid.py. There seemed to be some confusion regarding this file's provenance, so it is best to clarify. Signed-off-by: Ben Pfaff --- xenserver/uuid.py | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/xenserver/uuid.py b/xenserver/uuid.py index ae3da25ca..599ece9ea 100644 --- a/xenserver/uuid.py +++ b/xenserver/uuid.py @@ -1,3 +1,56 @@ +# This file is from Python 2.5. It has been modified by adding this +# license header, which is copied from the LICENSE file distributed +# with Python. +# +# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +# -------------------------------------------- +# +# 1. This LICENSE AGREEMENT is between the Python Software Foundation +# ("PSF"), and the Individual or Organization ("Licensee") accessing and +# otherwise using this software ("Python") in source or binary form and +# its associated documentation. +# +# 2. 
Subject to the terms and conditions of this License Agreement, PSF +# hereby grants Licensee a nonexclusive, royalty-free, world-wide +# license to reproduce, analyze, test, perform and/or display publicly, +# prepare derivative works, distribute, and otherwise use Python +# alone or in any derivative version, provided, however, that PSF's +# License Agreement and PSF's notice of copyright, i.e., "Copyright (c) +# 2001, 2002, 2003, 2004, 2005, 2006, 2007 Python Software Foundation; +# All Rights Reserved" are retained in Python alone or in any derivative +# version prepared by Licensee. +# +# 3. In the event Licensee prepares a derivative work that is based on +# or incorporates Python or any part thereof, and wants to make +# the derivative work available to others as provided herein, then +# Licensee hereby agrees to include in any such work a brief summary of +# the changes made to Python. +# +# 4. PSF is making Python available to Licensee on an "AS IS" +# basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +# IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +# DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +# FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +# INFRINGE ANY THIRD PARTY RIGHTS. +# +# 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +# FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +# A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +# OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. +# +# 6. This License Agreement will automatically terminate upon a material +# breach of its terms and conditions. +# +# 7. Nothing in this License Agreement shall be deemed to create any +# relationship of agency, partnership, or joint venture between PSF and +# Licensee. 
This License Agreement does not grant permission to use PSF +# trademarks or trade name in a trademark sense to endorse or promote +# products or services of Licensee, or any third party. +# +# 8. By copying, installing or otherwise using Python, Licensee +# agrees to be bound by the terms and conditions of this License +# Agreement. + r"""UUID objects (universally unique identifiers) according to RFC 4122. This module provides immutable UUID objects (class UUID) and the functions -- cgit v1.2.1 From 02892690e6866781a0a6c84e25a2c1d277822788 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 22 Sep 2010 13:16:07 -0700 Subject: xenserver: Now everything is free. Signed-off-by: Ben Pfaff --- COPYING | 3 +-- xenserver/README | 7 ++----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/COPYING b/COPYING index 375efecae..8d1bc9d51 100644 --- a/COPYING +++ b/COPYING @@ -20,8 +20,7 @@ Files under the datapath directory are licensed under the GNU General Public License, version 2. Files under the xenserver directory are licensed on a file-by-file -basis. Some files are under an uncertain license that may not be -DFSG-compliant or GPL-compatible. Refer to each file for details. +basis. Refer to each file for details. Files lib/sflow*.[ch] are licensed under the terms of the InMon sFlow licence that is available at: diff --git a/xenserver/README b/xenserver/README index 9fcdb1a3d..941c4e1e7 100644 --- a/xenserver/README +++ b/xenserver/README @@ -1,11 +1,8 @@ This directory contains files for seamless integration of Open vSwitch on Citrix XenServer hosts managed by the Citrix management tools. -Some of these files are modifications of Citrix's proprietary code. -Citrix has given permission to distribute these modified files. -Citrix has not specified a particular license for them. There is no -guarantee that, should Citrix specify a license, that it would be -DFSG-compliant or GPL-compatible. +Files in this directory are licensed on a file-by-file basis. 
Please +refer to each file for details. Most of the files in this directory is installed on a XenServer system under the same name, if underscores are replaced by slashes. The -- cgit v1.2.1 From 560e802229f3028c02273435dd1c6efba33e0949 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 26 Jul 2010 18:46:27 -0700 Subject: datapath: Move flow allocation into a function. As the process to allocate a flow becomes more involved it becomes more cumbersome for the code to be mixed in with the general datapath so split it out into a new function. Signed-off-by: Jesse Gross Reviewed-by: Ben Pfaff --- datapath/datapath.c | 10 +++++----- datapath/flow.c | 28 +++++++++++++++++++++++----- datapath/flow.h | 14 ++++++++------ 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index b3f77b36a..1677927ff 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -1049,12 +1049,12 @@ static int do_put_flow(struct datapath *dp, struct odp_flow_put *uf, } /* Allocate flow. */ - error = -ENOMEM; - flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); - if (flow == NULL) + flow = flow_alloc(); + if (IS_ERR(flow)) { + error = PTR_ERR(flow); goto error; + } flow->key = uf->flow.key; - spin_lock_init(&flow->lock); clear_stats(flow); /* Obtain actions. */ @@ -1109,7 +1109,7 @@ static int do_put_flow(struct datapath *dp, struct odp_flow_put *uf, error_free_flow_acts: kfree(flow->sf_acts); error_free_flow: - kmem_cache_free(flow_cache, flow); + flow_free(flow); error: return error; } diff --git a/datapath/flow.c b/datapath/flow.c index 7684c061a..1f01166c5 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -123,27 +123,45 @@ struct sw_flow_actions *flow_actions_alloc(size_t n_actions) return sfa; } +struct sw_flow *flow_alloc(void) +{ + struct sw_flow *flow; + + flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); + if (!flow) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&flow->lock); -/* Frees 'flow' immediately. 
*/ -static void flow_free(struct sw_flow *flow) + return flow; +} + +void flow_free(struct sw_flow *flow) { if (unlikely(!flow)) return; - kfree(flow->sf_acts); + kmem_cache_free(flow_cache, flow); } +/* Frees the entire 'flow' (both base and actions) immediately. */ +static void flow_free_full(struct sw_flow *flow) +{ + kfree(flow->sf_acts); + flow_free(flow); +} + void flow_free_tbl(struct tbl_node *node) { struct sw_flow *flow = flow_cast(node); - flow_free(flow); + flow_free_full(flow); } /* RCU callback used by flow_deferred_free. */ static void rcu_free_flow_callback(struct rcu_head *rcu) { struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); - flow_free(flow); + flow_free_full(flow); } /* Schedules 'flow' to be freed after the next RCU grace period. diff --git a/datapath/flow.h b/datapath/flow.h index 80a5b66b1..484ca1207 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -58,20 +58,22 @@ struct arp_eth_header unsigned char ar_tip[4]; /* target IP address */ } __attribute__((packed)); -extern struct kmem_cache *flow_cache; +int flow_init(void); +void flow_exit(void); -struct sw_flow_actions *flow_actions_alloc(size_t n_actions); +struct sw_flow *flow_alloc(void); +void flow_free(struct sw_flow *flow); void flow_deferred_free(struct sw_flow *); +void flow_free_tbl(struct tbl_node *); + +struct sw_flow_actions *flow_actions_alloc(size_t n_actions); void flow_deferred_free_acts(struct sw_flow_actions *); + int flow_extract(struct sk_buff *, u16 in_port, struct odp_flow_key *); void flow_used(struct sw_flow *, struct sk_buff *); u32 flow_hash(const struct odp_flow_key *key); int flow_cmp(const struct tbl_node *, void *target); -void flow_free_tbl(struct tbl_node *); - -int flow_init(void); -void flow_exit(void); static inline struct sw_flow *flow_cast(const struct tbl_node *node) { -- cgit v1.2.1 From fb8c93473efacd67a50117d0f2a3084f2d96ceca Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 29 Aug 2010 09:49:51 -0700 Subject: datapath: Add ref 
counting for flows. Currently flows are only used within the confines of one rcu_read_lock()/rcu_read_unlock() session. However, with the addition of header caching we will need to hold references to flows for longer periods of time. This adds support for that by adding refcounts to flows. RCU is still used for normal packet handling to avoid a performance impact from constantly updating the refcount. However, instead of directly freeing the flow after a grace period we simply decrement the refcount. Signed-off-by: Jesse Gross Reviewed-by: Ben Pfaff --- datapath/datapath.c | 3 ++- datapath/flow.c | 41 ++++++++++++++++++++++++----------------- datapath/flow.h | 7 ++++++- 3 files changed, 32 insertions(+), 19 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index 1677927ff..06e1006a8 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -1109,7 +1109,8 @@ static int do_put_flow(struct datapath *dp, struct odp_flow_put *uf, error_free_flow_acts: kfree(flow->sf_acts); error_free_flow: - flow_free(flow); + flow->sf_acts = NULL; + flow_put(flow); error: return error; } diff --git a/datapath/flow.c b/datapath/flow.c index 1f01166c5..dfbf76938 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -132,36 +132,27 @@ struct sw_flow *flow_alloc(void) return ERR_PTR(-ENOMEM); spin_lock_init(&flow->lock); + atomic_set(&flow->refcnt, 1); + flow->dead = false; return flow; } -void flow_free(struct sw_flow *flow) -{ - if (unlikely(!flow)) - return; - - kmem_cache_free(flow_cache, flow); -} - -/* Frees the entire 'flow' (both base and actions) immediately. */ -static void flow_free_full(struct sw_flow *flow) -{ - kfree(flow->sf_acts); - flow_free(flow); -} - void flow_free_tbl(struct tbl_node *node) { struct sw_flow *flow = flow_cast(node); - flow_free_full(flow); + + flow->dead = true; + flow_put(flow); } /* RCU callback used by flow_deferred_free. 
*/ static void rcu_free_flow_callback(struct rcu_head *rcu) { struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); - flow_free_full(flow); + + flow->dead = true; + flow_put(flow); } /* Schedules 'flow' to be freed after the next RCU grace period. @@ -171,6 +162,22 @@ void flow_deferred_free(struct sw_flow *flow) call_rcu(&flow->rcu, rcu_free_flow_callback); } +void flow_hold(struct sw_flow *flow) +{ + atomic_inc(&flow->refcnt); +} + +void flow_put(struct sw_flow *flow) +{ + if (unlikely(!flow)) + return; + + if (atomic_dec_and_test(&flow->refcnt)) { + kfree(flow->sf_acts); + kmem_cache_free(flow_cache, flow); + } +} + /* RCU callback used by flow_deferred_free_acts. */ static void rcu_free_acts_callback(struct rcu_head *rcu) { diff --git a/datapath/flow.h b/datapath/flow.h index 484ca1207..3f434677b 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -36,6 +36,9 @@ struct sw_flow { struct odp_flow_key key; struct sw_flow_actions *sf_acts; + atomic_t refcnt; + bool dead; + spinlock_t lock; /* Lock for values below. */ unsigned long used; /* Last used time (in jiffies). */ u64 packet_count; /* Number of packets matched. */ @@ -62,13 +65,15 @@ int flow_init(void); void flow_exit(void); struct sw_flow *flow_alloc(void); -void flow_free(struct sw_flow *flow); void flow_deferred_free(struct sw_flow *); void flow_free_tbl(struct tbl_node *); struct sw_flow_actions *flow_actions_alloc(size_t n_actions); void flow_deferred_free_acts(struct sw_flow_actions *); +void flow_hold(struct sw_flow *); +void flow_put(struct sw_flow *); + int flow_extract(struct sk_buff *, u16 in_port, struct odp_flow_key *); void flow_used(struct sw_flow *, struct sk_buff *); -- cgit v1.2.1 From 3976f6d57b1134c5c3ed054c9da4aa6786fbf5bf Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 29 Aug 2010 10:49:11 -0700 Subject: datapath: Enable usage of cached flows. An upcoming commit will add support for supplying cached flows for packets entering the datapath. 
This adds the code in the datapath itself to recognize these cached flows and use them instead of extracting the flow fields and doing a lookup. Signed-off-by: Jesse Gross Reviewed-by: Ben Pfaff --- datapath/datapath.c | 53 +++++++++++++++++++++++-------------------- datapath/datapath.h | 6 +++-- datapath/vport-internal_dev.c | 3 ++- datapath/vport.c | 3 +++ datapath/vport.h | 3 ++- 5 files changed, 40 insertions(+), 28 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index 06e1006a8..390acc8a4 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -543,40 +543,44 @@ void dp_process_received_packet(struct dp_port *p, struct sk_buff *skb) struct datapath *dp = p->dp; struct dp_stats_percpu *stats; int stats_counter_off; - struct odp_flow_key key; - struct tbl_node *flow_node; - struct sw_flow *flow; struct sw_flow_actions *acts; struct loop_counter *loop; int error; OVS_CB(skb)->dp_port = p; - /* Extract flow from 'skb' into 'key'. */ - error = flow_extract(skb, p ? p->port_no : ODPP_NONE, &key); - if (unlikely(error)) { - kfree_skb(skb); - return; - } + if (!OVS_CB(skb)->flow) { + struct odp_flow_key key; + struct tbl_node *flow_node; - if (OVS_CB(skb)->is_frag && dp->drop_frags) { - kfree_skb(skb); - stats_counter_off = offsetof(struct dp_stats_percpu, n_frags); - goto out; - } + /* Extract flow from 'skb' into 'key'. */ + error = flow_extract(skb, p ? p->port_no : ODPP_NONE, &key); + if (unlikely(error)) { + kfree_skb(skb); + return; + } - /* Look up flow. */ - flow_node = tbl_lookup(rcu_dereference(dp->table), &key, flow_hash(&key), flow_cmp); - if (unlikely(!flow_node)) { - dp_output_control(dp, skb, _ODPL_MISS_NR, OVS_CB(skb)->tun_id); - stats_counter_off = offsetof(struct dp_stats_percpu, n_missed); - goto out; + if (OVS_CB(skb)->is_frag && dp->drop_frags) { + kfree_skb(skb); + stats_counter_off = offsetof(struct dp_stats_percpu, n_frags); + goto out; + } + + /* Look up flow. 
*/ + flow_node = tbl_lookup(rcu_dereference(dp->table), &key, + flow_hash(&key), flow_cmp); + if (unlikely(!flow_node)) { + dp_output_control(dp, skb, _ODPL_MISS_NR, OVS_CB(skb)->tun_id); + stats_counter_off = offsetof(struct dp_stats_percpu, n_missed); + goto out; + } + + OVS_CB(skb)->flow = flow_cast(flow_node); } - flow = flow_cast(flow_node); - flow_used(flow, skb); + flow_used(OVS_CB(skb)->flow, skb); - acts = rcu_dereference(flow->sf_acts); + acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts); /* Check whether we've looped too much. */ loop = &get_cpu_var(dp_loop_counters).counters[!!in_interrupt()]; @@ -588,7 +592,8 @@ void dp_process_received_packet(struct dp_port *p, struct sk_buff *skb) } /* Execute actions. */ - execute_actions(dp, skb, &key, acts->actions, acts->n_actions, GFP_ATOMIC); + execute_actions(dp, skb, &OVS_CB(skb)->flow->key, acts->actions, + acts->n_actions, GFP_ATOMIC); stats_counter_off = offsetof(struct dp_stats_percpu, n_hit); /* Check whether sub-actions looped too much. */ diff --git a/datapath/datapath.h b/datapath/datapath.h index abc6aeab2..dacc3a42c 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -146,17 +146,19 @@ enum csum_type { /** * struct ovs_skb_cb - OVS data in skb CB * @dp_port: The datapath port on which the skb entered the switch. + * @flow: The flow associated with this packet. May be %NULL if no flow. + * @is_frag: %true if this packet is an IPv4 fragment, %false otherwise. * @ip_summed: Consistently stores L4 checksumming status across different * kernel versions. * @tun_id: ID (in network byte order) of the tunnel that encapsulated this * packet. It is 0 if the packet was not received on a tunnel. - * @is_frag: %true if this packet is an IPv4 fragment, %false otherwise. 
*/ struct ovs_skb_cb { struct dp_port *dp_port; + struct sw_flow *flow; + bool is_frag; enum csum_type ip_summed; __be32 tun_id; - bool is_frag; }; #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) diff --git a/datapath/vport-internal_dev.c b/datapath/vport-internal_dev.c index 6cbfdf80d..514d00cb3 100644 --- a/datapath/vport-internal_dev.c +++ b/datapath/vport-internal_dev.c @@ -82,6 +82,7 @@ static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) skb_reset_mac_header(skb); compute_ip_summed(skb, true); + OVS_CB(skb)->flow = NULL; vport_receive(vport, skb); @@ -293,7 +294,7 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb) struct vport_ops internal_vport_ops = { .type = "internal", - .flags = VPORT_F_REQUIRED | VPORT_F_GEN_STATS, + .flags = VPORT_F_REQUIRED | VPORT_F_GEN_STATS | VPORT_F_FLOW, .create = internal_dev_create, .destroy = internal_dev_destroy, .attach = internal_dev_attach, diff --git a/datapath/vport.c b/datapath/vport.c index 91b650e54..6c8eb0845 100644 --- a/datapath/vport.c +++ b/datapath/vport.c @@ -1217,6 +1217,9 @@ void vport_receive(struct vport *vport, struct sk_buff *skb) local_bh_enable(); } + if (!(vport->ops->flags & VPORT_F_FLOW)) + OVS_CB(skb)->flow = NULL; + if (!(vport->ops->flags & VPORT_F_TUN_ID)) OVS_CB(skb)->tun_id = 0; diff --git a/datapath/vport.h b/datapath/vport.h index fca5f1abe..30b0cc6b3 100644 --- a/datapath/vport.h +++ b/datapath/vport.h @@ -112,7 +112,8 @@ struct vport { #define VPORT_F_REQUIRED (1 << 0) /* If init fails, module loading fails. */ #define VPORT_F_GEN_STATS (1 << 1) /* Track stats at the generic layer. */ -#define VPORT_F_TUN_ID (1 << 2) /* Sets OVS_CB(skb)->tun_id. */ +#define VPORT_F_FLOW (1 << 2) /* Sets OVS_CB(skb)->flow. */ +#define VPORT_F_TUN_ID (1 << 3) /* Sets OVS_CB(skb)->tun_id. 
*/ /** * struct vport_ops - definition of a type of virtual port -- cgit v1.2.1 From b7a31ec13d0617868378d39a72beb4c4ffcb7e5c Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 29 Aug 2010 14:28:58 -0700 Subject: datapath: Move is_frag out of struct ovs_skb_cb. is_frag is only used for communication between two functions, which means that it doesn't really need to be in the SKB CB. This wouldn't necessarily be a problem except that there are also a number of other paths that lead to this being uninitialized. This isn't a problem now but uninitialized memory seems dangerous and there isn't much upside. Signed-off-by: Jesse Gross Reviewed-by: Ben Pfaff --- datapath/datapath.c | 8 +++++--- datapath/datapath.h | 2 -- datapath/flow.c | 14 +++++++------- datapath/flow.h | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index 390acc8a4..5996d6ed7 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -552,15 +552,16 @@ void dp_process_received_packet(struct dp_port *p, struct sk_buff *skb) if (!OVS_CB(skb)->flow) { struct odp_flow_key key; struct tbl_node *flow_node; + bool is_frag; /* Extract flow from 'skb' into 'key'. */ - error = flow_extract(skb, p ? p->port_no : ODPP_NONE, &key); + error = flow_extract(skb, p ? 
p->port_no : ODPP_NONE, &key, &is_frag); if (unlikely(error)) { kfree_skb(skb); return; } - if (OVS_CB(skb)->is_frag && dp->drop_frags) { + if (is_frag && dp->drop_frags) { kfree_skb(skb); stats_counter_off = offsetof(struct dp_stats_percpu, n_frags); goto out; @@ -1325,6 +1326,7 @@ static int do_execute(struct datapath *dp, const struct odp_execute *execute) struct sk_buff *skb; struct sw_flow_actions *actions; struct ethhdr *eth; + bool is_frag; int err; err = -EINVAL; @@ -1372,7 +1374,7 @@ static int do_execute(struct datapath *dp, const struct odp_execute *execute) else skb->protocol = htons(ETH_P_802_2); - err = flow_extract(skb, execute->in_port, &key); + err = flow_extract(skb, execute->in_port, &key, &is_frag); if (err) goto error_free_skb; diff --git a/datapath/datapath.h b/datapath/datapath.h index dacc3a42c..f28513bb7 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -147,7 +147,6 @@ enum csum_type { * struct ovs_skb_cb - OVS data in skb CB * @dp_port: The datapath port on which the skb entered the switch. * @flow: The flow associated with this packet. May be %NULL if no flow. - * @is_frag: %true if this packet is an IPv4 fragment, %false otherwise. * @ip_summed: Consistently stores L4 checksumming status across different * kernel versions. * @tun_id: ID (in network byte order) of the tunnel that encapsulated this @@ -156,7 +155,6 @@ enum csum_type { struct ovs_skb_cb { struct dp_port *dp_port; struct sw_flow *flow; - bool is_frag; enum csum_type ip_summed; __be32 tun_id; }; diff --git a/datapath/flow.c b/datapath/flow.c index dfbf76938..1aa6e291b 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -267,7 +267,8 @@ static __be16 parse_ethertype(struct sk_buff *skb) * Sets OVS_CB(skb)->is_frag to %true if @skb is an IPv4 fragment, otherwise to * %false. 
*/ -int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key) +int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key, + bool *is_frag) { struct ethhdr *eth; @@ -275,7 +276,7 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key) key->tun_id = OVS_CB(skb)->tun_id; key->in_port = in_port; key->dl_vlan = htons(ODP_VLAN_NONE); - OVS_CB(skb)->is_frag = false; + *is_frag = false; /* * We would really like to pull as many bytes as we could possibly @@ -356,9 +357,9 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key) key->tp_dst = htons(icmp->code); } } - } else { - OVS_CB(skb)->is_frag = true; - } + } else + *is_frag = true; + } else if (key->dl_type == htons(ETH_P_ARP) && arphdr_ok(skb)) { struct arp_eth_header *arp; @@ -370,9 +371,8 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct odp_flow_key *key) && arp->ar_pln == 4) { /* We only match on the lower 8 bits of the opcode. */ - if (ntohs(arp->ar_op) <= 0xff) { + if (ntohs(arp->ar_op) <= 0xff) key->nw_proto = ntohs(arp->ar_op); - } if (key->nw_proto == ARPOP_REQUEST || key->nw_proto == ARPOP_REPLY) { diff --git a/datapath/flow.h b/datapath/flow.h index 3f434677b..25b720449 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -74,7 +74,7 @@ void flow_deferred_free_acts(struct sw_flow_actions *); void flow_hold(struct sw_flow *); void flow_put(struct sw_flow *); -int flow_extract(struct sk_buff *, u16 in_port, struct odp_flow_key *); +int flow_extract(struct sk_buff *, u16 in_port, struct odp_flow_key *, bool *is_frag); void flow_used(struct sw_flow *, struct sk_buff *); u32 flow_hash(const struct odp_flow_key *key); -- cgit v1.2.1 From 7c79397fe8ff52eb9cfe4d32a9d1f8b2f23c45c8 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Tue, 31 Aug 2010 15:38:25 -0700 Subject: datapath: Backport workqueue functions. 
An upcoming commit will use some workqueue functions that weren't available on earlier kernels, so this backports those functions. The backporting uses timers instead of delayed work queues because the earlier versions of work queues have some unsafe corner cases. In addition, this removes some work queue backporting code that is no longer used because it is potentially unsafe. Note that this commit changes the behavior of work queues: normally they run in process context but the backported version runs in softirq context. Signed-off-by: Jesse Gross Reviewed-by: Ben Pfaff --- datapath/datapath.c | 1 - .../linux-2.6/compat-2.6/include/linux/workqueue.h | 65 +++++++++++----------- 2 files changed, 32 insertions(+), 34 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index 5996d6ed7..e9f30f8a8 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include diff --git a/datapath/linux-2.6/compat-2.6/include/linux/workqueue.h b/datapath/linux-2.6/compat-2.6/include/linux/workqueue.h index 1ac3b6ecb..01c6345e9 100644 --- a/datapath/linux-2.6/compat-2.6/include/linux/workqueue.h +++ b/datapath/linux-2.6/compat-2.6/include/linux/workqueue.h @@ -4,39 +4,38 @@ #include_next #include -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) - -#ifdef __KERNEL__ -/* - * initialize a work-struct's func and data pointers: - */ -#undef PREPARE_WORK -#define PREPARE_WORK(_work, _func) \ - do { \ - (_work)->func = (void(*)(void*)) _func; \ - (_work)->data = _work; \ - } while (0) - -/* - * initialize all of a work-struct: +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + +/* Older kernels have an implementation of work queues with some very bad + * characteristics when trying to cancel work (potential deadlocks, use after + * free, etc.). Here we directly use timers instead for delayed work. It's not + * optimal but it is better than the alternative.
Note that work queues + * normally run in process context but this will cause them to operate in + * softirq context. */ -#undef INIT_WORK -#define INIT_WORK(_work, _func) \ - do { \ - INIT_LIST_HEAD(&(_work)->entry); \ - (_work)->pending = 0; \ - PREPARE_WORK((_work), (_func)); \ - init_timer(&(_work)->timer); \ - } while (0) - -#endif /* __KERNEL__ */ - -#endif /* linux kernel < 2.6.20 */ - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) -/* There is no equivalent to cancel_work_sync() so just flush all - * pending work. */ -#define cancel_work_sync(_work) flush_scheduled_work() -#endif + +#include + +#undef DECLARE_DELAYED_WORK +#define DECLARE_DELAYED_WORK(n, f) \ + struct timer_list n = TIMER_INITIALIZER((void (*)(unsigned long))f, 0, 0) + +#define schedule_delayed_work rpl_schedule_delayed_work +static inline int schedule_delayed_work(struct timer_list *timer, unsigned long delay) +{ + if (timer_pending(timer)) + return 0; + + mod_timer(timer, jiffies + delay); + return 1; +} + +#define cancel_delayed_work_sync rpl_cancel_delayed_work_sync +static inline int cancel_delayed_work_sync(struct timer_list *timer) +{ + return del_timer_sync(timer); +} + +#endif /* kernel version < 2.6.23 */ #endif -- cgit v1.2.1 From 842cf6f472b236b6e61be04b41970116245b1759 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 27 Aug 2010 13:55:02 -0700 Subject: datapath: Add tunnel header caching. On the transmit path we generate essentially the same tunnel header for every packet to a given destination. However, each packet must have the headers assembled in pieces, lookup the destination in the routing table, and lookup the flow in OVS. This avoids that extra work by caching all of the header and output path information and only rebuilding it when something actually changes. This optimization reduces CPU load on transmit by approximately 13%. 
Signed-off-by: Jesse Gross Reviewed-by: Ben Pfaff --- datapath/tunnel.c | 985 ++++++++++++++++++++++++++++++------------- datapath/tunnel.h | 153 +++++-- datapath/vport-capwap.c | 68 +-- datapath/vport-gre.c | 59 +-- include/openvswitch/tunnel.h | 1 + 5 files changed, 899 insertions(+), 367 deletions(-) diff --git a/datapath/tunnel.c b/datapath/tunnel.c index 6fa369be0..77f976fdc 100644 --- a/datapath/tunnel.c +++ b/datapath/tunnel.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -33,10 +34,45 @@ #include "tunnel.h" #include "vport.h" #include "vport-generic.h" +#include "vport-internal_dev.h" + +#ifdef NEED_CACHE_TIMEOUT +/* + * On kernels where we can't quickly detect changes in the rest of the system + * we use an expiration time to invalidate the cache. A shorter expiration + * reduces the length of time that we may potentially blackhole packets while + * a longer time increases performance by reducing the frequency that the + * cache needs to be rebuilt. A variety of factors may cause the cache to be + * invalidated before the expiration time but this is the maximum. The time + * is expressed in jiffies. + */ +#define MAX_CACHE_EXP HZ +#endif + +/* + * Interval to check for and remove caches that are no longer valid. Caches + * are checked for validity before they are used for packet encapsulation and + * old caches are removed at that time. However, if no packets are sent through + * the tunnel then the cache will never be destroyed. Since it holds + * references to a number of system objects, the cache will continue to use + * system resources by not allowing those objects to be destroyed. The cache + * cleaner is periodically run to free invalid caches. It does not + * significantly affect system performance. A lower interval will release + * resources faster but will itself consume resources by requiring more frequent + * checks. 
A longer interval may result in messages being printed to the kernel + * message buffer about unreleased resources. The interval is expressed in + * jiffies. + */ +#define CACHE_CLEANER_INTERVAL (5 * HZ) + +#define CACHE_DATA_ALIGN 16 /* Protected by RCU. */ static struct tbl *port_table; +static void cache_cleaner(struct work_struct *work); +DECLARE_DELAYED_WORK(cache_cleaner_wq, cache_cleaner); + /* * These are just used as an optimization: they don't require any kind of * synchronization because we could have just as easily read the value before @@ -63,22 +99,54 @@ static inline struct tnl_vport *tnl_vport_table_cast(const struct tbl_node *node return container_of(node, struct tnl_vport, tbl_node); } -/* RCU callback. */ -static void free_config(struct rcu_head *rcu) +static inline void schedule_cache_cleaner(void) +{ + schedule_delayed_work(&cache_cleaner_wq, CACHE_CLEANER_INTERVAL); +} + +static void free_cache(struct tnl_cache *cache) +{ + if (!cache) + return; + + flow_put(cache->flow); + ip_rt_put(cache->rt); + kfree(cache); +} + +static void free_config_rcu(struct rcu_head *rcu) { struct tnl_mutable_config *c = container_of(rcu, struct tnl_mutable_config, rcu); kfree(c); } +static void free_cache_rcu(struct rcu_head *rcu) +{ + struct tnl_cache *c = container_of(rcu, struct tnl_cache, rcu); + free_cache(c); +} + static void assign_config_rcu(struct vport *vport, struct tnl_mutable_config *new_config) { struct tnl_vport *tnl_vport = tnl_vport_priv(vport); struct tnl_mutable_config *old_config; - old_config = rcu_dereference(tnl_vport->mutable); + old_config = tnl_vport->mutable; rcu_assign_pointer(tnl_vport->mutable, new_config); - call_rcu(&old_config->rcu, free_config); + call_rcu(&old_config->rcu, free_config_rcu); +} + +static void assign_cache_rcu(struct vport *vport, struct tnl_cache *new_cache) +{ + struct tnl_vport *tnl_vport = tnl_vport_priv(vport); + struct tnl_cache *old_cache; + + old_cache = tnl_vport->cache; + 
rcu_assign_pointer(tnl_vport->cache, new_cache); + + if (old_cache) + call_rcu(&old_cache->rcu, free_cache_rcu); } static unsigned int *find_port_pool(const struct tnl_mutable_config *mutable) @@ -130,10 +198,32 @@ static u32 port_hash(struct port_lookup_key *lookup) return jhash2(lookup->vals, ARRAY_SIZE(lookup->vals), 0); } +static u32 mutable_hash(const struct tnl_mutable_config *mutable) +{ + struct port_lookup_key lookup; + + lookup.vals[LOOKUP_SADDR] = mutable->port_config.saddr; + lookup.vals[LOOKUP_DADDR] = mutable->port_config.daddr; + lookup.vals[LOOKUP_KEY] = mutable->port_config.in_key; + lookup.vals[LOOKUP_TUNNEL_TYPE] = mutable->tunnel_type; + + return port_hash(&lookup); +} + +static void check_table_empty(void) +{ + if (tbl_count(port_table) == 0) { + struct tbl *old_table = port_table; + + cancel_delayed_work_sync(&cache_cleaner_wq); + rcu_assign_pointer(port_table, NULL); + tbl_deferred_destroy(old_table, NULL); + } +} + static int add_port(struct vport *vport) { struct tnl_vport *tnl_vport = tnl_vport_priv(vport); - struct port_lookup_key lookup; int err; if (!port_table) { @@ -144,6 +234,7 @@ static int add_port(struct vport *vport) return -ENOMEM; rcu_assign_pointer(port_table, new_table); + schedule_cache_cleaner(); } else if (tbl_count(port_table) > tbl_n_buckets(port_table)) { struct tbl *old_table = port_table; @@ -157,16 +248,44 @@ static int add_port(struct vport *vport) tbl_deferred_destroy(old_table, NULL); } - lookup.vals[LOOKUP_SADDR] = tnl_vport->mutable->port_config.saddr; - lookup.vals[LOOKUP_DADDR] = tnl_vport->mutable->port_config.daddr; - lookup.vals[LOOKUP_KEY] = tnl_vport->mutable->port_config.in_key; - lookup.vals[LOOKUP_TUNNEL_TYPE] = tnl_vport->mutable->tunnel_type; + err = tbl_insert(port_table, &tnl_vport->tbl_node, mutable_hash(tnl_vport->mutable)); + if (err) { + check_table_empty(); + return err; + } + + (*find_port_pool(tnl_vport->mutable))++; + + return 0; +} + +static int move_port(struct vport *vport, struct 
tnl_mutable_config *new_mutable) +{ + int err; + struct tnl_vport *tnl_vport = tnl_vport_priv(vport); + u32 hash; + + hash = mutable_hash(new_mutable); + if (hash == tnl_vport->tbl_node.hash) + goto table_updated; - err = tbl_insert(port_table, &tnl_vport->tbl_node, port_hash(&lookup)); + /* + * Ideally we should make this move atomic to avoid having gaps in + * finding tunnels or the possibility of failure. However, if we do + * find a tunnel it will always be consistent. + */ + err = tbl_remove(port_table, &tnl_vport->tbl_node); if (err) return err; - (*find_port_pool(tnl_vport->mutable))++; + err = tbl_insert(port_table, &tnl_vport->tbl_node, hash); + if (err) { + check_table_empty(); + return err; + } + +table_updated: + assign_config_rcu(vport, new_mutable); return 0; } @@ -180,6 +299,7 @@ static int del_port(struct vport *vport) if (err) return err; + check_table_empty(); (*find_port_pool(tnl_vport->mutable))--; return 0; @@ -193,7 +313,7 @@ struct vport *tnl_find_port(__be32 saddr, __be32 daddr, __be32 key, struct tbl *table = rcu_dereference(port_table); struct tbl_node *tbl_node; - if (!table) + if (unlikely(!table)) return NULL; lookup.vals[LOOKUP_SADDR] = saddr; @@ -246,6 +366,60 @@ found: return tnl_vport_to_vport(tnl_vport_table_cast(tbl_node)); } +static inline void ecn_decapsulate(struct sk_buff *skb) +{ + u8 tos = ip_hdr(skb)->tos; + + if (INET_ECN_is_ce(tos)) { + __be16 protocol = skb->protocol; + unsigned int nw_header = skb_network_offset(skb); + + if (skb->protocol == htons(ETH_P_8021Q)) { + if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN))) + return; + + protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; + nw_header += VLAN_HLEN; + } + + if (protocol == htons(ETH_P_IP)) { + if (unlikely(!pskb_may_pull(skb, nw_header + + sizeof(struct iphdr)))) + return; + + IP_ECN_set_ce((struct iphdr *)(skb->data + nw_header)); + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + else if (protocol == htons(ETH_P_IPV6)) { + if 
(unlikely(!pskb_may_pull(skb, nw_header + + sizeof(struct ipv6hdr)))) + return; + + IP6_ECN_set_ce((struct ipv6hdr *)(skb->data + nw_header)); + } +#endif + } +} + +/* Called with rcu_read_lock. */ +void tnl_rcv(struct vport *vport, struct sk_buff *skb) +{ + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, skb->dev); + + skb_dst_drop(skb); + nf_reset(skb); + secpath_reset(skb); + skb_reset_network_header(skb); + + ecn_decapsulate(skb); + + skb_push(skb, ETH_HLEN); + compute_ip_summed(skb, false); + + vport_receive(vport, skb); +} + static bool check_ipv4_address(__be32 addr) { if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) @@ -514,179 +688,412 @@ bool tnl_frag_needed(struct vport *vport, const struct tnl_mutable_config *mutab return true; } -static struct sk_buff *check_headroom(struct sk_buff *skb, int headroom) +static bool check_mtu(struct sk_buff *skb, + struct vport *vport, + const struct tnl_mutable_config *mutable, + const struct rtable *rt, __be16 *frag_offp) { - if (skb_headroom(skb) < headroom || skb_header_cloned(skb)) { - struct sk_buff *nskb = skb_realloc_headroom(skb, headroom + 16); - if (unlikely(!nskb)) { - kfree_skb(skb); - return ERR_PTR(-ENOMEM); + int mtu; + __be16 frag_off; + + frag_off = (mutable->port_config.flags & TNL_F_PMTUD) ? htons(IP_DF) : 0; + if (frag_off) + mtu = dst_mtu(&rt_dst(rt)) + - ETH_HLEN + - mutable->tunnel_hlen + - (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? 
VLAN_HLEN : 0); + else + mtu = mutable->mtu; + + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *old_iph = ip_hdr(skb); + + frag_off |= old_iph->frag_off & htons(IP_DF); + mtu = max(mtu, IP_MIN_MTU); + + if ((old_iph->frag_off & htons(IP_DF)) && + mtu < ntohs(old_iph->tot_len)) { + if (tnl_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id)) + goto drop; } + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + else if (skb->protocol == htons(ETH_P_IPV6)) { + unsigned int packet_length = skb->len - ETH_HLEN + - (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0); - set_skb_csum_bits(skb, nskb); + mtu = max(mtu, IPV6_MIN_MTU); - if (skb->sk) - skb_set_owner_w(nskb, skb->sk); + /* IPv6 requires PMTUD if the packet is above the minimum MTU. */ + if (packet_length > IPV6_MIN_MTU) + frag_off = htons(IP_DF); - dev_kfree_skb(skb); - return nskb; + if (mtu < packet_length) { + if (tnl_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id)) + goto drop; + } } +#endif - return skb; + *frag_offp = frag_off; + return true; + +drop: + *frag_offp = 0; + return false; } -static inline u8 ecn_encapsulate(u8 tos, struct sk_buff *skb) +static void create_tunnel_header(const struct vport *vport, + const struct tnl_mutable_config *mutable, + const struct rtable *rt, void *header) { - u8 inner; + struct tnl_vport *tnl_vport = tnl_vport_priv(vport); + struct iphdr *iph = header; + + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->frag_off = htons(IP_DF); + iph->protocol = tnl_vport->tnl_ops->ipproto; + iph->tos = mutable->port_config.tos; + iph->daddr = rt->rt_dst; + iph->saddr = rt->rt_src; + iph->ttl = mutable->port_config.ttl; + if (!iph->ttl) + iph->ttl = dst_metric(&rt_dst(rt), RTAX_HOPLIMIT); + + tnl_vport->tnl_ops->build_header(vport, mutable, iph + 1); +} - if (skb->protocol == htons(ETH_P_IP)) - inner = ((struct iphdr *)skb_network_header(skb))->tos; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if 
(skb->protocol == htons(ETH_P_IPV6)) - inner = ipv6_get_dsfield((struct ipv6hdr *)skb_network_header(skb)); -#endif - else - inner = 0; +static inline void *get_cached_header(const struct tnl_cache *cache) +{ + return (void *)cache + ALIGN(sizeof(struct tnl_cache), CACHE_DATA_ALIGN); +} - return INET_ECN_encapsulate(tos, inner); +static inline bool check_cache_valid(const struct tnl_cache *cache, + const struct tnl_mutable_config *mutable) +{ + return cache && +#ifdef NEED_CACHE_TIMEOUT + time_before(jiffies, cache->expiration) && +#endif +#ifdef HAVE_RT_GENID + atomic_read(&init_net.ipv4.rt_genid) == cache->rt->rt_genid && +#endif +#ifdef HAVE_HH_SEQ + rt_dst(cache->rt).hh->hh_lock.sequence == cache->hh_seq && +#endif + mutable->seq == cache->mutable_seq && + (!is_internal_dev(rt_dst(cache->rt).dev) || + (cache->flow && !cache->flow->dead)); } -static inline void ecn_decapsulate(struct sk_buff *skb) +static int cache_cleaner_cb(struct tbl_node *tbl_node, void *aux) { - u8 tos = ip_hdr(skb)->tos; + struct tnl_vport *tnl_vport = tnl_vport_table_cast(tbl_node); + const struct tnl_mutable_config *mutable = rcu_dereference(tnl_vport->mutable); + const struct tnl_cache *cache = rcu_dereference(tnl_vport->cache); - if (INET_ECN_is_ce(tos)) { - __be16 protocol = skb->protocol; - unsigned int nw_header = skb_network_header(skb) - skb->data; + if (cache && !check_cache_valid(cache, mutable) && + spin_trylock_bh(&tnl_vport->cache_lock)) { + assign_cache_rcu(tnl_vport_to_vport(tnl_vport), NULL); + spin_unlock_bh(&tnl_vport->cache_lock); + } - if (skb->protocol == htons(ETH_P_8021Q)) { - if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN))) - return; + return 0; +} - protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; - nw_header += VLAN_HLEN; - } +static void cache_cleaner(struct work_struct *work) +{ + schedule_cache_cleaner(); - if (protocol == htons(ETH_P_IP)) { - if (unlikely(!pskb_may_pull(skb, nw_header - + sizeof(struct iphdr)))) - return; + rcu_read_lock(); + 
tbl_foreach(port_table, cache_cleaner_cb, NULL); + rcu_read_unlock(); +} - IP_ECN_set_ce((struct iphdr *)(nw_header + skb->data)); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (protocol == htons(ETH_P_IPV6)) { - if (unlikely(!pskb_may_pull(skb, nw_header - + sizeof(struct ipv6hdr)))) - return; +static inline void create_eth_hdr(struct tnl_cache *cache, + const struct rtable *rt) +{ + void *cache_data = get_cached_header(cache); + int hh_len = rt_dst(rt).hh->hh_len; + int hh_off = HH_DATA_ALIGN(rt_dst(rt).hh->hh_len) - hh_len; - IP6_ECN_set_ce((struct ipv6hdr *)(nw_header - + skb->data)); - } +#ifdef HAVE_HH_SEQ + unsigned hh_seq; + + do { + hh_seq = read_seqbegin(&rt_dst(rt).hh->hh_lock); + memcpy(cache_data, (void *)rt_dst(rt).hh->hh_data + hh_off, hh_len); + } while (read_seqretry(&rt_dst(rt).hh->hh_lock, hh_seq)); + + cache->hh_seq = hh_seq; +#else + read_lock_bh(&rt_dst(rt).hh->hh_lock); + memcpy(cache_data, (void *)rt_dst(rt).hh->hh_data + hh_off, hh_len); + read_unlock_bh(&rt_dst(rt).hh->hh_lock); #endif - } } -static struct sk_buff *handle_gso(struct sk_buff *skb) +static struct tnl_cache *build_cache(struct vport *vport, + const struct tnl_mutable_config *mutable, + struct rtable *rt) { - if (skb_is_gso(skb)) { - struct sk_buff *nskb = skb_gso_segment(skb, 0); + struct tnl_vport *tnl_vport = tnl_vport_priv(vport); + struct tnl_cache *cache; + void *cache_data; + int cache_len; - dev_kfree_skb(skb); - return nskb; + if (!(mutable->port_config.flags & TNL_F_HDR_CACHE)) + return NULL; + + /* + * If there is no entry in the ARP cache or if this device does not + * support hard header caching just fall back to the IP stack. + */ + if (!rt_dst(rt).hh) + return NULL; + + /* + * If lock is contended fall back to directly building the header. + * We're not going to help performance by sitting here spinning. 
+ */ + if (!spin_trylock_bh(&tnl_vport->cache_lock)) + return NULL; + + cache = tnl_vport->cache; + if (check_cache_valid(cache, mutable)) + goto unlock; + else + cache = NULL; + + cache_len = rt_dst(rt).hh->hh_len + mutable->tunnel_hlen; + + cache = kzalloc(ALIGN(sizeof(struct tnl_cache), CACHE_DATA_ALIGN) + + cache_len, GFP_ATOMIC); + if (!cache) + goto unlock; + + cache->len = cache_len; + + create_eth_hdr(cache, rt); + cache_data = get_cached_header(cache) + rt_dst(rt).hh->hh_len; + + create_tunnel_header(vport, mutable, rt, cache_data); + + cache->mutable_seq = mutable->seq; + cache->rt = rt; +#ifdef NEED_CACHE_TIMEOUT + cache->expiration = jiffies + tnl_vport->cache_exp_interval; +#endif + + if (is_internal_dev(rt_dst(rt).dev)) { + int err; + struct vport *vport; + struct dp_port *dp_port; + struct sk_buff *skb; + bool is_frag; + struct odp_flow_key flow_key; + struct tbl_node *flow_node; + + vport = internal_dev_get_vport(rt_dst(rt).dev); + if (!vport) + goto done; + + dp_port = vport_get_dp_port(vport); + if (!dp_port) + goto done; + + skb = alloc_skb(cache->len, GFP_ATOMIC); + if (!skb) + goto done; + + __skb_put(skb, cache->len); + memcpy(skb->data, get_cached_header(cache), cache->len); + + err = flow_extract(skb, dp_port->port_no, &flow_key, &is_frag); + + kfree_skb(skb); + if (err || is_frag) + goto done; + + flow_node = tbl_lookup(rcu_dereference(dp_port->dp->table), + &flow_key, flow_hash(&flow_key), + flow_cmp); + if (flow_node) { + struct sw_flow *flow = flow_cast(flow_node); + + cache->flow = flow; + flow_hold(flow); + } } - return skb; +done: + assign_cache_rcu(vport, cache); + +unlock: + spin_unlock_bh(&tnl_vport->cache_lock); + + return cache; } -static int handle_csum_offload(struct sk_buff *skb) +static struct rtable *find_route(struct vport *vport, + const struct tnl_mutable_config *mutable, + u8 tos, struct tnl_cache **cache) { - if (skb->ip_summed == CHECKSUM_PARTIAL) - return skb_checksum_help(skb); - else { - skb->ip_summed = 
CHECKSUM_NONE; - return 0; + struct tnl_vport *tnl_vport = tnl_vport_priv(vport); + struct tnl_cache *cur_cache = rcu_dereference(tnl_vport->cache); + + *cache = NULL; + tos = RT_TOS(tos); + + if (likely(tos == mutable->port_config.tos && + check_cache_valid(cur_cache, mutable))) { + *cache = cur_cache; + return cur_cache->rt; + } else { + struct rtable *rt; + struct flowi fl = { .nl_u = { .ip4_u = + { .daddr = mutable->port_config.daddr, + .saddr = mutable->port_config.saddr, + .tos = tos } }, + .proto = tnl_vport->tnl_ops->ipproto }; + + if (unlikely(ip_route_output_key(&init_net, &rt, &fl))) + return NULL; + + if (likely(tos == mutable->port_config.tos)) + *cache = build_cache(vport, mutable, rt); + + return rt; } } -/* Called with rcu_read_lock. */ -void tnl_rcv(struct vport *vport, struct sk_buff *skb) +static struct sk_buff *check_headroom(struct sk_buff *skb, int headroom) { - skb->pkt_type = PACKET_HOST; - skb->protocol = eth_type_trans(skb, skb->dev); + if (skb_headroom(skb) < headroom || skb_header_cloned(skb)) { + struct sk_buff *nskb = skb_realloc_headroom(skb, headroom + 16); + if (unlikely(!nskb)) { + kfree_skb(skb); + return ERR_PTR(-ENOMEM); + } - skb_dst_drop(skb); - nf_reset(skb); - secpath_reset(skb); - skb_reset_network_header(skb); + set_skb_csum_bits(skb, nskb); - ecn_decapsulate(skb); + if (skb->sk) + skb_set_owner_w(nskb, skb->sk); - skb_push(skb, ETH_HLEN); - compute_ip_summed(skb, false); + kfree_skb(skb); + return nskb; + } - vport_receive(vport, skb); + return skb; } -static int build_packet(struct vport *vport, const struct tnl_mutable_config *mutable, - struct iphdr *iph, struct rtable *rt, int max_headroom, - int mtu, struct sk_buff *skb) +static inline bool need_linearize(const struct sk_buff *skb) { - struct tnl_vport *tnl_vport = tnl_vport_priv(vport); + int i; + + if (unlikely(skb_shinfo(skb)->frag_list)) + return true; + + /* + * Generally speaking we should linearize if there are paged frags. 
+ * However, if all of the refcounts are 1 we know nobody else can + * change them from underneath us and we can skip the linearization. + */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) + if (unlikely(page_count(skb_shinfo(skb)->frags[0].page) > 1)) + return true; + + return false; +} + +static struct sk_buff *handle_offloads(struct sk_buff *skb, + const struct tnl_mutable_config *mutable, + const struct rtable *rt) +{ + int min_headroom; int err; - struct iphdr *new_iph; - int orig_len = skb->len; - __be16 frag_off = iph->frag_off; - skb = check_headroom(skb, max_headroom); - if (unlikely(IS_ERR(skb))) - goto error; + forward_ip_summed(skb); - err = handle_csum_offload(skb); + err = vswitch_skb_checksum_setup(skb); if (unlikely(err)) goto error_free; - if (skb->protocol == htons(ETH_P_IP)) { - struct iphdr *old_iph = ip_hdr(skb); + min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len + + mutable->tunnel_hlen; - if ((old_iph->frag_off & htons(IP_DF)) && - mtu < ntohs(old_iph->tot_len)) { - if (tnl_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id)) - goto error_free; + if (skb_is_gso(skb)) { + struct sk_buff *nskb; + + /* + * If we are doing GSO on a pskb it is better to make sure that + * the headroom is correct now. We will only have to copy the + * portion in the linear data area and GSO will preserve + * headroom when it creates the segments. This is particularly + * beneficial on Xen where we get a lot of GSO pskbs. + * Conversely, we avoid copying if it is just to get our own + * writable clone because GSO will do the copy for us. + */ + if (skb_headroom(skb) < min_headroom) { + skb = check_headroom(skb, min_headroom); + if (unlikely(IS_ERR(skb))) { + err = PTR_ERR(skb); + goto error; + } } - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (skb->protocol == htons(ETH_P_IPV6)) { - unsigned int packet_length = skb->len - ETH_HLEN - - (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? 
VLAN_HLEN : 0); + nskb = skb_gso_segment(skb, 0); + kfree_skb(skb); + if (unlikely(IS_ERR(nskb))) { + err = PTR_ERR(nskb); + goto error; + } - /* IPv6 requires PMTUD if the packet is above the minimum MTU. */ - if (packet_length > IPV6_MIN_MTU) - frag_off = htons(IP_DF); + skb = nskb; + } else { + skb = check_headroom(skb, min_headroom); + if (unlikely(IS_ERR(skb))) { + err = PTR_ERR(skb); + goto error; + } - if (mtu < packet_length) { - if (tnl_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id)) + if (skb->ip_summed == CHECKSUM_PARTIAL) { + /* + * Pages aren't locked and could change at any time. + * If this happens after we compute the checksum, the + * checksum will be wrong. We linearize now to avoid + * this problem. + */ + if (unlikely(need_linearize(skb))) { + err = __skb_linearize(skb); + if (unlikely(err)) + goto error_free; + } + + err = skb_checksum_help(skb); + if (unlikely(err)) goto error_free; - } + } else if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; } -#endif - new_iph = (struct iphdr *)skb_push(skb, mutable->tunnel_hlen); - skb_reset_network_header(skb); - skb_set_transport_header(skb, sizeof(struct iphdr)); - - memcpy(new_iph, iph, sizeof(struct iphdr)); - new_iph->frag_off = frag_off; - ip_select_ident(new_iph, &rt_dst(rt), NULL); + return skb; - memset(&IPCB(skb)->opt, 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags = 0; +error_free: + kfree_skb(skb); +error: + return ERR_PTR(err); +} - skb = tnl_vport->tnl_ops->build_header(skb, vport, mutable, &rt_dst(rt)); - if (unlikely(!skb)) - goto error; +static int send_frags(struct sk_buff *skb, + const struct tnl_mutable_config *mutable) +{ + int sent_len; + int err; + sent_len = 0; while (skb) { struct sk_buff *next = skb->next; int frag_len = skb->len - mutable->tunnel_hlen; @@ -694,34 +1101,26 @@ static int build_packet(struct vport *vport, const struct tnl_mutable_config *mu skb->next = NULL; err = ip_local_out(skb); - if (unlikely(net_xmit_eval(err) != 0)) { - 
orig_len -= frag_len; + if (likely(net_xmit_eval(err) == 0)) + sent_len += frag_len; + else { skb = next; goto free_frags; } skb = next; - }; + } - return orig_len; + return sent_len; -error_free: - kfree_skb(skb); -error: - return 0; free_frags: /* * There's no point in continuing to send fragments once one has been * dropped so just free the rest. This may help improve the congestion * that caused the first packet to be dropped. */ - while (skb) { - struct sk_buff *next = skb->next; - orig_len -= skb->len - mutable->tunnel_hlen; - kfree_skb(skb); - skb = next; - }; - return orig_len; + tnl_free_linked_skbs(skb); + return sent_len; } int tnl_send(struct vport *vport, struct sk_buff *skb) @@ -729,12 +1128,15 @@ int tnl_send(struct vport *vport, struct sk_buff *skb) struct tnl_vport *tnl_vport = tnl_vport_priv(vport); const struct tnl_mutable_config *mutable = rcu_dereference(tnl_vport->mutable); - struct iphdr *old_iph; - int orig_len; - struct iphdr iph; + enum vport_err_type err = VPORT_E_TX_ERROR; struct rtable *rt; - int max_headroom; - int mtu; + struct dst_entry *unattached_dst = NULL; + struct tnl_cache *cache; + int sent_len = 0; + __be16 frag_off; + u8 ttl; + u8 inner_tos; + u8 tos; /* Validate the protocol headers before we try to use them. 
*/ if (skb->protocol == htons(ETH_P_8021Q)) { @@ -746,147 +1148,164 @@ int tnl_send(struct vport *vport, struct sk_buff *skb) } if (skb->protocol == htons(ETH_P_IP)) { - if (unlikely(!pskb_may_pull(skb, skb_network_header(skb) - + sizeof(struct iphdr) - skb->data))) + if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb) + + sizeof(struct iphdr)))) skb->protocol = 0; } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) { - if (unlikely(!pskb_may_pull(skb, skb_network_header(skb) - + sizeof(struct ipv6hdr) - skb->data))) + if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb) + + sizeof(struct ipv6hdr)))) skb->protocol = 0; } #endif - old_iph = ip_hdr(skb); - iph.tos = mutable->port_config.tos; - if (mutable->port_config.flags & TNL_F_TOS_INHERIT) { - if (skb->protocol == htons(ETH_P_IP)) - iph.tos = old_iph->tos; + /* ToS */ + if (skb->protocol == htons(ETH_P_IP)) + inner_tos = ip_hdr(skb)->tos; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (skb->protocol == htons(ETH_P_IPV6)) - iph.tos = ipv6_get_dsfield(ipv6_hdr(skb)); + else if (skb->protocol == htons(ETH_P_IPV6)) + inner_tos = ipv6_get_dsfield(ipv6_hdr(skb)); #endif - } - iph.tos = ecn_encapsulate(iph.tos, skb); + else + inner_tos = 0; - { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = mutable->port_config.daddr, - .saddr = mutable->port_config.saddr, - .tos = RT_TOS(iph.tos) } }, - .proto = tnl_vport->tnl_ops->ipproto }; + if (mutable->port_config.flags & TNL_F_TOS_INHERIT) + tos = inner_tos; + else + tos = mutable->port_config.tos; - if (unlikely(ip_route_output_key(&init_net, &rt, &fl))) - goto error_free; + tos = INET_ECN_encapsulate(tos, inner_tos); + + /* Route lookup */ + rt = find_route(vport, mutable, tos, &cache); + if (unlikely(!rt)) + goto error_free; + if (unlikely(!cache)) + unattached_dst = &rt_dst(rt); + + /* Reset SKB */ + nf_reset(skb); + secpath_reset(skb); + skb_dst_drop(skb); + + /* Offloading */ + skb = 
handle_offloads(skb, mutable, rt); + if (unlikely(IS_ERR(skb))) + goto error; + + /* MTU */ + if (unlikely(!check_mtu(skb, vport, mutable, rt, &frag_off))) { + err = VPORT_E_TX_DROPPED; + goto error_free; } - iph.ttl = mutable->port_config.ttl; + /* + * If we are over the MTU, allow the IP stack to handle fragmentation. + * Fragmentation is a slow path anyways. + */ + if (unlikely(skb->len + mutable->tunnel_hlen > dst_mtu(&rt_dst(rt)) && + cache)) { + unattached_dst = &rt_dst(rt); + dst_hold(unattached_dst); + cache = NULL; + } + + /* TTL */ + ttl = mutable->port_config.ttl; + if (!ttl) + ttl = dst_metric(&rt_dst(rt), RTAX_HOPLIMIT); + if (mutable->port_config.flags & TNL_F_TTL_INHERIT) { if (skb->protocol == htons(ETH_P_IP)) - iph.ttl = old_iph->ttl; + ttl = ip_hdr(skb)->ttl; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) - iph.ttl = ipv6_hdr(skb)->hop_limit; + ttl = ipv6_hdr(skb)->hop_limit; #endif } - if (!iph.ttl) - iph.ttl = dst_metric(&rt_dst(rt), RTAX_HOPLIMIT); - iph.frag_off = (mutable->port_config.flags & TNL_F_PMTUD) ? htons(IP_DF) : 0; - if (iph.frag_off) - mtu = dst_mtu(&rt_dst(rt)) - - ETH_HLEN - - mutable->tunnel_hlen - - (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? 
VLAN_HLEN : 0); - else - mtu = mutable->mtu; + while (skb) { + struct iphdr *iph; + struct sk_buff *next_skb = skb->next; + skb->next = NULL; - if (skb->protocol == htons(ETH_P_IP)) { - iph.frag_off |= old_iph->frag_off & htons(IP_DF); - mtu = max(mtu, IP_MIN_MTU); - } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - else if (skb->protocol == htons(ETH_P_IPV6)) - mtu = max(mtu, IPV6_MIN_MTU); -#endif + if (likely(cache)) { + skb_push(skb, cache->len); + memcpy(skb->data, get_cached_header(cache), cache->len); + skb_reset_mac_header(skb); + skb_set_network_header(skb, rt_dst(rt).hh->hh_len); - iph.version = 4; - iph.ihl = sizeof(struct iphdr) >> 2; - iph.protocol = tnl_vport->tnl_ops->ipproto; - iph.daddr = rt->rt_dst; - iph.saddr = rt->rt_src; + } else { + skb_push(skb, mutable->tunnel_hlen); + create_tunnel_header(vport, mutable, rt, skb->data); + skb_reset_network_header(skb); - nf_reset(skb); - secpath_reset(skb); - skb_dst_drop(skb); - skb_dst_set(skb, &rt_dst(rt)); + if (next_skb) + skb_dst_set(skb, dst_clone(unattached_dst)); + else { + skb_dst_set(skb, unattached_dst); + unattached_dst = NULL; + } - /* - * If we are doing GSO on a pskb it is better to make sure that the - * headroom is correct now. We will only have to copy the portion in - * the linear data area and GSO will preserve headroom when it creates - * the segments. This is particularly beneficial on Xen where we get - * lots of GSO pskbs. Conversely, we delay copying if it is just to - * get our own writable clone because GSO may do the copy for us. 
- */ - max_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len - + mutable->tunnel_hlen; - if (skb_headroom(skb) < max_headroom) { - skb = check_headroom(skb, max_headroom); - if (unlikely(IS_ERR(skb))) { - vport_record_error(vport, VPORT_E_TX_DROPPED); - goto error; + memset(&IPCB(skb)->opt, 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags = 0; } - } + skb_set_transport_header(skb, skb_network_offset(skb) + sizeof(struct iphdr)); - forward_ip_summed(skb); + iph = ip_hdr(skb); + iph->tos = tos; + iph->ttl = ttl; + iph->frag_off = frag_off; + ip_select_ident(iph, &rt_dst(rt), NULL); - if (unlikely(vswitch_skb_checksum_setup(skb))) - goto error_free; + skb = tnl_vport->tnl_ops->update_header(vport, mutable, &rt_dst(rt), skb); + if (unlikely(!skb)) + goto next; - skb = handle_gso(skb); - if (unlikely(IS_ERR(skb))) { - vport_record_error(vport, VPORT_E_TX_DROPPED); - goto error; - } + if (likely(cache)) { + int orig_len = skb->len - cache->len; + struct vport *cache_vport = internal_dev_get_vport(rt_dst(rt).dev); - /* - * Process GSO segments. Try to do any work for the entire packet that - * doesn't involve actually writing to it before this point. 
- */ - orig_len = 0; - do { - struct sk_buff *next_skb = skb->next; - skb->next = NULL; + skb->protocol = htons(ETH_P_IP); + + iph->tot_len = htons(skb->len - skb_network_offset(skb)); + ip_send_check(iph); - orig_len += build_packet(vport, mutable, &iph, rt, max_headroom, mtu, skb); + if (likely(cache_vport)) { + OVS_CB(skb)->flow = cache->flow; + compute_ip_summed(skb, true); + vport_receive(cache_vport, skb); + sent_len += orig_len; + } else { + int err; + skb->dev = rt_dst(rt).dev; + err = dev_queue_xmit(skb); + + if (likely(net_xmit_eval(err) == 0)) + sent_len += orig_len; + } + } else + sent_len += send_frags(skb, mutable); + +next: skb = next_skb; - } while (skb); + } - if (unlikely(orig_len == 0)) + if (unlikely(sent_len == 0)) vport_record_error(vport, VPORT_E_TX_DROPPED); - return orig_len; + goto out; error_free: - kfree_skb(skb); - vport_record_error(vport, VPORT_E_TX_ERROR); + tnl_free_linked_skbs(skb); error: - return 0; -} - -int tnl_init(void) -{ - return 0; -} - -void tnl_exit(void) -{ - tbl_destroy(port_table, NULL); - port_table = NULL; + dst_release(unattached_dst); + vport_record_error(vport, err); +out: + return sent_len; } static int set_config(const void __user *uconfig, const struct tnl_ops *tnl_ops, @@ -899,15 +1318,18 @@ static int set_config(const void __user *uconfig, const struct tnl_ops *tnl_ops, if (copy_from_user(&mutable->port_config, uconfig, sizeof(struct tnl_port_config))) return -EFAULT; + if (mutable->port_config.daddr == 0) + return -EINVAL; + + if (mutable->port_config.tos != RT_TOS(mutable->port_config.tos)) + return -EINVAL; + mutable->tunnel_hlen = tnl_ops->hdr_len(&mutable->port_config); if (mutable->tunnel_hlen < 0) return mutable->tunnel_hlen; mutable->tunnel_hlen += sizeof(struct iphdr); - if (mutable->port_config.daddr == 0) - return -EINVAL; - mutable->tunnel_type = tnl_ops->tunnel_type; if (mutable->port_config.flags & TNL_F_IN_KEY_MATCH) { mutable->tunnel_type |= TNL_T_KEY_MATCH; @@ -950,7 +1372,7 @@ struct vport 
*tnl_create(const char *name, const void __user *config, strcpy(tnl_vport->name, name); tnl_vport->tnl_ops = tnl_ops; - tnl_vport->mutable = kmalloc(sizeof(struct tnl_mutable_config), GFP_KERNEL); + tnl_vport->mutable = kzalloc(sizeof(struct tnl_mutable_config), GFP_KERNEL); if (!tnl_vport->mutable) { err = -ENOMEM; goto error_free_vport; @@ -966,6 +1388,13 @@ struct vport *tnl_create(const char *name, const void __user *config, if (err) goto error_free_mutable; + spin_lock_init(&tnl_vport->cache_lock); + +#ifdef NEED_CACHE_TIMEOUT + tnl_vport->cache_exp_interval = MAX_CACHE_EXP - + (net_random() % (MAX_CACHE_EXP / 2)); +#endif + err = add_port(vport); if (err) goto error_free_mutable; @@ -985,7 +1414,6 @@ int tnl_modify(struct vport *vport, const void __user *config) struct tnl_vport *tnl_vport = tnl_vport_priv(vport); struct tnl_mutable_config *mutable; int err; - bool update_hash = false; mutable = kmemdup(tnl_vport->mutable, sizeof(struct tnl_mutable_config), GFP_KERNEL); if (!mutable) { @@ -997,35 +1425,11 @@ int tnl_modify(struct vport *vport, const void __user *config) if (err) goto error_free; - /* - * Only remove the port from the hash table if something that would - * affect the lookup has changed. - */ - if (tnl_vport->mutable->port_config.saddr != mutable->port_config.saddr || - tnl_vport->mutable->port_config.daddr != mutable->port_config.daddr || - tnl_vport->mutable->port_config.in_key != mutable->port_config.in_key || - (tnl_vport->mutable->port_config.flags & TNL_F_IN_KEY_MATCH) != - (mutable->port_config.flags & TNL_F_IN_KEY_MATCH)) - update_hash = true; - - - /* - * This update is not atomic but the lookup uses the config, which - * serves as an inherent double check. 
- */ - if (update_hash) { - err = del_port(vport); - if (err) - goto error_free; - } - - assign_config_rcu(vport, mutable); + mutable->seq++; - if (update_hash) { - err = add_port(vport); - if (err) - goto error_free; - } + err = move_port(vport, mutable); + if (err) + goto error_free; return 0; @@ -1035,10 +1439,14 @@ error: return err; } -static void free_port(struct rcu_head *rcu) +static void free_port_rcu(struct rcu_head *rcu) { struct tnl_vport *tnl_vport = container_of(rcu, struct tnl_vport, rcu); + spin_lock_bh(&tnl_vport->cache_lock); + free_cache(tnl_vport->cache); + spin_unlock_bh(&tnl_vport->cache_lock); + kfree(tnl_vport->mutable); vport_free(tnl_vport_to_vport(tnl_vport)); } @@ -1055,7 +1463,7 @@ int tnl_destroy(struct vport *vport) &old_mutable)) del_port(vport); - call_rcu(&tnl_vport->rcu, free_port); + call_rcu(&tnl_vport->rcu, free_port_rcu); return 0; } @@ -1090,7 +1498,6 @@ int tnl_set_addr(struct vport *vport, const unsigned char *addr) return 0; } - const char *tnl_get_name(const struct vport *vport) { const struct tnl_vport *tnl_vport = tnl_vport_priv(vport); @@ -1108,3 +1515,15 @@ int tnl_get_mtu(const struct vport *vport) const struct tnl_vport *tnl_vport = tnl_vport_priv(vport); return rcu_dereference(tnl_vport->mutable)->mtu; } + +void tnl_free_linked_skbs(struct sk_buff *skb) +{ + if (unlikely(!skb)) + return; + + while (skb) { + struct sk_buff *next = skb->next; + kfree_skb(skb); + skb = next; + } +} diff --git a/datapath/tunnel.h b/datapath/tunnel.h index 37874c57c..8ffb7bf54 100644 --- a/datapath/tunnel.h +++ b/datapath/tunnel.h @@ -9,6 +9,9 @@ #ifndef TUNNEL_H #define TUNNEL_H 1 +#include + +#include "flow.h" #include "openvswitch/tunnel.h" #include "table.h" #include "vport.h" @@ -20,14 +23,15 @@ #define IP_MIN_MTU 68 /* - * One of these goes in your struct tnl_ops and in tnl_find_port(). + * One of these goes in struct tnl_ops and in tnl_find_port(). 
* These values are in the same namespace as other TNL_T_* values, so - * you have only the first 10 bits to define protocol identifiers. + * only the least significant 10 bits are available to define protocol + * identifiers. */ #define TNL_T_PROTO_GRE 0 #define TNL_T_PROTO_CAPWAP 1 -/* You only need these flags when you are calling tnl_find_port(). */ +/* These flags are only needed when calling tnl_find_port(). */ #define TNL_T_KEY_EXACT (1 << 10) #define TNL_T_KEY_MATCH (1 << 11) #define TNL_T_KEY_EITHER (TNL_T_KEY_EXACT | TNL_T_KEY_MATCH) @@ -35,39 +39,119 @@ struct tnl_mutable_config { struct rcu_head rcu; - unsigned char eth_addr[ETH_ALEN]; - unsigned int mtu; - struct tnl_port_config port_config; + unsigned seq; /* Sequence number to identify this config. */ - /* Set of TNL_T_* flags that define the category for lookup. */ - u32 tunnel_type; + u32 tunnel_type; /* Set of TNL_T_* flags that define lookup. */ + unsigned tunnel_hlen; /* Tunnel header length. */ + + unsigned char eth_addr[ETH_ALEN]; + unsigned mtu; - int tunnel_hlen; /* Tunnel header length. */ + struct tnl_port_config port_config; }; struct tnl_ops { - /* Put your TNL_T_PROTO_* type in here. */ - u32 tunnel_type; - u8 ipproto; + u32 tunnel_type; /* Put the TNL_T_PROTO_* type in here. */ + u8 ipproto; /* The IP protocol for the tunnel. */ /* - * Returns the length of the tunnel header you will add in + * Returns the length of the tunnel header that will be added in * build_header() (i.e. excludes the IP header). Returns a negative * error code if the configuration is invalid. */ int (*hdr_len)(const struct tnl_port_config *); /* - * Returns a linked list of SKBs with tunnel headers (multiple - * packets may be generated in the event of fragmentation). Space - * will have already been allocated at the start of the packet equal - * to sizeof(struct iphdr) + value returned by hdr_len(). The IP - * header will have already been constructed. 
+ * Builds the static portion of the tunnel header, which is stored in + * the header cache. In general the performance of this function is + * not too important as we try to only call it when building the cache + * so it is preferable to shift as much work as possible here. However, + * in some circumstances caching is disabled and this function will be + * called for every packet, so try not to make it too slow. + */ + void (*build_header)(const struct vport *, + const struct tnl_mutable_config *, void *header); + + /* + * Updates the cached header of a packet to match the actual packet + * data. Typical things that might need to be updated are length, + * checksum, etc. The IP header will have already been updated and this + * is the final step before transmission. Returns a linked list of + * completed SKBs (multiple packets may be generated in the event + * of fragmentation). + */ + struct sk_buff *(*update_header)(const struct vport *, + const struct tnl_mutable_config *, + struct dst_entry *, struct sk_buff *); +}; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) +/* + * On these kernels we have a fast mechanism to tell if the ARP cache for a + * particular destination has changed. + */ +#define HAVE_HH_SEQ +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27) +/* + * On these kernels we have a fast mechanism to tell if the routing table + * has changed. + */ +#define HAVE_RT_GENID +#endif +#if !defined(HAVE_HH_SEQ) || !defined(HAVE_RT_GENID) +/* If we can't detect all system changes directly we need to use a timeout. */ +#define NEED_CACHE_TIMEOUT +#endif +struct tnl_cache { + struct rcu_head rcu; + + int len; /* Length of data to be memcpy'd from cache. */ + + /* Sequence number of mutable->seq from which this cache was generated. */ + unsigned mutable_seq; + +#ifdef HAVE_HH_SEQ + /* + * The sequence number from the seqlock protecting the hardware header + * cache (in the ARP cache). 
Since every write increments the counter + * this gives us an easy way to tell if it has changed. + */ + unsigned hh_seq; +#endif + +#ifdef NEED_CACHE_TIMEOUT + /* + * If we don't have direct mechanisms to detect all important changes in + * the system fall back to an expiration time. This expiration time + * can be relatively short since at high rates there will be millions of + * packets per second, so we'll still get plenty of benefit from the + * cache. Note that if something changes we may blackhole packets + * until the expiration time (depending on what changed and the kernel + * version we may be able to detect the change sooner). Expiration is + * expressed as a time in jiffies. */ - struct sk_buff *(*build_header)(struct sk_buff *, - const struct vport *, - const struct tnl_mutable_config *, - struct dst_entry *); + unsigned long expiration; +#endif + + /* + * The routing table entry that is the result of looking up the tunnel + * endpoints. It also contains a sequence number (called a generation + * ID) that can be compared to a global sequence to tell if the routing + * table has changed (and therefore there is a potential that this + * cached route has been invalidated). + */ + struct rtable *rt; + + /* + * If the output device for tunnel traffic is an OVS internal device, + * the flow of that datapath. Since all tunnel traffic will have the + * same headers this allows us to cache the flow lookup. NULL if the + * output device is not OVS or if there is no flow installed. + */ + struct sw_flow *flow; + + /* The cached header follows after padding for alignment. */ }; struct tnl_vport { @@ -77,14 +161,29 @@ struct tnl_vport { char name[IFNAMSIZ]; const struct tnl_ops *tnl_ops; - /* Protected by RCU. */ - struct tnl_mutable_config *mutable; + struct tnl_mutable_config *mutable; /* Protected by RCU. */ + /* + * ID of last fragment sent (for tunnel protocols with direct support for + * fragmentation).
If the protocol relies on IP fragmentation then + * this is not needed. + */ atomic_t frag_id; + + spinlock_t cache_lock; + struct tnl_cache *cache; /* Protected by RCU/cache_lock. */ + +#ifdef NEED_CACHE_TIMEOUT + /* + * If we must rely on expiration time to invalidate the cache, this is + * the interval. It is randomized within a range (defined by + * MAX_CACHE_EXP in tunnel.c) to avoid synchronized expirations caused + * by creation of a large number of tunnels at one time. + */ + unsigned long cache_exp_interval; +#endif }; -int tnl_init(void); -void tnl_exit(void); struct vport *tnl_create(const char *name, const void __user *config, const struct vport_ops *, const struct tnl_ops *); @@ -104,10 +203,12 @@ struct vport *tnl_find_port(__be32 saddr, __be32 daddr, __be32 key, bool tnl_frag_needed(struct vport *vport, const struct tnl_mutable_config *mutable, struct sk_buff *skb, unsigned int mtu, __be32 flow_key); +void tnl_free_linked_skbs(struct sk_buff *skb); static inline struct tnl_vport *tnl_vport_priv(const struct vport *vport) { return vport_priv(vport); } + #endif /* tunnel.h */ diff --git a/datapath/vport-capwap.c b/datapath/vport-capwap.c index 7ae3790d7..bf1465fc0 100644 --- a/datapath/vport-capwap.c +++ b/datapath/vport-capwap.c @@ -128,24 +128,32 @@ static int capwap_hdr_len(const struct tnl_port_config *port_config) return CAPWAP_HLEN; } -static struct sk_buff *capwap_build_header(struct sk_buff *skb, - const struct vport *vport, - const struct tnl_mutable_config *mutable, - struct dst_entry *dst) +static void capwap_build_header(const struct vport *vport, + const struct tnl_mutable_config *mutable, + void *header) { - struct udphdr *udph = udp_hdr(skb); - struct capwaphdr *cwh = capwap_hdr(skb); + struct udphdr *udph = header; + struct capwaphdr *cwh = (struct capwaphdr *)(udph + 1); udph->source = htons(CAPWAP_SRC_PORT); udph->dest = htons(CAPWAP_DST_PORT); - udph->len = htons(skb->len - sizeof(struct iphdr)); udph->check = 0; cwh->begin =
NO_FRAG_HDR; cwh->frag_id = 0; cwh->frag_off = 0; +} + +static struct sk_buff *capwap_update_header(const struct vport *vport, + const struct tnl_mutable_config *mutable, + struct dst_entry *dst, + struct sk_buff *skb) +{ + struct udphdr *udph = udp_hdr(skb); - if (unlikely(skb->len > dst_mtu(dst))) + udph->len = htons(skb->len - skb_transport_offset(skb)); + + if (unlikely(skb->len - skb_network_offset(skb) > dst_mtu(dst))) skb = fragment(skb, vport, dst); return skb; @@ -209,6 +217,7 @@ struct tnl_ops capwap_tnl_ops = { .ipproto = IPPROTO_UDP, .hdr_len = capwap_hdr_len, .build_header = capwap_build_header, + .update_header = capwap_update_header, }; static struct vport *capwap_create(const char *name, const void __user *config) @@ -241,7 +250,7 @@ static int capwap_init(void) defrag_init(); - return tnl_init(); + return 0; error_sock: sock_release(capwap_rcv_socket); @@ -252,7 +261,6 @@ error: static void capwap_exit(void) { - tnl_exit(); defrag_exit(); sock_release(capwap_rcv_socket); } @@ -282,17 +290,19 @@ static struct sk_buff *fragment(struct sk_buff *skb, const struct vport *vport, struct dst_entry *dst) { struct tnl_vport *tnl_vport = tnl_vport_priv(vport); - unsigned int hlen = sizeof(struct iphdr) + CAPWAP_HLEN; - unsigned int headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len; + unsigned int hlen = skb_transport_offset(skb) + CAPWAP_HLEN; + unsigned int headroom; + unsigned int max_frame_len = dst_mtu(dst) + skb_network_offset(skb); struct sk_buff *result = NULL, *list_cur = NULL; unsigned int remaining; unsigned int offset; __be16 frag_id; - if (hlen + ~FRAG_OFF_MASK + 1 > dst_mtu(dst)) { + if (hlen + ~FRAG_OFF_MASK + 1 > max_frame_len) { if (net_ratelimit()) pr_warn("capwap link mtu (%d) is less than minimum packet (%d)\n", - dst_mtu(dst), hlen + ~FRAG_OFF_MASK + 1); + dst_mtu(dst), + hlen - skb_network_offset(skb) + ~FRAG_OFF_MASK + 1); goto error; } @@ -300,14 +310,17 @@ static struct sk_buff *fragment(struct sk_buff *skb, const struct vport 
*vport, offset = 0; frag_id = htons(atomic_inc_return(&tnl_vport->frag_id)); + headroom = dst->header_len + 16; + if (!skb_network_offset(skb)) + headroom += LL_RESERVED_SPACE(dst->dev); + while (remaining) { struct sk_buff *skb2; int frag_size; - struct iphdr *iph; struct udphdr *udph; struct capwaphdr *cwh; - frag_size = min(remaining, dst_mtu(dst) - hlen); + frag_size = min(remaining, max_frame_len - hlen); if (remaining > frag_size) frag_size &= FRAG_OFF_MASK; @@ -317,23 +330,22 @@ static struct sk_buff *fragment(struct sk_buff *skb, const struct vport *vport, skb_reserve(skb2, headroom); __skb_put(skb2, hlen + frag_size); - skb_reset_network_header(skb2); - skb_set_transport_header(skb2, sizeof(struct iphdr)); - /* Copy IP/UDP/CAPWAP header. */ + if (skb_network_offset(skb)) + skb_reset_mac_header(skb2); + skb_set_network_header(skb2, skb_network_offset(skb)); + skb_set_transport_header(skb2, skb_transport_offset(skb)); + + /* Copy (Ethernet)/IP/UDP/CAPWAP header. */ copy_skb_metadata(skb, skb2); - skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen); + skb_copy_from_linear_data(skb, skb2->data, hlen); /* Copy this data chunk. 
*/ if (skb_copy_bits(skb, hlen + offset, skb2->data + hlen, frag_size)) BUG(); - iph = ip_hdr(skb2); - iph->tot_len = hlen + frag_size; - ip_send_check(iph); - udph = udp_hdr(skb2); - udph->len = htons(skb2->len - sizeof(struct iphdr)); + udph->len = htons(skb2->len - skb_transport_offset(skb2)); cwh = capwap_hdr(skb2); if (remaining > frag_size) @@ -356,11 +368,7 @@ static struct sk_buff *fragment(struct sk_buff *skb, const struct vport *vport, goto out; error: - while (result) { - list_cur = result->next; - kfree_skb(result); - result = list_cur; - } + tnl_free_linked_skbs(result); out: kfree_skb(skb); return result; diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c index 0a7092f96..be8fb5343 100644 --- a/datapath/vport-gre.c +++ b/datapath/vport-gre.c @@ -50,41 +50,49 @@ static int gre_hdr_len(const struct tnl_port_config *port_config) return len; } -static struct sk_buff *gre_build_header(struct sk_buff *skb, - const struct vport *vport, - const struct tnl_mutable_config *mutable, - struct dst_entry *dst) +static void gre_build_header(const struct vport *vport, + const struct tnl_mutable_config *mutable, + void *header) { - struct gre_base_hdr *greh = (struct gre_base_hdr *)skb_transport_header(skb); - __be32 *options = (__be32 *)(skb_network_header(skb) + mutable->tunnel_hlen - - GRE_HEADER_SECTION); + struct gre_base_hdr *greh = header; + __be32 *options = (__be32 *)(greh + 1); greh->protocol = htons(ETH_P_TEB); greh->flags = 0; - /* Work backwards over the options so the checksum is last. 
*/ + if (mutable->port_config.flags & TNL_F_CSUM) { + greh->flags |= GRE_CSUM; + *options = 0; + options++; + } + if (mutable->port_config.out_key || - mutable->port_config.flags & TNL_F_OUT_KEY_ACTION) { + mutable->port_config.flags & TNL_F_OUT_KEY_ACTION) greh->flags |= GRE_KEY; - if (mutable->port_config.flags & TNL_F_OUT_KEY_ACTION) - *options = OVS_CB(skb)->tun_id; - else - *options = mutable->port_config.out_key; + if (mutable->port_config.out_key) + *options = mutable->port_config.out_key; +} + +static struct sk_buff *gre_update_header(const struct vport *vport, + const struct tnl_mutable_config *mutable, + struct dst_entry *dst, + struct sk_buff *skb) +{ + __be32 *options = (__be32 *)(skb_network_header(skb) + mutable->tunnel_hlen + - GRE_HEADER_SECTION); + /* Work backwards over the options so the checksum is last. */ + if (mutable->port_config.flags & TNL_F_OUT_KEY_ACTION) { + *options = OVS_CB(skb)->tun_id; options--; } - if (mutable->port_config.flags & TNL_F_CSUM) { - greh->flags |= GRE_CSUM; - - *options = 0; + if (mutable->port_config.flags & TNL_F_CSUM) *(__sum16 *)options = csum_fold(skb_checksum(skb, - sizeof(struct iphdr), - skb->len - sizeof(struct iphdr), + skb_transport_offset(skb), + skb->len - skb_transport_offset(skb), 0)); - } - /* * Allow our local IP stack to fragment the outer packet even if the * DF bit is set as a last resort. 
@@ -329,6 +337,7 @@ struct tnl_ops gre_tnl_ops = { .ipproto = IPPROTO_GRE, .hdr_len = gre_hdr_len, .build_header = gre_build_header, + .update_header = gre_update_header, }; static struct vport *gre_create(const char *name, const void __user *config) @@ -346,20 +355,14 @@ static int gre_init(void) int err; err = inet_add_protocol(&gre_protocol_handlers, IPPROTO_GRE); - if (err) { + if (err) pr_warn("cannot register gre protocol handler\n"); - goto out; - } - - err = tnl_init(); -out: return err; } static void gre_exit(void) { - tnl_exit(); inet_del_protocol(&gre_protocol_handlers, IPPROTO_GRE); } diff --git a/include/openvswitch/tunnel.h b/include/openvswitch/tunnel.h index 373797513..dd700d0dc 100644 --- a/include/openvswitch/tunnel.h +++ b/include/openvswitch/tunnel.h @@ -48,6 +48,7 @@ #define TNL_F_TOS_INHERIT (1 << 4) /* Inherit the ToS from the inner packet. */ #define TNL_F_TTL_INHERIT (1 << 5) /* Inherit the TTL from the inner packet. */ #define TNL_F_PMTUD (1 << 6) /* Enable path MTU discovery. */ +#define TNL_F_HDR_CACHE (1 << 7) /* Enable tunnel header caching. */ struct tnl_port_config { __u32 flags; -- cgit v1.2.1 From dca9309ae888995f13be5e1bfa607214ca531613 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 30 Aug 2010 15:34:04 -0700 Subject: tunneling: Allow disabling tunnel header caching. Tunnel header caching significantly improves performance by bypassing much of the transmit path. However, in some special cases or for debugging it may be desirable to traverse the entire IP stack. This exposes that as an option (default is to enable header caching). 
Signed-off-by: Jesse Gross Reviewed-by: Ben Pfaff --- lib/netdev-tunnel.c | 5 +++++ vswitchd/vswitch.xml | 22 ++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/lib/netdev-tunnel.c b/lib/netdev-tunnel.c index d0ecd98e2..fdc1d976a 100644 --- a/lib/netdev-tunnel.c +++ b/lib/netdev-tunnel.c @@ -66,6 +66,7 @@ parse_config(const char *name, const char *type, const struct shash *args, memset(config, 0, sizeof *config); config->flags |= TNL_F_PMTUD; + config->flags |= TNL_F_HDR_CACHE; SHASH_FOR_EACH (node, args) { if (!strcmp(node->name, "remote_ip")) { @@ -121,6 +122,10 @@ parse_config(const char *name, const char *type, const struct shash *args, if (!strcmp(node->data, "false")) { config->flags &= ~TNL_F_PMTUD; } + } else if (!strcmp(node->name, "header_cache")) { + if (!strcmp(node->data, "false")) { + config->flags &= ~TNL_F_HDR_CACHE; + } } else { VLOG_WARN("%s: unknown %s argument '%s'", name, type, node->name); } diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 6e255763d..86fd3f9be 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -539,6 +539,17 @@ compliance with the IEEE 802.1D specification for bridges. Default is enabled, set to false to disable. +
+
header_cache
+
Optional. Enable caching of tunnel headers and the output + path. This can lead to a significant performance increase + without changing behavior. In general it should not be + necessary to adjust this setting. However, the caching can + bypass certain components of the IP stack (such as iptables) + and it may be useful to disable it if these features are + required or as a debugging measure. Default is enabled, set to + false to disable.
+
capwap
Ethernet tunneling over the UDP transport portion of CAPWAP @@ -594,6 +605,17 @@ compliance with the IEEE 802.1D specification for bridges. Default is enabled, set to false to disable.
+
+
header_cache
+
Optional. Enable caching of tunnel headers and the output + path. This can lead to a significant performance increase + without changing behavior. In general it should not be + necessary to adjust this setting. However, the caching can + bypass certain components of the IP stack (such as iptables) + and it may be useful to disable it if these features are + required or as a debugging measure. Default is enabled, set to + false to disable.
+
patch
-- cgit v1.2.1 From f10a03343b5dd77a41dfefad150b65863af38a00 Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Thu, 16 Sep 2010 15:37:16 -0700 Subject: debian: Allow automake versions greater than or equal to 1.10 --- debian/control | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/control b/debian/control index b7f2248f9..53e5b98d6 100644 --- a/debian/control +++ b/debian/control @@ -4,8 +4,8 @@ Priority: extra Maintainer: Open vSwitch developers Uploaders: Ben Pfaff , Simon Horman Build-Depends: - debhelper (>= 5), autoconf (>= 2.64), automake1.10, libssl-dev, - pkg-config (>= 0.21), po-debconf, bzip2, openssl, python, + debhelper (>= 5), autoconf (>= 2.64), automake (>= 1.10) | automake1.10, + libssl-dev, pkg-config (>= 0.21), po-debconf, bzip2, openssl, python, python-support (>= 0.8.4) Standards-Version: 3.9.1 Homepage: http://openvswitch.org/ -- cgit v1.2.1 From a3acf0b0c46a28d6c891086e054d81dd915eea2e Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Thu, 16 Sep 2010 19:19:11 -0700 Subject: debian: Add support for GRE-over-IPsec The ovs-monitor-ipsec daemon monitors the Interface table for GRE entries. If an entry specifies other-config parameters "ipsec-local-ip" and ("ipsec-psk" or "ipsec-cert"), it will create the appropriate security associations so that all GRE traffic to the remote host will be encrypted. In order for the two GRE tunnels to communicate, both sides need to be configured for IPsec with appropriate authentication. Currently, ovs-monitor-ipsec does not support certificate authentication or ensure that an interface is actually attached to a bridge. Both of these issues will be addressed in a forthcoming patch. NB: While GRE-over-IPsec should work on any system with a relatively recent racoon and setkey, it has only been tested on Debian. As such, only Debian packaging has been provided. 
--- debian/.gitignore | 1 + debian/automake.mk | 4 + debian/control | 16 +- debian/openvswitch-ipsec.dirs | 1 + debian/openvswitch-ipsec.init | 184 +++++++++++++++++++++ debian/openvswitch-ipsec.install | 1 + debian/ovs-monitor-ipsec | 349 +++++++++++++++++++++++++++++++++++++++ vswitchd/vswitch.ovsschema | 2 + vswitchd/vswitch.xml | 21 +++ 9 files changed, 578 insertions(+), 1 deletion(-) create mode 100644 debian/openvswitch-ipsec.dirs create mode 100755 debian/openvswitch-ipsec.init create mode 100644 debian/openvswitch-ipsec.install create mode 100755 debian/ovs-monitor-ipsec diff --git a/debian/.gitignore b/debian/.gitignore index 7f43aa6ed..24e62d94b 100644 --- a/debian/.gitignore +++ b/debian/.gitignore @@ -12,6 +12,7 @@ /openvswitch-controller /openvswitch-datapath-source /openvswitch-dbg +/openvswitch-ipsec /openvswitch-pki /openvswitch-pki-server /openvswitch-switch diff --git a/debian/automake.mk b/debian/automake.mk index c768d56b5..20432062a 100644 --- a/debian/automake.mk +++ b/debian/automake.mk @@ -24,6 +24,9 @@ EXTRA_DIST += \ debian/openvswitch-datapath-source.copyright \ debian/openvswitch-datapath-source.dirs \ debian/openvswitch-datapath-source.install \ + debian/openvswitch-ipsec.dirs \ + debian/openvswitch-ipsec.init \ + debian/openvswitch-ipsec.install \ debian/openvswitch-pki-server.apache2 \ debian/openvswitch-pki-server.dirs \ debian/openvswitch-pki-server.install \ @@ -39,6 +42,7 @@ EXTRA_DIST += \ debian/openvswitch-switch.postrm \ debian/openvswitch-switch.template \ debian/ovs-bugtool \ + debian/ovs-monitor-ipsec \ debian/python-openvswitch.dirs \ debian/python-openvswitch.install \ debian/rules \ diff --git a/debian/control b/debian/control index 53e5b98d6..622daeb3b 100644 --- a/debian/control +++ b/debian/control @@ -41,6 +41,19 @@ Description: Open vSwitch switch implementations . Open vSwitch is a full-featured software-based Ethernet switch. 
+Package: openvswitch-ipsec +Architecture: any +Depends: + ${shlibs:Depends}, ${misc:Depends}, ${python:Depends}, ipsec-tools, racoon, + openvswitch-common (= ${binary:Version}), + openvswitch-switch (= ${binary:Version}), + python-openvswitch (= ${binary:Version}) +Description: Open vSwitch GRE-over-IPsec support + The ovs-monitor-ipsec script provides support for encrypting GRE + tunnels with IPsec. + . + Open vSwitch is a full-featured software-based Ethernet switch. + Package: openvswitch-pki Architecture: all Depends: @@ -90,13 +103,14 @@ Depends: ${shlibs:Depends}, ${misc:Depends}, openvswitch-common (= ${binary:Version}), openvswitch-controller (= ${binary:Version}), + openvswitch-ipsec (= ${binary:Version}), openvswitch-switch (= ${binary:Version}) Description: Debug symbols for Open vSwitch packages This package contains the debug symbols for all the other openvswitch-* packages. Install it to debug one of them or to examine a core dump produced by one of them. -Package: python-openvswitch +Package: python-openvswitch Architecture: all Section: python Depends: ${python:Depends}, openvswitch-switch (= ${binary:Version}) diff --git a/debian/openvswitch-ipsec.dirs b/debian/openvswitch-ipsec.dirs new file mode 100644 index 000000000..02130d0e9 --- /dev/null +++ b/debian/openvswitch-ipsec.dirs @@ -0,0 +1 @@ +usr/share/openvswitch/scripts diff --git a/debian/openvswitch-ipsec.init b/debian/openvswitch-ipsec.init new file mode 100755 index 000000000..f3c9a13a0 --- /dev/null +++ b/debian/openvswitch-ipsec.init @@ -0,0 +1,184 @@ +#!/bin/sh +# +# Copyright (c) 2007, 2009 Javier Fernandez-Sanguino +# +# This is free software; you may redistribute it and/or modify +# it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2, +# or (at your option) any later version. 
+# +# This is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License with +# the Debian operating system, in /usr/share/common-licenses/GPL; if +# not, write to the Free Software Foundation, Inc., 59 Temple Place, +# Suite 330, Boston, MA 02111-1307 USA +# +### BEGIN INIT INFO +# Provides: openvswitch-ipsec +# Required-Start: $network $local_fs $remote_fs +# Required-Stop: $remote_fs +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: Open vSwitch GRE-over-IPsec daemon +### END INIT INFO + +PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin + +DAEMON=/usr/share/openvswitch/scripts/ovs-monitor-ipsec # Daemon's location +NAME=ovs-monitor-ipsec # Introduce the short server's name here +LOGDIR=/var/log/openvswitch # Log directory to use + +PIDFILE=/var/run/openvswitch/$NAME.pid + +test -x $DAEMON || exit 0 + +. /lib/lsb/init-functions + +DODTIME=10 # Time to wait for the server to die, in seconds + # If this value is set too low you might not + # let some servers to die gracefully and + # 'restart' will not work + +set -e + +running_pid() { +# Check if a given process pid's cmdline matches a given name + pid=$1 + name=$2 + [ -z "$pid" ] && return 1 + [ ! -d /proc/$pid ] && return 1 + cmd=`cat /proc/$pid/cmdline | tr "\000" " "|cut -d " " -f 2` + # Is this the expected server + [ "$cmd" != "$name" ] && return 1 + return 0 +} + +running() { +# Check if the process is running looking at /proc +# (works for all users) + + # No pidfile, probably no daemon present + [ ! 
-f "$PIDFILE" ] && return 1 + pid=`cat $PIDFILE` + running_pid $pid $DAEMON || return 1 + return 0 +} + +start_server() { + PYTHONPATH=/usr/share/openvswitch/python \ + /usr/share/openvswitch/scripts/ovs-monitor-ipsec \ + --pidfile-name=$PIDFILE --detach --monitor \ + unix:/var/run/openvswitch/db.sock + + return 0 +} + +stop_server() { + if [ -e $PIDFILE ]; then + kill `cat $PIDFILE` + fi + + return 0 +} + +force_stop() { +# Force the process to die killing it manually + [ ! -e "$PIDFILE" ] && return + if running ; then + kill -15 $pid + # Is it really dead? + sleep "$DIETIME"s + if running ; then + kill -9 $pid + sleep "$DIETIME"s + if running ; then + echo "Cannot kill $NAME (pid=$pid)!" + exit 1 + fi + fi + fi + rm -f $PIDFILE +} + + +case "$1" in + start) + log_daemon_msg "Starting $NAME" + # Check if it's running first + if running ; then + log_progress_msg "apparently already running" + log_end_msg 0 + exit 0 + fi + if start_server && running ; then + # It's ok, the server started and is running + log_end_msg 0 + else + # Either we could not start it or it is not running + # after we did + # NOTE: Some servers might die some time after they start, + # this code does not try to detect this and might give + # a false positive (use 'status' for that) + log_end_msg 1 + fi + ;; + stop) + log_daemon_msg "Stopping $NAME" + if running ; then + # Only stop the server if we see it running + stop_server + log_end_msg $? + else + # If it's not running don't do anything + log_progress_msg "apparently not running" + log_end_msg 0 + exit 0 + fi + ;; + force-stop) + # First try to stop gracefully the program + $0 stop + if running; then + # If it's still running try to kill it more forcefully + log_daemon_msg "Stopping (force) $NAME" + force_stop + log_end_msg $? 
+ fi + ;; + restart|force-reload) + log_daemon_msg "Restarting $NAME" + stop_server + # Wait some sensible amount, some server need this + [ -n "$DIETIME" ] && sleep $DIETIME + start_server + running + log_end_msg $? + ;; + status) + log_daemon_msg "Checking status of $NAME" + if running ; then + log_progress_msg "running" + log_end_msg 0 + else + log_progress_msg "apparently not running" + log_end_msg 1 + exit 1 + fi + ;; + # Use this if the daemon cannot reload + reload) + log_warning_msg "Reloading $NAME daemon: not implemented, as the daemon" + log_warning_msg "cannot re-read the config file (use restart)." + ;; + *) + N=/etc/init.d/openvswitch-ipsec + echo "Usage: $N {start|stop|force-stop|restart|force-reload|status}" >&2 + exit 1 + ;; +esac + +exit 0 diff --git a/debian/openvswitch-ipsec.install b/debian/openvswitch-ipsec.install new file mode 100644 index 000000000..72cacfa25 --- /dev/null +++ b/debian/openvswitch-ipsec.install @@ -0,0 +1 @@ +debian/ovs-monitor-ipsec usr/share/openvswitch/scripts diff --git a/debian/ovs-monitor-ipsec b/debian/ovs-monitor-ipsec new file mode 100755 index 000000000..1caece3a9 --- /dev/null +++ b/debian/ovs-monitor-ipsec @@ -0,0 +1,349 @@ +#!/usr/bin/python +# Copyright (c) 2009, 2010 Nicira Networks +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# A daemon to monitor attempts to create GRE-over-IPsec tunnels. +# Uses racoon and setkey to support the configuration. 
Assumes that +# OVS has complete control over IPsec configuration for the box. + +# xxx To-do: +# - Doesn't actually check that Interface is connected to bridge +# - Doesn't support cert authentication + + +import getopt +import logging, logging.handlers +import os +import stat +import subprocess +import sys + +from ovs.db import error +from ovs.db import types +import ovs.util +import ovs.daemon +import ovs.db.idl + + +# By default log messages as DAEMON into syslog +s_log = logging.getLogger("ovs-monitor-ipsec") +l_handler = logging.handlers.SysLogHandler( + "/dev/log", + facility=logging.handlers.SysLogHandler.LOG_DAEMON) +l_formatter = logging.Formatter('%(filename)s: %(levelname)s: %(message)s') +l_handler.setFormatter(l_formatter) +s_log.addHandler(l_handler) + + +setkey = "/usr/sbin/setkey" + +# Class to configure the racoon daemon, which handles IKE negotiation +class Racoon: + # Default locations for files + conf_file = "/etc/racoon/racoon.conf" + cert_file = "/etc/racoon/certs" + psk_file = "/etc/racoon/psk.txt" + + # Default racoon configuration file we use for IKE + conf_template = """# Configuration file generated by Open vSwitch +# +# Do not modify by hand! 
+ +path pre_shared_key "/etc/racoon/psk.txt"; +path certificate "/etc/racoon/certs"; + +remote anonymous { + exchange_mode main; + proposal { + encryption_algorithm aes; + hash_algorithm sha1; + authentication_method pre_shared_key; + dh_group 2; + } +} + +sainfo anonymous { + pfs_group 2; + lifetime time 1 hour; + encryption_algorithm aes; + authentication_algorithm hmac_sha1, hmac_md5; + compression_algorithm deflate; +} +""" + + def __init__(self): + self.psk_hosts = {} + self.cert_hosts = {} + + # Replace racoon's conf file with our template + f = open(Racoon.conf_file, "w") + f.write(Racoon.conf_template) + f.close() + + # Clear out any pre-shared keys + self.commit_psk() + + self.reload() + + def reload(self): + exitcode = subprocess.call(["/etc/init.d/racoon", "reload"]) + if exitcode != 0: + s_log.warning("couldn't reload racoon") + + def commit_psk(self): + f = open(Racoon.psk_file, 'w') + + # The file must only be accessible by root + os.chmod(Racoon.psk_file, stat.S_IRUSR | stat.S_IWUSR) + + f.write("# Generated by Open vSwitch...do not modify by hand!\n\n") + for host, psk in self.psk_hosts.iteritems(): + f.write("%s %s\n" % (host, psk)) + f.close() + + def add_psk(self, host, psk): + self.psk_hosts[host] = psk + self.commit_psk() + + def del_psk(self, host): + if host in self.psk_hosts: + del self.psk_hosts[host] + self.commit_psk() + + +# Class to configure IPsec on a system using racoon for IKE and setkey +# for maintaining the Security Association Database (SAD) and Security +# Policy Database (SPD). Only policies for GRE are supported. +class IPsec: + def __init__(self): + self.sad_flush() + self.spd_flush() + self.racoon = Racoon() + + def call_setkey(self, cmds): + try: + p = subprocess.Popen([setkey, "-c"], stdin=subprocess.PIPE, + stdout=subprocess.PIPE) + except: + s_log.error("could not call setkey") + sys.exit(1) + + # xxx It is safer to pass the string into the communicate() + # xxx method, but it didn't work for slightly longer commands. 
+ # xxx An alternative may need to be found. + p.stdin.write(cmds) + return p.communicate()[0] + + def get_spi(self, local_ip, remote_ip, proto="esp"): + # Run the setkey dump command to retrieve the SAD. Then, parse + # the output looking for SPI buried in the output. Note that + # multiple SAD entries can exist for the same "flow", since an + # older entry could be in a "dying" state. + spi_list = [] + host_line = "%s %s" % (local_ip, remote_ip) + results = self.call_setkey("dump ;").split("\n") + for i in range(len(results)): + if results[i].strip() == host_line: + # The SPI is in the line following the host pair + spi_line = results[i+1] + if (spi_line[1:4] == proto): + spi = spi_line.split()[2] + spi_list.append(spi.split('(')[1].rstrip(')')) + return spi_list + + def sad_flush(self): + self.call_setkey("flush;") + + def sad_del(self, local_ip, remote_ip): + # To delete all SAD entries, we should be able to use setkey's + # "deleteall" command. Unfortunately, it's fundamentally broken + # on Linux and not documented as such. 
+ cmds = "" + + # Delete local_ip->remote_ip SAD entries + spi_list = self.get_spi(local_ip, remote_ip) + for spi in spi_list: + cmds += "delete %s %s esp %s;\n" % (local_ip, remote_ip, spi) + + # Delete remote_ip->local_ip SAD entries + spi_list = self.get_spi(remote_ip, local_ip) + for spi in spi_list: + cmds += "delete %s %s esp %s;\n" % (remote_ip, local_ip, spi) + + if cmds: + self.call_setkey(cmds) + + def spd_flush(self): + self.call_setkey("spdflush;") + + def spd_add(self, local_ip, remote_ip): + cmds = ("spdadd %s %s gre -P out ipsec esp/transport//default;" % + (local_ip, remote_ip)) + cmds += "\n" + cmds += ("spdadd %s %s gre -P in ipsec esp/transport//default;" % + (remote_ip, local_ip)) + self.call_setkey(cmds) + + def spd_del(self, local_ip, remote_ip): + cmds = "spddelete %s %s gre -P out;" % (local_ip, remote_ip) + cmds += "\n" + cmds += "spddelete %s %s gre -P in;" % (remote_ip, local_ip) + self.call_setkey(cmds) + + def ipsec_cert_del(self, local_ip, remote_ip): + # Need to support cert...right now only PSK supported + self.racoon.del_psk(remote_ip) + self.spd_del(local_ip, remote_ip) + self.sad_del(local_ip, remote_ip) + + def ipsec_cert_update(self, local_ip, remote_ip, cert): + # Need to support cert...right now only PSK supported + self.racoon.add_psk(remote_ip, "abc12345") + self.spd_add(local_ip, remote_ip) + + def ipsec_psk_del(self, local_ip, remote_ip): + self.racoon.del_psk(remote_ip) + self.spd_del(local_ip, remote_ip) + self.sad_del(local_ip, remote_ip) + + def ipsec_psk_update(self, local_ip, remote_ip, psk): + self.racoon.add_psk(remote_ip, psk) + self.spd_add(local_ip, remote_ip) + + +def keep_table_columns(schema, table_name, column_types): + table = schema.tables.get(table_name) + if not table: + raise error.Error("schema has no %s table" % table_name) + + new_columns = {} + for column_name, column_type in column_types.iteritems(): + column = table.columns.get(column_name) + if not column: + raise error.Error("%s table schema 
lacks %s column" + % (table_name, column_name)) + if column.type != column_type: + raise error.Error("%s column in %s table has type \"%s\", " + "expected type \"%s\"" + % (column_name, table_name, + column.type.toEnglish(), + column_type.toEnglish())) + new_columns[column_name] = column + table.columns = new_columns + return table + +def monitor_uuid_schema_cb(schema): + string_type = types.Type(types.BaseType(types.StringType)) + string_map_type = types.Type(types.BaseType(types.StringType), + types.BaseType(types.StringType), + 0, sys.maxint) + + new_tables = {} + new_tables["Interface"] = keep_table_columns( + schema, "Interface", {"name": string_type, + "type": string_type, + "options": string_map_type, + "other_config": string_map_type}) + schema.tables = new_tables + +def usage(): + print "usage: %s [OPTIONS] DATABASE" % sys.argv[0] + print "where DATABASE is a socket on which ovsdb-server is listening." + ovs.daemon.usage() + print "Other options:" + print " -h, --help display this help message" + sys.exit(0) + +def main(argv): + try: + options, args = getopt.gnu_getopt( + argv[1:], 'h', ['help'] + ovs.daemon.LONG_OPTIONS) + except getopt.GetoptError, geo: + sys.stderr.write("%s: %s\n" % (ovs.util.PROGRAM_NAME, geo.msg)) + sys.exit(1) + + for key, value in options: + if key in ['-h', '--help']: + usage() + elif not ovs.daemon.parse_opt(key, value): + sys.stderr.write("%s: unhandled option %s\n" + % (ovs.util.PROGRAM_NAME, key)) + sys.exit(1) + + if len(args) != 1: + sys.stderr.write("%s: exactly one nonoption argument is required " + "(use --help for help)\n" % ovs.util.PROGRAM_NAME) + sys.exit(1) + + ovs.daemon.die_if_already_running() + + remote = args[0] + idl = ovs.db.idl.Idl(remote, "Open_vSwitch", monitor_uuid_schema_cb) + + ovs.daemon.daemonize() + + ipsec = IPsec() + + interfaces = {} + while True: + if not idl.run(): + poller = ovs.poller.Poller() + idl.wait(poller) + poller.block() + continue + + new_interfaces = {} + for rec in 
idl.data["Interface"].itervalues(): + name = rec.name.as_scalar() + local_ip = rec.other_config.get("ipsec_local_ip") + if rec.type.as_scalar() == "gre" and local_ip: + new_interfaces[name] = { + "remote_ip": rec.options.get("remote_ip"), + "local_ip": local_ip, + "ipsec_cert": rec.other_config.get("ipsec_cert"), + "ipsec_psk": rec.other_config.get("ipsec_psk") } + + if interfaces != new_interfaces: + for name, vals in interfaces.items(): + if name not in new_interfaces.keys(): + ipsec.ipsec_cert_del(vals["local_ip"], vals["remote_ip"]) + for name, vals in new_interfaces.items(): + if vals == interfaces.get(name): + s_log.warning( + "configuration changed for %s, need to delete " + "interface first" % name) + continue + + if vals["ipsec_cert"]: + ipsec.ipsec_cert_update(vals["local_ip"], + vals["remote_ip"], vals["ipsec_cert"]) + elif vals["ipsec_psk"]: + ipsec.ipsec_psk_update(vals["local_ip"], + vals["remote_ip"], vals["ipsec_psk"]) + else: + s_log.warning( + "no ipsec_cert or ipsec_psk defined for %s" % name) + continue + + interfaces = new_interfaces + +if __name__ == '__main__': + try: + main(sys.argv) + except SystemExit: + # Let system.exit() calls complete normally + raise + except: + s_log.exception("traceback") diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema index a7d257036..07dd79fbc 100644 --- a/vswitchd/vswitch.ovsschema +++ b/vswitchd/vswitch.ovsschema @@ -134,6 +134,8 @@ "ofport": { "type": {"key": "integer", "min": 0, "max": 1}, "ephemeral": true}, + "other_config": { + "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, "statistics": { "type": {"key": "string", "value": "integer", "min": 0, "max": "unlimited"}, "ephemeral": true}, diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 86fd3f9be..5b5655ddd 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -775,6 +775,27 @@ + + Key-value pairs for rarely used interface features. 
Currently, + the only keys are for configuring GRE-over-IPsec, which is only + available through the openvswitch-ipsec package for + Debian. The currently defined key-value pairs are: +
+
ipsec-local-ip
+
Required key for GRE-over-IPsec interfaces. Additionally, + the <ref column="type"/> must be gre and the + ipsec-psk key must + be set. The in_key, out_key, and + key must not be + set.
+
ipsec-psk
+
Required key for GRE-over-IPsec interfaces. Specifies a + pre-shared key for authentication that must be identical on + both sides of the tunnel. Additionally, the + ipsec-local-ip key must also be set.
+
+
+

Key-value pairs that report interface statistics. The current -- cgit v1.2.1 From e6494c64e35f62411f770be086ba6a0914afaf5d Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Wed, 22 Sep 2010 22:52:04 -0700 Subject: vswitch: Reference ipsec_local_ip and ipsec_psk with underscores The GRE-over-IPsec documentation describes "ipsec-local-ip" and "ipsec-psk" when they actually use underscores. --- vswitchd/vswitch.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 5b5655ddd..bb3f648fb 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -781,18 +781,18 @@ available through the openvswitch-ipsec package for Debian. The currently defined key-value pairs are:

-
ipsec-local-ip
+
ipsec_local_ip
Required key for GRE-over-IPsec interfaces. Additionally, the <ref column="type"/> must be gre and the - ipsec-psk key must + ipsec_psk key must be set. The in_key, out_key, and key must not be set.
-
ipsec-psk
+
ipsec_psk
Required key for GRE-over-IPsec interfaces. Specifies a pre-shared key for authentication that must be identical on both sides of the tunnel. Additionally, the - ipsec-local-ip key must also be set.
+ ipsec_local_ip key must also be set.
-- cgit v1.2.1 From b13300c7178527f85937604022a755e174a0e1aa Mon Sep 17 00:00:00 2001 From: Ethan Jackson Date: Tue, 21 Sep 2010 23:57:13 -0700 Subject: xenserver: Only put the primary XenServer UUID in default bridge-id This patch defensively guarantees that the first id in xs-network-uuids will belong to the primary network (as opposed to a vlan). Given that the primary network id comes first, it parses xs-network-ids and only copies the primary id to bridge-id when monitor-external-ids is run. Feature #3647 Signed-off-by: Ethan Jackson Reviewed-by: Ben Pfaff --- tests/interface-reconfigure.at | 6 +++--- xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py | 7 ++++++- xenserver/usr_share_openvswitch_scripts_monitor-external-ids | 10 ++++++++-- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/tests/interface-reconfigure.at b/tests/interface-reconfigure.at index 880f4a508..8566102d7 100644 --- a/tests/interface-reconfigure.at +++ b/tests/interface-reconfigure.at @@ -802,7 +802,7 @@ Applying changes to /etc/sysconfig/network-scripts/ifcfg-xapi1 configuration --fake-iface add-bond xapi1 bond0 eth0 eth1 set Port bond0 MAC="00:22:19:22:4b:af" bond_downdelay=200 other-config:"bond-miimon"=100 other-config:"bond-use_carrier"=1 other-config:"bond-mode"="balance-slb" bond_updelay=31000 set Bridge xapi1 other-config:hwaddr="00:22:19:22:4b:af" - br-set-external-id xapi1 xs-network-uuids 99be2da4-6c33-6f8e-49ea-3bc592fe3c85;45cbbb43-113d-a712-3231-c6463f253cef + br-set-external-id xapi1 xs-network-uuids 45cbbb43-113d-a712-3231-c6463f253cef;99be2da4-6c33-6f8e-49ea-3bc592fe3c85 /sbin/ifup xapi1 action_up: bring up bond0 /sbin/ifconfig bond0 up @@ -883,10 +883,10 @@ Applying changes to /etc/sysconfig/network-scripts/ifcfg-xapi2 configuration --fake-iface add-bond xapi1 bond0 eth0 eth1 set Port bond0 MAC="00:22:19:22:4b:af" bond_downdelay=200 other-config:"bond-miimon"=100 other-config:"bond-use_carrier"=1 other-config:"bond-mode"="balance-slb" 
bond_updelay=31000 set Bridge xapi1 other-config:hwaddr="00:22:19:22:4b:af" - br-set-external-id xapi1 xs-network-uuids 99be2da4-6c33-6f8e-49ea-3bc592fe3c85;45cbbb43-113d-a712-3231-c6463f253cef + br-set-external-id xapi1 xs-network-uuids 45cbbb43-113d-a712-3231-c6463f253cef;99be2da4-6c33-6f8e-49ea-3bc592fe3c85 --if-exists del-br xapi2 --may-exist add-br xapi2 xapi1 4 - br-set-external-id xapi2 xs-network-uuids 99be2da4-6c33-6f8e-49ea-3bc592fe3c85;45cbbb43-113d-a712-3231-c6463f253cef + br-set-external-id xapi2 xs-network-uuids 45cbbb43-113d-a712-3231-c6463f253cef;99be2da4-6c33-6f8e-49ea-3bc592fe3c85 set Interface xapi2 MAC="00:22:19:22:4b:af" /sbin/ifup xapi2 action_up: bring up bond0 diff --git a/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py b/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py index bc311f803..c352594ac 100644 --- a/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py +++ b/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py @@ -342,7 +342,12 @@ def set_br_external_ids(pif): # log("Network PIF %s not currently attached (%s)" % (rec['uuid'],pifrec['uuid'])) # continue nwrec = db().get_network_record(rec['network']) - xs_network_uuids += [nwrec['uuid']] + + uuid = nwrec['uuid'] + if pif_is_vlan(nwpif): + xs_network_uuids.append(uuid) + else: + xs_network_uuids.insert(0, uuid) vsctl_argv = [] vsctl_argv += ['# configure xs-network-uuids'] diff --git a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids index 45b3dd7bf..2c2844c63 100755 --- a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids +++ b/xenserver/usr_share_openvswitch_scripts_monitor-external-ids @@ -116,8 +116,14 @@ def update_network_uuids(name, ids): def update_bridge_id(name, ids): id = get_bridge_id(name, ids.get("xs-network-uuids")) - if ids.get("bridge-id") != id and id: - set_external_id("Bridge", name, "bridge-id", id) + + if not id: + return + + 
primary_id = id.split(";")[0] + + if ids.get("bridge-id") != primary_id: + set_external_id("Bridge", name, "bridge-id", primary_id) def update_iface_id(name, ids): id = get_iface_id(name, ids.get("xs-vif-uuid")) -- cgit v1.2.1 From 88cd3cc9f41f916fd51ad1cd02cbbfb50d9b476d Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 23 Sep 2010 10:55:15 +0900 Subject: debian: update copyright for xenserver/ Further to the recent work done by Ben Pfaff and Ian Campbell to clarify the license of all the files in xenserver/ the debian/copyright.in file seems to need updating. Signed-off-by: Simon Horman Signed-off-by: Ben Pfaff --- debian/copyright.in | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 1 deletion(-) diff --git a/debian/copyright.in b/debian/copyright.in index 6a7370e39..6143f1869 100644 --- a/debian/copyright.in +++ b/debian/copyright.in @@ -15,7 +15,54 @@ Upstream Copyright Holders: License: -* The following components are licensed under the GNU General Public Licence +* The following components are licensed under the + GNU Lesser General Public Licence version 2.1 only + with the exception clause below as a pre-amble. + + xenserver/etc_xensource_scripts_vif + xenserver/opt_xensource_libexec_InterfaceReconfigure.py + xenserver/opt_xensource_libexec_InterfaceReconfigureBridge.py + xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py + xenserver/opt_xensource_libexec_interface-reconfigure + xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py + xenserver/usr_sbin_xen-bugtool + + * These components are only distributed in the source package. + They do not appear in any binary packages. 
+ + On Debian systems, the complete text of the + GNU Lesser General Public Licence version 2.1 can be found in + `/usr/share/common-licenses/LGPL-2.1' + + The exception clause pre-amble reads: + + As a special exception to the GNU Lesser General Public License, you + may link, statically or dynamically, a "work that uses the Library" + with a publicly distributed version of the Library to produce an + executable file containing portions of the Library, and distribute + that executable file under terms of your choice, without any of the + additional requirements listed in clause 6 of the GNU Lesser General + Public License. By "a publicly distributed version of the Library", + we mean either the unmodified Library as distributed, or a + modified version of the Library that is distributed under the + conditions defined in clause 3 of the GNU Library General Public + License. This exception does not however invalidate any other reasons + why the executable file might be covered by the GNU Lesser General + Public License. + +* The following component is licensed under the + GNU Lesser General Public Licence version 2.1. + + xenserver/usr_sbin_xen-bugtool + + * This component is only distributed in the source package. + It does not appear in any binary packages. + + On Debian systems, the complete text of the + GNU Lesser General Public Licence version 2.1 can be found in + `/usr/share/common-licenses/LGPL-2.1' + +* The following component is licensed under the GNU General Public Licence version 2. datapath/ @@ -33,6 +80,63 @@ License: On Debian systems, the complete text of the Apache License version 2.0 can be found in '/usr/share/common-licenses/Apache-2.0'. +* The following component is licenced under the + Python Software Foundation License Version 2. + + xenserver/uuid.py + + * This component is only distributed in the source package. + It does not appear in any binary packages. 
+ + PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 + -------------------------------------------- + + 1. This LICENSE AGREEMENT is between the Python Software Foundation + ("PSF"), and the Individual or Organization ("Licensee") accessing and + otherwise using this software ("Python") in source or binary form and + its associated documentation. + + 2. Subject to the terms and conditions of this License Agreement, PSF + hereby grants Licensee a nonexclusive, royalty-free, world-wide + license to reproduce, analyze, test, perform and/or display publicly, + prepare derivative works, distribute, and otherwise use Python + alone or in any derivative version, provided, however, that PSF's + License Agreement and PSF's notice of copyright, i.e., "Copyright (c) + 2001, 2002, 2003, 2004, 2005, 2006, 2007 Python Software Foundation; + All Rights Reserved" are retained in Python alone or in any derivative + version prepared by Licensee. + + 3. In the event Licensee prepares a derivative work that is based on + or incorporates Python or any part thereof, and wants to make + the derivative work available to others as provided herein, then + Licensee hereby agrees to include in any such work a brief summary of + the changes made to Python. + + 4. PSF is making Python available to Licensee on an "AS IS" + basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR + IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND + DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS + FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT + INFRINGE ANY THIRD PARTY RIGHTS. + + 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON + FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS + A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, + OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + + 6. 
This License Agreement will automatically terminate upon a material + breach of its terms and conditions. + + 7. Nothing in this License Agreement shall be deemed to create any + relationship of agency, partnership, or joint venture between PSF and + Licensee. This License Agreement does not grant permission to use PSF + trademarks or trade name in a trademark sense to endorse or promote + products or services of Licensee, or any third party. + + 8. By copying, installing or otherwise using Python, Licensee + agrees to be bound by the terms and conditions of this License + Agreement. + * ovs-bugtool is covered by the following license: This library is free software; you can redistribute it and/or -- cgit v1.2.1 From 1095d0996bf83e99eab0be964b66403d3fff76f5 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 23 Sep 2010 11:20:10 -0700 Subject: debian: additional copyright update for xenserver/ Signed-off-by: Simon Horman Signed-off-by: Ben Pfaff --- debian/copyright.in | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/debian/copyright.in b/debian/copyright.in index 6143f1869..f131ea92d 100644 --- a/debian/copyright.in +++ b/debian/copyright.in @@ -50,10 +50,11 @@ License: why the executable file might be covered by the GNU Lesser General Public License. -* The following component is licensed under the +* The following components are licensed under the GNU Lesser General Public Licence version 2.1. - xenserver/usr_sbin_xen-bugtool + debian/ovs-bugtool + xenserver/usr_sbin_xen-bugtool [*] * This component is only distributed in the source package. It does not appear in any binary packages. @@ -137,27 +138,6 @@ License: agrees to be bound by the terms and conditions of this License Agreement. 
-* ovs-bugtool is covered by the following license: - - This library is free software; you can redistribute it and/or - modify it under the terms of version 2.1 of the GNU Lesser General Public - License as published by the Free Software Foundation. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. - - Copyright (c) 2005, 2007 XenSource Ltd. - Copyright (c) 2010, Nicira Networks. - - On Debian systems, the complete text of the GNU Lesser General Public - License can be found in `/usr/share/common-licenses/LGPL-2.1'. - * All other components of this package are licensed under The Apache License Version 2.0. -- cgit v1.2.1 From 5136364f41b74de00f86873a6f5be4c8a19cb5ad Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 13 Sep 2010 13:29:57 -0700 Subject: vlog: Add VLOG_WARN_ONCE() and similar macros. 
--- lib/dpif-linux.c | 6 +----- lib/vlog.h | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index 2c688e3af..ec8a952b3 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -731,11 +731,7 @@ get_major(const char *target) return major; } } else { - static bool warned; - if (!warned) { - VLOG_WARN("%s:%d: syntax error", fn, ln); - } - warned = true; + VLOG_WARN_ONCE("%s:%d: syntax error", fn, ln); } } diff --git a/lib/vlog.h b/lib/vlog.h index 03f17ea56..a4e143c1b 100644 --- a/lib/vlog.h +++ b/lib/vlog.h @@ -181,6 +181,12 @@ void vlog_rate_limit(const struct vlog_module *, enum vlog_level, #define VLOG_DROP_INFO(RL) vlog_should_drop(THIS_MODULE, VLL_INFO, RL) #define VLOG_DROP_DBG(RL) vlog_should_drop(THIS_MODULE, VLL_DBG, RL) +/* Macros for logging at most once per execution. */ +#define VLOG_ERR_ONCE(...) VLOG_ONCE(VLL_ERR, __VA_ARGS__) +#define VLOG_WARN_ONCE(...) VLOG_ONCE(VLL_WARN, __VA_ARGS__) +#define VLOG_INFO_ONCE(...) VLOG_ONCE(VLL_INFO, __VA_ARGS__) +#define VLOG_DBG_ONCE(...) VLOG_ONCE(VLL_DBG, __VA_ARGS__) + /* Command line processing. */ #define VLOG_OPTION_ENUMS OPT_LOG_FILE #define VLOG_LONG_OPTIONS \ @@ -208,6 +214,15 @@ void vlog_usage(void); vlog_rate_limit(THIS_MODULE, LEVEL, RL, __VA_ARGS__); \ } \ } while (0) +#define VLOG_ONCE(LEVEL, ...) \ + do { \ + static bool already_logged; \ + if (!already_logged) { \ + already_logged = true; \ + vlog(THIS_MODULE, LEVEL, __VA_ARGS__); \ + } \ + } while (0) + #define VLOG_DEFINE_MODULE__(MODULE) \ struct vlog_module VLM_##MODULE = \ { \ -- cgit v1.2.1 From cbbdf81cf8bfcc87e141f66b93bf3bcf1220bff8 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 21 Sep 2010 14:27:02 -0700 Subject: daemon: Report number of crashes on monitor process command line. 
--- lib/daemon.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/daemon.c b/lib/daemon.c index 548650464..6b61879db 100644 --- a/lib/daemon.c +++ b/lib/daemon.c @@ -330,11 +330,13 @@ monitor_daemon(pid_t daemon_pid) const char *saved_program_name; time_t last_restart; char *status_msg; + int crashes; saved_program_name = program_name; program_name = xasprintf("monitor(%s)", program_name); status_msg = xstrdup("healthy"); last_restart = TIME_MIN; + crashes = 0; for (;;) { int retval; int status; @@ -352,7 +354,8 @@ monitor_daemon(pid_t daemon_pid) } else if (retval == daemon_pid) { char *s = process_status_msg(status); free(status_msg); - status_msg = xasprintf("pid %lu died, %s", + status_msg = xasprintf("%d crashes: pid %lu died, %s", + ++crashes, (unsigned long int) daemon_pid, s); free(s); -- cgit v1.2.1 From e4bd5e2a6c4223fd9cfb2dd1ad82a4eeb1b1fbe6 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 23 Sep 2010 09:39:47 -0700 Subject: daemon: Fix behavior of read_pidfile() for our own pidfile. Opening a file descriptor and then closing it always discards any locks held on the underlying file, even if the file is still open as another file descriptor. This meant that calling read_pidfile() on the process's own pidfile would discard the lock and make other OVS processes think that the process had died. This commit fixes the problem. --- lib/daemon.c | 25 +++++++++++++++++++++++++ python/ovs/daemon.py | 23 ++++++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/lib/daemon.c b/lib/daemon.c index 6b61879db..bbcfe6afc 100644 --- a/lib/daemon.c +++ b/lib/daemon.c @@ -42,6 +42,10 @@ static bool detach; /* --pidfile: Name of pidfile (null if none). */ static char *pidfile; +/* Device and inode of pidfile, so we can avoid reopening it. */ +static dev_t pidfile_dev; +static ino_t pidfile_ino; + /* --overwrite-pidfile: Create pidfile even if one already exists and is locked? 
*/ static bool overwrite_pidfile; @@ -208,6 +212,15 @@ make_pidfile(void) close(fd); } else { /* Keep 'fd' open to retain the lock. */ + struct stat s; + + if (!fstat(fd, &s)) { + pidfile_dev = s.st_dev; + pidfile_ino = s.st_ino; + } else { + VLOG_ERR("%s: fstat failed: %s", + pidfile, strerror(errno)); + } } free(text); } else { @@ -494,9 +507,21 @@ read_pidfile(const char *pidfile) { char line[128]; struct flock lck; + struct stat s; FILE *file; int error; + if ((pidfile_ino || pidfile_dev) + && !stat(pidfile, &s) + && s.st_ino == pidfile_ino && s.st_dev == pidfile_dev) { + /* It's our own pidfile. We can't afford to open it, because closing + * *any* fd for a file that a process has locked also releases all the + * locks on that file. + * + * Fortunately, we know the associated pid anyhow: */ + return getpid(); + } + file = fopen(pidfile, "r"); if (!file) { error = errno; diff --git a/python/ovs/daemon.py b/python/ovs/daemon.py index a8373cfd0..eaaaa519b 100644 --- a/python/ovs/daemon.py +++ b/python/ovs/daemon.py @@ -35,6 +35,10 @@ _detach = False # --pidfile: Name of pidfile (null if none). _pidfile = None +# Our pidfile's inode and device, if we have created one. +_pidfile_dev = None +_pidfile_ino = None + # --overwrite-pidfile: Create pidfile even if one already exists and is locked? _overwrite_pidfile = False @@ -163,7 +167,7 @@ def _make_pidfile(): logging.error("%s: create failed: %s" % (tmpfile, os.strerror(e.errno))) return - + try: fcntl.lockf(file, fcntl.LOCK_EX | fcntl.LOCK_NB) except IOError, e: @@ -191,6 +195,10 @@ def _make_pidfile(): file.close() return + s = os.fstat(file.fileno()) + _pidfile_dev = s.st_dev + _pidfile_ino = s.st_ino + def daemonize(): """If configured with set_pidfile() or set_detach(), creates the pid file and detaches from the foreground session.""" @@ -368,6 +376,19 @@ Daemon options: def read_pidfile(pidfile): """Opens and reads a PID from 'pidfile'. 
Returns the nonnegative PID if successful, otherwise a negative errno value.""" + if _pidfile_dev is not None: + try: + s = os.stat(pidfile) + if s.st_ino == _pidfile_ino and s.st_dev == _pidfile_dev: + # It's our own pidfile. We can't afford to open it, + # because closing *any* fd for a file that a process + # has locked also releases all the locks on that file. + # + # Fortunately, we know the associated pid anyhow. + return os.getpid() + except OSError: + pass + try: file = open(pidfile, "r") except IOError, e: -- cgit v1.2.1 From 4f2226487d3522654876885d769510b835c5f5ee Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 23 Sep 2010 09:42:30 -0700 Subject: shash: New function shash_steal(). --- lib/dpif-linux.c | 3 +-- lib/netdev.c | 3 +-- lib/shash.c | 15 ++++++++++++++- lib/shash.h | 1 + 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index ec8a952b3..635fe9411 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -338,8 +338,7 @@ dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep) return ENOBUFS; } else if (!shash_is_empty(&dpif->changed_ports)) { struct shash_node *node = shash_first(&dpif->changed_ports); - *devnamep = xstrdup(node->name); - shash_delete(&dpif->changed_ports, node); + *devnamep = shash_steal(&dpif->changed_ports, node); return 0; } else { return EAGAIN; diff --git a/lib/netdev.c b/lib/netdev.c index 24c2a88fa..c1eb5d04f 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -1553,8 +1553,7 @@ netdev_monitor_poll(struct netdev_monitor *monitor, char **devnamep) *devnamep = NULL; return EAGAIN; } else { - *devnamep = xstrdup(node->name); - shash_delete(&monitor->changed_netdevs, node); + *devnamep = shash_steal(&monitor->changed_netdevs, node); return 0; } } diff --git a/lib/shash.c b/lib/shash.c index 8fd2eb18f..cc45efb5c 100644 --- a/lib/shash.c +++ b/lib/shash.c @@ -167,12 +167,25 @@ shash_replace(struct shash *sh, const char *name, const void *data) } } +/* Deletes 'node' from 
'sh' and frees the node's name. The caller is still + * responsible for freeing the node's data, if necessary. */ void shash_delete(struct shash *sh, struct shash_node *node) { + free(shash_steal(sh, node)); +} + +/* Deletes 'node' from 'sh'. Neither the node's name nor its data is freed; + * instead, ownership is transferred to the caller. Returns the node's + * name. */ +char * +shash_steal(struct shash *sh, struct shash_node *node) +{ + char *name = node->name; + hmap_remove(&sh->map, &node->node); - free(node->name); free(node); + return name; } static struct shash_node * diff --git a/lib/shash.h b/lib/shash.h index eab0af45a..8a736e80b 100644 --- a/lib/shash.h +++ b/lib/shash.h @@ -57,6 +57,7 @@ bool shash_add_once(struct shash *, const char *, const void *); void shash_add_assert(struct shash *, const char *, const void *); void *shash_replace(struct shash *, const char *, const void *data); void shash_delete(struct shash *, struct shash_node *); +char *shash_steal(struct shash *, struct shash_node *); struct shash_node *shash_find(const struct shash *, const char *); void *shash_find_data(const struct shash *, const char *); void *shash_find_and_delete(struct shash *, const char *); -- cgit v1.2.1 From e4af561537cfea7d35d2075596b4474847876794 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 23 Sep 2010 09:43:46 -0700 Subject: ovsdb-data: New function ovsdb_datum_from_shash(). --- lib/ovsdb-data.c | 25 +++++++++++++++++++++++++ lib/ovsdb-data.h | 3 +++ 2 files changed, 28 insertions(+) diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c index 9c54fe81b..492da7fa5 100644 --- a/lib/ovsdb-data.c +++ b/lib/ovsdb-data.c @@ -1444,6 +1444,31 @@ ovsdb_datum_to_string(const struct ovsdb_datum *datum, } } +/* Initializes 'datum' as a string-to-string map whose contents are taken from + * 'sh'. Destroys 'sh'. 
*/ +void +ovsdb_datum_from_shash(struct ovsdb_datum *datum, struct shash *sh) +{ + struct shash_node *node, *next; + size_t i; + + datum->n = shash_count(sh); + datum->keys = xmalloc(datum->n * sizeof *datum->keys); + datum->values = xmalloc(datum->n * sizeof *datum->values); + + i = 0; + SHASH_FOR_EACH_SAFE (node, next, sh) { + datum->keys[i].string = node->name; + datum->values[i].string = node->data; + shash_steal(sh, node); + i++; + } + assert(i == datum->n); + + shash_destroy(sh); + ovsdb_datum_sort_unique(datum, OVSDB_TYPE_STRING, OVSDB_TYPE_STRING); +} + static uint32_t hash_atoms(enum ovsdb_atomic_type type, const union ovsdb_atom *atoms, unsigned int n, uint32_t basis) diff --git a/lib/ovsdb-data.h b/lib/ovsdb-data.h index ae0faa26e..f7e98a84b 100644 --- a/lib/ovsdb-data.h +++ b/lib/ovsdb-data.h @@ -22,6 +22,7 @@ struct ds; struct ovsdb_symbol_table; +struct shash; /* One value of an atomic type (given by enum ovs_atomic_type). */ union ovsdb_atom { @@ -167,6 +168,8 @@ char *ovsdb_datum_from_string(struct ovsdb_datum *, void ovsdb_datum_to_string(const struct ovsdb_datum *, const struct ovsdb_type *, struct ds *); +void ovsdb_datum_from_shash(struct ovsdb_datum *, struct shash *); + /* Comparison. */ uint32_t ovsdb_datum_hash(const struct ovsdb_datum *, const struct ovsdb_type *, uint32_t basis); -- cgit v1.2.1 From c94238565522c51a117a7f89fecaabd2605b7c66 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 22 Sep 2010 16:34:05 -0700 Subject: ovsdb-doc: Implement new