summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ben Pfaff <blp@nicira.com> 2010-10-06 11:37:03 -0700
committer Ben Pfaff <blp@nicira.com> 2010-10-06 11:37:03 -0700
commit d15a5ee594f46a855788652e687aa5d736d530fb (patch)
tree 6cd499aa6fde6ec6db9d6543d7b99ce09cef0d15
parent e9ec7451755a50449811738b5a0adad49a864fe3 (diff)
parent 4bee421f3ab28492aebc32b8b13e41ca5d12a936 (diff)
download openvswitch-d15a5ee594f46a855788652e687aa5d736d530fb.tar.gz
Merge "master" into "wdp".
-rw-r--r--AUTHORS5
-rw-r--r--COPYING3
-rw-r--r--ChangeLog11
-rw-r--r--INSTALL.Linux33
-rw-r--r--PORTING3
-rw-r--r--README2
-rw-r--r--REPORTING-BUGS25
-rw-r--r--acinclude.m42
-rw-r--r--configure.ac7
-rw-r--r--datapath/brcompat.c6
-rw-r--r--datapath/datapath.c75
-rw-r--r--datapath/datapath.h4
-rw-r--r--datapath/flow.c62
-rw-r--r--datapath/flow.h21
-rw-r--r--datapath/linux-2.6/Modules.mk2
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/kernel.h1
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/random.h17
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/types.h7
-rw-r--r--datapath/linux-2.6/compat-2.6/include/linux/workqueue.h65
-rw-r--r--datapath/linux-2.6/compat-2.6/random32.c144
-rw-r--r--datapath/tunnel.c989
-rw-r--r--datapath/tunnel.h153
-rw-r--r--datapath/vport-capwap.c69
-rw-r--r--datapath/vport-gre.c59
-rw-r--r--datapath/vport-internal_dev.c3
-rw-r--r--datapath/vport-patch.c5
-rw-r--r--datapath/vport.c3
-rw-r--r--datapath/vport.h3
-rw-r--r--debian/.gitignore2
-rw-r--r--debian/automake.mk18
-rw-r--r--debian/changelog15
-rw-r--r--debian/control30
-rw-r--r--debian/copyright67
-rw-r--r--debian/copyright.in146
-rw-r--r--debian/openvswitch-common.install2
-rwxr-xr-xdebian/openvswitch-controller.init6
-rw-r--r--debian/openvswitch-datapath-source.README.Debian4
-rw-r--r--debian/openvswitch-ipsec.dirs1
-rwxr-xr-xdebian/openvswitch-ipsec.init188
-rw-r--r--debian/openvswitch-ipsec.install1
-rwxr-xr-xdebian/openvswitch-switch.init20
-rwxr-xr-xdebian/openvswitch-switch.postinst11
-rwxr-xr-xdebian/ovs-bugtool1114
-rw-r--r--debian/ovs-bugtool.846
-rwxr-xr-xdebian/ovs-monitor-ipsec350
-rw-r--r--debian/python-openvswitch.dirs2
-rw-r--r--debian/python-openvswitch.install2
-rwxr-xr-xdebian/rules1
-rw-r--r--include/openflow/nicira-ext.h25
-rw-r--r--include/openvswitch/tunnel.h1
-rw-r--r--lib/classifier.c43
-rw-r--r--lib/daemon.c30
-rw-r--r--lib/dynamic-string.c2
-rw-r--r--lib/hmap.c17
-rw-r--r--lib/hmap.h50
-rw-r--r--lib/json.c1
-rw-r--r--lib/learning-switch.c177
-rw-r--r--lib/learning-switch.h34
-rw-r--r--lib/list.h24
-rw-r--r--lib/lockfile.c4
-rw-r--r--lib/mac-learning.c2
-rw-r--r--lib/netdev-linux.c178
-rw-r--r--lib/netdev-tunnel.c20
-rw-r--r--lib/netdev-vport.c3
-rw-r--r--lib/netdev.c23
-rw-r--r--lib/netlink.c10
-rw-r--r--lib/ofp-parse.c100
-rw-r--r--lib/ofp-parse.h3
-rw-r--r--lib/ofp-print.c128
-rw-r--r--lib/ofp-util.c18
-rw-r--r--lib/ofp-util.h2
-rw-r--r--lib/ofpbuf.c20
-rw-r--r--lib/ofpbuf.h3
-rw-r--r--lib/ovsdb-data.c25
-rw-r--r--lib/ovsdb-data.h3
-rw-r--r--lib/ovsdb-idl.c46
-rw-r--r--lib/poll-loop.c4
-rw-r--r--lib/process.c4
-rw-r--r--lib/queue.h5
-rw-r--r--lib/rtnetlink.c6
-rw-r--r--lib/shash.c17
-rw-r--r--lib/shash.h10
-rw-r--r--lib/stream-fd.c2
-rw-r--r--lib/stream-ssl.c6
-rw-r--r--lib/unixctl.c8
-rw-r--r--lib/util.h10
-rw-r--r--lib/vlog-modules.def1
-rw-r--r--lib/vlog.h15
-rw-r--r--lib/xfif-linux.c12
-rw-r--r--lib/xfif-netdev.c20
-rw-r--r--lib/xflow-util.c3
-rw-r--r--ofproto/netflow.c87
-rw-r--r--ofproto/ofproto-sflow.c72
-rw-r--r--ofproto/ofproto.c59
-rw-r--r--ofproto/pktbuf.c4
-rw-r--r--ofproto/status.c5
-rw-r--r--ofproto/wdp-xflow.c275
-rw-r--r--ovsdb/execution.c3
-rw-r--r--ovsdb/file.c2
-rw-r--r--ovsdb/jsonrpc-server.c26
-rwxr-xr-xovsdb/ovsdb-doc.in14
-rw-r--r--ovsdb/ovsdb-server.c4
-rw-r--r--ovsdb/ovsdb-tool.c1
-rw-r--r--ovsdb/query.c8
-rw-r--r--ovsdb/row.c14
-rw-r--r--ovsdb/table.c6
-rw-r--r--ovsdb/transaction.c20
-rw-r--r--ovsdb/trigger.c4
-rw-r--r--python/ovs/daemon.py30
-rw-r--r--python/ovs/db/types.py6
-rw-r--r--python/ovs/poller.py7
-rw-r--r--tests/automake.mk1
-rw-r--r--tests/daemon-py.at53
-rw-r--r--tests/interface-reconfigure.at6
-rw-r--r--tests/ovs-ofctl.at26
-rw-r--r--tests/ovs-vsctl.at55
-rw-r--r--tests/test-classifier.c2
-rw-r--r--tests/test-csum.c3
-rw-r--r--tests/test-daemon.py21
-rw-r--r--tests/test-hmap.c9
-rw-r--r--tests/test-list.c10
-rw-r--r--tests/test-ovsdb.c9
-rw-r--r--tests/testsuite.at1
-rw-r--r--utilities/ovs-controller.8.in26
-rw-r--r--utilities/ovs-controller.c121
-rw-r--r--utilities/ovs-ofctl.8.in32
-rw-r--r--utilities/ovs-ofctl.c111
-rw-r--r--utilities/ovs-openflowd.c4
-rw-r--r--utilities/ovs-vsctl.8.in109
-rw-r--r--utilities/ovs-vsctl.c271
-rw-r--r--vswitchd/automake.mk2
-rw-r--r--vswitchd/bridge.c139
-rw-r--r--vswitchd/proc-net-compat.c5
-rw-r--r--vswitchd/system-stats.c503
-rw-r--r--vswitchd/system-stats.h21
-rw-r--r--vswitchd/vswitch.ovsschema9
-rw-r--r--vswitchd/vswitch.xml350
-rw-r--r--xenserver/GPLv2339
-rw-r--r--xenserver/LICENSE5
-rw-r--r--xenserver/README9
-rw-r--r--xenserver/automake.mk3
-rwxr-xr-xxenserver/etc_init.d_openvswitch27
-rwxr-xr-xxenserver/etc_xapi.d_plugins_openvswitch-cfg-update6
-rwxr-xr-xxenserver/etc_xensource_scripts_vif6
-rw-r--r--xenserver/openvswitch-xen.spec6
-rw-r--r--xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py7
-rw-r--r--xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py23
-rwxr-xr-xxenserver/usr_sbin_brctl2
-rwxr-xr-xxenserver/usr_share_openvswitch_scripts_ovs-external-ids (renamed from xenserver/usr_share_openvswitch_scripts_monitor-external-ids)103
-rwxr-xr-xxenserver/usr_share_openvswitch_scripts_refresh-xs-network-uuids7
-rw-r--r--xenserver/uuid.py53
151 files changed, 6270 insertions, 1792 deletions
diff --git a/AUTHORS b/AUTHORS
index bf0e34271..3d9ddd336 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -6,6 +6,7 @@ Ben Pfaff blp@nicira.com
Bryan Phillippe bp@toroki.com
Dan Wendlandt dan@nicira.com
David Erickson derickso@stanford.edu
+Ethan Jackson ethan@nicira.com
Glen Gibb grg@stanford.edu
Ian Campbell Ian.Campbell@citrix.com
Jean Tourrilhes jt@hpl.hp.com
@@ -26,13 +27,17 @@ Thomas Lacroix thomas.lacroix@citrix.com
Todd Deshane deshantm@gmail.com
Tom Everman teverman@google.com
Tsvi Slonim tsvi@toroki.com
+Vivien Bernet-Rollande vbr@soprive.net
Wei Yongjun yjwei@cn.fujitsu.com
Yu Zhiguo yuzg@cn.fujitsu.com
The following additional people are mentioned in commit logs as having
provided helpful bug reports or suggestions.
+Alexey I. Froloff raorn@altlinux.org
+Brad Hall brad@nicira.com
Brandon Heller brandonh@stanford.edu
+Bryan Fulton bryan@nicira.com
Cedric Hobbs cedric@nicira.com
Ghanem Bahri bahri.ghanem@gmail.com
Henrik Amren henrik@nicira.com
diff --git a/COPYING b/COPYING
index 375efecae..8d1bc9d51 100644
--- a/COPYING
+++ b/COPYING
@@ -20,8 +20,7 @@ Files under the datapath directory are licensed under the GNU General
Public License, version 2.
Files under the xenserver directory are licensed on a file-by-file
-basis. Some files are under an uncertain license that may not be
-DFSG-compliant or GPL-compatible. Refer to each file for details.
+basis. Refer to each file for details.
Files lib/sflow*.[ch] are licensed under the terms of the InMon sFlow
licence that is available at:
diff --git a/ChangeLog b/ChangeLog
index 29fcd2d8c..c816ed7c9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+v1.1.0pre2 - 13 Sep 2010
+------------------------
+ - Bug fixes
+
+v1.1.0pre1 - 31 Aug 2010
+------------------------
+ - OpenFlow 1.0 slicing (QoS) functionality
+ - Python bindings for configuration database (no write support)
+ - Performance and scalability improvements
+ - Bug fixes
+
v1.0.1 - 31 May 2010
--------------------
- New "patch" interface type
diff --git a/INSTALL.Linux b/INSTALL.Linux
index 8ae7aaca7..d5a3b7d65 100644
--- a/INSTALL.Linux
+++ b/INSTALL.Linux
@@ -247,7 +247,8 @@ configuration in the database:
--remote=db:Open_vSwitch,managers \
--private-key=db:SSL,private_key \
--certificate=db:SSL,certificate \
- --bootstrap-ca-cert=db:SSL,ca_cert
+ --bootstrap-ca-cert=db:SSL,ca_cert \
+ --pidfile --detach
Then initialize the database using ovs-vsctl. This is only
necessary the first time after you create the database with
@@ -258,7 +259,8 @@ ovsdb-tool (but running it at any time is harmless):
Then start the main Open vSwitch daemon, telling it to connect to the
same Unix domain socket:
- % ovs-vswitchd unix:/usr/local/var/run/openvswitch/db.sock
+ % ovs-vswitchd unix:/usr/local/var/run/openvswitch/db.sock \
+ --pidfile --detach
Now you may use ovs-vsctl to set up bridges and other Open vSwitch
features. For example, to create a bridge named br0 and add ports
@@ -270,6 +272,33 @@ eth0 and vif1.0 to it:
Please refer to ovs-vsctl(8) for more details.
+Upgrading
+=========
+
+When you upgrade Open vSwitch from one version to another, you should
+also upgrade the database schema:
+
+1. Stop the Open vSwitch daemons, e.g.:
+
+ % ovs-kill ovsdb-server.pid ovs-vswitchd.pid
+
+2. Install the new Open vSwitch release.
+
+3. Upgrade the database, in one of the following two ways:
+
+ - If there is no important data in your database, then you may
+ delete the database file and recreate it with ovsdb-tool,
+ following the instructions under "Building and Installing Open
+ vSwitch for Linux".
+
+ - If you want to preserve the contents of your database, back it
+ up first, then use "ovsdb-tool convert" to upgrade it, e.g.:
+
+ % ovsdb-tool convert /usr/local/etc/ovs-vswitchd.conf.db vswitchd/vswitch.ovsschema
+
+4. Start the Open vSwitch daemons as described under "Building and
+ Installing Open vSwitch for Linux" above.
+
Bug Reporting
-------------
diff --git a/PORTING b/PORTING
index 5f88c940a..922d6c85b 100644
--- a/PORTING
+++ b/PORTING
@@ -209,6 +209,9 @@ lib/entropy.c assumes that it can obtain high-quality random number
seeds at startup by reading from /dev/urandom. You will need to
modify it if this is not true on your platform.
+vswitchd/system-stats.c only knows how to obtain some statistics on
+Linux. Optionally you may implement them for your platform as well.
+
Questions
---------
diff --git a/README b/README
index 881a11343..55e9d9c9b 100644
--- a/README
+++ b/README
@@ -29,7 +29,7 @@ vSwitch supports the following features:
* Support for OpenFlow
* Compatibility layer for the Linux bridging code
-The included Linux kernel module supports Linux 2.6.15 and up, with
+The included Linux kernel module supports Linux 2.6.18 and up, with
testing focused on 2.6.18 with Centos and Xen patches and version
2.6.26 from kernel.org. Open vSwitch also has special support for
Citrix XenServer hosts.
diff --git a/REPORTING-BUGS b/REPORTING-BUGS
index 75da3d6eb..812bfba0f 100644
--- a/REPORTING-BUGS
+++ b/REPORTING-BUGS
@@ -5,8 +5,17 @@ We are eager to hear from users about problems that they have
encountered with Open vSwitch. This file documents how best to report
bugs so as to ensure that they can be fixed as quickly as possible.
-Please report bugs by sending email to bugs@openvswitch.org. Include
-as much of the following information as you can in your report:
+Please report bugs by sending email to bugs@openvswitch.org.
+
+The most important parts of your bug report are the following:
+
+ * What you did that made the problem appear.
+
+ * What you expected to happen.
+
+ * What actually happened.
+
+Please also include the following information:
* The Open vSwitch version number (as output by "ovs-vswitchd
--version").
@@ -16,6 +25,8 @@ as much of the following information as you can in your report:
* Any local patches or changes you have applied (if any).
+The following are also handy sometimes:
+
* The kernel version on which Open vSwitch is running (from
/proc/version) and the distribution and version number of
your OS (e.g. "Centos 5.0").
@@ -28,15 +39,7 @@ as much of the following information as you can in your report:
* If you have Open vSwitch configured to connect to an
OpenFlow controller, the output of "ovs-ofctl show <bridge>"
for each <bridge> configured in the vswitchd configuration
- file.
-
- * A description of the problem, which should include:
-
- - What you did that make the problem appear.
-
- - What you expected to happen.
-
- - What actually happened.
+ database.
* A fix or workaround, if you have one.
diff --git a/acinclude.m4 b/acinclude.m4
index 80794dac3..f1322fa0e 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -188,6 +188,8 @@ AC_DEFUN([OVS_CHECK_LINUX26_COMPAT], [
OVS_GREP_IFELSE([$KSRC26/include/linux/types.h], [bool],
[OVS_DEFINE([HAVE_BOOL_TYPE])])
+ OVS_GREP_IFELSE([$KSRC26/include/linux/types.h], [__wsum],
+ [OVS_DEFINE([HAVE_CSUM_TYPES])])
OVS_GREP_IFELSE([$KSRC26/include/net/checksum.h], [csum_unfold],
[OVS_DEFINE([HAVE_CSUM_UNFOLD])])
diff --git a/configure.ac b/configure.ac
index 3088fa5d8..8a5dc5ce0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@
# limitations under the License.
AC_PREREQ(2.64)
-AC_INIT(openvswitch, 1.0.1, ovs-bugs@openvswitch.org)
+AC_INIT(openvswitch, 1.1.0pre2, ovs-bugs@openvswitch.org)
NX_BUILDNR
AC_CONFIG_SRCDIR([datapath/datapath.c])
AC_CONFIG_MACRO_DIR([m4])
@@ -55,7 +55,8 @@ OVS_CHECK_IF_PACKET
OVS_CHECK_STRTOK_R
AC_CHECK_MEMBERS([struct stat.st_mtim.tv_nsec, struct stat.st_mtimensec],
[], [], [[#include <sys/stat.h>]])
-AC_CHECK_FUNCS([mlockall])
+AC_CHECK_FUNCS([mlockall strsignal getloadavg statvfs setmntent])
+AC_CHECK_HEADERS([mntent.h sys/statvfs.h])
OVS_CHECK_PKIDIR
OVS_CHECK_RUNDIR
@@ -64,8 +65,6 @@ OVS_CHECK_VALGRIND
OVS_CHECK_SOCKET_LIBS
OVS_CHECK_LINKER_SECTIONS
-AC_CHECK_FUNCS([strsignal])
-
OVS_ENABLE_OPTION([-Wall])
OVS_ENABLE_OPTION([-Wno-sign-compare])
OVS_ENABLE_OPTION([-Wpointer-arith])
diff --git a/datapath/brcompat.c b/datapath/brcompat.c
index 3e8401154..2113eae0f 100644
--- a/datapath/brcompat.c
+++ b/datapath/brcompat.c
@@ -84,6 +84,9 @@ static int brc_add_del_bridge(char __user *uname, int add)
struct sk_buff *request;
char name[IFNAMSIZ];
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (copy_from_user(name, uname, IFNAMSIZ))
return -EFAULT;
@@ -196,6 +199,9 @@ static int brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
struct net_device *port;
int err;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
port = __dev_get_by_index(&init_net, port_ifindex);
if (!port)
return -EINVAL;
diff --git a/datapath/datapath.c b/datapath/datapath.c
index 131dcafcf..abf751677 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -29,7 +29,6 @@
#include <linux/udp.h>
#include <linux/version.h>
#include <linux/ethtool.h>
-#include <linux/random.h>
#include <linux/wait.h>
#include <asm/system.h>
#include <asm/div64.h>
@@ -40,7 +39,6 @@
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/rculist.h>
-#include <linux/workqueue.h>
#include <linux/dmi.h>
#include <net/inet_ecn.h>
#include <linux/compat.h>
@@ -544,40 +542,45 @@ void dp_process_received_packet(struct dp_port *p, struct sk_buff *skb)
struct datapath *dp = p->dp;
struct dp_stats_percpu *stats;
int stats_counter_off;
- struct xflow_key key;
- struct tbl_node *flow_node;
- struct sw_flow *flow;
struct sw_flow_actions *acts;
struct loop_counter *loop;
int error;
OVS_CB(skb)->dp_port = p;
- /* Extract flow from 'skb' into 'key'. */
- error = flow_extract(skb, p ? p->port_no : XFLOWP_NONE, &key);
- if (unlikely(error)) {
- kfree_skb(skb);
- return;
- }
+ if (!OVS_CB(skb)->flow) {
+ struct xflow_key key;
+ struct tbl_node *flow_node;
+ bool is_frag;
- if (OVS_CB(skb)->is_frag && dp->drop_frags) {
- kfree_skb(skb);
- stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
- goto out;
- }
+ /* Extract flow from 'skb' into 'key'. */
+ error = flow_extract(skb, p ? p->port_no : XFLOWP_NONE, &key, &is_frag);
+ if (unlikely(error)) {
+ kfree_skb(skb);
+ return;
+ }
- /* Look up flow. */
- flow_node = tbl_lookup(rcu_dereference(dp->table), &key, flow_hash(&key), flow_cmp);
- if (unlikely(!flow_node)) {
- dp_output_control(dp, skb, _XFLOWL_MISS_NR, OVS_CB(skb)->tun_id);
- stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
- goto out;
+ if (is_frag && dp->drop_frags) {
+ kfree_skb(skb);
+ stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
+ goto out;
+ }
+
+ /* Look up flow. */
+ flow_node = tbl_lookup(rcu_dereference(dp->table), &key,
+ flow_hash(&key), flow_cmp);
+ if (unlikely(!flow_node)) {
+ dp_output_control(dp, skb, _XFLOWL_MISS_NR, OVS_CB(skb)->tun_id);
+ stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
+ goto out;
+ }
+
+ OVS_CB(skb)->flow = flow_cast(flow_node);
}
- flow = flow_cast(flow_node);
- flow_used(flow, skb);
+ flow_used(OVS_CB(skb)->flow, skb);
- acts = rcu_dereference(flow->sf_acts);
+ acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
/* Check whether we've looped too much. */
loop = &get_cpu_var(dp_loop_counters).counters[!!in_interrupt()];
@@ -589,7 +592,8 @@ void dp_process_received_packet(struct dp_port *p, struct sk_buff *skb)
}
/* Execute actions. */
- execute_actions(dp, skb, &key, acts->actions, acts->n_actions, GFP_ATOMIC);
+ execute_actions(dp, skb, &OVS_CB(skb)->flow->key, acts->actions,
+ acts->n_actions, GFP_ATOMIC);
stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
/* Check whether sub-actions looped too much. */
@@ -1049,12 +1053,12 @@ static int do_put_flow(struct datapath *dp, struct xflow_flow_put *uf,
}
/* Allocate flow. */
- error = -ENOMEM;
- flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
- if (flow == NULL)
+ flow = flow_alloc();
+ if (IS_ERR(flow)) {
+ error = PTR_ERR(flow);
goto error;
+ }
flow->key = uf->flow.key;
- spin_lock_init(&flow->lock);
clear_stats(flow);
/* Obtain actions. */
@@ -1109,7 +1113,8 @@ static int do_put_flow(struct datapath *dp, struct xflow_flow_put *uf,
error_free_flow_acts:
kfree(flow->sf_acts);
error_free_flow:
- kmem_cache_free(flow_cache, flow);
+ flow->sf_acts = NULL;
+ flow_put(flow);
error:
return error;
}
@@ -1317,16 +1322,18 @@ static int do_execute(struct datapath *dp, const struct xflow_execute *execute)
struct sk_buff *skb;
struct sw_flow_actions *actions;
struct ethhdr *eth;
+ bool is_frag;
int err;
err = -EINVAL;
if (execute->length < ETH_HLEN || execute->length > 65535)
goto error;
- err = -ENOMEM;
actions = flow_actions_alloc(execute->n_actions);
- if (!actions)
+ if (IS_ERR(actions)) {
+ err = PTR_ERR(actions);
goto error;
+ }
err = -EFAULT;
if (copy_from_user(actions->actions, execute->actions,
@@ -1363,7 +1370,7 @@ static int do_execute(struct datapath *dp, const struct xflow_execute *execute)
else
skb->protocol = htons(ETH_P_802_2);
- err = flow_extract(skb, execute->in_port, &key);
+ err = flow_extract(skb, execute->in_port, &key, &is_frag);
if (err)
goto error_free_skb;
diff --git a/datapath/datapath.h b/datapath/datapath.h
index 1488bec6f..e57c1831a 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -146,17 +146,17 @@ enum csum_type {
/**
* struct ovs_skb_cb - OVS data in skb CB
* @dp_port: The datapath port on which the skb entered the switch.
+ * @flow: The flow associated with this packet. May be %NULL if no flow.
* @ip_summed: Consistently stores L4 checksumming status across different
* kernel versions.
* @tun_id: ID (in network byte order) of the tunnel that encapsulated this
* packet. It is 0 if the packet was not received on a tunnel.
- * @is_frag: %true if this packet is an IPv4 fragment, %false otherwise.
*/
struct ovs_skb_cb {
struct dp_port *dp_port;
+ struct sw_flow *flow;
enum csum_type ip_summed;
__be32 tun_id;
- bool is_frag;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
diff --git a/datapath/flow.c b/datapath/flow.c
index 48bebca52..de2d3f3d6 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -108,7 +108,10 @@ struct sw_flow_actions *flow_actions_alloc(size_t n_actions)
{
struct sw_flow_actions *sfa;
- if (n_actions > (PAGE_SIZE - sizeof *sfa) / sizeof(union xflow_action))
+ /* At least DP_MAX_PORTS actions are required to be able to flood a
+ * packet to every port. Factor of 2 allows for setting VLAN tags,
+ * etc. */
+ if (n_actions > 2 * DP_MAX_PORTS)
return ERR_PTR(-EINVAL);
sfa = kmalloc(sizeof *sfa + n_actions * sizeof(union xflow_action),
@@ -120,27 +123,36 @@ struct sw_flow_actions *flow_actions_alloc(size_t n_actions)
return sfa;
}
-
-/* Frees 'flow' immediately. */
-static void flow_free(struct sw_flow *flow)
+struct sw_flow *flow_alloc(void)
{
- if (unlikely(!flow))
- return;
- kfree(flow->sf_acts);
- kmem_cache_free(flow_cache, flow);
+ struct sw_flow *flow;
+
+ flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
+ if (!flow)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_init(&flow->lock);
+ atomic_set(&flow->refcnt, 1);
+ flow->dead = false;
+
+ return flow;
}
void flow_free_tbl(struct tbl_node *node)
{
struct sw_flow *flow = flow_cast(node);
- flow_free(flow);
+
+ flow->dead = true;
+ flow_put(flow);
}
/* RCU callback used by flow_deferred_free. */
static void rcu_free_flow_callback(struct rcu_head *rcu)
{
struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
- flow_free(flow);
+
+ flow->dead = true;
+ flow_put(flow);
}
/* Schedules 'flow' to be freed after the next RCU grace period.
@@ -150,6 +162,22 @@ void flow_deferred_free(struct sw_flow *flow)
call_rcu(&flow->rcu, rcu_free_flow_callback);
}
+void flow_hold(struct sw_flow *flow)
+{
+ atomic_inc(&flow->refcnt);
+}
+
+void flow_put(struct sw_flow *flow)
+{
+ if (unlikely(!flow))
+ return;
+
+ if (atomic_dec_and_test(&flow->refcnt)) {
+ kfree(flow->sf_acts);
+ kmem_cache_free(flow_cache, flow);
+ }
+}
+
/* RCU callback used by flow_deferred_free_acts. */
static void rcu_free_acts_callback(struct rcu_head *rcu)
{
@@ -238,14 +266,15 @@ static __be16 parse_ethertype(struct sk_buff *skb)
* Sets *@is_frag to %true if @skb is an IPv4 fragment, otherwise to
* %false.
*/
-int flow_extract(struct sk_buff *skb, u16 in_port, struct xflow_key *key)
+int flow_extract(struct sk_buff *skb, u16 in_port, struct xflow_key *key,
+ bool *is_frag)
{
struct ethhdr *eth;
memset(key, 0, sizeof *key);
key->tun_id = OVS_CB(skb)->tun_id;
key->in_port = in_port;
- OVS_CB(skb)->is_frag = false;
+ *is_frag = false;
/*
* We would really like to pull as many bytes as we could possibly
@@ -326,9 +355,9 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct xflow_key *key)
key->tp_dst = htons(icmp->code);
}
}
- } else {
- OVS_CB(skb)->is_frag = true;
- }
+ } else
+ *is_frag = true;
+
} else if (key->dl_type == htons(ETH_P_ARP) && arphdr_ok(skb)) {
struct arp_eth_header *arp;
@@ -340,9 +369,8 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct xflow_key *key)
&& arp->ar_pln == 4) {
/* We only match on the lower 8 bits of the opcode. */
- if (ntohs(arp->ar_op) <= 0xff) {
+ if (ntohs(arp->ar_op) <= 0xff)
key->nw_proto = ntohs(arp->ar_op);
- }
if (key->nw_proto == ARPOP_REQUEST
|| key->nw_proto == ARPOP_REPLY) {
diff --git a/datapath/flow.h b/datapath/flow.h
index 528e15aa3..fab57c758 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -36,6 +36,9 @@ struct sw_flow {
struct xflow_key key;
struct sw_flow_actions *sf_acts;
+ atomic_t refcnt;
+ bool dead;
+
spinlock_t lock; /* Lock for values below. */
unsigned long used; /* Last used time (in jiffies). */
u64 packet_count; /* Number of packets matched. */
@@ -58,20 +61,24 @@ struct arp_eth_header
unsigned char ar_tip[4]; /* target IP address */
} __attribute__((packed));
-extern struct kmem_cache *flow_cache;
+int flow_init(void);
+void flow_exit(void);
-struct sw_flow_actions *flow_actions_alloc(size_t n_actions);
+struct sw_flow *flow_alloc(void);
void flow_deferred_free(struct sw_flow *);
+void flow_free_tbl(struct tbl_node *);
+
+struct sw_flow_actions *flow_actions_alloc(size_t n_actions);
void flow_deferred_free_acts(struct sw_flow_actions *);
-int flow_extract(struct sk_buff *, u16 in_port, struct xflow_key *);
+
+void flow_hold(struct sw_flow *);
+void flow_put(struct sw_flow *);
+
+int flow_extract(struct sk_buff *, u16 in_port, struct xflow_key *, bool *is_frag);
void flow_used(struct sw_flow *, struct sk_buff *);
u32 flow_hash(const struct xflow_key *key);
int flow_cmp(const struct tbl_node *, void *target);
-void flow_free_tbl(struct tbl_node *);
-
-int flow_init(void);
-void flow_exit(void);
static inline struct sw_flow *flow_cast(const struct tbl_node *node)
{
diff --git a/datapath/linux-2.6/Modules.mk b/datapath/linux-2.6/Modules.mk
index 7f4cae6ea..5a0e9ad47 100644
--- a/datapath/linux-2.6/Modules.mk
+++ b/datapath/linux-2.6/Modules.mk
@@ -4,7 +4,6 @@ openvswitch_sources += \
linux-2.6/compat-2.6/genetlink-openvswitch.c \
linux-2.6/compat-2.6/ip_output-openvswitch.c \
linux-2.6/compat-2.6/kmemdup.c \
- linux-2.6/compat-2.6/random32.c \
linux-2.6/compat-2.6/skbuff-openvswitch.c \
linux-2.6/compat-2.6/time.c
openvswitch_headers += \
@@ -32,7 +31,6 @@ openvswitch_headers += \
linux-2.6/compat-2.6/include/linux/netfilter_bridge.h \
linux-2.6/compat-2.6/include/linux/netfilter_ipv4.h \
linux-2.6/compat-2.6/include/linux/netlink.h \
- linux-2.6/compat-2.6/include/linux/random.h \
linux-2.6/compat-2.6/include/linux/rculist.h \
linux-2.6/compat-2.6/include/linux/rtnetlink.h \
linux-2.6/compat-2.6/include/linux/skbuff.h \
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/kernel.h b/datapath/linux-2.6/compat-2.6/include/linux/kernel.h
index 1f65c099a..13361f78d 100644
--- a/datapath/linux-2.6/compat-2.6/include/linux/kernel.h
+++ b/datapath/linux-2.6/compat-2.6/include/linux/kernel.h
@@ -6,6 +6,7 @@
#include <linux/log2.h>
#endif
+#include <linux/version.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
#undef pr_emerg
#define pr_emerg(fmt, ...) \
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/random.h b/datapath/linux-2.6/compat-2.6/include/linux/random.h
deleted file mode 100644
index 4e4932c9c..000000000
--- a/datapath/linux-2.6/compat-2.6/include/linux/random.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef __LINUX_RANDOM_WRAPPER_H
-#define __LINUX_RANDOM_WRAPPER_H 1
-
-#include_next <linux/random.h>
-
-#include <linux/version.h>
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
-
-#ifdef __KERNEL__
-u32 random32(void);
-void srandom32(u32 seed);
-#endif /* __KERNEL__ */
-
-#endif /* linux kernel < 2.6.19 */
-
-
-#endif
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/types.h b/datapath/linux-2.6/compat-2.6/include/linux/types.h
index d88baf71c..b989d96c3 100644
--- a/datapath/linux-2.6/compat-2.6/include/linux/types.h
+++ b/datapath/linux-2.6/compat-2.6/include/linux/types.h
@@ -3,13 +3,10 @@
#include_next <linux/types.h>
-#include <linux/version.h>
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
-
+#ifndef HAVE_CSUM_TYPES
typedef __u16 __bitwise __sum16;
typedef __u32 __bitwise __wsum;
-
-#endif /* linux kernel < 2.6.20 */
+#endif
#ifndef HAVE_BOOL_TYPE
typedef _Bool bool;
diff --git a/datapath/linux-2.6/compat-2.6/include/linux/workqueue.h b/datapath/linux-2.6/compat-2.6/include/linux/workqueue.h
index 1ac3b6ecb..01c6345e9 100644
--- a/datapath/linux-2.6/compat-2.6/include/linux/workqueue.h
+++ b/datapath/linux-2.6/compat-2.6/include/linux/workqueue.h
@@ -4,39 +4,38 @@
#include_next <linux/workqueue.h>
#include <linux/version.h>
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
-
-#ifdef __KERNEL__
-/*
- * initialize a work-struct's func and data pointers:
- */
-#undef PREPARE_WORK
-#define PREPARE_WORK(_work, _func) \
- do { \
- (_work)->func = (void(*)(void*)) _func; \
- (_work)->data = _work; \
- } while (0)
-
-/*
- * initialize all of a work-struct:
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
+
+/* Older kernels have an implementation of work queues with some very bad
+ * characteristics when trying to cancel work (potential deadlocks, use after
+ * free, etc.). Here we directly use timers instead for delayed work. It's not
+ * optimal but it is better than the alternative. Note that work queues
+ * normally run in process context but this will cause them to operate in
+ * softirq context.
*/
-#undef INIT_WORK
-#define INIT_WORK(_work, _func) \
- do { \
- INIT_LIST_HEAD(&(_work)->entry); \
- (_work)->pending = 0; \
- PREPARE_WORK((_work), (_func)); \
- init_timer(&(_work)->timer); \
- } while (0)
-
-#endif /* __KERNEL__ */
-
-#endif /* linux kernel < 2.6.20 */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
-/* There is no equivalent to cancel_work_sync() so just flush all
- * pending work. */
-#define cancel_work_sync(_work) flush_scheduled_work()
-#endif
+
+#include <linux/timer.h>
+
+#undef DECLARE_DELAYED_WORK
+#define DECLARE_DELAYED_WORK(n, f) \
+ struct timer_list n = TIMER_INITIALIZER((void (*)(unsigned long))f, 0, 0)
+
+#define schedule_delayed_work rpl_schedule_delayed_work
+static inline int schedule_delayed_work(struct timer_list *timer, unsigned long delay)
+{
+ if (timer_pending(timer))
+ return 0;
+
+ mod_timer(timer, jiffies + delay);
+ return 1;
+}
+
+#define cancel_delayed_work_sync rpl_cancel_delayed_work_sync
+static inline int cancel_delayed_work_sync(struct timer_list *timer)
+{
+ return del_timer_sync(timer);
+}
+
+#endif /* kernel version < 2.6.23 */
#endif
diff --git a/datapath/linux-2.6/compat-2.6/random32.c b/datapath/linux-2.6/compat-2.6/random32.c
deleted file mode 100644
index b0dd2a32b..000000000
--- a/datapath/linux-2.6/compat-2.6/random32.c
+++ /dev/null
@@ -1,144 +0,0 @@
-#include <linux/version.h>
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
-
-/*
- This is a maximally equidistributed combined Tausworthe generator
- based on code from GNU Scientific Library 1.5 (30 Jun 2004)
-
- x_n = (s1_n ^ s2_n ^ s3_n)
-
- s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19))
- s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25))
- s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11))
-
- The period of this generator is about 2^88.
-
- From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe
- Generators", Mathematics of Computation, 65, 213 (1996), 203--213.
-
- This is available on the net from L'Ecuyer's home page,
-
- http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
- ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps
-
- There is an erratum in the paper "Tables of Maximally
- Equidistributed Combined LFSR Generators", Mathematics of
- Computation, 68, 225 (1999), 261--269:
- http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps
-
- ... the k_j most significant bits of z_j must be non-
- zero, for each j. (Note: this restriction also applies to the
- computer code given in [4], but was mistakenly not mentioned in
- that paper.)
-
- This affects the seeding procedure by imposing the requirement
- s1 > 1, s2 > 7, s3 > 15.
-
-*/
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/jiffies.h>
-#include <linux/random.h>
-#include <linux/smp.h>
-
-#include "compat26.h"
-
-struct rnd_state {
- u32 s1, s2, s3;
-};
-
-static struct rnd_state net_rand_state[NR_CPUS];
-
-static u32 __random32(struct rnd_state *state)
-{
-#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
-
- state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12);
- state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4);
- state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17);
-
- return (state->s1 ^ state->s2 ^ state->s3);
-}
-
-static void __set_random32(struct rnd_state *state, unsigned long s)
-{
- if (s == 0)
- s = 1; /* default seed is 1 */
-
-#define LCG(n) (69069 * n)
- state->s1 = LCG(s);
- state->s2 = LCG(state->s1);
- state->s3 = LCG(state->s2);
-
- /* "warm it up" */
- __random32(state);
- __random32(state);
- __random32(state);
- __random32(state);
- __random32(state);
- __random32(state);
-}
-
-/**
- * random32 - pseudo random number generator
- *
- * A 32 bit pseudo-random number is generated using a fast
- * algorithm suitable for simulation. This algorithm is NOT
- * considered safe for cryptographic use.
- */
-u32 random32(void)
-{
- return __random32(&net_rand_state[smp_processor_id()]);
-}
-
-/**
- * srandom32 - add entropy to pseudo random number generator
- * @seed: seed value
- *
- * Add some additional seeding to the random32() pool.
- * Note: this pool is per cpu so it only affects current CPU.
- */
-void srandom32(u32 entropy)
-{
- struct rnd_state *state = &net_rand_state[smp_processor_id()];
- __set_random32(state, state->s1 ^ entropy);
-}
-
-static int __init random32_reseed(void);
-
-/*
- * Generate some initially weak seeding values to allow
- * to start the random32() engine.
- */
-int __init random32_init(void)
-{
- int i;
-
- for (i = 0; i < NR_CPUS; i++) {
- struct rnd_state *state = &net_rand_state[i];
- __set_random32(state, i + jiffies);
- }
- random32_reseed();
- return 0;
-}
-
-/*
- * Generate better values after random number generator
- * is fully initalized.
- */
-static int __init random32_reseed(void)
-{
- int i;
- unsigned long seed;
-
- for (i = 0; i < NR_CPUS; i++) {
- struct rnd_state *state = &net_rand_state[i];
-
- get_random_bytes(&seed, sizeof(seed));
- __set_random32(state, seed);
- }
- return 0;
-}
-
-#endif /* kernel < 2.6.19 */
diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index 6fa369be0..c752fe8af 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -15,6 +15,7 @@
#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/version.h>
+#include <linux/workqueue.h>
#include <net/dsfield.h>
#include <net/dst.h>
@@ -33,10 +34,45 @@
#include "tunnel.h"
#include "vport.h"
#include "vport-generic.h"
+#include "vport-internal_dev.h"
+
+#ifdef NEED_CACHE_TIMEOUT
+/*
+ * On kernels where we can't quickly detect changes in the rest of the system
+ * we use an expiration time to invalidate the cache. A shorter expiration
+ * reduces the length of time that we may potentially blackhole packets while
+ * a longer time increases performance by reducing the frequency that the
+ * cache needs to be rebuilt. A variety of factors may cause the cache to be
+ * invalidated before the expiration time but this is the maximum. The time
+ * is expressed in jiffies.
+ */
+#define MAX_CACHE_EXP HZ
+#endif
+
+/*
+ * Interval to check for and remove caches that are no longer valid. Caches
+ * are checked for validity before they are used for packet encapsulation and
+ * old caches are removed at that time. However, if no packets are sent through
+ * the tunnel then the cache will never be destroyed. Since it holds
+ * references to a number of system objects, the cache will continue to use
+ * system resources by not allowing those objects to be destroyed. The cache
+ * cleaner is periodically run to free invalid caches. It does not
+ * significantly affect system performance. A lower interval will release
+ * resources faster but will itself consume resources by requiring more frequent
+ * checks. A longer interval may result in messages being printed to the kernel
+ * message buffer about unreleased resources. The interval is expressed in
+ * jiffies.
+ */
+#define CACHE_CLEANER_INTERVAL (5 * HZ)
+
+#define CACHE_DATA_ALIGN 16
/* Protected by RCU. */
static struct tbl *port_table;
+static void cache_cleaner(struct work_struct *work);
+DECLARE_DELAYED_WORK(cache_cleaner_wq, cache_cleaner);
+
/*
* These are just used as an optimization: they don't require any kind of
* synchronization because we could have just as easily read the value before
@@ -63,22 +99,54 @@ static inline struct tnl_vport *tnl_vport_table_cast(const struct tbl_node *node
return container_of(node, struct tnl_vport, tbl_node);
}
-/* RCU callback. */
-static void free_config(struct rcu_head *rcu)
+/* Queue the cache cleaner to run CACHE_CLEANER_INTERVAL jiffies from now. */
+static inline void schedule_cache_cleaner(void)
+{
+ schedule_delayed_work(&cache_cleaner_wq, CACHE_CLEANER_INTERVAL);
+}
+
+/*
+ * Tear down a header cache: hand its flow and route back via flow_put() and
+ * ip_rt_put(), then free the allocation. A NULL cache is silently ignored
+ * so callers do not need to check.
+ */
+static void free_cache(struct tnl_cache *cache)
+{
+ if (!cache)
+ return;
+
+ flow_put(cache->flow);
+ ip_rt_put(cache->rt);
+ kfree(cache);
+}
+
+/* RCU callback: free the tnl_mutable_config that embeds the given rcu_head. */
+static void free_config_rcu(struct rcu_head *rcu)
{
struct tnl_mutable_config *c = container_of(rcu, struct tnl_mutable_config, rcu);
kfree(c);
}
+/* RCU callback: free the tnl_cache that embeds the given rcu_head. */
+static void free_cache_rcu(struct rcu_head *rcu)
+{
+ struct tnl_cache *c = container_of(rcu, struct tnl_cache, rcu);
+ free_cache(c);
+}
+
+/*
+ * Install new_config as the vport's mutable configuration and free the
+ * previous configuration through call_rcu().
+ */
static void assign_config_rcu(struct vport *vport,
struct tnl_mutable_config *new_config)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
struct tnl_mutable_config *old_config;
- old_config = rcu_dereference(tnl_vport->mutable);
+ old_config = tnl_vport->mutable;
rcu_assign_pointer(tnl_vport->mutable, new_config);
- call_rcu(&old_config->rcu, free_config);
+ call_rcu(&old_config->rcu, free_config_rcu);
+}
+
+/*
+ * Replace the vport's header cache with new_cache (NULL simply clears it).
+ * The previous cache, if there was one, is freed through call_rcu() rather
+ * than immediately. Both callers in this file hold tnl_vport->cache_lock
+ * around this call.
+ */
+static void assign_cache_rcu(struct vport *vport, struct tnl_cache *new_cache)
+{
+ struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+ struct tnl_cache *old_cache;
+
+ old_cache = tnl_vport->cache;
+ rcu_assign_pointer(tnl_vport->cache, new_cache);
+
+ if (old_cache)
+ call_rcu(&old_cache->rcu, free_cache_rcu);
}
static unsigned int *find_port_pool(const struct tnl_mutable_config *mutable)
@@ -130,10 +198,32 @@ static u32 port_hash(struct port_lookup_key *lookup)
return jhash2(lookup->vals, ARRAY_SIZE(lookup->vals), 0);
}
+/*
+ * Compute the port table hash for a tunnel configuration: its source
+ * address, destination address, input key and tunnel type are packed into
+ * a port_lookup_key and passed to port_hash().
+ */
+static u32 mutable_hash(const struct tnl_mutable_config *mutable)
+{
+ struct port_lookup_key lookup;
+
+ lookup.vals[LOOKUP_SADDR] = mutable->port_config.saddr;
+ lookup.vals[LOOKUP_DADDR] = mutable->port_config.daddr;
+ lookup.vals[LOOKUP_KEY] = mutable->port_config.in_key;
+ lookup.vals[LOOKUP_TUNNEL_TYPE] = mutable->tunnel_type;
+
+ return port_hash(&lookup);
+}
+
+/*
+ * Tear down the port table once it holds no entries: stop the cache cleaner
+ * with cancel_delayed_work_sync(), publish NULL as the new port_table and
+ * pass the old table to tbl_deferred_destroy(). Does nothing while the
+ * table still has entries.
+ */
+static void check_table_empty(void)
+{
+ if (tbl_count(port_table) == 0) {
+ struct tbl *old_table = port_table;
+
+ cancel_delayed_work_sync(&cache_cleaner_wq);
+ rcu_assign_pointer(port_table, NULL);
+ tbl_deferred_destroy(old_table, NULL);
+ }
+}
+
static int add_port(struct vport *vport)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
- struct port_lookup_key lookup;
int err;
if (!port_table) {
@@ -144,6 +234,7 @@ static int add_port(struct vport *vport)
return -ENOMEM;
rcu_assign_pointer(port_table, new_table);
+ schedule_cache_cleaner();
} else if (tbl_count(port_table) > tbl_n_buckets(port_table)) {
struct tbl *old_table = port_table;
@@ -157,16 +248,44 @@ static int add_port(struct vport *vport)
tbl_deferred_destroy(old_table, NULL);
}
- lookup.vals[LOOKUP_SADDR] = tnl_vport->mutable->port_config.saddr;
- lookup.vals[LOOKUP_DADDR] = tnl_vport->mutable->port_config.daddr;
- lookup.vals[LOOKUP_KEY] = tnl_vport->mutable->port_config.in_key;
- lookup.vals[LOOKUP_TUNNEL_TYPE] = tnl_vport->mutable->tunnel_type;
+ err = tbl_insert(port_table, &tnl_vport->tbl_node, mutable_hash(tnl_vport->mutable));
+ if (err) {
+ check_table_empty();
+ return err;
+ }
+
+ (*find_port_pool(tnl_vport->mutable))++;
+
+ return 0;
+}
+
+/*
+ * Switch the vport to new_mutable, rehashing its port table entry if the
+ * new configuration hashes differently from the node's current hash.
+ * Returns 0 on success or the error from tbl_remove()/tbl_insert(); in the
+ * error case new_mutable is not installed. Note that a failed tbl_insert()
+ * happens after the node was already removed from the table.
+ */
+static int move_port(struct vport *vport, struct tnl_mutable_config *new_mutable)
+{
+ int err;
+ struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+ u32 hash;
+
+ hash = mutable_hash(new_mutable);
+ if (hash == tnl_vport->tbl_node.hash)
+ goto table_updated;
- err = tbl_insert(port_table, &tnl_vport->tbl_node, port_hash(&lookup));
+ /*
+ * Ideally we should make this move atomic to avoid having gaps in
+ * finding tunnels or the possibility of failure. However, if we do
+ * find a tunnel it will always be consistent.
+ */
+ err = tbl_remove(port_table, &tnl_vport->tbl_node);
if (err)
return err;
- (*find_port_pool(tnl_vport->mutable))++;
+ err = tbl_insert(port_table, &tnl_vport->tbl_node, hash);
+ if (err) {
+ check_table_empty();
+ return err;
+ }
+
+table_updated:
+ assign_config_rcu(vport, new_mutable);
return 0;
}
@@ -180,6 +299,7 @@ static int del_port(struct vport *vport)
if (err)
return err;
+ check_table_empty();
(*find_port_pool(tnl_vport->mutable))--;
return 0;
@@ -193,7 +313,7 @@ struct vport *tnl_find_port(__be32 saddr, __be32 daddr, __be32 key,
struct tbl *table = rcu_dereference(port_table);
struct tbl_node *tbl_node;
- if (!table)
+ if (unlikely(!table))
return NULL;
lookup.vals[LOOKUP_SADDR] = saddr;
@@ -246,6 +366,60 @@ found:
return tnl_vport_to_vport(tnl_vport_table_cast(tbl_node));
}
+/*
+ * If the ToS byte read through ip_hdr(skb) carries the ECN "congestion
+ * experienced" mark, set CE on the IPv4 or IPv6 header located at the
+ * network offset (skipping one 802.1Q tag if skb->protocol says so).
+ * Returns without marking if the needed headers cannot be pulled.
+ *
+ * NOTE(review): tnl_rcv() calls skb_reset_network_header() right before
+ * this function, so ip_hdr(skb) here is the header at skb->data -- confirm
+ * that this is the header whose ECN bits are meant to be propagated.
+ */
+static inline void ecn_decapsulate(struct sk_buff *skb)
+{
+ u8 tos = ip_hdr(skb)->tos;
+
+ if (INET_ECN_is_ce(tos)) {
+ __be16 protocol = skb->protocol;
+ unsigned int nw_header = skb_network_offset(skb);
+
+ if (skb->protocol == htons(ETH_P_8021Q)) {
+ if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
+ return;
+
+ protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
+ nw_header += VLAN_HLEN;
+ }
+
+ if (protocol == htons(ETH_P_IP)) {
+ if (unlikely(!pskb_may_pull(skb, nw_header
+ + sizeof(struct iphdr))))
+ return;
+
+ IP_ECN_set_ce((struct iphdr *)(skb->data + nw_header));
+ }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ else if (protocol == htons(ETH_P_IPV6)) {
+ if (unlikely(!pskb_may_pull(skb, nw_header
+ + sizeof(struct ipv6hdr))))
+ return;
+
+ IP6_ECN_set_ce((struct ipv6hdr *)(skb->data + nw_header));
+ }
+#endif
+ }
+}
+
+/*
+ * Hand a received tunnel packet to vport_receive() on behalf of vport.
+ * Marks the skb as PACKET_HOST, derives skb->protocol with
+ * eth_type_trans(), drops the dst, netfilter and secpath state attached to
+ * the skb, applies ecn_decapsulate(), pushes the Ethernet header back with
+ * skb_push() and recomputes the ip_summed state before delivery.
+ *
+ * Called with rcu_read_lock.
+ */
+void tnl_rcv(struct vport *vport, struct sk_buff *skb)
+{
+ skb->pkt_type = PACKET_HOST;
+ skb->protocol = eth_type_trans(skb, skb->dev);
+
+ skb_dst_drop(skb);
+ nf_reset(skb);
+ secpath_reset(skb);
+ skb_reset_network_header(skb);
+
+ ecn_decapsulate(skb);
+
+ skb_push(skb, ETH_HLEN);
+ compute_ip_summed(skb, false);
+
+ vport_receive(vport, skb);
+}
+
static bool check_ipv4_address(__be32 addr)
{
if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr)
@@ -514,179 +688,412 @@ bool tnl_frag_needed(struct vport *vport, const struct tnl_mutable_config *mutab
return true;
}
-static struct sk_buff *check_headroom(struct sk_buff *skb, int headroom)
+/*
+ * Decide whether skb fits the tunnel's MTU and which "don't fragment"
+ * setting the outer header should carry.
+ *
+ * With TNL_F_PMTUD the MTU is taken from the route (less the Ethernet,
+ * tunnel and optional VLAN header sizes) and DF starts out set; otherwise
+ * mutable->mtu is used. For IPv4 the inner DF bit is also copied; for IPv6
+ * DF is forced for packets larger than IPV6_MIN_MTU.
+ *
+ * Returns true with *frag_offp filled in if the packet may be sent. Returns
+ * false with *frag_offp zeroed if the packet is over the MTU and
+ * tnl_frag_needed() returned true for it.
+ */
+static bool check_mtu(struct sk_buff *skb,
+ struct vport *vport,
+ const struct tnl_mutable_config *mutable,
+ const struct rtable *rt, __be16 *frag_offp)
{
- if (skb_headroom(skb) < headroom || skb_header_cloned(skb)) {
- struct sk_buff *nskb = skb_realloc_headroom(skb, headroom + 16);
- if (unlikely(!nskb)) {
- kfree_skb(skb);
- return ERR_PTR(-ENOMEM);
+ int mtu;
+ __be16 frag_off;
+
+ frag_off = (mutable->port_config.flags & TNL_F_PMTUD) ? htons(IP_DF) : 0;
+ if (frag_off)
+ mtu = dst_mtu(&rt_dst(rt))
+ - ETH_HLEN
+ - mutable->tunnel_hlen
+ - (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);
+ else
+ mtu = mutable->mtu;
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ struct iphdr *old_iph = ip_hdr(skb);
+
+ frag_off |= old_iph->frag_off & htons(IP_DF);
+ mtu = max(mtu, IP_MIN_MTU);
+
+ if ((old_iph->frag_off & htons(IP_DF)) &&
+ mtu < ntohs(old_iph->tot_len)) {
+ if (tnl_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
+ goto drop;
}
+ }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ else if (skb->protocol == htons(ETH_P_IPV6)) {
+ unsigned int packet_length = skb->len - ETH_HLEN
+ - (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);
- set_skb_csum_bits(skb, nskb);
+ mtu = max(mtu, IPV6_MIN_MTU);
- if (skb->sk)
- skb_set_owner_w(nskb, skb->sk);
+ /* IPv6 requires PMTUD if the packet is above the minimum MTU. */
+ if (packet_length > IPV6_MIN_MTU)
+ frag_off = htons(IP_DF);
- dev_kfree_skb(skb);
- return nskb;
+ if (mtu < packet_length) {
+ if (tnl_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
+ goto drop;
+ }
}
+#endif
- return skb;
+ *frag_offp = frag_off;
+ return true;
+
+drop:
+ *frag_offp = 0;
+ return false;
+}
+
+/*
+ * Write an outer IPv4 header, immediately followed by the tunnel protocol
+ * header from tnl_ops->build_header(), into 'header'. Addresses come from
+ * the route, protocol from tnl_ops, ToS and TTL from the port
+ * configuration (a TTL of 0 falls back to the route's hop limit metric),
+ * and DF is set. tot_len, id and check are not written here.
+ */
+static void create_tunnel_header(const struct vport *vport,
+ const struct tnl_mutable_config *mutable,
+ const struct rtable *rt, void *header)
+{
+ struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+ struct iphdr *iph = header;
+
+ iph->version = 4;
+ iph->ihl = sizeof(struct iphdr) >> 2;
+ iph->frag_off = htons(IP_DF);
+ iph->protocol = tnl_vport->tnl_ops->ipproto;
+ iph->tos = mutable->port_config.tos;
+ iph->daddr = rt->rt_dst;
+ iph->saddr = rt->rt_src;
+ iph->ttl = mutable->port_config.ttl;
+ if (!iph->ttl)
+ iph->ttl = dst_metric(&rt_dst(rt), RTAX_HOPLIMIT);
+
+ tnl_vport->tnl_ops->build_header(vport, mutable, iph + 1);
}
-static inline u8 ecn_encapsulate(u8 tos, struct sk_buff *skb)
+/*
+ * Return the start of the cached header bytes. They live in the same
+ * allocation as struct tnl_cache, at the first CACHE_DATA_ALIGN-aligned
+ * offset past the structure.
+ */
+static inline void *get_cached_header(const struct tnl_cache *cache)
{
- u8 inner;
+ return (void *)cache + ALIGN(sizeof(struct tnl_cache), CACHE_DATA_ALIGN);
+}
- if (skb->protocol == htons(ETH_P_IP))
- inner = ((struct iphdr *)skb_network_header(skb))->tos;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6))
- inner = ipv6_get_dsfield((struct ipv6hdr *)skb_network_header(skb));
+/*
+ * Return true if 'cache' is non-NULL and still usable with 'mutable'.
+ * Depending on which of NEED_CACHE_TIMEOUT, HAVE_RT_GENID and HAVE_HH_SEQ
+ * are defined, the cache must not have reached its expiration time, the
+ * routing generation id must match the cached route's, and the hardware
+ * header sequence must match the one recorded in the cache. In all
+ * configurations the config sequence number must match and, if the route's
+ * device is an internal device, the cache must hold a flow that is not dead.
+ */
+static inline bool check_cache_valid(const struct tnl_cache *cache,
+ const struct tnl_mutable_config *mutable)
+{
+ return cache &&
+#ifdef NEED_CACHE_TIMEOUT
+ time_before(jiffies, cache->expiration) &&
#endif
- else
- inner = 0;
-
- return INET_ECN_encapsulate(tos, inner);
+#ifdef HAVE_RT_GENID
+ atomic_read(&init_net.ipv4.rt_genid) == cache->rt->rt_genid &&
+#endif
+#ifdef HAVE_HH_SEQ
+ rt_dst(cache->rt).hh->hh_lock.sequence == cache->hh_seq &&
+#endif
+ mutable->seq == cache->mutable_seq &&
+ (!is_internal_dev(rt_dst(cache->rt).dev) ||
+ (cache->flow && !cache->flow->dead));
}
-static inline void ecn_decapsulate(struct sk_buff *skb)
+/*
+ * tbl_foreach() callback used by cache_cleaner(): if the tunnel port behind
+ * tbl_node has a cache that no longer passes check_cache_valid(), clear it
+ * with assign_cache_rcu(). The cache lock is only tried, not waited for, so
+ * a contended port is simply skipped on this pass. Always returns 0; 'aux'
+ * is unused.
+ */
+static int cache_cleaner_cb(struct tbl_node *tbl_node, void *aux)
{
- u8 tos = ip_hdr(skb)->tos;
+ struct tnl_vport *tnl_vport = tnl_vport_table_cast(tbl_node);
+ const struct tnl_mutable_config *mutable = rcu_dereference(tnl_vport->mutable);
+ const struct tnl_cache *cache = rcu_dereference(tnl_vport->cache);
- if (INET_ECN_is_ce(tos)) {
- __be16 protocol = skb->protocol;
- unsigned int nw_header = skb_network_header(skb) - skb->data;
+ if (cache && !check_cache_valid(cache, mutable) &&
+ spin_trylock_bh(&tnl_vport->cache_lock)) {
+ assign_cache_rcu(tnl_vport_to_vport(tnl_vport), NULL);
+ spin_unlock_bh(&tnl_vport->cache_lock);
+ }
- if (skb->protocol == htons(ETH_P_8021Q)) {
- if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
- return;
+ return 0;
+}
- protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
- nw_header += VLAN_HLEN;
- }
+/*
+ * Delayed-work handler: re-arm itself through schedule_cache_cleaner() and
+ * then run cache_cleaner_cb() over every entry of port_table under
+ * rcu_read_lock().
+ */
+static void cache_cleaner(struct work_struct *work)
+{
+ schedule_cache_cleaner();
- if (protocol == htons(ETH_P_IP)) {
- if (unlikely(!pskb_may_pull(skb, nw_header
- + sizeof(struct iphdr))))
- return;
+ rcu_read_lock();
+ tbl_foreach(port_table, cache_cleaner_cb, NULL);
+ rcu_read_unlock();
+}
- IP_ECN_set_ce((struct iphdr *)(nw_header + skb->data));
- }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (protocol == htons(ETH_P_IPV6)) {
- if (unlikely(!pskb_may_pull(skb, nw_header
- + sizeof(struct ipv6hdr))))
- return;
+/*
+ * Copy the link-layer header held in the route's hh entry to the start of
+ * the cache's header area. The copy starts HH_DATA_ALIGN(hh_len) - hh_len
+ * bytes into hh_data. With HAVE_HH_SEQ the copy is retried until the
+ * hh_lock sequence is stable and that sequence is stored in cache->hh_seq;
+ * otherwise the copy is done under read_lock_bh(). rt_dst(rt).hh is
+ * dereferenced unconditionally; build_cache() checks it for NULL before
+ * calling.
+ */
+static inline void create_eth_hdr(struct tnl_cache *cache,
+ const struct rtable *rt)
+{
+ void *cache_data = get_cached_header(cache);
+ int hh_len = rt_dst(rt).hh->hh_len;
+ int hh_off = HH_DATA_ALIGN(rt_dst(rt).hh->hh_len) - hh_len;
- IP6_ECN_set_ce((struct ipv6hdr *)(nw_header
- + skb->data));
- }
+#ifdef HAVE_HH_SEQ
+ unsigned hh_seq;
+
+ do {
+ hh_seq = read_seqbegin(&rt_dst(rt).hh->hh_lock);
+ memcpy(cache_data, (void *)rt_dst(rt).hh->hh_data + hh_off, hh_len);
+ } while (read_seqretry(&rt_dst(rt).hh->hh_lock, hh_seq));
+
+ cache->hh_seq = hh_seq;
+#else
+ read_lock_bh(&rt_dst(rt).hh->hh_lock);
+ memcpy(cache_data, (void *)rt_dst(rt).hh->hh_data + hh_off, hh_len);
+ read_unlock_bh(&rt_dst(rt).hh->hh_lock);
#endif
- }
}
-static struct sk_buff *handle_gso(struct sk_buff *skb)
+/*
+ * Try to build and install a header cache for vport using route rt.
+ *
+ * Returns NULL without building anything if TNL_F_HDR_CACHE is not set, the
+ * route has no hh entry, the cache lock is contended, or the allocation
+ * fails. If a valid cache is already installed it is returned as is.
+ * Otherwise a new cache holding the link-layer header followed by the
+ * tunnel header is allocated, installed with assign_cache_rcu() and
+ * returned; it records rt, the config sequence number and, with
+ * NEED_CACHE_TIMEOUT, an expiration time.
+ *
+ * When the route points at an internal device, the cached header is also
+ * run through flow_extract() and looked up in that datapath's flow table; a
+ * matching flow is stored in cache->flow after flow_hold().
+ *
+ * NOTE(review): on the "existing cache is valid" path rt is not stored by
+ * this function -- confirm that the caller's route reference is released.
+ */
+static struct tnl_cache *build_cache(struct vport *vport,
+ const struct tnl_mutable_config *mutable,
+ struct rtable *rt)
{
- if (skb_is_gso(skb)) {
- struct sk_buff *nskb = skb_gso_segment(skb, 0);
+ struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+ struct tnl_cache *cache;
+ void *cache_data;
+ int cache_len;
- dev_kfree_skb(skb);
- return nskb;
+ if (!(mutable->port_config.flags & TNL_F_HDR_CACHE))
+ return NULL;
+
+ /*
+ * If there is no entry in the ARP cache or if this device does not
+ * support hard header caching just fall back to the IP stack.
+ */
+ if (!rt_dst(rt).hh)
+ return NULL;
+
+ /*
+ * If lock is contended fall back to directly building the header.
+ * We're not going to help performance by sitting here spinning.
+ */
+ if (!spin_trylock_bh(&tnl_vport->cache_lock))
+ return NULL;
+
+ cache = tnl_vport->cache;
+ if (check_cache_valid(cache, mutable))
+ goto unlock;
+ else
+ cache = NULL;
+
+ cache_len = rt_dst(rt).hh->hh_len + mutable->tunnel_hlen;
+
+ cache = kzalloc(ALIGN(sizeof(struct tnl_cache), CACHE_DATA_ALIGN) +
+ cache_len, GFP_ATOMIC);
+ if (!cache)
+ goto unlock;
+
+ cache->len = cache_len;
+
+ create_eth_hdr(cache, rt);
+ cache_data = get_cached_header(cache) + rt_dst(rt).hh->hh_len;
+
+ create_tunnel_header(vport, mutable, rt, cache_data);
+
+ cache->mutable_seq = mutable->seq;
+ cache->rt = rt;
+#ifdef NEED_CACHE_TIMEOUT
+ cache->expiration = jiffies + tnl_vport->cache_exp_interval;
+#endif
+
+ if (is_internal_dev(rt_dst(rt).dev)) {
+ int err;
+ struct vport *vport;
+ struct dp_port *dp_port;
+ struct sk_buff *skb;
+ bool is_frag;
+ struct xflow_key flow_key;
+ struct tbl_node *flow_node;
+
+ vport = internal_dev_get_vport(rt_dst(rt).dev);
+ if (!vport)
+ goto done;
+
+ dp_port = vport_get_dp_port(vport);
+ if (!dp_port)
+ goto done;
+
+ skb = alloc_skb(cache->len, GFP_ATOMIC);
+ if (!skb)
+ goto done;
+
+ __skb_put(skb, cache->len);
+ memcpy(skb->data, get_cached_header(cache), cache->len);
+
+ err = flow_extract(skb, dp_port->port_no, &flow_key, &is_frag);
+
+ kfree_skb(skb);
+ if (err || is_frag)
+ goto done;
+
+ flow_node = tbl_lookup(rcu_dereference(dp_port->dp->table),
+ &flow_key, flow_hash(&flow_key),
+ flow_cmp);
+ if (flow_node) {
+ struct sw_flow *flow = flow_cast(flow_node);
+
+ cache->flow = flow;
+ flow_hold(flow);
+ }
}
- return skb;
+done:
+ assign_cache_rcu(vport, cache);
+
+unlock:
+ spin_unlock_bh(&tnl_vport->cache_lock);
+
+ return cache;
}
-static int handle_csum_offload(struct sk_buff *skb)
+/*
+ * Find the route to use for sending on vport with the given ToS.
+ *
+ * If tos (masked with RT_TOS) equals the configured ToS and the current
+ * cache is valid, the cached route is returned and *cache is set to that
+ * cache. Otherwise a fresh lookup is done with ip_route_output_key();
+ * NULL is returned if it fails. After a fresh lookup *cache is the result
+ * of build_cache() when the ToS matches the configured one, else NULL.
+ */
+static struct rtable *find_route(struct vport *vport,
+ const struct tnl_mutable_config *mutable,
+ u8 tos, struct tnl_cache **cache)
{
- if (skb->ip_summed == CHECKSUM_PARTIAL)
- return skb_checksum_help(skb);
- else {
- skb->ip_summed = CHECKSUM_NONE;
- return 0;
+ struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+ struct tnl_cache *cur_cache = rcu_dereference(tnl_vport->cache);
+
+ *cache = NULL;
+ tos = RT_TOS(tos);
+
+ if (likely(tos == mutable->port_config.tos &&
+ check_cache_valid(cur_cache, mutable))) {
+ *cache = cur_cache;
+ return cur_cache->rt;
+ } else {
+ struct rtable *rt;
+ struct flowi fl = { .nl_u = { .ip4_u =
+ { .daddr = mutable->port_config.daddr,
+ .saddr = mutable->port_config.saddr,
+ .tos = tos } },
+ .proto = tnl_vport->tnl_ops->ipproto };
+
+ if (unlikely(ip_route_output_key(&init_net, &rt, &fl)))
+ return NULL;
+
+ if (likely(tos == mutable->port_config.tos))
+ *cache = build_cache(vport, mutable, rt);
+
+ return rt;
}
}
-/* Called with rcu_read_lock. */
-void tnl_rcv(struct vport *vport, struct sk_buff *skb)
+/*
+ * Make sure skb has at least 'headroom' bytes of headroom and a header
+ * that is not cloned. If it already does, skb is returned unchanged.
+ * Otherwise a copy is made with skb_realloc_headroom(skb, headroom + 16),
+ * the checksum bits and owning socket are carried over, the original is
+ * freed and the copy is returned. On allocation failure skb is freed and
+ * ERR_PTR(-ENOMEM) is returned.
+ */
+static struct sk_buff *check_headroom(struct sk_buff *skb, int headroom)
{
- skb->pkt_type = PACKET_HOST;
- skb->protocol = eth_type_trans(skb, skb->dev);
+ if (skb_headroom(skb) < headroom || skb_header_cloned(skb)) {
+ struct sk_buff *nskb = skb_realloc_headroom(skb, headroom + 16);
+ if (unlikely(!nskb)) {
+ kfree_skb(skb);
+ return ERR_PTR(-ENOMEM);
+ }
- skb_dst_drop(skb);
- nf_reset(skb);
- secpath_reset(skb);
- skb_reset_network_header(skb);
+ set_skb_csum_bits(skb, nskb);
- ecn_decapsulate(skb);
+ if (skb->sk)
+ skb_set_owner_w(nskb, skb->sk);
- skb_push(skb, ETH_HLEN);
- compute_ip_summed(skb, false);
+ kfree_skb(skb);
+ return nskb;
+ }
- vport_receive(vport, skb);
+ return skb;
}
-static int build_packet(struct vport *vport, const struct tnl_mutable_config *mutable,
- struct iphdr *iph, struct rtable *rt, int max_headroom,
- int mtu, struct sk_buff *skb)
+/*
+ * Report whether skb must be linearized before a checksum is computed over
+ * it. Returns true if the skb has a frag_list or if any of its paged
+ * fragments lives on a page with more than one reference; false otherwise.
+ */
+static inline bool need_linearize(const struct sk_buff *skb)
{
- struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+ int i;
+
+ if (unlikely(skb_shinfo(skb)->frag_list))
+ return true;
+
+ /*
+ * Generally speaking we should linearize if there are paged frags.
+ * However, if all of the refcounts are 1 we know nobody else can
+ * change them from underneath us and we can skip the linearization.
+ * Every fragment has to be inspected, so index by the loop counter.
+ */
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ if (unlikely(page_count(skb_shinfo(skb)->frags[i].page) > 1))
+ return true;
+
+ return false;
+}
+
+/*
+ * Prepare skb for encapsulation: set up checksum state, ensure enough
+ * headroom for the link-layer, route and tunnel headers, and resolve
+ * offloads in software. A GSO skb is segmented with skb_gso_segment() and
+ * the segment list is returned in place of the original. A non-GSO skb
+ * with CHECKSUM_PARTIAL has its checksum filled in by skb_checksum_help()
+ * (after linearizing if need_linearize() says so); CHECKSUM_COMPLETE is
+ * downgraded to CHECKSUM_NONE.
+ *
+ * Returns the skb (or segment list) to send, or an ERR_PTR(); in the error
+ * case the skb has already been freed.
+ */
+static struct sk_buff *handle_offloads(struct sk_buff *skb,
+ const struct tnl_mutable_config *mutable,
+ const struct rtable *rt)
+{
+ int min_headroom;
int err;
- struct iphdr *new_iph;
- int orig_len = skb->len;
- __be16 frag_off = iph->frag_off;
- skb = check_headroom(skb, max_headroom);
- if (unlikely(IS_ERR(skb)))
- goto error;
+ forward_ip_summed(skb);
- err = handle_csum_offload(skb);
+ err = vswitch_skb_checksum_setup(skb);
if (unlikely(err))
goto error_free;
- if (skb->protocol == htons(ETH_P_IP)) {
- struct iphdr *old_iph = ip_hdr(skb);
+ min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
+ + mutable->tunnel_hlen;
- if ((old_iph->frag_off & htons(IP_DF)) &&
- mtu < ntohs(old_iph->tot_len)) {
- if (tnl_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
- goto error_free;
+ if (skb_is_gso(skb)) {
+ struct sk_buff *nskb;
+
+ /*
+ * If we are doing GSO on a pskb it is better to make sure that
+ * the headroom is correct now. We will only have to copy the
+ * portion in the linear data area and GSO will preserve
+ * headroom when it creates the segments. This is particularly
+ * beneficial on Xen where we get a lot of GSO pskbs.
+ * Conversely, we avoid copying if it is just to get our own
+ * writable clone because GSO will do the copy for us.
+ */
+ if (skb_headroom(skb) < min_headroom) {
+ skb = check_headroom(skb, min_headroom);
+ if (unlikely(IS_ERR(skb))) {
+ err = PTR_ERR(skb);
+ goto error;
+ }
}
- }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6)) {
- unsigned int packet_length = skb->len - ETH_HLEN
- - (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);
+ nskb = skb_gso_segment(skb, 0);
+ kfree_skb(skb);
+ if (unlikely(IS_ERR(nskb))) {
+ err = PTR_ERR(nskb);
+ goto error;
+ }
- /* IPv6 requires PMTUD if the packet is above the minimum MTU. */
- if (packet_length > IPV6_MIN_MTU)
- frag_off = htons(IP_DF);
+ skb = nskb;
+ } else {
+ skb = check_headroom(skb, min_headroom);
+ if (unlikely(IS_ERR(skb))) {
+ err = PTR_ERR(skb);
+ goto error;
+ }
- if (mtu < packet_length) {
- if (tnl_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ /*
+ * Pages aren't locked and could change at any time.
+ * If this happens after we compute the checksum, the
+ * checksum will be wrong. We linearize now to avoid
+ * this problem.
+ */
+ if (unlikely(need_linearize(skb))) {
+ err = __skb_linearize(skb);
+ if (unlikely(err))
+ goto error_free;
+ }
+
+ err = skb_checksum_help(skb);
+ if (unlikely(err))
goto error_free;
- }
+ } else if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_NONE;
}
-#endif
- new_iph = (struct iphdr *)skb_push(skb, mutable->tunnel_hlen);
- skb_reset_network_header(skb);
- skb_set_transport_header(skb, sizeof(struct iphdr));
-
- memcpy(new_iph, iph, sizeof(struct iphdr));
- new_iph->frag_off = frag_off;
- ip_select_ident(new_iph, &rt_dst(rt), NULL);
+ return skb;
- memset(&IPCB(skb)->opt, 0, sizeof(IPCB(skb)->opt));
- IPCB(skb)->flags = 0;
+error_free:
+ kfree_skb(skb);
+error:
+ return ERR_PTR(err);
+}
- skb = tnl_vport->tnl_ops->build_header(skb, vport, mutable, &rt_dst(rt));
- if (unlikely(!skb))
- goto error;
+static int send_frags(struct sk_buff *skb,
+ const struct tnl_mutable_config *mutable)
+{
+ int sent_len;
+ int err;
+ sent_len = 0;
while (skb) {
struct sk_buff *next = skb->next;
int frag_len = skb->len - mutable->tunnel_hlen;
@@ -694,34 +1101,26 @@ static int build_packet(struct vport *vport, const struct tnl_mutable_config *mu
skb->next = NULL;
err = ip_local_out(skb);
- if (unlikely(net_xmit_eval(err) != 0)) {
- orig_len -= frag_len;
+ if (likely(net_xmit_eval(err) == 0))
+ sent_len += frag_len;
+ else {
skb = next;
goto free_frags;
}
skb = next;
- };
+ }
- return orig_len;
+ return sent_len;
-error_free:
- kfree_skb(skb);
-error:
- return 0;
free_frags:
/*
* There's no point in continuing to send fragments once one has been
* dropped so just free the rest. This may help improve the congestion
* that caused the first packet to be dropped.
*/
- while (skb) {
- struct sk_buff *next = skb->next;
- orig_len -= skb->len - mutable->tunnel_hlen;
- kfree_skb(skb);
- skb = next;
- };
- return orig_len;
+ tnl_free_linked_skbs(skb);
+ return sent_len;
}
int tnl_send(struct vport *vport, struct sk_buff *skb)
@@ -729,12 +1128,15 @@ int tnl_send(struct vport *vport, struct sk_buff *skb)
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
const struct tnl_mutable_config *mutable = rcu_dereference(tnl_vport->mutable);
- struct iphdr *old_iph;
- int orig_len;
- struct iphdr iph;
+ enum vport_err_type err = VPORT_E_TX_ERROR;
struct rtable *rt;
- int max_headroom;
- int mtu;
+ struct dst_entry *unattached_dst = NULL;
+ struct tnl_cache *cache;
+ int sent_len = 0;
+ __be16 frag_off;
+ u8 ttl;
+ u8 inner_tos;
+ u8 tos;
/* Validate the protocol headers before we try to use them. */
if (skb->protocol == htons(ETH_P_8021Q)) {
@@ -746,147 +1148,162 @@ int tnl_send(struct vport *vport, struct sk_buff *skb)
}
if (skb->protocol == htons(ETH_P_IP)) {
- if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
- + sizeof(struct iphdr) - skb->data)))
+ if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb)
+ + sizeof(struct iphdr))))
skb->protocol = 0;
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
else if (skb->protocol == htons(ETH_P_IPV6)) {
- if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
- + sizeof(struct ipv6hdr) - skb->data)))
+ if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb)
+ + sizeof(struct ipv6hdr))))
skb->protocol = 0;
}
#endif
- old_iph = ip_hdr(skb);
-
- iph.tos = mutable->port_config.tos;
- if (mutable->port_config.flags & TNL_F_TOS_INHERIT) {
- if (skb->protocol == htons(ETH_P_IP))
- iph.tos = old_iph->tos;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6))
- iph.tos = ipv6_get_dsfield(ipv6_hdr(skb));
-#endif
- }
- iph.tos = ecn_encapsulate(iph.tos, skb);
-
- {
- struct flowi fl = { .nl_u = { .ip4_u =
- { .daddr = mutable->port_config.daddr,
- .saddr = mutable->port_config.saddr,
- .tos = RT_TOS(iph.tos) } },
- .proto = tnl_vport->tnl_ops->ipproto };
-
- if (unlikely(ip_route_output_key(&init_net, &rt, &fl)))
- goto error_free;
- }
- iph.ttl = mutable->port_config.ttl;
- if (mutable->port_config.flags & TNL_F_TTL_INHERIT) {
- if (skb->protocol == htons(ETH_P_IP))
- iph.ttl = old_iph->ttl;
+ /* ToS */
+ if (skb->protocol == htons(ETH_P_IP))
+ inner_tos = ip_hdr(skb)->tos;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6))
- iph.ttl = ipv6_hdr(skb)->hop_limit;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ inner_tos = ipv6_get_dsfield(ipv6_hdr(skb));
#endif
- }
- if (!iph.ttl)
- iph.ttl = dst_metric(&rt_dst(rt), RTAX_HOPLIMIT);
+ else
+ inner_tos = 0;
- iph.frag_off = (mutable->port_config.flags & TNL_F_PMTUD) ? htons(IP_DF) : 0;
- if (iph.frag_off)
- mtu = dst_mtu(&rt_dst(rt))
- - ETH_HLEN
- - mutable->tunnel_hlen
- - (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);
+ if (mutable->port_config.flags & TNL_F_TOS_INHERIT)
+ tos = inner_tos;
else
- mtu = mutable->mtu;
+ tos = mutable->port_config.tos;
- if (skb->protocol == htons(ETH_P_IP)) {
- iph.frag_off |= old_iph->frag_off & htons(IP_DF);
- mtu = max(mtu, IP_MIN_MTU);
- }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6))
- mtu = max(mtu, IPV6_MIN_MTU);
-#endif
+ tos = INET_ECN_encapsulate(tos, inner_tos);
- iph.version = 4;
- iph.ihl = sizeof(struct iphdr) >> 2;
- iph.protocol = tnl_vport->tnl_ops->ipproto;
- iph.daddr = rt->rt_dst;
- iph.saddr = rt->rt_src;
+ /* Route lookup */
+ rt = find_route(vport, mutable, tos, &cache);
+ if (unlikely(!rt))
+ goto error_free;
+ if (unlikely(!cache))
+ unattached_dst = &rt_dst(rt);
+ /* Reset SKB */
nf_reset(skb);
secpath_reset(skb);
skb_dst_drop(skb);
- skb_dst_set(skb, &rt_dst(rt));
- /*
- * If we are doing GSO on a pskb it is better to make sure that the
- * headroom is correct now. We will only have to copy the portion in
- * the linear data area and GSO will preserve headroom when it creates
- * the segments. This is particularly beneficial on Xen where we get
- * lots of GSO pskbs. Conversely, we delay copying if it is just to
- * get our own writable clone because GSO may do the copy for us.
- */
- max_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
- + mutable->tunnel_hlen;
+ /* Offloading */
+ skb = handle_offloads(skb, mutable, rt);
+ if (unlikely(IS_ERR(skb)))
+ goto error;
- if (skb_headroom(skb) < max_headroom) {
- skb = check_headroom(skb, max_headroom);
- if (unlikely(IS_ERR(skb))) {
- vport_record_error(vport, VPORT_E_TX_DROPPED);
- goto error;
- }
+ /* MTU */
+ if (unlikely(!check_mtu(skb, vport, mutable, rt, &frag_off))) {
+ err = VPORT_E_TX_DROPPED;
+ goto error_free;
}
- forward_ip_summed(skb);
+ /*
+ * If we are over the MTU, allow the IP stack to handle fragmentation.
+ * Fragmentation is a slow path anyways.
+ */
+ if (unlikely(skb->len + mutable->tunnel_hlen > dst_mtu(&rt_dst(rt)) &&
+ cache)) {
+ unattached_dst = &rt_dst(rt);
+ dst_hold(unattached_dst);
+ cache = NULL;
+ }
- if (unlikely(vswitch_skb_checksum_setup(skb)))
- goto error_free;
+ /* TTL */
+ ttl = mutable->port_config.ttl;
+ if (!ttl)
+ ttl = dst_metric(&rt_dst(rt), RTAX_HOPLIMIT);
- skb = handle_gso(skb);
- if (unlikely(IS_ERR(skb))) {
- vport_record_error(vport, VPORT_E_TX_DROPPED);
- goto error;
+ if (mutable->port_config.flags & TNL_F_TTL_INHERIT) {
+ if (skb->protocol == htons(ETH_P_IP))
+ ttl = ip_hdr(skb)->ttl;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ ttl = ipv6_hdr(skb)->hop_limit;
+#endif
}
- /*
- * Process GSO segments. Try to do any work for the entire packet that
- * doesn't involve actually writing to it before this point.
- */
- orig_len = 0;
- do {
+ while (skb) {
+ struct iphdr *iph;
struct sk_buff *next_skb = skb->next;
skb->next = NULL;
- orig_len += build_packet(vport, mutable, &iph, rt, max_headroom, mtu, skb);
+ if (likely(cache)) {
+ skb_push(skb, cache->len);
+ memcpy(skb->data, get_cached_header(cache), cache->len);
+ skb_reset_mac_header(skb);
+ skb_set_network_header(skb, rt_dst(rt).hh->hh_len);
+
+ } else {
+ skb_push(skb, mutable->tunnel_hlen);
+ create_tunnel_header(vport, mutable, rt, skb->data);
+ skb_reset_network_header(skb);
+
+ if (next_skb)
+ skb_dst_set(skb, dst_clone(unattached_dst));
+ else {
+ skb_dst_set(skb, unattached_dst);
+ unattached_dst = NULL;
+ }
+
+ memset(&IPCB(skb)->opt, 0, sizeof(IPCB(skb)->opt));
+ IPCB(skb)->flags = 0;
+ }
+ skb_set_transport_header(skb, skb_network_offset(skb) + sizeof(struct iphdr));
+
+ iph = ip_hdr(skb);
+ iph->tos = tos;
+ iph->ttl = ttl;
+ iph->frag_off = frag_off;
+ ip_select_ident(iph, &rt_dst(rt), NULL);
+
+ skb = tnl_vport->tnl_ops->update_header(vport, mutable, &rt_dst(rt), skb);
+ if (unlikely(!skb))
+ goto next;
+
+ if (likely(cache)) {
+ int orig_len = skb->len - cache->len;
+
+ skb->protocol = htons(ETH_P_IP);
+ iph->tot_len = htons(skb->len - skb_network_offset(skb));
+ ip_send_check(iph);
+
+ if (is_internal_dev(rt_dst(rt).dev)) {
+ OVS_CB(skb)->flow = cache->flow;
+ compute_ip_summed(skb, true);
+ vport_receive(internal_dev_get_vport(rt_dst(rt).dev), skb);
+ sent_len += orig_len;
+ } else {
+ int err;
+
+ skb->dev = rt_dst(rt).dev;
+ err = dev_queue_xmit(skb);
+
+ if (likely(net_xmit_eval(err) == 0))
+ sent_len += orig_len;
+ }
+ } else
+ sent_len += send_frags(skb, mutable);
+
+next:
skb = next_skb;
- } while (skb);
+ }
- if (unlikely(orig_len == 0))
+ if (unlikely(sent_len == 0))
vport_record_error(vport, VPORT_E_TX_DROPPED);
- return orig_len;
+ goto out;
error_free:
- kfree_skb(skb);
- vport_record_error(vport, VPORT_E_TX_ERROR);
+ tnl_free_linked_skbs(skb);
error:
- return 0;
-}
-
-int tnl_init(void)
-{
- return 0;
-}
-
-void tnl_exit(void)
-{
- tbl_destroy(port_table, NULL);
- port_table = NULL;
+ dst_release(unattached_dst);
+ vport_record_error(vport, err);
+out:
+ return sent_len;
}
static int set_config(const void __user *uconfig, const struct tnl_ops *tnl_ops,
@@ -899,15 +1316,18 @@ static int set_config(const void __user *uconfig, const struct tnl_ops *tnl_ops,
if (copy_from_user(&mutable->port_config, uconfig, sizeof(struct tnl_port_config)))
return -EFAULT;
+ if (mutable->port_config.daddr == 0)
+ return -EINVAL;
+
+ if (mutable->port_config.tos != RT_TOS(mutable->port_config.tos))
+ return -EINVAL;
+
mutable->tunnel_hlen = tnl_ops->hdr_len(&mutable->port_config);
if (mutable->tunnel_hlen < 0)
return mutable->tunnel_hlen;
mutable->tunnel_hlen += sizeof(struct iphdr);
- if (mutable->port_config.daddr == 0)
- return -EINVAL;
-
mutable->tunnel_type = tnl_ops->tunnel_type;
if (mutable->port_config.flags & TNL_F_IN_KEY_MATCH) {
mutable->tunnel_type |= TNL_T_KEY_MATCH;
@@ -950,7 +1370,7 @@ struct vport *tnl_create(const char *name, const void __user *config,
strcpy(tnl_vport->name, name);
tnl_vport->tnl_ops = tnl_ops;
- tnl_vport->mutable = kmalloc(sizeof(struct tnl_mutable_config), GFP_KERNEL);
+ tnl_vport->mutable = kzalloc(sizeof(struct tnl_mutable_config), GFP_KERNEL);
if (!tnl_vport->mutable) {
err = -ENOMEM;
goto error_free_vport;
@@ -966,6 +1386,13 @@ struct vport *tnl_create(const char *name, const void __user *config,
if (err)
goto error_free_mutable;
+ spin_lock_init(&tnl_vport->cache_lock);
+
+#ifdef NEED_CACHE_TIMEOUT
+ tnl_vport->cache_exp_interval = MAX_CACHE_EXP -
+ (net_random() % (MAX_CACHE_EXP / 2));
+#endif
+
err = add_port(vport);
if (err)
goto error_free_mutable;
@@ -985,7 +1412,6 @@ int tnl_modify(struct vport *vport, const void __user *config)
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
struct tnl_mutable_config *mutable;
int err;
- bool update_hash = false;
mutable = kmemdup(tnl_vport->mutable, sizeof(struct tnl_mutable_config), GFP_KERNEL);
if (!mutable) {
@@ -997,35 +1423,11 @@ int tnl_modify(struct vport *vport, const void __user *config)
if (err)
goto error_free;
- /*
- * Only remove the port from the hash table if something that would
- * affect the lookup has changed.
- */
- if (tnl_vport->mutable->port_config.saddr != mutable->port_config.saddr ||
- tnl_vport->mutable->port_config.daddr != mutable->port_config.daddr ||
- tnl_vport->mutable->port_config.in_key != mutable->port_config.in_key ||
- (tnl_vport->mutable->port_config.flags & TNL_F_IN_KEY_MATCH) !=
- (mutable->port_config.flags & TNL_F_IN_KEY_MATCH))
- update_hash = true;
-
-
- /*
- * This update is not atomic but the lookup uses the config, which
- * serves as an inherent double check.
- */
- if (update_hash) {
- err = del_port(vport);
- if (err)
- goto error_free;
- }
-
- assign_config_rcu(vport, mutable);
+ mutable->seq++;
- if (update_hash) {
- err = add_port(vport);
- if (err)
- goto error_free;
- }
+ err = move_port(vport, mutable);
+ if (err)
+ goto error_free;
return 0;
@@ -1035,10 +1437,14 @@ error:
return err;
}
-static void free_port(struct rcu_head *rcu)
+static void free_port_rcu(struct rcu_head *rcu)
{
struct tnl_vport *tnl_vport = container_of(rcu, struct tnl_vport, rcu);
+ spin_lock_bh(&tnl_vport->cache_lock);
+ free_cache(tnl_vport->cache);
+ spin_unlock_bh(&tnl_vport->cache_lock);
+
kfree(tnl_vport->mutable);
vport_free(tnl_vport_to_vport(tnl_vport));
}
@@ -1055,7 +1461,7 @@ int tnl_destroy(struct vport *vport)
&old_mutable))
del_port(vport);
- call_rcu(&tnl_vport->rcu, free_port);
+ call_rcu(&tnl_vport->rcu, free_port_rcu);
return 0;
}
@@ -1090,7 +1496,6 @@ int tnl_set_addr(struct vport *vport, const unsigned char *addr)
return 0;
}
-
const char *tnl_get_name(const struct vport *vport)
{
const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
@@ -1108,3 +1513,15 @@ int tnl_get_mtu(const struct vport *vport)
const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
return rcu_dereference(tnl_vport->mutable)->mtu;
}
+
+void tnl_free_linked_skbs(struct sk_buff *skb)
+{
+ if (unlikely(!skb))
+ return;
+
+ while (skb) {
+ struct sk_buff *next = skb->next;
+ kfree_skb(skb);
+ skb = next;
+ }
+}
diff --git a/datapath/tunnel.h b/datapath/tunnel.h
index 37874c57c..8ffb7bf54 100644
--- a/datapath/tunnel.h
+++ b/datapath/tunnel.h
@@ -9,6 +9,9 @@
#ifndef TUNNEL_H
#define TUNNEL_H 1
+#include <linux/version.h>
+
+#include "flow.h"
#include "openvswitch/tunnel.h"
#include "table.h"
#include "vport.h"
@@ -20,14 +23,15 @@
#define IP_MIN_MTU 68
/*
- * One of these goes in your struct tnl_ops and in tnl_find_port().
+ * One of these goes in struct tnl_ops and in tnl_find_port().
* These values are in the same namespace as other TNL_T_* values, so
- * you have only the first 10 bits to define protocol identifiers.
+ * only the least significant 10 bits are available to define protocol
+ * identifiers.
*/
#define TNL_T_PROTO_GRE 0
#define TNL_T_PROTO_CAPWAP 1
-/* You only need these flags when you are calling tnl_find_port(). */
+/* These flags are only needed when calling tnl_find_port(). */
#define TNL_T_KEY_EXACT (1 << 10)
#define TNL_T_KEY_MATCH (1 << 11)
#define TNL_T_KEY_EITHER (TNL_T_KEY_EXACT | TNL_T_KEY_MATCH)
@@ -35,39 +39,119 @@
struct tnl_mutable_config {
struct rcu_head rcu;
- unsigned char eth_addr[ETH_ALEN];
- unsigned int mtu;
- struct tnl_port_config port_config;
+ unsigned seq; /* Sequence number to identify this config. */
- /* Set of TNL_T_* flags that define the category for lookup. */
- u32 tunnel_type;
+ u32 tunnel_type; /* Set of TNL_T_* flags that define lookup. */
+ unsigned tunnel_hlen; /* Tunnel header length. */
+
+ unsigned char eth_addr[ETH_ALEN];
+ unsigned mtu;
- int tunnel_hlen; /* Tunnel header length. */
+ struct tnl_port_config port_config;
};
struct tnl_ops {
- /* Put your TNL_T_PROTO_* type in here. */
- u32 tunnel_type;
- u8 ipproto;
+ u32 tunnel_type; /* Put the TNL_T_PROTO_* type in here. */
+ u8 ipproto; /* The IP protocol for the tunnel. */
/*
- * Returns the length of the tunnel header you will add in
+ * Returns the length of the tunnel header that will be added in
* build_header() (i.e. excludes the IP header). Returns a negative
* error code if the configuration is invalid.
*/
int (*hdr_len)(const struct tnl_port_config *);
/*
- * Returns a linked list of SKBs with tunnel headers (multiple
- * packets may be generated in the event of fragmentation). Space
- * will have already been allocated at the start of the packet equal
- * to sizeof(struct iphdr) + value returned by hdr_len(). The IP
- * header will have already been constructed.
+ * Builds the static portion of the tunnel header, which is stored in
+ * the header cache. In general the performance of this function is
+ * not too important as we try to only call it when building the cache
+ * so it is preferable to shift as much work as possible here. However,
+ * in some circumstances caching is disabled and this function will be
+ * called for every packet, so try not to make it too slow.
+ */
+ void (*build_header)(const struct vport *,
+ const struct tnl_mutable_config *, void *header);
+
+ /*
+ * Updates the cached header of a packet to match the actual packet
+ * data. Typical things that might need to be updated are length,
+ * checksum, etc. The IP header will have already been updated and this
+ * is the final step before transmission. Returns a linked list of
+ * completed SKBs (multiple packets may be generated in the event
+ * of fragmentation).
+ */
+ struct sk_buff *(*update_header)(const struct vport *,
+ const struct tnl_mutable_config *,
+ struct dst_entry *, struct sk_buff *);
+};
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
+/*
+ * On these kernels we have a fast mechanism to tell if the ARP cache for a
+ * particular destination has changed.
+ */
+#define HAVE_HH_SEQ
+#endif
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)
+/*
+ * On these kernels we have a fast mechanism to tell if the routing table
+ * has changed.
+ */
+#define HAVE_RT_GENID
+#endif
+#if !defined(HAVE_HH_SEQ) || !defined(HAVE_RT_GENID)
+/* If we can't detect all system changes directly we need to use a timeout. */
+#define NEED_CACHE_TIMEOUT
+#endif
+struct tnl_cache {
+ struct rcu_head rcu;
+
+ int len; /* Length of data to be memcpy'd from cache. */
+
+ /* Sequence number of mutable->seq from which this cache was generated. */
+ unsigned mutable_seq;
+
+#ifdef HAVE_HH_SEQ
+ /*
+ * The sequence number from the seqlock protecting the hardware header
+ * cache (in the ARP cache). Since every write increments the counter
+ * this gives us an easy way to tell if it has changed.
+ */
+ unsigned hh_seq;
+#endif
+
+#ifdef NEED_CACHE_TIMEOUT
+ /*
+ * If we don't have direct mechanisms to detect all important changes in
+ * the system fall back to an expiration time. This expiration time
+ * can be relatively short since at high rates there will be millions of
+ * packets per second, so we'll still get plenty of benefit from the
+ * cache. Note that if something changes we may blackhole packets
+ * until the expiration time (depending on what changed and the kernel
+ * version we may be able to detect the change sooner). Expiration is
+ * expressed as a time in jiffies.
*/
- struct sk_buff *(*build_header)(struct sk_buff *,
- const struct vport *,
- const struct tnl_mutable_config *,
- struct dst_entry *);
+ unsigned long expiration;
+#endif
+
+ /*
+ * The routing table entry that is the result of looking up the tunnel
+ * endpoints. It also contains a sequence number (called a generation
+ * ID) that can be compared to a global sequence to tell if the routing
+ * table has changed (and therefore there is a potential that this
+ * cached route has been invalidated).
+ */
+ struct rtable *rt;
+
+ /*
+ * If the output device for tunnel traffic is an OVS internal device,
+ * the flow of that datapath. Since all tunnel traffic will have the
+ * same headers this allows us to cache the flow lookup. NULL if the
+ * output device is not OVS or if there is no flow installed.
+ */
+ struct sw_flow *flow;
+
+ /* The cached header follows after padding for alignment. */
};
struct tnl_vport {
@@ -77,14 +161,29 @@ struct tnl_vport {
char name[IFNAMSIZ];
const struct tnl_ops *tnl_ops;
- /* Protected by RCU. */
- struct tnl_mutable_config *mutable;
+ struct tnl_mutable_config *mutable; /* Protected by RCU. */
+ /*
+ * ID of last fragment sent (for tunnel protocols with direct support
+ * for fragmentation). If the protocol relies on IP fragmentation then
+ * this is not needed.
+ */
atomic_t frag_id;
+
+ spinlock_t cache_lock;
+ struct tnl_cache *cache; /* Protected by RCU/cache_lock. */
+
+#ifdef NEED_CACHE_TIMEOUT
+ /*
+ * If we must rely on expiration time to invalidate the cache, this is
+ * the interval. It is randomized within a range (defined by
+ * MAX_CACHE_EXP in tunnel.c) to avoid synchronized expirations caused
+ * by creation of a large number of tunnels at one time.
+ */
+ unsigned long cache_exp_interval;
+#endif
};
-int tnl_init(void);
-void tnl_exit(void);
struct vport *tnl_create(const char *name, const void __user *config,
const struct vport_ops *,
const struct tnl_ops *);
@@ -104,10 +203,12 @@ struct vport *tnl_find_port(__be32 saddr, __be32 daddr, __be32 key,
bool tnl_frag_needed(struct vport *vport,
const struct tnl_mutable_config *mutable,
struct sk_buff *skb, unsigned int mtu, __be32 flow_key);
+void tnl_free_linked_skbs(struct sk_buff *skb);
static inline struct tnl_vport *tnl_vport_priv(const struct vport *vport)
{
return vport_priv(vport);
}
+
#endif /* tunnel.h */
diff --git a/datapath/vport-capwap.c b/datapath/vport-capwap.c
index ce8cc43e2..bf1465fc0 100644
--- a/datapath/vport-capwap.c
+++ b/datapath/vport-capwap.c
@@ -21,6 +21,7 @@
#include <net/inet_frag.h>
#include <net/ip.h>
#include <net/protocol.h>
+#include <net/udp.h>
#include "tunnel.h"
#include "vport.h"
@@ -127,24 +128,32 @@ static int capwap_hdr_len(const struct tnl_port_config *port_config)
return CAPWAP_HLEN;
}
-static struct sk_buff *capwap_build_header(struct sk_buff *skb,
- const struct vport *vport,
- const struct tnl_mutable_config *mutable,
- struct dst_entry *dst)
+static void capwap_build_header(const struct vport *vport,
+ const struct tnl_mutable_config *mutable,
+ void *header)
{
- struct udphdr *udph = udp_hdr(skb);
- struct capwaphdr *cwh = capwap_hdr(skb);
+ struct udphdr *udph = header;
+ struct capwaphdr *cwh = (struct capwaphdr *)(udph + 1);
udph->source = htons(CAPWAP_SRC_PORT);
udph->dest = htons(CAPWAP_DST_PORT);
- udph->len = htons(skb->len - sizeof(struct iphdr));
udph->check = 0;
cwh->begin = NO_FRAG_HDR;
cwh->frag_id = 0;
cwh->frag_off = 0;
+}
+
+static struct sk_buff *capwap_update_header(const struct vport *vport,
+ const struct tnl_mutable_config *mutable,
+ struct dst_entry *dst,
+ struct sk_buff *skb)
+{
+ struct udphdr *udph = udp_hdr(skb);
- if (unlikely(skb->len > dst_mtu(dst)))
+ udph->len = htons(skb->len - skb_transport_offset(skb));
+
+ if (unlikely(skb->len - skb_network_offset(skb) > dst_mtu(dst)))
skb = fragment(skb, vport, dst);
return skb;
@@ -208,6 +217,7 @@ struct tnl_ops capwap_tnl_ops = {
.ipproto = IPPROTO_UDP,
.hdr_len = capwap_hdr_len,
.build_header = capwap_build_header,
+ .update_header = capwap_update_header,
};
static struct vport *capwap_create(const char *name, const void __user *config)
@@ -240,7 +250,7 @@ static int capwap_init(void)
defrag_init();
- return tnl_init();
+ return 0;
error_sock:
sock_release(capwap_rcv_socket);
@@ -251,7 +261,6 @@ error:
static void capwap_exit(void)
{
- tnl_exit();
defrag_exit();
sock_release(capwap_rcv_socket);
}
@@ -281,17 +290,19 @@ static struct sk_buff *fragment(struct sk_buff *skb, const struct vport *vport,
struct dst_entry *dst)
{
struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
- unsigned int hlen = sizeof(struct iphdr) + CAPWAP_HLEN;
- unsigned int headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len;
+ unsigned int hlen = skb_transport_offset(skb) + CAPWAP_HLEN;
+ unsigned int headroom;
+ unsigned int max_frame_len = dst_mtu(dst) + skb_network_offset(skb);
struct sk_buff *result = NULL, *list_cur = NULL;
unsigned int remaining;
unsigned int offset;
__be16 frag_id;
- if (hlen + ~FRAG_OFF_MASK + 1 > dst_mtu(dst)) {
+ if (hlen + ~FRAG_OFF_MASK + 1 > max_frame_len) {
if (net_ratelimit())
pr_warn("capwap link mtu (%d) is less than minimum packet (%d)\n",
- dst_mtu(dst), hlen + ~FRAG_OFF_MASK + 1);
+ dst_mtu(dst),
+ hlen - skb_network_offset(skb) + ~FRAG_OFF_MASK + 1);
goto error;
}
@@ -299,14 +310,17 @@ static struct sk_buff *fragment(struct sk_buff *skb, const struct vport *vport,
offset = 0;
frag_id = htons(atomic_inc_return(&tnl_vport->frag_id));
+ headroom = dst->header_len + 16;
+ if (!skb_network_offset(skb))
+ headroom += LL_RESERVED_SPACE(dst->dev);
+
while (remaining) {
struct sk_buff *skb2;
int frag_size;
- struct iphdr *iph;
struct udphdr *udph;
struct capwaphdr *cwh;
- frag_size = min(remaining, dst_mtu(dst) - hlen);
+ frag_size = min(remaining, max_frame_len - hlen);
if (remaining > frag_size)
frag_size &= FRAG_OFF_MASK;
@@ -316,23 +330,22 @@ static struct sk_buff *fragment(struct sk_buff *skb, const struct vport *vport,
skb_reserve(skb2, headroom);
__skb_put(skb2, hlen + frag_size);
- skb_reset_network_header(skb2);
- skb_set_transport_header(skb2, sizeof(struct iphdr));
- /* Copy IP/UDP/CAPWAP header. */
+ if (skb_network_offset(skb))
+ skb_reset_mac_header(skb2);
+ skb_set_network_header(skb2, skb_network_offset(skb));
+ skb_set_transport_header(skb2, skb_transport_offset(skb));
+
+ /* Copy (Ethernet)/IP/UDP/CAPWAP header. */
copy_skb_metadata(skb, skb2);
- skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
+ skb_copy_from_linear_data(skb, skb2->data, hlen);
/* Copy this data chunk. */
if (skb_copy_bits(skb, hlen + offset, skb2->data + hlen, frag_size))
BUG();
- iph = ip_hdr(skb2);
- iph->tot_len = hlen + frag_size;
- ip_send_check(iph);
-
udph = udp_hdr(skb2);
- udph->len = htons(skb2->len - sizeof(struct iphdr));
+ udph->len = htons(skb2->len - skb_transport_offset(skb2));
cwh = capwap_hdr(skb2);
if (remaining > frag_size)
@@ -355,11 +368,7 @@ static struct sk_buff *fragment(struct sk_buff *skb, const struct vport *vport,
goto out;
error:
- while (result) {
- list_cur = result->next;
- kfree_skb(result);
- result = list_cur;
- }
+ tnl_free_linked_skbs(result);
out:
kfree_skb(skb);
return result;
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index 0a7092f96..be8fb5343 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -50,41 +50,49 @@ static int gre_hdr_len(const struct tnl_port_config *port_config)
return len;
}
-static struct sk_buff *gre_build_header(struct sk_buff *skb,
- const struct vport *vport,
- const struct tnl_mutable_config *mutable,
- struct dst_entry *dst)
+static void gre_build_header(const struct vport *vport,
+ const struct tnl_mutable_config *mutable,
+ void *header)
{
- struct gre_base_hdr *greh = (struct gre_base_hdr *)skb_transport_header(skb);
- __be32 *options = (__be32 *)(skb_network_header(skb) + mutable->tunnel_hlen
- - GRE_HEADER_SECTION);
+ struct gre_base_hdr *greh = header;
+ __be32 *options = (__be32 *)(greh + 1);
greh->protocol = htons(ETH_P_TEB);
greh->flags = 0;
- /* Work backwards over the options so the checksum is last. */
+ if (mutable->port_config.flags & TNL_F_CSUM) {
+ greh->flags |= GRE_CSUM;
+ *options = 0;
+ options++;
+ }
+
if (mutable->port_config.out_key ||
- mutable->port_config.flags & TNL_F_OUT_KEY_ACTION) {
+ mutable->port_config.flags & TNL_F_OUT_KEY_ACTION)
greh->flags |= GRE_KEY;
- if (mutable->port_config.flags & TNL_F_OUT_KEY_ACTION)
- *options = OVS_CB(skb)->tun_id;
- else
- *options = mutable->port_config.out_key;
+ if (mutable->port_config.out_key)
+ *options = mutable->port_config.out_key;
+}
+
+static struct sk_buff *gre_update_header(const struct vport *vport,
+ const struct tnl_mutable_config *mutable,
+ struct dst_entry *dst,
+ struct sk_buff *skb)
+{
+ __be32 *options = (__be32 *)(skb_network_header(skb) + mutable->tunnel_hlen
+ - GRE_HEADER_SECTION);
+ /* Work backwards over the options so the checksum is last. */
+ if (mutable->port_config.flags & TNL_F_OUT_KEY_ACTION) {
+ *options = OVS_CB(skb)->tun_id;
options--;
}
- if (mutable->port_config.flags & TNL_F_CSUM) {
- greh->flags |= GRE_CSUM;
-
- *options = 0;
+ if (mutable->port_config.flags & TNL_F_CSUM)
*(__sum16 *)options = csum_fold(skb_checksum(skb,
- sizeof(struct iphdr),
- skb->len - sizeof(struct iphdr),
+ skb_transport_offset(skb),
+ skb->len - skb_transport_offset(skb),
0));
- }
-
/*
* Allow our local IP stack to fragment the outer packet even if the
* DF bit is set as a last resort.
@@ -329,6 +337,7 @@ struct tnl_ops gre_tnl_ops = {
.ipproto = IPPROTO_GRE,
.hdr_len = gre_hdr_len,
.build_header = gre_build_header,
+ .update_header = gre_update_header,
};
static struct vport *gre_create(const char *name, const void __user *config)
@@ -346,20 +355,14 @@ static int gre_init(void)
int err;
err = inet_add_protocol(&gre_protocol_handlers, IPPROTO_GRE);
- if (err) {
+ if (err)
pr_warn("cannot register gre protocol handler\n");
- goto out;
- }
-
- err = tnl_init();
-out:
return err;
}
static void gre_exit(void)
{
- tnl_exit();
inet_del_protocol(&gre_protocol_handlers, IPPROTO_GRE);
}
diff --git a/datapath/vport-internal_dev.c b/datapath/vport-internal_dev.c
index f37d20a21..16ba64d71 100644
--- a/datapath/vport-internal_dev.c
+++ b/datapath/vport-internal_dev.c
@@ -82,6 +82,7 @@ static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
skb_reset_mac_header(skb);
compute_ip_summed(skb, true);
+ OVS_CB(skb)->flow = NULL;
vport_receive(vport, skb);
@@ -293,7 +294,7 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
struct vport_ops internal_vport_ops = {
.type = "internal",
- .flags = VPORT_F_REQUIRED | VPORT_F_GEN_STATS,
+ .flags = VPORT_F_REQUIRED | VPORT_F_GEN_STATS | VPORT_F_FLOW,
.create = internal_dev_create,
.destroy = internal_dev_destroy,
.attach = internal_dev_attach,
diff --git a/datapath/vport-patch.c b/datapath/vport-patch.c
index d55a1bf2d..62fd71f79 100644
--- a/datapath/vport-patch.c
+++ b/datapath/vport-patch.c
@@ -136,7 +136,10 @@ static struct vport *patch_create(const char *name, const void __user *config)
}
vport_gen_rand_ether_addr(patch_vport->devconf->eth_addr);
- patch_vport->devconf->mtu = ETH_DATA_LEN;
+
+ /* Make the default MTU fairly large so that it doesn't become the
+ * bottleneck on systems using jumbo frames. */
+ patch_vport->devconf->mtu = 65535;
return vport;
diff --git a/datapath/vport.c b/datapath/vport.c
index 4f3b466c7..bbc711e1e 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -1217,6 +1217,9 @@ void vport_receive(struct vport *vport, struct sk_buff *skb)
local_bh_enable();
}
+ if (!(vport->ops->flags & VPORT_F_FLOW))
+ OVS_CB(skb)->flow = NULL;
+
if (!(vport->ops->flags & VPORT_F_TUN_ID))
OVS_CB(skb)->tun_id = 0;
diff --git a/datapath/vport.h b/datapath/vport.h
index 4bfbcc825..5baea6d41 100644
--- a/datapath/vport.h
+++ b/datapath/vport.h
@@ -112,7 +112,8 @@ struct vport {
#define VPORT_F_REQUIRED (1 << 0) /* If init fails, module loading fails. */
#define VPORT_F_GEN_STATS (1 << 1) /* Track stats at the generic layer. */
-#define VPORT_F_TUN_ID (1 << 2) /* Sets OVS_CB(skb)->tun_id. */
+#define VPORT_F_FLOW (1 << 2) /* Sets OVS_CB(skb)->flow. */
+#define VPORT_F_TUN_ID (1 << 3) /* Sets OVS_CB(skb)->tun_id. */
/**
* struct vport_ops - definition of a type of virtual port
diff --git a/debian/.gitignore b/debian/.gitignore
index 3beef4405..24e62d94b 100644
--- a/debian/.gitignore
+++ b/debian/.gitignore
@@ -3,6 +3,7 @@
*.substvars
/control
/corekeeper
+/copyright
/files
/nicira-switch
/openvswitch
@@ -11,6 +12,7 @@
/openvswitch-controller
/openvswitch-datapath-source
/openvswitch-dbg
+/openvswitch-ipsec
/openvswitch-pki
/openvswitch-pki-server
/openvswitch-switch
diff --git a/debian/automake.mk b/debian/automake.mk
index 5a23d4632..7c73831a2 100644
--- a/debian/automake.mk
+++ b/debian/automake.mk
@@ -4,6 +4,7 @@ EXTRA_DIST += \
debian/control \
debian/control.modules.in \
debian/copyright \
+ debian/copyright.in \
debian/corekeeper.cron.daily \
debian/corekeeper.init \
debian/corekeeper.override \
@@ -23,6 +24,9 @@ EXTRA_DIST += \
debian/openvswitch-datapath-source.copyright \
debian/openvswitch-datapath-source.dirs \
debian/openvswitch-datapath-source.install \
+ debian/openvswitch-ipsec.dirs \
+ debian/openvswitch-ipsec.init \
+ debian/openvswitch-ipsec.install \
debian/openvswitch-pki-server.apache2 \
debian/openvswitch-pki-server.dirs \
debian/openvswitch-pki-server.install \
@@ -37,6 +41,11 @@ EXTRA_DIST += \
debian/openvswitch-switch.postinst \
debian/openvswitch-switch.postrm \
debian/openvswitch-switch.template \
+ debian/ovs-bugtool \
+ debian/ovs-bugtool.8 \
+ debian/ovs-monitor-ipsec \
+ debian/python-openvswitch.dirs \
+ debian/python-openvswitch.install \
debian/rules \
debian/rules.modules
@@ -50,3 +59,12 @@ check-debian-changelog-version:
fi
ALL_LOCAL += check-debian-changelog-version
DIST_HOOKS += check-debian-changelog-version
+
+$(srcdir)/debian/copyright: AUTHORS debian/copyright.in
+ { sed -n -e '/%AUTHORS%/q' -e p < $(srcdir)/debian/copyright.in; \
+ sed '1,/^$$/d' $(srcdir)/AUTHORS | \
+ sed -n -e '/^$$/q' -e 's/^/ /p'; \
+ sed -e '1,/%AUTHORS%/d' $(srcdir)/debian/copyright.in; \
+ } > $@
+
+DISTCLEANFILES += debian/copyright
diff --git a/debian/changelog b/debian/changelog
index 216e089e1..6a8ab5879 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,18 @@
+openvswitch (1.1.0pre2) unstable; urgency=low
+
+ * Bug fixes
+
+ -- Open vSwitch team <dev@openvswitch.org> Mon, 13 Sep 2010 21:50:00 +0000
+
+openvswitch (1.1.0pre1) unstable; urgency=low
+
+ * OpenFlow 1.0 slicing (QoS) functionality
+ * Python bindings for configuration database (no write support)
+ * Performance and scalability improvements
+ * Bug fixes
+
+ -- Open vSwitch team <dev@openvswitch.org> Tue, 31 Aug 2010 23:20:00 +0000
+
openvswitch (1.0.1) unstable; urgency=low
* New upstream version.
diff --git a/debian/control b/debian/control
index c07eca08a..0efcf925b 100644
--- a/debian/control
+++ b/debian/control
@@ -4,8 +4,9 @@ Priority: extra
Maintainer: Open vSwitch developers <dev@openvswitch.org>
Uploaders: Ben Pfaff <pfaffben@debian.org>, Simon Horman <horms@debian.org>
Build-Depends:
- debhelper (>= 5), autoconf (>= 2.64), automake1.10, libssl-dev,
- pkg-config (>= 0.21), po-debconf, bzip2, openssl, python
+ debhelper (>= 5), autoconf (>= 2.64), automake (>= 1.10) | automake1.10,
+ libssl-dev, pkg-config (>= 0.21), po-debconf, bzip2, openssl, python,
+ python-support (>= 0.8.4)
Standards-Version: 3.9.1
Homepage: http://openvswitch.org/
@@ -23,7 +24,8 @@ Description: Source code for Open vSwitch datapath Linux module
Package: openvswitch-common
Architecture: any
-Depends: ${shlibs:Depends}, openssl, ${misc:Depends}
+Depends: ${shlibs:Depends}, openssl, ${misc:Depends}, python
+Suggests: ethtool
Description: Open vSwitch common components
openvswitch-common provides components required by both openvswitch-switch
and openvswitch-controller.
@@ -40,6 +42,19 @@ Description: Open vSwitch switch implementations
.
Open vSwitch is a full-featured software-based Ethernet switch.
+Package: openvswitch-ipsec
+Architecture: any
+Depends:
+ ${shlibs:Depends}, ${misc:Depends}, ${python:Depends}, ipsec-tools, racoon,
+ openvswitch-common (= ${binary:Version}),
+ openvswitch-switch (= ${binary:Version}),
+ python-openvswitch (= ${binary:Version})
+Description: Open vSwitch GRE-over-IPsec support
+ The ovs-monitor-ipsec script provides support for encrypting GRE
+ tunnels with IPsec.
+ .
+ Open vSwitch is a full-featured software-based Ethernet switch.
+
Package: openvswitch-pki
Architecture: all
Depends:
@@ -89,8 +104,17 @@ Depends:
${shlibs:Depends}, ${misc:Depends},
openvswitch-common (= ${binary:Version}),
openvswitch-controller (= ${binary:Version}),
+ openvswitch-ipsec (= ${binary:Version}),
openvswitch-switch (= ${binary:Version})
Description: Debug symbols for Open vSwitch packages
This package contains the debug symbols for all the other openvswitch-*
packages. Install it to debug one of them or to examine a core dump
produced by one of them.
+
+Package: python-openvswitch
+Architecture: all
+Section: python
+Depends: ${python:Depends}, openvswitch-switch (= ${binary:Version})
+Provides: ${python:Provides}
+Description: Python bindings for Open vSwitch
+ This package contains the full Python bindings for the Open vSwitch database.
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index 56c4f2bbe..000000000
--- a/debian/copyright
+++ /dev/null
@@ -1,67 +0,0 @@
-The original sources for this package can be found at:
-
- http://openvswitch.org/
-
-
-Upstream Authors (from AUTHORS):
-
- Andy Southgate andy.southgate@citrix.com
- Ben Pfaff blp@nicira.com
- Bryan Phillippe bp@toroki.com
- Dan Wendlandt dan@nicira.com
- David Erickson derickso@stanford.edu
- Glen Gibb grg@stanford.edu
- Ian Campbell Ian.Campbell@citrix.com
- Jean Tourrilhes jt@hpl.hp.com
- Jeremy Stribling strib@nicira.com
- Jesse Gross jesse@nicira.com
- Jun Nakajima jun.nakajima@intel.com
- Justin Pettit jpettit@nicira.com
- Keith Amidon keith@nicira.com
- Martin Casado casado@nicira.com
- Natasha Gude natasha@nicira.com
- Neil McKee neil.mckee@inmon.com
- Paul Fazzone pfazzone@nicira.com
- Reid Price reid@nicira.com
- Simon Horman horms@verge.net.au
- Tetsuo NAKAGAWA nakagawa@mxc.nes.nec.co.jp
- Thomas Lacroix thomas.lacroix@citrix.com
- Todd Deshane deshantm@gmail.com
- Tom Everman teverman@google.com
- Tsvi Slonim tsvi@toroki.com
- Wei Yongjun yjwei@cn.fujitsu.com
- Yu Zhiguo yuzg@cn.fujitsu.com
-
-Upstream Copyright Holders:
-
- Copyright (c) 2007, 2008, 2009, 2010 Nicira Networks.
- Copyright (c) 2010 Jean Tourrilhes - HP-Labs.
- Copyright (c) 2008,2009,2010 Citrix Systems, Inc.
- and authors listed above.
-
-License:
-
-* The following components are licensed under the GNU General Public Licence
- version 2.
-
- datapath/
-
- On Debian systems, the complete text of the GNU General Public Licence
- version 2 can be found in `/usr/share/common-licenses/GPL-2'
-
-* The following components are dual-licensed under the
- GNU General Public Licence version 3 and the Apache Licence Version 2.0.
-
- include/openvswitch/
-
- On Debian systems, the complete text of the GNU General Public Licence
- version 2 can be found in `/usr/share/common-licenses/GPL-2'.
- On Debian systems, the complete text of the Apache License version 2.0
- can be found in '/usr/share/common-licenses/Apache-2.0'.
-
-* All other components of this package are licensed under
- The Apache License Version 2.0.
-
- On Debian systems, the complete text of the Apache License version 2.0
- can be found in '/usr/share/common-licenses/Apache-2.0'.
-
diff --git a/debian/copyright.in b/debian/copyright.in
new file mode 100644
index 000000000..f131ea92d
--- /dev/null
+++ b/debian/copyright.in
@@ -0,0 +1,146 @@
+The original sources for this package can be found at:
+
+ http://openvswitch.org/
+
+Upstream Authors (from AUTHORS):
+
+%AUTHORS%
+
+Upstream Copyright Holders:
+
+ Copyright (c) 2007, 2008, 2009, 2010 Nicira Networks.
+ Copyright (c) 2010 Jean Tourrilhes - HP-Labs.
+ Copyright (c) 2008,2009,2010 Citrix Systems, Inc.
+ and authors listed above.
+
+License:
+
+* The following components are licensed under the
+ GNU Lesser General Public Licence version 2.1 only
+ with the exception clause below as a pre-amble.
+
+ xenserver/etc_xensource_scripts_vif
+ xenserver/opt_xensource_libexec_InterfaceReconfigure.py
+ xenserver/opt_xensource_libexec_InterfaceReconfigureBridge.py
+ xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py
+ xenserver/opt_xensource_libexec_interface-reconfigure
+ xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py
+ xenserver/usr_sbin_xen-bugtool
+
+ * These components are only distributed in the source package.
+ They do not appear in any binary packages.
+
+ On Debian systems, the complete text of the
+ GNU Lesser General Public Licence version 2.1 can be found in
+ `/usr/share/common-licenses/LGPL-2.1'
+
+ The exception clause pre-amble reads:
+
+ As a special exception to the GNU Lesser General Public License, you
+ may link, statically or dynamically, a "work that uses the Library"
+ with a publicly distributed version of the Library to produce an
+ executable file containing portions of the Library, and distribute
+ that executable file under terms of your choice, without any of the
+ additional requirements listed in clause 6 of the GNU Lesser General
+ Public License. By "a publicly distributed version of the Library",
+ we mean either the unmodified Library as distributed, or a
+ modified version of the Library that is distributed under the
+ conditions defined in clause 3 of the GNU Library General Public
+ License. This exception does not however invalidate any other reasons
+ why the executable file might be covered by the GNU Lesser General
+ Public License.
+
+* The following components are licensed under the
+ GNU Lesser General Public Licence version 2.1.
+
+ debian/ovs-bugtool
+ xenserver/usr_sbin_xen-bugtool [*]
+
+ * This component is only distributed in the source package.
+ It does not appear in any binary packages.
+
+ On Debian systems, the complete text of the
+ GNU Lesser General Public Licence version 2.1 can be found in
+ `/usr/share/common-licenses/LGPL-2.1'
+
+* The following component is licensed under the GNU General Public Licence
+ version 2.
+
+ datapath/
+
+ On Debian systems, the complete text of the GNU General Public Licence
+ version 2 can be found in `/usr/share/common-licenses/GPL-2'
+
+* The following components are dual-licensed under the
+ GNU General Public Licence version 3 and the Apache Licence Version 2.0.
+
+ include/openvswitch/
+
+ On Debian systems, the complete text of the GNU General Public Licence
+ version 2 can be found in `/usr/share/common-licenses/GPL-2'.
+ On Debian systems, the complete text of the Apache License version 2.0
+ can be found in '/usr/share/common-licenses/Apache-2.0'.
+
+* The following component is licensed under the
+ Python Software Foundation License Version 2.
+
+ xenserver/uuid.py
+
+ * This component is only distributed in the source package.
+ It does not appear in any binary packages.
+
+ PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
+ --------------------------------------------
+
+ 1. This LICENSE AGREEMENT is between the Python Software Foundation
+ ("PSF"), and the Individual or Organization ("Licensee") accessing and
+ otherwise using this software ("Python") in source or binary form and
+ its associated documentation.
+
+ 2. Subject to the terms and conditions of this License Agreement, PSF
+ hereby grants Licensee a nonexclusive, royalty-free, world-wide
+ license to reproduce, analyze, test, perform and/or display publicly,
+ prepare derivative works, distribute, and otherwise use Python
+ alone or in any derivative version, provided, however, that PSF's
+ License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
+ 2001, 2002, 2003, 2004, 2005, 2006, 2007 Python Software Foundation;
+ All Rights Reserved" are retained in Python alone or in any derivative
+ version prepared by Licensee.
+
+ 3. In the event Licensee prepares a derivative work that is based on
+ or incorporates Python or any part thereof, and wants to make
+ the derivative work available to others as provided herein, then
+ Licensee hereby agrees to include in any such work a brief summary of
+ the changes made to Python.
+
+ 4. PSF is making Python available to Licensee on an "AS IS"
+ basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+ IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
+ DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
+ FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
+ INFRINGE ANY THIRD PARTY RIGHTS.
+
+ 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
+ FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
+ A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
+ OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+
+ 6. This License Agreement will automatically terminate upon a material
+ breach of its terms and conditions.
+
+ 7. Nothing in this License Agreement shall be deemed to create any
+ relationship of agency, partnership, or joint venture between PSF and
+ Licensee. This License Agreement does not grant permission to use PSF
+ trademarks or trade name in a trademark sense to endorse or promote
+ products or services of Licensee, or any third party.
+
+ 8. By copying, installing or otherwise using Python, Licensee
+ agrees to be bound by the terms and conditions of this License
+ Agreement.
+
+* All other components of this package are licensed under
+ The Apache License Version 2.0.
+
+ On Debian systems, the complete text of the Apache License version 2.0
+ can be found in '/usr/share/common-licenses/Apache-2.0'.
+
diff --git a/debian/openvswitch-common.install b/debian/openvswitch-common.install
index fab991666..1733612ae 100644
--- a/debian/openvswitch-common.install
+++ b/debian/openvswitch-common.install
@@ -4,4 +4,6 @@ _debian/utilities/ovs-appctl usr/sbin
_debian/utilities/ovs-ofctl usr/sbin
_debian/utilities/ovs-parse-leaks usr/bin
_debian/utilities/ovs-pki usr/sbin
+debian/ovs-bugtool usr/sbin
+debian/ovs-bugtool.8 usr/share/man/man8
vswitchd/vswitch.ovsschema usr/share/openvswitch
diff --git a/debian/openvswitch-controller.init b/debian/openvswitch-controller.init
index d489869ed..4781f83f2 100755
--- a/debian/openvswitch-controller.init
+++ b/debian/openvswitch-controller.init
@@ -175,10 +175,10 @@ force_stop() {
if running ; then
kill -15 $pid
# Is it really dead?
- sleep "$DIETIME"s
+ sleep "$DODTIME"
if running ; then
kill -9 $pid
- sleep "$DIETIME"s
+ sleep "$DODTIME"
if running ; then
echo "Cannot kill $NAME (pid=$pid)!"
exit 1
@@ -237,7 +237,7 @@ case "$1" in
log_daemon_msg "Restarting $DESC" "$NAME"
stop_server
# Wait some sensible amount, some server need this
- [ -n "$DIETIME" ] && sleep $DIETIME
+ [ -n "$DODTIME" ] && sleep $DODTIME
start_server
running
log_end_msg $?
diff --git a/debian/openvswitch-datapath-source.README.Debian b/debian/openvswitch-datapath-source.README.Debian
index 73bba7a14..a9132c5f3 100644
--- a/debian/openvswitch-datapath-source.README.Debian
+++ b/debian/openvswitch-datapath-source.README.Debian
@@ -5,9 +5,9 @@ Open vSwitch for Debian
- Building with module-assistant:
- $ module-assistant auto-install openvswitch
+ $ module-assistant auto-install openvswitch-datapath
or
- $ m-a a-i openvswitch
+ $ m-a a-i openvswitch-datapath
If kernel source or headers are in a non-standard directory, add
the option -k /path/to/kernel/source with the correct path.
diff --git a/debian/openvswitch-ipsec.dirs b/debian/openvswitch-ipsec.dirs
new file mode 100644
index 000000000..02130d0e9
--- /dev/null
+++ b/debian/openvswitch-ipsec.dirs
@@ -0,0 +1 @@
+usr/share/openvswitch/scripts
diff --git a/debian/openvswitch-ipsec.init b/debian/openvswitch-ipsec.init
new file mode 100755
index 000000000..ba82f5139
--- /dev/null
+++ b/debian/openvswitch-ipsec.init
@@ -0,0 +1,188 @@
+#!/bin/sh
+#
+# Copyright (c) 2007, 2009 Javier Fernandez-Sanguino <jfs@debian.org>
+#
+# This is free software; you may redistribute it and/or modify
+# it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2,
+# or (at your option) any later version.
+#
+# This is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License with
+# the Debian operating system, in /usr/share/common-licenses/GPL; if
+# not, write to the Free Software Foundation, Inc., 59 Temple Place,
+# Suite 330, Boston, MA 02111-1307 USA
+#
+### BEGIN INIT INFO
+# Provides: openvswitch-ipsec
+# Required-Start: $network $local_fs $remote_fs
+# Required-Stop: $remote_fs
+# Default-Start: 2 3 4 5
+# Default-Stop: 0 1 6
+# Short-Description: Open vSwitch GRE-over-IPsec daemon
+### END INIT INFO
+
+PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
+
+DAEMON=/usr/share/openvswitch/scripts/ovs-monitor-ipsec # Daemon's location
+NAME=ovs-monitor-ipsec # Introduce the short server's name here
+LOGDIR=/var/log/openvswitch # Log directory to use
+
+PIDFILE=/var/run/openvswitch/$NAME.pid
+
+test -x $DAEMON || exit 0
+
+. /lib/lsb/init-functions
+
+DODTIME=10 # Time to wait for the server to die, in seconds
+ # If this value is set too low, some servers
+ # might not get the chance to die gracefully and
+ # 'restart' will not work
+
+set -e
+
+# running_pid PID NAME
+# Returns 0 when /proc/PID exists and the second word of the process's
+# command line equals NAME; returns 1 otherwise (including an empty PID).
+running_pid() {
+# Check if a given process pid's cmdline matches a given name
+ pid=$1
+ name=$2
+ [ -z "$pid" ] && return 1
+ [ ! -d /proc/$pid ] && return 1
+ # /proc/PID/cmdline is NUL-separated; turn the NULs into spaces and take
+ # the second field.  Presumably field 2 is used because the daemon is a
+ # script, so field 1 would be its interpreter -- TODO confirm.
+ cmd=`cat /proc/$pid/cmdline | tr "\000" " "|cut -d " " -f 2`
+ # Is this the expected server
+ [ "$cmd" != "$name" ] && return 1
+ return 0
+}
+
+# running
+# Returns 0 when $PIDFILE exists and the PID stored in it belongs to a
+# process whose command line matches $DAEMON (see running_pid); returns 1
+# otherwise.  As a side effect the PID that was read is left in the global
+# shell variable 'pid', which force_stop uses for its kill commands.
+running() {
+# Check if the process is running looking at /proc
+# (works for all users)
+
+ # No pidfile, probably no daemon present
+ [ ! -f "$PIDFILE" ] && return 1
+ pid=`cat $PIDFILE`
+ running_pid $pid $DAEMON || return 1
+ return 0
+}
+
+# start_server
+# Create the runtime directory if needed and launch the daemon detached,
+# pointing it at the local ovsdb socket.  Always returns 0 when the launch
+# command itself succeeds; callers check 'running' to confirm the result.
+start_server() {
+    if [ ! -d /var/run/openvswitch ]; then
+        install -d -m 755 -o root -g root /var/run/openvswitch
+    fi
+
+    # Launch through $DAEMON rather than repeating the path, so the program
+    # started here is always the one that running() looks for in /proc.
+    PYTHONPATH=/usr/share/openvswitch/python \
+        "$DAEMON" \
+        --pidfile-name="$PIDFILE" --detach --monitor \
+        unix:/var/run/openvswitch/db.sock
+
+    return 0
+}
+
+stop_server() {
+ if [ -e $PIDFILE ]; then
+ kill `cat $PIDFILE`
+ fi
+
+ return 0
+}
+
+# force_stop
+# Escalating kill: SIGTERM first, then SIGKILL, sleeping $DODTIME seconds
+# after each signal and re-checking with 'running'.  Exits the whole script
+# with status 1 if the process survives SIGKILL; otherwise removes $PIDFILE.
+# Does nothing when $PIDFILE does not exist.
+force_stop() {
+# Force the process to die killing it manually
+ [ ! -e "$PIDFILE" ] && return
+ # 'running' leaves the daemon's PID in $pid, which the kill commands use.
+ if running ; then
+ kill -15 $pid
+ # Is it really dead?
+ sleep "$DODTIME"
+ if running ; then
+ kill -9 $pid
+ sleep "$DODTIME"
+ if running ; then
+ echo "Cannot kill $NAME (pid=$pid)!"
+ exit 1
+ fi
+ fi
+ fi
+ rm -f $PIDFILE
+}
+
+
+# Dispatch on the init-script action given as the first argument.
+case "$1" in
+    start)
+        log_daemon_msg "Starting $NAME"
+        # Check if it's running first
+        if running ; then
+            log_progress_msg "apparently already running"
+            log_end_msg 0
+            exit 0
+        fi
+        if start_server && running ; then
+            # It's ok, the server started and is running
+            log_end_msg 0
+        else
+            # Either we could not start it or it is not running
+            # after we did
+            # NOTE: Some servers might die some time after they start,
+            # this code does not try to detect this and might give
+            # a false positive (use 'status' for that)
+            log_end_msg 1
+        fi
+        ;;
+    stop)
+        log_daemon_msg "Stopping $NAME"
+        if running ; then
+            # Only stop the server if we see it running
+            stop_server
+            log_end_msg $?
+        else
+            # If it's not running don't do anything
+            log_progress_msg "apparently not running"
+            log_end_msg 0
+            exit 0
+        fi
+        ;;
+    force-stop)
+        # First try to stop the program gracefully
+        $0 stop
+        if running; then
+            # If it's still running try to kill it more forcefully
+            log_daemon_msg "Stopping (force) $NAME"
+            force_stop
+            log_end_msg $?
+        fi
+        ;;
+    restart|force-reload)
+        log_daemon_msg "Restarting $NAME"
+        stop_server
+        # Wait a sensible amount of time; some servers need this
+        [ -n "$DODTIME" ] && sleep $DODTIME
+        start_server
+        # Test 'running' inside an 'if': under 'set -e' a bare failing
+        # 'running' would terminate the script before log_end_msg could
+        # report the failure.
+        if running ; then
+            log_end_msg 0
+        else
+            log_end_msg 1
+        fi
+        ;;
+    status)
+        log_daemon_msg "Checking status of $NAME"
+        if running ; then
+            log_progress_msg "running"
+            log_end_msg 0
+        else
+            log_progress_msg "apparently not running"
+            log_end_msg 1
+            exit 1
+        fi
+        ;;
+    # Use this if the daemon cannot reload
+    reload)
+        log_warning_msg "Reloading $NAME daemon: not implemented, as the daemon"
+        log_warning_msg "cannot re-read the config file (use restart)."
+        ;;
+    *)
+        N=/etc/init.d/openvswitch-ipsec
+        echo "Usage: $N {start|stop|force-stop|restart|force-reload|status}" >&2
+        exit 1
+        ;;
+esac
+
+exit 0
diff --git a/debian/openvswitch-ipsec.install b/debian/openvswitch-ipsec.install
new file mode 100644
index 000000000..72cacfa25
--- /dev/null
+++ b/debian/openvswitch-ipsec.install
@@ -0,0 +1 @@
+debian/ovs-monitor-ipsec usr/share/openvswitch/scripts
diff --git a/debian/openvswitch-switch.init b/debian/openvswitch-switch.init
index a933a21ae..0907cdf7c 100755
--- a/debian/openvswitch-switch.init
+++ b/debian/openvswitch-switch.init
@@ -93,10 +93,10 @@ force_stop() {
[ ! -f "$pidfile" ] && return
if running $name; then
kill $pid
- [ -n "$DODTIME" ] && sleep "$DODTIME"s
+ [ -n "$DODTIME" ] && sleep "$DODTIME"
if running $name; then
kill -KILL $pid
- [ -n "$DODTIME" ] && sleep "$DODTIME"s
+ [ -n "$DODTIME" ] && sleep "$DODTIME"
if running $name; then
echo "Cannot kill $name (pid=$pid)!"
exit 1
@@ -157,7 +157,7 @@ load_module() {
echo "For instructions, read"
echo "/usr/share/doc/openvswitch-datapath-source/README.Debian"
fi
- exit 1
+ exit 0
fi
}
@@ -230,12 +230,16 @@ case "$1" in
install -d -m 755 -o root -g root /var/log/openvswitch
fi
+ if [ ! -d /var/log/openvswitch/cores ]; then
+ install -d -m 755 -o root -g root /var/log/openvswitch/cores
+ fi
+
# Start ovsdb-server.
set --
set -- "$@" /etc/openvswitch/conf.db
set -- "$@" --verbose=ANY:console:emer --verbose=ANY:syslog:err
set -- "$@" --log-file=/var/log/openvswitch/ovsdb-server.log
- set -- "$@" --detach --pidfile $monitor_opt
+ set -- "$@" --detach --no-chdir --pidfile $monitor_opt
set -- "$@" --remote punix:/var/run/openvswitch/db.sock
set -- "$@" --remote db:Open_vSwitch,managers
set -- "$@" --private-key=db:SSL,private_key
@@ -245,6 +249,7 @@ case "$1" in
echo -n "Starting ovsdb-server: "
start-stop-daemon --start --quiet \
--pidfile /var/run/openvswitch/ovsdb-server.pid \
+ --chdir /var/log/openvswitch/cores \
--exec $ovsdb_server -- "$@"
if running ovsdb-server; then
echo "ovsdb-server."
@@ -252,18 +257,19 @@ case "$1" in
echo " ERROR."
fi
- ovs-vsctl --no-wait init
+ ovs-vsctl --no-wait --timeout=5 init
# Start ovs-vswitchd.
set --
set -- "$@" --verbose=ANY:console:emer --verbose=ANY:syslog:err
set -- "$@" --log-file=/var/log/openvswitch/ovs-vswitchd.log
- set -- "$@" --detach --pidfile $monitor_opt
+ set -- "$@" --detach --no-chdir --pidfile $monitor_opt
set -- "$@" unix:/var/run/openvswitch/db.sock
set -- "$@" $OVS_VSWITCHD_OPTS
echo -n "Starting ovs-vswitchd: "
start-stop-daemon --start --quiet \
--pidfile /var/run/openvswitch/ovs-vswitchd.pid \
+ --chdir /var/log/openvswitch/cores \
--exec $ovs_vswitchd -- "$@"
if running ovs-vswitchd; then
echo "ovs-vswitchd."
@@ -275,12 +281,14 @@ case "$1" in
echo -n "Stopping ovs-vswitchd: "
start-stop-daemon --stop --quiet --oknodo --retry 5 \
--pidfile /var/run/openvswitch/ovs-vswitchd.pid \
+ --chdir /var/log/openvswitch/cores \
--exec $ovs_vswitchd
echo "ovs-vswitchd."
echo -n "Stopping ovsdb-server: "
start-stop-daemon --stop --quiet --oknodo --retry 5 \
--pidfile /var/run/openvswitch/ovsdb-server.pid \
+ --chdir /var/log/openvswitch/cores \
--exec $ovsdb_server
echo "ovsdb-server."
;;
diff --git a/debian/openvswitch-switch.postinst b/debian/openvswitch-switch.postinst
index 4be5a30c7..74b52ba90 100755
--- a/debian/openvswitch-switch.postinst
+++ b/debian/openvswitch-switch.postinst
@@ -33,17 +33,6 @@ case "$1" in
fi
done
fi
-
- if /etc/init.d/openvswitch-switch status >/dev/null 2>&1; then
- running=true
- /etc/init.d/openvswitch-switch stop
- else
- running=false
- fi
-
- if $running; then
- /etc/init.d/openvswitch-switch start
- fi
;;
abort-upgrade|abort-remove|abort-deconfigure)
diff --git a/debian/ovs-bugtool b/debian/ovs-bugtool
new file mode 100755
index 000000000..6f792eea7
--- /dev/null
+++ b/debian/ovs-bugtool
@@ -0,0 +1,1114 @@
+#!/usr/bin/env python
+
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# Copyright (c) 2005, 2007 XenSource Ltd.
+# Copyright (c) 2010, Nicira Networks.
+
+#
+# To add new entries to the bugtool, you need to:
+#
+# Create a new capability. These declare the new entry to the GUI, including
+# the expected size, time to collect, privacy implications, and whether the
+# capability should be selected by default. One capability may refer to
+# multiple files, assuming that they can be reasonably grouped together, and
+# have the same privacy implications. You need:
+#
+# A new CAP_ constant.
+# A cap() invocation to declare the capability.
+#
+# You then need to add calls to main() to collect the files. These will
+# typically be calls to the helpers file_output(), tree_output(), cmd_output(),
+# or func_output().
+#
+
+import getopt
+import re
+import os
+import StringIO
+import sys
+import tarfile
+import time
+import commands
+import pprint
+from xml.dom.minidom import parse, getDOMImplementation
+import zipfile
+from subprocess import Popen, PIPE
+from select import select
+from signal import SIGTERM, SIGUSR1
+import md5
+import platform
+import fcntl
+import glob
+import urllib
+import socket
+import base64
+
+sys.path.append('/usr/lib/python')
+sys.path.append('/usr/lib64/python')
+
+OS_RELEASE = platform.release()
+
+#
+# Files & directories
+#
+
+BUG_DIR = "/var/log/openvswitch"
+PLUGIN_DIR = "/etc/openvswitch/bugtool"
+GRUB_CONFIG = '/boot/grub/menu.lst'
+BOOT_KERNEL = '/boot/vmlinuz-' + OS_RELEASE
+BOOT_INITRD = '/boot/initrd-' + OS_RELEASE + '.img'
+PROC_PARTITIONS = '/proc/partitions'
+FSTAB = '/etc/fstab'
+PROC_MOUNTS = '/proc/mounts'
+PROC_CPUINFO = '/proc/cpuinfo'
+PROC_MEMINFO = '/proc/meminfo'
+PROC_IOPORTS = '/proc/ioports'
+PROC_INTERRUPTS = '/proc/interrupts'
+PROC_SCSI = '/proc/scsi/scsi'
+PROC_VERSION = '/proc/version'
+PROC_MODULES = '/proc/modules'
+PROC_DEVICES = '/proc/devices'
+PROC_FILESYSTEMS = '/proc/filesystems'
+PROC_CMDLINE = '/proc/cmdline'
+PROC_CONFIG = '/proc/config.gz'
+PROC_USB_DEV = '/proc/bus/usb/devices'
+PROC_NET_SOFTNET_STAT = '/proc/net/softnet_stat'
+MODPROBE_DIR = '/etc/modprobe.d'
+RESOLV_CONF = '/etc/resolv.conf'
+NSSWITCH_CONF = '/etc/nsswitch.conf'
+NTP_CONF = '/etc/ntp.conf'
+HOSTS = '/etc/hosts'
+HOSTS_ALLOW = '/etc/hosts.allow'
+HOSTS_DENY = '/etc/hosts.deny'
+DHCP_LEASE_DIR = '/var/lib/dhcp3'
+OPENVSWITCH_CORE_DIR = '/var/log/openvswitch/cores'
+OPENVSWITCH_DEFAULT_SWITCH = '/etc/default/openvswitch-switch'
+OPENVSWITCH_DEFAULT_CONTROLLER = '/etc/default/openvswitch-controller'
+OPENVSWITCH_CONF_DB = '/etc/openvswitch/conf.db'
+OPENVSWITCH_VSWITCHD_PID = '/var/run/openvswitch/ovs-vswitchd.pid'
+VAR_LOG_DIR = '/var/log/'
+X11_LOGS_DIR = VAR_LOG_DIR
+X11_LOGS_RE = re.compile(r'.*/Xorg\..*$')
+X11_AUTH_DIR = '/root/'
+X11_AUTH_RE = re.compile(r'.*/\.((Xauthority)|(serverauth\.[0-9]*))$')
+PAM_DIR = '/etc/pam.d'
+
+#
+# External programs
+#
+
+ARP = '/usr/sbin/arp'
+CAT = '/bin/cat'
+DF = '/bin/df'
+DMESG = '/bin/dmesg'
+DMIDECODE = '/usr/sbin/dmidecode'
+FDISK = '/sbin/fdisk'
+FIND = '/usr/bin/find'
+IFCONFIG = '/sbin/ifconfig'
+IPTABLES = '/sbin/iptables'
+LOSETUP = '/sbin/losetup'
+LS = '/bin/ls'
+LSPCI = '/usr/bin/lspci'
+MD5SUM = '/usr/bin/md5sum'
+MODINFO = '/sbin/modinfo'
+NETSTAT = '/bin/netstat'
+OVS_DPCTL = '/usr/sbin/ovs-dpctl'
+OVS_OFCTL = '/usr/sbin/ovs-ofctl'
+OVS_VSCTL = '/usr/sbin/ovs-vsctl'
+OVS_APPCTL = '/usr/sbin/ovs-appctl'
+PS = '/bin/ps'
+ROUTE = '/sbin/route'
+SYSCTL = '/sbin/sysctl'
+TC = '/sbin/tc'
+UPTIME = '/usr/bin/uptime'
+ZCAT = '/bin/zcat'
+
+ETHTOOL = '/sbin/ethtool'
+# ETHTOOL recently moved from /usr/sbin to /sbin in debian
+if not os.path.isfile(ETHTOOL):
+ ETHTOOL = '/usr/sbin/ethtool'
+
+#
+# PII -- Personally identifiable information. Of particular concern are
+# things that would identify customers, or their network topology.
+# Passwords are never to be included in any bug report, regardless of any PII
+# declaration.
+#
+# NO -- No PII will be in these entries.
+# YES -- PII will likely or certainly be in these entries.
+# MAYBE -- The user may wish to audit these entries for PII.
+# IF_CUSTOMIZED -- If the files are unmodified, then they will contain no PII,
+# but since we encourage customers to edit these files, PII may have been
+# introduced by the customer. This is used in particular for the networking
+# scripts in dom0.
+#
+
+PII_NO = 'no'
+PII_YES = 'yes'
+PII_MAYBE = 'maybe'
+PII_IF_CUSTOMIZED = 'if_customized'
+KEY = 0
+PII = 1
+MIN_SIZE = 2
+MAX_SIZE = 3
+MIN_TIME = 4
+MAX_TIME = 5
+MIME = 6
+CHECKED = 7
+HIDDEN = 8
+
+MIME_DATA = 'application/data'
+MIME_TEXT = 'text/plain'
+
+INVENTORY_XML_ROOT = "system-status-inventory"
+INVENTORY_XML_SUMMARY = 'system-summary'
+INVENTORY_XML_ELEMENT = 'inventory-entry'
+CAP_XML_ROOT = "system-status-capabilities"
+CAP_XML_ELEMENT = 'capability'
+
+
+CAP_BLOBS = 'blobs'
+CAP_BOOT_LOADER = 'boot-loader'
+CAP_DISK_INFO = 'disk-info'
+CAP_FIRSTBOOT = 'firstboot'
+CAP_HARDWARE_INFO = 'hardware-info'
+CAP_HIGH_AVAILABILITY = 'high-availability'
+CAP_HOST_CRASHDUMP_DUMPS = 'host-crashdump-dumps'
+CAP_HOST_CRASHDUMP_LOGS = 'host-crashdump-logs'
+CAP_KERNEL_INFO = 'kernel-info'
+CAP_LOSETUP_A = 'loopback-devices'
+CAP_NETWORK_CONFIG = 'network-config'
+CAP_NETWORK_STATUS = 'network-status'
+CAP_OEM = 'oem'
+CAP_PAM = 'pam'
+CAP_PROCESS_LIST = 'process-list'
+CAP_PERSISTENT_STATS = 'persistent-stats'
+CAP_SYSTEM_LOGS = 'system-logs'
+CAP_SYSTEM_SERVICES = 'system-services'
+CAP_VNCTERM = 'vncterm'
+CAP_WLB = 'wlb'
+CAP_X11_LOGS = 'X11'
+CAP_X11_AUTH = 'X11-auth'
+
+KB = 1024
+MB = 1024 * 1024
+
+caps = {}
+cap_sizes = {}
+unlimited_data = False
+dbg = False
+
+# Register capability 'key' in the global 'caps' table and reset its
+# accumulated size in 'cap_sizes' to 0.  The tuple stored in 'caps' is laid
+# out in the order of the index constants KEY, PII, MIN_SIZE, MAX_SIZE,
+# MIN_TIME, MAX_TIME, MIME, CHECKED and HIDDEN.  A value of -1 for a size or
+# time means "not set"; callers treat MAX_SIZE == -1 as "no limit".
+def cap(key, pii=PII_MAYBE, min_size=-1, max_size=-1, min_time=-1,
+ max_time=-1, mime=MIME_TEXT, checked=True, hidden=False):
+ caps[key] = (key, pii, min_size, max_size, min_time, max_time, mime,
+ checked, hidden)
+ cap_sizes[key] = 0
+
+
+cap(CAP_BLOBS, PII_NO, max_size=5*MB)
+cap(CAP_BOOT_LOADER, PII_NO, max_size=3*KB,
+ max_time=5)
+cap(CAP_DISK_INFO, PII_MAYBE, max_size=25*KB,
+ max_time=20)
+cap(CAP_FIRSTBOOT, PII_YES, min_size=60*KB, max_size=80*KB)
+cap(CAP_HARDWARE_INFO, PII_MAYBE, max_size=30*KB,
+ max_time=20)
+cap(CAP_HIGH_AVAILABILITY, PII_MAYBE, max_size=5*MB)
+cap(CAP_HOST_CRASHDUMP_DUMPS,PII_YES, checked = False)
+cap(CAP_HOST_CRASHDUMP_LOGS, PII_NO)
+cap(CAP_KERNEL_INFO, PII_MAYBE, max_size=120*KB,
+ max_time=5)
+cap(CAP_LOSETUP_A, PII_MAYBE, max_size=KB, max_time=5)
+cap(CAP_NETWORK_CONFIG, PII_IF_CUSTOMIZED,
+ min_size=0, max_size=20*KB)
+cap(CAP_NETWORK_STATUS, PII_YES, max_size=19*KB,
+ max_time=30)
+cap(CAP_PAM, PII_NO, max_size=30*KB)
+cap(CAP_PERSISTENT_STATS, PII_MAYBE, max_size=50*MB,
+ max_time=60)
+cap(CAP_PROCESS_LIST, PII_YES, max_size=30*KB,
+ max_time=20)
+cap(CAP_SYSTEM_LOGS, PII_MAYBE, max_size=50*MB,
+ max_time=5)
+cap(CAP_SYSTEM_SERVICES, PII_NO, max_size=5*KB,
+ max_time=20)
+cap(CAP_VNCTERM, PII_MAYBE, checked = False)
+cap(CAP_WLB, PII_NO, max_size=3*MB,
+ max_time=20)
+cap(CAP_X11_LOGS, PII_NO, max_size=100*KB)
+cap(CAP_X11_AUTH, PII_NO, max_size=100*KB)
+
+ANSWER_YES_TO_ALL = False
+SILENT_MODE = False
+entries = None
+data = {}
+dev_null = open('/dev/null', 'r+')
+
+# Print x on standard output unless SILENT_MODE is set.
+def output(x):
+ global SILENT_MODE
+ if not SILENT_MODE:
+ print x
+
+# Like output(), but prefixes x with the current local date, time and
+# time zone in square brackets.
+def output_ts(x):
+ output("[%s] %s" % (time.strftime("%x %X %Z"), x))
+
+# Queue a command whose output should be collected under capability 'cap'.
+# Nothing is queued unless 'cap' is one of the selected 'entries'.
+#
+# args:   the command, either as a list of arguments or as a single value.
+# label:  key under which the entry is stored in 'data'.  When omitted it is
+#         derived from args: for a list, the basename of the first element
+#         joined with the remaining elements by spaces; otherwise args itself.
+# filter: stored with the entry and later handed to ProcOutput by
+#         collect_data().
+def cmd_output(cap, args, label = None, filter = None):
+ if cap in entries:
+ if not label:
+ if isinstance(args, list):
+ # work on a copy so the caller's argument list is not modified
+ a = [aa for aa in args]
+ a[0] = os.path.basename(a[0])
+ label = ' '.join(a)
+ else:
+ label = args
+ data[label] = {'cap': cap, 'cmd_args': args, 'filter': filter}
+
+# Queue each existing path in 'path_list' for inclusion under capability
+# 'cap'.  Nothing is queued unless 'cap' is one of the selected 'entries'.
+#
+# A file is accepted while the capability's running total in 'cap_sizes' is
+# still below its MAX_SIZE (or MAX_SIZE is -1, or 'unlimited_data' is set);
+# its size is then added to the running total.  Because the check happens
+# before the size is added, the file that crosses the limit is still
+# included.  Once the limit has been reached, further files are skipped with
+# an "Omitting ..." message.
+def file_output(cap, path_list):
+ if cap in entries:
+ for p in path_list:
+ if os.path.exists(p):
+ if unlimited_data or caps[cap][MAX_SIZE] == -1 or \
+ cap_sizes[cap] < caps[cap][MAX_SIZE]:
+ data[p] = {'cap': cap, 'filename': p}
+ try:
+ s = os.stat(p)
+ cap_sizes[cap] += s.st_size
+ except:
+ # size accounting is best effort; the file stays queued
+ pass
+ else:
+ output("Omitting %s, size constraint of %s exceeded" % (p, cap))
+
+# Recursively walk directory 'path' and queue, via file_output(), every
+# regular file for which matches(fn, pattern, negate) is true.
+# Subdirectories are descended into with the same pattern and negate values.
+# Nothing happens unless 'cap' is one of the selected 'entries' and 'path'
+# exists.  (matches() is defined elsewhere in this file.)
+def tree_output(cap, path, pattern = None, negate = False):
+ if cap in entries:
+ if os.path.exists(path):
+ for f in os.listdir(path):
+ fn = os.path.join(path, f)
+ if os.path.isfile(fn) and matches(fn, pattern, negate):
+ file_output(cap, [fn])
+ elif os.path.isdir(fn):
+ tree_output(cap, fn, pattern, negate)
+
+def func_output(cap, label, func):
+ if cap in entries:
+ t = str(func).split()
+ data[label] = {'cap': cap, 'func': func}
+
+# Gather the output for every entry queued in 'data'.
+#
+# - Entries with 'cmd_args' get an empty StringIOmtime buffer and a
+#   ProcOutput object (built with the capability's MAX_TIME and the entry's
+#   filter); the ProcOutput objects are grouped per capability and handed to
+#   run_procs() at the end.
+# - Entries with a 'filename' under /proc/ are read into memory here and
+#   stored as 'output', subject to the capability's MAX_SIZE.  Read errors
+#   are silently ignored.
+# - Entries with 'func' are called with the capability name; if the call
+#   raises, the exception text is stored as the output instead.  The result
+#   is subject to the capability's MAX_SIZE as well.
+#
+# Entries with any other 'filename' are left untouched by this function.
+def collect_data():
+ process_lists = {}
+
+ for (k, v) in data.items():
+ cap = v['cap']
+ if v.has_key('cmd_args'):
+ v['output'] = StringIOmtime()
+ if not process_lists.has_key(cap):
+ process_lists[cap] = []
+ process_lists[cap].append(ProcOutput(v['cmd_args'], caps[cap][MAX_TIME], v['output'], v['filter']))
+ elif v.has_key('filename') and v['filename'].startswith('/proc/'):
+ # proc files must be read into memory
+ try:
+ f = open(v['filename'], 'r')
+ s = f.read()
+ f.close()
+ if unlimited_data or caps[cap][MAX_SIZE] == -1 or \
+ cap_sizes[cap] < caps[cap][MAX_SIZE]:
+ v['output'] = StringIOmtime(s)
+ cap_sizes[cap] += len(s)
+ else:
+ output("Omitting %s, size constraint of %s exceeded" % (v['filename'], cap))
+ except:
+ pass
+ elif v.has_key('func'):
+ try:
+ s = v['func'](cap)
+ except Exception, e:
+ s = str(e)
+ if unlimited_data or caps[cap][MAX_SIZE] == -1 or \
+ cap_sizes[cap] < caps[cap][MAX_SIZE]:
+ v['output'] = StringIOmtime(s)
+ cap_sizes[cap] += len(s)
+ else:
+ output("Omitting %s, size constraint of %s exceeded" % (k, cap))
+
+ run_procs(process_lists.values())
+
+
+def main(argv = None):
+ global ANSWER_YES_TO_ALL, SILENT_MODE
+ global entries, data, dbg
+
+ # we need access to privileged files, exit if we are not running as root
+ if os.getuid() != 0:
+ print >>sys.stderr, "Error: ovs-bugtool must be run as root"
+ return 1
+
+ output_type = 'tar.bz2'
+ output_fd = -1
+
+ if argv is None:
+ argv = sys.argv
+
+ try:
+ (options, params) = getopt.gnu_getopt(
+ argv, 'sy', ['capabilities', 'silent', 'yestoall', 'entries=',
+ 'output=', 'outfd=', 'all', 'unlimited', 'debug'])
+ except getopt.GetoptError, opterr:
+ print >>sys.stderr, opterr
+ return 2
+
+ try:
+ load_plugins(True)
+ except:
+ pass
+
+ entries = [e for e in caps.keys() if caps[e][CHECKED]]
+
+ for (k, v) in options:
+ if k == '--capabilities':
+ update_capabilities()
+ print_capabilities()
+ return 0
+
+ if k == '--output':
+ if v in ['tar', 'tar.bz2', 'zip']:
+ output_type = v
+ else:
+ print >>sys.stderr, "Invalid output format '%s'" % v
+ return 2
+
+ # "-s" or "--silent" means suppress output (except for the final
+ # output filename at the end)
+ if k in ['-s', '--silent']:
+ SILENT_MODE = True
+
+ if k == '--entries' and v != '':
+ entries = v.split(',')
+
+ # If the user runs the script with "-y" or "--yestoall" we don't ask
+ # all the really annoying questions.
+ if k in ['-y', '--yestoall']:
+ ANSWER_YES_TO_ALL = True
+
+ if k == '--outfd':
+ output_fd = int(v)
+ try:
+ old = fcntl.fcntl(output_fd, fcntl.F_GETFD)
+ fcntl.fcntl(output_fd, fcntl.F_SETFD, old | fcntl.FD_CLOEXEC)
+ except:
+ print >>sys.stderr, "Invalid output file descriptor", output_fd
+ return 2
+
+ elif k == '--all':
+ entries = caps.keys()
+ elif k == '--unlimited':
+ unlimited_data = True
+ elif k == '--debug':
+ dbg = True
+ ProcOutput.debug = True
+
+ if len(params) != 1:
+ print >>sys.stderr, "Invalid additional arguments", str(params)
+ return 2
+
+ if output_fd != -1 and output_type != 'tar':
+ print >>sys.stderr, "Option '--outfd' only valid with '--output=tar'"
+ return 2
+
+ if ANSWER_YES_TO_ALL:
+ output("Warning: '--yestoall' argument provided, will not prompt for individual files.")
+
+ output('''
+This application will collate dmesg output, details of the
+hardware configuration of your machine, information about the build of
+openvswitch that you are using, plus, if you allow it, various logs.
+
+The collated information will be saved as a .%s for archiving or
+sending to a Technical Support Representative.
+
+The logs may contain private information, and if you are at all
+worried about that, you should exit now, or you should explicitly
+exclude those logs from the archive.
+
+''' % output_type)
+
+ # assemble potential data
+
+ file_output(CAP_BOOT_LOADER, [GRUB_CONFIG])
+ cmd_output(CAP_BOOT_LOADER, [LS, '-lR', '/boot'])
+ cmd_output(CAP_BOOT_LOADER, [MD5SUM, BOOT_KERNEL, BOOT_INITRD], label='vmlinuz-initrd.md5sum')
+
+ cmd_output(CAP_DISK_INFO, [FDISK, '-l'])
+ file_output(CAP_DISK_INFO, [PROC_PARTITIONS, PROC_MOUNTS])
+ file_output(CAP_DISK_INFO, [FSTAB])
+ cmd_output(CAP_DISK_INFO, [DF, '-alT'])
+ cmd_output(CAP_DISK_INFO, [DF, '-alTi'])
+ cmd_output(CAP_DISK_INFO, [LS, '-R', '/sys/class/scsi_host'])
+ cmd_output(CAP_DISK_INFO, [LS, '-R', '/sys/class/scsi_disk'])
+ cmd_output(CAP_DISK_INFO, [LS, '-R', '/sys/class/fc_transport'])
+ func_output(CAP_DISK_INFO, 'scsi-hosts', dump_scsi_hosts)
+
+
+ file_output(CAP_HARDWARE_INFO, [PROC_CPUINFO, PROC_MEMINFO, PROC_IOPORTS, PROC_INTERRUPTS])
+ cmd_output(CAP_HARDWARE_INFO, [DMIDECODE])
+ cmd_output(CAP_HARDWARE_INFO, [LSPCI, '-n'])
+ cmd_output(CAP_HARDWARE_INFO, [LSPCI, '-vv'])
+ file_output(CAP_HARDWARE_INFO, [PROC_USB_DEV, PROC_SCSI])
+ cmd_output(CAP_HARDWARE_INFO, [LS, '-lR', '/dev'])
+
+ file_output(CAP_KERNEL_INFO, [PROC_VERSION, PROC_MODULES, PROC_DEVICES,
+ PROC_FILESYSTEMS, PROC_CMDLINE])
+ cmd_output(CAP_KERNEL_INFO, [ZCAT, PROC_CONFIG], label='config')
+ cmd_output(CAP_KERNEL_INFO, [SYSCTL, '-A'])
+ tree_output(CAP_KERNEL_INFO, MODPROBE_DIR)
+ func_output(CAP_KERNEL_INFO, 'modinfo', module_info)
+
+ cmd_output(CAP_LOSETUP_A, [LOSETUP, '-a'])
+
+ file_output(CAP_NETWORK_CONFIG, [RESOLV_CONF, NSSWITCH_CONF, HOSTS])
+ file_output(CAP_NETWORK_CONFIG, [NTP_CONF, HOSTS_ALLOW, HOSTS_DENY])
+ file_output(CAP_NETWORK_CONFIG, [OPENVSWITCH_DEFAULT_SWITCH,
+ OPENVSWITCH_DEFAULT_CONTROLLER, OPENVSWITCH_CONF_DB])
+
+ cmd_output(CAP_NETWORK_STATUS, [IFCONFIG, '-a'])
+ cmd_output(CAP_NETWORK_STATUS, [ROUTE, '-n'])
+ cmd_output(CAP_NETWORK_STATUS, [ARP, '-n'])
+ cmd_output(CAP_NETWORK_STATUS, [NETSTAT, '-an'])
+ tree_output(CAP_NETWORK_STATUS, DHCP_LEASE_DIR)
+ cmd_output(CAP_NETWORK_STATUS, [IPTABLES, '-nL'])
+ for p in os.listdir('/sys/class/net/'):
+ try:
+ f = open('/sys/class/net/%s/type' % p, 'r')
+ t = f.readline()
+ f.close()
+ if int(t) == 1:
+ # ARPHRD_ETHER
+ cmd_output(CAP_NETWORK_STATUS, [ETHTOOL, p])
+ cmd_output(CAP_NETWORK_STATUS, [ETHTOOL, '-S', p])
+ cmd_output(CAP_NETWORK_STATUS, [ETHTOOL, '-k', p])
+ cmd_output(CAP_NETWORK_STATUS, [ETHTOOL, '-i', p])
+ cmd_output(CAP_NETWORK_STATUS, [ETHTOOL, '-c', p])
+ except:
+ pass
+ cmd_output(CAP_NETWORK_STATUS, [TC, '-s', 'qdisc'])
+ file_output(CAP_NETWORK_STATUS, [PROC_NET_SOFTNET_STAT])
+ tree_output(CAP_NETWORK_STATUS, OPENVSWITCH_CORE_DIR)
+ if os.path.exists(OPENVSWITCH_VSWITCHD_PID):
+ cmd_output(CAP_NETWORK_STATUS, [OVS_DPCTL, 'show'])
+ for d in dp_list():
+ cmd_output(CAP_NETWORK_STATUS, [OVS_OFCTL, 'show', d])
+ cmd_output(CAP_NETWORK_STATUS, [OVS_OFCTL, 'status', d])
+ cmd_output(CAP_NETWORK_STATUS, [OVS_OFCTL, 'dump-flows', d])
+ cmd_output(CAP_NETWORK_STATUS, [OVS_DPCTL, 'dump-flows', d])
+ try:
+ vspidfile = open(OPENVSWITCH_VSWITCHD_PID)
+ vspid = int(vspidfile.readline().strip())
+ vspidfile.close()
+ for b in bond_list(vspid):
+ cmd_output(CAP_NETWORK_STATUS,
+ [OVS_APPCTL, '-t', '/var/run/ovs-vswitchd.%s.ctl' % vspid, '-e' 'bond/show %s' % b],
+ 'ovs-appctl-bond-show-%s.out' % b)
+ except e:
+ pass
+
+ tree_output(CAP_PAM, PAM_DIR)
+
+ cmd_output(CAP_PROCESS_LIST, [PS, 'wwwaxf', '-eo', 'pid,tty,stat,time,nice,psr,pcpu,pmem,nwchan,wchan:25,args'], label='process-tree')
+ func_output(CAP_PROCESS_LIST, 'fd_usage', fd_usage)
+
+ file_output(CAP_SYSTEM_LOGS,
+ [ VAR_LOG_DIR + x for x in
+ [ 'kern.log', 'daemon.log', 'user.log', 'syslog', 'messages',
+ 'debug', 'dmesg', 'boot'] +
+ [ f % n for n in range(1, 20) \
+ for f in ['kern.log.%d', 'kern.log.%d.gz',
+ 'daemon.log.%d', 'daemon.log.%d.gz',
+ 'user.log.%d', 'user.log.%d.gz',
+ 'messages.%d', 'messages.%d.gz']]])
+ if not os.path.exists('/var/log/dmesg') and not os.path.exists('/var/log/boot'):
+ cmd_output(CAP_SYSTEM_LOGS, [DMESG])
+
+
+ tree_output(CAP_X11_LOGS, X11_LOGS_DIR, X11_LOGS_RE)
+ tree_output(CAP_X11_AUTH, X11_AUTH_DIR, X11_AUTH_RE)
+
+
+ try:
+ load_plugins()
+ except:
+ pass
+
+ # permit the user to filter out data
+ for k in sorted(data.keys()):
+ if not ANSWER_YES_TO_ALL and not yes("Include '%s'? [Y/n]: " % k):
+ del data[k]
+
+ # collect selected data now
+ output_ts('Running commands to collect data')
+ collect_data()
+
+ subdir = "bug-report-%s" % time.strftime("%Y%m%d%H%M%S")
+
+ # include inventory
+ data['inventory.xml'] = {'cap': None, 'output': StringIOmtime(make_inventory(data, subdir))}
+
+ # create archive
+ if output_fd == -1 and not os.path.exists(BUG_DIR):
+ try:
+ os.makedirs(BUG_DIR)
+ except:
+ pass
+
+ if output_fd == -1:
+ output_ts('Creating output file')
+
+ if output_type.startswith('tar'):
+ make_tar(subdir, output_type, output_fd)
+ else:
+ make_zip(subdir)
+
+ clean_tapdisk_logs()
+
+ if dbg:
+ print >>sys.stderr, "Category sizes (max, actual):\n"
+ for c in caps.keys():
+ print >>sys.stderr, " %s (%d, %d)" % (c, caps[c][MAX_SIZE],
+ cap_sizes[c])
+ return 0
+
+# Return the list of paths matching /var/log/blktap/*.log*.
+def find_tapdisk_logs():
+ return glob.glob('/var/log/blktap/*.log*')
+
+# Send SIGUSR1 to every process returned by pidof('tapdisk'), presumably to
+# make each one write out its log -- TODO confirm -- and then sleep to give
+# them time to do so.  Failures to signal a process are ignored.
+# (pidof() is defined elsewhere in this file.)
+def generate_tapdisk_logs():
+ for pid in pidof('tapdisk'):
+ try:
+ os.kill(pid, SIGUSR1)
+ output_ts("Including logs for tapdisk process %d" % pid)
+ except :
+ pass
+ # give processes a second to write their logs
+ time.sleep(1)
+
+# Delete every file reported by find_tapdisk_logs().  Files that cannot be
+# removed are silently skipped.
+def clean_tapdisk_logs():
+ for filename in find_tapdisk_logs():
+ try:
+ os.remove(filename)
+ except :
+ pass
+
+# Line filter that blanks out secret values in an XML database dump.
+#
+# str:   one line of the dump.
+# state: dict carried between calls; 'in_secret_table' records whether the
+#        lines currently being seen lie inside a <table ... name="secret">
+#        element.
+#
+# Inside the secret table, every value="..." attribute on a line starting
+# with "<row" has its contents replaced by REMOVED.  Returns the line,
+# modified or not.
+def filter_db_pii(str, state):
+ if 'in_secret_table' not in state:
+ state['in_secret_table'] = False
+
+ if str.startswith('<table ') and 'name="secret"' in str:
+ state['in_secret_table'] = True
+ elif str.startswith('</table>'):
+ state['in_secret_table'] = False
+
+ if state['in_secret_table'] and str.startswith("<row"): # match only on DB rows
+ str = re.sub(r'(value=")[^"]+(")', r'\1REMOVED\2', str)
+ return str
+
+# Return a text listing of the entries in /sys/class/scsi_host, sorted by
+# name.  Each entry is shown as "<host>:" followed by a line holding its
+# proc_name and, when a model_name could be read, " -> <model_name>".
+# An attribute file that cannot be read is ignored (empty proc_name, no
+# model_name).  The 'cap' argument is not used; it is there because
+# collect_data() calls every queued function with the capability name.
+def dump_scsi_hosts(cap):
+ output = ''
+ l = os.listdir('/sys/class/scsi_host')
+ l.sort()
+
+ for h in l:
+ procname = ''
+ try:
+ f = open('/sys/class/scsi_host/%s/proc_name' % h)
+ procname = f.readline().strip("\n")
+ f.close()
+ except:
+ pass
+ modelname = None
+ try:
+ f = open('/sys/class/scsi_host/%s/model_name' % h)
+ modelname = f.readline().strip("\n")
+ f.close()
+ except:
+ pass
+
+ output += "%s:\n" %h
+ output += " %s%s\n" % (procname, modelname and (" -> %s" % modelname) or '')
+
+ return output
+
+# Run MODINFO for every module listed in PROC_MODULES (the first word of
+# each line) and return the contents of the shared StringIO buffer that all
+# the ProcOutput objects were given.  Each command is limited by the
+# capability's MAX_TIME.  (ProcOutput and run_procs are defined elsewhere in
+# this file.)
+def module_info(cap):
+ output = StringIO.StringIO()
+ modules = open(PROC_MODULES, 'r')
+ procs = []
+
+ for line in modules:
+ module = line.split()[0]
+ procs.append(ProcOutput([MODINFO, module], caps[cap][MAX_TIME], output))
+ modules.close()
+
+ run_procs([procs])
+
+ return output.getvalue()
+
+# Return the output lines of "ovs-dpctl dump-dps" as a list, or an empty
+# list when the command timed out.  The command is limited by the
+# network-status capability's MAX_TIME.
+def dp_list():
+ output = StringIO.StringIO()
+ procs = [ProcOutput([OVS_DPCTL, 'dump-dps'], caps[CAP_NETWORK_STATUS][MAX_TIME], output)]
+
+ run_procs([procs])
+
+ if not procs[0].timed_out:
+ return output.getvalue().splitlines()
+ return []
+
+def bond_list(pid):
+ output = StringIO.StringIO()
+ procs = [ProcOutput([OVS_APPCTL, '-t', '/var/run/ovs-vswitchd.%s.ctl' % pid, '-e' 'bond/list'], caps[CAP_NETWORK_STATUS][MAX_TIME], output)]
+
+ run_procs([procs])
+
+ if not procs[0].timed_out:
+ bonds = output.getvalue().splitlines()[1:]
+ return [x.split('\t')[1] for x in bonds]
+ return []
+
+def fd_usage(cap):
+ output = ''
+ fd_dict = {}
+ for d in [p for p in os.listdir('/proc') if p.isdigit()]:
+ try:
+ fh = open('/proc/'+d+'/cmdline')
+ name = fh.readline()
+ num_fds = len(os.listdir(os.path.join('/proc/'+d+'/fd')))
+ if num_fds > 0:
+ if not num_fds in fd_dict:
+ fd_dict[num_fds] = []
+ fd_dict[num_fds].append(name.replace('\0', ' ').strip())
+ finally:
+ fh.close()
+ keys = fd_dict.keys()
+ keys.sort(lambda a, b: int(b) - int(a))
+ for k in keys:
+ output += "%s: %s\n" % (k, str(fd_dict[k]))
+ return output
+
+# Load plugin definitions from PLUGIN_DIR.
+#
+# For every subdirectory DIR of PLUGIN_DIR:
+#  - if no capability named DIR is registered yet and PLUGIN_DIR/DIR.xml
+#    exists, that file is parsed as a <capability> document and registered
+#    through cap(), using its pii, min_size, max_size, min_time, max_time,
+#    mime, checked and hidden attributes where present and valid;
+#  - unless just_capabilities is true, every *.xml file inside DIR is parsed
+#    as a <collect> document whose <files>, <directory> and <command>
+#    elements are queued through file_output(), tree_output() and
+#    cmd_output() respectively.
+#
+# NOTE(review): the indentation of this listing has been flattened, so the
+# nesting described above is inferred from the statement order -- confirm
+# against the original file.
+#
+# Errors (missing PLUGIN_DIR, malformed XML, failed assertions) propagate to
+# the caller.
+def load_plugins(just_capabilities = False):
+ # Concatenate the data of all text nodes in nodelist.
+ def getText(nodelist):
+ rc = ""
+ for node in nodelist:
+ if node.nodeType == node.TEXT_NODE:
+ rc += node.data
+ return rc.encode()
+
+ # Read attribute attr of element el as a boolean: 'true'/'yes' and
+ # 'false'/'no' (case-insensitive) are recognised, anything else yields
+ # default.
+ def getBoolAttr(el, attr, default = False):
+ ret = default
+ val = el.getAttribute(attr).lower()
+ if val in ['true', 'false', 'yes', 'no']:
+ ret = val in ['true', 'yes']
+ return ret
+
+ for dir in [d for d in os.listdir(PLUGIN_DIR) if os.path.isdir(os.path.join(PLUGIN_DIR, d))]:
+ if not caps.has_key(dir):
+ if not os.path.exists("%s/%s.xml" % (PLUGIN_DIR, dir)):
+ continue
+ xmldoc = parse("%s/%s.xml" % (PLUGIN_DIR, dir))
+ assert xmldoc.documentElement.tagName == "capability"
+
+ # defaults, used for any attribute that is absent or not recognised
+ pii, min_size, max_size, min_time, max_time, mime = \
+ PII_MAYBE, -1,-1,-1,-1, MIME_TEXT
+
+ if xmldoc.documentElement.getAttribute("pii") in [PII_NO, PII_YES, PII_MAYBE, PII_IF_CUSTOMIZED]:
+ pii = xmldoc.documentElement.getAttribute("pii")
+ if xmldoc.documentElement.getAttribute("min_size") != '':
+ min_size = long(xmldoc.documentElement.getAttribute("min_size"))
+ if xmldoc.documentElement.getAttribute("max_size") != '':
+ max_size = long(xmldoc.documentElement.getAttribute("max_size"))
+ if xmldoc.documentElement.getAttribute("min_time") != '':
+ min_time = int(xmldoc.documentElement.getAttribute("min_time"))
+ if xmldoc.documentElement.getAttribute("max_time") != '':
+ max_time = int(xmldoc.documentElement.getAttribute("max_time"))
+ if xmldoc.documentElement.getAttribute("mime") in [MIME_DATA, MIME_TEXT]:
+ mime = xmldoc.documentElement.getAttribute("mime")
+ checked = getBoolAttr(xmldoc.documentElement, 'checked', True)
+ hidden = getBoolAttr(xmldoc.documentElement, 'hidden', False)
+
+ cap(dir, pii, min_size, max_size, min_time, max_time, mime, checked, hidden)
+
+ if just_capabilities:
+ continue
+
+ plugdir = os.path.join(PLUGIN_DIR, dir)
+ for file in [f for f in os.listdir(plugdir) if f.endswith('.xml')]:
+ xmldoc = parse(os.path.join(plugdir, file))
+ assert xmldoc.documentElement.tagName == "collect"
+
+ for el in xmldoc.documentElement.getElementsByTagName("*"):
+ if el.tagName == "files":
+ # whitespace-separated list of paths
+ file_output(dir, getText(el.childNodes).split())
+ elif el.tagName == "directory":
+ pattern = el.getAttribute("pattern")
+ if pattern == '': pattern = None
+ negate = getBoolAttr(el, 'negate')
+ tree_output(dir, getText(el.childNodes), pattern and re.compile(pattern) or None, negate)
+ elif el.tagName == "command":
+ label = el.getAttribute("label")
+ if label == '': label = None
+ cmd_output(dir, getText(el.childNodes), label)
+
+def make_tar(subdir, suffix, output_fd):
+ global SILENT_MODE, data
+
+ mode = 'w'
+ if suffix == 'tar.bz2':
+ mode = 'w:bz2'
+ filename = "%s/%s.%s" % (BUG_DIR, subdir, suffix)
+
+ if output_fd == -1:
+ tf = tarfile.open(filename, mode)
+ else:
+ tf = tarfile.open(None, 'w', os.fdopen(output_fd, 'a'))
+
+ try:
+ for (k, v) in data.items():
+ try:
+ tar_filename = os.path.join(subdir, construct_filename(k, v))
+ ti = tarfile.TarInfo(tar_filename)
+
+ ti.uname = 'root'
+ ti.gname = 'root'
+
+ if v.has_key('output'):
+ ti.mtime = v['output'].mtime
+ ti.size = len(v['output'].getvalue())
+ v['output'].seek(0)
+ tf.addfile(ti, v['output'])
+ elif v.has_key('filename'):
+ s = os.stat(v['filename'])
+ ti.mtime = s.st_mtime
+ ti.size = s.st_size
+ tf.addfile(ti, file(v['filename']))
+ except:
+ pass
+ finally:
+ tf.close()
+
+ if output_fd == -1:
+ output ('Writing tarball %s successful.' % filename)
+ if SILENT_MODE:
+ print filename
+
+
+def make_zip(subdir):
+ global SILENT_MODE, data
+
+ filename = "%s/%s.zip" % (BUG_DIR, subdir)
+ zf = zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED)
+
+ try:
+ for (k, v) in data.items():
+ try:
+ dest = os.path.join(subdir, construct_filename(k, v))
+
+ if v.has_key('output'):
+ zf.writestr(dest, v['output'].getvalue())
+ else:
+ if os.stat(v['filename']).st_size < 50:
+ compress_type = zipfile.ZIP_STORED
+ else:
+ compress_type = zipfile.ZIP_DEFLATED
+ zf.write(v['filename'], dest, compress_type)
+ except:
+ pass
+ finally:
+ zf.close()
+
+ output ('Writing archive %s successful.' % filename)
+ if SILENT_MODE:
+ print filename
+
+
+def make_inventory(inventory, subdir):
+ document = getDOMImplementation().createDocument(
+ None, INVENTORY_XML_ROOT, None)
+
+ # create summary entry
+ s = document.createElement(INVENTORY_XML_SUMMARY)
+ user = os.getenv('SUDO_USER', os.getenv('USER'))
+ if user:
+ s.setAttribute('user', user)
+ s.setAttribute('date', time.strftime('%c'))
+ s.setAttribute('hostname', platform.node())
+ s.setAttribute('uname', ' '.join(platform.uname()))
+ s.setAttribute('uptime', commands.getoutput(UPTIME))
+ document.getElementsByTagName(INVENTORY_XML_ROOT)[0].appendChild(s)
+
+ map(lambda (k, v): inventory_entry(document, subdir, k, v),
+ inventory.items())
+ return document.toprettyxml()
+
+def inventory_entry(document, subdir, k, v):
+ try:
+ el = document.createElement(INVENTORY_XML_ELEMENT)
+ el.setAttribute('capability', v['cap'])
+ el.setAttribute('filename', os.path.join(subdir, construct_filename(k, v)))
+ el.setAttribute('md5sum', md5sum(v))
+ document.getElementsByTagName(INVENTORY_XML_ROOT)[0].appendChild(el)
+ except:
+ pass
+
+
+def md5sum(d):
+ m = md5.new()
+ if d.has_key('filename'):
+ f = open(d['filename'])
+ data = f.read(1024)
+ while len(data) > 0:
+ m.update(data)
+ data = f.read(1024)
+ f.close()
+ elif d.has_key('output'):
+ m.update(d['output'].getvalue())
+ return m.hexdigest()
+
+
+def construct_filename(k, v):
+ if v.has_key('filename'):
+ if v['filename'][0] == '/':
+ return v['filename'][1:]
+ else:
+ return v['filename']
+ s = k.replace(' ', '-')
+ s = s.replace('--', '-')
+ s = s.replace('/', '%')
+ if s.find('.') == -1:
+ s += '.out'
+
+ return s
+
+def update_capabilities():
+ pass
+
+def update_cap_size(cap, size):
+ update_cap(cap, MIN_SIZE, size)
+ update_cap(cap, MAX_SIZE, size)
+ update_cap(cap, CHECKED, size > 0)
+
+
+def update_cap(cap, k, v):
+ global caps
+ l = list(caps[cap])
+ l[k] = v
+ caps[cap] = tuple(l)
+
+
+def size_of_dir(d, pattern = None, negate = False):
+ if os.path.isdir(d):
+ return size_of_all([os.path.join(d, fn) for fn in os.listdir(d)],
+ pattern, negate)
+ else:
+ return 0
+
+
+def size_of_all(files, pattern = None, negate = False):
+ return sum([size_of(f, pattern, negate) for f in files])
+
+
+def matches(f, pattern, negate):
+ if negate:
+ return not matches(f, pattern, False)
+ else:
+ return pattern is None or pattern.match(f)
+
+
+def size_of(f, pattern, negate):
+ if os.path.isfile(f) and matches(f, pattern, negate):
+ return os.stat(f)[6]
+ else:
+ return size_of_dir(f, pattern, negate)
+
+
+def print_capabilities():
+ document = getDOMImplementation().createDocument(
+ "ns", CAP_XML_ROOT, None)
+ map(lambda key: capability(document, key), [k for k in caps.keys() if not caps[k][HIDDEN]])
+ print document.toprettyxml()
+
+def capability(document, key):
+ c = caps[key]
+ el = document.createElement(CAP_XML_ELEMENT)
+ el.setAttribute('key', c[KEY])
+ el.setAttribute('pii', c[PII])
+ el.setAttribute('min-size', str(c[MIN_SIZE]))
+ el.setAttribute('max-size', str(c[MAX_SIZE]))
+ el.setAttribute('min-time', str(c[MIN_TIME]))
+ el.setAttribute('max-time', str(c[MAX_TIME]))
+ el.setAttribute('content-type', c[MIME])
+ el.setAttribute('default-checked', c[CHECKED] and 'yes' or 'no')
+ document.getElementsByTagName(CAP_XML_ROOT)[0].appendChild(el)
+
+
+def prettyDict(d):
+ format = '%%-%ds: %%s' % max(map(len, [k for k, _ in d.items()]))
+ return '\n'.join([format % i for i in d.items()]) + '\n'
+
+
+def yes(prompt):
+ yn = raw_input(prompt)
+
+ return len(yn) == 0 or yn.lower()[0] == 'y'
+
+
+partition_re = re.compile(r'(.*[0-9]+$)|(^xvd)')
+
+def disk_list():
+ disks = []
+ try:
+ f = open('/proc/partitions')
+ f.readline()
+ f.readline()
+ for line in f.readlines():
+ (major, minor, blocks, name) = line.split()
+ if int(major) < 254 and not partition_re.match(name):
+ disks.append(name)
+ f.close()
+ except:
+ pass
+ return disks
+
+
+class ProcOutput:
+ """Run one external command and copy its stdout, line by line, to a sink.
+
+ The object only tracks state; the polling loop that calls run(),
+ read_line() and terminate() lives outside the class.
+ """
+ # When True, run() logs each command via output_ts() before starting it.
+ debug = False
+
+ def __init__(self, command, max_time, inst=None, filter=None):
+ """Record the command and its time budget without starting it.
+
+ command -- argv list, or a string (a string is run through the shell).
+ max_time -- seconds added to the current time to form the deadline.
+ inst -- optional object with a write() method receiving each line.
+ filter -- optional callable(line, state) whose result is written
+ in place of the line.
+ """
+ self.command = command
+ self.max_time = max_time
+ self.inst = inst
+ self.running = False
+ self.status = None
+ self.timed_out = False
+ self.failed = False
+ # The deadline is fixed at construction time, not when run() is called.
+ self.timeout = int(time.time()) + self.max_time
+ self.filter = filter
+ self.filter_state = {}
+
+ def __del__(self):
+ # Make sure a still-running child is signalled when the object dies.
+ self.terminate()
+
+ def cmdAsStr(self):
+ """Return the command as a single string for log messages."""
+ return isinstance(self.command, list) and ' '.join(self.command) or self.command
+
+ def run(self):
+ """Start the command; set 'running' on success or 'failed' on error."""
+ self.timed_out = False
+ try:
+ if ProcOutput.debug:
+ output_ts("Starting '%s'" % self.cmdAsStr())
+ self.proc = Popen(self.command, bufsize=1, stdin=dev_null, stdout=PIPE, stderr=dev_null, shell=isinstance(self.command, str))
+ # Mark the pipe close-on-exec so later children do not inherit it.
+ old = fcntl.fcntl(self.proc.stdout.fileno(), fcntl.F_GETFD)
+ fcntl.fcntl(self.proc.stdout.fileno(), fcntl.F_SETFD, old | fcntl.FD_CLOEXEC)
+ self.running = True
+ self.failed = False
+ except:
+ output_ts("'%s' failed" % self.cmdAsStr())
+ self.running = False
+ self.failed = True
+
+ def terminate(self):
+ """Send SIGTERM to a running child and mark it stopped."""
+ if self.running:
+ try:
+ os.kill(self.proc.pid, SIGTERM)
+ except:
+ pass
+ self.proc = None
+ self.running = False
+ # 'status' holds the signal number here, not a wait() result.
+ self.status = SIGTERM
+
+ def read_line(self):
+ """Read one line from the child; an empty read means it has exited."""
+ assert self.running
+ line = self.proc.stdout.readline()
+ if line == '':
+ # process exited
+ self.status = self.proc.wait()
+ self.proc = None
+ self.running = False
+ else:
+ if self.filter:
+ line = self.filter(line, self.filter_state)
+ if self.inst:
+ self.inst.write(line)
+
+def run_procs(procs):
+ """Drive a list of lists of ProcOutput objects until none is active.
+
+ On every pass, each inner list contributes a process that is running, or
+ a not-yet-started one that run() manages to start. The stdout pipes of
+ the active processes are then polled with select() using a one-second
+ timeout; readable pipes have one line consumed, and a process still
+ running past its deadline is marked timed out and terminated.
+
+ NOTE(review): indentation is flattened in this rendering; the 'break'
+ statements presumably leave the inner loop so that each inner list runs
+ one process at a time -- confirm against the original file.
+ """
+ while True:
+ pipes = []
+ active_procs = []
+
+ for pp in procs:
+ for p in pp:
+ if p.running:
+ active_procs.append(p)
+ pipes.append(p.proc.stdout)
+ break
+ # Only start processes that have never run, failed or timed out.
+ elif p.status == None and not p.failed and not p.timed_out:
+ p.run()
+ if p.running:
+ active_procs.append(p)
+ pipes.append(p.proc.stdout)
+ break
+
+ if len(pipes) == 0:
+ # all finished
+ break
+
+ (i, o, x) = select(pipes, [], [], 1.0)
+ now = int(time.time())
+
+ # handle process output
+ for p in active_procs:
+ if p.proc.stdout in i:
+ p.read_line()
+
+ # handle timeout
+ if p.running and now > p.timeout:
+ output_ts("'%s' timed out" % p.cmdAsStr())
+ if p.inst:
+ p.inst.write("\n** timeout **\n")
+ p.timed_out = True
+ p.terminate()
+
+
+def pidof(name):
+ pids = []
+
+ for d in [p for p in os.listdir('/proc') if p.isdigit()]:
+ try:
+ if os.path.basename(os.readlink('/proc/%s/exe' % d)) == name:
+ pids.append(int(d))
+ except:
+ pass
+
+ return pids
+
+
+class StringIOmtime(StringIO.StringIO):
+ def __init__(self, buf = ''):
+ StringIO.StringIO.__init__(self, buf)
+ self.mtime = time.time()
+
+ def write(self, s):
+ StringIO.StringIO.write(self, s)
+ self.mtime = time.time()
+
+
+# Script entry point: exit with main()'s return value, or with status 3
+# when the user interrupts the run with Ctrl-C.
+if __name__ == "__main__":
+ try:
+ sys.exit(main())
+ except KeyboardInterrupt:
+ print "\nInterrupted."
+ sys.exit(3)
diff --git a/debian/ovs-bugtool.8 b/debian/ovs-bugtool.8
new file mode 100644
index 000000000..28147e460
--- /dev/null
+++ b/debian/ovs-bugtool.8
@@ -0,0 +1,46 @@
+.\" -*- nroff -*-
+.de IQ
+. br
+. ns
+. IP "\\$1"
+..
+.TH ovs\-bugtool 8 "September 2010" "Open vSwitch" "Open vSwitch Manual"
+.\" This program's name:
+.ds PN ovs\-bugtool
+.
+.SH NAME
+ovs\-bugtool \- generate a debug bundle for Open vSwitch
+.
+.SH SYNOPSIS
+.B ovs\-bugtool
+.
+.SH DESCRIPTION
+Generate a debug bundle with useful information about Open vSwitch on this
+system. The bundle is placed in /var/log/openvswitch.
+.
+.SH OPTIONS
+.
+.IP "\fB\-\-all\fR"
+use all available capabilities.
+.
+.IP "\fB\-\-capabilities\fR"
+list \fBovs\-bugtool\fR capabilities.
+.
+.IP "\fB\-\-debug\fR"
+print verbose debugging output.
+.
+.IP "\fB\-\-entries=\fIlist\fR\fR"
+use the capabilities specified in a comma-separated list.
+.
+.IP "\fB\-\-output=\fIfiletype\fR\fR"
+generate a debug bundle with the specified file type. Options include \fBtar\fR,
+\fBtar.bz2\fR, and \fBzip\fR.
+.
+.IP "\fB\-\-silent\fR"
+suppress output.
+.
+.IP "\fB\-\-unlimited\fR"
+do not exclude files which are too large.
+.
+.IP "\fB\-\-yestoall\fR"
+answer yes to all prompts.
diff --git a/debian/ovs-monitor-ipsec b/debian/ovs-monitor-ipsec
new file mode 100755
index 000000000..184b00469
--- /dev/null
+++ b/debian/ovs-monitor-ipsec
@@ -0,0 +1,350 @@
+#!/usr/bin/python
+# Copyright (c) 2009, 2010 Nicira Networks
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# A daemon to monitor attempts to create GRE-over-IPsec tunnels.
+# Uses racoon and setkey to support the configuration. Assumes that
+# OVS has complete control over IPsec configuration for the box.
+
+# xxx To-do:
+# - Doesn't actually check that Interface is connected to bridge
+# - Doesn't support cert authentication
+
+
+import getopt
+import logging, logging.handlers
+import os
+import stat
+import subprocess
+import sys
+
+from ovs.db import error
+from ovs.db import types
+import ovs.util
+import ovs.daemon
+import ovs.db.idl
+
+
+# By default log messages as DAEMON into syslog
+# (sent to the /dev/log socket, formatted as "file: LEVEL: message").
+s_log = logging.getLogger("ovs-monitor-ipsec")
+l_handler = logging.handlers.SysLogHandler(
+ "/dev/log",
+ facility=logging.handlers.SysLogHandler.LOG_DAEMON)
+l_formatter = logging.Formatter('%(filename)s: %(levelname)s: %(message)s')
+l_handler.setFormatter(l_formatter)
+s_log.addHandler(l_handler)
+
+
+# Path of the setkey binary that IPsec.call_setkey() runs with "-c".
+setkey = "/usr/sbin/setkey"
+
+# Class to configure the racoon daemon, which handles IKE negotiation
+class Racoon:
+ """Owns racoon's configuration file and pre-shared key file.
+
+ Constructing an instance overwrites both files and reloads racoon.
+ """
+ # Default locations for files
+ conf_file = "/etc/racoon/racoon.conf"
+ # cert_file is defined but not referenced by the methods of this class.
+ cert_file = "/etc/racoon/certs"
+ psk_file = "/etc/racoon/psk.txt"
+
+ # Default racoon configuration file we use for IKE
+ conf_template = """# Configuration file generated by Open vSwitch
+#
+# Do not modify by hand!
+
+path pre_shared_key "/etc/racoon/psk.txt";
+path certificate "/etc/racoon/certs";
+
+remote anonymous {
+ exchange_mode main;
+ proposal {
+ encryption_algorithm aes;
+ hash_algorithm sha1;
+ authentication_method pre_shared_key;
+ dh_group 2;
+ }
+}
+
+sainfo anonymous {
+ pfs_group 2;
+ lifetime time 1 hour;
+ encryption_algorithm aes;
+ authentication_algorithm hmac_sha1, hmac_md5;
+ compression_algorithm deflate;
+}
+"""
+
+ def __init__(self):
+ """Write the template config, empty the PSK file and reload racoon."""
+ # Maps remote host -> pre-shared key; written out by commit_psk().
+ self.psk_hosts = {}
+ # Initialised but not used by the methods of this class.
+ self.cert_hosts = {}
+
+ # Replace racoon's conf file with our template
+ f = open(Racoon.conf_file, "w")
+ f.write(Racoon.conf_template)
+ f.close()
+
+ # Clear out any pre-shared keys
+ self.commit_psk()
+
+ self.reload()
+
+ def reload(self):
+ """Run "/etc/init.d/racoon reload"; log a warning if it fails."""
+ exitcode = subprocess.call(["/etc/init.d/racoon", "reload"])
+ if exitcode != 0:
+ s_log.warning("couldn't reload racoon")
+
+ def commit_psk(self):
+ """Rewrite the PSK file from self.psk_hosts, one "host key" per line."""
+ f = open(Racoon.psk_file, 'w')
+
+ # The file must only be accessible by root
+ # (the mode is set before any key material is written).
+ os.chmod(Racoon.psk_file, stat.S_IRUSR | stat.S_IWUSR)
+
+ f.write("# Generated by Open vSwitch...do not modify by hand!\n\n")
+ for host, psk in self.psk_hosts.iteritems():
+ f.write("%s %s\n" % (host, psk))
+ f.close()
+
+ def add_psk(self, host, psk):
+ """Set (or replace) the key for 'host' and rewrite the PSK file."""
+ self.psk_hosts[host] = psk
+ self.commit_psk()
+
+ def del_psk(self, host):
+ """Forget the key for 'host', if any, and rewrite the PSK file."""
+ if host in self.psk_hosts:
+ del self.psk_hosts[host]
+ self.commit_psk()
+
+
+# Class to configure IPsec on a system using racoon for IKE and setkey
+# for maintaining the Security Association Database (SAD) and Security
+# Policy Database (SPD). Only policies for GRE are supported.
+class IPsec:
+ def __init__(self):
+ self.sad_flush()
+ self.spd_flush()
+ self.racoon = Racoon()
+
+ def call_setkey(self, cmds):
+ try:
+ p = subprocess.Popen([setkey, "-c"], stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE)
+ except:
+ s_log.error("could not call setkey")
+ sys.exit(1)
+
+ # xxx It is safer to pass the string into the communicate()
+ # xxx method, but it didn't work for slightly longer commands.
+ # xxx An alternative may need to be found.
+ p.stdin.write(cmds)
+ return p.communicate()[0]
+
+ def get_spi(self, local_ip, remote_ip, proto="esp"):
+ # Run the setkey dump command to retrieve the SAD. Then, parse
+ # the output looking for SPI buried in the output. Note that
+ # multiple SAD entries can exist for the same "flow", since an
+ # older entry could be in a "dying" state.
+ spi_list = []
+ host_line = "%s %s" % (local_ip, remote_ip)
+ results = self.call_setkey("dump ;").split("\n")
+ for i in range(len(results)):
+ if results[i].strip() == host_line:
+ # The SPI is in the line following the host pair
+ spi_line = results[i+1]
+ if (spi_line[1:4] == proto):
+ spi = spi_line.split()[2]
+ spi_list.append(spi.split('(')[1].rstrip(')'))
+ return spi_list
+
+ def sad_flush(self):
+ self.call_setkey("flush;")
+
+ def sad_del(self, local_ip, remote_ip):
+ # To delete all SAD entries, we should be able to use setkey's
+ # "deleteall" command. Unfortunately, it's fundamentally broken
+ # on Linux and not documented as such.
+ cmds = ""
+
+ # Delete local_ip->remote_ip SAD entries
+ spi_list = self.get_spi(local_ip, remote_ip)
+ for spi in spi_list:
+ cmds += "delete %s %s esp %s;\n" % (local_ip, remote_ip, spi)
+
+ # Delete remote_ip->local_ip SAD entries
+ spi_list = self.get_spi(remote_ip, local_ip)
+ for spi in spi_list:
+ cmds += "delete %s %s esp %s;\n" % (remote_ip, local_ip, spi)
+
+ if cmds:
+ self.call_setkey(cmds)
+
+ def spd_flush(self):
+ self.call_setkey("spdflush;")
+
+ def spd_add(self, local_ip, remote_ip):
+ cmds = ("spdadd %s %s gre -P out ipsec esp/transport//default;" %
+ (local_ip, remote_ip))
+ cmds += "\n"
+ cmds += ("spdadd %s %s gre -P in ipsec esp/transport//default;" %
+ (remote_ip, local_ip))
+ self.call_setkey(cmds)
+
+ def spd_del(self, local_ip, remote_ip):
+ cmds = "spddelete %s %s gre -P out;" % (local_ip, remote_ip)
+ cmds += "\n"
+ cmds += "spddelete %s %s gre -P in;" % (remote_ip, local_ip)
+ self.call_setkey(cmds)
+
+ def ipsec_cert_del(self, local_ip, remote_ip):
+ # Need to support cert...right now only PSK supported
+ self.racoon.del_psk(remote_ip)
+ self.spd_del(local_ip, remote_ip)
+ self.sad_del(local_ip, remote_ip)
+
+ def ipsec_cert_update(self, local_ip, remote_ip, cert):
+ # Need to support cert...right now only PSK supported
+ self.racoon.add_psk(remote_ip, "abc12345")
+ self.spd_add(local_ip, remote_ip)
+
+ def ipsec_psk_del(self, local_ip, remote_ip):
+ self.racoon.del_psk(remote_ip)
+ self.spd_del(local_ip, remote_ip)
+ self.sad_del(local_ip, remote_ip)
+
+ def ipsec_psk_update(self, local_ip, remote_ip, psk):
+ self.racoon.add_psk(remote_ip, psk)
+ self.spd_add(local_ip, remote_ip)
+
+
+def keep_table_columns(schema, table_name, column_types):
+ table = schema.tables.get(table_name)
+ if not table:
+ raise error.Error("schema has no %s table" % table_name)
+
+ new_columns = {}
+ for column_name, column_type in column_types.iteritems():
+ column = table.columns.get(column_name)
+ if not column:
+ raise error.Error("%s table schema lacks %s column"
+ % (table_name, column_name))
+ if column.type != column_type:
+ raise error.Error("%s column in %s table has type \"%s\", "
+ "expected type \"%s\""
+ % (column_name, table_name,
+ column.type.toEnglish(),
+ column_type.toEnglish()))
+ new_columns[column_name] = column
+ table.columns = new_columns
+ return table
+
+def monitor_uuid_schema_cb(schema):
+ string_type = types.Type(types.BaseType(types.StringType))
+ string_map_type = types.Type(types.BaseType(types.StringType),
+ types.BaseType(types.StringType),
+ 0, sys.maxint)
+
+ new_tables = {}
+ new_tables["Interface"] = keep_table_columns(
+ schema, "Interface", {"name": string_type,
+ "type": string_type,
+ "options": string_map_type,
+ "other_config": string_map_type})
+ schema.tables = new_tables
+
+def usage():
+ """Print the command-line help text and exit with status 0."""
+ print "usage: %s [OPTIONS] DATABASE" % sys.argv[0]
+ print "where DATABASE is a socket on which ovsdb-server is listening."
+ # The daemon-related options are described by the ovs.daemon module.
+ ovs.daemon.usage()
+ print "Other options:"
+ print " -h, --help display this help message"
+ sys.exit(0)
+
+def main(argv):
+ try:
+ options, args = getopt.gnu_getopt(
+ argv[1:], 'h', ['help'] + ovs.daemon.LONG_OPTIONS)
+ except getopt.GetoptError, geo:
+ sys.stderr.write("%s: %s\n" % (ovs.util.PROGRAM_NAME, geo.msg))
+ sys.exit(1)
+
+ for key, value in options:
+ if key in ['-h', '--help']:
+ usage()
+ elif not ovs.daemon.parse_opt(key, value):
+ sys.stderr.write("%s: unhandled option %s\n"
+ % (ovs.util.PROGRAM_NAME, key))
+ sys.exit(1)
+
+ if len(args) != 1:
+ sys.stderr.write("%s: exactly one nonoption argument is required "
+ "(use --help for help)\n" % ovs.util.PROGRAM_NAME)
+ sys.exit(1)
+
+ ovs.daemon.die_if_already_running()
+
+ remote = args[0]
+ idl = ovs.db.idl.Idl(remote, "Open_vSwitch", monitor_uuid_schema_cb)
+
+ ovs.daemon.daemonize()
+
+ ipsec = IPsec()
+
+ interfaces = {}
+ while True:
+ if not idl.run():
+ poller = ovs.poller.Poller()
+ idl.wait(poller)
+ poller.block()
+ continue
+
+ new_interfaces = {}
+ for rec in idl.data["Interface"].itervalues():
+ name = rec.name.as_scalar()
+ local_ip = rec.other_config.get("ipsec_local_ip")
+ if rec.type.as_scalar() == "gre" and local_ip:
+ new_interfaces[name] = {
+ "remote_ip": rec.options.get("remote_ip"),
+ "local_ip": local_ip,
+ "ipsec_cert": rec.other_config.get("ipsec_cert"),
+ "ipsec_psk": rec.other_config.get("ipsec_psk") }
+
+ if interfaces != new_interfaces:
+ for name, vals in interfaces.items():
+ if name not in new_interfaces.keys():
+ ipsec.ipsec_cert_del(vals["local_ip"], vals["remote_ip"])
+ for name, vals in new_interfaces.items():
+ if vals == interfaces.get(name):
+ s_log.warning(
+ "configuration changed for %s, need to delete "
+ "interface first" % name)
+ continue
+
+ if vals["ipsec_cert"]:
+ ipsec.ipsec_cert_update(vals["local_ip"],
+ vals["remote_ip"], vals["ipsec_cert"])
+ elif vals["ipsec_psk"]:
+ ipsec.ipsec_psk_update(vals["local_ip"],
+ vals["remote_ip"], vals["ipsec_psk"])
+ else:
+ s_log.warning(
+ "no ipsec_cert or ipsec_psk defined for %s" % name)
+ continue
+
+ interfaces = new_interfaces
+
+# Script entry point. Any unexpected exception is logged with a traceback
+# and turned into ovs.daemon.RESTART_EXIT_CODE.
+if __name__ == '__main__':
+ try:
+ main(sys.argv)
+ except SystemExit:
+ # Let sys.exit() calls complete normally
+ raise
+ except:
+ s_log.exception("traceback")
+ sys.exit(ovs.daemon.RESTART_EXIT_CODE)
diff --git a/debian/python-openvswitch.dirs b/debian/python-openvswitch.dirs
new file mode 100644
index 000000000..9bc6912a3
--- /dev/null
+++ b/debian/python-openvswitch.dirs
@@ -0,0 +1,2 @@
+usr/share/python-support/python-openvswitch/ovs/
+usr/share/python-support/python-openvswitch/ovs/db/
diff --git a/debian/python-openvswitch.install b/debian/python-openvswitch.install
new file mode 100644
index 000000000..211ed300e
--- /dev/null
+++ b/debian/python-openvswitch.install
@@ -0,0 +1,2 @@
+python/ovs/*.py usr/share/python-support/python-openvswitch/ovs/
+python/ovs/db/*.py usr/share/python-support/python-openvswitch/ovs/db/
diff --git a/debian/rules b/debian/rules
index 49d562ad7..8e7ad1e82 100755
--- a/debian/rules
+++ b/debian/rules
@@ -103,6 +103,7 @@ binary-common:
dh_strip --dbg-package=openvswitch-dbg
dh_compress
dh_fixperms -X var/log/core
+ dh_pysupport
dh_perl
dh_makeshlibs
dh_installdeb
diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h
index 0b543d45a..13aea6607 100644
--- a/include/openflow/nicira-ext.h
+++ b/include/openflow/nicira-ext.h
@@ -254,8 +254,18 @@ enum nx_action_subtype {
*
* This is useful because OpenFlow does not provide a way to match on the
* Ethernet addresses inside ARP packets, so there is no other way to drop
- * spoofed ARPs other than sending every packet up to the controller. */
- NXAST_DROP_SPOOFED_ARP
+ * spoofed ARPs other than sending every ARP packet to a controller. */
+ NXAST_DROP_SPOOFED_ARP,
+
+ /* Set the queue that should be used when packets are output. This
+ * is similar to the OpenFlow OFPAT_ENQUEUE action, but does not
+ * take the output port as an argument. This allows the queue
+ * to be defined before the port is known. */
+ NXAST_SET_QUEUE,
+
+ /* Restore the queue to the value it was before any NXAST_SET_QUEUE
+ * actions were used. */
+ NXAST_POP_QUEUE
};
/* Action structure for NXAST_RESUBMIT. */
@@ -280,6 +290,17 @@ struct nx_action_set_tunnel {
};
OFP_ASSERT(sizeof(struct nx_action_set_tunnel) == 16);
+/* Action structure for NXAST_SET_QUEUE.
+ *
+ * Unlike OFPAT_ENQUEUE, this action carries only a queue ID and no output
+ * port. */
+struct nx_action_set_queue {
+ uint16_t type; /* OFPAT_VENDOR. */
+ uint16_t len; /* Length is 16. */
+ uint32_t vendor; /* NX_VENDOR_ID. */
+ uint16_t subtype; /* NXAST_SET_QUEUE. */
+ uint8_t pad[2]; /* Padding so that queue_id starts at byte offset 12. */
+ uint32_t queue_id; /* Where to enqueue packets. */
+};
+OFP_ASSERT(sizeof(struct nx_action_set_queue) == 16);
+
/* Header for Nicira-defined actions. */
struct nx_action_header {
uint16_t type; /* OFPAT_VENDOR. */
diff --git a/include/openvswitch/tunnel.h b/include/openvswitch/tunnel.h
index 373797513..dd700d0dc 100644
--- a/include/openvswitch/tunnel.h
+++ b/include/openvswitch/tunnel.h
@@ -48,6 +48,7 @@
#define TNL_F_TOS_INHERIT (1 << 4) /* Inherit the ToS from the inner packet. */
#define TNL_F_TTL_INHERIT (1 << 5) /* Inherit the TTL from the inner packet. */
#define TNL_F_PMTUD (1 << 6) /* Enable path MTU discovery. */
+#define TNL_F_HDR_CACHE (1 << 7) /* Enable tunnel header caching. */
struct tnl_port_config {
__u32 flags;
diff --git a/lib/classifier.c b/lib/classifier.c
index f844f5232..f6f0b5a53 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -176,8 +176,7 @@ classifier_destroy(struct classifier *cls)
struct hmap *tbl;
for (tbl = &cls->tables[0]; tbl < &cls->tables[CLS_N_FIELDS]; tbl++) {
- HMAP_FOR_EACH_SAFE (bucket, next_bucket,
- struct cls_bucket, hmap_node, tbl) {
+ HMAP_FOR_EACH_SAFE (bucket, next_bucket, hmap_node, tbl) {
free(bucket);
}
hmap_destroy(tbl);
@@ -341,11 +340,11 @@ classifier_find_rule_exactly(const struct classifier *cls,
assert(target->wildcards == (target->wildcards & OVSFW_ALL));
table_idx = table_idx_from_wildcards(target->wildcards);
hash = hash_fields(target, table_idx);
- HMAP_FOR_EACH_WITH_HASH (bucket, struct cls_bucket, hmap_node, hash,
+ HMAP_FOR_EACH_WITH_HASH (bucket, hmap_node, hash,
&cls->tables[table_idx]) {
if (equal_fields(&bucket->fixed, target, table_idx)) {
struct cls_rule *pos;
- LIST_FOR_EACH (pos, struct cls_rule, node.list, &bucket->rules) {
+ LIST_FOR_EACH (pos, node.list, &bucket->rules) {
if (pos->flow.priority < target->priority) {
return NULL;
} else if (pos->flow.priority == target->priority &&
@@ -378,13 +377,12 @@ classifier_rule_overlaps(const struct classifier *cls, const flow_t *target)
for (tbl = &cls->tables[0]; tbl < &cls->tables[CLS_N_FIELDS]; tbl++) {
struct cls_bucket *bucket;
- HMAP_FOR_EACH (bucket, struct cls_bucket, hmap_node, tbl) {
+ HMAP_FOR_EACH (bucket, hmap_node, tbl) {
struct cls_rule *rule;
- LIST_FOR_EACH (rule, struct cls_rule, node.list,
- &bucket->rules) {
+ LIST_FOR_EACH (rule, node.list, &bucket->rules) {
if (rule->flow.priority == target->priority
- && rules_match_2wild(rule, &target_rule, 0)) {
+ && rules_match_2wild(rule, &target_rule, 0)) {
return true;
}
}
@@ -420,8 +418,7 @@ classifier_for_each_match(const struct classifier *cls,
table++) {
struct cls_bucket *bucket, *next_bucket;
- HMAP_FOR_EACH_SAFE (bucket, next_bucket,
- struct cls_bucket, hmap_node, table) {
+ HMAP_FOR_EACH_SAFE (bucket, next_bucket, hmap_node, table) {
/* XXX there is a bit of room for optimization here based on
* rejecting entire buckets on their fixed fields, but it will
* only be worthwhile for big buckets (which we hope we won't
@@ -433,8 +430,7 @@ classifier_for_each_match(const struct classifier *cls,
* bucket itself will be destroyed. The bucket contains the
* list head so that's a use-after-free error. */
prev_rule = NULL;
- LIST_FOR_EACH (rule, struct cls_rule, node.list,
- &bucket->rules) {
+ LIST_FOR_EACH (rule, node.list, &bucket->rules) {
if (rules_match_1wild(rule, &target, 0)) {
if (prev_rule) {
int retval = callback(prev_rule, aux);
@@ -459,7 +455,7 @@ classifier_for_each_match(const struct classifier *cls,
if (target.flow.wildcards) {
struct cls_rule *rule, *next_rule;
- HMAP_FOR_EACH_SAFE (rule, next_rule, struct cls_rule, node.hmap,
+ HMAP_FOR_EACH_SAFE (rule, next_rule, node.hmap,
&cls->exact_table) {
if (rules_match_1wild(rule, &target, 0)) {
int retval = callback(rule, aux);
@@ -506,8 +502,7 @@ classifier_for_each(const struct classifier *cls, int include,
for (tbl = &cls->tables[0]; tbl < &cls->tables[CLS_N_FIELDS]; tbl++) {
struct cls_bucket *bucket, *next_bucket;
- HMAP_FOR_EACH_SAFE (bucket, next_bucket,
- struct cls_bucket, hmap_node, tbl) {
+ HMAP_FOR_EACH_SAFE (bucket, next_bucket, hmap_node, tbl) {
struct cls_rule *prev_rule, *rule;
/* We can't just use LIST_FOR_EACH_SAFE here because, if the
@@ -515,8 +510,7 @@ classifier_for_each(const struct classifier *cls, int include,
* bucket itself will be destroyed. The bucket contains the
* list head so that's a use-after-free error. */
prev_rule = NULL;
- LIST_FOR_EACH (rule, struct cls_rule, node.list,
- &bucket->rules) {
+ LIST_FOR_EACH (rule, node.list, &bucket->rules) {
if (prev_rule) {
int retval = callback(prev_rule, aux);
if (retval) {
@@ -538,8 +532,7 @@ classifier_for_each(const struct classifier *cls, int include,
if (include & CLS_INC_EXACT) {
struct cls_rule *rule, *next_rule;
- HMAP_FOR_EACH_SAFE (rule, next_rule,
- struct cls_rule, node.hmap, &cls->exact_table) {
+ HMAP_FOR_EACH_SAFE (rule, next_rule, node.hmap, &cls->exact_table) {
int retval = callback(rule, aux);
if (retval) {
return retval;
@@ -681,7 +674,7 @@ static struct cls_rule *
bucket_insert(struct cls_bucket *bucket, struct cls_rule *rule)
{
struct cls_rule *pos;
- LIST_FOR_EACH (pos, struct cls_rule, node.list, &bucket->rules) {
+ LIST_FOR_EACH (pos, node.list, &bucket->rules) {
if (pos->flow.priority == rule->flow.priority) {
if (pos->flow.wildcards == rule->flow.wildcards
&& rules_match_1wild(pos, rule, rule->table_idx))
@@ -719,8 +712,7 @@ static struct cls_bucket *
find_bucket(struct hmap *table, size_t hash, const struct cls_rule *rule)
{
struct cls_bucket *bucket;
- HMAP_FOR_EACH_WITH_HASH (bucket, struct cls_bucket, hmap_node, hash,
- table) {
+ HMAP_FOR_EACH_WITH_HASH (bucket, hmap_node, hash, table) {
if (equal_fields(&bucket->fixed, &rule->flow, rule->table_idx)) {
return bucket;
}
@@ -890,7 +882,7 @@ search_bucket(struct cls_bucket *bucket, int field_idx,
return NULL;
}
- LIST_FOR_EACH (pos, struct cls_rule, node.list, &bucket->rules) {
+ LIST_FOR_EACH (pos, node.list, &bucket->rules) {
if (rules_match_1wild(target, pos, field_idx)) {
return pos;
}
@@ -918,7 +910,7 @@ search_table(const struct hmap *table, int field_idx,
return search_bucket(bucket, field_idx, target);
}
- HMAP_FOR_EACH_WITH_HASH (bucket, struct cls_bucket, hmap_node,
+ HMAP_FOR_EACH_WITH_HASH (bucket, hmap_node,
hash_fields(&target->flow, field_idx), table) {
struct cls_rule *rule = search_bucket(bucket, field_idx, target);
if (rule) {
@@ -934,8 +926,7 @@ search_exact_table(const struct classifier *cls, size_t hash,
{
struct cls_rule *rule;
- HMAP_FOR_EACH_WITH_HASH (rule, struct cls_rule, node.hmap,
- hash, &cls->exact_table) {
+ HMAP_FOR_EACH_WITH_HASH (rule, node.hmap, hash, &cls->exact_table) {
if (flow_equal_headers(&rule->flow, target)) {
return rule;
}
diff --git a/lib/daemon.c b/lib/daemon.c
index 548650464..bbcfe6afc 100644
--- a/lib/daemon.c
+++ b/lib/daemon.c
@@ -42,6 +42,10 @@ static bool detach;
/* --pidfile: Name of pidfile (null if none). */
static char *pidfile;
+/* Device and inode of pidfile, so we can avoid reopening it. */
+static dev_t pidfile_dev;
+static ino_t pidfile_ino;
+
/* --overwrite-pidfile: Create pidfile even if one already exists and is
locked? */
static bool overwrite_pidfile;
@@ -208,6 +212,15 @@ make_pidfile(void)
close(fd);
} else {
/* Keep 'fd' open to retain the lock. */
+ struct stat s;
+
+ if (!fstat(fd, &s)) {
+ pidfile_dev = s.st_dev;
+ pidfile_ino = s.st_ino;
+ } else {
+ VLOG_ERR("%s: fstat failed: %s",
+ pidfile, strerror(errno));
+ }
}
free(text);
} else {
@@ -330,11 +343,13 @@ monitor_daemon(pid_t daemon_pid)
const char *saved_program_name;
time_t last_restart;
char *status_msg;
+ int crashes;
saved_program_name = program_name;
program_name = xasprintf("monitor(%s)", program_name);
status_msg = xstrdup("healthy");
last_restart = TIME_MIN;
+ crashes = 0;
for (;;) {
int retval;
int status;
@@ -352,7 +367,8 @@ monitor_daemon(pid_t daemon_pid)
} else if (retval == daemon_pid) {
char *s = process_status_msg(status);
free(status_msg);
- status_msg = xasprintf("pid %lu died, %s",
+ status_msg = xasprintf("%d crashes: pid %lu died, %s",
+ ++crashes,
(unsigned long int) daemon_pid, s);
free(s);
@@ -491,9 +507,21 @@ read_pidfile(const char *pidfile)
{
char line[128];
struct flock lck;
+ struct stat s;
FILE *file;
int error;
+ if ((pidfile_ino || pidfile_dev)
+ && !stat(pidfile, &s)
+ && s.st_ino == pidfile_ino && s.st_dev == pidfile_dev) {
+ /* It's our own pidfile. We can't afford to open it, because closing
+ * *any* fd for a file that a process has locked also releases all the
+ * locks on that file.
+ *
+ * Fortunately, we know the associated pid anyhow: */
+ return getpid();
+ }
+
file = fopen(pidfile, "r");
if (!file) {
error = errno;
diff --git a/lib/dynamic-string.c b/lib/dynamic-string.c
index 5f8054a45..3af7fc9f5 100644
--- a/lib/dynamic-string.c
+++ b/lib/dynamic-string.c
@@ -147,8 +147,6 @@ ds_put_format_valist(struct ds *ds, const char *format, va_list args_)
if (needed < available) {
ds->length += needed;
} else {
- size_t available;
-
ds_reserve(ds, ds->length + needed);
va_copy(args, args_);
diff --git a/lib/hmap.c b/lib/hmap.c
index 1b4816d9d..6bc5ea74b 100644
--- a/lib/hmap.c
+++ b/lib/hmap.c
@@ -18,6 +18,7 @@
#include "hmap.h"
#include <assert.h>
#include <stdint.h>
+#include <string.h>
#include "coverage.h"
#include "random.h"
#include "util.h"
@@ -42,6 +43,22 @@ hmap_destroy(struct hmap *hmap)
}
}
+/* Removes all nodes from 'hmap', leaving it ready to accept more nodes. Does
+ * not free memory allocated for 'hmap'.
+ *
+ * This function is appropriate when 'hmap' will soon have about as many
+ * elements as it did before. If 'hmap' will likely have fewer elements than
+ * before, use hmap_destroy() followed by hmap_init() to save memory and
+ * iteration time. */
+void
+hmap_clear(struct hmap *hmap)
+{
+ if (hmap->n > 0) {
+ hmap->n = 0;
+ memset(hmap->buckets, 0, (hmap->mask + 1) * sizeof *hmap->buckets);
+ }
+}
+
/* Exchanges hash maps 'a' and 'b'. */
void
hmap_swap(struct hmap *a, struct hmap *b)
diff --git a/lib/hmap.h b/lib/hmap.h
index b3032c294..cf0c74b91 100644
--- a/lib/hmap.h
+++ b/lib/hmap.h
@@ -69,6 +69,7 @@ struct hmap {
/* Initialization. */
void hmap_init(struct hmap *);
void hmap_destroy(struct hmap *);
+void hmap_clear(struct hmap *);
void hmap_swap(struct hmap *a, struct hmap *b);
void hmap_moved(struct hmap *hmap);
static inline size_t hmap_count(const struct hmap *);
@@ -95,9 +96,8 @@ struct hmap_node *hmap_random_node(const struct hmap *);
*
* HMAP_FOR_EACH_WITH_HASH iterates NODE over all of the nodes in HMAP that
* have hash value equal to HASH. HMAP_FOR_EACH_IN_BUCKET iterates NODE over
- * all of the nodes in HMAP that would fall in the same bucket as HASH. STRUCT
- * and MEMBER must be the name of the struct that contains the 'struct
- * hmap_node' and the name of the 'struct hmap_node' member, respectively.
+ * all of the nodes in HMAP that would fall in the same bucket as HASH. MEMBER
+ * must be the name of the 'struct hmap_node' member within NODE.
*
* These macros may be used interchangeably to search for a particular value in
* an hmap, see, e.g. shash_find() for an example. Usually, using
@@ -112,18 +112,18 @@ struct hmap_node *hmap_random_node(const struct hmap *);
*
* HASH is only evaluated once.
*/
-#define HMAP_FOR_EACH_WITH_HASH(NODE, STRUCT, MEMBER, HASH, HMAP) \
- for ((NODE) = CONTAINER_OF(hmap_first_with_hash(HMAP, HASH), \
- STRUCT, MEMBER); \
+#define HMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HMAP) \
+ for ((NODE) = OBJECT_CONTAINING(hmap_first_with_hash(HMAP, HASH), \
+ NODE, MEMBER); \
&(NODE)->MEMBER != NULL; \
- (NODE) = CONTAINER_OF(hmap_next_with_hash(&(NODE)->MEMBER), \
- STRUCT, MEMBER))
-#define HMAP_FOR_EACH_IN_BUCKET(NODE, STRUCT, MEMBER, HASH, HMAP) \
- for ((NODE) = CONTAINER_OF(hmap_first_in_bucket(HMAP, HASH), \
- STRUCT, MEMBER); \
+ (NODE) = OBJECT_CONTAINING(hmap_next_with_hash(&(NODE)->MEMBER), \
+ NODE, MEMBER))
+#define HMAP_FOR_EACH_IN_BUCKET(NODE, MEMBER, HASH, HMAP) \
+ for ((NODE) = OBJECT_CONTAINING(hmap_first_in_bucket(HMAP, HASH), \
+ NODE, MEMBER); \
&(NODE)->MEMBER != NULL; \
- (NODE) = CONTAINER_OF(hmap_next_in_bucket(&(NODE)->MEMBER), \
- STRUCT, MEMBER))
+ (NODE) = OBJECT_CONTAINING(hmap_next_in_bucket(&(NODE)->MEMBER), \
+ NODE, MEMBER))
static inline struct hmap_node *hmap_first_with_hash(const struct hmap *,
size_t hash);
@@ -137,18 +137,18 @@ static inline struct hmap_node *hmap_next_in_bucket(const struct hmap_node *);
* The _SAFE version is needed when NODE may be freed. It is not needed when
* NODE may be removed from the hash map but its members remain accessible and
* intact. */
-#define HMAP_FOR_EACH(NODE, STRUCT, MEMBER, HMAP) \
- for ((NODE) = CONTAINER_OF(hmap_first(HMAP), STRUCT, MEMBER); \
- &(NODE)->MEMBER != NULL; \
- (NODE) = CONTAINER_OF(hmap_next(HMAP, &(NODE)->MEMBER), \
- STRUCT, MEMBER))
-
-#define HMAP_FOR_EACH_SAFE(NODE, NEXT, STRUCT, MEMBER, HMAP) \
- for ((NODE) = CONTAINER_OF(hmap_first(HMAP), STRUCT, MEMBER); \
- (&(NODE)->MEMBER != NULL \
- ? (NEXT) = CONTAINER_OF(hmap_next(HMAP, &(NODE)->MEMBER), \
- STRUCT, MEMBER), 1 \
- : 0); \
+#define HMAP_FOR_EACH(NODE, MEMBER, HMAP) \
+ for ((NODE) = OBJECT_CONTAINING(hmap_first(HMAP), NODE, MEMBER); \
+ &(NODE)->MEMBER != NULL; \
+ (NODE) = OBJECT_CONTAINING(hmap_next(HMAP, &(NODE)->MEMBER), \
+ NODE, MEMBER))
+
+#define HMAP_FOR_EACH_SAFE(NODE, NEXT, MEMBER, HMAP) \
+ for ((NODE) = OBJECT_CONTAINING(hmap_first(HMAP), NODE, MEMBER); \
+ (&(NODE)->MEMBER != NULL \
+ ? (NEXT) = OBJECT_CONTAINING(hmap_next(HMAP, &(NODE)->MEMBER), \
+ NODE, MEMBER), 1 \
+ : 0); \
(NODE) = (NEXT))
static inline struct hmap_node *hmap_first(const struct hmap *);
diff --git a/lib/json.c b/lib/json.c
index 3b70e6bdb..5887f677a 100644
--- a/lib/json.c
+++ b/lib/json.c
@@ -705,7 +705,6 @@ json_lex_number(struct json_parser *p)
*
* We suppress negative zeros as a matter of policy. */
if (!significand) {
- struct json_token token;
token.type = T_INTEGER;
token.u.integer = 0;
json_parser_input(p, &token);
diff --git a/lib/learning-switch.c b/lib/learning-switch.c
index 3fd5d66f8..0c2f7bc93 100644
--- a/lib/learning-switch.c
+++ b/lib/learning-switch.c
@@ -24,6 +24,7 @@
#include <time.h>
#include "flow.h"
+#include "hmap.h"
#include "mac-learning.h"
#include "ofpbuf.h"
#include "ofp-parse.h"
@@ -33,6 +34,7 @@
#include "poll-loop.h"
#include "queue.h"
#include "rconn.h"
+#include "shash.h"
#include "timeval.h"
#include "vconn.h"
#include "vlog.h"
@@ -40,6 +42,12 @@
VLOG_DEFINE_THIS_MODULE(learning_switch)
+struct lswitch_port {
+ struct hmap_node hmap_node; /* Hash node for port number. */
+ uint16_t port_no; /* OpenFlow port number, in host byte order. */
+ uint32_t queue_id; /* OpenFlow queue number. */
+};
+
struct lswitch {
/* If nonnegative, the switch sets up flows that expire after the given
* number of seconds (or never expire, if the value is OFP_FLOW_PERMANENT).
@@ -51,7 +59,11 @@ struct lswitch {
struct mac_learning *ml; /* NULL to act as hub instead of switch. */
uint32_t wildcards; /* Wildcards to apply to flows. */
bool action_normal; /* Use OFPP_NORMAL? */
- uint32_t queue; /* OpenFlow queue to use, or UINT32_MAX. */
+
+ /* Queue distribution. */
+ uint32_t default_queue; /* Default OpenFlow queue, or UINT32_MAX. */
+ struct hmap queue_numbers; /* Map from port number to lswitch_port. */
+ struct shash queue_names; /* Map from port name to lswitch_port. */
/* Number of outgoing queued packets on the rconn. */
struct rconn_packet_counter *queued;
@@ -63,44 +75,29 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
static void queue_tx(struct lswitch *, struct rconn *, struct ofpbuf *);
static void send_features_request(struct lswitch *, struct rconn *);
-static void send_default_flows(struct lswitch *sw, struct rconn *rconn,
- FILE *default_flows);
typedef void packet_handler_func(struct lswitch *, struct rconn *, void *);
static packet_handler_func process_switch_features;
static packet_handler_func process_packet_in;
static packet_handler_func process_echo_request;
-/* Creates and returns a new learning switch.
- *
- * If 'learn_macs' is true, the new switch will learn the ports on which MAC
- * addresses appear. Otherwise, the new switch will flood all packets.
- *
- * If 'max_idle' is nonnegative, the new switch will set up flows that expire
- * after the given number of seconds (or never expire, if 'max_idle' is
- * OFP_FLOW_PERMANENT). Otherwise, the new switch will process every packet.
- *
- * The caller may provide the file stream 'default_flows' that defines
- * default flows that should be pushed when a switch connects. Each
- * line is a flow entry in the format described for "add-flows" command
- * in the Flow Syntax section of the ovs-ofct(8) man page. The caller
- * is responsible for closing the stream.
+/* Creates and returns a new learning switch whose configuration is given by
+ * 'cfg'.
*
* 'rconn' is used to send out an OpenFlow features request. */
struct lswitch *
-lswitch_create(struct rconn *rconn, bool learn_macs,
- bool exact_flows, int max_idle, bool action_normal,
- FILE *default_flows)
+lswitch_create(struct rconn *rconn, const struct lswitch_config *cfg)
{
+ const struct ofpbuf *b;
struct lswitch *sw;
sw = xzalloc(sizeof *sw);
- sw->max_idle = max_idle;
+ sw->max_idle = cfg->max_idle;
sw->datapath_id = 0;
sw->last_features_request = time_now() - 1;
- sw->ml = learn_macs ? mac_learning_create() : NULL;
- sw->action_normal = action_normal;
- if (exact_flows) {
+ sw->ml = cfg->mode == LSW_LEARN ? mac_learning_create() : NULL;
+ sw->action_normal = cfg->mode == LSW_NORMAL;
+ if (cfg->exact_flows) {
/* Exact match. */
sw->wildcards = 0;
} else {
@@ -110,12 +107,28 @@ lswitch_create(struct rconn *rconn, bool learn_macs,
sw->wildcards = (OFPFW_DL_TYPE | OFPFW_NW_SRC_MASK | OFPFW_NW_DST_MASK
| OFPFW_NW_PROTO | OFPFW_TP_SRC | OFPFW_TP_DST);
}
- sw->queue = UINT32_MAX;
+
+ sw->default_queue = cfg->default_queue;
+ hmap_init(&sw->queue_numbers);
+ shash_init(&sw->queue_names);
+ if (cfg->port_queues) {
+ struct shash_node *node;
+
+ SHASH_FOR_EACH (node, cfg->port_queues) {
+ struct lswitch_port *port = xmalloc(sizeof *port);
+ hmap_node_nullify(&port->hmap_node);
+ port->queue_id = (uintptr_t) node->data;
+ shash_add(&sw->queue_names, node->name, port);
+ }
+ }
+
sw->queued = rconn_packet_counter_create();
send_features_request(sw, rconn);
- if (default_flows) {
- send_default_flows(sw, rconn, default_flows);
+
+ for (b = cfg->default_flows; b; b = b->next) {
+ queue_tx(sw, rconn, ofpbuf_clone(b));
}
+
return sw;
}
@@ -124,21 +137,19 @@ void
lswitch_destroy(struct lswitch *sw)
{
if (sw) {
+ struct lswitch_port *node, *next;
+
+ HMAP_FOR_EACH_SAFE (node, next, hmap_node, &sw->queue_numbers) {
+ hmap_remove(&sw->queue_numbers, &node->hmap_node);
+ free(node);
+ }
+ shash_destroy(&sw->queue_names);
mac_learning_destroy(sw->ml);
rconn_packet_counter_destroy(sw->queued);
free(sw);
}
}
-/* Sets 'queue' as the OpenFlow queue used by packets and flows set up by 'sw'.
- * Specify UINT32_MAX to avoid specifying a particular queue, which is also the
- * default if this function is never called for 'sw'. */
-void
-lswitch_set_queue(struct lswitch *sw, uint32_t queue)
-{
- sw->queue = queue;
-}
-
/* Takes care of necessary 'sw' activity, except for receiving packets (which
* the caller must do). */
void
@@ -220,10 +231,10 @@ lswitch_process_packet(struct lswitch *sw, struct rconn *rconn,
}
}
if (VLOG_IS_DBG_ENABLED()) {
- char *p = ofp_to_string(msg->data, msg->size, 2);
+ char *s = ofp_to_string(msg->data, msg->size, 2);
VLOG_DBG_RL(&rl, "%016llx: OpenFlow packet ignored: %s",
- sw->datapath_id, p);
- free(p);
+ sw->datapath_id, s);
+ free(s);
}
}
@@ -249,53 +260,6 @@ send_features_request(struct lswitch *sw, struct rconn *rconn)
}
static void
-send_default_flows(struct lswitch *sw, struct rconn *rconn,
- FILE *default_flows)
-{
- char line[1024];
-
- while (fgets(line, sizeof line, default_flows)) {
- struct ofpbuf *b;
- struct ofp_flow_mod *ofm;
- uint16_t priority, idle_timeout, hard_timeout;
- uint64_t cookie;
- struct ofp_match match;
-
- char *comment;
-
- /* Delete comments. */
- comment = strchr(line, '#');
- if (comment) {
- *comment = '\0';
- }
-
- /* Drop empty lines. */
- if (line[strspn(line, " \t\n")] == '\0') {
- continue;
- }
-
- /* Parse and send. str_to_flow() will expand and reallocate the data
- * in 'buffer', so we can't keep pointers to across the str_to_flow()
- * call. */
- make_openflow(sizeof *ofm, OFPT_FLOW_MOD, &b);
- parse_ofp_str(line, &match, b,
- NULL, NULL, &priority, &idle_timeout, &hard_timeout,
- &cookie);
- ofm = b->data;
- ofm->match = match;
- ofm->command = htons(OFPFC_ADD);
- ofm->cookie = htonll(cookie);
- ofm->idle_timeout = htons(idle_timeout);
- ofm->hard_timeout = htons(hard_timeout);
- ofm->buffer_id = htonl(UINT32_MAX);
- ofm->priority = htons(priority);
-
- update_openflow_length(b);
- queue_tx(sw, rconn, b);
- }
-}
-
-static void
queue_tx(struct lswitch *sw, struct rconn *rconn, struct ofpbuf *b)
{
int retval = rconn_send_with_limit(rconn, b, sw->queued, 10);
@@ -316,8 +280,28 @@ process_switch_features(struct lswitch *sw, struct rconn *rconn OVS_UNUSED,
void *osf_)
{
struct ofp_switch_features *osf = osf_;
+ size_t n_ports;
+ size_t i;
+
+ if (check_ofp_message_array(&osf->header, OFPT_FEATURES_REPLY,
+ sizeof *osf, sizeof *osf->ports, &n_ports)) {
+ return;
+ }
sw->datapath_id = ntohll(osf->datapath_id);
+
+ for (i = 0; i < n_ports; i++) {
+ struct ofp_phy_port *opp = &osf->ports[i];
+ struct lswitch_port *lp;
+
+ opp->name[OFP_MAX_PORT_NAME_LEN - 1] = '\0';
+ lp = shash_find_data(&sw->queue_names, (char *) opp->name);
+ if (lp && hmap_node_is_null(&lp->hmap_node)) {
+ lp->port_no = ntohs(opp->port_no);
+ hmap_insert(&sw->queue_numbers, &lp->hmap_node,
+ hash_int(lp->port_no, 0));
+ }
+ }
}
static uint16_t
@@ -360,11 +344,27 @@ lswitch_choose_destination(struct lswitch *sw, const flow_t *flow)
return out_port;
}
+static uint32_t
+get_queue_id(const struct lswitch *sw, uint16_t in_port)
+{
+ const struct lswitch_port *port;
+
+ HMAP_FOR_EACH_WITH_HASH (port, hmap_node, hash_int(in_port, 0),
+ &sw->queue_numbers) {
+ if (port->port_no == in_port) {
+ return port->queue_id;
+ }
+ }
+
+ return sw->default_queue;
+}
+
static void
process_packet_in(struct lswitch *sw, struct rconn *rconn, void *opi_)
{
struct ofp_packet_in *opi = opi_;
uint16_t in_port = ntohs(opi->in_port);
+ uint32_t queue_id;
uint16_t out_port;
struct ofp_action_header actions[2];
@@ -392,9 +392,10 @@ process_packet_in(struct lswitch *sw, struct rconn *rconn, void *opi_)
out_port = lswitch_choose_destination(sw, &flow);
/* Make actions. */
+ queue_id = get_queue_id(sw, in_port);
if (out_port == OFPP_NONE) {
actions_len = 0;
- } else if (sw->queue == UINT32_MAX || out_port >= OFPP_MAX) {
+ } else if (queue_id == UINT32_MAX || out_port >= OFPP_MAX) {
struct ofp_action_output oao;
memset(&oao, 0, sizeof oao);
@@ -411,7 +412,7 @@ process_packet_in(struct lswitch *sw, struct rconn *rconn, void *opi_)
oae.type = htons(OFPAT_ENQUEUE);
oae.len = htons(sizeof oae);
oae.port = htons(out_port);
- oae.queue_id = htonl(sw->queue);
+ oae.queue_id = htonl(queue_id);
memcpy(actions, &oae, sizeof oae);
actions_len = sizeof oae;
diff --git a/lib/learning-switch.h b/lib/learning-switch.h
index 96707b842..d0892576a 100644
--- a/lib/learning-switch.h
+++ b/lib/learning-switch.h
@@ -24,9 +24,37 @@
struct ofpbuf;
struct rconn;
-struct lswitch *lswitch_create(struct rconn *, bool learn_macs,
- bool exact_flows, int max_idle,
- bool action_normal, FILE *default_flows);
+enum lswitch_mode {
+ LSW_NORMAL, /* Always use OFPP_NORMAL. */
+ LSW_FLOOD, /* Always use OFPP_FLOOD. */
+ LSW_LEARN /* Learn MACs at controller. */
+};
+
+struct lswitch_config {
+ enum lswitch_mode mode;
+
+ /* Set up only exact-match flows? */
+ bool exact_flows;
+
+ /* <0: Process every packet at the controller.
+ * >=0: Expire flows after they are unused for 'max_idle' seconds.
+ * OFP_FLOW_PERMANENT: Set up permanent flows. */
+ int max_idle;
+
+ /* Optionally, a chain of one or more OpenFlow messages to send to the
+ * switch at time of connection. Presumably these will be OFPT_FLOW_MOD
+ * requests to set up the flow table. */
+ const struct ofpbuf *default_flows;
+
+ /* The OpenFlow queue to use by default. Use UINT32_MAX to avoid
+ * specifying a particular queue. */
+ uint32_t default_queue;
+
+ /* Maps from a port name to a queue_id (cast to void *). */
+ const struct shash *port_queues;
+};
+
+struct lswitch *lswitch_create(struct rconn *, const struct lswitch_config *);
void lswitch_set_queue(struct lswitch *sw, uint32_t queue);
void lswitch_run(struct lswitch *);
void lswitch_wait(struct lswitch *);
diff --git a/lib/list.h b/lib/list.h
index 59e3e86cb..997298270 100644
--- a/lib/list.h
+++ b/lib/list.h
@@ -57,18 +57,18 @@ struct list *list_back(struct list *);
size_t list_size(const struct list *);
bool list_is_empty(const struct list *);
-#define LIST_FOR_EACH(ITER, STRUCT, MEMBER, LIST) \
- for (ITER = CONTAINER_OF((LIST)->next, STRUCT, MEMBER); \
- &(ITER)->MEMBER != (LIST); \
- ITER = CONTAINER_OF((ITER)->MEMBER.next, STRUCT, MEMBER))
-#define LIST_FOR_EACH_REVERSE(ITER, STRUCT, MEMBER, LIST) \
- for (ITER = CONTAINER_OF((LIST)->prev, STRUCT, MEMBER); \
- &(ITER)->MEMBER != (LIST); \
- ITER = CONTAINER_OF((ITER)->MEMBER.prev, STRUCT, MEMBER))
-#define LIST_FOR_EACH_SAFE(ITER, NEXT, STRUCT, MEMBER, LIST) \
- for (ITER = CONTAINER_OF((LIST)->next, STRUCT, MEMBER); \
- (NEXT = CONTAINER_OF((ITER)->MEMBER.next, STRUCT, MEMBER), \
- &(ITER)->MEMBER != (LIST)); \
+#define LIST_FOR_EACH(ITER, MEMBER, LIST) \
+ for (ITER = OBJECT_CONTAINING((LIST)->next, ITER, MEMBER); \
+ &(ITER)->MEMBER != (LIST); \
+ ITER = OBJECT_CONTAINING((ITER)->MEMBER.next, ITER, MEMBER))
+#define LIST_FOR_EACH_REVERSE(ITER, MEMBER, LIST) \
+ for (ITER = OBJECT_CONTAINING((LIST)->prev, ITER, MEMBER); \
+ &(ITER)->MEMBER != (LIST); \
+ ITER = OBJECT_CONTAINING((ITER)->MEMBER.prev, ITER, MEMBER))
+#define LIST_FOR_EACH_SAFE(ITER, NEXT, MEMBER, LIST) \
+ for (ITER = OBJECT_CONTAINING((LIST)->next, ITER, MEMBER); \
+ (NEXT = OBJECT_CONTAINING((ITER)->MEMBER.next, ITER, MEMBER), \
+ &(ITER)->MEMBER != (LIST)); \
ITER = NEXT)
#ifdef __cplusplus
diff --git a/lib/lockfile.c b/lib/lockfile.c
index 84bfb3c04..690caf9f2 100644
--- a/lib/lockfile.c
+++ b/lib/lockfile.c
@@ -151,7 +151,7 @@ lockfile_postfork(void)
{
struct lockfile *lockfile;
- HMAP_FOR_EACH (lockfile, struct lockfile, hmap_node, &lock_table) {
+ HMAP_FOR_EACH (lockfile, hmap_node, &lock_table) {
if (lockfile->fd >= 0) {
VLOG_WARN("%s: child does not inherit lock", lockfile->name);
lockfile_unhash(lockfile);
@@ -171,7 +171,7 @@ lockfile_find(dev_t device, ino_t inode)
{
struct lockfile *lockfile;
- HMAP_FOR_EACH_WITH_HASH (lockfile, struct lockfile, hmap_node,
+ HMAP_FOR_EACH_WITH_HASH (lockfile, hmap_node,
lockfile_hash(device, inode), &lock_table) {
if (lockfile->device == device && lockfile->inode == inode) {
return lockfile;
diff --git a/lib/mac-learning.c b/lib/mac-learning.c
index bd0ecec09..161412990 100644
--- a/lib/mac-learning.c
+++ b/lib/mac-learning.c
@@ -79,7 +79,7 @@ search_bucket(struct list *bucket, const uint8_t mac[ETH_ADDR_LEN],
uint16_t vlan)
{
struct mac_entry *e;
- LIST_FOR_EACH (e, struct mac_entry, hash_node, bucket) {
+ LIST_FOR_EACH (e, hash_node, bucket) {
if (eth_addr_equals(e->mac, mac) && e->vlan == vlan) {
return e;
}
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index e6036bfc5..2ea411dcd 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -48,6 +48,8 @@
#include "coverage.h"
#include "dynamic-string.h"
#include "fatal-signal.h"
+#include "hash.h"
+#include "hmap.h"
#include "netdev-provider.h"
#include "netdev-vport.h"
#include "netlink.h"
@@ -55,7 +57,6 @@
#include "openflow/openflow.h"
#include "packets.h"
#include "poll-loop.h"
-#include "port-array.h"
#include "rtnetlink.h"
#include "socket-util.h"
#include "shash.h"
@@ -102,19 +103,24 @@ struct tap_state {
/* Traffic control. */
/* An instance of a traffic control class. Always associated with a particular
- * network device. */
+ * network device.
+ *
+ * Each TC implementation subclasses this with whatever additional data it
+ * needs. */
struct tc {
const struct tc_ops *ops;
+ struct hmap queues; /* Contains "struct tc_queue"s.
+ * Read by generic TC layer.
+ * Written only by TC implementation. */
+};
- /* Maps from queue ID to tc-specific data.
- *
- * The generic netdev TC layer uses this to the following extent: if an
- * entry is nonnull, then the queue whose ID is the index is assumed to
- * exist; if an entry is null, then that queue is assumed not to exist.
- * Implementations must adhere to this scheme, although they may store
- * whatever they like as data.
- */
- struct port_array queues;
+/* One traffic control queue.
+ *
+ * Each TC implementation subclasses this with whatever additional data it
+ * needs. */
+struct tc_queue {
+ struct hmap_node hmap_node; /* In struct tc's "queues" hmap. */
+ unsigned int queue_id; /* OpenFlow queue ID. */
};
/* A particular kind of traffic control. Each implementation generally maps to
@@ -204,8 +210,8 @@ struct tc_ops {
*/
int (*qdisc_set)(struct netdev *, const struct shash *details);
- /* Retrieves details of 'queue_id' on 'netdev->tc' into 'details'. The
- * caller ensures that 'queues' has a nonnull value for index 'queue_id.
+ /* Retrieves details of 'queue' on 'netdev->tc' into 'details'. 'queue' is
+ * one of the 'struct tc_queue's within 'netdev->tc->queues'.
*
* The contents of 'details' should be documented as valid for 'ovs_name'
* in the "other_config" column in the "Queue" table in
@@ -217,7 +223,7 @@ struct tc_ops {
*
* This function may be null if 'tc' does not have queues ('n_queues' is
* 0). */
- int (*class_get)(const struct netdev *netdev, unsigned int queue_id,
+ int (*class_get)(const struct netdev *netdev, const struct tc_queue *queue,
struct shash *details);
/* Configures or reconfigures 'queue_id' on 'netdev->tc' according to
@@ -234,21 +240,22 @@ struct tc_ops {
int (*class_set)(struct netdev *, unsigned int queue_id,
const struct shash *details);
- /* Deletes 'queue_id' from 'netdev->tc'. The caller ensures that 'queues'
- * has a nonnull value for index 'queue_id.
+ /* Deletes 'queue' from 'netdev->tc'. 'queue' is one of the 'struct
+ * tc_queue's within 'netdev->tc->queues'.
*
* This function may be null if 'tc' does not have queues or its queues
* cannot be deleted. */
- int (*class_delete)(struct netdev *, unsigned int queue_id);
+ int (*class_delete)(struct netdev *, struct tc_queue *queue);
- /* Obtains stats for 'queue' from 'netdev->tc'. The caller ensures that
- * 'queues' has a nonnull value for index 'queue_id.
+ /* Obtains stats for 'queue' from 'netdev->tc'. 'queue' is one of the
+ * 'struct tc_queue's within 'netdev->tc->queues'.
*
* On success, initializes '*stats'.
*
* This function may be null if 'tc' does not have queues or if it cannot
* report queue statistics. */
- int (*class_get_stats)(const struct netdev *netdev, unsigned int queue_id,
+ int (*class_get_stats)(const struct netdev *netdev,
+ const struct tc_queue *queue,
struct netdev_queue_stats *stats);
/* Extracts queue stats from 'nlmsg', which is a response to a
@@ -265,13 +272,13 @@ static void
tc_init(struct tc *tc, const struct tc_ops *ops)
{
tc->ops = ops;
- port_array_init(&tc->queues);
+ hmap_init(&tc->queues);
}
static void
tc_destroy(struct tc *tc)
{
- port_array_destroy(&tc->queues);
+ hmap_destroy(&tc->queues);
}
static const struct tc_ops tc_ops_htb;
@@ -1468,6 +1475,28 @@ tc_lookup_linux_name(const char *name)
return NULL;
}
+static struct tc_queue *
+tc_find_queue__(const struct netdev *netdev, unsigned int queue_id,
+ size_t hash)
+{
+ struct netdev_dev_linux *netdev_dev =
+ netdev_dev_linux_cast(netdev_get_dev(netdev));
+ struct tc_queue *queue;
+
+ HMAP_FOR_EACH_IN_BUCKET (queue, hmap_node, hash, &netdev_dev->tc->queues) {
+ if (queue->queue_id == queue_id) {
+ return queue;
+ }
+ }
+ return NULL;
+}
+
+static struct tc_queue *
+tc_find_queue(const struct netdev *netdev, unsigned int queue_id)
+{
+ return tc_find_queue__(netdev, queue_id, hash_int(queue_id, 0));
+}
+
static int
netdev_linux_get_qos_capabilities(const struct netdev *netdev OVS_UNUSED,
const char *type,
@@ -1548,12 +1577,12 @@ netdev_linux_get_queue(const struct netdev *netdev,
error = tc_query_qdisc(netdev);
if (error) {
return error;
- } else if (queue_id > UINT16_MAX
- || !port_array_get(&netdev_dev->tc->queues, queue_id)) {
- return ENOENT;
+ } else {
+ struct tc_queue *queue = tc_find_queue(netdev, queue_id);
+ return (queue
+ ? netdev_dev->tc->ops->class_get(netdev, queue, details)
+ : ENOENT);
}
-
- return netdev_dev->tc->ops->class_get(netdev, queue_id, details);
}
static int
@@ -1587,12 +1616,12 @@ netdev_linux_delete_queue(struct netdev *netdev, unsigned int queue_id)
return error;
} else if (!netdev_dev->tc->ops->class_delete) {
return EINVAL;
- } else if (queue_id > UINT16_MAX
- || !port_array_get(&netdev_dev->tc->queues, queue_id)) {
- return ENOENT;
+ } else {
+ struct tc_queue *queue = tc_find_queue(netdev, queue_id);
+ return (queue
+ ? netdev_dev->tc->ops->class_delete(netdev, queue)
+ : ENOENT);
}
-
- return netdev_dev->tc->ops->class_delete(netdev, queue_id);
}
static int
@@ -1607,14 +1636,14 @@ netdev_linux_get_queue_stats(const struct netdev *netdev,
error = tc_query_qdisc(netdev);
if (error) {
return error;
- } else if (queue_id > UINT16_MAX
- || !port_array_get(&netdev_dev->tc->queues, queue_id)) {
- return ENOENT;
} else if (!netdev_dev->tc->ops->class_get_stats) {
return EOPNOTSUPP;
+ } else {
+ const struct tc_queue *queue = tc_find_queue(netdev, queue_id);
+ return (queue
+ ? netdev_dev->tc->ops->class_get_stats(netdev, queue, stats)
+ : ENOENT);
}
-
- return netdev_dev->tc->ops->class_get_stats(netdev, queue_id, stats);
}
static void
@@ -1635,10 +1664,9 @@ netdev_linux_dump_queues(const struct netdev *netdev,
{
struct netdev_dev_linux *netdev_dev =
netdev_dev_linux_cast(netdev_get_dev(netdev));
- unsigned int queue_id;
+ struct tc_queue *queue;
struct shash details;
int last_error;
- void *queue;
int error;
error = tc_query_qdisc(netdev);
@@ -1650,12 +1678,12 @@ netdev_linux_dump_queues(const struct netdev *netdev,
last_error = 0;
shash_init(&details);
- PORT_ARRAY_FOR_EACH (queue, &netdev_dev->tc->queues, queue_id) {
+ HMAP_FOR_EACH (queue, hmap_node, &netdev_dev->tc->queues) {
shash_clear(&details);
- error = netdev_dev->tc->ops->class_get(netdev, queue_id, &details);
+ error = netdev_dev->tc->ops->class_get(netdev, queue, &details);
if (!error) {
- (*cb)(queue_id, &details, aux);
+ (*cb)(queue->queue_id, &details, aux);
} else {
last_error = error;
}
@@ -1779,12 +1807,12 @@ netdev_linux_get_in6(const struct netdev *netdev_, struct in6_addr *in6)
if (file != NULL) {
const char *name = netdev_get_name(netdev_);
while (fgets(line, sizeof line, file)) {
- struct in6_addr in6;
+ struct in6_addr in6_tmp;
char ifname[16 + 1];
- if (parse_if_inet6_line(line, &in6, ifname)
+ if (parse_if_inet6_line(line, &in6_tmp, ifname)
&& !strcmp(name, ifname))
{
- netdev_dev->in6 = in6;
+ netdev_dev->in6 = in6_tmp;
break;
}
}
@@ -1982,7 +2010,7 @@ static void
poll_notify(struct list *list)
{
struct netdev_linux_notifier *notifier;
- LIST_FOR_EACH (notifier, struct netdev_linux_notifier, node, list) {
+ LIST_FOR_EACH (notifier, node, list) {
struct netdev_notifier *n = &notifier->notifier;
n->cb(n);
}
@@ -2191,6 +2219,7 @@ struct htb {
};
struct htb_class {
+ struct tc_queue tc_queue;
unsigned int min_rate; /* In bytes/s. */
unsigned int max_rate; /* In bytes/s. */
unsigned int burst; /* In bytes. */
@@ -2454,19 +2483,35 @@ htb_tc_install(struct netdev *netdev, const struct shash *details)
return error;
}
+static struct htb_class *
+htb_class_cast__(const struct tc_queue *queue)
+{
+ return CONTAINER_OF(queue, struct htb_class, tc_queue);
+}
+
static void
htb_update_queue__(struct netdev *netdev, unsigned int queue_id,
const struct htb_class *hc)
{
struct htb *htb = htb_get__(netdev);
+ size_t hash = hash_int(queue_id, 0);
+ struct tc_queue *queue;
struct htb_class *hcp;
- hcp = port_array_get(&htb->tc.queues, queue_id);
- if (!hcp) {
+ queue = tc_find_queue__(netdev, queue_id, hash);
+ if (queue) {
+ hcp = htb_class_cast__(queue);
+ } else {
hcp = xmalloc(sizeof *hcp);
- port_array_set(&htb->tc.queues, queue_id, hcp);
+ queue = &hcp->tc_queue;
+ queue->queue_id = queue_id;
+ hmap_insert(&htb->tc.queues, &queue->hmap_node, hash);
}
- *hcp = *hc;
+
+ hcp->min_rate = hc->min_rate;
+ hcp->max_rate = hc->max_rate;
+ hcp->burst = hc->burst;
+ hcp->priority = hc->priority;
}
static int
@@ -2502,10 +2547,10 @@ static void
htb_tc_destroy(struct tc *tc)
{
struct htb *htb = CONTAINER_OF(tc, struct htb, tc);
- unsigned int queue_id;
- struct htb_class *hc;
+ struct htb_class *hc, *next;
- PORT_ARRAY_FOR_EACH (hc, &htb->tc.queues, queue_id) {
+ HMAP_FOR_EACH_SAFE (hc, next, tc_queue.hmap_node, &htb->tc.queues) {
+ hmap_remove(&htb->tc.queues, &hc->tc_queue.hmap_node);
free(hc);
}
tc_destroy(tc);
@@ -2536,14 +2581,10 @@ htb_qdisc_set(struct netdev *netdev, const struct shash *details)
}
static int
-htb_class_get(const struct netdev *netdev, unsigned int queue_id,
- struct shash *details)
+htb_class_get(const struct netdev *netdev OVS_UNUSED,
+ const struct tc_queue *queue, struct shash *details)
{
- const struct htb *htb = htb_get__(netdev);
- const struct htb_class *hc;
-
- hc = port_array_get(&htb->tc.queues, queue_id);
- assert(hc != NULL);
+ const struct htb_class *hc = htb_class_cast__(queue);
shash_add(details, "min-rate", xasprintf("%llu", 8ULL * hc->min_rate));
if (hc->min_rate != hc->max_rate) {
@@ -2579,28 +2620,25 @@ htb_class_set(struct netdev *netdev, unsigned int queue_id,
}
static int
-htb_class_delete(struct netdev *netdev, unsigned int queue_id)
+htb_class_delete(struct netdev *netdev, struct tc_queue *queue)
{
+ struct htb_class *hc = htb_class_cast__(queue);
struct htb *htb = htb_get__(netdev);
- struct htb_class *hc;
int error;
- hc = port_array_get(&htb->tc.queues, queue_id);
- assert(hc != NULL);
-
- error = tc_delete_class(netdev, tc_make_handle(1, queue_id + 1));
+ error = tc_delete_class(netdev, tc_make_handle(1, queue->queue_id + 1));
if (!error) {
+ hmap_remove(&htb->tc.queues, &hc->tc_queue.hmap_node);
free(hc);
- port_array_delete(&htb->tc.queues, queue_id);
}
return error;
}
static int
-htb_class_get_stats(const struct netdev *netdev, unsigned int queue_id,
+htb_class_get_stats(const struct netdev *netdev, const struct tc_queue *queue,
struct netdev_queue_stats *stats)
{
- return htb_query_class__(netdev, tc_make_handle(1, queue_id + 1),
+ return htb_query_class__(netdev, tc_make_handle(1, queue->queue_id + 1),
tc_make_handle(1, 0xfffe), NULL, stats);
}
@@ -2621,7 +2659,7 @@ htb_class_dump_stats(const struct netdev *netdev OVS_UNUSED,
major = tc_get_major(handle);
minor = tc_get_minor(handle);
if (major == 1 && minor > 0 && minor <= HTB_N_QUEUES) {
- (*cb)(tc_get_minor(handle), &stats, aux);
+ (*cb)(minor - 1, &stats, aux);
}
return 0;
}
diff --git a/lib/netdev-tunnel.c b/lib/netdev-tunnel.c
index 9ce0f74dc..de3f882cb 100644
--- a/lib/netdev-tunnel.c
+++ b/lib/netdev-tunnel.c
@@ -62,10 +62,13 @@ parse_config(const char *name, const char *type, const struct shash *args,
struct tnl_port_config *config)
{
struct shash_node *node;
+ bool ipsec_ip_set = false;
+ bool ipsec_mech_set = false;
memset(config, 0, sizeof *config);
config->flags |= TNL_F_PMTUD;
+ config->flags |= TNL_F_HDR_CACHE;
SHASH_FOR_EACH (node, args) {
if (!strcmp(node->name, "remote_ip")) {
@@ -121,11 +124,28 @@ parse_config(const char *name, const char *type, const struct shash *args,
if (!strcmp(node->data, "false")) {
config->flags &= ~TNL_F_PMTUD;
}
+ } else if (!strcmp(node->name, "header_cache")) {
+ if (!strcmp(node->data, "false")) {
+ config->flags &= ~TNL_F_HDR_CACHE;
+ }
+ } else if (!strcmp(node->name, "ipsec_local_ip")) {
+ ipsec_ip_set = true;
+ } else if (!strcmp(node->name, "ipsec_cert")
+ || !strcmp(node->name, "ipsec_psk")) {
+ ipsec_mech_set = true;
} else {
VLOG_WARN("%s: unknown %s argument '%s'", name, type, node->name);
}
}
+ /* IPsec doesn't work when header caching is enabled. Disable it if
+ * the IPsec local IP address and authentication mechanism have been
+ * defined. */
+ if (ipsec_ip_set && ipsec_mech_set) {
+ VLOG_INFO("%s: header caching disabled due to use of IPsec", name);
+ config->flags &= ~TNL_F_HDR_CACHE;
+ }
+
if (!config->daddr) {
VLOG_WARN("%s: %s type requires valid 'remote_ip' argument", name, type);
return EINVAL;
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 71ed072a1..c362ac518 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -269,8 +269,7 @@ netdev_vport_poll_notify(const struct netdev *netdev)
if (list) {
struct netdev_vport_notifier *notifier;
- LIST_FOR_EACH (notifier, struct netdev_vport_notifier,
- list_node, list) {
+ LIST_FOR_EACH (notifier, list_node, list) {
struct netdev_notifier *n = &notifier->notifier;
n->cb(n);
}
diff --git a/lib/netdev.c b/lib/netdev.c
index 1816c1527..3ee5caca1 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -285,7 +285,7 @@ create_device(struct netdev_options *options, struct netdev_dev **netdev_devp)
* to the new network device, otherwise to null.
*
* If this is the first time the device has been opened, then create is called
- * before opening. The device is created using the given type and arguments.
+ * before opening. The device is created using the given type and arguments.
*
* 'ethertype' may be a 16-bit Ethernet protocol value in host byte order to
* capture frames of that type received on the device. It may also be one of
@@ -474,8 +474,7 @@ netdev_enumerate(struct svec *svec)
* be returned.
*
* Some network devices may not implement support for this function. In such
- * cases this function will always return EOPNOTSUPP.
- */
+ * cases this function will always return EOPNOTSUPP. */
int
netdev_recv(struct netdev *netdev, struct ofpbuf *buffer)
{
@@ -641,8 +640,7 @@ netdev_get_ifindex(const struct netdev *netdev)
* passed-in values are set to 0.
*
* Some network devices may not implement support for this function. In such
- * cases this function will always return EOPNOTSUPP.
- */
+ * cases this function will always return EOPNOTSUPP. */
int
netdev_get_features(struct netdev *netdev,
uint32_t *current, uint32_t *advertised,
@@ -727,8 +725,8 @@ netdev_set_advertisements(struct netdev *netdev, uint32_t advertise)
*
* - EOPNOTSUPP: No IPv4 network stack attached to 'netdev'.
*
- * 'address' or 'netmask' or both may be null, in which case the address or netmask
- * is not reported. */
+ * 'address' or 'netmask' or both may be null, in which case the address or
+ * netmask is not reported. */
int
netdev_get_in4(const struct netdev *netdev,
struct in_addr *address_, struct in_addr *netmask_)
@@ -1159,8 +1157,7 @@ netdev_get_queue(const struct netdev *netdev,
* the current form of QoS (e.g. as returned by netdev_get_n_queues(netdev)).
*
* This function does not modify 'details', and the caller retains ownership of
- * it.
- */
+ * it. */
int
netdev_set_queue(struct netdev *netdev,
unsigned int queue_id, const struct shash *details)
@@ -1553,8 +1550,7 @@ netdev_monitor_remove(struct netdev_monitor *monitor, struct netdev *netdev)
* sets '*devnamep' to the name of a device that has changed and returns 0.
* The caller is responsible for freeing '*devnamep' (with free()).
*
- * If no devices have changed, sets '*devnamep' to NULL and returns EAGAIN.
- */
+ * If no devices have changed, sets '*devnamep' to NULL and returns EAGAIN. */
int
netdev_monitor_poll(struct netdev_monitor *monitor, char **devnamep)
{
@@ -1563,8 +1559,7 @@ netdev_monitor_poll(struct netdev_monitor *monitor, char **devnamep)
*devnamep = NULL;
return EAGAIN;
} else {
- *devnamep = xstrdup(node->name);
- shash_delete(&monitor->changed_netdevs, node);
+ *devnamep = shash_steal(&monitor->changed_netdevs, node);
return 0;
}
}
@@ -1607,7 +1602,7 @@ static void
close_all_netdevs(void *aux OVS_UNUSED)
{
struct netdev *netdev, *next;
- LIST_FOR_EACH_SAFE(netdev, next, struct netdev, node, &netdev_list) {
+ LIST_FOR_EACH_SAFE(netdev, next, node, &netdev_list) {
netdev_close(netdev);
}
}
diff --git a/lib/netlink.c b/lib/netlink.c
index 4e83747cc..66c27b1fb 100644
--- a/lib/netlink.c
+++ b/lib/netlink.c
@@ -1036,19 +1036,19 @@ nl_policy_parse(const struct ofpbuf *msg, size_t nla_offset,
type = nla->nla_type;
if (type < n_attrs && policy[type].type != NL_A_NO_ATTR) {
- const struct nl_policy *p = &policy[type];
+ const struct nl_policy *e = &policy[type];
size_t min_len, max_len;
/* Validate length and content. */
- min_len = p->min_len ? p->min_len : attr_len_range[p->type][0];
- max_len = p->max_len ? p->max_len : attr_len_range[p->type][1];
+ min_len = e->min_len ? e->min_len : attr_len_range[e->type][0];
+ max_len = e->max_len ? e->max_len : attr_len_range[e->type][1];
if (len < min_len || len > max_len) {
VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" length %zu not in "
"allowed range %zu...%zu",
offset, type, len, min_len, max_len);
return false;
}
- if (p->type == NL_A_STRING) {
+ if (e->type == NL_A_STRING) {
if (((char *) nla)[nla->nla_len - 1]) {
VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" lacks null at end",
offset, type);
@@ -1060,7 +1060,7 @@ nl_policy_parse(const struct ofpbuf *msg, size_t nla_offset,
return false;
}
}
- if (!p->optional && attrs[type] == NULL) {
+ if (!e->optional && attrs[type] == NULL) {
assert(n_required > 0);
--n_required;
}
diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c
index a17d75c04..01dae0ca8 100644
--- a/lib/ofp-parse.c
+++ b/lib/ofp-parse.c
@@ -21,6 +21,7 @@
#include <errno.h>
#include <stdlib.h>
+#include "dynamic-string.h"
#include "netdev.h"
#include "ofp-util.h"
#include "ofpbuf.h"
@@ -29,12 +30,10 @@
#include "socket-util.h"
#include "vconn.h"
#include "vlog.h"
-
+#include "xtoxll.h"
VLOG_DEFINE_THIS_MODULE(ofp_parse)
-#define DEFAULT_IDLE_TIMEOUT 60
-
static uint32_t
str_to_u32(const char *str)
{
@@ -263,16 +262,32 @@ str_to_action(char *str, struct ofpbuf *b)
nast->vendor = htonl(NX_VENDOR_ID);
nast->subtype = htons(NXAST_SET_TUNNEL);
nast->tun_id = htonl(str_to_u32(arg));
+ } else if (!strcasecmp(act, "drop_spoofed_arp")) {
+ struct nx_action_header *nah;
+ nah = put_action(b, sizeof *nah, OFPAT_VENDOR);
+ nah->vendor = htonl(NX_VENDOR_ID);
+ nah->subtype = htons(NXAST_DROP_SPOOFED_ARP);
+ } else if (!strcasecmp(act, "set_queue")) {
+ struct nx_action_set_queue *nasq;
+ nasq = put_action(b, sizeof *nasq, OFPAT_VENDOR);
+ nasq->vendor = htonl(NX_VENDOR_ID);
+ nasq->subtype = htons(NXAST_SET_QUEUE);
+ nasq->queue_id = htonl(str_to_u32(arg));
+ } else if (!strcasecmp(act, "pop_queue")) {
+ struct nx_action_header *nah;
+ nah = put_action(b, sizeof *nah, OFPAT_VENDOR);
+ nah->vendor = htonl(NX_VENDOR_ID);
+ nah->subtype = htons(NXAST_POP_QUEUE);
} else if (!strcasecmp(act, "output")) {
put_output_action(b, str_to_u32(arg));
} else if (!strcasecmp(act, "enqueue")) {
char *sp = NULL;
- char *port = strtok_r(arg, ":q", &sp);
+ char *port_s = strtok_r(arg, ":q", &sp);
char *queue = strtok_r(NULL, "", &sp);
- if (port == NULL || queue == NULL) {
+ if (port_s == NULL || queue == NULL) {
ovs_fatal(0, "\"enqueue\" syntax is \"enqueue:PORT:QUEUE\"");
}
- put_enqueue_action(b, str_to_u32(port), str_to_u32(queue));
+ put_enqueue_action(b, str_to_u32(port_s), str_to_u32(queue));
} else if (!strcasecmp(act, "drop")) {
/* A drop action in OpenFlow occurs by just not setting
* an action. */
@@ -397,7 +412,7 @@ parse_ofp_str(char *string, struct ofp_match *match, struct ofpbuf *actions,
*priority = OFP_DEFAULT_PRIORITY;
}
if (idle_timeout) {
- *idle_timeout = DEFAULT_IDLE_TIMEOUT;
+ *idle_timeout = OFP_FLOW_PERMANENT;
}
if (hard_timeout) {
*hard_timeout = OFP_FLOW_PERMANENT;
@@ -466,10 +481,12 @@ parse_ofp_str(char *string, struct ofp_match *match, struct ofpbuf *actions,
if (!strcmp(value, "*") || !strcmp(value, "ANY")) {
wildcards |= f->wildcard;
} else {
+ uint16_t port_no;
+
wildcards &= ~f->wildcard;
if (f->wildcard == OFPFW_IN_PORT
- && parse_port_name(value, (uint16_t *) data)) {
- /* Nothing to do. */
+ && parse_port_name(value, &port_no)) {
+ match->in_port = htons(port_no);
} else if (f->type == F_U8) {
*(uint8_t *) data = str_to_u32(value);
} else if (f->type == F_U16) {
@@ -501,3 +518,68 @@ parse_ofp_str(char *string, struct ofp_match *match, struct ofpbuf *actions,
free(new);
}
}
+
+/* Parses 'string' as an OFPT_FLOW_MOD with subtype OFPFC_ADD and returns an
+ * ofpbuf that contains it. Sets '*table_idx' to the index of the table to
+ * which the flow should be added, or to 0xff if none was specified. */
+struct ofpbuf *
+parse_ofp_add_flow_str(char *string, uint8_t *table_idx)
+{
+ struct ofpbuf *buffer;
+ struct ofp_flow_mod *ofm;
+ uint16_t priority, idle_timeout, hard_timeout;
+ uint64_t cookie;
+ struct ofp_match match;
+
+ /* parse_ofp_str() will expand and reallocate the data in 'buffer', so we
+ * can't keep pointers into it across the parse_ofp_str() call. */
+ make_openflow(sizeof *ofm, OFPT_FLOW_MOD, &buffer);
+ parse_ofp_str(string, &match, buffer, table_idx, NULL, &priority,
+ &idle_timeout, &hard_timeout, &cookie);
+ ofm = buffer->data;
+ ofm->match = match;
+ ofm->command = htons(OFPFC_ADD);
+ if (*table_idx != 0xff) {
+ ofm->command |= htons(*table_idx << 8);
+ }
+ ofm->cookie = htonll(cookie);
+ ofm->idle_timeout = htons(idle_timeout);
+ ofm->hard_timeout = htons(hard_timeout);
+ ofm->buffer_id = htonl(UINT32_MAX);
+ ofm->priority = htons(priority);
+ update_openflow_length(buffer);
+
+ return buffer;
+}
+
+/* Parses an OFPT_FLOW_MOD with subtype OFPFC_ADD from 'stream' and returns an
+ * ofpbuf that contains it. Returns a null pointer if end-of-file is reached
+ * before reading a flow. */
+struct ofpbuf *
+parse_ofp_add_flow_file(FILE *stream, uint8_t *table_idx)
+{
+ struct ofpbuf *b = NULL;
+ struct ds s = DS_EMPTY_INITIALIZER;
+
+ while (!ds_get_line(&s, stream)) {
+ char *line = ds_cstr(&s);
+ char *comment;
+
+ /* Delete comments. */
+ comment = strchr(line, '#');
+ if (comment) {
+ *comment = '\0';
+ }
+
+ /* Drop empty lines. */
+ if (line[strspn(line, " \t\n")] == '\0') {
+ continue;
+ }
+
+ b = parse_ofp_add_flow_str(line, table_idx);
+ break;
+ }
+ ds_destroy(&s);
+
+ return b;
+}
diff --git a/lib/ofp-parse.h b/lib/ofp-parse.h
index aa0489c0c..7a44f0b9a 100644
--- a/lib/ofp-parse.h
+++ b/lib/ofp-parse.h
@@ -20,6 +20,7 @@
#define OFP_PARSE_H 1
#include <stdint.h>
+#include <stdio.h>
struct ofp_match;
struct ofpbuf;
@@ -29,5 +30,7 @@ void parse_ofp_str(char *string, struct ofp_match *match,
uint16_t *out_port, uint16_t *priority,
uint16_t *idle_timeout, uint16_t *hard_timeout,
uint64_t *cookie);
+struct ofpbuf *parse_ofp_add_flow_str(char *string, uint8_t *table_idx);
+struct ofpbuf *parse_ofp_add_flow_file(FILE *, uint8_t *table_idx);
#endif /* ofp-parse.h */
diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index e44cf36a1..8de48a381 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -38,6 +38,7 @@
#include "xtoxll.h"
static void ofp_print_port_name(struct ds *string, uint16_t port);
+static void ofp_print_queue_name(struct ds *string, uint32_t port);
/* Returns a string that represents the contents of the Ethernet frame in the
* 'len' bytes starting at 'data' to 'stream' as output by tcpdump.
@@ -200,8 +201,23 @@ ofp_print_nx_action(struct ds *string, const struct nx_action_header *nah)
break;
}
+ case NXAST_DROP_SPOOFED_ARP:
+ ds_put_cstr(string, "drop_spoofed_arp");
+ break;
+
+ case NXAST_SET_QUEUE: {
+ const struct nx_action_set_queue *nasq =
+ (struct nx_action_set_queue *)nah;
+ ds_put_format(string, "set_queue:%u", ntohl(nasq->queue_id));
+ break;
+ }
+
+ case NXAST_POP_QUEUE:
+ ds_put_cstr(string, "pop_queue");
+ break;
+
default:
- ds_put_format(string, "***unknown Nicira action:%d***\n",
+ ds_put_format(string, "***unknown Nicira action:%d***",
ntohs(nah->subtype));
}
}
@@ -748,34 +764,53 @@ ofp_print_flow_mod(struct ds *string, const void *oh, size_t len,
const struct ofp_flow_mod *ofm = oh;
unsigned int command = ntohs(ofm->command);
+ ds_put_char(string, ' ');
ofp_print_match(string, &ofm->match, verbosity);
+ if (ds_last(string) != ' ') {
+ ds_put_char(string, ' ');
+ }
+
switch (command & 0xff) {
case OFPFC_ADD:
- ds_put_cstr(string, " ADD: ");
+ ds_put_cstr(string, "ADD:");
break;
case OFPFC_MODIFY:
- ds_put_cstr(string, " MOD: ");
+ ds_put_cstr(string, "MOD:");
break;
case OFPFC_MODIFY_STRICT:
- ds_put_cstr(string, " MOD_STRICT: ");
+ ds_put_cstr(string, "MOD_STRICT:");
break;
case OFPFC_DELETE:
- ds_put_cstr(string, " DEL: ");
+ ds_put_cstr(string, "DEL:");
break;
case OFPFC_DELETE_STRICT:
- ds_put_cstr(string, " DEL_STRICT: ");
+ ds_put_cstr(string, "DEL_STRICT:");
break;
default:
- ds_put_format(string, " cmd:%u ", command);
+ ds_put_format(string, "cmd:%u", command & 0xff);
}
if (command & 0xff00) {
- ds_put_format(string, "table_id:%u ", command >> 8);
+ ds_put_format(string, " table_id:%u", command >> 8);
+ }
+ if (ofm->cookie != htonll(0)) {
+ ds_put_format(string, " cookie:0x%"PRIx64, ntohll(ofm->cookie));
+ }
+ if (ofm->idle_timeout != htons(OFP_FLOW_PERMANENT)) {
+ ds_put_format(string, " idle:%d", ntohs(ofm->idle_timeout));
}
- ds_put_format(string, "cookie:0x%"PRIx64" idle:%d hard:%d pri:%d "
- "buf:%#x flags:%"PRIx16" ", ntohll(ofm->cookie),
- ntohs(ofm->idle_timeout), ntohs(ofm->hard_timeout),
- ofm->match.wildcards ? ntohs(ofm->priority) : (uint16_t)-1,
- ntohl(ofm->buffer_id), ntohs(ofm->flags));
+ if (ofm->hard_timeout != htons(OFP_FLOW_PERMANENT)) {
+ ds_put_format(string, " hard:%d", ntohs(ofm->hard_timeout));
+ }
+ if (ofm->priority != htons(32768)) {
+ ds_put_format(string, " pri:%"PRIu16, ntohs(ofm->priority));
+ }
+ if (ofm->buffer_id != htonl(UINT32_MAX)) {
+ ds_put_format(string, " buf:%#"PRIx32, ntohl(ofm->buffer_id));
+ }
+ if (ofm->flags != htons(0)) {
+ ds_put_format(string, " flags:%"PRIx16, ntohs(ofm->flags));
+ }
+ ds_put_cstr(string, " ");
ofp_print_actions(string, ofm->actions,
len - offsetof(struct ofp_flow_mod, actions));
ds_put_char(string, '\n');
@@ -805,11 +840,15 @@ ofp_print_flow_removed(struct ds *string, const void *oh,
ds_put_format(string, "**%"PRIu8"**", ofr->reason);
break;
}
- ds_put_format(string,
- " cookie0x%"PRIx64" pri%"PRIu16" secs%"PRIu32" nsecs%"PRIu32
+
+ if (ofr->cookie != htonll(0)) {
+ ds_put_format(string, " cookie:0x%"PRIx64, ntohll(ofr->cookie));
+ }
+ if (ofr->priority != htons(32768)) {
+ ds_put_format(string, " pri:%"PRIu16, ntohs(ofr->priority));
+ }
+ ds_put_format(string, " secs%"PRIu32" nsecs%"PRIu32
" idle%"PRIu16" pkts%"PRIu64" bytes%"PRIu64"\n",
- ntohll(ofr->cookie),
- ofr->match.wildcards ? ntohs(ofr->priority) : (uint16_t)-1,
ntohl(ofr->duration_sec), ntohl(ofr->duration_nsec),
ntohs(ofr->idle_timeout), ntohll(ofr->packet_count),
ntohll(ofr->byte_count));
@@ -1166,6 +1205,53 @@ ofp_table_stats_reply(struct ds *string, const void *body, size_t len,
}
static void
+ofp_print_queue_name(struct ds *string, uint32_t queue_id)
+{
+ if (queue_id == OFPQ_ALL) {
+ ds_put_cstr(string, "ALL");
+ } else {
+ ds_put_format(string, "%"PRIu32, queue_id);
+ }
+}
+
+static void
+ofp_queue_stats_request(struct ds *string, const void *body_,
+ size_t len OVS_UNUSED, int verbosity OVS_UNUSED)
+{
+ const struct ofp_queue_stats_request *qsr = body_;
+
+ ds_put_cstr(string, "port=");
+ ofp_print_port_name(string, ntohs(qsr->port_no));
+
+ ds_put_cstr(string, " queue=");
+ ofp_print_queue_name(string, ntohl(qsr->queue_id));
+}
+
+static void
+ofp_queue_stats_reply(struct ds *string, const void *body, size_t len,
+ int verbosity)
+{
+ const struct ofp_queue_stats *qs = body;
+ size_t n = len / sizeof *qs;
+ ds_put_format(string, " %zu queues\n", n);
+ if (verbosity < 1) {
+ return;
+ }
+
+ for (; n--; qs++) {
+ ds_put_cstr(string, " port ");
+ ofp_print_port_name(string, ntohs(qs->port_no));
+ ds_put_cstr(string, " queue ");
+ ofp_print_queue_name(string, ntohl(qs->queue_id));
+ ds_put_cstr(string, ": ");
+
+ print_port_stat(string, "bytes=", ntohll(qs->tx_bytes), 1);
+ print_port_stat(string, "pkts=", ntohll(qs->tx_packets), 1);
+ print_port_stat(string, "errors=", ntohll(qs->tx_errors), 0);
+ }
+}
+
+static void
vendor_stat(struct ds *string, const void *body, size_t len,
int verbosity OVS_UNUSED)
{
@@ -1235,6 +1321,14 @@ print_stats(struct ds *string, int type, const void *body, size_t body_len,
{ 0, SIZE_MAX, ofp_port_stats_reply },
},
{
+ OFPST_QUEUE,
+ "queue",
+ { sizeof(struct ofp_queue_stats_request),
+ sizeof(struct ofp_queue_stats_request),
+ ofp_queue_stats_request },
+ { 0, SIZE_MAX, ofp_queue_stats_reply },
+ },
+ {
OFPST_VENDOR,
"vendor-specific",
{ sizeof(uint32_t), SIZE_MAX, vendor_stat },
diff --git a/lib/ofp-util.c b/lib/ofp-util.c
index d527d430e..6e0e41ff9 100644
--- a/lib/ofp-util.c
+++ b/lib/ofp-util.c
@@ -438,6 +438,21 @@ check_ofp_packet_out(const struct ofp_header *oh, struct ofpbuf *data,
return 0;
}
+struct ofpbuf *
+make_nxt_flow_mod_table_id(bool enable)
+{
+ struct nxt_flow_mod_table_id *flow_mod_table_id;
+ struct ofpbuf *buffer;
+
+ flow_mod_table_id = make_openflow(sizeof *flow_mod_table_id, OFPT_VENDOR,
+ &buffer);
+
+ flow_mod_table_id->vendor = htonl(NX_VENDOR_ID);
+ flow_mod_table_id->subtype = htonl(NXT_FLOW_MOD_TABLE_ID);
+ flow_mod_table_id->set = enable;
+ return buffer;
+}
+
const struct ofp_flow_stats *
flow_stats_first(struct flow_stats_iterator *iter,
const struct ofp_stats_reply *osr)
@@ -564,6 +579,9 @@ check_nicira_action(const union ofp_action *a, unsigned int len)
switch (ntohs(nah->subtype)) {
case NXAST_RESUBMIT:
case NXAST_SET_TUNNEL:
+ case NXAST_DROP_SPOOFED_ARP:
+ case NXAST_SET_QUEUE:
+ case NXAST_POP_QUEUE:
return check_action_exact_len(a, len, 16);
default:
return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_BAD_VENDOR_TYPE);
diff --git a/lib/ofp-util.h b/lib/ofp-util.h
index 0e7281518..64de2a079 100644
--- a/lib/ofp-util.h
+++ b/lib/ofp-util.h
@@ -62,6 +62,8 @@ int check_ofp_message_array(const struct ofp_header *, uint8_t type,
int check_ofp_packet_out(const struct ofp_header *, struct ofpbuf *data,
int *n_actions, int max_ports);
+struct ofpbuf *make_nxt_flow_mod_table_id(bool enable);
+
struct flow_stats_iterator {
const uint8_t *pos, *end;
};
diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c
index 5693eefda..bf5567251 100644
--- a/lib/ofpbuf.c
+++ b/lib/ofpbuf.c
@@ -75,12 +75,32 @@ ofpbuf_new(size_t size)
return b;
}
+/* Creates and returns a new ofpbuf with an initial capacity of 'size +
+ * headroom' bytes, reserving the first 'headroom' bytes as headroom. */
+struct ofpbuf *
+ofpbuf_new_with_headroom(size_t size, size_t headroom)
+{
+ struct ofpbuf *b = ofpbuf_new(size + headroom);
+ ofpbuf_reserve(b, headroom);
+ return b;
+}
+
struct ofpbuf *
ofpbuf_clone(const struct ofpbuf *buffer)
{
return ofpbuf_clone_data(buffer->data, buffer->size);
}
+/* Creates and returns a new ofpbuf whose data are copied from 'buffer'. The
+ * returned ofpbuf will additionally have 'headroom' bytes of headroom. */
+struct ofpbuf *
+ofpbuf_clone_with_headroom(const struct ofpbuf *buffer, size_t headroom)
+{
+ struct ofpbuf *b = ofpbuf_new_with_headroom(buffer->size, headroom);
+ ofpbuf_put(b, buffer->data, buffer->size);
+ return b;
+}
+
struct ofpbuf *
ofpbuf_clone_data(const void *data, size_t size)
{
diff --git a/lib/ofpbuf.h b/lib/ofpbuf.h
index 736b8f5e5..5e20aab0b 100644
--- a/lib/ofpbuf.h
+++ b/lib/ofpbuf.h
@@ -48,7 +48,10 @@ void ofpbuf_uninit(struct ofpbuf *);
void ofpbuf_reinit(struct ofpbuf *, size_t);
struct ofpbuf *ofpbuf_new(size_t);
+struct ofpbuf *ofpbuf_new_with_headroom(size_t, size_t headroom);
struct ofpbuf *ofpbuf_clone(const struct ofpbuf *);
+struct ofpbuf *ofpbuf_clone_with_headroom(const struct ofpbuf *,
+ size_t headroom);
struct ofpbuf *ofpbuf_clone_data(const void *, size_t);
void ofpbuf_delete(struct ofpbuf *);
diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c
index 9c54fe81b..492da7fa5 100644
--- a/lib/ovsdb-data.c
+++ b/lib/ovsdb-data.c
@@ -1444,6 +1444,31 @@ ovsdb_datum_to_string(const struct ovsdb_datum *datum,
}
}
+/* Initializes 'datum' as a string-to-string map whose contents are taken from
+ * 'sh'. Destroys 'sh'. */
+void
+ovsdb_datum_from_shash(struct ovsdb_datum *datum, struct shash *sh)
+{
+ struct shash_node *node, *next;
+ size_t i;
+
+ datum->n = shash_count(sh);
+ datum->keys = xmalloc(datum->n * sizeof *datum->keys);
+ datum->values = xmalloc(datum->n * sizeof *datum->values);
+
+ i = 0;
+ SHASH_FOR_EACH_SAFE (node, next, sh) {
+ datum->keys[i].string = node->name;
+ datum->values[i].string = node->data;
+ shash_steal(sh, node);
+ i++;
+ }
+ assert(i == datum->n);
+
+ shash_destroy(sh);
+ ovsdb_datum_sort_unique(datum, OVSDB_TYPE_STRING, OVSDB_TYPE_STRING);
+}
+
static uint32_t
hash_atoms(enum ovsdb_atomic_type type, const union ovsdb_atom *atoms,
unsigned int n, uint32_t basis)
diff --git a/lib/ovsdb-data.h b/lib/ovsdb-data.h
index ae0faa26e..f7e98a84b 100644
--- a/lib/ovsdb-data.h
+++ b/lib/ovsdb-data.h
@@ -22,6 +22,7 @@
struct ds;
struct ovsdb_symbol_table;
+struct shash;
/* One value of an atomic type (given by enum ovs_atomic_type). */
union ovsdb_atom {
@@ -167,6 +168,8 @@ char *ovsdb_datum_from_string(struct ovsdb_datum *,
void ovsdb_datum_to_string(const struct ovsdb_datum *,
const struct ovsdb_type *, struct ds *);
+void ovsdb_datum_from_shash(struct ovsdb_datum *, struct shash *);
+
/* Comparison. */
uint32_t ovsdb_datum_hash(const struct ovsdb_datum *,
const struct ovsdb_type *, uint32_t basis);
diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
index 2132f9fef..b7ee0976a 100644
--- a/lib/ovsdb-idl.c
+++ b/lib/ovsdb-idl.c
@@ -215,15 +215,13 @@ ovsdb_idl_clear(struct ovsdb_idl *idl)
}
changed = true;
- HMAP_FOR_EACH_SAFE (row, next_row, struct ovsdb_idl_row, hmap_node,
- &table->rows) {
+ HMAP_FOR_EACH_SAFE (row, next_row, hmap_node, &table->rows) {
struct ovsdb_idl_arc *arc, *next_arc;
if (!ovsdb_idl_row_is_orphan(row)) {
ovsdb_idl_row_unparse(row);
}
- LIST_FOR_EACH_SAFE (arc, next_arc, struct ovsdb_idl_arc, src_node,
- &row->src_arcs) {
+ LIST_FOR_EACH_SAFE (arc, next_arc, src_node, &row->src_arcs) {
free(arc);
}
/* No need to do anything with dst_arcs: some node has those arcs
@@ -433,13 +431,13 @@ ovsdb_idl_send_monitor_request(struct ovsdb_idl *idl)
const struct ovsdb_idl_table *table = &idl->tables[i];
const struct ovsdb_idl_table_class *tc = table->class;
struct json *monitor_request, *columns;
- size_t i;
+ size_t j;
monitor_request = json_object_create();
columns = json_array_create_empty();
- for (i = 0; i < tc->n_columns; i++) {
- const struct ovsdb_idl_column *column = &tc->columns[i];
- if (table->modes[i] != OVSDB_IDL_MODE_NONE) {
+ for (j = 0; j < tc->n_columns; j++) {
+ const struct ovsdb_idl_column *column = &tc->columns[j];
+ if (table->modes[j] != OVSDB_IDL_MODE_NONE) {
json_array_add(columns, json_string_create(column->name));
}
}
@@ -553,8 +551,7 @@ ovsdb_idl_get_row(struct ovsdb_idl_table *table, const struct uuid *uuid)
{
struct ovsdb_idl_row *row;
- HMAP_FOR_EACH_WITH_HASH (row, struct ovsdb_idl_row, hmap_node,
- uuid_hash(uuid), &table->rows) {
+ HMAP_FOR_EACH_WITH_HASH (row, hmap_node, uuid_hash(uuid), &table->rows) {
if (uuid_equals(&row->uuid, uuid)) {
return row;
}
@@ -771,8 +768,7 @@ ovsdb_idl_row_clear_arcs(struct ovsdb_idl_row *row, bool destroy_dsts)
/* Delete all forward arcs. If 'destroy_dsts', destroy any orphaned rows
* that this causes to be unreferenced. */
- LIST_FOR_EACH_SAFE (arc, next, struct ovsdb_idl_arc, src_node,
- &row->src_arcs) {
+ LIST_FOR_EACH_SAFE (arc, next, src_node, &row->src_arcs) {
list_remove(&arc->dst_node);
if (destroy_dsts
&& ovsdb_idl_row_is_orphan(arc->dst)
@@ -800,8 +796,7 @@ ovsdb_idl_row_reparse_backrefs(struct ovsdb_idl_row *row)
* (If duplicate arcs were possible then we would need to make sure that
* 'next' didn't also point into 'arc''s destination, but we forbid
* duplicate arcs.) */
- LIST_FOR_EACH_SAFE (arc, next, struct ovsdb_idl_arc, dst_node,
- &row->dst_arcs) {
+ LIST_FOR_EACH_SAFE (arc, next, dst_node, &row->dst_arcs) {
struct ovsdb_idl_row *ref = arc->src;
ovsdb_idl_row_unparse(ref);
@@ -1145,8 +1140,7 @@ ovsdb_idl_txn_destroy(struct ovsdb_idl_txn *txn)
free(txn->inc_table);
free(txn->inc_column);
json_destroy(txn->inc_where);
- HMAP_FOR_EACH_SAFE (insert, next, struct ovsdb_idl_txn_insert, hmap_node,
- &txn->inserted_rows) {
+ HMAP_FOR_EACH_SAFE (insert, next, hmap_node, &txn->inserted_rows) {
free(insert);
}
hmap_destroy(&txn->inserted_rows);
@@ -1196,8 +1190,7 @@ ovsdb_idl_txn_get_row(const struct ovsdb_idl_txn *txn, const struct uuid *uuid)
{
const struct ovsdb_idl_row *row;
- HMAP_FOR_EACH_WITH_HASH (row, struct ovsdb_idl_row, txn_node,
- uuid_hash(uuid), &txn->txn_rows) {
+ HMAP_FOR_EACH_WITH_HASH (row, txn_node, uuid_hash(uuid), &txn->txn_rows) {
if (uuid_equals(&row->uuid, uuid)) {
return row;
}
@@ -1255,8 +1248,7 @@ ovsdb_idl_txn_disassemble(struct ovsdb_idl_txn *txn)
* transaction and fail to update the graph. */
txn->idl->txn = NULL;
- HMAP_FOR_EACH_SAFE (row, next, struct ovsdb_idl_row, txn_node,
- &txn->txn_rows) {
+ HMAP_FOR_EACH_SAFE (row, next, txn_node, &txn->txn_rows) {
if (row->old) {
if (row->written) {
ovsdb_idl_row_unparse(row);
@@ -1300,7 +1292,7 @@ ovsdb_idl_txn_commit(struct ovsdb_idl_txn *txn)
json_string_create(txn->idl->class->database));
/* Add prerequisites and declarations of new rows. */
- HMAP_FOR_EACH (row, struct ovsdb_idl_row, txn_node, &txn->txn_rows) {
+ HMAP_FOR_EACH (row, txn_node, &txn->txn_rows) {
/* XXX check that deleted rows exist even if no prereqs? */
if (row->prereqs) {
const struct ovsdb_idl_table_class *class = row->table->class;
@@ -1332,7 +1324,7 @@ ovsdb_idl_txn_commit(struct ovsdb_idl_txn *txn)
/* Add updates. */
any_updates = false;
- HMAP_FOR_EACH (row, struct ovsdb_idl_row, txn_node, &txn->txn_rows) {
+ HMAP_FOR_EACH (row, txn_node, &txn->txn_rows) {
const struct ovsdb_idl_table_class *class = row->table->class;
if (row->old == row->new) {
@@ -1530,7 +1522,7 @@ ovsdb_idl_txn_get_insert_uuid(const struct ovsdb_idl_txn *txn,
const struct ovsdb_idl_txn_insert *insert;
assert(txn->status == TXN_SUCCESS || txn->status == TXN_UNCHANGED);
- HMAP_FOR_EACH_IN_BUCKET (insert, struct ovsdb_idl_txn_insert, hmap_node,
+ HMAP_FOR_EACH_IN_BUCKET (insert, hmap_node,
uuid_hash(uuid), &txn->inserted_rows) {
if (uuid_equals(uuid, &insert->dummy)) {
return &insert->real;
@@ -1653,8 +1645,7 @@ ovsdb_idl_txn_abort_all(struct ovsdb_idl *idl)
{
struct ovsdb_idl_txn *txn;
- HMAP_FOR_EACH (txn, struct ovsdb_idl_txn, hmap_node,
- &idl->outstanding_txns) {
+ HMAP_FOR_EACH (txn, hmap_node, &idl->outstanding_txns) {
ovsdb_idl_txn_complete(txn, TXN_TRY_AGAIN);
}
}
@@ -1664,7 +1655,7 @@ ovsdb_idl_txn_find(struct ovsdb_idl *idl, const struct json *id)
{
struct ovsdb_idl_txn *txn;
- HMAP_FOR_EACH_WITH_HASH (txn, struct ovsdb_idl_txn, hmap_node,
+ HMAP_FOR_EACH_WITH_HASH (txn, hmap_node,
json_hash(id, 0), &idl->outstanding_txns) {
if (json_equal(id, txn->request_id)) {
return txn;
@@ -1844,8 +1835,7 @@ ovsdb_idl_txn_process_reply(struct ovsdb_idl *idl,
hard_errors++;
}
- HMAP_FOR_EACH (insert, struct ovsdb_idl_txn_insert, hmap_node,
- &txn->inserted_rows) {
+ HMAP_FOR_EACH (insert, hmap_node, &txn->inserted_rows) {
if (!ovsdb_idl_txn_process_insert_reply(insert, ops)) {
hard_errors++;
}
diff --git a/lib/poll-loop.c b/lib/poll-loop.c
index 70360b9b5..6aefc7689 100644
--- a/lib/poll-loop.c
+++ b/lib/poll-loop.c
@@ -171,7 +171,7 @@ poll_block(void)
}
n_pollfds = 0;
- LIST_FOR_EACH (pw, struct poll_waiter, node, &waiters) {
+ LIST_FOR_EACH (pw, node, &waiters) {
pw->pollfd = &pollfds[n_pollfds];
pollfds[n_pollfds].fd = pw->fd;
pollfds[n_pollfds].events = pw->events;
@@ -190,7 +190,7 @@ poll_block(void)
log_wakeup(&timeout_backtrace, "%d-ms timeout", timeout);
}
- LIST_FOR_EACH_SAFE (pw, next, struct poll_waiter, node, &waiters) {
+ LIST_FOR_EACH_SAFE (pw, next, node, &waiters) {
if (pw->pollfd->revents && VLOG_IS_DBG_ENABLED()) {
log_wakeup(pw->backtrace, "%s%s%s%s%s on fd %d",
pw->pollfd->revents & POLLIN ? "[POLLIN]" : "",
diff --git a/lib/process.c b/lib/process.c
index a201a88f8..087275b8e 100644
--- a/lib/process.c
+++ b/lib/process.c
@@ -517,7 +517,7 @@ process_run_capture(char **argv, char **stdout_log, char **stderr_log,
block_sigchld(&oldsigs);
pid = fork();
if (pid < 0) {
- int error = errno;
+ error = errno;
unblock_sigchld(&oldsigs);
VLOG_WARN("fork failed: %s", strerror(error));
@@ -590,7 +590,7 @@ sigchld_handler(int signr OVS_UNUSED)
struct process *p;
COVERAGE_INC(process_sigchld);
- LIST_FOR_EACH (p, struct process, node, &all_processes) {
+ LIST_FOR_EACH (p, node, &all_processes) {
if (!p->exited) {
int retval, status;
do {
diff --git a/lib/queue.h b/lib/queue.h
index 879f7a2d1..e30b84c54 100644
--- a/lib/queue.h
+++ b/lib/queue.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008, 2009 Nicira Networks.
+ * Copyright (c) 2008, 2009, 2010 Nicira Networks.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
#define QUEUE_H 1
#include <stdbool.h>
+#include <stddef.h>
/* Packet queue. */
struct ovs_queue {
@@ -26,6 +27,8 @@ struct ovs_queue {
struct ofpbuf *tail; /* Last queued packet, null if n == 0. */
};
+#define OVS_QUEUE_INITIALIZER { 0, NULL, NULL }
+
void queue_init(struct ovs_queue *);
void queue_destroy(struct ovs_queue *);
void queue_clear(struct ovs_queue *);
diff --git a/lib/rtnetlink.c b/lib/rtnetlink.c
index 81ef57a80..b1f00c812 100644
--- a/lib/rtnetlink.c
+++ b/lib/rtnetlink.c
@@ -166,8 +166,7 @@ rtnetlink_report_change(const struct nlmsghdr *nlmsg,
change.master_ifindex = (attrs[IFLA_MASTER]
? nl_attr_get_u32(attrs[IFLA_MASTER]) : 0);
- LIST_FOR_EACH (notifier, struct rtnetlink_notifier, node,
- &all_notifiers) {
+ LIST_FOR_EACH (notifier, node, &all_notifiers) {
notifier->cb(&change, notifier->aux);
}
}
@@ -177,8 +176,7 @@ rtnetlink_report_notify_error(void)
{
struct rtnetlink_notifier *notifier;
- LIST_FOR_EACH (notifier, struct rtnetlink_notifier, node,
- &all_notifiers) {
+ LIST_FOR_EACH (notifier, node, &all_notifiers) {
notifier->cb(NULL, notifier->aux);
}
}
diff --git a/lib/shash.c b/lib/shash.c
index 8fd2eb18f..82791e31f 100644
--- a/lib/shash.c
+++ b/lib/shash.c
@@ -167,12 +167,25 @@ shash_replace(struct shash *sh, const char *name, const void *data)
}
}
+/* Deletes 'node' from 'sh' and frees the node's name. The caller is still
+ * responsible for freeing the node's data, if necessary. */
void
shash_delete(struct shash *sh, struct shash_node *node)
{
+ free(shash_steal(sh, node));
+}
+
+/* Deletes 'node' from 'sh'. Neither the node's name nor its data is freed;
+ * instead, ownership is transferred to the caller. Returns the node's
+ * name. */
+char *
+shash_steal(struct shash *sh, struct shash_node *node)
+{
+ char *name = node->name;
+
hmap_remove(&sh->map, &node->node);
- free(node->name);
free(node);
+ return name;
}
static struct shash_node *
@@ -180,7 +193,7 @@ shash_find__(const struct shash *sh, const char *name, size_t hash)
{
struct shash_node *node;
- HMAP_FOR_EACH_WITH_HASH (node, struct shash_node, node, hash, &sh->map) {
+ HMAP_FOR_EACH_WITH_HASH (node, node, hash, &sh->map) {
if (!strcmp(node->name, name)) {
return node;
}
diff --git a/lib/shash.h b/lib/shash.h
index eab0af45a..dfb10e2cc 100644
--- a/lib/shash.h
+++ b/lib/shash.h
@@ -35,12 +35,11 @@ struct shash {
#define SHASH_INITIALIZER(SHASH) { HMAP_INITIALIZER(&(SHASH)->map) }
-#define SHASH_FOR_EACH(SHASH_NODE, SHASH) \
- HMAP_FOR_EACH (SHASH_NODE, struct shash_node, node, &(SHASH)->map)
+#define SHASH_FOR_EACH(SHASH_NODE, SHASH) \
+ HMAP_FOR_EACH (SHASH_NODE, node, &(SHASH)->map)
-#define SHASH_FOR_EACH_SAFE(SHASH_NODE, NEXT, SHASH) \
- HMAP_FOR_EACH_SAFE (SHASH_NODE, NEXT, struct shash_node, node, \
- &(SHASH)->map)
+#define SHASH_FOR_EACH_SAFE(SHASH_NODE, NEXT, SHASH) \
+ HMAP_FOR_EACH_SAFE (SHASH_NODE, NEXT, node, &(SHASH)->map)
void shash_init(struct shash *);
void shash_destroy(struct shash *);
@@ -57,6 +56,7 @@ bool shash_add_once(struct shash *, const char *, const void *);
void shash_add_assert(struct shash *, const char *, const void *);
void *shash_replace(struct shash *, const char *, const void *data);
void shash_delete(struct shash *, struct shash_node *);
+char *shash_steal(struct shash *, struct shash_node *);
struct shash_node *shash_find(const struct shash *, const char *);
void *shash_find_data(const struct shash *, const char *);
void *shash_find_and_delete(struct shash *, const char *);
diff --git a/lib/stream-fd.c b/lib/stream-fd.c
index 9410009c4..ef4dc8d91 100644
--- a/lib/stream-fd.c
+++ b/lib/stream-fd.c
@@ -214,7 +214,7 @@ pfd_accept(struct pstream *pstream, struct stream **new_streamp)
new_fd = accept(ps->fd, (struct sockaddr *) &ss, &ss_len);
if (new_fd < 0) {
- int retval = errno;
+ retval = errno;
if (retval != EAGAIN) {
VLOG_DBG_RL(&rl, "accept: %s", strerror(retval));
}
diff --git a/lib/stream-ssl.c b/lib/stream-ssl.c
index 70b15f0da..9c7533d1e 100644
--- a/lib/stream-ssl.c
+++ b/lib/stream-ssl.c
@@ -385,7 +385,7 @@ do_ca_cert_bootstrap(struct stream *stream)
file = fdopen(fd, "w");
if (!file) {
- int error = errno;
+ error = errno;
VLOG_ERR("could not bootstrap CA cert: fdopen failed: %s",
strerror(error));
unlink(ca_cert.file_name);
@@ -402,7 +402,7 @@ do_ca_cert_bootstrap(struct stream *stream)
}
if (fclose(file)) {
- int error = errno;
+ error = errno;
VLOG_ERR("could not bootstrap CA cert: writing %s failed: %s",
ca_cert.file_name, strerror(error));
unlink(ca_cert.file_name);
@@ -921,7 +921,7 @@ pssl_accept(struct pstream *pstream, struct stream **new_streamp)
new_fd = accept(pssl->fd, &sin, &sin_len);
if (new_fd < 0) {
- int error = errno;
+ error = errno;
if (error != EAGAIN) {
VLOG_DBG_RL(&rl, "accept: %s", strerror(error));
}
diff --git a/lib/unixctl.c b/lib/unixctl.c
index ac756a834..706b3e309 100644
--- a/lib/unixctl.c
+++ b/lib/unixctl.c
@@ -411,8 +411,7 @@ unixctl_server_run(struct unixctl_server *server)
new_connection(server, fd);
}
- LIST_FOR_EACH_SAFE (conn, next,
- struct unixctl_conn, node, &server->conns) {
+ LIST_FOR_EACH_SAFE (conn, next, node, &server->conns) {
int error = run_connection(conn);
if (error && error != EAGAIN) {
kill_connection(conn);
@@ -426,7 +425,7 @@ unixctl_server_wait(struct unixctl_server *server)
struct unixctl_conn *conn;
poll_fd_wait(server->fd, POLLIN);
- LIST_FOR_EACH (conn, struct unixctl_conn, node, &server->conns) {
+ LIST_FOR_EACH (conn, node, &server->conns) {
if (conn->state == S_RECV) {
poll_fd_wait(conn->fd, POLLIN);
} else if (conn->state == S_SEND) {
@@ -442,8 +441,7 @@ unixctl_server_destroy(struct unixctl_server *server)
if (server) {
struct unixctl_conn *conn, *next;
- LIST_FOR_EACH_SAFE (conn, next,
- struct unixctl_conn, node, &server->conns) {
+ LIST_FOR_EACH_SAFE (conn, next, node, &server->conns) {
kill_connection(conn);
}
diff --git a/lib/util.h b/lib/util.h
index 9eca8ec9a..5147ffc55 100644
--- a/lib/util.h
+++ b/lib/util.h
@@ -82,6 +82,16 @@ extern const char *program_name;
#define CONTAINER_OF(POINTER, STRUCT, MEMBER) \
((STRUCT *) (void *) ((char *) (POINTER) - offsetof (STRUCT, MEMBER)))
+/* Given POINTER, the address of the given MEMBER within an object of the type
+ * that OBJECT points to, returns the address of that containing object as a
+ * "void *" pointer. OBJECT must be an lvalue.
+ *
+ * This is the same as CONTAINER_OF except that it infers the structure type
+ * from the type of '*OBJECT'. */
+#define OBJECT_CONTAINING(POINTER, OBJECT, MEMBER) \
+ ((void *) ((char *) (POINTER) \
+ - ((char *) &(OBJECT)->MEMBER - (char *) (OBJECT))))
+
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/lib/vlog-modules.def b/lib/vlog-modules.def
index 4487662fa..c82275733 100644
--- a/lib/vlog-modules.def
+++ b/lib/vlog-modules.def
@@ -76,6 +76,7 @@ VLOG_MODULE(stream_unix)
VLOG_MODULE(svec)
VLOG_MODULE(timeval)
VLOG_MODULE(socket_util)
+VLOG_MODULE(system_stats)
VLOG_MODULE(unixctl)
VLOG_MODULE(util)
VLOG_MODULE(vconn)
diff --git a/lib/vlog.h b/lib/vlog.h
index 03f17ea56..a4e143c1b 100644
--- a/lib/vlog.h
+++ b/lib/vlog.h
@@ -181,6 +181,12 @@ void vlog_rate_limit(const struct vlog_module *, enum vlog_level,
#define VLOG_DROP_INFO(RL) vlog_should_drop(THIS_MODULE, VLL_INFO, RL)
#define VLOG_DROP_DBG(RL) vlog_should_drop(THIS_MODULE, VLL_DBG, RL)
+/* Macros for logging at most once per execution. */
+#define VLOG_ERR_ONCE(...) VLOG_ONCE(VLL_ERR, __VA_ARGS__)
+#define VLOG_WARN_ONCE(...) VLOG_ONCE(VLL_WARN, __VA_ARGS__)
+#define VLOG_INFO_ONCE(...) VLOG_ONCE(VLL_INFO, __VA_ARGS__)
+#define VLOG_DBG_ONCE(...) VLOG_ONCE(VLL_DBG, __VA_ARGS__)
+
/* Command line processing. */
#define VLOG_OPTION_ENUMS OPT_LOG_FILE
#define VLOG_LONG_OPTIONS \
@@ -208,6 +214,15 @@ void vlog_usage(void);
vlog_rate_limit(THIS_MODULE, LEVEL, RL, __VA_ARGS__); \
} \
} while (0)
+/* Logs the message at LEVEL the first time a given expansion of this macro is
+ * executed. Each expansion has its own static 'already_logged' flag, so later
+ * executions of the same expansion log nothing. */
+#define VLOG_ONCE(LEVEL, ...) \
+ do { \
+ static bool already_logged; \
+ if (!already_logged) { \
+ already_logged = true; \
+ vlog(THIS_MODULE, LEVEL, __VA_ARGS__); \
+ } \
+ } while (0)
+
#define VLOG_DEFINE_MODULE__(MODULE) \
struct vlog_module VLM_##MODULE = \
{ \
diff --git a/lib/xfif-linux.c b/lib/xfif-linux.c
index 7bfa840c1..82fa1d7b8 100644
--- a/lib/xfif-linux.c
+++ b/lib/xfif-linux.c
@@ -338,8 +338,7 @@ xfif_linux_port_poll(const struct xfif *xfif_, char **devnamep)
return ENOBUFS;
} else if (!shash_is_empty(&xfif->changed_ports)) {
struct shash_node *node = shash_first(&xfif->changed_ports);
- *devnamep = xstrdup(node->name);
- shash_delete(&xfif->changed_ports, node);
+ *devnamep = shash_steal(&xfif->changed_ports, node);
return 0;
} else {
return EAGAIN;
@@ -478,8 +477,7 @@ xfif_linux_recv(struct xfif *xfif_, struct ofpbuf **bufp)
int retval;
int error;
- buf = ofpbuf_new(65536 + XFIF_RECV_MSG_PADDING);
- ofpbuf_reserve(buf, XFIF_RECV_MSG_PADDING);
+ buf = ofpbuf_new_with_headroom(65536, XFIF_RECV_MSG_PADDING);
retval = read(xfif->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf));
if (retval < 0) {
error = errno;
@@ -732,11 +730,7 @@ get_major(const char *target)
return major;
}
} else {
- static bool warned;
- if (!warned) {
- VLOG_WARN("%s:%d: syntax error", fn, ln);
- }
- warned = true;
+ VLOG_WARN_ONCE("%s:%d: syntax error", fn, ln);
}
}
diff --git a/lib/xfif-netdev.c b/lib/xfif-netdev.c
index 1afb08c81..25493fbd4 100644
--- a/lib/xfif-netdev.c
+++ b/lib/xfif-netdev.c
@@ -457,7 +457,7 @@ get_port_by_name(struct xf_netdev *xf,
{
struct xf_netdev_port *port;
- LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) {
+ LIST_FOR_EACH (port, node, &xf->port_list) {
if (!strcmp(netdev_get_name(port->netdev), devname)) {
*portp = port;
return 0;
@@ -545,8 +545,7 @@ xf_netdev_flow_flush(struct xf_netdev *xf)
{
struct xf_netdev_flow *flow, *next;
- HMAP_FOR_EACH_SAFE (flow, next, struct xf_netdev_flow, node,
- &xf->flow_table) {
+ HMAP_FOR_EACH_SAFE (flow, next, node, &xf->flow_table) {
xf_netdev_free_flow(xf, flow);
}
}
@@ -567,7 +566,7 @@ xfif_netdev_port_list(const struct xfif *xfif, struct xflow_port *ports, int n)
int i;
i = 0;
- LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) {
+ LIST_FOR_EACH (port, node, &xf->port_list) {
struct xflow_port *xflow_port = &ports[i];
if (i >= n) {
break;
@@ -661,7 +660,7 @@ xf_netdev_lookup_flow(const struct xf_netdev *xf,
{
struct xf_netdev_flow *flow;
- HMAP_FOR_EACH_WITH_HASH (flow, struct xf_netdev_flow, node,
+ HMAP_FOR_EACH_WITH_HASH (flow, node,
xflow_key_hash(key, 0), &xf->flow_table) {
if (xflow_key_equal(&flow->key, key)) {
return flow;
@@ -884,7 +883,7 @@ xfif_netdev_flow_list(const struct xfif *xfif, struct xflow_flow flows[], int n)
int i;
i = 0;
- HMAP_FOR_EACH (flow, struct xf_netdev_flow, node, &xf->flow_table) {
+ HMAP_FOR_EACH (flow, node, &xf->flow_table) {
if (i >= n) {
break;
}
@@ -1047,10 +1046,10 @@ xf_netdev_run(void)
struct xf_netdev *xf;
ofpbuf_init(&packet, XF_NETDEV_HEADROOM + max_mtu);
- LIST_FOR_EACH (xf, struct xf_netdev, node, &xf_netdev_list) {
+ LIST_FOR_EACH (xf, node, &xf_netdev_list) {
struct xf_netdev_port *port;
- LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) {
+ LIST_FOR_EACH (port, node, &xf->port_list) {
int error;
/* Reset packet contents. */
@@ -1075,9 +1074,9 @@ xf_netdev_wait(void)
{
struct xf_netdev *xf;
- LIST_FOR_EACH (xf, struct xf_netdev, node, &xf_netdev_list) {
+ LIST_FOR_EACH (xf, node, &xf_netdev_list) {
struct xf_netdev_port *port;
- LIST_FOR_EACH (port, struct xf_netdev_port, node, &xf->port_list) {
+ LIST_FOR_EACH (port, node, &xf->port_list) {
netdev_recv_wait(port->netdev);
}
}
@@ -1099,7 +1098,6 @@ xf_netdev_set_dl_tci(struct ofpbuf *packet,
veh->veth_tci = (veh->veth_tci & ~a->mask) | a->tci;
} else {
/* Insert new 802.1Q header. */
- struct eth_header *eh = packet->l2;
struct vlan_eth_header tmp;
memcpy(tmp.veth_dst, eh->eth_dst, ETH_ADDR_LEN);
memcpy(tmp.veth_src, eh->eth_src, ETH_ADDR_LEN);
diff --git a/lib/xflow-util.c b/lib/xflow-util.c
index e11da07fc..622f9efdc 100644
--- a/lib/xflow-util.c
+++ b/lib/xflow-util.c
@@ -111,6 +111,9 @@ format_xflow_action(struct ds *ds, const union xflow_action *a)
case XFLOWAT_POP_PRIORITY:
ds_put_cstr(ds, "pop_priority");
break;
+ case XFLOWAT_DROP_SPOOFED_ARP:
+ ds_put_cstr(ds, "drop_spoofed_arp");
+ break;
default:
ds_put_format(ds, "***bad action 0x%"PRIx16"***", a->type);
break;
diff --git a/ofproto/netflow.c b/ofproto/netflow.c
index 50ab80a69..4881c5fdb 100644
--- a/ofproto/netflow.c
+++ b/ofproto/netflow.c
@@ -103,26 +103,19 @@ struct netflow {
long long int reconfig_time; /* When we reconfigured the timeouts. */
};
-void
-netflow_expire(struct netflow *nf, struct netflow_flow *nf_flow,
- struct ofexpired *expired)
+static void
+gen_netflow_rec(struct netflow *nf, struct netflow_flow *nf_flow,
+ struct ofexpired *expired,
+ uint32_t packet_count, uint32_t byte_count)
{
struct netflow_v5_header *nf_hdr;
struct netflow_v5_record *nf_rec;
- struct timespec now;
-
- nf_flow->last_expired += nf->active_timeout;
- /* NetFlow only reports on IP packets and we should only report flows
- * that actually have traffic. */
- if (expired->flow.dl_type != htons(ETH_TYPE_IP) ||
- expired->packet_count - nf_flow->packet_count_off == 0) {
- return;
- }
+ if (!nf->packet.size) {
+ struct timespec now;
- time_wall_timespec(&now);
+ time_wall_timespec(&now);
- if (!nf->packet.size) {
nf_hdr = ofpbuf_put_zeros(&nf->packet, sizeof *nf_hdr);
nf_hdr->version = htons(NETFLOW_V5_VERSION);
nf_hdr->count = htons(0);
@@ -150,10 +143,8 @@ netflow_expire(struct netflow *nf, struct netflow_flow *nf_flow,
nf_rec->input = htons(expired->flow.in_port);
nf_rec->output = htons(nf_flow->output_iface);
}
- nf_rec->packet_count = htonl(MIN(expired->packet_count -
- nf_flow->packet_count_off, UINT32_MAX));
- nf_rec->byte_count = htonl(MIN(expired->byte_count -
- nf_flow->byte_count_off, UINT32_MAX));
+ nf_rec->packet_count = htonl(packet_count);
+ nf_rec->byte_count = htonl(byte_count);
nf_rec->init_time = htonl(nf_flow->created - nf->boot_time);
nf_rec->used_time = htonl(MAX(nf_flow->created, expired->used)
- nf->boot_time);
@@ -172,16 +163,66 @@ netflow_expire(struct netflow *nf, struct netflow_flow *nf_flow,
nf_rec->ip_proto = expired->flow.nw_proto;
nf_rec->ip_tos = expired->flow.nw_tos;
+ /* NetFlow messages are limited to 30 records. */
+ if (ntohs(nf_hdr->count) >= 30) {
+ netflow_run(nf);
+ }
+}
+
+/* Reports to NetFlow the traffic that 'expired' has accumulated beyond the
+ * packet and byte offsets recorded in 'nf_flow', then updates those offsets
+ * so that the next call reports only newer traffic.
+ *
+ * 'nf_flow->last_expired' is always advanced by the active timeout.  Nothing
+ * else happens for non-IP flows or for flows with no new packets.  A byte
+ * delta too large for a 32-bit NetFlow v5 counter is split across several
+ * records; an implausibly large delta is logged (rate-limited) and not
+ * reported. */
+void
+netflow_expire(struct netflow *nf, struct netflow_flow *nf_flow,
+               struct ofexpired *expired)
+{
+    uint64_t pkt_delta = expired->packet_count - nf_flow->packet_count_off;
+    uint64_t byte_delta = expired->byte_count - nf_flow->byte_count_off;
+
+    nf_flow->last_expired += nf->active_timeout;
+
+    /* NetFlow only reports on IP packets and we should only report flows
+     * that actually have traffic. */
+    if (expired->flow.dl_type != htons(ETH_TYPE_IP) || pkt_delta == 0) {
+        return;
+    }
+
+    if ((byte_delta >> 32) <= 175) {
+        /* NetFlow v5 records are limited to 32-bit counters.  If the byte
+         * delta doesn't fit in one, send it as multiple records so we don't
+         * lose track of any traffic.  We try to evenly distribute the packet
+         * and byte counters, so that the bytes-per-packet lengths don't look
+         * wonky across the records.
+         *
+         * 'n_recs' is rounded up so that each record's share is at most
+         * UINT32_MAX.  Rounding down (e.g. "byte_delta >> 32") would make
+         * every share at least 2**32, which truncates when stored in a
+         * uint32_t and can keep 'byte_delta' from ever shrinking. */
+        while (byte_delta > UINT32_MAX) {
+            uint32_t n_recs = (byte_delta + UINT32_MAX - 1) / UINT32_MAX;
+            uint32_t pkt_count = pkt_delta / n_recs;
+            uint32_t byte_count = byte_delta / n_recs;
+
+            gen_netflow_rec(nf, nf_flow, expired, pkt_count, byte_count);
+
+            pkt_delta -= pkt_count;
+            byte_delta -= byte_count;
+        }
+        if (byte_delta > 0) {
+            gen_netflow_rec(nf, nf_flow, expired, pkt_delta, byte_delta);
+        }
+    } else {
+        /* In 600 seconds, a 10GbE link can theoretically transmit 75 * 10**10
+         * (about 175 * 2**32) bytes.  The byte counter is bigger than that,
+         * so it's probably a bug--for example, the netdev code uses
+         * UINT64_MAX to report "unknown value", and perhaps that has leaked
+         * through to here.
+         *
+         * We wouldn't want to hit the loop above in this case, because it
+         * would try to send up to UINT32_MAX netflow records, which would take
+         * a long time.
+         */
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+
+        VLOG_WARN_RL(&rl, "impossible byte counter %"PRIu64, byte_delta);
+    }
+
     /* Update flow tracking data. */
     nf_flow->created = 0;
     nf_flow->packet_count_off = expired->packet_count;
     nf_flow->byte_count_off = expired->byte_count;
     nf_flow->tcp_flags = 0;
-
-    /* NetFlow messages are limited to 30 records. */
-    if (ntohs(nf_hdr->count) >= 30) {
-        netflow_run(nf);
-    }
 }
void
diff --git a/ofproto/ofproto-sflow.c b/ofproto/ofproto-sflow.c
index 7ed61af10..372491159 100644
--- a/ofproto/ofproto-sflow.c
+++ b/ofproto/ofproto-sflow.c
@@ -21,12 +21,13 @@
#include <stdlib.h>
#include "collectors.h"
#include "compiler.h"
+#include "hash.h"
+#include "hmap.h"
#include "netdev.h"
#include "ofpbuf.h"
#include "ofproto.h"
#include "packets.h"
#include "poll-loop.h"
-#include "port-array.h"
#include "sflow_api.h"
#include "socket-util.h"
#include "timeval.h"
@@ -37,8 +38,10 @@
VLOG_DEFINE_THIS_MODULE(sflow)
struct ofproto_sflow_port {
+ struct hmap_node hmap_node; /* In struct ofproto_sflow's "ports" hmap. */
struct netdev *netdev; /* Underlying network device, for stats. */
SFLDataSource_instance dsi; /* sFlow library's notion of port number. */
+ uint16_t xflow_port; /* xflow port number. */
};
struct ofproto_sflow {
@@ -49,9 +52,12 @@ struct ofproto_sflow {
struct wdp *wdp;
time_t next_tick;
size_t n_flood, n_all;
- struct port_array ports; /* Indexed by XFLOW port number. */
+ struct hmap ports; /* Contains "struct ofproto_sflow_port"s. */
};
+static void ofproto_sflow_del_port__(struct ofproto_sflow *,
+ struct ofproto_sflow_port *);
+
#define RECEIVER_INDEX 1
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
@@ -131,6 +137,20 @@ sflow_agent_send_packet_cb(void *os_, SFLAgent *agent OVS_UNUSED,
collectors_send(os->collectors, pkt, pktLen);
}
+/* Searches 'os''s "ports" hmap for the port whose xflow port number is
+ * 'xflow_port'. Returns that port, or NULL if no port matches.
+ *
+ * The bucket is selected with hash_int(xflow_port, 0), the same hash that
+ * ofproto_sflow_add_port() passes to hmap_insert(). */
+static struct ofproto_sflow_port *
+ofproto_sflow_find_port(const struct ofproto_sflow *os, uint16_t xflow_port)
+{
+ struct ofproto_sflow_port *osp;
+
+ HMAP_FOR_EACH_IN_BUCKET (osp, hmap_node,
+ hash_int(xflow_port, 0), &os->ports) {
+ if (osp->xflow_port == xflow_port) {
+ return osp;
+ }
+ }
+ return NULL;
+}
+
static void
sflow_agent_get_counters(void *os_, SFLPoller *poller,
SFL_COUNTERS_SAMPLE_TYPE *cs)
@@ -143,7 +163,7 @@ sflow_agent_get_counters(void *os_, SFLPoller *poller,
enum netdev_flags flags;
uint32_t current;
- osp = port_array_get(&os->ports, poller->bridgePort);
+ osp = ofproto_sflow_find_port(os, poller->bridgePort);
if (!osp) {
return;
}
@@ -268,7 +288,7 @@ ofproto_sflow_create(struct wdp *wdp)
os = xcalloc(1, sizeof *os);
os->wdp = wdp;
os->next_tick = time_now() + 1;
- port_array_init(&os->ports);
+ hmap_init(&os->ports);
return os;
}
@@ -276,14 +296,13 @@ void
ofproto_sflow_destroy(struct ofproto_sflow *os)
{
if (os) {
- struct ofproto_sflow_port *osp;
- unsigned int xflow_port;
+ struct ofproto_sflow_port *osp, *next;
ofproto_sflow_clear(os);
- PORT_ARRAY_FOR_EACH (osp, &os->ports, xflow_port) {
- ofproto_sflow_del_port(os, xflow_port);
+ /* Each iteration unlinks and frees 'osp' (see
+ * ofproto_sflow_del_port__()), hence the _SAFE iterator with 'next'. */
+ HMAP_FOR_EACH_SAFE (osp, next, hmap_node, &os->ports) {
+ ofproto_sflow_del_port__(os, osp);
}
- port_array_destroy(&os->ports);
+ hmap_destroy(&os->ports);
free(os);
}
}
@@ -336,7 +355,8 @@ ofproto_sflow_add_port(struct ofproto_sflow *os, uint16_t xflow_port,
ifindex = (os->sflow_agent->subId << 16) + xflow_port;
}
SFL_DS_SET(osp->dsi, 0, ifindex, 0);
- port_array_set(&os->ports, xflow_port, osp);
+ osp->xflow_port = xflow_port;
+ hmap_insert(&os->ports, &osp->hmap_node, hash_int(xflow_port, 0));
/* Add poller and sampler. */
if (os->sflow_agent) {
@@ -345,18 +365,25 @@ ofproto_sflow_add_port(struct ofproto_sflow *os, uint16_t xflow_port,
}
}
+/* Tears down 'osp': removes its poller and sampler from 'os''s sFlow agent
+ * (if 'os' has one), closes its netdev, removes it from 'os''s "ports" hmap,
+ * and frees it. 'osp' must not be used after this call. */
+static void
+ofproto_sflow_del_port__(struct ofproto_sflow *os,
+ struct ofproto_sflow_port *osp)
+{
+ if (os->sflow_agent) {
+ sfl_agent_removePoller(os->sflow_agent, &osp->dsi);
+ sfl_agent_removeSampler(os->sflow_agent, &osp->dsi);
+ }
+ netdev_close(osp->netdev);
+ hmap_remove(&os->ports, &osp->hmap_node);
+ free(osp);
+}
+
+/* Removes and destroys the port in 'os' whose xflow port number is
+ * 'xflow_port'. Does nothing if 'os' has no such port. */
void
ofproto_sflow_del_port(struct ofproto_sflow *os, uint16_t xflow_port)
{
- struct ofproto_sflow_port *osp = port_array_get(&os->ports, xflow_port);
+ struct ofproto_sflow_port *osp = ofproto_sflow_find_port(os, xflow_port);
if (osp) {
- if (os->sflow_agent) {
- sfl_agent_removePoller(os->sflow_agent, &osp->dsi);
- sfl_agent_removeSampler(os->sflow_agent, &osp->dsi);
- }
- netdev_close(osp->netdev);
- free(osp);
- port_array_delete(&os->ports, xflow_port);
+ ofproto_sflow_del_port__(os, osp);
}
}
@@ -367,7 +394,6 @@ ofproto_sflow_set_options(struct ofproto_sflow *os,
struct ofproto_sflow_port *osp;
bool options_changed;
SFLReceiver *receiver;
- unsigned int xflow_port;
SFLAddress agentIP;
time_t now;
@@ -438,17 +464,17 @@ ofproto_sflow_set_options(struct ofproto_sflow *os,
MAX(1, UINT32_MAX / options->sampling_rate));
/* Add samplers and pollers for the currently known ports. */
- PORT_ARRAY_FOR_EACH (osp, &os->ports, xflow_port) {
- ofproto_sflow_add_poller(os, osp, xflow_port);
+ HMAP_FOR_EACH (osp, hmap_node, &os->ports) {
+ ofproto_sflow_add_poller(os, osp, osp->xflow_port);
ofproto_sflow_add_sampler(os, osp);
}
}
+/* Returns SFL_DS_INDEX() of the 'dsi' recorded for the port in 'os' whose
+ * xflow port number is 'xflow_port', or 0 if 'os' has no such port. */
static int
ofproto_sflow_xflow_port_to_ifindex(const struct ofproto_sflow *os,
- uint16_t xflow_port)
+ uint16_t xflow_port)
{
- struct ofproto_sflow_port *osp = port_array_get(&os->ports, xflow_port);
+ struct ofproto_sflow_port *osp = ofproto_sflow_find_port(os, xflow_port);
return osp ? SFL_DS_INDEX(osp->dsi) : 0;
}
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 3c165c71b..cb67deb2b 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -29,6 +29,8 @@
#include "discovery.h"
#include "dynamic-string.h"
#include "fail-open.h"
+#include "hash.h"
+#include "hmap.h"
#include "in-band.h"
#include "mac-learning.h"
#include "netdev.h"
@@ -44,7 +46,6 @@
#include "pinsched.h"
#include "pktbuf.h"
#include "poll-loop.h"
-#include "port-array.h"
#include "rconn.h"
#include "shash.h"
#include "status.h"
@@ -409,7 +410,7 @@ find_controller_by_target(struct ofproto *ofproto, const char *target)
{
struct ofconn *ofconn;
- HMAP_FOR_EACH_WITH_HASH (ofconn, struct ofconn, hmap_node,
+ HMAP_FOR_EACH_WITH_HASH (ofconn, hmap_node,
hash_string(target, 0), &ofproto->controllers) {
if (!strcmp(ofconn_get_target(ofconn), target)) {
return ofconn;
@@ -434,7 +435,7 @@ update_in_band_remotes(struct ofproto *ofproto)
/* Add all the remotes. */
discovery = false;
- HMAP_FOR_EACH (ofconn, struct ofconn, hmap_node, &ofproto->controllers) {
+ HMAP_FOR_EACH (ofconn, hmap_node, &ofproto->controllers) {
struct sockaddr_in *sin = &addrs[n_addrs];
if (ofconn->band == OFPROTO_OUT_OF_BAND) {
@@ -493,7 +494,7 @@ update_fail_open(struct ofproto *p)
n = 0;
rconns = xmalloc(hmap_count(&p->controllers) * sizeof *rconns);
- HMAP_FOR_EACH (ofconn, struct ofconn, hmap_node, &p->controllers) {
+ HMAP_FOR_EACH (ofconn, hmap_node, &p->controllers) {
rconns[n++] = ofconn->rconn;
}
@@ -542,8 +543,7 @@ ofproto_set_controllers(struct ofproto *p,
/* Delete controllers that are no longer configured.
* Update configuration of all now-existing controllers. */
ss_exists = false;
- HMAP_FOR_EACH_SAFE (ofconn, next_ofconn, struct ofconn, hmap_node,
- &p->controllers) {
+ HMAP_FOR_EACH_SAFE (ofconn, next_ofconn, hmap_node, &p->controllers) {
struct ofproto_controller *c;
c = shash_find_data(&new_controllers, ofconn_get_target(ofconn));
@@ -559,8 +559,7 @@ ofproto_set_controllers(struct ofproto *p,
/* Delete services that are no longer configured.
* Update configuration of all now-existing services. */
- HMAP_FOR_EACH_SAFE (ofservice, next_ofservice, struct ofservice, node,
- &p->services) {
+ HMAP_FOR_EACH_SAFE (ofservice, next_ofservice, node, &p->services) {
struct ofproto_controller *c;
c = shash_find_data(&new_controllers,
@@ -599,7 +598,7 @@ ofproto_reconnect_controllers(struct ofproto *ofproto)
{
struct ofconn *ofconn;
- LIST_FOR_EACH (ofconn, struct ofconn, node, &ofproto->all_conns) {
+ LIST_FOR_EACH (ofconn, node, &ofproto->all_conns) {
rconn_reconnect(ofconn->rconn);
}
}
@@ -822,8 +821,7 @@ ofproto_destroy(struct ofproto *p)
ofproto_flush_flows(p);
- LIST_FOR_EACH_SAFE (ofconn, next_ofconn, struct ofconn, node,
- &p->all_conns) {
+ LIST_FOR_EACH_SAFE (ofconn, next_ofconn, node, &p->all_conns) {
ofconn_destroy(ofconn);
}
hmap_destroy(&p->controllers);
@@ -834,8 +832,7 @@ ofproto_destroy(struct ofproto *p)
netflow_destroy(p->netflow);
ofproto_sflow_destroy(p->sflow);
- HMAP_FOR_EACH_SAFE (ofservice, next_ofservice, struct ofservice, node,
- &p->services) {
+ HMAP_FOR_EACH_SAFE (ofservice, next_ofservice, node, &p->services) {
ofservice_destroy(p, ofservice);
}
hmap_destroy(&p->services);
@@ -892,7 +889,7 @@ add_snooper(struct ofproto *ofproto, struct vconn *vconn)
/* Pick a controller for monitoring. */
best = NULL;
- LIST_FOR_EACH (ofconn, struct ofconn, node, &ofproto->all_conns) {
+ LIST_FOR_EACH (ofconn, node, &ofproto->all_conns) {
if (ofconn->type == OFCONN_PRIMARY
&& (!best || snoop_preference(ofconn) > snoop_preference(best))) {
best = ofconn;
@@ -915,7 +912,7 @@ ofproto_port_poll_cb(const struct ofp_phy_port *opp, uint8_t reason,
struct ofproto *ofproto = ofproto_;
struct ofconn *ofconn;
- LIST_FOR_EACH (ofconn, struct ofconn, node, &ofproto->all_conns) {
+ LIST_FOR_EACH (ofconn, node, &ofproto->all_conns) {
struct ofp_port_status *ops;
struct ofpbuf *b;
@@ -968,8 +965,7 @@ ofproto_run1(struct ofproto *p)
in_band_run(p->in_band);
}
- LIST_FOR_EACH_SAFE (ofconn, next_ofconn, struct ofconn, node,
- &p->all_conns) {
+ LIST_FOR_EACH_SAFE (ofconn, next_ofconn, node, &p->all_conns) {
ofconn_run(ofconn, p);
}
@@ -979,13 +975,12 @@ ofproto_run1(struct ofproto *p)
fail_open_run(p->fail_open);
}
- HMAP_FOR_EACH (ofservice, struct ofservice, node, &p->services) {
+ HMAP_FOR_EACH (ofservice, node, &p->services) {
struct vconn *vconn;
int retval;
retval = pvconn_accept(ofservice->pvconn, OFP_VERSION, &vconn);
if (!retval) {
- struct ofconn *ofconn;
struct rconn *rconn;
char *name;
@@ -1039,7 +1034,7 @@ ofproto_wait(struct ofproto *p)
wdp_recv_wait(p->wdp);
wdp_port_poll_wait(p->wdp);
- LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
+ LIST_FOR_EACH (ofconn, node, &p->all_conns) {
ofconn_wait(ofconn);
}
if (p->in_band) {
@@ -1052,7 +1047,7 @@ ofproto_wait(struct ofproto *p)
if (p->sflow) {
ofproto_sflow_wait(p->sflow);
}
- HMAP_FOR_EACH (ofservice, struct ofservice, node, &p->services) {
+ HMAP_FOR_EACH (ofservice, node, &p->services) {
pvconn_wait(ofservice->pvconn);
}
for (i = 0; i < p->n_snoops; i++) {
@@ -1336,8 +1331,8 @@ ofservice_lookup(struct ofproto *ofproto, const char *target)
{
struct ofservice *ofservice;
- HMAP_FOR_EACH_WITH_HASH (ofservice, struct ofservice, node,
- hash_string(target, 0), &ofproto->services) {
+ HMAP_FOR_EACH_WITH_HASH (ofservice, node, hash_string(target, 0),
+ &ofproto->services) {
if (!strcmp(pvconn_get_name(ofservice->pvconn), target)) {
return ofservice;
}
@@ -1945,8 +1940,8 @@ handle_aggregate_stats_request(struct ofproto *p, struct ofconn *ofconn,
struct queue_stats_cbdata {
struct ofconn *ofconn;
+ struct wdp_port *wdp_port;
struct ofpbuf *msg;
- uint16_t port_no;
};
static void
@@ -1956,7 +1951,7 @@ put_queue_stats(struct queue_stats_cbdata *cbdata, uint32_t queue_id,
struct ofp_queue_stats *reply;
reply = append_stats_reply(sizeof *reply, cbdata->ofconn, &cbdata->msg);
- reply->port_no = htons(cbdata->port_no);
+ reply->port_no = htons(cbdata->wdp_port->opp.port_no);
memset(reply->pad, 0, sizeof reply->pad);
reply->queue_id = htonl(queue_id);
reply->tx_bytes = htonll(stats->tx_bytes);
@@ -1978,15 +1973,16 @@ static void
handle_queue_stats_for_port(struct wdp_port *port, uint32_t queue_id,
struct queue_stats_cbdata *cbdata)
{
- cbdata->port_no = port->opp.port_no;
+ /* Remember the port so that put_queue_stats() can report its
+ * opp.port_no in each reply. */
+ cbdata->wdp_port = port;
if (queue_id == OFPQ_ALL) {
netdev_dump_queue_stats(port->netdev,
handle_queue_stats_dump_cb, cbdata);
} else {
struct netdev_queue_stats stats;
- netdev_get_queue_stats(port->netdev, queue_id, &stats);
- put_queue_stats(cbdata, queue_id, &stats);
+ /* Append a reply only when netdev_get_queue_stats() returns zero
+ * (presumably success), so 'stats' is not reported otherwise. */
+ if (!netdev_get_queue_stats(port->netdev, queue_id, &stats)) {
+ put_queue_stats(cbdata, queue_id, &stats);
+ }
}
}
@@ -2516,8 +2512,7 @@ handle_role_request(struct ofproto *ofproto,
if (role == NX_ROLE_MASTER) {
struct ofconn *other;
- HMAP_FOR_EACH (other, struct ofconn, hmap_node,
- &ofproto->controllers) {
+ HMAP_FOR_EACH (other, hmap_node, &ofproto->controllers) {
if (other->role == NX_ROLE_MASTER) {
other->role = NX_ROLE_SLAVE;
}
@@ -2791,7 +2786,7 @@ delete_flow(struct ofproto *p, struct wdp_rule *rule, uint8_t reason)
ofr->packet_count = htonll(stats.n_packets);
ofr->byte_count = htonll(stats.n_bytes);
- LIST_FOR_EACH (ofconn, struct ofconn, node, &p->all_conns) {
+ LIST_FOR_EACH (ofconn, node, &p->all_conns) {
if (rconn_is_connected(ofconn->rconn)) {
if (prev) {
queue_tx(ofpbuf_clone(buf), prev, prev->reply_counter);
@@ -2931,7 +2926,7 @@ send_packet_in(struct ofproto *ofproto, struct wdp_packet *packet)
max_len = do_convert_to_packet_in(packet);
prev = NULL;
- LIST_FOR_EACH (ofconn, struct ofconn, node, &ofproto->all_conns) {
+ LIST_FOR_EACH (ofconn, node, &ofproto->all_conns) {
if (ofconn_receives_async_msgs(ofconn)) {
if (prev) {
schedule_packet_in(prev, packet, max_len, true);
diff --git a/ofproto/pktbuf.c b/ofproto/pktbuf.c
index 7e5981d5c..f28111b90 100644
--- a/ofproto/pktbuf.c
+++ b/ofproto/pktbuf.c
@@ -113,8 +113,8 @@ pktbuf_save(struct pktbuf *pb, struct ofpbuf *buffer, uint16_t in_port)
if (++p->cookie >= COOKIE_MAX) {
p->cookie = 0;
}
- p->buffer = ofpbuf_new(sizeof(struct ofp_packet_in) + buffer->size);
- ofpbuf_reserve(p->buffer, sizeof(struct ofp_packet_in));
+ p->buffer = ofpbuf_new_with_headroom(buffer->size,
+ sizeof(struct ofp_packet_in));
ofpbuf_put(p->buffer, buffer->data, buffer->size);
p->timeout = time_msec() + OVERWRITE_MSECS;
p->in_port = in_port;
diff --git a/ofproto/status.c b/ofproto/status.c
index 0df9ce3d9..e4834d84b 100644
--- a/ofproto/status.c
+++ b/ofproto/status.c
@@ -70,7 +70,7 @@ switch_status_handle_request(struct switch_status *ss, struct rconn *rconn,
sr.request.string = (void *) (request + 1);
sr.request.length = ntohs(request->header.length) - sizeof *request;
ds_init(&sr.output);
- LIST_FOR_EACH (c, struct status_category, node, &ss->categories) {
+ LIST_FOR_EACH (c, node, &ss->categories) {
if (!memcmp(c->name, sr.request.string,
MIN(strlen(c->name), sr.request.length))) {
sr.category = c;
@@ -170,8 +170,7 @@ switch_status_destroy(struct switch_status *ss)
/* Orphan any remaining categories, so that unregistering them later
* won't write to bad memory. */
struct status_category *c, *next;
- LIST_FOR_EACH_SAFE (c, next,
- struct status_category, node, &ss->categories) {
+ LIST_FOR_EACH_SAFE (c, next, node, &ss->categories) {
list_init(&c->node);
}
switch_status_unregister(ss->config_cat);
diff --git a/ofproto/wdp-xflow.c b/ofproto/wdp-xflow.c
index ce7ff2f1c..26830e666 100644
--- a/ofproto/wdp-xflow.c
+++ b/ofproto/wdp-xflow.c
@@ -59,14 +59,19 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
#define WX_MAX_WILD 65536 /* Wildcarded rules. */
#define WX_MAX_EXACT 1048576 /* Exact-match rules. */
+/* A port in a struct wx, kept in the wx's "ports" hmap. */
+struct wx_port {
+ struct hmap_node hmap_node; /* In struct wx's "ports" hmap. */
+ struct wdp_port wdp_port; /* wdp_port.opp.port_no is the OFP port nr. */
+ uint16_t xflow_port; /* xflow port number. */
+};
+
struct wx {
struct list list_node;
struct wdp wdp;
struct xfif *xfif;
struct classifier cls;
struct netdev_monitor *netdev_monitor;
- struct port_array ports; /* Index is xflow port nr;
- * wdp_port->opp.port_no is OFP port nr. */
+ struct hmap ports; /* Contains "struct wx_port"s. */
struct shash port_by_name;
long long int next_expiration;
int wdp_listen_mask;
@@ -95,6 +100,7 @@ static const struct ofhooks default_ofhooks;
static struct list all_wx = LIST_INITIALIZER(&all_wx);
static int wx_port_init(struct wx *);
+static struct wx_port *wx_port_get(const struct wx *, uint16_t xflow_port);
static void wx_port_process_change(struct wx *wx, int error, char *devname,
wdp_port_poll_cb_func *cb, void *aux);
static void wx_port_refresh_groups(struct wx *);
@@ -334,7 +340,7 @@ wx_rule_destroy(struct wx *wx, struct wx_rule *rule)
{
if (!rule->super) {
struct wx_rule *subrule, *next;
- LIST_FOR_EACH_SAFE (subrule, next, struct wx_rule, list, &rule->list) {
+ LIST_FOR_EACH_SAFE (subrule, next, list, &rule->list) {
wx_rule_revalidate(wx, subrule);
}
} else {
@@ -700,10 +706,10 @@ static void do_xlate_actions(const union ofp_action *in, size_t n_in,
static void
add_output_action(struct wx_xlate_ctx *ctx, uint16_t port)
{
- const struct wdp_port *wdp_port = port_array_get(&ctx->wx->ports, port);
+ const struct wx_port *wx_port = wx_port_get(ctx->wx, port);
- if (wdp_port) {
- if (wdp_port->opp.config & OFPPC_NO_FWD) {
+ if (wx_port) {
+ if (wx_port->wdp_port.opp.config & OFPPC_NO_FWD) {
/* Forwarding disabled on port. */
return;
}
@@ -881,11 +887,32 @@ xlate_enqueue_action(struct wx_xlate_ctx *ctx,
}
+/* Translates the Nicira set-queue action 'nasq' into an XFLOWAT_SET_PRIORITY
+ * action appended to 'ctx->out'. The queue ID in 'nasq' is converted to a
+ * priority with xfif_queue_to_priority(); if that fails, no action is
+ * added. */
static void
+xlate_set_queue_action(struct wx_xlate_ctx *ctx,
+ const struct nx_action_set_queue *nasq)
+{
+ uint32_t priority;
+ int error;
+
+ error = xfif_queue_to_priority(ctx->wx->xfif, ntohl(nasq->queue_id),
+ &priority);
+ if (error) {
+ /* Couldn't translate queue to a priority, so ignore. A warning
+ * has already been logged. */
+ return;
+ }
+
+ remove_pop_action(ctx);
+ xflow_actions_add(ctx->out, XFLOWAT_SET_PRIORITY)->priority.priority
+ = priority;
+}
+
+static void
xlate_nicira_action(struct wx_xlate_ctx *ctx,
const struct nx_action_header *nah)
{
const struct nx_action_resubmit *nar;
const struct nx_action_set_tunnel *nast;
+ const struct nx_action_set_queue *nasq;
union xflow_action *oa;
int subtype = ntohs(nah->subtype);
@@ -908,6 +935,15 @@ xlate_nicira_action(struct wx_xlate_ctx *ctx,
}
break;
+ case NXAST_SET_QUEUE:
+ nasq = (const struct nx_action_set_queue *) nah;
+ xlate_set_queue_action(ctx, nasq);
+ break;
+
+ case NXAST_POP_QUEUE:
+ xflow_actions_add(ctx->out, XFLOWAT_POP_PRIORITY);
+ break;
+
/* If you add a new action here that modifies flow data, don't forget to
* update the flow key in ctx->flow at the same time. */
@@ -923,14 +959,17 @@ do_xlate_actions(const union ofp_action *in, size_t n_in,
{
struct actions_iterator iter;
const union ofp_action *ia;
- const struct wdp_port *port;
+ const struct wx_port *port;
- port = port_array_get(&ctx->wx->ports, ctx->flow.in_port);
- if (port && port->opp.config & (OFPPC_NO_RECV | OFPPC_NO_RECV_STP) &&
- port->opp.config & (eth_addr_equals(ctx->flow.dl_dst, eth_addr_stp)
- ? OFPPC_NO_RECV_STP : OFPPC_NO_RECV)) {
- /* Drop this flow. */
- return;
+ port = wx_port_get(ctx->wx, ctx->flow.in_port);
+ if (port) {
+ const struct ofp_phy_port *opp = &port->wdp_port.opp;
+ if (opp->config & (OFPPC_NO_RECV | OFPPC_NO_RECV_STP) &&
+ opp->config & (eth_addr_equals(ctx->flow.dl_dst, eth_addr_stp)
+ ? OFPPC_NO_RECV_STP : OFPPC_NO_RECV)) {
+ /* Drop this flow. */
+ return;
+ }
}
for (ia = actions_first(&iter, in, n_in); ia; ia = actions_next(&iter)) {
@@ -1165,7 +1204,7 @@ expire_rule(struct cls_rule *cls_rule, void *wx_)
* due to an idle timeout. */
if (rule->wr.cr.flow.wildcards) {
struct wx_rule *subrule, *next;
- LIST_FOR_EACH_SAFE (subrule, next, struct wx_rule, list, &rule->list) {
+ LIST_FOR_EACH_SAFE (subrule, next, list, &rule->list) {
wx_rule_remove(wx, subrule);
}
} else {
@@ -1264,7 +1303,7 @@ wx_run(void)
{
struct wx *wx;
- LIST_FOR_EACH (wx, struct wx, list_node, &all_wx) {
+ LIST_FOR_EACH (wx, list_node, &all_wx) {
wx_run_one(wx);
}
xf_run();
@@ -1285,7 +1324,7 @@ wx_wait(void)
{
struct wx *wx;
- LIST_FOR_EACH (wx, struct wx, list_node, &all_wx) {
+ LIST_FOR_EACH (wx, list_node, &all_wx) {
wx_wait_one(wx);
}
xf_wait();
@@ -1321,7 +1360,7 @@ wx_open(const struct wdp_class *wdp_class, const char *name, bool create,
wx->xfif = xfif;
classifier_init(&wx->cls);
wx->netdev_monitor = netdev_monitor_create();
- port_array_init(&wx->ports);
+ hmap_init(&wx->ports);
shash_init(&wx->port_by_name);
wx->next_expiration = time_msec() + 1000;
tag_set_init(&wx->revalidate_set);
@@ -1351,6 +1390,8 @@ wx_close(struct wdp *wdp)
netdev_monitor_destroy(wx->netdev_monitor);
list_remove(&wx->list_node);
mac_learning_destroy(wx->ml);
+ hmap_destroy(&wx->ports);
+ shash_destroy(&wx->port_by_name);
free(wx);
}
@@ -1376,8 +1417,7 @@ wx_get_features(const struct wdp *wdp, struct ofpbuf **featuresp)
struct wx *wx = wx_cast(wdp);
struct ofp_switch_features *osf;
struct ofpbuf *buf;
- unsigned int port_no;
- struct wdp_port *port;
+ struct wx_port *port;
buf = ofpbuf_new(sizeof *osf);
osf = ofpbuf_put_zeros(buf, sizeof *osf);
@@ -1396,8 +1436,9 @@ wx_get_features(const struct wdp *wdp, struct ofpbuf **featuresp)
(1u << OFPAT_SET_TP_DST) |
(1u << OFPAT_ENQUEUE));
- PORT_ARRAY_FOR_EACH (port, &wx->ports, port_no) {
- hton_ofp_phy_port(ofpbuf_put(buf, &port->opp, sizeof port->opp));
+ HMAP_FOR_EACH (port, hmap_node, &wx->ports) {
+ const struct ofp_phy_port *opp = &port->wdp_port.opp;
+ hton_ofp_phy_port(ofpbuf_put(buf, opp, sizeof *opp));
}
*featuresp = buf;
@@ -1498,10 +1539,10 @@ wx_port_del(struct wdp *wdp, uint16_t port_no)
}
static int
-wx_answer_port_query(const struct wdp_port *port, struct wdp_port *portp)
+wx_answer_port_query(const struct wx_port *port, struct wdp_port *portp)
{
if (port) {
- wdp_port_copy(portp, port);
+ wdp_port_copy(portp, &port->wdp_port);
return 0;
} else {
return ENOENT;
@@ -1513,10 +1554,9 @@ wx_port_query_by_number(const struct wdp *wdp, uint16_t port_no,
struct wdp_port *portp)
{
struct wx *wx = wx_cast(wdp);
- const struct wdp_port *port;
+ struct wx_port *wx_port = wx_port_get(wx, ofp_port_to_xflow_port(port_no));
- port = port_array_get(&wx->ports, ofp_port_to_xflow_port(port_no));
- return wx_answer_port_query(port, portp);
+ return wx_answer_port_query(wx_port, portp);
}
static int
@@ -1533,42 +1573,46 @@ static int
wx_port_set_config(struct wdp *wdp, uint16_t port_no, uint32_t config)
{
struct wx *wx = wx_cast(wdp);
- struct wdp_port *port;
+ struct wx_port *port;
+ struct ofp_phy_port *opp;
uint32_t changes;
- port = port_array_get(&wx->ports, ofp_port_to_xflow_port(port_no));
+ port = wx_port_get(wx, ofp_port_to_xflow_port(port_no));
if (!port) {
return ENOENT;
}
- changes = config ^ port->opp.config;
+ opp = &port->wdp_port.opp;
+ changes = config ^ opp->config;
if (changes & OFPPC_PORT_DOWN) {
+ struct netdev *netdev = port->wdp_port.netdev;
int error;
+
if (config & OFPPC_PORT_DOWN) {
- error = netdev_turn_flags_off(port->netdev, NETDEV_UP, true);
+ error = netdev_turn_flags_off(netdev, NETDEV_UP, true);
} else {
- error = netdev_turn_flags_on(port->netdev, NETDEV_UP, true);
+ error = netdev_turn_flags_on(netdev, NETDEV_UP, true);
}
if (!error) {
- port->opp.config ^= OFPPC_PORT_DOWN;
+ opp->config ^= OFPPC_PORT_DOWN;
}
}
#define REVALIDATE_BITS (OFPPC_NO_RECV | OFPPC_NO_RECV_STP | OFPPC_NO_FWD)
if (changes & REVALIDATE_BITS) {
COVERAGE_INC(wx_costly_flags);
- port->opp.config ^= changes & REVALIDATE_BITS;
+ opp->config ^= changes & REVALIDATE_BITS;
wx->need_revalidate = true;
}
#undef REVALIDATE_BITS
if (changes & OFPPC_NO_FLOOD) {
- port->opp.config ^= OFPPC_NO_FLOOD;
+ opp->config ^= OFPPC_NO_FLOOD;
wx_port_refresh_groups(wx);
}
if (changes & OFPPC_NO_PACKET_IN) {
- port->opp.config ^= OFPPC_NO_PACKET_IN;
+ opp->config ^= OFPPC_NO_PACKET_IN;
}
return 0;
@@ -1578,15 +1622,15 @@ static int
wx_port_list(const struct wdp *wdp, struct wdp_port **portsp, size_t *n_portsp)
{
struct wx *wx = wx_cast(wdp);
- struct wdp_port *ports, *port;
- unsigned int port_no;
+ struct wdp_port *ports;
+ struct wx_port *port;
size_t n_ports, i;
- *n_portsp = n_ports = port_array_count(&wx->ports);
+ *n_portsp = n_ports = hmap_count(&wx->ports);
*portsp = ports = xmalloc(n_ports * sizeof *ports);
i = 0;
- PORT_ARRAY_FOR_EACH (port, &wx->ports, port_no) {
- wdp_port_copy(&ports[i++], port);
+ HMAP_FOR_EACH (port, hmap_node, &wx->ports) {
+ wdp_port_copy(&ports[i++], &port->wdp_port);
}
assert(i == n_ports);
@@ -1732,7 +1776,7 @@ query_stats(struct wx *wx, struct wx_rule *rule, struct wdp_flow_stats *stats)
xflow_flows = xzalloc(n_xflow_flows * sizeof *xflow_flows);
if (rule->wr.cr.flow.wildcards) {
size_t i = 0;
- LIST_FOR_EACH (subrule, struct wx_rule, list, &rule->list) {
+ LIST_FOR_EACH (subrule, list, &rule->list) {
xflow_key_from_flow(&xflow_flows[i++].key, &subrule->wr.cr.flow);
stats->n_packets += subrule->packet_count;
stats->n_bytes += subrule->byte_count;
@@ -2012,8 +2056,8 @@ wx_translate_xflow_msg(struct xflow_msg *msg, struct ofpbuf *payload,
static const uint8_t *
get_local_mac(const struct wx *wx)
{
- const struct wdp_port *port = port_array_get(&wx->ports, XFLOWP_LOCAL);
- return port ? port->opp.hw_addr : NULL;
+ const struct wx_port *port = wx_port_get(wx, XFLOWP_LOCAL);
+ return port ? port->wdp_port.opp.hw_addr : NULL;
}
/* Returns true if 'packet' is a DHCP reply to the local port. Such a reply
@@ -2150,8 +2194,7 @@ wx_purge_ctl_packets__(struct wx *wx)
{
struct wdp_packet *this, *next;
- LIST_FOR_EACH_SAFE (this, next, struct wdp_packet, list,
- &wx->ctl_packets) {
+ LIST_FOR_EACH_SAFE (this, next, list, &wx->ctl_packets) {
list_remove(&this->list);
ofpbuf_delete(this->payload);
free(this);
@@ -2257,16 +2300,16 @@ wx_port_refresh_group(struct wx *wx, unsigned int group)
{
uint16_t *ports;
size_t n_ports;
- struct wdp_port *port;
- unsigned int port_no;
+ struct wx_port *port;
assert(group == WX_GROUP_ALL || group == WX_GROUP_FLOOD);
- ports = xmalloc(port_array_count(&wx->ports) * sizeof *ports);
+ ports = xmalloc(hmap_count(&wx->ports) * sizeof *ports);
n_ports = 0;
- PORT_ARRAY_FOR_EACH (port, &wx->ports, port_no) {
- if (group == WX_GROUP_ALL || !(port->opp.config & OFPPC_NO_FLOOD)) {
- ports[n_ports++] = port_no;
+ HMAP_FOR_EACH (port, hmap_node, &wx->ports) {
+ const struct ofp_phy_port *opp = &port->wdp_port.opp;
+ if (group == WX_GROUP_ALL || !(opp->config & OFPPC_NO_FLOOD)) {
+ ports[n_ports++] = port->xflow_port;
}
}
xfif_port_group_set(wx->xfif, group, ports, n_ports);
@@ -2286,15 +2329,14 @@ static void
wx_port_reinit(struct wx *wx, wdp_port_poll_cb_func *cb, void *aux)
{
struct svec devnames;
- struct wdp_port *wdp_port;
- unsigned int port_no;
+ struct wx_port *wx_port;
struct xflow_port *xflow_ports;
size_t n_xflow_ports;
size_t i;
svec_init(&devnames);
- PORT_ARRAY_FOR_EACH (wdp_port, &wx->ports, port_no) {
- svec_add (&devnames, (char *) wdp_port->opp.name);
+ HMAP_FOR_EACH (wx_port, hmap_node, &wx->ports) {
+ svec_add (&devnames, (char *) wx_port->wdp_port.opp.name);
}
xfif_port_list(wx->xfif, &xflow_ports, &n_xflow_ports);
for (i = 0; i < n_xflow_ports; i++) {
@@ -2311,11 +2353,12 @@ wx_port_reinit(struct wx *wx, wdp_port_poll_cb_func *cb, void *aux)
wx_port_refresh_groups(wx);
}
-static struct wdp_port *
-make_wdp_port(const struct xflow_port *xflow_port)
+static struct wx_port *
+make_wx_port(const struct xflow_port *xflow_port)
{
struct netdev_options netdev_options;
enum netdev_flags flags;
+ struct wx_port *wx_port;
struct wdp_port *wdp_port;
struct netdev *netdev;
bool carrier;
@@ -2334,7 +2377,9 @@ make_wdp_port(const struct xflow_port *xflow_port)
return NULL;
}
- wdp_port = xmalloc(sizeof *wdp_port);
+ wx_port = xmalloc(sizeof *wx_port);
+ wx_port->xflow_port = xflow_port->port;
+ wdp_port = &wx_port->wdp_port;
wdp_port->netdev = netdev;
wdp_port->opp.port_no = xflow_port_to_ofp_port(xflow_port->port);
netdev_get_etheraddr(netdev, wdp_port->opp.hw_addr);
@@ -2354,13 +2399,13 @@ make_wdp_port(const struct xflow_port *xflow_port)
wdp_port->devname = xstrdup(xflow_port->devname);
wdp_port->internal = (xflow_port->flags & XFLOW_PORT_INTERNAL) != 0;
- return wdp_port;
+ return wx_port;
}
static bool
wx_port_conflicts(const struct wx *wx, const struct xflow_port *xflow_port)
{
- if (port_array_get(&wx->ports, xflow_port->port)) {
+ if (wx_port_get(wx, xflow_port->port)) {
VLOG_WARN_RL(&rl, "ignoring duplicate port %"PRIu16" in datapath",
xflow_port->port);
return true;
@@ -2374,10 +2419,10 @@ wx_port_conflicts(const struct wx *wx, const struct xflow_port *xflow_port)
}
static int
-wdp_port_equal(const struct wdp_port *a_, const struct wdp_port *b_)
+wx_port_equal(const struct wx_port *a_, const struct wx_port *b_)
{
- const struct ofp_phy_port *a = &a_->opp;
- const struct ofp_phy_port *b = &b_->opp;
+ const struct ofp_phy_port *a = &a_->wdp_port.opp;
+ const struct ofp_phy_port *b = &b_->wdp_port.opp;
BUILD_ASSERT_DECL(sizeof *a == 48); /* Detect ofp_phy_port changes. */
return (a->port_no == b->port_no
@@ -2392,32 +2437,35 @@ wdp_port_equal(const struct wdp_port *a_, const struct wdp_port *b_)
}
static void
-wx_port_install(struct wx *wx, struct wdp_port *wdp_port)
+wx_port_install(struct wx *wx, struct wx_port *wx_port)
{
- uint16_t xflow_port = ofp_port_to_xflow_port(wdp_port->opp.port_no);
- const char *netdev_name = (const char *) wdp_port->opp.name;
+ const struct ofp_phy_port *opp = &wx_port->wdp_port.opp;
+ uint16_t xflow_port = ofp_port_to_xflow_port(opp->port_no);
+ const char *name = (const char *) opp->name;
- netdev_monitor_add(wx->netdev_monitor, wdp_port->netdev);
- port_array_set(&wx->ports, xflow_port, wdp_port);
- shash_add(&wx->port_by_name, netdev_name, wdp_port);
+ netdev_monitor_add(wx->netdev_monitor, wx_port->wdp_port.netdev);
+ hmap_insert(&wx->ports, &wx_port->hmap_node, hash_int(xflow_port, 0));
+ shash_add(&wx->port_by_name, name, wx_port);
}
static void
-wx_port_remove(struct wx *wx, struct wdp_port *wdp_port)
+wx_port_remove(struct wx *wx, struct wx_port *wx_port)
{
- uint16_t xflow_port = ofp_port_to_xflow_port(wdp_port->opp.port_no);
+ const struct ofp_phy_port *opp = &wx_port->wdp_port.opp;
+ const char *name = (const char *) opp->name;
- netdev_monitor_remove(wx->netdev_monitor, wdp_port->netdev);
- port_array_delete(&wx->ports, xflow_port);
- shash_delete(&wx->port_by_name,
- shash_find(&wx->port_by_name, (char *) wdp_port->opp.name));
+ netdev_monitor_remove(wx->netdev_monitor, wx_port->wdp_port.netdev);
+ hmap_remove(&wx->ports, &wx_port->hmap_node);
+ shash_delete(&wx->port_by_name, shash_find(&wx->port_by_name, name));
}
static void
-wx_port_free(struct wdp_port *wdp_port)
+wx_port_free(struct wx_port *wx_port)
{
- wdp_port_free(wdp_port);
- free(wdp_port);
+ if (wx_port) {
+ wdp_port_free(&wx_port->wdp_port);
+ free(wx_port);
+ }
}
static void
@@ -2425,8 +2473,8 @@ wx_port_update(struct wx *wx, const char *devname,
wdp_port_poll_cb_func *cb, void *aux)
{
struct xflow_port xflow_port;
- struct wdp_port *old_wdp_port;
- struct wdp_port *new_wdp_port;
+ struct wx_port *old_wx_port;
+ struct wx_port *new_wx_port;
int error;
COVERAGE_INC(wx_update_port);
@@ -2434,10 +2482,10 @@ wx_port_update(struct wx *wx, const char *devname,
/* Query the datapath for port information. */
error = xfif_port_query_by_name(wx->xfif, devname, &xflow_port);
- /* Find the old wdp_port. */
- old_wdp_port = shash_find_data(&wx->port_by_name, devname);
+ /* Find the old wx_port. */
+ old_wx_port = shash_find_data(&wx->port_by_name, devname);
if (!error) {
- if (!old_wdp_port) {
+ if (!old_wx_port) {
/* There's no port named 'devname' but there might be a port with
* the same port number. This could happen if a port is deleted
* and then a new one added in its place very quickly, or if a port
@@ -2448,7 +2496,7 @@ wx_port_update(struct wx *wx, const char *devname,
* reliably but more portably by comparing the old port's MAC
* against the new port's MAC. However, this code isn't that smart
* and always sends an OFPPR_MODIFY (XXX). */
- old_wdp_port = port_array_get(&wx->ports, xflow_port.port);
+ old_wx_port = wx_port_get(wx, xflow_port.port);
}
} else if (error != ENOENT && error != ENODEV) {
VLOG_WARN_RL(&rl, "xfif_port_query_by_name returned unexpected error "
@@ -2456,48 +2504,50 @@ wx_port_update(struct wx *wx, const char *devname,
return;
}
- /* Create a new wdp_port. */
- new_wdp_port = !error ? make_wdp_port(&xflow_port) : NULL;
+ /* Create a new wx_port. */
+ new_wx_port = !error ? make_wx_port(&xflow_port) : NULL;
/* Eliminate a few pathological cases. */
- if (!old_wdp_port && !new_wdp_port) {
+ if (!old_wx_port && !new_wx_port) {
return;
- } else if (old_wdp_port && new_wdp_port) {
+ } else if (old_wx_port && new_wx_port) {
/* Most of the 'config' bits are OpenFlow soft state, but
* OFPPC_PORT_DOWN is maintained by the kernel. So transfer the
- * OpenFlow bits from old_wdp_port. (make_wdp_port() only sets
+ * OpenFlow bits from old_wx_port. (make_wx_port() only sets
* OFPPC_PORT_DOWN and leaves the other bits 0.) */
- new_wdp_port->opp.config |= old_wdp_port->opp.config & ~OFPPC_PORT_DOWN;
+ struct ofp_phy_port *new_opp = &new_wx_port->wdp_port.opp;
+ struct ofp_phy_port *old_opp = &old_wx_port->wdp_port.opp;
+ new_opp->config |= old_opp->config & ~OFPPC_PORT_DOWN;
- if (wdp_port_equal(old_wdp_port, new_wdp_port)) {
+ if (wx_port_equal(old_wx_port, new_wx_port)) {
/* False alarm--no change. */
- wx_port_free(new_wdp_port);
+ wx_port_free(new_wx_port);
return;
}
}
/* Now deal with the normal cases. */
- if (old_wdp_port) {
- wx_port_remove(wx, old_wdp_port);
+ if (old_wx_port) {
+ wx_port_remove(wx, old_wx_port);
}
- if (new_wdp_port) {
- wx_port_install(wx, new_wdp_port);
+ if (new_wx_port) {
+ wx_port_install(wx, new_wx_port);
}
/* Call back. */
- if (!old_wdp_port) {
- (*cb)(&new_wdp_port->opp, OFPPR_ADD, aux);
- } else if (!new_wdp_port) {
- (*cb)(&old_wdp_port->opp, OFPPR_DELETE, aux);
+ if (!old_wx_port) {
+ (*cb)(&new_wx_port->wdp_port.opp, OFPPR_ADD, aux);
+ } else if (!new_wx_port) {
+ (*cb)(&old_wx_port->wdp_port.opp, OFPPR_DELETE, aux);
} else {
- (*cb)(&new_wdp_port->opp, OFPPR_MODIFY, aux);
+ (*cb)(&new_wx_port->wdp_port.opp, OFPPR_MODIFY, aux);
}
/* Update port groups. */
wx_port_refresh_groups(wx);
/* Clean up. */
- wx_port_free(old_wdp_port);
+ wx_port_free(old_wx_port);
}
static int
@@ -2516,9 +2566,9 @@ wx_port_init(struct wx *wx)
for (i = 0; i < n_ports; i++) {
const struct xflow_port *xflow_port = &ports[i];
if (!wx_port_conflicts(wx, xflow_port)) {
- struct wdp_port *wdp_port = make_wdp_port(xflow_port);
- if (wdp_port) {
- wx_port_install(wx, wdp_port);
+ struct wx_port *wx_port = make_wx_port(xflow_port);
+ if (wx_port) {
+ wx_port_install(wx, wx_port);
}
}
}
@@ -2526,6 +2576,21 @@ wx_port_init(struct wx *wx)
wx_port_refresh_groups(wx);
return 0;
}
+
+/* Returns the port in 'wx' with xflow port number 'xflow_port'. */
+static struct wx_port *
+wx_port_get(const struct wx *wx, uint16_t xflow_port)
+{
+ struct wx_port *port;
+
+ HMAP_FOR_EACH_IN_BUCKET (port, hmap_node, hash_int(xflow_port, 0),
+ &wx->ports) {
+ if (port->xflow_port == xflow_port) {
+ return port;
+ }
+ }
+ return NULL;
+}
void
wdp_xflow_register(void)
diff --git a/ovsdb/execution.c b/ovsdb/execution.c
index 5b6762f07..a96abfcaf 100644
--- a/ovsdb/execution.c
+++ b/ovsdb/execution.c
@@ -103,8 +103,6 @@ ovsdb_execute(struct ovsdb *db, const struct json *params,
|| !params->u.array.n
|| params->u.array.elems[0]->type != JSON_STRING
|| strcmp(params->u.array.elems[0]->u.string, db->schema->name)) {
- struct ovsdb_error *error;
-
if (params->type != JSON_ARRAY) {
error = ovsdb_syntax_error(params, NULL, "array expected");
} else {
@@ -629,7 +627,6 @@ ovsdb_execute_wait(struct ovsdb_execution *x, struct ovsdb_parser *parser,
/* Parse "rows" into 'expected'. */
ovsdb_row_hash_init(&expected, &columns);
for (i = 0; i < rows->u.array.n; i++) {
- struct ovsdb_error *error;
struct ovsdb_row *row;
row = ovsdb_row_create(table);
diff --git a/ovsdb/file.c b/ovsdb/file.c
index 846f55668..ddb443a11 100644
--- a/ovsdb/file.c
+++ b/ovsdb/file.c
@@ -427,7 +427,7 @@ ovsdb_file_save_copy__(const char *file_name, int locking,
const struct ovsdb_table *table = node->data;
const struct ovsdb_row *row;
- HMAP_FOR_EACH (row, struct ovsdb_row, hmap_node, &table->rows) {
+ HMAP_FOR_EACH (row, hmap_node, &table->rows) {
ovsdb_file_txn_add_row(&ftxn, NULL, row, NULL);
}
}
diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c
index 71a44899e..d58f9dc17 100644
--- a/ovsdb/jsonrpc-server.c
+++ b/ovsdb/jsonrpc-server.c
@@ -323,8 +323,7 @@ ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *remote)
{
struct ovsdb_jsonrpc_session *s, *next;
- LIST_FOR_EACH_SAFE (s, next, struct ovsdb_jsonrpc_session, node,
- &remote->sessions) {
+ LIST_FOR_EACH_SAFE (s, next, node, &remote->sessions) {
int error = ovsdb_jsonrpc_session_run(s);
if (error) {
ovsdb_jsonrpc_session_close(s);
@@ -346,7 +345,7 @@ ovsdb_jsonrpc_session_wait_all(struct ovsdb_jsonrpc_remote *remote)
{
struct ovsdb_jsonrpc_session *s;
- LIST_FOR_EACH (s, struct ovsdb_jsonrpc_session, node, &remote->sessions) {
+ LIST_FOR_EACH (s, node, &remote->sessions) {
ovsdb_jsonrpc_session_wait(s);
}
}
@@ -356,8 +355,7 @@ ovsdb_jsonrpc_session_close_all(struct ovsdb_jsonrpc_remote *remote)
{
struct ovsdb_jsonrpc_session *s, *next;
- LIST_FOR_EACH_SAFE (s, next, struct ovsdb_jsonrpc_session, node,
- &remote->sessions) {
+ LIST_FOR_EACH_SAFE (s, next, node, &remote->sessions) {
ovsdb_jsonrpc_session_close(s);
}
}
@@ -369,8 +367,7 @@ ovsdb_jsonrpc_session_reconnect_all(struct ovsdb_jsonrpc_remote *remote)
{
struct ovsdb_jsonrpc_session *s, *next;
- LIST_FOR_EACH_SAFE (s, next, struct ovsdb_jsonrpc_session, node,
- &remote->sessions) {
+ LIST_FOR_EACH_SAFE (s, next, node, &remote->sessions) {
jsonrpc_session_force_reconnect(s->js);
if (!jsonrpc_session_is_alive(s->js)) {
ovsdb_jsonrpc_session_close(s);
@@ -554,8 +551,7 @@ ovsdb_jsonrpc_trigger_find(struct ovsdb_jsonrpc_session *s,
{
struct ovsdb_jsonrpc_trigger *t;
- HMAP_FOR_EACH_WITH_HASH (t, struct ovsdb_jsonrpc_trigger, hmap_node, hash,
- &s->triggers) {
+ HMAP_FOR_EACH_WITH_HASH (t, hmap_node, hash, &s->triggers) {
if (json_equal(t->id, id)) {
return t;
}
@@ -593,8 +589,7 @@ static void
ovsdb_jsonrpc_trigger_complete_all(struct ovsdb_jsonrpc_session *s)
{
struct ovsdb_jsonrpc_trigger *t, *next;
- HMAP_FOR_EACH_SAFE (t, next, struct ovsdb_jsonrpc_trigger, hmap_node,
- &s->triggers) {
+ HMAP_FOR_EACH_SAFE (t, next, hmap_node, &s->triggers) {
ovsdb_jsonrpc_trigger_complete(t);
}
}
@@ -671,8 +666,7 @@ ovsdb_jsonrpc_monitor_find(struct ovsdb_jsonrpc_session *s,
{
struct ovsdb_jsonrpc_monitor *m;
- HMAP_FOR_EACH_WITH_HASH (m, struct ovsdb_jsonrpc_monitor, node,
- json_hash(monitor_id, 0), &s->monitors) {
+ HMAP_FOR_EACH_WITH_HASH (m, node, json_hash(monitor_id, 0), &s->monitors) {
if (json_equal(m->monitor_id, monitor_id)) {
return m;
}
@@ -919,8 +913,7 @@ ovsdb_jsonrpc_monitor_remove_all(struct ovsdb_jsonrpc_session *s)
{
struct ovsdb_jsonrpc_monitor *m, *next;
- HMAP_FOR_EACH_SAFE (m, next,
- struct ovsdb_jsonrpc_monitor, node, &s->monitors) {
+ HMAP_FOR_EACH_SAFE (m, next, node, &s->monitors) {
ovsdb_remove_replica(s->remote->server->db, &m->replica);
}
}
@@ -1097,8 +1090,7 @@ ovsdb_jsonrpc_monitor_get_initial(const struct ovsdb_jsonrpc_monitor *m)
if (mt->select & OJMS_INITIAL) {
struct ovsdb_row *row;
- HMAP_FOR_EACH (row, struct ovsdb_row, hmap_node,
- &mt->table->rows) {
+ HMAP_FOR_EACH (row, hmap_node, &mt->table->rows) {
ovsdb_jsonrpc_monitor_change_cb(NULL, row, NULL, &aux);
}
}
diff --git a/ovsdb/ovsdb-doc.in b/ovsdb/ovsdb-doc.in
index 90de4521a..9e0a318d2 100755
--- a/ovsdb/ovsdb-doc.in
+++ b/ovsdb/ovsdb-doc.in
@@ -43,7 +43,7 @@ def inlineXmlToNroff(node, font):
if node.nodeType == node.TEXT_NODE:
return textToNroff(node.data, font)
elif node.nodeType == node.ELEMENT_NODE:
- if node.tagName == 'code' or node.tagName == 'em':
+ if node.tagName in ['code', 'em', 'option']:
s = r'\fB'
for child in node.childNodes:
s += inlineXmlToNroff(child, r'\fB')
@@ -76,17 +76,23 @@ def blockXmlToNroff(nodes, para='.PP'):
s += textToNroff(node.data)
s = s.lstrip()
elif node.nodeType == node.ELEMENT_NODE:
- if node.tagName == 'ul':
+ if node.tagName in ['ul', 'ol']:
if s != "":
s += "\n"
s += ".RS\n"
+ i = 0
for liNode in node.childNodes:
if (liNode.nodeType == node.ELEMENT_NODE
and liNode.tagName == 'li'):
- s += ".IP \\(bu\n" + blockXmlToNroff(liNode.childNodes, ".IP")
+ i += 1
+ if node.tagName == 'ul':
+ s += ".IP \\(bu\n"  # "\(bu" is the troff bullet glyph
+ else:
+ s += ".IP %d. .25in\n" % i
+ s += blockXmlToNroff(liNode.childNodes, ".IP")
elif (liNode.nodeType != node.TEXT_NODE
or not liNode.data.isspace()):
- raise error.Error("<ul> element may only have <li> children")
+ raise error.Error("<%s> element may only have <li> children" % node.tagName)
s += ".RE\n"
elif node.tagName == 'dl':
if s != "":
diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c
index 27db0702c..e0c9690df 100644
--- a/ovsdb/ovsdb-server.c
+++ b/ovsdb/ovsdb-server.c
@@ -222,7 +222,7 @@ query_db_string(const struct ovsdb *db, const char *name)
parse_db_string_column(db, name, &table, &column);
- HMAP_FOR_EACH (row, struct ovsdb_row, hmap_node, &table->rows) {
+ HMAP_FOR_EACH (row, hmap_node, &table->rows) {
const struct ovsdb_datum *datum;
size_t i;
@@ -248,7 +248,7 @@ query_db_remotes(const char *name, const struct ovsdb *db,
parse_db_string_column(db, name, &table, &column);
- HMAP_FOR_EACH (row, struct ovsdb_row, hmap_node, &table->rows) {
+ HMAP_FOR_EACH (row, hmap_node, &table->rows) {
const struct ovsdb_datum *datum;
size_t i;
diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c
index 0da208590..f4bb701d1 100644
--- a/ovsdb/ovsdb-tool.c
+++ b/ovsdb/ovsdb-tool.c
@@ -110,7 +110,6 @@ usage(void)
" create DB SCHEMA create DB with the given SCHEMA\n"
" compact DB [DST] compact DB in-place (or to DST)\n"
" convert DB SCHEMA [DST] convert DB to SCHEMA (to DST)\n"
- " extract-schema DB print DB's schema on stdout\n"
" query DB TRNS execute read-only transaction on DB\n"
" transact DB TRNS execute read/write transaction on DB\n"
" show-log DB prints information about DB's log entries\n",
diff --git a/ovsdb/query.c b/ovsdb/query.c
index 878ac5b2d..52eda0a7c 100644
--- a/ovsdb/query.c
+++ b/ovsdb/query.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2009 Nicira Networks
+/* Copyright (c) 2009, 2010 Nicira Networks
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -41,8 +41,7 @@ ovsdb_query(struct ovsdb_table *table, const struct ovsdb_condition *cnd,
/* Linear scan. */
const struct ovsdb_row *row, *next;
- HMAP_FOR_EACH_SAFE (row, next, struct ovsdb_row, hmap_node,
- &table->rows) {
+ HMAP_FOR_EACH_SAFE (row, next, hmap_node, &table->rows) {
if (ovsdb_condition_evaluate(row, cnd) && !output_row(row, aux)) {
break;
}
@@ -90,8 +89,7 @@ ovsdb_query_distinct(struct ovsdb_table *table,
ovsdb_row_hash_init(&hash, columns);
ovsdb_query(table, condition, query_distinct_cb, &hash);
- HMAP_FOR_EACH (node, struct ovsdb_row_hash_node, hmap_node,
- &hash.rows) {
+ HMAP_FOR_EACH (node, hmap_node, &hash.rows) {
ovsdb_row_set_add_row(results, node->row);
}
ovsdb_row_hash_destroy(&hash, false);
diff --git a/ovsdb/row.c b/ovsdb/row.c
index 5043cbc04..ba00bb9f3 100644
--- a/ovsdb/row.c
+++ b/ovsdb/row.c
@@ -82,15 +82,13 @@ ovsdb_row_destroy(struct ovsdb_row *row)
struct ovsdb_weak_ref *weak, *next;
const struct shash_node *node;
- LIST_FOR_EACH_SAFE (weak, next, struct ovsdb_weak_ref, dst_node,
- &row->dst_refs) {
+ LIST_FOR_EACH_SAFE (weak, next, dst_node, &row->dst_refs) {
list_remove(&weak->src_node);
list_remove(&weak->dst_node);
free(weak);
}
- LIST_FOR_EACH_SAFE (weak, next, struct ovsdb_weak_ref, src_node,
- &row->src_refs) {
+ LIST_FOR_EACH_SAFE (weak, next, src_node, &row->src_refs) {
list_remove(&weak->src_node);
list_remove(&weak->dst_node);
free(weak);
@@ -326,8 +324,7 @@ ovsdb_row_hash_destroy(struct ovsdb_row_hash *rh, bool destroy_rows)
{
struct ovsdb_row_hash_node *node, *next;
- HMAP_FOR_EACH_SAFE (node, next, struct ovsdb_row_hash_node, hmap_node,
- &rh->rows) {
+ HMAP_FOR_EACH_SAFE (node, next, hmap_node, &rh->rows) {
hmap_remove(&rh->rows, &node->hmap_node);
if (destroy_rows) {
ovsdb_row_destroy((struct ovsdb_row *) node->row);
@@ -360,7 +357,7 @@ ovsdb_row_hash_contains_all(const struct ovsdb_row_hash *a,
struct ovsdb_row_hash_node *node;
assert(ovsdb_column_set_equals(&a->columns, &b->columns));
- HMAP_FOR_EACH (node, struct ovsdb_row_hash_node, hmap_node, &b->rows) {
+ HMAP_FOR_EACH (node, hmap_node, &b->rows) {
if (!ovsdb_row_hash_contains__(a, node->row, node->hmap_node.hash)) {
return false;
}
@@ -380,8 +377,7 @@ ovsdb_row_hash_contains__(const struct ovsdb_row_hash *rh,
const struct ovsdb_row *row, size_t hash)
{
struct ovsdb_row_hash_node *node;
- HMAP_FOR_EACH_WITH_HASH (node, struct ovsdb_row_hash_node, hmap_node,
- hash, &rh->rows) {
+ HMAP_FOR_EACH_WITH_HASH (node, hmap_node, hash, &rh->rows) {
if (ovsdb_row_equal_columns(row, node->row, &rh->columns)) {
return true;
}
diff --git a/ovsdb/table.c b/ovsdb/table.c
index 6a4e7ae2f..5e83683b7 100644
--- a/ovsdb/table.c
+++ b/ovsdb/table.c
@@ -210,8 +210,7 @@ ovsdb_table_destroy(struct ovsdb_table *table)
if (table) {
struct ovsdb_row *row, *next;
- HMAP_FOR_EACH_SAFE (row, next, struct ovsdb_row, hmap_node,
- &table->rows) {
+ HMAP_FOR_EACH_SAFE (row, next, hmap_node, &table->rows) {
ovsdb_row_destroy(row);
}
hmap_destroy(&table->rows);
@@ -226,8 +225,7 @@ ovsdb_table_get_row(const struct ovsdb_table *table, const struct uuid *uuid)
{
struct ovsdb_row *row;
- HMAP_FOR_EACH_WITH_HASH (row, struct ovsdb_row, hmap_node, uuid_hash(uuid),
- &table->rows) {
+ HMAP_FOR_EACH_WITH_HASH (row, hmap_node, uuid_hash(uuid), &table->rows) {
if (uuid_equals(ovsdb_row_get_uuid(row), uuid)) {
return row;
}
diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c
index bfa2fcbad..b26705a3a 100644
--- a/ovsdb/transaction.c
+++ b/ovsdb/transaction.c
@@ -138,7 +138,7 @@ find_txn_row(const struct ovsdb_table *table, const struct uuid *uuid)
return NULL;
}
- HMAP_FOR_EACH_WITH_HASH (txn_row, struct ovsdb_txn_row, hmap_node,
+ HMAP_FOR_EACH_WITH_HASH (txn_row, hmap_node,
uuid_hash(uuid), &table->txn_table->txn_rows) {
const struct ovsdb_row *row;
@@ -315,8 +315,7 @@ assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row)
* that their weak references will get reassessed. */
struct ovsdb_weak_ref *weak, *next;
- LIST_FOR_EACH_SAFE (weak, next, struct ovsdb_weak_ref, dst_node,
- &txn_row->old->dst_refs) {
+ LIST_FOR_EACH_SAFE (weak, next, dst_node, &txn_row->old->dst_refs) {
if (!weak->src->txn_row) {
ovsdb_txn_row_modify(txn, weak->src);
}
@@ -451,7 +450,7 @@ check_max_rows(struct ovsdb_txn *txn)
{
struct ovsdb_txn_table *t;
- LIST_FOR_EACH (t, struct ovsdb_txn_table, node, &txn->txn_tables) {
+ LIST_FOR_EACH (t, node, &txn->txn_tables) {
size_t n_rows = hmap_count(&t->table->rows);
unsigned int max_rows = t->table->schema->max_rows;
@@ -508,7 +507,7 @@ ovsdb_txn_commit(struct ovsdb_txn *txn, bool durable)
}
/* Send the commit to each replica. */
- LIST_FOR_EACH (replica, struct ovsdb_replica, node, &txn->db->replicas) {
+ LIST_FOR_EACH (replica, node, &txn->db->replicas) {
error = (replica->class->commit)(replica, txn, durable);
if (error) {
/* We don't support two-phase commit so only the first replica is
@@ -535,8 +534,8 @@ ovsdb_txn_for_each_change(const struct ovsdb_txn *txn,
struct ovsdb_txn_table *t;
struct ovsdb_txn_row *r;
- LIST_FOR_EACH (t, struct ovsdb_txn_table, node, &txn->txn_tables) {
- HMAP_FOR_EACH (r, struct ovsdb_txn_row, hmap_node, &t->txn_rows) {
+ LIST_FOR_EACH (t, node, &txn->txn_tables) {
+ HMAP_FOR_EACH (r, hmap_node, &t->txn_rows) {
if (!cb(r->old, r->new, r->changed, aux)) {
break;
}
@@ -714,8 +713,7 @@ for_each_txn_row(struct ovsdb_txn *txn,
struct ovsdb_txn_table *t, *next_txn_table;
any_work = false;
- LIST_FOR_EACH_SAFE (t, next_txn_table, struct ovsdb_txn_table, node,
- &txn->txn_tables) {
+ LIST_FOR_EACH_SAFE (t, next_txn_table, node, &txn->txn_tables) {
if (t->serial != serial) {
t->serial = serial;
t->n_processed = 0;
@@ -724,9 +722,7 @@ for_each_txn_row(struct ovsdb_txn *txn,
while (t->n_processed < hmap_count(&t->txn_rows)) {
struct ovsdb_txn_row *r, *next_txn_row;
- HMAP_FOR_EACH_SAFE (r, next_txn_row,
- struct ovsdb_txn_row, hmap_node,
- &t->txn_rows) {
+ HMAP_FOR_EACH_SAFE (r, next_txn_row, hmap_node, &t->txn_rows) {
if (r->serial != serial) {
struct ovsdb_error *error;
diff --git a/ovsdb/trigger.c b/ovsdb/trigger.c
index 47719698c..c222d895c 100644
--- a/ovsdb/trigger.c
+++ b/ovsdb/trigger.c
@@ -73,7 +73,7 @@ ovsdb_trigger_run(struct ovsdb *db, long long int now)
run_triggers = db->run_triggers;
db->run_triggers = false;
- LIST_FOR_EACH_SAFE (t, next, struct ovsdb_trigger, node, &db->triggers) {
+ LIST_FOR_EACH_SAFE (t, next, node, &db->triggers) {
if (run_triggers || now - t->created >= t->timeout_msec) {
ovsdb_trigger_try(db, t, now);
}
@@ -89,7 +89,7 @@ ovsdb_trigger_wait(struct ovsdb *db, long long int now)
long long int deadline = LLONG_MAX;
struct ovsdb_trigger *t;
- LIST_FOR_EACH (t, struct ovsdb_trigger, node, &db->triggers) {
+ LIST_FOR_EACH (t, node, &db->triggers) {
if (t->created < LLONG_MAX - t->timeout_msec) {
long long int t_deadline = t->created + t->timeout_msec;
if (deadline > t_deadline) {
diff --git a/python/ovs/daemon.py b/python/ovs/daemon.py
index a8373cfd0..6dff3a02f 100644
--- a/python/ovs/daemon.py
+++ b/python/ovs/daemon.py
@@ -35,6 +35,10 @@ _detach = False
# --pidfile: Name of pidfile (null if none).
_pidfile = None
+# Our pidfile's inode and device, if we have created one.
+_pidfile_dev = None
+_pidfile_ino = None
+
# --overwrite-pidfile: Create pidfile even if one already exists and is locked?
_overwrite_pidfile = False
@@ -48,6 +52,8 @@ _monitor = False
# File descriptor used by daemonize_start() and daemonize_complete().
_daemonize_fd = None
+RESTART_EXIT_CODE = 5
+
def make_pidfile_name(name):
"""Returns the file name that would be used for a pidfile if 'name' were
provided to set_pidfile()."""
@@ -163,7 +169,7 @@ def _make_pidfile():
logging.error("%s: create failed: %s"
% (tmpfile, os.strerror(e.errno)))
return
-
+
try:
fcntl.lockf(file, fcntl.LOCK_EX | fcntl.LOCK_NB)
except IOError, e:
@@ -191,6 +197,10 @@ def _make_pidfile():
file.close()
return
+ s = os.fstat(file.fileno())
+ _pidfile_dev = s.st_dev
+ _pidfile_ino = s.st_ino
+
def daemonize():
"""If configured with set_pidfile() or set_detach(), creates the pid file
and detaches from the foreground session."""
@@ -258,6 +268,11 @@ def _fork_notify_startup(fd):
os.close(fd)
def _should_restart(status):
+ global RESTART_EXIT_CODE
+
+ if os.WIFEXITED(status) and os.WEXITSTATUS(status) == RESTART_EXIT_CODE:
+ return True
+
if os.WIFSIGNALED(status):
for signame in ("SIGABRT", "SIGALRM", "SIGBUS", "SIGFPE", "SIGILL",
"SIGPIPE", "SIGSEGV", "SIGXCPU", "SIGXFSZ"):
@@ -368,6 +383,19 @@ Daemon options:
def read_pidfile(pidfile):
"""Opens and reads a PID from 'pidfile'. Returns the nonnegative PID if
successful, otherwise a negative errno value."""
+ if _pidfile_dev is not None:
+ try:
+ s = os.stat(pidfile)
+ if s.st_ino == _pidfile_ino and s.st_dev == _pidfile_dev:
+ # It's our own pidfile. We can't afford to open it,
+ # because closing *any* fd for a file that a process
+ # has locked also releases all the locks on that file.
+ #
+ # Fortunately, we know the associated pid anyhow.
+ return os.getpid()
+ except OSError:
+ pass
+
try:
file = open(pidfile, "r")
except IOError, e:
diff --git a/python/ovs/db/types.py b/python/ovs/db/types.py
index 6e7ef11db..d42ac7fe8 100644
--- a/python/ovs/db/types.py
+++ b/python/ovs/db/types.py
@@ -290,14 +290,14 @@ class BaseType(object):
return 'at most %s' % commafy(self.max)
else:
return 'at most %g' % self.max
- elif self.min_length is not None and self.max_length is not None:
+ elif self.min_length != 0 and self.max_length != sys.maxint:
if self.min_length == self.max_length:
return 'exactly %d characters long' % (self.min_length)
else:
return 'between %d and %d characters long' % (self.min_length, self.max_length)
- elif self.min_length is not None:
+ elif self.min_length != 0:
return 'at least %d characters long' % self.min_length
- elif self.max_length is not None:
+ elif self.max_length != sys.maxint:
return 'at most %d characters long' % self.max_length
else:
return ''
diff --git a/python/ovs/poller.py b/python/ovs/poller.py
index 57417c481..2a0b2ecbb 100644
--- a/python/ovs/poller.py
+++ b/python/ovs/poller.py
@@ -15,6 +15,7 @@
import errno
import logging
import select
+import ovs.timeval
class Poller(object):
"""High-level wrapper around the "poll" system call.
@@ -62,15 +63,15 @@ class Poller(object):
self.__timer_wait(msec)
def timer_wait_until(self, msec):
- """Causes the following call to self.block() to wake up when the
- current time, as returned by Time.msec(), reaches 'msec' or later. If
+ """Causes the following call to self.block() to wake up when the current
+ time, as returned by ovs.timeval.msec(), reaches 'msec' or later. If
'msec' is earlier than the current time, the following call to
self.block() will not block at all.
The timer registration is one-shot: only the following call to
self.block() is affected. The timer will need to be re-registered
after self.block() is called if it is to persist."""
- now = Time.msec()
+ now = ovs.timeval.msec()
if msec <= now:
self.immediate_wake()
else:
diff --git a/tests/automake.mk b/tests/automake.mk
index 9fecaaf75..1925f89cb 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -12,6 +12,7 @@ TESTSUITE_AT = \
tests/check-structs.at \
tests/daemon.at \
tests/daemon-py.at \
+ tests/ovs-ofctl.at \
tests/vconn.at \
tests/dir_name.at \
tests/aes128.at \
diff --git a/tests/daemon-py.at b/tests/daemon-py.at
index 7ff376eb7..9a2549c11 100644
--- a/tests/daemon-py.at
+++ b/tests/daemon-py.at
@@ -33,8 +33,8 @@ AT_CHECK([kill -0 `cat pid`], [0], [], [], [kill `cat parent`])
AT_CHECK([ps -o ppid= -p `cat pid` > parentpid],
[0], [], [], [kill `cat parent`])
AT_CHECK(
- [parentpid=`cat parentpid` &&
- parent=`cat parent` &&
+ [parentpid=`cat parentpid` &&
+ parent=`cat parent` &&
test $parentpid = $parent],
[0], [], [], [kill `cat parent`])
# Kill the daemon process, making it look like a segfault,
@@ -50,8 +50,51 @@ AT_CHECK([cp pid newpid], [0], [], [], [kill `cat parent`])
AT_CHECK([ps -o ppid= -p `cat pid` > parentpid],
[0], [], [], [kill `cat parent`])
AT_CHECK(
- [parentpid=`cat parentpid` &&
- parent=`cat parent` &&
+ [parentpid=`cat parentpid` &&
+ parent=`cat parent` &&
+ test $parentpid = $parent],
+ [0], [], [], [kill `cat parent`])
+# Kill the daemon process with SIGTERM, and wait for the daemon
+# and the monitor processes to go away and the pidfile to get deleted.
+AT_CHECK([kill `cat pid`], [0], [], [ignore], [kill `cat parent`])
+OVS_WAIT_WHILE([kill -0 `cat parent` || kill -0 `cat newpid` || test -e pid],
+ [kill `cat parent`])
+AT_CLEANUP
+
+AT_SETUP([daemon --monitor restart exit code - Python])
+AT_SKIP_IF([test $HAVE_PYTHON = no])
+AT_CAPTURE_FILE([pid])
+AT_CAPTURE_FILE([parent])
+AT_CAPTURE_FILE([parentpid])
+AT_CAPTURE_FILE([newpid])
+# Start the daemon and wait for the pidfile to get created.
+AT_CHECK([$PYTHON $srcdir/test-daemon.py --pidfile-name=$PWD/pid --monitor& echo $! > parent], [0], [ignore], [ignore])
+OVS_WAIT_UNTIL([test -s pid], [kill `cat parent`])
+# Check that the pidfile names a running process,
+# and that the parent process of that process is our child process.
+AT_CHECK([kill -0 `cat pid`], [0], [], [], [kill `cat parent`])
+AT_CHECK([ps -o ppid= -p `cat pid` > parentpid],
+ [0], [], [], [kill `cat parent`])
+AT_CHECK(
+ [parentpid=`cat parentpid` &&
+ parent=`cat parent` &&
+ test $parentpid = $parent],
+ [0], [], [], [kill `cat parent`])
+# HUP the daemon process causing it to throw an exception,
+# and wait for a new child process to get spawned.
+AT_CHECK([cp pid oldpid], [0], [], [], [kill `cat parent`])
+AT_CHECK([kill -HUP `cat pid`], [0], [], [ignore], [kill `cat parent`])
+OVS_WAIT_WHILE([kill -0 `cat oldpid`], [kill `cat parent`])
+OVS_WAIT_UNTIL([test -s pid && test `cat pid` != `cat oldpid`],
+ [kill `cat parent`])
+AT_CHECK([cp pid newpid], [0], [], [], [kill `cat parent`])
+# Check that the pidfile names a running process,
+# and that the parent process of that process is our child process.
+AT_CHECK([ps -o ppid= -p `cat pid` > parentpid],
+ [0], [], [], [kill `cat parent`])
+AT_CHECK(
+ [parentpid=`cat parentpid` &&
+ parent=`cat parent` &&
test $parentpid = $parent],
[0], [], [], [kill `cat parent`])
# Kill the daemon process with SIGTERM, and wait for the daemon
@@ -79,7 +122,7 @@ AT_CLEANUP
AT_SETUP([daemon --detach --monitor - Python])
AT_SKIP_IF([test $HAVE_PYTHON = no])
-m4_define([CHECK],
+m4_define([CHECK],
[AT_CHECK([$1], [$2], [$3], [$4], [kill `cat daemon monitor`])])
AT_CAPTURE_FILE([daemon])
AT_CAPTURE_FILE([olddaemon])
diff --git a/tests/interface-reconfigure.at b/tests/interface-reconfigure.at
index 880f4a508..8566102d7 100644
--- a/tests/interface-reconfigure.at
+++ b/tests/interface-reconfigure.at
@@ -802,7 +802,7 @@ Applying changes to /etc/sysconfig/network-scripts/ifcfg-xapi1 configuration
--fake-iface add-bond xapi1 bond0 eth0 eth1
set Port bond0 MAC="00:22:19:22:4b:af" bond_downdelay=200 other-config:"bond-miimon"=100 other-config:"bond-use_carrier"=1 other-config:"bond-mode"="balance-slb" bond_updelay=31000
set Bridge xapi1 other-config:hwaddr="00:22:19:22:4b:af"
- br-set-external-id xapi1 xs-network-uuids 99be2da4-6c33-6f8e-49ea-3bc592fe3c85;45cbbb43-113d-a712-3231-c6463f253cef
+ br-set-external-id xapi1 xs-network-uuids 45cbbb43-113d-a712-3231-c6463f253cef;99be2da4-6c33-6f8e-49ea-3bc592fe3c85
/sbin/ifup xapi1
action_up: bring up bond0
/sbin/ifconfig bond0 up
@@ -883,10 +883,10 @@ Applying changes to /etc/sysconfig/network-scripts/ifcfg-xapi2 configuration
--fake-iface add-bond xapi1 bond0 eth0 eth1
set Port bond0 MAC="00:22:19:22:4b:af" bond_downdelay=200 other-config:"bond-miimon"=100 other-config:"bond-use_carrier"=1 other-config:"bond-mode"="balance-slb" bond_updelay=31000
set Bridge xapi1 other-config:hwaddr="00:22:19:22:4b:af"
- br-set-external-id xapi1 xs-network-uuids 99be2da4-6c33-6f8e-49ea-3bc592fe3c85;45cbbb43-113d-a712-3231-c6463f253cef
+ br-set-external-id xapi1 xs-network-uuids 45cbbb43-113d-a712-3231-c6463f253cef;99be2da4-6c33-6f8e-49ea-3bc592fe3c85
--if-exists del-br xapi2
--may-exist add-br xapi2 xapi1 4
- br-set-external-id xapi2 xs-network-uuids 99be2da4-6c33-6f8e-49ea-3bc592fe3c85;45cbbb43-113d-a712-3231-c6463f253cef
+ br-set-external-id xapi2 xs-network-uuids 45cbbb43-113d-a712-3231-c6463f253cef;99be2da4-6c33-6f8e-49ea-3bc592fe3c85
set Interface xapi2 MAC="00:22:19:22:4b:af"
/sbin/ifup xapi2
action_up: bring up bond0
diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at
new file mode 100644
index 000000000..f6a5cd81e
--- /dev/null
+++ b/tests/ovs-ofctl.at
@@ -0,0 +1,26 @@
+AT_BANNER([ovs-ofctl])
+
+AT_SETUP([ovs-ofctl parse-flows])
+AT_DATA([flows.txt], [
+# comment
+tcp,tp_src=123,actions=flood
+in_port=LOCAL dl_vlan=9 dl_src=00:0A:E4:25:6B:B0 actions=drop
+arp,nw_src=192.168.0.1 actions=drop_spoofed_arp,NORMAL
+udp dl_vlan_pcp=7 idle_timeout=5 actions=strip_vlan output:0
+tcp,nw_src=192.168.0.3,tp_dst=80 actions=set_queue:37,output:1
+udp,nw_src=192.168.0.3,tp_dst=53 actions=pop_queue,output:1
+cookie=0x123456789abcdef hard_timeout=10 priority=60000 actions=controller
+actions=drop
+])
+AT_CHECK([ovs-ofctl parse-flows flows.txt], [0], [stdout])
+AT_CHECK([[sed 's/ (xid=0x[0-9a-fA-F]*)//' stdout]], [0], [dnl
+flow_mod: tcp,tp_src=123, ADD: actions=FLOOD
+flow_mod: in_port=65534,dl_vlan=9,dl_src=00:0a:e4:25:6b:b0, ADD: actions=drop
+flow_mod: arp,nw_src=192.168.0.1, ADD: actions=drop_spoofed_arp,NORMAL
+flow_mod: udp,dl_vlan_pcp=7, ADD: idle:5 actions=strip_vlan,output:0
+flow_mod: tcp,nw_src=192.168.0.3,tp_dst=80, ADD: actions=set_queue:37,output:1
+flow_mod: udp,nw_src=192.168.0.3,tp_dst=53, ADD: actions=pop_queue,output:1
+flow_mod: ADD: cookie:0x123456789abcdef hard:10 pri:60000 actions=CONTROLLER:65535
+flow_mod: ADD: actions=drop
+])
+AT_CLEANUP
diff --git a/tests/ovs-vsctl.at b/tests/ovs-vsctl.at
index 56fc1ce2d..152a6585c 100644
--- a/tests/ovs-vsctl.at
+++ b/tests/ovs-vsctl.at
@@ -15,17 +15,17 @@ dnl RUN_OVS_VSCTL(COMMAND, ...)
dnl
dnl Executes each ovs-vsctl COMMAND.
m4_define([RUN_OVS_VSCTL],
- [m4_foreach([command], [$@], [ovs-vsctl --no-wait -vreconnect:ANY:emer --db=unix:socket -- command
+ [m4_foreach([command], [$@], [ovs-vsctl --timeout=5 --no-wait -vreconnect:ANY:emer --db=unix:socket -- command
])])
m4_define([RUN_OVS_VSCTL_ONELINE],
- [m4_foreach([command], [$@], [ovs-vsctl --no-wait -vreconnect:ANY:emer --db=unix:socket --oneline -- command
+ [m4_foreach([command], [$@], [ovs-vsctl --timeout=5 --no-wait -vreconnect:ANY:emer --db=unix:socket --oneline -- command
])])
dnl RUN_OVS_VSCTL_TOGETHER(COMMAND, ...)
dnl
dnl Executes each ovs-vsctl COMMAND in a single run of ovs-vsctl.
m4_define([RUN_OVS_VSCTL_TOGETHER],
- [ovs-vsctl --no-wait -vreconnect:ANY:emer --db=unix:socket --oneline dnl
+ [ovs-vsctl --timeout=5 --no-wait -vreconnect:ANY:emer --db=unix:socket --oneline dnl
m4_foreach([command], [$@], [ -- command])])
dnl CHECK_BRIDGES([BRIDGE, PARENT, VLAN], ...)
@@ -705,6 +705,55 @@ AT_CHECK([cat stdout4], [0], [500
OVS_VSCTL_CLEANUP
AT_CLEANUP
+AT_SETUP([--id option on create, get commands])
+AT_KEYWORDS([ovs-vsctl])
+OVS_VSCTL_SETUP
+AT_CHECK([RUN_OVS_VSCTL([add-br br0],
+ [add-port br0 eth0],
+ [add-port br0 eth1])])
+AT_CHECK(
+ [RUN_OVS_VSCTL_TOGETHER(
+ [set bridge br0 mirrors=@m],
+ [--id=@eth0 get port eth0],
+ [--id=@eth1 get port eth1],
+ [--id=@m create mirror name=mymirror select-dst-port=@eth0 select-src-port=@eth0 output-port=@eth1])],
+ [0], [stdout], [], [OVS_VSCTL_CLEANUP])
+AT_CHECK(
+ [perl $srcdir/uuidfilt.pl stdout], [0], [dnl
+
+
+
+<0>
+],
+ [], [OVS_VSCTL_CLEANUP])
+AT_CHECK(
+ [RUN_OVS_VSCTL(
+ [list port eth0 eth1],
+ [list mirror],
+ [list bridge br0])],
+ [0], [stdout], [], [OVS_VSCTL_CLEANUP])
+AT_CHECK(
+ [sed -n -e '/uuid/p' -e '/name/p' -e '/mirrors/p' -e '/select/p' -e '/output/p' < stdout | $srcdir/uuidfilt.pl], [0], [dnl
+[_uuid : <0>
+name : "eth0"
+_uuid : <1>
+name : "eth1"
+_uuid : <2>
+name : mymirror
+output_port : <1>
+output_vlan : []
+select_all : false
+select_dst_port : [<0>]
+select_src_port : [<0>]
+select_vlan : []
+_uuid : <3>
+mirrors : [<2>]
+name : "br0"
+]],
+ [], [OVS_VSCTL_CLEANUP])
+OVS_VSCTL_CLEANUP
+AT_CLEANUP
+
dnl This test really shows a bug -- "create" followed by "list" in
dnl the same execution shows the wrong UUID on the "list" command.
dnl The bug is documented in ovs-vsctl.8.
diff --git a/tests/test-classifier.c b/tests/test-classifier.c
index 116e58940..5b5c1722c 100644
--- a/tests/test-classifier.c
+++ b/tests/test-classifier.c
@@ -436,7 +436,7 @@ check_tables(const struct classifier *cls,
if (!hmap_is_empty(&cls->tables[i])) {
found_tables++;
}
- HMAP_FOR_EACH (bucket, struct cls_bucket, hmap_node, &cls->tables[i]) {
+ HMAP_FOR_EACH (bucket, hmap_node, &cls->tables[i]) {
found_buckets++;
assert(!list_is_empty(&bucket->rules));
found_rules += list_size(&bucket->rules);
diff --git a/tests/test-csum.c b/tests/test-csum.c
index 8c8545870..eebc8803f 100644
--- a/tests/test-csum.c
+++ b/tests/test-csum.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2009 Nicira Networks.
+ * Copyright (c) 2009, 2010 Nicira Networks.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -134,7 +134,6 @@ main(void)
const uint16_t *data16 = (const uint16_t *) tc->data;
const uint32_t *data32 = (const uint32_t *) tc->data;
uint32_t partial;
- size_t i;
/* Test csum(). */
assert(ntohs(csum(tc->data, tc->size)) == tc->csum);
diff --git a/tests/test-daemon.py b/tests/test-daemon.py
index 3c757f308..386445d44 100644
--- a/tests/test-daemon.py
+++ b/tests/test-daemon.py
@@ -1,11 +1,11 @@
# Copyright (c) 2010 Nicira Networks.
-#
+#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -13,13 +13,20 @@
# limitations under the License.
import getopt
+import signal
import sys
import time
import ovs.daemon
import ovs.util
+def handler(signum, frame):
+ """Signal handler: raises an Exception whose message names the signal
+ number 'signum'.  'frame' is accepted but not used."""
+ raise Exception("Signal handler called with %d" % signum)
+
def main(argv):
+
+ signal.signal(signal.SIGHUP, handler)
+
try:
options, args = getopt.gnu_getopt(
argv[1:], 'b', ["bail", "help"] + ovs.daemon.LONG_OPTIONS)
@@ -63,4 +70,10 @@ Other options:
sys.exit(0)
if __name__ == '__main__':
- main(sys.argv)
+ try:
+ main(sys.argv)
+ except SystemExit:
+ # Let sys.exit() calls complete normally
+ raise
+ except:
+ sys.exit(ovs.daemon.RESTART_EXIT_CODE)
diff --git a/tests/test-hmap.c b/tests/test-hmap.c
index 18d8f461d..be6bcf4a7 100644
--- a/tests/test-hmap.c
+++ b/tests/test-hmap.c
@@ -56,7 +56,7 @@ check_hmap(struct hmap *hmap, const int values[], size_t n,
hmap_values = xmalloc(sizeof *sort_values * n);
i = 0;
- HMAP_FOR_EACH (e, struct element, node, hmap) {
+ HMAP_FOR_EACH (e, node, hmap) {
assert(i < n);
hmap_values[i++] = e->value;
}
@@ -77,8 +77,7 @@ check_hmap(struct hmap *hmap, const int values[], size_t n,
for (i = 0; i < n; i++) {
size_t count = 0;
- HMAP_FOR_EACH_WITH_HASH (e, struct element, node,
- hash(values[i]), hmap) {
+ HMAP_FOR_EACH_WITH_HASH (e, node, hash(values[i]), hmap) {
count += e->value == values[i];
}
assert(count == 1);
@@ -124,7 +123,7 @@ print_hmap(const char *name, struct hmap *hmap)
struct element *e;
printf("%s:", name);
- HMAP_FOR_EACH (e, struct element, node, hmap) {
+ HMAP_FOR_EACH (e, node, hmap) {
printf(" %d(%zu)", e->value, e->node.hash & hmap->mask);
}
printf("\n");
@@ -242,7 +241,7 @@ test_hmap_for_each_safe(hash_func *hash)
i = 0;
n_remaining = n;
- HMAP_FOR_EACH_SAFE (e, next, struct element, node, &hmap) {
+ HMAP_FOR_EACH_SAFE (e, next, node, &hmap) {
assert(i < n);
if (pattern & (1ul << e->value)) {
size_t j;
diff --git a/tests/test-list.c b/tests/test-list.c
index 0efdbfce4..5e62e0c00 100644
--- a/tests/test-list.c
+++ b/tests/test-list.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008, 2009 Nicira Networks.
+ * Copyright (c) 2008, 2009, 2010 Nicira Networks.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -55,7 +55,7 @@ check_list(struct list *list, const int values[], size_t n)
size_t i;
i = 0;
- LIST_FOR_EACH (e, struct element, node, list) {
+ LIST_FOR_EACH (e, node, list) {
assert(i < n);
assert(e->value == values[i]);
i++;
@@ -64,7 +64,7 @@ check_list(struct list *list, const int values[], size_t n)
assert(i == n);
i = 0;
- LIST_FOR_EACH_REVERSE (e, struct element, node, list) {
+ LIST_FOR_EACH_REVERSE (e, node, list) {
assert(i < n);
assert(e->value == values[n - i - 1]);
i++;
@@ -84,7 +84,7 @@ print_list(const char *name, struct list *list)
struct element *e;
printf("%s:", name);
- LIST_FOR_EACH (e, struct element, node, list) {
+ LIST_FOR_EACH (e, node, list) {
printf(" %d", e->value);
}
printf("\n");
@@ -131,7 +131,7 @@ test_list_for_each_safe(void)
i = 0;
values_idx = 0;
n_remaining = n;
- LIST_FOR_EACH_SAFE (e, next, struct element, node, &list) {
+ LIST_FOR_EACH_SAFE (e, next, node, &list) {
assert(i < n);
if (pattern & (1ul << i)) {
list_remove(&e->node);
diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c
index 18784a52a..cdc939bb4 100644
--- a/tests/test-ovsdb.c
+++ b/tests/test-ovsdb.c
@@ -1095,7 +1095,7 @@ do_query_distinct(int argc OVS_UNUSED, char *argv[])
size_t n_classes;
struct json *json;
int exit_code = 0;
- size_t i, j, k;
+ size_t i;
/* Parse table schema, create table. */
json = unbox_json(parse_json(argv[1]));
@@ -1161,6 +1161,7 @@ do_query_distinct(int argc OVS_UNUSED, char *argv[])
for (i = 0; i < json->u.array.n; i++) {
struct ovsdb_row_set results;
struct ovsdb_condition cnd;
+ size_t j;
check_ovsdb_error(ovsdb_condition_from_json(ts, json->u.array.elems[i],
NULL, &cnd));
@@ -1171,6 +1172,8 @@ do_query_distinct(int argc OVS_UNUSED, char *argv[])
ovsdb_row_set_init(&results);
ovsdb_query_distinct(table, &cnd, &columns, &results);
for (j = 0; j < results.n_rows; j++) {
+ size_t k;
+
for (k = 0; k < n_rows; k++) {
if (uuid_equals(ovsdb_row_get_uuid(results.rows[j]),
&rows[k].uuid)) {
@@ -1465,8 +1468,7 @@ do_transact_print(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
n_rows = hmap_count(&do_transact_table->rows);
rows = xmalloc(n_rows * sizeof *rows);
i = 0;
- HMAP_FOR_EACH (row, struct ovsdb_row, hmap_node,
- &do_transact_table->rows) {
+ HMAP_FOR_EACH (row, hmap_node, &do_transact_table->rows) {
rows[i++] = row;
}
assert(i == n_rows);
@@ -1833,7 +1835,6 @@ do_idl(int argc, char *argv[])
for (i = 2; i < argc; i++) {
char *arg = argv[i];
struct jsonrpc_msg *request, *reply;
- int error;
if (*arg == '+') {
/* The previous transaction didn't change anything. */
diff --git a/tests/testsuite.at b/tests/testsuite.at
index 42e62dfbe..d66563c8f 100644
--- a/tests/testsuite.at
+++ b/tests/testsuite.at
@@ -42,6 +42,7 @@ m4_include([tests/classifier.at])
m4_include([tests/check-structs.at])
m4_include([tests/daemon.at])
m4_include([tests/daemon-py.at])
+m4_include([tests/ovs-ofctl.at])
m4_include([tests/vconn.at])
m4_include([tests/dir_name.at])
m4_include([tests/aes128.at])
diff --git a/utilities/ovs-controller.8.in b/utilities/ovs-controller.8.in
index c5954dd00..aa5751f93 100644
--- a/utilities/ovs-controller.8.in
+++ b/utilities/ovs-controller.8.in
@@ -98,13 +98,37 @@ sending packets and setting up flows. Use one of these options,
supplying \fIid\fR as an OpenFlow queue ID as a decimal number, to
instead use that specific queue.
.IP
-This option may be useful for debugging quality of service setups.
+This option is incompatible with \fB\-N\fR or \fB\-\-normal\fR and
+with \fB\-H\fR or \fB\-\-hub\fR. If more than one is specified then
+this option takes precedence.
+.IP
+This option may be useful for testing or debugging quality of service
+setups.
+.
+.IP "\fB\-Q \fIport-name\fB:\fIqueue-id\fR"
+.IP "\fB\-\-port\-queue \fIport-name\fB:\fIqueue-id\fR"
+Configures packets received on the port named \fIport-name\fR
+(e.g. \fBeth0\fR) to be output on OpenFlow queue ID \fIqueue-id\fR
+(specified as a decimal number). For the specified port, this option
+overrides the default specified on \fB\-q\fR or \fB\-\-queue\fR.
+.IP
+This option may be specified any number of times with different
+\fIport-name\fR arguments.
+.IP
+This option is incompatible with \fB\-N\fR or \fB\-\-normal\fR and
+with \fB\-H\fR or \fB\-\-hub\fR. If more than one is specified then
+this option takes precedence.
+.IP
+This option may be useful for testing or debugging quality of service
+setups.
.
.IP "\fB\-\-with\-flows \fIfile\fR"
When a switch connects, push the flow entries as described in
\fIfile\fR. Each line in \fIfile\fR is a flow entry in the format
described for the \fBadd\-flows\fR command in the \fBFlow Syntax\fR
section of the \fBovs\-ofctl\fR(8) man page.
+.IP
+Use this option more than once to add flows from multiple files.
.
.SS "Public Key Infrastructure Options"
.so lib/ssl.man
diff --git a/utilities/ovs-controller.c b/utilities/ovs-controller.c
index b18959ad6..e72b74da3 100644
--- a/utilities/ovs-controller.c
+++ b/utilities/ovs-controller.c
@@ -28,10 +28,13 @@
#include "compiler.h"
#include "daemon.h"
#include "learning-switch.h"
+#include "ofp-parse.h"
+#include "ofp-util.h"
#include "ofpbuf.h"
#include "openflow/openflow.h"
#include "poll-loop.h"
#include "rconn.h"
+#include "shash.h"
#include "stream-ssl.h"
#include "timeval.h"
#include "unixctl.h"
@@ -49,10 +52,11 @@ struct switch_ {
struct rconn *rconn;
};
-/* Learn the ports on which MAC addresses appear? */
+/* -H, --hub: Learn the ports on which MAC addresses appear? */
static bool learn_macs = true;
-/* Set up flows? (If not, every packet is processed at the controller.) */
+/* -n, --noflow: Set up flows? (If not, every packet is processed at the
+ * controller.) */
static bool set_up_flows = true;
/* -N, --normal: Use "NORMAL" action instead of explicit port? */
@@ -68,12 +72,15 @@ static int max_idle = 60;
* of their messages (for debugging fail-open mode). */
static bool mute = false;
-/* -q, --queue: OpenFlow queue to use, or the default queue if UINT32_MAX. */
-static uint32_t queue_id = UINT32_MAX;
+/* -q, --queue: default OpenFlow queue, none if UINT32_MAX. */
+static uint32_t default_queue = UINT32_MAX;
+
+/* -Q, --port-queue: map from port name to queue ID (cast to void *). */
+static struct shash port_queues = SHASH_INITIALIZER(&port_queues);
/* --with-flows: Flows read from the named files, to send to each switch;
 * empty if no default flows were given. */
-static FILE *flow_file = NULL;
+static struct ovs_queue default_flows = OVS_QUEUE_INITIALIZER;
/* --unixctl: Name of unixctl socket, or null to use the default. */
static char *unixctl_path = NULL;
@@ -107,7 +114,6 @@ main(int argc, char *argv[])
for (i = optind; i < argc; i++) {
const char *name = argv[i];
struct vconn *vconn;
- int retval;
retval = vconn_open(name, OFP_VERSION, &vconn);
if (!retval) {
@@ -146,12 +152,10 @@ main(int argc, char *argv[])
while (n_switches > 0 || n_listeners > 0) {
int iteration;
- int i;
/* Accept connections on listening vconns. */
for (i = 0; i < n_listeners && n_switches < MAX_SWITCHES; ) {
struct vconn *new_vconn;
- int retval;
retval = pvconn_accept(listeners[i], OFP_VERSION, &new_vconn);
if (!retval || retval == EAGAIN) {
@@ -171,7 +175,8 @@ main(int argc, char *argv[])
bool progress = false;
for (i = 0; i < n_switches; ) {
struct switch_ *this = &switches[i];
- int retval = do_switching(this);
+
+ retval = do_switching(this);
if (!retval || retval == EAGAIN) {
if (!retval) {
progress = true;
@@ -216,20 +221,19 @@ main(int argc, char *argv[])
static void
new_switch(struct switch_ *sw, struct vconn *vconn)
{
+ struct lswitch_config cfg;
+
sw->rconn = rconn_create(60, 0);
rconn_connect_unreliably(sw->rconn, vconn, NULL);
- /* If it was set, rewind 'flow_file' to the beginning, since a
- * previous call to lswitch_create() will leave the stream at the
- * end. */
- if (flow_file) {
- rewind(flow_file);
- }
- sw->lswitch = lswitch_create(sw->rconn, learn_macs, exact_flows,
- set_up_flows ? max_idle : -1,
- action_normal, flow_file);
-
- lswitch_set_queue(sw->lswitch, queue_id);
+ cfg.mode = (action_normal ? LSW_NORMAL
+ : learn_macs ? LSW_LEARN
+ : LSW_FLOOD);
+ cfg.max_idle = set_up_flows ? max_idle : -1;
+ cfg.default_flows = default_flows.head;
+ cfg.default_queue = default_queue;
+ cfg.port_queues = &port_queues;
+ sw->lswitch = lswitch_create(sw->rconn, &cfg);
}
static int
@@ -255,6 +259,52 @@ do_switching(struct switch_ *sw)
}
+/* Reads flows from the file named 'name' and appends the buffer returned for
+ * each one by parse_ofp_add_flow_file() to 'default_flows'.
+ *
+ * Each flow comes with a table index.  Whenever "table index != 0xff"
+ * changes from one flow to the next, a message built by
+ * make_nxt_flow_mod_table_id() is queued first to toggle that extension
+ * (presumably 0xff means that the flow names no table -- confirm against
+ * the parser).
+ *
+ * Calls ovs_fatal() if 'name' cannot be opened. */
static void
+read_flow_file(const char *name)
+{
+    bool table_id_enabled = false;
+    uint8_t table_idx;
+    struct ofpbuf *b;
+    FILE *stream;
+
+    /* Open 'name' itself, not the getopt global 'optarg', so that the file
+     * opened is always the one the caller asked for and the one named in
+     * the error message below. */
+    stream = fopen(name, "r");
+    if (!stream) {
+        ovs_fatal(errno, "%s: open", name);
+    }
+
+    while ((b = parse_ofp_add_flow_file(stream, &table_idx)) != NULL) {
+        if ((table_idx != 0xff) != table_id_enabled) {
+            table_id_enabled = table_idx != 0xff;
+            queue_push_tail(&default_flows,
+                            make_nxt_flow_mod_table_id(table_id_enabled));
+        }
+        queue_push_tail(&default_flows, b);
+    }
+
+    fclose(stream);
+}
+
+/* Handles one -Q or --port-queue argument 's', which should have the form
+ * "<port-name>:<queue-id>", by recording the queue ID under the port name in
+ * 'port_queues'.
+ *
+ * 's' is modified in place, because strtok_r() writes terminators into it.
+ * The queue ID is converted with atoi(), which does no error checking.
+ *
+ * Calls ovs_fatal() if no queue ID follows the port name or if the port name
+ * was already added. */
+static void
+add_port_queue(char *s)
+{
+    char *state = NULL;
+    char *port = strtok_r(s, ":", &state);
+    char *queue = strtok_r(NULL, "", &state);
+    bool added;
+
+    if (queue == NULL) {
+        ovs_fatal(0, "argument to -Q or --port-queue should take the form "
+                  "\"<port-name>:<queue-id>\"");
+    }
+
+    added = shash_add_once(&port_queues, port,
+                           (void *) (uintptr_t) atoi(queue));
+    if (!added) {
+        ovs_fatal(0, "<port-name> arguments for -Q or --port-queue must "
+                  "be unique");
+    }
+}
+
+static void
parse_options(int argc, char *argv[])
{
enum {
@@ -273,6 +323,7 @@ parse_options(int argc, char *argv[])
{"max-idle", required_argument, 0, OPT_MAX_IDLE},
{"mute", no_argument, 0, OPT_MUTE},
{"queue", required_argument, 0, 'q'},
+ {"port-queue", required_argument, 0, 'Q'},
{"with-flows", required_argument, 0, OPT_WITH_FLOWS},
{"unixctl", required_argument, 0, OPT_UNIXCTL},
{"help", no_argument, 0, 'h'},
@@ -330,14 +381,15 @@ parse_options(int argc, char *argv[])
break;
case 'q':
- queue_id = atoi(optarg);
+ default_queue = atoi(optarg);
+ break;
+
+ case 'Q':
+ add_port_queue(optarg);
break;
case OPT_WITH_FLOWS:
- flow_file = fopen(optarg, "r");
- if (flow_file == NULL) {
- ovs_fatal(errno, "%s: open", optarg);
- }
+ read_flow_file(optarg);
break;
case OPT_UNIXCTL:
@@ -370,6 +422,20 @@ parse_options(int argc, char *argv[])
}
}
free(short_options);
+
+ if (!shash_is_empty(&port_queues) || default_queue != UINT32_MAX) {
+ if (action_normal) {
+ ovs_error(0, "queue IDs are incompatible with -N or --normal; "
+ "not using OFPP_NORMAL");
+ action_normal = false;
+ }
+
+ if (!learn_macs) {
+ ovs_error(0, "queue IDs are incompatible with -H or --hub; "
+ "not acting as hub");
+ learn_macs = true;
+ }
+ }
}
static void
@@ -386,9 +452,10 @@ usage(void)
" -H, --hub act as hub instead of learning switch\n"
" -n, --noflow pass traffic, but don't add flows\n"
" --max-idle=SECS max idle time for new flows\n"
- " -N, --normal use OFPAT_NORMAL action\n"
+ " -N, --normal use OFPP_NORMAL action\n"
" -w, --wildcard use wildcards, not exact-match rules\n"
- " -q, --queue=QUEUE OpenFlow queue ID to use for output\n"
+ " -q, --queue=QUEUE-ID OpenFlow queue ID to use for output\n"
+ " -Q PORT-NAME:QUEUE-ID use QUEUE-ID for frames from PORT-NAME\n"
" --with-flows FILE use the flows from FILE\n"
" --unixctl=SOCKET override default control socket name\n"
" -h, --help display this help message\n"
diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in
index d8f1a0a38..00488516e 100644
--- a/utilities/ovs-ofctl.8.in
+++ b/utilities/ovs-ofctl.8.in
@@ -115,6 +115,15 @@ the statistics are aggregated across all flows in the switch's flow
tables. See \fBFlow Syntax\fR, below, for the syntax of \fIflows\fR.
The output format is described in \fBTable Entry Output\fR.
.
+.IP "\fBqueue\-stats \fIswitch \fR[\fIport \fR[\fIqueue\fR]]"
+Prints to the console statistics for the specified \fIqueue\fR on
+\fIport\fR within \fIswitch\fR. Either of \fIport\fR or \fIqueue\fR
+or both may be omitted (or equivalently specified as \fBALL\fR). If
+both are omitted, statistics are printed for all queues on all ports.
+If only \fIqueue\fR is omitted, then statistics are printed for all
+queues on \fIport\fR; if only \fIport\fR is omitted, then statistics
+are printed for \fIqueue\fR on every port where it exists.
+.
.TP
\fBadd\-flow \fIswitch flow\fR
Add the flow entry as described by \fIflow\fR to the \fIswitch\fR's
@@ -467,6 +476,25 @@ addition to any other actions in this flow entry. Recursive
If outputting to a port that encapsulates the packet in a tunnel and supports
an identifier (such as GRE), sets the identifier to \fBid\fR.
.
+.IP \fBdrop_spoofed_arp\fR
+Stops processing further actions, if the packet being processed is an
+Ethernet+IPv4 ARP packet for which the source Ethernet address inside
+the ARP packet differs from the source Ethernet address in the
+Ethernet header.
+.
+This is useful because OpenFlow does not provide a way to match on the
+Ethernet addresses inside ARP packets, so there is no way to
+drop spoofed ARPs other than sending every ARP packet to a controller.
+.
+.IP \fBset_queue\fB:\fIqueue\fR
+Sets the queue that should be used to \fIqueue\fR when packets are
+output. The number of supported queues depends on the switch; some
+OpenFlow implementations do not support queuing at all.
+.
+.IP \fBpop_queue\fR
+Restores the queue to the value it was before any \fBset_queue\fR
+actions were applied.
+.
.RE
.
.IP
@@ -502,8 +530,8 @@ optional fields:
.TP
\fBidle_timeout=\fIseconds\fR
Causes the flow to expire after the given number of seconds of
-inactivity. A value of 0 prevents a flow from expiring due to
-inactivity. The default is 60 seconds.
+inactivity. A value of 0 (the default) prevents a flow from expiring due to
+inactivity.
.
.IP \fBhard_timeout=\fIseconds\fR
Causes the flow to expire after the given number of seconds,
diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c
index 60c63c79a..aba8f8487 100644
--- a/utilities/ovs-ofctl.c
+++ b/utilities/ovs-ofctl.c
@@ -152,6 +152,7 @@ usage(void)
" dump-flows SWITCH FLOW print matching FLOWs\n"
" dump-aggregate SWITCH print aggregate flow statistics\n"
" dump-aggregate SWITCH FLOW print aggregate stats for FLOWs\n"
+ " queue-stats SWITCH [PORT [QUEUE]] dump queue stats\n"
" add-flow SWITCH FLOW add flow described by FLOW\n"
" add-flows SWITCH FILE add flows from FILE\n"
" mod-flows SWITCH FLOW modify actions of matching FLOWs\n"
@@ -467,18 +468,33 @@ do_dump_aggregate(int argc, char *argv[])
}
static void
-enable_flow_mod_table_id_ext(struct vconn *vconn, uint8_t enable)
+enable_flow_mod_table_id_ext(struct vconn *vconn, bool enable)
{
- struct nxt_flow_mod_table_id *flow_mod_table_id;
- struct ofpbuf *buffer;
+ send_openflow_buffer(vconn, make_nxt_flow_mod_table_id(enable));
+}
- flow_mod_table_id = make_openflow(sizeof *flow_mod_table_id, OFPT_VENDOR, &buffer);
+static void
+do_queue_stats(int argc, char *argv[])
+{
+ struct ofp_queue_stats_request *req;
+ struct ofpbuf *request;
- flow_mod_table_id->vendor = htonl(NX_VENDOR_ID);
- flow_mod_table_id->subtype = htonl(NXT_FLOW_MOD_TABLE_ID);
- flow_mod_table_id->set = enable;
+ req = alloc_stats_request(sizeof *req, OFPST_QUEUE, &request);
- send_openflow_buffer(vconn, buffer);
+ if (argc > 2 && argv[2][0] && strcasecmp(argv[2], "all")) {
+ req->port_no = htons(str_to_port_no(argv[1], argv[2]));
+ } else {
+ req->port_no = htons(OFPP_ALL);
+ }
+ if (argc > 3 && argv[3][0] && strcasecmp(argv[3], "all")) {
+ req->queue_id = htonl(atoi(argv[3]));
+ } else {
+ req->queue_id = htonl(OFPQ_ALL);
+ }
+
+ memset(req->pad, 0, sizeof req->pad);
+
+ dump_stats_transaction(argv[1], request);
}
static void
@@ -521,10 +537,10 @@ static void
do_add_flows(int argc OVS_UNUSED, char *argv[])
{
struct vconn *vconn;
+ struct ofpbuf *b;
FILE *file;
- char line[1024];
+ bool table_id_enabled = false;
uint8_t table_idx;
- int table_id_enabled = 0;
file = fopen(argv[2], "r");
if (file == NULL) {
@@ -532,54 +548,12 @@ do_add_flows(int argc OVS_UNUSED, char *argv[])
}
open_vconn(argv[1], &vconn);
- while (fgets(line, sizeof line, file)) {
- struct ofpbuf *buffer;
- struct ofp_flow_mod *ofm;
- uint16_t priority, idle_timeout, hard_timeout;
- uint64_t cookie;
- struct ofp_match match;
-
- char *comment;
-
- /* Delete comments. */
- comment = strchr(line, '#');
- if (comment) {
- *comment = '\0';
- }
-
- /* Drop empty lines. */
- if (line[strspn(line, " \t\n")] == '\0') {
- continue;
- }
-
- /* Parse and send. parse_ofp_str() will expand and reallocate
- * the data in 'buffer', so we can't keep pointers to across the
- * parse_ofp_str() call. */
- make_openflow(sizeof *ofm, OFPT_FLOW_MOD, &buffer);
- parse_ofp_str(line, &match, buffer, &table_idx, NULL, &priority,
- &idle_timeout, &hard_timeout, &cookie);
- ofm = buffer->data;
- ofm->match = match;
- ofm->command = htons(OFPFC_ADD);
- ofm->cookie = htonll(cookie);
- ofm->idle_timeout = htons(idle_timeout);
- ofm->hard_timeout = htons(hard_timeout);
- ofm->buffer_id = htonl(UINT32_MAX);
- ofm->priority = htons(priority);
-
- if (table_idx != 0xff) {
- if (!table_id_enabled) {
- enable_flow_mod_table_id_ext(vconn, 1);
- table_id_enabled = 1;
- }
- ofm->command = htons(ntohs(ofm->command) | (table_idx << 8));
- } else {
- if (table_id_enabled) {
- enable_flow_mod_table_id_ext(vconn, 0);
- table_id_enabled = 0;
- }
+ while ((b = parse_ofp_add_flow_file(file, &table_idx)) != NULL) {
+ if ((table_idx != 0xff) != table_id_enabled) {
+ table_id_enabled = table_idx != 0xff;
+ enable_flow_mod_table_id_ext(vconn, table_id_enabled);
}
- send_openflow_buffer(vconn, buffer);
+ send_openflow_buffer(vconn, b);
}
vconn_close(vconn);
fclose(file);
@@ -914,6 +888,27 @@ do_benchmark(int argc OVS_UNUSED, char *argv[])
count * message_size / (duration / 1000.0));
}
+/* Parses each flow in the file named by argv[1] with
+ * parse_ofp_add_flow_file() and prints the resulting message on stdout.
+ *
+ * This command is really only useful for testing the flow parser (ofp_parse),
+ * so it is undocumented. */
+static void
+do_parse_flows(int argc OVS_UNUSED, char *argv[])
+{
+ uint8_t table_idx;
+ struct ofpbuf *b;
+ FILE *file;
+
+ file = fopen(argv[1], "r");
+ if (file == NULL) {
+ /* Report the name that was actually opened. "parse-flows" takes
+ * exactly one argument, so argv[2] is the null terminator. */
+ ovs_fatal(errno, "%s: open", argv[1]);
+ }
+
+ /* 'table_idx' is filled in by the parser but not needed for printing. */
+ while ((b = parse_ofp_add_flow_file(file, &table_idx)) != NULL) {
+ ofp_print(stdout, b->data, b->size, 0);
+ ofpbuf_delete(b);
+ }
+ fclose(file);
+}
+
static void
do_help(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
{
@@ -929,6 +924,7 @@ static const struct command all_commands[] = {
{ "dump-tables", 1, 1, do_dump_tables },
{ "dump-flows", 1, 2, do_dump_flows },
{ "dump-aggregate", 1, 2, do_dump_aggregate },
+ { "queue-stats", 1, 3, do_queue_stats },
{ "add-flow", 2, 2, do_add_flow },
{ "add-flows", 2, 2, do_add_flows },
{ "mod-flows", 2, 2, do_mod_flows },
@@ -939,6 +935,7 @@ static const struct command all_commands[] = {
{ "probe", 1, 1, do_probe },
{ "ping", 1, 2, do_ping },
{ "benchmark", 3, 3, do_benchmark },
+ { "parse-flows", 1, 1, do_parse_flows },
{ "help", 0, INT_MAX, do_help },
{ NULL, 0, 0, NULL },
};
diff --git a/utilities/ovs-openflowd.c b/utilities/ovs-openflowd.c
index 3e7e94f7d..e4035444e 100644
--- a/utilities/ovs-openflowd.c
+++ b/utilities/ovs-openflowd.c
@@ -459,8 +459,6 @@ parse_options(int argc, char *argv[], struct ofsettings *s)
s->n_controllers = controllers.n;
s->controllers = xmalloc(s->n_controllers * sizeof *s->controllers);
if (argc > 1) {
- size_t i;
-
for (i = 0; i < s->n_controllers; i++) {
s->controllers[i] = controller_opts;
s->controllers[i].target = controllers.names[i];
@@ -469,8 +467,6 @@ parse_options(int argc, char *argv[], struct ofsettings *s)
/* Sanity check. */
if (controller_opts.band == OFPROTO_OUT_OF_BAND) {
- size_t i;
-
for (i = 0; i < s->n_controllers; i++) {
if (!strcmp(s->controllers[i].target, "discover")) {
ovs_fatal(0, "Cannot perform discovery with out-of-band "
diff --git a/utilities/ovs-vsctl.8.in b/utilities/ovs-vsctl.8.in
index 7476777a8..281756c90 100644
--- a/utilities/ovs-vsctl.8.in
+++ b/utilities/ovs-vsctl.8.in
@@ -118,11 +118,12 @@ Prevents \fBovs\-vsctl\fR from actually modifying the database.
.
.IP "\fB\-t \fIsecs\fR"
.IQ "\fB\-\-timeout=\fIsecs\fR"
-Limits runtime to approximately \fIsecs\fR seconds. A value of
-zero will cause \fBovs\-vsctl\fR to wait forever. If the timeout expires,
-\fBovs\-vsctl\fR will exit with a \fBSIGALRM\fR signal. If this option is
-not used, \fBovs\-vsctl\fR uses a timeout of five seconds.
-(A timeout would normally happen only if the database cannot be contacted.)
+By default, or with a \fIsecs\fR of \fB0\fR, \fBovs\-vsctl\fR waits
+forever for a response from the database. This option limits runtime
+to approximately \fIsecs\fR seconds. If the timeout expires,
+\fBovs\-vsctl\fR will exit with a \fBSIGALRM\fR signal. (A timeout
+would normally happen only if the database cannot be contacted, or if
+the system is overloaded.)
.
.SS "Public Key Infrastructure Options"
.so lib/ssl.man
@@ -489,7 +490,7 @@ pair of double quotes (\fB""\fR).
.IP "UUID"
Either a universally unique identifier in the style of RFC 4122,
e.g. \fBf81d4fae\-7dec\-11d0\-a765\-00a0c91e6bf6\fR, or an \fB@\fIname\fR
-defined by the \fBcreate\fR command within the same \fBovs\-vsctl\fR
+defined by a \fBget\fR or \fBcreate\fR command within the same \fBovs\-vsctl\fR
invocation.
.PP
Multiple values in a single column may be separated by spaces or a
@@ -517,7 +518,7 @@ records are specified, lists all the records in \fItable\fR.
The UUIDs shown for rows created in the same \fBovs\-vsctl\fR
invocation will be wrong.
.
-.IP "[\fB\-\-if\-exists\fR] \fBget \fItable record column\fR[\fB:\fIkey\fR]..."
+.IP "[\fB\-\-id=@\fIname\fR] [\fB\-\-if\-exists\fR] \fBget \fItable record \fR[\fIcolumn\fR[\fB:\fIkey\fR]]..."
Prints the value of each specified \fIcolumn\fR in the given
\fIrecord\fR in \fItable\fR. For map columns, a \fIkey\fR may
optionally be specified, in which case the value associated with
@@ -527,6 +528,10 @@ For a map column, without \fB\-\-if\-exists\fR it is an error if
\fIkey\fR does not exist; with it, a blank line is printed. If
\fIcolumn\fR is not a map column or if \fIkey\fR is not specified,
\fB\-\-if\-exists\fR has no effect.
+.IP
+If \fB@\fIname\fR is specified, then the UUID for \fIrecord\fR may be
+referred to by that name later in the same \fBovs\-vsctl\fR
+invocation in contexts where a UUID is expected.
.
.IP "\fBset \fItable record column\fR[\fB:\fIkey\fR]\fB=\fIvalue\fR..."
Sets the value of each specified \fIcolumn\fR in the given
@@ -626,7 +631,97 @@ point to a new \fBQoS\fR record, which in turn points with its queue 0
to a new \fBQueue\fR record:
.IP
.B "ovs\-vsctl \-\- set port eth0 qos=@newqos \-\- \-\-id=@newqos create qos type=linux\-htb other\-config:max\-rate=1000000 queues:0=@newqueue \-\- \-\-id=@newqueue create queue other\-config:min\-rate=1000000 other\-config:max\-rate=1000000"
+.SH "CONFIGURATION COOKBOOK"
+.SS "Port Configuration"
+.PP
+Add an ``internal port'' \fBvlan10\fR to bridge \fBbr0\fR as a VLAN
+access port for VLAN 10, and configure it with an IP address:
+.IP
+.B "ovs\-vsctl add\-port br0 vlan10 tag=10 \-\- set Interface vlan10 type=internal"
+.IP
+.B "ifconfig vlan10 192.168.0.123"
.
+.SS "Port Mirroring"
+.PP
+Mirror all packets received or sent on \fBeth0\fR or \fBeth1\fR onto
+\fBeth2\fR, assuming that all of those ports exist on bridge \fBbr0\fR
+(as a side-effect this causes any packets received on \fBeth2\fR to be
+ignored):
+.IP
+.B "ovs\-vsctl \-\- set Bridge br0 mirrors=@m \(rs"
+.IP
+.B "\-\- \-\-id=@eth0 get Port eth0 \(rs"
+.IP
+.B "\-\- \-\-id=@eth1 get Port eth1 \(rs"
+.IP
+.B "\-\- \-\-id=@eth2 get Port eth2 \(rs"
+.IP
+.B "\-\- \-\-id=@m create Mirror name=mymirror select-dst-port=@eth0,@eth1 select-src-port=@eth0,@eth1 output-port=@eth2"
+.PP
+Remove the mirror created above from \fBbr0\fR and destroy the Mirror
+record (to avoid having an unreferenced record in the database):
+.IP
+.B "ovs\-vsctl destroy Mirror mymirror \-\- clear Bridge br0 mirrors"
+.SS "Quality of Service (QoS)"
+.PP
+Create a \fBlinux\-htb\fR QoS record that points to a few queues and
+use it on \fBeth0\fR and \fBeth1\fR:
+.IP
+.B "ovs\-vsctl \-\- set Port eth0 qos=@newqos \(rs"
+.IP
+.B "\-\- set Port eth1 qos=@newqos \(rs"
+.IP
+.B "\-\- \-\-id=@newqos create QoS type=linux\-htb other\-config:max\-rate=1000000000 queues=0=@q0,1=@q1 \(rs"
+.IP
+.B "\-\- \-\-id=@q0 create Queue other\-config:min\-rate=100000000 other\-config:max\-rate=100000000 \(rs"
+.IP
+.B "\-\- \-\-id=@q1 create Queue other\-config:min\-rate=500000000"
+.PP
+Deconfigure the QoS record above from \fBeth1\fR only:
+.IP
+.B "ovs\-vsctl clear Port eth1 qos"
+.PP
+To deconfigure the QoS record from both \fBeth0\fR and \fBeth1\fR and
+then delete the QoS record:
+.IP
+.B "ovs\-vsctl \-\- destroy QoS eth0 \-\- clear Port eth0 qos \-\- clear Port eth1 qos"
+.PP
+(This command will leave two unreferenced Queue records in the
+database. To delete them, use "\fBovs\-vsctl list Queue\fR" to find
+their UUIDs, then "\fBovs\-vsctl destroy Queue \fIuuid1\fR
+\fIuuid2\fR" to destroy each of them.)
+.SS "NetFlow"
+.PP
+Configure bridge \fBbr0\fR to send NetFlow records to UDP port 5566 on
+host 192.168.0.34, with an active timeout of 30 seconds:
+.IP
+.B "ovs\-vsctl \-\- set Bridge br0 netflow=@nf \(rs"
+.IP
+.B "\-\- \-\-id=@nf create NetFlow targets=\(rs\(dq192.168.0.34:5566\(rs\(dq active\-timeout=30"
+.PP
+Update the NetFlow configuration created by the previous command to
+instead use an active timeout of 60 seconds:
+.IP
+.B "ovs\-vsctl set NetFlow br0 active_timeout=60"
+.PP
+Deconfigure the NetFlow settings from \fBbr0\fR and delete the NetFlow
+record (to avoid having an unreferenced record in the database):
+.IP
+.B "ovs\-vsctl destroy NetFlow br0 \-\- clear Bridge br0 netflow"
+.SS "sFlow"
+.PP
+Configure bridge \fBbr0\fR to send sFlow records to a collector on
+10.0.0.1 at port 6343, using \fBeth1\fR\'s IP address as the source,
+with specific sampling parameters:
+.IP
+.B "ovs\-vsctl \-\- \-\-id=@s create sFlow agent=eth1 target=\(rs\(dq10.0.0.1:6343\(rs\(dq header=128 sampling=64 polling=10 \(rs"
+.IP
+.B "\-\- set Bridge br0 sflow=@s"
+.PP
+Deconfigure sFlow from br0 and destroy the sFlow record (to avoid
+having an unreferenced record in the database):
+.IP
+.B "ovs\-vsctl \-\- destroy sFlow br0 \-\- clear Bridge br0 sflow"
.SH "EXIT STATUS"
.IP "0"
Successful program execution.
diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c
index 4d50194aa..940866449 100644
--- a/utilities/ovs-vsctl.c
+++ b/utilities/ovs-vsctl.c
@@ -59,6 +59,7 @@ struct vsctl_command_syntax {
vsctl_handler_func *run;
vsctl_handler_func *postprocess;
const char *options;
+ enum { RO, RW } mode; /* Does this command modify the database? */
};
struct vsctl_command {
@@ -85,7 +86,7 @@ static bool dry_run;
static bool wait_for_reload = true;
/* --timeout: Time to wait for a connection to 'db'. */
-static int timeout = 5;
+static int timeout;
/* All supported commands. */
static const struct vsctl_command_syntax all_commands[];
@@ -101,10 +102,12 @@ static void vsctl_fatal(const char *, ...) PRINTF_FORMAT(1, 2) NO_RETURN;
static char *default_db(void);
static void usage(void) NO_RETURN;
static void parse_options(int argc, char *argv[]);
+static bool might_write_to_db(char **argv);
static struct vsctl_command *parse_commands(int argc, char *argv[],
size_t *n_commandsp);
static void parse_command(int argc, char *argv[], struct vsctl_command *);
+static const struct vsctl_command_syntax *find_command(const char *name);
static void do_vsctl(const char *args,
struct vsctl_command *, size_t n_commands,
struct ovsdb_idl *);
@@ -114,7 +117,6 @@ static void set_column(const struct vsctl_table_class *,
const struct ovsdb_idl_row *, const char *arg,
struct ovsdb_symbol_table *);
-
int
main(int argc, char *argv[])
{
@@ -132,7 +134,7 @@ main(int argc, char *argv[])
/* Log our arguments. This is often valuable for debugging systems. */
args = process_escape_args(argv);
- VLOG_INFO("Called as %s", args);
+ VLOG(might_write_to_db(argv) ? VLL_INFO : VLL_DBG, "Called as %s", args);
/* Parse command line. */
parse_options(argc, argv);
@@ -295,6 +297,8 @@ static void
parse_command(int argc, char *argv[], struct vsctl_command *command)
{
const struct vsctl_command_syntax *p;
+ struct shash_node *node;
+ int n_arg;
int i;
shash_init(&command->options);
@@ -325,58 +329,71 @@ parse_command(int argc, char *argv[], struct vsctl_command *command)
vsctl_fatal("missing command name");
}
- for (p = all_commands; p->name; p++) {
- if (!strcmp(p->name, argv[i])) {
- struct shash_node *node;
- int n_arg;
+ p = find_command(argv[i]);
+ if (!p) {
+ vsctl_fatal("unknown command '%s'; use --help for help", argv[i]);
+ }
- SHASH_FOR_EACH (node, &command->options) {
- const char *s = strstr(p->options, node->name);
- int end = s ? s[strlen(node->name)] : EOF;
+ SHASH_FOR_EACH (node, &command->options) {
+ const char *s = strstr(p->options, node->name);
+ int end = s ? s[strlen(node->name)] : EOF;
- if (end != '=' && end != ',' && end != ' ' && end != '\0') {
- vsctl_fatal("'%s' command has no '%s' option",
- argv[i], node->name);
- }
- if ((end == '=') != (node->data != NULL)) {
- if (end == '=') {
- vsctl_fatal("missing argument to '%s' option on '%s' "
- "command", node->name, argv[i]);
- } else {
- vsctl_fatal("'%s' option on '%s' does not accept an "
- "argument", node->name, argv[i]);
- }
- }
+ if (end != '=' && end != ',' && end != ' ' && end != '\0') {
+ vsctl_fatal("'%s' command has no '%s' option",
+ argv[i], node->name);
+ }
+ if ((end == '=') != (node->data != NULL)) {
+ if (end == '=') {
+ vsctl_fatal("missing argument to '%s' option on '%s' "
+ "command", node->name, argv[i]);
+ } else {
+ vsctl_fatal("'%s' option on '%s' does not accept an "
+ "argument", node->name, argv[i]);
}
+ }
+ }
- n_arg = argc - i - 1;
- if (n_arg < p->min_args) {
- vsctl_fatal("'%s' command requires at least %d arguments",
- p->name, p->min_args);
- } else if (n_arg > p->max_args) {
- int j;
-
- for (j = i + 1; j < argc; j++) {
- if (argv[j][0] == '-') {
- vsctl_fatal("'%s' command takes at most %d arguments "
- "(note that options must precede command "
- "names and follow a \"--\" argument)",
- p->name, p->max_args);
- }
- }
+ n_arg = argc - i - 1;
+ if (n_arg < p->min_args) {
+ vsctl_fatal("'%s' command requires at least %d arguments",
+ p->name, p->min_args);
+ } else if (n_arg > p->max_args) {
+ int j;
- vsctl_fatal("'%s' command takes at most %d arguments",
+ for (j = i + 1; j < argc; j++) {
+ if (argv[j][0] == '-') {
+ vsctl_fatal("'%s' command takes at most %d arguments "
+ "(note that options must precede command "
+ "names and follow a \"--\" argument)",
p->name, p->max_args);
- } else {
- command->syntax = p;
- command->argc = n_arg + 1;
- command->argv = &argv[i];
- return;
}
}
+
+ vsctl_fatal("'%s' command takes at most %d arguments",
+ p->name, p->max_args);
}
- vsctl_fatal("unknown command '%s'; use --help for help", argv[i]);
+ command->syntax = p;
+ command->argc = n_arg + 1;
+ command->argv = &argv[i];
+}
+
+/* Returns the "struct vsctl_command_syntax" for a given command 'name', or a
+ * null pointer if there is none.
+ *
+ * The lookup table is a function-local static shash. It is filled from
+ * 'all_commands' the first time this function runs (detected by the table
+ * still being empty) and reused by every later call. */
+static const struct vsctl_command_syntax *
+find_command(const char *name)
+{
+ static struct shash commands = SHASH_INITIALIZER(&commands);
+
+ if (shash_is_empty(&commands)) {
+ const struct vsctl_command_syntax *p;
+
+ /* Index every entry up to the NULL-named terminator by its name.
+ * NOTE(review): shash_add_assert() presumably asserts that no name
+ * is added twice -- confirm against lib/shash. */
+ for (p = all_commands; p->name; p++) {
+ shash_add_assert(&commands, p->name, p);
+ }
+ }
+
+ return shash_find_data(&commands, name);
}
static void
@@ -494,6 +511,21 @@ default_db(void)
}
return def;
}
+
+/* Returns true if it looks like this set of arguments might modify the
+ * database, otherwise false. (Not very smart, so it's prone to false
+ * positives.)
+ *
+ * 'argv' must be terminated by a null pointer. Every element is looked up
+ * as a command name, not only the words that are in command position, so
+ * any argument that happens to spell the name of an RW command makes this
+ * function return true. */
+static bool
+might_write_to_db(char **argv)
+{
+ for (; *argv; argv++) {
+ /* find_command() yields a null pointer for words that are not command
+ * names; those are simply skipped. */
+ const struct vsctl_command_syntax *p = find_command(*argv);
+ if (p && p->mode == RW) {
+ return true;
+ }
+ }
+ return false;
+}
struct vsctl_context {
/* Read-only. */
@@ -1305,12 +1337,11 @@ add_port(struct vsctl_context *ctx,
get_info(ctx->ovs, &info);
if (may_exist) {
- struct vsctl_port *port;
+ struct vsctl_port *vsctl_port;
- port = find_port(&info, port_name, false);
- if (port) {
+ vsctl_port = find_port(&info, port_name, false);
+ if (vsctl_port) {
struct svec want_names, have_names;
- size_t i;
svec_init(&want_names);
for (i = 0; i < n_ifaces; i++) {
@@ -1319,15 +1350,16 @@ add_port(struct vsctl_context *ctx,
svec_sort(&want_names);
svec_init(&have_names);
- for (i = 0; i < port->port_cfg->n_interfaces; i++) {
- svec_add(&have_names, port->port_cfg->interfaces[i]->name);
+ for (i = 0; i < vsctl_port->port_cfg->n_interfaces; i++) {
+ svec_add(&have_names,
+ vsctl_port->port_cfg->interfaces[i]->name);
}
svec_sort(&have_names);
- if (strcmp(port->bridge->name, br_name)) {
+ if (strcmp(vsctl_port->bridge->name, br_name)) {
char *command = vsctl_context_to_string(ctx);
vsctl_fatal("\"%s\" but %s is actually attached to bridge %s",
- command, port_name, port->bridge->name);
+ command, port_name, vsctl_port->bridge->name);
}
if (!svec_equal(&want_names, &have_names)) {
@@ -1975,6 +2007,28 @@ get_column(const struct vsctl_table_class *table, const char *column_name,
}
}
+/* Claims the row name 'id' in 'symtab' for one "--id" option and returns a
+ * pointer to the UUID stored in the corresponding symbol, which the caller
+ * may read or overwrite.
+ *
+ * Exits through vsctl_fatal() if 'id' does not begin with "@" or if the
+ * symbol has already been marked as used.
+ *
+ * If 'newp' is nonnull, '*newp' is set to true if 'symtab' had no symbol
+ * named 'id' before this call and to false otherwise. */
+static struct uuid *
+create_symbol(struct ovsdb_symbol_table *symtab, const char *id, bool *newp)
+{
+ struct ovsdb_symbol *symbol;
+
+ if (id[0] != '@') {
+ vsctl_fatal("row id \"%s\" does not begin with \"@\"", id);
+ }
+
+ /* This must be sampled before the insertion below, which makes the symbol
+ * exist unconditionally. */
+ if (newp) {
+ *newp = ovsdb_symbol_table_get(symtab, id) == NULL;
+ }
+
+ /* NOTE(review): presumably ovsdb_symbol_table_insert() returns the
+ * existing symbol when 'id' is already present; the 'used' check relies
+ * on that -- confirm. */
+ symbol = ovsdb_symbol_table_insert(symtab, id);
+ if (symbol->used) {
+ vsctl_fatal("row id \"%s\" may only be specified on one --id option",
+ id);
+ }
+ symbol->used = true;
+ return &symbol->uuid;
+}
+
static char *
missing_operator_error(const char *arg, const char **allowed_operators,
size_t n_allowed)
@@ -2142,6 +2196,7 @@ error:
static void
cmd_get(struct vsctl_context *ctx)
{
+ const char *id = shash_find_data(&ctx->options, "--id");
bool if_exists = shash_find(&ctx->options, "--if-exists");
const char *table_name = ctx->argv[1];
const char *record_id = ctx->argv[2];
@@ -2152,6 +2207,15 @@ cmd_get(struct vsctl_context *ctx)
table = get_table(table_name);
row = must_get_row(ctx, table, record_id);
+ if (id) {
+ bool new;
+
+ *create_symbol(ctx->symtab, id, &new) = row->uuid;
+ if (!new) {
+ vsctl_fatal("row id \"%s\" specified on \"get\" command was used "
+ "before it was defined", id);
+ }
+ }
for (i = 3; i < ctx->argc; i++) {
const struct ovsdb_idl_column *column;
const struct ovsdb_datum *datum;
@@ -2453,24 +2517,7 @@ cmd_create(struct vsctl_context *ctx)
const struct uuid *uuid;
int i;
- if (id) {
- struct ovsdb_symbol *symbol;
-
- if (id[0] != '@') {
- vsctl_fatal("row id \"%s\" does not begin with \"@\"", id);
- }
-
- symbol = ovsdb_symbol_table_insert(ctx->symtab, id);
- if (symbol->used) {
- vsctl_fatal("row id \"%s\" may only be used to insert a single "
- "row", id);
- }
- symbol->used = true;
-
- uuid = &symbol->uuid;
- } else {
- uuid = NULL;
- }
+ uuid = id ? create_symbol(ctx->symtab, id, NULL) : NULL;
table = get_table(table_name);
row = ovsdb_idl_txn_insert(ctx->txn, table->class, uuid);
@@ -2767,8 +2814,8 @@ do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands,
ds_chomp(ds, '\n');
for (j = 0; j < ds->length; j++) {
- int c = ds->string[j];
- switch (c) {
+ int ch = ds->string[j];
+ switch (ch) {
case '\n':
fputs("\\n", stdout);
break;
@@ -2778,7 +2825,7 @@ do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands,
break;
default:
- putchar(c);
+ putchar(ch);
}
}
putchar('\n');
@@ -2796,8 +2843,6 @@ do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands,
if (wait_for_reload && status != TXN_UNCHANGED) {
for (;;) {
- const struct ovsrec_open_vswitch *ovs;
-
ovsdb_idl_run(idl);
OVSREC_OPEN_VSWITCH_FOR_EACH (ovs, idl) {
if (ovs->cur_cfg >= next_cfg) {
@@ -2827,56 +2872,56 @@ try_again:
static const struct vsctl_command_syntax all_commands[] = {
/* Open vSwitch commands. */
- {"init", 0, 0, cmd_init, NULL, ""},
+ {"init", 0, 0, cmd_init, NULL, "", RW},
/* Bridge commands. */
- {"add-br", 1, 3, cmd_add_br, NULL, "--may-exist"},
- {"del-br", 1, 1, cmd_del_br, NULL, "--if-exists"},
- {"list-br", 0, 0, cmd_list_br, NULL, ""},
- {"br-exists", 1, 1, cmd_br_exists, NULL, ""},
- {"br-to-vlan", 1, 1, cmd_br_to_vlan, NULL, ""},
- {"br-to-parent", 1, 1, cmd_br_to_parent, NULL, ""},
- {"br-set-external-id", 2, 3, cmd_br_set_external_id, NULL, ""},
- {"br-get-external-id", 1, 2, cmd_br_get_external_id, NULL, ""},
+ {"add-br", 1, 3, cmd_add_br, NULL, "--may-exist", RW},
+ {"del-br", 1, 1, cmd_del_br, NULL, "--if-exists", RW},
+ {"list-br", 0, 0, cmd_list_br, NULL, "", RO},
+ {"br-exists", 1, 1, cmd_br_exists, NULL, "", RO},
+ {"br-to-vlan", 1, 1, cmd_br_to_vlan, NULL, "", RO},
+ {"br-to-parent", 1, 1, cmd_br_to_parent, NULL, "", RO},
+ {"br-set-external-id", 2, 3, cmd_br_set_external_id, NULL, "", RW},
+ {"br-get-external-id", 1, 2, cmd_br_get_external_id, NULL, "", RO},
/* Port commands. */
- {"list-ports", 1, 1, cmd_list_ports, NULL, ""},
- {"add-port", 2, INT_MAX, cmd_add_port, NULL, "--may-exist"},
- {"add-bond", 4, INT_MAX, cmd_add_bond, NULL, "--may-exist,--fake-iface"},
- {"del-port", 1, 2, cmd_del_port, NULL, "--if-exists,--with-iface"},
- {"port-to-br", 1, 1, cmd_port_to_br, NULL, ""},
+ {"list-ports", 1, 1, cmd_list_ports, NULL, "", RO},
+ {"add-port", 2, INT_MAX, cmd_add_port, NULL, "--may-exist", RW},
+ {"add-bond", 4, INT_MAX, cmd_add_bond, NULL, "--may-exist,--fake-iface", RW},
+ {"del-port", 1, 2, cmd_del_port, NULL, "--if-exists,--with-iface", RW},
+ {"port-to-br", 1, 1, cmd_port_to_br, NULL, "", RO},
/* Interface commands. */
- {"list-ifaces", 1, 1, cmd_list_ifaces, NULL, ""},
- {"iface-to-br", 1, 1, cmd_iface_to_br, NULL, ""},
+ {"list-ifaces", 1, 1, cmd_list_ifaces, NULL, "", RO},
+ {"iface-to-br", 1, 1, cmd_iface_to_br, NULL, "", RO},
/* Controller commands. */
- {"get-controller", 1, 1, cmd_get_controller, NULL, ""},
- {"del-controller", 1, 1, cmd_del_controller, NULL, ""},
- {"set-controller", 1, INT_MAX, cmd_set_controller, NULL, ""},
- {"get-fail-mode", 1, 1, cmd_get_fail_mode, NULL, ""},
- {"del-fail-mode", 1, 1, cmd_del_fail_mode, NULL, ""},
- {"set-fail-mode", 2, 2, cmd_set_fail_mode, NULL, ""},
+ {"get-controller", 1, 1, cmd_get_controller, NULL, "", RO},
+ {"del-controller", 1, 1, cmd_del_controller, NULL, "", RW},
+ {"set-controller", 1, INT_MAX, cmd_set_controller, NULL, "", RW},
+ {"get-fail-mode", 1, 1, cmd_get_fail_mode, NULL, "", RO},
+ {"del-fail-mode", 1, 1, cmd_del_fail_mode, NULL, "", RW},
+ {"set-fail-mode", 2, 2, cmd_set_fail_mode, NULL, "", RW},
/* SSL commands. */
- {"get-ssl", 0, 0, cmd_get_ssl, NULL, ""},
- {"del-ssl", 0, 0, cmd_del_ssl, NULL, ""},
- {"set-ssl", 3, 3, cmd_set_ssl, NULL, "--bootstrap"},
+ {"get-ssl", 0, 0, cmd_get_ssl, NULL, "", RO},
+ {"del-ssl", 0, 0, cmd_del_ssl, NULL, "", RW},
+ {"set-ssl", 3, 3, cmd_set_ssl, NULL, "--bootstrap", RW},
/* Switch commands. */
- {"emer-reset", 0, 0, cmd_emer_reset, NULL, ""},
+ {"emer-reset", 0, 0, cmd_emer_reset, NULL, "", RW},
/* Parameter commands. */
- {"get", 3, INT_MAX, cmd_get, NULL, "--if-exists"},
- {"list", 1, INT_MAX, cmd_list, NULL, ""},
- {"set", 3, INT_MAX, cmd_set, NULL, ""},
- {"add", 4, INT_MAX, cmd_add, NULL, ""},
- {"remove", 4, INT_MAX, cmd_remove, NULL, ""},
- {"clear", 3, INT_MAX, cmd_clear, NULL, ""},
- {"create", 2, INT_MAX, cmd_create, post_create, "--id="},
- {"destroy", 1, INT_MAX, cmd_destroy, NULL, "--if-exists"},
- {"wait-until", 2, INT_MAX, cmd_wait_until, NULL, ""},
-
- {NULL, 0, 0, NULL, NULL, NULL},
+ {"get", 2, INT_MAX, cmd_get, NULL, "--if-exists,--id=", RO},
+ {"list", 1, INT_MAX, cmd_list, NULL, "", RO},
+ {"set", 3, INT_MAX, cmd_set, NULL, "", RW},
+ {"add", 4, INT_MAX, cmd_add, NULL, "", RW},
+ {"remove", 4, INT_MAX, cmd_remove, NULL, "", RW},
+ {"clear", 3, INT_MAX, cmd_clear, NULL, "", RW},
+ {"create", 2, INT_MAX, cmd_create, post_create, "--id=", RW},
+ {"destroy", 1, INT_MAX, cmd_destroy, NULL, "--if-exists", RW},
+ {"wait-until", 2, INT_MAX, cmd_wait_until, NULL, "", RO},
+
+ {NULL, 0, 0, NULL, NULL, NULL, RO},
};
diff --git a/vswitchd/automake.mk b/vswitchd/automake.mk
index 592be607a..93c6f92ea 100644
--- a/vswitchd/automake.mk
+++ b/vswitchd/automake.mk
@@ -12,6 +12,8 @@ vswitchd_ovs_vswitchd_SOURCES = \
vswitchd/proc-net-compat.c \
vswitchd/proc-net-compat.h \
vswitchd/ovs-vswitchd.c \
+ vswitchd/system-stats.c \
+ vswitchd/system-stats.h \
vswitchd/vswitch-idl.c \
vswitchd/vswitch-idl.h \
vswitchd/xenserver.c \
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 45b32bb64..45ce7277c 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -36,6 +36,7 @@
#include "dynamic-string.h"
#include "flow.h"
#include "hash.h"
+#include "hmap.h"
#include "jsonrpc.h"
#include "list.h"
#include "mac-learning.h"
@@ -48,7 +49,6 @@
#include "ovsdb-data.h"
#include "packets.h"
#include "poll-loop.h"
-#include "port-array.h"
#include "proc-net-compat.h"
#include "process.h"
#include "sha1.h"
@@ -56,6 +56,7 @@
#include "socket-util.h"
#include "stream-ssl.h"
#include "svec.h"
+#include "system-stats.h"
#include "timeval.h"
#include "util.h"
#include "unixctl.h"
@@ -84,6 +85,7 @@ struct iface {
/* These members are valid only after bridge_reconfigure() causes them to
* be initialized. */
+ struct hmap_node xf_ifidx_node; /* In struct bridge's "ifaces" hmap. */
int xf_ifidx; /* Index within kernel datapath. */
struct netdev *netdev; /* Network device. */
bool enabled; /* May be chosen for flows? */
@@ -164,7 +166,7 @@ struct bridge {
/* Kernel datapath information. */
struct xfif *xfif; /* Datapath. */
- struct port_array ifaces; /* Indexed by kernel datapath port number. */
+ struct hmap ifaces; /* Contains "struct iface"s. */
/* Bridge ports. */
struct port **ports;
@@ -188,10 +190,10 @@ static struct list all_bridges = LIST_INITIALIZER(&all_bridges);
/* OVSDB IDL used to obtain configuration. */
static struct ovsdb_idl *idl;
-/* Each time this timer expires, the bridge fetches statistics for every
- * interface and pushes them into the database. */
-#define IFACE_STATS_INTERVAL (5 * 1000) /* In milliseconds. */
-static long long int iface_stats_timer = LLONG_MIN;
+/* Each time this timer expires, the bridge fetches system and interface
+ * statistics and pushes them into the database. */
+#define STATS_INTERVAL (5 * 1000) /* In milliseconds. */
+static long long int stats_timer = LLONG_MIN;
static struct bridge *bridge_create(const struct ovsrec_bridge *br_cfg);
static void bridge_destroy(struct bridge *);
@@ -306,7 +308,7 @@ bridge_configure_once(const struct ovsrec_open_vswitch *cfg)
}
already_configured_once = true;
- iface_stats_timer = time_msec() + IFACE_STATS_INTERVAL;
+ stats_timer = time_msec() + STATS_INTERVAL;
/* Get all the configured bridges' names from 'cfg' into 'bridge_names'. */
svec_init(&bridge_names);
@@ -373,6 +375,20 @@ set_up_iface(const struct ovsrec_interface *iface_cfg, struct iface *iface,
xstrdup(iface_cfg->value_options[i]));
}
+ /* Include 'other_config' keys in hash of netdev options. The
+ * namespace of 'other_config' and 'options' must be disjoint.
+ * Prefer 'options' keys over 'other_config' keys. */
+ for (i = 0; i < iface_cfg->n_other_config; i++) {
+ char *value = xstrdup(iface_cfg->value_other_config[i]);
+ if (!shash_add_once(&options, iface_cfg->key_other_config[i],
+ value)) {
+ VLOG_WARN("%s: \"other_config\" key %s conflicts with existing "
+ "\"other_config\" or \"options\" entry...ignoring",
+ iface_cfg->name, iface_cfg->key_other_config[i]);
+ free(value);
+ }
+ }
+
if (create) {
struct netdev_options netdev_options;
@@ -558,7 +574,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
/* Collect old and new bridges. */
shash_init(&old_br);
shash_init(&new_br);
- LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+ LIST_FOR_EACH (br, node, &all_bridges) {
shash_add(&old_br, br->name, br);
}
for (i = 0; i < ovs_cfg->n_bridges; i++) {
@@ -569,7 +585,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
}
/* Get rid of deleted bridges and add new bridges. */
- LIST_FOR_EACH_SAFE (br, next, struct bridge, node, &all_bridges) {
+ LIST_FOR_EACH_SAFE (br, next, node, &all_bridges) {
struct ovsrec_bridge *br_cfg = shash_find_data(&new_br, br->name);
if (br_cfg) {
br->cfg = br_cfg;
@@ -596,7 +612,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
shash_destroy(&new_br);
/* Reconfigure all bridges. */
- LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+ LIST_FOR_EACH (br, node, &all_bridges) {
bridge_reconfigure_one(br);
}
@@ -605,7 +621,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
* The kernel will reject any attempt to add a given port to a datapath if
* that port already belongs to a different datapath, so we must do all
* port deletions before any port additions. */
- LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+ LIST_FOR_EACH (br, node, &all_bridges) {
struct xflow_port *xfif_ports;
size_t n_xfif_ports;
struct shash want_ifaces;
@@ -627,11 +643,10 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
shash_destroy(&want_ifaces);
free(xfif_ports);
}
- LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+ LIST_FOR_EACH (br, node, &all_bridges) {
struct xflow_port *xfif_ports;
size_t n_xfif_ports;
struct shash cur_ifaces, want_ifaces;
- struct shash_node *node;
/* Get the set of interfaces currently in this datapath. */
xfif_port_list(br->xfif, &xfif_ports, &n_xfif_ports);
@@ -677,7 +692,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
shash_destroy(&want_ifaces);
}
sflow_bridge_number = 0;
- LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+ LIST_FOR_EACH (br, node, &all_bridges) {
uint8_t ea[8];
uint64_t dpid;
struct iface *local_iface;
@@ -764,7 +779,6 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
struct ovsrec_controller **controllers;
struct ofproto_sflow_options oso;
size_t n_controllers;
- size_t i;
memset(&oso, 0, sizeof oso);
@@ -815,7 +829,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
* the datapath ID before the controller. */
bridge_reconfigure_remotes(br, managers, n_managers);
}
- LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+ LIST_FOR_EACH (br, node, &all_bridges) {
for (i = 0; i < br->n_ports; i++) {
struct port *port = br->ports[i];
int j;
@@ -828,7 +842,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
}
}
}
- LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+ LIST_FOR_EACH (br, node, &all_bridges) {
iterate_and_prune_ifaces(br, set_iface_properties, NULL);
}
@@ -1097,6 +1111,20 @@ iface_refresh_stats(struct iface *iface)
ovsrec_interface_set_statistics(iface->cfg, keys, values, n);
}
+/* Collects system statistics with get_system_stats() and writes them to the
+ * "statistics" column of the Open_vSwitch row 'cfg'.
+ *
+ * NOTE(review): the write is issued with ovsdb_idl_txn_write(), so this
+ * presumably requires the caller to have a transaction open; bridge_run()
+ * calls it between ovsdb_idl_txn_create() and ovsdb_idl_txn_commit(). */
+static void
+refresh_system_stats(const struct ovsrec_open_vswitch *cfg)
+{
+ struct ovsdb_datum datum;
+ struct shash stats;
+
+ shash_init(&stats);
+ get_system_stats(&stats);
+
+ /* NOTE(review): neither 'stats' nor 'datum' is destroyed here. Presumably
+ * ovsdb_datum_from_shash() consumes the shash and ovsdb_idl_txn_write()
+ * takes ownership of the datum -- confirm, otherwise this leaks. */
+ ovsdb_datum_from_shash(&datum, &stats);
+ ovsdb_idl_txn_write(&cfg->header_, &ovsrec_open_vswitch_col_statistics,
+ &datum);
+}
+
void
bridge_run(void)
{
@@ -1108,7 +1136,7 @@ bridge_run(void)
/* Let each bridge do the work that it needs to do. */
datapath_destroyed = false;
- LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+ LIST_FOR_EACH (br, node, &all_bridges) {
int error = bridge_run_one(br);
if (error) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
@@ -1152,28 +1180,31 @@ bridge_run(void)
}
#endif
- /* Refresh interface stats if necessary. */
- if (time_msec() >= iface_stats_timer) {
- struct ovsdb_idl_txn *txn;
+ /* Refresh system and interface stats if necessary. */
+ if (time_msec() >= stats_timer) {
+ if (cfg) {
+ struct ovsdb_idl_txn *txn;
- txn = ovsdb_idl_txn_create(idl);
- LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
- size_t i;
+ txn = ovsdb_idl_txn_create(idl);
+ LIST_FOR_EACH (br, node, &all_bridges) {
+ size_t i;
- for (i = 0; i < br->n_ports; i++) {
- struct port *port = br->ports[i];
- size_t j;
+ for (i = 0; i < br->n_ports; i++) {
+ struct port *port = br->ports[i];
+ size_t j;
- for (j = 0; j < port->n_ifaces; j++) {
- struct iface *iface = port->ifaces[j];
- iface_refresh_stats(iface);
+ for (j = 0; j < port->n_ifaces; j++) {
+ struct iface *iface = port->ifaces[j];
+ iface_refresh_stats(iface);
+ }
}
}
+ refresh_system_stats(cfg);
+ ovsdb_idl_txn_commit(txn);
+ ovsdb_idl_txn_destroy(txn); /* XXX */
}
- ovsdb_idl_txn_commit(txn);
- ovsdb_idl_txn_destroy(txn); /* XXX */
- iface_stats_timer = time_msec() + IFACE_STATS_INTERVAL;
+ stats_timer = time_msec() + STATS_INTERVAL;
}
}
@@ -1182,7 +1213,7 @@ bridge_wait(void)
{
struct bridge *br;
- LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+ LIST_FOR_EACH (br, node, &all_bridges) {
ofproto_wait(br->ofproto);
if (ofproto_has_primary_controller(br->ofproto)) {
continue;
@@ -1192,7 +1223,7 @@ bridge_wait(void)
bond_wait(br);
}
ovsdb_idl_wait(idl);
- poll_timer_wait_until(iface_stats_timer);
+ poll_timer_wait_until(stats_timer);
}
/* Forces 'br' to revalidate all of its flows. This is appropriate when 'br''s
@@ -1241,7 +1272,7 @@ bridge_unixctl_fdb_show(struct unixctl_conn *conn,
}
ds_put_cstr(&ds, " port VLAN MAC Age\n");
- LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) {
+ LIST_FOR_EACH (e, lru_node, &br->ml->lrus) {
if (e->port < 0 || e->port >= br->n_ports) {
continue;
}
@@ -1287,7 +1318,7 @@ bridge_create(const struct ovsrec_bridge *br_cfg)
br->ml = mac_learning_create();
eth_addr_nicira_random(br->default_ea);
- port_array_init(&br->ifaces);
+ hmap_init(&br->ifaces);
shash_init(&br->port_by_name);
shash_init(&br->iface_by_name);
@@ -1317,7 +1348,7 @@ bridge_destroy(struct bridge *br)
xfif_close(br->xfif);
ofproto_destroy(br->ofproto);
mac_learning_destroy(br->ml);
- port_array_destroy(&br->ifaces);
+ hmap_destroy(&br->ifaces);
shash_destroy(&br->port_by_name);
shash_destroy(&br->iface_by_name);
free(br->ports);
@@ -1331,7 +1362,7 @@ bridge_lookup(const char *name)
{
struct bridge *br;
- LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+ LIST_FOR_EACH (br, node, &all_bridges) {
if (!strcmp(br->name, name)) {
return br;
}
@@ -1377,7 +1408,7 @@ bridge_unixctl_reconnect(struct unixctl_conn *conn,
}
ofproto_reconnect_controllers(br->ofproto);
} else {
- LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+ LIST_FOR_EACH (br, node, &all_bridges) {
ofproto_reconnect_controllers(br->ofproto);
}
}
@@ -1712,7 +1743,7 @@ bridge_fetch_dp_ifaces(struct bridge *br)
iface->xf_ifidx = -1;
}
}
- port_array_clear(&br->ifaces);
+ hmap_clear(&br->ifaces);
xfif_port_list(br->xfif, &xfif_ports, &n_xfif_ports);
for (i = 0; i < n_xfif_ports; i++) {
@@ -1726,8 +1757,9 @@ bridge_fetch_dp_ifaces(struct bridge *br)
VLOG_WARN("%s reported interface %"PRIu16" twice",
xfif_name(br->xfif), p->port);
} else {
- port_array_set(&br->ifaces, p->port, iface);
iface->xf_ifidx = p->port;
+ hmap_insert(&br->ifaces, &iface->xf_ifidx_node,
+ hash_int(iface->xf_ifidx, 0));
}
if (iface->cfg) {
@@ -2810,7 +2842,6 @@ bond_rebalance_port(struct port *port)
* smallest hashes instead of the biggest ones. There is little
* reason behind this decision; we could use the opposite sort
* order to shift away big hashes ahead of small ones. */
- size_t i;
bool order_swapped;
for (i = 0; i < from->n_hashes; i++) {
@@ -2891,7 +2922,7 @@ bond_send_learning_packets(struct port *port)
ofpbuf_init(&packet, 128);
error = n_packets = n_errors = 0;
- LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) {
+ LIST_FOR_EACH (e, lru_node, &br->ml->lrus) {
union ofp_action actions[2], *a;
uint16_t xf_ifidx;
tag_type tags = 0;
@@ -2953,7 +2984,7 @@ bond_unixctl_list(struct unixctl_conn *conn,
ds_put_cstr(&ds, "bridge\tbond\tslaves\n");
- LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+ LIST_FOR_EACH (br, node, &all_bridges) {
size_t i;
for (i = 0; i < br->n_ports; i++) {
@@ -2982,7 +3013,7 @@ bond_find(const char *name)
{
const struct bridge *br;
- LIST_FOR_EACH (br, struct bridge, node, &all_bridges) {
+ LIST_FOR_EACH (br, node, &all_bridges) {
size_t i;
for (i = 0; i < br->n_ports; i++) {
@@ -3042,8 +3073,7 @@ bond_unixctl_show(struct unixctl_conn *conn,
hash, be->tx_bytes / 1024);
/* MACs. */
- LIST_FOR_EACH (me, struct mac_entry, lru_node,
- &port->bridge->ml->lrus) {
+ LIST_FOR_EACH (me, lru_node, &port->bridge->ml->lrus) {
uint16_t xf_ifidx;
tag_type tags = 0;
if (bond_hash(me->mac) == hash
@@ -3395,7 +3425,6 @@ port_reconfigure(struct port *port, const struct ovsrec_port *cfg)
trunks = NULL;
if (vlan < 0 && cfg->n_trunks) {
size_t n_errors;
- size_t i;
trunks = bitmap_allocate(4096);
n_errors = 0;
@@ -3694,7 +3723,7 @@ iface_destroy(struct iface *iface)
shash_find_and_delete_assert(&br->iface_by_name, iface->name);
if (iface->xf_ifidx >= 0) {
- port_array_set(&br->ifaces, iface->xf_ifidx, NULL);
+ hmap_remove(&br->ifaces, &iface->xf_ifidx_node);
}
del = port->ifaces[iface->port_ifidx] = port->ifaces[--port->n_ifaces];
@@ -3724,7 +3753,15 @@ iface_lookup(const struct bridge *br, const char *name)
static struct iface *
iface_from_xf_ifidx(const struct bridge *br, uint16_t xf_ifidx)
{
- return port_array_get(&br->ifaces, xf_ifidx);
+ struct iface *iface;
+
+ /* Only the bucket for hash_int(xf_ifidx, 0) is scanned, which is the same
+ * hash that bridge_fetch_dp_ifaces() uses when inserting.  The explicit
+ * comparison filters out other interfaces that share the bucket. */
+ HMAP_FOR_EACH_IN_BUCKET (iface, xf_ifidx_node,
+ hash_int(xf_ifidx, 0), &br->ifaces) {
+ if (iface->xf_ifidx == xf_ifidx) {
+ return iface;
+ }
+ }
+ return NULL;
}
/* Returns true if 'iface' is the name of an "internal" interface on bridge
diff --git a/vswitchd/proc-net-compat.c b/vswitchd/proc-net-compat.c
index 6a8f8756e..0eda6d258 100644
--- a/vswitchd/proc-net-compat.c
+++ b/vswitchd/proc-net-compat.c
@@ -248,8 +248,7 @@ proc_net_compat_update_vlan(const char *tagged_dev, const char *trunk_dev,
/* 'tagged_dev' is not attached to any compat_vlan. Find the
* compat_vlan corresponding to (trunk_dev,vid) to attach it to, or
* create a new compat_vlan if none exists for (trunk_dev,vid). */
- HMAP_FOR_EACH_WITH_HASH (vlan, struct compat_vlan, trunk_node,
- hash_vlan(trunk_dev, vid),
+ HMAP_FOR_EACH_WITH_HASH (vlan, trunk_node, hash_vlan(trunk_dev, vid),
&vlans_by_trunk) {
if (!strcmp(trunk_dev, vlan->trunk_dev) && vid == vlan->vid) {
break;
@@ -340,7 +339,7 @@ update_vlan_config(void)
ds_init(&ds);
ds_put_cstr(&ds, "VLAN Dev name | VLAN ID\n"
"Name-Type: VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD\n");
- HMAP_FOR_EACH (vlan, struct compat_vlan, trunk_node, &vlans_by_trunk) {
+ HMAP_FOR_EACH (vlan, trunk_node, &vlans_by_trunk) {
ds_put_format(&ds, "%-15s| %d | %s\n",
vlan->vlan_dev, vlan->vid, vlan->trunk_dev);
}
diff --git a/vswitchd/system-stats.c b/vswitchd/system-stats.c
new file mode 100644
index 000000000..11b2fbedb
--- /dev/null
+++ b/vswitchd/system-stats.c
@@ -0,0 +1,503 @@
+/* Copyright (c) 2010 Nicira Networks
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#if HAVE_MNTENT_H
+#include <mntent.h>
+#endif
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#if HAVE_SYS_STATVFS_H
+#include <sys/statvfs.h>
+#endif
+#include <unistd.h>
+
+#include "daemon.h"
+#include "dirs.h"
+#include "dynamic-string.h"
+#include "shash.h"
+#include "system-stats.h"
+#include "timeval.h"
+#include "vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(system_stats)
+
+/* #ifdefs make it a pain to maintain code: you have to try to build both ways.
+ * Thus, this file tries to compile as much of the code as possible regardless
+ * of the target, by writing "if (LINUX)" instead of "#ifdef __linux__" where
+ * this is possible. */
+#ifdef __linux__
+#include <asm/param.h>
+#define LINUX 1
+#else
+#define LINUX 0
+#endif
+
+/* Adds a "cpu" entry to 'stats' whose value is the number of processors
+ * currently online, as reported by sysconf().  Adds nothing if sysconf()
+ * does not report a positive count. */
+static void
+get_cpu_cores(struct shash *stats)
+{
+    const long int online = sysconf(_SC_NPROCESSORS_ONLN);
+
+    if (online <= 0) {
+        return;
+    }
+    shash_add(stats, "cpu", xasprintf("%ld", online));
+}
+
+/* Adds a "load_average" entry to 'stats' holding the three values returned
+ * by getloadavg(), each formatted with two decimal places and separated by
+ * commas.  Does nothing where getloadavg() is unavailable or does not
+ * return all three samples. */
+static void
+get_load_average(struct shash *stats OVS_UNUSED)
+{
+#if HAVE_GETLOADAVG
+    double avg[3];
+    char *text;
+
+    if (getloadavg(avg, 3) != 3) {
+        return;
+    }
+
+    text = xasprintf("%.2f,%.2f,%.2f", avg[0], avg[1], avg[2]);
+    shash_add(stats, "load_average", text);
+#endif
+}
+
+/* Returns the system page size in bytes, or 0 if sysconf() cannot report
+ * it.  A nonzero result is remembered, so sysconf() is consulted again only
+ * while no valid value has been obtained. */
+static unsigned int
+get_page_size(void)
+{
+    static unsigned int page_size;
+
+    if (page_size == 0) {
+        const long int reported = sysconf(_SC_PAGESIZE);
+
+        /* A negative result means failure; stay at 0 so that the next call
+         * tries again. */
+        page_size = reported < 0 ? 0 : reported;
+    }
+    return page_size;
+}
+
+/* Adds a "memory" entry to 'stats': a comma-separated list of quantities in
+ * kB.
+ *
+ * On Linux the values come from /proc/meminfo and are, in order: total RAM,
+ * RAM in use (total minus free), buffers plus page cache, total swap, and
+ * swap in use.  Fields missing from /proc/meminfo count as 0.
+ *
+ * Elsewhere only total RAM and RAM in use are reported, derived from
+ * sysconf() page counts.
+ *
+ * Adds nothing if the needed information cannot be obtained. */
+static void
+get_memory_stats(struct shash *stats)
+{
+    if (!LINUX) {
+        unsigned int pagesize = get_page_size();
+        long int phys_pages = sysconf(_SC_PHYS_PAGES);
+        long int avphys_pages = sysconf(_SC_AVPHYS_PAGES);
+        int mem_total, mem_used;
+
+        /* 'pagesize' is unsigned, so 0 is its only failure value. */
+        if (!pagesize || phys_pages <= 0 || avphys_pages <= 0) {
+            return;
+        }
+
+        mem_total = phys_pages * (pagesize / 1024);
+        mem_used = (phys_pages - avphys_pages) * (pagesize / 1024);
+        shash_add(stats, "memory", xasprintf("%d,%d", mem_total, mem_used));
+    } else {
+        static const char file_name[] = "/proc/meminfo";
+        int mem_used, mem_cache, swap_used;
+        int mem_free = 0;
+        int buffers = 0;
+        int cached = 0;
+        int swap_free = 0;
+        int mem_total = 0;
+        int swap_total = 0;
+        struct shash dict;
+        char line[128];
+        FILE *stream;
+
+        stream = fopen(file_name, "r");
+        if (!stream) {
+            VLOG_WARN_ONCE("%s: open failed (%s)", file_name, strerror(errno));
+            return;
+        }
+
+        /* Map each /proc/meminfo key of interest to the local variable that
+         * receives its value.  'dict' only borrows these pointers. */
+        shash_init(&dict);
+        shash_add(&dict, "MemTotal", &mem_total);
+        shash_add(&dict, "MemFree", &mem_free);
+        shash_add(&dict, "Buffers", &buffers);
+        shash_add(&dict, "Cached", &cached);
+        shash_add(&dict, "SwapTotal", &swap_total);
+        shash_add(&dict, "SwapFree", &swap_free);
+        while (fgets(line, sizeof line, stream)) {
+            char key[16];
+            int value;
+
+            /* "%d" matches the 'int' that receives the value. */
+            if (sscanf(line, "%15[^:]: %d", key, &value) == 2) {
+                int *valuep = shash_find_data(&dict, key);
+                if (valuep) {
+                    *valuep = value;
+                }
+            }
+        }
+        fclose(stream);
+        shash_destroy(&dict);
+
+        mem_used = mem_total - mem_free;
+        mem_cache = buffers + cached;
+        swap_used = swap_total - swap_free;
+        shash_add(stats, "memory",
+                  xasprintf("%d,%d,%d,%d,%d", mem_total, mem_used, mem_cache,
+                            swap_total, swap_used));
+    }
+}
+
+/* Returns the system boot time in milliseconds since the epoch, taken from
+ * the "btime" line of /proc/stat, or 0 if it has never been determined.
+ *
+ * /proc/stat is reread at most once every 5 seconds.  Between reads, and
+ * when a read fails, the most recent value is returned. */
+static long long int
+get_boot_time(void)
+{
+    static long long int cache_expiration = LLONG_MIN;
+    static long long int boot_time;
+
+    assert(LINUX);
+
+    if (time_msec() >= cache_expiration) {
+        static const char stat_file[] = "/proc/stat";
+        FILE *stream;
+
+        cache_expiration = time_msec() + 5 * 1000;
+
+        stream = fopen(stat_file, "r");
+        if (stream) {
+            bool found = false;
+            char line[128];
+
+            while (!found && fgets(line, sizeof line, stream)) {
+                long long int btime;
+
+                if (sscanf(line, "btime %lld", &btime) == 1) {
+                    boot_time = btime * 1000;
+                    found = true;
+                }
+            }
+            if (!found) {
+                VLOG_ERR_ONCE("%s: btime not found", stat_file);
+            }
+            fclose(stream);
+        } else {
+            VLOG_ERR_ONCE("%s: open failed (%s)", stat_file, strerror(errno));
+        }
+    }
+    return boot_time;
+}
+
+/* Converts 'ticks', a count of USER_HZ clock ticks, into milliseconds.
+ * USER_HZ defaults to 100 when no header has defined it. */
+static unsigned long long int
+ticks_to_ms(unsigned long long int ticks)
+{
+    assert(LINUX);
+
+#ifndef USER_HZ
+#define USER_HZ 100
+#endif
+
+#if USER_HZ == 100              /* Common case: exact integer arithmetic. */
+    {
+        const unsigned long long int ms_per_tick = 1000 / USER_HZ;
+
+        return ticks * ms_per_tick;
+    }
+#else                           /* Alpha and some other architectures. */
+    /* Adding 0.5 rounds to the nearest millisecond on conversion. */
+    return ticks * (1000.0 / USER_HZ) + 0.5;
+#endif
+}
+
+/* Figures for a single process as read from /proc/<pid>/stat by
+ * get_raw_process_info(), before get_process_info() combines them with
+ * those of the process's parent. */
+struct raw_process_info {
+ unsigned long int vsz; /* Virtual size, in kB. */
+ unsigned long int rss; /* Resident set size, in kB. */
+ long long int uptime; /* ms since started. */
+ long long int cputime; /* ms of CPU used during 'uptime'. */
+ pid_t ppid; /* Parent. */
+ char name[18]; /* Name (surrounded by parentheses). */
+};
+
+/* Reads /proc/<pid>/stat for process 'pid' and stores the result in '*raw'.
+ *
+ * Returns true if successful.  Returns false, after logging an error, if
+ * the file cannot be opened or does not yield all seven fields of interest;
+ * in that case the contents of '*raw' are indeterminate and must not be
+ * used.
+ *
+ * The process name is read with "%17s", so it stops at the first white
+ * space and is truncated to 17 bytes.
+ * NOTE(review): a process name containing a space would presumably make the
+ * remaining conversions fail and this function return false -- confirm
+ * whether that matters for the daemons being monitored. */
+static bool
+get_raw_process_info(pid_t pid, struct raw_process_info *raw)
+{
+    unsigned long long int vsize, rss, start_time, utime, stime;
+    long long int start_msec;
+    unsigned long ppid;
+    char file_name[128];
+    FILE *stream;
+    int n;
+
+    assert(LINUX);
+
+    snprintf(file_name, sizeof file_name, "/proc/%lu/stat",
+             (unsigned long int) pid);
+    stream = fopen(file_name, "r");
+    if (!stream) {
+        VLOG_ERR_ONCE("%s: open failed (%s)", file_name, strerror(errno));
+        return false;
+    }
+
+    n = fscanf(stream,
+               "%*d "           /* (1. pid) */
+               "%17s "          /* 2. process name */
+               "%*c "           /* (3. state) */
+               "%lu "           /* 4. ppid */
+               "%*d "           /* (5. pgid) */
+               "%*d "           /* (6. sid) */
+               "%*d "           /* (7. tty_nr) */
+               "%*d "           /* (8. tty_pgrp) */
+               "%*u "           /* (9. flags) */
+               "%*u "           /* (10. min_flt) */
+               "%*u "           /* (11. cmin_flt) */
+               "%*u "           /* (12. maj_flt) */
+               "%*u "           /* (13. cmaj_flt) */
+               "%llu "          /* 14. utime */
+               "%llu "          /* 15. stime */
+               "%*d "           /* (16. cutime) */
+               "%*d "           /* (17. cstime) */
+               "%*d "           /* (18. priority) */
+               "%*d "           /* (19. nice) */
+               "%*d "           /* (20. num_threads) */
+               "%*d "           /* (21. always 0) */
+               "%llu "          /* 22. start_time */
+               "%llu "          /* 23. vsize */
+               "%llu "          /* 24. rss */
+#if 0
+               /* These are here for documentation but #if'd out to save
+                * actually parsing them from the stream for no benefit. */
+               "%*lu "          /* (25. rsslim) */
+               "%*lu "          /* (26. start_code) */
+               "%*lu "          /* (27. end_code) */
+               "%*lu "          /* (28. start_stack) */
+               "%*lu "          /* (29. esp) */
+               "%*lu "          /* (30. eip) */
+               "%*lu "          /* (31. pending signals) */
+               "%*lu "          /* (32. blocked signals) */
+               "%*lu "          /* (33. ignored signals) */
+               "%*lu "          /* (34. caught signals) */
+               "%*lu "          /* (35. wchan) */
+               "%*lu "          /* (36. always 0) */
+               "%*lu "          /* (37. always 0) */
+               "%*d "           /* (38. exit_signal) */
+               "%*d "           /* (39. task_cpu) */
+               "%*u "           /* (40. rt_priority) */
+               "%*u "           /* (41. policy) */
+               "%*llu "         /* (42. blkio_ticks) */
+               "%*lu "          /* (43. gtime) */
+               "%*ld"           /* (44. cgtime) */
+#endif
+               , raw->name, &ppid, &utime, &stime, &start_time, &vsize, &rss);
+    fclose(stream);
+    if (n != 7) {
+        VLOG_ERR_ONCE("%s: fscanf failed", file_name);
+        return false;
+    }
+
+    /* 'start_time' counts clock ticks since boot, so adding the boot time
+     * gives the wall-clock time at which the process started. */
+    start_msec = get_boot_time() + ticks_to_ms(start_time);
+
+    raw->vsz = vsize / 1024;
+    /* 'rss' is a page count.  Use this file's cached page size helper, as
+     * get_memory_stats() does, rather than the legacy getpagesize(). */
+    raw->rss = rss * (get_page_size() / 1024);
+    raw->uptime = time_wall_msec() - start_msec;
+    raw->cputime = ticks_to_ms(utime + stime);
+    raw->ppid = ppid;
+
+    return true;
+}
+
+/* Returns the integer that follows the first '(' in the first line of
+ * /proc/<pid>/cmdline, or 0 if the file cannot be opened or read or holds
+ * no such number.
+ *
+ * NOTE(review): this presumably relies on the monitoring process putting a
+ * parenthesized crash count into its command line -- confirm against the
+ * daemon code. */
+static int
+count_crashes(pid_t pid)
+{
+    char file_name[128];
+    char line[128];
+    int crashes = 0;
+    FILE *stream;
+
+    assert(LINUX);
+
+    sprintf(file_name, "/proc/%lu/cmdline", (unsigned long int) pid);
+    stream = fopen(file_name, "r");
+    if (!stream) {
+        VLOG_WARN_ONCE("%s: open failed (%s)", file_name, strerror(errno));
+        return crashes;
+    }
+
+    if (fgets(line, sizeof line, stream)) {
+        const char *open_paren = strchr(line, '(');
+        int count;
+
+        if (open_paren && sscanf(open_paren + 1, "%d", &count) == 1) {
+            crashes = count;
+        }
+    } else {
+        VLOG_WARN_ONCE("%s: read failed (%s)", file_name,
+                       feof(stream) ? "end of file" : strerror(errno));
+    }
+
+    fclose(stream);
+    return crashes;
+}
+
+/* Statistics that get_process_stats() reports for one daemon.  Filled in by
+ * get_process_info() from the daemon's own /proc figures and, when its
+ * parent has the same process name, from the parent's as well. */
+struct process_info {
+ unsigned long int vsz; /* Virtual size, in kB. */
+ unsigned long int rss; /* Resident set size, in kB. */
+ long long int booted; /* ms since monitor started. */
+ int crashes; /* # of crashes (usually 0). */
+ long long int uptime; /* ms since last (re)started by monitor. */
+ long long int cputime; /* ms of CPU used during 'uptime'. */
+};
+
+/* Fills '*pinfo' with statistics for process 'pid'.
+ *
+ * Returns true if successful, false if the process's /proc information
+ * cannot be read, in which case '*pinfo' is not modified.
+ *
+ * If the process's parent has the same process name, the parent is taken to
+ * be its monitor.  Then 'booted' reports the parent's uptime and 'crashes'
+ * comes from the parent's command line.  Otherwise 'booted' equals 'uptime'
+ * and 'crashes' is 0. */
+static bool
+get_process_info(pid_t pid, struct process_info *pinfo)
+{
+    struct raw_process_info child;
+
+    assert(LINUX);
+    if (!get_raw_process_info(pid, &child)) {
+        return false;
+    }
+
+    pinfo->vsz = child.vsz;
+    pinfo->rss = child.rss;
+    pinfo->booted = child.uptime;
+    pinfo->crashes = 0;
+    pinfo->uptime = child.uptime;
+    pinfo->cputime = child.cputime;
+
+    if (child.ppid) {
+        struct raw_process_info parent;
+
+        /* Look at 'parent' only if it was actually read.  On failure its
+         * contents are indeterminate (the parent may have exited or be
+         * unreadable), and the child's own figures remain the answer. */
+        if (get_raw_process_info(child.ppid, &parent)
+            && !strcmp(child.name, parent.name)) {
+            pinfo->booted = parent.uptime;
+            pinfo->crashes = count_crashes(child.ppid);
+        }
+    }
+
+    return true;
+}
+
+/* Adds one "process_<name>" entry to 'stats' for each file "<name>.pid" in
+ * 'ovs_rundir' whose pid can be read with read_pidfile() and for which
+ * kill(pid, 0) succeeds.
+ *
+ * On Linux, when the process's information can be read, the value is
+ * "vsz,rss,cputime,crashes,booted,uptime" (see struct process_info).
+ * Otherwise the value is the empty string.  A name already present in
+ * 'stats' is left untouched. */
+static void
+get_process_stats(struct shash *stats)
+{
+    struct dirent *entry;
+    DIR *dir;
+
+    dir = opendir(ovs_rundir);
+    if (!dir) {
+        VLOG_ERR_ONCE("%s: open failed (%s)", ovs_rundir, strerror(errno));
+        return;
+    }
+
+    while ((entry = readdir(dir)) != NULL) {
+        struct process_info pinfo;
+        const char *suffix;
+        char *pidfile;
+        char *key, *value;
+        int stem_len;
+        pid_t pid;
+
+#ifdef _DIRENT_HAVE_D_TYPE
+        /* Only regular files, or entries of unknown type, can be pidfiles. */
+        if (!(entry->d_type == DT_UNKNOWN || entry->d_type == DT_REG)) {
+            continue;
+        }
+#endif
+
+        suffix = strrchr(entry->d_name, '.');
+        if (suffix == NULL || strcmp(suffix, ".pid") != 0) {
+            continue;
+        }
+        stem_len = (int) (suffix - entry->d_name);
+
+        pidfile = xasprintf("%s/%s", ovs_rundir, entry->d_name);
+        pid = read_pidfile(pidfile);
+        free(pidfile);
+        if (pid < 0 || kill(pid, 0) != 0) {
+            continue;
+        }
+
+        key = xasprintf("process_%.*s", stem_len, entry->d_name);
+        if (shash_find(stats, key)) {
+            free(key);
+            continue;
+        }
+
+        if (LINUX && get_process_info(pid, &pinfo)) {
+            value = xasprintf("%lu,%lu,%lld,%d,%lld,%lld",
+                              pinfo.vsz, pinfo.rss, pinfo.cputime,
+                              pinfo.crashes, pinfo.booted, pinfo.uptime);
+        } else {
+            value = xstrdup("");
+        }
+
+        /* 'stats' takes over 'key' instead of copying it. */
+        shash_add_nocopy(stats, key, value);
+    }
+
+    closedir(dir);
+}
+
+/* Adds a "file_systems" entry to 'stats' describing the local, writable
+ * file systems listed in /etc/mtab.  Each file system is rendered as
+ * "<mount point>,<total kB>,<used kB>", and the items are separated by
+ * spaces.  Spaces and commas within a mount point are replaced by
+ * underscores so that the separators stay unambiguous.
+ *
+ * Adds nothing if /etc/mtab cannot be opened, if no file system qualifies,
+ * or on systems without setmntent() and statvfs(). */
+static void
+get_filesys_stats(struct shash *stats OVS_UNUSED)
+{
+#if HAVE_SETMNTENT && HAVE_STATVFS
+    static const char file_name[] = "/etc/mtab";
+    struct mntent *me;
+    FILE *stream;
+    struct ds s;
+
+    stream = setmntent(file_name, "r");
+    if (!stream) {
+        VLOG_ERR_ONCE("%s: open failed (%s)", file_name, strerror(errno));
+        return;
+    }
+
+    ds_init(&s);
+    while ((me = getmntent(stream)) != NULL) {
+        unsigned long long int total_kb, free_kb;
+        struct statvfs vfs;
+        char *p;
+
+        /* Skip non-local and read-only filesystems. */
+        if (strncmp(me->mnt_fsname, "/dev", 4)
+            || !strstr(me->mnt_opts, "rw")) {
+            continue;
+        }
+
+        /* Given the mount point we can stat the file system.  Skip it if
+         * statvfs() fails, since 'vfs' then holds no valid data, or if the
+         * file system turns out to be read-only after all. */
+        if (statvfs(me->mnt_dir, &vfs) || vfs.f_flag & ST_RDONLY) {
+            /* That's odd... */
+            continue;
+        }
+
+        /* Now format the data. */
+        if (s.length) {
+            ds_put_char(&s, ' ');
+        }
+        for (p = me->mnt_dir; *p != '\0'; p++) {
+            ds_put_char(&s, *p == ' ' || *p == ',' ? '_' : *p);
+        }
+        total_kb = (unsigned long long int) vfs.f_frsize * vfs.f_blocks / 1024;
+        free_kb = (unsigned long long int) vfs.f_frsize * vfs.f_bfree / 1024;
+        ds_put_format(&s, ",%llu,%llu", total_kb, total_kb - free_kb);
+    }
+    endmntent(stream);
+
+    if (s.length) {
+        shash_add(stats, "file_systems", ds_steal_cstr(&s));
+    }
+    ds_destroy(&s);
+#endif  /* HAVE_SETMNTENT && HAVE_STATVFS */
+}
+
+/* Adds system statistics to 'stats' by running every collector in this
+ * file, in order: CPU count, load average, memory, per-process figures,
+ * and file systems.  Each collector adds its own entries and silently adds
+ * nothing when its information is unavailable. */
+void
+get_system_stats(struct shash *stats)
+{
+    static void (*const collectors[])(struct shash *) = {
+        get_cpu_cores,
+        get_load_average,
+        get_memory_stats,
+        get_process_stats,
+        get_filesys_stats
+    };
+    size_t i;
+
+    for (i = 0; i < sizeof collectors / sizeof collectors[0]; i++) {
+        collectors[i](stats);
+    }
+}
diff --git a/vswitchd/system-stats.h b/vswitchd/system-stats.h
new file mode 100644
index 000000000..559be692d
--- /dev/null
+++ b/vswitchd/system-stats.h
@@ -0,0 +1,21 @@
+/* Copyright (c) 2010 Nicira Networks
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef VSWITCHD_SYSTEM_STATS
+#define VSWITCHD_SYSTEM_STATS 1
+
+/* Forward declaration, so that this header can be included without first
+ * including "shash.h". */
+struct shash;
+
+/* Adds system statistics (CPU count, load average, memory, per-process
+ * figures, file systems) to the caller-initialized 'stats'. */
+void get_system_stats(struct shash *);
+
+#endif /* vswitchd/system-stats.h */
diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
index c8a4963af..d872fdc51 100644
--- a/vswitchd/vswitch.ovsschema
+++ b/vswitchd/vswitch.ovsschema
@@ -25,7 +25,7 @@
"refTable": "Capability"},
"min": 0, "max": "unlimited"}},
"statistics": {
- "type": {"key": "string", "value": "integer", "min": 0, "max": "unlimited"},
+ "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"},
"ephemeral": true}},
"maxRows": 1},
"Capability": {
@@ -134,15 +134,14 @@
"ofport": {
"type": {"key": "integer", "min": 0, "max": 1},
"ephemeral": true},
+ "other_config": {
+ "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}},
"statistics": {
"type": {"key": "string", "value": "integer", "min": 0, "max": "unlimited"},
"ephemeral": true},
"status": {
"type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"},
- "ephemeral": true},
- "external_ids": {
- "type": {"key": "string", "value": "string",
- "min": 0, "max": "unlimited"}}}},
+ "ephemeral": true}}},
"QoS": {
"columns": {
"type": {
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 38dc6a1aa..8b2221b83 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -1,3 +1,4 @@
+<?xml version="1.0" encoding="utf-8"?>
<database title="Open vSwitch Configuration Database">
<p>A database with this schema holds the configuration for one Open
vSwitch daemon. The root of the configuration for the daemon is
@@ -74,21 +75,133 @@
<column name="statistics">
<p>
- Key-value pairs that report statistics about a running Open_vSwitch
- daemon. The current implementation updates these counters
- periodically. In the future, we plan to, instead, update them only
- when they are queried (e.g. using an OVSDB <code>select</code>
- operation) and perhaps at other times, but not on any regular
- periodic basis.</p>
- <p>
- The currently defined key-value pairs are listed below. Some Open
- vSwitch implementations may not support some statistics, in which
- case those key-value pairs are omitted.</p>
+ Key-value pairs that report statistics about a system running an Open
+ vSwitch. These are updated periodically (currently, every 5
+ seconds). Key-value pairs that cannot be determined or that do not
+ apply to a platform are omitted.
+ </p>
+
<dl>
- <dt><code>load-average</code></dt>
+ <dt><code>cpu</code></dt>
+ <dd>
+ <p>
+ Number of CPU processors, threads, or cores currently online and
+ available to the operating system on which Open vSwitch is
+ running, as an integer. This may be less than the number
+ installed, if some are not online or if they are not available to
+ the operating system.
+ </p>
+ <p>
+ Open vSwitch userspace processes are not multithreaded, but the
+ Linux kernel-based datapath is.
+ </p>
+ </dd>
+
+ <dt><code>load_average</code></dt>
+ <dd>
+ <p>
+ A comma-separated list of three floating-point numbers,
+ representing the system load average over the last 1, 5, and 15
+ minutes, respectively.
+ </p>
+ </dd>
+
+ <dt><code>memory</code></dt>
+ <dd>
+ <p>
+ A comma-separated list of integers, each of which represents a
+ quantity of memory in kilobytes that describes the operating
+ system on which Open vSwitch is running. In respective order,
+ these values are:
+ </p>
+
+ <ol>
+ <li>Total amount of RAM allocated to the OS.</li>
+ <li>RAM allocated to the OS that is in use.</li>
+ <li>RAM that can be flushed out to disk or otherwise discarded
+ if that space is needed for another purpose. This number is
+ necessarily less than or equal to the previous value.</li>
+ <li>Total disk space allocated for swap.</li>
+ <li>Swap space currently in use.</li>
+ </ol>
+
+ <p>
+ On Linux, all five values can be determined and are included. On
+ other operating systems, only the first two values can be
+ determined, so the list will only have two values.
+ </p>
+ </dd>
+
+ <dt><code>process_</code><var>name</var></dt>
<dd>
- System load average multiplied by 100 and rounded to the nearest
- integer.</dd>
+ <p>
+ One such key-value pair will exist for each running Open vSwitch
+ daemon process, with <var>name</var> replaced by the daemon's
+ name (e.g. <code>process_ovs-vswitchd</code>). The value is a
+ comma-separated list of integers. The integers represent the
+ following, with memory measured in kilobytes and durations in
+ milliseconds:
+ </p>
+
+ <ol>
+ <li>The process's virtual memory size.</li>
+ <li>The process's resident set size.</li>
+ <li>The amount of user and system CPU time consumed by the
+ process.</li>
+ <li>The number of times that the process has crashed and been
+ automatically restarted by the monitor.</li>
+ <li>The duration since the process was started.</li>
+ <li>The duration for which the process has been running.</li>
+ </ol>
+
+ <p>
+ The interpretation of some of these values depends on whether the
+ process was started with the <option>--monitor</option> option. If it
+ was not, then the crash count will always be 0 and the two
+ durations will always be the same. If <option>--monitor</option>
+ was given, then the crash count may be positive; if it is, the
+ latter duration is the amount of time since the most recent crash
+ and restart.
+ </p>
+
+ <p>
+ There will be one key-value pair for each file in Open vSwitch's
+ ``run directory'' (usually <code>/var/run/openvswitch</code>)
+ whose name ends in <code>.pid</code>, whose contents are a
+ process ID, and which is locked by a running process. The
+ <var>name</var> is taken from the pidfile's name.
+ </p>
+
+ <p>
+ Currently Open vSwitch is only able to obtain all of the above
+ detail on Linux systems. On other systems, the same key-value
+ pairs will be present but the values will always be the empty
+ string.
+ </p>
+ </dd>
+
+ <dt><code>file_systems</code></dt>
+ <dd>
+ <p>
+ A space-separated list of information on local, writable file
+ systems. Each item in the list describes one file system and
+ consists in turn of a comma-separated list of the following:
+ </p>
+
+ <ol>
+ <li>Mount point, e.g. <code>/</code> or <code>/var/log</code>.
+ Any spaces or commas in the mount point are replaced by
+ underscores.</li>
+ <li>Total size, in kilobytes, as an integer.</li>
+ <li>Amount of storage in use, in kilobytes, as an integer.</li>
+ </ol>
+
+ <p>
+ This key-value pair is omitted if there are no local, writable
+ file systems or if Open vSwitch cannot obtain the needed
+ information.
+ </p>
+ </dd>
</dl>
</column>
</group>
@@ -539,6 +652,19 @@
compliance with the IEEE 802.1D specification for bridges.
Default is enabled, set to <code>false</code> to disable.</dd>
</dl>
+ <dl>
+ <dt><code>header_cache</code></dt>
+ <dd>Optional. Enable caching of tunnel headers and the output
+ path. This can lead to a significant performance increase
+ without changing behavior. In general it should not be
+ necessary to adjust this setting. However, the caching can
+ bypass certain components of the IP stack (such as IP tables)
+ and it may be useful to disable it if these features are
+ required or as a debugging measure. Default is enabled, set to
+ <code>false</code> to disable. If IPsec is enabled through the
+ <ref column="other_config"/> parameters, header caching will be
+ automatically disabled.</dd>
+ </dl>
</dd>
<dt><code>capwap</code></dt>
<dd>Ethernet tunneling over the UDP transport portion of CAPWAP
@@ -594,6 +720,17 @@
compliance with the IEEE 802.1D specification for bridges.
Default is enabled, set to <code>false</code> to disable.</dd>
</dl>
+ <dl>
+ <dt><code>header_cache</code></dt>
+ <dd>Optional. Enable caching of tunnel headers and the output
+ path. This can lead to a significant performance increase
+ without changing behavior. In general it should not be
+ necessary to adjust this setting. However, the caching can
+ bypass certain components of the IP stack (such as IP tables)
+ and it may be useful to disable it if these features are
+ required or as a debugging measure. Default is enabled, set to
+ <code>false</code> to disable.</dd>
+ </dl>
</dd>
<dt><code>patch</code></dt>
<dd>
@@ -637,24 +774,78 @@
</group>
<group title="Ingress Policing">
+ <p>
+ These settings control ingress policing for packets received on this
+ interface. On a physical interface, this limits the rate at which
+ traffic is allowed into the system from the outside; on a virtual
+ interface (one connected to a virtual machine), this limits the rate at
+ which the VM is able to transmit.
+ </p>
+ <p>
+ Policing is a simple form of quality-of-service that simply drops
+ packets received in excess of the configured rate. Due to its
+ simplicity, policing is usually less accurate and less effective than
+ egress QoS (which is configured using the <ref table="QoS"/> and <ref
+ table="Queue"/> tables).
+ </p>
+ <p>
+ Policing is currently implemented only on Linux. The Linux
+ implementation uses a simple ``token bucket'' approach:
+ </p>
+ <ul>
+ <li>
+ The size of the bucket corresponds to <ref
+ column="ingress_policing_burst"/>. Initially the bucket is full.
+ </li>
+ <li>
+ Whenever a packet is received, its size (converted to tokens) is
+ compared to the number of tokens currently in the bucket. If the
+ required number of tokens are available, they are removed and the
+ packet is forwarded. Otherwise, the packet is dropped.
+ </li>
+ <li>
+ Whenever it is not full, the bucket is refilled with tokens at the
+ rate specified by <ref column="ingress_policing_rate"/>.
+ </li>
+ </ul>
+ <p>
+ Policing interacts badly with some network protocols, and especially
+ with fragmented IP packets. Suppose that there is enough network
+ activity to keep the bucket nearly empty all the time. Then this token
+ bucket algorithm will forward a single packet every so often, with the
+ period depending on packet size and on the configured rate. All of the
+ fragments of an IP packet are normally transmitted back-to-back, as a
+ group. In such a situation, therefore, only one of these fragments
+ will be forwarded and the rest will be dropped. IP does not provide
+ any way for the intended recipient to ask for only the remaining
+ fragments. In such a case there are two likely possibilities for what
+ will happen next: either all of the fragments will eventually be
+ retransmitted (as TCP will do), in which case the same problem will
+ recur, or the sender will not realize that its packet has been dropped
+ and data will simply be lost (as some UDP-based protocols will do).
+ Either way, it is possible that no forward progress will ever occur.
+ </p>
+ <column name="ingress_policing_rate">
+ <p>
+ Maximum rate for data received on this interface, in kbps. Data
+ received faster than this rate is dropped. Set to <code>0</code>
+ (the default) to disable policing.
+ </p>
+ </column>
+
<column name="ingress_policing_burst">
<p>Maximum burst size for data received on this interface, in kb. The
default burst size if set to <code>0</code> is 1000 kb. This value
has no effect if <ref column="ingress_policing_rate"/>
is <code>0</code>.</p>
- <p>The burst size should be at least the size of the interface's
- MTU.</p>
- </column>
-
- <column name="ingress_policing_rate">
- <p>Maximum rate for data received on this interface, in kbps. Data
- received faster than this rate is dropped. Set to <code>0</code> to
- disable policing.</p>
- <p>The meaning of ``ingress'' is from Open vSwitch's perspective. If
- configured on a physical interface, then it limits the rate at which
- traffic is allowed into the system from the outside. If configured
- on a virtual interface that is connected to a virtual machine, then
- it limits the rate at which the guest is able to transmit.</p>
+ <p>
+ Specifying a larger burst size lets the algorithm be more forgiving,
+ which is important for protocols like TCP that react severely to
+ dropped packets. The burst size should be at least the size of the
+ interface's MTU. Specifying a value that is numerically at least as
+ large as 10% of <ref column="ingress_policing_rate"/> helps TCP come
+ closer to achieving the full rate.
+ </p>
</column>
</group>
@@ -665,8 +856,15 @@
integrators should either use the Open vSwitch development
mailing list to coordinate on common key-value definitions, or
choose key names that are likely to be unique. The currently
- defined common key-value pair is:
+ defined common key-value pairs are:
<dl>
+ <dt><code>attached-mac</code></dt>
+ <dd>
+ The MAC address programmed into the ``virtual hardware'' for this
+ interface, in the form
+ <var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>.
+ For Citrix XenServer, this is the value of the <code>MAC</code>
+ field in the VIF record for this interface.</dd>
<dt><code>iface-id</code></dt>
<dd>A system-unique identifier for the interface. On XenServer,
this will commonly be the same as <code>xs-vif-uuid</code>.</dd>
@@ -689,12 +887,27 @@
<dd>The virtual network to which this interface is attached.</dd>
<dt><code>xs-vm-uuid</code></dt>
<dd>The VM to which this interface belongs.</dd>
- <dt><code>xs-vif-mac</code></dt>
- <dd>The MAC address programmed into the "virtual hardware" for this
- interface, in the
- form <var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>.
- For Citrix XenServer, this is the value of the <code>MAC</code>
- field in the VIF record for this interface.</dd>
+ </dl>
+ </column>
+
+ <column name="other_config">
+ Key-value pairs for rarely used interface features. Currently,
+ the only keys are for configuring GRE-over-IPsec, which is only
+ available through the <code>openvswitch-ipsec</code> package for
+ Debian. The currently defined key-value pairs are:
+ <dl>
+ <dt><code>ipsec_local_ip</code></dt>
+ <dd>Required key for GRE-over-IPsec interfaces. Additionally,
+ the <ref column="type"/> must be <code>gre</code> and the
+ <code>ipsec_psk</code> <ref column="other_config"/> key must
+ be set. The <code>in_key</code>, <code>out_key</code>, and
+ <code>key</code> <ref column="options"/> must not be
+ set.</dd>
+ <dt><code>ipsec_psk</code></dt>
+ <dd>Required key for GRE-over-IPsec interfaces. Specifies a
+ pre-shared key for authentication that must be identical on
+ both sides of the tunnel. Additionally, the
+ <code>ipsec_local_ip</code> key must be set.</dd>
</dl>
</column>
@@ -774,7 +987,12 @@
defined types are listed below:</p>
<dl>
<dt><code>linux-htb</code></dt>
- <dd>Linux ``hierarchy token bucket'' classifier.</dd>
+ <dd>
+ Linux ``hierarchy token bucket'' classifier. See tc-htb(8) (also at
+ <code>http://linux.die.net/man/8/tc-htb</code>) and the HTB manual
+ (<code>http://luxik.cdi.cz/~devik/qos/htb/manual/userg.htm</code>)
+ for information on how this classifier works and how to configure it.
+ </dd>
</dl>
</column>
@@ -1082,34 +1300,34 @@
restricted to the specified local IP address.
</dd>
</dl>
- <p>When multiple controllers are configured for a single bridge, the
- <ref column="target"/> values must be unique. Duplicate
- <ref column="target"/> values yield unspecified results.</p>
+ <p>When multiple controllers are configured for a single bridge, the
+ <ref column="target"/> values must be unique. Duplicate
+ <ref column="target"/> values yield unspecified results.</p>
</column>
<column name="connection_mode">
- <p>If it is specified, this setting must be one of the following
- strings that describes how Open vSwitch contacts this OpenFlow
- controller over the network:</p>
-
- <dl>
- <dt><code>in-band</code></dt>
- <dd>In this mode, this controller's OpenFlow traffic travels over the
- bridge associated with the controller. With this setting, Open
- vSwitch allows traffic to and from the controller regardless of the
- contents of the OpenFlow flow table. (Otherwise, Open vSwitch
- would never be able to connect to the controller, because it did
- not have a flow to enable it.) This is the most common connection
- mode because it is not necessary to maintain two independent
- networks.</dd>
- <dt><code>out-of-band</code></dt>
- <dd>In this mode, OpenFlow traffic uses a control network separate
- from the bridge associated with this controller, that is, the
- bridge does not use any of its own network devices to communicate
- with the controller. The control network must be configured
- separately, before or after <code>ovs-vswitchd</code> is started.
- </dd>
- </dl>
+ <p>If it is specified, this setting must be one of the following
+ strings that describes how Open vSwitch contacts this OpenFlow
+ controller over the network:</p>
+
+ <dl>
+ <dt><code>in-band</code></dt>
+ <dd>In this mode, this controller's OpenFlow traffic travels over the
+ bridge associated with the controller. With this setting, Open
+ vSwitch allows traffic to and from the controller regardless of the
+ contents of the OpenFlow flow table. (Otherwise, Open vSwitch
+ would never be able to connect to the controller, because it did
+ not have a flow to enable it.) This is the most common connection
+ mode because it is not necessary to maintain two independent
+ networks.</dd>
+ <dt><code>out-of-band</code></dt>
+ <dd>In this mode, OpenFlow traffic uses a control network separate
+ from the bridge associated with this controller, that is, the
+ bridge does not use any of its own network devices to communicate
+ with the controller. The control network must be configured
+ separately, before or after <code>ovs-vswitchd</code> is started.
+ </dd>
+ </dl>
<p>If not specified, the default is implementation-specific. If
<ref column="target"/> is <code>discover</code>, the connection mode
@@ -1166,7 +1384,7 @@
<group title="Additional Discovery Configuration">
<p>These values are considered only when <ref column="target"/>
- is <code>discover</code>.</p>
+ is <code>discover</code>.</p>
<column name="discover_accept_regex">
A POSIX
@@ -1188,14 +1406,14 @@
<group title="Additional In-Band Configuration">
<p>These values are considered only in in-band control mode (see
- <ref column="connection_mode"/>) and only when <ref column="target"/>
- is not <code>discover</code>. (For controller discovery, the network
- configuration obtained via DHCP is used instead.)</p>
+ <ref column="connection_mode"/>) and only when <ref column="target"/>
+ is not <code>discover</code>. (For controller discovery, the network
+ configuration obtained via DHCP is used instead.)</p>
<p>When multiple controllers are configured on a single bridge, there
- should be only one set of unique values in these columns. If different
- values are set for these columns in different controllers, the effect
- is unspecified.</p>
+ should be only one set of unique values in these columns. If different
+ values are set for these columns in different controllers, the effect
+ is unspecified.</p>
<column name="local_ip">
The IP address to configure on the local port,
diff --git a/xenserver/GPLv2 b/xenserver/GPLv2
new file mode 100644
index 000000000..d511905c1
--- /dev/null
+++ b/xenserver/GPLv2
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/xenserver/LICENSE b/xenserver/LICENSE
index ce8949ef1..00fc4d8cb 100644
--- a/xenserver/LICENSE
+++ b/xenserver/LICENSE
@@ -1,8 +1,3 @@
-The files etc_xensource_scripts_vif and
-opt_xensource_libexec_interface-reconfigure are distributed under the
-terms of the GNU Lesser General Public License version 2.1 (included
-below).
-
As a special exception to the GNU Lesser General Public License, you
may link, statically or dynamically, a "work that uses the Library"
with a publicly distributed version of the Library to produce an
diff --git a/xenserver/README b/xenserver/README
index 9fcdb1a3d..7da3ac266 100644
--- a/xenserver/README
+++ b/xenserver/README
@@ -1,11 +1,8 @@
This directory contains files for seamless integration of Open vSwitch on
Citrix XenServer hosts managed by the Citrix management tools.
-Some of these files are modifications of Citrix's proprietary code.
-Citrix has given permission to distribute these modified files.
-Citrix has not specified a particular license for them. There is no
-guarantee that, should Citrix specify a license, that it would be
-DFSG-compliant or GPL-compatible.
+Files in this directory are licensed on a file-by-file basis. Please
+refer to each file for details.
Most of the files in this directory are installed on a XenServer system
under the same name, if underscores are replaced by slashes. The
@@ -49,7 +46,7 @@ files are:
Open vSwitch-aware replacement for Citrix script of the same name.
- usr_share_openvswitch_scripts_monitor-external-ids
+ usr_share_openvswitch_scripts_ovs-external-ids
Daemon to monitor the external_ids columns of the Bridge and
Interface OVSDB tables.
diff --git a/xenserver/automake.mk b/xenserver/automake.mk
index ffd59960b..fc23a7636 100644
--- a/xenserver/automake.mk
+++ b/xenserver/automake.mk
@@ -6,6 +6,7 @@
# without warranty of any kind.
EXTRA_DIST += \
+ xenserver/GPLv2 \
xenserver/LICENSE \
xenserver/README \
xenserver/automake.mk \
@@ -23,7 +24,7 @@ EXTRA_DIST += \
xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py \
xenserver/usr_sbin_brctl \
xenserver/usr_sbin_xen-bugtool \
- xenserver/usr_share_openvswitch_scripts_monitor-external-ids \
+ xenserver/usr_share_openvswitch_scripts_ovs-external-ids \
xenserver/usr_share_openvswitch_scripts_refresh-xs-network-uuids \
xenserver/usr_share_openvswitch_scripts_sysconfig.template \
xenserver/uuid.py
diff --git a/xenserver/etc_init.d_openvswitch b/xenserver/etc_init.d_openvswitch
index 7b86d4c66..5f18196cf 100755
--- a/xenserver/etc_init.d_openvswitch
+++ b/xenserver/etc_init.d_openvswitch
@@ -117,6 +117,12 @@ else
monitor_opt=
fi
+function hup_monitor_external_ids {
+ if [ -e /var/run/openvswitch/ovs-external-ids.pid ]; then
+ action "Configuring Open vSwitch external IDs" kill -HUP `cat /var/run/openvswitch/ovs-external-ids.pid`
+ fi
+}
+
function dp_list {
"$dpctl" show | grep '^dp[0-9]\+:' | cut -d':' -f 1
}
@@ -293,7 +299,7 @@ EOF
function set_system_ids {
if [ -f /etc/xensource-inventory ]; then
action "Configuring Open vSwitch system IDs" true
- $vsctl --no-wait set Open_vSwitch . \
+ $vsctl --no-wait --timeout=5 set Open_vSwitch . \
external-ids:system-type="$PRODUCT_BRAND" \
external-ids:system-version="$PRODUCT_VERSION-$BUILD_NUMBER" \
external-ids:system-id="$INSTALLATION_UUID" \
@@ -329,11 +335,11 @@ function start {
fi
start_ovsdb_server
- $vsctl --no-wait init
+ $vsctl --no-wait --timeout=5 init
if [ ! -e /var/run/openvswitch.booted ]; then
touch /var/run/openvswitch.booted
for bridge in $($vsctl list-br); do
- $vsctl --no-wait del-br $bridge
+ $vsctl --no-wait --timeout=5 del-br $bridge
done
fi
@@ -346,8 +352,8 @@ function start {
# Start daemon to monitor external ids
PYTHONPATH=/usr/share/openvswitch/python \
- /usr/share/openvswitch/scripts/monitor-external-ids \
- --pidfile --detach "$VSWITCHD_OVSDB_SERVER"
+ /usr/share/openvswitch/scripts/ovs-external-ids \
+ --pidfile --detach $monitor_opt "$VSWITCHD_OVSDB_SERVER"
touch /var/lock/subsys/openvswitch
}
@@ -356,9 +362,8 @@ function stop {
stop_daemon BRCOMPATD "$brcompatd"
stop_daemon VSWITCHD "$vswitchd"
stop_daemon OVSDB_SERVER "$ovsdb_server"
- if [ -e /var/run/openvswitch/monitor-external-ids.pid ]; then
- kill `cat /var/run/openvswitch/monitor-external-ids.pid`
- rm /var/run/openvswitch/monitor-external-ids.pid
+ if [ -e /var/run/openvswitch/ovs-external-ids.pid ]; then
+ kill `cat /var/run/openvswitch/ovs-external-ids.pid`
fi
rm -f /var/lock/subsys/openvswitch
}
@@ -381,8 +386,10 @@ case "$1" in
restart
;;
reload|force-reload)
- # Nothing to do--ovs-vswitchd and ovsdb-server keep their configuration
- # up-to-date all the time.
+ # Nothing to do to ovs-vswitchd and ovsdb-server as they keep their
+ # configuration up-to-date all the time. HUP ovs-external-ids so it
+ # re-runs.
+ hup_monitor_external_ids
;;
strace-vswitchd)
shift
diff --git a/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update b/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update
index 6da86d5cd..733301868 100755
--- a/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update
+++ b/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update
@@ -111,7 +111,7 @@ def setControllerCfg(controller):
'managers="ssl:' + controller + ':6632"'])
def vswitchCfgQuery(action_args):
- cmd = [vsctl, "-vANY:console:emer"] + action_args
+ cmd = [vsctl, "--timeout=5", "-vANY:console:emer"] + action_args
output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()
if len(output) == 0 or output[0] == None:
output = ""
@@ -120,14 +120,14 @@ def vswitchCfgQuery(action_args):
return output
def vswitchCfgMod(action_args):
- cmd = [vsctl, "-vANY:console:emer"] + action_args
+ cmd = [vsctl, "--timeout=5", "-vANY:console:emer"] + action_args
exitcode = subprocess.call(cmd)
if exitcode != 0:
raise XenAPIPlugin.Failure("VSWITCH_CONFIG_MOD_FAILURE",
[ str(exitcode) , str(action_args) ])
def emergency_reset(session, args):
- cmd = [vsctl, "emer-reset"]
+ cmd = [vsctl, "--timeout=5", "emer-reset"]
exitcode = subprocess.call(cmd)
if exitcode != 0:
raise XenAPIPlugin.Failure("VSWITCH_EMER_RESET_FAILURE",
diff --git a/xenserver/etc_xensource_scripts_vif b/xenserver/etc_xensource_scripts_vif
index 91b75c123..88006e2c4 100755
--- a/xenserver/etc_xensource_scripts_vif
+++ b/xenserver/etc_xensource_scripts_vif
@@ -114,7 +114,7 @@ handle_vswitch_vif_details()
local address=$(xenstore-read "/local/domain/$DOMID/device/vif/$DEVID/mac" 2>/dev/null)
if [ -n "${address}" ] ; then
- set_vif_external_id "xs-vif-mac" "${address}"
+ set_vif_external_id "attached-mac" "${address}"
fi
if $xs550; then
@@ -165,7 +165,7 @@ add_to_bridge()
local vif_details=$(handle_vswitch_vif_details $bridge)
fi
- $vsctl -- --if-exists del-port $dev -- add-port $bridge $dev $vif_details
+ $vsctl --timeout=30 -- --if-exists del-port $dev -- add-port $bridge $dev $vif_details
;;
esac
@@ -182,7 +182,7 @@ remove_from_bridge()
# If ovs-brcompatd is running, it might already have deleted the
# port. Use --if-exists to suppress the error that would otherwise
# arise in that case.
- $vsctl -- --if-exists del-port $dev
+ $vsctl --timeout=30 -- --if-exists del-port $dev
;;
esac
}
diff --git a/xenserver/openvswitch-xen.spec b/xenserver/openvswitch-xen.spec
index e420ec69c..90fee2ab0 100644
--- a/xenserver/openvswitch-xen.spec
+++ b/xenserver/openvswitch-xen.spec
@@ -73,8 +73,8 @@ install -m 644 xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py \
$RPM_BUILD_ROOT/usr/share/openvswitch/scripts/InterfaceReconfigureVswitch.py
install -m 755 xenserver/etc_xensource_scripts_vif \
$RPM_BUILD_ROOT/usr/share/openvswitch/scripts/vif
-install -m 755 xenserver/usr_share_openvswitch_scripts_monitor-external-ids \
- $RPM_BUILD_ROOT/usr/share/openvswitch/scripts/monitor-external-ids
+install -m 755 xenserver/usr_share_openvswitch_scripts_ovs-external-ids \
+ $RPM_BUILD_ROOT/usr/share/openvswitch/scripts/ovs-external-ids
install -m 755 xenserver/usr_share_openvswitch_scripts_refresh-xs-network-uuids \
$RPM_BUILD_ROOT/usr/share/openvswitch/scripts/refresh-xs-network-uuids
install -m 755 xenserver/usr_sbin_xen-bugtool \
@@ -392,7 +392,7 @@ fi
/usr/share/openvswitch/python/ovs/timeval.py
/usr/share/openvswitch/python/ovs/util.py
/usr/share/openvswitch/python/uuid.py
-/usr/share/openvswitch/scripts/monitor-external-ids
+/usr/share/openvswitch/scripts/ovs-external-ids
/usr/share/openvswitch/scripts/refresh-xs-network-uuids
/usr/share/openvswitch/scripts/interface-reconfigure
/usr/share/openvswitch/scripts/InterfaceReconfigure.py
diff --git a/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py b/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py
index bc311f803..c352594ac 100644
--- a/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py
+++ b/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py
@@ -342,7 +342,12 @@ def set_br_external_ids(pif):
# log("Network PIF %s not currently attached (%s)" % (rec['uuid'],pifrec['uuid']))
# continue
nwrec = db().get_network_record(rec['network'])
- xs_network_uuids += [nwrec['uuid']]
+
+ uuid = nwrec['uuid']
+ if pif_is_vlan(nwpif):
+ xs_network_uuids.append(uuid)
+ else:
+ xs_network_uuids.insert(0, uuid)
vsctl_argv = []
vsctl_argv += ['# configure xs-network-uuids']
diff --git a/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py b/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py
index 6ee4138b2..015f4cc00 100644
--- a/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py
+++ b/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py
@@ -1,11 +1,18 @@
-# Copyright (c) Citrix Systems 2008. All rights reserved.
-# xsconsole is proprietary software.
+# Copyright (c) 2007-2010 Citrix Systems Inc.
+# Copyright (c) 2009,2010 Nicira Networks.
#
-# Xen, the Xen logo, XenCenter, XenMotion are trademarks or registered
-# trademarks of Citrix Systems, Inc., in the United States and other
-# countries.
-
-# Copyright (c) 2009, 2010 Nicira Networks.
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 only.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
from XSConsoleLog import *
@@ -79,7 +86,7 @@ class VSwitchConfig:
@staticmethod
def Get(action):
try:
- arg = [vsctl, "-vANY:console:emer"] + action.split()
+ arg = [vsctl, "--timeout=30", "-vANY:console:emer"] + action.split()
output = ShellPipe(arg).Stdout()
except StandardError, e:
XSLogError("config retrieval error: " + str(e))
diff --git a/xenserver/usr_sbin_brctl b/xenserver/usr_sbin_brctl
index 7fecc5648..5cf0b88ac 100755
--- a/xenserver/usr_sbin_brctl
+++ b/xenserver/usr_sbin_brctl
@@ -35,7 +35,7 @@ def delegate():
def call_vsctl(cmd, arg=""):
database = '--db=' + OVSDB_SERVER
- command = [VSCTL, database, cmd]
+ command = [VSCTL, '--timeout=30', database, cmd]
if (arg):
command.append(arg)
return subprocess.Popen(command, stdout=subprocess.PIPE).communicate()[0].split()
diff --git a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids b/xenserver/usr_share_openvswitch_scripts_ovs-external-ids
index c87171f06..fc27aaae1 100755
--- a/xenserver/usr_share_openvswitch_scripts_monitor-external-ids
+++ b/xenserver/usr_share_openvswitch_scripts_ovs-external-ids
@@ -21,9 +21,12 @@
# Bridge table and duplicates its value to the preferred "xs-network-uuids".
import getopt
+import logging, logging.handlers
+import os
+import signal
import subprocess
import sys
-import syslog
+import time
import XenAPI
@@ -33,8 +36,17 @@ import ovs.util
import ovs.daemon
import ovs.db.idl
+s_log = logging.getLogger("ovs-external-ids")
+l_handler = logging.handlers.SysLogHandler(
+ "/dev/log",
+ facility=logging.handlers.SysLogHandler.LOG_DAEMON)
+l_formatter = logging.Formatter('%(filename)s: %(levelname)s: %(message)s')
+l_handler.setFormatter(l_formatter)
+s_log.addHandler(l_handler)
+
vsctl="/usr/bin/ovs-vsctl"
session = None
+force_run = False
# Set up a session to interact with XAPI.
#
@@ -51,18 +63,19 @@ def init_session():
session.xenapi.login_with_password("", "")
except:
session = None
- syslog.syslog(syslog.LOG_WARNING,
- "monitor-external-ids: Couldn't login to XAPI")
+ s_log.warning("Couldn't login to XAPI")
return False
return True
-# By default, the "bridge-id" external id in the Bridge table is the
+# By default, the "bridge-id" external id in the Bridge table is the
# same as "xs-network-uuids". This may be overridden by defining a
# "nicira-bridge-id" key in the "other_config" field of the network
# record of XAPI.
def get_bridge_id(br_name, default=None):
if not init_session():
+ s_log.warning("Failed to get bridge id %s because"
+ " XAPI session could not be initialized" % br_name)
return default
for n in session.xenapi.network.get_all():
@@ -71,7 +84,7 @@ def get_bridge_id(br_name, default=None):
continue
return rec['other_config'].get('nicira-bridge-id', default)
-# By default, the "iface-id" external id in the Interface table is the
+# By default, the "iface-id" external id in the Interface table is the
# same as "xs-vif-uuid". This may be overridden by defining a
# "nicira-iface-id" key in the "other_config" field of the VIF
# record of XAPI.
@@ -82,6 +95,8 @@ def get_iface_id(if_name, default=None):
domain,device = if_name.strip("vif").split(".")
if not init_session():
+ s_log.warning("Failed to get interface id %s because"
+ " XAPI session could not be initialized" % if_name)
return default
for n in session.xenapi.VM.get_all():
@@ -96,24 +111,29 @@ def get_iface_id(if_name, default=None):
def set_external_id(table, record, key, value):
col = 'external-ids:"' + key + '"="' + value + '"'
- cmd = [vsctl, "-vANY:console:emer", "set", table, record, col]
+ cmd = [vsctl, "--timeout=30", "-vANY:console:emer", "set", table, record, col]
exitcode = subprocess.call(cmd)
if exitcode != 0:
- syslog.syslog(syslog.LOG_WARNING,
- "monitor-external-ids: Couldn't call ovs-vsctl")
+ s_log.warning("Couldn't call ovs-vsctl")
# XAPI on XenServer 5.6 uses the external-id "network-uuids" for internal
-# networks, but we now prefer "xs-network-uuids". Look for its use and
+# networks, but we now prefer "xs-network-uuids". Look for its use and
# write our preferred external-id.
def update_network_uuids(name, ids):
if ids["network-uuids"] and not ids["xs-network-uuids"]:
- set_external_id("Bridge", name, "xs-network-uuids",
+ set_external_id("Bridge", name, "xs-network-uuids",
ids["network-uuids"])
def update_bridge_id(name, ids):
id = get_bridge_id(name, ids.get("xs-network-uuids"))
- if ids.get("bridge-id") != id and id:
- set_external_id("Bridge", name, "bridge-id", id)
+
+ if not id:
+ return
+
+ primary_id = id.split(";")[0]
+
+ if ids.get("bridge-id") != primary_id:
+ set_external_id("Bridge", name, "bridge-id", primary_id)
def update_iface_id(name, ids):
id = get_iface_id(name, ids.get("xs-vif-uuid"))
@@ -140,13 +160,13 @@ def keep_table_columns(schema, table_name, column_types):
new_columns[column_name] = column
table.columns = new_columns
return table
-
+
def monitor_uuid_schema_cb(schema):
string_type = types.Type(types.BaseType(types.StringType))
string_map_type = types.Type(types.BaseType(types.StringType),
types.BaseType(types.StringType),
0, sys.maxint)
-
+
new_tables = {}
for table_name in ("Bridge", "Interface"):
new_tables[table_name] = keep_table_columns(
@@ -161,15 +181,22 @@ def usage():
print "Other options:"
print " -h, --help display this help message"
sys.exit(0)
-
+
+def handler(signum, frame):
+ global force_run
+ if (signum == signal.SIGHUP):
+ force_run = True
+
def main(argv):
+ global force_run
+
try:
options, args = getopt.gnu_getopt(
argv[1:], 'h', ['help'] + ovs.daemon.LONG_OPTIONS)
except getopt.GetoptError, geo:
sys.stderr.write("%s: %s\n" % (ovs.util.PROGRAM_NAME, geo.msg))
sys.exit(1)
-
+
for key, value in options:
if key in ['-h', '--help']:
usage()
@@ -177,28 +204,41 @@ def main(argv):
sys.stderr.write("%s: unhandled option %s\n"
% (ovs.util.PROGRAM_NAME, key))
sys.exit(1)
-
+
if len(args) != 1:
sys.stderr.write("%s: exactly one nonoption argument is required "
"(use --help for help)\n" % ovs.util.PROGRAM_NAME)
sys.exit(1)
ovs.daemon.die_if_already_running()
-
+
remote = args[0]
idl = ovs.db.idl.Idl(remote, "Open_vSwitch", monitor_uuid_schema_cb)
ovs.daemon.daemonize()
-
+
+ # This daemon is usually started before XAPI, but it needs XAPI to
+ # complete its tasks. Wait here until XAPI has finished initializing.
+ while not os.path.exists("/var/run/xapi_init_complete.cookie"):
+ time.sleep(1)
+
+ signal.signal(signal.SIGHUP, handler)
+
bridges = {}
interfaces = {}
while True:
- if not idl.run():
+ if not force_run and not idl.run():
poller = ovs.poller.Poller()
idl.wait(poller)
poller.block()
continue
-
+
+ if force_run:
+ s_log.info("Forced to re-run as the result of a SIGHUP")
+ bridges = {}
+ interfaces = {}
+ force_run = False
+
new_bridges = {}
for rec in idl.data["Bridge"].itervalues():
name = rec.name.as_scalar()
@@ -206,13 +246,13 @@ def main(argv):
network_uuids = rec.external_ids.get("network-uuids")
new_bridges[name] = {"xs-network-uuids": xs_network_uuids,
"network-uuids": network_uuids}
-
+
new_interfaces = {}
for rec in idl.data["Interface"].itervalues():
name = rec.name.as_scalar()
xs_vif_uuid = rec.external_ids.get("xs-vif-uuid")
new_interfaces[name] = {"xs-vif-uuid": xs_vif_uuid}
-
+
if bridges != new_bridges:
for name,ids in new_bridges.items():
# Network uuids shouldn't change in the life of a bridge,
@@ -220,18 +260,23 @@ def main(argv):
if name not in bridges:
update_network_uuids(name, ids)
- update_bridge_id(name, ids)
+ if (name not in bridges) or (bridges[name] != ids):
+ update_bridge_id(name, ids)
bridges = new_bridges
if interfaces != new_interfaces:
for name,ids in new_interfaces.items():
- update_iface_id(name, ids)
+ if (name not in interfaces) or (interfaces[name] != ids):
+ update_iface_id(name, ids)
interfaces = new_interfaces
-
+
if __name__ == '__main__':
try:
main(sys.argv)
- except error.Error, e:
- sys.stderr.write("%s\n" % e)
- sys.exit(1)
+ except SystemExit:
+ # Let sys.exit() calls complete normally
+ raise
+ except:
+ s_log.exception("traceback")
+ sys.exit(ovs.daemon.RESTART_EXIT_CODE)
diff --git a/xenserver/usr_share_openvswitch_scripts_refresh-xs-network-uuids b/xenserver/usr_share_openvswitch_scripts_refresh-xs-network-uuids
index 42ebe06e7..35df06da4 100755
--- a/xenserver/usr_share_openvswitch_scripts_refresh-xs-network-uuids
+++ b/xenserver/usr_share_openvswitch_scripts_refresh-xs-network-uuids
@@ -1,2 +1,9 @@
#! /bin/sh
+# Copyright (C) 2009, 2010 Nicira Networks, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved. This file is offered as-is,
+# without warranty of any kind.
+
exec /opt/xensource/libexec/interface-reconfigure rewrite
diff --git a/xenserver/uuid.py b/xenserver/uuid.py
index ae3da25ca..599ece9ea 100644
--- a/xenserver/uuid.py
+++ b/xenserver/uuid.py
@@ -1,3 +1,56 @@
+# This file is from Python 2.5. It has been modified by adding this
+# license header, which is copied from the LICENSE file distributed
+# with Python.
+#
+# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
+# --------------------------------------------
+#
+# 1. This LICENSE AGREEMENT is between the Python Software Foundation
+# ("PSF"), and the Individual or Organization ("Licensee") accessing and
+# otherwise using this software ("Python") in source or binary form and
+# its associated documentation.
+#
+# 2. Subject to the terms and conditions of this License Agreement, PSF
+# hereby grants Licensee a nonexclusive, royalty-free, world-wide
+# license to reproduce, analyze, test, perform and/or display publicly,
+# prepare derivative works, distribute, and otherwise use Python
+# alone or in any derivative version, provided, however, that PSF's
+# License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
+# 2001, 2002, 2003, 2004, 2005, 2006, 2007 Python Software Foundation;
+# All Rights Reserved" are retained in Python alone or in any derivative
+# version prepared by Licensee.
+#
+# 3. In the event Licensee prepares a derivative work that is based on
+# or incorporates Python or any part thereof, and wants to make
+# the derivative work available to others as provided herein, then
+# Licensee hereby agrees to include in any such work a brief summary of
+# the changes made to Python.
+#
+# 4. PSF is making Python available to Licensee on an "AS IS"
+# basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+# IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
+# DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
+# FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
+# INFRINGE ANY THIRD PARTY RIGHTS.
+#
+# 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
+# FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
+# A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
+# OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+#
+# 6. This License Agreement will automatically terminate upon a material
+# breach of its terms and conditions.
+#
+# 7. Nothing in this License Agreement shall be deemed to create any
+# relationship of agency, partnership, or joint venture between PSF and
+# Licensee. This License Agreement does not grant permission to use PSF
+# trademarks or trade name in a trademark sense to endorse or promote
+# products or services of Licensee, or any third party.
+#
+# 8. By copying, installing or otherwise using Python, Licensee
+# agrees to be bound by the terms and conditions of this License
+# Agreement.
+
r"""UUID objects (universally unique identifiers) according to RFC 4122.
This module provides immutable UUID objects (class UUID) and the functions