summaryrefslogtreecommitdiff
path: root/ofproto/ofproto-dpif-monitor.c
diff options
context:
space:
mode:
authorAlex Wang <alexw@nicira.com>2014-04-03 18:31:13 -0700
committerAlex Wang <alexw@nicira.com>2014-05-02 12:49:37 -0700
commit6d308b28c023e864925b1a6775b3b91d215e89bf (patch)
treeaee2b450c503bc2d48654b14329e707bbdb7843c /ofproto/ofproto-dpif-monitor.c
parent73a9f3418e5ad987a1e2c52c63af1ad511ecc1cb (diff)
downloadopenvswitch-6d308b28c023e864925b1a6775b3b91d215e89bf.tar.gz
ofproto-dpif-monitor: Fix deadlock.
Commit 6b59b543 (ovs-thread: Use fair (but nonrecursive) rwlocks on glibc.) changed the rwlocks to nonrecursive, writer-biased lock. It also made the following deadlock possible. Assume BFD is used on both end of a link. Consider the following events: 1. Handler at one end received the BFD control packet with POLL flag set while holding the read lock of 'xlate_rwlock'. Since a BFD control packet with FINAL flag set should be sent back immediately, it calls the ofproto_dpif_monitor_port_send_soon(), in which, it tries to grab the 'monitor_mutex'. 2. The main thread needs to configure the ofproto-dpif-xlate module. It tries to grab the write lock of 'xlate_rwlock' and is blocked by event 1. 3. The monitor thread, after acquired the 'monitor_mutex', wants to acquire the read lock of 'xlate_rwlock'. Since the rwlock is now writer-biased, the attempt of acquiring read lock in event 3 will be blocked by event 2. This will subsequently cause the block of event 1, since monitor thread is holding the 'monitor_mutex'. So the deadlock happens. This commit resolves the above issue by removing the requirement of acquiring 'monitor_mutex' in ofproto_dpif_monitor_port_send_soon(). Signed-off-by: Alex Wang <alexw@nicira.com> Acked-by: Ben Pfaff <blp@nicira.com>
Diffstat (limited to 'ofproto/ofproto-dpif-monitor.c')
-rw-r--r--ofproto/ofproto-dpif-monitor.c135
1 files changed, 91 insertions, 44 deletions
diff --git a/ofproto/ofproto-dpif-monitor.c b/ofproto/ofproto-dpif-monitor.c
index 7c84e4b3a..764c7b199 100644
--- a/ofproto/ofproto-dpif-monitor.c
+++ b/ofproto/ofproto-dpif-monitor.c
@@ -21,6 +21,7 @@
#include "bfd.h"
#include "cfm.h"
+#include "guarded-list.h"
#include "hash.h"
#include "heap.h"
#include "hmap.h"
@@ -52,12 +53,24 @@ struct mport {
uint8_t hw_addr[OFP_ETH_ALEN]; /* Hardware address. */
};
+/* Entry of the 'send_soon' list. Contains the pointer to the
+ * 'ofport_dpif'. Note, the pointed object is not protected, so
+ * users should always use the mport_find() to convert it to 'mport'. */
+struct send_soon_entry {
+ struct list list_node; /* In send_soon. */
+ const struct ofport_dpif *ofport;
+};
+
/* hmap that contains "struct mport"s. */
static struct hmap monitor_hmap = HMAP_INITIALIZER(&monitor_hmap);
/* heap for ordering mport based on bfd/cfm wakeup time. */
static struct heap monitor_heap;
+/* guarded-list for storing the mports that need to send bfd/cfm control
+ * packet soon. */
+static struct guarded_list send_soon = GUARDED_LIST_INITIALIZER(&send_soon);
+
/* The monitor thread id. */
static pthread_t monitor_tid;
/* True if the monitor thread is running. */
@@ -67,7 +80,9 @@ static struct latch monitor_exit_latch;
static struct ovs_mutex monitor_mutex = OVS_MUTEX_INITIALIZER;
static void *monitor_main(void *);
+static void monitor_check_send_soon(struct ofpbuf *);
static void monitor_run(void);
+static void monitor_mport_run(struct mport *, struct ofpbuf *);
static void mport_register(const struct ofport_dpif *, struct bfd *,
struct cfm *, uint8_t[ETH_ADDR_LEN])
@@ -172,9 +187,8 @@ monitor_main(void * args OVS_UNUSED)
* reconfigured monitoring ports are run in a timely manner. */
#define MONITOR_INTERVAL_MSEC 100
-/* Checks the sending of control packets on mports that have timed out.
- * Sends the control packets if needed. Executes bfd and cfm periodic
- * functions (run, wait) on those mports. */
+/* Checks the 'send_soon' list and the heap for mports that have timed
+ * out bfd/cfm sessions. */
static void
monitor_run(void)
{
@@ -184,39 +198,26 @@ monitor_run(void)
ofpbuf_use_stub(&packet, stub, sizeof stub);
ovs_mutex_lock(&monitor_mutex);
+
+ /* The monitor_check_send_soon() needs to be run twice. The first
+ * time is for preventing the same 'mport' from being processed twice
+ * (i.e. once from heap, the other from the 'send_soon' array).
+ * The second run is to cover the case when the control packet is sent
+ * via patch port and the other end needs to send back immediately. */
+ monitor_check_send_soon(&packet);
+
prio_now = MSEC_TO_PRIO(time_msec());
/* Peeks the top of heap and checks if we should run this mport. */
while (!heap_is_empty(&monitor_heap)
&& heap_max(&monitor_heap)->priority >= prio_now) {
- long long int next_wake_time;
struct mport *mport;
mport = CONTAINER_OF(heap_max(&monitor_heap), struct mport, heap_node);
- if (mport->cfm && cfm_should_send_ccm(mport->cfm)) {
- ofpbuf_clear(&packet);
- cfm_compose_ccm(mport->cfm, &packet, mport->hw_addr);
- ofproto_dpif_send_packet(mport->ofport, &packet);
- }
- if (mport->bfd && bfd_should_send_packet(mport->bfd)) {
- ofpbuf_clear(&packet);
- bfd_put_packet(mport->bfd, &packet, mport->hw_addr);
- ofproto_dpif_send_packet(mport->ofport, &packet);
- }
- if (mport->cfm) {
- cfm_run(mport->cfm);
- cfm_wait(mport->cfm);
- }
- if (mport->bfd) {
- bfd_run(mport->bfd);
- bfd_wait(mport->bfd);
- }
- /* Computes the next wakeup time for this mport. */
- next_wake_time = MIN(bfd_wake_time(mport->bfd),
- cfm_wake_time(mport->cfm));
- heap_change(&monitor_heap, &mport->heap_node,
- MSEC_TO_PRIO(next_wake_time));
+ monitor_mport_run(mport, &packet);
}
+ monitor_check_send_soon(&packet);
+
/* Waits on the earliest next wakeup time. */
if (!heap_is_empty(&monitor_heap)) {
long long int next_timeout, next_mport_wakeup;
@@ -228,6 +229,61 @@ monitor_run(void)
ovs_mutex_unlock(&monitor_mutex);
ofpbuf_uninit(&packet);
}
+
+/* Checks the 'send_soon' list for any mport that needs to send cfm/bfd
+ * control packet immediately, and calls monitor_mport_run(). */
+static void
+monitor_check_send_soon(struct ofpbuf *packet)
+ OVS_REQUIRES(monitor_mutex)
+{
+ while (!guarded_list_is_empty(&send_soon)) {
+ struct send_soon_entry *entry;
+ struct mport *mport;
+
+ entry = CONTAINER_OF(guarded_list_pop_front(&send_soon),
+ struct send_soon_entry, list_node);
+ mport = mport_find(entry->ofport);
+ if (mport) {
+ monitor_mport_run(mport, packet);
+ }
+ free(entry);
+ }
+}
+
+/* Checks the sending of control packet on 'mport'. Sends the control
+ * packet if needed. Executes bfd and cfm periodic functions (run, wait)
+ * on 'mport'. And changes the location of 'mport' in heap based on next
+ * timeout. */
+static void
+monitor_mport_run(struct mport *mport, struct ofpbuf *packet)
+ OVS_REQUIRES(monitor_mutex)
+{
+ long long int next_wake_time;
+
+ if (mport->cfm && cfm_should_send_ccm(mport->cfm)) {
+ ofpbuf_clear(packet);
+ cfm_compose_ccm(mport->cfm, packet, mport->hw_addr);
+ ofproto_dpif_send_packet(mport->ofport, packet);
+ }
+ if (mport->bfd && bfd_should_send_packet(mport->bfd)) {
+ ofpbuf_clear(packet);
+ bfd_put_packet(mport->bfd, packet, mport->hw_addr);
+ ofproto_dpif_send_packet(mport->ofport, packet);
+ }
+ if (mport->cfm) {
+ cfm_run(mport->cfm);
+ cfm_wait(mport->cfm);
+ }
+ if (mport->bfd) {
+ bfd_run(mport->bfd);
+ bfd_wait(mport->bfd);
+ }
+ /* Computes the next wakeup time for this mport. */
+ next_wake_time = MIN(bfd_wake_time(mport->bfd),
+ cfm_wake_time(mport->cfm));
+ heap_change(&monitor_heap, &mport->heap_node,
+ MSEC_TO_PRIO(next_wake_time));
+}
/* Creates the mport in monitor module if either bfd or cfm
@@ -262,25 +318,16 @@ ofproto_dpif_monitor_port_update(const struct ofport_dpif *ofport,
}
}
-/* Moves the mport on top of the heap. This is necessary when
- * for example, bfd POLL is received and the mport should
- * immediately send FINAL back. */
-void
-ofproto_dpif_monitor_port_send_soon_safe(const struct ofport_dpif *ofport)
-{
- ovs_mutex_lock(&monitor_mutex);
- ofproto_dpif_monitor_port_send_soon(ofport);
- ovs_mutex_unlock(&monitor_mutex);
-}
-
+/* Registers the 'ofport' in the 'send_soon' list. We cannot directly
+ * insert the corresponding mport to the 'send_soon' list, since the
+ * 'send_soon' list is not updated when the mport is removed.
+ *
+ * Reader of the 'send_soon' list is responsible for freeing the entry. */
void
ofproto_dpif_monitor_port_send_soon(const struct ofport_dpif *ofport)
- OVS_REQUIRES(monitor_mutex)
{
- struct mport *mport;
+ struct send_soon_entry *entry = xzalloc(sizeof *entry);
+ entry->ofport = ofport;
- mport = mport_find(ofport);
- if (mport) {
- heap_change(&monitor_heap, &mport->heap_node, LLONG_MAX);
- }
+ guarded_list_push_back(&send_soon, &entry->list_node, SIZE_MAX);
}