summaryrefslogtreecommitdiff
path: root/lib/ovs-rcu.h
blob: a1c15c1266e8010b941000e4f11cd69b3cf783bb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
/*
 * Copyright (c) 2014, 2015, 2016 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef OVS_RCU_H
#define OVS_RCU_H 1

/* Read-Copy-Update (RCU)
 * ======================
 *
 * Introduction
 * ------------
 *
 * Atomic pointer access makes it pretty easy to implement lock-free
 * algorithms.  There is one big problem, though: when a writer updates a
 * pointer to point to a new data structure, some thread might be reading the
 * old version, and there's no convenient way to free the old version when all
 * threads are done with the old version.
 *
 * The function ovsrcu_postpone() solves that problem.  The function pointer
 * passed in as its argument is called only after all threads are done with old
 * versions of data structures.  The function callback frees an old version of
 * data no longer in use.  This technique is called "read-copy-update", or RCU
 * for short.
 *
 *
 * Details
 * -------
 *
 * A "quiescent state" is a time at which a thread holds no pointers to memory
 * that is managed by RCU; that is, when the thread is known not to reference
 * memory that might be an old version of some object freed via RCU.  For
 * example, poll_block() includes a quiescent state.
 *
 * The following functions manage the recognition of quiescent states:
 *
 *     void ovsrcu_quiesce(void)
 *
 *         Recognizes a momentary quiescent state in the current thread.
 *
 *     void ovsrcu_quiesce_start(void)
 *     void ovsrcu_quiesce_end(void)
 *
 *         Brackets a time period during which the current thread is quiescent.
 *
 * A newly created thread is initially active, not quiescent. When a process
 * becomes multithreaded, the main thread becomes active, not quiescent.
 *
 * When a quiescient state has occurred in every thread, we say that a "grace
 * period" has occurred.  Following a grace period, all of the callbacks
 * postponed before the start of the grace period MAY be invoked.  OVS takes
 * care of this automatically through the RCU mechanism: while a process still
 * has only a single thread, it invokes the postponed callbacks directly from
 * ovsrcu_quiesce() and ovsrcu_quiesce_start(); after additional threads have
 * been created, it creates an extra helper thread to invoke callbacks.
 *
 * Please note that while a postponed function call is guaranteed to happen
 * after the next time all participating threads have quiesced at least once,
 * there is no quarantee that all postponed functions are called as early as
 * possible, or that the functions postponed by different threads would be
 * called in the order the registrations took place.  In particular, even if
 * two threads provably postpone a function each in a specific order, the
 * postponed functions may still be called in the opposite order, depending on
 * the timing of when the threads call ovsrcu_quiesce(), how many functions
 * they postpone, and when the ovs-rcu thread happens to grab the functions to
 * be called.
 *
 * All functions postponed by a single thread are guaranteed to execute in the
 * order they were postponed, however.
 *
 * Usage
 * -----
 *
 * Use OVSRCU_TYPE(TYPE) to declare a pointer to RCU-protected data, e.g. the
 * following declares an RCU-protected "struct flow *" named flowp:
 *
 *     OVSRCU_TYPE(struct flow *) flowp;
 *
 * Use ovsrcu_get(TYPE, VAR) to read an RCU-protected pointer, e.g. to read the
 * pointer variable declared above:
 *
 *     struct flow *flow = ovsrcu_get(struct flow *, &flowp);
 *
 * If the pointer variable is currently protected against change (because
 * the current thread holds a mutex that protects it), ovsrcu_get_protected()
 * may be used instead.  Only on the Alpha architecture is this likely to
 * generate different code, but it may be useful documentation.
 *
 * (With GNU C or Clang, you get a compiler error if TYPE is wrong; other
 * compilers will merrily carry along accepting the wrong type.)
 *
 * Use ovsrcu_set() to write an RCU-protected pointer and ovsrcu_postpone() to
 * free the previous data.  ovsrcu_set_hidden() can be used on RCU protected
 * data not visible to any readers yet, but will be made visible by a later
 * ovsrcu_set().   ovsrcu_init() can be used to initialize RCU pointers when
 * no readers are yet executing.  If more than one thread can write the
 * pointer, then some form of external synchronization, e.g. a mutex, is
 * needed to prevent writers from interfering with one another.  For example,
 * to write the pointer variable declared above while safely freeing the old
 * value:
 *
 *     static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
 *
 *     OVSRCU_TYPE(struct flow *) flowp;
 *
 *     void
 *     change_flow(struct flow *new_flow)
 *     {
 *         ovs_mutex_lock(&mutex);
 *         ovsrcu_postpone(free,
 *                         ovsrcu_get_protected(struct flow *, &flowp));
 *         ovsrcu_set(&flowp, new_flow);
 *         ovs_mutex_unlock(&mutex);
 *     }
 *
 * In some rare cases an object may not be addressable with a pointer, but only
 * through an array index (e.g. because it's provided by another library).  It
 * is still possible to have RCU semantics by using the ovsrcu_index type.
 *
 *     static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
 *
 *     ovsrcu_index port_id;
 *
 *     void tx()
 *     {
 *         int id = ovsrcu_index_get(&port_id);
 *         if (id == -1) {
 *             return;
 *         }
 *         port_tx(id);
 *     }
 *
 *     void delete()
 *     {
 *         int id;
 *
 *         ovs_mutex_lock(&mutex);
 *         id = ovsrcu_index_get_protected(&port_id);
 *         ovsrcu_index_set(&port_id, -1);
 *         ovs_mutex_unlock(&mutex);
 *
 *         ovsrcu_synchronize();
 *         port_delete(id);
 *     }
 *
 * Use ovsrcu_barrier() to wait for all the outstanding RCU callbacks to
 * finish. This is useful when you have to destroy some resources however
 * these resources are referenced in the outstanding RCU callbacks.
 *
 *     void rcu_cb(void *A) {
 *         do_something(A);
 *     }
 *
 *     void destroy_A() {
 *         ovsrcu_postpone(rcu_cb, A); // will use A later
 *         ovsrcu_barrier(); // wait for rcu_cb done
 *         do_destroy_A(); // free A
 *     }
 */

#include "compiler.h"
#include "ovs-atomic.h"

#if __GNUC__
#define OVSRCU_TYPE(TYPE) struct { ATOMIC(TYPE) p; }
#define OVSRCU_INITIALIZER(VALUE) { VALUE }
#define ovsrcu_get__(TYPE, VAR, ORDER)                                  \
    ({                                                                  \
        TYPE value__;                                                   \
        typeof(VAR) ovsrcu_var = (VAR);                                 \
                                                                        \
        atomic_read_explicit(CONST_CAST(ATOMIC(TYPE) *, &ovsrcu_var->p), \
                             &value__, ORDER);                          \
                                                                        \
        value__;                                                        \
    })
#define ovsrcu_get(TYPE, VAR) \
    ovsrcu_get__(TYPE, VAR, memory_order_consume)
#define ovsrcu_get_protected(TYPE, VAR) \
    ovsrcu_get__(TYPE, VAR, memory_order_relaxed)

/* 'VALUE' may be an atomic operation, which must be evaluated before
 * any of the body of the atomic_store_explicit.  Since the type of
 * 'VAR' is not fixed, we cannot use an inline function to get
 * function semantics for this. */
#define ovsrcu_set__(VAR, VALUE, ORDER)                                 \
    ({                                                                  \
        typeof(VAR) ovsrcu_var = (VAR);                                 \
        typeof(VALUE) ovsrcu_value = (VALUE);                           \
        memory_order ovsrcu_order = (ORDER);                            \
                                                                        \
        atomic_store_explicit(&ovsrcu_var->p, ovsrcu_value, ovsrcu_order); \
        (void *) 0;                                                     \
    })
#else  /* not GNU C */
struct ovsrcu_pointer { ATOMIC(void *) p; };
#define OVSRCU_TYPE(TYPE) struct ovsrcu_pointer
#define OVSRCU_INITIALIZER(VALUE) { VALUE }
static inline void *
ovsrcu_get__(const struct ovsrcu_pointer *pointer, memory_order order)
{
    void *value;
    atomic_read_explicit(&CONST_CAST(struct ovsrcu_pointer *, pointer)->p,
                         &value, order);
    return value;
}
#define ovsrcu_get(TYPE, VAR) \
    CONST_CAST(TYPE, ovsrcu_get__(VAR, memory_order_consume))
#define ovsrcu_get_protected(TYPE, VAR) \
    CONST_CAST(TYPE, ovsrcu_get__(VAR, memory_order_relaxed))

static inline void ovsrcu_set__(struct ovsrcu_pointer *pointer,
                                const void *value,
                                memory_order order)
{
    atomic_store_explicit(&pointer->p, CONST_CAST(void *, value), order);
}
#endif

/* Writes VALUE to the RCU-protected pointer whose address is VAR.
 *
 * Users require external synchronization (e.g. a mutex).  See "Usage" above
 * for an example. */
#define ovsrcu_set(VAR, VALUE) \
    ovsrcu_set__(VAR, VALUE, memory_order_release)

/* This can be used for initializing RCU pointers before any readers can
 * see them.  A later ovsrcu_set() needs to make the bigger structure this
 * is part of visible to the readers. */
#define ovsrcu_set_hidden(VAR, VALUE) \
    ovsrcu_set__(VAR, VALUE, memory_order_relaxed)

/* This can be used for initializing RCU pointers before any readers are
 * executing. */
#define ovsrcu_init(VAR, VALUE) atomic_init(&(VAR)->p, VALUE)

/* Calls FUNCTION passing ARG as its pointer-type argument following the next
 * grace period.  See "Usage" above for an example. */
void ovsrcu_postpone__(void (*function)(void *aux), void *aux);
#define ovsrcu_postpone(FUNCTION, ARG)                          \
    (/* Verify that ARG is appropriate for FUNCTION. */         \
     (void) sizeof((FUNCTION)(ARG), 1),                         \
     /* Verify that ARG is a pointer type. */                   \
     (void) sizeof(*(ARG)),                                     \
     ovsrcu_postpone__((void (*)(void *))(FUNCTION), ARG))

/* An array index protected by RCU semantics.  This is an easier alternative to
 * an RCU protected pointer to a malloc'd int. */
typedef struct { atomic_int v; } ovsrcu_index;

static inline int ovsrcu_index_get__(const ovsrcu_index *i, memory_order order)
{
    int ret;
    atomic_read_explicit(CONST_CAST(atomic_int *, &i->v), &ret, order);
    return ret;
}

/* Returns the index contained in 'i'.  The returned value can be used until
 * the next grace period. */
static inline int ovsrcu_index_get(const ovsrcu_index *i)
{
    return ovsrcu_index_get__(i, memory_order_consume);
}

/* Returns the index contained in 'i'.  This is an alternative to
 * ovsrcu_index_get() that can be used when there's no possible concurrent
 * writer. */
static inline int ovsrcu_index_get_protected(const ovsrcu_index *i)
{
    return ovsrcu_index_get__(i, memory_order_relaxed);
}

static inline void ovsrcu_index_set__(ovsrcu_index *i, int value,
                                      memory_order order)
{
    atomic_store_explicit(&i->v, value, order);
}

/* Writes the index 'value' in 'i'.  The previous value of 'i' may still be
 * used by readers until the next grace period. */
static inline void ovsrcu_index_set(ovsrcu_index *i, int value)
{
    ovsrcu_index_set__(i, value, memory_order_release);
}

/* Writes the index 'value' in 'i'.  This is an alternative to
 * ovsrcu_index_set() that can be used when there's no possible concurrent
 * reader. */
static inline void ovsrcu_index_set_hidden(ovsrcu_index *i, int value)
{
    ovsrcu_index_set__(i, value, memory_order_relaxed);
}

/* Initializes 'i' with 'value'.  This is safe to call as long as there are no
 * concurrent readers. */
static inline void ovsrcu_index_init(ovsrcu_index *i, int value)
{
    atomic_init(&i->v, value);
}

/* Quiescent states. */
void ovsrcu_quiesce_start(void);
void ovsrcu_quiesce_end(void);
void ovsrcu_quiesce(void);
int ovsrcu_try_quiesce(void);
bool ovsrcu_is_quiescent(void);

/* Synchronization.  Waits for all non-quiescent threads to quiesce at least
 * once.  This can block for a relatively long time. */
void ovsrcu_synchronize(void);

void ovsrcu_exit(void);

void ovsrcu_barrier(void);

#endif /* ovs-rcu.h */