summaryrefslogtreecommitdiff
path: root/lib/conntrack.h
blob: e3a5dcc8023ff04aa148da231c83c5f3c24f6e7d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
/*
 * Copyright (c) 2015, 2016, 2017 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef CONNTRACK_H
#define CONNTRACK_H 1

#include <stdbool.h>

#include "latch.h"
#include "odp-netlink.h"
#include "openvswitch/hmap.h"
#include "openvswitch/list.h"
#include "openvswitch/thread.h"
#include "openvswitch/types.h"
#include "ovs-atomic.h"
#include "ovs-thread.h"
#include "packets.h"
#include "hindex.h"

/* Userspace connection tracker
 * ============================
 *
 * This is a connection tracking module that keeps all the state in userspace.
 *
 * Usage
 * =====
 *
 *     struct conntrack ct;
 *
 * Initialization:
 *
 *     conntrack_init(&ct);
 *
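 * It is necessary to periodically issue a call to
 *
 *     conntrack_run(&ct);
 *
 * to allow the module to clean up expired connections.
 *
 * NOTE(review): conntrack_run() is not declared in this header, and
 * 'struct conntrack' contains a 'clean_thread' member described as the
 * thread performing periodic cleanup, so this step may be obsolete --
 * confirm against the implementation.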
 *
 * To send a group of packets through the connection tracker:
 *
 *     conntrack_execute(&ct, pkt_batch, ...);
 *
 * Thread-safety
 * =============
 *
 * conntrack_execute() can be called by multiple threads simultaneously.
 */

/* Forward declarations for types that the prototypes below take by pointer.
 * 'struct dp_packet' is declared here too, because conntrack_clear() takes
 * one: without a prior declaration the tag would have prototype scope only,
 * so this header would silently depend on some other header having declared
 * it first. */
struct dp_packet;
struct dp_packet_batch;

struct conntrack;

/* An IPv4 or IPv6 address.  All members of the anonymous union overlay the
 * same storage; nothing in this structure records which member is in use, so
 * that must be known from context.
 *
 * Going by the type names, the 'ovs_16aligned_*' members are for accessing
 * addresses that are only guaranteed 16-bit alignment and the '*_aligned'
 * members are their naturally aligned equivalents (NOTE(review): confirm
 * against the type definitions). */
struct ct_addr {
    union {
        ovs_16aligned_be32 ipv4;
        union ovs_16aligned_in6_addr ipv6;
        ovs_be32 ipv4_aligned;
        struct in6_addr ipv6_aligned;
    };
};

/* NAT action flags.  Every enumerator is a distinct single bit, so several
 * of them can be OR-ed together into one mask.  Per their names they refer
 * to the source address, source port, destination address and destination
 * port respectively. */
enum nat_action_e {
    NAT_ACTION_SRC      = 0x1,
    NAT_ACTION_SRC_PORT = 0x2,
    NAT_ACTION_DST      = 0x4,
    NAT_ACTION_DST_PORT = 0x8,
};

/* Parameters of a NAT action, passed to conntrack_execute() through its
 * 'nat_action_info' argument. */
struct nat_action_info_t {
    /* Lower and upper bound of an address range. */
    struct ct_addr min_addr;
    struct ct_addr max_addr;
    /* Lower and upper bound of a port range.
     * NOTE(review): byte order is not stated in this header -- confirm. */
    uint16_t min_port;
    uint16_t max_port;
    /* Presumably a bitwise OR of 'enum nat_action_e' flags -- confirm
     * against the users of this field. */
    uint16_t nat_action;
};

/* Lifecycle entry points for a connection tracker.  conntrack_init() is the
 * initialization call named in the usage notes at the top of this header;
 * conntrack_destroy() is presumably its counterpart that releases the
 * tracker's resources (implementation not visible here -- confirm). */
void conntrack_init(struct conntrack *);
void conntrack_destroy(struct conntrack *);

/* Sends the packets in 'pkt_batch' through the connection tracker 'ct' (see
 * the usage notes at the top of this header).  May be called by multiple
 * threads simultaneously.
 *
 * The meaning of the remaining arguments and of the return value is defined
 * by the implementation, which is not visible in this header.  Judging by
 * names and types only (TODO confirm): 'zone' selects the conntrack zone,
 * 'setmark' and 'setlabel' optionally supply a mark and labels to set,
 * 'nat_action_info' optionally describes the NAT to apply, and 'now' is the
 * current time. */
int conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
                      ovs_be16 dl_type, bool force, bool commit, uint16_t zone,
                      const uint32_t *setmark,
                      const struct ovs_key_ct_labels *setlabel,
                      ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
                      const struct nat_action_info_t *nat_action_info,
                      long long now);
/* NOTE(review): presumably clears the connection tracking state carried by
 * 'packet'; the implementation is not visible here -- confirm. */
void conntrack_clear(struct dp_packet *packet);

/* Caller-provided state for one dump of the connection tracker; it is handed
 * to conntrack_dump_start(), conntrack_dump_next() and conntrack_dump_done().
 *
 * NOTE(review): the per-field notes below are inferred from names and types
 * only -- confirm against the implementation. */
struct conntrack_dump {
    /* The connection tracker being dumped. */
    struct conntrack *ct;
    /* Presumably the index into 'ct->buckets' of the bucket currently being
     * walked. */
    unsigned bucket;
    /* Presumably the position inside that bucket's hmap. */
    struct hmap_position bucket_pos;
    /* Presumably: when 'filter_zone' is true, only entries whose zone equals
     * 'zone' are reported. */
    bool filter_zone;
    uint16_t zone;
};

/* Defined elsewhere (presumably by the ct-dpif layer -- confirm); only
 * pointers to these types are used in this header. */
struct ct_dpif_entry;
struct ct_dpif_tuple;

/* Dump API, driven through a caller-provided 'struct conntrack_dump'.
 * Implementations are not visible in this header; the following is inferred
 * from names and types only (TODO confirm):
 *
 *   - conntrack_dump_start() begins a dump.  'pzone' being a pointer
 *     suggests an optional zone filter (cf. 'filter_zone' and 'zone' in
 *     'struct conntrack_dump').
 *   - conntrack_dump_next() presumably stores the next entry in the
 *     'struct ct_dpif_entry'.
 *   - conntrack_dump_done() ends the dump.
 *
 * NOTE(review): the trailing 'int *' parameter of conntrack_dump_start() is
 * unnamed and undocumented here; name it to match the definition. */
int conntrack_dump_start(struct conntrack *, struct conntrack_dump *,
                         const uint16_t *pzone, int *);
int conntrack_dump_next(struct conntrack_dump *, struct ct_dpif_entry *);
int conntrack_dump_done(struct conntrack_dump *);

/* Flushing and connection limits.  Implementations are not visible in this
 * header; the following is inferred from names and types only (TODO
 * confirm):
 *
 *   - conntrack_flush() presumably removes tracked connections; 'zone' being
 *     a pointer suggests that the zone restriction is optional.
 *   - conntrack_flush_tuple() presumably removes the connection matching the
 *     given tuple in 'zone'.
 *   - conntrack_set_maxconns() and conntrack_get_maxconns() presumably write
 *     and read the connection limit ('n_conn_limit' in 'struct conntrack').
 *   - conntrack_get_nconns() presumably reports the current number of
 *     connections ('n_conn' in 'struct conntrack'). */
int conntrack_flush(struct conntrack *, const uint16_t *zone);
int conntrack_flush_tuple(struct conntrack *, const struct ct_dpif_tuple *,
                          uint16_t zone);
int conntrack_set_maxconns(struct conntrack *ct, uint32_t maxconns);
int conntrack_get_maxconns(struct conntrack *ct, uint32_t *maxconns);
int conntrack_get_nconns(struct conntrack *ct, uint32_t *nconns);

/* 'struct ct_lock' is a wrapper for an adaptive mutex.  Keeping the mutex
 * behind this wrapper and the ct_lock_*() helpers below makes it easy to try
 * different types of locks (e.g. spinlocks). */

struct OVS_LOCKABLE ct_lock {
    struct ovs_mutex lock;
};

/* 'struct ct_rwlock' wraps a read-write lock in the same way that
 * 'struct ct_lock' wraps a mutex; it is manipulated through the
 * ct_rwlock_*() helpers below. */
struct OVS_LOCKABLE ct_rwlock {
    struct ovs_rwlock lock;
};

/* Prepares 'lock' for use by initializing the mutex it wraps as an adaptive
 * mutex. */
static inline void ct_lock_init(struct ct_lock *lock)
{
    struct ovs_mutex *mutex = &lock->lock;

    ovs_mutex_init_adaptive(mutex);
}

/* Acquires 'lock' by locking the mutex it wraps.  Thread-safety analysis is
 * turned off for the body; the OVS_ACQUIRES annotation states the effect
 * for callers. */
static inline void ct_lock_lock(struct ct_lock *lock)
    OVS_ACQUIRES(lock)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct ovs_mutex *mutex = &lock->lock;

    ovs_mutex_lock(mutex);
}

/* Releases 'lock' by unlocking the mutex it wraps.  Thread-safety analysis
 * is turned off for the body; the OVS_RELEASES annotation states the effect
 * for callers. */
static inline void ct_lock_unlock(struct ct_lock *lock)
    OVS_RELEASES(lock)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct ovs_mutex *mutex = &lock->lock;

    ovs_mutex_unlock(mutex);
}

/* Destroys the mutex wrapped by 'lock'. */
static inline void ct_lock_destroy(struct ct_lock *lock)
{
    struct ovs_mutex *mutex = &lock->lock;

    ovs_mutex_destroy(mutex);
}

/* Prepares 'lock' for use by initializing the read-write lock it wraps. */
static inline void ct_rwlock_init(struct ct_rwlock *lock)
{
    struct ovs_rwlock *rwlock = &lock->lock;

    ovs_rwlock_init(rwlock);
}


/* Acquires 'lock' for writing.  Thread-safety analysis is turned off for
 * the body; the OVS_ACQ_WRLOCK annotation states the effect for callers. */
static inline void ct_rwlock_wrlock(struct ct_rwlock *lock)
    OVS_ACQ_WRLOCK(lock)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct ovs_rwlock *rwlock = &lock->lock;

    ovs_rwlock_wrlock(rwlock);
}

/* Acquires 'lock' for reading.  Thread-safety analysis is turned off for
 * the body; the OVS_ACQ_RDLOCK annotation states the effect for callers. */
static inline void ct_rwlock_rdlock(struct ct_rwlock *lock)
    OVS_ACQ_RDLOCK(lock)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct ovs_rwlock *rwlock = &lock->lock;

    ovs_rwlock_rdlock(rwlock);
}

/* Releases 'lock', whether it was taken through ct_rwlock_rdlock() or
 * ct_rwlock_wrlock(): the same underlying unlock call serves both.
 * Thread-safety analysis is turned off for the body; the OVS_RELEASES
 * annotation states the effect for callers. */
static inline void ct_rwlock_unlock(struct ct_rwlock *lock)
    OVS_RELEASES(lock)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct ovs_rwlock *rwlock = &lock->lock;

    ovs_rwlock_unlock(rwlock);
}

/* Destroys the read-write lock wrapped by 'lock'. */
static inline void ct_rwlock_destroy(struct ct_rwlock *lock)
{
    struct ovs_rwlock *rwlock = &lock->lock;

    ovs_rwlock_destroy(rwlock);
}


/* Timeouts: all the possible timeout states passed to update_expiration()
 * are listed here.  The name will be prefixed by CT_TM_ and the value is in
 * milliseconds.
 *
 * This is an X-macro list: a user defines CT_TIMEOUT(NAME, VAL), expands
 * CT_TIMEOUTS, and then undefines CT_TIMEOUT again.  The order of the
 * entries determines the values of 'enum ct_timeout'.
 *
 * In human terms the values range from 30 seconds up to 24 hours
 * (TCP_ESTABLISHED); TCP_CLOSING is 15 minutes and TCP_FIN_WAIT is 45
 * seconds. */
#define CT_TIMEOUTS \
    CT_TIMEOUT(TCP_FIRST_PACKET, 30 * 1000) \
    CT_TIMEOUT(TCP_OPENING, 30 * 1000) \
    CT_TIMEOUT(TCP_ESTABLISHED, 24 * 60 * 60 * 1000) \
    CT_TIMEOUT(TCP_CLOSING, 15 * 60 * 1000) \
    CT_TIMEOUT(TCP_FIN_WAIT, 45 * 1000) \
    CT_TIMEOUT(TCP_CLOSED, 30 * 1000) \
    CT_TIMEOUT(OTHER_FIRST, 60 * 1000) \
    CT_TIMEOUT(OTHER_MULTIPLE, 60 * 1000) \
    CT_TIMEOUT(OTHER_BIDIR, 30 * 1000) \
    CT_TIMEOUT(ICMP_FIRST, 60 * 1000) \
    CT_TIMEOUT(ICMP_REPLY, 30 * 1000)

/* The smallest of the above values, in milliseconds (30 seconds).  It is
 * used as an upper bound for the interval between two rounds of cleanup of
 * expired entries. */
#define CT_TM_MIN (30 * 1000)

/* Compile-time check that every entry of CT_TIMEOUTS is at least CT_TM_MIN,
 * i.e. that CT_TM_MIN is not larger than any timeout.  'VAL' is
 * parenthesized so that the comparison still parses as intended if an entry
 * is ever written with an operator of lower precedence than '>=' (for
 * example a conditional expression). */
#define CT_TIMEOUT(NAME, VAL) BUILD_ASSERT_DECL((VAL) >= CT_TM_MIN);
    CT_TIMEOUTS
#undef CT_TIMEOUT

/* One enumerator, CT_TM_<NAME>, per entry of CT_TIMEOUTS, numbered from 0 in
 * list order.  N_CT_TM comes last and therefore equals the number of
 * timeouts; it is used to size per-timeout arrays such as 'exp_lists' in
 * 'struct conntrack_bucket'. */
enum ct_timeout {
#define CT_TIMEOUT(NAME, VALUE) CT_TM_##NAME,
    CT_TIMEOUTS
#undef CT_TIMEOUT
    N_CT_TM
};

/* Locking:
 *
 * The connections are kept in different buckets, which are completely
 * independent.  The bucket of a connection is determined by the hash of its
 * key.
 *
 * Each bucket has two locks.  Acquisition order is, from outermost to
 * innermost:
 *
 *    cleanup_mutex
 *    lock
 *
 */
struct conntrack_bucket {
    /* Protects 'connections' and 'exp_lists'.  Used in the fast path. */
    struct ct_lock lock;
    /* Contains the connections in the bucket, indexed by 'struct conn_key'. */
    struct hmap connections OVS_GUARDED;
    /* For each possible timeout we have a list of connections.  When the
     * timeout of a connection is updated, we move it to the back of the list.
     * Since the connections in a list have the same relative timeout, the
     * list will be ordered, with the oldest connections at the front. */
    struct ovs_list exp_lists[N_CT_TM] OVS_GUARDED;

    /* Protects 'next_cleanup'.  Used to make sure that there's only one
     * thread performing the cleanup. */
    struct ovs_mutex cleanup_mutex;
    long long next_cleanup OVS_GUARDED;
};

/* The number of buckets in a 'struct conntrack' is a power of two, expressed
 * through its base-2 logarithm: 1 << 8 == 256 buckets. */
#define CONNTRACK_BUCKETS_SHIFT 8
#define CONNTRACK_BUCKETS (1 << CONNTRACK_BUCKETS_SHIFT)

/* State of one userspace connection tracker instance. */
struct conntrack {
    /* Independent buckets containing the connections. */
    struct conntrack_bucket buckets[CONNTRACK_BUCKETS];

    /* Salt for hashing a connection key. */
    uint32_t hash_basis;

    /* The thread performing periodic cleanup of the connection
     * tracker. */
    pthread_t clean_thread;
    /* Latch to destroy the 'clean_thread'. */
    struct latch clean_thread_exit;

    /* Number of connections currently in the connection tracker. */
    atomic_count n_conn;
    /* Connections limit.  When this limit is reached, no new connection
     * will be accepted. */
    atomic_uint n_conn_limit;

    /* The following resources are referenced during NAT connection
     * creation and deletion. */
    struct hmap nat_conn_keys OVS_GUARDED;
    /* Hash table for alg expectations.  Expectations are created
     * by control connections to help create data connections. */
    struct hmap alg_expectations OVS_GUARDED;
    /* Used to look up alg expectations from the control context. */
    struct hindex alg_expectation_refs OVS_GUARDED;
    /* Expiry list for alg expectations. */
    struct ovs_list alg_exp_list OVS_GUARDED;
    /* This lock is used during NAT connection creation and deletion;
     * it is taken after a bucket lock and given back before that
     * bucket unlock.
     * This lock is similarly used to guard alg_expectations and
     * alg_expectation_refs.  If a bucket lock is also held during
     * the normal code flow, then it must be taken first and released
     * last.
     */
    struct ct_rwlock resources_lock;

};

#endif /* conntrack.h */