summaryrefslogtreecommitdiff
path: root/lib/tag.h
blob: 2002e5a397e3c142e353393f9a4519cec40aeedd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
/*
 * Copyright (c) 2008 Nicira Networks.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#ifndef TAG_H
#define TAG_H 1

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include "util.h"

/*
 * Tagging support.
 *
 * A 'tag' represents an arbitrary category.  Currently, tags are used to
 * represent categories of flows and in particular the dependencies for a flow
 * switching decision.  For example, if a flow's output port is based on
 * knowledge that source MAC 00:02:e3:0f:80:a4 is on eth0, then a tag that
 * represents that dependency is attached to that flow in the flowtracking hash
 * table.
 *
 * As this example shows, the universe of possible categories is very large,
 * and even the number of categories that are in use at a given time can be
 * very large.  This means that keeping track of category membership via
 * conventional means (lists, bitmaps, etc.) is likely to be expensive.
 *
 * Tags are actually implemented via a "superimposed coding", as discussed in
 * Knuth TAOCP v.3 section 6.5 "Retrieval on Secondary Keys".  A tag is an
 * unsigned integer in which exactly 2 bits are set to 1 and the rest set to 0.
 * For 32-bit integers (as currently used) there are 32 * 31 / 2 = 496 unique
 * tags; for 64-bit integers there are 64 * 63 / 2 = 2,016.
 *
 * Because there is a small finite number of unique tags, tags must collide
 * after some number of them have been created.  In practice we generally
 * create tags by choosing bits randomly.
 *
 * The key property of tags is that we can combine them without increasing the
 * amount of data required using bitwise-OR, since the result has the 1-bits
 * from both tags set.  The necessary tradeoff is that the result is even more
 * ambiguous: if combining two tags yields a value with 4 bits set to 1, then
 * the result value will test as having 4 * 3 / 2 = 6 unique tags, not just the
 * two tags that we combined.
 *
 * The upshot is this: a value that is the bitwise-OR combination of a number
 * of tags will always include the tags that were combined, but it may contain
 * any number of additional tags as well.  This is acceptable for flowtracking,
 * since we want to be sure that we catch every flow that needs to be
 * revalidated, but it is OK if we revalidate a few extra flows as well.
 *
 * If we combine too many tags, then the result will have every bit set, so
 * that it will test as including every tag.  Fortunately, this is not a big
 * problem for us: although there are many flows overall, each individual flow
 * belongs only to a small number of categories.
 */

/* Represents a tag, or the combination of 0 or more tags. */
typedef uint32_t tag_type;

tag_type tag_create_random(void);
tag_type tag_create_deterministic(uint32_t seed);
static inline bool tag_intersects(tag_type, tag_type);
static inline bool tag_is_valid(tag_type);

/* Returns true if 'a' and 'b' have at least one tag in common,
 * false if their set of tags is disjoint. . */
static inline bool
tag_intersects(tag_type a, tag_type b)
{
    tag_type x = a & b;
    return (x & (x - 1)) != 0;
}

/* Returns true if 'tag' is a valid tag, that is, if exactly two bits are set
 * to 1 and the rest to 0.   Otherwise, returns false. */
static inline bool
tag_is_valid(tag_type tag)
{
    tag_type x = tag & (tag - 1);
    tag_type y = x & (x - 1);
    return x && !y;
}

/*
 * A tag set accumulates tags with reduced ambiguity compared to a single tag.
 * The flow tracking uses tag sets to keep track of tags that need to
 * revalidated after a number of packets have been processed.
 */
#define TAG_SET_SIZE 4
struct tag_set {
    tag_type total;
    tag_type tags[TAG_SET_SIZE];
    unsigned int n;
};

void tag_set_init(struct tag_set *);
void tag_set_add(struct tag_set *, tag_type);
static inline bool tag_set_is_empty(const struct tag_set *);
static inline bool tag_set_intersects(const struct tag_set *, tag_type);

/* Returns true if 'set' will match no tags at all,
 * false if it will match at least one tag. */
static inline bool
tag_set_is_empty(const struct tag_set *set)
{
    return !set->n;
}

/* Returns true if any of the tags in 'tags' are also in 'set',
 * false if the intersection is empty. */
static inline bool
tag_set_intersects(const struct tag_set *set, tag_type tags)
{
    BUILD_ASSERT_DECL(TAG_SET_SIZE == 4);
    return (tag_intersects(set->total, tags)
            && (tag_intersects(set->tags[0], tags)
                || tag_intersects(set->tags[1], tags)
                || tag_intersects(set->tags[2], tags)
                || tag_intersects(set->tags[3], tags)));
}

#endif /* tag.h */