summaryrefslogtreecommitdiff
path: root/lib/classifier.h
diff options
context:
space:
mode:
authorJarno Rajahalme <jrajahalme@nicira.com>2013-12-11 11:07:01 -0800
committerJarno Rajahalme <jrajahalme@nicira.com>2013-12-11 11:07:01 -0800
commit13751fd88c4b7464f9453c03659201c10b3b87a0 (patch)
tree5e3948feee5b405327d60c4f083988021d51191b /lib/classifier.h
parent8c301900fc6f7faface4a2cbd016411f966d0601 (diff)
downloadopenvswitch-13751fd88c4b7464f9453c03659201c10b3b87a0.tar.gz
Classifier: Track address prefixes.
Add a prefix tree (trie) structure for tracking the used address space, enabling skipping classifier tables containing longer masks than necessary for an address field value in a packet header being classified. This enables less unwildcarding for datapath flows in parts of the address space without host routes. Trie lookup is interwoven to the staged lookup, so that a trie is searched only when the configured trie field becomes relevant for the lookup. The trie lookup results are retained so that each trie is checked at most once for each classifier lookup. This implementation tracks the number of rules at each address prefix for the whole classifier. More aggressive table skipping would be possible by maintaining lists of tables that have prefixes at the lengths encountered on tree traversal, or by maintaining separate tries for subsets of rules separated by metadata fields. Prefix tracking is configured via OVSDB. A new column "prefixes" is added to the database table "Flow_Table". "prefixes" is a set of string values listing the field names for which prefix lookup should be used. As of now, the fields for which prefix lookup can be enabled are: - tun_id, tun_src, tun_dst - nw_src, nw_dst (or aliases ip_src and ip_dst) - ipv6_src, ipv6_dst There is a maximum number of fields that can be enabled for any one flow table. Currently this limit is 3. Examples: ovs-vsctl set Bridge br0 flow_tables:0=@N1 -- \ --id=@N1 create Flow_Table name=table0 ovs-vsctl set Bridge br0 flow_tables:1=@N1 -- \ --id=@N1 create Flow_Table name=table1 ovs-vsctl set Flow_Table table0 prefixes=ip_dst,ip_src ovs-vsctl set Flow_Table table1 prefixes=[] Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com> Acked-by: Ben Pfaff <blp@nicira.com>
Diffstat (limited to 'lib/classifier.h')
-rw-r--r--lib/classifier.h152
1 files changed, 136 insertions, 16 deletions
diff --git a/lib/classifier.h b/lib/classifier.h
index 3c3e7d160..b6b89a0c1 100644
--- a/lib/classifier.h
+++ b/lib/classifier.h
@@ -24,15 +24,79 @@
* =====
*
* A flow classifier holds any number of "rules", each of which specifies
- * values to match for some fields or subfields and a priority. The primary
- * design goal for the classifier is that, given a packet, it can as quickly as
- * possible find the highest-priority rule that matches the packet.
+ * values to match for some fields or subfields and a priority. Each OpenFlow
+ * table is implemented as a flow classifier.
*
- * Each OpenFlow table is implemented as a flow classifier.
+ * The classifier has two primary design goals. The first is obvious: given a
+ * set of packet headers, as quickly as possible find the highest-priority rule
+ * that matches those headers. The following section describes the second
+ * goal.
*
*
- * Basic Design
- * ============
+ * "Un-wildcarding"
+ * ================
+ *
+ * A primary goal of the flow classifier is to produce, as a side effect of a
+ * packet lookup, a wildcard mask that indicates which bits of the packet
+ * headers were essential to the classification result. Ideally, a 1-bit in
+ * any position of this mask means that, if the corresponding bit in the packet
+ * header were flipped, then the classification result might change. A 0-bit
+ * means that changing the packet header bit would have no effect. Thus, the
+ * wildcarded bits are the ones that played no role in the classification
+ * decision.
+ *
+ * Such a wildcard mask is useful with datapaths that support installing flows
+ * that wildcard fields or subfields. If an OpenFlow lookup for a TCP flow
+ * does not actually look at the TCP source or destination ports, for example,
+ * then the switch may install into the datapath a flow that wildcards the port
+ * numbers, which in turn allows the datapath to handle packets that arrive for
+ * other TCP source or destination ports without additional help from
+ * ovs-vswitchd. This is useful for the Open vSwitch software and,
+ * potentially, for ASIC-based switches as well.
+ *
+ * Some properties of the wildcard mask:
+ *
+ * - "False 1-bits" are acceptable, that is, setting a bit in the wildcard
+ * mask to 1 will never cause a packet to be forwarded the wrong way.
+ * As a corollary, a wildcard mask composed of all 1-bits will always
+ * yield correct (but often needlessly inefficient) behavior.
+ *
+ * - "False 0-bits" can cause problems, so they must be avoided. In the
+ * extreme case, a mask of all 0-bits is only correct if the classifier
+ * contains only a single flow that matches all packets.
+ *
+ * - 0-bits are desirable because they allow the datapath to act more
+ * autonomously, relying less on ovs-vswitchd to process flow setups,
+ * thereby improving performance.
+ *
+ * - We don't know a good way to generate wildcard masks with the maximum
+ * (correct) number of 0-bits. We use various approximations, described
+ * in later sections.
+ *
+ * - Wildcard masks for lookups in a given classifier yield a
+ * non-overlapping set of rules. More specifically:
+ *
+ * Consider an classifier C1 filled with an arbitrary collection of rules
+ * and an empty classifier C2. Now take a set of packet headers H and
+ * look it up in C1, yielding a highest-priority matching rule R1 and
+ * wildcard mask M. Form a new classifier rule R2 out of packet headers
+ * H and mask M, and add R2 to C2 with a fixed priority. If one were to
+ * do this for every possible set of packet headers H, then this
+ * process would not attempt to add any overlapping rules to C2, that is,
+ * any packet lookup using the rules generated by this process matches at
+ * most one rule in C2.
+ *
+ * During the lookup process, the classifier starts out with a wildcard mask
+ * that is all 0-bits, that is, fully wildcarded. As lookup proceeds, each
+ * step tends to add constraints to the wildcard mask, that is, change
+ * wildcarded 0-bits into exact-match 1-bits. We call this "un-wildcarding".
+ * A lookup step that examines a particular field must un-wildcard that field.
+ * In general, un-wildcarding is necessary for correctness but undesirable for
+ * performance.
+ *
+ *
+ * Basic Classifier Design
+ * =======================
*
* Suppose that all the rules in a classifier had the same form. For example,
* suppose that they all matched on the source and destination Ethernet address
@@ -51,8 +115,10 @@
* cls_subtable" in the classifier and tracks the highest-priority match that
* it finds. The subtables are kept in a descending priority order according
* to the highest priority rule in each subtable, which allows lookup to skip
- * over subtables that can't possibly have a higher-priority match than
- * already found.
+ * over subtables that can't possibly have a higher-priority match than already
+ * found. Eliminating lookups through priority ordering aids both classifier
+ * primary design goals: skipping lookups saves time and avoids un-wildcarding
+ * fields that those lookups would have examined.
*
* One detail: a classifier can contain multiple rules that are identical other
* than their priority. When this happens, only the highest priority rule out
@@ -61,18 +127,51 @@
* list inside that highest-priority rule.
*
*
- * Staged Lookup
- * =============
+ * Staged Lookup (Wildcard Optimization)
+ * =====================================
*
* Subtable lookup is performed in ranges defined for struct flow, starting
* from metadata (registers, in_port, etc.), then L2 header, L3, and finally
* L4 ports. Whenever it is found that there are no matches in the current
- * subtable, the rest of the subtable can be skipped. The rationale of this
- * logic is that as many fields as possible can remain wildcarded.
+ * subtable, the rest of the subtable can be skipped.
+ *
+ * Staged lookup does not reduce lookup time, and it may increase it, because
+ * it changes a single hash table lookup into multiple hash table lookups.
+ * It reduces un-wildcarding significantly in important use cases.
*
*
- * Partitioning
- * ============
+ * Prefix Tracking (Wildcard Optimization)
+ * =======================================
+ *
+ * Classifier uses prefix trees ("tries") for tracking the used
+ * address space, enabling skipping classifier tables containing
+ * longer masks than necessary for the given address. This reduces
+ * un-wildcarding for datapath flows in parts of the address space
+ * without host routes, but consulting extra data structures (the
+ * tries) may slightly increase lookup time.
+ *
+ * Trie lookup is interwoven with staged lookup, so that a trie is
+ * searched only when the configured trie field becomes relevant for
+ * the lookup. The trie lookup results are retained so that each trie
+ * is checked at most once for each classifier lookup.
+ *
+ * This implementation tracks the number of rules at each address
+ * prefix for the whole classifier. More aggressive table skipping
+ * would be possible by maintaining lists of tables that have prefixes
+ * at the lengths encountered on tree traversal, or by maintaining
+ * separate tries for subsets of rules separated by metadata fields.
+ *
+ * Prefix tracking is configured via OVSDB "Flow_Table" table,
+ * "fieldspec" column. "fieldspec" is a string map where a "prefix"
+ * key tells which fields should be used for prefix tracking. The
+ * value of the "prefix" key is a comma separated list of field names.
+ *
+ * There is a maximum number of fields that can be enabled for any one
+ * flow table. Currently this limit is 3.
+ *
+ *
+ * Partitioning (Lookup Time and Wildcard Optimization)
+ * ====================================================
*
* Suppose that a given classifier is being used to handle multiple stages in a
* pipeline using "resubmit", with metadata (that is, the OpenFlow 1.1+ field
@@ -104,6 +203,9 @@
* any cls_subtable that doesn't match on metadata. We handle that by giving
* any such cls_subtable TAG_ALL as its 'tags' so that it matches any tag.)
*
+ * Partitioning saves lookup time by reducing the number of subtable lookups.
+ * Each eliminated subtable lookup also reduces the amount of un-wildcarding.
+ *
*
* Thread-safety
* =============
@@ -116,6 +218,7 @@
#include "hmap.h"
#include "list.h"
#include "match.h"
+#include "meta-flow.h"
#include "tag.h"
#include "openflow/nicira-ext.h"
#include "openflow/openflow.h"
@@ -128,9 +231,18 @@ extern "C" {
/* Needed only for the lock annotation in struct classifier. */
extern struct ovs_mutex ofproto_mutex;
+struct trie_node;
+
+/* Prefix trie for a 'field' */
+struct cls_trie {
+ const struct mf_field *field; /* Trie field, or NULL. */
+ struct trie_node *root; /* NULL if none. */
+};
-/* Maximum number of staged lookup indices for each subtable. */
-enum { CLS_MAX_INDICES = 3 };
+enum {
+ CLS_MAX_INDICES = 3, /* Maximum number of lookup indices per subtable. */
+ CLS_MAX_TRIES = 3 /* Maximum number of prefix trees per classifier. */
+};
/* A flow classifier. */
struct classifier {
@@ -143,6 +255,8 @@ struct classifier {
*/
struct hmap partitions; /* Contains "struct cls_partition"s. */
struct ovs_rwlock rwlock OVS_ACQ_AFTER(ofproto_mutex);
+ struct cls_trie tries[CLS_MAX_TRIES]; /* Prefix tries. */
+ unsigned int n_tries;
};
/* A set of rules that all have the same fields wildcarded. */
@@ -160,6 +274,7 @@ struct cls_subtable {
uint8_t n_indices; /* How many indices to use. */
uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 flow segment boundaries. */
struct hindex indices[CLS_MAX_INDICES]; /* Staged lookup indices. */
+ unsigned int trie_plen[CLS_MAX_TRIES]; /* Trie prefix length in 'mask'. */
};
/* Returns true if 'table' is a "catch-all" subtable that will match every
@@ -211,6 +326,11 @@ bool cls_rule_is_loose_match(const struct cls_rule *rule,
void classifier_init(struct classifier *cls, const uint8_t *flow_segments);
void classifier_destroy(struct classifier *);
+void classifier_set_prefix_fields(struct classifier *cls,
+ const enum mf_field_id *trie_fields,
+ unsigned int n_trie_fields)
+ OVS_REQ_WRLOCK(cls->rwlock);
+
bool classifier_is_empty(const struct classifier *cls)
OVS_REQ_RDLOCK(cls->rwlock);
int classifier_count(const struct classifier *cls)