summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/classifier.c105
-rw-r--r--lib/classifier.h93
-rw-r--r--lib/flow.h27
-rw-r--r--manpages.mk8
4 files changed, 223 insertions, 10 deletions
diff --git a/lib/classifier.c b/lib/classifier.c
index 89f56b6e7..4c19c90fb 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -154,6 +154,7 @@ classifier_init(struct classifier *cls)
cls->n_rules = 0;
hmap_init(&cls->tables);
list_init(&cls->tables_priority);
+ hmap_init(&cls->partitions);
ovs_rwlock_init(&cls->rwlock);
}
@@ -163,12 +164,20 @@ void
classifier_destroy(struct classifier *cls)
{
if (cls) {
+ struct cls_table *partition, *next_partition;
struct cls_table *table, *next_table;
HMAP_FOR_EACH_SAFE (table, next_table, hmap_node, &cls->tables) {
destroy_table(cls, table);
}
hmap_destroy(&cls->tables);
+
+ HMAP_FOR_EACH_SAFE (partition, next_partition, hmap_node,
+ &cls->partitions) {
+ hmap_remove(&cls->partitions, &partition->hmap_node);
+ free(partition);
+ }
+ hmap_destroy(&cls->partitions);
ovs_rwlock_destroy(&cls->rwlock);
}
}
@@ -187,6 +196,45 @@ classifier_count(const struct classifier *cls)
return cls->n_rules;
}
+static uint32_t
+hash_metadata(ovs_be64 metadata_)
+{
+ uint64_t metadata = (OVS_FORCE uint64_t) metadata_;
+ return hash_2words(metadata, metadata >> 32);
+}
+
+static struct cls_partition *
+find_partition(const struct classifier *cls, ovs_be64 metadata, uint32_t hash)
+{
+ struct cls_partition *partition;
+
+ HMAP_FOR_EACH_IN_BUCKET (partition, hmap_node, hash, &cls->partitions) {
+ if (partition->metadata == metadata) {
+ return partition;
+ }
+ }
+
+ return NULL;
+}
+
+static struct cls_partition *
+create_partition(struct classifier *cls, struct cls_table *table,
+ ovs_be64 metadata)
+{
+ uint32_t hash = hash_metadata(metadata);
+ struct cls_partition *partition = find_partition(cls, metadata, hash);
+ if (!partition) {
+ partition = xmalloc(sizeof *partition);
+ partition->metadata = metadata;
+ partition->tags = 0;
+ partition->n_refs = 0;
+ hmap_insert(&cls->partitions, &partition->hmap_node, hash);
+ }
+ partition->tags |= table->tag;
+ partition->n_refs++;
+ return partition;
+}
+
/* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller
* must not modify or free it.
*
@@ -213,8 +261,17 @@ classifier_replace(struct classifier *cls, struct cls_rule *rule)
old_rule = insert_rule(cls, table, rule);
if (!old_rule) {
+ if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) {
+ ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow);
+ rule->partition = create_partition(cls, table, metadata);
+ } else {
+ rule->partition = NULL;
+ }
+
table->n_table_rules++;
cls->n_rules++;
+ } else {
+ rule->partition = old_rule->partition;
}
return old_rule;
}
@@ -238,6 +295,7 @@ classifier_insert(struct classifier *cls, struct cls_rule *rule)
void
classifier_remove(struct classifier *cls, struct cls_rule *rule)
{
+ struct cls_partition *partition;
struct cls_rule *head;
struct cls_table *table;
@@ -255,6 +313,12 @@ classifier_remove(struct classifier *cls, struct cls_rule *rule)
hmap_replace(&table->rules, &rule->hmap_node, &next->hmap_node);
}
+ partition = rule->partition;
+ if (partition && --partition->n_refs == 0) {
+ hmap_remove(&cls->partitions, &partition->hmap_node);
+ free(partition);
+ }
+
if (--table->n_table_rules == 0) {
destroy_table(cls, table);
} else {
@@ -275,13 +339,44 @@ struct cls_rule *
classifier_lookup(const struct classifier *cls, const struct flow *flow,
struct flow_wildcards *wc)
{
+ const struct cls_partition *partition;
struct cls_table *table;
struct cls_rule *best;
+ tag_type tags;
+
+ /* Determine 'tags' such that, if 'table->tag' doesn't intersect them, then
+ * 'flow' cannot possibly match in 'table':
+ *
+ * - If flow->metadata maps to a given 'partition', then we can use
+ * 'tags' for 'partition->tags'.
+ *
+ * - If flow->metadata has no partition, then no rule in 'cls' has an
+ * exact-match for flow->metadata. That means that we don't need to
+ * search any table that includes flow->metadata in its mask.
+ *
+ * In either case, we always need to search any cls_tables that do not
+ * include flow->metadata in its mask. One way to do that would be to
+ * check the "cls_table"s explicitly for that, but that would require an
+ * extra branch per table. Instead, we mark such a cls_table's 'tags' as
+ * TAG_ALL and make sure that 'tags' is never empty. This means that
+ * 'tags' always intersects such a cls_table's 'tags', so we don't need a
+ * special case.
+ */
+ partition = (hmap_is_empty(&cls->partitions)
+ ? NULL
+ : find_partition(cls, flow->metadata,
+ hash_metadata(flow->metadata)));
+ tags = partition ? partition->tags : TAG_ARBITRARY;
best = NULL;
LIST_FOR_EACH (table, list_node, &cls->tables_priority) {
- struct cls_rule *rule = find_match(table, flow);
+ struct cls_rule *rule;
+
+ if (!tag_intersects(tags, table->tag)) {
+ continue;
+ }
+ rule = find_match(table, flow);
if (wc) {
flow_wildcards_fold_minimask(wc, &table->mask);
}
@@ -293,6 +388,10 @@ classifier_lookup(const struct classifier *cls, const struct flow *flow,
* can not find anything better. */
return best;
}
+ if (!tag_intersects(tags, table->tag)) {
+ continue;
+ }
+
rule = find_match(table, flow);
if (wc) {
flow_wildcards_fold_minimask(wc, &table->mask);
@@ -550,6 +649,7 @@ find_table(const struct classifier *cls, const struct minimask *mask)
static struct cls_table *
insert_table(struct classifier *cls, const struct minimask *mask)
{
+ uint32_t hash = minimask_hash(mask, 0);
struct cls_table *table;
table = xzalloc(sizeof *table);
@@ -557,6 +657,9 @@ insert_table(struct classifier *cls, const struct minimask *mask)
minimask_clone(&table->mask, mask);
hmap_insert(&cls->tables, &table->hmap_node, minimask_hash(mask, 0));
list_push_back(&cls->tables_priority, &table->list_node);
+ table->tag = (minimask_get_metadata_mask(mask) == OVS_BE64_MAX
+ ? tag_create_deterministic(hash)
+ : TAG_ALL);
return table;
}
diff --git a/lib/classifier.h b/lib/classifier.h
index a795b4a18..00848f8ed 100644
--- a/lib/classifier.h
+++ b/lib/classifier.h
@@ -19,11 +19,80 @@
/* Flow classifier.
*
- * A classifier is a "struct classifier",
- * a hash map from a set of wildcards to a "struct cls_table",
- * a hash map from fixed field values to "struct cls_rule",
- * which can contain a list of otherwise identical rules
- * with lower priorities.
+ *
+ * What?
+ * =====
+ *
+ * A flow classifier holds any number of "rules", each of which specifies
+ * values to match for some fields or subfields and a priority. The primary
+ * design goal for the classifier is that, given a packet, it can as quickly as
+ * possible find the highest-priority rule that matches the packet.
+ *
+ * Each OpenFlow table is implemented as a flow classifier.
+ *
+ *
+ * Basic Design
+ * ============
+ *
+ * Suppose that all the rules in a classifier had the same form. For example,
+ * suppose that they all matched on the source and destination Ethernet address
+ * and wildcarded all the other fields. Then the obvious way to implement a
+ * classifier would be a hash table on the source and destination Ethernet
+ * addresses. If new classification rules came along with a different form,
+ * you could add a second hash table that hashed on the fields matched in those
+ * rules. With two hash tables, you look up a given flow in each hash table.
+ * If there are no matches, the classifier didn't contain a match; if you find
+ * a match in one of them, that's the result; if you find a match in both of
+ * them, then the result is the rule with the higher priority.
+ *
+ * This is how the classifier works. In a "struct classifier", each form of
+ * "struct cls_rule" present (based on its ->match.mask) goes into a separate
+ * "struct cls_table". A lookup does a hash lookup in every "struct cls_table"
+ * in the classifier and tracks the highest-priority match that it finds. The
+ * tables are kept in a descending priority order according to the highest
+ * priority rule in each table, which allows lookup to skip over tables that
+ * can't possibly have a higher-priority match than already found.
+ *
+ * One detail: a classifier can contain multiple rules that are identical other
+ * than their priority. When this happens, only the highest priority rule out
+ * of a group of otherwise identical rules is stored directly in the "struct
+ * cls_table", with the other almost-identical rules chained off a linked list
+ * inside that highest-priority rule.
+ *
+ *
+ * Partitioning
+ * ============
+ *
+ * Suppose that a given classifier is being used to handle multiple stages in a
+ * pipeline using "resubmit", with metadata (that is, the OpenFlow 1.1+ field
+ * named "metadata") distinguishing between the different stages. For example,
+ * metadata value 1 might identify ingress rules, metadata value 2 might
+ * identify ACLs, and metadata value 3 might identify egress rules. Such a
+ * classifier is essentially partitioned into multiple sub-classifiers on the
+ * basis of the metadata value.
+ *
+ * The classifier has a special optimization to speed up matching in this
+ * scenario:
+ *
+ * - Each cls_table that matches on metadata gets a tag derived from the
+ * table's mask, so that it is likely that each table has a unique tag.
+ * (Duplicate tags have a performance cost but do not affect
+ * correctness.)
+ *
+ * - For each metadata value matched by any cls_rule, the classifier
+ * constructs a "struct cls_partition" indexed by the metadata value.
+ * The cls_partition has a 'tags' member whose value is the bitwise-OR of
+ * the tags of each cls_table that contains any rule that matches on the
+ * cls_partition's metadata value. In other words, struct cls_partition
+ * associates metadata values with tables that need to be checked with
+ * flows with that specific metadata value.
+ *
+ * Thus, a flow lookup can start by looking up the partition associated with
+ * the flow's metadata, and then skip over any cls_table whose 'tag' does not
+ * intersect the partition's 'tags'. (The flow must also be looked up in any
+ * cls_table that doesn't match on metadata. We handle that by giving any such
+ * cls_table TAG_ALL as its 'tags' so that it matches any tag.)
+ *
*
* Thread-safety
* =============
@@ -37,6 +106,7 @@
#include "hmap.h"
#include "list.h"
#include "match.h"
+#include "tag.h"
#include "openflow/nicira-ext.h"
#include "openflow/openflow.h"
#include "ovs-thread.h"
@@ -54,6 +124,7 @@ struct classifier {
int n_rules; /* Total number of rules. */
struct hmap tables; /* Contains "struct cls_table"s. */
struct list tables_priority; /* Tables in descending priority order */
+ struct hmap partitions; /* Contains "struct cls_partition"s. */
struct ovs_rwlock rwlock OVS_ACQ_AFTER(ofproto_mutex);
};
@@ -66,6 +137,7 @@ struct cls_table {
int n_table_rules; /* Number of rules, including duplicates. */
unsigned int max_priority; /* Max priority of any rule in the table. */
unsigned int max_count; /* Count of max_priority rules. */
+ tag_type tag; /* Tag generated from mask for partitioning. */
};
/* Returns true if 'table' is a "catch-all" table that will match every
@@ -82,6 +154,17 @@ struct cls_rule {
struct list list; /* List of identical, lower-priority rules. */
struct minimatch match; /* Matching rule. */
unsigned int priority; /* Larger numbers are higher priorities. */
+ struct cls_partition *partition;
+};
+
+/* Associates a metadata value (that is, a value of the OpenFlow 1.1+ metadata
+ * field) with tags for the "cls_table"s that contain rules that match that
+ * metadata value. */
+struct cls_partition {
+ struct hmap_node hmap_node; /* In struct classifier's 'partitions' hmap. */
+ ovs_be64 metadata; /* metadata value for this partition. */
+ tag_type tags; /* OR of each included flow's cls_table tag. */
+ unsigned int n_refs; /* # of flows that refer to this partition. */
};
void cls_rule_init(struct cls_rule *, const struct match *,
diff --git a/lib/flow.h b/lib/flow.h
index 0c448ded4..d05066344 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -21,6 +21,7 @@
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
+#include "byte-order.h"
#include "openflow/nicira-ext.h"
#include "openflow/openflow.h"
#include "hash.h"
@@ -330,6 +331,7 @@ void miniflow_expand(const struct miniflow *, struct flow *);
uint32_t miniflow_get(const struct miniflow *, unsigned int u32_ofs);
uint16_t miniflow_get_vid(const struct miniflow *);
+static inline ovs_be64 miniflow_get_metadata(const struct miniflow *);
bool miniflow_equal(const struct miniflow *a, const struct miniflow *b);
bool miniflow_equal_in_minimask(const struct miniflow *a,
@@ -363,11 +365,36 @@ void minimask_expand(const struct minimask *, struct flow_wildcards *);
uint32_t minimask_get(const struct minimask *, unsigned int u32_ofs);
uint16_t minimask_get_vid_mask(const struct minimask *);
+static inline ovs_be64 minimask_get_metadata_mask(const struct minimask *);
bool minimask_equal(const struct minimask *a, const struct minimask *b);
uint32_t minimask_hash(const struct minimask *, uint32_t basis);
bool minimask_has_extra(const struct minimask *, const struct minimask *);
bool minimask_is_catchall(const struct minimask *);
+
+/* Returns the value of the OpenFlow 1.1+ "metadata" field in 'flow'. */
+static inline ovs_be64
+miniflow_get_metadata(const struct miniflow *flow)
+{
+ enum { MD_OFS = offsetof(struct flow, metadata) };
+ BUILD_ASSERT_DECL(MD_OFS % sizeof(uint32_t) == 0);
+ ovs_be32 hi = (OVS_FORCE ovs_be32) miniflow_get(flow, MD_OFS / 4);
+ ovs_be32 lo = (OVS_FORCE ovs_be32) miniflow_get(flow, MD_OFS / 4 + 1);
+
+ return htonll(((uint64_t) ntohl(hi) << 32) | ntohl(lo));
+}
+
+/* Returns the mask for the OpenFlow 1.1+ "metadata" field in 'mask'.
+ *
+ * The return value is all-1-bits if 'mask' matches on the whole value of the
+ * metadata field, all-0-bits if 'mask' entirely wildcards the metadata field,
+ * or some other value if the metadata field is partially matched, partially
+ * wildcarded. */
+static inline ovs_be64
+minimask_get_metadata_mask(const struct minimask *mask)
+{
+ return miniflow_get_metadata(&mask->masks);
+}
#endif /* flow.h */
diff --git a/manpages.mk b/manpages.mk
index 811d2f992..2a34f04bc 100644
--- a/manpages.mk
+++ b/manpages.mk
@@ -116,6 +116,10 @@ lib/vconn-active.man:
lib/vconn-passive.man:
lib/vlog.man:
+utilities/ovs-dpctl-top.8: \
+ utilities/ovs-dpctl-top.8.in
+utilities/ovs-dpctl-top.8.in:
+
utilities/ovs-dpctl.8: \
utilities/ovs-dpctl.8.in \
lib/common.man \
@@ -124,10 +128,6 @@ utilities/ovs-dpctl.8.in:
lib/common.man:
lib/vlog.man:
-utilities/ovs-dpctl-top.8: \
- utilities/ovs-dpctl-top.8.in
-utilities/ovs-dpctl-top.8.in:
-
utilities/ovs-l3ping.8: \
utilities/ovs-l3ping.8.in \
lib/common-syn.man \