diff options
-rw-r--r-- | lib/classifier.c | 105 | ||||
-rw-r--r-- | lib/classifier.h | 93 | ||||
-rw-r--r-- | lib/flow.h | 27 | ||||
-rw-r--r-- | manpages.mk | 8 |
4 files changed, 223 insertions, 10 deletions
diff --git a/lib/classifier.c b/lib/classifier.c index 89f56b6e7..4c19c90fb 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -154,6 +154,7 @@ classifier_init(struct classifier *cls) cls->n_rules = 0; hmap_init(&cls->tables); list_init(&cls->tables_priority); + hmap_init(&cls->partitions); ovs_rwlock_init(&cls->rwlock); } @@ -163,12 +164,20 @@ void classifier_destroy(struct classifier *cls) { if (cls) { + struct cls_table *partition, *next_partition; struct cls_table *table, *next_table; HMAP_FOR_EACH_SAFE (table, next_table, hmap_node, &cls->tables) { destroy_table(cls, table); } hmap_destroy(&cls->tables); + + HMAP_FOR_EACH_SAFE (partition, next_partition, hmap_node, + &cls->partitions) { + hmap_remove(&cls->partitions, &partition->hmap_node); + free(partition); + } + hmap_destroy(&cls->partitions); ovs_rwlock_destroy(&cls->rwlock); } } @@ -187,6 +196,45 @@ classifier_count(const struct classifier *cls) return cls->n_rules; } +static uint32_t +hash_metadata(ovs_be64 metadata_) +{ + uint64_t metadata = (OVS_FORCE uint64_t) metadata_; + return hash_2words(metadata, metadata >> 32); +} + +static struct cls_partition * +find_partition(const struct classifier *cls, ovs_be64 metadata, uint32_t hash) +{ + struct cls_partition *partition; + + HMAP_FOR_EACH_IN_BUCKET (partition, hmap_node, hash, &cls->partitions) { + if (partition->metadata == metadata) { + return partition; + } + } + + return NULL; +} + +static struct cls_partition * +create_partition(struct classifier *cls, struct cls_table *table, + ovs_be64 metadata) +{ + uint32_t hash = hash_metadata(metadata); + struct cls_partition *partition = find_partition(cls, metadata, hash); + if (!partition) { + partition = xmalloc(sizeof *partition); + partition->metadata = metadata; + partition->tags = 0; + partition->n_refs = 0; + hmap_insert(&cls->partitions, &partition->hmap_node, hash); + } + partition->tags |= table->tag; + partition->n_refs++; + return partition; +} + /* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller * must not modify or free it. * @@ -213,8 +261,17 @@ classifier_replace(struct classifier *cls, struct cls_rule *rule) old_rule = insert_rule(cls, table, rule); if (!old_rule) { + if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) { + ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow); + rule->partition = create_partition(cls, table, metadata); + } else { + rule->partition = NULL; + } + table->n_table_rules++; cls->n_rules++; + } else { + rule->partition = old_rule->partition; } return old_rule; } @@ -238,6 +295,7 @@ classifier_insert(struct classifier *cls, struct cls_rule *rule) void classifier_remove(struct classifier *cls, struct cls_rule *rule) { + struct cls_partition *partition; struct cls_rule *head; struct cls_table *table; @@ -255,6 +313,12 @@ classifier_remove(struct classifier *cls, struct cls_rule *rule) hmap_replace(&table->rules, &rule->hmap_node, &next->hmap_node); } + partition = rule->partition; + if (partition && --partition->n_refs == 0) { + hmap_remove(&cls->partitions, &partition->hmap_node); + free(partition); + } + if (--table->n_table_rules == 0) { destroy_table(cls, table); } else { @@ -275,13 +339,44 @@ struct cls_rule * classifier_lookup(const struct classifier *cls, const struct flow *flow, struct flow_wildcards *wc) { + const struct cls_partition *partition; struct cls_table *table; struct cls_rule *best; + tag_type tags; + + /* Determine 'tags' such that, if 'table->tag' doesn't intersect them, then + * 'flow' cannot possibly match in 'table': + * + * - If flow->metadata maps to a given 'partition', then we can use + * 'tags' for 'partition->tags'. + * + * - If flow->metadata has no partition, then no rule in 'cls' has an + * exact-match for flow->metadata. That means that we don't need to + * search any table that includes flow->metadata in its mask. + * + * In either case, we always need to search any cls_tables that do not + * include flow->metadata in its mask. One way to do that would be to + * check the "cls_table"s explicitly for that, but that would require an + * extra branch per table. Instead, we mark such a cls_table's 'tags' as + * TAG_ALL and make sure that 'tags' is never empty. This means that + * 'tags' always intersects such a cls_table's 'tags', so we don't need a + * special case. + */ + partition = (hmap_is_empty(&cls->partitions) + ? NULL + : find_partition(cls, flow->metadata, + hash_metadata(flow->metadata))); + tags = partition ? partition->tags : TAG_ARBITRARY; best = NULL; LIST_FOR_EACH (table, list_node, &cls->tables_priority) { - struct cls_rule *rule = find_match(table, flow); + struct cls_rule *rule; + + if (!tag_intersects(tags, table->tag)) { + continue; + } + rule = find_match(table, flow); if (wc) { flow_wildcards_fold_minimask(wc, &table->mask); } @@ -293,6 +388,10 @@ classifier_lookup(const struct classifier *cls, const struct flow *flow, * can not find anything better. */ return best; } + if (!tag_intersects(tags, table->tag)) { + continue; + } + rule = find_match(table, flow); if (wc) { flow_wildcards_fold_minimask(wc, &table->mask); @@ -550,6 +649,7 @@ find_table(const struct classifier *cls, const struct minimask *mask) static struct cls_table * insert_table(struct classifier *cls, const struct minimask *mask) { + uint32_t hash = minimask_hash(mask, 0); struct cls_table *table; table = xzalloc(sizeof *table); @@ -557,6 +657,9 @@ insert_table(struct classifier *cls, const struct minimask *mask) minimask_clone(&table->mask, mask); hmap_insert(&cls->tables, &table->hmap_node, minimask_hash(mask, 0)); list_push_back(&cls->tables_priority, &table->list_node); + table->tag = (minimask_get_metadata_mask(mask) == OVS_BE64_MAX + ? tag_create_deterministic(hash) + : TAG_ALL); return table; } diff --git a/lib/classifier.h b/lib/classifier.h index a795b4a18..00848f8ed 100644 --- a/lib/classifier.h +++ b/lib/classifier.h @@ -19,11 +19,80 @@ /* Flow classifier. * - * A classifier is a "struct classifier", - * a hash map from a set of wildcards to a "struct cls_table", - * a hash map from fixed field values to "struct cls_rule", - * which can contain a list of otherwise identical rules - * with lower priorities. + * + * What? + * ===== + * + * A flow classifier holds any number of "rules", each of which specifies + * values to match for some fields or subfields and a priority. The primary + * design goal for the classifier is that, given a packet, it can as quickly as + * possible find the highest-priority rule that matches the packet. + * + * Each OpenFlow table is implemented as a flow classifier. + * + * + * Basic Design + * ============ + * + * Suppose that all the rules in a classifier had the same form. For example, + * suppose that they all matched on the source and destination Ethernet address + * and wildcarded all the other fields. Then the obvious way to implement a + * classifier would be a hash table on the source and destination Ethernet + * addresses. If new classification rules came along with a different form, + * you could add a second hash table that hashed on the fields matched in those + * rules. With two hash tables, you look up a given flow in each hash table. + * If there are no matches, the classifier didn't contain a match; if you find + * a match in one of them, that's the result; if you find a match in both of + * them, then the result is the rule with the higher priority. + * + * This is how the classifier works. In a "struct classifier", each form of + * "struct cls_rule" present (based on its ->match.mask) goes into a separate + * "struct cls_table". A lookup does a hash lookup in every "struct cls_table" + * in the classifier and tracks the highest-priority match that it finds. The + * tables are kept in a descending priority order according to the highest + * priority rule in each table, which allows lookup to skip over tables that + * can't possibly have a higher-priority match than already found. + * + * One detail: a classifier can contain multiple rules that are identical other + * than their priority. When this happens, only the highest priority rule out + * of a group of otherwise identical rules is stored directly in the "struct + * cls_table", with the other almost-identical rules chained off a linked list + * inside that highest-priority rule. + * + * + * Partitioning + * ============ + * + * Suppose that a given classifier is being used to handle multiple stages in a + * pipeline using "resubmit", with metadata (that is, the OpenFlow 1.1+ field + * named "metadata") distinguishing between the different stages. For example, + * metadata value 1 might identify ingress rules, metadata value 2 might + * identify ACLs, and metadata value 3 might identify egress rules. Such a + * classifier is essentially partitioned into multiple sub-classifiers on the + * basis of the metadata value. + * + * The classifier has a special optimization to speed up matching in this + * scenario: + * + * - Each cls_table that matches on metadata gets a tag derived from the + * table's mask, so that it is likely that each table has a unique tag. + * (Duplicate tags have a performance cost but do not affect + * correctness.) + * + * - For each metadata value matched by any cls_rule, the classifier + * constructs a "struct cls_partition" indexed by the metadata value. + * The cls_partition has a 'tags' member whose value is the bitwise-OR of + * the tags of each cls_table that contains any rule that matches on the + * cls_partition's metadata value. In other words, struct cls_partition + * associates metadata values with tables that need to be checked with + * flows with that specific metadata value. + * + * Thus, a flow lookup can start by looking up the partition associated with + * the flow's metadata, and then skip over any cls_table whose 'tag' does not + * intersect the partition's 'tags'. (The flow must also be looked up in any + * cls_table that doesn't match on metadata. We handle that by giving any such + * cls_table TAG_ALL as its 'tags' so that it matches any tag.) + * * * Thread-safety * ============= @@ -37,6 +106,7 @@ #include "hmap.h" #include "list.h" #include "match.h" +#include "tag.h" #include "openflow/nicira-ext.h" #include "openflow/openflow.h" #include "ovs-thread.h" @@ -54,6 +124,7 @@ struct classifier { int n_rules; /* Total number of rules. */ struct hmap tables; /* Contains "struct cls_table"s. */ struct list tables_priority; /* Tables in descending priority order */ + struct hmap partitions; /* Contains "struct cls_partition"s. */ struct ovs_rwlock rwlock OVS_ACQ_AFTER(ofproto_mutex); }; @@ -66,6 +137,7 @@ struct cls_table { int n_table_rules; /* Number of rules, including duplicates. */ unsigned int max_priority; /* Max priority of any rule in the table. */ unsigned int max_count; /* Count of max_priority rules. */ + tag_type tag; /* Tag generated from mask for partitioning. */ }; /* Returns true if 'table' is a "catch-all" table that will match every @@ -82,6 +154,17 @@ struct cls_rule { struct list list; /* List of identical, lower-priority rules. */ struct minimatch match; /* Matching rule. */ unsigned int priority; /* Larger numbers are higher priorities. */ + struct cls_partition *partition; +}; + +/* Associates a metadata value (that is, a value of the OpenFlow 1.1+ metadata + * field) with tags for the "cls_table"s that contain rules that match that + * metadata value. */ +struct cls_partition { + struct hmap_node hmap_node; /* In struct classifier's 'partitions' hmap. */ + ovs_be64 metadata; /* metadata value for this partition. */ + tag_type tags; /* OR of each included flow's cls_table tag. */ + unsigned int n_refs; /* # of flows that refer to this partition. */ }; void cls_rule_init(struct cls_rule *, const struct match *, diff --git a/lib/flow.h b/lib/flow.h index 0c448ded4..d05066344 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -21,6 +21,7 @@ #include <stdbool.h> #include <stdint.h> #include <string.h> +#include "byte-order.h" #include "openflow/nicira-ext.h" #include "openflow/openflow.h" #include "hash.h" @@ -330,6 +331,7 @@ void miniflow_expand(const struct miniflow *, struct flow *); uint32_t miniflow_get(const struct miniflow *, unsigned int u32_ofs); uint16_t miniflow_get_vid(const struct miniflow *); +static inline ovs_be64 miniflow_get_metadata(const struct miniflow *); bool miniflow_equal(const struct miniflow *a, const struct miniflow *b); bool miniflow_equal_in_minimask(const struct miniflow *a, @@ -363,11 +365,36 @@ void minimask_expand(const struct minimask *, struct flow_wildcards *); uint32_t minimask_get(const struct minimask *, unsigned int u32_ofs); uint16_t minimask_get_vid_mask(const struct minimask *); +static inline ovs_be64 minimask_get_metadata_mask(const struct minimask *); bool minimask_equal(const struct minimask *a, const struct minimask *b); uint32_t minimask_hash(const struct minimask *, uint32_t basis); bool minimask_has_extra(const struct minimask *, const struct minimask *); bool minimask_is_catchall(const struct minimask *); + +/* Returns the value of the OpenFlow 1.1+ "metadata" field in 'flow'. */ +static inline ovs_be64 +miniflow_get_metadata(const struct miniflow *flow) +{ + enum { MD_OFS = offsetof(struct flow, metadata) }; + BUILD_ASSERT_DECL(MD_OFS % sizeof(uint32_t) == 0); + ovs_be32 hi = (OVS_FORCE ovs_be32) miniflow_get(flow, MD_OFS / 4); + ovs_be32 lo = (OVS_FORCE ovs_be32) miniflow_get(flow, MD_OFS / 4 + 1); + + return htonll(((uint64_t) ntohl(hi) << 32) | ntohl(lo)); +} + +/* Returns the mask for the OpenFlow 1.1+ "metadata" field in 'mask'. + * + * The return value is all-1-bits if 'mask' matches on the whole value of the + * metadata field, all-0-bits if 'mask' entirely wildcards the metadata field, + * or some other value if the metadata field is partially matched, partially + * wildcarded. */ +static inline ovs_be64 +minimask_get_metadata_mask(const struct minimask *mask) +{ + return miniflow_get_metadata(&mask->masks); +} #endif /* flow.h */ diff --git a/manpages.mk b/manpages.mk index 811d2f992..2a34f04bc 100644 --- a/manpages.mk +++ b/manpages.mk @@ -116,6 +116,10 @@ lib/vconn-active.man: lib/vconn-passive.man: lib/vlog.man: +utilities/ovs-dpctl-top.8: \ + utilities/ovs-dpctl-top.8.in +utilities/ovs-dpctl-top.8.in: + utilities/ovs-dpctl.8: \ utilities/ovs-dpctl.8.in \ lib/common.man \ @@ -124,10 +128,6 @@ utilities/ovs-dpctl.8.in: lib/common.man: lib/vlog.man: -utilities/ovs-dpctl-top.8: \ - utilities/ovs-dpctl-top.8.in -utilities/ovs-dpctl-top.8.in: - utilities/ovs-l3ping.8: \ utilities/ovs-l3ping.8.in \ lib/common-syn.man \ |