diff options
Diffstat (limited to 'ofproto')
-rw-r--r-- | ofproto/ofproto-dpif-xlate.c | 59 | ||||
-rw-r--r-- | ofproto/ofproto-dpif.c | 151 | ||||
-rw-r--r-- | ofproto/ofproto-dpif.h | 13 |
3 files changed, 201 insertions, 22 deletions
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 9f7fca7b9..c990d8a67 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -4392,27 +4392,37 @@ pick_hash_fields_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
 static struct ofputil_bucket *
 pick_dp_hash_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
 {
+    uint32_t dp_hash = ctx->xin->flow.dp_hash;
+
     /* dp_hash value 0 is special since it means that the dp_hash has not been
      * computed, as all computed dp_hash values are non-zero. Therefore
      * compare to zero can be used to decide if the dp_hash value is valid
      * without masking the dp_hash field. */
-    if (!ctx->xin->flow.dp_hash) {
-        uint64_t param = group->up.props.selection_method_param;
-
-        ctx_trigger_recirculate_with_hash(ctx, param >> 32, (uint32_t)param);
+    if (!dp_hash) {
+        enum ovs_hash_alg hash_alg = group->hash_alg;
+        if (hash_alg > ctx->xbridge->support.max_hash_alg) {
+            /* Algorithm supported by all datapaths. */
+            hash_alg = OVS_HASH_ALG_L4;
+        }
+        ctx_trigger_recirculate_with_hash(ctx, hash_alg, group->hash_basis);
         return NULL;
     } else {
-        uint32_t n_buckets = group->up.n_buckets;
-        if (n_buckets) {
-            /* Minimal mask to cover the number of buckets. */
-            uint32_t mask = (1 << log_2_ceil(n_buckets)) - 1;
-            /* Multiplier chosen to make the trivial 1 bit case to
-             * actually distribute amongst two equal weight buckets. */
-            uint32_t basis = 0xc2b73583 * (ctx->xin->flow.dp_hash & mask);
-
-            ctx->wc->masks.dp_hash |= mask;
-            return group_best_live_bucket(ctx, group, basis);
+        uint32_t hash_mask = group->hash_mask;
+        ctx->wc->masks.dp_hash |= hash_mask;
+
+        /* Starting from the original masked dp_hash value iterate over the
+         * hash mapping table to find the first live bucket. As the buckets
+         * are quasi-randomly spread over the hash values, this maintains
+         * a distribution according to bucket weights even when some buckets
+         * are non-live. */
+        for (int i = 0; i <= hash_mask; i++) {
+            struct ofputil_bucket *b =
+                group->hash_map[(dp_hash + i) & hash_mask];
+            if (bucket_is_alive(ctx, b, 0)) {
+                return b;
+            }
         }
+        return NULL;
     }
 }
 
@@ -4427,17 +4437,22 @@ pick_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
         ctx_trigger_freeze(ctx);
     }
 
-    const char *selection_method = group->up.props.selection_method;
-    if (selection_method[0] == '\0') {
+    switch (group->selection_method) {
+    case SEL_METHOD_DEFAULT:
         return pick_default_select_group(ctx, group);
-    } else if (!strcasecmp("hash", selection_method)) {
+        break;
+    case SEL_METHOD_HASH:
         return pick_hash_fields_select_group(ctx, group);
-    } else if (!strcasecmp("dp_hash", selection_method)) {
+        break;
+    case SEL_METHOD_DP_HASH:
         return pick_dp_hash_select_group(ctx, group);
-    } else {
-        /* Parsing of groups should ensure this never happens */
+        break;
+    default:
+        /* Parsing of groups ensures this never happens */
         OVS_NOT_REACHED();
     }
+
+    return NULL;
 }
 
 static void
@@ -4731,8 +4746,8 @@ finish_freezing__(struct xlate_ctx *ctx, uint8_t table)
         act_hash = nl_msg_put_unspec_uninit(ctx->odp_actions,
                                             OVS_ACTION_ATTR_HASH,
                                             sizeof *act_hash);
-        act_hash->hash_alg = OVS_HASH_ALG_L4; /* Make configurable. */
-        act_hash->hash_basis = 0; /* Make configurable. */
+        act_hash->hash_alg = ctx->dp_hash_alg;
+        act_hash->hash_basis = ctx->dp_hash_basis;
     }
     nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC, recirc_id);
 }
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 716281175..c9c2e5176 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -32,6 +32,7 @@
 #include "lacp.h"
 #include "learn.h"
 #include "mac-learning.h"
+#include "math.h"
 #include "mcast-snooping.h"
 #include "multipath.h"
 #include "netdev-vport.h"
@@ -4762,6 +4763,148 @@ group_dpif_credit_stats(struct group_dpif *group,
     ovs_mutex_unlock(&group->stats_mutex);
 }
 
+/* Calculate the dp_hash mask needed to provide the least weighted bucket
+ * with at least one hash value and construct a mapping table from masked
+ * dp_hash value to group bucket using the Webster method.
+ * If the caller specifies a non-zero max_hash value, abort and return false
+ * if more hash values would be required. The absolute maximum number of
+ * hash values supported is 256. */
+
+#define MAX_SELECT_GROUP_HASH_VALUES 256
+
+static bool
+group_setup_dp_hash_table(struct group_dpif *group, size_t max_hash)
+{
+    struct ofputil_bucket *bucket;
+    uint32_t n_buckets = group->up.n_buckets;
+    uint64_t total_weight = 0;
+    uint16_t min_weight = UINT16_MAX;
+    struct webster {
+        struct ofputil_bucket *bucket;
+        uint32_t divisor;
+        double value;
+        int hits;
+    } *webster;
+
+    if (n_buckets == 0) {
+        VLOG_DBG(" Don't apply dp_hash method without buckets");
+        return false;
+    }
+
+    webster = xcalloc(n_buckets, sizeof(struct webster));
+    int i = 0;
+    LIST_FOR_EACH (bucket, list_node, &group->up.buckets) {
+        if (bucket->weight > 0 && bucket->weight < min_weight) {
+            min_weight = bucket->weight;
+        }
+        total_weight += bucket->weight;
+        webster[i].bucket = bucket;
+        webster[i].divisor = 1;
+        webster[i].value = bucket->weight;
+        webster[i].hits = 0;
+        i++;
+    }
+
+    if (total_weight == 0) {
+        VLOG_DBG(" Total weight is zero. No active buckets.");
+        free(webster);
+        return false;
+    }
+    VLOG_DBG(" Minimum weight: %d, total weight: %"PRIu64,
+             min_weight, total_weight);
+
+    uint64_t min_slots = DIV_ROUND_UP(total_weight, min_weight);
+    uint64_t min_slots2 = ROUND_UP_POW2(min_slots);
+    uint64_t n_hash = MAX(16, min_slots2);
+    if (n_hash > MAX_SELECT_GROUP_HASH_VALUES ||
+        (max_hash != 0 && n_hash > max_hash)) {
+        VLOG_DBG(" Too many hash values required: %"PRIu64, n_hash);
+        free(webster);
+        return false;
+    }
+
+    VLOG_DBG(" Using %"PRIu64" hash values:", n_hash);
+    group->hash_mask = n_hash - 1;
+    if (group->hash_map) {
+        free(group->hash_map);
+    }
+    group->hash_map = xcalloc(n_hash, sizeof(struct ofputil_bucket *));
+
+    /* Use Webster method to distribute hash values over buckets. */
+    for (int hash = 0; hash < n_hash; hash++) {
+        struct webster *winner = &webster[0];
+        for (i = 1; i < n_buckets; i++) {
+            if (webster[i].value > winner->value) {
+                winner = &webster[i];
+            }
+        }
+        winner->hits++;
+        winner->divisor += 2;
+        winner->value = (double) winner->bucket->weight / winner->divisor;
+        group->hash_map[hash] = winner->bucket;
+    }
+
+    i = 0;
+    LIST_FOR_EACH (bucket, list_node, &group->up.buckets) {
+        double target = (n_hash * bucket->weight) / (double) total_weight;
+        VLOG_DBG(" Bucket %d: weight=%d, target=%.2f hits=%d",
+                 bucket->bucket_id, bucket->weight,
+                 target, webster[i].hits);
+        i++;
+    }
+
+    free(webster);
+    return true;
+}
+
+static void
+group_set_selection_method(struct group_dpif *group)
+{
+    const struct ofputil_group_props *props = &group->up.props;
+    const char *selection_method = props->selection_method;
+
+    if (selection_method[0] == '\0') {
+        VLOG_DBG("No selection method specified.");
+        group->selection_method = SEL_METHOD_DEFAULT;
+    } else if (!strcmp(selection_method, "dp_hash")) {
+        VLOG_DBG("Selection method specified: dp_hash.");
+        /* Try to use dp_hash if possible at all. */
+        if (group_setup_dp_hash_table(group, 0)) {
+            group->selection_method = SEL_METHOD_DP_HASH;
+            group->hash_alg = props->selection_method_param >> 32;
+            if (group->hash_alg >= __OVS_HASH_MAX) {
+                VLOG_DBG(" Invalid dp_hash algorithm %d. "
+                         "Defaulting to OVS_HASH_ALG_L4", group->hash_alg);
+                group->hash_alg = OVS_HASH_ALG_L4;
+            }
+            group->hash_basis = (uint32_t) props->selection_method_param;
+            VLOG_DBG("Use dp_hash with %d hash values using algorithm %d.",
+                     group->hash_mask + 1, group->hash_alg);
+        } else {
+            /* Fall back to original default hashing in slow path. */
+            VLOG_DBG(" Falling back to default hash method.");
+            group->selection_method = SEL_METHOD_DEFAULT;
+        }
+    } else if (!strcmp(selection_method, "hash")) {
+        VLOG_DBG("Selection method specified: hash.");
+        if (props->fields.values_size > 0) {
+            /* Controller has specified hash fields. */
+            struct ds s = DS_EMPTY_INITIALIZER;
+            oxm_format_field_array(&s, &props->fields);
+            VLOG_DBG(" Hash fields: %s", ds_cstr(&s));
+            ds_destroy(&s);
+            group->selection_method = SEL_METHOD_HASH;
+        } else {
+            /* No hash fields. Fall back to original default hashing. */
+            VLOG_DBG(" No hash fields. Falling back to default hash method.");
+            group->selection_method = SEL_METHOD_DEFAULT;
+        }
+    } else {
+        /* Parsing of groups should ensure this never happens */
+        OVS_NOT_REACHED();
+    }
+}
+
 static enum ofperr
 group_construct(struct ofgroup *group_)
 {
@@ -4770,6 +4913,10 @@ group_construct(struct ofgroup *group_)
     ovs_mutex_init_adaptive(&group->stats_mutex);
     ovs_mutex_lock(&group->stats_mutex);
     group_construct_stats(group);
+    group->hash_map = NULL;
+    if (group->up.type == OFPGT11_SELECT) {
+        group_set_selection_method(group);
+    }
     ovs_mutex_unlock(&group->stats_mutex);
     return 0;
 }
@@ -4779,6 +4926,10 @@ group_destruct(struct ofgroup *group_)
 {
     struct group_dpif *group = group_dpif_cast(group_);
     ovs_mutex_destroy(&group->stats_mutex);
+    if (group->hash_map) {
+        free(group->hash_map);
+        group->hash_map = NULL;
+    }
 }
 
 static enum ofperr
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index d654947e6..e95feadff 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -119,6 +119,12 @@ rule_dpif_is_internal(const struct rule_dpif *rule)
 
 /* Groups. */
 
+enum group_selection_method {
+    SEL_METHOD_DEFAULT,
+    SEL_METHOD_DP_HASH,
+    SEL_METHOD_HASH,
+};
+
 struct group_dpif {
     struct ofgroup up;
 
@@ -129,6 +135,12 @@ struct group_dpif {
     struct ovs_mutex stats_mutex;
     uint64_t packet_count OVS_GUARDED; /* Number of packets received. */
     uint64_t byte_count OVS_GUARDED; /* Number of bytes received. */
+
+    enum group_selection_method selection_method;
+    enum ovs_hash_alg hash_alg; /* dp_hash algorithm to be applied. */
+    uint32_t hash_basis; /* Basis for dp_hash. */
+    uint32_t hash_mask; /* Used to mask dp_hash (2^N - 1).*/
+    struct ofputil_bucket **hash_map; /* Map hash values to buckets. */
 };
 
 void group_dpif_credit_stats(struct group_dpif *,
@@ -137,6 +149,7 @@ void group_dpif_credit_stats(struct group_dpif *,
 struct group_dpif *group_dpif_lookup(struct ofproto_dpif *,
                                      uint32_t group_id, ovs_version_t version,
                                      bool take_ref);
+
 
 /* Backers.
  *