summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/openvswitch.h29
-rw-r--r--lib/dpif-netdev.c20
-rw-r--r--lib/dpif.c3
-rw-r--r--lib/odp-execute.c9
-rw-r--r--lib/odp-execute.h2
-rw-r--r--lib/odp-util.c98
-rw-r--r--lib/packets.h9
-rw-r--r--ofproto/ofproto-dpif.h58
8 files changed, 218 insertions, 10 deletions
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index e39e4377b..d9282d675 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -307,11 +307,13 @@ enum ovs_key_attr {
OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */
OVS_KEY_ATTR_SCTP, /* struct ovs_key_sctp */
OVS_KEY_ATTR_TCP_FLAGS, /* be16 TCP flags. */
-
#ifdef __KERNEL__
OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */
#endif
+ OVS_KEY_ATTR_DP_HASH = 20, /* u32 hash value */
+ OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */
+
OVS_KEY_ATTR_MPLS = 62, /* array of struct ovs_key_mpls.
* The implementation may restrict
* the accepted length of the array. */
@@ -532,6 +534,29 @@ struct ovs_action_push_vlan {
__be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */
};
+/* Data path hash algorithm for computing the datapath hash.
+ *
+ * The algorithm type only specifies which fields in a flow
+ * will be used as part of the hash. Each datapath is free
+ * to use its own hash algorithm. The hash value will be
+ * opaque to the user space daemon.
+ */
+enum ovs_recirc_hash_alg {
+ OVS_RECIRC_HASH_ALG_NONE,
+ OVS_RECIRC_HASH_ALG_L4,
+};
+/**
+ * struct ovs_action_recirc - %OVS_ACTION_ATTR_RECIRC action argument.
+ * @recirc_id: The recirculation label; zero is invalid.
+ * @hash_alg: Algorithm used to compute hash prior to recirculation.
+ * @hash_bias: Bias used when computing the hash prior to recirculation.
+ */
+struct ovs_action_recirc {
+ uint32_t hash_alg; /* One of enum ovs_recirc_hash_alg. */
+ uint32_t hash_bias;
+ uint32_t recirc_id; /* Recirculation label. */
+};
+
/**
* enum ovs_action_attr - Action types.
*
@@ -555,6 +580,7 @@ struct ovs_action_push_vlan {
* indicate the new packet contents. This could potentially still be
* %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there
* is no MPLS label stack, as determined by ethertype, no action is taken.
+ * @OVS_ACTION_ATTR_RECIRC: Recirculate within the data path.
*
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -571,6 +597,7 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */
OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */
OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */
+ OVS_ACTION_ATTR_RECIRC, /* struct ovs_action_recirc. */
__OVS_ACTION_ATTR_MAX
};
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 4d0462907..8687a4725 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2082,7 +2082,7 @@ struct dp_netdev_execute_aux {
static void
dp_execute_cb(void *aux_, struct ofpbuf *packet,
- const struct pkt_metadata *md OVS_UNUSED,
+ struct pkt_metadata *md,
const struct nlattr *a, bool may_steal)
OVS_NO_THREAD_SAFETY_ANALYSIS
{
@@ -2114,6 +2114,24 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
}
break;
}
+
+ case OVS_ACTION_ATTR_RECIRC: {
+ const struct ovs_action_recirc *act;
+ act = nl_attr_get(a);
+ md->recirc_id =act->recirc_id;
+ md->dp_hash = 0;
+
+ if (act->hash_alg == OVS_RECIRC_HASH_ALG_L4) {
+ struct flow flow;
+
+ flow_extract(packet, md, &flow);
+ md->dp_hash = flow_hash_symmetric_l4(&flow, act->hash_bias);
+ }
+
+ dp_netdev_port_input(aux->dp, packet, md);
+ break;
+ }
+
case OVS_ACTION_ATTR_PUSH_VLAN:
case OVS_ACTION_ATTR_POP_VLAN:
case OVS_ACTION_ATTR_PUSH_MPLS:
diff --git a/lib/dpif.c b/lib/dpif.c
index dbf1c101d..664917663 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1108,7 +1108,7 @@ struct dpif_execute_helper_aux {
* meaningful. */
static void
dpif_execute_helper_cb(void *aux_, struct ofpbuf *packet,
- const struct pkt_metadata *md,
+ struct pkt_metadata *md,
const struct nlattr *action, bool may_steal OVS_UNUSED)
{
struct dpif_execute_helper_aux *aux = aux_;
@@ -1133,6 +1133,7 @@ dpif_execute_helper_cb(void *aux_, struct ofpbuf *packet,
case OVS_ACTION_ATTR_SET:
case OVS_ACTION_ATTR_SAMPLE:
case OVS_ACTION_ATTR_UNSPEC:
+ case OVS_ACTION_ATTR_RECIRC:
case __OVS_ACTION_ATTR_MAX:
OVS_NOT_REACHED();
}
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index cf33eb779..6e04816a5 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -125,6 +125,14 @@ odp_execute_set_action(struct ofpbuf *packet, const struct nlattr *a,
set_arp(packet, nl_attr_get_unspec(a, sizeof(struct ovs_key_arp)));
break;
+ case OVS_KEY_ATTR_DP_HASH:
+ md->dp_hash = nl_attr_get_u32(a);
+ break;
+
+ case OVS_KEY_ATTR_RECIRC_ID:
+ md->recirc_id = nl_attr_get_u32(a);
+ break;
+
case OVS_KEY_ATTR_UNSPEC:
case OVS_KEY_ATTR_ENCAP:
case OVS_KEY_ATTR_ETHERTYPE:
@@ -197,6 +205,7 @@ odp_execute_actions__(void *dp, struct ofpbuf *packet, bool steal,
/* These only make sense in the context of a datapath. */
case OVS_ACTION_ATTR_OUTPUT:
case OVS_ACTION_ATTR_USERSPACE:
+ case OVS_ACTION_ATTR_RECIRC:
if (dp_execute_action) {
bool may_steal;
/* Allow 'dp_execute_action' to steal the packet data if we do
diff --git a/lib/odp-execute.h b/lib/odp-execute.h
index 6f1b9bd77..91f0c5183 100644
--- a/lib/odp-execute.h
+++ b/lib/odp-execute.h
@@ -28,7 +28,7 @@ struct ofpbuf;
struct pkt_metadata;
typedef void (*odp_execute_cb)(void *dp, struct ofpbuf *packet,
- const struct pkt_metadata *,
+ struct pkt_metadata *,
const struct nlattr *action, bool may_steal);
/* Actions that need to be executed in the context of a datapath are handed
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 7c6aad4f3..956fef11b 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -79,6 +79,7 @@ odp_action_len(uint16_t type)
case OVS_ACTION_ATTR_POP_VLAN: return 0;
case OVS_ACTION_ATTR_PUSH_MPLS: return sizeof(struct ovs_action_push_mpls);
case OVS_ACTION_ATTR_POP_MPLS: return sizeof(ovs_be16);
+ case OVS_ACTION_ATTR_RECIRC: return sizeof(struct ovs_action_recirc);
case OVS_ACTION_ATTR_SET: return -2;
case OVS_ACTION_ATTR_SAMPLE: return -2;
@@ -118,6 +119,8 @@ ovs_key_attr_to_string(enum ovs_key_attr attr, char *namebuf, size_t bufsize)
case OVS_KEY_ATTR_ARP: return "arp";
case OVS_KEY_ATTR_ND: return "nd";
case OVS_KEY_ATTR_MPLS: return "mpls";
+ case OVS_KEY_ATTR_DP_HASH: return "dp_hash";
+ case OVS_KEY_ATTR_RECIRC_ID: return "recirc_id";
case __OVS_KEY_ATTR_MAX:
default:
@@ -384,6 +387,19 @@ format_mpls(struct ds *ds, const struct ovs_key_mpls *mpls_key,
}
static void
+format_odp_recirc_action(struct ds *ds,
+ const struct ovs_action_recirc *act)
+{
+ ds_put_format(ds, "recirc(");
+
+ if (act->hash_alg == OVS_RECIRC_HASH_ALG_L4) {
+ ds_put_format(ds, "hash_l4(%"PRIu32"), ", act->hash_bias);
+ }
+
+ ds_put_format(ds, "%"PRIu32")", act->recirc_id);
+}
+
+static void
format_odp_action(struct ds *ds, const struct nlattr *a)
{
int expected_len;
@@ -405,6 +421,9 @@ format_odp_action(struct ds *ds, const struct nlattr *a)
case OVS_ACTION_ATTR_USERSPACE:
format_odp_userspace_action(ds, a);
break;
+ case OVS_ACTION_ATTR_RECIRC:
+ format_odp_recirc_action(ds, nl_attr_get(a));
+ break;
case OVS_ACTION_ATTR_SET:
ds_put_cstr(ds, "set(");
format_odp_key_attr(nl_attr_get(a), NULL, NULL, ds, true);
@@ -730,6 +749,8 @@ odp_flow_key_attr_len(uint16_t type)
case OVS_KEY_ATTR_ENCAP: return -2;
case OVS_KEY_ATTR_PRIORITY: return 4;
case OVS_KEY_ATTR_SKB_MARK: return 4;
+ case OVS_KEY_ATTR_DP_HASH: return 4;
+ case OVS_KEY_ATTR_RECIRC_ID: return 4;
case OVS_KEY_ATTR_TUNNEL: return -2;
case OVS_KEY_ATTR_IN_PORT: return 4;
case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet);
@@ -1025,6 +1046,8 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma,
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_SKB_MARK:
+ case OVS_KEY_ATTR_DP_HASH:
+ case OVS_KEY_ATTR_RECIRC_ID:
ds_put_format(ds, "%#"PRIx32, nl_attr_get_u32(a));
if (!is_exact) {
ds_put_format(ds, "/%#"PRIx32, nl_attr_get_u32(ma));
@@ -1386,7 +1409,6 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma,
}
break;
}
-
case OVS_KEY_ATTR_UNSPEC:
case __OVS_KEY_ATTR_MAX:
default:
@@ -1619,6 +1641,36 @@ parse_odp_key_mask_attr(const char *s, const struct simap *port_names,
}
{
+ uint32_t recirc_id;
+ int n = -1;
+
+ if (ovs_scan(s, "recirc_id(%"SCNi32")%n", &recirc_id, &n)) {
+ nl_msg_put_u32(key, OVS_KEY_ATTR_RECIRC_ID, recirc_id);
+ nl_msg_put_u32(mask, OVS_KEY_ATTR_RECIRC_ID, UINT32_MAX);
+ return n;
+ }
+ }
+
+ {
+ uint32_t dp_hash;
+ uint32_t dp_hash_mask;
+ int n = -1;
+
+ if (mask && ovs_scan(s, "dp_hash(%"SCNi32"/%"SCNi32")%n", &dp_hash,
+ &dp_hash_mask, &n)) {
+ nl_msg_put_u32(key, OVS_KEY_ATTR_DP_HASH, dp_hash);
+ nl_msg_put_u32(mask, OVS_KEY_ATTR_DP_HASH, dp_hash_mask);
+ return n;
+ } else if (ovs_scan(s, "dp_hash(%"SCNi32")%n", &dp_hash, &n)) {
+ nl_msg_put_u32(key, OVS_KEY_ATTR_DP_HASH, dp_hash);
+ if (mask) {
+ nl_msg_put_u32(mask, OVS_KEY_ATTR_DP_HASH, UINT32_MAX);
+ }
+ return n;
+ }
+ }
+
+ {
uint64_t tun_id, tun_id_mask;
struct flow_tnl tun_key, tun_key_mask;
int n = -1;
@@ -2438,6 +2490,14 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data,
nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, data->pkt_mark);
+ if (flow->recirc_id) {
+ nl_msg_put_u32(buf, OVS_KEY_ATTR_RECIRC_ID, data->recirc_id);
+ }
+
+ if (flow->dp_hash) {
+ nl_msg_put_u32(buf, OVS_KEY_ATTR_DP_HASH, data->dp_hash);
+ }
+
/* Add an ingress port attribute if this is a mask or 'odp_in_port'
* is not the magical value "ODPP_NONE". */
if (is_mask || odp_in_port != ODPP_NONE) {
@@ -2673,13 +2733,24 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len,
continue;
}
- if (type == OVS_KEY_ATTR_PRIORITY) {
+ switch (type) {
+ case OVS_KEY_ATTR_RECIRC_ID:
+ md->recirc_id = nl_attr_get_u32(nla);
+ wanted_attrs &= ~(1u << OVS_KEY_ATTR_RECIRC_ID);
+ break;
+ case OVS_KEY_ATTR_DP_HASH:
+ md->dp_hash = nl_attr_get_u32(nla);
+ wanted_attrs &= ~(1u << OVS_KEY_ATTR_DP_HASH);
+ break;
+ case OVS_KEY_ATTR_PRIORITY:
md->skb_priority = nl_attr_get_u32(nla);
wanted_attrs &= ~(1u << OVS_KEY_ATTR_PRIORITY);
- } else if (type == OVS_KEY_ATTR_SKB_MARK) {
+ break;
+ case OVS_KEY_ATTR_SKB_MARK:
md->pkt_mark = nl_attr_get_u32(nla);
wanted_attrs &= ~(1u << OVS_KEY_ATTR_SKB_MARK);
- } else if (type == OVS_KEY_ATTR_TUNNEL) {
+ break;
+ case OVS_KEY_ATTR_TUNNEL: {
enum odp_key_fitness res;
res = odp_tun_key_from_attr(nla, &md->tunnel);
@@ -2688,9 +2759,14 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len,
} else if (res == ODP_FIT_PERFECT) {
wanted_attrs &= ~(1u << OVS_KEY_ATTR_TUNNEL);
}
- } else if (type == OVS_KEY_ATTR_IN_PORT) {
+ break;
+ }
+ case OVS_KEY_ATTR_IN_PORT:
md->in_port.odp_port = nl_attr_get_odp_port(nla);
wanted_attrs &= ~(1u << OVS_KEY_ATTR_IN_PORT);
+ break;
+ default:
+ break;
}
if (!wanted_attrs) {
@@ -3226,6 +3302,18 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len,
expected_attrs = 0;
/* Metadata. */
+ if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_RECIRC_ID)) {
+ flow->recirc_id = nl_attr_get_u32(attrs[OVS_KEY_ATTR_RECIRC_ID]);
+ expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_RECIRC_ID;
+ } else if (is_mask) {
+ /* Always exact match recirc_id when the datapath does not specify it. */
+ flow->recirc_id = UINT32_MAX;
+ }
+
+ if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_DP_HASH)) {
+ flow->dp_hash = nl_attr_get_u32(attrs[OVS_KEY_ATTR_DP_HASH]);
+ expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_DP_HASH;
+ }
if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_PRIORITY)) {
flow->skb_priority = nl_attr_get_u32(attrs[OVS_KEY_ATTR_PRIORITY]);
expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_PRIORITY;
diff --git a/lib/packets.h b/lib/packets.h
index f6a4f43b6..30e4d13f3 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -33,6 +33,11 @@ struct ds;
/* Datapath packet metadata */
struct pkt_metadata {
+ uint32_t recirc_id; /* Recirculation id carried with the
+ recirculating packets. 0 for packets
+ received from the wire. */
+ uint32_t dp_hash; /* hash value computed by the recirculation
+ action. */
struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */
uint32_t skb_priority; /* Packet priority for QoS. */
uint32_t pkt_mark; /* Packet mark. */
@@ -40,13 +45,15 @@ struct pkt_metadata {
};
#define PKT_METADATA_INITIALIZER(PORT) \
- (struct pkt_metadata){ { 0, 0, 0, 0, 0, 0}, 0, 0, {(PORT)} }
+ (struct pkt_metadata){ 0, 0, { 0, 0, 0, 0, 0, 0}, 0, 0, {(PORT)} }
static inline struct pkt_metadata
pkt_metadata_from_flow(const struct flow *flow)
{
struct pkt_metadata md;
+ md.recirc_id = flow->recirc_id;
+ md.dp_hash = flow->dp_hash;
md.tunnel = flow->tunnel;
md.skb_priority = flow->skb_priority;
md.pkt_mark = flow->pkt_mark;
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index 6fbc6726b..93e6ec055 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -135,6 +135,64 @@ void ofproto_dpif_flow_mod(struct ofproto_dpif *, struct ofputil_flow_mod *);
struct ofport_dpif *odp_port_to_ofport(const struct dpif_backer *, odp_port_t);
+/*
+ * Recirculation
+ * =============
+ *
+ * Recirculation is a technique to allow a frame to re-enter the packet processing
+ * path one or more times to achieve more flexible packet processing in the data
+ * path. Examples include MPLS handling and selecting the slave port of a bond.
+ *
+ * Data path and user space interface
+ * -----------------------------------
+ *
+ * Two new fields, recirc_id and dp_hash, are added to the current flow data structure.
+ * They are both of type uint32_t. In addition, a new action, RECIRC, is added.
+ *
+ * The value recirc_id is used to distinguish a packet from multiple iterations of
+ * recirculation. A packet initially received is considered to have a recirc_id of 0.
+ * Recirc_id is managed by the user space and is opaque to the data path.
+ *
+ * On the other hand, dp_hash can only be computed by the data path and is opaque
+ * to the user space. In fact, user space may not be able to recompute the hash
+ * value. The dp_hash value should be wildcarded for a newly received packet.
+ * The RECIRC action specifies whether the hash is computed and, if so, which
+ * fields are included in the hash computation. The computed hash value is
+ * stored into the dp_hash field prior to recirculation.
+ *
+ * The RECIRC action computes and sets the dp_hash field, sets the recirc_id field,
+ * and then reprocesses the packet as if it was received on the same input port.
+ * The RECIRC action works like a function call; actions listed after the RECIRC
+ * action will be executed once it completes. RECIRC actions can be nested; the
+ * data path implementation limits the number of recirculations executed
+ * to prevent unreasonable nesting depth or infinite loops.
+ *
+ * Both flow fields and the RECIRC action are exposed as open flow fields via
+ * Nicira extensions.
+ *
+ * Post recirculation flow
+ * ------------------------
+ *
+ * At the open flow level, post recirculation rules are always hidden from the
+ * controller. They are installed in table 254 which is set up as a hidden table
+ * during boot time. Those rules are managed by the local user space program only.
+ *
+ * To speed up the classifier look up process, recirc_id is always reflected into
+ * the metadata field, since recirc_id is required to be exactly matched.
+ *
+ * Classifier lookup always starts with table 254. A post recirculation flow
+ * lookup should find its hidden rule within this table. On the other hand, a
+ * newly received packet should miss all post recirculation rules because its
+ * recirc_id is zero, then hit a pre-installed lower priority rule that redirects
+ * the classifier to start its lookup from table 0:
+ *
+ * * , actions=resubmit(,0)
+ *
+ * Post recirculation data path flows are managed like other data path flows.
+ * They are created on demand. Miss handling, stats collection and revalidation
+ * work the same way as regular flows.
+ */
+
uint32_t ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto);
void ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id);
#endif /* ofproto-dpif.h */