summaryrefslogtreecommitdiff
path: root/ovn
diff options
context:
space:
mode:
authorvenu iyer <venugopali@nvidia.com>2019-01-14 17:30:43 -0800
committerBen Pfaff <blp@ovn.org>2019-02-22 13:12:09 -0800
commitb520ca7c7e72af0c653f497b67f2e41c0af53409 (patch)
treebe143b4bbb079a039afb54726afd32793f6dbdfc /ovn
parent8bdf4e674c95497c5f82e613d6388ff4ed569f55 (diff)
downloadopenvswitch-b520ca7c7e72af0c653f497b67f2e41c0af53409.tar.gz
Support for multiple VTEP in OVN
OVN uses tunnels to achieve logical network connectivity. The tunnel IP to be used when communicating with a node is configured using an external_ids field called "ovn-encap-ip" (and "ovn-encap-type" to indicate the type of tunnel - geneve, vxlan, stt). The fact that "ovn-encap-ip" is a single IP is significantly limiting when used in certain scenarios. Primarily, if we have multiple NICs on a system and want to assign SR-IOV VFs from different NICs to a guest (as logical ports), then we'll still end up using the "ovn-encap-ip" to encapsulate traffic from different VFs. This means we'll end up using only one NIC on the physical host, thereby not maintaining the VF-PF association while also not using all the physical NICs. It is possible to bond all the NICs and use the bond IP as the "encap-ip", but bonding multiple NICs has its own limitations, i.e. NICs supporting OVS flows offload don't work with bonding - this severely undermines SR-IOV use with OVS (i.e. if all the processing needs to be done in the host despite giving VFs to guests). +-------------------------------------------------------+ +-------------------------~ |Hypervisor I (chassis-ID = HV1) | |Hypervisor II | | | |+----------------------+ | | || guest | | | || | | | |+-------|----------|---+ | | (ovn-chassis-id) | | | vf0_rep +-------+ | | +-------+ | | | encap-ip=IP1 | | | | (HV1@IP1) | | | |------------------------------| |-------------------------------| | | | | |br-int | | | |br-int | | | | vf0_rep | | | | (HV1@IP2) | | | | | encap-ip=IP2 | |-------------------------------| | | | |-------------------| | | | | +-------+ | | | +-------+ | | | | |vf0 |vf0 | | | | +---------+ +---------+ | | | +---| nic1 |--| nic2 |----------------------------+ | +--------------------------~ +---------+ +---------+ V | | Tunnel Ports |pf=IP1 |pf=IP2 between Hypervisors. Note: The above uses a NIC that supports OVS with SR-IOV (e.g. Mellanox CX-5) which uses a "representor" to plug in a VF to the OVS bridge. 
This patch enables a list of comma separated IP addresses to be specified in "ovn-encap-ip", thus allowing the node to be reached via any IP combined with the "ovn-encap-type" - assuming physical routing allows that. Additionally, it also introduces a way to specify the encap IP to be used for a logical port (so that the VF-PF mapping is maintained when traversing the logical path over a tunnel). A new "encap-ip" external_ids can be configured on an Interface to indicate this. On the SB these changes appear as an additional column in port_bindings as "encap". The encap record for a port points to an encap record on its chassis. If the port is not explicitly associated with an encap-ip (using external_ids), the encap record is empty, which means the preferred tunnel will be used to reach the port's chassis. The intention is also to have no functional changes in the default case, i.e. when there is only one "ovn-encap-ip". The changes have been tested with multiple encap-ip addresses, SR-IOV and for backwards compatibility (in the case where there is only one ovn-encap-ip) with an OVN SB that doesn't include these changes.
Diffstat (limited to 'ovn')
-rw-r--r--ovn/controller/bfd.c21
-rw-r--r--ovn/controller/binding.c46
-rw-r--r--ovn/controller/chassis.c72
-rw-r--r--ovn/controller/encaps.c56
-rw-r--r--ovn/controller/ovn-controller.h8
-rw-r--r--ovn/controller/physical.c105
-rw-r--r--ovn/controller/pinctrl.c4
-rw-r--r--ovn/ovn-sb.ovsschema8
-rw-r--r--ovn/ovn-sb.xml6
9 files changed, 282 insertions, 44 deletions
diff --git a/ovn/controller/bfd.c b/ovn/controller/bfd.c
index 94dad236e..10ab6ac37 100644
--- a/ovn/controller/bfd.c
+++ b/ovn/controller/bfd.c
@@ -73,7 +73,14 @@ bfd_calculate_active_tunnels(const struct ovsrec_bridge *br_int,
const char *id = smap_get(&port_rec->external_ids,
"ovn-chassis-id");
if (id) {
- sset_add(active_tunnels, id);
+ char *chassis_name;
+ char *save_ptr = NULL;
+ char *tokstr = xstrdup(id);
+ chassis_name = strtok_r(tokstr, OVN_MVTEP_CHASSISID_DELIM, &save_ptr);
+ if (chassis_name && !sset_contains(active_tunnels, chassis_name)) {
+ sset_add(active_tunnels, chassis_name);
+ }
+ free(tokstr);
}
}
}
@@ -264,14 +271,20 @@ bfd_run(struct ovsdb_idl_index *sbrec_chassis_by_name,
struct sset tunnels = SSET_INITIALIZER(&tunnels);
struct sset bfd_ifaces = SSET_INITIALIZER(&bfd_ifaces);
for (size_t k = 0; k < br_int->n_ports; k++) {
- const char *chassis_id = smap_get(&br_int->ports[k]->external_ids,
+ const char *tunnel_id = smap_get(&br_int->ports[k]->external_ids,
"ovn-chassis-id");
- if (chassis_id) {
+ if (tunnel_id) {
+ char *chassis_name;
+ char *save_ptr = NULL;
+ char *tokstr = xstrdup(tunnel_id);
char *port_name = br_int->ports[k]->name;
+
sset_add(&tunnels, port_name);
- if (sset_contains(&bfd_chassis, chassis_id)) {
+ chassis_name = strtok_r(tokstr, OVN_MVTEP_CHASSISID_DELIM, &save_ptr);
+ if (chassis_name && sset_contains(&bfd_chassis, chassis_name)) {
sset_add(&bfd_ifaces, port_name);
}
+ free(tokstr);
}
}
diff --git a/ovn/controller/binding.c b/ovn/controller/binding.c
index 021ecddcf..74ba12743 100644
--- a/ovn/controller/binding.c
+++ b/ovn/controller/binding.c
@@ -391,6 +391,42 @@ update_local_lport_ids(struct sset *local_lport_ids,
sset_add(local_lport_ids, buf);
}
+/*
+ * Get the encap from the chassis for this port. The interface
+ * may have an external_ids:encap-ip=<encap-ip> set; if so we
+ * get the corresponding encap from the chassis.
+ * If "encap-ip" external-ids is not set, we'll not bind the port
+ * to any specific encap rec. and we'll pick up a tunnel port based on
+ * the chassis name alone for the port.
+ */
+static struct sbrec_encap *
+sbrec_get_port_encap(const struct sbrec_chassis *chassis_rec,
+ const struct ovsrec_interface *iface_rec)
+{
+
+ if (!iface_rec) {
+ return NULL;
+ }
+
+ const char *encap_ip = smap_get(&iface_rec->external_ids, "encap-ip");
+ if (!encap_ip) {
+ return NULL;
+ }
+
+ struct sbrec_encap *best_encap = NULL;
+ uint32_t best_type = 0;
+ for (int i = 0; i < chassis_rec->n_encaps; i++) {
+ if (!strcmp(chassis_rec->encaps[i]->ip, encap_ip)) {
+ uint32_t tun_type = get_tunnel_type(chassis_rec->encaps[i]->type);
+ if (tun_type > best_type) {
+ best_type = tun_type;
+ best_encap = chassis_rec->encaps[i];
+ }
+ }
+ }
+ return best_encap;
+}
+
static void
consider_local_datapath(struct ovsdb_idl_txn *ovnsb_idl_txn,
struct ovsdb_idl_txn *ovs_idl_txn,
@@ -505,9 +541,17 @@ consider_local_datapath(struct ovsdb_idl_txn *ovnsb_idl_txn,
}
sbrec_port_binding_set_chassis(binding_rec, chassis_rec);
}
+ /* Check if the port encap binding, if any, has changed */
+ struct sbrec_encap *encap_rec = sbrec_get_port_encap(
+ chassis_rec, iface_rec);
+ if (encap_rec && binding_rec->encap != encap_rec) {
+ sbrec_port_binding_set_encap(binding_rec, encap_rec);
+ }
} else if (binding_rec->chassis == chassis_rec) {
VLOG_INFO("Releasing lport %s from this chassis.",
binding_rec->logical_port);
+ if (binding_rec->encap)
+ sbrec_port_binding_set_encap(binding_rec, NULL);
sbrec_port_binding_set_chassis(binding_rec, NULL);
} else if (our_chassis) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
@@ -632,6 +676,8 @@ binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
bool any_changes = false;
SBREC_PORT_BINDING_TABLE_FOR_EACH (binding_rec, port_binding_table) {
if (binding_rec->chassis == chassis_rec) {
+ if (binding_rec->encap)
+ sbrec_port_binding_set_encap(binding_rec, NULL);
sbrec_port_binding_set_chassis(binding_rec, NULL);
any_changes = true;
}
diff --git a/ovn/controller/chassis.c b/ovn/controller/chassis.c
index 797c16c3c..3ea908d18 100644
--- a/ovn/controller/chassis.c
+++ b/ovn/controller/chassis.c
@@ -117,6 +117,15 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
}
free(tokstr);
+ tokstr = xstrdup(encap_ip);
+ save_ptr = NULL;
+ uint32_t nencap_ips = 0;
+ for (token = strtok_r(tokstr, ",", &save_ptr); token != NULL;
+ token = strtok_r(NULL, ",", &save_ptr)) {
+ nencap_ips++;
+ }
+ free(tokstr);
+
const char *hostname = smap_get_def(&cfg->external_ids, "hostname", "");
char hostname_[HOST_NAME_MAX + 1];
if (!hostname[0]) {
@@ -143,6 +152,7 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
= chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
const char *encap_csum = smap_get_def(&cfg->external_ids,
"ovn-encap-csum", "true");
+ int n_encaps = count_1bits(req_tunnels);
if (chassis_rec) {
if (strcmp(hostname, chassis_rec->hostname)) {
sbrec_chassis_set_hostname(chassis_rec, hostname);
@@ -175,11 +185,17 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
}
/* Compare desired tunnels against those currently in the database. */
+
+ /*
+ * We walk through the types and the IP's rather than check for the
+ * combination since we create a mesh; if we create specific tunnel-
+ * type combinations, then we'd need to check for the type-remote-ip
+ * pair.
+ */
uint32_t cur_tunnels = 0;
bool same = true;
for (int i = 0; i < chassis_rec->n_encaps; i++) {
cur_tunnels |= get_tunnel_type(chassis_rec->encaps[i]->type);
- same = same && !strcmp(chassis_rec->encaps[i]->ip, encap_ip);
same = same && !strcmp(
smap_get_def(&chassis_rec->encaps[i]->options, "csum", ""),
@@ -187,6 +203,29 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
}
same = same && req_tunnels == cur_tunnels;
+ same = same && chassis_rec->n_encaps == nencap_ips * n_encaps;
+ if (same) {
+ tokstr = xstrdup(encap_ip);
+ save_ptr = NULL;
+ bool found = false;
+
+ for (token = strtok_r(tokstr, ",", &save_ptr); token != NULL;
+ token = strtok_r(NULL, ",", &save_ptr)) {
+ found = false;
+ for (int i = 0; i < chassis_rec->n_encaps; i++) {
+ if (!strcmp(chassis_rec->encaps[i]->ip, token)) {
+ found = true;
+ break;
+ }
+ }
+ same = same && found;
+ if (!same) {
+ break;
+ }
+ }
+ free(tokstr);
+ }
+
if (same) {
/* Nothing changed. */
inited = true;
@@ -226,20 +265,31 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
}
ds_destroy(&iface_types);
- int n_encaps = count_1bits(req_tunnels);
- struct sbrec_encap **encaps = xmalloc(n_encaps * sizeof *encaps);
+ struct sbrec_encap **encaps =
+ xmalloc((nencap_ips * n_encaps) * sizeof *encaps);
const struct smap options = SMAP_CONST1(&options, "csum", encap_csum);
- for (int i = 0; i < n_encaps; i++) {
- const char *type = pop_tunnel_name(&req_tunnels);
+ tokstr = xstrdup(encap_ip);
+ save_ptr = NULL;
+ uint32_t save_req_tunnels = req_tunnels;
+ uint32_t tuncnt = 0;
+ for (token = strtok_r(tokstr, ",", &save_ptr); token != NULL;
+ token = strtok_r(NULL, ",", &save_ptr)) {
- encaps[i] = sbrec_encap_insert(ovnsb_idl_txn);
+ req_tunnels = save_req_tunnels;
+ for (int i = 0; i < n_encaps; i++) {
+ const char *type = pop_tunnel_name(&req_tunnels);
- sbrec_encap_set_type(encaps[i], type);
- sbrec_encap_set_ip(encaps[i], encap_ip);
- sbrec_encap_set_options(encaps[i], &options);
- sbrec_encap_set_chassis_name(encaps[i], chassis_id);
+ encaps[tuncnt] = sbrec_encap_insert(ovnsb_idl_txn);
+
+ sbrec_encap_set_type(encaps[tuncnt], type);
+ sbrec_encap_set_ip(encaps[tuncnt], token);
+ sbrec_encap_set_options(encaps[tuncnt], &options);
+ sbrec_encap_set_chassis_name(encaps[tuncnt], chassis_id);
+ tuncnt++;
+ }
}
- sbrec_chassis_set_encaps(chassis_rec, encaps, n_encaps);
+ sbrec_chassis_set_encaps(chassis_rec, encaps, tuncnt);
+ free(tokstr);
free(encaps);
inited = true;
diff --git a/ovn/controller/encaps.c b/ovn/controller/encaps.c
index 0495b25c2..610b833de 100644
--- a/ovn/controller/encaps.c
+++ b/ovn/controller/encaps.c
@@ -86,6 +86,16 @@ tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg,
smap_add(&options, "remote_ip", encap->ip);
smap_add(&options, "key", "flow");
const char *csum = smap_get(&encap->options, "csum");
+ char *tunnel_entry_id = NULL;
+
+ /*
+ * Since a chassis may have multiple encap-ip, we can't just add the
+ * chassis name as the "ovn-chassis-id" for the port; we use the
+ * combination of the chassis_name and the encap-ip to identify
+ * a specific tunnel to the chassis.
+ */
+ tunnel_entry_id = xasprintf("%s%s%s", new_chassis_id,
+ OVN_MVTEP_CHASSISID_DELIM, encap->ip);
if (csum && (!strcmp(csum, "true") || !strcmp(csum, "false"))) {
smap_add(&options, "csum", csum);
}
@@ -100,12 +110,12 @@ tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg,
* record, the new record will supplant it and encaps_run() will delete
* it). */
struct chassis_node *chassis = shash_find_data(&tc->chassis,
- new_chassis_id);
+ tunnel_entry_id);
if (chassis
&& chassis->port->n_interfaces == 1
&& !strcmp(chassis->port->interfaces[0]->type, encap->type)
&& smap_equal(&chassis->port->interfaces[0]->options, &options)) {
- shash_find_and_delete(&tc->chassis, new_chassis_id);
+ shash_find_and_delete(&tc->chassis, tunnel_entry_id);
free(chassis);
goto exit;
}
@@ -129,7 +139,7 @@ tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg,
struct ovsrec_port *port = ovsrec_port_insert(tc->ovs_txn);
ovsrec_port_set_name(port, port_name);
ovsrec_port_set_interfaces(port, &iface, 1);
- const struct smap id = SMAP_CONST1(&id, "ovn-chassis-id", new_chassis_id);
+ const struct smap id = SMAP_CONST1(&id, "ovn-chassis-id", tunnel_entry_id);
ovsrec_port_set_external_ids(port, &id);
ovsrec_bridge_update_ports_addvalue(tc->br_int, port);
@@ -137,10 +147,11 @@ tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg,
sset_add_and_free(&tc->port_names, port_name);
exit:
+ free(tunnel_entry_id);
smap_destroy(&options);
}
-static struct sbrec_encap *
+struct sbrec_encap *
preferred_encap(const struct sbrec_chassis *chassis_rec)
{
struct sbrec_encap *best_encap = NULL;
@@ -157,6 +168,33 @@ preferred_encap(const struct sbrec_chassis *chassis_rec)
return best_encap;
}
+/*
+ * For each peer chassis, get a preferred tunnel type and create as many tunnels
+ * as there are VTEP of that type (differentiated by remote_ip) on that chassis.
+ */
+static int
+chassis_tunnel_add(const struct sbrec_chassis *chassis_rec, const struct sbrec_sb_global *sbg, struct tunnel_ctx *tc)
+{
+ struct sbrec_encap *encap = preferred_encap(chassis_rec);
+ int tuncnt = 0;
+
+ if (!encap) {
+ VLOG_INFO("chassis_tunnel_add: No supported encaps for '%s'", chassis_rec->name);
+ return tuncnt;
+ }
+
+ uint32_t pref_type = get_tunnel_type(encap->type);
+ for (int i = 0; i < chassis_rec->n_encaps; i++) {
+ uint32_t tun_type = get_tunnel_type(chassis_rec->encaps[i]->type);
+ if (tun_type != pref_type) {
+ continue;
+ }
+ tunnel_add(tc, sbg, chassis_rec->name, chassis_rec->encaps[i]);
+ tuncnt++;
+ }
+ return tuncnt;
+}
+
void
encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
const struct ovsrec_bridge_table *bridge_table,
@@ -191,6 +229,10 @@ encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
const struct ovsrec_port *port = br->ports[i];
sset_add(&tc.port_names, port->name);
+ /*
+ * note that the id here is not just the chassis name, but the
+ * combination of <chassis_name><delim><encap_ip>
+ */
const char *id = smap_get(&port->external_ids, "ovn-chassis-id");
if (id) {
if (!shash_find(&tc.chassis, id)) {
@@ -210,12 +252,10 @@ encaps_run(struct ovsdb_idl_txn *ovs_idl_txn,
SBREC_CHASSIS_TABLE_FOR_EACH (chassis_rec, chassis_table) {
if (strcmp(chassis_rec->name, chassis_id)) {
/* Create tunnels to the other chassis. */
- const struct sbrec_encap *encap = preferred_encap(chassis_rec);
- if (!encap) {
- VLOG_INFO("No supported encaps for '%s'", chassis_rec->name);
+ if (chassis_tunnel_add(chassis_rec, sbg, &tc) == 0) {
+ VLOG_INFO("Creating encap for '%s' failed", chassis_rec->name);
continue;
}
- tunnel_add(&tc, sbg, chassis_rec->name, encap);
}
}
diff --git a/ovn/controller/ovn-controller.h b/ovn/controller/ovn-controller.h
index b13b3713c..6afd7278d 100644
--- a/ovn/controller/ovn-controller.h
+++ b/ovn/controller/ovn-controller.h
@@ -69,6 +69,8 @@ struct local_datapath *get_local_datapath(const struct hmap *,
const struct ovsrec_bridge *get_bridge(const struct ovsrec_bridge_table *,
const char *br_name);
+struct sbrec_encap *preferred_encap(const struct sbrec_chassis *);
+
/* Must be a bit-field ordered from most-preferred (higher number) to
* least-preferred (lower number). */
enum chassis_tunnel_type {
@@ -79,5 +81,11 @@ enum chassis_tunnel_type {
uint32_t get_tunnel_type(const char *name);
+/*
+ * Given there could be multiple tunnels with different IPs to the same
+ * chassis we annotate the ovn-chassis-id with
+ * <chassis_name>OVN_MVTEP_CHASSISID_DELIM<IP>.
+ */
+#define OVN_MVTEP_CHASSISID_DELIM "@"
#endif /* ovn/ovn-controller.h */
diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
index ab3b02ab1..da89890ac 100644
--- a/ovn/controller/physical.c
+++ b/ovn/controller/physical.c
@@ -74,16 +74,41 @@ struct chassis_tunnel {
enum chassis_tunnel_type type;
};
+/*
+ * This function looks up the list of tunnel ports (provided by
+ * ovn-chassis-id ports) and returns the tunnel for the given chassis-id and
+ * encap-ip. The ovn-chassis-id is formed using the chassis-id and encap-ip as
+ * <chassis-id>OVN_MVTEP_CHASSISID_DELIM<encap-ip>. The list is hashed using
+ * the chassis-id. If the encap-ip is not specified, it means we'll just
+ * return a tunnel for that chassis-id, i.e. we just check for chassis-id and
+ * if there is a match, we'll return the tunnel. If encap-ip is also provided we
+ * use <chassis-id>OVN_MVTEP_CHASSISID_DELIM<encap-ip> to do a more specific
+ * lookup.
+ */
static struct chassis_tunnel *
-chassis_tunnel_find(const char *chassis_id)
+chassis_tunnel_find(const char *chassis_id, char *encap_ip)
{
- struct chassis_tunnel *tun;
+ char *chassis_tunnel_entry;
+
+ /*
+ * If the specific encap_ip is given, look for the chassisid_ip entry,
+ * else return the 1st found entry for the chassis.
+ */
+ if (encap_ip != NULL) {
+ chassis_tunnel_entry = xasprintf("%s%s%s", chassis_id,
+ OVN_MVTEP_CHASSISID_DELIM, encap_ip);
+ } else {
+ chassis_tunnel_entry = xasprintf("%s", chassis_id);
+ }
+ struct chassis_tunnel *tun = NULL;
HMAP_FOR_EACH_WITH_HASH (tun, hmap_node, hash_string(chassis_id, 0),
&tunnels) {
- if (!strcmp(tun->chassis_id, chassis_id)) {
+ if (strstr(tun->chassis_id, chassis_tunnel_entry) != NULL) {
+ free (chassis_tunnel_entry);
return tun;
}
}
+ free (chassis_tunnel_entry);
return NULL;
}
@@ -122,6 +147,26 @@ put_resubmit(uint8_t table_id, struct ofpbuf *ofpacts)
resubmit->table_id = table_id;
}
+/*
+ * For a port binding, get the corresponding ovn-chassis-id tunnel port
+ * from the associated encap.
+ */
+static struct chassis_tunnel *
+get_port_binding_tun(const struct sbrec_port_binding *binding)
+{
+ struct sbrec_encap *encap = binding->encap;
+ struct sbrec_chassis *chassis = binding->chassis;
+ struct chassis_tunnel *tun = NULL;
+
+ if (encap) {
+ tun = chassis_tunnel_find(chassis->name, encap->ip);
+ }
+ if (!tun) {
+ tun = chassis_tunnel_find(chassis->name, NULL);
+ }
+ return tun;
+}
+
static void
put_encapsulation(enum mf_field_id mff_ovn_geneve,
const struct chassis_tunnel *tun,
@@ -548,7 +593,7 @@ consider_port_binding(struct ovsdb_idl_index *sbrec_chassis_by_name,
if (!binding->chassis) {
goto out;
}
- tun = chassis_tunnel_find(binding->chassis->name);
+ tun = chassis_tunnel_find(binding->chassis->name, NULL);
if (!tun) {
goto out;
}
@@ -698,10 +743,15 @@ consider_port_binding(struct ovsdb_idl_index *sbrec_chassis_by_name,
if (!is_ha_remote) {
/* Setup encapsulation */
+ const struct chassis_tunnel *rem_tun =
+ get_port_binding_tun(binding);
+ if (!rem_tun) {
+ goto out;
+ }
put_encapsulation(mff_ovn_geneve, tun, binding->datapath,
port_key, ofpacts_p);
/* Output to tunnel. */
- ofpact_put_OUTPUT(ofpacts_p)->port = ofport;
+ ofpact_put_OUTPUT(ofpacts_p)->port = rem_tun->ofport;
} else {
struct gateway_chassis *gwc;
/* Make sure all tunnel endpoints use the same encapsulation,
@@ -709,10 +759,10 @@ consider_port_binding(struct ovsdb_idl_index *sbrec_chassis_by_name,
LIST_FOR_EACH (gwc, node, gateway_chassis) {
if (gwc->db->chassis) {
if (!tun) {
- tun = chassis_tunnel_find(gwc->db->chassis->name);
+ tun = chassis_tunnel_find(gwc->db->chassis->name, NULL);
} else {
struct chassis_tunnel *chassis_tunnel =
- chassis_tunnel_find(gwc->db->chassis->name);
+ chassis_tunnel_find(gwc->db->chassis->name, NULL);
if (chassis_tunnel &&
tun->type != chassis_tunnel->type) {
static struct vlog_rate_limit rl =
@@ -743,7 +793,7 @@ consider_port_binding(struct ovsdb_idl_index *sbrec_chassis_by_name,
LIST_FOR_EACH (gwc, node, gateway_chassis) {
if (gwc->db->chassis) {
- tun = chassis_tunnel_find(gwc->db->chassis->name);
+ tun = chassis_tunnel_find(gwc->db->chassis->name, NULL);
if (!tun) {
continue;
}
@@ -881,7 +931,7 @@ consider_mc_group(enum mf_field_id mff_ovn_geneve,
const struct chassis_tunnel *prev = NULL;
SSET_FOR_EACH (chassis_name, &remote_chassis) {
const struct chassis_tunnel *tun
- = chassis_tunnel_find(chassis_name);
+ = chassis_tunnel_find(chassis_name, NULL);
if (!tun) {
continue;
}
@@ -943,9 +993,9 @@ physical_run(struct ovsdb_idl_index *sbrec_chassis_by_name,
continue;
}
- const char *chassis_id = smap_get(&port_rec->external_ids,
+ const char *tunnel_id = smap_get(&port_rec->external_ids,
"ovn-chassis-id");
- if (chassis_id && !strcmp(chassis_id, chassis->name)) {
+ if (tunnel_id && strstr(tunnel_id, chassis->name)) {
continue;
}
@@ -977,7 +1027,7 @@ physical_run(struct ovsdb_idl_index *sbrec_chassis_by_name,
/* L2 gateway patch ports can be handled just like VIFs. */
simap_put(&new_localvif_to_ofport, l2gateway, ofport);
break;
- } else if (chassis_id) {
+ } else if (tunnel_id) {
enum chassis_tunnel_type tunnel_type;
if (!strcmp(iface_rec->type, "geneve")) {
tunnel_type = GENEVE;
@@ -992,8 +1042,28 @@ physical_run(struct ovsdb_idl_index *sbrec_chassis_by_name,
continue;
}
- simap_put(&new_tunnel_to_ofport, chassis_id, ofport);
- struct chassis_tunnel *tun = chassis_tunnel_find(chassis_id);
+ simap_put(&new_tunnel_to_ofport, tunnel_id, ofport);
+ /*
+ * We split the tunnel_id to get the chassis-id
+ * and hash the tunnel list on the chassis-id. The
+ * reason to use the chassis-id alone is because
+ * there might be cases (multicast, gateway chassis)
+ * where we need to tunnel to the chassis, but won't
+ * have the encap-ip specifically.
+ */
+ char *tokstr = xstrdup(tunnel_id);
+ char *save_ptr = NULL;
+ char *hash_id = strtok_r(tokstr, OVN_MVTEP_CHASSISID_DELIM,
+ &save_ptr);
+ char *ip = strtok_r(NULL, "", &save_ptr);
+ /*
+ * If the value has morphed into something other than
+ * <chassis-id><delim><encap-ip>, ignore.
+ */
+ if (!hash_id || !ip) {
+ continue;
+ }
+ struct chassis_tunnel *tun = chassis_tunnel_find(hash_id, ip);
if (tun) {
/* If the tunnel's ofport has changed, update. */
if (tun->ofport != u16_to_ofp(ofport) ||
@@ -1005,12 +1075,13 @@ physical_run(struct ovsdb_idl_index *sbrec_chassis_by_name,
} else {
tun = xmalloc(sizeof *tun);
hmap_insert(&tunnels, &tun->hmap_node,
- hash_string(chassis_id, 0));
- tun->chassis_id = xstrdup(chassis_id);
+ hash_string(hash_id, 0));
+ tun->chassis_id = xstrdup(tunnel_id);
tun->ofport = u16_to_ofp(ofport);
tun->type = tunnel_type;
physical_map_changed = true;
}
+ free(tokstr);
break;
} else {
const char *iface_id = smap_get(&iface_rec->external_ids,
@@ -1120,7 +1191,7 @@ physical_run(struct ovsdb_idl_index *sbrec_chassis_by_name,
struct match match = MATCH_CATCHALL_INITIALIZER;
if (!binding->chassis ||
- strcmp(tun->chassis_id, binding->chassis->name)) {
+ strstr(tun->chassis_id, binding->chassis->name) == NULL) {
continue;
}
diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
index 5aebbfc2c..9250a71bd 100644
--- a/ovn/controller/pinctrl.c
+++ b/ovn/controller/pinctrl.c
@@ -2259,9 +2259,9 @@ get_localnet_vifs_l3gwports(
if (!strcmp(port_rec->name, br_int->name)) {
continue;
}
- const char *chassis_id = smap_get(&port_rec->external_ids,
+ const char *tunnel_id = smap_get(&port_rec->external_ids,
"ovn-chassis-id");
- if (chassis_id && !strcmp(chassis_id, chassis->name)) {
+ if (tunnel_id && strstr(tunnel_id, chassis->name)) {
continue;
}
const char *localnet = smap_get(&port_rec->external_ids,
diff --git a/ovn/ovn-sb.ovsschema b/ovn/ovn-sb.ovsschema
index e4412c006..cc8c771a7 100644
--- a/ovn/ovn-sb.ovsschema
+++ b/ovn/ovn-sb.ovsschema
@@ -1,7 +1,7 @@
{
"name": "OVN_Southbound",
- "version": "2.0.0",
- "cksum": "3109267860 15085",
+ "version": "2.1.0",
+ "cksum": "3806083220 15332",
"tables": {
"SB_Global": {
"columns": {
@@ -168,6 +168,10 @@
"refTable": "Chassis",
"refType": "weak"},
"min": 0, "max": 1}},
+ "encap": {"type": {"key": {"type": "uuid",
+ "refTable": "Encap",
+ "refType": "weak"},
+ "min": 0, "max": 1}},
"mac": {"type": {"key": "string",
"min": 0,
"max": "unlimited"}},
diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
index 8ffef403a..4e080abff 100644
--- a/ovn/ovn-sb.xml
+++ b/ovn/ovn-sb.xml
@@ -2179,6 +2179,12 @@ tcp.flags = RST;
logical port ID.
</column>
+ <column name="encap">
+ Points to the encapsulation configuration to use when transmitting
+ logical dataplane packets to this port. If set, the entry is a <ref
+ table="Encap"/> record on the port's chassis; if empty, the chassis's preferred encapsulation is used.
+ </column>
+
<column name="chassis">
The meaning of this column depends on the value of the <ref column="type"/>
column. This is the meaning for each <ref column="type"/>