summaryrefslogtreecommitdiff
path: root/ovn
diff options
context:
space:
mode:
authorLorenzo Bianconi <lorenzo.bianconi@redhat.com>2018-10-18 16:05:56 +0200
committerBen Pfaff <blp@ovn.org>2018-10-18 08:33:16 -0700
commitd7abfe39cfd234227bb6174b7f959a16dc803b83 (patch)
treec1eab6cce6efcb103b28bdfa0765ed11b09ff7a7 /ovn
parentc9c8c8a1502a4fa255a920ca37e0d1d3603eecb0 (diff)
downloadopenvswitch-d7abfe39cfd234227bb6174b7f959a16dc803b83.tar.gz
OVN: add buffering support for ip packets
Add buffering support for IPv4/IPv6 packets that will be processed by the arp{}/nd_ns{} actions when the L2 address has not been discovered yet. Otherwise the packet is replaced by an ARP/Neighbor Solicitation frame, which results in the loss of the first packet of the connection. Moreover, fix the following automatic tests, which were broken by the IP-buffering support since the original IP packets are now transmitted by the OVN logical router: - ovn -- 3 HVs, 3 LS, 3 lports/LS, 1 LR - ovn -- /32 router IP address Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
Diffstat (limited to 'ovn')
-rw-r--r--ovn/controller/pinctrl.c256
1 files changed, 223 insertions, 33 deletions
diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
index a20f6b6ef..e3ee516e4 100644
--- a/ovn/controller/pinctrl.c
+++ b/ovn/controller/pinctrl.c
@@ -61,6 +61,9 @@ static struct rconn *swconn;
* rconn_get_connection_seqno(rconn), 'swconn' has reconnected. */
static unsigned int conn_seq_no;
+static void init_buffered_packets_map(void);
+static void destroy_buffered_packets_map(void);
+
static void pinctrl_handle_put_mac_binding(const struct flow *md,
const struct flow *headers,
bool is_arp);
@@ -96,6 +99,7 @@ static void pinctrl_handle_put_nd_ra_opts(
struct ofputil_packet_in *pin, struct ofpbuf *userdata,
struct ofpbuf *continuation);
static void pinctrl_handle_nd_ns(const struct flow *ip_flow,
+ struct dp_packet *pkt_in,
const struct match *md,
struct ofpbuf *userdata);
static void init_ipv6_ras(void);
@@ -108,6 +112,7 @@ static void send_ipv6_ras(
;
COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
+COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map);
void
pinctrl_init(void)
@@ -117,6 +122,7 @@ pinctrl_init(void)
init_put_mac_bindings();
init_send_garps();
init_ipv6_ras();
+ init_buffered_packets_map();
}
static ovs_be32
@@ -190,9 +196,180 @@ set_actions_and_enqueue_msg(const struct dp_packet *packet,
ofpbuf_uninit(&ofpacts);
}
+struct buffer_info { /* One packet held until its destination MAC is learned. */
+ struct ofpbuf ofpacts; /* Actions attached to the packet-out for 'p'. */
+ struct dp_packet *p; /* The queued packet (a clone of the packet-in data). */
+};
+
+#define BUFFER_QUEUE_DEPTH 4 /* Slots in each per-IP ring of queued packets. */
+struct buffered_packets { /* Packets queued for one unresolved IP address. */
+ struct hmap_node hmap_node; /* In 'buffered_packets_map'. */
+
+ /* Key: destination IP; IPv4 addresses are stored in IPv4-mapped form. */
+ struct in6_addr ip;
+
+ long long int timestamp; /* time_msec() of the most recent enqueue. */
+
+ struct buffer_info data[BUFFER_QUEUE_DEPTH]; /* Ring storage. */
+ uint32_t head, tail; /* Oldest entry / next free slot; empty when equal. */
+};
+
+static struct hmap buffered_packets_map; /* 'struct buffered_packets', by IP. */
+
+static void
+init_buffered_packets_map(void)
+{
+ hmap_init(&buffered_packets_map); /* Called once from pinctrl_init(). */
+}
+
+/* Frees every packet still queued in 'bp', removes 'bp' from
+ * 'buffered_packets_map', and frees 'bp' itself.  'bp' must currently be
+ * linked into the map, because hmap_remove() is called on it. */
+static void
+destroy_buffered_packets(struct buffered_packets *bp)
+{
+    while (bp->head != bp->tail) {
+        struct buffer_info *bi = &bp->data[bp->head];
+
+        /* Queued packets are heap-allocated clones, so the struct dp_packet
+         * itself has to be freed as well; dp_packet_uninit() would release
+         * only its data buffer and leak the struct. */
+        dp_packet_delete(bi->p);
+        ofpbuf_uninit(&bi->ofpacts);
+
+        bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
+    }
+    hmap_remove(&buffered_packets_map, &bp->hmap_node);
+    free(bp);
+}
+
+/* Destroys every queue in 'buffered_packets_map', then the map itself. */
+static void
+destroy_buffered_packets_map(void)
+{
+    struct buffered_packets *bp, *next;
+
+    /* destroy_buffered_packets() unlinks 'bp' from the map on its own, so
+     * iterate with the _SAFE variant instead of HMAP_FOR_EACH_POP: popping
+     * first would make that hmap_remove() operate on a node that is no
+     * longer in the map. */
+    HMAP_FOR_EACH_SAFE (bp, next, hmap_node, &buffered_packets_map) {
+        destroy_buffered_packets(bp);
+    }
+    hmap_destroy(&buffered_packets_map);
+}
+
+/* Appends 'packet' to the ring of 'bp', together with the actions to use
+ * when it is sent out again: whatever reload_metadata() emits for 'md',
+ * followed by a resubmit to OFTABLE_REMOTE_OUTPUT with in_port
+ * OFPP_CONTROLLER.  Takes ownership of 'packet'.
+ *
+ * One slot is always left free, so the ring holds at most
+ * BUFFER_QUEUE_DEPTH - 1 packets; when it is full, the oldest packet is
+ * discarded to make room for the new one. */
+static void
+buffered_push_packet(struct buffered_packets *bp,
+                     struct dp_packet *packet,
+                     const struct match *md)
+{
+    uint32_t next = (bp->tail + 1) % BUFFER_QUEUE_DEPTH;
+    struct buffer_info *bi = &bp->data[bp->tail];
+
+    ofpbuf_init(&bi->ofpacts, 4096);
+
+    reload_metadata(&bi->ofpacts, md);
+    struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&bi->ofpacts);
+    resubmit->in_port = OFPP_CONTROLLER;
+    resubmit->table_id = OFTABLE_REMOTE_OUTPUT;
+
+    bi->p = packet;
+
+    if (next == bp->head) {
+        /* Ring is full: evict the oldest entry.  The packet is a
+         * heap-allocated dp_packet, so it must be deleted rather than just
+         * uninitialized, otherwise the struct itself leaks. */
+        bi = &bp->data[bp->head];
+        dp_packet_delete(bi->p);
+        ofpbuf_uninit(&bi->ofpacts);
+        bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
+    }
+    bp->tail = next;
+}
+
+/* Queues an OpenFlow packet-out (via queue_msg()) for every packet held in
+ * 'bp', after overwriting its Ethernet destination with '*addr', and
+ * releases each entry.  On return the ring is empty; 'bp' itself is left in
+ * 'buffered_packets_map'.
+ *
+ * NOTE(review): assumes every queued packet starts with an Ethernet header;
+ * nothing here validates the packet length before writing 'eth_dst'. */
+static void
+buffered_send_packets(struct buffered_packets *bp, struct eth_addr *addr)
+{
+    enum ofp_version version = rconn_get_version(swconn);
+    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
+
+    while (bp->head != bp->tail) {
+        struct buffer_info *bi = &bp->data[bp->head];
+        struct eth_header *eth = dp_packet_data(bi->p);
+
+        eth->eth_dst = *addr;
+        struct ofputil_packet_out po = {
+            .packet = dp_packet_data(bi->p),
+            .packet_len = dp_packet_size(bi->p),
+            .buffer_id = UINT32_MAX,
+            .ofpacts = bi->ofpacts.data,
+            .ofpacts_len = bi->ofpacts.size,
+        };
+        match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
+        queue_msg(ofputil_encode_packet_out(&po, proto));
+
+        ofpbuf_uninit(&bi->ofpacts);
+        /* The packet is a heap-allocated clone: free the struct dp_packet
+         * too, not only its data buffer. */
+        dp_packet_delete(bi->p);
+
+        bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
+    }
+}
+
+#define BUFFER_MAP_TIMEOUT 10000 /* In time_msec() units (presumably ms). */
static void
-pinctrl_handle_arp(const struct flow *ip_flow, const struct match *md,
- struct ofpbuf *userdata)
+buffered_packets_map_gc(void) /* Drops queues idle > BUFFER_MAP_TIMEOUT. */
+{
+ struct buffered_packets *cur_qp, *next_qp;
+ long long int now = time_msec();
+
+ HMAP_FOR_EACH_SAFE (cur_qp, next_qp, hmap_node, &buffered_packets_map) {
+ if (now > cur_qp->timestamp + BUFFER_MAP_TIMEOUT) {
+ destroy_buffered_packets(cur_qp); /* Also unlinks and frees 'cur_qp'. */
+ }
+ }
+}
+
+/* Returns the queue in 'buffered_packets_map' whose key equals '*ip', or
+ * NULL if there is none.  'hash' selects the bucket to search, so it must be
+ * the hash under which such a queue would have been inserted. */
+static struct buffered_packets *
+pinctrl_find_buffered_packets(const struct in6_addr *ip, uint32_t hash)
+{
+    struct buffered_packets *found = NULL;
+    struct buffered_packets *candidate;
+
+    HMAP_FOR_EACH_WITH_HASH (candidate, hmap_node, hash,
+                             &buffered_packets_map) {
+        if (IN6_ARE_ADDR_EQUAL(&candidate->ip, ip)) {
+            found = candidate;
+            break;
+        }
+    }
+    return found;
+}
+
+/* Queues a clone of 'pkt_in' on the ring keyed by the destination IP of
+ * 'ip_flow', creating that ring if it does not exist yet, and refreshes the
+ * ring's timestamp.  'is_arp' selects the IPv4 destination (stored in
+ * IPv4-mapped form) instead of the IPv6 one.  'md' is handed on to
+ * buffered_push_packet().
+ *
+ * Returns 0 on success, or -ENOMEM (after bumping the
+ * pinctrl_drop_buffered_packets_map coverage counter) if a new ring would be
+ * needed while the map already holds 1000 of them. */
+static int
+pinctrl_handle_buffered_packets(const struct flow *ip_flow,
+                                struct dp_packet *pkt_in,
+                                const struct match *md, bool is_arp)
+{
+    struct in6_addr addr = is_arp
+                           ? in6_addr_mapped_ipv4(ip_flow->nw_dst)
+                           : ip_flow->ipv6_dst;
+    uint32_t hash = hash_bytes(&addr, sizeof addr, 0);
+
+    struct buffered_packets *queue
+        = pinctrl_find_buffered_packets(&addr, hash);
+    if (queue == NULL) {
+        if (hmap_count(&buffered_packets_map) >= 1000) {
+            COVERAGE_INC(pinctrl_drop_buffered_packets_map);
+            return -ENOMEM;
+        }
+
+        queue = xmalloc(sizeof *queue);
+        queue->head = queue->tail = 0;
+        queue->ip = addr;
+        hmap_insert(&buffered_packets_map, &queue->hmap_node, hash);
+    }
+    queue->timestamp = time_msec();
+
+    /* Keep a private copy so the packet can be sent later, once the correct
+     * L2 address is known. */
+    struct dp_packet *copy = dp_packet_clone_data(dp_packet_data(pkt_in),
+                                                  dp_packet_size(pkt_in));
+    buffered_push_packet(queue, copy, md);
+
+    return 0;
+}
+
+static void
+pinctrl_handle_arp(const struct flow *ip_flow, struct dp_packet *pkt_in,
+ const struct match *md, struct ofpbuf *userdata)
{
/* This action only works for IP packets, and the switch should only send
* us IP packets this way, but check here just to be sure. */
@@ -203,6 +380,8 @@ pinctrl_handle_arp(const struct flow *ip_flow, const struct match *md,
return;
}
+ pinctrl_handle_buffered_packets(ip_flow, pkt_in, md, true);
+
/* Compose an ARP packet. */
uint64_t packet_stub[128 / 8];
struct dp_packet packet;
@@ -1162,7 +1341,7 @@ process_packet_in(const struct ofp_header *msg,
switch (ntohl(ah->opcode)) {
case ACTION_OPCODE_ARP:
- pinctrl_handle_arp(&headers, &pin.flow_metadata, &userdata);
+ pinctrl_handle_arp(&headers, &packet, &pin.flow_metadata, &userdata);
break;
case ACTION_OPCODE_PUT_ARP:
@@ -1207,7 +1386,8 @@ process_packet_in(const struct ofp_header *msg,
break;
case ACTION_OPCODE_ND_NS:
- pinctrl_handle_nd_ns(&headers, &pin.flow_metadata, &userdata);
+ pinctrl_handle_nd_ns(&headers, &packet, &pin.flow_metadata,
+ &userdata);
break;
case ACTION_OPCODE_ICMP:
@@ -1310,6 +1490,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
local_datapaths, active_tunnels);
send_ipv6_ras(sbrec_port_binding_by_datapath,
sbrec_port_binding_by_name, local_datapaths);
+ buffered_packets_map_gc();
}
/* Table of ipv6_ra_state structures, keyed on logical port name */
@@ -1620,6 +1801,7 @@ pinctrl_destroy(void)
destroy_put_mac_bindings();
destroy_send_garps();
destroy_ipv6_ras();
+ destroy_buffered_packets_map();
}
/* Implementation of the "put_arp" and "put_nd" OVN actions. These
@@ -1642,7 +1824,7 @@ struct put_mac_binding {
/* Key. */
uint32_t dp_key;
uint32_t port_key;
- char ip_s[INET6_ADDRSTRLEN + 1];
+ struct in6_addr ip_key;
/* Value. */
struct eth_addr mac;
@@ -1666,13 +1848,13 @@ destroy_put_mac_bindings(void)
static struct put_mac_binding *
pinctrl_find_put_mac_binding(uint32_t dp_key, uint32_t port_key,
- const char *ip_s, uint32_t hash)
+ const struct in6_addr *ip_key, uint32_t hash)
{
struct put_mac_binding *pa;
HMAP_FOR_EACH_WITH_HASH (pa, hmap_node, hash, &put_mac_bindings) {
if (pa->dp_key == dp_key
&& pa->port_key == port_key
- && !strcmp(pa->ip_s, ip_s)) {
+ && IN6_ARE_ADDR_EQUAL(&pa->ip_key, ip_key)) {
return pa;
}
}
@@ -1685,18 +1867,19 @@ pinctrl_handle_put_mac_binding(const struct flow *md,
{
uint32_t dp_key = ntohll(md->metadata);
uint32_t port_key = md->regs[MFF_LOG_INPORT - MFF_REG0];
- char ip_s[INET6_ADDRSTRLEN];
+ struct buffered_packets *bp;
+ struct in6_addr ip_key;
if (is_arp) {
- ovs_be32 ip = htonl(md->regs[0]);
- inet_ntop(AF_INET, &ip, ip_s, sizeof(ip_s));
+ ip_key = in6_addr_mapped_ipv4(htonl(md->regs[0]));
} else {
ovs_be128 ip6 = hton128(flow_get_xxreg(md, 0));
- inet_ntop(AF_INET6, &ip6, ip_s, sizeof(ip_s));
+ memcpy(&ip_key, &ip6, sizeof ip_key);
}
- uint32_t hash = hash_string(ip_s, hash_2words(dp_key, port_key));
+ uint32_t hash = hash_bytes(&ip_key, sizeof ip_key,
+ hash_2words(dp_key, port_key));
struct put_mac_binding *pmb
- = pinctrl_find_put_mac_binding(dp_key, port_key, ip_s, hash);
+ = pinctrl_find_put_mac_binding(dp_key, port_key, &ip_key, hash);
if (!pmb) {
if (hmap_count(&put_mac_bindings) >= 1000) {
COVERAGE_INC(pinctrl_drop_put_mac_binding);
@@ -1707,10 +1890,17 @@ pinctrl_handle_put_mac_binding(const struct flow *md,
hmap_insert(&put_mac_bindings, &pmb->hmap_node, hash);
pmb->dp_key = dp_key;
pmb->port_key = port_key;
- ovs_strlcpy_arrays(pmb->ip_s, ip_s);
+ pmb->ip_key = ip_key;
}
pmb->timestamp = time_msec();
pmb->mac = headers->dl_src;
+
+ /* send queued pkts */
+ uint32_t bhash = hash_bytes(&ip_key, sizeof ip_key, 0);
+ bp = pinctrl_find_buffered_packets(&ip_key, bhash);
+ if (bp) {
+ buffered_send_packets(bp, &pmb->mac);
+ }
}
static const struct sbrec_mac_binding *
@@ -1760,25 +1950,23 @@ run_put_mac_binding(struct ovsdb_idl_txn *ovnsb_idl_txn,
snprintf(mac_string, sizeof mac_string,
ETH_ADDR_FMT, ETH_ADDR_ARGS(pmb->mac));
- /* Check for and update an existing IP-MAC binding for this logical
- * port.
- */
+ struct ds ip_s = DS_EMPTY_INITIALIZER;
+ ipv6_format_mapped(&pmb->ip_key, &ip_s);
+
+ /* Update or add an IP-MAC binding for this logical port. */
const struct sbrec_mac_binding *b =
mac_binding_lookup(sbrec_mac_binding_by_lport_ip, pb->logical_port,
- pmb->ip_s);
- if (b) {
- if (strcmp(b->mac, mac_string)) {
- sbrec_mac_binding_set_mac(b, mac_string);
- }
- return;
- }
-
- /* Add new IP-MAC binding for this logical port. */
- b = sbrec_mac_binding_insert(ovnsb_idl_txn);
- sbrec_mac_binding_set_logical_port(b, pb->logical_port);
- sbrec_mac_binding_set_ip(b, pmb->ip_s);
- sbrec_mac_binding_set_mac(b, mac_string);
- sbrec_mac_binding_set_datapath(b, pb->datapath);
+ ds_cstr(&ip_s));
+ if (!b) {
+ b = sbrec_mac_binding_insert(ovnsb_idl_txn);
+ sbrec_mac_binding_set_logical_port(b, pb->logical_port);
+ sbrec_mac_binding_set_ip(b, ds_cstr(&ip_s));
+ sbrec_mac_binding_set_mac(b, mac_string);
+ sbrec_mac_binding_set_datapath(b, pb->datapath);
+ } else if (strcmp(b->mac, mac_string)) {
+ sbrec_mac_binding_set_mac(b, mac_string);
+ }
+ ds_destroy(&ip_s);
}
static void
@@ -2409,8 +2597,8 @@ pinctrl_handle_nd_na(const struct flow *ip_flow, const struct match *md,
}
static void
-pinctrl_handle_nd_ns(const struct flow *ip_flow, const struct match *md,
- struct ofpbuf *userdata)
+pinctrl_handle_nd_ns(const struct flow *ip_flow, struct dp_packet *pkt_in,
+ const struct match *md, struct ofpbuf *userdata)
{
/* This action only works for IPv6 packets. */
if (get_dl_type(ip_flow) != htons(ETH_TYPE_IPV6)) {
@@ -2419,6 +2607,8 @@ pinctrl_handle_nd_ns(const struct flow *ip_flow, const struct match *md,
return;
}
+ pinctrl_handle_buffered_packets(ip_flow, pkt_in, md, false);
+
uint64_t packet_stub[128 / 8];
struct dp_packet packet;
dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);