summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author Darrell Ball <dlu998@gmail.com> 2019-08-26 09:05:44 -0700
committer Ben Pfaff <blp@ovn.org> 2019-09-25 08:58:11 -0700
commit 594570ea1cdecc7ef7880d707cbc7a4a4ecef09f (patch)
tree 702eec3e00d443f043f62f5723514b7602b10fe0 /lib
parent ba5ca284098fdf92ca851405c8fab2ef1a4fac39 (diff)
download openvswitch-594570ea1cdecc7ef7880d707cbc7a4a4ecef09f.tar.gz
conntrack: Optimize recirculations.
Cache the 'conn' context and use it when it is valid. The cached 'conn' context is reset whenever it is not expected to remain valid; the cost of doing so is negligible. Besides being the most efficient approach, this also handles corner cases, such as decapsulation leading to the same tuple, as in tunnel VPN cases. A negative test is added to check the resetting of the cached 'conn'.
Signed-off-by: Darrell Ball <dlu998@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Diffstat (limited to 'lib')
-rw-r--r-- lib/conntrack.c 60
-rw-r--r-- lib/netdev.c 1
-rw-r--r-- lib/packets.c 9
-rw-r--r-- lib/packets.h 11
4 files changed, 76 insertions, 5 deletions
diff --git a/lib/conntrack.c b/lib/conntrack.c
index b56ef06ac..ad7583cbe 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -1089,6 +1089,46 @@ conn_update_state_alg(struct conntrack *ct, struct dp_packet *pkt,
}
static void
+set_cached_conn(const struct nat_action_info_t *nat_action_info,
+ const struct conn_lookup_ctx *ctx, struct conn *conn,
+ struct dp_packet *pkt)
+{
+ if (OVS_LIKELY(!nat_action_info)) {
+ pkt->md.conn = conn;
+ pkt->md.reply = ctx->reply;
+ pkt->md.icmp_related = ctx->icmp_related;
+ } else {
+ pkt->md.conn = NULL;
+ }
+}
+
+static void
+process_one_fast(uint16_t zone, const uint32_t *setmark,
+ const struct ovs_key_ct_labels *setlabel,
+ const struct nat_action_info_t *nat_action_info,
+ struct conn *conn, struct dp_packet *pkt)
+{
+ if (nat_action_info) {
+ handle_nat(pkt, conn, zone, pkt->md.reply, pkt->md.icmp_related);
+ pkt->md.conn = NULL;
+ }
+
+ pkt->md.ct_zone = zone;
+ ovs_mutex_lock(&conn->lock);
+ pkt->md.ct_mark = conn->mark;
+ pkt->md.ct_label = conn->label;
+ ovs_mutex_unlock(&conn->lock);
+
+ if (setmark) {
+ set_mark(pkt, conn, setmark[0], setmark[1]);
+ }
+
+ if (setlabel) {
+ set_label(pkt, conn, &setlabel[0], &setlabel[1]);
+ }
+}
+
+static void
process_one(struct conntrack *ct, struct dp_packet *pkt,
struct conn_lookup_ctx *ctx, uint16_t zone,
bool force, bool commit, long long now, const uint32_t *setmark,
@@ -1188,6 +1228,8 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
}
handle_alg_ctl(ct, ctx, pkt, ct_alg_ctl, conn, now, !!nat_action_info);
+
+ set_cached_conn(nat_action_info, ctx, conn, pkt);
}
/* Sends the packets in '*pkt_batch' through the connection tracker 'ct'. All
@@ -1215,14 +1257,21 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
struct conn_lookup_ctx ctx;
DP_PACKET_BATCH_FOR_EACH (i, packet, pkt_batch) {
- if (packet->md.ct_state == CS_INVALID
- || !conn_key_extract(ct, packet, dl_type, &ctx, zone)) {
+ struct conn *conn = packet->md.conn;
+ if (OVS_UNLIKELY(packet->md.ct_state == CS_INVALID)) {
+ write_ct_md(packet, zone, NULL, NULL, NULL);
+ } else if (conn && conn->key.zone == zone && !force
+ && !get_alg_ctl_type(packet, tp_src, tp_dst, helper)) {
+ process_one_fast(zone, setmark, setlabel, nat_action_info,
+ conn, packet);
+ } else if (OVS_UNLIKELY(!conn_key_extract(ct, packet, dl_type, &ctx,
+ zone))) {
packet->md.ct_state = CS_INVALID;
write_ct_md(packet, zone, NULL, NULL, NULL);
- continue;
+ } else {
+ process_one(ct, packet, &ctx, zone, force, commit, now, setmark,
+ setlabel, nat_action_info, tp_src, tp_dst, helper);
}
- process_one(ct, packet, &ctx, zone, force, commit, now, setmark,
- setlabel, nat_action_info, tp_src, tp_dst, helper);
}
ipf_postprocess_conntrack(ct->ipf, pkt_batch, now, dl_type);
@@ -1236,6 +1285,7 @@ conntrack_clear(struct dp_packet *packet)
/* According to pkt_metadata_init(), ct_state == 0 is enough to make all of
* the conntrack fields invalid. */
packet->md.ct_state = 0;
+ pkt_metadata_init_conn(&packet->md);
}
static void
diff --git a/lib/netdev.c b/lib/netdev.c
index b1976d365..af8f8560d 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -837,6 +837,7 @@ netdev_pop_header(struct netdev *netdev, struct dp_packet_batch *batch)
* interpretation in the further packet processing when
* recirculated.*/
dp_packet_reset_offload(packet);
+ pkt_metadata_init_conn(&packet->md);
dp_packet_batch_refill(batch, packet, i);
}
}
diff --git a/lib/packets.c b/lib/packets.c
index 12053df57..fba69eed2 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -391,6 +391,8 @@ push_mpls(struct dp_packet *packet, ovs_be16 ethtype, ovs_be32 lse)
header = dp_packet_resize_l2_5(packet, MPLS_HLEN);
memmove(header, header + MPLS_HLEN, len);
memcpy(header + len, &lse, sizeof lse);
+
+ pkt_metadata_init_conn(&packet->md);
}
/* If 'packet' is an MPLS packet, removes its outermost MPLS label stack entry.
@@ -983,6 +985,8 @@ packet_set_ipv4_addr(struct dp_packet *packet,
ovs_be32 old_addr = get_16aligned_be32(addr);
size_t l4_size = dp_packet_l4_size(packet);
+ pkt_metadata_init_conn(&packet->md);
+
if (nh->ip_proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) {
struct tcp_header *th = dp_packet_l4(packet);
@@ -1122,6 +1126,7 @@ packet_set_ipv6_addr(struct dp_packet *packet, uint8_t proto,
packet_update_csum128(packet, proto, addr, new_addr);
}
memcpy(addr, new_addr, sizeof(ovs_be32[4]));
+ pkt_metadata_init_conn(&packet->md);
}
static void
@@ -1223,6 +1228,7 @@ packet_set_tcp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
packet_set_port(&th->tcp_src, src, &th->tcp_csum);
packet_set_port(&th->tcp_dst, dst, &th->tcp_csum);
+ pkt_metadata_init_conn(&packet->md);
}
/* Sets the UDP source and destination port ('src' and 'dst' respectively) of
@@ -1244,6 +1250,7 @@ packet_set_udp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
uh->udp_src = src;
uh->udp_dst = dst;
}
+ pkt_metadata_init_conn(&packet->md);
}
/* Sets the SCTP source and destination port ('src' and 'dst' respectively) of
@@ -1265,6 +1272,7 @@ packet_set_sctp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
new_csum = crc32c((void *)sh, tp_len);
put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum ^ new_csum);
+ pkt_metadata_init_conn(&packet->md);
}
/* Sets the ICMP type and code of the ICMP header contained in 'packet'.
@@ -1283,6 +1291,7 @@ packet_set_icmp(struct dp_packet *packet, uint8_t type, uint8_t code)
ih->icmp_csum = recalc_csum16(ih->icmp_csum, orig_tc, new_tc);
}
+ pkt_metadata_init_conn(&packet->md);
}
/* Sets the IGMP type to IGMP_HOST_MEMBERSHIP_QUERY and populates the
diff --git a/lib/packets.h b/lib/packets.h
index c44009892..c78defb89 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -35,6 +35,7 @@
#include "timeval.h"
struct dp_packet;
+struct conn;
struct ds;
/* Purely internal to OVS userspace. These flags should never be exposed to
@@ -108,6 +109,9 @@ PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
uint32_t ct_mark; /* Connection mark. */
ovs_u128 ct_label; /* Connection label. */
union flow_in_port in_port; /* Input port. */
+ struct conn *conn; /* Cached conntrack connection. */
+ bool reply; /* True if reply direction. */
+ bool icmp_related; /* True if ICMP related. */
);
PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline1,
@@ -140,6 +144,12 @@ pkt_metadata_init_tnl(struct pkt_metadata *md)
}
static inline void
+pkt_metadata_init_conn(struct pkt_metadata *md)
+{
+ md->conn = NULL;
+}
+
+static inline void
pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
{
/* This is called for every packet in userspace datapath and affects
@@ -157,6 +167,7 @@ pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
md->tunnel.ip_dst = 0;
md->tunnel.ipv6_dst = in6addr_any;
md->in_port.odp_port = port;
+ md->conn = NULL;
}
/* This function prefetches the cachelines touched by pkt_metadata_init()