From aab96ec4d81e001f94d3ed65e0e352788ab94c0f Mon Sep 17 00:00:00 2001
From: Yuanhan Liu
Date: Mon, 25 Jun 2018 16:21:05 +0300
Subject: dpif-netdev: retrieve flow directly from the flow mark

So that we can skip some very costly CPU operations, including but not
limited to miniflow_extract, EMC lookup, and dpcls lookup. Thus,
performance can be greatly improved.

A PHY-PHY forwarding test with 1000 megaflows (udp,tp_src=1000-1999) and
1 million streams (tp_src=1000-1999, tp_dst=2000-2999) shows more than a
260% performance boost.

Note that even though the heavy miniflow_extract is skipped, we still
have to do per-packet checking, because we have to check the tcp_flags.

Co-authored-by: Finn Christensen
Signed-off-by: Yuanhan Liu
Signed-off-by: Finn Christensen
Co-authored-by: Shahaf Shuler
Signed-off-by: Shahaf Shuler
Signed-off-by: Ian Stokes
---
 lib/dp-packet.h   | 13 +++++++++++
 lib/dpif-netdev.c | 46 +++++++++++++++++++++++++++++++++-----
 lib/flow.c        | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/flow.h        |  1 +
 4 files changed, 121 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 596cfe691..ba91e5891 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -691,6 +691,19 @@ reset_dp_packet_checksum_ol_flags(struct dp_packet *p)
 #define reset_dp_packet_checksum_ol_flags(arg)
 #endif
 
+static inline bool
+dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,
+                        uint32_t *mark OVS_UNUSED)
+{
+#ifdef DPDK_NETDEV
+    if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
+        *mark = p->mbuf.hash.fdir.hi;
+        return true;
+    }
+#endif
+    return false;
+}
+
 enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */
 
 struct dp_packet_batch {
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index e11106e75..cb5934f8d 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2119,6 +2119,23 @@ flow_mark_flush(struct dp_netdev_pmd_thread *pmd)
     }
 }
 
+static struct dp_netdev_flow *
+mark_to_flow_find(const struct dp_netdev_pmd_thread *pmd,
+                  const uint32_t mark)
+{
+    struct dp_netdev_flow *flow;
+
+    CMAP_FOR_EACH_WITH_HASH (flow, mark_node, hash_int(mark, 0),
+                             &flow_mark.mark_to_flow) {
+        if (flow->mark == mark && flow->pmd_id == pmd->core_id &&
+            flow->dead == false) {
+            return flow;
+        }
+    }
+
+    return NULL;
+}
+
 static void
 dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
                           struct dp_netdev_flow *flow)
@@ -5366,10 +5383,10 @@ struct packet_batch_per_flow {
 static inline void
 packet_batch_per_flow_update(struct packet_batch_per_flow *batch,
                              struct dp_packet *packet,
-                             const struct miniflow *mf)
+                             uint16_t tcp_flags)
 {
     batch->byte_count += dp_packet_size(packet);
-    batch->tcp_flags |= miniflow_get_tcp_flags(mf);
+    batch->tcp_flags |= tcp_flags;
     batch->array.packets[batch->array.count++] = packet;
 }
 
@@ -5403,7 +5420,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
 
 static inline void
 dp_netdev_queue_batches(struct dp_packet *pkt,
-                        struct dp_netdev_flow *flow, const struct miniflow *mf,
+                        struct dp_netdev_flow *flow, uint16_t tcp_flags,
                         struct packet_batch_per_flow *batches,
                         size_t *n_batches)
 {
@@ -5414,7 +5431,7 @@ dp_netdev_queue_batches(struct dp_packet *pkt,
         packet_batch_per_flow_init(batch, flow);
     }
 
-    packet_batch_per_flow_update(batch, pkt, mf);
+    packet_batch_per_flow_update(batch, pkt, tcp_flags);
 }
 
 /* Try to process all ('cnt') the 'packets' using only the exact match cache
@@ -5445,6 +5462,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
     const size_t cnt = dp_packet_batch_size(packets_);
     uint32_t cur_min;
     int i;
+    uint16_t tcp_flags;
 
     atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
     pmd_perf_update_counter(&pmd->perf_stats,
@@ -5453,6 +5471,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
 
     DP_PACKET_BATCH_REFILL_FOR_EACH (i, cnt, packet, packets_) {
         struct dp_netdev_flow *flow;
+        uint32_t mark;
 
         if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
             dp_packet_delete(packet);
@@ -5470,6 +5489,18 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
         if (!md_is_valid) {
             pkt_metadata_init(&packet->md, port_no);
         }
+
+        if ((*recirc_depth_get() == 0) &&
+            dp_packet_has_flow_mark(packet, &mark)) {
+            flow = mark_to_flow_find(pmd, mark);
+            if (flow) {
+                tcp_flags = parse_tcp_flags(packet);
+                dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
+                                        n_batches);
+                continue;
+            }
+        }
+
         miniflow_extract(packet, &key->mf);
         key->len = 0; /* Not computed yet. */
         /* If EMC is disabled skip hash computation and emc_lookup */
@@ -5485,7 +5516,8 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
             flow = NULL;
         }
         if (OVS_LIKELY(flow)) {
-            dp_netdev_queue_batches(packet, flow, &key->mf, batches,
+            tcp_flags = miniflow_get_tcp_flags(&key->mf);
+            dp_netdev_queue_batches(packet, flow, tcp_flags, batches,
                                     n_batches);
         } else {
             /* Exact match cache missed. Group missed packets together at
@@ -5672,7 +5704,9 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
         flow = dp_netdev_flow_cast(rules[i]);
 
         emc_probabilistic_insert(pmd, &keys[i], flow);
-        dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches);
+        dp_netdev_queue_batches(packet, flow,
+                                miniflow_get_tcp_flags(&keys[i].mf),
+                                batches, n_batches);
     }
 
     pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_HIT,
diff --git a/lib/flow.c b/lib/flow.c
index cb8b2df19..a785e63a8 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1019,6 +1019,73 @@ parse_dl_type(const struct eth_header *data_, size_t size)
     return parse_ethertype(&data, &size);
 }
 
+uint16_t
+parse_tcp_flags(struct dp_packet *packet)
+{
+    const void *data = dp_packet_data(packet);
+    const char *frame = (const char *)data;
+    size_t size = dp_packet_size(packet);
+    ovs_be16 dl_type;
+    uint8_t nw_frag = 0, nw_proto = 0;
+
+    if (packet->packet_type != htonl(PT_ETH)) {
+        return 0;
+    }
+
+    dp_packet_reset_offsets(packet);
+
+    data_pull(&data, &size, ETH_ADDR_LEN * 2);
+    dl_type = parse_ethertype(&data, &size);
+    if (OVS_UNLIKELY(eth_type_mpls(dl_type))) {
+        packet->l2_5_ofs = (char *)data - frame;
+    }
+    if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {
+        const struct ip_header *nh = data;
+        int ip_len;
+        uint16_t tot_len;
+
+        if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len, &tot_len))) {
+            return 0;
+        }
+        dp_packet_set_l2_pad_size(packet, size - tot_len);
+        packet->l3_ofs = (uint16_t)((char *)nh - frame);
+        nw_proto = nh->ip_proto;
+        nw_frag = ipv4_get_nw_frag(nh);
+
+        size = tot_len;   /* Never pull padding. */
+        data_pull(&data, &size, ip_len);
+    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
+        const struct ovs_16aligned_ip6_hdr *nh = data;
+        uint16_t plen;
+
+        if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) {
+            return 0;
+        }
+        packet->l3_ofs = (uint16_t)((char *)nh - frame);
+        data_pull(&data, &size, sizeof *nh);
+
+        plen = ntohs(nh->ip6_plen); /* Never pull padding. */
+        dp_packet_set_l2_pad_size(packet, size - plen);
+        size = plen;
+        if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag)) {
+            return 0;
+        }
+        nw_proto = nh->ip6_nxt;
+    } else {
+        return 0;
+    }
+
+    packet->l4_ofs = (uint16_t)((char *)data - frame);
+    if (!(nw_frag & FLOW_NW_FRAG_LATER) && nw_proto == IPPROTO_TCP &&
+        size >= TCP_HEADER_LEN) {
+        const struct tcp_header *tcp = data;
+
+        return TCP_FLAGS(tcp->tcp_ctl);
+    }
+
+    return 0;
+}
+
 /* For every bit of a field that is wildcarded in 'wildcards', sets the
  * corresponding bit in 'flow' to zero. */
 void
diff --git a/lib/flow.h b/lib/flow.h
index 5b6585f11..af7b5e921 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -133,6 +133,7 @@ bool parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto,
                          uint8_t *nw_frag);
 ovs_be16 parse_dl_type(const struct eth_header *data_, size_t size);
 bool parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh *key);
+uint16_t parse_tcp_flags(struct dp_packet *packet);
 
 static inline uint64_t
 flow_get_xreg(const struct flow *flow, int idx)
--
cgit v1.2.1
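
Editor's note, for illustration only (not part of the patch): the change above lets
the PMD map a NIC-assigned flow mark straight to a dp_netdev_flow via
mark_to_flow_find() and skip miniflow_extract(), the EMC lookup and the dpcls
lookup, while parse_tcp_flags() still reads the TCP flags of each packet for the
batch statistics. The standalone C sketch below mimics that control flow under
assumptions of my own: struct pkt, struct flow_entry, flow_from_mark() and
full_lookup() are hypothetical stand-ins, not OVS code.

/* Minimal sketch of a mark-indexed fast path: if the packet carries a flow
 * mark, resolve the flow directly from the mark; otherwise fall back to a
 * full classification.  All names here are hypothetical. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MARK_TABLE_SIZE 1024

struct flow_entry {
    bool in_use;
    uint32_t mark;              /* Mark the NIC was told to attach. */
    uint64_t hits;
    uint16_t tcp_flags_seen;    /* Accumulated per-packet TCP flags. */
};

struct pkt {
    bool has_mark;              /* Analogous to a PKT_RX_FDIR_ID-style flag. */
    uint32_t mark;
    uint16_t tcp_flags;         /* Pretend these were parsed from the header. */
};

static struct flow_entry mark_table[MARK_TABLE_SIZE];

/* Direct-mapped stand-in for mark_to_flow_find(): mark -> flow, or NULL. */
static struct flow_entry *
flow_from_mark(uint32_t mark)
{
    struct flow_entry *e = &mark_table[mark % MARK_TABLE_SIZE];
    return (e->in_use && e->mark == mark) ? e : NULL;
}

/* Stand-in for the slow path (full key extraction + lookup), omitted here. */
static struct flow_entry *
full_lookup(const struct pkt *p)
{
    (void) p;
    return NULL;
}

static void
process_packet(struct pkt *p)
{
    struct flow_entry *flow = NULL;

    if (p->has_mark) {
        flow = flow_from_mark(p->mark);   /* Fast path: no key extraction. */
    }
    if (!flow) {
        flow = full_lookup(p);            /* Fall back to the normal path. */
    }
    if (flow) {
        flow->hits++;
        flow->tcp_flags_seen |= p->tcp_flags;  /* Still needed per packet. */
    }
}

int
main(void)
{
    mark_table[42 % MARK_TABLE_SIZE] = (struct flow_entry) {
        .in_use = true, .mark = 42,
    };

    struct pkt p = { .has_mark = true, .mark = 42, .tcp_flags = 0x10 /* ACK */ };
    process_packet(&p);

    printf("hits=%llu tcp_flags=0x%x\n",
           (unsigned long long) mark_table[42 % MARK_TABLE_SIZE].hits,
           mark_table[42 % MARK_TABLE_SIZE].tcp_flags_seen);
    return 0;
}

If the mark does not resolve (for example, the flow was just removed), the sketch
falls back to the full lookup, mirroring how emc_processing() falls through to
miniflow_extract() when mark_to_flow_find() returns NULL.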