/*
 * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
 * Copyright (c) 2019 Mellanox Technologies, Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>

#include <sys/types.h>
#include <netinet/ip6.h>
#include <rte_flow.h>

#include "cmap.h"
#include "dpif-netdev.h"
#include "netdev-offload-provider.h"
#include "netdev-provider.h"
#include "openvswitch/match.h"
#include "openvswitch/vlog.h"
#include "packets.h"
#include "uuid.h"

VLOG_DEFINE_THIS_MODULE(netdev_offload_dpdk);
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(100, 5);

/* Thread-safety
 * =============
 *
 * The API below is NOT thread safe in the following terms:
 *
 *  - The caller must be sure that none of these functions will be called
 *    simultaneously, even for different 'netdev's.
 *
 *  - The caller must be sure that 'netdev' will not be destructed/deallocated.
 *
 *  - The caller must be sure that 'netdev' configuration will not be changed.
 *    For example, simultaneous call of 'netdev_reconfigure()' for the same
 *    'netdev' is forbidden.
 *
 * For the current implementation, all of the above restrictions can be
 * fulfilled by taking the datapath 'port_mutex' in lib/dpif-netdev.c.
 */

/*
 * A mapping from ufid to dpdk rte_flow.
 */
static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER;

struct ufid_to_rte_flow_data {
    struct cmap_node node;
    ovs_u128 ufid;
    struct rte_flow *rte_flow;
    bool actions_offloaded;
    struct dpif_flow_stats stats;
};

/* Find rte_flow with @ufid. */
static struct ufid_to_rte_flow_data *
ufid_to_rte_flow_data_find(const ovs_u128 *ufid)
{
    size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
    struct ufid_to_rte_flow_data *data;

    CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) {
        if (ovs_u128_equals(*ufid, data->ufid)) {
            return data;
        }
    }

    return NULL;
}

static inline void
ufid_to_rte_flow_associate(const ovs_u128 *ufid,
                           struct rte_flow *rte_flow, bool actions_offloaded)
{
    size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
    struct ufid_to_rte_flow_data *data = xzalloc(sizeof *data);
    struct ufid_to_rte_flow_data *data_prev;

    /*
     * We should not simply overwrite an existing rte flow.
     * We should have deleted it first before re-adding it.
     * Thus, if the following assert triggers, something is wrong:
     * the rte_flow is not destroyed.
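     * (netdev_offload_dpdk_flow_put() honors this by destroying any existing
     * rte_flow for a ufid before re-adding it; see below.)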
     */
    data_prev = ufid_to_rte_flow_data_find(ufid);
    if (data_prev) {
        ovs_assert(data_prev->rte_flow == NULL);
    }

    data->ufid = *ufid;
    data->rte_flow = rte_flow;
    data->actions_offloaded = actions_offloaded;

    cmap_insert(&ufid_to_rte_flow,
                CONST_CAST(struct cmap_node *, &data->node), hash);
}

static inline void
ufid_to_rte_flow_disassociate(const ovs_u128 *ufid)
{
    size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
    struct ufid_to_rte_flow_data *data;

    CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) {
        if (ovs_u128_equals(*ufid, data->ufid)) {
            cmap_remove(&ufid_to_rte_flow,
                        CONST_CAST(struct cmap_node *, &data->node), hash);
            ovsrcu_postpone(free, data);
            return;
        }
    }

    VLOG_WARN("ufid "UUID_FMT" is not associated with an rte flow",
              UUID_ARGS((struct uuid *) ufid));
}

/*
 * To avoid individual xrealloc calls for each new element, a 'current_max'
 * is used to keep track of the currently allocated number of elements.
 * It starts at 8 and doubles on each xrealloc call.
 */
struct flow_patterns {
    struct rte_flow_item *items;
    int cnt;
    int current_max;
};

struct flow_actions {
    struct rte_flow_action *actions;
    int cnt;
    int current_max;
};

static void
dump_flow_attr(struct ds *s, const struct rte_flow_attr *attr)
{
    ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s",
                  attr->ingress  ? "ingress " : "",
                  attr->egress   ? "egress " : "",
                  attr->priority, attr->group,
                  attr->transfer ? "transfer " : "");
}

/* Adds one pattern item 'field' with the 'mask' to dynamic string 's' using
 * 'testpmd command'-like format. */
#define DUMP_PATTERN_ITEM(mask, field, fmt, spec_pri, mask_pri) \
    if (is_all_ones(&mask, sizeof mask)) { \
        ds_put_format(s, field " is " fmt " ", spec_pri); \
    } else if (!is_all_zeros(&mask, sizeof mask)) { \
        ds_put_format(s, field " spec " fmt " " field " mask " fmt " ", \
                      spec_pri, mask_pri); \
    }

static void
dump_flow_pattern(struct ds *s, const struct rte_flow_item *item)
{
    if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
        const struct rte_flow_item_eth *eth_spec = item->spec;
        const struct rte_flow_item_eth *eth_mask = item->mask;

        ds_put_cstr(s, "eth ");
        if (eth_spec) {
            if (!eth_mask) {
                eth_mask = &rte_flow_item_eth_mask;
            }
            DUMP_PATTERN_ITEM(eth_mask->src, "src", ETH_ADDR_FMT,
                              ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes),
                              ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes));
            DUMP_PATTERN_ITEM(eth_mask->dst, "dst", ETH_ADDR_FMT,
                              ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes),
                              ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes));
            DUMP_PATTERN_ITEM(eth_mask->type, "type", "0x%04"PRIx16,
                              ntohs(eth_spec->type),
                              ntohs(eth_mask->type));
        }
        ds_put_cstr(s, "/ ");
    } else if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
        const struct rte_flow_item_vlan *vlan_spec = item->spec;
        const struct rte_flow_item_vlan *vlan_mask = item->mask;

        ds_put_cstr(s, "vlan ");
        if (vlan_spec) {
            if (!vlan_mask) {
                vlan_mask = &rte_flow_item_vlan_mask;
            }
            DUMP_PATTERN_ITEM(vlan_mask->inner_type, "inner_type",
                              "0x%"PRIx16, ntohs(vlan_spec->inner_type),
                              ntohs(vlan_mask->inner_type));
            DUMP_PATTERN_ITEM(vlan_mask->tci, "tci", "0x%"PRIx16,
                              ntohs(vlan_spec->tci), ntohs(vlan_mask->tci));
        }
        ds_put_cstr(s, "/ ");
    } else if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) {
        const struct rte_flow_item_ipv4 *ipv4_spec = item->spec;
        const struct rte_flow_item_ipv4 *ipv4_mask = item->mask;

        ds_put_cstr(s, "ipv4 ");
        if (ipv4_spec) {
            if (!ipv4_mask) {
                ipv4_mask = &rte_flow_item_ipv4_mask;
            }
            DUMP_PATTERN_ITEM(ipv4_mask->hdr.src_addr, "src", IP_FMT,
                              IP_ARGS(ipv4_spec->hdr.src_addr),
                              IP_ARGS(ipv4_mask->hdr.src_addr));
            DUMP_PATTERN_ITEM(ipv4_mask->hdr.dst_addr, "dst", IP_FMT,
IP_ARGS(ipv4_spec->hdr.dst_addr), IP_ARGS(ipv4_mask->hdr.dst_addr)); DUMP_PATTERN_ITEM(ipv4_mask->hdr.next_proto_id, "proto", "0x%"PRIx8, ipv4_spec->hdr.next_proto_id, ipv4_mask->hdr.next_proto_id); DUMP_PATTERN_ITEM(ipv4_mask->hdr.type_of_service, "tos", "0x%"PRIx8, ipv4_spec->hdr.type_of_service, ipv4_mask->hdr.type_of_service); DUMP_PATTERN_ITEM(ipv4_mask->hdr.time_to_live, "ttl", "0x%"PRIx8, ipv4_spec->hdr.time_to_live, ipv4_mask->hdr.time_to_live); } ds_put_cstr(s, "/ "); } else if (item->type == RTE_FLOW_ITEM_TYPE_UDP) { const struct rte_flow_item_udp *udp_spec = item->spec; const struct rte_flow_item_udp *udp_mask = item->mask; ds_put_cstr(s, "udp "); if (udp_spec) { if (!udp_mask) { udp_mask = &rte_flow_item_udp_mask; } DUMP_PATTERN_ITEM(udp_mask->hdr.src_port, "src", "%"PRIu16, ntohs(udp_spec->hdr.src_port), ntohs(udp_mask->hdr.src_port)); DUMP_PATTERN_ITEM(udp_mask->hdr.dst_port, "dst", "%"PRIu16, ntohs(udp_spec->hdr.dst_port), ntohs(udp_mask->hdr.dst_port)); } ds_put_cstr(s, "/ "); } else if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) { const struct rte_flow_item_sctp *sctp_spec = item->spec; const struct rte_flow_item_sctp *sctp_mask = item->mask; ds_put_cstr(s, "sctp "); if (sctp_spec) { if (!sctp_mask) { sctp_mask = &rte_flow_item_sctp_mask; } DUMP_PATTERN_ITEM(sctp_mask->hdr.src_port, "src", "%"PRIu16, ntohs(sctp_spec->hdr.src_port), ntohs(sctp_mask->hdr.src_port)); DUMP_PATTERN_ITEM(sctp_mask->hdr.dst_port, "dst", "%"PRIu16, ntohs(sctp_spec->hdr.dst_port), ntohs(sctp_mask->hdr.dst_port)); } ds_put_cstr(s, "/ "); } else if (item->type == RTE_FLOW_ITEM_TYPE_ICMP) { const struct rte_flow_item_icmp *icmp_spec = item->spec; const struct rte_flow_item_icmp *icmp_mask = item->mask; ds_put_cstr(s, "icmp "); if (icmp_spec) { if (!icmp_mask) { icmp_mask = &rte_flow_item_icmp_mask; } DUMP_PATTERN_ITEM(icmp_mask->hdr.icmp_type, "icmp_type", "%"PRIu8, icmp_spec->hdr.icmp_type, icmp_mask->hdr.icmp_type); DUMP_PATTERN_ITEM(icmp_mask->hdr.icmp_code, "icmp_code", "%"PRIu8, icmp_spec->hdr.icmp_code, icmp_mask->hdr.icmp_code); } ds_put_cstr(s, "/ "); } else if (item->type == RTE_FLOW_ITEM_TYPE_TCP) { const struct rte_flow_item_tcp *tcp_spec = item->spec; const struct rte_flow_item_tcp *tcp_mask = item->mask; ds_put_cstr(s, "tcp "); if (tcp_spec) { if (!tcp_mask) { tcp_mask = &rte_flow_item_tcp_mask; } DUMP_PATTERN_ITEM(tcp_mask->hdr.src_port, "src", "%"PRIu16, ntohs(tcp_spec->hdr.src_port), ntohs(tcp_mask->hdr.src_port)); DUMP_PATTERN_ITEM(tcp_mask->hdr.dst_port, "dst", "%"PRIu16, ntohs(tcp_spec->hdr.dst_port), ntohs(tcp_mask->hdr.dst_port)); DUMP_PATTERN_ITEM(tcp_mask->hdr.tcp_flags, "flags", "0x%"PRIx8, tcp_spec->hdr.tcp_flags, tcp_mask->hdr.tcp_flags); } ds_put_cstr(s, "/ "); } else if (item->type == RTE_FLOW_ITEM_TYPE_IPV6) { const struct rte_flow_item_ipv6 *ipv6_spec = item->spec; const struct rte_flow_item_ipv6 *ipv6_mask = item->mask; char addr_str[INET6_ADDRSTRLEN]; char mask_str[INET6_ADDRSTRLEN]; struct in6_addr addr, mask; ds_put_cstr(s, "ipv6 "); if (ipv6_spec) { if (!ipv6_mask) { ipv6_mask = &rte_flow_item_ipv6_mask; } memcpy(&addr, ipv6_spec->hdr.src_addr, sizeof addr); memcpy(&mask, ipv6_mask->hdr.src_addr, sizeof mask); ipv6_string_mapped(addr_str, &addr); ipv6_string_mapped(mask_str, &mask); DUMP_PATTERN_ITEM(mask, "src", "%s", addr_str, mask_str); memcpy(&addr, ipv6_spec->hdr.dst_addr, sizeof addr); memcpy(&mask, ipv6_mask->hdr.dst_addr, sizeof mask); ipv6_string_mapped(addr_str, &addr); ipv6_string_mapped(mask_str, &mask); DUMP_PATTERN_ITEM(mask, "dst", "%s", addr_str, 
mask_str); DUMP_PATTERN_ITEM(ipv6_mask->hdr.proto, "proto", "%"PRIu8, ipv6_spec->hdr.proto, ipv6_mask->hdr.proto); DUMP_PATTERN_ITEM(ipv6_mask->hdr.vtc_flow, "tc", "0x%"PRIx32, ntohl(ipv6_spec->hdr.vtc_flow), ntohl(ipv6_mask->hdr.vtc_flow)); DUMP_PATTERN_ITEM(ipv6_mask->hdr.hop_limits, "hop", "%"PRIu8, ipv6_spec->hdr.hop_limits, ipv6_mask->hdr.hop_limits); } ds_put_cstr(s, "/ "); } else { ds_put_format(s, "unknown rte flow pattern (%d)\n", item->type); } } static void dump_vxlan_encap(struct ds *s, const struct rte_flow_item *items) { const struct rte_flow_item_eth *eth = NULL; const struct rte_flow_item_ipv4 *ipv4 = NULL; const struct rte_flow_item_ipv6 *ipv6 = NULL; const struct rte_flow_item_udp *udp = NULL; const struct rte_flow_item_vxlan *vxlan = NULL; for (; items && items->type != RTE_FLOW_ITEM_TYPE_END; items++) { if (items->type == RTE_FLOW_ITEM_TYPE_ETH) { eth = items->spec; } else if (items->type == RTE_FLOW_ITEM_TYPE_IPV4) { ipv4 = items->spec; } else if (items->type == RTE_FLOW_ITEM_TYPE_IPV6) { ipv6 = items->spec; } else if (items->type == RTE_FLOW_ITEM_TYPE_UDP) { udp = items->spec; } else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) { vxlan = items->spec; } } ds_put_format(s, "set vxlan ip-version %s ", ipv4 ? "ipv4" : ipv6 ? "ipv6" : "ERR"); if (vxlan) { ds_put_format(s, "vni %"PRIu32" ", ntohl(*(ovs_be32 *) vxlan->vni) >> 8); } if (udp) { ds_put_format(s, "udp-src %"PRIu16" udp-dst %"PRIu16" ", ntohs(udp->hdr.src_port), ntohs(udp->hdr.dst_port)); } if (ipv4) { ds_put_format(s, "ip-src "IP_FMT" ip-dst "IP_FMT" ", IP_ARGS(ipv4->hdr.src_addr), IP_ARGS(ipv4->hdr.dst_addr)); } if (ipv6) { struct in6_addr addr; ds_put_cstr(s, "ip-src "); memcpy(&addr, ipv6->hdr.src_addr, sizeof addr); ipv6_format_mapped(&addr, s); ds_put_cstr(s, " ip-dst "); memcpy(&addr, ipv6->hdr.dst_addr, sizeof addr); ipv6_format_mapped(&addr, s); ds_put_cstr(s, " "); } if (eth) { ds_put_format(s, "eth-src "ETH_ADDR_FMT" eth-dst "ETH_ADDR_FMT, ETH_ADDR_BYTES_ARGS(eth->src.addr_bytes), ETH_ADDR_BYTES_ARGS(eth->dst.addr_bytes)); } } static void dump_flow_action(struct ds *s, struct ds *s_extra, const struct rte_flow_action *actions) { if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) { const struct rte_flow_action_mark *mark = actions->conf; ds_put_cstr(s, "mark "); if (mark) { ds_put_format(s, "id %d ", mark->id); } ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) { ds_put_cstr(s, "rss / "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) { ds_put_cstr(s, "count / "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) { const struct rte_flow_action_port_id *port_id = actions->conf; ds_put_cstr(s, "port_id "); if (port_id) { ds_put_format(s, "original %d id %d ", port_id->original, port_id->id); } ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) { ds_put_cstr(s, "drop / "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC || actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_DST) { const struct rte_flow_action_set_mac *set_mac = actions->conf; char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_DST ? "dst" : "src"; ds_put_format(s, "set_mac_%s ", dirstr); if (set_mac) { ds_put_format(s, "mac_addr "ETH_ADDR_FMT" ", ETH_ADDR_BYTES_ARGS(set_mac->mac_addr)); } ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC || actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST) { const struct rte_flow_action_set_ipv4 *set_ipv4 = actions->conf; char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST ? 
"dst" : "src"; ds_put_format(s, "set_ipv4_%s ", dirstr); if (set_ipv4) { ds_put_format(s, "ipv4_addr "IP_FMT" ", IP_ARGS(set_ipv4->ipv4_addr)); } ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_TTL) { const struct rte_flow_action_set_ttl *set_ttl = actions->conf; ds_put_cstr(s, "set_ttl "); if (set_ttl) { ds_put_format(s, "ttl_value %d ", set_ttl->ttl_value); } ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC || actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST) { const struct rte_flow_action_set_tp *set_tp = actions->conf; char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST ? "dst" : "src"; ds_put_format(s, "set_tp_%s ", dirstr); if (set_tp) { ds_put_format(s, "port %"PRIu16" ", ntohs(set_tp->port)); } ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN) { const struct rte_flow_action_of_push_vlan *of_push_vlan = actions->conf; ds_put_cstr(s, "of_push_vlan "); if (of_push_vlan) { ds_put_format(s, "ethertype 0x%"PRIx16" ", ntohs(of_push_vlan->ethertype)); } ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) { const struct rte_flow_action_of_set_vlan_pcp *of_set_vlan_pcp = actions->conf; ds_put_cstr(s, "of_set_vlan_pcp "); if (of_set_vlan_pcp) { ds_put_format(s, "vlan_pcp %"PRIu8" ", of_set_vlan_pcp->vlan_pcp); } ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) { const struct rte_flow_action_of_set_vlan_vid *of_set_vlan_vid = actions->conf; ds_put_cstr(s, "of_set_vlan_vid "); if (of_set_vlan_vid) { ds_put_format(s, "vlan_vid %"PRIu16" ", ntohs(of_set_vlan_vid->vlan_vid)); } ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_POP_VLAN) { ds_put_cstr(s, "of_pop_vlan / "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC || actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST) { const struct rte_flow_action_set_ipv6 *set_ipv6 = actions->conf; char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST ? 
"dst" : "src"; ds_put_format(s, "set_ipv6_%s ", dirstr); if (set_ipv6) { ds_put_cstr(s, "ipv6_addr "); ipv6_format_addr((struct in6_addr *) &set_ipv6->ipv6_addr, s); ds_put_cstr(s, " "); } ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) { const struct rte_flow_action_raw_encap *raw_encap = actions->conf; ds_put_cstr(s, "raw_encap index 0 / "); if (raw_encap) { ds_put_format(s_extra, "Raw-encap size=%ld set raw_encap 0 raw " "pattern is ", raw_encap->size); for (int i = 0; i < raw_encap->size; i++) { ds_put_format(s_extra, "%02x", raw_encap->data[i]); } ds_put_cstr(s_extra, " / end_set;"); } } else if (actions->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP) { const struct rte_flow_action_vxlan_encap *vxlan_encap = actions->conf; const struct rte_flow_item *items = vxlan_encap->definition; ds_put_cstr(s, "vxlan_encap / "); dump_vxlan_encap(s_extra, items); ds_put_cstr(s_extra, ";"); } else { ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); } } static struct ds * dump_flow(struct ds *s, struct ds *s_extra, const struct rte_flow_attr *attr, const struct rte_flow_item *items, const struct rte_flow_action *actions) { if (attr) { dump_flow_attr(s, attr); } ds_put_cstr(s, "pattern "); while (items && items->type != RTE_FLOW_ITEM_TYPE_END) { dump_flow_pattern(s, items++); } ds_put_cstr(s, "end actions "); while (actions && actions->type != RTE_FLOW_ACTION_TYPE_END) { dump_flow_action(s, s_extra, actions++); } ds_put_cstr(s, "end"); return s; } static struct rte_flow * netdev_offload_dpdk_flow_create(struct netdev *netdev, const struct rte_flow_attr *attr, const struct rte_flow_item *items, const struct rte_flow_action *actions, struct rte_flow_error *error) { struct ds s_extra = DS_EMPTY_INITIALIZER; struct ds s = DS_EMPTY_INITIALIZER; struct rte_flow *flow; char *extra_str; flow = netdev_dpdk_rte_flow_create(netdev, attr, items, actions, error); if (flow) { if (!VLOG_DROP_DBG(&rl)) { dump_flow(&s, &s_extra, attr, items, actions); extra_str = ds_cstr(&s_extra); VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s flow create %d %s", netdev_get_name(netdev), (intptr_t) flow, extra_str, netdev_dpdk_get_port_id(netdev), ds_cstr(&s)); } } else { enum vlog_level level = VLL_WARN; if (error->type == RTE_FLOW_ERROR_TYPE_ACTION) { level = VLL_DBG; } VLOG_RL(&rl, level, "%s: rte_flow creation failed: %d (%s).", netdev_get_name(netdev), error->type, error->message); if (!vlog_should_drop(&this_module, level, &rl)) { dump_flow(&s, &s_extra, attr, items, actions); extra_str = ds_cstr(&s_extra); VLOG_RL(&rl, level, "%s: Failed flow: %s flow create %d %s", netdev_get_name(netdev), extra_str, netdev_dpdk_get_port_id(netdev), ds_cstr(&s)); } } ds_destroy(&s); ds_destroy(&s_extra); return flow; } static void add_flow_pattern(struct flow_patterns *patterns, enum rte_flow_item_type type, const void *spec, const void *mask) { int cnt = patterns->cnt; if (cnt == 0) { patterns->current_max = 8; patterns->items = xcalloc(patterns->current_max, sizeof *patterns->items); } else if (cnt == patterns->current_max) { patterns->current_max *= 2; patterns->items = xrealloc(patterns->items, patterns->current_max * sizeof *patterns->items); } patterns->items[cnt].type = type; patterns->items[cnt].spec = spec; patterns->items[cnt].mask = mask; patterns->items[cnt].last = NULL; patterns->cnt++; } static void add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type, const void *conf) { int cnt = actions->cnt; if (cnt == 0) { actions->current_max = 8; actions->actions = 
xcalloc(actions->current_max, sizeof *actions->actions); } else if (cnt == actions->current_max) { actions->current_max *= 2; actions->actions = xrealloc(actions->actions, actions->current_max * sizeof *actions->actions); } actions->actions[cnt].type = type; actions->actions[cnt].conf = conf; actions->cnt++; } static void free_flow_patterns(struct flow_patterns *patterns) { int i; for (i = 0; i < patterns->cnt; i++) { if (patterns->items[i].spec) { free(CONST_CAST(void *, patterns->items[i].spec)); } if (patterns->items[i].mask) { free(CONST_CAST(void *, patterns->items[i].mask)); } } free(patterns->items); patterns->items = NULL; patterns->cnt = 0; } static void free_flow_actions(struct flow_actions *actions) { int i; for (i = 0; i < actions->cnt; i++) { if (actions->actions[i].conf) { free(CONST_CAST(void *, actions->actions[i].conf)); } } free(actions->actions); actions->actions = NULL; actions->cnt = 0; } static int parse_flow_match(struct flow_patterns *patterns, struct match *match) { uint8_t *next_proto_mask = NULL; struct flow *consumed_masks; uint8_t proto = 0; consumed_masks = &match->wc.masks; memset(&consumed_masks->in_port, 0, sizeof consumed_masks->in_port); /* recirc id must be zero. */ if (match->wc.masks.recirc_id & match->flow.recirc_id) { return -1; } consumed_masks->recirc_id = 0; consumed_masks->packet_type = 0; /* Eth */ if (match->wc.masks.dl_type == OVS_BE16_MAX && is_ip_any(&match->flow) && eth_addr_is_zero(match->wc.masks.dl_dst) && eth_addr_is_zero(match->wc.masks.dl_src)) { /* * This is a temporary work around to fix ethernet pattern for partial * hardware offload for X710 devices. This fix will be reverted once * the issue is fixed within the i40e PMD driver. */ add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL); memset(&consumed_masks->dl_dst, 0, sizeof consumed_masks->dl_dst); memset(&consumed_masks->dl_src, 0, sizeof consumed_masks->dl_src); consumed_masks->dl_type = 0; } else if (match->wc.masks.dl_type || !eth_addr_is_zero(match->wc.masks.dl_src) || !eth_addr_is_zero(match->wc.masks.dl_dst)) { struct rte_flow_item_eth *spec, *mask; spec = xzalloc(sizeof *spec); mask = xzalloc(sizeof *mask); memcpy(&spec->dst, &match->flow.dl_dst, sizeof spec->dst); memcpy(&spec->src, &match->flow.dl_src, sizeof spec->src); spec->type = match->flow.dl_type; memcpy(&mask->dst, &match->wc.masks.dl_dst, sizeof mask->dst); memcpy(&mask->src, &match->wc.masks.dl_src, sizeof mask->src); mask->type = match->wc.masks.dl_type; memset(&consumed_masks->dl_dst, 0, sizeof consumed_masks->dl_dst); memset(&consumed_masks->dl_src, 0, sizeof consumed_masks->dl_src); consumed_masks->dl_type = 0; add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, spec, mask); } /* VLAN */ if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) { struct rte_flow_item_vlan *spec, *mask; spec = xzalloc(sizeof *spec); mask = xzalloc(sizeof *mask); spec->tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI); mask->tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI); /* Match any protocols. */ mask->inner_type = 0; add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask); } /* For untagged matching match->wc.masks.vlans[0].tci is 0xFFFF and * match->flow.vlans[0].tci is 0. Consuming is needed outside of the if * scope to handle that. 
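     * Clearing the mask marks the VLAN TCI as consumed; otherwise the final
     * is_all_zeros() check on 'consumed_masks' would reject untagged flows.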
*/ memset(&consumed_masks->vlans[0], 0, sizeof consumed_masks->vlans[0]); /* IP v4 */ if (match->flow.dl_type == htons(ETH_TYPE_IP)) { struct rte_flow_item_ipv4 *spec, *mask; spec = xzalloc(sizeof *spec); mask = xzalloc(sizeof *mask); spec->hdr.type_of_service = match->flow.nw_tos; spec->hdr.time_to_live = match->flow.nw_ttl; spec->hdr.next_proto_id = match->flow.nw_proto; spec->hdr.src_addr = match->flow.nw_src; spec->hdr.dst_addr = match->flow.nw_dst; mask->hdr.type_of_service = match->wc.masks.nw_tos; mask->hdr.time_to_live = match->wc.masks.nw_ttl; mask->hdr.next_proto_id = match->wc.masks.nw_proto; mask->hdr.src_addr = match->wc.masks.nw_src; mask->hdr.dst_addr = match->wc.masks.nw_dst; consumed_masks->nw_tos = 0; consumed_masks->nw_ttl = 0; consumed_masks->nw_proto = 0; consumed_masks->nw_src = 0; consumed_masks->nw_dst = 0; add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_IPV4, spec, mask); /* Save proto for L4 protocol setup. */ proto = spec->hdr.next_proto_id & mask->hdr.next_proto_id; next_proto_mask = &mask->hdr.next_proto_id; } /* If fragmented, then don't HW accelerate - for now. */ if (match->wc.masks.nw_frag & match->flow.nw_frag) { return -1; } consumed_masks->nw_frag = 0; /* IP v6 */ if (match->flow.dl_type == htons(ETH_TYPE_IPV6)) { struct rte_flow_item_ipv6 *spec, *mask; spec = xzalloc(sizeof *spec); mask = xzalloc(sizeof *mask); spec->hdr.proto = match->flow.nw_proto; spec->hdr.hop_limits = match->flow.nw_ttl; spec->hdr.vtc_flow = htonl((uint32_t) match->flow.nw_tos << RTE_IPV6_HDR_TC_SHIFT); memcpy(spec->hdr.src_addr, &match->flow.ipv6_src, sizeof spec->hdr.src_addr); memcpy(spec->hdr.dst_addr, &match->flow.ipv6_dst, sizeof spec->hdr.dst_addr); mask->hdr.proto = match->wc.masks.nw_proto; mask->hdr.hop_limits = match->wc.masks.nw_ttl; mask->hdr.vtc_flow = htonl((uint32_t) match->wc.masks.nw_tos << RTE_IPV6_HDR_TC_SHIFT); memcpy(mask->hdr.src_addr, &match->wc.masks.ipv6_src, sizeof mask->hdr.src_addr); memcpy(mask->hdr.dst_addr, &match->wc.masks.ipv6_dst, sizeof mask->hdr.dst_addr); consumed_masks->nw_proto = 0; consumed_masks->nw_ttl = 0; consumed_masks->nw_tos = 0; memset(&consumed_masks->ipv6_src, 0, sizeof consumed_masks->ipv6_src); memset(&consumed_masks->ipv6_dst, 0, sizeof consumed_masks->ipv6_dst); add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_IPV6, spec, mask); /* Save proto for L4 protocol setup. */ proto = spec->hdr.proto & mask->hdr.proto; next_proto_mask = &mask->hdr.proto; } if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP && proto != IPPROTO_SCTP && proto != IPPROTO_TCP && (match->wc.masks.tp_src || match->wc.masks.tp_dst || match->wc.masks.tcp_flags)) { VLOG_DBG("L4 Protocol (%u) not supported", proto); return -1; } if (proto == IPPROTO_TCP) { struct rte_flow_item_tcp *spec, *mask; spec = xzalloc(sizeof *spec); mask = xzalloc(sizeof *mask); spec->hdr.src_port = match->flow.tp_src; spec->hdr.dst_port = match->flow.tp_dst; spec->hdr.data_off = ntohs(match->flow.tcp_flags) >> 8; spec->hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff; mask->hdr.src_port = match->wc.masks.tp_src; mask->hdr.dst_port = match->wc.masks.tp_dst; mask->hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8; mask->hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & 0xff; consumed_masks->tp_src = 0; consumed_masks->tp_dst = 0; consumed_masks->tcp_flags = 0; add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_TCP, spec, mask); /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match. 
*/ if (next_proto_mask) { *next_proto_mask = 0; } } else if (proto == IPPROTO_UDP) { struct rte_flow_item_udp *spec, *mask; spec = xzalloc(sizeof *spec); mask = xzalloc(sizeof *mask); spec->hdr.src_port = match->flow.tp_src; spec->hdr.dst_port = match->flow.tp_dst; mask->hdr.src_port = match->wc.masks.tp_src; mask->hdr.dst_port = match->wc.masks.tp_dst; consumed_masks->tp_src = 0; consumed_masks->tp_dst = 0; add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_UDP, spec, mask); /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match. */ if (next_proto_mask) { *next_proto_mask = 0; } } else if (proto == IPPROTO_SCTP) { struct rte_flow_item_sctp *spec, *mask; spec = xzalloc(sizeof *spec); mask = xzalloc(sizeof *mask); spec->hdr.src_port = match->flow.tp_src; spec->hdr.dst_port = match->flow.tp_dst; mask->hdr.src_port = match->wc.masks.tp_src; mask->hdr.dst_port = match->wc.masks.tp_dst; consumed_masks->tp_src = 0; consumed_masks->tp_dst = 0; add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_SCTP, spec, mask); /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match. */ if (next_proto_mask) { *next_proto_mask = 0; } } else if (proto == IPPROTO_ICMP) { struct rte_flow_item_icmp *spec, *mask; spec = xzalloc(sizeof *spec); mask = xzalloc(sizeof *mask); spec->hdr.icmp_type = (uint8_t) ntohs(match->flow.tp_src); spec->hdr.icmp_code = (uint8_t) ntohs(match->flow.tp_dst); mask->hdr.icmp_type = (uint8_t) ntohs(match->wc.masks.tp_src); mask->hdr.icmp_code = (uint8_t) ntohs(match->wc.masks.tp_dst); consumed_masks->tp_src = 0; consumed_masks->tp_dst = 0; add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ICMP, spec, mask); /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto match. */ if (next_proto_mask) { *next_proto_mask = 0; } } add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_END, NULL, NULL); if (!is_all_zeros(consumed_masks, sizeof *consumed_masks)) { return -1; } return 0; } static void add_flow_mark_rss_actions(struct flow_actions *actions, uint32_t flow_mark, const struct netdev *netdev) { struct rte_flow_action_mark *mark; struct action_rss_data { struct rte_flow_action_rss conf; uint16_t queue[0]; } *rss_data; BUILD_ASSERT_DECL(offsetof(struct action_rss_data, conf) == 0); int i; mark = xzalloc(sizeof *mark); mark->id = flow_mark; add_flow_action(actions, RTE_FLOW_ACTION_TYPE_MARK, mark); rss_data = xmalloc(sizeof *rss_data + netdev_n_rxq(netdev) * sizeof rss_data->queue[0]); *rss_data = (struct action_rss_data) { .conf = (struct rte_flow_action_rss) { .func = RTE_ETH_HASH_FUNCTION_DEFAULT, .level = 0, .types = 0, .queue_num = netdev_n_rxq(netdev), .queue = rss_data->queue, .key_len = 0, .key = NULL }, }; /* Override queue array with default. 
*/ for (i = 0; i < netdev_n_rxq(netdev); i++) { rss_data->queue[i] = i; } add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data->conf); add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL); } static struct rte_flow * netdev_offload_dpdk_mark_rss(struct flow_patterns *patterns, struct netdev *netdev, uint32_t flow_mark) { struct flow_actions actions = { .actions = NULL, .cnt = 0 }; const struct rte_flow_attr flow_attr = { .group = 0, .priority = 0, .ingress = 1, .egress = 0 }; struct rte_flow_error error; struct rte_flow *flow; add_flow_mark_rss_actions(&actions, flow_mark, netdev); flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items, actions.actions, &error); free_flow_actions(&actions); return flow; } static void add_count_action(struct flow_actions *actions) { struct rte_flow_action_count *count = xzalloc(sizeof *count); add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count); } static int add_port_id_action(struct flow_actions *actions, struct netdev *outdev) { struct rte_flow_action_port_id *port_id; int outdev_id; outdev_id = netdev_dpdk_get_port_id(outdev); if (outdev_id < 0) { return -1; } port_id = xzalloc(sizeof *port_id); port_id->id = outdev_id; add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id); return 0; } static int add_output_action(struct netdev *netdev, struct flow_actions *actions, const struct nlattr *nla) { struct netdev *outdev; odp_port_t port; int ret = 0; port = nl_attr_get_odp_port(nla); outdev = netdev_ports_get(port, netdev->dpif_type); if (outdev == NULL) { VLOG_DBG_RL(&rl, "Cannot find netdev for odp port %"PRIu32, port); return -1; } if (!netdev_flow_api_equals(netdev, outdev) || add_port_id_action(actions, outdev)) { VLOG_DBG_RL(&rl, "%s: Output to port \'%s\' cannot be offloaded.", netdev_get_name(netdev), netdev_get_name(outdev)); ret = -1; } netdev_close(outdev); return ret; } static int add_set_flow_action__(struct flow_actions *actions, const void *value, void *mask, const size_t size, const int attr) { void *spec; if (mask) { /* DPDK does not support partially masked set actions. In such * case, fail the offload. */ if (is_all_zeros(mask, size)) { return 0; } if (!is_all_ones(mask, size)) { VLOG_DBG_RL(&rl, "Partial mask is not supported"); return -1; } } spec = xzalloc(size); memcpy(spec, value, size); add_flow_action(actions, attr, spec); /* Clear used mask for later checking. 
*/ if (mask) { memset(mask, 0, size); } return 0; } BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_mac) == MEMBER_SIZEOF(struct ovs_key_ethernet, eth_src)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_mac) == MEMBER_SIZEOF(struct ovs_key_ethernet, eth_dst)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4) == MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_src)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4) == MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_dst)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ttl) == MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_ttl)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv6) == MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_src)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv6) == MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_dst)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ttl) == MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_hlimit)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) == MEMBER_SIZEOF(struct ovs_key_tcp, tcp_src)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) == MEMBER_SIZEOF(struct ovs_key_tcp, tcp_dst)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) == MEMBER_SIZEOF(struct ovs_key_udp, udp_src)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) == MEMBER_SIZEOF(struct ovs_key_udp, udp_dst)); static int parse_set_actions(struct flow_actions *actions, const struct nlattr *set_actions, const size_t set_actions_len, bool masked) { const struct nlattr *sa; unsigned int sleft; #define add_set_flow_action(field, type) \ if (add_set_flow_action__(actions, &key->field, \ mask ? CONST_CAST(void *, &mask->field) : NULL, \ sizeof key->field, type)) { \ return -1; \ } NL_ATTR_FOR_EACH_UNSAFE (sa, sleft, set_actions, set_actions_len) { if (nl_attr_type(sa) == OVS_KEY_ATTR_ETHERNET) { const struct ovs_key_ethernet *key = nl_attr_get(sa); const struct ovs_key_ethernet *mask = masked ? key + 1 : NULL; add_set_flow_action(eth_src, RTE_FLOW_ACTION_TYPE_SET_MAC_SRC); add_set_flow_action(eth_dst, RTE_FLOW_ACTION_TYPE_SET_MAC_DST); if (mask && !is_all_zeros(mask, sizeof *mask)) { VLOG_DBG_RL(&rl, "Unsupported ETHERNET set action"); return -1; } } else if (nl_attr_type(sa) == OVS_KEY_ATTR_IPV4) { const struct ovs_key_ipv4 *key = nl_attr_get(sa); const struct ovs_key_ipv4 *mask = masked ? key + 1 : NULL; add_set_flow_action(ipv4_src, RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC); add_set_flow_action(ipv4_dst, RTE_FLOW_ACTION_TYPE_SET_IPV4_DST); add_set_flow_action(ipv4_ttl, RTE_FLOW_ACTION_TYPE_SET_TTL); if (mask && !is_all_zeros(mask, sizeof *mask)) { VLOG_DBG_RL(&rl, "Unsupported IPv4 set action"); return -1; } } else if (nl_attr_type(sa) == OVS_KEY_ATTR_IPV6) { const struct ovs_key_ipv6 *key = nl_attr_get(sa); const struct ovs_key_ipv6 *mask = masked ? key + 1 : NULL; add_set_flow_action(ipv6_src, RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC); add_set_flow_action(ipv6_dst, RTE_FLOW_ACTION_TYPE_SET_IPV6_DST); add_set_flow_action(ipv6_hlimit, RTE_FLOW_ACTION_TYPE_SET_TTL); if (mask && !is_all_zeros(mask, sizeof *mask)) { VLOG_DBG_RL(&rl, "Unsupported IPv6 set action"); return -1; } } else if (nl_attr_type(sa) == OVS_KEY_ATTR_TCP) { const struct ovs_key_tcp *key = nl_attr_get(sa); const struct ovs_key_tcp *mask = masked ? 
key + 1 : NULL; add_set_flow_action(tcp_src, RTE_FLOW_ACTION_TYPE_SET_TP_SRC); add_set_flow_action(tcp_dst, RTE_FLOW_ACTION_TYPE_SET_TP_DST); if (mask && !is_all_zeros(mask, sizeof *mask)) { VLOG_DBG_RL(&rl, "Unsupported TCP set action"); return -1; } } else if (nl_attr_type(sa) == OVS_KEY_ATTR_UDP) { const struct ovs_key_udp *key = nl_attr_get(sa); const struct ovs_key_udp *mask = masked ? key + 1 : NULL; add_set_flow_action(udp_src, RTE_FLOW_ACTION_TYPE_SET_TP_SRC); add_set_flow_action(udp_dst, RTE_FLOW_ACTION_TYPE_SET_TP_DST); if (mask && !is_all_zeros(mask, sizeof *mask)) { VLOG_DBG_RL(&rl, "Unsupported UDP set action"); return -1; } } else { VLOG_DBG_RL(&rl, "Unsupported set action type %d", nl_attr_type(sa)); return -1; } } return 0; } /* Maximum number of items in struct rte_flow_action_vxlan_encap. * ETH / IPv4(6) / UDP / VXLAN / END */ #define ACTION_VXLAN_ENCAP_ITEMS_NUM 5 static int add_vxlan_encap_action(struct flow_actions *actions, const void *header) { const struct eth_header *eth; const struct udp_header *udp; struct vxlan_data { struct rte_flow_action_vxlan_encap conf; struct rte_flow_item items[ACTION_VXLAN_ENCAP_ITEMS_NUM]; } *vxlan_data; BUILD_ASSERT_DECL(offsetof(struct vxlan_data, conf) == 0); const void *vxlan; const void *l3; const void *l4; int field; vxlan_data = xzalloc(sizeof *vxlan_data); field = 0; eth = header; /* Ethernet */ vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_ETH; vxlan_data->items[field].spec = eth; vxlan_data->items[field].mask = &rte_flow_item_eth_mask; field++; l3 = eth + 1; /* IP */ if (eth->eth_type == htons(ETH_TYPE_IP)) { /* IPv4 */ const struct ip_header *ip = l3; vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV4; vxlan_data->items[field].spec = ip; vxlan_data->items[field].mask = &rte_flow_item_ipv4_mask; if (ip->ip_proto != IPPROTO_UDP) { goto err; } l4 = (ip + 1); } else if (eth->eth_type == htons(ETH_TYPE_IPV6)) { const struct ovs_16aligned_ip6_hdr *ip6 = l3; vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV6; vxlan_data->items[field].spec = ip6; vxlan_data->items[field].mask = &rte_flow_item_ipv6_mask; if (ip6->ip6_nxt != IPPROTO_UDP) { goto err; } l4 = (ip6 + 1); } else { goto err; } field++; udp = l4; vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_UDP; vxlan_data->items[field].spec = udp; vxlan_data->items[field].mask = &rte_flow_item_udp_mask; field++; vxlan = (udp + 1); vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_VXLAN; vxlan_data->items[field].spec = vxlan; vxlan_data->items[field].mask = &rte_flow_item_vxlan_mask; field++; vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_END; vxlan_data->conf.definition = vxlan_data->items; add_flow_action(actions, RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, vxlan_data); return 0; err: free(vxlan_data); return -1; } static int parse_vlan_push_action(struct flow_actions *actions, const struct ovs_action_push_vlan *vlan_push) { struct rte_flow_action_of_push_vlan *rte_push_vlan; struct rte_flow_action_of_set_vlan_pcp *rte_vlan_pcp; struct rte_flow_action_of_set_vlan_vid *rte_vlan_vid; rte_push_vlan = xzalloc(sizeof *rte_push_vlan); rte_push_vlan->ethertype = vlan_push->vlan_tpid; add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN, rte_push_vlan); rte_vlan_pcp = xzalloc(sizeof *rte_vlan_pcp); rte_vlan_pcp->vlan_pcp = vlan_tci_to_pcp(vlan_push->vlan_tci); add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP, rte_vlan_pcp); rte_vlan_vid = xzalloc(sizeof *rte_vlan_vid); rte_vlan_vid->vlan_vid = htons(vlan_tci_to_vid(vlan_push->vlan_tci)); add_flow_action(actions, 
RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID, rte_vlan_vid); return 0; } static int parse_clone_actions(struct netdev *netdev, struct flow_actions *actions, const struct nlattr *clone_actions, const size_t clone_actions_len) { const struct nlattr *ca; unsigned int cleft; NL_ATTR_FOR_EACH_UNSAFE (ca, cleft, clone_actions, clone_actions_len) { int clone_type = nl_attr_type(ca); if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) { const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca); struct rte_flow_action_raw_encap *raw_encap; if (tnl_push->tnl_type == OVS_VPORT_TYPE_VXLAN && !add_vxlan_encap_action(actions, tnl_push->header)) { continue; } raw_encap = xzalloc(sizeof *raw_encap); raw_encap->data = (uint8_t *) tnl_push->header; raw_encap->preserve = NULL; raw_encap->size = tnl_push->header_len; add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP, raw_encap); } else if (clone_type == OVS_ACTION_ATTR_OUTPUT) { if (add_output_action(netdev, actions, ca)) { return -1; } } else { VLOG_DBG_RL(&rl, "Unsupported nested action inside clone(), " "action type: %d", clone_type); return -1; } } return 0; } static int parse_flow_actions(struct netdev *netdev, struct flow_actions *actions, struct nlattr *nl_actions, size_t nl_actions_len) { struct nlattr *nla; size_t left; add_count_action(actions); NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) { if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) { if (add_output_action(netdev, actions, nla)) { return -1; } } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_DROP) { add_flow_action(actions, RTE_FLOW_ACTION_TYPE_DROP, NULL); } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SET || nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED) { const struct nlattr *set_actions = nl_attr_get(nla); const size_t set_actions_len = nl_attr_get_size(nla); bool masked = nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED; if (parse_set_actions(actions, set_actions, set_actions_len, masked)) { return -1; } } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_PUSH_VLAN) { const struct ovs_action_push_vlan *vlan = nl_attr_get(nla); if (parse_vlan_push_action(actions, vlan)) { return -1; } } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_POP_VLAN) { add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_POP_VLAN, NULL); } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CLONE && left <= NLA_ALIGN(nla->nla_len)) { const struct nlattr *clone_actions = nl_attr_get(nla); size_t clone_actions_len = nl_attr_get_size(nla); if (parse_clone_actions(netdev, actions, clone_actions, clone_actions_len)) { return -1; } } else { VLOG_DBG_RL(&rl, "Unsupported action type %d", nl_attr_type(nla)); return -1; } } if (nl_actions_len == 0) { VLOG_DBG_RL(&rl, "No actions provided"); return -1; } add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL); return 0; } static struct rte_flow * netdev_offload_dpdk_actions(struct netdev *netdev, struct flow_patterns *patterns, struct nlattr *nl_actions, size_t actions_len) { const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 }; struct flow_actions actions = { .actions = NULL, .cnt = 0 }; struct rte_flow *flow = NULL; struct rte_flow_error error; int ret; ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len); if (ret) { goto out; } flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items, actions.actions, &error); out: free_flow_actions(&actions); return flow; } static int netdev_offload_dpdk_add_flow(struct netdev *netdev, struct match *match, struct nlattr *nl_actions, size_t actions_len, const ovs_u128 *ufid, struct 
offload_info *info) { struct flow_patterns patterns = { .items = NULL, .cnt = 0 }; bool actions_offloaded = true; struct rte_flow *flow; int ret = 0; ret = parse_flow_match(&patterns, match); if (ret) { VLOG_DBG_RL(&rl, "%s: matches of ufid "UUID_FMT" are not supported", netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid)); goto out; } flow = netdev_offload_dpdk_actions(netdev, &patterns, nl_actions, actions_len); if (!flow) { /* If we failed to offload the rule actions fallback to MARK+RSS * actions. */ flow = netdev_offload_dpdk_mark_rss(&patterns, netdev, info->flow_mark); actions_offloaded = false; } if (!flow) { ret = -1; goto out; } ufid_to_rte_flow_associate(ufid, flow, actions_offloaded); VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT, netdev_get_name(netdev), flow, UUID_ARGS((struct uuid *)ufid)); out: free_flow_patterns(&patterns); return ret; } static int netdev_offload_dpdk_destroy_flow(struct netdev *netdev, const ovs_u128 *ufid, struct rte_flow *rte_flow) { struct rte_flow_error error; int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error); if (ret == 0) { ufid_to_rte_flow_disassociate(ufid); VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR " flow destroy %d ufid " UUID_FMT, netdev_get_name(netdev), (intptr_t) rte_flow, netdev_dpdk_get_port_id(netdev), UUID_ARGS((struct uuid *) ufid)); } else { VLOG_ERR("Failed flow: %s: flow destroy %d ufid " UUID_FMT, netdev_get_name(netdev), netdev_dpdk_get_port_id(netdev), UUID_ARGS((struct uuid *) ufid)); } return ret; } static int netdev_offload_dpdk_flow_put(struct netdev *netdev, struct match *match, struct nlattr *actions, size_t actions_len, const ovs_u128 *ufid, struct offload_info *info, struct dpif_flow_stats *stats) { struct ufid_to_rte_flow_data *rte_flow_data; int ret; /* * If an old rte_flow exists, it means it's a flow modification. * Here destroy the old rte flow first before adding a new one. */ rte_flow_data = ufid_to_rte_flow_data_find(ufid); if (rte_flow_data && rte_flow_data->rte_flow) { ret = netdev_offload_dpdk_destroy_flow(netdev, ufid, rte_flow_data->rte_flow); if (ret < 0) { return ret; } } if (stats) { memset(stats, 0, sizeof *stats); } return netdev_offload_dpdk_add_flow(netdev, match, actions, actions_len, ufid, info); } static int netdev_offload_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid, struct dpif_flow_stats *stats) { struct ufid_to_rte_flow_data *rte_flow_data; rte_flow_data = ufid_to_rte_flow_data_find(ufid); if (!rte_flow_data || !rte_flow_data->rte_flow) { return -1; } if (stats) { memset(stats, 0, sizeof *stats); } return netdev_offload_dpdk_destroy_flow(netdev, ufid, rte_flow_data->rte_flow); } static int netdev_offload_dpdk_init_flow_api(struct netdev *netdev) { return netdev_dpdk_flow_api_supported(netdev) ? 
0 : EOPNOTSUPP; } static int netdev_offload_dpdk_flow_get(struct netdev *netdev, struct match *match OVS_UNUSED, struct nlattr **actions OVS_UNUSED, const ovs_u128 *ufid, struct dpif_flow_stats *stats, struct dpif_flow_attrs *attrs, struct ofpbuf *buf OVS_UNUSED) { struct rte_flow_query_count query = { .reset = 1 }; struct ufid_to_rte_flow_data *rte_flow_data; struct rte_flow_error error; int ret = 0; rte_flow_data = ufid_to_rte_flow_data_find(ufid); if (!rte_flow_data || !rte_flow_data->rte_flow) { ret = -1; goto out; } attrs->offloaded = true; if (!rte_flow_data->actions_offloaded) { attrs->dp_layer = "ovs"; memset(stats, 0, sizeof *stats); goto out; } attrs->dp_layer = "dpdk"; ret = netdev_dpdk_rte_flow_query_count(netdev, rte_flow_data->rte_flow, &query, &error); if (ret) { VLOG_DBG_RL(&rl, "%s: Failed to query ufid "UUID_FMT" flow: %p", netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid), rte_flow_data->rte_flow); goto out; } rte_flow_data->stats.n_packets += (query.hits_set) ? query.hits : 0; rte_flow_data->stats.n_bytes += (query.bytes_set) ? query.bytes : 0; if (query.hits_set && query.hits) { rte_flow_data->stats.used = time_msec(); } memcpy(stats, &rte_flow_data->stats, sizeof *stats); out: attrs->dp_extra_info = NULL; return ret; } const struct netdev_flow_api netdev_offload_dpdk = { .type = "dpdk_flow_api", .flow_put = netdev_offload_dpdk_flow_put, .flow_del = netdev_offload_dpdk_flow_del, .init_flow_api = netdev_offload_dpdk_init_flow_api, .flow_get = netdev_offload_dpdk_flow_get, };
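
/* The callbacks above are exported to the generic netdev-offload layer
 * through this provider structure; that layer is expected to call
 * .init_flow_api and the .flow_put/.flow_del/.flow_get hooks for DPDK ports
 * while holding the datapath 'port_mutex', per the thread-safety note at the
 * top of this file. */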