diff options
author | Jan Scheurich <jan.scheurich@ericsson.com> | 2017-06-23 16:47:57 +0000 |
---|---|---|
committer | Ben Pfaff <blp@ovn.org> | 2017-06-27 17:28:30 -0400 |
commit | 3d4b2e6eb74ed5bb5b35373aa8a489536938fee6 (patch) | |
tree | f086a8ec9d205140a69fd39db0c4c8be67ab855c /lib | |
parent | be7ac2f3c105866542de5c1d4c7e2651250a3e92 (diff) | |
download | openvswitch-3d4b2e6eb74ed5bb5b35373aa8a489536938fee6.tar.gz |
userspace: Add OXM field MFF_PACKET_TYPE
Allow packet type namespace OFPHTN_ETHERTYPE as an alternative prerequisite
for matching L3 protocols (MPLS, IP, IPv6, ARP etc).
Change the meta-flow definition of packet_type field to use the new
custom format MFS_PACKET_TYPE representing "(NS,NS_TYPE)".
Parsing routine for MFS_PACKET_TYPE added to meta-flow.c. Formatting
routine for field packet_type extracted from match_format() and moved to
flow.c to be used from meta-flow.c for formatting MFS_PACKET_TYPE.
Updated the ovs-fields man page source meta-flow.xml with documentation
for packet-type-aware bridges and added documentation for field packet_type.
Added packet_type to the matching properties in tests/ofproto.at.
If dl_type is unwildcarded due to later packet modification, make sure it
is cleared again if the original packet_type was not PT_ETH.
Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/flow.c | 34 | ||||
-rw-r--r-- | lib/flow.h | 27 | ||||
-rw-r--r-- | lib/learn.c | 1 | ||||
-rw-r--r-- | lib/match.c | 98 | ||||
-rw-r--r-- | lib/meta-flow.c | 86 | ||||
-rw-r--r-- | lib/meta-flow.xml | 156 | ||||
-rw-r--r-- | lib/nx-match.c | 34 | ||||
-rw-r--r-- | lib/odp-util.c | 38 | ||||
-rw-r--r-- | lib/ofp-parse.c | 12 | ||||
-rw-r--r-- | lib/ofp-util.c | 67 |
10 files changed, 447 insertions, 106 deletions
diff --git a/lib/flow.c b/lib/flow.c index d73e796a2..6be645730 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -1153,6 +1153,38 @@ format_flags_masked(struct ds *ds, const char *name, } } +static void +put_u16_masked(struct ds *s, uint16_t value, uint16_t mask) +{ + if (!mask) { + ds_put_char(s, '*'); + } else { + if (value > 9) { + ds_put_format(s, "0x%"PRIx16, value); + } else { + ds_put_format(s, "%"PRIu16, value); + } + + if (mask != UINT16_MAX) { + ds_put_format(s, "/0x%"PRIx16, mask); + } + } +} + +void +format_packet_type_masked(struct ds *s, ovs_be32 value, ovs_be32 mask) +{ + if (value == htonl(PT_ETH) && mask == OVS_BE32_MAX) { + ds_put_cstr(s, "eth"); + } else { + ds_put_cstr(s, "packet_type=("); + put_u16_masked(s, pt_ns(value), pt_ns(mask)); + ds_put_char(s, ','); + put_u16_masked(s, pt_ns_type(value), pt_ns_type(mask)); + ds_put_char(s, ')'); + } +} + /* Scans a string 's' of flags to determine their numerical value and * returns the number of characters parsed using 'bit_to_string' to * lookup flag names. Scanning continues until the character 'end' is @@ -1425,7 +1457,6 @@ flow_wildcards_init_for_packet(struct flow_wildcards *wc, WC_MASK_FIELD(wc, tunnel.tp_dst); WC_MASK_FIELD(wc, tunnel.gbp_id); WC_MASK_FIELD(wc, tunnel.gbp_flags); - WC_MASK_FIELD(wc, packet_type); if (!(flow->tunnel.flags & FLOW_TNL_F_UDPIF)) { if (flow->tunnel.metadata.present.map) { @@ -1457,6 +1488,7 @@ flow_wildcards_init_for_packet(struct flow_wildcards *wc, /* actset_output wildcarded. 
*/ + WC_MASK_FIELD(wc, packet_type); WC_MASK_FIELD(wc, dl_dst); WC_MASK_FIELD(wc, dl_src); WC_MASK_FIELD(wc, dl_type); diff --git a/lib/flow.h b/lib/flow.h index 68bd4f3c5..f61d6c3aa 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -83,6 +83,7 @@ void format_flags(struct ds *ds, const char *(*bit_to_string)(uint32_t), void format_flags_masked(struct ds *ds, const char *name, const char *(*bit_to_string)(uint32_t), uint32_t flags, uint32_t mask, uint32_t max_mask); +void format_packet_type_masked(struct ds *, ovs_be32 value, ovs_be32 mask); int parse_flags(const char *s, const char *(*bit_to_string)(uint32_t), char end, const char *field_name, char **res_string, uint32_t *res_flags, uint32_t allowed, uint32_t *res_mask); @@ -964,9 +965,23 @@ static inline bool is_ethernet(const struct flow *flow, return flow->packet_type == htonl(PT_ETH); } +static inline ovs_be16 get_dl_type(const struct flow *flow) +{ + if (flow->packet_type == htonl(PT_ETH)) { + return flow->dl_type; + } else if (pt_ns(flow->packet_type) == OFPHTN_ETHERTYPE) { + return pt_ns_type_be(flow->packet_type); + } else { + return htons(FLOW_DL_TYPE_NONE); + } +} + static inline bool is_vlan(const struct flow *flow, struct flow_wildcards *wc) { + if (!is_ethernet(flow, wc)) { + return false; + } if (wc) { WC_MASK_FIELD_MASK(wc, vlans[0].tci, htons(VLAN_CFI)); } @@ -975,7 +990,7 @@ static inline bool is_vlan(const struct flow *flow, static inline bool is_ip_any(const struct flow *flow) { - return dl_type_is_ip_any(flow->dl_type); + return dl_type_is_ip_any(get_dl_type(flow)); } static inline bool is_ip_proto(const struct flow *flow, uint8_t ip_proto, @@ -1011,7 +1026,7 @@ static inline bool is_sctp(const struct flow *flow, static inline bool is_icmpv4(const struct flow *flow, struct flow_wildcards *wc) { - if (flow->dl_type == htons(ETH_TYPE_IP)) { + if (get_dl_type(flow) == htons(ETH_TYPE_IP)) { if (wc) { memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); } @@ -1023,7 +1038,7 @@ static inline bool 
is_icmpv4(const struct flow *flow, static inline bool is_icmpv6(const struct flow *flow, struct flow_wildcards *wc) { - if (flow->dl_type == htons(ETH_TYPE_IPV6)) { + if (get_dl_type(flow) == htons(ETH_TYPE_IPV6)) { if (wc) { memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); } @@ -1054,7 +1069,7 @@ static inline bool is_nd(const struct flow *flow, static inline bool is_igmp(const struct flow *flow, struct flow_wildcards *wc) { - if (flow->dl_type == htons(ETH_TYPE_IP)) { + if (get_dl_type(flow) == htons(ETH_TYPE_IP)) { if (wc) { memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); } @@ -1098,8 +1113,8 @@ static inline bool is_mld_report(const struct flow *flow, static inline bool is_stp(const struct flow *flow) { - return (eth_addr_equals(flow->dl_dst, eth_addr_stp) - && flow->dl_type == htons(FLOW_DL_TYPE_NONE)); + return (flow->dl_type == htons(FLOW_DL_TYPE_NONE) + && eth_addr_equals(flow->dl_dst, eth_addr_stp)); } #endif /* flow.h */ diff --git a/lib/learn.c b/lib/learn.c index bc5a6eb2d..4658b8611 100644 --- a/lib/learn.c +++ b/lib/learn.c @@ -139,6 +139,7 @@ learn_execute(const struct ofpact_learn *learn, const struct flow *flow, switch (spec->dst_type) { case NX_LEARN_DST_MATCH: mf_write_subfield(&spec->dst, &value, &fm->match); + match_add_ethernet_prereq(&fm->match, spec->dst.field); mf_vl_mff_set_tlv_bitmap( spec->dst.field, &fm->match.flow.tunnel.metadata.present.map); break; diff --git a/lib/match.c b/lib/match.c index 9aa0d8885..f5288e31f 100644 --- a/lib/match.c +++ b/lib/match.c @@ -496,6 +496,49 @@ match_set_packet_type(struct match *match, ovs_be32 packet_type) match->wc.masks.packet_type = OVS_BE32_MAX; } +/* If 'match' does not match on any packet type, make it match on Ethernet + * packets (the default packet type, as specified by OpenFlow). 
*/ +void +match_set_default_packet_type(struct match *match) +{ + if (!match->wc.masks.packet_type) { + match_set_packet_type(match, htonl(PT_ETH)); + } +} + +/* Returns true if 'match' matches only Ethernet packets (the default packet + * type, as specified by OpenFlow). */ +bool +match_has_default_packet_type(const struct match *match) +{ + return (match->flow.packet_type == htonl(PT_ETH) + && match->wc.masks.packet_type == OVS_BE32_MAX); +} + +/* A match on 'field' is being added to or has been added to 'match'. If + * 'field' is a data field, and 'match' does not already match on packet_type, + * this function makes it match on the Ethernet packet_type. + * + * This function is useful because OpenFlow implicitly applies to Ethernet + * packets when there's no explicit packet_type, but matching on a metadata + * field doesn't imply anything about the packet_type and falsely inferring + * that it does can cause harm. A flow that matches only on metadata fields, + * for example, should be able to match more than just Ethernet flows. There + * are also important reasons that a catch-all match (one with no field matches + * at all) should not imply a packet_type(0,0) match. For example, a "flow + * dump" request that matches on no fields should return every flow in the + * switch, not just the flows that match on Ethernet. As a second example, + * OpenFlow 1.2+ special-cases "table miss" flows, that is catch-all flows with + * priority 0, and inferring a match on packet_type(0,0) causes such a flow not + * to be a table miss flow. 
*/ +void +match_add_ethernet_prereq(struct match *match, const struct mf_field *field) +{ + if (field->prereqs != MFP_NONE) { + match_set_default_packet_type(match); + } +} + void match_set_dl_type(struct match *match, ovs_be16 dl_type) { @@ -1187,8 +1230,8 @@ match_format(const struct match *match, size_t start_len = s->length; const struct flow *f = &match->flow; bool skip_type = false; - bool skip_proto = false; + ovs_be16 dl_type = f->dl_type; int i; @@ -1269,25 +1312,18 @@ match_format(const struct match *match, format_be16_masked(s, "ct_tp_dst", f->ct_tp_dst, wc->masks.ct_tp_dst); } - if (wc->masks.packet_type) { - if (pt_ns_type_be(wc->masks.packet_type) == 0) { - ds_put_format(s, "packet_type=(%u,*),", - pt_ns(f->packet_type)); - } else if (pt_ns_type_be(wc->masks.packet_type) == OVS_BE16_MAX) { - ds_put_format(s, "packet_type=(%u,%#"PRIx16"),", - pt_ns(f->packet_type), - pt_ns_type(f->packet_type)); - } else { - ds_put_format(s, "packet_type=(%u,%#"PRIx16"/%#"PRIx16"),", - pt_ns(f->packet_type), - pt_ns_type(f->packet_type), - pt_ns_type(wc->masks.packet_type)); + if (wc->masks.packet_type && !match_has_default_packet_type(match)) { + format_packet_type_masked(s, f->packet_type, wc->masks.packet_type); + ds_put_char(s, ','); + if (pt_ns(f->packet_type) == OFPHTN_ETHERTYPE) { + dl_type = pt_ns_type_be(f->packet_type); } } if (wc->masks.dl_type) { + dl_type = f->dl_type; skip_type = true; - if (f->dl_type == htons(ETH_TYPE_IP)) { + if (dl_type == htons(ETH_TYPE_IP)) { if (wc->masks.nw_proto) { skip_proto = true; if (f->nw_proto == IPPROTO_ICMP) { @@ -1307,7 +1343,7 @@ match_format(const struct match *match, } else { ds_put_format(s, "%sip%s,", colors.value, colors.end); } - } else if (f->dl_type == htons(ETH_TYPE_IPV6)) { + } else if (dl_type == htons(ETH_TYPE_IPV6)) { if (wc->masks.nw_proto) { skip_proto = true; if (f->nw_proto == IPPROTO_ICMPV6) { @@ -1325,13 +1361,13 @@ match_format(const struct match *match, } else { ds_put_format(s, "%sipv6%s,", 
colors.value, colors.end); } - } else if (f->dl_type == htons(ETH_TYPE_ARP)) { + } else if (dl_type == htons(ETH_TYPE_ARP)) { ds_put_format(s, "%sarp%s,", colors.value, colors.end); - } else if (f->dl_type == htons(ETH_TYPE_RARP)) { + } else if (dl_type == htons(ETH_TYPE_RARP)) { ds_put_format(s, "%srarp%s,", colors.value, colors.end); - } else if (f->dl_type == htons(ETH_TYPE_MPLS)) { + } else if (dl_type == htons(ETH_TYPE_MPLS)) { ds_put_format(s, "%smpls%s,", colors.value, colors.end); - } else if (f->dl_type == htons(ETH_TYPE_MPLS_MCAST)) { + } else if (dl_type == htons(ETH_TYPE_MPLS_MCAST)) { ds_put_format(s, "%smplsm%s,", colors.value, colors.end); } else { skip_type = false; @@ -1403,9 +1439,9 @@ match_format(const struct match *match, if (!skip_type && wc->masks.dl_type) { ds_put_format(s, "%sdl_type=%s0x%04"PRIx16",", - colors.param, colors.end, ntohs(f->dl_type)); + colors.param, colors.end, ntohs(dl_type)); } - if (f->dl_type == htons(ETH_TYPE_IPV6)) { + if (dl_type == htons(ETH_TYPE_IPV6)) { format_ipv6_netmask(s, "ipv6_src", &f->ipv6_src, &wc->masks.ipv6_src); format_ipv6_netmask(s, "ipv6_dst", &f->ipv6_dst, &wc->masks.ipv6_dst); if (wc->masks.ipv6_label) { @@ -1419,8 +1455,8 @@ match_format(const struct match *match, ntohl(wc->masks.ipv6_label)); } } - } else if (f->dl_type == htons(ETH_TYPE_ARP) || - f->dl_type == htons(ETH_TYPE_RARP)) { + } else if (dl_type == htons(ETH_TYPE_ARP) || + dl_type == htons(ETH_TYPE_RARP)) { format_ip_netmask(s, "arp_spa", f->nw_src, wc->masks.nw_src); format_ip_netmask(s, "arp_tpa", f->nw_dst, wc->masks.nw_dst); } else { @@ -1428,8 +1464,8 @@ match_format(const struct match *match, format_ip_netmask(s, "nw_dst", f->nw_dst, wc->masks.nw_dst); } if (!skip_proto && wc->masks.nw_proto) { - if (f->dl_type == htons(ETH_TYPE_ARP) || - f->dl_type == htons(ETH_TYPE_RARP)) { + if (dl_type == htons(ETH_TYPE_ARP) || + dl_type == htons(ETH_TYPE_RARP)) { ds_put_format(s, "%sarp_op=%s%"PRIu8",", colors.param, colors.end, f->nw_proto); 
} else { @@ -1437,8 +1473,8 @@ match_format(const struct match *match, colors.param, colors.end, f->nw_proto); } } - if (f->dl_type == htons(ETH_TYPE_ARP) || - f->dl_type == htons(ETH_TYPE_RARP)) { + if (dl_type == htons(ETH_TYPE_ARP) || + dl_type == htons(ETH_TYPE_RARP)) { format_eth_masked(s, "arp_sha", f->arp_sha, wc->masks.arp_sha); format_eth_masked(s, "arp_tha", f->arp_tha, wc->masks.arp_tha); } @@ -1491,15 +1527,15 @@ match_format(const struct match *match, f->nw_frag & FLOW_NW_FRAG_LATER ? "later" : "not_later"); break; } - if (f->dl_type == htons(ETH_TYPE_IP) && + if (dl_type == htons(ETH_TYPE_IP) && f->nw_proto == IPPROTO_ICMP) { format_be16_masked(s, "icmp_type", f->tp_src, wc->masks.tp_src); format_be16_masked(s, "icmp_code", f->tp_dst, wc->masks.tp_dst); - } else if (f->dl_type == htons(ETH_TYPE_IP) && + } else if (dl_type == htons(ETH_TYPE_IP) && f->nw_proto == IPPROTO_IGMP) { format_be16_masked(s, "igmp_type", f->tp_src, wc->masks.tp_src); format_be16_masked(s, "igmp_code", f->tp_dst, wc->masks.tp_dst); - } else if (f->dl_type == htons(ETH_TYPE_IPV6) && + } else if (dl_type == htons(ETH_TYPE_IPV6) && f->nw_proto == IPPROTO_ICMPV6) { format_be16_masked(s, "icmp_type", f->tp_src, wc->masks.tp_src); format_be16_masked(s, "icmp_code", f->tp_dst, wc->masks.tp_dst); diff --git a/lib/meta-flow.c b/lib/meta-flow.c index a255f1ce9..d0980814c 100644 --- a/lib/meta-flow.c +++ b/lib/meta-flow.c @@ -205,6 +205,8 @@ mf_is_all_wild(const struct mf_field *mf, const struct flow_wildcards *wc) return !wc->masks.dp_hash; case MFF_RECIRC_ID: return !wc->masks.recirc_id; + case MFF_PACKET_TYPE: + return !wc->masks.packet_type; case MFF_CONJ_ID: return !wc->masks.conj_id; case MFF_TUN_SRC: @@ -401,22 +403,24 @@ mf_are_prereqs_ok__(const struct mf_field *mf, const struct flow *flow, const struct flow_wildcards *mask, struct flow_wildcards *wc) { + ovs_be16 dl_type = get_dl_type(flow); + switch (mf->prereqs) { case MFP_NONE: return true; case MFP_ETHERNET: return 
is_ethernet(flow, wc); case MFP_ARP: - return (flow->dl_type == htons(ETH_TYPE_ARP) || - flow->dl_type == htons(ETH_TYPE_RARP)); + return (dl_type == htons(ETH_TYPE_ARP) || + dl_type == htons(ETH_TYPE_RARP)); case MFP_IPV4: - return flow->dl_type == htons(ETH_TYPE_IP); + return dl_type == htons(ETH_TYPE_IP); case MFP_IPV6: - return flow->dl_type == htons(ETH_TYPE_IPV6); + return dl_type == htons(ETH_TYPE_IPV6); case MFP_VLAN_VID: return is_vlan(flow, wc); case MFP_MPLS: - return eth_type_mpls(flow->dl_type); + return eth_type_mpls(dl_type); case MFP_IP_ANY: return is_ip_any(flow); case MFP_CT_VALID: @@ -476,6 +480,7 @@ mf_is_value_valid(const struct mf_field *mf, const union mf_value *value) switch (mf->id) { case MFF_DP_HASH: case MFF_RECIRC_ID: + case MFF_PACKET_TYPE: case MFF_CONJ_ID: case MFF_TUN_ID: case MFF_TUN_SRC: @@ -600,6 +605,9 @@ mf_get_value(const struct mf_field *mf, const struct flow *flow, case MFF_RECIRC_ID: value->be32 = htonl(flow->recirc_id); break; + case MFF_PACKET_TYPE: + value->be32 = flow->packet_type; + break; case MFF_CONJ_ID: value->be32 = htonl(flow->conj_id); break; @@ -883,6 +891,9 @@ mf_set_value(const struct mf_field *mf, case MFF_RECIRC_ID: match_set_recirc_id(match, ntohl(value->be32)); break; + case MFF_PACKET_TYPE: + match_set_packet_type(match, value->be32); + break; case MFF_CONJ_ID: match_set_conj_id(match, ntohl(value->be32)); break; @@ -1248,6 +1259,9 @@ mf_set_flow_value(const struct mf_field *mf, case MFF_RECIRC_ID: flow->recirc_id = ntohl(value->be32); break; + case MFF_PACKET_TYPE: + flow->packet_type = value->be32; + break; case MFF_CONJ_ID: flow->conj_id = ntohl(value->be32); break; @@ -1292,7 +1306,6 @@ mf_set_flow_value(const struct mf_field *mf, case MFF_IN_PORT: flow->in_port.ofp_port = u16_to_ofp(ntohs(value->be16)); break; - case MFF_IN_PORT_OXM: ofputil_port_from_ofp11(value->be32, &flow->in_port.ofp_port); break; @@ -1574,6 +1587,7 @@ mf_is_pipeline_field(const struct mf_field *mf) CASE_MFF_REGS: 
CASE_MFF_XREGS: CASE_MFF_XXREGS: + case MFF_PACKET_TYPE: return true; case MFF_DP_HASH: @@ -1688,6 +1702,10 @@ mf_set_wild(const struct mf_field *mf, struct match *match, char **err_str) match->flow.recirc_id = 0; match->wc.masks.recirc_id = 0; break; + case MFF_PACKET_TYPE: + match->flow.packet_type = 0; + match->wc.masks.packet_type = 0; + break; case MFF_CONJ_ID: match->flow.conj_id = 0; match->wc.masks.conj_id = 0; @@ -2021,6 +2039,7 @@ mf_set(const struct mf_field *mf, case MFF_CT_TP_SRC: case MFF_CT_TP_DST: case MFF_RECIRC_ID: + case MFF_PACKET_TYPE: case MFF_CONJ_ID: case MFF_IN_PORT: case MFF_IN_PORT_OXM: @@ -2386,6 +2405,44 @@ syntax_error: } static char * +mf_from_packet_type_string(const char *s, ovs_be32 *packet_type) +{ + char *tail; + const char *err_str = ""; + int err; + + if (*s != '(') { + err_str = "missing '('"; + goto syntax_error; + } + s++; + err = parse_int_string(s, (uint8_t *)packet_type, 2, &tail); + if (err) { + err_str = "ns"; + goto syntax_error; + } + if (*tail != ',') { + err_str = "missing ','"; + goto syntax_error; + } + s = tail + 1; + err = parse_int_string(s, ((uint8_t *)packet_type) + 2, 2, &tail); + if (err) { + err_str = "ns_type"; + goto syntax_error; + } + if (*tail != ')') { + err_str = "missing ')'"; + goto syntax_error; + } + + return NULL; + +syntax_error: + return xasprintf("%s: bad syntax for packet type %s", s, err_str); +} + +static char * mf_from_ethernet_string(const struct mf_field *mf, const char *s, struct eth_addr *mac, struct eth_addr *mask) { @@ -2623,6 +2680,12 @@ mf_parse(const struct mf_field *mf, const char *s, error = mf_from_tcp_flags_string(s, &value->be16, &mask->be16); break; + case MFS_PACKET_TYPE: + ovs_assert(mf->n_bytes == sizeof(ovs_be32)); + error = mf_from_packet_type_string(s, &value->be32); + mask->be32 = OVS_BE32_MAX; + break; + default: OVS_NOT_REACHED(); } @@ -2717,6 +2780,12 @@ mf_format_ct_state_string(ovs_be32 value, ovs_be32 mask, struct ds *s) ntohl(mask), UINT16_MAX); } +static 
void +mf_format_packet_type_string(ovs_be32 value, ovs_be32 mask, struct ds *s) +{ + format_packet_type_masked(s, value, mask); +} + /* Appends to 's' a string representation of field 'mf' whose value is in * 'value' and 'mask'. 'mask' may be NULL to indicate an exact match. */ void @@ -2785,6 +2854,11 @@ mf_format(const struct mf_field *mf, mask ? mask->be16 : OVS_BE16_MAX, s); break; + case MFS_PACKET_TYPE: + mf_format_packet_type_string(value->be32, + mask ? mask->be32 : OVS_BE32_MAX, s); + break; + default: OVS_NOT_REACHED(); } diff --git a/lib/meta-flow.xml b/lib/meta-flow.xml index 5efd43100..856e1ba8c 100644 --- a/lib/meta-flow.xml +++ b/lib/meta-flow.xml @@ -22,26 +22,38 @@ </p> <p> - Some data fields, called <dfn>root fields</dfn>, are always present as a - consequence of the basic networking technology in use. The Ethernet header - fields are root fields in current versions of Open vSwitch, though future - versions might support other roots. (Currently, to support LISP tunnels, - which do not encapsulate an Ethernet header, Open vSwitch synthesizes one.) + Data fields that are always present as a consequence of the basic + networking technology in use are called <dfn>root fields</dfn>. + Open vSwitch 2.7 and earlier considered Ethernet fields to be root fields, + and this remains the default mode of operation for Open vSwitch bridges. + In this mode, when a packet is received from a non-Ethernet interface, + such as a layer-3 LISP or GRE tunnel, Open vSwitch force-fits it to this + Ethernet-centric point of view by pretending that an Ethernet header is + present whose Ethernet type indicates the packet's actual type (and + whose source and destination addresses are all-zero). </p> - <!-- future directions: EXT-112 --> <p> - Other data fields are not always present. 
A packet contains ARP fields, - for example, only when its Ethernet header indicates the Ethertype for ARP, + Open vSwitch 2.8 and later supports the ``packet type-aware pipeline'' + concept introduced in OpenFlow 1.5. A bridge configured to be packet + type-aware can handle packets of multiple networking technologies, such as + Ethernet, IP, ARP, MPLS, or NSH in parallel. Such a bridge does not have + any root fields. + </p> + + <p> + Non-root data fields are not always present. A packet contains ARP + fields, for example, only when its packet type is ARP or when it is an + Ethernet packet whose Ethernet header indicates the Ethertype for ARP, 0x0806. In this documentation, we say that a field is <dfn>applicable</dfn> when it is present in a packet, and <dfn>inapplicable</dfn> when it is not. (These are not standard terms.) We refer to the conditions that determine whether a field is applicable as <dfn>prerequisites</dfn>. Some VLAN-related fields are a special case: - these fields are always applicable, but have a designated value or bit that - indicates whether a VLAN header is present, with the remaining values or - bits indicating the VLAN header's content (if it is present). <!-- XXX - also ethertype --> + these fields are always applicable for Ethernet packets, but have a + designated value or bit that indicates whether a VLAN header is present, + with the remaining values or bits indicating the VLAN header's content + (if it is present). <!-- XXX also ethertype --> </p> <p> @@ -51,7 +63,8 @@ example, one may match (see <cite>Matching</cite>, below) a given field only if the match includes the field's prerequisite, e.g. matching an ARP field is only allowed if one also matches on - Ethertype 0x0806. + Ethertype 0x0806 or the <ref field="packet_type"/> for ARP in a packet + type-aware bridge. 
</p> <p> @@ -301,6 +314,8 @@ tcp,tp_src=0x07c0/0xfff0 </p> <dl> + <dt><code>eth</code></dt> + <dd><code>packet_type=(0,0)</code> (Open vSwitch 2.8 and later)</dd> <dt><code>ip</code></dt> <dd><code>eth_type=0x0800</code></dd> <dt><code>ipv6</code></dt> <dd><code>eth_type=0x86dd</code></dd> <dt><code>icmp</code></dt> <dd><code>eth_type=0x0800,ip_proto=1</code></dd> @@ -317,6 +332,15 @@ tcp,tp_src=0x07c0/0xfff0 <dt><code>mplsm</code></dt> <dd><code>eth_type=0x8848</code></dd> </dl> + <p> + These shorthand notations continue to work in packet type-aware bridges. + The absence of a packet_type match implies + <code>packet_type=ethernet</code>, so that shorthands match on Ethernet + packets with the implied eth_type. Please note that the shorthand + <code>ip</code> does not match packets of packet_type (1,0x800) for IPv4. + </p> + + <h2>Evolution of OpenFlow Fields</h2> <p> @@ -802,20 +826,12 @@ tcp,tp_src=0x07c0/0xfff0 </p> <ul> + <li>Packet type.</li> <li>TCP flags.</li> <li>Packet registers.</li> <li>The output port in the OpenFlow action set.</li> </ul> - <p> - OpenFlow 1.5 also added OXMs for the following fields not documented here - and not yet implemented by Open vSwitch: - </p> - - <ul> - <li>Packet type.</li> - </ul> - <h1>Fields Reference</h1> <p> @@ -2273,6 +2289,102 @@ actions=clone(load:0->NXM_OF_IN_PORT[],output:123) <field id="MFF_DP_HASH" title="Datapath Hash" internal="yes"/> <field id="MFF_RECIRC_ID" title="Datapath Recirculation ID" internal="yes"/> + + <field id="MFF_PACKET_TYPE" title="Packet Type"> + <p> + The type of the packet in the format specified in OpenFlow 1.5: + </p> + + <diagram> + <header name="Packet type"> + <bits name="ns" above="16" width=".75"/> + <bits name="ns_type" above="16" width=".75"/> + </header> + <dots/> + </diagram> + + <p> + The upper 16 bits, <var>ns</var>, are a namespace. The meaning of + <var>ns_type</var> depends on the namespace. 
The packet type field is + specified and displayed in the format + <code>(<var>ns</var>,<var>ns_type</var>)</code>. + </p> + + <p> + Open vSwitch currently supports the following classes of packet types + for matching: + <dl> + <dt><code>(0,0)</code></dt> + <dd>Ethernet.</dd> + <dt><code>(1,<var>ethertype</var>)</code></dt> + <dd> + <p> + The specified <var>ethertype</var>. Open vSwitch can forward + packets with any <var>ethertype</var>, but it can only match on + and process data fields for the following supported packet types: + </p> + <dl> + <dt><code>(1,0x800)</code></dt> <dd>IPv4</dd> + <dt><code>(1,0x806)</code></dt> <dd>ARP</dd> + <dt><code>(1,0x86dd)</code></dt> <dd>IPv6</dd> + <dt><code>(1,0x8847)</code></dt> <dd>MPLS</dd> + <dt><code>(1,0x8848)</code></dt> <dd>MPLS multicast</dd> + <dt><code>(1,0x8035)</code></dt> <dd>RARP</dd> + <dt><code>(1,0x894f)</code></dt> <dd>NSH</dd> + </dl> + </dd> + </dl> + </p> + + <p> + Consider the distinction between a packet with <code>packet_type=(0,0), + dl_type=0x800</code> and one with <code>packet_type=(1,0x800)</code>. + The former is an Ethernet frame that contains an IPv4 packet, like + this: + </p> + + <diagram> + <header name="Ethernet"> + <bits name="dst" above="48" width="0.4"/> + <bits name="src" above="48" width="0.4"/> + <bits name="type" above="16" below="0x800" width="0.4"/> + </header> + <header name="IPv4"> + <bits name="..." width="0.4"/> + <bits name="proto" above="8" width="0.4"/> + <bits name="src" above="32" width="0.4"/> + <bits name="dst" above="32" width="0.4"/> + </header> + <dots/> + </diagram> + + <p> + The latter is an IPv4 packet not encapsulated inside any outer frame, + like this: + </p> + + <diagram> + <header name="IPv4"> + <bits name="..." 
width="0.4"/> + <bits name="proto" above="8" width="0.4"/> + <bits name="src" above="32" width="0.4"/> + <bits name="dst" above="32" width="0.4"/> + </header> + <dots/> + </diagram> + + <p> + Matching on <ref field="packet_type"/> is a pre-requisite for matching + on any data field, but for backward compatibility, when a match on a + data field is present without a <ref field="packet_type"/> match, Open + vSwitch acts as though a match on <code>(0,0)</code> (Ethernet) had + been supplied. Similarly, when Open vSwitch sends flow match + information to a controller, e.g. in a reply to a request to dump the + flow table, Open vSwitch omits a match on packet type (0,0) if it would + be implied by a data field match. + </p> + </field> + </group> <group title="Connection Tracking"> diff --git a/lib/nx-match.c b/lib/nx-match.c index 6278b7758..cb0cad845 100644 --- a/lib/nx-match.c +++ b/lib/nx-match.c @@ -561,6 +561,8 @@ nx_pull_raw(const uint8_t *p, unsigned int match_len, bool strict, free(err_str); return OFPERR_OFPBMC_BAD_VALUE; } + + match_add_ethernet_prereq(match, field); } if (error) { @@ -775,6 +777,7 @@ oxm_pull_field_array(const void *fields_data, size_t fields_len, struct nxm_put_ctx { struct ofpbuf *output; + bool implied_ethernet; }; void @@ -795,6 +798,9 @@ nxm_put__(struct nxm_put_ctx *ctx, const void *value, const void *mask, size_t n_bytes) { nxm_put_entry_raw(ctx->output, field, version, value, mask, n_bytes); + if (!ctx->implied_ethernet && mf_from_id(field)->prereqs != MFP_NONE) { + ctx->implied_ethernet = true; + } } static void @@ -904,8 +910,9 @@ nxm_put_ip(struct nxm_put_ctx *ctx, const struct match *match, enum ofp_version oxm) { const struct flow *flow = &match->flow; + ovs_be16 dl_type = get_dl_type(flow); - if (flow->dl_type == htons(ETH_TYPE_IP)) { + if (dl_type == htons(ETH_TYPE_IP)) { nxm_put_32m(ctx, MFF_IPV4_SRC, oxm, flow->nw_src, match->wc.masks.nw_src); nxm_put_32m(ctx, MFF_IPV4_DST, oxm, @@ -1014,12 +1021,19 @@ nx_put_raw(struct 
ofpbuf *b, enum ofp_version oxm, const struct match *match, { const struct flow *flow = &match->flow; const size_t start_len = b->size; + ovs_be16 dl_type = get_dl_type(flow); int match_len; int i; BUILD_ASSERT_DECL(FLOW_WC_SEQ == 39); - struct nxm_put_ctx ctx = { .output = b }; + struct nxm_put_ctx ctx = { .output = b, .implied_ethernet = false }; + + /* OpenFlow Packet Type. Must be first. */ + if (match->wc.masks.packet_type && !match_has_default_packet_type(match)) { + nxm_put_32m(&ctx, MFF_PACKET_TYPE, oxm, flow->packet_type, + match->wc.masks.packet_type); + } /* Metadata. */ if (match->wc.masks.dp_hash) { @@ -1082,7 +1096,7 @@ nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match, } /* MPLS. */ - if (eth_type_mpls(flow->dl_type)) { + if (eth_type_mpls(dl_type)) { if (match->wc.masks.mpls_lse[0] & htonl(MPLS_TC_MASK)) { nxm_put_8(&ctx, MFF_MPLS_TC, oxm, mpls_lse_to_tc(flow->mpls_lse[0])); @@ -1102,8 +1116,8 @@ nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match, /* L3. */ if (is_ip_any(flow)) { nxm_put_ip(&ctx, match, oxm); - } else if (flow->dl_type == htons(ETH_TYPE_ARP) || - flow->dl_type == htons(ETH_TYPE_RARP)) { + } else if (dl_type == htons(ETH_TYPE_ARP) || + dl_type == htons(ETH_TYPE_RARP)) { /* ARP. 
*/ if (match->wc.masks.nw_proto) { nxm_put_16(&ctx, MFF_ARP_OP, oxm, @@ -1198,6 +1212,16 @@ nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match, } } + if (match_has_default_packet_type(match) && !ctx.implied_ethernet) { + uint64_t pt_stub[16 / 8]; + struct ofpbuf pt; + ofpbuf_use_stack(&pt, pt_stub, sizeof pt_stub); + nxm_put_entry_raw(&pt, MFF_PACKET_TYPE, oxm, &flow->packet_type, + NULL, sizeof flow->packet_type); + + ofpbuf_insert(b, start_len, pt.data, pt.size); + } + match_len = b->size - start_len; return match_len; } diff --git a/lib/odp-util.c b/lib/odp-util.c index 6c2ab6cc5..f4c0b6650 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -2983,23 +2983,18 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, break; case OVS_KEY_ATTR_PACKET_TYPE: { - ovs_be32 packet_type = nl_attr_get_be32(a); - uint16_t ns = pt_ns(packet_type); - uint16_t ns_type = pt_ns_type(packet_type); + ovs_be32 value = nl_attr_get_be32(a); + ovs_be32 mask = ma ? nl_attr_get_be32(ma) : OVS_BE32_MAX; - if (!is_exact) { - ovs_be32 mask = nl_attr_get_be32(ma); - uint16_t mask_ns_type = pt_ns_type(mask); + ovs_be16 ns = htons(pt_ns(value)); + ovs_be16 ns_mask = htons(pt_ns(mask)); + format_be16(ds, "ns", ns, &ns_mask, verbose); - if (mask == 0) { - ds_put_format(ds, "ns=%u,id=*", ns); - } else { - ds_put_format(ds, "ns=%u,id=%#"PRIx16"/%#"PRIx16, - ns, ns_type, mask_ns_type); - } - } else { - ds_put_format(ds, "ns=%u,id=%#"PRIx16, ns, ns_type); - } + ovs_be16 ns_type = pt_ns_type_be(value); + ovs_be16 ns_type_mask = pt_ns_type_be(mask); + format_be16x(ds, "id", ns_type, &ns_type_mask, verbose); + + ds_chomp(ds, ','); break; } @@ -4340,6 +4335,15 @@ parse_odp_key_mask_attr(const char *s, const struct simap *port_names, SCAN_FIELD("tll=", eth, nd_tll); } SCAN_END(OVS_KEY_ATTR_ND); + struct packet_type { + ovs_be16 ns; + ovs_be16 id; + }; + SCAN_BEGIN("packet_type(", struct packet_type) { + SCAN_FIELD("ns=", be16, ns); + SCAN_FIELD("id=", be16, id); 
+ } SCAN_END(OVS_KEY_ATTR_PACKET_TYPE); + /* Encap open-coded. */ if (!strncmp(s, "encap(", 6)) { const char *start = s; @@ -4527,9 +4531,7 @@ odp_flow_key_from_flow__(const struct odp_flow_key_parms *parms, nl_msg_put_odp_port(buf, OVS_KEY_ATTR_IN_PORT, data->in_port.odp_port); } - if (export_mask || flow->packet_type != htonl(PT_ETH)) { - nl_msg_put_be32(buf, OVS_KEY_ATTR_PACKET_TYPE, data->packet_type); - } + nl_msg_put_be32(buf, OVS_KEY_ATTR_PACKET_TYPE, data->packet_type); if (OVS_UNLIKELY(parms->probe)) { max_vlans = FLOW_MAX_VLAN_HEADERS; diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c index 2e4942262..8e2448b20 100644 --- a/lib/ofp-parse.c +++ b/lib/ofp-parse.c @@ -251,6 +251,7 @@ parse_field(const struct mf_field *mf, const char *s, error = mf_parse(mf, s, port_map, &value, &mask); if (!error) { *usable_protocols &= mf_set(mf, &value, &mask, match, &error); + match_add_ethernet_prereq(match, mf); } return error; } @@ -297,6 +298,8 @@ parse_subfield(const char *name, const char *str_value, struct match *match, bitwise_copy(&val, size, 0, &value, size, sf.ofs, sf.n_bits); bitwise_one ( &mask, size, sf.ofs, sf.n_bits); *usable_protocols &= mf_set(field, &value, &mask, match, &error); + + match_add_ethernet_prereq(match, sf.field); } return error; } @@ -416,6 +419,9 @@ parse_ofp_str__(struct ofputil_flow_mod *fm, int command, char *string, if (p->nw_proto) { match_set_nw_proto(&fm->match, p->nw_proto); } + match_set_default_packet_type(&fm->match); + } else if (!strcmp(name, "eth")) { + match_set_packet_type(&fm->match, htonl(PT_ETH)); } else if (fields & F_FLAGS && !strcmp(name, "send_flow_rem")) { fm->flags |= OFPUTIL_FF_SEND_FLOW_REM; } else if (fields & F_FLAGS && !strcmp(name, "check_overlap")) { @@ -517,6 +523,12 @@ parse_ofp_str__(struct ofputil_flow_mod *fm, int command, char *string, return error; } } + /* Copy ethertype to flow->dl_type for matches on packet_type + * (OFPHTN_ETHERTYPE, ethertype). 
*/ + if (fm->match.wc.masks.packet_type == OVS_BE32_MAX && + pt_ns(fm->match.flow.packet_type) == OFPHTN_ETHERTYPE) { + fm->match.flow.dl_type = pt_ns_type_be(fm->match.flow.packet_type); + } /* Check for usable protocol interdependencies between match fields. */ if (fm->match.flow.dl_type == htons(ETH_TYPE_IPV6)) { const struct flow_wildcards *wc = &fm->match.wc; diff --git a/lib/ofp-util.c b/lib/ofp-util.c index da171cdf6..344b66a30 100644 --- a/lib/ofp-util.c +++ b/lib/ofp-util.c @@ -161,6 +161,19 @@ ofputil_match_from_ofp10_match(const struct ofp10_match *ofmatch, ofputil_wildcard_from_ofpfw10(ofpfw, &match->wc); memset(&match->tun_md, 0, sizeof match->tun_md); + /* If any fields, except in_port, are matched, then we also need to match + * on the Ethernet packet_type. */ + const uint32_t ofpfw_data_bits = (OFPFW10_NW_TOS | OFPFW10_NW_PROTO + | OFPFW10_TP_SRC | OFPFW10_TP_DST + | OFPFW10_DL_SRC | OFPFW10_DL_DST + | OFPFW10_DL_TYPE + | OFPFW10_DL_VLAN | OFPFW10_DL_VLAN_PCP); + if ((ofpfw & ofpfw_data_bits) != ofpfw_data_bits + || ofputil_wcbits_to_netmask(ofpfw >> OFPFW10_NW_SRC_SHIFT) + || ofputil_wcbits_to_netmask(ofpfw >> OFPFW10_NW_DST_SHIFT)) { + match_set_default_packet_type(match); + } + /* Initialize most of match->flow. 
*/ match->flow.nw_src = ofmatch->nw_src; match->flow.nw_dst = ofmatch->nw_dst; @@ -328,6 +341,7 @@ ofputil_match_from_ofp11_match(const struct ofp11_match *ofmatch, bool ipv4, arp, rarp; match_init_catchall(match); + match->flow.tunnel.metadata.tab = NULL; if (!(wc & OFPFW11_IN_PORT)) { ofp_port_t ofp_port; @@ -340,10 +354,13 @@ ofputil_match_from_ofp11_match(const struct ofp11_match *ofmatch, match_set_in_port(match, ofp_port); } - match_set_dl_src_masked(match, ofmatch->dl_src, - eth_addr_invert(ofmatch->dl_src_mask)); - match_set_dl_dst_masked(match, ofmatch->dl_dst, - eth_addr_invert(ofmatch->dl_dst_mask)); + struct eth_addr dl_src_mask = eth_addr_invert(ofmatch->dl_src_mask); + struct eth_addr dl_dst_mask = eth_addr_invert(ofmatch->dl_dst_mask); + if (!eth_addr_is_zero(dl_src_mask) || !eth_addr_is_zero(dl_dst_mask)) { + match_set_dl_src_masked(match, ofmatch->dl_src, dl_src_mask); + match_set_dl_dst_masked(match, ofmatch->dl_dst, dl_dst_mask); + match_set_default_packet_type(match); + } if (!(wc & OFPFW11_DL_VLAN)) { if (ofmatch->dl_vlan == htons(OFPVID11_NONE)) { @@ -375,11 +392,13 @@ ofputil_match_from_ofp11_match(const struct ofp11_match *ofmatch, } } } + match_set_default_packet_type(match); } if (!(wc & OFPFW11_DL_TYPE)) { match_set_dl_type(match, ofputil_dl_type_from_openflow(ofmatch->dl_type)); + match_set_default_packet_type(match); } ipv4 = match->flow.dl_type == htons(ETH_TYPE_IP); @@ -7680,21 +7699,35 @@ ofputil_normalize_match__(struct match *match, bool may_log) MAY_IPV6 = 1 << 6, /* ipv6_src, ipv6_dst, ipv6_label */ MAY_ND_TARGET = 1 << 7, /* nd_target */ MAY_MPLS = 1 << 8, /* mpls label and tc */ + MAY_ETHER = 1 << 9, /* dl_src, dl_dst */ } may_match; - struct flow_wildcards wc; + struct flow_wildcards wc = match->wc; + ovs_be16 dl_type; /* Figure out what fields may be matched. */ - if (match->flow.dl_type == htons(ETH_TYPE_IP)) { - may_match = MAY_NW_PROTO | MAY_IPVx | MAY_NW_ADDR; + /* Check the packet_type first and extract dl_type. 
*/ + if (wc.masks.packet_type == 0 || match_has_default_packet_type(match)) { + may_match = MAY_ETHER; + dl_type = match->flow.dl_type; + } else if (wc.masks.packet_type == OVS_BE32_MAX && + pt_ns(match->flow.packet_type) == OFPHTN_ETHERTYPE) { + may_match = 0; + dl_type = pt_ns_type_be(match->flow.packet_type); + } else { + may_match = 0; + dl_type = 0; + } + if (dl_type == htons(ETH_TYPE_IP)) { + may_match |= MAY_NW_PROTO | MAY_IPVx | MAY_NW_ADDR; if (match->flow.nw_proto == IPPROTO_TCP || match->flow.nw_proto == IPPROTO_UDP || match->flow.nw_proto == IPPROTO_SCTP || match->flow.nw_proto == IPPROTO_ICMP) { may_match |= MAY_TP_ADDR; } - } else if (match->flow.dl_type == htons(ETH_TYPE_IPV6)) { - may_match = MAY_NW_PROTO | MAY_IPVx | MAY_IPV6; + } else if (dl_type == htons(ETH_TYPE_IPV6)) { + may_match |= MAY_NW_PROTO | MAY_IPVx | MAY_IPV6; if (match->flow.nw_proto == IPPROTO_TCP || match->flow.nw_proto == IPPROTO_UDP || match->flow.nw_proto == IPPROTO_SCTP) { @@ -7707,17 +7740,17 @@ ofputil_normalize_match__(struct match *match, bool may_log) may_match |= MAY_ND_TARGET | MAY_ARP_THA; } } - } else if (match->flow.dl_type == htons(ETH_TYPE_ARP) || - match->flow.dl_type == htons(ETH_TYPE_RARP)) { - may_match = MAY_NW_PROTO | MAY_NW_ADDR | MAY_ARP_SHA | MAY_ARP_THA; - } else if (eth_type_mpls(match->flow.dl_type)) { - may_match = MAY_MPLS; - } else { - may_match = 0; + } else if (dl_type == htons(ETH_TYPE_ARP) || + dl_type == htons(ETH_TYPE_RARP)) { + may_match |= MAY_NW_PROTO | MAY_NW_ADDR | MAY_ARP_SHA | MAY_ARP_THA; + } else if (eth_type_mpls(dl_type)) { + may_match |= MAY_MPLS; } /* Clear the fields that may not be matched. */ - wc = match->wc; + if (!(may_match & MAY_ETHER)) { + wc.masks.dl_src = wc.masks.dl_dst = eth_addr_zero; + } if (!(may_match & MAY_NW_ADDR)) { wc.masks.nw_src = wc.masks.nw_dst = htonl(0); } |