diff options
-rw-r--r-- | include/openvswitch/ofp-actions.h | 4 | ||||
-rw-r--r-- | lib/ofp-actions.c | 82 | ||||
-rw-r--r-- | ofproto/ofproto-dpif-xlate.c | 75 | ||||
-rw-r--r-- | tests/ofp-actions.at | 6 | ||||
-rw-r--r-- | tests/ofproto-dpif.at | 89 | ||||
-rw-r--r-- | tests/system-traffic.at | 122 | ||||
-rw-r--r-- | utilities/ovs-ofctl.8.in | 19 |
7 files changed, 317 insertions, 80 deletions
diff --git a/include/openvswitch/ofp-actions.h b/include/openvswitch/ofp-actions.h index ef4598b85..7abebb0a5 100644 --- a/include/openvswitch/ofp-actions.h +++ b/include/openvswitch/ofp-actions.h @@ -641,11 +641,13 @@ struct ofpact_nat { /* OFPACT_RESUBMIT. * - * Used for NXAST_RESUBMIT, NXAST_RESUBMIT_TABLE. */ + * Used for NXAST_RESUBMIT, NXAST_RESUBMIT_TABLE, NXAST_RESUBMIT_TABLE_CT. */ struct ofpact_resubmit { struct ofpact ofpact; ofp_port_t in_port; uint8_t table_id; + bool with_ct_orig; /* Resubmit with Conntrack original direction tuple + * fields in place of IP header fields. */ }; /* Bits for 'flags' in struct nx_action_learn. diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c index 83a923920..9c7dd7ff9 100644 --- a/lib/ofp-actions.c +++ b/lib/ofp-actions.c @@ -265,6 +265,8 @@ enum ofp_raw_action_type { NXAST_RAW_RESUBMIT, /* NX1.0+(14): struct nx_action_resubmit. */ NXAST_RAW_RESUBMIT_TABLE, + /* NX1.0+(44): struct nx_action_resubmit. */ + NXAST_RAW_RESUBMIT_TABLE_CT, /* NX1.0+(2): uint32_t. */ NXAST_RAW_SET_TUNNEL, @@ -3852,19 +3854,20 @@ format_FIN_TIMEOUT(const struct ofpact_fin_timeout *a, struct ds *s) ds_put_format(s, "%s)%s", colors.paren, colors.end); } -/* Action structures for NXAST_RESUBMIT and NXAST_RESUBMIT_TABLE. +/* Action structures for NXAST_RESUBMIT, NXAST_RESUBMIT_TABLE, and + * NXAST_RESUBMIT_TABLE_CT. * * These actions search one of the switch's flow tables: * - * - For NXAST_RESUBMIT_TABLE only, if the 'table' member is not 255, then - * it specifies the table to search. + * - For NXAST_RESUBMIT_TABLE and NXAST_RESUBMIT_TABLE_CT, if the + * 'table' member is not 255, then it specifies the table to search. * - * - Otherwise (for NXAST_RESUBMIT_TABLE with a 'table' of 255, or for - * NXAST_RESUBMIT regardless of 'table'), it searches the current flow - * table, that is, the OpenFlow flow table that contains the flow from - * which this action was obtained. If this action did not come from a - * flow table (e.g. 
it came from an OFPT_PACKET_OUT message), then table 0 - * is the current table. + * - Otherwise (for NXAST_RESUBMIT_TABLE or NXAST_RESUBMIT_TABLE_CT with a + * 'table' of 255, or for NXAST_RESUBMIT regardless of 'table'), it + * searches the current flow table, that is, the OpenFlow flow table that + * contains the flow from which this action was obtained. If this action + * did not come from a flow table (e.g. it came from an OFPT_PACKET_OUT + * message), then table 0 is the current table. * * The flow table lookup uses a flow that may be slightly modified from the * original lookup: @@ -3872,9 +3875,12 @@ format_FIN_TIMEOUT(const struct ofpact_fin_timeout *a, struct ds *s) * - For NXAST_RESUBMIT, the 'in_port' member of struct nx_action_resubmit * is used as the flow's in_port. * - * - For NXAST_RESUBMIT_TABLE, if the 'in_port' member is not OFPP_IN_PORT, - * then its value is used as the flow's in_port. Otherwise, the original - * in_port is used. + * - For NXAST_RESUBMIT_TABLE and NXAST_RESUBMIT_TABLE_CT, if the 'in_port' + * member is not OFPP_IN_PORT, then its value is used as the flow's + * in_port. Otherwise, the original in_port is used. + * + * - For NXAST_RESUBMIT_TABLE_CT the Conntrack 5-tuple fields are used as + * the packet's IP header fields during the lookup. * * - If actions that modify the flow (e.g. OFPAT_SET_VLAN_VID) precede the * resubmit action, then the flow is updated with the new values. @@ -3907,11 +3913,12 @@ format_FIN_TIMEOUT(const struct ofpact_fin_timeout *a, struct ds *s) * a total limit of 4,096 resubmits per flow translation (earlier versions * did not impose any total limit). * - * NXAST_RESUBMIT ignores 'table' and 'pad'. NXAST_RESUBMIT_TABLE requires - * 'pad' to be all-bits-zero. + * NXAST_RESUBMIT ignores 'table' and 'pad'. NXAST_RESUBMIT_TABLE and + * NXAST_RESUBMIT_TABLE_CT require 'pad' to be all-bits-zero. * * Open vSwitch 1.0.1 and earlier did not support recursion.
Open vSwitch - * before 1.2.90 did not support NXAST_RESUBMIT_TABLE. + * before 1.2.90 did not support NXAST_RESUBMIT_TABLE. Open vSwitch before + * 2.8.0 did not support NXAST_RESUBMIT_TABLE_CT. */ struct nx_action_resubmit { ovs_be16 type; /* OFPAT_VENDOR. */ @@ -3956,6 +3963,21 @@ decode_NXAST_RAW_RESUBMIT_TABLE(const struct nx_action_resubmit *nar, return 0; } +static enum ofperr +decode_NXAST_RAW_RESUBMIT_TABLE_CT(const struct nx_action_resubmit *nar, + enum ofp_version ofp_version OVS_UNUSED, + struct ofpbuf *out) +{ + enum ofperr error = decode_NXAST_RAW_RESUBMIT_TABLE(nar, ofp_version, out); + if (error) { + return error; + } + struct ofpact_resubmit *resubmit = out->header; + resubmit->ofpact.raw = NXAST_RAW_RESUBMIT_TABLE_CT; + resubmit->with_ct_orig = true; + return 0; +} + static void encode_RESUBMIT(const struct ofpact_resubmit *resubmit, enum ofp_version ofp_version OVS_UNUSED, struct ofpbuf *out) @@ -3963,10 +3985,12 @@ encode_RESUBMIT(const struct ofpact_resubmit *resubmit, uint16_t in_port = ofp_to_u16(resubmit->in_port); if (resubmit->table_id == 0xff - && resubmit->ofpact.raw != NXAST_RAW_RESUBMIT_TABLE) { + && resubmit->ofpact.raw == NXAST_RAW_RESUBMIT) { put_NXAST_RESUBMIT(out, in_port); } else { - struct nx_action_resubmit *nar = put_NXAST_RESUBMIT_TABLE(out); + struct nx_action_resubmit *nar; + nar = resubmit->with_ct_orig + ? 
put_NXAST_RESUBMIT_TABLE_CT(out) : put_NXAST_RESUBMIT_TABLE(out); nar->table = resubmit->table_id; nar->in_port = htons(in_port); } @@ -3977,7 +4001,7 @@ parse_RESUBMIT(char *arg, struct ofpbuf *ofpacts, enum ofputil_protocol *usable_protocols OVS_UNUSED) { struct ofpact_resubmit *resubmit; - char *in_port_s, *table_s; + char *in_port_s, *table_s, *ct_s; resubmit = ofpact_put_RESUBMIT(ofpacts); @@ -4004,6 +4028,16 @@ parse_RESUBMIT(char *arg, struct ofpbuf *ofpacts, resubmit->table_id = 255; } + ct_s = strsep(&arg, ","); + if (ct_s && ct_s[0]) { + if (strcmp(ct_s, "ct")) { + return xasprintf("%s: unknown parameter", ct_s); + } + resubmit->with_ct_orig = true; + } else { + resubmit->with_ct_orig = false; + } + if (resubmit->in_port == OFPP_IN_PORT && resubmit->table_id == 255) { return xstrdup("at least one \"in_port\" or \"table\" must be " "specified on resubmit"); @@ -4026,6 +4060,9 @@ format_RESUBMIT(const struct ofpact_resubmit *a, struct ds *s) if (a->table_id != 255) { ds_put_format(s, "%"PRIu8, a->table_id); } + if (a->with_ct_orig) { + ds_put_cstr(s, ",ct"); + } ds_put_format(s, "%s)%s", colors.paren, colors.end); } } @@ -7223,9 +7260,16 @@ ofpact_check__(enum ofputil_protocol *usable_protocols, struct ofpact *a, case OFPACT_SET_TUNNEL: case OFPACT_SET_QUEUE: case OFPACT_POP_QUEUE: - case OFPACT_RESUBMIT: return 0; + case OFPACT_RESUBMIT: { + struct ofpact_resubmit *resubmit = ofpact_get_RESUBMIT(a); + + if (resubmit->with_ct_orig && !is_ct_valid(flow, &match->wc, NULL)) { + return OFPERR_OFPBAC_MATCH_INCONSISTENT; + } + return 0; + } case OFPACT_FIN_TIMEOUT: if (flow->nw_proto != IPPROTO_TCP) { inconsistent_match(usable_protocols); diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 677789d50..5bbcc0dee 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -483,7 +483,7 @@ static void do_xlate_actions(const struct ofpact *, size_t ofpacts_len, static void xlate_normal(struct xlate_ctx *); static void 
xlate_table_action(struct xlate_ctx *, ofp_port_t in_port, uint8_t table_id, bool may_packet_in, - bool honor_table_miss); + bool honor_table_miss, bool with_ct_orig); static bool input_vid_is_valid(const struct xlate_ctx *, uint16_t vid, struct xbundle *); static uint16_t input_vid_to_vlan(const struct xbundle *, uint16_t vid); @@ -3204,7 +3204,8 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, if (!process_special(ctx, peer) && may_receive(peer, ctx)) { if (xport_stp_forward_state(peer) && xport_rstp_forward_state(peer)) { - xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true); + xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true, + false); if (!ctx->freezing) { xlate_action_set(ctx); } @@ -3218,7 +3219,8 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, size_t old_size = ctx->odp_actions->size; mirror_mask_t old_mirrors2 = ctx->mirrors; - xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true); + xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true, + false); ctx->mirrors = old_mirrors2; ctx->base_flow = old_base_flow; ctx->odp_actions->size = old_size; @@ -3473,8 +3475,52 @@ xlate_resubmit_resource_check(struct xlate_ctx *ctx) } static void +tuple_swap_flow(struct flow *flow, bool ipv4) +{ + uint8_t nw_proto = flow->nw_proto; + flow->nw_proto = flow->ct_nw_proto; + flow->ct_nw_proto = nw_proto; + + if (ipv4) { + ovs_be32 nw_src = flow->nw_src; + flow->nw_src = flow->ct_nw_src; + flow->ct_nw_src = nw_src; + + ovs_be32 nw_dst = flow->nw_dst; + flow->nw_dst = flow->ct_nw_dst; + flow->ct_nw_dst = nw_dst; + } else { + struct in6_addr ipv6_src = flow->ipv6_src; + flow->ipv6_src = flow->ct_ipv6_src; + flow->ct_ipv6_src = ipv6_src; + + struct in6_addr ipv6_dst = flow->ipv6_dst; + flow->ipv6_dst = flow->ct_ipv6_dst; + flow->ct_ipv6_dst = ipv6_dst; + } + + ovs_be16 tp_src = flow->tp_src; + flow->tp_src = flow->ct_tp_src; + flow->ct_tp_src = tp_src; + + ovs_be16 tp_dst = flow->tp_dst; + 
flow->tp_dst = flow->ct_tp_dst; + flow->ct_tp_dst = tp_dst; +} + +static void +tuple_swap(struct flow *flow, struct flow_wildcards *wc) +{ + bool ipv4 = (flow->dl_type == htons(ETH_TYPE_IP)); + + tuple_swap_flow(flow, ipv4); + tuple_swap_flow(&wc->masks, ipv4); +} + +static void xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id, - bool may_packet_in, bool honor_table_miss) + bool may_packet_in, bool honor_table_miss, + bool with_ct_orig) { /* Check if we need to recirculate before matching in a table. */ if (ctx->was_mpls) { @@ -3487,6 +3533,17 @@ xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id, ctx->table_id = table_id; + /* Swap packet fields with CT 5-tuple if requested. */ + if (with_ct_orig) { + /* Do not swap if there is no CT tuple, or if key is not IP. */ + if (ctx->xin->flow.ct_nw_proto == 0 || + !is_ip_any(&ctx->xin->flow)) { + xlate_report_error(ctx, + "resubmit(ct) with non-tracked or non-IP packet!"); + return; + } + tuple_swap(&ctx->xin->flow, ctx->wc); + } rule = rule_dpif_lookup_from_table(ctx->xbridge->ofproto, ctx->xin->tables_version, &ctx->xin->flow, ctx->wc, @@ -3494,6 +3551,10 @@ xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id, &ctx->table_id, in_port, may_packet_in, honor_table_miss, ctx->xin->xcache); + /* Swap back. 
*/ + if (with_ct_orig) { + tuple_swap(&ctx->xin->flow, ctx->wc); + } if (rule) { /* Fill in the cache entry here instead of xlate_recursively @@ -3801,7 +3862,7 @@ xlate_ofpact_resubmit(struct xlate_ctx *ctx, } xlate_table_action(ctx, in_port, table_id, may_packet_in, - honor_table_miss); + honor_table_miss, resubmit->with_ct_orig); } static void @@ -4303,7 +4364,7 @@ xlate_output_action(struct xlate_ctx *ctx, break; case OFPP_TABLE: xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port, - 0, may_packet_in, true); + 0, may_packet_in, true, false); break; case OFPP_NORMAL: xlate_normal(ctx); @@ -5585,7 +5646,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, ovs_assert(ctx->table_id < ogt->table_id); xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port, - ogt->table_id, true, true); + ogt->table_id, true, true, false); break; } diff --git a/tests/ofp-actions.at b/tests/ofp-actions.at index 58d540afe..c185eaa4d 100644 --- a/tests/ofp-actions.at +++ b/tests/ofp-actions.at @@ -91,6 +91,9 @@ ffff 0028 00002320 000d 0001 0000 0000 00000002 0002 001f 00010004 00000000 dnl # actions=resubmit(10,5) ffff 0010 00002320 000e 000a 05 000000 +# actions=resubmit(10,5,ct) +ffff 0010 00002320 002c 000a 05 000000 + # actions=output:NXM_NX_REG1[5..10] ffff 0018 00002320 000f 0145 00010204 ffff 000000000000 @@ -407,6 +410,9 @@ ffff 0028 00002320 000d 0001 0000 0000 00000002 0002 001f 00010004 00000000 dnl # actions=resubmit(10,5) ffff 0010 00002320 000e 000a 05 000000 +# actions=resubmit(10,5,ct) +ffff 0010 00002320 002c 000a 05 000000 + # actions=output:NXM_NX_REG1[5..10] ffff 0018 00002320 000f 0145 00010204 ffff 000000000000 diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 6f55d433b..82c863ca0 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -8409,19 +8409,74 @@ add_of_ports br0 1 2 AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg vconn:info ofproto_dpif:info]) -dnl Allow new connections on p1->p2, but not on p2->p1. 
+dnl Allow new UDP connections on p1 for dst port 2, drop everything else. AT_DATA([flows.txt], [dnl dnl Table 0 +dnl Store zone in reg4 and packet direction in reg3 (IN=1, OUT=2). dnl table=0,priority=100,arp,action=normal -table=0,priority=10,in_port=1,udp,action=ct(commit,zone=0),controller -table=0,priority=10,in_port=2,udp,action=ct(table=1,zone=0) +table=0,priority=10,in_port=1,ip,action=set_field:1->reg4,set_field:1->reg3,ct(zone=NXM_NX_REG4[[0..15]],table=1) +table=0,priority=10,in_port=2,ip,action=set_field:1->reg4,set_field:2->reg3,ct(zone=NXM_NX_REG4[[0..15]],table=1) table=0,priority=1,action=drop dnl -dnl Table 1 -dnl -table=1,priority=10,in_port=2,ct_state=+trk+est-new,udp,action=controller -table=1,priority=1,action=drop +dnl Pass tracked traffic through ACL, drop everything else. +dnl Non-REPLY/RELATED packets get the ACL lookup with the packet headers +dnl in the actual packet direction in reg0 (IN=1, OUT=2). REPLY packets +dnl get the ACL lookup using the conntrack tuple and the inverted direction. +dnl RELATED packets get ACL lookup using the conntrack tuple in the direction +dnl of the master connection, as stored in ct_mark. +dnl +dnl Incoming non-related packet in the original direction (ACL IN) +table=1 reg3=1, ip, ct_state=-rel-rpl+trk-inv action=set_field:1->reg0,resubmit(,3),goto_table:5 +dnl Incoming non-related reply packet (CT ACL OUT) +table=1 reg3=1, ip, ct_state=-rel+rpl+trk-inv action=set_field:2->reg0,resubmit(,3,ct),goto_table:4 +dnl Outgoing non-related packet (ACL OUT) +table=1 reg3=2, ip, ct_state=-rel-rpl+trk-inv action=set_field:2->reg0,resubmit(,3),goto_table:5 +dnl Outgoing non-related reply packet (CT ACL IN) +table=1 reg3=2, ip, ct_state=-rel+rpl+trk-inv action=set_field:1->reg0,resubmit(,3,ct),goto_table:4 +dnl +dnl Related packet (CT ACL in the direction of the master connection.) +table=1 ip, ct_state=+rel+trk-inv, action=move:NXM_NX_CT_MARK[[]]->NXM_NX_REG0[[]],resubmit(,3,ct),goto_table:4 +dnl Drop everything else.
+table=1 priority=0, action=drop +dnl +dnl "ACL table" +dnl +dnl Stateful accept (1->reg2) all incoming (reg0=1) IP connections with +dnl UDP destination port '2'. Store rule ID (1234) in reg1, verdict +dnl in reg2. +table=3 priority=10, reg0=1, udp, udp_dst=2 action=set_field:1234->reg1,set_field:1->reg2 +dnl Stateless drop (0->reg2) everything else in both directions. (Rule ID: 1235) +table=3 priority=0, action=set_field:1235->reg1,set_field:0->reg2 +dnl +dnl Re-process stateful traffic that was not accepted by a stateful rule as +dnl normal traffic in the current direction. This should also delete the +dnl now stale conntrack state, so that new state can be created in its place. +dnl +dnl Stateful accepts go to next table. +table=4 priority=100 reg2=1, action=goto_table:5 +dnl Everything else is reprocessed disregarding the CT state, using the actual +dnl packet direction. +table=4 priority=0 action=move:NXM_NX_REG3[[]]->NXM_NX_REG0[[]],resubmit(,3),goto_table:5 +dnl +dnl "ACL verdict processing table." +dnl +dnl Handle stateful (reg2=1) / stateless (reg2=2) accepts and drops (reg2=0) +dnl +dnl Drop all non-accepted packets. +table=5 reg2=0 priority=1000 action=drop +dnl Commit new non-related IP connections. +table=5 priority=10 reg2=1 ct_state=+new-rel, ip, action=ct(zone=NXM_NX_REG4[[0..15]],commit,exec(move:NXM_NX_REG3[[0..31]]->NXM_NX_CT_MARK[[0..31]],move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6 +dnl Commit new related connections in either direction, which inherit the mark +dnl (the direction of the original direction master tuple) from the master +dnl connection. +table=5 priority=10 reg2=1 ct_state=+new+rel, ip, action=ct(zone=NXM_NX_REG4[[0..15]],commit,exec(move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6 +dnl Forward everything else, including stateless accepts.
+table=5 priority=0 action=goto_table:6 +dnl +dnl "Forwarding table" +dnl +table=6 action=controller ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) @@ -8441,37 +8496,33 @@ OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 4]) OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit]) dnl Check this output. We only see the latter two packets, not the first. -dnl Note that the first packet doesn't have the ct_state bits set. This -dnl happens because the ct_state field is available only after recirc. AT_CHECK([cat ofctl_monitor.log], [0], [dnl -NXT_PACKET_IN (xid=0x0): cookie=0x0 total_len=42 in_port=1 (via action) data_len=42 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=6 cookie=0x0 total_len=42 ct_state=new|trk,ct_zone=1,ct_mark=0x1,ct_label=0x4d2000000000000000000000000,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2,reg0=0x1,reg1=0x4d2,reg2=0x1,reg3=0x1,reg4=0x1,in_port=1 (via action) data_len=42 (unbuffered) udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_src=10.1.1.1,nw_dst=10.1.1.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=1,tp_dst=2 udp_csum:e9d6 dnl -NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x0 total_len=42 ct_state=est|rpl|trk,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2,in_port=2 (via action) data_len=42 (unbuffered) +NXT_PACKET_IN (xid=0x0): table_id=6 cookie=0x0 total_len=42 ct_state=est|rpl|trk,ct_zone=1,ct_mark=0x1,ct_label=0x4d2000000000000000000000000,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2,reg0=0x1,reg1=0x4d2,reg2=0x1,reg3=0x2,reg4=0x1,in_port=2 (via action) data_len=42 (unbuffered) udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,nw_src=10.1.1.2,nw_dst=10.1.1.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=2,tp_dst=1 udp_csum:e9d6 ]) AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl -P nxt_packet_in --detach --no-chdir --pidfile 2> ofctl_monitor.log]) dnl OK, now start a second connection from port 1 -AT_CHECK([ovs-appctl 
netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,ttl=64,frag=no),udp(src=3,dst=4)']) +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,ttl=64,frag=no),udp(src=3,dst=2)']) dnl Now try a reply from port 2. -AT_CHECK([ovs-appctl netdev-dummy/receive p2 'in_port(2),eth(src=50:54:00:00:00:0a,dst=50:54:00:00:00:09),eth_type(0x0800),ipv4(src=10.1.1.2,dst=10.1.1.1,proto=17,tos=0,ttl=64,frag=no),udp(src=4,dst=3)']) +AT_CHECK([ovs-appctl netdev-dummy/receive p2 'in_port(2),eth(src=50:54:00:00:00:0a,dst=50:54:00:00:00:09),eth_type(0x0800),ipv4(src=10.1.1.2,dst=10.1.1.1,proto=17,tos=0,ttl=64,frag=no),udp(src=2,dst=3)']) OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 4]) OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit]) dnl Check this output. We should see both packets -dnl Note that the first packet doesn't have the ct_state bits set. This -dnl happens because the ct_state field is available only after recirc. 
AT_CHECK([cat ofctl_monitor.log], [0], [dnl -NXT_PACKET_IN (xid=0x0): cookie=0x0 total_len=42 in_port=1 (via action) data_len=42 (unbuffered) -udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_src=10.1.1.1,nw_dst=10.1.1.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=3,tp_dst=4 udp_csum:e9d2 +NXT_PACKET_IN (xid=0x0): table_id=6 cookie=0x0 total_len=42 ct_state=new|trk,ct_zone=1,ct_mark=0x1,ct_label=0x4d2000000000000000000000000,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=3,ct_tp_dst=2,reg0=0x1,reg1=0x4d2,reg2=0x1,reg3=0x1,reg4=0x1,in_port=1 (via action) data_len=42 (unbuffered) +udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_src=10.1.1.1,nw_dst=10.1.1.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=3,tp_dst=2 udp_csum:e9d4 dnl -NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x0 total_len=42 ct_state=est|rpl|trk,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=3,ct_tp_dst=4,in_port=2 (via action) data_len=42 (unbuffered) -udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,nw_src=10.1.1.2,nw_dst=10.1.1.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=4,tp_dst=3 udp_csum:e9d2 +NXT_PACKET_IN (xid=0x0): table_id=6 cookie=0x0 total_len=42 ct_state=est|rpl|trk,ct_zone=1,ct_mark=0x1,ct_label=0x4d2000000000000000000000000,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=3,ct_tp_dst=2,reg0=0x1,reg1=0x4d2,reg2=0x1,reg3=0x2,reg4=0x1,in_port=2 (via action) data_len=42 (unbuffered) +udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,nw_src=10.1.1.2,nw_dst=10.1.1.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=2,tp_dst=3 udp_csum:e9d4 ]) OVS_VSWITCHD_STOP diff --git a/tests/system-traffic.at b/tests/system-traffic.at index 4ba4b089b..4fdd27e26 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -2645,7 +2645,7 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src= OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP -dnl CHECK_FTP_NAT(TITLE, IP_ADDR, FLOWS) +dnl 
CHECK_FTP_NAT(TITLE, IP_ADDR, FLOWS, CT_DUMP) dnl dnl Checks the implementation of conntrack with FTP ALGs in combination with dnl NAT, using the provided flow table. @@ -2664,8 +2664,7 @@ m4_define([CHECK_FTP_NAT], ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0. - AT_DATA([flows.txt], [$3 -]) + AT_DATA([flows.txt], [$3]) AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) @@ -2675,10 +2674,7 @@ m4_define([CHECK_FTP_NAT], NS_CHECK_EXEC([at_ns0], [wget ftp://10.1.1.2 -4 --no-passive-ftp -t 3 -T 1 --retry-connrefused -v --server-response --no-remove-listing -o wget0.log -d]) dnl Discards CLOSE_WAIT and CLOSING - AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl -tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=$2,sport=<cleared>,dport=<cleared>),protoinfo=(state=<cleared>),helper=ftp -tcp,orig=(src=10.1.1.2,dst=$2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),protoinfo=(state=<cleared>) -]) + AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [$4]) OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP]) @@ -2739,7 +2735,10 @@ dnl Swaps the fields of the ARP message to turn a query to a response. 
table=10 priority=100 arp xreg0=0 action=normal table=10 priority=10,arp,arp_op=1,action=load:2->OXM_OF_ARP_OP[[]],move:OXM_OF_ARP_SHA[[]]->OXM_OF_ARP_THA[[]],move:OXM_OF_PKT_REG0[[0..47]]->OXM_OF_ARP_SHA[[]],move:OXM_OF_ARP_SPA[[]]->OXM_OF_ARP_TPA[[]],move:NXM_NX_REG2[[]]->OXM_OF_ARP_SPA[[]],move:NXM_OF_ETH_SRC[[]]->NXM_OF_ETH_DST[[]],move:OXM_OF_PKT_REG0[[0..47]]->NXM_OF_ETH_SRC[[]],move:NXM_OF_IN_PORT[[]]->NXM_NX_REG3[[0..15]],load:0->NXM_OF_IN_PORT[[]],output:NXM_NX_REG3[[0..15]] table=10 priority=0 action=drop - ]) +], [dnl +tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=$2,sport=<cleared>,dport=<cleared>),protoinfo=(state=<cleared>),helper=ftp +tcp,orig=(src=10.1.1.2,dst=$2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),protoinfo=(state=<cleared>) +]) ]) dnl Check that ct(nat,table=foo) works without TCP sequence adjustment. @@ -2802,7 +2801,10 @@ dnl Swaps the fields of the ARP message to turn a query to a response. table=10 priority=100 arp xreg0=0 action=normal table=10 priority=10,arp,arp_op=1,action=load:2->OXM_OF_ARP_OP[[]],move:OXM_OF_ARP_SHA[[]]->OXM_OF_ARP_THA[[]],move:OXM_OF_PKT_REG0[[0..47]]->OXM_OF_ARP_SHA[[]],move:OXM_OF_ARP_SPA[[]]->OXM_OF_ARP_TPA[[]],move:NXM_NX_REG2[[]]->OXM_OF_ARP_SPA[[]],move:NXM_OF_ETH_SRC[[]]->NXM_OF_ETH_DST[[]],move:OXM_OF_PKT_REG0[[0..47]]->NXM_OF_ETH_SRC[[]],move:NXM_OF_IN_PORT[[]]->NXM_NX_REG3[[0..15]],load:0->NXM_OF_IN_PORT[[]],output:NXM_NX_REG3[[0..15]] table=10 priority=0 action=drop - ]) +], [dnl +tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=$2,sport=<cleared>,dport=<cleared>),protoinfo=(state=<cleared>),helper=ftp +tcp,orig=(src=10.1.1.2,dst=$2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),protoinfo=(state=<cleared>) +]) ]) dnl Check that ct(nat,table=foo) works without TCP sequence adjustment. 
@@ -2832,8 +2834,14 @@ dnl IP_ADDR must specify the NAT address in standard "10.1.1.x" format, dnl and IP_ADDR_AS_HEX must specify the same address as hex, eg 0x0a0101xx. m4_define([CHECK_FTP_NAT_ORIG_TUPLE], [dnl CHECK_FTP_NAT([orig tuple $1], [$2], [dnl -dnl track all IP traffic (includes nat and helper calls to non-NEW packets.) -table=0 ip, action=ct(nat,table=1) +dnl Store zone in reg4 and packet direction in reg3 (IN=1, OUT=2). +dnl NAT is only applied to OUT-direction packets, so that ACL +dnl processing can be done with non-NATted headers. +dnl +dnl Track all IP traffic in the IN-direction (IN from Port 1). +table=0 in_port=1, ip, action=set_field:1->reg4,set_field:1->reg3,ct(zone=NXM_NX_REG4[[0..15]],table=1) +dnl Track all IP traffic in the OUT-direction (OUT to the Port 1). +table=0 in_port=2, ip, action=set_field:1->reg4,set_field:2->reg3,ct(zone=NXM_NX_REG4[[0..15]],nat,table=1) dnl dnl ARP dnl @@ -2841,32 +2849,81 @@ table=0 priority=100 arp arp_op=1 action=move:OXM_OF_ARP_TPA[[]]->NXM_NX_REG2[[] table=0 priority=10 arp action=normal table=0 priority=0 action=drop dnl -dnl "ACL table" -dnl -dnl Allow all IP traffic with conntrack original direction IP source address -dnl '10.1.1.1'. This should allow also reply packets and related packets in -dnl both directions. -table=1 ip, ct_state=+trk-inv, ct_nw_src=10.1.1.1 action=goto_table:2 +dnl Pass tracked traffic through ACL, drop everything else. +dnl Non-REPLY/RELATED packets get the ACL lookup with the packet headers +dnl in the actual packet direction in reg0 (IN=1, OUT=2). REPLY packets +dnl get the ACL lookup using the conntrack tuple and the inverted direction. +dnl RELATED packets get ACL lookup using the conntrack tuple in the direction +dnl of the master connection, as stored in ct_mark.
+dnl +dnl Incoming non-related packet in the original direction (ACL IN) +table=1 reg3=1, ip, ct_state=-rel-rpl+trk-inv action=set_field:1->reg0,resubmit(,3),goto_table:5 +dnl Incoming non-related reply packet (CT ACL OUT) +table=1 reg3=1, ip, ct_state=-rel+rpl+trk-inv action=set_field:2->reg0,resubmit(,3,ct),goto_table:4 +dnl Outgoing non-related packet (ACL OUT) +table=1 reg3=2, ip, ct_state=-rel-rpl+trk-inv action=set_field:2->reg0,resubmit(,3),goto_table:5 +dnl Outgoing non-related reply packet (CT ACL IN) +table=1 reg3=2, ip, ct_state=-rel+rpl+trk-inv action=set_field:1->reg0,resubmit(,3,ct),goto_table:4 +dnl +dnl Related packet (CT ACL in the direction of the master connection.) +table=1 ip, ct_state=+rel+trk-inv, action=move:NXM_NX_CT_MARK[[]]->NXM_NX_REG0[[]],resubmit(,3,ct),goto_table:4 dnl Drop everything else. table=1 priority=0, action=drop dnl -dnl "Conntrack commit table" +dnl "ACL table" +dnl +dnl Stateful accept (1->reg2) all incoming (reg0=1) IP connections with +dnl IP source address '10.1.1.1'. Store rule ID (1234) in reg1, verdict +dnl in reg2. +table=3 priority=10, reg0=1, ip, nw_src=10.1.1.1 action=set_field:1234->reg1,set_field:1->reg2 +dnl Stateless drop (0->reg2) everything else in both directions. (Rule ID: 1235) +table=3 priority=0, action=set_field:1235->reg1,set_field:0->reg2 +dnl +dnl Re-process stateful traffic that was not accepted by a stateful rule as +dnl normal traffic in the current direction. This should also delete the +dnl now stale conntrack state, so that new state can be created in its place. +dnl +dnl Stateful accepts go to next table. +table=4 priority=100 reg2=1, action=goto_table:5 +dnl Everything else is reprocessed disregarding the CT state, using the actual +dnl packet direction. +table=4 priority=0 action=move:NXM_NX_REG3[[]]->NXM_NX_REG0[[]],resubmit(,3),goto_table:5 +dnl +dnl "ACL verdict processing table."
+dnl +dnl Handle stateful (reg2=1) / stateless (reg2=2) accepts and drops (reg2=0) +dnl +dnl Drop all non-accepted packets. +table=5 reg2=0 priority=1000 action=drop dnl -dnl Commit new outgoing FTP control connections. Must match on 'tcp' when -dnl setting 'alg=ftp'. -table=2 in_port=1 priority=100 ct_state=+new, tcp, tp_dst=21, action=ct(alg=ftp,commit,nat(src=$2)),2 -dnl Commit other new outgoing IP connections. -table=2 in_port=1 priority=20 ct_state=+new, ip, action=ct(commit,nat(src=$2)),2 -dnl Commit incoming new IP connections. 'nat' may be needed for related -dnl connections, and is harmless for connections that do not need it. -table=2 in_port=2 priority=10 ct_state=+new, ip, action=ct(commit,nat),1 -dnl Just forward all the rest. -table=2 priority=0 in_port=1 action=2 -table=2 priority=0 in_port=2 action=1 +dnl Commit new incoming FTP control connections with SNAT range. Must match on +dnl 'tcp' when setting 'alg=ftp'. Store the directionality of non-related +dnl connections to ct_mark. Store the rule ID to labels. +table=5 priority=100 reg2=1 reg3=1 ct_state=+new-rel, tcp, tp_dst=21, action=ct(zone=NXM_NX_REG4[[0..15]],alg=ftp,commit,nat(src=$2),exec(move:NXM_NX_REG3[[0..31]]->NXM_NX_CT_MARK[[0..31]],move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6 +dnl Commit other new incoming non-related IP connections with SNAT range. +table=5 priority=10 reg2=1 reg3=1 ct_state=+new-rel, ip, action=ct(zone=NXM_NX_REG4[[0..15]],commit,nat(src=$2),exec(move:NXM_NX_REG3[[0..31]]->NXM_NX_CT_MARK[[0..31]],move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6 +dnl Commit non-related outgoing new IP connections with DNAT range. +dnl (This should not get any packets in this test.) 
+table=5 priority=10 reg2=1 reg3=2 ct_state=+new-rel, ip, action=ct(zone=NXM_NX_REG4[[0..15]],commit,nat(dst=$2),exec(move:NXM_NX_REG3[[0..31]]->NXM_NX_CT_MARK[[0..31]],move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6 +dnl Commit new related connections in either direction, which need 'nat' +dnl and which inherit the mark (the direction of the original direction +dnl master tuple) from the master connection. +table=5 priority=10 reg2=1 ct_state=+new+rel, ip, action=ct(zone=NXM_NX_REG4[[0..15]],commit,nat,exec(move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6 +dnl +dnl NAT incoming non-NEW packets. Outgoing packets were NATted in table 0. +dnl +table=5 priority=10 ct_state=-new+trk-inv reg3=1 ip, action=ct(zone=NXM_NX_REG4[[0..15]],nat),goto_table:6 +dnl Forward everything else, including stateless accepts. +table=5 priority=0 action=goto_table:6 +dnl +dnl "Forwarding table" +dnl +table=6 in_port=1 action=2 +table=6 in_port=2 action=1 dnl dnl MAC resolution table for IP in reg2, stores mac in OXM_OF_PKT_REG0 dnl -table=8,reg2=$3/0xffffffff,action=load:0x808888888888->OXM_OF_PKT_REG0[[]] +table=8,reg2=$3,action=load:0x808888888888->OXM_OF_PKT_REG0[[]] table=8,priority=0,action=load:0->OXM_OF_PKT_REG0[[]] dnl ARP responder mac filled in at OXM_OF_PKT_REG0, or 0 for normal action. dnl TPA IP in reg2. @@ -2874,7 +2931,10 @@ dnl Swaps the fields of the ARP message to turn a query to a response. 
table=10 priority=100 arp xreg0=0 action=normal table=10 priority=10,arp,arp_op=1,action=load:2->OXM_OF_ARP_OP[[]],move:OXM_OF_ARP_SHA[[]]->OXM_OF_ARP_THA[[]],move:OXM_OF_PKT_REG0[[0..47]]->OXM_OF_ARP_SHA[[]],move:OXM_OF_ARP_SPA[[]]->OXM_OF_ARP_TPA[[]],move:NXM_NX_REG2[[]]->OXM_OF_ARP_SPA[[]],move:NXM_OF_ETH_SRC[[]]->NXM_OF_ETH_DST[[]],move:OXM_OF_PKT_REG0[[0..47]]->NXM_OF_ETH_SRC[[]],move:NXM_OF_IN_PORT[[]]->NXM_NX_REG3[[0..15]],load:0->NXM_OF_IN_PORT[[]],output:NXM_NX_REG3[[0..15]] table=10 priority=0 action=drop - ]) +], [dnl +tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=$2,sport=<cleared>,dport=<cleared>),zone=1,mark=1,labels=0x4d2000000000000000000000000,protoinfo=(state=<cleared>),helper=ftp +tcp,orig=(src=10.1.1.2,dst=$2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),zone=1,mark=1,labels=0x4d2000000000000000000000000,protoinfo=(state=<cleared>) +]) ]) dnl Check that ct(nat,table=foo) works without TCP sequence adjustment with diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in index 2ee3193d4..d783f85c7 100644 --- a/utilities/ovs-ofctl.8.in +++ b/utilities/ovs-ofctl.8.in @@ -963,10 +963,22 @@ only known to be implemented by Open vSwitch: . .IP \fBresubmit\fB:\fIport\fR .IQ \fBresubmit\fB(\fR[\fIport\fR]\fB,\fR[\fItable\fR]\fB) +.IQ \fBresubmit\fB(\fR[\fIport\fR]\fB,\fR[\fItable\fR]\fB,ct) Re-searches this OpenFlow flow table (or the table whose number is specified by \fItable\fR) with the \fBin_port\fR field replaced by -\fIport\fR (if \fIport\fR is specified) and executes the actions -found, if any, in addition to any other actions in this flow entry. +\fIport\fR (if \fIport\fR is specified) and the packet 5-tuple fields +swapped with the corresponding conntrack original direction tuple +fields (if \fBct\fR is specified, see \fBct_nw_src\fR above), and +executes the actions found, if any, in addition to any other actions +in this flow entry. 
The \fBin_port\fR and swapped 5-tuple fields are +restored immediately after the search, before any actions are +executed. +.IP +The \fBct\fR option requires a valid connection tracking state as a +match prerequisite in the flow where this action is placed. Examples +of valid connection tracking state matches include +\fBct_state=+new\fR, \fBct_state=+est\fR, \fBct_state=+rel\fR, and +\fBct_state=+trk-inv\fR. .IP Recursive \fBresubmit\fR actions are obeyed up to implementation-defined limits: @@ -991,7 +1003,8 @@ exception: \fBresubmit\fR from table \fIx\fR to any table \fIy\fR > \fIx\fR does not count against the recursion limit. .RE .IP -Open vSwitch before 1.2.90 did not support \fItable\fR. +Open vSwitch before 1.2.90 did not support \fItable\fR. Open vSwitch +before 2.7 did not support \fBct\fR. . .IP \fBset_tunnel\fB:\fIid\fR .IQ \fBset_tunnel64\fB:\fIid\fR |