summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/openvswitch/ofp-actions.h4
-rw-r--r--lib/ofp-actions.c82
-rw-r--r--ofproto/ofproto-dpif-xlate.c75
-rw-r--r--tests/ofp-actions.at6
-rw-r--r--tests/ofproto-dpif.at89
-rw-r--r--tests/system-traffic.at122
-rw-r--r--utilities/ovs-ofctl.8.in19
7 files changed, 317 insertions, 80 deletions
diff --git a/include/openvswitch/ofp-actions.h b/include/openvswitch/ofp-actions.h
index ef4598b85..7abebb0a5 100644
--- a/include/openvswitch/ofp-actions.h
+++ b/include/openvswitch/ofp-actions.h
@@ -641,11 +641,13 @@ struct ofpact_nat {
/* OFPACT_RESUBMIT.
*
- * Used for NXAST_RESUBMIT, NXAST_RESUBMIT_TABLE. */
+ * Used for NXAST_RESUBMIT, NXAST_RESUBMIT_TABLE, NXAST_RESUBMIT_TABLE_CT. */
struct ofpact_resubmit {
struct ofpact ofpact;
ofp_port_t in_port;
uint8_t table_id;
+ bool with_ct_orig; /* Resubmit with Conntrack original direction tuple
+ * fields in place of IP header fields. */
};
/* Bits for 'flags' in struct nx_action_learn.
diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c
index 83a923920..9c7dd7ff9 100644
--- a/lib/ofp-actions.c
+++ b/lib/ofp-actions.c
@@ -265,6 +265,8 @@ enum ofp_raw_action_type {
NXAST_RAW_RESUBMIT,
/* NX1.0+(14): struct nx_action_resubmit. */
NXAST_RAW_RESUBMIT_TABLE,
+ /* NX1.0+(44): struct nx_action_resubmit. */
+ NXAST_RAW_RESUBMIT_TABLE_CT,
/* NX1.0+(2): uint32_t. */
NXAST_RAW_SET_TUNNEL,
@@ -3852,19 +3854,20 @@ format_FIN_TIMEOUT(const struct ofpact_fin_timeout *a, struct ds *s)
ds_put_format(s, "%s)%s", colors.paren, colors.end);
}
-/* Action structures for NXAST_RESUBMIT and NXAST_RESUBMIT_TABLE.
+/* Action structures for NXAST_RESUBMIT, NXAST_RESUBMIT_TABLE, and
+ * NXAST_RESUBMIT_TABLE_CT.
*
* These actions search one of the switch's flow tables:
*
- * - For NXAST_RESUBMIT_TABLE only, if the 'table' member is not 255, then
- * it specifies the table to search.
+ * - For NXAST_RESUBMIT_TABLE and NXAST_RESUBMIT_TABLE_CT, if the
+ * 'table' member is not 255, then it specifies the table to search.
*
- * - Otherwise (for NXAST_RESUBMIT_TABLE with a 'table' of 255, or for
- * NXAST_RESUBMIT regardless of 'table'), it searches the current flow
- * table, that is, the OpenFlow flow table that contains the flow from
- * which this action was obtained. If this action did not come from a
- * flow table (e.g. it came from an OFPT_PACKET_OUT message), then table 0
- * is the current table.
+ * - Otherwise (for NXAST_RESUBMIT_TABLE or NXAST_RESUBMIT_TABLE_CT with a
+ * 'table' of 255, or for NXAST_RESUBMIT regardless of 'table'), it
+ * searches the current flow table, that is, the OpenFlow flow table that
+ * contains the flow from which this action was obtained. If this action
+ * did not come from a flow table (e.g. it came from an OFPT_PACKET_OUT
+ * message), then table 0 is the current table.
*
* The flow table lookup uses a flow that may be slightly modified from the
* original lookup:
@@ -3872,9 +3875,12 @@ format_FIN_TIMEOUT(const struct ofpact_fin_timeout *a, struct ds *s)
* - For NXAST_RESUBMIT, the 'in_port' member of struct nx_action_resubmit
* is used as the flow's in_port.
*
- * - For NXAST_RESUBMIT_TABLE, if the 'in_port' member is not OFPP_IN_PORT,
- * then its value is used as the flow's in_port. Otherwise, the original
- * in_port is used.
+ * - For NXAST_RESUBMIT_TABLE and NXAST_RESUBMIT_TABLE_CT, if the 'in_port'
+ * member is not OFPP_IN_PORT, then its value is used as the flow's
+ * in_port. Otherwise, the original in_port is used.
+ *
+ * - For NXAST_RESUBMIT_TABLE_CT the Conntrack 5-tuple fields are used as
+ * the packet's IP header fields during the lookup.
*
* - If actions that modify the flow (e.g. OFPAT_SET_VLAN_VID) precede the
* resubmit action, then the flow is updated with the new values.
@@ -3907,11 +3913,12 @@ format_FIN_TIMEOUT(const struct ofpact_fin_timeout *a, struct ds *s)
* a total limit of 4,096 resubmits per flow translation (earlier versions
* did not impose any total limit).
*
- * NXAST_RESUBMIT ignores 'table' and 'pad'. NXAST_RESUBMIT_TABLE requires
- * 'pad' to be all-bits-zero.
+ * NXAST_RESUBMIT ignores 'table' and 'pad'. NXAST_RESUBMIT_TABLE and
+ * NXAST_RESUBMIT_TABLE_CT require 'pad' to be all-bits-zero.
*
* Open vSwitch 1.0.1 and earlier did not support recursion. Open vSwitch
- * before 1.2.90 did not support NXAST_RESUBMIT_TABLE.
+ * before 1.2.90 did not support NXAST_RESUBMIT_TABLE. Open vSwitch before
+ * 2.8.0 did not support NXAST_RESUBMIT_TABLE_CT.
*/
struct nx_action_resubmit {
ovs_be16 type; /* OFPAT_VENDOR. */
@@ -3956,6 +3963,21 @@ decode_NXAST_RAW_RESUBMIT_TABLE(const struct nx_action_resubmit *nar,
return 0;
}
+static enum ofperr
+decode_NXAST_RAW_RESUBMIT_TABLE_CT(const struct nx_action_resubmit *nar,
+ enum ofp_version ofp_version OVS_UNUSED,
+ struct ofpbuf *out)
+{
+ enum ofperr error = decode_NXAST_RAW_RESUBMIT_TABLE(nar, ofp_version, out);
+ if (error) {
+ return error;
+ }
+ struct ofpact_resubmit *resubmit = out->header;
+ resubmit->ofpact.raw = NXAST_RAW_RESUBMIT_TABLE_CT;
+ resubmit->with_ct_orig = true;
+ return 0;
+}
+
static void
encode_RESUBMIT(const struct ofpact_resubmit *resubmit,
enum ofp_version ofp_version OVS_UNUSED, struct ofpbuf *out)
@@ -3963,10 +3985,12 @@ encode_RESUBMIT(const struct ofpact_resubmit *resubmit,
uint16_t in_port = ofp_to_u16(resubmit->in_port);
if (resubmit->table_id == 0xff
- && resubmit->ofpact.raw != NXAST_RAW_RESUBMIT_TABLE) {
+ && resubmit->ofpact.raw == NXAST_RAW_RESUBMIT) {
put_NXAST_RESUBMIT(out, in_port);
} else {
- struct nx_action_resubmit *nar = put_NXAST_RESUBMIT_TABLE(out);
+ struct nx_action_resubmit *nar;
+ nar = resubmit->with_ct_orig
+ ? put_NXAST_RESUBMIT_TABLE_CT(out) : put_NXAST_RESUBMIT_TABLE(out);
nar->table = resubmit->table_id;
nar->in_port = htons(in_port);
}
@@ -3977,7 +4001,7 @@ parse_RESUBMIT(char *arg, struct ofpbuf *ofpacts,
enum ofputil_protocol *usable_protocols OVS_UNUSED)
{
struct ofpact_resubmit *resubmit;
- char *in_port_s, *table_s;
+ char *in_port_s, *table_s, *ct_s;
resubmit = ofpact_put_RESUBMIT(ofpacts);
@@ -4004,6 +4028,16 @@ parse_RESUBMIT(char *arg, struct ofpbuf *ofpacts,
resubmit->table_id = 255;
}
+ ct_s = strsep(&arg, ",");
+ if (ct_s && ct_s[0]) {
+ if (strcmp(ct_s, "ct")) {
+ return xasprintf("%s: unknown parameter", ct_s);
+ }
+ resubmit->with_ct_orig = true;
+ } else {
+ resubmit->with_ct_orig = false;
+ }
+
if (resubmit->in_port == OFPP_IN_PORT && resubmit->table_id == 255) {
return xstrdup("at least one \"in_port\" or \"table\" must be "
"specified on resubmit");
@@ -4026,6 +4060,9 @@ format_RESUBMIT(const struct ofpact_resubmit *a, struct ds *s)
if (a->table_id != 255) {
ds_put_format(s, "%"PRIu8, a->table_id);
}
+ if (a->with_ct_orig) {
+ ds_put_cstr(s, ",ct");
+ }
ds_put_format(s, "%s)%s", colors.paren, colors.end);
}
}
@@ -7223,9 +7260,16 @@ ofpact_check__(enum ofputil_protocol *usable_protocols, struct ofpact *a,
case OFPACT_SET_TUNNEL:
case OFPACT_SET_QUEUE:
case OFPACT_POP_QUEUE:
- case OFPACT_RESUBMIT:
return 0;
+ case OFPACT_RESUBMIT: {
+ struct ofpact_resubmit *resubmit = ofpact_get_RESUBMIT(a);
+
+ if (resubmit->with_ct_orig && !is_ct_valid(flow, &match->wc, NULL)) {
+ return OFPERR_OFPBAC_MATCH_INCONSISTENT;
+ }
+ return 0;
+ }
case OFPACT_FIN_TIMEOUT:
if (flow->nw_proto != IPPROTO_TCP) {
inconsistent_match(usable_protocols);
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 677789d50..5bbcc0dee 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -483,7 +483,7 @@ static void do_xlate_actions(const struct ofpact *, size_t ofpacts_len,
static void xlate_normal(struct xlate_ctx *);
static void xlate_table_action(struct xlate_ctx *, ofp_port_t in_port,
uint8_t table_id, bool may_packet_in,
- bool honor_table_miss);
+ bool honor_table_miss, bool with_ct_orig);
static bool input_vid_is_valid(const struct xlate_ctx *,
uint16_t vid, struct xbundle *);
static uint16_t input_vid_to_vlan(const struct xbundle *, uint16_t vid);
@@ -3204,7 +3204,8 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
if (!process_special(ctx, peer) && may_receive(peer, ctx)) {
if (xport_stp_forward_state(peer) && xport_rstp_forward_state(peer)) {
- xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true);
+ xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true,
+ false);
if (!ctx->freezing) {
xlate_action_set(ctx);
}
@@ -3218,7 +3219,8 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
size_t old_size = ctx->odp_actions->size;
mirror_mask_t old_mirrors2 = ctx->mirrors;
- xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true);
+ xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true,
+ false);
ctx->mirrors = old_mirrors2;
ctx->base_flow = old_base_flow;
ctx->odp_actions->size = old_size;
@@ -3473,8 +3475,52 @@ xlate_resubmit_resource_check(struct xlate_ctx *ctx)
}
static void
+tuple_swap_flow(struct flow *flow, bool ipv4)
+{
+ uint8_t nw_proto = flow->nw_proto;
+ flow->nw_proto = flow->ct_nw_proto;
+ flow->ct_nw_proto = nw_proto;
+
+ if (ipv4) {
+ ovs_be32 nw_src = flow->nw_src;
+ flow->nw_src = flow->ct_nw_src;
+ flow->ct_nw_src = nw_src;
+
+ ovs_be32 nw_dst = flow->nw_dst;
+ flow->nw_dst = flow->ct_nw_dst;
+ flow->ct_nw_dst = nw_dst;
+ } else {
+ struct in6_addr ipv6_src = flow->ipv6_src;
+ flow->ipv6_src = flow->ct_ipv6_src;
+ flow->ct_ipv6_src = ipv6_src;
+
+ struct in6_addr ipv6_dst = flow->ipv6_dst;
+ flow->ipv6_dst = flow->ct_ipv6_dst;
+ flow->ct_ipv6_dst = ipv6_dst;
+ }
+
+ ovs_be16 tp_src = flow->tp_src;
+ flow->tp_src = flow->ct_tp_src;
+ flow->ct_tp_src = tp_src;
+
+ ovs_be16 tp_dst = flow->tp_dst;
+ flow->tp_dst = flow->ct_tp_dst;
+ flow->ct_tp_dst = tp_dst;
+}
+
+static void
+tuple_swap(struct flow *flow, struct flow_wildcards *wc)
+{
+ bool ipv4 = (flow->dl_type == htons(ETH_TYPE_IP));
+
+ tuple_swap_flow(flow, ipv4);
+ tuple_swap_flow(&wc->masks, ipv4);
+}
+
+static void
xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id,
- bool may_packet_in, bool honor_table_miss)
+ bool may_packet_in, bool honor_table_miss,
+ bool with_ct_orig)
{
/* Check if we need to recirculate before matching in a table. */
if (ctx->was_mpls) {
@@ -3487,6 +3533,17 @@ xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id,
ctx->table_id = table_id;
+ /* Swap packet fields with CT 5-tuple if requested. */
+ if (with_ct_orig) {
+ /* Do not swap if there is no CT tuple, or if key is not IP. */
+ if (ctx->xin->flow.ct_nw_proto == 0 ||
+ !is_ip_any(&ctx->xin->flow)) {
+ xlate_report_error(ctx,
+ "resubmit(ct) with non-tracked or non-IP packet!");
+ return;
+ }
+ tuple_swap(&ctx->xin->flow, ctx->wc);
+ }
rule = rule_dpif_lookup_from_table(ctx->xbridge->ofproto,
ctx->xin->tables_version,
&ctx->xin->flow, ctx->wc,
@@ -3494,6 +3551,10 @@ xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id,
&ctx->table_id, in_port,
may_packet_in, honor_table_miss,
ctx->xin->xcache);
+ /* Swap back. */
+ if (with_ct_orig) {
+ tuple_swap(&ctx->xin->flow, ctx->wc);
+ }
if (rule) {
/* Fill in the cache entry here instead of xlate_recursively
@@ -3801,7 +3862,7 @@ xlate_ofpact_resubmit(struct xlate_ctx *ctx,
}
xlate_table_action(ctx, in_port, table_id, may_packet_in,
- honor_table_miss);
+ honor_table_miss, resubmit->with_ct_orig);
}
static void
@@ -4303,7 +4364,7 @@ xlate_output_action(struct xlate_ctx *ctx,
break;
case OFPP_TABLE:
xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port,
- 0, may_packet_in, true);
+ 0, may_packet_in, true, false);
break;
case OFPP_NORMAL:
xlate_normal(ctx);
@@ -5585,7 +5646,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
ovs_assert(ctx->table_id < ogt->table_id);
xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port,
- ogt->table_id, true, true);
+ ogt->table_id, true, true, false);
break;
}
diff --git a/tests/ofp-actions.at b/tests/ofp-actions.at
index 58d540afe..c185eaa4d 100644
--- a/tests/ofp-actions.at
+++ b/tests/ofp-actions.at
@@ -91,6 +91,9 @@ ffff 0028 00002320 000d 0001 0000 0000 00000002 0002 001f 00010004 00000000 dnl
# actions=resubmit(10,5)
ffff 0010 00002320 000e 000a 05 000000
+# actions=resubmit(10,5,ct)
+ffff 0010 00002320 002c 000a 05 000000
+
# actions=output:NXM_NX_REG1[5..10]
ffff 0018 00002320 000f 0145 00010204 ffff 000000000000
@@ -407,6 +410,9 @@ ffff 0028 00002320 000d 0001 0000 0000 00000002 0002 001f 00010004 00000000 dnl
# actions=resubmit(10,5)
ffff 0010 00002320 000e 000a 05 000000
+# actions=resubmit(10,5,ct)
+ffff 0010 00002320 002c 000a 05 000000
+
# actions=output:NXM_NX_REG1[5..10]
ffff 0018 00002320 000f 0145 00010204 ffff 000000000000
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index 6f55d433b..82c863ca0 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -8409,19 +8409,74 @@ add_of_ports br0 1 2
AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg vconn:info ofproto_dpif:info])
-dnl Allow new connections on p1->p2, but not on p2->p1.
+dnl Allow new UDP connections on p1 for dst port 2, drop everything else.
AT_DATA([flows.txt], [dnl
dnl Table 0
+dnl Store zone in reg4 and packet direction in reg3 (IN=1, OUT=2).
dnl
table=0,priority=100,arp,action=normal
-table=0,priority=10,in_port=1,udp,action=ct(commit,zone=0),controller
-table=0,priority=10,in_port=2,udp,action=ct(table=1,zone=0)
+table=0,priority=10,in_port=1,ip,action=set_field:1->reg4,set_field:1->reg3,ct(zone=NXM_NX_REG4[[0..15]],table=1)
+table=0,priority=10,in_port=2,ip,action=set_field:1->reg4,set_field:2->reg3,ct(zone=NXM_NX_REG4[[0..15]],table=1)
table=0,priority=1,action=drop
dnl
-dnl Table 1
-dnl
-table=1,priority=10,in_port=2,ct_state=+trk+est-new,udp,action=controller
-table=1,priority=1,action=drop
+dnl Pass tracked traffic through ACL, drop everything else.
+dnl Non-REPLY/RELATED packets get the ACL lookup with the packet headers
+dnl in the actual packet direction in reg0 (IN=1, OUT=2). REPLY packets
+dnl get the ACL lookup using the conntrack tuple and the inverted direction.
+dnl RELATED packets get ACL lookup using the conntrack tuple in the direction
+dnl of the master connection, as stored in ct_mark.
+dnl
+dnl Incoming non-related packet in the original direction (ACL IN)
+table=1 reg3=1, ip, ct_state=-rel-rpl+trk-inv action=set_field:1->reg0,resubmit(,3),goto_table:5
+dnl Incoming non-related reply packet (CT ACL OUT)
+table=1 reg3=1, ip, ct_state=-rel+rpl+trk-inv action=set_field:2->reg0,resubmit(,3,ct),goto_table:4
+dnl Outgoing non-related packet (ACL OUT)
+table=1 reg3=2, ip, ct_state=-rel-rpl+trk-inv action=set_field:2->reg0,resubmit(,3),goto_table:5
+dnl Outgoing non-related reply packet (CT ACL IN)
+table=1 reg3=2, ip, ct_state=-rel+rpl+trk-inv action=set_field:1->reg0,resubmit(,3,ct),goto_table:4
+dnl
+dnl Related packet (CT ACL in the direction of the master connection.)
+table=1 ip, ct_state=+rel+trk-inv, action=move:NXM_NX_CT_MARK[[]]->NXM_NX_REG0[[]],resubmit(,3,ct),goto_table:4
+dnl Drop everything else.
+table=1 priority=0, action=drop
+dnl
+dnl "ACL table"
+dnl
+dnl Stateful accept (1->reg2) all incoming (reg0=1) IP connections with
+dnl UDP destination port '2'. Store rule ID (1234) in reg1, verdict
+dnl in reg2.
+table=3 priority=10, reg0=1, udp, udp_dst=2 action=set_field:1234->reg1,set_field:1->reg2
+dnl Stateless drop (0->reg2) everything else in both directions. (Rule ID: 1235)
+table=3 priority=0, action=set_field:1235->reg1,set_field:0->reg2
+dnl
+dnl Re-process stateful traffic that was not accepted by a stateful rule as
+dnl normal traffic in the current direction. This should also delete the
+dnl now stale conntrack state, so that new state can be created in its place.
+dnl
+dnl Stateful accepts go to next table.
+table=4 priority=100 reg2=1, action=goto_table:5
+dnl Everything else is reprocessed disregarding the CT state, using the actual
+dnl packet direction.
+table=4 priority=0 action=move:NXM_NX_REG3[[]]->NXM_NX_REG0[[]],resubmit(,3),goto_table:5
+dnl
+dnl "ACL verdict processing table."
+dnl
+dnl Handle stateful (reg2=1) / stateless (reg2=2) accepts and drops (reg2=0)
+dnl
+dnl Drop all non-accepted packets.
+table=5 reg2=0 priority=1000 action=drop
+dnl Commit new non-related IP connections.
+table=5 priority=10 reg2=1 ct_state=+new-rel, ip, action=ct(zone=NXM_NX_REG4[[0..15]],commit,exec(move:NXM_NX_REG3[[0..31]]->NXM_NX_CT_MARK[[0..31]],move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6
+dnl Commit new related connections in either direction, which inherit the mark
+dnl (the direction of the original direction master tuple) from the master
+dnl connection.
+table=5 priority=10 reg2=1 ct_state=+new+rel, ip, action=ct(zone=NXM_NX_REG4[[0..15]],commit,exec(move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6
+dnl Forward everything else, including stateless accepts.
+table=5 priority=0 action=goto_table:6
+dnl
+dnl "Forwarding table"
+dnl
+table=6 action=controller
])
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
@@ -8441,37 +8496,33 @@ OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 4])
OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit])
dnl Check this output. We only see the latter two packets, not the first.
-dnl Note that the first packet doesn't have the ct_state bits set. This
-dnl happens because the ct_state field is available only after recirc.
AT_CHECK([cat ofctl_monitor.log], [0], [dnl
-NXT_PACKET_IN (xid=0x0): cookie=0x0 total_len=42 in_port=1 (via action) data_len=42 (unbuffered)
+NXT_PACKET_IN (xid=0x0): table_id=6 cookie=0x0 total_len=42 ct_state=new|trk,ct_zone=1,ct_mark=0x1,ct_label=0x4d2000000000000000000000000,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2,reg0=0x1,reg1=0x4d2,reg2=0x1,reg3=0x1,reg4=0x1,in_port=1 (via action) data_len=42 (unbuffered)
udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_src=10.1.1.1,nw_dst=10.1.1.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=1,tp_dst=2 udp_csum:e9d6
dnl
-NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x0 total_len=42 ct_state=est|rpl|trk,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2,in_port=2 (via action) data_len=42 (unbuffered)
+NXT_PACKET_IN (xid=0x0): table_id=6 cookie=0x0 total_len=42 ct_state=est|rpl|trk,ct_zone=1,ct_mark=0x1,ct_label=0x4d2000000000000000000000000,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2,reg0=0x1,reg1=0x4d2,reg2=0x1,reg3=0x2,reg4=0x1,in_port=2 (via action) data_len=42 (unbuffered)
udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,nw_src=10.1.1.2,nw_dst=10.1.1.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=2,tp_dst=1 udp_csum:e9d6
])
AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl -P nxt_packet_in --detach --no-chdir --pidfile 2> ofctl_monitor.log])
dnl OK, now start a second connection from port 1
-AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,ttl=64,frag=no),udp(src=3,dst=4)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,ttl=64,frag=no),udp(src=3,dst=2)'])
dnl Now try a reply from port 2.
-AT_CHECK([ovs-appctl netdev-dummy/receive p2 'in_port(2),eth(src=50:54:00:00:00:0a,dst=50:54:00:00:00:09),eth_type(0x0800),ipv4(src=10.1.1.2,dst=10.1.1.1,proto=17,tos=0,ttl=64,frag=no),udp(src=4,dst=3)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p2 'in_port(2),eth(src=50:54:00:00:00:0a,dst=50:54:00:00:00:09),eth_type(0x0800),ipv4(src=10.1.1.2,dst=10.1.1.1,proto=17,tos=0,ttl=64,frag=no),udp(src=2,dst=3)'])
OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 4])
OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit])
dnl Check this output. We should see both packets
-dnl Note that the first packet doesn't have the ct_state bits set. This
-dnl happens because the ct_state field is available only after recirc.
AT_CHECK([cat ofctl_monitor.log], [0], [dnl
-NXT_PACKET_IN (xid=0x0): cookie=0x0 total_len=42 in_port=1 (via action) data_len=42 (unbuffered)
-udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_src=10.1.1.1,nw_dst=10.1.1.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=3,tp_dst=4 udp_csum:e9d2
+NXT_PACKET_IN (xid=0x0): table_id=6 cookie=0x0 total_len=42 ct_state=new|trk,ct_zone=1,ct_mark=0x1,ct_label=0x4d2000000000000000000000000,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=3,ct_tp_dst=2,reg0=0x1,reg1=0x4d2,reg2=0x1,reg3=0x1,reg4=0x1,in_port=1 (via action) data_len=42 (unbuffered)
+udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_src=10.1.1.1,nw_dst=10.1.1.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=3,tp_dst=2 udp_csum:e9d4
dnl
-NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x0 total_len=42 ct_state=est|rpl|trk,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=3,ct_tp_dst=4,in_port=2 (via action) data_len=42 (unbuffered)
-udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,nw_src=10.1.1.2,nw_dst=10.1.1.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=4,tp_dst=3 udp_csum:e9d2
+NXT_PACKET_IN (xid=0x0): table_id=6 cookie=0x0 total_len=42 ct_state=est|rpl|trk,ct_zone=1,ct_mark=0x1,ct_label=0x4d2000000000000000000000000,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=3,ct_tp_dst=2,reg0=0x1,reg1=0x4d2,reg2=0x1,reg3=0x2,reg4=0x1,in_port=2 (via action) data_len=42 (unbuffered)
+udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,nw_src=10.1.1.2,nw_dst=10.1.1.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=2,tp_dst=3 udp_csum:e9d4
])
OVS_VSWITCHD_STOP
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index 4ba4b089b..4fdd27e26 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -2645,7 +2645,7 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
-dnl CHECK_FTP_NAT(TITLE, IP_ADDR, FLOWS)
+dnl CHECK_FTP_NAT(TITLE, IP_ADDR, FLOWS, CT_DUMP)
dnl
dnl Checks the implementation of conntrack with FTP ALGs in combination with
dnl NAT, using the provided flow table.
@@ -2664,8 +2664,7 @@ m4_define([CHECK_FTP_NAT],
ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0.
- AT_DATA([flows.txt], [$3
-])
+ AT_DATA([flows.txt], [$3])
AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
@@ -2675,10 +2674,7 @@ m4_define([CHECK_FTP_NAT],
NS_CHECK_EXEC([at_ns0], [wget ftp://10.1.1.2 -4 --no-passive-ftp -t 3 -T 1 --retry-connrefused -v --server-response --no-remove-listing -o wget0.log -d])
dnl Discards CLOSE_WAIT and CLOSING
- AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
-tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=$2,sport=<cleared>,dport=<cleared>),protoinfo=(state=<cleared>),helper=ftp
-tcp,orig=(src=10.1.1.2,dst=$2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),protoinfo=(state=<cleared>)
-])
+ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [$4])
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP])
@@ -2739,7 +2735,10 @@ dnl Swaps the fields of the ARP message to turn a query to a response.
table=10 priority=100 arp xreg0=0 action=normal
table=10 priority=10,arp,arp_op=1,action=load:2->OXM_OF_ARP_OP[[]],move:OXM_OF_ARP_SHA[[]]->OXM_OF_ARP_THA[[]],move:OXM_OF_PKT_REG0[[0..47]]->OXM_OF_ARP_SHA[[]],move:OXM_OF_ARP_SPA[[]]->OXM_OF_ARP_TPA[[]],move:NXM_NX_REG2[[]]->OXM_OF_ARP_SPA[[]],move:NXM_OF_ETH_SRC[[]]->NXM_OF_ETH_DST[[]],move:OXM_OF_PKT_REG0[[0..47]]->NXM_OF_ETH_SRC[[]],move:NXM_OF_IN_PORT[[]]->NXM_NX_REG3[[0..15]],load:0->NXM_OF_IN_PORT[[]],output:NXM_NX_REG3[[0..15]]
table=10 priority=0 action=drop
- ])
+], [dnl
+tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=$2,sport=<cleared>,dport=<cleared>),protoinfo=(state=<cleared>),helper=ftp
+tcp,orig=(src=10.1.1.2,dst=$2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),protoinfo=(state=<cleared>)
+])
])
dnl Check that ct(nat,table=foo) works without TCP sequence adjustment.
@@ -2802,7 +2801,10 @@ dnl Swaps the fields of the ARP message to turn a query to a response.
table=10 priority=100 arp xreg0=0 action=normal
table=10 priority=10,arp,arp_op=1,action=load:2->OXM_OF_ARP_OP[[]],move:OXM_OF_ARP_SHA[[]]->OXM_OF_ARP_THA[[]],move:OXM_OF_PKT_REG0[[0..47]]->OXM_OF_ARP_SHA[[]],move:OXM_OF_ARP_SPA[[]]->OXM_OF_ARP_TPA[[]],move:NXM_NX_REG2[[]]->OXM_OF_ARP_SPA[[]],move:NXM_OF_ETH_SRC[[]]->NXM_OF_ETH_DST[[]],move:OXM_OF_PKT_REG0[[0..47]]->NXM_OF_ETH_SRC[[]],move:NXM_OF_IN_PORT[[]]->NXM_NX_REG3[[0..15]],load:0->NXM_OF_IN_PORT[[]],output:NXM_NX_REG3[[0..15]]
table=10 priority=0 action=drop
- ])
+], [dnl
+tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=$2,sport=<cleared>,dport=<cleared>),protoinfo=(state=<cleared>),helper=ftp
+tcp,orig=(src=10.1.1.2,dst=$2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),protoinfo=(state=<cleared>)
+])
])
dnl Check that ct(nat,table=foo) works without TCP sequence adjustment.
@@ -2832,8 +2834,14 @@ dnl IP_ADDR must specify the NAT address in standard "10.1.1.x" format,
dnl and IP_ADDR_AS_HEX must specify the same address as hex, eg 0x0a0101xx.
m4_define([CHECK_FTP_NAT_ORIG_TUPLE], [dnl
CHECK_FTP_NAT([orig tuple $1], [$2], [dnl
-dnl track all IP traffic (includes nat and helper calls to non-NEW packets.)
-table=0 ip, action=ct(nat,table=1)
+dnl Store zone in reg4 and packet direction in reg3 (IN=1, OUT=2).
+dnl NAT is only applied to OUT-direction packets, so that ACL
+dnl processing can be done with non-NATted headers.
+dnl
+dnl Track all IP traffic in the IN-direction (IN from Port 1).
+table=0 in_port=1, ip, action=set_field:1->reg4,set_field:1->reg3,ct(zone=NXM_NX_REG4[[0..15]],table=1)
+dnl Track all IP traffic in the OUT-direction (OUT to the Port 1).
+table=0 in_port=2, ip, action=set_field:1->reg4,set_field:2->reg3,ct(zone=NXM_NX_REG4[[0..15]],nat,table=1)
dnl
dnl ARP
dnl
@@ -2841,32 +2849,81 @@ table=0 priority=100 arp arp_op=1 action=move:OXM_OF_ARP_TPA[[]]->NXM_NX_REG2[[]
table=0 priority=10 arp action=normal
table=0 priority=0 action=drop
dnl
-dnl "ACL table"
-dnl
-dnl Allow all IP traffic with conntrack original direction IP source address
-dnl '10.1.1.1'. This should allow also reply packets and related packets in
-dnl both directions.
-table=1 ip, ct_state=+trk-inv, ct_nw_src=10.1.1.1 action=goto_table:2
+dnl Pass tracked traffic through ACL, drop everything else.
+dnl Non-REPLY/RELATED packets get the ACL lookup with the packet headers
+dnl in the actual packet direction in reg0 (IN=1, OUT=2). REPLY packets
+dnl get the ACL lookup using the conntrack tuple and the inverted direction.
+dnl RELATED packets get ACL lookup using the conntrack tuple in the direction
+dnl of the master connection, as stored in ct_mark.
+dnl
+dnl Incoming non-related packet in the original direction (ACL IN)
+table=1 reg3=1, ip, ct_state=-rel-rpl+trk-inv action=set_field:1->reg0,resubmit(,3),goto_table:5
+dnl Incoming non-related reply packet (CT ACL OUT)
+table=1 reg3=1, ip, ct_state=-rel+rpl+trk-inv action=set_field:2->reg0,resubmit(,3,ct),goto_table:4
+dnl Outgoing non-related packet (ACL OUT)
+table=1 reg3=2, ip, ct_state=-rel-rpl+trk-inv action=set_field:2->reg0,resubmit(,3),goto_table:5
+dnl Outgoing non-related reply packet (CT ACL IN)
+table=1 reg3=2, ip, ct_state=-rel+rpl+trk-inv action=set_field:1->reg0,resubmit(,3,ct),goto_table:4
+dnl
+dnl Related packet (CT ACL in the direction of the master connection.)
+table=1 ip, ct_state=+rel+trk-inv, action=move:NXM_NX_CT_MARK[[]]->NXM_NX_REG0[[]],resubmit(,3,ct),goto_table:4
dnl Drop everything else.
table=1 priority=0, action=drop
dnl
-dnl "Conntrack commit table"
+dnl "ACL table"
+dnl
+dnl Stateful accept (1->reg2) all incoming (reg0=1) IP connections with
+dnl IP source address '10.1.1.1'. Store rule ID (1234) in reg1, verdict
+dnl in reg2.
+table=3 priority=10, reg0=1, ip, nw_src=10.1.1.1 action=set_field:1234->reg1,set_field:1->reg2
+dnl Stateless drop (0->reg2) everything else in both directions. (Rule ID: 1235)
+table=3 priority=0, action=set_field:1235->reg1,set_field:0->reg2
+dnl
+dnl Re-process stateful traffic that was not accepted by a stateful rule as
+dnl normal traffic in the current direction. This should also delete the
+dnl now stale conntrack state, so that new state can be created in its place.
+dnl
+dnl Stateful accepts go to next table.
+table=4 priority=100 reg2=1, action=goto_table:5
+dnl Everything else is reprocessed disregarding the CT state, using the actual
+dnl packet direction.
+table=4 priority=0 action=move:NXM_NX_REG3[[]]->NXM_NX_REG0[[]],resubmit(,3),goto_table:5
+dnl
+dnl "ACL verdict processing table."
+dnl
+dnl Handle stateful (reg2=1) / stateless (reg2=2) accepts and drops (reg2=0)
+dnl
+dnl Drop all non-accepted packets.
+table=5 reg2=0 priority=1000 action=drop
dnl
-dnl Commit new outgoing FTP control connections. Must match on 'tcp' when
-dnl setting 'alg=ftp'.
-table=2 in_port=1 priority=100 ct_state=+new, tcp, tp_dst=21, action=ct(alg=ftp,commit,nat(src=$2)),2
-dnl Commit other new outgoing IP connections.
-table=2 in_port=1 priority=20 ct_state=+new, ip, action=ct(commit,nat(src=$2)),2
-dnl Commit incoming new IP connections. 'nat' may be needed for related
-dnl connections, and is harmless for connections that do not need it.
-table=2 in_port=2 priority=10 ct_state=+new, ip, action=ct(commit,nat),1
-dnl Just forward all the rest.
-table=2 priority=0 in_port=1 action=2
-table=2 priority=0 in_port=2 action=1
+dnl Commit new incoming FTP control connections with SNAT range. Must match on
+dnl 'tcp' when setting 'alg=ftp'. Store the directionality of non-related
+dnl connections to ct_mark. Store the rule ID to labels.
+table=5 priority=100 reg2=1 reg3=1 ct_state=+new-rel, tcp, tp_dst=21, action=ct(zone=NXM_NX_REG4[[0..15]],alg=ftp,commit,nat(src=$2),exec(move:NXM_NX_REG3[[0..31]]->NXM_NX_CT_MARK[[0..31]],move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6
+dnl Commit other new incoming non-related IP connections with SNAT range.
+table=5 priority=10 reg2=1 reg3=1 ct_state=+new-rel, ip, action=ct(zone=NXM_NX_REG4[[0..15]],commit,nat(src=$2),exec(move:NXM_NX_REG3[[0..31]]->NXM_NX_CT_MARK[[0..31]],move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6
+dnl Commit non-related outgoing new IP connections with DNAT range.
+dnl (This should not get any packets in this test.)
+table=5 priority=10 reg2=1 reg3=2 ct_state=+new-rel, ip, action=ct(zone=NXM_NX_REG4[[0..15]],commit,nat(dst=$2),exec(move:NXM_NX_REG3[[0..31]]->NXM_NX_CT_MARK[[0..31]],move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6
+dnl Commit new related connections in either direction, which need 'nat'
+dnl and which inherit the mark (the direction of the original direction
+dnl master tuple) from the master connection.
+table=5 priority=10 reg2=1 ct_state=+new+rel, ip, action=ct(zone=NXM_NX_REG4[[0..15]],commit,nat,exec(move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6
+dnl
+dnl NAT incoming non-NEW packets. Outgoing packets were NATted in table 0.
+dnl
+table=5 priority=10 ct_state=-new+trk-inv reg3=1 ip, action=ct(zone=NXM_NX_REG4[[0..15]],nat),goto_table:6
+dnl Forward everything else, including stateless accepts.
+table=5 priority=0 action=goto_table:6
+dnl
+dnl "Forwarding table"
+dnl
+table=6 in_port=1 action=2
+table=6 in_port=2 action=1
dnl
dnl MAC resolution table for IP in reg2, stores mac in OXM_OF_PKT_REG0
dnl
-table=8,reg2=$3/0xffffffff,action=load:0x808888888888->OXM_OF_PKT_REG0[[]]
+table=8,reg2=$3,action=load:0x808888888888->OXM_OF_PKT_REG0[[]]
table=8,priority=0,action=load:0->OXM_OF_PKT_REG0[[]]
dnl ARP responder mac filled in at OXM_OF_PKT_REG0, or 0 for normal action.
dnl TPA IP in reg2.
@@ -2874,7 +2931,10 @@ dnl Swaps the fields of the ARP message to turn a query to a response.
table=10 priority=100 arp xreg0=0 action=normal
table=10 priority=10,arp,arp_op=1,action=load:2->OXM_OF_ARP_OP[[]],move:OXM_OF_ARP_SHA[[]]->OXM_OF_ARP_THA[[]],move:OXM_OF_PKT_REG0[[0..47]]->OXM_OF_ARP_SHA[[]],move:OXM_OF_ARP_SPA[[]]->OXM_OF_ARP_TPA[[]],move:NXM_NX_REG2[[]]->OXM_OF_ARP_SPA[[]],move:NXM_OF_ETH_SRC[[]]->NXM_OF_ETH_DST[[]],move:OXM_OF_PKT_REG0[[0..47]]->NXM_OF_ETH_SRC[[]],move:NXM_OF_IN_PORT[[]]->NXM_NX_REG3[[0..15]],load:0->NXM_OF_IN_PORT[[]],output:NXM_NX_REG3[[0..15]]
table=10 priority=0 action=drop
- ])
+], [dnl
+tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=$2,sport=<cleared>,dport=<cleared>),zone=1,mark=1,labels=0x4d2000000000000000000000000,protoinfo=(state=<cleared>),helper=ftp
+tcp,orig=(src=10.1.1.2,dst=$2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),zone=1,mark=1,labels=0x4d2000000000000000000000000,protoinfo=(state=<cleared>)
+])
])
dnl Check that ct(nat,table=foo) works without TCP sequence adjustment with
diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in
index 2ee3193d4..d783f85c7 100644
--- a/utilities/ovs-ofctl.8.in
+++ b/utilities/ovs-ofctl.8.in
@@ -963,10 +963,22 @@ only known to be implemented by Open vSwitch:
.
.IP \fBresubmit\fB:\fIport\fR
.IQ \fBresubmit\fB(\fR[\fIport\fR]\fB,\fR[\fItable\fR]\fB)
+.IQ \fBresubmit\fB(\fR[\fIport\fR]\fB,\fR[\fItable\fR]\fB,ct)
Re-searches this OpenFlow flow table (or the table whose number is
specified by \fItable\fR) with the \fBin_port\fR field replaced by
-\fIport\fR (if \fIport\fR is specified) and executes the actions
-found, if any, in addition to any other actions in this flow entry.
+\fIport\fR (if \fIport\fR is specified) and the packet 5-tuple fields
+swapped with the corresponding conntrack original direction tuple
+fields (if \fBct\fR is specified, see \fBct_nw_src\fR above), and
+executes the actions found, if any, in addition to any other actions
+in this flow entry. The \fBin_port\fR and swapped 5-tuple fields are
+restored immediately after the search, before any actions are
+executed.
+.IP
+The \fBct\fR option requires a valid connection tracking state as a
+match prerequisite in the flow where this action is placed. Examples
+of valid connection tracking state matches include
+\fBct_state=+new\fR, \fBct_state=+est\fR, \fBct_state=+rel\fR, and
+\fBct_state=+trk-inv\fR.
.IP
Recursive \fBresubmit\fR actions are obeyed up to
implementation-defined limits:
@@ -991,7 +1003,8 @@ exception: \fBresubmit\fR from table \fIx\fR to any table \fIy\fR >
\fIx\fR does not count against the recursion limit.
.RE
.IP
-Open vSwitch before 1.2.90 did not support \fItable\fR.
+Open vSwitch before 1.2.90 did not support \fItable\fR. Open vSwitch
+before 2.8 did not support \fBct\fR.
.
.IP \fBset_tunnel\fB:\fIid\fR
.IQ \fBset_tunnel64\fB:\fIid\fR