summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Ahern <dsahern@kernel.org>2023-04-22 10:08:08 -0600
committerDavid Ahern <dsahern@kernel.org>2023-04-22 10:08:08 -0600
commita08205b62f5e22792bed9cbbb306aab14eb8f202 (patch)
tree36df1828b847fe61148a95bcb5960f0c6b281222
parent47928f88511721da41c4d492b4a7bd93b4f0d936 (diff)
parent1371d7deaa2011f5e611e7ce9fdad65483f248dd (diff)
downloadiproute2-a08205b62f5e22792bed9cbbb306aab14eb8f202.tar.gz
Merge branch 'main' into next
Signed-off-by: David Ahern <dsahern@kernel.org>
-rw-r--r--genl/ctrl.c12
-rw-r--r--ip/iplink.c6
-rw-r--r--ip/iplink_bridge_slave.c2
-rw-r--r--ip/iproute_lwtunnel.c21
-rw-r--r--ip/iptunnel.c14
-rw-r--r--ip/xfrm_policy.c3
-rw-r--r--man/man8/ip-link.8.in26
-rw-r--r--man/man8/tc-mqprio.896
-rw-r--r--man/man8/tc-netem.8432
-rw-r--r--tc/f_u32.c26
-rw-r--r--tc/m_action.c8
-rw-r--r--tc/m_csum.c5
-rw-r--r--tc/m_mpls.c4
-rw-r--r--tc/m_nat.c5
-rw-r--r--tc/tc_class.c2
-rw-r--r--tc/tc_util.c2
16 files changed, 477 insertions, 187 deletions
diff --git a/genl/ctrl.c b/genl/ctrl.c
index a2d87af0..8d2e9448 100644
--- a/genl/ctrl.c
+++ b/genl/ctrl.c
@@ -57,7 +57,7 @@ static void print_ctrl_cmd_flags(FILE *fp, __u32 fl)
fprintf(fp, "\n");
}
-static int print_ctrl_cmds(FILE *fp, struct rtattr *arg, __u32 ctrl_ver)
+static int print_ctrl_cmds(FILE *fp, struct rtattr *arg)
{
struct rtattr *tb[CTRL_ATTR_OP_MAX + 1];
@@ -70,7 +70,7 @@ static int print_ctrl_cmds(FILE *fp, struct rtattr *arg, __u32 ctrl_ver)
fprintf(fp, " ID-0x%x ",*id);
}
/* we are only gonna do this for newer version of the controller */
- if (tb[CTRL_ATTR_OP_FLAGS] && ctrl_ver >= 0x2) {
+ if (tb[CTRL_ATTR_OP_FLAGS]) {
__u32 *fl = RTA_DATA(tb[CTRL_ATTR_OP_FLAGS]);
print_ctrl_cmd_flags(fp, *fl);
}
@@ -78,7 +78,7 @@ static int print_ctrl_cmds(FILE *fp, struct rtattr *arg, __u32 ctrl_ver)
}
-static int print_ctrl_grp(FILE *fp, struct rtattr *arg, __u32 ctrl_ver)
+static int print_ctrl_grp(FILE *fp, struct rtattr *arg)
{
struct rtattr *tb[CTRL_ATTR_MCAST_GRP_MAX + 1];
@@ -109,7 +109,6 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl,
int len = n->nlmsg_len;
struct rtattr *attrs;
FILE *fp = (FILE *) arg;
- __u32 ctrl_v = 0x1;
if (n->nlmsg_type != GENL_ID_CTRL) {
fprintf(stderr, "Not a controller message, nlmsg_len=%d "
@@ -148,7 +147,6 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl,
if (tb[CTRL_ATTR_VERSION]) {
__u32 *v = RTA_DATA(tb[CTRL_ATTR_VERSION]);
fprintf(fp, " Version: 0x%x ",*v);
- ctrl_v = *v;
}
if (tb[CTRL_ATTR_HDRSIZE]) {
__u32 *h = RTA_DATA(tb[CTRL_ATTR_HDRSIZE]);
@@ -198,7 +196,7 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl,
for (i = 0; i < GENL_MAX_FAM_OPS; i++) {
if (tb2[i]) {
fprintf(fp, "\t\t#%d: ", i);
- if (0 > print_ctrl_cmds(fp, tb2[i], ctrl_v)) {
+ if (0 > print_ctrl_cmds(fp, tb2[i])) {
fprintf(fp, "Error printing command\n");
}
/* for next command */
@@ -221,7 +219,7 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl,
for (i = 0; i < GENL_MAX_FAM_GRPS; i++) {
if (tb2[i]) {
fprintf(fp, "\t\t#%d: ", i);
- if (0 > print_ctrl_grp(fp, tb2[i], ctrl_v))
+ if (0 > print_ctrl_grp(fp, tb2[i]))
fprintf(fp, "Error printing group\n");
/* for next group */
fprintf(fp,"\n");
diff --git a/ip/iplink.c b/ip/iplink.c
index a8da52f9..690636b6 100644
--- a/ip/iplink.c
+++ b/ip/iplink.c
@@ -63,7 +63,7 @@ void iplink_usage(void)
" [ mtu MTU ] [index IDX ]\n"
" [ numtxqueues QUEUE_COUNT ]\n"
" [ numrxqueues QUEUE_COUNT ]\n"
- " [ netns { PID | NAME } ]\n"
+ " [ netns { PID | NETNSNAME | NETNSFILE } ]\n"
" type TYPE [ ARGS ]\n"
"\n"
" ip link delete { DEVICE | dev DEVICE | group DEVGROUP } type TYPE [ ARGS ]\n"
@@ -88,7 +88,7 @@ void iplink_usage(void)
" [ address LLADDR ]\n"
" [ broadcast LLADDR ]\n"
" [ mtu MTU ]\n"
- " [ netns { PID | NAME } ]\n"
+ " [ netns { PID | NETNSNAME | NETNSFILE } ]\n"
" [ link-netns NAME | link-netnsid ID ]\n"
" [ alias NAME ]\n"
" [ vf NUM [ mac LLADDR ]\n"
@@ -1629,7 +1629,7 @@ static void print_af_stats_attr(FILE *fp, int ifindex, struct rtattr *attr)
if (!if_printed) {
print_uint(PRINT_ANY, "ifindex",
"%u:", ifindex);
- print_color_string(PRINT_ANY, COLOR_IFNAME,
+ print_color_string(PRINT_ANY, COLOR_IFNAME,
"ifname", "%s",
ll_index_to_name(ifindex));
print_nl();
diff --git a/ip/iplink_bridge_slave.c b/ip/iplink_bridge_slave.c
index 43b42948..66a67961 100644
--- a/ip/iplink_bridge_slave.c
+++ b/ip/iplink_bridge_slave.c
@@ -40,7 +40,7 @@ static void print_explain(FILE *f)
" [ vlan_tunnel {on | off} ]\n"
" [ isolated {on | off} ]\n"
" [ locked {on | off} ]\n"
- " [ mab {on | off} ]\n"
+ " [ mab {on | off} ]\n"
" [ backup_port DEVICE ] [ nobackup_port ]\n"
);
}
diff --git a/ip/iproute_lwtunnel.c b/ip/iproute_lwtunnel.c
index 52221c69..308178ef 100644
--- a/ip/iproute_lwtunnel.c
+++ b/ip/iproute_lwtunnel.c
@@ -32,7 +32,7 @@
#include <linux/ioam6.h>
#include <linux/ioam6_iptunnel.h>
-static const char *format_encap_type(int type)
+static const char *format_encap_type(uint16_t type)
{
switch (type) {
case LWTUNNEL_ENCAP_MPLS:
@@ -62,7 +62,7 @@ static const char *format_encap_type(int type)
static void encap_type_usage(void)
{
- int i;
+ uint16_t i;
fprintf(stderr, "Usage: ip route ... encap TYPE [ OPTIONS ] [...]\n");
@@ -73,7 +73,7 @@ static void encap_type_usage(void)
exit(-1);
}
-static int read_encap_type(const char *name)
+static uint16_t read_encap_type(const char *name)
{
if (strcmp(name, "mpls") == 0)
return LWTUNNEL_ENCAP_MPLS;
@@ -834,14 +834,15 @@ static void print_encap_xfrm(FILE *fp, struct rtattr *encap)
void lwt_print_encap(FILE *fp, struct rtattr *encap_type,
struct rtattr *encap)
{
- int et;
+ uint16_t et;
if (!encap_type)
return;
et = rta_getattr_u16(encap_type);
-
- print_string(PRINT_ANY, "encap", " encap %s ", format_encap_type(et));
+ open_json_object("encap");
+ print_string(PRINT_ANY, "encap_type", " encap %s ",
+ format_encap_type(et));
switch (et) {
case LWTUNNEL_ENCAP_MPLS:
@@ -875,6 +876,7 @@ void lwt_print_encap(FILE *fp, struct rtattr *encap_type,
print_encap_xfrm(fp, encap);
break;
}
+ close_json_object();
}
static struct ipv6_sr_hdr *parse_srh(char *segbuf, int hmac, bool encap)
@@ -959,7 +961,7 @@ static int parse_encap_seg6(struct rtattr *rta, size_t len, int *argcp,
invarg("\"segs\" provided before \"mode\"\n",
*argv);
- strlcpy(segbuf, *argv, 1024);
+ strlcpy(segbuf, *argv, sizeof(segbuf));
} else if (strcmp(*argv, "hmac") == 0) {
NEXT_ARG();
if (hmac_ok++)
@@ -1045,7 +1047,7 @@ static int parse_encap_rpl(struct rtattr *rta, size_t len, int *argcp,
if (segs_ok++)
duparg2("segs", *argv);
- strlcpy(segbuf, *argv, 1024);
+ strlcpy(segbuf, *argv, sizeof(segbuf));
} else {
break;
}
@@ -1466,8 +1468,7 @@ static int parse_encap_seg6local(struct rtattr *rta, size_t len, int *argcp,
NEXT_ARG();
if (segs_ok++)
duparg2("segs", *argv);
- strncpy(segbuf, *argv, 1024);
- segbuf[1023] = 0;
+ strlcpy(segbuf, *argv, sizeof(segbuf));
if (!NEXT_ARG_OK())
break;
NEXT_ARG();
diff --git a/ip/iptunnel.c b/ip/iptunnel.c
index 02c3670b..b6da1459 100644
--- a/ip/iptunnel.c
+++ b/ip/iptunnel.c
@@ -17,6 +17,7 @@
#include <net/if_arp.h>
#include <linux/ip.h>
#include <linux/if_tunnel.h>
+#include <linux/ip6_tunnel.h>
#include "rt_names.h"
#include "utils.h"
@@ -172,11 +173,20 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p)
if (get_ifname(p->name, *argv))
invarg("\"name\" not a valid ifname", *argv);
if (cmd == SIOCCHGTUNNEL && count == 0) {
- struct ip_tunnel_parm old_p = {};
+ union {
+ struct ip_tunnel_parm ip_tnl;
+ struct ip6_tnl_parm2 ip6_tnl;
+ } old_p = {};
if (tnl_get_ioctl(*argv, &old_p))
return -1;
- *p = old_p;
+
+ if (old_p.ip_tnl.iph.version != 4 ||
+ old_p.ip_tnl.iph.ihl != 5)
+ invarg("\"name\" is not an ip tunnel",
+ *argv);
+
+ *p = old_p.ip_tnl;
}
}
count++;
diff --git a/ip/xfrm_policy.c b/ip/xfrm_policy.c
index be2235ca..8687ced3 100644
--- a/ip/xfrm_policy.c
+++ b/ip/xfrm_policy.c
@@ -1141,7 +1141,8 @@ static int xfrm_str_to_policy(char *name, uint8_t *policy)
if (strcmp(name, "block") == 0) {
*policy = XFRM_USERPOLICY_BLOCK;
return 0;
- } else if (strcmp(name, "accept") == 0) {
+ } else if (strcmp(name, "accept") == 0 ||
+ strcmp(name, "allow") == 0) {
*policy = XFRM_USERPOLICY_ACCEPT;
return 0;
}
diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in
index bec1b78b..8cec5fe3 100644
--- a/man/man8/ip-link.8.in
+++ b/man/man8/ip-link.8.in
@@ -49,7 +49,7 @@ ip-link \- network device configuration
.IR BYTES " ]"
.br
.RB "[ " netns " {"
-.IR PID " | " NETNSNAME " } ]"
+.IR PID " | " NETNSNAME " | " NETNSFILE " } ]"
.br
.BI type " TYPE"
.RI "[ " ARGS " ]"
@@ -118,7 +118,7 @@ ip-link \- network device configuration
.IR MTU " ]"
.br
.RB "[ " netns " {"
-.IR PID " | " NETNSNAME " } ]"
+.IR PID " | " NETNSNAME " | " NETNSFILE " } ]"
.br
.RB "[ " link-netnsid
.IR ID " ]"
@@ -465,8 +465,15 @@ specifies the desired index of the new virtual device. The link
creation fails, if the index is busy.
.TP
-.BI netns " { PID | NAME } "
-specifies the desired network namespace to create interface in.
+.B netns
+.RI "{ " PID " | " NETNSNAME " | " NETNSFILE " }"
+.br
+create the device in the network namespace associated with process
+.IR "PID " or
+the name
+.IR "NETNSNAME " or
+the file
+.IR "NETNSFILE".
.TP
VLAN Type Support
@@ -2196,10 +2203,15 @@ the interface is
.IR "POINTOPOINT" .
.TP
-.BI netns " NETNSNAME " \fR| " PID"
-move the device to the network namespace associated with name
+.B netns
+.RI "{ " PID " | " NETNSNAME " | " NETNSFILE " }"
+.br
+move the device to the network namespace associated with process
+.IR "PID " or
+the name
.IR "NETNSNAME " or
-.RI process " PID".
+the file
+.IR "NETNSFILE".
Some devices are not allowed to change network namespace: loopback, bridge,
wireless. These are network namespace local devices. In such case
diff --git a/man/man8/tc-mqprio.8 b/man/man8/tc-mqprio.8
index 4b9e942e..16ecb9a1 100644
--- a/man/man8/tc-mqprio.8
+++ b/man/man8/tc-mqprio.8
@@ -98,6 +98,7 @@ belong to an application. See kernel and cgroup documentation for details.
.TP
num_tc
Number of traffic classes to use. Up to 16 classes supported.
+You cannot have more classes than queues.
.TP
map
@@ -119,6 +120,8 @@ Set to
to support hardware offload. Set to
.B 0
to configure user specified values in software only.
+The default value of this parameter is
+.B 1
.TP
mode
@@ -146,5 +149,98 @@ max_rate
Maximum value of bandwidth rate limit for a traffic class.
+.SH EXAMPLE
+
+The following example shows how to attach priorities to 4 traffic classes ("num_tc 4"),
+and then how to pair these traffic classes with 4 hardware queues with mqprio,
+with hardware coordination ("hw 1", or left unspecified, because 1 is the default value).
+Traffic class 0 (tc0) is mapped to hardware queue 0 (q0), tc1 is mapped to q1,
+tc2 is mapped to q2, and tc3 is mapped to q3.
+
+.EX
+# tc qdisc add dev eth0 root mqprio \
+ num_tc 4 \
+ map 0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 \
+ queues 1@0 1@1 1@2 1@3 \
+ hw 1
+.EE
+
+The next example shows how to attach priorities to 3 traffic classes ("num_tc 3"),
+and how to pair these traffic classes with 4 queues,
+without hardware coordination ("hw 0").
+Traffic class 0 (tc0) is mapped to hardware queue 0 (q0), tc1 is mapped to q1,
+and tc2 is mapped to q2 and q3, where the queue selection between these
+two queues is somewhat randomly decided.
+
+.EX
+# tc qdisc add dev eth0 root mqprio \
+ num_tc 3 \
+ map 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 \
+ queues 1@0 1@1 2@2 \
+ hw 0
+.EE
+
+
+In both cases from above the priority values from 0 to 3 (prio0-3) are
+mapped to tc0, prio4-7 are mapped to tc1, and the
+prio8-11 are mapped to tc2 ("map" attribute). The last four priority values
+(prio12-15) are mapped in different ways in the two examples.
+They are mapped to tc3 in the first example and mapped to tc2 in the second example.
+The values of these two examples are the following:
+
+ ┌────┬────┬───────┐ ┌────┬────┬────────┐
+ │Prio│ tc │ queue │ │Prio│ tc │ queue │
+ ├────┼────┼───────┤ ├────┼────┼────────┤
+ │ 0 │ 0 │ 0 │ │ 0 │ 0 │ 0 │
+ │ 1 │ 0 │ 0 │ │ 1 │ 0 │ 0 │
+ │ 2 │ 0 │ 0 │ │ 2 │ 0 │ 0 │
+ │ 3 │ 0 │ 0 │ │ 3 │ 0 │ 0 │
+ │ 4 │ 1 │ 1 │ │ 4 │ 1 │ 1 │
+ │ 5 │ 1 │ 1 │ │ 5 │ 1 │ 1 │
+ │ 6 │ 1 │ 1 │ │ 6 │ 1 │ 1 │
+ │ 7 │ 1 │ 1 │ │ 7 │ 1 │ 1 │
+ │ 8 │ 2 │ 2 │ │ 8 │ 2 │ 2 or 3 │
+ │ 9 │ 2 │ 2 │ │ 9 │ 2 │ 2 or 3 │
+ │ 10 │ 2 │ 2 │ │ 10 │ 2 │ 2 or 3 │
+ │ 11 │ 2 │ 2 │ │ 11 │ 2 │ 2 or 3 │
+ │ 12 │ 3 │ 3 │ │ 12 │ 2 │ 2 or 3 │
+ │ 13 │ 3 │ 3 │ │ 13 │ 2 │ 2 or 3 │
+ │ 14 │ 3 │ 3 │ │ 14 │ 2 │ 2 or 3 │
+ │ 15 │ 3 │ 3 │ │ 15 │ 2 │ 2 or 3 │
+ └────┴────┴───────┘ └────┴────┴────────┘
+ example1 example2
+
+
+Another example of queue mapping is the following.
+There are 5 traffic classes, and there are 8 hardware queues.
+
+.EX
+# tc qdisc add dev eth0 root mqprio \
+ num_tc 5 \
+ map 0 0 0 1 1 1 1 2 2 3 3 4 4 4 4 4 \
+ queues 1@0 2@1 1@3 1@4 3@5
+.EE
+
+The value mapping is the following for this example:
+
+ ┌───────┐
+ tc0────┤Queue 0│◄────1@0
+ ├───────┤
+ ┌─┤Queue 1│◄────2@1
+ tc1──┤ ├───────┤
+ └─┤Queue 2│
+ ├───────┤
+ tc2────┤Queue 3│◄────1@3
+ ├───────┤
+ tc3────┤Queue 4│◄────1@4
+ ├───────┤
+ ┌─┤Queue 5│◄────3@5
+ │ ├───────┤
+ tc4──┼─┤Queue 6│
+ │ ├───────┤
+ └─┤Queue 7│
+ └───────┘
+
+
.SH AUTHORS
John Fastabend, <john.r.fastabend@intel.com>
diff --git a/man/man8/tc-netem.8 b/man/man8/tc-netem.8
index 21775854..51cf081e 100644
--- a/man/man8/tc-netem.8
+++ b/man/man8/tc-netem.8
@@ -1,6 +1,6 @@
.TH NETEM 8 "25 November 2011" "iproute2" "Linux"
.SH NAME
-NetEm \- Network Emulator
+netem \- Network Emulator
.SH SYNOPSIS
.B "tc qdisc ... dev"
.IR DEVICE " ] "
@@ -20,7 +20,7 @@ NetEm \- Network Emulator
.IR TIME " [ " JITTER " [ " CORRELATION " ]]]"
.br
[
-.BR distribution " { "uniform " | " normal " | " pareto " | " paretonormal " } ]"
+.BR distribution " { "uniform " | " normal " | " pareto " | " paretonormal " } ]"
.IR LOSS " := "
.BR loss " { "
@@ -64,135 +64,175 @@ NetEm \- Network Emulator
.BR bytes
.IR BYTES " ]"
-
.SH DESCRIPTION
-NetEm is an enhancement of the Linux traffic control facilities
-that allow one to add delay, packet loss, duplication and more other
-characteristics to packets outgoing from a selected network
-interface. NetEm is built using the existing Quality Of Service (QOS)
-and Differentiated Services (diffserv) facilities in the Linux
-kernel.
-
-.SH netem OPTIONS
-netem has the following options:
-
-.SS limit packets
-
-maximum number of packets the qdisc may hold queued at a time.
-
-.SS delay
-adds the chosen delay to the packets outgoing to chosen network interface. The
-optional parameters allows one to introduce a delay variation and a correlation.
-Delay and jitter values are expressed in ms while correlation is percentage.
-
-.SS distribution
-allow the user to choose the delay distribution. If not specified, the default
-distribution is Normal. Additional parameters allow one to consider situations in
-which network has variable delays depending on traffic flows concurring on the
-same path, that causes several delay peaks and a tail.
-
-.SS loss random
-adds an independent loss probability to the packets outgoing from the chosen
-network interface. It is also possible to add a correlation, but this option
-is now deprecated due to the noticed bad behavior.
-
-.SS loss state
-adds packet losses according to the 4-state Markov using the transition
-probabilities as input parameters. The parameter p13 is mandatory and if used
-alone corresponds to the Bernoulli model. The optional parameters allows one to
-extend the model to 2-state (p31), 3-state (p23 and p32) and 4-state (p14).
-State 1 corresponds to good reception, State 4 to independent losses, State 3
-to burst losses and State 2 to good reception within a burst.
-
-.SS loss gemodel
-adds packet losses according to the Gilbert-Elliot loss model or its special
-cases (Gilbert, Simple Gilbert and Bernoulli). To use the Bernoulli model, the
-only needed parameter is p while the others will be set to the default
-values r=1-p, 1-h=1 and 1-k=0. The parameters needed for the Simple Gilbert
-model are two (p and r), while three parameters (p, r, 1-h) are needed for the
-Gilbert model and four (p, r, 1-h and 1-k) are needed for the Gilbert-Elliot
-model. As known, p and r are the transition probabilities between the bad and
-the good states, 1-h is the loss probability in the bad state and 1-k is the
-loss probability in the good state.
-
-.SS ecn
-can be used optionally to mark packets instead of dropping them. A loss model
-has to be used for this to be enabled.
-
-.SS corrupt
-allows the emulation of random noise introducing an error in a random position
-for a chosen percent of packets. It is also possible to add a correlation
-through the proper parameter.
-
-.SS duplicate
-using this option the chosen percent of packets is duplicated before queuing
-them. It is also possible to add a correlation through the proper parameter.
-
-.SS reorder
-to use reordering, a delay option must be specified. There are two ways to use
-this option (assuming 'delay 10ms' in the options list).
-
-.B "reorder "
-.I 25% 50%
-.B "gap"
-.I 5
+The
+.B netem
+queue discipline provides Network Emulation functionality
+for testing protocols by emulating the properties of real-world networks.
+
+The queue discipline provides one or more network impairments to packets
+such as: delay, loss, duplication, and packet corruption.
+
+.SH OPTIONS
+.TP
+.BI limit " COUNT"
+Limits the maximum number of packets the qdisc may hold when doing delay.
+
+.TP
+.B delay
+.IR TIME " [ " JITTER " [ " CORRELATION " ]]"
.br
-in this first example, the first 4 (gap - 1) packets are delayed by 10ms and
-subsequent packets are sent immediately with a probability of 0.25 (with
-correlation of 50% ) or delayed with a probability of 0.75. After a packet is
-reordered, the process restarts i.e. the next 4 packets are delayed and
-subsequent packets are sent immediately or delayed based on reordering
-probability. To cause a repeatable pattern where every 5th packet is reordered
-reliably, a reorder probability of 100% can be used.
+Delays the packets before sending.
+The optional parameters allow introducing a delay variation and a correlation.
+Delay and jitter values are expressed in milliseconds;
+Correlation is set by specifying a percent of how much the previous delay
+will impact the current random value.
+
+.TP
+.BI distribution " TYPE"
+Specifies a pattern for delay distribution.
+.RS
+.TP
+.B uniform
+Use an equally weighted distribution of packet delays.
+.TP
+.B normal
+Use a Gaussian distribution of delays.
+Sometimes called a Bell Curve.
+.TP
+.B pareto
+Use a Pareto distribution of packet delays.
+This is useful to emulate long-tail distributions.
+.TP
+.B paretonormal
+This is a mix of
+.B pareto
+and
+.B normal
+distribution which has properties of both Bell curve and long tail.
+.RE
-.B reorder
-.I 25% 50%
+.TP
+.BI loss " MODEL"
+Drop packets based on a loss model.
+.I MODEL
+can be one of
+.RS
+.TP
+.BI random " PERCENT"
+Each packet loss is independent.
+.TP
+.BI state " P13 [ P31 [ P32 [ P23 P14 ]]]"
+Use a 4-state Markov chain to describe packet loss.
.br
-in this second example 25% of packets are sent immediately (with correlation of
-50%) while the others are delayed by 10 ms.
-
-.SS rate
-delay packets based on packet size and is a replacement for
-.IR TBF .
-Rate can be
-specified in common units (e.g. 100kbit). Optional
+.I P13
+is the packet loss.
+Optional parameters extend the model to 2-state
+.IR P31 ,
+3-state
+.IR P23 ,
+.I P32
+and 4-state
+.IR P14 .
+
+The Markov chain states are:
+.RS
+.TP
+.B 1
+good packet reception (no loss).
+.TP
+.B 2
+good reception within a burst.
+.TP
+.B 3
+burst losses.
+.TP
+.B 4
+independent losses.
+.RE
+
+.TP
+.BI gemodel " PERCENT [ R [ 1-H [ 1-K ]]]"
+Use a Gilbert-Elliot (burst loss) model
+based on:
+.RS
+.TP
+.I PERCENT
+probability of starting bad (lossy) state.
+.TP
+.I R
+probability of exiting bad state.
+.TP
+.I "1-H"
+loss probability in bad state.
+.TP
+.I "1-K"
+loss probability in good state.
+.RE
+.RE
+
+.TP
+.B ecn
+Use
+Explicit Congestion Notification (ECN)
+to mark packets instead of dropping them.
+A loss model has to be used for this to be enabled.
+.TP
+.BI corrupt " PERCENT"
+modifies the contents of the packet at a random position
+based on
+.IR PERCENT .
+.TP
+.BI duplicate " PERCENT"
+creates a copy of the packet before queuing.
+.TP
+.BI reorder " PERCENT"
+modifies the order of packets in the queue.
+.TP
+.BI gap " DISTANCE"
+sends some packets immediately.
+The first packets
+.I "(DISTANCE - 1)"
+are delayed and the next packet is sent immediately.
+
+.TP
+.BI rate " RATE [ PACKETOVERHEAD [ CELLSIZE [ CELLOVERHEAD ]]]"
+Delays packets based on packet size to emulate a fixed link speed.
+Optional parameters:
+.RS
+.TP
.I PACKETOVERHEAD
-(in bytes) specify an per packet overhead and can be negative. A positive value can be
-used to simulate additional link layer headers. A negative value can be used to
-artificial strip the Ethernet header (e.g. -14) and/or simulate a link layer
-header compression scheme. The third parameter - an unsigned value - specify
-the cellsize. Cellsize can be used to simulate link layer schemes. ATM for
-example has an payload cellsize of 48 bytes and 5 byte per cell header. If a
-packet is 50 byte then ATM must use two cells: 2 * 48 bytes payload including 2
-* 5 byte header, thus consume 106 byte on the wire. The last optional value
-.I CELLOVERHEAD
-can be used to specify per cell overhead - for our ATM example 5.
+Specify a per packet overhead in bytes.
+Used to simulate additional link layer headers.
+A negative value can be used to simulate when the Ethernet header is
+stripped (e.g. -14) or header compression is used.
+.TP
+.I CELLSIZE
+simulate link layer schemes like ATM.
+.TP
.I CELLOVERHEAD
-can be negative, but use negative values with caution.
-
-Note that rate throttling is limited by several factors: the kernel clock
-granularity avoid a perfect shaping at a specific level. This will show up in
-an artificial packet compression (bursts). Another influence factor are network
-adapter buffers which can also add artificial delay.
-
-.SS slot
-defer delivering accumulated packets to within a slot. Each available slot can be
-configured with a minimum delay to acquire, and an optional maximum delay.
-Alternatively it can be configured with the distribution similar to
-.BR distribution
-for
-.BR delay
-option. Slot delays can be specified in nanoseconds, microseconds, milliseconds or seconds
-(e.g. 800us). Values for the optional parameters
-.I BYTES
-will limit the number of bytes delivered per slot, and/or
-.I PACKETS
-will limit the number of packets delivered per slot.
+specify per cell overhead.
+.RE
+
+Rate throttling is impacted by several factors including the kernel clock
+granularity. This will show up in an artificial packet compression (bursts).
+
+.TP
+.BI slot " MIN_DELAY [ MAX_DELAY ]"
+allows emulating slotted networks.
+Defer delivering accumulated packets to within a slot.
+Each available slot is configured with a minimum delay to acquire,
+and an optional maximum delay.
+.TP
+.B slot distribution
+allows configuring based on distribution similar to
+.B distribution
+option for packet delays.
These slot options can provide a crude approximation of bursty MACs such as
DOCSIS, WiFi, and LTE.
-Note that slotting is limited by several factors: the kernel clock granularity,
+Slot emulation is limited by several factors: the kernel clock granularity,
as with a rate, and attempts to deliver many packets within a slot will be
smeared by the timer resolution, and by the underlying native bandwidth also.
@@ -201,36 +241,156 @@ where either the rate, or the slot limits on bytes or packets per slot, govern
the actual delivered rate.
.SH LIMITATIONS
-The main known limitation of Netem are related to timer granularity, since
-Linux is not a real-time operating system.
+Netem is limited by the timer granularity in the kernel.
+Rate and delay may be impacted by clock interrupts.
+.PP
+Mixing forms of reordering may lead to unexpected results.
+For any method of reordering to work, some delay is necessary.
+If the delay is less than the inter-packet arrival time then
+no reordering will be seen.
+Due to mechanisms like TSQ (TCP Small Queues), for TCP performance test
+results to be realistic netem must be placed on the ingress of the
+receiver host.
+.PP
+Combining netem with other qdisc is possible but may not always
+work because netem uses the skb control block to set delays.
.SH EXAMPLES
.PP
-tc qdisc add dev eth0 root netem rate 5kbit 20 100 5
+.EX
+# tc qdisc add dev eth0 root netem delay 100ms
+.EE
+.RS 4
+Add fixed amount of delay to all packets going out on device eth0.
+Each packet will have an added delay of 100ms.
+.RE
+.PP
+.EX
+# tc qdisc change dev eth0 root netem delay 100ms 10ms 25%
+.EE
+.RS 4
+This causes the added delay of 100ms ± 10ms
+and the next packet delay value will be biased by 25% on the most recent delay.
+This isn't a true statistical correlation, but an approximation.
+.RE
+.PP
+.EX
+# tc qdisc change dev eth0 root netem delay 100ms 20ms distribution normal
+.EE
+.RS 4
+This delays packets according to a normal distribution (Bell curve)
+over a range of 100ms ± 20ms.
+.RE
+.PP
+.EX
+# tc qdisc change dev eth0 root netem loss 0.1%
+.EE
.RS 4
-delay all outgoing packets on device eth0 with a rate of 5kbit, a per packet
-overhead of 20 byte, a cellsize of 100 byte and a per celloverhead of 5 byte:
+This causes 1/10th of a percent (i.e. 1 out of 1000) packets to be
+randomly dropped.
+
+An optional correlation may also be added.
+This causes the random number generator to be less random and can be used to emulate packet burst losses.
+.RE
+.PP
+.EX
+# tc qdisc change dev eth0 root netem duplicate 1%
+.EE
+.RS 4
+This causes one percent of the packets sent on eth0 to be duplicated.
+.RE
+.PP
+.EX
+# tc qdisc change dev eth0 root netem loss 0.3% 25%
+.EE
+.RS 4
+This will cause 0.3% of packets to be lost,
+and each successive probability is biased by 25% of the previous one.
+.RE
+.PP
+There are two different ways to specify reordering.
+The gap method uses a fixed sequence and reorders every Nth packet.
+.EX
+# tc qdisc change dev eth0 root netem gap 5 delay 10ms
+.EE
+.RS 4
+This causes every 5th (10th, 15th, …) packet to be sent immediately
+and every other packet to be delayed by 10ms.
+This is predictable and useful for base protocol testing like reassembly.
+.RE
+.PP
+The reorder form uses a percentage of the packets to get misordered.
+.EX
+# tc qdisc change dev eth0 root netem delay 10ms reorder 25% 50%
+.EE
+In this example, 25% of packets (with a correlation of 50%) will get sent immediately, others will be delayed by 10ms.
+.PP
+Packets will also get reordered if jitter is large enough.
+.EX
+# tc qdisc change dev eth0 root netem delay 100ms 75ms
+.EE
+.RS 4
+If the first packet gets a random delay of 100ms (100ms base - 0ms jitter)
+and the second packet is sent 1ms later and gets a delay of 50ms (100ms base - 50ms jitter);
+the second packet will be sent first.
+This is because the queue discipline tfifo inside netem,
+keeps packets in order by time to send.
+.RE
+.PP
+If you don't want this behavior then replace the internal
+queue discipline tfifo with a simple FIFO queue discipline.
+.EX
+# tc qdisc add dev eth0 root handle 1: netem delay 10ms 100ms
+# tc qdisc add dev eth0 parent 1:1 pfifo limit 1000
+.EE
+
+.PP
+Example of using rate control and cell size.
+.EX
+# tc qdisc add dev eth0 root netem rate 5kbit 20 100 5
+.EE
+.RS 4
+Delay all outgoing packets on device eth0 with a rate of 5kbit, a per packet
+overhead of 20 bytes, a cellsize of 100 bytes and a per cell overhead of 5 bytes.
.RE
+.PP
+It is possible to selectively apply impairment using traffic classification.
+.EX
+# tc qdisc add dev eth0 root handle 1: prio
+# tc qdisc add dev eth0 parent 1:3 handle 30: \
+ tbf rate 20kbit buffer 1600 limit 3000
+# tc qdisc add dev eth0 parent 30:1 handle 31: \
+ netem delay 200ms 10ms distribution normal
+# tc filter add dev eth0 protocol ip parent 1:0 prio 3 u32 \
+ match ip dst 65.172.181.4/32 flowid 1:3
+.EE
+.RS 4
+This example uses a priority queueing discipline;
+a TBF is added to do rate control; and a simple netem delay.
+A filter classifies all packets going to 65.172.181.4 as being priority 3.
+.PP
.SH SOURCES
.IP " 1. " 4
Hemminger S. , "Network Emulation with NetEm", Open Source Development Lab,
April 2005
-(http://devresources.linux-foundation.org/shemminger/netem/LCA2005_paper.pdf)
+.UR http://devresources.linux-foundation.org/shemminger/netem/LCA2005_paper.pdf
+.UE
.IP " 2. " 4
-Netem page from Linux foundation, (https://wiki.linuxfoundation.org/networking/netem)
-
-.IP " 3. " 4
Salsano S., Ludovici F., Ordine A., "Definition of a general and intuitive loss
model for packet networks and its implementation in the Netem module in the
-Linux kernel", available at http://netgroup.uniroma2.it/NetemCLG
+Linux kernel", available at
+.UR http://netgroup.uniroma2.it/NetemCLG
+.UE
.SH SEE ALSO
-.BR tc (8),
-.BR tc-tbf (8)
+.BR tc (8)
.SH AUTHOR
-Netem was written by Stephen Hemminger at Linux foundation and is based on NISTnet.
-This manpage was created by Fabio Ludovici <fabio.ludovici at yahoo dot it> and
-Hagen Paul Pfeifer <hagen@jauu.net>
+Netem was written by Stephen Hemminger at Linux foundation and was
+inspired by NISTnet.
+
+Original manpage was created by Fabio Ludovici
+<fabio.ludovici at yahoo dot it> and Hagen Paul Pfeifer
+<hagen@jauu.net>.
diff --git a/tc/f_u32.c b/tc/f_u32.c
index bfe9e5f9..936dbd65 100644
--- a/tc/f_u32.c
+++ b/tc/f_u32.c
@@ -828,12 +828,12 @@ static void print_ipv4(FILE *f, const struct tc_u32_key *key)
print_nl();
print_uint(PRINT_ANY, "ip_ihl", " match IP ihl %u",
ntohl(key->val) >> 24);
- return;
+ break;
case 0x00ff0000:
print_nl();
print_0xhex(PRINT_ANY, "ip_dsfield", " match IP dsfield %#x",
ntohl(key->val) >> 16);
- return;
+ break;
}
break;
case 8:
@@ -841,7 +841,6 @@ static void print_ipv4(FILE *f, const struct tc_u32_key *key)
print_nl();
print_int(PRINT_ANY, "ip_protocol", " match IP protocol %d",
ntohl(key->val) >> 16);
- return;
}
break;
case 12:
@@ -864,7 +863,6 @@ static void print_ipv4(FILE *f, const struct tc_u32_key *key)
print_string(PRINT_ANY, "address", "%s", addr);
print_int(PRINT_ANY, "prefixlen", "/%d", bits);
close_json_object();
- return;
}
}
break;
@@ -874,19 +872,19 @@ static void print_ipv4(FILE *f, const struct tc_u32_key *key)
case 0x0000ffff:
print_uint(PRINT_ANY, "dport", "match dport %u",
ntohl(key->val) & 0xffff);
- return;
+ break;
case 0xffff0000:
print_nl();
print_uint(PRINT_ANY, "sport", " match sport %u",
ntohl(key->val) >> 16);
- return;
+ break;
case 0xffffffff:
print_nl();
print_uint(PRINT_ANY, "dport", " match dport %u, ",
ntohl(key->val) & 0xffff);
print_uint(PRINT_ANY, "sport", "match sport %u",
ntohl(key->val) >> 16);
- return;
+ break;
}
/* XXX: Default print_raw */
}
@@ -905,12 +903,12 @@ static void print_ipv6(FILE *f, const struct tc_u32_key *key)
print_nl();
print_uint(PRINT_ANY, "ip_ihl", " match IP ihl %u",
ntohl(key->val) >> 24);
- return;
+ break;
case 0x00ff0000:
print_nl();
print_0xhex(PRINT_ANY, "ip_dsfield", " match IP dsfield %#x",
ntohl(key->val) >> 16);
- return;
+ break;
}
break;
case 8:
@@ -918,7 +916,6 @@ static void print_ipv6(FILE *f, const struct tc_u32_key *key)
print_nl();
print_int(PRINT_ANY, "ip_protocol", " match IP protocol %d",
ntohl(key->val) >> 16);
- return;
}
break;
case 12:
@@ -941,7 +938,6 @@ static void print_ipv6(FILE *f, const struct tc_u32_key *key)
print_string(PRINT_ANY, "address", "%s", addr);
print_int(PRINT_ANY, "prefixlen", "/%d", bits);
close_json_object();
- return;
}
}
break;
@@ -952,11 +948,11 @@ static void print_ipv6(FILE *f, const struct tc_u32_key *key)
print_nl();
print_uint(PRINT_ANY, "sport", " match sport %u",
ntohl(key->val) & 0xffff);
- return;
+ break;
case 0xffff0000:
print_uint(PRINT_ANY, "dport", "match dport %u",
ntohl(key->val) >> 16);
- return;
+ break;
case 0xffffffff:
print_nl();
print_uint(PRINT_ANY, "sport", " match sport %u, ",
@@ -964,7 +960,7 @@ static void print_ipv6(FILE *f, const struct tc_u32_key *key)
print_uint(PRINT_ANY, "dport", "match dport %u",
ntohl(key->val) >> 16);
- return;
+ break;
}
/* XXX: Default print_raw */
}
@@ -1273,7 +1269,7 @@ static int u32_print_opt(struct filter_util *qu, FILE *f, struct rtattr *opt,
if (tb[TCA_U32_CLASSID]) {
__u32 classid = rta_getattr_u32(tb[TCA_U32_CLASSID]);
SPRINT_BUF(b1);
- if (sel && (sel->flags & TC_U32_TERMINAL))
+ if (!sel || !(sel->flags & TC_U32_TERMINAL))
print_string(PRINT_FP, NULL, "*", NULL);
print_string(PRINT_ANY, "flowid", "flowid %s ",
diff --git a/tc/m_action.c b/tc/m_action.c
index 0400132c..a446cabd 100644
--- a/tc/m_action.c
+++ b/tc/m_action.c
@@ -586,7 +586,13 @@ int print_action(struct nlmsghdr *n, void *arg)
open_json_object(NULL);
tc_dump_action(fp, tb[TCA_ACT_TAB], tot_acts ? *tot_acts:0, false);
- print_ext_msg(tb);
+
+ if (tb[TCA_ROOT_EXT_WARN_MSG]) {
+ print_string(PRINT_ANY, "warn", "%s",
+ rta_getattr_str(tb[TCA_ROOT_EXT_WARN_MSG]));
+ print_nl();
+ }
+
close_json_object();
return 0;
diff --git a/tc/m_csum.c b/tc/m_csum.c
index ba1e3e33..f5fe8f55 100644
--- a/tc/m_csum.c
+++ b/tc/m_csum.c
@@ -94,7 +94,9 @@ parse_csum(struct action_util *a, int *argc_p,
while (argc > 0) {
if (matches(*argv, "csum") == 0) {
NEXT_ARG();
- if (parse_csum_args(&argc, &argv, &sel)) {
+ if (strcmp(*argv, "index") == 0) {
+ goto skip_args;
+ } else if (parse_csum_args(&argc, &argv, &sel)) {
fprintf(stderr, "Illegal csum construct (%s)\n",
*argv);
explain();
@@ -123,6 +125,7 @@ parse_csum(struct action_util *a, int *argc_p,
if (argc) {
if (matches(*argv, "index") == 0) {
+skip_args:
NEXT_ARG();
if (get_u32(&sel.index, *argv, 10)) {
fprintf(stderr, "Illegal \"index\" (%s) <csum>\n",
diff --git a/tc/m_mpls.c b/tc/m_mpls.c
index 9b39d853..dda46805 100644
--- a/tc/m_mpls.c
+++ b/tc/m_mpls.c
@@ -91,6 +91,9 @@ static int parse_mpls(struct action_util *a, int *argc_p, char ***argv_p,
NEXT_ARG();
+ if (strcmp(*argv, "index") == 0)
+ goto skip_args;
+
while (argc > 0) {
if (matches(*argv, "pop") == 0) {
if (check_double_action(action, *argv))
@@ -164,6 +167,7 @@ static int parse_mpls(struct action_util *a, int *argc_p, char ***argv_p,
if (argc) {
if (matches(*argv, "index") == 0) {
+skip_args:
NEXT_ARG();
if (get_u32(&parm.index, *argv, 10))
invarg("illegal index", *argv);
diff --git a/tc/m_nat.c b/tc/m_nat.c
index 58315125..95b35584 100644
--- a/tc/m_nat.c
+++ b/tc/m_nat.c
@@ -88,7 +88,9 @@ parse_nat(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, struct
while (argc > 0) {
if (matches(*argv, "nat") == 0) {
NEXT_ARG();
- if (parse_nat_args(&argc, &argv, &sel)) {
+ if (strcmp(*argv, "index") == 0) {
+ goto skip_args;
+ } else if (parse_nat_args(&argc, &argv, &sel)) {
fprintf(stderr, "Illegal nat construct (%s)\n",
*argv);
explain();
@@ -113,6 +115,7 @@ parse_nat(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, struct
if (argc) {
if (matches(*argv, "index") == 0) {
+skip_args:
NEXT_ARG();
if (get_u32(&sel.index, *argv, 10)) {
fprintf(stderr, "Nat: Illegal \"index\"\n");
diff --git a/tc/tc_class.c b/tc/tc_class.c
index c1feb009..096fa2ec 100644
--- a/tc/tc_class.c
+++ b/tc/tc_class.c
@@ -356,7 +356,7 @@ int print_class(struct nlmsghdr *n, void *arg)
print_string(PRINT_ANY, "parent", "parent %s ", abuf);
}
if (t->tcm_info)
- print_0xhex(PRINT_ANY, "leaf", "leaf %x", t->tcm_info>>16);
+ print_0xhex(PRINT_ANY, "leaf", "leaf %x: ", t->tcm_info>>16);
q = get_qdisc_kind(RTA_DATA(tb[TCA_KIND]));
if (tb[TCA_OPTIONS]) {
diff --git a/tc/tc_util.c b/tc/tc_util.c
index d9dd5a81..0714134e 100644
--- a/tc/tc_util.c
+++ b/tc/tc_util.c
@@ -471,7 +471,7 @@ static int parse_action_control_slash_spaces(int *argc_p, char ***argv_p,
result_p = &result2;
NEXT_ARG();
/* fall-through */
- case 0:
+ case 0:
ret = parse_action_control(&argc, &argv,
result_p, allow_num);
if (ret)