diff options
author | David Ahern <dsahern@kernel.org> | 2023-04-22 10:08:08 -0600 |
---|---|---|
committer | David Ahern <dsahern@kernel.org> | 2023-04-22 10:08:08 -0600 |
commit | a08205b62f5e22792bed9cbbb306aab14eb8f202 (patch) | |
tree | 36df1828b847fe61148a95bcb5960f0c6b281222 | |
parent | 47928f88511721da41c4d492b4a7bd93b4f0d936 (diff) | |
parent | 1371d7deaa2011f5e611e7ce9fdad65483f248dd (diff) | |
download | iproute2-a08205b62f5e22792bed9cbbb306aab14eb8f202.tar.gz |
Merge branch 'main' into next
Signed-off-by: David Ahern <dsahern@kernel.org>
-rw-r--r-- | genl/ctrl.c | 12 | ||||
-rw-r--r-- | ip/iplink.c | 6 | ||||
-rw-r--r-- | ip/iplink_bridge_slave.c | 2 | ||||
-rw-r--r-- | ip/iproute_lwtunnel.c | 21 | ||||
-rw-r--r-- | ip/iptunnel.c | 14 | ||||
-rw-r--r-- | ip/xfrm_policy.c | 3 | ||||
-rw-r--r-- | man/man8/ip-link.8.in | 26 | ||||
-rw-r--r-- | man/man8/tc-mqprio.8 | 96 | ||||
-rw-r--r-- | man/man8/tc-netem.8 | 432 | ||||
-rw-r--r-- | tc/f_u32.c | 26 | ||||
-rw-r--r-- | tc/m_action.c | 8 | ||||
-rw-r--r-- | tc/m_csum.c | 5 | ||||
-rw-r--r-- | tc/m_mpls.c | 4 | ||||
-rw-r--r-- | tc/m_nat.c | 5 | ||||
-rw-r--r-- | tc/tc_class.c | 2 | ||||
-rw-r--r-- | tc/tc_util.c | 2 |
16 files changed, 477 insertions, 187 deletions
diff --git a/genl/ctrl.c b/genl/ctrl.c index a2d87af0..8d2e9448 100644 --- a/genl/ctrl.c +++ b/genl/ctrl.c @@ -57,7 +57,7 @@ static void print_ctrl_cmd_flags(FILE *fp, __u32 fl) fprintf(fp, "\n"); } -static int print_ctrl_cmds(FILE *fp, struct rtattr *arg, __u32 ctrl_ver) +static int print_ctrl_cmds(FILE *fp, struct rtattr *arg) { struct rtattr *tb[CTRL_ATTR_OP_MAX + 1]; @@ -70,7 +70,7 @@ static int print_ctrl_cmds(FILE *fp, struct rtattr *arg, __u32 ctrl_ver) fprintf(fp, " ID-0x%x ",*id); } /* we are only gonna do this for newer version of the controller */ - if (tb[CTRL_ATTR_OP_FLAGS] && ctrl_ver >= 0x2) { + if (tb[CTRL_ATTR_OP_FLAGS]) { __u32 *fl = RTA_DATA(tb[CTRL_ATTR_OP_FLAGS]); print_ctrl_cmd_flags(fp, *fl); } @@ -78,7 +78,7 @@ static int print_ctrl_cmds(FILE *fp, struct rtattr *arg, __u32 ctrl_ver) } -static int print_ctrl_grp(FILE *fp, struct rtattr *arg, __u32 ctrl_ver) +static int print_ctrl_grp(FILE *fp, struct rtattr *arg) { struct rtattr *tb[CTRL_ATTR_MCAST_GRP_MAX + 1]; @@ -109,7 +109,6 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl, int len = n->nlmsg_len; struct rtattr *attrs; FILE *fp = (FILE *) arg; - __u32 ctrl_v = 0x1; if (n->nlmsg_type != GENL_ID_CTRL) { fprintf(stderr, "Not a controller message, nlmsg_len=%d " @@ -148,7 +147,6 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl, if (tb[CTRL_ATTR_VERSION]) { __u32 *v = RTA_DATA(tb[CTRL_ATTR_VERSION]); fprintf(fp, " Version: 0x%x ",*v); - ctrl_v = *v; } if (tb[CTRL_ATTR_HDRSIZE]) { __u32 *h = RTA_DATA(tb[CTRL_ATTR_HDRSIZE]); @@ -198,7 +196,7 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl, for (i = 0; i < GENL_MAX_FAM_OPS; i++) { if (tb2[i]) { fprintf(fp, "\t\t#%d: ", i); - if (0 > print_ctrl_cmds(fp, tb2[i], ctrl_v)) { + if (0 > print_ctrl_cmds(fp, tb2[i])) { fprintf(fp, "Error printing command\n"); } /* for next command */ @@ -221,7 +219,7 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl, for (i = 0; i < GENL_MAX_FAM_GRPS; i++) { if (tb2[i]) { fprintf(fp, "\t\t#%d: ", i); - if 
(0 > print_ctrl_grp(fp, tb2[i], ctrl_v)) + if (0 > print_ctrl_grp(fp, tb2[i])) fprintf(fp, "Error printing group\n"); /* for next group */ fprintf(fp,"\n"); diff --git a/ip/iplink.c b/ip/iplink.c index a8da52f9..690636b6 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -63,7 +63,7 @@ void iplink_usage(void) " [ mtu MTU ] [index IDX ]\n" " [ numtxqueues QUEUE_COUNT ]\n" " [ numrxqueues QUEUE_COUNT ]\n" - " [ netns { PID | NAME } ]\n" + " [ netns { PID | NETNSNAME | NETNSFILE } ]\n" " type TYPE [ ARGS ]\n" "\n" " ip link delete { DEVICE | dev DEVICE | group DEVGROUP } type TYPE [ ARGS ]\n" @@ -88,7 +88,7 @@ void iplink_usage(void) " [ address LLADDR ]\n" " [ broadcast LLADDR ]\n" " [ mtu MTU ]\n" - " [ netns { PID | NAME } ]\n" + " [ netns { PID | NETNSNAME | NETNSFILE } ]\n" " [ link-netns NAME | link-netnsid ID ]\n" " [ alias NAME ]\n" " [ vf NUM [ mac LLADDR ]\n" @@ -1629,7 +1629,7 @@ static void print_af_stats_attr(FILE *fp, int ifindex, struct rtattr *attr) if (!if_printed) { print_uint(PRINT_ANY, "ifindex", "%u:", ifindex); - print_color_string(PRINT_ANY, COLOR_IFNAME, + print_color_string(PRINT_ANY, COLOR_IFNAME, "ifname", "%s", ll_index_to_name(ifindex)); print_nl(); diff --git a/ip/iplink_bridge_slave.c b/ip/iplink_bridge_slave.c index 43b42948..66a67961 100644 --- a/ip/iplink_bridge_slave.c +++ b/ip/iplink_bridge_slave.c @@ -40,7 +40,7 @@ static void print_explain(FILE *f) " [ vlan_tunnel {on | off} ]\n" " [ isolated {on | off} ]\n" " [ locked {on | off} ]\n" - " [ mab {on | off} ]\n" + " [ mab {on | off} ]\n" " [ backup_port DEVICE ] [ nobackup_port ]\n" ); } diff --git a/ip/iproute_lwtunnel.c b/ip/iproute_lwtunnel.c index 52221c69..308178ef 100644 --- a/ip/iproute_lwtunnel.c +++ b/ip/iproute_lwtunnel.c @@ -32,7 +32,7 @@ #include <linux/ioam6.h> #include <linux/ioam6_iptunnel.h> -static const char *format_encap_type(int type) +static const char *format_encap_type(uint16_t type) { switch (type) { case LWTUNNEL_ENCAP_MPLS: @@ -62,7 +62,7 @@ static const char 
*format_encap_type(int type) static void encap_type_usage(void) { - int i; + uint16_t i; fprintf(stderr, "Usage: ip route ... encap TYPE [ OPTIONS ] [...]\n"); @@ -73,7 +73,7 @@ static void encap_type_usage(void) exit(-1); } -static int read_encap_type(const char *name) +static uint16_t read_encap_type(const char *name) { if (strcmp(name, "mpls") == 0) return LWTUNNEL_ENCAP_MPLS; @@ -834,14 +834,15 @@ static void print_encap_xfrm(FILE *fp, struct rtattr *encap) void lwt_print_encap(FILE *fp, struct rtattr *encap_type, struct rtattr *encap) { - int et; + uint16_t et; if (!encap_type) return; et = rta_getattr_u16(encap_type); - - print_string(PRINT_ANY, "encap", " encap %s ", format_encap_type(et)); + open_json_object("encap"); + print_string(PRINT_ANY, "encap_type", " encap %s ", + format_encap_type(et)); switch (et) { case LWTUNNEL_ENCAP_MPLS: @@ -875,6 +876,7 @@ void lwt_print_encap(FILE *fp, struct rtattr *encap_type, print_encap_xfrm(fp, encap); break; } + close_json_object(); } static struct ipv6_sr_hdr *parse_srh(char *segbuf, int hmac, bool encap) @@ -959,7 +961,7 @@ static int parse_encap_seg6(struct rtattr *rta, size_t len, int *argcp, invarg("\"segs\" provided before \"mode\"\n", *argv); - strlcpy(segbuf, *argv, 1024); + strlcpy(segbuf, *argv, sizeof(segbuf)); } else if (strcmp(*argv, "hmac") == 0) { NEXT_ARG(); if (hmac_ok++) @@ -1045,7 +1047,7 @@ static int parse_encap_rpl(struct rtattr *rta, size_t len, int *argcp, if (segs_ok++) duparg2("segs", *argv); - strlcpy(segbuf, *argv, 1024); + strlcpy(segbuf, *argv, sizeof(segbuf)); } else { break; } @@ -1466,8 +1468,7 @@ static int parse_encap_seg6local(struct rtattr *rta, size_t len, int *argcp, NEXT_ARG(); if (segs_ok++) duparg2("segs", *argv); - strncpy(segbuf, *argv, 1024); - segbuf[1023] = 0; + strlcpy(segbuf, *argv, sizeof(segbuf)); if (!NEXT_ARG_OK()) break; NEXT_ARG(); diff --git a/ip/iptunnel.c b/ip/iptunnel.c index 02c3670b..b6da1459 100644 --- a/ip/iptunnel.c +++ b/ip/iptunnel.c @@ -17,6 +17,7 @@ 
#include <net/if_arp.h> #include <linux/ip.h> #include <linux/if_tunnel.h> +#include <linux/ip6_tunnel.h> #include "rt_names.h" #include "utils.h" @@ -172,11 +173,20 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p) if (get_ifname(p->name, *argv)) invarg("\"name\" not a valid ifname", *argv); if (cmd == SIOCCHGTUNNEL && count == 0) { - struct ip_tunnel_parm old_p = {}; + union { + struct ip_tunnel_parm ip_tnl; + struct ip6_tnl_parm2 ip6_tnl; + } old_p = {}; if (tnl_get_ioctl(*argv, &old_p)) return -1; - *p = old_p; + + if (old_p.ip_tnl.iph.version != 4 || + old_p.ip_tnl.iph.ihl != 5) + invarg("\"name\" is not an ip tunnel", + *argv); + + *p = old_p.ip_tnl; } } count++; diff --git a/ip/xfrm_policy.c b/ip/xfrm_policy.c index be2235ca..8687ced3 100644 --- a/ip/xfrm_policy.c +++ b/ip/xfrm_policy.c @@ -1141,7 +1141,8 @@ static int xfrm_str_to_policy(char *name, uint8_t *policy) if (strcmp(name, "block") == 0) { *policy = XFRM_USERPOLICY_BLOCK; return 0; - } else if (strcmp(name, "accept") == 0) { + } else if (strcmp(name, "accept") == 0 || + strcmp(name, "allow") == 0) { *policy = XFRM_USERPOLICY_ACCEPT; return 0; } diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index bec1b78b..8cec5fe3 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -49,7 +49,7 @@ ip-link \- network device configuration .IR BYTES " ]" .br .RB "[ " netns " {" -.IR PID " | " NETNSNAME " } ]" +.IR PID " | " NETNSNAME " | " NETNSFILE " } ]" .br .BI type " TYPE" .RI "[ " ARGS " ]" @@ -118,7 +118,7 @@ ip-link \- network device configuration .IR MTU " ]" .br .RB "[ " netns " {" -.IR PID " | " NETNSNAME " } ]" +.IR PID " | " NETNSNAME " | " NETNSFILE " } ]" .br .RB "[ " link-netnsid .IR ID " ]" @@ -465,8 +465,15 @@ specifies the desired index of the new virtual device. The link creation fails, if the index is busy. .TP -.BI netns " { PID | NAME } " -specifies the desired network namespace to create interface in. 
+.B netns +.RI "{ " PID " | " NETNSNAME " | " NETNSFILE " }" +.br +create the device in the network namespace associated with process +.IR "PID " or +the name +.IR "NETNSNAME " or +the file +.IR "NETNSFILE". .TP VLAN Type Support @@ -2196,10 +2203,15 @@ the interface is .IR "POINTOPOINT" . .TP -.BI netns " NETNSNAME " \fR| " PID" -move the device to the network namespace associated with name +.B netns +.RI "{ " PID " | " NETNSNAME " | " NETNSFILE " }" +.br +move the device to the network namespace associated with process +.IR "PID " or +the name .IR "NETNSNAME " or -.RI process " PID". +the file +.IR "NETNSFILE". Some devices are not allowed to change network namespace: loopback, bridge, wireless. These are network namespace local devices. In such case diff --git a/man/man8/tc-mqprio.8 b/man/man8/tc-mqprio.8 index 4b9e942e..16ecb9a1 100644 --- a/man/man8/tc-mqprio.8 +++ b/man/man8/tc-mqprio.8 @@ -98,6 +98,7 @@ belong to an application. See kernel and cgroup documentation for details. .TP num_tc Number of traffic classes to use. Up to 16 classes supported. +You cannot have more classes than queues. .TP map @@ -119,6 +120,8 @@ Set to to support hardware offload. Set to .B 0 to configure user specified values in software only. +The default value of this parameter is +.B 1 .TP mode @@ -146,5 +149,98 @@ max_rate Maximum value of bandwidth rate limit for a traffic class. +.SH EXAMPLE + +The following example shows how to attach priorities to 4 traffic classes ("num_tc 4"), +and then how to pair these traffic classes with 4 hardware queues with mqprio, +with hardware coordination ("hw 1", or left unspecified, because 1 is the default value). +Traffic class 0 (tc0) is mapped to hardware queue 0 (q0), tc1 is mapped to q1, +tc2 is mapped to q2, and tc3 is mapped to q3.
+ +.EX +# tc qdisc add dev eth0 root mqprio \ + num_tc 4 \ + map 0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 \ + queues 1@0 1@1 1@2 1@3 \ + hw 1 +.EE + +The next example shows how to attach priorities to 3 traffic classes ("num_tc 3"), +and how to pair these traffic classes with 4 queues, +without hardware coordination ("hw 0"). +Traffic class 0 (tc0) is mapped to hardware queue 0 (q0), tc1 is mapped to q1, +and tc2 is mapped to q2 and q3, where the queue selection between these +two queues is somewhat randomly decided. + +.EX +# tc qdisc add dev eth0 root mqprio \ + num_tc 3 \ + map 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 \ + queues 1@0 1@1 2@2 \ + hw 0 +.EE + + +In both cases from above the priority values from 0 to 3 (prio0-3) are +mapped to tc0, prio4-7 are mapped to tc1, and the +prio8-11 are mapped to tc2 ("map" attribute). The last four priority values +(prio12-15) are mapped in different ways in the two examples. +They are mapped to tc3 in the first example and mapped to tc2 in the second example. +The values of these two examples are the following: + + ┌────┬────┬───────┐ ┌────┬────┬────────┐ + │Prio│ tc │ queue │ │Prio│ tc │ queue │ + ├────┼────┼───────┤ ├────┼────┼────────┤ + │ 0 │ 0 │ 0 │ │ 0 │ 0 │ 0 │ + │ 1 │ 0 │ 0 │ │ 1 │ 0 │ 0 │ + │ 2 │ 0 │ 0 │ │ 2 │ 0 │ 0 │ + │ 3 │ 0 │ 0 │ │ 3 │ 0 │ 0 │ + │ 4 │ 1 │ 1 │ │ 4 │ 1 │ 1 │ + │ 5 │ 1 │ 1 │ │ 5 │ 1 │ 1 │ + │ 6 │ 1 │ 1 │ │ 6 │ 1 │ 1 │ + │ 7 │ 1 │ 1 │ │ 7 │ 1 │ 1 │ + │ 8 │ 2 │ 2 │ │ 8 │ 2 │ 2 or 3 │ + │ 9 │ 2 │ 2 │ │ 9 │ 2 │ 2 or 3 │ + │ 10 │ 2 │ 2 │ │ 10 │ 2 │ 2 or 3 │ + │ 11 │ 2 │ 2 │ │ 11 │ 2 │ 2 or 3 │ + │ 12 │ 3 │ 3 │ │ 12 │ 2 │ 2 or 3 │ + │ 13 │ 3 │ 3 │ │ 13 │ 2 │ 2 or 3 │ + │ 14 │ 3 │ 3 │ │ 14 │ 2 │ 2 or 3 │ + │ 15 │ 3 │ 3 │ │ 15 │ 2 │ 2 or 3 │ + └────┴────┴───────┘ └────┴────┴────────┘ + example1 example2 + + +Another example of queue mapping is the following. +There are 5 traffic classes, and there are 8 hardware queues.
+ +.EX +# tc qdisc add dev eth0 root mqprio \ + num_tc 5 \ + map 0 0 0 1 1 1 1 2 2 3 3 4 4 4 4 4 \ + queues 1@0 2@1 1@3 1@4 3@5 +.EE + +The value mapping is the following for this example: + + ┌───────┐ + tc0────┤Queue 0│◄────1@0 + ├───────┤ + ┌─┤Queue 1│◄────2@1 + tc1──┤ ├───────┤ + └─┤Queue 2│ + ├───────┤ + tc2────┤Queue 3│◄────1@3 + ├───────┤ + tc3────┤Queue 4│◄────1@4 + ├───────┤ + ┌─┤Queue 5│◄────3@5 + │ ├───────┤ + tc4──┼─┤Queue 6│ + │ ├───────┤ + └─┤Queue 7│ + └───────┘ + + .SH AUTHORS John Fastabend, <john.r.fastabend@intel.com> diff --git a/man/man8/tc-netem.8 b/man/man8/tc-netem.8 index 21775854..51cf081e 100644 --- a/man/man8/tc-netem.8 +++ b/man/man8/tc-netem.8 @@ -1,6 +1,6 @@ .TH NETEM 8 "25 November 2011" "iproute2" "Linux" .SH NAME -NetEm \- Network Emulator +netem \- Network Emulator .SH SYNOPSIS .B "tc qdisc ... dev" .IR DEVICE " ] " @@ -20,7 +20,7 @@ NetEm \- Network Emulator .IR TIME " [ " JITTER " [ " CORRELATION " ]]]" .br [ -.BR distribution " { "uniform " | " normal " | " pareto " | " paretonormal " } ]" +.BR distribution " { "uniform " | " normal " | " pareto " | " paretonormal " } ]" .IR LOSS " := " .BR loss " { " @@ -64,135 +64,175 @@ NetEm \- Network Emulator .BR bytes .IR BYTES " ]" - .SH DESCRIPTION -NetEm is an enhancement of the Linux traffic control facilities -that allow one to add delay, packet loss, duplication and more other -characteristics to packets outgoing from a selected network -interface. NetEm is built using the existing Quality Of Service (QOS) -and Differentiated Services (diffserv) facilities in the Linux -kernel. - -.SH netem OPTIONS -netem has the following options: - -.SS limit packets - -maximum number of packets the qdisc may hold queued at a time. - -.SS delay -adds the chosen delay to the packets outgoing to chosen network interface. The -optional parameters allows one to introduce a delay variation and a correlation. -Delay and jitter values are expressed in ms while correlation is percentage. 
- -.SS distribution -allow the user to choose the delay distribution. If not specified, the default -distribution is Normal. Additional parameters allow one to consider situations in -which network has variable delays depending on traffic flows concurring on the -same path, that causes several delay peaks and a tail. - -.SS loss random -adds an independent loss probability to the packets outgoing from the chosen -network interface. It is also possible to add a correlation, but this option -is now deprecated due to the noticed bad behavior. - -.SS loss state -adds packet losses according to the 4-state Markov using the transition -probabilities as input parameters. The parameter p13 is mandatory and if used -alone corresponds to the Bernoulli model. The optional parameters allows one to -extend the model to 2-state (p31), 3-state (p23 and p32) and 4-state (p14). -State 1 corresponds to good reception, State 4 to independent losses, State 3 -to burst losses and State 2 to good reception within a burst. - -.SS loss gemodel -adds packet losses according to the Gilbert-Elliot loss model or its special -cases (Gilbert, Simple Gilbert and Bernoulli). To use the Bernoulli model, the -only needed parameter is p while the others will be set to the default -values r=1-p, 1-h=1 and 1-k=0. The parameters needed for the Simple Gilbert -model are two (p and r), while three parameters (p, r, 1-h) are needed for the -Gilbert model and four (p, r, 1-h and 1-k) are needed for the Gilbert-Elliot -model. As known, p and r are the transition probabilities between the bad and -the good states, 1-h is the loss probability in the bad state and 1-k is the -loss probability in the good state. - -.SS ecn -can be used optionally to mark packets instead of dropping them. A loss model -has to be used for this to be enabled. - -.SS corrupt -allows the emulation of random noise introducing an error in a random position -for a chosen percent of packets. 
It is also possible to add a correlation -through the proper parameter. - -.SS duplicate -using this option the chosen percent of packets is duplicated before queuing -them. It is also possible to add a correlation through the proper parameter. - -.SS reorder -to use reordering, a delay option must be specified. There are two ways to use -this option (assuming 'delay 10ms' in the options list). - -.B "reorder " -.I 25% 50% -.B "gap" -.I 5 +The +.B netem +queue discipline provides Network Emulation functionality +for testing protocols by emulating the properties of real-world networks. + +The queue discipline provides one or more network impairments to packets +such as: delay, loss, duplication, and packet corruption. + +.SH OPTIONS +.TP +.BI limit " COUNT" +Limits the maximum number of packets the qdisc may hold when doing delay. + +.TP +.B delay +.IR TIME " [ " JITTER " [ " CORRELATION " ]]]" .br -in this first example, the first 4 (gap - 1) packets are delayed by 10ms and -subsequent packets are sent immediately with a probability of 0.25 (with -correlation of 50% ) or delayed with a probability of 0.75. After a packet is -reordered, the process restarts i.e. the next 4 packets are delayed and -subsequent packets are sent immediately or delayed based on reordering -probability. To cause a repeatable pattern where every 5th packet is reordered -reliably, a reorder probability of 100% can be used. +Delays the packets before sending. +The optional parameters allow introducing a delay variation and a correlation. +Delay and jitter values are expressed in milliseconds; +Correlation is set by specifying a percent of how much the previous delay +will impact the current random value. + +.TP +.BI distribution " TYPE" +Specifies a pattern for delay distribution. +.RS +.TP +.B uniform +Use an equally weighted distribution of packet delays. +.TP +.B normal +Use a Gaussian distribution of delays. +Sometimes called a Bell Curve. 
+.TP +.B pareto +Use a Pareto distribution of packet delays. +This is useful to emulate long-tail distributions. +.TP +.B paretonormal +This is a mix of +.B pareto +and +.B normal +distribution which has properties of both Bell curve and long tail. +.RE -.B reorder -.I 25% 50% +.TP +.BI loss " MODEL" +Drop packets based on a loss model. +.I MODEL +can be one of +.RS +.TP +.BI random " PERCENT" +Each packet loss is independent. +.TP +.BI state " P13 [ P31 [ P32 [ P23 P14 ]]]" +Use a 4-state Markov chain to describe packet loss. .br -in this second example 25% of packets are sent immediately (with correlation of -50%) while the others are delayed by 10 ms. - -.SS rate -delay packets based on packet size and is a replacement for -.IR TBF . -Rate can be -specified in common units (e.g. 100kbit). Optional +.I P13 +is the packet loss. +Optional parameters extend the model to 2-state +.IR P31 , +3-state +.IR P23 , +.I P32 +and 4-state +.IR P14 . + +The Markov chain states are: +.RS +.TP +.B 1 +good packet reception (no loss). +.TP +.B 2 +good reception within a burst. +.TP +.B 3 +burst losses. +.TP +.B 4 +independent losses. +.RE + +.TP +.BI gemodel " PERCENT [ R [ 1-H [ 1-K ]]]" +Use a Gilbert-Elliot (burst loss) model +based on: +.RS +.TP +.I PERCENT +probability of starting bad (lossy) state. +.TP +.I R +probability of exiting bad state. +.TP +.I "1-H" +loss probability in bad state. +.TP +.I "1-K" +loss probability in good state. +.RE +.RE + +.TP +.B ecn +Use +Explicit Congestion Notification (ECN) +to mark packets instead of dropping them. +A loss model has to be used for this to be enabled. +.TP +.BI corrupt " PERCENT" +modifies the contents of the packet at a random position +based on +.IR PERCENT . +.TP +.BI duplicate " PERCENT" +creates a copy of the packet before queuing. +.TP +.BI reorder " PERCENT" +modifies the order of packet in the queue. +.TP +.BI gap " DISTANCE" +sends some packets immediately. 
+The first packets +.I "(DISTANCE - 1)" +are delayed and the next packet is sent immediately. + +.TP +.BI rate " RATE [ PACKETOVERHEAD [ CELLSIZE [ CELLOVERHEAD ]]]" +Delays packets based on packet size to emulate a fixed link speed. +Optional parameters: +.RS +.TP .I PACKETOVERHEAD -(in bytes) specify an per packet overhead and can be negative. A positive value can be -used to simulate additional link layer headers. A negative value can be used to -artificial strip the Ethernet header (e.g. -14) and/or simulate a link layer -header compression scheme. The third parameter - an unsigned value - specify -the cellsize. Cellsize can be used to simulate link layer schemes. ATM for -example has an payload cellsize of 48 bytes and 5 byte per cell header. If a -packet is 50 byte then ATM must use two cells: 2 * 48 bytes payload including 2 -* 5 byte header, thus consume 106 byte on the wire. The last optional value -.I CELLOVERHEAD -can be used to specify per cell overhead - for our ATM example 5. +Specify a per packet overhead in bytes. +Used to simulate additional link layer headers. +A negative value can be used to simulate when the Ethernet header is +stripped (e.g. -14) or header compression is used. +.TP +.I CELLSIZE +simulate link layer schemes like ATM. +.TP .I CELLOVERHEAD -can be negative, but use negative values with caution. - -Note that rate throttling is limited by several factors: the kernel clock -granularity avoid a perfect shaping at a specific level. This will show up in -an artificial packet compression (bursts). Another influence factor are network -adapter buffers which can also add artificial delay. - -.SS slot -defer delivering accumulated packets to within a slot. Each available slot can be -configured with a minimum delay to acquire, and an optional maximum delay. -Alternatively it can be configured with the distribution similar to -.BR distribution -for -.BR delay -option.
Slot delays can be specified in nanoseconds, microseconds, milliseconds or seconds -(e.g. 800us). Values for the optional parameters -.I BYTES -will limit the number of bytes delivered per slot, and/or -.I PACKETS -will limit the number of packets delivered per slot. +specify per cell overhead. +.RE + +Rate throttling is impacted by several factors including the kernel clock +granularity. This will show up in an artificial packet compression (bursts). + +.TP +.BI slot " MIN_DELAY [ MAX_DELAY ]" +allows emulating slotted networks. +Defer delivering accumulated packets to within a slot. +Each available slot is configured with a minimum delay to acquire, +and an optional maximum delay. +.TP +.B slot distribution +allows configuring based on distribution similar to +.B distribution +option for packet delays. These slot options can provide a crude approximation of bursty MACs such as DOCSIS, WiFi, and LTE. -Note that slotting is limited by several factors: the kernel clock granularity, +Slot emulation is limited by several factors: the kernel clock granularity, as with a rate, and attempts to deliver many packets within a slot will be smeared by the timer resolution, and by the underlying native bandwidth also. @@ -201,36 +241,156 @@ where either the rate, or the slot limits on bytes or packets per slot, govern the actual delivered rate. .SH LIMITATIONS -The main known limitation of Netem are related to timer granularity, since -Linux is not a real-time operating system. +Netem is limited by the timer granularity in the kernel. +Rate and delay may be impacted by clock interrupts. +.PP +Mixing forms of reordering may lead to unexpected results. +For any method of reordering to work, some delay is necessary. +If the delay is less than the inter-packet arrival time then +no reordering will be seen. +Due to mechanisms like TSQ (TCP Small Queues), for TCP performance test +results to be realistic netem must be placed on the ingress of the +receiver host.
+.PP +Combining netem with other qdisc is possible but may not always +work because netem uses the skb control block to set delays. .SH EXAMPLES .PP -tc qdisc add dev eth0 root netem rate 5kbit 20 100 5 +.EX +# tc qdisc add dev eth0 root netem delay 100ms +.EE +.RS 4 +Add fixed amount of delay to all packets going out on device eth0. +Each packet will have an added delay of 100ms. +.RE +.PP +.EX +# tc qdisc change dev eth0 root netem delay 100ms 10ms 25% +.EE +.RS 4 +This causes the added delay of 100ms ± 10ms +and the next packet delay value will be biased by 25% on the most recent delay. +This isn't a true statistical correlation, but an approximation. +.RE +.PP +.EX +# tc qdisc change dev eth0 root netem delay 100ms 20ms distribution normal +.EE +.RS 4 +This delays packets according to a normal distribution (Bell curve) +over a range of 100ms ± 20ms. +.RE +.PP +.EX +# tc qdisc change dev eth0 root netem loss 0.1% +.EE .RS 4 -delay all outgoing packets on device eth0 with a rate of 5kbit, a per packet -overhead of 20 byte, a cellsize of 100 byte and a per celloverhead of 5 byte: +This causes 1/10th of a percent (i.e. 1 out of 1000) packets to be +randomly dropped. + +An optional correlation may also be added. +This causes the random number generator to be less random and can be used to emulate packet burst losses. +.RE +.PP +.EX +# tc qdisc change dev eth0 root netem duplicate 1% +.EE +.RS 4 +This causes one percent of the packets sent on eth0 to be duplicated. +.RE +.PP +.EX +# tc qdisc change dev eth0 root netem loss 0.3% 25% +.EE +.RS 4 +This will cause 0.3% of packets to be lost, +and each successive probability is biased by 25% of the previous one. +.RE +.PP +There are two different ways to specify reordering. +The gap method uses a fixed sequence and reorders every Nth packet.
+.EX +# tc qdisc change dev eth0 root netem gap 5 delay 10ms +.EE +.RS 4 +This causes every 5th (10th, 15th, …) packet to be sent immediately +and every other packet to be delayed by 10ms. +This is predictable and useful for base protocol testing like reassembly. +.RE +.PP +The reorder form uses a percentage of the packets to get misordered. +.EX +# tc qdisc change dev eth0 root netem delay 10ms reorder 25% 50% +.EE +In this example, 25% of packets (with a correlation of 50%) will get sent immediately, others will be delayed by 10ms. +.PP +Packets will also get reordered if jitter is large enough. +.EX +# tc qdisc change dev eth0 root netem delay 100ms 75ms +.EE +.RS 4 +If the first packet gets a random delay of 100ms (100ms base - 0ms jitter) +and the second packet is sent 1ms later and gets a delay of 50ms (100ms base - 50ms jitter); +the second packet will be sent first. +This is because the queue discipline tfifo inside netem, +keeps packets in order by time to send. +.RE +.PP +If you don't want this behavior then replace the internal +queue discipline tfifo with a simple FIFO queue discipline. +.EX +# tc qdisc add dev eth0 root handle 1: netem delay 10ms 100ms +# tc qdisc add dev eth0 parent 1:1 pfifo limit 1000 +.EE + +.PP +Example of using rate control and cell size. +.EX +# tc qdisc add dev eth0 root netem rate 5kbit 20 100 5 +.EE +.RS 4 +Delay all outgoing packets on device eth0 with a rate of 5kbit, a per packet +overhead of 20 byte, a cellsize of 100 byte and a per celloverhead of 5 bytes. +.RE +.PP +It is possible to selectively apply impairment using traffic classification.
+.EX +# tc qdisc add dev eth0 root handle 1: prio +# tc qdisc add dev eth0 parent 1:3 handle 30: \ + tbf rate 20kbit buffer 1600 limit 3000 +# tc qdisc add dev eth0 parent 30:1 handle 31: \ + netem delay 200ms 10ms distribution normal +# tc filter add dev eth0 protocol ip parent 1:0 prio 3 u32 \ + match ip dst 65.172.181.4/32 flowid 1:3 +.EE +.RS 4 +This example uses a priority queueing discipline; +a TBF is added to do rate control; and a simple netem delay. +A filter classifies all packets going to 65.172.181.4 as being priority 3. +.PP .SH SOURCES .IP " 1. " 4 Hemminger S. , "Network Emulation with NetEm", Open Source Development Lab, April 2005 -(http://devresources.linux-foundation.org/shemminger/netem/LCA2005_paper.pdf) +.UR http://devresources.linux-foundation.org/shemminger/netem/LCA2005_paper.pdf +.UE .IP " 2. " 4 -Netem page from Linux foundation, (https://wiki.linuxfoundation.org/networking/netem) - -.IP " 3. " 4 Salsano S., Ludovici F., Ordine A., "Definition of a general and intuitive loss model for packet networks and its implementation in the Netem module in the -Linux kernel", available at http://netgroup.uniroma2.it/NetemCLG +Linux kernel", available at +.UR http://netgroup.uniroma2.it/NetemCLG +.UE .SH SEE ALSO -.BR tc (8), -.BR tc-tbf (8) +.BR tc (8) .SH AUTHOR -Netem was written by Stephen Hemminger at Linux foundation and is based on NISTnet. -This manpage was created by Fabio Ludovici <fabio.ludovici at yahoo dot it> and -Hagen Paul Pfeifer <hagen@jauu.net> +Netem was written by Stephen Hemminger at Linux foundation and was +inspired by NISTnet. + +Original manpage was created by Fabio Ludovici +<fabio.ludovici at yahoo dot it> and Hagen Paul Pfeifer +<hagen@jauu.net>.
@@ -828,12 +828,12 @@ static void print_ipv4(FILE *f, const struct tc_u32_key *key) print_nl(); print_uint(PRINT_ANY, "ip_ihl", " match IP ihl %u", ntohl(key->val) >> 24); - return; + break; case 0x00ff0000: print_nl(); print_0xhex(PRINT_ANY, "ip_dsfield", " match IP dsfield %#x", ntohl(key->val) >> 16); - return; + break; } break; case 8: @@ -841,7 +841,6 @@ static void print_ipv4(FILE *f, const struct tc_u32_key *key) print_nl(); print_int(PRINT_ANY, "ip_protocol", " match IP protocol %d", ntohl(key->val) >> 16); - return; } break; case 12: @@ -864,7 +863,6 @@ static void print_ipv4(FILE *f, const struct tc_u32_key *key) print_string(PRINT_ANY, "address", "%s", addr); print_int(PRINT_ANY, "prefixlen", "/%d", bits); close_json_object(); - return; } } break; @@ -874,19 +872,19 @@ static void print_ipv4(FILE *f, const struct tc_u32_key *key) case 0x0000ffff: print_uint(PRINT_ANY, "dport", "match dport %u", ntohl(key->val) & 0xffff); - return; + break; case 0xffff0000: print_nl(); print_uint(PRINT_ANY, "sport", " match sport %u", ntohl(key->val) >> 16); - return; + break; case 0xffffffff: print_nl(); print_uint(PRINT_ANY, "dport", " match dport %u, ", ntohl(key->val) & 0xffff); print_uint(PRINT_ANY, "sport", "match sport %u", ntohl(key->val) >> 16); - return; + break; } /* XXX: Default print_raw */ } @@ -905,12 +903,12 @@ static void print_ipv6(FILE *f, const struct tc_u32_key *key) print_nl(); print_uint(PRINT_ANY, "ip_ihl", " match IP ihl %u", ntohl(key->val) >> 24); - return; + break; case 0x00ff0000: print_nl(); print_0xhex(PRINT_ANY, "ip_dsfield", " match IP dsfield %#x", ntohl(key->val) >> 16); - return; + break; } break; case 8: @@ -918,7 +916,6 @@ static void print_ipv6(FILE *f, const struct tc_u32_key *key) print_nl(); print_int(PRINT_ANY, "ip_protocol", " match IP protocol %d", ntohl(key->val) >> 16); - return; } break; case 12: @@ -941,7 +938,6 @@ static void print_ipv6(FILE *f, const struct tc_u32_key *key) print_string(PRINT_ANY, "address", "%s", addr); 
print_int(PRINT_ANY, "prefixlen", "/%d", bits); close_json_object(); - return; } } break; @@ -952,11 +948,11 @@ static void print_ipv6(FILE *f, const struct tc_u32_key *key) print_nl(); print_uint(PRINT_ANY, "sport", " match sport %u", ntohl(key->val) & 0xffff); - return; + break; case 0xffff0000: print_uint(PRINT_ANY, "dport", "match dport %u", ntohl(key->val) >> 16); - return; + break; case 0xffffffff: print_nl(); print_uint(PRINT_ANY, "sport", " match sport %u, ", @@ -964,7 +960,7 @@ static void print_ipv6(FILE *f, const struct tc_u32_key *key) print_uint(PRINT_ANY, "dport", "match dport %u", ntohl(key->val) >> 16); - return; + break; } /* XXX: Default print_raw */ } @@ -1273,7 +1269,7 @@ static int u32_print_opt(struct filter_util *qu, FILE *f, struct rtattr *opt, if (tb[TCA_U32_CLASSID]) { __u32 classid = rta_getattr_u32(tb[TCA_U32_CLASSID]); SPRINT_BUF(b1); - if (sel && (sel->flags & TC_U32_TERMINAL)) + if (!sel || !(sel->flags & TC_U32_TERMINAL)) print_string(PRINT_FP, NULL, "*", NULL); print_string(PRINT_ANY, "flowid", "flowid %s ", diff --git a/tc/m_action.c b/tc/m_action.c index 0400132c..a446cabd 100644 --- a/tc/m_action.c +++ b/tc/m_action.c @@ -586,7 +586,13 @@ int print_action(struct nlmsghdr *n, void *arg) open_json_object(NULL); tc_dump_action(fp, tb[TCA_ACT_TAB], tot_acts ? 
*tot_acts:0, false); - print_ext_msg(tb); + + if (tb[TCA_ROOT_EXT_WARN_MSG]) { + print_string(PRINT_ANY, "warn", "%s", + rta_getattr_str(tb[TCA_ROOT_EXT_WARN_MSG])); + print_nl(); + } + close_json_object(); return 0; diff --git a/tc/m_csum.c b/tc/m_csum.c index ba1e3e33..f5fe8f55 100644 --- a/tc/m_csum.c +++ b/tc/m_csum.c @@ -94,7 +94,9 @@ parse_csum(struct action_util *a, int *argc_p, while (argc > 0) { if (matches(*argv, "csum") == 0) { NEXT_ARG(); - if (parse_csum_args(&argc, &argv, &sel)) { + if (strcmp(*argv, "index") == 0) { + goto skip_args; + } else if (parse_csum_args(&argc, &argv, &sel)) { fprintf(stderr, "Illegal csum construct (%s)\n", *argv); explain(); @@ -123,6 +125,7 @@ parse_csum(struct action_util *a, int *argc_p, if (argc) { if (matches(*argv, "index") == 0) { +skip_args: NEXT_ARG(); if (get_u32(&sel.index, *argv, 10)) { fprintf(stderr, "Illegal \"index\" (%s) <csum>\n", diff --git a/tc/m_mpls.c b/tc/m_mpls.c index 9b39d853..dda46805 100644 --- a/tc/m_mpls.c +++ b/tc/m_mpls.c @@ -91,6 +91,9 @@ static int parse_mpls(struct action_util *a, int *argc_p, char ***argv_p, NEXT_ARG(); + if (strcmp(*argv, "index") == 0) + goto skip_args; + while (argc > 0) { if (matches(*argv, "pop") == 0) { if (check_double_action(action, *argv)) @@ -164,6 +167,7 @@ static int parse_mpls(struct action_util *a, int *argc_p, char ***argv_p, if (argc) { if (matches(*argv, "index") == 0) { +skip_args: NEXT_ARG(); if (get_u32(&parm.index, *argv, 10)) invarg("illegal index", *argv); @@ -88,7 +88,9 @@ parse_nat(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, struct while (argc > 0) { if (matches(*argv, "nat") == 0) { NEXT_ARG(); - if (parse_nat_args(&argc, &argv, &sel)) { + if (strcmp(*argv, "index") == 0) { + goto skip_args; + } else if (parse_nat_args(&argc, &argv, &sel)) { fprintf(stderr, "Illegal nat construct (%s)\n", *argv); explain(); @@ -113,6 +115,7 @@ parse_nat(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, struct if (argc) { if 
(matches(*argv, "index") == 0) { +skip_args: NEXT_ARG(); if (get_u32(&sel.index, *argv, 10)) { fprintf(stderr, "Nat: Illegal \"index\"\n"); diff --git a/tc/tc_class.c b/tc/tc_class.c index c1feb009..096fa2ec 100644 --- a/tc/tc_class.c +++ b/tc/tc_class.c @@ -356,7 +356,7 @@ int print_class(struct nlmsghdr *n, void *arg) print_string(PRINT_ANY, "parent", "parent %s ", abuf); } if (t->tcm_info) - print_0xhex(PRINT_ANY, "leaf", "leaf %x", t->tcm_info>>16); + print_0xhex(PRINT_ANY, "leaf", "leaf %x: ", t->tcm_info>>16); q = get_qdisc_kind(RTA_DATA(tb[TCA_KIND])); if (tb[TCA_OPTIONS]) { diff --git a/tc/tc_util.c b/tc/tc_util.c index d9dd5a81..0714134e 100644 --- a/tc/tc_util.c +++ b/tc/tc_util.c @@ -471,7 +471,7 @@ static int parse_action_control_slash_spaces(int *argc_p, char ***argv_p, result_p = &result2; NEXT_ARG(); /* fall-through */ - case 0: + case 0: ret = parse_action_control(&argc, &argv, result_p, allow_num); if (ret) |