summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYu Watanabe <watanabe.yu+github@gmail.com>2019-10-30 14:35:51 +0900
committerGitHub <noreply@github.com>2019-10-30 14:35:51 +0900
commita346aa7c38dff023a6783eade2900cb75eff1d25 (patch)
tree0a48471f2b6f732eb6813cb0c1d2219a673afd7e
parentafa1a54eb5cb1a375fe4a305d41b1a2d53fdab26 (diff)
parentef3c8a92b7332fa9e017afcb5e88f22255eb8d1d (diff)
downloadsystemd-a346aa7c38dff023a6783eade2900cb75eff1d25.tar.gz
Merge pull request #13747 from ssahani/tc-qdisc
network: introduce Traffic Control
-rw-r--r--man/systemd.network.xml50
-rw-r--r--src/basic/linux/pkt_sched.h1184
-rw-r--r--src/libsystemd/sd-netlink/netlink-types.c18
-rw-r--r--src/libsystemd/sd-netlink/netlink-util.h4
-rw-r--r--src/libsystemd/sd-netlink/rtnl-message.c43
-rw-r--r--src/network/meson.build6
-rw-r--r--src/network/networkd-link.c30
-rw-r--r--src/network/networkd-link.h2
-rw-r--r--src/network/networkd-network-gperf.gperf7
-rw-r--r--src/network/networkd-network.c2
-rw-r--r--src/network/networkd-network.h2
-rw-r--r--src/network/tc/netem.c213
-rw-r--r--src/network/tc/netem.h28
-rw-r--r--src/network/tc/qdisc.c203
-rw-r--r--src/network/tc/qdisc.h35
-rw-r--r--src/network/tc/tc-util.c63
-rw-r--r--src/network/tc/tc-util.h8
-rw-r--r--src/systemd/sd-netlink.h4
-rw-r--r--test/fuzz/fuzz-network-parser/directives.network6
-rw-r--r--test/test-network/conf/25-qdisc.network20
-rwxr-xr-xtest/test-network/systemd-networkd-tests.py18
21 files changed, 1942 insertions, 4 deletions
diff --git a/man/systemd.network.xml b/man/systemd.network.xml
index 04cbde391e..6f08edb369 100644
--- a/man/systemd.network.xml
+++ b/man/systemd.network.xml
@@ -2288,6 +2288,56 @@
</refsect1>
<refsect1>
+ <title>[TrafficControlQueueingDiscipline] Section Options</title>
+ <para>The <literal>[TrafficControlQueueingDiscipline]</literal> section manages the Traffic control. It can be used
+ to configure the kernel packet scheduler and simulate packet delay and loss for UDP or TCP applications,
+ or limit the bandwidth usage of a particular service to simulate internet connections.</para>
+
+ <variablelist class='network-directives'>
+ <varlistentry>
+ <term><varname>Parent=</varname></term>
+ <listitem>
+ <para>Specifies the parent Queueing Discipline (qdisc). Takes one of <literal>root</literal>
+ or <literal>clsact</literal>. Defaults to <literal>root</literal>.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>NetworkEmulatorDelaySec=</varname></term>
+ <listitem>
+ <para>Specifies the fixed amount of delay to be added to all packets going out of the
+ interface. Defaults to unset.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>NetworkEmulatorDelayJitterSec=</varname></term>
+ <listitem>
+ <para>Specifies the chosen delay to be added to the packets outgoing to the network
+ interface. Defaults to unset.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>NetworkEmulatorPacketLimit=</varname></term>
+ <listitem>
+ <para>Specifies the maximum number of packets the qdisc may hold queued at a time.
+ An unsigned integer ranges 0 to 4294967294. Defaults to 1000.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>NetworkEmulatorLossRate=</varname></term>
+ <listitem>
+ <para>Specifies an independent loss probability to be added to the packets outgoing from the
+ network interface. Takes a percentage value, suffixed with "%". Defaults to unset.</para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
<title>[BridgeVLAN] Section Options</title>
<para>The <literal>[BridgeVLAN]</literal> section manages the VLAN ID configuration of a bridge port and accepts
the following keys. Specify several <literal>[BridgeVLAN]</literal> sections to configure several VLAN entries.
diff --git a/src/basic/linux/pkt_sched.h b/src/basic/linux/pkt_sched.h
new file mode 100644
index 0000000000..daf605760c
--- /dev/null
+++ b/src/basic/linux/pkt_sched.h
@@ -0,0 +1,1184 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_PKT_SCHED_H
+#define __LINUX_PKT_SCHED_H
+
+#include <linux/const.h>
+#include <linux/types.h>
+
+/* Logical priority bands not depending on specific packet scheduler.
+ Every scheduler will map them to real traffic classes, if it has
+ no more precise mechanism to classify packets.
+
+ These numbers have no special meaning, though their coincidence
+ with obsolete IPv6 values is not occasional :-). New IPv6 drafts
+ preferred full anarchy inspired by diffserv group.
+
+ Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy
+ class, actually, as rule it will be handled with more care than
+ filler or even bulk.
+*/
+
+#define TC_PRIO_BESTEFFORT 0
+#define TC_PRIO_FILLER 1
+#define TC_PRIO_BULK 2
+#define TC_PRIO_INTERACTIVE_BULK 4
+#define TC_PRIO_INTERACTIVE 6
+#define TC_PRIO_CONTROL 7
+
+#define TC_PRIO_MAX 15
+
+/* Generic queue statistics, available for all the elements.
+ Particular schedulers may have also their private records.
+*/
+
+struct tc_stats {
+ __u64 bytes; /* Number of enqueued bytes */
+ __u32 packets; /* Number of enqueued packets */
+ __u32 drops; /* Packets dropped because of lack of resources */
+ __u32 overlimits; /* Number of throttle events when this
+ * flow goes out of allocated bandwidth */
+ __u32 bps; /* Current flow byte rate */
+ __u32 pps; /* Current flow packet rate */
+ __u32 qlen;
+ __u32 backlog;
+};
+
+struct tc_estimator {
+ signed char interval;
+ unsigned char ewma_log;
+};
+
+/* "Handles"
+ ---------
+
+ All the traffic control objects have 32bit identifiers, or "handles".
+
+ They can be considered as opaque numbers from user API viewpoint,
+ but actually they always consist of two fields: major and
+ minor numbers, which are interpreted by kernel specially,
+ that may be used by applications, though not recommended.
+
+ F.e. qdisc handles always have minor number equal to zero,
+ classes (or flows) have major equal to parent qdisc major, and
+ minor uniquely identifying class inside qdisc.
+
+ Macros to manipulate handles:
+*/
+
+#define TC_H_MAJ_MASK (0xFFFF0000U)
+#define TC_H_MIN_MASK (0x0000FFFFU)
+#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK)
+#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK)
+#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK))
+
+#define TC_H_UNSPEC (0U)
+#define TC_H_ROOT (0xFFFFFFFFU)
+#define TC_H_INGRESS (0xFFFFFFF1U)
+#define TC_H_CLSACT TC_H_INGRESS
+
+#define TC_H_MIN_PRIORITY 0xFFE0U
+#define TC_H_MIN_INGRESS 0xFFF2U
+#define TC_H_MIN_EGRESS 0xFFF3U
+
+/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */
+enum tc_link_layer {
+ TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */
+ TC_LINKLAYER_ETHERNET,
+ TC_LINKLAYER_ATM,
+};
+#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */
+
+struct tc_ratespec {
+ unsigned char cell_log;
+ __u8 linklayer; /* lower 4 bits */
+ unsigned short overhead;
+ short cell_align;
+ unsigned short mpu;
+ __u32 rate;
+};
+
+#define TC_RTAB_SIZE 1024
+
+struct tc_sizespec {
+ unsigned char cell_log;
+ unsigned char size_log;
+ short cell_align;
+ int overhead;
+ unsigned int linklayer;
+ unsigned int mpu;
+ unsigned int mtu;
+ unsigned int tsize;
+};
+
+enum {
+ TCA_STAB_UNSPEC,
+ TCA_STAB_BASE,
+ TCA_STAB_DATA,
+ __TCA_STAB_MAX
+};
+
+#define TCA_STAB_MAX (__TCA_STAB_MAX - 1)
+
+/* FIFO section */
+
+struct tc_fifo_qopt {
+ __u32 limit; /* Queue length: bytes for bfifo, packets for pfifo */
+};
+
+/* SKBPRIO section */
+
+/*
+ * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1).
+ * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able
+ * to map one to one the DS field of IPV4 and IPV6 headers.
+ * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY.
+ */
+
+#define SKBPRIO_MAX_PRIORITY 64
+
+struct tc_skbprio_qopt {
+ __u32 limit; /* Queue length in packets. */
+};
+
+/* PRIO section */
+
+#define TCQ_PRIO_BANDS 16
+#define TCQ_MIN_PRIO_BANDS 2
+
+struct tc_prio_qopt {
+ int bands; /* Number of bands */
+ __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */
+};
+
+/* MULTIQ section */
+
+struct tc_multiq_qopt {
+ __u16 bands; /* Number of bands */
+ __u16 max_bands; /* Maximum number of queues */
+};
+
+/* PLUG section */
+
+#define TCQ_PLUG_BUFFER 0
+#define TCQ_PLUG_RELEASE_ONE 1
+#define TCQ_PLUG_RELEASE_INDEFINITE 2
+#define TCQ_PLUG_LIMIT 3
+
+struct tc_plug_qopt {
+ /* TCQ_PLUG_BUFFER: Inset a plug into the queue and
+ * buffer any incoming packets
+ * TCQ_PLUG_RELEASE_ONE: Dequeue packets from queue head
+ * to beginning of the next plug.
+ * TCQ_PLUG_RELEASE_INDEFINITE: Dequeue all packets from queue.
+ * Stop buffering packets until the next TCQ_PLUG_BUFFER
+ * command is received (just act as a pass-thru queue).
+ * TCQ_PLUG_LIMIT: Increase/decrease queue size
+ */
+ int action;
+ __u32 limit;
+};
+
+/* TBF section */
+
+struct tc_tbf_qopt {
+ struct tc_ratespec rate;
+ struct tc_ratespec peakrate;
+ __u32 limit;
+ __u32 buffer;
+ __u32 mtu;
+};
+
+enum {
+ TCA_TBF_UNSPEC,
+ TCA_TBF_PARMS,
+ TCA_TBF_RTAB,
+ TCA_TBF_PTAB,
+ TCA_TBF_RATE64,
+ TCA_TBF_PRATE64,
+ TCA_TBF_BURST,
+ TCA_TBF_PBURST,
+ TCA_TBF_PAD,
+ __TCA_TBF_MAX,
+};
+
+#define TCA_TBF_MAX (__TCA_TBF_MAX - 1)
+
+
+/* TEQL section */
+
+/* TEQL does not require any parameters */
+
+/* SFQ section */
+
+struct tc_sfq_qopt {
+ unsigned quantum; /* Bytes per round allocated to flow */
+ int perturb_period; /* Period of hash perturbation */
+ __u32 limit; /* Maximal packets in queue */
+ unsigned divisor; /* Hash divisor */
+ unsigned flows; /* Maximal number of flows */
+};
+
+struct tc_sfqred_stats {
+ __u32 prob_drop; /* Early drops, below max threshold */
+ __u32 forced_drop; /* Early drops, after max threshold */
+ __u32 prob_mark; /* Marked packets, below max threshold */
+ __u32 forced_mark; /* Marked packets, after max threshold */
+ __u32 prob_mark_head; /* Marked packets, below max threshold */
+ __u32 forced_mark_head;/* Marked packets, after max threshold */
+};
+
+struct tc_sfq_qopt_v1 {
+ struct tc_sfq_qopt v0;
+ unsigned int depth; /* max number of packets per flow */
+ unsigned int headdrop;
+ /* SFQRED parameters */
+ __u32 limit; /* HARD maximal flow queue length (bytes) */
+ __u32 qth_min; /* Min average length threshold (bytes) */
+ __u32 qth_max; /* Max average length threshold (bytes) */
+ unsigned char Wlog; /* log(W) */
+ unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */
+ unsigned char Scell_log; /* cell size for idle damping */
+ unsigned char flags;
+ __u32 max_P; /* probability, high resolution */
+ /* SFQRED stats */
+ struct tc_sfqred_stats stats;
+};
+
+
+struct tc_sfq_xstats {
+ __s32 allot;
+};
+
+/* RED section */
+
+enum {
+ TCA_RED_UNSPEC,
+ TCA_RED_PARMS,
+ TCA_RED_STAB,
+ TCA_RED_MAX_P,
+ __TCA_RED_MAX,
+};
+
+#define TCA_RED_MAX (__TCA_RED_MAX - 1)
+
+struct tc_red_qopt {
+ __u32 limit; /* HARD maximal queue length (bytes) */
+ __u32 qth_min; /* Min average length threshold (bytes) */
+ __u32 qth_max; /* Max average length threshold (bytes) */
+ unsigned char Wlog; /* log(W) */
+ unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */
+ unsigned char Scell_log; /* cell size for idle damping */
+ unsigned char flags;
+#define TC_RED_ECN 1
+#define TC_RED_HARDDROP 2
+#define TC_RED_ADAPTATIVE 4
+};
+
+struct tc_red_xstats {
+ __u32 early; /* Early drops */
+ __u32 pdrop; /* Drops due to queue limits */
+ __u32 other; /* Drops due to drop() calls */
+ __u32 marked; /* Marked packets */
+};
+
+/* GRED section */
+
+#define MAX_DPs 16
+
+enum {
+ TCA_GRED_UNSPEC,
+ TCA_GRED_PARMS,
+ TCA_GRED_STAB,
+ TCA_GRED_DPS,
+ TCA_GRED_MAX_P,
+ TCA_GRED_LIMIT,
+ TCA_GRED_VQ_LIST, /* nested TCA_GRED_VQ_ENTRY */
+ __TCA_GRED_MAX,
+};
+
+#define TCA_GRED_MAX (__TCA_GRED_MAX - 1)
+
+enum {
+ TCA_GRED_VQ_ENTRY_UNSPEC,
+ TCA_GRED_VQ_ENTRY, /* nested TCA_GRED_VQ_* */
+ __TCA_GRED_VQ_ENTRY_MAX,
+};
+#define TCA_GRED_VQ_ENTRY_MAX (__TCA_GRED_VQ_ENTRY_MAX - 1)
+
+enum {
+ TCA_GRED_VQ_UNSPEC,
+ TCA_GRED_VQ_PAD,
+ TCA_GRED_VQ_DP, /* u32 */
+ TCA_GRED_VQ_STAT_BYTES, /* u64 */
+ TCA_GRED_VQ_STAT_PACKETS, /* u32 */
+ TCA_GRED_VQ_STAT_BACKLOG, /* u32 */
+ TCA_GRED_VQ_STAT_PROB_DROP, /* u32 */
+ TCA_GRED_VQ_STAT_PROB_MARK, /* u32 */
+ TCA_GRED_VQ_STAT_FORCED_DROP, /* u32 */
+ TCA_GRED_VQ_STAT_FORCED_MARK, /* u32 */
+ TCA_GRED_VQ_STAT_PDROP, /* u32 */
+ TCA_GRED_VQ_STAT_OTHER, /* u32 */
+ TCA_GRED_VQ_FLAGS, /* u32 */
+ __TCA_GRED_VQ_MAX
+};
+
+#define TCA_GRED_VQ_MAX (__TCA_GRED_VQ_MAX - 1)
+
+struct tc_gred_qopt {
+ __u32 limit; /* HARD maximal queue length (bytes) */
+ __u32 qth_min; /* Min average length threshold (bytes) */
+ __u32 qth_max; /* Max average length threshold (bytes) */
+ __u32 DP; /* up to 2^32 DPs */
+ __u32 backlog;
+ __u32 qave;
+ __u32 forced;
+ __u32 early;
+ __u32 other;
+ __u32 pdrop;
+ __u8 Wlog; /* log(W) */
+ __u8 Plog; /* log(P_max/(qth_max-qth_min)) */
+ __u8 Scell_log; /* cell size for idle damping */
+ __u8 prio; /* prio of this VQ */
+ __u32 packets;
+ __u32 bytesin;
+};
+
+/* gred setup */
+struct tc_gred_sopt {
+ __u32 DPs;
+ __u32 def_DP;
+ __u8 grio;
+ __u8 flags;
+ __u16 pad1;
+};
+
+/* CHOKe section */
+
+enum {
+ TCA_CHOKE_UNSPEC,
+ TCA_CHOKE_PARMS,
+ TCA_CHOKE_STAB,
+ TCA_CHOKE_MAX_P,
+ __TCA_CHOKE_MAX,
+};
+
+#define TCA_CHOKE_MAX (__TCA_CHOKE_MAX - 1)
+
+struct tc_choke_qopt {
+ __u32 limit; /* Hard queue length (packets) */
+ __u32 qth_min; /* Min average threshold (packets) */
+ __u32 qth_max; /* Max average threshold (packets) */
+ unsigned char Wlog; /* log(W) */
+ unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */
+ unsigned char Scell_log; /* cell size for idle damping */
+ unsigned char flags; /* see RED flags */
+};
+
+struct tc_choke_xstats {
+ __u32 early; /* Early drops */
+ __u32 pdrop; /* Drops due to queue limits */
+ __u32 other; /* Drops due to drop() calls */
+ __u32 marked; /* Marked packets */
+ __u32 matched; /* Drops due to flow match */
+};
+
+/* HTB section */
+#define TC_HTB_NUMPRIO 8
+#define TC_HTB_MAXDEPTH 8
+#define TC_HTB_PROTOVER 3 /* the same as HTB and TC's major */
+
+struct tc_htb_opt {
+ struct tc_ratespec rate;
+ struct tc_ratespec ceil;
+ __u32 buffer;
+ __u32 cbuffer;
+ __u32 quantum;
+ __u32 level; /* out only */
+ __u32 prio;
+};
+struct tc_htb_glob {
+ __u32 version; /* to match HTB/TC */
+ __u32 rate2quantum; /* bps->quantum divisor */
+ __u32 defcls; /* default class number */
+ __u32 debug; /* debug flags */
+
+ /* stats */
+ __u32 direct_pkts; /* count of non shaped packets */
+};
+enum {
+ TCA_HTB_UNSPEC,
+ TCA_HTB_PARMS,
+ TCA_HTB_INIT,
+ TCA_HTB_CTAB,
+ TCA_HTB_RTAB,
+ TCA_HTB_DIRECT_QLEN,
+ TCA_HTB_RATE64,
+ TCA_HTB_CEIL64,
+ TCA_HTB_PAD,
+ __TCA_HTB_MAX,
+};
+
+#define TCA_HTB_MAX (__TCA_HTB_MAX - 1)
+
+struct tc_htb_xstats {
+ __u32 lends;
+ __u32 borrows;
+ __u32 giants; /* unused since 'Make HTB scheduler work with TSO.' */
+ __s32 tokens;
+ __s32 ctokens;
+};
+
+/* HFSC section */
+
+struct tc_hfsc_qopt {
+ __u16 defcls; /* default class */
+};
+
+struct tc_service_curve {
+ __u32 m1; /* slope of the first segment in bps */
+ __u32 d; /* x-projection of the first segment in us */
+ __u32 m2; /* slope of the second segment in bps */
+};
+
+struct tc_hfsc_stats {
+ __u64 work; /* total work done */
+ __u64 rtwork; /* work done by real-time criteria */
+ __u32 period; /* current period */
+ __u32 level; /* class level in hierarchy */
+};
+
+enum {
+ TCA_HFSC_UNSPEC,
+ TCA_HFSC_RSC,
+ TCA_HFSC_FSC,
+ TCA_HFSC_USC,
+ __TCA_HFSC_MAX,
+};
+
+#define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1)
+
+
+/* CBQ section */
+
+#define TC_CBQ_MAXPRIO 8
+#define TC_CBQ_MAXLEVEL 8
+#define TC_CBQ_DEF_EWMA 5
+
+struct tc_cbq_lssopt {
+ unsigned char change;
+ unsigned char flags;
+#define TCF_CBQ_LSS_BOUNDED 1
+#define TCF_CBQ_LSS_ISOLATED 2
+ unsigned char ewma_log;
+ unsigned char level;
+#define TCF_CBQ_LSS_FLAGS 1
+#define TCF_CBQ_LSS_EWMA 2
+#define TCF_CBQ_LSS_MAXIDLE 4
+#define TCF_CBQ_LSS_MINIDLE 8
+#define TCF_CBQ_LSS_OFFTIME 0x10
+#define TCF_CBQ_LSS_AVPKT 0x20
+ __u32 maxidle;
+ __u32 minidle;
+ __u32 offtime;
+ __u32 avpkt;
+};
+
+struct tc_cbq_wrropt {
+ unsigned char flags;
+ unsigned char priority;
+ unsigned char cpriority;
+ unsigned char __reserved;
+ __u32 allot;
+ __u32 weight;
+};
+
+struct tc_cbq_ovl {
+ unsigned char strategy;
+#define TC_CBQ_OVL_CLASSIC 0
+#define TC_CBQ_OVL_DELAY 1
+#define TC_CBQ_OVL_LOWPRIO 2
+#define TC_CBQ_OVL_DROP 3
+#define TC_CBQ_OVL_RCLASSIC 4
+ unsigned char priority2;
+ __u16 pad;
+ __u32 penalty;
+};
+
+struct tc_cbq_police {
+ unsigned char police;
+ unsigned char __res1;
+ unsigned short __res2;
+};
+
+struct tc_cbq_fopt {
+ __u32 split;
+ __u32 defmap;
+ __u32 defchange;
+};
+
+struct tc_cbq_xstats {
+ __u32 borrows;
+ __u32 overactions;
+ __s32 avgidle;
+ __s32 undertime;
+};
+
+enum {
+ TCA_CBQ_UNSPEC,
+ TCA_CBQ_LSSOPT,
+ TCA_CBQ_WRROPT,
+ TCA_CBQ_FOPT,
+ TCA_CBQ_OVL_STRATEGY,
+ TCA_CBQ_RATE,
+ TCA_CBQ_RTAB,
+ TCA_CBQ_POLICE,
+ __TCA_CBQ_MAX,
+};
+
+#define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1)
+
+/* dsmark section */
+
+enum {
+ TCA_DSMARK_UNSPEC,
+ TCA_DSMARK_INDICES,
+ TCA_DSMARK_DEFAULT_INDEX,
+ TCA_DSMARK_SET_TC_INDEX,
+ TCA_DSMARK_MASK,
+ TCA_DSMARK_VALUE,
+ __TCA_DSMARK_MAX,
+};
+
+#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1)
+
+/* ATM section */
+
+enum {
+ TCA_ATM_UNSPEC,
+ TCA_ATM_FD, /* file/socket descriptor */
+ TCA_ATM_PTR, /* pointer to descriptor - later */
+ TCA_ATM_HDR, /* LL header */
+ TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */
+ TCA_ATM_ADDR, /* PVC address (for output only) */
+ TCA_ATM_STATE, /* VC state (ATM_VS_*; for output only) */
+ __TCA_ATM_MAX,
+};
+
+#define TCA_ATM_MAX (__TCA_ATM_MAX - 1)
+
+/* Network emulator */
+
+enum {
+ TCA_NETEM_UNSPEC,
+ TCA_NETEM_CORR,
+ TCA_NETEM_DELAY_DIST,
+ TCA_NETEM_REORDER,
+ TCA_NETEM_CORRUPT,
+ TCA_NETEM_LOSS,
+ TCA_NETEM_RATE,
+ TCA_NETEM_ECN,
+ TCA_NETEM_RATE64,
+ TCA_NETEM_PAD,
+ TCA_NETEM_LATENCY64,
+ TCA_NETEM_JITTER64,
+ TCA_NETEM_SLOT,
+ TCA_NETEM_SLOT_DIST,
+ __TCA_NETEM_MAX,
+};
+
+#define TCA_NETEM_MAX (__TCA_NETEM_MAX - 1)
+
+struct tc_netem_qopt {
+ __u32 latency; /* added delay (us) */
+ __u32 limit; /* fifo limit (packets) */
+ __u32 loss; /* random packet loss (0=none ~0=100%) */
+ __u32 gap; /* re-ordering gap (0 for none) */
+ __u32 duplicate; /* random packet dup (0=none ~0=100%) */
+ __u32 jitter; /* random jitter in latency (us) */
+};
+
+struct tc_netem_corr {
+ __u32 delay_corr; /* delay correlation */
+ __u32 loss_corr; /* packet loss correlation */
+ __u32 dup_corr; /* duplicate correlation */
+};
+
+struct tc_netem_reorder {
+ __u32 probability;
+ __u32 correlation;
+};
+
+struct tc_netem_corrupt {
+ __u32 probability;
+ __u32 correlation;
+};
+
+struct tc_netem_rate {
+ __u32 rate; /* byte/s */
+ __s32 packet_overhead;
+ __u32 cell_size;
+ __s32 cell_overhead;
+};
+
+struct tc_netem_slot {
+ __s64 min_delay; /* nsec */
+ __s64 max_delay;
+ __s32 max_packets;
+ __s32 max_bytes;
+ __s64 dist_delay; /* nsec */
+ __s64 dist_jitter; /* nsec */
+};
+
+enum {
+ NETEM_LOSS_UNSPEC,
+ NETEM_LOSS_GI, /* General Intuitive - 4 state model */
+ NETEM_LOSS_GE, /* Gilbert Elliot models */
+ __NETEM_LOSS_MAX
+};
+#define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1)
+
+/* State transition probabilities for 4 state model */
+struct tc_netem_gimodel {
+ __u32 p13;
+ __u32 p31;
+ __u32 p32;
+ __u32 p14;
+ __u32 p23;
+};
+
+/* Gilbert-Elliot models */
+struct tc_netem_gemodel {
+ __u32 p;
+ __u32 r;
+ __u32 h;
+ __u32 k1;
+};
+
+#define NETEM_DIST_SCALE 8192
+#define NETEM_DIST_MAX 16384
+
+/* DRR */
+
+enum {
+ TCA_DRR_UNSPEC,
+ TCA_DRR_QUANTUM,
+ __TCA_DRR_MAX
+};
+
+#define TCA_DRR_MAX (__TCA_DRR_MAX - 1)
+
+struct tc_drr_stats {
+ __u32 deficit;
+};
+
+/* MQPRIO */
+#define TC_QOPT_BITMASK 15
+#define TC_QOPT_MAX_QUEUE 16
+
+enum {
+ TC_MQPRIO_HW_OFFLOAD_NONE, /* no offload requested */
+ TC_MQPRIO_HW_OFFLOAD_TCS, /* offload TCs, no queue counts */
+ __TC_MQPRIO_HW_OFFLOAD_MAX
+};
+
+#define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1)
+
+enum {
+ TC_MQPRIO_MODE_DCB,
+ TC_MQPRIO_MODE_CHANNEL,
+ __TC_MQPRIO_MODE_MAX
+};
+
+#define __TC_MQPRIO_MODE_MAX (__TC_MQPRIO_MODE_MAX - 1)
+
+enum {
+ TC_MQPRIO_SHAPER_DCB,
+ TC_MQPRIO_SHAPER_BW_RATE, /* Add new shapers below */
+ __TC_MQPRIO_SHAPER_MAX
+};
+
+#define __TC_MQPRIO_SHAPER_MAX (__TC_MQPRIO_SHAPER_MAX - 1)
+
+struct tc_mqprio_qopt {
+ __u8 num_tc;
+ __u8 prio_tc_map[TC_QOPT_BITMASK + 1];
+ __u8 hw;
+ __u16 count[TC_QOPT_MAX_QUEUE];
+ __u16 offset[TC_QOPT_MAX_QUEUE];
+};
+
+#define TC_MQPRIO_F_MODE 0x1
+#define TC_MQPRIO_F_SHAPER 0x2
+#define TC_MQPRIO_F_MIN_RATE 0x4
+#define TC_MQPRIO_F_MAX_RATE 0x8
+
+enum {
+ TCA_MQPRIO_UNSPEC,
+ TCA_MQPRIO_MODE,
+ TCA_MQPRIO_SHAPER,
+ TCA_MQPRIO_MIN_RATE64,
+ TCA_MQPRIO_MAX_RATE64,
+ __TCA_MQPRIO_MAX,
+};
+
+#define TCA_MQPRIO_MAX (__TCA_MQPRIO_MAX - 1)
+
+/* SFB */
+
+enum {
+ TCA_SFB_UNSPEC,
+ TCA_SFB_PARMS,
+ __TCA_SFB_MAX,
+};
+
+#define TCA_SFB_MAX (__TCA_SFB_MAX - 1)
+
+/*
+ * Note: increment, decrement are Q0.16 fixed-point values.
+ */
+struct tc_sfb_qopt {
+ __u32 rehash_interval; /* delay between hash move, in ms */
+ __u32 warmup_time; /* double buffering warmup time in ms (warmup_time < rehash_interval) */
+ __u32 max; /* max len of qlen_min */
+ __u32 bin_size; /* maximum queue length per bin */
+ __u32 increment; /* probability increment, (d1 in Blue) */
+ __u32 decrement; /* probability decrement, (d2 in Blue) */
+ __u32 limit; /* max SFB queue length */
+ __u32 penalty_rate; /* inelastic flows are rate limited to 'rate' pps */
+ __u32 penalty_burst;
+};
+
+struct tc_sfb_xstats {
+ __u32 earlydrop;
+ __u32 penaltydrop;
+ __u32 bucketdrop;
+ __u32 queuedrop;
+ __u32 childdrop; /* drops in child qdisc */
+ __u32 marked;
+ __u32 maxqlen;
+ __u32 maxprob;
+ __u32 avgprob;
+};
+
+#define SFB_MAX_PROB 0xFFFF
+
+/* QFQ */
+enum {
+ TCA_QFQ_UNSPEC,
+ TCA_QFQ_WEIGHT,
+ TCA_QFQ_LMAX,
+ __TCA_QFQ_MAX
+};
+
+#define TCA_QFQ_MAX (__TCA_QFQ_MAX - 1)
+
+struct tc_qfq_stats {
+ __u32 weight;
+ __u32 lmax;
+};
+
+/* CODEL */
+
+enum {
+ TCA_CODEL_UNSPEC,
+ TCA_CODEL_TARGET,
+ TCA_CODEL_LIMIT,
+ TCA_CODEL_INTERVAL,
+ TCA_CODEL_ECN,
+ TCA_CODEL_CE_THRESHOLD,
+ __TCA_CODEL_MAX
+};
+
+#define TCA_CODEL_MAX (__TCA_CODEL_MAX - 1)
+
+struct tc_codel_xstats {
+ __u32 maxpacket; /* largest packet we've seen so far */
+ __u32 count; /* how many drops we've done since the last time we
+ * entered dropping state
+ */
+ __u32 lastcount; /* count at entry to dropping state */
+ __u32 ldelay; /* in-queue delay seen by most recently dequeued packet */
+ __s32 drop_next; /* time to drop next packet */
+ __u32 drop_overlimit; /* number of time max qdisc packet limit was hit */
+ __u32 ecn_mark; /* number of packets we ECN marked instead of dropped */
+ __u32 dropping; /* are we in dropping state ? */
+ __u32 ce_mark; /* number of CE marked packets because of ce_threshold */
+};
+
+/* FQ_CODEL */
+
+enum {
+ TCA_FQ_CODEL_UNSPEC,
+ TCA_FQ_CODEL_TARGET,
+ TCA_FQ_CODEL_LIMIT,
+ TCA_FQ_CODEL_INTERVAL,
+ TCA_FQ_CODEL_ECN,
+ TCA_FQ_CODEL_FLOWS,
+ TCA_FQ_CODEL_QUANTUM,
+ TCA_FQ_CODEL_CE_THRESHOLD,
+ TCA_FQ_CODEL_DROP_BATCH_SIZE,
+ TCA_FQ_CODEL_MEMORY_LIMIT,
+ __TCA_FQ_CODEL_MAX
+};
+
+#define TCA_FQ_CODEL_MAX (__TCA_FQ_CODEL_MAX - 1)
+
+enum {
+ TCA_FQ_CODEL_XSTATS_QDISC,
+ TCA_FQ_CODEL_XSTATS_CLASS,
+};
+
+struct tc_fq_codel_qd_stats {
+ __u32 maxpacket; /* largest packet we've seen so far */
+ __u32 drop_overlimit; /* number of time max qdisc
+ * packet limit was hit
+ */
+ __u32 ecn_mark; /* number of packets we ECN marked
+ * instead of being dropped
+ */
+ __u32 new_flow_count; /* number of time packets
+ * created a 'new flow'
+ */
+ __u32 new_flows_len; /* count of flows in new list */
+ __u32 old_flows_len; /* count of flows in old list */
+ __u32 ce_mark; /* packets above ce_threshold */
+ __u32 memory_usage; /* in bytes */
+ __u32 drop_overmemory;
+};
+
+struct tc_fq_codel_cl_stats {
+ __s32 deficit;
+ __u32 ldelay; /* in-queue delay seen by most recently
+ * dequeued packet
+ */
+ __u32 count;
+ __u32 lastcount;
+ __u32 dropping;
+ __s32 drop_next;
+};
+
+struct tc_fq_codel_xstats {
+ __u32 type;
+ union {
+ struct tc_fq_codel_qd_stats qdisc_stats;
+ struct tc_fq_codel_cl_stats class_stats;
+ };
+};
+
+/* FQ */
+
+enum {
+ TCA_FQ_UNSPEC,
+
+ TCA_FQ_PLIMIT, /* limit of total number of packets in queue */
+
+ TCA_FQ_FLOW_PLIMIT, /* limit of packets per flow */
+
+ TCA_FQ_QUANTUM, /* RR quantum */
+
+ TCA_FQ_INITIAL_QUANTUM, /* RR quantum for new flow */
+
+ TCA_FQ_RATE_ENABLE, /* enable/disable rate limiting */
+
+ TCA_FQ_FLOW_DEFAULT_RATE,/* obsolete, do not use */
+
+ TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */
+
+ TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */
+
+ TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */
+
+ TCA_FQ_ORPHAN_MASK, /* mask applied to orphaned skb hashes */
+
+ TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */
+
+ TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */
+
+ __TCA_FQ_MAX
+};
+
+#define TCA_FQ_MAX (__TCA_FQ_MAX - 1)
+
+struct tc_fq_qd_stats {
+ __u64 gc_flows;
+ __u64 highprio_packets;
+ __u64 tcp_retrans;
+ __u64 throttled;
+ __u64 flows_plimit;
+ __u64 pkts_too_long;
+ __u64 allocation_errors;
+ __s64 time_next_delayed_flow;
+ __u32 flows;
+ __u32 inactive_flows;
+ __u32 throttled_flows;
+ __u32 unthrottle_latency_ns;
+ __u64 ce_mark; /* packets above ce_threshold */
+};
+
+/* Heavy-Hitter Filter */
+
+enum {
+ TCA_HHF_UNSPEC,
+ TCA_HHF_BACKLOG_LIMIT,
+ TCA_HHF_QUANTUM,
+ TCA_HHF_HH_FLOWS_LIMIT,
+ TCA_HHF_RESET_TIMEOUT,
+ TCA_HHF_ADMIT_BYTES,
+ TCA_HHF_EVICT_TIMEOUT,
+ TCA_HHF_NON_HH_WEIGHT,
+ __TCA_HHF_MAX
+};
+
+#define TCA_HHF_MAX (__TCA_HHF_MAX - 1)
+
+struct tc_hhf_xstats {
+ __u32 drop_overlimit; /* number of times max qdisc packet limit
+ * was hit
+ */
+ __u32 hh_overlimit; /* number of times max heavy-hitters was hit */
+ __u32 hh_tot_count; /* number of captured heavy-hitters so far */
+ __u32 hh_cur_count; /* number of current heavy-hitters */
+};
+
+/* PIE */
+enum {
+ TCA_PIE_UNSPEC,
+ TCA_PIE_TARGET,
+ TCA_PIE_LIMIT,
+ TCA_PIE_TUPDATE,
+ TCA_PIE_ALPHA,
+ TCA_PIE_BETA,
+ TCA_PIE_ECN,
+ TCA_PIE_BYTEMODE,
+ __TCA_PIE_MAX
+};
+#define TCA_PIE_MAX (__TCA_PIE_MAX - 1)
+
+struct tc_pie_xstats {
+ __u64 prob; /* current probability */
+ __u32 delay; /* current delay in ms */
+ __u32 avg_dq_rate; /* current average dq_rate in bits/pie_time */
+ __u32 packets_in; /* total number of packets enqueued */
+ __u32 dropped; /* packets dropped due to pie_action */
+ __u32 overlimit; /* dropped due to lack of space in queue */
+ __u32 maxq; /* maximum queue size */
+ __u32 ecn_mark; /* packets marked with ecn*/
+};
+
+/* CBS */
+struct tc_cbs_qopt {
+ __u8 offload;
+ __u8 _pad[3];
+ __s32 hicredit;
+ __s32 locredit;
+ __s32 idleslope;
+ __s32 sendslope;
+};
+
+enum {
+ TCA_CBS_UNSPEC,
+ TCA_CBS_PARMS,
+ __TCA_CBS_MAX,
+};
+
+#define TCA_CBS_MAX (__TCA_CBS_MAX - 1)
+
+
+/* ETF */
+struct tc_etf_qopt {
+ __s32 delta;
+ __s32 clockid;
+ __u32 flags;
+#define TC_ETF_DEADLINE_MODE_ON _BITUL(0)
+#define TC_ETF_OFFLOAD_ON _BITUL(1)
+#define TC_ETF_SKIP_SOCK_CHECK _BITUL(2)
+};
+
+enum {
+ TCA_ETF_UNSPEC,
+ TCA_ETF_PARMS,
+ __TCA_ETF_MAX,
+};
+
+#define TCA_ETF_MAX (__TCA_ETF_MAX - 1)
+
+
+/* CAKE */
+enum {
+ TCA_CAKE_UNSPEC,
+ TCA_CAKE_PAD,
+ TCA_CAKE_BASE_RATE64,
+ TCA_CAKE_DIFFSERV_MODE,
+ TCA_CAKE_ATM,
+ TCA_CAKE_FLOW_MODE,
+ TCA_CAKE_OVERHEAD,
+ TCA_CAKE_RTT,
+ TCA_CAKE_TARGET,
+ TCA_CAKE_AUTORATE,
+ TCA_CAKE_MEMORY,
+ TCA_CAKE_NAT,
+ TCA_CAKE_RAW,
+ TCA_CAKE_WASH,
+ TCA_CAKE_MPU,
+ TCA_CAKE_INGRESS,
+ TCA_CAKE_ACK_FILTER,
+ TCA_CAKE_SPLIT_GSO,
+ TCA_CAKE_FWMARK,
+ __TCA_CAKE_MAX
+};
+#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1)
+
+enum {
+ __TCA_CAKE_STATS_INVALID,
+ TCA_CAKE_STATS_PAD,
+ TCA_CAKE_STATS_CAPACITY_ESTIMATE64,
+ TCA_CAKE_STATS_MEMORY_LIMIT,
+ TCA_CAKE_STATS_MEMORY_USED,
+ TCA_CAKE_STATS_AVG_NETOFF,
+ TCA_CAKE_STATS_MIN_NETLEN,
+ TCA_CAKE_STATS_MAX_NETLEN,
+ TCA_CAKE_STATS_MIN_ADJLEN,
+ TCA_CAKE_STATS_MAX_ADJLEN,
+ TCA_CAKE_STATS_TIN_STATS,
+ TCA_CAKE_STATS_DEFICIT,
+ TCA_CAKE_STATS_COBALT_COUNT,
+ TCA_CAKE_STATS_DROPPING,
+ TCA_CAKE_STATS_DROP_NEXT_US,
+ TCA_CAKE_STATS_P_DROP,
+ TCA_CAKE_STATS_BLUE_TIMER_US,
+ __TCA_CAKE_STATS_MAX
+};
+#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1)
+
+enum {
+ __TCA_CAKE_TIN_STATS_INVALID,
+ TCA_CAKE_TIN_STATS_PAD,
+ TCA_CAKE_TIN_STATS_SENT_PACKETS,
+ TCA_CAKE_TIN_STATS_SENT_BYTES64,
+ TCA_CAKE_TIN_STATS_DROPPED_PACKETS,
+ TCA_CAKE_TIN_STATS_DROPPED_BYTES64,
+ TCA_CAKE_TIN_STATS_ACKS_DROPPED_PACKETS,
+ TCA_CAKE_TIN_STATS_ACKS_DROPPED_BYTES64,
+ TCA_CAKE_TIN_STATS_ECN_MARKED_PACKETS,
+ TCA_CAKE_TIN_STATS_ECN_MARKED_BYTES64,
+ TCA_CAKE_TIN_STATS_BACKLOG_PACKETS,
+ TCA_CAKE_TIN_STATS_BACKLOG_BYTES,
+ TCA_CAKE_TIN_STATS_THRESHOLD_RATE64,
+ TCA_CAKE_TIN_STATS_TARGET_US,
+ TCA_CAKE_TIN_STATS_INTERVAL_US,
+ TCA_CAKE_TIN_STATS_WAY_INDIRECT_HITS,
+ TCA_CAKE_TIN_STATS_WAY_MISSES,
+ TCA_CAKE_TIN_STATS_WAY_COLLISIONS,
+ TCA_CAKE_TIN_STATS_PEAK_DELAY_US,
+ TCA_CAKE_TIN_STATS_AVG_DELAY_US,
+ TCA_CAKE_TIN_STATS_BASE_DELAY_US,
+ TCA_CAKE_TIN_STATS_SPARSE_FLOWS,
+ TCA_CAKE_TIN_STATS_BULK_FLOWS,
+ TCA_CAKE_TIN_STATS_UNRESPONSIVE_FLOWS,
+ TCA_CAKE_TIN_STATS_MAX_SKBLEN,
+ TCA_CAKE_TIN_STATS_FLOW_QUANTUM,
+ __TCA_CAKE_TIN_STATS_MAX
+};
+#define TCA_CAKE_TIN_STATS_MAX (__TCA_CAKE_TIN_STATS_MAX - 1)
+#define TC_CAKE_MAX_TINS (8)
+
+enum {
+ CAKE_FLOW_NONE = 0,
+ CAKE_FLOW_SRC_IP,
+ CAKE_FLOW_DST_IP,
+ CAKE_FLOW_HOSTS, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_DST_IP */
+ CAKE_FLOW_FLOWS,
+ CAKE_FLOW_DUAL_SRC, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_FLOWS */
+ CAKE_FLOW_DUAL_DST, /* = CAKE_FLOW_DST_IP | CAKE_FLOW_FLOWS */
+ CAKE_FLOW_TRIPLE, /* = CAKE_FLOW_HOSTS | CAKE_FLOW_FLOWS */
+ CAKE_FLOW_MAX,
+};
+
+enum {
+ CAKE_DIFFSERV_DIFFSERV3 = 0,
+ CAKE_DIFFSERV_DIFFSERV4,
+ CAKE_DIFFSERV_DIFFSERV8,
+ CAKE_DIFFSERV_BESTEFFORT,
+ CAKE_DIFFSERV_PRECEDENCE,
+ CAKE_DIFFSERV_MAX
+};
+
+enum {
+ CAKE_ACK_NONE = 0,
+ CAKE_ACK_FILTER,
+ CAKE_ACK_AGGRESSIVE,
+ CAKE_ACK_MAX
+};
+
+enum {
+ CAKE_ATM_NONE = 0,
+ CAKE_ATM_ATM,
+ CAKE_ATM_PTM,
+ CAKE_ATM_MAX
+};
+
+
+/* TAPRIO */
+enum {
+ TC_TAPRIO_CMD_SET_GATES = 0x00,
+ TC_TAPRIO_CMD_SET_AND_HOLD = 0x01,
+ TC_TAPRIO_CMD_SET_AND_RELEASE = 0x02,
+};
+
+enum {
+ TCA_TAPRIO_SCHED_ENTRY_UNSPEC,
+ TCA_TAPRIO_SCHED_ENTRY_INDEX, /* u32 */
+ TCA_TAPRIO_SCHED_ENTRY_CMD, /* u8 */
+ TCA_TAPRIO_SCHED_ENTRY_GATE_MASK, /* u32 */
+ TCA_TAPRIO_SCHED_ENTRY_INTERVAL, /* u32 */
+ __TCA_TAPRIO_SCHED_ENTRY_MAX,
+};
+#define TCA_TAPRIO_SCHED_ENTRY_MAX (__TCA_TAPRIO_SCHED_ENTRY_MAX - 1)
+
+/* The format for schedule entry list is:
+ * [TCA_TAPRIO_SCHED_ENTRY_LIST]
+ * [TCA_TAPRIO_SCHED_ENTRY]
+ * [TCA_TAPRIO_SCHED_ENTRY_CMD]
+ * [TCA_TAPRIO_SCHED_ENTRY_GATES]
+ * [TCA_TAPRIO_SCHED_ENTRY_INTERVAL]
+ */
+enum {
+ TCA_TAPRIO_SCHED_UNSPEC,
+ TCA_TAPRIO_SCHED_ENTRY,
+ __TCA_TAPRIO_SCHED_MAX,
+};
+
+#define TCA_TAPRIO_SCHED_MAX (__TCA_TAPRIO_SCHED_MAX - 1)
+
+/* The format for the admin sched (dump only):
+ * [TCA_TAPRIO_SCHED_ADMIN_SCHED]
+ * [TCA_TAPRIO_ATTR_SCHED_BASE_TIME]
+ * [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]
+ * [TCA_TAPRIO_ATTR_SCHED_ENTRY]
+ * [TCA_TAPRIO_ATTR_SCHED_ENTRY_CMD]
+ * [TCA_TAPRIO_ATTR_SCHED_ENTRY_GATES]
+ * [TCA_TAPRIO_ATTR_SCHED_ENTRY_INTERVAL]
+ */
+
+#define TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST BIT(0)
+#define TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD BIT(1)
+
+enum {
+ TCA_TAPRIO_ATTR_UNSPEC,
+ TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */
+ TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST, /* nested of entry */
+ TCA_TAPRIO_ATTR_SCHED_BASE_TIME, /* s64 */
+ TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY, /* single entry */
+ TCA_TAPRIO_ATTR_SCHED_CLOCKID, /* s32 */
+ TCA_TAPRIO_PAD,
+ TCA_TAPRIO_ATTR_ADMIN_SCHED, /* The admin sched, only used in dump */
+ TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME, /* s64 */
+ TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, /* s64 */
+ TCA_TAPRIO_ATTR_FLAGS, /* u32 */
+ TCA_TAPRIO_ATTR_TXTIME_DELAY, /* u32 */
+ __TCA_TAPRIO_ATTR_MAX,
+};
+
+#define TCA_TAPRIO_ATTR_MAX (__TCA_TAPRIO_ATTR_MAX - 1)
+
+#endif
diff --git a/src/libsystemd/sd-netlink/netlink-types.c b/src/libsystemd/sd-netlink/netlink-types.c
index ac85b8b18c..cd6e5c87d5 100644
--- a/src/libsystemd/sd-netlink/netlink-types.c
+++ b/src/libsystemd/sd-netlink/netlink-types.c
@@ -18,9 +18,10 @@
#include <linux/if_link.h>
#include <linux/if_macsec.h>
#include <linux/if_tunnel.h>
-#include <linux/nexthop.h>
#include <linux/l2tp.h>
+#include <linux/nexthop.h>
#include <linux/nl80211.h>
+#include <linux/pkt_sched.h>
#include <linux/veth.h>
#include <linux/wireguard.h>
@@ -733,6 +734,18 @@ static const NLTypeSystem rtnl_nexthop_type_system = {
.types = rtnl_nexthop_types,
};
+static const NLType rtnl_qdisc_types[] = {
+ [TCA_KIND] = { .type = NETLINK_TYPE_STRING },
+ [TCA_OPTIONS] = { .size = sizeof(struct tc_netem_qopt) },
+ [TCA_INGRESS_BLOCK] = { .type = NETLINK_TYPE_U32 },
+ [TCA_EGRESS_BLOCK] = { .type = NETLINK_TYPE_U32 },
+};
+
+static const NLTypeSystem rtnl_qdisc_type_system = {
+ .count = ELEMENTSOF(rtnl_qdisc_types),
+ .types = rtnl_qdisc_types,
+};
+
static const NLType rtnl_types[] = {
[NLMSG_DONE] = { .type = NETLINK_TYPE_NESTED, .type_system = &empty_type_system, .size = 0 },
[NLMSG_ERROR] = { .type = NETLINK_TYPE_NESTED, .type_system = &empty_type_system, .size = sizeof(struct nlmsgerr) },
@@ -758,6 +771,9 @@ static const NLType rtnl_types[] = {
[RTM_NEWNEXTHOP] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_nexthop_type_system, .size = sizeof(struct nhmsg) },
[RTM_DELNEXTHOP] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_nexthop_type_system, .size = sizeof(struct nhmsg) },
[RTM_GETNEXTHOP] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_nexthop_type_system, .size = sizeof(struct nhmsg) },
+ [RTM_NEWQDISC] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_qdisc_type_system, .size = sizeof(struct tcmsg) },
+ [RTM_DELQDISC] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_qdisc_type_system, .size = sizeof(struct tcmsg) },
+ [RTM_GETQDISC] = { .type = NETLINK_TYPE_NESTED, .type_system = &rtnl_qdisc_type_system, .size = sizeof(struct tcmsg) },
};
const NLTypeSystem rtnl_type_system_root = {
diff --git a/src/libsystemd/sd-netlink/netlink-util.h b/src/libsystemd/sd-netlink/netlink-util.h
index 923accb7e3..94410e520a 100644
--- a/src/libsystemd/sd-netlink/netlink-util.h
+++ b/src/libsystemd/sd-netlink/netlink-util.h
@@ -41,6 +41,10 @@ static inline bool rtnl_message_type_is_routing_policy_rule(uint16_t type) {
return IN_SET(type, RTM_NEWRULE, RTM_DELRULE, RTM_GETRULE);
}
+static inline bool rtnl_message_type_is_qdisc(uint16_t type) {
+ return IN_SET(type, RTM_NEWQDISC, RTM_DELQDISC, RTM_GETQDISC);
+}
+
int rtnl_set_link_name(sd_netlink **rtnl, int ifindex, const char *name);
int rtnl_set_link_properties(sd_netlink **rtnl, int ifindex, const char *alias, const struct ether_addr *mac, uint32_t mtu);
diff --git a/src/libsystemd/sd-netlink/rtnl-message.c b/src/libsystemd/sd-netlink/rtnl-message.c
index 429b21b149..194676a6e5 100644
--- a/src/libsystemd/sd-netlink/rtnl-message.c
+++ b/src/libsystemd/sd-netlink/rtnl-message.c
@@ -1033,3 +1033,46 @@ int sd_rtnl_message_routing_policy_rule_get_rtm_src_prefixlen(const sd_netlink_m
return 0;
}
+
+int sd_rtnl_message_new_qdisc(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int tcm_family, int tcm_ifindex) {
+ struct tcmsg *tcm;
+ int r;
+
+ assert_return(rtnl_message_type_is_qdisc(nlmsg_type), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWQDISC)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
+
+ tcm = NLMSG_DATA((*ret)->hdr);
+ tcm->tcm_family = tcm_family;
+ tcm->tcm_ifindex = tcm_ifindex;
+
+ return 0;
+}
+
+int sd_rtnl_message_set_qdisc_parent(sd_netlink_message *m, uint32_t parent) {
+ struct tcmsg *tcm;
+
+ assert_return(rtnl_message_type_is_qdisc(m->hdr->nlmsg_type), -EINVAL);
+
+ tcm = NLMSG_DATA(m->hdr);
+ tcm->tcm_parent = parent;
+
+ return 0;
+}
+
+int sd_rtnl_message_set_qdisc_handle(sd_netlink_message *m, uint32_t handle) {
+ struct tcmsg *tcm;
+
+ assert_return(rtnl_message_type_is_qdisc(m->hdr->nlmsg_type), -EINVAL);
+
+ tcm = NLMSG_DATA(m->hdr);
+ tcm->tcm_handle = handle;
+
+ return 0;
+}
diff --git a/src/network/meson.build b/src/network/meson.build
index fd21008d10..06b6ec64a2 100644
--- a/src/network/meson.build
+++ b/src/network/meson.build
@@ -105,6 +105,12 @@ sources = files('''
networkd-util.h
networkd-wifi.c
networkd-wifi.h
+ tc/netem.c
+ tc/netem.h
+ tc/qdisc.c
+ tc/qdisc.h
+ tc/tc-util.c
+ tc/tc-util.h
'''.split())
systemd_networkd_sources = files('networkd.c')
diff --git a/src/network/networkd-link.c b/src/network/networkd-link.c
index 906267764e..91de2be61f 100644
--- a/src/network/networkd-link.c
+++ b/src/network/networkd-link.c
@@ -43,6 +43,7 @@
#include "tmpfile-util.h"
#include "udev-util.h"
#include "util.h"
+#include "tc/qdisc.h"
#include "virt.h"
uint32_t link_get_vrf_table(Link *link) {
@@ -1094,6 +1095,9 @@ void link_check_ready(Link *link) {
if (!link->routing_policy_rules_configured)
return;
+ if (!link->qdiscs_configured)
+ return;
+
if (link_has_carrier(link) || !link->network->configure_without_carrier) {
if (link_ipv4ll_enabled(link, ADDRESS_FAMILY_IPV4) && !link->ipv4ll_address)
@@ -2580,6 +2584,28 @@ static int link_drop_config(Link *link) {
return 0;
}
+static int link_configure_qdiscs(Link *link) {
+ QDiscs *qdisc;
+ Iterator i;
+ int r;
+
+ link->qdiscs_configured = false;
+ link->qdisc_messages = 0;
+
+ ORDERED_HASHMAP_FOREACH(qdisc, link->network->qdiscs_by_section, i) {
+ r = qdisc_configure(link, qdisc);
+ if (r < 0)
+ return r;
+ }
+
+ if (link->qdisc_messages == 0)
+ link->qdiscs_configured = true;
+ else
+ log_link_debug(link, "Configuring QDiscs");
+
+ return 0;
+}
+
static int link_configure(Link *link) {
int r;
@@ -2587,6 +2613,10 @@ static int link_configure(Link *link) {
assert(link->network);
assert(link->state == LINK_STATE_INITIALIZED);
+ r = link_configure_qdiscs(link);
+ if (r < 0)
+ return r;
+
if (link->iftype == ARPHRD_CAN)
return link_configure_can(link);
diff --git a/src/network/networkd-link.h b/src/network/networkd-link.h
index 1e4510b277..0b62326024 100644
--- a/src/network/networkd-link.h
+++ b/src/network/networkd-link.h
@@ -78,6 +78,7 @@ typedef struct Link {
unsigned nexthop_messages;
unsigned routing_policy_rule_messages;
unsigned routing_policy_rule_remove_messages;
+ unsigned qdisc_messages;
unsigned enslaving;
Set *addresses;
@@ -113,6 +114,7 @@ typedef struct Link {
bool static_routes_ready:1;
bool static_nexthops_configured:1;
bool routing_policy_rules_configured:1;
+ bool qdiscs_configured:1;
bool setting_mtu:1;
LIST_HEAD(Address, pool_addresses);
diff --git a/src/network/networkd-network-gperf.gperf b/src/network/networkd-network-gperf.gperf
index 68199ac45f..1375626e86 100644
--- a/src/network/networkd-network-gperf.gperf
+++ b/src/network/networkd-network-gperf.gperf
@@ -13,6 +13,8 @@ _Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
#include "networkd-ndisc.h"
#include "networkd-network.h"
#include "vlan-util.h"
+#include "tc/qdisc.h"
+#include "tc/netem.h"
%}
struct ConfigPerfItem;
%null_strings
@@ -241,6 +243,11 @@ CAN.BitRate, config_parse_si_size,
CAN.SamplePoint, config_parse_permille, 0, offsetof(Network, can_sample_point)
CAN.RestartSec, config_parse_sec, 0, offsetof(Network, can_restart_us)
CAN.TripleSampling, config_parse_tristate, 0, offsetof(Network, can_triple_sampling)
+TrafficControlQueueingDiscipline.Parent, config_parse_tc_qdiscs_parent, 0, 0
+TrafficControlQueueingDiscipline.NetworkEmulatorDelaySec, config_parse_tc_network_emulator_delay, 0, 0
+TrafficControlQueueingDiscipline.NetworkEmulatorDelayJitterSec, config_parse_tc_network_emulator_delay, 0, 0
+TrafficControlQueueingDiscipline.NetworkEmulatorLossRate, config_parse_tc_network_emulator_loss_rate, 0, 0
+TrafficControlQueueingDiscipline.NetworkEmulatorPacketLimit, config_parse_tc_network_emulator_packet_limit, 0, 0
/* backwards compatibility: do not add new entries to this section */
Network.IPv4LL, config_parse_ipv4ll, 0, offsetof(Network, link_local)
DHCP.ClientIdentifier, config_parse_dhcp_client_identifier, 0, offsetof(Network, dhcp_client_identifier)
diff --git a/src/network/networkd-network.c b/src/network/networkd-network.c
index 8cdcb57306..0956d2a9b7 100644
--- a/src/network/networkd-network.c
+++ b/src/network/networkd-network.c
@@ -471,6 +471,7 @@ int network_load_one(Manager *manager, OrderedHashmap **networks, const char *fi
"IPv6PrefixDelegation\0"
"IPv6Prefix\0"
"IPv6RoutePrefix\0"
+ "TrafficControlQueueingDiscipline\0"
"CAN\0",
config_item_perf_lookup, network_network_gperf_lookup,
CONFIG_PARSE_WARN, network);
@@ -663,6 +664,7 @@ static Network *network_free(Network *network) {
hashmap_free(network->address_labels_by_section);
hashmap_free(network->prefixes_by_section);
hashmap_free(network->rules_by_section);
+ ordered_hashmap_free_with_destructor(network->qdiscs_by_section, qdisc_free);
if (network->manager &&
network->manager->duids_requesting_uuid)
diff --git a/src/network/networkd-network.h b/src/network/networkd-network.h
index 8b4b5d042d..9820d6af29 100644
--- a/src/network/networkd-network.h
+++ b/src/network/networkd-network.h
@@ -28,6 +28,7 @@
#include "networkd-util.h"
#include "ordered-set.h"
#include "resolve-util.h"
+#include "tc/qdisc.h"
typedef enum IPv6PrivacyExtensions {
/* The values map to the kernel's /proc/sys/net/ipv6/conf/xxx/use_tempaddr values */
@@ -265,6 +266,7 @@ struct Network {
Hashmap *prefixes_by_section;
Hashmap *route_prefixes_by_section;
Hashmap *rules_by_section;
+ OrderedHashmap *qdiscs_by_section;
/* All kinds of DNS configuration */
struct in_addr_data *dns;
diff --git a/src/network/tc/netem.c b/src/network/tc/netem.c
new file mode 100644
index 0000000000..e0e3e9a48e
--- /dev/null
+++ b/src/network/tc/netem.c
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: LGPL-2.1+
+ * Copyright © 2019 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+#include <math.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "hashmap.h"
+#include "in-addr-util.h"
+#include "netem.h"
+#include "netlink-util.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "qdisc.h"
+#include "string-util.h"
+#include "tc-util.h"
+#include "util.h"
+
+int network_emulator_new(NetworkEmulator **ret) {
+ NetworkEmulator *ne = NULL;
+
+ ne = new(NetworkEmulator, 1);
+ if (!ne)
+ return -ENOMEM;
+
+ *ne = (NetworkEmulator) {
+ .delay = USEC_INFINITY,
+ .jitter = USEC_INFINITY,
+ };
+
+ *ret = TAKE_PTR(ne);
+
+ return 0;
+}
+
+int network_emulator_fill_message(Link *link, QDiscs *qdisc, sd_netlink_message *req) {
+ struct tc_netem_qopt opt = {
+ .limit = 1000,
+ };
+ int r;
+
+ assert(link);
+ assert(qdisc);
+ assert(req);
+
+ if (qdisc->ne.limit > 0)
+ opt.limit = qdisc->ne.limit;
+
+ if (qdisc->ne.loss > 0)
+ opt.loss = qdisc->ne.loss;
+
+ if (qdisc->ne.delay != USEC_INFINITY) {
+ r = tc_time_to_tick(qdisc->ne.delay, &opt.latency);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to calculate latency in TCA_OPTION: %m");
+ }
+
+ if (qdisc->ne.jitter != USEC_INFINITY) {
+ r = tc_time_to_tick(qdisc->ne.jitter, &opt.jitter);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Failed to calculate jitter in TCA_OPTION: %m");
+ }
+
+ r = sd_netlink_message_append_data(req, TCA_OPTIONS, &opt, sizeof(struct tc_netem_qopt));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_OPTION attribute: %m");
+
+ return 0;
+}
+
+int config_parse_tc_network_emulator_delay(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDiscs *qdisc = NULL;
+ Network *network = data;
+ usec_t u;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(network, filename, section_line, &qdisc);
+ if (r < 0)
+ return r;
+
+ if (isempty(rvalue)) {
+ if (streq(lvalue, "NetworkEmulatorDelaySec"))
+ qdisc->ne.delay = USEC_INFINITY;
+ else if (streq(lvalue, "NetworkEmulatorDelayJitterSec"))
+ qdisc->ne.jitter = USEC_INFINITY;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_sec(rvalue, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to parse '%s=', ignoring assignment: %s",
+ lvalue, rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "NetworkEmulatorDelaySec"))
+ qdisc->ne.delay = u;
+ else if (streq(lvalue, "NetworkEmulatorDelayJitterSec"))
+ qdisc->ne.jitter = u;
+
+ qdisc->has_network_emulator = true;
+ qdisc = NULL;
+
+ return 0;
+}
+
+int config_parse_tc_network_emulator_loss_rate(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDiscs *qdisc = NULL;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(network, filename, section_line, &qdisc);
+ if (r < 0)
+ return r;
+
+ if (isempty(rvalue)) {
+ qdisc->ne.loss = 0;
+
+ qdisc = NULL;
+ return 0;
+ }
+
+ r = parse_tc_percent(rvalue, &qdisc->ne.loss);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to parse 'NetworkEmularorLossRate=', ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+
+ qdisc = NULL;
+ return 0;
+}
+
+int config_parse_tc_network_emulator_packet_limit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDiscs *qdisc = NULL;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(network, filename, section_line, &qdisc);
+ if (r < 0)
+ return r;
+
+ if (isempty(rvalue)) {
+ qdisc->ne.limit = 0;
+ qdisc = NULL;
+
+ return 0;
+ }
+
+ r = safe_atou(rvalue, &qdisc->ne.limit);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to parse 'NetworkEmulatorPacketLimit=', ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+
+ qdisc = NULL;
+ return 0;
+}
diff --git a/src/network/tc/netem.h b/src/network/tc/netem.h
new file mode 100644
index 0000000000..33dfdd8588
--- /dev/null
+++ b/src/network/tc/netem.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1+
+ * Copyright © 2019 VMware, Inc. */
+#pragma once
+
+#include "sd-netlink.h"
+
+#include "conf-parser.h"
+#include "macro.h"
+#include "../networkd-link.h"
+#include "time-util.h"
+
+typedef struct NetworkEmulator NetworkEmulator;
+typedef struct QDiscs QDiscs;
+
+struct NetworkEmulator {
+ usec_t delay;
+ usec_t jitter;
+
+ uint32_t limit;
+ uint32_t loss;
+};
+
+int network_emulator_new(NetworkEmulator **ret);
+int network_emulator_fill_message(Link *link, QDiscs *qdisc, sd_netlink_message *req);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_tc_network_emulator_delay);
+CONFIG_PARSER_PROTOTYPE(config_parse_tc_network_emulator_loss_rate);
+CONFIG_PARSER_PROTOTYPE(config_parse_tc_network_emulator_packet_limit);
diff --git a/src/network/tc/qdisc.c b/src/network/tc/qdisc.c
new file mode 100644
index 0000000000..ed4a11d265
--- /dev/null
+++ b/src/network/tc/qdisc.c
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: LGPL-2.1+
+ * Copyright © 2019 VMware, Inc. */
+
+#include <linux/pkt_sched.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "in-addr-util.h"
+#include "netlink-util.h"
+#include "networkd-manager.h"
+#include "parse-util.h"
+#include "qdisc.h"
+#include "set.h"
+#include "string-util.h"
+#include "util.h"
+
+static int qdisc_new(QDiscs **ret) {
+ QDiscs *qdisc;
+
+ qdisc = new(QDiscs, 1);
+ if (!qdisc)
+ return -ENOMEM;
+
+ *qdisc = (QDiscs) {
+ .family = AF_UNSPEC,
+ .parent = TC_H_ROOT,
+ };
+
+ *ret = TAKE_PTR(qdisc);
+
+ return 0;
+}
+
+int qdisc_new_static(Network *network, const char *filename, unsigned section_line, QDiscs **ret) {
+ _cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
+ _cleanup_(qdisc_freep) QDiscs *qdisc = NULL;
+ int r;
+
+ assert(network);
+ assert(ret);
+ assert(!!filename == (section_line > 0));
+
+ if (filename) {
+ r = network_config_section_new(filename, section_line, &n);
+ if (r < 0)
+ return r;
+
+ qdisc = ordered_hashmap_get(network->qdiscs_by_section, n);
+ if (qdisc) {
+ *ret = TAKE_PTR(qdisc);
+
+ return 0;
+ }
+ }
+
+ r = qdisc_new(&qdisc);
+ if (r < 0)
+ return r;
+
+ qdisc->network = network;
+
+ if (filename) {
+ qdisc->section = TAKE_PTR(n);
+
+ r = ordered_hashmap_ensure_allocated(&network->qdiscs_by_section, &network_config_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = ordered_hashmap_put(network->qdiscs_by_section, qdisc->section, qdisc);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(qdisc);
+
+ return 0;
+}
+
+void qdisc_free(QDiscs *qdisc) {
+ if (!qdisc)
+ return;
+
+ if (qdisc->network && qdisc->section)
+ ordered_hashmap_remove(qdisc->network->qdiscs_by_section, qdisc->section);
+
+ network_config_section_free(qdisc->section);
+
+ free(qdisc);
+}
+
+static int qdisc_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
+ int r;
+
+ assert(link);
+ assert(link->qdisc_messages > 0);
+ link->qdisc_messages--;
+
+ if (IN_SET(link->state, LINK_STATE_FAILED, LINK_STATE_LINGER))
+ return 1;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0 && r != -EEXIST) {
+ log_link_error_errno(link, r, "Could not set QDisc: %m");
+ link_enter_failed(link);
+ return 1;
+ }
+
+ if (link->route_messages == 0) {
+ log_link_debug(link, "QDiscs configured");
+ link->qdiscs_configured = true;
+ link_check_ready(link);
+ }
+
+ return 1;
+}
+
+int qdisc_configure(Link *link, QDiscs *qdisc) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ int r;
+
+ assert(link);
+ assert(link->manager);
+ assert(link->manager->rtnl);
+ assert(link->ifindex > 0);
+
+ r = sd_rtnl_message_new_qdisc(link->manager->rtnl, &req, RTM_NEWQDISC, qdisc->family, link->ifindex);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not create RTM_NEWQDISC message: %m");
+
+ r = sd_rtnl_message_set_qdisc_parent(req, qdisc->parent);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not create tcm_parent message: %m");
+
+ if (qdisc->parent == TC_H_CLSACT) {
+ r = sd_rtnl_message_set_qdisc_handle(req, TC_H_MAKE(TC_H_CLSACT, 0));
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not set tcm_handle message: %m");
+
+ r = sd_netlink_message_append_string(req, TCA_KIND, "clsact");
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_KIND attribute: %m");
+ }
+
+ if (qdisc->has_network_emulator) {
+ r = sd_netlink_message_append_string(req, TCA_KIND, "netem");
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not append TCA_KIND attribute: %m");
+
+ r = network_emulator_fill_message(link, qdisc, req);
+ if (r < 0)
+ return r;
+ }
+
+ r = netlink_call_async(link->manager->rtnl, NULL, req, qdisc_handler, link_netlink_destroy_callback, link);
+ if (r < 0)
+ return log_link_error_errno(link, r, "Could not send rtnetlink message: %m");
+
+ link_ref(link);
+ link->qdisc_messages++;
+
+ return 0;
+}
+
+int config_parse_tc_qdiscs_parent(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_(qdisc_free_or_set_invalidp) QDiscs *qdisc = NULL;
+ Network *network = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = qdisc_new_static(network, filename, section_line, &qdisc);
+ if (r < 0)
+ return r;
+
+ if (streq(rvalue, "root"))
+ qdisc->parent = TC_H_ROOT;
+ else if (streq(rvalue, "clsact"))
+ qdisc->parent = TC_H_CLSACT;
+ else {
+ log_syntax(unit, LOG_ERR, filename, line, r,
+ "Failed to parse [QueueDiscs] 'Parent=', ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+
+ qdisc = NULL;
+
+ return 0;
+}
diff --git a/src/network/tc/qdisc.h b/src/network/tc/qdisc.h
new file mode 100644
index 0000000000..80b893b837
--- /dev/null
+++ b/src/network/tc/qdisc.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1+
+ * Copyright © 2019 VMware, Inc. */
+#pragma once
+
+#include "conf-parser.h"
+#include "macro.h"
+#include "netem.h"
+#include "../networkd-util.h"
+
+typedef struct QDiscs QDiscs;
+
+struct QDiscs {
+ NetworkConfigSection *section;
+ Network *network;
+
+ Link *link;
+
+ int family;
+
+ uint32_t handle;
+ uint32_t parent;
+
+ bool has_network_emulator:1;
+
+ NetworkEmulator ne;
+};
+
+void qdisc_free(QDiscs *qdisc);
+int qdisc_new_static(Network *network, const char *filename, unsigned section_line, QDiscs **ret);
+
+int qdisc_configure(Link *link, QDiscs *qdisc);
+
+DEFINE_NETWORK_SECTION_FUNCTIONS(QDiscs, qdisc_free);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_tc_qdiscs_parent);
diff --git a/src/network/tc/tc-util.c b/src/network/tc/tc-util.c
new file mode 100644
index 0000000000..7e1cf53e11
--- /dev/null
+++ b/src/network/tc/tc-util.c
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1+
+ * Copyright © 2019 VMware, Inc. */
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "parse-util.h"
+#include "tc-util.h"
+#include "time-util.h"
+
+static int tc_init(double *ticks_in_usec) {
+ uint32_t clock_resolution, ticks_to_usec, usec_to_ticks;
+ _cleanup_free_ char *line = NULL;
+ double clock_factor;
+ int r;
+
+ r = read_one_line_file("/proc/net/psched", &line);
+ if (r < 0)
+ return r;
+
+ r = sscanf(line, "%08x%08x%08x", &ticks_to_usec, &usec_to_ticks, &clock_resolution);
+ if (r < 3)
+ return -EIO;
+
+ clock_factor = (double) clock_resolution / USEC_PER_SEC;
+ *ticks_in_usec = (double) ticks_to_usec / usec_to_ticks * clock_factor;
+
+ return 0;
+}
+
+int tc_time_to_tick(usec_t t, uint32_t *ret) {
+ static double ticks_in_usec = -1;
+ usec_t a;
+ int r;
+
+ assert(ret);
+
+ if (ticks_in_usec < 0) {
+ r = tc_init(&ticks_in_usec);
+ if (r < 0)
+ return r;
+ }
+
+ a = t * ticks_in_usec;
+ if (a > UINT32_MAX)
+ return -ERANGE;
+
+ *ret = a;
+ return 0;
+}
+
+int parse_tc_percent(const char *s, uint32_t *percent) {
+ int r;
+
+ assert(s);
+ assert(percent);
+
+ r = parse_permille(s);
+ if (r < 0)
+ return r;
+
+ *percent = (double) r / 1000 * UINT32_MAX;
+ return 0;
+}
diff --git a/src/network/tc/tc-util.h b/src/network/tc/tc-util.h
new file mode 100644
index 0000000000..ce7ab40538
--- /dev/null
+++ b/src/network/tc/tc-util.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1+
+ * Copyright © 2019 VMware, Inc. */
+#pragma once
+
+#include "time-util.h"
+
+int tc_time_to_tick(usec_t t, uint32_t *ret);
+int parse_tc_percent(const char *s, uint32_t *percent);
diff --git a/src/systemd/sd-netlink.h b/src/systemd/sd-netlink.h
index 099d76d3c7..b34befebc8 100644
--- a/src/systemd/sd-netlink.h
+++ b/src/systemd/sd-netlink.h
@@ -202,6 +202,10 @@ int sd_rtnl_message_routing_policy_rule_get_rtm_type(const sd_netlink_message *m
int sd_rtnl_message_routing_policy_rule_set_flags(sd_netlink_message *m, unsigned flags);
int sd_rtnl_message_routing_policy_rule_get_flags(const sd_netlink_message *m, unsigned *flags);
+int sd_rtnl_message_new_qdisc(sd_netlink *rtnl, sd_netlink_message **ret, uint16_t nlmsg_type, int tcm_family, int tcm_ifindex);
+int sd_rtnl_message_set_qdisc_parent(sd_netlink_message *m, uint32_t parent);
+int sd_rtnl_message_set_qdisc_handle(sd_netlink_message *m, uint32_t handle);
+
/* genl */
int sd_genl_socket_open(sd_netlink **nl);
int sd_genl_message_new(sd_netlink *nl, sd_genl_family family, uint8_t cmd, sd_netlink_message **m);
diff --git a/test/fuzz/fuzz-network-parser/directives.network b/test/fuzz/fuzz-network-parser/directives.network
index 7a25561336..0d0892fd3a 100644
--- a/test/fuzz/fuzz-network-parser/directives.network
+++ b/test/fuzz/fuzz-network-parser/directives.network
@@ -262,3 +262,9 @@ DNS=
[NextHop]
Id=
Gateway=
+[TrafficControlQueueingDiscipline]
+Parent=
+NetworkEmulatorDelaySec=
+NetworkEmulatorDelayJitterSec=
+NetworkEmulatorLossRate=
+NetworkEmulatorPacketLimit=
diff --git a/test/test-network/conf/25-qdisc.network b/test/test-network/conf/25-qdisc.network
new file mode 100644
index 0000000000..de8f7243ce
--- /dev/null
+++ b/test/test-network/conf/25-qdisc.network
@@ -0,0 +1,20 @@
+[Match]
+Name=dummy98
+
+[Network]
+IPv6AcceptRA=no
+Address=10.1.2.3/16
+
+[TrafficControlQueueingDiscipline]
+Parent=root
+NetworkEmulatorDelaySec=50ms
+NetworkEmulatorDelayJitterSec=10ms
+NetworkEmulatorLossRate=20%
+NetworkEmulatorPacketLimit=100
+
+[TrafficControlQueueingDiscipline]
+Parent=clsact
+NetworkEmulatorDelaySec=100ms
+NetworkEmulatorDelayJitterSec=13ms
+NetworkEmulatorLossRate=20.5%
+NetworkEmulatorPacketLimit=200
diff --git a/test/test-network/systemd-networkd-tests.py b/test/test-network/systemd-networkd-tests.py
index 1cd623482f..e9b9fb0d7c 100755
--- a/test/test-network/systemd-networkd-tests.py
+++ b/test/test-network/systemd-networkd-tests.py
@@ -1485,15 +1485,16 @@ class NetworkdNetworkTests(unittest.TestCase, Utilities):
'25-gre-tunnel-remote-any.netdev',
'25-ip6gre-tunnel-remote-any.netdev',
'25-ipv6-address-label-section.network',
+ '25-link-local-addressing-no.network',
+ '25-link-local-addressing-yes.network',
+ '25-link-section-unmanaged.network',
'25-neighbor-section.network',
'25-neighbor-next.network',
'25-neighbor-ipv6.network',
'25-neighbor-ip-dummy.network',
'25-neighbor-ip.network',
'25-nexthop.network',
- '25-link-local-addressing-no.network',
- '25-link-local-addressing-yes.network',
- '25-link-section-unmanaged.network',
+ '25-qdisc.network',
'25-route-ipv6-src.network',
'25-route-static.network',
'25-gateway-static.network',
@@ -2051,6 +2052,17 @@ class NetworkdNetworkTests(unittest.TestCase, Utilities):
print(output)
self.assertRegex(output, '192.168.5.1')
+ def test_qdisc(self):
+ copy_unit_to_networkd_unit_path('25-qdisc.network', '12-dummy.netdev')
+ start_networkd()
+
+ self.wait_online(['dummy98:routable'])
+
+ output = check_output('tc qdisc show dev dummy98')
+ print(output)
+ self.assertRegex(output, 'limit 100 delay 50.0ms 10.0ms loss 20%')
+ self.assertRegex(output, 'limit 200 delay 100.0ms 13.0ms loss 20.5%')
+
class NetworkdStateFileTests(unittest.TestCase, Utilities):
links = [
'dummy98',