diff options
author | Mauro Carvalho Chehab <mchehab+huawei@kernel.org> | 2021-12-07 11:29:41 +0100 |
---|---|---|
committer | Mauro Carvalho Chehab <mchehab+huawei@kernel.org> | 2021-12-07 11:29:41 +0100 |
commit | 4383cfa18c5bbc5b9b6a9e77adc12aec1c20b72d (patch) | |
tree | 7dfa9fae520bda949cb79ce315d077cda102296d /tools | |
parent | 8cc7a1b2aca067397a016cdb971a5e6ad9b640c7 (diff) | |
parent | 0fcfb00b28c0b7884635dacf38e46d60bf3d4eb1 (diff) | |
download | linux-4383cfa18c5bbc5b9b6a9e77adc12aec1c20b72d.tar.gz |
Merge tag 'v5.16-rc4' into media_tree
Linux 5.16-rc4
* tag 'v5.16-rc4': (984 commits)
Linux 5.16-rc4
KVM: SVM: Do not terminate SEV-ES guests on GHCB validation failure
KVM: SEV: Fall back to vmalloc for SEV-ES scratch area if necessary
KVM: SEV: Return appropriate error codes if SEV-ES scratch setup fails
parisc: Mark cr16 CPU clocksource unstable on all SMP machines
parisc: Fix "make install" on newer debian releases
sched/uclamp: Fix rq->uclamp_max not set on first enqueue
preempt/dynamic: Fix setup_preempt_mode() return value
cifs: avoid use of dstaddr as key for fscache client cookie
cifs: add server conn_id to fscache client cookie
cifs: wait for tcon resource_id before getting fscache super
cifs: fix missed refcounting of ipc tcon
x86/xen: Add xenpv_restore_regs_and_return_to_usermode()
x86/entry: Use the correct fence macro after swapgs in kernel CR3
fget: check that the fd still exists after getting a ref to it
x86/entry: Add a fence for kernel entry SWAPGS in paranoid_entry()
x86/sev: Fix SEV-ES INS/OUTS instructions for word, dword, and qword
powercap: DTPM: Drop unused local variable from init_dtpm()
io-wq: don't retry task_work creation failure on fatal conditions
serial: 8250_bcm7271: UART errors after resuming from S2
...
Diffstat (limited to 'tools')
73 files changed, 2309 insertions, 669 deletions
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index d0ce5cfd3ac1..d5b5f2ab87a0 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -277,6 +277,7 @@ #define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ #define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ +#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -298,6 +299,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 2ef1f6513c68..5a776a08f78c 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -504,4 +504,8 @@ struct kvm_pmu_event_filter { #define KVM_PMU_EVENT_ALLOW 0 #define KVM_PMU_EVENT_DENY 1 +/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ +#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */ +#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ + #endif /* _ASM_X86_KVM_H */ diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index bbd1150578f7..8791d0e2762b 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -88,5 +88,4 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OU $(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT) $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) \ - LIBBPF_OUTPUT=$(BPFOBJ_OUTPUT) \ - LIBBPF_DESTDIR=$(BPF_DESTDIR) CC=$(HOSTCC) LD=$(HOSTLD) + CC=$(HOSTCC) LD=$(HOSTLD) diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 920439527291..0b243ce842be 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -200,7 +200,6 @@ int main(int argc, char *argv[]) main_test_timerfd(); main_test_stackprotector_all(); main_test_libdw_dwarf_unwind(); - main_test_sync_compare_and_swap(argc, argv); main_test_zlib(); main_test_pthread_attr_setaffinity_np(); main_test_pthread_barrier(); diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index a7e54a08fb54..3e8df500cfbd 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -7,6 +7,7 @@ #include <assert.h> #include <linux/build_bug.h> #include <linux/compiler.h> +#include <linux/math.h> #include <endian.h> #include <byteswap.h> @@ -14,8 +15,6 @@ #define UINT_MAX (~0U) #endif -#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) - #define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1) #define __PERF_ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) @@ -52,15 +51,6 @@ _min1 < _min2 ? _min1 : _min2; }) #endif -#ifndef roundup -#define roundup(x, y) ( \ -{ \ - const typeof(y) __y = y; \ - (((x) + (__y - 1)) / __y) * __y; \ -} \ -) -#endif - #ifndef BUG_ON #ifdef NDEBUG #define BUG_ON(cond) do { if (cond) {} } while (0) @@ -104,16 +94,6 @@ int scnprintf_pad(char * buf, size_t size, const char * fmt, ...); #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) -/* - * This looks more complex than it should be. But we need to - * get the type for the ~ right in round_down (it needs to be - * as wide as the result!), and we want to evaluate the macro - * arguments just once each. - */ -#define __round_mask(x, y) ((__typeof__(x))((y)-1)) -#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) -#define round_down(x, y) ((x) & ~__round_mask(x, y)) - #define current_gfp_context(k) 0 #define synchronize_rcu() diff --git a/tools/include/linux/math.h b/tools/include/linux/math.h new file mode 100644 index 000000000000..4e7af99ec9eb --- /dev/null +++ b/tools/include/linux/math.h @@ -0,0 +1,25 @@ +#ifndef _TOOLS_MATH_H +#define _TOOLS_MATH_H + +/* + * This looks more complex than it should be. But we need to + * get the type for the ~ right in round_down (it needs to be + * as wide as the result!), and we want to evaluate the macro + * arguments just once each. + */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) + +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) + +#ifndef roundup +#define roundup(x, y) ( \ +{ \ + const typeof(y) __y = y; \ + (((x) + (__y - 1)) / __y) * __y; \ +} \ +) +#endif + +#endif diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index b3610fdd1fee..eebd3894fe89 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -7,24 +7,23 @@ /* This struct should be in sync with struct rtnl_link_stats64 */ struct rtnl_link_stats { - __u32 rx_packets; /* total packets received */ - __u32 tx_packets; /* total packets transmitted */ - __u32 rx_bytes; /* total bytes received */ - __u32 tx_bytes; /* total bytes transmitted */ - __u32 rx_errors; /* bad packets received */ - __u32 tx_errors; /* packet transmit problems */ - __u32 rx_dropped; /* no space in linux buffers */ - __u32 tx_dropped; /* no space available in linux */ - __u32 multicast; /* multicast packets received */ + __u32 rx_packets; + __u32 tx_packets; + __u32 rx_bytes; + __u32 tx_bytes; + __u32 rx_errors; + __u32 tx_errors; + __u32 rx_dropped; + __u32 tx_dropped; + __u32 multicast; __u32 collisions; - /* detailed rx_errors: */ __u32 rx_length_errors; - __u32 rx_over_errors; /* receiver ring buff overflow */ - __u32 rx_crc_errors; /* recved pkt with crc error */ - __u32 rx_frame_errors; /* recv'd frame alignment error */ - __u32 rx_fifo_errors; /* recv'r fifo overrun */ - __u32 rx_missed_errors; /* receiver missed packet */ + __u32 rx_over_errors; + __u32 rx_crc_errors; + __u32 rx_frame_errors; + __u32 rx_fifo_errors; + __u32 rx_missed_errors; /* detailed tx_errors */ __u32 tx_aborted_errors; @@ -37,29 +36,201 @@ struct rtnl_link_stats { __u32 rx_compressed; __u32 tx_compressed; - __u32 rx_nohandler; /* dropped, no handler found */ + __u32 rx_nohandler; }; -/* The main device statistics structure */ +/** + * struct rtnl_link_stats64 - The main device statistics structure. + * + * @rx_packets: Number of good packets received by the interface. + * For hardware interfaces counts all good packets received from the device + * by the host, including packets which host had to drop at various stages + * of processing (even in the driver). + * + * @tx_packets: Number of packets successfully transmitted. + * For hardware interfaces counts packets which host was able to successfully + * hand over to the device, which does not necessarily mean that packets + * had been successfully transmitted out of the device, only that device + * acknowledged it copied them out of host memory. + * + * @rx_bytes: Number of good received bytes, corresponding to @rx_packets. + * + * For IEEE 802.3 devices should count the length of Ethernet Frames + * excluding the FCS. + * + * @tx_bytes: Number of good transmitted bytes, corresponding to @tx_packets. + * + * For IEEE 802.3 devices should count the length of Ethernet Frames + * excluding the FCS. + * + * @rx_errors: Total number of bad packets received on this network device. + * This counter must include events counted by @rx_length_errors, + * @rx_crc_errors, @rx_frame_errors and other errors not otherwise + * counted. + * + * @tx_errors: Total number of transmit problems. + * This counter must include events counter by @tx_aborted_errors, + * @tx_carrier_errors, @tx_fifo_errors, @tx_heartbeat_errors, + * @tx_window_errors and other errors not otherwise counted. + * + * @rx_dropped: Number of packets received but not processed, + * e.g. due to lack of resources or unsupported protocol. + * For hardware interfaces this counter may include packets discarded + * due to L2 address filtering but should not include packets dropped + * by the device due to buffer exhaustion which are counted separately in + * @rx_missed_errors (since procfs folds those two counters together). + * + * @tx_dropped: Number of packets dropped on their way to transmission, + * e.g. due to lack of resources. + * + * @multicast: Multicast packets received. + * For hardware interfaces this statistic is commonly calculated + * at the device level (unlike @rx_packets) and therefore may include + * packets which did not reach the host. + * + * For IEEE 802.3 devices this counter may be equivalent to: + * + * - 30.3.1.1.21 aMulticastFramesReceivedOK + * + * @collisions: Number of collisions during packet transmissions. + * + * @rx_length_errors: Number of packets dropped due to invalid length. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter should be equivalent to a sum + * of the following attributes: + * + * - 30.3.1.1.23 aInRangeLengthErrors + * - 30.3.1.1.24 aOutOfRangeLengthField + * - 30.3.1.1.25 aFrameTooLongErrors + * + * @rx_over_errors: Receiver FIFO overflow event counter. + * + * Historically the count of overflow events. Such events may be + * reported in the receive descriptors or via interrupts, and may + * not correspond one-to-one with dropped packets. + * + * The recommended interpretation for high speed interfaces is - + * number of packets dropped because they did not fit into buffers + * provided by the host, e.g. packets larger than MTU or next buffer + * in the ring was not available for a scatter transfer. + * + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * This statistics was historically used interchangeably with + * @rx_fifo_errors. + * + * This statistic corresponds to hardware events and is not commonly used + * on software devices. + * + * @rx_crc_errors: Number of packets received with a CRC error. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.6 aFrameCheckSequenceErrors + * + * @rx_frame_errors: Receiver frame alignment errors. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter should be equivalent to: + * + * - 30.3.1.1.7 aAlignmentErrors + * + * @rx_fifo_errors: Receiver FIFO error counter. + * + * Historically the count of overflow events. Those events may be + * reported in the receive descriptors or via interrupts, and may + * not correspond one-to-one with dropped packets. + * + * This statistics was used interchangeably with @rx_over_errors. + * Not recommended for use in drivers for high speed interfaces. + * + * This statistic is used on software devices, e.g. to count software + * packet queue overflow (can) or sequencing errors (GRE). + * + * @rx_missed_errors: Count of packets missed by the host. + * Folded into the "drop" counter in `/proc/net/dev`. + * + * Counts number of packets dropped by the device due to lack + * of buffer space. This usually indicates that the host interface + * is slower than the network interface, or host is not keeping up + * with the receive packet rate. + * + * This statistic corresponds to hardware events and is not used + * on software devices. + * + * @tx_aborted_errors: + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * For IEEE 802.3 devices capable of half-duplex operation this counter + * must be equivalent to: + * + * - 30.3.1.1.11 aFramesAbortedDueToXSColls + * + * High speed interfaces may use this counter as a general device + * discard counter. + * + * @tx_carrier_errors: Number of frame transmission errors due to loss + * of carrier during transmission. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.13 aCarrierSenseErrors + * + * @tx_fifo_errors: Number of frame transmission errors due to device + * FIFO underrun / underflow. This condition occurs when the device + * begins transmission of a frame but is unable to deliver the + * entire frame to the transmitter in time for transmission. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * @tx_heartbeat_errors: Number of Heartbeat / SQE Test errors for + * old half-duplex Ethernet. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices possibly equivalent to: + * + * - 30.3.2.1.4 aSQETestErrors + * + * @tx_window_errors: Number of frame transmission errors due + * to late collisions (for Ethernet - after the first 64B of transmission). + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.10 aLateCollisions + * + * @rx_compressed: Number of correctly received compressed packets. + * This counters is only meaningful for interfaces which support + * packet compression (e.g. CSLIP, PPP). + * + * @tx_compressed: Number of transmitted compressed packets. + * This counters is only meaningful for interfaces which support + * packet compression (e.g. CSLIP, PPP). + * + * @rx_nohandler: Number of packets received on the interface + * but dropped by the networking stack because the device is + * not designated to receive packets (e.g. backup link in a bond). + */ struct rtnl_link_stats64 { - __u64 rx_packets; /* total packets received */ - __u64 tx_packets; /* total packets transmitted */ - __u64 rx_bytes; /* total bytes received */ - __u64 tx_bytes; /* total bytes transmitted */ - __u64 rx_errors; /* bad packets received */ - __u64 tx_errors; /* packet transmit problems */ - __u64 rx_dropped; /* no space in linux buffers */ - __u64 tx_dropped; /* no space available in linux */ - __u64 multicast; /* multicast packets received */ + __u64 rx_packets; + __u64 tx_packets; + __u64 rx_bytes; + __u64 tx_bytes; + __u64 rx_errors; + __u64 tx_errors; + __u64 rx_dropped; + __u64 tx_dropped; + __u64 multicast; __u64 collisions; /* detailed rx_errors: */ __u64 rx_length_errors; - __u64 rx_over_errors; /* receiver ring buff overflow */ - __u64 rx_crc_errors; /* recved pkt with crc error */ - __u64 rx_frame_errors; /* recv'd frame alignment error */ - __u64 rx_fifo_errors; /* recv'r fifo overrun */ - __u64 rx_missed_errors; /* receiver missed packet */ + __u64 rx_over_errors; + __u64 rx_crc_errors; + __u64 rx_frame_errors; + __u64 rx_fifo_errors; + __u64 rx_missed_errors; /* detailed tx_errors */ __u64 tx_aborted_errors; @@ -71,8 +242,7 @@ struct rtnl_link_stats64 { /* for cslip etc */ __u64 rx_compressed; __u64 tx_compressed; - - __u64 rx_nohandler; /* dropped, no handler found */ + __u64 rx_nohandler; }; /* The struct should be in sync with struct ifmap */ @@ -170,12 +340,29 @@ enum { IFLA_PROP_LIST, IFLA_ALT_IFNAME, /* Alternative ifname */ IFLA_PERM_ADDRESS, + IFLA_PROTO_DOWN_REASON, + + /* device (sysfs) name as parent, used instead + * of IFLA_LINK where there's no parent netdev + */ + IFLA_PARENT_DEV_NAME, + IFLA_PARENT_DEV_BUS_NAME, + __IFLA_MAX }; #define IFLA_MAX (__IFLA_MAX - 1) +enum { + IFLA_PROTO_DOWN_REASON_UNSPEC, + IFLA_PROTO_DOWN_REASON_MASK, /* u32, mask for reason bits */ + IFLA_PROTO_DOWN_REASON_VALUE, /* u32, reason bit value */ + + __IFLA_PROTO_DOWN_REASON_CNT, + IFLA_PROTO_DOWN_REASON_MAX = __IFLA_PROTO_DOWN_REASON_CNT - 1 +}; + /* backwards compatibility for userspace */ #ifndef __KERNEL__ #define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg)))) @@ -293,6 +480,7 @@ enum { IFLA_BR_MCAST_MLD_VERSION, IFLA_BR_VLAN_STATS_PER_PORT, IFLA_BR_MULTI_BOOLOPT, + IFLA_BR_MCAST_QUERIER_STATE, __IFLA_BR_MAX, }; @@ -346,6 +534,8 @@ enum { IFLA_BRPORT_BACKUP_PORT, IFLA_BRPORT_MRP_RING_OPEN, IFLA_BRPORT_MRP_IN_OPEN, + IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, + IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -433,6 +623,7 @@ enum macvlan_macaddr_mode { }; #define MACVLAN_FLAG_NOPROMISC 1 +#define MACVLAN_FLAG_NODST 2 /* skip dst macvlan if matching src macvlan */ /* VRF section */ enum { @@ -597,6 +788,18 @@ enum ifla_geneve_df { GENEVE_DF_MAX = __GENEVE_DF_END - 1, }; +/* Bareudp section */ +enum { + IFLA_BAREUDP_UNSPEC, + IFLA_BAREUDP_PORT, + IFLA_BAREUDP_ETHERTYPE, + IFLA_BAREUDP_SRCPORT_MIN, + IFLA_BAREUDP_MULTIPROTO_MODE, + __IFLA_BAREUDP_MAX +}; + +#define IFLA_BAREUDP_MAX (__IFLA_BAREUDP_MAX - 1) + /* PPP section */ enum { IFLA_PPP_UNSPEC, @@ -899,7 +1102,14 @@ enum { #define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1) -/* HSR section */ +/* HSR/PRP section, both uses same interface */ + +/* Different redundancy protocols for hsr device */ +enum { + HSR_PROTOCOL_HSR, + HSR_PROTOCOL_PRP, + HSR_PROTOCOL_MAX, +}; enum { IFLA_HSR_UNSPEC, @@ -909,6 +1119,9 @@ enum { IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ IFLA_HSR_SEQ_NR, IFLA_HSR_VERSION, /* HSR version */ + IFLA_HSR_PROTOCOL, /* Indicate different protocol than + * HSR. For example PRP. + */ __IFLA_HSR_MAX, }; @@ -1033,6 +1246,8 @@ enum { #define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1) #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) +#define RMNET_FLAGS_INGRESS_MAP_CKSUMV5 (1U << 4) +#define RMNET_FLAGS_EGRESS_MAP_CKSUMV5 (1U << 5) enum { IFLA_RMNET_UNSPEC, @@ -1048,4 +1263,14 @@ struct ifla_rmnet_flags { __u32 mask; }; +/* MCTP section */ + +enum { + IFLA_MCTP_UNSPEC, + IFLA_MCTP_NET, + __IFLA_MCTP_MAX, +}; + +#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index a067410ebea5..1daa45268de2 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -269,6 +269,7 @@ struct kvm_xen_exit { #define KVM_EXIT_AP_RESET_HOLD 32 #define KVM_EXIT_X86_BUS_LOCK 33 #define KVM_EXIT_XEN 34 +#define KVM_EXIT_RISCV_SBI 35 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -397,13 +398,23 @@ struct kvm_run { * "ndata" is correct, that new fields are enumerated in "flags", * and that each flag enumerates fields that are 64-bit aligned * and sized (so that ndata+internal.data[] is valid/accurate). + * + * Space beyond the defined fields may be used to store arbitrary + * debug information relating to the emulation failure. It is + * accounted for in "ndata" but the format is unspecified and is + * not represented in "flags". Any such information is *not* ABI! */ struct { __u32 suberror; __u32 ndata; __u64 flags; - __u8 insn_size; - __u8 insn_bytes[15]; + union { + struct { + __u8 insn_size; + __u8 insn_bytes[15]; + }; + }; + /* Arbitrary debug data may follow. */ } emulation_failure; /* KVM_EXIT_OSI */ struct { @@ -469,6 +480,13 @@ struct kvm_run { } msr; /* KVM_EXIT_XEN */ struct kvm_xen_exit xen; + /* KVM_EXIT_RISCV_SBI */ + struct { + unsigned long extension_id; + unsigned long function_id; + unsigned long args[6]; + unsigned long ret[2]; + } riscv_sbi; /* Fix the size of the union. */ char padding[256]; }; @@ -1112,6 +1130,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_BINARY_STATS_FD 203 #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 #define KVM_CAP_ARM_MTE 205 +#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 #ifdef KVM_CAP_IRQ_ROUTING @@ -1223,11 +1242,16 @@ struct kvm_irqfd { /* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */ #define KVM_CLOCK_TSC_STABLE 2 +#define KVM_CLOCK_REALTIME (1 << 2) +#define KVM_CLOCK_HOST_TSC (1 << 3) struct kvm_clock_data { __u64 clock; __u32 flags; - __u32 pad[9]; + __u32 pad0; + __u64 realtime; + __u64 host_tsc; + __u32 pad[4]; }; /* For KVM_CAP_SW_TLB */ diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index d26e5472fe50..6f3df004479b 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -45,8 +45,8 @@ struct bpf_gen { int nr_fd_array; }; -void bpf_gen__init(struct bpf_gen *gen, int log_level); -int bpf_gen__finish(struct bpf_gen *gen); +void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps); +int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps); void bpf_gen__free(struct bpf_gen *gen); void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size); void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_params *map_attr, int map_idx); diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 502dea53a742..9934851ccde7 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -18,7 +18,7 @@ #define MAX_USED_MAPS 64 #define MAX_USED_PROGS 32 #define MAX_KFUNC_DESCS 256 -#define MAX_FD_ARRAY_SZ (MAX_USED_PROGS + MAX_KFUNC_DESCS) +#define MAX_FD_ARRAY_SZ (MAX_USED_MAPS + MAX_KFUNC_DESCS) /* The following structure describes the stack layout of the loader program. * In addition R6 contains the pointer to context. @@ -33,8 +33,8 @@ */ struct loader_stack { __u32 btf_fd; - __u32 prog_fd[MAX_USED_PROGS]; __u32 inner_map_fd; + __u32 prog_fd[MAX_USED_PROGS]; }; #define stack_off(field) \ @@ -42,6 +42,11 @@ struct loader_stack { #define attr_field(attr, field) (attr + offsetof(union bpf_attr, field)) +static int blob_fd_array_off(struct bpf_gen *gen, int index) +{ + return gen->fd_array + index * sizeof(int); +} + static int realloc_insn_buf(struct bpf_gen *gen, __u32 size) { size_t off = gen->insn_cur - gen->insn_start; @@ -102,11 +107,15 @@ static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn in emit(gen, insn2); } -void bpf_gen__init(struct bpf_gen *gen, int log_level) +static int add_data(struct bpf_gen *gen, const void *data, __u32 size); +static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off); + +void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps) { - size_t stack_sz = sizeof(struct loader_stack); + size_t stack_sz = sizeof(struct loader_stack), nr_progs_sz; int i; + gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int)); gen->log_level = log_level; /* save ctx pointer into R6 */ emit(gen, BPF_MOV64_REG(BPF_REG_6, BPF_REG_1)); @@ -118,19 +127,27 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level) emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0)); emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel)); + /* amount of stack actually used, only used to calculate iterations, not stack offset */ + nr_progs_sz = offsetof(struct loader_stack, prog_fd[nr_progs]); /* jump over cleanup code */ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, - /* size of cleanup code below */ - (stack_sz / 4) * 3 + 2)); + /* size of cleanup code below (including map fd cleanup) */ + (nr_progs_sz / 4) * 3 + 2 + + /* 6 insns for emit_sys_close_blob, + * 6 insns for debug_regs in emit_sys_close_blob + */ + nr_maps * (6 + (gen->log_level ? 6 : 0)))); /* remember the label where all error branches will jump to */ gen->cleanup_label = gen->insn_cur - gen->insn_start; /* emit cleanup code: close all temp FDs */ - for (i = 0; i < stack_sz; i += 4) { + for (i = 0; i < nr_progs_sz; i += 4) { emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -stack_sz + i)); emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0, 1)); emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close)); } + for (i = 0; i < nr_maps; i++) + emit_sys_close_blob(gen, blob_fd_array_off(gen, i)); /* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */ emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); emit(gen, BPF_EXIT_INSN()); @@ -160,8 +177,6 @@ static int add_data(struct bpf_gen *gen, const void *data, __u32 size) */ static int add_map_fd(struct bpf_gen *gen) { - if (!gen->fd_array) - gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int)); if (gen->nr_maps == MAX_USED_MAPS) { pr_warn("Total maps exceeds %d\n", MAX_USED_MAPS); gen->error = -E2BIG; @@ -174,8 +189,6 @@ static int add_kfunc_btf_fd(struct bpf_gen *gen) { int cur; - if (!gen->fd_array) - gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int)); if (gen->nr_fd_array == MAX_KFUNC_DESCS) { cur = add_data(gen, NULL, sizeof(int)); return (cur - gen->fd_array) / sizeof(int); @@ -183,11 +196,6 @@ static int add_kfunc_btf_fd(struct bpf_gen *gen) return MAX_USED_MAPS + gen->nr_fd_array++; } -static int blob_fd_array_off(struct bpf_gen *gen, int index) -{ - return gen->fd_array + index * sizeof(int); -} - static int insn_bytes_to_bpf_size(__u32 sz) { switch (sz) { @@ -359,10 +367,15 @@ static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off) __emit_sys_close(gen); } -int bpf_gen__finish(struct bpf_gen *gen) +int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) { int i; + if (nr_progs != gen->nr_progs || nr_maps != gen->nr_maps) { + pr_warn("progs/maps mismatch\n"); + gen->error = -EFAULT; + return gen->error; + } emit_sys_close_stack(gen, stack_off(btf_fd)); for (i = 0; i < gen->nr_progs; i++) move_stack2ctx(gen, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a1bea1953df6..7c74342bb668 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7258,7 +7258,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) } if (obj->gen_loader) - bpf_gen__init(obj->gen_loader, attr->log_level); + bpf_gen__init(obj->gen_loader, attr->log_level, obj->nr_programs, obj->nr_maps); err = bpf_object__probe_loading(obj); err = err ? : bpf_object__load_vmlinux_btf(obj, false); @@ -7277,7 +7277,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) for (i = 0; i < obj->nr_maps; i++) obj->maps[i].fd = -1; if (!err) - err = bpf_gen__finish(obj->gen_loader); + err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); } /* clean up fd_array */ diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 81a4c543ff7e..4b384c907027 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -375,6 +375,7 @@ static int read_symbols(struct elf *elf) return -1; } memset(sym, 0, sizeof(*sym)); + INIT_LIST_HEAD(&sym->pv_target); sym->alias = sym; sym->idx = i; diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index c90c7084e45a..bdf699f6552b 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -153,6 +153,10 @@ void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func) !strcmp(func->name, "_paravirt_ident_64")) return; + /* already added this function */ + if (!list_empty(&func->pv_target)) + return; + list_add(&func->pv_target, &f->pv_ops[idx].targets); f->pv_ops[idx].clean = false; } diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 07e65a061fd3..afd144725a0b 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -1010,6 +1010,9 @@ ifndef NO_AUXTRACE ifndef NO_AUXTRACE $(call detected,CONFIG_AUXTRACE) CFLAGS += -DHAVE_AUXTRACE_SUPPORT + ifeq ($(feature-reallocarray), 0) + CFLAGS += -DCOMPAT_NEED_REALLOCARRAY + endif endif endif diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 1ca7bc337932..e2c481fcede6 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -363,3 +363,4 @@ 446 n64 landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 n64 process_mrelease sys_process_mrelease +449 n64 futex_waitv sys_futex_waitv diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index 488f6e6ba1a5..fa0ff4ce2b74 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -223,6 +223,8 @@ static unsigned int group(pthread_t *pth, snd_ctx->out_fds[i] = fds[1]; if (!thread_mode) close(fds[0]); + + free(ctx); } /* Now we have all the fds, fork the senders */ @@ -239,6 +241,8 @@ static unsigned int group(pthread_t *pth, for (i = 0; i < num_fds; i++) close(snd_ctx->out_fds[i]); + free(snd_ctx); + /* Return number of children to reap */ return num_fds * 2; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8167ebfe776a..8ae400429870 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -619,14 +619,17 @@ static int report__browse_hists(struct report *rep) int ret; struct perf_session *session = rep->session; struct evlist *evlist = session->evlist; - const char *help = perf_tip(system_path(TIPDIR)); + char *help = NULL, *path = NULL; - if (help == NULL) { + path = system_path(TIPDIR); + if (perf_tip(&help, path) || help == NULL) { /* fallback for people who don't install perf ;-) */ - help = perf_tip(DOCDIR); - if (help == NULL) - help = "Cannot load tips.txt file, please install perf!"; + free(path); + path = system_path(DOCDIR); + if (perf_tip(&help, path) || help == NULL) + help = strdup("Cannot load tips.txt file, please install perf!"); } + free(path); switch (use_browser) { case 1: @@ -651,7 +654,7 @@ static int report__browse_hists(struct report *rep) ret = evlist__tty_browse_hists(evlist, rep, help); break; } - + free(help); return ret; } diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index fbb68deba59f..d01532d40acb 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -88,7 +88,6 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes struct evsel *evsel; struct event_name tmp; struct evlist *evlist = evlist__new_default(); - char *unit = strdup("KRAVA"); TEST_ASSERT_VAL("failed to get evlist", evlist); @@ -99,7 +98,8 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes perf_evlist__id_add(&evlist->core, &evsel->core, 0, 0, 123); - evsel->unit = unit; + free((char *)evsel->unit); + evsel->unit = strdup("KRAVA"); TEST_ASSERT_VAL("failed to synthesize attr update unit", !perf_event__synthesize_event_update_unit(NULL, evsel, process_event_unit)); @@ -119,7 +119,6 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes TEST_ASSERT_VAL("failed to synthesize attr update cpus", !perf_event__synthesize_event_update_cpus(&tmp.tool, evsel, process_event_cpus)); - free(unit); evlist__delete(evlist); return 0; } diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index b669d22f2b13..07f2411b0ad4 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -36,7 +36,7 @@ * These are based on the input value (213) specified * in branch_stack variable. */ -#define BS_EXPECTED_BE 0xa00d000000000000 +#define BS_EXPECTED_BE 0xa000d00000000000 #define BS_EXPECTED_LE 0xd5000000 #define FLAG(s) s->branch_stack->entries[i].flags diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c index 820d942b30c3..9d4c45184e71 100644 --- a/tools/perf/tests/wp.c +++ b/tools/perf/tests/wp.c @@ -21,6 +21,7 @@ do { \ volatile u64 data1; volatile u8 data2[3]; +#ifndef __s390x__ static int wp_read(int fd, long long *count, int size) { int ret = read(fd, count, size); @@ -48,7 +49,6 @@ static void get__perf_event_attr(struct perf_event_attr *attr, int wp_type, attr->exclude_hv = 1; } -#ifndef __s390x__ static int __event(int wp_type, void *wp_addr, unsigned long wp_len) { int fd; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index c1f24d004852..5075ecead5f3 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -535,6 +535,18 @@ struct perf_hpp_list perf_hpp_list = { #undef __HPP_SORT_ACC_FN #undef __HPP_SORT_RAW_FN +static void fmt_free(struct perf_hpp_fmt *fmt) +{ + /* + * At this point fmt should be completely + * unhooked, if not it's a bug. + */ + BUG_ON(!list_empty(&fmt->list)); + BUG_ON(!list_empty(&fmt->sort_list)); + + if (fmt->free) + fmt->free(fmt); +} void perf_hpp__init(void) { @@ -598,9 +610,10 @@ void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list, list_add(&format->sort_list, &list->sorts); } -void perf_hpp__column_unregister(struct perf_hpp_fmt *format) +static void perf_hpp__column_unregister(struct perf_hpp_fmt *format) { list_del_init(&format->list); + fmt_free(format); } void perf_hpp__cancel_cumulate(void) @@ -672,19 +685,6 @@ next: } -static void fmt_free(struct perf_hpp_fmt *fmt) -{ - /* - * At this point fmt should be completely - * unhooked, if not it's a bug. - */ - BUG_ON(!list_empty(&fmt->list)); - BUG_ON(!list_empty(&fmt->sort_list)); - - if (fmt->free) - fmt->free(fmt); -} - void perf_hpp__reset_output_field(struct perf_hpp_list *list) { struct perf_hpp_fmt *fmt, *tmp; diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 4748bcfe61de..fccac06b573a 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -51,6 +51,7 @@ struct arm_spe { u8 timeless_decoding; u8 data_queued; + u64 sample_type; u8 sample_flc; u8 sample_llc; u8 sample_tlb; @@ -287,6 +288,12 @@ static void arm_spe_prep_sample(struct arm_spe *spe, event->sample.header.size = sizeof(struct perf_event_header); } +static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type) +{ + event->header.size = perf_event__sample_event_size(sample, type, 0); + return perf_event__synthesize_sample(event, type, 0, sample); +} + static inline int arm_spe_deliver_synth_event(struct arm_spe *spe, struct arm_spe_queue *speq __maybe_unused, @@ -295,6 +302,12 @@ arm_spe_deliver_synth_event(struct arm_spe *spe, { int ret; + if (spe->synth_opts.inject) { + ret = arm_spe__inject_event(event, sample, spe->sample_type); + if (ret) + return ret; + } + ret = perf_session__deliver_synth_event(spe->session, event, sample); if (ret) pr_err("ARM SPE: failed to deliver event, error %d\n", ret); @@ -986,6 +999,8 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) else attr.sample_type |= PERF_SAMPLE_TIME; + spe->sample_type = attr.sample_type; + attr.exclude_user = evsel->core.attr.exclude_user; attr.exclude_kernel = evsel->core.attr.exclude_kernel; attr.exclude_hv = evsel->core.attr.exclude_hv; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a59fb2ecb84e..ac0127be0459 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -241,7 +241,7 @@ void evsel__init(struct evsel *evsel, { perf_evsel__init(&evsel->core, attr, idx); evsel->tracking = !idx; - evsel->unit = ""; + evsel->unit = strdup(""); evsel->scale = 1.0; evsel->max_events = ULONG_MAX; evsel->evlist = NULL; @@ -276,13 +276,8 @@ struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx) } if (evsel__is_clock(evsel)) { - /* - * The evsel->unit points to static alias->unit - * so it's ok to use static string in here. - */ - static const char *unit = "msec"; - - evsel->unit = unit; + free((char *)evsel->unit); + evsel->unit = strdup("msec"); evsel->scale = 1e-6; } @@ -420,7 +415,11 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->max_events = orig->max_events; evsel->tool_event = orig->tool_event; - evsel->unit = orig->unit; + free((char *)evsel->unit); + evsel->unit = strdup(orig->unit); + if (evsel->unit == NULL) + goto out_err; + evsel->scale = orig->scale; evsel->snapshot = orig->snapshot; evsel->per_pkg = orig->per_pkg; @@ -1441,6 +1440,7 @@ void evsel__exit(struct evsel *evsel) zfree(&evsel->group_name); zfree(&evsel->name); zfree(&evsel->pmu_name); + zfree(&evsel->unit); zfree(&evsel->metric_id); evsel__zero_per_pkg(evsel); hashmap__free(evsel->per_pkg_mask); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index fda8d14c891f..79cce216727e 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -4257,9 +4257,11 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, switch (ev->type) { case PERF_EVENT_UPDATE__UNIT: + free((char *)evsel->unit); evsel->unit = strdup(ev->data); break; case PERF_EVENT_UPDATE__NAME: + free(evsel->name); evsel->name = strdup(ev->data); break; case PERF_EVENT_UPDATE__SCALE: @@ -4268,11 +4270,11 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, break; case PERF_EVENT_UPDATE__CPUS: ev_cpus = (struct perf_record_event_update_cpus *)ev->data; - map = cpu_map__new_data(&ev_cpus->cpus); - if (map) + if (map) { + perf_cpu_map__put(evsel->core.own_cpus); evsel->core.own_cpus = map; - else + } else pr_err("failed to get event_update cpus\n"); default: break; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 65fe65ba03c2..b776465e04ef 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -289,15 +289,10 @@ static long hist_time(unsigned long htime) return htime; } -static void he_stat__add_period(struct he_stat *he_stat, u64 period, - u64 weight, u64 ins_lat, u64 p_stage_cyc) +static void he_stat__add_period(struct he_stat *he_stat, u64 period) { - he_stat->period += period; - he_stat->weight += weight; he_stat->nr_events += 1; - he_stat->ins_lat += ins_lat; - he_stat->p_stage_cyc += p_stage_cyc; } static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) @@ -308,9 +303,6 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) dest->period_guest_sys += src->period_guest_sys; dest->period_guest_us += src->period_guest_us; dest->nr_events += src->nr_events; - dest->weight += src->weight; - dest->ins_lat += src->ins_lat; - dest->p_stage_cyc += src->p_stage_cyc; } static void he_stat__decay(struct he_stat *he_stat) @@ -598,9 +590,6 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, struct hist_entry *he; int64_t cmp; u64 period = entry->stat.period; - u64 weight = entry->stat.weight; - u64 ins_lat = entry->stat.ins_lat; - u64 p_stage_cyc = entry->stat.p_stage_cyc; bool leftmost = true; p = &hists->entries_in->rb_root.rb_node; @@ -619,11 +608,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (!cmp) { if (sample_self) { - he_stat__add_period(&he->stat, period, weight, ins_lat, p_stage_cyc); + he_stat__add_period(&he->stat, period); hist_entry__add_callchain_period(he, period); } if (symbol_conf.cumulate_callchain) - he_stat__add_period(he->stat_acc, period, weight, ins_lat, p_stage_cyc); + he_stat__add_period(he->stat_acc, period); /* * This mem info was allocated from sample__resolve_mem @@ -733,9 +722,6 @@ __hists__add_entry(struct hists *hists, .stat = { .nr_events = 1, .period = sample->period, - .weight = sample->weight, - .ins_lat = sample->ins_lat, - .p_stage_cyc = sample->p_stage_cyc, }, .parent = sym_parent, .filtered = symbol__parent_filter(sym_parent) | al->filtered, @@ -748,6 +734,9 @@ __hists__add_entry(struct hists *hists, .raw_size = sample->raw_size, .ops = ops, .time = hist_time(sample->time), + .weight = sample->weight, + .ins_lat = sample->ins_lat, + .p_stage_cyc = sample->p_stage_cyc, }, *he = hists__findnew_entry(hists, &entry, al, sample_self); if (!hists->has_callchains && he && he->callchain_size != 0) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 5343b62476e6..621f35ae1efa 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -369,7 +369,6 @@ enum { }; void perf_hpp__init(void); -void perf_hpp__column_unregister(struct perf_hpp_fmt *format); void perf_hpp__cancel_cumulate(void); void perf_hpp__setup_output_field(struct perf_hpp_list *list); void perf_hpp__reset_output_field(struct perf_hpp_list *list); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5bfb6f892489..ba74fdf74af9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -402,8 +402,10 @@ static int add_event_tool(struct list_head *list, int *idx, if (!evsel) return -ENOMEM; evsel->tool_event = tool_event; - if (tool_event == PERF_TOOL_DURATION_TIME) - evsel->unit = "ns"; + if (tool_event == PERF_TOOL_DURATION_TIME) { + free((char *)evsel->unit); + evsel->unit = strdup("ns"); + } return 0; } @@ -1630,7 +1632,8 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (parse_state->fake_pmu) return 0; - evsel->unit = info.unit; + free((char *)evsel->unit); + evsel->unit = strdup(info.unit); evsel->scale = info.scale; evsel->per_pkg = info.per_pkg; evsel->snapshot = info.snapshot; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 568a88c001c6..a111065b484e 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1325,88 +1325,68 @@ struct sort_entry sort_mispredict = { .se_width_idx = HISTC_MISPREDICT, }; -static u64 he_weight(struct hist_entry *he) -{ - return he->stat.nr_events ? he->stat.weight / he->stat.nr_events : 0; -} - static int64_t -sort__local_weight_cmp(struct hist_entry *left, struct hist_entry *right) +sort__weight_cmp(struct hist_entry *left, struct hist_entry *right) { - return he_weight(left) - he_weight(right); + return left->weight - right->weight; } static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*llu", width, he_weight(he)); + return repsep_snprintf(bf, size, "%-*llu", width, he->weight); } struct sort_entry sort_local_weight = { .se_header = "Local Weight", - .se_cmp = sort__local_weight_cmp, + .se_cmp = sort__weight_cmp, .se_snprintf = hist_entry__local_weight_snprintf, .se_width_idx = HISTC_LOCAL_WEIGHT, }; -static int64_t -sort__global_weight_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return left->stat.weight - right->stat.weight; -} - static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*llu", width, he->stat.weight); + return repsep_snprintf(bf, size, "%-*llu", width, + he->weight * he->stat.nr_events); } struct sort_entry sort_global_weight = { .se_header = "Weight", - .se_cmp = sort__global_weight_cmp, + .se_cmp = sort__weight_cmp, .se_snprintf = hist_entry__global_weight_snprintf, .se_width_idx = HISTC_GLOBAL_WEIGHT, }; -static u64 he_ins_lat(struct hist_entry *he) -{ - return he->stat.nr_events ? he->stat.ins_lat / he->stat.nr_events : 0; -} - static int64_t -sort__local_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right) +sort__ins_lat_cmp(struct hist_entry *left, struct hist_entry *right) { - return he_ins_lat(left) - he_ins_lat(right); + return left->ins_lat - right->ins_lat; } static int hist_entry__local_ins_lat_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, he_ins_lat(he)); + return repsep_snprintf(bf, size, "%-*u", width, he->ins_lat); } struct sort_entry sort_local_ins_lat = { .se_header = "Local INSTR Latency", - .se_cmp = sort__local_ins_lat_cmp, + .se_cmp = sort__ins_lat_cmp, .se_snprintf = hist_entry__local_ins_lat_snprintf, .se_width_idx = HISTC_LOCAL_INS_LAT, }; -static int64_t -sort__global_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return left->stat.ins_lat - right->stat.ins_lat; -} - static int hist_entry__global_ins_lat_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, he->stat.ins_lat); + return repsep_snprintf(bf, size, "%-*u", width, + he->ins_lat * he->stat.nr_events); } struct sort_entry sort_global_ins_lat = { .se_header = "INSTR Latency", - .se_cmp = sort__global_ins_lat_cmp, + .se_cmp = sort__ins_lat_cmp, .se_snprintf = hist_entry__global_ins_lat_snprintf, .se_width_idx = HISTC_GLOBAL_INS_LAT, }; @@ -1414,13 +1394,13 @@ struct sort_entry sort_global_ins_lat = { static int64_t sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) { - return left->stat.p_stage_cyc - right->stat.p_stage_cyc; + return left->p_stage_cyc - right->p_stage_cyc; } static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, he->stat.p_stage_cyc); + return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc); } struct sort_entry sort_p_stage_cyc = { diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index b67c469aba79..7b7145501933 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -49,9 +49,6 @@ struct he_stat { u64 period_us; u64 period_guest_sys; u64 period_guest_us; - u64 weight; - u64 ins_lat; - u64 p_stage_cyc; u32 nr_events; }; @@ -109,6 +106,9 @@ struct hist_entry { s32 socket; s32 cpu; u64 code_page_size; + u64 weight; + u64 ins_lat; + u64 p_stage_cyc; u8 cpumode; u8 depth; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 37a9492edb3e..df3c4671be72 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -379,32 +379,32 @@ fetch_kernel_version(unsigned int *puint, char *str, return 0; } -const char *perf_tip(const char *dirpath) +int perf_tip(char **strp, const char *dirpath) { struct strlist *tips; struct str_node *node; - char *tip = NULL; struct strlist_config conf = { .dirname = dirpath, .file_only = true, }; + int ret = 0; + *strp = NULL; tips = strlist__new("tips.txt", &conf); if (tips == NULL) - return errno == ENOENT ? NULL : - "Tip: check path of tips.txt or get more memory! ;-p"; + return -errno; if (strlist__nr_entries(tips) == 0) goto out; node = strlist__entry(tips, random() % strlist__nr_entries(tips)); - if (asprintf(&tip, "Tip: %s", node->s) < 0) - tip = (char *)"Tip: get more memory! ;-)"; + if (asprintf(strp, "Tip: %s", node->s) < 0) + ret = -ENOMEM; out: strlist__delete(tips); - return tip; + return ret; } char *perf_exe(char *buf, int len) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index ad737052e597..9f0d36ba77f2 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -39,7 +39,7 @@ int fetch_kernel_version(unsigned int *puint, #define KVER_FMT "%d.%d.%d" #define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x) -const char *perf_tip(const char *dirpath); +int perf_tip(char **strp, const char *dirpath); #ifndef HAVE_SCHED_GETCPU_SUPPORT int sched_getcpu(void); diff --git a/tools/testing/radix-tree/linux/lockdep.h b/tools/testing/radix-tree/linux/lockdep.h index 565fccdfe6e9..016cff473cfc 100644 --- a/tools/testing/radix-tree/linux/lockdep.h +++ b/tools/testing/radix-tree/linux/lockdep.h @@ -1,5 +1,8 @@ #ifndef _LINUX_LOCKDEP_H #define _LINUX_LOCKDEP_H + +#include <linux/spinlock.h> + struct lock_class_key { unsigned int a; }; diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 54b0a41a3775..62fafbeb4672 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -187,7 +187,7 @@ DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF) \ - BPFTOOL_OUTPUT=$(BUILD_DIR)/bpftool/ \ + BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf \ BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \ cp $(RUNQSLOWER_OUTPUT)runqslower $@ diff --git a/tools/testing/selftests/bpf/prog_tests/helper_restricted.c b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c new file mode 100644 index 000000000000..e1de5f80c3b2 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "test_helper_restricted.skel.h" + +void test_helper_restricted(void) +{ + int prog_i = 0, prog_cnt; + int duration = 0; + + do { + struct test_helper_restricted *test; + int maybeOK; + + test = test_helper_restricted__open(); + if (!ASSERT_OK_PTR(test, "open")) + return; + + prog_cnt = test->skeleton->prog_cnt; + + for (int j = 0; j < prog_cnt; ++j) { + struct bpf_program *prog = *test->skeleton->progs[j].prog; + + maybeOK = bpf_program__set_autoload(prog, prog_i == j); + ASSERT_OK(maybeOK, "set autoload"); + } + + maybeOK = test_helper_restricted__load(test); + CHECK(!maybeOK, test->skeleton->progs[prog_i].name, "helper isn't restricted"); + + test_helper_restricted__destroy(test); + } while (++prog_i < prog_cnt); +} diff --git a/tools/testing/selftests/bpf/progs/test_helper_restricted.c b/tools/testing/selftests/bpf/progs/test_helper_restricted.c new file mode 100644 index 000000000000..68d64c365f90 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_helper_restricted.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <time.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct timer { + struct bpf_timer t; +}; + +struct lock { + struct bpf_spin_lock l; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct timer); +} timers SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct lock); +} locks SEC(".maps"); + +static int timer_cb(void *map, int *key, struct timer *timer) +{ + return 0; +} + +static void timer_work(void) +{ + struct timer *timer; + const int key = 0; + + timer = bpf_map_lookup_elem(&timers, &key); + if (timer) { + bpf_timer_init(&timer->t, &timers, CLOCK_MONOTONIC); + bpf_timer_set_callback(&timer->t, timer_cb); + bpf_timer_start(&timer->t, 10E9, 0); + bpf_timer_cancel(&timer->t); + } +} + +static void spin_lock_work(void) +{ + const int key = 0; + struct lock *lock; + + lock = bpf_map_lookup_elem(&locks, &key); + if (lock) { + bpf_spin_lock(&lock->l); + bpf_spin_unlock(&lock->l); + } +} + +SEC("raw_tp/sys_enter") +int raw_tp_timer(void *ctx) +{ + timer_work(); + + return 0; +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int tp_timer(void *ctx) +{ + timer_work(); + + return 0; +} + +SEC("kprobe/sys_nanosleep") +int kprobe_timer(void *ctx) +{ + timer_work(); + + return 0; +} + +SEC("perf_event") +int perf_event_timer(void *ctx) +{ + timer_work(); + + return 0; +} + +SEC("raw_tp/sys_enter") +int raw_tp_spin_lock(void *ctx) +{ + spin_lock_work(); + + return 0; +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int tp_spin_lock(void *ctx) +{ + spin_lock_work(); + + return 0; +} + +SEC("kprobe/sys_nanosleep") +int kprobe_spin_lock(void *ctx) +{ + spin_lock_work(); + + return 0; +} + +SEC("perf_event") +int perf_event_spin_lock(void *ctx) +{ + spin_lock_work(); + + return 0; +} + +const char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 25afe423b3f0..465ef3f112c0 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -92,6 +92,7 @@ struct bpf_test { int fixup_map_event_output[MAX_FIXUPS]; int fixup_map_reuseport_array[MAX_FIXUPS]; int fixup_map_ringbuf[MAX_FIXUPS]; + int fixup_map_timer[MAX_FIXUPS]; /* Expected verifier log output for result REJECT or VERBOSE_ACCEPT. * Can be a tab-separated sequence of expected strings. An empty string * means no log verification. @@ -604,8 +605,15 @@ static int create_cgroup_storage(bool percpu) * int cnt; * struct bpf_spin_lock l; * }; + * struct bpf_timer { + * __u64 :64; + * __u64 :64; + * } __attribute__((aligned(8))); + * struct timer { + * struct bpf_timer t; + * }; */ -static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l"; +static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t"; static __u32 btf_raw_types[] = { /* int */ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ @@ -616,6 +624,11 @@ static __u32 btf_raw_types[] = { BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */ BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */ + /* struct bpf_timer */ /* [4] */ + BTF_TYPE_ENC(25, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0), 16), + /* struct timer */ /* [5] */ + BTF_TYPE_ENC(35, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16), + BTF_MEMBER_ENC(41, 4, 0), /* struct bpf_timer t; */ }; static int load_btf(void) @@ -696,6 +709,29 @@ static int create_sk_storage_map(void) return fd; } +static int create_map_timer(void) +{ + struct bpf_create_map_attr attr = { + .name = "test_map", + .map_type = BPF_MAP_TYPE_ARRAY, + .key_size = 4, + .value_size = 16, + .max_entries = 1, + .btf_key_type_id = 1, + .btf_value_type_id = 5, + }; + int fd, btf_fd; + + btf_fd = load_btf(); + if (btf_fd < 0) + return -1; + attr.btf_fd = btf_fd; + fd = bpf_create_map_xattr(&attr); + if (fd < 0) + printf("Failed to create map with timer\n"); + return fd; +} + static char bpf_vlog[UINT_MAX >> 8]; static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, @@ -722,6 +758,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_map_event_output = test->fixup_map_event_output; int *fixup_map_reuseport_array = test->fixup_map_reuseport_array; int *fixup_map_ringbuf = test->fixup_map_ringbuf; + int *fixup_map_timer = test->fixup_map_timer; if (test->fill_helper) { test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); @@ -907,6 +944,13 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_map_ringbuf++; } while (*fixup_map_ringbuf); } + if (*fixup_map_timer) { + map_fds[21] = create_map_timer(); + do { + prog[*fixup_map_timer].imm = map_fds[21]; + fixup_map_timer++; + } while (*fixup_map_timer); + } } struct libcap { diff --git a/tools/testing/selftests/bpf/verifier/helper_restricted.c b/tools/testing/selftests/bpf/verifier/helper_restricted.c new file mode 100644 index 000000000000..a067b7098b97 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/helper_restricted.c @@ -0,0 +1,196 @@ +{ + "bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_KPROBE", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "unknown func bpf_ktime_get_coarse_ns", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_KPROBE, +}, +{ + "bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_TRACEPOINT", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "unknown func bpf_ktime_get_coarse_ns", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_PERF_EVENT", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "unknown func bpf_ktime_get_coarse_ns", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_PERF_EVENT, +}, +{ + "bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "unknown func bpf_ktime_get_coarse_ns", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT, +}, +{ + "bpf_timer_init isn restricted in BPF_PROG_TYPE_KPROBE", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_EMIT_CALL(BPF_FUNC_timer_init), + BPF_EXIT_INSN(), + }, + .fixup_map_timer = { 3, 8 }, + .errstr = "tracing progs cannot use bpf_timer yet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_KPROBE, +}, +{ + "bpf_timer_init is forbidden in BPF_PROG_TYPE_PERF_EVENT", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_EMIT_CALL(BPF_FUNC_timer_init), + BPF_EXIT_INSN(), + }, + .fixup_map_timer = { 3, 8 }, + .errstr = "tracing progs cannot use bpf_timer yet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_PERF_EVENT, +}, +{ + "bpf_timer_init is forbidden in BPF_PROG_TYPE_TRACEPOINT", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_EMIT_CALL(BPF_FUNC_timer_init), + BPF_EXIT_INSN(), + }, + .fixup_map_timer = { 3, 8 }, + .errstr = "tracing progs cannot use bpf_timer yet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "bpf_timer_init is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_EMIT_CALL(BPF_FUNC_timer_init), + BPF_EXIT_INSN(), + }, + .fixup_map_timer = { 3, 8 }, + .errstr = "tracing progs cannot use bpf_timer yet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT, +}, +{ + "bpf_spin_lock is forbidden in BPF_PROG_TYPE_KPROBE", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_spin_lock), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .errstr = "tracing progs cannot use bpf_spin_lock yet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_KPROBE, +}, +{ + "bpf_spin_lock is forbidden in BPF_PROG_TYPE_TRACEPOINT", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_spin_lock), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .errstr = "tracing progs cannot use bpf_spin_lock yet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "bpf_spin_lock is forbidden in BPF_PROG_TYPE_PERF_EVENT", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_spin_lock), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .errstr = "tracing progs cannot use bpf_spin_lock yet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_PERF_EVENT, +}, +{ + "bpf_spin_lock is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_spin_lock), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .errstr = "tracing progs cannot use bpf_spin_lock yet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT, +}, diff --git a/tools/testing/selftests/bpf/verifier/map_in_map.c b/tools/testing/selftests/bpf/verifier/map_in_map.c index 2798927ee9ff..128a348b762d 100644 --- a/tools/testing/selftests/bpf/verifier/map_in_map.c +++ b/tools/testing/selftests/bpf/verifier/map_in_map.c @@ -19,6 +19,40 @@ .result = ACCEPT, }, { + "map in map state pruning", + .insns = { + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -4), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 11), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_in_map = { 4, 14 }, + .flags = BPF_F_TEST_STATE_FREQ, + .result = VERBOSE_ACCEPT, + .errstr = "processed 25 insns", + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ "invalid inner map pointer", .insns = { BPF_ST_MEM(0, BPF_REG_10, -4, 0), diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index 39f2bbe8dd3d..d7b312b44a62 100644 --- a/tools/testing/selftests/gpio/Makefile +++ b/tools/testing/selftests/gpio/Makefile @@ -3,5 +3,6 @@ TEST_PROGS := gpio-mockup.sh TEST_FILES := gpio-mockup-sysfs.sh TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev +CFLAGS += -O2 -g -Wall -I../../../../usr/include/ include ../lib.mk diff --git a/tools/testing/selftests/gpio/gpio-mockup-cdev.c b/tools/testing/selftests/gpio/gpio-mockup-cdev.c index e83eac71621a..d1640f44f8ac 100644 --- a/tools/testing/selftests/gpio/gpio-mockup-cdev.c +++ b/tools/testing/selftests/gpio/gpio-mockup-cdev.c @@ -117,7 +117,7 @@ int main(int argc, char *argv[]) { char *chip; int opt, ret, cfd, lfd; - unsigned int offset, val, abiv; + unsigned int offset, val = 0, abiv; uint32_t flags_v1; uint64_t flags_v2; diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index d4a830139683..3763105029fb 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -23,6 +23,7 @@ /x86_64/platform_info_test /x86_64/set_boot_cpu_id /x86_64/set_sregs_test +/x86_64/sev_migrate_tests /x86_64/smm_test /x86_64/state_test /x86_64/svm_vmcall_test diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 5d95113c7b7c..d8909032317a 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -47,7 +47,7 @@ #include "guest_modes.h" /* Global variable used to synchronize all of the vCPU threads. */ -static int iteration = -1; +static int iteration; /* Defines what vCPU threads should do during a given iteration. */ static enum { @@ -215,12 +215,11 @@ static bool spin_wait_for_next_iteration(int *current_iteration) return true; } -static void *vcpu_thread_main(void *arg) +static void vcpu_thread_main(struct perf_test_vcpu_args *vcpu_args) { - struct perf_test_vcpu_args *vcpu_args = arg; struct kvm_vm *vm = perf_test_args.vm; int vcpu_id = vcpu_args->vcpu_id; - int current_iteration = -1; + int current_iteration = 0; while (spin_wait_for_next_iteration(¤t_iteration)) { switch (READ_ONCE(iteration_work)) { @@ -235,8 +234,6 @@ static void *vcpu_thread_main(void *arg) vcpu_last_completed_iteration[vcpu_id] = current_iteration; } - - return NULL; } static void spin_wait_for_vcpu(int vcpu_id, int target_iteration) @@ -277,8 +274,7 @@ static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description) static void access_memory(struct kvm_vm *vm, int vcpus, enum access_type access, const char *description) { - perf_test_args.wr_fract = (access == ACCESS_READ) ? INT_MAX : 1; - sync_global_to_guest(vm, perf_test_args); + perf_test_set_wr_fract(vm, (access == ACCESS_READ) ? INT_MAX : 1); iteration_work = ITERATION_ACCESS_MEMORY; run_iteration(vm, vcpus, description); } @@ -296,48 +292,16 @@ static void mark_memory_idle(struct kvm_vm *vm, int vcpus) run_iteration(vm, vcpus, "Mark memory idle"); } -static pthread_t *create_vcpu_threads(int vcpus) -{ - pthread_t *vcpu_threads; - int i; - - vcpu_threads = malloc(vcpus * sizeof(vcpu_threads[0])); - TEST_ASSERT(vcpu_threads, "Failed to allocate vcpu_threads."); - - for (i = 0; i < vcpus; i++) { - vcpu_last_completed_iteration[i] = iteration; - pthread_create(&vcpu_threads[i], NULL, vcpu_thread_main, - &perf_test_args.vcpu_args[i]); - } - - return vcpu_threads; -} - -static void terminate_vcpu_threads(pthread_t *vcpu_threads, int vcpus) -{ - int i; - - /* Set done to signal the vCPU threads to exit */ - done = true; - - for (i = 0; i < vcpus; i++) - pthread_join(vcpu_threads[i], NULL); -} - static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *params = arg; struct kvm_vm *vm; - pthread_t *vcpu_threads; int vcpus = params->vcpus; vm = perf_test_create_vm(mode, vcpus, params->vcpu_memory_bytes, 1, - params->backing_src); + params->backing_src, !overlap_memory_access); - perf_test_setup_vcpus(vm, vcpus, params->vcpu_memory_bytes, - !overlap_memory_access); - - vcpu_threads = create_vcpu_threads(vcpus); + perf_test_start_vcpu_threads(vcpus, vcpu_thread_main); pr_info("\n"); access_memory(vm, vcpus, ACCESS_WRITE, "Populating memory"); @@ -352,8 +316,10 @@ static void run_test(enum vm_guest_mode mode, void *arg) mark_memory_idle(vm, vcpus); access_memory(vm, vcpus, ACCESS_READ, "Reading from idle memory"); - terminate_vcpu_threads(vcpu_threads, vcpus); - free(vcpu_threads); + /* Set done to signal the vCPU threads to exit */ + done = true; + + perf_test_join_vcpu_threads(vcpus); perf_test_destroy_vm(vm); } diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 1510b21e6306..6a719d065599 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -42,10 +42,9 @@ static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static size_t demand_paging_size; static char *guest_data_prototype; -static void *vcpu_worker(void *data) +static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) { int ret; - struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; int vcpu_id = vcpu_args->vcpu_id; struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; @@ -68,8 +67,6 @@ static void *vcpu_worker(void *data) ts_diff = timespec_elapsed(start); PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id, ts_diff.tv_sec, ts_diff.tv_nsec); - - return NULL; } static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr) @@ -282,7 +279,6 @@ struct test_params { static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *p = arg; - pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; struct uffd_handler_args *uffd_args = NULL; struct timespec start; @@ -293,9 +289,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) int r; vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, - p->src_type); - - perf_test_args.wr_fract = 1; + p->src_type, p->partition_vcpu_memory_access); demand_paging_size = get_backing_src_pagesz(p->src_type); @@ -304,12 +298,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) "Failed to allocate buffer for guest data pattern"); memset(guest_data_prototype, 0xAB, demand_paging_size); - vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); - TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - - perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size, - p->partition_vcpu_memory_access); - if (p->uffd_mode) { uffd_handler_threads = malloc(nr_vcpus * sizeof(*uffd_handler_threads)); @@ -322,26 +310,15 @@ static void run_test(enum vm_guest_mode mode, void *arg) TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { - vm_paddr_t vcpu_gpa; + struct perf_test_vcpu_args *vcpu_args; void *vcpu_hva; void *vcpu_alias; - uint64_t vcpu_mem_size; - - if (p->partition_vcpu_memory_access) { - vcpu_gpa = guest_test_phys_mem + - (vcpu_id * guest_percpu_mem_size); - vcpu_mem_size = guest_percpu_mem_size; - } else { - vcpu_gpa = guest_test_phys_mem; - vcpu_mem_size = guest_percpu_mem_size * nr_vcpus; - } - PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size); + vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; /* Cache the host addresses of the region */ - vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); - vcpu_alias = addr_gpa2alias(vm, vcpu_gpa); + vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa); + vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa); /* * Set up user fault fd to handle demand paging @@ -355,32 +332,18 @@ static void run_test(enum vm_guest_mode mode, void *arg) pipefds[vcpu_id * 2], p->uffd_mode, p->uffd_delay, &uffd_args[vcpu_id], vcpu_hva, vcpu_alias, - vcpu_mem_size); + vcpu_args->pages * perf_test_args.guest_page_size); } } - /* Export the shared variables to the guest */ - sync_global_to_guest(vm, perf_test_args); - pr_info("Finished creating vCPUs and starting uffd threads\n"); clock_gettime(CLOCK_MONOTONIC, &start); - - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { - pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, - &perf_test_args.vcpu_args[vcpu_id]); - } - + perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); pr_info("Started all vCPUs\n"); - /* Wait for the vcpu threads to quit */ - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { - pthread_join(vcpu_threads[vcpu_id], NULL); - PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); - } - + perf_test_join_vcpu_threads(nr_vcpus); ts_diff = timespec_elapsed(start); - pr_info("All vCPU threads joined\n"); if (p->uffd_mode) { @@ -404,7 +367,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) perf_test_destroy_vm(vm); free(guest_data_prototype); - free(vcpu_threads); if (p->uffd_mode) { free(uffd_handler_threads); free(uffd_args); diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 7ffab5bd5ce5..1954b964d1cf 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -31,7 +31,7 @@ static bool host_quit; static int iteration; static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; -static void *vcpu_worker(void *data) +static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) { int ret; struct kvm_vm *vm = perf_test_args.vm; @@ -41,7 +41,6 @@ static void *vcpu_worker(void *data) struct timespec ts_diff; struct timespec total = (struct timespec){0}; struct timespec avg; - struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; int vcpu_id = vcpu_args->vcpu_id; run = vcpu_state(vm, vcpu_id); @@ -83,8 +82,6 @@ static void *vcpu_worker(void *data) pr_debug("\nvCPU %d dirtied 0x%lx pages over %d iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id], total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec); - - return NULL; } struct test_params { @@ -170,7 +167,6 @@ static void free_bitmaps(unsigned long *bitmaps[], int slots) static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *p = arg; - pthread_t *vcpu_threads; struct kvm_vm *vm; unsigned long **bitmaps; uint64_t guest_num_pages; @@ -186,9 +182,10 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct timespec clear_dirty_log_total = (struct timespec){0}; vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, - p->slots, p->backing_src); + p->slots, p->backing_src, + p->partition_vcpu_memory_access); - perf_test_args.wr_fract = p->wr_fract; + perf_test_set_wr_fract(vm, p->wr_fract); guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm); guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); @@ -203,25 +200,15 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm_enable_cap(vm, &cap); } - vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); - TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - - perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size, - p->partition_vcpu_memory_access); - - sync_global_to_guest(vm, perf_test_args); - /* Start the iterations */ iteration = 0; host_quit = false; clock_gettime(CLOCK_MONOTONIC, &start); - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) vcpu_last_completed_iteration[vcpu_id] = -1; - pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, - &perf_test_args.vcpu_args[vcpu_id]); - } + perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); /* Allow the vCPUs to populate memory */ pr_debug("Starting iteration %d - Populating\n", iteration); @@ -290,8 +277,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Tell the vcpu thread to quit */ host_quit = true; - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) - pthread_join(vcpu_threads[vcpu_id], NULL); + perf_test_join_vcpu_threads(nr_vcpus); avg = timespec_div(get_dirty_log_total, p->iterations); pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", @@ -306,7 +292,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) } free_bitmaps(bitmaps, p->slots); - free(vcpu_threads); perf_test_destroy_vm(vm); } diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 792c60e1b17d..3fcd89e195c7 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -115,7 +115,7 @@ static void guest_code(void) addr = guest_test_virt_mem; addr += (READ_ONCE(random_array[i]) % guest_num_pages) * guest_page_size; - addr &= ~(host_page_size - 1); + addr = align_down(addr, host_page_size); *(uint64_t *)addr = READ_ONCE(iteration); } @@ -737,14 +737,14 @@ static void run_test(enum vm_guest_mode mode, void *arg) if (!p->phys_offset) { guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * guest_page_size; - guest_test_phys_mem &= ~(host_page_size - 1); + guest_test_phys_mem = align_down(guest_test_phys_mem, host_page_size); } else { guest_test_phys_mem = p->phys_offset; } #ifdef __s390x__ /* Align to 1M (segment size) */ - guest_test_phys_mem &= ~((1 << 20) - 1); + guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20); #endif pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index df9f1a3a3ffb..a86f953d8d36 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -8,6 +8,8 @@ #ifndef SELFTEST_KVM_PERF_TEST_UTIL_H #define SELFTEST_KVM_PERF_TEST_UTIL_H +#include <pthread.h> + #include "kvm_util.h" /* Default guest test virtual memory offset */ @@ -18,6 +20,7 @@ #define PERF_TEST_MEM_SLOT_INDEX 1 struct perf_test_vcpu_args { + uint64_t gpa; uint64_t gva; uint64_t pages; @@ -27,7 +30,7 @@ struct perf_test_vcpu_args { struct perf_test_args { struct kvm_vm *vm; - uint64_t host_page_size; + uint64_t gpa; uint64_t guest_page_size; int wr_fract; @@ -36,19 +39,15 @@ struct perf_test_args { extern struct perf_test_args perf_test_args; -/* - * Guest physical memory offset of the testing memory slot. - * This will be set to the topmost valid physical address minus - * the test memory size. - */ -extern uint64_t guest_test_phys_mem; - struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, uint64_t vcpu_memory_bytes, int slots, - enum vm_mem_backing_src_type backing_src); + enum vm_mem_backing_src_type backing_src, + bool partition_vcpu_memory_access); void perf_test_destroy_vm(struct kvm_vm *vm); -void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, - uint64_t vcpu_memory_bytes, - bool partition_vcpu_memory_access); + +void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract); + +void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)); +void perf_test_join_vcpu_threads(int vcpus); #endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index f8fddc84c0d3..99e0dcdc923f 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -104,6 +104,7 @@ size_t get_trans_hugepagesz(void); size_t get_def_hugetlb_pagesz(void); const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i); size_t get_backing_src_pagesz(uint32_t i); +bool is_backing_src_hugetlb(uint32_t i); void backing_src_help(const char *flag); enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name); long get_run_delay(void); @@ -117,4 +118,29 @@ static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t) return vm_mem_backing_src_alias(t)->flag & MAP_SHARED; } +/* Aligns x up to the next multiple of size. Size must be a power of 2. */ +static inline uint64_t align_up(uint64_t x, uint64_t size) +{ + uint64_t mask = size - 1; + + TEST_ASSERT(size != 0 && !(size & (size - 1)), + "size not a power of 2: %lu", size); + return ((x + mask) & ~mask); +} + +static inline uint64_t align_down(uint64_t x, uint64_t size) +{ + uint64_t x_aligned_up = align_up(x, size); + + if (x == x_aligned_up) + return x; + else + return x_aligned_up - size; +} + +static inline void *align_ptr_up(void *x, size_t size) +{ + return (void *)align_up((unsigned long)x, size); +} + #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index f968dfd4ee88..aed9dc3ca1e9 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -12,6 +12,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/resource.h> #include "test_util.h" @@ -40,11 +41,40 @@ int main(int argc, char *argv[]) { int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID); int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); + /* + * Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds + + * an arbitrary number for everything else. + */ + int nr_fds_wanted = kvm_max_vcpus + 100; + struct rlimit rl; pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id); pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus); /* + * Check that we're allowed to open nr_fds_wanted file descriptors and + * try raising the limits if needed. + */ + TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!"); + + if (rl.rlim_cur < nr_fds_wanted) { + rl.rlim_cur = nr_fds_wanted; + if (rl.rlim_max < nr_fds_wanted) { + int old_rlim_max = rl.rlim_max; + rl.rlim_max = nr_fds_wanted; + + int r = setrlimit(RLIMIT_NOFILE, &rl); + if (r < 0) { + printf("RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n", + old_rlim_max, nr_fds_wanted); + exit(KSFT_SKIP); + } + } else { + TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); + } + } + + /* * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID. * Userspace is supposed to use KVM_CAP_MAX_VCPUS as the maximum ID * in this case. diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index 36407cb0ec85..ba1fdc3dcf4a 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -280,7 +280,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) #ifdef __s390x__ alignment = max(0x100000, alignment); #endif - guest_test_phys_mem &= ~(alignment - 1); + guest_test_phys_mem = align_down(guest_test_phys_mem, alignment); /* Set up the shared data structure test_args */ test_args.vm = vm; diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index eac44f5d0db0..13e8e3dcf984 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -157,8 +157,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) "memsize of 0,\n" " phdr index: %u p_memsz: 0x%" PRIx64, n1, (uint64_t) phdr.p_memsz); - vm_vaddr_t seg_vstart = phdr.p_vaddr; - seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1); + vm_vaddr_t seg_vstart = align_down(phdr.p_vaddr, vm->page_size); vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1; seg_vend |= vm->page_size - 1; size_t seg_size = seg_vend - seg_vstart + 1; diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 14bb4d5b6bb7..8f2e0bb1ef96 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -22,15 +22,6 @@ static int vcpu_mmap_sz(void); -/* Aligns x up to the next multiple of size. Size must be a power of 2. */ -static void *align(void *x, size_t size) -{ - size_t mask = size - 1; - TEST_ASSERT(size != 0 && !(size & (size - 1)), - "size not a power of 2: %lu", size); - return (void *) (((size_t) x + mask) & ~mask); -} - int open_path_or_exit(const char *path, int flags) { int fd; @@ -191,15 +182,15 @@ const char *vm_guest_mode_string(uint32_t i) } const struct vm_guest_mode_params vm_guest_mode_params[] = { - { 52, 48, 0x1000, 12 }, - { 52, 48, 0x10000, 16 }, - { 48, 48, 0x1000, 12 }, - { 48, 48, 0x10000, 16 }, - { 40, 48, 0x1000, 12 }, - { 40, 48, 0x10000, 16 }, - { 0, 0, 0x1000, 12 }, - { 47, 64, 0x1000, 12 }, - { 44, 64, 0x1000, 12 }, + [VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 }, + [VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 }, + [VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 }, + [VM_MODE_P48V48_64K] = { 48, 48, 0x10000, 16 }, + [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 }, + [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 }, + [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 }, + [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 }, + [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 }, }; _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); @@ -879,9 +870,17 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, alignment = 1; #endif + /* + * When using THP mmap is not guaranteed to returned a hugepage aligned + * address so we have to pad the mmap. Padding is not needed for HugeTLB + * because mmap will always return an address aligned to the HugeTLB + * page size. + */ if (src_type == VM_MEM_SRC_ANONYMOUS_THP) alignment = max(backing_src_pagesz, alignment); + ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz)); + /* Add enough memory to align up if necessary */ if (alignment > 1) region->mmap_size += alignment; @@ -914,8 +913,13 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, "test_malloc failed, mmap_start: %p errno: %i", region->mmap_start, errno); + TEST_ASSERT(!is_backing_src_hugetlb(src_type) || + region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz), + "mmap_start %p is not aligned to HugeTLB page size 0x%lx", + region->mmap_start, backing_src_pagesz); + /* Align host address */ - region->host_mem = align(region->mmap_start, alignment); + region->host_mem = align_ptr_up(region->mmap_start, alignment); /* As needed perform madvise */ if ((src_type == VM_MEM_SRC_ANONYMOUS || @@ -958,7 +962,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, "mmap of alias failed, errno: %i", errno); /* Align host alias address */ - region->host_alias = align(region->mmap_alias, alignment); + region->host_alias = align_ptr_up(region->mmap_alias, alignment); } } diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c index 0ef80dbdc116..722df3a28791 100644 --- a/tools/testing/selftests/kvm/lib/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c @@ -10,21 +10,40 @@ struct perf_test_args perf_test_args; -uint64_t guest_test_phys_mem; - /* * Guest virtual memory offset of the testing memory slot. * Must not conflict with identity mapped test code. */ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; +struct vcpu_thread { + /* The id of the vCPU. */ + int vcpu_id; + + /* The pthread backing the vCPU. */ + pthread_t thread; + + /* Set to true once the vCPU thread is up and running. */ + bool running; +}; + +/* The vCPU threads involved in this test. */ +static struct vcpu_thread vcpu_threads[KVM_MAX_VCPUS]; + +/* The function run by each vCPU thread, as provided by the test. */ +static void (*vcpu_thread_fn)(struct perf_test_vcpu_args *); + +/* Set to true once all vCPU threads are up and running. */ +static bool all_vcpu_threads_running; + /* * Continuously write to the first 8 bytes of each page in the * specified region. */ static void guest_code(uint32_t vcpu_id) { - struct perf_test_vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + struct perf_test_args *pta = &perf_test_args; + struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_id]; uint64_t gva; uint64_t pages; int i; @@ -37,9 +56,9 @@ static void guest_code(uint32_t vcpu_id) while (true) { for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * perf_test_args.guest_page_size); + uint64_t addr = gva + (i * pta->guest_page_size); - if (i % perf_test_args.wr_fract == 0) + if (i % pta->wr_fract == 0) *(uint64_t *)addr = 0x0123456789ABCDEF; else READ_ONCE(*(uint64_t *)addr); @@ -49,35 +68,81 @@ static void guest_code(uint32_t vcpu_id) } } +void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, + uint64_t vcpu_memory_bytes, + bool partition_vcpu_memory_access) +{ + struct perf_test_args *pta = &perf_test_args; + struct perf_test_vcpu_args *vcpu_args; + int vcpu_id; + + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + vcpu_args = &pta->vcpu_args[vcpu_id]; + + vcpu_args->vcpu_id = vcpu_id; + if (partition_vcpu_memory_access) { + vcpu_args->gva = guest_test_virt_mem + + (vcpu_id * vcpu_memory_bytes); + vcpu_args->pages = vcpu_memory_bytes / + pta->guest_page_size; + vcpu_args->gpa = pta->gpa + (vcpu_id * vcpu_memory_bytes); + } else { + vcpu_args->gva = guest_test_virt_mem; + vcpu_args->pages = (vcpus * vcpu_memory_bytes) / + pta->guest_page_size; + vcpu_args->gpa = pta->gpa; + } + + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + + pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_args->gpa, vcpu_args->gpa + + (vcpu_args->pages * pta->guest_page_size)); + } +} + struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, uint64_t vcpu_memory_bytes, int slots, - enum vm_mem_backing_src_type backing_src) + enum vm_mem_backing_src_type backing_src, + bool partition_vcpu_memory_access) { + struct perf_test_args *pta = &perf_test_args; struct kvm_vm *vm; uint64_t guest_num_pages; + uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src); int i; pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - perf_test_args.host_page_size = getpagesize(); - perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size; + /* By default vCPUs will write to memory. */ + pta->wr_fract = 1; + + /* + * Snapshot the non-huge page size. This is used by the guest code to + * access/dirty pages at the logging granularity. + */ + pta->guest_page_size = vm_guest_mode_params[mode].page_size; guest_num_pages = vm_adjust_num_guest_pages(mode, - (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size); + (vcpus * vcpu_memory_bytes) / pta->guest_page_size); - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, + TEST_ASSERT(vcpu_memory_bytes % getpagesize() == 0, "Guest memory size is not host page size aligned."); - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, + TEST_ASSERT(vcpu_memory_bytes % pta->guest_page_size == 0, "Guest memory size is not guest page size aligned."); TEST_ASSERT(guest_num_pages % slots == 0, "Guest memory cannot be evenly divided into %d slots.", slots); + /* + * Pass guest_num_pages to populate the page tables for test memory. + * The memory is also added to memslot 0, but that's a benign side + * effect as KVM allows aliasing HVAs in meslots. + */ vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES, - (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size, - 0, guest_code, NULL); + guest_num_pages, 0, guest_code, NULL); - perf_test_args.vm = vm; + pta->vm = vm; /* * If there should be more memory in the guest test region than there @@ -90,20 +155,18 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, guest_num_pages, vm_get_max_gfn(vm), vcpus, vcpu_memory_bytes); - guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * - perf_test_args.guest_page_size; - guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); + pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size; + pta->gpa = align_down(pta->gpa, backing_src_pagesz); #ifdef __s390x__ /* Align to 1M (segment size) */ - guest_test_phys_mem &= ~((1 << 20) - 1); + pta->gpa = align_down(pta->gpa, 1 << 20); #endif - pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); + pr_info("guest physical test memory offset: 0x%lx\n", pta->gpa); /* Add extra memory slots for testing */ for (i = 0; i < slots; i++) { uint64_t region_pages = guest_num_pages / slots; - vm_paddr_t region_start = guest_test_phys_mem + - region_pages * perf_test_args.guest_page_size * i; + vm_paddr_t region_start = pta->gpa + region_pages * pta->guest_page_size * i; vm_userspace_mem_region_add(vm, backing_src, region_start, PERF_TEST_MEM_SLOT_INDEX + i, @@ -111,10 +174,15 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, } /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages); + virt_map(vm, guest_test_virt_mem, pta->gpa, guest_num_pages); + + perf_test_setup_vcpus(vm, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access); ucall_init(vm, NULL); + /* Export the shared variables to the guest. */ + sync_global_to_guest(vm, perf_test_args); + return vm; } @@ -124,36 +192,60 @@ void perf_test_destroy_vm(struct kvm_vm *vm) kvm_vm_free(vm); } -void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, - uint64_t vcpu_memory_bytes, - bool partition_vcpu_memory_access) +void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract) +{ + perf_test_args.wr_fract = wr_fract; + sync_global_to_guest(vm, perf_test_args); +} + +static void *vcpu_thread_main(void *data) +{ + struct vcpu_thread *vcpu = data; + + WRITE_ONCE(vcpu->running, true); + + /* + * Wait for all vCPU threads to be up and running before calling the test- + * provided vCPU thread function. This prevents thread creation (which + * requires taking the mmap_sem in write mode) from interfering with the + * guest faulting in its memory. + */ + while (!READ_ONCE(all_vcpu_threads_running)) + ; + + vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu->vcpu_id]); + + return NULL; +} + +void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)) { - vm_paddr_t vcpu_gpa; - struct perf_test_vcpu_args *vcpu_args; int vcpu_id; + vcpu_thread_fn = vcpu_fn; + WRITE_ONCE(all_vcpu_threads_running, false); + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + struct vcpu_thread *vcpu = &vcpu_threads[vcpu_id]; - vcpu_args->vcpu_id = vcpu_id; - if (partition_vcpu_memory_access) { - vcpu_args->gva = guest_test_virt_mem + - (vcpu_id * vcpu_memory_bytes); - vcpu_args->pages = vcpu_memory_bytes / - perf_test_args.guest_page_size; - vcpu_gpa = guest_test_phys_mem + - (vcpu_id * vcpu_memory_bytes); - } else { - vcpu_args->gva = guest_test_virt_mem; - vcpu_args->pages = (vcpus * vcpu_memory_bytes) / - perf_test_args.guest_page_size; - vcpu_gpa = guest_test_phys_mem; - } + vcpu->vcpu_id = vcpu_id; + WRITE_ONCE(vcpu->running, false); - vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + pthread_create(&vcpu->thread, NULL, vcpu_thread_main, vcpu); + } - pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + - (vcpu_args->pages * perf_test_args.guest_page_size)); + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + while (!READ_ONCE(vcpu_threads[vcpu_id].running)) + ; } + + WRITE_ONCE(all_vcpu_threads_running, true); +} + +void perf_test_join_vcpu_threads(int vcpus) +{ + int vcpu_id; + + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) + pthread_join(vcpu_threads[vcpu_id].thread, NULL); } diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index b72429108993..6d23878bbfe1 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -283,6 +283,11 @@ size_t get_backing_src_pagesz(uint32_t i) } } +bool is_backing_src_hugetlb(uint32_t i) +{ + return !!(vm_mem_backing_src_alias(i)->flag & MAP_HUGETLB); +} + static void print_available_backing_src_types(const char *prefix) { int i; diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 4cfcafea9f5a..1410d0a9141a 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -36,11 +36,9 @@ static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static bool run_vcpus = true; -static void *vcpu_worker(void *data) +static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) { int ret; - struct perf_test_vcpu_args *vcpu_args = - (struct perf_test_vcpu_args *)data; int vcpu_id = vcpu_args->vcpu_id; struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; @@ -59,8 +57,6 @@ static void *vcpu_worker(void *data) "Invalid guest sync status: exit_reason=%s\n", exit_reason_str(run->exit_reason)); } - - return NULL; } struct memslot_antagonist_args { @@ -80,7 +76,7 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, * Add the dummy memslot just below the perf_test_util memslot, which is * at the top of the guest physical address space. */ - gpa = guest_test_phys_mem - pages * vm_get_page_size(vm); + gpa = perf_test_args.gpa - pages * vm_get_page_size(vm); for (i = 0; i < nr_modifications; i++) { usleep(delay); @@ -100,29 +96,15 @@ struct test_params { static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *p = arg; - pthread_t *vcpu_threads; struct kvm_vm *vm; - int vcpu_id; vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, - VM_MEM_SRC_ANONYMOUS); - - perf_test_args.wr_fract = 1; - - vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); - TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - - perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size, - p->partition_vcpu_memory_access); - - /* Export the shared variables to the guest */ - sync_global_to_guest(vm, perf_test_args); + VM_MEM_SRC_ANONYMOUS, + p->partition_vcpu_memory_access); pr_info("Finished creating vCPUs\n"); - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) - pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, - &perf_test_args.vcpu_args[vcpu_id]); + perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); pr_info("Started all vCPUs\n"); @@ -131,16 +113,10 @@ static void run_test(enum vm_guest_mode mode, void *arg) run_vcpus = false; - /* Wait for the vcpu threads to quit */ - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) - pthread_join(vcpu_threads[vcpu_id], NULL); - + perf_test_join_vcpu_threads(nr_vcpus); pr_info("All vCPU threads joined\n"); - ucall_uninit(vm); - kvm_vm_free(vm); - - free(vcpu_threads); + perf_test_destroy_vm(vm); } static void help(char *name) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 91d88aaa9899..672915ce73d8 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -165,10 +165,10 @@ static void hv_set_cpuid(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid, vcpu_set_cpuid(vm, VCPU_ID, cpuid); } -static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr, - struct kvm_cpuid2 *best) +static void guest_test_msrs_access(void) { struct kvm_run *run; + struct kvm_vm *vm; struct ucall uc; int stage = 0, r; struct kvm_cpuid_entry2 feat = { @@ -180,11 +180,34 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr, struct kvm_cpuid_entry2 dbg = { .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES }; - struct kvm_enable_cap cap = {0}; - - run = vcpu_state(vm, VCPU_ID); + struct kvm_cpuid2 *best; + vm_vaddr_t msr_gva; + struct kvm_enable_cap cap = { + .cap = KVM_CAP_HYPERV_ENFORCE_CPUID, + .args = {1} + }; + struct msr_data *msr; while (true) { + vm = vm_create_default(VCPU_ID, 0, guest_msr); + + msr_gva = vm_vaddr_alloc_page(vm); + memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize()); + msr = addr_gva2hva(vm, msr_gva); + + vcpu_args_set(vm, VCPU_ID, 1, msr_gva); + vcpu_enable_cap(vm, VCPU_ID, &cap); + + vcpu_set_hv_cpuid(vm, VCPU_ID); + + best = kvm_get_supported_hv_cpuid(); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); + + run = vcpu_state(vm, VCPU_ID); + switch (stage) { case 0: /* @@ -315,6 +338,7 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr, * capability enabled and guest visible CPUID bit unset. */ cap.cap = KVM_CAP_HYPERV_SYNIC2; + cap.args[0] = 0; vcpu_enable_cap(vm, VCPU_ID, &cap); break; case 22: @@ -461,9 +485,9 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr, switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_SYNC: - TEST_ASSERT(uc.args[1] == stage, - "Unexpected stage: %ld (%d expected)\n", - uc.args[1], stage); + TEST_ASSERT(uc.args[1] == 0, + "Unexpected stage: %ld (0 expected)\n", + uc.args[1]); break; case UCALL_ABORT: TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], @@ -474,13 +498,14 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr, } stage++; + kvm_vm_free(vm); } } -static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall, - void *input, void *output, struct kvm_cpuid2 *best) +static void guest_test_hcalls_access(void) { struct kvm_run *run; + struct kvm_vm *vm; struct ucall uc; int stage = 0, r; struct kvm_cpuid_entry2 feat = { @@ -493,10 +518,38 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall struct kvm_cpuid_entry2 dbg = { .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES }; - - run = vcpu_state(vm, VCPU_ID); + struct kvm_enable_cap cap = { + .cap = KVM_CAP_HYPERV_ENFORCE_CPUID, + .args = {1} + }; + vm_vaddr_t hcall_page, hcall_params; + struct hcall_data *hcall; + struct kvm_cpuid2 *best; while (true) { + vm = vm_create_default(VCPU_ID, 0, guest_hcall); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); + + /* Hypercall input/output */ + hcall_page = vm_vaddr_alloc_pages(vm, 2); + hcall = addr_gva2hva(vm, hcall_page); + memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); + + hcall_params = vm_vaddr_alloc_page(vm); + memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize()); + + vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params); + vcpu_enable_cap(vm, VCPU_ID, &cap); + + vcpu_set_hv_cpuid(vm, VCPU_ID); + + best = kvm_get_supported_hv_cpuid(); + + run = vcpu_state(vm, VCPU_ID); + switch (stage) { case 0: hcall->control = 0xdeadbeef; @@ -606,9 +659,9 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_SYNC: - TEST_ASSERT(uc.args[1] == stage, - "Unexpected stage: %ld (%d expected)\n", - uc.args[1], stage); + TEST_ASSERT(uc.args[1] == 0, + "Unexpected stage: %ld (0 expected)\n", + uc.args[1]); break; case UCALL_ABORT: TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], @@ -619,66 +672,15 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall } stage++; + kvm_vm_free(vm); } } int main(void) { - struct kvm_cpuid2 *best; - struct kvm_vm *vm; - vm_vaddr_t msr_gva, hcall_page, hcall_params; - struct kvm_enable_cap cap = { - .cap = KVM_CAP_HYPERV_ENFORCE_CPUID, - .args = {1} - }; - - /* Test MSRs */ - vm = vm_create_default(VCPU_ID, 0, guest_msr); - - msr_gva = vm_vaddr_alloc_page(vm); - memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize()); - vcpu_args_set(vm, VCPU_ID, 1, msr_gva); - vcpu_enable_cap(vm, VCPU_ID, &cap); - - vcpu_set_hv_cpuid(vm, VCPU_ID); - - best = kvm_get_supported_hv_cpuid(); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); - pr_info("Testing access to Hyper-V specific MSRs\n"); - guest_test_msrs_access(vm, addr_gva2hva(vm, msr_gva), - best); - kvm_vm_free(vm); - - /* Test hypercalls */ - vm = vm_create_default(VCPU_ID, 0, guest_hcall); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); - - /* Hypercall input/output */ - hcall_page = vm_vaddr_alloc_pages(vm, 2); - memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); - - hcall_params = vm_vaddr_alloc_page(vm); - memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize()); - - vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params); - vcpu_enable_cap(vm, VCPU_ID, &cap); - - vcpu_set_hv_cpuid(vm, VCPU_ID); - - best = kvm_get_supported_hv_cpuid(); + guest_test_msrs_access(); pr_info("Testing access to Hyper-V hypercalls\n"); - guest_test_hcalls_access(vm, addr_gva2hva(vm, hcall_params), - addr_gva2hva(vm, hcall_page), - addr_gva2hva(vm, hcall_page) + getpagesize(), - best); - - kvm_vm_free(vm); + guest_test_hcalls_access(); } diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c index 5ba325cd64bf..29b18d565cf4 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c +++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c @@ -54,12 +54,15 @@ static struct kvm_vm *sev_vm_create(bool es) return vm; } -static struct kvm_vm *__vm_create(void) +static struct kvm_vm *aux_vm_create(bool with_vcpus) { struct kvm_vm *vm; int i; vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); + if (!with_vcpus) + return vm; + for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i) vm_vcpu_add(vm, i); @@ -89,11 +92,11 @@ static void test_sev_migrate_from(bool es) { struct kvm_vm *src_vm; struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS]; - int i; + int i, ret; src_vm = sev_vm_create(es); for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i) - dst_vms[i] = __vm_create(); + dst_vms[i] = aux_vm_create(true); /* Initial migration from the src to the first dst. */ sev_migrate_from(dst_vms[0]->fd, src_vm->fd); @@ -102,7 +105,10 @@ static void test_sev_migrate_from(bool es) sev_migrate_from(dst_vms[i]->fd, dst_vms[i - 1]->fd); /* Migrate the guest back to the original VM. */ - sev_migrate_from(src_vm->fd, dst_vms[NR_MIGRATE_TEST_VMS - 1]->fd); + ret = __sev_migrate_from(src_vm->fd, dst_vms[NR_MIGRATE_TEST_VMS - 1]->fd); + TEST_ASSERT(ret == -1 && errno == EIO, + "VM that was migrated from should be dead. ret %d, errno: %d\n", ret, + errno); kvm_vm_free(src_vm); for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i) @@ -146,6 +152,8 @@ static void test_sev_migrate_locking(void) for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) pthread_join(pt[i], NULL); + for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) + kvm_vm_free(input[i].vm); } static void test_sev_migrate_parameters(void) @@ -157,12 +165,11 @@ static void test_sev_migrate_parameters(void) sev_vm = sev_vm_create(/* es= */ false); sev_es_vm = sev_vm_create(/* es= */ true); vm_no_vcpu = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); - vm_no_sev = __vm_create(); + vm_no_sev = aux_vm_create(true); sev_es_vm_no_vmsa = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL); vm_vcpu_add(sev_es_vm_no_vmsa, 1); - ret = __sev_migrate_from(sev_vm->fd, sev_es_vm->fd); TEST_ASSERT( ret == -1 && errno == EINVAL, @@ -191,13 +198,151 @@ static void test_sev_migrate_parameters(void) TEST_ASSERT(ret == -1 && errno == EINVAL, "Migrations require SEV enabled. ret %d, errno: %d\n", ret, errno); + + kvm_vm_free(sev_vm); + kvm_vm_free(sev_es_vm); + kvm_vm_free(sev_es_vm_no_vmsa); + kvm_vm_free(vm_no_vcpu); + kvm_vm_free(vm_no_sev); +} + +static int __sev_mirror_create(int dst_fd, int src_fd) +{ + struct kvm_enable_cap cap = { + .cap = KVM_CAP_VM_COPY_ENC_CONTEXT_FROM, + .args = { src_fd } + }; + + return ioctl(dst_fd, KVM_ENABLE_CAP, &cap); +} + + +static void sev_mirror_create(int dst_fd, int src_fd) +{ + int ret; + + ret = __sev_mirror_create(dst_fd, src_fd); + TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d\n", ret, errno); +} + +static void test_sev_mirror(bool es) +{ + struct kvm_vm *src_vm, *dst_vm; + struct kvm_sev_launch_start start = { + .policy = es ? SEV_POLICY_ES : 0 + }; + int i; + + src_vm = sev_vm_create(es); + dst_vm = aux_vm_create(false); + + sev_mirror_create(dst_vm->fd, src_vm->fd); + + /* Check that we can complete creation of the mirror VM. */ + for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i) + vm_vcpu_add(dst_vm, i); + sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_START, &start); + if (es) + sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); + + kvm_vm_free(src_vm); + kvm_vm_free(dst_vm); +} + +static void test_sev_mirror_parameters(void) +{ + struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_with_vcpu; + int ret; + + sev_vm = sev_vm_create(/* es= */ false); + sev_es_vm = sev_vm_create(/* es= */ true); + vm_with_vcpu = aux_vm_create(true); + vm_no_vcpu = aux_vm_create(false); + + ret = __sev_mirror_create(sev_vm->fd, sev_vm->fd); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "Should not be able copy context to self. ret: %d, errno: %d\n", + ret, errno); + + ret = __sev_mirror_create(sev_vm->fd, sev_es_vm->fd); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d\n", + ret, errno); + + ret = __sev_mirror_create(sev_es_vm->fd, sev_vm->fd); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d\n", + ret, errno); + + ret = __sev_mirror_create(vm_no_vcpu->fd, vm_with_vcpu->fd); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "Copy context requires SEV enabled. ret %d, errno: %d\n", ret, + errno); + + ret = __sev_mirror_create(vm_with_vcpu->fd, sev_vm->fd); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d\n", + ret, errno); + + kvm_vm_free(sev_vm); + kvm_vm_free(sev_es_vm); + kvm_vm_free(vm_with_vcpu); + kvm_vm_free(vm_no_vcpu); +} + +static void test_sev_move_copy(void) +{ + struct kvm_vm *dst_vm, *sev_vm, *mirror_vm, *dst_mirror_vm; + int ret; + + sev_vm = sev_vm_create(/* es= */ false); + dst_vm = aux_vm_create(true); + mirror_vm = aux_vm_create(false); + dst_mirror_vm = aux_vm_create(false); + + sev_mirror_create(mirror_vm->fd, sev_vm->fd); + ret = __sev_migrate_from(dst_vm->fd, sev_vm->fd); + TEST_ASSERT(ret == -1 && errno == EBUSY, + "Cannot migrate VM that has mirrors. ret %d, errno: %d\n", ret, + errno); + + /* The mirror itself can be migrated. */ + sev_migrate_from(dst_mirror_vm->fd, mirror_vm->fd); + ret = __sev_migrate_from(dst_vm->fd, sev_vm->fd); + TEST_ASSERT(ret == -1 && errno == EBUSY, + "Cannot migrate VM that has mirrors. ret %d, errno: %d\n", ret, + errno); + + /* + * mirror_vm is not a mirror anymore, dst_mirror_vm is. Thus, + * the owner can be copied as soon as dst_mirror_vm is gone. + */ + kvm_vm_free(dst_mirror_vm); + sev_migrate_from(dst_vm->fd, sev_vm->fd); + + kvm_vm_free(mirror_vm); + kvm_vm_free(dst_vm); + kvm_vm_free(sev_vm); } int main(int argc, char *argv[]) { - test_sev_migrate_from(/* es= */ false); - test_sev_migrate_from(/* es= */ true); - test_sev_migrate_locking(); - test_sev_migrate_parameters(); + if (kvm_check_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) { + test_sev_migrate_from(/* es= */ false); + test_sev_migrate_from(/* es= */ true); + test_sev_migrate_locking(); + test_sev_migrate_parameters(); + if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) + test_sev_move_copy(); + } + if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) { + test_sev_mirror(/* es= */ false); + test_sev_mirror(/* es= */ true); + test_sev_mirror_parameters(); + } return 0; } diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index eda0d2a51224..a0699f00b3d6 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -24,8 +24,12 @@ #define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) #define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20) +#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40) #define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20) +#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40) + +#define EVTCHN_VECTOR 0x10 static struct kvm_vm *vm; @@ -56,15 +60,44 @@ struct vcpu_runstate_info { uint64_t time[4]; }; +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ +}; + +struct vcpu_info { + uint8_t evtchn_upcall_pending; + uint8_t evtchn_upcall_mask; + unsigned long evtchn_pending_sel; + struct arch_vcpu_info arch; + struct pvclock_vcpu_time_info time; +}; /* 64 bytes (x86) */ + #define RUNSTATE_running 0 #define RUNSTATE_runnable 1 #define RUNSTATE_blocked 2 #define RUNSTATE_offline 3 +static void evtchn_handler(struct ex_regs *regs) +{ + struct vcpu_info *vi = (void *)VCPU_INFO_VADDR; + vi->evtchn_upcall_pending = 0; + + GUEST_SYNC(0x20); +} + static void guest_code(void) { struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; + __asm__ __volatile__( + "sti\n" + "nop\n" + ); + + /* Trigger an interrupt injection */ + GUEST_SYNC(0); + /* Test having the host set runstates manually */ GUEST_SYNC(RUNSTATE_runnable); GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0); @@ -153,7 +186,7 @@ int main(int argc, char *argv[]) struct kvm_xen_vcpu_attr vi = { .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, - .u.gpa = SHINFO_REGION_GPA + 0x40, + .u.gpa = VCPU_INFO_ADDR, }; vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &vi); @@ -163,6 +196,16 @@ int main(int argc, char *argv[]) }; vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock); + struct kvm_xen_hvm_attr vec = { + .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR, + .u.vector = EVTCHN_VECTOR, + }; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler); + if (do_runstate_tests) { struct kvm_xen_vcpu_attr st = { .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, @@ -171,9 +214,14 @@ int main(int argc, char *argv[]) vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st); } + struct vcpu_info *vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR); + vinfo->evtchn_upcall_pending = 0; + struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR); rs->state = 0x5a; + bool evtchn_irq_expected = false; + for (;;) { volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); struct ucall uc; @@ -193,16 +241,21 @@ int main(int argc, char *argv[]) struct kvm_xen_vcpu_attr rst; long rundelay; - /* If no runstate support, bail out early */ - if (!do_runstate_tests) - goto done; - - TEST_ASSERT(rs->state_entry_time == rs->time[0] + - rs->time[1] + rs->time[2] + rs->time[3], - "runstate times don't add up"); + if (do_runstate_tests) + TEST_ASSERT(rs->state_entry_time == rs->time[0] + + rs->time[1] + rs->time[2] + rs->time[3], + "runstate times don't add up"); switch (uc.args[1]) { - case RUNSTATE_running...RUNSTATE_offline: + case 0: + evtchn_irq_expected = true; + vinfo->evtchn_upcall_pending = 1; + break; + + case RUNSTATE_runnable...RUNSTATE_offline: + TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen"); + if (!do_runstate_tests) + goto done; rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT; rst.u.runstate.state = uc.args[1]; vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); @@ -236,6 +289,10 @@ int main(int argc, char *argv[]) sched_yield(); } while (get_run_delay() < rundelay); break; + case 0x20: + TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ"); + evtchn_irq_expected = false; + break; } break; } diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 7615f29831eb..9897fa9ab953 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -34,6 +34,7 @@ TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh TEST_PROGS += vrf_strict_mode_test.sh +TEST_PROGS += arp_ndisc_evict_nocarrier.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 3313566ce906..7f5b265fcb90 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -4002,8 +4002,8 @@ EOF ################################################################################ # main -TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_addr_bind ipv4_runtime ipv4_netfilter" -TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_addr_bind ipv6_runtime ipv6_netfilter" +TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_bind ipv4_runtime ipv4_netfilter" +TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_bind ipv6_runtime ipv6_netfilter" TESTS_OTHER="use_cases" PAUSE_ON_FAIL=no diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index b5a69ad191b0..d444ee6aa3cb 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -629,6 +629,66 @@ ipv6_fcnal() log_test $? 0 "Nexthops removed on admin down" } +ipv6_grp_refs() +{ + if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test; need mausezahn tool" + return + fi + + run_cmd "$IP link set dev veth1 up" + run_cmd "$IP link add veth1.10 link veth1 up type vlan id 10" + run_cmd "$IP link add veth1.20 link veth1 up type vlan id 20" + run_cmd "$IP -6 addr add 2001:db8:91::1/64 dev veth1.10" + run_cmd "$IP -6 addr add 2001:db8:92::1/64 dev veth1.20" + run_cmd "$IP -6 neigh add 2001:db8:91::2 lladdr 00:11:22:33:44:55 dev veth1.10" + run_cmd "$IP -6 neigh add 2001:db8:92::2 lladdr 00:11:22:33:44:55 dev veth1.20" + run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1.10" + run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth1.20" + run_cmd "$IP nexthop add id 102 group 100" + run_cmd "$IP route add 2001:db8:101::1/128 nhid 102" + + # create per-cpu dsts through nh 100 + run_cmd "ip netns exec me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1" + + # remove nh 100 from the group to delete the route potentially leaving + # a stale per-cpu dst which holds a reference to the nexthop's net + # device and to the IPv6 route + run_cmd "$IP nexthop replace id 102 group 101" + run_cmd "$IP route del 2001:db8:101::1/128" + + # add both nexthops to the group so a reference is taken on them + run_cmd "$IP nexthop replace id 102 group 100/101" + + # if the bug described in commit "net: nexthop: release IPv6 per-cpu + # dsts when replacing a nexthop group" exists at this point we have + # an unlinked IPv6 route (but not freed due to stale dst) with a + # reference over the group so we delete the group which will again + # only unlink it due to the route reference + run_cmd "$IP nexthop del id 102" + + # delete the nexthop with stale dst, since we have an unlinked + # group with a ref to it and an unlinked IPv6 route with ref to the + # group, the nh will only be unlinked and not freed so the stale dst + # remains forever and we get a net device refcount imbalance + run_cmd "$IP nexthop del id 100" + + # if a reference was lost this command will hang because the net device + # cannot be removed + timeout -s KILL 5 ip netns exec me ip link del veth1.10 >/dev/null 2>&1 + + # we can't cleanup if the command is hung trying to delete the netdev + if [ $? -eq 137 ]; then + return 1 + fi + + # cleanup + run_cmd "$IP link del veth1.20" + run_cmd "$IP nexthop flush" + + return 0 +} + ipv6_grp_fcnal() { local rc @@ -734,6 +794,9 @@ ipv6_grp_fcnal() run_cmd "$IP nexthop add id 108 group 31/24" log_test $? 2 "Nexthop group can not have a blackhole and another nexthop" + + ipv6_grp_refs + log_test $? 0 "Nexthop group replace refcounts" } ipv6_res_grp_fcnal() diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index a4bd1b087303..697994a9278b 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -6,6 +6,7 @@ CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_VRF=m CONFIG_BPF_SYSCALL=y CONFIG_CGROUP_BPF=y +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_MPLS=m CONFIG_NET_ACT_VLAN=m diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index d9eca227136b..de19eb6c38f0 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -3,7 +3,7 @@ ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \ mirred_egress_mirror_test matchall_mirred_egress_mirror_test \ - gact_trap_test" + gact_trap_test mirred_egress_to_ingress_test" NUM_NETIFS=4 source tc_common.sh source lib.sh @@ -13,10 +13,12 @@ tcflags="skip_hw" h1_create() { simple_if_init $h1 192.0.2.1/24 + tc qdisc add dev $h1 clsact } h1_destroy() { + tc qdisc del dev $h1 clsact simple_if_fini $h1 192.0.2.1/24 } @@ -153,6 +155,49 @@ gact_trap_test() log_test "trap ($tcflags)" } +mirred_egress_to_ingress_test() +{ + RET=0 + + tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \ + ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 8 action \ + ct commit nat src addr 192.0.2.2 pipe \ + ct clear pipe \ + ct commit nat dst addr 192.0.2.1 pipe \ + mirred ingress redirect dev $h1 + + tc filter add dev $swp1 protocol ip pref 11 handle 111 ingress flower \ + ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 8 action drop + tc filter add dev $swp1 protocol ip pref 12 handle 112 ingress flower \ + ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 0 action pass + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t icmp "ping,id=42,seq=10" -q + + tc_check_packets "dev $h1 egress" 100 1 + check_err $? "didn't mirror first packet" + + tc_check_packets "dev $swp1 ingress" 111 1 + check_fail $? "didn't redirect first packet" + tc_check_packets "dev $swp1 ingress" 112 1 + check_err $? "didn't receive reply to first packet" + + ping 192.0.2.2 -I$h1 -c1 -w1 -q 1>/dev/null 2>&1 + + tc_check_packets "dev $h1 egress" 100 2 + check_err $? "didn't mirror second packet" + tc_check_packets "dev $swp1 ingress" 111 1 + check_fail $? "didn't redirect second packet" + tc_check_packets "dev $swp1 ingress" 112 2 + check_err $? "didn't receive reply to second packet" + + tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower + tc filter del dev $swp1 ingress protocol ip pref 11 handle 111 flower + tc filter del dev $swp1 ingress protocol ip pref 12 handle 112 flower + + log_test "mirred_egress_to_ingress ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/gre_gso.sh b/tools/testing/selftests/net/gre_gso.sh index fdeb44d621eb..3224651db97b 100755 --- a/tools/testing/selftests/net/gre_gso.sh +++ b/tools/testing/selftests/net/gre_gso.sh @@ -118,16 +118,18 @@ gre_gst_test_checks() local addr=$2 local proto=$3 - $NS_EXEC nc $proto -kl $port >/dev/null & + [ "$proto" == 6 ] && addr="[$addr]" + + $NS_EXEC socat - tcp${proto}-listen:$port,reuseaddr,fork >/dev/null & PID=$! while ! $NS_EXEC ss -ltn | grep -q $port; do ((i++)); sleep 0.01; done - cat $TMPFILE | timeout 1 nc $proto -N $addr $port + cat $TMPFILE | timeout 1 socat -u STDIN TCP:$addr:$port log_test $? 0 "$name - copy file w/ TSO" ethtool -K veth0 tso off - cat $TMPFILE | timeout 1 nc $proto -N $addr $port + cat $TMPFILE | timeout 1 socat -u STDIN TCP:$addr:$port log_test $? 0 "$name - copy file w/ GSO" ethtool -K veth0 tso on @@ -155,8 +157,8 @@ gre6_gso_test() sleep 2 - gre_gst_test_checks GREv6/v4 172.16.2.2 - gre_gst_test_checks GREv6/v6 2001:db8:1::2 -6 + gre_gst_test_checks GREv6/v4 172.16.2.2 4 + gre_gst_test_checks GREv6/v6 2001:db8:1::2 6 cleanup } @@ -212,8 +214,8 @@ if [ ! -x "$(command -v ip)" ]; then exit $ksft_skip fi -if [ ! -x "$(command -v nc)" ]; then - echo "SKIP: Could not run test without nc tool" +if [ ! -x "$(command -v socat)" ]; then + echo "SKIP: Could not run test without socat tool" exit $ksft_skip fi diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index e61fc4c32ba2..8a22db0cca49 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -78,26 +78,21 @@ static void memrnd(void *s, size_t n) *byte++ = rand(); } -FIXTURE(tls_basic) -{ - int fd, cfd; - bool notls; -}; - -FIXTURE_SETUP(tls_basic) +static void ulp_sock_pair(struct __test_metadata *_metadata, + int *fd, int *cfd, bool *notls) { struct sockaddr_in addr; socklen_t len; int sfd, ret; - self->notls = false; + *notls = false; len = sizeof(addr); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(INADDR_ANY); addr.sin_port = 0; - self->fd = socket(AF_INET, SOCK_STREAM, 0); + *fd = socket(AF_INET, SOCK_STREAM, 0); sfd = socket(AF_INET, SOCK_STREAM, 0); ret = bind(sfd, &addr, sizeof(addr)); @@ -108,26 +103,96 @@ FIXTURE_SETUP(tls_basic) ret = getsockname(sfd, &addr, &len); ASSERT_EQ(ret, 0); - ret = connect(self->fd, &addr, sizeof(addr)); + ret = connect(*fd, &addr, sizeof(addr)); ASSERT_EQ(ret, 0); - self->cfd = accept(sfd, &addr, &len); - ASSERT_GE(self->cfd, 0); + *cfd = accept(sfd, &addr, &len); + ASSERT_GE(*cfd, 0); close(sfd); - ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + ret = setsockopt(*fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); if (ret != 0) { ASSERT_EQ(errno, ENOENT); - self->notls = true; + *notls = true; printf("Failure setting TCP_ULP, testing without tls\n"); return; } - ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + ret = setsockopt(*cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); ASSERT_EQ(ret, 0); } +/* Produce a basic cmsg */ +static int tls_send_cmsg(int fd, unsigned char record_type, + void *data, size_t len, int flags) +{ + char cbuf[CMSG_SPACE(sizeof(char))]; + int cmsg_len = sizeof(char); + struct cmsghdr *cmsg; + struct msghdr msg; + struct iovec vec; + + vec.iov_base = data; + vec.iov_len = len; + memset(&msg, 0, sizeof(struct msghdr)); + msg.msg_iov = &vec; + msg.msg_iovlen = 1; + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_TLS; + /* test sending non-record types. */ + cmsg->cmsg_type = TLS_SET_RECORD_TYPE; + cmsg->cmsg_len = CMSG_LEN(cmsg_len); + *CMSG_DATA(cmsg) = record_type; + msg.msg_controllen = cmsg->cmsg_len; + + return sendmsg(fd, &msg, flags); +} + +static int tls_recv_cmsg(struct __test_metadata *_metadata, + int fd, unsigned char record_type, + void *data, size_t len, int flags) +{ + char cbuf[CMSG_SPACE(sizeof(char))]; + struct cmsghdr *cmsg; + unsigned char ctype; + struct msghdr msg; + struct iovec vec; + int n; + + vec.iov_base = data; + vec.iov_len = len; + memset(&msg, 0, sizeof(struct msghdr)); + msg.msg_iov = &vec; + msg.msg_iovlen = 1; + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + + n = recvmsg(fd, &msg, flags); + + cmsg = CMSG_FIRSTHDR(&msg); + EXPECT_NE(cmsg, NULL); + EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); + EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); + ctype = *((unsigned char *)CMSG_DATA(cmsg)); + EXPECT_EQ(ctype, record_type); + + return n; +} + +FIXTURE(tls_basic) +{ + int fd, cfd; + bool notls; +}; + +FIXTURE_SETUP(tls_basic) +{ + ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); +} + FIXTURE_TEARDOWN(tls_basic) { close(self->fd); @@ -199,60 +264,21 @@ FIXTURE_VARIANT_ADD(tls, 13_sm4_ccm) FIXTURE_SETUP(tls) { struct tls_crypto_info_keys tls12; - struct sockaddr_in addr; - socklen_t len; - int sfd, ret; - - self->notls = false; - len = sizeof(addr); + int ret; tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(INADDR_ANY); - addr.sin_port = 0; + ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); - self->fd = socket(AF_INET, SOCK_STREAM, 0); - sfd = socket(AF_INET, SOCK_STREAM, 0); - - ret = bind(sfd, &addr, sizeof(addr)); - ASSERT_EQ(ret, 0); - ret = listen(sfd, 10); - ASSERT_EQ(ret, 0); + if (self->notls) + return; - ret = getsockname(sfd, &addr, &len); + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len); ASSERT_EQ(ret, 0); - ret = connect(self->fd, &addr, sizeof(addr)); + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len); ASSERT_EQ(ret, 0); - - ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); - if (ret != 0) { - self->notls = true; - printf("Failure setting TCP_ULP, testing without tls\n"); - } - - if (!self->notls) { - ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, - tls12.len); - ASSERT_EQ(ret, 0); - } - - self->cfd = accept(sfd, &addr, &len); - ASSERT_GE(self->cfd, 0); - - if (!self->notls) { - ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls", - sizeof("tls")); - ASSERT_EQ(ret, 0); - - ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, - tls12.len); - ASSERT_EQ(ret, 0); - } - - close(sfd); } FIXTURE_TEARDOWN(tls) @@ -613,6 +639,95 @@ TEST_F(tls, splice_to_pipe) EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } +TEST_F(tls, splice_cmsg_to_pipe) +{ + char *test_str = "test_read"; + char record_type = 100; + int send_len = 10; + char buf[10]; + int p[2]; + + ASSERT_GE(pipe(p), 0); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10); + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1); + EXPECT_EQ(errno, EINVAL); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(errno, EIO); + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL), + send_len); + EXPECT_EQ(memcmp(test_str, buf, send_len), 0); +} + +TEST_F(tls, splice_dec_cmsg_to_pipe) +{ + char *test_str = "test_read"; + char record_type = 100; + int send_len = 10; + char buf[10]; + int p[2]; + + ASSERT_GE(pipe(p), 0); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(errno, EIO); + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1); + EXPECT_EQ(errno, EINVAL); + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL), + send_len); + EXPECT_EQ(memcmp(test_str, buf, send_len), 0); +} + +TEST_F(tls, recv_and_splice) +{ + int send_len = TLS_PAYLOAD_MAX_LEN; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + int half = send_len / 2; + int p[2]; + + ASSERT_GE(pipe(p), 0); + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + /* Recv hald of the record, splice the other half */ + EXPECT_EQ(recv(self->cfd, mem_recv, half, MSG_WAITALL), half); + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, half, SPLICE_F_NONBLOCK), + half); + EXPECT_EQ(read(p[0], &mem_recv[half], half), half); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); +} + +TEST_F(tls, peek_and_splice) +{ + int send_len = TLS_PAYLOAD_MAX_LEN; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + int chunk = TLS_PAYLOAD_MAX_LEN / 4; + int n, i, p[2]; + + memrnd(mem_send, sizeof(mem_send)); + + ASSERT_GE(pipe(p), 0); + for (i = 0; i < 4; i++) + EXPECT_EQ(send(self->fd, &mem_send[chunk * i], chunk, 0), + chunk); + + EXPECT_EQ(recv(self->cfd, mem_recv, chunk * 5 / 2, + MSG_WAITALL | MSG_PEEK), + chunk * 5 / 2); + EXPECT_EQ(memcmp(mem_send, mem_recv, chunk * 5 / 2), 0); + + n = 0; + while (n < send_len) { + i = splice(self->cfd, NULL, p[1], NULL, send_len - n, 0); + EXPECT_GT(i, 0); + n += i; + } + EXPECT_EQ(n, send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); +} + TEST_F(tls, recvmsg_single) { char const *test_str = "test_recvmsg_single"; @@ -1193,60 +1308,30 @@ TEST_F(tls, mutliproc_sendpage_writers) TEST_F(tls, control_msg) { - if (self->notls) - return; - - char cbuf[CMSG_SPACE(sizeof(char))]; - char const *test_str = "test_read"; - int cmsg_len = sizeof(char); + char *test_str = "test_read"; char record_type = 100; - struct cmsghdr *cmsg; - struct msghdr msg; int send_len = 10; - struct iovec vec; char buf[10]; - vec.iov_base = (char *)test_str; - vec.iov_len = 10; - memset(&msg, 0, sizeof(struct msghdr)); - msg.msg_iov = &vec; - msg.msg_iovlen = 1; - msg.msg_control = cbuf; - msg.msg_controllen = sizeof(cbuf); - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = SOL_TLS; - /* test sending non-record types. */ - cmsg->cmsg_type = TLS_SET_RECORD_TYPE; - cmsg->cmsg_len = CMSG_LEN(cmsg_len); - *CMSG_DATA(cmsg) = record_type; - msg.msg_controllen = cmsg->cmsg_len; + if (self->notls) + SKIP(return, "no TLS support"); - EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len); + EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len, 0), + send_len); /* Should fail because we didn't provide a control message */ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); - vec.iov_base = buf; - EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL | MSG_PEEK), send_len); - - cmsg = CMSG_FIRSTHDR(&msg); - EXPECT_NE(cmsg, NULL); - EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); - EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); - record_type = *((unsigned char *)CMSG_DATA(cmsg)); - EXPECT_EQ(record_type, 100); + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL | MSG_PEEK), + send_len); EXPECT_EQ(memcmp(buf, test_str, send_len), 0); /* Recv the message again without MSG_PEEK */ - record_type = 0; memset(buf, 0, sizeof(buf)); - EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL), send_len); - cmsg = CMSG_FIRSTHDR(&msg); - EXPECT_NE(cmsg, NULL); - EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); - EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); - record_type = *((unsigned char *)CMSG_DATA(cmsg)); - EXPECT_EQ(record_type, 100); + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL), + send_len); EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } @@ -1301,6 +1386,160 @@ TEST_F(tls, shutdown_reuse) EXPECT_EQ(errno, EISCONN); } +FIXTURE(tls_err) +{ + int fd, cfd; + int fd2, cfd2; + bool notls; +}; + +FIXTURE_VARIANT(tls_err) +{ + uint16_t tls_version; +}; + +FIXTURE_VARIANT_ADD(tls_err, 12_aes_gcm) +{ + .tls_version = TLS_1_2_VERSION, +}; + +FIXTURE_VARIANT_ADD(tls_err, 13_aes_gcm) +{ + .tls_version = TLS_1_3_VERSION, +}; + +FIXTURE_SETUP(tls_err) +{ + struct tls_crypto_info_keys tls12; + int ret; + + tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128, + &tls12); + + ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); + ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls); + if (self->notls) + return; + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd2, SOL_TLS, TLS_RX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(tls_err) +{ + close(self->fd); + close(self->cfd); + close(self->fd2); + close(self->cfd2); +} + +TEST_F(tls_err, bad_rec) +{ + char buf[64]; + + if (self->notls) + SKIP(return, "no TLS support"); + + memset(buf, 0x55, sizeof(buf)); + EXPECT_EQ(send(self->fd2, buf, sizeof(buf), 0), sizeof(buf)); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EMSGSIZE); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EAGAIN); +} + +TEST_F(tls_err, bad_auth) +{ + char buf[128]; + int n; + + if (self->notls) + SKIP(return, "no TLS support"); + + memrnd(buf, sizeof(buf) / 2); + EXPECT_EQ(send(self->fd, buf, sizeof(buf) / 2, 0), sizeof(buf) / 2); + n = recv(self->cfd, buf, sizeof(buf), 0); + EXPECT_GT(n, sizeof(buf) / 2); + + buf[n - 1]++; + + EXPECT_EQ(send(self->fd2, buf, n, 0), n); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); +} + +TEST_F(tls_err, bad_in_large_read) +{ + char txt[3][64]; + char cip[3][128]; + char buf[3 * 128]; + int i, n; + + if (self->notls) + SKIP(return, "no TLS support"); + + /* Put 3 records in the sockets */ + for (i = 0; i < 3; i++) { + memrnd(txt[i], sizeof(txt[i])); + EXPECT_EQ(send(self->fd, txt[i], sizeof(txt[i]), 0), + sizeof(txt[i])); + n = recv(self->cfd, cip[i], sizeof(cip[i]), 0); + EXPECT_GT(n, sizeof(txt[i])); + /* Break the third message */ + if (i == 2) + cip[2][n - 1]++; + EXPECT_EQ(send(self->fd2, cip[i], n, 0), n); + } + + /* We should be able to receive the first two messages */ + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt[0]) * 2); + EXPECT_EQ(memcmp(buf, txt[0], sizeof(txt[0])), 0); + EXPECT_EQ(memcmp(buf + sizeof(txt[0]), txt[1], sizeof(txt[1])), 0); + /* Third mesasge is bad */ + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); +} + +TEST_F(tls_err, bad_cmsg) +{ + char *test_str = "test_read"; + int send_len = 10; + char cip[128]; + char buf[128]; + char txt[64]; + int n; + + if (self->notls) + SKIP(return, "no TLS support"); + + /* Queue up one data record */ + memrnd(txt, sizeof(txt)); + EXPECT_EQ(send(self->fd, txt, sizeof(txt), 0), sizeof(txt)); + n = recv(self->cfd, cip, sizeof(cip), 0); + EXPECT_GT(n, sizeof(txt)); + EXPECT_EQ(send(self->fd2, cip, n, 0), n); + + EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10); + n = recv(self->cfd, cip, sizeof(cip), 0); + cip[n - 1]++; /* Break it */ + EXPECT_GT(n, send_len); + EXPECT_EQ(send(self->fd2, cip, n, 0), n); + + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt)); + EXPECT_EQ(memcmp(buf, txt, sizeof(txt)), 0); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); +} + TEST(non_established) { struct tls12_crypto_info_aes_gcm_256 tls12; struct sockaddr_in addr; @@ -1355,64 +1594,82 @@ TEST(non_established) { TEST(keysizes) { struct tls12_crypto_info_aes_gcm_256 tls12; - struct sockaddr_in addr; - int sfd, ret, fd, cfd; - socklen_t len; + int ret, fd, cfd; bool notls; - notls = false; - len = sizeof(addr); - memset(&tls12, 0, sizeof(tls12)); tls12.info.version = TLS_1_2_VERSION; tls12.info.cipher_type = TLS_CIPHER_AES_GCM_256; - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(INADDR_ANY); - addr.sin_port = 0; + ulp_sock_pair(_metadata, &fd, &cfd, ¬ls); - fd = socket(AF_INET, SOCK_STREAM, 0); - sfd = socket(AF_INET, SOCK_STREAM, 0); + if (!notls) { + ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, + sizeof(tls12)); + EXPECT_EQ(ret, 0); + + ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, + sizeof(tls12)); + EXPECT_EQ(ret, 0); + } + + close(fd); + close(cfd); +} + +TEST(tls_v6ops) { + struct tls_crypto_info_keys tls12; + struct sockaddr_in6 addr, addr2; + int sfd, ret, fd; + socklen_t len, len2; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12); + + addr.sin6_family = AF_INET6; + addr.sin6_addr = in6addr_any; + addr.sin6_port = 0; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + sfd = socket(AF_INET6, SOCK_STREAM, 0); ret = bind(sfd, &addr, sizeof(addr)); ASSERT_EQ(ret, 0); ret = listen(sfd, 10); ASSERT_EQ(ret, 0); + len = sizeof(addr); ret = getsockname(sfd, &addr, &len); ASSERT_EQ(ret, 0); ret = connect(fd, &addr, sizeof(addr)); ASSERT_EQ(ret, 0); + len = sizeof(addr); + ret = getsockname(fd, &addr, &len); + ASSERT_EQ(ret, 0); + ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); - if (ret != 0) { - notls = true; - printf("Failure setting TCP_ULP, testing without tls\n"); + if (ret) { + ASSERT_EQ(errno, ENOENT); + SKIP(return, "no TLS support"); } + ASSERT_EQ(ret, 0); - if (!notls) { - ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, - sizeof(tls12)); - EXPECT_EQ(ret, 0); - } + ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); - cfd = accept(sfd, &addr, &len); - ASSERT_GE(cfd, 0); + ret = setsockopt(fd, SOL_TLS, TLS_RX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); - if (!notls) { - ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", - sizeof("tls")); - EXPECT_EQ(ret, 0); + len2 = sizeof(addr2); + ret = getsockname(fd, &addr2, &len2); + ASSERT_EQ(ret, 0); - ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, - sizeof(tls12)); - EXPECT_EQ(ret, 0); - } + EXPECT_EQ(len2, len); + EXPECT_EQ(memcmp(&addr, &addr2, len), 0); - close(sfd); close(fd); - close(cfd); + close(sfd); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 8748199ac109..ffca314897c4 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -5,7 +5,8 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ - ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh + ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ + conntrack_vrf.sh LDLIBS = -lmnl TEST_GEN_FILES = nf-queue diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/netfilter/conntrack_vrf.sh new file mode 100755 index 000000000000..91f3ef0f1192 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh @@ -0,0 +1,219 @@ +#!/bin/sh + +# This script demonstrates interaction of conntrack and vrf. +# The vrf driver calls the netfilter hooks again, with oif/iif +# pointing at the VRF device. +# +# For ingress, this means first iteration has iifname of lower/real +# device. In this script, thats veth0. +# Second iteration is iifname set to vrf device, tvrf in this script. +# +# For egress, this is reversed: first iteration has the vrf device, +# second iteration is done with the lower/real/veth0 device. +# +# test_ct_zone_in demonstrates unexpected change of nftables +# behavior # caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack +# connection on VRF rcv" +# +# It was possible to assign conntrack zone to a packet (or mark it for +# `notracking`) in the prerouting chain before conntrack, based on real iif. +# +# After the change, the zone assignment is lost and the zone is assigned based +# on the VRF master interface (in case such a rule exists). +# assignment is lost. Instead, assignment based on the `iif` matching +# Thus it is impossible to distinguish packets based on the original +# interface. +# +# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem +# that was supposed to be fixed by the commit mentioned above to make sure +# that any fix to test case 1 won't break masquerade again. + +ksft_skip=4 + +IP0=172.30.30.1 +IP1=172.30.30.2 +PFXL=30 +ret=0 + +sfx=$(mktemp -u "XXXXXXXX") +ns0="ns0-$sfx" +ns1="ns1-$sfx" + +cleanup() +{ + ip netns pids $ns0 | xargs kill 2>/dev/null + ip netns pids $ns1 | xargs kill 2>/dev/null + + ip netns del $ns0 $ns1 +} + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add "$ns0" +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace $ns0" + exit $ksft_skip +fi +ip netns add "$ns1" + +trap cleanup EXIT + +ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0 +ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0 +ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0 + +ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not add veth device" + exit $ksft_skip +fi + +ip -net $ns0 li add tvrf type vrf table 9876 +if [ $? -ne 0 ];then + echo "SKIP: Could not add vrf device" + exit $ksft_skip +fi + +ip -net $ns0 li set lo up + +ip -net $ns0 li set veth0 master tvrf +ip -net $ns0 li set tvrf up +ip -net $ns0 li set veth0 up +ip -net $ns1 li set veth0 up + +ip -net $ns0 addr add $IP0/$PFXL dev veth0 +ip -net $ns1 addr add $IP1/$PFXL dev veth0 + +ip netns exec $ns1 iperf3 -s > /dev/null 2>&1& +if [ $? -ne 0 ];then + echo "SKIP: Could not start iperf3" + exit $ksft_skip +fi + +# test vrf ingress handling. +# The incoming connection should be placed in conntrack zone 1, +# as decided by the first iteration of the ruleset. +test_ct_zone_in() +{ +ip netns exec $ns0 nft -f - <<EOF +table testct { + chain rawpre { + type filter hook prerouting priority raw; + + iif { veth0, tvrf } counter meta nftrace set 1 + iif veth0 counter ct zone set 1 counter return + iif tvrf counter ct zone set 2 counter return + ip protocol icmp counter + notrack counter + } + + chain rawout { + type filter hook output priority raw; + + oif veth0 counter ct zone set 1 counter return + oif tvrf counter ct zone set 2 counter return + notrack counter + } +} +EOF + ip netns exec $ns1 ping -W 1 -c 1 -I veth0 $IP0 > /dev/null + + # should be in zone 1, not zone 2 + count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l) + if [ $count -eq 1 ]; then + echo "PASS: entry found in conntrack zone 1" + else + echo "FAIL: entry not found in conntrack zone 1" + count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l) + if [ $count -eq 1 ]; then + echo "FAIL: entry found in zone 2 instead" + else + echo "FAIL: entry not in zone 1 or 2, dumping table" + ip netns exec $ns0 conntrack -L + ip netns exec $ns0 nft list ruleset + fi + fi +} + +# add masq rule that gets evaluated w. outif set to vrf device. +# This tests the first iteration of the packet through conntrack, +# oifname is the vrf device. +test_masquerade_vrf() +{ + ip netns exec $ns0 conntrack -F 2>/dev/null + +ip netns exec $ns0 nft -f - <<EOF +flush ruleset +table ip nat { + chain postrouting { + type nat hook postrouting priority 0; + # NB: masquerade should always be combined with 'oif(name) bla', + # lack of this is intentional here, we want to exercise double-snat. + ip saddr 172.30.30.0/30 counter masquerade random + } +} +EOF + ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 >/dev/null + if [ $? -ne 0 ]; then + echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device" + ret=1 + return + fi + + # must also check that nat table was evaluated on second (lower device) iteration. + ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' + if [ $? -eq 0 ]; then + echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device" + else + echo "FAIL: vrf masq rule has unexpected counter value" + ret=1 + fi +} + +# add masq rule that gets evaluated w. outif set to veth device. +# This tests the 2nd iteration of the packet through conntrack, +# oifname is the lower device (veth0 in this case). +test_masquerade_veth() +{ + ip netns exec $ns0 conntrack -F 2>/dev/null +ip netns exec $ns0 nft -f - <<EOF +flush ruleset +table ip nat { + chain postrouting { + type nat hook postrouting priority 0; + meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random + } +} +EOF + ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null + if [ $? -ne 0 ]; then + echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device" + ret=1 + return + fi + + # must also check that nat table was evaluated on second (lower device) iteration. + ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' + if [ $? -eq 0 ]; then + echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device" + else + echo "FAIL: vrf masq rule has unexpected counter value" + ret=1 + fi +} + +test_ct_zone_in +test_masquerade_vrf +test_masquerade_veth + +exit $ret diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index da1c1e4b6c86..d88867d2fed7 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -759,19 +759,21 @@ test_port_shadow() local result="" local logmsg="" - echo ROUTER | ip netns exec "$ns0" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 & - nc_r=$! + # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405. + echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405 - echo CLIENT | ip netns exec "$ns2" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 & - nc_c=$! + echo ROUTER | ip netns exec "$ns0" timeout 5 socat -u STDIN UDP4-LISTEN:1405 & + sc_r=$! - # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405. - echo "fake-entry" | ip netns exec "$ns2" nc -w 1 -p 1405 -u "$daddrc" 41404 > /dev/null + echo CLIENT | ip netns exec "$ns2" timeout 5 socat -u STDIN UDP4-LISTEN:1405,reuseport & + sc_c=$! + + sleep 0.3 # ns1 tries to connect to ns0:1405. With default settings this should connect # to client, it matches the conntrack entry created above. - result=$(echo "" | ip netns exec "$ns1" nc -w 1 -p 41404 -u "$daddrs" 1405) + result=$(echo "data" | ip netns exec "$ns1" timeout 1 socat - UDP:"$daddrs":1405,sourceport=41404) if [ "$result" = "$expect" ] ;then echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}" @@ -780,7 +782,7 @@ test_port_shadow() ret=1 fi - kill $nc_r $nc_c 2>/dev/null + kill $sc_r $sc_c 2>/dev/null # flush udp entries for next test round, if any ip netns exec "$ns0" conntrack -F >/dev/null 2>&1 @@ -816,11 +818,10 @@ table $family raw { chain prerouting { type filter hook prerouting priority -300; policy accept; meta iif veth0 udp dport 1405 notrack - udp dport 1405 notrack } chain output { type filter hook output priority -300; policy accept; - udp sport 1405 notrack + meta oif veth0 udp sport 1405 notrack } } EOF @@ -851,6 +852,18 @@ test_port_shadowing() { local family="ip" + conntrack -h >/dev/null 2>&1 + if [ $? -ne 0 ];then + echo "SKIP: Could not run nat port shadowing test without conntrack tool" + return + fi + + socat -h > /dev/null 2>&1 + if [ $? -ne 0 ];then + echo "SKIP: Could not run nat port shadowing test without socat tool" + return + fi + ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh index 3d202b90b33d..7d27f1f3bc01 100755 --- a/tools/testing/selftests/netfilter/nft_queue.sh +++ b/tools/testing/selftests/netfilter/nft_queue.sh @@ -16,6 +16,10 @@ timeout=4 cleanup() { + ip netns pids ${ns1} | xargs kill 2>/dev/null + ip netns pids ${ns2} | xargs kill 2>/dev/null + ip netns pids ${nsrouter} | xargs kill 2>/dev/null + ip netns del ${ns1} ip netns del ${ns2} ip netns del ${nsrouter} @@ -332,6 +336,55 @@ EOF echo "PASS: tcp via loopback and re-queueing" } +test_icmp_vrf() { + ip -net $ns1 link add tvrf type vrf table 9876 + if [ $? -ne 0 ];then + echo "SKIP: Could not add vrf device" + return + fi + + ip -net $ns1 li set eth0 master tvrf + ip -net $ns1 li set tvrf up + + ip -net $ns1 route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876 +ip netns exec ${ns1} nft -f /dev/stdin <<EOF +flush ruleset +table inet filter { + chain output { + type filter hook output priority 0; policy accept; + meta oifname "tvrf" icmp type echo-request counter queue num 1 + meta oifname "eth0" icmp type echo-request counter queue num 1 + } + chain post { + type filter hook postrouting priority 0; policy accept; + meta oifname "tvrf" icmp type echo-request counter queue num 1 + meta oifname "eth0" icmp type echo-request counter queue num 1 + } +} +EOF + ip netns exec ${ns1} ./nf-queue -q 1 -t $timeout & + local nfqpid=$! + + sleep 1 + ip netns exec ${ns1} ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null + + for n in output post; do + for d in tvrf eth0; do + ip netns exec ${ns1} nft list chain inet filter $n | grep -q "oifname \"$d\" icmp type echo-request counter packets 1" + if [ $? -ne 0 ] ; then + echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2 + ip netns exec ${ns1} nft list ruleset + ret=1 + return + fi + done + done + + wait $nfqpid + [ $? -eq 0 ] && echo "PASS: icmp+nfqueue via vrf" + wait 2>/dev/null +} + ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null @@ -372,5 +425,6 @@ test_queue 20 test_tcp_forward test_tcp_localhost test_tcp_localhost_requeue +test_icmp_vrf exit $ret diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json index 503982b8f295..91832400ddbd 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -68,7 +68,7 @@ "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667", "expExitCode": "0", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9]* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref", + "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref", "matchCount": "1", "teardown": [ "$TC action flush action bpf" diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json index 88a20c781e49..c6046096d9db 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json @@ -15,7 +15,7 @@ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", "expExitCode": "0", "verifyCmd": "$TC qdisc show dev $ETH", - "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]", "matchCount": "4", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" @@ -37,7 +37,7 @@ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", "expExitCode": "0", "verifyCmd": "$TC qdisc show dev $ETH", - "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-9,a-f][0-9,a-f]{0,2} bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-9,a-f][0-9,a-f]{0,2}", "matchCount": "256", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" @@ -60,7 +60,7 @@ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", "expExitCode": "2", "verifyCmd": "$TC qdisc show dev $ETH", - "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]", "matchCount": "4", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" @@ -82,7 +82,7 @@ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq", "expExitCode": "2", "verifyCmd": "$TC qdisc show dev $ETH", - "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]", "matchCount": "0", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" @@ -106,7 +106,7 @@ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq", "expExitCode": "2", "verifyCmd": "$TC qdisc show dev $ETH", - "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]", "matchCount": "0", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" @@ -128,7 +128,7 @@ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", "expExitCode": "2", "verifyCmd": "$TC qdisc show dev $ETH", - "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]", "matchCount": "0", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index ebc4ee0fe179..8a9461aa0878 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -276,7 +276,11 @@ n0 ping -W 1 -c 1 192.168.241.2 n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7 ip2 link del wg0 ip2 link del wg1 -! n0 ping -W 1 -c 10 -f 192.168.241.2 || false # Should not crash kernel +read _ _ tx_bytes_before < <(n0 wg show wg1 transfer) +! n0 ping -W 1 -c 10 -f 192.168.241.2 || false +sleep 1 +read _ _ tx_bytes_after < <(n0 wg show wg1 transfer) +(( tx_bytes_after - tx_bytes_before < 70000 )) ip0 link del wg1 ip1 link del wg0 @@ -609,6 +613,28 @@ ip0 link set wg0 up kill $ncat_pid ip0 link del wg0 +# Ensure that dst_cache references don't outlive netns lifetime +ip1 link add dev wg0 type wireguard +ip2 link add dev wg0 type wireguard +configure_peers +ip1 link add veth1 type veth peer name veth2 +ip1 link set veth2 netns $netns2 +ip1 addr add fd00:aa::1/64 dev veth1 +ip2 addr add fd00:aa::2/64 dev veth2 +ip1 link set veth1 up +ip2 link set veth2 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +ip1 -6 route add default dev veth1 via fd00:aa::2 +ip2 -6 route add default dev veth2 via fd00:aa::1 +n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2 +n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1 +n1 ping6 -c 1 fd00::2 +pp ip netns delete $netns1 +pp ip netns delete $netns2 +pp ip netns add $netns1 +pp ip netns add $netns2 + # Ensure there aren't circular reference loops ip1 link add wg1 type wireguard ip2 link add wg2 type wireguard @@ -627,7 +653,7 @@ while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do done < /dev/kmsg alldeleted=1 for object in "${!objects[@]}"; do - if [[ ${objects["$object"]} != *createddestroyed ]]; then + if [[ ${objects["$object"]} != *createddestroyed && ${objects["$object"]} != *createdcreateddestroyeddestroyed ]]; then echo "Error: $object: merely ${objects["$object"]}" >&3 alldeleted=0 fi diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index fe07d97df9fa..2b321b8a96cf 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -47,7 +47,7 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_TRACE_IRQFLAGS=y CONFIG_DEBUG_BUGVERBOSE=y CONFIG_DEBUG_LIST=y -CONFIG_DEBUG_PI_LIST=y +CONFIG_DEBUG_PLIST=y CONFIG_PROVE_RCU=y CONFIG_SPARSE_RCU_POINTER=y CONFIG_RCU_CPU_STALL_TIMEOUT=21 diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index 74db83a0aedd..a9b5a520a1d2 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -66,6 +66,7 @@ CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15 +CONFIG_LOG_BUF_SHIFT=18 CONFIG_PRINTK_TIME=y CONFIG_BLK_DEV_INITRD=y CONFIG_LEGACY_VSYSCALL_NONE=y |