diff options
Diffstat (limited to 'tools')
139 files changed, 9873 insertions, 358 deletions
diff --git a/tools/accounting/.gitignore b/tools/accounting/.gitignore new file mode 100644 index 000000000000..86485203c4ae --- /dev/null +++ b/tools/accounting/.gitignore @@ -0,0 +1 @@ +getdelays diff --git a/tools/accounting/Makefile b/tools/accounting/Makefile new file mode 100644 index 000000000000..647c94a219bf --- /dev/null +++ b/tools/accounting/Makefile @@ -0,0 +1,9 @@ +CC := $(CROSS_COMPILE)gcc +CFLAGS := -I../../usr/include + +PROGS := getdelays + +all: $(PROGS) + +clean: + rm -fr $(PROGS) diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c new file mode 100644 index 000000000000..b5ca536e56a8 --- /dev/null +++ b/tools/accounting/getdelays.c @@ -0,0 +1,550 @@ +/* getdelays.c + * + * Utility to get per-pid and per-tgid delay accounting statistics + * Also illustrates usage of the taskstats interface + * + * Copyright (C) Shailabh Nagar, IBM Corp. 2005 + * Copyright (C) Balbir Singh, IBM Corp. 2006 + * Copyright (c) Jay Lan, SGI. 2006 + * + * Compile with + * gcc -I/usr/src/linux/include getdelays.c -o getdelays + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <unistd.h> +#include <poll.h> +#include <string.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include <signal.h> + +#include <linux/genetlink.h> +#include <linux/taskstats.h> +#include <linux/cgroupstats.h> + +/* + * Generic macros for dealing with netlink sockets. Might be duplicated + * elsewhere. It is recommended that commercial grade applications use + * libnl or libnetlink and use the interfaces provided by the library + */ +#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN)) +#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN) +#define NLA_DATA(na) ((void *)((char*)(na) + NLA_HDRLEN)) +#define NLA_PAYLOAD(len) (len - NLA_HDRLEN) + +#define err(code, fmt, arg...) 
\ + do { \ + fprintf(stderr, fmt, ##arg); \ + exit(code); \ + } while (0) + +int done; +int rcvbufsz; +char name[100]; +int dbg; +int print_delays; +int print_io_accounting; +int print_task_context_switch_counts; + +#define PRINTF(fmt, arg...) { \ + if (dbg) { \ + printf(fmt, ##arg); \ + } \ + } + +/* Maximum size of response requested or message sent */ +#define MAX_MSG_SIZE 1024 +/* Maximum number of cpus expected to be specified in a cpumask */ +#define MAX_CPUS 32 + +struct msgtemplate { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[MAX_MSG_SIZE]; +}; + +char cpumask[100+6*MAX_CPUS]; + +static void usage(void) +{ + fprintf(stderr, "getdelays [-dilv] [-w logfile] [-r bufsize] " + "[-m cpumask] [-t tgid] [-p pid]\n"); + fprintf(stderr, " -d: print delayacct stats\n"); + fprintf(stderr, " -i: print IO accounting (works only with -p)\n"); + fprintf(stderr, " -l: listen forever\n"); + fprintf(stderr, " -v: debug on\n"); + fprintf(stderr, " -C: container path\n"); +} + +/* + * Create a raw netlink socket and bind + */ +static int create_nl_socket(int protocol) +{ + int fd; + struct sockaddr_nl local; + + fd = socket(AF_NETLINK, SOCK_RAW, protocol); + if (fd < 0) + return -1; + + if (rcvbufsz) + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, + &rcvbufsz, sizeof(rcvbufsz)) < 0) { + fprintf(stderr, "Unable to set socket rcv buf size to %d\n", + rcvbufsz); + goto error; + } + + memset(&local, 0, sizeof(local)); + local.nl_family = AF_NETLINK; + + if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) + goto error; + + return fd; +error: + close(fd); + return -1; +} + + +static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, + __u8 genl_cmd, __u16 nla_type, + void *nla_data, int nla_len) +{ + struct nlattr *na; + struct sockaddr_nl nladdr; + int r, buflen; + char *buf; + + struct msgtemplate msg; + + msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); + msg.n.nlmsg_type = nlmsg_type; + msg.n.nlmsg_flags = NLM_F_REQUEST; + msg.n.nlmsg_seq = 0; + msg.n.nlmsg_pid 
= nlmsg_pid; + msg.g.cmd = genl_cmd; + msg.g.version = 0x1; + na = (struct nlattr *) GENLMSG_DATA(&msg); + na->nla_type = nla_type; + na->nla_len = nla_len + 1 + NLA_HDRLEN; + memcpy(NLA_DATA(na), nla_data, nla_len); + msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); + + buf = (char *) &msg; + buflen = msg.n.nlmsg_len ; + memset(&nladdr, 0, sizeof(nladdr)); + nladdr.nl_family = AF_NETLINK; + while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr, + sizeof(nladdr))) < buflen) { + if (r > 0) { + buf += r; + buflen -= r; + } else if (errno != EAGAIN) + return -1; + } + return 0; +} + + +/* + * Probe the controller in genetlink to find the family id + * for the TASKSTATS family + */ +static int get_family_id(int sd) +{ + struct { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[256]; + } ans; + + int id = 0, rc; + struct nlattr *na; + int rep_len; + + strcpy(name, TASKSTATS_GENL_NAME); + rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, + CTRL_ATTR_FAMILY_NAME, (void *)name, + strlen(TASKSTATS_GENL_NAME)+1); + if (rc < 0) + return 0; /* sendto() failure? */ + + rep_len = recv(sd, &ans, sizeof(ans), 0); + if (ans.n.nlmsg_type == NLMSG_ERROR || + (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len)) + return 0; + + na = (struct nlattr *) GENLMSG_DATA(&ans); + na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); + if (na->nla_type == CTRL_ATTR_FAMILY_ID) { + id = *(__u16 *) NLA_DATA(na); + } + return id; +} + +#define average_ms(t, c) (t / 1000000ULL / (c ? 
c : 1)) + +static void print_delayacct(struct taskstats *t) +{ + printf("\n\nCPU %15s%15s%15s%15s%15s\n" + " %15llu%15llu%15llu%15llu%15.3fms\n" + "IO %15s%15s%15s\n" + " %15llu%15llu%15llums\n" + "SWAP %15s%15s%15s\n" + " %15llu%15llu%15llums\n" + "RECLAIM %12s%15s%15s\n" + " %15llu%15llu%15llums\n", + "count", "real total", "virtual total", + "delay total", "delay average", + (unsigned long long)t->cpu_count, + (unsigned long long)t->cpu_run_real_total, + (unsigned long long)t->cpu_run_virtual_total, + (unsigned long long)t->cpu_delay_total, + average_ms((double)t->cpu_delay_total, t->cpu_count), + "count", "delay total", "delay average", + (unsigned long long)t->blkio_count, + (unsigned long long)t->blkio_delay_total, + average_ms(t->blkio_delay_total, t->blkio_count), + "count", "delay total", "delay average", + (unsigned long long)t->swapin_count, + (unsigned long long)t->swapin_delay_total, + average_ms(t->swapin_delay_total, t->swapin_count), + "count", "delay total", "delay average", + (unsigned long long)t->freepages_count, + (unsigned long long)t->freepages_delay_total, + average_ms(t->freepages_delay_total, t->freepages_count)); +} + +static void task_context_switch_counts(struct taskstats *t) +{ + printf("\n\nTask %15s%15s\n" + " %15llu%15llu\n", + "voluntary", "nonvoluntary", + (unsigned long long)t->nvcsw, (unsigned long long)t->nivcsw); +} + +static void print_cgroupstats(struct cgroupstats *c) +{ + printf("sleeping %llu, blocked %llu, running %llu, stopped %llu, " + "uninterruptible %llu\n", (unsigned long long)c->nr_sleeping, + (unsigned long long)c->nr_io_wait, + (unsigned long long)c->nr_running, + (unsigned long long)c->nr_stopped, + (unsigned long long)c->nr_uninterruptible); +} + + +static void print_ioacct(struct taskstats *t) +{ + printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n", + t->ac_comm, + (unsigned long long)t->read_bytes, + (unsigned long long)t->write_bytes, + (unsigned long long)t->cancelled_write_bytes); +} + +int 
main(int argc, char *argv[]) +{ + int c, rc, rep_len, aggr_len, len2; + int cmd_type = TASKSTATS_CMD_ATTR_UNSPEC; + __u16 id; + __u32 mypid; + + struct nlattr *na; + int nl_sd = -1; + int len = 0; + pid_t tid = 0; + pid_t rtid = 0; + + int fd = 0; + int count = 0; + int write_file = 0; + int maskset = 0; + char *logfile = NULL; + int loop = 0; + int containerset = 0; + char *containerpath = NULL; + int cfd = 0; + int forking = 0; + sigset_t sigset; + + struct msgtemplate msg; + + while (!forking) { + c = getopt(argc, argv, "qdiw:r:m:t:p:vlC:c:"); + if (c < 0) + break; + + switch (c) { + case 'd': + printf("print delayacct stats ON\n"); + print_delays = 1; + break; + case 'i': + printf("printing IO accounting\n"); + print_io_accounting = 1; + break; + case 'q': + printf("printing task/process context switch rates\n"); + print_task_context_switch_counts = 1; + break; + case 'C': + containerset = 1; + containerpath = optarg; + break; + case 'w': + logfile = strdup(optarg); + printf("write to file %s\n", logfile); + write_file = 1; + break; + case 'r': + rcvbufsz = atoi(optarg); + printf("receive buf size %d\n", rcvbufsz); + if (rcvbufsz < 0) + err(1, "Invalid rcv buf size\n"); + break; + case 'm': + strncpy(cpumask, optarg, sizeof(cpumask)); + cpumask[sizeof(cpumask) - 1] = '\0'; + maskset = 1; + printf("cpumask %s maskset %d\n", cpumask, maskset); + break; + case 't': + tid = atoi(optarg); + if (!tid) + err(1, "Invalid tgid\n"); + cmd_type = TASKSTATS_CMD_ATTR_TGID; + break; + case 'p': + tid = atoi(optarg); + if (!tid) + err(1, "Invalid pid\n"); + cmd_type = TASKSTATS_CMD_ATTR_PID; + break; + case 'c': + + /* Block SIGCHLD for sigwait() later */ + if (sigemptyset(&sigset) == -1) + err(1, "Failed to empty sigset"); + if (sigaddset(&sigset, SIGCHLD)) + err(1, "Failed to set sigchld in sigset"); + sigprocmask(SIG_BLOCK, &sigset, NULL); + + /* fork/exec a child */ + tid = fork(); + if (tid < 0) + err(1, "Fork failed\n"); + if (tid == 0) + if (execvp(argv[optind - 1], + 
&argv[optind - 1]) < 0) + exit(-1); + + /* Set the command type and avoid further processing */ + cmd_type = TASKSTATS_CMD_ATTR_PID; + forking = 1; + break; + case 'v': + printf("debug on\n"); + dbg = 1; + break; + case 'l': + printf("listen forever\n"); + loop = 1; + break; + default: + usage(); + exit(-1); + } + } + + if (write_file) { + fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (fd == -1) { + perror("Cannot open output file\n"); + exit(1); + } + } + + nl_sd = create_nl_socket(NETLINK_GENERIC); + if (nl_sd < 0) + err(1, "error creating Netlink socket\n"); + + + mypid = getpid(); + id = get_family_id(nl_sd); + if (!id) { + fprintf(stderr, "Error getting family id, errno %d\n", errno); + goto err; + } + PRINTF("family id %d\n", id); + + if (maskset) { + rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET, + TASKSTATS_CMD_ATTR_REGISTER_CPUMASK, + &cpumask, strlen(cpumask) + 1); + PRINTF("Sent register cpumask, retval %d\n", rc); + if (rc < 0) { + fprintf(stderr, "error sending register cpumask\n"); + goto err; + } + } + + if (tid && containerset) { + fprintf(stderr, "Select either -t or -C, not both\n"); + goto err; + } + + /* + * If we forked a child, wait for it to exit. 
Cannot use waitpid() + * as all the delicious data would be reaped as part of the wait + */ + if (tid && forking) { + int sig_received; + sigwait(&sigset, &sig_received); + } + + if (tid) { + rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET, + cmd_type, &tid, sizeof(__u32)); + PRINTF("Sent pid/tgid, retval %d\n", rc); + if (rc < 0) { + fprintf(stderr, "error sending tid/tgid cmd\n"); + goto done; + } + } + + if (containerset) { + cfd = open(containerpath, O_RDONLY); + if (cfd < 0) { + perror("error opening container file"); + goto err; + } + rc = send_cmd(nl_sd, id, mypid, CGROUPSTATS_CMD_GET, + CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32)); + if (rc < 0) { + perror("error sending cgroupstats command"); + goto err; + } + } + if (!maskset && !tid && !containerset) { + usage(); + goto err; + } + + do { + rep_len = recv(nl_sd, &msg, sizeof(msg), 0); + PRINTF("received %d bytes\n", rep_len); + + if (rep_len < 0) { + fprintf(stderr, "nonfatal reply error: errno %d\n", + errno); + continue; + } + if (msg.n.nlmsg_type == NLMSG_ERROR || + !NLMSG_OK((&msg.n), rep_len)) { + struct nlmsgerr *err = NLMSG_DATA(&msg); + fprintf(stderr, "fatal reply error, errno %d\n", + err->error); + goto done; + } + + PRINTF("nlmsghdr size=%zu, nlmsg_len=%d, rep_len=%d\n", + sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len); + + + rep_len = GENLMSG_PAYLOAD(&msg.n); + + na = (struct nlattr *) GENLMSG_DATA(&msg); + len = 0; + while (len < rep_len) { + len += NLA_ALIGN(na->nla_len); + switch (na->nla_type) { + case TASKSTATS_TYPE_AGGR_TGID: + /* Fall through */ + case TASKSTATS_TYPE_AGGR_PID: + aggr_len = NLA_PAYLOAD(na->nla_len); + len2 = 0; + /* For nested attributes, na follows */ + na = (struct nlattr *) NLA_DATA(na); + done = 0; + while (len2 < aggr_len) { + switch (na->nla_type) { + case TASKSTATS_TYPE_PID: + rtid = *(int *) NLA_DATA(na); + if (print_delays) + printf("PID\t%d\n", rtid); + break; + case TASKSTATS_TYPE_TGID: + rtid = *(int *) NLA_DATA(na); + if (print_delays) + 
printf("TGID\t%d\n", rtid); + break; + case TASKSTATS_TYPE_STATS: + count++; + if (print_delays) + print_delayacct((struct taskstats *) NLA_DATA(na)); + if (print_io_accounting) + print_ioacct((struct taskstats *) NLA_DATA(na)); + if (print_task_context_switch_counts) + task_context_switch_counts((struct taskstats *) NLA_DATA(na)); + if (fd) { + if (write(fd, NLA_DATA(na), na->nla_len) < 0) { + err(1,"write error\n"); + } + } + if (!loop) + goto done; + break; + case TASKSTATS_TYPE_NULL: + break; + default: + fprintf(stderr, "Unknown nested" + " nla_type %d\n", + na->nla_type); + break; + } + len2 += NLA_ALIGN(na->nla_len); + na = (struct nlattr *)((char *)na + + NLA_ALIGN(na->nla_len)); + } + break; + + case CGROUPSTATS_TYPE_CGROUP_STATS: + print_cgroupstats(NLA_DATA(na)); + break; + default: + fprintf(stderr, "Unknown nla_type %d\n", + na->nla_type); + case TASKSTATS_TYPE_NULL: + break; + } + na = (struct nlattr *) (GENLMSG_DATA(&msg) + len); + } + } while (loop); +done: + if (maskset) { + rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET, + TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK, + &cpumask, strlen(cpumask) + 1); + printf("Sent deregister mask, retval %d\n", rc); + if (rc < 0) + err(rc, "error sending deregister cpumask\n"); + } +err: + close(nl_sd); + if (fd) + close(fd); + if (cfd) + close(cfd); + return 0; +} diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 92a8308b96f6..1188bc849ee3 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -106,7 +106,6 @@ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ #define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ -#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 ( 4*32+ 0) 
/* "pni" SSE-3 */ diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 2ec0b0abbfaa..49e6ebac7e73 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -181,11 +181,11 @@ ENDPROC(memcpy_orig) #ifndef CONFIG_UML /* - * memcpy_mcsafe - memory copy with machine check exception handling + * memcpy_mcsafe_unrolled - memory copy with machine check exception handling * Note that we only catch machine checks when reading the source addresses. * Writes to target are posted and don't generate machine checks. */ -ENTRY(memcpy_mcsafe) +ENTRY(memcpy_mcsafe_unrolled) cmpl $8, %edx /* Less than 8 bytes? Go to byte copy loop */ jb .L_no_whole_words @@ -273,7 +273,7 @@ ENTRY(memcpy_mcsafe) .L_done_memcpy_trap: xorq %rax, %rax ret -ENDPROC(memcpy_mcsafe) +ENDPROC(memcpy_mcsafe_unrolled) .section .fixup, "ax" /* Return -EFAULT for any failure */ diff --git a/tools/build/Build b/tools/build/Build index 63a6c34c0c88..76d1a4960973 100644 --- a/tools/build/Build +++ b/tools/build/Build @@ -1 +1,3 @@ +hostprogs := fixdep + fixdep-y := fixdep.o diff --git a/tools/build/Build.include b/tools/build/Build.include index 4d000bc959b4..1dcb95e76f70 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -90,3 +90,9 @@ if_changed = $(if $(strip $(any-prereq) $(arg-check)), \ # - per object C flags # - BUILD_STR macro to allow '-D"$(variable)"' constructs c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj)) +cxx_flags = -Wp,-MD,$(depfile),-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXXFLAGS_$(basetarget).o) $(CXXFLAGS_$(obj)) + +### +## HOSTCC C flags + +host_c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CHOSTFLAGS) -D"BUILD_STR(s)=\#s" $(CHOSTFLAGS_$(basetarget).o) $(CHOSTFLAGS_$(obj)) diff --git a/tools/build/Makefile b/tools/build/Makefile index 0d5a0e3a8fa9..8332959fbca4 100644 --- a/tools/build/Makefile +++ b/tools/build/Makefile @@ -14,6 +14,12 @@ endef $(call 
allow-override,CC,$(CROSS_COMPILE)gcc) $(call allow-override,LD,$(CROSS_COMPILE)ld) +HOSTCC ?= gcc +HOSTLD ?= ld +HOSTAR ?= ar + +export HOSTCC HOSTLD HOSTAR + ifeq ($(V),1) Q = else @@ -36,7 +42,7 @@ $(OUTPUT)fixdep-in.o: FORCE $(Q)$(MAKE) $(build)=fixdep $(OUTPUT)fixdep: $(OUTPUT)fixdep-in.o - $(QUIET_LINK)$(CC) $(LDFLAGS) -o $@ $< + $(QUIET_LINK)$(HOSTCC) $(LDFLAGS) -o $@ $< FORCE: diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 27f3583193e6..99c0ccd2f176 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -58,6 +58,12 @@ quiet_cmd_mkdir = MKDIR $(dir $@) quiet_cmd_cc_o_c = CC $@ cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< +quiet_cmd_host_cc_o_c = HOSTCC $@ + cmd_host_cc_o_c = $(HOSTCC) $(host_c_flags) -c -o $@ $< + +quiet_cmd_cxx_o_c = CXX $@ + cmd_cxx_o_c = $(CXX) $(cxx_flags) -c -o $@ $< + quiet_cmd_cpp_i_c = CPP $@ cmd_cpp_i_c = $(CC) $(c_flags) -E -o $@ $< @@ -70,16 +76,28 @@ quiet_cmd_gen = GEN $@ # If there's nothing to link, create empty $@ object. 
quiet_cmd_ld_multi = LD $@ cmd_ld_multi = $(if $(strip $(obj-y)),\ - $(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@) + $(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@) + +quiet_cmd_host_ld_multi = HOSTLD $@ + cmd_host_ld_multi = $(if $(strip $(obj-y)),\ + $(HOSTLD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(HOSTAR) rcs $@) + +ifneq ($(filter $(obj),$(hostprogs)),) + host = host_ +endif # Build rules $(OUTPUT)%.o: %.c FORCE $(call rule_mkdir) - $(call if_changed_dep,cc_o_c) + $(call if_changed_dep,$(host)cc_o_c) + +$(OUTPUT)%.o: %.cpp FORCE + $(call rule_mkdir) + $(call if_changed_dep,cxx_o_c) $(OUTPUT)%.o: %.S FORCE $(call rule_mkdir) - $(call if_changed_dep,cc_o_c) + $(call if_changed_dep,$(host)cc_o_c) $(OUTPUT)%.i: %.c FORCE $(call rule_mkdir) @@ -119,7 +137,7 @@ $(sort $(subdir-obj-y)): $(subdir-y) ; $(in-target): $(obj-y) FORCE $(call rule_mkdir) - $(call if_changed,ld_multi) + $(call if_changed,$(host)ld_multi) __build: $(in-target) @: diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index a120c6b755a9..ae52e029dd22 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -7,7 +7,7 @@ endif feature_check = $(eval $(feature_check_code)) define feature_check_code - feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) + feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) endef feature_set = $(eval $(feature_set_code)) diff --git a/tools/build/Makefile.include b/tools/build/Makefile.include index be630bed66d2..ad22e4e7bc59 100644 --- 
a/tools/build/Makefile.include +++ b/tools/build/Makefile.include @@ -1,10 +1,6 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj -ifdef CROSS_COMPILE -fixdep: -else fixdep: $(Q)$(MAKE) -C $(srctree)/tools/build CFLAGS= LDFLAGS= $(OUTPUT)fixdep -endif .PHONY: fixdep diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index a0b29a311816..ac9c477a2a48 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -46,11 +46,13 @@ FILES= \ test-lzma.bin \ test-bpf.bin \ test-get_cpuid.bin \ - test-sdt.bin + test-sdt.bin \ + test-cxx.bin FILES := $(addprefix $(OUTPUT),$(FILES)) CC := $(CROSS_COMPILE)gcc -MD +CXX := $(CROSS_COMPILE)g++ -MD PKG_CONFIG := $(CROSS_COMPILE)pkg-config all: $(FILES) @@ -58,6 +60,9 @@ all: $(FILES) __BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS) BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1 +__BUILDXX = $(CXX) $(CXXFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS) + BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1 + ############################### $(OUTPUT)test-all.bin: @@ -217,6 +222,9 @@ $(OUTPUT)test-bpf.bin: $(OUTPUT)test-sdt.bin: $(BUILD) +$(OUTPUT)test-cxx.bin: + $(BUILDXX) -std=gnu++11 + -include $(OUTPUT)*.d ############################### diff --git a/tools/build/feature/test-cxx.cpp b/tools/build/feature/test-cxx.cpp new file mode 100644 index 000000000000..b1dee9a31d6c --- /dev/null +++ b/tools/build/feature/test-cxx.cpp @@ -0,0 +1,15 @@ +#include <iostream> +#include <memory> + +static void print_str(std::string s) +{ + std::cout << s << std::endl; +} + +int main() +{ + std::string s("Hello World!"); + print_str(std::move(s)); + std::cout << "|" << s << "|" << std::endl; + return 0; +} diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 0d9f48ec42bb..bc7adb84e679 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -1433,7 +1433,7 @@ int main(int argc, char *argv[]) openlog("KVP", 0, 
LOG_USER); syslog(LOG_INFO, "KVP starting; pid is:%d", getpid()); - kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR); + kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR | O_CLOEXEC); if (kvp_fd < 0) { syslog(LOG_ERR, "open /dev/vmbus/hv_kvp failed; error: %d %s", diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c index 5d51d6ff08e6..e0829809c897 100644 --- a/tools/hv/hv_vss_daemon.c +++ b/tools/hv/hv_vss_daemon.c @@ -250,6 +250,9 @@ int main(int argc, char *argv[]) syslog(LOG_ERR, "/etc/fstab and /proc/mounts"); } break; + case VSS_OP_HOT_BACKUP: + syslog(LOG_INFO, "VSS: op=CHECK HOT BACKUP\n"); + break; default: syslog(LOG_ERR, "Illegal op:%d\n", op); } diff --git a/tools/iio/iio_utils.c b/tools/iio/iio_utils.c index 5eb6793f3972..7a6d61c6c012 100644 --- a/tools/iio/iio_utils.c +++ b/tools/iio/iio_utils.c @@ -121,10 +121,6 @@ int iioutils_get_type(unsigned *is_signed, unsigned *bytes, unsigned *bits_used, ret = -ENOENT; while (ent = readdir(dp), ent) - /* - * Do we allow devices to override a generic name with - * a specific one? - */ if ((strcmp(builtname, ent->d_name) == 0) || (strcmp(builtname_generic, ent->d_name) == 0)) { ret = asprintf(&filename, @@ -178,6 +174,13 @@ int iioutils_get_type(unsigned *is_signed, unsigned *bytes, unsigned *bits_used, sysfsfp = 0; free(filename); filename = 0; + + /* + * Avoid having a more generic entry overwriting + * the settings. + */ + if (strcmp(builtname, ent->d_name) == 0) + break; } error_close_sysfsfp: diff --git a/tools/iio/lsiio.c b/tools/iio/lsiio.c index 3d650e668252..ab0f5cf16025 100644 --- a/tools/iio/lsiio.c +++ b/tools/iio/lsiio.c @@ -51,7 +51,8 @@ static int dump_channels(const char *dev_dir_name) while (ent = readdir(dp), ent) if (check_prefix(ent->d_name, "in_") && - check_postfix(ent->d_name, "_raw")) + (check_postfix(ent->d_name, "_raw") || + check_postfix(ent->d_name, "_input"))) printf(" %-10s\n", ent->d_name); return (closedir(dp) == -1) ? 
-errno : 0; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index da218fec6056..9e5fc168c8a3 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -339,7 +339,7 @@ enum bpf_func_id { BPF_FUNC_skb_change_type, /** - * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb + * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb * @skb: pointer to skb * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type * @index: index of the cgroup in the bpf_map @@ -348,7 +348,7 @@ enum bpf_func_id { * == 1 skb succeeded the cgroup2 descendant test * < 0 error */ - BPF_FUNC_skb_in_cgroup, + BPF_FUNC_skb_under_cgroup, /** * bpf_get_hash_recalc(skb) diff --git a/tools/laptop/dslm/.gitignore b/tools/laptop/dslm/.gitignore new file mode 100644 index 000000000000..9fc984e64386 --- /dev/null +++ b/tools/laptop/dslm/.gitignore @@ -0,0 +1 @@ +dslm diff --git a/tools/laptop/dslm/Makefile b/tools/laptop/dslm/Makefile new file mode 100644 index 000000000000..ff613b31730b --- /dev/null +++ b/tools/laptop/dslm/Makefile @@ -0,0 +1,9 @@ +CC := $(CROSS_COMPILE)gcc +CFLAGS := -I../../usr/include + +PROGS := dslm + +all: $(PROGS) + +clean: + rm -fr $(PROGS) diff --git a/tools/laptop/dslm/dslm.c b/tools/laptop/dslm/dslm.c new file mode 100644 index 000000000000..d5dd2d4b04d8 --- /dev/null +++ b/tools/laptop/dslm/dslm.c @@ -0,0 +1,166 @@ +/* + * dslm.c + * Simple Disk Sleep Monitor + * by Bartek Kania + * Licensed under the GPL + */ +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> +#include <time.h> +#include <string.h> +#include <signal.h> +#include <sys/ioctl.h> +#include <linux/hdreg.h> + +#ifdef DEBUG +#define D(x) x +#else +#define D(x) +#endif + +int endit = 0; + +/* Check if the disk is in powersave-mode + * Most of the code is stolen from hdparm. 
+ * 1 = active, 0 = standby/sleep, -1 = unknown */ +static int check_powermode(int fd) +{ + unsigned char args[4] = {WIN_CHECKPOWERMODE1,0,0,0}; + int state; + + if (ioctl(fd, HDIO_DRIVE_CMD, &args) + && (args[0] = WIN_CHECKPOWERMODE2) /* try again with 0x98 */ + && ioctl(fd, HDIO_DRIVE_CMD, &args)) { + if (errno != EIO || args[0] != 0 || args[1] != 0) { + state = -1; /* "unknown"; */ + } else + state = 0; /* "sleeping"; */ + } else { + state = (args[2] == 255) ? 1 : 0; + } + D(printf(" drive state is: %d\n", state)); + + return state; +} + +static char *state_name(int i) +{ + if (i == -1) return "unknown"; + if (i == 0) return "sleeping"; + if (i == 1) return "active"; + + return "internal error"; +} + +static char *myctime(time_t time) +{ + char *ts = ctime(&time); + ts[strlen(ts) - 1] = 0; + + return ts; +} + +static void measure(int fd) +{ + time_t start_time; + int last_state; + time_t last_time; + int curr_state; + time_t curr_time = 0; + time_t time_diff; + time_t active_time = 0; + time_t sleep_time = 0; + time_t unknown_time = 0; + time_t total_time = 0; + int changes = 0; + float tmp; + + printf("Starting measurements\n"); + + last_state = check_powermode(fd); + start_time = last_time = time(0); + printf(" System is in state %s\n\n", state_name(last_state)); + + while(!endit) { + sleep(1); + curr_state = check_powermode(fd); + + if (curr_state != last_state || endit) { + changes++; + curr_time = time(0); + time_diff = curr_time - last_time; + + if (last_state == 1) active_time += time_diff; + else if (last_state == 0) sleep_time += time_diff; + else unknown_time += time_diff; + + last_state = curr_state; + last_time = curr_time; + + printf("%s: State-change to %s\n", myctime(curr_time), + state_name(curr_state)); + } + } + changes--; /* Compensate for SIGINT */ + + total_time = time(0) - start_time; + printf("\nTotal running time: %lus\n", curr_time - start_time); + printf(" State changed %d times\n", changes); + + tmp = (float)sleep_time / 
(float)total_time * 100; + printf(" Time in sleep state: %lus (%.2f%%)\n", sleep_time, tmp); + tmp = (float)active_time / (float)total_time * 100; + printf(" Time in active state: %lus (%.2f%%)\n", active_time, tmp); + tmp = (float)unknown_time / (float)total_time * 100; + printf(" Time in unknown state: %lus (%.2f%%)\n", unknown_time, tmp); +} + +static void ender(int s) +{ + endit = 1; +} + +static void usage(void) +{ + puts("usage: dslm [-w <time>] <disk>"); + exit(0); +} + +int main(int argc, char **argv) +{ + int fd; + char *disk = 0; + int settle_time = 60; + + /* Parse the simple command-line */ + if (argc == 2) + disk = argv[1]; + else if (argc == 4) { + settle_time = atoi(argv[2]); + disk = argv[3]; + } else + usage(); + + if (!(fd = open(disk, O_RDONLY|O_NONBLOCK))) { + printf("Can't open %s, because: %s\n", disk, strerror(errno)); + exit(-1); + } + + if (settle_time) { + printf("Waiting %d seconds for the system to settle down to " + "'normal'\n", settle_time); + sleep(settle_time); + } else + puts("Not waiting for system to settle down"); + + signal(SIGINT, ender); + + measure(fd); + + close(fd); + + return 0; +} diff --git a/tools/lib/subcmd/pager.c b/tools/lib/subcmd/pager.c index d50f3b58606b..6518bea926d6 100644 --- a/tools/lib/subcmd/pager.c +++ b/tools/lib/subcmd/pager.c @@ -3,6 +3,7 @@ #include <stdio.h> #include <string.h> #include <signal.h> +#include <sys/ioctl.h> #include "pager.h" #include "run-command.h" #include "sigchain.h" @@ -14,6 +15,7 @@ */ static int spawned_pager; +static int pager_columns; void pager_init(const char *pager_env) { @@ -58,9 +60,12 @@ static void wait_for_pager_signal(int signo) void setup_pager(void) { const char *pager = getenv(subcmd_config.pager_env); + struct winsize sz; if (!isatty(1)) return; + if (ioctl(1, TIOCGWINSZ, &sz) == 0) + pager_columns = sz.ws_col; if (!pager) pager = getenv("PAGER"); if (!(pager || access("/usr/bin/pager", X_OK))) @@ -98,3 +103,14 @@ int pager_in_use(void) { return spawned_pager; } + 
+int pager_get_columns(void) +{ + char *s; + + s = getenv("COLUMNS"); + if (s) + return atoi(s); + + return (pager_columns ? pager_columns : 80) - 2; +} diff --git a/tools/lib/subcmd/pager.h b/tools/lib/subcmd/pager.h index 8b83714ecf73..623f5542d05d 100644 --- a/tools/lib/subcmd/pager.h +++ b/tools/lib/subcmd/pager.h @@ -5,5 +5,6 @@ extern void pager_init(const char *pager_env); extern void setup_pager(void); extern int pager_in_use(void); +extern int pager_get_columns(void); #endif /* __SUBCMD_PAGER_H */ diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c index 3bcada3ae05a..65984f1c2974 100644 --- a/tools/lib/traceevent/kbuffer-parse.c +++ b/tools/lib/traceevent/kbuffer-parse.c @@ -622,6 +622,7 @@ void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset, /* Reset the buffer */ kbuffer_load_subbuffer(kbuf, kbuf->subbuffer); + data = kbuffer_read_event(kbuf, ts); while (kbuf->curr < offset) { data = kbuffer_next_event(kbuf, ts); diff --git a/tools/pcmcia/.gitignore b/tools/pcmcia/.gitignore new file mode 100644 index 000000000000..53d081336757 --- /dev/null +++ b/tools/pcmcia/.gitignore @@ -0,0 +1 @@ +crc32hash diff --git a/tools/pcmcia/Makefile b/tools/pcmcia/Makefile new file mode 100644 index 000000000000..81a7498c5cd9 --- /dev/null +++ b/tools/pcmcia/Makefile @@ -0,0 +1,9 @@ +CC := $(CROSS_COMPILE)gcc +CFLAGS := -I../../usr/include + +PROGS := crc32hash + +all: $(PROGS) + +clean: + rm -fr $(PROGS) diff --git a/tools/pcmcia/crc32hash.c b/tools/pcmcia/crc32hash.c new file mode 100644 index 000000000000..44f8beea7260 --- /dev/null +++ b/tools/pcmcia/crc32hash.c @@ -0,0 +1,32 @@ +/* crc32hash.c - derived from linux/lib/crc32.c, GNU GPL v2 */ +/* Usage example: +$ ./crc32hash "Dual Speed" +*/ + +#include <string.h> +#include <stdio.h> +#include <ctype.h> +#include <stdlib.h> + +static unsigned int crc32(unsigned char const *p, unsigned int len) +{ + int i; + unsigned int crc = 0; + while (len--) { + crc ^= *p++; + for (i = 
0; i < 8; i++) + crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0); + } + return crc; +} + +int main(int argc, char **argv) { + unsigned int result; + if (argc != 2) { + printf("no string passed as argument\n"); + return -1; + } + result = crc32((unsigned char const *)argv[1], strlen(argv[1])); + printf("0x%x\n", result); + return 0; +} diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index a126e97a8114..41857cce5e86 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -8,13 +8,23 @@ perf-list - List all symbolic event types SYNOPSIS -------- [verse] -'perf list' [hw|sw|cache|tracepoint|pmu|event_glob] +'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|event_glob] DESCRIPTION ----------- This command displays the symbolic event types which can be selected in the various perf commands with the -e option. +OPTIONS +------- +--no-desc:: +Don't print descriptions. + +-v:: +--long-desc:: +Print longer event descriptions. 
+ + [[EVENT_MODIFIERS]] EVENT MODIFIERS --------------- diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt index 5950b5a24efd..8a6479c0eac9 100644 --- a/tools/perf/Documentation/tips.txt +++ b/tools/perf/Documentation/tips.txt @@ -28,3 +28,7 @@ To change sampling frequency to 100 Hz: perf record -F 100 See assembly instructions with percentage: perf annotate <symbol> If you prefer Intel style assembly, try: perf annotate -M intel For hierarchical output, try: perf report --hierarchy +Order by the overhead of source file name and line number: perf report -s srcline +System-wide collection from all CPUs: perf record -a +Show current config key-value pairs: perf config --list +Show user configuration overrides: perf config --user --list diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index d710db16b963..982d6439bb07 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -144,6 +144,10 @@ $(call allow-override,LD,$(CROSS_COMPILE)ld) LD += $(EXTRA_LDFLAGS) +HOSTCC ?= gcc +HOSTLD ?= ld +HOSTAR ?= ar + PKG_CONFIG = $(CROSS_COMPILE)pkg-config RM = rm -f @@ -345,8 +349,18 @@ strip: $(PROGRAMS) $(OUTPUT)perf PERF_IN := $(OUTPUT)perf-in.o export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK +export HOSTCC HOSTLD HOSTAR include $(srctree)/tools/build/Makefile.include +JEVENTS := $(OUTPUT)pmu-events/jevents +JEVENTS_IN := $(OUTPUT)pmu-events/jevents-in.o + +PMU_EVENTS_IN := $(OUTPUT)pmu-events/pmu-events-in.o + +export JEVENTS + +build := -f $(srctree)/tools/build/Makefile.build dir=. 
obj + $(PERF_IN): prepare FORCE @(test -f ../../include/uapi/linux/perf_event.h && ( \ (diff -B ../include/uapi/linux/perf_event.h ../../include/uapi/linux/perf_event.h >/dev/null) \ @@ -443,9 +457,18 @@ $(PERF_IN): prepare FORCE || echo "Warning: tools/include/uapi/linux/mman.h differs from kernel" >&2 )) || true $(Q)$(MAKE) $(build)=perf -$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) +$(JEVENTS_IN): FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=jevents + +$(JEVENTS): $(JEVENTS_IN) + $(QUIET_LINK)$(HOSTCC) $(JEVENTS_IN) -o $@ + +$(PMU_EVENTS_IN): $(JEVENTS) FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events + +$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \ - $(PERF_IN) $(LIBS) -o $@ + $(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@ $(GTK_IN): fixdep FORCE $(Q)$(MAKE) $(build)=gtk @@ -474,6 +497,8 @@ perf.spec $(SCRIPTS) \ ifneq ($(OUTPUT),) %.o: $(OUTPUT)%.o @echo " # Redirected target $@ => $(OUTPUT)$@" +pmu-events/%.o: $(OUTPUT)pmu-events/%.o + @echo " # Redirected target $@ => $(OUTPUT)$@" util/%.o: $(OUTPUT)util/%.o @echo " # Redirected target $@ => $(OUTPUT)$@" bench/%.o: $(OUTPUT)bench/%.o @@ -729,10 +754,11 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) 
-name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected - $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 + $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \ $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \ - $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c + $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ + $(OUTPUT)pmu-events/pmu-events.c $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index f8ccee132867..9aaa6f5a9347 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -32,3 +32,14 @@ get_cpuid(char *buffer, size_t sz) } return -1; } + +char * +get_cpuid_str(void) +{ + char *bufp; + + if (asprintf(&bufp, "%.8lx", mfspr(SPRN_PVR)) < 0) + bufp = NULL; + + return bufp; +} diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index ed9d5d15d5b6..1030a6e504bb 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -82,7 +82,8 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev, * * In addition, we shouldn't specify an offset for kretprobes. 
*/ - if (pev->point.offset || pev->point.retprobe || !map || !sym) + if (pev->point.offset || (!pev->uprobes && pev->point.retprobe) || + !map || !sym) return; lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym); diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index 146d12a1cec0..a74a48db26f5 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -19,8 +19,8 @@ cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c, : "a" (op)); } -int -get_cpuid(char *buffer, size_t sz) +static int +__get_cpuid(char *buffer, size_t sz, const char *fmt) { unsigned int a, b, c, d, lvl; int family = -1, model = -1, step = -1; @@ -48,7 +48,7 @@ get_cpuid(char *buffer, size_t sz) if (family >= 0x6) model += ((a >> 16) & 0xf) << 4; } - nb = scnprintf(buffer, sz, "%s,%u,%u,%u$", vendor, family, model, step); + nb = scnprintf(buffer, sz, fmt, vendor, family, model, step); /* look for end marker to ensure the entire data fit */ if (strchr(buffer, '$')) { @@ -57,3 +57,21 @@ get_cpuid(char *buffer, size_t sz) } return -1; } + +int +get_cpuid(char *buffer, size_t sz) +{ + return __get_cpuid(buffer, sz, "%s,%u,%u,%u$"); +} + +char * +get_cpuid_str(void) +{ + char *buf = malloc(128); + + if (__get_cpuid(buf, 128, "%s-%u-%X$") < 0) { + free(buf); + return NULL; + } + return buf; +} diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 88ee419e5189..ba9322ff858b 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -16,16 +16,23 @@ #include "util/pmu.h" #include <subcmd/parse-options.h> +static bool desc_flag = true; + int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) { int i; bool raw_dump = false; + bool long_desc_flag = false; struct option list_options[] = { OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"), + OPT_BOOLEAN('d', "desc", &desc_flag, + "Print extra event descriptions. 
--no-desc to not print."), + OPT_BOOLEAN('v', "long-desc", &long_desc_flag, + "Print longer event descriptions."), OPT_END() }; const char * const list_usage[] = { - "perf list [hw|sw|cache|tracepoint|pmu|sdt|event_glob]", + "perf list [<options>] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]", NULL }; @@ -40,7 +47,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) printf("\nList of pre-defined events (to be used in -e):\n\n"); if (argc == 0) { - print_events(NULL, raw_dump); + print_events(NULL, raw_dump, !desc_flag, long_desc_flag); return 0; } @@ -61,14 +68,16 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) strcmp(argv[i], "hwcache") == 0) print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], "pmu") == 0) - print_pmu_events(NULL, raw_dump); + print_pmu_events(NULL, raw_dump, !desc_flag, + long_desc_flag); else if (strcmp(argv[i], "sdt") == 0) print_sdt_events(NULL, NULL, raw_dump); else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; if (sep == NULL) { - print_events(argv[i], raw_dump); + print_events(argv[i], raw_dump, !desc_flag, + long_desc_flag); continue; } sep_idx = sep - argv[i]; @@ -90,7 +99,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) print_symbol_events(s, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); print_hwcache_events(s, raw_dump); - print_pmu_events(s, raw_dump); + print_pmu_events(s, raw_dump, !desc_flag, + long_desc_flag); print_tracepoint_events(NULL, s, raw_dump); print_sdt_events(NULL, s, raw_dump); free(s); diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build new file mode 100644 index 000000000000..9213a1273697 --- /dev/null +++ b/tools/perf/pmu-events/Build @@ -0,0 +1,13 @@ +hostprogs := jevents + +jevents-y += json.o jsmn.o jevents.o +pmu-events-y += pmu-events.o +JDIR = pmu-events/arch/$(ARCH) +JSON = $(shell [ -d $(JDIR) ] && \ + find $(JDIR) -name '*.json' -o -name 'mapfile.csv') +# 
+# Locate/process JSON files in pmu-events/arch/ +# directory and create tables in pmu-events.c. +# +$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JEVENTS) + $(Q)$(call echo-cmd,gen)$(JEVENTS) $(ARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V) diff --git a/tools/perf/pmu-events/README b/tools/perf/pmu-events/README new file mode 100644 index 000000000000..1408ade0d773 --- /dev/null +++ b/tools/perf/pmu-events/README @@ -0,0 +1,147 @@ + +The contents of this directory allow users to specify PMU events in their +CPUs by their symbolic names rather than raw event codes (see example below). + +The main program in this directory is 'jevents', which is built and +executed _BEFORE_ the perf binary itself is built. + +The 'jevents' program tries to locate and process JSON files in the directory +tree tools/perf/pmu-events/arch/foo. + + - Regular files with '.json' extension in the name are assumed to be + JSON files, each of which describes a set of PMU events. + + - Regular files with basename starting with 'mapfile.csv' are assumed + to be a CSV file that maps a specific CPU to its set of PMU events. + (see below for mapfile format) + + - Directories are traversed, but all other files are ignored. + +The PMU events supported by a CPU model are expected to be grouped into topics +such as Pipelining, Cache, Memory, Floating-point etc. All events for a topic +should be placed in a separate JSON file - where the file name identifies +the topic. Eg: "Floating-point.json". + +All the topic JSON files for a CPU model/family should be in a separate +sub directory.
Thus for the Silvermont X86 CPU: + + $ ls tools/perf/pmu-events/arch/x86/Silvermont_core + Cache.json Memory.json Virtual-Memory.json + Frontend.json Pipeline.json + +Using the JSON files and the mapfile, 'jevents' generates the C source file, +'pmu-events.c', which encodes the two sets of tables: + + - Set of 'PMU events tables' for all known CPUs in the architecture, + (one table like the following, per JSON file; table name 'pme_power8' + is derived from JSON file name, 'power8.json'). + + struct pmu_event pme_power8[] = { + + ... + + { + .name = "pm_1plus_ppc_cmpl", + .event = "event=0x100f2", + .desc = "1 or more ppc insts finished,", + }, + + ... + } + + - A 'mapping table' that maps each CPU of the architecture, to its + 'PMU events table' + + struct pmu_events_map pmu_events_map[] = { + { + .cpuid = "004b0000", + .version = "1", + .type = "core", + .table = pme_power8 + }, + ... + + }; + +After the 'pmu-events.c' is generated, it is compiled and the resulting +'pmu-events.o' is added to 'libperf.a' which is then used to build perf. + +NOTES: + 1. Several CPUs can support same set of events and hence use a common + JSON file. Hence several entries in the pmu_events_map[] could map + to a single 'PMU events table'. + + 2. The 'pmu-events.h' has an extern declaration for the mapping table + and the generated 'pmu-events.c' defines this table. + + 3. _All_ known CPU tables for architecture are included in the perf + binary. + +At run time, perf determines the actual CPU it is running on, finds the +matching events table and builds aliases for those events. This allows +users to specify events by their name: + + $ perf stat -e pm_1plus_ppc_cmpl sleep 1 + +where 'pm_1plus_ppc_cmpl' is a Power8 PMU event. + +In case of errors when processing files in the tools/perf/pmu-events/arch +directory, 'jevents' tries to create an empty mapping file to allow the perf +build to succeed even if the PMU event aliases cannot be used. 
+ +However some errors in processing may cause the perf build to fail. + +Mapfile format +=============== + +The mapfile enables multiple CPU models to share a single set of PMU events. +It is required even if such mapping is 1:1. + +The mapfile.csv format is expected to be: + + Header line + CPUID,Version,Dir/path/name,Type + +where: + + Comma: + is the required field delimiter (i.e. other fields cannot + have commas within them). + + Comments: + Lines in which the first character is either '\n' or '#' + are ignored. + + Header line: + The header line is the first line in the file, which is + always _IGNORED_. It can be empty. + + CPUID: + CPUID is an arch-specific char string, that can be used + to identify CPU (and associate it with a set of PMU events + it supports). Multiple CPUIDS can point to the same + File/path/name.json. + + Example: + CPUID == 'GenuineIntel-6-2E' (on x86). + CPUID == '004b0100' (PVR value in Powerpc) + Version: + is the Version of the mapfile. + + Dir/path/name: + is the pathname to the directory containing the CPU's JSON + files, relative to the directory containing the mapfile.csv + + Type: + indicates whether the events are "core" or "uncore" events. + + + Eg: + + $ grep Silvermont tools/perf/pmu-events/arch/x86/mapfile.csv + GenuineIntel-6-37,V13,Silvermont_core,core + GenuineIntel-6-4D,V13,Silvermont_core,core + GenuineIntel-6-4C,V13,Silvermont_core,core + + i.e. the three CPU models use the JSON files (i.e. PMU events) listed + in the directory 'tools/perf/pmu-events/arch/x86/Silvermont_core'. diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c new file mode 100644 index 000000000000..41611d7f9873 --- /dev/null +++ b/tools/perf/pmu-events/jevents.c @@ -0,0 +1,814 @@ +#define _XOPEN_SOURCE 500 /* needed for nftw() */ +#define _GNU_SOURCE /* needed for asprintf() */ + +/* Parse event JSON files */ + +/* + * Copyright (c) 2014, Intel Corporation + * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <libgen.h>
+#include <dirent.h>
+#include <sys/time.h>			/* getrlimit */
+#include <sys/resource.h>		/* getrlimit */
+#include <ftw.h>
+#include <sys/stat.h>
+#include "jsmn.h"
+#include "json.h"
+#include "jevents.h"
+
+#ifndef __maybe_unused
+#define __maybe_unused                  __attribute__((unused))
+#endif
+
+int verbose;
+char *prog;
+
+/*
+ * Print a printf-style message to stderr when 'var' is at least 'level'.
+ * Returns the vfprintf() result, or 0 when the message is suppressed.
+ */
+int eprintf(int level, int var, const char *fmt, ...)
+{
+	int ret;
+	va_list args;
+
+	if (var < level)
+		return 0;
+
+	va_start(args, fmt);
+	ret = vfprintf(stderr, fmt, args);
+	va_end(args);
+
+	return ret;
+}
+
+/* Weak default: returns NULL unless another object provides get_cpu_str(). */
+__attribute__((weak)) char *get_cpu_str(void)
+{
+	return NULL;
+}
+
+/*
+ * Append "<sep><a><token text>" to the heap string *dst.
+ *
+ * 'sep' is only emitted when *dst already holds text.  'bt' may be NULL, in
+ * which case only 'a' is appended; otherwise the bytes of token 'bt' inside
+ * 'map' follow 'a'.  On allocation failure *dst is left untouched.
+ */
+static void addfield(char *map, char **dst, const char *sep,
+		     const char *a, jsmntok_t *bt)
+{
+	/* +1 accounts for the terminating NUL */
+	unsigned int len = strlen(a) + 1 + strlen(sep);
+	int olen = *dst ? strlen(*dst) : 0;
+	int blen = bt ? json_len(bt) : 0;
+	char *out;
+
+	out = realloc(*dst, len + olen + blen);
+	if (!out) {
+		/* Don't add field in this case */
+		return;
+	}
+	*dst = out;
+
+	if (!olen)
+		*(*dst) = 0;
+	else
+		strcat(*dst, sep);
+	strcat(*dst, a);
+	if (bt)
+		strncat(*dst, map + bt->start, blen);
+}
+
+/* Lower-case an event name in place; a NULL name is ignored. */
+static void fixname(char *s)
+{
+	if (!s)
+		return;
+
+	/* <ctype.h> functions need an unsigned char value */
+	for (; *s; s++)
+		*s = tolower((unsigned char)*s);
+}
+
+/*
+ * Strip one trailing '.' (ignoring trailing white space) from a description.
+ * NULL, empty and all-blank strings are left alone.
+ */
+static void fixdesc(char *s)
+{
+	char *e;
+
+	if (!s)
+		return;
+
+	/* Remove trailing dots that look ugly in perf list */
+	e = s + strlen(s);
+	while (e > s && isspace((unsigned char)e[-1]))
+		--e;
+	if (e > s && e[-1] == '.')
+		e[-1] = 0;
+}
+
+/* MSR index (as spelled in the JSON files) -> perf event term prefix. */
+static struct msrmap {
+	const char *num;
+	const char *pname;
+} msrmap[] = {
+	{ "0x3F6", "ldlat=" },
+	{ "0x1A6", "offcore_rsp=" },
+	{ "0x1A7", "offcore_rsp=" },
+	{ "0x3F7", "frontend=" },
+	{ NULL, NULL }
+};
+
+/* JSON field name -> perf event term prefix. */
+static struct field {
+	const char *field;
+	const char *kernel;
+} fields[] = {
+	{ "EventCode",	"event=" },
+	{ "UMask",	"umask=" },
+	{ "CounterMask", "cmask=" },
+	{ "Invert",	"inv=" },
+	{ "AnyThread",	"any=" },
+	{ "EdgeDetect",	"edge=" },
+	{ "SampleAfterValue", "period=" },
+	{ NULL, NULL }
+};
+
+/* Shrink token 'newval' so that it ends just before its first comma. */
+static void cut_comma(char *map, jsmntok_t *newval)
+{
+	int i;
+
+	/* Cut off everything after comma */
+	for (i = newval->start; i < newval->end; i++) {
+		if (map[i] == ',')
+			newval->end = i;
+	}
+}
+
+/*
+ * If 'field' names one of the entries in fields[] and 'nz' is set, append
+ * the matching "term=value" to *event and return 1; otherwise return 0.
+ */
+static int match_field(char *map, jsmntok_t *field, int nz,
+		       char **event, jsmntok_t *val)
+{
+	struct field *f;
+	jsmntok_t newval = *val;
+
+	for (f = fields; f->field; f++)
+		if (json_streq(map, field, f->field) && nz) {
+			cut_comma(map, &newval);
+			addfield(map, event, ",", f->kernel, &newval);
+			return 1;
+		}
+	return 0;
+}
+
+/*
+ * Find the msrmap[] entry for the MSR index held in token 'val' (text after
+ * a comma is ignored).  Returns NULL for an unknown MSR; the error is only
+ * reported once per run.
+ */
+static struct msrmap *lookup_msr(char *map, jsmntok_t *val)
+{
+	jsmntok_t newval = *val;
+	static bool warned;
+	int i;
+
+	cut_comma(map, &newval);
+	for (i = 0; msrmap[i].num; i++)
+		if (json_streq(map, &newval, msrmap[i].num))
+			return &msrmap[i];
+	if (!warned) {
+		warned = true;
+		pr_err("%s: Unknown MSR in event file %.*s\n", prog,
+			json_len(val), map + val->start);
+	}
+	return NULL;
+}
+
+/*
+ * Parse-time assertion: relies on 'fn', 'map', 'tokens' and an 'out_free'
+ * label being available in the calling function.
+ */
+#define EXPECT(e, t, m) do { if (!(e)) {			\
+	jsmntok_t *loc = (t);					\
+	if (!(t)->start && (t) > tokens)			\
+		loc = (t) - 1;					\
+	pr_err("%s:%d: " m ", got %s\n", fn,			\
+		json_line(map, loc),				\
+		json_name(t));					\
+	goto out_free;						\
+} } while (0)
+
+#define TOPIC_DEPTH 256
+static char *topic_array[TOPIC_DEPTH];
+static int   topic_level;
+
+/*
+ * Build the current topic by concatenating topic_array[0..topic_level],
+ * turning '-' into ' ' and cutting the result at the first '.'.
+ * Returns a heap string owned by the caller, or NULL on allocation failure.
+ */
+static char *get_topic(void)
+{
+	char *tp = NULL;
+	char *p;
+	int i;
+
+	for (i = 0; i < topic_level + 1; i++) {
+		const char *part = topic_array[i] ? topic_array[i] : "";
+		char *joined;
+
+		if (asprintf(&joined, "%s%s", tp ? tp : "", part) < 0) {
+			pr_info("%s: asprintf() error %s\n", prog,
+				strerror(errno));
+			free(tp);
+			return NULL;
+		}
+		free(tp);
+		tp = joined;
+	}
+
+	if (!tp)
+		return NULL;
+
+	for (p = tp; *p; p++) {
+		if (*p == '-')
+			*p = ' ';
+		else if (*p == '.') {
+			*p = '\0';
+			break;
+		}
+	}
+
+	return tp;
+}
+
+/*
+ * Record 'bname' as the topic component for nftw() depth 'level'
+ * (depth 2 maps to slot 0).  Returns 0, -EINVAL for an out-of-range depth
+ * or -ENOMEM.
+ */
+static int add_topic(int level, char *bname)
+{
+	char *topic;
+
+	level -= 2;
+
+	if (level < 0 || level >= TOPIC_DEPTH)
+		return -EINVAL;
+
+	topic = strdup(bname);
+	if (!topic) {
+		pr_info("%s: strdup() error %s for file %s\n", prog,
+				strerror(errno), bname);
+		return -ENOMEM;
+	}
+
+	/* Store into the slot for the new depth, not the previous one */
+	free(topic_array[level]);
+	topic_array[level] = topic;
+	topic_level        = level;
+	return 0;
+}
+
+/* Callback context handed to print_events_table_entry(). */
+struct perf_entry_data {
+	FILE *outfp;
+	char *topic;
+};
+
+/* Non-zero while an events table has been opened but not yet terminated. */
+static int close_table;
+
+/* Emit the opening line of a 'struct pmu_event' table named 'tblname'. */
+static void print_events_table_prefix(FILE *fp, const char *tblname)
+{
+	fprintf(fp, "struct pmu_event %s[] = {\n", tblname);
+	close_table = 1;
+}
+
+/*
+ * json_events() callback: write one table initializer for the event.
+ * 'data' must point to a struct perf_entry_data.  Always returns 0.
+ */
+static int print_events_table_entry(void *data, char *name, char *event,
+				    char *desc, char *long_desc)
+{
+	struct perf_entry_data *pd = data;
+	FILE *outfp = pd->outfp;
+	char *topic = pd->topic;
+
+	/*
+	 * TODO: Remove formatting chars after debugging to reduce
+	 *	 string lengths.
+	 */
+	fprintf(outfp, "{\n");
+
+	fprintf(outfp, "\t.name = \"%s\",\n", name);
+	fprintf(outfp, "\t.event = \"%s\",\n", event);
+	fprintf(outfp, "\t.desc = \"%s\",\n", desc);
+	fprintf(outfp, "\t.topic = \"%s\",\n", topic);
+	if (long_desc && long_desc[0])
+		fprintf(outfp, "\t.long_desc = \"%s\",\n", long_desc);
+
+	fprintf(outfp, "},\n");
+
+	return 0;
+}
+
+/* Emit the all-zero terminator entry and close the current events table. */
+static void print_events_table_suffix(FILE *outfp)
+{
+	fprintf(outfp, "{\n");
+
+	fprintf(outfp, "\t.name = 0,\n");
+	fprintf(outfp, "\t.event = 0,\n");
+	fprintf(outfp, "\t.desc = 0,\n");
+
+	fprintf(outfp, "},\n");
+	fprintf(outfp, "};\n");
+	close_table = 0;
+}
+
+/* Event names whose encoding is replaced by a fixed string (see real_event). */
+static struct fixed {
+	const char *name;
+	const char *event;
+} fixed[] = {
+	{ "inst_retired.any", "event=0xc0" },
+	{ "inst_retired.any_p", "event=0xc0" },
+	{ "cpu_clk_unhalted.ref", "event=0x0,umask=0x03" },
+	{ "cpu_clk_unhalted.thread", "event=0x3c" },
+	{ "cpu_clk_unhalted.thread_any", "event=0x3c,any=1" },
+	{ NULL, NULL},
+};
+
+/*
+ * Handle different fixed counter encodings between JSON and perf.
+ */
+static char *real_event(const char *name, char *event)
+{
+	int i;
+
+	/* Nothing to look up for an event without a name */
+	if (!name)
+		return event;
+
+	for (i = 0; fixed[i].name; i++)
+		if (!strcasecmp(name, fixed[i].name))
+			return (char *)fixed[i].event;
+	return event;
+}
+
+/*
+ * Call func with each event in the json file
+ *
+ * 'fn' must contain a top-level array of objects whose members are all
+ * strings.  For each object the known fields are folded into an event
+ * string, a name and descriptions, which are passed to 'func' together
+ * with 'data'.  Returns 0 on success, -ENOENT for a NULL 'fn', -EIO on a
+ * parse error or an object without "EventName", or the first non-zero
+ * value returned by 'func'.
+ */
+int json_events(const char *fn,
+	  int (*func)(void *data, char *name, char *event, char *desc,
+		      char *long_desc),
+	  void *data)
+{
+	int err = -EIO;
+	size_t size;
+	jsmntok_t *tokens, *tok;
+	int i, j, len;
+	char *map;
+
+	if (!fn)
+		return -ENOENT;
+
+	tokens = parse_json(fn, &map, &size, &len);
+	if (!tokens)
+		return -EIO;
+	EXPECT(tokens->type == JSMN_ARRAY, tokens, "expected top level array");
+	tok = tokens + 1;
+	for (i = 0; i < tokens->size; i++) {
+		char *event = NULL, *desc = NULL, *name = NULL;
+		char *long_desc = NULL;
+		char *extra_desc = NULL;
+		struct msrmap *msr = NULL;
+		jsmntok_t *msrval = NULL;
+		jsmntok_t *precise = NULL;
+		jsmntok_t *obj = tok++;
+
+		EXPECT(obj->type == JSMN_OBJECT, obj, "expected object");
+		for (j = 0; j < obj->size; j += 2) {
+			jsmntok_t *field, *val;
+			int nz;
+
+			field = tok + j;
+			EXPECT(field->type == JSMN_STRING, tok + j,
+			       "Expected field name");
+			val = tok + j + 1;
+			EXPECT(val->type == JSMN_STRING, tok + j + 1,
+			       "Expected string value");
+
+			/* a value of "0" means "field not set" */
+			nz = !json_streq(map, val, "0");
+			if (match_field(map, field, nz, &event, val)) {
+				/* ok */
+			} else if (json_streq(map, field, "EventName")) {
+				addfield(map, &name, "", "", val);
+			} else if (json_streq(map, field, "BriefDescription")) {
+				addfield(map, &desc, "", "", val);
+				/* addfield() leaves desc NULL on ENOMEM */
+				if (desc)
+					fixdesc(desc);
+			} else if (json_streq(map, field,
+					     "PublicDescription")) {
+				addfield(map, &long_desc, "", "", val);
+				if (long_desc)
+					fixdesc(long_desc);
+			} else if (json_streq(map, field, "PEBS") && nz) {
+				precise = val;
+			} else if (json_streq(map, field, "MSRIndex") && nz) {
+				msr = lookup_msr(map, val);
+			} else if (json_streq(map, field, "MSRValue")) {
+				msrval = val;
+			} else if (json_streq(map, field, "Errata") &&
+				   !json_streq(map, val, "null")) {
+				addfield(map, &extra_desc, ". ",
+					" Spec update: ", val);
+			} else if (json_streq(map, field, "Data_LA") && nz) {
+				addfield(map, &extra_desc, ". ",
+					" Supports address when precise",
+					NULL);
+			}
+			/* ignore unknown fields */
+		}
+		if (precise && desc && !strstr(desc, "(Precise Event)")) {
+			if (json_streq(map, precise, "2"))
+				addfield(map, &extra_desc, " ",
+						"(Must be precise)", NULL);
+			else
+				addfield(map, &extra_desc, " ",
+						"(Precise event)", NULL);
+		}
+		if (desc && extra_desc)
+			addfield(map, &desc, " ", extra_desc, NULL);
+		if (long_desc && extra_desc)
+			addfield(map, &long_desc, " ", extra_desc, NULL);
+		if (msr != NULL)
+			addfield(map, &event, ",", msr->pname, msrval);
+
+		if (!name) {
+			/* fixname()/strcasecmp() below would dereference NULL */
+			pr_err("%s:%d: event without EventName\n", fn,
+				json_line(map, obj));
+			err = -EIO;
+		} else {
+			fixname(name);
+			err = func(data, name, real_event(name, event), desc,
+				   long_desc);
+		}
+		free(event);
+		free(desc);
+		free(name);
+		free(long_desc);
+		free(extra_desc);
+		/* Report the callback's error directly, not via EXPECT below */
+		if (err)
+			goto out_free;
+		tok += j;
+	}
+	EXPECT(tok - tokens == len, tok, "unexpected objects at end");
+	err = 0;
+out_free:
+	free_json(map, size, tokens);
+	return err;
+}
+
+/*
+ * Derive a C identifier "pme_<name>" from the basename of 'fname'.
+ * Returns a heap string owned by the caller, or NULL on error.
+ */
+static char *file_name_to_table_name(char *fname)
+{
+	char *p;
+	int n;
+	int c;
+	char *tblname;
+
+	if (!fname)
+		return NULL;
+
+	/*
+	 * Ensure tablename starts with alphabetic character.
+	 * Derive rest of table name from basename of the JSON file,
+	 * replacing hyphens and stripping out .json suffix.
+	 */
+	n = asprintf(&tblname, "pme_%s", basename(fname));
+	if (n < 0) {
+		pr_info("%s: asprintf() error %s for file %s\n", prog,
+				strerror(errno), fname);
+		return NULL;
+	}
+
+	for (p = tblname; *p; p++) {
+		c = (unsigned char)*p;
+
+		if (c == '-')
+			*p = '_';
+		else if (c == '.') {
+			*p = '\0';
+			break;
+		} else if (!isalnum(c) && c != '_') {
+			pr_err("%s: Invalid character '%c' in file name %s\n",
+					prog, c, basename(fname));
+			free(tblname);
+			tblname = NULL;
+			break;
+		}
+	}
+
+	return tblname;
+}
+
+/* Emit the opening line of the pmu_events_map[] table. */
+static void print_mapping_table_prefix(FILE *outfp)
+{
+	fprintf(outfp, "struct pmu_events_map pmu_events_map[] = {\n");
+}
+
+/* Emit the all-zero terminator entry and close pmu_events_map[]. */
+static void print_mapping_table_suffix(FILE *outfp)
+{
+	/*
+	 * Print the terminating, NULL entry.
+	 */
+	fprintf(outfp, "{\n");
+	fprintf(outfp, "\t.cpuid = 0,\n");
+	fprintf(outfp, "\t.version = 0,\n");
+	fprintf(outfp, "\t.type = 0,\n");
+	fprintf(outfp, "\t.table = 0,\n");
+	fprintf(outfp, "},\n");
+
+	/* and finally, the closing curly bracket for the struct */
+	fprintf(outfp, "};\n");
+}
+
+/*
+ * Translate the CSV mapfile 'fpath' into the pmu_events_map[] table written
+ * to 'outfp'.  Returns 0 on success and -1 on any error (open failure,
+ * over-long line, line with missing fields, bad table name); the line
+ * buffer and the mapfile stream are released on every path.
+ */
+static int process_mapfile(FILE *outfp, char *fpath)
+{
+	int n = 16384;
+	FILE *mapfp;
+	char *save = NULL;
+	char *line, *p;
+	int line_num;
+	char *tblname;
+	int ret = -1;
+
+	pr_info("%s: Processing mapfile %s\n", prog, fpath);
+
+	line = malloc(n);
+	if (!line)
+		return -1;
+
+	mapfp = fopen(fpath, "r");
+	if (!mapfp) {
+		pr_info("%s: Error %s opening %s\n", prog, strerror(errno),
+				fpath);
+		goto out_free_line;
+	}
+
+	print_mapping_table_prefix(outfp);
+
+	/* Skip first line (header) */
+	p = fgets(line, n, mapfp);
+	if (!p)
+		goto out;
+
+	line_num = 1;
+	while (1) {
+		char *cpuid, *version, *type, *fname;
+
+		line_num++;
+		p = fgets(line, n, mapfp);
+		if (!p)
+			break;
+
+		if (line[0] == '#' || line[0] == '\n')
+			continue;
+
+		if (line[strlen(line)-1] != '\n') {
+			/* TODO Deal with lines longer than 16K */
+			pr_info("%s: Mapfile %s: line %d too long, aborting\n",
+					prog, fpath, line_num);
+			goto out_close;
+		}
+		line[strlen(line)-1] = '\0';
+
+		cpuid = strtok_r(p, ",", &save);
+		version = strtok_r(NULL, ",", &save);
+		fname = strtok_r(NULL, ",", &save);
+		type = strtok_r(NULL, ",", &save);
+
+		if (!cpuid || !version || !fname || !type) {
+			pr_info("%s: Mapfile %s: line %d malformed, aborting\n",
+					prog, fpath, line_num);
+			goto out_close;
+		}
+
+		tblname = file_name_to_table_name(fname);
+		if (!tblname)
+			goto out_close;
+
+		fprintf(outfp, "{\n");
+		fprintf(outfp, "\t.cpuid = \"%s\",\n", cpuid);
+		fprintf(outfp, "\t.version = \"%s\",\n", version);
+		fprintf(outfp, "\t.type = \"%s\",\n", type);
+
+		/*
+		 * CHECK: We can't use the type (eg "core") field in the
+		 * table name. For us to do that, we need to somehow tweak
+		 * the other caller of file_name_to_table(), process_json()
+		 * to determine the type. process_json() file has no way
+		 * of knowing these are "core" events unless file name has
+		 * core in it. If filename has core in it, we can safely
+		 * ignore the type field here also.
+		 */
+		fprintf(outfp, "\t.table = %s\n", tblname);
+		fprintf(outfp, "},\n");
+		free(tblname);
+	}
+
+out:
+	print_mapping_table_suffix(outfp);
+	ret = 0;
+out_close:
+	fclose(mapfp);
+out_free_line:
+	free(line);
+	return ret;
+}
+
+/*
+ * If we fail to locate/process JSON and map files, create a NULL mapping
+ * table. This would at least allow perf to build even if we can't find/use
+ * the aliases.
+ */
+static void create_empty_mapping(const char *output_file)
+{
+	FILE *outfp;
+
+	pr_info("%s: Creating empty pmu_events_map[] table\n", prog);
+
+	/* Truncate file to clear any partial writes to it */
+	outfp = fopen(output_file, "w");
+	if (!outfp) {
+		perror("fopen()");
+		_Exit(1);
+	}
+
+	fprintf(outfp, "#include \"../../pmu-events/pmu-events.h\"\n");
+	print_mapping_table_prefix(outfp);
+	print_mapping_table_suffix(outfp);
+	fclose(outfp);
+}
+
+/*
+ * Number of descriptors nftw() may keep open: half of the hard
+ * RLIMIT_NOFILE limit, capped at 512 (512 if the limit cannot be read).
+ */
+static int get_maxfds(void)
+{
+	struct rlimit rlim;
+
+	if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) {
+		/* stay in rlim_t so RLIM_INFINITY is not truncated by (int) */
+		rlim_t half = rlim.rlim_max / 2;
+
+		return half < 512 ? (int)half : 512;
+	}
+
+	return 512;
+}
+
+/*
+ * nftw() doesn't let us pass an argument to the processing function,
+ * so use global variables.
+ */
+static FILE *eventsfp;
+static char *mapfile;
+
+/*
+ * nftw() callback for the pmu-events/arch/<arch> tree.
+ *
+ *  - level 0 (the base directory) is skipped;
+ *  - a level-1 directory starts a new events table named after it;
+ *  - a level-1 "mapfile.csv*" file is remembered in 'mapfile', any other
+ *    level-1 file is ignored;
+ *  - deeper regular files must end in ".json" and are converted into
+ *    table entries via json_events().
+ *
+ * Returns 0 to continue the walk, non-zero to abort it.
+ */
+static int process_one_file(const char *fpath, const struct stat *sb,
+			    int typeflag, struct FTW *ftwbuf)
+{
+	char *tblname, *bname  = (char *) fpath + ftwbuf->base;
+	int is_dir  = typeflag == FTW_D;
+	int is_file = typeflag == FTW_F;
+	int level   = ftwbuf->level;
+	int err = 0;
+
+	/* off_t has no printf length modifier of its own; widen explicitly */
+	pr_debug("%s %d %7lld %-20s %s\n",
+		 is_file ? "f" : is_dir ? "d" : "x",
+		 level, (long long)sb->st_size, bname, fpath);
+
+	/* base dir */
+	if (level == 0)
+		return 0;
+
+	/* model directory, reset topic */
+	if (level == 1 && is_dir) {
+		if (close_table)
+			print_events_table_suffix(eventsfp);
+
+		/*
+		 * Drop file name suffix. Replace hyphens with underscores.
+		 * Fail if file name contains any non-alphanumeric characters
+		 * besides underscores.
+		 */
+		tblname = file_name_to_table_name(bname);
+		if (!tblname) {
+			pr_info("%s: Error determining table name for %s\n", prog,
+				bname);
+			return -1;
+		}
+
+		print_events_table_prefix(eventsfp, tblname);
+		free(tblname);
+		return 0;
+	}
+
+	/*
+	 * Save the mapfile name for now. We will process mapfile
+	 * after processing all JSON files (so we can write out the
+	 * mapping table after all PMU events tables).
+	 *
+	 * TODO: Allow for multiple mapfiles? Punt for now.
+	 */
+	if (level == 1 && is_file) {
+		if (!strncmp(bname, "mapfile.csv", 11)) {
+			if (mapfile) {
+				pr_info("%s: Many mapfiles? Using %s, ignoring %s\n",
+						prog, mapfile, fpath);
+			} else {
+				mapfile = strdup(fpath);
+				if (!mapfile)
+					return -ENOMEM;
+			}
+			return 0;
+		}
+
+		pr_info("%s: Ignoring file %s\n", prog, fpath);
+		return 0;
+	}
+
+	/*
+	 * If the file name does not have a .json extension,
+	 * ignore it. It could be a readme.txt for instance.
+	 */
+	if (is_file) {
+		size_t blen = strlen(bname);
+
+		/* names shorter than ".json" cannot carry the suffix */
+		if (blen < 5 || strcmp(bname + blen - 5, ".json")) {
+			pr_info("%s: Ignoring file without .json suffix %s\n", prog,
+				fpath);
+			return 0;
+		}
+	}
+
+	if (level > 1 && add_topic(level, bname))
+		return -ENOMEM;
+
+	/*
+	 * Assume all other files are JSON files.
+	 *
+	 * If mapfile refers to 'power7_core.json', we create a table
+	 * named 'power7_core'. Any inconsistencies between the mapfile
+	 * and directory tree could result in build failure due to table
+	 * names not being found.
+	 *
+	 * At least for now, be strict with processing JSON file names.
+	 * i.e. if JSON file name cannot be mapped to C-style table name,
+	 * fail.
+	 */
+	if (is_file) {
+		struct perf_entry_data data = {
+			.topic = get_topic(),
+			.outfp = eventsfp,
+		};
+
+		err = json_events(fpath, print_events_table_entry, &data);
+
+		free(data.topic);
+	}
+
+	return err;
+}
+
+#ifndef PATH_MAX
+#define PATH_MAX	4096
+#endif
+
+/*
+ * Starting in directory 'start_dirname', find the "mapfile.csv" and
+ * the set of JSON files for the architecture 'arch'.
+ *
+ * From each JSON file, create a C-style "PMU events table" from the
+ * JSON file (see struct pmu_event).
+ *
+ * From the mapfile, create a mapping between the CPU revisions and
+ * PMU event tables (see struct pmu_events_map).
+ *
+ * Write out the PMU events tables and the mapping table to pmu-events.c.
+ *
+ * If unable to process the JSON or arch files, create an empty mapping
+ * table so we can continue to build/use perf even if we cannot use the
+ * PMU event aliases.
+ */
+int main(int argc, char *argv[])
+{
+	int rc;
+	int maxfds;
+	char ldirname[PATH_MAX];
+
+	const char *arch;
+	const char *output_file;
+	const char *start_dirname;
+
+	prog = basename(argv[0]);
+	if (argc < 4) {
+		pr_err("Usage: %s <arch> <starting_dir> <output_file>\n", prog);
+		return 1;
+	}
+
+	arch = argv[1];
+	start_dirname = argv[2];
+	output_file = argv[3];
+
+	/* optional fourth argument: verbosity level */
+	if (argc > 4)
+		verbose = atoi(argv[4]);
+
+	eventsfp = fopen(output_file, "w");
+	if (!eventsfp) {
+		pr_err("%s Unable to create required file %s (%s)\n",
+				prog, output_file, strerror(errno));
+		return 2;
+	}
+
+	/* Include pmu-events.h first */
+	fprintf(eventsfp, "#include \"../../pmu-events/pmu-events.h\"\n");
+
+	mapfile = NULL;
+
+	/* Bounded formatting: both components come from the command line */
+	rc = snprintf(ldirname, sizeof(ldirname), "%s/%s", start_dirname, arch);
+	if (rc < 0 || (size_t)rc >= sizeof(ldirname)) {
+		pr_info("%s: Path %s/%s too long\n", prog, start_dirname, arch);
+		goto empty_map;
+	}
+
+	/*
+	 * The mapfile allows multiple CPUids to point to the same JSON file,
+	 * so, not sure if there is a need for symlinks within the pmu-events
+	 * directory.
+	 *
+	 * For now, treat symlinks of JSON files as regular files and create
+	 * separate tables for each symlink (presumably, each symlink refers
+	 * to specific version of the CPU).
+	 */
+
+	maxfds = get_maxfds();
+	rc = nftw(ldirname, process_one_file, maxfds, 0);
+	if (rc) {
+		if (verbose)
+			pr_info("%s: Error walking file tree %s\n", prog,
+				ldirname);
+		goto empty_map;
+	}
+
+	if (close_table)
+		print_events_table_suffix(eventsfp);
+
+	if (!mapfile) {
+		pr_info("%s: No CPU->JSON mapping?\n", prog);
+		goto empty_map;
+	}
+
+	if (process_mapfile(eventsfp, mapfile)) {
+		pr_info("%s: Error processing mapfile %s\n", prog, mapfile);
+		goto empty_map;
+	}
+
+	/* A failed flush would leave a truncated pmu-events.c behind */
+	if (fclose(eventsfp)) {
+		pr_info("%s: Error %s closing %s\n", prog, strerror(errno),
+			output_file);
+		create_empty_mapping(output_file);
+	}
+	free(mapfile);
+	return 0;
+
+empty_map:
+	fclose(eventsfp);
+	free(mapfile);
+	create_empty_mapping(output_file);
+	return 0;
+}
diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h
new file mode 100644
index 000000000000..b0eb2744b498
--- /dev/null
+++ b/tools/perf/pmu-events/jevents.h
@@ -0,0 +1,18 @@
+#ifndef JEVENTS_H
+#define JEVENTS_H 1
+
+int json_events(const char *fn,
+		int (*func)(void *data, char *name, char *event, char *desc,
+				char *long_desc),
+		void *data);
+char *get_cpu_str(void);
+
+#ifndef min
+#define min(x, y) ({				\
+	typeof(x) _min1 = (x);			\
+	typeof(y) _min2 = (y);			\
+	(void) (&_min1 == &_min2);		\
+	_min1 < _min2 ? _min1 : _min2; })
+#endif
+
+#endif
diff --git a/tools/perf/pmu-events/jsmn.c b/tools/perf/pmu-events/jsmn.c
new file mode 100644
index 000000000000..11d1fa18bfa5
--- /dev/null
+++ b/tools/perf/pmu-events/jsmn.c
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 2010 Serge A.
Zaitsev + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * Slightly modified by AK to not assume 0 terminated input. + */ + +#include <stdlib.h> +#include "jsmn.h" + +/* + * Allocates a fresh unused token from the token pool. + */ +static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser, + jsmntok_t *tokens, size_t num_tokens) +{ + jsmntok_t *tok; + + if ((unsigned)parser->toknext >= num_tokens) + return NULL; + tok = &tokens[parser->toknext++]; + tok->start = tok->end = -1; + tok->size = 0; + return tok; +} + +/* + * Fills token type and boundaries. + */ +static void jsmn_fill_token(jsmntok_t *token, jsmntype_t type, + int start, int end) +{ + token->type = type; + token->start = start; + token->end = end; + token->size = 0; +} + +/* + * Fills next available token with JSON primitive. 
+ */ +static jsmnerr_t jsmn_parse_primitive(jsmn_parser *parser, const char *js, + size_t len, + jsmntok_t *tokens, size_t num_tokens) +{ + jsmntok_t *token; + int start; + + start = parser->pos; + + for (; parser->pos < len; parser->pos++) { + switch (js[parser->pos]) { +#ifndef JSMN_STRICT + /* + * In strict mode primitive must be followed by "," + * or "}" or "]" + */ + case ':': +#endif + case '\t': + case '\r': + case '\n': + case ' ': + case ',': + case ']': + case '}': + goto found; + default: + break; + } + if (js[parser->pos] < 32 || js[parser->pos] >= 127) { + parser->pos = start; + return JSMN_ERROR_INVAL; + } + } +#ifdef JSMN_STRICT + /* + * In strict mode primitive must be followed by a + * comma/object/array. + */ + parser->pos = start; + return JSMN_ERROR_PART; +#endif + +found: + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (token == NULL) { + parser->pos = start; + return JSMN_ERROR_NOMEM; + } + jsmn_fill_token(token, JSMN_PRIMITIVE, start, parser->pos); + parser->pos--; /* parent sees closing brackets */ + return JSMN_SUCCESS; +} + +/* + * Fills next token with JSON string. 
+ */ +static jsmnerr_t jsmn_parse_string(jsmn_parser *parser, const char *js, + size_t len, + jsmntok_t *tokens, size_t num_tokens) +{ + jsmntok_t *token; + int start = parser->pos; + + /* Skip starting quote */ + parser->pos++; + + for (; parser->pos < len; parser->pos++) { + char c = js[parser->pos]; + + /* Quote: end of string */ + if (c == '\"') { + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (token == NULL) { + parser->pos = start; + return JSMN_ERROR_NOMEM; + } + jsmn_fill_token(token, JSMN_STRING, start+1, + parser->pos); + return JSMN_SUCCESS; + } + + /* Backslash: Quoted symbol expected */ + if (c == '\\') { + parser->pos++; + switch (js[parser->pos]) { + /* Allowed escaped symbols */ + case '\"': + case '/': + case '\\': + case 'b': + case 'f': + case 'r': + case 'n': + case 't': + break; + /* Allows escaped symbol \uXXXX */ + case 'u': + /* TODO */ + break; + /* Unexpected symbol */ + default: + parser->pos = start; + return JSMN_ERROR_INVAL; + } + } + } + parser->pos = start; + return JSMN_ERROR_PART; +} + +/* + * Parse JSON string and fill tokens. + */ +jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, + jsmntok_t *tokens, unsigned int num_tokens) +{ + jsmnerr_t r; + int i; + jsmntok_t *token; + + for (; parser->pos < len; parser->pos++) { + char c; + jsmntype_t type; + + c = js[parser->pos]; + switch (c) { + case '{': + case '[': + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (token == NULL) + return JSMN_ERROR_NOMEM; + if (parser->toksuper != -1) + tokens[parser->toksuper].size++; + token->type = (c == '{' ? JSMN_OBJECT : JSMN_ARRAY); + token->start = parser->pos; + parser->toksuper = parser->toknext - 1; + break; + case '}': + case ']': + type = (c == '}' ? 
JSMN_OBJECT : JSMN_ARRAY); + for (i = parser->toknext - 1; i >= 0; i--) { + token = &tokens[i]; + if (token->start != -1 && token->end == -1) { + if (token->type != type) + return JSMN_ERROR_INVAL; + parser->toksuper = -1; + token->end = parser->pos + 1; + break; + } + } + /* Error if unmatched closing bracket */ + if (i == -1) + return JSMN_ERROR_INVAL; + for (; i >= 0; i--) { + token = &tokens[i]; + if (token->start != -1 && token->end == -1) { + parser->toksuper = i; + break; + } + } + break; + case '\"': + r = jsmn_parse_string(parser, js, len, tokens, + num_tokens); + if (r < 0) + return r; + if (parser->toksuper != -1) + tokens[parser->toksuper].size++; + break; + case '\t': + case '\r': + case '\n': + case ':': + case ',': + case ' ': + break; +#ifdef JSMN_STRICT + /* + * In strict mode primitives are: + * numbers and booleans. + */ + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 't': + case 'f': + case 'n': +#else + /* + * In non-strict mode every unquoted value + * is a primitive. + */ + /*FALL THROUGH */ + default: +#endif + r = jsmn_parse_primitive(parser, js, len, tokens, + num_tokens); + if (r < 0) + return r; + if (parser->toksuper != -1) + tokens[parser->toksuper].size++; + break; + +#ifdef JSMN_STRICT + /* Unexpected char in strict mode */ + default: + return JSMN_ERROR_INVAL; +#endif + } + } + + for (i = parser->toknext - 1; i >= 0; i--) { + /* Unmatched opened object or array */ + if (tokens[i].start != -1 && tokens[i].end == -1) + return JSMN_ERROR_PART; + } + + return JSMN_SUCCESS; +} + +/* + * Creates a new parser based over a given buffer with an array of tokens + * available. 
+ */ +void jsmn_init(jsmn_parser *parser) +{ + parser->pos = 0; + parser->toknext = 0; + parser->toksuper = -1; +} + +const char *jsmn_strerror(jsmnerr_t err) +{ + switch (err) { + case JSMN_ERROR_NOMEM: + return "No enough tokens"; + case JSMN_ERROR_INVAL: + return "Invalid character inside JSON string"; + case JSMN_ERROR_PART: + return "The string is not a full JSON packet, more bytes expected"; + case JSMN_SUCCESS: + return "Success"; + default: + return "Unknown json error"; + } +} diff --git a/tools/perf/pmu-events/jsmn.h b/tools/perf/pmu-events/jsmn.h new file mode 100644 index 000000000000..d666b10cf25b --- /dev/null +++ b/tools/perf/pmu-events/jsmn.h @@ -0,0 +1,67 @@ +#ifndef __JSMN_H_ +#define __JSMN_H_ + +/* + * JSON type identifier. Basic types are: + * o Object + * o Array + * o String + * o Other primitive: number, boolean (true/false) or null + */ +typedef enum { + JSMN_PRIMITIVE = 0, + JSMN_OBJECT = 1, + JSMN_ARRAY = 2, + JSMN_STRING = 3 +} jsmntype_t; + +typedef enum { + /* Not enough tokens were provided */ + JSMN_ERROR_NOMEM = -1, + /* Invalid character inside JSON string */ + JSMN_ERROR_INVAL = -2, + /* The string is not a full JSON packet, more bytes expected */ + JSMN_ERROR_PART = -3, + /* Everything was fine */ + JSMN_SUCCESS = 0 +} jsmnerr_t; + +/* + * JSON token description. + * @param type type (object, array, string etc.) + * @param start start position in JSON data string + * @param end end position in JSON data string + */ +typedef struct { + jsmntype_t type; + int start; + int end; + int size; +} jsmntok_t; + +/* + * JSON parser. Contains an array of token blocks available. 
Also stores
+ * the string being parsed now and current position in that string
+ */
+typedef struct {
+	unsigned int pos; /* offset in the JSON string */
+	int toknext; /* next token to allocate */
+	int toksuper; /* superior token node, e.g. parent object or array */
+} jsmn_parser;
+
+/*
+ * Create JSON parser over an array of tokens
+ */
+void jsmn_init(jsmn_parser *parser);
+
+/*
+ * Run JSON parser. It parses a JSON data string into an array of tokens,
+ * each describing a single JSON object.
+ */
+jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js,
+		     size_t len,
+		     jsmntok_t *tokens, unsigned int num_tokens);
+
+const char *jsmn_strerror(jsmnerr_t err);
+
+#endif /* __JSMN_H_ */
diff --git a/tools/perf/pmu-events/json.c b/tools/perf/pmu-events/json.c
new file mode 100644
index 000000000000..f67bbb0aa36e
--- /dev/null
+++ b/tools/perf/pmu-events/json.c
@@ -0,0 +1,162 @@
+/* Parse JSON files using the JSMN parser. */
+
+/*
+ * Copyright (c) 2014, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <errno.h> +#include <unistd.h> +#include "jsmn.h" +#include "json.h" +#include <linux/kernel.h> + + +static char *mapfile(const char *fn, size_t *size) +{ + unsigned ps = sysconf(_SC_PAGESIZE); + struct stat st; + char *map = NULL; + int err; + int fd = open(fn, O_RDONLY); + + if (fd < 0 && verbose && fn) { + pr_err("Error opening events file '%s': %s\n", fn, + strerror(errno)); + } + + if (fd < 0) + return NULL; + err = fstat(fd, &st); + if (err < 0) + goto out; + *size = st.st_size; + map = mmap(NULL, + (st.st_size + ps - 1) & ~(ps - 1), + PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); + if (map == MAP_FAILED) + map = NULL; +out: + close(fd); + return map; +} + +static void unmapfile(char *map, size_t size) +{ + unsigned ps = sysconf(_SC_PAGESIZE); + munmap(map, roundup(size, ps)); +} + +/* + * Parse json file using jsmn. Return array of tokens, + * and mapped file. Caller needs to free array. 
+ */ +jsmntok_t *parse_json(const char *fn, char **map, size_t *size, int *len) +{ + jsmn_parser parser; + jsmntok_t *tokens; + jsmnerr_t res; + unsigned sz; + + *map = mapfile(fn, size); + if (!*map) + return NULL; + /* Heuristic */ + sz = *size * 16; + tokens = malloc(sz); + if (!tokens) + goto error; + jsmn_init(&parser); + res = jsmn_parse(&parser, *map, *size, tokens, + sz / sizeof(jsmntok_t)); + if (res != JSMN_SUCCESS) { + pr_err("%s: json error %s\n", fn, jsmn_strerror(res)); + goto error_free; + } + if (len) + *len = parser.toknext; + return tokens; +error_free: + free(tokens); +error: + unmapfile(*map, *size); + return NULL; +} + +void free_json(char *map, size_t size, jsmntok_t *tokens) +{ + free(tokens); + unmapfile(map, size); +} + +static int countchar(char *map, char c, int end) +{ + int i; + int count = 0; + for (i = 0; i < end; i++) + if (map[i] == c) + count++; + return count; +} + +/* Return line number of a jsmn token */ +int json_line(char *map, jsmntok_t *t) +{ + return countchar(map, '\n', t->start) + 1; +} + +static const char * const jsmn_types[] = { + [JSMN_PRIMITIVE] = "primitive", + [JSMN_ARRAY] = "array", + [JSMN_OBJECT] = "object", + [JSMN_STRING] = "string" +}; + +#define LOOKUP(a, i) ((i) < (sizeof(a)/sizeof(*(a))) ? ((a)[i]) : "?") + +/* Return type name of a jsmn token */ +const char *json_name(jsmntok_t *t) +{ + return LOOKUP(jsmn_types, t->type); +} + +int json_len(jsmntok_t *t) +{ + return t->end - t->start; +} + +/* Is string t equal to s? 
*/ +int json_streq(char *map, jsmntok_t *t, const char *s) +{ + unsigned len = json_len(t); + return len == strlen(s) && !strncasecmp(map + t->start, s, len); +} diff --git a/tools/perf/pmu-events/json.h b/tools/perf/pmu-events/json.h new file mode 100644 index 000000000000..278ebd32cfb6 --- /dev/null +++ b/tools/perf/pmu-events/json.h @@ -0,0 +1,38 @@ +#ifndef JSON_H +#define JSON_H 1 + +#include "jsmn.h" + +jsmntok_t *parse_json(const char *fn, char **map, size_t *size, int *len); +void free_json(char *map, size_t size, jsmntok_t *tokens); +int json_line(char *map, jsmntok_t *t); +const char *json_name(jsmntok_t *t); +int json_streq(char *map, jsmntok_t *t, const char *s); +int json_len(jsmntok_t *t); + +extern int verbose; + +#include <stdbool.h> + +extern int eprintf(int level, int var, const char *fmt, ...); +#define pr_fmt(fmt) fmt + +#define pr_err(fmt, ...) \ + eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) + +#define pr_info(fmt, ...) \ + eprintf(1, verbose, pr_fmt(fmt), ##__VA_ARGS__) + +#define pr_debug(fmt, ...) \ + eprintf(2, verbose, pr_fmt(fmt), ##__VA_ARGS__) + +#ifndef roundup +#define roundup(x, y) ( \ +{ \ + const typeof(y) __y = y; \ + (((x) + (__y - 1)) / __y) * __y; \ +} \ +) +#endif + +#endif diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h new file mode 100644 index 000000000000..2eaef595d8a0 --- /dev/null +++ b/tools/perf/pmu-events/pmu-events.h @@ -0,0 +1,37 @@ +#ifndef PMU_EVENTS_H +#define PMU_EVENTS_H + +/* + * Describe each PMU event. Each CPU has a table of PMU events. + */ +struct pmu_event { + const char *name; + const char *event; + const char *desc; + const char *topic; + const char *long_desc; +}; + +/* + * + * Map a CPU to its table of PMU events. The CPU is identified by the + * cpuid field, which is an arch-specific identifier for the CPU. 
+ * The identifier specified in tools/perf/pmu-events/arch/xxx/mapfile + * must match the get_cpustr() in tools/perf/arch/xxx/util/header.c) + * + * The cpuid can contain any character other than the comma. + */ +struct pmu_events_map { + const char *cpuid; + const char *version; + const char *type; /* core, uncore etc */ + struct pmu_event *table; +}; + +/* + * Global table mapping each known CPU for the architecture to its + * table of PMU events. + */ +extern struct pmu_events_map pmu_events_map[]; + +#endif diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index ea34c5a32c11..d92e02006fb8 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -384,15 +384,14 @@ void perf_evlist__toggle_enable(struct perf_evlist *evlist) static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist, struct perf_evsel *evsel, int cpu) { - int thread, err; + int thread; int nr_threads = perf_evlist__nr_threads(evlist, evsel); if (!evsel->fd) return -EINVAL; for (thread = 0; thread < nr_threads; thread++) { - err = ioctl(FD(evsel, cpu, thread), - PERF_EVENT_IOC_ENABLE, 0); + int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0); if (err) return err; } @@ -403,14 +402,14 @@ static int perf_evlist__enable_event_thread(struct perf_evlist *evlist, struct perf_evsel *evsel, int thread) { - int cpu, err; + int cpu; int nr_cpus = cpu_map__nr(evlist->cpus); if (!evsel->fd) return -EINVAL; for (cpu = 0; cpu < nr_cpus; cpu++) { - err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0); + int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0); if (err) return err; } @@ -1606,10 +1605,9 @@ void perf_evlist__close(struct perf_evlist *evlist) struct perf_evsel *evsel; int ncpus = cpu_map__nr(evlist->cpus); int nthreads = thread_map__nr(evlist->threads); - int n; evlist__for_each_entry_reverse(evlist, evsel) { - n = evsel->cpus ? evsel->cpus->nr : ncpus; + int n = evsel->cpus ? 
evsel->cpus->nr : ncpus; perf_evsel__close(evsel, n, nthreads); } } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 380e84c3af3d..8bc271141d9d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -985,14 +985,13 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { - int cpu, thread; - if (evsel->system_wide) nthreads = 1; evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); if (evsel->fd) { + int cpu, thread; for (cpu = 0; cpu < ncpus; cpu++) { for (thread = 0; thread < nthreads; thread++) { FD(evsel, cpu, thread) = -1; diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index d306ca118449..d30109b421ee 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -151,4 +151,5 @@ int write_padded(int fd, const void *bf, size_t count, size_t count_aligned); */ int get_cpuid(char *buffer, size_t sz); +char *get_cpuid_str(void); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 7591a0c37473..16c06d3ae577 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -90,6 +90,7 @@ struct intel_pt_decoder { bool pge; bool have_tma; bool have_cyc; + bool fixup_last_mtc; uint64_t pos; uint64_t last_ip; uint64_t ip; @@ -586,10 +587,31 @@ struct intel_pt_calc_cyc_to_tsc_info { uint64_t tsc_timestamp; uint64_t timestamp; bool have_tma; + bool fixup_last_mtc; bool from_mtc; double cbr_cyc_to_tsc; }; +/* + * MTC provides a 8-bit slice of CTC but the TMA packet only provides the lower + * 16 bits of CTC. If mtc_shift > 8 then some of the MTC bits are not in the CTC + * provided by the TMA packet. 
Fix-up the last_mtc calculated from the TMA + * packet by copying the missing bits from the current MTC assuming the least + * difference between the two, and that the current MTC comes after last_mtc. + */ +static void intel_pt_fixup_last_mtc(uint32_t mtc, int mtc_shift, + uint32_t *last_mtc) +{ + uint32_t first_missing_bit = 1U << (16 - mtc_shift); + uint32_t mask = ~(first_missing_bit - 1); + + *last_mtc |= mtc & mask; + if (*last_mtc >= mtc) { + *last_mtc -= first_missing_bit; + *last_mtc &= 0xff; + } +} + static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) { struct intel_pt_decoder *decoder = pkt_info->decoder; @@ -619,6 +641,11 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) return 0; mtc = pkt_info->packet.payload; + if (decoder->mtc_shift > 8 && data->fixup_last_mtc) { + data->fixup_last_mtc = false; + intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift, + &data->last_mtc); + } if (mtc > data->last_mtc) mtc_delta = mtc - data->last_mtc; else @@ -687,6 +714,7 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) data->ctc_delta = 0; data->have_tma = true; + data->fixup_last_mtc = true; return 0; @@ -753,6 +781,7 @@ static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder, .tsc_timestamp = decoder->tsc_timestamp, .timestamp = decoder->timestamp, .have_tma = decoder->have_tma, + .fixup_last_mtc = decoder->fixup_last_mtc, .from_mtc = from_mtc, .cbr_cyc_to_tsc = 0, }; @@ -1271,6 +1300,7 @@ static void intel_pt_calc_tma(struct intel_pt_decoder *decoder) } decoder->ctc_delta = 0; decoder->have_tma = true; + decoder->fixup_last_mtc = true; intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x CTC rem %#x\n", decoder->ctc_timestamp, decoder->last_mtc, ctc_rem); } @@ -1285,6 +1315,12 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) mtc = decoder->packet.payload; + if (decoder->mtc_shift > 8 && decoder->fixup_last_mtc) { + decoder->fixup_last_mtc = false; + 
intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift, + &decoder->last_mtc); + } + if (mtc > decoder->last_mtc) mtc_delta = mtc - decoder->last_mtc; else @@ -1353,6 +1389,8 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) timestamp, decoder->timestamp); else decoder->timestamp = timestamp; + + decoder->timestamp_insn_cnt = 0; } /* Walk PSB+ packets when already in sync. */ diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 18e4519abef2..df85b9efd80f 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1745,9 +1745,8 @@ static int resolve_lbr_callchain_sample(struct thread *thread, int max_stack) { struct ip_callchain *chain = sample->callchain; - int chain_nr = min(max_stack, (int)chain->nr); + int chain_nr = min(max_stack, (int)chain->nr), i; u8 cpumode = PERF_RECORD_MISC_USER; - int i, j, err; u64 ip; for (i = 0; i < chain_nr; i++) { @@ -1758,7 +1757,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread, /* LBR only affects the user callchain */ if (i != chain_nr) { struct branch_stack *lbr_stack = sample->branch_stack; - int lbr_nr = lbr_stack->nr; + int lbr_nr = lbr_stack->nr, j; /* * LBR callstack can only get user call chain. 
* The mix_chain_nr is kernel call chain @@ -1772,6 +1771,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread, int mix_chain_nr = i + 1 + lbr_nr + 1; for (j = 0; j < mix_chain_nr; j++) { + int err; if (callchain_param.order == ORDER_CALLEE) { if (j < i + 1) ip = chain->ips[j]; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 33546c3ac1fe..4e778eae1510 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -924,6 +924,7 @@ config_term_avail(int term_type, struct parse_events_error *err) case PARSE_EVENTS__TERM_TYPE_CONFIG1: case PARSE_EVENTS__TERM_TYPE_CONFIG2: case PARSE_EVENTS__TERM_TYPE_NAME: + case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: return true; default: if (!err) @@ -1458,7 +1459,7 @@ comp_pmu(const void *p1, const void *p2) struct perf_pmu_event_symbol *pmu1 = (struct perf_pmu_event_symbol *) p1; struct perf_pmu_event_symbol *pmu2 = (struct perf_pmu_event_symbol *) p2; - return strcmp(pmu1->symbol, pmu2->symbol); + return strcasecmp(pmu1->symbol, pmu2->symbol); } static void perf_pmu__parse_cleanup(void) @@ -2263,7 +2264,8 @@ out_enomem: /* * Print the help text for the event symbols: */ -void print_events(const char *event_glob, bool name_only) +void print_events(const char *event_glob, bool name_only, bool quiet_flag, + bool long_desc) { print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); @@ -2273,7 +2275,7 @@ void print_events(const char *event_glob, bool name_only) print_hwcache_events(event_glob, name_only); - print_pmu_events(event_glob, name_only); + print_pmu_events(event_glob, name_only, quiet_flag, long_desc); if (event_glob != NULL) return; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 8d09a976fca8..da246a3ddb69 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -172,7 +172,8 @@ void parse_events_update_lists(struct list_head *list_event, void 
parse_events_evlist_error(struct parse_events_evlist *data, int idx, const char *str); -void print_events(const char *event_glob, bool name_only); +void print_events(const char *event_glob, bool name_only, bool quiet, + bool long_desc); struct event_symbol { const char *symbol; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 2babcdf62839..b1474dcadfa2 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -12,6 +12,9 @@ #include "pmu.h" #include "parse-events.h" #include "cpumap.h" +#include "header.h" +#include "pmu-events/pmu-events.h" +#include "cache.h" struct perf_pmu_format { char *name; @@ -220,7 +223,8 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, } static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, - char *desc __maybe_unused, char *val) + char *desc, char *val, char *long_desc, + char *topic) { struct perf_pmu_alias *alias; int ret; @@ -253,6 +257,11 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, perf_pmu__parse_snapshot(alias, dir, name); } + alias->desc = desc ? strdup(desc) : NULL; + alias->long_desc = long_desc ? strdup(long_desc) : + desc ? strdup(desc) : NULL; + alias->topic = topic ? strdup(topic) : NULL; + list_add_tail(&alias->list, list); return 0; @@ -269,7 +278,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI buf[ret] = 0; - return __perf_pmu__new_alias(list, dir, name, NULL, buf); + return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL); } static inline bool pmu_alias_info_file(char *name) @@ -473,6 +482,68 @@ static struct cpu_map *pmu_cpumask(const char *name) return cpus; } +/* + * Return the CPU id as a raw string. + * + * Each architecture should provide a more precise id string that + * can be use to match the architecture's "mapfile". 
+ */ +char * __weak get_cpuid_str(void) +{ + return NULL; +} + +/* + * From the pmu_events_map, find the table of PMU events that corresponds + * to the current running CPU. Then, add all PMU events from that table + * as aliases. + */ +static void pmu_add_cpu_aliases(struct list_head *head) +{ + int i; + struct pmu_events_map *map; + struct pmu_event *pe; + char *cpuid; + + cpuid = getenv("PERF_CPUID"); + if (cpuid) + cpuid = strdup(cpuid); + if (!cpuid) + cpuid = get_cpuid_str(); + if (!cpuid) + return; + + pr_debug("Using CPUID %s\n", cpuid); + + i = 0; + while (1) { + map = &pmu_events_map[i++]; + if (!map->table) + goto out; + + if (!strcmp(map->cpuid, cpuid)) + break; + } + + /* + * Found a matching PMU events table. Create aliases + */ + i = 0; + while (1) { + pe = &map->table[i++]; + if (!pe->name) + break; + + /* need type casts to override 'const' */ + __perf_pmu__new_alias(head, NULL, (char *)pe->name, + (char *)pe->desc, (char *)pe->event, + (char *)pe->long_desc, (char *)pe->topic); + } + +out: + free(cpuid); +} + struct perf_event_attr * __weak perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) { @@ -497,6 +568,9 @@ static struct perf_pmu *pmu_lookup(const char *name) if (pmu_aliases(name, &aliases)) return NULL; + if (!strcmp(name, "cpu")) + pmu_add_cpu_aliases(&aliases); + if (pmu_type(name, &type)) return NULL; @@ -983,21 +1057,63 @@ static char *format_alias_or(char *buf, int len, struct perf_pmu *pmu, return buf; } -static int cmp_string(const void *a, const void *b) +struct sevent { + char *name; + char *desc; + char *topic; +}; + +static int cmp_sevent(const void *a, const void *b) { - const char * const *as = a; - const char * const *bs = b; - return strcmp(*as, *bs); + const struct sevent *as = a; + const struct sevent *bs = b; + + /* Put extra events last */ + if (!!as->desc != !!bs->desc) + return !!as->desc - !!bs->desc; + if (as->topic && bs->topic) { + int n = strcmp(as->topic, bs->topic); + + if (n) + return n; + } + 
return strcmp(as->name, bs->name); } -void print_pmu_events(const char *event_glob, bool name_only) +static void wordwrap(char *s, int start, int max, int corr) +{ + int column = start; + int n; + + while (*s) { + int wlen = strcspn(s, " \t"); + + if (column + wlen >= max && column > start) { + printf("\n%*s", start, ""); + column = start + corr; + } + n = printf("%s%.*s", column > start ? " " : "", wlen, s); + if (n <= 0) + break; + s += wlen; + column += n; + while (isspace(*s)) + s++; + } +} + +void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, + bool long_desc) { struct perf_pmu *pmu; struct perf_pmu_alias *alias; char buf[1024]; int printed = 0; int len, j; - char **aliases; + struct sevent *aliases; + int numdesc = 0; + int columns = pager_get_columns(); + char *topic = NULL; pmu = NULL; len = 0; @@ -1007,14 +1123,15 @@ void print_pmu_events(const char *event_glob, bool name_only) if (pmu->selectable) len++; } - aliases = zalloc(sizeof(char *) * len); + aliases = zalloc(sizeof(struct sevent) * len); if (!aliases) goto out_enomem; pmu = NULL; j = 0; while ((pmu = perf_pmu__scan(pmu)) != NULL) { list_for_each_entry(alias, &pmu->aliases, list) { - char *name = format_alias(buf, sizeof(buf), pmu, alias); + char *name = alias->desc ? alias->name : + format_alias(buf, sizeof(buf), pmu, alias); bool is_cpu = !strcmp(pmu->name, "cpu"); if (event_glob != NULL && @@ -1023,12 +1140,21 @@ void print_pmu_events(const char *event_glob, bool name_only) event_glob)))) continue; - if (is_cpu && !name_only) + if (is_cpu && !name_only && !alias->desc) name = format_alias_or(buf, sizeof(buf), pmu, alias); - aliases[j] = strdup(name); - if (aliases[j] == NULL) + aliases[j].name = name; + if (is_cpu && !name_only && !alias->desc) + aliases[j].name = format_alias_or(buf, + sizeof(buf), + pmu, alias); + aliases[j].name = strdup(aliases[j].name); + if (!aliases[j].name) goto out_enomem; + + aliases[j].desc = long_desc ? 
alias->long_desc : + alias->desc; + aliases[j].topic = alias->topic; j++; } if (pmu->selectable && @@ -1036,25 +1162,39 @@ void print_pmu_events(const char *event_glob, bool name_only) char *s; if (asprintf(&s, "%s//", pmu->name) < 0) goto out_enomem; - aliases[j] = s; + aliases[j].name = s; j++; } } len = j; - qsort(aliases, len, sizeof(char *), cmp_string); + qsort(aliases, len, sizeof(struct sevent), cmp_sevent); for (j = 0; j < len; j++) { if (name_only) { - printf("%s ", aliases[j]); + printf("%s ", aliases[j].name); continue; } - printf(" %-50s [Kernel PMU event]\n", aliases[j]); + if (aliases[j].desc && !quiet_flag) { + if (numdesc++ == 0) + printf("\n"); + if (aliases[j].topic && (!topic || + strcmp(topic, aliases[j].topic))) { + printf("%s%s:\n", topic ? "\n" : "", + aliases[j].topic); + topic = aliases[j].topic; + } + printf(" %-50s\n", aliases[j].name); + printf("%*s", 8, "["); + wordwrap(aliases[j].desc, 8, columns, 0); + printf("]\n"); + } else + printf(" %-50s [Kernel PMU event]\n", aliases[j].name); printed++; } if (printed && pager_in_use()) printf("\n"); out_free: for (j = 0; j < len; j++) - zfree(&aliases[j]); + zfree(&aliases[j].name); zfree(&aliases); return; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 743422ad900b..25712034c815 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -40,6 +40,9 @@ struct perf_pmu_info { struct perf_pmu_alias { char *name; + char *desc; + char *long_desc; + char *topic; struct list_head terms; /* HEAD struct parse_events_term -> list */ struct list_head list; /* ELEM */ char unit[UNIT_MAX_LEN+1]; @@ -71,7 +74,8 @@ int perf_pmu__format_parse(char *dir, struct list_head *head); struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); -void print_pmu_events(const char *event_glob, bool name_only); +void print_pmu_events(const char *event_glob, bool name_only, bool quiet, + bool long_desc); bool pmu_have_event(const char *pname, const char *name); int perf_pmu__scan_file(struct perf_pmu 
*pmu, const char *name, const char *fmt, diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index fcfbef07b92d..d281ae2b54e8 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -213,7 +213,7 @@ static int convert_exec_to_group(const char *exec, char **result) goto out; } - for (ptr2 = ptr1; ptr2 != '\0'; ptr2++) { + for (ptr2 = ptr1; *ptr2 != '\0'; ptr2++) { if (!isalnum(*ptr2) && *ptr2 != '_') { *ptr2 = '\0'; break; diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index b268a6648a5d..318424ea561d 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -66,9 +66,8 @@ static inline ssize_t strbuf_avail(const struct strbuf *sb) { int strbuf_grow(struct strbuf *buf, size_t); static inline int strbuf_setlen(struct strbuf *sb, size_t len) { - int ret; if (!sb->alloc) { - ret = strbuf_grow(sb, 0); + int ret = strbuf_grow(sb, 0); if (ret) return ret; } diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 8b10a55410a2..f5af87f66663 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -14,13 +14,12 @@ int thread__init_map_groups(struct thread *thread, struct machine *machine) { - struct thread *leader; pid_t pid = thread->pid_; if (pid == thread->tid || pid == -1) { thread->mg = map_groups__new(machine); } else { - leader = __machine__findnew_thread(machine, pid, pid); + struct thread *leader = __machine__findnew_thread(machine, pid, pid); if (leader) { thread->mg = map_groups__get(leader->mg); thread__put(leader); @@ -130,11 +129,10 @@ int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp, bool exec) { struct comm *new, *curr = thread__comm(thread); - int err; /* Override the default :tid entry */ if (!thread->comm_set) { - err = comm__override(curr, str, timestamp, exec); + int err = comm__override(curr, str, timestamp, exec); if (err) return err; } else { @@ -270,10 +268,9 @@ static int thread__clone_map_groups(struct thread 
*thread, int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) { - int err; - if (parent->comm_set) { const char *comm = thread__comm_str(parent); + int err; if (!comm) return -ENOMEM; err = thread__set_comm(thread, comm, timestamp); diff --git a/tools/spi/Makefile b/tools/spi/Makefile index cd0db62e4d9d..3815b18ba070 100644 --- a/tools/spi/Makefile +++ b/tools/spi/Makefile @@ -1,3 +1,5 @@ +CC = $(CROSS_COMPILE)gcc + all: spidev_test spidev_fdx clean: diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c index 8a73d8185316..f046b77cfefe 100644 --- a/tools/spi/spidev_test.c +++ b/tools/spi/spidev_test.c @@ -19,6 +19,7 @@ #include <getopt.h> #include <fcntl.h> #include <sys/ioctl.h> +#include <linux/ioctl.h> #include <sys/stat.h> #include <linux/types.h> #include <linux/spi/spidev.h> @@ -284,7 +285,7 @@ static void parse_opts(int argc, char *argv[]) static void transfer_escaped_string(int fd, char *str) { - size_t size = strlen(str + 1); + size_t size = strlen(str); uint8_t *tx; uint8_t *rx; diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index ad6dd0543019..582db95127ed 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -13,6 +13,7 @@ ldflags-y += --wrap=__release_region ldflags-y += --wrap=devm_memremap_pages ldflags-y += --wrap=insert_resource ldflags-y += --wrap=remove_resource +ldflags-y += --wrap=acpi_evaluate_object DRIVERS := ../../../drivers NVDIMM_SRC := $(DRIVERS)/nvdimm diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c index 878daf3429e8..7dc5a0af9b54 100644 --- a/tools/testing/nvdimm/config_check.c +++ b/tools/testing/nvdimm/config_check.c @@ -1,4 +1,3 @@ -#include <linux/kconfig.h> #include <linux/bug.h> void check(void) diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index c29f8dca9e67..3ccef732fce9 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -17,6 +17,7 
@@ #include <linux/module.h> #include <linux/types.h> #include <linux/pfn_t.h> +#include <linux/acpi.h> #include <linux/io.h> #include <linux/mm.h> #include "nfit_test.h" @@ -73,7 +74,7 @@ void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size, if (nfit_res) return (void __iomem *) nfit_res->buf + offset - - nfit_res->res->start; + - nfit_res->res.start; return fallback_fn(offset, size); } @@ -84,7 +85,7 @@ void __iomem *__wrap_devm_ioremap_nocache(struct device *dev, if (nfit_res) return (void __iomem *) nfit_res->buf + offset - - nfit_res->res->start; + - nfit_res->res.start; return devm_ioremap_nocache(dev, offset, size); } EXPORT_SYMBOL(__wrap_devm_ioremap_nocache); @@ -95,7 +96,7 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset, struct nfit_test_resource *nfit_res = get_nfit_res(offset); if (nfit_res) - return nfit_res->buf + offset - nfit_res->res->start; + return nfit_res->buf + offset - nfit_res->res.start; return devm_memremap(dev, offset, size, flags); } EXPORT_SYMBOL(__wrap_devm_memremap); @@ -107,7 +108,7 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res, struct nfit_test_resource *nfit_res = get_nfit_res(offset); if (nfit_res) - return nfit_res->buf + offset - nfit_res->res->start; + return nfit_res->buf + offset - nfit_res->res.start; return devm_memremap_pages(dev, res, ref, altmap); } EXPORT_SYMBOL(__wrap_devm_memremap_pages); @@ -128,7 +129,7 @@ void *__wrap_memremap(resource_size_t offset, size_t size, struct nfit_test_resource *nfit_res = get_nfit_res(offset); if (nfit_res) - return nfit_res->buf + offset - nfit_res->res->start; + return nfit_res->buf + offset - nfit_res->res.start; return memremap(offset, size, flags); } EXPORT_SYMBOL(__wrap_memremap); @@ -174,6 +175,63 @@ void __wrap_memunmap(void *addr) } EXPORT_SYMBOL(__wrap_memunmap); +static bool nfit_test_release_region(struct device *dev, + struct resource *parent, resource_size_t start, + resource_size_t n); + 
+static void nfit_devres_release(struct device *dev, void *data) +{ + struct resource *res = *((struct resource **) data); + + WARN_ON(!nfit_test_release_region(NULL, &iomem_resource, res->start, + resource_size(res))); +} + +static int match(struct device *dev, void *__res, void *match_data) +{ + struct resource *res = *((struct resource **) __res); + resource_size_t start = *((resource_size_t *) match_data); + + return res->start == start; +} + +static bool nfit_test_release_region(struct device *dev, + struct resource *parent, resource_size_t start, + resource_size_t n) +{ + if (parent == &iomem_resource) { + struct nfit_test_resource *nfit_res = get_nfit_res(start); + + if (nfit_res) { + struct nfit_test_request *req; + struct resource *res = NULL; + + if (dev) { + devres_release(dev, nfit_devres_release, match, + &start); + return true; + } + + spin_lock(&nfit_res->lock); + list_for_each_entry(req, &nfit_res->requests, list) + if (req->res.start == start) { + res = &req->res; + list_del(&req->list); + break; + } + spin_unlock(&nfit_res->lock); + + WARN(!res || resource_size(res) != n, + "%s: start: %llx n: %llx mismatch: %pr\n", + __func__, start, n, res); + if (res) + kfree(req); + return true; + } + } + return false; +} + static struct resource *nfit_test_request_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n, const char *name, int flags) @@ -183,21 +241,57 @@ static struct resource *nfit_test_request_region(struct device *dev, if (parent == &iomem_resource) { nfit_res = get_nfit_res(start); if (nfit_res) { - struct resource *res = nfit_res->res + 1; + struct nfit_test_request *req; + struct resource *res = NULL; - if (start + n > nfit_res->res->start - + resource_size(nfit_res->res)) { + if (start + n > nfit_res->res.start + + resource_size(&nfit_res->res)) { pr_debug("%s: start: %llx n: %llx overflow: %pr\n", __func__, start, n, - nfit_res->res); + &nfit_res->res); return NULL; } + spin_lock(&nfit_res->lock); + 
list_for_each_entry(req, &nfit_res->requests, list) + if (start == req->res.start) { + res = &req->res; + break; + } + spin_unlock(&nfit_res->lock); + + if (res) { + WARN(1, "%pr already busy\n", res); + return NULL; + } + + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return NULL; + INIT_LIST_HEAD(&req->list); + res = &req->res; + res->start = start; res->end = start + n - 1; res->name = name; res->flags = resource_type(parent); res->flags |= IORESOURCE_BUSY | flags; + spin_lock(&nfit_res->lock); + list_add(&req->list, &nfit_res->requests); + spin_unlock(&nfit_res->lock); + + if (dev) { + struct resource **d; + + d = devres_alloc(nfit_devres_release, + sizeof(struct resource *), + GFP_KERNEL); + if (!d) + return NULL; + *d = res; + devres_add(dev, d); + } + pr_debug("%s: %pr\n", __func__, res); return res; } @@ -241,29 +335,10 @@ struct resource *__wrap___devm_request_region(struct device *dev, } EXPORT_SYMBOL(__wrap___devm_request_region); -static bool nfit_test_release_region(struct resource *parent, - resource_size_t start, resource_size_t n) -{ - if (parent == &iomem_resource) { - struct nfit_test_resource *nfit_res = get_nfit_res(start); - if (nfit_res) { - struct resource *res = nfit_res->res + 1; - - if (start != res->start || resource_size(res) != n) - pr_info("%s: start: %llx n: %llx mismatch: %pr\n", - __func__, start, n, res); - else - memset(res, 0, sizeof(*res)); - return true; - } - } - return false; -} - void __wrap___release_region(struct resource *parent, resource_size_t start, resource_size_t n) { - if (!nfit_test_release_region(parent, start, n)) + if (!nfit_test_release_region(NULL, parent, start, n)) __release_region(parent, start, n); } EXPORT_SYMBOL(__wrap___release_region); @@ -271,9 +346,25 @@ EXPORT_SYMBOL(__wrap___release_region); void __wrap___devm_release_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n) { - if (!nfit_test_release_region(parent, start, n)) + if 
(!nfit_test_release_region(dev, parent, start, n)) __devm_release_region(dev, parent, start, n); } EXPORT_SYMBOL(__wrap___devm_release_region); +acpi_status __wrap_acpi_evaluate_object(acpi_handle handle, acpi_string path, + struct acpi_object_list *p, struct acpi_buffer *buf) +{ + struct nfit_test_resource *nfit_res = get_nfit_res((long) handle); + union acpi_object **obj; + + if (!nfit_res || strcmp(path, "_FIT") || !buf) + return acpi_evaluate_object(handle, path, p, buf); + + obj = nfit_res->buf; + buf->length = sizeof(union acpi_object); + buf->pointer = *obj; + return AE_OK; +} +EXPORT_SYMBOL(__wrap_acpi_evaluate_object); + MODULE_LICENSE("GPL v2"); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index f64c57bf1d4b..c9a6458cb63e 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -132,6 +132,8 @@ static u32 handle[NUM_DCR] = { [4] = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0), }; +static unsigned long dimm_fail_cmd_flags[NUM_DCR]; + struct nfit_test { struct acpi_nfit_desc acpi_desc; struct platform_device pdev; @@ -154,11 +156,14 @@ struct nfit_test { int (*alloc)(struct nfit_test *t); void (*setup)(struct nfit_test *t); int setup_hotplug; + union acpi_object **_fit; + dma_addr_t _fit_dma; struct ars_state { struct nd_cmd_ars_status *ars_status; unsigned long deadline; spinlock_t lock; } ars_state; + struct device *dimm_dev[NUM_DCR]; }; static struct nfit_test *to_nfit_test(struct device *dev) @@ -411,6 +416,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, if (i >= ARRAY_SIZE(handle)) return -ENXIO; + if ((1 << func) & dimm_fail_cmd_flags[i]) + return -EIO; + switch (func) { case ND_CMD_GET_CONFIG_SIZE: rc = nfit_test_cmd_get_config_size(buf, buf_len); @@ -428,6 +436,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, break; case ND_CMD_SMART_THRESHOLD: rc = nfit_test_cmd_smart_threshold(buf, buf_len); + device_lock(&t->pdev.dev); + 
__acpi_nvdimm_notify(t->dimm_dev[i], 0x81); + device_unlock(&t->pdev.dev); break; default: return -ENOTTY; @@ -467,14 +478,12 @@ static struct nfit_test *instances[NUM_NFITS]; static void release_nfit_res(void *data) { struct nfit_test_resource *nfit_res = data; - struct resource *res = nfit_res->res; spin_lock(&nfit_test_lock); list_del(&nfit_res->list); spin_unlock(&nfit_test_lock); vfree(nfit_res->buf); - kfree(res); kfree(nfit_res); } @@ -482,12 +491,11 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma, void *buf) { struct device *dev = &t->pdev.dev; - struct resource *res = kzalloc(sizeof(*res) * 2, GFP_KERNEL); struct nfit_test_resource *nfit_res = kzalloc(sizeof(*nfit_res), GFP_KERNEL); int rc; - if (!res || !buf || !nfit_res) + if (!buf || !nfit_res) goto err; rc = devm_add_action(dev, release_nfit_res, nfit_res); if (rc) @@ -496,10 +504,11 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma, memset(buf, 0, size); nfit_res->dev = dev; nfit_res->buf = buf; - nfit_res->res = res; - res->start = *dma; - res->end = *dma + size - 1; - res->name = "NFIT"; + nfit_res->res.start = *dma; + nfit_res->res.end = *dma + size - 1; + nfit_res->res.name = "NFIT"; + spin_lock_init(&nfit_res->lock); + INIT_LIST_HEAD(&nfit_res->requests); spin_lock(&nfit_test_lock); list_add(&nfit_res->list, &t->resources); spin_unlock(&nfit_test_lock); @@ -508,7 +517,6 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma, err: if (buf) vfree(buf); - kfree(res); kfree(nfit_res); return NULL; } @@ -533,13 +541,13 @@ static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr) continue; spin_lock(&nfit_test_lock); list_for_each_entry(n, &t->resources, list) { - if (addr >= n->res->start && (addr < n->res->start - + resource_size(n->res))) { + if (addr >= n->res.start && (addr < n->res.start + + resource_size(&n->res))) { nfit_res = n; break; } else if (addr >= (unsigned long) n->buf && (addr < 
(unsigned long) n->buf - + resource_size(n->res))) { + + resource_size(&n->res))) { nfit_res = n; break; } @@ -564,6 +572,86 @@ static int ars_state_init(struct device *dev, struct ars_state *ars_state) return 0; } +static void put_dimms(void *data) +{ + struct device **dimm_dev = data; + int i; + + for (i = 0; i < NUM_DCR; i++) + if (dimm_dev[i]) + device_unregister(dimm_dev[i]); +} + +static struct class *nfit_test_dimm; + +static int dimm_name_to_id(struct device *dev) +{ + int dimm; + + if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1 + || dimm >= NUM_DCR || dimm < 0) + return -ENXIO; + return dimm; +} + + +static ssize_t handle_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int dimm = dimm_name_to_id(dev); + + if (dimm < 0) + return dimm; + + return sprintf(buf, "%#x", handle[dimm]); +} +DEVICE_ATTR_RO(handle); + +static ssize_t fail_cmd_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int dimm = dimm_name_to_id(dev); + + if (dimm < 0) + return dimm; + + return sprintf(buf, "%#lx\n", dimm_fail_cmd_flags[dimm]); +} + +static ssize_t fail_cmd_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + int dimm = dimm_name_to_id(dev); + unsigned long val; + ssize_t rc; + + if (dimm < 0) + return dimm; + + rc = kstrtol(buf, 0, &val); + if (rc) + return rc; + + dimm_fail_cmd_flags[dimm] = val; + return size; +} +static DEVICE_ATTR_RW(fail_cmd); + +static struct attribute *nfit_test_dimm_attributes[] = { + &dev_attr_fail_cmd.attr, + &dev_attr_handle.attr, + NULL, +}; + +static struct attribute_group nfit_test_dimm_attribute_group = { + .attrs = nfit_test_dimm_attributes, +}; + +static const struct attribute_group *nfit_test_dimm_attribute_groups[] = { + &nfit_test_dimm_attribute_group, + NULL, +}; + static int nfit_test0_alloc(struct nfit_test *t) { size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA @@ -616,6 +704,21 @@ static int nfit_test0_alloc(struct 
nfit_test *t) return -ENOMEM; } + t->_fit = test_alloc(t, sizeof(union acpi_object **), &t->_fit_dma); + if (!t->_fit) + return -ENOMEM; + + if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t->dimm_dev)) + return -ENOMEM; + for (i = 0; i < NUM_DCR; i++) { + t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm, + &t->pdev.dev, 0, NULL, + nfit_test_dimm_attribute_groups, + "test_dimm%d", i); + if (!t->dimm_dev[i]) + return -ENOMEM; + } + return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -1409,6 +1512,8 @@ static int nfit_test_probe(struct platform_device *pdev) struct acpi_nfit_desc *acpi_desc; struct device *dev = &pdev->dev; struct nfit_test *nfit_test; + struct nfit_mem *nfit_mem; + union acpi_object *obj; int rc; nfit_test = to_nfit_test(&pdev->dev); @@ -1476,14 +1581,30 @@ static int nfit_test_probe(struct platform_device *pdev) if (nfit_test->setup != nfit_test0_setup) return 0; - flush_work(&acpi_desc->work); nfit_test->setup_hotplug = 1; nfit_test->setup(nfit_test); - rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf, - nfit_test->nfit_size); - if (rc) - return rc; + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; + obj->type = ACPI_TYPE_BUFFER; + obj->buffer.length = nfit_test->nfit_size; + obj->buffer.pointer = nfit_test->nfit_buf; + *(nfit_test->_fit) = obj; + __acpi_nfit_notify(&pdev->dev, nfit_test, 0x80); + + /* associate dimm devices with nfit_mem data for notification testing */ + mutex_lock(&acpi_desc->init_mutex); + list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) { + u32 nfit_handle = __to_nfit_memdev(nfit_mem)->device_handle; + int i; + + for (i = 0; i < NUM_DCR; i++) + if (nfit_handle == handle[i]) + dev_set_drvdata(nfit_test->dimm_dev[i], + nfit_mem); + } + mutex_unlock(&acpi_desc->init_mutex); return 0; } @@ -1518,6 +1639,10 @@ static __init int nfit_test_init(void) { int rc, i; + nfit_test_dimm = class_create(THIS_MODULE, "nfit_test_dimm"); + if (IS_ERR(nfit_test_dimm)) + return 
PTR_ERR(nfit_test_dimm); + nfit_test_setup(nfit_test_lookup); for (i = 0; i < NUM_NFITS; i++) { @@ -1584,6 +1709,7 @@ static __exit void nfit_test_exit(void) for (i = 0; i < NUM_NFITS; i++) platform_device_unregister(&instances[i]->pdev); nfit_test_teardown(); + class_destroy(nfit_test_dimm); } module_init(nfit_test_init); diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index 9f18e2a4a862..c281dd2e5e2d 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -13,11 +13,21 @@ #ifndef __NFIT_TEST_H__ #define __NFIT_TEST_H__ #include <linux/list.h> +#include <linux/ioport.h> +#include <linux/spinlock_types.h> + +struct nfit_test_request { + struct list_head list; + struct resource res; +}; struct nfit_test_resource { + struct list_head requests; struct list_head list; - struct resource *res; + struct resource res; struct device *dev; + spinlock_t lock; + int req_count; void *buf; }; diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 9d0919ed52a4..f2e07f2fd4b4 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -3,7 +3,8 @@ CFLAGS += -I. 
-g -O2 -Wall -D_LGPL_SOURCE LDFLAGS += -lpthread -lurcu TARGETS = main OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_next_bit.o \ - regression1.o regression2.o regression3.o multiorder.o + regression1.o regression2.o regression3.o multiorder.o \ + iteration_check.o targets: $(TARGETS) diff --git a/tools/testing/radix-tree/iteration_check.c b/tools/testing/radix-tree/iteration_check.c new file mode 100644 index 000000000000..9adb8e7415a6 --- /dev/null +++ b/tools/testing/radix-tree/iteration_check.c @@ -0,0 +1,180 @@ +/* + * iteration_check.c: test races having to do with radix tree iteration + * Copyright (c) 2016 Intel Corporation + * Author: Ross Zwisler <ross.zwisler@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ +#include <linux/radix-tree.h> +#include <pthread.h> +#include "test.h" + +#define NUM_THREADS 4 +#define TAG 0 +static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_t threads[NUM_THREADS]; +RADIX_TREE(tree, GFP_KERNEL); +bool test_complete; + +/* relentlessly fill the tree with tagged entries */ +static void *add_entries_fn(void *arg) +{ + int pgoff; + + while (!test_complete) { + for (pgoff = 0; pgoff < 100; pgoff++) { + pthread_mutex_lock(&tree_lock); + if (item_insert(&tree, pgoff) == 0) + item_tag_set(&tree, pgoff, TAG); + pthread_mutex_unlock(&tree_lock); + } + } + + return NULL; +} + +/* + * Iterate over the tagged entries, doing a radix_tree_iter_retry() as we find + * things that have been removed and randomly resetting our iteration to the + * next chunk with radix_tree_iter_next(). Both radix_tree_iter_retry() and + * radix_tree_iter_next() cause radix_tree_next_slot() to be called with a + * NULL 'slot' variable. + */ +static void *tagged_iteration_fn(void *arg) +{ + struct radix_tree_iter iter; + void **slot; + + while (!test_complete) { + rcu_read_lock(); + radix_tree_for_each_tagged(slot, &tree, &iter, 0, TAG) { + void *entry; + int i; + + /* busy wait to let removals happen */ + for (i = 0; i < 1000000; i++) + ; + + entry = radix_tree_deref_slot(slot); + if (unlikely(!entry)) + continue; + + if (radix_tree_deref_retry(entry)) { + slot = radix_tree_iter_retry(&iter); + continue; + } + + if (rand() % 50 == 0) + slot = radix_tree_iter_next(&iter); + } + rcu_read_unlock(); + } + + return NULL; +} + +/* + * Iterate over the entries, doing a radix_tree_iter_retry() as we find things + * that have been removed and randomly resetting our iteration to the next + * chunk with radix_tree_iter_next(). Both radix_tree_iter_retry() and + * radix_tree_iter_next() cause radix_tree_next_slot() to be called with a + * NULL 'slot' variable. 
+ */ +static void *untagged_iteration_fn(void *arg) +{ + struct radix_tree_iter iter; + void **slot; + + while (!test_complete) { + rcu_read_lock(); + radix_tree_for_each_slot(slot, &tree, &iter, 0) { + void *entry; + int i; + + /* busy wait to let removals happen */ + for (i = 0; i < 1000000; i++) + ; + + entry = radix_tree_deref_slot(slot); + if (unlikely(!entry)) + continue; + + if (radix_tree_deref_retry(entry)) { + slot = radix_tree_iter_retry(&iter); + continue; + } + + if (rand() % 50 == 0) + slot = radix_tree_iter_next(&iter); + } + rcu_read_unlock(); + } + + return NULL; +} + +/* + * Randomly remove entries to help induce radix_tree_iter_retry() calls in the + * two iteration functions. + */ +static void *remove_entries_fn(void *arg) +{ + while (!test_complete) { + int pgoff; + + pgoff = rand() % 100; + + pthread_mutex_lock(&tree_lock); + item_delete(&tree, pgoff); + pthread_mutex_unlock(&tree_lock); + } + + return NULL; +} + +/* This is a unit test for a bug found by the syzkaller tester */ +void iteration_test(void) +{ + int i; + + printf("Running iteration tests for 10 seconds\n"); + + srand(time(0)); + test_complete = false; + + if (pthread_create(&threads[0], NULL, tagged_iteration_fn, NULL)) { + perror("pthread_create"); + exit(1); + } + if (pthread_create(&threads[1], NULL, untagged_iteration_fn, NULL)) { + perror("pthread_create"); + exit(1); + } + if (pthread_create(&threads[2], NULL, add_entries_fn, NULL)) { + perror("pthread_create"); + exit(1); + } + if (pthread_create(&threads[3], NULL, remove_entries_fn, NULL)) { + perror("pthread_create"); + exit(1); + } + + sleep(10); + test_complete = true; + + for (i = 0; i < NUM_THREADS; i++) { + if (pthread_join(threads[i], NULL)) { + perror("pthread_join"); + exit(1); + } + } + + item_kill_tree(&tree); +} diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index b7619ff3b552..daa9010693e8 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ 
-332,6 +332,7 @@ int main(int argc, char **argv) regression1_test(); regression2_test(); regression3_test(); + iteration_test(); single_thread_tests(long_run); sleep(1); diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c index 2d03a63bb79c..0d6813a61b37 100644 --- a/tools/testing/radix-tree/regression1.c +++ b/tools/testing/radix-tree/regression1.c @@ -43,7 +43,7 @@ #include "regression.h" static RADIX_TREE(mt_tree, GFP_KERNEL); -static pthread_mutex_t mt_lock; +static pthread_mutex_t mt_lock = PTHREAD_MUTEX_INITIALIZER; struct page { pthread_mutex_t lock; diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index e85131369723..217fb2403f09 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -27,6 +27,7 @@ void item_kill_tree(struct radix_tree_root *root); void tag_check(void); void multiorder_checks(void); +void iteration_test(void); struct item * item_tag_set(struct radix_tree_root *root, unsigned long index, int tag); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index ff9e5f20a5a7..f770dba2a6f6 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -15,6 +15,7 @@ TARGETS += memory-hotplug TARGETS += mount TARGETS += mqueue TARGETS += net +TARGETS += nsfs TARGETS += powerpc TARGETS += pstore TARGETS += ptrace diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore new file mode 100644 index 000000000000..31d6e426b6d4 --- /dev/null +++ b/tools/testing/selftests/filesystems/.gitignore @@ -0,0 +1 @@ +dnotify_test diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile new file mode 100644 index 000000000000..0ab11307b414 --- /dev/null +++ b/tools/testing/selftests/filesystems/Makefile @@ -0,0 +1,7 @@ +TEST_PROGS := dnotify_test +all: $(TEST_PROGS) + +include ../lib.mk + +clean: + rm -fr 
$(TEST_PROGS) diff --git a/tools/testing/selftests/filesystems/dnotify_test.c b/tools/testing/selftests/filesystems/dnotify_test.c new file mode 100644 index 000000000000..8b37b4a1e18d --- /dev/null +++ b/tools/testing/selftests/filesystems/dnotify_test.c @@ -0,0 +1,34 @@ +#define _GNU_SOURCE /* needed to get the defines */ +#include <fcntl.h> /* in glibc 2.2 this has the needed + values defined */ +#include <signal.h> +#include <stdio.h> +#include <unistd.h> + +static volatile int event_fd; + +static void handler(int sig, siginfo_t *si, void *data) +{ + event_fd = si->si_fd; +} + +int main(void) +{ + struct sigaction act; + int fd; + + act.sa_sigaction = handler; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + sigaction(SIGRTMIN + 1, &act, NULL); + + fd = open(".", O_RDONLY); + fcntl(fd, F_SETSIG, SIGRTMIN + 1); + fcntl(fd, F_NOTIFY, DN_MODIFY|DN_CREATE|DN_MULTISHOT); + /* we will now be notified if any of the files + in "." is modified or new files are created */ + while (1) { + pause(); + printf("Got event on fd=%d\n", event_fd); + } +} diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh index e87dbe2a0b0d..7ff002eed624 100755 --- a/tools/testing/selftests/futex/functional/run.sh +++ b/tools/testing/selftests/futex/functional/run.sh @@ -24,7 +24,7 @@ # Test for a color capable console if [ -z "$USE_COLOR" ]; then - tput setf 7 + tput setf 7 || tput setaf 7 if [ $? -eq 0 ]; then USE_COLOR=1 tput sgr0 diff --git a/tools/testing/selftests/futex/run.sh b/tools/testing/selftests/futex/run.sh index 4126312ad64e..88bcb1767362 100755 --- a/tools/testing/selftests/futex/run.sh +++ b/tools/testing/selftests/futex/run.sh @@ -23,7 +23,7 @@ # Test for a color capable shell and pass the result to the subdir scripts USE_COLOR=0 -tput setf 7 +tput setf 7 || tput setaf 7 if [ $? 
-eq 0 ]; then USE_COLOR=1 tput sgr0 diff --git a/tools/testing/selftests/ia64/.gitignore b/tools/testing/selftests/ia64/.gitignore new file mode 100644 index 000000000000..ab806edc8732 --- /dev/null +++ b/tools/testing/selftests/ia64/.gitignore @@ -0,0 +1 @@ +aliasing-test diff --git a/tools/testing/selftests/ia64/Makefile b/tools/testing/selftests/ia64/Makefile new file mode 100644 index 000000000000..2b3de2d3e945 --- /dev/null +++ b/tools/testing/selftests/ia64/Makefile @@ -0,0 +1,8 @@ +TEST_PROGS := aliasing-test + +all: $(TEST_PROGS) + +include ../lib.mk + +clean: + rm -fr $(TEST_PROGS) diff --git a/tools/testing/selftests/ia64/aliasing-test.c b/tools/testing/selftests/ia64/aliasing-test.c new file mode 100644 index 000000000000..62a190d45f38 --- /dev/null +++ b/tools/testing/selftests/ia64/aliasing-test.c @@ -0,0 +1,263 @@ +/* + * Exercise /dev/mem mmap cases that have been troublesome in the past + * + * (c) Copyright 2007 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas <bjorn.helgaas@hp.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <stdlib.h> +#include <stdio.h> +#include <sys/types.h> +#include <dirent.h> +#include <fcntl.h> +#include <fnmatch.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <unistd.h> +#include <linux/pci.h> + +int sum; + +static int map_mem(char *path, off_t offset, size_t length, int touch) +{ + int fd, rc; + void *addr; + int *c; + + fd = open(path, O_RDWR); + if (fd == -1) { + perror(path); + return -1; + } + + if (fnmatch("/proc/bus/pci/*", path, 0) == 0) { + rc = ioctl(fd, PCIIOC_MMAP_IS_MEM); + if (rc == -1) + perror("PCIIOC_MMAP_IS_MEM ioctl"); + } + + addr = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, offset); + if (addr == MAP_FAILED) + return 1; + + if (touch) { + c = (int *) addr; + while (c < (int *) (addr + length)) + sum += *c++; + } + + rc = munmap(addr, length); + if (rc == -1) { + perror("munmap"); + return -1; + } + + close(fd); + return 0; +} + +static int scan_tree(char *path, char *file, off_t offset, size_t length, int touch) +{ + struct dirent **namelist; + char *name, *path2; + int i, n, r, rc = 0, result = 0; + struct stat buf; + + n = scandir(path, &namelist, 0, alphasort); + if (n < 0) { + perror("scandir"); + return -1; + } + + for (i = 0; i < n; i++) { + name = namelist[i]->d_name; + + if (fnmatch(".", name, 0) == 0) + goto skip; + if (fnmatch("..", name, 0) == 0) + goto skip; + + path2 = malloc(strlen(path) + strlen(name) + 3); + strcpy(path2, path); + strcat(path2, "/"); + strcat(path2, name); + + if (fnmatch(file, name, 0) == 0) { + rc = map_mem(path2, offset, length, touch); + if (rc == 0) + fprintf(stderr, "PASS: %s 0x%lx-0x%lx is %s\n", path2, offset, offset + length, touch ? 
"readable" : "mappable"); + else if (rc > 0) + fprintf(stderr, "PASS: %s 0x%lx-0x%lx not mappable\n", path2, offset, offset + length); + else { + fprintf(stderr, "FAIL: %s 0x%lx-0x%lx not accessible\n", path2, offset, offset + length); + return rc; + } + } else { + r = lstat(path2, &buf); + if (r == 0 && S_ISDIR(buf.st_mode)) { + rc = scan_tree(path2, file, offset, length, touch); + if (rc < 0) + return rc; + } + } + + result |= rc; + free(path2); + +skip: + free(namelist[i]); + } + free(namelist); + return result; +} + +char buf[1024]; + +static int read_rom(char *path) +{ + int fd, rc; + size_t size = 0; + + fd = open(path, O_RDWR); + if (fd == -1) { + perror(path); + return -1; + } + + rc = write(fd, "1", 2); + if (rc <= 0) { + close(fd); + perror("write"); + return -1; + } + + do { + rc = read(fd, buf, sizeof(buf)); + if (rc > 0) + size += rc; + } while (rc > 0); + + close(fd); + return size; +} + +static int scan_rom(char *path, char *file) +{ + struct dirent **namelist; + char *name, *path2; + int i, n, r, rc = 0, result = 0; + struct stat buf; + + n = scandir(path, &namelist, 0, alphasort); + if (n < 0) { + perror("scandir"); + return -1; + } + + for (i = 0; i < n; i++) { + name = namelist[i]->d_name; + + if (fnmatch(".", name, 0) == 0) + goto skip; + if (fnmatch("..", name, 0) == 0) + goto skip; + + path2 = malloc(strlen(path) + strlen(name) + 3); + strcpy(path2, path); + strcat(path2, "/"); + strcat(path2, name); + + if (fnmatch(file, name, 0) == 0) { + rc = read_rom(path2); + + /* + * It's OK if the ROM is unreadable. Maybe there + * is no ROM, or some other error occurred. The + * important thing is that no MCA happened. 
/*
 * Run the legacy-memory aliasing checks against /dev/mem, the sysfs
 * legacy_mem and rom files, and the /proc/bus/pci device files.
 *
 * Returns 0 when none of the /dev/mem checks printed FAIL and 1 otherwise.
 * The result of the final 1 MB mapping used to be returned as-is, which
 * made a "PASS: ... not mappable" run exit with status 1 and hid a FAIL
 * from any of the first three checks.  The scan_tree()/scan_rom() results
 * are reported on stderr only, exactly as before.
 */
int main(void)
{
	int rc;
	int failed = 0;

	if (map_mem("/dev/mem", 0, 0xA0000, 1) == 0)
		fprintf(stderr, "PASS: /dev/mem 0x0-0xa0000 is readable\n");
	else {
		fprintf(stderr, "FAIL: /dev/mem 0x0-0xa0000 not accessible\n");
		failed = 1;
	}

	/*
	 * It's not safe to blindly read the VGA frame buffer.  If you know
	 * how to poke the card the right way, it should respond, but it's
	 * not safe in general.  Many machines, e.g., Intel chipsets, cover
	 * up a non-responding card by just returning -1, but others will
	 * report the failure as a machine check.
	 */
	if (map_mem("/dev/mem", 0xA0000, 0x20000, 0) == 0)
		fprintf(stderr, "PASS: /dev/mem 0xa0000-0xc0000 is mappable\n");
	else {
		fprintf(stderr, "FAIL: /dev/mem 0xa0000-0xc0000 not accessible\n");
		failed = 1;
	}

	if (map_mem("/dev/mem", 0xC0000, 0x40000, 1) == 0)
		fprintf(stderr, "PASS: /dev/mem 0xc0000-0x100000 is readable\n");
	else {
		fprintf(stderr, "FAIL: /dev/mem 0xc0000-0x100000 not accessible\n");
		failed = 1;
	}

	/*
	 * Often you can map all the individual pieces above (0-0xA0000,
	 * 0xA0000-0xC0000, and 0xC0000-0x100000), but can't map the whole
	 * thing at once.  This is because the individual pieces use different
	 * attributes, and there's no single attribute supported over the
	 * whole region.
	 */
	rc = map_mem("/dev/mem", 0, 1024*1024, 0);
	if (rc == 0)
		fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 is mappable\n");
	else if (rc > 0)
		fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 not mappable\n");
	else {
		fprintf(stderr, "FAIL: /dev/mem 0x0-0x100000 not accessible\n");
		failed = 1;
	}

	scan_tree("/sys/class/pci_bus", "legacy_mem", 0, 0xA0000, 1);
	scan_tree("/sys/class/pci_bus", "legacy_mem", 0xA0000, 0x20000, 0);
	scan_tree("/sys/class/pci_bus", "legacy_mem", 0xC0000, 0x40000, 1);
	scan_tree("/sys/class/pci_bus", "legacy_mem", 0, 1024*1024, 0);

	scan_rom("/sys/devices", "rom");

	scan_tree("/proc/bus/pci", "??.?", 0, 0xA0000, 1);
	scan_tree("/proc/bus/pci", "??.?", 0xA0000, 0x20000, 0);
	scan_tree("/proc/bus/pci", "??.?", 0xC0000, 0x40000, 1);
	scan_tree("/proc/bus/pci", "??.?", 0, 1024*1024, 0);

	return failed;
}
/*
 * Find the index of the table entry that equals @name, ignoring case.
 *
 * @names: table of @size entries; NULL entries are skipped
 * @name:  string to look for
 *
 * Returns the index of the first match, or -1 when nothing matches.
 */
static int
lookup_value(const char **names, int size, const char *name)
{
	int idx = 0;

	while (idx < size) {
		const char *candidate = names[idx];

		if (candidate != NULL && !strcasecmp(candidate, name))
			return idx;
		idx++;
	}

	return -1;
}
hwtstamp_config config; + const char *name; + int sock; + + if ((argc != 2 && argc != 4) || (strlen(argv[1]) >= IFNAMSIZ)) { + usage(); + return 2; + } + + if (argc == 4) { + config.flags = 0; + config.tx_type = lookup_value(tx_types, N_TX_TYPES, argv[2]); + config.rx_filter = lookup_value(rx_filters, N_RX_FILTERS, argv[3]); + if (config.tx_type < 0 || config.rx_filter < 0) { + usage(); + return 2; + } + } + + sock = socket(AF_INET, SOCK_DGRAM, 0); + if (sock < 0) { + perror("socket"); + return 1; + } + + strcpy(ifr.ifr_name, argv[1]); + ifr.ifr_data = (caddr_t)&config; + + if (ioctl(sock, (argc == 2) ? SIOCGHWTSTAMP : SIOCSHWTSTAMP, &ifr)) { + perror("ioctl"); + return 1; + } + + printf("flags = %#x\n", config.flags); + name = lookup_name(tx_types, N_TX_TYPES, config.tx_type); + if (name) + printf("tx_type = %s\n", name); + else + printf("tx_type = %d\n", config.tx_type); + name = lookup_name(rx_filters, N_RX_FILTERS, config.rx_filter); + if (name) + printf("rx_filter = %s\n", name); + else + printf("rx_filter = %d\n", config.rx_filter); + + return 0; +} diff --git a/tools/testing/selftests/networking/timestamping/timestamping.c b/tools/testing/selftests/networking/timestamping/timestamping.c new file mode 100644 index 000000000000..5cdfd743447b --- /dev/null +++ b/tools/testing/selftests/networking/timestamping/timestamping.c @@ -0,0 +1,528 @@ +/* + * This program demonstrates how the various time stamping features in + * the Linux kernel work. It emulates the behavior of a PTP + * implementation in stand-alone master mode by sending PTPv1 Sync + * multicasts once every second. It looks for similar packets, but + * beyond that doesn't actually implement PTP. + * + * Outgoing packets are time stamped with SO_TIMESTAMPING with or + * without hardware support. + * + * Incoming packets are time stamped with SO_TIMESTAMPING with or + * without hardware support, SIOCGSTAMP[NS] (per-socket time stamp) and + * SO_TIMESTAMP[NS]. 
+ * + * Copyright (C) 2009 Intel Corporation. + * Author: Patrick Ohly <patrick.ohly@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> + +#include <sys/time.h> +#include <sys/socket.h> +#include <sys/select.h> +#include <sys/ioctl.h> +#include <arpa/inet.h> +#include <net/if.h> + +#include <asm/types.h> +#include <linux/net_tstamp.h> +#include <linux/errqueue.h> + +#ifndef SO_TIMESTAMPING +# define SO_TIMESTAMPING 37 +# define SCM_TIMESTAMPING SO_TIMESTAMPING +#endif + +#ifndef SO_TIMESTAMPNS +# define SO_TIMESTAMPNS 35 +#endif + +#ifndef SIOCGSTAMPNS +# define SIOCGSTAMPNS 0x8907 +#endif + +#ifndef SIOCSHWTSTAMP +# define SIOCSHWTSTAMP 0x89b0 +#endif + +static void usage(const char *error) +{ + if (error) + printf("invalid option: %s\n", error); + printf("timestamping interface option*\n\n" + "Options:\n" + " IP_MULTICAST_LOOP - looping outgoing multicasts\n" + " SO_TIMESTAMP - normal software time stamping, ms resolution\n" + " SO_TIMESTAMPNS - more accurate software time stamping\n" + " SOF_TIMESTAMPING_TX_HARDWARE - hardware time stamping of outgoing packets\n" + " SOF_TIMESTAMPING_TX_SOFTWARE - software fallback for outgoing packets\n" + " SOF_TIMESTAMPING_RX_HARDWARE - hardware time stamping of incoming packets\n" + " 
SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n" + " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n" + " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n" + " SIOCGSTAMP - check last socket time stamp\n" + " SIOCGSTAMPNS - more accurate socket time stamp\n"); + exit(1); +} + +static void bail(const char *error) +{ + printf("%s: %s\n", error, strerror(errno)); + exit(1); +} + +static const unsigned char sync[] = { + 0x00, 0x01, 0x00, 0x01, + 0x5f, 0x44, 0x46, 0x4c, + 0x54, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x01, 0x01, + + /* fake uuid */ + 0x00, 0x01, + 0x02, 0x03, 0x04, 0x05, + + 0x00, 0x01, 0x00, 0x37, + 0x00, 0x00, 0x00, 0x08, + 0x00, 0x00, 0x00, 0x00, + 0x49, 0x05, 0xcd, 0x01, + 0x29, 0xb1, 0x8d, 0xb0, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, + + /* fake uuid */ + 0x00, 0x01, + 0x02, 0x03, 0x04, 0x05, + + 0x00, 0x00, 0x00, 0x37, + 0x00, 0x00, 0x00, 0x04, + 0x44, 0x46, 0x4c, 0x54, + 0x00, 0x00, 0xf0, 0x60, + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0xf0, 0x60, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x04, + 0x44, 0x46, 0x4c, 0x54, + 0x00, 0x01, + + /* fake uuid */ + 0x00, 0x01, + 0x02, 0x03, 0x04, 0x05, + + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 +}; + +static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len) +{ + struct timeval now; + int res; + + res = sendto(sock, sync, sizeof(sync), 0, + addr, addr_len); + gettimeofday(&now, 0); + if (res < 0) + printf("%s: %s\n", "send", strerror(errno)); + else + printf("%ld.%06ld: sent %d bytes\n", + (long)now.tv_sec, (long)now.tv_usec, + res); +} + +static void printpacket(struct msghdr *msg, int res, + char *data, + int sock, int recvmsg_flags, + int siocgstamp, int siocgstampns) +{ + struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name; + struct cmsghdr *cmsg; + struct timeval tv; + struct 
timespec ts; + struct timeval now; + + gettimeofday(&now, 0); + + printf("%ld.%06ld: received %s data, %d bytes from %s, %zu bytes control messages\n", + (long)now.tv_sec, (long)now.tv_usec, + (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular", + res, + inet_ntoa(from_addr->sin_addr), + msg->msg_controllen); + for (cmsg = CMSG_FIRSTHDR(msg); + cmsg; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + printf(" cmsg len %zu: ", cmsg->cmsg_len); + switch (cmsg->cmsg_level) { + case SOL_SOCKET: + printf("SOL_SOCKET "); + switch (cmsg->cmsg_type) { + case SO_TIMESTAMP: { + struct timeval *stamp = + (struct timeval *)CMSG_DATA(cmsg); + printf("SO_TIMESTAMP %ld.%06ld", + (long)stamp->tv_sec, + (long)stamp->tv_usec); + break; + } + case SO_TIMESTAMPNS: { + struct timespec *stamp = + (struct timespec *)CMSG_DATA(cmsg); + printf("SO_TIMESTAMPNS %ld.%09ld", + (long)stamp->tv_sec, + (long)stamp->tv_nsec); + break; + } + case SO_TIMESTAMPING: { + struct timespec *stamp = + (struct timespec *)CMSG_DATA(cmsg); + printf("SO_TIMESTAMPING "); + printf("SW %ld.%09ld ", + (long)stamp->tv_sec, + (long)stamp->tv_nsec); + stamp++; + /* skip deprecated HW transformed */ + stamp++; + printf("HW raw %ld.%09ld", + (long)stamp->tv_sec, + (long)stamp->tv_nsec); + break; + } + default: + printf("type %d", cmsg->cmsg_type); + break; + } + break; + case IPPROTO_IP: + printf("IPPROTO_IP "); + switch (cmsg->cmsg_type) { + case IP_RECVERR: { + struct sock_extended_err *err = + (struct sock_extended_err *)CMSG_DATA(cmsg); + printf("IP_RECVERR ee_errno '%s' ee_origin %d => %s", + strerror(err->ee_errno), + err->ee_origin, +#ifdef SO_EE_ORIGIN_TIMESTAMPING + err->ee_origin == SO_EE_ORIGIN_TIMESTAMPING ? 
+ "bounced packet" : "unexpected origin" +#else + "probably SO_EE_ORIGIN_TIMESTAMPING" +#endif + ); + if (res < sizeof(sync)) + printf(" => truncated data?!"); + else if (!memcmp(sync, data + res - sizeof(sync), + sizeof(sync))) + printf(" => GOT OUR DATA BACK (HURRAY!)"); + break; + } + case IP_PKTINFO: { + struct in_pktinfo *pktinfo = + (struct in_pktinfo *)CMSG_DATA(cmsg); + printf("IP_PKTINFO interface index %u", + pktinfo->ipi_ifindex); + break; + } + default: + printf("type %d", cmsg->cmsg_type); + break; + } + break; + default: + printf("level %d type %d", + cmsg->cmsg_level, + cmsg->cmsg_type); + break; + } + printf("\n"); + } + + if (siocgstamp) { + if (ioctl(sock, SIOCGSTAMP, &tv)) + printf(" %s: %s\n", "SIOCGSTAMP", strerror(errno)); + else + printf("SIOCGSTAMP %ld.%06ld\n", + (long)tv.tv_sec, + (long)tv.tv_usec); + } + if (siocgstampns) { + if (ioctl(sock, SIOCGSTAMPNS, &ts)) + printf(" %s: %s\n", "SIOCGSTAMPNS", strerror(errno)); + else + printf("SIOCGSTAMPNS %ld.%09ld\n", + (long)ts.tv_sec, + (long)ts.tv_nsec); + } +} + +static void recvpacket(int sock, int recvmsg_flags, + int siocgstamp, int siocgstampns) +{ + char data[256]; + struct msghdr msg; + struct iovec entry; + struct sockaddr_in from_addr; + struct { + struct cmsghdr cm; + char control[512]; + } control; + int res; + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &entry; + msg.msg_iovlen = 1; + entry.iov_base = data; + entry.iov_len = sizeof(data); + msg.msg_name = (caddr_t)&from_addr; + msg.msg_namelen = sizeof(from_addr); + msg.msg_control = &control; + msg.msg_controllen = sizeof(control); + + res = recvmsg(sock, &msg, recvmsg_flags|MSG_DONTWAIT); + if (res < 0) { + printf("%s %s: %s\n", + "recvmsg", + (recvmsg_flags & MSG_ERRQUEUE) ? 
"error" : "regular", + strerror(errno)); + } else { + printpacket(&msg, res, data, + sock, recvmsg_flags, + siocgstamp, siocgstampns); + } +} + +int main(int argc, char **argv) +{ + int so_timestamping_flags = 0; + int so_timestamp = 0; + int so_timestampns = 0; + int siocgstamp = 0; + int siocgstampns = 0; + int ip_multicast_loop = 0; + char *interface; + int i; + int enabled = 1; + int sock; + struct ifreq device; + struct ifreq hwtstamp; + struct hwtstamp_config hwconfig, hwconfig_requested; + struct sockaddr_in addr; + struct ip_mreq imr; + struct in_addr iaddr; + int val; + socklen_t len; + struct timeval next; + + if (argc < 2) + usage(0); + interface = argv[1]; + + for (i = 2; i < argc; i++) { + if (!strcasecmp(argv[i], "SO_TIMESTAMP")) + so_timestamp = 1; + else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS")) + so_timestampns = 1; + else if (!strcasecmp(argv[i], "SIOCGSTAMP")) + siocgstamp = 1; + else if (!strcasecmp(argv[i], "SIOCGSTAMPNS")) + siocgstampns = 1; + else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP")) + ip_multicast_loop = 1; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE; + else + usage(argv[i]); + } + + sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (sock < 0) + bail("socket"); + + memset(&device, 0, sizeof(device)); + strncpy(device.ifr_name, interface, 
sizeof(device.ifr_name)); + if (ioctl(sock, SIOCGIFADDR, &device) < 0) + bail("getting interface IP address"); + + memset(&hwtstamp, 0, sizeof(hwtstamp)); + strncpy(hwtstamp.ifr_name, interface, sizeof(hwtstamp.ifr_name)); + hwtstamp.ifr_data = (void *)&hwconfig; + memset(&hwconfig, 0, sizeof(hwconfig)); + hwconfig.tx_type = + (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ? + HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; + hwconfig.rx_filter = + (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ? + HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE; + hwconfig_requested = hwconfig; + if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) { + if ((errno == EINVAL || errno == ENOTSUP) && + hwconfig_requested.tx_type == HWTSTAMP_TX_OFF && + hwconfig_requested.rx_filter == HWTSTAMP_FILTER_NONE) + printf("SIOCSHWTSTAMP: disabling hardware time stamping not possible\n"); + else + bail("SIOCSHWTSTAMP"); + } + printf("SIOCSHWTSTAMP: tx_type %d requested, got %d; rx_filter %d requested, got %d\n", + hwconfig_requested.tx_type, hwconfig.tx_type, + hwconfig_requested.rx_filter, hwconfig.rx_filter); + + /* bind to PTP port */ + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = htons(319 /* PTP event port */); + if (bind(sock, + (struct sockaddr *)&addr, + sizeof(struct sockaddr_in)) < 0) + bail("bind"); + + /* set multicast group for outgoing packets */ + inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */ + addr.sin_addr = iaddr; + imr.imr_multiaddr.s_addr = iaddr.s_addr; + imr.imr_interface.s_addr = + ((struct sockaddr_in *)&device.ifr_addr)->sin_addr.s_addr; + if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF, + &imr.imr_interface.s_addr, sizeof(struct in_addr)) < 0) + bail("set multicast"); + + /* join multicast group, loop our own packet */ + if (setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP, + &imr, sizeof(struct ip_mreq)) < 0) + bail("join multicast group"); + + if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_LOOP, + 
&ip_multicast_loop, sizeof(enabled)) < 0) { + bail("loop multicast"); + } + + /* set socket options for time stamping */ + if (so_timestamp && + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, + &enabled, sizeof(enabled)) < 0) + bail("setsockopt SO_TIMESTAMP"); + + if (so_timestampns && + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, + &enabled, sizeof(enabled)) < 0) + bail("setsockopt SO_TIMESTAMPNS"); + + if (so_timestamping_flags && + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, + &so_timestamping_flags, + sizeof(so_timestamping_flags)) < 0) + bail("setsockopt SO_TIMESTAMPING"); + + /* request IP_PKTINFO for debugging purposes */ + if (setsockopt(sock, SOL_IP, IP_PKTINFO, + &enabled, sizeof(enabled)) < 0) + printf("%s: %s\n", "setsockopt IP_PKTINFO", strerror(errno)); + + /* verify socket options */ + len = sizeof(val); + if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, &val, &len) < 0) + printf("%s: %s\n", "getsockopt SO_TIMESTAMP", strerror(errno)); + else + printf("SO_TIMESTAMP %d\n", val); + + if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, &val, &len) < 0) + printf("%s: %s\n", "getsockopt SO_TIMESTAMPNS", + strerror(errno)); + else + printf("SO_TIMESTAMPNS %d\n", val); + + if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) { + printf("%s: %s\n", "getsockopt SO_TIMESTAMPING", + strerror(errno)); + } else { + printf("SO_TIMESTAMPING %d\n", val); + if (val != so_timestamping_flags) + printf(" not the expected value %d\n", + so_timestamping_flags); + } + + /* send packets forever every five seconds */ + gettimeofday(&next, 0); + next.tv_sec = (next.tv_sec + 1) / 5 * 5; + next.tv_usec = 0; + while (1) { + struct timeval now; + struct timeval delta; + long delta_us; + int res; + fd_set readfs, errorfs; + + gettimeofday(&now, 0); + delta_us = (long)(next.tv_sec - now.tv_sec) * 1000000 + + (long)(next.tv_usec - now.tv_usec); + if (delta_us > 0) { + /* continue waiting for timeout or data */ + delta.tv_sec = delta_us / 1000000; + delta.tv_usec = delta_us % 
1000000; + + FD_ZERO(&readfs); + FD_ZERO(&errorfs); + FD_SET(sock, &readfs); + FD_SET(sock, &errorfs); + printf("%ld.%06ld: select %ldus\n", + (long)now.tv_sec, (long)now.tv_usec, + delta_us); + res = select(sock + 1, &readfs, 0, &errorfs, &delta); + gettimeofday(&now, 0); + printf("%ld.%06ld: select returned: %d, %s\n", + (long)now.tv_sec, (long)now.tv_usec, + res, + res < 0 ? strerror(errno) : "success"); + if (res > 0) { + if (FD_ISSET(sock, &readfs)) + printf("ready for reading\n"); + if (FD_ISSET(sock, &errorfs)) + printf("has error\n"); + recvpacket(sock, 0, + siocgstamp, + siocgstampns); + recvpacket(sock, MSG_ERRQUEUE, + siocgstamp, + siocgstampns); + } + } else { + /* write one packet */ + sendpacket(sock, + (struct sockaddr *)&addr, + sizeof(addr)); + next.tv_sec += 5; + continue; + } + } + + return 0; +} diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c new file mode 100644 index 000000000000..5df07047ca86 --- /dev/null +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c @@ -0,0 +1,549 @@ +/* + * Copyright 2014 Google Inc. + * Author: willemb@google.com (Willem de Bruijn) + * + * Test software tx timestamping, including + * + * - SCHED, SND and ACK timestamps + * - RAW, UDP and TCP + * - IPv4 and IPv6 + * - various packet sizes (to test GSO and TSO) + * + * Consult the command line arguments for help on running + * the various testcases. + * + * This test requires a dummy TCP server. + * A simple `nc6 [-u] -l -p $DESTPORT` will do + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
/* Timestamp printed by the previous __print_timestamp() call, if any. */
static struct timespec ts_prev;

/*
 * Print one timestamp on stderr, plus its distance from the previous one.
 *
 * @name:        label printed in front of the timestamp
 * @cur:         timestamp to print; an all-zero value is ignored entirely
 * @key:         value printed as "seq="
 * @payload_len: value printed as "len="
 *
 * When ts_prev is non-zero the signed difference to it is appended in
 * microseconds.  ts_prev is then updated to *cur.
 */
static void __print_timestamp(const char *name, struct timespec *cur,
			      uint32_t key, int payload_len)
{
	if (!(cur->tv_sec | cur->tv_nsec))
		return;

	/* cast to the exact types the conversion specifiers expect */
	fprintf(stderr, " %s: %lu s %lu us (seq=%u, len=%u)",
			name, (unsigned long) cur->tv_sec,
			(unsigned long) (cur->tv_nsec / 1000),
			(unsigned int) key, (unsigned int) payload_len);

	if ((ts_prev.tv_sec | ts_prev.tv_nsec)) {
		int64_t cur_us, prev_us;

		/*
		 * Widen to 64 bits before scaling: seconds * 1,000,000 does
		 * not fit in a 32-bit long.
		 */
		cur_us = (int64_t) cur->tv_sec * 1000 * 1000;
		cur_us += cur->tv_nsec / 1000;

		prev_us = (int64_t) ts_prev.tv_sec * 1000 * 1000;
		prev_us += ts_prev.tv_nsec / 1000;

		fprintf(stderr, " (%+" PRId64 " us)", cur_us - prev_us);
	}

	ts_prev = *cur;
	fprintf(stderr, "\n");
}
inet_ntop(family, daddr, da, sizeof(da)) : "unknown"); +} + +static void __poll(int fd) +{ + struct pollfd pollfd; + int ret; + + memset(&pollfd, 0, sizeof(pollfd)); + pollfd.fd = fd; + ret = poll(&pollfd, 1, 100); + if (ret != 1) + error(1, errno, "poll"); +} + +static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) +{ + struct sock_extended_err *serr = NULL; + struct scm_timestamping *tss = NULL; + struct cmsghdr *cm; + int batch = 0; + + for (cm = CMSG_FIRSTHDR(msg); + cm && cm->cmsg_len; + cm = CMSG_NXTHDR(msg, cm)) { + if (cm->cmsg_level == SOL_SOCKET && + cm->cmsg_type == SCM_TIMESTAMPING) { + tss = (void *) CMSG_DATA(cm); + } else if ((cm->cmsg_level == SOL_IP && + cm->cmsg_type == IP_RECVERR) || + (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type == IPV6_RECVERR)) { + serr = (void *) CMSG_DATA(cm); + if (serr->ee_errno != ENOMSG || + serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) { + fprintf(stderr, "unknown ip error %d %d\n", + serr->ee_errno, + serr->ee_origin); + serr = NULL; + } + } else if (cm->cmsg_level == SOL_IP && + cm->cmsg_type == IP_PKTINFO) { + struct in_pktinfo *info = (void *) CMSG_DATA(cm); + print_pktinfo(AF_INET, info->ipi_ifindex, + &info->ipi_spec_dst, &info->ipi_addr); + } else if (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type == IPV6_PKTINFO) { + struct in6_pktinfo *info6 = (void *) CMSG_DATA(cm); + print_pktinfo(AF_INET6, info6->ipi6_ifindex, + NULL, &info6->ipi6_addr); + } else + fprintf(stderr, "unknown cmsg %d,%d\n", + cm->cmsg_level, cm->cmsg_type); + + if (serr && tss) { + print_timestamp(tss, serr->ee_info, serr->ee_data, + payload_len); + serr = NULL; + tss = NULL; + batch++; + } + } + + if (batch > 1) + fprintf(stderr, "batched %d timestamps\n", batch); +} + +static int recv_errmsg(int fd) +{ + static char ctrl[1024 /* overprovision*/]; + static struct msghdr msg; + struct iovec entry; + static char *data; + int ret = 0; + + data = malloc(cfg_payload_len); + if (!data) + error(1, 0, "malloc"); + + memset(&msg, 0, 
sizeof(msg)); + memset(&entry, 0, sizeof(entry)); + memset(ctrl, 0, sizeof(ctrl)); + + entry.iov_base = data; + entry.iov_len = cfg_payload_len; + msg.msg_iov = &entry; + msg.msg_iovlen = 1; + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_control = ctrl; + msg.msg_controllen = sizeof(ctrl); + + ret = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (ret == -1 && errno != EAGAIN) + error(1, errno, "recvmsg"); + + if (ret >= 0) { + __recv_errmsg_cmsg(&msg, ret); + if (cfg_show_payload) + print_payload(data, cfg_payload_len); + } + + free(data); + return ret == -1; +} + +static void do_test(int family, unsigned int opt) +{ + char *buf; + int fd, i, val = 1, total_len; + + if (family == AF_INET6 && cfg_proto != SOCK_STREAM) { + /* due to lack of checksum generation code */ + fprintf(stderr, "test: skipping datagram over IPv6\n"); + return; + } + + total_len = cfg_payload_len; + if (cfg_proto == SOCK_RAW) { + total_len += sizeof(struct udphdr); + if (cfg_ipproto == IPPROTO_RAW) + total_len += sizeof(struct iphdr); + } + + buf = malloc(total_len); + if (!buf) + error(1, 0, "malloc"); + + fd = socket(family, cfg_proto, cfg_ipproto); + if (fd < 0) + error(1, errno, "socket"); + + if (cfg_proto == SOCK_STREAM) { + if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, + (char*) &val, sizeof(val))) + error(1, 0, "setsockopt no nagle"); + + if (family == PF_INET) { + if (connect(fd, (void *) &daddr, sizeof(daddr))) + error(1, errno, "connect ipv4"); + } else { + if (connect(fd, (void *) &daddr6, sizeof(daddr6))) + error(1, errno, "connect ipv6"); + } + } + + if (cfg_do_pktinfo) { + if (family == AF_INET6) { + if (setsockopt(fd, SOL_IPV6, IPV6_RECVPKTINFO, + &val, sizeof(val))) + error(1, errno, "setsockopt pktinfo ipv6"); + } else { + if (setsockopt(fd, SOL_IP, IP_PKTINFO, + &val, sizeof(val))) + error(1, errno, "setsockopt pktinfo ipv4"); + } + } + + opt |= SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_CMSG | + SOF_TIMESTAMPING_OPT_ID; + if (cfg_loop_nodata) + opt |= 
SOF_TIMESTAMPING_OPT_TSONLY; + + if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, + (char *) &opt, sizeof(opt))) + error(1, 0, "setsockopt timestamping"); + + for (i = 0; i < cfg_num_pkts; i++) { + memset(&ts_prev, 0, sizeof(ts_prev)); + memset(buf, 'a' + i, total_len); + + if (cfg_proto == SOCK_RAW) { + struct udphdr *udph; + int off = 0; + + if (cfg_ipproto == IPPROTO_RAW) { + struct iphdr *iph = (void *) buf; + + memset(iph, 0, sizeof(*iph)); + iph->ihl = 5; + iph->version = 4; + iph->ttl = 2; + iph->daddr = daddr.sin_addr.s_addr; + iph->protocol = IPPROTO_UDP; + /* kernel writes saddr, csum, len */ + + off = sizeof(*iph); + } + + udph = (void *) buf + off; + udph->source = ntohs(9000); /* random spoof */ + udph->dest = ntohs(dest_port); + udph->len = ntohs(sizeof(*udph) + cfg_payload_len); + udph->check = 0; /* not allowed for IPv6 */ + } + + print_timestamp_usr(); + if (cfg_proto != SOCK_STREAM) { + if (family == PF_INET) + val = sendto(fd, buf, total_len, 0, (void *) &daddr, sizeof(daddr)); + else + val = sendto(fd, buf, total_len, 0, (void *) &daddr6, sizeof(daddr6)); + } else { + val = send(fd, buf, cfg_payload_len, 0); + } + if (val != total_len) + error(1, errno, "send"); + + /* wait for all errors to be queued, else ACKs arrive OOO */ + usleep(50 * 1000); + + __poll(fd); + + while (!recv_errmsg(fd)) {} + } + + if (close(fd)) + error(1, errno, "close"); + + free(buf); + usleep(400 * 1000); +} + +static void __attribute__((noreturn)) usage(const char *filepath) +{ + fprintf(stderr, "\nUsage: %s [options] hostname\n" + "\nwhere options are:\n" + " -4: only IPv4\n" + " -6: only IPv6\n" + " -h: show this message\n" + " -I: request PKTINFO\n" + " -l N: send N bytes at a time\n" + " -n: set no-payload option\n" + " -r: use raw\n" + " -R: use raw (IP_HDRINCL)\n" + " -p N: connect to port N\n" + " -u: use udp\n" + " -x: show payload (up to 70 bytes)\n", + filepath); + exit(1); +} + +static void parse_opt(int argc, char **argv) +{ + int proto_count = 0; + char c; + 
+	while ((c = getopt(argc, argv, "46hIl:np:rRux")) != -1) {
+		switch (c) {
+		case '4':
+			do_ipv6 = 0;
+			break;
+		case '6':
+			do_ipv4 = 0;
+			break;
+		case 'I':
+			cfg_do_pktinfo = true;
+			break;
+		case 'n':
+			cfg_loop_nodata = true;
+			break;
+		case 'r':
+			proto_count++;
+			cfg_proto = SOCK_RAW;
+			cfg_ipproto = IPPROTO_UDP;
+			break;
+		case 'R':
+			proto_count++;
+			cfg_proto = SOCK_RAW;
+			cfg_ipproto = IPPROTO_RAW;
+			break;
+		case 'u':
+			proto_count++;
+			cfg_proto = SOCK_DGRAM;
+			cfg_ipproto = IPPROTO_UDP;
+			break;
+		case 'l':
+			cfg_payload_len = strtoul(optarg, NULL, 10);
+			break;
+		case 'p':
+			dest_port = strtoul(optarg, NULL, 10);
+			break;
+		case 'x':
+			cfg_show_payload = true;
+			break;
+		case 'h':
+		default:
+			usage(argv[0]);
+		}
+	}
+
+	/* The check rejects a zero payload, so the message must say so. */
+	if (!cfg_payload_len)
+		error(1, 0, "payload may not be zero");
+	if (cfg_proto != SOCK_STREAM && cfg_payload_len > 1472)
+		error(1, 0, "udp packet might exceed expected MTU");
+	if (!do_ipv4 && !do_ipv6)
+		error(1, 0, "pass -4 or -6, not both");
+	if (proto_count > 1)
+		error(1, 0, "pass -r, -R or -u, not multiple");
+
+	if (optind != argc - 1)
+		error(1, 0, "missing required hostname argument");
+}
+
+/*
+ * Resolve hostname and record the first IPv4 and the first IPv6 address
+ * found in daddr / daddr6 (port set to dest_port). A family for which no
+ * address was returned is switched off in do_ipv4 / do_ipv6.
+ */
+static void resolve_hostname(const char *hostname)
+{
+	struct addrinfo *addrs, *cur;
+	int have_ipv4 = 0, have_ipv6 = 0;
+	int rc;
+
+	/* getaddrinfo() reports failure through its return value, not errno */
+	rc = getaddrinfo(hostname, NULL, NULL, &addrs);
+	if (rc)
+		error(1, 0, "getaddrinfo: %s", gai_strerror(rc));
+
+	cur = addrs;
+	/* keep scanning until one address of each family has been seen */
+	while (cur && !(have_ipv4 && have_ipv6)) {
+		if (!have_ipv4 && cur->ai_family == AF_INET) {
+			memcpy(&daddr, cur->ai_addr, sizeof(daddr));
+			daddr.sin_port = htons(dest_port);
+			have_ipv4 = 1;
+		}
+		else if (!have_ipv6 && cur->ai_family == AF_INET6) {
+			memcpy(&daddr6, cur->ai_addr, sizeof(daddr6));
+			daddr6.sin6_port = htons(dest_port);
+			have_ipv6 = 1;
+		}
+		cur = cur->ai_next;
+	}
+	if (addrs)
+		freeaddrinfo(addrs);
+
+	do_ipv4 &= have_ipv4;
+	do_ipv6 &= have_ipv6;
+}
+
+static void do_main(int family)
+{
+	fprintf(stderr, "family: %s\n",
+			family == PF_INET ?
"INET" : "INET6"); + + fprintf(stderr, "test SND\n"); + do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE); + + fprintf(stderr, "test ENQ\n"); + do_test(family, SOF_TIMESTAMPING_TX_SCHED); + + fprintf(stderr, "test ENQ + SND\n"); + do_test(family, SOF_TIMESTAMPING_TX_SCHED | + SOF_TIMESTAMPING_TX_SOFTWARE); + + if (cfg_proto == SOCK_STREAM) { + fprintf(stderr, "\ntest ACK\n"); + do_test(family, SOF_TIMESTAMPING_TX_ACK); + + fprintf(stderr, "\ntest SND + ACK\n"); + do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK); + + fprintf(stderr, "\ntest ENQ + SND + ACK\n"); + do_test(family, SOF_TIMESTAMPING_TX_SCHED | + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK); + } +} + +const char *sock_names[] = { NULL, "TCP", "UDP", "RAW" }; + +int main(int argc, char **argv) +{ + if (argc == 1) + usage(argv[0]); + + parse_opt(argc, argv); + resolve_hostname(argv[argc - 1]); + + fprintf(stderr, "protocol: %s\n", sock_names[cfg_proto]); + fprintf(stderr, "payload: %u\n", cfg_payload_len); + fprintf(stderr, "server port: %u\n", dest_port); + fprintf(stderr, "\n"); + + if (do_ipv4) + do_main(PF_INET); + if (do_ipv6) + do_main(PF_INET6); + + return 0; +} diff --git a/tools/testing/selftests/nsfs/Makefile b/tools/testing/selftests/nsfs/Makefile new file mode 100644 index 000000000000..2306054a901a --- /dev/null +++ b/tools/testing/selftests/nsfs/Makefile @@ -0,0 +1,12 @@ +TEST_PROGS := owner pidns + +CFLAGS := -Wall -Werror + +all: owner pidns +owner: owner.c +pidns: pidns.c + +clean: + $(RM) owner pidns + +include ../lib.mk diff --git a/tools/testing/selftests/nsfs/owner.c b/tools/testing/selftests/nsfs/owner.c new file mode 100644 index 000000000000..437205f8b714 --- /dev/null +++ b/tools/testing/selftests/nsfs/owner.c @@ -0,0 +1,91 @@ +#define _GNU_SOURCE +#include <sched.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include 
<sys/ioctl.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+
+#define NSIO	0xb7
+#define NS_GET_USERNS	_IO(NSIO, 0x1)
+
+#define pr_err(fmt, ...) \
+		({ \
+			fprintf(stderr, "%s:%d:" fmt ": %m\n", \
+				__func__, __LINE__, ##__VA_ARGS__); \
+			1; \
+		})
+
+int main(int argc, char *argvp[])
+{
+	int pfd[2], ns, uns, init_uns;
+	struct stat st1, st2;
+	char path[128];
+	pid_t pid;
+	char c;
+
+	if (pipe(pfd))
+		return 1;
+
+	pid = fork();
+	if (pid < 0)
+		return pr_err("fork");
+	if (pid == 0) {
+		prctl(PR_SET_PDEATHSIG, SIGKILL);
+		if (unshare(CLONE_NEWUTS | CLONE_NEWUSER))
+			return pr_err("unshare");
+		close(pfd[0]);
+		close(pfd[1]);
+		while (1)
+			sleep(1);
+		return 0;
+	}
+	close(pfd[1]);
+	/* read() returns 0 (EOF) once the child has closed its pipe ends */
+	if (read(pfd[0], &c, 1) != 0)
+		return pr_err("Unable to read from pipe");
+	close(pfd[0]);
+
+	snprintf(path, sizeof(path), "/proc/%d/ns/uts", pid);
+	ns = open(path, O_RDONLY);
+	if (ns < 0)
+		return pr_err("Unable to open %s", path);
+
+	uns = ioctl(ns, NS_GET_USERNS);
+	if (uns < 0)
+		return pr_err("Unable to get an owning user namespace");
+
+	if (fstat(uns, &st1))
+		return pr_err("fstat");
+
+	snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+	if (stat(path, &st2))
+		return pr_err("stat");
+
+	if (st1.st_ino != st2.st_ino)
+		return pr_err("NS_GET_USERNS returned a wrong namespace");
+
+	init_uns = ioctl(uns, NS_GET_USERNS);
+	if (init_uns < 0)	/* check the descriptor just obtained, not uns */
+		return pr_err("Unable to get an owning user namespace");
+
+	if (ioctl(init_uns, NS_GET_USERNS) >= 0 || errno != EPERM)
+		return pr_err("Don't get EPERM");
+
+	if (unshare(CLONE_NEWUSER))
+		return pr_err("unshare");
+
+	if (ioctl(ns, NS_GET_USERNS) >= 0 || errno != EPERM)
+		return pr_err("Don't get EPERM");
+	if (ioctl(init_uns, NS_GET_USERNS) >= 0 || errno != EPERM)
+		return pr_err("Don't get EPERM");
+
+	kill(pid, SIGKILL);
+	wait(NULL);
+	return 0;
+}
diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/nsfs/pidns.c
new file mode 100644
index 000000000000..ae3a0d68e966
--- /dev/null
+++
b/tools/testing/selftests/nsfs/pidns.c
@@ -0,0 +1,78 @@
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+
+#define pr_err(fmt, ...) \
+		({ \
+			fprintf(stderr, "%s:%d:" fmt ": %m\n", \
+				__func__, __LINE__, ##__VA_ARGS__); \
+			1; \
+		})
+
+#define NSIO	0xb7
+#define NS_GET_USERNS	_IO(NSIO, 0x1)
+#define NS_GET_PARENT	_IO(NSIO, 0x2)
+
+#define __stack_aligned__	__attribute__((aligned(16)))
+struct cr_clone_arg {
+	char stack[128] __stack_aligned__;
+	char stack_ptr[0];
+};
+
+static int child(void *args)
+{
+	prctl(PR_SET_PDEATHSIG, SIGKILL);
+	while (1)
+		sleep(1);
+	exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+	char *ns_strs[] = {"pid", "user"};
+	char path[] = "/proc/0123456789/ns/pid";
+	struct cr_clone_arg ca;
+	struct stat st1, st2;
+	int ns, pns, i;
+	pid_t pid;
+
+	pid = clone(child, ca.stack_ptr, CLONE_NEWUSER | CLONE_NEWPID | SIGCHLD, NULL);
+	if (pid < 0)
+		return pr_err("clone");
+
+	for (i = 0; i < 2; i++) {
+		snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns_strs[i]);
+		ns = open(path, O_RDONLY);
+		if (ns < 0)
+			return pr_err("Unable to open %s", path);
+
+		pns = ioctl(ns, NS_GET_PARENT);
+		if (pns < 0)
+			return pr_err("Unable to get a parent %s namespace", ns_strs[i]);
+
+		snprintf(path, sizeof(path), "/proc/self/ns/%s", ns_strs[i]);
+		if (stat(path, &st2))
+			return pr_err("Unable to stat %s", path);
+		if (fstat(pns, &st1))
+			return pr_err("Unable to stat the parent %s namespace", ns_strs[i]);
+		if (st1.st_ino != st2.st_ino)
+			return pr_err("NS_GET_PARENT returned a wrong namespace");
+
+		if (ioctl(pns, NS_GET_PARENT) >= 0 || errno != EPERM)
+			return pr_err("Don't get EPERM");
+	}
+
+	kill(pid, SIGKILL);
+	wait(NULL);
+	return 0;
+}
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index
1cc6d64c39b7..db54a33f850f 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -19,6 +19,7 @@ SUB_DIRS = alignment \ dscr \ mm \ pmu \ + signal \ primitives \ stringloops \ switch_endian \ diff --git a/tools/testing/selftests/powerpc/copyloops/asm/export.h b/tools/testing/selftests/powerpc/copyloops/asm/export.h new file mode 100644 index 000000000000..2d14a9b4248c --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/asm/export.h @@ -0,0 +1 @@ +#define EXPORT_SYMBOL(x) diff --git a/tools/testing/selftests/powerpc/fpu_asm.h b/tools/testing/selftests/powerpc/fpu_asm.h new file mode 100644 index 000000000000..6a387d255e27 --- /dev/null +++ b/tools/testing/selftests/powerpc/fpu_asm.h @@ -0,0 +1,80 @@ +/* + * Copyright 2016, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _SELFTESTS_POWERPC_FPU_ASM_H +#define _SELFTESTS_POWERPC_FPU_ASM_H +#include "basic_asm.h" + +#define PUSH_FPU(stack_size) \ + stfd f31,(stack_size + STACK_FRAME_MIN_SIZE)(%r1); \ + stfd f30,(stack_size + STACK_FRAME_MIN_SIZE - 8)(%r1); \ + stfd f29,(stack_size + STACK_FRAME_MIN_SIZE - 16)(%r1); \ + stfd f28,(stack_size + STACK_FRAME_MIN_SIZE - 24)(%r1); \ + stfd f27,(stack_size + STACK_FRAME_MIN_SIZE - 32)(%r1); \ + stfd f26,(stack_size + STACK_FRAME_MIN_SIZE - 40)(%r1); \ + stfd f25,(stack_size + STACK_FRAME_MIN_SIZE - 48)(%r1); \ + stfd f24,(stack_size + STACK_FRAME_MIN_SIZE - 56)(%r1); \ + stfd f23,(stack_size + STACK_FRAME_MIN_SIZE - 64)(%r1); \ + stfd f22,(stack_size + STACK_FRAME_MIN_SIZE - 72)(%r1); \ + stfd f21,(stack_size + STACK_FRAME_MIN_SIZE - 80)(%r1); \ + stfd f20,(stack_size + STACK_FRAME_MIN_SIZE - 88)(%r1); \ + stfd f19,(stack_size + STACK_FRAME_MIN_SIZE - 96)(%r1); \ + stfd f18,(stack_size + STACK_FRAME_MIN_SIZE - 104)(%r1); \ + stfd f17,(stack_size + STACK_FRAME_MIN_SIZE - 112)(%r1); \ + stfd f16,(stack_size + STACK_FRAME_MIN_SIZE - 120)(%r1); \ + stfd f15,(stack_size + STACK_FRAME_MIN_SIZE - 128)(%r1); \ + stfd f14,(stack_size + STACK_FRAME_MIN_SIZE - 136)(%r1); + +#define POP_FPU(stack_size) \ + lfd f31,(stack_size + STACK_FRAME_MIN_SIZE)(%r1); \ + lfd f30,(stack_size + STACK_FRAME_MIN_SIZE - 8)(%r1); \ + lfd f29,(stack_size + STACK_FRAME_MIN_SIZE - 16)(%r1); \ + lfd f28,(stack_size + STACK_FRAME_MIN_SIZE - 24)(%r1); \ + lfd f27,(stack_size + STACK_FRAME_MIN_SIZE - 32)(%r1); \ + lfd f26,(stack_size + STACK_FRAME_MIN_SIZE - 40)(%r1); \ + lfd f25,(stack_size + STACK_FRAME_MIN_SIZE - 48)(%r1); \ + lfd f24,(stack_size + STACK_FRAME_MIN_SIZE - 56)(%r1); \ + lfd f23,(stack_size + STACK_FRAME_MIN_SIZE - 64)(%r1); \ + lfd f22,(stack_size + STACK_FRAME_MIN_SIZE - 72)(%r1); \ + lfd f21,(stack_size + STACK_FRAME_MIN_SIZE - 80)(%r1); \ + lfd f20,(stack_size + STACK_FRAME_MIN_SIZE - 88)(%r1); \ + lfd f19,(stack_size + STACK_FRAME_MIN_SIZE - 
96)(%r1); \ + lfd f18,(stack_size + STACK_FRAME_MIN_SIZE - 104)(%r1); \ + lfd f17,(stack_size + STACK_FRAME_MIN_SIZE - 112)(%r1); \ + lfd f16,(stack_size + STACK_FRAME_MIN_SIZE - 120)(%r1); \ + lfd f15,(stack_size + STACK_FRAME_MIN_SIZE - 128)(%r1); \ + lfd f14,(stack_size + STACK_FRAME_MIN_SIZE - 136)(%r1); + +/* + * Careful calling this, it will 'clobber' fpu (by design) + * Don't call this from C + */ +FUNC_START(load_fpu) + lfd f14,0(r3) + lfd f15,8(r3) + lfd f16,16(r3) + lfd f17,24(r3) + lfd f18,32(r3) + lfd f19,40(r3) + lfd f20,48(r3) + lfd f21,56(r3) + lfd f22,64(r3) + lfd f23,72(r3) + lfd f24,80(r3) + lfd f25,88(r3) + lfd f26,96(r3) + lfd f27,104(r3) + lfd f28,112(r3) + lfd f29,120(r3) + lfd f30,128(r3) + lfd f31,136(r3) + blr +FUNC_END(load_fpu) + +#endif /* _SELFTESTS_POWERPC_FPU_ASM_H */ diff --git a/tools/testing/selftests/powerpc/gpr_asm.h b/tools/testing/selftests/powerpc/gpr_asm.h new file mode 100644 index 000000000000..f6f38852d3a0 --- /dev/null +++ b/tools/testing/selftests/powerpc/gpr_asm.h @@ -0,0 +1,96 @@ +/* + * Copyright 2016, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _SELFTESTS_POWERPC_GPR_ASM_H +#define _SELFTESTS_POWERPC_GPR_ASM_H + +#include "basic_asm.h" + +#define __PUSH_NVREGS(top_pos); \ + std r31,(top_pos)(%r1); \ + std r30,(top_pos - 8)(%r1); \ + std r29,(top_pos - 16)(%r1); \ + std r28,(top_pos - 24)(%r1); \ + std r27,(top_pos - 32)(%r1); \ + std r26,(top_pos - 40)(%r1); \ + std r25,(top_pos - 48)(%r1); \ + std r24,(top_pos - 56)(%r1); \ + std r23,(top_pos - 64)(%r1); \ + std r22,(top_pos - 72)(%r1); \ + std r21,(top_pos - 80)(%r1); \ + std r20,(top_pos - 88)(%r1); \ + std r19,(top_pos - 96)(%r1); \ + std r18,(top_pos - 104)(%r1); \ + std r17,(top_pos - 112)(%r1); \ + std r16,(top_pos - 120)(%r1); \ + std r15,(top_pos - 128)(%r1); \ + std r14,(top_pos - 136)(%r1) + +#define __POP_NVREGS(top_pos); \ + ld r31,(top_pos)(%r1); \ + ld r30,(top_pos - 8)(%r1); \ + ld r29,(top_pos - 16)(%r1); \ + ld r28,(top_pos - 24)(%r1); \ + ld r27,(top_pos - 32)(%r1); \ + ld r26,(top_pos - 40)(%r1); \ + ld r25,(top_pos - 48)(%r1); \ + ld r24,(top_pos - 56)(%r1); \ + ld r23,(top_pos - 64)(%r1); \ + ld r22,(top_pos - 72)(%r1); \ + ld r21,(top_pos - 80)(%r1); \ + ld r20,(top_pos - 88)(%r1); \ + ld r19,(top_pos - 96)(%r1); \ + ld r18,(top_pos - 104)(%r1); \ + ld r17,(top_pos - 112)(%r1); \ + ld r16,(top_pos - 120)(%r1); \ + ld r15,(top_pos - 128)(%r1); \ + ld r14,(top_pos - 136)(%r1) + +#define PUSH_NVREGS(stack_size) \ + __PUSH_NVREGS(stack_size + STACK_FRAME_MIN_SIZE) + +/* 18 NV FPU REGS */ +#define PUSH_NVREGS_BELOW_FPU(stack_size) \ + __PUSH_NVREGS(stack_size + STACK_FRAME_MIN_SIZE - (18 * 8)) + +#define POP_NVREGS(stack_size) \ + __POP_NVREGS(stack_size + STACK_FRAME_MIN_SIZE) + +/* 18 NV FPU REGS */ +#define POP_NVREGS_BELOW_FPU(stack_size) \ + __POP_NVREGS(stack_size + STACK_FRAME_MIN_SIZE - (18 * 8)) + +/* + * Careful calling this, it will 'clobber' NVGPRs (by design) + * Don't call this from C + */ +FUNC_START(load_gpr) + ld r14,0(r3) + ld r15,8(r3) + ld r16,16(r3) + ld r17,24(r3) + ld r18,32(r3) + ld r19,40(r3) + ld 
r20,48(r3) + ld r21,56(r3) + ld r22,64(r3) + ld r23,72(r3) + ld r24,80(r3) + ld r25,88(r3) + ld r26,96(r3) + ld r27,104(r3) + ld r28,112(r3) + ld r29,120(r3) + ld r30,128(r3) + ld r31,136(r3) + blr +FUNC_END(load_gpr) + + +#endif /* _SELFTESTS_POWERPC_GPR_ASM_H */ diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 52f9be7f61f0..248a820048df 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -19,9 +19,9 @@ #include "subunit.h" #include "utils.h" -#define TIMEOUT 120 #define KILL_TIMEOUT 5 +static uint64_t timeout = 120; int run_test(int (test_function)(void), char *name) { @@ -44,7 +44,7 @@ int run_test(int (test_function)(void), char *name) setpgid(pid, pid); /* Wake us up in timeout seconds */ - alarm(TIMEOUT); + alarm(timeout); terminated = false; wait: @@ -94,6 +94,11 @@ static struct sigaction alarm_action = { .sa_handler = alarm_handler, }; +void test_harness_set_timeout(uint64_t time) +{ + timeout = time; +} + int test_harness(int (test_function)(void), char *name) { int rc; diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore index 4fe13a439fd7..50ded63e25b7 100644 --- a/tools/testing/selftests/powerpc/math/.gitignore +++ b/tools/testing/selftests/powerpc/math/.gitignore @@ -4,3 +4,4 @@ fpu_preempt vmx_preempt fpu_signal vmx_signal +vsx_preempt diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile index 5b88875d5955..a505b66d408a 100644 --- a/tools/testing/selftests/powerpc/math/Makefile +++ b/tools/testing/selftests/powerpc/math/Makefile @@ -1,4 +1,4 @@ -TEST_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal +TEST_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt all: $(TEST_PROGS) @@ -13,6 +13,9 @@ vmx_syscall: vmx_asm.S vmx_preempt: vmx_asm.S vmx_signal: vmx_asm.S 
+vsx_preempt: CFLAGS += -mvsx +vsx_preempt: vsx_asm.S + include ../../lib.mk clean: diff --git a/tools/testing/selftests/powerpc/math/fpu_asm.S b/tools/testing/selftests/powerpc/math/fpu_asm.S index f3711d80e709..241f067a510f 100644 --- a/tools/testing/selftests/powerpc/math/fpu_asm.S +++ b/tools/testing/selftests/powerpc/math/fpu_asm.S @@ -8,70 +8,7 @@ */ #include "../basic_asm.h" - -#define PUSH_FPU(pos) \ - stfd f14,pos(sp); \ - stfd f15,pos+8(sp); \ - stfd f16,pos+16(sp); \ - stfd f17,pos+24(sp); \ - stfd f18,pos+32(sp); \ - stfd f19,pos+40(sp); \ - stfd f20,pos+48(sp); \ - stfd f21,pos+56(sp); \ - stfd f22,pos+64(sp); \ - stfd f23,pos+72(sp); \ - stfd f24,pos+80(sp); \ - stfd f25,pos+88(sp); \ - stfd f26,pos+96(sp); \ - stfd f27,pos+104(sp); \ - stfd f28,pos+112(sp); \ - stfd f29,pos+120(sp); \ - stfd f30,pos+128(sp); \ - stfd f31,pos+136(sp); - -#define POP_FPU(pos) \ - lfd f14,pos(sp); \ - lfd f15,pos+8(sp); \ - lfd f16,pos+16(sp); \ - lfd f17,pos+24(sp); \ - lfd f18,pos+32(sp); \ - lfd f19,pos+40(sp); \ - lfd f20,pos+48(sp); \ - lfd f21,pos+56(sp); \ - lfd f22,pos+64(sp); \ - lfd f23,pos+72(sp); \ - lfd f24,pos+80(sp); \ - lfd f25,pos+88(sp); \ - lfd f26,pos+96(sp); \ - lfd f27,pos+104(sp); \ - lfd f28,pos+112(sp); \ - lfd f29,pos+120(sp); \ - lfd f30,pos+128(sp); \ - lfd f31,pos+136(sp); - -# Careful calling this, it will 'clobber' fpu (by design) -# Don't call this from C -FUNC_START(load_fpu) - lfd f14,0(r3) - lfd f15,8(r3) - lfd f16,16(r3) - lfd f17,24(r3) - lfd f18,32(r3) - lfd f19,40(r3) - lfd f20,48(r3) - lfd f21,56(r3) - lfd f22,64(r3) - lfd f23,72(r3) - lfd f24,80(r3) - lfd f25,88(r3) - lfd f26,96(r3) - lfd f27,104(r3) - lfd f28,112(r3) - lfd f29,120(r3) - lfd f30,128(r3) - lfd f31,136(r3) - blr -FUNC_END(load_fpu) +#include "../fpu_asm.h" FUNC_START(check_fpu) mr r4,r3 @@ -138,9 +75,9 @@ FUNC_START(test_fpu) # r4 holds pointer to the pid # f14-f31 are non volatiles PUSH_BASIC_STACK(256) + PUSH_FPU(256) std r3,STACK_FRAME_PARAM(0)(sp) # Address of 
darray std r4,STACK_FRAME_PARAM(1)(sp) # Address of pid - PUSH_FPU(STACK_FRAME_LOCAL(2,0)) bl load_fpu nop @@ -155,7 +92,7 @@ FUNC_START(test_fpu) bl check_fpu nop - POP_FPU(STACK_FRAME_LOCAL(2,0)) + POP_FPU(256) POP_BASIC_STACK(256) blr FUNC_END(test_fpu) @@ -166,10 +103,10 @@ FUNC_END(test_fpu) # registers while running is not zero. FUNC_START(preempt_fpu) PUSH_BASIC_STACK(256) + PUSH_FPU(256) std r3,STACK_FRAME_PARAM(0)(sp) # double *darray std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting std r5,STACK_FRAME_PARAM(2)(sp) # int *running - PUSH_FPU(STACK_FRAME_LOCAL(3,0)) bl load_fpu nop @@ -192,7 +129,7 @@ FUNC_START(preempt_fpu) cmpwi r5,0 bne 2b -3: POP_FPU(STACK_FRAME_LOCAL(3,0)) +3: POP_FPU(256) POP_BASIC_STACK(256) blr FUNC_END(preempt_fpu) diff --git a/tools/testing/selftests/powerpc/math/vmx_asm.S b/tools/testing/selftests/powerpc/math/vmx_asm.S index 1b8c248b3ac1..fd74da488625 100644 --- a/tools/testing/selftests/powerpc/math/vmx_asm.S +++ b/tools/testing/selftests/powerpc/math/vmx_asm.S @@ -8,90 +8,7 @@ */ #include "../basic_asm.h" - -# POS MUST BE 16 ALIGNED! -#define PUSH_VMX(pos,reg) \ - li reg,pos; \ - stvx v20,reg,sp; \ - addi reg,reg,16; \ - stvx v21,reg,sp; \ - addi reg,reg,16; \ - stvx v22,reg,sp; \ - addi reg,reg,16; \ - stvx v23,reg,sp; \ - addi reg,reg,16; \ - stvx v24,reg,sp; \ - addi reg,reg,16; \ - stvx v25,reg,sp; \ - addi reg,reg,16; \ - stvx v26,reg,sp; \ - addi reg,reg,16; \ - stvx v27,reg,sp; \ - addi reg,reg,16; \ - stvx v28,reg,sp; \ - addi reg,reg,16; \ - stvx v29,reg,sp; \ - addi reg,reg,16; \ - stvx v30,reg,sp; \ - addi reg,reg,16; \ - stvx v31,reg,sp; - -# POS MUST BE 16 ALIGNED! 
-#define POP_VMX(pos,reg) \ - li reg,pos; \ - lvx v20,reg,sp; \ - addi reg,reg,16; \ - lvx v21,reg,sp; \ - addi reg,reg,16; \ - lvx v22,reg,sp; \ - addi reg,reg,16; \ - lvx v23,reg,sp; \ - addi reg,reg,16; \ - lvx v24,reg,sp; \ - addi reg,reg,16; \ - lvx v25,reg,sp; \ - addi reg,reg,16; \ - lvx v26,reg,sp; \ - addi reg,reg,16; \ - lvx v27,reg,sp; \ - addi reg,reg,16; \ - lvx v28,reg,sp; \ - addi reg,reg,16; \ - lvx v29,reg,sp; \ - addi reg,reg,16; \ - lvx v30,reg,sp; \ - addi reg,reg,16; \ - lvx v31,reg,sp; - -# Carefull this will 'clobber' vmx (by design) -# Don't call this from C -FUNC_START(load_vmx) - li r5,0 - lvx v20,r5,r3 - addi r5,r5,16 - lvx v21,r5,r3 - addi r5,r5,16 - lvx v22,r5,r3 - addi r5,r5,16 - lvx v23,r5,r3 - addi r5,r5,16 - lvx v24,r5,r3 - addi r5,r5,16 - lvx v25,r5,r3 - addi r5,r5,16 - lvx v26,r5,r3 - addi r5,r5,16 - lvx v27,r5,r3 - addi r5,r5,16 - lvx v28,r5,r3 - addi r5,r5,16 - lvx v29,r5,r3 - addi r5,r5,16 - lvx v30,r5,r3 - addi r5,r5,16 - lvx v31,r5,r3 - blr -FUNC_END(load_vmx) +#include "../vmx_asm.h" # Should be safe from C, only touches r4, r5 and v0,v1,v2 FUNC_START(check_vmx) diff --git a/tools/testing/selftests/powerpc/math/vsx_asm.S b/tools/testing/selftests/powerpc/math/vsx_asm.S new file mode 100644 index 000000000000..a110dd882d5e --- /dev/null +++ b/tools/testing/selftests/powerpc/math/vsx_asm.S @@ -0,0 +1,61 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include "../basic_asm.h" +#include "../vsx_asm.h" + +#long check_vsx(vector int *r3); +#This function wraps storeing VSX regs to the end of an array and a +#call to a comparison function in C which boils down to a memcmp() +FUNC_START(check_vsx) + PUSH_BASIC_STACK(32) + std r3,STACK_FRAME_PARAM(0)(sp) + addi r3, r3, 16 * 12 #Second half of array + bl store_vsx + ld r3,STACK_FRAME_PARAM(0)(sp) + bl vsx_memcmp + POP_BASIC_STACK(32) + blr +FUNC_END(check_vsx) + +# int preempt_vmx(vector int *varray, int *threads_starting, +# int *running); +# On starting will (atomically) decrement threads_starting as a signal +# that the VMX have been loaded with varray. Will proceed to check the +# validity of the VMX registers while running is not zero. +FUNC_START(preempt_vsx) + PUSH_BASIC_STACK(512) + std r3,STACK_FRAME_PARAM(0)(sp) # vector int *varray + std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting + std r5,STACK_FRAME_PARAM(2)(sp) # int *running + + bl load_vsx + nop + + sync + # Atomic DEC + ld r3,STACK_FRAME_PARAM(1)(sp) +1: lwarx r4,0,r3 + addi r4,r4,-1 + stwcx. r4,0,r3 + bne- 1b + +2: ld r3,STACK_FRAME_PARAM(0)(sp) + bl check_vsx + nop + cmpdi r3,0 + bne 3f + ld r4,STACK_FRAME_PARAM(2)(sp) + ld r5,0(r4) + cmpwi r5,0 + bne 2b + +3: POP_BASIC_STACK(512) + blr +FUNC_END(preempt_vsx) diff --git a/tools/testing/selftests/powerpc/math/vsx_preempt.c b/tools/testing/selftests/powerpc/math/vsx_preempt.c new file mode 100644 index 000000000000..6387f03a0a6a --- /dev/null +++ b/tools/testing/selftests/powerpc/math/vsx_preempt.c @@ -0,0 +1,147 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This test attempts to see if the VSX registers change across preemption. 
+ * There is no way to be sure preemption happened so this test just + * uses many threads and a long wait. As such, a successful test + * doesn't mean much but a failure is bad. + */ + +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdlib.h> +#include <pthread.h> + +#include "utils.h" + +/* Time to wait for workers to get preempted (seconds) */ +#define PREEMPT_TIME 20 +/* + * Factor by which to multiply number of online CPUs for total number of + * worker threads + */ +#define THREAD_FACTOR 8 + +/* + * Ensure there is twice the number of non-volatile VMX regs! + * check_vmx() is going to use the other half as space to put the live + * registers before calling vsx_memcmp() + */ +__thread vector int varray[24] = { + {1, 2, 3, 4 }, {5, 6, 7, 8 }, {9, 10,11,12}, + {13,14,15,16}, {17,18,19,20}, {21,22,23,24}, + {25,26,27,28}, {29,30,31,32}, {33,34,35,36}, + {37,38,39,40}, {41,42,43,44}, {45,46,47,48} +}; + +int threads_starting; +int running; + +extern long preempt_vsx(vector int *varray, int *threads_starting, int *running); + +long vsx_memcmp(vector int *a) { + vector int zero = {0, 0, 0, 0}; + int i; + + FAIL_IF(a != varray); + + for(i = 0; i < 12; i++) { + if (memcmp(&a[i + 12], &zero, sizeof(vector int)) == 0) { + fprintf(stderr, "Detected zero from the VSX reg %d\n", i + 12); + return 2; + } + } + + if (memcmp(a, &a[12], 12 * sizeof(vector int))) { + long *p = (long *)a; + fprintf(stderr, "VSX mismatch\n"); + for (i = 0; i < 24; i=i+2) + fprintf(stderr, "%d: 0x%08lx%08lx | 0x%08lx%08lx\n", + i/2 + i%2 + 20, p[i], p[i + 1], p[i + 24], p[i + 25]); + return 1; + } + return 0; +} + +void *preempt_vsx_c(void *p) +{ + int i, j; + long rc; + srand(pthread_self()); + for (i = 0; i < 12; i++) + for (j = 0; j < 4; j++) { + varray[i][j] = rand(); + /* Don't want zero because it hides kernel problems */ + if (varray[i][j] == 0) + j--; + } + rc = 
preempt_vsx(varray, &threads_starting, &running); + if (rc == 2) + fprintf(stderr, "Caught zeros in VSX compares\n"); + return (void *)rc; +} + +int test_preempt_vsx(void) +{ + int i, rc, threads; + pthread_t *tids; + + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; + tids = malloc(threads * sizeof(pthread_t)); + FAIL_IF(!tids); + + running = true; + threads_starting = threads; + for (i = 0; i < threads; i++) { + rc = pthread_create(&tids[i], NULL, preempt_vsx_c, NULL); + FAIL_IF(rc); + } + + setbuf(stdout, NULL); + /* Not really nessesary but nice to wait for every thread to start */ + printf("\tWaiting for %d workers to start...", threads_starting); + while(threads_starting) + asm volatile("": : :"memory"); + printf("done\n"); + + printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME); + sleep(PREEMPT_TIME); + printf("done\n"); + + printf("\tStopping workers..."); + /* + * Working are checking this value every loop. In preempt_vsx 'cmpwi r5,0; bne 2b'. + * r5 will have loaded the value of running. 
+ */ + running = 0; + for (i = 0; i < threads; i++) { + void *rc_p; + pthread_join(tids[i], &rc_p); + + /* + * Harness will say the fail was here, look at why preempt_vsx + * returned + */ + if ((long) rc_p) + printf("oops\n"); + FAIL_IF((long) rc_p); + } + printf("done\n"); + + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_preempt_vsx, "vsx_preempt"); +} diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore new file mode 100644 index 000000000000..1b89224a8aab --- /dev/null +++ b/tools/testing/selftests/powerpc/signal/.gitignore @@ -0,0 +1,2 @@ +signal +signal_tm diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile new file mode 100644 index 000000000000..f0eef27458e2 --- /dev/null +++ b/tools/testing/selftests/powerpc/signal/Makefile @@ -0,0 +1,13 @@ +TEST_PROGS := signal signal_tm + +all: $(TEST_PROGS) + +$(TEST_PROGS): ../harness.c ../utils.c signal.S + +CFLAGS += -maltivec +signal_tm: CFLAGS += -mhtm + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) *.o diff --git a/tools/testing/selftests/powerpc/signal/signal.S b/tools/testing/selftests/powerpc/signal/signal.S new file mode 100644 index 000000000000..7043d521df0a --- /dev/null +++ b/tools/testing/selftests/powerpc/signal/signal.S @@ -0,0 +1,50 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include "../basic_asm.h" + +/* long signal_self(pid_t pid, int sig); */ +FUNC_START(signal_self) + li r0,37 /* sys_kill */ + /* r3 already has our pid in it */ + /* r4 already has signal type in it */ + sc + bc 4,3,1f + subfze r3,r3 +1: blr +FUNC_END(signal_self) + +/* long tm_signal_self(pid_t pid, int sig, int *ret); */ +FUNC_START(tm_signal_self) + PUSH_BASIC_STACK(8) + std r5,STACK_FRAME_PARAM(0)(sp) /* ret */ + tbegin. + beq 1f + tsuspend. + li r0,37 /* sys_kill */ + /* r3 already has our pid in it */ + /* r4 already has signal type in it */ + sc + ld r5,STACK_FRAME_PARAM(0)(sp) /* ret */ + bc 4,3,2f + subfze r3,r3 +2: std r3,0(r5) + tabort. 0 + tresume. /* Be nice to some cleanup, jumps back to tbegin then to 1: */ + /* + * Transaction should be proper doomed and we should never get + * here + */ + li r3,1 + POP_BASIC_STACK(8) + blr +1: li r3,0 + POP_BASIC_STACK(8) + blr +FUNC_END(tm_signal_self) diff --git a/tools/testing/selftests/powerpc/signal/signal.c b/tools/testing/selftests/powerpc/signal/signal.c new file mode 100644 index 000000000000..e7dedd28b3c2 --- /dev/null +++ b/tools/testing/selftests/powerpc/signal/signal.c @@ -0,0 +1,111 @@ +/* + * Copyright 2016, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Sending one self a signal should always get delivered. 
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <altivec.h>
+
+#include "utils.h"
+
+#define MAX_ATTEMPT 500000
+#define TIMEOUT 5
+
+extern long signal_self(pid_t pid, int sig);
+
+static sig_atomic_t signaled;
+static sig_atomic_t fail;
+
+static void signal_handler(int sig)
+{
+	if (sig == SIGUSR1)
+		signaled = 1;
+	else
+		fail = 1;
+}
+
+/*
+ * First loop: forked children send SIGUSR1 to this process.
+ * Second loop: this process sends SIGUSR1 to itself MAX_ATTEMPT times.
+ * An alarm is armed before each wait so a lost signal sets 'fail'.
+ */
+static int test_signal(void)
+{
+	int i;
+	struct sigaction act;
+	pid_t ppid = getpid();
+	pid_t pid;
+
+	act.sa_handler = signal_handler;
+	act.sa_flags = 0;
+	sigemptyset(&act.sa_mask);
+	if (sigaction(SIGUSR1, &act, NULL) < 0) {
+		perror("sigaction SIGUSR1");
+		exit(1);
+	}
+	if (sigaction(SIGALRM, &act, NULL) < 0) {
+		perror("sigaction SIGALRM");
+		exit(1);
+	}
+
+	/* Don't do this for MAX_ATTEMPT, its simply too long */
+	for(i = 0; i < 1000; i++) {
+		/* clear before fork() so only this child's signal can end the wait */
+		signaled = 0;
+		pid = fork();
+		if (pid == -1) {
+			perror("fork");
+			exit(1);
+		}
+		if (pid == 0) {
+			signal_self(ppid, SIGUSR1);
+			exit(1);
+		} else {
+			alarm(0); /* Disable any pending */
+			alarm(2);
+			while (!signaled && !fail)
+				asm volatile("": : :"memory");
+			if (!signaled) {
+				fprintf(stderr, "Didn't get signal from child\n");
+				FAIL_IF(1); /* For the line number */
+			}
+			/* Otherwise we'll loop too fast and fork() will eventually fail */
+			waitpid(pid, NULL, 0);
+		}
+	}
+
+	for (i = 0; i < MAX_ATTEMPT; i++) {
+		long rc;
+
+		alarm(0); /* Disable any pending */
+		signaled = 0;
+		alarm(TIMEOUT);
+		rc = signal_self(ppid, SIGUSR1);
+		if (rc) {
+			fprintf(stderr, "(%d) Fail reason: %d rc=0x%lx\n",
+					i, fail, rc);
+			FAIL_IF(1); /* For the line number */
+		}
+		while (!signaled && !fail)
+			asm volatile("": : :"memory");
+		if (!signaled) {
+			fprintf(stderr, "(%d) Fail reason: %d rc=0x%lx\n",
+					i, fail, rc);
+			FAIL_IF(1); /* For the line number */
+		}
+	}
+
+	return 0;
+}
+
+int main(void)
+{
+	test_harness_set_timeout(300);
+	return test_harness(test_signal, "signal");
+}
diff
--git a/tools/testing/selftests/powerpc/signal/signal_tm.c b/tools/testing/selftests/powerpc/signal/signal_tm.c new file mode 100644 index 000000000000..2e7451a37cc6 --- /dev/null +++ b/tools/testing/selftests/powerpc/signal/signal_tm.c @@ -0,0 +1,110 @@ +/* + * Copyright 2016, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Sending one self a signal should always get delivered. + */ + +#include <errno.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> + +#include <altivec.h> + +#include "utils.h" +#include "../tm/tm.h" + +#define MAX_ATTEMPT 500000 +#define TIMEOUT 10 + +extern long tm_signal_self(pid_t pid, int sig, long *ret); + +static sig_atomic_t signaled; +static sig_atomic_t fail; + +static void signal_handler(int sig) +{ + if (tcheck_active()) { + fail = 2; + return; + } + + if (sig == SIGUSR1) + signaled = 1; + else + fail = 1; +} + +static int test_signal_tm() +{ + int i; + struct sigaction act; + + act.sa_handler = signal_handler; + act.sa_flags = 0; + sigemptyset(&act.sa_mask); + if (sigaction(SIGUSR1, &act, NULL) < 0) { + perror("sigaction SIGUSR1"); + exit(1); + } + if (sigaction(SIGALRM, &act, NULL) < 0) { + perror("sigaction SIGALRM"); + exit(1); + } + + SKIP_IF(!have_htm()); + + for (i = 0; i < MAX_ATTEMPT; i++) { + /* + * If anything bad happens in ASM and we fail to set ret + * because *handwave* TM this will cause failure + */ + long ret = 0xdead; + long rc = 0xbeef; + + alarm(0); /* Disable any pending */ + signaled = 0; + alarm(TIMEOUT); + FAIL_IF(tcheck_transactional()); + rc = tm_signal_self(getpid(), SIGUSR1, &ret); + if (ret == 0xdead) + /* + * This basically means the transaction aborted before we + * even got to the suspend... 
this is crazy but it + * happens. + * Yes this also means we might never make forward + * progress... the alarm() will trip eventually... + */ + continue; + + if (rc || ret) { + /* Ret is actually an errno */ + printf("TEXASR 0x%016lx, TFIAR 0x%016lx\n", + __builtin_get_texasr(), __builtin_get_tfiar()); + fprintf(stderr, "(%d) Fail reason: %d rc=0x%lx ret=0x%lx\n", + i, fail, rc, ret); + FAIL_IF(ret); + } + while(!signaled && !fail) + asm volatile("": : :"memory"); + if (!signaled) { + fprintf(stderr, "(%d) Fail reason: %d rc=0x%lx ret=0x%lx\n", + i, fail, rc, ret); + FAIL_IF(fail); /* For the line number */ + } + } + + return 0; +} + +int main(void) +{ + return test_harness(test_signal_tm, "signal_tm"); +} diff --git a/tools/testing/selftests/powerpc/stringloops/asm/export.h b/tools/testing/selftests/powerpc/stringloops/asm/export.h new file mode 100644 index 000000000000..2d14a9b4248c --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/asm/export.h @@ -0,0 +1 @@ +#define EXPORT_SYMBOL(x) diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 82c0a9ce6e74..427621792229 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -7,3 +7,7 @@ tm-fork tm-tar tm-tmspr tm-exec +tm-signal-context-chk-fpu +tm-signal-context-chk-gpr +tm-signal-context-chk-vmx +tm-signal-context-chk-vsx diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 9d301d785d9e..c6c53c82fdd6 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -1,5 +1,8 @@ +SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu \ + tm-signal-context-chk-vmx tm-signal-context-chk-vsx + TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \ - tm-vmxcopy tm-fork tm-tar tm-tmspr tm-exec tm-execed + tm-vmxcopy tm-fork tm-tar tm-tmspr 
$(SIGNAL_CONTEXT_CHK_TESTS) all: $(TEST_PROGS) @@ -11,6 +14,9 @@ tm-syscall: tm-syscall-asm.S tm-syscall: CFLAGS += -I../../../../../usr/include tm-tmspr: CFLAGS += -pthread +$(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S +$(SIGNAL_CONTEXT_CHK_TESTS): CFLAGS += -mhtm -m64 -mvsx + include ../../lib.mk clean: diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c new file mode 100644 index 000000000000..c760debbd5ad --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c @@ -0,0 +1,92 @@ +/* + * Copyright 2016, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * + * Test the kernel's signal frame code. + * + * The kernel sets up two sets of ucontexts if the signal was to be + * delivered while the thread was in a transaction. + * Expected behaviour is that the checkpointed state is in the user + * context passed to the signal handler. The speculated state can be + * accessed with the uc_link pointer. + * + * The rationale for this is that if TM unaware code (which linked + * against TM libs) installs a signal handler it will not know of the + * speculative nature of the 'live' registers and may infer the wrong + * thing. 
+ */ + +#include <stdlib.h> +#include <stdio.h> +#include <signal.h> +#include <unistd.h> + +#include <altivec.h> + +#include "utils.h" +#include "tm.h" + +#define MAX_ATTEMPT 500000 + +#define NV_FPU_REGS 18 + +long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss); + +/* Be sure there are 2x as many as there are NV FPU regs (2x18) */ +static double fps[] = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + -1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18 +}; + +static sig_atomic_t fail; + +static void signal_usr1(int signum, siginfo_t *info, void *uc) +{ + int i; + ucontext_t *ucp = uc; + ucontext_t *tm_ucp = ucp->uc_link; + + for (i = 0; i < NV_FPU_REGS && !fail; i++) { + fail = (ucp->uc_mcontext.fp_regs[i + 14] != fps[i]); + fail |= (tm_ucp->uc_mcontext.fp_regs[i + 14] != fps[i + NV_FPU_REGS]); + if (fail) + printf("Failed on %d FP %g or %g\n", i, ucp->uc_mcontext.fp_regs[i + 14], tm_ucp->uc_mcontext.fp_regs[i + 14]); + } +} + +static int tm_signal_context_chk_fpu() +{ + struct sigaction act; + int i; + long rc; + pid_t pid = getpid(); + + SKIP_IF(!have_htm()); + + act.sa_sigaction = signal_usr1; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + if (sigaction(SIGUSR1, &act, NULL) < 0) { + perror("sigaction sigusr1"); + exit(1); + } + + i = 0; + while (i < MAX_ATTEMPT && !fail) { + rc = tm_signal_self_context_load(pid, NULL, fps, NULL, NULL); + FAIL_IF(rc != pid); + i++; + } + + return fail; +} + +int main(void) +{ + return test_harness(tm_signal_context_chk_fpu, "tm_signal_context_chk_fpu"); +} diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c new file mode 100644 index 000000000000..df91330a08ef --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c @@ -0,0 +1,90 @@ +/* + * Copyright 2016, Cyril Bur, IBM Corp. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * + * Test the kernel's signal frame code. + * + * The kernel sets up two sets of ucontexts if the signal was to be + * delivered while the thread was in a transaction. + * Expected behaviour is that the checkpointed state is in the user + * context passed to the signal handler. The speculated state can be + * accessed with the uc_link pointer. + * + * The rationale for this is that if TM unaware code (which linked + * against TM libs) installs a signal handler it will not know of the + * speculative nature of the 'live' registers and may infer the wrong + * thing. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <signal.h> +#include <unistd.h> + +#include <altivec.h> + +#include "utils.h" +#include "tm.h" + +#define MAX_ATTEMPT 500000 + +#define NV_GPR_REGS 18 + +long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss); + +static sig_atomic_t fail; + +static long gps[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + -1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18}; + +static void signal_usr1(int signum, siginfo_t *info, void *uc) +{ + int i; + ucontext_t *ucp = uc; + ucontext_t *tm_ucp = ucp->uc_link; + + for (i = 0; i < NV_GPR_REGS && !fail; i++) { + fail = (ucp->uc_mcontext.gp_regs[i + 14] != gps[i]); + fail |= (tm_ucp->uc_mcontext.gp_regs[i + 14] != gps[i + NV_GPR_REGS]); + if (fail) + printf("Failed on %d GPR %lu or %lu\n", i, + ucp->uc_mcontext.gp_regs[i + 14], tm_ucp->uc_mcontext.gp_regs[i + 14]); + } +} + +static int tm_signal_context_chk_gpr() +{ + struct sigaction act; + int i; + long rc; + pid_t pid = getpid(); + + SKIP_IF(!have_htm()); + + act.sa_sigaction = signal_usr1; + sigemptyset(&act.sa_mask); + 
act.sa_flags = SA_SIGINFO; + if (sigaction(SIGUSR1, &act, NULL) < 0) { + perror("sigaction sigusr1"); + exit(1); + } + + i = 0; + while (i < MAX_ATTEMPT && !fail) { + rc = tm_signal_self_context_load(pid, gps, NULL, NULL, NULL); + FAIL_IF(rc != pid); + i++; + } + + return fail; +} + +int main(void) +{ + return test_harness(tm_signal_context_chk_gpr, "tm_signal_context_chk_gpr"); +} diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c new file mode 100644 index 000000000000..f0ee55fd5185 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c @@ -0,0 +1,110 @@ +/* + * Copyright 2016, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * + * Test the kernel's signal frame code. + * + * The kernel sets up two sets of ucontexts if the signal was to be + * delivered while the thread was in a transaction. + * Expected behaviour is that the checkpointed state is in the user + * context passed to the signal handler. The speculated state can be + * accessed with the uc_link pointer. + * + * The rationale for this is that if TM unaware code (which linked + * against TM libs) installs a signal handler it will not know of the + * speculative nature of the 'live' registers and may infer the wrong + * thing. 
+ */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> + +#include <altivec.h> + +#include "utils.h" +#include "tm.h" + +#define MAX_ATTEMPT 500000 + +#define NV_VMX_REGS 12 + +long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss); + +static sig_atomic_t fail; + +vector int vms[] = { + {1, 2, 3, 4 },{5, 6, 7, 8 },{9, 10,11,12}, + {13,14,15,16},{17,18,19,20},{21,22,23,24}, + {25,26,27,28},{29,30,31,32},{33,34,35,36}, + {37,38,39,40},{41,42,43,44},{45,46,47,48}, + {-1, -2, -3, -4}, {-5, -6, -7, -8}, {-9, -10,-11,-12}, + {-13,-14,-15,-16},{-17,-18,-19,-20},{-21,-22,-23,-24}, + {-25,-26,-27,-28},{-29,-30,-31,-32},{-33,-34,-35,-36}, + {-37,-38,-39,-40},{-41,-42,-43,-44},{-45,-46,-47,-48} +}; + +static void signal_usr1(int signum, siginfo_t *info, void *uc) +{ + int i; + ucontext_t *ucp = uc; + ucontext_t *tm_ucp = ucp->uc_link; + + for (i = 0; i < NV_VMX_REGS && !fail; i++) { + fail = memcmp(ucp->uc_mcontext.v_regs->vrregs[i + 20], + &vms[i], sizeof(vector int)); + fail |= memcmp(tm_ucp->uc_mcontext.v_regs->vrregs[i + 20], + &vms[i + NV_VMX_REGS], sizeof (vector int)); + + if (fail) { + int j; /* each of the 4 row elements is a 32-bit word: print 8 hex digits */ + + fprintf(stderr, "Failed on %d vmx 0x", i); + for (j = 0; j < 4; j++) + fprintf(stderr, "%08x", ucp->uc_mcontext.v_regs->vrregs[i + 20][j]); + fprintf(stderr, " vs 0x"); + for (j = 0 ; j < 4; j++) + fprintf(stderr, "%08x", tm_ucp->uc_mcontext.v_regs->vrregs[i + 20][j]); + fprintf(stderr, "\n"); + } + } +} + +static int tm_signal_context_chk() +{ + struct sigaction act; + int i; + long rc; + pid_t pid = getpid(); + + SKIP_IF(!have_htm()); + + act.sa_sigaction = signal_usr1; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + if (sigaction(SIGUSR1, &act, NULL) < 0) { + perror("sigaction sigusr1"); + exit(1); + } + + i = 0; + while (i < MAX_ATTEMPT && !fail) { + rc = tm_signal_self_context_load(pid, NULL, NULL, vms, NULL); + FAIL_IF(rc != pid); + i++; + } + + return
fail; +} + +int main(void) +{ + return test_harness(tm_signal_context_chk, "tm_signal_context_chk_vmx"); +} diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c new file mode 100644 index 000000000000..b99c3d835957 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c @@ -0,0 +1,125 @@ +/* + * Copyright 2016, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * + * Test the kernel's signal frame code. + * + * The kernel sets up two sets of ucontexts if the signal was to be + * delivered while the thread was in a transaction. + * Expected behaviour is that the checkpointed state is in the user + * context passed to the signal handler. The speculated state can be + * accessed with the uc_link pointer. + * + * The rationale for this is that if TM unaware code (which linked + * against TM libs) installs a signal handler it will not know of the + * speculative nature of the 'live' registers and may infer the wrong + * thing. 
+ */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> + +#include <altivec.h> + +#include "utils.h" +#include "tm.h" + +#define MAX_ATTEMPT 500000 + +#define NV_VSX_REGS 12 + +long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss); + +static sig_atomic_t fail; + +vector int vss[] = { + {1, 2, 3, 4 },{5, 6, 7, 8 },{9, 10,11,12}, + {13,14,15,16},{17,18,19,20},{21,22,23,24}, + {25,26,27,28},{29,30,31,32},{33,34,35,36}, + {37,38,39,40},{41,42,43,44},{45,46,47,48}, + {-1, -2, -3, -4 },{-5, -6, -7, -8 },{-9, -10,-11,-12}, + {-13,-14,-15,-16},{-17,-18,-19,-20},{-21,-22,-23,-24}, + {-25,-26,-27,-28},{-29,-30,-31,-32},{-33,-34,-35,-36}, + {-37,-38,-39,-40},{-41,-42,-43,-44},{-45,-46,-47,-48} +}; + +static void signal_usr1(int signum, siginfo_t *info, void *uc) +{ + int i; + uint8_t vsc[sizeof(vector int)]; + uint8_t vst[sizeof(vector int)]; + ucontext_t *ucp = uc; + ucontext_t *tm_ucp = ucp->uc_link; + + /* + * The other half of the VSX regs will be after v_regs. + * + * In short, vmx_reserve array holds everything. v_regs is a 16 + * byte aligned pointer at the start of vmx_reserve (vmx_reserve + * may or may not be 16 aligned) where the v_regs structure exists. + * (half of) The VSX registers are directly after v_regs, so that is the + * easiest way to find them below.
+ */ + long *vsx_ptr = (long *)(ucp->uc_mcontext.v_regs + 1); + long *tm_vsx_ptr = (long *)(tm_ucp->uc_mcontext.v_regs + 1); + for (i = 0; i < NV_VSX_REGS && !fail; i++) { + memcpy(vsc, &ucp->uc_mcontext.fp_regs[i + 20], 8); + memcpy(vsc + 8, &vsx_ptr[20 + i], 8); + fail = memcmp(vsc, &vss[i], sizeof(vector int)); + memcpy(vst, &tm_ucp->uc_mcontext.fp_regs[i + 20], 8); + memcpy(vst + 8, &tm_vsx_ptr[20 + i], 8); + fail |= memcmp(vst, &vss[i + NV_VSX_REGS], sizeof(vector int)); + + if (fail) { + int j; + + fprintf(stderr, "Failed on %d vsx 0x", i); + for (j = 0; j < 16; j++) + fprintf(stderr, "%02x", vsc[j]); + fprintf(stderr, " vs 0x"); + for (j = 0; j < 16; j++) + fprintf(stderr, "%02x", vst[j]); + fprintf(stderr, "\n"); + } + } +} + +static int tm_signal_context_chk() +{ + struct sigaction act; + int i; + long rc; + pid_t pid = getpid(); + + SKIP_IF(!have_htm()); + + act.sa_sigaction = signal_usr1; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + if (sigaction(SIGUSR1, &act, NULL) < 0) { + perror("sigaction sigusr1"); + exit(1); + } + + i = 0; + while (i < MAX_ATTEMPT && !fail) { + rc = tm_signal_self_context_load(pid, NULL, NULL, NULL, vss); + FAIL_IF(rc != pid); + i++; + } + + return fail; +} + +int main(void) +{ + return test_harness(tm_signal_context_chk, "tm_signal_context_chk_vsx"); +} diff --git a/tools/testing/selftests/powerpc/tm/tm-signal.S b/tools/testing/selftests/powerpc/tm/tm-signal.S new file mode 100644 index 000000000000..4e13e8b3a96f --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-signal.S @@ -0,0 +1,114 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include "../basic_asm.h" +#include "../gpr_asm.h" +#include "../fpu_asm.h" +#include "../vmx_asm.h" +#include "../vsx_asm.h" + +/* + * Large caveat here being that the caller cannot expect the + * signal to always be sent! The hardware can (AND WILL!) abort + * the transaction between the tbegin and the tsuspend (however + * unlikely it seems or infrequently it actually happens). + * You have been warned. + */ +/* long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss); */ +FUNC_START(tm_signal_self_context_load) + PUSH_BASIC_STACK(512) + /* + * Don't strictly need to save and restore as it depends on if + * we're going to use them, however this reduces messy logic + */ + PUSH_VMX(STACK_FRAME_LOCAL(5,0),r8) + PUSH_FPU(512) + PUSH_NVREGS_BELOW_FPU(512) + std r3, STACK_FRAME_PARAM(0)(sp) /* pid */ + std r4, STACK_FRAME_PARAM(1)(sp) /* gps */ + std r5, STACK_FRAME_PARAM(2)(sp) /* fps */ + std r6, STACK_FRAME_PARAM(3)(sp) /* vms */ + std r7, STACK_FRAME_PARAM(4)(sp) /* vss */ + + ld r3, STACK_FRAME_PARAM(1)(sp) + cmpdi r3, 0 + beq skip_gpr_lc + bl load_gpr +skip_gpr_lc: + ld r3, STACK_FRAME_PARAM(2)(sp) + cmpdi r3, 0 + beq skip_fpu_lc + bl load_fpu +skip_fpu_lc: + ld r3, STACK_FRAME_PARAM(3)(sp) + cmpdi r3, 0 + beq skip_vmx_lc + bl load_vmx +skip_vmx_lc: + ld r3, STACK_FRAME_PARAM(4)(sp) + cmpdi r3, 0 + beq skip_vsx_lc + bl load_vsx +skip_vsx_lc: + /* + * Set r3 (return value) before tbegin. Use the pid as a known + * 'all good' return value, zero is used to indicate a non-doomed + * transaction. + */ + ld r3, STACK_FRAME_PARAM(0)(sp) + tbegin. + beq 1f + tsuspend.
/* Can't enter a syscall transactionally */ + ld r3, STACK_FRAME_PARAM(1)(sp) + cmpdi r3, 0 + beq skip_gpr_lt + /* Get the second half of the array */ + addi r3, r3, 8 * 18 + bl load_gpr +skip_gpr_lt: + ld r3, STACK_FRAME_PARAM(2)(sp) + cmpdi r3, 0 + beq skip_fpu_lt + /* Get the second half of the array */ + addi r3, r3, 8 * 18 + bl load_fpu +skip_fpu_lt: + ld r3, STACK_FRAME_PARAM(3)(sp) + cmpdi r3, 0 + beq skip_vmx_lt + /* Get the second half of the array */ + addi r3, r3, 16 * 12 + bl load_vmx +skip_vmx_lt: + ld r3, STACK_FRAME_PARAM(4)(sp) + cmpdi r3, 0 + beq skip_vsx_lt + /* Get the second half of the array */ + addi r3, r3, 16 * 12 + bl load_vsx +skip_vsx_lt: + li r0, 37 /* sys_kill */ + ld r3, STACK_FRAME_PARAM(0)(sp) /* pid */ + li r4, 10 /* SIGUSR1 */ + sc /* Taking the signal will doom the transaction */ + tabort. 0 + tresume. /* Be super sure we abort */ + /* + * This will cause us to resume doomed transaction and cause + * hardware to cleanup, we'll end up at 1: anything between + * tresume. and 1: shouldn't ever run. 
+ */ + li r3, 0 + 1: + POP_VMX(STACK_FRAME_LOCAL(5,0),r4) + POP_FPU(512) + POP_NVREGS_BELOW_FPU(512) + POP_BASIC_STACK(512) + blr +FUNC_END(tm_signal_self_context_load) diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h index 60318bad7d7a..2c8da74304e7 100644 --- a/tools/testing/selftests/powerpc/tm/tm.h +++ b/tools/testing/selftests/powerpc/tm/tm.h @@ -52,4 +52,31 @@ static inline bool failure_is_nesting(void) return (__builtin_get_texasru() & 0x400000); } +static inline int tcheck(void) +{ + long cr; + asm volatile ("tcheck 0; mfcr %0" : "=r"(cr) : : "cr0"); /* tcheck only writes CR0; mfcr copies CR into cr */ + return (cr >> 28) & 0xf; /* CR0 nibble: 8 = doomed, 4 = transactional, 2 = suspended */ +} + +static inline bool tcheck_doomed(void) +{ + return tcheck() & 8; +} + +static inline bool tcheck_active(void) +{ + return tcheck() & 4; +} + +static inline bool tcheck_suspended(void) +{ + return tcheck() & 2; +} + +static inline bool tcheck_transactional(void) +{ + return tcheck() & 6; +} + #endif /* _SELFTESTS_POWERPC_TM_TM_H */ diff --git a/tools/testing/selftests/powerpc/utils.h b/tools/testing/selftests/powerpc/utils.h index fbd33e52ef8f..53405e8a52ab 100644 --- a/tools/testing/selftests/powerpc/utils.h +++ b/tools/testing/selftests/powerpc/utils.h @@ -22,7 +22,7 @@ typedef uint32_t u32; typedef uint16_t u16; typedef uint8_t u8; - +void test_harness_set_timeout(uint64_t time); int test_harness(int (test_function)(void), char *name); extern void *get_auxv_entry(int type); int pick_online_cpu(void); @@ -32,10 +32,17 @@ static inline bool have_hwcap(unsigned long ftr) return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr; } +#ifdef AT_HWCAP2 static inline bool have_hwcap2(unsigned long ftr2) { return ((unsigned long)get_auxv_entry(AT_HWCAP2) & ftr2) == ftr2; } +#else +static inline bool have_hwcap2(unsigned long ftr2) +{ + return false; +} +#endif /* Yes, this is evil */ #define FAIL_IF(x) \ diff --git a/tools/testing/selftests/powerpc/vmx_asm.h b/tools/testing/selftests/powerpc/vmx_asm.h new file mode 100644 index
000000000000..2eaaeca9cf1d --- /dev/null +++ b/tools/testing/selftests/powerpc/vmx_asm.h @@ -0,0 +1,96 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "basic_asm.h" + +/* POS MUST BE 16 ALIGNED! */ +#define PUSH_VMX(pos,reg) \ + li reg,pos; \ + stvx v20,reg,%r1; \ + addi reg,reg,16; \ + stvx v21,reg,%r1; \ + addi reg,reg,16; \ + stvx v22,reg,%r1; \ + addi reg,reg,16; \ + stvx v23,reg,%r1; \ + addi reg,reg,16; \ + stvx v24,reg,%r1; \ + addi reg,reg,16; \ + stvx v25,reg,%r1; \ + addi reg,reg,16; \ + stvx v26,reg,%r1; \ + addi reg,reg,16; \ + stvx v27,reg,%r1; \ + addi reg,reg,16; \ + stvx v28,reg,%r1; \ + addi reg,reg,16; \ + stvx v29,reg,%r1; \ + addi reg,reg,16; \ + stvx v30,reg,%r1; \ + addi reg,reg,16; \ + stvx v31,reg,%r1; + +/* POS MUST BE 16 ALIGNED! 
*/ +#define POP_VMX(pos,reg) \ + li reg,pos; \ + lvx v20,reg,%r1; \ + addi reg,reg,16; \ + lvx v21,reg,%r1; \ + addi reg,reg,16; \ + lvx v22,reg,%r1; \ + addi reg,reg,16; \ + lvx v23,reg,%r1; \ + addi reg,reg,16; \ + lvx v24,reg,%r1; \ + addi reg,reg,16; \ + lvx v25,reg,%r1; \ + addi reg,reg,16; \ + lvx v26,reg,%r1; \ + addi reg,reg,16; \ + lvx v27,reg,%r1; \ + addi reg,reg,16; \ + lvx v28,reg,%r1; \ + addi reg,reg,16; \ + lvx v29,reg,%r1; \ + addi reg,reg,16; \ + lvx v30,reg,%r1; \ + addi reg,reg,16; \ + lvx v31,reg,%r1; + +/* + * Careful this will 'clobber' vmx (by design) + * Don't call this from C + */ +FUNC_START(load_vmx) + li r5,0 + lvx v20,r5,r3 + addi r5,r5,16 + lvx v21,r5,r3 + addi r5,r5,16 + lvx v22,r5,r3 + addi r5,r5,16 + lvx v23,r5,r3 + addi r5,r5,16 + lvx v24,r5,r3 + addi r5,r5,16 + lvx v25,r5,r3 + addi r5,r5,16 + lvx v26,r5,r3 + addi r5,r5,16 + lvx v27,r5,r3 + addi r5,r5,16 + lvx v28,r5,r3 + addi r5,r5,16 + lvx v29,r5,r3 + addi r5,r5,16 + lvx v30,r5,r3 + addi r5,r5,16 + lvx v31,r5,r3 + blr +FUNC_END(load_vmx) diff --git a/tools/testing/selftests/powerpc/vsx_asm.h b/tools/testing/selftests/powerpc/vsx_asm.h new file mode 100644 index 000000000000..d828bfb6ef2d --- /dev/null +++ b/tools/testing/selftests/powerpc/vsx_asm.h @@ -0,0 +1,71 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include "basic_asm.h" + +/* + * Careful this will 'clobber' vsx (by design), VSX are always + * volatile though so unlike vmx this isn't so much of an issue + * Still should avoid calling from C + */ +FUNC_START(load_vsx) + li r5,0 + lxvx vs20,r5,r3 + addi r5,r5,16 + lxvx vs21,r5,r3 + addi r5,r5,16 + lxvx vs22,r5,r3 + addi r5,r5,16 + lxvx vs23,r5,r3 + addi r5,r5,16 + lxvx vs24,r5,r3 + addi r5,r5,16 + lxvx vs25,r5,r3 + addi r5,r5,16 + lxvx vs26,r5,r3 + addi r5,r5,16 + lxvx vs27,r5,r3 + addi r5,r5,16 + lxvx vs28,r5,r3 + addi r5,r5,16 + lxvx vs29,r5,r3 + addi r5,r5,16 + lxvx vs30,r5,r3 + addi r5,r5,16 + lxvx vs31,r5,r3 + blr +FUNC_END(load_vsx) + +FUNC_START(store_vsx) + li r5,0 + stxvx vs20,r5,r3 + addi r5,r5,16 + stxvx vs21,r5,r3 + addi r5,r5,16 + stxvx vs22,r5,r3 + addi r5,r5,16 + stxvx vs23,r5,r3 + addi r5,r5,16 + stxvx vs24,r5,r3 + addi r5,r5,16 + stxvx vs25,r5,r3 + addi r5,r5,16 + stxvx vs26,r5,r3 + addi r5,r5,16 + stxvx vs27,r5,r3 + addi r5,r5,16 + stxvx vs28,r5,r3 + addi r5,r5,16 + stxvx vs29,r5,r3 + addi r5,r5,16 + stxvx vs30,r5,r3 + addi r5,r5,16 + stxvx vs31,r5,r3 + blr +FUNC_END(store_vsx) diff --git a/tools/testing/selftests/prctl/.gitignore b/tools/testing/selftests/prctl/.gitignore new file mode 100644 index 000000000000..0b5c27447bf6 --- /dev/null +++ b/tools/testing/selftests/prctl/.gitignore @@ -0,0 +1,3 @@ +disable-tsc-ctxt-sw-stress-test +disable-tsc-on-off-stress-test +disable-tsc-test diff --git a/tools/testing/selftests/prctl/Makefile b/tools/testing/selftests/prctl/Makefile new file mode 100644 index 000000000000..35aa1c8f2df2 --- /dev/null +++ b/tools/testing/selftests/prctl/Makefile @@ -0,0 +1,15 @@ +ifndef CROSS_COMPILE +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) + +ifeq ($(ARCH),x86) +TEST_PROGS := disable-tsc-ctxt-sw-stress-test disable-tsc-on-off-stress-test \ + disable-tsc-test +all: $(TEST_PROGS) + +include ../lib.mk + +clean: + rm -fr 
$(TEST_PROGS) +endif +endif diff --git a/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c b/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c new file mode 100644 index 000000000000..f7499d1c0415 --- /dev/null +++ b/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c @@ -0,0 +1,97 @@ +/* + * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...) + * + * Tests if the control register is updated correctly + * at context switches + * + * Warning: this test will cause a very high load for a few seconds + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <signal.h> +#include <inttypes.h> +#include <wait.h> + + +#include <sys/prctl.h> +#include <linux/prctl.h> + +/* Get/set the process' ability to use the timestamp counter instruction */ +#ifndef PR_GET_TSC +#define PR_GET_TSC 25 +#define PR_SET_TSC 26 +# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */ +# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */ +#endif + +static uint64_t rdtsc(void) +{ +uint32_t lo, hi; +/* We cannot use "=A", since this would use %rax on x86_64 */ +__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); +return (uint64_t)hi << 32 | lo; +} + +static void sigsegv_expect(int sig) +{ + /* */ +} + +static void segvtask(void) +{ + if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0) + { + perror("prctl"); + exit(0); + } + signal(SIGSEGV, sigsegv_expect); + alarm(10); + rdtsc(); + fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n"); + exit(0); +} + + +static void sigsegv_fail(int sig) +{ + fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n"); + exit(0); +} + +static void rdtsctask(void) +{ + if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0) + { + perror("prctl"); + exit(0); + } + signal(SIGSEGV, sigsegv_fail); + alarm(10); + for(;;) rdtsc(); +} + + +int main(void) +{ + int n_tasks = 100, i; + + fprintf(stderr, "[No further output means we're allright]\n"); + + for (i=0; 
i<n_tasks; i++) + if (fork() == 0) + { + if (i & 1) + segvtask(); + else + rdtsctask(); + } + + for (i=0; i<n_tasks; i++) + wait(NULL); + + exit(0); +} + diff --git a/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c b/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c new file mode 100644 index 000000000000..a06f027e9d16 --- /dev/null +++ b/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c @@ -0,0 +1,96 @@ +/* + * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...) + * + * Tests if the control register is updated correctly + * when set with prctl() + * + * Warning: this test will cause a very high load for a few seconds + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <signal.h> +#include <inttypes.h> +#include <wait.h> + + +#include <sys/prctl.h> +#include <linux/prctl.h> + +/* Get/set the process' ability to use the timestamp counter instruction */ +#ifndef PR_GET_TSC +#define PR_GET_TSC 25 +#define PR_SET_TSC 26 +# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */ +# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */ +#endif + +/* snippet from wikipedia :-) */ + +static uint64_t rdtsc(void) +{ +uint32_t lo, hi; +/* We cannot use "=A", since this would use %rax on x86_64 */ +__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); +return (uint64_t)hi << 32 | lo; +} + +int should_segv = 0; + +static void sigsegv_cb(int sig) +{ + if (!should_segv) + { + fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n"); + exit(0); + } + if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0) + { + perror("prctl"); + exit(0); + } + should_segv = 0; + + rdtsc(); +} + +static void task(void) +{ + signal(SIGSEGV, sigsegv_cb); + alarm(10); + for(;;) + { + rdtsc(); + if (should_segv) + { + fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n"); + exit(0); + } + if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0) + { + perror("prctl"); + exit(0); + } + should_segv = 1; + 
} +} + + +int main(void) +{ + int n_tasks = 100, i; + + fprintf(stderr, "[No further output means we're allright]\n"); + + for (i=0; i<n_tasks; i++) + if (fork() == 0) + task(); + + for (i=0; i<n_tasks; i++) + wait(NULL); + + exit(0); +} + diff --git a/tools/testing/selftests/prctl/disable-tsc-test.c b/tools/testing/selftests/prctl/disable-tsc-test.c new file mode 100644 index 000000000000..8d494f7bebdb --- /dev/null +++ b/tools/testing/selftests/prctl/disable-tsc-test.c @@ -0,0 +1,95 @@ +/* + * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...) + * + * Basic test to test behaviour of PR_GET_TSC and PR_SET_TSC + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <signal.h> +#include <inttypes.h> + + +#include <sys/prctl.h> +#include <linux/prctl.h> + +/* Get/set the process' ability to use the timestamp counter instruction */ +#ifndef PR_GET_TSC +#define PR_GET_TSC 25 +#define PR_SET_TSC 26 +# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */ +# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */ +#endif + +const char *tsc_names[] = +{ + [0] = "[not set]", + [PR_TSC_ENABLE] = "PR_TSC_ENABLE", + [PR_TSC_SIGSEGV] = "PR_TSC_SIGSEGV", +}; + +static uint64_t rdtsc(void) +{ +uint32_t lo, hi; +/* We cannot use "=A", since this would use %rax on x86_64 */ +__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); +return (uint64_t)hi << 32 | lo; +} + +static void sigsegv_cb(int sig) +{ + int tsc_val = 0; + + printf("[ SIG_SEGV ]\n"); + printf("prctl(PR_GET_TSC, &tsc_val); "); + fflush(stdout); + + if ( prctl(PR_GET_TSC, &tsc_val) == -1) + perror("prctl"); + + printf("tsc_val == %s\n", tsc_names[tsc_val]); + printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n"); + fflush(stdout); + if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1) + perror("prctl"); + + printf("rdtsc() == "); +} + +int main(void) +{ + int tsc_val = 0; + + signal(SIGSEGV, sigsegv_cb); + + printf("rdtsc() == %llu\n", (unsigned long long)rdtsc()); + 
printf("prctl(PR_GET_TSC, &tsc_val); "); + fflush(stdout); + + if ( prctl(PR_GET_TSC, &tsc_val) == -1) + perror("prctl"); + + printf("tsc_val == %s\n", tsc_names[tsc_val]); + printf("rdtsc() == %llu\n", (unsigned long long)rdtsc()); + printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n"); + fflush(stdout); + + if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1) + perror("prctl"); + + printf("rdtsc() == %llu\n", (unsigned long long)rdtsc()); + printf("prctl(PR_SET_TSC, PR_TSC_SIGSEGV)\n"); + fflush(stdout); + + if ( prctl(PR_SET_TSC, PR_TSC_SIGSEGV) == -1) + perror("prctl"); + + printf("rdtsc() == "); + fflush(stdout); + printf("%llu\n", (unsigned long long)rdtsc()); + fflush(stdout); + + exit(EXIT_SUCCESS); +} + diff --git a/tools/testing/selftests/ptp/.gitignore b/tools/testing/selftests/ptp/.gitignore new file mode 100644 index 000000000000..f562e49d6917 --- /dev/null +++ b/tools/testing/selftests/ptp/.gitignore @@ -0,0 +1 @@ +testptp diff --git a/tools/testing/selftests/ptp/Makefile b/tools/testing/selftests/ptp/Makefile new file mode 100644 index 000000000000..83dd42b2129e --- /dev/null +++ b/tools/testing/selftests/ptp/Makefile @@ -0,0 +1,8 @@ +TEST_PROGS := testptp +LDLIBS += -lrt +all: $(TEST_PROGS) + +include ../lib.mk + +clean: + rm -fr $(TEST_PROGS) diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c new file mode 100644 index 000000000000..5d2eae16f7ee --- /dev/null +++ b/tools/testing/selftests/ptp/testptp.c @@ -0,0 +1,523 @@ +/* + * PTP 1588 clock support - User space test program + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */ +#include <errno.h> +#include <fcntl.h> +#include <inttypes.h> +#include <math.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include <linux/ptp_clock.h> + +#define DEVICE "/dev/ptp0" + +#ifndef ADJ_SETOFFSET +#define ADJ_SETOFFSET 0x0100 +#endif + +#ifndef CLOCK_INVALID +#define CLOCK_INVALID -1 +#endif + +/* clock_adjtime is not available in GLIBC < 2.14 */ +#if !__GLIBC_PREREQ(2, 14) +#include <sys/syscall.h> +static int clock_adjtime(clockid_t id, struct timex *tx) +{ + return syscall(__NR_clock_adjtime, id, tx); +} +#endif + +static clockid_t get_clockid(int fd) +{ +#define CLOCKFD 3 +#define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD) + + return FD_TO_CLOCKID(fd); +} + +static void handle_alarm(int s) +{ + printf("received signal %d\n", s); +} + +static int install_handler(int signum, void (*handler)(int)) +{ + struct sigaction action; + sigset_t mask; + + /* Unblock the signal. */ + sigemptyset(&mask); + sigaddset(&mask, signum); + sigprocmask(SIG_UNBLOCK, &mask, NULL); + + /* Install the signal handler. 
*/ + action.sa_handler = handler; + action.sa_flags = 0; + sigemptyset(&action.sa_mask); + sigaction(signum, &action, NULL); + + return 0; +} + +static long ppb_to_scaled_ppm(int ppb) +{ + /* + * The 'freq' field in the 'struct timex' is in parts per + * million, but with a 16 bit binary fractional field. + * Instead of calculating either one of + * + * scaled_ppm = (ppb / 1000) << 16 [1] + * scaled_ppm = (ppb << 16) / 1000 [2] + * + * we simply use double precision math, in order to avoid the + * truncation in [1] and the possible overflow in [2]. + */ + return (long) (ppb * 65.536); +} + +static int64_t pctns(struct ptp_clock_time *t) +{ + return t->sec * 1000000000LL + t->nsec; +} + +static void usage(char *progname) +{ + fprintf(stderr, + "usage: %s [options]\n" + " -a val request a one-shot alarm after 'val' seconds\n" + " -A val request a periodic alarm every 'val' seconds\n" + " -c query the ptp clock's capabilities\n" + " -d name device to open\n" + " -e val read 'val' external time stamp events\n" + " -f val adjust the ptp clock frequency by 'val' ppb\n" + " -g get the ptp clock time\n" + " -h prints this message\n" + " -i val index for event/trigger\n" + " -k val measure the time offset between system and phc clock\n" + " for 'val' times (Maximum 25)\n" + " -l list the current pin configuration\n" + " -L pin,val configure pin index 'pin' with function 'val'\n" + " the channel index is taken from the '-i' option\n" + " 'val' specifies the auxiliary function:\n" + " 0 - none\n" + " 1 - external time stamp\n" + " 2 - periodic output\n" + " -p val enable output with a period of 'val' nanoseconds\n" + " -P val enable or disable (val=1|0) the system clock PPS\n" + " -s set the ptp clock time from the system time\n" + " -S set the system time from the ptp clock time\n" + " -t val shift the ptp clock time by 'val' seconds\n" + " -T val set the ptp clock time to 'val' seconds\n", + progname); +} + +int main(int argc, char *argv[]) +{ + struct ptp_clock_caps caps; 
+ struct ptp_extts_event event; + struct ptp_extts_request extts_request; + struct ptp_perout_request perout_request; + struct ptp_pin_desc desc; + struct timespec ts; + struct timex tx; + + static timer_t timerid; + struct itimerspec timeout; + struct sigevent sigevent; + + struct ptp_clock_time *pct; + struct ptp_sys_offset *sysoff; + + + char *progname; + unsigned int i; + int c, cnt, fd; + + char *device = DEVICE; + clockid_t clkid; + int adjfreq = 0x7fffffff; + int adjtime = 0; + int capabilities = 0; + int extts = 0; + int gettime = 0; + int index = 0; + int list_pins = 0; + int oneshot = 0; + int pct_offset = 0; + int n_samples = 0; + int periodic = 0; + int perout = -1; + int pin_index = -1, pin_func; + int pps = -1; + int seconds = 0; + int settime = 0; + + int64_t t1, t2, tp; + int64_t interval, offset; + + progname = strrchr(argv[0], '/'); + progname = progname ? 1+progname : argv[0]; + while (EOF != (c = getopt(argc, argv, "a:A:cd:e:f:ghi:k:lL:p:P:sSt:T:v"))) { + switch (c) { + case 'a': + oneshot = atoi(optarg); + break; + case 'A': + periodic = atoi(optarg); + break; + case 'c': + capabilities = 1; + break; + case 'd': + device = optarg; + break; + case 'e': + extts = atoi(optarg); + break; + case 'f': + adjfreq = atoi(optarg); + break; + case 'g': + gettime = 1; + break; + case 'i': + index = atoi(optarg); + break; + case 'k': + pct_offset = 1; + n_samples = atoi(optarg); + break; + case 'l': + list_pins = 1; + break; + case 'L': + cnt = sscanf(optarg, "%d,%d", &pin_index, &pin_func); + if (cnt != 2) { + usage(progname); + return -1; + } + break; + case 'p': + perout = atoi(optarg); + break; + case 'P': + pps = atoi(optarg); + break; + case 's': + settime = 1; + break; + case 'S': + settime = 2; + break; + case 't': + adjtime = atoi(optarg); + break; + case 'T': + settime = 3; + seconds = atoi(optarg); + break; + case 'h': + usage(progname); + return 0; + case '?': + default: + usage(progname); + return -1; + } + } + + fd = open(device, O_RDWR); + if 
(fd < 0) { + fprintf(stderr, "opening %s: %s\n", device, strerror(errno)); + return -1; + } + + clkid = get_clockid(fd); + if (CLOCK_INVALID == clkid) { + fprintf(stderr, "failed to read clock id\n"); + return -1; + } + + if (capabilities) { + if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) { + perror("PTP_CLOCK_GETCAPS"); + } else { + printf("capabilities:\n" + " %d maximum frequency adjustment (ppb)\n" + " %d programmable alarms\n" + " %d external time stamp channels\n" + " %d programmable periodic signals\n" + " %d pulse per second\n" + " %d programmable pins\n" + " %d cross timestamping\n", + caps.max_adj, + caps.n_alarm, + caps.n_ext_ts, + caps.n_per_out, + caps.pps, + caps.n_pins, + caps.cross_timestamping); + } + } + + if (0x7fffffff != adjfreq) { + memset(&tx, 0, sizeof(tx)); + tx.modes = ADJ_FREQUENCY; + tx.freq = ppb_to_scaled_ppm(adjfreq); + if (clock_adjtime(clkid, &tx)) { + perror("clock_adjtime"); + } else { + puts("frequency adjustment okay"); + } + } + + if (adjtime) { + memset(&tx, 0, sizeof(tx)); + tx.modes = ADJ_SETOFFSET; + tx.time.tv_sec = adjtime; + tx.time.tv_usec = 0; + if (clock_adjtime(clkid, &tx) < 0) { + perror("clock_adjtime"); + } else { + puts("time shift okay"); + } + } + + if (gettime) { + if (clock_gettime(clkid, &ts)) { + perror("clock_gettime"); + } else { + printf("clock time: %ld.%09ld or %s", + ts.tv_sec, ts.tv_nsec, ctime(&ts.tv_sec)); + } + } + + if (settime == 1) { + clock_gettime(CLOCK_REALTIME, &ts); + if (clock_settime(clkid, &ts)) { + perror("clock_settime"); + } else { + puts("set time okay"); + } + } + + if (settime == 2) { + clock_gettime(clkid, &ts); + if (clock_settime(CLOCK_REALTIME, &ts)) { + perror("clock_settime"); + } else { + puts("set time okay"); + } + } + + if (settime == 3) { + ts.tv_sec = seconds; + ts.tv_nsec = 0; + if (clock_settime(clkid, &ts)) { + perror("clock_settime"); + } else { + puts("set time okay"); + } + } + + if (extts) { + memset(&extts_request, 0, sizeof(extts_request)); + extts_request.index = 
index; + extts_request.flags = PTP_ENABLE_FEATURE; + if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { + perror("PTP_EXTTS_REQUEST"); + extts = 0; + } else { + puts("external time stamp request okay"); + } + for (; extts; extts--) { + cnt = read(fd, &event, sizeof(event)); + if (cnt != sizeof(event)) { + perror("read"); + break; + } + printf("event index %u at %lld.%09u\n", event.index, + event.t.sec, event.t.nsec); + fflush(stdout); + } + /* Disable the feature again. */ + extts_request.flags = 0; + if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { + perror("PTP_EXTTS_REQUEST"); + } + } + + if (list_pins) { + int n_pins = 0; + if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) { + perror("PTP_CLOCK_GETCAPS"); + } else { + n_pins = caps.n_pins; + } + for (i = 0; i < n_pins; i++) { + desc.index = i; + if (ioctl(fd, PTP_PIN_GETFUNC, &desc)) { + perror("PTP_PIN_GETFUNC"); + break; + } + printf("name %s index %u func %u chan %u\n", + desc.name, desc.index, desc.func, desc.chan); + } + } + + if (oneshot) { + install_handler(SIGALRM, handle_alarm); + /* Create a timer. */ + sigevent.sigev_notify = SIGEV_SIGNAL; + sigevent.sigev_signo = SIGALRM; + if (timer_create(clkid, &sigevent, &timerid)) { + perror("timer_create"); + return -1; + } + /* Start the timer. */ + memset(&timeout, 0, sizeof(timeout)); + timeout.it_value.tv_sec = oneshot; + if (timer_settime(timerid, 0, &timeout, NULL)) { + perror("timer_settime"); + return -1; + } + pause(); + timer_delete(timerid); + } + + if (periodic) { + install_handler(SIGALRM, handle_alarm); + /* Create a timer. */ + sigevent.sigev_notify = SIGEV_SIGNAL; + sigevent.sigev_signo = SIGALRM; + if (timer_create(clkid, &sigevent, &timerid)) { + perror("timer_create"); + return -1; + } + /* Start the timer. 
*/ + memset(&timeout, 0, sizeof(timeout)); + timeout.it_interval.tv_sec = periodic; + timeout.it_value.tv_sec = periodic; + if (timer_settime(timerid, 0, &timeout, NULL)) { + perror("timer_settime"); + return -1; + } + while (1) { + pause(); + } + timer_delete(timerid); + } + + if (perout >= 0) { + if (clock_gettime(clkid, &ts)) { + perror("clock_gettime"); + return -1; + } + memset(&perout_request, 0, sizeof(perout_request)); + perout_request.index = index; + perout_request.start.sec = ts.tv_sec + 2; + perout_request.start.nsec = 0; + perout_request.period.sec = 0; + perout_request.period.nsec = perout; + if (ioctl(fd, PTP_PEROUT_REQUEST, &perout_request)) { + perror("PTP_PEROUT_REQUEST"); + } else { + puts("periodic output request okay"); + } + } + + if (pin_index >= 0) { + memset(&desc, 0, sizeof(desc)); + desc.index = pin_index; + desc.func = pin_func; + desc.chan = index; + if (ioctl(fd, PTP_PIN_SETFUNC, &desc)) { + perror("PTP_PIN_SETFUNC"); + } else { + puts("set pin function okay"); + } + } + + if (pps != -1) { + int enable = pps ? 
1 : 0; + if (ioctl(fd, PTP_ENABLE_PPS, enable)) { + perror("PTP_ENABLE_PPS"); + } else { + puts("pps for system time request okay"); + } + } + + if (pct_offset) { + if (n_samples <= 0 || n_samples > 25) { + puts("n_samples should be between 1 and 25"); + usage(progname); + return -1; + } + + sysoff = calloc(1, sizeof(*sysoff)); + if (!sysoff) { + perror("calloc"); + return -1; + } + sysoff->n_samples = n_samples; + + if (ioctl(fd, PTP_SYS_OFFSET, sysoff)) + perror("PTP_SYS_OFFSET"); + else + puts("system and phc clock time offset request okay"); + + pct = &sysoff->ts[0]; + for (i = 0; i < sysoff->n_samples; i++) { + t1 = pctns(pct+2*i); + tp = pctns(pct+2*i+1); + t2 = pctns(pct+2*i+2); + interval = t2 - t1; + offset = (t2 + t1) / 2 - tp; + + printf("system time: %lld.%u\n", + (pct+2*i)->sec, (pct+2*i)->nsec); + printf("phc time: %lld.%u\n", + (pct+2*i+1)->sec, (pct+2*i+1)->nsec); + printf("system time: %lld.%u\n", + (pct+2*i+2)->sec, (pct+2*i+2)->nsec); + printf("system/phc clock time offset is %" PRId64 " ns\n" + "system clock time delay is %" PRId64 " ns\n", + offset, interval); + } + + free(sysoff); + } + + close(fd); + return 0; +} diff --git a/tools/testing/selftests/ptp/testptp.mk b/tools/testing/selftests/ptp/testptp.mk new file mode 100644 index 000000000000..4ef2d9755421 --- /dev/null +++ b/tools/testing/selftests/ptp/testptp.mk @@ -0,0 +1,33 @@ +# PTP 1588 clock support - User space test program +# +# Copyright (C) 2010 OMICRON electronics GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +CC = $(CROSS_COMPILE)gcc +INC = -I$(KBUILD_OUTPUT)/usr/include +CFLAGS = -Wall $(INC) +LDLIBS = -lrt +PROGS = testptp + +all: $(PROGS) + +testptp: testptp.o + +clean: + rm -f testptp.o + +distclean: clean + rm -f $(PROGS) diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index 5a246a02dff3..15cf56d32155 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -122,7 +122,7 @@ static int check_itimer(int which) else if (which == ITIMER_REAL) idle_loop(); - gettimeofday(&end, NULL); + err = gettimeofday(&end, NULL); if (err < 0) { perror("Can't call gettimeofday()\n"); return -1; @@ -175,7 +175,7 @@ static int check_timer_create(int which) user_loop(); - gettimeofday(&end, NULL); + err = gettimeofday(&end, NULL); if (err < 0) { perror("Can't call gettimeofday()\n"); return -1; diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore new file mode 100644 index 000000000000..133bf9ee986c --- /dev/null +++ b/tools/testing/selftests/vDSO/.gitignore @@ -0,0 +1,2 @@ +vdso_test +vdso_standalone_test_x86 diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile new file mode 100644 index 000000000000..706b68b1c372 --- /dev/null +++ b/tools/testing/selftests/vDSO/Makefile @@ -0,0 +1,20 @@ +ifndef CROSS_COMPILE +CFLAGS := -std=gnu99 +CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector +ifeq ($(CONFIG_X86_32),y) +LDLIBS += -lgcc_s +endif + +TEST_PROGS := vdso_test vdso_standalone_test_x86 + +all: $(TEST_PROGS) +vdso_test: parse_vdso.c vdso_test.c +vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c + $(CC) $(CFLAGS) 
$(CFLAGS_vdso_standalone_test_x86) \ + vdso_standalone_test_x86.c parse_vdso.c \ + -o vdso_standalone_test_x86 + +include ../lib.mk +clean: + rm -fr $(TEST_PROGS) +endif diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c new file mode 100644 index 000000000000..1dbb4b87268f --- /dev/null +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -0,0 +1,269 @@ +/* + * parse_vdso.c: Linux reference vDSO parser + * Written by Andrew Lutomirski, 2011-2014. + * + * This code is meant to be linked in to various programs that run on Linux. + * As such, it is available with as few restrictions as possible. This file + * is licensed under the Creative Commons Zero License, version 1.0, + * available at http://creativecommons.org/publicdomain/zero/1.0/legalcode + * + * The vDSO is a regular ELF DSO that the kernel maps into user space when + * it starts a program. It works equally well in statically and dynamically + * linked binaries. + * + * This code is tested on x86. In principle it should work on any + * architecture that has a vDSO. + */ + +#include <stdbool.h> +#include <stdint.h> +#include <string.h> +#include <limits.h> +#include <elf.h> + +/* + * To use this vDSO parser, first call one of the vdso_init_* functions. + * If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR + * to vdso_init_from_sysinfo_ehdr. Otherwise pass auxv to vdso_init_from_auxv. + * Then call vdso_sym for each symbol you want. For example, to look up + * gettimeofday on x86_64, use: + * + * <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday"); + * or + * <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday"); + * + * vdso_sym will return 0 if the symbol doesn't exist or if the init function + * failed or was not called. vdso_sym is a little slow, so its return value + * should be cached. + * + * vdso_sym is threadsafe; the init functions are not. 
+ * + * These are the prototypes: + */ +extern void vdso_init_from_auxv(void *auxv); +extern void vdso_init_from_sysinfo_ehdr(uintptr_t base); +extern void *vdso_sym(const char *version, const char *name); + + +/* And here's the code. */ +#ifndef ELF_BITS +# if ULONG_MAX > 0xffffffffUL +# define ELF_BITS 64 +# else +# define ELF_BITS 32 +# endif +#endif + +#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x +#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x) +#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x) + +static struct vdso_info +{ + bool valid; + + /* Load information */ + uintptr_t load_addr; + uintptr_t load_offset; /* load_addr - recorded vaddr */ + + /* Symbol table */ + ELF(Sym) *symtab; + const char *symstrings; + ELF(Word) *bucket, *chain; + ELF(Word) nbucket, nchain; + + /* Version table */ + ELF(Versym) *versym; + ELF(Verdef) *verdef; +} vdso_info; + +/* Straight from the ELF specification. */ +static unsigned long elf_hash(const unsigned char *name) +{ + unsigned long h = 0, g; + while (*name) + { + h = (h << 4) + *name++; + if (g = h & 0xf0000000) + h ^= g >> 24; + h &= ~g; + } + return h; +} + +void vdso_init_from_sysinfo_ehdr(uintptr_t base) +{ + size_t i; + bool found_vaddr = false; + + vdso_info.valid = false; + + vdso_info.load_addr = base; + + ELF(Ehdr) *hdr = (ELF(Ehdr)*)base; + if (hdr->e_ident[EI_CLASS] != + (ELF_BITS == 32 ? ELFCLASS32 : ELFCLASS64)) { + return; /* Wrong ELF class -- check ELF_BITS */ + } + + ELF(Phdr) *pt = (ELF(Phdr)*)(vdso_info.load_addr + hdr->e_phoff); + ELF(Dyn) *dyn = 0; + + /* + * We need two things from the segment table: the load offset + * and the dynamic table. 
+ */ + for (i = 0; i < hdr->e_phnum; i++) + { + if (pt[i].p_type == PT_LOAD && !found_vaddr) { + found_vaddr = true; + vdso_info.load_offset = base + + (uintptr_t)pt[i].p_offset + - (uintptr_t)pt[i].p_vaddr; + } else if (pt[i].p_type == PT_DYNAMIC) { + dyn = (ELF(Dyn)*)(base + pt[i].p_offset); + } + } + + if (!found_vaddr || !dyn) + return; /* Failed */ + + /* + * Fish out the useful bits of the dynamic table. + */ + ELF(Word) *hash = 0; + vdso_info.symstrings = 0; + vdso_info.symtab = 0; + vdso_info.versym = 0; + vdso_info.verdef = 0; + for (i = 0; dyn[i].d_tag != DT_NULL; i++) { + switch (dyn[i].d_tag) { + case DT_STRTAB: + vdso_info.symstrings = (const char *) + ((uintptr_t)dyn[i].d_un.d_ptr + + vdso_info.load_offset); + break; + case DT_SYMTAB: + vdso_info.symtab = (ELF(Sym) *) + ((uintptr_t)dyn[i].d_un.d_ptr + + vdso_info.load_offset); + break; + case DT_HASH: + hash = (ELF(Word) *) + ((uintptr_t)dyn[i].d_un.d_ptr + + vdso_info.load_offset); + break; + case DT_VERSYM: + vdso_info.versym = (ELF(Versym) *) + ((uintptr_t)dyn[i].d_un.d_ptr + + vdso_info.load_offset); + break; + case DT_VERDEF: + vdso_info.verdef = (ELF(Verdef) *) + ((uintptr_t)dyn[i].d_un.d_ptr + + vdso_info.load_offset); + break; + } + } + if (!vdso_info.symstrings || !vdso_info.symtab || !hash) + return; /* Failed */ + + if (!vdso_info.verdef) + vdso_info.versym = 0; + + /* Parse the hash table header. */ + vdso_info.nbucket = hash[0]; + vdso_info.nchain = hash[1]; + vdso_info.bucket = &hash[2]; + vdso_info.chain = &hash[vdso_info.nbucket + 2]; + + /* That's all we need. */ + vdso_info.valid = true; +} + +static bool vdso_match_version(ELF(Versym) ver, + const char *name, ELF(Word) hash) +{ + /* + * This is a helper function to check if the version indexed by + * ver matches name (which hashes to hash). + * + * The version definition table is a mess, and I don't know how + * to do this in better than linear time without allocating memory + * to build an index. 
I also don't know why the table has + * variable size entries in the first place. + * + * For added fun, I can't find a comprehensible specification of how + * to parse all the weird flags in the table. + * + * So I just parse the whole table every time. + */ + + /* First step: find the version definition */ + ver &= 0x7fff; /* Apparently bit 15 means "hidden" */ + ELF(Verdef) *def = vdso_info.verdef; + while(true) { + if ((def->vd_flags & VER_FLG_BASE) == 0 + && (def->vd_ndx & 0x7fff) == ver) + break; + + if (def->vd_next == 0) + return false; /* No definition. */ + + def = (ELF(Verdef) *)((char *)def + def->vd_next); + } + + /* Now figure out whether it matches. */ + ELF(Verdaux) *aux = (ELF(Verdaux)*)((char *)def + def->vd_aux); + return def->vd_hash == hash + && !strcmp(name, vdso_info.symstrings + aux->vda_name); +} + +void *vdso_sym(const char *version, const char *name) +{ + unsigned long ver_hash; + if (!vdso_info.valid) + return 0; + + ver_hash = elf_hash(version); + ELF(Word) chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket]; + + for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) { + ELF(Sym) *sym = &vdso_info.symtab[chain]; + + /* Check for a defined global or weak function w/ right name. */ + if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) + continue; + if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && + ELF64_ST_BIND(sym->st_info) != STB_WEAK) + continue; + if (sym->st_shndx == SHN_UNDEF) + continue; + if (strcmp(name, vdso_info.symstrings + sym->st_name)) + continue; + + /* Check symbol version. 
*/ + if (vdso_info.versym + && !vdso_match_version(vdso_info.versym[chain], + version, ver_hash)) + continue; + + return (void *)(vdso_info.load_offset + sym->st_value); + } + + return 0; +} + +void vdso_init_from_auxv(void *auxv) +{ + ELF(auxv_t) *elf_auxv = auxv; + for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++) + { + if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) { + vdso_init_from_sysinfo_ehdr(elf_auxv[i].a_un.a_val); + return; + } + } + + vdso_info.valid = false; +} diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c new file mode 100644 index 000000000000..93b0ebf8cc38 --- /dev/null +++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c @@ -0,0 +1,128 @@ +/* + * vdso_test.c: Sample code to test parse_vdso.c on x86 + * Copyright (c) 2011-2014 Andy Lutomirski + * Subject to the GNU General Public License, version 2 + * + * You can amuse yourself by compiling with: + * gcc -std=gnu99 -nostdlib + * -Os -fno-asynchronous-unwind-tables -flto -lgcc_s + * vdso_standalone_test_x86.c parse_vdso.c + * to generate a small binary. On x86_64, you can omit -lgcc_s + * if you want the binary to be completely standalone. + */ + +#include <sys/syscall.h> +#include <sys/time.h> +#include <unistd.h> +#include <stdint.h> + +extern void *vdso_sym(const char *version, const char *name); +extern void vdso_init_from_sysinfo_ehdr(uintptr_t base); +extern void vdso_init_from_auxv(void *auxv); + +/* We need a libc functions... */ +int strcmp(const char *a, const char *b) +{ + /* This implementation is buggy: it never returns -1. */ + while (*a || *b) { + if (*a != *b) + return 1; + if (*a == 0 || *b == 0) + return 1; + a++; + b++; + } + + return 0; +} + +/* ...and two syscalls. This is x86-specific. 
*/ +static inline long x86_syscall3(long nr, long a0, long a1, long a2) +{ + long ret; +#ifdef __x86_64__ + asm volatile ("syscall" : "=a" (ret) : "a" (nr), + "D" (a0), "S" (a1), "d" (a2) : + "cc", "memory", "rcx", + "r8", "r9", "r10", "r11" ); +#else + asm volatile ("int $0x80" : "=a" (ret) : "a" (nr), + "b" (a0), "c" (a1), "d" (a2) : + "cc", "memory" ); +#endif + return ret; +} + +static inline long linux_write(int fd, const void *data, size_t len) +{ + return x86_syscall3(__NR_write, fd, (long)data, (long)len); +} + +static inline void linux_exit(int code) +{ + x86_syscall3(__NR_exit, code, 0, 0); +} + +void to_base10(char *lastdig, time_t n) +{ + while (n) { + *lastdig = (n % 10) + '0'; + n /= 10; + lastdig--; + } +} + +__attribute__((externally_visible)) void c_main(void **stack) +{ + /* Parse the stack */ + long argc = (long)*stack; + stack += argc + 2; + + /* Now we're pointing at the environment. Skip it. */ + while(*stack) + stack++; + stack++; + + /* Now we're pointing at auxv. Initialize the vDSO parser. */ + vdso_init_from_auxv((void *)stack); + + /* Find gettimeofday. */ + typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); + gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday"); + + if (!gtod) + linux_exit(1); + + struct timeval tv; + long ret = gtod(&tv, 0); + + if (ret == 0) { + char buf[] = "The time is .000000\n"; + to_base10(buf + 31, tv.tv_sec); + to_base10(buf + 38, tv.tv_usec); + linux_write(1, buf, sizeof(buf) - 1); + } else { + linux_exit(ret); + } + + linux_exit(0); +} + +/* + * This is the real entry point. It passes the initial stack into + * the C entry point. 
+ */ +asm ( + ".text\n" + ".global _start\n" + ".type _start,@function\n" + "_start:\n\t" +#ifdef __x86_64__ + "mov %rsp,%rdi\n\t" + "jmp c_main" +#else + "push %esp\n\t" + "call c_main\n\t" + "int $3" +#endif + ); diff --git a/tools/testing/selftests/vDSO/vdso_test.c b/tools/testing/selftests/vDSO/vdso_test.c new file mode 100644 index 000000000000..8daeb7d7032c --- /dev/null +++ b/tools/testing/selftests/vDSO/vdso_test.c @@ -0,0 +1,52 @@ +/* + * vdso_test.c: Sample code to test parse_vdso.c + * Copyright (c) 2014 Andy Lutomirski + * Subject to the GNU General Public License, version 2 + * + * Compile with: + * gcc -std=gnu99 vdso_test.c parse_vdso.c + * + * Tested on x86, 32-bit and 64-bit. It may work on other architectures, too. + */ + +#include <stdint.h> +#include <elf.h> +#include <stdio.h> +#include <sys/auxv.h> +#include <sys/time.h> + +extern void *vdso_sym(const char *version, const char *name); +extern void vdso_init_from_sysinfo_ehdr(uintptr_t base); +extern void vdso_init_from_auxv(void *auxv); + +int main(int argc, char **argv) +{ + unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); + if (!sysinfo_ehdr) { + printf("AT_SYSINFO_EHDR is not present!\n"); + return 0; + } + + vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR)); + + /* Find gettimeofday. 
*/ + typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); + gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday"); + + if (!gtod) { + printf("Could not find __vdso_gettimeofday\n"); + return 1; + } + + struct timeval tv; + long ret = gtod(&tv, 0); + + if (ret == 0) { + printf("The time is %lld.%06lld\n", + (long long)tv.tv_sec, (long long)tv.tv_usec); + } else { + printf("__vdso_gettimeofday failed\n"); + } + + return 0; +} diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index a937a9d26b60..142c565bb351 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -7,3 +7,4 @@ mlock2-tests on-fault-limit transhuge-stress userfaultfd +mlock-intersect-test diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index e4bb1de1d526..bbab7f4664ac 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -10,6 +10,7 @@ BINARIES += on-fault-limit BINARIES += thuge-gen BINARIES += transhuge-stress BINARIES += userfaultfd +BINARIES += mlock-random-test all: $(BINARIES) %: %.c @@ -17,6 +18,9 @@ all: $(BINARIES) userfaultfd: userfaultfd.c ../../../../usr/include/linux/kernel.h $(CC) $(CFLAGS) -O2 -o $@ $< -lpthread +mlock-random-test: mlock-random-test.c + $(CC) $(CFLAGS) -o $@ $< -lcap + ../../../../usr/include/linux/kernel.h: make -C ../../../.. headers_install diff --git a/tools/testing/selftests/vm/mlock-random-test.c b/tools/testing/selftests/vm/mlock-random-test.c new file mode 100644 index 000000000000..83de4f58d262 --- /dev/null +++ b/tools/testing/selftests/vm/mlock-random-test.c @@ -0,0 +1,293 @@ +/* + * It tests the mlock/mlock2() when they are invoked + * on randomly memory region. 
+ */ +#include <unistd.h> +#include <sys/resource.h> +#include <sys/capability.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <string.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <time.h> +#include "mlock2.h" + +#define CHUNK_UNIT (128 * 1024) +#define MLOCK_RLIMIT_SIZE (CHUNK_UNIT * 2) +#define MLOCK_WITHIN_LIMIT_SIZE CHUNK_UNIT +#define MLOCK_OUTOF_LIMIT_SIZE (CHUNK_UNIT * 3) + +#define TEST_LOOP 100 +#define PAGE_ALIGN(size, ps) (((size) + ((ps) - 1)) & ~((ps) - 1)) + +int set_cap_limits(rlim_t max) +{ + struct rlimit new; + cap_t cap = cap_init(); + + new.rlim_cur = max; + new.rlim_max = max; + if (setrlimit(RLIMIT_MEMLOCK, &new)) { + perror("setrlimit() returns error\n"); + return -1; + } + + /* drop capabilities including CAP_IPC_LOCK */ + if (cap_set_proc(cap)) { + perror("cap_set_proc() returns error\n"); + return -2; + } + + return 0; +} + +int get_proc_locked_vm_size(void) +{ + FILE *f; + int ret = -1; + char line[1024] = {0}; + unsigned long lock_size = 0; + + f = fopen("/proc/self/status", "r"); + if (!f) { + perror("fopen"); + return -1; + } + + while (fgets(line, 1024, f)) { + if (strstr(line, "VmLck")) { + ret = sscanf(line, "VmLck:\t%8lu kB", &lock_size); + if (ret <= 0) { + printf("sscanf() on VmLck error: %s: %d\n", + line, ret); + fclose(f); + return -1; + } + fclose(f); + return (int)(lock_size << 10); + } + } + + perror("cann't parse VmLck in /proc/self/status\n"); + fclose(f); + return -1; +} + +/* + * Get the MMUPageSize of the memory region including input + * address from proc file. + * + * return value: on error case, 0 will be returned. + * Otherwise the page size(in bytes) is returned. 
+ */ +int get_proc_page_size(unsigned long addr) +{ + FILE *smaps; + char *line; + unsigned long mmupage_size = 0; + size_t size; + + smaps = seek_to_smaps_entry(addr); + if (!smaps) { + printf("Unable to parse /proc/self/smaps\n"); + return 0; + } + + while (getline(&line, &size, smaps) > 0) { + if (!strstr(line, "MMUPageSize")) { + free(line); + line = NULL; + size = 0; + continue; + } + + /* found the MMUPageSize of this section */ + if (sscanf(line, "MMUPageSize: %8lu kB", + &mmupage_size) < 1) { + printf("Unable to parse smaps entry for Size:%s\n", + line); + break; + } + + } + free(line); + if (smaps) + fclose(smaps); + return mmupage_size << 10; +} + +/* + * Test mlock/mlock2() on provided memory chunk. + * It expects the mlock/mlock2() to be successful (within rlimit) + * + * With allocated memory chunk [p, p + alloc_size), this + * test will choose start/len randomly to perform mlock/mlock2 + * [start, start + len] memory range. The range is within range + * of the allocated chunk. + * + * The memory region size alloc_size is within the rlimit. + * So we always expect a success of mlock/mlock2. + * + * VmLck is assumed to be 0 before this test. 
+ * + * return value: 0 - success + * else: failure + */ +int test_mlock_within_limit(char *p, int alloc_size) +{ + int i; + int ret = 0; + int locked_vm_size = 0; + struct rlimit cur; + int page_size = 0; + + getrlimit(RLIMIT_MEMLOCK, &cur); + if (cur.rlim_cur < alloc_size) { + printf("alloc_size[%d] < %u rlimit,lead to mlock failure\n", + alloc_size, (unsigned int)cur.rlim_cur); + return -1; + } + + srand(time(NULL)); + for (i = 0; i < TEST_LOOP; i++) { + /* + * - choose mlock/mlock2 randomly + * - choose lock_size randomly but lock_size < alloc_size + * - choose start_offset randomly but p+start_offset+lock_size + * < p+alloc_size + */ + int is_mlock = !!(rand() % 2); + int lock_size = rand() % alloc_size; + int start_offset = rand() % (alloc_size - lock_size); + + if (is_mlock) + ret = mlock(p + start_offset, lock_size); + else + ret = mlock2_(p + start_offset, lock_size, + MLOCK_ONFAULT); + + if (ret) { + printf("%s() failure at |%p(%d)| mlock:|%p(%d)|\n", + is_mlock ? "mlock" : "mlock2", + p, alloc_size, + p + start_offset, lock_size); + return ret; + } + } + + /* + * Check VmLck left by the tests. + */ + locked_vm_size = get_proc_locked_vm_size(); + page_size = get_proc_page_size((unsigned long)p); + if (page_size == 0) { + printf("cannot get proc MMUPageSize\n"); + return -1; + } + + if (locked_vm_size > PAGE_ALIGN(alloc_size, page_size) + page_size) { + printf("test_mlock_within_limit() left VmLck:%d on %d chunk\n", + locked_vm_size, alloc_size); + return -1; + } + + return 0; +} + + +/* + * We expect the mlock/mlock2() to be fail (outof limitation) + * + * With allocated memory chunk [p, p + alloc_size), this + * test will randomly choose start/len and perform mlock/mlock2 + * on [start, start+len] range. + * + * The memory region size alloc_size is above the rlimit. + * And the len to be locked is higher than rlimit. + * So we always expect a failure of mlock/mlock2. + * No locked page number should be increased as a side effect. 
+ * + * return value: 0 - success + * else: failure + */ +int test_mlock_outof_limit(char *p, int alloc_size) +{ + int i; + int ret = 0; + int locked_vm_size = 0, old_locked_vm_size = 0; + struct rlimit cur; + + getrlimit(RLIMIT_MEMLOCK, &cur); + if (cur.rlim_cur >= alloc_size) { + printf("alloc_size[%d] >%u rlimit, violates test condition\n", + alloc_size, (unsigned int)cur.rlim_cur); + return -1; + } + + old_locked_vm_size = get_proc_locked_vm_size(); + srand(time(NULL)); + for (i = 0; i < TEST_LOOP; i++) { + int is_mlock = !!(rand() % 2); + int lock_size = (rand() % (alloc_size - cur.rlim_cur)) + + cur.rlim_cur; + int start_offset = rand() % (alloc_size - lock_size); + + if (is_mlock) + ret = mlock(p + start_offset, lock_size); + else + ret = mlock2_(p + start_offset, lock_size, + MLOCK_ONFAULT); + if (ret == 0) { + printf("%s() succeeds? on %p(%d) mlock%p(%d)\n", + is_mlock ? "mlock" : "mlock2", + p, alloc_size, + p + start_offset, lock_size); + return -1; + } + } + + locked_vm_size = get_proc_locked_vm_size(); + if (locked_vm_size != old_locked_vm_size) { + printf("tests leads to new mlocked page: old[%d], new[%d]\n", + old_locked_vm_size, + locked_vm_size); + return -1; + } + + return 0; +} + +int main(int argc, char **argv) +{ + char *p = NULL; + int ret = 0; + + if (set_cap_limits(MLOCK_RLIMIT_SIZE)) + return -1; + + p = malloc(MLOCK_WITHIN_LIMIT_SIZE); + if (p == NULL) { + perror("malloc() failure\n"); + return -1; + } + ret = test_mlock_within_limit(p, MLOCK_WITHIN_LIMIT_SIZE); + if (ret) + return ret; + munlock(p, MLOCK_WITHIN_LIMIT_SIZE); + free(p); + + + p = malloc(MLOCK_OUTOF_LIMIT_SIZE); + if (p == NULL) { + perror("malloc() failure\n"); + return -1; + } + ret = test_mlock_outof_limit(p, MLOCK_OUTOF_LIMIT_SIZE); + if (ret) + return ret; + munlock(p, MLOCK_OUTOF_LIMIT_SIZE); + free(p); + + return 0; +} diff --git a/tools/testing/selftests/vm/mlock2-tests.c b/tools/testing/selftests/vm/mlock2-tests.c index 02ca5e0177c5..ff0cda2b19c9 100644 --- 
a/tools/testing/selftests/vm/mlock2-tests.c +++ b/tools/testing/selftests/vm/mlock2-tests.c @@ -1,33 +1,12 @@ #define _GNU_SOURCE #include <sys/mman.h> #include <stdint.h> -#include <stdio.h> -#include <stdlib.h> #include <unistd.h> #include <string.h> #include <sys/time.h> #include <sys/resource.h> -#include <syscall.h> -#include <errno.h> #include <stdbool.h> - -#ifndef MLOCK_ONFAULT -#define MLOCK_ONFAULT 1 -#endif - -#ifndef MCL_ONFAULT -#define MCL_ONFAULT (MCL_FUTURE << 1) -#endif - -static int mlock2_(void *start, size_t len, int flags) -{ -#ifdef __NR_mlock2 - return syscall(__NR_mlock2, start, len, flags); -#else - errno = ENOSYS; - return -1; -#endif -} +#include "mlock2.h" struct vm_boundaries { unsigned long start; @@ -138,46 +117,6 @@ static uint64_t get_kpageflags(unsigned long pfn) return flags; } -static FILE *seek_to_smaps_entry(unsigned long addr) -{ - FILE *file; - char *line = NULL; - size_t size = 0; - unsigned long start, end; - char perms[5]; - unsigned long offset; - char dev[32]; - unsigned long inode; - char path[BUFSIZ]; - - file = fopen("/proc/self/smaps", "r"); - if (!file) { - perror("fopen smaps"); - _exit(1); - } - - while (getline(&line, &size, file) > 0) { - if (sscanf(line, "%lx-%lx %s %lx %s %lu %s\n", - &start, &end, perms, &offset, dev, &inode, path) < 6) - goto next; - - if (start <= addr && addr < end) - goto out; - -next: - free(line); - line = NULL; - size = 0; - } - - fclose(file); - file = NULL; - -out: - free(line); - return file; -} - #define VMFLAGS "VmFlags:" static bool is_vmflag_set(unsigned long addr, const char *vmflag) diff --git a/tools/testing/selftests/vm/mlock2.h b/tools/testing/selftests/vm/mlock2.h new file mode 100644 index 000000000000..7ee062929d3e --- /dev/null +++ b/tools/testing/selftests/vm/mlock2.h @@ -0,0 +1,62 @@ +#include <syscall.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> + +#ifndef MLOCK_ONFAULT +#define MLOCK_ONFAULT 1 +#endif + +#ifndef MCL_ONFAULT +#define MCL_ONFAULT 
(MCL_FUTURE << 1) +#endif + +static int mlock2_(void *start, size_t len, int flags) +{ +#ifdef __NR_mlock2 + return syscall(__NR_mlock2, start, len, flags); +#else + errno = ENOSYS; + return -1; +#endif +} + +static FILE *seek_to_smaps_entry(unsigned long addr) +{ + FILE *file; + char *line = NULL; + size_t size = 0; + unsigned long start, end; + char perms[5]; + unsigned long offset; + char dev[32]; + unsigned long inode; + char path[BUFSIZ]; + + file = fopen("/proc/self/smaps", "r"); + if (!file) { + perror("fopen smaps"); + _exit(1); + } + + while (getline(&line, &size, file) > 0) { + if (sscanf(line, "%lx-%lx %s %lx %s %lu %s\n", + &start, &end, perms, &offset, dev, &inode, path) < 6) + goto next; + + if (start <= addr && addr < end) + goto out; + +next: + free(line); + line = NULL; + size = 0; + } + + fclose(file); + file = NULL; + +out: + free(line); + return file; +} diff --git a/tools/testing/selftests/watchdog/.gitignore b/tools/testing/selftests/watchdog/.gitignore new file mode 100644 index 000000000000..5aac51575c7e --- /dev/null +++ b/tools/testing/selftests/watchdog/.gitignore @@ -0,0 +1 @@ +watchdog-test diff --git a/tools/testing/selftests/watchdog/Makefile b/tools/testing/selftests/watchdog/Makefile new file mode 100644 index 000000000000..f863c664e3d1 --- /dev/null +++ b/tools/testing/selftests/watchdog/Makefile @@ -0,0 +1,8 @@ +TEST_PROGS := watchdog-test + +all: $(TEST_PROGS) + +include ../lib.mk + +clean: + rm -fr $(TEST_PROGS) diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c new file mode 100644 index 000000000000..6983d05097e2 --- /dev/null +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -0,0 +1,105 @@ +/* + * Watchdog Driver Test Program + */ + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <signal.h> +#include <sys/ioctl.h> +#include <linux/types.h> +#include <linux/watchdog.h> + 
+int fd; +const char v = 'V'; + +/* + * This function simply sends an IOCTL to the driver, which in turn ticks + * the PC Watchdog card to reset its internal timer so it doesn't trigger + * a computer reset. + */ +static void keep_alive(void) +{ + int dummy; + + printf("."); + ioctl(fd, WDIOC_KEEPALIVE, &dummy); +} + +/* + * The main program. Run the program with "-d" to disable the card, + * or "-e" to enable the card. + */ + +static void term(int sig) +{ + int ret = write(fd, &v, 1); + + close(fd); + if (ret < 0) + printf("\nStopping watchdog ticks failed (%d)...\n", errno); + else + printf("\nStopping watchdog ticks...\n"); + exit(0); +} + +int main(int argc, char *argv[]) +{ + int flags; + unsigned int ping_rate = 1; + int ret; + + setbuf(stdout, NULL); + + fd = open("/dev/watchdog", O_WRONLY); + + if (fd == -1) { + printf("Watchdog device not enabled.\n"); + exit(-1); + } + + if (argc > 1) { + if (!strncasecmp(argv[1], "-d", 2)) { + flags = WDIOS_DISABLECARD; + ioctl(fd, WDIOC_SETOPTIONS, &flags); + printf("Watchdog card disabled.\n"); + goto end; + } else if (!strncasecmp(argv[1], "-e", 2)) { + flags = WDIOS_ENABLECARD; + ioctl(fd, WDIOC_SETOPTIONS, &flags); + printf("Watchdog card enabled.\n"); + goto end; + } else if (!strncasecmp(argv[1], "-t", 2) && argv[2]) { + flags = atoi(argv[2]); + ioctl(fd, WDIOC_SETTIMEOUT, &flags); + printf("Watchdog timeout set to %u seconds.\n", flags); + goto end; + } else if (!strncasecmp(argv[1], "-p", 2) && argv[2]) { + ping_rate = strtoul(argv[2], NULL, 0); + printf("Watchdog ping rate set to %u seconds.\n", ping_rate); + } else { + printf("-d to disable, -e to enable, -t <n> to set " \ + "the timeout,\n-p <n> to set the ping rate, and \n"); + printf("run by itself to tick the card.\n"); + goto end; + } + } + + printf("Watchdog Ticking Away!\n"); + + signal(SIGINT, term); + + while(1) { + keep_alive(); + sleep(ping_rate); + } +end: + ret = write(fd, &v, 1); + if (ret < 0) + printf("Stopping watchdog ticks failed (%d)...\n", 
errno); + close(fd); + return 0; +} diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 4f747ee07f10..a89f80a5b711 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -5,7 +5,8 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ - check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test + check_initial_reg_state sigreturn ldt_gdt iopl \ + protection_keys TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h new file mode 100644 index 000000000000..b20293956eec --- /dev/null +++ b/tools/testing/selftests/x86/pkey-helpers.h @@ -0,0 +1,219 @@ +#ifndef _PKEYS_HELPER_H +#define _PKEYS_HELPER_H +#define _GNU_SOURCE +#include <string.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <signal.h> +#include <assert.h> +#include <stdlib.h> +#include <ucontext.h> +#include <sys/mman.h> + +#define NR_PKEYS 16 +#define PKRU_BITS_PER_PKEY 2 + +#ifndef DEBUG_LEVEL +#define DEBUG_LEVEL 0 +#endif +#define DPRINT_IN_SIGNAL_BUF_SIZE 4096 +extern int dprint_in_signal; +extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; +static inline void sigsafe_printf(const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + if (!dprint_in_signal) { + vprintf(format, ap); + } else { + int len = vsnprintf(dprint_in_signal_buffer, + DPRINT_IN_SIGNAL_BUF_SIZE, + format, ap); + /* + * len is amount that would have been printed, + * but actual write is truncated at BUF_SIZE. 
+ */ + if (len > DPRINT_IN_SIGNAL_BUF_SIZE) + len = DPRINT_IN_SIGNAL_BUF_SIZE; + write(1, dprint_in_signal_buffer, len); + } + va_end(ap); +} +#define dprintf_level(level, args...) do { \ + if (level <= DEBUG_LEVEL) \ + sigsafe_printf(args); \ + fflush(NULL); \ +} while (0) +#define dprintf0(args...) dprintf_level(0, args) +#define dprintf1(args...) dprintf_level(1, args) +#define dprintf2(args...) dprintf_level(2, args) +#define dprintf3(args...) dprintf_level(3, args) +#define dprintf4(args...) dprintf_level(4, args) + +extern unsigned int shadow_pkru; +static inline unsigned int __rdpkru(void) +{ + unsigned int eax, edx; + unsigned int ecx = 0; + unsigned int pkru; + + asm volatile(".byte 0x0f,0x01,0xee\n\t" + : "=a" (eax), "=d" (edx) + : "c" (ecx)); + pkru = eax; + return pkru; +} + +static inline unsigned int _rdpkru(int line) +{ + unsigned int pkru = __rdpkru(); + + dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n", + line, pkru, shadow_pkru); + assert(pkru == shadow_pkru); + + return pkru; +} + +#define rdpkru() _rdpkru(__LINE__) + +static inline void __wrpkru(unsigned int pkru) +{ + unsigned int eax = pkru; + unsigned int ecx = 0; + unsigned int edx = 0; + + dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (eax), "c" (ecx), "d" (edx)); + assert(pkru == __rdpkru()); +} + +static inline void wrpkru(unsigned int pkru) +{ + dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); + /* will do the shadow check for us: */ + rdpkru(); + __wrpkru(pkru); + shadow_pkru = pkru; + dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru()); +} + +/* + * These are technically racy. since something could + * change PKRU between the read and the write. 
+ */ +static inline void __pkey_access_allow(int pkey, int do_allow) +{ + unsigned int pkru = rdpkru(); + int bit = pkey * 2; + + if (do_allow) + pkru &= (1<<bit); + else + pkru |= (1<<bit); + + dprintf4("pkru now: %08x\n", rdpkru()); + wrpkru(pkru); +} + +static inline void __pkey_write_allow(int pkey, int do_allow_write) +{ + long pkru = rdpkru(); + int bit = pkey * 2 + 1; + + if (do_allow_write) + pkru &= (1<<bit); + else + pkru |= (1<<bit); + + wrpkru(pkru); + dprintf4("pkru now: %08x\n", rdpkru()); +} + +#define PROT_PKEY0 0x10 /* protection key value (bit 0) */ +#define PROT_PKEY1 0x20 /* protection key value (bit 1) */ +#define PROT_PKEY2 0x40 /* protection key value (bit 2) */ +#define PROT_PKEY3 0x80 /* protection key value (bit 3) */ + +#define PAGE_SIZE 4096 +#define MB (1<<20) + +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile( + "cpuid;" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */ +#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */ + +static inline int cpu_has_pku(void) +{ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + + eax = 0x7; + ecx = 0x0; + __cpuid(&eax, &ebx, &ecx, &edx); + + if (!(ecx & X86_FEATURE_PKU)) { + dprintf2("cpu does not have PKU\n"); + return 0; + } + if (!(ecx & X86_FEATURE_OSPKE)) { + dprintf2("cpu does not have OSPKE\n"); + return 0; + } + return 1; +} + +#define XSTATE_PKRU_BIT (9) +#define XSTATE_PKRU 0x200 + +int pkru_xstate_offset(void) +{ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + int xstate_offset; + int xstate_size; + unsigned long XSTATE_CPUID = 0xd; + int leaf; + + /* assume that XSTATE_PKRU is set in XCR0 */ + leaf = 
XSTATE_PKRU_BIT; + { + eax = XSTATE_CPUID; + ecx = leaf; + __cpuid(&eax, &ebx, &ecx, &edx); + + if (leaf == XSTATE_PKRU_BIT) { + xstate_offset = ebx; + xstate_size = eax; + } + } + + if (xstate_size == 0) { + printf("could not find size/offset of PKRU in xsave state\n"); + return 0; + } + + return xstate_offset; +} + +#endif /* _PKEYS_HELPER_H */ diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c new file mode 100644 index 000000000000..bdd58c78902e --- /dev/null +++ b/tools/testing/selftests/x86/protection_keys.c @@ -0,0 +1,1410 @@ +/* + * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt) + * + * There are examples in here of: + * * how to set protection keys on memory + * * how to set/clear bits in PKRU (the rights register) + * * how to handle SEGV_PKRU signals and extract pkey-relevant + * information from the siginfo + * + * Things to add: + * make sure KSM and KSM COW breaking works + * prefault pages in at malloc, or not + * protect MPX bounds tables with protection keys? 
+ * make sure VMA splitting/merging is working correctly + * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys + * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel + * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks + * + * Compile like this: + * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + */ +#define _GNU_SOURCE +#include <errno.h> +#include <linux/futex.h> +#include <sys/time.h> +#include <sys/syscall.h> +#include <string.h> +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <signal.h> +#include <assert.h> +#include <stdlib.h> +#include <ucontext.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/ptrace.h> +#include <setjmp.h> + +#include "pkey-helpers.h" + +int iteration_nr = 1; +int test_nr; + +unsigned int shadow_pkru; + +#define HPAGE_SIZE (1UL<<21) +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) +#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) +#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) +#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) +#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to)) +#define __stringify_1(x...) #x +#define __stringify(x...) 
__stringify_1(x) + +#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) + +int dprint_in_signal; +char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; + +extern void abort_hooks(void); +#define pkey_assert(condition) do { \ + if (!(condition)) { \ + dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \ + __FILE__, __LINE__, \ + test_nr, iteration_nr); \ + dprintf0("errno at assert: %d", errno); \ + abort_hooks(); \ + assert(condition); \ + } \ +} while (0) +#define raw_assert(cond) assert(cond) + +void cat_into_file(char *str, char *file) +{ + int fd = open(file, O_RDWR); + int ret; + + dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file); + /* + * these need to be raw because they are called under + * pkey_assert() + */ + raw_assert(fd >= 0); + ret = write(fd, str, strlen(str)); + if (ret != strlen(str)) { + perror("write to file failed"); + fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); + raw_assert(0); + } + close(fd); +} + +#if CONTROL_TRACING > 0 +static int warned_tracing; +int tracing_root_ok(void) +{ + if (geteuid() != 0) { + if (!warned_tracing) + fprintf(stderr, "WARNING: not run as root, " + "can not do tracing control\n"); + warned_tracing = 1; + return 0; + } + return 1; +} +#endif + +void tracing_on(void) +{ +#if CONTROL_TRACING > 0 +#define TRACEDIR "/sys/kernel/debug/tracing" + char pidstr[32]; + + if (!tracing_root_ok()) + return; + + sprintf(pidstr, "%d", getpid()); + cat_into_file("0", TRACEDIR "/tracing_on"); + cat_into_file("\n", TRACEDIR "/trace"); + if (1) { + cat_into_file("function_graph", TRACEDIR "/current_tracer"); + cat_into_file("1", TRACEDIR "/options/funcgraph-proc"); + } else { + cat_into_file("nop", TRACEDIR "/current_tracer"); + } + cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid"); + cat_into_file("1", TRACEDIR "/tracing_on"); + dprintf1("enabled tracing\n"); +#endif +} + +void tracing_off(void) +{ +#if CONTROL_TRACING > 0 + if (!tracing_root_ok()) + return; + cat_into_file("0", 
"/sys/kernel/debug/tracing/tracing_on"); +#endif +} + +void abort_hooks(void) +{ + fprintf(stderr, "running %s()...\n", __func__); + tracing_off(); +#ifdef SLEEP_ON_ABORT + sleep(SLEEP_ON_ABORT); +#endif +} + +static inline void __page_o_noops(void) +{ + /* 8-bytes of instruction * 512 bytes = 1 page */ + asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); +} + +/* + * This attempts to have roughly a page of instructions followed by a few + * instructions that do a write, and another page of instructions. That + * way, we are pretty sure that the write is in the second page of + * instructions and has at least a page of padding behind it. + * + * *That* lets us be sure to madvise() away the write instruction, which + * will then fault, which makes sure that the fault code handles + * execute-only memory properly. + */ +__attribute__((__aligned__(PAGE_SIZE))) +void lots_o_noops_around_write(int *write_to_me) +{ + dprintf3("running %s()\n", __func__); + __page_o_noops(); + /* Assume this happens in the second page of instructions: */ + *write_to_me = __LINE__; + /* pad out by another page: */ + __page_o_noops(); + dprintf3("%s() done\n", __func__); +} + +/* Define some kernel-like types */ +#define u8 uint8_t +#define u16 uint16_t +#define u32 uint32_t +#define u64 uint64_t + +#ifdef __i386__ +#define SYS_mprotect_key 380 +#define SYS_pkey_alloc 381 +#define SYS_pkey_free 382 +#define REG_IP_IDX REG_EIP +#define si_pkey_offset 0x18 +#else +#define SYS_mprotect_key 329 +#define SYS_pkey_alloc 330 +#define SYS_pkey_free 331 +#define REG_IP_IDX REG_RIP +#define si_pkey_offset 0x20 +#endif + +void dump_mem(void *dumpme, int len_bytes) +{ + char *c = (void *)dumpme; + int i; + + for (i = 0; i < len_bytes; i += sizeof(u64)) { + u64 *ptr = (u64 *)(c + i); + dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr); + } +} + +#define __SI_FAULT (3 << 16) +#define SEGV_BNDERR (__SI_FAULT|3) /* failed address bound checks */ +#define SEGV_PKUERR (__SI_FAULT|4) + +static char 
*si_code_str(int si_code) +{ + if (si_code & SEGV_MAPERR) + return "SEGV_MAPERR"; + if (si_code & SEGV_ACCERR) + return "SEGV_ACCERR"; + if (si_code & SEGV_BNDERR) + return "SEGV_BNDERR"; + if (si_code & SEGV_PKUERR) + return "SEGV_PKUERR"; + return "UNKNOWN"; +} + +int pkru_faults; +int last_si_pkey = -1; +void signal_handler(int signum, siginfo_t *si, void *vucontext) +{ + ucontext_t *uctxt = vucontext; + int trapno; + unsigned long ip; + char *fpregs; + u32 *pkru_ptr; + u64 si_pkey; + u32 *si_pkey_ptr; + int pkru_offset; + fpregset_t fpregset; + + dprint_in_signal = 1; + dprintf1(">>>>===============SIGSEGV============================\n"); + dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__, + __rdpkru(), shadow_pkru); + + trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; + ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; + fpregset = uctxt->uc_mcontext.fpregs; + fpregs = (void *)fpregset; + + dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__, + trapno, ip, si_code_str(si->si_code), si->si_code); +#ifdef __i386__ + /* + * 32-bit has some extra padding so that userspace can tell whether + * the XSTATE header is present in addition to the "legacy" FPU + * state. We just assume that it is here. + */ + fpregs += 0x70; +#endif + pkru_offset = pkru_xstate_offset(); + pkru_ptr = (void *)(&fpregs[pkru_offset]); + + dprintf1("siginfo: %p\n", si); + dprintf1(" fpregs: %p\n", fpregs); + /* + * If we got a PKRU fault, we *HAVE* to have at least one bit set in + * here. 
+ */ + dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset()); + if (DEBUG_LEVEL > 4) + dump_mem(pkru_ptr - 128, 256); + pkey_assert(*pkru_ptr); + + si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); + dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); + dump_mem(si_pkey_ptr - 8, 24); + si_pkey = *si_pkey_ptr; + pkey_assert(si_pkey < NR_PKEYS); + last_si_pkey = si_pkey; + + if ((si->si_code == SEGV_MAPERR) || + (si->si_code == SEGV_ACCERR) || + (si->si_code == SEGV_BNDERR)) { + printf("non-PK si_code, exiting...\n"); + exit(4); + } + + dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); + /* need __rdpkru() version so we do not do shadow_pkru checking */ + dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); + dprintf1("si_pkey from siginfo: %jx\n", si_pkey); + *(u64 *)pkru_ptr = 0x00000000; + dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); + pkru_faults++; + dprintf1("<<<<==================================================\n"); + return; + if (trapno == 14) { + fprintf(stderr, + "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n", + trapno, ip); + fprintf(stderr, "si_addr %p\n", si->si_addr); + fprintf(stderr, "REG_ERR: %lx\n", + (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); + exit(1); + } else { + fprintf(stderr, "unexpected trap %d! 
at 0x%lx\n", trapno, ip); + fprintf(stderr, "si_addr %p\n", si->si_addr); + fprintf(stderr, "REG_ERR: %lx\n", + (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); + exit(2); + } + dprint_in_signal = 0; +} + +int wait_all_children(void) +{ + int status; + return waitpid(-1, &status, 0); +} + +void sig_chld(int x) +{ + dprint_in_signal = 1; + dprintf2("[%d] SIGCHLD: %d\n", getpid(), x); + dprint_in_signal = 0; +} + +void setup_sigsegv_handler(void) +{ + int r, rs; + struct sigaction newact; + struct sigaction oldact; + + /* #PF is mapped to sigsegv */ + int signum = SIGSEGV; + + newact.sa_handler = 0; + newact.sa_sigaction = signal_handler; + + /*sigset_t - signals to block while in the handler */ + /* get the old signal mask. */ + rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask); + pkey_assert(rs == 0); + + /* call sa_sigaction, not sa_handler*/ + newact.sa_flags = SA_SIGINFO; + + newact.sa_restorer = 0; /* void(*)(), obsolete */ + r = sigaction(signum, &newact, &oldact); + r = sigaction(SIGALRM, &newact, &oldact); + pkey_assert(r == 0); +} + +void setup_handlers(void) +{ + signal(SIGCHLD, &sig_chld); + setup_sigsegv_handler(); +} + +pid_t fork_lazy_child(void) +{ + pid_t forkret; + + forkret = fork(); + pkey_assert(forkret >= 0); + dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); + + if (!forkret) { + /* in the child */ + while (1) { + dprintf1("child sleeping...\n"); + sleep(30); + } + } + return forkret; +} + +void davecmp(void *_a, void *_b, int len) +{ + int i; + unsigned long *a = _a; + unsigned long *b = _b; + + for (i = 0; i < len / sizeof(*a); i++) { + if (a[i] == b[i]) + continue; + + dprintf3("[%3d]: a: %016lx b: %016lx\n", i, a[i], b[i]); + } +} + +void dumpit(char *f) +{ + int fd = open(f, O_RDONLY); + char buf[100]; + int nr_read; + + dprintf2("maps fd: %d\n", fd); + do { + nr_read = read(fd, &buf[0], sizeof(buf)); + write(1, buf, nr_read); + } while (nr_read > 0); + close(fd); +} + +#define PKEY_DISABLE_ACCESS 0x1 +#define PKEY_DISABLE_WRITE 0x2 
+ +u32 pkey_get(int pkey, unsigned long flags) +{ + u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); + u32 pkru = __rdpkru(); + u32 shifted_pkru; + u32 masked_pkru; + + dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", + __func__, pkey, flags, 0, 0); + dprintf2("%s() raw pkru: %x\n", __func__, pkru); + + shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY)); + dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru); + masked_pkru = shifted_pkru & mask; + dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru); + /* + * shift down the relevant bits to the lowest two, then + * mask off all the other high bits. + */ + return masked_pkru; +} + +int pkey_set(int pkey, unsigned long rights, unsigned long flags) +{ + u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); + u32 old_pkru = __rdpkru(); + u32 new_pkru; + + /* make sure that 'rights' only contains the bits we expect: */ + assert(!(rights & ~mask)); + + /* copy old pkru */ + new_pkru = old_pkru; + /* mask out bits from pkey in old value: */ + new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY)); + /* OR in new bits for pkey: */ + new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY)); + + __wrpkru(new_pkru); + + dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n", + __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru); + return 0; +} + +void pkey_disable_set(int pkey, int flags) +{ + unsigned long syscall_flags = 0; + int ret; + int pkey_rights; + u32 orig_pkru; + + dprintf1("START->%s(%d, 0x%x)\n", __func__, + pkey, flags); + pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); + + pkey_rights = pkey_get(pkey, syscall_flags); + + dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + pkey_assert(pkey_rights >= 0); + + pkey_rights |= flags; + + ret = pkey_set(pkey, pkey_rights, syscall_flags); + assert(!ret); + /*pkru and flags have the same format */ + shadow_pkru |= flags << (pkey * 2); + dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, 
shadow_pkru); + + pkey_assert(ret >= 0); + + pkey_rights = pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + + dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); + if (flags) + pkey_assert(rdpkru() > orig_pkru); + dprintf1("END<---%s(%d, 0x%x)\n", __func__, + pkey, flags); +} + +void pkey_disable_clear(int pkey, int flags) +{ + unsigned long syscall_flags = 0; + int ret; + int pkey_rights = pkey_get(pkey, syscall_flags); + u32 orig_pkru = rdpkru(); + + pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); + + dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + pkey_assert(pkey_rights >= 0); + + pkey_rights |= flags; + + ret = pkey_set(pkey, pkey_rights, 0); + /* pkru and flags have the same format */ + shadow_pkru &= ~(flags << (pkey * 2)); + pkey_assert(ret >= 0); + + pkey_rights = pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + + dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); + if (flags) + assert(rdpkru() > orig_pkru); +} + +void pkey_write_allow(int pkey) +{ + pkey_disable_clear(pkey, PKEY_DISABLE_WRITE); +} +void pkey_write_deny(int pkey) +{ + pkey_disable_set(pkey, PKEY_DISABLE_WRITE); +} +void pkey_access_allow(int pkey) +{ + pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS); +} +void pkey_access_deny(int pkey) +{ + pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); +} + +int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, + unsigned long pkey) +{ + int sret; + + dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__, + ptr, size, orig_prot, pkey); + + errno = 0; + sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey); + if (errno) { + dprintf2("SYS_mprotect_key sret: %d\n", sret); + dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); + dprintf2("SYS_mprotect_key failed, errno: %d\n", errno); + if (DEBUG_LEVEL >= 2) + perror("SYS_mprotect_pkey"); + } + return 
sret; +} + +int sys_pkey_alloc(unsigned long flags, unsigned long init_val) +{ + int ret = syscall(SYS_pkey_alloc, flags, init_val); + dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", + __func__, flags, init_val, ret, errno); + return ret; +} + +int alloc_pkey(void) +{ + int ret; + unsigned long init_val = 0x0; + + dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n", + __LINE__, __rdpkru(), shadow_pkru); + ret = sys_pkey_alloc(0, init_val); + /* + * pkey_alloc() sets PKRU, so we need to reflect it in + * shadow_pkru: + */ + dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", + __LINE__, ret, __rdpkru(), shadow_pkru); + if (ret) { + /* clear both the bits: */ + shadow_pkru &= ~(0x3 << (ret * 2)); + dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", + __LINE__, ret, __rdpkru(), shadow_pkru); + /* + * move the new state in from init_val + * (remember, we cheated and init_val == pkru format) + */ + shadow_pkru |= (init_val << (ret * 2)); + } + dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", + __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno); + /* for shadow checking: */ + rdpkru(); + dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", + __LINE__, ret, __rdpkru(), shadow_pkru); + return ret; +} + +int sys_pkey_free(unsigned long pkey) +{ + int ret = syscall(SYS_pkey_free, pkey); + dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); + return ret; +} + +/* + * I had a bug where pkey bits could be set by mprotect() but + * not cleared. This ensures we get lots of random bit sets + * and clears on the vma and pte pkey bits. 
+ */ +int alloc_random_pkey(void) +{ + int max_nr_pkey_allocs; + int ret; + int i; + int alloced_pkeys[NR_PKEYS]; + int nr_alloced = 0; + int random_index; + memset(alloced_pkeys, 0, sizeof(alloced_pkeys)); + + /* allocate every possible key and make a note of which ones we got */ + max_nr_pkey_allocs = NR_PKEYS; + max_nr_pkey_allocs = 1; + for (i = 0; i < max_nr_pkey_allocs; i++) { + int new_pkey = alloc_pkey(); + if (new_pkey < 0) + break; + alloced_pkeys[nr_alloced++] = new_pkey; + } + + pkey_assert(nr_alloced > 0); + /* select a random one out of the allocated ones */ + random_index = rand() % nr_alloced; + ret = alloced_pkeys[random_index]; + /* now zero it out so we don't free it next */ + alloced_pkeys[random_index] = 0; + + /* go through the allocated ones that we did not want and free them */ + for (i = 0; i < nr_alloced; i++) { + int free_ret; + if (!alloced_pkeys[i]) + continue; + free_ret = sys_pkey_free(alloced_pkeys[i]); + pkey_assert(!free_ret); + } + dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkru(), shadow_pkru); + return ret; +} + +int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, + unsigned long pkey) +{ + int nr_iterations = random() % 100; + int ret; + + while (0) { + int rpkey = alloc_random_pkey(); + ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); + dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", + ptr, size, orig_prot, pkey, ret); + if (nr_iterations-- < 0) + break; + + dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkru(), shadow_pkru); + sys_pkey_free(rpkey); + dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkru(), shadow_pkru); + } + pkey_assert(pkey < NR_PKEYS); + + ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); + dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", + ptr, size, orig_prot, pkey, ret); + pkey_assert(!ret); + dprintf1("%s()::%d, ret: %d 
pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkru(), shadow_pkru); + return ret; +} + +struct pkey_malloc_record { + void *ptr; + long size; +}; +struct pkey_malloc_record *pkey_malloc_records; +long nr_pkey_malloc_records; +void record_pkey_malloc(void *ptr, long size) +{ + long i; + struct pkey_malloc_record *rec = NULL; + + for (i = 0; i < nr_pkey_malloc_records; i++) { + rec = &pkey_malloc_records[i]; + /* find a free record */ + if (rec) + break; + } + if (!rec) { + /* every record is full */ + size_t old_nr_records = nr_pkey_malloc_records; + size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1); + size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record); + dprintf2("new_nr_records: %zd\n", new_nr_records); + dprintf2("new_size: %zd\n", new_size); + pkey_malloc_records = realloc(pkey_malloc_records, new_size); + pkey_assert(pkey_malloc_records != NULL); + rec = &pkey_malloc_records[nr_pkey_malloc_records]; + /* + * realloc() does not initialize memory, so zero it from + * the first new record all the way to the end. 
+ */ + for (i = 0; i < new_nr_records - old_nr_records; i++) + memset(rec + i, 0, sizeof(*rec)); + } + dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n", + (int)(rec - pkey_malloc_records), rec, ptr, size); + rec->ptr = ptr; + rec->size = size; + nr_pkey_malloc_records++; +} + +void free_pkey_malloc(void *ptr) +{ + long i; + int ret; + dprintf3("%s(%p)\n", __func__, ptr); + for (i = 0; i < nr_pkey_malloc_records; i++) { + struct pkey_malloc_record *rec = &pkey_malloc_records[i]; + dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n", + ptr, i, rec, rec->ptr, rec->size); + if ((ptr < rec->ptr) || + (ptr >= rec->ptr + rec->size)) + continue; + + dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n", + ptr, i, rec, rec->ptr, rec->size); + nr_pkey_malloc_records--; + ret = munmap(rec->ptr, rec->size); + dprintf3("munmap ret: %d\n", ret); + pkey_assert(!ret); + dprintf3("clearing rec->ptr, rec: %p\n", rec); + rec->ptr = NULL; + dprintf3("done clearing rec->ptr, rec: %p\n", rec); + return; + } + pkey_assert(false); +} + + +void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) +{ + void *ptr; + int ret; + + rdpkru(); + dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, + size, prot, pkey); + pkey_assert(pkey < NR_PKEYS); + ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + pkey_assert(ptr != (void *)-1); + ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); + pkey_assert(!ret); + record_pkey_malloc(ptr, size); + rdpkru(); + + dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); + return ptr; +} + +void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) +{ + int ret; + void *ptr; + + dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, + size, prot, pkey); + /* + * Guarantee we can fit at least one huge page in the resulting + * allocation by allocating space for 2: + */ + size = ALIGN_UP(size, HPAGE_SIZE * 2); + ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + pkey_assert(ptr != 
(void *)-1); + record_pkey_malloc(ptr, size); + mprotect_pkey(ptr, size, prot, pkey); + + dprintf1("unaligned ptr: %p\n", ptr); + ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE); + dprintf1(" aligned ptr: %p\n", ptr); + ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE); + dprintf1("MADV_HUGEPAGE ret: %d\n", ret); + ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED); + dprintf1("MADV_WILLNEED ret: %d\n", ret); + memset(ptr, 0, HPAGE_SIZE); + + dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr); + return ptr; +} + +int hugetlb_setup_ok; +#define GET_NR_HUGE_PAGES 10 +void setup_hugetlbfs(void) +{ + int err; + int fd; + int validated_nr_pages; + int i; + char buf[] = "123"; + + if (geteuid() != 0) { + fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n"); + return; + } + + cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages"); + + /* + * Now go make sure that we got the pages and that they + * are 2M pages. Someone might have made 1G the default. + */ + fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY); + if (fd < 0) { + perror("opening sysfs 2M hugetlb config"); + return; + } + + /* -1 to guarantee leaving the trailing \0 */ + err = read(fd, buf, sizeof(buf)-1); + close(fd); + if (err <= 0) { + perror("reading sysfs 2M hugetlb config"); + return; + } + + if (atoi(buf) != GET_NR_HUGE_PAGES) { + fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n", + buf, GET_NR_HUGE_PAGES); + return; + } + + hugetlb_setup_ok = 1; +} + +void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) +{ + void *ptr; + int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB; + + if (!hugetlb_setup_ok) + return PTR_ERR_ENOTSUP; + + dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey); + size = ALIGN_UP(size, HPAGE_SIZE * 2); + pkey_assert(pkey < NR_PKEYS); + ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0); + pkey_assert(ptr != (void *)-1); + mprotect_pkey(ptr, size, prot, pkey); + + record_pkey_malloc(ptr, size); + + 
dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); + return ptr; +} + +void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) +{ + void *ptr; + int fd; + + dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, + size, prot, pkey); + pkey_assert(pkey < NR_PKEYS); + fd = open("/dax/foo", O_RDWR); + pkey_assert(fd >= 0); + + ptr = mmap(0, size, prot, MAP_SHARED, fd, 0); + pkey_assert(ptr != (void *)-1); + + mprotect_pkey(ptr, size, prot, pkey); + + record_pkey_malloc(ptr, size); + + dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); + close(fd); + return ptr; +} + +void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { + + malloc_pkey_with_mprotect, + malloc_pkey_anon_huge, + malloc_pkey_hugetlb +/* can not do direct with the pkey_mprotect() API: + malloc_pkey_mmap_direct, + malloc_pkey_mmap_dax, +*/ +}; + +void *malloc_pkey(long size, int prot, u16 pkey) +{ + void *ret; + static int malloc_type; + int nr_malloc_types = ARRAY_SIZE(pkey_malloc); + + pkey_assert(pkey < NR_PKEYS); + + while (1) { + pkey_assert(malloc_type < nr_malloc_types); + + ret = pkey_malloc[malloc_type](size, prot, pkey); + pkey_assert(ret != (void *)-1); + + malloc_type++; + if (malloc_type >= nr_malloc_types) + malloc_type = (random()%nr_malloc_types); + + /* try again if the malloc_type we tried is unsupported */ + if (ret == PTR_ERR_ENOTSUP) + continue; + + break; + } + + dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__, + size, prot, pkey, ret); + return ret; +} + +int last_pkru_faults; +void expected_pk_fault(int pkey) +{ + dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", + __func__, last_pkru_faults, pkru_faults); + dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); + pkey_assert(last_pkru_faults + 1 == pkru_faults); + pkey_assert(last_si_pkey == pkey); + /* + * The signal handler shold have cleared out PKRU to let the + * test program continue. We now have to restore it. 
+ */ + if (__rdpkru() != 0) + pkey_assert(0); + + __wrpkru(shadow_pkru); + dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n", + __func__, shadow_pkru); + last_pkru_faults = pkru_faults; + last_si_pkey = -1; +} + +void do_not_expect_pk_fault(void) +{ + pkey_assert(last_pkru_faults == pkru_faults); +} + +int test_fds[10] = { -1 }; +int nr_test_fds; +void __save_test_fd(int fd) +{ + pkey_assert(fd >= 0); + pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds)); + test_fds[nr_test_fds] = fd; + nr_test_fds++; +} + +int get_test_read_fd(void) +{ + int test_fd = open("/etc/passwd", O_RDONLY); + __save_test_fd(test_fd); + return test_fd; +} + +void close_test_fds(void) +{ + int i; + + for (i = 0; i < nr_test_fds; i++) { + if (test_fds[i] < 0) + continue; + close(test_fds[i]); + test_fds[i] = -1; + } + nr_test_fds = 0; +} + +#define barrier() __asm__ __volatile__("": : :"memory") +__attribute__((noinline)) int read_ptr(int *ptr) +{ + /* + * Keep GCC from optimizing this away somehow + */ + barrier(); + return *ptr; +} + +void test_read_of_write_disabled_region(int *ptr, u16 pkey) +{ + int ptr_contents; + + dprintf1("disabling write access to PKEY[1], doing read\n"); + pkey_write_deny(pkey); + ptr_contents = read_ptr(ptr); + dprintf1("*ptr: %d\n", ptr_contents); + dprintf1("\n"); +} +void test_read_of_access_disabled_region(int *ptr, u16 pkey) +{ + int ptr_contents; + + dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); + rdpkru(); + pkey_access_deny(pkey); + ptr_contents = read_ptr(ptr); + dprintf1("*ptr: %d\n", ptr_contents); + expected_pk_fault(pkey); +} +void test_write_of_write_disabled_region(int *ptr, u16 pkey) +{ + dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); + pkey_write_deny(pkey); + *ptr = __LINE__; + expected_pk_fault(pkey); +} +void test_write_of_access_disabled_region(int *ptr, u16 pkey) +{ + dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); + pkey_access_deny(pkey); + *ptr = __LINE__; 
+ expected_pk_fault(pkey); +} +void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) +{ + int ret; + int test_fd = get_test_read_fd(); + + dprintf1("disabling access to PKEY[%02d], " + "having kernel read() to buffer\n", pkey); + pkey_access_deny(pkey); + ret = read(test_fd, ptr, 1); + dprintf1("read ret: %d\n", ret); + pkey_assert(ret); +} +void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) +{ + int ret; + int test_fd = get_test_read_fd(); + + pkey_write_deny(pkey); + ret = read(test_fd, ptr, 100); + dprintf1("read ret: %d\n", ret); + if (ret < 0 && (DEBUG_LEVEL > 0)) + perror("verbose read result (OK for this to be bad)"); + pkey_assert(ret); +} + +void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) +{ + int pipe_ret, vmsplice_ret; + struct iovec iov; + int pipe_fds[2]; + + pipe_ret = pipe(pipe_fds); + + pkey_assert(pipe_ret == 0); + dprintf1("disabling access to PKEY[%02d], " + "having kernel vmsplice from buffer\n", pkey); + pkey_access_deny(pkey); + iov.iov_base = ptr; + iov.iov_len = PAGE_SIZE; + vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT); + dprintf1("vmsplice() ret: %d\n", vmsplice_ret); + pkey_assert(vmsplice_ret == -1); + + close(pipe_fds[0]); + close(pipe_fds[1]); +} + +void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) +{ + int ignored = 0xdada; + int futex_ret; + int some_int = __LINE__; + + dprintf1("disabling write to PKEY[%02d], " + "doing futex gunk in buffer\n", pkey); + *ptr = some_int; + pkey_write_deny(pkey); + futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL, + &ignored, ignored); + if (DEBUG_LEVEL > 0) + perror("futex"); + dprintf1("futex() ret: %d\n", futex_ret); +} + +/* Assumes that all pkeys other than 'pkey' are unallocated */ +void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) +{ + int err; + int i; + + /* Note: 0 is the default pkey, so don't mess with it */ + for (i = 1; i < NR_PKEYS; i++) { + if (pkey == i) + continue; 
+ + dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i); + err = sys_pkey_free(i); + pkey_assert(err); + + /* not enforced when pkey_get() is not a syscall + err = pkey_get(i, 0); + pkey_assert(err < 0); + */ + + err = sys_pkey_free(i); + pkey_assert(err); + + err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i); + pkey_assert(err); + } +} + +/* Assumes that all pkeys other than 'pkey' are unallocated */ +void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) +{ + int err; + int bad_flag = (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) + 1; + int bad_pkey = NR_PKEYS+99; + + /* not enforced when pkey_get() is not a syscall + err = pkey_get(bad_pkey, bad_flag); + pkey_assert(err < 0); + */ + + /* pass a known-invalid pkey in: */ + err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey); + pkey_assert(err); +} + +/* Assumes that all pkeys other than 'pkey' are unallocated */ +void test_pkey_alloc_exhaust(int *ptr, u16 pkey) +{ + unsigned long flags; + unsigned long init_val; + int err; + int allocated_pkeys[NR_PKEYS] = {0}; + int nr_allocated_pkeys = 0; + int i; + + for (i = 0; i < NR_PKEYS*2; i++) { + int new_pkey; + dprintf1("%s() alloc loop: %d\n", __func__, i); + new_pkey = alloc_pkey(); + dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, err, __rdpkru(), shadow_pkru); + rdpkru(); /* for shadow checking */ + dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); + if ((new_pkey == -1) && (errno == ENOSPC)) { + dprintf2("%s() failed to allocate pkey after %d tries\n", + __func__, nr_allocated_pkeys); + break; + } + pkey_assert(nr_allocated_pkeys < NR_PKEYS); + allocated_pkeys[nr_allocated_pkeys++] = new_pkey; + } + + dprintf3("%s()::%d\n", __func__, __LINE__); + + /* + * ensure it did not reach the end of the loop without + * failure: + */ + pkey_assert(i < NR_PKEYS*2); + + /* + * There are 16 pkeys supported in hardware. 
One is taken + * up for the default (0) and another can be taken up by + * an execute-only mapping. Ensure that we can allocate + * at least 14 (16-2). + */ + pkey_assert(i >= NR_PKEYS-2); + + for (i = 0; i < nr_allocated_pkeys; i++) { + err = sys_pkey_free(allocated_pkeys[i]); + pkey_assert(!err); + rdpkru(); /* for shadow checking */ + } +} + +void test_ptrace_of_child(int *ptr, u16 pkey) +{ + __attribute__((__unused__)) int peek_result; + pid_t child_pid; + void *ignored = 0; + long ret; + int status; + /* + * This is the "control" for our little expermient. Make sure + * we can always access it when ptracing. + */ + int *plain_ptr_unaligned = malloc(HPAGE_SIZE); + int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE); + + /* + * Fork a child which is an exact copy of this process, of course. + * That means we can do all of our tests via ptrace() and then plain + * memory access and ensure they work differently. + */ + child_pid = fork_lazy_child(); + dprintf1("[%d] child pid: %d\n", getpid(), child_pid); + + ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored); + if (ret) + perror("attach"); + dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__); + pkey_assert(ret != -1); + ret = waitpid(child_pid, &status, WUNTRACED); + if ((ret != child_pid) || !(WIFSTOPPED(status))) { + fprintf(stderr, "weird waitpid result %ld stat %x\n", + ret, status); + pkey_assert(0); + } + dprintf2("waitpid ret: %ld\n", ret); + dprintf2("waitpid status: %d\n", status); + + pkey_access_deny(pkey); + pkey_write_deny(pkey); + + /* Write access, untested for now: + ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data); + pkey_assert(ret != -1); + dprintf1("poke at %p: %ld\n", peek_at, ret); + */ + + /* + * Try to access the pkey-protected "ptr" via ptrace: + */ + ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored); + /* expect it to work, without an error: */ + pkey_assert(ret != -1); + /* Now access from the current task, and expect an exception: */ + 
peek_result = read_ptr(ptr); + expected_pk_fault(pkey); + + /* + * Try to access the NON-pkey-protected "plain_ptr" via ptrace: + */ + ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored); + /* expect it to work, without an error: */ + pkey_assert(ret != -1); + /* Now access from the current task, and expect NO exception: */ + peek_result = read_ptr(plain_ptr); + do_not_expect_pk_fault(); + + ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); + pkey_assert(ret != -1); + + ret = kill(child_pid, SIGKILL); + pkey_assert(ret != -1); + + wait(&status); + + free(plain_ptr_unaligned); +} + +void test_executing_on_unreadable_memory(int *ptr, u16 pkey) +{ + void *p1; + int scratch; + int ptr_contents; + int ret; + + p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); + dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); + /* lots_o_noops_around_write should be page-aligned already */ + assert(p1 == &lots_o_noops_around_write); + + /* Point 'p1' at the *second* page of the function: */ + p1 += PAGE_SIZE; + + madvise(p1, PAGE_SIZE, MADV_DONTNEED); + lots_o_noops_around_write(&scratch); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + + ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey); + pkey_assert(!ret); + pkey_access_deny(pkey); + + dprintf2("pkru: %x\n", rdpkru()); + + /* + * Make sure this is an *instruction* fault + */ + madvise(p1, PAGE_SIZE, MADV_DONTNEED); + lots_o_noops_around_write(&scratch); + do_not_expect_pk_fault(); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + expected_pk_fault(pkey); +} + +void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) +{ + int size = PAGE_SIZE; + int sret; + + if (cpu_has_pku()) { + dprintf1("SKIP: %s: no CPU support\n", __func__); + return; + } + + sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey); + pkey_assert(sret < 0); +} + +void (*pkey_tests[])(int *ptr, u16 pkey) = { + 
test_read_of_write_disabled_region, + test_read_of_access_disabled_region, + test_write_of_write_disabled_region, + test_write_of_access_disabled_region, + test_kernel_write_of_access_disabled_region, + test_kernel_write_of_write_disabled_region, + test_kernel_gup_of_access_disabled_region, + test_kernel_gup_write_to_write_disabled_region, + test_executing_on_unreadable_memory, + test_ptrace_of_child, + test_pkey_syscalls_on_non_allocated_pkey, + test_pkey_syscalls_bad_args, + test_pkey_alloc_exhaust, +}; + +void run_tests_once(void) +{ + int *ptr; + int prot = PROT_READ|PROT_WRITE; + + for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) { + int pkey; + int orig_pkru_faults = pkru_faults; + + dprintf1("======================\n"); + dprintf1("test %d preparing...\n", test_nr); + + tracing_on(); + pkey = alloc_random_pkey(); + dprintf1("test %d starting with pkey: %d\n", test_nr, pkey); + ptr = malloc_pkey(PAGE_SIZE, prot, pkey); + dprintf1("test %d starting...\n", test_nr); + pkey_tests[test_nr](ptr, pkey); + dprintf1("freeing test memory: %p\n", ptr); + free_pkey_malloc(ptr); + sys_pkey_free(pkey); + + dprintf1("pkru_faults: %d\n", pkru_faults); + dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults); + + tracing_off(); + close_test_fds(); + + printf("test %2d PASSED (itertation %d)\n", test_nr, iteration_nr); + dprintf1("======================\n\n"); + } + iteration_nr++; +} + +void pkey_setup_shadow(void) +{ + shadow_pkru = __rdpkru(); +} + +int main(void) +{ + int nr_iterations = 22; + + setup_handlers(); + + printf("has pku: %d\n", cpu_has_pku()); + + if (!cpu_has_pku()) { + int size = PAGE_SIZE; + int *ptr; + + printf("running PKEY tests for unsupported CPU/OS\n"); + + ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + assert(ptr != (void *)-1); + test_mprotect_pkey_on_unsupported_cpu(ptr, 1); + exit(0); + } + + pkey_setup_shadow(); + printf("startup pkru: %x\n", rdpkru()); + setup_hugetlbfs(); + + while (nr_iterations-- > 0) + 
run_tests_once(); + + printf("done (all tests OK)\n"); + return 0; +} diff --git a/tools/testing/selftests/zram/README b/tools/testing/selftests/zram/README index eb17917c8a3a..7972cc512408 100644 --- a/tools/testing/selftests/zram/README +++ b/tools/testing/selftests/zram/README @@ -13,7 +13,7 @@ Statistics for individual zram devices are exported through sysfs nodes at Kconfig required: CONFIG_ZRAM=y -CONFIG_ZRAM_LZ4_COMPRESS=y +CONFIG_CRYPTO_LZ4=y CONFIG_ZPOOL=y CONFIG_ZSMALLOC=y |