summaryrefslogtreecommitdiff
path: root/tools/perf/builtin-mem.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-02-22 13:59:43 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2021-02-22 13:59:43 -0800
commit3a36281a17199737b468befb826d4a23eb774445 (patch)
tree18bbdfcb0c30215883809f248d0334b7ea84d5ba /tools/perf/builtin-mem.c
parent7c70f3a7488d2fa62d32849d138bf2b8420fe788 (diff)
parent3027ce36ccbae74f2e7c1afbfc3f69fee0c2a996 (diff)
downloadlinux-3a36281a17199737b468befb826d4a23eb774445.tar.gz
Merge tag 'perf-tools-for-v5.12-2020-02-19' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tool updates from Arnaldo Carvalho de Melo: "New features: - Support instruction latency in 'perf report', with both memory latency (weight) and instruction latency information, users can locate expensive load instructions and understand time spent in different stages. - Extend 'perf c2c' to display the number of loads which were blocked by data or address conflict. - Add 'perf stat' support for L2 topdown events in systems such as Intel's Sapphire rapids server. - Add support for PERF_SAMPLE_CODE_PAGE_SIZE in various tools, as a sort key, for instance: perf report --stdio --sort=comm,symbol,code_page_size - New 'perf daemon' command to run long running sessions while providing a way to control the enablement of events without restarting a traditional 'perf record' session. - Enable counting events for BPF programs in 'perf stat' just like for other targets (tid, cgroup, cpu, etc), e.g.: # perf stat -e ref-cycles,cycles -b 254 -I 1000 1.487903822 115,200 ref-cycles 1.487903822 86,012 cycles 2.489147029 80,560 ref-cycles 2.489147029 73,784 cycles ^C The example above counts 'cycles' and 'ref-cycles' of BPF program of id 254. It is similar to bpftool-prog-profile command, but more flexible. - Support the new layout for PERF_RECORD_MMAP2 to carry the DSO build-id using infrastructure generalised from the eBPF subsystem, removing the need for traversing the perf.data file to collect build-ids at the end of 'perf record' sessions and helping with long running sessions where binaries can get replaced in updates, leading to possible mis-resolution of symbols. - Support filtering by hex address in 'perf script'. - Support DSO filter in 'perf script', like in other perf tools. - Add namespaces support to 'perf inject' - Add support for SDT (Dtrace Style Markers) events on ARM64. perf record: - Fix handling of eventfd() when draining a buffer in 'perf record'. - Improvements to the generation of metadata events for pre-existing threads (mmaps, comm, etc), speeding up the work done at the start of system wide or per CPU 'perf record' sessions. Hardware tracing: - Initial support for tracing KVM with Intel PT. - Intel PT fixes for IPC - Support Intel PT PSB (synchronization packets) events. - Automatically group aux-output events to overcome --filter syntax. - Enable PERF_SAMPLE_DATA_SRC on ARMs SPE. - Update ARM's CoreSight hardware tracing OpenCSD library to v1.0.0. perf annotate TUI: - Fix handling of 'k' ("show line number") hotkey - Fix jump parsing for C++ code. perf probe: - Add protection to avoid endless loop. cgroups: - Avoid reading cgroup mountpoint multiple times, caching it. - Fix handling of cgroup v1/v2 in mixed hierarchy. Symbol resolving: - Add OCaml symbol demangling. - Further fixes for handling PE executables when using perf with Wine and .exe/.dll files. - Fix 'perf unwind' DSO handling. - Resolve symbols against debug file first, to deal with artifacts related to LTO. - Fix gap between kernel end and module start on powerpc. Reporting tools: - The DSO filter shouldn't show samples in unresolved maps. - Improve debuginfod support in various tools. build ids: - Fix 16-byte build ids in 'perf buildid-cache', add a 'perf test' entry for that case. perf test: - Support for PERF_SAMPLE_WEIGHT_STRUCT. - Add test case for PERF_SAMPLE_CODE_PAGE_SIZE. - Shell based tests for 'perf daemon's commands ('start', 'stop, 'reconfig', 'list', etc). - ARM cs-etm 'perf test' fixes. - Add parse-metric memory bandwidth testcase. Compiler related: - Fix 'perf probe' kretprobe issue caused by gcc 11 bug when used with -fpatchable-function-entry. - Fix ARM64 build with gcc 11's -Wformat-overflow. - Fix unaligned access in sample parsing test. - Fix printf conversion specifier for IP addresses on arm64, s390 and powerpc. Arch specific: - Support exposing Performance Monitor Counter SPRs as part of extended regs on powerpc. - Add JSON 'perf stat' metrics for ARM64's imx8mp, imx8mq and imx8mn DDR, fix imx8mm ones. - Fix common and uarch events for ARM64's A76 and Ampere eMag" * tag 'perf-tools-for-v5.12-2020-02-19' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (148 commits) perf buildid-cache: Don't skip 16-byte build-ids perf buildid-cache: Add test for 16-byte build-id perf symbol: Remove redundant libbfd checks perf test: Output the sub testing result in cs-etm perf test: Suppress logs in cs-etm testing perf tools: Fix arm64 build error with gcc-11 perf intel-pt: Add documentation for tracing virtual machines perf intel-pt: Split VM-Entry and VM-Exit branches perf intel-pt: Adjust sample flags for VM-Exit perf intel-pt: Allow for a guest kernel address filter perf intel-pt: Support decoding of guest kernel perf machine: Factor out machine__idle_thread() perf machine: Factor out machines__find_guest() perf intel-pt: Amend decoder to track the NR flag perf intel-pt: Retain the last PIP packet payload as is perf intel_pt: Add vmlaunch and vmresume as branches perf script: Add branch types for VM-Entry and VM-Exit perf auxtrace: Automatically group aux-output events perf test: Fix unaligned access in sample parsing test perf tools: Support arch specific PERF_SAMPLE_WEIGHT_STRUCT processing ...
Diffstat (limited to 'tools/perf/builtin-mem.c')
-rw-r--r--tools/perf/builtin-mem.c113
1 files changed, 57 insertions, 56 deletions
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 823742036ddb..cdd2b9f643f6 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -30,6 +30,7 @@ struct perf_mem {
bool dump_raw;
bool force;
bool phys_addr;
+ bool data_page_size;
int operation;
const char *cpu_list;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -124,6 +125,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
if (mem->phys_addr)
rec_argv[i++] = "--phys-data";
+ if (mem->data_page_size)
+ rec_argv[i++] = "--data-page-size";
+
for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
e = perf_mem_events__ptr(j);
if (!e->record)
@@ -172,7 +176,8 @@ dump_raw_samples(struct perf_tool *tool,
{
struct perf_mem *mem = container_of(tool, struct perf_mem, tool);
struct addr_location al;
- const char *fmt;
+ const char *fmt, *field_sep;
+ char str[PAGE_SIZE_NAME_LEN];
if (machine__resolve(machine, &al, sample) < 0) {
fprintf(stderr, "problem processing %d event, skipping it.\n",
@@ -186,60 +191,47 @@ dump_raw_samples(struct perf_tool *tool,
if (al.map != NULL)
al.map->dso->hit = 1;
- if (mem->phys_addr) {
- if (symbol_conf.field_sep) {
- fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s0x%016"PRIx64
- "%s%"PRIu64"%s0x%"PRIx64"%s%s:%s\n";
- } else {
- fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
- "%s0x%016"PRIx64"%s%5"PRIu64"%s0x%06"PRIx64
- "%s%s:%s\n";
- symbol_conf.field_sep = " ";
- }
+ field_sep = symbol_conf.field_sep;
+ if (field_sep) {
+ fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s";
+ } else {
+ fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64"%s";
+ symbol_conf.field_sep = " ";
+ }
+ printf(fmt,
+ sample->pid,
+ symbol_conf.field_sep,
+ sample->tid,
+ symbol_conf.field_sep,
+ sample->ip,
+ symbol_conf.field_sep,
+ sample->addr,
+ symbol_conf.field_sep);
- printf(fmt,
- sample->pid,
- symbol_conf.field_sep,
- sample->tid,
- symbol_conf.field_sep,
- sample->ip,
- symbol_conf.field_sep,
- sample->addr,
- symbol_conf.field_sep,
+ if (mem->phys_addr) {
+ printf("0x%016"PRIx64"%s",
sample->phys_addr,
- symbol_conf.field_sep,
- sample->weight,
- symbol_conf.field_sep,
- sample->data_src,
- symbol_conf.field_sep,
- al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
- al.sym ? al.sym->name : "???");
- } else {
- if (symbol_conf.field_sep) {
- fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64
- "%s0x%"PRIx64"%s%s:%s\n";
- } else {
- fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
- "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
- symbol_conf.field_sep = " ";
- }
+ symbol_conf.field_sep);
+ }
- printf(fmt,
- sample->pid,
- symbol_conf.field_sep,
- sample->tid,
- symbol_conf.field_sep,
- sample->ip,
- symbol_conf.field_sep,
- sample->addr,
- symbol_conf.field_sep,
- sample->weight,
- symbol_conf.field_sep,
- sample->data_src,
- symbol_conf.field_sep,
- al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
- al.sym ? al.sym->name : "???");
+ if (mem->data_page_size) {
+ printf("%s%s",
+ get_page_size_name(sample->data_page_size, str),
+ symbol_conf.field_sep);
}
+
+ if (field_sep)
+ fmt = "%"PRIu64"%s0x%"PRIx64"%s%s:%s\n";
+ else
+ fmt = "%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
+
+ printf(fmt,
+ sample->weight,
+ symbol_conf.field_sep,
+ sample->data_src,
+ symbol_conf.field_sep,
+ al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
+ al.sym ? al.sym->name : "???");
out_put:
addr_location__put(&al);
return 0;
@@ -287,10 +279,15 @@ static int report_raw_events(struct perf_mem *mem)
if (ret < 0)
goto out_delete;
+ printf("# PID, TID, IP, ADDR, ");
+
if (mem->phys_addr)
- printf("# PID, TID, IP, ADDR, PHYS ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
- else
- printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
+ printf("PHYS ADDR, ");
+
+ if (mem->data_page_size)
+ printf("DATA PAGE SIZE, ");
+
+ printf("LOCAL WEIGHT, DSRC, SYMBOL\n");
ret = perf_session__process_events(session);
@@ -300,7 +297,7 @@ out_delete:
}
static char *get_sort_order(struct perf_mem *mem)
{
- bool has_extra_options = mem->phys_addr ? true : false;
+ bool has_extra_options = (mem->phys_addr | mem->data_page_size) ? true : false;
char sort[128];
/*
@@ -312,13 +309,16 @@ static char *get_sort_order(struct perf_mem *mem)
"dso_daddr,tlb,locked");
} else if (has_extra_options) {
strcpy(sort, "--sort=local_weight,mem,sym,dso,symbol_daddr,"
- "dso_daddr,snoop,tlb,locked");
+ "dso_daddr,snoop,tlb,locked,blocked");
} else
return NULL;
if (mem->phys_addr)
strcat(sort, ",phys_daddr");
+ if (mem->data_page_size)
+ strcat(sort, ",data_page_size");
+
return strdup(sort);
}
@@ -464,6 +464,7 @@ int cmd_mem(int argc, const char **argv)
" between columns '.' is reserved."),
OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"),
OPT_BOOLEAN('p', "phys-data", &mem.phys_addr, "Record/Report sample physical addresses"),
+ OPT_BOOLEAN(0, "data-page-size", &mem.data_page_size, "Record/Report sample data address page size"),
OPT_END()
};
const char *const mem_subcommands[] = { "record", "report", NULL };