diff options
Diffstat (limited to 'deps/v8/tools/ll_prof.py')
-rwxr-xr-x | deps/v8/tools/ll_prof.py | 63 |
1 files changed, 56 insertions, 7 deletions
diff --git a/deps/v8/tools/ll_prof.py b/deps/v8/tools/ll_prof.py index 3afe179d2f..216929d1e2 100755 --- a/deps/v8/tools/ll_prof.py +++ b/deps/v8/tools/ll_prof.py @@ -45,7 +45,7 @@ USAGE="""usage: %prog [OPTION]... Analyses V8 and perf logs to produce profiles. Perf logs can be collected using a command like: - $ perf record -R -e cycles -c 10000 -f -i ./shell bench.js --ll-prof + $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof # -R: collect all data # -e cycles: use cpu-cycles event (run "perf list" for details) # -c 10000: write a sample after each 10000 events @@ -54,6 +54,16 @@ Perf logs can be collected using a command like: # --ll-prof shell flag enables the right V8 logs This will produce a binary trace file (perf.data) that %prog can analyse. +IMPORTANT: + The kernel has an internal maximum for events per second, it is 100K by + default. That's not enough for "-c 10000". Set it to some higher value: + $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate + You can also make the warning about kernel address maps go away: + $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict + +We have a convenience script that handles all of the above for you: + $ tools/run-llprof.sh ./d8 bench.js + Examples: # Print flat profile with annotated disassembly for the 10 top # symbols. Use default log names and include the snapshot log. @@ -75,6 +85,10 @@ class Code(object): """Code object.""" _id = 0 + UNKNOWN = 0 + V8INTERNAL = 1 + FULL_CODEGEN = 2 + OPTIMIZED = 3 def __init__(self, name, start_address, end_address, origin, origin_offset): self.id = Code._id @@ -88,6 +102,14 @@ class Code(object): self.self_ticks = 0 self.self_ticks_map = None self.callee_ticks = None + if name.startswith("LazyCompile:*"): + self.codetype = Code.OPTIMIZED + elif name.startswith("LazyCompile:"): + self.codetype = Code.FULL_CODEGEN + elif name.startswith("v8::internal::"): + self.codetype = Code.V8INTERNAL + else: + self.codetype = Code.UNKNOWN def AddName(self, name): assert self.name != name @@ -185,7 +207,7 @@ class Code(object): class CodePage(object): """Group of adjacent code objects.""" - SHIFT = 12 # 4K pages + SHIFT = 20 # 1M pages SIZE = (1 << SHIFT) MASK = ~(SIZE - 1) @@ -507,6 +529,7 @@ class Descriptor(object): # for the gory details. +# Reference: struct perf_file_header in kernel/tools/perf/util/header.h TRACE_HEADER_DESC = Descriptor([ ("magic", "u64"), ("size", "u64"), @@ -520,6 +543,7 @@ TRACE_HEADER_DESC = Descriptor([ ]) +# Reference: /usr/include/linux/perf_event.h PERF_EVENT_ATTR_DESC = Descriptor([ ("type", "u32"), ("size", "u32"), @@ -529,12 +553,13 @@ PERF_EVENT_ATTR_DESC = Descriptor([ ("read_format", "u64"), ("flags", "u64"), ("wakeup_events_or_watermark", "u32"), - ("bt_type", "u32"), + ("bp_type", "u32"), ("bp_addr", "u64"), - ("bp_len", "u64"), + ("bp_len", "u64") ]) +# Reference: /usr/include/linux/perf_event.h PERF_EVENT_HEADER_DESC = Descriptor([ ("type", "u32"), ("misc", "u16"), @@ -542,6 +567,7 @@ PERF_EVENT_HEADER_DESC = Descriptor([ ]) +# Reference: kernel/events/core.c PERF_MMAP_EVENT_BODY_DESC = Descriptor([ ("pid", "u32"), ("tid", "u32"), @@ -566,6 +592,7 @@ PERF_SAMPLE_STREAM_ID = 1 << 9 PERF_SAMPLE_RAW = 1 << 10 +# Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE. PERF_SAMPLE_EVENT_BODY_FIELDS = [ ("ip", "u64", PERF_SAMPLE_IP), ("pid", "u32", PERF_SAMPLE_TID), @@ -702,8 +729,12 @@ class LibraryRepo(object): # Unfortunately, section headers span two lines, so we have to # keep the just seen section name (from the first line in each # section header) in the after_section variable. + if mmap_info.filename.endswith(".ko"): + dynamic_symbols = "" + else: + dynamic_symbols = "-T" process = subprocess.Popen( - "%s -h -t -T -C %s" % (OBJDUMP_BIN, mmap_info.filename), + "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename), shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) pipe = process.stdout after_section = None @@ -795,7 +826,7 @@ def PrintReport(code_map, library_repo, arch, ticks, options): code.PrintAnnotated(arch, options) print print "Ticks per library:" - mmap_infos = [m for m in library_repo.infos] + mmap_infos = [m for m in library_repo.infos if m.ticks > 0] mmap_infos.sort(key=lambda m: m.ticks, reverse=True) for mmap_info in mmap_infos: mmap_ticks = mmap_info.ticks @@ -885,6 +916,9 @@ if __name__ == "__main__": ticks = 0 missed_ticks = 0 really_missed_ticks = 0 + optimized_ticks = 0 + generated_ticks = 0 + v8_internal_ticks = 0 mmap_time = 0 sample_time = 0 @@ -928,6 +962,12 @@ if __name__ == "__main__": code = code_map.Find(sample.ip) if code: code.Tick(sample.ip) + if code.codetype == Code.OPTIMIZED: + optimized_ticks += 1 + elif code.codetype == Code.FULL_CODEGEN: + generated_ticks += 1 + elif code.codetype == Code.V8INTERNAL: + v8_internal_ticks += 1 else: missed_ticks += 1 if not library_repo.Tick(sample.ip) and not code: @@ -947,12 +987,21 @@ if __name__ == "__main__": PrintReport(code_map, library_repo, log_reader.arch, ticks, options) if not options.quiet: + def PrintTicks(number, total, description): + print("%10d %5.1f%% ticks in %s" % + (number, 100.0*number/total, description)) print print "Stats:" print "%10d total trace events" % events print "%10d total ticks" % ticks print "%10d ticks not in symbols" % missed_ticks - print "%10d unaccounted ticks" % really_missed_ticks + unaccounted = "unaccounted ticks" + if really_missed_ticks > 0: + unaccounted += " (probably in the kernel, try --kernel)" + PrintTicks(really_missed_ticks, ticks, unaccounted) + PrintTicks(optimized_ticks, ticks, "ticks in optimized code") + PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code") + PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*") print "%10d total symbols" % len([c for c in code_map.AllCode()]) print "%10d used symbols" % len([c for c in code_map.UsedCode()]) print "%9.2fs library processing time" % mmap_time |