diff options
Diffstat (limited to 'libgo/go/runtime/pprof/pprof.go')
-rw-r--r-- | libgo/go/runtime/pprof/pprof.go | 368 |
1 files changed, 259 insertions, 109 deletions
diff --git a/libgo/go/runtime/pprof/pprof.go b/libgo/go/runtime/pprof/pprof.go index 18e99366fb..0db1dedd7a 100644 --- a/libgo/go/runtime/pprof/pprof.go +++ b/libgo/go/runtime/pprof/pprof.go @@ -1,23 +1,87 @@ -// Copyright 2010 The Go Authors. All rights reserved. +// Copyright 2010 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package pprof writes runtime profiling data in the format expected // by the pprof visualization tool. +// +// Profiling a Go program +// +// The first step to profiling a Go program is to enable profiling. +// Support for profiling benchmarks built with the standard testing +// package is built into go test. For example, the following command +// runs benchmarks in the current directory and writes the CPU and +// memory profiles to cpu.prof and mem.prof: +// +// go test -cpuprofile cpu.prof -memprofile mem.prof -bench . +// +// To add equivalent profiling support to a standalone program, add +// code like the following to your main function: +// +// var cpuprofile = flag.String("cpuprofile", "", "write cpu profile `file`") +// var memprofile = flag.String("memprofile", "", "write memory profile to `file`") +// +// func main() { +// flag.Parse() +// if *cpuprofile != "" { +// f, err := os.Create(*cpuprofile) +// if err != nil { +// log.Fatal("could not create CPU profile: ", err) +// } +// if err := pprof.StartCPUProfile(f); err != nil { +// log.Fatal("could not start CPU profile: ", err) +// } +// defer pprof.StopCPUProfile() +// } +// ... +// if *memprofile != "" { +// f, err := os.Create(*memprofile) +// if err != nil { +// log.Fatal("could not create memory profile: ", err) +// } +// runtime.GC() // get up-to-date statistics +// if err := pprof.WriteHeapProfile(f); err != nil { +// log.Fatal("could not write memory profile: ", err) +// } +// f.Close() +// } +// } +// +// There is also a standard HTTP interface to profiling data. Adding +// the following line will install handlers under the /debug/pprof/ +// URL to download live profiles: +// +// import _ "net/http/pprof" +// +// See the net/http/pprof package for more details. +// +// Profiles can then be visualized with the pprof tool: +// +// go tool pprof cpu.prof +// +// There are many commands available from the pprof command line. +// Commonly used commands include "top", which prints a summary of the +// top program hot-spots, and "web", which opens an interactive graph +// of hot-spots and their call graphs. Use "help" for information on +// all pprof commands. +// // For more information about pprof, see -// http://code.google.com/p/google-perftools/. +// https://github.com/google/pprof/blob/master/doc/pprof.md. package pprof import ( "bufio" "bytes" "fmt" + "internal/pprof/profile" "io" "runtime" + "runtime/pprof/internal/protopprof" "sort" "strings" "sync" "text/tabwriter" + "time" ) // BUG(rsc): Profiles are only as good as the kernel support used to generate them. @@ -31,12 +95,13 @@ import ( // // A Profile's methods can be called from multiple goroutines simultaneously. // -// Each Profile has a unique name. A few profiles are predefined: +// Each Profile has a unique name. A few profiles are predefined: // // goroutine - stack traces of all current goroutines // heap - a sampling of all heap allocations // threadcreate - stack traces that led to the creation of new OS threads // block - stack traces that led to blocking on synchronization primitives +// mutex - stack traces of holders of contended mutexes // // These predefined profiles maintain themselves and panic on an explicit // Add or Remove method call. @@ -48,7 +113,7 @@ import ( // all known allocations. This exception helps mainly in programs running // without garbage collection enabled, usually for debugging purposes. // -// The CPU profile is not available as a Profile. It has a special API, +// The CPU profile is not available as a Profile. It has a special API, // the StartCPUProfile and StopCPUProfile functions, because it streams // output to a writer during profiling. // @@ -90,6 +155,12 @@ var blockProfile = &Profile{ write: writeBlock, } +var mutexProfile = &Profile{ + name: "mutex", + count: countMutex, + write: writeMutex, +} + func lockProfiles() { profiles.mu.Lock() if profiles.m == nil { @@ -99,6 +170,7 @@ func lockProfiles() { "threadcreate": threadcreateProfile, "heap": heapProfile, "block": blockProfile, + "mutex": mutexProfile, } } } @@ -140,21 +212,15 @@ func Profiles() []*Profile { lockProfiles() defer unlockProfiles() - var all []*Profile + all := make([]*Profile, 0, len(profiles.m)) for _, p := range profiles.m { all = append(all, p) } - sort.Sort(byName(all)) + sort.Slice(all, func(i, j int) bool { return all[i].name < all[j].name }) return all } -type byName []*Profile - -func (x byName) Len() int { return len(x) } -func (x byName) Swap(i, j int) { x[i], x[j] = x[j], x[i] } -func (x byName) Less(i, j int) bool { return x[i].name < x[j].name } - // Name returns this profile's name, which can be passed to Lookup to reobtain the profile. func (p *Profile) Name() string { return p.name @@ -173,11 +239,11 @@ func (p *Profile) Count() int { // Add adds the current execution stack to the profile, associated with value. // Add stores value in an internal map, so value must be suitable for use as // a map key and will not be garbage collected until the corresponding -// call to Remove. Add panics if the profile already contains a stack for value. +// call to Remove. Add panics if the profile already contains a stack for value. // // The skip parameter has the same meaning as runtime.Caller's skip -// and controls where the stack trace begins. Passing skip=0 begins the -// trace in the function calling Add. For example, given this +// and controls where the stack trace begins. Passing skip=0 begins the +// trace in the function calling Add. For example, given this // execution stack: // // Add @@ -237,7 +303,7 @@ func (p *Profile) WriteTo(w io.Writer, debug int) error { } // Obtain consistent snapshot under lock; then process without lock. - var all [][]uintptr + all := make([][]uintptr, 0, len(p.m)) p.mu.Lock() for _, stk := range p.m { all = append(all, stk) @@ -266,7 +332,7 @@ func (x stackProfile) Less(i, j int) bool { } // A countProfile is a set of stack traces to be printed as counts -// grouped by stack trace. There are multiple implementations: +// grouped by stack trace. There are multiple implementations: // all that matters is that we can find out how many traces there are // and obtain each trace in turn. type countProfile interface { @@ -275,17 +341,8 @@ type countProfile interface { } // printCountProfile prints a countProfile at the specified debug level. +// The profile will be in compressed proto format unless debug is nonzero. func printCountProfile(w io.Writer, debug int, name string, p countProfile) error { - b := bufio.NewWriter(w) - var tw *tabwriter.Writer - w = b - if debug > 0 { - tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) - w = tw - } - - fmt.Fprintf(w, "%s profile: total %d\n", name, p.Len()) - // Build count of each stack. var buf bytes.Buffer key := func(stk []uintptr) string { @@ -296,74 +353,114 @@ func printCountProfile(w io.Writer, debug int, name string, p countProfile) erro } return buf.String() } - m := map[string]int{} + count := map[string]int{} + index := map[string]int{} + var keys []string n := p.Len() for i := 0; i < n; i++ { - m[key(p.Stack(i))]++ + k := key(p.Stack(i)) + if count[k] == 0 { + index[k] = i + keys = append(keys, k) + } + count[k]++ } - // Print stacks, listing count on first occurrence of a unique stack. - for i := 0; i < n; i++ { - stk := p.Stack(i) - s := key(stk) - if count := m[s]; count != 0 { - fmt.Fprintf(w, "%d %s\n", count, s) - if debug > 0 { - printStackRecord(w, stk, false) + sort.Sort(&keysByCount{keys, count}) + + if debug > 0 { + // Print debug profile in legacy format + tw := tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) + fmt.Fprintf(tw, "%s profile: total %d\n", name, p.Len()) + for _, k := range keys { + fmt.Fprintf(tw, "%d %s\n", count[k], k) + printStackRecord(tw, p.Stack(index[k]), false) + } + return tw.Flush() + } + + // Output profile in protobuf form. + prof := &profile.Profile{ + PeriodType: &profile.ValueType{Type: name, Unit: "count"}, + Period: 1, + Sample: make([]*profile.Sample, 0, len(keys)), + SampleType: []*profile.ValueType{{Type: name, Unit: "count"}}, + } + locMap := make(map[uintptr]*profile.Location) + for _, k := range keys { + stk := p.Stack(index[k]) + c := count[k] + locs := make([]*profile.Location, len(stk)) + for i, addr := range stk { + loc := locMap[addr] + if loc == nil { + loc = &profile.Location{ + ID: uint64(len(locMap) + 1), + Address: uint64(addr - 1), + } + prof.Location = append(prof.Location, loc) + locMap[addr] = loc } - delete(m, s) + locs[i] = loc } + prof.Sample = append(prof.Sample, &profile.Sample{ + Location: locs, + Value: []int64{int64(c)}, + }) } + return prof.Write(w) +} - if tw != nil { - tw.Flush() +// keysByCount sorts keys with higher counts first, breaking ties by key string order. +type keysByCount struct { + keys []string + count map[string]int +} + +func (x *keysByCount) Len() int { return len(x.keys) } +func (x *keysByCount) Swap(i, j int) { x.keys[i], x.keys[j] = x.keys[j], x.keys[i] } +func (x *keysByCount) Less(i, j int) bool { + ki, kj := x.keys[i], x.keys[j] + ci, cj := x.count[ki], x.count[kj] + if ci != cj { + return ci > cj } - return b.Flush() + return ki < kj } // printStackRecord prints the function + source line information // for a single stack trace. func printStackRecord(w io.Writer, stk []uintptr, allFrames bool) { show := allFrames - wasPanic := false - for i, pc := range stk { - f := runtime.FuncForPC(pc) - if f == nil { - show = true - fmt.Fprintf(w, "#\t%#x\n", pc) - wasPanic = false - } else { - tracepc := pc - // Back up to call instruction. - if i > 0 && pc > f.Entry() && !wasPanic { - if runtime.GOARCH == "386" || runtime.GOARCH == "amd64" { - tracepc-- - } else if runtime.GOARCH == "s390" || runtime.GOARCH == "s390x" { - // only works if function was called - // with the brasl instruction (or a - // different 6-byte instruction). - tracepc -= 6 - } else { - tracepc -= 4 // arm, etc - } - } - file, line := f.FileLine(tracepc) - name := f.Name() - // Hide runtime.goexit and any runtime functions at the beginning. - // This is useful mainly for allocation traces. - wasPanic = name == "runtime.gopanic" - if name == "runtime.goexit" || !show && (strings.HasPrefix(name, "runtime.") || strings.HasPrefix(name, "runtime_")) { - continue - } - if !show && !strings.Contains(name, ".") && strings.HasPrefix(name, "__go_") { - continue - } - if !show && name == "" { - // This can happen due to http://gcc.gnu.org/PR65797. - continue + frames := runtime.CallersFrames(stk) + for { + frame, more := frames.Next() + name := frame.Function + + // Hide runtime.goexit and any runtime functions at the beginning. + // This is useful mainly for allocation traces. + skip := name == "runtime.goexit" + if !show { + switch { + case strings.HasPrefix(name, "runtime."): + skip = true + case strings.HasPrefix(name, "runtime_"): + skip = true + case !strings.Contains(name, ".") && strings.HasPrefix(name, "__go_"): + skip = true } + } + + if !show && name == "" { + // This can happen due to http://gcc.gnu.org/PR65797. + } else if name == "" { + fmt.Fprintf(w, "#\t%#x\n", frame.PC) + } else if !skip { show = true - fmt.Fprintf(w, "#\t%#x\t%s+%#x\t%s:%d\n", pc, name, pc-f.Entry(), file, line) + fmt.Fprintf(w, "#\t%#x\t%s+%#x\t%s:%d\n", frame.PC, name, frame.PC-frame.Entry, frame.File, frame.Line) + } + if !more { + break } } if !show { @@ -377,12 +474,6 @@ func printStackRecord(w io.Writer, stk []uintptr, allFrames bool) { // Interface to system profiles. -type byInUseBytes []runtime.MemProfileRecord - -func (x byInUseBytes) Len() int { return len(x) } -func (x byInUseBytes) Swap(i, j int) { x[i], x[j] = x[j], x[i] } -func (x byInUseBytes) Less(i, j int) bool { return x[i].InUseBytes() > x[j].InUseBytes() } - // WriteHeapProfile is shorthand for Lookup("heap").WriteTo(w, 0). // It is preserved for backwards compatibility. func WriteHeapProfile(w io.Writer) error { @@ -418,15 +509,16 @@ func writeHeap(w io.Writer, debug int) error { // Profile grew; try again. } - sort.Sort(byInUseBytes(p)) + if debug == 0 { + pp := protopprof.EncodeMemProfile(p, int64(runtime.MemProfileRate), time.Now()) + return pp.Write(w) + } + + sort.Slice(p, func(i, j int) bool { return p[i].InUseBytes() > p[j].InUseBytes() }) b := bufio.NewWriter(w) - var tw *tabwriter.Writer - w = b - if debug > 0 { - tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) - w = tw - } + tw := tabwriter.NewWriter(b, 1, 8, 1, '\t', 0) + w = tw var total runtime.MemProfileRecord for i := range p { @@ -454,9 +546,7 @@ func writeHeap(w io.Writer, debug int) error { fmt.Fprintf(w, " %#x", pc) } fmt.Fprintf(w, "\n") - if debug > 0 { - printStackRecord(w, r.Stack(), false) - } + printStackRecord(w, r.Stack(), false) } // Print memstats information too. @@ -482,16 +572,15 @@ func writeHeap(w io.Writer, debug int) error { fmt.Fprintf(w, "# MSpan = %d / %d\n", s.MSpanInuse, s.MSpanSys) fmt.Fprintf(w, "# MCache = %d / %d\n", s.MCacheInuse, s.MCacheSys) fmt.Fprintf(w, "# BuckHashSys = %d\n", s.BuckHashSys) + fmt.Fprintf(w, "# GCSys = %d\n", s.GCSys) + fmt.Fprintf(w, "# OtherSys = %d\n", s.OtherSys) fmt.Fprintf(w, "# NextGC = %d\n", s.NextGC) fmt.Fprintf(w, "# PauseNs = %d\n", s.PauseNs) fmt.Fprintf(w, "# NumGC = %d\n", s.NumGC) - fmt.Fprintf(w, "# EnableGC = %v\n", s.EnableGC) fmt.Fprintf(w, "# DebugGC = %v\n", s.DebugGC) - if tw != nil { - tw.Flush() - } + tw.Flush() return b.Flush() } @@ -521,7 +610,7 @@ func writeGoroutine(w io.Writer, debug int) error { func writeGoroutineStacks(w io.Writer) error { // We don't know how big the buffer needs to be to collect - // all the goroutines. Start with 1 MB and try a few times, doubling each time. + // all the goroutines. Start with 1 MB and try a few times, doubling each time. // Give up and use a truncated trace if 64 MB is not enough. buf := make([]byte, 1<<20) for i := 0; ; i++ { @@ -584,7 +673,7 @@ var cpu struct { // Go code built with -buildmode=c-archive or -buildmode=c-shared. // StartCPUProfile relies on the SIGPROF signal, but that signal will // be delivered to the main program's SIGPROF signal handler (if any) -// not to the one used by Go. To make it work, call os/signal.Notify +// not to the one used by Go. To make it work, call os/signal.Notify // for syscall.SIGPROF, but note that doing so may break any profiling // being done by the main program. func StartCPUProfile(w io.Writer) error { @@ -595,7 +684,7 @@ func StartCPUProfile(w io.Writer) error { // 100 Hz is a reasonable choice: it is frequent enough to // produce useful data, rare enough not to bog down the // system, and a nice round number to make it easy to - // convert sample counts to seconds. Instead of requiring + // convert sample counts to seconds. Instead of requiring // each client to specify the frequency, we hard code it. const hz = 100 @@ -615,13 +704,29 @@ func StartCPUProfile(w io.Writer) error { } func profileWriter(w io.Writer) { + startTime := time.Now() + // This will buffer the entire profile into buf and then + // translate it into a profile.Profile structure. This will + // create two copies of all the data in the profile in memory. + // TODO(matloob): Convert each chunk of the proto output and + // stream it out instead of converting the entire profile. + var buf bytes.Buffer for { data := runtime.CPUProfile() if data == nil { break } - w.Write(data) + buf.Write(data) + } + + profile, err := protopprof.TranslateCPUProfile(buf.Bytes(), startTime) + if err != nil { + // The runtime should never produce an invalid or truncated profile. + // It drops records that can't fit into its log buffers. + panic(fmt.Errorf("could not translate binary profile to proto format: %v", err)) } + + profile.Write(w) cpu.done <- true } @@ -640,18 +745,18 @@ func StopCPUProfile() { <-cpu.done } -type byCycles []runtime.BlockProfileRecord - -func (x byCycles) Len() int { return len(x) } -func (x byCycles) Swap(i, j int) { x[i], x[j] = x[j], x[i] } -func (x byCycles) Less(i, j int) bool { return x[i].Cycles > x[j].Cycles } - // countBlock returns the number of records in the blocking profile. func countBlock() int { n, _ := runtime.BlockProfile(nil) return n } +// countMutex returns the number of records in the mutex profile. +func countMutex() int { + n, _ := runtime.MutexProfile(nil) + return n +} + // writeBlock writes the current blocking profile to w. func writeBlock(w io.Writer, debug int) error { var p []runtime.BlockProfileRecord @@ -665,7 +770,7 @@ func writeBlock(w io.Writer, debug int) error { } } - sort.Sort(byCycles(p)) + sort.Slice(p, func(i, j int) bool { return p[i].Cycles > p[j].Cycles }) b := bufio.NewWriter(w) var tw *tabwriter.Writer @@ -695,4 +800,49 @@ func writeBlock(w io.Writer, debug int) error { return b.Flush() } +// writeMutex writes the current mutex profile to w. +func writeMutex(w io.Writer, debug int) error { + // TODO(pjw): too much common code with writeBlock. FIX! + var p []runtime.BlockProfileRecord + n, ok := runtime.MutexProfile(nil) + for { + p = make([]runtime.BlockProfileRecord, n+50) + n, ok = runtime.MutexProfile(p) + if ok { + p = p[:n] + break + } + } + + sort.Slice(p, func(i, j int) bool { return p[i].Cycles > p[j].Cycles }) + + b := bufio.NewWriter(w) + var tw *tabwriter.Writer + w = b + if debug > 0 { + tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) + w = tw + } + + fmt.Fprintf(w, "--- mutex:\n") + fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond()) + fmt.Fprintf(w, "sampling period=%d\n", runtime.SetMutexProfileFraction(-1)) + for i := range p { + r := &p[i] + fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count) + for _, pc := range r.Stack() { + fmt.Fprintf(w, " %#x", pc) + } + fmt.Fprint(w, "\n") + if debug > 0 { + printStackRecord(w, r.Stack(), true) + } + } + + if tw != nil { + tw.Flush() + } + return b.Flush() +} + func runtime_cyclesPerSecond() int64 |