From 555f59fbaa0b3cb086c62be60f5f460205a63167 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Wed, 24 Mar 2010 09:40:09 -0700 Subject: runtime: malloc sampling, pprof interface R=r CC=golang-dev http://codereview.appspot.com/719041 --- src/pkg/Makefile | 4 ++ src/pkg/http/pprof/Makefile | 11 ++++ src/pkg/http/pprof/pprof.go | 92 +++++++++++++++++++++++++++++++ src/pkg/runtime/extern.go | 121 ++++++++++++++++++++++++++++++++++++----- src/pkg/runtime/malloc.cgo | 26 +++++---- src/pkg/runtime/mprof.cgo | 59 ++++++++++++++++++-- src/pkg/runtime/pprof/Makefile | 11 ++++ src/pkg/runtime/pprof/pprof.go | 73 +++++++++++++++++++++++++ src/pkg/runtime/proc.c | 3 + src/pkg/runtime/runtime.c | 11 +++- src/pkg/runtime/runtime.h | 13 ++--- src/pkg/runtime/symtab.c | 9 +++ 12 files changed, 393 insertions(+), 40 deletions(-) create mode 100644 src/pkg/http/pprof/Makefile create mode 100644 src/pkg/http/pprof/pprof.go create mode 100644 src/pkg/runtime/pprof/Makefile create mode 100644 src/pkg/runtime/pprof/pprof.go (limited to 'src') diff --git a/src/pkg/Makefile b/src/pkg/Makefile index d0267b6b1..f40894b47 100644 --- a/src/pkg/Makefile +++ b/src/pkg/Makefile @@ -83,6 +83,7 @@ DIRS=\ hash/crc32\ hash/crc64\ http\ + http/pprof\ image\ image/jpeg\ image/png\ @@ -103,6 +104,7 @@ DIRS=\ regexp\ rpc\ runtime\ + runtime/pprof\ scanner\ sort\ strconv\ @@ -130,10 +132,12 @@ NOTEST=\ go/doc\ go/token\ hash\ + http/pprof\ image\ image/jpeg\ rand\ runtime\ + runtime/pprof\ syscall\ testing/iotest\ xgb\ diff --git a/src/pkg/http/pprof/Makefile b/src/pkg/http/pprof/Makefile new file mode 100644 index 000000000..e0315112f --- /dev/null +++ b/src/pkg/http/pprof/Makefile @@ -0,0 +1,11 @@ +# Copyright 2010 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.$(GOARCH) + +TARG=http/pprof +GOFILES=\ + pprof.go\ + +include ../../../Make.pkg diff --git a/src/pkg/http/pprof/pprof.go b/src/pkg/http/pprof/pprof.go new file mode 100644 index 000000000..38d91afbf --- /dev/null +++ b/src/pkg/http/pprof/pprof.go @@ -0,0 +1,92 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package pprof serves via its HTTP server runtime profiling data +// in the format expected by the pprof visualization tool. +// For more information about pprof, see +// http://code.google.com/p/google-perftools/. +// +// The package is typically only imported for the side effect of +// registering its HTTP handlers. +// The handled paths all begin with /debug/pprof/. +// +// To use pprof, link this package into your program: +// import _ "http/pprof" +// +// Then use the pprof tool to look at the heap profile: +// +// pprof http://localhost:6060/debug/pprof/heap +// +package pprof + +import ( + "bufio" + "fmt" + "http" + "os" + "runtime" + "runtime/pprof" + "strconv" + "strings" +) + +func init() { + http.Handle("/debug/pprof/cmdline", http.HandlerFunc(Cmdline)) + http.Handle("/debug/pprof/heap", http.HandlerFunc(Heap)) + http.Handle("/debug/pprof/symbol", http.HandlerFunc(Symbol)) +} + +// Cmdline responds with the running program's +// command line, with arguments separated by NUL bytes. +// The package initialization registers it as /debug/pprof/cmdline. +func Cmdline(c *http.Conn, r *http.Request) { + c.SetHeader("content-type", "text/plain; charset=utf-8") + fmt.Fprintf(c, strings.Join(os.Args, "\x00")) +} + +// Heap responds with the pprof-formatted heap profile. +// The package initialization registers it as /debug/pprof/heap. +func Heap(c *http.Conn, r *http.Request) { + c.SetHeader("content-type", "text/plain; charset=utf-8") + pprof.WriteHeapProfile(c) +} + +// Symbol looks up the program counters listed in the request, +// responding with a table mapping program counters to function names. +// The package initialization registers it as /debug/pprof/symbol. +func Symbol(c *http.Conn, r *http.Request) { + c.SetHeader("content-type", "text/plain; charset=utf-8") + + // We don't know how many symbols we have, but we + // do have symbol information. Pprof only cares whether + // this number is 0 (no symbols available) or > 0. + fmt.Fprintf(c, "num_symbols: 1\n") + + var b *bufio.Reader + if r.Method == "POST" { + b = bufio.NewReader(r.Body) + } else { + b = bufio.NewReader(strings.NewReader(r.URL.RawQuery)) + } + + for { + w, err := b.ReadSlice('+') + if err == nil { + w = w[0 : len(w)-1] // trim + + } + pc, _ := strconv.Btoui64(string(w), 0) + if pc != 0 { + f := runtime.FuncForPC(uintptr(pc)) + if f != nil { + fmt.Fprintf(c, "%#x %s\n", pc, f.Name()) + } + } + + // Wait until here to check for err; the last + // symbol will have an err because it doesn't end in +. + if err != nil { + break + } + } +} diff --git a/src/pkg/runtime/extern.go b/src/pkg/runtime/extern.go index 1e8c1b1df..338b0c5b2 100644 --- a/src/pkg/runtime/extern.go +++ b/src/pkg/runtime/extern.go @@ -33,6 +33,64 @@ func Caller(skip int) (pc uintptr, file string, line int, ok bool) // It returns the number of entries written to pc. func Callers(skip int, pc []int) int +// FuncForPC returns a *Func describing the function that contains the +// given program counter address, or else nil. +func FuncForPC(pc uintptr) *Func + +// NOTE(rsc): Func must match struct Func in runtime.h + +// Func records information about a function in the program, +// in particular the mapping from program counters to source +// line numbers within that function. +type Func struct { + name string + typ string + src string + pcln []byte + entry uintptr + pc0 uintptr + ln0 int32 + frame int32 + args int32 + locals int32 +} + +// Name returns the name of the function. +func (f *Func) Name() string { return f.name } + +// Entry returns the entry address of the function. +func (f *Func) Entry() uintptr { return f.entry } + +// FileLine returns the file name and line number of the +// source code corresponding to the program counter pc. +// The result will not be accurate if pc is not a program +// counter within f. +func (f *Func) FileLine(pc uintptr) (file string, line int) { + // NOTE(rsc): If you edit this function, also edit + // symtab.c:/^funcline. + const PcQuant = 1 + + p := f.pcln + pc1 := f.pc0 + line = int(f.ln0) + file = f.src + for i := 0; i < len(p) && pc1 <= pc; i++ { + switch { + case p[i] == 0: + line += int(p[i+1]<<24) | int(p[i+2]<<16) | int(p[i+3]<<8) | int(p[i+4]) + i += 4 + case p[i] <= 64: + line += int(p[i]) + case p[i] <= 128: + line += int(p[i] - 64) + default: + line += PcQuant * int(p[i]-129) + } + pc += PcQuant + } + return +} + // mid returns the current os thread (m) id. func mid() uint32 @@ -175,18 +233,55 @@ func GOROOT() string { // at the time of the build. func Version() string { return defaultVersion } -// MemProfileKind specifies how frequently to record -// memory allocations in the memory profiler. -type MemProfileKind int +// MemProfileRate controls the fraction of memory allocations +// that are recorded and reported in the memory profile. +// The profiler aims to sample an average of +// one allocation per MemProfileRate bytes allocated. +// +// To include every allocated block in the profile, set MemProfileRate to 1. +// To turn off profiling entirely, set MemProfileRate to 0. +// +// The tools that process the memory profiles assume that the +// profile rate is constant across the lifetime of the program +// and equal to the current value. Programs that change the +// memory profiling rate should do so just once, as early as +// possible in the execution of the program (for example, +// at the beginning of main). +var MemProfileRate int = 512 * 1024 + +// A MemProfileRecord describes the live objects allocated +// by a particular call sequence (stack trace). +type MemProfileRecord struct { + AllocBytes, FreeBytes int64 // number of bytes allocated, freed + AllocObjects, FreeObjects int64 // number of objects allocated, freed + Stack0 [32]uintptr // stack trace for this record; ends at first 0 entry +} -const ( - MemProfileNone MemProfileKind = iota // no profiling - MemProfileSample // profile random sample - MemProfileAll // profile every allocation -) +// InUseBytes returns the number of bytes in use (AllocBytes - FreeBytes). +func (r *MemProfileRecord) InUseBytes() int64 { return r.AllocBytes - r.FreeBytes } -// SetMemProfileKind sets the fraction of memory allocations -// that are recorded and reported in the memory profile. -// Profiling an allocation has a small overhead, so the default -// is to profile only a random sample, weighted by block size. -func SetMemProfileKind(kind MemProfileKind) +// InUseObjects returns the number of objects in use (AllocObjects - FreeObjects). +func (r *MemProfileRecord) InUseObjects() int64 { + return r.AllocObjects - r.FreeObjects +} + +// Stack returns the stack trace associated with the record, +// a prefix of r.Stack0. +func (r *MemProfileRecord) Stack() []uintptr { + for i, v := range r.Stack0 { + if v == 0 { + return r.Stack0[0:i] + } + } + return r.Stack0[0:] +} + +// MemProfile returns n, the number of records in the current memory profile. +// If len(p) >= n, MemProfile copies the profile into p and returns n, true. +// If len(p) < n, MemProfile does not change p and returns n, false. +// +// If inuseZero is true, the profile includes allocation records +// where r.AllocBytes > 0 but r.AllocBytes == r.FreeBytes. +// These are sites where memory was allocated, but it has all +// been released back to the runtime. +func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) diff --git a/src/pkg/runtime/malloc.cgo b/src/pkg/runtime/malloc.cgo index f832a0ecb..b9572b2a7 100644 --- a/src/pkg/runtime/malloc.cgo +++ b/src/pkg/runtime/malloc.cgo @@ -15,6 +15,8 @@ package runtime MHeap mheap; MStats mstats; +extern volatile int32 ·MemProfileRate; + // Same algorithm from chan.c, but a different // instance of the static uint32 x. // Not protected by a lock - let the threads use @@ -36,7 +38,7 @@ fastrand1(void) void* mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed, int32 skip_depth) { - int32 sizeclass; + int32 sizeclass, rate; MCache *c; uintptr npages; MSpan *s; @@ -91,19 +93,19 @@ mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed, int32 skip_dept m->mallocing = 0; - if(!(refflag & RefNoProfiling) && malloc_profile != MProf_None) { - switch(malloc_profile) { - case MProf_Sample: - if(m->mcache->next_sample > size) { - m->mcache->next_sample -= size; - break; - } - m->mcache->next_sample = fastrand1() & (256*1024 - 1); // sample every 128 kB allocated, on average - // fall through - case MProf_All: + if(!(refflag & RefNoProfiling) && (rate = ·MemProfileRate) > 0) { + if(size >= rate) + goto profile; + if(m->mcache->next_sample > size) + m->mcache->next_sample -= size; + else { + // pick next profile time + if(rate > 0x3fffffff) // make 2*rate not overflow + rate = 0x3fffffff; + m->mcache->next_sample = fastrand1() % (2*rate); + profile: *ref |= RefProfiled; MProf_Malloc(skip_depth+1, v, size); - break; } } diff --git a/src/pkg/runtime/mprof.cgo b/src/pkg/runtime/mprof.cgo index c59eb3721..50bcaec3c 100644 --- a/src/pkg/runtime/mprof.cgo +++ b/src/pkg/runtime/mprof.cgo @@ -11,8 +11,6 @@ package runtime #include "defs.h" #include "type.h" -int32 malloc_profile = MProf_None; // no sampling during bootstrap - // NOTE(rsc): Everything here could use cas if contention became an issue. static Lock proflock; @@ -58,7 +56,7 @@ stkbucket(uintptr *stk, int32 nstk) } h += h<<3; h ^= h>>11; - + i = h%BuckHashSize; for(b = buckhash[i]; b; b=b->next) if(b->hash == h && b->nstk == nstk && @@ -162,7 +160,7 @@ getaddrbucket(uintptr addr) AddrHash *ah; AddrEntry *e, **l; Bucket *b; - + h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits); for(ah=addrhash[h]; ah; ah=ah->next) if(ah->addr == (addr>>20)) @@ -191,6 +189,10 @@ MProf_Malloc(int32 skip, void *p, uintptr size) uintptr stk[32]; Bucket *b; + if(m->nomemprof > 0) + return; + + m->nomemprof++; nstk = callers(1+skip, stk, 32); lock(&proflock); b = stkbucket(stk, nstk); @@ -198,6 +200,7 @@ MProf_Malloc(int32 skip, void *p, uintptr size) b->alloc_bytes += size; setaddrbucket((uintptr)p, b); unlock(&proflock); + m->nomemprof--; } // Called when freeing a profiled block. @@ -206,6 +209,10 @@ MProf_Free(void *p, uintptr size) { Bucket *b; + if(m->nomemprof > 0) + return; + + m->nomemprof++; lock(&proflock); b = getaddrbucket((uintptr)p); if(b != nil) { @@ -213,13 +220,53 @@ MProf_Free(void *p, uintptr size) b->free_bytes += size; } unlock(&proflock); + m->nomemprof--; } // Go interface to profile data. (Declared in extern.go) // Assumes Go sizeof(int) == sizeof(int32) -func SetMemProfileKind(kind int32) { - malloc_profile = kind; +// Must match MemProfileRecord in extern.go. +typedef struct Record Record; +struct Record { + int64 alloc_bytes, free_bytes; + int64 alloc_objects, free_objects; + uintptr stk[32]; +}; + +// Write b's data to r. +static void +record(Record *r, Bucket *b) +{ + int32 i; + + r->alloc_bytes = b->alloc_bytes; + r->free_bytes = b->free_bytes; + r->alloc_objects = b->allocs; + r->free_objects = b->frees; + for(i=0; instk && istk); i++) + r->stk[i] = b->stk[i]; + for(; istk); i++) + r->stk[i] = 0; } +func MemProfile(p Slice, include_inuse_zero bool) (n int32, ok bool) { + Bucket *b; + Record *r; + + lock(&proflock); + n = 0; + for(b=buckets; b; b=b->allnext) + if(include_inuse_zero || b->alloc_bytes != b->free_bytes) + n++; + ok = false; + if(n <= p.len) { + ok = true; + r = (Record*)p.array; + for(b=buckets; b; b=b->allnext) + if(include_inuse_zero || b->alloc_bytes != b->free_bytes) + record(r++, b); + } + unlock(&proflock); +} diff --git a/src/pkg/runtime/pprof/Makefile b/src/pkg/runtime/pprof/Makefile new file mode 100644 index 000000000..daffde79d --- /dev/null +++ b/src/pkg/runtime/pprof/Makefile @@ -0,0 +1,11 @@ +# Copyright 2010 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.$(GOARCH) + +TARG=runtime/pprof +GOFILES=\ + pprof.go\ + +include ../../../Make.pkg diff --git a/src/pkg/runtime/pprof/pprof.go b/src/pkg/runtime/pprof/pprof.go new file mode 100644 index 000000000..143c3c65c --- /dev/null +++ b/src/pkg/runtime/pprof/pprof.go @@ -0,0 +1,73 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package pprof writes runtime profiling data in the format expected +// by the pprof visualization tool. +// For more information about pprof, see +// http://code.google.com/p/google-perftools/. +package pprof + +import ( + "bufio" + "fmt" + "io" + "os" + "runtime" +) + +// WriteHeapProfile writes a pprof-formatted heap profile to w. +// If a write to w returns an error, WriteHeapProfile returns that error. +// Otherwise, WriteHeapProfile returns nil. +func WriteHeapProfile(w io.Writer) os.Error { + // Find out how many records there are (MemProfile(nil, false)), + // allocate that many records, and get the data. + // There's a race—more records might be added between + // the two calls—so allocate a few extra records for safety + // and also try again if we're very unlucky. + // The loop should only execute one iteration in the common case. + var p []runtime.MemProfileRecord + n, ok := runtime.MemProfile(nil, false) + for { + // Allocate room for a slightly bigger profile, + // in case a few more entries have been added + // since the call to MemProfile. + p = make([]runtime.MemProfileRecord, n+50) + n, ok = runtime.MemProfile(p, false) + if ok { + p = p[0:n] + break + } + // Profile grew; try again. + } + + var total runtime.MemProfileRecord + for i := range p { + r := &p[i] + total.AllocBytes += r.AllocBytes + total.AllocObjects += r.AllocObjects + total.FreeBytes += r.FreeBytes + total.FreeObjects += r.FreeObjects + } + + // Technically the rate is MemProfileRate not 2*MemProfileRate, + // but early versions of the C++ heap profiler reported 2*MemProfileRate, + // so that's what pprof has come to expect. + b := bufio.NewWriter(w) + fmt.Fprintf(b, "heap profile: %d: %d [%d: %d] @ heap/%d\n", + total.InUseObjects(), total.InUseBytes(), + total.AllocObjects, total.AllocBytes, + 2*runtime.MemProfileRate) + + for i := range p { + r := &p[i] + fmt.Fprintf(b, "%d: %d [%d: %d] @", + r.InUseObjects(), r.InUseBytes(), + r.AllocObjects, r.AllocBytes) + for _, pc := range r.Stack() { + fmt.Fprintf(b, " %#x", pc) + } + fmt.Fprintf(b, "\n") + } + return b.Flush() +} diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c index c85e347e2..c4783d8cf 100644 --- a/src/pkg/runtime/proc.c +++ b/src/pkg/runtime/proc.c @@ -102,6 +102,7 @@ schedinit(void) byte *p; allm = m; + m->nomemprof++; mallocinit(); goargs(); @@ -118,6 +119,8 @@ schedinit(void) sched.mcpumax = sched.gomaxprocs; sched.mcount = 1; sched.predawn = 1; + + m->nomemprof--; } // Called after main·init_function; main·main will be called on return. diff --git a/src/pkg/runtime/runtime.c b/src/pkg/runtime/runtime.c index f4882d8bd..aa6d82506 100644 --- a/src/pkg/runtime/runtime.c +++ b/src/pkg/runtime/runtime.c @@ -210,7 +210,7 @@ void ·getgoroot(String out) { byte *p; - + p = getenv("GOROOT"); out = gostring(p); FLUSH(&out); @@ -475,7 +475,7 @@ nanotime(void) { int64 sec; int32 usec; - + sec = 0; usec = 0; gettime(&sec, &usec); @@ -507,3 +507,10 @@ void retn = callers(skip, (uintptr*)pc.array, pc.len); FLUSH(&retn); } + +void +·FuncForPC(uintptr pc, void *retf) +{ + retf = findfunc(pc); + FLUSH(&retf); +} diff --git a/src/pkg/runtime/runtime.h b/src/pkg/runtime/runtime.h index c04693899..cd6e227ec 100644 --- a/src/pkg/runtime/runtime.h +++ b/src/pkg/runtime/runtime.h @@ -212,6 +212,7 @@ struct M int32 mallocing; int32 gcing; int32 locks; + int32 nomemprof; int32 waitnextg; Note havenextg; G* nextg; @@ -259,20 +260,18 @@ enum SigQueue = 1<<3, }; -// (will be) shared with go; edit ../cmd/6g/sys.go too. -// should move out of sys.go eventually. -// also eventually, the loaded symbol table should -// be closer to this form. +// NOTE(rsc): keep in sync with extern.go:/type.Func. +// Eventually, the loaded symbol table should be closer to this form. struct Func { String name; String type; // go type string String src; // src file name - uint64 entry; // entry pc - int64 frame; // stack frame size Slice pcln; // pc/ln tab for this func - int64 pc0; // starting pc, ln for table + uintptr entry; // entry pc + uintptr pc0; // starting pc, ln for table int32 ln0; + int32 frame; // stack frame size int32 args; // number of 32-bit in/out args int32 locals; // number of 32-bit locals }; diff --git a/src/pkg/runtime/symtab.c b/src/pkg/runtime/symtab.c index 51d572595..9e69ab4bf 100644 --- a/src/pkg/runtime/symtab.c +++ b/src/pkg/runtime/symtab.c @@ -299,6 +299,7 @@ splitpcln(void) // Return actual file line number for targetpc in func f. // (Source file is f->src.) +// NOTE(rsc): If you edit this function, also edit extern.go:/FileLine int32 funcline(Func *f, uint64 targetpc) { @@ -333,6 +334,12 @@ buildfuncs(void) if(func != nil) return; + + // Memory profiling uses this code; + // can deadlock if the profiler ends + // up back here. + m->nomemprof++; + // count funcs, fnames nfunc = 0; nfname = 0; @@ -350,6 +357,8 @@ buildfuncs(void) // record src file and line info for each func walksymtab(dosrcline); + + m->nomemprof--; } Func* -- cgit v1.2.1