diff options
author | Rob Pike <r@golang.org> | 2010-01-11 16:53:45 -0800 |
---|---|---|
committer | Rob Pike <r@golang.org> | 2010-01-11 16:53:45 -0800 |
commit | db0cb0f60e8fcc9a4258b4adf164e92fc31ca305 (patch) | |
tree | dbac2e0f773b0ba482bf0a06f14180542516aa3e /src/cmd/prof | |
parent | c5965ceffb551ee87d26119ab52e659ae50563b7 (diff) | |
download | go-db0cb0f60e8fcc9a4258b4adf164e92fc31ca305.tar.gz |
add pprof output format to prof.
amd64 linux only.
R=rsc
CC=golang-dev
http://codereview.appspot.com/186077
Diffstat (limited to 'src/cmd/prof')
-rw-r--r-- | src/cmd/prof/doc.go | 9 | ||||
-rw-r--r-- | src/cmd/prof/main.c | 290 |
2 files changed, 280 insertions, 19 deletions
diff --git a/src/cmd/prof/doc.go b/src/cmd/prof/doc.go index 1493c641d..1f2209f04 100644 --- a/src/cmd/prof/doc.go +++ b/src/cmd/prof/doc.go @@ -19,10 +19,15 @@ statistics. Usage: prof -p pid [-t total_secs] [-d delta_msec] [6.out args ...] -The formats (default -h) are: +The output modes (default -h) are: + -P file.prof: + Write the profile information to file.prof, in the format used by pprof. + At the moment, this only works on Linux amd64 binaries and requires that the + binary be written using 6l -e to produce ELF debug info. + See http://code.google.com/p/google-perftools for details. -h: histograms - How many times a sample occurred at each location + How many times a sample occurred at each location. -f: dynamic functions At each sample period, print the name of the executing function. -l: dynamic file and line numbers diff --git a/src/cmd/prof/main.c b/src/cmd/prof/main.c index ed14cf697..9de563d90 100644 --- a/src/cmd/prof/main.c +++ b/src/cmd/prof/main.c @@ -20,7 +20,6 @@ char* file = "6.out"; static Fhdr fhdr; int have_syms; int fd; -Map *symmap; struct Ureg_amd64 ureg_amd64; struct Ureg_x86 ureg_x86; int total_sec = 0; @@ -28,7 +27,17 @@ int delta_msec = 100; int nsample; int nsamplethread; +// pprof data, stored as sequences of N followed by N PC values. +// See http://code.google.com/p/google-perftools . +uvlong *ppdata; // traces +Biobuf* pproffd; // file descriptor to write trace info +long ppstart; // start position of current trace +long nppdata; // length of data +long ppalloc; // size of allocated data +char ppmapdata[10*1024]; // the map information for the output file + // output formats +int pprof; // print pprof output to named file int functions; // print functions int histograms; // print histograms int linenums; // print file and line numbers rather than function names @@ -46,6 +55,7 @@ Usage(void) { fprint(2, "Usage: prof -p pid [-t total_secs] [-d delta_msec] [6.out args ...]\n"); fprint(2, "\tformats (default -h):\n"); + fprint(2, "\t\t-c file.prof: write [c]pprof output to file.prof\n"); fprint(2, "\t\t-h: histograms\n"); fprint(2, "\t\t-f: dynamic functions\n"); fprint(2, "\t\t-l: dynamic file and line numbers\n"); @@ -79,6 +89,7 @@ struct Arch { int (*getSP)(Map*); uvlong (*uregPC)(void); uvlong (*uregSP)(void); + void (*ppword)(uvlong w); }; void @@ -116,24 +127,39 @@ int amd64_getregs(Map *map) { int i; + union { + uvlong regs[1]; + struct Ureg_amd64 ureg; + } u; for(i = 0; i < sizeof ureg_amd64; i+=8) { - if(get8(map, (uvlong)i, &((uvlong*)&ureg_amd64)[i/4]) < 0) - return -1; + if(get8(map, (uvlong)i, &u.regs[i/8]) < 0) + return -1; } + ureg_amd64 = u.ureg; return 0; } int amd64_getPC(Map *map) { - return get8(map, offsetof(struct Ureg_amd64, ip), (uvlong*)&ureg_amd64.ip); + uvlong x; + int r; + + r = get8(map, offsetof(struct Ureg_amd64, ip), &x); + ureg_amd64.ip = x; + return r; } int amd64_getSP(Map *map) { - return get8(map, offsetof(struct Ureg_amd64, sp), (uvlong*)&ureg_amd64.sp); + uvlong x; + int r; + + r = get8(map, offsetof(struct Ureg_amd64, sp), &x); + ureg_amd64.sp = x; + return r; } uvlong @@ -148,6 +174,22 @@ amd64_uregSP(void) { } void +amd64_ppword(uvlong w) +{ + uchar buf[8]; + + buf[0] = w; + buf[1] = w >> 8; + buf[2] = w >> 16; + buf[3] = w >> 24; + buf[4] = w >> 32; + buf[5] = w >> 40; + buf[6] = w >> 48; + buf[7] = w >> 56; + Bwrite(pproffd, buf, 8); +} + +void x86_regprint(void) { fprint(2, "ax\t0x%llux\n", ureg_x86.ax); @@ -175,7 +217,7 @@ x86_getregs(Map *map) for(i = 0; i < sizeof ureg_x86; i+=4) { if(get4(map, (uvlong)i, &((uint32*)&ureg_x86)[i/4]) < 0) - return -1; + return -1; } return 0; } @@ -204,6 +246,18 @@ x86_uregSP(void) return (uvlong)ureg_x86.sp; } +void +x86_ppword(uvlong w) +{ + uchar buf[4]; + + buf[0] = w; + buf[1] = w >> 8; + buf[2] = w >> 16; + buf[3] = w >> 24; + Bwrite(pproffd, buf, 4); +} + Arch archtab[] = { { "amd64", @@ -213,6 +267,7 @@ Arch archtab[] = { amd64_getSP, amd64_uregPC, amd64_uregSP, + amd64_ppword, }, { "386", @@ -222,6 +277,7 @@ Arch archtab[] = { x86_getSP, x86_uregPC, x86_uregSP, + x86_ppword, }, { nil @@ -345,6 +401,36 @@ addtohistogram(uvlong pc, uvlong callerpc, uvlong sp) counters[h] = x; } +void +addppword(uvlong pc) +{ + if(pc == 0) { + return; + } + if(nppdata == ppalloc) { + ppalloc = (1000+nppdata)*2; + ppdata = realloc(ppdata, ppalloc * sizeof ppdata[0]); + if(ppdata == nil) { + fprint(2, "prof: realloc failed: %r\n"); + exit(2); + } + } + ppdata[nppdata++] = pc; +} + +void +startpptrace() +{ + ppstart = nppdata; + addppword(~0); +} + +void +endpptrace() +{ + ppdata[ppstart] = nppdata-ppstart-1; +} + uvlong nextpc; void @@ -357,17 +443,22 @@ xptrace(Map *map, uvlong pc, uvlong sp, Symbol *sym) } if(histograms) addtohistogram(nextpc, pc, sp); - if(!histograms || stacks > 1) { + if(!histograms || stacks > 1 || pprof) { if(nextpc == 0) nextpc = sym->value; - fprint(2, "%s(", sym->name); - fprint(2, ")"); - if(nextpc != sym->value) - fprint(2, "+%#llux ", nextpc - sym->value); - if(have_syms && linenums && fileline(buf, sizeof buf, pc)) { - fprint(2, " %s", buf); + if(stacks){ + fprint(2, "%s(", sym->name); + fprint(2, ")"); + if(nextpc != sym->value) + fprint(2, "+%#llux ", nextpc - sym->value); + if(have_syms && linenums && fileline(buf, sizeof buf, pc)) { + fprint(2, " %s", buf); + } + fprint(2, "\n"); + } + if (pprof) { + addppword(nextpc); } - fprint(2, "\n"); } nextpc = pc; } @@ -376,15 +467,21 @@ void stacktracepcsp(Map *map, uvlong pc, uvlong sp) { nextpc = pc; + if(pprof){ + startpptrace(); + } if(machdata->ctrace==nil) fprint(2, "no machdata->ctrace\n"); else if(machdata->ctrace(map, pc, sp, 0, xptrace) <= 0) fprint(2, "no stack frame: pc=%#p sp=%#p\n", pc, sp); else { addtohistogram(nextpc, 0, sp); - if(!histograms || stacks > 1) + if(stacks) fprint(2, "\n"); } + if(pprof){ + endpptrace(); + } } void @@ -399,7 +496,7 @@ printpc(Map *map, uvlong pc, uvlong sp) symoff(buf, sizeof(buf), pc, CANY); fprint(2, "%s\n", buf); } - if(stacks){ + if(stacks || pprof){ stacktracepcsp(map, pc, sp); } else if(histograms){ @@ -408,13 +505,55 @@ printpc(Map *map, uvlong pc, uvlong sp) } void +ppmaps(void) +{ + int fd, n; + char tmp[100]; + Seg *seg; + + // If it's Linux, the info is in /proc/$pid/maps + snprint(tmp, sizeof tmp, "/proc/%d/maps", pid); + fd = open(tmp, 0); + if(fd >= 0) { + n = read(fd, ppmapdata, sizeof ppmapdata - 1); + close(fd); + if(n < 0) { + fprint(2, "prof: can't read %s: %r\n", tmp); + exit(2); + } + ppmapdata[n] = 0; + return; + } + + // It's probably a mac. Synthesize an entry for the text file. + // The register segment may come first but it has a zero offset, so grab the first non-zero offset segment. + for(n = 0; n < 3; n++){ + seg = &map[0]->seg[n]; + if(seg->b == 0) { + continue; + } + snprint(ppmapdata, sizeof ppmapdata, + "%.16x-%.16x r-xp %d 00:00 34968549 %s\n", + seg->b, seg->e, seg->f, "/home/r/6.out" + ); + return; + } + fprint(2, "prof: no text segment in maps for %s\n", file); + exit(2); +} + +void samples(void) { int i, pid, msec; struct timespec req; + int getmaps; req.tv_sec = delta_msec/1000; req.tv_nsec = 1000000*(delta_msec % 1000); + getmaps = 0; + if(pprof) + getmaps= 1; for(msec = 0; total_sec <= 0 || msec < 1000*total_sec; msec += delta_msec) { nsample++; nsamplethread += nthread; @@ -433,6 +572,10 @@ samples(void) getthreads(); if(nthread == 0) break; + if(getmaps) { + getmaps = 0; + ppmaps(); + } } } @@ -534,6 +677,106 @@ dumphistogram() } } +typedef struct Trace Trace; +struct Trace { + int count; + int npc; + uvlong *pc; + Trace *next; +}; + +void +dumppprof() +{ + uvlong i, n, *p, *e; + int ntrace; + Trace *trace, *tp, *up, *prev; + + if(!pprof) + return; + e = ppdata + nppdata; + // Create list of traces. First, count the traces + ntrace = 0; + for(p = ppdata; p < e;) { + n = *p++; + p += n; + if(n == 0) + continue; + ntrace++; + } + print("%d traces\n", ntrace); + if(ntrace <= 0) + return; + // Allocate and link the traces together. + trace = malloc(ntrace * sizeof(Trace)); + tp = trace; + for(p = ppdata; p < e;) { + n = *p++; + if(n == 0) + continue; + tp->count = 1; + tp->npc = n; + tp->pc = p; + tp->next = tp+1; + tp++; + p += n; + } + trace[ntrace-1].next = nil; +if(0) + for(tp = trace; tp != nil; tp = tp->next) { + print("%d: ", tp->npc); + for(i = 0; i < tp->npc; i++) { + print("%llx ", tp->pc[i]); + } + print("\n"); + } + // Eliminate duplicates. Lousy algorithm, although not as bad as it looks because + // the list collapses fast. + for(tp = trace; tp != nil; tp = tp->next) { + prev = tp; + for(up = tp->next; up != nil; up = up->next) { + if(up->npc == tp->npc && memcmp(up->pc, tp->pc, up->npc*sizeof up->pc[0]) == 0) { + tp->count++; + prev->next = up->next; + } else { + prev = up; + } + } + } + for(tp = trace; tp != nil; tp = tp->next) { + print("[%d] %d: ", tp->count, tp->npc); + for(i = 0; i < tp->npc; i++) { + print("%llx ", tp->pc[i]); + } + print("\n"); + } + // Write file. + // See http://code.google.com/p/google-perftools/source/browse/trunk/doc/cpuprofile-fileformat.html + // BUG: assumes little endian. + // 1) Header + arch->ppword(0); // must be zero + arch->ppword(3); // 3 words follow in header + arch->ppword(0); // must be zero + arch->ppword(delta_msec * 1000); // sampling period in microseconds + arch->ppword(0); // must be zero (padding) + // 2) One record for each trace. + for(tp = trace; tp != nil; tp = tp->next) { + arch->ppword(tp->count); + arch->ppword(tp->npc); + for(i = 0; i < tp->npc; i++) { + arch->ppword(tp->pc[i]); + } + } + // 3) Binary trailer + arch->ppword(0); // must be zero + arch->ppword(1); // must be one + arch->ppword(0); // must be zero + // 4) Mapped objects. + Bwrite(pproffd, ppmapdata, strlen(ppmapdata)); + // 5) That's it. + Bterm(pproffd); +} + int startprocess(char **argv) { @@ -574,8 +817,18 @@ int main(int argc, char *argv[]) { int i; + char *ppfile; ARGBEGIN{ + case 'P': + pprof =1; + ppfile = EARGF(Usage()); + pproffd = Bopen(ppfile, OWRITE); + if(pproffd == nil) { + fprint(2, "prof: cannot open %s: %r\n", ppfile); + exit(2); + } + break; case 'd': delta_msec = atoi(EARGF(Usage())); break; @@ -600,10 +853,12 @@ main(int argc, char *argv[]) case 's': stacks++; break; + default: + Usage(); }ARGEND if(pid <= 0 && argc == 0) Usage(); - if(functions+linenums+registers+stacks == 0) + if(functions+linenums+registers+stacks+pprof == 0) histograms = 1; if(!machbyname("amd64")) { fprint(2, "prof: no amd64 support\n", pid); @@ -651,5 +906,6 @@ main(int argc, char *argv[]) samples(); detach(); dumphistogram(); + dumppprof(); exit(0); } |