summaryrefslogtreecommitdiff
path: root/src/cmd/prof
diff options
context:
space:
mode:
author Rob Pike <r@golang.org> 2010-01-11 16:53:45 -0800
committer Rob Pike <r@golang.org> 2010-01-11 16:53:45 -0800
commit db0cb0f60e8fcc9a4258b4adf164e92fc31ca305 (patch)
tree dbac2e0f773b0ba482bf0a06f14180542516aa3e /src/cmd/prof
parent c5965ceffb551ee87d26119ab52e659ae50563b7 (diff)
download go-db0cb0f60e8fcc9a4258b4adf164e92fc31ca305.tar.gz
add pprof output format to prof.
amd64 linux only. R=rsc CC=golang-dev http://codereview.appspot.com/186077
Diffstat (limited to 'src/cmd/prof')
-rw-r--r-- src/cmd/prof/doc.go 9
-rw-r--r-- src/cmd/prof/main.c 290
2 files changed, 280 insertions, 19 deletions
diff --git a/src/cmd/prof/doc.go b/src/cmd/prof/doc.go
index 1493c641d..1f2209f04 100644
--- a/src/cmd/prof/doc.go
+++ b/src/cmd/prof/doc.go
@@ -19,10 +19,15 @@ statistics.
Usage: prof -p pid [-t total_secs] [-d delta_msec] [6.out args ...]
-The formats (default -h) are:
+The output modes (default -h) are:
+ -P file.prof:
+ Write the profile information to file.prof, in the format used by pprof.
+ At the moment, this only works on Linux amd64 binaries and requires that the
+ binary be written using 6l -e to produce ELF debug info.
+ See http://code.google.com/p/google-perftools for details.
-h: histograms
- How many times a sample occurred at each location
+ How many times a sample occurred at each location.
-f: dynamic functions
At each sample period, print the name of the executing function.
-l: dynamic file and line numbers
diff --git a/src/cmd/prof/main.c b/src/cmd/prof/main.c
index ed14cf697..9de563d90 100644
--- a/src/cmd/prof/main.c
+++ b/src/cmd/prof/main.c
@@ -20,7 +20,6 @@ char* file = "6.out";
static Fhdr fhdr;
int have_syms;
int fd;
-Map *symmap;
struct Ureg_amd64 ureg_amd64;
struct Ureg_x86 ureg_x86;
int total_sec = 0;
@@ -28,7 +27,17 @@ int delta_msec = 100;
int nsample;
int nsamplethread;
+// pprof data, stored as sequences of N followed by N PC values.
+// Each recorded stack trace occupies one count word (N) followed by its N PCs;
+// startpptrace reserves the count word and endpptrace fills it in.
+// See http://code.google.com/p/google-perftools .
+uvlong *ppdata; // trace words; grown on demand by addppword
+Biobuf* pproffd; // buffered output (not a raw fd) for the profile file given with -P
+long ppstart; // index in ppdata of the count word of the trace being recorded
+long nppdata; // number of words of ppdata currently in use
+long ppalloc; // number of words allocated for ppdata
+char ppmapdata[10*1024]; // NUL-terminated map text filled in by ppmaps and appended to the output file
+
// output formats
+int pprof; // print pprof output to named file
int functions; // print functions
int histograms; // print histograms
int linenums; // print file and line numbers rather than function names
@@ -46,6 +55,7 @@ Usage(void)
{
fprint(2, "Usage: prof -p pid [-t total_secs] [-d delta_msec] [6.out args ...]\n");
fprint(2, "\tformats (default -h):\n");
+ fprint(2, "\t\t-c file.prof: write [c]pprof output to file.prof\n");
fprint(2, "\t\t-h: histograms\n");
fprint(2, "\t\t-f: dynamic functions\n");
fprint(2, "\t\t-l: dynamic file and line numbers\n");
@@ -79,6 +89,7 @@ struct Arch {
int (*getSP)(Map*);
uvlong (*uregPC)(void);
uvlong (*uregSP)(void);
+ void (*ppword)(uvlong w);
};
void
@@ -116,24 +127,39 @@ int
amd64_getregs(Map *map)
{
int i;
+ union {
+ uvlong regs[1];
+ struct Ureg_amd64 ureg;
+ } u;
for(i = 0; i < sizeof ureg_amd64; i+=8) {
- if(get8(map, (uvlong)i, &((uvlong*)&ureg_amd64)[i/4]) < 0)
- return -1;
+ if(get8(map, (uvlong)i, &u.regs[i/8]) < 0)
+ return -1;
}
+ ureg_amd64 = u.ureg;
return 0;
}
int
amd64_getPC(Map *map)
{
- return get8(map, offsetof(struct Ureg_amd64, ip), (uvlong*)&ureg_amd64.ip);
+ uvlong x;
+ int r;
+
+ r = get8(map, offsetof(struct Ureg_amd64, ip), &x);
+ ureg_amd64.ip = x;
+ return r;
}
int
amd64_getSP(Map *map)
{
- return get8(map, offsetof(struct Ureg_amd64, sp), (uvlong*)&ureg_amd64.sp);
+ uvlong x;
+ int r;
+
+ r = get8(map, offsetof(struct Ureg_amd64, sp), &x);
+ ureg_amd64.sp = x;
+ return r;
}
uvlong
@@ -148,6 +174,22 @@ amd64_uregSP(void) {
}
+// amd64_ppword writes w to pproffd as one 64-bit word,
+// least significant byte first.
void
+amd64_ppword(uvlong w)
+{
+	uchar le[8];
+	int i;
+
+	// Byte i of the output holds bits 8*i .. 8*i+7 of w.
+	for(i = 0; i < 8; i++)
+		le[i] = w >> (8*i);
+	Bwrite(pproffd, le, sizeof le);
+}
+
+void
x86_regprint(void)
{
fprint(2, "ax\t0x%llux\n", ureg_x86.ax);
@@ -175,7 +217,7 @@ x86_getregs(Map *map)
for(i = 0; i < sizeof ureg_x86; i+=4) {
if(get4(map, (uvlong)i, &((uint32*)&ureg_x86)[i/4]) < 0)
- return -1;
+ return -1;
}
return 0;
}
@@ -204,6 +246,18 @@ x86_uregSP(void)
return (uvlong)ureg_x86.sp;
}
+// x86_ppword writes the low 32 bits of w to pproffd as one 32-bit word,
+// least significant byte first.  The upper 32 bits of w are discarded.
+void
+x86_ppword(uvlong w)
+{
+	uchar le[4];
+	int i;
+
+	// Byte i of the output holds bits 8*i .. 8*i+7 of w.
+	for(i = 0; i < 4; i++)
+		le[i] = w >> (8*i);
+	Bwrite(pproffd, le, sizeof le);
+}
+
Arch archtab[] = {
{
"amd64",
@@ -213,6 +267,7 @@ Arch archtab[] = {
amd64_getSP,
amd64_uregPC,
amd64_uregSP,
+ amd64_ppword,
},
{
"386",
@@ -222,6 +277,7 @@ Arch archtab[] = {
x86_getSP,
x86_uregPC,
x86_uregSP,
+ x86_ppword,
},
{
nil
@@ -345,6 +401,36 @@ addtohistogram(uvlong pc, uvlong callerpc, uvlong sp)
counters[h] = x;
}
+// addppword appends pc to the pprof trace buffer ppdata, growing the
+// buffer first when it is full.  A zero pc is ignored.  Exits with
+// status 2 if the buffer cannot be grown.
+void
+addppword(uvlong pc)
+{
+	if(pc != 0) {
+		if(nppdata == ppalloc) {
+			// Full: grow to twice the current size plus some slack.
+			ppalloc = 2*(nppdata+1000);
+			ppdata = realloc(ppdata, ppalloc * sizeof *ppdata);
+			if(ppdata == nil) {
+				fprint(2, "prof: realloc failed: %r\n");
+				exit(2);
+			}
+		}
+		ppdata[nppdata] = pc;
+		nppdata++;
+	}
+}
+
+// startpptrace begins a new trace in ppdata.  It records the index of the
+// trace's count word in ppstart and reserves that slot with an all-ones
+// placeholder (addppword drops zero words); endpptrace overwrites it.
+void
+startpptrace(void)
+{
+	ppstart = nppdata;
+	addppword(~(uvlong)0);
+}
+
+// endpptrace finishes the current trace by storing, in the count word
+// reserved by startpptrace, the number of words appended after it.
+void
+endpptrace(void)
+{
+	long npc;
+
+	// Everything after the count word itself belongs to this trace.
+	npc = nppdata - ppstart - 1;
+	ppdata[ppstart] = npc;
+}
+
uvlong nextpc;
void
@@ -357,17 +443,22 @@ xptrace(Map *map, uvlong pc, uvlong sp, Symbol *sym)
}
if(histograms)
addtohistogram(nextpc, pc, sp);
- if(!histograms || stacks > 1) {
+ if(!histograms || stacks > 1 || pprof) {
if(nextpc == 0)
nextpc = sym->value;
- fprint(2, "%s(", sym->name);
- fprint(2, ")");
- if(nextpc != sym->value)
- fprint(2, "+%#llux ", nextpc - sym->value);
- if(have_syms && linenums && fileline(buf, sizeof buf, pc)) {
- fprint(2, " %s", buf);
+ if(stacks){
+ fprint(2, "%s(", sym->name);
+ fprint(2, ")");
+ if(nextpc != sym->value)
+ fprint(2, "+%#llux ", nextpc - sym->value);
+ if(have_syms && linenums && fileline(buf, sizeof buf, pc)) {
+ fprint(2, " %s", buf);
+ }
+ fprint(2, "\n");
+ }
+ if (pprof) {
+ addppword(nextpc);
}
- fprint(2, "\n");
}
nextpc = pc;
}
@@ -376,15 +467,21 @@ void
stacktracepcsp(Map *map, uvlong pc, uvlong sp)
{
nextpc = pc;
+ if(pprof){
+ startpptrace();
+ }
if(machdata->ctrace==nil)
fprint(2, "no machdata->ctrace\n");
else if(machdata->ctrace(map, pc, sp, 0, xptrace) <= 0)
fprint(2, "no stack frame: pc=%#p sp=%#p\n", pc, sp);
else {
addtohistogram(nextpc, 0, sp);
- if(!histograms || stacks > 1)
+ if(stacks)
fprint(2, "\n");
}
+ if(pprof){
+ endpptrace();
+ }
}
void
@@ -399,7 +496,7 @@ printpc(Map *map, uvlong pc, uvlong sp)
symoff(buf, sizeof(buf), pc, CANY);
fprint(2, "%s\n", buf);
}
- if(stacks){
+ if(stacks || pprof){
stacktracepcsp(map, pc, sp);
}
else if(histograms){
@@ -408,13 +505,55 @@ printpc(Map *map, uvlong pc, uvlong sp)
}
+// ppmaps fills ppmapdata with the NUL-terminated memory-map text that
+// dumppprof appends to the profile.  If /proc/$pid/maps can be opened its
+// contents are copied (truncated to the size of ppmapdata); otherwise one
+// entry is synthesized from the first of the first three segments of
+// map[0] whose b field is non-zero.  Exits with status 2 if the maps file
+// cannot be read or no such segment exists.
void
+ppmaps(void)
+{
+	int mapfd, n, len;
+	char tmp[100];
+	Seg *seg;
+
+	// If it's Linux, the info is in /proc/$pid/maps
+	snprint(tmp, sizeof tmp, "/proc/%d/maps", pid);
+	mapfd = open(tmp, 0);	// named mapfd so it does not hide the global fd
+	if(mapfd >= 0) {
+		// A single read may return only part of the file, so keep reading
+		// until EOF or until the buffer (less the terminating NUL) is full.
+		len = 0;
+		while(len < (int)sizeof ppmapdata - 1) {
+			n = read(mapfd, ppmapdata + len, sizeof ppmapdata - 1 - len);
+			if(n < 0) {
+				fprint(2, "prof: can't read %s: %r\n", tmp);
+				exit(2);
+			}
+			if(n == 0)
+				break;
+			len += n;
+		}
+		close(mapfd);
+		ppmapdata[len] = 0;
+		return;
+	}
+
+	// It's probably a mac. Synthesize an entry for the text file.
+	// The register segment may come first but it has a zero offset, so grab the first non-zero offset segment.
+	for(n = 0; n < 3; n++){
+		seg = &map[0]->seg[n];
+		if(seg->b == 0) {
+			continue;
+		}
+		// Name the binary being profiled (file) rather than a fixed path.
+		// NOTE(review): %x and %d assume int-sized Seg fields; Seg is declared
+		// elsewhere, so confirm the verbs match the field types.
+		snprint(ppmapdata, sizeof ppmapdata,
+			"%.16x-%.16x r-xp %d 00:00 34968549 %s\n",
+			seg->b, seg->e, seg->f, file
+		);
+		return;
+	}
+	fprint(2, "prof: no text segment in maps for %s\n", file);
+	exit(2);
+}
+
+void
samples(void)
{
int i, pid, msec;
struct timespec req;
+ int getmaps;
req.tv_sec = delta_msec/1000;
req.tv_nsec = 1000000*(delta_msec % 1000);
+ getmaps = 0;
+ if(pprof)
+ getmaps= 1;
for(msec = 0; total_sec <= 0 || msec < 1000*total_sec; msec += delta_msec) {
nsample++;
nsamplethread += nthread;
@@ -433,6 +572,10 @@ samples(void)
getthreads();
if(nthread == 0)
break;
+ if(getmaps) {
+ getmaps = 0;
+ ppmaps();
+ }
}
}
@@ -534,6 +677,106 @@ dumphistogram()
}
}
+// Trace is one distinct stack trace destined for the pprof output file.
+typedef struct Trace Trace;
+struct Trace {
+ int count; // number of samples that had exactly this trace
+ int npc; // number of PC values in pc
+ uvlong *pc; // the PC values; dumppprof points this into ppdata
+ Trace *next; // next trace in the list; nil terminates it
+};
+
+// dumppprof writes the traces collected in ppdata to pproffd in the pprof
+// CPU-profile format, followed by the map text in ppmapdata, and closes
+// the file.  It does nothing unless pprof output was requested, and it
+// returns without writing anything when no non-empty trace was recorded.
+// Along the way it prints the trace count and every distinct trace to
+// standard output.  Exits with status 2 if memory cannot be allocated.
+void
+dumppprof(void)
+{
+	uvlong i, n, *p, *e;
+	int ntrace;
+	Trace *trace, *tp, *up, *prev;
+
+	if(!pprof)
+		return;
+	e = ppdata + nppdata;
+	// Create list of traces.  First, count the traces
+	// (records with a zero count are skipped).
+	ntrace = 0;
+	for(p = ppdata; p < e;) {
+		n = *p++;
+		p += n;
+		if(n == 0)
+			continue;
+		ntrace++;
+	}
+	print("%d traces\n", ntrace);
+	if(ntrace <= 0)
+		return;
+	// Allocate and link the traces together.
+	trace = malloc(ntrace * sizeof *trace);
+	if(trace == nil) {
+		fprint(2, "prof: malloc failed: %r\n");
+		exit(2);
+	}
+	tp = trace;
+	for(p = ppdata; p < e;) {
+		n = *p++;
+		if(n == 0)
+			continue;
+		tp->count = 1;
+		tp->npc = n;
+		tp->pc = p;	// points into ppdata; no copy is made
+		tp->next = tp+1;
+		tp++;
+		p += n;
+	}
+	trace[ntrace-1].next = nil;
+	// Debugging aid, disabled: print every trace before merging.
+	if(0) {
+		for(tp = trace; tp != nil; tp = tp->next) {
+			print("%d: ", tp->npc);
+			for(i = 0; i < tp->npc; i++) {
+				print("%llx ", tp->pc[i]);
+			}
+			print("\n");
+		}
+	}
+	// Eliminate duplicates.  Lousy algorithm, although not as bad as it looks because
+	// the list collapses fast.
+	for(tp = trace; tp != nil; tp = tp->next) {
+		prev = tp;
+		for(up = tp->next; up != nil; up = up->next) {
+			if(up->npc == tp->npc && memcmp(up->pc, tp->pc, up->npc*sizeof up->pc[0]) == 0) {
+				// Same trace: fold it into tp and unlink it.
+				tp->count++;
+				prev->next = up->next;
+			} else {
+				prev = up;
+			}
+		}
+	}
+	for(tp = trace; tp != nil; tp = tp->next) {
+		print("[%d] %d: ", tp->count, tp->npc);
+		for(i = 0; i < tp->npc; i++) {
+			print("%llx ", tp->pc[i]);
+		}
+		print("\n");
+	}
+	// Write file.
+	// See http://code.google.com/p/google-perftools/source/browse/trunk/doc/cpuprofile-fileformat.html
+	// BUG: assumes little endian.
+	// 1) Header
+	arch->ppword(0);	// must be zero
+	arch->ppword(3);	// 3 words follow in header
+	arch->ppword(0);	// must be zero
+	arch->ppword(delta_msec * 1000);	// sampling period in microseconds
+	arch->ppword(0);	// must be zero (padding)
+	// 2) One record for each trace.
+	for(tp = trace; tp != nil; tp = tp->next) {
+		arch->ppword(tp->count);
+		arch->ppword(tp->npc);
+		for(i = 0; i < tp->npc; i++) {
+			arch->ppword(tp->pc[i]);
+		}
+	}
+	// 3) Binary trailer
+	arch->ppword(0);	// must be zero
+	arch->ppword(1);	// must be one
+	arch->ppword(0);	// must be zero
+	// 4) Mapped objects.
+	Bwrite(pproffd, ppmapdata, strlen(ppmapdata));
+	// 5) That's it.  Flush explicitly so a failed write is reported
+	// instead of silently leaving a truncated profile.
+	if(Bflush(pproffd) < 0)
+		fprint(2, "prof: error writing pprof output: %r\n");
+	Bterm(pproffd);
+}
+
int
startprocess(char **argv)
{
@@ -574,8 +817,18 @@ int
main(int argc, char *argv[])
{
int i;
+ char *ppfile;
ARGBEGIN{
+ case 'P':
+ pprof =1;
+ ppfile = EARGF(Usage());
+ pproffd = Bopen(ppfile, OWRITE);
+ if(pproffd == nil) {
+ fprint(2, "prof: cannot open %s: %r\n", ppfile);
+ exit(2);
+ }
+ break;
case 'd':
delta_msec = atoi(EARGF(Usage()));
break;
@@ -600,10 +853,12 @@ main(int argc, char *argv[])
case 's':
stacks++;
break;
+ default:
+ Usage();
}ARGEND
if(pid <= 0 && argc == 0)
Usage();
- if(functions+linenums+registers+stacks == 0)
+ if(functions+linenums+registers+stacks+pprof == 0)
histograms = 1;
if(!machbyname("amd64")) {
fprint(2, "prof: no amd64 support\n", pid);
@@ -651,5 +906,6 @@ main(int argc, char *argv[])
samples();
detach();
dumphistogram();
+ dumppprof();
exit(0);
}