diff options
-rw-r--r-- | common.c | 12 | ||||
-rw-r--r-- | common.h | 19 | ||||
-rw-r--r-- | cube-shadertoy.c | 4 | ||||
-rw-r--r-- | drm-atomic.c | 5 | ||||
-rw-r--r-- | drm-legacy.c | 5 | ||||
-rw-r--r-- | kmscube.c | 20 | ||||
-rw-r--r-- | meson.build | 2 | ||||
-rw-r--r-- | perfcntrs.c | 477 |
8 files changed, 542 insertions, 2 deletions
@@ -274,6 +274,18 @@ int init_egl(struct egl *egl, const struct gbm *gbm, int samples) get_proc_gl(GL_OES_EGL_image, glEGLImageTargetTexture2DOES); + get_proc_gl(GL_AMD_performance_monitor, glGetPerfMonitorGroupsAMD); + get_proc_gl(GL_AMD_performance_monitor, glGetPerfMonitorCountersAMD); + get_proc_gl(GL_AMD_performance_monitor, glGetPerfMonitorGroupStringAMD); + get_proc_gl(GL_AMD_performance_monitor, glGetPerfMonitorCounterStringAMD); + get_proc_gl(GL_AMD_performance_monitor, glGetPerfMonitorCounterInfoAMD); + get_proc_gl(GL_AMD_performance_monitor, glGenPerfMonitorsAMD); + get_proc_gl(GL_AMD_performance_monitor, glDeletePerfMonitorsAMD); + get_proc_gl(GL_AMD_performance_monitor, glSelectPerfMonitorCountersAMD); + get_proc_gl(GL_AMD_performance_monitor, glBeginPerfMonitorAMD); + get_proc_gl(GL_AMD_performance_monitor, glEndPerfMonitorAMD); + get_proc_gl(GL_AMD_performance_monitor, glGetPerfMonitorCounterDataAMD); + return 0; } @@ -122,6 +122,19 @@ struct egl { PFNEGLCLIENTWAITSYNCKHRPROC eglClientWaitSyncKHR; PFNEGLDUPNATIVEFENCEFDANDROIDPROC eglDupNativeFenceFDANDROID; + /* AMD_performance_monitor */ + PFNGLGETPERFMONITORGROUPSAMDPROC glGetPerfMonitorGroupsAMD; + PFNGLGETPERFMONITORCOUNTERSAMDPROC glGetPerfMonitorCountersAMD; + PFNGLGETPERFMONITORGROUPSTRINGAMDPROC glGetPerfMonitorGroupStringAMD; + PFNGLGETPERFMONITORCOUNTERSTRINGAMDPROC glGetPerfMonitorCounterStringAMD; + PFNGLGETPERFMONITORCOUNTERINFOAMDPROC glGetPerfMonitorCounterInfoAMD; + PFNGLGENPERFMONITORSAMDPROC glGenPerfMonitorsAMD; + PFNGLDELETEPERFMONITORSAMDPROC glDeletePerfMonitorsAMD; + PFNGLSELECTPERFMONITORCOUNTERSAMDPROC glSelectPerfMonitorCountersAMD; + PFNGLBEGINPERFMONITORAMDPROC glBeginPerfMonitorAMD; + PFNGLENDPERFMONITORAMDPROC glEndPerfMonitorAMD; + PFNGLGETPERFMONITORCOUNTERDATAAMDPROC glGetPerfMonitorCounterDataAMD; + bool modifiers_supported; void (*draw)(unsigned i); @@ -174,6 +187,12 @@ init_cube_video(const struct gbm *gbm, const char *video, int samples) } #endif +void init_perfcntrs(const struct egl *egl, const char *perfcntrs); +void start_perfcntrs(void); +void end_perfcntrs(void); +void finish_perfcntrs(void); +void dump_perfcntrs(unsigned nframes, uint64_t elapsed_time_ns); + #define NSEC_PER_SEC (INT64_C(1000) * USEC_PER_SEC) #define USEC_PER_SEC (INT64_C(1000) * MSEC_PER_SEC) #define MSEC_PER_SEC INT64_C(1000) diff --git a/cube-shadertoy.c b/cube-shadertoy.c index 7642285..7b148da 100644 --- a/cube-shadertoy.c +++ b/cube-shadertoy.c @@ -313,8 +313,12 @@ static void draw_shadertoy(unsigned i) glDrawBuffers(1, mrt_bufs); + start_perfcntrs(); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + end_perfcntrs(); + glDisableVertexAttribArray(0); /* switch back to back buffer: */ diff --git a/drm-atomic.c b/drm-atomic.c index 13564d7..1fc7586 100644 --- a/drm-atomic.c +++ b/drm-atomic.c @@ -308,12 +308,17 @@ static int atomic_run(const struct gbm *gbm, const struct egl *egl) flags &= ~(DRM_MODE_ATOMIC_ALLOW_MODESET); } + finish_perfcntrs(); + + cur_time = get_time_ns(); double elapsed_time = cur_time - start_time; double secs = elapsed_time / (double)NSEC_PER_SEC; unsigned frames = i - 1; /* first frame ignored */ printf("Rendered %u frames in %f sec (%f fps)\n", frames, secs, (double)frames/secs); + dump_perfcntrs(frames, elapsed_time); + return ret; } diff --git a/drm-legacy.c b/drm-legacy.c index aeaba50..6c35904 100644 --- a/drm-legacy.c +++ b/drm-legacy.c @@ -139,12 +139,17 @@ static int legacy_run(const struct gbm *gbm, const struct egl *egl) bo = next_bo; } + finish_perfcntrs(); + + cur_time = get_time_ns(); double elapsed_time = cur_time - start_time; double secs = elapsed_time / (double)NSEC_PER_SEC; unsigned frames = i - 1; /* first frame ignored */ printf("Rendered %u frames in %f sec (%f fps)\n", frames, secs, (double)frames/secs); + dump_perfcntrs(frames, elapsed_time); + return 0; } @@ -41,7 +41,7 @@ static const struct egl *egl; static const struct gbm *gbm; static const struct drm *drm; -static const char *shortopts = "Ac:D:f:M:m:S:s:V:v:"; +static const char *shortopts = "Ac:D:f:M:m:p:S:s:V:v:"; static const struct option longopts[] = { {"atomic", no_argument, 0, 'A'}, @@ -50,6 +50,7 @@ static const struct option longopts[] = { {"format", required_argument, 0, 'f'}, {"mode", required_argument, 0, 'M'}, {"modifier", required_argument, 0, 'm'}, + {"perfcntr", required_argument, 0, 'p'}, {"samples", required_argument, 0, 's'}, {"video", required_argument, 0, 'V'}, {"vmode", required_argument, 0, 'v'}, @@ -71,9 +72,12 @@ static void usage(const char *name) " nv12-2img - yuv textured (color conversion in shader)\n" " nv12-1img - yuv textured (single nv12 texture)\n" " -m, --modifier=MODIFIER hardcode the selected modifier\n" + " -p, --perfcntr=LIST sample specified performance counters using\n" + " the AMD_performance_monitor extension (comma\n" + " separated list, shadertoy mode only)\n" " -S, --shadertoy=FILE use specified shadertoy shader\n" " -s, --samples=N use MSAA\n" - " -V, --video=FILE video textured cube\n" + " -V, --video=FILE video textured cube (comma separated list)\n" " -v, --vmode=VMODE specify the video mode in the format\n" " <mode>[-<vrefresh>]\n", name); @@ -84,6 +88,7 @@ int main(int argc, char *argv[]) const char *device = NULL; const char *video = NULL; const char *shadertoy = NULL; + const char *perfcntr = NULL; char mode_str[DRM_DISPLAY_MODE_LEN] = ""; char *p; enum mode mode = SMOOTH; @@ -145,6 +150,9 @@ int main(int argc, char *argv[]) case 'm': modifier = strtoull(optarg, NULL, 0); break; + case 'p': + perfcntr = optarg; + break; case 'S': mode = SHADERTOY; shadertoy = optarg; @@ -205,6 +213,14 @@ int main(int argc, char *argv[]) return -1; } + if (perfcntr) { + if (mode != SHADERTOY) { + printf("performance counters only supported in shadertoy mode\n"); + return -1; + } + init_perfcntrs(egl, perfcntr); + } + /* clear the color buffer */ glClearColor(0.5, 0.5, 0.5, 1.0); glClear(GL_COLOR_BUFFER_BIT); diff --git a/meson.build b/meson.build index 5d7df61..b8131db 100644 --- a/meson.build +++ b/meson.build @@ -45,6 +45,7 @@ sources = files( 'frame-512x512-NV12.c', 'frame-512x512-RGBA.c', 'kmscube.c', + 'perfcntrs.c', ) cc = meson.get_compiler('c') @@ -99,5 +100,6 @@ executable('texturator', files( 'common.c', 'drm-legacy.c', 'drm-common.c', + 'perfcntrs.c', # not used, but required to link 'texturator.c', ), dependencies : dep_common, install : true) diff --git a/perfcntrs.c b/perfcntrs.c new file mode 100644 index 0000000..555a1b1 --- /dev/null +++ b/perfcntrs.c @@ -0,0 +1,477 @@ +/* + * Copyright © 2020 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#define _GNU_SOURCE + +#include <assert.h> +#include <err.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <GLES3/gl3.h> + +#include "common.h" + +/* Module to collect a specified set of performance counts, and accumulate + * results, using the GL_AMD_performance_monitor extension. + * + * Call start_perfcntrs() before the draw(s) to measure, and end_perfcntrs() + * after the last draw to measure. This can be done multiple times, with + * the results accumulated. + */ + +/** + * Accumulated counter result: + */ +union counter_result { + uint32_t u32; /* GL_UNSIGNED_INT */ + float f; /* GL_FLOAT, GL_PERCENTAGE_AMD */ + uint64_t u64; /* GL_UNSIGNED_INT64_AMD */ +}; + +/** + * Tracking for a requested counter + */ +struct counter { + union counter_result result; + /* index into perfcntrs.groups[gidx].counters[cidx] + * Note that the group_idx/counter_idx is not necessarily the + * same as the group_id/counter_id. + */ + unsigned gidx; + unsigned cidx; +}; + +/** + * Description of gl counter groups and counters: + */ + +struct gl_counter { + char *name; + GLuint counter_id; + GLuint counter_type; + struct counter *counter; /* NULL if this is not a counter we track */ +}; + +struct gl_counter_group { + char *name; + GLuint group_id; + GLint max_active_counters; + GLint num_counters; + struct gl_counter *counters; + + /* number of counters in this group which are enabled: */ + int num_enabled_counters; +}; + +struct gl_monitor { + GLuint id; + bool valid; + bool active; +}; + +/** + * module state + */ +static struct { + const struct egl *egl; + + /* The extension doesn't let us pause/resume a single counter, so + * instead use a sequence of monitors, one per start_perfcntrs()/ + * end_perfcntrs() pair, so that we don't need to immediately read + * back a result, which could cause a stall. + */ + struct gl_monitor monitors[4]; + unsigned current_monitor; + + /* The requested counters to monitor: + */ + unsigned num_counters; + struct counter *counters; + + /* The description of all counter groups and the counters they + * contain, not just including the ones we monitor. + */ + GLint num_groups; + struct gl_counter_group *groups; + +} perfcntr; + +static void get_groups_and_counters(const struct egl *egl) +{ + int n; + + egl->glGetPerfMonitorGroupsAMD(&perfcntr.num_groups, 0, NULL); + perfcntr.groups = calloc(perfcntr.num_groups, sizeof(struct gl_counter_group)); + + GLuint group_ids[perfcntr.num_groups]; + egl->glGetPerfMonitorGroupsAMD(NULL, perfcntr.num_groups, group_ids); + + for (int i = 0; i < perfcntr.num_groups; i++) { + struct gl_counter_group *g = &perfcntr.groups[i]; + + g->group_id = group_ids[i]; + + egl->glGetPerfMonitorGroupStringAMD(g->group_id, 0, &n, NULL); + g->name = malloc(n+1); + egl->glGetPerfMonitorGroupStringAMD(g->group_id, n+1, NULL, g->name); + + egl->glGetPerfMonitorCountersAMD(g->group_id, &g->num_counters, + &g->max_active_counters, 0, NULL); + + g->counters = calloc(g->num_counters, sizeof(struct gl_counter)); + + GLuint counter_ids[g->num_counters]; + egl->glGetPerfMonitorCountersAMD(g->group_id, NULL, NULL, + g->num_counters, counter_ids); + + printf("GROUP[%u]: name=%s, max_active_counters=%u, num_counters=%u\n", + g->group_id, g->name, g->max_active_counters, g->num_counters); + + for (int j = 0; j < g->num_counters; j++) { + struct gl_counter *c = &g->counters[j]; + + c->counter_id = counter_ids[j]; + + egl->glGetPerfMonitorCounterStringAMD(g->group_id, + c->counter_id, 0, &n, NULL); + c->name = malloc(n+1); + egl->glGetPerfMonitorCounterStringAMD(g->group_id, + c->counter_id, n+1, NULL, c->name); + + egl->glGetPerfMonitorCounterInfoAMD(g->group_id, + c->counter_id, GL_COUNTER_TYPE_AMD, + &c->counter_type); + + printf("\tCOUNTER[%u]: name=%s, counter_type=%04x\n", + c->counter_id, c->name, c->counter_type); + } + } +} + +static void find_counter(const char *name, unsigned *group_idx, unsigned *counter_idx) +{ + for (int i = 0; i < perfcntr.num_groups; i++) { + struct gl_counter_group *g = &perfcntr.groups[i]; + + for (int j = 0; j < g->num_counters; j++) { + struct gl_counter *c = &g->counters[j]; + + if (strcmp(name, c->name) == 0) { + *group_idx = i; + *counter_idx = j; + return; + } + } + } + + errx(-1, "Could not find counter: %s", name); +} + +static void add_counter(const char *name) +{ + int idx = perfcntr.num_counters++; + + perfcntr.counters = realloc(perfcntr.counters, + perfcntr.num_counters * sizeof(struct counter)); + + struct counter *c = &perfcntr.counters[idx]; + memset(c, 0, sizeof(*c)); + + find_counter(name, &c->gidx, &c->cidx); + + struct gl_counter_group *g = &perfcntr.groups[c->gidx]; + if (g->num_enabled_counters >= g->max_active_counters) { + errx(-1, "Too many counters in group '%s'", g->name); + } + + g->num_enabled_counters++; +} + +/* parse list of performance counter names, and find their group+counter */ +static void find_counters(const char *perfcntrs) +{ + char *cnames, *s; + + cnames = strdup(perfcntrs); + while ((s = strstr(cnames, ","))) { + char *name = cnames; + s[0] = '\0'; + cnames = &s[1]; + + add_counter(name); + } + + add_counter(cnames); +} + +void init_perfcntrs(const struct egl *egl, const char *perfcntrs) +{ + if (egl_check(egl, glGetPerfMonitorGroupsAMD) || + egl_check(egl, glGetPerfMonitorCountersAMD) || + egl_check(egl, glGetPerfMonitorGroupStringAMD) || + egl_check(egl, glGetPerfMonitorCounterStringAMD) || + egl_check(egl, glGetPerfMonitorCounterInfoAMD) || + egl_check(egl, glGenPerfMonitorsAMD) || + egl_check(egl, glDeletePerfMonitorsAMD) || + egl_check(egl, glSelectPerfMonitorCountersAMD) || + egl_check(egl, glBeginPerfMonitorAMD) || + egl_check(egl, glEndPerfMonitorAMD) || + egl_check(egl, glGetPerfMonitorCounterDataAMD)) { + errx(-1, "AMD_performance_monitor is not supported"); + } + + get_groups_and_counters(egl); + find_counters(perfcntrs); + + /* setup enabled counters.. do this after realloc() stuff, + * otherwise the counter pointer may not be valid: + */ + for (unsigned i = 0; i < perfcntr.num_counters; i++) { + struct counter *c = &perfcntr.counters[i]; + perfcntr.groups[c->gidx].counters[c->cidx].counter = c; + } + + perfcntr.egl = egl; +} + +/* Create perf-monitor, and configure the counters it will monitor */ +static void init_monitor(struct gl_monitor *m) +{ + const struct egl *egl = perfcntr.egl; + + assert(!m->valid); + assert(!m->active); + + egl->glGenPerfMonitorsAMD(1, &m->id); + + for (int i = 0; i < perfcntr.num_groups; i++) { + struct gl_counter_group *g = &perfcntr.groups[i]; + + if (!g->num_enabled_counters) + continue; + + int idx = 0; + GLuint counters[g->num_enabled_counters]; + + for (int j = 0; j < g->num_counters; j++) { + struct gl_counter *c = &g->counters[j]; + + if (!c->counter) + continue; + + assert(idx < g->num_enabled_counters); + counters[idx++] = c->counter_id; + } + + assert(idx == g->num_enabled_counters); + egl->glSelectPerfMonitorCountersAMD(m->id, GL_TRUE, + g->group_id, g->num_enabled_counters, counters); + } + + m->valid = true; +} + +static struct gl_counter *lookup_counter(GLuint group_id, GLuint counter_id) +{ + for (int i = 0; i < perfcntr.num_groups; i++) { + struct gl_counter_group *g = &perfcntr.groups[i]; + + if (g->group_id != group_id) + continue; + + for (int j = 0; j < g->num_counters; j++) { + struct gl_counter *c = &g->counters[j]; + + if (c->counter_id != counter_id) + continue; + + return c; + } + } + + errx(-1, "invalid counter: group_id=%u, counter_id=%u", + group_id, counter_id); +} + +/* Collect monitor results and delete monitor */ +static void finish_monitor(struct gl_monitor *m) +{ + const struct egl *egl = perfcntr.egl; + + assert(m->valid); + assert(!m->active); + + GLuint result_size; + egl->glGetPerfMonitorCounterDataAMD(m->id, GL_PERFMON_RESULT_SIZE_AMD, + sizeof(GLint), &result_size, NULL); + + GLuint *data = malloc(result_size); + + GLsizei bytes_written; + egl->glGetPerfMonitorCounterDataAMD(m->id, GL_PERFMON_RESULT_AMD, + result_size, data, &bytes_written); + + GLsizei idx = 0; + while ((4 * idx) < bytes_written) { + GLuint group_id = data[idx++]; + GLuint counter_id = data[idx++]; + + struct gl_counter *c = lookup_counter(group_id, counter_id); + + assert(c->counter); + + switch(c->counter_type) { + case GL_UNSIGNED_INT: + c->counter->result.u32 += *(uint32_t *)(&data[idx]); + idx += 1; + break; + case GL_FLOAT: + c->counter->result.f += *(float *)(&data[idx]); + idx += 1; + break; + case GL_UNSIGNED_INT64_AMD: + c->counter->result.u64 += *(uint64_t *)(&data[idx]); + idx += 2; + break; + case GL_PERCENTAGE_AMD: + default: + errx(-1, "TODO unhandled counter type: 0x%04x", + c->counter_type); + break; + } + } + + egl->glDeletePerfMonitorsAMD(1, &m->id); + m->valid = false; +} + +void start_perfcntrs(void) +{ + const struct egl *egl = perfcntr.egl; + + if (!egl) { + return; + } + + struct gl_monitor *m = &perfcntr.monitors[perfcntr.current_monitor]; + + /* once we wrap-around and start re-using existing slots, collect + * previous results and delete the monitor before re-using the slot: + */ + if (m->valid) { + finish_monitor(m); + } + + init_monitor(m); + + egl->glBeginPerfMonitorAMD(m->id); + m->active = true; +} + +void end_perfcntrs(void) +{ + const struct egl *egl = perfcntr.egl; + + if (!egl) { + return; + } + + struct gl_monitor *m = &perfcntr.monitors[perfcntr.current_monitor]; + + assert(m->valid); + assert(m->active); + + /* end collection, but defer collecting results to avoid stall: */ + egl->glEndPerfMonitorAMD(m->id); + m->active = false; + + /* move to next slot: */ + perfcntr.current_monitor = + (perfcntr.current_monitor + 1) % ARRAY_SIZE(perfcntr.monitors); +} + +/* collect any remaining perfcntr results.. this should be called + * before computing the elapsed time (passed to dump_perfcntrs()) + * to ensured queued up draws which are monitored complete, ie. + * so that elapsed time covers the entirety of the monitored + * draws. + */ +void finish_perfcntrs(void) +{ + if (!perfcntr.egl) + return; + + /* collect any remaining results, it really doesn't matter the order */ + for (unsigned i = 0; i < ARRAY_SIZE(perfcntr.monitors); i++) { + struct gl_monitor *m = &perfcntr.monitors[i]; + if (m->valid) { + finish_monitor(m); + } + } +} + +void dump_perfcntrs(unsigned nframes, uint64_t elapsed_time_ns) +{ + if (!perfcntr.egl) { + return; + } + + /* print column headers: */ + printf("FPS"); + for (unsigned i = 0; i < perfcntr.num_counters; i++) { + struct counter *c = &perfcntr.counters[i]; + + printf(",%s", perfcntr.groups[c->gidx].counters[c->cidx].name); + } + printf("\n"); + + /* print results: */ + double secs = elapsed_time_ns / (double)NSEC_PER_SEC; + printf("%f", (double)nframes/secs); + for (unsigned i = 0; i < perfcntr.num_counters; i++) { + struct counter *c = &perfcntr.counters[i]; + + GLuint counter_type = + perfcntr.groups[c->gidx].counters[c->cidx].counter_type; + switch (counter_type) { + case GL_UNSIGNED_INT: + printf(",%u", c->result.u32); + break; + case GL_FLOAT: + printf(",%f", c->result.f); + break; + case GL_UNSIGNED_INT64_AMD: + printf(",%"PRIu64, c->result.u64); + break; + case GL_PERCENTAGE_AMD: + default: + errx(-1, "TODO unhandled counter type: 0x%04x", + counter_type); + break; + } + } + printf("\n"); +} |