[perf] Calibrate tests to run for at least 2 seconds

By ensuring that tests take longer than a couple of seconds, we eliminate systematic errors in our measurements. However, because each timed run now batches many loops of the same test, we also effectively eliminate the synchronisation overhead from the result. To compensate, we attempt to estimate that overhead by reporting the difference between the time for a single un-batched run of the test and the minimum per-loop time averaged over a batched run.
author: Chris Wilson <chris@chris-wilson.co.uk> 2009-08-03 22:23:19 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2009-08-06 10:15:40 +0100
commit: 0db9e010fa70c65451d324cc9c0ade989f77fedd (patch)
tree: e249038d900a7afd7e3ede351517b74fd4be5764 /perf/cairo-perf.c
parent: 60c574ad062f3d8526056942bb7c9e71610a2773 (diff)
download: cairo-0db9e010fa70c65451d324cc9c0ade989f77fedd.tar.gz
1 files changed, 31 insertions, 8 deletions
diff --git a/perf/cairo-perf.c b/perf/cairo-perf.c
index 792120b99..21d48f422 100644
--- a/perf/cairo-perf.c
+++ b/perf/cairo-perf.c
@@ -191,9 +191,9 @@ cairo_perf_run (cairo_perf_t		*perf,
 
 	if (perf->summary) {
 	    fprintf (perf->summary,
-		     "[ # ] %8s.%-4s %28s %8s %8s %5s %5s %s\n",
+		     "[ # ] %8s.%-4s %28s %8s %8s %5s %5s %s %s\n",
 		     "backend", "content", "test-size", "min(ticks)", "min(ms)", "median(ms)",
-		     "stddev.", "iterations");
+		     "stddev.", "iterations", "overhead");
 	}
 	first_run = FALSE;
     }
@@ -208,7 +208,7 @@ cairo_perf_run (cairo_perf_t		*perf,
 		   name, perf->target->name,
 		   _content_to_string (perf->target->content, 0),
 		   perf->size);
-	perf_func (perf->cr, perf->size, perf->size);
+	perf_func (perf->cr, perf->size, perf->size, 1);
 	status = cairo_surface_write_to_png (cairo_get_target (perf->cr), filename);
 	if (status) {
 	    fprintf (stderr, "Failed to generate output check '%s': %s\n",
@@ -221,6 +221,9 @@ cairo_perf_run (cairo_perf_t		*perf,
 
     has_similar = cairo_perf_has_similar (perf);
     for (similar = 0; similar <= has_similar; similar++) {
+	cairo_perf_ticks_t calibration0, calibration;
+	unsigned loops;
+
 	if (perf->summary) {
 	    fprintf (perf->summary,
 		     "[%3d] %8s.%-5s %26s.%-3d ",
@@ -230,22 +233,41 @@ cairo_perf_run (cairo_perf_t		*perf,
 	    fflush (perf->summary);
 	}
 
-	/* We run one iteration in advance to warm caches, etc. */
+	/* We run one iteration in advance to warm caches and calibrate. */
 	cairo_perf_yield ();
 	if (similar)
 	    cairo_push_group_with_content (perf->cr,
 		                           cairo_boilerplate_content (perf->target->content));
-	(perf_func) (perf->cr, perf->size, perf->size);
+	perf_func (perf->cr, perf->size, perf->size, 1);
+	calibration0 = perf_func (perf->cr, perf->size, perf->size, 1);
+	loops = cairo_perf_ticks_per_second () / 100 / calibration0;
+	if (loops < 3)
+	    loops = 3;
+	calibration = (calibration0 + perf_func (perf->cr, perf->size, perf->size, loops)) / (loops + 1);
 	if (similar)
 	    cairo_pattern_destroy (cairo_pop_group (perf->cr));
 
+	/* XXX
+	 * Compute the number of loops required for the timing interval to
+	 * be ~2 seconds. This helps to eliminate sampling variance due to
+	 * timing and other systematic errors. However, it also hides
+	 * synchronisation overhead as we attempt to process a large batch
+	 * of identical operations in a single shot. This can be considered
+	 * both good and bad... It would be good to perform a more rigorous
+	 * analysis of the synchronisation overhead, that is to estimate
+	 * the time for loop=0.
+	 */
+	loops = 2 * cairo_perf_ticks_per_second () / calibration;
+	if (loops < 10)
+	    loops = 10;
+
 	low_std_dev_count = 0;
 	for (i =0; i < perf->iterations; i++) {
 	    cairo_perf_yield ();
 	    if (similar)
 		cairo_push_group_with_content (perf->cr,
 			                       cairo_boilerplate_content (perf->target->content));
-	    times[i] = (perf_func) (perf->cr, perf->size, perf->size);
+	    times[i] = perf_func (perf->cr, perf->size, perf->size, loops) / loops;
 	    if (similar)
 		cairo_pattern_destroy (cairo_pop_group (perf->cr));
 
@@ -279,11 +301,12 @@ cairo_perf_run (cairo_perf_t		*perf,
 	if (perf->summary) {
 	    _cairo_stats_compute (&stats, times, i);
 	    fprintf (perf->summary,
-		     "%10lld %#8.3f %#8.3f %#5.2f%% %3d\n",
+		     "%10lld %#8.3f %#8.3f %#5.2f%% %3d %10lld\n",
 		     (long long) stats.min_ticks,
 		     (stats.min_ticks * 1000.0) / cairo_perf_ticks_per_second (),
 		     (stats.median_ticks * 1000.0) / cairo_perf_ticks_per_second (),
-		     stats.std_dev * 100.0, stats.iterations);
+		     stats.std_dev * 100.0, stats.iterations,
+		     (long long) (calibration0 - stats.min_ticks));
 	    fflush (perf->summary);
 	}
author	Chris Wilson <chris@chris-wilson.co.uk>	2009-08-03 22:23:19 +0100
committer	Chris Wilson <chris@chris-wilson.co.uk>	2009-08-06 10:15:40 +0100
commit	0db9e010fa70c65451d324cc9c0ade989f77fedd (patch)
tree	e249038d900a7afd7e3ede351517b74fd4be5764 /perf/cairo-perf.c
parent	60c574ad062f3d8526056942bb7c9e71610a2773 (diff)
download	cairo-0db9e010fa70c65451d324cc9c0ade989f77fedd.tar.gz