Enhance performance reporting for benchmarking.

This CL adds reporting of: - number of processors - total cpu time - cpu utilization - include server cpu time I corrected issues with overly loose specification of compiler locations. (I developed this CL under Cygwin and encountered new problems.) This resulted in evil recursive invocations of distcc. I also: -- removed an unnecessary parameter from Build(..) -- pruned the number of decimals reported from 4 to 1. -- removed the dependency of 'Numeric', which is a non-standard package and rewrote the math do to std deviation -- use shell built-in 'time' instead of Python time functionality -- corrected deficient error reporting: builds would fail with non-zero status and not be reported as failing. REVIEW: csilvers@google.com TESTS: DISTCC_HOSTS="--randomize `/home/klarlund/svn-distcc/distcc/lsdistcc -P1 -k100 -d`" PATH=`pwd`:$PATH bench/benchmark.py --cc='/usr/crosstool/v10/gcc-4.2.1-glibc-2.3.2/x86_64-unknown-linux-gnu/bin/x86_64-unknown-linux-gnu-gcc' -n 1 -c 'pump,h40,j120' -c 'dist,h40,j32' linux-2.6.25 Part of the printout for hello-2.1.1: Local number of CPUs: 4 Linux klarlund-warp.nyc.corp.google.com 2.6.18.5-gg34workstation-mixed64-32 #1 SMP Thu May 8 01:31:23 UTC 2008 x86_64 GNU/Linux project compiler time s.d. CPU time CPU util incl serv hello-2.1.1 pump_h39_j120 0.9s 0.0s 0.5s 13.5% 0.2s hello-2.1.1 dist_h39_j32 0.4s 0.1s 0.3s 18.1% (For this small program, pump is a disadvantage.) git-svn-id: http://distcc.googlecode.com/svn/trunk@416 01de4be4-8c4a-0410-9132-4925637da917
author: klarlund <klarlund@01de4be4-8c4a-0410-9132-4925637da917> 2008-05-29 18:51:21 +0000
committer: klarlund <klarlund@01de4be4-8c4a-0410-9132-4925637da917> 2008-05-29 18:51:21 +0000
commit: 93c422ea97cae80a0a1bb1f688eb8e6113b8aaca (patch)
tree: b42f8eabc2bf901909b8c38e72a7fd85212a18d9
parent: 4af2e0ba011f2910166c3d79d108327b68d8928d (diff)
download: distcc-93c422ea97cae80a0a1bb1f688eb8e6113b8aaca.tar.gz
3 files changed, 116 insertions, 39 deletions
diff --git a/bench/Build.py b/bench/Build.py
index f8e2208..194c688 100755
--- a/bench/Build.py
+++ b/bench/Build.py
@@ -25,6 +25,19 @@ import buildutil
 from buildutil import make_dir, run_cmd, rm_files
 import re, os, sys, time
 
+# For parsing output of 'time -p'.
+RE_TIME = re.compile(r"""^real \s* (\d*\.\d*)\n
+                          user \s* (\d*\.\d*)\n
+                          sys  \s* (\d*\.\d*)""",
+                     re.VERBOSE | re.MULTILINE)
+
+class TimeInfo:
+   """A record of real, system, and user time."""
+   def __init__(self, real=None, system=None, user=None, include_server=None):
+      self.real = real
+      self.system = system
+      self.user = user
+      self.include_server = include_server
 
 
 class Build:
@@ -112,10 +125,33 @@ class Build:
                   self.compiler.cc, self.compiler.cxx,
                   self.project.configure_cmd, configure_log))
         self._run_cmd_with_redirect_farm(cmd)
-
         # Touch a file if the configure was successfully done, so we know.
         open(self.configure_done, 'w').close()
 
+    @staticmethod
+    def _extract_time_info(log_file_name):
+	"""Open log file and look for output of 'time -p' and include server 
+	time."""
+	log_file = open(log_file_name, 'r')
+	text = log_file.read()
+	log_file.close()
+
+	match = RE_TIME.search(text)
+	if not match:
+	    sys.exit('Could not locate time information in log %s.'
+		     % log_file_name)
+	time_info = TimeInfo(float(match.group(1)),
+			     float(match.group(2)),
+			     float(match.group(3)))
+	# Now locate include server cpu time if present.
+	lines = text.splitlines()
+	for line in lines:
+	   if line.startswith('Include server timing.  '):
+	      is_time = float(
+		 line[len('Include server timing.  '):].split()[9][:-1])
+	      time_info.include_server = is_time
+	      break
+	return time_info
 
     def did_configure(self):
         """Returns true if configure was successfully run for this
@@ -123,7 +159,6 @@ class Build:
         """
         return os.path.isfile(self.configure_done)
 
-
     def build(self):
         """Actually build the package."""
 
@@ -141,15 +176,21 @@ class Build:
                                               self.project.pre_build_cmd,
                                               prebuild_log))
             self._run_cmd_with_redirect_farm(cmd)
-
         distcc_hosts = buildutil.tweak_hosts(os.getenv("DISTCC_HOSTS"),
                                              self.compiler.num_hosts,
                                              self.compiler.host_opts)
+        # We use built-in 'time' to measure real, system, and user time.  To
+        # allow its stderr to be grabbed, the time command is executed in a
+        # subshell.  The wait statement is necessary for the include server
+        # to report its timing info.
         cmd = ("cd %s && \\\n"
+               "(time -p \\\n"
                "DISTCC_HOSTS='%s' \\\n"
                "INCLUDE_SERVER_ARGS='-t %s' \\\n"
                "%s%s \\\nDISTCC_LOG='%s' \\\nCC='%s' \\\nCXX='%s' "
-               "\\\n%s \\\n>%s 2>&1" %
+               "\\\n%s)"
+               "\\\n>%s 2>&1; wait"
+               %
                (self.build_dir,
                 distcc_hosts,
                 self.project.include_server_args,
@@ -160,9 +201,8 @@ class Build:
                 self.compiler.cxx,
                 self.compiler.make_opts,
                 build_log))
-        _, elapsed = self._run_cmd_with_redirect_farm(cmd)
-        return elapsed
-
+        result, unused_elapsed = self._run_cmd_with_redirect_farm(cmd)
+        return (result, Build._extract_time_info(build_log))
 
     def clean(self):
         clean_log = os.path.join(self.log_dir, "bench-clean.log")
@@ -182,7 +222,9 @@ class Build:
 
         Catch exceptions and handle."""
         try:
-            times = []
+            # The time_info_accumulator is normally a list.  But if something
+            # goes wrong, it will contain a short string indicating the problem.
+            time_info_accumulator = []
             if 'sweep' in actions:
                 self.scrub()
             if 'unpack' in actions:
@@ -195,14 +237,18 @@ class Build:
                 self.clean()
             for i in range(self.n_repeats):
                 if 'build' in actions:
-                    times.append(self.build())
+                    (result, time_info) = self.build()
+                    if result:  # that is, if result is bad!
+                       time_info_accumulator = 'NON-ZERO STATUS'
+                    elif isinstance(time_info_accumulator, list):
+                       time_info_accumulator.append(time_info)
                 if 'clean' in actions:
                     self.clean()
             if 'scrub' in actions:
                 self.scrub()
-            summary.store(self.project, self.compiler, times)
+            summary.store(self.project, self.compiler, time_info_accumulator)
         except KeyboardInterrupt:
             raise
         except:
             apply(sys.excepthook, sys.exc_info()) # print traceback
-            summary.store(self.project, self.compiler, 'FAIL')
+            summary.store(self.project, self.compiler, 'FAIL WITH EXCEPTION')
diff --git a/bench/Summary.py b/bench/Summary.py
index 4a556b3..f2e2d85 100755
--- a/bench/Summary.py
+++ b/bench/Summary.py
@@ -20,6 +20,8 @@
 # USA.
 
 import buildutil
+import os
+import statistics
 
 class Summary:
     """Stores and prints results of building different things"""
@@ -30,17 +32,37 @@ class Summary:
     def __init__(self):
         self._table = []
 
-    def store(self, project, compiler, elapsed_times):
+    def store(self, project, compiler, time_info_accumulator):
         """
-        elapsed_times is a sequence of elapsed times to build the project.
-        A sequence because we can build projects repeatedly.
+        Args:
+          project: a Project object
+          compiler: a Compiler object
+          time_info_accumulator: the string 'FAIL' or a list of Build.TimeInfo records
+   
+        The time information is a list because we can build projects repeatedly.
+        
         """
-        self._table.append((project.name, compiler.name, elapsed_times))
+        self._table.append((project.name, compiler.name, time_info_accumulator))
 
     def print_raw(self):
         from pprint import pprint
         pprint(self._table)
 
+    @staticmethod
+    def print_mean_and_sd(times, unit='s', no_sd=False):
+        assert len(unit) == 1, unit
+        mean = statistics.mean(times)
+        sd = statistics.std(times)
+        if mean is None:
+            print "%s%s  " % ("n/a", sd_space),
+        else:
+            print "%8.1f%s " % (mean, unit),
+        if not no_sd:
+            if sd is None:
+                print "%9s " % "n/a",
+            else:
+                print "%8.1f%s " % (sd, unit),
+
     def print_table(self):
         import time, os, sys
         import statistics
@@ -60,24 +82,38 @@ class Summary:
         hosts = os.getenv('DISTCC_HOSTS')
         print "DISTCC_HOSTS: %s" % `hosts`
         print "Total hosts: %d" % buildutil.count_hosts(hosts)
+        number_CPUs = os.sysconf('SC_NPROCESSORS_ONLN')
+        print "Local number of CPUs: %s" % number_CPUs
         sys.stdout.flush()
         os.system("uname -a")
 
-        print "%-20s  %-30s   %8s  %8s" % ('project', 'compiler', 'time', 's.d.')
+        print ("%-20s  %-30s  %9s  %9s  %9s  %9s  %9s" 
+        % ('project', 'compiler', 'time', 's.d.', 
+           'CPU time',
+           'CPU util', 
+           'incl serv'))
 
         for row in self._table:
             print "%-20s  %-30s " % row[:2],
-            times = row[2]
-            if times == 'FAIL':
-                print '%9s' % 'FAIL'
+            time_info_accumulator = row[2]
+            if isinstance(time_info_accumulator, str):
+                print ' ' * 4, time_info_accumulator
             else:
-                mean = statistics.mean(times)
-                sd = statistics.std(times)
-                if mean is None:
-                    print "%9s " % "n/a",
-                else:
-                    print "%8.4fs " % mean,
-                if sd is None:
-                    print "%9s" % "n/a"
-                else:
-                    print "%8.4fs" % sd
+                real_times = [time_info.real for time_info in time_info_accumulator]
+                Summary.print_mean_and_sd(real_times)
+
+                cpu_times = [time_info.user + time_info.system
+                             for time_info in time_info_accumulator]
+                self.print_mean_and_sd(cpu_times, no_sd=True)
+
+                cpu_util_ratios = (
+                    [100 * cpu_times[i]/(number_CPUs * time_info_accumulator[i].real)
+                     for i in range(len(time_info_accumulator))])
+                self.print_mean_and_sd(cpu_util_ratios, unit='%', no_sd=True)
+                include_server_times = [time_info.include_server
+                                        for time_info in time_info_accumulator]
+
+                if None not in include_server_times:
+                    self.print_mean_and_sd(include_server_times, no_sd=True)
+
+                print
diff --git a/bench/statistics.py b/bench/statistics.py
index 5aadaa3..d6159f1 100755
--- a/bench/statistics.py
+++ b/bench/statistics.py
@@ -21,11 +21,7 @@
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
 # USA.
 
-
-# Based in part on http://starship.python.net/crew/jhauser/NumAdd.py.html by Janko Hauser
-
-
-import Numeric
+import math
 
 def var(m):
     """
@@ -33,18 +29,17 @@ def var(m):
     """
     if len(m) < 2:
         return None
-    mu = Numeric.average(m)
-    return (Numeric.add.reduce(Numeric.power(Numeric.ravel(m)-mu, 2))
-            / (len(m)-1.))
+    mu = mean(m)
+    return sum((x - mu) * (x - mu) for x in m) / (len(m) - 1.0)
 
 def std(m):
     """
     Standard deviation of m.
     """
     v = var(m)
-    return v and Numeric.sqrt(v)
+    return v and math.sqrt(v)
 
 def mean(m):
     if len(m) < 1:
         return None
-    return Numeric.average(m)
+    return sum(m)/len(m)
author	klarlund <klarlund@01de4be4-8c4a-0410-9132-4925637da917>	2008-05-29 18:51:21 +0000
committer	klarlund <klarlund@01de4be4-8c4a-0410-9132-4925637da917>	2008-05-29 18:51:21 +0000
commit	93c422ea97cae80a0a1bb1f688eb8e6113b8aaca (patch)
tree	b42f8eabc2bf901909b8c38e72a7fd85212a18d9
parent	4af2e0ba011f2910166c3d79d108327b68d8928d (diff)
download	distcc-93c422ea97cae80a0a1bb1f688eb8e6113b8aaca.tar.gz