Some changes to the benchmark framework to not redo work unnecessarily.

Most prominently, no longer re-run 'tar xf && configure' every benchmark run. Instead, if configure was successfully executed last benchmark run, just do 'make clean'. This is equivalent to what we do now between build runs, when the repeat-count is > 1. This feature is not turned on by default, but must be enabled with the new --force=0 flag. --force=1 keeps the same behavior as before: we always re-run configure in each build-directory. --force=2 adds more repeat-work than before; in --force=2 mode, we always re-download the package from the web, even if we had successfully downloaded it before. Note that the 'tar' command currently warms the file-cache, which makes build times more consistent, so there is some cost to setting --force=0, in terms of the data collected. Reviewed by klarlund git-svn-id: http://distcc.googlecode.com/svn/trunk@408 01de4be4-8c4a-0410-9132-4925637da917
author: csilvers <csilvers@01de4be4-8c4a-0410-9132-4925637da917> 2008-05-29 05:38:49 +0000
committer: csilvers <csilvers@01de4be4-8c4a-0410-9132-4925637da917> 2008-05-29 05:38:49 +0000
commit: 4af2e0ba011f2910166c3d79d108327b68d8928d (patch)
tree: b18609495716376cccbf48cd479f1554302d1a36 /bench
parent: aeb3d229b2613c4f9ff31a3e4407280f0549fa49 (diff)
download: distcc-4af2e0ba011f2910166c3d79d108327b68d8928d.tar.gz
5 files changed, 149 insertions, 104 deletions
diff --git a/bench/Build.py b/bench/Build.py
index 91ef36c..f8e2208 100755
--- a/bench/Build.py
+++ b/bench/Build.py
@@ -54,35 +54,31 @@ class Build:
             self.build_dir = self.unpacked_dir
 
         self.log_dir = self.build_dir
-        self.old_path = None
+        self.configure_done = os.path.join(self.log_dir, "bench-configure.done")
 
     def __repr__(self):
         return "Build(%s, %s)" % (`self.project`, `self.compiler`)
 
-    def make_script_farm_augment_path(self):
-        """Initialize shell script farm and augment PATH.
-
-        A shell script farm is a set of scripts for dispatching a chosen
-        compiler using distcc. For example, the 'cc' script may contain the one
-        line:
+    def _run_cmd_with_redirect_farm(self, cmd):
+        """Initialize shell script farm for given compiler,
+        augment PATH, and run cmd.
 
+        A shell script farm is a set of scripts for dispatching a
+        chosen compiler using distcc. For example, the 'cc' script may
+        contain the one line:
           dist /usr/mine/gcc "$@"
-
         """
-        self.farm_dir = os.path.join(self.build_dir, 'build-cc-script-farm')
-        make_dir(self.farm_dir)
-        print ("""** Creating masquerading shell scripts in '%s'""" %
-               self.farm_dir)
+        farm_dir = os.path.join(self.build_dir, 'build-cc-script-farm')
+        make_dir(farm_dir)
+        print ("** Creating masquerading shell scripts in '%s'" % farm_dir)
         masquerade = os.path.join(self.build_dir, 'masquerade')
-        prepare_shell_script_farm(self.compiler, self.farm_dir, masquerade)
-        self.old_path = os.environ['PATH'] 
-        os.environ['PATH'] = self.farm_dir + ":" + self.old_path
-
-    def restore_path(self):
-        """Restore effect of constructor: reset PATH."""
-        if self.old_path:
-            os.environ['PATH'] = self.old_path
-            self.old_path = None
+        prepare_shell_script_farm(self.compiler, farm_dir, masquerade)
+        old_path = os.environ['PATH'] 
+        try:
+            os.environ['PATH'] = farm_dir + ":" + old_path
+            return run_cmd(cmd)
+        finally:
+            os.environ['PATH'] = old_path
 
     def unpack(self):
         """Unpack from source tarball into build directory"""
@@ -100,29 +96,35 @@ class Build:
         run_cmd("cd %s && %s" % (self.base_dir, tar_cmd))
 
 
-    def configure(self, compiler):
+    def configure(self):
         """Run configuration command for this tree, if any."""
-        self.compiler = compiler
-
         make_dir(self.log_dir)
 
         configure_log = os.path.join(self.log_dir, "bench-configure.log")
         distcc_log = os.path.join(self.log_dir, "bench-configure-distcc.log")
 
-        rm_files((configure_log, distcc_log))
+        rm_files((configure_log, distcc_log, self.configure_done))
 
         make_dir(self.build_dir)
         print "** Configuring..."
-        try:
-            self.make_script_farm_augment_path()
-            run_cmd("cd %s && \\\nDISTCC_LOG='%s' \\\nCC='%s' \\\nCXX='%s' \\\n%s \\\n>%s 2>&1" %
-                    (self.build_dir, distcc_log, self.compiler.cc,
-                     self.compiler.cxx,
-                     self.project.configure_cmd, configure_log))
-        finally:
-            self.restore_path()
+        cmd = ("cd %s && \\\nDISTCC_LOG='%s' \\\nCC='%s' \\\nCXX='%s' \\\n%s \\\n>%s 2>&1"
+               % (self.build_dir, distcc_log,
+                  self.compiler.cc, self.compiler.cxx,
+                  self.project.configure_cmd, configure_log))
+        self._run_cmd_with_redirect_farm(cmd)
+
+        # Touch a file if the configure was successfully done, so we know.
+        open(self.configure_done, 'w').close()
 
-    def build(self, sum):
+
+    def did_configure(self):
+        """Returns true if configure was successfully run for this
+        build in the past.
+        """
+        return os.path.isfile(self.configure_done)
+
+
+    def build(self):
         """Actually build the package."""
 
         build_log = os.path.join(self.log_dir, "bench-build.log")
@@ -135,15 +137,10 @@ class Build:
         make_dir(self.build_dir)
         print "** Building..."
         if self.project.pre_build_cmd:
-            try:
-                self.make_script_farm_augment_path()
-
-                cmd = ("cd %s && %s > %s 2>&1" % (self.build_dir,
-                                                  self.project.pre_build_cmd,
-                                                  prebuild_log))
-                run_cmd(cmd)
-            finally:
-                self.restore_path()
+            cmd = ("cd %s && %s > %s 2>&1" % (self.build_dir,
+                                              self.project.pre_build_cmd,
+                                              prebuild_log))
+            self._run_cmd_with_redirect_farm(cmd)
 
         distcc_hosts = buildutil.tweak_hosts(os.getenv("DISTCC_HOSTS"),
                                              self.compiler.num_hosts,
@@ -163,11 +160,7 @@ class Build:
                 self.compiler.cxx,
                 self.compiler.make_opts,
                 build_log))
-        try:
-            self.make_script_farm_augment_path()
-            result, elapsed = run_cmd(cmd)
-        finally:
-            self.restore_path()
+        _, elapsed = self._run_cmd_with_redirect_farm(cmd)
         return elapsed
 
 
@@ -176,14 +169,11 @@ class Build:
         make_dir(self.build_dir)
         print "** Cleaning build directory"
         cmd = "cd %s && make clean >%s 2>&1" % (self.build_dir, clean_log)
-        try:
-            self.make_script_farm_augment_path()
-            run_cmd(cmd)
-        finally:
-            self.restore_path()
+        self._run_cmd_with_redirect_farm(cmd)
 
     def scrub(self):
         print "** Removing build directory"
+        rm_files((self.configure_done, ))
         run_cmd("rm -rf %s" % self.unpacked_dir)
 
 
@@ -198,10 +188,14 @@ class Build:
             if 'unpack' in actions:
                 self.unpack()
             if 'configure' in actions:
-                self.configure(self.compiler)
+                self.configure()
+            # This is a safety measure, in case a previous benchmark
+            # run left the build in an incomplete state.
+            if 'clean' in actions:
+                self.clean()
             for i in range(self.n_repeats):
                 if 'build' in actions:
-                    times.append(self.build(summary))
+                    times.append(self.build())
                 if 'clean' in actions:
                     self.clean()
             if 'scrub' in actions:
diff --git a/bench/Project.py b/bench/Project.py
index 1c6178f..75477c7 100755
--- a/bench/Project.py
+++ b/bench/Project.py
@@ -112,11 +112,8 @@ class Project:
         return "Project(name=%s)" % `self.name`
 
 
-    def download(self, force=0):
-        """Download package from vendor site.
-
-        If force is 1, download even if the file already exists.
-        """
+    def download(self):
+        """Download package from vendor site."""
 
         make_dir(self.package_dir)
         make_dir(self.download_dir)
@@ -132,6 +129,9 @@ class Project:
                     (os.path.join(self.download_dir, self.package_file),
                      self.package_dir))
 
+    def did_download(self):
+        return os.path.exists(os.path.join(self.package_dir, self.package_file))
+
     def md5check(self):
         if self.md5:
             print "** Checking source package integrity"
diff --git a/bench/actions.py b/bench/actions.py
index 8517db7..580540f 100755
--- a/bench/actions.py
+++ b/bench/actions.py
@@ -29,6 +29,10 @@ all_actions = [('download', True, ''),
                ('clean', True, 'run "make clean" or equivalent'),
                ('scrub', False, 'remove build directory')]
 
+# Actions done on a per-project (rather than a per-build) basis
+project_actions = ('download', 'md5check')
+
+
 
 def action_help():
     print "Actions:"
@@ -43,10 +47,35 @@ default_actions = [a[0] for a in all_actions if a[1]]
 
 
 def parse_opt_actions(optarg):
-    import sys
     opt_actions = optarg.split(',')
     action_names = [a[0] for a in all_actions]
     for oa in opt_actions:
         if oa not in action_names:
             raise ValueError, ("no such action: %s" % `oa`)
     return opt_actions
+
+
+def remove_unnecessary_actions(opt_actions, force, did_download, did_configure):
+    """Given a list of actions (as a string), and a force value
+    (as described in the help text for benchmark.py), and a
+    bool indicating whether 'configure' was successfully run
+    for this build or not, return a new list which is the actions
+    to actually perform for this build.
+
+    Returns two lists: one that can be done on a per-project basis,
+    and one that has to be done on a per-build basis (as we build the
+    project with various different flags).
+    """
+
+    if force == 0 and did_configure and did_download:
+        remove = ('download', 'md5check', 'sweep', 'unpack', 'configure')
+    elif force <= 1 and did_download:
+        remove = ('download', )
+    else:
+        remove = ()
+
+    new_project_actions = [oa for oa in opt_actions
+                           if oa in project_actions and oa not in remove]
+    new_build_actions = [oa for oa in opt_actions
+                         if oa not in project_actions and oa not in remove]
+    return new_project_actions, new_build_actions
diff --git a/bench/benchmark.py b/bench/benchmark.py
index 7cbad07..7cc378f 100755
--- a/bench/benchmark.py
+++ b/bench/benchmark.py
@@ -117,6 +117,11 @@ Options:
   -n N                       repeat compilation N times
   -a, --actions=ACTIONS      comma-separated list of action phases
                              to perform
+  -f N, --force=N            If set to 0, skip download, unpack, and
+                             configure actions if they've already been
+                             successfully performed; if set to 1 (the
+                             default), only skip the download action;
+                             if set to 2, do not skip any action
 
 The C and C++ compiler versions used can be set with the --cc and --cxx
 options.
@@ -145,15 +150,16 @@ The default is to measure a few reasonable scenarios.
 def main():
     """Run the benchmark per arguments"""
     sum = Summary()
-    options, args = getopt(sys.argv[1:], 'a:c:n:',
+    options, args = getopt(sys.argv[1:], 'a:c:n:f:',
                            ['list-projects', 'actions=', 'help', 'compiler=',
-                            'cc=', 'cxx=', 'output='])
+                            'cc=', 'cxx=', 'output=', 'force='])
     opt_actions = actions.default_actions
     opt_cc = 'cc'
     opt_cxx = 'cxx'
     opt_output = None
     opt_compilers = []
     opt_repeats = 1
+    opt_force = 1
 
     for opt, optarg in options:
         if opt == '--help':
@@ -174,6 +180,8 @@ def main():
             opt_compilers.append(optarg)
         elif opt == '-n':
             opt_repeats = int(optarg)
+        elif opt == '-f' or opt == '--force':
+            opt_force = int(optarg)
 
     if opt_compilers:
         set_compilers = [compiler.parse_compiler_opt(c, cc=opt_cc, cxx=opt_cxx)
@@ -188,10 +196,20 @@ def main():
         chosen_projects = trees.values()
 
     for proj in chosen_projects:
-        proj.pre_actions(opt_actions)
+        # Ignore actions we did in a previous benchmark run, absent -f.
+        # We only run the project's pre-actions if one of the builds
+        # needs it because it hasn't successfully run 'configure' yet.
+        project_actions, _ = actions.remove_unnecessary_actions(
+                opt_actions, opt_force, proj.did_download(), 0)
+        proj.pre_actions(project_actions)
+
         for comp in set_compilers:
             build = Build(proj, comp, opt_repeats)
-            build.build_actions(opt_actions, sum)
+            _, build_actions = actions.remove_unnecessary_actions(
+                opt_actions, opt_force,
+                proj.did_download(), build.did_configure())
+
+            build.build_actions(build_actions, sum)
 
     sum.print_table()
     # If --output was specified, print the table to the output-file too
diff --git a/bench/compiler.py b/bench/compiler.py
index ba776dc..a9b63dd 100755
--- a/bench/compiler.py
+++ b/bench/compiler.py
@@ -64,6 +64,48 @@ class CompilerSpec:
         self.name = name or (self.pump_cmd + self.cc + "__" +
                              self.make_opts).replace(' ', '_')
 
+    def prepare_shell_script_farm(self, farm_dir, masquerade):
+        """Prepare farm directory for masquerading.
+
+        Assume the compiler is not local. Each standard name, such as
+        'cc', is used for form a shell script, named 'cc', that
+        contains the line 'distcc /my/path/gcc "$@"', where
+        '/my/path/gcc' is the value of the compiler.gcc field.
+
+        If the compiler is local, then the same procedure is followed
+        except that 'distcc' is omitted from the command line.
+        """
+        assert os.path.isdir(farm_dir)
+        assert os.path.isabs(farm_dir)
+
+        def make_shell_script(name, compiler_path, where):
+            fd = open(os.path.join(farm_dir, name), 'w')
+            fd.write('#!/bin/sh\n%s%s "$@"'
+                     % (where != 'local' and 'distcc ' or '',
+                        compiler_path))
+            fd.close()
+            os.chmod(os.path.join(farm_dir, name),
+                     stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR)
+
+        for generic_name in STANDARD_CC_NAMES:
+            make_shell_script(generic_name, self.real_cc, self.where)
+
+        for generic_name in STANDARD_CXX_NAMES:
+            make_shell_script(generic_name, self.real_cxx, self.where)
+
+        # Make shell wrapper to help manual debugging.
+        fd = open(masquerade, 'w')
+        fd.write("""\
+#!/bin/sh
+# Execute $@, but force 'cc' and 'cxx'" to be those in the farm of
+# masquerading scripts.  Each script in turn executes 'distcc' with the actual
+# compiler specified with the benchmark.py command.
+PATH=%s:"$PATH" "$@"
+""" % farm_dir)
+        fd.close()
+        os.chmod(masquerade,
+                 stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR)
+
 
 def default_compilers(cc, cxx):
     return [parse_compiler_opt('local,h1,j1', cc, cxx),
@@ -141,42 +183,4 @@ def parse_compiler_opt(optarg, cc, cxx):
 
 
 def prepare_shell_script_farm(compiler, farm_dir, masquerade):
-    """Prepare farm directory for masquerading.
-
-    Assume the compiler is not local. Each standard name, such as 'cc', is
-    used for form a shell script, named 'cc', that contains the line 'distcc
-    /my/path/gcc "$@"', where '/my/path/gcc' is the value of the compiler.gcc
-    field.
-
-    If the compiler is local, then the same procedure is followed except that
-    'distcc' is omitted from the command line.
-    """
-    assert os.path.isdir(farm_dir)
-    assert os.path.isabs(farm_dir)
-
-    def make_shell_script(name, compiler_path, where):
-        fd = open(os.path.join(farm_dir, name), 'w')
-        fd.write('#!/bin/sh\n%s%s "$@"'
-                 % (where != 'local' and 'distcc ' or '',
-                    compiler_path))
-        fd.close()
-        os.chmod(os.path.join(farm_dir, name),
-                 stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR)
-
-    for generic_name in STANDARD_CC_NAMES:
-        make_shell_script(generic_name, compiler.real_cc, compiler.where)
-
-    for generic_name in STANDARD_CXX_NAMES:
-        make_shell_script(generic_name, compiler.real_cxx, compiler.where)
-
-    # Make shell wrapper to help manual debugging.
-    fd = open(masquerade, 'w')
-    fd.write("""#!/bin/sh
-# Execute $@, but force 'cc' and 'cxx'" to be those in the farm of
-# masquerading scripts.  Each script in turn executes 'distcc' with the actual
-# compiler specified with the benchmark.py command.
-PATH=%s:"$PATH" "$@"\n"""
-             % farm_dir)
-    fd.close()
-    os.chmod(masquerade,
-             stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR)
+    compiler.prepare_shell_script_farm(farm_dir, masquerade)
author	csilvers <csilvers@01de4be4-8c4a-0410-9132-4925637da917>	2008-05-29 05:38:49 +0000
committer	csilvers <csilvers@01de4be4-8c4a-0410-9132-4925637da917>	2008-05-29 05:38:49 +0000
commit	4af2e0ba011f2910166c3d79d108327b68d8928d (patch)
tree	b18609495716376cccbf48cd479f1554302d1a36 /bench
parent	aeb3d229b2613c4f9ff31a3e4407280f0549fa49 (diff)
download	distcc-4af2e0ba011f2910166c3d79d108327b68d8928d.tar.gz