summaryrefslogtreecommitdiff
path: root/include_server
diff options
context:
space:
mode:
authorfergus.henderson <fergushenderson@users.noreply.github.com>2008-07-29 22:19:10 +0000
committerfergus.henderson <fergushenderson@users.noreply.github.com>2008-07-29 22:19:10 +0000
commit03163a50882914f4c4fe85d45490baccb9605357 (patch)
tree21b8fe5481bd10e4b08af16443e07dabbdc04fef /include_server
parenta59ffb549e7e423e48b46726de6563c96646e6b2 (diff)
downloaddistcc-git-03163a50882914f4c4fe85d45490baccb9605357.tar.gz
This is based on klarlund's original version of this patch (klarlund/d3b3):
gvn --project https://distcc.googlecode.com/svn/trunk review klarlund/d3b3 Fix Issue 7: Compiling with -I/usr/include/FOO or ... in pump mode Problem: Default system include directories are the directories on the default include search path, such as /usr/include, that is determined by the compiler. Distcc will not send default system include directories to the distcc compilation servers. Nevertheless, distcc on the server blindly rewrites -I options such as -I/usr/include/foo to -I/tmp/distccNNN/usr/include/foo. This doesn't work, since the files in /usr/include/foo are not sent to the distccd server. Solution: The present solution keeps the rewriting on the server, because we would like to not manage starting the compiler, parsing its output, and storing the default system paths on the server Instead, we use the existing mechanism for defining relative symbolic links under the server root. We escape from the root by using a sufficient number of '../'s. All this is tremendously complicated by: -- The possibility that other links encountered may point into the system default dirs in which case the usual mirroring of the reachable places should not take place, because the routing of such links will go through the link created for the system directory. -- The fact that determination of default-system-dirness is lazy: a deeply-seated link in a default system dir may become obsolete if it is later determined that a higher directory than the parent directory of the link is in fact also a default system dir. In that case, a new symlink, sitting in a place higher than the previous one will need to be created. Tests: make check benchmarks: samba (still 3X faster than with distcc), linux 2.6 kernel, and a couple of others TODO: better testing TODO: > In compiler_defaults.py, _MakeLinkFromMirrorToRealLocation: > Maybe comment each of the 'if' cases with an example of how this case > might be triggered. eg the real_prefix == rooted_system_dir is > triggered when we see /usr/include/foo, and the later see > /usr/include.
Diffstat (limited to 'include_server')
-rwxr-xr-xinclude_server/basics.py8
-rwxr-xr-xinclude_server/cache_basics.py8
-rwxr-xr-xinclude_server/compiler_defaults.py172
-rwxr-xr-xinclude_server/include_analyzer.py50
-rwxr-xr-xinclude_server/macro_eval_test.py9
-rwxr-xr-xinclude_server/mirror_path.py49
-rwxr-xr-xinclude_server/mirror_path_test.py12
-rwxr-xr-xinclude_server/parse_command_test.py8
8 files changed, 262 insertions, 54 deletions
diff --git a/include_server/basics.py b/include_server/basics.py
index 87c16d1..c3c23d6 100755
--- a/include_server/basics.py
+++ b/include_server/basics.py
@@ -149,6 +149,14 @@ class ClientRootKeeper(object):
"Cleaning out '%s' after defunct include server." % directory)
self.CleanOutClientRoots(pid)
+# COMPILATION SERVER
+
+# An upper bound on the number of directory components in the temporary
+# directory on the distccd server that corresponds to the root directory
+# on the client machine. Normally the server root is /tmp/distccd_XXXXXX.
+# But it could be something different if TMPDIR is set when distccd is
+# started. See dcc_get_tmp_top() in ../src/tempfile.c.
+MAX_COMPONENTS_IN_SERVER_ROOT = 10
# EMAILS
diff --git a/include_server/cache_basics.py b/include_server/cache_basics.py
index c7a936c..1826ce7 100755
--- a/include_server/cache_basics.py
+++ b/include_server/cache_basics.py
@@ -809,13 +809,17 @@ class SetUpCaches(object):
dirname_cache: DirnameCache
simple_build_stat: SimpleBuildStat
+ client_root: a path such as /dev/shm/tmpX.include_server-X-1
+ (used during default system dir determination)
+
IsFilepathIndex: test for filepath index
IsDirectoryIndex: test for director index
IsRealpathIndex: test for realpath index
IsFilepathPair: test for filepath pair
"""
- def __init__(self):
+ def __init__(self, client_root):
+
# A memoizing (caching) class to canonicalize a path: mostly by
# resolving any symlinks in the path-component.
self.canonical_path = CanonicalPath()
@@ -843,7 +847,7 @@ class SetUpCaches(object):
# sure "our" system_dirs_default_all is updated.
# TODO(csilvers): get rid of this once prefix_cache TODO is cleaned up
self.compiler_defaults = compiler_defaults.CompilerDefaults(
- self.canonical_path.Canonicalize)
+ self.canonical_path.Canonicalize, client_root)
self.systemdir_prefix_cache = SystemdirPrefixCache(
self.compiler_defaults.system_dirs_default_all)
diff --git a/include_server/compiler_defaults.py b/include_server/compiler_defaults.py
index e606110..007595c 100755
--- a/include_server/compiler_defaults.py
+++ b/include_server/compiler_defaults.py
@@ -40,6 +40,7 @@ import os
import re
import sys
import basics
+import shutil
import subprocess
Debug = basics.Debug
@@ -47,10 +48,104 @@ DEBUG_TRACE = basics.DEBUG_TRACE
DEBUG_DATA = basics.DEBUG_DATA
NotCoveredError = basics.NotCoveredError
+
+def _RealPrefix(path):
+ """Determine longest directory prefix and whether path contains a symlink.
+
+ Given an absolute path PATH, figure out the longest prefix of PATH where
+ every component of the prefix is a directory -- not a file or symlink.
+
+ Args:
+ path: a string starting with '/'
+ Returns:
+ a pair consisting of
+ - the prefix
+ - a bool, which is True iff PATH contained a symlink.
+ """
+ prefix = "/"
+ parts = path.split('/')
+ while prefix != path:
+ part = parts.pop(0)
+ last_prefix = prefix
+ prefix = os.path.join(prefix, part)
+ if os.path.islink(prefix):
+ return last_prefix, True
+ if not os.path.isdir(prefix):
+ return last_prefix, False
+ return path, False
+
+
+def _MakeLinkFromMirrorToRealLocation(system_dir, client_root, system_links):
+ """Create a link under client root what will resolve to system dir on server.
+
+ See comments for CompilerDefaults class for rationale.
+
+ Args:
+ system_dir: a path such as /usr/include or
+ /usr/lib/gcc/i486-linux-gnu/4.0.3/include
+ client_root: a path such as /dev/shm/tmpX.include_server-X-1
+ system_links: a list of paths under client_root; each denotes a symlink
+
+ The link is created only if necessary. So,
+ /usr/include/gcc/i486-linux-gnu/4.0.3/include
+ is not created if
+ /usr/include
+ is already in place, since it's a prefix of the longer path.
+
+ If a link is created, the symlink name will be appended to system_links.
+
+ For example, if system_dir is '/usr/include' and client_root is
+ '/dev/shm/tmpX.include_server-X-1', then this function will create a
+ symlink in /dev/shm/tmpX.include_server-X-1/usr/include which points
+ to ../../../../../../../../../../../../usr/include, and it will append
+ '/dev/shm/tmpX.include_server-X-1/usr/include' to system_links.
+ """
+ if not system_dir.startswith('/'):
+ raise ValueError("Expected absolute path, but got '%s'." % system_dir)
+ if os.path.realpath(system_dir) != system_dir:
+ raise NotCoveredError(
+ "Default compiler search path '%s' must be a realpath." %s)
+ rooted_system_dir = client_root + system_dir
+ # Typical values for rooted_system_dir:
+ # /dev/shm/tmpX.include_server-X-1/usr/include
+ real_prefix, is_link = _RealPrefix(rooted_system_dir)
+ parent = os.path.dirname(rooted_system_dir)
+ if real_prefix == rooted_system_dir:
+ # rooted_system_dir already exists as a real (non-symlink) path.
+ # Make rooted_system_dir a link.
+ # TODO(fergus): do we need to delete anything from system_links
+ # in this case?
+ shutil.rmtree(rooted_system_dir)
+ elif real_prefix == parent:
+ # The really constructed path does not extend beyond the parent directory,
+ # so we're all set to create the link if it's not already there.
+ if os.path.exists(rooted_system_dir):
+ assert os.path.islink(rooted_system_dir)
+ return
+ elif not is_link:
+ os.makedirs(parent)
+ else:
+ # A link above real_prefix has already been created with this routine.
+ return
+ assert _RealPrefix(parent) == (parent, False), parent
+ depth = len([c for c in system_dir if c == '/'])
+ # The more directories on the path system_dir, the more '../' need to
+ # appended. We add enough '../' to get to the root directory. It's OK
+ # if we have too many, since '..' in the root directory points back to
+ # the root directory.
+ # TODO(klarlund,fergus): do this in a more principled way.
+ # This probably requires changing the protocol.
+ os.symlink('../' * (basics.MAX_COMPONENTS_IN_SERVER_ROOT + depth)
+ + system_dir[1:], # remove leading '/'
+ rooted_system_dir)
+ system_links.append(rooted_system_dir)
+
def _SystemSearchdirsGCC(compiler, language, canonical_lookup):
"""Run gcc on empty file; parse output to figure out default paths.
+ This function works only for gcc, and only some versions at that.
+
Arguments:
compiler: a filepath (the first argument on the distcc command line)
language: 'c' or 'c++' or other item in basics.LANGUAGES
@@ -134,14 +229,59 @@ def _SystemSearchdirsGCC(compiler, language, canonical_lookup):
class CompilerDefaults(object):
- """Records and caches the default searchdirs, aka include directories
- or search-path. The 'default' searchdirs are those built in to the
- preprocessor, as opposed to being set on the commandline via -I et al.
+ """Records and caches the default search dirs and creates symlink farm.
- This scheme works only for gcc, and only some versions at that.
+ This function works only for gcc, and only some versions at that,
+ because we parse the output from gcc to determine the default search dirs.
+
+ The 'default' searchdirs are those on the search-path that are built in, that
+ is known to the preprocessor, as opposed to being set on the commandline via
+ -I et al.
+
+ When we pass an option such as -I/foo/bar to the server,
+ the server will rewrite it to say -I/server/path/root/foo/bar,
+ where /server/path/root is the temporary directory on the server
+ that corresponds to root on the client (e.g. typically /dev/shm/distccd_nnn).
+ This causes problems in this case of -I options such as -I/usr/include/foo,
+ where the path contains a 'default' search directory (in this case
+ /usr/include) as a prefix.
+ Header files under the system default directories are assumed to exist
+ on the server, and it would be expensive to send them to the server
+ unnecessarily (we measured it, and it slowed down the build of Samba by 20%).
+ So for -I options like -I/usr/include/foo, we want the server
+ to use /usr/include/foo on the server, not /server/path/root/usr/include/foo.
+
+ Because the server unconditionally rewrites include search
+ paths on the command line to be relative to the server root, we must take
+ corrective action when identifying default system dirs: references to files
+ under these relocated system directories must be redirected to the absolute
+ location where they're actually found.
+
+ To do so, we create a symlink forest under client_root.
+ This will contain symlinks of the form
+
+ usr/include -> ../../../../../../../../../../../../usr/include
+
+ After being sent to the server, the server will rewrite them as
+
+ /server/path/root/usr/include ->
+ /server/path/root/../../../../../../../../../../../../usr/include
+
+ which will make
+
+ /server/path/root/usr/include
+
+ become a symlink to
+
+ /usr/include
+
+ Consequently, an include search directory such as -I /usr/include/foo will
+ work on the server, even after it has been rewritten to:
+
+ -I /server/path/root/usr/include/foo
"""
- def __init__(self, canonical_lookup):
+ def __init__(self, canonical_lookup, client_root):
"""Constructor.
Instance variables:
@@ -150,14 +290,17 @@ class CompilerDefaults(object):
(strings) for compiler c and language lang
system_dirs_default: a list of all such strings, subjected to
realpath-ification, for all c and lang
+ client_root: a path such as /dev/shm/tmpX.include_server-X-1
+ system_links: locations under client_root representing system default dirs
"""
self.canonical_lookup = canonical_lookup
self.system_dirs_default_all = set([])
self.system_dirs_default = {}
-
+ self.system_links = []
+ self.client_root = client_root
def SetSystemDirsDefaults(self, compiler, language, timer=None):
- """Set instance variables according to compiler.
+ """Set instance variables according to compiler, and make symlink farm.
Arguments:
compiler: a filepath (the first argument on the distcc command line)
@@ -166,8 +309,8 @@ class CompilerDefaults(object):
The timer will be disabled during this routine because the select involved
in Popen calls does not handle SIGALRM.
-
- See also the constructor documentation for this class.
+
+ See also the class documentation for this class.
"""
assert isinstance(compiler, str)
assert isinstance(language, str)
@@ -191,11 +334,12 @@ class CompilerDefaults(object):
(compiler, language,
self.system_dirs_default[compiler][language]))
# Now summarize what we know and add to system_dirs_default_all.
- self.system_dirs_default_all |= set(
- [ _default
- for _compiler in self.system_dirs_default
- for _language in self.system_dirs_default[_compiler]
- for _default in self.system_dirs_default[_compiler][_language] ])
+ self.system_dirs_default_all |= (
+ set(self.system_dirs_default[compiler][language]))
+ # Construct the symlink farm for the compiler default dirs.
+ for system_dir in self.system_dirs_default[compiler][language]:
+ _MakeLinkFromMirrorToRealLocation(system_dir, self.client_root,
+ self.system_links)
finally:
if timer:
timer.Start()
diff --git a/include_server/include_analyzer.py b/include_server/include_analyzer.py
index 48b2e5d..c284c3f 100755
--- a/include_server/include_analyzer.py
+++ b/include_server/include_analyzer.py
@@ -50,7 +50,8 @@ class IncludeAnalyzer(object):
# Make table for symbols in #define's.
self.symbol_table = {}
# Erect the edifice of caches.
- caches = self.caches = cache_basics.SetUpCaches()
+ caches = self.caches = (
+ cache_basics.SetUpCaches(self.client_root_keeper.client_root))
# Migrate the cache stuff to self namespace.
self.includepath_map = caches.includepath_map
@@ -74,7 +75,9 @@ class IncludeAnalyzer(object):
# Make a cache for the symbolic links encountered; also for their
# replication into root directory.
self.mirror_path = mirror_path.MirrorPath(self.simple_build_stat,
- self.canonical_path)
+ self.canonical_path,
+ self.realpath_map,
+ self.systemdir_prefix_cache)
# Make a parser for C/C++.
self.parse_file = parse_file.ParseFile(self.includepath_map)
# Make a compressor for source files.
@@ -229,7 +232,7 @@ class IncludeAnalyzer(object):
def DoCompilationCommand(self, cmd, currdir, client_root_keeper):
"""Parse and and process the command; then gather files and links."""
-
+
self.translation_unit = "unknown translation unit" # don't know yet
# Any relative paths in the globs in the --stat_reset_trigger argument
@@ -249,7 +252,8 @@ class IncludeAnalyzer(object):
self.directory_map,
self.compiler_defaults,
self.timer))
- (_, _, _, source_file, result_file_prefix, _) = parsed_command
+ (unused_quote_dirs, unused_angle_dirs, unused_include_files, source_file,
+ result_file_prefix, unused_Dopts) = parsed_command
# Do the real work.
include_closure = (
@@ -262,18 +266,12 @@ class IncludeAnalyzer(object):
# Links are accumulated intra-build (across different compilations in a
# build). We send all of 'em every time. This will potentially lead to
# performance degradation for large link farms. We expect at most a
- # handful.
- links = self.mirror_path.Links()
+ # handful. We add put the system links first, because there should be very
+ # few of them.
+ links = self.compiler_defaults.system_links + self.mirror_path.Links()
files = self.compress_files.Compress(include_closure, client_root_keeper)
- must_exist_dirs = self.mirror_path.MustExistDirs()
- random_name = 'forcing_technique_271828'
- forcing_files = [d + '/' + random_name
- for d in must_exist_dirs]
- for forcing_file in forcing_files:
- # If for extremly obscure reasons the file already exists and is useful,
- # then don't change it.
- open(forcing_file, "a").close()
+ forcing_files = self._ForceDirectoriesToExist()
files_and_links = files + links + forcing_files
@@ -300,6 +298,30 @@ class IncludeAnalyzer(object):
realpath_map)
return files_and_links
+ def _ForceDirectoriesToExist(self):
+ """Force any needed directories to exist.
+
+ In rare cases, the source files may contain #include "foo/../bar",
+ but may not contain any other files from the "foo" directory.
+ In such cases, we invent a dummy file in (the mirrored copy of)
+ each such directory, just to force the distccd server to create the
+ directory, so that the C compiler won't get an error when it tries
+ to resolve that #include.
+
+ Returns:
+ A list of files to pass as dummy inputs.
+ """
+
+ must_exist_dirs = self.mirror_path.MustExistDirs()
+ random_name = 'forcing_technique_271828'
+ forcing_files = [d + '/' + random_name
+ for d in must_exist_dirs]
+ for forcing_file in forcing_files:
+ # If for extremly obscure reasons the file already exists and is useful,
+ # then don't change it: that's why we open in "append" mode.
+ open(forcing_file, "a").close()
+ return forcing_files
+
def RunAlgorithm(self, filepath_resolved_pair, filepath_real_idx):
"""Run FindNode on filepath; then compute include closure.
Arguments:
diff --git a/include_server/macro_eval_test.py b/include_server/macro_eval_test.py
index fd4bb1a..292cf35 100755
--- a/include_server/macro_eval_test.py
+++ b/include_server/macro_eval_test.py
@@ -26,6 +26,8 @@ import basics
import parse_file
import cache_basics
import macro_eval
+import shutil
+import tempfile
import unittest
NotCoveredError = basics.NotCoveredError
@@ -36,7 +38,8 @@ class MacroEvalTest(unittest.TestCase):
basics.opt_debug_pattern = 1
- caches = cache_basics.SetUpCaches()
+ self.tmp = tempfile.mkdtemp()
+ caches = cache_basics.SetUpCaches(self.tmp)
self.includepath_map = caches.includepath_map
self.canonical_path = caches.canonical_path
@@ -45,7 +48,7 @@ class MacroEvalTest(unittest.TestCase):
def tearDown(self):
- pass
+ shutil.rmtree(self.tmp)
def test__SubstituteSymbolInString(self):
@@ -194,7 +197,7 @@ class MacroEvalTest(unittest.TestCase):
def test_ResolveExpr(self):
# Erect the edifice of caches.
- caches = cache_basics.SetUpCaches()
+ caches = cache_basics.SetUpCaches(self.tmp)
parse_file_obj = parse_file.ParseFile(caches.includepath_map)
symbol_table = {}
diff --git a/include_server/mirror_path.py b/include_server/mirror_path.py
index 8b92933..dcf83eb 100755
--- a/include_server/mirror_path.py
+++ b/include_server/mirror_path.py
@@ -43,12 +43,16 @@ class MirrorPath(object):
def __init__(self,
simple_build_stat,
- canonical_path):
+ canonical_path,
+ realpath_map,
+ systemdir_prefix_cache):
"""Constructor.
Arguments:
simple_build_stat: object of type SimpleBuildStat
canonical_path: function of type CanonicalPath
+ realpath_map: a CanonicalMapToIndex; see cache_basics.py
+ systemdir_prefix_cache: a SystemdirPrefixCache; see cache_basics.py.
"""
assert isinstance(simple_build_stat, cache_basics.SimpleBuildStat)
assert isinstance(canonical_path, cache_basics.CanonicalPath)
@@ -61,6 +65,8 @@ class MirrorPath(object):
self.simple_build_stat = simple_build_stat
self.canonical_path = canonical_path
self.must_exist_dirs = []
+ self.realpath_map = realpath_map
+ self.systemdir_prefix_cache = systemdir_prefix_cache
def Links(self):
"""Return the list of symbolic links created."""
@@ -90,9 +96,10 @@ class MirrorPath(object):
# destinations exist, and replicate symbolic links where necessary.
while filepath and filepath != '/':
if (filepath, current_dir_idx) in link_stat:
- # Filepath is already mirrored
- return
+ # Filepath is already mirrored
+ return
link_stat.add((filepath, current_dir_idx))
+
# Process suffix of filepath by
# - making sure that the mirrored real path of the prefix exists,
# - and that the suffix if a symbolic link
@@ -109,18 +116,32 @@ class MirrorPath(object):
# And, its counterpart under root
root_prefix_real = root + prefix_real
- # Make sure that root_prefix_real is there
+ # Make sure that the parent, root_prefix_real, is there
if not lookup(root_prefix_real):
- if not os.path.isdir(root_prefix_real):
- self.must_exist_dirs.append(root_prefix_real)
- os.makedirs(root_prefix_real)
- self.simple_build_stat.cache[root_prefix_real] = True
-
+ # We have not been in this real location before.
+ if not os.path.isdir(root_prefix_real):
+ # Now check that the parent of the link is not under a default system
+ # dir. If it is, then we assume that the parent and indeed the
+ # link itself exist on the server as well, and thus, don't need to
+ # be mirrored.
+ realpath_map = self.realpath_map
+ realpath_idx = realpath_map.Index(prefix_real)
+ if not self.systemdir_prefix_cache.StartsWithSystemdir(realpath_idx,
+ realpath_map):
+ # Not under default system dir. Mark this directory as one that
+ # must always be created on the server.
+ self.must_exist_dirs.append(root_prefix_real)
+ # Create parent path in mirror directory.
+ os.makedirs(root_prefix_real)
+ else:
+ break
+ self.simple_build_stat.cache[root_prefix_real] = True
assert os.path.isdir(root_prefix_real)
- # Create the mirrored symbolic link if applicable
+ # Create the mirrored symbolic link if applicable.
if os.path.islink(filepath):
- link_name = root_prefix_real + '/' + suffix
- if not os.path.exists(link_name):
- os.symlink(self.canonical_path.Canonicalize(filepath), link_name)
- self.links.append(link_name)
+ link_name = root_prefix_real + '/' + suffix
+ if not os.path.exists(link_name):
+ os.symlink(self.canonical_path.Canonicalize(filepath),
+ link_name)
+ self.links.append(link_name)
filepath = prefix_filepath
diff --git a/include_server/mirror_path_test.py b/include_server/mirror_path_test.py
index 5cce1bf..91ae819 100755
--- a/include_server/mirror_path_test.py
+++ b/include_server/mirror_path_test.py
@@ -25,6 +25,8 @@ import os.path
import basics
import cache_basics
import mirror_path
+import shutil
+import tempfile
import unittest
NotCoveredError = basics.NotCoveredError
@@ -39,13 +41,15 @@ class MirrorPathTest(unittest.TestCase):
def setUp(self):
basics.debug_pattern = 3
-
- caches = cache_basics.SetUpCaches()
+ self.tmp = tempfile.mkdtemp()
+ caches = cache_basics.SetUpCaches(self.tmp)
self.canonical_path = caches.canonical_path
self.simple_build_stat = caches.simple_build_stat
self.mirror_path = mirror_path.MirrorPath(self.simple_build_stat,
- self.canonical_path)
+ self.canonical_path,
+ caches.realpath_map,
+ caches.systemdir_prefix_cache)
self.directories = ['/', '/a', '/link', '/a/link', '/a/b',
'/link/link', '/root']
@@ -59,7 +63,7 @@ class MirrorPathTest(unittest.TestCase):
def tearDown(self):
- pass
+ shutil.rmtree(self.tmp)
def test_MirrorPath(self):
diff --git a/include_server/parse_command_test.py b/include_server/parse_command_test.py
index a982775..b87f548 100755
--- a/include_server/parse_command_test.py
+++ b/include_server/parse_command_test.py
@@ -27,6 +27,8 @@ import time
import basics
import cache_basics
import parse_command
+import shutil
+import tempfile
import unittest
NotCoveredError = basics.NotCoveredError
@@ -37,7 +39,8 @@ class ParseCommandUnitTest(unittest.TestCase):
basics.opt_debug_pattern = 1
- caches = cache_basics.SetUpCaches()
+ self.tmp = tempfile.mkdtemp()
+ caches = cache_basics.SetUpCaches(self.tmp)
self.includepath_map = caches.includepath_map
self.canonical_path = caches.canonical_path
@@ -62,8 +65,7 @@ class ParseCommandUnitTest(unittest.TestCase):
self.compiler_defaults.system_dirs_default[mock_compiler]['c++'] = []
def tearDown(self):
- pass
-
+ shutil.rmtree(self.tmp)
def test__SplitMacroArg(self):
self.assertEqual(parse_command._SplitMacroArg("="), ["="])