diff options
author | fergus.henderson <fergushenderson@users.noreply.github.com> | 2008-07-29 22:19:10 +0000 |
---|---|---|
committer | fergus.henderson <fergushenderson@users.noreply.github.com> | 2008-07-29 22:19:10 +0000 |
commit | 03163a50882914f4c4fe85d45490baccb9605357 (patch) | |
tree | 21b8fe5481bd10e4b08af16443e07dabbdc04fef /include_server | |
parent | a59ffb549e7e423e48b46726de6563c96646e6b2 (diff) | |
download | distcc-git-03163a50882914f4c4fe85d45490baccb9605357.tar.gz |
This is based on klarlund's original version of this patch (klarlund/d3b3):
gvn --project https://distcc.googlecode.com/svn/trunk review klarlund/d3b3
Fix Issue 7: Compiling with -I/usr/include/FOO or ... in pump mode
Problem:
Default system include directories are the directories on the default include
search path, such as /usr/include, that is determined by the compiler. Distcc
will not send default system include directories to the distcc compilation
servers. Nevertheless, distcc on the server blindly rewrites -I options such
as -I/usr/include/foo to -I/tmp/distccNNN/usr/include/foo.
This doesn't work, since the files in /usr/include/foo are not sent to the
distccd server.
Solution:
The present solution keeps the rewriting on the server, because we would like
to not manage starting the compiler, parsing its output, and storing the
default system paths on the server
Instead, we use the existing mechanism for defining relative symbolic links
under the server root. We escape from the root by using a sufficient number
of '../'s.
All this is tremendously complicated by:
-- The possibility that other links encountered may point into the system
default dirs in which case the usual mirroring of the reachable places
should not take place, because the routing of such links will go through
the link created for the system directory.
-- The fact that determination of default-system-dirness is lazy: a
deeply-seated link in a default system dir may become obsolete if it is
later determined that a higher directory than the parent directory of the
link is in fact also a default system dir. In that case, a new symlink,
sitting in a place higher than the previous one will need to be created.
Tests:
make check
benchmarks: samba (still 3X faster than with distcc), linux 2.6 kernel, and
a couple of others
TODO: better testing
TODO:
> In compiler_defaults.py, _MakeLinkFromMirrorToRealLocation:
> Maybe comment each of the 'if' cases with an example of how this case
> might be triggered. eg the real_prefix == rooted_system_dir is
> triggered when we see /usr/include/foo, and the later see
> /usr/include.
Diffstat (limited to 'include_server')
-rwxr-xr-x | include_server/basics.py | 8 | ||||
-rwxr-xr-x | include_server/cache_basics.py | 8 | ||||
-rwxr-xr-x | include_server/compiler_defaults.py | 172 | ||||
-rwxr-xr-x | include_server/include_analyzer.py | 50 | ||||
-rwxr-xr-x | include_server/macro_eval_test.py | 9 | ||||
-rwxr-xr-x | include_server/mirror_path.py | 49 | ||||
-rwxr-xr-x | include_server/mirror_path_test.py | 12 | ||||
-rwxr-xr-x | include_server/parse_command_test.py | 8 |
8 files changed, 262 insertions, 54 deletions
diff --git a/include_server/basics.py b/include_server/basics.py index 87c16d1..c3c23d6 100755 --- a/include_server/basics.py +++ b/include_server/basics.py @@ -149,6 +149,14 @@ class ClientRootKeeper(object): "Cleaning out '%s' after defunct include server." % directory) self.CleanOutClientRoots(pid) +# COMPILATION SERVER + +# An upper bound on the number of directory components in the temporary +# directory on the distccd server that corresponds to the root directory +# on the client machine. Normally the server root is /tmp/distccd_XXXXXX. +# But it could be something different if TMPDIR is set when distccd is +# started. See dcc_get_tmp_top() in ../src/tempfile.c. +MAX_COMPONENTS_IN_SERVER_ROOT = 10 # EMAILS diff --git a/include_server/cache_basics.py b/include_server/cache_basics.py index c7a936c..1826ce7 100755 --- a/include_server/cache_basics.py +++ b/include_server/cache_basics.py @@ -809,13 +809,17 @@ class SetUpCaches(object): dirname_cache: DirnameCache simple_build_stat: SimpleBuildStat + client_root: a path such as /dev/shm/tmpX.include_server-X-1 + (used during default system dir determination) + IsFilepathIndex: test for filepath index IsDirectoryIndex: test for director index IsRealpathIndex: test for realpath index IsFilepathPair: test for filepath pair """ - def __init__(self): + def __init__(self, client_root): + # A memoizing (caching) class to canonicalize a path: mostly by # resolving any symlinks in the path-component. self.canonical_path = CanonicalPath() @@ -843,7 +847,7 @@ class SetUpCaches(object): # sure "our" system_dirs_default_all is updated. # TODO(csilvers): get rid of this once prefix_cache TODO is cleaned up self.compiler_defaults = compiler_defaults.CompilerDefaults( - self.canonical_path.Canonicalize) + self.canonical_path.Canonicalize, client_root) self.systemdir_prefix_cache = SystemdirPrefixCache( self.compiler_defaults.system_dirs_default_all) diff --git a/include_server/compiler_defaults.py b/include_server/compiler_defaults.py index e606110..007595c 100755 --- a/include_server/compiler_defaults.py +++ b/include_server/compiler_defaults.py @@ -40,6 +40,7 @@ import os import re import sys import basics +import shutil import subprocess Debug = basics.Debug @@ -47,10 +48,104 @@ DEBUG_TRACE = basics.DEBUG_TRACE DEBUG_DATA = basics.DEBUG_DATA NotCoveredError = basics.NotCoveredError + +def _RealPrefix(path): + """Determine longest directory prefix and whether path contains a symlink. + + Given an absolute path PATH, figure out the longest prefix of PATH where + every component of the prefix is a directory -- not a file or symlink. + + Args: + path: a string starting with '/' + Returns: + a pair consisting of + - the prefix + - a bool, which is True iff PATH contained a symlink. + """ + prefix = "/" + parts = path.split('/') + while prefix != path: + part = parts.pop(0) + last_prefix = prefix + prefix = os.path.join(prefix, part) + if os.path.islink(prefix): + return last_prefix, True + if not os.path.isdir(prefix): + return last_prefix, False + return path, False + + +def _MakeLinkFromMirrorToRealLocation(system_dir, client_root, system_links): + """Create a link under client root what will resolve to system dir on server. + + See comments for CompilerDefaults class for rationale. + + Args: + system_dir: a path such as /usr/include or + /usr/lib/gcc/i486-linux-gnu/4.0.3/include + client_root: a path such as /dev/shm/tmpX.include_server-X-1 + system_links: a list of paths under client_root; each denotes a symlink + + The link is created only if necessary. So, + /usr/include/gcc/i486-linux-gnu/4.0.3/include + is not created if + /usr/include + is already in place, since it's a prefix of the longer path. + + If a link is created, the symlink name will be appended to system_links. + + For example, if system_dir is '/usr/include' and client_root is + '/dev/shm/tmpX.include_server-X-1', then this function will create a + symlink in /dev/shm/tmpX.include_server-X-1/usr/include which points + to ../../../../../../../../../../../../usr/include, and it will append + '/dev/shm/tmpX.include_server-X-1/usr/include' to system_links. + """ + if not system_dir.startswith('/'): + raise ValueError("Expected absolute path, but got '%s'." % system_dir) + if os.path.realpath(system_dir) != system_dir: + raise NotCoveredError( + "Default compiler search path '%s' must be a realpath." %s) + rooted_system_dir = client_root + system_dir + # Typical values for rooted_system_dir: + # /dev/shm/tmpX.include_server-X-1/usr/include + real_prefix, is_link = _RealPrefix(rooted_system_dir) + parent = os.path.dirname(rooted_system_dir) + if real_prefix == rooted_system_dir: + # rooted_system_dir already exists as a real (non-symlink) path. + # Make rooted_system_dir a link. + # TODO(fergus): do we need to delete anything from system_links + # in this case? + shutil.rmtree(rooted_system_dir) + elif real_prefix == parent: + # The really constructed path does not extend beyond the parent directory, + # so we're all set to create the link if it's not already there. + if os.path.exists(rooted_system_dir): + assert os.path.islink(rooted_system_dir) + return + elif not is_link: + os.makedirs(parent) + else: + # A link above real_prefix has already been created with this routine. + return + assert _RealPrefix(parent) == (parent, False), parent + depth = len([c for c in system_dir if c == '/']) + # The more directories on the path system_dir, the more '../' need to + # appended. We add enough '../' to get to the root directory. It's OK + # if we have too many, since '..' in the root directory points back to + # the root directory. + # TODO(klarlund,fergus): do this in a more principled way. + # This probably requires changing the protocol. + os.symlink('../' * (basics.MAX_COMPONENTS_IN_SERVER_ROOT + depth) + + system_dir[1:], # remove leading '/' + rooted_system_dir) + system_links.append(rooted_system_dir) + def _SystemSearchdirsGCC(compiler, language, canonical_lookup): """Run gcc on empty file; parse output to figure out default paths. + This function works only for gcc, and only some versions at that. + Arguments: compiler: a filepath (the first argument on the distcc command line) language: 'c' or 'c++' or other item in basics.LANGUAGES @@ -134,14 +229,59 @@ def _SystemSearchdirsGCC(compiler, language, canonical_lookup): class CompilerDefaults(object): - """Records and caches the default searchdirs, aka include directories - or search-path. The 'default' searchdirs are those built in to the - preprocessor, as opposed to being set on the commandline via -I et al. + """Records and caches the default search dirs and creates symlink farm. - This scheme works only for gcc, and only some versions at that. + This function works only for gcc, and only some versions at that, + because we parse the output from gcc to determine the default search dirs. + + The 'default' searchdirs are those on the search-path that are built in, that + is known to the preprocessor, as opposed to being set on the commandline via + -I et al. + + When we pass an option such as -I/foo/bar to the server, + the server will rewrite it to say -I/server/path/root/foo/bar, + where /server/path/root is the temporary directory on the server + that corresponds to root on the client (e.g. typically /dev/shm/distccd_nnn). + This causes problems in this case of -I options such as -I/usr/include/foo, + where the path contains a 'default' search directory (in this case + /usr/include) as a prefix. + Header files under the system default directories are assumed to exist + on the server, and it would be expensive to send them to the server + unnecessarily (we measured it, and it slowed down the build of Samba by 20%). + So for -I options like -I/usr/include/foo, we want the server + to use /usr/include/foo on the server, not /server/path/root/usr/include/foo. + + Because the server unconditionally rewrites include search + paths on the command line to be relative to the server root, we must take + corrective action when identifying default system dirs: references to files + under these relocated system directories must be redirected to the absolute + location where they're actually found. + + To do so, we create a symlink forest under client_root. + This will contain symlinks of the form + + usr/include -> ../../../../../../../../../../../../usr/include + + After being sent to the server, the server will rewrite them as + + /server/path/root/usr/include -> + /server/path/root/../../../../../../../../../../../../usr/include + + which will make + + /server/path/root/usr/include + + become a symlink to + + /usr/include + + Consequently, an include search directory such as -I /usr/include/foo will + work on the server, even after it has been rewritten to: + + -I /server/path/root/usr/include/foo """ - def __init__(self, canonical_lookup): + def __init__(self, canonical_lookup, client_root): """Constructor. Instance variables: @@ -150,14 +290,17 @@ class CompilerDefaults(object): (strings) for compiler c and language lang system_dirs_default: a list of all such strings, subjected to realpath-ification, for all c and lang + client_root: a path such as /dev/shm/tmpX.include_server-X-1 + system_links: locations under client_root representing system default dirs """ self.canonical_lookup = canonical_lookup self.system_dirs_default_all = set([]) self.system_dirs_default = {} - + self.system_links = [] + self.client_root = client_root def SetSystemDirsDefaults(self, compiler, language, timer=None): - """Set instance variables according to compiler. + """Set instance variables according to compiler, and make symlink farm. Arguments: compiler: a filepath (the first argument on the distcc command line) @@ -166,8 +309,8 @@ class CompilerDefaults(object): The timer will be disabled during this routine because the select involved in Popen calls does not handle SIGALRM. - - See also the constructor documentation for this class. + + See also the class documentation for this class. """ assert isinstance(compiler, str) assert isinstance(language, str) @@ -191,11 +334,12 @@ class CompilerDefaults(object): (compiler, language, self.system_dirs_default[compiler][language])) # Now summarize what we know and add to system_dirs_default_all. - self.system_dirs_default_all |= set( - [ _default - for _compiler in self.system_dirs_default - for _language in self.system_dirs_default[_compiler] - for _default in self.system_dirs_default[_compiler][_language] ]) + self.system_dirs_default_all |= ( + set(self.system_dirs_default[compiler][language])) + # Construct the symlink farm for the compiler default dirs. + for system_dir in self.system_dirs_default[compiler][language]: + _MakeLinkFromMirrorToRealLocation(system_dir, self.client_root, + self.system_links) finally: if timer: timer.Start() diff --git a/include_server/include_analyzer.py b/include_server/include_analyzer.py index 48b2e5d..c284c3f 100755 --- a/include_server/include_analyzer.py +++ b/include_server/include_analyzer.py @@ -50,7 +50,8 @@ class IncludeAnalyzer(object): # Make table for symbols in #define's. self.symbol_table = {} # Erect the edifice of caches. - caches = self.caches = cache_basics.SetUpCaches() + caches = self.caches = ( + cache_basics.SetUpCaches(self.client_root_keeper.client_root)) # Migrate the cache stuff to self namespace. self.includepath_map = caches.includepath_map @@ -74,7 +75,9 @@ class IncludeAnalyzer(object): # Make a cache for the symbolic links encountered; also for their # replication into root directory. self.mirror_path = mirror_path.MirrorPath(self.simple_build_stat, - self.canonical_path) + self.canonical_path, + self.realpath_map, + self.systemdir_prefix_cache) # Make a parser for C/C++. self.parse_file = parse_file.ParseFile(self.includepath_map) # Make a compressor for source files. @@ -229,7 +232,7 @@ class IncludeAnalyzer(object): def DoCompilationCommand(self, cmd, currdir, client_root_keeper): """Parse and and process the command; then gather files and links.""" - + self.translation_unit = "unknown translation unit" # don't know yet # Any relative paths in the globs in the --stat_reset_trigger argument @@ -249,7 +252,8 @@ class IncludeAnalyzer(object): self.directory_map, self.compiler_defaults, self.timer)) - (_, _, _, source_file, result_file_prefix, _) = parsed_command + (unused_quote_dirs, unused_angle_dirs, unused_include_files, source_file, + result_file_prefix, unused_Dopts) = parsed_command # Do the real work. include_closure = ( @@ -262,18 +266,12 @@ class IncludeAnalyzer(object): # Links are accumulated intra-build (across different compilations in a # build). We send all of 'em every time. This will potentially lead to # performance degradation for large link farms. We expect at most a - # handful. - links = self.mirror_path.Links() + # handful. We add put the system links first, because there should be very + # few of them. + links = self.compiler_defaults.system_links + self.mirror_path.Links() files = self.compress_files.Compress(include_closure, client_root_keeper) - must_exist_dirs = self.mirror_path.MustExistDirs() - random_name = 'forcing_technique_271828' - forcing_files = [d + '/' + random_name - for d in must_exist_dirs] - for forcing_file in forcing_files: - # If for extremly obscure reasons the file already exists and is useful, - # then don't change it. - open(forcing_file, "a").close() + forcing_files = self._ForceDirectoriesToExist() files_and_links = files + links + forcing_files @@ -300,6 +298,30 @@ class IncludeAnalyzer(object): realpath_map) return files_and_links + def _ForceDirectoriesToExist(self): + """Force any needed directories to exist. + + In rare cases, the source files may contain #include "foo/../bar", + but may not contain any other files from the "foo" directory. + In such cases, we invent a dummy file in (the mirrored copy of) + each such directory, just to force the distccd server to create the + directory, so that the C compiler won't get an error when it tries + to resolve that #include. + + Returns: + A list of files to pass as dummy inputs. + """ + + must_exist_dirs = self.mirror_path.MustExistDirs() + random_name = 'forcing_technique_271828' + forcing_files = [d + '/' + random_name + for d in must_exist_dirs] + for forcing_file in forcing_files: + # If for extremly obscure reasons the file already exists and is useful, + # then don't change it: that's why we open in "append" mode. + open(forcing_file, "a").close() + return forcing_files + def RunAlgorithm(self, filepath_resolved_pair, filepath_real_idx): """Run FindNode on filepath; then compute include closure. Arguments: diff --git a/include_server/macro_eval_test.py b/include_server/macro_eval_test.py index fd4bb1a..292cf35 100755 --- a/include_server/macro_eval_test.py +++ b/include_server/macro_eval_test.py @@ -26,6 +26,8 @@ import basics import parse_file import cache_basics import macro_eval +import shutil +import tempfile import unittest NotCoveredError = basics.NotCoveredError @@ -36,7 +38,8 @@ class MacroEvalTest(unittest.TestCase): basics.opt_debug_pattern = 1 - caches = cache_basics.SetUpCaches() + self.tmp = tempfile.mkdtemp() + caches = cache_basics.SetUpCaches(self.tmp) self.includepath_map = caches.includepath_map self.canonical_path = caches.canonical_path @@ -45,7 +48,7 @@ class MacroEvalTest(unittest.TestCase): def tearDown(self): - pass + shutil.rmtree(self.tmp) def test__SubstituteSymbolInString(self): @@ -194,7 +197,7 @@ class MacroEvalTest(unittest.TestCase): def test_ResolveExpr(self): # Erect the edifice of caches. - caches = cache_basics.SetUpCaches() + caches = cache_basics.SetUpCaches(self.tmp) parse_file_obj = parse_file.ParseFile(caches.includepath_map) symbol_table = {} diff --git a/include_server/mirror_path.py b/include_server/mirror_path.py index 8b92933..dcf83eb 100755 --- a/include_server/mirror_path.py +++ b/include_server/mirror_path.py @@ -43,12 +43,16 @@ class MirrorPath(object): def __init__(self, simple_build_stat, - canonical_path): + canonical_path, + realpath_map, + systemdir_prefix_cache): """Constructor. Arguments: simple_build_stat: object of type SimpleBuildStat canonical_path: function of type CanonicalPath + realpath_map: a CanonicalMapToIndex; see cache_basics.py + systemdir_prefix_cache: a SystemdirPrefixCache; see cache_basics.py. """ assert isinstance(simple_build_stat, cache_basics.SimpleBuildStat) assert isinstance(canonical_path, cache_basics.CanonicalPath) @@ -61,6 +65,8 @@ class MirrorPath(object): self.simple_build_stat = simple_build_stat self.canonical_path = canonical_path self.must_exist_dirs = [] + self.realpath_map = realpath_map + self.systemdir_prefix_cache = systemdir_prefix_cache def Links(self): """Return the list of symbolic links created.""" @@ -90,9 +96,10 @@ class MirrorPath(object): # destinations exist, and replicate symbolic links where necessary. while filepath and filepath != '/': if (filepath, current_dir_idx) in link_stat: - # Filepath is already mirrored - return + # Filepath is already mirrored + return link_stat.add((filepath, current_dir_idx)) + # Process suffix of filepath by # - making sure that the mirrored real path of the prefix exists, # - and that the suffix if a symbolic link @@ -109,18 +116,32 @@ class MirrorPath(object): # And, its counterpart under root root_prefix_real = root + prefix_real - # Make sure that root_prefix_real is there + # Make sure that the parent, root_prefix_real, is there if not lookup(root_prefix_real): - if not os.path.isdir(root_prefix_real): - self.must_exist_dirs.append(root_prefix_real) - os.makedirs(root_prefix_real) - self.simple_build_stat.cache[root_prefix_real] = True - + # We have not been in this real location before. + if not os.path.isdir(root_prefix_real): + # Now check that the parent of the link is not under a default system + # dir. If it is, then we assume that the parent and indeed the + # link itself exist on the server as well, and thus, don't need to + # be mirrored. + realpath_map = self.realpath_map + realpath_idx = realpath_map.Index(prefix_real) + if not self.systemdir_prefix_cache.StartsWithSystemdir(realpath_idx, + realpath_map): + # Not under default system dir. Mark this directory as one that + # must always be created on the server. + self.must_exist_dirs.append(root_prefix_real) + # Create parent path in mirror directory. + os.makedirs(root_prefix_real) + else: + break + self.simple_build_stat.cache[root_prefix_real] = True assert os.path.isdir(root_prefix_real) - # Create the mirrored symbolic link if applicable + # Create the mirrored symbolic link if applicable. if os.path.islink(filepath): - link_name = root_prefix_real + '/' + suffix - if not os.path.exists(link_name): - os.symlink(self.canonical_path.Canonicalize(filepath), link_name) - self.links.append(link_name) + link_name = root_prefix_real + '/' + suffix + if not os.path.exists(link_name): + os.symlink(self.canonical_path.Canonicalize(filepath), + link_name) + self.links.append(link_name) filepath = prefix_filepath diff --git a/include_server/mirror_path_test.py b/include_server/mirror_path_test.py index 5cce1bf..91ae819 100755 --- a/include_server/mirror_path_test.py +++ b/include_server/mirror_path_test.py @@ -25,6 +25,8 @@ import os.path import basics import cache_basics import mirror_path +import shutil +import tempfile import unittest NotCoveredError = basics.NotCoveredError @@ -39,13 +41,15 @@ class MirrorPathTest(unittest.TestCase): def setUp(self): basics.debug_pattern = 3 - - caches = cache_basics.SetUpCaches() + self.tmp = tempfile.mkdtemp() + caches = cache_basics.SetUpCaches(self.tmp) self.canonical_path = caches.canonical_path self.simple_build_stat = caches.simple_build_stat self.mirror_path = mirror_path.MirrorPath(self.simple_build_stat, - self.canonical_path) + self.canonical_path, + caches.realpath_map, + caches.systemdir_prefix_cache) self.directories = ['/', '/a', '/link', '/a/link', '/a/b', '/link/link', '/root'] @@ -59,7 +63,7 @@ class MirrorPathTest(unittest.TestCase): def tearDown(self): - pass + shutil.rmtree(self.tmp) def test_MirrorPath(self): diff --git a/include_server/parse_command_test.py b/include_server/parse_command_test.py index a982775..b87f548 100755 --- a/include_server/parse_command_test.py +++ b/include_server/parse_command_test.py @@ -27,6 +27,8 @@ import time import basics import cache_basics import parse_command +import shutil +import tempfile import unittest NotCoveredError = basics.NotCoveredError @@ -37,7 +39,8 @@ class ParseCommandUnitTest(unittest.TestCase): basics.opt_debug_pattern = 1 - caches = cache_basics.SetUpCaches() + self.tmp = tempfile.mkdtemp() + caches = cache_basics.SetUpCaches(self.tmp) self.includepath_map = caches.includepath_map self.canonical_path = caches.canonical_path @@ -62,8 +65,7 @@ class ParseCommandUnitTest(unittest.TestCase): self.compiler_defaults.system_dirs_default[mock_compiler]['c++'] = [] def tearDown(self): - pass - + shutil.rmtree(self.tmp) def test__SplitMacroArg(self): self.assertEqual(parse_command._SplitMacroArg("="), ["="]) |