diff options
author | klarlund <klarlund@01de4be4-8c4a-0410-9132-4925637da917> | 2008-03-28 02:48:48 +0000 |
---|---|---|
committer | klarlund <klarlund@01de4be4-8c4a-0410-9132-4925637da917> | 2008-03-28 02:48:48 +0000 |
commit | 41ff798014fc3669f1d29cc35c79ca3468ed926a (patch) | |
tree | 995faab42744ddcb755996469397b865b88cc048 /include_server/cache_basics.py | |
parent | f23f077c10e4b492c428c1a08c562b9b5cbb7f09 (diff) | |
download | distcc-41ff798014fc3669f1d29cc35c79ca3468ed926a.tar.gz |
Initial submission @6805748, not fully baked yet.
git-svn-id: http://distcc.googlecode.com/svn/trunk@3 01de4be4-8c4a-0410-9132-4925637da917
Diffstat (limited to 'include_server/cache_basics.py')
-rwxr-xr-x | include_server/cache_basics.py | 858 |
1 files changed, 858 insertions, 0 deletions
diff --git a/include_server/cache_basics.py b/include_server/cache_basics.py new file mode 100755 index 0000000..7be7507 --- /dev/null +++ b/include_server/cache_basics.py @@ -0,0 +1,858 @@ +#! /usr/bin/python2.4 + +# Copyright 2007 Google Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# + +"""Classes enabling definition and composition of caches. + +This file defines caches used to speed up the does-this-file-exist +test that forms the basis of the C preprocessor's include-file +handling, and takes most of its time. + +When the preprocessor sees a line like "#include <foo/bar.h>" it looks +for a file named "bar.h" in many directories: /usr/include/foo/bar.h, +./foo/bar.h, and so forth. More precisely, the preprocessor is given +a "search path", which is a list of directory-names. (By default, the +search-path looks like ['/usr/include', '/usr/local/include', ...], +but it's often extended via gcc flags like -I, -isystem, -iprefix, +etc.) To resolve a single #include like "#include <foo/bar.h>", the +preprocessor goes through every directory in the search path, running + + os.stat(os.path.join(current_working_dir, search_dir, 'foo/bar.h')) + +until the stat call succeeds. With dozens of search-dirs to look +through, dozens of #include lines per source file, and hundreds of +source files per compilation, this can add up to millions of stat +calls. Many of these calls are exactly the same, so caching is a big +win. + +The cache of stat calls takes a filename as input and produces a bool +as output, saying if the filename exists. For reasons that will +become clear in a moment, we actually represent the input filename as +a triple that breaks the filename into its three components: + + 1) currdir: the current working directory (usually os.path.absdir('.')) + 2) searchdir: an element of the search path (eg '/usr/include', 'base') + 3) includepath: the thing that comes after "#include" in source files + ("foo/bar.h" in our examples above). + +Why do we break the input into three parts? Consider what cache-lookups +we have to do for a single source file: + cache[os.path.join(currdir, searchdir1, includepath1)] # #include <ipath1> + cache[os.path.join(currdir, searchdir2, includepath1)] # #include <ipath1> + cache[os.path.join(currdir, searchdir3, includepath1)] # #include <ipath1> + [etc...until the cache-lookup returns True] + cache[os.path.join(currdir, searchdir1, includepath2)] # #include <ipath2> + cache[os.path.join(currdir, searchdir2, includepath2)] # #include <ipath2> + cache[os.path.join(currdir, searchdir3, includepath2)] # #include <ipath2> + [etc] + +By having the key be a triple, we avoid all those unnecessary +os.path.join calls. But even if we do this, we notice bigger fish +to fry: the Python interpreter still has to do a string-hash of +currdir for every lookup, and also has to string-hash searchdirX and +includepathX many times. It would be much more efficient if we did +those hashes ourselves, reducing the number of string-hashes from +O(|search-path| * |#include lines|) to +O(|search-path| + |#include lines|). + +This motivates (finally!) the data structures in this file. We have +three string-to-number maps, for mapping each currdir, searchdir, and +includepath to a small integer. We put that all together in a cache, +that takes a triple of integers as its key and produces True if the +file exists, False if it does not, or None if its status is unknown. + +The String-to-number Map(s) +--------------------------- +The basic map that converts a filepath-path -- a currdir, searchdir, +or includepath -- to a small integer is called MapToIndex. MapToIndex +provides mapping in both directions: + index: a dictionary mapping paths (strings) to indices in 1..N, and + string: an array of size N + 1 that implements the reverse mapping + +So: + obj.string[obj.index[path_as_string]] == path_as_string + obj.index[obj.string[path_as_number]] == path_as_number + +Note we map from 1..N, and not 0..N-1, which leave us 0 free to use as +a synonym for None or False. + +There are also classes that specialize MapToIndex for specific purposes. + +DirectoryMapToIndex assumes the input is a directory, and in +particular a directory that does not have a slash at the end of it (eg +"/etc"). It adds the trailing slash before inserting into the map. +This is useful because it allows us to use + to join this directory +with a relative filename, rather than the slower os.path.join(). + +RelpathMapToIndex assumes the input is a relative filepath, that is, +one that does not start with /. When combined with DirectoryMapToIndex +entries, + can be used as a fast alternative to os.path.join(). + +CanonicalMapToIndex is a MapToIndex that canonializes its input before +inserting it into the map: resolving symlinks, getting rid of ..'s, +etc. It takes an absolute path as input. + +Other Caches +------------ +Besides the maps from strings to integers, there are three other caches. +One is the realpath-cache, that takes a filename and returns +os.path.realpath(filename). We cache this because os.path.realpath() +is very slow. This is called CanonicalPath. + +The second cache, the DirnameCache, maps an arbitrary pathname to +dirname(pathname), that is, the directory the pathname is in. The +input pathname is represented by a (currdir_idx, searchdir_idx, +includepath_idx) triple. The output is likewise represented as a +number: an index into the DirectoryMapToIndex structure. + +The third cache is called SystemdirPrefixCache. It tells you, for a +given absolute filepath, whether it is prefixed by a systemdir (that +is, one of the searchdirs that's built into cpp, such as /usr/include). +This is useful to cache because there are several systemdirs, and it's +expensive to check them all each time. + +Naming Conventions +------------------ + currdir: the current working dir. + searchdir: an element of the search-path (places cpp looks for .h files). + includepath: the string a source file #includes. + realpath: a full filepath with all its symlinks resolved: + os.path.realpath(os.path.join(currdir, searchdir, includepath)) + FOO_idx: the small integer associated with the string FOO. + + includepath_map: the map that takes includepaths to their idx and back + (a RelpathMapToIndex). + directory_map: the map that takes currdirs and searchdirs to their + idx and back. It also is used to store dirname(filepath) for arbitrary + filepaths -- basically, anything we know is a directory (a + DirectoryMapToIndex). + realpath_map: the map that takes full filepaths to their idx and back, + canonicalizing them first (by resolving symlinks) (a + CanonicalMapToIndex). + + searchlist: a list of searchdirs. In gcc/cpp documentation, this is + called the "search path", but for consistency, in this code we reserve + the name "path" to mean "filesystem component," never "list of dirs". + (A list of strings). + systemdir: a searchdir that's built into cpp, rather than set via -I. + (A string.) + + resolved_filepath: given an includepath, and a (possibly implicit) + currdir and searchlist, the resolved_filepath is + os.path.join(currdir, searchdir, includepath) + for the first searchdir in searchlist for which the joined string + exists. This path can be represented in many ways: 1) a string like + "foo/bar/baz.h" (if so, this string has been canonicalized to resolve + symlinks and the like); 2) an index into realpath_map associated with + that string; 3) a triple of indices; or 4) a pair of indices plus an + assumption that os.getcwd() == currdir. + +Pair Represenation of Filepaths +------------------------------- +A file is uniquely determined by the triple + (currdir_idx, searchdir_idx, includepath_idx) +For a single compilation unit, the code will often start with a +chdir(currdir). After that, we often refer to a file by the pair + (searchdir_idx, includepath_idx) +which might be either an absolute filename or relative to $PWD. + +We refer to this pair as a filepath_pair. +TODO(csilvers): find a better name? + +The function IsFilepathPair(x) tests whether x is a pair that could +plausibly have a searchdir_idx as its first element and an +includepath_idx as its second. + +Tests +----- +This code is currently only tested by regression tests of modules +using this one. +""" + +__author__ = "opensource@google.com (Nils Klarlund, Craig Silverstein)" + +import os +import os.path +import sys + +import basics +import statistics +import compiler_defaults + +DIR_ARRAY_SIZE = 500 + +# We currently use the stat and realpath of GNU libc stat and +# realpath. They are about an order of magnitude faster than their +# Python counterparts, even when called through the Python/C +# interface. + +try: + import distcc_pump_c_extensions + _OsPathExists = distcc_pump_c_extensions.OsPathExists + _OsPathIsFile = distcc_pump_c_extensions.OsPathIsFile + _PathRealpath = distcc_pump_c_extensions.Realpath + _path_realpath_works = True +except ImportError: + _OsPathExists = os.path.exists + _OsPathIsFile = os.path.isfile + _PathRealpath = os.path.realpath + # os.path.realpath might have some bugs. TODO(csilvers): check that here + _path_realpath_works = False + +Debug = basics.Debug +DEBUG_TRACE = basics.DEBUG_TRACE +DEBUG_TRACE1 = basics.DEBUG_TRACE1 +DEBUG_TRACE2 = basics.DEBUG_TRACE2 +NotCoveredError = basics.NotCoveredError + + +#### +#### SIMPLE CACHES +#### + +class CanonicalPath(object): + """Memoizing calculation of realpaths. realpath(x) is the 'canonical' + version of x, with all symbolic links eliminated. + """ + + def __init__(self): + self.cache = {} + + def Canonicalize(self, filepath): + """Find a really canonical path, possibly memoized. + + Arguments: + filepath: a filepath (string) + Returns: + the realpath of filepath (string) + + The following is irrelevant if we always use the distcc_pump_c_extensions + realpath function. + --- + Apparently, in some versions of Python 2.4 at least, realpath does + *not* resolve the last component of a filepath if it is a link: + https://sourceforge.net/tracker/?func=detail&atid=105470&aid=1213894&group_id=5470 + Make up for that: follow that final link until a real realpath has + been found. + + Also, realpath is not idempotent. + + Solution (?): turn filepath into abspath before applying realpath; + then we can cache results as well (without worring about value of + current directory). + + The final problem -- that os.path.realpath is very slow, at least + an order of magnitude slower than the gnu libc one --- is solved + through caching all uses through an object of the present class. + """ + assert isinstance(filepath, str) + try: + return self.cache[filepath] + except KeyError: + if _path_realpath_works: + r = _PathRealpath(filepath) + self.cache[filepath] = r + return r + # Fix for os.path.realpath idempotencey bug (Python 2.4). + filepath_ = os.path.abspath(filepath) + filepath_ = _PathRealpath(filepath_) + # Fix for os.path.realpath bug (Python 2.4): symlinks at end not + # resolved. + for unused_i in range(10): + if not os.path.islink(filepath_): + break + filepath_ = os.path.join(os.path.dirname(filepath_), + os.readlink(filepath_)) + else: + raise NotCoveredError("Too many symlinks in '%s'." % filepath) + self.cache[filepath] = filepath_ + return filepath_ + +class DirnameCache(object): + """Cache the mapping from filepath pairs to index of their directory names. + + The key is a triple (currdir_idx, searchdir_idx, includepath_idx). The + value is + + (dir_idx, dir_realpath_idx) + + where dir_idx is the index of dirname of the corresponding filepath, which + possibly is relative, and dir_realpath_idx is the realpath index of the + absolute location of the dirname. The value currdir_idx is of possible + importance for deteterming dir_realpath_idx, but plays no role in determining + dir_idx.""" + + def __init__(self, includepath_map, directory_map, realpath_map): + """Constructor. + Arguments: + includepath_map: the map used to construct the includepath_idx + that will be passed in as arguments to Lookup(). + directory_map: the map used to construct both the currdir_idx + and searchdir_idx that will be passed in as arguments to + Lookup(). It's also the data structure that produces dir_idx. + realpath_map: a string-to-int map of canonicalized filepaths + """ + self.includepath_map = includepath_map + self.directory_map = directory_map + self.realpath_map = realpath_map + self.cache = {} + + def Lookup(self, currdir_idx, searchdir_idx, includepath_idx): + """Return the directory and realpath indices of the dirname of the input. + + Arguments: + currdir_idx: the directory index of the current directory + searchdir_idx: a directory_map index + includepath_idx: an includepath index + Returns: + a pair (directory map index, realpath index) + + See class documentation. + + Example: if the strings of the arguments indices put together make + '/usr/include/foo/bar.h', then this routine will insert '/usr/include/foo/' + into self.directory_map, and then return the corresponding pair (directory + index of /usr/include/foo/, real path index of /usr/include/foo/). If the + arguments put together form "foo.h", then the directory index returned is + that of "", the current directory, and the realpath index is that of + currdir. + """ + try: + return self.cache[(currdir_idx, searchdir_idx, includepath_idx)] + except KeyError: + directory = os.path.dirname(os.path.join( + self.directory_map.string[searchdir_idx], + self.includepath_map.string[includepath_idx])) + dir_idx = self.directory_map.Index(directory) + rp_idx = self.realpath_map.Index( + os.path.join(self.directory_map.string[currdir_idx], + directory)) + self.cache[(currdir_idx, searchdir_idx, includepath_idx)] = (dir_idx, + rp_idx) + return (dir_idx, rp_idx) + + +class SystemdirPrefixCache(object): + """A cache of information about whether a file exists in a systemdir. + + A systemdir is a searchdir that is built in to the C/C++ + preprocessor. That is, when the preprocessor is figuring out what + directory an #include is in, these are the directories it's + hard-coded in to check (you can add other directories via -I). This + cache records, for a given filepath, whether it starts with a + systemdir. This is useful to identify whether the path is likely to + correspond to a system include-file (such as stdio.h). Such files are + unlikely to change, and are likely to already exist on the distcc + servers, both of which are useful things to know for optimization. + + For speed, users can access self.cache directly, rather than going + through the StartsWithSystemdir API. Be sure to call FillCache() to + make sure the cache is populated, before accessing it! + """ + + def __init__(self, systemdirs): + """Constructor. + + Argument: + systemdirs: the list of system-directories the preprocessor + uses. It's a list of strings, probably extracted from the + preprocessor itself. Each systemdir should end in a slash. + + In practice, systemdirs will start empty, and later some routine + (in parse_command.py) will magically fill it. So be sure to wait + for that before calling FillCache! + TODO(csilvers): normalize this; ideally pass systemdirs in to FillCache. + """ + self.systemdirs = systemdirs + # self.cache[i] will be True, False, or None for not-yet-checked. + self.cache = [None] + + def FillCache(self, realpath_map): + """Ensures that there's a cache entry for every index in realpath_map. + + Argument: + realpath_map: a string-to-int map of canonicalized filepaths we know. + + After this function is called, the cache entry is True iff + realpath.startswith(systemdir) is True for any of the systemdirs + passed in to our constructor. + """ + if len(self.cache) >= realpath_map.Length(): + return # we're already all full + for realpath_idx in xrange(len(self.cache), realpath_map.Length()): + realpath = realpath_map.string[realpath_idx] + for systemdir in self.systemdirs: + if realpath.startswith(systemdir): + self.cache.append(True) + break + else: # we get here if the for never 'break'ed + self.cache.append(False) + + assert len(self.cache) == realpath_map.Length() + + def StartsWithSystemdir(self, realpath_idx, realpath_map): + """Return True iff realpath starts with a systemdir. + + Arguments: + realpath_idx: the index of the realpath we want to check. + realpath_map: the map from realpath_idx to a string. + + Return True iff realpath.startswith(systemdir) for any of the + systemdirs passed in to our constructor. (For speed, you can + access self.cache directly instead of calling this, but make + sure FillCache() has been called first!) + """ + self.FillCache(realpath_map) + return self.cache[realpath_idx] + + +#### +#### MAP_TO_INDEX AND ITS SPECIALIZATIONS +#### + +class MapToIndex(object): + """Maps every object it sees to a unique small integer. In + practice, this class is used to map path-components (which are strings). + """ + + def __init__(self): + """Constructor. + + Instance variables: + map: a dictionary such that map[path] is the index of path + string: a list satisfying: string[i] is the path such that map[path] = i + """ + + # Do not make the mistake of letting a real index be 0. (Hint: + # because "if path:" then does not distinguish between 0 and None.) + self.index = {None:None} + self.string = [None] + + def _Invariant_(self): + return len(self.index) == len(self.string) + + def Index(self, path): + """Returns the index i > 0 of path.""" + assert self._Invariant_() + try: + return self.index[path] + except KeyError: + self.index[path] = len(self.string) + self.string.append(path) + return len(self.string) - 1 + + def String(self, i): + """Returns the path such that Index(path) == i.""" + assert self._Invariant_() + assert 0 < i < self.Length() + return self.string[i] + + def Length(self): + """One more than the number of elements indexed.""" + assert self._Invariant_() + return len(self.string) + + +class DirectoryMapToIndex(MapToIndex): + """Like a normal MapToIndex, but assumes the keys are directories, + and in particular, directories without a trailing slash (eg "/etc"). + It stores the directories in the map, but appends the trailing slash + first. This is another type of normalization, and useful for cheap + path-joining (eg using + instead of os.path.join). + """ + + def Index(self, directory): + """Return index d > 0 of normalized directory. + Argument: + directory: a string, either empty or not ending in '/'. + + The empty string is not changed, but other strings are stored with + a '/' appended. + """ + if directory != "" and directory != "/": + assert directory[-1] != '/', directory + directory = directory + '/' + return MapToIndex.Index(self, directory) + + +class RelpathMapToIndex(MapToIndex): + """Like a normal MapToIndex, but assumes the keys are relative + filesystem paths, that is, filesystem paths not starting with /. + This is useful for "cheap" normalization: this invariant ensures that + os.path.join(some-directorymap-string, some-relpathmap-string) can + be implemented using +. + """ + + def Index(self, relpath): + """Return index d > 0 of relative path. + Argument: + directory: a string not starting with /. + """ + if os.path.isabs(relpath): + raise NotCoveredError("Filepath must be relative but isn't: '%s'." % + relpath, + send_email=False) + # Now, remove leading "./" so as not to start an infinite regression when + # say foo.c contains: + # + # #include "./foo.c" + # + # which mighy seduce a recursive include analyzer down the forbidden path: + # + # "foo.c", # "./foo.c", "././foo.c." etc. + while relpath.startswith("./"): + relpath = relpath[2:] + return MapToIndex.Index(self, relpath) + + +class CanonicalMapToIndex(MapToIndex): + """Like a normal MapToIndex, but assumes the keys are absolute + filepaths, and canonicalizes them before inserting into the map. + 'Canonicalize' means to do the equivalent of os.path.realpath(), + which mostly involves resolving symlinks in the filepath. + """ + + def __init__(self, canonicalize): + """Constructor. + Argument: + canonicalize: an instance of the CanonicalPath cache.""" + MapToIndex.__init__(self) + self.canonicalize = canonicalize + + def Index(self, filepath): + """Return the realpath index r of filepath. filepath should be + an absolute filename. + """ + return MapToIndex.Index(self, self.canonicalize(filepath)) + + +def RetrieveDirectoriesExceptSys(directory_map, realpath_map, + systemdir_prefix_cache, directory_idxs): + """Calculate the set of non-system directories of an index list. + + Arguments: + directory_map: a DirectoryMapToIndex cache + realpath_map: a CanonicalMapToIndex cache + directory_idxs: a list or tuple of directory_map indices + Returns: + the corresponding tuple of directories except for those whose + realpath has a prefix that is a sysdir + + The directories in the returned list have their trailing '/' + stripped. + """ + result = [] + for dir_idx in directory_idxs: + # Index the absolute path; this will let us know whether dir_idx is under a + # default systemdir of the compiler. + rp_idx = realpath_map.Index(os.path.join( + os.getcwd(), directory_map.string[dir_idx])) + systemdir_prefix_cache.FillCache(realpath_map) + if not systemdir_prefix_cache.cache[rp_idx]: + result.append(directory_map.string[dir_idx].rstrip('/')) + return tuple(result) + + +#### +#### THE STAT CACHES +#### + +class SimpleBuildStat(object): + """Stat cache that works with strings, not indices.""" + + def __init__(self): + self.cache = {} + + def Lookup(self, filepath): + """Returns true if filepath exists.""" + try: + return self.cache[filepath] + except KeyError: + result = self.cache[filepath] = _OsPathExists(filepath) + return result + + +class BuildStatCache(object): + """A highly optimized mechanism for stat queries of filepaths, + as represented by a triple of indexes: currdir_idx, searchdir_idx, + filepath_idx. Given this input, we can say whether a regular file + represented by this triple exists on the filesystem, and if so, + what its canonical pathname is: that is, the pathname after all + symlinks have been resolved. + + The hash table is three-level structure: + - build_stat[currdir_idx] contains an array for each includepath_idx + - build_stat[currdir_idx][includepath_idx] is this array, and + - build_stat[currdir_idx][includepath_idx][searchdir_idx] is either + * False if os.path.join(currdir, searchdir, includepath) does not exist + * True if it does + * None when it is not known whether it exists or not + In addition, we keep a parallel structure for the realpath, that lets us + quickly map from a filepath to os.path.realpath(filepath). + - real_stat[currdir_idx] contains an array for each fp + - real_stat[currdir_idx][includepath_idx] is this array, and + - real_stat[currdir_idx][includepath_idx][searchdir_idx] is either + * realpath_idx, such that realpath_map.string[realpath_idx] = + os.path.realpath(os.path.join(currdir, searchdir, includepath)) + when build_stat[currdir_idx][includepath_idx][searchdir_idx] = True + * None, otherwise + """ + + def __init__(self, includepath_map, directory_map, realpath_map): + self.build_stat = {} + self.real_stat = {} + self.includepath_map = includepath_map + self.directory_map = directory_map + self.realpath_map = realpath_map + self.bad_realpath_tuples = [] + + def _Verify(self, currdir_idx, searchdir_idx, includepath_idx): + """Verify that the cached result is the same as obtained by stat call. + Prerequisite: we've done a chdir(currdir) before this call. + """ + assert 1 <= includepath_idx < self.includepath_map.Length() + assert 1 <= searchdir_idx < self.directory_map.Length() + if __debug__: statistics.sys_stat_counter += 1 + + # Since we know directory_map entries end in /, and includepaths don't + # start with / (who does "#include </usr/include/string.h>"??), we can + # use + instead of the more expensive os.path.join(). + # Make sure $PWD is currdir, so we don't need to include it in our stat(). + assert os.getcwd() + '/' == self.directory_map.string[currdir_idx] + really_exists = _OsPathIsFile( + self.directory_map.string[searchdir_idx] + + self.includepath_map.string[includepath_idx]) + cache_exists = self.build_stat[currdir_idx][includepath_idx][searchdir_idx] + assert isinstance(cache_exists, bool) + if cache_exists != really_exists: + filepath = os.path.join(self.directory_map.string[currdir_idx], + self.directory_map.string[searchdir_idx], + self.includepath_map.string[includepath_idx]) + sys.exit("FATAL ERROR: " + "Cache inconsistency: '%s' %s, but earlier this path %s." % ( + filepath, + really_exists and "exists" or "does not exist", + cache_exists and "existed" or "did not exist")) + + def GetAndClearBadResolutions(self): + """Return descriptions of resolutions matching realpath warning regexp. + Returns: + a list of tuples of the form + (include_path, resolved_path, real_path) + where include_path is the argument of an #include (or an initial file), + resolved_path is the concatenation of the searchdir where the + include_path is found with this include_path, and real_path is the + realpath of this location (relative to the current directory). + The list is reset to [] with this operation. Thus the items returned are + those that occcurred since the last time this method was invoked. + """ + bad_realpath_tuples = self.bad_realpath_tuples + self.bad_realpath_tuples = [] + return bad_realpath_tuples + + def Resolve(self, includepath_idx, currdir_idx, searchdir_idx, + searchlist_idxs): + """Says whether (currdir_idx, searchdir_idx, includepath_idx) exists, + and if so what its canonicalized form is (with symlinks resolved). + TODO(csilvers): rearrange the order of the arguments. + + Args: + includepath_idx: The index of an includepath, from e.g. "#include <foo>" + currdir_idx: The index of the current working dir. Note that we + require os.getcwd() == currdir before calling Resolve! + searchdir_idx: A single searchdir, which is prepended to searchlist, + or None to not prepend to the searchlist. + searchlist_idxs: A list of directory indices. + + Returns: + 1) (None, None) if, for all sl_idx in [searchdir_idx] + searchlist_idxs, + os.path.join(currdir, sp, includepath) does not exist. + 2) ((sl_idx, includepath_idx), realpath_idx) + if, for some sl_idx in [searchdir_idx] + searchlist_idxs, + os.path.join(currdir, sp, includepath) does exist. In this case, + sl_idx is the index of the first searchlist entry for which the + exists-test succeeds, and realpath_idx is the index into the + realpath_map of os.path.join(currdir, sp, includepath). + + Again, we require as a prequesite that os.getcwd() must equal currdir: + os.getcwd() + '/' == self.directory_map.string[currdir_idx] + """ + includepath = self.includepath_map.string[includepath_idx] + dir_map_string = self.directory_map.string # memoize the fn pointer + build_stat = self.build_stat + real_stat = self.real_stat + if __debug__: + dir_map = self.directory_map + assert 0 < includepath_idx < self.includepath_map.Length() + assert 0 < currdir_idx < dir_map.Length() + assert searchdir_idx is None or 1 <= searchdir_idx < dir_map.Length() + for sl_idx in searchlist_idxs: + assert sl_idx < dir_map.Length() + assert os.getcwd() + '/' == dir_map_string[currdir_idx], ( + "'%s/' != '%s'" % (os.getcwd(), dir_map_string[currdir_idx])) + Debug(DEBUG_TRACE2, "Resolve: includepath: '%s', currdir: '%s', " + "searchdir: '%s', searchlist: %s" % + (includepath, + dir_map_string[currdir_idx], + searchdir_idx and dir_map_string[searchdir_idx], + " \n".join([dir_map_string[idx] for idx in searchlist_idxs]))) + try: + # Locate the array (list) relative to currdir_idx and includepath_idx + searchdir_stats = build_stat[currdir_idx][includepath_idx] + # Locate the corresponding array of realpath names + searchdir_realpaths = real_stat[currdir_idx][includepath_idx] + except KeyError: # We'll need to grow the relevant arrays + currdir_stats = build_stat.setdefault(currdir_idx, {}) + currdir_realpaths = real_stat.setdefault(currdir_idx, {}) + searchdir_stats = currdir_stats[includepath_idx] = \ + [None] * DIR_ARRAY_SIZE + searchdir_realpaths = currdir_realpaths[includepath_idx] = \ + [None] * DIR_ARRAY_SIZE + + # Try searchdir_idx if not None, then try every index in searchlist_idxs. + # This inner loop may be executed tens of millions of times. + # Do not try to form [searchdir_idx] + searchlist_idxs -- too expensive! + for searchlist in (searchdir_idx and [searchdir_idx] or [], + searchlist_idxs): + for sl_idx in searchlist: + if __debug__: + statistics.search_counter += 1 + statistics.build_stat_counter += 1 + try: + # We expect that searchdir_stats[sl_idx] == False, because + # we've usually seen sl_idx before for our includepath and + # our currdir --- and includepath does not usually exist + # relative to the sp directory. We're optimizing for this + # case of course. That should give us a rate of a couple of + # million iterations per second (for this case). + if searchdir_stats[sl_idx] == False: + if __debug__: self._Verify(currdir_idx, sl_idx, includepath_idx) + continue + if searchdir_stats[sl_idx]: + if __debug__: self._Verify(currdir_idx, sl_idx, includepath_idx) + return ((sl_idx, includepath_idx), searchdir_realpaths[sl_idx]) + except IndexError: # DIR_ARRAY_SIZE wasn't big enough; let's double + searchdir_stats.extend([None] * max(sl_idx, len(searchdir_stats))) + searchdir_realpaths.extend([None] * max(sl_idx, len(searchdir_stats))) + + # If we get here, result is not cached yet. + if __debug__: statistics.sys_stat_counter += 1 + # We do not explictly take into account currdir_idx, because + # of the check above that os.getcwd is set to current_dir. + relpath = dir_map_string[sl_idx] + includepath + if _OsPathIsFile(relpath): + searchdir_stats[sl_idx] = True + rpath = os.path.join(dir_map_string[currdir_idx], relpath) + realpath_idx = searchdir_realpaths[sl_idx] = ( + self.realpath_map.Index(rpath)) + # This is the place to catch errant files according to user defined + # abspath_warning option. See documentation for method + # GetAndClearBadResolutions. + if basics.opt_realpath_warning_re: + realpath = self.realpath_map.string[realpath_idx] + if basics.opt_realpath_warning_re.search(realpath): + self.bad_realpath_tuples.append((includepath, relpath, realpath)) + return ((sl_idx, includepath_idx), realpath_idx) + else: + searchdir_stats[sl_idx] = False + + if __debug__: Debug(DEBUG_TRACE2, "Resolve: failed") + return (None, None) + + +class SetUpCaches(object): + """Erect the edifice of caches. + + Instance variables: + includepath_map: RelpathMapToIndex + directory_map: DirectoryMapToIndex + realpath_map: CanonicalMapToIndex + + canonical_path: CanonicalPath + build_stat_cache: BuildStatCache + dirname_cache: DirnameCache + simple_build_stat: SimpleBuildStat + + IsFilepathIndex: test for filepath index + IsDirectoryIndex: test for director index + IsRealpathIndex: test for realpath index + IsFilepathPair: test for filepath pair + """ + + def __init__(self): + # A memoizing (caching) class to canonicalize a path: mostly by + # resolving any symlinks in the path-component. + self.canonical_path = CanonicalPath() + + # The index-map for includepath names: things seen after '#include'. + self.includepath_map = RelpathMapToIndex() + + # The index-map for searchdir names and currdir as well. Also used any + # other time we have something we know is a directory (eg dirname(foo)). + self.directory_map = DirectoryMapToIndex() + + # The index-map for realpaths: the full pathname of an include, with + # symlinks resolved and such (hence the name realpath). + self.realpath_map = CanonicalMapToIndex(self.canonical_path.Canonicalize) + + # A cache of the directory part of filepaths. Note it uses the + # directory_map to actually store the mapping. + self.dirname_cache = DirnameCache(self.includepath_map, self.directory_map, + self.realpath_map) + + # A cache of whether a realpath starts with a system searchdir or + # not. Note: at this time, system_dirs_default_all will be empty. + # It will get filled via processing in parse_command.py. This is + # why we need to store the compiler_defaults instance, to make + # sure "our" system_dirs_default_all is updated. + # TODO(csilvers): get rid of this once prefix_cache TODO is cleaned up + self.compiler_defaults = compiler_defaults.CompilerDefaults( + self.canonical_path.Canonicalize) + self.systemdir_prefix_cache = SystemdirPrefixCache( + self.compiler_defaults.system_dirs_default_all) + + # The main caches, that say whether a file exists or not. We have + # two: a simple one that takes a filepath (string) as an argument, + # and the complicated one that works with index-triples. + self.simple_build_stat = SimpleBuildStat() + self.build_stat_cache = BuildStatCache(self.includepath_map, + self.directory_map, + self.realpath_map) + + # Convenient function closures to test for various semantic datatypes. + self.IsIncludepathIndex = (lambda x: + isinstance(x, int) + and 0 < x < self.includepath_map.Length()) + + self.IsSearchdirIndex = (lambda x: + isinstance(x, int) + and 0 < x < self.directory_map.Length()) + + self.IsCurrdirIndex = (lambda x: + isinstance(x, int) + and 0 < x < self.directory_map.Length()) + + self.IsFilepathPair = (lambda x: + isinstance(x, tuple) + and len(x) == 2 + and self.IsSearchdirIndex(x[0]) + and self.IsIncludepathIndex(x[1])) + + self.IsRealpathIndex = (lambda x: + isinstance(x, int) + and 0 < x < self.realpath_map.Length()) |