diff options
author | Sam Thursfield <sam.thursfield@codethink.co.uk> | 2018-01-19 13:56:52 +0000 |
---|---|---|
committer | Jürg Billeter <j@bitron.ch> | 2018-02-12 10:26:18 +0000 |
commit | 0f430622e4539b5cc9209384e32980dc086c29df (patch) | |
tree | 0a96f8469389ee365c9ba0c6eeb5fc0699d6e211 | |
parent | 5e8dab02a7267aa60e574460a2a52c31a4e766f1 (diff) | |
download | buildstream-0f430622e4539b5cc9209384e32980dc086c29df.tar.gz |
utils.py: Wrap calls to os.path.realpath() in an LRU cache
The os.path.realpath() function is expensive and we call it many times,
to the point that os.path.realpath() calls make up around 40% of the
total time spent in Element.stage_artifact().
The cleanest way to fix this is with a `functools.lru_cache()` wrapper
that caches recently used values. None of the code in question can be
removed (as the tests added in the previous commit will demonstrate).
I tested this by running `bst shell base/base-system.bst true` in
the GNOME modulesets project.
o Without this patch there are 240,019 calls to os.path.realpath()
o With this patch (cache size of 64 items) there are 10,379 calls to os.path.realpath()
o If we increase the cache size to 128 items, there are 10,359 calls
to os.path.realpath().
o If we reduce the cache size to 32 items, there are 10,426 calls.
o In all cases the number of *unique* calls is 10,327.
This fixes issue #174.
-rw-r--r-- | buildstream/utils.py | 24 |
1 files changed, 15 insertions, 9 deletions
diff --git a/buildstream/utils.py b/buildstream/utils.py index da67f9c10..ff36a8c1b 100644 --- a/buildstream/utils.py +++ b/buildstream/utils.py @@ -34,6 +34,7 @@ import string import subprocess import tempfile import itertools +import functools from contextlib import contextmanager import psutil @@ -560,6 +561,11 @@ def _copy_directories(srcdir, destdir, target): 'directory expected: {}'.format(old_dir)) +@functools.lru_cache(maxsize=64) +def _resolve_symlinks(path): + return os.path.realpath(path) + + def _ensure_real_directory(root, destpath): # The realpath in the sandbox may refer to a file outside of the # sandbox when any of the direcory branches are a symlink to an @@ -568,12 +574,12 @@ def _ensure_real_directory(root, destpath): # This should not happen as we rely on relative_symlink_target() below # when staging the actual symlinks which may lead up to this path. # - realpath = os.path.realpath(destpath) - if not realpath.startswith(os.path.realpath(root)): + destpath_resolved = _resolve_symlinks(destpath) + if not destpath_resolved.startswith(_resolve_symlinks(root)): raise UtilError('Destination path resolves to a path outside ' + 'of the staging area\n\n' + ' Destination path: {}\n'.format(destpath) + - ' Real path: {}'.format(realpath)) + ' Real path: {}'.format(destpath_resolved)) # Ensure the real destination path exists before trying to get the mode # of the real destination path. @@ -582,10 +588,10 @@ def _ensure_real_directory(root, destpath): # refer to non-existing directories, they will be created on demand here # at staging time. 
# - if not os.path.exists(realpath): - os.makedirs(realpath) + if not os.path.exists(destpath_resolved): + os.makedirs(destpath_resolved) - return realpath + return destpath_resolved # _process_list() @@ -656,7 +662,7 @@ def _process_list(srcdir, destdir, filelist, actionfunc, result, if not os.path.exists(destpath): _ensure_real_directory(destdir, destpath) - dest_stat = os.lstat(os.path.realpath(destpath)) + dest_stat = os.lstat(_resolve_symlinks(destpath)) if not stat.S_ISDIR(dest_stat.st_mode): raise UtilError('Destination not a directory. source has {}' ' destination has {}'.format(srcpath, destpath)) @@ -728,11 +734,11 @@ def _relative_symlink_target(root, symlink, target): # We want a relative path from the directory in which symlink # is located, not from the symlink itself. - symlinkdir, _ = os.path.split(os.path.realpath(symlink)) + symlinkdir, _ = os.path.split(_resolve_symlinks(symlink)) # Create a full path to the target, including the leading staging # directory - fulltarget = os.path.join(os.path.realpath(root), target) + fulltarget = os.path.join(_resolve_symlinks(root), target) # now get the relative path from the directory where the symlink # is located within the staging root, to the target within the same |