diff options
author | Tristan Van Berkom <tristan.vanberkom@codethink.co.uk> | 2018-01-10 19:52:24 +0900 |
---|---|---|
committer | Tristan Van Berkom <tristan.vanberkom@codethink.co.uk> | 2018-01-10 19:57:43 +0900 |
commit | ab05f80db2c558e754e5feb26609fa8e9b6fa07e (patch) | |
tree | 907d9d9a842b04c521e2dff4e737365faa7f6aed /buildstream/utils.py | |
parent | 37562b10f15de2f31f7d78ed8d82100c4fac8cf1 (diff) | |
download | buildstream-ab05f80db2c558e754e5feb26609fa8e9b6fa07e.tar.gz |
utils.py: Make list_relative_paths() report sorted paths.
This is needed because plugins make use of this function to
generate a cache key which must be stable.
In addition to the above, this patch also mitigates the performance
hit of sorting, and allows _process_list() to iterate directly
through the generator when additional sorting is not needed.
This patch is an enhanced version of Sam Thursfield's patch on MR !216
Diffstat (limited to 'buildstream/utils.py')
-rw-r--r-- | buildstream/utils.py | 41 |
1 files changed, 26 insertions, 15 deletions
diff --git a/buildstream/utils.py b/buildstream/utils.py index 2b6c4517c..a811fe2f1 100644 --- a/buildstream/utils.py +++ b/buildstream/utils.py @@ -105,6 +105,11 @@ def list_relative_paths(directory): """ for (dirpath, dirnames, filenames) in os.walk(directory): + # Modifying the dirnames directly ensures that the os.walk() generator + # allows us to specify the order in which they will be iterated. + dirnames.sort() + filenames.sort() + relpath = os.path.relpath(dirpath, directory) # We don't want "./" pre-pended to all the entries in the root of @@ -346,13 +351,15 @@ def copy_files(src, dest, *, files=None, ignore_missing=False, report_written=Fa unless the existing directory in `dest` is not empty in which case the path will be reported in the return value. """ + presorted = False if files is None: files = list_relative_paths(src) + presorted = True result = FileListResult() try: _process_list(src, dest, files, safe_copy, result, ignore_missing=ignore_missing, - report_written=report_written) + report_written=report_written, presorted=presorted) except OSError as e: raise UtilError("Failed to copy '{} -> {}': {}" .format(src, dest, e)) @@ -386,13 +393,15 @@ def link_files(src, dest, *, files=None, ignore_missing=False, report_written=Fa If a hardlink cannot be created due to crossing filesystems, then the file will be copied instead. 
""" + presorted = False if files is None: files = list_relative_paths(src) + presorted = True result = FileListResult() try: _process_list(src, dest, files, safe_link, result, ignore_missing=ignore_missing, - report_written=report_written) + report_written=report_written, presorted=presorted) except OSError as e: raise UtilError("Failed to link '{} -> {}': {}" .format(src, dest, e)) @@ -594,30 +603,32 @@ def _ensure_real_directory(root, destpath): # actionfunc: The function to call for regular files # result: The FileListResult # ignore_missing: Dont raise any error if a source file is missing +# presorted: Whether the passed list is known to be presorted # # -def _process_list(srcdir, destdir, filelist, actionfunc, result, ignore_missing=False, report_written=False): +def _process_list(srcdir, destdir, filelist, actionfunc, result, + ignore_missing=False, report_written=False, + presorted=False): # Keep track of directory permissions, since these need to be set # *after* files have been written. permissions = [] - # filelist comes in as a generator, and we need to use it more than once. - filelist = list(filelist) - - # Add to the results the list of files written - if report_written: - result.files_written += filelist + # Sorting the list of files is necessary to ensure that we processes + # symbolic links which lead to directories before processing files inside + # those directories. + if not presorted: + filelist = sorted(filelist) - # Note we consume the filelist (which is a generator and not a list) - # by sorting it, this is necessary to ensure that we processes symbolic - # links which lead to directories before processing files inside those - # directories. 
- # - for path in sorted(filelist): + # Now walk the list + for path in filelist: srcpath = os.path.join(srcdir, path) destpath = os.path.join(destdir, path) + # Add to the results the list of files written + if report_written: + result.files_written.append(path) + # Collect overlaps if os.path.lexists(destpath) and not os.path.isdir(destpath): result.overwritten.append(path) |