summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTristan Maat <tristan.maat@codethink.co.uk>2018-03-09 10:55:25 +0100
committerTristan Maat <tristan.maat@codethink.co.uk>2018-03-27 10:58:18 +0100
commitb606fd14e1f9ec12c9e5ef20b4bc5f499792a496 (patch)
tree3bb43d8bae326d81b454d479ec1736f7575be7e7
parentb0e6a45e7d34ef969734bc5c607ce97a229a4f77 (diff)
downloadbuildstream-b606fd14e1f9ec12c9e5ef20b4bc5f499792a496.tar.gz
_ostree.py: Add diff_dirs function
-rw-r--r--buildstream/_ostree.py167
1 files changed, 167 insertions, 0 deletions
diff --git a/buildstream/_ostree.py b/buildstream/_ostree.py
index 0abb51da8..b3e19e27d 100644
--- a/buildstream/_ostree.py
+++ b/buildstream/_ostree.py
@@ -27,6 +27,7 @@
# pylint: disable=bad-exception-context,catching-non-exception
import os
+from collections import namedtuple
import gi
from gi.repository.GLib import Variant, VariantDict
@@ -243,6 +244,172 @@ def checksum(repo, ref):
return checksum_
+# Comma separated list of the GFileInfo attributes requested by every
+# query_info() / enumerate_children() call made while diffing.
+OSTREE_GIO_FAST_QUERYINFO = (
+    "standard::name,standard::type,standard::size,"
+    "standard::is-symlink,standard::symlink-target,"
+    "unix::device,unix::inode,unix::mode,unix::uid,unix::gid,unix::rdev"
+)
+
+
+# Record describing one modified entry found by diff_dirs():
+#
+#    src / target:                   The entry on each side of the comparison
+#    src_info / target_info:         The file info queried for each side
+#    src_checksum / target_checksum: The checksum of each side, or None when
+#                                    the two sides differ in file type and no
+#                                    checksum was computed
+#
+DiffItem = namedtuple(
+    'DiffItem',
+    ('src', 'src_info',
+     'target', 'target_info',
+     'src_checksum', 'target_checksum'),
+)
+
+
+# diff_dirs():
+#
+# Compute the difference between directory a and b as three separate
+# lists: the modified, the removed and the added entries.
+#
+# This is more-or-less a direct port of OSTree.diff_dirs (which cannot
+# be used via PyGobject), but does not support options.
+#
+# Args:
+#    a (Gio.File): The first directory for the comparison.
+#    b (Gio.File): The second directory for the comparison.
+#
+# Returns:
+#    (modified, removed, added), where
+#       modified (list of DiffItem): Entries present on both sides whose
+#                                    file type or checksum differs
+#       removed (list of Gio.File): Entries of a which are missing from b
+#                                   (removed directories are not expanded)
+#       added (list of Gio.File): Entries of b which are missing from a,
+#                                 including everything below an added
+#                                 directory
+#
+# Raises:
+#    (GLib.Error): Any error other than NOT_FOUND hit while looking up the
+#                  counterpart of an entry is propagated unchanged
+#
+def diff_dirs(a, b):
+    query_flags = Gio.FileQueryInfoFlags.NOFOLLOW_SYMLINKS
+
+    # get_file_checksum():
+    #
+    # Helper to compute the checksum of an arbitrary file (different
+    # objects have different methods to compute these).
+    #
+    def get_file_checksum(f, f_info):
+        if isinstance(f, OSTree.RepoFile):
+            return f.get_checksum()
+
+        # Only regular files have content to feed into the checksum
+        contents = None
+        if f_info.get_file_type() == Gio.FileType.REGULAR:
+            contents = f.read()
+
+        try:
+            csum = OSTree.checksum_file_from_input(f_info, None, contents,
+                                                   OSTree.ObjectType.FILE)
+        finally:
+            # Release the stream now instead of waiting for it to be
+            # garbage collected
+            if contents is not None:
+                contents.close(None)
+
+        return OSTree.checksum_from_bytes(csum)
+
+    # diff_files():
+    #
+    # Helper to compute a diff between two files, returns a DiffItem
+    # if their checksums differ and None otherwise.
+    #
+    def diff_files(a, a_info, b, b_info):
+        checksum_a = get_file_checksum(a, a_info)
+        checksum_b = get_file_checksum(b, b_info)
+
+        if checksum_a != checksum_b:
+            return DiffItem(a, a_info, b, b_info, checksum_a, checksum_b)
+
+        return None
+
+    # diff_add_dir_recurse():
+    #
+    # Helper to collect all files in a directory recursively.
+    #
+    def diff_add_dir_recurse(d):
+        collected = []
+
+        for child_info in d.enumerate_children(OSTREE_GIO_FAST_QUERYINFO, query_flags):
+            child = d.get_child(child_info.get_name())
+            collected.append(child)
+
+            if child_info.get_file_type() == Gio.FileType.DIRECTORY:
+                collected.extend(diff_add_dir_recurse(child))
+
+        return collected
+
+    # query_child():
+    #
+    # Helper to look up the entry called 'name' below 'parent', returns
+    # (child, child_info), or (None, None) if there is no such entry.
+    #
+    def query_child(parent, name):
+        try:
+            child = parent.get_child(name)
+            child_info = child.query_info(OSTREE_GIO_FAST_QUERYINFO, query_flags)
+        except GLib.Error as e:
+            if e.matches(Gio.io_error_quark(), Gio.IOErrorEnum.NOT_FOUND):
+                return None, None
+            raise
+
+        return child, child_info
+
+    modified = []
+    removed = []
+    added = []
+
+    a_info = a.query_info(OSTREE_GIO_FAST_QUERYINFO, query_flags)
+    b_info = b.query_info(OSTREE_GIO_FAST_QUERYINFO, query_flags)
+
+    # If both are directories and have the same checksum, we know that
+    # none of the underlying files changed, so we can save time.
+    #
+    # Note that this needs to test against OSTree.RepoFile (the instance
+    # type); OSTree.RepoFileClass is the class structure, which no file
+    # object is an instance of.
+    if (a_info.get_file_type() == Gio.FileType.DIRECTORY and
+            b_info.get_file_type() == Gio.FileType.DIRECTORY and
+            isinstance(a, OSTree.RepoFile) and
+            isinstance(b, OSTree.RepoFile)):
+        if a.tree_get_contents_checksum() == b.tree_get_contents_checksum():
+            return modified, removed, added
+
+    # We walk through 'a' first
+    for child_a_info in a.enumerate_children(OSTREE_GIO_FAST_QUERYINFO, query_flags):
+        name = child_a_info.get_name()
+
+        child_a = a.get_child(name)
+        child_a_type = child_a_info.get_file_type()
+
+        # If the file does not exist in 'b', it has been removed
+        child_b, child_b_info = query_child(b, name)
+        if child_b_info is None:
+            removed.append(child_a)
+            continue
+
+        # If the files are of different types, we report a
+        # modification, saving a bit of time because we won't need a
+        # checksum
+        if child_a_type != child_b_info.get_file_type():
+            modified.append(DiffItem(child_a, child_a_info,
+                                     child_b, child_b_info,
+                                     None, None))
+            continue
+
+        # Finally, we compute checksums and compare the file contents directly
+        diff_item = diff_files(child_a, child_a_info, child_b, child_b_info)
+        if diff_item is not None:
+            modified.append(diff_item)
+
+        # If the files are both directories, we recursively use
+        # this function to find differences - saving time if they
+        # are equal.
+        if child_a_type == Gio.FileType.DIRECTORY:
+            sub_modified, sub_removed, sub_added = diff_dirs(child_a, child_b)
+            modified.extend(sub_modified)
+            removed.extend(sub_removed)
+            added.extend(sub_added)
+
+    # Now we walk through 'b' to find any files that were added
+    for child_b_info in b.enumerate_children(OSTREE_GIO_FAST_QUERYINFO, query_flags):
+        name = child_b_info.get_name()
+
+        # Entries which also exist in 'a' were handled by the first walk
+        _, child_a_info = query_child(a, name)
+        if child_a_info is not None:
+            continue
+
+        # If the file does not exist in 'a', it was added.
+        child_b = b.get_child(name)
+        added.append(child_b)
+        if child_b_info.get_file_type() == Gio.FileType.DIRECTORY:
+            added.extend(diff_add_dir_recurse(child_b))
+
+    return modified, removed, added
+
+
# fetch()
#
# Fetch new objects from a remote, if configured