Add 'filesystem_root' and 'filesystem_writable_paths' settings

This is the last bit of API needed to be usable by YBD and Morph.
author: Sam Thursfield <sam.thursfield@codethink.co.uk> 2015-05-27 15:51:58 +0100
committer: Sam Thursfield <sam.thursfield@codethink.co.uk> 2015-05-27 15:53:33 +0100
commit: dd8a2fb16b4d43949f9e3e23e5283cdf6fa66cf4 (patch)
tree: 25a6bc41e1eb189082cc2ce5202e76225806a692 /sandboxlib
parent: 74f78c208f33824bd06674a1f99f8f7cfa98a4d5 (diff)
download: sandboxlib-dd8a2fb16b4d43949f9e3e23e5283cdf6fa66cf4.tar.gz
3 files changed, 140 insertions, 12 deletions
diff --git a/sandboxlib/__init__.py b/sandboxlib/__init__.py
index 325fdd0..18c79b0 100644
--- a/sandboxlib/__init__.py
+++ b/sandboxlib/__init__.py
@@ -59,8 +59,6 @@ def run_sandbox(rootfs_path, command, cwd=None, extra_env=None,
     '''Run 'command' in a sandboxed environment.
 
     Parameters:
-      - rootfs_path: the path to the root of the sandbox. Can be '/', if you
-            don't want to isolate the command from the host filesystem at all.
       - command: the command to run. Pass a list of parameters rather than
             using spaces to separate them, e.g. ['echo', '"Hello world"'].
       - cwd: the working directory of 'command', relative to 'rootfs_path'.
@@ -68,6 +66,16 @@ def run_sandbox(rootfs_path, command, cwd=None, extra_env=None,
             directory of the calling process otherwise.
       - extra_env: environment variables to set in addition to
             BASE_ENVIRONMENT.
+      - filesystem_root: the path to the root of the sandbox. Defaults to '/',
+            which doesn't isolate the command from the host filesystem at all.
+      - filesystem_writable_paths: defaults to 'all', which allows the command
+            to write to anywhere under 'filesystem_root' that the user of the
+            calling process could write to. Backends may accept a list of paths
+            instead of 'all', and will prevent writes to any files not under a
+            path in that whitelist. If 'none' or an empty list is passed, the
+            whole file-system will be read-only. The paths should be relative
+            to filesystem_root. This will be processed /after/ extra_mounts
+            are mounted.
       - mounts: configures mount sharing. Defaults to 'undefined', where no
             no attempt is made to isolate mounts. Backends may support
             'isolated' as well.
diff --git a/sandboxlib/chroot.py b/sandboxlib/chroot.py
index 95036b4..530e667 100644
--- a/sandboxlib/chroot.py
+++ b/sandboxlib/chroot.py
@@ -76,6 +76,10 @@ def process_network_config(network):
         "Network sharing cannot be be configured in this backend." % network
 
 
+def process_writable_paths(fs_root, writable_paths):
+    assert writable_paths == 'all'
+
+
 def mount(source, path, mount_type, mount_options):
     # We depend on the host system's 'mount' program here, which is a
     # little sad. It's possible to call the libc's mount() function
@@ -123,7 +127,7 @@ def mount_all(rootfs_path, mount_info_list):
             unmount(mountpoint)
 
 
-def run_command_in_chroot(pipe, extra_mounts, rootfs_path, command, cwd, env):
+def run_command_in_chroot(pipe, extra_mounts, chroot_path, command, cwd, env):
     # This function should be run in a multiprocessing.Process() subprocess,
     # because it calls os.chroot(). There's no 'unchroot()' function! After
     # chrooting, it calls sandboxlib._run_command(), which uses the
@@ -142,7 +146,7 @@ def run_command_in_chroot(pipe, extra_mounts, rootfs_path, command, cwd, env):
         # work.
 
         try:
-            os.chroot(rootfs_path)
+            os.chroot(chroot_path)
         except OSError as e:
             raise RuntimeError("Unable to chroot: %s" % e)
 
@@ -162,7 +166,8 @@ def run_command_in_chroot(pipe, extra_mounts, rootfs_path, command, cwd, env):
     os._exit(result)
 
 
-def run_sandbox(rootfs_path, command, cwd=None, extra_env=None,
+def run_sandbox(command, cwd=None, extra_env=None,
+                filesystem_root='/', filesystem_writable_paths='all',
                 mounts='undefined', extra_mounts=None,
                 network='undefined'):
     if type(command) == str:
@@ -174,12 +179,14 @@ def run_sandbox(rootfs_path, command, cwd=None, extra_env=None,
 
     process_network_config(network)
 
+    process_writable_paths(filesystem_root, filesystem_writable_paths)
+
     pipe_parent, pipe_child = multiprocessing.Pipe()
 
-    with mount_all(rootfs_path, extra_mounts):
+    with mount_all(filesystem_root, extra_mounts):
         process = multiprocessing.Process(
             target=run_command_in_chroot,
-            args=(pipe_child, extra_mounts, rootfs_path, command, cwd, env))
+            args=(pipe_child, extra_mounts, filesystem_root, command, cwd, env))
         process.start()
         process.join()
 
diff --git a/sandboxlib/linux_user_chroot.py b/sandboxlib/linux_user_chroot.py
index dc8c4e8..c0715c7 100644
--- a/sandboxlib/linux_user_chroot.py
+++ b/sandboxlib/linux_user_chroot.py
@@ -33,6 +33,9 @@ Supported mounts settings: 'undefined', 'isolated'.
 
 Supported network settings: 'undefined', 'isolated'.
 
+Much of this code is adapted from Morph, from the Baserock project, from code
+written by Joe Burmeister, Richard Maw, Lars Wirzenius and others.
+
 '''
 
 
@@ -117,7 +120,8 @@ def process_mount_config(root, mounts, extra_mounts):
         path = os.path.join(root, mount_point)
         if not os.path.exists(path):
             os.makedirs(path)
-        mount_script_args.extend((mount_point, mount_type, source, mount_options))
+        mount_script_args.extend((mount_point, mount_type, source,
+                                  mount_options))
     mount_script_args.append('--')
 
     mount_script += textwrap.dedent(r'''
@@ -151,7 +155,112 @@ def process_network_config(network):
     return extra_linux_user_chroot_args
 
 
-def run_sandbox(rootfs_path, command, cwd=None, extra_env=None,
+# This function is mostly taken from Morph, from the Baserock project, from
+# file morphlib/fsutils.py.
+#
+# It is used to convert the whitelist 'filesystem_writable_paths' into a
+# blacklist of '--mount-readonly' arguments for linux-user-chroot. It would
+# be better if we could pass the whitelist into linux-user-chroot itself;
+# all that is needed is a patch to linux-user-chroot.
+def invert_paths(tree_walker, paths):
+    '''List paths from `tree_walker` that are not in `paths`.
+
+    Given a traversal of a tree and a set of paths separated by os.sep,
+    return the files and directories that are not part of the set of
+    paths, culling directories that do not need to be recursed into,
+    if the traversal supports this.
+
+    `tree_walker` is expected to follow similar behaviour to `os.walk()`.
+
+    This function will remove directories from the ones listed, to avoid
+    traversing into these subdirectories, if it doesn't need to.
+
+    As such, if a directory is returned, it is implied that its contents
+    are also not in the set of paths.
+
+    If the tree walker does not support culling the traversal this way,
+    such as `os.walk(root, topdown=False)`, then the contents will also
+    be returned.
+
+    The purpose for this is to list the directories that can be made
+    read-only, such that it would leave everything in paths writable.
+
+    Each path in `paths` is expected to begin with the same path as
+    yielded by the tree walker.
+
+    '''
+
+    def normpath(path):
+        if path == '.':
+            return path
+        path = os.path.normpath(path)
+        if not os.path.isabs(path):
+            path = os.path.join('.', path)
+        return path
+    def any_paths_are_subpath_of(prefix):
+        prefix = normpath(prefix)
+        norm_paths = (normpath(path) for path in paths)
+        return any(path[:len(prefix)] == prefix
+                   for path in norm_paths)
+
+    def path_is_listed(path):
+        return any(normpath(path) == normpath(other)
+                   for other in paths)
+
+    for dirpath, dirnames, filenames in tree_walker:
+
+        if path_is_listed(dirpath):
+            # No subpaths need to be considered
+            del dirnames[:]
+            del filenames[:]
+        elif any_paths_are_subpath_of(dirpath):
+            # Some subpaths may be listed and others not, so this directory
+            # must stay writable: don't yield it, but don't cull either.
+            pass
+        else:
+            # not listed as a parent or an exact match, needs to be
+            # yielded, but we don't need to consider subdirs, so can cull
+            yield dirpath
+            del dirnames[:]
+            del filenames[:]
+
+        for filename in filenames:
+            fullpath = os.path.join(dirpath, filename)
+            if path_is_listed(fullpath):
+                pass
+            else:
+                yield fullpath
+
+
+def process_writable_paths(fs_root, writable_paths):
+    if writable_paths == 'all':
+        extra_linux_user_chroot_args = []
+    else:
+        if type(writable_paths) != list:
+            assert writable_paths == 'none'
+            writable_paths = []
+
+        # FIXME: It's rather annoying that we have to convert the
+        # 'writable_paths' whitelist into a blacklist of '--mount-readonly'
+        # arguments. It's also possible to break here by making a commandline
+        # that is too long, if 'fs_root' contains many directories.
+
+        extra_linux_user_chroot_args = []
+
+        absolute_writable_paths = [
+            os.path.join(fs_root, path.lstrip('/')) for path in writable_paths]
+
+        for d in invert_paths(os.walk(fs_root), absolute_writable_paths):
+            if not os.path.islink(d):
+                rel_path = '/' + os.path.relpath(d, fs_root)
+                extra_linux_user_chroot_args.extend(
+                    ['--mount-readonly', rel_path])
+
+    return extra_linux_user_chroot_args
+
+
+def run_sandbox(command, cwd=None, extra_env=None,
+                filesystem_root='/', filesystem_writable_paths='all',
                 mounts='undefined', extra_mounts=None,
                 network='undefined'):
     if type(command) == str:
@@ -160,16 +269,20 @@ def run_sandbox(rootfs_path, command, cwd=None, extra_env=None,
     linux_user_chroot_command = ['linux-user-chroot']
 
     unshare_command = process_mount_config(
-        root=rootfs_path, mounts=mounts, extra_mounts=extra_mounts or [])
+        root=filesystem_root, mounts=mounts, extra_mounts=extra_mounts or [])
 
     linux_user_chroot_command += process_network_config(network)
 
     if cwd is not None:
         linux_user_chroot_command.extend(['--chdir', cwd])
 
+    linux_user_chroot_command += process_writable_paths(
+        filesystem_root, filesystem_writable_paths)
+
+    linux_user_chroot_command.append(filesystem_root)
+
     env = sandboxlib.environment_vars(extra_env)
 
-    argv = (
-        unshare_command + linux_user_chroot_command + [rootfs_path] + command)
+    argv = (unshare_command + linux_user_chroot_command + command)
     exit, out, err = sandboxlib._run_command(argv, env=env)
     return exit, out, err
author	Sam Thursfield <sam.thursfield@codethink.co.uk>	2015-05-27 15:51:58 +0100
committer	Sam Thursfield <sam.thursfield@codethink.co.uk>	2015-05-27 15:53:33 +0100
commit	dd8a2fb16b4d43949f9e3e23e5283cdf6fa66cf4 (patch)
tree	25a6bc41e1eb189082cc2ce5202e76225806a692 /sandboxlib
parent	74f78c208f33824bd06674a1f99f8f7cfa98a4d5 (diff)
download	sandboxlib-dd8a2fb16b4d43949f9e3e23e5283cdf6fa66cf4.tar.gz