summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoffrey F <f.joffrey@gmail.com>2018-02-22 13:35:41 -0800
committerGitHub <noreply@github.com>2018-02-22 13:35:41 -0800
commitcc6e1b12495be792a81bff5d0ccefa01007260b6 (patch)
tree2cffc7c2060505f97296f77ce4f3245003436af2
parentad5f49b690b2e2838ca124291db500d25a3d6cf1 (diff)
parent0c948c7df65b0d7378c3c0c8d966c38171f1ef21 (diff)
downloaddocker-py-cc6e1b12495be792a81bff5d0ccefa01007260b6.tar.gz
Merge pull request #1914 from mefyl/master
Improve .dockerignore compliance
-rw-r--r--docker/utils/build.py207
-rw-r--r--tests/unit/utils_test.py92
2 files changed, 111 insertions, 188 deletions
diff --git a/docker/utils/build.py b/docker/utils/build.py
index a218873..1da56fb 100644
--- a/docker/utils/build.py
+++ b/docker/utils/build.py
@@ -1,20 +1,24 @@
import os
+import re
from ..constants import IS_WINDOWS_PLATFORM
-from .fnmatch import fnmatch
+from fnmatch import fnmatch
+from itertools import chain
from .utils import create_archive
def tar(path, exclude=None, dockerfile=None, fileobj=None, gzip=False):
root = os.path.abspath(path)
exclude = exclude or []
-
return create_archive(
files=sorted(exclude_paths(root, exclude, dockerfile=dockerfile)),
root=root, fileobj=fileobj, gzip=gzip
)
+_SEP = re.compile('/|\\\\') if IS_WINDOWS_PLATFORM else re.compile('/')
+
+
def exclude_paths(root, patterns, dockerfile=None):
"""
Given a root directory path and a list of .dockerignore patterns, return
@@ -23,127 +27,90 @@ def exclude_paths(root, patterns, dockerfile=None):
All paths returned are relative to the root.
"""
+
if dockerfile is None:
dockerfile = 'Dockerfile'
- patterns = [p.lstrip('/') for p in patterns]
- exceptions = [p for p in patterns if p.startswith('!')]
-
- include_patterns = [p[1:] for p in exceptions]
- include_patterns += [dockerfile, '.dockerignore']
-
- exclude_patterns = list(set(patterns) - set(exceptions))
-
- paths = get_paths(root, exclude_patterns, include_patterns,
- has_exceptions=len(exceptions) > 0)
-
- return set(paths).union(
- # If the Dockerfile is in a subdirectory that is excluded, get_paths
- # will not descend into it and the file will be skipped. This ensures
- # it doesn't happen.
- set([dockerfile.replace('/', os.path.sep)])
- if os.path.exists(os.path.join(root, dockerfile)) else set()
- )
-
-
-def should_include(path, exclude_patterns, include_patterns, root):
- """
- Given a path, a list of exclude patterns, and a list of inclusion patterns:
-
- 1. Returns True if the path doesn't match any exclusion pattern
- 2. Returns False if the path matches an exclusion pattern and doesn't match
- an inclusion pattern
- 3. Returns true if the path matches an exclusion pattern and matches an
- inclusion pattern
- """
- for pattern in exclude_patterns:
- if match_path(path, pattern):
- for pattern in include_patterns:
- if match_path(path, pattern):
- return True
- if os.path.isabs(pattern) and match_path(
- os.path.join(root, path), pattern):
- return True
- return False
- return True
-
-
-def should_check_directory(directory_path, exclude_patterns, include_patterns,
- root):
+ def normalize(p):
+ # Leading and trailing slashes are not relevant. Yes,
+ # "foo.py/" must exclude the "foo.py" regular file. "."
+ # components are not relevant either, even if the whole
+ # pattern is only ".", as the Docker reference states: "For
+ # historical reasons, the pattern . is ignored."
+ split = [pt for pt in re.split(_SEP, p) if pt and pt != '.']
+ # ".." component must be cleared with the potential previous
+ # component, regardless of whether it exists: "A preprocessing
+ # step [...] eliminates . and .. elements using Go's
+ # filepath.".
+ i = 0
+ while i < len(split):
+ if split[i] == '..':
+ del split[i]
+ if i > 0:
+ del split[i - 1]
+ i -= 1
+ else:
+ i += 1
+ return split
+
+ patterns = (
+ (True, normalize(p[1:]))
+ if p.startswith('!') else
+ (False, normalize(p))
+ for p in patterns)
+ patterns = list(reversed(list(chain(
+ # Exclude empty patterns such as "." or the empty string.
+ filter(lambda p: p[1], patterns),
+ # Always include the Dockerfile and .dockerignore
+ [(True, dockerfile.split('/')), (True, ['.dockerignore'])]))))
+ return set(walk(root, patterns))
+
+
+def walk(root, patterns, default=True):
"""
- Given a directory path, a list of exclude patterns, and a list of inclusion
- patterns:
-
- 1. Returns True if the directory path should be included according to
- should_include.
- 2. Returns True if the directory path is the prefix for an inclusion
- pattern
- 3. Returns False otherwise
+ A collection of file lying below root that should be included according to
+ patterns.
"""
- # To account for exception rules, check directories if their path is a
- # a prefix to an inclusion pattern. This logic conforms with the current
- # docker logic (2016-10-27):
- # https://github.com/docker/docker/blob/bc52939b0455116ab8e0da67869ec81c1a1c3e2c/pkg/archive/archive.go#L640-L671
-
- def normalize_path(path):
- return path.replace(os.path.sep, '/')
-
- path_with_slash = normalize_path(directory_path) + '/'
- possible_child_patterns = [
- pattern for pattern in map(normalize_path, include_patterns)
- if (pattern + '/').startswith(path_with_slash)
- ]
- directory_included = should_include(
- directory_path, exclude_patterns, include_patterns, root
- )
- return directory_included or len(possible_child_patterns) > 0
-
-
-def get_paths(root, exclude_patterns, include_patterns, has_exceptions=False):
- paths = []
-
- for parent, dirs, files in os.walk(root, topdown=True, followlinks=False):
- parent = os.path.relpath(parent, root)
- if parent == '.':
- parent = ''
-
- # Remove excluded patterns from the list of directories to traverse
- # by mutating the dirs we're iterating over.
- # This looks strange, but is considered the correct way to skip
- # traversal. See https://docs.python.org/2/library/os.html#os.walk
- dirs[:] = [
- d for d in dirs if should_check_directory(
- os.path.join(parent, d), exclude_patterns, include_patterns,
- root
- )
- ]
-
- for path in dirs:
- if should_include(os.path.join(parent, path),
- exclude_patterns, include_patterns, root):
- paths.append(os.path.join(parent, path))
-
- for path in files:
- if should_include(os.path.join(parent, path),
- exclude_patterns, include_patterns, root):
- paths.append(os.path.join(parent, path))
-
- return paths
-
-
-def match_path(path, pattern):
-
- pattern = pattern.rstrip('/' + os.path.sep)
- if pattern and not os.path.isabs(pattern):
- pattern = os.path.relpath(pattern)
-
- pattern_components = pattern.split(os.path.sep)
- if len(pattern_components) == 1 and IS_WINDOWS_PLATFORM:
- pattern_components = pattern.split('/')
-
- if '**' not in pattern:
- path_components = path.split(os.path.sep)[:len(pattern_components)]
- else:
- path_components = path.split(os.path.sep)
- return fnmatch('/'.join(path_components), '/'.join(pattern_components))
+ def match(p):
+ if p[1][0] == '**':
+ rec = (p[0], p[1][1:])
+ return [p] + (match(rec) if rec[1] else [rec])
+ elif fnmatch(f, p[1][0]):
+ return [(p[0], p[1][1:])]
+ else:
+ return []
+
+ for f in os.listdir(root):
+ cur = os.path.join(root, f)
+ # The patterns if recursing in that directory.
+ sub = list(chain(*(match(p) for p in patterns)))
+ # Whether this file is explicitely included / excluded.
+ hit = next((p[0] for p in sub if not p[1]), None)
+ # Whether this file is implicitely included / excluded.
+ matched = default if hit is None else hit
+ sub = list(filter(lambda p: p[1], sub))
+ if os.path.isdir(cur):
+ # Entirely skip directories if there are no chance any subfile will
+ # be included.
+ if all(not p[0] for p in sub) and not matched:
+ continue
+ # I think this would greatly speed up dockerignore handling by not
+ # recursing into directories we are sure would be entirely
+ # included, and only yielding the directory itself, which will be
+ # recursively archived anyway. However the current unit test expect
+ # the full list of subfiles and I'm not 100% sure it would make no
+ # difference yet.
+ # if all(p[0] for p in sub) and matched:
+ # yield f
+ # continue
+ children = False
+ for r in (os.path.join(f, p) for p in walk(cur, sub, matched)):
+ yield r
+ children = True
+ # The current unit tests expect directories only under those
+ # conditions. It might be simplifiable though.
+ if (not sub or not children) and hit or hit is None and default:
+ yield f
+ elif matched:
+ yield f
diff --git a/tests/unit/utils_test.py b/tests/unit/utils_test.py
index e144b7b..8a4b193 100644
--- a/tests/unit/utils_test.py
+++ b/tests/unit/utils_test.py
@@ -23,7 +23,6 @@ from docker.utils import (
decode_json_header, tar, split_command, parse_devices, update_headers,
)
-from docker.utils.build import should_check_directory
from docker.utils.ports import build_port_bindings, split_port
from docker.utils.utils import format_environment
@@ -758,6 +757,13 @@ class ExcludePathsTest(unittest.TestCase):
self.all_paths - set(['foo/a.py'])
)
+ def test_exclude_include_absolute_path(self):
+ base = make_tree([], ['a.py', 'b.py'])
+ assert exclude_paths(
+ base,
+ ['/*', '!/*.py']
+ ) == set(['a.py', 'b.py'])
+
def test_single_subdir_with_path_traversal(self):
assert self.exclude(['foo/whoops/../a.py']) == convert_paths(
self.all_paths - set(['foo/a.py'])
@@ -876,12 +882,25 @@ class ExcludePathsTest(unittest.TestCase):
)
)
- def test_exclude_include_absolute_path(self):
- base = make_tree([], ['a.py', 'b.py'])
+ def test_include_wildcard(self):
+ base = make_tree(['a'], ['a/b.py'])
assert exclude_paths(
base,
- ['/*', '!' + os.path.join(base, '*.py')]
- ) == set(['a.py', 'b.py'])
+ ['*', '!*/b.py']
+ ) == convert_paths(['a/b.py'])
+
+ def test_last_line_precedence(self):
+ base = make_tree(
+ [],
+ ['garbage.md',
+ 'thrash.md',
+ 'README.md',
+ 'README-bis.md',
+ 'README-secret.md'])
+ assert exclude_paths(
+ base,
+ ['*.md', '!README*.md', 'README-secret.md']
+ ) == set(['README.md', 'README-bis.md'])
class TarTest(unittest.TestCase):
@@ -1019,69 +1038,6 @@ class TarTest(unittest.TestCase):
assert tar_data.getmember('th.txt').mtime == -3600
-class ShouldCheckDirectoryTest(unittest.TestCase):
- exclude_patterns = [
- 'exclude_rather_large_directory',
- 'dir/with/subdir_excluded',
- 'dir/with/exceptions'
- ]
-
- include_patterns = [
- 'dir/with/exceptions/like_this_one',
- 'dir/with/exceptions/in/descendents'
- ]
-
- def test_should_check_directory_not_excluded(self):
- assert should_check_directory(
- 'not_excluded', self.exclude_patterns, self.include_patterns, '.'
- )
- assert should_check_directory(
- convert_path('dir/with'), self.exclude_patterns,
- self.include_patterns, '.'
- )
-
- def test_shoud_check_parent_directories_of_excluded(self):
- assert should_check_directory(
- 'dir', self.exclude_patterns, self.include_patterns, '.'
- )
- assert should_check_directory(
- convert_path('dir/with'), self.exclude_patterns,
- self.include_patterns, '.'
- )
-
- def test_should_not_check_excluded_directories_with_no_exceptions(self):
- assert not should_check_directory(
- 'exclude_rather_large_directory', self.exclude_patterns,
- self.include_patterns, '.'
- )
- assert not should_check_directory(
- convert_path('dir/with/subdir_excluded'), self.exclude_patterns,
- self.include_patterns, '.'
- )
-
- def test_should_check_excluded_directory_with_exceptions(self):
- assert should_check_directory(
- convert_path('dir/with/exceptions'), self.exclude_patterns,
- self.include_patterns, '.'
- )
- assert should_check_directory(
- convert_path('dir/with/exceptions/in'), self.exclude_patterns,
- self.include_patterns, '.'
- )
-
- def test_should_not_check_siblings_of_exceptions(self):
- assert not should_check_directory(
- convert_path('dir/with/exceptions/but_not_here'),
- self.exclude_patterns, self.include_patterns, '.'
- )
-
- def test_should_check_subdirectories_of_exceptions(self):
- assert should_check_directory(
- convert_path('dir/with/exceptions/like_this_one/subdir'),
- self.exclude_patterns, self.include_patterns, '.'
- )
-
-
class FormatEnvironmentTest(unittest.TestCase):
def test_format_env_binary_unicode_value(self):
env_dict = {