diff options
author | Tristan Van Berkom <tristan.vanberkom@codethink.co.uk> | 2017-06-30 16:04:44 +0900 |
---|---|---|
committer | Tristan Van Berkom <tristan.vanberkom@codethink.co.uk> | 2017-06-30 16:08:01 +0900 |
commit | f2d890e7494e06fd835b6f63a5c1e3a2dc1018bd (patch) | |
tree | 8473daf936bffa2e420938ad601fd4e885158c0e | |
parent | 1204b690ce5a05759947b7db44f1d87e7adb2c1a (diff) | |
download | buildstream-f2d890e7494e06fd835b6f63a5c1e3a2dc1018bd.tar.gz |
utils.py: Improved glob() function
Instead of using PurePath from pathlib, we use a modified version
of fnmatch.translate() to generate a regular expression.
This has multiple advantages:
* The private usage of regular expressions allows the Element
internal splitting rules to more easily adapt to the new globbing
syntax for split rules
* With the glob function under our control, we can document it more
clearly
* We have support for bash 'globstar' mode, which allows recursive
globs with double asterisk.
-rw-r--r-- | buildstream/utils.py | 104 |
1 files changed, 98 insertions, 6 deletions
diff --git a/buildstream/utils.py b/buildstream/utils.py index 18854e961..5bf3863d2 100644 --- a/buildstream/utils.py +++ b/buildstream/utils.py @@ -30,7 +30,7 @@ import calendar import psutil import subprocess import signal -from pathlib import PurePath +import re from . import ProgramNotFoundError from . import _yaml from . import _signals @@ -110,20 +110,45 @@ def glob(paths, pattern): This generator will iterate over the passed *paths* and yield only the filenames which matched the provided *pattern*. + + +--------+------------------------------------------------------------------+ + | Meta | Description | + +========+==================================================================+ + | \* | Match any pattern except a path separator | + +--------+------------------------------------------------------------------+ + | \** | Match any pattern, including path separators | + +--------+------------------------------------------------------------------+ + | ? | Match any single character | + +--------+------------------------------------------------------------------+ + | [abc] | Match one of the specified characters | + +--------+------------------------------------------------------------------+ + | [a-z] | Match one of the characters in the specified range | + +--------+------------------------------------------------------------------+ + | [!abc] | Match any single character, except the specified characters | + +--------+------------------------------------------------------------------+ + | [!a-z] | Match any single character, except those in the specified range | + +--------+------------------------------------------------------------------+ + + .. note:: + + Escaping of the metacharacters is not possible + """ - # When using PurePath.match(), it behaves as expected - # only when comparing two absolute filenames, so we - # force them to be absolute + # Ensure leading slash, just because we want patterns + # to match file lists regardless of whether the patterns + # or file lists had a leading slash or not. if not pattern.startswith(os.sep): pattern = os.sep + pattern + expression = _glob2re(pattern) + regexer = re.compile(expression) + for filename in paths: filename_try = filename if not filename_try.startswith(os.sep): filename_try = os.sep + filename_try - path = PurePath(filename_try) - if path.match(pattern): + if regexer.match(filename_try): yield filename @@ -694,3 +719,70 @@ def _call(*popenargs, **kwargs): output = output.decode('UTF-8') return (exit_code, output) + + +# _glob2re() +# +# Function to translate a glob style pattern into a regex +# +# Args: +# pat (str): The glob pattern +# +# This is a modified version of the python standard library's +# fnmatch.translate() function which supports path like globbing +# a bit more correctly, and additionally supports recursive glob +# patterns with double asterisk. +# +# Note that this will only support the most basic of standard +# glob patterns, and additionally the recursive double asterisk. +# +# Support includes: +# +# * Match any pattern except a path separator +# ** Match any pattern, including path separators +# ? Match any single character +# [abc] Match one of the specified characters +# [A-Z] Match one of the characters in the specified range +# [!abc] Match any single character, except the specified characters +# [!A-Z] Match any single character, except those in the specified range +# +def _glob2re(pat): + i, n = 0, len(pat) + res = '' + while i < n: + c = pat[i] + i = i + 1 + if c == '*': + # fnmatch.translate() simply uses the '.*' separator here, + # we only want that for double asterisk (bash 'globstar' behavior) + # + if i < n and pat[i] == '*': + res = res + '.*' + i = i + 1 + else: + res = res + '[^/]*' + elif c == '?': + # fnmatch.translate() simply uses the '.' wildcard here, but + # we dont want to match path separators here + res = res + '[^/]' + elif c == '[': + j = i + if j < n and pat[j] == '!': + j = j + 1 + if j < n and pat[j] == ']': + j = j + 1 + while j < n and pat[j] != ']': + j = j + 1 + if j >= n: + res = res + '\\[' + else: + stuff = pat[i:j].replace('\\', '\\\\') + i = j + 1 + if stuff[0] == '!': + stuff = '^' + stuff[1:] + elif stuff[0] == '^': + stuff = '\\' + stuff + res = '%s[%s]' % (res, stuff) + else: + res = res + re.escape(c) + return res + '\Z(?ms)' |