summaryrefslogtreecommitdiff
path: root/lib/chef/chef_fs/file_pattern.rb
blob: 41becca99e86ac3e5264cf7dd055da6ad1a4e6aa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
#
# Author:: John Keiser (<jkeiser@chef.io>)
# Copyright:: Copyright 2012-2016, Chef Software Inc.
# License:: Apache License, Version 2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

require "chef/chef_fs"
require "chef/chef_fs/path_utils"

class Chef
  module ChefFS
    #
    # Represents a glob pattern.  This class is designed so that it can
    # match arbitrary strings, and tell you about partial matches.
    #
    # Examples:
    # * <tt>a*z</tt>
    #   - Matches <tt>abcz</tt>
    #   - Does not match <tt>ab/cd/ez</tt>
    #   - Does not match <tt>xabcz</tt>
    # * <tt>a**z</tt>
    #   - Matches <tt>abcz</tt>
    #   - Matches <tt>ab/cd/ez</tt>
    #
    # Special characters supported:
    # * <tt>/</tt> (and <tt>\\</tt> on Windows) - directory separators
    # * <tt>\*</tt> - match zero or more characters (but not directory separators)
    # * <tt>\*\*</tt> - match zero or more characters, including directory separators
    # * <tt>?</tt> - match exactly one character (not a directory separator)
    # Only on Unix:
    # * <tt>[abc0-9]</tt> - match one of the included characters
    # * <tt>\\<character></tt> - escape character: match the given character
    #
    class FilePattern
      # Initialize a new FilePattern with the pattern string.
      #
      # The pattern is stored as given and is only parsed the first time a
      # query method is called.  No validation happens here; the only error
      # raised by parsing is +ArgumentError+ when a <tt>..</tt> segment
      # directly follows a segment containing <tt>**</tt>.
      #
      # ==== Attributes
      #
      # * +pattern+ - the glob pattern string
      def initialize(pattern)
        @pattern = pattern
      end

      # The pattern string.
      attr_reader :pattern

      # Reports whether this pattern could match children of <tt>path</tt>.
      # If the pattern doesn't match the path up to this point or
      # if it matches and doesn't allow further children, this will
      # return <tt>false</tt>.
      #
      # ==== Attributes
      #
      # * +path+ - a path to check
      #
      # ==== Examples
      #
      #   abc/def.could_match_children?('abc') == true
      #   abc.could_match_children?('abc') == false
      #   abc/def.could_match_children?('x') == false
      #   a**z.could_match_children?('ab/cd') == true
      def could_match_children?(path)
        return false if path == "" # Empty string is not a path

        argument_is_absolute = Chef::ChefFS::PathUtils.is_absolute?(path)
        return false if is_absolute != argument_is_absolute

        # Drop the leading separator so both sides are compared as relative paths.
        path = path[1, path.length - 1] if argument_is_absolute

        path_parts = Chef::ChefFS::PathUtils.split(path)
        # If the pattern is shorter than the path (or same size), children will
        # be larger than the pattern, and will not match.
        return false if regexp_parts.length <= path_parts.length && !has_double_star
        # If the path doesn't match up to this point, children won't match either.
        # (zip pads with nil once the path is longer than the pre-** segments.)
        mismatch = path_parts.zip(regexp_parts).any? do |part, part_regexp|
          part_regexp && !part_regexp.match?(part)
        end
        return false if mismatch

        # Otherwise, it's possible we could match: the path matches to this
        # point, and the pattern is longer than the path.
        # TODO There is one edge case where the double star comes after some
        # characters like abc**def--we could check whether the next bit of path
        # starts with abc in that case.
        true
      end

      # Returns the immediate child of a path that would be matched
      # if this FilePattern was applied.  If more than one child
      # could match, this method returns nil.
      #
      # ==== Attributes
      #
      # * +path+ - The path to look for an exact child name under.
      #
      # ==== Returns
      #
      # The next directory in the pattern under the given path.
      # If the directory part could match more than one child, it
      # returns +nil+.
      #
      # ==== Examples
      #
      #   abc/def.exact_child_name_under('abc') == 'def'
      #   abc/def/ghi.exact_child_name_under('abc') == 'def'
      #   abc/*/ghi.exact_child_name_under('abc') == nil
      #   abc/*/ghi.exact_child_name_under('abc/def') == 'ghi'
      #   abc/**/ghi.exact_child_name_under('abc/def') == nil
      #
      # This method assumes +could_match_children?(path)+ is +true+.
      def exact_child_name_under(path)
        path = path[1, path.length - 1] if Chef::ChefFS::PathUtils.is_absolute?(path)
        dirs_in_path = Chef::ChefFS::PathUtils.split(path).length
        return nil if exact_parts.length <= dirs_in_path

        exact_parts[dirs_in_path]
      end

      # If this pattern represents an exact path, returns the exact path.
      # Returns +nil+ when any segment contains a wildcard or character class.
      #
      #   abc/def.exact_path == 'abc/def'
      #   abc/*def.exact_path == nil
      #   abc/x\\yz.exact_path == 'abc/xyz'
      def exact_path
        return nil if has_double_star || exact_parts.any?(&:nil?)

        result = Chef::ChefFS::PathUtils.join(*exact_parts)
        is_absolute ? Chef::ChefFS::PathUtils.join("", result) : result
      end

      # Returns the normalized version of the pattern, with / as the directory
      # separator, and "." and ".." removed.
      #
      # This does not presently change things like \b to b, but in the future
      # it might.
      def normalized_pattern
        calculate
        @normalized_pattern
      end

      # Tell whether this pattern matches absolute, or relative paths
      def is_absolute
        calculate
        @is_absolute
      end

      # Returns <tt>true</tt> if this pattern matches the path, <tt>false</tt>
      # otherwise.  An absolute pattern only matches absolute paths and a
      # relative pattern only matches relative paths.
      #
      #   abc/*/def.match?('abc/foo/def') == true
      #   abc/*/def.match?('abc/foo') == false
      def match?(path)
        argument_is_absolute = Chef::ChefFS::PathUtils.is_absolute?(path)
        return false if is_absolute != argument_is_absolute

        path = path[1, path.length - 1] if argument_is_absolute
        regexp.match?(path)
      end

      # Returns the string pattern
      def to_s
        pattern
      end

      private

      # Regexp matching a whole (root-stripped) path against the full pattern.
      def regexp
        calculate
        @regexp
      end

      # One anchored Regexp per pattern segment, up to (not including) the
      # first segment that contains <tt>**</tt>.
      def regexp_parts
        calculate
        @regexp_parts
      end

      # Literal text of each segment (+nil+ for wildcard segments), up to (not
      # including) the first segment that contains <tt>**</tt>.
      def exact_parts
        calculate
        @exact_parts
      end

      # Whether any segment of the pattern contains <tt>**</tt>.
      def has_double_star
        calculate
        @has_double_star
      end

      # Parses @pattern once and caches the derived state (@regexp,
      # @regexp_parts, @exact_parts, @has_double_star, @is_absolute and
      # @normalized_pattern).  Subsequent calls are no-ops.
      #
      # Raises +ArgumentError+ when a <tt>..</tt> segment directly follows a
      # segment containing <tt>**</tt>.
      def calculate
        return if @regexp

        @is_absolute = Chef::ChefFS::PathUtils.is_absolute?(@pattern)

        full_regexp_parts = []
        normalized_parts = []
        @regexp_parts = []
        @exact_parts = []
        @has_double_star = false

        Chef::ChefFS::PathUtils.split(pattern).each do |part|
          part_regexp, exact, part_has_double_star = FilePattern.pattern_to_regexp(part)
          @has_double_star = true if part_has_double_star

          # Skip // and /./ (pretend it's not there)
          next if exact == "" || exact == "."

          # Back up when you see .. (a relative pattern that starts with ..
          # keeps it, since there is nothing to back up over)
          if exact == ".."
            if @is_absolute && normalized_parts.empty?
              # If we are at the root, just pretend the .. isn't there
              next
            elsif !normalized_parts.empty?
              _, _, prev_has_double_star = FilePattern.pattern_to_regexp(normalized_parts[-1])
              raise ArgumentError, ".. overlapping a ** is unsupported" if prev_has_double_star

              full_regexp_parts.pop
              normalized_parts.pop
              # The per-segment lists stop growing at the first **, so only
              # unwind them while we are still before it.
              unless @has_double_star
                @regexp_parts.pop
                @exact_parts.pop
              end
              next
            end
          end

          # Build up the regexp
          full_regexp_parts << part_regexp
          normalized_parts << part
          unless @has_double_star
            # \A and \z anchor to the whole string; ^ and $ are line anchors in
            # Ruby and would let a name containing a newline slip through.
            @regexp_parts << Regexp.new("\\A#{part_regexp}\\z")
            @exact_parts << exact
          end
        end

        separator = Chef::ChefFS::PathUtils.regexp_path_separator
        @regexp = Regexp.new("\\A#{full_regexp_parts.join(separator)}\\z")
        @normalized_pattern = Chef::ChefFS::PathUtils.join(*normalized_parts)
        @normalized_pattern = Chef::ChefFS::PathUtils.join("", @normalized_pattern) if @is_absolute
      end

      # Regexp used to split a pattern segment into literal text and special
      # tokens.  The whole expression is one capture group so that
      # String#split keeps the tokens in its result.  Chosen once, based on
      # +ChefHelpers.windows?+, and then memoized.
      def self.pattern_special_characters
        @pattern_special_characters ||=
          if ChefHelpers.windows?
            /(\*\*|\*|\?|[\*\?\.\|\(\)\[\]\{\}\+\\\\\^\$])/
          else
            # Unix also supports character regexes and backslashes
            /(\\.|\[[^\]]+\]|\*\*|\*|\?|[\*\?\.\|\(\)\[\]\{\}\+\\\\\^\$])/
          end
      end

      # Characters that keep their backslash when a pattern escape
      # (<tt>\\<character></tt>) is copied into the generated regexp.
      def self.regexp_escape_characters
        [ "[", '\\', "^", "$", ".", "|", "?", "*", "+", "(", ")", "{", "}" ]
      end

      # Translates one pattern segment (the text between directory
      # separators) into regexp source.
      #
      # ==== Attributes
      #
      # * +pattern+ - a single segment of a glob pattern
      #
      # ==== Returns
      #
      # A three element array <tt>[regexp, exact, has_double_star]</tt>:
      #
      # * +regexp+ - unanchored regexp source equivalent to the segment
      # * +exact+ - the literal text the segment matches, or +nil+ when the
      #   segment contains a wildcard or a character class
      # * +has_double_star+ - +true+ when the segment contains <tt>**</tt>
      def self.pattern_to_regexp(pattern)
        regexp = +""
        exact = +""
        has_double_star = false
        pattern.split(pattern_special_characters).each_with_index do |part, index|
          # Odd indexes from the split are symbols.  Even are normal bits.
          if index.even?
            exact << part if exact
            regexp << part
            next
          end

          case part
          # **, * and ? happen on both platforms.
          when "**"
            exact = nil
            has_double_star = true
            regexp << ".*"
          when "*"
            exact = nil
            regexp << '[^\/]*'
          when "?"
            exact = nil
            # Exactly one character, but never a directory separator, so that
            # the joined full-path regexp cannot match across segments.
            regexp << '[^\/]'
          else
            if part[0, 1] == '\\' && part.length == 2
              # backslash escapes are only supported on Unix, and are handled
              # here by leaving the escape on (it means the same thing in a regex)
              escaped = part[1, 1]
              exact << escaped if exact
              regexp << (regexp_escape_characters.include?(escaped) ? part : escaped)
            elsif part[0, 1] == "[" && part.length > 1
              # [...] happens only on Unix, and is handled here by *not*
              # backslashing (it means the same thing in and out of regex)
              exact = nil
              regexp << part
            else
              # Any other regexp metacharacter is a literal in a glob: escape it.
              exact << part if exact
              regexp << "\\#{part}"
            end
          end
        end
        [regexp, exact, has_double_star]
      end
    end
  end
end