summaryrefslogtreecommitdiff
path: root/lib/gitlab/robots_txt/parser.rb
blob: b9a3837e468ed7c4029fb72168a674c83e707706 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# frozen_string_literal: true

module Gitlab
  module RobotsTxt
    # Minimal robots.txt parser that only understands `Disallow:` directives.
    #
    # Every other line (`User-agent:`, `Allow:`, `Sitemap:`, comments, ...) is
    # ignored, so rules from all user-agent groups are merged together and
    # `Allow:` never overrides a `Disallow:`. Order of precedence is not
    # considered either.
    #
    # @example
    #   parser = Parser.new("Disallow: /admin\nDisallow: /*/private$")
    #   parser.disallowed?('/admin/users') # => true
    #   parser.disallowed?('/foo/private') # => true
    #   parser.disallowed?('/public')      # => false
    class Parser
      # Matches a `Disallow` directive, capturing everything after the colon.
      # The field name is matched case-insensitively and may be indented.
      DISALLOW_DIRECTIVE = /\A\s*disallow\s*:(?<value>.*)/i.freeze
      private_constant :DISALLOW_DIRECTIVE

      # @return [Array<Regexp>] one compiled pattern per non-empty
      #   `Disallow:` directive, in file order
      attr_reader :disallow_rules

      # @param content [String, nil] raw robots.txt content; `nil` is treated
      #   as an empty file
      def initialize(content)
        @raw_content = content

        @disallow_rules = parse_raw_content!
      end

      # @param path [String] request path to check
      # @return [Boolean] true when any `Disallow:` rule matches the
      #   beginning of +path+
      def disallowed?(path)
        # `match?` avoids allocating MatchData we never look at.
        disallow_rules.any? { |rule| rule.match?(path) }
      end

      private

      # Compiles every `Disallow:` line of the raw content into a Regexp.
      #
      # @return [Array<Regexp>]
      def parse_raw_content!
        @raw_content.to_s.each_line.map { |line| disallow_rule_for(line) }.compact
      end

      # Turns a single robots.txt line into a path-matching Regexp.
      #
      # @param line [String]
      # @return [Regexp, nil] nil when the line is not a `Disallow:` directive
      #   or when its value is empty
      def disallow_rule_for(line)
        directive = DISALLOW_DIRECTIVE.match(line)
        return unless directive

        # Anything after `#` is a comment, not part of the path pattern.
        pattern = directive[:value].sub(/#.*/, '').strip

        # An empty `Disallow:` means "nothing is disallowed"; compiling it
        # would produce a pattern that matches every path.
        return if pattern.empty?

        # A trailing `$` anchors the rule to the end of the path.
        anchored_to_end = pattern.end_with?('$')
        pattern = pattern.chomp('$') if anchored_to_end

        # Escape everything, then re-enable `*` as the only wildcard.
        source = Regexp.escape(pattern).gsub('\*', '.*')
        source += '\z' if anchored_to_end

        # `\A` (not `^`) so the rule only matches at the very start of the
        # path, never after an embedded newline.
        Regexp.new("\\A#{source}")
      end
    end
  end
end