summaryrefslogtreecommitdiff
path: root/lib/extracts_path.rb
blob: 44a9c7ea536233ab2833f6e529cbd046b1dde58f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# frozen_string_literal: true

# Module providing methods for dealing with separating a tree-ish string and a
# file path string when combined in a request parameter
#
# The including class is expected to provide `@project`, `params`, `request`,
# `render_404` and the `logs_file_project_ref_path` route helper.
module ExtractsPath
  # Raised when given an invalid file path
  InvalidPathError = Class.new(StandardError)

  # Given a string containing both a Git tree-ish, such as a branch or tag, and
  # a filesystem path joined by forward slashes, attempts to separate the two.
  #
  # Expects a @project instance variable to contain the active project. This is
  # used to check the input against a list of valid repository refs.
  #
  # The whole string is examined as a single unit: a newline inside `id` never
  # starts a "new line" for matching purposes, so a SHA is only recognised at
  # the very beginning of `id`, and only the outermost leading/trailing slash
  # is removed from the path.
  #
  # Examples
  #
  #   # No @project available
  #   extract_ref('master')
  #   # => ['', '']
  #
  #   extract_ref('master')
  #   # => ['master', '']
  #
  #   extract_ref("f4b14494ef6abf3d144c28e4af0c20143383e062/CHANGELOG")
  #   # => ['f4b14494ef6abf3d144c28e4af0c20143383e062', 'CHANGELOG']
  #
  #   extract_ref("v2.0.0/README.md")
  #   # => ['v2.0.0', 'README.md']
  #
  #   extract_ref('master/app/models/project.rb')
  #   # => ['master', 'app/models/project.rb']
  #
  #   extract_ref('issues/1234/app/models/project.rb')
  #   # => ['issues/1234', 'app/models/project.rb']
  #
  #   # Given an invalid branch, we fall back to just splitting on the first slash
  #   extract_ref('non/existent/branch/README.md')
  #   # => ['non', 'existent/branch/README.md']
  #
  # Returns an Array where the first value is the tree-ish and the second is the
  # path
  #
  # Raises InvalidPathError when no ref matches and `id` contains nothing but
  # slashes (or is empty), since there is no ref segment to fall back to.
  def extract_ref(id)
    return ['', ''] unless @project # rubocop:disable Gitlab/ModuleWithInstanceVariables

    # `\A` (not `^`) so that a SHA is only accepted at the start of the whole
    # string; `m` so that the path capture is not cut off at a newline.
    sha_match = id.match(/\A(\h{40})(.+)/m)

    if sha_match
      # If the ref appears to be a SHA, we're done, just split the string
      ref, path = sha_match.captures
    else
      # Otherwise, attempt to detect the ref using a list of the project's
      # branches and tags

      # Append a trailing slash if we only get a ref and no file path
      id = "#{id}/" unless id.end_with?('/')

      valid_refs = ref_names.select { |v| id.start_with?("#{v}/") }

      if valid_refs.empty?
        # No exact ref match, so just try our best: the first non-empty
        # slash-delimited segment is the ref, the remainder is the path
        fallback = id.match(%r{([^/]+)(.*)}m)
        raise InvalidPathError unless fallback

        ref, path = fallback.captures
      else
        # There is a distinct possibility that multiple refs prefix the ID.
        # Use the longest match to maximize the chance that we have the
        # right ref.
        best_match = valid_refs.max_by(&:length)
        # Partition the string into the ref and the path, ignoring the empty
        # first value. The ref taken from the partition is a fresh String, so
        # callers may mutate it without touching the cached ref names.
        _, ref, path = id.partition(best_match)
      end
    end

    # Remove the leading and trailing slash from the path
    [ref, path.gsub(%r{\A/|/\z}, '')]
  end

  # If we have an ID of 'foo.atom', and the controller provides Atom and HTML
  # formats, then we have to check if the request was for the Atom version of
  # the ID without the '.atom' suffix, or the HTML version of the ID including
  # the suffix. We only check this if the version including the suffix doesn't
  # match, so it is possible to create a branch which has an unroutable Atom
  # feed.
  #
  # Returns the longest known ref name that prefixes the ID once a trailing
  # '.atom' has been removed, or nil when no ref matches.
  def extract_ref_without_atom(id)
    # `\z` so that only a suffix at the very end of the string is removed
    id_without_atom = id.sub(/\.atom\z/, '')
    valid_refs = ref_names.select { |v| "#{id_without_atom}/".start_with?("#{v}/") }

    valid_refs.max_by(&:length)
  end

  # Assigns common instance variables for views working with Git tree-ish objects
  #
  # Assignments are:
  #
  # - @id     - A string representing the joined ref and path
  # - @ref    - A string representing the ref (e.g., the branch, tag, or commit SHA)
  # - @path   - A string representing the filesystem path
  # - @commit - A Commit representing the commit from the given ref
  #
  # @repo, @hex_path and @logs_path are assigned as well.
  #
  # If the :id parameter appears to be requesting a specific response format,
  # that will be handled as well.
  #
  # If there is no path and the ref doesn't exist in the repo, try to resolve
  # the ref without an '.atom' suffix. If _that_ ref is found, set the request's
  # format to Atom manually.
  #
  # Automatically renders a 404 (via `render_404`) if a valid tree path could
  # not be resolved (e.g., when a user inserts an invalid path or ref).
  # rubocop:disable Gitlab/ModuleWithInstanceVariables
  def assign_ref_vars
    @id = get_id
    @ref, @path = extract_ref(@id)
    @repo = @project.repository
    @ref.strip!

    # A ref that still contains whitespace after stripping is rejected
    raise InvalidPathError if @ref.match?(/\s/)

    @commit = @repo.commit(@ref)

    if @path.empty? && !@commit && @id.end_with?('.atom')
      @id = @ref = extract_ref_without_atom(@id)
      @commit = @repo.commit(@ref)

      request.format = :atom if @commit
    end

    raise InvalidPathError unless @commit

    @hex_path = Digest::SHA1.hexdigest(@path)
    @logs_path = logs_file_project_ref_path(@project, @ref, @path)
  rescue RuntimeError, NoMethodError, InvalidPathError
    # Any of these failures while resolving the ref/path is answered with a 404
    render_404
  end
  # rubocop:enable Gitlab/ModuleWithInstanceVariables

  # Returns the tree at @path for @commit, memoized in @tree.
  # Relies on @repo, @commit and @path having been set by assign_ref_vars.
  def tree
    @tree ||= @repo.tree(@commit.id, @path) # rubocop:disable Gitlab/ModuleWithInstanceVariables
  end

  # Returns the ids of the LFS pointer blobs among the blobs of the current
  # tree (or the current blob when the tree has no blobs) and stores them in
  # @lfs_blob_ids.
  def lfs_blob_ids
    blob_ids = tree.blobs.map(&:id)

    # When current endpoint is a Blob then `tree.blobs` will be empty, it means we need to analyze
    # the current Blob in order to determine if it's a LFS object
    blob_ids = Array.wrap(@repo.blob_at(@commit.id, @path)&.id) if blob_ids.empty? # rubocop:disable Gitlab/ModuleWithInstanceVariables

    @lfs_blob_ids = Gitlab::Git::Blob.batch_lfs_pointers(@project.repository, blob_ids).map(&:id) # rubocop:disable Gitlab/ModuleWithInstanceVariables
  end

  private

  # Builds the combined "ref/path" string from the request parameters:
  # params[:id] (or params[:ref] when :id is absent), followed by
  # "/<params[:path]>" when a path parameter is present.
  #
  # overridden in subclasses, do not remove
  def get_id
    id = [params[:id] || params[:ref]]
    id << "/" + params[:path] unless params[:path].blank?
    id.join
  end

  # Returns the ref names of the project's repository, memoized in @ref_names,
  # or an empty Array when no @project is set.
  def ref_names
    return [] unless @project # rubocop:disable Gitlab/ModuleWithInstanceVariables

    @ref_names ||= @project.repository.ref_names # rubocop:disable Gitlab/ModuleWithInstanceVariables
  end
end