summaryrefslogtreecommitdiff
path: root/lib/extracts_ref.rb
blob: 49c9772f7606068869ac52da5a2bfcd71bd69129 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# frozen_string_literal: true

# Module providing methods for dealing with separating a tree-ish string and a
# file path string when combined in a request parameter
# Can be extended for different types of repository object, e.g. Project or Snippet
module ExtractsRef
  InvalidPathError = Class.new(StandardError)
  BRANCH_REF_TYPE = 'heads'
  TAG_REF_TYPE = 'tags'

  # Given a string containing both a Git tree-ish, such as a branch or tag, and
  # a filesystem path joined by forward slashes, attempts to separate the two.
  #
  # Expects a repository_container method that returns the active repository object. This is
  # used to check the input against a list of valid repository refs.
  #
  # Examples
  #
  #   # No repository_container available
  #   extract_ref('master')
  #   # => ['', '']
  #
  #   extract_ref('master')
  #   # => ['master', '']
  #
  #   extract_ref("f4b14494ef6abf3d144c28e4af0c20143383e062/CHANGELOG")
  #   # => ['f4b14494ef6abf3d144c28e4af0c20143383e062', 'CHANGELOG']
  #
  #   extract_ref("v2.0.0/README.md")
  #   # => ['v2.0.0', 'README.md']
  #
  #   extract_ref('master/app/models/project.rb')
  #   # => ['master', 'app/models/project.rb']
  #
  #   extract_ref('issues/1234/app/models/project.rb')
  #   # => ['issues/1234', 'app/models/project.rb']
  #
  #   # Given an invalid branch, we fall back to just splitting on the first slash
  #   extract_ref('non/existent/branch/README.md')
  #   # => ['non', 'existent/branch/README.md']
  #
  #   # An ID made only of slashes carries neither a ref nor a path
  #   extract_ref('/')
  #   # => ['', '']
  #
  # Returns an Array where the first value is the tree-ish and the second is the
  # path. Surrounding whitespace is stripped from the tree-ish, and one leading
  # and one trailing slash are removed from the path.
  def extract_ref(id)
    ref, path = extract_raw_ref(id)

    [
      ref.strip,
      path.delete_prefix('/').delete_suffix('/')
    ]
  end

  # Assigns common instance variables for views working with Git tree-ish objects
  #
  # Assignments are:
  #
  # - @id     - A string representing the joined ref and path
  # - @ref    - A string representing the ref (e.g., the branch, tag, or commit SHA)
  # - @path   - A string representing the filesystem path
  # - @repo   - The repository returned by repository_container
  # - @commit - A Commit representing the commit from the given ref
  # - @fully_qualified_ref - "refs/<ref_type>/<ref>", only set when ref_type is present
  #
  # Raises InvalidPathError when the extracted ref contains whitespace.
  # Leaves @commit unassigned when the extracted ref is blank.
  #
  # rubocop:disable Gitlab/ModuleWithInstanceVariables
  def assign_ref_vars
    @id, @ref, @path = extract_ref_path
    @repo = repository_container.repository
    raise InvalidPathError if @ref.match?(/\s/)

    return unless @ref.present?

    @commit = if ref_type
                # An explicit ref type lets us look up the fully qualified name
                @fully_qualified_ref = %(refs/#{ref_type}/#{@ref})
                @repo.commit(@fully_qualified_ref)
              else
                @repo.commit(@ref)
              end
  end
  # rubocop:enable Gitlab/ModuleWithInstanceVariables

  # Returns the memoized tree for @commit at @path.
  #
  # Reads @repo, @commit and @path, which assign_ref_vars assigns.
  def tree
    @tree ||= @repo.tree(@commit.id, @path) # rubocop:disable Gitlab/ModuleWithInstanceVariables
  end

  # Returns a three-element Array: the raw ID built by get_id, followed by the
  # ref and path extracted from it.
  def extract_ref_path
    id = get_id
    ref, path = extract_ref(id)

    [id, ref, path]
  end

  # Returns nil when no :ref_type param is present, TAG_REF_TYPE when the param
  # equals TAG_REF_TYPE, and BRANCH_REF_TYPE for any other value.
  def ref_type
    return unless params[:ref_type].present?

    params[:ref_type] == TAG_REF_TYPE ? TAG_REF_TYPE : BRANCH_REF_TYPE
  end

  private

  # Splits id into an unnormalised [ref, path] pair; extract_ref tidies the result.
  def extract_raw_ref(id)
    return ['', ''] unless repository_container

    # If the ref appears to be a SHA, we're done, just split the string.
    # \A (not ^) is required: ^ also matches after a newline, which would let
    # an ID like "x\n<sha>/path" be mistaken for a SHA and lose its prefix.
    sha_match = id.match(/\A(\h{40})(.+)/)
    return sha_match.captures if sha_match

    # No slash means we must have a ref and no path
    return [id, ''] unless id.include?('/')

    # Otherwise, attempt to detect the ref using a list of the
    # repository_container's branches and tags

    # Append a trailing slash if we only get a ref and no file path
    id = [id, '/'].join unless id.end_with?('/')
    first_path_segment, rest = id.split('/', 2)

    return [first_path_segment, rest] if use_first_path_segment?(first_path_segment)

    valid_refs = ref_names.select { |v| id.start_with?("#{v}/") }

    if valid_refs.empty?
      # No exact ref match, so just try our best. An ID consisting solely of
      # slashes has no segment to capture; treat it as "no ref, no path".
      fallback = id.match(%r{([^/]+)(.*)})
      return fallback ? fallback.captures : ['', '']
    end

    # There is a distinct possibility that multiple refs prefix the ID.
    # Use the longest match to maximize the chance that we have the
    # right ref.
    best_match = valid_refs.max_by(&:length)

    # Partition the string into the ref and the path, ignoring the empty first value
    id.partition(best_match)[1..]
  end

  # Returns true when ref is an existing branch or tag name and the repository
  # reports no ambiguous refs, so the first path segment can be used as the ref.
  def use_first_path_segment?(ref)
    return false unless repository_container
    return false if repository_container.repository.has_ambiguous_refs?

    repository_container.repository.branch_names_include?(ref) ||
      repository_container.repository.tag_names_include?(ref)
  end

  # overridden in subclasses, do not remove
  #
  # Builds the combined ID from the permitted :id (or, failing that, :ref)
  # param, followed by "/<path>" when a :path param is present.
  def get_id
    allowed_params = params.permit(:id, :ref, :path)

    id = [allowed_params[:id] || allowed_params[:ref]]
    # Interpolate rather than concatenate so a non-String scalar cannot raise TypeError
    id << "/#{allowed_params[:path]}" unless allowed_params[:path].blank?
    id.join
  end

  # Returns the repository's ref names (memoized), or [] without a repository_container.
  def ref_names
    return [] unless repository_container

    @ref_names ||= repository_container.repository.ref_names # rubocop:disable Gitlab/ModuleWithInstanceVariables
  end

  # Must be provided by the including class.
  def repository_container
    raise NotImplementedError
  end

  # Returns true when the repository reports ref as ambiguous, or when ref
  # starts with "refs/" and still resolves to a commit after a leading
  # "refs/heads/" or "refs/tags/" prefix has been removed.
  def ambiguous_ref?(project, ref)
    return true if project.repository.ambiguous_ref?(ref)

    return false unless ref&.start_with?('refs/')

    # \A anchors to the start of the string; ^ would also match after a newline
    unprefixed_ref = ref.sub(%r{\Arefs/(heads|tags)/}, '')
    project.repository.commit(unprefixed_ref).present?
  end
end