summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/gitlab/git/attributes.rb131
-rw-r--r--lib/gitlab/git/blame.rb77
-rw-r--r--lib/gitlab/git/blob.rb333
-rw-r--r--lib/gitlab/git/blob_snippet.rb32
-rw-r--r--lib/gitlab/git/branch.rb6
-rw-r--r--lib/gitlab/git/commit.rb310
-rw-r--r--lib/gitlab/git/commit_stats.rb26
-rw-r--r--lib/gitlab/git/compare.rb43
-rw-r--r--lib/gitlab/git/diff.rb322
-rw-r--r--lib/gitlab/git/diff_collection.rb129
-rw-r--r--lib/gitlab/git/encoding_helper.rb58
-rw-r--r--lib/gitlab/git/path_helper.rb16
-rw-r--r--lib/gitlab/git/popen.rb26
-rw-r--r--lib/gitlab/git/ref.rb49
-rw-r--r--lib/gitlab/git/repository.rb1253
-rw-r--r--lib/gitlab/git/tag.rb17
-rw-r--r--lib/gitlab/git/tree.rb104
-rw-r--r--lib/gitlab/git/util.rb18
18 files changed, 2950 insertions, 0 deletions
diff --git a/lib/gitlab/git/attributes.rb b/lib/gitlab/git/attributes.rb
new file mode 100644
index 00000000000..42140ecc993
--- /dev/null
+++ b/lib/gitlab/git/attributes.rb
@@ -0,0 +1,131 @@
+module Gitlab
+ module Git
+ # Class for parsing Git attribute files and extracting the attributes for
+ # file patterns.
+ #
+ # Unlike Rugged this parser only needs a single IO call (a call to `open`),
+ # vastly reducing the time spent in extracting attributes.
+ #
+ # This class _only_ supports parsing the attributes file located at
+ # `$GIT_DIR/info/attributes` as GitLab doesn't use any other files
+ # (`.gitattributes` is copied to this particular path).
+ #
+ # Basic usage:
+ #
+ # attributes = Gitlab::Git::Attributes.new(some_repo.path)
+ #
+ # attributes.attributes('README.md') # => { "eol" => "lf" }
+ class Attributes
+ # path - The path to the Git repository. It is expanded to an absolute
+ # path up front; the attributes file is not read here.
+ def initialize(path)
+ @path = File.expand_path(path)
+ # Parsed patterns are memoized lazily by #patterns.
+ @patterns = nil
+ end
+
+      # Looks up the Git attributes that apply to a file.
+      #
+      # path - A file path, relative to the repository, to look up.
+      #
+      # Returns the attributes Hash of the first pattern that matches, or an
+      # empty Hash when no pattern matches.
+      def attributes(path)
+        absolute = File.join(@path, path)
+        _glob, matched = patterns.find { |glob, _| File.fnmatch?(glob, absolute) }
+
+        matched || {}
+      end
+
+      # Returns the Hash of file patterns => attributes, parsing the attributes
+      # file the first time it is requested and reusing the result afterwards.
+      def patterns
+        return @patterns if @patterns
+
+        @patterns = parse_file
+      end
+
+      # Parses an attribute string.
+      #
+      # These strings can be in the following formats:
+      #
+      #     text      # => { "text" => true }
+      #     -text     # => { "text" => false }
+      #     key=value # => { "key" => "value" }
+      #
+      # string - The string to parse.
+      #
+      # Returns a Hash containing the attributes and their values.
+      def parse_attributes(string)
+        values = {}
+        # Allocated once per call instead of once per chunk.
+        dash = '-'
+        equal = '='
+        binary = 'binary'
+
+        string.split(/\s+/).each do |chunk|
+          # Data such as "foo = bar" should be treated as "foo" and "bar" being
+          # separate boolean attributes.
+          next if chunk == equal
+
+          if chunk.start_with?(dash)
+            # Input: "-foo". Slice by characters: mixing a byte offset with a
+            # character count truncated multibyte attribute names.
+            key = chunk[1..-1]
+            value = false
+          elsif chunk.include?(equal)
+            # Input: "foo=bar"
+            key, value = chunk.split(equal, 2)
+          else
+            # Input: "foo"
+            key = chunk
+            value = true
+          end
+
+          values[key] = value
+
+          # When the "binary" option is set the "diff" option should be set to
+          # the inverse. If "diff" is later set it should overwrite the
+          # automatically set value.
+          values['diff'] = false if key == binary && value
+        end
+
+        values
+      end
+
+      # Yields every line of `info/attributes`, stripped of surrounding
+      # whitespace. Nothing is yielded when the file does not exist, and
+      # iteration stops at the first line that is not validly encoded.
+      def each_line
+        attributes_file = File.join(@path, 'info/attributes')
+
+        if File.exist?(attributes_file)
+          File.open(attributes_file, 'r') do |io|
+            io.each_line do |raw|
+              break unless raw.valid_encoding?
+
+              yield raw.strip
+            end
+          end
+        end
+      end
+
+ private
+
+      # Parses the Git attributes file.
+      #
+      # Comment lines (starting with "#") and empty lines are skipped. Every
+      # remaining line is split into a pattern, made absolute against @path,
+      # and an optional attribute string.
+      #
+      # Returns a Hash of pattern => attributes, ordered newest entry first.
+      def parse_file
+        pairs = []
+        comment = '#'
+
+        each_line do |line|
+          next if line.start_with?(comment) || line.empty?
+
+          pattern, attrs = line.split(/\s+/, 2)
+
+          parsed = attrs ? parse_attributes(attrs) : {}
+
+          pairs << [File.join(@path, pattern), parsed]
+        end
+
+        # Newer entries take precedence over older entries. Walking the list
+        # backwards and keeping only the first value seen per pattern ensures
+        # a repeated pattern keeps its NEWEST attributes; `reverse.to_h` let
+        # the older duplicate overwrite the newer one.
+        pairs.reverse_each.with_object({}) do |(pattern, parsed), result|
+          result[pattern] = parsed unless result.key?(pattern)
+        end
+      end
+ end
+ end
+end
diff --git a/lib/gitlab/git/blame.rb b/lib/gitlab/git/blame.rb
new file mode 100644
index 00000000000..46f3969b6e1
--- /dev/null
+++ b/lib/gitlab/git/blame.rb
@@ -0,0 +1,77 @@
+require_relative 'encoding_helper'
+
+module Gitlab
+ module Git
+ class Blame
+ include Gitlab::Git::EncodingHelper
+
+ attr_reader :lines, :blames
+
+ # repository - Object exposing `path` and `lookup` (both used while loading).
+ # sha - The revision passed to `git blame`.
+ # path - The file path passed to `git blame`.
+ #
+ # The blame is loaded eagerly. process_raw_blame assigns its result to
+ # @lines and returns it, so @lines and @blames end up as the same Array.
+ def initialize(repository, sha, path)
+ @repo = repository
+ @sha = sha
+ @path = path
+ @lines = []
+ @blames = load_blame
+ end
+
+      # Yields a Gitlab::Git::Commit wrapper and the line text for every blame
+      # entry, in order.
+      def each
+        @blames.each do |entry|
+          wrapped = Gitlab::Git::Commit.new(entry.commit)
+          yield(wrapped, entry.line)
+        end
+      end
+
+ private
+
+      # Runs `git blame -p` for @sha/@path and hands the UTF-8 converted
+      # output to process_raw_blame.
+      def load_blame
+        command = %W(git --git-dir=#{@repo.path} blame -p #{@sha} -- #{@path})
+        # Read in binary mode to ensure ASCII-8BIT
+        raw_output = IO.popen(command, 'rb', &:read)
+
+        process_raw_blame(encode_utf8(raw_output))
+      end
+
+      # Turns porcelain blame output into BlameLine objects.
+      #
+      # Lines starting with a tab carry file content; lines starting with a
+      # 40-character id followed by two numbers describe where a line came
+      # from. Everything else is ignored.
+      #
+      # Stores the resulting Array in @lines and returns it.
+      def process_raw_blame(output)
+        contents = []
+        commits = {}
+        origins = {}
+
+        output.split("\n").each do |row|
+          if row[0, 1] == "\t"
+            contents << row[1, row.size]
+          elsif (header = /^(\w{40}) (\d+) (\d+)/.match(row))
+            commit_id = header[1]
+            commits[commit_id] = nil unless commits.key?(commit_id)
+            origins[header[3].to_i] = [commit_id, header[2].to_i]
+          end
+        end
+
+        # Look every distinct commit up once, however many lines it touched.
+        commits.each_key { |commit_id| commits[commit_id] = @repo.lookup(commit_id) }
+
+        @lines = origins.sort.map do |lineno, (commit_id, old_lineno)|
+          BlameLine.new(lineno, old_lineno, commits[commit_id], contents[lineno - 1])
+        end
+      end
+ end
+
+    # Plain holder for one blamed line: its current and previous line
+    # numbers, the commit it is attributed to and the line text.
+    class BlameLine
+      attr_accessor :lineno, :oldlineno, :commit, :line
+
+      def initialize(lineno, oldlineno, commit, line)
+        @lineno, @oldlineno = lineno, oldlineno
+        @commit, @line = commit, line
+      end
+    end
+ end
+end
diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb
new file mode 100644
index 00000000000..1325124daf4
--- /dev/null
+++ b/lib/gitlab/git/blob.rb
@@ -0,0 +1,333 @@
+require_relative 'encoding_helper'
+require_relative 'path_helper'
+
+module Gitlab
+ module Git
+ class Blob
+ include Linguist::BlobHelper
+ include Gitlab::Git::EncodingHelper
+
+ # This number is the maximum amount of data that we want to display to
+ # the user. We load as much as we can for encoding detection
+ # (Linguist) and LFS pointer parsing. All other cases where we need full
+ # blob data should use load_all_data!.
+ MAX_DATA_DISPLAY_SIZE = 10485760
+
+ attr_accessor :name, :path, :size, :data, :mode, :id, :commit_id, :loaded_size, :binary
+
+ class << self
+        # Finds the blob at +path+ in the tree of commit +sha+.
+        #
+        # Returns nil when the path does not resolve to an entry or the
+        # entry's object cannot be looked up. Entries of type :commit are
+        # returned as submodule blobs with empty data.
+        def find(repository, sha, path)
+          commit = repository.lookup(sha)
+          root_tree = commit.tree
+
+          blob_entry = find_entry_by_path(repository, root_tree.oid, path)
+          return nil unless blob_entry
+          return submodule_blob(blob_entry, path, sha) if blob_entry[:type] == :commit
+
+          blob = repository.lookup(blob_entry[:oid])
+          return unless blob
+
+          Blob.new(
+            id: blob.oid,
+            name: blob_entry[:name],
+            size: blob.size,
+            data: blob.content(MAX_DATA_DISPLAY_SIZE),
+            mode: blob_entry[:filemode].to_s(8),
+            path: path,
+            commit_id: sha,
+            binary: blob.binary?
+          )
+        end
+
+        # Builds a Blob straight from the object stored under +sha+, without
+        # name, path, mode or commit information.
+        def raw(repository, sha)
+          object = repository.lookup(sha)
+          attributes = {
+            id: object.oid,
+            size: object.size,
+            data: object.content(MAX_DATA_DISPLAY_SIZE),
+            binary: object.binary?
+          }
+
+          Blob.new(attributes)
+        end
+
+        # Recursive search of a tree entry by path.
+        #
+        # Ex.
+        #   blog/            # oid: 1a
+        #     app/           # oid: 2a
+        #       models/      # oid: 3a
+        #         file.rb    # oid: 4a
+        #
+        #   Blob.find_entry_by_path(repo, '1a', 'app/models/file.rb')
+        #   # => the entry Hash for file.rb (oid '4a')
+        #
+        # repository - Object whose `lookup(oid)` returns an enumerable tree.
+        # root_id    - The oid of the tree to start from.
+        # path       - Slash-separated path; leading slashes are ignored. The
+        #              string is never modified, so frozen strings are fine.
+        #
+        # Returns the entry when the path ends at a :blob or :commit entry,
+        # nil otherwise.
+        def find_entry_by_path(repository, root_id, path)
+          root_tree = repository.lookup(root_id)
+          # Strip leading slashes on a copy instead of editing the caller's
+          # string in place.
+          path_arr = path.sub(%r{\A/*}, '').split('/')
+
+          entry = root_tree.find do |candidate|
+            candidate[:name] == path_arr[0]
+          end
+
+          return nil unless entry
+
+          if path_arr.size > 1
+            # More segments left: we can only descend into a tree.
+            return nil unless entry[:type] == :tree
+
+            find_entry_by_path(repository, entry[:oid], path_arr.drop(1).join('/'))
+          else
+            [:blob, :commit].include?(entry[:type]) ? entry : nil
+          end
+        end
+
+        # Builds the placeholder Blob used for submodule entries: it carries
+        # the entry's oid and name but only an empty data string.
+        def submodule_blob(blob_entry, path, sha)
+          attributes = {
+            id: blob_entry[:oid],
+            name: blob_entry[:name],
+            data: '',
+            path: path,
+            commit_id: sha
+          }
+
+          Blob.new(attributes)
+        end
+
+ # Commit file in repository and return commit sha
+ #
+ # options should contain next structure:
+ # file: {
+ # content: 'Lorem ipsum...',
+ # path: 'documents/story.txt',
+ # update: true
+ # },
+ # author: {
+ # email: 'user@example.com',
+ # name: 'Test User',
+ # time: Time.now
+ # },
+ # committer: {
+ # email: 'user@example.com',
+ # name: 'Test User',
+ # time: Time.now
+ # },
+ # commit: {
+ # message: 'Wow such commit',
+ # branch: 'master',
+ # update_ref: false
+ # }
+ #
+ # rubocop:disable Metrics/AbcSize
+ # rubocop:disable Metrics/CyclomaticComplexity
+ # rubocop:disable Metrics/PerceivedComplexity
+        # Writes one file change (add/update, :remove or :rename) to the
+        # repository's index and creates a commit for it.
+        #
+        # repository - Object exposing `rugged` and `autocrlf`.
+        # options    - Hash with :file, :author, :committer and :commit keys.
+        # action     - :add (default), :remove or :rename.
+        #
+        # Raises Repository::InvalidBlobName for paths containing ".." or when
+        # the file exists and updates are not allowed, and
+        # Repository::InvalidRef when the branch does not exist.
+        #
+        # Returns the result of Rugged::Commit.create.
+        def commit(repository, options, action = :add)
+          file = options[:file]
+          update = file[:update].nil? ? true : file[:update]
+          author = options[:author]
+          committer = options[:committer]
+          commit = options[:commit]
+          repo = repository.rugged
+          ref = commit[:branch]
+          update_ref = commit[:update_ref].nil? ? true : commit[:update_ref]
+          parents = []
+          mode = 0o100644
+
+          unless ref.start_with?('refs/')
+            ref = 'refs/heads/' + ref
+          end
+
+          path_name = PathHelper.normalize_path(file[:path])
+          # Abort if any invalid characters remain (e.g. ../foo)
+          raise Repository::InvalidBlobName.new("Invalid path") if path_name.each_filename.to_a.include?('..')
+
+          filename = path_name.to_s
+          index = repo.index
+
+          unless repo.empty?
+            rugged_ref = repo.references[ref]
+            raise Repository::InvalidRef.new("Invalid branch name") unless rugged_ref
+            last_commit = rugged_ref.target
+            index.read_tree(last_commit.tree)
+            parents = [last_commit]
+          end
+
+          if action == :remove
+            index.remove(filename)
+          else
+            file_entry = index.get(filename)
+
+            if action == :rename
+              old_path_name = PathHelper.normalize_path(file[:previous_path])
+              old_filename = old_path_name.to_s
+              file_entry = index.get(old_filename)
+              index.remove(old_filename) unless file_entry.blank?
+            end
+
+            if file_entry
+              raise Repository::InvalidBlobName.new("Filename already exists; update not allowed") unless update
+
+              # Preserve the current file mode if one is available
+              mode = file_entry[:mode] if file_entry[:mode]
+            end
+
+            content = file[:content]
+            detect = CharlockHolmes::EncodingDetector.new.detect(content) if content
+
+            unless detect && detect[:type] == :binary
+              # When writing to the repo directly as we are doing here,
+              # the `core.autocrlf` config isn't taken into account.
+              # Normalise on a copy: the content string belongs to the caller
+              # (and may be frozen), so it must not be edited in place.
+              content = content.gsub("\r\n", "\n") if repository.autocrlf
+            end
+
+            oid = repo.write(content, :blob)
+            index.add(path: filename, oid: oid, mode: mode)
+          end
+
+          opts = {}
+          opts[:tree] = index.write_tree(repo)
+          opts[:author] = author
+          opts[:committer] = committer
+          opts[:message] = commit[:message]
+          opts[:parents] = parents
+          opts[:update_ref] = ref if update_ref
+
+          Rugged::Commit.create(repo, opts)
+        end
+ # rubocop:enable Metrics/AbcSize
+ # rubocop:enable Metrics/CyclomaticComplexity
+ # rubocop:enable Metrics/PerceivedComplexity
+
+ # Remove file from repository and return commit sha
+ #
+ # options should contain next structure:
+ # file: {
+ # path: 'documents/story.txt'
+ # },
+ # author: {
+ # email: 'user@example.com',
+ # name: 'Test User',
+ # time: Time.now
+ # },
+ # committer: {
+ # email: 'user@example.com',
+ # name: 'Test User',
+ # time: Time.now
+ # },
+ # commit: {
+ # message: 'Remove FILENAME',
+ # branch: 'master'
+ # }
+ #
+ def remove(repository, options)
+ # Delegates to .commit with the :remove action.
+ commit(repository, options, :remove)
+ end
+
+ # Rename file from repository and return commit sha
+ #
+ # options should contain next structure:
+ # file: {
+ # previous_path: 'documents/old_story.txt'
+ # path: 'documents/story.txt'
+ # content: 'Lorem ipsum...',
+ # update: true
+ # },
+ # author: {
+ # email: 'user@example.com',
+ # name: 'Test User',
+ # time: Time.now
+ # },
+ # committer: {
+ # email: 'user@example.com',
+ # name: 'Test User',
+ # time: Time.now
+ # },
+ # commit: {
+ # message: 'Rename FILENAME',
+ # branch: 'master'
+ # }
+ #
+ def rename(repository, options)
+ # Delegates to .commit with the :rename action.
+ commit(repository, options, :rename)
+ end
+ end
+
+      # options - Hash with any of the keys :id, :name, :path, :size, :data,
+      #           :mode, :commit_id and :binary; each one is assigned through
+      #           its writer, missing keys become nil.
+      def initialize(options)
+        %i(id name path size data mode commit_id binary).each do |attribute|
+          send("#{attribute}=", options[attribute])
+        end
+
+        @loaded_all_data = false
+        # Retain the actual size before it is encoded
+        @loaded_size = @data.bytesize if @data
+      end
+
+      # Uses the explicit @binary flag when one was given, otherwise defers to
+      # the inherited implementation.
+      def binary?
+        if @binary.nil?
+          super
+        else
+          @binary == true
+        end
+      end
+
+      # True when there is no data or the data is the empty string.
+      def empty?
+        content = data
+        !content || content == ''
+      end
+
+      # The blob data, passed through encode!.
+      def data
+        encode!(@data)
+      end
+
+      # Load all blob data (not just the first MAX_DATA_DISPLAY_SIZE bytes) into
+      # memory as a Ruby string. The full content is fetched at most once.
+      def load_all_data!(repository)
+        if @data == ''
+          # don't mess with submodule blobs
+          nil
+        elsif @loaded_all_data
+          @data
+        else
+          @loaded_all_data = true
+          @data = repository.lookup(id).content
+          @loaded_size = @data.bytesize
+        end
+      end
+
+      # The blob name, passed through encode!.
+      def name
+        encode!(@name)
+      end
+
+      # Valid LFS object pointer is a text file consisting of
+      # version
+      # oid
+      # size
+      # see https://github.com/github/git-lfs/blob/v1.1.0/docs/spec.md#the-pointer
+      def lfs_pointer?
+        return false unless has_lfs_version_key?
+
+        lfs_oid.present? && lfs_size.present?
+      end
+
+      # The 64 hex digits following "sha256:" in an LFS pointer, or nil.
+      def lfs_oid
+        return nil unless has_lfs_version_key?
+
+        match = data.match(/(?<=sha256:)([0-9a-f]{64})/)
+        match[1] if match
+      end
+
+      # The digits following "size " in an LFS pointer (as a String), or nil.
+      def lfs_size
+        return nil unless has_lfs_version_key?
+
+        match = data.match(/(?<=size )([0-9]+)/)
+        match[1] if match
+      end
+
+      # Whether the recorded size exceeds the number of bytes actually loaded.
+      def truncated?
+        size && (size > loaded_size)
+      end
+
+ private
+
+ # True for non-empty text blobs whose data begins with the LFS spec
+ # version line.
+ def has_lfs_version_key?
+ !empty? && text? && data.start_with?("version https://git-lfs.github.com/spec")
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/git/blob_snippet.rb b/lib/gitlab/git/blob_snippet.rb
new file mode 100644
index 00000000000..e98de57fc22
--- /dev/null
+++ b/lib/gitlab/git/blob_snippet.rb
@@ -0,0 +1,32 @@
+module Gitlab
+ module Git
+    # A run of lines taken from a file at a given ref, exposing the
+    # data/name/size/mode methods alongside Linguist::BlobHelper.
+    class BlobSnippet
+      include Linguist::BlobHelper
+
+      attr_accessor :ref
+      attr_accessor :lines
+      attr_accessor :filename
+      attr_accessor :startline
+
+      # ref       - The ref the snippet was taken from.
+      # lines     - Array of line strings (may be nil).
+      # startline - Position of the first line within the file.
+      # filename  - Name of the file the lines belong to.
+      def initialize(ref, lines, startline, filename)
+        @ref, @lines, @startline, @filename = ref, lines, startline, filename
+      end
+
+      # The lines joined with newlines, or nil when there are no lines.
+      def data
+        lines.join("\n") if lines
+      end
+
+      def name
+        filename
+      end
+
+      # Length of the joined data; 0 when there is no data. #data may
+      # legitimately be nil, so the length must not be taken blindly.
+      def size
+        content = data
+        content ? content.length : 0
+      end
+
+      def mode
+        nil
+      end
+    end
+ end
+end
diff --git a/lib/gitlab/git/branch.rb b/lib/gitlab/git/branch.rb
new file mode 100644
index 00000000000..586380da94a
--- /dev/null
+++ b/lib/gitlab/git/branch.rb
@@ -0,0 +1,6 @@
+module Gitlab
+ module Git
+ # A branch reference. Inherits everything from Ref and adds nothing.
+ class Branch < Ref
+ end
+ end
+end
diff --git a/lib/gitlab/git/commit.rb b/lib/gitlab/git/commit.rb
new file mode 100644
index 00000000000..2077f60a178
--- /dev/null
+++ b/lib/gitlab/git/commit.rb
@@ -0,0 +1,310 @@
+# Gitlab::Git::Commit is a wrapper around native Rugged::Commit object
+module Gitlab
+ module Git
+ class Commit
+ include Gitlab::Git::EncodingHelper
+
+ attr_accessor :raw_commit, :head, :refs
+
+ SERIALIZE_KEYS = [
+ :id, :message, :parent_ids,
+ :authored_date, :author_name, :author_email,
+ :committed_date, :committer_name, :committer_email
+ ].freeze
+
+ attr_accessor *SERIALIZE_KEYS # rubocop:disable Lint/AmbiguousOperator
+
+      # Two commits are equal when the other object is a Gitlab::Git::Commit
+      # and message, parents, dates and author/committer details all match.
+      # The id itself is not compared.
+      def ==(other)
+        return false unless other.is_a?(Gitlab::Git::Commit)
+
+        compared = %i(message parent_ids authored_date author_name
+                      author_email committed_date committer_name
+                      committer_email)
+
+        compared.all? { |field| send(field) == other.send(field) }
+      end
+
+ class << self
+ # Get commits collection
+ #
+ # Ex.
+ # Commit.where(
+ # repo: repo,
+ # ref: 'master',
+ # path: 'app/models',
+ # limit: 10,
+ # offset: 5,
+ # )
+ #
+        # options - Hash holding the repository under :repo; every other key
+        #           is passed on to `repo.log`. The hash is not modified.
+        #
+        # Raises RuntimeError when :repo does not respond to `log`.
+        #
+        # Returns an Array of decorated commits.
+        def where(options)
+          # Split on a copy so the caller's hash keeps its :repo entry.
+          log_options = options.dup
+          repo = log_options.delete(:repo)
+          raise 'Gitlab::Git::Repository is required' unless repo.respond_to?(:log)
+
+          repo.log(log_options).map { |c| decorate(c) }
+        end
+
+ # Get single commit
+ #
+ # Ex.
+ # Commit.find(repo, '29eda46b')
+ #
+ # Commit.find(repo, 'master')
+ #
+        # Resolves +commit_id+ to a decorated commit. A Rugged::Commit is
+        # wrapped directly, a String goes through repo.rev_parse_target, and
+        # anything else through Ref.dereference_object.
+        #
+        # Returns nil when the result is not a Rugged::Commit or when one of
+        # the rescued lookup errors is raised.
+        def find(repo, commit_id = "HEAD")
+          return decorate(commit_id) if commit_id.is_a?(Rugged::Commit)
+
+          target =
+            if commit_id.is_a?(String)
+              repo.rev_parse_target(commit_id)
+            else
+              Ref.dereference_object(commit_id)
+            end
+
+          target.is_a?(Rugged::Commit) ? decorate(target) : nil
+        rescue Rugged::ReferenceError, Rugged::InvalidError, Rugged::ObjectError, Gitlab::Git::Repository::NoRepository
+          nil
+        end
+
+        # Get last commit for HEAD
+        #
+        # Ex.
+        #   Commit.last(repo)
+        #
+        def last(repo)
+          find(repo)
+        end
+
+        # Get last commit for specified path and ref
+        #
+        # Ex.
+        #   Commit.last_for_path(repo, '29eda46b', 'app/models')
+        #
+        #   Commit.last_for_path(repo, 'master', 'Gemfile')
+        #
+        def last_for_path(repo, ref, path = nil)
+          query = { repo: repo, ref: ref, path: path, limit: 1 }
+
+          where(query).first
+        end
+
+        # Get commits between two revspecs
+        # See also #repository.commits_between
+        #
+        # Returns an empty Array when a Rugged::ReferenceError is raised.
+        #
+        # Ex.
+        #   Commit.between(repo, '29eda46b', 'master')
+        #
+        def between(repo, base, head)
+          repo.commits_between(base, head).map { |commit| decorate(commit) }
+        rescue Rugged::ReferenceError
+          []
+        end
+
+        # Delegate Repository#find_commits
+        def find_all(repo, options = {})
+          repo.find_commits(options)
+        end
+
+        # Wraps a raw commit in a Gitlab::Git::Commit.
+        def decorate(commit, ref = nil)
+          Gitlab::Git::Commit.new(commit, ref)
+        end
+
+        # Returns a diff object for the changes introduced by +rugged_commit+.
+        # Without a parent the commit is diffed with the :reverse option set;
+        # otherwise the first parent is diffed against the commit. See
+        # Repository#diff for the keys allowed in the +options+ hash.
+        def diff_from_parent(rugged_commit, options = {})
+          options ||= {}
+          rewrites = options[:break_rewrites]
+          rugged_options = Diff.filter_diff_options(options)
+          parents = rugged_commit.parents
+
+          result =
+            if parents.empty?
+              rugged_commit.diff(rugged_options.merge(reverse: true))
+            else
+              parents[0].diff(rugged_commit, rugged_options)
+            end
+
+          result.find_similar!(break_rewrites: rewrites)
+          result
+        end
+ end
+
+      # raw_commit - A Hash of serialized attributes or a Rugged::Commit.
+      # head       - Optional value stored as-is in @head.
+      #
+      # Raises RuntimeError for nil or any other type.
+      def initialize(raw_commit, head = nil)
+        raise "Nil as raw commit passed" unless raw_commit
+
+        case raw_commit
+        when Hash
+          init_from_hash(raw_commit)
+        when Rugged::Commit
+          init_from_rugged(raw_commit)
+        else
+          raise "Invalid raw commit type: #{raw_commit.class}"
+        end
+
+        @head = head
+      end
+
+      # Alias for #id.
+      def sha
+        id
+      end
+
+      # The first +length+ characters of the id.
+      #
+      # The exclusive range matters: `0..length` returned length + 1
+      # characters (11 for the default of 10).
+      def short_id(length = 10)
+        id.to_s[0...length]
+      end
+
+      # Memoized copy of #message.
+      def safe_message
+        @safe_message ||= message
+      end
+
+      def created_at
+        committed_date
+      end
+
+      # Was this commit committed by a different person than the original author?
+      def different_committer?
+        author_name != committer_name || author_email != committer_email
+      end
+
+      # The id of the first parent, or nil when there is none.
+      def parent_id
+        parent_ids.first
+      end
+
+      # Shows the diff between the commit's parent and the commit.
+      #
+      # Cuts out the header and stats from #to_patch and returns only the diff.
+      def to_diff(options = {})
+        parent_diff = diff_from_parent(options)
+        parent_diff.patch
+      end
+
+      # Returns a diff object for the changes from this commit's first parent.
+      # If there is no parent, then the diff is between this commit and an
+      # empty repo. See Repository#diff for keys allowed in the +options+
+      # hash.
+      def diff_from_parent(options = {})
+        Commit.diff_from_parent(raw_commit, options)
+      end
+
+      # True when the stats total is zero, or when computing it raises.
+      def has_zero_stats?
+        stats.total.zero?
+      rescue
+        true
+      end
+
+      def no_commit_message
+        "--no commit message"
+      end
+
+      # Serializes the commit into a Hash keyed by the serialize keys.
+      def to_hash
+        serialize_keys.each_with_object({}) do |key, result|
+          result[key] = send(key)
+        end
+      end
+
+      def date
+        committed_date
+      end
+
+      def diffs(options = {})
+        raw_diff = diff_from_parent(options)
+        DiffCollection.new(raw_diff, options)
+      end
+
+      # The parents of the raw commit, each wrapped in a Gitlab::Git::Commit.
+      def parents
+        raw_commit.parents.map { |parent| Gitlab::Git::Commit.new(parent) }
+      end
+
+      def tree
+        raw_commit.tree
+      end
+
+      def stats
+        Gitlab::Git::CommitStats.new(self)
+      end
+
+      # Returns the commit in mbox form. For a Rugged::InvalidError about a
+      # merge commit a fixed explanation string is returned instead; any other
+      # Rugged::InvalidError results in nil.
+      def to_patch(options = {})
+        raw_commit.to_mbox(options)
+      rescue Rugged::InvalidError => error
+        if error.message =~ /Commit \w+ is a merge commit/
+          'Patch format is not currently supported for merge commits.'
+        end
+      end
+
+      # Get the entry stored for this commit's id in the repository's refs_hash.
+      #
+      # Ex.
+      #   commit.refs(repo)
+      #
+      def refs(repo)
+        repo.refs_hash[id]
+      end
+
+      # Get ref names collection
+      #
+      # Ex.
+      #   commit.ref_names(repo)
+      #
+      def ref_names(repo)
+        refs(repo).map do |reference|
+          reference.name.sub(%r{^refs/(heads|remotes|tags)/}, "")
+        end
+      end
+
+ # The readers below override the plain attribute readers: each returns
+ # its instance variable passed through encode!.
+ def message
+ encode! @message
+ end
+
+ def author_name
+ encode! @author_name
+ end
+
+ def author_email
+ encode! @author_email
+ end
+
+ def committer_name
+ encode! @committer_name
+ end
+
+ def committer_email
+ encode! @committer_email
+ end
+
+ private
+
+      # Populates the commit from a Hash whose keys (String or Symbol) match
+      # the serialize keys; missing keys become nil.
+      def init_from_hash(hash)
+        attributes = hash.symbolize_keys
+
+        serialize_keys.each { |key| send("#{key}=", attributes[key]) }
+      end
+
+      # Populates the commit from a Rugged::Commit and keeps it as the raw
+      # commit.
+      def init_from_rugged(commit)
+        author = commit.author
+        committer = commit.committer
+
+        @raw_commit = commit
+        @id = commit.oid
+        @message = commit.message
+        @parent_ids = commit.parents.map(&:oid)
+
+        @authored_date = author[:time]
+        @author_name = author[:name]
+        @author_email = author[:email]
+
+        @committed_date = committer[:time]
+        @committer_name = committer[:name]
+        @committer_email = committer[:email]
+      end
+
+      def serialize_keys
+        SERIALIZE_KEYS
+      end
+ end
+ end
+end
diff --git a/lib/gitlab/git/commit_stats.rb b/lib/gitlab/git/commit_stats.rb
new file mode 100644
index 00000000000..e9118bbed0e
--- /dev/null
+++ b/lib/gitlab/git/commit_stats.rb
@@ -0,0 +1,26 @@
+# Gitlab::Git::CommitStats counts the additions, deletions, and total changes
+# in a commit.
+module Gitlab
+ module Git
+    class CommitStats
+      attr_reader :id, :additions, :deletions, :total
+
+      # Instantiate a CommitStats object by summing the per-patch numbers of
+      # the commit's diff against its parent.
+      def initialize(commit)
+        @id = commit.id
+        @additions = @deletions = @total = 0
+
+        commit.diff_from_parent.each_patch do |patch|
+          # TODO: Use the new Rugged convenience methods when they're released
+          @additions += patch.stat[0]
+          @deletions += patch.stat[1]
+          @total += patch.changes
+        end
+      end
+    end
+ end
+end
diff --git a/lib/gitlab/git/compare.rb b/lib/gitlab/git/compare.rb
new file mode 100644
index 00000000000..696a2acd5e3
--- /dev/null
+++ b/lib/gitlab/git/compare.rb
@@ -0,0 +1,43 @@
+module Gitlab
+ module Git
+    # Compares two revisions of a repository, exposing the commits and the
+    # diffs between them.
+    class Compare
+      attr_reader :head, :base, :straight
+
+      # repository - The repository both revisions live in.
+      # base, head - Revisions to compare; surrounding whitespace is stripped.
+      # straight   - Passed on to Diff.between as the :straight option.
+      #
+      # When either revision is missing or cannot be found, or both resolve to
+      # the same commit, the commit list is fixed to an empty Array.
+      def initialize(repository, base, head, straight = false)
+        @repository = repository
+        @straight = straight
+
+        unless base && head
+          @commits = []
+          return
+        end
+
+        @base = Gitlab::Git::Commit.find(repository, base.try(:strip))
+        @head = Gitlab::Git::Commit.find(repository, head.try(:strip))
+
+        @commits = [] unless @base && @head
+        @commits = [] if same
+      end
+
+      # Truthy when both sides were found and share the same id.
+      def same
+        @base && @head && @base.id == @head.id
+      end
+
+      # The commits between base and head, computed on first use unless the
+      # constructor already fixed them to an empty list.
+      def commits
+        return @commits if defined?(@commits)
+
+        @commits = Gitlab::Git::Commit.between(@repository, @base.id, @head.id)
+      end
+
+      # options - Diff options; :paths (Array) restricts the diff. The hash
+      #           passed in is not modified.
+      def diffs(options = {})
+        unless @head && @base
+          return Gitlab::Git::DiffCollection.new([])
+        end
+
+        # Work on a copy: the caller's hash must keep its :paths entry and
+        # must not gain a :straight key.
+        options = options.dup
+        paths = options.delete(:paths) || []
+        options[:straight] = @straight
+        Gitlab::Git::Diff.between(@repository, @head.id, @base.id, options, *paths)
+      end
+    end
+ end
+end
diff --git a/lib/gitlab/git/diff.rb b/lib/gitlab/git/diff.rb
new file mode 100644
index 00000000000..d6b3b5705a9
--- /dev/null
+++ b/lib/gitlab/git/diff.rb
@@ -0,0 +1,322 @@
+# Gitlab::Git::Diff is a wrapper around native Rugged::Diff object
+module Gitlab
+ module Git
+ class Diff
+ class TimeoutError < StandardError; end
+ include Gitlab::Git::EncodingHelper
+
+ # Diff properties
+ attr_accessor :old_path, :new_path, :a_mode, :b_mode, :diff
+
+ # Stats properties
+ attr_accessor :new_file, :renamed_file, :deleted_file
+
+ attr_accessor :too_large
+
+ # The maximum size of a diff to display.
+ DIFF_SIZE_LIMIT = 102400 # 100 KB
+
+ # The maximum size before a diff is collapsed.
+ DIFF_COLLAPSE_LIMIT = 10240 # 10 KB
+
+ class << self
+        # Diffs +head+ against +base+ (or against their merge base).
+        #
+        # repo    - Object responding to `merge_base_commit` and `diff`.
+        # options - Diff options; :straight selects a direct base..head diff.
+        #           May be nil. The hash passed in is not modified.
+        # paths   - Optional paths forwarded to `repo.diff`.
+        def between(repo, head, base, options = {}, *paths)
+          # Copy before extracting :straight so the caller's hash is left
+          # alone; the nil fallback has to happen before the first use.
+          options = options ? options.dup : {}
+          straight = options.delete(:straight) || false
+
+          common_commit = if straight
+                            base
+                          else
+                            # Only show what is new in the source branch
+                            # compared to the target branch, not the other way
+                            # around. The line below with merge_base is
+                            # equivalent to diff with three dots (git diff
+                            # branch1...branch2) From the git documentation:
+                            # "git diff A...B" is equivalent to "git diff
+                            # $(git-merge-base A B) B"
+                            repo.merge_base_commit(head, base)
+                          end
+
+          actual_options = filter_diff_options(options)
+          repo.diff(common_commit, head, actual_options, *paths)
+        end
+
+ # Return a copy of the +options+ hash containing only keys that can be
+ # passed to Rugged. Allowed options are:
+ #
+ # :max_size ::
+ # An integer specifying the maximum byte size of a file before it
+ # will be treated as binary. The default value is 512MB.
+ #
+ # :context_lines ::
+ # The number of unchanged lines that define the boundary of a hunk
+ # (and to display before and after the actual changes). The default is
+ # 3.
+ #
+ # :interhunk_lines ::
+ # The maximum number of unchanged lines between hunk boundaries before
+ # the hunks will be merged into a one. The default is 0.
+ #
+ # :old_prefix ::
+ # The virtual "directory" to prefix to old filenames in hunk headers.
+ # The default is "a".
+ #
+ # :new_prefix ::
+ # The virtual "directory" to prefix to new filenames in hunk headers.
+ # The default is "b".
+ #
+ # :reverse ::
+ # If true, the sides of the diff will be reversed.
+ #
+ # :force_text ::
+ # If true, all files will be treated as text, disabling binary
+ # attributes & detection.
+ #
+ # :ignore_whitespace ::
+ # If true, all whitespace will be ignored.
+ #
+ # :ignore_whitespace_change ::
+ # If true, changes in amount of whitespace will be ignored.
+ #
+ # :ignore_whitespace_eol ::
+ # If true, whitespace at end of line will be ignored.
+ #
+ # :ignore_submodules ::
+ # if true, submodules will be excluded from the diff completely.
+ #
+ # :patience ::
+ # If true, the "patience diff" algorithm will be used (currently
+ # unimplemented).
+ #
+ # :include_ignored ::
+ # If true, ignored files will be included in the diff.
+ #
+ # :include_untracked ::
+ # If true, untracked files will be included in the diff.
+ #
+ # :include_unmodified ::
+ # If true, unmodified files will be included in the diff.
+ #
+ # :recurse_untracked_dirs ::
+ # Even if +:include_untracked+ is true, untracked directories will
+ # only be marked with a single entry in the diff. If this flag is set
+ # to true, all files under ignored directories will be included in the
+ # diff, too.
+ #
+ # :disable_pathspec_match ::
+ # If true, the given +*paths+ will be applied as exact matches,
+ # instead of as fnmatch patterns.
+ #
+ # :deltas_are_icase ::
+ # If true, filename comparisons will be made with case-insensitivity.
+ #
+ # :include_untracked_content ::
+ # If true, untracked content will be contained in the diff patch
+ # text.
+ #
+ # :skip_binary_check ::
+ # If true, diff deltas will be generated without spending time on
+ # binary detection. This is useful to improve performance in cases
+ # where the actual file content difference is not needed.
+ #
+ # :include_typechange ::
+ # If true, type changes for files will not be interpreted as deletion
+ # of the "old file" and addition of the "new file", but will generate
+ # typechange records.
+ #
+ # :include_typechange_trees ::
+ # Even if +:include_typechange+ is true, blob -> tree changes will
+ # still usually be handled as a deletion of the blob. If this flag is
+ # set to true, blob -> tree changes will be marked as typechanges.
+ #
+ # :ignore_filemode ::
+ # If true, file mode changes will be ignored.
+ #
+ # :recurse_ignored_dirs ::
+ # Even if +:include_ignored+ is true, ignored directories will only be
+ # marked with a single entry in the diff. If this flag is set to true,
+ # all files under ignored directories will be included in the diff,
+ # too.
+        # Returns a new hash holding only the allowed keys of +options+, laid
+        # over the allowed keys of +default_options+. Either argument may be
+        # nil; neither is modified.
+        def filter_diff_options(options, default_options = {})
+          allowed_options = [:max_size, :context_lines, :interhunk_lines,
+                             :old_prefix, :new_prefix, :reverse, :force_text,
+                             :ignore_whitespace, :ignore_whitespace_change,
+                             :ignore_whitespace_eol, :ignore_submodules,
+                             :patience, :include_ignored, :include_untracked,
+                             :include_unmodified, :recurse_untracked_dirs,
+                             :disable_pathspec_match, :deltas_are_icase,
+                             :include_untracked_content, :skip_binary_check,
+                             :include_typechange, :include_typechange_trees,
+                             :ignore_filemode, :recurse_ignored_dirs, :paths,
+                             :max_files, :max_lines, :all_diffs, :no_collapse]
+
+          only_allowed = lambda do |hash|
+            hash.dup.keep_if { |key| allowed_options.include?(key) }
+          end
+
+          defaults = default_options ? only_allowed.call(default_options) : {}
+
+          options ? defaults.merge(only_allowed.call(options)) : defaults
+        end
+ end
+
+      # raw_diff - A Hash of serialized attributes, a Rugged::Patch or a
+      #            Rugged::Diff::Delta.
+      # collapse - Forwarded to the init helper.
+      #
+      # Raises RuntimeError for nil or any other type.
+      def initialize(raw_diff, collapse: false)
+        if raw_diff.is_a?(Hash)
+          init_from_hash(raw_diff, collapse: collapse)
+        elsif raw_diff.is_a?(Rugged::Patch) || raw_diff.is_a?(Rugged::Diff::Delta)
+          init_from_rugged(raw_diff, collapse: collapse)
+        elsif raw_diff.nil?
+          raise "Nil as raw diff passed"
+        else
+          raise "Invalid raw diff type: #{raw_diff.class}"
+        end
+      end
+
+      # The attribute names used by #to_hash and when initializing from a Hash.
+      def serialize_keys
+        @serialize_keys ||= %i(diff new_path old_path a_mode b_mode new_file renamed_file deleted_file too_large)
+      end
+
+      # Serializes the diff into a Hash keyed by #serialize_keys.
+      def to_hash
+        serialize_keys.each_with_object({}) do |key, result|
+          result[key] = send(key)
+        end
+      end
+
+      # True when either side of the diff has the gitlink mode 160000.
+      def submodule?
+        a_mode == '160000' || b_mode == '160000'
+      end
+
+      def line_count
+        @line_count ||= Util.count_lines(@diff)
+      end
+
+      # Whether the diff text reaches DIFF_SIZE_LIMIT. The answer is memoized,
+      # and an explicitly assigned too_large value wins. A diff without text
+      # is never too large.
+      def too_large?
+        if @too_large.nil?
+          # `to_s` guards against a missing diff text (nil).
+          @too_large = @diff.to_s.bytesize >= DIFF_SIZE_LIMIT
+        else
+          @too_large
+        end
+      end
+
+      # Whether the diff text reaches DIFF_COLLAPSE_LIMIT. A diff without
+      # text is never collapsible.
+      def collapsible?
+        @diff.to_s.bytesize >= DIFF_COLLAPSE_LIMIT
+      end
+
+      # Drops the diff text and marks the diff as too large.
+      def prune_large_diff!
+        @diff = ''
+        @line_count = 0
+        @too_large = true
+      end
+
+      def collapsed?
+        return @collapsed if defined?(@collapsed)
+        false
+      end
+
+      # Drops the diff text and marks the diff as collapsed.
+      def prune_collapsed_diff!
+        @diff = ''
+        @line_count = 0
+        @collapsed = true
+      end
+
+ private
+
+      # Fills in paths, modes and status flags from a Rugged delta. When given
+      # a Rugged::Patch the diff text is initialized first and the patch's
+      # delta is used.
+      def init_from_rugged(rugged, collapse: false)
+        delta = rugged
+
+        if rugged.is_a?(Rugged::Patch)
+          init_from_rugged_patch(rugged, collapse: collapse)
+          delta = rugged.delta
+        end
+
+        @new_path = encode!(delta.new_file[:path])
+        @old_path = encode!(delta.old_file[:path])
+        @a_mode = delta.old_file[:mode].to_s(8)
+        @b_mode = delta.new_file[:mode].to_s(8)
+        @new_file = delta.added?
+        @renamed_file = delta.renamed?
+        @deleted_file = delta.deleted?
+      end
+
+      # Sets @diff from the patch text unless the patch was pruned.
+      def init_from_rugged_patch(patch, collapse: false)
+        # Don't bother initializing diffs that are too large. If a diff is
+        # binary we're not going to display anything so we skip the size check.
+        pruned = !patch.delta.binary? && prune_large_patch(patch, collapse)
+
+        @diff = encode!(strip_diff_headers(patch.to_s)) unless pruned
+      end
+
+      # Populates the diff from a Hash keyed (String or Symbol) by the
+      # serialize keys, then prunes it when it is too large or, if +collapse+
+      # is set, collapsible.
+      def init_from_hash(hash, collapse: false)
+        attributes = hash.symbolize_keys
+
+        serialize_keys.each do |key|
+          send(:"#{key}=", attributes[key.to_sym])
+        end
+
+        prune_large_diff! if too_large?
+        prune_collapsed_diff! if collapse && collapsible?
+      end
+
+      # If the patch surpasses any of the diff limits it calls the appropriate
+      # prune method and returns true. Otherwise returns false.
+      #
+      # Line contents are summed hunk by hunk, and counting stops as soon as
+      # DIFF_SIZE_LIMIT is reached.
+      def prune_large_patch(patch, collapse)
+        total_bytes = 0
+
+        patch.each_hunk do |hunk|
+          hunk.each_line do |line|
+            total_bytes += line.content.bytesize
+            next if total_bytes < DIFF_SIZE_LIMIT
+
+            prune_large_diff!
+            return true
+          end
+        end
+
+        return false unless collapse && total_bytes >= DIFF_COLLAPSE_LIMIT
+
+        prune_collapsed_diff!
+        true
+      end
+
+      # Strip out the information at the beginning of the patch's text to match
+      # Grit's output.
+      #
+      # Everything before the first line starting with '---' or 'Binary' is
+      # removed from +diff_text+ in place. When no such line exists the empty
+      # string is returned instead (behavior kept from before the refactor).
+      def strip_diff_headers(diff_text)
+        diff_text.sub!(/\A.*?^(---|Binary)/m, '\1')
+
+        diff_text.start_with?('---', 'Binary') ? diff_text : ''
+      end
+ end
+ end
+end
diff --git a/lib/gitlab/git/diff_collection.rb b/lib/gitlab/git/diff_collection.rb
new file mode 100644
index 00000000000..65e06f5065d
--- /dev/null
+++ b/lib/gitlab/git/diff_collection.rb
@@ -0,0 +1,129 @@
+module Gitlab
+ module Git
+ # Enumerable wrapper around a source of raw diffs. Diff objects are built
+ # lazily while iterating, cached in @array, and iteration stops early
+ # (setting the overflow flag) once the file/line/byte limits are hit.
+ class DiffCollection
+   include Enumerable
+
+   DEFAULT_LIMITS = { max_files: 100, max_lines: 5000 }.freeze
+
+   # iterator - Source of raw diffs. It is sent each_with_index and any?,
+   #            plus each_delta when :deltas_only is set.
+   # options  - Hash of optional settings:
+   #            :max_files   - Maximum number of files (default: 100).
+   #            :max_lines   - Maximum number of lines (default: 5000).
+   #            :all_diffs   - When truthy the limits are not enforced.
+   #            :no_collapse - When truthy (the default) diffs are never
+   #                           collapsed.
+   #            :deltas_only - When truthy iterate deltas instead of patches.
+   def initialize(iterator, options = {})
+     @iterator = iterator
+     @max_files = options.fetch(:max_files, DEFAULT_LIMITS[:max_files])
+     @max_lines = options.fetch(:max_lines, DEFAULT_LIMITS[:max_lines])
+     @max_bytes = @max_files * 5120 # Average 5 KB per file
+     # The "safe" limits never exceed the defaults, whatever was requested.
+     @safe_max_files = [@max_files, DEFAULT_LIMITS[:max_files]].min
+     @safe_max_lines = [@max_lines, DEFAULT_LIMITS[:max_lines]].min
+     @safe_max_bytes = @safe_max_files * 5120 # Average 5 KB per file
+     @all_diffs = !!options.fetch(:all_diffs, false)
+     @no_collapse = !!options.fetch(:no_collapse, true)
+     @deltas_only = !!options.fetch(:deltas_only, false)
+
+     @line_count = 0
+     @byte_count = 0
+     @overflow = false
+     # Set explicitly so the flag is never read before being defined.
+     @populated = false
+     @array = []
+   end
+
+   # Yields every diff in the collection.
+   #
+   # Returns an Enumerator when called without a block, as Enumerable
+   # implementations conventionally do, instead of failing inside a yield.
+   def each(&block)
+     return enum_for(:each) unless block_given?
+
+     if @populated
+       # @iterator.each is slower than just iterating the array in place
+       @array.each(&block)
+     elsif @deltas_only
+       each_delta(&block)
+     else
+       each_patch(&block)
+     end
+   end
+
+   # Returns true when the underlying iterator has no (truthy) element.
+   def empty?
+     !@iterator.any?
+   end
+
+   # Returns true when iteration stopped early because a limit was hit.
+   # Forces a full pass over the diffs first.
+   def overflow?
+     populate!
+     !!@overflow
+   end
+
+   # Number of diffs yielded by #each (memoized).
+   def size
+     @size ||= count # forces a loop using each method
+   end
+
+   # Returns the size as a String, with a trailing "+" when the collection
+   # overflowed (e.g. "100+").
+   def real_size
+     populate!
+
+     if @overflow
+       "#{size}+"
+     else
+       size.to_s
+     end
+   end
+
+   # Replaces every cached element with the block's result for it and marks
+   # the collection as populated. Returns whatever each_with_index returns.
+   def decorate!
+     collection = each_with_index do |element, i|
+       @array[i] = yield(element)
+     end
+     @populated = true
+     collection
+   end
+
+   private
+
+   # Iterates once over all diffs so that @array and @overflow are final.
+   def populate!
+     return if @populated
+
+     each { nil } # force a loop through all diffs
+     @populated = true
+     nil
+   end
+
+   # files - Number of files processed so far.
+   def over_safe_limits?(files)
+     files >= @safe_max_files || @line_count > @safe_max_lines || @byte_count >= @safe_max_bytes
+   end
+
+   # Yields one Diff per delta. No limits are applied on this path.
+   def each_delta
+     @iterator.each_delta.with_index do |delta, i|
+       diff = Gitlab::Git::Diff.new(delta)
+
+       yield @array[i] = diff
+     end
+   end
+
+   # Yields one Diff per raw patch, enforcing the limits unless @all_diffs
+   # is set.
+   def each_patch
+     @iterator.each_with_index do |raw, i|
+       # First yield cached Diff instances from @array
+       if @array[i]
+         yield @array[i]
+         next
+       end
+
+       # We have exhausted @array, time to create new Diff instances or stop.
+       break if @overflow
+
+       if !@all_diffs && i >= @max_files
+         @overflow = true
+         break
+       end
+
+       collapse = !@all_diffs && !@no_collapse
+
+       diff = Gitlab::Git::Diff.new(raw, collapse: collapse)
+
+       if collapse && over_safe_limits?(i)
+         diff.prune_collapsed_diff!
+       end
+
+       @line_count += diff.line_count
+       @byte_count += diff.diff.bytesize
+
+       if !@all_diffs && (@line_count >= @max_lines || @byte_count >= @max_bytes)
+         # This last Diff instance pushes us over the lines limit. We stop and
+         # discard it.
+         @overflow = true
+         break
+       end
+
+       yield @array[i] = diff
+     end
+   end
+ end
+ end
+end
diff --git a/lib/gitlab/git/encoding_helper.rb b/lib/gitlab/git/encoding_helper.rb
new file mode 100644
index 00000000000..e57d228e688
--- /dev/null
+++ b/lib/gitlab/git/encoding_helper.rb
@@ -0,0 +1,58 @@
+module Gitlab
+ module Git
+ # Helpers for coercing strings that come out of Git into valid UTF-8.
+ # The methods are available both as mixin methods and on the module
+ # itself (extend self).
+ module EncodingHelper
+   extend self
+
+   # This threshold is carefully tweaked to prevent usage of encodings detected
+   # by CharlockHolmes with low confidence. If CharlockHolmes confidence is low,
+   # we're better off sticking with utf8 encoding.
+   # Reason: git diff can return strings with invalid utf8 byte sequences if it
+   # truncates a diff in the middle of a multibyte character. In this case
+   # CharlockHolmes will try to guess the encoding and will likely suggest an
+   # obscure encoding with low confidence.
+   # There is a lot more info with this merge request:
+   # https://gitlab.com/gitlab-org/gitlab_git/merge_requests/77#note_4754193
+   ENCODING_CONFIDENCE_THRESHOLD = 40
+
+   # Re-tags (and if needed cleans) +message+ in place.
+   #
+   # Returns nil when +message+ does not respond to force_encoding,
+   # otherwise +message+ itself. If anything raises along the way a
+   # "--broken encoding: ..." placeholder String is returned instead.
+   def encode!(message)
+     return nil unless message.respond_to? :force_encoding
+
+     # if message is utf-8 encoding, just return it
+     message.force_encoding("UTF-8")
+     return message if message.valid_encoding?
+
+     # return message if message type is binary
+     detect = CharlockHolmes::EncodingDetector.detect(message)
+     return message.force_encoding("BINARY") if detect && detect[:type] == :binary
+
+     # force detected encoding if we have sufficient confidence.
+     if detect && detect[:encoding] && detect[:confidence] > ENCODING_CONFIDENCE_THRESHOLD
+       message.force_encoding(detect[:encoding])
+     end
+
+     # encode and clean the bad chars
+     message.replace clean(message)
+   rescue
+     encoding = detect ? detect[:encoding] : "unknown"
+     "--broken encoding: #{encoding}"
+   end
+
+   # Returns a UTF-8 version of +message+ without modifying the argument.
+   #
+   # The detected encoding is only handed to the converter when the
+   # detection result actually names one (encode! guards the same key);
+   # otherwise the message is cleaned like in encode!.
+   def encode_utf8(message)
+     detect = CharlockHolmes::EncodingDetector.detect(message)
+     if detect && detect[:encoding]
+       CharlockHolmes::Converter.convert(message, detect[:encoding], 'UTF-8')
+     else
+       clean(message)
+     end
+   end
+
+   private
+
+   # Round-trips through UTF-16BE to drop invalid/undefined characters,
+   # then removes NUL characters. Returns a new UTF-8 String.
+   def clean(message)
+     message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
+       .encode("UTF-8")
+       .gsub("\0".encode("UTF-8"), "")
+   end
+ end
+ end
+end
diff --git a/lib/gitlab/git/path_helper.rb b/lib/gitlab/git/path_helper.rb
new file mode 100644
index 00000000000..0148cd8df05
--- /dev/null
+++ b/lib/gitlab/git/path_helper.rb
@@ -0,0 +1,16 @@
+module Gitlab
+ module Git
+ class PathHelper
+   class << self
+     # Normalizes a repository-relative file name.
+     #
+     # filename - A String such as "//foo/../bar". It is not modified, so
+     #            frozen strings are accepted.
+     #
+     # Returns a Pathname (callers that need a String call to_s on it).
+     def normalize_path(filename)
+       # Strip all leading slashes so that //foo -> foo. A copy is made
+       # instead of editing the caller's string in place.
+       relative = filename.sub(/\A\/*/, '')
+
+       # Expand relative paths (e.g. foo/../bar)
+       Pathname.new(relative).relative_path_from(Pathname.new(''))
+     end
+   end
+ end
+ end
+end
diff --git a/lib/gitlab/git/popen.rb b/lib/gitlab/git/popen.rb
new file mode 100644
index 00000000000..df9ca3ee5ac
--- /dev/null
+++ b/lib/gitlab/git/popen.rb
@@ -0,0 +1,26 @@
+require 'open3'
+
+module Gitlab
+ module Git
+ module Popen
+   # Runs a command and collects its output.
+   #
+   # cmd  - Array of Strings (program followed by its arguments).
+   # path - Directory the command is run in; also exported as PWD.
+   #
+   # Returns [output, exit_status] where output is everything the command
+   # wrote to stdout followed by everything it wrote to stderr.
+   # Raises RuntimeError when +cmd+ is not an Array.
+   def popen(cmd, path)
+     unless cmd.is_a?(Array)
+       raise "System commands must be given as an array of strings"
+     end
+
+     vars = { "PWD" => path }
+     options = { chdir: path }
+
+     @cmd_output = ""
+     @cmd_status = 0
+     Open3.popen3(vars, *cmd, options) do |stdin, stdout, stderr, wait_thr|
+       # Nothing is ever written to the child, so close its stdin right away;
+       # a command that reads from stdin then sees EOF instead of waiting.
+       stdin.close
+
+       # Drain stderr in parallel with stdout. Reading them one after the
+       # other can stall when the child blocks on a full stderr pipe while
+       # we are still waiting for stdout to reach EOF.
+       stderr_reader = Thread.new { stderr.read }
+
+       @cmd_output << stdout.read
+       @cmd_output << stderr_reader.value
+       @cmd_status = wait_thr.value.exitstatus
+     end
+
+     [@cmd_output, @cmd_status]
+   end
+ end
+ end
+end
diff --git a/lib/gitlab/git/ref.rb b/lib/gitlab/git/ref.rb
new file mode 100644
index 00000000000..ee559866e04
--- /dev/null
+++ b/lib/gitlab/git/ref.rb
@@ -0,0 +1,49 @@
+module Gitlab
+ module Git
+ # A named reference (branch or tag) together with the object it points at.
+ class Ref
+   include Gitlab::Git::EncodingHelper
+
+   # Branch or tag name
+   # without "refs/tags|heads" prefix
+   attr_reader :name
+
+   # Target sha.
+   # Usually it is commit sha but in case
+   # when tag reference on other tag it can be tag sha
+   attr_reader :target
+
+   # Dereferenced target
+   # Commit object to which the Ref points to
+   attr_reader :dereferenced_target
+
+   # Extract branch name from full ref path
+   #
+   # Ex.
+   # Ref.extract_branch_name('refs/heads/master') #=> 'master'
+   def self.extract_branch_name(str)
+     # The pattern is anchored at the start, so at most one match exists.
+     str.sub(/\Arefs\/heads\//, '')
+   end
+
+   # Follows annotated tags until something that is not a
+   # Rugged::Tag::Annotation is reached, and returns that object.
+   def self.dereference_object(object)
+     while object.is_a?(Rugged::Tag::Annotation)
+       object = object.target
+     end
+
+     object
+   end
+
+   # repository - Passed on to Commit.find.
+   # name       - Full or short reference name; re-encoded in place.
+   # target     - Object responding to oid or name, or a String sha.
+   def initialize(repository, name, target)
+     encode! name
+     @name = name.gsub(/\Arefs\/(tags|heads)\//, '')
+     @dereferenced_target = Commit.find(repository, target)
+     # Anything that is neither an object with oid/name nor a String
+     # leaves @target as nil.
+     @target =
+       if target.respond_to?(:oid)
+         target.oid
+       elsif target.respond_to?(:name)
+         target.name
+       elsif target.is_a?(String)
+         target
+       end
+   end
+ end
+ end
+end
diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb
new file mode 100644
index 00000000000..0c75e5da356
--- /dev/null
+++ b/lib/gitlab/git/repository.rb
@@ -0,0 +1,1253 @@
+# Gitlab::Git::Repository is a wrapper around native Rugged::Repository object
+require_relative 'encoding_helper'
+require_relative 'path_helper'
+require 'forwardable'
+require 'tempfile'
+require 'forwardable'
+require "rubygems/package"
+
+module Gitlab
+ module Git
+ class Repository
+ extend Forwardable
+ include Gitlab::Git::Popen
+
+ SEARCH_CONTEXT_LINES = 3
+
+ class NoRepository < StandardError; end
+ class InvalidBlobName < StandardError; end
+ class InvalidRef < StandardError; end
+
+ # Full path to repo
+ attr_reader :path
+
+ # Directory name of repo
+ attr_reader :name
+
+ # Rugged repo object
+ attr_reader :rugged
+
+ # 'path' must be the path to a _bare_ git repository, e.g.
+ # /path/to/my-repo.git
+ def initialize(path)
+ @path = path
+ # Last "/"-separated segment of the path, e.g. "my-repo.git"
+ @name = path.split("/").last
+ # Parser used by #attributes for Git attribute lookups
+ @attributes = Attributes.new(path)
+ end
+
+ # Default branch in the repository
+ def root_ref
+ # Memoized; discover_default_branch is only called again while the
+ # cached value is still nil.
+ @root_ref ||= discover_default_branch
+ end
+
+ # Alias to old method for compatibility
+ def raw
+ # Same object, and same NoRepository failure mode, as #rugged.
+ rugged
+ end
+
+ # Lazily opens the repository at +path+ and memoizes the handle.
+ #
+ # Raises NoRepository when Rugged reports a RepositoryError or OSError.
+ def rugged
+   @rugged ||= Rugged::Repository.new(path)
+ rescue Rugged::RepositoryError, Rugged::OSError
+   raise NoRepository, 'no repository for such path'
+ end
+
+ # Returns an Array of branch names
+ # sorted by name ASC
+ def branch_names
+ # The ordering is inherited from #branches, which sorts by name.
+ branches.map(&:name)
+ end
+
+ # Returns an Array of Branches
+ def branches
+   valid = rugged.branches.each_with_object([]) do |rugged_ref, list|
+     begin
+       list << Branch.new(self, rugged_ref.name, rugged_ref.target)
+     rescue Rugged::ReferenceError
+       # Omit invalid branch
+     end
+   end
+
+   # Sorted by name, ascending
+   valid.sort_by(&:name)
+ end
+
+ def reload_rugged
+ # Drop the memoized handle; the next call to #rugged opens a new one.
+ @rugged = nil
+ end
+
+ # Directly find a branch with a simple name (e.g. master)
+ #
+ # force_reload causes a new Rugged repository to be instantiated
+ #
+ # This is to work around a bug in libgit2 that causes in-memory refs to
+ # be stale/invalid when packed-refs is changed.
+ # See https://gitlab.com/gitlab-org/gitlab-ce/issues/15392#note_14538333
+ def find_branch(name, force_reload = false)
+   reload_rugged if force_reload
+
+   rugged_ref = rugged.branches[name]
+   # Unknown branch: nothing to wrap, return nil.
+   return unless rugged_ref
+
+   Branch.new(self, rugged_ref.name, rugged_ref.target)
+ end
+
+ # Returns an Array of Branch objects, one per entry yielded by
+ # rugged.branches.each(:local). Unlike #branches, reference errors are
+ # not rescued here and the result is not sorted.
+ def local_branches
+ rugged.branches.each(:local).map do |branch|
+ Branch.new(self, branch.name, branch.target)
+ end
+ end
+
+ # Returns the number of valid branches
+ def branch_count
+   rugged.branches.count do |branch|
+     begin
+       # Reading name and target ensures the branch is valid; a
+       # Rugged::ReferenceError raised here excludes it from the count.
+       branch.name && branch.target
+
+       true
+     rescue Rugged::ReferenceError
+       false
+     end
+   end
+ end
+
+ # Returns an Array of tag names
+ def tag_names
+   # Names are returned in the order rugged.tags yields them.
+   rugged.tags.map(&:name)
+ end
+
+ # Returns an Array of Tags
+ def tags
+   tag_list = rugged.references.each("refs/tags/*").map do |ref|
+     annotation = ref.target
+     message = nil
+
+     # Only annotated tags carry a message; strip its trailing newline.
+     if annotation.is_a?(Rugged::Tag::Annotation)
+       raw_message = annotation.message
+       message = raw_message.chomp if raw_message.respond_to?(:chomp)
+     end
+
+     Tag.new(self, ref.name, ref.target, message)
+   end
+
+   tag_list.sort_by(&:name)
+ end
+
+ # Returns true if the given tag exists
+ #
+ # name - The name of the tag as a String.
+ def tag_exists?(name)
+ # Double negation turns whatever rugged.tags[name] returns into a boolean.
+ !!rugged.tags[name]
+ end
+
+ # Returns true if the given branch exists
+ #
+ # name - The name of the branch as a String.
+ def branch_exists?(name)
+ rugged.branches.exists?(name)
+
+ # If the branch name is invalid (e.g. ".foo") Rugged will raise an error.
+ # Callers of this method shouldn't have to deal with that, so we return
+ # `false` instead; that answer is correct, because a branch with an
+ # invalid name cannot exist.
+ rescue Rugged::ReferenceError
+ false
+ end
+
+ # Returns an Array of branch and tag names
+ def ref_names
+ # Branch names first, followed by tag names.
+ branch_names + tag_names
+ end
+
+ # Deprecated. Will be removed in 5.2
+ def heads
+ # Wraps every reference under refs/heads/ in a Gitlab::Git::Ref and
+ # returns them sorted by name.
+ rugged.references.each("refs/heads/*").map do |head|
+ Gitlab::Git::Ref.new(self, head.name, head.target)
+ end.sort_by(&:name)
+ end
+
+ # Returns true when the repository is not empty (the negation of #empty?).
+ def has_commits?
+ !empty?
+ end
+
+ # Returns whatever Rugged reports for rugged.empty?.
+ def empty?
+ rugged.empty?
+ end
+
+ # Returns whatever Rugged reports for rugged.bare?.
+ def bare?
+ rugged.bare?
+ end
+
+ # Returns true when a Rugged handle is available.
+ #
+ # NOTE(review): #rugged raises NoRepository instead of returning nil when
+ # the repository cannot be opened, so this appears to either return true
+ # or raise rather than ever return false - confirm what callers expect.
+ def repo_exists?
+ !!rugged
+ end
+
+ # Discovers the default branch based on the repository's available branches
+ #
+ # - If no branches are present, returns nil
+ # - If one branch is present, returns its name
+ # - If two or more branches are present, returns current HEAD or master or first branch
+ def discover_default_branch
+   names = branch_names
+
+   return if names.empty?
+   return names[0] if names.length == 1
+
+   # Prefer the branch HEAD points at, provided it is a known branch.
+   head = rugged_head
+   if head
+     head_branch = Ref.extract_branch_name(head.name)
+     return head_branch if names.include?(head_branch)
+   end
+
+   # Otherwise fall back to master, then to the first branch by name.
+   names.include?('master') ? 'master' : names[0]
+ end
+
+ # Returns rugged.head, or nil when Rugged raises a ReferenceError while
+ # resolving it.
+ def rugged_head
+ rugged.head
+ rescue Rugged::ReferenceError
+ nil
+ end
+
+ # Describes the archive that would be generated for +ref+.
+ #
+ # ref          - Ref to archive; defaults to root_ref when nil.
+ # storage_path - Base directory for archive files.
+ # format       - Archive format, see archive_file_path.
+ #
+ # Returns a Hash with the keys 'RepoPath', 'ArchivePrefix', 'ArchivePath'
+ # and 'CommitId', or an empty Hash when no commit is found for the ref.
+ def archive_metadata(ref, storage_path, format = "tar.gz")
+   ref ||= root_ref
+   commit = Gitlab::Git::Commit.find(self, ref)
+   return {} if commit.nil?
+
+   commit_id = commit.id
+   # "<repo name without .git>-<ref>-<commit id>"
+   prefix = "#{name.chomp('.git')}-#{ref}-#{commit_id}"
+
+   {
+     'RepoPath' => path,
+     'ArchivePrefix' => prefix,
+     'ArchivePath' => archive_file_path(prefix, storage_path, format),
+     'CommitId' => commit_id
+   }
+ end
+
+ # Builds the path of an archive file.
+ #
+ # name         - Base file name (without extension); nil yields nil.
+ # storage_path - Base directory for archive files.
+ # format       - Requested format. Anything that is not a bzip2 alias,
+ #                "tar" or "zip" falls back to "tar.gz".
+ #
+ # Returns "<storage_path>/<repository name>/<name>.<extension>".
+ def archive_file_path(name, storage_path, format = "tar.gz")
+   return nil unless name
+
+   bzip2_aliases = ["tar.bz2", "tbz", "tbz2", "tb2", "bz2"]
+   extension =
+     if bzip2_aliases.include?(format)
+       "tar.bz2"
+     elsif format == "tar" || format == "zip"
+       format
+     else
+       # everything else should fall back to tar.gz
+       "tar.gz"
+     end
+
+   File.join(storage_path, self.name, "#{name}.#{extension}")
+ end
+
+ # Return repo size in megabytes
+ def size
+   # `du -sk` is run inside the repository; the leading integer of its
+   # output is taken as the size in kilobytes.
+   output, _status = popen(%w(du -sk), path)
+   kilobytes = output.strip.to_i
+
+   (kilobytes.to_f / 1024).round(2)
+ end
+
+ # Returns an array of BlobSnippets for files at the specified +ref+ that
+ # contain the +query+ string.
+ def search_files(query, ref = nil)
+   ref ||= root_ref
+   matches = []
+
+   populated_index(ref).each do |entry|
+     # Discard submodules
+     next if submodule?(entry)
+
+     blob = Blob.raw(self, entry[:oid])
+
+     # Skip binary files
+     next if blob.data.encoding == Encoding::ASCII_8BIT
+
+     # Load the complete blob before searching it.
+     blob.load_all_data!(self)
+     matches.concat(build_greps(blob.data, query, ref, entry[:path]))
+   end
+
+   matches
+ end
+
+ # Use the Rugged Walker API to build an array of commits.
+ #
+ # Usage.
+ # repo.log(
+ # ref: 'master',
+ # path: 'app/models',
+ # limit: 10,
+ # offset: 5,
+ # after: Time.new(2016, 4, 21, 14, 32, 10)
+ # )
+ #
+ def log(options)
+   options = {
+     limit: 10,
+     offset: 0,
+     path: nil,
+     follow: false,
+     skip_merges: false,
+     disable_walk: false,
+     after: nil,
+     before: nil
+   }.merge(options)
+   # An explicit nil limit/offset passed by the caller becomes 0.
+   options[:limit] ||= 0
+   options[:offset] ||= 0
+
+   actual_ref = options[:ref] || root_ref
+   sha =
+     begin
+       sha_from_ref(actual_ref)
+     rescue Rugged::OdbError, Rugged::InvalidError, Rugged::ReferenceError
+       # Return an empty array if the ref wasn't found
+       return []
+     end
+
+   # Some options are only handled by shelling out to `git log`.
+   return log_by_shell(sha, options) if log_using_shell?(options)
+
+   log_by_walk(sha, options)
+ end
+
+ # Returns a truthy value when +options+ contains anything that #log
+ # handles through log_by_shell instead of log_by_walk: a path,
+ # :disable_walk, :skip_merges, :after or :before.
+ def log_using_shell?(options)
+   return true if options[:path].present?
+
+   options[:disable_walk] || options[:skip_merges] || options[:after] || options[:before]
+ end
+
+ # Collects commits with Rugged::Walker.walk, starting at +sha+ and sorted
+ # with Rugged::SORT_DATE. Only :limit and :offset are read from +options+.
+ #
+ # Returns an Array of whatever the walker yields.
+ def log_by_walk(sha, options)
+ walk_options = {
+ show: sha,
+ sort: Rugged::SORT_DATE,
+ limit: options[:limit],
+ offset: options[:offset]
+ }
+ Rugged::Walker.walk(rugged, walk_options).to_a
+ end
+
+ # Collects commits by running `git log` and wrapping every printed SHA
+ # in a Rugged::Commit.
+ #
+ # sha     - Commit to start from.
+ # options - Hash; :limit, :offset, :follow, :skip_merges, :after, :before
+ #           and :path are read.
+ #
+ # Returns an Array of Rugged::Commit.
+ def log_by_shell(sha, options)
+   cmd = %W(git --git-dir=#{path} log)
+   cmd += %W(-n #{options[:limit].to_i})
+   cmd += %w(--format=%H)
+   cmd += %W(--skip=#{options[:offset].to_i})
+   cmd << '--follow' if options[:follow]
+   cmd << '--no-merges' if options[:skip_merges]
+   cmd << "--after=#{options[:after].iso8601}" if options[:after]
+   cmd << "--before=#{options[:before].iso8601}" if options[:before]
+   cmd << sha
+   # Everything after "--" is treated as a path by git.
+   cmd += %W(-- #{options[:path]}) if options[:path].present?
+
+   # The command is an Array, so no shell is involved.
+   raw_output = IO.popen(cmd, &:read)
+
+   # One SHA per output line (--format=%H).
+   raw_output.lines.map { |line| Rugged::Commit.new(rugged, line.strip) }
+ end
+
+ # Returns the oid of the object +ref+ resolves to, after annotated tags
+ # have been dereferenced by rev_parse_target.
+ def sha_from_ref(ref)
+ rev_parse_target(ref).oid
+ end
+
+ # Return the object that +revspec+ points to. If +revspec+ is an
+ # annotated tag, then return the tag's target instead.
+ def rev_parse_target(revspec)
+   # Resolve the revspec first, then peel annotated tags off the result.
+   Ref.dereference_object(rugged.rev_parse(revspec))
+ end
+
+ # Return a collection of Rugged::Commits between the two revspec arguments.
+ # See http://git-scm.com/docs/git-rev-parse.html#_specifying_revisions for
+ # a detailed list of valid arguments.
+ def commits_between(from, to)
+   walker = Rugged::Walker.new(rugged)
+   walker.sorting(Rugged::SORT_DATE | Rugged::SORT_REVERSE)
+
+   sha_from = sha_from_ref(from)
+   sha_to = sha_from_ref(to)
+
+   # Walk from +to+, hiding +from+ from the walk.
+   walker.push(sha_to)
+   walker.hide(sha_from)
+
+   # Collect everything first, then reset the walker.
+   walker.to_a.tap { walker.reset }
+ end
+
+ # Counts the amount of commits between `from` and `to`.
+ def count_commits_between(from, to)
+ # Builds the full commit list via commits_between and takes its size.
+ commits_between(from, to).size
+ end
+
+ # Returns the SHA of the most recent common ancestor of +from+ and +to+
+ def merge_base_commit(from, to)
+ # Thin wrapper: both arguments are handed to Rugged unchanged.
+ rugged.merge_base(from, to)
+ end
+
+ # Return an array of Diff objects that represent the diff
+ # between +from+ and +to+. See Diff::filter_diff_options for the allowed
+ # diff options. The +options+ hash can also include :break_rewrites to
+ # split larger rewrites into delete/add pairs.
+ def diff(from, to, options = {}, *paths)
+ # The same options hash is given to diff_patches (to compute the
+ # patches) and to DiffCollection (which reads its limits from it).
+ DiffCollection.new(diff_patches(from, to, options, *paths), options)
+ end
+
+ # Returns commits collection
+ #
+ # Ex.
+ # repo.find_commits(
+ # ref: 'master',
+ # max_count: 10,
+ # skip: 5,
+ # order: :date
+ # )
+ #
+ # +options+ is a Hash of optional arguments to git
+ # :ref is the ref from which to begin (SHA1 or name)
+ # :contains is the commit contained by the refs from which to begin (SHA1 or name)
+ # :max_count is the maximum number of commits to fetch
+ # :skip is the number of commits to skip
+ # :order is the commits order and allowed value is :date(default) or :topo
+ #
+ def find_commits(options = {})
+   allowed_options = [:ref, :max_count, :skip, :contains, :order]
+
+   # Work on a copy and drop every option that is not supported.
+   actual_options = options.dup
+   actual_options.keep_if { |key| allowed_options.include?(key) }
+   actual_options = { skip: 0 }.merge(actual_options)
+
+   walker = Rugged::Walker.new(rugged)
+
+   # Starting points: the given ref, else the branches containing the
+   # given commit, else every local branch.
+   if actual_options[:ref]
+     walker.push(rugged.rev_parse_oid(actual_options[:ref]))
+   elsif actual_options[:contains]
+     branches_contains(actual_options[:contains]).each do |branch|
+       walker.push(branch.target_id)
+     end
+   else
+     rugged.references.each("refs/heads/*") do |ref|
+       walker.push(ref.target_id)
+     end
+   end
+
+   sort_mode = actual_options[:order] == :topo ? Rugged::SORT_TOPO : Rugged::SORT_DATE
+   walker.sorting(sort_mode)
+
+   commits = []
+   walker.each(offset: actual_options[:skip], limit: actual_options[:max_count]) do |commit|
+     commits << Gitlab::Git::Commit.decorate(commit)
+   end
+
+   walker.reset
+
+   commits
+ rescue Rugged::OdbError
+   # Rugged could not find an object: report no commits.
+   []
+ end
+
+ # Returns the names of the branches that contain the specified commit
+ # (SHA1 or name)
+ #
+ # Ex.
+ # repo.branch_names_contains('master')
+ #
+ def branch_names_contains(commit)
+   # Same selection as branches_contains, reduced to the branch names.
+   branches_contains(commit).map(&:name)
+ end
+
+ # Returns the branches that contain the specified commit (SHA1 or name)
+ #
+ # Ex.
+ # repo.branches_contains('master')
+ #
+ def branches_contains(commit)
+   commit_obj = rugged.rev_parse(commit)
+   wanted_oid = commit_obj.oid
+   # nil when the commit has no parents
+   parent = commit_obj.parents.first
+
+   walker = Rugged::Walker.new(rugged)
+
+   rugged.branches.select do |branch|
+     # Walk this branch, hiding the commit's first parent when there is
+     # one, and look for the commit itself.
+     walker.push(branch.target_id)
+     walker.hide(parent) if parent
+     found = walker.any? { |c| c.oid == wanted_oid }
+     walker.reset
+
+     found
+   end
+ end
+
+ # Get refs hash which key is SHA1
+ # and value is a Rugged::Reference
+ def refs_hash
+   # Built on the first call only
+   return @refs_hash unless @refs_hash.nil?
+
+   @refs_hash = Hash.new { |hash, sha| hash[sha] = [] }
+
+   rugged.references.each do |reference|
+     # Symbolic/remote references may not have an OID; skip over them
+     target_oid = reference.target.try(:oid)
+     next unless target_oid
+
+     # Key by the dereferenced target, so annotated tags are filed under
+     # the object they point to.
+     @refs_hash[rev_parse_target(target_oid).oid] << reference
+   end
+
+   @refs_hash
+ end
+
+ # Lookup for rugged object by oid or ref name
+ def lookup(oid_or_ref_name)
+ # Unlike rev_parse_target, the result is returned as-is (annotated tags
+ # are not dereferenced here).
+ rugged.rev_parse(oid_or_ref_name)
+ end
+
+ # Return hash with submodules info for this repository
+ #
+ # Ex.
+ # {
+ # "rack" => {
+ # "id" => "c67be4624545b4263184c4a0e8f887efd0a66320",
+ # "path" => "rack",
+ # "url" => "git://github.com/chneukirchen/rack.git"
+ # },
+ # "encoding" => {
+ # "id" => ....
+ # }
+ # }
+ #
+ def submodules(ref)
+ commit = rev_parse_target(ref)
+ return {} unless commit
+
+ begin
+ content = blob_content(commit, ".gitmodules")
+ rescue InvalidBlobName
+ # blob_content raises InvalidBlobName when .gitmodules is missing or is
+ # a tree; either way there is nothing to parse.
+ return {}
+ end
+
+ parse_gitmodules(commit, content)
+ end
+
+ # Return total commits count accessible from passed ref
+ def commit_count(ref)
+   walker = Rugged::Walker.new(rugged)
+   walker.sorting(Rugged::SORT_TOPO | Rugged::SORT_REVERSE)
+   # Start the walk at the commit +ref+ resolves to and count what it yields.
+   walker.push(rugged.rev_parse_oid(ref))
+   walker.count
+ end
+
+ # Sets HEAD to the commit specified by +ref+; +ref+ can be a branch or
+ # tag name or a commit SHA. Valid +reset_type+ values are:
+ #
+ # [:soft]
+ # the head will be moved to the commit.
+ # [:mixed]
+ # will trigger a +:soft+ reset, plus the index will be replaced
+ # with the content of the commit tree.
+ # [:hard]
+ # will trigger a +:mixed+ reset and the working directory will be
+ # replaced with the content of the index. (Untracked and ignored files
+ # will be left alone)
+ def reset(ref, reset_type)
+ # Thin wrapper: both arguments are handed to Rugged unchanged.
+ rugged.reset(ref, reset_type)
+ end
+
+ # Mimic the `git clean` command and recursively delete untracked files.
+ # Valid keys that can be passed in the +options+ hash are:
+ #
+ # :d - Remove untracked directories
+ # :f - Remove untracked directories that are managed by a different
+ # repository
+ # :x - Remove ignored files
+ #
+ # The value in +options+ must evaluate to true for an option to take
+ # effect.
+ #
+ # Examples:
+ #
+ # repo.clean(d: true, f: true) # Enable the -d and -f options
+ #
+ # repo.clean(d: false, x: true) # -x is enabled, -d is not
+ def clean(options = {})
+ # NOTE(review): the strategies list is assembled but never used, and
+ # options[:d] is never read, so this method currently deletes nothing.
+ strategies = [:remove_untracked]
+ strategies.push(:force) if options[:f]
+ strategies.push(:remove_ignored) if options[:x]
+
+ # TODO: implement this method
+ end
+
+ # Check out the specified ref. Valid options are:
+ #
+ # :b - Create a new branch at +start_point+ and set HEAD to the new
+ # branch.
+ #
+ # * These options are passed to the Rugged::Repository#checkout method:
+ #
+ # :progress ::
+ # A callback that will be executed for checkout progress notifications.
+ # Up to 3 parameters are passed on each execution:
+ #
+ # - The path to the last updated file (or +nil+ on the very first
+ # invocation).
+ # - The number of completed checkout steps.
+ # - The number of total checkout steps to be performed.
+ #
+ # :notify ::
+ # A callback that will be executed for each checkout notification
+ # types specified with +:notify_flags+. Up to 5 parameters are passed
+ # on each execution:
+ #
+ # - An array containing the +:notify_flags+ that caused the callback
+ # execution.
+ # - The path of the current file.
+ # - A hash describing the baseline blob (or +nil+ if it does not
+ # exist).
+ # - A hash describing the target blob (or +nil+ if it does not exist).
+ # - A hash describing the workdir blob (or +nil+ if it does not
+ # exist).
+ #
+ # :strategy ::
+ # A single symbol or an array of symbols representing the strategies
+ # to use when performing the checkout. Possible values are:
+ #
+ # :none ::
+ # Perform a dry run (default).
+ #
+ # :safe ::
+ # Allow safe updates that cannot overwrite uncommitted data.
+ #
+ # :safe_create ::
+ # Allow safe updates plus creation of missing files.
+ #
+ # :force ::
+ # Allow all updates to force working directory to look like index.
+ #
+ # :allow_conflicts ::
+ # Allow checkout to make safe updates even if conflicts are found.
+ #
+ # :remove_untracked ::
+ # Remove untracked files not in index (that are not ignored).
+ #
+ # :remove_ignored ::
+ # Remove ignored files not in index.
+ #
+ # :update_only ::
+ # Only update existing files, don't create new ones.
+ #
+ # :dont_update_index ::
+ # Normally checkout updates index entries as it goes; this stops
+ # that.
+ #
+ # :no_refresh ::
+ # Don't refresh index/config/etc before doing checkout.
+ #
+ # :disable_pathspec_match ::
+ # Treat pathspec as simple list of exact match file paths.
+ #
+ # :skip_locked_directories ::
+ # Ignore directories in use, they will be left empty.
+ #
+ # :skip_unmerged ::
+ # Allow checkout to skip unmerged files (NOT IMPLEMENTED).
+ #
+ # :use_ours ::
+ # For unmerged files, checkout stage 2 from index (NOT IMPLEMENTED).
+ #
+ # :use_theirs ::
+ # For unmerged files, checkout stage 3 from index (NOT IMPLEMENTED).
+ #
+ # :update_submodules ::
+ # Recursively checkout submodules with same options (NOT
+ # IMPLEMENTED).
+ #
+ # :update_submodules_if_changed ::
+ # Recursively checkout submodules if HEAD moved in super repo (NOT
+ # IMPLEMENTED).
+ #
+ # :disable_filters ::
+ # If +true+, filters like CRLF line conversion will be disabled.
+ #
+ # :dir_mode ::
+ # Mode for newly created directories. Default: +0755+.
+ #
+ # :file_mode ::
+ # Mode for newly created files. Default: +0755+ or +0644+.
+ #
+ # :file_open_flags ::
+ # Mode for opening files. Default:
+ # <code>IO::CREAT | IO::TRUNC | IO::WRONLY</code>.
+ #
+ # :notify_flags ::
+ # A single symbol or an array of symbols representing the cases in
+ # which the +:notify+ callback should be invoked. Possible values are:
+ #
+ # :none ::
+ # Do not invoke the +:notify+ callback (default).
+ #
+ # :conflict ::
+ # Invoke the callback for conflicting paths.
+ #
+ # :dirty ::
+ # Invoke the callback for "dirty" files, i.e. those that do not need
+ # an update but no longer match the baseline.
+ #
+ # :updated ::
+ # Invoke the callback for any file that was changed.
+ #
+ # :untracked ::
+ # Invoke the callback for untracked files.
+ #
+ # :ignored ::
+ # Invoke the callback for ignored files.
+ #
+ # :all ::
+ # Invoke the callback for all these cases.
+ #
+ # :paths ::
+ # A glob string or an array of glob strings specifying which paths
+ # should be taken into account for the checkout operation. +nil+ will
+ # match all files. Default: +nil+.
+ #
+ # :baseline ::
+ # A Rugged::Tree that represents the current, expected contents of the
+ # workdir. Default: +HEAD+.
+ #
+ # :target_directory ::
+ # A path to an alternative workdir directory in which the checkout
+ # should be performed.
+ def checkout(ref, options = {}, start_point = "HEAD")
+   # Work on a copy: :b is our own option and has to be removed before the
+   # hash is given to Rugged, but the caller's hash must stay untouched.
+   checkout_options = options.dup
+
+   if checkout_options[:b]
+     rugged.branches.create(ref, start_point)
+     checkout_options.delete(:b)
+   end
+
+   # Caller-supplied options take precedence over the default strategy.
+   default_options = { strategy: [:recreate_missing, :safe] }
+   rugged.checkout(ref, default_options.merge(checkout_options))
+ end
+
+ # Delete the specified branch from the repository
+ def delete_branch(branch_name)
+ # Thin wrapper: errors raised by Rugged are not rescued here.
+ rugged.branches.delete(branch_name)
+ end
+
+ # Create a new branch named +ref+ based on +start_point+, HEAD by default
+ #
+ # Examples:
+ # create_branch("feature")
+ # create_branch("other-feature", "master")
+ def create_branch(ref, start_point = "HEAD")
+   rugged_ref = rugged.branches.create(ref, start_point)
+   Branch.new(self, rugged_ref.name, rugged_ref.target)
+ rescue Rugged::ReferenceError => e
+   # Rugged's message names the full reference when the branch already
+   # exists. Compare it as plain text: branch names may contain regexp
+   # metacharacters ("+", ".", ...) that must not be interpreted.
+   raise InvalidRef.new("Branch #{ref} already exists") if e.to_s.include?("'refs/heads/#{ref}'")
+   raise InvalidRef.new("Invalid reference #{start_point}")
+ end
+
+ # Return an array of this repository's remote names
+ def remote_names
+ # each_name without a block is turned into an Array of names.
+ rugged.remotes.each_name.to_a
+ end
+
+ # Delete the specified remote from this repository.
+ def remote_delete(remote_name)
+ # Thin wrapper: errors raised by Rugged are not rescued here.
+ rugged.remotes.delete(remote_name)
+ end
+
+ # Add a new remote to this repository. Returns a Rugged::Remote object
+ def remote_add(remote_name, url)
+ # The return value of rugged.remotes.create is passed straight through.
+ rugged.remotes.create(remote_name, url)
+ end
+
+ # Update the specified remote using the values in the +options+ hash
+ #
+ # Example
+ # repo.remote_update("origin", url: "path/to/repo")
+ def remote_update(remote_name, options = {})
+ # Only :url is supported; without it this method does nothing and
+ # returns nil.
+ # TODO: Implement other remote options
+ rugged.remotes.set_url(remote_name, options[:url]) if options[:url]
+ end
+
+ # Fetch the specified remote
+ def fetch(remote_name)
+ # NOTE(review): the lookup result is used without a nil check; presumably
+ # an unknown remote name ends in a NoMethodError here - confirm.
+ rugged.remotes[remote_name].fetch
+ end
+
+ # Push +*refspecs+ to the remote identified by +remote_name+.
+ def push(remote_name, *refspecs)
+ # The splatted refspecs are handed to Rugged as a single Array.
+ # NOTE(review): as in #fetch, the remote lookup is not nil-checked.
+ rugged.remotes[remote_name].push(refspecs)
+ end
+
+ # Merge the +source_name+ branch into the +target_name+ branch. This is
+ # equivalent to `git merge --no-ff +source_name+`, since a merge commit
+ # is always created.
+ # source_name - Name of the branch to merge from.
+ # target_name - Name of the branch to merge into; it is updated to point
+ #               at the new merge commit.
+ # options     - Extra options for Rugged::Commit.create; :parents, :tree
+ #               and :update_ref are always overridden.
+ #
+ # Returns false when the merge has conflicts, otherwise the result of
+ # Rugged::Commit.create.
+ # Raises RuntimeError when either branch (or its target) cannot be found.
+ def merge(source_name, target_name, options = {})
+   target_branch = rugged.branches[target_name]
+   source_branch = rugged.branches[source_name]
+
+   # The branch lookup itself can come back empty (find_branch guards
+   # against that as well), so check it before asking for the target.
+   # Otherwise a NoMethodError would preempt the errors below.
+   our_commit = target_branch && target_branch.target
+   their_commit = source_branch && source_branch.target
+
+   raise "Invalid merge target" if our_commit.nil?
+   raise "Invalid merge source" if their_commit.nil?
+
+   merge_index = rugged.merge_commits(our_commit, their_commit)
+   return false if merge_index.conflicts?
+
+   actual_options = options.merge(
+     parents: [our_commit, their_commit],
+     tree: merge_index.write_tree(rugged),
+     update_ref: "refs/heads/#{target_name}"
+   )
+   Rugged::Commit.create(rugged, actual_options)
+ end
+
+ # Returns the commits reachable from any local branch whose author date
+ # is on or after +from_date+, in the order the walker yields them.
+ #
+ # from_date - A Date to compare each commit's author date against.
+ def commits_since(from_date)
+   walker = Rugged::Walker.new(rugged)
+   walker.sorting(Rugged::SORT_DATE | Rugged::SORT_REVERSE)
+
+   rugged.references.each("refs/heads/*") do |ref|
+     walker.push(ref.target_id)
+   end
+
+   commits = []
+   walker.each do |commit|
+     # Skip commits that are too old instead of stopping at the first one:
+     # with SORT_REVERSE the walk starts at the oldest commits, so breaking
+     # here would discard every newer commit that follows.
+     next if commit.author[:time].to_date < from_date
+     commits.push(commit)
+   end
+
+   commits
+ end
+
+ # Maps the strings stored under the core.autocrlf config key to the Ruby
+ # values returned by #autocrlf; #autocrlf= uses the inverted mapping.
+ AUTOCRLF_VALUES = {
+ "true" => true,
+ "false" => false,
+ "input" => :input
+ }.freeze
+
+ # Returns true, false or :input according to the core.autocrlf setting,
+ # or nil when the configured value is not a key of AUTOCRLF_VALUES.
+ def autocrlf
+ AUTOCRLF_VALUES[rugged.config['core.autocrlf']]
+ end
+
+ # Stores the string form of +value+ (true, false or :input) under
+ # core.autocrlf. Any other value is looked up as nil in the inverted map.
+ def autocrlf=(value)
+ rugged.config['core.autocrlf'] = AUTOCRLF_VALUES.invert[value]
+ end
+
+ # Create a new directory with a .gitkeep file. Creates
+ # all required nested directories (i.e. mkdir -p behavior)
+ #
+ # options should contain next structure:
+ # author: {
+ # email: 'user@example.com',
+ # name: 'Test User',
+ # time: Time.now
+ # },
+ # committer: {
+ # email: 'user@example.com',
+ # name: 'Test User',
+ # time: Time.now
+ # },
+ # commit: {
+ # message: 'Wow such commit',
+ # branch: 'master',
+ # update_ref: false
+ # }
+ def mkdir(path, options = {})
+   # options[:commit][:branch] is read unconditionally, so :commit has to
+   # be present.
+   ref = options[:commit][:branch]
+   path = PathHelper.normalize_path(path).to_s
+
+   rugged_ref = rugged.ref(ref)
+   raise InvalidRef.new("Invalid ref") if rugged_ref.nil?
+
+   target_commit = rugged_ref.target
+   raise InvalidRef.new("Invalid target commit") if target_commit.nil?
+
+   # Refuse to create the directory when something already lives at that
+   # path, be it a file or a directory.
+   entry = tree_entry(target_commit, path)
+   if entry
+     message =
+       if entry[:type] == :blob
+         "Directory already exists as a file"
+       else
+         "Directory already exists"
+       end
+     raise InvalidBlobName.new(message)
+   end
+
+   # The directory is created by committing an empty .gitkeep file in it.
+   options[:file] = {
+     content: '',
+     path: "#{path}/.gitkeep",
+     update: true
+   }
+
+   Blob.commit(self, options)
+ end
+
+ # Returns result like "git ls-files" , recursive and full file path
+ #
+ # Ex.
+ # repo.ls_files('master')
+ #
+ def ls_files(ref)
+   actual_ref = ref || root_ref
+
+   begin
+     # Only used to verify that the ref resolves; the result is discarded.
+     sha_from_ref(actual_ref)
+   rescue Rugged::OdbError, Rugged::InvalidError, Rugged::ReferenceError
+     # Return an empty array if the ref wasn't found
+     return []
+   end
+
+   cmd = %W(git --git-dir=#{path} ls-tree -r --full-tree --full-name -- #{actual_ref})
+
+   # Each output line is "<mode> <type> <sha>\t<path>".
+   IO.popen(cmd, &:read).split("\n").each_with_object([]) do |line, files|
+     meta, file_path = line.split("\t")
+     _mode, type, _sha = meta.split(" ")
+
+     # Contain only blob type
+     files << file_path if type == "blob" && file_path
+   end
+ end
+
+ # Copies the .gitattributes file found at +ref+ to info/attributes inside
+ # the repository, or removes info/attributes when the ref has no such file.
+ #
+ # Raises InvalidRef when the ref cannot be looked up.
+ def copy_gitattributes(ref)
+   commit =
+     begin
+       lookup(ref)
+     rescue Rugged::ReferenceError
+       raise InvalidRef.new("Ref #{ref} is invalid")
+     end
+
+   # Create the paths
+   info_dir_path = File.join(path, 'info')
+   info_attributes_path = File.join(info_dir_path, 'attributes')
+
+   begin
+     # Retrieve the contents of the blob
+     gitattributes_content = blob_content(commit, '.gitattributes')
+   rescue InvalidBlobName
+     # No .gitattributes found. Should now remove any info/attributes and return
+     File.delete(info_attributes_path) if File.exist?(info_attributes_path)
+     return
+   end
+
+   # Create the info directory if needed
+   Dir.mkdir(info_dir_path) unless File.directory?(info_dir_path)
+
+   # Write the contents of the .gitattributes file to info/attributes
+   # Use binary mode to prevent Rails from converting ASCII-8BIT to UTF-8
+   File.open(info_attributes_path, "wb") { |file| file.write(gitattributes_content) }
+ end
+
+ # Checks if the blob should be diffable according to its attributes
+ def diffable?(blob)
+ attributes(blob.path).fetch('diff') { blob.text? }
+ end
+
+ # Returns the Git attributes for the given file path.
+ #
+ # See `Gitlab::Git::Attributes` for more information.
+ def attributes(path)
+ # Pure delegation to the object stored in @attributes, which is assigned
+ # outside this method.
+ @attributes.attributes(path)
+ end
+
+ private
+
+ # Get the content of a blob for a given commit. If the blob is a commit
+ # (for submodules) then return the blob's OID.
+ def blob_content(commit, blob_name)
+ blob_entry = tree_entry(commit, blob_name)
+
+ unless blob_entry
+ raise InvalidBlobName.new("Invalid blob name: #{blob_name}")
+ end
+
+ case blob_entry[:type]
+ when :commit
+ blob_entry[:oid]
+ when :tree
+ raise InvalidBlobName.new("#{blob_name} is a tree, not a blob")
+ when :blob
+ rugged.lookup(blob_entry[:oid]).content
+ end
+ end
+
+ # Parses the contents of a .gitmodules file and returns a hash of
+ # submodule information.
+ def parse_gitmodules(commit, content)
+ results = {}
+
+ current = ""
+ content.split("\n").each do |txt|
+ if txt =~ /^\s*\[/
+ current = txt.match(/(?<=").*(?=")/)[0]
+ results[current] = {}
+ else
+ next unless results[current]
+ match_data = txt.match(/(\w+)\s*=\s*(.*)/)
+ next unless match_data
+ target = match_data[2].chomp
+ results[current][match_data[1]] = target
+
+ if match_data[1] == "path"
+ begin
+ results[current]["id"] = blob_content(commit, target)
+ rescue InvalidBlobName
+ results.delete(current)
+ end
+ end
+ end
+ end
+
+ results
+ end
+
+ # Returns true if +commit+ introduced changes to +path+, using commit
+ # trees to make that determination. Uses the history simplification
+ # rules that `git log` uses by default, where a commit is omitted if it
+ # is TREESAME to any parent.
+ #
+ # If the +follow+ option is true and the file specified by +path+ was
+ # renamed, then the path value is set to the old path.
+ def commit_touches_path?(commit, path, follow, walker)
+ # Entry for +path+ in this commit's tree (nil when the path is absent).
+ entry = tree_entry(commit, path)
+
+ if commit.parents.empty?
+ # This is the root commit, return true if it has +path+ in its tree
+ return !entry.nil?
+ end
+
+ # Number of parents found TREESAME for +path+; never exceeds 1 because
+ # the loop stops comparing after the first TREESAME parent.
+ num_treesame = 0
+ commit.parents.each do |parent|
+ parent_entry = tree_entry(parent, path)
+
+ # Only follow the first TREESAME parent for merge commits
+ # Every parent visited after that one is hidden from the walker.
+ if num_treesame > 0
+ walker.hide(parent)
+ next
+ end
+
+ # TREESAME means: the path is missing on both sides, or present on both
+ # sides with the same object id.
+ if entry.nil? && parent_entry.nil?
+ num_treesame += 1
+ elsif entry && parent_entry && entry[:oid] == parent_entry[:oid]
+ num_treesame += 1
+ end
+ end
+
+ case num_treesame
+ when 0
+ # No parent is TREESAME, so this commit changed +path+. With +follow+,
+ # detect_rename may rewrite +path+ in place to the pre-rename name.
+ detect_rename(commit, commit.parents.first, path) if follow
+ true
+ else false
+ end
+ end
+
+ # Find the entry for +path+ in the tree for +commit+
+ def tree_entry(commit, path)
+ pathname = Pathname.new(path)
+ first = true
+ tmp_entry = nil
+
+ pathname.each_filename do |dir|
+ if first
+ tmp_entry = commit.tree[dir]
+ first = false
+ elsif tmp_entry.nil?
+ return nil
+ else
+ tmp_entry = rugged.lookup(tmp_entry[:oid])
+ return nil unless tmp_entry.type == :tree
+ tmp_entry = tmp_entry[dir]
+ end
+ end
+
+ tmp_entry
+ end
+
+ # Compare +commit+ and +parent+ for +path+. If +path+ is a file and was
+ # renamed in +commit+, then set +path+ to the old filename.
+ def detect_rename(commit, parent, path)
+ diff = parent.diff(commit, paths: [path], disable_pathspec_match: true)
+
+ # If +path+ is a filename, not a directory, then we should only have
+ # one delta. We don't need to follow renames for directories.
+ return nil if diff.each_delta.count > 1
+
+ delta = diff.each_delta.first
+ if delta.added?
+ full_diff = parent.diff(commit)
+ full_diff.find_similar!
+
+ full_diff.each_delta do |full_delta|
+ if full_delta.renamed? && path == full_delta.new_file[:path]
+ # Look for the old path in ancestors
+ path.replace(full_delta.old_file[:path])
+ end
+ end
+ end
+ end
+
+ def archive_to_file(treeish = 'master', filename = 'archive.tar.gz', format = nil, compress_cmd = %w(gzip -n))
+ git_archive_cmd = %W(git --git-dir=#{path} archive)
+
+ # Put files into a directory before archiving
+ prefix = "#{archive_name(treeish)}/"
+ git_archive_cmd << "--prefix=#{prefix}"
+
+ # Format defaults to tar
+ git_archive_cmd << "--format=#{format}" if format
+
+ git_archive_cmd += %W(-- #{treeish})
+
+ open(filename, 'w') do |file|
+ # Create a pipe to act as the '|' in 'git archive ... | gzip'
+ pipe_rd, pipe_wr = IO.pipe
+
+ # Get the compression process ready to accept data from the read end
+ # of the pipe
+ compress_pid = spawn(*nice(compress_cmd), in: pipe_rd, out: file)
+ # The read end belongs to the compression process now; we should
+ # close our file descriptor for it.
+ pipe_rd.close
+
+ # Start 'git archive' and tell it to write into the write end of the
+ # pipe.
+ git_archive_pid = spawn(*nice(git_archive_cmd), out: pipe_wr)
+ # The write end belongs to 'git archive' now; close it.
+ pipe_wr.close
+
+ # When 'git archive' and the compression process are finished, we are
+ # done.
+ Process.waitpid(git_archive_pid)
+ raise "#{git_archive_cmd.join(' ')} failed" unless $?.success?
+ Process.waitpid(compress_pid)
+ raise "#{compress_cmd.join(' ')} failed" unless $?.success?
+ end
+ end
+
+ def nice(cmd)
+ nice_cmd = %w(nice -n 20)
+ unless unsupported_platform?
+ nice_cmd += %w(ionice -c 2 -n 7)
+ end
+ nice_cmd + cmd
+ end
+
+ def unsupported_platform?
+ %w[darwin freebsd solaris].map { |platform| RUBY_PLATFORM.include?(platform) }.any?
+ end
+
+ # Returns true if the index entry has the special file mode that denotes
+ # a submodule.
+ def submodule?(index_entry)
+ index_entry[:mode] == 57344
+ end
+
+ # Return a Rugged::Index that has read from the tree at +ref_name+
+ def populated_index(ref_name)
+ commit = rev_parse_target(ref_name)
+ index = rugged.index
+ index.read_tree(commit.tree)
+ index
+ end
+
+ # Return an array of BlobSnippets for lines in +file_contents+ that match
+ # +query+
+ def build_greps(file_contents, query, ref, filename)
+ # The file_contents string is potentially huge so we make sure to loop
+ # through it one line at a time. This gives Ruby the chance to GC lines
+ # we are not interested in.
+ #
+ # We need to do a little extra work because we are not looking for just
+ # the lines that matches the query, but also for the context
+ # (surrounding lines). We will use Enumerable#each_cons to efficiently
+ # loop through the lines while keeping surrounding lines on hand.
+ #
+ # First, we turn "foo\nbar\nbaz" into
+ # [
+ # [nil, -3], [nil, -2], [nil, -1],
+ # ['foo', 0], ['bar', 1], ['baz', 3],
+ # [nil, 4], [nil, 5], [nil, 6]
+ # ]
+ lines_with_index = Enumerator.new do |yielder|
+ # Yield fake 'before' lines for the first line of file_contents
+ (-SEARCH_CONTEXT_LINES..-1).each do |i|
+ yielder.yield [nil, i]
+ end
+
+ # Yield the actual file contents
+ count = 0
+ file_contents.each_line do |line|
+ line.chomp!
+ yielder.yield [line, count]
+ count += 1
+ end
+
+ # Yield fake 'after' lines for the last line of file_contents
+ (count + 1..count + SEARCH_CONTEXT_LINES).each do |i|
+ yielder.yield [nil, i]
+ end
+ end
+
+ greps = []
+
+ # Loop through consecutive blocks of lines with indexes
+ lines_with_index.each_cons(2 * SEARCH_CONTEXT_LINES + 1) do |line_block|
+ # Get the 'middle' line and index from the block
+ line, _ = line_block[SEARCH_CONTEXT_LINES]
+
+ next unless line && line.match(/#{Regexp.escape(query)}/i)
+
+ # Yay, 'line' contains a match!
+ # Get an array with just the context lines (no indexes)
+ match_with_context = line_block.map(&:first)
+ # Remove 'nil' lines in case we are close to the first or last line
+ match_with_context.compact!
+
+ # Get the line number (1-indexed) of the first context line
+ first_context_line_number = line_block[0][1] + 1
+
+ greps << Gitlab::Git::BlobSnippet.new(
+ ref,
+ match_with_context,
+ first_context_line_number,
+ filename
+ )
+ end
+
+ greps
+ end
+
+ # Return the Rugged patches for the diff between +from+ and +to+.
+ def diff_patches(from, to, options = {}, *paths)
+ options ||= {}
+ break_rewrites = options[:break_rewrites]
+ actual_options = Diff.filter_diff_options(options.merge(paths: paths))
+
+ diff = rugged.diff(from, to, actual_options)
+ diff.find_similar!(break_rewrites: break_rewrites)
+ diff.each_patch
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/git/tag.rb b/lib/gitlab/git/tag.rb
new file mode 100644
index 00000000000..b5342c3d310
--- /dev/null
+++ b/lib/gitlab/git/tag.rb
@@ -0,0 +1,17 @@
+module Gitlab
+ module Git
+ # A tag: a Ref that additionally carries an optional message.
+ class Tag < Ref
+ # NOTE(review): @object_sha is never assigned in this class, so this
+ # reader returns nil unless the superclass sets it - confirm.
+ attr_reader :object_sha
+
+ # repository, name and target are handed unchanged to Ref#initialize;
+ # message is kept locally and defaults to nil.
+ def initialize(repository, name, target, message = nil)
+ super(repository, name, target)
+
+ @message = message
+ end
+
+ # Returns the stored message passed through encode!, which is not defined
+ # in this file (presumably inherited via Ref - TODO confirm).
+ def message
+ encode! @message
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/git/tree.rb b/lib/gitlab/git/tree.rb
new file mode 100644
index 00000000000..76e3f112e2c
--- /dev/null
+++ b/lib/gitlab/git/tree.rb
@@ -0,0 +1,104 @@
+module Gitlab
+ module Git
+ class Tree
+ include Gitlab::Git::EncodingHelper
+
+ attr_accessor :id, :root_id, :name, :path, :type,
+ :mode, :commit_id, :submodule_url
+
+ class << self
+ # Get list of tree objects
+ # for repository based on commit sha and path
+ # Uses rugged for raw objects
+ def where(repository, sha, path = nil)
+ path = nil if path == '' || path == '/'
+
+ commit = repository.lookup(sha)
+ root_tree = commit.tree
+
+ tree = if path
+ id = Tree.find_id_by_path(repository, root_tree.oid, path)
+ if id
+ repository.lookup(id)
+ else
+ []
+ end
+ else
+ root_tree
+ end
+
+ tree.map do |entry|
+ Tree.new(
+ id: entry[:oid],
+ root_id: root_tree.oid,
+ name: entry[:name],
+ type: entry[:type],
+ mode: entry[:filemode],
+ path: path ? File.join(path, entry[:name]) : entry[:name],
+ commit_id: sha,
+ )
+ end
+ end
+
+ # Recursive search of tree id for path
+ #
+ # Ex.
+ # blog/ # oid: 1a
+ # app/ # oid: 2a
+ # models/ # oid: 3a
+ # views/ # oid: 4a
+ #
+ #
+ # Tree.find_id_by_path(repo, '1a', 'app/models') # => '3a'
+ #
+ def find_id_by_path(repository, root_id, path)
+ root_tree = repository.lookup(root_id)
+ path_arr = path.split('/')
+
+ entry = root_tree.find do |entry|
+ entry[:name] == path_arr[0] && entry[:type] == :tree
+ end
+
+ return nil unless entry
+
+ if path_arr.size > 1
+ path_arr.shift
+ find_id_by_path(repository, entry[:oid], path_arr.join('/'))
+ else
+ entry[:oid]
+ end
+ end
+ end
+
+ def initialize(options)
+ %w(id root_id name path type mode commit_id).each do |key|
+ self.send("#{key}=", options[key.to_sym])
+ end
+ end
+
+ def name
+ encode! @name
+ end
+
+ def dir?
+ type == :tree
+ end
+
+ def file?
+ type == :blob
+ end
+
+ def submodule?
+ type == :commit
+ end
+
+ def readme?
+ name =~ /^readme/i
+ end
+
+ def contributing?
+ name =~ /^contributing/i
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/git/util.rb b/lib/gitlab/git/util.rb
new file mode 100644
index 00000000000..7973da2e8f8
--- /dev/null
+++ b/lib/gitlab/git/util.rb
@@ -0,0 +1,18 @@
+module Gitlab
+ module Git
+ module Util
+ LINE_SEP = "\n".freeze
+
+ def self.count_lines(string)
+ case string[-1]
+ when nil
+ 0
+ when LINE_SEP
+ string.count(LINE_SEP)
+ else
+ string.count(LINE_SEP) + 1
+ end
+ end
+ end
+ end
+end