Merge branch 'rs-absorb-gitlab_git' into 'master'

Absorb gitlab_git Closes #24374 See merge request !8447
author: Douwe Maan <douwe@gitlab.com> 2017-01-05 12:22:25 +0000
committer: Douwe Maan <douwe@gitlab.com> 2017-01-05 12:22:25 +0000
commit: efb8da895fb8d0976f98d0e54e36d9e61dccef89 (patch)
tree: fd62ea90381aef068bd2c7a6fe141e2a000b28e4 /lib
parent: 0a1d1fbd292ee4dac8066973027b084eab437fe7 (diff)
parent: 2e20a71d21408d0a98b392209ff78bade26984bb (diff)
download: gitlab-ce-efb8da895fb8d0976f98d0e54e36d9e61dccef89.tar.gz
18 files changed, 2950 insertions, 0 deletions
diff --git a/lib/gitlab/git/attributes.rb b/lib/gitlab/git/attributes.rb
new file mode 100644
index 00000000000..42140ecc993
--- /dev/null
+++ b/lib/gitlab/git/attributes.rb
@@ -0,0 +1,131 @@
+module Gitlab
+  module Git
+    # Class for parsing Git attribute files and extracting the attributes for
+    # file patterns.
+    #
+    # Unlike Rugged this parser only needs a single IO call (a call to `open`),
+    # vastly reducing the time spent in extracting attributes.
+    #
+    # This class _only_ supports parsing the attributes file located at
+    # `$GIT_DIR/info/attributes` as GitLab doesn't use any other files
+    # (`.gitattributes` is copied to this particular path).
+    #
+    # Basic usage:
+    #
+    #     attributes = Gitlab::Git::Attributes.new(some_repo.path)
+    #
+    #     attributes.attributes('README.md') # => { "eol" => "lf" }
+    class Attributes
+      # path - The path to the Git repository.
+      def initialize(path)
+        @path = File.expand_path(path)
+        @patterns = nil
+      end
+
+      # Looks up the Git attributes that apply to a file.
+      #
+      # path - The file's path; it is joined onto the repository path
+      #        before being matched against the patterns.
+      #
+      # Returns the attributes Hash of the first pattern that matches, or an
+      # empty Hash when none does.
+      def attributes(path)
+        target = File.join(@path, path)
+        # Hash#find stops at the first hit, so earlier patterns win.
+        hit = patterns.find { |glob, _attrs| File.fnmatch?(glob, target) }
+
+        hit ? hit.last : {}
+      end
+
+      # Returns a Hash containing the file patterns and their attributes.
+      def patterns
+        @patterns ||= parse_file # parsed on first use, then reused
+      end
+
+      # Parses an attribute string. Each whitespace-separated chunk can be in
+      # one of the following formats:
+      #
+      #     text      # => { "text" => true }
+      #     -text     # => { "text" => false }
+      #     key=value # => { "key" => "value" }
+      #     binary    # => { "binary" => true, "diff" => false }
+      #
+      # string - The string to parse.
+      #
+      # Returns a Hash containing the attributes and their values.
+      def parse_attributes(string)
+        values = {}
+        dash = '-'
+        equal = '='
+        binary = 'binary'
+
+        string.split(/\s+/).each do |chunk|
+          # Data such as "foo = bar" should be treated as "foo" and "bar" being
+          # separate boolean attributes.
+          next if chunk == equal
+
+          key = chunk
+          # Input: "-foo". byteslice works in bytes, so the length handed to
+          # it must be a byte count or multi-byte names get cut short.
+          if chunk.start_with?(dash)
+            key = chunk.byteslice(1, chunk.bytesize - 1)
+            value = false
+
+          # Input: "foo=bar"
+          elsif chunk.include?(equal)
+            key, value = chunk.split(equal, 2)
+
+          # Input: "foo"
+          else
+            value = true
+          end
+
+          values[key] = value
+
+          # When the "binary" option is set the "diff" option should be set to
+          # the inverse. If "diff" is later set it should overwrite the
+          # automatically set value.
+          values['diff'] = false if key == binary && value
+        end
+
+        values
+      end
+
+      # Yields every line of the attributes file, stripped of surrounding
+      # whitespace. Yields nothing when the file does not exist.
+      def each_line
+        full_path = File.join(@path, 'info/attributes')
+        return unless File.exist?(full_path)
+
+        File.open(full_path, 'r') do |handle|
+          handle.each_line do |line|
+            # Stop reading at the first line that is not validly encoded.
+            break unless line.valid_encoding?
+            yield line.strip
+          end
+        end
+      end
+
+      private
+
+      # Parses the Git attributes file into a Hash of pattern => attributes.
+      def parse_file
+        pairs = []
+        comment = '#'
+
+        each_line do |line|
+          next if line.start_with?(comment) || line.empty?
+
+          pattern, attrs = line.split(/\s+/, 2)
+          parsed = attrs ? parse_attributes(attrs) : {}
+          pairs << [File.join(@path, pattern), parsed]
+        end
+
+        # Newer entries take precedence over older entries. Array#to_h keeps the
+        # last value seen for a repeated key, which after the reverse would be the
+        # oldest one, so repeats of a pattern are dropped before converting.
+        pairs.reverse.uniq(&:first).to_h
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/git/blame.rb b/lib/gitlab/git/blame.rb
new file mode 100644
index 00000000000..46f3969b6e1
--- /dev/null
+++ b/lib/gitlab/git/blame.rb
@@ -0,0 +1,77 @@
+require_relative 'encoding_helper'
+
+module Gitlab
+  module Git
+    class Blame
+      include Gitlab::Git::EncodingHelper
+
+      attr_reader :lines, :blames
+
+      def initialize(repository, sha, path)
+        @repo = repository
+        @sha = sha
+        @path = path
+        @lines = []
+        @blames = load_blame
+      end
+
+      # Yields a Gitlab::Git::Commit and the line text for every blamed
+      # line, in the order the lines are stored in @blames.
+      def each
+        @blames.each do |entry|
+          wrapped = Gitlab::Git::Commit.new(entry.commit)
+          yield(wrapped, entry.line)
+        end
+      end
+
+      private
+
+      def load_blame
+        cmd = %W(git --git-dir=#{@repo.path} blame -p #{@sha} -- #{@path})
+        # Read in binary mode to ensure ASCII-8BIT
+        raw_output = IO.popen(cmd, 'rb') {|io| io.read }
+        output = encode_utf8(raw_output)
+        process_raw_blame output
+      end
+
+      def process_raw_blame(output)
+        lines, final = [], []
+        info, commits = {}, {}
+        # Tab-prefixed lines hold file content; lines starting with a
+        # 40-character id and two numbers open the entry for a blamed line.
+        output.split("\n").each do |line|
+          if line[0, 1] == "\t"
+            lines << line[1, line.size]
+          elsif m = /^(\w{40}) (\d+) (\d+)/.match(line)
+            commit_id, old_lineno, lineno = m[1], m[2].to_i, m[3].to_i
+            commits[commit_id] = nil unless commits.key?(commit_id)
+            info[lineno] = [commit_id, old_lineno]
+          end
+        end
+
+        # Look up each distinct commit once, one repository call per commit.
+        commits.keys.each do |key|
+          commits[key] = @repo.lookup(key)
+        end
+
+        # Build one BlameLine per line number, in ascending line order.
+        info.sort.each do |lineno, (commit_id, old_lineno)|
+          commit = commits[commit_id]
+          final << BlameLine.new(lineno, old_lineno, commit, lines[lineno - 1])
+        end
+
+        @lines = final
+      end
+    end
+
+    # One blamed line: its current and previous line numbers, the commit
+    # blame attributes it to and the text of the line itself.
+    class BlameLine
+      attr_accessor :lineno, :oldlineno, :commit, :line
+
+      def initialize(lineno, oldlineno, commit, line)
+        @lineno, @oldlineno, @commit, @line = lineno, oldlineno, commit, line
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb
new file mode 100644
index 00000000000..4a623311c14
--- /dev/null
+++ b/lib/gitlab/git/blob.rb
@@ -0,0 +1,333 @@
+require_relative 'encoding_helper'
+require_relative 'path_helper'
+
+module Gitlab
+  module Git
+    class Blob
+      include Linguist::BlobHelper
+      include Gitlab::Git::EncodingHelper
+
+      # This number is the maximum amount of data that we want to display to
+      # the user. We load as much as we can for encoding detection
+      # (Linguist) and LFS pointer parsing. All other cases where we need full
+      # blob data should use load_all_data!.
+      MAX_DATA_DISPLAY_SIZE = 10485760
+
+      attr_accessor :name, :path, :size, :data, :mode, :id, :commit_id, :loaded_size, :binary
+
+      class << self
+        # Returns the Blob at +path+ in commit +sha+, or nil if there is none.
+        def find(repository, sha, path)
+          root_tree = repository.lookup(sha).tree
+          # Search with a slash-stripped copy: find_entry_by_path edits the
+          # string it receives, which used to alter the caller's argument.
+          path = path.sub(/\A\/*/, '')
+          blob_entry = find_entry_by_path(repository, root_tree.oid, path)
+          return nil unless blob_entry
+
+          if blob_entry[:type] == :commit
+            submodule_blob(blob_entry, path, sha)
+          else
+            blob = repository.lookup(blob_entry[:oid])
+            if blob
+              new(
+                id: blob.oid,
+                name: blob_entry[:name],
+                size: blob.size,
+                data: blob.content(MAX_DATA_DISPLAY_SIZE),
+                mode: blob_entry[:filemode].to_s(8),
+                path: path,
+                commit_id: sha,
+                binary: blob.binary?
+              )
+            end
+          end
+        end
+
+        def raw(repository, sha)
+          blob = repository.lookup(sha)
+
+          new(
+            id: blob.oid,
+            size: blob.size,
+            data: blob.content(MAX_DATA_DISPLAY_SIZE),
+            binary: blob.binary?
+          )
+        end
+
+        # Recursively walks the tree +root_id+ to find the entry at +path+.
+        #
+        # Ex.
+        #   blog/            # oid: 1a
+        #     app/           # oid: 2a
+        #       models/      # oid: 3a
+        #       file.rb      # oid: 4a
+        #
+        # Blob.find_entry_by_path(repo, '1a', 'app/file.rb') # => the entry with oid '4a'
+        #
+        # Returns the :blob or :commit entry found, or nil when there is none.
+        def find_entry_by_path(repository, root_id, path)
+          root_tree = repository.lookup(root_id)
+          # Strip leading slashes. NOTE: this edits the caller's string in place.
+          path[/^\/*/] = ''
+          path_arr = path.split('/')
+
+          entry = root_tree.find do |entry|
+            entry[:name] == path_arr[0]
+          end
+
+          return nil unless entry
+
+          if path_arr.size > 1
+            return nil unless entry[:type] == :tree
+            path_arr.shift
+            find_entry_by_path(repository, entry[:oid], path_arr.join('/'))
+          else
+            [:blob, :commit].include?(entry[:type]) ? entry : nil
+          end
+        end
+
+        def submodule_blob(blob_entry, path, sha)
+          new(
+            id: blob_entry[:oid],
+            name: blob_entry[:name],
+            data: '',
+            path: path,
+            commit_id: sha,
+          )
+        end
+
+        # Commit file in repository and return commit sha
+        #
+        # options should contain next structure:
+        #   file: {
+        #     content: 'Lorem ipsum...',
+        #     path: 'documents/story.txt',
+        #     update: true
+        #   },
+        #   author: {
+        #     email: 'user@example.com',
+        #     name: 'Test User',
+        #     time: Time.now
+        #   },
+        #   committer: {
+        #     email: 'user@example.com',
+        #     name: 'Test User',
+        #     time: Time.now
+        #   },
+        #   commit: {
+        #     message: 'Wow such commit',
+        #     branch: 'master',
+        #     update_ref: false
+        #   }
+        #
+        # rubocop:disable Metrics/AbcSize
+        # rubocop:disable Metrics/CyclomaticComplexity
+        # rubocop:disable Metrics/PerceivedComplexity
+        def commit(repository, options, action = :add)
+          file = options[:file]
+          update = file[:update].nil? ? true : file[:update]
+          author = options[:author]
+          committer = options[:committer]
+          commit = options[:commit]
+          repo = repository.rugged
+          ref = commit[:branch]
+          update_ref = commit[:update_ref].nil? ? true : commit[:update_ref]
+          parents = []
+          mode = 0o100644
+          # A bare branch name is expanded to a full refs/heads/ reference.
+          unless ref.start_with?('refs/')
+            ref = 'refs/heads/' + ref
+          end
+
+          path_name = Gitlab::Git::PathHelper.normalize_path(file[:path])
+          # Abort if any invalid characters remain (e.g. ../foo)
+          raise Gitlab::Git::Repository::InvalidBlobName.new("Invalid path") if path_name.each_filename.to_a.include?('..')
+
+          filename = path_name.to_s
+          index = repo.index
+
+          unless repo.empty?
+            rugged_ref = repo.references[ref]
+            raise Gitlab::Git::Repository::InvalidRef.new("Invalid branch name") unless rugged_ref
+            last_commit = rugged_ref.target
+            index.read_tree(last_commit.tree)
+            parents = [last_commit]
+          end
+          # :remove drops the file; every other action (re)writes its content.
+          if action == :remove
+            index.remove(filename)
+          else
+            file_entry = index.get(filename)
+
+            if action == :rename
+              old_path_name = Gitlab::Git::PathHelper.normalize_path(file[:previous_path])
+              old_filename = old_path_name.to_s
+              file_entry = index.get(old_filename)
+              index.remove(old_filename) unless file_entry.blank?
+            end
+
+            if file_entry
+              raise Gitlab::Git::Repository::InvalidBlobName.new("Filename already exists; update not allowed") unless update
+
+              # Preserve the current file mode if one is available
+              mode = file_entry[:mode] if file_entry[:mode]
+            end
+
+            content = file[:content]
+            detect = CharlockHolmes::EncodingDetector.new.detect(content) if content
+
+            unless detect && detect[:type] == :binary
+              # Writing to the repo directly bypasses `core.autocrlf`, so
+              # normalise here, on a copy that leaves the caller's string alone.
+              content = content.gsub("\r\n", "\n") if repository.autocrlf
+            end
+
+            oid = repo.write(content, :blob)
+            index.add(path: filename, oid: oid, mode: mode)
+          end
+
+          opts = {}
+          opts[:tree] = index.write_tree(repo)
+          opts[:author] = author
+          opts[:committer] = committer
+          opts[:message] = commit[:message]
+          opts[:parents] = parents
+          opts[:update_ref] = ref if update_ref
+
+          Rugged::Commit.create(repo, opts)
+        end
+        # rubocop:enable Metrics/AbcSize
+        # rubocop:enable Metrics/CyclomaticComplexity
+        # rubocop:enable Metrics/PerceivedComplexity
+
+        # Remove file from repository and return commit sha
+        #
+        # options should contain next structure:
+        #   file: {
+        #     path: 'documents/story.txt'
+        #   },
+        #   author: {
+        #     email: 'user@example.com',
+        #     name: 'Test User',
+        #     time: Time.now
+        #   },
+        #   committer: {
+        #     email: 'user@example.com',
+        #     name: 'Test User',
+        #     time: Time.now
+        #   },
+        #   commit: {
+        #     message: 'Remove FILENAME',
+        #     branch: 'master'
+        #   }
+        #
+        def remove(repository, options)
+          commit(repository, options, :remove)
+        end
+
+        # Rename file from repository and return commit sha
+        #
+        # options should contain next structure:
+        #   file: {
+        #     previous_path: 'documents/old_story.txt',
+        #     path: 'documents/story.txt',
+        #     content: 'Lorem ipsum...',
+        #     update: true
+        #   },
+        #   author: {
+        #     email: 'user@example.com',
+        #     name: 'Test User',
+        #     time: Time.now
+        #   },
+        #   committer: {
+        #     email: 'user@example.com',
+        #     name: 'Test User',
+        #     time: Time.now
+        #   },
+        #   commit: {
+        #     message: 'Rename FILENAME',
+        #     branch: 'master'
+        #   }
+        #
+        def rename(repository, options)
+          commit(repository, options, :rename)
+        end
+      end
+
+      # Builds a blob from an options Hash keyed by attribute name.
+      def initialize(options)
+        %i(id name path size data mode commit_id binary).each do |attr|
+          send("#{attr}=", options[attr])
+        end
+        # Retain the actual size before it is encoded
+        @loaded_size = @data.bytesize if @data
+        @loaded_all_data = false
+      end
+
+      def binary?
+        @binary.nil? ? super : @binary == true
+      end
+
+      def empty?
+        !data || data == ''
+      end
+
+      def data
+        encode! @data
+      end
+
+      # Load all blob data (not just the first MAX_DATA_DISPLAY_SIZE bytes) into
+      # memory as a Ruby string. Returns @data if it was already fully loaded.
+      def load_all_data!(repository)
+        return if @data == '' # don't mess with submodule blobs
+        return @data if @loaded_all_data
+        # Flag success only after the lookup, so a failed load can be retried.
+        @data = repository.lookup(id).content
+        @loaded_all_data = true
+        @loaded_size = @data.bytesize
+      end
+
+      def name
+        encode! @name
+      end
+
+      # Valid LFS object pointer is a text file consisting of
+      # version
+      # oid
+      # size
+      # see https://github.com/github/git-lfs/blob/v1.1.0/docs/spec.md#the-pointer
+      def lfs_pointer?
+        has_lfs_version_key? && lfs_oid.present? && lfs_size.present?
+      end
+
+      # Returns the sha256 oid named by the LFS pointer, or nil when the data
+      # is not an LFS pointer or carries no well-formed oid.
+      def lfs_oid
+        return nil unless has_lfs_version_key?
+
+        # String#[] with a capture index returns that capture, or nil.
+        data[/(?<=sha256:)([0-9a-f]{64})/, 1]
+      end
+
+      # Returns the digits that follow "size " in the LFS pointer as a
+      # String, or nil when the data is not an LFS pointer or has no size.
+      def lfs_size
+        return nil unless has_lfs_version_key?
+
+        # String#[] with a capture index returns that capture, or nil.
+        data[/(?<=size )([0-9]+)/, 1]
+      end
+
+      def truncated?
+        size && (size > loaded_size)
+      end
+
+      private
+
+      def has_lfs_version_key?
+        !empty? && text? && data.start_with?("version https://git-lfs.github.com/spec")
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/git/blob_snippet.rb b/lib/gitlab/git/blob_snippet.rb
new file mode 100644
index 00000000000..e98de57fc22
--- /dev/null
+++ b/lib/gitlab/git/blob_snippet.rb
@@ -0,0 +1,32 @@
+module Gitlab
+  module Git
+    # A snippet of lines taken from one file at a given ref.
+    class BlobSnippet
+      include Linguist::BlobHelper
+
+      attr_accessor :ref, :lines, :filename, :startline
+
+      def initialize(ref, lines, startline, filename)
+        @ref, @lines = ref, lines
+        @startline, @filename = startline, filename
+      end
+
+      # The lines joined with newlines, or nil when lines is nil.
+      def data
+        lines ? lines.join("\n") : nil
+      end
+
+      def name
+        filename
+      end
+
+      def size
+        data.length
+      end
+
+      def mode
+        nil
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/git/branch.rb b/lib/gitlab/git/branch.rb
new file mode 100644
index 00000000000..586380da94a
--- /dev/null
+++ b/lib/gitlab/git/branch.rb
@@ -0,0 +1,6 @@
+module Gitlab
+  module Git
+    class Branch < Ref
+    end
+  end
+end
diff --git a/lib/gitlab/git/commit.rb b/lib/gitlab/git/commit.rb
new file mode 100644
index 00000000000..d785516ebdd
--- /dev/null
+++ b/lib/gitlab/git/commit.rb
@@ -0,0 +1,310 @@
+# Gitlab::Git::Commit is a wrapper around native Rugged::Commit object
+module Gitlab
+  module Git
+    class Commit
+      include Gitlab::Git::EncodingHelper
+
+      attr_accessor :raw_commit, :head, :refs
+
+      SERIALIZE_KEYS = [
+        :id, :message, :parent_ids,
+        :authored_date, :author_name, :author_email,
+        :committed_date, :committer_name, :committer_email
+      ].freeze
+
+      attr_accessor *SERIALIZE_KEYS # rubocop:disable Lint/AmbiguousOperator
+
+      # Two commits are equal when every attribute listed below matches.
+      # The id is not part of the comparison.
+      def ==(other)
+        return false unless other.is_a?(Gitlab::Git::Commit)
+
+        compared = %i(message parent_ids authored_date author_name
+                      author_email committed_date committer_name
+                      committer_email)
+
+        compared.all? { |attr| send(attr) == other.send(attr) }
+      end
+
+      class << self
+        # Get commits collection as an Array of Gitlab::Git::Commit.
+        #
+        # Ex.
+        #   Commit.where(
+        #     repo: repo,
+        #     ref: 'master',
+        #     path: 'app/models',
+        #     limit: 10,
+        #     offset: 5,
+        #   )
+        #
+        def where(options)
+          options = options.dup # keep the caller's hash (and its :repo) intact
+          repo = options.delete(:repo)
+          raise 'Gitlab::Git::Repository is required' unless repo.respond_to?(:log)
+          repo.log(options).map { |c| decorate(c) }
+        end
+
+        # Get single commit
+        #
+        # Ex.
+        #   Commit.find(repo, '29eda46b')
+        #
+        #   Commit.find(repo, 'master')
+        #
+        def find(repo, commit_id = "HEAD")
+          return decorate(commit_id) if commit_id.is_a?(Rugged::Commit)
+
+          obj = if commit_id.is_a?(String)
+                  repo.rev_parse_target(commit_id)
+                else
+                  Gitlab::Git::Ref.dereference_object(commit_id)
+                end
+
+          return nil unless obj.is_a?(Rugged::Commit)
+
+          decorate(obj)
+        rescue Rugged::ReferenceError, Rugged::InvalidError, Rugged::ObjectError, Gitlab::Git::Repository::NoRepository
+          nil
+        end
+
+        # Get last commit for HEAD
+        #
+        # Ex.
+        #   Commit.last(repo)
+        #
+        def last(repo)
+          find(repo)
+        end
+
+        # Get last commit for specified path and ref
+        #
+        # Ex.
+        #   Commit.last_for_path(repo, '29eda46b', 'app/models')
+        #
+        #   Commit.last_for_path(repo, 'master', 'Gemfile')
+        #
+        def last_for_path(repo, ref, path = nil)
+          where(
+            repo: repo,
+            ref: ref,
+            path: path,
+            limit: 1
+          ).first
+        end
+
+        # Get commits between two revspecs
+        # See also #repository.commits_between
+        #
+        # Ex.
+        #   Commit.between(repo, '29eda46b', 'master')
+        #
+        def between(repo, base, head)
+          repo.commits_between(base, head).map do |commit|
+            decorate(commit)
+          end
+        rescue Rugged::ReferenceError
+          []
+        end
+
+        # Delegate Repository#find_commits
+        def find_all(repo, options = {})
+          repo.find_commits(options)
+        end
+
+        def decorate(commit, ref = nil)
+          Gitlab::Git::Commit.new(commit, ref)
+        end
+
+        # Returns a diff object for the changes introduced by +rugged_commit+.
+        # If +rugged_commit+ doesn't have a parent, then the diff is between
+        # this commit and an empty repo.  See Repository#diff for the keys
+        # allowed in the +options+ hash.
+        def diff_from_parent(rugged_commit, options = {})
+          options ||= {}
+          break_rewrites = options[:break_rewrites]
+          actual_options = Gitlab::Git::Diff.filter_diff_options(options)
+
+          diff = if rugged_commit.parents.empty?
+                   rugged_commit.diff(actual_options.merge(reverse: true))
+                 else
+                   rugged_commit.parents[0].diff(rugged_commit, actual_options)
+                 end
+
+          diff.find_similar!(break_rewrites: break_rewrites)
+          diff
+        end
+      end
+
+      # raw_commit - A Hash of commit attributes or a Rugged::Commit.
+      # head       - Optional value kept as-is in @head.
+      #
+      # Raises a RuntimeError for nil or any other kind of raw_commit.
+      def initialize(raw_commit, head = nil)
+        raise "Nil as raw commit passed" unless raw_commit
+
+        @head = head
+        return init_from_hash(raw_commit) if raw_commit.is_a?(Hash)
+        return init_from_rugged(raw_commit) if raw_commit.is_a?(Rugged::Commit)
+
+        raise "Invalid raw commit type: #{raw_commit.class}"
+      end
+
+      def sha
+        id
+      end
+
+      def short_id(length = 10)
+        id.to_s[0..length] # inclusive range: returns up to length + 1 characters
+      end
+
+      def safe_message
+        @safe_message ||= message
+      end
+
+      def created_at
+        committed_date
+      end
+
+      # Was this commit committed by a different person than the original author?
+      def different_committer?
+        author_name != committer_name || author_email != committer_email
+      end
+
+      def parent_id
+        parent_ids.first
+      end
+
+      # Shows the diff between the commit's parent and the commit.
+      #
+      # Cuts out the header and stats from #to_patch and returns only the diff.
+      def to_diff(options = {})
+        diff_from_parent(options).patch
+      end
+
+      # Returns a diff object for the changes from this commit's first parent.
+      # If there is no parent, then the diff is between this commit and an
+      # empty repo.  See Repository#diff for keys allowed in the +options+
+      # hash.
+      def diff_from_parent(options = {})
+        Commit.diff_from_parent(raw_commit, options)
+      end
+
+      def has_zero_stats?
+        stats.total.zero?
+      rescue
+        true
+      end
+
+      def no_commit_message
+        "--no commit message"
+      end
+
+      # Returns a Hash mapping each serialize key to its current value.
+      def to_hash
+        pairs = serialize_keys.map { |key| [key, send(key)] }
+        pairs.to_h
+      end
+
+      def date
+        committed_date
+      end
+
+      def diffs(options = {})
+        Gitlab::Git::DiffCollection.new(diff_from_parent(options), options)
+      end
+
+      def parents
+        raw_commit.parents.map { |c| Gitlab::Git::Commit.new(c) }
+      end
+
+      def tree
+        raw_commit.tree
+      end
+
+      def stats
+        Gitlab::Git::CommitStats.new(self)
+      end
+
+      # Returns raw_commit.to_mbox(options). When Rugged rejects the commit
+      # for being a merge commit a fixed notice is returned instead; any
+      # other Rugged::InvalidError results in nil.
+      def to_patch(options = {})
+        raw_commit.to_mbox(options)
+      rescue Rugged::InvalidError => ex
+        merge_commit = ex.message =~ /Commit \w+ is a merge commit/
+        'Patch format is not currently supported for merge commits.' if merge_commit
+      end
+
+      # Get a collection of Rugged::Reference objects for this commit.
+      #
+      # Ex.
+      #   commit.refs(repo)
+      #
+      def refs(repo)
+        repo.refs_hash[id]
+      end
+
+      # Get ref names collection
+      #
+      # Ex.
+      #   commit.ref_names(repo)
+      #
+      def ref_names(repo)
+        refs(repo).map do |ref|
+          ref.name.sub(%r{^refs/(heads|remotes|tags)/}, "")
+        end
+      end
+
+      def message
+        encode! @message
+      end
+
+      def author_name
+        encode! @author_name
+      end
+
+      def author_email
+        encode! @author_email
+      end
+
+      def committer_name
+        encode! @committer_name
+      end
+
+      def committer_email
+        encode! @committer_email
+      end
+
+      private
+
+      def init_from_hash(hash)
+        raw_commit = hash.symbolize_keys
+
+        serialize_keys.each do |key|
+          send("#{key}=", raw_commit[key])
+        end
+      end
+
+      def init_from_rugged(commit)
+        author = commit.author
+        committer = commit.committer
+
+        @raw_commit = commit
+        @id = commit.oid
+        @message = commit.message
+        @authored_date = author[:time]
+        @committed_date = committer[:time]
+        @author_name = author[:name]
+        @author_email = author[:email]
+        @committer_name = committer[:name]
+        @committer_email = committer[:email]
+        @parent_ids = commit.parents.map(&:oid)
+      end
+
+      def serialize_keys
+        SERIALIZE_KEYS
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/git/commit_stats.rb b/lib/gitlab/git/commit_stats.rb
new file mode 100644
index 00000000000..e9118bbed0e
--- /dev/null
+++ b/lib/gitlab/git/commit_stats.rb
@@ -0,0 +1,26 @@
+# Gitlab::Git::CommitStats counts the additions, deletions, and total changes
+# in a commit.
+module Gitlab
+  module Git
+    class CommitStats
+      attr_reader :id, :additions, :deletions, :total
+
+      # Instantiate a CommitStats object by walking every patch of the
+      # commit's diff against its parent and summing stat[0] into
+      # additions, stat[1] into deletions and changes into total.
+      def initialize(commit)
+        @id = commit.id
+        @additions = @deletions = @total = 0
+
+        commit.diff_from_parent.each_patch do |patch|
+          # TODO: Use the new Rugged convenience methods when they're released
+          added, deleted = patch.stat[0], patch.stat[1]
+
+          @additions += added
+          @deletions += deleted
+          @total += patch.changes
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/git/compare.rb b/lib/gitlab/git/compare.rb
new file mode 100644
index 00000000000..696a2acd5e3
--- /dev/null
+++ b/lib/gitlab/git/compare.rb
@@ -0,0 +1,43 @@
+module Gitlab
+  module Git
+    class Compare
+      attr_reader :head, :base, :straight
+
+      # base/head are revisions resolved through Commit.find. When either is
+      # missing, or both resolve to one commit, @commits is preset to [].
+      def initialize(repository, base, head, straight = false)
+        @repository = repository
+        @straight = straight
+
+        unless base && head
+          @commits = []
+          return
+        end
+
+        @base = Gitlab::Git::Commit.find(repository, base.try(:strip))
+        @head = Gitlab::Git::Commit.find(repository, head.try(:strip))
+        @commits = [] if !(@base && @head) || same
+      end
+
+      def same
+        @base && @head && @base.id == @head.id
+      end
+
+      def commits
+        return @commits if defined?(@commits)
+
+        @commits = Gitlab::Git::Commit.between(@repository, @base.id, @head.id)
+      end
+
+      def diffs(options = {})
+        return Gitlab::Git::DiffCollection.new([]) unless @head && @base
+        # Work on a copy: deleting :paths from the caller's hash would make
+        # a later call with that same hash lose its path filter.
+        options = options.dup
+        paths = options.delete(:paths) || []
+        options[:straight] = @straight
+        Gitlab::Git::Diff.between(@repository, @head.id, @base.id, options, *paths)
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/git/diff.rb b/lib/gitlab/git/diff.rb
new file mode 100644
index 00000000000..d6b3b5705a9
--- /dev/null
+++ b/lib/gitlab/git/diff.rb
@@ -0,0 +1,322 @@
+# Gitlab::Git::Diff is a wrapper around native Rugged::Diff object
+module Gitlab
+  module Git
+    class Diff
+      class TimeoutError < StandardError; end
+      include Gitlab::Git::EncodingHelper
+
+      # Diff properties
+      attr_accessor :old_path, :new_path, :a_mode, :b_mode, :diff
+
+      # Stats properties
+      attr_accessor :new_file, :renamed_file, :deleted_file
+
+      attr_accessor :too_large
+
+      # The maximum size of a diff to display.
+      DIFF_SIZE_LIMIT = 102400 # 100 KB
+
+      # The maximum size before a diff is collapsed.
+      DIFF_COLLAPSE_LIMIT = 10240 # 10 KB
+
+      class << self
+        def between(repo, head, base, options = {}, *paths)
+          straight = options.delete(:straight) || false
+
+          common_commit = if straight
+                            base
+                          else
+                            # Only show what is new in the source branch
+                            # compared to the target branch, not the other way
+                            # around. The linex below with merge_base is
+                            # equivalent to diff with three dots (git diff
+                            # branch1...branch2) From the git documentation:
+                            # "git diff A...B" is equivalent to "git diff
+                            # $(git-merge-base A B) B"
+                            repo.merge_base_commit(head, base)
+                          end
+
+          options ||= {}
+          actual_options = filter_diff_options(options)
+          repo.diff(common_commit, head, actual_options, *paths)
+        end
+
+        # Return a copy of the +options+ hash containing only keys that can be
+        # passed to Rugged.  Allowed options are:
+        #
+        #  :max_size ::
+        #    An integer specifying the maximum byte size of a file before it
+        #    will be treated as binary. The default value is 512MB.
+        #
+        #  :context_lines ::
+        #    The number of unchanged lines that define the boundary of a hunk
+        #    (and to display before and after the actual changes). The default is
+        #    3.
+        #
+        #  :interhunk_lines ::
+        #    The maximum number of unchanged lines between hunk boundaries before
+        #    the hunks will be merged into one. The default is 0.
+        #
+        #  :old_prefix ::
+        #    The virtual "directory" to prefix to old filenames in hunk headers.
+        #    The default is "a".
+        #
+        #  :new_prefix ::
+        #    The virtual "directory" to prefix to new filenames in hunk headers.
+        #    The default is "b".
+        #
+        #  :reverse ::
+        #    If true, the sides of the diff will be reversed.
+        #
+        #  :force_text ::
+        #    If true, all files will be treated as text, disabling binary
+        #    attributes & detection.
+        #
+        #  :ignore_whitespace ::
+        #    If true, all whitespace will be ignored.
+        #
+        #  :ignore_whitespace_change ::
+        #    If true, changes in amount of whitespace will be ignored.
+        #
+        #  :ignore_whitespace_eol ::
+        #    If true, whitespace at end of line will be ignored.
+        #
+        #  :ignore_submodules ::
+        #    If true, submodules will be excluded from the diff completely.
+        #
+        #  :patience ::
+        #    If true, the "patience diff" algorithm will be used (currently
+        #    unimplemented).
+        #
+        #  :include_ignored ::
+        #    If true, ignored files will be included in the diff.
+        #
+        #  :include_untracked ::
+        #    If true, untracked files will be included in the diff.
+        #
+        #  :include_unmodified ::
+        #    If true, unmodified files will be included in the diff.
+        #
+        #  :recurse_untracked_dirs ::
+        #    Even if +:include_untracked+ is true, untracked directories will
+        #    only be marked with a single entry in the diff. If this flag is set
+        #    to true, all files under untracked directories will be included in the
+        #    diff, too.
+        #
+        #  :disable_pathspec_match ::
+        #    If true, the given +*paths+ will be applied as exact matches,
+        #    instead of as fnmatch patterns.
+        #
+        #  :deltas_are_icase ::
+        #    If true, filename comparisons will be made with case-insensitivity.
+        #
+        #  :include_untracked_content ::
+        #    If true, untracked content will be contained in the diff patch
+        #    text.
+        #
+        #  :skip_binary_check ::
+        #    If true, diff deltas will be generated without spending time on
+        #    binary detection. This is useful to improve performance in cases
+        #    where the actual file content difference is not needed.
+        #
+        #  :include_typechange ::
+        #    If true, type changes for files will not be interpreted as deletion
+        #    of the "old file" and addition of the "new file", but will generate
+        #    typechange records.
+        #
+        #  :include_typechange_trees ::
+        #    Even if +:include_typechange+ is true, blob -> tree changes will
+        #    still usually be handled as a deletion of the blob. If this flag is
+        #    set to true, blob -> tree changes will be marked as typechanges.
+        #
+        #  :ignore_filemode ::
+        #    If true, file mode changes will be ignored.
+        #
+        #  :recurse_ignored_dirs ::
+        #    Even if +:include_ignored+ is true, ignored directories will only be
+        #    marked with a single entry in the diff. If this flag is set to true,
+        #    all files under ignored directories will be included in the diff,
+        #    too.
+        def filter_diff_options(options, default_options = {})
+          allowed_options = [:max_size, :context_lines, :interhunk_lines,
+                             :old_prefix, :new_prefix, :reverse, :force_text,
+                             :ignore_whitespace, :ignore_whitespace_change,
+                             :ignore_whitespace_eol, :ignore_submodules,
+                             :patience, :include_ignored, :include_untracked,
+                             :include_unmodified, :recurse_untracked_dirs,
+                             :disable_pathspec_match, :deltas_are_icase,
+                             :include_untracked_content, :skip_binary_check,
+                             :include_typechange, :include_typechange_trees,
+                             :ignore_filemode, :recurse_ignored_dirs, :paths,
+                             :max_files, :max_lines, :all_diffs, :no_collapse]
+
+          if default_options
+            actual_defaults = default_options.dup
+            actual_defaults.keep_if do |key|
+              allowed_options.include?(key)
+            end
+          else
+            actual_defaults = {}
+          end
+
+          if options
+            filtered_opts = options.dup
+            filtered_opts.keep_if do |key|
+              allowed_options.include?(key)
+            end
+            filtered_opts = actual_defaults.merge(filtered_opts)
+          else
+            filtered_opts = actual_defaults
+          end
+
+          filtered_opts
+        end
+      end
+
+      def initialize(raw_diff, collapse: false)
+        case raw_diff
+        when Hash
+          init_from_hash(raw_diff, collapse: collapse)
+        when Rugged::Patch, Rugged::Diff::Delta
+          init_from_rugged(raw_diff, collapse: collapse)
+        when nil
+          raise "Nil as raw diff passed"
+        else
+          raise "Invalid raw diff type: #{raw_diff.class}"
+        end
+      end
+
+      def serialize_keys
+        @serialize_keys ||= %i(diff new_path old_path a_mode b_mode new_file renamed_file deleted_file too_large)
+      end
+
+      def to_hash
+        hash = {}
+
+        keys = serialize_keys
+
+        keys.each do |key|
+          hash[key] = send(key)
+        end
+
+        hash
+      end
+
+      def submodule?
+        a_mode == '160000' || b_mode == '160000'
+      end
+
+      def line_count
+        @line_count ||= Util.count_lines(@diff)
+      end
+
+      def too_large?
+        if @too_large.nil?
+          @too_large = @diff.bytesize >= DIFF_SIZE_LIMIT
+        else
+          @too_large
+        end
+      end
+
+      def collapsible?
+        @diff.bytesize >= DIFF_COLLAPSE_LIMIT
+      end
+
+      def prune_large_diff!
+        @diff = ''
+        @line_count = 0
+        @too_large = true
+      end
+
+      def collapsed?
+        return @collapsed if defined?(@collapsed)
+        false
+      end
+
+      def prune_collapsed_diff!
+        @diff = ''
+        @line_count = 0
+        @collapsed = true
+      end
+
+      private
+
+      def init_from_rugged(rugged, collapse: false)
+        if rugged.is_a?(Rugged::Patch)
+          init_from_rugged_patch(rugged, collapse: collapse)
+          d = rugged.delta
+        else
+          d = rugged
+        end
+
+        @new_path = encode!(d.new_file[:path])
+        @old_path = encode!(d.old_file[:path])
+        @a_mode = d.old_file[:mode].to_s(8)
+        @b_mode = d.new_file[:mode].to_s(8)
+        @new_file = d.added?
+        @renamed_file = d.renamed?
+        @deleted_file = d.deleted?
+      end
+
+      def init_from_rugged_patch(patch, collapse: false)
+        # Don't bother initializing diffs that are too large. If a diff is
+        # binary we're not going to display anything so we skip the size check.
+        return if !patch.delta.binary? && prune_large_patch(patch, collapse)
+
+        @diff = encode!(strip_diff_headers(patch.to_s))
+      end
+
+      def init_from_hash(hash, collapse: false)
+        raw_diff = hash.symbolize_keys
+
+        serialize_keys.each do |key|
+          send(:"#{key}=", raw_diff[key.to_sym])
+        end
+
+        prune_large_diff! if too_large?
+        prune_collapsed_diff! if collapse && collapsible?
+      end
+
+      # If the patch surpasses any of the diff limits it calls the appropiate
+      # prune method and returns true. Otherwise returns false.
+      def prune_large_patch(patch, collapse)
+        size = 0
+
+        patch.each_hunk do |hunk|
+          hunk.each_line do |line|
+            size += line.content.bytesize
+
+            if size >= DIFF_SIZE_LIMIT
+              prune_large_diff!
+              return true
+            end
+          end
+        end
+
+        if collapse && size >= DIFF_COLLAPSE_LIMIT
+          prune_collapsed_diff!
+          return true
+        end
+
+        false
+      end
+
+      # Strip out the information at the beginning of the patch's text to match
+      # Grit's output
+      def strip_diff_headers(diff_text)
+        # Delete everything up to the first line that starts with '---' or
+        # 'Binary'
+        diff_text.sub!(/\A.*?^(---|Binary)/m, '\1')
+
+        if diff_text.start_with?('---', 'Binary')
+          diff_text
+        else
+          # If the diff_text did not contain a line starting with '---' or
+          # 'Binary', return the empty string. No idea why; we are just
+          # preserving behavior from before the refactor.
+          ''
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/git/diff_collection.rb b/lib/gitlab/git/diff_collection.rb
new file mode 100644
index 00000000000..65e06f5065d
--- /dev/null
+++ b/lib/gitlab/git/diff_collection.rb
@@ -0,0 +1,129 @@
+module Gitlab
+  module Git
+    class DiffCollection
+      include Enumerable
+
+      DEFAULT_LIMITS = { max_files: 100, max_lines: 5000 }.freeze
+
+      def initialize(iterator, options = {})
+        @iterator = iterator
+        @max_files = options.fetch(:max_files, DEFAULT_LIMITS[:max_files])
+        @max_lines = options.fetch(:max_lines, DEFAULT_LIMITS[:max_lines])
+        @max_bytes = @max_files * 5120 # Average 5 KB per file
+        @safe_max_files = [@max_files, DEFAULT_LIMITS[:max_files]].min
+        @safe_max_lines = [@max_lines, DEFAULT_LIMITS[:max_lines]].min
+        @safe_max_bytes = @safe_max_files * 5120 # Average 5 KB per file
+        @all_diffs = !!options.fetch(:all_diffs, false)
+        @no_collapse = !!options.fetch(:no_collapse, true)
+        @deltas_only = !!options.fetch(:deltas_only, false)
+
+        @line_count = 0
+        @byte_count = 0
+        @overflow = false
+        @array = Array.new
+      end
+
+      def each(&block)
+        if @populated
+          # @iterator.each is slower than just iterating the array in place
+          @array.each(&block)
+        elsif @deltas_only
+          each_delta(&block)
+        else
+          each_patch(&block)
+        end
+      end
+
+      def empty?
+        !@iterator.any?
+      end
+
+      def overflow?
+        populate!
+        !!@overflow
+      end
+
+      def size
+        @size ||= count # forces a loop using each method
+      end
+
+      def real_size
+        populate!
+
+        if @overflow
+          "#{size}+"
+        else
+          size.to_s
+        end
+      end
+
+      def decorate!
+        collection = each_with_index do |element, i|
+          @array[i] = yield(element)
+        end
+        @populated = true
+        collection
+      end
+
+      private
+
+      def populate!
+        return if @populated
+
+        each { nil } # force a loop through all diffs
+        @populated = true
+        nil
+      end
+
+      def over_safe_limits?(files)
+        files >= @safe_max_files || @line_count > @safe_max_lines || @byte_count >= @safe_max_bytes
+      end
+
+      def each_delta
+        @iterator.each_delta.with_index do |delta, i|
+          diff = Gitlab::Git::Diff.new(delta)
+
+          yield @array[i] = diff
+        end
+      end
+
+      def each_patch
+        @iterator.each_with_index do |raw, i|
+          # First yield cached Diff instances from @array
+          if @array[i]
+            yield @array[i]
+            next
+          end
+
+          # We have exhausted @array, time to create new Diff instances or stop.
+          break if @overflow
+
+          if !@all_diffs && i >= @max_files
+            @overflow = true
+            break
+          end
+
+          collapse = !@all_diffs && !@no_collapse
+
+          diff = Gitlab::Git::Diff.new(raw, collapse: collapse)
+
+          if collapse && over_safe_limits?(i)
+            diff.prune_collapsed_diff!
+          end
+
+          @line_count += diff.line_count
+          @byte_count += diff.diff.bytesize
+
+          if !@all_diffs && (@line_count >= @max_lines || @byte_count >= @max_bytes)
+            # This last Diff instance pushes us over the lines limit. We stop and
+            # discard it.
+            @overflow = true
+            break
+          end
+
+          yield @array[i] = diff
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/git/encoding_helper.rb b/lib/gitlab/git/encoding_helper.rb
new file mode 100644
index 00000000000..e57d228e688
--- /dev/null
+++ b/lib/gitlab/git/encoding_helper.rb
@@ -0,0 +1,58 @@
+module Gitlab
+  module Git
+    module EncodingHelper
+      extend self
+
+      # This threshold is carefully tweaked to prevent usage of encodings detected
+      # by CharlockHolmes with low confidence. If CharlockHolmes confidence is low,
+      # we're better off sticking with utf8 encoding.
+      # Reason: git diff can return strings with invalid utf8 byte sequences if it
+      # truncates a diff in the middle of a multibyte character. In this case
+      # CharlockHolmes will try to guess the encoding and will likely suggest an
+      # obscure encoding with low confidence.
+      # There is a lot more info with this merge request:
+      # https://gitlab.com/gitlab-org/gitlab_git/merge_requests/77#note_4754193
+      ENCODING_CONFIDENCE_THRESHOLD = 40
+
+      def encode!(message)
+        return nil unless message.respond_to? :force_encoding
+
+        # if message is utf-8 encoding, just return it
+        message.force_encoding("UTF-8")
+        return message if message.valid_encoding?
+
+        # return message if message type is binary
+        detect = CharlockHolmes::EncodingDetector.detect(message)
+        return message.force_encoding("BINARY") if detect && detect[:type] == :binary
+
+        # force detected encoding if we have sufficient confidence.
+        if detect && detect[:encoding] && detect[:confidence] > ENCODING_CONFIDENCE_THRESHOLD
+          message.force_encoding(detect[:encoding])
+        end
+
+        # encode and clean the bad chars
+        message.replace clean(message)
+      rescue
+        encoding = detect ? detect[:encoding] : "unknown"
+        "--broken encoding: #{encoding}"
+      end
+
+      def encode_utf8(message)
+        detect = CharlockHolmes::EncodingDetector.detect(message)
+        if detect
+          CharlockHolmes::Converter.convert(message, detect[:encoding], 'UTF-8')
+        else
+          clean(message)
+        end
+      end
+
+      private
+
+      def clean(message)
+        message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
+          .encode("UTF-8")
+          .gsub("\0".encode("UTF-8"), "")
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/git/path_helper.rb b/lib/gitlab/git/path_helper.rb
new file mode 100644
index 00000000000..0148cd8df05
--- /dev/null
+++ b/lib/gitlab/git/path_helper.rb
@@ -0,0 +1,16 @@
+module Gitlab
+  module Git
+    class PathHelper
+      class << self
+        def normalize_path(filename)
+          # Strip all leading slashes so that //foo -> foo
+          filename[/^\/*/] = ''
+
+          # Expand relative paths (e.g. foo/../bar)
+          filename = Pathname.new(filename)
+          filename.relative_path_from(Pathname.new(''))
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/git/popen.rb b/lib/gitlab/git/popen.rb
new file mode 100644
index 00000000000..df9ca3ee5ac
--- /dev/null
+++ b/lib/gitlab/git/popen.rb
@@ -0,0 +1,26 @@
+require 'open3'
+
+module Gitlab
+  module Git
+    module Popen
+      def popen(cmd, path)
+        unless cmd.is_a?(Array)
+          raise "System commands must be given as an array of strings"
+        end
+
+        vars = { "PWD" => path }
+        options = { chdir: path }
+
+        @cmd_output = ""
+        @cmd_status = 0
+        Open3.popen3(vars, *cmd, options) do |stdin, stdout, stderr, wait_thr|
+          @cmd_output << stdout.read
+          @cmd_output << stderr.read
+          @cmd_status = wait_thr.value.exitstatus
+        end
+
+        [@cmd_output, @cmd_status]
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/git/ref.rb b/lib/gitlab/git/ref.rb
new file mode 100644
index 00000000000..37ef6836742
--- /dev/null
+++ b/lib/gitlab/git/ref.rb
@@ -0,0 +1,49 @@
+module Gitlab
+  module Git
+    class Ref
+      include Gitlab::Git::EncodingHelper
+
+      # Branch or tag name
+      # without "refs/tags|heads" prefix
+      attr_reader :name
+
+      # Target sha.
+      # Usually it is commit sha but in case
+      # when tag reference on other tag it can be tag sha
+      attr_reader :target
+
+      # Dereferenced target
+      # Commit object to which the Ref points to
+      attr_reader :dereferenced_target
+
+      # Extract branch name from full ref path
+      #
+      # Ex.
+      #   Ref.extract_branch_name('refs/heads/master') #=> 'master'
+      def self.extract_branch_name(str)
+        str.gsub(/\Arefs\/heads\//, '')
+      end
+
+      def self.dereference_object(object)
+        object = object.target while object.is_a?(Rugged::Tag::Annotation)
+
+        object
+      end
+
+      def initialize(repository, name, target)
+        encode! name
+        @name = name.gsub(/\Arefs\/(tags|heads)\//, '')
+        @dereferenced_target = Gitlab::Git::Commit.find(repository, target)
+        @target = if target.respond_to?(:oid)
+                    target.oid
+                  elsif target.respond_to?(:name)
+                    target.name
+                  elsif target.is_a? String
+                    target
+                  else
+                    nil
+                  end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb
new file mode 100644
index 00000000000..963b326a730
--- /dev/null
+++ b/lib/gitlab/git/repository.rb
@@ -0,0 +1,1253 @@
+# Gitlab::Git::Repository is a wrapper around native Rugged::Repository object
+require_relative 'encoding_helper'
+require_relative 'path_helper'
+require 'forwardable'
+require 'tempfile'
+require 'forwardable'
+require "rubygems/package"
+
+module Gitlab
+  module Git
+    class Repository
+      extend Forwardable
+      include Gitlab::Git::Popen
+
+      SEARCH_CONTEXT_LINES = 3
+
+      class NoRepository < StandardError; end
+      class InvalidBlobName < StandardError; end
+      class InvalidRef < StandardError; end
+
+      # Full path to repo
+      attr_reader :path
+
+      # Directory name of repo
+      attr_reader :name
+
+      # Rugged repo object
+      attr_reader :rugged
+
+      # 'path' must be the path to a _bare_ git repository, e.g.
+      # /path/to/my-repo.git
+      def initialize(path)
+        @path = path
+        @name = path.split("/").last
+        @attributes = Gitlab::Git::Attributes.new(path)
+      end
+
+      # Default branch in the repository
+      def root_ref
+        @root_ref ||= discover_default_branch
+      end
+
+      # Alias to old method for compatibility
+      def raw
+        rugged
+      end
+
+      def rugged
+        @rugged ||= Rugged::Repository.new(path)
+      rescue Rugged::RepositoryError, Rugged::OSError
+        raise NoRepository.new('no repository for such path')
+      end
+
+      # Returns an Array of branch names
+      # sorted by name ASC
+      def branch_names
+        branches.map(&:name)
+      end
+
+      # Returns an Array of Branches
+      def branches
+        rugged.branches.map do |rugged_ref|
+          begin
+            Gitlab::Git::Branch.new(self, rugged_ref.name, rugged_ref.target)
+          rescue Rugged::ReferenceError
+            # Omit invalid branch
+          end
+        end.compact.sort_by(&:name)
+      end
+
+      def reload_rugged
+        @rugged = nil
+      end
+
+      # Directly find a branch with a simple name (e.g. master)
+      #
+      # force_reload causes a new Rugged repository to be instantiated
+      #
+      # This is to work around a bug in libgit2 that causes in-memory refs to
+      # be stale/invalid when packed-refs is changed.
+      # See https://gitlab.com/gitlab-org/gitlab-ce/issues/15392#note_14538333
+      def find_branch(name, force_reload = false)
+        reload_rugged if force_reload
+
+        rugged_ref = rugged.branches[name]
+        Gitlab::Git::Branch.new(self, rugged_ref.name, rugged_ref.target) if rugged_ref
+      end
+
+      def local_branches
+        rugged.branches.each(:local).map do |branch|
+          Gitlab::Git::Branch.new(self, branch.name, branch.target)
+        end
+      end
+
+      # Returns the number of valid branches
+      def branch_count
+        rugged.branches.count do |ref|
+          begin
+            ref.name && ref.target # ensures the branch is valid
+
+            true
+          rescue Rugged::ReferenceError
+            false
+          end
+        end
+      end
+
+      # Returns an Array of tag names
+      def tag_names
+        rugged.tags.map { |t| t.name }
+      end
+
+      # Returns an Array of Tags
+      def tags
+        rugged.references.each("refs/tags/*").map do |ref|
+          message = nil
+
+          if ref.target.is_a?(Rugged::Tag::Annotation)
+            tag_message = ref.target.message
+
+            if tag_message.respond_to?(:chomp)
+              message = tag_message.chomp
+            end
+          end
+
+          Gitlab::Git::Tag.new(self, ref.name, ref.target, message)
+        end.sort_by(&:name)
+      end
+
+      # Returns true if the given tag exists
+      #
+      # name - The name of the tag as a String.
+      def tag_exists?(name)
+        !!rugged.tags[name]
+      end
+
+      # Returns true if the given branch exists
+      #
+      # name - The name of the branch as a String.
+      def branch_exists?(name)
+        rugged.branches.exists?(name)
+
+      # If the branch name is invalid (e.g. ".foo") Rugged will raise an error.
+      # Whatever code calls this method shouldn't have to deal with that so
+      # instead we just return `false` (which is true since a branch doesn't
+      # exist when it has an invalid name).
+      rescue Rugged::ReferenceError
+        false
+      end
+
+      # Returns an Array of branch and tag names
+      def ref_names
+        branch_names + tag_names
+      end
+
+      # Deprecated. Will be removed in 5.2
+      def heads
+        rugged.references.each("refs/heads/*").map do |head|
+          Gitlab::Git::Ref.new(self, head.name, head.target)
+        end.sort_by(&:name)
+      end
+
+      def has_commits?
+        !empty?
+      end
+
+      def empty?
+        rugged.empty?
+      end
+
+      def bare?
+        rugged.bare?
+      end
+
+      def repo_exists?
+        !!rugged
+      end
+
+      # Discovers the default branch based on the repository's available branches
+      #
+      # - If no branches are present, returns nil
+      # - If one branch is present, returns its name
+      # - If two or more branches are present, returns current HEAD or master or first branch
+      def discover_default_branch
+        names = branch_names
+
+        return if names.empty?
+
+        return names[0] if names.length == 1
+
+        if rugged_head
+          extracted_name = Ref.extract_branch_name(rugged_head.name)
+
+          return extracted_name if names.include?(extracted_name)
+        end
+
+        if names.include?('master')
+          'master'
+        else
+          names[0]
+        end
+      end
+
+      def rugged_head
+        rugged.head
+      rescue Rugged::ReferenceError
+        nil
+      end
+
+      def archive_metadata(ref, storage_path, format = "tar.gz")
+        ref ||= root_ref
+        commit = Gitlab::Git::Commit.find(self, ref)
+        return {} if commit.nil?
+
+        project_name = self.name.chomp('.git')
+        prefix = "#{project_name}-#{ref}-#{commit.id}"
+
+        {
+          'RepoPath' => path,
+          'ArchivePrefix' => prefix,
+          'ArchivePath' => archive_file_path(prefix, storage_path, format),
+          'CommitId' => commit.id,
+        }
+      end
+
+      def archive_file_path(name, storage_path, format = "tar.gz")
+        # Build file path
+        return nil unless name
+
+        extension =
+          case format
+          when "tar.bz2", "tbz", "tbz2", "tb2", "bz2"
+            "tar.bz2"
+          when "tar"
+            "tar"
+          when "zip"
+            "zip"
+          else
+            # everything else should fall back to tar.gz
+            "tar.gz"
+          end
+
+        file_name = "#{name}.#{extension}"
+        File.join(storage_path, self.name, file_name)
+      end
+
+      # Return repo size in megabytes
+      def size
+        size = popen(%w(du -sk), path).first.strip.to_i
+        (size.to_f / 1024).round(2)
+      end
+
+      # Returns an array of BlobSnippets for files at the specified +ref+ that
+      # contain the +query+ string.
+      def search_files(query, ref = nil)
+        greps = []
+        ref ||= root_ref
+
+        populated_index(ref).each do |entry|
+          # Discard submodules
+          next if submodule?(entry)
+
+          blob = Gitlab::Git::Blob.raw(self, entry[:oid])
+
+          # Skip binary files
+          next if blob.data.encoding == Encoding::ASCII_8BIT
+
+          blob.load_all_data!(self)
+          greps += build_greps(blob.data, query, ref, entry[:path])
+        end
+
+        greps
+      end
+
+      # Use the Rugged Walker API to build an array of commits.
+      #
+      # Usage.
+      #   repo.log(
+      #     ref: 'master',
+      #     path: 'app/models',
+      #     limit: 10,
+      #     offset: 5,
+      #     after: Time.new(2016, 4, 21, 14, 32, 10)
+      #   )
+      #
+      def log(options)
+        default_options = {
+          limit: 10,
+          offset: 0,
+          path: nil,
+          follow: false,
+          skip_merges: false,
+          disable_walk: false,
+          after: nil,
+          before: nil
+        }
+
+        options = default_options.merge(options)
+        options[:limit] ||= 0
+        options[:offset] ||= 0
+        actual_ref = options[:ref] || root_ref
+        begin
+          sha = sha_from_ref(actual_ref)
+        rescue Rugged::OdbError, Rugged::InvalidError, Rugged::ReferenceError
+          # Return an empty array if the ref wasn't found
+          return []
+        end
+
+        if log_using_shell?(options)
+          log_by_shell(sha, options)
+        else
+          log_by_walk(sha, options)
+        end
+      end
+
+      def log_using_shell?(options)
+        options[:path].present? ||
+          options[:disable_walk] ||
+          options[:skip_merges] ||
+          options[:after] ||
+          options[:before]
+      end
+
+      def log_by_walk(sha, options)
+        walk_options = {
+          show: sha,
+          sort: Rugged::SORT_DATE,
+          limit: options[:limit],
+          offset: options[:offset]
+        }
+        Rugged::Walker.walk(rugged, walk_options).to_a
+      end
+
+      def log_by_shell(sha, options)
+        cmd = %W(git --git-dir=#{path} log)
+        cmd += %W(-n #{options[:limit].to_i})
+        cmd += %w(--format=%H)
+        cmd += %W(--skip=#{options[:offset].to_i})
+        cmd += %w(--follow) if options[:follow]
+        cmd += %w(--no-merges) if options[:skip_merges]
+        cmd += %W(--after=#{options[:after].iso8601}) if options[:after]
+        cmd += %W(--before=#{options[:before].iso8601}) if options[:before]
+        cmd += [sha]
+        cmd += %W(-- #{options[:path]}) if options[:path].present?
+
+        raw_output = IO.popen(cmd) {|io| io.read }
+
+        log = raw_output.lines.map do |c|
+          Rugged::Commit.new(rugged, c.strip)
+        end
+
+        log.is_a?(Array) ? log : []
+      end
+
+      def sha_from_ref(ref)
+        rev_parse_target(ref).oid
+      end
+
+      # Return the object that +revspec+ points to.  If +revspec+ is an
+      # annotated tag, then return the tag's target instead.
+      def rev_parse_target(revspec)
+        obj = rugged.rev_parse(revspec)
+        Ref.dereference_object(obj)
+      end
+
+      # Return a collection of Rugged::Commits between the two revspec arguments.
+      # See http://git-scm.com/docs/git-rev-parse.html#_specifying_revisions for
+      # a detailed list of valid arguments.
+      def commits_between(from, to)
+        walker = Rugged::Walker.new(rugged)
+        walker.sorting(Rugged::SORT_DATE | Rugged::SORT_REVERSE)
+
+        sha_from = sha_from_ref(from)
+        sha_to = sha_from_ref(to)
+
+        walker.push(sha_to)
+        walker.hide(sha_from)
+
+        commits = walker.to_a
+        walker.reset
+
+        commits
+      end
+
+      # Counts the amount of commits between `from` and `to`.
+      def count_commits_between(from, to)
+        commits_between(from, to).size
+      end
+
+      # Returns the SHA of the most recent common ancestor of +from+ and +to+
+      def merge_base_commit(from, to)
+        rugged.merge_base(from, to)
+      end
+
+      # Return an array of Diff objects that represent the diff
+      # between +from+ and +to+.  See Diff::filter_diff_options for the allowed
+      # diff options.  The +options+ hash can also include :break_rewrites to
+      # split larger rewrites into delete/add pairs.
+      def diff(from, to, options = {}, *paths)
+        Gitlab::Git::DiffCollection.new(diff_patches(from, to, options, *paths), options)
+      end
+
+      # Returns commits collection
+      #
+      # Ex.
+      #   repo.find_commits(
+      #     ref: 'master',
+      #     max_count: 10,
+      #     skip: 5,
+      #     order: :date
+      #   )
+      #
+      #   +options+ is a Hash of optional arguments to git
+      #     :ref is the ref from which to begin (SHA1 or name)
+      #     :contains is the commit contained by the refs from which to begin (SHA1 or name)
+      #     :max_count is the maximum number of commits to fetch
+      #     :skip is the number of commits to skip
+      #     :order is the commits order and allowed value is :date(default) or :topo
+      #
+      def find_commits(options = {})
+        actual_options = options.dup
+
+        allowed_options = [:ref, :max_count, :skip, :contains, :order]
+
+        actual_options.keep_if do |key|
+          allowed_options.include?(key)
+        end
+
+        default_options = { skip: 0 }
+        actual_options = default_options.merge(actual_options)
+
+        walker = Rugged::Walker.new(rugged)
+
+        if actual_options[:ref]
+          walker.push(rugged.rev_parse_oid(actual_options[:ref]))
+        elsif actual_options[:contains]
+          branches_contains(actual_options[:contains]).each do |branch|
+            walker.push(branch.target_id)
+          end
+        else
+          rugged.references.each("refs/heads/*") do |ref|
+            walker.push(ref.target_id)
+          end
+        end
+
+        if actual_options[:order] == :topo
+          walker.sorting(Rugged::SORT_TOPO)
+        else
+          walker.sorting(Rugged::SORT_DATE)
+        end
+
+        commits = []
+        offset = actual_options[:skip]
+        limit = actual_options[:max_count]
+        walker.each(offset: offset, limit: limit) do |commit|
+          gitlab_commit = Gitlab::Git::Commit.decorate(commit)
+          commits.push(gitlab_commit)
+        end
+
+        walker.reset
+
+        commits
+      rescue Rugged::OdbError
+        []
+      end
+
+      # Returns branch names collection that contains the special commit(SHA1
+      # or name)
+      #
+      # Ex.
+      #   repo.branch_names_contains('master')
+      #
+      def branch_names_contains(commit)
+        branches_contains(commit).map { |c| c.name }
+      end
+
+      # Returns branch collection that contains the special commit(SHA1 or name)
+      #
+      # Ex.
+      #   repo.branch_names_contains('master')
+      #
+      def branches_contains(commit)
+        commit_obj = rugged.rev_parse(commit)
+        parent = commit_obj.parents.first unless commit_obj.parents.empty?
+
+        walker = Rugged::Walker.new(rugged)
+
+        rugged.branches.select do |branch|
+          walker.push(branch.target_id)
+          walker.hide(parent) if parent
+          result = walker.any? { |c| c.oid == commit_obj.oid }
+          walker.reset
+
+          result
+        end
+      end
+
+      # Get refs hash which key is SHA1
+      # and value is a Rugged::Reference
+      def refs_hash
+        # Initialize only when first call
+        if @refs_hash.nil?
+          @refs_hash = Hash.new { |h, k| h[k] = [] }
+
+          rugged.references.each do |r|
+            # Symbolic/remote references may not have an OID; skip over them
+            target_oid = r.target.try(:oid)
+            if target_oid
+              sha = rev_parse_target(target_oid).oid
+              @refs_hash[sha] << r
+            end
+          end
+        end
+        @refs_hash
+      end
+
+      # Look up a Rugged object by OID or ref name (resolved with rugged.rev_parse)
+      def lookup(oid_or_ref_name)
+        rugged.rev_parse(oid_or_ref_name)
+      end
+
+      # Return hash with submodules info for this repository
+      #
+      # Ex.
+      #   {
+      #     "rack"  => {
+      #       "id" => "c67be4624545b4263184c4a0e8f887efd0a66320",
+      #       "path" => "rack",
+      #       "url" => "git://github.com/chneukirchen/rack.git"
+      #     },
+      #     "encoding" => {
+      #       "id" => ....
+      #     }
+      #   }
+      #
+      def submodules(ref)
+        commit = rev_parse_target(ref)
+        return {} unless commit
+
+        begin
+          content = blob_content(commit, ".gitmodules")
+        rescue InvalidBlobName
+          return {}
+        end
+
+        parse_gitmodules(commit, content)
+      end
+
+      # Return total commits count accessible from passed ref
+      def commit_count(ref)
+        walker = Rugged::Walker.new(rugged)
+        walker.sorting(Rugged::SORT_TOPO | Rugged::SORT_REVERSE)
+        oid = rugged.rev_parse_oid(ref)
+        walker.push(oid)
+        walker.count
+      end
+
+      # Sets HEAD to the commit specified by +ref+; +ref+ can be a branch or
+      # tag name or a commit SHA.  Valid +reset_type+ values are:
+      #
+      #  [:soft]
+      #    The head will be moved to the commit.
+      #  [:mixed]
+      #    Will trigger a +:soft+ reset, plus the index will be replaced
+      #    with the content of the commit tree.
+      #  [:hard]
+      #    Will trigger a +:mixed+ reset and the working directory will be
+      #    replaced with the content of the index. (Untracked and ignored files
+      #    will be left alone.)
+      def reset(ref, reset_type)
+        rugged.reset(ref, reset_type) # both arguments are forwarded to Rugged unchanged
+      end
+
+      # Intended to mimic `git clean` (recursively delete untracked files), but
+      # NOT IMPLEMENTED yet: the strategy list built below is never used.
+      # Keys meant to be accepted in the +options+ hash:
+      #
+      # :d - Remove untracked directories (currently not read by the code)
+      # :f - Remove untracked directories that are managed by a different
+      #      repository
+      # :x - Remove ignored files
+      #
+      # The value in +options+ must evaluate to true for an option to take
+      # effect.
+      #
+      # Examples:
+      #
+      #   repo.clean(d: true, f: true) # Enable the -d and -f options
+      #
+      #   repo.clean(d: false, x: true) # -x is enabled, -d is not
+      #
+      def clean(options = {})
+        strategies = [:remove_untracked]
+        strategies.push(:force) if options[:f]
+        strategies.push(:remove_ignored) if options[:x]
+
+        # TODO: implement this method (nothing is deleted; +strategies+ is discarded)
+      end
+
+      # Check out the specified ref. Valid options are:
+      #
+      #  :b - Create a new branch at +start_point+ and set HEAD to the new
+      #       branch.
+      #
+      #  * These options are passed to the Rugged::Repository#checkout method:
+      #
+      #  :progress ::
+      #    A callback that will be executed for checkout progress notifications.
+      #    Up to 3 parameters are passed on each execution:
+      #
+      #    - The path to the last updated file (or +nil+ on the very first
+      #      invocation).
+      #    - The number of completed checkout steps.
+      #    - The number of total checkout steps to be performed.
+      #
+      #  :notify ::
+      #    A callback that will be executed for each checkout notification
+      #    types specified with +:notify_flags+. Up to 5 parameters are passed
+      #    on each execution:
+      #
+      #    - An array containing the +:notify_flags+ that caused the callback
+      #      execution.
+      #    - The path of the current file.
+      #    - A hash describing the baseline blob (or +nil+ if it does not
+      #      exist).
+      #    - A hash describing the target blob (or +nil+ if it does not exist).
+      #    - A hash describing the workdir blob (or +nil+ if it does not
+      #      exist).
+      #
+      #  :strategy ::
+      #    A single symbol or an array of symbols representing the strategies
+      #    to use when performing the checkout. Possible values are:
+      #
+      #    :none ::
+      #      Perform a dry run (default).
+      #
+      #    :safe ::
+      #      Allow safe updates that cannot overwrite uncommitted data.
+      #
+      #    :safe_create ::
+      #      Allow safe updates plus creation of missing files.
+      #
+      #    :force ::
+      #      Allow all updates to force working directory to look like index.
+      #
+      #    :allow_conflicts ::
+      #      Allow checkout to make safe updates even if conflicts are found.
+      #
+      #    :remove_untracked ::
+      #      Remove untracked files not in index (that are not ignored).
+      #
+      #    :remove_ignored ::
+      #      Remove ignored files not in index.
+      #
+      #    :update_only ::
+      #      Only update existing files, don't create new ones.
+      #
+      #    :dont_update_index ::
+      #      Normally checkout updates index entries as it goes; this stops
+      #      that.
+      #
+      #    :no_refresh ::
+      #      Don't refresh index/config/etc before doing checkout.
+      #
+      #    :disable_pathspec_match ::
+      #      Treat pathspec as simple list of exact match file paths.
+      #
+      #    :skip_locked_directories ::
+      #      Ignore directories in use, they will be left empty.
+      #
+      #    :skip_unmerged ::
+      #      Allow checkout to skip unmerged files (NOT IMPLEMENTED).
+      #
+      #    :use_ours ::
+      #      For unmerged files, checkout stage 2 from index (NOT IMPLEMENTED).
+      #
+      #    :use_theirs ::
+      #      For unmerged files, checkout stage 3 from index (NOT IMPLEMENTED).
+      #
+      #    :update_submodules ::
+      #      Recursively checkout submodules with same options (NOT
+      #      IMPLEMENTED).
+      #
+      #    :update_submodules_if_changed ::
+      #      Recursively checkout submodules if HEAD moved in super repo (NOT
+      #      IMPLEMENTED).
+      #
+      #  :disable_filters ::
+      #    If +true+, filters like CRLF line conversion will be disabled.
+      #
+      #  :dir_mode ::
+      #    Mode for newly created directories. Default: +0755+.
+      #
+      #  :file_mode ::
+      #    Mode for newly created files. Default: +0755+ or +0644+.
+      #
+      #  :file_open_flags ::
+      #    Mode for opening files. Default:
+      #    <code>IO::CREAT | IO::TRUNC | IO::WRONLY</code>.
+      #
+      #  :notify_flags ::
+      #    A single symbol or an array of symbols representing the cases in
+      #    which the +:notify+ callback should be invoked. Possible values are:
+      #
+      #    :none ::
+      #      Do not invoke the +:notify+ callback (default).
+      #
+      #    :conflict ::
+      #      Invoke the callback for conflicting paths.
+      #
+      #    :dirty ::
+      #      Invoke the callback for "dirty" files, i.e. those that do not need
+      #      an update but no longer match the baseline.
+      #
+      #    :updated ::
+      #      Invoke the callback for any file that was changed.
+      #
+      #    :untracked ::
+      #      Invoke the callback for untracked files.
+      #
+      #    :ignored ::
+      #      Invoke the callback for ignored files.
+      #
+      #    :all ::
+      #      Invoke the callback for all these cases.
+      #
+      #  :paths ::
+      #    A glob string or an array of glob strings specifying which paths
+      #    should be taken into account for the checkout operation. +nil+ will
+      #    match all files.  Default: +nil+.
+      #
+      #  :baseline ::
+      #    A Rugged::Tree that represents the current, expected contents of the
+      #    workdir.  Default: +HEAD+.
+      #
+      #  :target_directory ::
+      #    A path to an alternative workdir directory in which the checkout
+      #    should be performed.
+      def checkout(ref, options = {}, start_point = "HEAD")
+        if options[:b]
+          rugged.branches.create(ref, start_point)
+          options.delete(:b)
+        end
+        default_options = { strategy: [:recreate_missing, :safe] }
+        rugged.checkout(ref, default_options.merge(options))
+      end
+
+      # Delete the branch named +branch_name+ from the repository
+      def delete_branch(branch_name)
+        rugged.branches.delete(branch_name)
+      end
+
+      # Create a new branch named **ref+ based on **stat_point+, HEAD by default
+      #
+      # Examples:
+      #   create_branch("feature")
+      #   create_branch("other-feature", "master")
+      def create_branch(ref, start_point = "HEAD")
+        rugged_ref = rugged.branches.create(ref, start_point)
+        Gitlab::Git::Branch.new(self, rugged_ref.name, rugged_ref.target)
+      rescue Rugged::ReferenceError => e
+        raise InvalidRef.new("Branch #{ref} already exists") if e.to_s =~ /'refs\/heads\/#{ref}'/
+        raise InvalidRef.new("Invalid reference #{start_point}")
+      end
+
+      # Return an Array with the name of every remote of this repository
+      def remote_names
+        rugged.remotes.each_name.to_a
+      end
+
+      # Delete the remote named +remote_name+ from this repository.
+      def remote_delete(remote_name)
+        rugged.remotes.delete(remote_name)
+      end
+
+      # Add a new remote named +remote_name+ pointing at +url+.  Returns a Rugged::Remote object
+      def remote_add(remote_name, url)
+        rugged.remotes.create(remote_name, url)
+      end
+
+      # Update the specified remote using the values in the +options+ hash.
+      # Only :url is honoured for now; without it nothing is changed.
+      # Example:
+      #   repo.remote_update("origin", url: "path/to/repo")
+      def remote_update(remote_name, options = {})
+        # TODO: Implement other remote options
+        rugged.remotes.set_url(remote_name, options[:url]) if options[:url]
+      end
+
+      # Fetch the remote named +remote_name+ (no refspecs or options are passed)
+      def fetch(remote_name)
+        rugged.remotes[remote_name].fetch
+      end
+
+      # Push +*refspecs+ (forwarded as one Array) to the remote identified by +remote_name+.
+      def push(remote_name, *refspecs)
+        rugged.remotes[remote_name].push(refspecs)
+      end
+
+      # Merge the +source_name+ branch into the +target_name+ branch. This is
+      # equivalent to `git merge --no_ff +source_name+`, since a merge commit
+      # is always created.
+      def merge(source_name, target_name, options = {})
+        our_commit = rugged.branches[target_name].target
+        their_commit = rugged.branches[source_name].target
+
+        raise "Invalid merge target" if our_commit.nil?
+        raise "Invalid merge source" if their_commit.nil?
+
+        merge_index = rugged.merge_commits(our_commit, their_commit)
+        return false if merge_index.conflicts?
+
+        actual_options = options.merge(
+          parents: [our_commit, their_commit],
+          tree: merge_index.write_tree(rugged),
+          update_ref: "refs/heads/#{target_name}"
+        )
+        Rugged::Commit.create(rugged, actual_options)
+      end
+
+      def commits_since(from_date)
+        walker = Rugged::Walker.new(rugged)
+        walker.sorting(Rugged::SORT_DATE | Rugged::SORT_REVERSE)
+
+        rugged.references.each("refs/heads/*") do |ref|
+          walker.push(ref.target_id)
+        end
+
+        commits = []
+        walker.each do |commit|
+          break if commit.author[:time].to_date < from_date
+          commits.push(commit)
+        end
+
+        commits
+      end
+
+      AUTOCRLF_VALUES = { # core.autocrlf config string => Ruby value returned by #autocrlf
+        "true" => true,
+        "false" => false,
+        "input" => :input
+      }.freeze # inverted by #autocrlf= to map a Ruby value back to its config string
+
+      def autocrlf
+        AUTOCRLF_VALUES[rugged.config['core.autocrlf']]
+      end
+
+      def autocrlf=(value)
+        rugged.config['core.autocrlf'] = AUTOCRLF_VALUES.invert[value]
+      end
+
+      # Create a new directory with a .gitkeep file. Creates
+      # all required nested directories (i.e. mkdir -p behavior)
+      #
+      # options should contain next structure:
+      #   author: {
+      #     email: 'user@example.com',
+      #     name: 'Test User',
+      #     time: Time.now
+      #   },
+      #   committer: {
+      #     email: 'user@example.com',
+      #     name: 'Test User',
+      #     time: Time.now
+      #   },
+      #   commit: {
+      #     message: 'Wow such commit',
+      #     branch: 'master',
+      #     update_ref: false
+      #   }
+      def mkdir(path, options = {})
+        # Check if this directory exists; if it does, then don't bother
+        # adding .gitkeep file.
+        ref = options[:commit][:branch]
+        path = Gitlab::Git::PathHelper.normalize_path(path).to_s
+        rugged_ref = rugged.ref(ref)
+
+        raise InvalidRef.new("Invalid ref") if rugged_ref.nil?
+
+        target_commit = rugged_ref.target
+
+        raise InvalidRef.new("Invalid target commit") if target_commit.nil?
+
+        entry = tree_entry(target_commit, path)
+
+        if entry
+          if entry[:type] == :blob
+            raise InvalidBlobName.new("Directory already exists as a file")
+          else
+            raise InvalidBlobName.new("Directory already exists")
+          end
+        end
+
+        options[:file] = {
+          content: '',
+          path: "#{path}/.gitkeep",
+          update: true
+        }
+
+        Gitlab::Git::Blob.commit(self, options)
+      end
+
+      # Returns result like "git ls-files" , recursive and full file path
+      #
+      # Ex.
+      #   repo.ls_files('master')
+      #
+      def ls_files(ref)
+        actual_ref = ref || root_ref
+
+        begin
+          sha_from_ref(actual_ref)
+        rescue Rugged::OdbError, Rugged::InvalidError, Rugged::ReferenceError
+          # Return an empty array if the ref wasn't found
+          return []
+        end
+
+        cmd = %W(git --git-dir=#{path} ls-tree)
+        cmd += %w(-r)
+        cmd += %w(--full-tree)
+        cmd += %w(--full-name)
+        cmd += %W(-- #{actual_ref})
+
+        raw_output = IO.popen(cmd, &:read).split("\n").map do |f|
+          stuff, path = f.split("\t")
+          _mode, type, _sha = stuff.split(" ")
+          path if type == "blob"
+          # Contain only blob type
+        end
+
+        raw_output.compact
+      end
+
+      def copy_gitattributes(ref)
+        begin
+          commit = lookup(ref)
+        rescue Rugged::ReferenceError
+          raise InvalidRef.new("Ref #{ref} is invalid")
+        end
+
+        # Create the paths
+        info_dir_path = File.join(path, 'info')
+        info_attributes_path = File.join(info_dir_path, 'attributes')
+
+        begin
+          # Retrieve the contents of the blob
+          gitattributes_content = blob_content(commit, '.gitattributes')
+        rescue InvalidBlobName
+          # No .gitattributes found. Should now remove any info/attributes and return
+          File.delete(info_attributes_path) if File.exist?(info_attributes_path)
+          return
+        end
+
+        # Create the info directory if needed
+        Dir.mkdir(info_dir_path) unless File.directory?(info_dir_path)
+
+        # Write the contents of the .gitattributes file to info/attributes
+        # Use binary mode to prevent Rails from converting ASCII-8BIT to UTF-8
+        File.open(info_attributes_path, "wb") do |file|
+          file.write(gitattributes_content)
+        end
+      end
+
+      # Checks if the blob should be diffable according to its attributes
+      def diffable?(blob)
+        attributes(blob.path).fetch('diff') { blob.text? }
+      end
+
+      # Returns the Git attributes for the given file path, as reported by the
+      # object held in @attributes (assigned outside this method).
+      # See `Gitlab::Git::Attributes` for more information.
+      def attributes(path)
+        @attributes.attributes(path)
+      end
+
+      private
+
+      # Get the content of a blob for a given commit.  If the blob is a commit
+      # (for submodules) then return the blob's OID.
+      def blob_content(commit, blob_name)
+        blob_entry = tree_entry(commit, blob_name)
+
+        unless blob_entry
+          raise InvalidBlobName.new("Invalid blob name: #{blob_name}")
+        end
+
+        case blob_entry[:type]
+        when :commit
+          blob_entry[:oid]
+        when :tree
+          raise InvalidBlobName.new("#{blob_name} is a tree, not a blob")
+        when :blob
+          rugged.lookup(blob_entry[:oid]).content
+        end
+      end
+
+      # Parses the contents of a .gitmodules file and returns a hash of
+      # submodule information.
+      def parse_gitmodules(commit, content)
+        results = {}
+
+        current = ""
+        content.split("\n").each do |txt|
+          if txt =~ /^\s*\[/
+            current = txt.match(/(?<=").*(?=")/)[0]
+            results[current] = {}
+          else
+            next unless results[current]
+            match_data = txt.match(/(\w+)\s*=\s*(.*)/)
+            next unless match_data
+            target = match_data[2].chomp
+            results[current][match_data[1]] = target
+
+            if match_data[1] == "path"
+              begin
+                results[current]["id"] = blob_content(commit, target)
+              rescue InvalidBlobName
+                results.delete(current)
+              end
+            end
+          end
+        end
+
+        results
+      end
+
+      # Returns true if +commit+ introduced changes to +path+, using commit
+      # trees to make that determination.  Uses the history simplification
+      # rules that `git log` uses by default, where a commit is omitted if it
+      # is TREESAME to any parent.  Parents examined after the first TREESAME
+      # one are hidden from +walker+.
+      # If the +follow+ option is true and the file specified by +path+ was
+      # renamed, then the path value is set to the old path.
+      def commit_touches_path?(commit, path, follow, walker)
+        entry = tree_entry(commit, path)
+
+        if commit.parents.empty?
+          # This is the root commit, return true if it has +path+ in its tree
+          return !entry.nil?
+        end
+
+        num_treesame = 0 # number of parents whose entry for +path+ matches this commit's
+        commit.parents.each do |parent|
+          parent_entry = tree_entry(parent, path)
+
+          # Only follow the first TREESAME parent for merge commits
+          if num_treesame > 0
+            walker.hide(parent)
+            next
+          end
+
+          if entry.nil? && parent_entry.nil? # absent on both sides counts as TREESAME
+            num_treesame += 1
+          elsif entry && parent_entry && entry[:oid] == parent_entry[:oid] # same object on both sides
+            num_treesame += 1
+          end
+        end
+
+        case num_treesame
+        when 0 # differs from every parent: the commit touched +path+
+          detect_rename(commit, commit.parents.first, path) if follow # may rewrite +path+ in place
+          true
+        else false
+        end
+      end
+
+      # Find the entry for +path+ in the tree for +commit+
+      def tree_entry(commit, path)
+        pathname = Pathname.new(path)
+        first = true
+        tmp_entry = nil
+
+        pathname.each_filename do |dir|
+          if first
+            tmp_entry = commit.tree[dir]
+            first = false
+          elsif tmp_entry.nil?
+            return nil
+          else
+            tmp_entry = rugged.lookup(tmp_entry[:oid])
+            return nil unless tmp_entry.type == :tree
+            tmp_entry = tmp_entry[dir]
+          end
+        end
+
+        tmp_entry
+      end
+
+      # Compare +commit+ and +parent+ for +path+.  If +path+ is a file and was
+      # renamed in +commit+, then set +path+ to the old filename.
+      def detect_rename(commit, parent, path)
+        diff = parent.diff(commit, paths: [path], disable_pathspec_match: true)
+
+        # If +path+ is a filename, not a directory, then we should only have
+        # one delta.  We don't need to follow renames for directories.
+        return nil if diff.each_delta.count > 1
+
+        delta = diff.each_delta.first
+        if delta.added?
+          full_diff = parent.diff(commit)
+          full_diff.find_similar!
+
+          full_diff.each_delta do |full_delta|
+            if full_delta.renamed? && path == full_delta.new_file[:path]
+              # Look for the old path in ancestors
+              path.replace(full_delta.old_file[:path])
+            end
+          end
+        end
+      end
+
+      def archive_to_file(treeish = 'master', filename = 'archive.tar.gz', format = nil, compress_cmd = %w(gzip -n))
+        git_archive_cmd = %W(git --git-dir=#{path} archive)
+
+        # Put files into a directory before archiving
+        prefix = "#{archive_name(treeish)}/"
+        git_archive_cmd << "--prefix=#{prefix}"
+
+        # Format defaults to tar
+        git_archive_cmd << "--format=#{format}" if format
+
+        git_archive_cmd += %W(-- #{treeish})
+
+        open(filename, 'w') do |file|
+          # Create a pipe to act as the '|' in 'git archive ... | gzip'
+          pipe_rd, pipe_wr = IO.pipe
+
+          # Get the compression process ready to accept data from the read end
+          # of the pipe
+          compress_pid = spawn(*nice(compress_cmd), in: pipe_rd, out: file)
+          # The read end belongs to the compression process now; we should
+          # close our file descriptor for it.
+          pipe_rd.close
+
+          # Start 'git archive' and tell it to write into the write end of the
+          # pipe.
+          git_archive_pid = spawn(*nice(git_archive_cmd), out: pipe_wr)
+          # The write end belongs to 'git archive' now; close it.
+          pipe_wr.close
+
+          # When 'git archive' and the compression process are finished, we are
+          # done.
+          Process.waitpid(git_archive_pid)
+          raise "#{git_archive_cmd.join(' ')} failed" unless $?.success?
+          Process.waitpid(compress_pid)
+          raise "#{compress_cmd.join(' ')} failed" unless $?.success?
+        end
+      end
+
+      def nice(cmd)
+        nice_cmd = %w(nice -n 20)
+        unless unsupported_platform?
+          nice_cmd += %w(ionice -c 2 -n 7)
+        end
+        nice_cmd + cmd
+      end
+
+      def unsupported_platform?
+        %w[darwin freebsd solaris].map { |platform| RUBY_PLATFORM.include?(platform) }.any?
+      end
+
+      # Returns true if the index entry has the special file mode that denotes
+      # a submodule.
+      def submodule?(index_entry)
+        index_entry[:mode] == 57344
+      end
+
+      # Return a Rugged::Index that has read from the tree at +ref_name+
+      def populated_index(ref_name)
+        commit = rev_parse_target(ref_name)
+        index = rugged.index
+        index.read_tree(commit.tree)
+        index
+      end
+
+      # Return an array of BlobSnippets for lines in +file_contents+ that match
+      # +query+
+      def build_greps(file_contents, query, ref, filename)
+        # The file_contents string is potentially huge so we make sure to loop
+        # through it one line at a time. This gives Ruby the chance to GC lines
+        # we are not interested in.
+        #
+        # We need to do a little extra work because we are not looking for just
+        # the lines that matches the query, but also for the context
+        # (surrounding lines). We will use Enumerable#each_cons to efficiently
+        # loop through the lines while keeping surrounding lines on hand.
+        #
+        # First, we turn "foo\nbar\nbaz" into
+        # [
+        #  [nil, -3], [nil, -2], [nil, -1],
+        #  ['foo', 0], ['bar', 1], ['baz', 3],
+        #  [nil, 4], [nil, 5], [nil, 6]
+        # ]
+        lines_with_index = Enumerator.new do |yielder|
+          # Yield fake 'before' lines for the first line of file_contents
+          (-SEARCH_CONTEXT_LINES..-1).each do |i|
+            yielder.yield [nil, i]
+          end
+
+          # Yield the actual file contents
+          count = 0
+          file_contents.each_line do |line|
+            line.chomp!
+            yielder.yield [line, count]
+            count += 1
+          end
+
+          # Yield fake 'after' lines for the last line of file_contents
+          (count + 1..count + SEARCH_CONTEXT_LINES).each do |i|
+            yielder.yield [nil, i]
+          end
+        end
+
+        greps = []
+
+        # Loop through consecutive blocks of lines with indexes
+        lines_with_index.each_cons(2 * SEARCH_CONTEXT_LINES + 1) do |line_block|
+          # Get the 'middle' line and index from the block
+          line, _ = line_block[SEARCH_CONTEXT_LINES]
+
+          next unless line && line.match(/#{Regexp.escape(query)}/i)
+
+          # Yay, 'line' contains a match!
+          # Get an array with just the context lines (no indexes)
+          match_with_context = line_block.map(&:first)
+          # Remove 'nil' lines in case we are close to the first or last line
+          match_with_context.compact!
+
+          # Get the line number (1-indexed) of the first context line
+          first_context_line_number = line_block[0][1] + 1
+
+          greps << Gitlab::Git::BlobSnippet.new(
+            ref,
+            match_with_context,
+            first_context_line_number,
+            filename
+          )
+        end
+
+        greps
+      end
+
+      # Return the Rugged patches for the diff between +from+ and +to+.
+      def diff_patches(from, to, options = {}, *paths)
+        options ||= {}
+        break_rewrites = options[:break_rewrites]
+        actual_options = Gitlab::Git::Diff.filter_diff_options(options.merge(paths: paths))
+
+        diff = rugged.diff(from, to, actual_options)
+        diff.find_similar!(break_rewrites: break_rewrites)
+        diff.each_patch
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/git/tag.rb b/lib/gitlab/git/tag.rb
new file mode 100644
index 00000000000..b5342c3d310
--- /dev/null
+++ b/lib/gitlab/git/tag.rb
@@ -0,0 +1,17 @@
+module Gitlab
+  module Git
+    class Tag < Ref # a Ref that additionally carries an optional tag message
+      attr_reader :object_sha # NOTE(review): never assigned in this class -- presumably set elsewhere; confirm
+
+      def initialize(repository, name, target, message = nil)
+        super(repository, name, target)
+
+        @message = message # raw message; encoded on read by #message
+      end
+
+      def message
+        encode! @message # encode! is not defined here -- presumably provided via Ref; confirm
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/git/tree.rb b/lib/gitlab/git/tree.rb
new file mode 100644
index 00000000000..f7450e8b58f
--- /dev/null
+++ b/lib/gitlab/git/tree.rb
@@ -0,0 +1,104 @@
+module Gitlab
+  module Git
+    class Tree
+      include Gitlab::Git::EncodingHelper
+
+      attr_accessor :id, :root_id, :name, :path, :type,
+        :mode, :commit_id, :submodule_url
+
+      class << self
+        # Get list of tree objects
+        # for repository based on commit sha and path
+        # Uses rugged for raw objects
+        def where(repository, sha, path = nil)
+          path = nil if path == '' || path == '/'
+
+          commit = repository.lookup(sha)
+          root_tree = commit.tree
+
+          tree = if path
+                   id = find_id_by_path(repository, root_tree.oid, path)
+                   if id
+                     repository.lookup(id)
+                   else
+                     []
+                   end
+                 else
+                   root_tree
+                 end
+
+          tree.map do |entry|
+            new(
+              id: entry[:oid],
+              root_id: root_tree.oid,
+              name: entry[:name],
+              type: entry[:type],
+              mode: entry[:filemode],
+              path: path ? File.join(path, entry[:name]) : entry[:name],
+              commit_id: sha,
+            )
+          end
+        end
+
+        # Recursive search of tree id for path
+        #
+        # Ex.
+        #   blog/            # oid: 1a
+        #     app/           # oid: 2a
+        #       models/      # oid: 3a
+        #       views/       # oid: 4a
+        #
+        #
+        # Tree.find_id_by_path(repo, '1a', 'app/models') # => '3a'
+        #
+        def find_id_by_path(repository, root_id, path)
+          root_tree = repository.lookup(root_id)
+          path_arr = path.split('/')
+
+          entry = root_tree.find do |entry|
+            entry[:name] == path_arr[0] && entry[:type] == :tree
+          end
+
+          return nil unless entry
+
+          if path_arr.size > 1
+            path_arr.shift
+            find_id_by_path(repository, entry[:oid], path_arr.join('/'))
+          else
+            entry[:oid]
+          end
+        end
+      end
+
+      def initialize(options)
+        %w(id root_id name path type mode commit_id).each do |key|
+          self.send("#{key}=", options[key.to_sym])
+        end
+      end
+
+      def name
+        encode! @name
+      end
+
+      def dir?
+        type == :tree
+      end
+
+      def file?
+        type == :blob
+      end
+
+      def submodule?
+        type == :commit
+      end
+
+      def readme?
+        name =~ /^readme/i
+      end
+
+      def contributing?
+        name =~ /^contributing/i
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/git/util.rb b/lib/gitlab/git/util.rb
new file mode 100644
index 00000000000..7973da2e8f8
--- /dev/null
+++ b/lib/gitlab/git/util.rb
@@ -0,0 +1,18 @@
+module Gitlab
+  module Git
+    module Util
+      LINE_SEP = "\n".freeze
+
+      def self.count_lines(string)
+        case string[-1]
+        when nil
+          0
+        when LINE_SEP
+          string.count(LINE_SEP)
+        else
+          string.count(LINE_SEP) + 1
+        end
+      end
+    end
+  end
+end
author	Douwe Maan <douwe@gitlab.com>	2017-01-05 12:22:25 +0000
committer	Douwe Maan <douwe@gitlab.com>	2017-01-05 12:22:25 +0000
commit	efb8da895fb8d0976f98d0e54e36d9e61dccef89 (patch)
tree	fd62ea90381aef068bd2c7a6fe141e2a000b28e4 /lib
parent	0a1d1fbd292ee4dac8066973027b084eab437fe7 (diff)
parent	2e20a71d21408d0a98b392209ff78bade26984bb (diff)
download	gitlab-ce-efb8da895fb8d0976f98d0e54e36d9e61dccef89.tar.gz