diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/gitlab/git/attributes.rb | 131 | ||||
-rw-r--r-- | lib/gitlab/git/blame.rb | 77 | ||||
-rw-r--r-- | lib/gitlab/git/blob.rb | 333 | ||||
-rw-r--r-- | lib/gitlab/git/blob_snippet.rb | 32 | ||||
-rw-r--r-- | lib/gitlab/git/branch.rb | 6 | ||||
-rw-r--r-- | lib/gitlab/git/commit.rb | 310 | ||||
-rw-r--r-- | lib/gitlab/git/commit_stats.rb | 26 | ||||
-rw-r--r-- | lib/gitlab/git/compare.rb | 43 | ||||
-rw-r--r-- | lib/gitlab/git/diff.rb | 322 | ||||
-rw-r--r-- | lib/gitlab/git/diff_collection.rb | 129 | ||||
-rw-r--r-- | lib/gitlab/git/encoding_helper.rb | 58 | ||||
-rw-r--r-- | lib/gitlab/git/path_helper.rb | 16 | ||||
-rw-r--r-- | lib/gitlab/git/popen.rb | 26 | ||||
-rw-r--r-- | lib/gitlab/git/ref.rb | 49 | ||||
-rw-r--r-- | lib/gitlab/git/repository.rb | 1253 | ||||
-rw-r--r-- | lib/gitlab/git/tag.rb | 17 | ||||
-rw-r--r-- | lib/gitlab/git/tree.rb | 104 | ||||
-rw-r--r-- | lib/gitlab/git/util.rb | 18 |
18 files changed, 2950 insertions, 0 deletions
module Gitlab
  module Git
    # Class for parsing Git attribute files and extracting the attributes for
    # file patterns.
    #
    # Unlike Rugged this parser only needs a single IO call (a call to `open`),
    # vastly reducing the time spent in extracting attributes.
    #
    # This class _only_ supports parsing the attributes file located at
    # `$GIT_DIR/info/attributes` as GitLab doesn't use any other files
    # (`.gitattributes` is copied to this particular path).
    #
    # Basic usage:
    #
    #     attributes = Gitlab::Git::Attributes.new(some_repo.path)
    #
    #     attributes.attributes('README.md') # => { "eol" => "lf" }
    class Attributes
      # path - The path to the Git repository.
      def initialize(path)
        @path = File.expand_path(path)
        @patterns = nil
      end

      # Returns all the Git attributes for the given path.
      #
      # path - A path to a file for which to get the attributes.
      #
      # Returns the attribute Hash of the first matching pattern, or an empty
      # Hash when no pattern matches.
      def attributes(path)
        full_path = File.join(@path, path)

        patterns.each do |pattern, attrs|
          return attrs if File.fnmatch?(pattern, full_path)
        end

        {}
      end

      # Returns a Hash containing the file patterns and their attributes.
      # The attributes file is parsed once and the result is memoized.
      def patterns
        @patterns ||= parse_file
      end

      # Parses an attribute string.
      #
      # These strings can be in the following formats:
      #
      #     text      # => { "text" => true }
      #     -text     # => { "text" => false }
      #     key=value # => { "key" => "value" }
      #
      # string - The string to parse.
      #
      # Returns a Hash containing the attributes and their values.
      def parse_attributes(string)
        values = {}
        dash = '-'
        equal = '='
        binary = 'binary'

        string.split(/\s+/).each do |chunk|
          # Data such as "foo = bar" should be treated as "foo" and "bar" being
          # separate boolean attributes.
          next if chunk == equal

          key = chunk

          # Input: "-foo"
          if chunk.start_with?(dash)
            # byteslice works in bytes, so the remaining length must be
            # measured in bytes too; a character count would cut multibyte
            # attribute names short.
            key = chunk.byteslice(1, chunk.bytesize - 1)
            value = false

          # Input: "foo=bar"
          elsif chunk.include?(equal)
            key, value = chunk.split(equal, 2)

          # Input: "foo"
          else
            value = true
          end

          values[key] = value

          # When the "binary" option is set the "diff" option should be set to
          # the inverse. If "diff" is later set it should overwrite the
          # automatically set value.
          values['diff'] = false if key == binary && value
        end

        values
      end

      # Iterates over every line in the attributes file, yielding each line
      # with surrounding whitespace stripped.
      #
      # Yields nothing when the file does not exist. Reading stops at the
      # first line that is not valid in its encoding.
      def each_line
        full_path = File.join(@path, 'info/attributes')

        return unless File.exist?(full_path)

        File.open(full_path, 'r') do |handle|
          handle.each_line do |line|
            break unless line.valid_encoding?

            yield line.strip
          end
        end
      end

      private

      # Parses the Git attributes file.
      #
      # Returns a Hash mapping each pattern (joined onto the repository path)
      # to its parsed attributes, ordered from the newest entry to the oldest.
      def parse_file
        pairs = []
        comment = '#'

        each_line do |line|
          next if line.start_with?(comment) || line.empty?

          pattern, attrs = line.split(/\s+/, 2)

          parsed = attrs ? parse_attributes(attrs) : {}

          pairs << [File.join(@path, pattern), parsed]
        end

        # Newer entries take precedence over older entries. Walking the pairs
        # newest-first and keeping only the first value seen per pattern makes
        # that hold for repeated patterns as well (a plain `to_h` would let
        # the older duplicate overwrite the newer one).
        pairs.reverse_each.with_object({}) do |(pattern, parsed), result|
          result[pattern] = parsed unless result.key?(pattern)
        end
      end
    end
  end
end
# Recursive search of a tree entry by path.
#
# Ex.
#   blog/     # oid: 1a
#     app/    # oid: 2a
#     models/ # oid: 3a
#     file.rb # oid: 4a
#
#   Blob.find_entry_by_path(repo, '1a', 'app/file.rb') # => entry with oid '4a'
#
# repository - Object whose `lookup(oid)` returns an enumerable of entry
#              Hashes (read here via :name, :type and :oid).
# root_id    - The oid of the tree to start searching from.
# path       - Slash separated path, leading slashes are ignored. The string
#              is never modified, so frozen strings are accepted.
#
# Returns the matching entry Hash when the final path segment is a :blob or
# :commit entry, nil otherwise (missing segment, or a non-tree in the middle
# of the path).
def find_entry_by_path(repository, root_id, path)
  root_tree = repository.lookup(root_id)

  # Strip leading slashes on a copy instead of editing the caller's string.
  first_segment, *remaining = path.sub(%r{\A/*}, '').split('/')

  entry = root_tree.find { |candidate| candidate[:name] == first_segment }

  return nil unless entry

  if remaining.empty?
    [:blob, :commit].include?(entry[:type]) ? entry : nil
  else
    # Only trees can be descended into.
    return nil unless entry[:type] == :tree

    find_entry_by_path(repository, entry[:oid], remaining.join('/'))
  end
end
../foo) + raise Repository::InvalidBlobName.new("Invalid path") if path_name.each_filename.to_a.include?('..') + + filename = path_name.to_s + index = repo.index + + unless repo.empty? + rugged_ref = repo.references[ref] + raise Repository::InvalidRef.new("Invalid branch name") unless rugged_ref + last_commit = rugged_ref.target + index.read_tree(last_commit.tree) + parents = [last_commit] + end + + if action == :remove + index.remove(filename) + else + file_entry = index.get(filename) + + if action == :rename + old_path_name = PathHelper.normalize_path(file[:previous_path]) + old_filename = old_path_name.to_s + file_entry = index.get(old_filename) + index.remove(old_filename) unless file_entry.blank? + end + + if file_entry + raise Repository::InvalidBlobName.new("Filename already exists; update not allowed") unless update + + # Preserve the current file mode if one is available + mode = file_entry[:mode] if file_entry[:mode] + end + + content = file[:content] + detect = CharlockHolmes::EncodingDetector.new.detect(content) if content + + unless detect && detect[:type] == :binary + # When writing to the repo directly as we are doing here, + # the `core.autocrlf` config isn't taken into account. 
# Whether the blob holds binary content.
#
# When no explicit flag was stored in @binary the decision is delegated to
# the inherited implementation; otherwise only a literal `true` counts as
# binary.
def binary?
  return super if @binary.nil?

  @binary == true
end
# Extracts the object id from an LFS pointer.
#
# Returns the 64 character lowercase hex digest that follows "sha256:" in
# the blob data, or nil when the blob has no LFS version key or no such
# digest is present.
def lfs_oid
  return nil unless has_lfs_version_key?

  digest = data.match(/(?<=sha256:)([0-9a-f]{64})/)
  digest ? digest[1] : nil
end
# Returns the number of characters in the snippet text.
#
# `data` is nil when the snippet has no lines; that case is reported as a
# size of 0 instead of raising NoMethodError on nil.
def size
  data.to_s.length
end
# Get commits between two revspecs
# See also #repository.commits_between
#
# Ex.
#   Commit.between(repo, '29eda46b', 'master')
#
# Every commit returned by `repo.commits_between(base, head)` is wrapped via
# `decorate`. A Rugged::ReferenceError raised along the way results in an
# empty Array.
def between(repo, base, head)
  begin
    raw_commits = repo.commits_between(base, head)
    raw_commits.map { |raw| decorate(raw) }
  rescue Rugged::ReferenceError
    []
  end
end
# Returns an abbreviated form of the commit id.
#
# length - Index of the last character to keep (default: 10). The slice is
#          inclusive, so the result holds up to `length + 1` characters.
#
# A nil id yields an empty String.
def short_id(length = 10)
  full_id = id.to_s
  full_id.slice(0..length)
end
# Serializes the commit into a Hash.
#
# Every key listed by `serialize_keys` is mapped to the value returned by
# the method of the same name.
def to_hash
  serialize_keys.map { |key| [key, send(key)] }.to_h
end
# Gitlab::Git::CommitStats counts the additions, deletions, and total changes
# in a commit.
module Gitlab
  module Git
    class CommitStats
      attr_reader :id, :additions, :deletions, :total

      # Instantiate a CommitStats object
      #
      # commit - Object responding to `id` and `diff_from_parent`; the diff
      #          must respond to `each_patch`, yielding patches that expose
      #          `stat` (indexable: additions at 0, deletions at 1) and
      #          `changes`.
      def initialize(commit)
        @id = commit.id
        @additions = @deletions = @total = 0

        commit.diff_from_parent.each_patch do |patch|
          # TODO: Use the new Rugged convenience methods when they're released
          counts = patch.stat

          @additions += counts[0]
          @deletions += counts[1]
          @total += patch.changes
        end
      end
    end
  end
end
# Builds the diff between +head+ and either +base+ or their merge base.
#
# repo    - Object responding to `merge_base_commit(head, base)` and
#           `diff(from, to, options, *paths)`.
# head    - The revision holding the new changes.
# base    - The revision to compare against.
# options - Hash of diff options (nil is treated as an empty Hash). The
#           :straight key selects a direct base..head comparison; everything
#           else is passed through `filter_diff_options`. The caller's Hash
#           is left untouched.
# paths   - Optional paths forwarded to `repo.diff`.
#
# Returns whatever `repo.diff` returns.
def between(repo, head, base, options = {}, *paths)
  # Normalise first: the nil guard has to run before the Hash is used, and
  # working on a copy keeps `delete` from altering the caller's options.
  options = options ? options.dup : {}
  straight = options.delete(:straight) || false

  common_commit = if straight
                    base
                  else
                    # Only show what is new in the source branch
                    # compared to the target branch, not the other way
                    # around. The line below with merge_base is
                    # equivalent to diff with three dots (git diff
                    # branch1...branch2) From the git documentation:
                    # "git diff A...B" is equivalent to "git diff
                    # $(git-merge-base A B) B"
                    repo.merge_base_commit(head, base)
                  end

  actual_options = filter_diff_options(options)
  repo.diff(common_commit, head, actual_options, *paths)
end
+ # + # :include_untracked :: + # If true, untracked files will be included in the diff. + # + # :include_unmodified :: + # If true, unmodified files will be included in the diff. + # + # :recurse_untracked_dirs :: + # Even if +:include_untracked+ is true, untracked directories will + # only be marked with a single entry in the diff. If this flag is set + # to true, all files under untracked directories will be included in the + # diff, too. + # + # :disable_pathspec_match :: + # If true, the given +*paths+ will be applied as exact matches, + # instead of as fnmatch patterns. + # + # :deltas_are_icase :: + # If true, filename comparisons will be made with case-insensitivity. + # + # :include_untracked_content :: + # If true, untracked content will be contained in the diff patch + # text. + # + # :skip_binary_check :: + # If true, diff deltas will be generated without spending time on + # binary detection. This is useful to improve performance in cases + # where the actual file content difference is not needed. + # + # :include_typechange :: + # If true, type changes for files will not be interpreted as deletion + # of the "old file" and addition of the "new file", but will generate + # typechange records. + # + # :include_typechange_trees :: + # Even if +:include_typechange+ is true, blob -> tree changes will + # still usually be handled as a deletion of the blob. If this flag is + # set to true, blob -> tree changes will be marked as typechanges. + # + # :ignore_filemode :: + # If true, file mode changes will be ignored. + # + # :recurse_ignored_dirs :: + # Even if +:include_ignored+ is true, ignored directories will only be + # marked with a single entry in the diff. If this flag is set to true, + # all files under ignored directories will be included in the diff, + # too. 
# Returns a copy of +options+ restricted to the allowed diff keys, layered
# on top of the equally restricted +default_options+.
#
# options         - Hash of requested options, or nil.
# default_options - Hash of fallback options, or nil.
#
# Neither argument is modified. Keys present in both Hashes take their value
# from +options+.
def filter_diff_options(options, default_options = {})
  allowed_options = [:max_size, :context_lines, :interhunk_lines,
                     :old_prefix, :new_prefix, :reverse, :force_text,
                     :ignore_whitespace, :ignore_whitespace_change,
                     :ignore_whitespace_eol, :ignore_submodules,
                     :patience, :include_ignored, :include_untracked,
                     :include_unmodified, :recurse_untracked_dirs,
                     :disable_pathspec_match, :deltas_are_icase,
                     :include_untracked_content, :skip_binary_check,
                     :include_typechange, :include_typechange_trees,
                     :ignore_filemode, :recurse_ignored_dirs, :paths,
                     :max_files, :max_lines, :all_diffs, :no_collapse]

  # Copies a Hash keeping only the allowed keys; nil becomes an empty Hash.
  permitted = lambda do |candidate|
    next {} unless candidate

    candidate.dup.keep_if { |key, _value| allowed_options.include?(key) }
  end

  defaults = permitted.call(default_options)
  return defaults unless options

  defaults.merge(permitted.call(options))
end
# Whether this diff was collapsed.
#
# Returns the stored @collapsed value once it has been assigned, and false
# as long as it never was.
def collapsed?
  defined?(@collapsed) ? @collapsed : false
end
# Strip out the information at the beginning of the patch's text to match
# Grit's output
#
# diff_text - The full patch text. It is not modified, so frozen strings
#             are accepted.
#
# Returns the text starting at the first line that begins with '---' or
# 'Binary', or an empty String when there is no such line.
def strip_diff_headers(diff_text)
  # Delete everything up to the first line that starts with '---' or
  # 'Binary'. The non-destructive `sub` leaves the caller's string alone.
  stripped = diff_text.sub(/\A.*?^(---|Binary)/m, '\1')

  if stripped.start_with?('---', 'Binary')
    stripped
  else
    # If the diff_text did not contain a line starting with '---' or
    # 'Binary', return the empty string. No idea why; we are just
    # preserving behavior from before the refactor.
    ''
  end
end
+ !!@overflow + end + + def size + @size ||= count # forces a loop using each method + end + + def real_size + populate! + + if @overflow + "#{size}+" + else + size.to_s + end + end + + def decorate! + collection = each_with_index do |element, i| + @array[i] = yield(element) + end + @populated = true + collection + end + + private + + def populate! + return if @populated + + each { nil } # force a loop through all diffs + @populated = true + nil + end + + def over_safe_limits?(files) + files >= @safe_max_files || @line_count > @safe_max_lines || @byte_count >= @safe_max_bytes + end + + def each_delta + @iterator.each_delta.with_index do |delta, i| + diff = Gitlab::Git::Diff.new(delta) + + yield @array[i] = diff + end + end + + def each_patch + @iterator.each_with_index do |raw, i| + # First yield cached Diff instances from @array + if @array[i] + yield @array[i] + next + end + + # We have exhausted @array, time to create new Diff instances or stop. + break if @overflow + + if !@all_diffs && i >= @max_files + @overflow = true + break + end + + collapse = !@all_diffs && !@no_collapse + + diff = Gitlab::Git::Diff.new(raw, collapse: collapse) + + if collapse && over_safe_limits?(i) + diff.prune_collapsed_diff! + end + + @line_count += diff.line_count + @byte_count += diff.diff.bytesize + + if !@all_diffs && (@line_count >= @max_lines || @byte_count >= @max_bytes) + # This last Diff instance pushes us over the lines limit. We stop and + # discard it. + @overflow = true + break + end + + yield @array[i] = diff + end + end + end + end +end diff --git a/lib/gitlab/git/encoding_helper.rb b/lib/gitlab/git/encoding_helper.rb new file mode 100644 index 00000000000..e57d228e688 --- /dev/null +++ b/lib/gitlab/git/encoding_helper.rb @@ -0,0 +1,58 @@ +module Gitlab + module Git + module EncodingHelper + extend self + + # This threshold is carefully tweaked to prevent usage of encodings detected + # by CharlockHolmes with low confidence. 
If CharlockHolmes confidence is low, + # we're better off sticking with utf8 encoding. + # Reason: git diff can return strings with invalid utf8 byte sequences if it + # truncates a diff in the middle of a multibyte character. In this case + # CharlockHolmes will try to guess the encoding and will likely suggest an + # obscure encoding with low confidence. + # There is a lot more info with this merge request: + # https://gitlab.com/gitlab-org/gitlab_git/merge_requests/77#note_4754193 + ENCODING_CONFIDENCE_THRESHOLD = 40 + + def encode!(message) + return nil unless message.respond_to? :force_encoding + + # if message is utf-8 encoding, just return it + message.force_encoding("UTF-8") + return message if message.valid_encoding? + + # return message if message type is binary + detect = CharlockHolmes::EncodingDetector.detect(message) + return message.force_encoding("BINARY") if detect && detect[:type] == :binary + + # force detected encoding if we have sufficient confidence. + if detect && detect[:encoding] && detect[:confidence] > ENCODING_CONFIDENCE_THRESHOLD + message.force_encoding(detect[:encoding]) + end + + # encode and clean the bad chars + message.replace clean(message) + rescue + encoding = detect ? 
detect[:encoding] : "unknown" + "--broken encoding: #{encoding}" + end + + def encode_utf8(message) + detect = CharlockHolmes::EncodingDetector.detect(message) + if detect + CharlockHolmes::Converter.convert(message, detect[:encoding], 'UTF-8') + else + clean(message) + end + end + + private + + def clean(message) + message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "") + .encode("UTF-8") + .gsub("\0".encode("UTF-8"), "") + end + end + end +end diff --git a/lib/gitlab/git/path_helper.rb b/lib/gitlab/git/path_helper.rb new file mode 100644 index 00000000000..0148cd8df05 --- /dev/null +++ b/lib/gitlab/git/path_helper.rb @@ -0,0 +1,16 @@ +module Gitlab + module Git + class PathHelper + class << self + def normalize_path(filename) + # Strip all leading slashes so that //foo -> foo + filename[/^\/*/] = '' + + # Expand relative paths (e.g. foo/../bar) + filename = Pathname.new(filename) + filename.relative_path_from(Pathname.new('')) + end + end + end + end +end diff --git a/lib/gitlab/git/popen.rb b/lib/gitlab/git/popen.rb new file mode 100644 index 00000000000..df9ca3ee5ac --- /dev/null +++ b/lib/gitlab/git/popen.rb @@ -0,0 +1,26 @@ +require 'open3' + +module Gitlab + module Git + module Popen + def popen(cmd, path) + unless cmd.is_a?(Array) + raise "System commands must be given as an array of strings" + end + + vars = { "PWD" => path } + options = { chdir: path } + + @cmd_output = "" + @cmd_status = 0 + Open3.popen3(vars, *cmd, options) do |stdin, stdout, stderr, wait_thr| + @cmd_output << stdout.read + @cmd_output << stderr.read + @cmd_status = wait_thr.value.exitstatus + end + + [@cmd_output, @cmd_status] + end + end + end +end diff --git a/lib/gitlab/git/ref.rb b/lib/gitlab/git/ref.rb new file mode 100644 index 00000000000..ee559866e04 --- /dev/null +++ b/lib/gitlab/git/ref.rb @@ -0,0 +1,49 @@ +module Gitlab + module Git + class Ref + include Gitlab::Git::EncodingHelper + + # Branch or tag name + # without "refs/tags|heads" prefix + 
attr_reader :name + + # Target sha. + # Usually it is commit sha but in case + # when tag reference on other tag it can be tag sha + attr_reader :target + + # Dereferenced target + # Commit object to which the Ref points to + attr_reader :dereferenced_target + + # Extract branch name from full ref path + # + # Ex. + # Ref.extract_branch_name('refs/heads/master') #=> 'master' + def self.extract_branch_name(str) + str.gsub(/\Arefs\/heads\//, '') + end + + def self.dereference_object(object) + object = object.target while object.is_a?(Rugged::Tag::Annotation) + + object + end + + def initialize(repository, name, target) + encode! name + @name = name.gsub(/\Arefs\/(tags|heads)\//, '') + @dereferenced_target = Commit.find(repository, target) + @target = if target.respond_to?(:oid) + target.oid + elsif target.respond_to?(:name) + target.name + elsif target.is_a? String + target + else + nil + end + end + end + end +end diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb new file mode 100644 index 00000000000..0c75e5da356 --- /dev/null +++ b/lib/gitlab/git/repository.rb @@ -0,0 +1,1253 @@ +# Gitlab::Git::Repository is a wrapper around native Rugged::Repository object +require_relative 'encoding_helper' +require_relative 'path_helper' +require 'forwardable' +require 'tempfile' +require 'forwardable' +require "rubygems/package" + +module Gitlab + module Git + class Repository + extend Forwardable + include Gitlab::Git::Popen + + SEARCH_CONTEXT_LINES = 3 + + class NoRepository < StandardError; end + class InvalidBlobName < StandardError; end + class InvalidRef < StandardError; end + + # Full path to repo + attr_reader :path + + # Directory name of repo + attr_reader :name + + # Rugged repo object + attr_reader :rugged + + # 'path' must be the path to a _bare_ git repository, e.g. 
+ # /path/to/my-repo.git + def initialize(path) + @path = path + @name = path.split("/").last + @attributes = Attributes.new(path) + end + + # Default branch in the repository + def root_ref + @root_ref ||= discover_default_branch + end + + # Alias to old method for compatibility + def raw + rugged + end + + def rugged + @rugged ||= Rugged::Repository.new(path) + rescue Rugged::RepositoryError, Rugged::OSError + raise NoRepository.new('no repository for such path') + end + + # Returns an Array of branch names + # sorted by name ASC + def branch_names + branches.map(&:name) + end + + # Returns an Array of Branches + def branches + rugged.branches.map do |rugged_ref| + begin + Branch.new(self, rugged_ref.name, rugged_ref.target) + rescue Rugged::ReferenceError + # Omit invalid branch + end + end.compact.sort_by(&:name) + end + + def reload_rugged + @rugged = nil + end + + # Directly find a branch with a simple name (e.g. master) + # + # force_reload causes a new Rugged repository to be instantiated + # + # This is to work around a bug in libgit2 that causes in-memory refs to + # be stale/invalid when packed-refs is changed. 
+ # See https://gitlab.com/gitlab-org/gitlab-ce/issues/15392#note_14538333 + def find_branch(name, force_reload = false) + reload_rugged if force_reload + + rugged_ref = rugged.branches[name] + Branch.new(self, rugged_ref.name, rugged_ref.target) if rugged_ref + end + + def local_branches + rugged.branches.each(:local).map do |branch| + Branch.new(self, branch.name, branch.target) + end + end + + # Returns the number of valid branches + def branch_count + rugged.branches.count do |ref| + begin + ref.name && ref.target # ensures the branch is valid + + true + rescue Rugged::ReferenceError + false + end + end + end + + # Returns an Array of tag names + def tag_names + rugged.tags.map { |t| t.name } + end + + # Returns an Array of Tags + def tags + rugged.references.each("refs/tags/*").map do |ref| + message = nil + + if ref.target.is_a?(Rugged::Tag::Annotation) + tag_message = ref.target.message + + if tag_message.respond_to?(:chomp) + message = tag_message.chomp + end + end + + Tag.new(self, ref.name, ref.target, message) + end.sort_by(&:name) + end + + # Returns true if the given tag exists + # + # name - The name of the tag as a String. + def tag_exists?(name) + !!rugged.tags[name] + end + + # Returns true if the given branch exists + # + # name - The name of the branch as a String. + def branch_exists?(name) + rugged.branches.exists?(name) + + # If the branch name is invalid (e.g. ".foo") Rugged will raise an error. + # Whatever code calls this method shouldn't have to deal with that so + # instead we just return `false` (which is true since a branch doesn't + # exist when it has an invalid name). + rescue Rugged::ReferenceError + false + end + + # Returns an Array of branch and tag names + def ref_names + branch_names + tag_names + end + + # Deprecated. Will be removed in 5.2 + def heads + rugged.references.each("refs/heads/*").map do |head| + Gitlab::Git::Ref.new(self, head.name, head.target) + end.sort_by(&:name) + end + + def has_commits? + !empty? 
+ end + + def empty? + rugged.empty? + end + + def bare? + rugged.bare? + end + + def repo_exists? + !!rugged + end + + # Discovers the default branch based on the repository's available branches + # + # - If no branches are present, returns nil + # - If one branch is present, returns its name + # - If two or more branches are present, returns current HEAD or master or first branch + def discover_default_branch + names = branch_names + + return if names.empty? + + return names[0] if names.length == 1 + + if rugged_head + extracted_name = Ref.extract_branch_name(rugged_head.name) + + return extracted_name if names.include?(extracted_name) + end + + if names.include?('master') + 'master' + else + names[0] + end + end + + def rugged_head + rugged.head + rescue Rugged::ReferenceError + nil + end + + def archive_metadata(ref, storage_path, format = "tar.gz") + ref ||= root_ref + commit = Gitlab::Git::Commit.find(self, ref) + return {} if commit.nil? + + project_name = self.name.chomp('.git') + prefix = "#{project_name}-#{ref}-#{commit.id}" + + { + 'RepoPath' => path, + 'ArchivePrefix' => prefix, + 'ArchivePath' => archive_file_path(prefix, storage_path, format), + 'CommitId' => commit.id, + } + end + + def archive_file_path(name, storage_path, format = "tar.gz") + # Build file path + return nil unless name + + extension = + case format + when "tar.bz2", "tbz", "tbz2", "tb2", "bz2" + "tar.bz2" + when "tar" + "tar" + when "zip" + "zip" + else + # everything else should fall back to tar.gz + "tar.gz" + end + + file_name = "#{name}.#{extension}" + File.join(storage_path, self.name, file_name) + end + + # Return repo size in megabytes + def size + size = popen(%w(du -sk), path).first.strip.to_i + (size.to_f / 1024).round(2) + end + + # Returns an array of BlobSnippets for files at the specified +ref+ that + # contain the +query+ string. 
+ def search_files(query, ref = nil) + greps = [] + ref ||= root_ref + + populated_index(ref).each do |entry| + # Discard submodules + next if submodule?(entry) + + blob = Blob.raw(self, entry[:oid]) + + # Skip binary files + next if blob.data.encoding == Encoding::ASCII_8BIT + + blob.load_all_data!(self) + greps += build_greps(blob.data, query, ref, entry[:path]) + end + + greps + end + + # Use the Rugged Walker API to build an array of commits. + # + # Usage. + # repo.log( + # ref: 'master', + # path: 'app/models', + # limit: 10, + # offset: 5, + # after: Time.new(2016, 4, 21, 14, 32, 10) + # ) + # + def log(options) + default_options = { + limit: 10, + offset: 0, + path: nil, + follow: false, + skip_merges: false, + disable_walk: false, + after: nil, + before: nil + } + + options = default_options.merge(options) + options[:limit] ||= 0 + options[:offset] ||= 0 + actual_ref = options[:ref] || root_ref + begin + sha = sha_from_ref(actual_ref) + rescue Rugged::OdbError, Rugged::InvalidError, Rugged::ReferenceError + # Return an empty array if the ref wasn't found + return [] + end + + if log_using_shell?(options) + log_by_shell(sha, options) + else + log_by_walk(sha, options) + end + end + + def log_using_shell?(options) + options[:path].present? 
|| + options[:disable_walk] || + options[:skip_merges] || + options[:after] || + options[:before] + end + + def log_by_walk(sha, options) + walk_options = { + show: sha, + sort: Rugged::SORT_DATE, + limit: options[:limit], + offset: options[:offset] + } + Rugged::Walker.walk(rugged, walk_options).to_a + end + + def log_by_shell(sha, options) + cmd = %W(git --git-dir=#{path} log) + cmd += %W(-n #{options[:limit].to_i}) + cmd += %w(--format=%H) + cmd += %W(--skip=#{options[:offset].to_i}) + cmd += %w(--follow) if options[:follow] + cmd += %w(--no-merges) if options[:skip_merges] + cmd += %W(--after=#{options[:after].iso8601}) if options[:after] + cmd += %W(--before=#{options[:before].iso8601}) if options[:before] + cmd += [sha] + cmd += %W(-- #{options[:path]}) if options[:path].present? + + raw_output = IO.popen(cmd) {|io| io.read } + + log = raw_output.lines.map do |c| + Rugged::Commit.new(rugged, c.strip) + end + + log.is_a?(Array) ? log : [] + end + + def sha_from_ref(ref) + rev_parse_target(ref).oid + end + + # Return the object that +revspec+ points to. If +revspec+ is an + # annotated tag, then return the tag's target instead. + def rev_parse_target(revspec) + obj = rugged.rev_parse(revspec) + Ref.dereference_object(obj) + end + + # Return a collection of Rugged::Commits between the two revspec arguments. + # See http://git-scm.com/docs/git-rev-parse.html#_specifying_revisions for + # a detailed list of valid arguments. + def commits_between(from, to) + walker = Rugged::Walker.new(rugged) + walker.sorting(Rugged::SORT_DATE | Rugged::SORT_REVERSE) + + sha_from = sha_from_ref(from) + sha_to = sha_from_ref(to) + + walker.push(sha_to) + walker.hide(sha_from) + + commits = walker.to_a + walker.reset + + commits + end + + # Counts the amount of commits between `from` and `to`. 
+ def count_commits_between(from, to) + commits_between(from, to).size + end + + # Returns the SHA of the most recent common ancestor of +from+ and +to+ + def merge_base_commit(from, to) + rugged.merge_base(from, to) + end + + # Return an array of Diff objects that represent the diff + # between +from+ and +to+. See Diff::filter_diff_options for the allowed + # diff options. The +options+ hash can also include :break_rewrites to + # split larger rewrites into delete/add pairs. + def diff(from, to, options = {}, *paths) + DiffCollection.new(diff_patches(from, to, options, *paths), options) + end + + # Returns commits collection + # + # Ex. + # repo.find_commits( + # ref: 'master', + # max_count: 10, + # skip: 5, + # order: :date + # ) + # + # +options+ is a Hash of optional arguments to git + # :ref is the ref from which to begin (SHA1 or name) + # :contains is the commit contained by the refs from which to begin (SHA1 or name) + # :max_count is the maximum number of commits to fetch + # :skip is the number of commits to skip + # :order is the commits order and allowed value is :date(default) or :topo + # + def find_commits(options = {}) + actual_options = options.dup + + allowed_options = [:ref, :max_count, :skip, :contains, :order] + + actual_options.keep_if do |key| + allowed_options.include?(key) + end + + default_options = { skip: 0 } + actual_options = default_options.merge(actual_options) + + walker = Rugged::Walker.new(rugged) + + if actual_options[:ref] + walker.push(rugged.rev_parse_oid(actual_options[:ref])) + elsif actual_options[:contains] + branches_contains(actual_options[:contains]).each do |branch| + walker.push(branch.target_id) + end + else + rugged.references.each("refs/heads/*") do |ref| + walker.push(ref.target_id) + end + end + + if actual_options[:order] == :topo + walker.sorting(Rugged::SORT_TOPO) + else + walker.sorting(Rugged::SORT_DATE) + end + + commits = [] + offset = actual_options[:skip] + limit = actual_options[:max_count] + 
walker.each(offset: offset, limit: limit) do |commit| + gitlab_commit = Gitlab::Git::Commit.decorate(commit) + commits.push(gitlab_commit) + end + + walker.reset + + commits + rescue Rugged::OdbError + [] + end + + # Returns branch names collection that contains the special commit(SHA1 + # or name) + # + # Ex. + # repo.branch_names_contains('master') + # + def branch_names_contains(commit) + branches_contains(commit).map { |c| c.name } + end + + # Returns branch collection that contains the special commit(SHA1 or name) + # + # Ex. + # repo.branch_names_contains('master') + # + def branches_contains(commit) + commit_obj = rugged.rev_parse(commit) + parent = commit_obj.parents.first unless commit_obj.parents.empty? + + walker = Rugged::Walker.new(rugged) + + rugged.branches.select do |branch| + walker.push(branch.target_id) + walker.hide(parent) if parent + result = walker.any? { |c| c.oid == commit_obj.oid } + walker.reset + + result + end + end + + # Get refs hash which key is SHA1 + # and value is a Rugged::Reference + def refs_hash + # Initialize only when first call + if @refs_hash.nil? + @refs_hash = Hash.new { |h, k| h[k] = [] } + + rugged.references.each do |r| + # Symbolic/remote references may not have an OID; skip over them + target_oid = r.target.try(:oid) + if target_oid + sha = rev_parse_target(target_oid).oid + @refs_hash[sha] << r + end + end + end + @refs_hash + end + + # Lookup for rugged object by oid or ref name + def lookup(oid_or_ref_name) + rugged.rev_parse(oid_or_ref_name) + end + + # Return hash with submodules info for this repository + # + # Ex. + # { + # "rack" => { + # "id" => "c67be4624545b4263184c4a0e8f887efd0a66320", + # "path" => "rack", + # "url" => "git://github.com/chneukirchen/rack.git" + # }, + # "encoding" => { + # "id" => .... 
+ # } + # } + # + def submodules(ref) + commit = rev_parse_target(ref) + return {} unless commit + + begin + content = blob_content(commit, ".gitmodules") + rescue InvalidBlobName + return {} + end + + parse_gitmodules(commit, content) + end + + # Return total commits count accessible from passed ref + def commit_count(ref) + walker = Rugged::Walker.new(rugged) + walker.sorting(Rugged::SORT_TOPO | Rugged::SORT_REVERSE) + oid = rugged.rev_parse_oid(ref) + walker.push(oid) + walker.count + end + + # Sets HEAD to the commit specified by +ref+; +ref+ can be a branch or + # tag name or a commit SHA. Valid +reset_type+ values are: + # + # [:soft] + # the head will be moved to the commit. + # [:mixed] + # will trigger a +:soft+ reset, plus the index will be replaced + # with the content of the commit tree. + # [:hard] + # will trigger a +:mixed+ reset and the working directory will be + # replaced with the content of the index. (Untracked and ignored files + # will be left alone) + def reset(ref, reset_type) + rugged.reset(ref, reset_type) + end + + # Mimic the `git clean` command and recursively delete untracked files. + # Valid keys that can be passed in the +options+ hash are: + # + # :d - Remove untracked directories + # :f - Remove untracked directories that are managed by a different + # repository + # :x - Remove ignored files + # + # The value in +options+ must evaluate to true for an option to take + # effect. + # + # Examples: + # + # repo.clean(d: true, f: true) # Enable the -d and -f options + # + # repo.clean(d: false, x: true) # -x is enabled, -d is not + def clean(options = {}) + strategies = [:remove_untracked] + strategies.push(:force) if options[:f] + strategies.push(:remove_ignored) if options[:x] + + # TODO: implement this method + end + + # Check out the specified ref. Valid options are: + # + # :b - Create a new branch at +start_point+ and set HEAD to the new + # branch. 
+ # + # * These options are passed to the Rugged::Repository#checkout method: + # + # :progress :: + # A callback that will be executed for checkout progress notifications. + # Up to 3 parameters are passed on each execution: + # + # - The path to the last updated file (or +nil+ on the very first + # invocation). + # - The number of completed checkout steps. + # - The number of total checkout steps to be performed. + # + # :notify :: + # A callback that will be executed for each checkout notification + # types specified with +:notify_flags+. Up to 5 parameters are passed + # on each execution: + # + # - An array containing the +:notify_flags+ that caused the callback + # execution. + # - The path of the current file. + # - A hash describing the baseline blob (or +nil+ if it does not + # exist). + # - A hash describing the target blob (or +nil+ if it does not exist). + # - A hash describing the workdir blob (or +nil+ if it does not + # exist). + # + # :strategy :: + # A single symbol or an array of symbols representing the strategies + # to use when performing the checkout. Possible values are: + # + # :none :: + # Perform a dry run (default). + # + # :safe :: + # Allow safe updates that cannot overwrite uncommitted data. + # + # :safe_create :: + # Allow safe updates plus creation of missing files. + # + # :force :: + # Allow all updates to force working directory to look like index. + # + # :allow_conflicts :: + # Allow checkout to make safe updates even if conflicts are found. + # + # :remove_untracked :: + # Remove untracked files not in index (that are not ignored). + # + # :remove_ignored :: + # Remove ignored files not in index. + # + # :update_only :: + # Only update existing files, don't create new ones. + # + # :dont_update_index :: + # Normally checkout updates index entries as it goes; this stops + # that. + # + # :no_refresh :: + # Don't refresh index/config/etc before doing checkout. 
+ # + # :disable_pathspec_match :: + # Treat pathspec as simple list of exact match file paths. + # + # :skip_locked_directories :: + # Ignore directories in use, they will be left empty. + # + # :skip_unmerged :: + # Allow checkout to skip unmerged files (NOT IMPLEMENTED). + # + # :use_ours :: + # For unmerged files, checkout stage 2 from index (NOT IMPLEMENTED). + # + # :use_theirs :: + # For unmerged files, checkout stage 3 from index (NOT IMPLEMENTED). + # + # :update_submodules :: + # Recursively checkout submodules with same options (NOT + # IMPLEMENTED). + # + # :update_submodules_if_changed :: + # Recursively checkout submodules if HEAD moved in super repo (NOT + # IMPLEMENTED). + # + # :disable_filters :: + # If +true+, filters like CRLF line conversion will be disabled. + # + # :dir_mode :: + # Mode for newly created directories. Default: +0755+. + # + # :file_mode :: + # Mode for newly created files. Default: +0755+ or +0644+. + # + # :file_open_flags :: + # Mode for opening files. Default: + # <code>IO::CREAT | IO::TRUNC | IO::WRONLY</code>. + # + # :notify_flags :: + # A single symbol or an array of symbols representing the cases in + # which the +:notify+ callback should be invoked. Possible values are: + # + # :none :: + # Do not invoke the +:notify+ callback (default). + # + # :conflict :: + # Invoke the callback for conflicting paths. + # + # :dirty :: + # Invoke the callback for "dirty" files, i.e. those that do not need + # an update but no longer match the baseline. + # + # :updated :: + # Invoke the callback for any file that was changed. + # + # :untracked :: + # Invoke the callback for untracked files. + # + # :ignored :: + # Invoke the callback for ignored files. + # + # :all :: + # Invoke the callback for all these cases. + # + # :paths :: + # A glob string or an array of glob strings specifying which paths + # should be taken into account for the checkout operation. +nil+ will + # match all files. Default: +nil+. 
+ # + # :baseline :: + # A Rugged::Tree that represents the current, expected contents of the + # workdir. Default: +HEAD+. + # + # :target_directory :: + # A path to an alternative workdir directory in which the checkout + # should be performed. + def checkout(ref, options = {}, start_point = "HEAD") + if options[:b] + rugged.branches.create(ref, start_point) + options.delete(:b) + end + default_options = { strategy: [:recreate_missing, :safe] } + rugged.checkout(ref, default_options.merge(options)) + end + + # Delete the specified branch from the repository + def delete_branch(branch_name) + rugged.branches.delete(branch_name) + end + + # Create a new branch named **ref+ based on **stat_point+, HEAD by default + # + # Examples: + # create_branch("feature") + # create_branch("other-feature", "master") + def create_branch(ref, start_point = "HEAD") + rugged_ref = rugged.branches.create(ref, start_point) + Branch.new(self, rugged_ref.name, rugged_ref.target) + rescue Rugged::ReferenceError => e + raise InvalidRef.new("Branch #{ref} already exists") if e.to_s =~ /'refs\/heads\/#{ref}'/ + raise InvalidRef.new("Invalid reference #{start_point}") + end + + # Return an array of this repository's remote names + def remote_names + rugged.remotes.each_name.to_a + end + + # Delete the specified remote from this repository. + def remote_delete(remote_name) + rugged.remotes.delete(remote_name) + end + + # Add a new remote to this repository. 
Returns a Rugged::Remote object + def remote_add(remote_name, url) + rugged.remotes.create(remote_name, url) + end + + # Update the specified remote using the values in the +options+ hash + # + # Example + # repo.update_remote("origin", url: "path/to/repo") + def remote_update(remote_name, options = {}) + # TODO: Implement other remote options + rugged.remotes.set_url(remote_name, options[:url]) if options[:url] + end + + # Fetch the specified remote + def fetch(remote_name) + rugged.remotes[remote_name].fetch + end + + # Push +*refspecs+ to the remote identified by +remote_name+. + def push(remote_name, *refspecs) + rugged.remotes[remote_name].push(refspecs) + end + + # Merge the +source_name+ branch into the +target_name+ branch. This is + # equivalent to `git merge --no_ff +source_name+`, since a merge commit + # is always created. + def merge(source_name, target_name, options = {}) + our_commit = rugged.branches[target_name].target + their_commit = rugged.branches[source_name].target + + raise "Invalid merge target" if our_commit.nil? + raise "Invalid merge source" if their_commit.nil? + + merge_index = rugged.merge_commits(our_commit, their_commit) + return false if merge_index.conflicts? 
+ + actual_options = options.merge( + parents: [our_commit, their_commit], + tree: merge_index.write_tree(rugged), + update_ref: "refs/heads/#{target_name}" + ) + Rugged::Commit.create(rugged, actual_options) + end + + def commits_since(from_date) + walker = Rugged::Walker.new(rugged) + walker.sorting(Rugged::SORT_DATE | Rugged::SORT_REVERSE) + + rugged.references.each("refs/heads/*") do |ref| + walker.push(ref.target_id) + end + + commits = [] + walker.each do |commit| + break if commit.author[:time].to_date < from_date + commits.push(commit) + end + + commits + end + + AUTOCRLF_VALUES = { + "true" => true, + "false" => false, + "input" => :input + }.freeze + + def autocrlf + AUTOCRLF_VALUES[rugged.config['core.autocrlf']] + end + + def autocrlf=(value) + rugged.config['core.autocrlf'] = AUTOCRLF_VALUES.invert[value] + end + + # Create a new directory with a .gitkeep file. Creates + # all required nested directories (i.e. mkdir -p behavior) + # + # options should contain next structure: + # author: { + # email: 'user@example.com', + # name: 'Test User', + # time: Time.now + # }, + # committer: { + # email: 'user@example.com', + # name: 'Test User', + # time: Time.now + # }, + # commit: { + # message: 'Wow such commit', + # branch: 'master', + # update_ref: false + # } + def mkdir(path, options = {}) + # Check if this directory exists; if it does, then don't bother + # adding .gitkeep file. + ref = options[:commit][:branch] + path = PathHelper.normalize_path(path).to_s + rugged_ref = rugged.ref(ref) + + raise InvalidRef.new("Invalid ref") if rugged_ref.nil? + + target_commit = rugged_ref.target + + raise InvalidRef.new("Invalid target commit") if target_commit.nil? 
+ + entry = tree_entry(target_commit, path) + + if entry + if entry[:type] == :blob + raise InvalidBlobName.new("Directory already exists as a file") + else + raise InvalidBlobName.new("Directory already exists") + end + end + + options[:file] = { + content: '', + path: "#{path}/.gitkeep", + update: true + } + + Blob.commit(self, options) + end + + # Returns result like "git ls-files" , recursive and full file path + # + # Ex. + # repo.ls_files('master') + # + def ls_files(ref) + actual_ref = ref || root_ref + + begin + sha_from_ref(actual_ref) + rescue Rugged::OdbError, Rugged::InvalidError, Rugged::ReferenceError + # Return an empty array if the ref wasn't found + return [] + end + + cmd = %W(git --git-dir=#{path} ls-tree) + cmd += %w(-r) + cmd += %w(--full-tree) + cmd += %w(--full-name) + cmd += %W(-- #{actual_ref}) + + raw_output = IO.popen(cmd, &:read).split("\n").map do |f| + stuff, path = f.split("\t") + _mode, type, _sha = stuff.split(" ") + path if type == "blob" + # Contain only blob type + end + + raw_output.compact + end + + def copy_gitattributes(ref) + begin + commit = lookup(ref) + rescue Rugged::ReferenceError + raise InvalidRef.new("Ref #{ref} is invalid") + end + + # Create the paths + info_dir_path = File.join(path, 'info') + info_attributes_path = File.join(info_dir_path, 'attributes') + + begin + # Retrieve the contents of the blob + gitattributes_content = blob_content(commit, '.gitattributes') + rescue InvalidBlobName + # No .gitattributes found. 
Should now remove any info/attributes and return + File.delete(info_attributes_path) if File.exist?(info_attributes_path) + return + end + + # Create the info directory if needed + Dir.mkdir(info_dir_path) unless File.directory?(info_dir_path) + + # Write the contents of the .gitattributes file to info/attributes + # Use binary mode to prevent Rails from converting ASCII-8BIT to UTF-8 + File.open(info_attributes_path, "wb") do |file| + file.write(gitattributes_content) + end + end + + # Checks if the blob should be diffable according to its attributes + def diffable?(blob) + attributes(blob.path).fetch('diff') { blob.text? } + end + + # Returns the Git attributes for the given file path. + # + # See `Gitlab::Git::Attributes` for more information. + def attributes(path) + @attributes.attributes(path) + end + + private + + # Get the content of a blob for a given commit. If the blob is a commit + # (for submodules) then return the blob's OID. + def blob_content(commit, blob_name) + blob_entry = tree_entry(commit, blob_name) + + unless blob_entry + raise InvalidBlobName.new("Invalid blob name: #{blob_name}") + end + + case blob_entry[:type] + when :commit + blob_entry[:oid] + when :tree + raise InvalidBlobName.new("#{blob_name} is a tree, not a blob") + when :blob + rugged.lookup(blob_entry[:oid]).content + end + end + + # Parses the contents of a .gitmodules file and returns a hash of + # submodule information. 
+ def parse_gitmodules(commit, content) + results = {} + + current = "" + content.split("\n").each do |txt| + if txt =~ /^\s*\[/ + current = txt.match(/(?<=").*(?=")/)[0] + results[current] = {} + else + next unless results[current] + match_data = txt.match(/(\w+)\s*=\s*(.*)/) + next unless match_data + target = match_data[2].chomp + results[current][match_data[1]] = target + + if match_data[1] == "path" + begin + results[current]["id"] = blob_content(commit, target) + rescue InvalidBlobName + results.delete(current) + end + end + end + end + + results + end + + # Returns true if +commit+ introduced changes to +path+, using commit + # trees to make that determination. Uses the history simplification + # rules that `git log` uses by default, where a commit is omitted if it + # is TREESAME to any parent. + # + # If the +follow+ option is true and the file specified by +path+ was + # renamed, then the path value is set to the old path. + def commit_touches_path?(commit, path, follow, walker) + entry = tree_entry(commit, path) + + if commit.parents.empty? + # This is the root commit, return true if it has +path+ in its tree + return !entry.nil? + end + + num_treesame = 0 + commit.parents.each do |parent| + parent_entry = tree_entry(parent, path) + + # Only follow the first TREESAME parent for merge commits + if num_treesame > 0 + walker.hide(parent) + next + end + + if entry.nil? && parent_entry.nil? + num_treesame += 1 + elsif entry && parent_entry && entry[:oid] == parent_entry[:oid] + num_treesame += 1 + end + end + + case num_treesame + when 0 + detect_rename(commit, commit.parents.first, path) if follow + true + else false + end + end + + # Find the entry for +path+ in the tree for +commit+ + def tree_entry(commit, path) + pathname = Pathname.new(path) + first = true + tmp_entry = nil + + pathname.each_filename do |dir| + if first + tmp_entry = commit.tree[dir] + first = false + elsif tmp_entry.nil? 
+ return nil + else + tmp_entry = rugged.lookup(tmp_entry[:oid]) + return nil unless tmp_entry.type == :tree + tmp_entry = tmp_entry[dir] + end + end + + tmp_entry + end + + # Compare +commit+ and +parent+ for +path+. If +path+ is a file and was + # renamed in +commit+, then set +path+ to the old filename. + def detect_rename(commit, parent, path) + diff = parent.diff(commit, paths: [path], disable_pathspec_match: true) + + # If +path+ is a filename, not a directory, then we should only have + # one delta. We don't need to follow renames for directories. + return nil if diff.each_delta.count > 1 + + delta = diff.each_delta.first + if delta.added? + full_diff = parent.diff(commit) + full_diff.find_similar! + + full_diff.each_delta do |full_delta| + if full_delta.renamed? && path == full_delta.new_file[:path] + # Look for the old path in ancestors + path.replace(full_delta.old_file[:path]) + end + end + end + end + + def archive_to_file(treeish = 'master', filename = 'archive.tar.gz', format = nil, compress_cmd = %w(gzip -n)) + git_archive_cmd = %W(git --git-dir=#{path} archive) + + # Put files into a directory before archiving + prefix = "#{archive_name(treeish)}/" + git_archive_cmd << "--prefix=#{prefix}" + + # Format defaults to tar + git_archive_cmd << "--format=#{format}" if format + + git_archive_cmd += %W(-- #{treeish}) + + open(filename, 'w') do |file| + # Create a pipe to act as the '|' in 'git archive ... | gzip' + pipe_rd, pipe_wr = IO.pipe + + # Get the compression process ready to accept data from the read end + # of the pipe + compress_pid = spawn(*nice(compress_cmd), in: pipe_rd, out: file) + # The read end belongs to the compression process now; we should + # close our file descriptor for it. + pipe_rd.close + + # Start 'git archive' and tell it to write into the write end of the + # pipe. + git_archive_pid = spawn(*nice(git_archive_cmd), out: pipe_wr) + # The write end belongs to 'git archive' now; close it. 
+ pipe_wr.close + + # When 'git archive' and the compression process are finished, we are + # done. + Process.waitpid(git_archive_pid) + raise "#{git_archive_cmd.join(' ')} failed" unless $?.success? + Process.waitpid(compress_pid) + raise "#{compress_cmd.join(' ')} failed" unless $?.success? + end + end + + def nice(cmd) + nice_cmd = %w(nice -n 20) + unless unsupported_platform? + nice_cmd += %w(ionice -c 2 -n 7) + end + nice_cmd + cmd + end + + def unsupported_platform? + %w[darwin freebsd solaris].map { |platform| RUBY_PLATFORM.include?(platform) }.any? + end + + # Returns true if the index entry has the special file mode that denotes + # a submodule. + def submodule?(index_entry) + index_entry[:mode] == 57344 + end + + # Return a Rugged::Index that has read from the tree at +ref_name+ + def populated_index(ref_name) + commit = rev_parse_target(ref_name) + index = rugged.index + index.read_tree(commit.tree) + index + end + + # Return an array of BlobSnippets for lines in +file_contents+ that match + # +query+ + def build_greps(file_contents, query, ref, filename) + # The file_contents string is potentially huge so we make sure to loop + # through it one line at a time. This gives Ruby the chance to GC lines + # we are not interested in. + # + # We need to do a little extra work because we are not looking for just + # the lines that matches the query, but also for the context + # (surrounding lines). We will use Enumerable#each_cons to efficiently + # loop through the lines while keeping surrounding lines on hand. + # + # First, we turn "foo\nbar\nbaz" into + # [ + # [nil, -3], [nil, -2], [nil, -1], + # ['foo', 0], ['bar', 1], ['baz', 3], + # [nil, 4], [nil, 5], [nil, 6] + # ] + lines_with_index = Enumerator.new do |yielder| + # Yield fake 'before' lines for the first line of file_contents + (-SEARCH_CONTEXT_LINES..-1).each do |i| + yielder.yield [nil, i] + end + + # Yield the actual file contents + count = 0 + file_contents.each_line do |line| + line.chomp! 
+ yielder.yield [line, count] + count += 1 + end + + # Yield fake 'after' lines for the last line of file_contents + (count + 1..count + SEARCH_CONTEXT_LINES).each do |i| + yielder.yield [nil, i] + end + end + + greps = [] + + # Loop through consecutive blocks of lines with indexes + lines_with_index.each_cons(2 * SEARCH_CONTEXT_LINES + 1) do |line_block| + # Get the 'middle' line and index from the block + line, _ = line_block[SEARCH_CONTEXT_LINES] + + next unless line && line.match(/#{Regexp.escape(query)}/i) + + # Yay, 'line' contains a match! + # Get an array with just the context lines (no indexes) + match_with_context = line_block.map(&:first) + # Remove 'nil' lines in case we are close to the first or last line + match_with_context.compact! + + # Get the line number (1-indexed) of the first context line + first_context_line_number = line_block[0][1] + 1 + + greps << Gitlab::Git::BlobSnippet.new( + ref, + match_with_context, + first_context_line_number, + filename + ) + end + + greps + end + + # Return the Rugged patches for the diff between +from+ and +to+. + def diff_patches(from, to, options = {}, *paths) + options ||= {} + break_rewrites = options[:break_rewrites] + actual_options = Diff.filter_diff_options(options.merge(paths: paths)) + + diff = rugged.diff(from, to, actual_options) + diff.find_similar!(break_rewrites: break_rewrites) + diff.each_patch + end + end + end +end diff --git a/lib/gitlab/git/tag.rb b/lib/gitlab/git/tag.rb new file mode 100644 index 00000000000..b5342c3d310 --- /dev/null +++ b/lib/gitlab/git/tag.rb @@ -0,0 +1,17 @@ +module Gitlab + module Git + class Tag < Ref + attr_reader :object_sha + + def initialize(repository, name, target, message = nil) + super(repository, name, target) + + @message = message + end + + def message + encode! 
@message + end + end + end +end diff --git a/lib/gitlab/git/tree.rb b/lib/gitlab/git/tree.rb new file mode 100644 index 00000000000..76e3f112e2c --- /dev/null +++ b/lib/gitlab/git/tree.rb @@ -0,0 +1,104 @@ +module Gitlab + module Git + class Tree + include Gitlab::Git::EncodingHelper + + attr_accessor :id, :root_id, :name, :path, :type, + :mode, :commit_id, :submodule_url + + class << self + # Get list of tree objects + # for repository based on commit sha and path + # Uses rugged for raw objects + def where(repository, sha, path = nil) + path = nil if path == '' || path == '/' + + commit = repository.lookup(sha) + root_tree = commit.tree + + tree = if path + id = Tree.find_id_by_path(repository, root_tree.oid, path) + if id + repository.lookup(id) + else + [] + end + else + root_tree + end + + tree.map do |entry| + Tree.new( + id: entry[:oid], + root_id: root_tree.oid, + name: entry[:name], + type: entry[:type], + mode: entry[:filemode], + path: path ? File.join(path, entry[:name]) : entry[:name], + commit_id: sha, + ) + end + end + + # Recursive search of tree id for path + # + # Ex. + # blog/ # oid: 1a + # app/ # oid: 2a + # models/ # oid: 3a + # views/ # oid: 4a + # + # + # Tree.find_id_by_path(repo, '1a', 'app/models') # => '3a' + # + def find_id_by_path(repository, root_id, path) + root_tree = repository.lookup(root_id) + path_arr = path.split('/') + + entry = root_tree.find do |entry| + entry[:name] == path_arr[0] && entry[:type] == :tree + end + + return nil unless entry + + if path_arr.size > 1 + path_arr.shift + find_id_by_path(repository, entry[:oid], path_arr.join('/')) + else + entry[:oid] + end + end + end + + def initialize(options) + %w(id root_id name path type mode commit_id).each do |key| + self.send("#{key}=", options[key.to_sym]) + end + end + + def name + encode! @name + end + + def dir? + type == :tree + end + + def file? + type == :blob + end + + def submodule? + type == :commit + end + + def readme? 
+ name =~ /^readme/i + end + + def contributing? + name =~ /^contributing/i + end + end + end +end diff --git a/lib/gitlab/git/util.rb b/lib/gitlab/git/util.rb new file mode 100644 index 00000000000..7973da2e8f8 --- /dev/null +++ b/lib/gitlab/git/util.rb @@ -0,0 +1,18 @@ +module Gitlab + module Git + module Util + LINE_SEP = "\n".freeze + + def self.count_lines(string) + case string[-1] + when nil + 0 + when LINE_SEP + string.count(LINE_SEP) + else + string.count(LINE_SEP) + 1 + end + end + end + end +end |