From b30c16aa3298221b1957fef61e69c47be74bb25e Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 17 Apr 2017 20:13:08 -0400 Subject: repository: index submodules by path Submodules have a name in the configuration, but this name is simply the path at which the submodule was initially checked in (by default -- the name is totally arbitrary). If a submodule is moved, it retains its original name, but its path changes. Since we discover submodules inside trees, we have their path but not necessarily their name. Make the submodules() function return the submodule hash indexed by path rather than name, so that renamed submodules can be looked up. Signed-off-by: David Turner --- lib/gitlab/git/gitmodules_parser.rb | 77 +++++++++++++++++++++++++++++++++++++ lib/gitlab/git/repository.rb | 52 ++++++++----------------- 2 files changed, 92 insertions(+), 37 deletions(-) create mode 100644 lib/gitlab/git/gitmodules_parser.rb (limited to 'lib/gitlab/git') diff --git a/lib/gitlab/git/gitmodules_parser.rb b/lib/gitlab/git/gitmodules_parser.rb new file mode 100644 index 00000000000..f4e3b5e5129 --- /dev/null +++ b/lib/gitlab/git/gitmodules_parser.rb @@ -0,0 +1,77 @@ +module Gitlab + module Git + class GitmodulesParser + def initialize(content) + @content = content + end + + # Parses the contents of a .gitmodules file and returns a hash of + # submodule information, indexed by path. + def parse + reindex_by_path(get_submodules_by_name) + end + + private + + class State + def initialize + @result = {} + @current_submodule = nil + end + + def start_section(section) + # In some .gitmodules files (e.g. nodegit's), a header + # with the same name appears multiple times; we want to + # accumulate the configs across these + @current_submodule = @result[section] || { 'name' => section } + @result[section] = @current_submodule + end + + def set_attribute(attr, value) + @current_submodule[attr] = value + end + + def section_started? + !@current_submodule.nil? + end + + def submodules_by_name + @result + end + end + + def get_submodules_by_name + iterator = State.new + + @content.split("\n").each_with_object(iterator) do |text, iterator| + next if text =~ /^\s*#/ + + if text =~ /\A\[submodule "(?[^"]+)"\]\z/ + iterator.start_section($~[:name]) + else + next unless iterator.section_started? + + next unless text =~ /\A\s*(?\w+)\s*=\s*(?.*)\z/ + + value = $~[:value].chomp + iterator.set_attribute($~[:key], value) + end + end + + iterator.submodules_by_name + end + + def reindex_by_path(submodules_by_name) + # Convert from an indexed by name to an array indexed by path + # If a submodule doesn't have a path, it is considered bogus + # and is ignored + submodules_by_name.each_with_object({}) do |(name, data), results| + path = data.delete 'path' + next unless path + + results[path] = data + end + end + end + end +end diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index 85695d0a4df..c1f942f931a 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -617,9 +617,9 @@ module Gitlab # # Ex. # { - # "rack" => { + # "current_path/rack" => { + # "name" => "original_path/rack", # "id" => "c67be4624545b4263184c4a0e8f887efd0a66320", - # "path" => "rack", # "url" => "git://github.com/chneukirchen/rack.git" # }, # "encoding" => { @@ -637,7 +637,8 @@ module Gitlab return {} end - parse_gitmodules(commit, content) + parser = GitmodulesParser.new(content) + fill_submodule_ids(commit, parser.parse) end # Return total commits count accessible from passed ref @@ -998,42 +999,19 @@ module Gitlab end end - # Parses the contents of a .gitmodules file and returns a hash of - # submodule information. - def parse_gitmodules(commit, content) - modules = {} - - name = nil - content.each_line do |line| - case line.strip - when /\A\[submodule "(?[^"]+)"\]\z/ # Submodule header - name = $~[:name] - modules[name] = {} - when /\A(?\w+)\s*=\s*(?.*)\z/ # Key/value pair - key = $~[:key] - value = $~[:value].chomp - - next unless name && modules[name] - - modules[name][key] = value - - if key == 'path' - begin - modules[name]['id'] = blob_content(commit, value) - rescue InvalidBlobName - # The current entry is invalid - modules.delete(name) - name = nil - end - end - when /\A#/ # Comment - next - else # Invalid line - name = nil + # Fill in the 'id' field of a submodule hash from its values + # as-of +commit+. Return a Hash consisting only of entries + # from the submodule hash for which the 'id' field is filled. + def fill_submodule_ids(commit, submodule_data) + submodule_data.each do |path, data| + id = begin + blob_content(commit, path) + rescue InvalidBlobName + nil end + data['id'] = id end - - modules + submodule_data.select { |path, data| data['id'] } end # Returns true if +commit+ introduced changes to +path+, using commit -- cgit v1.2.1