5 files changed, 145 insertions, 48 deletions
diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb
index 33a7624e303..a7aceab4c14 100644
--- a/lib/gitlab/git/blob.rb
+++ b/lib/gitlab/git/blob.rb
@@ -14,6 +14,51 @@ module Gitlab
 
       class << self
         def find(repository, sha, path)
+          Gitlab::GitalyClient.migrate(:project_raw_show) do |is_enabled|
+            if is_enabled
+              find_by_gitaly(repository, sha, path)
+            else
+              find_by_rugged(repository, sha, path)
+            end
+          end
+        end
+
+        def find_by_gitaly(repository, sha, path)
+          path = path.sub(/\A\/*/, '')
+          path = '/' if path.empty?
+          name = File.basename(path)
+          entry = Gitlab::GitalyClient::Commit.new(repository).tree_entry(sha, path, MAX_DATA_DISPLAY_SIZE)
+          return unless entry
+
+          case entry.type
+          when :COMMIT
+            new(
+              id: entry.oid,
+              name: name,
+              size: 0,
+              data: '',
+              path: path,
+              commit_id: sha
+            )
+          when :BLOB
+            # EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
+            # only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
+            # which is what we use below to keep a consistent behavior.
+            detect = CharlockHolmes::EncodingDetector.new(8000).detect(entry.data)
+            new(
+              id: entry.oid,
+              name: name,
+              size: entry.size,
+              data: entry.data.dup,
+              mode: entry.mode.to_s(8),
+              path: path,
+              commit_id: sha,
+              binary: detect && detect[:type] == :binary
+            )
+          end
+        end
+
+        def find_by_rugged(repository, sha, path)
           commit = repository.lookup(sha)
           root_tree = commit.tree
 
diff --git a/lib/gitlab/git/commit.rb b/lib/gitlab/git/commit.rb
index bb04731f08c..d5d149f1423 100644
--- a/lib/gitlab/git/commit.rb
+++ b/lib/gitlab/git/commit.rb
@@ -4,7 +4,7 @@ module Gitlab
     class Commit
       include Gitlab::EncodingHelper
 
-      attr_accessor :raw_commit, :head, :refs
+      attr_accessor :raw_commit, :head
 
       SERIALIZE_KEYS = [
         :id, :message, :parent_ids,
diff --git a/lib/gitlab/git/diff.rb b/lib/gitlab/git/diff.rb
index 4b689f0e94f..f825568f194 100644
--- a/lib/gitlab/git/diff.rb
+++ b/lib/gitlab/git/diff.rb
@@ -16,11 +16,11 @@ module Gitlab
       alias_method :renamed_file?, :renamed_file
 
       attr_accessor :expanded
+      attr_writer :too_large
 
       alias_method :expanded?, :expanded
 
-      # We need this accessor because of `to_hash` and `init_from_hash`
-      attr_accessor :too_large
+      SERIALIZE_KEYS = %i(diff new_path old_path a_mode b_mode new_file renamed_file deleted_file too_large).freeze
 
       class << self
         # The maximum size of a diff to display.
@@ -231,16 +231,10 @@ module Gitlab
         end
       end
 
-      def serialize_keys
-        @serialize_keys ||= %i(diff new_path old_path a_mode b_mode new_file renamed_file deleted_file too_large)
-      end
-
       def to_hash
         hash = {}
 
-        keys = serialize_keys
-
-        keys.each do |key|
+        SERIALIZE_KEYS.each do |key|
           hash[key] = send(key)
         end
 
@@ -267,6 +261,9 @@ module Gitlab
         end
       end
 
+      # This is used by `to_hash` and `init_from_hash`.
+      alias_method :too_large, :too_large?
+
       def too_large!
         @diff = ''
         @line_count = 0
@@ -315,7 +312,7 @@ module Gitlab
       def init_from_hash(hash)
         raw_diff = hash.symbolize_keys
 
-        serialize_keys.each do |key|
+        SERIALIZE_KEYS.each do |key|
           send(:"#{key}=", raw_diff[key.to_sym])
         end
       end
diff --git a/lib/gitlab/git/gitmodules_parser.rb b/lib/gitlab/git/gitmodules_parser.rb
new file mode 100644
index 00000000000..f4e3b5e5129
--- /dev/null
+++ b/lib/gitlab/git/gitmodules_parser.rb
@@ -0,0 +1,77 @@
+module Gitlab
+  module Git
+    class GitmodulesParser
+      def initialize(content)
+        @content = content
+      end
+
+      # Parses the contents of a .gitmodules file and returns a hash of
+      # submodule information, indexed by path.
+      def parse
+        reindex_by_path(get_submodules_by_name)
+      end
+
+      private
+
+      class State
+        def initialize
+          @result = {}
+          @current_submodule = nil
+        end
+
+        def start_section(section)
+          # In some .gitmodules files (e.g. nodegit's), a header
+          # with the same name appears multiple times; we want to
+          # accumulate the configs across these
+          @current_submodule = @result[section] || { 'name' => section }
+          @result[section] = @current_submodule
+        end
+
+        def set_attribute(attr, value)
+          @current_submodule[attr] = value
+        end
+
+        def section_started?
+          !@current_submodule.nil?
+        end
+
+        def submodules_by_name
+          @result
+        end
+      end
+
+      def get_submodules_by_name
+        iterator = State.new
+
+        @content.split("\n").each_with_object(iterator) do |text, iterator|
+          next if text =~ /^\s*#/
+
+          if text =~ /\A\[submodule "(?<name>[^"]+)"\]\z/
+            iterator.start_section($~[:name])
+          else
+            next unless iterator.section_started?
+
+            next unless text =~ /\A\s*(?<key>\w+)\s*=\s*(?<value>.*)\z/
+
+            value = $~[:value].chomp
+            iterator.set_attribute($~[:key], value)
+          end
+        end
+
+        iterator.submodules_by_name
+      end
+
+      def reindex_by_path(submodules_by_name)
+        # Convert from an indexed by name to an array indexed by path
+        # If a submodule doesn't have a path, it is considered bogus
+        # and is ignored
+        submodules_by_name.each_with_object({}) do |(name, data), results|
+          path = data.delete 'path'
+          next unless path
+
+          results[path] = data
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb
index 85695d0a4df..c1f942f931a 100644
--- a/lib/gitlab/git/repository.rb
+++ b/lib/gitlab/git/repository.rb
@@ -617,9 +617,9 @@ module Gitlab
       #
       # Ex.
       #   {
-      #     "rack"  => {
+      #     "current_path/rack"  => {
+      #       "name" => "original_path/rack",
       #       "id" => "c67be4624545b4263184c4a0e8f887efd0a66320",
-      #       "path" => "rack",
       #       "url" => "git://github.com/chneukirchen/rack.git"
       #     },
       #     "encoding" => {
@@ -637,7 +637,8 @@ module Gitlab
           return {}
         end
 
-        parse_gitmodules(commit, content)
+        parser = GitmodulesParser.new(content)
+        fill_submodule_ids(commit, parser.parse)
       end
 
       # Return total commits count accessible from passed ref
@@ -998,42 +999,19 @@ module Gitlab
         end
       end
 
-      # Parses the contents of a .gitmodules file and returns a hash of
-      # submodule information.
-      def parse_gitmodules(commit, content)
-        modules = {}
-
-        name = nil
-        content.each_line do |line|
-          case line.strip
-          when /\A\[submodule "(?<name>[^"]+)"\]\z/ # Submodule header
-            name = $~[:name]
-            modules[name] = {}
-          when /\A(?<key>\w+)\s*=\s*(?<value>.*)\z/ # Key/value pair
-            key = $~[:key]
-            value = $~[:value].chomp
-
-            next unless name && modules[name]
-
-            modules[name][key] = value
-
-            if key == 'path'
-              begin
-                modules[name]['id'] = blob_content(commit, value)
-              rescue InvalidBlobName
-                # The current entry is invalid
-                modules.delete(name)
-                name = nil
-              end
-            end
-          when /\A#/ # Comment
-            next
-          else # Invalid line
-            name = nil
+      # Fill in the 'id' field of a submodule hash from its values
+      # as-of +commit+. Return a Hash consisting only of entries
+      # from the submodule hash for which the 'id' field is filled.
+      def fill_submodule_ids(commit, submodule_data)
+        submodule_data.each do |path, data|
+          id = begin
+            blob_content(commit, path)
+          rescue InvalidBlobName
+            nil
           end
+          data['id'] = id
         end
-
-        modules
+        submodule_data.select { |path, data| data['id'] }
       end
 
       # Returns true if +commit+ introduced changes to +path+, using commit