summary refs log tree commit diff
diff options
context:
space:
mode:
author James Edwards-Jones <jedwardsjones@gitlab.com> 2018-01-11 23:12:34 +0000
committer James Edwards-Jones <jedwardsjones@gitlab.com> 2018-01-12 18:15:14 +0000
commit b0811aaa995782fa7874e3782f3667d4d72fa50d (patch)
tree 68cbf781252bd8e24ab79327375f751b035e5d41
parent be623ef3c1a867d23e9625fe372c17fcad3c47ce (diff)
download gitlab-ce-b0811aaa995782fa7874e3782f3667d4d72fa50d.tar.gz
Can parse root .gitattributes file for a ref
-rw-r--r-- config/initializers/rugged_use_gitlab_git_attributes.rb 4
-rw-r--r-- lib/gitlab/git/attributes_at_ref_parser.rb 14
-rw-r--r-- lib/gitlab/git/attributes_info_parser.rb 45
-rw-r--r-- lib/gitlab/git/attributes_parser.rb (renamed from lib/gitlab/git/attributes.rb) 48
-rw-r--r-- lib/gitlab/git/repository.rb 14
-rw-r--r-- spec/lib/gitlab/git/attributes_at_ref_parser_spec.rb 28
-rw-r--r-- spec/lib/gitlab/git/attributes_info_parser_spec.rb 54
-rw-r--r-- spec/lib/gitlab/git/attributes_parser_spec.rb (renamed from spec/lib/gitlab/git/attributes_spec.rb) 41
-rw-r--r-- spec/support/test_env.rb 2
9 files changed, 186 insertions, 64 deletions
diff --git a/config/initializers/rugged_use_gitlab_git_attributes.rb b/config/initializers/rugged_use_gitlab_git_attributes.rb
index 1cfb3bcb4bd..0b827ad8380 100644
--- a/config/initializers/rugged_use_gitlab_git_attributes.rb
+++ b/config/initializers/rugged_use_gitlab_git_attributes.rb
@@ -7,7 +7,7 @@
# repository-wide language statistics:
# <https://github.com/github/linguist/blob/v4.7.0/lib/linguist/lazy_blob.rb#L33-L36>
#
-# The options passed by Linguist are those assumed by Gitlab::Git::Attributes
+# The options passed by Linguist are those assumed by Gitlab::Git::AttributesInfoParser
# anyway, and there is no great efficiency gain from just fetching the listed
# attributes with our implementation, so we ignore the additional arguments.
#
@@ -19,7 +19,7 @@ module Rugged
end
def attributes
- @attributes ||= Gitlab::Git::Attributes.new(path)
+ @attributes ||= Gitlab::Git::AttributesInfoParser.new(path)
end
end
diff --git a/lib/gitlab/git/attributes_at_ref_parser.rb b/lib/gitlab/git/attributes_at_ref_parser.rb
new file mode 100644
index 00000000000..26b5bd520d5
--- /dev/null
+++ b/lib/gitlab/git/attributes_at_ref_parser.rb
@@ -0,0 +1,14 @@
+module Gitlab
+ module Git
+ # Parses root .gitattributes file at a given ref
+ class AttributesAtRefParser
+ delegate :attributes, to: :@parser
+
+ def initialize(repository, ref)
+ blob = repository.blob_at(ref, '.gitattributes')
+
+ @parser = AttributesParser.new(blob&.data)
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/git/attributes_info_parser.rb b/lib/gitlab/git/attributes_info_parser.rb
new file mode 100644
index 00000000000..a9427037067
--- /dev/null
+++ b/lib/gitlab/git/attributes_info_parser.rb
@@ -0,0 +1,45 @@
+# Gitaly note: JV: not sure what to make of this class. Why does it use
+# the full disk path of the repository to look up attributes? This is
+# problematic in Gitaly, because Gitaly hides the full disk path to the
+# repository from gitlab-ce.
+
+module Gitlab
+ module Git
+ # Parses gitattributes at `$GIT_DIR/info/attributes`
+ #
+ # Unlike Rugged this parser only needs a single IO call (a call to `open`),
+ # vastly reducing the time spent in extracting attributes.
+ #
+ # This class _only_ supports parsing the attributes file located at
+ # `$GIT_DIR/info/attributes` as GitLab doesn't use any other files
+ # (`.gitattributes` is copied to this particular path).
+ #
+ # Basic usage:
+ #
+ # attributes = Gitlab::Git::AttributesInfoParser.new(some_repo.path)
+ #
+ # attributes.attributes('README.md') # => { "eol" => "lf" }
+ class AttributesInfoParser < AttributesParser
+ # path - The path to the Git repository.
+ def initialize(path)
+ @repo_path = File.expand_path(path)
+ @patterns = nil
+ end
+
+ # Iterates over every line in the attributes file.
+ def each_line
+ full_path = File.join(@repo_path, 'info/attributes')
+
+ return unless File.exist?(full_path)
+
+ File.open(full_path, 'r') do |handle|
+ handle.each_line do |line|
+ break unless line.valid_encoding?
+
+ yield line.strip
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/git/attributes.rb b/lib/gitlab/git/attributes_parser.rb
index 2d20cd473a7..ffc880e0d9f 100644
--- a/lib/gitlab/git/attributes.rb
+++ b/lib/gitlab/git/attributes_parser.rb
@@ -1,42 +1,23 @@
-# Gitaly note: JV: not sure what to make of this class. Why does it use
-# the full disk path of the repository to look up attributes This is
-# problematic in Gitaly, because Gitaly hides the full disk path to the
-# repository from gitlab-ce.
-
module Gitlab
module Git
# Class for parsing Git attribute files and extracting the attributes for
# file patterns.
- #
- # Unlike Rugged this parser only needs a single IO call (a call to `open`),
- # vastly reducing the time spent in extracting attributes.
- #
- # This class _only_ supports parsing the attributes file located at
- # `$GIT_DIR/info/attributes` as GitLab doesn't use any other files
- # (`.gitattributes` is copied to this particular path).
- #
- # Basic usage:
- #
- # attributes = Gitlab::Git::Attributes.new(some_repo.path)
- #
- # attributes.attributes('README.md') # => { "eol" => "lf }
- class Attributes
- # path - The path to the Git repository.
- def initialize(path)
- @path = File.expand_path(path)
+ class AttributesParser
+ def initialize(attributes_data)
+ @data = attributes_data || ""
@patterns = nil
end
# Returns all the Git attributes for the given path.
#
- # path - A path to a file for which to get the attributes.
+ # file_path - A path to a file for which to get the attributes.
#
# Returns a Hash.
- def attributes(path)
- full_path = File.join(@path, path)
+ def attributes(file_path)
+ absolute_path = File.join('/', file_path)
patterns.each do |pattern, attrs|
- return attrs if File.fnmatch?(pattern, full_path)
+ return attrs if File.fnmatch?(pattern, absolute_path)
end
{}
@@ -98,16 +79,10 @@ module Gitlab
# Iterates over every line in the attributes file.
def each_line
- full_path = File.join(@path, 'info/attributes')
-
- return unless File.exist?(full_path)
+ @data.each_line do |line|
+ break unless line.valid_encoding?
- File.open(full_path, 'r') do |handle|
- handle.each_line do |line|
- break unless line.valid_encoding?
-
- yield line.strip
- end
+ yield line.strip
end
end
@@ -125,7 +100,8 @@ module Gitlab
parsed = attrs ? parse_attributes(attrs) : {}
- pairs << [File.join(@path, pattern), parsed]
+ absolute_pattern = File.join('/', pattern)
+ pairs << [absolute_pattern, parsed]
end
# Newer entries take precedence over older entries.
diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb
index 283134e043e..4c9bddf8bf7 100644
--- a/lib/gitlab/git/repository.rb
+++ b/lib/gitlab/git/repository.rb
@@ -102,7 +102,7 @@ module Gitlab
)
@path = File.join(storage_path, @relative_path)
@name = @relative_path.split("/").last
- @attributes = Gitlab::Git::Attributes.new(path)
+ @attributes = Gitlab::Git::AttributesInfoParser.new(path)
end
def ==(other)
@@ -1011,6 +1011,18 @@ module Gitlab
attributes(path)[name]
end
+ # Check .gitattributes for a given ref
+ #
+ # This only checks the root .gitattributes file;
+ # it does not traverse subfolders to find additional .gitattributes files.
+ #
+ # This method is around 30 times slower than `attributes`,
+ # which uses `$GIT_DIR/info/attributes`
+ def attributes_at(ref, file_path)
+ parser = AttributesAtRefParser.new(self, ref)
+ parser.attributes(file_path)
+ end
+
def languages(ref = nil)
Gitlab::GitalyClient.migrate(:commit_languages) do |is_enabled|
if is_enabled
diff --git a/spec/lib/gitlab/git/attributes_at_ref_parser_spec.rb b/spec/lib/gitlab/git/attributes_at_ref_parser_spec.rb
new file mode 100644
index 00000000000..5d22dcfb508
--- /dev/null
+++ b/spec/lib/gitlab/git/attributes_at_ref_parser_spec.rb
@@ -0,0 +1,28 @@
+require 'spec_helper'
+
+describe Gitlab::Git::AttributesAtRefParser, seed_helper: true do
+ let(:project) { create(:project, :repository) }
+ let(:repository) { project.repository }
+
+ subject { described_class.new(repository, 'lfs') }
+
+ it 'loads .gitattributes blob' do
+ repository.raw # Initialize repository in advance since this also checks attributes
+
+ expected_filter = 'filter=lfs diff=lfs merge=lfs'
+ receive_blob = receive(:new).with(a_string_including(expected_filter))
+ expect(Gitlab::Git::AttributesParser).to receive_blob.and_call_original
+
+ subject
+ end
+
+ it 'handles missing blobs' do
+ expect { described_class.new(repository, 'non-existant-branch') }.not_to raise_error
+ end
+
+ describe '#attributes' do
+ it 'returns the attributes as a Hash' do
+ expect(subject.attributes('test.lfs')['filter']).to eq('lfs')
+ end
+ end
+end
diff --git a/spec/lib/gitlab/git/attributes_info_parser_spec.rb b/spec/lib/gitlab/git/attributes_info_parser_spec.rb
new file mode 100644
index 00000000000..7e5b53eecb9
--- /dev/null
+++ b/spec/lib/gitlab/git/attributes_info_parser_spec.rb
@@ -0,0 +1,54 @@
+require 'spec_helper'
+
+describe Gitlab::Git::AttributesInfoParser, seed_helper: true do
+ let(:path) do
+ File.join(SEED_STORAGE_PATH, 'with-git-attributes.git')
+ end
+
+ subject { described_class.new(path) }
+
+ describe '#attributes' do
+ context 'using a path with attributes' do
+ it 'returns the attributes as a Hash' do
+ expect(subject.attributes('test.txt')).to eq({ 'text' => true })
+ end
+
+ it 'returns an empty Hash for a defined path without attributes' do
+ expect(subject.attributes('bla/bla.txt')).to eq({})
+ end
+ end
+ end
+
+ describe '#patterns' do
+ it 'does not parse anything when the attributes file does not exist' do
+ expect(File).to receive(:exist?)
+ .with(File.join(path, 'info/attributes'))
+ .and_return(false)
+
+ expect(subject.patterns).to eq({})
+ end
+ end
+
+ describe '#each_line' do
+ it 'iterates over every line in the attributes file' do
+ args = [String] * 14 # the number of lines in the file
+
+ expect { |b| subject.each_line(&b) }.to yield_successive_args(*args)
+ end
+
+ it 'does not yield when the attributes file does not exist' do
+ expect(File).to receive(:exist?)
+ .with(File.join(path, 'info/attributes'))
+ .and_return(false)
+
+ expect { |b| subject.each_line(&b) }.not_to yield_control
+ end
+
+ it 'does not yield when the attributes file has an unsupported encoding' do
+ path = File.join(SEED_STORAGE_PATH, 'with-invalid-git-attributes.git')
+ attrs = described_class.new(path)
+
+ expect { |b| attrs.each_line(&b) }.not_to yield_control
+ end
+ end
+end
diff --git a/spec/lib/gitlab/git/attributes_spec.rb b/spec/lib/gitlab/git/attributes_parser_spec.rb
index b715fc3410a..06c3d881834 100644
--- a/spec/lib/gitlab/git/attributes_spec.rb
+++ b/spec/lib/gitlab/git/attributes_parser_spec.rb
@@ -1,11 +1,12 @@
require 'spec_helper'
-describe Gitlab::Git::Attributes, seed_helper: true do
- let(:path) do
- File.join(SEED_STORAGE_PATH, 'with-git-attributes.git')
+describe Gitlab::Git::AttributesParser, seed_helper: true do
+ let(:data) do
+ file_path = File.join(SEED_STORAGE_PATH, 'with-git-attributes.git', 'info', 'attributes')
+ File.read(file_path)
end
- subject { described_class.new(path) }
+ subject { described_class.new(data) }
describe '#attributes' do
context 'using a path with attributes' do
@@ -66,6 +67,14 @@ describe Gitlab::Git::Attributes, seed_helper: true do
expect(subject.attributes('test.foo')).to eq({})
end
end
+
+ context 'when attributes data is nil' do
+ let(:data){ nil }
+
+ it 'returns an empty Hash' do
+ expect(subject.attributes('test.foo')).to eq({})
+ end
+ end
end
describe '#patterns' do
@@ -74,14 +83,14 @@ describe Gitlab::Git::Attributes, seed_helper: true do
end
it 'parses an entry that uses a tab to separate the pattern and attributes' do
- expect(subject.patterns[File.join(path, '*.md')])
+ expect(subject.patterns[File.join('/', '*.md')])
.to eq({ 'gitlab-language' => 'markdown' })
end
it 'stores patterns in reverse order' do
first = subject.patterns.to_a[0]
- expect(first[0]).to eq(File.join(path, 'bla/bla.txt'))
+ expect(first[0]).to eq(File.join('/', 'bla/bla.txt'))
end
# It's a bit hard to test for something _not_ being processed. As such we'll
@@ -89,14 +98,6 @@ describe Gitlab::Git::Attributes, seed_helper: true do
it 'ignores any comments and empty lines' do
expect(subject.patterns.length).to eq(10)
end
-
- it 'does not parse anything when the attributes file does not exist' do
- expect(File).to receive(:exist?)
- .with(File.join(path, 'info/attributes'))
- .and_return(false)
-
- expect(subject.patterns).to eq({})
- end
end
describe '#parse_attributes' do
@@ -132,17 +133,9 @@ describe Gitlab::Git::Attributes, seed_helper: true do
expect { |b| subject.each_line(&b) }.to yield_successive_args(*args)
end
- it 'does not yield when the attributes file does not exist' do
- expect(File).to receive(:exist?)
- .with(File.join(path, 'info/attributes'))
- .and_return(false)
-
- expect { |b| subject.each_line(&b) }.not_to yield_control
- end
-
it 'does not yield when the attributes file has an unsupported encoding' do
- path = File.join(SEED_STORAGE_PATH, 'with-invalid-git-attributes.git')
- attrs = described_class.new(path)
+ path = File.join(SEED_STORAGE_PATH, 'with-invalid-git-attributes.git', 'info', 'attributes')
+ attrs = described_class.new(File.read(path))
expect { |b| attrs.each_line(&b) }.not_to yield_control
end
diff --git a/spec/support/test_env.rb b/spec/support/test_env.rb
index 664698fcbaf..a00ef543128 100644
--- a/spec/support/test_env.rb
+++ b/spec/support/test_env.rb
@@ -20,7 +20,7 @@ module TestEnv
'improve/awesome' => '5937ac0',
'merged-target' => '21751bf',
'markdown' => '0ed8c6c',
- 'lfs' => 'be93687',
+ 'lfs' => '55bc176',
'master' => 'b83d6e3',
'merge-test' => '5937ac0',
"'test'" => 'e56497b',