summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: James Edwards-Jones <jedwardsjones@gitlab.com> 2018-01-11 23:12:34 +0000
committer: James Edwards-Jones <jedwardsjones@gitlab.com> 2018-01-24 05:15:37 +0000
commit: 0683d31ab162284f9f37004da7365ce2f3e6da55 (patch)
tree: c3b784c9577f4fcaaba8cc0b7f3e65946ec94b34
parent: be623ef3c1a867d23e9625fe372c17fcad3c47ce (diff)
download: gitlab-ce-jej/gitattributes-check-at-ref.tar.gz
Can parse root .gitattributes file for a ref (branch: jej/gitattributes-check-at-ref)
-rw-r--r-- config/initializers/rugged_use_gitlab_git_attributes.rb 4
-rw-r--r-- lib/gitlab/git/attributes_at_ref_parser.rb 14
-rw-r--r-- lib/gitlab/git/attributes_parser.rb (renamed from lib/gitlab/git/attributes.rb) 53
-rw-r--r-- lib/gitlab/git/info_attributes.rb 49
-rw-r--r-- lib/gitlab/git/repository.rb 14
-rw-r--r-- spec/lib/gitlab/git/attributes_at_ref_parser_spec.rb 28
-rw-r--r-- spec/lib/gitlab/git/attributes_parser_spec.rb (renamed from spec/lib/gitlab/git/attributes_spec.rb) 53
-rw-r--r-- spec/lib/gitlab/git/info_attributes_spec.rb 43
-rw-r--r-- spec/support/test_env.rb 2
9 files changed, 194 insertions, 66 deletions
diff --git a/config/initializers/rugged_use_gitlab_git_attributes.rb b/config/initializers/rugged_use_gitlab_git_attributes.rb
index 1cfb3bcb4bd..c0d45caec42 100644
--- a/config/initializers/rugged_use_gitlab_git_attributes.rb
+++ b/config/initializers/rugged_use_gitlab_git_attributes.rb
@@ -7,7 +7,7 @@
# repository-wide language statistics:
# <https://github.com/github/linguist/blob/v4.7.0/lib/linguist/lazy_blob.rb#L33-L36>
#
-# The options passed by Linguist are those assumed by Gitlab::Git::Attributes
+# The options passed by Linguist are those assumed by Gitlab::Git::InfoAttributes
# anyway, and there is no great efficiency gain from just fetching the listed
# attributes with our implementation, so we ignore the additional arguments.
#
@@ -19,7 +19,7 @@ module Rugged
end
def attributes
- @attributes ||= Gitlab::Git::Attributes.new(path)
+ @attributes ||= Gitlab::Git::InfoAttributes.new(path)
end
end
diff --git a/lib/gitlab/git/attributes_at_ref_parser.rb b/lib/gitlab/git/attributes_at_ref_parser.rb
new file mode 100644
index 00000000000..26b5bd520d5
--- /dev/null
+++ b/lib/gitlab/git/attributes_at_ref_parser.rb
@@ -0,0 +1,14 @@
+module Gitlab
+ module Git
+ # Parses root .gitattributes file at a given ref
+ class AttributesAtRefParser
+ delegate :attributes, to: :@parser
+
+ def initialize(repository, ref)
+ blob = repository.blob_at(ref, '.gitattributes')
+
+ @parser = AttributesParser.new(blob&.data)
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/git/attributes.rb b/lib/gitlab/git/attributes_parser.rb
index 2d20cd473a7..d8aeabb6cba 100644
--- a/lib/gitlab/git/attributes.rb
+++ b/lib/gitlab/git/attributes_parser.rb
@@ -1,42 +1,26 @@
-# Gitaly note: JV: not sure what to make of this class. Why does it use
-# the full disk path of the repository to look up attributes This is
-# problematic in Gitaly, because Gitaly hides the full disk path to the
-# repository from gitlab-ce.
-
module Gitlab
module Git
# Class for parsing Git attribute files and extracting the attributes for
# file patterns.
- #
- # Unlike Rugged this parser only needs a single IO call (a call to `open`),
- # vastly reducing the time spent in extracting attributes.
- #
- # This class _only_ supports parsing the attributes file located at
- # `$GIT_DIR/info/attributes` as GitLab doesn't use any other files
- # (`.gitattributes` is copied to this particular path).
- #
- # Basic usage:
- #
- # attributes = Gitlab::Git::Attributes.new(some_repo.path)
- #
- # attributes.attributes('README.md') # => { "eol" => "lf }
- class Attributes
- # path - The path to the Git repository.
- def initialize(path)
- @path = File.expand_path(path)
- @patterns = nil
+ class AttributesParser
+ def initialize(attributes_data)
+ @data = attributes_data || ""
+
+ if @data.is_a?(File)
+ @patterns = parse_file
+ end
end
# Returns all the Git attributes for the given path.
#
- # path - A path to a file for which to get the attributes.
+ # file_path - A path to a file for which to get the attributes.
#
# Returns a Hash.
- def attributes(path)
- full_path = File.join(@path, path)
+ def attributes(file_path)
+ absolute_path = File.join('/', file_path)
patterns.each do |pattern, attrs|
- return attrs if File.fnmatch?(pattern, full_path)
+ return attrs if File.fnmatch?(pattern, absolute_path)
end
{}
@@ -98,16 +82,10 @@ module Gitlab
# Iterates over every line in the attributes file.
def each_line
- full_path = File.join(@path, 'info/attributes')
+ @data.each_line do |line|
+ break unless line.valid_encoding?
- return unless File.exist?(full_path)
-
- File.open(full_path, 'r') do |handle|
- handle.each_line do |line|
- break unless line.valid_encoding?
-
- yield line.strip
- end
+ yield line.strip
end
end
@@ -125,7 +103,8 @@ module Gitlab
parsed = attrs ? parse_attributes(attrs) : {}
- pairs << [File.join(@path, pattern), parsed]
+ absolute_pattern = File.join('/', pattern)
+ pairs << [absolute_pattern, parsed]
end
# Newer entries take precedence over older entries.
diff --git a/lib/gitlab/git/info_attributes.rb b/lib/gitlab/git/info_attributes.rb
new file mode 100644
index 00000000000..e79a440950b
--- /dev/null
+++ b/lib/gitlab/git/info_attributes.rb
@@ -0,0 +1,49 @@
+# Gitaly note: JV: not sure what to make of this class. Why does it use
+# the full disk path of the repository to look up attributes? This is
+# problematic in Gitaly, because Gitaly hides the full disk path to the
+# repository from gitlab-ce.
+
+module Gitlab
+ module Git
+ # Parses gitattributes at `$GIT_DIR/info/attributes`
+ #
+ # Unlike Rugged this parser only needs a single IO call (a call to `open`),
+ # vastly reducing the time spent in extracting attributes.
+ #
+ # This class _only_ supports parsing the attributes file located at
+ # `$GIT_DIR/info/attributes` as GitLab doesn't use any other files
+ # (`.gitattributes` is copied to this particular path).
+ #
+ # Basic usage:
+ #
+ # attributes = Gitlab::Git::InfoAttributes.new(some_repo.path)
+ #
+ # attributes.attributes('README.md') # => { "eol" => "lf" }
+ class InfoAttributes
+ delegate :attributes, :patterns, to: :parser
+
+ # path - The path to the Git repository.
+ def initialize(path)
+ @repo_path = File.expand_path(path)
+ end
+
+ def parser
+ @parser ||= begin
+ if File.exist?(attributes_path)
+ File.open(attributes_path, 'r') do |file_handle|
+ AttributesParser.new(file_handle)
+ end
+ else
+ AttributesParser.new("")
+ end
+ end
+ end
+
+ private
+
+ def attributes_path
+ @attributes_path ||= File.join(@repo_path, 'info/attributes')
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb
index 283134e043e..736aaf0a642 100644
--- a/lib/gitlab/git/repository.rb
+++ b/lib/gitlab/git/repository.rb
@@ -102,7 +102,7 @@ module Gitlab
)
@path = File.join(storage_path, @relative_path)
@name = @relative_path.split("/").last
- @attributes = Gitlab::Git::Attributes.new(path)
+ @attributes = Gitlab::Git::InfoAttributes.new(path)
end
def ==(other)
@@ -1011,6 +1011,18 @@ module Gitlab
attributes(path)[name]
end
+ # Check .gitattributes for a given ref
+ #
+ # This only checks the root .gitattributes file;
+ # it does not traverse subfolders to find additional .gitattributes files
+ #
+ # This method is around 30 times slower than `attributes`,
+ # which uses `$GIT_DIR/info/attributes`
+ def attributes_at(ref, file_path)
+ parser = AttributesAtRefParser.new(self, ref)
+ parser.attributes(file_path)
+ end
+
def languages(ref = nil)
Gitlab::GitalyClient.migrate(:commit_languages) do |is_enabled|
if is_enabled
diff --git a/spec/lib/gitlab/git/attributes_at_ref_parser_spec.rb b/spec/lib/gitlab/git/attributes_at_ref_parser_spec.rb
new file mode 100644
index 00000000000..5d22dcfb508
--- /dev/null
+++ b/spec/lib/gitlab/git/attributes_at_ref_parser_spec.rb
@@ -0,0 +1,28 @@
+require 'spec_helper'
+
+describe Gitlab::Git::AttributesAtRefParser, seed_helper: true do
+ let(:project) { create(:project, :repository) }
+ let(:repository) { project.repository }
+
+ subject { described_class.new(repository, 'lfs') }
+
+ it 'loads .gitattributes blob' do
+ repository.raw # Initialize repository in advance since this also checks attributes
+
+ expected_filter = 'filter=lfs diff=lfs merge=lfs'
+ receive_blob = receive(:new).with(a_string_including(expected_filter))
+ expect(Gitlab::Git::AttributesParser).to receive_blob.and_call_original
+
+ subject
+ end
+
+ it 'handles missing blobs' do
+ expect { described_class.new(repository, 'non-existant-branch') }.not_to raise_error
+ end
+
+ describe '#attributes' do
+ it 'returns the attributes as a Hash' do
+ expect(subject.attributes('test.lfs')['filter']).to eq('lfs')
+ end
+ end
+end
diff --git a/spec/lib/gitlab/git/attributes_spec.rb b/spec/lib/gitlab/git/attributes_parser_spec.rb
index b715fc3410a..323334e99a5 100644
--- a/spec/lib/gitlab/git/attributes_spec.rb
+++ b/spec/lib/gitlab/git/attributes_parser_spec.rb
@@ -1,11 +1,10 @@
require 'spec_helper'
-describe Gitlab::Git::Attributes, seed_helper: true do
- let(:path) do
- File.join(SEED_STORAGE_PATH, 'with-git-attributes.git')
- end
+describe Gitlab::Git::AttributesParser, seed_helper: true do
+ let(:attributes_path) { File.join(SEED_STORAGE_PATH, 'with-git-attributes.git', 'info', 'attributes') }
+ let(:data) { File.read(attributes_path) }
- subject { described_class.new(path) }
+ subject { described_class.new(data) }
describe '#attributes' do
context 'using a path with attributes' do
@@ -66,6 +65,26 @@ describe Gitlab::Git::Attributes, seed_helper: true do
expect(subject.attributes('test.foo')).to eq({})
end
end
+
+ context 'when attributes data is a file handle' do
+ subject do
+ File.open(attributes_path, 'r') do |file_handle|
+ described_class.new(file_handle)
+ end
+ end
+
+ it 'returns the attributes as a Hash' do
+ expect(subject.attributes('test.txt')).to eq({ 'text' => true })
+ end
+ end
+
+ context 'when attributes data is nil' do
+ let(:data) { nil }
+
+ it 'returns an empty Hash' do
+ expect(subject.attributes('test.foo')).to eq({})
+ end
+ end
end
describe '#patterns' do
@@ -74,14 +93,14 @@ describe Gitlab::Git::Attributes, seed_helper: true do
end
it 'parses an entry that uses a tab to separate the pattern and attributes' do
- expect(subject.patterns[File.join(path, '*.md')])
+ expect(subject.patterns[File.join('/', '*.md')])
.to eq({ 'gitlab-language' => 'markdown' })
end
it 'stores patterns in reverse order' do
first = subject.patterns.to_a[0]
- expect(first[0]).to eq(File.join(path, 'bla/bla.txt'))
+ expect(first[0]).to eq(File.join('/', 'bla/bla.txt'))
end
# It's a bit hard to test for something _not_ being processed. As such we'll
@@ -89,14 +108,6 @@ describe Gitlab::Git::Attributes, seed_helper: true do
it 'ignores any comments and empty lines' do
expect(subject.patterns.length).to eq(10)
end
-
- it 'does not parse anything when the attributes file does not exist' do
- expect(File).to receive(:exist?)
- .with(File.join(path, 'info/attributes'))
- .and_return(false)
-
- expect(subject.patterns).to eq({})
- end
end
describe '#parse_attributes' do
@@ -132,17 +143,9 @@ describe Gitlab::Git::Attributes, seed_helper: true do
expect { |b| subject.each_line(&b) }.to yield_successive_args(*args)
end
- it 'does not yield when the attributes file does not exist' do
- expect(File).to receive(:exist?)
- .with(File.join(path, 'info/attributes'))
- .and_return(false)
-
- expect { |b| subject.each_line(&b) }.not_to yield_control
- end
-
it 'does not yield when the attributes file has an unsupported encoding' do
- path = File.join(SEED_STORAGE_PATH, 'with-invalid-git-attributes.git')
- attrs = described_class.new(path)
+ path = File.join(SEED_STORAGE_PATH, 'with-invalid-git-attributes.git', 'info', 'attributes')
+ attrs = described_class.new(File.read(path))
expect { |b| attrs.each_line(&b) }.not_to yield_control
end
diff --git a/spec/lib/gitlab/git/info_attributes_spec.rb b/spec/lib/gitlab/git/info_attributes_spec.rb
new file mode 100644
index 00000000000..ea84909c3e0
--- /dev/null
+++ b/spec/lib/gitlab/git/info_attributes_spec.rb
@@ -0,0 +1,43 @@
+require 'spec_helper'
+
+describe Gitlab::Git::InfoAttributes, seed_helper: true do
+ let(:path) do
+ File.join(SEED_STORAGE_PATH, 'with-git-attributes.git')
+ end
+
+ subject { described_class.new(path) }
+
+ describe '#attributes' do
+ context 'using a path with attributes' do
+ it 'returns the attributes as a Hash' do
+ expect(subject.attributes('test.txt')).to eq({ 'text' => true })
+ end
+
+ it 'returns an empty Hash for a defined path without attributes' do
+ expect(subject.attributes('bla/bla.txt')).to eq({})
+ end
+ end
+ end
+
+ describe '#parser' do
+ it 'parses a file with entries' do
+ expect(subject.patterns).to be_an_instance_of(Hash)
+ expect(subject.patterns["/*.txt"]).to eq({ 'text' => true })
+ end
+
+ it 'does not parse anything when the attributes file does not exist' do
+ expect(File).to receive(:exist?)
+ .with(File.join(path, 'info/attributes'))
+ .and_return(false)
+
+ expect(subject.patterns).to eq({})
+ end
+
+ it 'does not parse attributes files with unsupported encoding' do
+ path = File.join(SEED_STORAGE_PATH, 'with-invalid-git-attributes.git')
+ subject = described_class.new(path)
+
+ expect(subject.patterns).to eq({})
+ end
+ end
+end
diff --git a/spec/support/test_env.rb b/spec/support/test_env.rb
index 664698fcbaf..a00ef543128 100644
--- a/spec/support/test_env.rb
+++ b/spec/support/test_env.rb
@@ -20,7 +20,7 @@ module TestEnv
'improve/awesome' => '5937ac0',
'merged-target' => '21751bf',
'markdown' => '0ed8c6c',
- 'lfs' => 'be93687',
+ 'lfs' => '55bc176',
'master' => 'b83d6e3',
'merge-test' => '5937ac0',
"'test'" => 'e56497b',