summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--changelogs/unreleased/35942-api-binary-encoding.yaml3
-rw-r--r--lib/api/commits.rb2
-rw-r--r--lib/api/entities.rb3
-rw-r--r--lib/gitlab/encoding_helper.rb17
-rw-r--r--lib/gitlab/git/blob.rb12
-rw-r--r--lib/gitlab/git/diff.rb16
-rw-r--r--spec/lib/gitlab/git/diff_spec.rb19
-rw-r--r--spec/requests/api/commits_spec.rb6
8 files changed, 66 insertions, 12 deletions
diff --git a/changelogs/unreleased/35942-api-binary-encoding.yaml b/changelogs/unreleased/35942-api-binary-encoding.yaml
new file mode 100644
index 00000000000..4f7960d860e
--- /dev/null
+++ b/changelogs/unreleased/35942-api-binary-encoding.yaml
@@ -0,0 +1,3 @@
+---
+title: "Fix API to serve binary diffs that are treated as text."
+merge_request: 14038
diff --git a/lib/api/commits.rb b/lib/api/commits.rb
index ea78737288a..4b8d248f5f7 100644
--- a/lib/api/commits.rb
+++ b/lib/api/commits.rb
@@ -104,7 +104,7 @@ module API
not_found! 'Commit' unless commit
- commit.raw_diffs.to_a
+ present commit.raw_diffs.to_a, with: Entities::RepoDiff
end
desc "Get a commit's comments" do
diff --git a/lib/api/entities.rb b/lib/api/entities.rb
index 9114b69606b..1d224d7bc21 100644
--- a/lib/api/entities.rb
+++ b/lib/api/entities.rb
@@ -291,10 +291,11 @@ module API
end
class RepoDiff < Grape::Entity
- expose :old_path, :new_path, :a_mode, :b_mode, :diff
+ expose :old_path, :new_path, :a_mode, :b_mode
expose :new_file?, as: :new_file
expose :renamed_file?, as: :renamed_file
expose :deleted_file?, as: :deleted_file
+ expose :json_safe_diff, as: :diff
end
class ProtectedRefAccess < Grape::Entity
diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb
index 8ddc91e341d..7b3483a7f96 100644
--- a/lib/gitlab/encoding_helper.rb
+++ b/lib/gitlab/encoding_helper.rb
@@ -22,10 +22,10 @@ module Gitlab
# return message if message type is binary
detect = CharlockHolmes::EncodingDetector.detect(message)
- return message.force_encoding("BINARY") if detect && detect[:type] == :binary
+ return message.force_encoding("BINARY") if detect_binary?(message, detect)
- # force detected encoding if we have sufficient confidence.
if detect && detect[:encoding] && detect[:confidence] > ENCODING_CONFIDENCE_THRESHOLD
+ # force detected encoding if we have sufficient confidence.
message.force_encoding(detect[:encoding])
end
@@ -36,6 +36,19 @@ module Gitlab
"--broken encoding: #{encoding}"
end
+ def detect_binary?(data, detect = nil)
+ detect ||= CharlockHolmes::EncodingDetector.detect(data)
+ detect && detect[:type] == :binary && detect[:confidence] == 100
+ end
+
+ def detect_libgit2_binary?(data)
+ # EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
+ # only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
+ # which is what we use below to keep a consistent behavior.
+ detect = CharlockHolmes::EncodingDetector.new(8000).detect(data)
+ detect && detect[:type] == :binary
+ end
+
def encode_utf8(message)
detect = CharlockHolmes::EncodingDetector.detect(message)
if detect && detect[:encoding]
diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb
index 7780f4e4d4f..8d96826f6ee 100644
--- a/lib/gitlab/git/blob.rb
+++ b/lib/gitlab/git/blob.rb
@@ -42,14 +42,6 @@ module Gitlab
end
end
- def binary?(data)
- # EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
- # only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
- # which is what we use below to keep a consistent behavior.
- detect = CharlockHolmes::EncodingDetector.new(8000).detect(data)
- detect && detect[:type] == :binary
- end
-
# Returns an array of Blob instances, specified in blob_references as
# [[commit_sha, path], [commit_sha, path], ...]. If blob_size_limit < 0 then the
# full blob contents are returned. If blob_size_limit >= 0 then each blob will
@@ -65,6 +57,10 @@ module Gitlab
end
end
+ def binary?(data)
+ EncodingHelper.detect_libgit2_binary?(data)
+ end
+
private
# Recursive search of blob id by path
diff --git a/lib/gitlab/git/diff.rb b/lib/gitlab/git/diff.rb
index ce3d65062e8..a23c8cf0dd1 100644
--- a/lib/gitlab/git/diff.rb
+++ b/lib/gitlab/git/diff.rb
@@ -116,6 +116,15 @@ module Gitlab
filtered_opts
end
+
+ # Return a binary diff message like:
+ #
+ # "Binary files a/file/path and b/file/path differ\n"
+ # This is used when we detect that a diff is binary
+ # using CharlockHolmes when Rugged treats it as text.
+ def binary_message(old_path, new_path)
+ "Binary files #{old_path} and #{new_path} differ\n"
+ end
end
def initialize(raw_diff, expanded: true)
@@ -190,6 +199,13 @@ module Gitlab
@collapsed = true
end
+ def json_safe_diff
+ return @diff unless detect_binary?(@diff)
+
+ # the diff is binary, let's make a message for it
+ Diff.binary_message(@old_path, @new_path)
+ end
+
private
def init_from_rugged(rugged)
diff --git a/spec/lib/gitlab/git/diff_spec.rb b/spec/lib/gitlab/git/diff_spec.rb
index dfbdbee48f7..d39b33a0c05 100644
--- a/spec/lib/gitlab/git/diff_spec.rb
+++ b/spec/lib/gitlab/git/diff_spec.rb
@@ -273,6 +273,25 @@ EOT
end
end
+ describe '#json_safe_diff' do
+ let(:project) { create(:project, :repository) }
+
+ it 'fake binary message when it detects binary' do
+ # Rugged will not detect this as binary, but we can fake it
+ diff_message = "Binary files files/images/icn-time-tracking.pdf and files/images/icn-time-tracking.pdf differ\n"
+ binary_diff = described_class.between(project.repository, 'add-pdf-text-binary', 'add-pdf-text-binary^').first
+
+ expect(binary_diff.diff).not_to be_empty
+ expect(binary_diff.json_safe_diff).to eq(diff_message)
+ end
+
+ it 'leave non-binary diffs as-is' do
+ diff = described_class.new(@rugged_diff)
+
+ expect(diff.json_safe_diff).to eq(diff.diff)
+ end
+ end
+
describe '#submodule?' do
before do
commit = repository.lookup('5937ac0a7beb003549fc5fd26fc247adbce4a52e')
diff --git a/spec/requests/api/commits_spec.rb b/spec/requests/api/commits_spec.rb
index edbfaf510c5..f663719d28c 100644
--- a/spec/requests/api/commits_spec.rb
+++ b/spec/requests/api/commits_spec.rb
@@ -673,6 +673,12 @@ describe API::Commits do
it_behaves_like 'ref diff'
end
end
+
+ context 'when binary diff are treated as text' do
+ let(:commit_id) { TestEnv::BRANCH_SHA['add-pdf-text-binary'] }
+
+ it_behaves_like 'ref diff'
+ end
end
end