diff options
-rw-r--r-- | changelogs/unreleased/35942-api-binary-encoding.yaml | 3 | ||||
-rw-r--r-- | lib/api/commits.rb | 2 | ||||
-rw-r--r-- | lib/api/entities.rb | 3 | ||||
-rw-r--r-- | lib/gitlab/encoding_helper.rb | 17 | ||||
-rw-r--r-- | lib/gitlab/git/blob.rb | 12 | ||||
-rw-r--r-- | lib/gitlab/git/diff.rb | 16 | ||||
-rw-r--r-- | spec/lib/gitlab/git/diff_spec.rb | 19 | ||||
-rw-r--r-- | spec/requests/api/commits_spec.rb | 6 |
8 files changed, 66 insertions, 12 deletions
diff --git a/changelogs/unreleased/35942-api-binary-encoding.yaml b/changelogs/unreleased/35942-api-binary-encoding.yaml new file mode 100644 index 00000000000..4f7960d860e --- /dev/null +++ b/changelogs/unreleased/35942-api-binary-encoding.yaml @@ -0,0 +1,3 @@ +--- +title: "Fix API to serve binary diffs that are treated as text." +merge_request: 14038 diff --git a/lib/api/commits.rb b/lib/api/commits.rb index ea78737288a..4b8d248f5f7 100644 --- a/lib/api/commits.rb +++ b/lib/api/commits.rb @@ -104,7 +104,7 @@ module API not_found! 'Commit' unless commit - commit.raw_diffs.to_a + present commit.raw_diffs.to_a, with: Entities::RepoDiff end desc "Get a commit's comments" do diff --git a/lib/api/entities.rb b/lib/api/entities.rb index 9114b69606b..1d224d7bc21 100644 --- a/lib/api/entities.rb +++ b/lib/api/entities.rb @@ -291,10 +291,11 @@ module API end class RepoDiff < Grape::Entity - expose :old_path, :new_path, :a_mode, :b_mode, :diff + expose :old_path, :new_path, :a_mode, :b_mode expose :new_file?, as: :new_file expose :renamed_file?, as: :renamed_file expose :deleted_file?, as: :deleted_file + expose :json_safe_diff, as: :diff end class ProtectedRefAccess < Grape::Entity diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb index 8ddc91e341d..7b3483a7f96 100644 --- a/lib/gitlab/encoding_helper.rb +++ b/lib/gitlab/encoding_helper.rb @@ -22,10 +22,10 @@ module Gitlab # return message if message type is binary detect = CharlockHolmes::EncodingDetector.detect(message) - return message.force_encoding("BINARY") if detect && detect[:type] == :binary + return message.force_encoding("BINARY") if detect_binary?(message, detect) - # force detected encoding if we have sufficient confidence. if detect && detect[:encoding] && detect[:confidence] > ENCODING_CONFIDENCE_THRESHOLD + # force detected encoding if we have sufficient confidence. message.force_encoding(detect[:encoding]) end @@ -36,6 +36,19 @@ module Gitlab "--broken encoding: #{encoding}" end + def detect_binary?(data, detect = nil) + detect ||= CharlockHolmes::EncodingDetector.detect(data) + detect && detect[:type] == :binary && detect[:confidence] == 100 + end + + def detect_libgit2_binary?(data) + # EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks + # only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15), + # which is what we use below to keep a consistent behavior. + detect = CharlockHolmes::EncodingDetector.new(8000).detect(data) + detect && detect[:type] == :binary + end + def encode_utf8(message) detect = CharlockHolmes::EncodingDetector.detect(message) if detect && detect[:encoding] diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb index 7780f4e4d4f..8d96826f6ee 100644 --- a/lib/gitlab/git/blob.rb +++ b/lib/gitlab/git/blob.rb @@ -42,14 +42,6 @@ module Gitlab end end - def binary?(data) - # EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks - # only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15), - # which is what we use below to keep a consistent behavior. - detect = CharlockHolmes::EncodingDetector.new(8000).detect(data) - detect && detect[:type] == :binary - end - # Returns an array of Blob instances, specified in blob_references as # [[commit_sha, path], [commit_sha, path], ...]. If blob_size_limit < 0 then the # full blob contents are returned. If blob_size_limit >= 0 then each blob will @@ -65,6 +57,10 @@ module Gitlab end end + def binary?(data) + EncodingHelper.detect_libgit2_binary?(data) + end + private # Recursive search of blob id by path diff --git a/lib/gitlab/git/diff.rb b/lib/gitlab/git/diff.rb index ce3d65062e8..a23c8cf0dd1 100644 --- a/lib/gitlab/git/diff.rb +++ b/lib/gitlab/git/diff.rb @@ -116,6 +116,15 @@ module Gitlab filtered_opts end + + # Return a binary diff message like: + # + # "Binary files a/file/path and b/file/path differ\n" + # This is used when we detect that a diff is binary + # using CharlockHolmes when Rugged treats it as text. + def binary_message(old_path, new_path) + "Binary files #{old_path} and #{new_path} differ\n" + end end def initialize(raw_diff, expanded: true) @@ -190,6 +199,13 @@ module Gitlab @collapsed = true end + def json_safe_diff + return @diff unless detect_binary?(@diff) + + # the diff is binary, let's make a message for it + Diff.binary_message(@old_path, @new_path) + end + private def init_from_rugged(rugged) diff --git a/spec/lib/gitlab/git/diff_spec.rb b/spec/lib/gitlab/git/diff_spec.rb index dfbdbee48f7..d39b33a0c05 100644 --- a/spec/lib/gitlab/git/diff_spec.rb +++ b/spec/lib/gitlab/git/diff_spec.rb @@ -273,6 +273,25 @@ EOT end end + describe '#json_safe_diff' do + let(:project) { create(:project, :repository) } + + it 'fake binary message when it detects binary' do + # Rugged will not detect this as binary, but we can fake it + diff_message = "Binary files files/images/icn-time-tracking.pdf and files/images/icn-time-tracking.pdf differ\n" + binary_diff = described_class.between(project.repository, 'add-pdf-text-binary', 'add-pdf-text-binary^').first + + expect(binary_diff.diff).not_to be_empty + expect(binary_diff.json_safe_diff).to eq(diff_message) + end + + it 'leave non-binary diffs as-is' do + diff = described_class.new(@rugged_diff) + + expect(diff.json_safe_diff).to eq(diff.diff) + end + end + describe '#submodule?' do before do commit = repository.lookup('5937ac0a7beb003549fc5fd26fc247adbce4a52e') diff --git a/spec/requests/api/commits_spec.rb b/spec/requests/api/commits_spec.rb index edbfaf510c5..f663719d28c 100644 --- a/spec/requests/api/commits_spec.rb +++ b/spec/requests/api/commits_spec.rb @@ -673,6 +673,12 @@ describe API::Commits do it_behaves_like 'ref diff' end end + + context 'when binary diff are treated as text' do + let(:commit_id) { TestEnv::BRANCH_SHA['add-pdf-text-binary'] } + + it_behaves_like 'ref diff' + end end end |