diff options
author | Sean McGivern <sean@mcgivern.me.uk> | 2017-09-06 15:55:35 +0000 |
---|---|---|
committer | Sean McGivern <sean@mcgivern.me.uk> | 2017-09-06 15:55:35 +0000 |
commit | ba39b26cc2f1ace540177c3b73d64d40b06bc902 (patch) | |
tree | 74691ce0b45a6b2d35efc58cdbfa460dc0ddef22 /lib | |
parent | 93e1d4dd285c657a3abb09dff7f86e552b0097f2 (diff) | |
parent | 46f6092a6d4dd39bfa193d0a6ccbd5688df7eebe (diff) | |
download | gitlab-ce-ba39b26cc2f1ace540177c3b73d64d40b06bc902.tar.gz |
Merge branch '35942_api_binary_encoding' into 'master'
API fix for non UTF-8 data
Closes #35942
See merge request !14038
Diffstat (limited to 'lib')
-rw-r--r-- | lib/api/commits.rb | 2 | ||||
-rw-r--r-- | lib/api/entities.rb | 3 | ||||
-rw-r--r-- | lib/gitlab/encoding_helper.rb | 17 | ||||
-rw-r--r-- | lib/gitlab/git/blob.rb | 12 | ||||
-rw-r--r-- | lib/gitlab/git/diff.rb | 16 |
5 files changed, 38 insertions, 12 deletions
```diff
diff --git a/lib/api/commits.rb b/lib/api/commits.rb
index ea78737288a..4b8d248f5f7 100644
--- a/lib/api/commits.rb
+++ b/lib/api/commits.rb
@@ -104,7 +104,7 @@ module API
 
         not_found! 'Commit' unless commit
 
-        commit.raw_diffs.to_a
+        present commit.raw_diffs.to_a, with: Entities::RepoDiff
       end
 
       desc "Get a commit's comments" do
diff --git a/lib/api/entities.rb b/lib/api/entities.rb
index 9114b69606b..1d224d7bc21 100644
--- a/lib/api/entities.rb
+++ b/lib/api/entities.rb
@@ -291,10 +291,11 @@ module API
     end
 
     class RepoDiff < Grape::Entity
-      expose :old_path, :new_path, :a_mode, :b_mode, :diff
+      expose :old_path, :new_path, :a_mode, :b_mode
       expose :new_file?, as: :new_file
       expose :renamed_file?, as: :renamed_file
       expose :deleted_file?, as: :deleted_file
+      expose :json_safe_diff, as: :diff
     end
 
     class ProtectedRefAccess < Grape::Entity
diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb
index 8ddc91e341d..7b3483a7f96 100644
--- a/lib/gitlab/encoding_helper.rb
+++ b/lib/gitlab/encoding_helper.rb
@@ -22,10 +22,10 @@ module Gitlab
 
       # return message if message type is binary
       detect = CharlockHolmes::EncodingDetector.detect(message)
-      return message.force_encoding("BINARY") if detect && detect[:type] == :binary
+      return message.force_encoding("BINARY") if detect_binary?(message, detect)
 
-      # force detected encoding if we have sufficient confidence.
       if detect && detect[:encoding] && detect[:confidence] > ENCODING_CONFIDENCE_THRESHOLD
+        # force detected encoding if we have sufficient confidence.
         message.force_encoding(detect[:encoding])
       end
 
@@ -36,6 +36,19 @@ module Gitlab
       "--broken encoding: #{encoding}"
     end
 
+    def detect_binary?(data, detect = nil)
+      detect ||= CharlockHolmes::EncodingDetector.detect(data)
+      detect && detect[:type] == :binary && detect[:confidence] == 100
+    end
+
+    def detect_libgit2_binary?(data)
+      # EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
+      # only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
+      # which is what we use below to keep a consistent behavior.
+      detect = CharlockHolmes::EncodingDetector.new(8000).detect(data)
+      detect && detect[:type] == :binary
+    end
+
     def encode_utf8(message)
       detect = CharlockHolmes::EncodingDetector.detect(message)
       if detect && detect[:encoding]
diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb
index 7780f4e4d4f..8d96826f6ee 100644
--- a/lib/gitlab/git/blob.rb
+++ b/lib/gitlab/git/blob.rb
@@ -42,14 +42,6 @@ module Gitlab
           end
         end
 
-        def binary?(data)
-          # EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
-          # only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
-          # which is what we use below to keep a consistent behavior.
-          detect = CharlockHolmes::EncodingDetector.new(8000).detect(data)
-          detect && detect[:type] == :binary
-        end
-
         # Returns an array of Blob instances, specified in blob_references as
         # [[commit_sha, path], [commit_sha, path], ...]. If blob_size_limit < 0 then the
         # full blob contents are returned. If blob_size_limit >= 0 then each blob will
@@ -65,6 +57,10 @@ module Gitlab
           end
         end
 
+        def binary?(data)
+          EncodingHelper.detect_libgit2_binary?(data)
+        end
+
         private
 
         # Recursive search of blob id by path
diff --git a/lib/gitlab/git/diff.rb b/lib/gitlab/git/diff.rb
index ce3d65062e8..a23c8cf0dd1 100644
--- a/lib/gitlab/git/diff.rb
+++ b/lib/gitlab/git/diff.rb
@@ -116,6 +116,15 @@ module Gitlab
 
           filtered_opts
         end
+
+        # Return a binary diff message like:
+        #
+        # "Binary files a/file/path and b/file/path differ\n"
+        # This is used when we detect that a diff is binary
+        # using CharlockHolmes when Rugged treats it as text.
+        def binary_message(old_path, new_path)
+          "Binary files #{old_path} and #{new_path} differ\n"
+        end
       end
 
       def initialize(raw_diff, expanded: true)
@@ -190,6 +199,13 @@ module Gitlab
         @collapsed = true
       end
 
+      def json_safe_diff
+        return @diff unless detect_binary?(@diff)
+
+        # the diff is binary, let's make a message for it
+        Diff.binary_message(@old_path, @new_path)
+      end
+
       private
 
       def init_from_rugged(rugged)
```