diff options
author | Alejandro RodrÃguez <alejorro70@gmail.com> | 2018-01-04 19:27:37 -0300 |
---|---|---|
committer | Alejandro RodrÃguez <alejorro70@gmail.com> | 2018-01-04 19:27:37 -0300 |
commit | 5152cc3bfb8d60814063e86c3776030aa8891e0b (patch) | |
tree | 4cec7672c48ab1087d55a96da57a4b2dd1698b1d | |
parent | 6f1b4dc76b4619f538b7216ad3a10ca9336d0c2b (diff) | |
download | gitlab-ce-5152cc3bfb8d60814063e86c3776030aa8891e0b.tar.gz |
Fix a bug where charlock_holmes was used needlessly to encode strings41677-branch-name-omitted-due-to-bad-utf-8-conversion-by-gitaly-ref-handler
-rw-r--r-- | lib/gitlab/encoding_helper.rb | 26 | ||||
-rw-r--r-- | lib/gitlab/git.rb | 2 | ||||
-rw-r--r-- | spec/lib/gitlab/encoding_helper_spec.rb | 18 |
3 files changed, 35 insertions, 11 deletions
diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb index 6b53eb4533d..c0edcabc6fd 100644 --- a/lib/gitlab/encoding_helper.rb +++ b/lib/gitlab/encoding_helper.rb @@ -14,14 +14,7 @@ module Gitlab ENCODING_CONFIDENCE_THRESHOLD = 50 def encode!(message) - return nil unless message.respond_to?(:force_encoding) - return message if message.encoding == Encoding::UTF_8 && message.valid_encoding? - - if message.respond_to?(:frozen?) && message.frozen? - message = message.dup - end - - message.force_encoding("UTF-8") + message = force_encode_utf8(message) return message if message.valid_encoding? # return message if message type is binary @@ -35,6 +28,8 @@ module Gitlab # encode and clean the bad chars message.replace clean(message) + rescue ArgumentError + return nil rescue encoding = detect ? detect[:encoding] : "unknown" "--broken encoding: #{encoding}" @@ -54,8 +49,8 @@ module Gitlab end def encode_utf8(message) - return nil unless message.is_a?(String) - return message if message.encoding == Encoding::UTF_8 && message.valid_encoding? + message = force_encode_utf8(message) + return message if message.valid_encoding? detect = CharlockHolmes::EncodingDetector.detect(message) if detect && detect[:encoding] @@ -69,6 +64,8 @@ module Gitlab else clean(message) end + rescue ArgumentError + return nil end def encode_binary(s) @@ -83,6 +80,15 @@ module Gitlab private + def force_encode_utf8(message) + raise ArgumentError unless message.respond_to?(:force_encoding) + return message if message.encoding == Encoding::UTF_8 && message.valid_encoding? + + message = message.dup if message.respond_to?(:frozen?) && message.frozen? + + message.force_encoding("UTF-8") + end + def clean(message) message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "") .encode("UTF-8") diff --git a/lib/gitlab/git.rb b/lib/gitlab/git.rb index 1f7c35cafaa..71647099f83 100644 --- a/lib/gitlab/git.rb +++ b/lib/gitlab/git.rb @@ -11,7 +11,7 @@ module Gitlab include Gitlab::EncodingHelper def ref_name(ref) - encode_utf8(ref).sub(/\Arefs\/(tags|heads|remotes)\//, '') + encode!(ref).sub(/\Arefs\/(tags|heads|remotes)\//, '') end def branch_name(ref) diff --git a/spec/lib/gitlab/encoding_helper_spec.rb b/spec/lib/gitlab/encoding_helper_spec.rb index 87ec2698fc1..4e9367323cb 100644 --- a/spec/lib/gitlab/encoding_helper_spec.rb +++ b/spec/lib/gitlab/encoding_helper_spec.rb @@ -120,6 +120,24 @@ describe Gitlab::EncodingHelper do it 'returns empty string on conversion errors' do expect { ext_class.encode_utf8('') }.not_to raise_error(ArgumentError) end + + context 'with strings that can be forcefully encoded into utf8' do + let(:test_string) do + "refs/heads/FixSymbolsTitleDropdown".encode("ASCII-8BIT") + end + let(:expected_string) do + "refs/heads/FixSymbolsTitleDropdown".encode("UTF-8") + end + + subject { ext_class.encode_utf8(test_string) } + + it "doesn't use CharlockHolmes if the encoding can be forced into utf_8" do + expect(CharlockHolmes::EncodingDetector).not_to receive(:detect) + + expect(subject).to eq(expected_string) + expect(subject.encoding.name).to eq('UTF-8') + end + end end describe '#clean' do |