diff options
author | Stan Hu <stanhu@gmail.com> | 2018-01-05 05:54:01 +0000 |
---|---|---|
committer | Stan Hu <stanhu@gmail.com> | 2018-01-05 05:54:01 +0000 |
commit | f7afb14c1150d686e53cb71da182ef014eba2a61 (patch) | |
tree | 6359048f642afd75c7f20119d8b93e98aa88e32b | |
parent | de3491cfc59187aa5550d9aad8b98556cc39e561 (diff) | |
parent | 5152cc3bfb8d60814063e86c3776030aa8891e0b (diff) | |
download | gitlab-ce-f7afb14c1150d686e53cb71da182ef014eba2a61.tar.gz |
Merge branch '41677-branch-name-omitted-due-to-bad-utf-8-conversion-by-gitaly-ref-handler' into 'master'
Resolve "Branch name omitted due to bad UTF-8 conversion by Gitaly ref handler"
Closes #41677
See merge request gitlab-org/gitlab-ce!16243
-rw-r--r-- | lib/gitlab/encoding_helper.rb | 26 | ||||
-rw-r--r-- | lib/gitlab/git.rb | 2 | ||||
-rw-r--r-- | spec/lib/gitlab/encoding_helper_spec.rb | 18 |
3 files changed, 35 insertions, 11 deletions
diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb index 6b53eb4533d..c0edcabc6fd 100644 --- a/lib/gitlab/encoding_helper.rb +++ b/lib/gitlab/encoding_helper.rb @@ -14,14 +14,7 @@ module Gitlab ENCODING_CONFIDENCE_THRESHOLD = 50 def encode!(message) - return nil unless message.respond_to?(:force_encoding) - return message if message.encoding == Encoding::UTF_8 && message.valid_encoding? - - if message.respond_to?(:frozen?) && message.frozen? - message = message.dup - end - - message.force_encoding("UTF-8") + message = force_encode_utf8(message) return message if message.valid_encoding? # return message if message type is binary @@ -35,6 +28,8 @@ module Gitlab # encode and clean the bad chars message.replace clean(message) + rescue ArgumentError + return nil rescue encoding = detect ? detect[:encoding] : "unknown" "--broken encoding: #{encoding}" @@ -54,8 +49,8 @@ module Gitlab end def encode_utf8(message) - return nil unless message.is_a?(String) - return message if message.encoding == Encoding::UTF_8 && message.valid_encoding? + message = force_encode_utf8(message) + return message if message.valid_encoding? detect = CharlockHolmes::EncodingDetector.detect(message) if detect && detect[:encoding] @@ -69,6 +64,8 @@ module Gitlab else clean(message) end + rescue ArgumentError + return nil end def encode_binary(s) @@ -83,6 +80,15 @@ module Gitlab private + def force_encode_utf8(message) + raise ArgumentError unless message.respond_to?(:force_encoding) + return message if message.encoding == Encoding::UTF_8 && message.valid_encoding? + + message = message.dup if message.respond_to?(:frozen?) && message.frozen? + + message.force_encoding("UTF-8") + end + def clean(message) message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "") .encode("UTF-8") diff --git a/lib/gitlab/git.rb b/lib/gitlab/git.rb index 1f7c35cafaa..71647099f83 100644 --- a/lib/gitlab/git.rb +++ b/lib/gitlab/git.rb @@ -11,7 +11,7 @@ module Gitlab include Gitlab::EncodingHelper def ref_name(ref) - encode_utf8(ref).sub(/\Arefs\/(tags|heads|remotes)\//, '') + encode!(ref).sub(/\Arefs\/(tags|heads|remotes)\//, '') end def branch_name(ref) diff --git a/spec/lib/gitlab/encoding_helper_spec.rb b/spec/lib/gitlab/encoding_helper_spec.rb index 87ec2698fc1..4e9367323cb 100644 --- a/spec/lib/gitlab/encoding_helper_spec.rb +++ b/spec/lib/gitlab/encoding_helper_spec.rb @@ -120,6 +120,24 @@ describe Gitlab::EncodingHelper do it 'returns empty string on conversion errors' do expect { ext_class.encode_utf8('') }.not_to raise_error(ArgumentError) end + + context 'with strings that can be forcefully encoded into utf8' do + let(:test_string) do + "refs/heads/FixSymbolsTitleDropdown".encode("ASCII-8BIT") + end + let(:expected_string) do + "refs/heads/FixSymbolsTitleDropdown".encode("UTF-8") + end + + subject { ext_class.encode_utf8(test_string) } + + it "doesn't use CharlockHolmes if the encoding can be forced into utf_8" do + expect(CharlockHolmes::EncodingDetector).not_to receive(:detect) + + expect(subject).to eq(expected_string) + expect(subject.encoding.name).to eq('UTF-8') + end + end end describe '#clean' do |