summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Stan Hu <stanhu@gmail.com> 2018-01-05 05:54:01 +0000
committer Stan Hu <stanhu@gmail.com> 2018-01-05 05:54:01 +0000
commit f7afb14c1150d686e53cb71da182ef014eba2a61 (patch)
tree 6359048f642afd75c7f20119d8b93e98aa88e32b
parent de3491cfc59187aa5550d9aad8b98556cc39e561 (diff)
parent 5152cc3bfb8d60814063e86c3776030aa8891e0b (diff)
download gitlab-ce-f7afb14c1150d686e53cb71da182ef014eba2a61.tar.gz
Merge branch '41677-branch-name-omitted-due-to-bad-utf-8-conversion-by-gitaly-ref-handler' into 'master'
Resolve "Branch name omitted due to bad UTF-8 conversion by Gitaly ref handler"

Closes #41677

See merge request gitlab-org/gitlab-ce!16243
-rw-r--r-- lib/gitlab/encoding_helper.rb 26
-rw-r--r-- lib/gitlab/git.rb 2
-rw-r--r-- spec/lib/gitlab/encoding_helper_spec.rb 18
3 files changed, 35 insertions, 11 deletions
diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb
index 6b53eb4533d..c0edcabc6fd 100644
--- a/lib/gitlab/encoding_helper.rb
+++ b/lib/gitlab/encoding_helper.rb
@@ -14,14 +14,7 @@ module Gitlab
ENCODING_CONFIDENCE_THRESHOLD = 50
def encode!(message)
- return nil unless message.respond_to?(:force_encoding)
- return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
-
- if message.respond_to?(:frozen?) && message.frozen?
- message = message.dup
- end
-
- message.force_encoding("UTF-8")
+ message = force_encode_utf8(message)
return message if message.valid_encoding?
# return message if message type is binary
@@ -35,6 +28,8 @@ module Gitlab
# encode and clean the bad chars
message.replace clean(message)
+ rescue ArgumentError
+ return nil
rescue
encoding = detect ? detect[:encoding] : "unknown"
"--broken encoding: #{encoding}"
@@ -54,8 +49,8 @@ module Gitlab
end
def encode_utf8(message)
- return nil unless message.is_a?(String)
- return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
+ message = force_encode_utf8(message)
+ return message if message.valid_encoding?
detect = CharlockHolmes::EncodingDetector.detect(message)
if detect && detect[:encoding]
@@ -69,6 +64,8 @@ module Gitlab
else
clean(message)
end
+ rescue ArgumentError
+ return nil
end
def encode_binary(s)
@@ -83,6 +80,15 @@ module Gitlab
private
+ def force_encode_utf8(message)
+ raise ArgumentError unless message.respond_to?(:force_encoding)
+ return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
+
+ message = message.dup if message.respond_to?(:frozen?) && message.frozen?
+
+ message.force_encoding("UTF-8")
+ end
+
def clean(message)
message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
.encode("UTF-8")
diff --git a/lib/gitlab/git.rb b/lib/gitlab/git.rb
index 1f7c35cafaa..71647099f83 100644
--- a/lib/gitlab/git.rb
+++ b/lib/gitlab/git.rb
@@ -11,7 +11,7 @@ module Gitlab
include Gitlab::EncodingHelper
def ref_name(ref)
- encode_utf8(ref).sub(/\Arefs\/(tags|heads|remotes)\//, '')
+ encode!(ref).sub(/\Arefs\/(tags|heads|remotes)\//, '')
end
def branch_name(ref)
diff --git a/spec/lib/gitlab/encoding_helper_spec.rb b/spec/lib/gitlab/encoding_helper_spec.rb
index 87ec2698fc1..4e9367323cb 100644
--- a/spec/lib/gitlab/encoding_helper_spec.rb
+++ b/spec/lib/gitlab/encoding_helper_spec.rb
@@ -120,6 +120,24 @@ describe Gitlab::EncodingHelper do
it 'returns empty string on conversion errors' do
expect { ext_class.encode_utf8('') }.not_to raise_error(ArgumentError)
end
+
+ context 'with strings that can be forcefully encoded into utf8' do
+ let(:test_string) do
+ "refs/heads/FixSymbolsTitleDropdown".encode("ASCII-8BIT")
+ end
+ let(:expected_string) do
+ "refs/heads/FixSymbolsTitleDropdown".encode("UTF-8")
+ end
+
+ subject { ext_class.encode_utf8(test_string) }
+
+ it "doesn't use CharlockHolmes if the encoding can be forced into utf_8" do
+ expect(CharlockHolmes::EncodingDetector).not_to receive(:detect)
+
+ expect(subject).to eq(expected_string)
+ expect(subject.encoding.name).to eq('UTF-8')
+ end
+ end
end
describe '#clean' do