summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlejandro Rodríguez <alejorro70@gmail.com>2018-01-04 19:27:37 -0300
committerAlejandro Rodríguez <alejorro70@gmail.com>2018-01-04 19:27:37 -0300
commit5152cc3bfb8d60814063e86c3776030aa8891e0b (patch)
tree4cec7672c48ab1087d55a96da57a4b2dd1698b1d
parent6f1b4dc76b4619f538b7216ad3a10ca9336d0c2b (diff)
downloadgitlab-ce-5152cc3bfb8d60814063e86c3776030aa8891e0b.tar.gz
Fix a bug where charlock_holmes was used needlessly to encode strings41677-branch-name-omitted-due-to-bad-utf-8-conversion-by-gitaly-ref-handler
-rw-r--r--lib/gitlab/encoding_helper.rb26
-rw-r--r--lib/gitlab/git.rb2
-rw-r--r--spec/lib/gitlab/encoding_helper_spec.rb18
3 files changed, 35 insertions, 11 deletions
diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb
index 6b53eb4533d..c0edcabc6fd 100644
--- a/lib/gitlab/encoding_helper.rb
+++ b/lib/gitlab/encoding_helper.rb
@@ -14,14 +14,7 @@ module Gitlab
ENCODING_CONFIDENCE_THRESHOLD = 50
def encode!(message)
- return nil unless message.respond_to?(:force_encoding)
- return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
-
- if message.respond_to?(:frozen?) && message.frozen?
- message = message.dup
- end
-
- message.force_encoding("UTF-8")
+ message = force_encode_utf8(message)
return message if message.valid_encoding?
# return message if message type is binary
@@ -35,6 +28,8 @@ module Gitlab
# encode and clean the bad chars
message.replace clean(message)
+ rescue ArgumentError
+ return nil
rescue
encoding = detect ? detect[:encoding] : "unknown"
"--broken encoding: #{encoding}"
@@ -54,8 +49,8 @@ module Gitlab
end
def encode_utf8(message)
- return nil unless message.is_a?(String)
- return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
+ message = force_encode_utf8(message)
+ return message if message.valid_encoding?
detect = CharlockHolmes::EncodingDetector.detect(message)
if detect && detect[:encoding]
@@ -69,6 +64,8 @@ module Gitlab
else
clean(message)
end
+ rescue ArgumentError
+ return nil
end
def encode_binary(s)
@@ -83,6 +80,15 @@ module Gitlab
private
+ def force_encode_utf8(message)
+ raise ArgumentError unless message.respond_to?(:force_encoding)
+ return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
+
+ message = message.dup if message.respond_to?(:frozen?) && message.frozen?
+
+ message.force_encoding("UTF-8")
+ end
+
def clean(message)
message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
.encode("UTF-8")
diff --git a/lib/gitlab/git.rb b/lib/gitlab/git.rb
index 1f7c35cafaa..71647099f83 100644
--- a/lib/gitlab/git.rb
+++ b/lib/gitlab/git.rb
@@ -11,7 +11,7 @@ module Gitlab
include Gitlab::EncodingHelper
def ref_name(ref)
- encode_utf8(ref).sub(/\Arefs\/(tags|heads|remotes)\//, '')
+ encode!(ref).sub(/\Arefs\/(tags|heads|remotes)\//, '')
end
def branch_name(ref)
diff --git a/spec/lib/gitlab/encoding_helper_spec.rb b/spec/lib/gitlab/encoding_helper_spec.rb
index 87ec2698fc1..4e9367323cb 100644
--- a/spec/lib/gitlab/encoding_helper_spec.rb
+++ b/spec/lib/gitlab/encoding_helper_spec.rb
@@ -120,6 +120,24 @@ describe Gitlab::EncodingHelper do
it 'returns empty string on conversion errors' do
expect { ext_class.encode_utf8('') }.not_to raise_error(ArgumentError)
end
+
+ context 'with strings that can be forcefully encoded into utf8' do
+ let(:test_string) do
+ "refs/heads/FixSymbolsTitleDropdown".encode("ASCII-8BIT")
+ end
+ let(:expected_string) do
+ "refs/heads/FixSymbolsTitleDropdown".encode("UTF-8")
+ end
+
+ subject { ext_class.encode_utf8(test_string) }
+
+ it "doesn't use CharlockHolmes if the encoding can be forced into utf_8" do
+ expect(CharlockHolmes::EncodingDetector).not_to receive(:detect)
+
+ expect(subject).to eq(expected_string)
+ expect(subject.encoding.name).to eq('UTF-8')
+ end
+ end
end
describe '#clean' do