summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/gitlab/encoding_helper.rb26
-rw-r--r--lib/gitlab/git.rb2
-rw-r--r--spec/lib/gitlab/encoding_helper_spec.rb18
3 files changed, 35 insertions, 11 deletions
diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb
index 6b53eb4533d..c0edcabc6fd 100644
--- a/lib/gitlab/encoding_helper.rb
+++ b/lib/gitlab/encoding_helper.rb
@@ -14,14 +14,7 @@ module Gitlab
ENCODING_CONFIDENCE_THRESHOLD = 50
def encode!(message)
- return nil unless message.respond_to?(:force_encoding)
- return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
-
- if message.respond_to?(:frozen?) && message.frozen?
- message = message.dup
- end
-
- message.force_encoding("UTF-8")
+ message = force_encode_utf8(message)
return message if message.valid_encoding?
# return message if message type is binary
@@ -35,6 +28,8 @@ module Gitlab
# encode and clean the bad chars
message.replace clean(message)
+ rescue ArgumentError
+ return nil
rescue
encoding = detect ? detect[:encoding] : "unknown"
"--broken encoding: #{encoding}"
@@ -54,8 +49,8 @@ module Gitlab
end
def encode_utf8(message)
- return nil unless message.is_a?(String)
- return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
+ message = force_encode_utf8(message)
+ return message if message.valid_encoding?
detect = CharlockHolmes::EncodingDetector.detect(message)
if detect && detect[:encoding]
@@ -69,6 +64,8 @@ module Gitlab
else
clean(message)
end
+ rescue ArgumentError
+ return nil
end
def encode_binary(s)
@@ -83,6 +80,15 @@ module Gitlab
private
+ def force_encode_utf8(message)
+ raise ArgumentError unless message.respond_to?(:force_encoding)
+ return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
+
+ message = message.dup if message.respond_to?(:frozen?) && message.frozen?
+
+ message.force_encoding("UTF-8")
+ end
+
def clean(message)
message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
.encode("UTF-8")
diff --git a/lib/gitlab/git.rb b/lib/gitlab/git.rb
index 1f7c35cafaa..71647099f83 100644
--- a/lib/gitlab/git.rb
+++ b/lib/gitlab/git.rb
@@ -11,7 +11,7 @@ module Gitlab
include Gitlab::EncodingHelper
def ref_name(ref)
- encode_utf8(ref).sub(/\Arefs\/(tags|heads|remotes)\//, '')
+ encode!(ref).sub(/\Arefs\/(tags|heads|remotes)\//, '')
end
def branch_name(ref)
diff --git a/spec/lib/gitlab/encoding_helper_spec.rb b/spec/lib/gitlab/encoding_helper_spec.rb
index 87ec2698fc1..4e9367323cb 100644
--- a/spec/lib/gitlab/encoding_helper_spec.rb
+++ b/spec/lib/gitlab/encoding_helper_spec.rb
@@ -120,6 +120,24 @@ describe Gitlab::EncodingHelper do
it 'returns empty string on conversion errors' do
expect { ext_class.encode_utf8('') }.not_to raise_error(ArgumentError)
end
+
+ context 'with strings that can be forcefully encoded into utf8' do
+ let(:test_string) do
+ "refs/heads/FixSymbolsTitleDropdown".encode("ASCII-8BIT")
+ end
+ let(:expected_string) do
+ "refs/heads/FixSymbolsTitleDropdown".encode("UTF-8")
+ end
+
+ subject { ext_class.encode_utf8(test_string) }
+
+ it "doesn't use CharlockHolmes if the encoding can be forced into utf_8" do
+ expect(CharlockHolmes::EncodingDetector).not_to receive(:detect)
+
+ expect(subject).to eq(expected_string)
+ expect(subject.encoding.name).to eq('UTF-8')
+ end
+ end
end
describe '#clean' do