summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRémy Coutable <remy@rymai.me>2017-10-19 08:17:01 +0000
committerRémy Coutable <remy@rymai.me>2017-10-19 08:17:01 +0000
commit371eb62bc90a70f4a578303215e1d4dfc430ddbb (patch)
tree364233eda72d7e5d0a080d5f8899414a9687f6e0
parent65523e655f6c34e5ffa32a669f5e148e408345eb (diff)
parent520866a0d05d717d6d3da401100569f12cab409a (diff)
downloadgitlab-ce-371eb62bc90a70f4a578303215e1d4dfc430ddbb.tar.gz
Merge branch 'encoding-helper-performance' into 'master'
Avoind unnecesary `force_encoding` operations Closes #39227 See merge request gitlab-org/gitlab-ce!12167
-rw-r--r--lib/gitlab/encoding_helper.rb7
-rw-r--r--spec/lib/gitlab/encoding_helper_spec.rb16
2 files changed, 19 insertions, 4 deletions
diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb
index 7b3483a7f96..99dfee3dd9b 100644
--- a/lib/gitlab/encoding_helper.rb
+++ b/lib/gitlab/encoding_helper.rb
@@ -14,9 +14,9 @@ module Gitlab
ENCODING_CONFIDENCE_THRESHOLD = 50
def encode!(message)
- return nil unless message.respond_to? :force_encoding
+ return nil unless message.respond_to?(:force_encoding)
+ return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
- # if message is utf-8 encoding, just return it
message.force_encoding("UTF-8")
return message if message.valid_encoding?
@@ -50,6 +50,9 @@ module Gitlab
end
def encode_utf8(message)
+ return nil unless message.is_a?(String)
+ return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
+
detect = CharlockHolmes::EncodingDetector.detect(message)
if detect && detect[:encoding]
begin
diff --git a/spec/lib/gitlab/encoding_helper_spec.rb b/spec/lib/gitlab/encoding_helper_spec.rb
index 8b14b227e65..9151c66afb3 100644
--- a/spec/lib/gitlab/encoding_helper_spec.rb
+++ b/spec/lib/gitlab/encoding_helper_spec.rb
@@ -6,6 +6,9 @@ describe Gitlab::EncodingHelper do
describe '#encode!' do
[
+ ["nil", nil, nil],
+ ["empty string", "".encode("ASCII-8BIT"), "".encode("UTF-8")],
+ ["invalid utf-8 encoded string", "my bad string\xE5".force_encoding("UTF-8"), "my bad string"],
[
'leaves ascii only string as is',
'ascii only string',
@@ -81,6 +84,9 @@ describe Gitlab::EncodingHelper do
describe '#encode_utf8' do
[
+ ["nil", nil, nil],
+ ["empty string", "".encode("ASCII-8BIT"), "".encode("UTF-8")],
+ ["invalid utf-8 encoded string", "my bad string\xE5".force_encoding("UTF-8"), "my bad stringå"],
[
"encodes valid utf8 encoded string to utf8",
"λ, λ, λ".encode("UTF-8"),
@@ -95,12 +101,18 @@ describe Gitlab::EncodingHelper do
"encodes valid ISO-8859-1 encoded string to utf8",
"Rüby ist eine Programmiersprache. Wir verlängern den text damit ICU die Sprache erkennen kann.".encode("ISO-8859-1", "UTF-8"),
"Rüby ist eine Programmiersprache. Wir verlängern den text damit ICU die Sprache erkennen kann.".encode("UTF-8")
+ ],
+ [
+ # Test case from https://gitlab.com/gitlab-org/gitlab-ce/issues/39227
+ "Equifax branch name",
+ "refs/heads/Equifax".encode("UTF-8"),
+ "refs/heads/Equifax".encode("UTF-8")
]
].each do |description, test_string, xpect|
it description do
- r = ext_class.encode_utf8(test_string.force_encoding('UTF-8'))
+ r = ext_class.encode_utf8(test_string)
expect(r).to eq(xpect)
- expect(r.encoding.name).to eq('UTF-8')
+ expect(r.encoding.name).to eq('UTF-8') if xpect
end
end