summaryrefslogtreecommitdiff
path: root/lib/gitlab/encoding_helper.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/encoding_helper.rb')
-rw-r--r--lib/gitlab/encoding_helper.rb26
1 files changed, 16 insertions, 10 deletions
diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb
index 6b53eb4533d..c0edcabc6fd 100644
--- a/lib/gitlab/encoding_helper.rb
+++ b/lib/gitlab/encoding_helper.rb
@@ -14,14 +14,7 @@ module Gitlab
ENCODING_CONFIDENCE_THRESHOLD = 50
def encode!(message)
- return nil unless message.respond_to?(:force_encoding)
- return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
-
- if message.respond_to?(:frozen?) && message.frozen?
- message = message.dup
- end
-
- message.force_encoding("UTF-8")
+ message = force_encode_utf8(message)
return message if message.valid_encoding?
# return message if message type is binary
@@ -35,6 +28,8 @@ module Gitlab
# encode and clean the bad chars
message.replace clean(message)
+ rescue ArgumentError
+ return nil
rescue
encoding = detect ? detect[:encoding] : "unknown"
"--broken encoding: #{encoding}"
@@ -54,8 +49,8 @@ module Gitlab
end
def encode_utf8(message)
- return nil unless message.is_a?(String)
- return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
+ message = force_encode_utf8(message)
+ return message if message.valid_encoding?
detect = CharlockHolmes::EncodingDetector.detect(message)
if detect && detect[:encoding]
@@ -69,6 +64,8 @@ module Gitlab
else
clean(message)
end
+ rescue ArgumentError
+ return nil
end
def encode_binary(s)
@@ -83,6 +80,15 @@ module Gitlab
private
+ def force_encode_utf8(message)
+ raise ArgumentError unless message.respond_to?(:force_encoding)
+ return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
+
+ message = message.dup if message.respond_to?(:frozen?) && message.frozen?
+
+ message.force_encoding("UTF-8")
+ end
+
def clean(message)
message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
.encode("UTF-8")