diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2022-04-20 10:00:54 +0000 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2022-04-20 10:00:54 +0000 |
commit | 3cccd102ba543e02725d247893729e5c73b38295 (patch) | |
tree | f36a04ec38517f5deaaacb5acc7d949688d1e187 /lib/gitlab/encoding_helper.rb | |
parent | 205943281328046ef7b4528031b90fbda70c75ac (diff) | |
download | gitlab-ce-3cccd102ba543e02725d247893729e5c73b38295.tar.gz |
Add latest changes from gitlab-org/gitlab@14-10-stable-ee (tag: v14.10.0-rc42)
Diffstat (limited to 'lib/gitlab/encoding_helper.rb')
-rw-r--r-- | lib/gitlab/encoding_helper.rb | 35 |
1 files changed, 35 insertions, 0 deletions
```diff
diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb
index 2e0060c7c18..f26ab6e3ed1 100644
--- a/lib/gitlab/encoding_helper.rb
+++ b/lib/gitlab/encoding_helper.rb
@@ -15,6 +15,8 @@ module Gitlab
     # https://gitlab.com/gitlab-org/gitlab_git/merge_requests/77#note_4754193
     ENCODING_CONFIDENCE_THRESHOLD = 50
 
+    UNICODE_REPLACEMENT_CHARACTER = "�"
+
     def encode!(message)
       message = force_encode_utf8(message)
       return message if message.valid_encoding?
@@ -65,6 +67,10 @@ module Gitlab
       message.encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
     end
 
+    def encode_utf8_with_replacement_character(data)
+      encode_utf8(data, replace: UNICODE_REPLACEMENT_CHARACTER)
+    end
+
     def encode_utf8(message, replace: "")
       message = force_encode_utf8(message)
       return message if message.valid_encoding?
@@ -99,6 +105,35 @@ module Gitlab
       io.tap { |io| io.set_encoding(Encoding::ASCII_8BIT) }
     end
 
+    ESCAPED_CHARS = {
+      "a" => "\a", "b" => "\b", "e" => "\e", "f" => "\f",
+      "n" => "\n", "r" => "\r", "t" => "\t", "v" => "\v",
+      "\"" => "\""
+    }.freeze
+
+    # rubocop:disable Style/AsciiComments
+    # `unquote_path` decode filepaths that are returned by some git commands.
+    # The path may be returned in double-quotes if it contains special characters,
+    # that are encoded in octal. Also, some characters (see `ESCAPED_CHARS`) are escaped.
+    # eg. "\311\240\304\253\305\247\305\200\310\247\306\200" (quotes included) is decoded as ɠīŧŀȧƀ
+    #
+    # Based on `unquote_c_style` from git source
+    # https://github.com/git/git/blob/v2.35.1/quote.c#L399
+    # rubocop:enable Style/AsciiComments
+    def unquote_path(filename)
+      return filename unless filename[0] == '"'
+
+      filename = filename[1..-2].gsub(/\\(?:([#{ESCAPED_CHARS.keys.join}\\])|(\d{3}))/) do
+        if c = Regexp.last_match(1)
+          c == "\\" ? "\\" : ESCAPED_CHARS[c]
+        elsif c = Regexp.last_match(2)
+          c.to_i(8).chr
+        end
+      end
+
+      filename.force_encoding("UTF-8")
+    end
+
     private
 
     def force_encode_utf8(message)
```