summaryrefslogtreecommitdiff
path: root/spec/lib/gitlab/encoding_helper_spec.rb
diff options
context:
space:
mode:
author Lin Jen-Shin <godfat@godfat.org> 2017-08-08 19:05:40 +0800
committer Lin Jen-Shin <godfat@godfat.org> 2017-08-08 19:10:46 +0800
commit d831e8e1d06c11e9dc9c0c36767b005a3b86a308 (patch)
tree 006f43816dd690c6d2aa4e7d9f0ee24c9bf3d9bf /spec/lib/gitlab/encoding_helper_spec.rb
parent b8ba0efed017b79647421d5ac38c539096b319cc (diff)
download gitlab-ce-d831e8e1d06c11e9dc9c0c36767b005a3b86a308.tar.gz
Add a test to show that threshold 40 would corrupt data (branch: 35098-raise-encoding-confidence-threshold)
If we set `ENCODING_CONFIDENCE_THRESHOLD` to 40, this test case would not pass; if we raise it to 50, it would pass. Note that if a future version of rugged no longer returns corrupt data, this test would be less relevant. Still, ICU recommends a threshold of 50, so we should just stick with 50.
Diffstat (limited to 'spec/lib/gitlab/encoding_helper_spec.rb')
-rw-r--r-- spec/lib/gitlab/encoding_helper_spec.rb 44
1 files changed, 44 insertions, 0 deletions
diff --git a/spec/lib/gitlab/encoding_helper_spec.rb b/spec/lib/gitlab/encoding_helper_spec.rb
index 1482ef7132d..26138598651 100644
--- a/spec/lib/gitlab/encoding_helper_spec.rb
+++ b/spec/lib/gitlab/encoding_helper_spec.rb
@@ -30,6 +30,50 @@ describe Gitlab::EncodingHelper do
it 'leaves binary string as is' do
expect(ext_class.encode!(binary_string)).to eq(binary_string)
end
+
+ context 'with corrupted diff' do
+ let(:corrupted_diff) do
+ with_empty_bare_repository do |repo|
+ content = File.read(Rails.root.join(
+ 'spec/fixtures/encoding/Japanese.md').to_s)
+ commit_a = commit(repo, 'Japanese.md', content)
+ commit_b = commit(repo, 'Japanese.md',
+ content.sub('[TODO: Link]', '[現在作業中です: Link]'))
+
+ repo.diff(commit_a, commit_b).each_line.map(&:content).join
+ end
+ end
+
+ let(:cleaned_diff) do
+ corrupted_diff.dup.force_encoding('UTF-8')
+ .encode!('UTF-8', invalid: :replace, replace: '')
+ end
+
+ let(:encoded_diff) do
+ described_class.encode!(corrupted_diff.dup)
+ end
+
+ it 'does not corrupt data but remove invalid characters' do
+ expect(encoded_diff).to eq(cleaned_diff)
+ end
+
+ def commit(repo, path, content)
+ oid = repo.write(content, :blob)
+ index = repo.index
+
+ index.read_tree(repo.head.target.tree) unless repo.empty?
+
+ index.add(path: path, oid: oid, mode: 0100644)
+
+ Rugged::Commit.create(
+ repo,
+ tree: index.write_tree(repo),
+ message: "Update #{path}",
+ parents: repo.empty? ? [] : [repo.head.target].compact,
+ update_ref: 'HEAD'
+ )
+ end
+ end
end
describe '#encode_utf8' do