diff options
author | Lin Jen-Shin <godfat@godfat.org> | 2017-08-08 19:05:40 +0800 |
---|---|---|
committer | Lin Jen-Shin <godfat@godfat.org> | 2017-08-08 19:10:46 +0800 |
commit | d831e8e1d06c11e9dc9c0c36767b005a3b86a308 (patch) | |
tree | 006f43816dd690c6d2aa4e7d9f0ee24c9bf3d9bf /spec/lib/gitlab | |
parent | b8ba0efed017b79647421d5ac38c539096b319cc (diff) | |
download | gitlab-ce-d831e8e1d06c11e9dc9c0c36767b005a3b86a308.tar.gz |
Add a test to show that threshold 40 would corrupt data
Branch: 35098-raise-encoding-confidence-threshold
If we set `ENCODING_CONFIDENCE_THRESHOLD` to 40,
this test case does not pass. If we raise it to 50,
the test passes.
Note that if rugged stops returning corrupt data in
the future, this test will be less relevant. Still,
ICU recommends a threshold of 50, so we should just
stick with 50.
Diffstat (limited to 'spec/lib/gitlab')
-rw-r--r-- | spec/lib/gitlab/encoding_helper_spec.rb | 44 |
1 files changed, 44 insertions, 0 deletions
diff --git a/spec/lib/gitlab/encoding_helper_spec.rb b/spec/lib/gitlab/encoding_helper_spec.rb
index 1482ef7132d..26138598651 100644
--- a/spec/lib/gitlab/encoding_helper_spec.rb
+++ b/spec/lib/gitlab/encoding_helper_spec.rb
@@ -30,6 +30,50 @@ describe Gitlab::EncodingHelper do
     it 'leaves binary string as is' do
       expect(ext_class.encode!(binary_string)).to eq(binary_string)
     end
+
+    context 'with corrupted diff' do
+      let(:corrupted_diff) do
+        with_empty_bare_repository do |repo|
+          content = File.read(Rails.root.join(
+            'spec/fixtures/encoding/Japanese.md').to_s)
+          commit_a = commit(repo, 'Japanese.md', content)
+          commit_b = commit(repo, 'Japanese.md',
+            content.sub('[TODO: Link]', '[現在作業中です: Link]'))
+
+          repo.diff(commit_a, commit_b).each_line.map(&:content).join
+        end
+      end
+
+      let(:cleaned_diff) do
+        corrupted_diff.dup.force_encoding('UTF-8')
+          .encode!('UTF-8', invalid: :replace, replace: '')
+      end
+
+      let(:encoded_diff) do
+        described_class.encode!(corrupted_diff.dup)
+      end
+
+      it 'does not corrupt data but remove invalid characters' do
+        expect(encoded_diff).to eq(cleaned_diff)
+      end
+
+      def commit(repo, path, content)
+        oid = repo.write(content, :blob)
+        index = repo.index
+
+        index.read_tree(repo.head.target.tree) unless repo.empty?
+
+        index.add(path: path, oid: oid, mode: 0100644)
+
+        Rugged::Commit.create(
+          repo,
+          tree: index.write_tree(repo),
+          message: "Update #{path}",
+          parents: repo.empty? ? [] : [repo.head.target].compact,
+          update_ref: 'HEAD'
+        )
+      end
+    end
   end
 
   describe '#encode_utf8' do