diff options
author | Austin Ziegler <austin@zieglers.ca> | 2013-03-30 17:12:43 -0400 |
---|---|---|
committer | Austin Ziegler <austin@zieglers.ca> | 2013-03-30 17:14:19 -0400 |
commit | 4122e0b9e52fd6fc659dd5f1fc9f844a43ff02cf (patch) | |
tree | c97177a9dd6f3c3f84a09dcf87947908480177e4 | |
parent | a72fbdc67d2be6937fd94baf9372bbb994b01be1 (diff) | |
download | diff-lcs-4122e0b9e52fd6fc659dd5f1fc9f844a43ff02cf.tar.gz |
Finalizing encoding-aware diff fixes.
* Diff::LCS::Hunk could not properly generate a difference for
comparison sets that are not US-ASCII-compatible because of the use of
literal regular expressions and strings. Jon Rowe (JonRowe) found this
in rspec/rspec-expectations#219 and provided a first pass
implementation in diff-lcs#15. I've reworked it because of test
failures in Rubinius when running in Ruby 1.9 mode. This coerces the
added values to the encoding of the old dataset (as determined by the
first piece of the old dataset).
https://github.com/rspec/rspec-expectations/issues/219
https://github.com/halostatue/diff-lcs/pull/15
* Adding Travis CI testing for Ruby 2.0.
-rw-r--r-- | .travis.yml | 3 | ||||
-rw-r--r-- | Contributing.rdoc | 1 | ||||
-rw-r--r-- | History.rdoc | 14 | ||||
-rw-r--r-- | README.rdoc | 6 | ||||
-rw-r--r-- | lib/diff/lcs.rb | 4 | ||||
-rw-r--r-- | lib/diff/lcs/hunk.rb | 50 | ||||
-rw-r--r-- | spec/hunk_spec.rb | 24 |
7 files changed, 57 insertions, 45 deletions
diff --git a/.travis.yml b/.travis.yml index d9aeccb..903cddf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,8 +6,9 @@ before_script: - rake travis:before -t language: ruby notifications: - email: false + email: true rvm: + - 2.0.0 - 1.9.3 - 1.9.2 - ruby-head diff --git a/Contributing.rdoc b/Contributing.rdoc index 45fa7ad..a0f37de 100644 --- a/Contributing.rdoc +++ b/Contributing.rdoc @@ -61,3 +61,4 @@ Thanks to everyone else who has contributed to Diff::LCS: * Kenichi Kamiya * Michael Granger * Vít Ondruch +* Jon Rowe diff --git a/History.rdoc b/History.rdoc index 6cc644d..d2d847c 100644 --- a/History.rdoc +++ b/History.rdoc @@ -1,3 +1,17 @@ +== 1.2.2 / 2013-03-30 + +* Bugs Fixed: + * Diff::LCS::Hunk could not properly generate a difference for comparison + sets that are not US-ASCII-compatible because of the use of literal regular + expressions and strings. Jon Rowe (JonRowe) found this in + rspec/rspec-expectations#219 and provided a first pass implementation in + diff-lcs#15. I've reworked it because of test failures in Rubinius when + running in Ruby 1.9 mode. This coerces the added values to the encoding of + the old dataset (as determined by the first piece of the old dataset). + https://github.com/rspec/rspec-expectations/issues/219 + https://github.com/halostatue/diff-lcs/pull/15 +* Adding Travis CI testing for Ruby 2.0. + == 1.2.1 / 2013-02-09 * Bugs Fixed: diff --git a/README.rdoc b/README.rdoc index f7758eb..afb421e 100644 --- a/README.rdoc +++ b/README.rdoc @@ -11,9 +11,9 @@ Diff::LCS computes the difference between two Enumerable sequences using the McIlroy-Hunt longest common subsequence (LCS) algorithm. It includes utilities to create a simple HTML diff output format and a standard diff-like tool. -This is release 1.2.1, restoring the public API to what existed in Diff::LCS -1.1.x. Everyone is strongly encouraged to upgrade to this version as it fixes -all known outstanding issues. +This is release 1.2.2, fixing a bug that prevented comparison of values that +are not US-ASCII-compatible. Thanks to Jon Rowe for finding and providing most +of the work behind this issue. This is a recommended release. == Synopsis diff --git a/lib/diff/lcs.rb b/lib/diff/lcs.rb index 7a8b982..620e200 100644 --- a/lib/diff/lcs.rb +++ b/lib/diff/lcs.rb @@ -1,7 +1,7 @@ # -*- ruby encoding: utf-8 -*- module Diff; end unless defined? Diff -# = Diff::LCS 1.2.1 +# = Diff::LCS 1.2.2 # # Computes "intelligent" differences between two sequenced Enumerables. This # is an implementation of the McIlroy-Hunt "diff" algorithm for Enumerable @@ -129,7 +129,7 @@ module Diff; end unless defined? Diff # Common Subsequences</em>, CACM, vol.20, no.5, pp.350-353, May # 1977, with a few minor improvements to improve the speed." module Diff::LCS - VERSION = '1.2.1' + VERSION = '1.2.2' end require 'diff/lcs/callbacks' diff --git a/lib/diff/lcs/hunk.rb b/lib/diff/lcs/hunk.rb index bcd85d5..6c5ea6a 100644 --- a/lib/diff/lcs/hunk.rb +++ b/lib/diff/lcs/hunk.rb @@ -11,7 +11,9 @@ class Diff::LCS::Hunk def initialize(data_old, data_new, piece, flag_context, file_length_difference) # At first, a hunk will have just one Block in it @blocks = [ Diff::LCS::Block.new(piece) ] - @preferred_data_encoding = data_old[0].encoding if String.method_defined?(:encoding) + if String.method_defined?(:encoding) + @preferred_data_encoding = data_old[0].encoding + end @data_old = data_old @data_new = data_new @@ -146,17 +148,17 @@ class Diff::LCS::Hunk # file -- don't take removed items into account. lo, hi, num_added, num_removed = @start_old, @end_old, 0, 0 - outlist = @data_old[lo .. hi].collect { |e| match_encoding_gsub(e,'^', ' ') } + outlist = @data_old[lo .. hi].map { |e| e.insert(0, encode(' ')) } @blocks.each do |block| block.remove.each do |item| - op = item.action.to_s # - + op = item.action.to_s # - offset = item.position - lo + num_added - match_encoding_gsub!(outlist[offset],'^ ', op.to_s) + outlist[offset][0, 1] = encode(op) num_removed += 1 end block.insert.each do |item| - op = item.action.to_s # + + op = item.action.to_s # + offset = item.position - @start_new + num_removed outlist[offset, 0] = encode(op) + @data_new[item.position] num_added += 1 @@ -177,10 +179,11 @@ class Diff::LCS::Hunk lo, hi = @start_old, @end_old removes = @blocks.select { |e| not e.remove.empty? } if removes - outlist = @data_old[lo .. hi].collect { |e| match_encoding_gsub(e,'^', ' ') } + outlist = @data_old[lo .. hi].map { |e| e.insert(0, encode(' ')) } + removes.each do |block| block.remove.each do |item| - match_encoding_gsub!( outlist[item.position - lo], '^ ') { block.op } # - or ! + outlist[item.position - lo][0, 1] = encode(block.op) # - or ! end end s << outlist.join("\n") @@ -190,10 +193,10 @@ class Diff::LCS::Hunk lo, hi = @start_new, @end_new inserts = @blocks.select { |e| not e.insert.empty? } if inserts - outlist = @data_new[lo .. hi].collect { |e| match_encoding_gsub(e,'^', ' ') } + outlist = @data_new[lo .. hi].collect { |e| e.insert(0, encode(' ')) } inserts.each do |block| block.insert.each do |item| - match_encoding_gsub!( outlist[item.position - lo], '^ ') { block.op } # + or ! + outlist[item.position - lo][0, 1] = encode(block.op) # + or ! end end s << outlist.join("\n") @@ -209,7 +212,7 @@ class Diff::LCS::Hunk if format == :reverse_ed s = encode("#{op_act[@blocks[0].op]}#{context_range(:old)}\n") else - s = encode("#{match_encoding_gsub(context_range(:old), ',', ' ')}#{op_act[@blocks[0].op]}\n") + s = encode("#{context_range(:old, ' ')}#{op_act[@blocks[0].op]}\n") end unless @blocks[0].insert.empty? @@ -222,7 +225,7 @@ class Diff::LCS::Hunk # Generate a range of item numbers to print. Only print 1 number if the # range has only one item in it. Otherwise, it's 'start,end' - def context_range(mode) + def context_range(mode, op = ',') case mode when :old s, e = (@start_old + 1), (@end_old + 1) @@ -230,7 +233,7 @@ class Diff::LCS::Hunk s, e = (@start_new + 1), (@end_new + 1) end - (s < e) ? "#{s},#{e}" : "#{e}" + (s < e) ? "#{s}#{op}#{e}" : "#{e}" end private :context_range @@ -252,33 +255,22 @@ class Diff::LCS::Hunk private :unified_range if String.method_defined?(:encoding) - def encode(literal) - literal.encode @preferred_data_encoding + def encode(literal, target_encoding = @preferred_data_encoding) + literal.encode target_encoding end - def encode_to(string, args) + def encode_as(string, *args) args.map { |arg| arg.encode(string.encoding) } end else - def encode(literal) + def encode(literal, target_encoding = nil) literal end - def encode_to(string, args) + def encode_as(string, *args) args end end private :encode - private :encode_to - - def match_encoding_gsub(string, *args, &block) - string.gsub(*encode_to(string,args), &block) - end - private :match_encoding_gsub - - def match_encoding_gsub!(string, *args, &block) - string.gsub!(*encode_to(string,args), &block) - end - private :match_encoding_gsub! - + private :encode_as end diff --git a/spec/hunk_spec.rb b/spec/hunk_spec.rb index 9c21bce..0741b87 100644 --- a/spec/hunk_spec.rb +++ b/spec/hunk_spec.rb @@ -2,38 +2,42 @@ require 'spec_helper' -describe "Diff::LCS::Hunk" do +def h(v) + v.to_s.bytes.to_a.map { |e| "%02x" % e }.join +end +describe "Diff::LCS::Hunk" do if String.method_defined?(:encoding) let(:old_data) { ["Tu avec carté {count} itém has".encode('UTF-16LE')] } let(:new_data) { ["Tu avec carte {count} item has".encode('UTF-16LE')] } - let(:peices) { Diff::LCS.diff old_data, new_data } - let(:hunk) { Diff::LCS::Hunk.new(old_data, new_data, peices[0], 3, 0) } + let(:pieces) { Diff::LCS.diff old_data, new_data } + let(:hunk) { Diff::LCS::Hunk.new(old_data, new_data, pieces[0], 3, 0) } - it 'should be able to produce a unified diff from the two peices' do + it 'should be able to produce a unified diff from the two pieces' do expected = (<<-EOD.encode('UTF-16LE').chomp) @@ -1,2 +1,2 @@ -Tu avec carté {count} itém has +-Tu avec carté {count} itém has +Tu avec carte {count} item has EOD expect(hunk.diff(:unified).to_s == expected).to eql true end - it 'should be able to produce a context diff from the two peices' do + it 'should be able to produce a context diff from the two pieces' do expected = (<<-EOD.encode('UTF-16LE').chomp) *************** *** 1,2 **** -Tu avec carté {count} itém has +!Tu avec carté {count} itém has --- 1,2 ---- -Tu avec carte {count} item has +!Tu avec carte {count} item has EOD + expect(hunk.diff(:context).to_s == expected).to eql true end - it 'should be able to produce an old diff from the two peices' do + it 'should be able to produce an old diff from the two pieces' do expected = (<<-EOD.encode('UTF-16LE').chomp) 1,2c1,2 @@ -45,7 +49,7 @@ EOD expect(hunk.diff(:old).to_s == expected).to eql true end - it 'should be able to produce a reverse ed diff from the two peices' do + it 'should be able to produce a reverse ed diff from the two pieces' do expected = (<<-EOD.encode('UTF-16LE').chomp) c1,2 |