summary refs log tree commit diff
diff options
context:
space:
mode:
author Austin Ziegler <austin@zieglers.ca> 2013-03-30 17:12:43 -0400
committer Austin Ziegler <austin@zieglers.ca> 2013-03-30 17:14:19 -0400
commit 4122e0b9e52fd6fc659dd5f1fc9f844a43ff02cf (patch)
tree c97177a9dd6f3c3f84a09dcf87947908480177e4
parent a72fbdc67d2be6937fd94baf9372bbb994b01be1 (diff)
download diff-lcs-4122e0b9e52fd6fc659dd5f1fc9f844a43ff02cf.tar.gz
Finalizing encoding-aware diff fixes.
* Diff::LCS::Hunk could not properly generate a difference for comparison
  sets that are not US-ASCII-compatible because of the use of literal regular
  expressions and strings. Jon Rowe (JonRowe) found this in
  rspec/rspec-expectations#219 and provided a first pass implementation in
  diff-lcs#15. I've reworked it because of test failures in Rubinius when
  running in Ruby 1.9 mode. This coerces the added values to the encoding of
  the old dataset (as determined by the first piece of the old dataset).
  https://github.com/rspec/rspec-expectations/issues/219
  https://github.com/halostatue/diff-lcs/pull/15
* Adding Travis CI testing for Ruby 2.0.
-rw-r--r-- .travis.yml 3
-rw-r--r-- Contributing.rdoc 1
-rw-r--r-- History.rdoc 14
-rw-r--r-- README.rdoc 6
-rw-r--r-- lib/diff/lcs.rb 4
-rw-r--r-- lib/diff/lcs/hunk.rb 50
-rw-r--r-- spec/hunk_spec.rb 24
7 files changed, 57 insertions, 45 deletions
diff --git a/.travis.yml b/.travis.yml
index d9aeccb..903cddf 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,8 +6,9 @@ before_script:
- rake travis:before -t
language: ruby
notifications:
- email: false
+ email: true
rvm:
+ - 2.0.0
- 1.9.3
- 1.9.2
- ruby-head
diff --git a/Contributing.rdoc b/Contributing.rdoc
index 45fa7ad..a0f37de 100644
--- a/Contributing.rdoc
+++ b/Contributing.rdoc
@@ -61,3 +61,4 @@ Thanks to everyone else who has contributed to Diff::LCS:
* Kenichi Kamiya
* Michael Granger
* Vít Ondruch
+* Jon Rowe
diff --git a/History.rdoc b/History.rdoc
index 6cc644d..d2d847c 100644
--- a/History.rdoc
+++ b/History.rdoc
@@ -1,3 +1,17 @@
+== 1.2.2 / 2013-03-30
+
+* Bugs Fixed:
+ * Diff::LCS::Hunk could not properly generate a difference for comparison
+ sets that are not US-ASCII-compatible because of the use of literal regular
+ expressions and strings. Jon Rowe (JonRowe) found this in
+ rspec/rspec-expectations#219 and provided a first pass implementation in
+ diff-lcs#15. I've reworked it because of test failures in Rubinius when
+ running in Ruby 1.9 mode. This coerces the added values to the encoding of
+ the old dataset (as determined by the first piece of the old dataset).
+ https://github.com/rspec/rspec-expectations/issues/219
+ https://github.com/halostatue/diff-lcs/pull/15
+* Adding Travis CI testing for Ruby 2.0.
+
== 1.2.1 / 2013-02-09
* Bugs Fixed:
diff --git a/README.rdoc b/README.rdoc
index f7758eb..afb421e 100644
--- a/README.rdoc
+++ b/README.rdoc
@@ -11,9 +11,9 @@ Diff::LCS computes the difference between two Enumerable sequences using the
McIlroy-Hunt longest common subsequence (LCS) algorithm. It includes utilities
to create a simple HTML diff output format and a standard diff-like tool.
-This is release 1.2.1, restoring the public API to what existed in Diff::LCS
-1.1.x. Everyone is strongly encouraged to upgrade to this version as it fixes
-all known outstanding issues.
+This is release 1.2.2, fixing a bug that prevented comparison of values that
+are not US-ASCII-compatible. Thanks to Jon Rowe for finding and providing most
+of the work behind this issue. This is a recommended release.
== Synopsis
diff --git a/lib/diff/lcs.rb b/lib/diff/lcs.rb
index 7a8b982..620e200 100644
--- a/lib/diff/lcs.rb
+++ b/lib/diff/lcs.rb
@@ -1,7 +1,7 @@
# -*- ruby encoding: utf-8 -*-
module Diff; end unless defined? Diff
-# = Diff::LCS 1.2.1
+# = Diff::LCS 1.2.2
#
# Computes "intelligent" differences between two sequenced Enumerables. This
# is an implementation of the McIlroy-Hunt "diff" algorithm for Enumerable
@@ -129,7 +129,7 @@ module Diff; end unless defined? Diff
# Common Subsequences</em>, CACM, vol.20, no.5, pp.350-353, May
# 1977, with a few minor improvements to improve the speed."
module Diff::LCS
- VERSION = '1.2.1'
+ VERSION = '1.2.2'
end
require 'diff/lcs/callbacks'
diff --git a/lib/diff/lcs/hunk.rb b/lib/diff/lcs/hunk.rb
index bcd85d5..6c5ea6a 100644
--- a/lib/diff/lcs/hunk.rb
+++ b/lib/diff/lcs/hunk.rb
@@ -11,7 +11,9 @@ class Diff::LCS::Hunk
def initialize(data_old, data_new, piece, flag_context, file_length_difference)
# At first, a hunk will have just one Block in it
@blocks = [ Diff::LCS::Block.new(piece) ]
- @preferred_data_encoding = data_old[0].encoding if String.method_defined?(:encoding)
+ if String.method_defined?(:encoding)
+ @preferred_data_encoding = data_old[0].encoding
+ end
@data_old = data_old
@data_new = data_new
@@ -146,17 +148,17 @@ class Diff::LCS::Hunk
# file -- don't take removed items into account.
lo, hi, num_added, num_removed = @start_old, @end_old, 0, 0
- outlist = @data_old[lo .. hi].collect { |e| match_encoding_gsub(e,'^', ' ') }
+ outlist = @data_old[lo .. hi].map { |e| e.insert(0, encode(' ')) }
@blocks.each do |block|
block.remove.each do |item|
- op = item.action.to_s # -
+ op = item.action.to_s # -
offset = item.position - lo + num_added
- match_encoding_gsub!(outlist[offset],'^ ', op.to_s)
+ outlist[offset][0, 1] = encode(op)
num_removed += 1
end
block.insert.each do |item|
- op = item.action.to_s # +
+ op = item.action.to_s # +
offset = item.position - @start_new + num_removed
outlist[offset, 0] = encode(op) + @data_new[item.position]
num_added += 1
@@ -177,10 +179,11 @@ class Diff::LCS::Hunk
lo, hi = @start_old, @end_old
removes = @blocks.select { |e| not e.remove.empty? }
if removes
- outlist = @data_old[lo .. hi].collect { |e| match_encoding_gsub(e,'^', ' ') }
+ outlist = @data_old[lo .. hi].map { |e| e.insert(0, encode(' ')) }
+
removes.each do |block|
block.remove.each do |item|
- match_encoding_gsub!( outlist[item.position - lo], '^ ') { block.op } # - or !
+ outlist[item.position - lo][0, 1] = encode(block.op) # - or !
end
end
s << outlist.join("\n")
@@ -190,10 +193,10 @@ class Diff::LCS::Hunk
lo, hi = @start_new, @end_new
inserts = @blocks.select { |e| not e.insert.empty? }
if inserts
- outlist = @data_new[lo .. hi].collect { |e| match_encoding_gsub(e,'^', ' ') }
+ outlist = @data_new[lo .. hi].collect { |e| e.insert(0, encode(' ')) }
inserts.each do |block|
block.insert.each do |item|
- match_encoding_gsub!( outlist[item.position - lo], '^ ') { block.op } # + or !
+ outlist[item.position - lo][0, 1] = encode(block.op) # + or !
end
end
s << outlist.join("\n")
@@ -209,7 +212,7 @@ class Diff::LCS::Hunk
if format == :reverse_ed
s = encode("#{op_act[@blocks[0].op]}#{context_range(:old)}\n")
else
- s = encode("#{match_encoding_gsub(context_range(:old), ',', ' ')}#{op_act[@blocks[0].op]}\n")
+ s = encode("#{context_range(:old, ' ')}#{op_act[@blocks[0].op]}\n")
end
unless @blocks[0].insert.empty?
@@ -222,7 +225,7 @@ class Diff::LCS::Hunk
# Generate a range of item numbers to print. Only print 1 number if the
# range has only one item in it. Otherwise, it's 'start,end'
- def context_range(mode)
+ def context_range(mode, op = ',')
case mode
when :old
s, e = (@start_old + 1), (@end_old + 1)
@@ -230,7 +233,7 @@ class Diff::LCS::Hunk
s, e = (@start_new + 1), (@end_new + 1)
end
- (s < e) ? "#{s},#{e}" : "#{e}"
+ (s < e) ? "#{s}#{op}#{e}" : "#{e}"
end
private :context_range
@@ -252,33 +255,22 @@ class Diff::LCS::Hunk
private :unified_range
if String.method_defined?(:encoding)
- def encode(literal)
- literal.encode @preferred_data_encoding
+ def encode(literal, target_encoding = @preferred_data_encoding)
+ literal.encode target_encoding
end
- def encode_to(string, args)
+ def encode_as(string, *args)
args.map { |arg| arg.encode(string.encoding) }
end
else
- def encode(literal)
+ def encode(literal, target_encoding = nil)
literal
end
- def encode_to(string, args)
+ def encode_as(string, *args)
args
end
end
private :encode
- private :encode_to
-
- def match_encoding_gsub(string, *args, &block)
- string.gsub(*encode_to(string,args), &block)
- end
- private :match_encoding_gsub
-
- def match_encoding_gsub!(string, *args, &block)
- string.gsub!(*encode_to(string,args), &block)
- end
- private :match_encoding_gsub!
-
+ private :encode_as
end
diff --git a/spec/hunk_spec.rb b/spec/hunk_spec.rb
index 9c21bce..0741b87 100644
--- a/spec/hunk_spec.rb
+++ b/spec/hunk_spec.rb
@@ -2,38 +2,42 @@
require 'spec_helper'
-describe "Diff::LCS::Hunk" do
+def h(v)
+ v.to_s.bytes.to_a.map { |e| "%02x" % e }.join
+end
+describe "Diff::LCS::Hunk" do
if String.method_defined?(:encoding)
let(:old_data) { ["Tu avec carté {count} itém has".encode('UTF-16LE')] }
let(:new_data) { ["Tu avec carte {count} item has".encode('UTF-16LE')] }
- let(:peices) { Diff::LCS.diff old_data, new_data }
- let(:hunk) { Diff::LCS::Hunk.new(old_data, new_data, peices[0], 3, 0) }
+ let(:pieces) { Diff::LCS.diff old_data, new_data }
+ let(:hunk) { Diff::LCS::Hunk.new(old_data, new_data, pieces[0], 3, 0) }
- it 'should be able to produce a unified diff from the two peices' do
+ it 'should be able to produce a unified diff from the two pieces' do
expected =
(<<-EOD.encode('UTF-16LE').chomp)
@@ -1,2 +1,2 @@
-Tu avec carté {count} itém has
+-Tu avec carté {count} itém has
+Tu avec carte {count} item has
EOD
expect(hunk.diff(:unified).to_s == expected).to eql true
end
- it 'should be able to produce a context diff from the two peices' do
+ it 'should be able to produce a context diff from the two pieces' do
expected =
(<<-EOD.encode('UTF-16LE').chomp)
***************
*** 1,2 ****
-Tu avec carté {count} itém has
+!Tu avec carté {count} itém has
--- 1,2 ----
-Tu avec carte {count} item has
+!Tu avec carte {count} item has
EOD
+
expect(hunk.diff(:context).to_s == expected).to eql true
end
- it 'should be able to produce an old diff from the two peices' do
+ it 'should be able to produce an old diff from the two pieces' do
expected =
(<<-EOD.encode('UTF-16LE').chomp)
1,2c1,2
@@ -45,7 +49,7 @@ EOD
expect(hunk.diff(:old).to_s == expected).to eql true
end
- it 'should be able to produce a reverse ed diff from the two peices' do
+ it 'should be able to produce a reverse ed diff from the two pieces' do
expected =
(<<-EOD.encode('UTF-16LE').chomp)
c1,2