summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAustin Ziegler <austin@surfeasy.com>2012-03-24 00:35:04 -0400
committerAustin Ziegler <austin@surfeasy.com>2012-03-24 00:42:25 -0400
commit2151e2093dfd00d6aff72f49d13a137efeed2e53 (patch)
treea54fba041abf07cfe1c43c0652efc7c34b153eb9
parenta341ac7ca261a73e37c21a7f034f5892ea7dcde4 (diff)
downloaddiff-lcs-2151e2093dfd00d6aff72f49d13a137efeed2e53.tar.gz
Starting on a substantial refactoring to fix bugs.
1. All Diff::LCS::__* methods are now moved to Diff::LCS::Internals and named without the underscores.
2. Fixed the documentation for Diff::LCS::patch. (Some formatting may have been broken.)
3. Eliminated normalize_patchset in favour of the (substantially similar) analyze_patchset. This allows for a single pass over a patchset to determine whether the patchset contains changes and to transform it to the object-based format.
4. Made Diff::LCS::ContextChange descend from Diff::LCS::Change. This will probably change so that both descend from something like a Diff::LCS::AbstractChange because of some issues that may arise with case statement order.
-rw-r--r--lib/diff/lcs.rb398
-rw-r--r--lib/diff/lcs/change.rb209
-rw-r--r--lib/diff/lcs/internals.rb271
-rw-r--r--spec/lcs_spec.rb8
-rw-r--r--spec/patch_spec.rb2
5 files changed, 445 insertions, 443 deletions
diff --git a/lib/diff/lcs.rb b/lib/diff/lcs.rb
index 7218494..2af2a4e 100644
--- a/lib/diff/lcs.rb
+++ b/lib/diff/lcs.rb
@@ -134,6 +134,7 @@ module Diff
end
require 'diff/lcs/callbacks'
+require 'diff/lcs/internals'
module Diff::LCS
# Returns an Array containing the longest common subsequence(s) between
@@ -213,7 +214,7 @@ module Diff::LCS
# +seq1+ in turn and may be modified before they are placed into the
# returned Array of subsequences.
def LCS(seq1, seq2, &block) #:yields seq1[ii] for each matched:
- matches = Diff::LCS.__lcs(seq1, seq2)
+ matches = Diff::LCS::Internals.lcs(seq1, seq2)
ret = []
matches.each_with_index do |ee, ii|
unless matches[ii].nil?
@@ -379,7 +380,7 @@ module Diff::LCS
# sequence is reached, if +a+ has not yet reached the end of +A+ or +b+
# has not yet reached the end of +B+.
def traverse_sequences(seq1, seq2, callbacks = Diff::LCS::SequenceCallbacks, &block) #:yields change events:
- matches = Diff::LCS.__lcs(seq1, seq2)
+ matches = Diff::LCS::Internals.lcs(seq1, seq2)
run_finished_a = run_finished_b = false
string = seq1.kind_of?(String)
@@ -569,7 +570,7 @@ module Diff::LCS
# +a+ and +b+ are considered to be pointing to matching or changed
# elements.
def traverse_balanced(seq1, seq2, callbacks = Diff::LCS::BalancedCallbacks)
- matches = Diff::LCS.__lcs(seq1, seq2)
+ matches = Diff::LCS::Internals.lcs(seq1, seq2)
a_size = seq1.size
b_size = seq2.size
ai = bj = mb = 0
@@ -676,34 +677,67 @@ module Diff::LCS
:unpatch => { '+' => '-', '-' => '+', '!' => '!', '=' => '=' }
}
- # Given a patchset, convert the current version to the new
- # version. If +direction+ is not specified (must be
- # <tt>:patch</tt> or <tt>:unpatch</tt>), then discovery of the
- # direction of the patch will be attempted.
+ # Applies a +patchset+ to the sequence +src+ according to the
+ # +direction+ (<tt>:patch</tt> or <tt>:unpatch</tt>).
#
- # If the patchset is empty or all 'unchanged', the src value will be
- # returned as either <tt>src.dup</tt> or <tt>src</tt>.
+ # If the +direction+ is not specified, Diff::LCS::patch will attempt to
+ # discover the direction of the +patchset+.
+ #
+ # A +patchset+ can be considered to apply forward (<tt>:patch</tt>) if
+ # the following expression is true:
+ #
+ # patch(s1, diff(s1, s2)) -> s2
+ #
+ # A +patchset+ can be considered to apply backward (<tt>:unpatch</tt>)
+ # if the following expression is true:
+ #
+ # patch(s2, diff(s1, s2)) -> s1
+ #
+ # If the +patchset+ contains no changes, the +src+ value will be
+ # returned as either <tt>src.dup</tt> or +src+. A +patchset+ can be
+ # deemed as having no changes if the following predicate returns true:
+ #
+ # patchset.empty? or
+ # patchset.flatten.all? { |change| change.unchanged? }
+ #
+ # === Patchsets
+ # A +patchset+ is always an enumerable sequence of changes, hunks of
+ # changes, or a mix of the two. A hunk of changes is an enumerable
+ # sequence of changes:
+ #
+ # [ # patchset
+ # # change
+ # [ # hunk
+ # # change
+ # ]
+ # ]
+ #
+ # The +patch+ method accepts <tt>patchset</tt>s that are enumerable
+ # sequences containing either Diff::LCS::Change objects (or a subclass)
+ # or the array representations of those objects. Prior to application,
+ # array representations of Diff::LCS::Change objects will be reified.
def patch(src, patchset, direction = nil)
- patchset = __normalize_patchset(patchset)
+ # Normalize the patchset.
+ has_changes, patchset = Diff::LCS::Internals.analyze_patchset(patchset)
- if patchset.empty? or patchset.all? { |ps| ps.unchanged? }
+ if not has_changes
return src.dup if src.respond_to? :dup
return src
end
string = src.kind_of?(String)
- # Start with a new empty type of the source's class
+ # Start with a new empty type of the source's class
res = src.class.new
- # Normalize the patchset.
-
- direction ||= Diff::LCS.__diff_direction(src, patchset) || :patch
+ direction ||= Diff::LCS.__diff_direction(src, patchset)
ai = bj = 0
- patchset.each do |change|
- # Both Change and ContextChange support #action
- action = PATCH_MAP[direction][change.action]
+ patch_map = PATCH_MAP[direction]
+
+ patchset.flatten.each do |change|
+ # Both Change and ContextChange support #action
+ action = patch_map[change.action]
case change
when Diff::LCS::ContextChange
@@ -733,12 +767,12 @@ module Diff::LCS
bj += 1
end
- res << el
- bj += 1
+ res << el
+ bj += 1
when '='
- # This only appears in sdiff output with the SDiff callback.
- # Therefore, we only need to worry about dealing with a single
- # element.
+ # This only appears in sdiff output with the SDiff callback.
+ # Therefore, we only need to worry about dealing with a single
+ # element.
res << el
ai += 1
@@ -750,10 +784,10 @@ module Diff::LCS
bj += 1
end
- bj += 1
- ai += 1
+ bj += 1
+ ai += 1
- res << el
+ res << el
end
when Diff::LCS::Change
case action
@@ -798,318 +832,6 @@ module Diff::LCS
def patch!(src, patchset)
Diff::LCS.patch(src, patchset, :patch)
end
-
- # Compute the longest common subsequence between the sequenced
- # Enumerables +a+ and +b+. The result is an array whose contents is such
- # that
- #
- # result = Diff::LCS.__lcs(a, b)
- # result.each_with_index do |e, ii|
- # assert_equal(a[ii], b[e]) unless e.nil?
- # end
- #
- # Note: This will be deprecated as a public function in a future release.
- def __lcs(a, b)
- a_start = b_start = 0
- a_finish = a.size - 1
- b_finish = b.size - 1
- vector = []
-
- # Prune off any common elements at the beginning...
- while (a_start <= a_finish) and
- (b_start <= b_finish) and
- (a[a_start] == b[b_start])
- vector[a_start] = b_start
- a_start += 1
- b_start += 1
- end
-
- # Now the end...
- while (a_start <= a_finish) and
- (b_start <= b_finish) and
- (a[a_finish] == b[b_finish])
- vector[a_finish] = b_finish
- a_finish -= 1
- b_finish -= 1
- end
-
- # Now, compute the equivalence classes of positions of elements.
- b_matches = Diff::LCS.__position_hash(b, b_start .. b_finish)
-
- thresh = []
- links = []
-
- (a_start .. a_finish).each do |ii|
- ai = a.kind_of?(String) ? a[ii, 1] : a[ii]
- bm = b_matches[ai]
- kk = nil
- bm.reverse_each do |jj|
- if kk and (thresh[kk] > jj) and (thresh[kk - 1] < jj)
- thresh[kk] = jj
- else
- kk = Diff::LCS.__replace_next_larger(thresh, jj, kk)
- end
- links[kk] = [ (kk > 0) ? links[kk - 1] : nil, ii, jj ] unless kk.nil?
- end
- end
-
- unless thresh.empty?
- link = links[thresh.size - 1]
- while not link.nil?
- vector[link[1]] = link[2]
- link = link[0]
- end
- end
-
- vector
- end
-
- # Find the place at which +value+ would normally be inserted into the
- # Enumerable. If that place is already occupied by +value+, do nothing
- # and return +nil+. If the place does not exist (i.e., it is off the end
- # of the Enumerable), add it to the end. Otherwise, replace the element
- # at that point with +value+. It is assumed that the Enumerable's values
- # are numeric.
- #
- # This operation preserves the sort order.
- #
- # Note: This will be deprecated as a public function in a future release.
- def __replace_next_larger(enum, value, last_index = nil)
- # Off the end?
- if enum.empty? or (value > enum[-1])
- enum << value
- return enum.size - 1
- end
-
- # Binary search for the insertion point
- last_index ||= enum.size
- first_index = 0
- while (first_index <= last_index)
- ii = (first_index + last_index) >> 1
-
- found = enum[ii]
-
- if value == found
- return nil
- elsif value > found
- first_index = ii + 1
- else
- last_index = ii - 1
- end
- end
-
- # The insertion point is in first_index; overwrite the next larger
- # value.
- enum[first_index] = value
- return first_index
- end
-
- # If +vector+ maps the matching elements of another collection onto this
- # Enumerable, compute the inverse +vector+ that maps this Enumerable
- # onto the collection. (Currently unused.)
- #
- # Note: This will be deprecated as a public function in a future release.
- def __inverse_vector(a, vector)
- inverse = a.dup
- (0 ... vector.size).each do |ii|
- inverse[vector[ii]] = ii unless vector[ii].nil?
- end
- inverse
- end
-
- # Returns a hash mapping each element of an Enumerable to the set of
- # positions it occupies in the Enumerable, optionally restricted to the
- # elements specified in the range of indexes specified by +interval+.
- #
- # Note: This will be deprecated as a public function in a future release.
- def __position_hash(enum, interval = 0 .. -1)
- hash = Hash.new { |hh, kk| hh[kk] = [] }
- interval.each do |ii|
- kk = enum.kind_of?(String) ? enum[ii, 1] : enum[ii]
- hash[kk] << ii
- end
- hash
- end
-
- # Examine the patchset and the source to see in which direction the
- # patch should be applied.
- #
- # WARNING: By default, this examines the whole patch, so this could take
- # some time. This also works better with Diff::LCS::ContextChange or
- # Diff::LCS::Change as its source, as an array will cause the creation
- # of one of the above.
- #
- # Note: This will be deprecated as a public function in a future release.
- def __diff_direction(src, patchset, limit = nil)
- count = left_match = left_miss = right_match = right_miss = 0
- string = src.kind_of?(String)
-
- patchset.each do |change|
- count += 1
-
- case change
- when Diff::LCS::Change
- # With a simplistic change, we can't tell the difference between
- # the left and right on '!' actions, so we ignore those. On '='
- # actions, if there's a miss, we miss both left and right.
- element = string ? src[change.position, 1] : src[change.position]
-
- case change.action
- when '-'
- if element == change.element
- left_match += 1
- else
- left_miss += 1
- end
- when '+'
- if element == change.element
- right_match += 1
- else
- right_miss += 1
- end
- when '='
- if element != change.element
- left_miss += 1
- right_miss += 1
- end
- end
- when Diff::LCS::ContextChange
- case change.action
- when '-' # Remove details from the old string
- element = string ? src[change.old_position, 1] : src[change.old_position]
-
- if element == change.old_element
- left_match += 1
- else
- left_miss += 1
- end
- when '+'
- element = string ? src[change.new_position, 1] : src[change.new_position]
- if element == change.new_element
- right_match += 1
- else
- right_miss += 1
- end
- when '='
- le = string ? src[change.old_position, 1] : src[change.old_position]
- re = string ? src[change.new_position, 1] : src[change.new_position]
-
- left_miss += 1 if le != change.old_element
- right_miss += 1 if re != change.new_element
- when '!'
- element = string ? src[change.old_position, 1] : src[change.old_position]
- if element == change.old_element
- left_match += 1
- else
- element = string ? src[change.new_position, 1] : src[change.new_position]
- if element == change.new_element
- right_match += 1
- else
- left_miss += 1
- right_miss += 1
- end
- end
- end
- end
-
- break if (not limit.nil?) && (count > limit)
- end
-
- if left_match.zero?
- end
-
- no_left = (left_match == 0) && (left_miss >= 0)
- no_right = (right_match == 0) && (right_miss >= 0)
-
- case [no_left, no_right]
- when [false, true]
- return :patch
- when [true, false]
- return :unpatch
- else
- raise "The provided patchset does not appear to apply to the provided value as either source or destination value."
- end
- end
-
- # Normalize the patchset. A patchset is always a sequence of changes, but
- # how those changes are represented may vary, depending on how they were
- # generated. In all cases we support, we also support the array
- # representation of the changes. The formats are:
- #
- # [ # patchset <- Diff::LCS.diff(a, b)
- # [ # one or more hunks
- # Diff::LCS::Change # one or more changes
- # ] ]
- #
- # [ # patchset, equivalent to the above
- # [ # one or more hunks
- # [ action, line, value ] # one or more changes
- # ] ]
- #
- # [ # patchset <- Diff::LCS.diff(a, b, Diff::LCS::ContextDiffCallbacks)
- # # OR <- Diff::LCS.sdiff(a, b, Diff::LCS::ContextDiffCallbacks)
- # [ # one or more hunks
- # Diff::LCS::ContextChange # one or more changes
- # ] ]
- #
- # [ # patchset, equivalent to the above
- # [ # one or more hunks
- # [ action, [ old line, old value ], [ new line, new value ] ]
- # # one or more changes
- # ] ]
- #
- # [ # patchset <- Diff::LCS.sdiff(a, b)
- # # OR <- Diff::LCS.diff(a, b, Diff::LCS::SDiffCallbacks)
- # Diff::LCS::ContextChange # one or more changes
- # ]
- #
- # [ # patchset, equivalent to the above
- # [ action, [ old line, old value ], [ new line, new value ] ]
- # # one or more changes
- # ]
- #
- # The result of this will be either of the following.
- #
- # [ # patchset
- # Diff::LCS::ContextChange # one or more changes
- # ]
- #
- # [ # patchset
- # Diff::LCS::Change # one or more changes
- # ]
- #
- # If either of the above is provided, it will be returned as such.
- #
- # Note: This will be deprecated as a public function in a future release.
- def __normalize_patchset(patchset)
- patchset.map do |hunk|
- case hunk
- when Diff::LCS::ContextChange, Diff::LCS::Change
- hunk
- when Array
- if (not hunk[0].kind_of?(Array)) and hunk[1].kind_of?(Array) and hunk[2].kind_of?(Array)
- Diff::LCS::ContextChange.from_a(hunk)
- else
- hunk.map do |change|
- case change
- when Diff::LCS::ContextChange, Diff::LCS::Change
- change
- when Array
- # change[1] will ONLY be an array in a ContextChange#to_a call.
- # In Change#to_a, it represents the line (singular).
- if change[1].kind_of?(Array)
- Diff::LCS::ContextChange.from_a(change)
- else
- Diff::LCS::Change.from_a(change)
- end
- end
- end
- end
- else
- raise ArgumentError, "Cannot normalise a hunk of class #{hunk.class}."
- end
- end.flatten
- end
end
end
diff --git a/lib/diff/lcs/change.rb b/lib/diff/lcs/change.rb
index 5139821..50c1de9 100644
--- a/lib/diff/lcs/change.rb
+++ b/lib/diff/lcs/change.rb
@@ -1,59 +1,59 @@
-#! /usr/env/bin ruby
-#--
-# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
-# adapted from:
-# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
-# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
-# implements McIlroy-Hunt diff algorithm
-#
-# This program is free software. It may be redistributed and/or modified under
-# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
-# Ruby licence.
-#
-# $Id$
-#++
+# -*- ruby encoding: utf-8 -*-
# Provides Diff::LCS::Change and Diff::LCS::ContextChange.
- # Centralises the change test code in Diff::LCS::Change and
- # Diff::LCS::ContextChange, since it's the same for both classes.
-module Diff::LCS::ChangeTypeTests
- def deleting?
- @action == '-'
- end
+# Represents a simplistic (non-contextual) change. Represents the removal or
+# addition of an element from either the old or the new sequenced
+# enumerable.
+class Diff::LCS::Change
+ # The only actions valid for changes are '+' (add), '-' (delete), '='
+ # (no change), '!' (changed), '<' (tail changes from first sequence), or
+ # '>' (tail changes from second sequence). The last two ('<>') are only
+ # found with Diff::LCS::diff and Diff::LCS::sdiff.
+ VALID_ACTIONS = %W(+ - = ! > <)
- def adding?
- @action == '+'
+ def self.valid_action?(action)
+ VALID_ACTIONS.include? action
end
- def unchanged?
- @action == '='
- end
+ # Returns the action this Change represents.
+ attr_reader :action
- def changed?
- @action == '!'
+ # Returns the position of the Change.
+ attr_reader :position
+ # Returns the sequence element of the Change.
+ attr_reader :element
+
+ def initialize(*args)
+ @action, @position, @element = *args
+
+ unless Diff::LCS::Change.valid_action?(@action)
+ raise "Invalid Change Action '#{@action}'"
+ end
+ raise "Invalid Position Type" unless @position.kind_of? Fixnum
end
- def finished_a?
- @action == '>'
+ def inspect
+ %Q(#<#{self.class.name}:#{__id__.to_s(16)} @action=#{action} position=#{position} element=#{element.inspect})
end
- def finished_b?
- @action == '<'
+ def to_a
+ [ @action, @position, @element ]
end
-end
- # Represents a simplistic (non-contextual) change. Represents the removal or
- # addition of an element from either the old or the new sequenced enumerable.
-class Diff::LCS::Change
- # Returns the action this Change represents. Can be '+' (#adding?), '-'
- # (#deleting?), '=' (#unchanged?), # or '!' (#changed?). When created by
- # Diff::LCS#diff or Diff::LCS#sdiff, it may also be '>' (#finished_a?) or
- # '<' (#finished_b?).
- attr_reader :action
- attr_reader :position
- attr_reader :element
+ def self.from_a(arr)
+ arr = arr.flatten
+ case arr.size
+ when 5
+ Diff::LCS::ContextChange.new(*(arr[0...5]))
+ when 3
+ Diff::LCS::Change.new(*(arr[0...3]))
+ else
+ raise "Invalid change array format provided."
+ end
+ end
include Comparable
+
def ==(other)
(self.action == other.action) and
(self.position == other.position) and
@@ -67,85 +67,79 @@ class Diff::LCS::Change
r
end
- def initialize(action, position, element)
- @action = action
- @position = position
- @element = element
+ def adding?
+ @action == '+'
end
- # Creates a Change from an array produced by Change#to_a.
- def to_a
- [@action, @position, @element]
+ def deleting?
+ @action == '-'
end
- def self.from_a(arr)
- Diff::LCS::Change.new(arr[0], arr[1], arr[2])
+ def unchanged?
+ @action == '='
+ end
+
+ def changed?
+ @action == '!'
end
- include Diff::LCS::ChangeTypeTests
+ def finished_a?
+ @action == '>'
+ end
+
+ def finished_b?
+ @action == '<'
+ end
end
- # Represents a contextual change. Contains the position and values of the
- # elements in the old and the new sequenced enumerables as well as the action
- # taken.
-class Diff::LCS::ContextChange
- # Returns the action this Change represents. Can be '+' (#adding?), '-'
- # (#deleting?), '=' (#unchanged?), # or '!' (#changed?). When
- # created by Diff::LCS#diff or Diff::LCS#sdiff, it may also be '>'
- # (#finished_a?) or '<' (#finished_b?).
- attr_reader :action
+# Represents a contextual change. Contains the position and values of the
+# elements in the old and the new sequenced enumerables as well as the action
+# taken.
+class Diff::LCS::ContextChange < Diff::LCS::Change
+ # We don't need these two values.
+ undef :position
+ undef :element
+
+ # Returns the old position being changed.
attr_reader :old_position
- attr_reader :old_element
+ # Returns the new position being changed.
attr_reader :new_position
+ # Returns the old element being changed.
+ attr_reader :old_element
+ # Returns the new element being changed.
attr_reader :new_element
- include Comparable
+ def initialize(*args)
+ @action, @old_position, @old_element, @new_position, @new_element = *args
- def ==(other)
- (@action == other.action) and
- (@old_position == other.old_position) and
- (@new_position == other.new_position) and
- (@old_element == other.old_element) and
- (@new_element == other.new_element)
- end
-
- def inspect(*args)
- %Q(#<#{self.class.name}:#{__id__} @action=#{action} positions=#{old_position},#{new_position} elements=#{old_element.inspect},#{new_element.inspect}>)
- end
-
- def <=>(other)
- r = @action <=> other.action
- r = @old_position <=> other.old_position if r.zero?
- r = @new_position <=> other.new_position if r.zero?
- r = @old_element <=> other.old_element if r.zero?
- r = @new_element <=> other.new_element if r.zero?
- r
+ unless Diff::LCS::Change.valid_action?(@action)
+ raise "Invalid Change Action '#{@action}'"
+ end
+ unless @old_position.nil? or @old_position.kind_of? Fixnum
+ raise "Invalid (Old) Position Type"
+ end
+ unless @new_position.nil? or @new_position.kind_of? Fixnum
+ raise "Invalid (New) Position Type"
+ end
end
- def initialize(action, old_position, old_element, new_position, new_element)
- @action = action
- @old_position = old_position
- @old_element = old_element
- @new_position = new_position
- @new_element = new_element
+ def to_a
+ [ @action,
+ [ @old_position, @old_element ],
+ [ @new_position, @new_element ]
+ ]
end
- def to_a
- [@action, [@old_position, @old_element], [@new_position, @new_element]]
+ def inspect(*args)
+ %Q(#<#{self.class.name}:#{__id__} @action=#{action} positions=#{old_position},#{new_position} elements=#{old_element.inspect},#{new_element.inspect}>)
end
- # Creates a ContextChange from an array produced by ContextChange#to_a.
def self.from_a(arr)
- if arr.size == 5
- Diff::LCS::ContextChange.new(arr[0], arr[1], arr[2], arr[3], arr[4])
- else
- Diff::LCS::ContextChange.new(arr[0], arr[1][0], arr[1][1], arr[2][0],
- arr[2][1])
- end
+ Diff::LCS::Change.from_a(arr)
end
- # Simplifies a context change for use in some diff callbacks. '<' actions
- # are converted to '-' and '>' actions are converted to '+'.
+ # Simplifies a context change for use in some diff callbacks. '<' actions
+ # are converted to '-' and '>' actions are converted to '+'.
def self.simplify(event)
ea = event.to_a
@@ -165,5 +159,20 @@ class Diff::LCS::ContextChange
Diff::LCS::ContextChange.from_a(ea)
end
- include Diff::LCS::ChangeTypeTests
+ def ==(other)
+ (@action == other.action) and
+ (@old_position == other.old_position) and
+ (@new_position == other.new_position) and
+ (@old_element == other.old_element) and
+ (@new_element == other.new_element)
+ end
+
+ def <=>(other)
+ r = @action <=> other.action
+ r = @old_position <=> other.old_position if r.zero?
+ r = @new_position <=> other.new_position if r.zero?
+ r = @old_element <=> other.old_element if r.zero?
+ r = @new_element <=> other.new_element if r.zero?
+ r
+ end
end
diff --git a/lib/diff/lcs/internals.rb b/lib/diff/lcs/internals.rb
new file mode 100644
index 0000000..373a18a
--- /dev/null
+++ b/lib/diff/lcs/internals.rb
@@ -0,0 +1,271 @@
+# -*- ruby encoding: utf-8 -*-
+
+module Diff::LCS::Internals # :nodoc:
+ class << self
+ # Compute the longest common subsequence between the sequenced
+ # Enumerables +a+ and +b+. The result is an array whose contents is such
+ # that
+ #
+ # result = Diff::LCS::Internals.lcs(a, b)
+ # result.each_with_index do |e, ii|
+ # assert_equal(a[ii], b[e]) unless e.nil?
+ # end
+ def lcs(a, b)
+ a_start = b_start = 0
+ a_finish = a.size - 1
+ b_finish = b.size - 1
+ vector = []
+
+ # Prune off any common elements at the beginning...
+ while (a_start <= a_finish) and
+ (b_start <= b_finish) and
+ (a[a_start] == b[b_start])
+ vector[a_start] = b_start
+ a_start += 1
+ b_start += 1
+ end
+
+ # Now the end...
+ while (a_start <= a_finish) and
+ (b_start <= b_finish) and
+ (a[a_finish] == b[b_finish])
+ vector[a_finish] = b_finish
+ a_finish -= 1
+ b_finish -= 1
+ end
+
+ # Now, compute the equivalence classes of positions of elements.
+ b_matches = position_hash(b, b_start .. b_finish)
+
+ thresh = []
+ links = []
+
+ (a_start .. a_finish).each do |ii|
+ ai = a.kind_of?(String) ? a[ii, 1] : a[ii]
+ bm = b_matches[ai]
+ kk = nil
+ bm.reverse_each do |jj|
+ if kk and (thresh[kk] > jj) and (thresh[kk - 1] < jj)
+ thresh[kk] = jj
+ else
+ kk = replace_next_larger(thresh, jj, kk)
+ end
+ links[kk] = [ (kk > 0) ? links[kk - 1] : nil, ii, jj ] unless kk.nil?
+ end
+ end
+
+ unless thresh.empty?
+ link = links[thresh.size - 1]
+ while not link.nil?
+ vector[link[1]] = link[2]
+ link = link[0]
+ end
+ end
+
+ vector
+ end
+
+ # This method will analyze the provided patchset to provide a
+ # single-pass normalization (conversion of the array form of
+ # Diff::LCS::Change objects to the object form of same) and detection of
+ # whether the patchset represents changes to be made.
+ def analyze_patchset(patchset, depth = 0)
+ raise "Patchset too complex" if depth > 1
+
+ has_changes = false
+
+ # Format:
+ # [ # patchset
+ # # hunk (change)
+ # [ # hunk
+ # # change
+ # ]
+ # ]
+
+ patchset = patchset.map do |hunk|
+ case hunk
+ when Diff::LCS::Change
+ has_changes ||= !hunk.unchanged?
+ hunk
+ when Array
+ # Detect if the 'hunk' is actually an array-format
+ # Change object.
+ if Diff::LCS::Change.valid_action? hunk[0]
+ hunk = Diff::LCS::Change.from_a(hunk)
+ has_changes ||= !hunk.unchanged?
+ hunk
+ else
+ with_changes, hunk = analyze_patchset(hunk, depth + 1)
+ has_changes ||= with_changes
+ hunk
+ end
+ else
+ raise ArgumentError, "Cannot normalise a hunk of class #{hunk.class}."
+ end
+ end
+
+ [ has_changes, patchset ]
+ end
+
+ # Find the place at which +value+ would normally be inserted into the
+ # Enumerable. If that place is already occupied by +value+, do nothing
+ # and return +nil+. If the place does not exist (i.e., it is off the end
+ # of the Enumerable), add it to the end. Otherwise, replace the element
+ # at that point with +value+. It is assumed that the Enumerable's values
+ # are numeric.
+ #
+ # This operation preserves the sort order.
+ def replace_next_larger(enum, value, last_index = nil)
+ # Off the end?
+ if enum.empty? or (value > enum[-1])
+ enum << value
+ return enum.size - 1
+ end
+
+ # Binary search for the insertion point
+ last_index ||= enum.size
+ first_index = 0
+ while (first_index <= last_index)
+ ii = (first_index + last_index) >> 1
+
+ found = enum[ii]
+
+ if value == found
+ return nil
+ elsif value > found
+ first_index = ii + 1
+ else
+ last_index = ii - 1
+ end
+ end
+
+ # The insertion point is in first_index; overwrite the next larger
+ # value.
+ enum[first_index] = value
+ return first_index
+ end
+
+ # If +vector+ maps the matching elements of another collection onto this
+ # Enumerable, compute the inverse +vector+ that maps this Enumerable
+ # onto the collection. (Currently unused.)
+ def inverse_vector(a, vector)
+ inverse = a.dup
+ (0 ... vector.size).each do |ii|
+ inverse[vector[ii]] = ii unless vector[ii].nil?
+ end
+ inverse
+ end
+
+ # Returns a hash mapping each element of an Enumerable to the set of
+ # positions it occupies in the Enumerable, optionally restricted to the
+ # elements specified in the range of indexes specified by +interval+.
+ def position_hash(enum, interval = 0 .. -1)
+ hash = Hash.new { |hh, kk| hh[kk] = [] }
+ interval.each do |ii|
+ kk = enum.kind_of?(String) ? enum[ii, 1] : enum[ii]
+ hash[kk] << ii
+ end
+ hash
+ end
+
+ # Examine the patchset and the source to see in which direction the
+ # patch should be applied.
+ #
+ # WARNING: By default, this examines the whole patch, so this could take
+ # some time. This also works better with Diff::LCS::ContextChange or
+ # Diff::LCS::Change as its source, as an array will cause the creation
+ # of one of the above.
+ #
+ # Note: This will be deprecated as a public function in a future release.
+ def diff_direction(src, patchset, limit = nil)
+ count = left_match = left_miss = right_match = right_miss = 0
+ string = src.kind_of?(String)
+
+ patchset.each do |change|
+ count += 1
+
+ case change
+ when Diff::LCS::Change
+ # With a simplistic change, we can't tell the difference between
+ # the left and right on '!' actions, so we ignore those. On '='
+ # actions, if there's a miss, we miss both left and right.
+ element = string ? src[change.position, 1] : src[change.position]
+
+ case change.action
+ when '-'
+ if element == change.element
+ left_match += 1
+ else
+ left_miss += 1
+ end
+ when '+'
+ if element == change.element
+ right_match += 1
+ else
+ right_miss += 1
+ end
+ when '='
+ if element != change.element
+ left_miss += 1
+ right_miss += 1
+ end
+ end
+ when Diff::LCS::ContextChange
+ case change.action
+ when '-' # Remove details from the old string
+ element = string ? src[change.old_position, 1] : src[change.old_position]
+
+ if element == change.old_element
+ left_match += 1
+ else
+ left_miss += 1
+ end
+ when '+'
+ element = string ? src[change.new_position, 1] : src[change.new_position]
+ if element == change.new_element
+ right_match += 1
+ else
+ right_miss += 1
+ end
+ when '='
+ le = string ? src[change.old_position, 1] : src[change.old_position]
+ re = string ? src[change.new_position, 1] : src[change.new_position]
+
+ left_miss += 1 if le != change.old_element
+ right_miss += 1 if re != change.new_element
+ when '!'
+ element = string ? src[change.old_position, 1] : src[change.old_position]
+ if element == change.old_element
+ left_match += 1
+ else
+ element = string ? src[change.new_position, 1] : src[change.new_position]
+ if element == change.new_element
+ right_match += 1
+ else
+ left_miss += 1
+ right_miss += 1
+ end
+ end
+ end
+ end
+
+ break if (not limit.nil?) && (count > limit)
+ end
+
+ if left_match.zero?
+ end
+
+ no_left = (left_match == 0) && (left_miss >= 0)
+ no_right = (right_match == 0) && (right_miss >= 0)
+
+ direction = case [no_left, no_right]
+ when [false, true]
+ :patch
+ when [true, false]
+ :unpatch
+ else
+ raise "The provided patchset does not appear to apply to the provided value as either source or destination value."
+ end
+ end
+ end
+end
diff --git a/spec/lcs_spec.rb b/spec/lcs_spec.rb
index 0fc077e..addc2f7 100644
--- a/spec/lcs_spec.rb
+++ b/spec/lcs_spec.rb
@@ -2,11 +2,11 @@
require 'spec_helper'
-describe "Diff::LCS.__lcs" do
+describe "Diff::LCS::Internals.lcs" do
include Diff::LCS::SpecHelper::Matchers
it "should return a meaningful LCS array with (seq1, seq2)" do
- res = Diff::LCS.__lcs(seq1, seq2)
+ res = Diff::LCS::Internals.lcs(seq1, seq2)
# The result of the LCS (less the +nil+ values) must be as long as the
# correct result.
res.compact.size.should == correct_lcs.size
@@ -21,11 +21,11 @@ describe "Diff::LCS.__lcs" do
end
it "should return all indexes with (hello, hello)" do
- Diff::LCS.__lcs(hello, hello).should == (0...hello.size).to_a
+ Diff::LCS::Internals.lcs(hello, hello).should == (0...hello.size).to_a
end
it "should return all indexes with (hello_ary, hello_ary)" do
- Diff::LCS.__lcs(hello_ary, hello_ary).should == (0...hello_ary.size).to_a
+ Diff::LCS::Internals.lcs(hello_ary, hello_ary).should == (0...hello_ary.size).to_a
end
end
diff --git a/spec/patch_spec.rb b/spec/patch_spec.rb
index ad160c9..72bfe85 100644
--- a/spec/patch_spec.rb
+++ b/spec/patch_spec.rb
@@ -320,7 +320,7 @@ describe "Diff::LCS.patch" do
Diff::LCS.patch(@s2, @patch_set_s1_s2).should == @s1
end
- it "should correctly patch left-to-right (explicit patch)" do
+ it "should correctly patch left-to-right (explicit patch)", :only => true do
Diff::LCS.patch(@s1, @patch_set_s1_s2, :patch).should == @s2
Diff::LCS.patch(@s2, @patch_set_s2_s1, :patch).should == @s1
Diff::LCS.patch!(@s1, @patch_set_s1_s2).should == @s2