diff options
author | Austin Ziegler <austin@surfeasy.com> | 2012-03-24 00:35:04 -0400 |
---|---|---|
committer | Austin Ziegler <austin@surfeasy.com> | 2012-03-24 00:42:25 -0400 |
commit | 2151e2093dfd00d6aff72f49d13a137efeed2e53 (patch) | |
tree | a54fba041abf07cfe1c43c0652efc7c34b153eb9 | |
parent | a341ac7ca261a73e37c21a7f034f5892ea7dcde4 (diff) | |
download | diff-lcs-2151e2093dfd00d6aff72f49d13a137efeed2e53.tar.gz |
Starting on a substantial refactoring to fix bugs.
1. All Diff::LCS::__* methods are now moved to Diff::LCS::Internals and
named without the underscores.
2. Fixed the documentation for Diff::LCS::patch. (Some formatting may
have been broken.)
3. Eliminated normalize_patchset in favour of the (substantially
similar) analyze_patchset. This allows a single pass over a
patchset both to determine whether the patchset contains changes and
to transform it to the object-based format.
4. Made Diff::LCS::ContextChange descend from Diff::LCS::Change. This
will probably change so that both descend from something like a
Diff::LCS::AbstractChange, because a ContextChange now also matches
`when Diff::LCS::Change`, which makes the order of the branches in
case statements significant.
-rw-r--r-- | lib/diff/lcs.rb | 398 | ||||
-rw-r--r-- | lib/diff/lcs/change.rb | 209 | ||||
-rw-r--r-- | lib/diff/lcs/internals.rb | 271 | ||||
-rw-r--r-- | spec/lcs_spec.rb | 8 | ||||
-rw-r--r-- | spec/patch_spec.rb | 2 |
5 files changed, 445 insertions, 443 deletions
diff --git a/lib/diff/lcs.rb b/lib/diff/lcs.rb index 7218494..2af2a4e 100644 --- a/lib/diff/lcs.rb +++ b/lib/diff/lcs.rb @@ -134,6 +134,7 @@ module Diff end require 'diff/lcs/callbacks' +require 'diff/lcs/internals' module Diff::LCS # Returns an Array containing the longest common subsequence(s) between @@ -213,7 +214,7 @@ module Diff::LCS # +seq1+ in turn and may be modified before they are placed into the # returned Array of subsequences. def LCS(seq1, seq2, &block) #:yields seq1[ii] for each matched: - matches = Diff::LCS.__lcs(seq1, seq2) + matches = Diff::LCS::Internals.lcs(seq1, seq2) ret = [] matches.each_with_index do |ee, ii| unless matches[ii].nil? @@ -379,7 +380,7 @@ module Diff::LCS # sequence is reached, if +a+ has not yet reached the end of +A+ or +b+ # has not yet reached the end of +B+. def traverse_sequences(seq1, seq2, callbacks = Diff::LCS::SequenceCallbacks, &block) #:yields change events: - matches = Diff::LCS.__lcs(seq1, seq2) + matches = Diff::LCS::Internals.lcs(seq1, seq2) run_finished_a = run_finished_b = false string = seq1.kind_of?(String) @@ -569,7 +570,7 @@ module Diff::LCS # +a+ and +b+ are considered to be pointing to matching or changed # elements. def traverse_balanced(seq1, seq2, callbacks = Diff::LCS::BalancedCallbacks) - matches = Diff::LCS.__lcs(seq1, seq2) + matches = Diff::LCS::Internals.lcs(seq1, seq2) a_size = seq1.size b_size = seq2.size ai = bj = mb = 0 @@ -676,34 +677,67 @@ module Diff::LCS :unpatch => { '+' => '-', '-' => '+', '!' => '!', '=' => '=' } } - # Given a patchset, convert the current version to the new - # version. If +direction+ is not specified (must be - # <tt>:patch</tt> or <tt>:unpatch</tt>), then discovery of the - # direction of the patch will be attempted. + # Applies a +patchset+ to the sequence +src+ according to the + # +direction+ (<tt>:patch</tt> or <tt>:unpatch</tt>). # - # If the patchset is empty or all 'unchanged', the src value will be - # returned as either <tt>src.dup</tt> or <tt>src</tt>. 
+ # If the +direction+ is not specified, Diff::LCS::patch will attempt to + # discover the direction of the +patchset+. + # + # A +patchset+ can be considered to apply forward (<tt>:patch</tt>) if + # the following expression is true: + # + # patch(s1, diff(s1, s2)) -> s2 + # + # A +patchset+ can be considered to apply backward (<tt>:unpatch</tt>) + # if the following expression is true: + # + # patch(s2, diff(s1, s2)) -> s1 + # + # If the +patchset+ contains no changes, the +src+ value will be + # returned as either <tt>src.dup</tt> or +src+. A +patchset+ can be + # deemed as having no changes if the following predicate returns true: + # + # patchset.empty? or + # patchset.flatten.all? { |change| change.unchanged? } + # + # === Patchsets + # A +patchset+ is always an enumerable sequence of changes, hunks of + # changes, or a mix of the two. A hunk of changes is an enumerable + # sequence of changes: + # + # [ # patchset + # # change + # [ # hunk + # # change + # ] + # ] + # + # The +patch+ method accepts <tt>patchset</tt>s that are enumerable + # sequences containing either Diff::LCS::Change objects (or a subclass) + # or the array representations of those objects. Prior to application, + # array representations of Diff::LCS::Change objects will be reified. def patch(src, patchset, direction = nil) - patchset = __normalize_patchset(patchset) + # Normalize the patchset. + has_changes, patchset = Diff::LCS::Internals.analyze_patchset(patchset) - if patchset.empty? or patchset.all? { |ps| ps.unchanged? } + if not has_changes return src.dup if src.respond_to? :dup return src end string = src.kind_of?(String) - # Start with a new empty type of the source's class + # Start with a new empty type of the source's class res = src.class.new - # Normalize the patchset. 
- - direction ||= Diff::LCS.__diff_direction(src, patchset) || :patch + direction ||= Diff::LCS.__diff_direction(src, patchset) ai = bj = 0 - patchset.each do |change| - # Both Change and ContextChange support #action - action = PATCH_MAP[direction][change.action] + patch_map = PATCH_MAP[direction] + + patchset.flatten.each do |change| + # Both Change and ContextChange support #action + action = patch_map[change.action] case change when Diff::LCS::ContextChange @@ -733,12 +767,12 @@ module Diff::LCS bj += 1 end - res << el - bj += 1 + res << el + bj += 1 when '=' - # This only appears in sdiff output with the SDiff callback. - # Therefore, we only need to worry about dealing with a single - # element. + # This only appears in sdiff output with the SDiff callback. + # Therefore, we only need to worry about dealing with a single + # element. res << el ai += 1 @@ -750,10 +784,10 @@ module Diff::LCS bj += 1 end - bj += 1 - ai += 1 + bj += 1 + ai += 1 - res << el + res << el end when Diff::LCS::Change case action @@ -798,318 +832,6 @@ module Diff::LCS def patch!(src, patchset) Diff::LCS.patch(src, patchset, :patch) end - - # Compute the longest common subsequence between the sequenced - # Enumerables +a+ and +b+. The result is an array whose contents is such - # that - # - # result = Diff::LCS.__lcs(a, b) - # result.each_with_index do |e, ii| - # assert_equal(a[ii], b[e]) unless e.nil? - # end - # - # Note: This will be deprecated as a public function in a future release. - def __lcs(a, b) - a_start = b_start = 0 - a_finish = a.size - 1 - b_finish = b.size - 1 - vector = [] - - # Prune off any common elements at the beginning... - while (a_start <= a_finish) and - (b_start <= b_finish) and - (a[a_start] == b[b_start]) - vector[a_start] = b_start - a_start += 1 - b_start += 1 - end - - # Now the end... 
- while (a_start <= a_finish) and - (b_start <= b_finish) and - (a[a_finish] == b[b_finish]) - vector[a_finish] = b_finish - a_finish -= 1 - b_finish -= 1 - end - - # Now, compute the equivalence classes of positions of elements. - b_matches = Diff::LCS.__position_hash(b, b_start .. b_finish) - - thresh = [] - links = [] - - (a_start .. a_finish).each do |ii| - ai = a.kind_of?(String) ? a[ii, 1] : a[ii] - bm = b_matches[ai] - kk = nil - bm.reverse_each do |jj| - if kk and (thresh[kk] > jj) and (thresh[kk - 1] < jj) - thresh[kk] = jj - else - kk = Diff::LCS.__replace_next_larger(thresh, jj, kk) - end - links[kk] = [ (kk > 0) ? links[kk - 1] : nil, ii, jj ] unless kk.nil? - end - end - - unless thresh.empty? - link = links[thresh.size - 1] - while not link.nil? - vector[link[1]] = link[2] - link = link[0] - end - end - - vector - end - - # Find the place at which +value+ would normally be inserted into the - # Enumerable. If that place is already occupied by +value+, do nothing - # and return +nil+. If the place does not exist (i.e., it is off the end - # of the Enumerable), add it to the end. Otherwise, replace the element - # at that point with +value+. It is assumed that the Enumerable's values - # are numeric. - # - # This operation preserves the sort order. - # - # Note: This will be deprecated as a public function in a future release. - def __replace_next_larger(enum, value, last_index = nil) - # Off the end? - if enum.empty? or (value > enum[-1]) - enum << value - return enum.size - 1 - end - - # Binary search for the insertion point - last_index ||= enum.size - first_index = 0 - while (first_index <= last_index) - ii = (first_index + last_index) >> 1 - - found = enum[ii] - - if value == found - return nil - elsif value > found - first_index = ii + 1 - else - last_index = ii - 1 - end - end - - # The insertion point is in first_index; overwrite the next larger - # value. 
- enum[first_index] = value - return first_index - end - - # If +vector+ maps the matching elements of another collection onto this - # Enumerable, compute the inverse +vector+ that maps this Enumerable - # onto the collection. (Currently unused.) - # - # Note: This will be deprecated as a public function in a future release. - def __inverse_vector(a, vector) - inverse = a.dup - (0 ... vector.size).each do |ii| - inverse[vector[ii]] = ii unless vector[ii].nil? - end - inverse - end - - # Returns a hash mapping each element of an Enumerable to the set of - # positions it occupies in the Enumerable, optionally restricted to the - # elements specified in the range of indexes specified by +interval+. - # - # Note: This will be deprecated as a public function in a future release. - def __position_hash(enum, interval = 0 .. -1) - hash = Hash.new { |hh, kk| hh[kk] = [] } - interval.each do |ii| - kk = enum.kind_of?(String) ? enum[ii, 1] : enum[ii] - hash[kk] << ii - end - hash - end - - # Examine the patchset and the source to see in which direction the - # patch should be applied. - # - # WARNING: By default, this examines the whole patch, so this could take - # some time. This also works better with Diff::LCS::ContextChange or - # Diff::LCS::Change as its source, as an array will cause the creation - # of one of the above. - # - # Note: This will be deprecated as a public function in a future release. - def __diff_direction(src, patchset, limit = nil) - count = left_match = left_miss = right_match = right_miss = 0 - string = src.kind_of?(String) - - patchset.each do |change| - count += 1 - - case change - when Diff::LCS::Change - # With a simplistic change, we can't tell the difference between - # the left and right on '!' actions, so we ignore those. On '=' - # actions, if there's a miss, we miss both left and right. - element = string ? 
src[change.position, 1] : src[change.position] - - case change.action - when '-' - if element == change.element - left_match += 1 - else - left_miss += 1 - end - when '+' - if element == change.element - right_match += 1 - else - right_miss += 1 - end - when '=' - if element != change.element - left_miss += 1 - right_miss += 1 - end - end - when Diff::LCS::ContextChange - case change.action - when '-' # Remove details from the old string - element = string ? src[change.old_position, 1] : src[change.old_position] - - if element == change.old_element - left_match += 1 - else - left_miss += 1 - end - when '+' - element = string ? src[change.new_position, 1] : src[change.new_position] - if element == change.new_element - right_match += 1 - else - right_miss += 1 - end - when '=' - le = string ? src[change.old_position, 1] : src[change.old_position] - re = string ? src[change.new_position, 1] : src[change.new_position] - - left_miss += 1 if le != change.old_element - right_miss += 1 if re != change.new_element - when '!' - element = string ? src[change.old_position, 1] : src[change.old_position] - if element == change.old_element - left_match += 1 - else - element = string ? src[change.new_position, 1] : src[change.new_position] - if element == change.new_element - right_match += 1 - else - left_miss += 1 - right_miss += 1 - end - end - end - end - - break if (not limit.nil?) && (count > limit) - end - - if left_match.zero? - end - - no_left = (left_match == 0) && (left_miss >= 0) - no_right = (right_match == 0) && (right_miss >= 0) - - case [no_left, no_right] - when [false, true] - return :patch - when [true, false] - return :unpatch - else - raise "The provided patchset does not appear to apply to the provided value as either source or destination value." - end - end - - # Normalize the patchset. A patchset is always a sequence of changes, but - # how those changes are represented may vary, depending on how they were - # generated. 
In all cases we support, we also support the array - # representation of the changes. The formats are: - # - # [ # patchset <- Diff::LCS.diff(a, b) - # [ # one or more hunks - # Diff::LCS::Change # one or more changes - # ] ] - # - # [ # patchset, equivalent to the above - # [ # one or more hunks - # [ action, line, value ] # one or more changes - # ] ] - # - # [ # patchset <- Diff::LCS.diff(a, b, Diff::LCS::ContextDiffCallbacks) - # # OR <- Diff::LCS.sdiff(a, b, Diff::LCS::ContextDiffCallbacks) - # [ # one or more hunks - # Diff::LCS::ContextChange # one or more changes - # ] ] - # - # [ # patchset, equivalent to the above - # [ # one or more hunks - # [ action, [ old line, old value ], [ new line, new value ] ] - # # one or more changes - # ] ] - # - # [ # patchset <- Diff::LCS.sdiff(a, b) - # # OR <- Diff::LCS.diff(a, b, Diff::LCS::SDiffCallbacks) - # Diff::LCS::ContextChange # one or more changes - # ] - # - # [ # patchset, equivalent to the above - # [ action, [ old line, old value ], [ new line, new value ] ] - # # one or more changes - # ] - # - # The result of this will be either of the following. - # - # [ # patchset - # Diff::LCS::ContextChange # one or more changes - # ] - # - # [ # patchset - # Diff::LCS::Change # one or more changes - # ] - # - # If either of the above is provided, it will be returned as such. - # - # Note: This will be deprecated as a public function in a future release. - def __normalize_patchset(patchset) - patchset.map do |hunk| - case hunk - when Diff::LCS::ContextChange, Diff::LCS::Change - hunk - when Array - if (not hunk[0].kind_of?(Array)) and hunk[1].kind_of?(Array) and hunk[2].kind_of?(Array) - Diff::LCS::ContextChange.from_a(hunk) - else - hunk.map do |change| - case change - when Diff::LCS::ContextChange, Diff::LCS::Change - change - when Array - # change[1] will ONLY be an array in a ContextChange#to_a call. - # In Change#to_a, it represents the line (singular). 
- if change[1].kind_of?(Array) - Diff::LCS::ContextChange.from_a(change) - else - Diff::LCS::Change.from_a(change) - end - end - end - end - else - raise ArgumentError, "Cannot normalise a hunk of class #{hunk.class}." - end - end.flatten - end end end diff --git a/lib/diff/lcs/change.rb b/lib/diff/lcs/change.rb index 5139821..50c1de9 100644 --- a/lib/diff/lcs/change.rb +++ b/lib/diff/lcs/change.rb @@ -1,59 +1,59 @@ -#! /usr/env/bin ruby -#-- -# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca> -# adapted from: -# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com> -# Smalltalk by Mario I. Wolczko <mario@wolczko.com> -# implements McIlroy-Hunt diff algorithm -# -# This program is free software. It may be redistributed and/or modified under -# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the -# Ruby licence. -# -# $Id$ -#++ +# -*- ruby encoding: utf-8 -*- # Provides Diff::LCS::Change and Diff::LCS::ContextChange. - # Centralises the change test code in Diff::LCS::Change and - # Diff::LCS::ContextChange, since it's the same for both classes. -module Diff::LCS::ChangeTypeTests - def deleting? - @action == '-' - end +# Represents a simplistic (non-contextual) change. Represents the removal or +# addition of an element from either the old or the new sequenced +# enumerable. +class Diff::LCS::Change + # The only actions valid for changes are '+' (add), '-' (delete), '=' + # (no change), '!' (changed), '<' (tail changes from first sequence), or + # '>' (tail changes from second sequence). The last two ('<>') are only + # found with Diff::LCS::diff and Diff::LCS::sdiff. + VALID_ACTIONS = %W(+ - = ! > <) - def adding? - @action == '+' + def self.valid_action?(action) + VALID_ACTIONS.include? action end - def unchanged? - @action == '=' - end + # Returns the action this Change represents. + attr_reader :action - def changed? - @action == '!' + # Returns the position of the Change. 
+ attr_reader :position + # Returns the sequence element of the Change. + attr_reader :element + + def initialize(*args) + @action, @position, @element = *args + + unless Diff::LCS::Change.valid_action?(@action) + raise "Invalid Change Action '#{@action}'" + end + raise "Invalid Position Type" unless @position.kind_of? Fixnum end - def finished_a? - @action == '>' + def inspect + %Q(#<#{self.class.name}:#{__id__.to_s(16)} @action=#{action} position=#{position} element=#{element.inspect}) end - def finished_b? - @action == '<' + def to_a + [ @action, @position, @element ] end -end - # Represents a simplistic (non-contextual) change. Represents the removal or - # addition of an element from either the old or the new sequenced enumerable. -class Diff::LCS::Change - # Returns the action this Change represents. Can be '+' (#adding?), '-' - # (#deleting?), '=' (#unchanged?), # or '!' (#changed?). When created by - # Diff::LCS#diff or Diff::LCS#sdiff, it may also be '>' (#finished_a?) or - # '<' (#finished_b?). - attr_reader :action - attr_reader :position - attr_reader :element + def self.from_a(arr) + arr = arr.flatten + case arr.size + when 5 + Diff::LCS::ContextChange.new(*(arr[0...5])) + when 3 + Diff::LCS::Change.new(*(arr[0...3])) + else + raise "Invalid change array format provided." + end + end include Comparable + def ==(other) (self.action == other.action) and (self.position == other.position) and @@ -67,85 +67,79 @@ class Diff::LCS::Change r end - def initialize(action, position, element) - @action = action - @position = position - @element = element + def adding? + @action == '+' end - # Creates a Change from an array produced by Change#to_a. - def to_a - [@action, @position, @element] + def deleting? + @action == '-' end - def self.from_a(arr) - Diff::LCS::Change.new(arr[0], arr[1], arr[2]) + def unchanged? + @action == '=' + end + + def changed? + @action == '!' end - include Diff::LCS::ChangeTypeTests + def finished_a? 
+ @action == '>' + end + + def finished_b? + @action == '<' + end end - # Represents a contextual change. Contains the position and values of the - # elements in the old and the new sequenced enumerables as well as the action - # taken. -class Diff::LCS::ContextChange - # Returns the action this Change represents. Can be '+' (#adding?), '-' - # (#deleting?), '=' (#unchanged?), # or '!' (#changed?). When - # created by Diff::LCS#diff or Diff::LCS#sdiff, it may also be '>' - # (#finished_a?) or '<' (#finished_b?). - attr_reader :action +# Represents a contextual change. Contains the position and values of the +# elements in the old and the new sequenced enumerables as well as the action +# taken. +class Diff::LCS::ContextChange < Diff::LCS::Change + # We don't need these two values. + undef :position + undef :element + + # Returns the old position being changed. attr_reader :old_position - attr_reader :old_element + # Returns the new position being changed. attr_reader :new_position + # Returns the old element being changed. + attr_reader :old_element + # Returns the new element being changed. attr_reader :new_element - include Comparable + def initialize(*args) + @action, @old_position, @old_element, @new_position, @new_element = *args - def ==(other) - (@action == other.action) and - (@old_position == other.old_position) and - (@new_position == other.new_position) and - (@old_element == other.old_element) and - (@new_element == other.new_element) - end - - def inspect(*args) - %Q(#<#{self.class.name}:#{__id__} @action=#{action} positions=#{old_position},#{new_position} elements=#{old_element.inspect},#{new_element.inspect}>) - end - - def <=>(other) - r = @action <=> other.action - r = @old_position <=> other.old_position if r.zero? - r = @new_position <=> other.new_position if r.zero? - r = @old_element <=> other.old_element if r.zero? - r = @new_element <=> other.new_element if r.zero? 
- r + unless Diff::LCS::Change.valid_action?(@action) + raise "Invalid Change Action '#{@action}'" + end + unless @old_position.nil? or @old_position.kind_of? Fixnum + raise "Invalid (Old) Position Type" + end + unless @new_position.nil? or @new_position.kind_of? Fixnum + raise "Invalid (New) Position Type" + end end - def initialize(action, old_position, old_element, new_position, new_element) - @action = action - @old_position = old_position - @old_element = old_element - @new_position = new_position - @new_element = new_element + def to_a + [ @action, + [ @old_position, @old_element ], + [ @new_position, @new_element ] + ] end - def to_a - [@action, [@old_position, @old_element], [@new_position, @new_element]] + def inspect(*args) + %Q(#<#{self.class.name}:#{__id__} @action=#{action} positions=#{old_position},#{new_position} elements=#{old_element.inspect},#{new_element.inspect}>) end - # Creates a ContextChange from an array produced by ContextChange#to_a. def self.from_a(arr) - if arr.size == 5 - Diff::LCS::ContextChange.new(arr[0], arr[1], arr[2], arr[3], arr[4]) - else - Diff::LCS::ContextChange.new(arr[0], arr[1][0], arr[1][1], arr[2][0], - arr[2][1]) - end + Diff::LCS::Change.from_a(arr) end - # Simplifies a context change for use in some diff callbacks. '<' actions - # are converted to '-' and '>' actions are converted to '+'. + # Simplifies a context change for use in some diff callbacks. '<' actions + # are converted to '-' and '>' actions are converted to '+'. def self.simplify(event) ea = event.to_a @@ -165,5 +159,20 @@ class Diff::LCS::ContextChange Diff::LCS::ContextChange.from_a(ea) end - include Diff::LCS::ChangeTypeTests + def ==(other) + (@action == other.action) and + (@old_position == other.old_position) and + (@new_position == other.new_position) and + (@old_element == other.old_element) and + (@new_element == other.new_element) + end + + def <=>(other) + r = @action <=> other.action + r = @old_position <=> other.old_position if r.zero? 
+ r = @new_position <=> other.new_position if r.zero? + r = @old_element <=> other.old_element if r.zero? + r = @new_element <=> other.new_element if r.zero? + r + end end diff --git a/lib/diff/lcs/internals.rb b/lib/diff/lcs/internals.rb new file mode 100644 index 0000000..373a18a --- /dev/null +++ b/lib/diff/lcs/internals.rb @@ -0,0 +1,271 @@ +# -*- ruby encoding: utf-8 -*- + +module Diff::LCS::Internals # :nodoc: + class << self + # Compute the longest common subsequence between the sequenced + # Enumerables +a+ and +b+. The result is an array whose contents is such + # that + # + # result = Diff::LCS::Internals.lcs(a, b) + # result.each_with_index do |e, ii| + # assert_equal(a[ii], b[e]) unless e.nil? + # end + def lcs(a, b) + a_start = b_start = 0 + a_finish = a.size - 1 + b_finish = b.size - 1 + vector = [] + + # Prune off any common elements at the beginning... + while (a_start <= a_finish) and + (b_start <= b_finish) and + (a[a_start] == b[b_start]) + vector[a_start] = b_start + a_start += 1 + b_start += 1 + end + + # Now the end... + while (a_start <= a_finish) and + (b_start <= b_finish) and + (a[a_finish] == b[b_finish]) + vector[a_finish] = b_finish + a_finish -= 1 + b_finish -= 1 + end + + # Now, compute the equivalence classes of positions of elements. + b_matches = position_hash(b, b_start .. b_finish) + + thresh = [] + links = [] + + (a_start .. a_finish).each do |ii| + ai = a.kind_of?(String) ? a[ii, 1] : a[ii] + bm = b_matches[ai] + kk = nil + bm.reverse_each do |jj| + if kk and (thresh[kk] > jj) and (thresh[kk - 1] < jj) + thresh[kk] = jj + else + kk = replace_next_larger(thresh, jj, kk) + end + links[kk] = [ (kk > 0) ? links[kk - 1] : nil, ii, jj ] unless kk.nil? + end + end + + unless thresh.empty? + link = links[thresh.size - 1] + while not link.nil? 
+ vector[link[1]] = link[2] + link = link[0] + end + end + + vector + end + + # This method will analyze the provided patchset to provide a + # single-pass normalization (conversion of the array form of + # Diff::LCS::Change objects to the object form of same) and detection of + # whether the patchset represents changes to be made. + def analyze_patchset(patchset, depth = 0) + raise "Patchset too complex" if depth > 1 + + has_changes = false + + # Format: + # [ # patchset + # # hunk (change) + # [ # hunk + # # change + # ] + # ] + + patchset = patchset.map do |hunk| + case hunk + when Diff::LCS::Change + has_changes ||= !hunk.unchanged? + hunk + when Array + # Detect if the 'hunk' is actually an array-format + # Change object. + if Diff::LCS::Change.valid_action? hunk[0] + hunk = Diff::LCS::Change.from_a(hunk) + has_changes ||= !hunk.unchanged? + hunk + else + with_changes, hunk = analyze_patchset(hunk, depth + 1) + has_changes ||= with_changes + hunk + end + else + raise ArgumentError, "Cannot normalise a hunk of class #{hunk.class}." + end + end + + [ has_changes, patchset ] + end + + # Find the place at which +value+ would normally be inserted into the + # Enumerable. If that place is already occupied by +value+, do nothing + # and return +nil+. If the place does not exist (i.e., it is off the end + # of the Enumerable), add it to the end. Otherwise, replace the element + # at that point with +value+. It is assumed that the Enumerable's values + # are numeric. + # + # This operation preserves the sort order. + def replace_next_larger(enum, value, last_index = nil) + # Off the end? + if enum.empty? 
or (value > enum[-1]) + enum << value + return enum.size - 1 + end + + # Binary search for the insertion point + last_index ||= enum.size + first_index = 0 + while (first_index <= last_index) + ii = (first_index + last_index) >> 1 + + found = enum[ii] + + if value == found + return nil + elsif value > found + first_index = ii + 1 + else + last_index = ii - 1 + end + end + + # The insertion point is in first_index; overwrite the next larger + # value. + enum[first_index] = value + return first_index + end + + # If +vector+ maps the matching elements of another collection onto this + # Enumerable, compute the inverse +vector+ that maps this Enumerable + # onto the collection. (Currently unused.) + def inverse_vector(a, vector) + inverse = a.dup + (0 ... vector.size).each do |ii| + inverse[vector[ii]] = ii unless vector[ii].nil? + end + inverse + end + + # Returns a hash mapping each element of an Enumerable to the set of + # positions it occupies in the Enumerable, optionally restricted to the + # elements specified in the range of indexes specified by +interval+. + def position_hash(enum, interval = 0 .. -1) + hash = Hash.new { |hh, kk| hh[kk] = [] } + interval.each do |ii| + kk = enum.kind_of?(String) ? enum[ii, 1] : enum[ii] + hash[kk] << ii + end + hash + end + + # Examine the patchset and the source to see in which direction the + # patch should be applied. + # + # WARNING: By default, this examines the whole patch, so this could take + # some time. This also works better with Diff::LCS::ContextChange or + # Diff::LCS::Change as its source, as an array will cause the creation + # of one of the above. + # + # Note: This will be deprecated as a public function in a future release. 
+ def diff_direction(src, patchset, limit = nil) + count = left_match = left_miss = right_match = right_miss = 0 + string = src.kind_of?(String) + + patchset.each do |change| + count += 1 + + case change + when Diff::LCS::Change + # With a simplistic change, we can't tell the difference between + # the left and right on '!' actions, so we ignore those. On '=' + # actions, if there's a miss, we miss both left and right. + element = string ? src[change.position, 1] : src[change.position] + + case change.action + when '-' + if element == change.element + left_match += 1 + else + left_miss += 1 + end + when '+' + if element == change.element + right_match += 1 + else + right_miss += 1 + end + when '=' + if element != change.element + left_miss += 1 + right_miss += 1 + end + end + when Diff::LCS::ContextChange + case change.action + when '-' # Remove details from the old string + element = string ? src[change.old_position, 1] : src[change.old_position] + + if element == change.old_element + left_match += 1 + else + left_miss += 1 + end + when '+' + element = string ? src[change.new_position, 1] : src[change.new_position] + if element == change.new_element + right_match += 1 + else + right_miss += 1 + end + when '=' + le = string ? src[change.old_position, 1] : src[change.old_position] + re = string ? src[change.new_position, 1] : src[change.new_position] + + left_miss += 1 if le != change.old_element + right_miss += 1 if re != change.new_element + when '!' + element = string ? src[change.old_position, 1] : src[change.old_position] + if element == change.old_element + left_match += 1 + else + element = string ? src[change.new_position, 1] : src[change.new_position] + if element == change.new_element + right_match += 1 + else + left_miss += 1 + right_miss += 1 + end + end + end + end + + break if (not limit.nil?) && (count > limit) + end + + if left_match.zero? 
+ end + + no_left = (left_match == 0) && (left_miss >= 0) + no_right = (right_match == 0) && (right_miss >= 0) + + direction = case [no_left, no_right] + when [false, true] + :patch + when [true, false] + :unpatch + else + raise "The provided patchset does not appear to apply to the provided value as either source or destination value." + end + end + end +end diff --git a/spec/lcs_spec.rb b/spec/lcs_spec.rb index 0fc077e..addc2f7 100644 --- a/spec/lcs_spec.rb +++ b/spec/lcs_spec.rb @@ -2,11 +2,11 @@ require 'spec_helper' -describe "Diff::LCS.__lcs" do +describe "Diff::LCS::Internals.lcs" do include Diff::LCS::SpecHelper::Matchers it "should return a meaningful LCS array with (seq1, seq2)" do - res = Diff::LCS.__lcs(seq1, seq2) + res = Diff::LCS::Internals.lcs(seq1, seq2) # The result of the LCS (less the +nil+ values) must be as long as the # correct result. res.compact.size.should == correct_lcs.size @@ -21,11 +21,11 @@ describe "Diff::LCS.__lcs" do end it "should return all indexes with (hello, hello)" do - Diff::LCS.__lcs(hello, hello).should == (0...hello.size).to_a + Diff::LCS::Internals.lcs(hello, hello).should == (0...hello.size).to_a end it "should return all indexes with (hello_ary, hello_ary)" do - Diff::LCS.__lcs(hello_ary, hello_ary).should == (0...hello_ary.size).to_a + Diff::LCS::Internals.lcs(hello_ary, hello_ary).should == (0...hello_ary.size).to_a end end diff --git a/spec/patch_spec.rb b/spec/patch_spec.rb index ad160c9..72bfe85 100644 --- a/spec/patch_spec.rb +++ b/spec/patch_spec.rb @@ -320,7 +320,7 @@ describe "Diff::LCS.patch" do Diff::LCS.patch(@s2, @patch_set_s1_s2).should == @s1 end - it "should correctly patch left-to-right (explicit patch)" do + it "should correctly patch left-to-right (explicit patch)", :only => true do Diff::LCS.patch(@s1, @patch_set_s1_s2, :patch).should == @s2 Diff::LCS.patch(@s2, @patch_set_s2_s1, :patch).should == @s1 Diff::LCS.patch!(@s1, @patch_set_s1_s2).should == @s2 |