summaryrefslogtreecommitdiff
path: root/Lib/difflib.py
diff options
context:
space:
mode:
author Raymond Hettinger <python@rcn.com> 2003-06-08 11:07:08 +0000
committer Raymond Hettinger <python@rcn.com> 2003-06-08 11:07:08 +0000
commit 712cd62fbaa530ce6042fe9d9b3716ee0697142e (patch)
tree 0af2e1349b5fa509cec2b69f0509d580bd99fa6c /Lib/difflib.py
parent 1633e89eea836b96874aa5f8c8ae196f79c96884 (diff)
download cpython-712cd62fbaa530ce6042fe9d9b3716ee0697142e.tar.gz
Added functions for creating context diffs and unified diffs.
Documentation update and NEWS item are forthcoming.
Diffstat (limited to 'Lib/difflib.py')
-rw-r--r-- Lib/difflib.py 201
1 files changed, 200 insertions, 1 deletions
diff --git a/Lib/difflib.py b/Lib/difflib.py
index b09348f0c8..202b815feb 100644
--- a/Lib/difflib.py
+++ b/Lib/difflib.py
@@ -6,12 +6,18 @@ Module difflib -- helpers for computing deltas between objects.
Function get_close_matches(word, possibilities, n=3, cutoff=0.6):
Use SequenceMatcher to return list of the best "good enough" matches.
+Function context_diff(a, b):
+ For two lists of strings, return a delta in context diff format.
+
Function ndiff(a, b):
Return a delta: the difference between `a` and `b` (lists of strings).
Function restore(delta, which):
Return one of the two sequences that generated an ndiff delta.
+Function unified_diff(a, b):
+ For two lists of strings, return a delta in unified diff format.
+
Class SequenceMatcher:
A flexible class for comparing pairs of sequences of any type.
@@ -20,7 +26,8 @@ Class Differ:
"""
__all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
- 'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK']
+ 'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff',
+ 'unified_diff']
class SequenceMatcher:
@@ -532,6 +539,54 @@ class SequenceMatcher:
answer.append( ('equal', ai, i, bj, j) )
return answer
+ def get_grouped_opcodes(self, n=3):
+ """ Isolate change clusters by eliminating ranges with no changes.
+
+ Return a generator of groups with up to n lines of context.
+ Each group is in the same format as returned by get_opcodes().
+
+ >>> from pprint import pprint
+ >>> a = map(str, range(1,40))
+ >>> b = a[:]
+ >>> b[8:8] = ['i'] # Make an insertion
+ >>> b[20] += 'x' # Make a replacement
+ >>> b[23:28] = [] # Make a deletion
+ >>> b[30] += 'y' # Make another replacement
+ >>> pprint(list(SequenceMatcher(None,a,b).get_grouped_opcodes()))
+ [[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)],
+ [('equal', 16, 19, 17, 20),
+ ('replace', 19, 20, 20, 21),
+ ('equal', 20, 22, 21, 23),
+ ('delete', 22, 27, 23, 23),
+ ('equal', 27, 30, 23, 26)],
+ [('equal', 31, 34, 27, 30),
+ ('replace', 34, 35, 30, 31),
+ ('equal', 35, 38, 31, 34)]]
+ """
+
+ codes = self.get_opcodes()
+ # Fixup leading and trailing groups if they show no changes.
+ if codes[0][0] == 'equal':
+ tag, i1, i2, j1, j2 = codes[0]
+ codes[0] = tag, max(i1, i2-n), i2, max(j1, j2-n), j2
+ if codes[-1][0] == 'equal':
+ tag, i1, i2, j1, j2 = codes[-1]
+ codes[-1] = tag, i1, min(i2, i1+n), j1, min(j2, j1+n)
+
+ nn = n + n
+ group = []
+ for tag, i1, i2, j1, j2 in codes:
+ # End the current group and start a new one whenever
+ # there is a large range with no changes.
+ if tag == 'equal' and i2-i1 > nn:
+ group.append((tag, i1, min(i2, i1+n), j1, min(j2, j1+n)))
+ yield group
+ group = []
+ i1, j1 = max(i1, i2-n), max(j1, j2-n)
+ group.append((tag, i1, i2, j1 ,j2))
+ if group and not (len(group)==1 and group[0][0] == 'equal'):
+ yield group
+
def ratio(self):
"""Return a measure of the sequences' similarity (float in [0,1]).
@@ -1042,6 +1097,150 @@ def IS_CHARACTER_JUNK(ch, ws=" \t"):
del re
+
+def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
+ tofiledate='', n=3, lineterm='\n'):
+ r"""
+ Compare two sequences of lines; generate the delta as a unified diff.
+
+ Unified diffs are a compact way of showing line changes and a few
+ lines of context. The number of context lines is set by 'n' which
+ defaults to three.
+
+ By default, the diff control lines (those with ---, +++, or @@) are
+ created with a trailing newline. This is helpful so that inputs
+ created from file.readlines() result in diffs that are suitable for
+ file.writelines() since both the inputs and outputs have trailing
+ newlines.
+
+ For inputs that do not have trailing newlines, set the lineterm
+ argument to "" so that the output will be uniformly newline free.
+
+ The unidiff format normally has a header for filenames and modification
+ times. Any or all of these may be specified using strings for
+ 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. The modification
+ times are normally expressed in the format returned by time.ctime().
+
+ Example:
+
+ >>> for line in unified_diff('one two three four'.split(),
+ ... 'zero one tree four'.split(), 'Original', 'Current',
+ ... 'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
+ ... lineterm=''):
+ ... print line
+ --- Original Sat Jan 26 23:30:50 1991
+ +++ Current Fri Jun 06 10:20:52 2003
+ @@ -1,4 +1,4 @@
+ +zero
+ one
+ -two
+ -three
+ +tree
+ four
+ """
+
+ started = False
+ for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
+ if not started:
+ yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
+ yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
+ started = True
+ i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
+ yield "@@ -%d,%d +%d,%d @@%s" % (i1+1, i2-i1, j1+1, j2-j1, lineterm)
+ for tag, i1, i2, j1, j2 in group:
+ if tag == 'equal':
+ for line in a[i1:i2]:
+ yield ' ' + line
+ continue
+ if tag == 'replace' or tag == 'delete':
+ for line in a[i1:i2]:
+ yield '-' + line
+ if tag == 'replace' or tag == 'insert':
+ for line in b[j1:j2]:
+ yield '+' + line
+
+# See http://www.unix.org/single_unix_specification/
+def context_diff(a, b, fromfile='', tofile='',
+ fromfiledate='', tofiledate='', n=3, lineterm='\n'):
+ r"""
+ Compare two sequences of lines; generate the delta as a context diff.
+
+ Context diffs are a compact way of showing line changes and a few
+ lines of context. The number of context lines is set by 'n' which
+ defaults to three.
+
+ By default, the diff control lines (those with *** or ---) are
+ created with a trailing newline. This is helpful so that inputs
+ created from file.readlines() result in diffs that are suitable for
+ file.writelines() since both the inputs and outputs have trailing
+ newlines.
+
+ For inputs that do not have trailing newlines, set the lineterm
+ argument to "" so that the output will be uniformly newline free.
+
+ The context diff format normally has a header for filenames and
+ modification times. Any or all of these may be specified using
+ strings for 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
+ The modification times are normally expressed in the format returned
+ by time.ctime(). If not specified, the strings default to blanks.
+
+ Example:
+
+ >>> print ''.join(context_diff('one\ntwo\nthree\nfour\n'.splitlines(1),
+ ... 'zero\none\ntree\nfour\n'.splitlines(1), 'Original', 'Current',
+ ... 'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:22:46 2003')),
+ *** Original Sat Jan 26 23:30:50 1991
+ --- Current Fri Jun 06 10:22:46 2003
+ ***************
+ *** 1,4 ****
+ one
+ ! two
+ ! three
+ four
+ --- 1,4 ----
+ + zero
+ one
+ ! tree
+ four
+ """
+
+ started = False
+ prefixmap = dict(insert='+ ', delete='- ', replace='! ', equal=' ')
+ for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
+ if not started:
+ yield '*** %s %s%s' % (fromfile, fromfiledate, lineterm)
+ yield '--- %s %s%s' % (tofile, tofiledate, lineterm)
+ started = True
+ yield '***************%s' % (lineterm,)
+ if group[-1][2] - group[0][1] >= 2:
+ yield '*** %d,%d ****%s' % (group[0][1]+1, group[-1][2], lineterm)
+ else:
+ yield '*** %d ****%s' % (group[-1][2], lineterm)
+ empty = True
+ for tag, i1, i2, j1, j2 in group:
+ if tag == 'replace' or tag == 'delete':
+ empty = False
+ break
+ if not empty:
+ for tag, i1, i2, j1, j2 in group:
+ if tag != 'insert':
+ for line in a[i1:i2]:
+ yield prefixmap[tag] + line
+ if group[-1][4] - group[0][3] >= 2:
+ yield '--- %d,%d ----%s' % (group[0][3]+1, group[-1][4], lineterm)
+ else:
+ yield '--- %d ----%s' % (group[-1][4], lineterm)
+ empty = True
+ for tag, i1, i2, j1, j2 in group:
+ if tag == 'replace' or tag == 'insert':
+ empty = False
+ break
+ if not empty:
+ for tag, i1, i2, j1, j2 in group:
+ if tag != 'delete':
+ for line in b[j1:j2]:
+ yield prefixmap[tag] + line
+
def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK):
r"""
Compare `a` and `b` (lists of strings); return a `Differ`-style delta.