summaryrefslogtreecommitdiff
path: root/sandbox/rstdiff/studies/diff.py
diff options
context:
space:
mode:
authorsmerten <smerten@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2010-04-11 13:57:17 +0000
committersmerten <smerten@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2010-04-11 13:57:17 +0000
commitbc3a5b94ea82e730edeb42b6abce8bd987b83488 (patch)
tree261406f13880ed64b5d8f671a26d299247d4a538 /sandbox/rstdiff/studies/diff.py
parent913868fce935b220fea7da0879b99a59acf2dd56 (diff)
downloaddocutils-bc3a5b94ea82e730edeb42b6abce8bd987b83488.tar.gz
First studies for an implmentation of `rstdiff`.
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@6306 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'sandbox/rstdiff/studies/diff.py')
-rw-r--r--sandbox/rstdiff/studies/diff.py165
1 files changed, 165 insertions, 0 deletions
diff --git a/sandbox/rstdiff/studies/diff.py b/sandbox/rstdiff/studies/diff.py
new file mode 100644
index 000000000..8dea6c1db
--- /dev/null
+++ b/sandbox/rstdiff/studies/diff.py
@@ -0,0 +1,165 @@
+# Studies for using difflib on structures
+
+from difflib import SequenceMatcher
+
+from hashable import HashableImpl
+
+aI = ( 1, 2, 3, 5, 6, 7, )
+bI = ( 2, 3, 4, 8, 6 )
+
+sm = SequenceMatcher(None, aI, bI)
+
+# print(sm.find_longest_match(0, len(aI), 0, len(bI)))
+# print(sm.get_matching_blocks())
+# print(list(sm.get_grouped_opcodes(0))) # Useful for context diffs
+# print(sm.get_opcodes()) # The way to go
+
+class TreeNode(object):
+
+ tag = None
+ children = ( )
+
+ def __init__(self, tag, children=( )):
+ self.tag = tag
+ self.children = children
+
+class HashableTreeNodeImpl(HashableImpl):
+
+ """Consider only the root node (or include the children)."""
+ # TODO Needs a push/pop methods
+ rootOnly = False
+
+ def __init__(self):
+ super(self.__class__, self).__init__(TreeNode)
+
+ def impl__hash__(self, this):
+ rootHash = self.rootHash(this)
+ if self.rootOnly:
+ return rootHash
+ else:
+ return rootHash + self.childrenHash(this)
+
+ def impl__eq__(self, this, other):
+ if not self.rootEq(this, other):
+ return False
+ if self.rootOnly:
+ return True
+ return self.childrenEq(this, other)
+
+ def rootHash(self, this):
+ """Return a hash for the root only."""
+ return hash(this.tag)
+
+ def childrenHash(self, this):
+ """Return a hash for the children only."""
+ return reduce(lambda x, y: x + y,
+ [ hash(child)
+ for child in this.children ], 0)
+
+ def rootEq(self, this, other):
+ """True if the two nodes as roots are equal without considering their
+ children. This should be true if one node can be replaced by
+ the other and all changes can be represented without changing
+ the node itself."""
+ return type(this) == type(other) and this.tag == other.tag
+
+ def childrenEq(self, this, other):
+ """True if the children of the two nodes are equal without considering
+ the root features."""
+ if len(this.children) != len(other.children):
+ return False
+ for i in xrange(len(this.children)):
+ if not (this.children[i] == other.children[i]):
+ return False
+ return hashableImpl
+
+# To generate a diff tree:
+#
+# * ``equal`` opcodes need a copy
+#
+# * ``insert`` and ``delete`` opcodes must be processed as such
+#
+# * ``replace`` opcodes need to be analyzed recursively to find a
+# minimal set of changes
+
+def resolveDeepReplace(hashableImpl, opcodes, a, b):
+ """Resolves ``replace`` elements in `opcodes` pertaining to `a` and
+ `b`. Returns opcodes including nested elements for these cases.
+ `hashableImpl` is the `HashableImpl` used to control the hashable
+ feature."""
+ result = [ ]
+ for i in xrange(len(opcodes)):
+ ( opcode, aBeg, aEnd, bBeg, bEnd ) = opcodes[i]
+ if opcode != 'replace':
+ result.append(opcodes[i])
+ continue
+ try:
+ savedRootOnly = hashableImpl.rootOnly
+ hashableImpl.rootOnly = True
+ sm = SequenceMatcher(None, a[aBeg:aEnd], b[bBeg:bEnd])
+ rootOpcodes = sm.get_opcodes()
+ for j in xrange(len(rootOpcodes)):
+ ( subOpcode, aSubBeg, aSubEnd, bSubBeg, bSubEnd ) = rootOpcodes[j]
+ if subOpcode != 'equal':
+ result.append(( subOpcode, aBeg + aSubBeg, aBeg + aSubEnd,
+ bBeg + bSubBeg, bBeg + bSubEnd, ))
+ else:
+ for k in xrange(aSubEnd - aSubBeg):
+ aIdx = aBeg + aSubBeg + k
+ bIdx = bBeg + bSubBeg + k
+ result.append(('descend',
+ aIdx, aIdx + 1, bIdx, bIdx + 1,
+ resolveRootEqual(hashableImpl,
+ a[aIdx], b[bIdx]), ))
+ finally:
+ hashableImpl.rootOnly = savedRootOnly
+ return result
+
+def resolveRootEqual(hashableImpl, aElem, bElem):
+ """Considers children of `aElem` and `bElem` which have equal roots.
+ Returns opcodes for the children. `hashableImpl` is the
+ `HashableImpl` used to control the hashable feature."""
+ a = aElem.children
+ b = bElem.children
+ try:
+ savedRootOnly = hashableImpl.rootOnly
+ hashableImpl.rootOnly = False
+ sm = SequenceMatcher(None, a, b)
+ nestedOpcodes = sm.get_opcodes()
+ return resolveDeepReplace(hashableImpl, nestedOpcodes, a, b)
+ finally:
+ hashableImpl.rootOnly = savedRootOnly
+
+hashableImpl = HashableTreeNodeImpl()
+
+aT = ( TreeNode('first'),
+ TreeNode('second', (
+ TreeNode('second.first'),
+ )),
+ TreeNode('third', (
+ TreeNode(2),
+ )),
+ )
+
+bT = ( TreeNode('first'),
+ TreeNode('second', (
+ TreeNode('second.first', (
+ TreeNode('second.first.first'),
+ )),
+ TreeNode('second.second'),
+ )),
+ TreeNode('second1', (
+ TreeNode(2),
+ )),
+ TreeNode('third', (
+ TreeNode(2),
+ )),
+ TreeNode('fourth'),
+ )
+
+sm = SequenceMatcher(None, aT, bT)
+top = sm.get_opcodes()
+print(top)
+print('---')
+# Use a pseudo root
+print(resolveRootEqual(hashableImpl, TreeNode(None, aT), TreeNode(None, bT)))