summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorsmerten <smerten@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2010-05-24 11:07:53 +0000
committersmerten <smerten@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2010-05-24 11:07:53 +0000
commit962063737d31621c46a7e5da8d1d49c1abd3b12c (patch)
treee41cbe9a3d7ae7959eb09f782cd1a7444a8ba866
parentab564b94752831465ff705dba7abe508df3b3d9e (diff)
downloaddocutils-962063737d31621c46a7e5da8d1d49c1abd3b12c.tar.gz
Added basic script including some infrastructure.
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@6329 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
-rw-r--r--sandbox/rstdiff/global.log161
-rwxr-xr-xsandbox/rstdiff/rstdiff.py265
-rw-r--r--sandbox/rstdiff/studies/diff.py84
-rw-r--r--sandbox/rstdiff/studies/hashable.py167
-rw-r--r--sandbox/rstdiff/tag.log2
-rw-r--r--sandbox/rstdiff/treediff/__init__.py294
6 files changed, 745 insertions, 228 deletions
diff --git a/sandbox/rstdiff/global.log b/sandbox/rstdiff/global.log
index aa26464d5..4766be9fe 100644
--- a/sandbox/rstdiff/global.log
+++ b/sandbox/rstdiff/global.log
@@ -1,4 +1,165 @@
**************************************
+Date: Mon May 24 12:52:09 CEST 2010
+Author: stefan
+Tag: rstdiff_1_25
+
+--------------------------------------
+Update of /home/stefan/vault/sm/rstdiff
+In directory rosalu:/home/stefan/free/rstdiff
+
+Modified Files:
+ rstdiff.py
+
+--------------------------------------
+Log Message:
+Completed infrastructure.
+**************************************
+Date: Sun May 23 13:55:05 CEST 2010
+Author: stefan
+Tag: rstdiff_1_24
+
+--------------------------------------
+Update of /home/stefan/vault/sm/rstdiff
+In directory rosalu:/home/stefan/free/rstdiff
+
+Modified Files:
+ rstdiff.py
+
+--------------------------------------
+Update of /home/stefan/vault/sm/rstdiff/treediff
+In directory rosalu:/home/stefan/free/rstdiff/treediff
+
+Modified Files:
+ __init__.py
+
+--------------------------------------
+Log Message:
+First steps towards a hashable implementation for Docutils nodes.
+**************************************
+Date: Sun May 16 14:45:35 CEST 2010
+Author: stefan
+Tag: rstdiff_1_23
+
+--------------------------------------
+Update of /home/stefan/vault/sm/rstdiff
+In directory rosalu:/home/stefan/free/rstdiff
+
+Modified Files:
+ rstdiff.py
+
+--------------------------------------
+Update of /home/stefan/vault/sm/rstdiff/studies
+In directory rosalu:/home/stefan/free/rstdiff/studies
+
+Modified Files:
+ diff.py hashable.py
+
+--------------------------------------
+Update of /home/stefan/vault/sm/rstdiff/treediff
+In directory rosalu:/home/stefan/free/rstdiff/treediff
+
+Added Files:
+ __init__.py
+
+--------------------------------------
+Log Message:
+Moved core of the studies to real package.
+**************************************
+Date: Sun May 16 14:44:48 CEST 2010
+Author: stefan
+
+--------------------------------------
+Update of /home/stefan/vault/sm/rstdiff/treediff
+In directory rosalu:/home/stefan/free/rstdiff/treediff
+
+
+--------------------------------------
+Log Message:
+Directory /home/stefan/vault/sm/rstdiff/treediff added to the repository
+**************************************
+Date: Thu May 13 14:32:49 CEST 2010
+Author: stefan
+Tag: rstdiff_1_22
+
+--------------------------------------
+Update of /home/stefan/vault/sm/rstdiff
+In directory rosalu:/home/stefan/free/rstdiff
+
+Modified Files:
+ rstdiff.py
+
+--------------------------------------
+Log Message:
+Writer is configured properly.
+**************************************
+Date: Sun May 9 14:38:14 CEST 2010
+Author: stefan
+Tag: rstdiff_1_21
+
+--------------------------------------
+Update of /home/stefan/vault/sm/rstdiff
+In directory rosalu:/home/stefan/free/rstdiff
+
+Modified Files:
+ rstdiff.py
+
+--------------------------------------
+Log Message:
+Input files are parsed.
+**************************************
+Date: Sun May 9 14:22:11 CEST 2010
+Author: stefan
+Tag: rstdiff_1_20
+
+--------------------------------------
+Update of /home/stefan/vault/sm/rstdiff
+In directory rosalu:/home/stefan/free/rstdiff
+
+Modified Files:
+ rstdiff.py
+
+--------------------------------------
+Log Message:
+Command line processing ok.
+**************************************
+Date: Sun May 9 10:25:10 CEST 2010
+Author: stefan
+Tag: rstdiff_1_19
+
+--------------------------------------
+Update of /home/stefan/vault/sm/rstdiff
+In directory rosalu:/home/stefan/free/rstdiff
+
+Added Files:
+ rstdiff.py
+
+--------------------------------------
+Update of /home/stefan/vault/sm/rstdiff/studies
+In directory rosalu:/home/stefan/free/rstdiff/studies
+
+Modified Files:
+ diff.py
+
+--------------------------------------
+Log Message:
+Added `rstdiff.py`.
+**************************************
+Date: Sat May 8 12:56:02 CEST 2010
+Author: stefan
+Tag: rstdiff_1_18
+
+--------------------------------------
+Update of /home/stefan/vault/sm/rstdiff/studies
+In directory rosalu:/home/stefan/free/rstdiff/studies
+
+Modified Files:
+ diff.py
+
+--------------------------------------
+Log Message:
+Continued studies. There is a class `TreeMatcher` similar to
+`SequenceMatcher`.
+**************************************
Date: Sun Apr 25 12:14:20 CEST 2010
Author: stefan
Tag: rstdiff_1_17
diff --git a/sandbox/rstdiff/rstdiff.py b/sandbox/rstdiff/rstdiff.py
new file mode 100755
index 000000000..a2af0b064
--- /dev/null
+++ b/sandbox/rstdiff/rstdiff.py
@@ -0,0 +1,265 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2010 Stefan Merten
+
+# rstdiff.py is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published
+# by the Free Software Foundation; either version 2 of the License,
+# or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+"""
+Generates a structural diff from two reStructuredText input documents
+and produces an annotated result.
+"""
+
+__docformat__ = 'reStructuredText'
+
+try:
+ import locale
+ locale.setlocale(locale.LC_ALL, '')
+except:
+ pass
+
+import os, re, sys
+
+import docutils
+from docutils import frontend, writers, nodes, SettingsSpec
+from docutils.core import Publisher
+from docutils.utils import SystemMessage, Reporter, new_reporter
+from docutils.frontend import OptionParser, make_paths_absolute
+from docutils.nodes import Node
+
+from treediff import TreeMatcher, HashableNodeImpl
+
+###############################################################################
+# Command line specification
+
+description = ("""Generates a structural diff from two reStructuredText input
+documents and produces an annotated result. """)
+
+writerOption = 'writer'
+writerDefault = 'xml'
+writerArgRE1 = '^--' + writerOption + '=' + '(.*)$'
+
+settings_spec = (
+ 'rstdiff options',
+ None,
+ (('Select writer to write output with (default "xml").',
+ ['--' + writerOption],
+ {}),
+ )
+ )
+
+settings_defaults = {'output_encoding_error_handler': 'xmlcharrefreplace',
+ writerOption: writerDefault}
+
+config_section = 'rstdiff'
+
+usage = '%prog [options]... <old> [<new> [<output>]]'
+
+###############################################################################
+# Classes for three argument command lines
+
+class Publisher3Args(Publisher):
+
+ def setup_option_parser(self, usage=None, description=None,
+ settings_spec=None, config_section=None,
+ **defaults):
+ if config_section:
+ if not settings_spec:
+ settings_spec = SettingsSpec()
+ settings_spec.config_section = config_section
+ parts = config_section.split()
+ if len(parts) > 1 and parts[-1] == 'application':
+ settings_spec.config_section_dependencies = ['applications']
+ #@@@ Add self.source & self.destination to components in future?
+ option_parser = OptionParser3Args(
+ components=(self.parser, self.reader, self.writer, settings_spec),
+ defaults=defaults, read_config_files=1,
+ usage=usage, description=description)
+ return option_parser
+
+class OptionParser3Args(OptionParser):
+
+ def check_values(self, values, args):
+ """Store positional arguments as runtime settings."""
+ values._old_source, values._new_source, values._destination = self.check_args(args)
+ make_paths_absolute(values.__dict__, self.relative_path_settings,
+ os.getcwd())
+ values._config_files = self.config_files
+ return values
+
+ def check_args(self, args):
+ old_source = new_source = destination = None
+ if not args:
+ self.error('At least 1 argument required.')
+ else:
+ old_source = args.pop(0)
+ if old_source == '-': # means stdin
+ old_source = None
+ if args:
+ new_source = args.pop(0)
+ if new_source == '-': # means stdin
+ new_source = None
+ if args:
+ destination = args.pop(0)
+ if destination == '-': # means stdout
+ destination = None
+ if args:
+ self.error('Maximum 3 arguments allowed.')
+ if old_source is None and new_source is None:
+ self.error('Old and new source may not both use stdin.')
+ if (old_source and old_source == destination
+ or new_source and new_source == destination):
+ self.error('Do not specify the same file for both source and '
+ 'destination. It will clobber the source file.')
+ return old_source, new_source, destination
+
+###############################################################################
+# Hashable
+
+class HashableDocutilsNodeImpl(HashableNodeImpl):
+ """Implements equality for a docutils `Node`."""
+
+ def __init__(self):
+ super(self.__class__, self).__init__(Node)
+
+ def dispatchClass(self, function, node, *args):
+ """Dispatch a call of type `function` for the class of `node` using
+ arguments `node` and `args`. Default is to dispatch for imaginary class
+ "UNKNOWN"."""
+ try:
+ method = getattr(self,
+ "%s_%s" % ( function, node.__class__.__name__, ))
+ except AttributeError:
+ method = getattr(self,
+ "%s_%s" % ( function, "UNKNOWN", ))
+ return method(node, *args)
+
+ ###########################################################################
+ # Implementation of abstract methods
+
+ def rootHash(self, node):
+ """Return a hash for the root only. Subclasses must override
+ this."""
+ return self.dispatchClass('rootHash', node)
+
+ def rootHash_UNKNOWN(self, node):
+ return hash(node.__class__)
+
+ def rootEq(self, node, other):
+ """Returns root equality of `node` and an `other` node. ``True`` if
+ the two nodes as roots are equal without considering their
+ children. This should be true if one node can be replaced by
+ the other and all changes can be represented without changing
+ the node itself. Subclasses must override this."""
+ # Only nodes of the same class can be equal
+ if node.__class__ != other.__class__:
+ return False
+ return self.dispatchClass('rootEq', node, other)
+
+ def rootEq_UNKNOWN(self, node, other):
+ # Unless we know better two roots of the same type are considered equal
+ return True
+
+ def childHash(self, node):
+ """Return a hash for the node as a child. Subclasses must override
+ this."""
+ return self.dispatchClass('childHash', node)
+
+ def childHash_UNKNOWN(self, node):
+ # TODO Is this correct *and* good?
+ return hash(node.__class__)
+
+ def childEq(self, node, other):
+ """Returns equality of `node` and an `other` node as children.
+ ``True`` if the child features of the two nodes are equal
+ without considering the root. Subclasses must override
+ this."""
+ # Only nodes of the same class can be equal
+ # TODO Is this correct?
+ if node.__class__ != other.__class__:
+ return False
+ return self.dispatchClass('childEq', node, other)
+
+ def childEq_UNKNOWN(self, node, other):
+ # We don't know how to compare two nodes of same type as children
+ return False
+
+ def getChildren(self, node):
+ """Return the children of `node` as a list. Subclasses must override
+ this."""
+ return self.dispatchClass('getChildren', node)
+
+ def getChildren_UNKNOWN(self, node):
+ return node.children
+
+ ###########################################################################
+ # Real comparison
+
+###############################################################################
+# Main
+
+def processCommandLine():
+ """Process command line and return a `Publisher`."""
+ # Determine writer here so options can be given normally
+ preWriter = writerDefault
+ for arg in sys.argv:
+ match = re.search(writerArgRE1, arg)
+ if match:
+ preWriter = match.group(1)
+
+ pub = Publisher3Args()
+ pub.set_reader('standalone', None, 'restructuredtext')
+ pub.set_writer(preWriter)
+
+ settingsSpec = SettingsSpec()
+ settingsSpec.settings_spec = settings_spec
+ settingsSpec.settings_defaults = settings_defaults
+ pub.process_command_line(usage=usage, description=description,
+ settings_spec=settingsSpec,
+ config_section=config_section)
+ if pub.settings.writer != preWriter:
+ new_reporter('<cmdline>',
+ pub.settings).severe("Internal error: Mismatch of pre-parsed (%r) and real (%r) writer"
+ % ( preWriter, pub.settings.writer, ))
+ return pub
+
+def readTree(pub, sourceName):
+ """Read and return a tree from `sourceName`."""
+ # Reset reader - just in case it keeps state from a previous invocation
+ pub.set_reader('standalone', None, 'restructuredtext')
+ pub.set_source(None, sourceName)
+ return pub.reader.read(pub.source, pub.parser, pub.settings)
+
+def doDiff(pub, oldTree, newTree):
+ """Create a difference from `oldTree` to `newTree`. Returns the opcodes
+ necessary to transform `oldTree` to `newTree`."""
+ hashableNodeImpl = HashableDocutilsNodeImpl()
+ matcher = TreeMatcher(hashableNodeImpl, oldTree, newTree)
+ return matcher.get_opcodes()
+
+if __name__ == '__main__':
+ pub = processCommandLine()
+
+ pub.set_destination(None, None)
+
+ oldTree = readTree(pub, pub.settings._old_source)
+ newTree = readTree(pub, pub.settings._new_source)
+
+ opcodes = doDiff(pub, oldTree, newTree)
+
+ from pprint import pprint
+ print(newTree)
+ print(oldTree)
+ pprint(opcodes, sys.stdout, 2, 40, None)
diff --git a/sandbox/rstdiff/studies/diff.py b/sandbox/rstdiff/studies/diff.py
index 4ff8a2f41..7f35aae65 100644
--- a/sandbox/rstdiff/studies/diff.py
+++ b/sandbox/rstdiff/studies/diff.py
@@ -2,12 +2,12 @@
from difflib import SequenceMatcher
-from hashable import HashableNodeImpl
-
from pprint import pprint
import sys
+from treediff import TreeMatcher, HashableNodeImpl
+
__docformat__ = 'reStructuredText'
aI = ( 1, 2, 3, 5, 6, 7, )
@@ -21,6 +21,7 @@ sm = SequenceMatcher(None, aI, bI)
# print(sm.get_opcodes()) # The way to go
class TreeNode(object):
+ """An example tree node to play with."""
tag = None
children = ( )
@@ -30,6 +31,7 @@ class TreeNode(object):
self.children = children
class HashableTreeNodeImpl(HashableNodeImpl):
+ """A `HashableNodeImpl` for `TreeNode`."""
def __init__(self):
super(self.__class__, self).__init__(TreeNode)
@@ -38,8 +40,6 @@ class HashableTreeNodeImpl(HashableNodeImpl):
return hash(node.tag)
def childHash(self, node):
- """Return a hash for the children only. Subclasses should override
- this."""
return hash(node)
def rootEq(self, node, other):
@@ -51,61 +51,6 @@ class HashableTreeNodeImpl(HashableNodeImpl):
def getChildren(self, node):
return node.children
-# To generate a diff tree:
-#
-# * ``equal`` opcodes need a copy
-#
-# * ``insert`` and ``delete`` opcodes must be processed as such
-#
-# * ``replace`` opcodes need to be analyzed recursively to find a
-# minimal set of changes
-
-def resolveDeepReplace(hashableNodeImpl, opcodes, a, b):
- """Resolves ``replace`` elements in `opcodes` pertaining to `a` and
- `b`. Returns opcodes including nested elements for these cases.
- `hashableNodeImpl` is the `HashableNodeImpl` used to control the hashable
- feature."""
- result = [ ]
- for i in xrange(len(opcodes)):
- ( opcode, aBeg, aEnd, bBeg, bEnd ) = opcodes[i]
- if opcode != 'replace':
- result.append(opcodes[i])
- continue
- try:
- hashableNodeImpl.pushRootOnly(True)
- sm = SequenceMatcher(None, a[aBeg:aEnd], b[bBeg:bEnd])
- rootOpcodes = sm.get_opcodes()
- for j in xrange(len(rootOpcodes)):
- ( subOpcode, aSubBeg, aSubEnd, bSubBeg, bSubEnd ) = rootOpcodes[j]
- if subOpcode != 'equal':
- result.append(( subOpcode, aBeg + aSubBeg, aBeg + aSubEnd,
- bBeg + bSubBeg, bBeg + bSubEnd, ))
- else:
- for k in xrange(aSubEnd - aSubBeg):
- aIdx = aBeg + aSubBeg + k
- bIdx = bBeg + bSubBeg + k
- result.append(('descend',
- aIdx, aIdx + 1, bIdx, bIdx + 1,
- resolveRootEqual(hashableNodeImpl,
- a[aIdx], b[bIdx]), ))
- finally:
- hashableNodeImpl.popRootOnly()
- return result
-
-def resolveRootEqual(hashableNodeImpl, aElem, bElem):
- """Considers children of `aElem` and `bElem` which have equal roots.
- Returns opcodes for the children. `hashableNodeImpl` is the
- `HashableNodeImpl` used to control the hashable feature."""
- a = hashableNodeImpl.getChildren(aElem)
- b = hashableNodeImpl.getChildren(bElem)
- try:
- hashableNodeImpl.pushRootOnly(False)
- sm = SequenceMatcher(None, a, b)
- nestedOpcodes = sm.get_opcodes()
- return resolveDeepReplace(hashableNodeImpl, nestedOpcodes, a, b)
- finally:
- hashableNodeImpl.popRootOnly()
-
hashableNodeImpl = HashableTreeNodeImpl()
aT = ( TreeNode('first'),
@@ -140,7 +85,22 @@ sm = SequenceMatcher(None, aT, bT)
top = sm.get_opcodes()
pprint(top)
print('---')
-# Use a pseudo root
-pprint(resolveRootEqual(hashableNodeImpl,
- TreeNode(None, aT), TreeNode(None, bT)),
+
+# Use a pseudo root with different root nodes.
+pprint(TreeMatcher(hashableNodeImpl,
+ TreeNode('a', aT), TreeNode('b', bT)).get_opcodes(),
+ sys.stdout, 2, 40, None)
+
+# Use a pseudo root with equal root nodes.
+pprint(TreeMatcher(hashableNodeImpl,
+ TreeNode(None, aT), TreeNode(None, bT)).get_opcodes(),
sys.stdout, 2, 40, None)
+
+# To generate a diff tree:
+#
+# * ``equal`` opcodes need a copy
+#
+# * ``insert`` and ``delete`` opcodes must be processed as such
+#
+# * ``replace`` opcodes need to be analyzed recursively to find a
+# minimal set of changes
diff --git a/sandbox/rstdiff/studies/hashable.py b/sandbox/rstdiff/studies/hashable.py
index c3e2da060..803bb867e 100644
--- a/sandbox/rstdiff/studies/hashable.py
+++ b/sandbox/rstdiff/studies/hashable.py
@@ -2,172 +2,9 @@
from docutils.nodes import Node
-__docformat__ = 'reStructuredText'
-
-class HashableDescriptor(object):
- """A descriptor to plug into a class to be made hashable."""
-
- """Name of function to override."""
- override = None
- """Arity of function to override."""
- arity = None
- hashableImpl = None
- debug = False
-
- def __init__(self, override, arity, hashableImpl):
- """Initialize a descriptor for function `override` with `arity` using
- `hashableImpl`."""
- self.override = override
- self.arity = arity
- self.hashableImpl = hashableImpl
-
- def __get__(self, instance, owner):
- """Implements the descriptor protocol. Returns a function to use on
- `instance`."""
- if self.debug:
- print('__get__ called on ' + owner.__name__ + ' by '
- + self.override)
- try:
- fct = self.hashableImpl.getFunction(self.override)
- except:
- if self.debug:
- print('***Exception***')
- raise AttributeError('Can not access method %r'
- % ( self.override, ))
- if self.arity == 0:
- return lambda: fct(instance)
- elif self.arity == 1:
- return lambda other: fct(instance, other)
- else:
- raise AttributeError('Can not create function for %r with arity %d'
- % ( self.override, self.arity, ))
-
-class HashableImpl(object):
- """Implements hashability and reflexive equality for a class."""
-
- debug = False
-
- def __init__(self, cls):
- """Set methods to implement equality in `cls`."""
- setattr(cls, '__hash__', HashableDescriptor('__hash__', 0, self))
- setattr(cls, '__eq__', HashableDescriptor('__eq__', 1, self))
- setattr(cls, '__ne__', HashableDescriptor('__ne__', 1, self))
-
- def getFunction(self, name):
- """Return the function named `name`."""
- # Get the function from the *real* object
- return getattr(self, 'impl' + name)
-
- def impl__hash__(self, this):
- """Return `this.__hash__`(). Derived classes must override this."""
- if self.debug:
- print('impl__hash__ called')
- return id(this)
-
- def impl__eq__(self, this, other):
- """Return `this.__eq__`(other). Derived classes must override this."""
- if self.debug:
- print('impl__eq__ called')
- return id(this) == id(other)
-
- def impl__ne__(self, this, other):
- """Return `this.__ne__`(other). Derived classes must override this."""
- if self.debug:
- print('impl__ne__ called')
- return not self.impl__eq__(this, other)
-
-class HashableNodeImpl(HashableImpl):
- """An abstract implementation of `HashableImpl` for nodes in a tree.
- Allows switching between of root only comparison and deep
- comparison."""
+from treediff import HashableImpl
- """Consider only the root node (or include the children)."""
- _rootOnly = False
- """Stack for `_rootOnly`"""
- __rootOnlies = [ ]
-
- def __init__(self, cls):
- HashableImpl.__init__(self, cls)
-
- def pushRootOnly(self, newRootOnly):
- """Set `newRootOnly` as new `rootOnly` value. If ``True`` then only
- information in the root is considered."""
- self.__rootOnlies.append(self._rootOnly)
- self._rootOnly = newRootOnly
-
- def popRootOnly(self):
- """Pop and return last `rootOnly` value."""
- self._rootOnly = self.__rootOnlies.pop()
-
- def impl__hash__(self, node):
- """Returns the hash for node `node`. Subclasses may override this but
- overriding `rootHash` and `childrenHash` may make more sense."""
- rootHash = self.rootHash(node)
- if self._rootOnly:
- return rootHash
- else:
- return rootHash + self.childrenHash(node)
-
- def impl__eq__(self, node, other):
- """Returns equality between `node` and an `other` node. Subclasses may
- override this but overriding `rootEq` and `childrenEq` may
- make more sense."""
- if not self.rootEq(node, other):
- return False
- if self._rootOnly:
- return True
- return self.childrenEq(node, other)
-
- def childrenHash(self, node):
- """Return a hash for the children only. Subclasses may override
- this but overriding `childHash` may make more sense."""
- return reduce(lambda x, y: x + y,
- [ self.childHash(child)
- for child in self.getChildren(node) ], 0)
-
- def childrenEq(self, node, other):
- """Returns children equality of `node` and an `other` node. ``True``
- if the children of the two nodes are equal without considering
- the root. Subclasses may override this but overriding
- `childEq` may make more sense."""
- nodeChildren = self.getChildren(node)
- otherChildren = self.getChildren(other)
- if len(nodeChildren) != len(otherChildren):
- return False
- for i in xrange(len(nodeChildren)):
- if not self.childEq(nodeChildren[i], otherChildren[i]):
- return False
- return True
-
- def rootHash(self, node):
- """Return a hash for the root only. Subclasses must override
- this."""
- raise NotImplementedError()
-
- def childHash(self, node):
- """Return a hash for the node as a child. Subclasses must override
- this."""
- raise NotImplementedError()
-
- def rootEq(self, node, other):
- """Returns root equality of `node` and an `other` node. ``True`` if
- the two nodes as roots are equal without considering their
- children. This should be true if one node can be replaced by
- the other and all changes can be represented without changing
- the node itself. Subclasses must override this."""
- raise NotImplementedError()
-
- def childEq(self, node, other):
- """Returns equality of `node` and an `other` node as children.
- ``True`` if the child features of the two nodes are equal
- without considering the root. Subclasses must override
- this."""
- raise NotImplementedError()
-
- def getChildren(self, node):
- """Return the children of `node` as a list. Subclasses must override
- this."""
- raise NotImplementedError()
+__docformat__ = 'reStructuredText'
class HashableDocutilsNodeImpl(HashableImpl):
"""Implements equality for a docutils `Node`."""
diff --git a/sandbox/rstdiff/tag.log b/sandbox/rstdiff/tag.log
index 371e02913..155906132 100644
--- a/sandbox/rstdiff/tag.log
+++ b/sandbox/rstdiff/tag.log
@@ -1 +1 @@
-rstdiff_1_17
+rstdiff_1_25
diff --git a/sandbox/rstdiff/treediff/__init__.py b/sandbox/rstdiff/treediff/__init__.py
new file mode 100644
index 000000000..b58943588
--- /dev/null
+++ b/sandbox/rstdiff/treediff/__init__.py
@@ -0,0 +1,294 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2010 Stefan Merten
+
+# rstdiff.py is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published
+# by the Free Software Foundation; either version 2 of the License,
+# or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+from difflib import SequenceMatcher
+
+__docformat__ = 'reStructuredText'
+
+###############################################################################
+# Hashable
+
+class HashableDescriptor(object):
+ """A descriptor to plug into a class to be made hashable."""
+
+ """Name of function to override."""
+ override = None
+ """Arity of function to override."""
+ arity = None
+ hashableImpl = None
+ debug = False
+
+ def __init__(self, override, arity, hashableImpl):
+ """Initialize a descriptor for function `override` with `arity` using
+ `hashableImpl`."""
+ self.override = override
+ self.arity = arity
+ self.hashableImpl = hashableImpl
+
+ def __get__(self, instance, owner):
+ """Implements the descriptor protocol. Returns a function to use on
+ `instance`."""
+ if self.debug:
+ print('__get__ called on ' + owner.__name__ + ' by '
+ + self.override)
+ try:
+ fct = self.hashableImpl.getFunction(self.override)
+ except:
+ if self.debug:
+ print('***Exception***')
+ raise AttributeError('Can not access method %r'
+ % ( self.override, ))
+ if self.arity == 0:
+ return lambda: fct(instance)
+ elif self.arity == 1:
+ return lambda other: fct(instance, other)
+ else:
+ raise AttributeError('Can not create function for %r with arity %d'
+ % ( self.override, self.arity, ))
+
+class HashableImpl(object):
+ """Implements hashability and reflexive equality for a class."""
+
+ debug = False
+
+ def __init__(self, cls):
+ """Set methods to implement equality in `cls`."""
+ setattr(cls, '__hash__', HashableDescriptor('__hash__', 0, self))
+ setattr(cls, '__eq__', HashableDescriptor('__eq__', 1, self))
+ setattr(cls, '__ne__', HashableDescriptor('__ne__', 1, self))
+
+ def getFunction(self, name):
+ """Return the function named `name`."""
+ # Get the function from the *real* object
+ return getattr(self, 'impl' + name)
+
+ def impl__hash__(self, this):
+ """Return `this.__hash__`(). Derived classes must override this."""
+ if self.debug:
+ print('impl__hash__ called')
+ return id(this)
+
+ def impl__eq__(self, this, other):
+ """Return `this.__eq__`(other). Derived classes must override this."""
+ if self.debug:
+ print('impl__eq__ called')
+ return id(this) == id(other)
+
+ def impl__ne__(self, this, other):
+ """Return `this.__ne__`(other). Derived classes must override this."""
+ if self.debug:
+ print('impl__ne__ called')
+ return not self.impl__eq__(this, other)
+
+class HashableNodeImpl(HashableImpl):
+ """An abstract implementation of `HashableImpl` for nodes in a tree.
+ Allows switching between of root only comparison and deep
+ comparison."""
+
+ """Consider only the root node (or include the children)."""
+ _rootOnly = False
+ """Stack for `_rootOnly`"""
+ __rootOnlies = [ ]
+
+ def __init__(self, cls):
+ HashableImpl.__init__(self, cls)
+
+ def pushRootOnly(self, newRootOnly):
+ """Set `newRootOnly` as new `rootOnly` value. If ``True`` then only
+ information in the root is considered."""
+ self.__rootOnlies.append(self._rootOnly)
+ self._rootOnly = newRootOnly
+
+ def popRootOnly(self):
+ """Pop and return last `rootOnly` value."""
+ self._rootOnly = self.__rootOnlies.pop()
+
+ def impl__hash__(self, node):
+ """Returns the hash for node `node`. Subclasses may override this but
+ overriding `rootHash` and `childrenHash` may make more sense."""
+ rootHash = self.rootHash(node)
+ if self._rootOnly:
+ return rootHash
+ else:
+ return rootHash + self.childrenHash(node)
+
+ def impl__eq__(self, node, other):
+ """Returns equality between `node` and an `other` node. Subclasses may
+ override this but overriding `rootEq` and `childrenEq` may
+ make more sense."""
+ if not self.rootEq(node, other):
+ return False
+ if self._rootOnly:
+ return True
+ return self.childrenEq(node, other)
+
+ def childrenHash(self, node):
+ """Return a hash for the children only. Subclasses may override
+ this but overriding `childHash` may make more sense."""
+ return reduce(lambda x, y: x + y,
+ [ self.childHash(child)
+ for child in self.getChildren(node) ], 0)
+
+ def childrenEq(self, node, other):
+ """Returns children equality of `node` and an `other` node. ``True``
+ if the children of the two nodes are equal without considering
+ the root. Subclasses may override this but overriding
+ `childEq` may make more sense."""
+ nodeChildren = self.getChildren(node)
+ otherChildren = self.getChildren(other)
+ if len(nodeChildren) != len(otherChildren):
+ return False
+ for i in xrange(len(nodeChildren)):
+ if not self.childEq(nodeChildren[i], otherChildren[i]):
+ return False
+ return True
+
+ def rootHash(self, node):
+ """Return a hash for the root only. Subclasses must override
+ this."""
+ raise NotImplementedError()
+
+ def childHash(self, node):
+ """Return a hash for the node as a child. Subclasses must override
+ this."""
+ raise NotImplementedError()
+
+ def rootEq(self, node, other):
+ """Returns root equality of `node` and an `other` node. ``True`` if
+ the two nodes as roots are equal without considering their
+ children. This should be true if one node can be replaced by
+ the other and all changes can be represented without changing
+ the node itself. Subclasses must override this."""
+ raise NotImplementedError()
+
+ def childEq(self, node, other):
+ """Returns equality of `node` and an `other` node as children.
+ ``True`` if the child features of the two nodes are equal
+ without considering the root. Subclasses must override
+ this."""
+ raise NotImplementedError()
+
+ def getChildren(self, node):
+ """Return the children of `node` as a list. Subclasses must override
+ this."""
+ raise NotImplementedError()
+
+###############################################################################
+# Tree matcher
+
+class TreeMatcher(object):
+ """Objects of this class are able to match trees. This is similar in
+spirit to `difflib.SequenceMatcher'"""
+
+ a = None
+ b = None
+ hashableNodeImpl = None
+
+ def __init__(self, hashableNodeImpl, a, b):
+ """Construct a TreeMatcher for matching trees `a` and `b`.
+
+`a` and `b` must be the root nodes of two trees to be compared.
+`hashableNodeImpl` must be an implementation of `HashableNodeImpl`
+governing the comparison of the nodes in the trees."""
+
+ self.a = a
+ self.b = b
+ self.hashableNodeImpl = hashableNodeImpl
+
+ def get_opcodes(self):
+ """Return list of 5- or 6-tuples describing how to turn `a` into `b`.
+
+Each tuple is of the form (tag, i1, i2, j1, j2, [sub]). The first tuple
+has i1 == j1 == 0, and remaining tuples have i1 == i2 from the
+tuple preceding it, and likewise for j1 == the previous j2.
+
+The tags are strings, with these meanings:
+
+'replace': a[i1:i2] should be replaced by b[j1:j2]
+'delete': a[i1:i2] should be deleted.
+ Note that j1==j2 in this case.
+'insert': b[j1:j2] should be inserted at a[i1:i1].
+ Note that i1==i2 in this case.
+'equal': a[i1:i2] == b[j1:j2]
+'descend': Descend on nodes a[i1] and b[i1]. In this case
+ sub is a list of opcodes pertaining to the list of children
+ of the two nodes.
+ Note that i2==i1+1 and j2==j1+1 in this case.
+
+Note that if the roots of the trees are not root-equal then the result
+is only a 'replace' of one tree by the other.
+"""
+
+ self.hashableNodeImpl.pushRootOnly(True)
+ try:
+ sm = SequenceMatcher(None, [ self.a, ], [ self.b, ])
+ rootOpcodes = sm.get_opcodes()
+ if rootOpcodes[0][0] == 'equal':
+ return ( 'descend', 0, 1, 0, 1,
+ self._resolveRootEqual(self.a, self.b), )
+ else:
+ return rootOpcodes
+ finally:
+ self.hashableNodeImpl.popRootOnly()
+
+ def _resolveRootEqual(self, aElem, bElem):
+ """Considers children of `aElem` and `bElem` which have equal roots.
+ Returns opcodes for the children."""
+ a = self.hashableNodeImpl.getChildren(aElem)
+ b = self.hashableNodeImpl.getChildren(bElem)
+ self.hashableNodeImpl.pushRootOnly(False)
+ try:
+ sm = SequenceMatcher(None, a, b)
+ nestedOpcodes = sm.get_opcodes()
+ return self._resolveDeepReplace(nestedOpcodes, a, b)
+ finally:
+ self.hashableNodeImpl.popRootOnly()
+
+ def _resolveDeepReplace(self, opcodes, a, b):
+ """Resolves ``replace`` elements in `opcodes` pertaining to `a` and
+ `b`. Returns opcodes including nested elements for these cases."""
+ result = [ ]
+ for i in xrange(len(opcodes)):
+ ( opcode, aBeg, aEnd, bBeg, bEnd ) = opcodes[i]
+ if opcode != 'replace':
+ result.append(opcodes[i])
+ continue
+ self.hashableNodeImpl.pushRootOnly(True)
+ try:
+ sm = SequenceMatcher(None, a[aBeg:aEnd], b[bBeg:bEnd])
+ rootOpcodes = sm.get_opcodes()
+ for j in xrange(len(rootOpcodes)):
+ ( subOpcode, aSubBeg, aSubEnd,
+ bSubBeg, bSubEnd ) = rootOpcodes[j]
+ if subOpcode != 'equal':
+ result.append(( subOpcode,
+ aBeg + aSubBeg, aBeg + aSubEnd,
+ bBeg + bSubBeg, bBeg + bSubEnd, ))
+ else:
+ for k in xrange(aSubEnd - aSubBeg):
+ aIdx = aBeg + aSubBeg + k
+ bIdx = bBeg + bSubBeg + k
+ result.append(('descend',
+ aIdx, aIdx + 1, bIdx, bIdx + 1,
+ self._resolveRootEqual(a[aIdx],
+ b[bIdx]), ))
+ finally:
+ self.hashableNodeImpl.popRootOnly()
+ return result
+