Drop in patch creation from jsondiff

https://github.com/nxsofsys/jsondiff
author: Stefan Kögl <stefan@skoegl.net> 2017-09-10 13:25:47 +0200
committer: Stefan Kögl <stefan@skoegl.net> 2017-09-10 19:24:48 +0200
commit: 196460716ac1fc3d84dee403cfa68386261998f5 (patch)
tree: 00f42863d79f20d10f7888817f97816597b8bcdc
parent: d778745b1448a82c840aadff422187459fe78c9f (diff)
download: python-json-patch-196460716ac1fc3d84dee403cfa68386261998f5.tar.gz
1 files changed, 256 insertions, 263 deletions
diff --git a/jsonpatch.py b/jsonpatch.py
index cee4820..d1d091f 100644
--- a/jsonpatch.py
+++ b/jsonpatch.py
@@ -42,6 +42,22 @@ import itertools
 import json
 import sys
 
+
+if sys.version_info[0] >= 3:
+    _range = range
+    _viewkeys = dict.keys
+else:
+    _range = xrange
+    if sys.version_info[1] >= 7:
+        _viewkeys = dict.viewkeys
+    else:
+        _viewkeys = lambda x: set(dict.keys(x))
+
+
+_ST_ADD    = 0
+_ST_REMOVE = 1
+
+
 try:
     from collections.abc import MutableMapping, MutableSequence
 except ImportError:
@@ -300,41 +316,11 @@ class JsonPatch(object):
         >>> new == dst
         True
         """
-        def compare_values(path, value, other):
-            if value == other:
-                return
-            if isinstance(value, MutableMapping) and \
-                    isinstance(other, MutableMapping):
-                for operation in compare_dicts(path, value, other):
-                    yield operation
-            elif isinstance(value, MutableSequence) and \
-                    isinstance(other, MutableSequence):
-                for operation in compare_lists(path, value, other):
-                    yield operation
-            else:
-                ptr = JsonPointer.from_parts(path)
-                yield {'op': 'replace', 'path': ptr.path, 'value': other}
-
-        def compare_dicts(path, src, dst):
-            for key in src:
-                if key not in dst:
-                    ptr = JsonPointer.from_parts(path + [key])
-                    yield {'op': 'remove', 'path': ptr.path}
-                    continue
-                current = path + [key]
-                for operation in compare_values(current, src[key], dst[key]):
-                    yield operation
-            for key in dst:
-                if key not in src:
-                    ptr = JsonPointer.from_parts(path + [key])
-                    yield {'op': 'add',
-                           'path': ptr.path,
-                           'value': dst[key]}
-
-        def compare_lists(path, src, dst):
-            return _compare_lists(path, src, dst, optimization=optimization)
 
-        return cls(list(compare_values([], src, dst)))
+        info = _compare_info()
+        _compare_values('', None, info, src, dst)
+        ops = [op for op in info.execute()]
+        return cls(ops)
 
     def to_string(self):
         """Returns patch set as JSON string."""
@@ -573,248 +559,255 @@ class CopyOperation(PatchOperation):
         return obj
 
 
-def _compare_lists(path, src, dst, optimization=True):
-    """Compares two lists objects and return JSON patch about."""
-    patch = list(_compare(path, src, dst, *_split_by_common_seq(src, dst)))
-    if optimization:
-        return list(_optimize(patch))
-    return patch
+class _compare_info(object):
 
+    def __init__(self):
+        self.index_storage = [{}, {}]
+        self.index_storage2 = [[], []]
+        self.__root = root = []
+        root[:] = [root, root, None]
 
-def _longest_common_subseq(src, dst):
-    """Returns pair of ranges of longest common subsequence for the `src`
-    and `dst` lists.
+    def store_index(self, value, index, st):
+        try:
+            storage = self.index_storage[st]
+            stored = storage.get(value)
+            if stored == None:
+                storage[value] = [index]
+            else:
+                storage[value].append(index)
+        except TypeError:
+            self.index_storage2[st].append((value, index))
 
-    >>> src = [1, 2, 3, 4]
-    >>> dst = [0, 1, 2, 3, 5]
-    >>> # The longest common subsequence for these lists is [1, 2, 3]
-    ... # which is located at (0, 3) index range for src list and (1, 4) for
-    ... # dst one. Tuple of these ranges we should get back.
-    ... assert ((0, 3), (1, 4)) == _longest_common_subseq(src, dst)
-    """
-    lsrc, ldst = len(src), len(dst)
-    drange = list(range(ldst))
-    matrix = [[0] * ldst for _ in range(lsrc)]
-    z = 0  # length of the longest subsequence
-    range_src, range_dst = None, None
-    for i, j in itertools.product(range(lsrc), drange):
-        if src[i] == dst[j]:
-            if i == 0 or j == 0:
-                matrix[i][j] = 1
+    def take_index(self, value, st):
+        try:
+            stored = self.index_storage[st].get(value)
+            if stored:
+                return stored.pop()
+        except TypeError:
+            storage = self.index_storage2[st]
+            for i in range(len(storage)-1, -1, -1):
+                if storage[i][0] == value:
+                    return storage.pop(i)[1]
+
+    def insert(self, op):
+        root = self.__root
+        last = root[0]
+        last[1] = root[0] = [last, root, op]
+        return root[0]
+
+    def remove(self, index):
+        link_prev, link_next, _ = index
+        link_prev[1] = link_next
+        link_next[0] = link_prev
+        index[:] = []
+
+    def iter_from(self, start):
+        root = self.__root
+        curr = start[1]
+        while curr is not root:
+            yield curr[2]
+            curr = curr[1]
+
+    def __iter__(self):
+        root = self.__root
+        curr = root[1]
+        while curr is not root:
+            yield curr[2]
+            curr = curr[1]
+
+    def execute(self):
+        root = self.__root
+        curr = root[1]
+        while curr is not root:
+            if curr[1] is not root:
+                op_first, op_second = curr[2], curr[1][2]
+                if op_first.key == op_second.key and \
+                        op_first.path == op_second.path and \
+                        type(op_first) == _op_remove and \
+                        type(op_second) == _op_add:
+                    yield _op_replace(op_second.path, op_second.key, op_second.value).get()
+                    curr = curr[1][1]
+                    continue
+            yield curr[2].get()
+            curr = curr[1]
+
+class _op_base(object):
+    def __init__(self, path, key, value):
+        self.path  = path
+        self.key   = key
+        self.value = value
+
+    def __repr__(self):
+        return str(self.get())
+
+class _op_add(_op_base):
+    def _on_undo_remove(self, path, key):
+        if self.path == path:
+            if self.key > key:
+                self.key += 1
             else:
-                matrix[i][j] = matrix[i-1][j-1] + 1
-            if matrix[i][j] > z:
-                z = matrix[i][j]
-                range_src = (i-z+1, i+1)
-                range_dst = (j-z+1, j+1)
-        else:
-            matrix[i][j] = 0
-    return range_src, range_dst
-
-
-def _split_by_common_seq(src, dst, bx=(0, -1), by=(0, -1)):
-    """Recursively splits the `dst` list onto two parts: left and right.
-    The left part contains differences on left from common subsequence,
-    same as the right part by for other side.
-
-    To easily understand the process let's take two lists: [0, 1, 2, 3] as
-    `src` and [1, 2, 4, 5] for `dst`. If we've tried to generate the binary tree
-    where nodes are common subsequence for both lists, leaves on the left
-    side are subsequence for `src` list and leaves on the right one for `dst`,
-    our tree would looks like::
-
-        [1, 2]
-       /     \
-    [0]       []
-             /  \
-          [3]   [4, 5]
-
-    This function generate the similar structure as flat tree, but without
-    nodes with common subsequences - since we're don't need them - only with
-    left and right leaves::
-
-        []
-       / \
-    [0]  []
-        / \
-     [3]  [4, 5]
-
-    The `bx` is the absolute range for currently processed subsequence of
-    `src` list.  The `by` means the same, but for the `dst` list.
-    """
-    # Prevent useless comparisons in future
-    bx = bx if bx[0] != bx[1] else None
-    by = by if by[0] != by[1] else None
+                key += 1
+        return key
 
-    if not src:
-        return [None, by]
-    elif not dst:
-        return [bx, None]
+    def _on_undo_add(self, path, key):
+        if self.path == path:
+            if self.key > key:
+                self.key -= 1
+            else:
+                key += 1
+        return key
 
-    # note that these ranges are relative for processed sublists
-    x, y = _longest_common_subseq(src, dst)
+    def get(self):
+        return {'op': 'add', 'path': _path_join(self.path, self.key), 'value': self.value}
 
-    if x is None or y is None:  # no more any common subsequence
-        return [bx, by]
+class _op_remove(_op_base):
+    def _on_undo_remove(self, path, key):
+        if self.path == path:
+            if self.key >= key:
+                self.key += 1
+            else:
+                key -= 1
+        return key
 
-    return [_split_by_common_seq(src[:x[0]], dst[:y[0]],
-                                 (bx[0], bx[0] + x[0]),
-                                 (by[0], by[0] + y[0])),
-            _split_by_common_seq(src[x[1]:], dst[y[1]:],
-                                 (bx[0] + x[1], bx[0] + len(src)),
-                                 (by[0] + y[1], by[0] + len(dst)))]
+    def _on_undo_add(self, path, key):
+        if self.path == path:
+            if self.key > key:
+                self.key -= 1
+            else:
+                key -= 1
+        return key
 
+    def get(self):
+        return {'op': 'remove', 'path': _path_join(self.path, self.key)}
 
-def _compare(path, src, dst, left, right):
-    """Same as :func:`_compare_with_shift` but strips emitted `shift` value."""
-    for op, _ in _compare_with_shift(path, src, dst, left, right, 0):
-        yield op
+class _op_replace(_op_base):
+    def _on_undo_remove(self, path, key):
+        return key
 
+    def _on_undo_add(self, path, key):
+        return key
 
-def _compare_with_shift(path, src, dst, left, right, shift):
-    """Recursively compares differences from `left` and `right` sides
-    from common subsequences.
+    def get(self):
+        return {'op': 'replace', 'path': _path_join(self.path, self.key), 'value': self.value}
 
-    The `shift` parameter is used to store index shift which caused
-    by ``add`` and ``remove`` operations.
 
-    Yields JSON patch operations and list index shift.
-    """
-    if isinstance(left, MutableSequence):
-        for item, shift in _compare_with_shift(path, src, dst, *left,
-                                               shift=shift):
-            yield item, shift
-    elif left is not None:
-        for item, shift in _compare_left(path, src, left, shift):
-            yield item, shift
-
-    if isinstance(right, MutableSequence):
-        for item, shift in _compare_with_shift(path, src, dst, *right,
-                                               shift=shift):
-            yield item, shift
-    elif right is not None:
-        for item, shift in _compare_right(path, dst, right, shift):
-            yield item, shift
-
-
-def _compare_left(path, src, left, shift):
-    """Yields JSON patch ``remove`` operations for elements that are only
-    exists in the `src` list."""
-    start, end = left
-    if end == -1:
-        end = len(src)
-    # we need to `remove` elements from list tail to not deal with index shift
-    for idx in reversed(range(start + shift, end + shift)):
-        ptr = JsonPointer.from_parts(path + [str(idx)])
-        yield (
-            {'op': 'remove',
-             # yes, there should be any value field, but we'll use it
-             # to apply `move` optimization a bit later and will remove
-             # it in _optimize function.
-             'value': src[idx - shift],
-             'path': ptr.path,
-            },
-            shift - 1
-        )
-        shift -= 1
-
-
-def _compare_right(path, dst, right, shift):
-    """Yields JSON patch ``add`` operations for elements that are only
-    exists in the `dst` list"""
-    start, end = right
-    if end == -1:
-        end = len(dst)
-    for idx in range(start, end):
-        ptr = JsonPointer.from_parts(path + [str(idx)])
-        yield (
-            {'op': 'add', 'path': ptr.path, 'value': dst[idx]},
-            shift + 1
-        )
-        shift += 1
-
-
-def _optimize(operations):
-    """Optimizes operations which was produced by lists comparison.
-
-    Actually it does two kinds of optimizations:
-
-    1. Seeks pair of ``remove`` and ``add`` operations against the same path
-       and replaces them with ``replace`` operation.
-    2. Seeks pair of ``remove`` and ``add`` operations for the same value
-       and replaces them with ``move`` operation.
-    """
-    result = []
-    ops_by_path = {}
-    ops_by_value = {}
-    add_remove = set(['add', 'remove'])
-    for item in operations:
-        # could we apply "move" optimization for dict values?
-        hashable_value = not isinstance(item['value'],
-                                        (MutableMapping, MutableSequence))
-        if item['path'] in ops_by_path:
-            _optimize_using_replace(ops_by_path[item['path']], item)
-            continue
-        if hashable_value and item['value'] in ops_by_value:
-            prev_item = ops_by_value[item['value']]
-            # ensure that we processing pair of add-remove ops
-            if set([item['op'], prev_item['op']]) == add_remove:
-                _optimize_using_move(prev_item, item)
-                ops_by_value.pop(item['value'])
+class _op_move(object):
+    def __init__(self, oldpath, oldkey, path, key):
+        self.oldpath = oldpath
+        self.oldkey  = oldkey
+        self.path    = path
+        self.key     = key
+
+    def _on_undo_remove(self, path, key):
+        if self.oldpath == path:
+            if self.oldkey >= key:
+                self.oldkey += 1
+            else:
+                key -= 1
+        if self.path == path:
+            if self.key > key:
+                self.key += 1
+            else:
+                key += 1
+        return key
+
+    def _on_undo_add(self, path, key):
+        if self.oldpath == path:
+            if self.oldkey > key:
+                self.oldkey -= 1
+            else:
+                key -= 1
+        if self.path == path:
+            if self.key > key:
+                self.key -= 1
+            else:
+                key += 1
+        return key
+
+    def get(self):
+        return {'op': 'move', 'path': _path_join(self.path, self.key), 'from': _path_join(self.oldpath, self.oldkey)}
+
+    def __repr__(self):
+        return str(self.get())
+
+def _path_join(path, key):
+    if key != None:
+        return path + '/' + str(key).replace('~', '~0').replace('/', '~1')
+    return path
+
+def _item_added(path, key, info, item):
+    index = info.take_index(item, _ST_REMOVE)
+    if index != None:
+        op = index[2]
+        if type(op.key) == int:
+            for v in info.iter_from(index):
+                op.key = v._on_undo_remove(op.path, op.key)
+        info.remove(index)
+        if op.path != path or op.key != key:
+            new_op = _op_move(op.path, op.key, path, key)
+            info.insert(new_op)
+    else:
+        new_op = _op_add(path, key, item)
+        new_index = info.insert(new_op)
+        info.store_index(item, new_index, _ST_ADD)
+
+def _item_removed(path, key, info, item):
+    new_op = _op_remove(path, key, item)
+    index = info.take_index(item, _ST_ADD)
+    new_index = info.insert(new_op)
+    if index != None:
+        op = index[2]
+        if type(op.key) == int:
+            for v in info.iter_from(index):
+                op.key = v._on_undo_add(op.path, op.key)
+        info.remove(index)
+        if new_op.path != op.path or new_op.key != op.key:
+            new_op = _op_move(new_op.path, new_op.key, op.path, op.key)
+            new_index[2] = new_op
+        else:
+            info.remove(new_index)
+    else:
+        info.store_index(item, new_index, _ST_REMOVE)
+
+def _item_replaced(path, key, info, item):
+    info.insert(_op_replace(path, key, item))
+
+def _compare_dicts(path, info, src, dst):
+    src_keys = _viewkeys(src)
+    dst_keys = _viewkeys(dst)
+    added_keys = dst_keys - src_keys
+    removed_keys = src_keys - dst_keys
+    for key in removed_keys:
+        _item_removed(path, str(key), info, src[key])
+    for key in added_keys:
+        _item_added(path, str(key), info, dst[key])
+    for key in src_keys & dst_keys:
+        _compare_values(path, key, info, src[key], dst[key])
+
+def _compare_lists(path, info, src, dst):
+    len_src, len_dst = len(src), len(dst)
+    max_len = max(len_src, len_dst)
+    min_len = min(len_src, len_dst)
+    for key in _range(max_len):
+        if key < min_len:
+            old, new = src[key], dst[key]
+            if old == new:
                 continue
-        result.append(item)
-        ops_by_path[item['path']] = item
-        if hashable_value:
-            ops_by_value[item['value']] = item
-
-    # cleanup
-    ops_by_path.clear()
-    ops_by_value.clear()
-    for item in result:
-        if item['op'] == 'remove':
-            item.pop('value')  # strip our hack
-        yield item
-
-
-def _optimize_using_replace(prev, cur):
-    """Optimises by replacing ``add``/``remove`` with ``replace`` on same path
-
-    For nested strucures, tries to recurse replacement, see #36 """
-    prev['op'] = 'replace'
-    if cur['op'] == 'add':
-        # make recursive patch
-        patch = make_patch(prev['value'], cur['value'])
-        # check case when dict "remove" is less than "add" and has a same key
-        if isinstance(prev['value'], dict) and isinstance(cur['value'], dict) and len(prev['value'].keys()) == 1:
-            prev_set = set(prev['value'].keys())
-            cur_set = set(cur['value'].keys())
-            if prev_set & cur_set == prev_set:
-                patch = make_patch(cur['value'], prev['value'])
-
-        if len(patch.patch) == 1 and \
-                patch.patch[0]['op'] != 'remove' and \
-                patch.patch[0]['path'] and patch.patch[0]['path'].split('/')[1] in prev['value']:
-            prev['path'] = prev['path'] + patch.patch[0]['path']
-            prev['value'] = patch.patch[0]['value']
+            _item_removed(path, key, info, old)
+            _item_added(path, key, info, new)
+        elif len_src > len_dst:
+            _item_removed(path, len_dst, info, src[key])
         else:
-            prev['value'] = cur['value']
-
-
-def _optimize_using_move(prev_item, item):
-    """Optimises JSON patch by using ``move`` operation instead of
-    ``remove` and ``add`` against the different paths but for the same value."""
-    prev_item['op'] = 'move'
-    move_from, move_to = [
-        (item['path'], prev_item['path']),
-        (prev_item['path'], item['path']),
-    ][item['op'] == 'add']
-    if item['op'] == 'add':  # first was remove then add
-        prev_item['from'] = move_from
-        prev_item['path'] = move_to
-    else:  # first was add then remove
-        head, move_from = move_from.rsplit('/', 1)
-        # since add operation was first it incremented
-        # overall index shift value. we have to fix this
-        move_from = int(move_from) - 1
-        prev_item['from'] = head + '/%d' % move_from
-        prev_item['path'] = move_to
+            _item_added(path, key, info, dst[key])
+
+def _compare_values(path, key, info, src, dst):
+    if src == dst:
+        return
+    elif isinstance(src, dict) and \
+            isinstance(dst, dict):
+        _compare_dicts(_path_join(path, key), info, src, dst)
+    elif isinstance(src, list) and \
+            isinstance(dst, list):
+        _compare_lists(_path_join(path, key), info, src, dst)
+    else:
+        _item_replaced(path, key, info, dst)
author	Stefan Kögl <stefan@skoegl.net>	2017-09-10 13:25:47 +0200
committer	Stefan Kögl <stefan@skoegl.net>	2017-09-10 19:24:48 +0200
commit	196460716ac1fc3d84dee403cfa68386261998f5 (patch)
tree	00f42863d79f20d10f7888817f97816597b8bcdc
parent	d778745b1448a82c840aadff422187459fe78c9f (diff)
download	python-json-patch-196460716ac1fc3d84dee403cfa68386261998f5.tar.gz