summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefan Kögl <stefan@skoegl.net>2017-09-10 13:25:47 +0200
committerStefan Kögl <stefan@skoegl.net>2017-09-10 19:24:48 +0200
commit196460716ac1fc3d84dee403cfa68386261998f5 (patch)
tree00f42863d79f20d10f7888817f97816597b8bcdc
parentd778745b1448a82c840aadff422187459fe78c9f (diff)
downloadpython-json-patch-196460716ac1fc3d84dee403cfa68386261998f5.tar.gz
Drop in patch creation from jsondiff
https://github.com/nxsofsys/jsondiff
-rw-r--r--jsonpatch.py519
1 files changed, 256 insertions, 263 deletions
diff --git a/jsonpatch.py b/jsonpatch.py
index cee4820..d1d091f 100644
--- a/jsonpatch.py
+++ b/jsonpatch.py
@@ -42,6 +42,22 @@ import itertools
import json
import sys
+
+if sys.version_info[0] >= 3:
+ _range = range
+ _viewkeys = dict.keys
+else:
+ _range = xrange
+ if sys.version_info[1] >= 7:
+ _viewkeys = dict.viewkeys
+ else:
+ _viewkeys = lambda x: set(dict.keys(x))
+
+
+_ST_ADD = 0
+_ST_REMOVE = 1
+
+
try:
from collections.abc import MutableMapping, MutableSequence
except ImportError:
@@ -300,41 +316,11 @@ class JsonPatch(object):
>>> new == dst
True
"""
- def compare_values(path, value, other):
- if value == other:
- return
- if isinstance(value, MutableMapping) and \
- isinstance(other, MutableMapping):
- for operation in compare_dicts(path, value, other):
- yield operation
- elif isinstance(value, MutableSequence) and \
- isinstance(other, MutableSequence):
- for operation in compare_lists(path, value, other):
- yield operation
- else:
- ptr = JsonPointer.from_parts(path)
- yield {'op': 'replace', 'path': ptr.path, 'value': other}
-
- def compare_dicts(path, src, dst):
- for key in src:
- if key not in dst:
- ptr = JsonPointer.from_parts(path + [key])
- yield {'op': 'remove', 'path': ptr.path}
- continue
- current = path + [key]
- for operation in compare_values(current, src[key], dst[key]):
- yield operation
- for key in dst:
- if key not in src:
- ptr = JsonPointer.from_parts(path + [key])
- yield {'op': 'add',
- 'path': ptr.path,
- 'value': dst[key]}
-
- def compare_lists(path, src, dst):
- return _compare_lists(path, src, dst, optimization=optimization)
- return cls(list(compare_values([], src, dst)))
+ info = _compare_info()
+ _compare_values('', None, info, src, dst)
+ ops = [op for op in info.execute()]
+ return cls(ops)
def to_string(self):
"""Returns patch set as JSON string."""
@@ -573,248 +559,255 @@ class CopyOperation(PatchOperation):
return obj
-def _compare_lists(path, src, dst, optimization=True):
- """Compares two lists objects and return JSON patch about."""
- patch = list(_compare(path, src, dst, *_split_by_common_seq(src, dst)))
- if optimization:
- return list(_optimize(patch))
- return patch
+class _compare_info(object):
+ def __init__(self):
+ self.index_storage = [{}, {}]
+ self.index_storage2 = [[], []]
+ self.__root = root = []
+ root[:] = [root, root, None]
-def _longest_common_subseq(src, dst):
- """Returns pair of ranges of longest common subsequence for the `src`
- and `dst` lists.
+ def store_index(self, value, index, st):
+ try:
+ storage = self.index_storage[st]
+ stored = storage.get(value)
+ if stored == None:
+ storage[value] = [index]
+ else:
+ storage[value].append(index)
+ except TypeError:
+ self.index_storage2[st].append((value, index))
- >>> src = [1, 2, 3, 4]
- >>> dst = [0, 1, 2, 3, 5]
- >>> # The longest common subsequence for these lists is [1, 2, 3]
- ... # which is located at (0, 3) index range for src list and (1, 4) for
- ... # dst one. Tuple of these ranges we should get back.
- ... assert ((0, 3), (1, 4)) == _longest_common_subseq(src, dst)
- """
- lsrc, ldst = len(src), len(dst)
- drange = list(range(ldst))
- matrix = [[0] * ldst for _ in range(lsrc)]
- z = 0 # length of the longest subsequence
- range_src, range_dst = None, None
- for i, j in itertools.product(range(lsrc), drange):
- if src[i] == dst[j]:
- if i == 0 or j == 0:
- matrix[i][j] = 1
+ def take_index(self, value, st):
+ try:
+ stored = self.index_storage[st].get(value)
+ if stored:
+ return stored.pop()
+ except TypeError:
+ storage = self.index_storage2[st]
+ for i in range(len(storage)-1, -1, -1):
+ if storage[i][0] == value:
+ return storage.pop(i)[1]
+
+ def insert(self, op):
+ root = self.__root
+ last = root[0]
+ last[1] = root[0] = [last, root, op]
+ return root[0]
+
+ def remove(self, index):
+ link_prev, link_next, _ = index
+ link_prev[1] = link_next
+ link_next[0] = link_prev
+ index[:] = []
+
+ def iter_from(self, start):
+ root = self.__root
+ curr = start[1]
+ while curr is not root:
+ yield curr[2]
+ curr = curr[1]
+
+ def __iter__(self):
+ root = self.__root
+ curr = root[1]
+ while curr is not root:
+ yield curr[2]
+ curr = curr[1]
+
+ def execute(self):
+ root = self.__root
+ curr = root[1]
+ while curr is not root:
+ if curr[1] is not root:
+ op_first, op_second = curr[2], curr[1][2]
+ if op_first.key == op_second.key and \
+ op_first.path == op_second.path and \
+ type(op_first) == _op_remove and \
+ type(op_second) == _op_add:
+ yield _op_replace(op_second.path, op_second.key, op_second.value).get()
+ curr = curr[1][1]
+ continue
+ yield curr[2].get()
+ curr = curr[1]
+
+class _op_base(object):
+ def __init__(self, path, key, value):
+ self.path = path
+ self.key = key
+ self.value = value
+
+ def __repr__(self):
+ return str(self.get())
+
+class _op_add(_op_base):
+ def _on_undo_remove(self, path, key):
+ if self.path == path:
+ if self.key > key:
+ self.key += 1
else:
- matrix[i][j] = matrix[i-1][j-1] + 1
- if matrix[i][j] > z:
- z = matrix[i][j]
- range_src = (i-z+1, i+1)
- range_dst = (j-z+1, j+1)
- else:
- matrix[i][j] = 0
- return range_src, range_dst
-
-
-def _split_by_common_seq(src, dst, bx=(0, -1), by=(0, -1)):
- """Recursively splits the `dst` list onto two parts: left and right.
- The left part contains differences on left from common subsequence,
- same as the right part by for other side.
-
- To easily understand the process let's take two lists: [0, 1, 2, 3] as
- `src` and [1, 2, 4, 5] for `dst`. If we've tried to generate the binary tree
- where nodes are common subsequence for both lists, leaves on the left
- side are subsequence for `src` list and leaves on the right one for `dst`,
- our tree would looks like::
-
- [1, 2]
- / \
- [0] []
- / \
- [3] [4, 5]
-
- This function generate the similar structure as flat tree, but without
- nodes with common subsequences - since we're don't need them - only with
- left and right leaves::
-
- []
- / \
- [0] []
- / \
- [3] [4, 5]
-
- The `bx` is the absolute range for currently processed subsequence of
- `src` list. The `by` means the same, but for the `dst` list.
- """
- # Prevent useless comparisons in future
- bx = bx if bx[0] != bx[1] else None
- by = by if by[0] != by[1] else None
+ key += 1
+ return key
- if not src:
- return [None, by]
- elif not dst:
- return [bx, None]
+ def _on_undo_add(self, path, key):
+ if self.path == path:
+ if self.key > key:
+ self.key -= 1
+ else:
+ key += 1
+ return key
- # note that these ranges are relative for processed sublists
- x, y = _longest_common_subseq(src, dst)
+ def get(self):
+ return {'op': 'add', 'path': _path_join(self.path, self.key), 'value': self.value}
- if x is None or y is None: # no more any common subsequence
- return [bx, by]
+class _op_remove(_op_base):
+ def _on_undo_remove(self, path, key):
+ if self.path == path:
+ if self.key >= key:
+ self.key += 1
+ else:
+ key -= 1
+ return key
- return [_split_by_common_seq(src[:x[0]], dst[:y[0]],
- (bx[0], bx[0] + x[0]),
- (by[0], by[0] + y[0])),
- _split_by_common_seq(src[x[1]:], dst[y[1]:],
- (bx[0] + x[1], bx[0] + len(src)),
- (by[0] + y[1], by[0] + len(dst)))]
+ def _on_undo_add(self, path, key):
+ if self.path == path:
+ if self.key > key:
+ self.key -= 1
+ else:
+ key -= 1
+ return key
+ def get(self):
+ return {'op': 'remove', 'path': _path_join(self.path, self.key)}
-def _compare(path, src, dst, left, right):
- """Same as :func:`_compare_with_shift` but strips emitted `shift` value."""
- for op, _ in _compare_with_shift(path, src, dst, left, right, 0):
- yield op
+class _op_replace(_op_base):
+ def _on_undo_remove(self, path, key):
+ return key
+ def _on_undo_add(self, path, key):
+ return key
-def _compare_with_shift(path, src, dst, left, right, shift):
- """Recursively compares differences from `left` and `right` sides
- from common subsequences.
+ def get(self):
+ return {'op': 'replace', 'path': _path_join(self.path, self.key), 'value': self.value}
- The `shift` parameter is used to store index shift which caused
- by ``add`` and ``remove`` operations.
- Yields JSON patch operations and list index shift.
- """
- if isinstance(left, MutableSequence):
- for item, shift in _compare_with_shift(path, src, dst, *left,
- shift=shift):
- yield item, shift
- elif left is not None:
- for item, shift in _compare_left(path, src, left, shift):
- yield item, shift
-
- if isinstance(right, MutableSequence):
- for item, shift in _compare_with_shift(path, src, dst, *right,
- shift=shift):
- yield item, shift
- elif right is not None:
- for item, shift in _compare_right(path, dst, right, shift):
- yield item, shift
-
-
-def _compare_left(path, src, left, shift):
- """Yields JSON patch ``remove`` operations for elements that are only
- exists in the `src` list."""
- start, end = left
- if end == -1:
- end = len(src)
- # we need to `remove` elements from list tail to not deal with index shift
- for idx in reversed(range(start + shift, end + shift)):
- ptr = JsonPointer.from_parts(path + [str(idx)])
- yield (
- {'op': 'remove',
- # yes, there should be any value field, but we'll use it
- # to apply `move` optimization a bit later and will remove
- # it in _optimize function.
- 'value': src[idx - shift],
- 'path': ptr.path,
- },
- shift - 1
- )
- shift -= 1
-
-
-def _compare_right(path, dst, right, shift):
- """Yields JSON patch ``add`` operations for elements that are only
- exists in the `dst` list"""
- start, end = right
- if end == -1:
- end = len(dst)
- for idx in range(start, end):
- ptr = JsonPointer.from_parts(path + [str(idx)])
- yield (
- {'op': 'add', 'path': ptr.path, 'value': dst[idx]},
- shift + 1
- )
- shift += 1
-
-
-def _optimize(operations):
- """Optimizes operations which was produced by lists comparison.
-
- Actually it does two kinds of optimizations:
-
- 1. Seeks pair of ``remove`` and ``add`` operations against the same path
- and replaces them with ``replace`` operation.
- 2. Seeks pair of ``remove`` and ``add`` operations for the same value
- and replaces them with ``move`` operation.
- """
- result = []
- ops_by_path = {}
- ops_by_value = {}
- add_remove = set(['add', 'remove'])
- for item in operations:
- # could we apply "move" optimization for dict values?
- hashable_value = not isinstance(item['value'],
- (MutableMapping, MutableSequence))
- if item['path'] in ops_by_path:
- _optimize_using_replace(ops_by_path[item['path']], item)
- continue
- if hashable_value and item['value'] in ops_by_value:
- prev_item = ops_by_value[item['value']]
- # ensure that we processing pair of add-remove ops
- if set([item['op'], prev_item['op']]) == add_remove:
- _optimize_using_move(prev_item, item)
- ops_by_value.pop(item['value'])
+class _op_move(object):
+ def __init__(self, oldpath, oldkey, path, key):
+ self.oldpath = oldpath
+ self.oldkey = oldkey
+ self.path = path
+ self.key = key
+
+ def _on_undo_remove(self, path, key):
+ if self.oldpath == path:
+ if self.oldkey >= key:
+ self.oldkey += 1
+ else:
+ key -= 1
+ if self.path == path:
+ if self.key > key:
+ self.key += 1
+ else:
+ key += 1
+ return key
+
+ def _on_undo_add(self, path, key):
+ if self.oldpath == path:
+ if self.oldkey > key:
+ self.oldkey -= 1
+ else:
+ key -= 1
+ if self.path == path:
+ if self.key > key:
+ self.key -= 1
+ else:
+ key += 1
+ return key
+
+ def get(self):
+ return {'op': 'move', 'path': _path_join(self.path, self.key), 'from': _path_join(self.oldpath, self.oldkey)}
+
+ def __repr__(self):
+ return str(self.get())
+
+def _path_join(path, key):
+ if key != None:
+ return path + '/' + str(key).replace('~', '~0').replace('/', '~1')
+ return path
+
+def _item_added(path, key, info, item):
+ index = info.take_index(item, _ST_REMOVE)
+ if index != None:
+ op = index[2]
+ if type(op.key) == int:
+ for v in info.iter_from(index):
+ op.key = v._on_undo_remove(op.path, op.key)
+ info.remove(index)
+ if op.path != path or op.key != key:
+ new_op = _op_move(op.path, op.key, path, key)
+ info.insert(new_op)
+ else:
+ new_op = _op_add(path, key, item)
+ new_index = info.insert(new_op)
+ info.store_index(item, new_index, _ST_ADD)
+
+def _item_removed(path, key, info, item):
+ new_op = _op_remove(path, key, item)
+ index = info.take_index(item, _ST_ADD)
+ new_index = info.insert(new_op)
+ if index != None:
+ op = index[2]
+ if type(op.key) == int:
+ for v in info.iter_from(index):
+ op.key = v._on_undo_add(op.path, op.key)
+ info.remove(index)
+ if new_op.path != op.path or new_op.key != op.key:
+ new_op = _op_move(new_op.path, new_op.key, op.path, op.key)
+ new_index[2] = new_op
+ else:
+ info.remove(new_index)
+ else:
+ info.store_index(item, new_index, _ST_REMOVE)
+
+def _item_replaced(path, key, info, item):
+ info.insert(_op_replace(path, key, item))
+
+def _compare_dicts(path, info, src, dst):
+ src_keys = _viewkeys(src)
+ dst_keys = _viewkeys(dst)
+ added_keys = dst_keys - src_keys
+ removed_keys = src_keys - dst_keys
+ for key in removed_keys:
+ _item_removed(path, str(key), info, src[key])
+ for key in added_keys:
+ _item_added(path, str(key), info, dst[key])
+ for key in src_keys & dst_keys:
+ _compare_values(path, key, info, src[key], dst[key])
+
+def _compare_lists(path, info, src, dst):
+ len_src, len_dst = len(src), len(dst)
+ max_len = max(len_src, len_dst)
+ min_len = min(len_src, len_dst)
+ for key in _range(max_len):
+ if key < min_len:
+ old, new = src[key], dst[key]
+ if old == new:
continue
- result.append(item)
- ops_by_path[item['path']] = item
- if hashable_value:
- ops_by_value[item['value']] = item
-
- # cleanup
- ops_by_path.clear()
- ops_by_value.clear()
- for item in result:
- if item['op'] == 'remove':
- item.pop('value') # strip our hack
- yield item
-
-
-def _optimize_using_replace(prev, cur):
- """Optimises by replacing ``add``/``remove`` with ``replace`` on same path
-
- For nested strucures, tries to recurse replacement, see #36 """
- prev['op'] = 'replace'
- if cur['op'] == 'add':
- # make recursive patch
- patch = make_patch(prev['value'], cur['value'])
- # check case when dict "remove" is less than "add" and has a same key
- if isinstance(prev['value'], dict) and isinstance(cur['value'], dict) and len(prev['value'].keys()) == 1:
- prev_set = set(prev['value'].keys())
- cur_set = set(cur['value'].keys())
- if prev_set & cur_set == prev_set:
- patch = make_patch(cur['value'], prev['value'])
-
- if len(patch.patch) == 1 and \
- patch.patch[0]['op'] != 'remove' and \
- patch.patch[0]['path'] and patch.patch[0]['path'].split('/')[1] in prev['value']:
- prev['path'] = prev['path'] + patch.patch[0]['path']
- prev['value'] = patch.patch[0]['value']
+ _item_removed(path, key, info, old)
+ _item_added(path, key, info, new)
+ elif len_src > len_dst:
+ _item_removed(path, len_dst, info, src[key])
else:
- prev['value'] = cur['value']
-
-
-def _optimize_using_move(prev_item, item):
- """Optimises JSON patch by using ``move`` operation instead of
- ``remove` and ``add`` against the different paths but for the same value."""
- prev_item['op'] = 'move'
- move_from, move_to = [
- (item['path'], prev_item['path']),
- (prev_item['path'], item['path']),
- ][item['op'] == 'add']
- if item['op'] == 'add': # first was remove then add
- prev_item['from'] = move_from
- prev_item['path'] = move_to
- else: # first was add then remove
- head, move_from = move_from.rsplit('/', 1)
- # since add operation was first it incremented
- # overall index shift value. we have to fix this
- move_from = int(move_from) - 1
- prev_item['from'] = head + '/%d' % move_from
- prev_item['path'] = move_to
+ _item_added(path, key, info, dst[key])
+
+def _compare_values(path, key, info, src, dst):
+ if src == dst:
+ return
+ elif isinstance(src, dict) and \
+ isinstance(dst, dict):
+ _compare_dicts(_path_join(path, key), info, src, dst)
+ elif isinstance(src, list) and \
+ isinstance(dst, list):
+ _compare_lists(_path_join(path, key), info, src, dst)
+ else:
+ _item_replaced(path, key, info, dst)