From 196460716ac1fc3d84dee403cfa68386261998f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20K=C3=B6gl?= Date: Sun, 10 Sep 2017 13:25:47 +0200 Subject: Drop in patch creation from jsondiff https://github.com/nxsofsys/jsondiff --- jsonpatch.py | 519 +++++++++++++++++++++++++++++------------------------------ 1 file changed, 256 insertions(+), 263 deletions(-) diff --git a/jsonpatch.py b/jsonpatch.py index cee4820..d1d091f 100644 --- a/jsonpatch.py +++ b/jsonpatch.py @@ -42,6 +42,22 @@ import itertools import json import sys + +if sys.version_info[0] >= 3: + _range = range + _viewkeys = dict.keys +else: + _range = xrange + if sys.version_info[1] >= 7: + _viewkeys = dict.viewkeys + else: + _viewkeys = lambda x: set(dict.keys(x)) + + +_ST_ADD = 0 +_ST_REMOVE = 1 + + try: from collections.abc import MutableMapping, MutableSequence except ImportError: @@ -300,41 +316,11 @@ class JsonPatch(object): >>> new == dst True """ - def compare_values(path, value, other): - if value == other: - return - if isinstance(value, MutableMapping) and \ - isinstance(other, MutableMapping): - for operation in compare_dicts(path, value, other): - yield operation - elif isinstance(value, MutableSequence) and \ - isinstance(other, MutableSequence): - for operation in compare_lists(path, value, other): - yield operation - else: - ptr = JsonPointer.from_parts(path) - yield {'op': 'replace', 'path': ptr.path, 'value': other} - - def compare_dicts(path, src, dst): - for key in src: - if key not in dst: - ptr = JsonPointer.from_parts(path + [key]) - yield {'op': 'remove', 'path': ptr.path} - continue - current = path + [key] - for operation in compare_values(current, src[key], dst[key]): - yield operation - for key in dst: - if key not in src: - ptr = JsonPointer.from_parts(path + [key]) - yield {'op': 'add', - 'path': ptr.path, - 'value': dst[key]} - - def compare_lists(path, src, dst): - return _compare_lists(path, src, dst, optimization=optimization) - return cls(list(compare_values([], src, dst))) + info = _compare_info() + _compare_values('', None, info, src, dst) + ops = [op for op in info.execute()] + return cls(ops) def to_string(self): """Returns patch set as JSON string.""" @@ -573,248 +559,255 @@ class CopyOperation(PatchOperation): return obj -def _compare_lists(path, src, dst, optimization=True): - """Compares two lists objects and return JSON patch about.""" - patch = list(_compare(path, src, dst, *_split_by_common_seq(src, dst))) - if optimization: - return list(_optimize(patch)) - return patch +class _compare_info(object): + def __init__(self): + self.index_storage = [{}, {}] + self.index_storage2 = [[], []] + self.__root = root = [] + root[:] = [root, root, None] -def _longest_common_subseq(src, dst): - """Returns pair of ranges of longest common subsequence for the `src` - and `dst` lists. + def store_index(self, value, index, st): + try: + storage = self.index_storage[st] + stored = storage.get(value) + if stored == None: + storage[value] = [index] + else: + storage[value].append(index) + except TypeError: + self.index_storage2[st].append((value, index)) - >>> src = [1, 2, 3, 4] - >>> dst = [0, 1, 2, 3, 5] - >>> # The longest common subsequence for these lists is [1, 2, 3] - ... # which is located at (0, 3) index range for src list and (1, 4) for - ... # dst one. Tuple of these ranges we should get back. - ... assert ((0, 3), (1, 4)) == _longest_common_subseq(src, dst) - """ - lsrc, ldst = len(src), len(dst) - drange = list(range(ldst)) - matrix = [[0] * ldst for _ in range(lsrc)] - z = 0 # length of the longest subsequence - range_src, range_dst = None, None - for i, j in itertools.product(range(lsrc), drange): - if src[i] == dst[j]: - if i == 0 or j == 0: - matrix[i][j] = 1 + def take_index(self, value, st): + try: + stored = self.index_storage[st].get(value) + if stored: + return stored.pop() + except TypeError: + storage = self.index_storage2[st] + for i in range(len(storage)-1, -1, -1): + if storage[i][0] == value: + return storage.pop(i)[1] + + def insert(self, op): + root = self.__root + last = root[0] + last[1] = root[0] = [last, root, op] + return root[0] + + def remove(self, index): + link_prev, link_next, _ = index + link_prev[1] = link_next + link_next[0] = link_prev + index[:] = [] + + def iter_from(self, start): + root = self.__root + curr = start[1] + while curr is not root: + yield curr[2] + curr = curr[1] + + def __iter__(self): + root = self.__root + curr = root[1] + while curr is not root: + yield curr[2] + curr = curr[1] + + def execute(self): + root = self.__root + curr = root[1] + while curr is not root: + if curr[1] is not root: + op_first, op_second = curr[2], curr[1][2] + if op_first.key == op_second.key and \ + op_first.path == op_second.path and \ + type(op_first) == _op_remove and \ + type(op_second) == _op_add: + yield _op_replace(op_second.path, op_second.key, op_second.value).get() + curr = curr[1][1] + continue + yield curr[2].get() + curr = curr[1] + +class _op_base(object): + def __init__(self, path, key, value): + self.path = path + self.key = key + self.value = value + + def __repr__(self): + return str(self.get()) + +class _op_add(_op_base): + def _on_undo_remove(self, path, key): + if self.path == path: + if self.key > key: + self.key += 1 else: - matrix[i][j] = matrix[i-1][j-1] + 1 - if matrix[i][j] > z: - z = matrix[i][j] - range_src = (i-z+1, i+1) - range_dst = (j-z+1, j+1) - else: - matrix[i][j] = 0 - return range_src, range_dst - - -def _split_by_common_seq(src, dst, bx=(0, -1), by=(0, -1)): - """Recursively splits the `dst` list onto two parts: left and right. - The left part contains differences on left from common subsequence, - same as the right part by for other side. - - To easily understand the process let's take two lists: [0, 1, 2, 3] as - `src` and [1, 2, 4, 5] for `dst`. If we've tried to generate the binary tree - where nodes are common subsequence for both lists, leaves on the left - side are subsequence for `src` list and leaves on the right one for `dst`, - our tree would looks like:: - - [1, 2] - / \ - [0] [] - / \ - [3] [4, 5] - - This function generate the similar structure as flat tree, but without - nodes with common subsequences - since we're don't need them - only with - left and right leaves:: - - [] - / \ - [0] [] - / \ - [3] [4, 5] - - The `bx` is the absolute range for currently processed subsequence of - `src` list. The `by` means the same, but for the `dst` list. - """ - # Prevent useless comparisons in future - bx = bx if bx[0] != bx[1] else None - by = by if by[0] != by[1] else None + key += 1 + return key - if not src: - return [None, by] - elif not dst: - return [bx, None] + def _on_undo_add(self, path, key): + if self.path == path: + if self.key > key: + self.key -= 1 + else: + key += 1 + return key - # note that these ranges are relative for processed sublists - x, y = _longest_common_subseq(src, dst) + def get(self): + return {'op': 'add', 'path': _path_join(self.path, self.key), 'value': self.value} - if x is None or y is None: # no more any common subsequence - return [bx, by] +class _op_remove(_op_base): + def _on_undo_remove(self, path, key): + if self.path == path: + if self.key >= key: + self.key += 1 + else: + key -= 1 + return key - return [_split_by_common_seq(src[:x[0]], dst[:y[0]], - (bx[0], bx[0] + x[0]), - (by[0], by[0] + y[0])), - _split_by_common_seq(src[x[1]:], dst[y[1]:], - (bx[0] + x[1], bx[0] + len(src)), - (by[0] + y[1], by[0] + len(dst)))] + def _on_undo_add(self, path, key): + if self.path == path: + if self.key > key: + self.key -= 1 + else: + key -= 1 + return key + def get(self): + return {'op': 'remove', 'path': _path_join(self.path, self.key)} -def _compare(path, src, dst, left, right): - """Same as :func:`_compare_with_shift` but strips emitted `shift` value.""" - for op, _ in _compare_with_shift(path, src, dst, left, right, 0): - yield op +class _op_replace(_op_base): + def _on_undo_remove(self, path, key): + return key + def _on_undo_add(self, path, key): + return key -def _compare_with_shift(path, src, dst, left, right, shift): - """Recursively compares differences from `left` and `right` sides - from common subsequences. + def get(self): + return {'op': 'replace', 'path': _path_join(self.path, self.key), 'value': self.value} - The `shift` parameter is used to store index shift which caused - by ``add`` and ``remove`` operations. - Yields JSON patch operations and list index shift. - """ - if isinstance(left, MutableSequence): - for item, shift in _compare_with_shift(path, src, dst, *left, - shift=shift): - yield item, shift - elif left is not None: - for item, shift in _compare_left(path, src, left, shift): - yield item, shift - - if isinstance(right, MutableSequence): - for item, shift in _compare_with_shift(path, src, dst, *right, - shift=shift): - yield item, shift - elif right is not None: - for item, shift in _compare_right(path, dst, right, shift): - yield item, shift - - -def _compare_left(path, src, left, shift): - """Yields JSON patch ``remove`` operations for elements that are only - exists in the `src` list.""" - start, end = left - if end == -1: - end = len(src) - # we need to `remove` elements from list tail to not deal with index shift - for idx in reversed(range(start + shift, end + shift)): - ptr = JsonPointer.from_parts(path + [str(idx)]) - yield ( - {'op': 'remove', - # yes, there should be any value field, but we'll use it - # to apply `move` optimization a bit later and will remove - # it in _optimize function. - 'value': src[idx - shift], - 'path': ptr.path, - }, - shift - 1 - ) - shift -= 1 - - -def _compare_right(path, dst, right, shift): - """Yields JSON patch ``add`` operations for elements that are only - exists in the `dst` list""" - start, end = right - if end == -1: - end = len(dst) - for idx in range(start, end): - ptr = JsonPointer.from_parts(path + [str(idx)]) - yield ( - {'op': 'add', 'path': ptr.path, 'value': dst[idx]}, - shift + 1 - ) - shift += 1 - - -def _optimize(operations): - """Optimizes operations which was produced by lists comparison. - - Actually it does two kinds of optimizations: - - 1. Seeks pair of ``remove`` and ``add`` operations against the same path - and replaces them with ``replace`` operation. - 2. Seeks pair of ``remove`` and ``add`` operations for the same value - and replaces them with ``move`` operation. - """ - result = [] - ops_by_path = {} - ops_by_value = {} - add_remove = set(['add', 'remove']) - for item in operations: - # could we apply "move" optimization for dict values? - hashable_value = not isinstance(item['value'], - (MutableMapping, MutableSequence)) - if item['path'] in ops_by_path: - _optimize_using_replace(ops_by_path[item['path']], item) - continue - if hashable_value and item['value'] in ops_by_value: - prev_item = ops_by_value[item['value']] - # ensure that we processing pair of add-remove ops - if set([item['op'], prev_item['op']]) == add_remove: - _optimize_using_move(prev_item, item) - ops_by_value.pop(item['value']) +class _op_move(object): + def __init__(self, oldpath, oldkey, path, key): + self.oldpath = oldpath + self.oldkey = oldkey + self.path = path + self.key = key + + def _on_undo_remove(self, path, key): + if self.oldpath == path: + if self.oldkey >= key: + self.oldkey += 1 + else: + key -= 1 + if self.path == path: + if self.key > key: + self.key += 1 + else: + key += 1 + return key + + def _on_undo_add(self, path, key): + if self.oldpath == path: + if self.oldkey > key: + self.oldkey -= 1 + else: + key -= 1 + if self.path == path: + if self.key > key: + self.key -= 1 + else: + key += 1 + return key + + def get(self): + return {'op': 'move', 'path': _path_join(self.path, self.key), 'from': _path_join(self.oldpath, self.oldkey)} + + def __repr__(self): + return str(self.get()) + +def _path_join(path, key): + if key != None: + return path + '/' + str(key).replace('~', '~0').replace('/', '~1') + return path + +def _item_added(path, key, info, item): + index = info.take_index(item, _ST_REMOVE) + if index != None: + op = index[2] + if type(op.key) == int: + for v in info.iter_from(index): + op.key = v._on_undo_remove(op.path, op.key) + info.remove(index) + if op.path != path or op.key != key: + new_op = _op_move(op.path, op.key, path, key) + info.insert(new_op) + else: + new_op = _op_add(path, key, item) + new_index = info.insert(new_op) + info.store_index(item, new_index, _ST_ADD) + +def _item_removed(path, key, info, item): + new_op = _op_remove(path, key, item) + index = info.take_index(item, _ST_ADD) + new_index = info.insert(new_op) + if index != None: + op = index[2] + if type(op.key) == int: + for v in info.iter_from(index): + op.key = v._on_undo_add(op.path, op.key) + info.remove(index) + if new_op.path != op.path or new_op.key != op.key: + new_op = _op_move(new_op.path, new_op.key, op.path, op.key) + new_index[2] = new_op + else: + info.remove(new_index) + else: + info.store_index(item, new_index, _ST_REMOVE) + +def _item_replaced(path, key, info, item): + info.insert(_op_replace(path, key, item)) + +def _compare_dicts(path, info, src, dst): + src_keys = _viewkeys(src) + dst_keys = _viewkeys(dst) + added_keys = dst_keys - src_keys + removed_keys = src_keys - dst_keys + for key in removed_keys: + _item_removed(path, str(key), info, src[key]) + for key in added_keys: + _item_added(path, str(key), info, dst[key]) + for key in src_keys & dst_keys: + _compare_values(path, key, info, src[key], dst[key]) + +def _compare_lists(path, info, src, dst): + len_src, len_dst = len(src), len(dst) + max_len = max(len_src, len_dst) + min_len = min(len_src, len_dst) + for key in _range(max_len): + if key < min_len: + old, new = src[key], dst[key] + if old == new: continue - result.append(item) - ops_by_path[item['path']] = item - if hashable_value: - ops_by_value[item['value']] = item - - # cleanup - ops_by_path.clear() - ops_by_value.clear() - for item in result: - if item['op'] == 'remove': - item.pop('value') # strip our hack - yield item - - -def _optimize_using_replace(prev, cur): - """Optimises by replacing ``add``/``remove`` with ``replace`` on same path - - For nested strucures, tries to recurse replacement, see #36 """ - prev['op'] = 'replace' - if cur['op'] == 'add': - # make recursive patch - patch = make_patch(prev['value'], cur['value']) - # check case when dict "remove" is less than "add" and has a same key - if isinstance(prev['value'], dict) and isinstance(cur['value'], dict) and len(prev['value'].keys()) == 1: - prev_set = set(prev['value'].keys()) - cur_set = set(cur['value'].keys()) - if prev_set & cur_set == prev_set: - patch = make_patch(cur['value'], prev['value']) - - if len(patch.patch) == 1 and \ - patch.patch[0]['op'] != 'remove' and \ - patch.patch[0]['path'] and patch.patch[0]['path'].split('/')[1] in prev['value']: - prev['path'] = prev['path'] + patch.patch[0]['path'] - prev['value'] = patch.patch[0]['value'] + _item_removed(path, key, info, old) + _item_added(path, key, info, new) + elif len_src > len_dst: + _item_removed(path, len_dst, info, src[key]) else: - prev['value'] = cur['value'] - - -def _optimize_using_move(prev_item, item): - """Optimises JSON patch by using ``move`` operation instead of - ``remove` and ``add`` against the different paths but for the same value.""" - prev_item['op'] = 'move' - move_from, move_to = [ - (item['path'], prev_item['path']), - (prev_item['path'], item['path']), - ][item['op'] == 'add'] - if item['op'] == 'add': # first was remove then add - prev_item['from'] = move_from - prev_item['path'] = move_to - else: # first was add then remove - head, move_from = move_from.rsplit('/', 1) - # since add operation was first it incremented - # overall index shift value. we have to fix this - move_from = int(move_from) - 1 - prev_item['from'] = head + '/%d' % move_from - prev_item['path'] = move_to + _item_added(path, key, info, dst[key]) + +def _compare_values(path, key, info, src, dst): + if src == dst: + return + elif isinstance(src, dict) and \ + isinstance(dst, dict): + _compare_dicts(_path_join(path, key), info, src, dst) + elif isinstance(src, list) and \ + isinstance(dst, list): + _compare_lists(_path_join(path, key), info, src, dst) + else: + _item_replaced(path, key, info, dst) -- cgit v1.2.1