diff options
author | Lorry <lorry@roadtrain.codethink.co.uk> | 2012-08-22 14:49:51 +0100 |
---|---|---|
committer | Lorry <lorry@roadtrain.codethink.co.uk> | 2012-08-22 14:49:51 +0100 |
commit | a498da43c7fdb9f24b73680c02a4a3588cc62d9a (patch) | |
tree | daf8119dae1749b5165b68033a1b23a7375ce9ce /mercurial/copies.py | |
download | mercurial-tarball-a498da43c7fdb9f24b73680c02a4a3588cc62d9a.tar.gz |
Tarball conversion
Diffstat (limited to 'mercurial/copies.py')
-rw-r--r-- | mercurial/copies.py | 372 |
1 files changed, 372 insertions, 0 deletions
diff --git a/mercurial/copies.py b/mercurial/copies.py new file mode 100644 index 0000000..90aa036 --- /dev/null +++ b/mercurial/copies.py @@ -0,0 +1,372 @@ +# copies.py - copy detection for Mercurial +# +# Copyright 2008 Matt Mackall <mpm@selenic.com> +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +import util +import heapq + +def _nonoverlap(d1, d2, d3): + "Return list of elements in d1 not in d2 or d3" + return sorted([d for d in d1 if d not in d3 and d not in d2]) + +def _dirname(f): + s = f.rfind("/") + if s == -1: + return "" + return f[:s] + +def _findlimit(repo, a, b): + """Find the earliest revision that's an ancestor of a or b but not both, + None if no such revision exists. + """ + # basic idea: + # - mark a and b with different sides + # - if a parent's children are all on the same side, the parent is + # on that side, otherwise it is on no side + # - walk the graph in topological order with the help of a heap; + # - add unseen parents to side map + # - clear side of any parent that has children on different sides + # - track number of interesting revs that might still be on a side + # - track the lowest interesting rev seen + # - quit when interesting revs is zero + + cl = repo.changelog + working = len(cl) # pseudo rev for the working directory + if a is None: + a = working + if b is None: + b = working + + side = {a: -1, b: 1} + visit = [-a, -b] + heapq.heapify(visit) + interesting = len(visit) + hascommonancestor = False + limit = working + + while interesting: + r = -heapq.heappop(visit) + if r == working: + parents = [cl.rev(p) for p in repo.dirstate.parents()] + else: + parents = cl.parentrevs(r) + for p in parents: + if p < 0: + continue + if p not in side: + # first time we see p; add it to visit + side[p] = side[r] + if side[p]: + interesting += 1 + heapq.heappush(visit, -p) + elif side[p] and side[p] != side[r]: + # p was interesting but now we know better + side[p] = 0 + interesting -= 1 + hascommonancestor = True + if side[r]: + limit = r # lowest rev visited + interesting -= 1 + + if not hascommonancestor: + return None + return limit + +def _chain(src, dst, a, b): + '''chain two sets of copies a->b''' + t = a.copy() + for k, v in b.iteritems(): + if v in t: + # found a chain + if t[v] != k: + # file wasn't renamed back to itself + t[k] = t[v] + if v not in dst: + # chain was a rename, not a copy + del t[v] + if v in src: + # file is a copy of an existing file + t[k] = v + + # remove criss-crossed copies + for k, v in t.items(): + if k in src and v in dst: + del t[k] + + return t + +def _tracefile(fctx, actx): + '''return file context that is the ancestor of fctx present in actx''' + stop = actx.rev() + am = actx.manifest() + + for f in fctx.ancestors(): + if am.get(f.path(), None) == f.filenode(): + return f + if f.rev() < stop: + return None + +def _dirstatecopies(d): + ds = d._repo.dirstate + c = ds.copies().copy() + for k in c.keys(): + if ds[k] not in 'anm': + del c[k] + return c + +def _forwardcopies(a, b): + '''find {dst@b: src@a} copy mapping where a is an ancestor of b''' + + # check for working copy + w = None + if b.rev() is None: + w = b + b = w.p1() + if a == b: + # short-circuit to avoid issues with merge states + return _dirstatecopies(w) + + # find where new files came from + # we currently don't try to find where old files went, too expensive + # this means we can miss a case like 'hg rm b; hg cp a b' + cm = {} + for f in b: + if f not in a: + ofctx = _tracefile(b[f], a) + if ofctx: + cm[f] = ofctx.path() + + # combine copies from dirstate if necessary + if w is not None: + cm = _chain(a, w, cm, _dirstatecopies(w)) + + return cm + +def _backwardcopies(a, b): + # because the forward mapping is 1:n, we can lose renames here + # in particular, we find renames better than copies + f = _forwardcopies(b, a) + r = {} + for k, v in f.iteritems(): + r[v] = k + return r + +def pathcopies(x, y): + '''find {dst@y: src@x} copy mapping for directed compare''' + if x == y or not x or not y: + return {} + a = y.ancestor(x) + if a == x: + return _forwardcopies(x, y) + if a == y: + return _backwardcopies(x, y) + return _chain(x, y, _backwardcopies(x, a), _forwardcopies(a, y)) + +def mergecopies(repo, c1, c2, ca): + """ + Find moves and copies between context c1 and c2 that are relevant + for merging. + + Returns two dicts, "copy" and "diverge". + + "copy" is a mapping from destination name -> source name, + where source is in c1 and destination is in c2 or vice-versa. + + "diverge" is a mapping of source name -> list of destination names + for divergent renames. + + "renamedelete" is a mapping of source name -> list of destination + names for files deleted in c1 that were renamed in c2 or vice-versa. + """ + # avoid silly behavior for update from empty dir + if not c1 or not c2 or c1 == c2: + return {}, {}, {} + + # avoid silly behavior for parent -> working dir + if c2.node() is None and c1.node() == repo.dirstate.p1(): + return repo.dirstate.copies(), {}, {} + + limit = _findlimit(repo, c1.rev(), c2.rev()) + if limit is None: + # no common ancestor, no copies + return {}, {}, {} + m1 = c1.manifest() + m2 = c2.manifest() + ma = ca.manifest() + + def makectx(f, n): + if len(n) != 20: # in a working context? + if c1.rev() is None: + return c1.filectx(f) + return c2.filectx(f) + return repo.filectx(f, fileid=n) + + ctx = util.lrucachefunc(makectx) + copy = {} + fullcopy = {} + diverge = {} + + def related(f1, f2, limit): + # Walk back to common ancestor to see if the two files originate + # from the same file. Since workingfilectx's rev() is None it messes + # up the integer comparison logic, hence the pre-step check for + # None (f1 and f2 can only be workingfilectx's initially). + + if f1 == f2: + return f1 # a match + + g1, g2 = f1.ancestors(), f2.ancestors() + try: + f1r, f2r = f1.rev(), f2.rev() + + if f1r is None: + f1 = g1.next() + if f2r is None: + f2 = g2.next() + + while True: + f1r, f2r = f1.rev(), f2.rev() + if f1r > f2r: + f1 = g1.next() + elif f2r > f1r: + f2 = g2.next() + elif f1 == f2: + return f1 # a match + elif f1r == f2r or f1r < limit or f2r < limit: + return False # copy no longer relevant + except StopIteration: + return False + + def checkcopies(f, m1, m2): + '''check possible copies of f from m1 to m2''' + of = None + seen = set([f]) + for oc in ctx(f, m1[f]).ancestors(): + ocr = oc.rev() + of = oc.path() + if of in seen: + # check limit late - grab last rename before + if ocr < limit: + break + continue + seen.add(of) + + fullcopy[f] = of # remember for dir rename detection + if of not in m2: + continue # no match, keep looking + if m2[of] == ma.get(of): + break # no merge needed, quit early + c2 = ctx(of, m2[of]) + cr = related(oc, c2, ca.rev()) + if cr and (of == f or of == c2.path()): # non-divergent + copy[f] = of + of = None + break + + if of in ma: + diverge.setdefault(of, []).append(f) + + repo.ui.debug(" searching for copies back to rev %d\n" % limit) + + u1 = _nonoverlap(m1, m2, ma) + u2 = _nonoverlap(m2, m1, ma) + + if u1: + repo.ui.debug(" unmatched files in local:\n %s\n" + % "\n ".join(u1)) + if u2: + repo.ui.debug(" unmatched files in other:\n %s\n" + % "\n ".join(u2)) + + for f in u1: + checkcopies(f, m1, m2) + for f in u2: + checkcopies(f, m2, m1) + + renamedelete = {} + renamedelete2 = set() + diverge2 = set() + for of, fl in diverge.items(): + if len(fl) == 1 or of in c1 or of in c2: + del diverge[of] # not actually divergent, or not a rename + if of not in c1 and of not in c2: + # renamed on one side, deleted on the other side, but filter + # out files that have been renamed and then deleted + renamedelete[of] = [f for f in fl if f in c1 or f in c2] + renamedelete2.update(fl) # reverse map for below + else: + diverge2.update(fl) # reverse map for below + + if fullcopy: + repo.ui.debug(" all copies found (* = to merge, ! = divergent, " + "% = renamed and deleted):\n") + for f in fullcopy: + note = "" + if f in copy: + note += "*" + if f in diverge2: + note += "!" + if f in renamedelete2: + note += "%" + repo.ui.debug(" %s -> %s %s\n" % (f, fullcopy[f], note)) + del diverge2 + + if not fullcopy: + return copy, diverge, renamedelete + + repo.ui.debug(" checking for directory renames\n") + + # generate a directory move map + d1, d2 = c1.dirs(), c2.dirs() + d1.add('') + d2.add('') + invalid = set() + dirmove = {} + + # examine each file copy for a potential directory move, which is + # when all the files in a directory are moved to a new directory + for dst, src in fullcopy.iteritems(): + dsrc, ddst = _dirname(src), _dirname(dst) + if dsrc in invalid: + # already seen to be uninteresting + continue + elif dsrc in d1 and ddst in d1: + # directory wasn't entirely moved locally + invalid.add(dsrc) + elif dsrc in d2 and ddst in d2: + # directory wasn't entirely moved remotely + invalid.add(dsrc) + elif dsrc in dirmove and dirmove[dsrc] != ddst: + # files from the same directory moved to two different places + invalid.add(dsrc) + else: + # looks good so far + dirmove[dsrc + "/"] = ddst + "/" + + for i in invalid: + if i in dirmove: + del dirmove[i] + del d1, d2, invalid + + if not dirmove: + return copy, diverge, renamedelete + + for d in dirmove: + repo.ui.debug(" dir %s -> %s\n" % (d, dirmove[d])) + + # check unaccounted nonoverlapping files against directory moves + for f in u1 + u2: + if f not in fullcopy: + for d in dirmove: + if f.startswith(d): + # new file added in a directory that was moved, move it + df = dirmove[d] + f[len(d):] + if df not in copy: + copy[f] = df + repo.ui.debug(" file %s -> %s\n" % (f, copy[f])) + break + + return copy, diverge, renamedelete |