summaryrefslogtreecommitdiff
path: root/tools/client-side/svn-vendor.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/client-side/svn-vendor.py')
-rwxr-xr-xtools/client-side/svn-vendor.py1065
1 files changed, 1065 insertions, 0 deletions
diff --git a/tools/client-side/svn-vendor.py b/tools/client-side/svn-vendor.py
new file mode 100755
index 0000000..d0c862c
--- /dev/null
+++ b/tools/client-side/svn-vendor.py
@@ -0,0 +1,1065 @@
+#!/usr/bin/python3
+# vim: set sw=4 expandtab :
+# ====================================================================
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# ====================================================================
+#
+##############################################################################
+# svn-vendor.py
+#
+# Overview
+# --------
+# Replacement for svn_load_dirs.pl (included as a 'contributed utility' in
+# Subversion sources). Main difference is some heuristics in detection of
+# the renames. Note that this script does not attempt to automate remote
+# SVN operations (check-out, check-in and tagging), so it is possible to
+# review the state of sources that are about to be checked in. Another
+# difference is an ability to save the detected renames, review/re-apply
+# them.
+#
+# This script requires Python 3.3.x or higher. Sorry, I was too lazy
+# to write shell quoting routines that are already available in recent
+# Python versions.
+#
+# Using this script
+# -----------------
+# First, it is necessary to check out the working copy from the URL that
+# will host the imported sources. E.g., if the versions of FOO are being
+# imported into svn://example.com/vendor/FOO/current:
+#
+# svn co svn://example.com/vendor/FOO/current wc
+#
+# Then, unpack the sources of the version to be imported:
+#
+# tar xzf foo-1.1.tar.gz
+#
+# Examples below assume the command above created a `foo-1.1' directory.
+# After that, there are three different modes of operation:
+#
+# 1. Fully automatic
+#
+# svn-vendor.py --auto wc foo-1.1
+# svn st wc
+# svn ci wc
+#
+# In this mode, the script fully relies on its heuristics in detection of
+# renames. In many cases, it "just works". There can be spurious moves
+# detected in this mode, though. For example, consider a deleted header
+# that consists of 50 lines of GPL text, 1 line of copyright, and
+# 3 lines of declarations, and a similar unrelated header in the imported
+# sources. From the script's point of view, the files are nearly identical
+# (4 lines removed, 4 lines added, 50 lines unchanged).
+#
+# After the script completes, examine the working copy by doing 'svn diff'
+# and/or 'svn status', paying particular attention to renames. If all the
+# moves are detected correctly, check in the changes in the working copy.
+#
+# 2. Semi-automatic
+#
+# svn-vendor.py --detect moves-foo-1.1.txt wc foo-1.1
+# vi moves-foo-1.1.txt
+# svn-vendor.py --apply moves-foo-1.1.txt wc foo-1.1
+# svn ci wc
+#
+# If the fully automatic mode mis-detected some spurious moves, or did not
+# detect some renames you want to be performed, it is still possible to
+# leverage what the script has detected automatically. First command above
+# does the automatic detection, just as it does in fully automatic mode,
+# but stops short of performing any modification of the working copy.
+# The list of detected copies and renames is saved into a text file,
+# `moves-foo-1.1.txt'.
+#
+# That file can be inspected after the script finishes. Spurious moves can
+# be deleted from the file, and new copies/renames can be added. Then the
+# changes can be applied to the working copy.
+#
+# 3. Manual
+#
+# svn-vendor.py wc foo-1.1
+# (svn-vendor) detect
+# (svn-vendor) move x.c y.c
+# (svn-vendor) move include/1.h include/2.h
+# (svn-vendor) copy include/3.h include/3-copy.h
+# (svn-vendor) lsprep
+# (svn-vendor) save /tmp/renames-to-be-applied.txt
+# (svn-vendor) apply
+#
+# If the automatic detection does not help, it is possible to do the renames
+# manually (similarly to svn_load_dirs.pl). Use the 'help' command to get
+# the list of supported commands and their description. Feel free to play
+# around - since the script does not perform any remote SVN operation,
+# there is no chance to commit the changes accidentally.
+#
+# Notes
+# -----
+# I. The time for rename detection is O(Fs*Fd) + O(Ds*Dd), where Fs is
+# the number of files removed from current directory, Fd is number of files
+# added in imported sources, and Ds/Dd is the same for directories. That is,
+# the running time may become an issue if the numbers of added/removed files
+# go into a few thousands (e.g. if updating Linux kernel 2.6.35 to 3.10).
+# As a workaround, import interim releases first so that the number of
+# renames remains sane at each step. That makes reviewing the renames
+# performed by the script much easier.
+#
+# Enjoy!
+#
+##############################################################################
+
+import argparse
+import cmd
+import difflib
+import filecmp
+import os
+import readline
+import shlex
+import shutil
+import subprocess
+import sys
+
def name_similarity(n1, n2):
    '''
    Sort key for ordering dir/file names by how closely they match:
    0.0 means the two names are identical, 1.0 means they share nothing.
    '''
    matcher = difflib.SequenceMatcher(None, n1, n2)
    dissimilarity = 1.0 - matcher.ratio()
    return dissimilarity
+
+
def filename_sort_key(s):
    '''
    Sort key that keeps a parent directory immediately followed by its
    children. The '/' separator is mapped to a very low character so that,
    e.g., "/a/b" sorts before "/a-b" (plain string ordering would put
    "/a-b" between "/a" and "/a/b").
    '''
    components = s.split('/')
    return '\001'.join(components)
+
+
def descendant_or_self(path, ancestor):
    '''
    Return True if path is the ancestor itself or lies somewhere below it
    in the hierarchy (compared textually, using os.sep as the separator).
    '''
    if path == ancestor:
        return True
    prefix = ancestor + os.sep
    return path.startswith(prefix)
+
def path_rebase(path, old_base, new_base):
    '''
    Translate path from below old_base to the same relative location below
    new_base. The caller guarantees that path is old_base or one of its
    descendants; old_base itself maps to new_base unchanged.
    '''
    if path == old_base:
        return new_base
    relative = os.path.relpath(path, old_base)
    rebased = os.path.join(new_base, relative)
    return os.path.normpath(rebased)
+
+
def for_all_parents(path, func):
    '''
    Invoke func for each parent directory of path, from the closest parent
    up to the topmost one.

    For a relative path the walk stops once the directory part becomes
    empty. For an absolute path the root is visited once and the walk
    stops there: os.path.dirname() of a root returns the root itself, so
    without this check the loop would never terminate.
    '''
    d = os.path.dirname(path)
    while d != "":
        func(d)
        parent = os.path.dirname(d)
        if parent == d:
            # Reached a filesystem root - nothing above it
            break
        d = parent
+
class InvalidUsageException(Exception):
    '''
    Raised if command line arguments are invalid.

    cmd is the name of the offending command (or None if the error is not
    tied to a particular command); msg is the human-readable explanation.
    Both arguments are optional so that raising the bare class, as some
    code in this file does, yields a usable exception rather than a
    TypeError from the constructor.
    '''
    def __init__(self, cmd=None, msg=""):
        Exception.__init__(self, msg)
        self.cmd = cmd
+
+
class NotImplementedException(Exception):
    '''
    Signals that execution reached a code path this script does not
    implement.
    '''
+
+
# Positions inside the two-element FSO.state list:
# S_WC - kind of the object in the working copy,
# S_IM - kind of the object in the imported sources.
S_WC, S_IM = 0, 1
+
class FSO(object):
    '''
    A single file system object (file or directory) as seen in the working
    copy and/or in the imported directory.
    '''
    def __init__(self):
        # Path this object originally had in the working copy (None if it
        # was never seen there).
        self.wc_path = None
        # Kind per location, indexed by S_WC / S_IM:
        # '-': absent, 'F': file, 'D': dir
        self.state = ["-", "-"]

    def status(self):
        'Two-character status marker, e.g. "[F-]"'
        wc_kind = self.state[S_WC]
        im_kind = self.state[S_IM]
        return "[%s%s]" % (wc_kind, im_kind)

    def orig_reference(self, curpath):
        'Suffix naming the original WC path if it differs from curpath'
        if not self.wc_path or self.wc_path == curpath:
            return ""
        return " (original: %s)" % shlex.quote(self.wc_path)
+
+
class FSOCollection(dict):
    '''
    Mapping of normalized relative path -> FSO, covering both the working
    copy and the imported sources.
    '''
    def print(self):
        'Dump all entries with their status, parents before children'
        print(" / Status in working copy (-:absent, F:file, D:dir)")
        print(" |/ Status in imported sources (-:absent, F:file, D:dir)")
        for path in sorted(self, key=filename_sort_key):
            entry = self[path]
            print("%s %s%s" % (entry.status(), shlex.quote(path),
                               entry.orig_reference(path)))

    def get(self, path):
        'Return the FSO stored under path, creating an empty one if needed'
        try:
            return self[path]
        except KeyError:
            created = self[path] = FSO()
            return created

    def add(self, path, where, kind):
        '''
        Record an entry found during the initial scan: where is S_WC or
        S_IM, kind is "F" or "D".
        '''
        normalized = os.path.normpath(path)
        entry = self.get(normalized)
        entry.state[where] = kind
        if where == S_WC:
            entry.wc_path = normalized

    def wc_copy(self, src, dst):
        '''
        Reflect a copy of src (and everything below it) to dst in the
        working copy state. Raises NotImplementedException if any target
        path already exists in the working copy; callers are expected to
        have checked that.
        '''
        # Take the list of affected paths up front: entries are added to
        # the collection while iterating.
        affected = [p for p in self if descendant_or_self(p, src)]
        for p in affected:
            source = self[p]
            if source.state[S_WC] == "-":
                continue
            target = self.get(path_rebase(p, src, dst))
            if target.state[S_WC] != "-":
                # Copying into existing destination.
                raise NotImplementedException
            target.wc_path = source.wc_path
            target.state[S_WC] = source.state[S_WC]

    def wc_remove(self, path):
        'Mark path and everything below it as absent from the working copy'
        affected = [p for p in self if descendant_or_self(p, path)]
        for p in affected:
            self[p].state[S_WC] = "-"
+
+
class ConfigOpt(object):
    'Helper class - a single configuration option holding a string value'
    def __init__(self, value, helpmsg):
        self.value = value
        self.helpmsg = helpmsg

    def set(self, new_value):
        'Replace the stored value'
        self.value = new_value

    def __str__(self):
        if self.value is None:
            return "<none>"
        return "`%s'" % self.value
+
+
class ConfigOptInt(ConfigOpt):
    'Helper class - a single configuration option holding an integer value'
    def set(self, new_value):
        'Store new_value converted to int; reject non-numeric strings'
        try:
            parsed = int(new_value)
        except ValueError:
            raise InvalidUsageException(None, "Value must be integer")
        self.value = parsed

    def __str__(self):
        return "%d" % self.value
+
+
class Config(dict):
    '''
    Named configuration options; maps option name to its option object.
    '''
    def add_option(self, name, cfgopt):
        'Register option object cfgopt under name'
        self[name] = cfgopt

    def set(self, name, value):
        'Assign value to a registered option; unknown names are a usage error'
        if name in self:
            self[name].set(value)
            return
        raise InvalidUsageException(None,
                                    "Unknown config variable '%s'" % name)

    def get(self, name):
        'Return the current value of a registered option'
        if name in self:
            return self[name].value
        # Asking for an option nobody registered is a programming error
        raise NotImplementedException()

    def print(self):
        'Print every option with its help message, sorted by name'
        for name in sorted(self):
            opt = self[name]
            print("# %s" % opt.helpmsg)
            print("%-20s: %s" % (name, str(opt)))
            print()
+
+
class SvnVndImport(cmd.Cmd):
    '''
    Main driving class: a command interpreter that scans the working copy
    and the imported sources, records preparatory copy/move/remove
    operations (entered manually, loaded from a file or auto-detected) and
    finally applies them to the working copy by running 'svn'.
    '''
    intro = "Welcome to SVN vendor import helper. " + \
            "Type help or ? to list commands.\n"
    prompt = "(svn-vendor) "
    # Kept at class level for backward compatibility only; __init__ gives
    # every instance its own list so recorded operations are never shared.
    prepare_ops = []

    def __init__(self, wcdir, importdir, svninfo):
        '''
        wcdir: path to the working copy, importdir: path to the imported
        sources, svninfo: text output of 'svn info' for the working copy.
        '''
        cmd.Cmd.__init__(self)
        self.wcdir = wcdir
        self.importdir = importdir
        self.svninfo = svninfo
        self.prepare_ops = []
        self.config = Config()
        self.config.add_option('save-diff-copied',
                ConfigOpt(None, "Save 'svn diff' output on the " +
                    "moved/copied files and directories to this " +
                    "file as part of 'apply'"))
        self.config.add_option('dir-similarity',
                ConfigOptInt(600, "Similarity between dirs to assume " +
                    "a copy/move [0..1000]"))
        self.config.add_option('file-similarity',
                ConfigOptInt(600, "Similarity between files to assume a " +
                    "copy/move [0..1000]"))
        self.config.add_option('file-min-lines',
                ConfigOptInt(10, "Minimal number of lines in a file for " +
                    "meaningful comparison"))
        self.config.add_option('verbose',
                ConfigOptInt(3, "Verbosity of the output [0..5]"))
        try:
            self.termwidth = os.get_terminal_size()[0]
        except OSError:
            # Not running in a terminal - probably redirected to file
            self.termwidth = 150  # arbitrary number

    def info(self, level, msg):
        'Print message if the configured verbosity is at least level'
        if level <= self.config.get('verbose'):
            print(msg, flush=True)

    def scan(self):
        'Build the FSO collection from the working copy and imported dir'
        self.items = FSOCollection()
        self.info(1, "Scanning working copy directory...")
        self.get_lists(self.wcdir, S_WC)
        self.info(1, "Scanning imported directory...")
        self.get_lists(self.importdir, S_IM)

    def get_lists(self, top, where):
        '''
        Walk the tree under top and register every directory and file in
        self.items for location where (S_WC or S_IM). Anything named
        '.svn' and everything below it is ignored.
        '''
        for d, dn, fn in os.walk(top, followlinks=True):
            # Prune administrative directories (at the top for SVN 1.7+,
            # in every directory for older SVN) so that they are neither
            # recorded nor descended into.
            dn[:] = [name for name in dn if name != '.svn']
            dr = os.path.relpath(d, top)
            if dr != '.':
                self.items.add(dr, where, "D")
            for f in fn:
                fr = os.path.normpath(os.path.join(dr, f))
                self.items.add(fr, where, "F")

    def onecmd(self, str):
        '''
        Run one command line; usage errors raised by the command are
        reported to the user (with the command's help, when known) instead
        of propagating.
        '''
        try:
            return cmd.Cmd.onecmd(self, str)
        except InvalidUsageException as e:
            if e.cmd is not None:
                print("!!! Invalid usage of `%s' command: %s" % (e.cmd, e))
                print()
                self.onecmd("help " + e.cmd)
            else:
                print("!!! %s" % e)

    def parse_args(self, line, nargs, cmd):
        '''
        Split line using shell quoting rules and return the argument list.
        Raises InvalidUsageException for command cmd unless exactly nargs
        arguments were given.
        '''
        args = shlex.split(line)
        if len(args) != nargs:
            raise InvalidUsageException(cmd, "expect %d arguments" % nargs)
        return args

    def run_svn(self, args_fixed, args_split=None):
        '''
        Run 'svn' in the working copy directory with arguments args_fixed.
        If args_split is given, its items are appended in batches so that
        a long list is spread over several invocations.

        Returns (ok, output): ok is False if any invocation failed (its
        stderr is printed), output is the concatenated stdout of the
        successful invocations.
        '''
        if args_split is None:
            args_split = []
        atatime = 100
        # An empty split list still means one invocation with fixed args
        batches = [args_split[pos : pos + atatime]
                   for pos in range(0, len(args_split), atatime)] or [[]]
        rv = True
        output = ""
        for batch in batches:
            svnargs = ['svn'] + args_fixed + batch
            cmdline = " ".join(map(shlex.quote, svnargs))
            self.info(5, "Running: " + cmdline)
            p = subprocess.Popen(args=svnargs, stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE, cwd=self.wcdir)
            so, se = p.communicate()
            if p.returncode != 0:
                print("`%s' exited with %d status:" %
                        (cmdline, p.returncode))
                print(se.decode())
                rv = False
            else:
                output += so.decode()
        return rv, output

    def copy_or_move(self, op, src, dst):
        '''
        Record a copy (op "cp") or move (op "mv") of src to dst and update
        the tracked working copy state. Raises InvalidUsageException if
        src is unknown, dst already exists, or dst would end up below a
        file.
        '''
        if src not in self.items or self.items[src].state[S_WC] == "-":
            raise InvalidUsageException(None,
                    "Nothing known about `%s'" % src)
        if dst in self.items and self.items[dst].state[S_WC] != "-":
            raise InvalidUsageException(None,
                    "Destination path `%s' already exists" % dst)
        # Check that we're not creating dst under a file (not a dir)
        new_dirs = []
        def check_parent(d):
            if d not in self.items or self.items[d].state[S_WC] == "-":
                new_dirs.append(d)
            elif self.items[d].state[S_WC] == "F":
                # Parenthesized: '%' binds tighter than '+', so the whole
                # message must be joined before formatting.
                raise InvalidUsageException(None,
                        ("Destination path `%s' created under `%s' " +
                         "which is a file") % (dst, d))
        for_all_parents(dst, check_parent)
        # All ok, record new directories that may be created
        for d in new_dirs:
            self.items.get(d).state[S_WC] = "D"
        # Record the operation and update the FSO collection
        self.prepare_ops.append((op, src, dst))
        self.items.wc_copy(src, dst)
        if op == "mv":
            self.items.wc_remove(src)

    def remove(self, path):
        '''
        Record removal of path and update the tracked working copy state.
        Raises InvalidUsageException if path is not in the working copy.
        '''
        if path not in self.items or self.items[path].state[S_WC] == "-":
            raise InvalidUsageException(None,
                    "Nothing known about `%s'" % path)
        self.prepare_ops.append(("rm", path))
        self.items.wc_remove(path)

    def similarity_file(self, src, dst, threshold, lst_removal):
        '''
        Compare two files, return similarity ratio on 0..1000 scale.
        src is read from the working copy (at its original location), dst
        from the imported directory. Files shorter than 'file-min-lines'
        score 0. Files that cannot be decoded as text score 1000 if
        byte-identical and 0 otherwise. threshold and lst_removal are not
        used here; they keep the signature parallel to similarity_dir.
        '''
        if self.items[src].state[S_WC] != "F":
            return 0
        # Source is in working copy
        fn1 = os.path.join(self.wcdir, self.items[src].wc_path)
        # Destination is in imported dir
        fn2 = os.path.join(self.importdir, dst)
        minlines = self.config.get('file-min-lines')
        try:
            with open(fn1, 'r') as f1:
                l1 = f1.readlines()
            if len(l1) < minlines:
                return 0
            with open(fn2, 'r') as f2:
                l2 = f2.readlines()
            if len(l2) < minlines:
                return 0
            sm = difflib.SequenceMatcher(a=l1, b=l2)
            return int(1000 * sm.quick_ratio())
        except UnicodeDecodeError:
            # Oops, file seems to be binary. Fall back to comparing whole
            # file contents.
            if filecmp.cmp(fn1, fn2, shallow=False):
                return 1000
            return 0

    def _similarity_dir(self, src, dst, get_file_similarity, lst_removal):
        '''
        Iterate over the FSOs below src, scoring each against its
        counterpart below dst; get_file_similarity(src_path, dst_path) is
        the callback that scores a pair of files. Entries under any path
        in lst_removal are left out. Returns a ratio on 0..1000 scale.
        '''
        common = 0
        total = 0
        prefix = src + os.sep
        for xsrc in self.items:
            if not xsrc.startswith(prefix):
                continue
            esrc = self.items[xsrc]
            if esrc.state[S_WC] == "-":
                # Source not in WC - ignore for similarity calculation
                continue
            if lst_removal is not None and \
                    any(descendant_or_self(xsrc, i) for i in lst_removal):
                # Moved to another place, do not consider in score
                continue
            total += 1000
            xdst = path_rebase(xsrc, src, dst)
            if xdst not in self.items:
                # Destination not in imported sources - non-similar item
                continue
            edst = self.items[xdst]
            if edst.state[S_IM] == esrc.state[S_WC]:
                if esrc.state[S_WC] == "D":
                    common += 1000
                else:
                    common += get_file_similarity(xsrc, xdst)
        if total == 0:
            # No files/subdirs in source directory - avoid copying empty dirs
            return 0
        return 1000 * common / total

    def similarity_dir(self, src, dst, threshold, lst_removal):
        '''
        Compare two dirs recursively, return similarity ratio on
        0..1000 scale. Returns 0 without reading any file if even the
        name-based upper bound does not exceed threshold.
        '''
        # Quickly estimate upper boundary by comparing file names. Only
        # concern ourselves with files in source directory. I.e., if
        # files were added after the move in the destination directory,
        # it's ok. If most of the files from the source directory were
        # removed, the directory is not considered similar - instead,
        # file move detection would move files one by one.
        upper = self._similarity_dir(src, dst, lambda s, d: 1000, lst_removal)
        if upper <= threshold:
            # Even the best estimate is worse than current cut-off
            return 0
        # Okay, looks roughly similar. Now redo the above procedure, but also
        # compare the file content.
        return self._similarity_dir(src, dst,
                lambda s, d: self.similarity_file(s, d, 0, lst_removal),
                lst_removal)

    def similar(self, src, dst, threshold=0, lst_removal=None):
        '''
        Compare two FSOs, source in WC and destination in imported dir.
        Returns the similarity on 0..1000 scale, or None (after printing
        an explanation) if src or dst is not where it is expected to be.
        '''
        if src not in self.items:
            print("Source `%s' not in the working copy" % src)
            return
        xsrc = self.items[src]
        if xsrc.state[S_WC] == "-":
            print("Source `%s' not in the working copy" % src)
            return
        if dst not in self.items:
            print("Destination `%s' not in imported sources" % dst)
            return
        xdst = self.items[dst]
        if xdst.state[S_IM] == "-":
            print("Destination `%s' not in imported sources" % dst)
            return
        if xsrc.state[S_WC] != xdst.state[S_IM]:
            # Different kinds - definitely not the same object
            return 0
        if xsrc.state[S_WC] == "D":
            return self.similarity_dir(src, dst, threshold, lst_removal)
        return self.similarity_file(src, dst, threshold, lst_removal)

    def handle_op(self, op_tuple):
        '''
        Handle one SVN operation, recorded as a sequence
        (op, arg, ...) with op being one of 'cp', 'mv', 'rm'. Returns
        False if the operation is unknown or has a wrong number of
        arguments, True once it has been recorded. Raises
        InvalidUsageException for an empty sequence or if the operation
        itself is rejected.
        '''
        def x_mv(src, dst):
            self.info(2, " Move `%s' to `%s'" % (src, dst))
            self.copy_or_move("mv", src, dst)
        def x_cp(src, dst):
            self.info(2, " Copy `%s' to `%s'" % (src, dst))
            self.copy_or_move("cp", src, dst)
        def x_rm(path):
            self.info(2, " Remove `%s'" % path)
            self.remove(path)
        known_ops = {
            # key: (nargs, handler); nargs counts the operation name too
            'cp' : (3, x_cp),
            'mv' : (3, x_mv),
            'rm' : (2, x_rm),
        }
        if len(op_tuple) == 0:
            raise InvalidUsageException(None, "Empty operation")
        op = op_tuple[0]
        if op not in known_ops:
            return False
        nargs, func = known_ops[op]
        if nargs != len(op_tuple):
            return False
        func(*op_tuple[1:])
        return True

    def detect(self, thresholds):
        '''
        Helper for finding copy/move destinations. thresholds maps node
        kind ("D" or "F") to the minimal similarity (0..1000 scale) that
        must be exceeded to assume a copy/move. Detected operations are
        recorded via handle_op; the working copy on disk is not touched.
        '''
        ilst = []
        wlst = {}
        ilst_map = {}
        for p in self.items:
            e = self.items[p]
            if e.state[S_WC] != "-" and e.state[S_IM] == "-":
                wlst[p] = []  # wlst hash stores copy destinations
            elif e.state[S_WC] == "-" and e.state[S_IM] != "-":
                # ilst just lists destination paths as tuples with node kind
                ilst.append((e.state[S_IM], p))
        iteration = 0
        # Do not apply operations immediately - we'll need to post-process
        # them to account for files/dirs moved inside a moved parent dir.
        ops = []
        to_be_removed = []

        def get_renamed_name(path, rename_ops):
            '''
            Check if path was renamed/removed in the recorded operations,
            return new name.
            '''
            for op_tuple in rename_ops:
                # Since copies do not remove the source file, ignore them.
                # We push no 'rm' ops in this function
                if op_tuple[0] == "mv":
                    src = op_tuple[1]
                    dst = op_tuple[2]
                    if descendant_or_self(path, src):
                        path = path_rebase(path, src, dst)
            return path

        class SkipDestFile(Exception):
            'Destination is already covered by a copied parent directory'
            pass

        while len(wlst):
            iteration += 1
            self.info(2, ("Iteration %d: Possible sources: %d, " +
                "possible destinations: %d") %
                (iteration, len(wlst), len(ilst)))
            ndst = len(ilst)
            for idx, (nk, dst) in enumerate(sorted(ilst,
                    key=lambda s: filename_sort_key(s[1]))):
                # Check if moved as a part of a parent directory.
                def check_moved_parent(xdst):
                    if xdst in ilst_map:
                        src = path_rebase(dst, xdst, ilst_map[xdst])
                        # Did it exist in copied directory?
                        if src in self.items and \
                                self.items[src].state[S_WC] == nk:
                            sim = self.similar(src, dst, thresholds[nk],
                                    to_be_removed)
                            if sim > thresholds[nk]:
                                self.info(2, (" [%04d/%04d] Skipping `%s' " +
                                    "(copied as part of `%s')") %
                                    (idx, ndst, dst, xdst))
                                raise SkipDestFile
                        # Copied, not similar - search for other sources
                        raise StopIteration
                try:
                    for_all_parents(dst, check_moved_parent)
                except SkipDestFile:
                    continue
                except StopIteration:
                    pass
                self.info(2, (" [%04d/%04d] Looking for possible source " +
                    "for `%s'") % (idx, ndst, dst))
                bestsrc = None
                # Won't even consider those lower than threshold
                bestsim = thresholds[nk]
                for src in sorted(wlst.keys(),
                        key=lambda x: name_similarity(x, dst)):
                    sim = self.similar(src, dst, bestsim, to_be_removed)
                    if sim > bestsim:
                        self.info(3, " [similarity %4d] %s" % (sim, src))
                        bestsim = sim
                        bestsrc = src
                        if bestsim == 1000:
                            # No chance we're finding anything better
                            break
                if bestsrc is not None:
                    wlst[bestsrc].append(dst)
                    ilst_map[dst] = bestsrc

            # Discovered all copies/moves, now record them.
            new_wlst = {}
            for src in sorted(wlst.keys(), key=filename_sort_key):
                dlist = wlst[src]
                if len(dlist) == 0:
                    continue
                if len(dlist) == 1:
                    ops.append(("mv", src, dlist[0]))
                    to_be_removed.append(src)
                else:
                    # We don't remove the source here, it will be done when
                    # the changes are applied (it will remove all the WC files
                    # not found in imported sources). Avoiding removal here
                    # simplifies operation sorting below, since we would not
                    # be concerned with source file/dir disappearing before
                    # it is copied to its destination.
                    to_be_removed.append(src)
                    for d in dlist:
                        ops.append(("cp", src, d))
                # If we copied something - recheck parent source directories.
                # Since some source file/dir was scheduled to be removed,
                # this may have increased the similarity to some destination.
                def recheck_parent(x):
                    # NOTE(review): wlst is never empty inside this loop, so
                    # this condition never holds and no second iteration
                    # happens. Presumably `len(wlst[x]) == 0' was intended;
                    # left unchanged as enabling it alters detection results
                    # - confirm before changing.
                    if x in wlst and len(wlst) == 0:
                        new_wlst[x] = []
                for_all_parents(src, recheck_parent)

            # At this point, if we're going to have the next iteration, we
            # are only concerned about directories (by the way new_wlst is
            # created above). So, filter out all files from ilst as well.
            wlst = new_wlst
            ilst = [t for t in ilst if t[0] == 'D']

        # Finished collecting the operations - now can post-process and
        # apply them. First, sort copies/moves by destination (so that
        # parent directories are created before files/subdirs are
        # copied/renamed inside)
        ops = sorted(ops, key=lambda op: filename_sort_key(op[2]))
        for i, op_tuple in enumerate(ops):
            # For each operation, go over its precedents to see if the source
            # has been renamed. If it is, find out new name.
            op = op_tuple[0]
            src = get_renamed_name(op_tuple[1], reversed(ops[:i]))
            if src != op_tuple[2]:
                # Unless it became the same file after renames
                try:
                    # Try to remove the destination, if it existed
                    self.remove(op_tuple[2])
                except InvalidUsageException:
                    # Okay, it didn't exist
                    pass
                self.handle_op((op, src, op_tuple[2]))

    def do_detect(self, arg):
        '''
        detect : auto-detect possible moves (where source/destination name
                 is unique). If not all moves are applicable, save move list,
                 edit and load.
        '''
        self.parse_args(arg, 0, "detect")
        self.detect({ "D": self.config.get('dir-similarity'),
            "F": self.config.get('file-similarity')})

    def do_apply(self, arg):
        '''
        apply : Perform copies/renames; then copy imported sources into
                the working copy. Modifies working copy. Exits after
                completion.
        '''
        self.info(1, "Copying imported sources into working copy...")
        # Perform the recorded copies/moves/removals
        self.info(2, " Preparatory operations (copies/renames/removals)")
        to_be_diffed = []
        for o in self.prepare_ops:
            op = o[0]
            if op == "mv":
                self.run_svn(["mv", "--parents", o[1], o[2]])
                to_be_diffed.append(o[2])
            elif op == "cp":
                self.run_svn(["cp", "--parents", o[1], o[2]])
                to_be_diffed.append(o[2])
            elif op == "rm":
                # --force, as the removed path is likely created as a result
                # of previous copy/rename
                self.run_svn(["rm", "--force", o[1]])
        dirs_added = []
        dirs_removed = []
        files_added = []
        files_removed = []
        self.info(2, " Creating dirs and copying files...")
        # Plain string order already lists a directory before its contents
        for i in sorted(self.items.keys()):
            e = self.items[i]
            nk_wc = e.state[S_WC]
            nk_im = e.state[S_IM]
            wc_item = os.path.join(self.wcdir, i)
            im_item = os.path.join(self.importdir, i)
            flg = None
            if nk_wc == "-":
                # Absent in working copy
                if nk_im == "D":
                    # Directory added
                    os.mkdir(wc_item)
                    dirs_added.append(i)
                    flg = "(added dir)"
                elif nk_im == "F":
                    # New file added
                    shutil.copyfile(im_item, wc_item)
                    files_added.append(i)
                    flg = "(added file)"
                else:
                    # Not in imported sources, not in WC (moved
                    # away/removed) - nothing to do
                    pass
            elif nk_wc == "F":
                # File in a working copy
                if nk_im == "D":
                    # File replaced with a directory. Need to remove the
                    # file immediately, as bulk removals/additions assume
                    # new files and dirs already in place.
                    self.run_svn(["rm", "--force", i])
                    os.mkdir(wc_item)
                    dirs_added.append(i)
                    flg = "(replaced file with dir)"
                elif nk_im == "F":
                    # Was a file, is a file - just copy contents
                    shutil.copyfile(im_item, wc_item)
                    flg = "(copied)"
                else:
                    # Was a file, removed
                    files_removed.append(i)
                    flg = "(removed file)"
            elif nk_wc == "D":
                # Directory in a working copy
                if nk_im == "D":
                    # Was a directory, is a directory - nothing to do
                    pass
                elif nk_im == "F":
                    # Directory replaced with file. Need to remove dir
                    # immediately, as bulk removals/additions assume new files
                    # and dirs already in place.
                    self.run_svn(["rm", "--force", i])
                    shutil.copyfile(im_item, wc_item)
                    files_added.append(i)
                    flg = "(replaced dir with file)"
                else:
                    # Directory removed
                    dirs_removed.append(i)
                    flg = "(removed dir)"
            if flg is not None:
                self.info(4, " %s %s %s" % (e.status(), i, flg))

        def outside_of(paths, parents):
            'Keep only the paths whose immediate parent is not in parents'
            parent_set = set(parents)
            return [x for x in paths
                    if os.path.dirname(x) not in parent_set]

        # Filter files/directories removed as a part of parent directory
        files_removed = outside_of(files_removed, dirs_removed)
        dirs_removed = outside_of(dirs_removed, dirs_removed)
        files_added = outside_of(files_added, dirs_added)
        dirs_added = outside_of(dirs_added, dirs_added)
        self.info(2, " Running SVN add/rm commands")
        if len(dirs_added):
            self.run_svn(["add"], dirs_added)
        if len(files_added):
            self.run_svn(["add"], files_added)
        if len(dirs_removed):
            self.run_svn(["rm"], dirs_removed)
        if len(files_removed):
            self.run_svn(["rm"], files_removed)
        # Save the diff for the copied/moved items
        diff_save = self.config.get('save-diff-copied')
        if diff_save is not None:
            self.info(2, " Saving 'svn diff' on copied files/dirs to `%s'" %
                    diff_save)
            to_be_diffed = outside_of(to_be_diffed, to_be_diffed)
            if len(to_be_diffed):
                try:
                    rv, out = self.run_svn(["diff"], to_be_diffed)
                except UnicodeDecodeError:
                    # Some binary files not marked with appropriate MIME type,
                    # or broken text files
                    rv, out = (True, "WARNING: diff contained binary files\n")
            else:
                rv, out = (True, "")
            if rv:
                with open(diff_save, "w") as f:
                    f.write(out)
        # Exiting, as the resulting working copy can no longer be used
        # for move analysis
        self.info(1, "Done. Exiting; please examine the working copy " +
                "and commit.")
        return True

    def do_similarity(self, arg):
        '''
        similarity SRC DST : estimate whether SRC could be potential source
                             for DST (0=no match, 1000=perfect match)
        '''
        src, dst = self.parse_args(arg, 2, "similarity")
        sim = self.similar(src, dst)
        if sim is not None:
            print("Similarity between source `%s' and destination `%s': %4d" %
                    (src, dst, sim))

    def do_set(self, arg):
        '''
        set         : display current settings
        set CFG VAL : set a config variable
        '''
        if arg.strip() == '':
            self.config.print()
        else:
            cfg, val = self.parse_args(arg, 2, "set")
            self.config.set(cfg, val)

    def do_move(self, arg):
        '''
        move SRC DST : Perform a move from source to destination
        '''
        src, dst = self.parse_args(arg, 2, "move")
        self.copy_or_move("mv", src, dst)

    def do_copy(self, arg):
        '''
        copy SRC DST : Perform a copy from source to destination
        '''
        src, dst = self.parse_args(arg, 2, "copy")
        self.copy_or_move("cp", src, dst)

    def do_remove(self, arg):
        '''
        remove PATH : Remove a path
        '''
        path = self.parse_args(arg, 1, "remove")[0]
        self.remove(path)

    def do_lsprep(self, arg):
        '''
        lsprep : List the currently recorded moves/copies/removals
        '''
        self.parse_args(arg, 0, "lsprep")
        colsz = int((self.termwidth - 14) / 2)
        if len(self.prepare_ops):
            print("Currently recorded preparatory operations:")
            print()
            print("%5s %s %-*s %-*s" %
                    ("#", "Op", colsz, "Source", colsz, "Destination"))
            for num, o in enumerate(self.prepare_ops):
                if num % 10 == 0:
                    # Separator line before every group of ten operations
                    print("%5s %s %*s %*s" %
                            ("-"*5, "--", colsz, "-"*colsz, colsz, "-"*colsz))
                if len(o) == 3:
                    print("%5d %s %-*s %-*s" %
                            (num, o[0], colsz, o[1], colsz, o[2]))
                else:
                    print("%5d %s %-*s" % (num, o[0], colsz, o[1]))
            print()
        else:
            print("No copies/moves/removals recorded")
            print()

    def do_save(self, arg):
        '''
        save FILENAME : Save current preparation operations to a file
        '''
        fn = self.parse_args(arg, 1, "save")[0]
        # Width of the widest path, used to align the columns
        longestname = 0
        for o in self.prepare_ops:
            if len(o[1]) > longestname:
                longestname = len(o[1])
            if len(o) == 3 and len(o[2]) > longestname:
                longestname = len(o[2])
        with open(fn, 'w') as f:
            for o in self.prepare_ops:
                if len(o) == 2:
                    f.write("svn %s %-*s\n" %
                            (o[0], longestname, shlex.quote(o[1])))
                else:
                    f.write("svn %s %-*s %-*s\n" %
                            (o[0], longestname, shlex.quote(o[1]),
                                longestname, shlex.quote(o[2])))

    def do_load(self, arg):
        '''
        load FILENAME : Load/append preparation operations from a file
        '''
        fn = self.parse_args(arg, 1, "load")[0]
        self.info(1, "Performing operations from `%s'" % fn)
        with open(fn, 'r') as f:
            for l in f:
                if l[0] == '#':
                    continue
                args = shlex.split(l)
                try:
                    if len(args) < 2 or args[0] != 'svn':
                        raise InvalidUsageException(None, "")
                    if not self.handle_op(args[1:]):
                        # Unknown operation or wrong number of arguments:
                        # report it rather than silently skipping the line
                        raise InvalidUsageException(None,
                                "unsupported operation")
                except InvalidUsageException as e:
                    # Rethrow
                    raise InvalidUsageException(None,
                            "Invalid line in file: %s(%s)" % (l, e))

    def do_svninfo(self, arg):
        '''
        svninfo : Display SVN info on the working copy (debug)
        '''
        self.parse_args(arg, 0, "svninfo")
        print(str(self.svninfo))

    def do_printlst(self, arg):
        '''
        printlst : Print list of all known files/dirs with their status
                   in the working copy and imported sources (debug)
        '''
        self.parse_args(arg, 0, "printlst")
        self.items.print()

    def do_help(self, arg):
        '''
        help [COMMAND] : Print the help message
        '''
        cmd.Cmd.do_help(self, arg)

    def do_EOF(self, arg):
        '''
        Quit the script
        '''
        return True

    def do_quit(self, arg):
        '''
        quit : Quit the script
        '''
        return True
+
+
def main():
    '''
    Command line entry point: parse the arguments, verify that the
    destination is an SVN working copy, then run in the requested mode
    (automatic, detect-only, apply-only or interactive).
    '''
    parser = argparse.ArgumentParser(
            description="Prepare a working copy for SVN vendor import.")
    parser.add_argument('wcdir',
            help="Path to working copy (destination of import)")
    parser.add_argument('importdir',
            help="Path to imported sources (source of import)")
    modes = parser.add_mutually_exclusive_group()
    modes.add_argument('--auto', action='store_true',
            help="Automatic mode: detect moves, apply them and copy sources")
    modes.add_argument('--detect', metavar='FILE',
            help="Semi-automatic mode: detect moves and save them to FILE")
    modes.add_argument('--apply', metavar='FILE',
            help="Semi-automatic mode: apply the moves from FILE " +
            "and copy the sources")
    parser.add_argument('--save', metavar='FILE',
            help="Automatic mode: save moves to FILE after detection, " +
            "then proceed to apply the changes")
    parser.add_argument('--config', metavar=('OPT','VALUE'), action='append',
            nargs=2, help="Set configuration option OPT to VALUE")
    opts = parser.parse_args()

    # 'svn info' doubles as the check that wcdir really is a working copy
    svn = subprocess.Popen(args=['svn', 'info', opts.wcdir],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    info_out, info_err = svn.communicate()
    if svn.returncode != 0:
        print("%s: does not appear to be SVN working copy." % opts.wcdir)
        print("`svn info' exited with status %d and returned:" %
                svn.returncode)
        print()
        print(info_err.decode())
        sys.exit(1)

    imp = SvnVndImport(opts.wcdir, opts.importdir, info_out.decode())
    if opts.config:
        try:
            for name, value in opts.config:
                imp.config.set(name, value)
        except InvalidUsageException as e:
            parser.error(e)
    imp.scan()

    if opts.auto:
        imp.onecmd("detect")
        if opts.save:
            imp.onecmd("save " + shlex.quote(opts.save))
        imp.onecmd("apply")
    elif opts.detect:
        imp.onecmd("detect")
        imp.onecmd("save " + shlex.quote(opts.detect))
    elif opts.apply:
        imp.onecmd("load " + shlex.quote(opts.apply))
        imp.onecmd("apply")
    else:
        # No mode given - drop into the interactive prompt
        imp.cmdloop()


if __name__ == '__main__':
    main()