diff options
Diffstat (limited to 'exporters/darcs/darcs-fast-export')
-rwxr-xr-x | exporters/darcs/darcs-fast-export | 380 |
1 files changed, 0 insertions, 380 deletions
diff --git a/exporters/darcs/darcs-fast-export b/exporters/darcs/darcs-fast-export deleted file mode 100755 index fa850de..0000000 --- a/exporters/darcs/darcs-fast-export +++ /dev/null @@ -1,380 +0,0 @@ -#!/usr/bin/env python - -""" - - darcs-fast-export - darcs backend for fast data importers - - Copyright (c) 2008, 2009 Miklos Vajna <vmiklos@frugalware.org> - Copyright (c) 2008 Matthias Andree <matthias.andree@gmx.de> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - -""" - -import xml.dom.minidom -import xml.parsers.expat -import os -import sys -import gzip -import time -import calendar -import shutil -import subprocess -import optparse -import re -import urllib -import urllib2 -import StringIO - -sys = reload(sys) -sys.setdefaultencoding("utf-8") - -class Handler: - def __init__(self): - self.hashes = [] - self.authormap = {} - self.export_marks = [] - self.import_marks = [] - - def get_patchname(self, patch): - ret = [] - s = "" - if patch.attributes['inverted'].value == 'True': - s = "UNDO: " - cs = patch.getElementsByTagName("name")[0].childNodes - if cs.length > 0: - ret.append(s + cs[0].data) - lines = patch.getElementsByTagName("comment") - if lines: - for i in lines[0].childNodes[0].data.split('\n'): - if not i.startswith("Ignore-this: "): - ret.append(i) - return "\n".join(ret).encode('utf-8') - - def get_author(self, patch): - """darcs allows any freeform string, but fast-import has a more - strict format, so fix up broken author names here.""" - - author = patch.attributes['author'].value - if author in self.authormap: - author = self.authormap[author] - if not len(author): - author = "darcs-fast-export <darcs-fast-export>" - # add missing name - elif not ">" in author: - author = "%s <%s>" % (author.split('@')[0], author) - # avoid double quoting - elif author[0] == '"' and author[-1] == '"': - author = author[1:-1] - # name after email - elif author[-1] != '>': - author = author[author.index('>')+2:] + ' ' + author[:author.index('>')+1] - return author.encode('utf-8') - - def get_date(self, patch): - try: - date = time.strptime(patch, "%Y%m%d%H%M%S") - except ValueError: - date = time.strptime(patch[:19] + patch[-5:], '%a %b %d %H:%M:%S %Y') - return calendar.timegm(date) - - def progress(self, s): - print "progress [%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s) - sys.stdout.flush() - - def log(self, s): - self.logsock.write("[%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s)) - self.logsock.flush() - - def parse_inventory(self, sock=None): - prev = None - nextprev = False - buf = [] - if not sock: - sock = self.open(os.path.join(self.origin, "_darcs", "hashed_inventory")) - for i in sock.readlines(): - if i.startswith("hash"): - buf.insert(0, i[6:-1]) - if i.startswith("Starting with inventory:"): - nextprev = True - elif nextprev: - prev = i[:-1] - nextprev = False - sock.close() - for i in buf: - self.hashes.insert(0, i) - if prev: - sock = self.gzip_open(os.path.join(self.origin, "_darcs", "inventories", prev)) - self.parse_inventory(sock) - - # this is like gzip.open but supports urls as well - def gzip_open(self, path): - if os.path.exists(path): - return gzip.open(path) - buf = urllib.urlopen(path).read() - sock = StringIO.StringIO(buf) - return gzip.GzipFile(fileobj=sock) - - # this is like os.path.exists but supports urls as well - def path_exists(self, path): - if os.path.exists(path): - return True - else: - try: - urllib2.urlopen(urllib2.Request(path)) - return True - except urllib2.HTTPError, e: - return False - - # this is like open, but supports urls as well - def open(self, path): - if os.path.exists(path): - return open(path) - else: - return urllib.urlopen(path) - - def handle_opts(self): - # Option Parser - usage="%prog [options] darcsrepo" - opp = optparse.OptionParser(usage=usage) - opp.add_option("--import-marks", metavar="IFILE", - help="read state for incremental imports from IFILE") - opp.add_option("--export-marks", metavar="OFILE", - help="write state for incremental imports from OFILE") - opp.add_option("--encoding", - help="encoding of log [default: %default], if unspecified and input isn't utf-8, guess") - opp.add_option("--authors-file", metavar="F", - help="read author transformations in old=new format from F") - opp.add_option("--working", metavar="W", - help="working directory which is removed at the end of non-incremental conversions") - opp.add_option("--logfile", metavar="L", - help="log file which contains the output of external programs invoked during the conversion") - opp.add_option("--git-branch", metavar="B", - help="git branch [default: refs/heads/master]") - opp.add_option("--progress", metavar="P", - help="insert progress statements after every n commit [default: 100]") - (self.options, self.args) = opp.parse_args() - if len(self.args) < 1: - opp.error("darcsrepo required") - - # read author mapping file in gitauthors format, - # i. e. in=out (one per # line) - if self.options.authors_file: - sock = open(self.options.authors_file) - self.authormap = dict([i.strip().split('=',1) for i in sock]) - sock.close() - - if "://" not in self.args[0]: - self.origin = os.path.abspath(self.args[0]) - else: - self.origin = self.args[0].strip('/') - if self.options.working: - self.working = os.path.abspath(self.options.working) - else: - if "://" not in self.origin: - self.working = "%s.darcs" % self.origin - else: - self.working = "%s.darcs" % os.path.split(self.origin)[-1] - if self.options.logfile: - logfile = os.path.abspath(self.options.logfile) - else: - if "://" not in self.origin: - logfile = "%s.log" % self.origin - else: - logfile = "%s.log" % os.path.split(self.origin)[-1] - self.logsock = open(logfile, "a") - if self.options.git_branch: - self.git_branch = self.options.git_branch - else: - self.git_branch = "refs/heads/master" - - if self.options.progress: - self.prognum = int(self.options.progress) - else: - self.prognum = 100 - - def handle_import_marks(self): - if self.options.import_marks: - sock = open(self.options.import_marks) - for i in sock.readlines(): - line = i.strip() - if not len(line): - continue - self.import_marks.append(line.split(' ')[1]) - self.export_marks.append(line) - sock.close() - - def get_patches(self): - self.progress("getting list of patches") - if not len(self.import_marks): - sock = os.popen("darcs changes --xml --reverse --repo %s" % self.origin) - else: - sock = os.popen("darcs changes --xml --reverse --repo %s --from-match 'hash %s'" % (self.origin, self.import_marks[-1])) - buf = sock.read() - sock.close() - # this is hackish. we need to escape some bad chars, otherwise the xml - # will not be valid - buf = buf.replace('\x1b', '^[') - if self.options.encoding: - xmldoc = xml.dom.minidom.parseString(unicode(buf, self.options.encoding).encode('utf-8')) - else: - try: - xmldoc = xml.dom.minidom.parseString(buf) - except xml.parsers.expat.ExpatError: - try: - import chardet - except ImportError: - sys.exit("Error, encoding is not utf-8. Please " + - "either specify it with the --encoding " + - "option or install chardet.") - self.progress("encoding is not utf8, guessing charset") - encoding = chardet.detect(buf)['encoding'] - self.progress("detected encoding is %s" % encoding) - xmldoc = xml.dom.minidom.parseString(unicode(buf, encoding).encode('utf-8')) - sys.stdout.flush() - return xmldoc.getElementsByTagName('patch') - - def setup_workdir(self): - darcs2 = False - self.oldfashionedpatch = True - self.cwd = os.getcwd() - if self.path_exists(os.path.join(self.origin, "_darcs", "format")): - sock = self.open(os.path.join(self.origin, "_darcs", "format")) - format = [x.strip() for x in sock] - sock.close() - darcs2 = 'darcs-2' in format - self.oldfashionedpatch = not 'hashed' in format - if not self.oldfashionedpatch: - self.progress("parsing the inventory") - if "://" not in self.origin: - os.chdir(self.origin) - self.parse_inventory() - if not self.options.import_marks or not os.path.exists(self.working): - # init the tmp darcs repo - os.mkdir(self.working) - os.chdir(self.working) - if darcs2: - os.system("darcs init --darcs-2") - else: - os.system("darcs init --old-fashioned-inventory") - else: - os.chdir(self.working) - if self.options.import_marks: - sock = os.popen("darcs pull -a --match 'hash %s' %s" % (self.import_marks[-1], self.origin)) - self.log("Building/updating working directory:\n%s" % sock.read()) - sock.close() - - def export_patches(self): - patches = self.get_patches() - # this is the number of the NEXT patch - count = 1 - if len(self.import_marks): - patches = patches[1:] - count = len(self.import_marks) + 1 - if len(self.export_marks): - # this is the mark number of the NEXT patch - markcount = int(self.export_marks[-1].split(' ')[0][1:]) + 1 - else: - markcount = count - # this may be huge and we need it many times - patchnum = len(patches) - - if not len(self.import_marks): - self.progress("starting export, repo has %d patches" % patchnum) - else: - self.progress("continuing export, %d patches to convert" % patchnum) - paths = [] - for i in patches: - # apply the patch - hash = i.attributes['hash'].value - buf = ["\nNew patches:\n"] - if self.oldfashionedpatch: - sock = self.gzip_open(os.path.join(self.origin, "_darcs", "patches", hash)) - else: - sock = self.gzip_open(os.path.join(self.origin, "_darcs", "patches", self.hashes[count-1])) - buf.append(sock.read()) - sock.close() - sock = os.popen("darcs changes --context") - buf.append(sock.read()) - sock.close() - sock = subprocess.Popen(["darcs", "apply", "--allow-conflicts"], stdin=subprocess.PIPE, stdout=subprocess.PIPE) - sock.stdin.write("".join(buf)) - sock.stdin.close() - self.log("Applying %s:\n%s" % (hash, sock.stdout.read())) - sock.stdout.close() - message = self.get_patchname(i) - # export the commit - print "commit %s" % self.git_branch - print "mark :%s" % markcount - if self.options.export_marks: - self.export_marks.append(":%s %s" % (markcount, hash)) - date = self.get_date(i.attributes['date'].value) - print "committer %s %s +0000" % (self.get_author(i), date) - print "data %d\n%s" % (len(message), message) - if markcount > 1: - print "from :%s" % (markcount-1) - # export the files - for j in paths: - print "D %s" % j - paths = [] - for (root, dirs, files) in os.walk ("."): - for f in files: - j = os.path.normpath(os.path.join(root, f)) - if j.startswith("_darcs") or "-darcs-backup" in j: - continue - paths.append(j) - sock = open(j) - buf = sock.read() - sock.close() - # darcs does not track the executable bit :/ - print "M 644 inline %s" % j - print "data %s\n%s" % (len(buf), buf) - if message[:4] == "TAG ": - tag = re.sub('[^\xe9-\xf8\w.\-]+', '_', message[4:].strip().split('\n')[0]).strip('_') - print "tag %s" % tag - print "from :%s" % markcount - print "tagger %s %s +0000" % (self.get_author(i), date) - print "data %d\n%s" % (len(message), message) - if count % self.prognum == 0: - self.progress("%d/%d patches" % (count, patchnum)) - count += 1 - markcount += 1 - - os.chdir(self.cwd) - - if not self.options.export_marks: - shutil.rmtree(self.working) - self.logsock.close() - - def handle_export_marks(self): - if self.options.export_marks: - self.progress("writing export marks") - sock = open(self.options.export_marks, 'w') - sock.write("\n".join(self.export_marks)) - sock.write("\n") - sock.close() - - self.progress("finished") - - def handle(self): - self.handle_opts() - self.handle_import_marks() - self.setup_workdir() - self.export_patches() - self.handle_export_marks() - -if __name__ == "__main__": - h = Handler() - h.handle() |