summaryrefslogtreecommitdiff
path: root/exporters/darcs/darcs-fast-export
diff options
context:
space:
mode:
Diffstat (limited to 'exporters/darcs/darcs-fast-export')
-rwxr-xr-xexporters/darcs/darcs-fast-export380
1 files changed, 0 insertions, 380 deletions
diff --git a/exporters/darcs/darcs-fast-export b/exporters/darcs/darcs-fast-export
deleted file mode 100755
index fa850de..0000000
--- a/exporters/darcs/darcs-fast-export
+++ /dev/null
@@ -1,380 +0,0 @@
-#!/usr/bin/env python
-
-"""
-
- darcs-fast-export - darcs backend for fast data importers
-
- Copyright (c) 2008, 2009 Miklos Vajna <vmiklos@frugalware.org>
- Copyright (c) 2008 Matthias Andree <matthias.andree@gmx.de>
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-"""
-
-import xml.dom.minidom
-import xml.parsers.expat
-import os
-import sys
-import gzip
-import time
-import calendar
-import shutil
-import subprocess
-import optparse
-import re
-import urllib
-import urllib2
-import StringIO
-
-sys = reload(sys)
-sys.setdefaultencoding("utf-8")
-
-class Handler:
- def __init__(self):
- self.hashes = []
- self.authormap = {}
- self.export_marks = []
- self.import_marks = []
-
- def get_patchname(self, patch):
- ret = []
- s = ""
- if patch.attributes['inverted'].value == 'True':
- s = "UNDO: "
- cs = patch.getElementsByTagName("name")[0].childNodes
- if cs.length > 0:
- ret.append(s + cs[0].data)
- lines = patch.getElementsByTagName("comment")
- if lines:
- for i in lines[0].childNodes[0].data.split('\n'):
- if not i.startswith("Ignore-this: "):
- ret.append(i)
- return "\n".join(ret).encode('utf-8')
-
- def get_author(self, patch):
- """darcs allows any freeform string, but fast-import has a more
- strict format, so fix up broken author names here."""
-
- author = patch.attributes['author'].value
- if author in self.authormap:
- author = self.authormap[author]
- if not len(author):
- author = "darcs-fast-export <darcs-fast-export>"
- # add missing name
- elif not ">" in author:
- author = "%s <%s>" % (author.split('@')[0], author)
- # avoid double quoting
- elif author[0] == '"' and author[-1] == '"':
- author = author[1:-1]
- # name after email
- elif author[-1] != '>':
- author = author[author.index('>')+2:] + ' ' + author[:author.index('>')+1]
- return author.encode('utf-8')
-
- def get_date(self, patch):
- try:
- date = time.strptime(patch, "%Y%m%d%H%M%S")
- except ValueError:
- date = time.strptime(patch[:19] + patch[-5:], '%a %b %d %H:%M:%S %Y')
- return calendar.timegm(date)
-
- def progress(self, s):
- print "progress [%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s)
- sys.stdout.flush()
-
- def log(self, s):
- self.logsock.write("[%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s))
- self.logsock.flush()
-
- def parse_inventory(self, sock=None):
- prev = None
- nextprev = False
- buf = []
- if not sock:
- sock = self.open(os.path.join(self.origin, "_darcs", "hashed_inventory"))
- for i in sock.readlines():
- if i.startswith("hash"):
- buf.insert(0, i[6:-1])
- if i.startswith("Starting with inventory:"):
- nextprev = True
- elif nextprev:
- prev = i[:-1]
- nextprev = False
- sock.close()
- for i in buf:
- self.hashes.insert(0, i)
- if prev:
- sock = self.gzip_open(os.path.join(self.origin, "_darcs", "inventories", prev))
- self.parse_inventory(sock)
-
- # this is like gzip.open but supports urls as well
- def gzip_open(self, path):
- if os.path.exists(path):
- return gzip.open(path)
- buf = urllib.urlopen(path).read()
- sock = StringIO.StringIO(buf)
- return gzip.GzipFile(fileobj=sock)
-
- # this is like os.path.exists but supports urls as well
- def path_exists(self, path):
- if os.path.exists(path):
- return True
- else:
- try:
- urllib2.urlopen(urllib2.Request(path))
- return True
- except urllib2.HTTPError, e:
- return False
-
- # this is like open, but supports urls as well
- def open(self, path):
- if os.path.exists(path):
- return open(path)
- else:
- return urllib.urlopen(path)
-
- def handle_opts(self):
- # Option Parser
- usage="%prog [options] darcsrepo"
- opp = optparse.OptionParser(usage=usage)
- opp.add_option("--import-marks", metavar="IFILE",
- help="read state for incremental imports from IFILE")
- opp.add_option("--export-marks", metavar="OFILE",
- help="write state for incremental imports from OFILE")
- opp.add_option("--encoding",
- help="encoding of log [default: %default], if unspecified and input isn't utf-8, guess")
- opp.add_option("--authors-file", metavar="F",
- help="read author transformations in old=new format from F")
- opp.add_option("--working", metavar="W",
- help="working directory which is removed at the end of non-incremental conversions")
- opp.add_option("--logfile", metavar="L",
- help="log file which contains the output of external programs invoked during the conversion")
- opp.add_option("--git-branch", metavar="B",
- help="git branch [default: refs/heads/master]")
- opp.add_option("--progress", metavar="P",
- help="insert progress statements after every n commit [default: 100]")
- (self.options, self.args) = opp.parse_args()
- if len(self.args) < 1:
- opp.error("darcsrepo required")
-
- # read author mapping file in gitauthors format,
- # i. e. in=out (one per # line)
- if self.options.authors_file:
- sock = open(self.options.authors_file)
- self.authormap = dict([i.strip().split('=',1) for i in sock])
- sock.close()
-
- if "://" not in self.args[0]:
- self.origin = os.path.abspath(self.args[0])
- else:
- self.origin = self.args[0].strip('/')
- if self.options.working:
- self.working = os.path.abspath(self.options.working)
- else:
- if "://" not in self.origin:
- self.working = "%s.darcs" % self.origin
- else:
- self.working = "%s.darcs" % os.path.split(self.origin)[-1]
- if self.options.logfile:
- logfile = os.path.abspath(self.options.logfile)
- else:
- if "://" not in self.origin:
- logfile = "%s.log" % self.origin
- else:
- logfile = "%s.log" % os.path.split(self.origin)[-1]
- self.logsock = open(logfile, "a")
- if self.options.git_branch:
- self.git_branch = self.options.git_branch
- else:
- self.git_branch = "refs/heads/master"
-
- if self.options.progress:
- self.prognum = int(self.options.progress)
- else:
- self.prognum = 100
-
- def handle_import_marks(self):
- if self.options.import_marks:
- sock = open(self.options.import_marks)
- for i in sock.readlines():
- line = i.strip()
- if not len(line):
- continue
- self.import_marks.append(line.split(' ')[1])
- self.export_marks.append(line)
- sock.close()
-
- def get_patches(self):
- self.progress("getting list of patches")
- if not len(self.import_marks):
- sock = os.popen("darcs changes --xml --reverse --repo %s" % self.origin)
- else:
- sock = os.popen("darcs changes --xml --reverse --repo %s --from-match 'hash %s'" % (self.origin, self.import_marks[-1]))
- buf = sock.read()
- sock.close()
- # this is hackish. we need to escape some bad chars, otherwise the xml
- # will not be valid
- buf = buf.replace('\x1b', '^[')
- if self.options.encoding:
- xmldoc = xml.dom.minidom.parseString(unicode(buf, self.options.encoding).encode('utf-8'))
- else:
- try:
- xmldoc = xml.dom.minidom.parseString(buf)
- except xml.parsers.expat.ExpatError:
- try:
- import chardet
- except ImportError:
- sys.exit("Error, encoding is not utf-8. Please " +
- "either specify it with the --encoding " +
- "option or install chardet.")
- self.progress("encoding is not utf8, guessing charset")
- encoding = chardet.detect(buf)['encoding']
- self.progress("detected encoding is %s" % encoding)
- xmldoc = xml.dom.minidom.parseString(unicode(buf, encoding).encode('utf-8'))
- sys.stdout.flush()
- return xmldoc.getElementsByTagName('patch')
-
- def setup_workdir(self):
- darcs2 = False
- self.oldfashionedpatch = True
- self.cwd = os.getcwd()
- if self.path_exists(os.path.join(self.origin, "_darcs", "format")):
- sock = self.open(os.path.join(self.origin, "_darcs", "format"))
- format = [x.strip() for x in sock]
- sock.close()
- darcs2 = 'darcs-2' in format
- self.oldfashionedpatch = not 'hashed' in format
- if not self.oldfashionedpatch:
- self.progress("parsing the inventory")
- if "://" not in self.origin:
- os.chdir(self.origin)
- self.parse_inventory()
- if not self.options.import_marks or not os.path.exists(self.working):
- # init the tmp darcs repo
- os.mkdir(self.working)
- os.chdir(self.working)
- if darcs2:
- os.system("darcs init --darcs-2")
- else:
- os.system("darcs init --old-fashioned-inventory")
- else:
- os.chdir(self.working)
- if self.options.import_marks:
- sock = os.popen("darcs pull -a --match 'hash %s' %s" % (self.import_marks[-1], self.origin))
- self.log("Building/updating working directory:\n%s" % sock.read())
- sock.close()
-
- def export_patches(self):
- patches = self.get_patches()
- # this is the number of the NEXT patch
- count = 1
- if len(self.import_marks):
- patches = patches[1:]
- count = len(self.import_marks) + 1
- if len(self.export_marks):
- # this is the mark number of the NEXT patch
- markcount = int(self.export_marks[-1].split(' ')[0][1:]) + 1
- else:
- markcount = count
- # this may be huge and we need it many times
- patchnum = len(patches)
-
- if not len(self.import_marks):
- self.progress("starting export, repo has %d patches" % patchnum)
- else:
- self.progress("continuing export, %d patches to convert" % patchnum)
- paths = []
- for i in patches:
- # apply the patch
- hash = i.attributes['hash'].value
- buf = ["\nNew patches:\n"]
- if self.oldfashionedpatch:
- sock = self.gzip_open(os.path.join(self.origin, "_darcs", "patches", hash))
- else:
- sock = self.gzip_open(os.path.join(self.origin, "_darcs", "patches", self.hashes[count-1]))
- buf.append(sock.read())
- sock.close()
- sock = os.popen("darcs changes --context")
- buf.append(sock.read())
- sock.close()
- sock = subprocess.Popen(["darcs", "apply", "--allow-conflicts"], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
- sock.stdin.write("".join(buf))
- sock.stdin.close()
- self.log("Applying %s:\n%s" % (hash, sock.stdout.read()))
- sock.stdout.close()
- message = self.get_patchname(i)
- # export the commit
- print "commit %s" % self.git_branch
- print "mark :%s" % markcount
- if self.options.export_marks:
- self.export_marks.append(":%s %s" % (markcount, hash))
- date = self.get_date(i.attributes['date'].value)
- print "committer %s %s +0000" % (self.get_author(i), date)
- print "data %d\n%s" % (len(message), message)
- if markcount > 1:
- print "from :%s" % (markcount-1)
- # export the files
- for j in paths:
- print "D %s" % j
- paths = []
- for (root, dirs, files) in os.walk ("."):
- for f in files:
- j = os.path.normpath(os.path.join(root, f))
- if j.startswith("_darcs") or "-darcs-backup" in j:
- continue
- paths.append(j)
- sock = open(j)
- buf = sock.read()
- sock.close()
- # darcs does not track the executable bit :/
- print "M 644 inline %s" % j
- print "data %s\n%s" % (len(buf), buf)
- if message[:4] == "TAG ":
- tag = re.sub('[^\xe9-\xf8\w.\-]+', '_', message[4:].strip().split('\n')[0]).strip('_')
- print "tag %s" % tag
- print "from :%s" % markcount
- print "tagger %s %s +0000" % (self.get_author(i), date)
- print "data %d\n%s" % (len(message), message)
- if count % self.prognum == 0:
- self.progress("%d/%d patches" % (count, patchnum))
- count += 1
- markcount += 1
-
- os.chdir(self.cwd)
-
- if not self.options.export_marks:
- shutil.rmtree(self.working)
- self.logsock.close()
-
- def handle_export_marks(self):
- if self.options.export_marks:
- self.progress("writing export marks")
- sock = open(self.options.export_marks, 'w')
- sock.write("\n".join(self.export_marks))
- sock.write("\n")
- sock.close()
-
- self.progress("finished")
-
- def handle(self):
- self.handle_opts()
- self.handle_import_marks()
- self.setup_workdir()
- self.export_patches()
- self.handle_export_marks()
-
-if __name__ == "__main__":
- h = Handler()
- h.handle()