diff options
author | Miklos Vajna <vmiklos@frugalware.org> | 2009-10-22 12:26:32 +0200 |
---|---|---|
committer | Miklos Vajna <vmiklos@frugalware.org> | 2009-10-22 12:26:32 +0200 |
commit | e005d02ff7dad19204a59b1ada91182072dc3ba3 (patch) | |
tree | 661d0162e59e5cbe69d0bd8741458eccb4c6b4b4 | |
parent | 1699dee3a67089fb524d5cb3b99eb3209676501f (diff) | |
download | python-fastimport-e005d02ff7dad19204a59b1ada91182072dc3ba3.tar.gz |
darcs-fast-export: refactor to a Python class
The code should be a lot more readable now, and we can avoid function
names like open_() as well.
All test cases still pass.
-rwxr-xr-x | exporters/darcs/darcs-fast-export | 692 |
1 file changed, 356 insertions, 336 deletions
diff --git a/exporters/darcs/darcs-fast-export b/exporters/darcs/darcs-fast-export index fa5099e..d8a04b0 100755 --- a/exporters/darcs/darcs-fast-export +++ b/exporters/darcs/darcs-fast-export @@ -4,7 +4,7 @@ darcs-fast-export - darcs backend for fast data importers - Copyright (c) 2008 Miklos Vajna <vmiklos@frugalware.org> + Copyright (c) 2008, 2009 Miklos Vajna <vmiklos@frugalware.org> Copyright (c) 2008 Matthias Andree <matthias.andree@gmx.de> This program is free software; you can redistribute it and/or modify @@ -39,340 +39,360 @@ import StringIO sys = reload(sys) sys.setdefaultencoding("utf-8") -def __get_zone(): - now = time.localtime() - if time.daylight and now[-1]: - offset = time.altzone - else: - offset = time.timezone - hours, minutes = divmod(abs(offset), 3600) - if offset > 0: - sign = "-" - else: - sign = "+" - return sign, hours, minutes - -def get_zone_str(): - sign, hours, minutes = __get_zone() - return "%s%02d%02d" % (sign, hours, minutes // 60) - -def get_zone_int(): - sign, hours, minutes = __get_zone() - ret = hours*3600+minutes*60 - if sign == "-": - ret *= -1 - return ret - -def get_patchname(patch): - ret = [] - s = "" - if patch.attributes['inverted'].value == 'True': - s = "UNDO: " - cs = patch.getElementsByTagName("name")[0].childNodes - if cs.length > 0: - ret.append(s + cs[0].data) - lines = patch.getElementsByTagName("comment") - if lines: - for i in lines[0].childNodes[0].data.split('\n'): - if not i.startswith("Ignore-this: "): - ret.append(i) - return "\n".join(ret).encode('utf-8') - -def get_author(patch): - """darcs allows any freeform string, but fast-import has a more - strict format, so fix up broken author names here.""" - - author = patch.attributes['author'].value - if author in authormap: - author = authormap[author] - if not len(author): - author = "darcs-fast-export <darcs-fast-export>" - # add missing name - elif not ">" in author: - author = "%s <%s>" % (author.split('@')[0], author) - # avoid double quoting - elif 
author[0] == '"' and author[-1] == '"': - author = author[1:-1] - # name after email - elif author[-1] != '>': - author = author[author.index('>')+2:] + ' ' + author[:author.index('>')+1] - return author.encode('utf-8') - -def get_date(patch): - try: - date = time.strptime(patch, "%Y%m%d%H%M%S") - except ValueError: - date = time.strptime(patch[:19] + patch[-5:], '%a %b %d %H:%M:%S %Y') - return int(time.mktime(date)) + get_zone_int() - -def progress(s): - print "progress [%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s) - sys.stdout.flush() - -def log(s): - logsock.write("[%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s)) - logsock.flush() - -hashes = [] -def parse_inventory(sock=None): - prev = None - nextprev = False - buf = [] - if not sock: - sock = open_(os.path.join(origin, "_darcs", "hashed_inventory")) - for i in sock.readlines(): - if i.startswith("hash"): - buf.insert(0, i[6:-1]) - if i.startswith("Starting with inventory:"): - nextprev = True - elif nextprev: - prev = i[:-1] - nextprev = False - sock.close() - for i in buf: - hashes.insert(0, i) - if prev: - sock = gzip_open(os.path.join(origin, "_darcs", "inventories", prev)) - parse_inventory(sock) - -# this is like gzip.open but supports urls as well -def gzip_open(path): - if os.path.exists(path): - return gzip.open(path) - buf = urllib.urlopen(path).read() - sock = StringIO.StringIO(buf) - return gzip.GzipFile(fileobj=sock) - -# this is like os.path.exists but supports urls as well -def os_path_exists(path): - if os.path.exists(path): - return True - else: - return urllib.urlopen(path).getcode() == 200 - -# this is like open, but supports urls as well -def open_(path): - if os.path.exists(path): - return open(path) - else: - return urllib.urlopen(path) - -# Option Parser -usage="%prog [options] darcsrepo" -opp = optparse.OptionParser(usage=usage) -opp.add_option("--import-marks", metavar="IFILE", - help="read state for incremental imports from IFILE") 
-opp.add_option("--export-marks", metavar="OFILE", - help="write state for incremental imports from OFILE") -opp.add_option("--encoding", - help="encoding of log [default: %default], if unspecified and input isn't utf-8, guess") -opp.add_option("--authors-file", metavar="F", - help="read author transformations in old=new format from F") -opp.add_option("--working", metavar="W", - help="working directory which is removed at the end of non-incremental conversions") -opp.add_option("--logfile", metavar="L", - help="log file which contains the output of external programs invoked during the conversion") -opp.add_option("--git-branch", metavar="B", - help="git branch [default: refs/heads/master]") -opp.add_option("--progress", metavar="P", - help="insert progress statements after every n commit [default: 100]") -(options, args) = opp.parse_args() -if len(args) < 1: - opp.error("darcsrepo required") - -export_marks = [] -import_marks = [] -if options.import_marks: - sock = open(options.import_marks) - for i in sock.readlines(): - line = i.strip() - if not len(line): - continue - import_marks.append(line.split(' ')[1]) - export_marks.append(line) - sock.close() - -# read author mapping file in gitauthors format, -# i. e. 
in=out (one per # line) -authormap = {} -if options.authors_file: - sock = open(options.authors_file) - authormap = dict([i.strip().split('=',1) for i in sock]) - sock.close() - -if "://" not in args[0]: - origin = os.path.abspath(args[0]) -else: - origin = args[0].strip('/') -if options.working: - working = os.path.abspath(options.working) -else: - if "://" not in origin: - working = "%s.darcs" % origin - else: - working = "%s.darcs" % os.path.split(origin)[-1] -patchfile = "%s.patch" % origin -if options.logfile: - logfile = os.path.abspath(options.logfile) -else: - if "://" not in origin: - logfile = "%s.log" % origin - else: - logfile = "%s.log" % os.path.split(origin)[-1] -logsock = open(logfile, "a") -if options.git_branch: - git_branch = options.git_branch -else: - git_branch = "refs/heads/master" - -if options.progress: - prognum = int(options.progress) -else: - prognum = 100 - -progress("getting list of patches") -if not len(import_marks): - sock = os.popen("darcs changes --xml --reverse --repo %s" % origin) -else: - sock = os.popen("darcs changes --xml --reverse --repo %s --from-match 'hash %s'" % (origin, import_marks[-1])) -buf = sock.read() -sock.close() -# this is hackish. 
we need to escape some bad chars, otherwise the xml -# will not be valid -buf = buf.replace('\x1b', '^[') -if options.encoding: - xmldoc = xml.dom.minidom.parseString(unicode(buf, options.encoding).encode('utf-8')) -else: - try: - xmldoc = xml.dom.minidom.parseString(buf) - except xml.parsers.expat.ExpatError: +class Handler: + def __init__(self): + self.hashes = [] + self.authormap = {} + self.export_marks = [] + self.import_marks = [] + + def __get_zone(self): + now = time.localtime() + if time.daylight and now[-1]: + offset = time.altzone + else: + offset = time.timezone + hours, minutes = divmod(abs(offset), 3600) + if offset > 0: + sign = "-" + else: + sign = "+" + return sign, hours, minutes + + def get_zone_str(self): + sign, hours, minutes = self.__get_zone() + return "%s%02d%02d" % (sign, hours, minutes // 60) + + def get_zone_int(self): + sign, hours, minutes = self.__get_zone() + ret = hours*3600+minutes*60 + if sign == "-": + ret *= -1 + return ret + + def get_patchname(self, patch): + ret = [] + s = "" + if patch.attributes['inverted'].value == 'True': + s = "UNDO: " + cs = patch.getElementsByTagName("name")[0].childNodes + if cs.length > 0: + ret.append(s + cs[0].data) + lines = patch.getElementsByTagName("comment") + if lines: + for i in lines[0].childNodes[0].data.split('\n'): + if not i.startswith("Ignore-this: "): + ret.append(i) + return "\n".join(ret).encode('utf-8') + + def get_author(self, patch): + """darcs allows any freeform string, but fast-import has a more + strict format, so fix up broken author names here.""" + + author = patch.attributes['author'].value + if author in self.authormap: + author = self.authormap[author] + if not len(author): + author = "darcs-fast-export <darcs-fast-export>" + # add missing name + elif not ">" in author: + author = "%s <%s>" % (author.split('@')[0], author) + # avoid double quoting + elif author[0] == '"' and author[-1] == '"': + author = author[1:-1] + # name after email + elif author[-1] != '>': + 
author = author[author.index('>')+2:] + ' ' + author[:author.index('>')+1] + return author.encode('utf-8') + + def get_date(self, patch): try: - import chardet - except ImportError: - sys.exit("Error, encoding is not utf-8. Please " + - "either specify it with the --encoding " + - "option or install chardet.") - progress("encoding is not utf8, guessing charset") - encoding = chardet.detect(buf)['encoding'] - progress("detected encoding is %s" % encoding) - xmldoc = xml.dom.minidom.parseString(unicode(buf, encoding).encode('utf-8')) -sys.stdout.flush() - -darcs2 = False -oldfashionedpatch = True -cwd = os.getcwd() -if os_path_exists(os.path.join(origin, "_darcs", "format")): - sock = open_(os.path.join(origin, "_darcs", "format")) - format = [x.strip() for x in sock] - sock.close() - darcs2 = 'darcs-2' in format - oldfashionedpatch = not 'hashed' in format -if not oldfashionedpatch: - progress("parsing the inventory") - if "://" not in origin: - os.chdir(origin) - parse_inventory() -if not options.import_marks or not os.path.exists(working): - # init the tmp darcs repo - os.mkdir(working) - os.chdir(working) - if darcs2: - os.system("darcs init --darcs-2") - else: - os.system("darcs init --old-fashioned-inventory") -else: - os.chdir(working) -if options.import_marks: - sock = os.popen("darcs pull -a --match 'hash %s' %s" % (import_marks[-1], origin)) - log("Building/updating working directory:\n%s" % sock.read()) - sock.close() - -# this is the number of the NEXT patch -count = 1 -patches = xmldoc.getElementsByTagName('patch') -if len(import_marks): - patches = patches[1:] - count = len(import_marks) + 1 -if len(export_marks): - # this is the mark number of the NEXT patch - markcount = int(export_marks[-1].split(' ')[0][1:]) + 1 -else: - markcount = count -# this may be huge and we need it many times -patchnum = len(patches) - -if not len(import_marks): - progress("starting export, repo has %d patches" % patchnum) -else: - progress("continuing export, %d patches to 
convert" % patchnum) -paths = [] -for i in patches: - # apply the patch - hash = i.attributes['hash'].value - buf = ["\nNew patches:\n"] - if oldfashionedpatch: - sock = gzip_open(os.path.join(origin, "_darcs", "patches", hash)) - else: - sock = gzip_open(os.path.join(origin, "_darcs", "patches", hashes[count-1])) - buf.append(sock.read()) - sock.close() - sock = os.popen("darcs changes --context") - buf.append(sock.read()) - sock.close() - sock = subprocess.Popen(["darcs", "apply", "--allow-conflicts"], stdin=subprocess.PIPE, stdout=subprocess.PIPE) - sock.stdin.write("".join(buf)) - sock.stdin.close() - log("Applying %s:\n%s" % (hash, sock.stdout.read())) - sock.stdout.close() - message = get_patchname(i) - # export the commit - print "commit %s" % git_branch - print "mark :%s" % markcount - if options.export_marks: - export_marks.append(":%s %s" % (markcount, hash)) - date = get_date(i.attributes['date'].value) - print "committer %s %s %s" % (get_author(i), date, get_zone_str()) - print "data %d\n%s" % (len(message), message) - if markcount > 1: - print "from :%s" % (markcount-1) - # export the files - for j in paths: - print "D %s" % j - paths = [] - for (root, dirs, files) in os.walk ("."): - for f in files: - j = os.path.normpath(os.path.join(root, f)) - if j.startswith("_darcs") or "-darcs-backup" in j: - continue - paths.append(j) - sock = open(j) - buf = sock.read() + date = time.strptime(patch, "%Y%m%d%H%M%S") + except ValueError: + date = time.strptime(patch[:19] + patch[-5:], '%a %b %d %H:%M:%S %Y') + return int(time.mktime(date)) + self.get_zone_int() + + def progress(self, s): + print "progress [%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s) + sys.stdout.flush() + + def log(self, s): + self.logsock.write("[%s] %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), s)) + self.logsock.flush() + + def parse_inventory(self, sock=None): + prev = None + nextprev = False + buf = [] + if not sock: + sock = 
self.open(os.path.join(self.origin, "_darcs", "hashed_inventory")) + for i in sock.readlines(): + if i.startswith("hash"): + buf.insert(0, i[6:-1]) + if i.startswith("Starting with inventory:"): + nextprev = True + elif nextprev: + prev = i[:-1] + nextprev = False + sock.close() + for i in buf: + self.hashes.insert(0, i) + if prev: + sock = self.gzip_open(os.path.join(self.origin, "_darcs", "inventories", prev)) + self.parse_inventory(sock) + + # this is like gzip.open but supports urls as well + def gzip_open(self, path): + if os.path.exists(path): + return gzip.open(path) + buf = urllib.urlopen(path).read() + sock = StringIO.StringIO(buf) + return gzip.GzipFile(fileobj=sock) + + # this is like os.path.exists but supports urls as well + def path_exists(self, path): + if os.path.exists(path): + return True + else: + return urllib.urlopen(path).getcode() == 200 + + # this is like open, but supports urls as well + def open(self, path): + if os.path.exists(path): + return open(path) + else: + return urllib.urlopen(path) + + def handle_opts(self): + # Option Parser + usage="%prog [options] darcsrepo" + opp = optparse.OptionParser(usage=usage) + opp.add_option("--import-marks", metavar="IFILE", + help="read state for incremental imports from IFILE") + opp.add_option("--export-marks", metavar="OFILE", + help="write state for incremental imports from OFILE") + opp.add_option("--encoding", + help="encoding of log [default: %default], if unspecified and input isn't utf-8, guess") + opp.add_option("--authors-file", metavar="F", + help="read author transformations in old=new format from F") + opp.add_option("--working", metavar="W", + help="working directory which is removed at the end of non-incremental conversions") + opp.add_option("--logfile", metavar="L", + help="log file which contains the output of external programs invoked during the conversion") + opp.add_option("--git-branch", metavar="B", + help="git branch [default: refs/heads/master]") + 
opp.add_option("--progress", metavar="P", + help="insert progress statements after every n commit [default: 100]") + (self.options, self.args) = opp.parse_args() + if len(self.args) < 1: + opp.error("darcsrepo required") + + # read author mapping file in gitauthors format, + # i. e. in=out (one per # line) + if self.options.authors_file: + sock = open(self.options.authors_file) + self.authormap = dict([i.strip().split('=',1) for i in sock]) sock.close() - # darcs does not track the executable bit :/ - print "M 644 inline %s" % j - print "data %s\n%s" % (len(buf), buf) - if message[:4] == "TAG ": - tag = re.sub('[^\xe9-\xf8\w.\-]+', '_', message[4:].strip().split('\n')[0]).strip('_') - print "tag %s" % tag - print "from :%s" % markcount - print "tagger %s %s %s" % (get_author(i), date, get_zone_str()) - print "data %d\n%s" % (len(message), message) - if count % prognum == 0: - progress("%d/%d patches" % (count, patchnum)) - count += 1 - markcount += 1 - -os.chdir(cwd) - -if not options.export_marks: - shutil.rmtree(working) -logsock.close() - -if options.export_marks: - progress("writing export marks") - sock = open(options.export_marks, 'w') - sock.write("\n".join(export_marks)) - sock.write("\n") - sock.close() - -progress("finished") + + if "://" not in self.args[0]: + self.origin = os.path.abspath(self.args[0]) + else: + self.origin = self.args[0].strip('/') + if self.options.working: + self.working = os.path.abspath(self.options.working) + else: + if "://" not in self.origin: + self.working = "%s.darcs" % self.origin + else: + self.working = "%s.darcs" % os.path.split(self.origin)[-1] + if self.options.logfile: + logfile = os.path.abspath(self.options.logfile) + else: + if "://" not in self.origin: + logfile = "%s.log" % self.origin + else: + logfile = "%s.log" % os.path.split(self.origin)[-1] + self.logsock = open(logfile, "a") + if self.options.git_branch: + self.git_branch = self.options.git_branch + else: + self.git_branch = "refs/heads/master" + + if 
self.options.progress: + self.prognum = int(self.options.progress) + else: + self.prognum = 100 + + def handle_import_marks(self): + if self.options.import_marks: + sock = open(self.options.import_marks) + for i in sock.readlines(): + line = i.strip() + if not len(line): + continue + self.import_marks.append(line.split(' ')[1]) + self.export_marks.append(line) + sock.close() + + def get_patches(self): + self.progress("getting list of patches") + if not len(self.import_marks): + sock = os.popen("darcs changes --xml --reverse --repo %s" % self.origin) + else: + sock = os.popen("darcs changes --xml --reverse --repo %s --from-match 'hash %s'" % (self.origin, self.import_marks[-1])) + buf = sock.read() + sock.close() + # this is hackish. we need to escape some bad chars, otherwise the xml + # will not be valid + buf = buf.replace('\x1b', '^[') + if self.options.encoding: + xmldoc = xml.dom.minidom.parseString(unicode(buf, self.options.encoding).encode('utf-8')) + else: + try: + xmldoc = xml.dom.minidom.parseString(buf) + except xml.parsers.expat.ExpatError: + try: + import chardet + except ImportError: + sys.exit("Error, encoding is not utf-8. 
Please " + + "either specify it with the --encoding " + + "option or install chardet.") + self.progress("encoding is not utf8, guessing charset") + encoding = chardet.detect(buf)['encoding'] + self.progress("detected encoding is %s" % encoding) + xmldoc = xml.dom.minidom.parseString(unicode(buf, encoding).encode('utf-8')) + sys.stdout.flush() + return xmldoc.getElementsByTagName('patch') + + def setup_workdir(self): + darcs2 = False + self.oldfashionedpatch = True + self.cwd = os.getcwd() + if self.path_exists(os.path.join(self.origin, "_darcs", "format")): + sock = self.open(os.path.join(self.origin, "_darcs", "format")) + format = [x.strip() for x in sock] + sock.close() + darcs2 = 'darcs-2' in format + self.oldfashionedpatch = not 'hashed' in format + if not self.oldfashionedpatch: + self.progress("parsing the inventory") + if "://" not in self.origin: + os.chdir(self.origin) + self.parse_inventory() + if not self.options.import_marks or not os.path.exists(self.working): + # init the tmp darcs repo + os.mkdir(self.working) + os.chdir(self.working) + if darcs2: + os.system("darcs init --darcs-2") + else: + os.system("darcs init --old-fashioned-inventory") + else: + os.chdir(self.working) + if self.options.import_marks: + sock = os.popen("darcs pull -a --match 'hash %s' %s" % (self.import_marks[-1], self.origin)) + self.log("Building/updating working directory:\n%s" % sock.read()) + sock.close() + + def export_patches(self): + patches = self.get_patches() + # this is the number of the NEXT patch + count = 1 + if len(self.import_marks): + patches = patches[1:] + count = len(self.import_marks) + 1 + if len(self.export_marks): + # this is the mark number of the NEXT patch + markcount = int(self.export_marks[-1].split(' ')[0][1:]) + 1 + else: + markcount = count + # this may be huge and we need it many times + patchnum = len(patches) + + if not len(self.import_marks): + self.progress("starting export, repo has %d patches" % patchnum) + else: + 
self.progress("continuing export, %d patches to convert" % patchnum) + paths = [] + for i in patches: + # apply the patch + hash = i.attributes['hash'].value + buf = ["\nNew patches:\n"] + if self.oldfashionedpatch: + sock = self.gzip_open(os.path.join(self.origin, "_darcs", "patches", hash)) + else: + sock = self.gzip_open(os.path.join(self.origin, "_darcs", "patches", self.hashes[count-1])) + buf.append(sock.read()) + sock.close() + sock = os.popen("darcs changes --context") + buf.append(sock.read()) + sock.close() + sock = subprocess.Popen(["darcs", "apply", "--allow-conflicts"], stdin=subprocess.PIPE, stdout=subprocess.PIPE) + sock.stdin.write("".join(buf)) + sock.stdin.close() + self.log("Applying %s:\n%s" % (hash, sock.stdout.read())) + sock.stdout.close() + message = self.get_patchname(i) + # export the commit + print "commit %s" % self.git_branch + print "mark :%s" % markcount + if self.options.export_marks: + self.export_marks.append(":%s %s" % (markcount, hash)) + date = self.get_date(i.attributes['date'].value) + print "committer %s %s %s" % (self.get_author(i), date, self.get_zone_str()) + print "data %d\n%s" % (len(message), message) + if markcount > 1: + print "from :%s" % (markcount-1) + # export the files + for j in paths: + print "D %s" % j + paths = [] + for (root, dirs, files) in os.walk ("."): + for f in files: + j = os.path.normpath(os.path.join(root, f)) + if j.startswith("_darcs") or "-darcs-backup" in j: + continue + paths.append(j) + sock = open(j) + buf = sock.read() + sock.close() + # darcs does not track the executable bit :/ + print "M 644 inline %s" % j + print "data %s\n%s" % (len(buf), buf) + if message[:4] == "TAG ": + tag = re.sub('[^\xe9-\xf8\w.\-]+', '_', message[4:].strip().split('\n')[0]).strip('_') + print "tag %s" % tag + print "from :%s" % markcount + print "tagger %s %s %s" % (self.get_author(i), date, self.get_zone_str()) + print "data %d\n%s" % (len(message), message) + if count % self.prognum == 0: + self.progress("%d/%d 
patches" % (count, patchnum)) + count += 1 + markcount += 1 + + os.chdir(self.cwd) + + if not self.options.export_marks: + shutil.rmtree(self.working) + self.logsock.close() + + def handle_export_marks(self): + if self.options.export_marks: + self.progress("writing export marks") + sock = open(self.options.export_marks, 'w') + sock.write("\n".join(self.export_marks)) + sock.write("\n") + sock.close() + + self.progress("finished") + + def handle(self): + self.handle_opts() + self.handle_import_marks() + self.setup_workdir() + self.export_patches() + self.handle_export_marks() + +if __name__ == "__main__": + h = Handler() + h.handle() |