diff options
author | Ian Clatworthy <ian.clatworthy@canonical.com> | 2008-03-17 13:32:25 -0500 |
---|---|---|
committer | Ian Clatworthy <ian.clatworthy@canonical.com> | 2008-03-17 13:32:25 -0500 |
commit | fc8c4798831a491e88895f81b6240a248df95eff (patch) | |
tree | 53c3e9d2ba71dd1f53cc20582083780a299f56c1 | |
parent | 8dac77b8f22b0f685aceb28ba431a8d455071226 (diff) | |
download | bzr-fastimport-fc8c4798831a491e88895f81b6240a248df95eff.tar.gz |
improve hg-fast-export integration
-rw-r--r-- | exporters/hg-fast-export.README | 48 | ||||
-rwxr-xr-x | exporters/hg-fast-export.py | 6 | ||||
-rw-r--r-- | exporters/hg-fast-export.txt | 71 | ||||
-rwxr-xr-x | exporters/hg2git.py | 96 |
4 files changed, 147 insertions, 74 deletions
diff --git a/exporters/hg-fast-export.README b/exporters/hg-fast-export.README new file mode 100644 index 0000000..85ec8ab --- /dev/null +++ b/exporters/hg-fast-export.README @@ -0,0 +1,48 @@ +hg-fast-import.py - mercurial to bzr converter using bzr fast-import + +Legal +===== + +Most hg-* scripts are licensed under the MIT license[0] and were written +by Rocco Rutte <pdmef@gmx.net> with hints and help from the git list and +#mercurial on freenode. hg-fast-export[1] was integrated into +bzr-fastimport by Ian Clatworthy with permission from Rocco. + +Usage +===== + +Using hg-fast-export is quite simple: + + bzr init-repo foo.bzr + cd foo.bzr + hg-fast-import.py -r <repo> | bzr fast-import - + +Notes/Limitations +================= + +hg-fast-import supports multiple branches but only named branches with exactly +one head each. Otherwise commits to the tip of these heads within branch +will get flattened into merge commits. + +Design +====== + +hg-fast-import.py was designed in a way that doesn't require a 2-pass mechanism +or any prior repository analysis: it just outputs what it finds. +This also implies that it heavily relies on strictly +linear ordering of changesets from hg, i.e. its append-only storage +model so that changesets hg-fast-import already saw never get modified. + +Todo +==== + +Test incremental imports, particularly handling of branches and tags. + +For one-time conversions, everything should be fine. 
+ +Footnotes +========= + +[0] http://www.opensource.org/licenses/mit-license.php + +[1] http://repo.or.cz/w/fast-export.git diff --git a/exporters/hg-fast-export.py b/exporters/hg-fast-export.py index e3af8a6..2890e9c 100755 --- a/exporters/hg-fast-export.py +++ b/exporters/hg-fast-export.py @@ -383,9 +383,9 @@ if __name__=='__main__': m=-1 if options.max!=None: m=options.max - if options.marksfile==None: bail(parser,'--marks') - if options.headsfile==None: bail(parser,'--heads') - if options.statusfile==None: bail(parser,'--status') + if options.marksfile==None: options.marksfile = 'hg-export.marks' + if options.headsfile==None: options.headsfile = 'hg-export.heads' + if options.statusfile==None: options.statusfile = 'hg-export.status' if options.repourl==None: bail(parser,'--repo') a={} diff --git a/exporters/hg-fast-export.txt b/exporters/hg-fast-export.txt deleted file mode 100644 index 1b8bb1c..0000000 --- a/exporters/hg-fast-export.txt +++ /dev/null @@ -1,71 +0,0 @@ -hg-fast-import.(sh|py) - mercurial to git converter using git-fast-import - -Legal -===== - -Most hg-* scripts are licensed under the MIT license[0] and were written -by Rocco Rutte <pdmef@gmx.net> with hints and help from the git list and -#mercurial on freenode. hg-reset.py is licensed under GPLv2 since it -copies some code from the mercurial sources. - -Usage -===== - -Using hg-fast-export is quite simple for a mercurial repository <repo>: - - mkdir repo-git # or whatever - cd repo-git - git init - hg-fast-import.sh -r <repo> - -Incremental imports to track hg repos is supported, too. - -Using hg-reset it is quite simple within a git repository that is -hg-fast-export'ed from mercurial: - - hg-reset.sh -R <revision> - -will give hints on which branches need adjustment for starting over -again. - -Notes/Limitations -================= - -hg-fast-import supports multiple branches but only named branches with exaclty -one head each. 
Otherwise commits to the tip of these heads within branch -will get flattened into merge commits. - -As each git-fast-import run creates a new pack file, it may be required -to repack the repository quite often for incremental imports (especially -when importing a small number of changesets per incremental import). - -Design -====== - -hg-fast-import.py was designed in a way that doesn't require a 2-pass mechanism -or any prior repository analysis: if just feeds what it finds into -git-fast-import. This also implies that it heavily relies on strictly -linear ordering of changesets from hg, i.e. its append-only storage -model so that changesets hg-fast-import already saw never get modified. - -Todo -==== - -For incremental imports, handling tags needs to be reworked (maybe): -Right now we assume that once a tag is created, it stays forever and -never changes. However, - - 1) tags in hg may be removed - 2) tags may change - -I'm not yet sure how to handle this and how this interferes with -non-hg-based tags in git. - -The same for branches: They may get removed. - -For one-time conversions, everything is fine. 
- -Footnotes -========= - -[0] http://www.opensource.org/licenses/mit-license.php diff --git a/exporters/hg2git.py b/exporters/hg2git.py new file mode 100755 index 0000000..3015e0b --- /dev/null +++ b/exporters/hg2git.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python + +# Copyright (c) 2007 Rocco Rutte <pdmef@gmx.net> +# License: MIT <http://www.opensource.org/licenses/mit-license.php> + +from mercurial import repo,hg,cmdutil,util,ui,revlog,node +import re +import os +import sys + +# git branch for hg's default 'HEAD' branch +cfg_master='master' +# silly regex to see if user field has email address +user_re=re.compile('([^<]+) (<[^>]+>)$') +# silly regex to clean out user names +user_clean_re=re.compile('^["]([^"]+)["]$') + +def setup_repo(url): + myui=ui.ui(interactive=False) + return myui,hg.repository(myui,url) + +def fixup_user(user,authors): + if authors!=None: + # if we have an authors table, try to get mapping + # by defaulting to the current value of 'user' + user=authors.get(user,user) + name,mail,m='','',user_re.match(user) + if m==None: + # if we don't have 'Name <mail>' syntax, use 'user + # <devnull@localhost>' if use contains no at and + # 'user <user>' otherwise + name=user + if '@' not in user: + mail='<devnull@localhost>' + else: + mail='<%s>' % user + else: + # if we have 'Name <mail>' syntax, everything is fine :) + name,mail=m.group(1),m.group(2) + + # remove any silly quoting from username + m2=user_clean_re.match(name) + if m2!=None: + name=m2.group(1) + return '%s %s' % (name,mail) + +def get_branch(name): + # HEAD may be from CVS imports into hg + if name=='HEAD' or name=='default' or name=='': + name=cfg_master + return name + +def get_changeset(ui,repo,revision,authors={}): + node=repo.lookup(revision) + (manifest,user,(time,timezone),files,desc,extra)=repo.changelog.read(node) + tz="%+03d%02d" % (-timezone / 3600, ((-timezone % 3600) / 60)) + branch=get_branch(extra.get('branch','master')) + return 
(node,manifest,fixup_user(user,authors),(time,tz),files,desc,branch,extra) + +def mangle_key(key): + return key + +def load_cache(filename,get_key=mangle_key): + cache={} + if not os.path.exists(filename): + return cache + f=open(filename,'r') + l=0 + for line in f.readlines(): + l+=1 + fields=line.split(' ') + if fields==None or not len(fields)==2 or fields[0][0]!=':': + sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l)) + continue + # put key:value in cache, key without ^: + cache[get_key(fields[0][1:])]=fields[1].split('\n')[0] + f.close() + return cache + +def save_cache(filename,cache): + f=open(filename,'w+') + map(lambda x: f.write(':%s %s\n' % (str(x),str(cache.get(x)))),cache.keys()) + f.close() + +def get_git_sha1(name,type='heads'): + try: + # use git-rev-parse to support packed refs + cmd="GIT_DIR='%s' git-rev-parse --verify refs/%s/%s 2>/dev/null" % (os.getenv('GIT_DIR','/dev/null'),type,name) + p=os.popen(cmd) + l=p.readline() + p.close() + if l == None or len(l) == 0: + return None + return l[0:40] + except IOError: + return None |