summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIan Clatworthy <ian.clatworthy@canonical.com>2008-03-17 13:32:25 -0500
committerIan Clatworthy <ian.clatworthy@canonical.com>2008-03-17 13:32:25 -0500
commitfc8c4798831a491e88895f81b6240a248df95eff (patch)
tree53c3e9d2ba71dd1f53cc20582083780a299f56c1
parent8dac77b8f22b0f685aceb28ba431a8d455071226 (diff)
downloadbzr-fastimport-fc8c4798831a491e88895f81b6240a248df95eff.tar.gz
improve hg-fast-export integration
-rw-r--r--exporters/hg-fast-export.README48
-rwxr-xr-xexporters/hg-fast-export.py6
-rw-r--r--exporters/hg-fast-export.txt71
-rwxr-xr-xexporters/hg2git.py96
4 files changed, 147 insertions, 74 deletions
diff --git a/exporters/hg-fast-export.README b/exporters/hg-fast-export.README
new file mode 100644
index 0000000..85ec8ab
--- /dev/null
+++ b/exporters/hg-fast-export.README
@@ -0,0 +1,48 @@
+hg-fast-export.py - mercurial to bzr converter using bzr fast-import
+
+Legal
+=====
+
+Most hg-* scripts are licensed under the MIT license[0] and were written
+by Rocco Rutte <pdmef@gmx.net> with hints and help from the git list and
+#mercurial on freenode. hg-fast-export[1] was integrated into
+bzr-fastimport by Ian Clatworthy with permission from Rocco.
+
+Usage
+=====
+
+Using hg-fast-export is quite simple:
+
+ bzr init-repo foo.bzr
+ cd foo.bzr
+ hg-fast-export.py -r <repo> | bzr fast-import -
+
+Notes/Limitations
+=================
+
+hg-fast-export supports multiple branches, but only named branches with exactly
+one head each. Otherwise, commits to the tips of those heads within a branch
+will get flattened into merge commits.
+
+Design
+======
+
+hg-fast-export.py was designed in a way that doesn't require a 2-pass mechanism
+or any prior repository analysis: it just outputs what it finds.
+This also implies that it relies heavily on the strictly
+linear ordering of changesets from hg, i.e. its append-only storage
+model, so that changesets hg-fast-export has already seen never get modified.
+
+Todo
+====
+
+Test incremental imports, particularly handling of branches and tags.
+
+For one-time conversions, everything should be fine.
+
+Footnotes
+=========
+
+[0] http://www.opensource.org/licenses/mit-license.php
+
+[1] http://repo.or.cz/w/fast-export.git
diff --git a/exporters/hg-fast-export.py b/exporters/hg-fast-export.py
index e3af8a6..2890e9c 100755
--- a/exporters/hg-fast-export.py
+++ b/exporters/hg-fast-export.py
@@ -383,9 +383,9 @@ if __name__=='__main__':
m=-1
if options.max!=None: m=options.max
- if options.marksfile==None: bail(parser,'--marks')
- if options.headsfile==None: bail(parser,'--heads')
- if options.statusfile==None: bail(parser,'--status')
+ if options.marksfile==None: options.marksfile = 'hg-export.marks'
+ if options.headsfile==None: options.headsfile = 'hg-export.heads'
+ if options.statusfile==None: options.statusfile = 'hg-export.status'
if options.repourl==None: bail(parser,'--repo')
a={}
diff --git a/exporters/hg-fast-export.txt b/exporters/hg-fast-export.txt
deleted file mode 100644
index 1b8bb1c..0000000
--- a/exporters/hg-fast-export.txt
+++ /dev/null
@@ -1,71 +0,0 @@
-hg-fast-import.(sh|py) - mercurial to git converter using git-fast-import
-
-Legal
-=====
-
-Most hg-* scripts are licensed under the MIT license[0] and were written
-by Rocco Rutte <pdmef@gmx.net> with hints and help from the git list and
-#mercurial on freenode. hg-reset.py is licensed under GPLv2 since it
-copies some code from the mercurial sources.
-
-Usage
-=====
-
-Using hg-fast-export is quite simple for a mercurial repository <repo>:
-
- mkdir repo-git # or whatever
- cd repo-git
- git init
- hg-fast-import.sh -r <repo>
-
-Incremental imports to track hg repos is supported, too.
-
-Using hg-reset it is quite simple within a git repository that is
-hg-fast-export'ed from mercurial:
-
- hg-reset.sh -R <revision>
-
-will give hints on which branches need adjustment for starting over
-again.
-
-Notes/Limitations
-=================
-
-hg-fast-import supports multiple branches but only named branches with exaclty
-one head each. Otherwise commits to the tip of these heads within branch
-will get flattened into merge commits.
-
-As each git-fast-import run creates a new pack file, it may be required
-to repack the repository quite often for incremental imports (especially
-when importing a small number of changesets per incremental import).
-
-Design
-======
-
-hg-fast-import.py was designed in a way that doesn't require a 2-pass mechanism
-or any prior repository analysis: if just feeds what it finds into
-git-fast-import. This also implies that it heavily relies on strictly
-linear ordering of changesets from hg, i.e. its append-only storage
-model so that changesets hg-fast-import already saw never get modified.
-
-Todo
-====
-
-For incremental imports, handling tags needs to be reworked (maybe):
-Right now we assume that once a tag is created, it stays forever and
-never changes. However,
-
- 1) tags in hg may be removed
- 2) tags may change
-
-I'm not yet sure how to handle this and how this interferes with
-non-hg-based tags in git.
-
-The same for branches: They may get removed.
-
-For one-time conversions, everything is fine.
-
-Footnotes
-=========
-
-[0] http://www.opensource.org/licenses/mit-license.php
diff --git a/exporters/hg2git.py b/exporters/hg2git.py
new file mode 100755
index 0000000..3015e0b
--- /dev/null
+++ b/exporters/hg2git.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2007 Rocco Rutte <pdmef@gmx.net>
+# License: MIT <http://www.opensource.org/licenses/mit-license.php>
+
+from mercurial import repo,hg,cmdutil,util,ui,revlog,node
+import re
+import os
+import sys
+
# git branch name substituted for hg's default branch names
# ('HEAD', 'default' and the empty name)
cfg_master = 'master'
# silly regex to see if user field has email address: group 1 is the name,
# group 2 is the address including its angle brackets
user_re = re.compile(r'([^<]+) (<[^>]+>)$')
# silly regex to clean out user names: matches a name that is wrapped
# entirely in double quotes, group 1 is the name without the quotes
user_clean_re = re.compile(r'^["]([^"]+)["]$')
+
def setup_repo(url):
    """Open the hg repository at url using a non-interactive ui.

    Returns a (ui, repository) tuple.
    """
    repo_ui = ui.ui(interactive=False)
    repository = hg.repository(repo_ui, url)
    return repo_ui, repository
+
def fixup_user(user, authors):
    """Normalize an hg user field to the form 'Name <mail>'.

    user    -- the raw user string of a changeset
    authors -- optional mapping from raw user strings to replacement
               strings; pass None to skip the lookup

    Returns the string 'name mail' where mail is wrapped in angle brackets.
    """
    if authors is not None:
        # if we have an authors table, try to get mapping
        # by defaulting to the current value of 'user'
        user = authors.get(user, user)

    m = user_re.match(user)
    if m is None:
        # if we don't have 'Name <mail>' syntax, use
        # 'user <devnull@localhost>' if user contains no '@' and
        # 'user <user>' otherwise
        name = user
        if '@' in user:
            mail = '<%s>' % user
        else:
            mail = '<devnull@localhost>'
    else:
        # if we have 'Name <mail>' syntax, everything is fine :)
        name, mail = m.group(1), m.group(2)

    # remove any silly quoting from username
    quoted = user_clean_re.match(name)
    if quoted is not None:
        name = quoted.group(1)
    return '%s %s' % (name, mail)
+
def get_branch(name):
    """Translate an hg branch name into the branch name to export.

    'HEAD', 'default' and the empty string are replaced by cfg_master;
    every other name is returned unchanged.
    """
    # HEAD may be from CVS imports into hg
    if name in ('HEAD', 'default', ''):
        return cfg_master
    return name
+
def get_changeset(ui, repo, revision, authors=None):
    """Read one changeset from repo and return its normalized fields.

    ui       -- unused here; kept for interface compatibility
    repo     -- repository object providing lookup() and changelog.read()
    revision -- revision identifier passed to repo.lookup()
    authors  -- optional author mapping handed to fixup_user(); None
                (the default) means no mapping is applied

    Returns the tuple
    (node, manifest, user, (time, tz), files, desc, branch, extra)
    where user has been passed through fixup_user(), tz is a '+HHMM' /
    '-HHMM' string and branch has been passed through get_branch().
    """
    changeset_node = repo.lookup(revision)
    (manifest, user, (time, timezone), files, desc, extra) = \
        repo.changelog.read(changeset_node)

    # The stored offset has the opposite sign of the one we print.  Split
    # the absolute value into hours and minutes and add the sign afterwards:
    # flooring a negative offset directly would turn e.g. -3:30 into -0430.
    offset = -timezone
    if offset < 0:
        sign = '-'
    else:
        sign = '+'
    hours, minutes = divmod(abs(offset) // 60, 60)
    tz = '%s%02d%02d' % (sign, hours, minutes)

    branch = get_branch(extra.get('branch', 'master'))
    return (changeset_node, manifest, fixup_user(user, authors), (time, tz),
            files, desc, branch, extra)
+
def mangle_key(key):
    """Return key unchanged; this is the default key transform of load_cache."""
    return key


def load_cache(filename, get_key=mangle_key):
    """Load a ':key value' file into a dictionary.

    filename -- path of the file to read; a missing file yields an empty dict
    get_key  -- callable applied to each key (without its leading ':')
                before it is stored

    Every valid line consists of exactly two space-separated fields, the
    first of which starts with ':'.  Invalid lines are reported on stderr
    with their line number and skipped.

    Returns the dictionary mapping transformed keys to value strings.
    """
    cache = {}
    if not os.path.exists(filename):
        return cache
    f = open(filename, 'r')
    try:
        for index, line in enumerate(f):
            fields = line.split(' ')
            # startswith() also copes with an empty first field (a line
            # beginning with a space), which indexing [0][0] would not
            if len(fields) != 2 or not fields[0].startswith(':'):
                sys.stderr.write('Invalid file format in [%s], line %d\n'
                                 % (filename, index + 1))
                continue
            # put key:value in cache, key without ^:
            cache[get_key(fields[0][1:])] = fields[1].split('\n')[0]
    finally:
        # release the handle even if get_key() or the read fails
        f.close()
    return cache
+
def save_cache(filename, cache):
    """Write cache to filename, one ':key value' line per entry.

    filename -- path of the file to (over)write
    cache    -- dictionary whose keys and values are written via str()
    """
    f = open(filename, 'w+')
    try:
        # explicit loop: map() is for building results, and a lazy map()
        # would never perform the writes at all
        for key, value in cache.items():
            f.write(':%s %s\n' % (str(key), str(value)))
    finally:
        f.close()
+
def get_git_sha1(name, type='heads'):
    """Return the first 40 characters printed by git for refs/<type>/<name>.

    name -- ref name below refs/<type>/
    type -- ref namespace, 'heads' by default (the parameter name shadows
            the builtin but is kept for interface compatibility)

    GIT_DIR is taken from the environment and defaults to '/dev/null'.
    Returns None when git prints nothing or cannot be run.
    """
    # local import: the module-level imports of this file do not include it
    import subprocess

    ref = 'refs/%s/%s' % (type, name)
    env = dict(os.environ)
    env['GIT_DIR'] = os.getenv('GIT_DIR', '/dev/null')
    try:
        devnull = open(os.devnull, 'w')
        try:
            # use git rev-parse to support packed refs; the argument list
            # bypasses the shell, so ref names cannot inject commands
            p = subprocess.Popen(['git', 'rev-parse', '--verify', ref],
                                 stdout=subprocess.PIPE, stderr=devnull,
                                 env=env, universal_newlines=True)
            output = p.communicate()[0]
        finally:
            devnull.close()
    except (IOError, OSError):
        # covers I/O failures as well as a missing git executable
        return None
    first_line = output.split('\n')[0]
    if not first_line:
        return None
    return first_line[0:40]