diff options
Diffstat (limited to 'exporters/hg-fast-export.py')
-rwxr-xr-x | exporters/hg-fast-export.py | 442 |
1 files changed, 0 insertions, 442 deletions
#!/usr/bin/env python

# Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
# License: MIT <http://www.opensource.org/licenses/mit-license.php>

"""Export a Mercurial repository as a git-fast-import stream.

The stream is written to stdout; progress and diagnostic messages go to
stderr so that they never end up in the stream.
"""

from mercurial import repo,hg,cmdutil,util,ui,revlog,node
from hg2git import setup_repo,fixup_user,get_branch,get_changeset
from hg2git import load_cache,save_cache,get_git_sha1,set_default_branch,set_origin_name
from tempfile import mkstemp
from optparse import OptionParser
import re
import sys
import os

# silly regex to catch Signed-off-by lines in log message
sob_re = re.compile(r'^Signed-[Oo]ff-[Bb]y: (.+)$')
# insert 'checkpoint' command after this many commits or none at all if 0
cfg_checkpoint_count = 0
# write some progress message every this many file contents written
cfg_export_boundary = 1000


def gitmode(flags):
    """Return the git file mode string for a manifest flag string.

    'l' selects the symlink mode and takes precedence over 'x', which
    selects the executable mode; anything else is a regular file.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'


def wr(msg=''):
    """Write msg followed by a newline to stdout.

    None is treated like the empty string, so wr() and wr(None) both emit
    a bare newline. msg is expected to be a string.
    """
    if msg is None:
        msg = ''
    # two writes instead of concatenation: msg may be a whole file's data
    sys.stdout.write(msg)
    sys.stdout.write('\n')


def checkpoint(count):
    """Increment the command counter and return it.

    When cfg_checkpoint_count is positive, a 'checkpoint' command is
    emitted every cfg_checkpoint_count-th call.
    """
    count = count + 1
    if cfg_checkpoint_count > 0 and count % cfg_checkpoint_count == 0:
        sys.stderr.write("Checkpoint after %d commits\n" % count)
        wr('checkpoint')
        wr()
    return count


def get_parent_mark(parent, marks):
    """Get the mark for some parent.

    If we saw it in the current session, return :%d syntax and
    otherwise the SHA1 from the cache."""
    # marks are always revision number + 1 (see export_commit)
    return marks.get(str(parent), ':%d' % (parent + 1))


def file_mismatch(f1, f2):
    """See if two revisions of a file are not equal."""
    return node.hex(f1) != node.hex(f2)


def split_dict(dleft, dright, l=None, c=None, r=None, match=file_mismatch):
    """Loop over our repository and find all changed and missing files.

    Names only in dleft are appended to l, names in both whose values
    differ according to match() are appended to c, and names only in
    dright are appended to r. The three lists are returned as a tuple.
    Lists passed in are extended in place; omitted ones start out empty
    on every call.
    """
    if l is None:
        l = []
    if c is None:
        c = []
    if r is None:
        r = []
    for left in dleft.keys():
        right = dright.get(left, None)
        if right is None:
            # we have the file but our parent hasn't: add to left set
            l.append(left)
        elif match(dleft[left], right):
            # we have it but checksums mismatch: add to center set
            c.append(left)
    for right in dright.keys():
        left = dleft.get(right, None)
        if left is None:
            # if parent has file but we don't: add to right set
            r.append(right)
        # change is already handled when comparing child against parent
    return l, c, r


def get_filechanges(repo, revision, parents, mleft):
    """Given some repository and revision, find all changed/deleted files.

    mleft is compared against the manifest of every parent that is >= 0.
    Returns three sorted lists: added, changed and removed file names.
    The results for all parents are accumulated into the same lists.
    """
    l, c, r = [], [], []
    for p in parents:
        if p < 0:
            continue
        mright = repo.changectx(p).manifest()
        l, c, r = split_dict(mleft, mright, l, c, r)
    l.sort()
    c.sort()
    r.sort()
    return l, c, r


def get_author(logmessage, committer, authors):
    """As git distinguishes between author and committer of a patch, try
    to extract the author by detecting Signed-off-by lines.

    This walks from the end of the log message towards the top skipping
    empty lines. Upon the first non-empty line, it walks all Signed-off-by
    lines upwards to find the first one. For that (if found), it extracts
    authorship information the usual way (authors table, cleaning, etc.)

    If no Signed-off-by line is found, this defaults to the committer.

    This may sound stupid (and it somehow is), but in log messages we
    accidentally may have lines in the middle starting with
    "Signed-off-by: foo" and thus matching our detection regex. Prevent
    that."""
    loglines = logmessage.split('\n')
    i = len(loglines) - 1
    # from tail walk to top skipping empty lines
    while i >= 0 and not loglines[i].strip():
        i -= 1
    # walk further upwards over the trailing run of sob lines; 'first'
    # ends up as the topmost line of that run
    first = None
    while i >= 0:
        m = sob_re.match(loglines[i])
        if m is None:
            break
        first = m
        i -= 1
    # if the last non-empty line matches our Signed-off-by regex:
    # extract username
    if first is not None:
        return fixup_user(first.group(1), authors)
    return committer


def export_file_contents(ctx, manifest, files):
    """Emit an inline 'M' (filemodify) command with data for each file.

    '.hgtags' is skipped. A progress line is written to stderr every
    cfg_export_boundary files, plus a final one when more than
    cfg_export_boundary files were requested.
    """
    count = 0
    total = len(files)
    for fname in files:
        # Skip .hgtags files. They only get us in trouble.
        if fname == ".hgtags":
            sys.stderr.write('Skip %s\n' % (fname))
            continue
        d = ctx.filectx(fname).data()
        wr('M %s inline %s' % (gitmode(manifest.flags(fname)), fname))
        wr('data %d' % len(d))  # had some trouble with size()
        wr(d)
        count += 1
        if count % cfg_export_boundary == 0:
            sys.stderr.write('Exported %d/%d files\n' % (count, total))
    if total > cfg_export_boundary:
        sys.stderr.write('Exported %d/%d files\n' % (count, total))


def is_merge(parents):
    """Return True if more than one entry of parents is >= 0."""
    return len([p for p in parents if p >= 0]) > 1


def sanitize_name(name, what="branch"):
    """Sanitize input roughly according to git-check-ref-format(1)

    Replaces '[', ' ', '~', '^', ':', '?', '*' and '..' by '_', replaces
    a trailing '/' and a leading '.' of any path component by '_', and
    collapses runs of '_'. A warning naming 'what' goes to stderr if the
    name had to be changed. Empty names and empty path components are
    passed through unchanged.
    """

    def dot(component):
        # startswith() instead of [0]: components may be empty ('a//b')
        if component.startswith('.'):
            return '_' + component[1:]
        return component

    n = re.sub(r'([\[ ~^:?*]|\.\.)', '_', name)
    if n.endswith('/'):
        n = n[:-1] + '_'
    n = '/'.join([dot(part) for part in n.split('/')])
    n = re.sub('_+', '_', n)

    if n != name:
        sys.stderr.write('Warning: sanitized %s [%s] to [%s]\n' % (what, name, n))
    return n


def export_commit(ui, repo, revision, marks, mapping, heads, last, max, count, authors, sob, brmap):
    """Emit one 'commit' command for the given hg revision.

    marks, heads, last and brmap are dictionaries updated in place:
    marks[str(revision)] receives the mark of the new commit, heads and
    last record the state of the commit's branch, and brmap caches
    sanitized branch names. max is only used for the progress message.
    Returns the command counter as advanced by checkpoint().
    """

    def get_branchname(name):
        # sanitize every branch name only once so the warning isn't repeated
        if name in brmap:
            return brmap[name]
        n = sanitize_name(name)
        brmap[name] = n
        return n

    (revnode, _, user, (time, timezone), _, desc, branch, _) = \
        get_changeset(ui, repo, revision, authors)
    parents = repo.changelog.parentrevs(revision)

    branch = get_branchname(branch)

    wr('commit refs/heads/%s' % branch)
    wr('mark :%d' % (revision + 1))
    if sob:
        wr('author %s %d %s' % (get_author(desc, user, authors), time, timezone))
    wr('committer %s %d %s' % (user, time, timezone))
    # +1 accounts for the newline wr() appends after desc
    wr('data %d' % (len(desc) + 1))
    wr(desc)
    wr()

    # NOTE(review): the '> 0' / '<= 0' parent tests below look like they
    # treat revision 0 as "no parent", whereas is_merge() and
    # get_filechanges() use 0 as a valid parent -- confirm before changing.
    pidx1, pidx2 = 0, 1
    if parents[1] > 0:
        if parents[0] <= 0 or \
                repo.changelog.node(parents[0]) < repo.changelog.node(parents[1]):
            pidx1, pidx2 = 1, 0

    full_rev = (revision == 0)

    src = heads.get(branch, '')
    if src != '':
        # if we have a cached head, this is an incremental import: initialize it
        # and kill reference so we won't init it again
        wr('from %s' % src)
        heads[branch] = ''
        sys.stderr.write('%s: Initializing to parent [%s]\n' %
                         (branch, src))
    elif branch not in heads and revision > 0:
        if parents[0] >= 0:
            # newly created branch with parent: connect to parent
            tmp = get_parent_mark(parents[0], marks)
            wr('from %s' % tmp)
            sys.stderr.write('%s: Link new branch to parent [%s]\n' %
                             (branch, tmp))
        else:
            # newly created branch without parent: feed full revision
            full_rev = True
    elif last.get(branch, revision) != parents[pidx1] and parents[pidx1] > 0 and revision > 0:
        pm = get_parent_mark(parents[pidx1], marks)
        sys.stderr.write('%s: Placing commit [r%d] in branch [%s] on top of [r%d]\n' %
                         (branch, revision, branch, parents[pidx1]))
        wr('from %s' % pm)

    if parents[pidx2] > 0:
        pm = get_parent_mark(parents[pidx2], marks)
        sys.stderr.write('%s: Merging with parent [%s] from [r%d]\n' %
                         (branch, pm, parents[pidx2]))
        wr('merge %s' % pm)

    last[branch] = revision
    heads[branch] = ''
    # we need this later to write out tags
    marks[str(revision)] = ':%d' % (revision + 1)

    ctx = repo.changectx(str(revision))
    man = ctx.manifest()
    added, changed, removed, kind = [], [], [], ''

    if full_rev:
        # first revision: feed in full manifest
        added = sorted(man.keys())
        kind = 'full'
    elif is_merge(parents):
        # later merge revision: feed in changed manifest
        # for many files comparing checksums is expensive so only do it for
        # merges where we really need it due to hg's revlog logic
        added, changed, removed = get_filechanges(repo, revision, parents, man)
        kind = 'thorough delta'
    else:
        # later non-merge revision: feed in changed manifest
        # if we have exactly one parent, just take the changes from the
        # manifest without expensively comparing checksums
        f = repo.status(repo.lookup(parents[0]), revnode)[:3]
        added, changed, removed = f[1], f[0], f[2]
        kind = 'simple delta'

    sys.stderr.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
                     (branch, kind, revision + 1, max, len(added), len(changed), len(removed)))

    for name in removed:
        wr('D %s' % name)
    export_file_contents(ctx, man, added)
    export_file_contents(ctx, man, changed)
    wr()

    return checkpoint(count)


def export_tags(ui, repo, marks_cache, mapping_cache, count, authors):
    """Emit a 'reset refs/tags/...' command for every exportable hg tag.

    'tip' and tags whose node is not in mapping_cache are skipped.
    Returns the command counter, advanced once per exported tag.
    """
    for tag, tagnode in repo.tagslist():
        tag = sanitize_name(tag, "tag")
        # ignore latest revision
        if tag == 'tip':
            continue
        hexnode = tagnode.encode('hex_codec')
        # ignore tags to nodes that are missing (ie, 'in the future')
        if hexnode not in mapping_cache:
            sys.stderr.write('Tag %s refers to unseen node %s\n' % (tag, hexnode))
            continue

        rev = int(mapping_cache[hexnode])

        # same lookup as for commit parents, so that the fallback mark
        # matches the 'mark :<rev+1>' numbering used by export_commit
        ref = get_parent_mark(rev, marks_cache)
        sys.stderr.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag, rev, ref))
        wr('reset refs/tags/%s' % tag)
        wr('from %s' % ref)
        wr()
        count = checkpoint(count)
    return count


def load_authors(filename):
    """Read an author map of 'key=value' lines into a dictionary.

    Keys and values are stripped of surrounding whitespace. Lines that
    don't match are reported on stderr and skipped. A missing file
    yields an empty dictionary.
    """
    cache = {}
    if not os.path.exists(filename):
        return cache
    lre = re.compile('^([^=]+)[ ]*=[ ]*(.+)$')
    f = open(filename, 'r')
    try:
        lineno = 0
        for line in f:
            lineno += 1
            m = lre.match(line)
            if m is None:
                sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename, lineno))
                continue
            cache[m.group(1).strip()] = m.group(2).strip()
    finally:
        f.close()
    # report the entries actually stored, not the number of lines read
    sys.stderr.write('Loaded %d authors\n' % len(cache))
    return cache


def verify_heads(ui, repo, cache, force):
    """Check cached git heads against git and branches against hg heads.

    Returns False at the first branch whose git SHA1 differs from the
    cached one, or at the first branch seen on more than one hg head,
    unless force is set. Returns True otherwise.
    """
    branches = repo.branchtags()
    l = [(-repo.changelog.rev(n), n, t) for t, n in branches.items()]
    l.sort()

    # get list of hg's branches to verify, don't take all git has
    for _, _, b in l:
        b = get_branch(b)
        sha1 = get_git_sha1(b)
        c = cache.get(b)
        if sha1 is not None and c is not None:
            sys.stderr.write('Verifying branch [%s]\n' % b)
        if sha1 != c:
            sys.stderr.write('Error: Branch [%s] modified outside hg-fast-export:'
                             '\n%s (repo) != %s (cache)\n' % (b, sha1, c))
            if not force:
                return False

    # verify that branch has exactly one head
    seen = {}
    for h in repo.heads():
        (_, _, _, _, _, _, branch, _) = get_changeset(ui, repo, h)
        if seen.get(branch, False):
            sys.stderr.write('Error: repository has at least one unnamed head: hg r%s\n' %
                             repo.changelog.rev(h))
            if not force:
                return False
        seen[branch] = True

    return True


def mangle_mark(mark):
    """Convert a mark number to the matching revision number string."""
    return str(int(mark) - 1)


def hg2git(repourl, m, marksfile, mappingfile, headsfile, tipfile, authors=None, sob=False, force=False):
    """Export revisions of the repository at repourl to stdout.

    Revisions from the 'tip' value stored in tipfile (0 if absent) up to
    m (exclusive) are exported; a negative m, or one beyond the
    repository, means "up to the end". Afterwards the tip and mapping
    caches are saved and tags are exported. Returns 1 if verify_heads()
    fails and 0 otherwise.
    """
    if authors is None:
        authors = {}
    _max = int(m)

    try:
        # on Windows keep stdout from translating newlines in the stream
        import msvcrt
        msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
    except ImportError:
        pass

    marks_cache = load_cache(marksfile, mangle_mark)
    mapping_cache = load_cache(mappingfile)
    heads_cache = load_cache(headsfile)
    state_cache = load_cache(tipfile)

    ui, repo = setup_repo(repourl)

    if not verify_heads(ui, repo, heads_cache, force):
        return 1

    try:
        tip = repo.changelog.count()
    except AttributeError:
        tip = len(repo)

    first_rev = int(state_cache.get('tip', 0))
    end_rev = _max
    if _max < 0 or end_rev > tip:
        end_rev = tip

    # the node -> revision mapping always covers everything from revision 0
    for rev in range(0, end_rev):
        (revnode, _, _, _, _, _, _, _) = get_changeset(ui, repo, rev, authors)
        mapping_cache[revnode.encode('hex_codec')] = str(rev)

    c = 0
    last = {}
    brmap = {}
    for rev in range(first_rev, end_rev):
        c = export_commit(ui, repo, rev, marks_cache, mapping_cache, heads_cache,
                          last, end_rev, c, authors, sob, brmap)

    state_cache['tip'] = end_rev
    state_cache['repo'] = repourl
    save_cache(tipfile, state_cache)
    save_cache(mappingfile, mapping_cache)

    c = export_tags(ui, repo, marks_cache, mapping_cache, c, authors)

    sys.stderr.write('Issued %d commands\n' % c)

    return 0


def main():
    """Parse the command line and run hg2git(); return its exit status."""

    def bail(parser, opt):
        sys.stderr.write('Error: No %s option given\n' % opt)
        parser.print_help()
        sys.exit(2)

    parser = OptionParser()

    parser.add_option("-m", "--max", type="int", dest="max",
                      help="Maximum hg revision to import")
    parser.add_option("--mapping", dest="mappingfile",
                      help="File to read last run's hg-to-git SHA1 mapping")
    parser.add_option("--marks", dest="marksfile",
                      help="File to read git-fast-import's marks from")
    parser.add_option("--heads", dest="headsfile",
                      help="File to read last run's git heads from")
    parser.add_option("--status", dest="statusfile",
                      help="File to read status from")
    parser.add_option("-r", "--repo", dest="repourl",
                      help="URL of repo to import")
    parser.add_option("-s", action="store_true", dest="sob",
                      default=False, help="Enable parsing Signed-off-by lines")
    parser.add_option("-A", "--authors", dest="authorfile",
                      help="Read authormap from AUTHORFILE")
    parser.add_option("-f", "--force", action="store_true", dest="force",
                      default=False, help="Ignore validation errors by force")
    parser.add_option("-M", "--default-branch", dest="default_branch",
                      help="Set the default branch")
    parser.add_option("-o", "--origin", dest="origin_name",
                      help="use <name> as namespace to track upstream")

    (options, args) = parser.parse_args()

    m = -1
    if options.max is not None:
        m = options.max

    if options.marksfile is None:
        options.marksfile = 'hg-export.marks'
    if options.mappingfile is None:
        options.mappingfile = 'hg-export.mapping'
    if options.headsfile is None:
        options.headsfile = 'hg-export.heads'
    if options.statusfile is None:
        options.statusfile = 'hg-export.status'
    if options.repourl is None:
        bail(parser, '--repo')

    a = {}
    if options.authorfile is not None:
        a = load_authors(options.authorfile)

    if options.default_branch is not None:
        set_default_branch(options.default_branch)

    if options.origin_name is not None:
        set_origin_name(options.origin_name)

    return hg2git(options.repourl, m, options.marksfile, options.mappingfile,
                  options.headsfile, options.statusfile,
                  authors=a, sob=options.sob, force=options.force)


if __name__ == '__main__':
    sys.exit(main())