diff options
Diffstat (limited to 'tools/dev/wc-ng/populate-pristine.py')
-rwxr-xr-x | tools/dev/wc-ng/populate-pristine.py | 108 |
1 files changed, 108 insertions, 0 deletions
diff --git a/tools/dev/wc-ng/populate-pristine.py b/tools/dev/wc-ng/populate-pristine.py new file mode 100755 index 0000000..8857371 --- /dev/null +++ b/tools/dev/wc-ng/populate-pristine.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +A script that takes a .svn/pristine/ hierarchy, with its existing +.svn/wc.db database, and populates the database's PRISTINE table +accordingly. (Use 'svn cleanup' to remove unreferenced pristines.) + +Usage: + + %s /path/to/wc [...] +""" + +# TODO: resolve the NotImplemented() in __main__ + +# TODO: increment refcount upon collision +# TODO: add <given file>, not just argv[1]/.svn/pristine/??/* + +import hashlib +import os +import re +import sqlite3 +import sys + +# ### This could require any other format that has the same PRISTINE schema +# ### and semantics. +FORMAT = 22 +BUFFER_SIZE = 4 * 1024 + +class UnknownFormat(Exception): + def __init__(self, formatno): + self.formatno = formatno + +def open_db(wc_path): + wc_db = os.path.join(wc_path, '.svn', 'wc.db') + conn = sqlite3.connect(wc_db) + curs = conn.cursor() + curs.execute('pragma user_version;') + formatno = int(curs.fetchone()[0]) + if formatno > FORMAT: + raise UnknownFormat(formatno) + return conn + +_sha1_re = re.compile(r'^[0-9a-f]{40}$') + +def md5_of(path): + fd = os.open(path, os.O_RDONLY) + ctx = hashlib.md5() + while True: + s = os.read(fd, BUFFER_SIZE) + if len(s): + ctx.update(s) + else: + os.close(fd) + return ctx.hexdigest() + +INSERT_QUERY = """ + INSERT OR REPLACE + INTO pristine(checksum,compression,size,refcount,md5_checksum) + VALUES (?,?,?,?,?) +""" + +def populate(wc_path): + conn = open_db(wc_path) + sys.stdout.write("Updating '%s': " % wc_path) + for dirname, dirs, files in os.walk(os.path.join(wc_path, '.svn/pristine/')): + # skip everything but .svn/pristine/xx/ + if os.path.basename(os.path.dirname(dirname)) == 'pristine': + sys.stdout.write("'%s', " % os.path.basename(dirname)) + for f in filter(lambda x: _sha1_re.match(x), files): + fullpath = os.path.join(dirname, f) + conn.execute(INSERT_QUERY, + ('$sha1$'+f, None, os.stat(fullpath).st_size, 1, + '$md5 $'+md5_of(fullpath))) + # periodic transaction commits, for efficiency + conn.commit() + else: + sys.stdout.write(".\n") + +if __name__ == '__main__': + raise NotImplemented("""Subversion does not know yet to avoid fetching + a file when a file with matching sha1 appears in the PRISTINE table.""") + + paths = sys.argv[1:] + if not paths: + paths = ['.'] + for wc_path in paths: + try: + populate(wc_path) + except UnknownFormat, e: + sys.stderr.write("Don't know how to handle '%s' (format %d)'\n" + % (wc_path, e.formatno)) |