summary | refs | log | tree | commit | diff
path: root/tools/dev/wc-ng/populate-pristine.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/dev/wc-ng/populate-pristine.py')
-rwxr-xr-x tools/dev/wc-ng/populate-pristine.py 108
1 files changed, 108 insertions, 0 deletions
diff --git a/tools/dev/wc-ng/populate-pristine.py b/tools/dev/wc-ng/populate-pristine.py
new file mode 100755
index 0000000..8857371
--- /dev/null
+++ b/tools/dev/wc-ng/populate-pristine.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+A script that takes a .svn/pristine/ hierarchy, with its existing
+.svn/wc.db database, and populates the database's PRISTINE table
+accordingly. (Use 'svn cleanup' to remove unreferenced pristines.)
+
+Usage:
+
+ %s /path/to/wc [...]
+"""
+
+# TODO: resolve the NotImplemented() in __main__
+
+# TODO: increment refcount upon collision
+# TODO: add <given file>, not just argv[1]/.svn/pristine/??/*
+
+import hashlib
+import os
+import re
+import sqlite3
+import sys
+
+# Highest wc.db format number (sqlite 'user_version') this script accepts;
+# open_db() raises UnknownFormat for anything greater.
+# ### This could require any other format that has the same PRISTINE schema
+# ### and semantics.
+FORMAT = 22
+# Number of bytes md5_of() reads from a pristine file per read call.
+BUFFER_SIZE = 4 * 1024
+
+class UnknownFormat(Exception):
+  """Raised by open_db() when wc.db reports a format newer than FORMAT.
+
+  Attributes:
+    formatno: the format number (sqlite 'user_version') read from wc.db.
+  """
+  def __init__(self, formatno):
+    # Initialise the base class too, so that str(e) and an uncaught
+    # traceback show the offending format instead of an empty message.
+    Exception.__init__(self, "unknown working copy format %s" % (formatno,))
+    self.formatno = formatno
+
+def open_db(wc_path):
+  """Open WC_PATH/.svn/wc.db and return the sqlite3 connection.
+
+  The database's 'user_version' pragma is read and compared with FORMAT.
+
+  Raises:
+    UnknownFormat: if the stored format number is greater than FORMAT.
+
+  On any failure after connecting, the connection is closed before the
+  exception propagates, so no handle on wc.db is left open.
+  """
+  wc_db = os.path.join(wc_path, '.svn', 'wc.db')
+  conn = sqlite3.connect(wc_db)
+  try:
+    curs = conn.cursor()
+    try:
+      curs.execute('pragma user_version;')
+      formatno = int(curs.fetchone()[0])
+    finally:
+      curs.close()
+    if formatno > FORMAT:
+      raise UnknownFormat(formatno)
+  except Exception:
+    # Do not leak the connection to a database we are refusing to use.
+    conn.close()
+    raise
+  return conn
+
+# Matches a name consisting of exactly 40 lowercase hexadecimal digits;
+# populate() uses it to pick out the files to insert.
+_sha1_re = re.compile(r'^[0-9a-f]{40}$')
+
+def md5_of(path):
+  """Return the hexadecimal MD5 digest of the contents of the file at PATH.
+
+  The file is read in chunks of BUFFER_SIZE bytes, so memory use does not
+  grow with the size of the file.
+  """
+  ctx = hashlib.md5()
+  # Open in binary mode: a bare os.open() without O_BINARY translates line
+  # endings on Windows and would yield a wrong digest.  The 'with' block
+  # also guarantees the file is closed if a read fails.
+  with open(path, 'rb') as f:
+    while True:
+      s = f.read(BUFFER_SIZE)
+      if not s:
+        break
+      ctx.update(s)
+  return ctx.hexdigest()
+
+# Parameterized statement populate() executes once per pristine file.  The
+# five placeholders are, in order: checksum, compression, size, refcount and
+# md5_checksum.
+# NOTE(review): presumably 'checksum' is the table's key, in which case
+# 'INSERT OR REPLACE' overwrites an existing row (refcount included) rather
+# than incrementing it -- see the refcount TODO near the top of the file.
+INSERT_QUERY = """
+ INSERT OR REPLACE
+ INTO pristine(checksum,compression,size,refcount,md5_checksum)
+ VALUES (?,?,?,?,?)
+"""
+
+def populate(wc_path):
+  """Record every file in WC_PATH/.svn/pristine/xx/ in the PRISTINE table.
+
+  Each file whose name is a 40-digit lowercase hex string is inserted (or
+  replaced) with its name as the '$sha1$' checksum, its size on disk, a
+  refcount of 1 and its '$md5 $' checksum.  Progress is written to stdout.
+
+  Raises:
+    UnknownFormat: propagated from open_db() for an unsupported wc.db.
+
+  The database connection is closed before returning, also on error.
+  """
+  # No trailing separator here: with one, the walk root itself would pass
+  # the parent-directory test below and be treated as a shard directory.
+  pristine_dir = os.path.join(wc_path, '.svn', 'pristine')
+  conn = open_db(wc_path)
+  try:
+    sys.stdout.write("Updating '%s': " % wc_path)
+    for dirname, dirs, files in os.walk(pristine_dir):
+      # skip everything but .svn/pristine/xx/
+      if os.path.dirname(dirname) != pristine_dir:
+        continue
+      # Nothing below a shard directory is of interest; do not descend.
+      del dirs[:]
+      sys.stdout.write("'%s', " % os.path.basename(dirname))
+      for f in files:
+        if not _sha1_re.match(f):
+          continue
+        fullpath = os.path.join(dirname, f)
+        conn.execute(INSERT_QUERY,
+                     ('$sha1$'+f, None, os.stat(fullpath).st_size, 1,
+                      '$md5 $'+md5_of(fullpath)))
+      # periodic transaction commits, for efficiency
+      conn.commit()
+    sys.stdout.write(".\n")
+  finally:
+    conn.close()
+
+if __name__ == '__main__':
+  # Deliberate guard (see the TODO near the top of the file): the script is
+  # disabled until Subversion can make use of the populated table.
+  # NotImplementedError is the exception class; the NotImplemented constant
+  # is not callable and would raise a confusing TypeError here instead.
+  raise NotImplementedError("""Subversion does not know yet to avoid fetching
+ a file when a file with matching sha1 appears in the PRISTINE table.""")
+
+  # Default to the current directory when no working copy is named.
+  paths = sys.argv[1:]
+  if not paths:
+    paths = ['.']
+  for wc_path in paths:
+    try:
+      populate(wc_path)
+    except UnknownFormat as e:
+      # Report and carry on with the remaining working copies.
+      sys.stderr.write("Don't know how to handle '%s' (format %d)\n"
+                       % (wc_path, e.formatno))