1 files changed, 137 insertions, 0 deletions
diff --git a/tools/dev/normalize-dump.py b/tools/dev/normalize-dump.py
new file mode 100755
index 0000000..10cde4e
--- /dev/null
+++ b/tools/dev/normalize-dump.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python
+#
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+
+import sys
+import re
+
+header_re = re.compile(r'^([^:]*): ?(.*)$')  # group 1: name, group 2: value
+
+class NodePath:
+    """A node path together with the header fields of its dump record."""
+    def __init__(self, path, headers):
+        self.path, self.headers = path, headers
+
+    def dump(self):
+        """Print the path (3-space indent), then headers sorted by name."""
+        print('   ' + self.path)
+        for name, value in sorted(self.headers.items()):
+            print('      ' + name + ': ' + value)
+
+
+def dump_revision(rev, nodepaths):
+    """Print 'Revision REV' then each node sorted by path; log to stderr."""
+    progress = sys.stderr.write
+    progress('* Normalizing revision ' + rev + '...')
+    print('Revision ' + rev)
+    for key in sorted(nodepaths):
+        nodepaths[key].dump()
+    progress('done\n')
+
+
+
+def parse_header_block(fp):
+    """Parse 'Name: value' lines to a blank line/EOF; return (headers, eof)."""
+    headers = {}
+    while True:
+        line = fp.readline()
+        if bytes is not str and isinstance(line, bytes):  # Py3 binary stream
+            line = line.decode('utf-8', 'replace')
+        if not line.strip():  # '' is EOF (eof=1); a blank line ends the block
+            return headers, int(line == '')
+        name, colon, value = line.strip().partition(':')
+        if not colon:
+            raise Exception('Malformed header block')  # line has no ':'
+        headers[name] = value[1:] if value.startswith(' ') else value
+
+
+def parse_file(fp):
+    """Parse the dumpfile FP, printing each revision's normalized summary.
+
+    Header blocks are read one at a time and their content is skipped.
+    Node records are collected per revision and flushed (via
+    dump_revision) when the next revision block or end of input is seen.
+    Raises Exception for a non-empty header block that is neither a
+    revision nor a node once a revision has been seen.
+    """
+    revision = None
+    collected = {}
+    at_eof = False
+
+    while not at_eof:
+        # Read the next block of headers.
+        block, at_eof = parse_header_block(fp)
+
+        if 'Revision-number' in block:
+            # A new revision begins: flush the one gathered so far.
+            if revision:
+                dump_revision(revision, collected)
+            revision = block['Revision-number']
+            collected = {}
+
+            # Step over the revision properties.
+            fp.read(int(block.get('Prop-content-length', 0)))
+
+        elif 'Node-path' in block:
+            # Remember the node under its path; a later record for the
+            # same path replaces the earlier one.
+            node_path = block['Node-path']
+            collected[node_path] = NodePath(node_path, block)
+
+            # Step over the node's text and property content.
+            text_size = int(block.get('Text-content-length', 0))
+            prop_size = int(block.get('Prop-content-length', 0))
+            fp.read(text_size + prop_size)
+
+        elif revision and block:
+            # Unrecognized blocks are tolerated only before the first
+            # revision.
+            raise Exception('Header block from outta nowhere')
+
+    # End of input: flush the last revision, if any.
+    if revision:
+        dump_revision(revision, collected)
+
+def usage():
+    """Print the help text to stdout and exit with status 0."""
+    print('\n'.join(('Usage: ' + sys.argv[0] + ' [DUMPFILE]', '',
+        'Reads a Subversion dumpfile from DUMPFILE (or, if not provided,',
+        'from stdin) and normalizes the metadata contained therein,',
+        'printing summarized and sorted information.  This is useful for',
+        'generating data about dumpfiles in a diffable fashion.')))
+    sys.exit(0)
+
+def main():
+    """Normalize the dumpfile named by argv[1] (or stdin); --help shows usage."""
+    if len(sys.argv) <= 1:
+        return parse_file(sys.stdin)
+    if sys.argv[1] == '--help':
+        usage()
+    with open(sys.argv[1], 'rb') as fp:  # close the file even on a parse error
+        parse_file(fp)
+
+
+if __name__ == '__main__':  # run only when executed as a script
+    main()
+
+
+
+