summary | refs | log | tree | commit | diff
path: root/tools/dev/normalize-dump.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/dev/normalize-dump.py')
-rwxr-xr-x tools/dev/normalize-dump.py 137
1 files changed, 137 insertions, 0 deletions
diff --git a/tools/dev/normalize-dump.py b/tools/dev/normalize-dump.py
new file mode 100755
index 0000000..10cde4e
--- /dev/null
+++ b/tools/dev/normalize-dump.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python
+#
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+
+import sys
+import re
+
# Splits one header line into its name (everything before the first colon)
# and its value (the rest of the line, minus one optional leading space).
header_re = re.compile(
    r'^([^:]*): ?(.*)$'
)
+
class NodePath:
    """A node record from a dumpfile: its path plus its header mapping."""

    def __init__(self, path, headers):
        """Remember the node's path and its header-name -> value mapping."""
        self.headers = headers
        self.path = path

    def dump(self):
        """Print the path, then every header in sorted name order, to stdout.

        The path line is indented three spaces; each "name: value" line is
        indented six.
        """
        path_indent = ' ' * 3
        header_indent = ' ' * 6
        print(path_indent + self.path)
        for name in sorted(self.headers):
            print(header_indent + name + ': ' + self.headers[name])
+
+
def dump_revision(rev, nodepaths):
    """Print the normalized form of one revision to stdout.

    rev is the revision number as a string. nodepaths maps each node path
    to an object providing a dump() method; the nodes are dumped in sorted
    path order. A progress message is written to stderr around the dump.
    """
    progress = sys.stderr
    progress.write('* Normalizing revision ' + rev + '...')
    print('Revision ' + rev)
    for key in sorted(nodepaths):
        nodepaths[key].dump()
    progress.write('done\n')
+
+
+
def parse_header_block(fp):
    """Read one block of "Name: value" header lines from fp.

    Lines are consumed until a blank line or the end of input. fp may be a
    text stream or a binary stream (main() opens dumpfiles with mode 'rb');
    bytes lines are decoded as UTF-8 so the returned names and values are
    always text.

    Returns a (headers, eof) tuple. headers is a dict mapping header name
    to value. eof is 1 when the end of input was reached and 0 when the
    block was terminated by a blank line.

    Raises Exception('Malformed header block') for a non-blank line that
    contains no colon.
    """
    headers = {}
    while True:
        line = fp.readline()
        # readline() returns an empty result ('' or b'') only at end of input.
        if not line:
            return headers, 1
        # A binary stream yields bytes; comparing those against '' would
        # never succeed, so normalize to text before looking at the line.
        if not isinstance(line, str):
            line = line.decode('utf-8', 'replace')
        line = line.strip()
        if not line:
            return headers, 0
        name, colon, value = line.partition(':')
        if not colon:
            raise Exception('Malformed header block')
        # One space after the colon is a separator, not part of the value.
        if value.startswith(' '):
            value = value[1:]
        headers[name] = value
+
+
def parse_file(fp):
    """Walk a dumpfile stream and print every revision in normalized form.

    Header blocks are read one at a time with parse_header_block(). A block
    carrying 'Revision-number' starts a new revision; a block carrying
    'Node-path' adds a NodePath to the current revision. The content that
    follows a block is skipped using its '*-content-length' headers. Each
    revision is printed with dump_revision() once the next one starts or
    the input ends.

    Raises Exception('Header block from outta nowhere') when a non-empty
    block that is neither kind appears after the first revision.
    """
    nodes_by_path = {}
    rev = None

    while True:
        block, at_eof = parse_header_block(fp)

        if 'Revision-number' in block:
            # A new revision begins: flush the one collected so far.
            if rev:
                dump_revision(rev, nodes_by_path)

            rev = block['Revision-number']
            nodes_by_path = {}

            # Step over the revision's property content.
            fp.read(int(block.get('Prop-content-length', 0)))

        elif 'Node-path' in block:
            # Record the node under its path for the current revision.
            node_path = block['Node-path']
            nodes_by_path[node_path] = NodePath(node_path, block)

            # Step over the node's text and property content.
            text_size = int(block.get('Text-content-length', 0))
            prop_size = int(block.get('Prop-content-length', 0))
            fp.read(text_size + prop_size)

        elif rev and block:
            # Unknown non-empty block after the first revision was seen.
            raise Exception('Header block from outta nowhere')

        if at_eof:
            break

    # Input exhausted: emit whatever revision is still pending.
    if rev:
        dump_revision(rev, nodes_by_path)
+
def usage():
    """Print the command-line help text to stdout, then exit with status 0."""
    help_lines = [
        'Usage: ' + sys.argv[0] + ' [DUMPFILE]',
        '',
        'Reads a Subversion dumpfile from DUMPFILE (or, if not provided,',
        'from stdin) and normalizes the metadata contained therein,',
        'printing summarized and sorted information. This is useful for',
        'generating data about dumpfiles in a diffable fashion.',
    ]
    for text in help_lines:
        print(text)
    sys.exit(0)
+
def main():
    """Command-line entry point.

    With a DUMPFILE argument, normalize that file; with no argument, read
    the dump from stdin. '--help' as the first argument prints the usage
    text and exits.
    """
    if len(sys.argv) > 1:
        if sys.argv[1] == '--help':
            usage()
        # Binary mode so that fp.read(n) in parse_file() skips n bytes.
        # The 'with' block closes the dumpfile even if parsing raises.
        with open(sys.argv[1], 'rb') as fp:
            parse_file(fp)
    else:
        parse_file(sys.stdin)


if __name__ == '__main__':
    main()
+
+
+
+