diff options
Diffstat (limited to 'tools/dev/normalize-dump.py')
-rwxr-xr-x | tools/dev/normalize-dump.py | 137 |
1 files changed, 137 insertions, 0 deletions
diff --git a/tools/dev/normalize-dump.py b/tools/dev/normalize-dump.py new file mode 100755 index 0000000..10cde4e --- /dev/null +++ b/tools/dev/normalize-dump.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
#
#

import sys
import re

# Matches one "Name: value" header line; the space after the colon is optional.
header_re = re.compile(r'^([^:]*): ?(.*)$')


class NodePath:
    """The header fields recorded for one node path within a revision."""

    def __init__(self, path, headers):
        """Remember PATH and its HEADERS (a dict of header name -> value)."""
        self.path = path
        self.headers = headers

    def dump(self):
        """Print the path, then every header sorted by name, to stdout."""
        print((' ' * 3) + self.path)
        for header in sorted(self.headers):
            print((' ' * 6) + header + ': ' + self.headers[header])


def dump_revision(rev, nodepaths):
    """Print revision REV and all of its node paths in sorted order.

    REV is the revision number as a string; NODEPATHS maps each path to
    its NodePath object.  The summary goes to stdout and a progress
    message goes to stderr.
    """
    sys.stderr.write('* Normalizing revision ' + rev + '...')
    print('Revision ' + rev)
    for path in sorted(nodepaths):
        nodepaths[path].dump()
    sys.stderr.write('done\n')


def _as_text(data):
    """Return DATA as str, decoding it first if the stream yielded bytes."""
    if isinstance(data, str):
        return data
    # Undecodable bytes are escaped rather than dropped, so two different
    # invalid sequences still produce different (and printable) output.
    return data.decode('utf-8', 'backslashreplace')


def _content_length(headers, *names):
    """Return the sum of the integer length headers NAMES (missing -> 0)."""
    return sum(int(headers.get(name, 0)) for name in names)


def parse_header_block(fp):
    """Read one block of "Name: value" lines from FP.

    FP may be a text or a binary stream.  Reading stops at the first
    blank line or at end of file.

    Returns a tuple (HEADERS, EOF): HEADERS is a dict of the header
    fields read (names and values as str) and EOF is true when the end
    of the stream was reached.

    Raises Exception if a non-blank line is not of the form "Name: value".
    """
    headers = {}
    while True:
        line = _as_text(fp.readline())
        if line == '':
            # readline() returns an empty result only at end of file.
            return headers, True
        line = line.strip()
        if line == '':
            return headers, False
        matches = header_re.match(line)
        if not matches:
            raise Exception('Malformed header block')
        headers[matches.group(1)] = matches.group(2)


def parse_file(fp):
    """Parse the dumpfile stream FP and print a normalized summary.

    Header blocks are read one at a time.  A block with a
    'Revision-number' header starts a new revision (the previous one, if
    any, is dumped first); a block with a 'Node-path' header is recorded
    against the current revision.  The content following either kind of
    block is skipped using the length headers.

    FP should be a binary stream so that the skipped lengths are counted
    in bytes; a text stream is accepted but then counts characters.

    Raises Exception for a malformed header block, or for a non-empty
    block that is neither a revision nor a node once a revision has been
    seen.
    """
    nodepaths = {}
    current_rev = None

    while True:
        # Parse a block of headers
        headers, eof = parse_header_block(fp)

        # This is a revision header block
        if 'Revision-number' in headers:

            # If there was a previous revision, dump it
            if current_rev is not None:
                dump_revision(current_rev, nodepaths)

            # Reset the data for this revision
            current_rev = headers['Revision-number']
            nodepaths = {}

            # Skip the contents
            fp.read(_content_length(headers, 'Prop-content-length'))

        # This is a node header block
        elif 'Node-path' in headers:

            # Make a new NodePath object, and add it to the
            # dictionary thereof
            path = headers['Node-path']
            nodepaths[path] = NodePath(path, headers)

            # Skip the content
            fp.read(_content_length(headers,
                                    'Text-content-length',
                                    'Prop-content-length'))

        # Not a revision, not a node -- if we've already seen at least
        # one revision block, we are in an errorful state.
        elif current_rev is not None and headers:
            raise Exception('Header block from outta nowhere')

        if eof:
            if current_rev is not None:
                dump_revision(current_rev, nodepaths)
            break


def usage():
    """Print the usage message to stdout and exit with status 0."""
    print('Usage: ' + sys.argv[0] + ' [DUMPFILE]')
    print('')
    print('Reads a Subversion dumpfile from DUMPFILE (or, if not provided,')
    print('from stdin) and normalizes the metadata contained therein,')
    print('printing summarized and sorted information.  This is useful for')
    print('generating data about dumpfiles in a diffable fashion.')
    sys.exit(0)


def main():
    """Normalize the dumpfile named on the command line, or stdin."""
    if len(sys.argv) > 1:
        if sys.argv[1] == '--help':
            usage()
        with open(sys.argv[1], 'rb') as fp:
            parse_file(fp)
    else:
        # Prefer the underlying binary stream so content lengths are
        # counted in bytes; fall back to sys.stdin where there is none.
        parse_file(getattr(sys.stdin, 'buffer', sys.stdin))


if __name__ == '__main__':
    main()