diff options
Diffstat (limited to 'tools/dev/mlpatch.py')
-rwxr-xr-x | tools/dev/mlpatch.py | 167 |
1 files changed, 167 insertions, 0 deletions
#!/usr/bin/env python
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#

# mlpatch.py: Run with no arguments for usage
#
# NOTE: the parser below is built on sgmllib, which was removed from the
# standard library in Python 3.0.  The Python 3 import fallbacks are kept,
# but the script can only actually run where sgmllib is importable.

import sys, os
import sgmllib
try:
    # Python >=3.0
    from html.entities import entitydefs
    from urllib.request import urlopen as urllib_request_urlopen
except ImportError:
    # Python <3.0
    from htmlentitydefs import entitydefs
    from urllib2 import urlopen as urllib_request_urlopen
import fileinput

CHUNKSIZE = 8 * 1024

# Text that begins with one of these is treated as the start of a genuine
# patch line; anything else following a line break is assumed to be the
# continuation of a line that the archive wrapped.
_PATCH_LINE_PREFIXES = ('+', '-', ' ', '#', "Index:", "@@ ", "======")


class MyParser(sgmllib.SGMLParser):
    """Extract the text of an archived mail message and write it to stdout.

    Only markup between the ``<!-- body="start" -->`` and
    ``<!-- body="end" -->`` comments is processed.  Line breaks are
    reconstructed heuristically from <br> and <p> tags, and the text of
    <a> elements has ``_at_`` turned back into ``@``.
    """

    def __init__(self):
        self.baseclass = sgmllib.SGMLParser
        self.baseclass.__init__(self)
        # Work on a private copy: assigning the imported dict directly and
        # then adding "nbsp" would modify the shared module-level table.
        self.entitydefs = dict(entitydefs)
        self.entitydefs["nbsp"] = " "
        self.inbody = False
        self.complete_line = False
        self.discard_gathered()

    def discard_gathered(self):
        """Stop gathering and forget any text gathered so far."""
        self.gather_data = False
        self.gathered_data = ""

    def noop(self):
        """Do nothing."""
        pass

    def out(self, data):
        """Write ``data`` to standard output without adding a newline."""
        sys.stdout.write(data)

    def handle_starttag(self, tag, method, attrs):
        """Dispatch a recognised start tag, but only inside the body."""
        if not self.inbody:
            return
        self.baseclass.handle_starttag(self, tag, method, attrs)

    def handle_endtag(self, tag, method):
        """Dispatch a recognised end tag, but only inside the body."""
        if not self.inbody:
            return
        self.baseclass.handle_endtag(self, tag, method)

    def handle_data(self, data):
        """Emit (or gather) body text, restoring line structure.

        Newlines in the raw HTML are dropped.  If a line break is pending,
        it is emitted as a real newline when ``data`` looks like the start
        of a patch line, and as a single space otherwise.
        """
        if not self.inbody:
            return
        data = data.replace('\n', '')
        if not data:
            return
        if self.gather_data:
            self.gathered_data += data
            return
        if self.complete_line:
            if data.startswith(_PATCH_LINE_PREFIXES):
                # Real new line
                self.out('\n')
            else:
                # Presume that we are wrapped
                self.out(' ')
            self.complete_line = False
        self.out(data)

    def handle_charref(self, ref):
        """Process a character reference, but only inside the body."""
        if not self.inbody:
            return
        self.baseclass.handle_charref(self, ref)

    def handle_entityref(self, ref):
        """Process an entity reference, but only inside the body."""
        if not self.inbody:
            return
        self.baseclass.handle_entityref(self, ref)

    def handle_comment(self, comment):
        """Switch body processing on or off at the marker comments."""
        if comment == ' body="start" ':
            self.inbody = True
        elif comment == ' body="end" ':
            self.inbody = False

    def handle_decl(self, data):
        """Report a declaration encountered inside the body."""
        if not self.inbody:
            return
        print("DECL: " + data)

    def unknown_starttag(self, tag, attrs):
        """Report a start tag inside the body that has no handler."""
        if not self.inbody:
            return
        print("UNKTAG: %s %s" % (tag, attrs))

    def unknown_endtag(self, tag):
        """Report an end tag inside the body that has no handler."""
        if not self.inbody:
            return
        print("UNKTAG: /%s" % (tag))

    def do_br(self, attrs):
        """<br>: remember that a line break is pending."""
        self.complete_line = True

    def do_p(self, attrs):
        """<p>: flush any pending break, emit a space, leave a break pending."""
        if self.complete_line:
            self.out('\n')
        self.out(' ')
        self.complete_line = True

    def start_a(self, attrs):
        """<a>: start gathering the link text instead of writing it out."""
        self.gather_data = True

    def end_a(self):
        """</a>: write the gathered link text with '_at_' restored to '@'."""
        self.out(self.gathered_data.replace('_at_', '@'))
        self.discard_gathered()

    def close(self):
        """Emit a final pending newline, then close the underlying parser."""
        if self.complete_line:
            self.out('\n')
        self.baseclass.close(self)


def main():
    """Fetch one archived message and write its demangled text to stdout.

    Expects exactly four command-line arguments (list name, year, month and
    message number).  With no arguments the usage text is written to stderr
    and the process exits with status 0; with any other argument count an
    error is written to stderr and the process exits with status 1.
    """
    argc = len(sys.argv)
    if argc == 1:
        sys.stderr.write(
            "usage: mlpatch.py dev|users year month msgno > foobar.patch\n" +
            "example: mlpatch.py dev 2005 01 0001 > issue-XXXX.patch\n" +
            """
      Very annoyingly, the http://svn.haxx.se/ subversion mailing list archives
      mangle inline patches, and provide no raw message download facility
      (other than for an entire month's email as an mbox).

      So, I wrote this script, to demangle them. It's not perfect, as it has to
      guess about whitespace, but it does an acceptable job.\n""")
        sys.exit(0)
    if argc != 5:
        sys.stderr.write("error: mlpatch.py: Bad parameters - run with no "
                         + "parameters for usage\n")
        sys.exit(1)

    # Explicit names (rather than ``list`` / ``buffer`` and ``% locals()``)
    # so that no builtin is shadowed and the URL's inputs are visible.
    list_name, year, month, msgno = sys.argv[1:]
    url = "http://svn.haxx.se/" \
        + "%s/archive-%s-%s/%s.shtml" % (list_name, year, month, msgno)
    print("MsgUrl: " + url)

    msgfile = urllib_request_urlopen(url)
    try:
        parser = MyParser()
        chunk = msgfile.read(CHUNKSIZE)
        while chunk:
            parser.feed(chunk)
            chunk = msgfile.read(CHUNKSIZE)
        parser.close()
    finally:
        # Release the connection even if parsing fails part-way through.
        msgfile.close()


if __name__ == '__main__':
    main()