summaryrefslogtreecommitdiff
path: root/tools/dev/mlpatch.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/dev/mlpatch.py')
-rwxr-xr-xtools/dev/mlpatch.py167
1 files changed, 167 insertions, 0 deletions
diff --git a/tools/dev/mlpatch.py b/tools/dev/mlpatch.py
new file mode 100755
index 0000000..d74d820
--- /dev/null
+++ b/tools/dev/mlpatch.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python
+#
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+
+# mlpatch.py: Run with no arguments for usage
+
+import sys, os
+import sgmllib
+try:
+ # Python >=3.0
+ from html.entities import entitydefs
+ from urllib.request import urlopen as urllib_request_urlopen
+except ImportError:
+ # Python <3.0
+ from htmlentitydefs import entitydefs
+ from urllib2 import urlopen as urllib_request_urlopen
+import fileinput
+
+CHUNKSIZE = 8 * 1024
+
+class MyParser(sgmllib.SGMLParser):
+ def __init__(self):
+ self.baseclass = sgmllib.SGMLParser
+ self.baseclass.__init__(self)
+ self.entitydefs = entitydefs
+ self.entitydefs["nbsp"] = " "
+ self.inbody = False
+ self.complete_line = False
+ self.discard_gathered()
+
+ def discard_gathered(self):
+ self.gather_data = False
+ self.gathered_data = ""
+
+ def noop(self):
+ pass
+
+ def out(self, data):
+ sys.stdout.write(data)
+
+ def handle_starttag(self, tag, method, attrs):
+ if not self.inbody: return
+ self.baseclass.handle_starttag(self, tag, method, attrs)
+
+ def handle_endtag(self, tag, method):
+ if not self.inbody: return
+ self.baseclass.handle_endtag(self, tag, method)
+
+ def handle_data(self, data):
+ if not self.inbody: return
+ data = data.replace('\n','')
+ if len(data) == 0: return
+ if self.gather_data:
+ self.gathered_data += data
+ else:
+ if self.complete_line:
+ if data[0] in ('+', '-', ' ', '#') \
+ or data.startswith("Index:") \
+ or data.startswith("@@ ") \
+ or data.startswith("======"):
+ # Real new line
+ self.out('\n')
+ else:
+ # Presume that we are wrapped
+ self.out(' ')
+ self.complete_line = False
+ self.out(data)
+
+ def handle_charref(self, ref):
+ if not self.inbody: return
+ self.baseclass.handle_charref(self, ref)
+
+ def handle_entityref(self, ref):
+ if not self.inbody: return
+ self.baseclass.handle_entityref(self, ref)
+
+ def handle_comment(self, comment):
+ if comment == ' body="start" ':
+ self.inbody = True
+ elif comment == ' body="end" ':
+ self.inbody = False
+
+ def handle_decl(self, data):
+ if not self.inbody: return
+ print("DECL: " + data)
+
+ def unknown_starttag(self, tag, attrs):
+ if not self.inbody: return
+ print("UNKTAG: %s %s" % (tag, attrs))
+
+ def unknown_endtag(self, tag):
+ if not self.inbody: return
+ print("UNKTAG: /%s" % (tag))
+
+ def do_br(self, attrs):
+ self.complete_line = True
+
+ def do_p(self, attrs):
+ if self.complete_line:
+ self.out('\n')
+ self.out(' ')
+ self.complete_line = True
+
+ def start_a(self, attrs):
+ self.gather_data = True
+
+ def end_a(self):
+ self.out(self.gathered_data.replace('_at_', '@'))
+ self.discard_gathered()
+
+ def close(self):
+ if self.complete_line:
+ self.out('\n')
+ self.baseclass.close(self)
+
+
+def main():
+ if len(sys.argv) == 1:
+ sys.stderr.write(
+ "usage: mlpatch.py dev|users year month msgno > foobar.patch\n" +
+ "example: mlpatch.py dev 2005 01 0001 > issue-XXXX.patch\n" +
+ """
+ Very annoyingly, the http://svn.haxx.se/ subversion mailing list archives
+ mangle inline patches, and provide no raw message download facility
+ (other than for an entire month's email as an mbox).
+
+ So, I wrote this script, to demangle them. It's not perfect, as it has to
+ guess about whitespace, but it does an acceptable job.\n""")
+ sys.exit(0)
+ elif len(sys.argv) != 5:
+ sys.stderr.write("error: mlpatch.py: Bad parameters - run with no "
+ + "parameters for usage\n")
+ sys.exit(1)
+ else:
+ list, year, month, msgno = sys.argv[1:]
+ url = "http://svn.haxx.se/" \
+ + "%(list)s/archive-%(year)s-%(month)s/%(msgno)s.shtml" % locals()
+ print("MsgUrl: " + url)
+ msgfile = urllib_request_urlopen(url)
+ p = MyParser()
+ buffer = msgfile.read(CHUNKSIZE)
+ while buffer:
+ p.feed(buffer)
+ buffer = msgfile.read(CHUNKSIZE)
+ p.close()
+ msgfile.close()
+
+if __name__ == '__main__':
+ main()