summaryrefslogtreecommitdiff
path: root/tools/dev/mlpatch.py
blob: d74d820cb8392d251215d3826751349a076d2495 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#!/usr/bin/env python
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#

# mlpatch.py: Run with no arguments for usage

import sys, os
import sgmllib
try:
  # Python >=3.0
  from html.entities import entitydefs
  from urllib.request import urlopen as urllib_request_urlopen
except ImportError:
  # Python <3.0
  from htmlentitydefs import entitydefs
  from urllib2 import urlopen as urllib_request_urlopen
import fileinput

# Number of bytes requested per read() call when main() streams the
# fetched page into the parser.
CHUNKSIZE = 8 * 1024

class MyParser(sgmllib.SGMLParser):
  """Recover the plain text of a patch from an HTML mail-archive page.

  Only markup seen between the comments <!-- body="start" --> and
  <!-- body="end" --> is processed; everything else is ignored.  Text is
  written to stdout through out().  Newlines in the HTML text itself are
  dropped; line breaks are reconstructed from <br> and <p> tags, using a
  heuristic to decide whether a break was a real patch line boundary or
  just the archive wrapping a long line.
  """

  # Text beginning with one of these is taken to start a genuine new line
  # of the patch (diff content lines, "Index:" headers, hunk headers and
  # "======" separators).  Anything else after a break is presumed to be
  # the continuation of a wrapped line.
  _LINE_START_PREFIXES = ('+', '-', ' ', '#', "Index:", "@@ ", "======")

  def __init__(self):
    # The base class is called explicitly through this attribute
    # everywhere in the class rather than via super().
    self.baseclass = sgmllib.SGMLParser
    self.baseclass.__init__(self)
    # Use a private copy: the imported entitydefs mapping is shared module
    # state, and adding the "nbsp" override to it in place would change
    # the table for every other user in the process.
    self.entitydefs = dict(entitydefs)
    self.entitydefs["nbsp"] = " "
    # True while between the body="start" and body="end" comments.
    self.inbody = False
    # True when a line break has been seen but not yet written out.
    self.complete_line = False
    self.discard_gathered()

  def discard_gathered(self):
    """Stop gathering anchor text and forget anything gathered so far."""
    self.gather_data = False
    self.gathered_data = ""

  def noop(self):
    """Do nothing."""
    pass

  def out(self, data):
    """Write DATA to stdout."""
    sys.stdout.write(data)

  def _emit(self, text):
    """Write non-empty TEXT, first resolving any pending line break.

    If a break is pending, it becomes a newline when TEXT looks like the
    start of a patch line, and a single space otherwise.
    """
    if self.complete_line:
      if text.startswith(self._LINE_START_PREFIXES):
        # Real new line
        self.out('\n')
      else:
        # Presume that we are wrapped
        self.out(' ')
    self.complete_line = False
    self.out(text)

  def handle_starttag(self, tag, method, attrs):
    """Forward a start tag to the base class, but only inside the body."""
    if not self.inbody:
      return
    self.baseclass.handle_starttag(self, tag, method, attrs)

  def handle_endtag(self, tag, method):
    """Forward an end tag to the base class, but only inside the body."""
    if not self.inbody:
      return
    self.baseclass.handle_endtag(self, tag, method)

  def handle_data(self, data):
    """Handle a run of text inside the body.

    Newlines are stripped.  While inside an <a> element the text is
    accumulated for end_a(); otherwise it is written immediately.
    """
    if not self.inbody:
      return
    data = data.replace('\n', '')
    if not data:
      return
    if self.gather_data:
      self.gathered_data += data
    else:
      self._emit(data)

  def handle_charref(self, ref):
    """Forward a character reference to the base class inside the body."""
    if not self.inbody:
      return
    self.baseclass.handle_charref(self, ref)

  def handle_entityref(self, ref):
    """Forward an entity reference to the base class inside the body."""
    if not self.inbody:
      return
    self.baseclass.handle_entityref(self, ref)

  def handle_comment(self, comment):
    """Switch body processing on or off at the archive's marker comments."""
    if comment == ' body="start" ':
      self.inbody = True
    elif comment == ' body="end" ':
      self.inbody = False

  def handle_decl(self, data):
    """Report a declaration found inside the body."""
    if not self.inbody:
      return
    print("DECL: " + data)

  def unknown_starttag(self, tag, attrs):
    """Report a start tag inside the body that has no handler."""
    if not self.inbody:
      return
    print("UNKTAG: %s %s" % (tag, attrs))

  def unknown_endtag(self, tag):
    """Report an end tag inside the body that has no handler."""
    if not self.inbody:
      return
    print("UNKTAG: /%s" % (tag))

  def do_br(self, attrs):
    """Note a pending line break; it is written when more text arrives."""
    self.complete_line = True

  def do_p(self, attrs):
    """Handle <p>: finish any pending line, write a space, mark a break."""
    if self.complete_line:
      self.out('\n')
    self.out(' ')
    self.complete_line = True

  def start_a(self, attrs):
    """Begin accumulating the text of an <a> element."""
    self.gather_data = True

  def end_a(self):
    """Write the accumulated anchor text with '_at_' restored to '@'.

    The text goes through the same pending-line-break handling as ordinary
    text, so an anchor that directly follows a break is not glued onto the
    end of the previous line.
    """
    text = self.gathered_data.replace('_at_', '@')
    self.discard_gathered()
    if text:
      self._emit(text)

  def close(self):
    """Write a final newline if a break is pending, then close the parser."""
    if self.complete_line:
      self.out('\n')
    self.baseclass.close(self)


def main():
  """Command-line entry point: fetch one archived message and demangle it.

  With no arguments, writes the usage text to stderr and exits with
  status 0.  With any argument count other than four, writes an error to
  stderr and exits with status 1.  Otherwise the four arguments (list
  name, year, month, message number) select a page under
  http://svn.haxx.se/; the page URL is printed to stdout and the page is
  streamed through MyParser in CHUNKSIZE-sized reads.
  """
  if len(sys.argv) == 1:
    sys.stderr.write(
    "usage:   mlpatch.py dev|users year month msgno > foobar.patch\n" +
    "example: mlpatch.py dev 2005 01 0001 > issue-XXXX.patch\n" +
    """
    Very annoyingly, the http://svn.haxx.se/ subversion mailing list archives
    mangle inline patches, and provide no raw message download facility
    (other than for an entire month's email as an mbox).

    So, I wrote this script, to demangle them. It's not perfect, as it has to
    guess about whitespace, but it does an acceptable job.\n""")
    sys.exit(0)

  if len(sys.argv) != 5:
    sys.stderr.write("error: mlpatch.py: Bad parameters - run with no "
    + "parameters for usage\n")
    sys.exit(1)

  # Named so as not to shadow the builtin "list"; the URL is formatted
  # positionally so it does not depend on local variable names.
  listname, year, month, msgno = sys.argv[1:]
  url = "http://svn.haxx.se/%s/archive-%s-%s/%s.shtml" \
      % (listname, year, month, msgno)
  print("MsgUrl: " + url)
  msgfile = urllib_request_urlopen(url)
  try:
    parser = MyParser()
    chunk = msgfile.read(CHUNKSIZE)
    while chunk:
      parser.feed(chunk)
      chunk = msgfile.read(CHUNKSIZE)
    parser.close()
  finally:
    # Release the HTTP response even if parsing fails part-way through.
    msgfile.close()

# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
  main()