summaryrefslogtreecommitdiff
path: root/tools/server-side/svnpubsub/irkerbridge.py
blob: 04b7ee25db51ad63f8f3cd5f8707c5c9ac994f4a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
#!/usr/bin/env python
#
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# IrkerBridge - Bridge an SvnPubSub stream to Irker.

# Example:
#  irkerbridge.py --daemon --pidfile pid --logfile log config
#
# For detailed option help use:
#  irkerbridge.py --help

# It expects a config file that has the following parameters:
# streams=url
#   Space separated list of URLs to streams.
#   This option should only be in the DEFAULT section; it is ignored in
#   all other sections.
# irker=hostname:port
#   The hostname/port combination of the irker daemon.  If port is
#   omitted it defaults to 6659.  Irker is connected to over UDP.
# match=What to use to decide if the commit should be sent to irker.
#   It consists of the repository UUID followed by a slash and a glob pattern.
#   The UUID may be replaced by a * to match all UUIDs. The glob pattern is
#   matched against each of the paths changed by the commit.  Both the UUID
#   and the glob pattern must match to send the message to irker.
# to=url
#   Space separated list of URLs (any URL that Irker will accept) to
#   send the resulting message to.  Currently Irker only supports IRC.
# template=string
#   A string to use to format the output.  The string is a Python
#   string Template.  The following variables are available:
#   $committer, $id, $date, $repository, $log, $log_firstline,
#   $log_firstparagraph, $dirs_changed, $dirs_count, $dirs_count_s,
#   $subdirs_count, $subdirs_count_s, $dirs_root
#   Most of them should be self explanatory.  $dirs_count is the number of
#   entries in $dirs_changed, $dirs_count_s is a friendly string version,
#   $dirs_root is the common root of all the $dirs_changed, $subdirs_count
#   is the number of subdirs under the $dirs_root that changed,
#   $subdirs_count_s is a friendly string version. $log_firstparagraph cuts
#   the log message at the first blank line and replaces newlines with spaces.
#
# Within the config file you have sections.  Any configuration option
# missing from a given section is found in the [DEFAULT] section.
#
# Section names are arbitrary names that mean nothing to the bridge.  Each
# section other than the [DEFAULT] section consists of a configuration that
# may match and send a message to irker to deliver.  All matching sections
# will generate a message.
#
# Interpolation of values within the config file is allowed by including
# %(name)s within a value.  For example I can reference the UUID of a repo
# repeatedly by doing:
# [DEFAULT]
# ASF_REPO=13f79535-47bb-0310-9956-ffa450edef68
#
# [#commits]
# match=%(ASF_REPO)s/
#
# You can HUP the process to reload the config file without restarting the
# process.  However, you cannot change the streams it is listening to without
# restarting the process.
#
# TODO: Logging in a better way.

# Upper bound on the length of the formatted message handed to irker.
# A longer message is cut and '...' is appended, so that the result is
# exactly this long:
MAX_PRIVMSG = 400

import os
import sys
import posixpath
import socket
import json
import urlparse
import optparse
import ConfigParser
import traceback
import signal
import re
import fnmatch
from string import Template

# Packages that come with svnpubsub
import svnpubsub.client
import daemonize

class Daemon(daemonize.Daemon):
  """Daemon that feeds SvnPubSub notifications into the bridge object.

  LOGFILE and PIDFILE are handed to daemonize.Daemon unchanged; BDEC is the
  object providing the stream URLs and the commit/event callbacks.
  """

  def __init__(self, logfile, pidfile, bdec):
    """Initialise the base daemon, then remember the bridge object BDEC."""
    daemonize.Daemon.__init__(self, logfile, pidfile)

    self.bdec = bdec

  def setup(self):
    """Do nothing: there is no setup which the parent needs to wait for."""
    pass

  def run(self):
    """Announce the pid, then listen to the configured streams forever."""
    pid = os.getpid()
    # A single parenthesised argument prints the same text whether print
    # is a statement or a function.
    print('irkerbridge started, pid=%d' % pid)

    bridge = self.bdec
    client = svnpubsub.client.MultiClient(bridge.urls,
                                          bridge.commit,
                                          bridge.event)
    client.run_forever()


class BigDoEverythingClass(object):
  """Match SvnPubSub commits against the config and relay them to irker.

  For every commit the matching config sections are located, the commit is
  decorated with extra attributes useful for formatting, the section's
  template is expanded and the result is sent to the irker daemon over UDP.

  All print calls use a single parenthesised argument so that the output is
  identical whether print is a statement or a function.
  """

  def __init__(self, config, options):
    """Store CONFIG and OPTIONS and read the list of stream URLs.

    CONFIG must provide get_value(), sections() and get(); OPTIONS must
    provide a 'verbose' attribute.  The 'streams' value is split on
    whitespace into self.urls.
    """
    self.config = config
    self.options = options
    self.urls = config.get_value('streams').split()

  def locate_matching_configs(self, commit):
    """Return the names of all config sections whose 'match' fits COMMIT.

    A 'match' value is UUID/GLOB.  Without a slash the glob defaults to '*'.
    A section matches when the UUID equals commit.repository (or is '*')
    and at least one path in commit.changed matches the glob.
    """
    result = [ ]
    for section in self.config.sections():
      match = self.config.get(section, "match").split('/', 1)
      if len(match) < 2:
        # No slash so assume all paths
        match.append('*')
      match_uuid, match_path = match
      if match_uuid != "*" and commit.repository != match_uuid:
        continue
      if any(fnmatch.fnmatch(path, match_path) for path in commit.changed):
        result.append(section)
    return result

  def _generate_dirs_changed(self, commit):
    """Derive commit.dirs_changed from commit.changed when it is absent.

    Nothing is done if COMMIT already has 'dirs_changed' or has no
    'changed' attribute.  Otherwise commit.dirs_changed becomes a set of
    directory paths, each with a trailing slash.
    """
    if hasattr(commit, 'dirs_changed') or not hasattr(commit, 'changed'):
      return

    dirs_changed = set()
    for p in commit.changed:
      # endswith() rather than p[-1] so an empty path cannot raise.
      if p.endswith('/') and commit.changed[p]['flags'][1] == 'U':
        # directory with property changes add the directory itself.
        dirs_changed.add(p)
      else:
        # everything else add the parent of the path
        # directories have a trailing slash so if it's present remove
        # it before finding the parent.  The result will be a directory
        # so it needs a trailing slash
        dirs_changed.add(posixpath.dirname(p.rstrip('/')) + '/')

    commit.dirs_changed = dirs_changed
    return

  def fill_in_extra_args(self, commit):
    """Add the derived attributes used by the message templates to COMMIT.

    Every empty attribute of COMMIT is replaced by the string "<null>".
    Then log_firstline, log_firstparagraph and, when dirs_changed is not
    empty, dirs_root, dirs_count, dirs_count_s, subdirs_count and
    subdirs_count_s are set on COMMIT.
    """
    # Set any empty members to the string "<null>"
    v = vars(commit)
    for k in list(v):
      if not v[k]:
        v[k] = '<null>'

    self._generate_dirs_changed(commit)
    # Add entries to the commit object that are useful for
    # formatting.
    commit.log_firstline = commit.log.split("\n",1)[0]
    first_paragraph = re.split("\r?\n\r?\n", commit.log, 1)[0]
    commit.log_firstparagraph = re.sub("\r?\n", " ", first_paragraph)
    if commit.dirs_changed:
      # commonprefix() wants a sequence, and dirs_changed may be a set.
      root = posixpath.commonprefix(list(commit.dirs_changed))
      # commonprefix() compares character by character, so 'trunk/foo/' and
      # 'trunk/foobar/' yield 'trunk/foo'.  Cut back to the last slash so
      # that the root is always a real directory.
      if not root.endswith('/'):
        root = root[:root.rfind('/') + 1]
      if root == '':
        root = '/'
      commit.dirs_root = root

      commit.dirs_count = len(commit.dirs_changed)
      if commit.dirs_count > 1:
        commit.dirs_count_s = " (%d dirs)" %(commit.dirs_count)
      else:
        commit.dirs_count_s = ""

      # The root itself does not count as one of its own subdirectories.
      commit.subdirs_count = commit.dirs_count
      if commit.dirs_root in commit.dirs_changed:
        commit.subdirs_count -= 1
      if commit.subdirs_count >= 1:
        commit.subdirs_count_s = " + %d subdirs" % (commit.subdirs_count)
      else:
        commit.subdirs_count_s = ""

  def _send(self, irker, msg):
    """Send MSG, JSON encoded, as one UDP datagram to IRKER.

    IRKER is 'hostname' or 'hostname:port'; the port defaults to 6659.
    """
    irker_list = irker.split(':')
    if len(irker_list) < 2:
      irker_list.append(6659)
    json_msg = json.dumps(msg)
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
      sock.sendto(json_msg, (irker_list[0],int(irker_list[1])))
    finally:
      # One socket is created per message; close it so descriptors are
      # not left for the garbage collector to reclaim.
      sock.close()
    if self.options.verbose:
      print("SENT: %s to %s" % (json_msg, irker))

  def join_all(self):
    """Send an empty privmsg to every 'to' target of every section."""
    # Like self.commit(), but ignores self.config.get(section, "template").
    for section in self.config.sections():
      irker = self.config.get(section, "irker")
      to_list = self.config.get(section, "to").split()
      if not irker or not to_list:
        continue
      for to in to_list:
        msg = {'to': to, 'privmsg': ''}
        self._send(irker, msg)

  def commit(self, url, commit):
    """Relay COMMIT, received from stream URL, to all matching sections.

    Sections with an empty 'irker', 'to' or 'template' value are skipped.
    Messages longer than MAX_PRIVMSG are truncated and end in '...'.
    Any exception is reported on stdout/stderr and then re-raised.
    """
    if self.options.verbose:
      print("RECV: from %s" % url)
      print(json.dumps(vars(commit), indent=2))

    try:
      config_sections = self.locate_matching_configs(commit)
      if config_sections:
        self.fill_in_extra_args(commit)
        for section in config_sections:
          irker = self.config.get(section, "irker")
          to_list = self.config.get(section, "to").split()
          template = self.config.get(section, "template")
          if not irker or not to_list or not template:
            continue
          privmsg = Template(template).safe_substitute(vars(commit))
          if len(privmsg) > MAX_PRIVMSG:
            privmsg = privmsg[:MAX_PRIVMSG-3] + '...'
          for to in to_list:
            msg = {'to': to, 'privmsg': privmsg}
            self._send(irker, msg)

    except:
      # Deliberately catch everything: the error is only logged here and
      # is always re-raised for the caller to deal with.
      print("Unexpected error:")
      traceback.print_exc()
      sys.stdout.flush()
      raise

  def event(self, url, event_name, event_arg):
    """Log stream events; "ping" events are logged only when verbose."""
    if self.options.verbose or event_name != "ping":
      print('EVENT: %s from %s' % (event_name, url))
      sys.stdout.flush()



class ReloadableConfig(ConfigParser.SafeConfigParser):
  """Config parser that re-reads its file when the process receives SIGHUP."""

  def __init__(self, fname):
    """Read the config file FNAME and install the SIGHUP handler."""
    ConfigParser.SafeConfigParser.__init__(self)

    self.fname = fname
    self.read(fname)

    signal.signal(signal.SIGHUP, self.hangup)

  def hangup(self, signalnum, frame):
    """Signal handler for SIGHUP: reload the configuration."""
    self.reload()

  def reload(self):
    """Discard the current configuration and read the file again."""
    print("RELOAD: config file: %s" % self.fname)
    sys.stdout.flush()

    # Delete everything. Just re-reading would overlay, and would not
    # remove sections/options. Note that [DEFAULT] will not be removed.
    for section in self.sections():
      self.remove_section(section)

    # Get rid of the [DEFAULT] options.  remove_section() cannot do this
    # because the defaults are not kept with the ordinary sections, so
    # every default option is removed individually.
    for option in list(self.defaults()):
      self.remove_option(ConfigParser.DEFAULTSECT, option)

    # Now re-read the configuration file.
    self.read(self.fname)

  def get_value(self, which):
    """Return the value of option WHICH from the [DEFAULT] section."""
    return self.get(ConfigParser.DEFAULTSECT, which)


def main(args):
  """Parse the command line ARGS and run the bridge.

  ARGS is the argument list without the program name.  Exactly one
  positional argument, the config file, is required.  With --daemon both
  --logfile and --pidfile are required and the process is daemonized;
  otherwise the bridge runs in the foreground.  Usage errors are reported
  through the option parser's error() method.
  """
  parser = optparse.OptionParser(
      description='An SvnPubSub client that bridges the data to irker.',
      usage='Usage: %prog [options] CONFIG_FILE',
      )
  parser.add_option('--logfile',
      help='filename for logging')
  parser.add_option('--verbose', action='store_true',
      help="enable verbose logging")
  parser.add_option('--pidfile',
      help="the process' PID will be written to this file")
  parser.add_option('--daemon', action='store_true',
      help='run as a background daemon')

  options, extra = parser.parse_args(args)

  if len(extra) != 1:
    parser.error('CONFIG_FILE is required')
  config_file = os.path.abspath(extra[0])

  # Paths are made absolute before daemonizing.
  logfile, pidfile = None, None
  if options.daemon:
    if options.logfile:
      logfile = os.path.abspath(options.logfile)
    else:
      parser.error('LOGFILE is required when running as a daemon')

    if options.pidfile:
      pidfile = os.path.abspath(options.pidfile)
    else:
      parser.error('PIDFILE is required when running as a daemon')


  config = ReloadableConfig(config_file)
  bdec = BigDoEverythingClass(config, options)

  d = Daemon(logfile, pidfile, bdec)
  if options.daemon:
    d.daemonize_exit()
  else:
    d.foreground()

# Run the bridge only when executed as a script.  The program name is
# dropped so that main() receives just the options and the config file.
if __name__ == "__main__":
  main(sys.argv[1:])