diff options
Diffstat (limited to 'tools/dev/iz/find-fix.py')
-rwxr-xr-x | tools/dev/iz/find-fix.py | 454 |
1 file changed, 454 insertions, 0 deletions
#!/usr/bin/env python
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# -*- Python -*-
"""find-fix.py: produce a find/fix report for Subversion's IZ database

For simple text summary:
       find-fix.py query-set-1.tsv YYYY-MM-DD YYYY-MM-DD
Statistics will be printed for bugs found or fixed within the
time frame.

For gnuplot presentation:
       find-fix.py query-set-1.tsv outfile
Gnuplot provides its own way to select date ranges.

Either way, get a query-set-1.tsv from:
  http://subversion.tigris.org/iz-data/query-set-1.tsv  (updated nightly)
See http://subversion.tigris.org/iz-data/README for more info on that file.

For more usage info on this script:
        find-fix.py --help
"""

_version = "$Revision:"

#
# This can be run over the data file found at:
#   http://subversion.tigris.org/iz-data/query-set-1.tsv
#

import getopt
try:
  my_getopt = getopt.gnu_getopt
except AttributeError:
  my_getopt = getopt.getopt
import operator
import os
import os.path
import pydoc
import re
try:
  # Python >=2.6 (and all of Python 3) keeps reduce in functools
  from functools import reduce
except ImportError:
  # Python <2.6: reduce is a builtin
  pass
import sys
import time

me = os.path.basename(sys.argv[0])

# Long options and their usage strings; "=" means it takes an argument.
# To get a list suitable for getopt, just do
#
#   [x[0] for x in long_opts]
#
# Make sure to sacrifice a lamb to Guido for each element of the list.
long_opts = [
  ["milestones=",   """Optional, milestones NOT to report on
                      (one or more of Beta, 1.0, Post-1.0, cvs2svn-1.0, cvs2svn-opt,
                      inapplicable)"""],
  ["update",        """Optional, update the statistics first."""],
  ["doc",           """Optional, print pydocs."""],
  ["help",          """Optional, print usage (this text)."""],
  ["verbose",       """Optional, print more progress messages."""],
  ]

help = 0
verbose = 0
update = 0

DATA_FILE = "http://subversion.tigris.org/iz-data/query-set-1.tsv"
ONE_WEEK = 7 * 24 * 60 * 60

# milestones the user asked us to exclude (see --milestones)
_milestone_filter = []

noncore_milestone_filter = [
  'Post-1.0',
  '1.1',
  'cvs2svn-1.0',
  'cvs2svn-opt',
  'inapplicable',
  'no milestone',
  ]

one_point_oh_milestone_filter = noncore_milestone_filter + []

beta_milestone_filter = one_point_oh_milestone_filter + ['1.0']


# the issue types tracked by the reports
_types = [
  'DEFECT',
  'TASK',
  'FEATURE',
  'ENHANCEMENT',
  'PATCH',
  ]


def _update_data(dest):
  """Refresh DEST from DATA_FILE, trying curl first and wget second."""
  if verbose:
    sys.stderr.write("%s: Updating %s from %s.\n" % (me, dest, DATA_FILE))
  if os.system("curl " + DATA_FILE + " > " + dest):
    # curl failed or is unavailable; fall back to wget.  -O makes wget
    # write to DEST (the original fallback wrote to the URL's basename
    # in the current directory, leaving DEST untouched).
    os.system("wget -O " + dest + " " + DATA_FILE)


def main():
  """Report bug find/fix rate statistics for Subversion."""

  # these module-level settings are assigned below
  global verbose
  global update
  global _milestone_filter

  try:
    opts, args = my_getopt(sys.argv[1:], "", [x[0] for x in long_opts])
  except getopt.GetoptError as e:
    # "except X, e" is Python 2-only syntax; use "as" for Python 3
    sys.stderr.write("Error: %s\n" % e.msg)
    shortusage()
    sys.stderr.write("%s --help for options.\n" % me)
    sys.exit(1)

  for opt, arg in opts:
    if opt == "--help":
      usage()
      sys.exit(0)
    elif opt == "--verbose":
      verbose = 1
    elif opt == "--milestones":
      for mstone in arg.split(","):
        if mstone == "noncore":
          _milestone_filter = noncore_milestone_filter
        elif mstone == "beta":
          _milestone_filter = beta_milestone_filter
        elif mstone == "one":
          _milestone_filter = one_point_oh_milestone_filter
        elif mstone[0] == '-':
          # "-name" removes a milestone from the current filter; build a
          # new list rather than mutating (the filter may alias one of the
          # shared preset lists above)
          if mstone[1:] in _milestone_filter:
            spot = _milestone_filter.index(mstone[1:])
            _milestone_filter = _milestone_filter[:spot] \
                                + _milestone_filter[(spot+1):]
        else:
          _milestone_filter += [mstone]

    elif opt == "--update":
      update = 1
    elif opt == "--doc":
      pydoc.doc(pydoc.importfile(sys.argv[0]))
      sys.exit(0)

  if len(_milestone_filter) == 0:
    _milestone_filter = noncore_milestone_filter

  if verbose:
    sys.stderr.write("%s: Filtering out milestones %s.\n"
                     % (me, ", ".join(_milestone_filter)))

  if len(args) == 2:
    # two args: <datafile> <outbase> -> gnuplot mode
    if verbose:
      sys.stderr.write("%s: Generating gnuplot data.\n" % me)
    if update:
      _update_data(args[0])
    plot(args[0], args[1])

  elif len(args) == 3:
    # three args: <datafile> <start date> <end date> -> text summary mode
    if verbose:
      sys.stderr.write("%s: Generating summary from %s to %s.\n"
                       % (me, args[1], args[2]))
    if update:
      _update_data(args[0])

    try:
      t_start = parse_time(args[1] + " 00:00:00")
    except ValueError:
      sys.stderr.write('%s: ERROR: bad time value: %s\n' % (me, args[1]))
      sys.exit(1)

    try:
      t_end = parse_time(args[2] + " 00:00:00")
    except ValueError:
      sys.stderr.write('%s: ERROR: bad time value: %s\n' % (me, args[2]))
      sys.exit(1)

    summary(args[0], t_start, t_end)
  else:
    usage()

  sys.exit(0)


def summary(datafile, d_start, d_end):
  "Prints a summary of activity within a specified date range."

  data = load_data(datafile)

  # activity during the requested period
  found, fixed, inval, dup, other = extract(data, 1, d_start, d_end)

  # activity from the beginning of time to the end of the request;
  # used to compute the "remain" column
  # XXX It would be faster to change extract to collect this in one
  # pass. But we don't presently have enough data, nor use this
  # enough, to justify that rework.
  fromzerofound, fromzerofixed, fromzeroinval, fromzerodup, fromzeroother \
      = extract(data, 1, 0, d_end)

  alltypes_found = alltypes_fixed = alltypes_inval = alltypes_dup \
      = alltypes_other = alltypes_rem = 0
  for t in _types:
    # issues of this type still open at the end of the period
    fromzerorem_t = fromzerofound[t]\
                    - (fromzerofixed[t] + fromzeroinval[t] + fromzerodup[t]
                       + fromzeroother[t])
    print('%12s: found=%3d fixed=%3d inval=%3d dup=%3d ' \
          'other=%3d remain=%3d' \
          % (t, found[t], fixed[t], inval[t], dup[t], other[t], fromzerorem_t))
    alltypes_found = alltypes_found + found[t]
    alltypes_fixed = alltypes_fixed + fixed[t]
    alltypes_inval = alltypes_inval + inval[t]
    alltypes_dup = alltypes_dup + dup[t]
    alltypes_other = alltypes_other + other[t]
    alltypes_rem = alltypes_rem + fromzerorem_t

  print('-' * 77)
  print('%12s: found=%3d fixed=%3d inval=%3d dup=%3d ' \
        'other=%3d remain=%3d' \
        % ('totals', alltypes_found, alltypes_fixed, alltypes_inval,
           alltypes_dup, alltypes_other, alltypes_rem))


def plot(datafile, outbase):
  "Generates data files intended for use by gnuplot."

  data = load_data(datafile)

  # find the earliest creation time in the data
  # (was "1L<<32": long literals are a syntax error in Python 3)
  t_min = 1 << 32
  for issue in data:
    if issue.created < t_min:
      t_min = issue.created

  # break the time up into a tuple, then back up to Sunday
  t_start = time.localtime(t_min)
  t_start = time.mktime((t_start[0], t_start[1], t_start[2] - t_start[6] - 1,
                         0, 0, 0, 0, 0, 0))

  plots = { }
  for t in _types:
    # for each issue type, we will record per-week stats, compute a moving
    # average of the find/fix delta, and track the number of open issues
    plots[t] = [ [ ], MovingAverage(), 0 ]

  week = 0
  # mktime()/time() return floats and Python 3's range() requires ints
  for date in range(int(t_start), int(time.time()), ONE_WEEK):
    ### this is quite inefficient, as we could just sort by date, but
    ### I'm being lazy
    found, fixed = extract(data, None, date, date + ONE_WEEK - 1)

    for t in _types:
      per_week, avg, open_issues = plots[t]
      delta = found[t] - fixed[t]
      per_week.append((week, date,
                       found[t], -fixed[t], avg.add(delta), open_issues))
      plots[t][2] = open_issues + delta

    week = week + 1

  for t in _types:
    week_data = plots[t][0]
    write_file(week_data, outbase, t, 'found', 2)
    write_file(week_data, outbase, t, 'fixed', 3)
    write_file(week_data, outbase, t, 'avg', 4)
    write_file(week_data, outbase, t, 'open', 5)

def write_file(week_data, base, type, tag, idx):
  """Write column IDX of WEEK_DATA to "<base>.<tag>.<type>" for gnuplot."""
  # use a context manager so the handle is flushed and closed
  # (the original leaked the open file)
  with open('%s.%s.%s' % (base, tag, type), 'w') as f:
    for info in week_data:
      f.write('%s %s # %s\n' % (info[0], info[idx], time.ctime(info[1])))


class MovingAverage:
  "Helper class to compute moving averages."
  def __init__(self, n=4):
    # n: window size; data holds the last n values, each pre-divided by n
    self.n = n
    self.data = [ 0 ] * n
  def add(self, value):
    """Fold VALUE into the window and return the new average."""
    self.data.pop(0)
    self.data.append(float(value) / self.n)
    return self.avg()
  def avg(self):
    """Return the current moving average."""
    return sum(self.data)


def extract(data, details, d_start, d_end):
  """Extract found/fixed counts for each issue type within the data range.

  If DETAILS is false, then return two dictionaries:

    found, fixed

  ...each mapping issue types to the number of issues of that type
  found or fixed respectively.

  If DETAILS is true, return five dictionaries:

    found, fixed, invalid, duplicate, other

  The first is still the found issues, but the other four break down
  the resolution into 'FIXED', 'INVALID', 'DUPLICATE', and a grab-bag
  category for 'WORKSFORME', 'LATER', 'REMIND', and 'WONTFIX'."""

  found = { }
  fixed = { }
  invalid = { }
  duplicate = { }
  other = { }  # "WORKSFORME", "LATER", "REMIND", and "WONTFIX"

  for t in _types:
    found[t] = fixed[t] = invalid[t] = duplicate[t] = other[t] = 0

  for issue in data:
    # filter out disrespected milestones
    if issue.milestone in _milestone_filter:
      continue

    # record the found/fixed counts; the timestamps may be None (a
    # "NULL" or unresolved issue), and comparing None with a number is
    # a TypeError under Python 3, so guard explicitly.  Under Python 2
    # None compared below every number and so was never counted either.
    if issue.created is not None and d_start <= issue.created <= d_end:
      found[issue.type] = found[issue.type] + 1
    if issue.resolved is not None and d_start <= issue.resolved <= d_end:
      if details:
        if issue.resolution == "FIXED":
          fixed[issue.type] = fixed[issue.type] + 1
        elif issue.resolution == "INVALID":
          invalid[issue.type] = invalid[issue.type] + 1
        elif issue.resolution == "DUPLICATE":
          duplicate[issue.type] = duplicate[issue.type] + 1
        else:
          other[issue.type] = other[issue.type] + 1
      else:
        fixed[issue.type] = fixed[issue.type] + 1

  if details:
    return found, fixed, invalid, duplicate, other
  else:
    return found, fixed


def load_data(datafile):
  "Return a list of Issue objects for the specified data."
  # context manager closes the handle promptly (the original leaked it)
  with open(datafile) as f:
    return [Issue(line) for line in f]


class Issue:
  "Represents a single issue from the exported IssueZilla data."

  def __init__(self, line):
    # one tab-separated row per issue:
    # id, type, reporter, assigned, milestone, created, resolved,
    # resolution, summary
    row = line.strip().split('\t')

    self.id = int(row[0])
    self.type = row[1]
    self.reporter = row[2]
    if row[3] == 'NULL':
      self.assigned = None
    else:
      self.assigned = row[3]
    self.milestone = row[4]
    self.created = parse_time(row[5])
    self.resolution = row[7]
    if not self.resolution:
      # If the resolution is empty, then force the resolved date to None.
      # When an issue is reopened, there will still be activity showing
      # a "RESOLVED", thus we get a resolved date. But we simply want to
      # ignore that date.
      self.resolved = None
    else:
      self.resolved = parse_time(row[6])
    self.summary = row[8]


parse_time_re = re.compile('([0-9]{4})-([0-9]{2})-([0-9]{2}) '
                           '([0-9]{2}):([0-9]{2}):([0-9]{2})')

def parse_time(t):
  "Convert an exported MySQL timestamp into seconds since the epoch."

  if t == 'NULL':
    return None
  matches = parse_time_re.match(t)
  if matches is None:
    # the original fell through to matches.group() here, raising an
    # uncaught AttributeError instead of reporting the bad value
    sys.stderr.write('ERROR: bad time value: %s\n' % t)
    sys.exit(1)
  try:
    return time.mktime((int(matches.group(1)),
                        int(matches.group(2)),
                        int(matches.group(3)),
                        int(matches.group(4)),
                        int(matches.group(5)),
                        int(matches.group(6)),
                        0, 0, -1))
  except ValueError:
    sys.stderr.write('ERROR: bad time value: %s\n' % t)
    sys.exit(1)

def shortusage():
  """Print the one-line synopsis plus the two invocation forms."""
  print(pydoc.synopsis(sys.argv[0]))
  print("""
For simple text summary:
  find-fix.py [options] query-set-1.tsv YYYY-MM-DD YYYY-MM-DD

For gnuplot presentation:
  find-fix.py [options] query-set-1.tsv outfile
""")

def usage():
  """Print full usage information, including every long option."""
  shortusage()
  for x in long_opts:
    padding_limit = 18
    if x[0][-1:] == '=':
      sys.stdout.write("   --%s " % x[0][:-1])
      padding_limit = 19
    else:
      sys.stdout.write("   --%s " % x[0])
    print("%s %s" % ((' ' * (padding_limit - len(x[0]))), x[1]))
  print('''
Option keywords may be abbreviated to any unique prefix.
Most options require "=xxx" arguments.
Option order is not important.''')

if __name__ == '__main__':
  main()