Diffstat (limited to 'tools/dev/iz/find-fix.py')
-rwxr-xr-x  tools/dev/iz/find-fix.py  454
1 file changed, 454 insertions(+), 0 deletions(-)
diff --git a/tools/dev/iz/find-fix.py b/tools/dev/iz/find-fix.py
new file mode 100755
index 0000000..513ccad
--- /dev/null
+++ b/tools/dev/iz/find-fix.py
@@ -0,0 +1,454 @@
+#!/usr/bin/env python
+#
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+# -*- Python -*-
+"""find-fix.py: produce a find/fix report for Subversion's IZ database
+
+For simple text summary:
+ find-fix.py query-set-1.tsv YYYY-MM-DD YYYY-MM-DD
+Statistics will be printed for bugs found or fixed within the
+time frame.
+
+For gnuplot presentation:
+ find-fix.py query-set-1.tsv outfile
+Gnuplot provides its own way to select date ranges.
+
+Either way, get a query-set-1.tsv from:
+ http://subversion.tigris.org/iz-data/query-set-1.tsv (updated nightly)
+See http://subversion.tigris.org/iz-data/README for more info on that file.
+
+For more usage info on this script:
+ find-fix.py --help
+"""
+
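+# For instance, a summary for January 2004, filtering noncore milestones:
+#   ./find-fix.py --milestones=noncore query-set-1.tsv 2004-01-01 2004-02-01
+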
+_version = "$Revision$"
+
+#
+# This can be run over the data file found at:
+# http://subversion.tigris.org/iz-data/query-set-1.tsv
+#
+
+import getopt
+try:
+ my_getopt = getopt.gnu_getopt
+except AttributeError:
+ my_getopt = getopt.getopt
+import operator
+import os
+import os.path
+import pydoc
+import re
+try:
+ # Python >=2.6
+ from functools import reduce
+except ImportError:
+ # Python <2.6
+ pass
+import sys
+import time
+
+me = os.path.basename(sys.argv[0])
+
+# Long options and their usage strings; "=" means it takes an argument.
+# To get a list suitable for getopt, just do
+#
+# [x[0] for x in long_opts]
+#
+# Make sure to sacrifice a lamb to Guido for each element of the list.
+long_opts = [
+  ["milestones=", """Optional, comma-separated milestones NOT to report on
+      (one or more of Beta, 1.0, Post-1.0, cvs2svn-1.0, cvs2svn-opt,
+      inapplicable); the keywords "noncore", "one", and "beta" select a
+      preset filter list, and a leading "-" removes a milestone from
+      the current filter"""],
+ ["update", """Optional, update the statistics first."""],
+ ["doc", """Optional, print pydocs."""],
+ ["help", """Optional, print usage (this text)."""],
+ ["verbose", """Optional, print more progress messages."""],
+ ]
+
+help = 0
+verbose = 0
+update = 0
+
+DATA_FILE = "http://subversion.tigris.org/iz-data/query-set-1.tsv"
+ONE_WEEK = 7 * 24 * 60 * 60
+
+_types = []
+_milestone_filter = []
+
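+# Preset milestone filters; the --milestones keywords "noncore", "one",
+# and "beta" (handled in main) select one of the lists below.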
+noncore_milestone_filter = [
+ 'Post-1.0',
+ '1.1',
+ 'cvs2svn-1.0',
+ 'cvs2svn-opt',
+ 'inapplicable',
+ 'no milestone',
+ ]
+
+one_point_oh_milestone_filter = noncore_milestone_filter + []
+
+beta_milestone_filter = one_point_oh_milestone_filter + ['1.0']
+
+
+_types = [
+ 'DEFECT',
+ 'TASK',
+ 'FEATURE',
+ 'ENHANCEMENT',
+ 'PATCH',
+ ]
+
+
+def main():
+ """Report bug find/fix rate statistics for Subversion."""
+
+ global verbose
+ global update
+ global _types
+ global _milestone_filter
+ global noncore_milestone_filter
+
+ try:
+ opts, args = my_getopt(sys.argv[1:], "", [x[0] for x in long_opts])
+  except getopt.GetoptError as e:
+ sys.stderr.write("Error: %s\n" % e.msg)
+ shortusage()
+ sys.stderr.write("%s --help for options.\n" % me)
+ sys.exit(1)
+
+ for opt, arg in opts:
+ if opt == "--help":
+ usage()
+ sys.exit(0)
+ elif opt == "--verbose":
+ verbose = 1
+ elif opt == "--milestones":
+ for mstone in arg.split(","):
+ if mstone == "noncore":
+ _milestone_filter = noncore_milestone_filter
+ elif mstone == "beta":
+ _milestone_filter = beta_milestone_filter
+ elif mstone == "one":
+ _milestone_filter = one_point_oh_milestone_filter
+ elif mstone[0] == '-':
+ if mstone[1:] in _milestone_filter:
+ spot = _milestone_filter.index(mstone[1:])
+ _milestone_filter = _milestone_filter[:spot] \
+ + _milestone_filter[(spot+1):]
+ else:
+ _milestone_filter += [mstone]
+
+ elif opt == "--update":
+ update = 1
+ elif opt == "--doc":
+ pydoc.doc(pydoc.importfile(sys.argv[0]))
+ sys.exit(0)
+
+ if len(_milestone_filter) == 0:
+ _milestone_filter = noncore_milestone_filter
+
+ if verbose:
+ sys.stderr.write("%s: Filtering out milestones %s.\n"
+ % (me, ", ".join(_milestone_filter)))
+
+ if len(args) == 2:
+ if verbose:
+ sys.stderr.write("%s: Generating gnuplot data.\n" % me)
+ if update:
+ if verbose:
+ sys.stderr.write("%s: Updating %s from %s.\n" % (me, args[0], DATA_FILE))
+      if os.system("curl " + DATA_FILE + " > " + args[0]):
+        os.system("wget -O " + args[0] + " " + DATA_FILE)
+ plot(args[0], args[1])
+
+ elif len(args) == 3:
+ if verbose:
+ sys.stderr.write("%s: Generating summary from %s to %s.\n"
+ % (me, args[1], args[2]))
+ if update:
+ if verbose:
+ sys.stderr.write("%s: Updating %s from %s.\n" % (me, args[0], DATA_FILE))
+      if os.system("curl " + DATA_FILE + " > " + args[0]):
+        os.system("wget -O " + args[0] + " " + DATA_FILE)
+
+ try:
+ t_start = parse_time(args[1] + " 00:00:00")
+ except ValueError:
+ sys.stderr.write('%s: ERROR: bad time value: %s\n' % (me, args[1]))
+ sys.exit(1)
+
+ try:
+ t_end = parse_time(args[2] + " 00:00:00")
+ except ValueError:
+ sys.stderr.write('%s: ERROR: bad time value: %s\n' % (me, args[2]))
+ sys.exit(1)
+
+ summary(args[0], t_start, t_end)
+ else:
+ usage()
+
+ sys.exit(0)
+
+
+def summary(datafile, d_start, d_end):
+ "Prints a summary of activity within a specified date range."
+
+ data = load_data(datafile)
+
+ # activity during the requested period
+ found, fixed, inval, dup, other = extract(data, 1, d_start, d_end)
+
+ # activity from the beginning of time to the end of the request
+ # used to compute remaining
+ # XXX It would be faster to change extract to collect this in one
+ # pass. But we don't presently have enough data, nor use this
+ # enough, to justify that rework.
+ fromzerofound, fromzerofixed, fromzeroinval, fromzerodup, fromzeroother \
+ = extract(data, 1, 0, d_end)
+
+ alltypes_found = alltypes_fixed = alltypes_inval = alltypes_dup \
+ = alltypes_other = alltypes_rem = 0
+ for t in _types:
+ fromzerorem_t = fromzerofound[t]\
+ - (fromzerofixed[t] + fromzeroinval[t] + fromzerodup[t]
+ + fromzeroother[t])
+ print('%12s: found=%3d fixed=%3d inval=%3d dup=%3d ' \
+ 'other=%3d remain=%3d' \
+ % (t, found[t], fixed[t], inval[t], dup[t], other[t], fromzerorem_t))
+ alltypes_found = alltypes_found + found[t]
+ alltypes_fixed = alltypes_fixed + fixed[t]
+ alltypes_inval = alltypes_inval + inval[t]
+ alltypes_dup = alltypes_dup + dup[t]
+ alltypes_other = alltypes_other + other[t]
+ alltypes_rem = alltypes_rem + fromzerorem_t
+
+ print('-' * 77)
+ print('%12s: found=%3d fixed=%3d inval=%3d dup=%3d ' \
+ 'other=%3d remain=%3d' \
+ % ('totals', alltypes_found, alltypes_fixed, alltypes_inval,
+ alltypes_dup, alltypes_other, alltypes_rem))
+ # print '%12s find/fix ratio: %g%%' \
+ # % (" "*12, (alltypes_found*100.0/(alltypes_fixed
+ # + alltypes_inval + alltypes_dup + alltypes_other)))
+
+
+def plot(datafile, outbase):
+ "Generates data files intended for use by gnuplot."
+
+ global _types
+
+ data = load_data(datafile)
+
+  t_min = 1 << 32  # sentinel later than any 32-bit timestamp
+ for issue in data:
+ if issue.created < t_min:
+ t_min = issue.created
+
+ # break the time up into a tuple, then back up to Sunday
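+  # (tm_wday runs 0 for Monday through 6 for Sunday, so day - wday - 1
+  # backs the date up to the preceding Sunday)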
+ t_start = time.localtime(t_min)
+ t_start = time.mktime((t_start[0], t_start[1], t_start[2] - t_start[6] - 1,
+ 0, 0, 0, 0, 0, 0))
+
+ plots = { }
+ for t in _types:
+ # for each issue type, we will record per-week stats, compute a moving
+ # average of the find/fix delta, and track the number of open issues
+ plots[t] = [ [ ], MovingAverage(), 0 ]
+
+ week = 0
+  for date in range(int(t_start), int(time.time()), ONE_WEEK):
+ ### this is quite inefficient, as we could just sort by date, but
+ ### I'm being lazy
+ found, fixed = extract(data, None, date, date + ONE_WEEK - 1)
+
+ for t in _types:
+ per_week, avg, open_issues = plots[t]
+ delta = found[t] - fixed[t]
+ per_week.append((week, date,
+ found[t], -fixed[t], avg.add(delta), open_issues))
+ plots[t][2] = open_issues + delta
+
+ week = week + 1
+
+ for t in _types:
+ week_data = plots[t][0]
+ write_file(week_data, outbase, t, 'found', 2)
+ write_file(week_data, outbase, t, 'fixed', 3)
+ write_file(week_data, outbase, t, 'avg', 4)
+ write_file(week_data, outbase, t, 'open', 5)
+
+def write_file(week_data, base, type, tag, idx):
+  "Write one gnuplot data file: '<week> <value> # <date>' per line."
+  f = open('%s.%s.%s' % (base, tag, type), 'w')
+  for info in week_data:
+    f.write('%s %s # %s\n' % (info[0], info[idx], time.ctime(info[1])))
+  f.close()
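+
+# A minimal gnuplot sketch for reading the files written above (assuming
+# an outbase of "iz" and the DEFECT issue type; adjust names to match
+# your own run):
+#
+#   plot 'iz.found.DEFECT' using 1:2 with lines title 'found', \
+#        'iz.fixed.DEFECT' using 1:2 with lines title 'fixed'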
+
+
+class MovingAverage:
+ "Helper class to compute moving averages."
+ def __init__(self, n=4):
+ self.n = n
+ self.data = [ 0 ] * n
+ def add(self, value):
+ self.data.pop(0)
+ self.data.append(float(value) / self.n)
+ return self.avg()
+ def avg(self):
+ return reduce(operator.add, self.data)
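+
+# For example, with the default window of n=4 the average climbs as the
+# window fills:
+#   ma = MovingAverage()
+#   ma.add(4)   # -> 1.0
+#   ma.add(4)   # -> 2.0
+#   ma.add(4)   # -> 3.0
+#   ma.add(4)   # -> 4.0 (window now holds four 4s)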
+
+
+def extract(data, details, d_start, d_end):
+ """Extract found/fixed counts for each issue type within the data range.
+
+ If DETAILS is false, then return two dictionaries:
+
+ found, fixed
+
+ ...each mapping issue types to the number of issues of that type
+ found or fixed respectively.
+
+ If DETAILS is true, return five dictionaries:
+
+ found, fixed, invalid, duplicate, other
+
+ The first is still the found issues, but the other four break down
+ the resolution into 'FIXED', 'INVALID', 'DUPLICATE', and a grab-bag
+ category for 'WORKSFORME', 'LATER', 'REMIND', and 'WONTFIX'."""
+
+ global _types
+ global _milestone_filter
+
+ found = { }
+ fixed = { }
+ invalid = { }
+ duplicate = { }
+ other = { } # "WORKSFORME", "LATER", "REMIND", and "WONTFIX"
+
+ for t in _types:
+ found[t] = fixed[t] = invalid[t] = duplicate[t] = other[t] = 0
+
+ for issue in data:
+ # filter out disrespected milestones
+ if issue.milestone in _milestone_filter:
+ continue
+
+ # record the found/fixed counts
+ if d_start <= issue.created <= d_end:
+ found[issue.type] = found[issue.type] + 1
+    if issue.resolved is not None and d_start <= issue.resolved <= d_end:
+ if details:
+ if issue.resolution == "FIXED":
+ fixed[issue.type] = fixed[issue.type] + 1
+ elif issue.resolution == "INVALID":
+ invalid[issue.type] = invalid[issue.type] + 1
+ elif issue.resolution == "DUPLICATE":
+ duplicate[issue.type] = duplicate[issue.type] + 1
+ else:
+ other[issue.type] = other[issue.type] + 1
+ else:
+ fixed[issue.type] = fixed[issue.type] + 1
+
+ if details:
+ return found, fixed, invalid, duplicate, other
+ else:
+ return found, fixed
+
+
+def load_data(datafile):
+ "Return a list of Issue objects for the specified data."
+ return list(map(Issue, open(datafile).readlines()))
+
+
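+# Each line of query-set-1.tsv is expected to carry nine tab-separated
+# fields, in this order (as parsed by Issue.__init__ below):
+#
+#   id, type, reporter, assigned, milestone, created, resolved,
+#   resolution, summary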
+class Issue:
+ "Represents a single issue from the exported IssueZilla data."
+
+ def __init__(self, line):
+ row = line.strip().split('\t')
+
+ self.id = int(row[0])
+ self.type = row[1]
+ self.reporter = row[2]
+ if row[3] == 'NULL':
+ self.assigned = None
+ else:
+ self.assigned = row[3]
+ self.milestone = row[4]
+ self.created = parse_time(row[5])
+ self.resolution = row[7]
+ if not self.resolution:
+ # If the resolution is empty, then force the resolved date to None.
+ # When an issue is reopened, there will still be activity showing
+ # a "RESOLVED", thus we get a resolved date. But we simply want to
+ # ignore that date.
+ self.resolved = None
+ else:
+ self.resolved = parse_time(row[6])
+ self.summary = row[8]
+
+
+parse_time_re = re.compile('([0-9]{4})-([0-9]{2})-([0-9]{2}) '
+ '([0-9]{2}):([0-9]{2}):([0-9]{2})')
+
+def parse_time(t):
+ "Convert an exported MySQL timestamp into seconds since the epoch."
+
+ global parse_time_re
+
+ if t == 'NULL':
+ return None
+  try:
+    matches = parse_time_re.match(t)
+    return time.mktime((int(matches.group(1)),
+                        int(matches.group(2)),
+                        int(matches.group(3)),
+                        int(matches.group(4)),
+                        int(matches.group(5)),
+                        int(matches.group(6)),
+                        0, 0, -1))
+  except (ValueError, AttributeError):
+    # AttributeError means the regex did not match at all (matches is None).
+    sys.stderr.write('ERROR: bad time value: %s\n' % t)
+    sys.exit(1)
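+
+# For example (mktime works in local time, so exact values depend on
+# the timezone):
+#   parse_time('2004-02-01 00:00:00')  -> seconds since the epoch
+#   parse_time('NULL')                 -> None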
+
+def shortusage():
+ print(pydoc.synopsis(sys.argv[0]))
+ print("""
+For simple text summary:
+ find-fix.py [options] query-set-1.tsv YYYY-MM-DD YYYY-MM-DD
+
+For gnuplot presentation:
+ find-fix.py [options] query-set-1.tsv outfile
+""")
+
+def usage():
+ shortusage()
+ for x in long_opts:
+ padding_limit = 18
+ if x[0][-1:] == '=':
+ sys.stdout.write(" --%s " % x[0][:-1])
+ padding_limit = 19
+ else:
+ sys.stdout.write(" --%s " % x[0])
+ print("%s %s" % ((' ' * (padding_limit - len(x[0]))), x[1]))
+ print('''
+Option keywords may be abbreviated to any unique prefix.
+Most options require "=xxx" arguments.
+Option order is not important.''')
+
+if __name__ == '__main__':
+ main()