diff options
Diffstat (limited to 'tools/dev/iz/find-fix.py')
-rwxr-xr-x | tools/dev/iz/find-fix.py | 454 |
1 file changed, 454 insertions, 0 deletions
#!/usr/bin/env python
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# -*- Python -*-
"""find-fix.py: produce a find/fix report for Subversion's IZ database

For simple text summary:
       find-fix.py query-set-1.tsv YYYY-MM-DD YYYY-MM-DD
Statistics will be printed for bugs found or fixed within the
time frame.

For gnuplot presentation:
       find-fix.py query-set-1.tsv outfile
Gnuplot provides its own way to select date ranges.

Either way, get a query-set-1.tsv from:
  http://subversion.tigris.org/iz-data/query-set-1.tsv  (updated nightly)
See http://subversion.tigris.org/iz-data/README for more info on that file.

For more usage info on this script:
        find-fix.py --help
"""

_version = "$Revision:"

#
# This can be run over the data file found at:
#   http://subversion.tigris.org/iz-data/query-set-1.tsv
#

import getopt
try:
  my_getopt = getopt.gnu_getopt
except AttributeError:
  my_getopt = getopt.getopt
import operator
import os
import os.path
import pydoc
import re
try:
  # Python >=2.6 (and all of Python 3) keeps reduce in functools
  from functools import reduce
except ImportError:
  # Python <2.6: reduce is a builtin
  pass
import sys
import time

me = os.path.basename(sys.argv[0])

# Long options and their usage strings; "=" means it takes an argument.
# To get a list suitable for getopt, just do
#
#   [x[0] for x in long_opts]
#
# Make sure to sacrifice a lamb to Guido for each element of the list.
long_opts = [
  ["milestones=",   """Optional, milestones NOT to report on
                      (one or more of Beta, 1.0, Post-1.0, cvs2svn-1.0, cvs2svn-opt,
                      inapplicable)"""],
  ["update",        """Optional, update the statistics first."""],
  ["doc",           """Optional, print pydocs."""],
  ["help",          """Optional, print usage (this text)."""],
  ["verbose",       """Optional, print more progress messages."""],
  ]

help = 0
verbose = 0
update = 0

DATA_FILE = "http://subversion.tigris.org/iz-data/query-set-1.tsv"
ONE_WEEK = 7 * 24 * 60 * 60

# milestones the user asked us to exclude (see --milestones)
_milestone_filter = []

noncore_milestone_filter = [
  'Post-1.0',
  '1.1',
  'cvs2svn-1.0',
  'cvs2svn-opt',
  'inapplicable',
  'no milestone',
  ]

one_point_oh_milestone_filter = noncore_milestone_filter + []

beta_milestone_filter = one_point_oh_milestone_filter + ['1.0']


# the issue types tracked by the reports
_types = [
  'DEFECT',
  'TASK',
  'FEATURE',
  'ENHANCEMENT',
  'PATCH',
  ]


def _update_data(dest):
  """Refresh DEST from DATA_FILE, trying curl first and wget second."""
  if verbose:
    sys.stderr.write("%s: Updating %s from %s.\n" % (me, dest, DATA_FILE))
  if os.system("curl " + DATA_FILE + " > " + dest):
    # curl failed or is unavailable; fall back to wget.  -O makes wget
    # write to DEST (the original fallback wrote to the URL's basename
    # in the current directory, leaving DEST untouched).
    os.system("wget -O " + dest + " " + DATA_FILE)


def main():
  """Report bug find/fix rate statistics for Subversion."""

  # these module-level settings are assigned below
  global verbose
  global update
  global _milestone_filter

  try:
    opts, args = my_getopt(sys.argv[1:], "", [x[0] for x in long_opts])
  except getopt.GetoptError as e:
    # "except X, e" is Python 2-only syntax; use "as" for Python 3
    sys.stderr.write("Error: %s\n" % e.msg)
    shortusage()
    sys.stderr.write("%s --help for options.\n" % me)
    sys.exit(1)

  for opt, arg in opts:
    if opt == "--help":
      usage()
      sys.exit(0)
    elif opt == "--verbose":
      verbose = 1
    elif opt == "--milestones":
      for mstone in arg.split(","):
        if mstone == "noncore":
          _milestone_filter = noncore_milestone_filter
        elif mstone == "beta":
          _milestone_filter = beta_milestone_filter
        elif mstone == "one":
          _milestone_filter = one_point_oh_milestone_filter
        elif mstone[0] == '-':
          # "-name" removes a milestone from the current filter; build a
          # new list rather than mutating (the filter may alias one of the
          # shared preset lists above)
          if mstone[1:] in _milestone_filter:
            spot = _milestone_filter.index(mstone[1:])
            _milestone_filter = _milestone_filter[:spot] \
                                + _milestone_filter[(spot+1):]
        else:
          _milestone_filter += [mstone]

    elif opt == "--update":
      update = 1
    elif opt == "--doc":
      pydoc.doc(pydoc.importfile(sys.argv[0]))
      sys.exit(0)

  if len(_milestone_filter) == 0:
    _milestone_filter = noncore_milestone_filter

  if verbose:
    sys.stderr.write("%s: Filtering out milestones %s.\n"
                     % (me, ", ".join(_milestone_filter)))

  if len(args) == 2:
    # two args: <datafile> <outbase> -> gnuplot mode
    if verbose:
      sys.stderr.write("%s: Generating gnuplot data.\n" % me)
    if update:
      _update_data(args[0])
    plot(args[0], args[1])

  elif len(args) == 3:
    # three args: <datafile> <start date> <end date> -> text summary mode
    if verbose:
      sys.stderr.write("%s: Generating summary from %s to %s.\n"
                       % (me, args[1], args[2]))
    if update:
      _update_data(args[0])

    try:
      t_start = parse_time(args[1] + " 00:00:00")
    except ValueError:
      sys.stderr.write('%s: ERROR: bad time value: %s\n' % (me, args[1]))
      sys.exit(1)

    try:
      t_end = parse_time(args[2] + " 00:00:00")
    except ValueError:
      sys.stderr.write('%s: ERROR: bad time value: %s\n' % (me, args[2]))
      sys.exit(1)

    summary(args[0], t_start, t_end)
  else:
    usage()

  sys.exit(0)


def summary(datafile, d_start, d_end):
  "Prints a summary of activity within a specified date range."

  data = load_data(datafile)

  # activity during the requested period
  found, fixed, inval, dup, other = extract(data, 1, d_start, d_end)

  # activity from the beginning of time to the end of the request;
  # used to compute the "remain" column
  # XXX It would be faster to change extract to collect this in one
  # pass. But we don't presently have enough data, nor use this
  # enough, to justify that rework.
  fromzerofound, fromzerofixed, fromzeroinval, fromzerodup, fromzeroother \
      = extract(data, 1, 0, d_end)

  alltypes_found = alltypes_fixed = alltypes_inval = alltypes_dup \
      = alltypes_other = alltypes_rem = 0
  for t in _types:
    # issues of this type still open at the end of the period
    fromzerorem_t = fromzerofound[t]\
                    - (fromzerofixed[t] + fromzeroinval[t] + fromzerodup[t]
                       + fromzeroother[t])
    print('%12s: found=%3d fixed=%3d inval=%3d dup=%3d ' \
          'other=%3d remain=%3d' \
          % (t, found[t], fixed[t], inval[t], dup[t], other[t], fromzerorem_t))
    alltypes_found = alltypes_found + found[t]
    alltypes_fixed = alltypes_fixed + fixed[t]
    alltypes_inval = alltypes_inval + inval[t]
    alltypes_dup = alltypes_dup + dup[t]
    alltypes_other = alltypes_other + other[t]
    alltypes_rem = alltypes_rem + fromzerorem_t

  print('-' * 77)
  print('%12s: found=%3d fixed=%3d inval=%3d dup=%3d ' \
        'other=%3d remain=%3d' \
        % ('totals', alltypes_found, alltypes_fixed, alltypes_inval,
           alltypes_dup, alltypes_other, alltypes_rem))


def plot(datafile, outbase):
  "Generates data files intended for use by gnuplot."

  data = load_data(datafile)

  # find the earliest creation time in the data
  # (was "1L<<32": long literals are a syntax error in Python 3)
  t_min = 1 << 32
  for issue in data:
    if issue.created < t_min:
      t_min = issue.created

  # break the time up into a tuple, then back up to Sunday
  t_start = time.localtime(t_min)
  t_start = time.mktime((t_start[0], t_start[1], t_start[2] - t_start[6] - 1,
                         0, 0, 0, 0, 0, 0))

  plots = { }
  for t in _types:
    # for each issue type, we will record per-week stats, compute a moving
    # average of the find/fix delta, and track the number of open issues
    plots[t] = [ [ ], MovingAverage(), 0 ]

  week = 0
  # mktime()/time() return floats and Python 3's range() requires ints
  for date in range(int(t_start), int(time.time()), ONE_WEEK):
    ### this is quite inefficient, as we could just sort by date, but
    ### I'm being lazy
    found, fixed = extract(data, None, date, date + ONE_WEEK - 1)

    for t in _types:
      per_week, avg, open_issues = plots[t]
      delta = found[t] - fixed[t]
      per_week.append((week, date,
                       found[t], -fixed[t], avg.add(delta), open_issues))
      plots[t][2] = open_issues + delta

    week = week + 1

  for t in _types:
    week_data = plots[t][0]
    write_file(week_data, outbase, t, 'found', 2)
    write_file(week_data, outbase, t, 'fixed', 3)
    write_file(week_data, outbase, t, 'avg', 4)
    write_file(week_data, outbase, t, 'open', 5)

def write_file(week_data, base, type, tag, idx):
  """Write column IDX of WEEK_DATA to "<base>.<tag>.<type>" for gnuplot."""
  # use a context manager so the handle is flushed and closed
  # (the original leaked the open file)
  with open('%s.%s.%s' % (base, tag, type), 'w') as f:
    for info in week_data:
      f.write('%s %s # %s\n' % (info[0], info[idx], time.ctime(info[1])))


class MovingAverage:
  "Helper class to compute moving averages."
  def __init__(self, n=4):
    # n: window size; data holds the last n values, each pre-divided by n
    self.n = n
    self.data = [ 0 ] * n
  def add(self, value):
    """Fold VALUE into the window and return the new average."""
    self.data.pop(0)
    self.data.append(float(value) / self.n)
    return self.avg()
  def avg(self):
    """Return the current moving average."""
    return sum(self.data)


def extract(data, details, d_start, d_end):
  """Extract found/fixed counts for each issue type within the data range.

  If DETAILS is false, then return two dictionaries:

    found, fixed

  ...each mapping issue types to the number of issues of that type
  found or fixed respectively.

  If DETAILS is true, return five dictionaries:

    found, fixed, invalid, duplicate, other

  The first is still the found issues, but the other four break down
  the resolution into 'FIXED', 'INVALID', 'DUPLICATE', and a grab-bag
  category for 'WORKSFORME', 'LATER', 'REMIND', and 'WONTFIX'."""

  found = { }
  fixed = { }
  invalid = { }
  duplicate = { }
  other = { }  # "WORKSFORME", "LATER", "REMIND", and "WONTFIX"

  for t in _types:
    found[t] = fixed[t] = invalid[t] = duplicate[t] = other[t] = 0

  for issue in data:
    # filter out disrespected milestones
    if issue.milestone in _milestone_filter:
      continue

    # record the found/fixed counts; the timestamps may be None (a
    # "NULL" or unresolved issue), and comparing None with a number is
    # a TypeError under Python 3, so guard explicitly.  Under Python 2
    # None compared below every number and so was never counted either.
    if issue.created is not None and d_start <= issue.created <= d_end:
      found[issue.type] = found[issue.type] + 1
    if issue.resolved is not None and d_start <= issue.resolved <= d_end:
      if details:
        if issue.resolution == "FIXED":
          fixed[issue.type] = fixed[issue.type] + 1
        elif issue.resolution == "INVALID":
          invalid[issue.type] = invalid[issue.type] + 1
        elif issue.resolution == "DUPLICATE":
          duplicate[issue.type] = duplicate[issue.type] + 1
        else:
          other[issue.type] = other[issue.type] + 1
      else:
        fixed[issue.type] = fixed[issue.type] + 1

  if details:
    return found, fixed, invalid, duplicate, other
  else:
    return found, fixed


def load_data(datafile):
  "Return a list of Issue objects for the specified data."
  # context manager closes the handle promptly (the original leaked it)
  with open(datafile) as f:
    return [Issue(line) for line in f]


class Issue:
  "Represents a single issue from the exported IssueZilla data."

  def __init__(self, line):
    # one tab-separated row per issue:
    # id, type, reporter, assigned, milestone, created, resolved,
    # resolution, summary
    row = line.strip().split('\t')

    self.id = int(row[0])
    self.type = row[1]
    self.reporter = row[2]
    if row[3] == 'NULL':
      self.assigned = None
    else:
      self.assigned = row[3]
    self.milestone = row[4]
    self.created = parse_time(row[5])
    self.resolution = row[7]
    if not self.resolution:
      # If the resolution is empty, then force the resolved date to None.
      # When an issue is reopened, there will still be activity showing
      # a "RESOLVED", thus we get a resolved date. But we simply want to
      # ignore that date.
      self.resolved = None
    else:
      self.resolved = parse_time(row[6])
    self.summary = row[8]


parse_time_re = re.compile('([0-9]{4})-([0-9]{2})-([0-9]{2}) '
                           '([0-9]{2}):([0-9]{2}):([0-9]{2})')

def parse_time(t):
  "Convert an exported MySQL timestamp into seconds since the epoch."

  if t == 'NULL':
    return None
  matches = parse_time_re.match(t)
  if matches is None:
    # the original fell through to matches.group() here, raising an
    # uncaught AttributeError instead of reporting the bad value
    sys.stderr.write('ERROR: bad time value: %s\n' % t)
    sys.exit(1)
  try:
    return time.mktime((int(matches.group(1)),
                        int(matches.group(2)),
                        int(matches.group(3)),
                        int(matches.group(4)),
                        int(matches.group(5)),
                        int(matches.group(6)),
                        0, 0, -1))
  except ValueError:
    sys.stderr.write('ERROR: bad time value: %s\n' % t)
    sys.exit(1)

def shortusage():
  """Print the one-line synopsis plus the two invocation forms."""
  print(pydoc.synopsis(sys.argv[0]))
  print("""
For simple text summary:
  find-fix.py [options] query-set-1.tsv YYYY-MM-DD YYYY-MM-DD

For gnuplot presentation:
  find-fix.py [options] query-set-1.tsv outfile
""")

def usage():
  """Print full usage information, including every long option."""
  shortusage()
  for x in long_opts:
    padding_limit = 18
    if x[0][-1:] == '=':
      sys.stdout.write("   --%s " % x[0][:-1])
      padding_limit = 19
    else:
      sys.stdout.write("   --%s " % x[0])
    print("%s %s" % ((' ' * (padding_limit - len(x[0]))), x[1]))
  print('''
Option keywords may be abbreviated to any unique prefix.
Most options require "=xxx" arguments.
Option order is not important.''')

if __name__ == '__main__':
  main()