diff options
Diffstat (limited to 'Tools/Scripts/webkitpy/tool/commands/analyzechangelog.py')
-rw-r--r-- | Tools/Scripts/webkitpy/tool/commands/analyzechangelog.py | 208 |
1 file changed, 208 insertions, 0 deletions
diff --git a/Tools/Scripts/webkitpy/tool/commands/analyzechangelog.py b/Tools/Scripts/webkitpy/tool/commands/analyzechangelog.py new file mode 100644 index 000000000..2fe34ade5 --- /dev/null +++ b/Tools/Scripts/webkitpy/tool/commands/analyzechangelog.py @@ -0,0 +1,208 @@ +# Copyright (c) 2011 Google Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from __future__ import with_statement + +import json +import re +import time + +from webkitpy.common.checkout.scm.detection import SCMDetector +from webkitpy.common.checkout.changelog import ChangeLog +from webkitpy.common.config.contributionareas import ContributionAreas +from webkitpy.common.system.filesystem import FileSystem +from webkitpy.common.system.executive import Executive +from webkitpy.tool.multicommandtool import AbstractDeclarativeCommand +from webkitpy.tool import steps + + +class AnalyzeChangeLog(AbstractDeclarativeCommand): + name = "analyze-changelog" + help_text = "Experimental command for analyzing change logs." + long_help = "This command parses changelogs in a specified directory and summarizes the result as JSON files." + + def __init__(self): + options = [ + steps.Options.changelog_count, + ] + AbstractDeclarativeCommand.__init__(self, options=options) + + @staticmethod + def _enumerate_changelogs(filesystem, dirname, changelog_count): + changelogs = [filesystem.join(dirname, filename) for filename in filesystem.listdir(dirname) if re.match('^ChangeLog(-(\d{4}-\d{2}-\d{2}))?$', filename)] + # Make sure ChangeLog shows up before ChangeLog-2011-01-01 + changelogs = sorted(changelogs, key=lambda filename: filename + 'X', reverse=True) + return changelogs[:changelog_count] + + @staticmethod + def _generate_jsons(filesystem, jsons, output_dir): + for filename in jsons: + print ' Generating', filename + filesystem.write_text_file(filesystem.join(output_dir, filename), json.dumps(jsons[filename], indent=2)) + + def execute(self, options, args, tool): + filesystem = self._tool.filesystem + if len(args) < 1 or not filesystem.exists(args[0]): + print "Need the directory name to look for changelog as the first argument" + return + changelog_dir = filesystem.abspath(args[0]) + + if len(args) < 2 or not filesystem.exists(args[1]): + print "Need the output directory name as the second argument" + return + output_dir = args[1] + + startTime = 
time.time() + + print 'Enumerating ChangeLog files...' + changelogs = AnalyzeChangeLog._enumerate_changelogs(filesystem, changelog_dir, options.changelog_count) + + analyzer = ChangeLogAnalyzer(tool, changelogs) + analyzer.analyze() + + print 'Generating json files...' + json_files = { + 'summary.json': analyzer.summary(), + 'contributors.json': analyzer.contributors_statistics(), + 'areas.json': analyzer.areas_statistics(), + } + AnalyzeChangeLog._generate_jsons(filesystem, json_files, output_dir) + commands_dir = filesystem.dirname(filesystem.path_to_module(self.__module__)) + print commands_dir + filesystem.copyfile(filesystem.join(commands_dir, 'data/summary.html'), filesystem.join(output_dir, 'summary.html')) + + tick = time.time() - startTime + print 'Finished in %02dm:%02ds' % (int(tick / 60), int(tick % 60)) + + +class ChangeLogAnalyzer(object): + def __init__(self, host, changelog_paths): + self._changelog_paths = changelog_paths + self._filesystem = host.filesystem + self._contribution_areas = ContributionAreas(host.filesystem) + self._scm = host.scm() + self._parsed_revisions = {} + + self._contributors_statistics = {} + self._areas_statistics = dict([(area, {'reviewed': 0, 'unreviewed': 0, 'contributors': {}}) for area in self._contribution_areas.names()]) + self._summary = {'reviewed': 0, 'unreviewed': 0} + + self._longest_filename = max([len(path) - len(self._scm.checkout_root) for path in changelog_paths]) + self._filename = '' + self._length_of_previous_output = 0 + + def contributors_statistics(self): + return self._contributors_statistics + + def areas_statistics(self): + return self._areas_statistics + + def summary(self): + return self._summary + + def _print_status(self, status): + if self._length_of_previous_output: + print "\r" + " " * self._length_of_previous_output, + new_output = ('%' + str(self._longest_filename) + 's: %s') % (self._filename, status) + print "\r" + new_output, + self._length_of_previous_output = len(new_output) + + def 
_set_filename(self, filename): + if self._filename: + print + self._filename = filename + + def analyze(self): + for path in self._changelog_paths: + self._set_filename(self._filesystem.relpath(path, self._scm.checkout_root)) + with self._filesystem.open_text_file_for_reading(path) as changelog: + self._print_status('Parsing entries...') + number_of_parsed_entries = self._analyze_entries(ChangeLog.parse_entries_from_file(changelog), path) + self._print_status('Done (%d entries)' % number_of_parsed_entries) + print + self._summary['contributors'] = len(self._contributors_statistics) + self._summary['contributors_with_reviews'] = sum([1 for contributor in self._contributors_statistics.values() if contributor['reviews']['total']]) + self._summary['contributors_without_reviews'] = self._summary['contributors'] - self._summary['contributors_with_reviews'] + + def _collect_statistics_for_contributor_area(self, area, contributor, contribution_type, reviewed): + area_contributors = self._areas_statistics[area]['contributors'] + if contributor not in area_contributors: + area_contributors[contributor] = {'reviews': 0, 'reviewed': 0, 'unreviewed': 0} + if contribution_type == 'patches': + contribution_type = 'reviewed' if reviewed else 'unreviewed' + area_contributors[contributor][contribution_type] += 1 + + def _collect_statistics_for_contributor(self, contributor, contribution_type, areas, touched_files, reviewed): + if contributor not in self._contributors_statistics: + self._contributors_statistics[contributor] = { + 'reviews': {'total': 0, 'areas': {}, 'files': {}}, + 'patches': {'reviewed': 0, 'unreviewed': 0, 'areas': {}, 'files': {}}} + statistics = self._contributors_statistics[contributor][contribution_type] + + if contribution_type == 'reviews': + statistics['total'] += 1 + elif reviewed: + statistics['reviewed'] += 1 + else: + statistics['unreviewed'] += 1 + + for area in areas: + self._increment_dictionary_value(statistics['areas'], area) + 
self._collect_statistics_for_contributor_area(area, contributor, contribution_type, reviewed) + for touchedfile in touched_files: + self._increment_dictionary_value(statistics['files'], touchedfile) + + def _increment_dictionary_value(self, dictionary, key): + dictionary[key] = dictionary.get(key, 0) + 1 + + def _analyze_entries(self, entries, changelog_path): + dirname = self._filesystem.dirname(changelog_path) + for i, entry in enumerate(entries): + self._print_status('(%s) entries' % i) + assert(entry.authors()) + + touchedfiles_for_entry = [self._filesystem.relpath(self._filesystem.join(dirname, name), self._scm.checkout_root) for name in entry.touched_files()] + areas_for_entry = self._contribution_areas.areas_for_touched_files(touchedfiles_for_entry) + authors_for_entry = entry.authors() + reviewers_for_entry = entry.reviewers() + + for reviewer in reviewers_for_entry: + self._collect_statistics_for_contributor(reviewer.full_name, 'reviews', areas_for_entry, touchedfiles_for_entry, reviewed=True) + + for author in authors_for_entry: + self._collect_statistics_for_contributor(author['name'], 'patches', areas_for_entry, touchedfiles_for_entry, + reviewed=bool(reviewers_for_entry)) + + for area in areas_for_entry: + self._areas_statistics[area]['reviewed' if reviewers_for_entry else 'unreviewed'] += 1 + + self._summary['reviewed' if reviewers_for_entry else 'unreviewed'] += 1 + + i += 1 + self._print_status('(%s) entries' % i) + return i |