summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Joshua Harlow <harlowja@yahoo-inc.com> 2015-11-17 13:31:14 -0800
committer Joshua Harlow <harlowja@yahoo-inc.com> 2015-11-17 16:00:12 -0800
commit eff2625cbba5ea2c5b4463890fe660b8dfedc497 (patch)
tree 5b9c6129dd9538fd88e2194fc8995d0dece22658
parent 305c2b0d52d387c0039e3b3021c4de27e85cd175 (diff)
download oslo-incubator-eff2625cbba5ea2c5b4463890fe660b8dfedc497.tar.gz
Add new core analyzer script that can be called from 'analyze-oslo.sh' script
Change-Id: I7aefdb21abe299b70eb3bd41ed2889b30318848d
-rwxr-xr-x tools/analyze-oslo.sh 9
-rw-r--r-- tools/new_core_analyzer.py 165
2 files changed, 168 insertions, 6 deletions
diff --git a/tools/analyze-oslo.sh b/tools/analyze-oslo.sh
index 7f5aabd6..cd35bd12 100755
--- a/tools/analyze-oslo.sh
+++ b/tools/analyze-oslo.sh
@@ -77,17 +77,14 @@ projects=$(ssh review.openstack.org -p 29418 gerrit ls-projects | grep -v 'attic
projects="$projects openstack/taskflow openstack/tooz openstack/cliff openstack/debtcollector"
projects="$projects openstack/futurist openstack/stevedore openstack-dev/cookiecutter"
projects="$projects openstack/automaton"
-mkdir -p "oslo_reports"
for repo in $projects; do
get_one_repo "$repo" "$base/$repo"
RC=$?
if [ $RC -ne 0 ] ; then
echo "Unable to obtain $repo"
- else
- echo
- echo "Producing inspector report for $repo"
- report_base=`basename $repo`
- (cd $repo && gitinspector -F htmlembedded -r -T > "${current_dir}/oslo_reports/${report_base}.html")
+ exit 1
fi
done
+
+python new_core_analyzer.py $projects > "${current_dir}/oslo_reports.txt"
diff --git a/tools/new_core_analyzer.py b/tools/new_core_analyzer.py
new file mode 100644
index 00000000..d8ee4621
--- /dev/null
+++ b/tools/new_core_analyzer.py
@@ -0,0 +1,165 @@
+import collections
+import contextlib
+import datetime
+import os
+import sys
+
+import tabulate
+
+from gitinspector.changes import Changes
+from gitinspector.metrics import MetricsLogic
+
+# Record pairing a repository's short name with the path it lives at.
+Repository = collections.namedtuple('Repository', 'name,location')
+
+# Author names that are always left out of the reports.
+CORE_SKIPS = frozenset((
+    u'Julien Danjou',
+    u'Davanum Srinivas',
+    u'Ben Nemec',
+    u'Joshua Harlow',
+    u'Brant Knudson',
+    u'Doug Hellmann',
+    u'Victor Stinner',
+    u'Michael Still',
+    u'Flavio Percoco',
+    u'Mehdi Abaakouk',
+    u'Robert Collins',
+))
+# Author email addresses that are always left out of the reports.
+EMAIL_SKIPS = frozenset((
+    'openstack-infra@lists.openstack.org',
+    'flaper87@gmail.com',
+    'fpercoco@redhat.com',
+))
+# Changes dated in a year before this one are ignored.
+OLDEST_COMMIT_YEAR = 2014
+
+
+@contextlib.contextmanager
+def auto_cwd(target_dir):
+    """Run the ``with`` body with ``target_dir`` as the working directory.
+
+    The previous working directory is restored when the body finishes,
+    whether it returns normally or raises.  When ``target_dir`` already
+    equals ``os.getcwd()`` no directory change is made on entry or exit.
+
+    :param target_dir: directory to switch into for the duration of the body
+    """
+    previous_dir = os.getcwd()
+    must_switch = previous_dir != target_dir
+    if must_switch:
+        os.chdir(target_dir)
+    try:
+        yield
+    finally:
+        # Only undo a switch that was actually made on entry.
+        if must_switch:
+            os.chdir(previous_dir)
+
+
+def new_core_compare(c1, c2):
+    """Three-way comparison of two report rows.
+
+    Rows are ordered by the tuple of their elements at indexes 3, 4 and 5
+    (the insertions, deletions and commits columns of the report tables).
+
+    :param c1: first row (an indexable with at least six elements)
+    :param c2: second row (an indexable with at least six elements)
+    :returns: ``0`` when both tuples are equal, ``-1`` when ``c1`` sorts
+              before ``c2`` and ``1`` otherwise
+    """
+    left = (c1[3], c1[4], c1[5])
+    right = (c2[3], c2[4], c2[5])
+    if left == right:
+        return 0
+    return -1 if left < right else 1
+
+
+def should_discard(change_date, author_name, author_email, author_info):
+    """Tell whether a change/author pair must be left out of a report.
+
+    :param change_date: object with a ``year`` attribute, or ``None`` to
+                        skip the age check
+    :param author_name: name looked up in ``CORE_SKIPS``
+    :param author_email: email looked up in ``EMAIL_SKIPS``
+    :param author_info: accepted but not used by this function
+    :returns: ``True`` when the name or email is in a skip set, or when
+              ``change_date`` is given and its year is earlier than
+              ``OLDEST_COMMIT_YEAR``; ``False`` otherwise
+    """
+    if author_name in CORE_SKIPS or author_email in EMAIL_SKIPS:
+        return True
+    return change_date is not None and change_date.year < OLDEST_COMMIT_YEAR
+
+
+def dump_changes(repo):
+    """Print a table of potential new cores for a single repository.
+
+    The analysis runs with ``repo.location`` as the working directory.
+    Authors and changes rejected by ``should_discard`` are dropped; the
+    remaining authors are printed ordered by (insertions, deletions,
+    commits), largest first, together with the date of their most recent
+    non-discarded change.
+
+    :param repo: ``Repository`` tuple (``name`` and ``location``)
+    :returns: shallow copy of ``changes.authors`` (author name -> author
+              info); every info object seen here has had its ``email``
+              attribute set to the author's latest email
+    """
+    # Local import so this block carries its own new dependency.
+    import functools
+
+    with auto_cwd(repo.location):
+        print("Analyzing repo %s (%s):" % (repo.name, repo.location))
+        print("Please wait...")
+        # These dicts are reached through the ``Changes`` class itself, so
+        # empty them before analyzing another repository (presumably to
+        # keep a previous repository's numbers out of this one).
+        Changes.authors.clear()
+        Changes.authors_dateinfo.clear()
+        Changes.authors_by_email.clear()
+        Changes.emails_by_author.clear()
+
+        changes = Changes(repo)
+        # This is needed to flush out changes progress message...
+        sys.stdout.write("\n")
+        # Force population of this info (the return values are not needed).
+        changes.get_authordateinfo_list()
+        changes.get_authorinfo_list()
+
+        # Map every (change date, author name) pair to that author's info.
+        info_by_change = {}
+        for c in changes.get_commits():
+            change_date = datetime.datetime.fromtimestamp(int(c.timestamp))
+            try:
+                author_info = changes.authors[c.author]
+            except KeyError:
+                # No aggregated info for this author; deliberately skipped.
+                continue
+            info_by_change[(change_date, c.author)] = author_info
+
+        maybe_new_cores = {}
+        for (change_date, author_name), author_info in info_by_change.items():
+            author_email = changes.get_latest_email_by_author(author_name)
+            # Recorded even for discarded authors; main() reads it later.
+            author_info.email = author_email
+            if should_discard(change_date, author_name,
+                              author_email, author_info):
+                continue
+            existing_info = maybe_new_cores.get(author_name)
+            if existing_info is None:
+                maybe_new_cores[author_name] = [
+                    author_name.encode("ascii", errors='replace'),
+                    author_email,
+                    change_date,
+                    author_info.insertions,
+                    author_info.deletions,
+                    author_info.commits,
+                ]
+            elif existing_info[2] < change_date:
+                # Keep the most recent change date seen for this author.
+                existing_info[2] = change_date
+
+        if maybe_new_cores:
+            print("%s potential new cores found!!" % len(maybe_new_cores))
+            # sorted() lost its ``cmp`` argument in Python 3; cmp_to_key
+            # gives the same ordering on Python 2.7 and Python 3.
+            tmp_maybe_new_cores = sorted(
+                maybe_new_cores.values(),
+                key=functools.cmp_to_key(new_core_compare),
+                reverse=True)
+            headers = [
+                'Name', 'Email', 'Last change made',
+                'Insertions', 'Deletions', 'Commits',
+            ]
+            print(tabulate.tabulate(tmp_maybe_new_cores, headers=headers,
+                                    tablefmt="grid"))
+        else:
+            print("No new cores found!!")
+        return changes.authors.copy()
+
+
+def main(repos):
+    """Report potential new cores per repository, then across all of them.
+
+    Each repository is analyzed with ``dump_changes``.  When at least one
+    repository was given, a combined table is printed in which every
+    non-discarded author's insertions, deletions and commits are summed
+    over all repositories, largest contributors first.
+
+    :param repos: repository paths; each is made absolute and its last
+                  path component is used as the repository name
+    """
+    # Local import so this block carries its own new dependency.
+    import functools
+
+    locations = [os.path.abspath(p) for p in repos]
+    parsed_repos = [Repository(os.path.basename(location), location)
+                    for location in locations]
+    all_authors = [dump_changes(repo) for repo in parsed_repos]
+    if not all_authors:
+        return
+
+    print("Combined changes of %s repos:" % len(parsed_repos))
+    maybe_new_cores = {}
+    for repo_authors in all_authors:
+        for author_name, author_info in repo_authors.items():
+            # No date is passed, so only the name/email skip sets apply.
+            if should_discard(None, author_name,
+                              author_info.email, author_info):
+                continue
+            prior_author_info = maybe_new_cores.get(author_name)
+            if prior_author_info is None:
+                maybe_new_cores[author_name] = [
+                    author_name.encode("ascii", errors='replace'),
+                    author_info.email,
+                    u"N/A",
+                    author_info.insertions,
+                    author_info.deletions,
+                    author_info.commits,
+                ]
+            else:
+                # Author already seen in another repository: add totals.
+                prior_author_info[3] += author_info.insertions
+                prior_author_info[4] += author_info.deletions
+                prior_author_info[5] += author_info.commits
+
+    # sorted() lost its ``cmp`` argument in Python 3; cmp_to_key gives the
+    # same ordering on Python 2.7 and Python 3.
+    tmp_maybe_new_cores = sorted(
+        maybe_new_cores.values(),
+        key=functools.cmp_to_key(new_core_compare),
+        reverse=True)
+    headers = [
+        'Name', 'Email', 'Last change made',
+        'Insertions', 'Deletions', 'Commits',
+    ]
+    print(tabulate.tabulate(tmp_maybe_new_cores, headers=headers,
+                            tablefmt="grid"))
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])