1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
|
# Copyright (c) 2011, Apple Inc. All rights reserved.
# Copyright (c) 2009, 2011, 2012 Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# WebKit's Python module for committer and reviewer validation.
import fnmatch
import json
from webkitpy.common.editdistance import edit_distance
from webkitpy.common.memoized import memoized
from webkitpy.common.system.filesystem import FileSystem
# The list of contributors have been moved to contributors.json
class Contributor(object):
def __init__(self, name, email_or_emails, irc_nickname_or_nicknames=None):
assert(name)
assert(email_or_emails)
self.full_name = name
if isinstance(email_or_emails, str):
self.emails = [email_or_emails]
else:
self.emails = email_or_emails
self.emails = map(lambda email: email.lower(), self.emails) # Emails are case-insensitive.
if isinstance(irc_nickname_or_nicknames, str):
self.irc_nicknames = [irc_nickname_or_nicknames]
else:
self.irc_nicknames = irc_nickname_or_nicknames
self.can_commit = False
self.can_review = False
def bugzilla_email(self):
# FIXME: We're assuming the first email is a valid bugzilla email,
# which might not be right.
return self.emails[0]
def __str__(self):
return unicode(self).encode('utf-8')
def __unicode__(self):
return '"%s" <%s>' % (self.full_name, self.emails[0])
def contains_string(self, search_string):
string = search_string.lower()
if string in self.full_name.lower():
return True
if self.irc_nicknames:
for nickname in self.irc_nicknames:
if string in nickname.lower():
return True
for email in self.emails:
if string in email:
return True
return False
def matches_glob(self, glob_string):
if fnmatch.fnmatch(self.full_name, glob_string):
return True
if self.irc_nicknames:
for nickname in self.irc_nicknames:
if fnmatch.fnmatch(nickname, glob_string):
return True
for email in self.emails:
if fnmatch.fnmatch(email, glob_string):
return True
return False
class Committer(Contributor):
def __init__(self, name, email_or_emails, irc_nickname=None):
Contributor.__init__(self, name, email_or_emails, irc_nickname)
self.can_commit = True
class Reviewer(Committer):
def __init__(self, name, email_or_emails, irc_nickname=None):
Committer.__init__(self, name, email_or_emails, irc_nickname)
self.can_review = True
class CommitterList(object):
# Committers and reviewers are passed in to allow easy testing
def __init__(self,
committers=[],
reviewers=[],
contributors=[]):
# FIXME: These arguments only exist for testing. Clean it up.
if not (committers or reviewers or contributors):
loaded_data = self.load_json()
contributors = loaded_data['Contributors']
committers = loaded_data['Committers']
reviewers = loaded_data['Reviewers']
self._contributors = contributors + committers + reviewers
self._committers = committers + reviewers
self._reviewers = reviewers
self._contributors_by_name = {}
self._accounts_by_email = {}
self._accounts_by_login = {}
@staticmethod
@memoized
def load_json():
filesystem = FileSystem()
json_path = filesystem.join(filesystem.dirname(filesystem.path_to_module('webkitpy.common.config')), 'contributors.json')
contributors = json.loads(filesystem.read_text_file(json_path))
return {
'Contributors': [Contributor(name, data.get('emails'), data.get('nicks')) for name, data in contributors['Contributors'].iteritems()],
'Committers': [Committer(name, data.get('emails'), data.get('nicks')) for name, data in contributors['Committers'].iteritems()],
'Reviewers': [Reviewer(name, data.get('emails'), data.get('nicks')) for name, data in contributors['Reviewers'].iteritems()],
}
def contributors(self):
return self._contributors
def committers(self):
return self._committers
def reviewers(self):
return self._reviewers
def _name_to_contributor_map(self):
if not len(self._contributors_by_name):
for contributor in self._contributors:
assert(contributor.full_name)
assert(contributor.full_name.lower() not in self._contributors_by_name) # We should never have duplicate names.
self._contributors_by_name[contributor.full_name.lower()] = contributor
return self._contributors_by_name
def _email_to_account_map(self):
if not len(self._accounts_by_email):
for account in self._contributors:
for email in account.emails:
assert(email not in self._accounts_by_email) # We should never have duplicate emails.
self._accounts_by_email[email] = account
return self._accounts_by_email
def _login_to_account_map(self):
if not len(self._accounts_by_login):
for account in self._contributors:
if account.emails:
login = account.bugzilla_email()
assert(login not in self._accounts_by_login) # We should never have duplicate emails.
self._accounts_by_login[login] = account
return self._accounts_by_login
def _committer_only(self, record):
if record and not record.can_commit:
return None
return record
def _reviewer_only(self, record):
if record and not record.can_review:
return None
return record
def committer_by_name(self, name):
return self._committer_only(self.contributor_by_name(name))
def contributor_by_irc_nickname(self, irc_nickname):
for contributor in self.contributors():
# FIXME: This should do case-insensitive comparison or assert that all IRC nicknames are in lowercase
if contributor.irc_nicknames and irc_nickname in contributor.irc_nicknames:
return contributor
return None
def contributors_by_search_string(self, string):
glob_matches = filter(lambda contributor: contributor.matches_glob(string), self.contributors())
return glob_matches or filter(lambda contributor: contributor.contains_string(string), self.contributors())
def contributors_by_email_username(self, string):
string = string + '@'
result = []
for contributor in self.contributors():
for email in contributor.emails:
if email.startswith(string):
result.append(contributor)
break
return result
def _contributor_name_shorthands(self, contributor):
if ' ' not in contributor.full_name:
return []
split_fullname = contributor.full_name.split()
first_name = split_fullname[0]
last_name = split_fullname[-1]
return first_name, last_name, first_name + last_name[0], first_name + ' ' + last_name[0]
def _tokenize_contributor_name(self, contributor):
full_name_in_lowercase = contributor.full_name.lower()
tokens = [full_name_in_lowercase] + full_name_in_lowercase.split()
if contributor.irc_nicknames:
return tokens + [nickname.lower() for nickname in contributor.irc_nicknames if len(nickname) > 5]
return tokens
def contributors_by_fuzzy_match(self, string):
string_in_lowercase = string.lower()
# 1. Exact match for fullname, email and irc_nicknames
account = self.contributor_by_name(string_in_lowercase) or self.contributor_by_email(string_in_lowercase) or self.contributor_by_irc_nickname(string_in_lowercase)
if account:
return [account], 0
# 2. Exact match for email username (before @)
accounts = self.contributors_by_email_username(string_in_lowercase)
if accounts and len(accounts) == 1:
return accounts, 0
# 3. Exact match for first name, last name, and first name + initial combinations such as "Dan B" and "Tim H"
accounts = [contributor for contributor in self.contributors() if string in self._contributor_name_shorthands(contributor)]
if accounts and len(accounts) == 1:
return accounts, 0
# 4. Finally, fuzzy-match using edit-distance
string = string_in_lowercase
contributorWithMinDistance = []
minDistance = len(string) / 2 - 1
for contributor in self.contributors():
tokens = self._tokenize_contributor_name(contributor)
editdistances = [edit_distance(token, string) for token in tokens if abs(len(token) - len(string)) <= minDistance]
if not editdistances:
continue
distance = min(editdistances)
if distance == minDistance:
contributorWithMinDistance.append(contributor)
elif distance < minDistance:
contributorWithMinDistance = [contributor]
minDistance = distance
if not len(contributorWithMinDistance):
return [], len(string)
return contributorWithMinDistance, minDistance
def contributor_by_email(self, email):
return self._email_to_account_map().get(email.lower()) if email else None
def contributor_by_name(self, name):
return self._name_to_contributor_map().get(name.lower()) if name else None
def committer_by_email(self, email):
return self._committer_only(self.contributor_by_email(email))
def reviewer_by_email(self, email):
return self._reviewer_only(self.contributor_by_email(email))
|