summaryrefslogtreecommitdiff
path: root/pbr/git.py
blob: 6e0e3467a9b7421b35f0a2814618ae4dcc62fa79 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
# Copyright 2011 OpenStack Foundation
# Copyright 2012-2013 Hewlett-Packard Development Company, L.P.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

from __future__ import unicode_literals

import distutils.errors
from distutils import log
import errno
import io
import os
import re
import subprocess
import time

import pkg_resources

from pbr import options
from pbr import version


def _run_shell_command(cmd, throw_on_error=False, buffer=True, env=None):
    if buffer:
        out_location = subprocess.PIPE
        err_location = subprocess.PIPE
    else:
        out_location = None
        err_location = None

    newenv = os.environ.copy()
    if env:
        newenv.update(env)

    output = subprocess.Popen(cmd,
                              stdout=out_location,
                              stderr=err_location,
                              env=newenv)
    out = output.communicate()
    if output.returncode and throw_on_error:
        raise distutils.errors.DistutilsError(
            "%s returned %d" % (cmd, output.returncode))
    if len(out) == 0 or not out[0] or not out[0].strip():
        return ''
    # Since we don't control the history, and forcing users to rebase arbitrary
    # history to fix utf8 issues is harsh, decode with replace.
    return out[0].strip().decode('utf-8', 'replace')


def _run_git_command(cmd, git_dir, **kwargs):
    if not isinstance(cmd, (list, tuple)):
        cmd = [cmd]
    return _run_shell_command(
        ['git', '--git-dir=%s' % git_dir] + cmd, **kwargs)


def _get_git_directory():
    try:
        return _run_shell_command(['git', 'rev-parse', '--git-dir'])
    except OSError as e:
        if e.errno == errno.ENOENT:
            # git not installed.
            return ''
        raise


def _git_is_installed():
    try:
        # We cannot use 'which git' as it may not be available
        # in some distributions, So just try 'git --version'
        # to see if we run into trouble
        _run_shell_command(['git', '--version'])
    except OSError:
        return False
    return True


def _get_highest_tag(tags):
    """Find the highest tag from a list.

    Pass in a list of tag strings and this will return the highest
    (latest) as sorted by the pkg_resources version parser.
    """
    return max(tags, key=pkg_resources.parse_version)


def _find_git_files(dirname='', git_dir=None):
    """Behave like a file finder entrypoint plugin.

    We don't actually use the entrypoints system for this because it runs
    at absurd times. We only want to do this when we are building an sdist.
    """
    file_list = []
    if git_dir is None:
        git_dir = _run_git_functions()
    if git_dir:
        log.info("[pbr] In git context, generating filelist from git")
        file_list = _run_git_command(['ls-files', '-z'], git_dir)
        # Users can fix utf8 issues locally with a single commit, so we are
        # strict here.
        file_list = file_list.split(b'\x00'.decode('utf-8'))
    return [f for f in file_list if f]


def _get_raw_tag_info(git_dir):
    describe = _run_git_command(['describe', '--always'], git_dir)
    if "-" in describe:
        return describe.rsplit("-", 2)[-2]
    if "." in describe:
        return 0
    return None


def get_is_release(git_dir):
    return _get_raw_tag_info(git_dir) == 0


def _run_git_functions():
    git_dir = None
    if _git_is_installed():
        git_dir = _get_git_directory()
    return git_dir or None


def get_git_short_sha(git_dir=None):
    """Return the short sha for this repo, if it exists."""
    if not git_dir:
        git_dir = _run_git_functions()
    if git_dir:
        return _run_git_command(
            ['log', '-n1', '--pretty=format:%h'], git_dir)
    return None


def _clean_changelog_message(msg):
    """Cleans any instances of invalid sphinx wording.

    This escapes/removes any instances of invalid characters
    that can be interpreted by sphinx as a warning or error
    when translating the Changelog into an HTML file for
    documentation building within projects.

    * Escapes '_' which is interpreted as a link
    * Escapes '*' which is interpreted as a new line
    * Escapes '`' which is interpreted as a literal
    """

    msg = msg.replace('*', '\*')
    msg = msg.replace('_', '\_')
    msg = msg.replace('`', '\`')

    return msg


def _iter_changelog(changelog):
    """Convert a oneline log iterator to formatted strings.

    :param changelog: An iterator of one line log entries like
        that given by _iter_log_oneline.
    :return: An iterator over (release, formatted changelog) tuples.
    """
    first_line = True
    current_release = None
    yield current_release, "CHANGES\n=======\n\n"
    for hash, tags, msg in changelog:
        if tags:
            current_release = _get_highest_tag(tags)
            underline = len(current_release) * '-'
            if not first_line:
                yield current_release, '\n'
            yield current_release, (
                "%(tag)s\n%(underline)s\n\n" %
                dict(tag=current_release, underline=underline))

        if not msg.startswith("Merge "):
            if msg.endswith("."):
                msg = msg[:-1]
            msg = _clean_changelog_message(msg)
            yield current_release, "* %(msg)s\n" % dict(msg=msg)
        first_line = False


def _iter_log_oneline(git_dir=None):
    """Iterate over --oneline log entries if possible.

    This parses the output into a structured form but does not apply
    presentation logic to the output - making it suitable for different
    uses.

    :return: An iterator of (hash, tags_set, 1st_line) tuples, or None if
        changelog generation is disabled / not available.
    """
    if git_dir is None:
        git_dir = _get_git_directory()
    if not git_dir:
        return []
    return _iter_log_inner(git_dir)


def _is_valid_version(candidate):
    try:
        version.SemanticVersion.from_pip_string(candidate)
        return True
    except ValueError:
        return False


def _iter_log_inner(git_dir):
    """Iterate over --oneline log entries.

    This parses the output intro a structured form but does not apply
    presentation logic to the output - making it suitable for different
    uses.

    .. caution:: this function risk to return a tag that doesn't exist really
                 inside the git objects list due to replacement made
                 to tag name to also list pre-release suffix.
                 Compliant with the SemVer specification (e.g 1.2.3-rc1)

    :return: An iterator of (hash, tags_set, 1st_line) tuples.
    """
    log.info('[pbr] Generating ChangeLog')
    log_cmd = ['log', '--decorate=full', '--format=%h%x00%s%x00%d']
    changelog = _run_git_command(log_cmd, git_dir)
    for line in changelog.split('\n'):
        line_parts = line.split('\x00')
        if len(line_parts) != 3:
            continue
        sha, msg, refname = line_parts
        tags = set()

        # refname can be:
        #  <empty>
        #  HEAD, tag: refs/tags/1.4.0, refs/remotes/origin/master, \
        #    refs/heads/master
        #  refs/tags/1.3.4
        if "refs/tags/" in refname:
            refname = refname.strip()[1:-1]  # remove wrapping ()'s
            # If we start with "tag: refs/tags/1.2b1, tag: refs/tags/1.2"
            # The first split gives us "['', '1.2b1, tag:', '1.2']"
            # Which is why we do the second split below on the comma
            for tag_string in refname.split("refs/tags/")[1:]:
                # git tag does not allow : or " " in tag names, so we split
                # on ", " which is the separator between elements
                candidate = tag_string.split(", ")[0].replace("-", ".")
                if _is_valid_version(candidate):
                    tags.add(candidate)

        yield sha, tags, msg


def write_git_changelog(git_dir=None, dest_dir=os.path.curdir,
                        option_dict=None, changelog=None):
    """Write a changelog based on the git changelog."""
    start = time.time()
    if not option_dict:
        option_dict = {}
    should_skip = options.get_boolean_option(option_dict, 'skip_changelog',
                                             'SKIP_WRITE_GIT_CHANGELOG')
    if should_skip:
        return
    if not changelog:
        changelog = _iter_log_oneline(git_dir=git_dir)
        if changelog:
            changelog = _iter_changelog(changelog)
    if not changelog:
        return
    new_changelog = os.path.join(dest_dir, 'ChangeLog')
    # If there's already a ChangeLog and it's not writable, just use it
    if (os.path.exists(new_changelog)
            and not os.access(new_changelog, os.W_OK)):
        log.info('[pbr] ChangeLog not written (file already'
                 ' exists and it is not writeable)')
        return
    log.info('[pbr] Writing ChangeLog')
    with io.open(new_changelog, "w", encoding="utf-8") as changelog_file:
        for release, content in changelog:
            changelog_file.write(content)
    stop = time.time()
    log.info('[pbr] ChangeLog complete (%0.1fs)' % (stop - start))


def generate_authors(git_dir=None, dest_dir='.', option_dict=dict()):
    """Create AUTHORS file using git commits."""
    should_skip = options.get_boolean_option(option_dict, 'skip_authors',
                                             'SKIP_GENERATE_AUTHORS')
    if should_skip:
        return
    start = time.time()
    old_authors = os.path.join(dest_dir, 'AUTHORS.in')
    new_authors = os.path.join(dest_dir, 'AUTHORS')
    # If there's already an AUTHORS file and it's not writable, just use it
    if (os.path.exists(new_authors)
            and not os.access(new_authors, os.W_OK)):
        return
    log.info('[pbr] Generating AUTHORS')
    ignore_emails = '((jenkins|zuul)@review|infra@lists|jenkins@openstack)'
    if git_dir is None:
        git_dir = _get_git_directory()
    if git_dir:
        authors = []

        # don't include jenkins email address in AUTHORS file
        git_log_cmd = ['log', '--format=%aN <%aE>']
        authors += _run_git_command(git_log_cmd, git_dir).split('\n')
        authors = [a for a in authors if not re.search(ignore_emails, a)]

        # get all co-authors from commit messages
        co_authors_out = _run_git_command('log', git_dir)
        co_authors = re.findall('Co-authored-by:.+', co_authors_out,
                                re.MULTILINE)
        co_authors = [signed.split(":", 1)[1].strip()
                      for signed in co_authors if signed]

        authors += co_authors
        authors = sorted(set(authors))

        with open(new_authors, 'wb') as new_authors_fh:
            if os.path.exists(old_authors):
                with open(old_authors, "rb") as old_authors_fh:
                    new_authors_fh.write(old_authors_fh.read())
            new_authors_fh.write(('\n'.join(authors) + '\n')
                                 .encode('utf-8'))
    stop = time.time()
    log.info('[pbr] AUTHORS complete (%0.1fs)' % (stop - start))