summaryrefslogtreecommitdiff
path: root/SCons/Util/hashes.py
blob: e14da012aa37810765b8f4fe364d40ee5a699ffe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
# SPDX-License-Identifier: MIT
#
# Copyright The SCons Foundation

"""SCons utility functions

Routines for working with hash formats.
"""

import hashlib
import sys

from .types import to_bytes


# Default hash function and format. SCons-internal.
# Hash formats SCons knows about, listed in order of preference.
DEFAULT_HASH_FORMATS = ['md5', 'sha1', 'sha256']
# Entries of DEFAULT_HASH_FORMATS that could actually be initialized on
# this system; rebuilt by _set_allowed_viable_default_hashes().
ALLOWED_HASH_FORMATS = []
# Name of the hashlib attribute (e.g. 'md5') currently used for hashing;
# assigned by set_hash_format().
_HASH_FUNCTION = None
# The hash format as passed to set_hash_format(); a false value means the
# default precedence order is in effect.
_HASH_FORMAT = None


def _attempt_init_of_python_3_9_hash_object(hash_function_object, sys_used=sys):
    """Instantiate a hashlib constructor, marking it as non-security use.

    From Python 3.9 on, :mod:`hashlib` constructors take a keyword
    argument *usedforsecurity*. Passing ``False`` allows algorithms that
    FIPS or Python itself has deprecated to keep working; SCons uses MD5
    only as a short hash resistant to accidental collisions, not for
    security.

    Older interpreters return a native function wrapper that errors out
    when given that keyword, so there the constructor is called without
    arguments.

    Args:
        hash_function_object: a hashlib constructor such as
            ``hashlib.md5``, or ``None``.
        sys_used: object providing ``version_info.major`` and
            ``version_info.minor``; defaults to :mod:`sys`.

    Returns:
        The new hash object, or ``None`` if *hash_function_object* is
        ``None``.

    Raises:
        ValueError: may propagate from the constructor (for example due
            to FIPS restrictions); diagnosing it is left to the caller.
    """
    if hash_function_object is None:
        return None

    # A plain version comparison is sufficient here: the 'usedforsecurity'
    # flag is documented as available starting with 3.9. See
    # https://docs.python.org/3/library/hashlib.html#:~:text=usedforsecurity
    version = sys_used.version_info
    predates_flag = version.major < 3 or (
        version.major == 3 and version.minor < 9
    )
    if predates_flag:
        # On FIPS-enabled systems this call can raise ValueError on first
        # init; the caller is expected to handle/report that.
        return hash_function_object()

    return hash_function_object(usedforsecurity=False)


def _set_allowed_viable_default_hashes(hashlib_used, sys_used=sys) -> None:
    """Check which of the default hash algorithms can be called.

    This module is sometimes used prior to setting the user-selected
    hash algorithm, meaning that on FIPS-compliant systems the library
    would default-initialize MD5 and throw an exception in
    set_hash_format. A common case is using the SConf options, which can
    run prior to main, and thus ignore the options.hash_format variable.

    This function walks DEFAULT_HASH_FORMATS and rebinds the global
    ALLOWED_HASH_FORMATS to a new list holding only the formats whose
    constructor exists on *hashlib_used* and can be initialized without
    raising ValueError.

    Args:
        hashlib_used: module or object whose attributes are the hash
            constructors to probe (normally :mod:`hashlib`).
        sys_used: object supplying ``version_info``; forwarded to the
            initialization helper.

    Raises:
        SConsEnvironmentError: if no format is usable. The most recent
            ValueError from the probe (if any) is chained as the cause.
    """
    global ALLOWED_HASH_FORMATS
    _last_error = None
    # Start from a fresh list on every call; otherwise repeated calls
    # would keep appending the same formats.
    ALLOWED_HASH_FORMATS = []

    for test_algorithm in DEFAULT_HASH_FORMATS:
        _test_hash = getattr(hashlib_used, test_algorithm, None)
        if _test_hash is None:
            # Not offered by this hashlib at all.
            continue
        # The constructor exists; check that it can actually be called.
        # In FIPS mode a disallowed algorithm raises on initialization.
        try:
            _attempt_init_of_python_3_9_hash_object(_test_hash, sys_used)
        except ValueError as e:
            _last_error = e
        else:
            ALLOWED_HASH_FORMATS.append(test_algorithm)

    if not ALLOWED_HASH_FORMATS:
        from SCons.Errors import (
            SConsEnvironmentError,
        )  # pylint: disable=import-outside-toplevel

        # Chain the exception thrown with the most recent error from hashlib.
        # Note the trailing space: adjacent literals are joined verbatim.
        raise SConsEnvironmentError(
            'No usable hash algorithms found. '
            'Most recent error from hashlib attached in trace.'
        ) from _last_error


# Populate ALLOWED_HASH_FORMATS at import time by probing the real hashlib.
_set_allowed_viable_default_hashes(hashlib)


def get_hash_format():
    """Retrieves the hash format or ``None`` if not overridden.

    The value returned is the module-level ``_HASH_FORMAT``, i.e. the
    format stored by :func:`set_hash_format`.

    A return value of ``None``
    does not guarantee that MD5 is being used; instead, it means that the
    default precedence order documented in :func:`SCons.Util.set_hash_format`
    is respected.

    Returns:
        The stored hash format, or ``None``.
    """
    return _HASH_FORMAT


def _attempt_get_hash_function(hash_name, hashlib_used=hashlib, sys_used=sys):
    """Check that a named hash function exists and can be initialized.

    Args:
        hash_name: attribute name to look up on *hashlib_used*.
        hashlib_used: module or object providing the hash constructors.
        sys_used: object supplying ``version_info``; forwarded to the
            initialization helper.

    Returns:
        *hash_name* if the constructor was found and a trial
        initialization succeeded, otherwise ``None``.
    """
    try:
        constructor = getattr(hashlib_used, hash_name, None)
        usable = constructor is not None
        if usable:
            # Result is discarded; only whether it raises matters.
            _attempt_init_of_python_3_9_hash_object(constructor, sys_used)
    except ValueError:
        # Typically means FIPS mode rejected the algorithm. Reaching this
        # point suggests the viable-hash check was bypassed or did not
        # restrict the choice properly. No exception is raised here; the
        # failure is reported to the caller as None.
        usable = False

    return hash_name if usable else None


def set_hash_format(hash_format, hashlib_used=hashlib, sys_used=sys):
    """Sets the default hash format used by SCons.

    If `hash_format` is ``None`` or
    an empty string, the default is determined by this function.

    Currently the default behavior is to use the first available format of
    the following options: MD5, SHA1, SHA256.

    The module-level ``_HASH_FORMAT`` and ``_HASH_FUNCTION`` are updated
    together, and only after a usable hash function has been found. A call
    that raises therefore leaves the previously selected format and
    function untouched, so the two values cannot disagree.

    Args:
        hash_format: name of the requested format (matched
            case-insensitively), or a false value to select the default.
        hashlib_used: module or object providing the hash constructors.
        sys_used: object supplying ``version_info``; forwarded to the
            probing helper.

    Raises:
        UserError: if the requested format is not in ALLOWED_HASH_FORMATS,
            if it cannot be initialized, or if no default format is
            usable.
    """
    global _HASH_FORMAT, _HASH_FUNCTION

    # NOTE: SCons.Errors is imported only on the error paths. This function
    # runs at module import time, so the import is deliberately kept lazy.
    if hash_format:
        hash_format_lower = hash_format.lower()
        if hash_format_lower not in ALLOWED_HASH_FORMATS:
            from SCons.Errors import (
                UserError,
            )  # pylint: disable=import-outside-toplevel

            # User can select something not supported by their OS but
            # normally supported by SCons, example, selecting MD5 in an
            # OS with FIPS-mode turned on. Therefore we first check if
            # SCons supports it, and then if their local OS supports it.
            if hash_format_lower in DEFAULT_HASH_FORMATS:
                raise UserError(
                    'While hash format "%s" is supported by SCons, the '
                    'local system indicates only the following hash '
                    'formats are supported by the hashlib library: %s'
                    % (hash_format_lower, ', '.join(ALLOWED_HASH_FORMATS))
                )

            # The hash format isn't supported by SCons in any case.
            # Warn the user, and if we detect that SCons supports more
            # algorithms than their local system supports,
            # warn the user about that too.
            if ALLOWED_HASH_FORMATS == DEFAULT_HASH_FORMATS:
                raise UserError(
                    'Hash format "%s" is not supported by SCons. Only '
                    'the following hash formats are supported: %s'
                    % (hash_format_lower, ', '.join(ALLOWED_HASH_FORMATS))
                )

            raise UserError(
                'Hash format "%s" is not supported by SCons. '
                'SCons supports more hash formats than your local system '
                'is reporting; SCons supports: %s. Your local system only '
                'supports: %s'
                % (
                    hash_format_lower,
                    ', '.join(DEFAULT_HASH_FORMATS),
                    ', '.join(ALLOWED_HASH_FORMATS),
                )
            )

        # This is not expected to fail. If this fails it means the
        # set_allowed_viable_default_hashes function did not throw,
        # or when it threw, the exception was caught and ignored, or
        # the global ALLOWED_HASH_FORMATS was changed by an external user.
        hash_function = _attempt_get_hash_function(
            hash_format_lower, hashlib_used, sys_used
        )

        if hash_function is None:
            from SCons.Errors import (
                UserError,
            )  # pylint: disable=import-outside-toplevel

            raise UserError(
                f'Hash format "{hash_format_lower}" is not available in your '
                'Python interpreter. Expected to be supported algorithm by '
                'set_allowed_viable_default_hashes. Assertion error in SCons.'
            )
    else:
        # Set the default hash format based on what is available, defaulting
        # to the first supported hash algorithm (usually md5) for backwards
        # compatibility. In FIPS-compliant systems this usually defaults to
        # SHA1, unless that too has been disabled.
        hash_function = None
        for choice in ALLOWED_HASH_FORMATS:
            hash_function = _attempt_get_hash_function(
                choice, hashlib_used, sys_used
            )

            if hash_function is not None:
                break
        else:
            # This is not expected to happen in practice.
            from SCons.Errors import (
                UserError,
            )  # pylint: disable=import-outside-toplevel

            raise UserError(
                'Your Python interpreter does not have MD5, SHA1, or SHA256. '
                'SCons requires at least one. Expected to support one or more '
                'during set_allowed_viable_default_hashes.'
            )

    # Commit both values together now that selection has succeeded.
    _HASH_FORMAT = hash_format
    _HASH_FUNCTION = hash_function


# Select the default hash function at import time, so that hashing works
# even if nothing calls set_hash_format() explicitly, in case either:
#    1. This code is running in a unit test.
#    2. This code is running in a consumer that does hash operations while
#       SConscript files are being loaded.
set_hash_format(None)


def get_current_hash_algorithm_used():
    """Returns the current hash algorithm name used.

    The value returned is the module-level ``_HASH_FUNCTION``, which holds
    the name of a hashlib attribute (a string such as ``'md5'``), not a
    callable or hash object.

    Where the python version >= 3.9, this is expected to return md5.
    If python's version is <= 3.8, this returns md5 on non-FIPS-mode platforms, and
    sha1 or sha256 on FIPS-mode Linux platforms.

    This function is primarily useful for testing, where one expects a value to be
    one of N distinct hashes, and therefore the test needs to know which hash to select.

    Returns:
        The name of the hash algorithm in use, or ``None`` if none is set.
    """
    return _HASH_FUNCTION


def _get_hash_object(hash_format, hashlib_used=hashlib, sys_used=sys):
    """Allocates a hash object using the requested hash format.

    Both the default-format path and the explicit-format path look the
    constructor up on *hashlib_used*, so a substituted hashlib is honored
    consistently.

    Args:
        hash_format: Hash format to use, or ``None`` to use the module's
            currently selected default hash function.
        hashlib_used: module or object providing the hash constructors.
        sys_used: object supplying ``version_info``; forwarded to the
            initialization helper.

    Returns:
        hashlib object. May be ``None`` when *hash_format* is ``None`` and
        the default function name is not an attribute of *hashlib_used*.

    Raises:
        UserError: if *hash_format* is ``None`` and no default hash
            function is set, or if *hash_format* is not an attribute of
            *hashlib_used*.
    """
    if hash_format is None:
        if _HASH_FUNCTION is None:
            from SCons.Errors import (
                UserError,
            )  # pylint: disable=import-outside-toplevel

            raise UserError(
                'There is no default hash function. Did you call '
                'a hashing function before SCons was initialized?'
            )
        return _attempt_init_of_python_3_9_hash_object(
            getattr(hashlib_used, _HASH_FUNCTION, None), sys_used
        )

    # Consult the hashlib that was passed in, not the global module, so
    # that this branch agrees with the default-format branch above.
    if not hasattr(hashlib_used, hash_format):
        from SCons.Errors import UserError  # pylint: disable=import-outside-toplevel

        raise UserError(
            f'Hash format "{hash_format}" is not available in your Python interpreter.'
        )

    return _attempt_init_of_python_3_9_hash_object(
        getattr(hashlib_used, hash_format), sys_used
    )


def hash_signature(s, hash_format=None):
    """
    Compute the hex digest of a string or bytes value.

    If converting *s* to bytes (or feeding it to the hash) raises
    TypeError, ``str(s)`` is hashed instead.

    Args:
        s: either string or bytes. Normally should be bytes
        hash_format: Specify to override default hash format

    Returns:
        String of hex digits representing the signature
    """
    hasher = _get_hash_object(hash_format)
    try:
        payload = to_bytes(s)
        hasher.update(payload)
    except TypeError:
        # Fall back to the string representation of the value.
        fallback = to_bytes(str(s))
        hasher.update(fallback)

    digest = hasher.hexdigest()
    return digest


def hash_file_signature(fname, chunksize: int=65536, hash_format=None):
    """
    Compute the hex digest of a file's contents.

    The file is opened in binary mode and read in pieces of *chunksize*
    so it does not need to be held in memory all at once.

    Args:
        fname: file to hash
        chunksize: chunk size to read
        hash_format: Specify to override default hash format

    Returns:
        String of Hex digits representing the signature
    """

    hasher = _get_hash_object(hash_format)
    with open(fname, "rb") as stream:
        # Priming read, then keep going until a read comes back empty.
        piece = stream.read(chunksize)
        while piece:
            hasher.update(to_bytes(piece))
            piece = stream.read(chunksize)
    return hasher.hexdigest()


def hash_collect(signatures, hash_format=None):
    """
    Combine several signatures into a single aggregate signature.

    A collection holding exactly one signature yields that signature
    unchanged; otherwise the signatures are joined with ``', '`` and the
    joined string is hashed.

    Args:
        signatures: a list of signatures
        hash_format: Specify to override default hash format

    Returns:
        the aggregate signature
    """

    if len(signatures) != 1:
        combined = ', '.join(signatures)
        return hash_signature(combined, hash_format)

    # Nothing to aggregate: pass the lone signature straight through.
    return signatures[0]


# Set once the MD5 deprecation warning has been emitted.
_MD5_WARNING_SHOWN = False


def _show_md5_warning(function_name) -> None:
    """Emit the deprecation warning for the legacy MD5 functions, once.

    The warning is issued only while the module-level flag is unset; the
    flag is set after the warning call returns, so later calls (for any
    function name) do nothing.

    Args:
        function_name: name of the deprecated function to mention in the
            warning text.
    """

    global _MD5_WARNING_SHOWN

    if _MD5_WARNING_SHOWN:
        return

    import SCons.Warnings  # pylint: disable=import-outside-toplevel

    message = f"Function {function_name} is deprecated"
    SCons.Warnings.warn(SCons.Warnings.DeprecatedWarning, message)
    _MD5_WARNING_SHOWN = True


def MD5signature(s):
    """Deprecated. Use :func:`hash_signature` instead.

    Triggers the MD5 deprecation warning, then delegates to
    :func:`hash_signature` with the default hash format, so the result
    is not necessarily an MD5 digest.

    Args:
        s: value to hash; passed through to :func:`hash_signature`.

    Returns:
        Whatever :func:`hash_signature` returns for *s*.
    """

    _show_md5_warning("MD5signature")
    return hash_signature(s)


def MD5filesignature(fname, chunksize: int=65536):
    """Deprecated. Use :func:`hash_file_signature` instead.

    Triggers the MD5 deprecation warning, then delegates to
    :func:`hash_file_signature` with the default hash format, so the
    result is not necessarily an MD5 digest.

    Args:
        fname: file to hash.
        chunksize: chunk size to read.

    Returns:
        Whatever :func:`hash_file_signature` returns for the file.
    """

    _show_md5_warning("MD5filesignature")
    return hash_file_signature(fname, chunksize)


def MD5collect(signatures):
    """Deprecated. Use :func:`hash_collect` instead.

    Triggers the MD5 deprecation warning, then delegates to
    :func:`hash_collect` with the default hash format.

    Args:
        signatures: a list of signatures.

    Returns:
        Whatever :func:`hash_collect` returns for *signatures*.
    """

    _show_md5_warning("MD5collect")
    return hash_collect(signatures)


# Local Variables:
# tab-width:4
# indent-tabs-mode:nil
# End:
# vim: set expandtab tabstop=4 shiftwidth=4: