diff options
Diffstat (limited to 'swift/common/utils/__init__.py')
-rw-r--r-- | swift/common/utils/__init__.py | 1013 |
1 files changed, 123 insertions, 890 deletions
diff --git a/swift/common/utils/__init__.py b/swift/common/utils/__init__.py index 3b4db177e..ef6b0180e 100644 --- a/swift/common/utils/__init__.py +++ b/swift/common/utils/__init__.py @@ -26,7 +26,6 @@ import fcntl import grp import hashlib import json -import math import operator import os import pwd @@ -37,12 +36,9 @@ import sys import time import uuid import functools -import platform import email.parser from random import random, shuffle from contextlib import contextmanager, closing -import ctypes -import ctypes.util from optparse import OptionParser import traceback import warnings @@ -97,90 +93,36 @@ from swift.common.linkat import linkat # For backwards compatability with 3rd party middlewares from swift.common.registry import register_swift_info, get_swift_info # noqa +from swift.common.utils.libc import ( # noqa + F_SETPIPE_SZ, + load_libc_function, + config_fallocate_value, + disable_fallocate, + fallocate, + punch_hole, + drop_buffer_cache, + get_md5_socket, + modify_priority, +) +from swift.common.utils.timestamp import ( # noqa + NORMAL_FORMAT, + INTERNAL_FORMAT, + SHORT_FORMAT, + MAX_OFFSET, + PRECISION, + Timestamp, + encode_timestamps, + decode_timestamps, + normalize_timestamp, + EPOCH, + last_modified_date_to_timestamp, + normalize_delete_at_timestamp, +) -# logging doesn't import patched as cleanly as one would like from logging.handlers import SysLogHandler import logging -logging.thread = eventlet.green.thread -logging.threading = eventlet.green.threading -logging._lock = logging.threading.RLock() -# setup notice level logging -NOTICE = 25 -logging.addLevelName(NOTICE, 'NOTICE') -SysLogHandler.priority_map['NOTICE'] = 'notice' - -# These are lazily pulled from libc elsewhere -_sys_fallocate = None -_posix_fadvise = None -_libc_socket = None -_libc_bind = None -_libc_accept = None -# see man -s 2 setpriority -_libc_setpriority = None -# see man -s 2 syscall -_posix_syscall = None - -# If set to non-zero, fallocate routines will fail based on 
free space -# available being at or below this amount, in bytes. -FALLOCATE_RESERVE = 0 -# Indicates if FALLOCATE_RESERVE is the percentage of free space (True) or -# the number of bytes (False). -FALLOCATE_IS_PERCENT = False - -# from /usr/include/linux/falloc.h -FALLOC_FL_KEEP_SIZE = 1 -FALLOC_FL_PUNCH_HOLE = 2 - -# from /usr/src/linux-headers-*/include/uapi/linux/resource.h -PRIO_PROCESS = 0 - - -# /usr/include/x86_64-linux-gnu/asm/unistd_64.h defines syscalls there -# are many like it, but this one is mine, see man -s 2 ioprio_set -def NR_ioprio_set(): - """Give __NR_ioprio_set value for your system.""" - architecture = os.uname()[4] - arch_bits = platform.architecture()[0] - # check if supported system, now support x86_64 and AArch64 - if architecture == 'x86_64' and arch_bits == '64bit': - return 251 - elif architecture == 'aarch64' and arch_bits == '64bit': - return 30 - raise OSError("Swift doesn't support ionice priority for %s %s" % - (architecture, arch_bits)) - - -# this syscall integer probably only works on x86_64 linux systems, you -# can check if it's correct on yours with something like this: -""" -#include <stdio.h> -#include <sys/syscall.h> - -int main(int argc, const char* argv[]) { - printf("%d\n", __NR_ioprio_set); - return 0; -} -""" - -# this is the value for "which" that says our who value will be a pid -# pulled out of /usr/src/linux-headers-*/include/linux/ioprio.h -IOPRIO_WHO_PROCESS = 1 - - -IO_CLASS_ENUM = { - 'IOPRIO_CLASS_RT': 1, - 'IOPRIO_CLASS_BE': 2, - 'IOPRIO_CLASS_IDLE': 3, -} - -# the IOPRIO_PRIO_VALUE "macro" is also pulled from -# /usr/src/linux-headers-*/include/linux/ioprio.h -IOPRIO_CLASS_SHIFT = 13 - - -def IOPRIO_PRIO_VALUE(class_, data): - return (((class_) << IOPRIO_CLASS_SHIFT) | data) +NOTICE = 25 # Used by hash_path to offer a bit more security when generating hashes for # paths. 
It simply appends this value to all paths; guessing the hash a path @@ -190,12 +132,6 @@ HASH_PATH_PREFIX = b'' SWIFT_CONF_FILE = '/etc/swift/swift.conf' -# These constants are Linux-specific, and Python doesn't seem to know -# about them. We ask anyway just in case that ever gets fixed. -# -# The values were copied from the Linux 3.x kernel headers. -AF_ALG = getattr(socket, 'AF_ALG', 38) -F_SETPIPE_SZ = getattr(fcntl, 'F_SETPIPE_SZ', 1031) O_TMPFILE = getattr(os, 'O_TMPFILE', 0o20000000 | os.O_DIRECTORY) # Used by the parse_socket_string() function to validate IPv6 addresses @@ -500,6 +436,17 @@ def config_read_prefixed_options(conf, prefix_name, defaults): return params +def logging_monkey_patch(): + # explicitly patch the logging lock + logging._lock = logging.threading.RLock() + # setup notice level logging + logging.addLevelName(NOTICE, 'NOTICE') + SysLogHandler.priority_map['NOTICE'] = 'notice' + # Trying to log threads while monkey-patched can lead to deadlocks; see + # https://bugs.launchpad.net/swift/+bug/1895739 + logging.logThreads = 0 + + def eventlet_monkey_patch(): """ Install the appropriate Eventlet monkey patches. @@ -510,13 +457,14 @@ def eventlet_monkey_patch(): # if thread is monkey-patched. eventlet.patcher.monkey_patch(all=False, socket=True, select=True, thread=True) - # Trying to log threads while monkey-patched can lead to deadlocks; see - # https://bugs.launchpad.net/swift/+bug/1895739 - logging.logThreads = 0 -def noop_libc_function(*args): - return 0 +def monkey_patch(): + """ + Apply all swift monkey patching consistently in one place. + """ + eventlet_monkey_patch() + logging_monkey_patch() def validate_configuration(): @@ -526,39 +474,6 @@ def validate_configuration(): sys.exit("Error: %s" % e) -def load_libc_function(func_name, log_error=True, - fail_if_missing=False, errcheck=False): - """ - Attempt to find the function in libc, otherwise return a no-op func. - - :param func_name: name of the function to pull from libc. 
- :param log_error: log an error when a function can't be found - :param fail_if_missing: raise an exception when a function can't be found. - Default behavior is to return a no-op function. - :param errcheck: boolean, if true install a wrapper on the function - to check for a return values of -1 and call - ctype.get_errno and raise an OSError - """ - try: - libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True) - func = getattr(libc, func_name) - except AttributeError: - if fail_if_missing: - raise - if log_error: - logging.warning(_("Unable to locate %s in libc. Leaving as a " - "no-op."), func_name) - return noop_libc_function - if errcheck: - def _errcheck(result, f, args): - if result == -1: - errcode = ctypes.get_errno() - raise OSError(errcode, os.strerror(errcode)) - return result - func.errcheck = _errcheck - return func - - def generate_trans_id(trans_id_suffix): return 'tx%s-%010x%s' % ( uuid.uuid4().hex[:21], int(time.time()), quote(trans_id_suffix)) @@ -755,25 +670,6 @@ def get_trans_id_time(trans_id): return None -def config_fallocate_value(reserve_value): - """ - Returns fallocate reserve_value as an int or float. - Returns is_percent as a boolean. - Returns a ValueError on invalid fallocate value. - """ - try: - if str(reserve_value[-1:]) == '%': - reserve_value = float(reserve_value[:-1]) - is_percent = True - else: - reserve_value = int(reserve_value) - is_percent = False - except ValueError: - raise ValueError('Error: %s is an invalid value for fallocate' - '_reserve.' % reserve_value) - return reserve_value, is_percent - - class FileLikeIter(object): def __init__(self, iterable): @@ -924,164 +820,6 @@ def fs_has_free_space(fs_path, space_needed, is_percent): return free_bytes >= space_needed -class _LibcWrapper(object): - """ - A callable object that forwards its calls to a C function from libc. - - These objects are lazy. libc will not be checked until someone tries to - either call the function or check its availability. 
- - _LibcWrapper objects have an "available" property; if true, then libc - has the function of that name. If false, then calls will fail with a - NotImplementedError. - """ - - def __init__(self, func_name): - self._func_name = func_name - self._func_handle = None - self._loaded = False - - def _ensure_loaded(self): - if not self._loaded: - func_name = self._func_name - try: - # Keep everything in this try-block in local variables so - # that a typo in self.some_attribute_name doesn't raise a - # spurious AttributeError. - func_handle = load_libc_function( - func_name, fail_if_missing=True) - self._func_handle = func_handle - except AttributeError: - # We pass fail_if_missing=True to load_libc_function and - # then ignore the error. It's weird, but otherwise we have - # to check if self._func_handle is noop_libc_function, and - # that's even weirder. - pass - self._loaded = True - - @property - def available(self): - self._ensure_loaded() - return bool(self._func_handle) - - def __call__(self, *args): - if self.available: - return self._func_handle(*args) - else: - raise NotImplementedError( - "No function %r found in libc" % self._func_name) - - -_fallocate_enabled = True -_fallocate_warned_about_missing = False -_sys_fallocate = _LibcWrapper('fallocate') -_sys_posix_fallocate = _LibcWrapper('posix_fallocate') - - -def disable_fallocate(): - global _fallocate_enabled - _fallocate_enabled = False - - -def fallocate(fd, size, offset=0): - """ - Pre-allocate disk space for a file. - - This function can be disabled by calling disable_fallocate(). If no - suitable C function is available in libc, this function is a no-op. 
- - :param fd: file descriptor - :param size: size to allocate (in bytes) - """ - global _fallocate_enabled - if not _fallocate_enabled: - return - - if size < 0: - size = 0 # Done historically; not really sure why - if size >= (1 << 63): - raise ValueError('size must be less than 2 ** 63') - if offset < 0: - raise ValueError('offset must be non-negative') - if offset >= (1 << 63): - raise ValueError('offset must be less than 2 ** 63') - - # Make sure there's some (configurable) amount of free space in - # addition to the number of bytes we're allocating. - if FALLOCATE_RESERVE: - st = os.fstatvfs(fd) - free = st.f_frsize * st.f_bavail - size - if FALLOCATE_IS_PERCENT: - free = (float(free) / float(st.f_frsize * st.f_blocks)) * 100 - if float(free) <= float(FALLOCATE_RESERVE): - raise OSError( - errno.ENOSPC, - 'FALLOCATE_RESERVE fail %g <= %g' % - (free, FALLOCATE_RESERVE)) - - if _sys_fallocate.available: - # Parameters are (fd, mode, offset, length). - # - # mode=FALLOC_FL_KEEP_SIZE pre-allocates invisibly (without - # affecting the reported file size). - ret = _sys_fallocate( - fd, FALLOC_FL_KEEP_SIZE, ctypes.c_uint64(offset), - ctypes.c_uint64(size)) - err = ctypes.get_errno() - elif _sys_posix_fallocate.available: - # Parameters are (fd, offset, length). - ret = _sys_posix_fallocate(fd, ctypes.c_uint64(offset), - ctypes.c_uint64(size)) - err = ctypes.get_errno() - else: - # No suitable fallocate-like function is in our libc. Warn about it, - # but just once per process, and then do nothing. - global _fallocate_warned_about_missing - if not _fallocate_warned_about_missing: - logging.warning(_("Unable to locate fallocate, posix_fallocate in " - "libc. Leaving as a no-op.")) - _fallocate_warned_about_missing = True - return - - if ret and err not in (0, errno.ENOSYS, errno.EOPNOTSUPP, - errno.EINVAL): - raise OSError(err, 'Unable to fallocate(%s)' % size) - - -def punch_hole(fd, offset, length): - """ - De-allocate disk space in the middle of a file. 
- - :param fd: file descriptor - :param offset: index of first byte to de-allocate - :param length: number of bytes to de-allocate - """ - if offset < 0: - raise ValueError('offset must be non-negative') - if offset >= (1 << 63): - raise ValueError('offset must be less than 2 ** 63') - if length <= 0: - raise ValueError('length must be positive') - if length >= (1 << 63): - raise ValueError('length must be less than 2 ** 63') - - if _sys_fallocate.available: - # Parameters are (fd, mode, offset, length). - ret = _sys_fallocate( - fd, - FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, - ctypes.c_uint64(offset), - ctypes.c_uint64(length)) - err = ctypes.get_errno() - if ret and err: - mode_str = "FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE" - raise OSError(err, "Unable to fallocate(%d, %s, %d, %d)" % ( - fd, mode_str, offset, length)) - else: - raise OSError(errno.ENOTSUP, - 'No suitable C function found for hole punching') - - def fsync(fd): """ Sync modified file data and metadata to disk. @@ -1131,402 +869,6 @@ def fsync_dir(dirpath): os.close(dirfd) -def drop_buffer_cache(fd, offset, length): - """ - Drop 'buffer' cache for the given range of the given file. - - :param fd: file descriptor - :param offset: start offset - :param length: length - """ - global _posix_fadvise - if _posix_fadvise is None: - _posix_fadvise = load_libc_function('posix_fadvise64') - # 4 means "POSIX_FADV_DONTNEED" - ret = _posix_fadvise(fd, ctypes.c_uint64(offset), - ctypes.c_uint64(length), 4) - if ret != 0: - logging.warning("posix_fadvise64(%(fd)s, %(offset)s, %(length)s, 4) " - "-> %(ret)s", {'fd': fd, 'offset': offset, - 'length': length, 'ret': ret}) - - -NORMAL_FORMAT = "%016.05f" -INTERNAL_FORMAT = NORMAL_FORMAT + '_%016x' -SHORT_FORMAT = NORMAL_FORMAT + '_%x' -MAX_OFFSET = (16 ** 16) - 1 -PRECISION = 1e-5 -# Setting this to True will cause the internal format to always display -# extended digits - even when the value is equivalent to the normalized form. 
-# This isn't ideal during an upgrade when some servers might not understand -# the new time format - but flipping it to True works great for testing. -FORCE_INTERNAL = False # or True - - -@functools.total_ordering -class Timestamp(object): - """ - Internal Representation of Swift Time. - - The normalized form of the X-Timestamp header looks like a float - with a fixed width to ensure stable string sorting - normalized - timestamps look like "1402464677.04188" - - To support overwrites of existing data without modifying the original - timestamp but still maintain consistency a second internal offset vector - is append to the normalized timestamp form which compares and sorts - greater than the fixed width float format but less than a newer timestamp. - The internalized format of timestamps looks like - "1402464677.04188_0000000000000000" - the portion after the underscore is - the offset and is a formatted hexadecimal integer. - - The internalized form is not exposed to clients in responses from - Swift. Normal client operations will not create a timestamp with an - offset. - - The Timestamp class in common.utils supports internalized and - normalized formatting of timestamps and also comparison of timestamp - values. When the offset value of a Timestamp is 0 - it's considered - insignificant and need not be represented in the string format; to - support backwards compatibility during a Swift upgrade the - internalized and normalized form of a Timestamp with an - insignificant offset are identical. When a timestamp includes an - offset it will always be represented in the internalized form, but - is still excluded from the normalized form. Timestamps with an - equivalent timestamp portion (the float part) will compare and order - by their offset. Timestamps with a greater timestamp portion will - always compare and order greater than a Timestamp with a lesser - timestamp regardless of it's offset. 
String comparison and ordering - is guaranteed for the internalized string format, and is backwards - compatible for normalized timestamps which do not include an offset. - """ - - def __init__(self, timestamp, offset=0, delta=0, check_bounds=True): - """ - Create a new Timestamp. - - :param timestamp: time in seconds since the Epoch, may be any of: - - * a float or integer - * normalized/internalized string - * another instance of this class (offset is preserved) - - :param offset: the second internal offset vector, an int - :param delta: deca-microsecond difference from the base timestamp - param, an int - """ - if isinstance(timestamp, bytes): - timestamp = timestamp.decode('ascii') - if isinstance(timestamp, six.string_types): - base, base_offset = timestamp.partition('_')[::2] - self.timestamp = float(base) - if '_' in base_offset: - raise ValueError('invalid literal for int() with base 16: ' - '%r' % base_offset) - if base_offset: - self.offset = int(base_offset, 16) - else: - self.offset = 0 - else: - self.timestamp = float(timestamp) - self.offset = getattr(timestamp, 'offset', 0) - # increment offset - if offset >= 0: - self.offset += offset - else: - raise ValueError('offset must be non-negative') - if self.offset > MAX_OFFSET: - raise ValueError('offset must be smaller than %d' % MAX_OFFSET) - self.raw = int(round(self.timestamp / PRECISION)) - # add delta - if delta: - self.raw = self.raw + delta - if self.raw <= 0: - raise ValueError( - 'delta must be greater than %d' % (-1 * self.raw)) - self.timestamp = float(self.raw * PRECISION) - if check_bounds: - if self.timestamp < 0: - raise ValueError('timestamp cannot be negative') - if self.timestamp >= 10000000000: - raise ValueError('timestamp too large') - - @classmethod - def now(cls, offset=0, delta=0): - return cls(time.time(), offset=offset, delta=delta) - - def __repr__(self): - return INTERNAL_FORMAT % (self.timestamp, self.offset) - - def __str__(self): - raise TypeError('You must specify which 
string format is required') - - def __float__(self): - return self.timestamp - - def __int__(self): - return int(self.timestamp) - - def __nonzero__(self): - return bool(self.timestamp or self.offset) - - def __bool__(self): - return self.__nonzero__() - - @property - def normal(self): - return NORMAL_FORMAT % self.timestamp - - @property - def internal(self): - if self.offset or FORCE_INTERNAL: - return INTERNAL_FORMAT % (self.timestamp, self.offset) - else: - return self.normal - - @property - def short(self): - if self.offset or FORCE_INTERNAL: - return SHORT_FORMAT % (self.timestamp, self.offset) - else: - return self.normal - - @property - def isoformat(self): - """ - Get an isoformat string representation of the 'normal' part of the - Timestamp with microsecond precision and no trailing timezone, for - example:: - - 1970-01-01T00:00:00.000000 - - :return: an isoformat string - """ - t = float(self.normal) - if six.PY3: - # On Python 3, round manually using ROUND_HALF_EVEN rounding - # method, to use the same rounding method than Python 2. Python 3 - # used a different rounding method, but Python 3.4.4 and 3.5.1 use - # again ROUND_HALF_EVEN as Python 2. - # See https://bugs.python.org/issue23517 - frac, t = math.modf(t) - us = round(frac * 1e6) - if us >= 1000000: - t += 1 - us -= 1000000 - elif us < 0: - t -= 1 - us += 1000000 - dt = datetime.datetime.utcfromtimestamp(t) - dt = dt.replace(microsecond=us) - else: - dt = datetime.datetime.utcfromtimestamp(t) - - isoformat = dt.isoformat() - # python isoformat() doesn't include msecs when zero - if len(isoformat) < len("1970-01-01T00:00:00.000000"): - isoformat += ".000000" - return isoformat - - @classmethod - def from_isoformat(cls, date_string): - """ - Parse an isoformat string representation of time to a Timestamp object. - - :param date_string: a string formatted as per an Timestamp.isoformat - property. - :return: an instance of this class. 
- """ - start = datetime.datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%f") - delta = start - EPOCH - # This calculation is based on Python 2.7's Modules/datetimemodule.c, - # function delta_to_microseconds(), but written in Python. - return cls(delta.total_seconds()) - - def ceil(self): - """ - Return the 'normal' part of the timestamp rounded up to the nearest - integer number of seconds. - - This value should be used whenever the second-precision Last-Modified - time of a resource is required. - - :return: a float value with second precision. - """ - return math.ceil(float(self)) - - def __eq__(self, other): - if other is None: - return False - if not isinstance(other, Timestamp): - try: - other = Timestamp(other, check_bounds=False) - except ValueError: - return False - return self.internal == other.internal - - def __ne__(self, other): - return not (self == other) - - def __lt__(self, other): - if other is None: - return False - if not isinstance(other, Timestamp): - other = Timestamp(other, check_bounds=False) - if other.timestamp < 0: - return False - if other.timestamp >= 10000000000: - return True - return self.internal < other.internal - - def __hash__(self): - return hash(self.internal) - - def __invert__(self): - if self.offset: - raise ValueError('Cannot invert timestamps with offsets') - return Timestamp((999999999999999 - self.raw) * PRECISION) - - -def encode_timestamps(t1, t2=None, t3=None, explicit=False): - """ - Encode up to three timestamps into a string. Unlike a Timestamp object, the - encoded string does NOT used fixed width fields and consequently no - relative chronology of the timestamps can be inferred from lexicographic - sorting of encoded timestamp strings. - - The format of the encoded string is: - <t1>[<+/-><t2 - t1>[<+/-><t3 - t2>]] - - i.e. if t1 = t2 = t3 then just the string representation of t1 is returned, - otherwise the time offsets for t2 and t3 are appended. 
If explicit is True - then the offsets for t2 and t3 are always appended even if zero. - - Note: any offset value in t1 will be preserved, but offsets on t2 and t3 - are not preserved. In the anticipated use cases for this method (and the - inverse decode_timestamps method) the timestamps passed as t2 and t3 are - not expected to have offsets as they will be timestamps associated with a - POST request. In the case where the encoding is used in a container objects - table row, t1 could be the PUT or DELETE time but t2 and t3 represent the - content type and metadata times (if different from the data file) i.e. - correspond to POST timestamps. In the case where the encoded form is used - in a .meta file name, t1 and t2 both correspond to POST timestamps. - """ - form = '{0}' - values = [t1.short] - if t2 is not None: - t2_t1_delta = t2.raw - t1.raw - explicit = explicit or (t2_t1_delta != 0) - values.append(t2_t1_delta) - if t3 is not None: - t3_t2_delta = t3.raw - t2.raw - explicit = explicit or (t3_t2_delta != 0) - values.append(t3_t2_delta) - if explicit: - form += '{1:+x}' - if t3 is not None: - form += '{2:+x}' - return form.format(*values) - - -def decode_timestamps(encoded, explicit=False): - """ - Parses a string of the form generated by encode_timestamps and returns - a tuple of the three component timestamps. If explicit is False, component - timestamps that are not explicitly encoded will be assumed to have zero - delta from the previous component and therefore take the value of the - previous component. If explicit is True, component timestamps that are - not explicitly encoded will be returned with value None. - """ - # TODO: some tests, e.g. in test_replicator, put float timestamps values - # into container db's, hence this defensive check, but in real world - # this may never happen. 
- if not isinstance(encoded, six.string_types): - ts = Timestamp(encoded) - return ts, ts, ts - - parts = [] - signs = [] - pos_parts = encoded.split('+') - for part in pos_parts: - # parse time components and their signs - # e.g. x-y+z --> parts = [x, y, z] and signs = [+1, -1, +1] - neg_parts = part.split('-') - parts = parts + neg_parts - signs = signs + [1] + [-1] * (len(neg_parts) - 1) - t1 = Timestamp(parts[0]) - t2 = t3 = None - if len(parts) > 1: - t2 = t1 - delta = signs[1] * int(parts[1], 16) - # if delta = 0 we want t2 = t3 = t1 in order to - # preserve any offset in t1 - only construct a distinct - # timestamp if there is a non-zero delta. - if delta: - t2 = Timestamp((t1.raw + delta) * PRECISION) - elif not explicit: - t2 = t1 - if len(parts) > 2: - t3 = t2 - delta = signs[2] * int(parts[2], 16) - if delta: - t3 = Timestamp((t2.raw + delta) * PRECISION) - elif not explicit: - t3 = t2 - return t1, t2, t3 - - -def normalize_timestamp(timestamp): - """ - Format a timestamp (string or numeric) into a standardized - xxxxxxxxxx.xxxxx (10.5) format. - - Note that timestamps using values greater than or equal to November 20th, - 2286 at 17:46 UTC will use 11 digits to represent the number of - seconds. - - :param timestamp: unix timestamp - :returns: normalized timestamp as a string - """ - return Timestamp(timestamp).normal - - -EPOCH = datetime.datetime(1970, 1, 1) - - -def last_modified_date_to_timestamp(last_modified_date_str): - """ - Convert a last modified date (like you'd get from a container listing, - e.g. 2014-02-28T23:22:36.698390) to a float. - """ - return Timestamp.from_isoformat(last_modified_date_str) - - -def normalize_delete_at_timestamp(timestamp, high_precision=False): - """ - Format a timestamp (string or numeric) into a standardized - xxxxxxxxxx (10) or xxxxxxxxxx.xxxxx (10.5) format. 
- - Note that timestamps less than 0000000000 are raised to - 0000000000 and values greater than November 20th, 2286 at - 17:46:39 UTC will be capped at that date and time, resulting in - no return value exceeding 9999999999.99999 (or 9999999999 if - using low-precision). - - This cap is because the expirer is already working through a - sorted list of strings that were all a length of 10. Adding - another digit would mess up the sort and cause the expirer to - break from processing early. By 2286, this problem will need to - be fixed, probably by creating an additional .expiring_objects - account to work from with 11 (or more) digit container names. - - :param timestamp: unix timestamp - :returns: normalized timestamp as a string - """ - fmt = '%016.5f' if high_precision else '%010d' - return fmt % min(max(0, float(timestamp)), 9999999999.99999) - - def mkdirs(path): """ Ensures the path is a directory or makes it if not. Errors if the path @@ -2073,6 +1415,11 @@ class SwiftLoggerAdapter(logging.LoggerAdapter): process() method to accomplish anything useful. """ + @property + def name(self): + # py3 does this for us already; add it for py2 + return self.logger.name + def get_metric_name(self, metric): # subclasses may override this method to annotate the metric name return metric @@ -2274,8 +1621,10 @@ class LogAdapter(logging.LoggerAdapter, object): emsg = '%s: %s' % (exc.__class__.__name__, exc.line) elif isinstance(exc, eventlet.Timeout): emsg = exc.__class__.__name__ - if hasattr(exc, 'seconds'): - emsg += ' (%ss)' % exc.seconds + detail = '%ss' % exc.seconds + if hasattr(exc, 'created_at'): + detail += ' after %0.2fs' % (time.time() - exc.created_at) + emsg += ' (%s)' % detail if isinstance(exc, swift.common.exceptions.MessageTimeout): if exc.msg: emsg += ' %s' % exc.msg @@ -3205,6 +2554,7 @@ def readconf(conf_path, section_name=None, log_name=None, defaults=None, # values like "1%" (which we want to support for # fallocate_reserve). 
c = ConfigParser(defaults, interpolation=NicerInterpolation()) + c.optionxform = str # Don't lower-case keys if hasattr(conf_path, 'readline'): if hasattr(conf_path, 'seek'): @@ -5107,87 +4457,6 @@ def parse_content_disposition(header): return header, attributes -class sockaddr_alg(ctypes.Structure): - _fields_ = [("salg_family", ctypes.c_ushort), - ("salg_type", ctypes.c_ubyte * 14), - ("salg_feat", ctypes.c_uint), - ("salg_mask", ctypes.c_uint), - ("salg_name", ctypes.c_ubyte * 64)] - - -_bound_md5_sockfd = None - - -def get_md5_socket(): - """ - Get an MD5 socket file descriptor. One can MD5 data with it by writing it - to the socket with os.write, then os.read the 16 bytes of the checksum out - later. - - NOTE: It is the caller's responsibility to ensure that os.close() is - called on the returned file descriptor. This is a bare file descriptor, - not a Python object. It doesn't close itself. - """ - - # Linux's AF_ALG sockets work like this: - # - # First, initialize a socket with socket() and bind(). This tells the - # socket what algorithm to use, as well as setting up any necessary bits - # like crypto keys. Of course, MD5 doesn't need any keys, so it's just the - # algorithm name. - # - # Second, to hash some data, get a second socket by calling accept() on - # the first socket. Write data to the socket, then when finished, read the - # checksum from the socket and close it. This lets you checksum multiple - # things without repeating all the setup code each time. - # - # Since we only need to bind() one socket, we do that here and save it for - # future re-use. That way, we only use one file descriptor to get an MD5 - # socket instead of two, and we also get to save some syscalls. 
- - global _bound_md5_sockfd - global _libc_socket - global _libc_bind - global _libc_accept - - if _libc_accept is None: - _libc_accept = load_libc_function('accept', fail_if_missing=True) - if _libc_socket is None: - _libc_socket = load_libc_function('socket', fail_if_missing=True) - if _libc_bind is None: - _libc_bind = load_libc_function('bind', fail_if_missing=True) - - # Do this at first call rather than at import time so that we don't use a - # file descriptor on systems that aren't using any MD5 sockets. - if _bound_md5_sockfd is None: - sockaddr_setup = sockaddr_alg( - AF_ALG, - (ord('h'), ord('a'), ord('s'), ord('h'), 0), - 0, 0, - (ord('m'), ord('d'), ord('5'), 0)) - hash_sockfd = _libc_socket(ctypes.c_int(AF_ALG), - ctypes.c_int(socket.SOCK_SEQPACKET), - ctypes.c_int(0)) - if hash_sockfd < 0: - raise IOError(ctypes.get_errno(), - "Failed to initialize MD5 socket") - - bind_result = _libc_bind(ctypes.c_int(hash_sockfd), - ctypes.pointer(sockaddr_setup), - ctypes.c_int(ctypes.sizeof(sockaddr_alg))) - if bind_result < 0: - os.close(hash_sockfd) - raise IOError(ctypes.get_errno(), "Failed to bind MD5 socket") - - _bound_md5_sockfd = hash_sockfd - - md5_sockfd = _libc_accept(ctypes.c_int(_bound_md5_sockfd), None, 0) - if md5_sockfd < 0: - raise IOError(ctypes.get_errno(), "Failed to accept MD5 socket") - - return md5_sockfd - - try: _test_md5 = hashlib.md5(usedforsecurity=False) # nosec @@ -5443,6 +4712,12 @@ class NamespaceBoundList(object): """ self.bounds = [] if bounds is None else bounds + def __eq__(self, other): + # test for equality of NamespaceBoundList objects only + if not isinstance(other, NamespaceBoundList): + return False + return self.bounds == other.bounds + @classmethod def parse(cls, namespaces): """ @@ -5498,7 +4773,12 @@ class NamespaceBoundList(object): def get_namespace(self, item): """ - Get a Namespace instance that contains ``item``. + Get a Namespace instance that contains ``item`` by bisecting on the + lower bounds directly. 
This function is used for performance sensitive + path, for example, '_get_update_shard' in proxy object controller. For + normal paths, convert NamespaceBoundList to a list of Namespaces, and + use `~swift.common.utils.find_namespace` or + `~swift.common.utils.filter_namespaces`. :param item: The item for a which a Namespace is to be found. :return: the Namespace that contains ``item``. @@ -5509,6 +4789,24 @@ class NamespaceBoundList(object): else self.bounds[pos + 1][0]) return Namespace(name, lower, upper) + def get_namespaces(self): + """ + Get the contained namespaces as a list of contiguous Namespaces ordered + by lower bound. + + :return: A list of Namespace objects which are ordered by + ``lower bound``. + """ + if not self.bounds: + return [] + namespaces = [] + num_ns = len(self.bounds) + for i in range(num_ns): + lower, name = self.bounds[i] + upper = ('' if i + 1 == num_ns else self.bounds[i + 1][0]) + namespaces.append(Namespace(name, lower, upper)) + return namespaces + class ShardName(object): """ @@ -5693,11 +4991,11 @@ class ShardRange(Namespace): '_deleted', '_state', '_count', '_bytes', '_tombstones', '_reported') - def __init__(self, name, timestamp, + def __init__(self, name, timestamp=0, lower=Namespace.MIN, upper=Namespace.MAX, object_count=0, bytes_used=0, meta_timestamp=None, deleted=False, state=None, state_timestamp=None, epoch=None, - reported=False, tombstones=-1): + reported=False, tombstones=-1, **kwargs): super(ShardRange, self).__init__(name=name, lower=lower, upper=upper) self.account = self.container = self._timestamp = \ self._meta_timestamp = self._state_timestamp = self._epoch = None @@ -5720,7 +5018,8 @@ class ShardRange(Namespace): def sort_key(cls, sr): # defines the sort order for shard ranges # note if this ever changes to *not* sort by upper first then it breaks - # a key assumption for bisect, which is used by utils.find_shard_range + # a key assumption for bisect, which is used by utils.find_namespace + # with shard 
ranges.
        return sr.upper, sr.state, sr.lower, sr.name
 
     def is_child_of(self, parent):
@@ -6276,7 +5575,7 @@ class ShardRangeList(UserList):
             containing the filtered shard ranges.
         """
         return ShardRangeList(
-            filter_shard_ranges(self, includes, marker, end_marker))
+            filter_namespaces(self, includes, marker, end_marker))
 
     def find_lower(self, condition):
         """
@@ -6297,44 +5596,45 @@ class ShardRangeList(UserList):
         return self.upper
 
 
-def find_shard_range(item, ranges):
+def find_namespace(item, namespaces):
     """
-    Find a ShardRange in given list of ``shard_ranges`` whose namespace
+    Find a Namespace/ShardRange in given list of ``namespaces`` whose namespace
     contains ``item``.
 
-    :param item: The item for a which a ShardRange is to be found.
-    :param ranges: a sorted list of ShardRanges.
-    :return: the ShardRange whose namespace contains ``item``, or None if
-        no suitable range is found.
+    :param item: The item for which a Namespace is to be found.
+    :param namespaces: a sorted list of Namespaces.
+    :return: the Namespace/ShardRange whose namespace contains ``item``, or
+        None if no suitable Namespace is found.
     """
-    index = bisect.bisect_left(ranges, item)
-    if index != len(ranges) and item in ranges[index]:
-        return ranges[index]
+    index = bisect.bisect_left(namespaces, item)
+    if index != len(namespaces) and item in namespaces[index]:
+        return namespaces[index]
     return None
 
 
-def filter_shard_ranges(shard_ranges, includes, marker, end_marker):
+def filter_namespaces(namespaces, includes, marker, end_marker):
     """
-    Filter the given shard ranges to those whose namespace includes the
-    ``includes`` name or any part of the namespace between ``marker`` and
+    Filter the given Namespaces/ShardRanges to those whose namespace includes
+    the ``includes`` name or any part of the namespace between ``marker`` and
     ``end_marker``. If none of ``includes``, ``marker`` or ``end_marker`` are
-    specified then all shard ranges will be returned.
+    specified then all Namespaces will be returned.
- :param shard_ranges: A list of :class:`~swift.common.utils.ShardRange`. - :param includes: a string; if not empty then only the shard range, if any, - whose namespace includes this string will be returned, and ``marker`` - and ``end_marker`` will be ignored. + :param namespaces: A list of :class:`~swift.common.utils.Namespace` or + :class:`~swift.common.utils.ShardRange`. + :param includes: a string; if not empty then only the Namespace, + if any, whose namespace includes this string will be returned, + ``marker`` and ``end_marker`` will be ignored. :param marker: if specified then only shard ranges whose upper bound is greater than this value will be returned. :param end_marker: if specified then only shard ranges whose lower bound is less than this value will be returned. - :return: A filtered list of :class:`~swift.common.utils.ShardRange`. + :return: A filtered list of :class:`~swift.common.utils.Namespace`. """ if includes: - shard_range = find_shard_range(includes, shard_ranges) - return [shard_range] if shard_range else [] + namespace = find_namespace(includes, namespaces) + return [namespace] if namespace else [] - def shard_range_filter(sr): + def namespace_filter(sr): end = start = True if end_marker: end = end_marker > sr.lower @@ -6343,79 +5643,13 @@ def filter_shard_ranges(shard_ranges, includes, marker, end_marker): return start and end if marker or end_marker: - return list(filter(shard_range_filter, shard_ranges)) + return list(filter(namespace_filter, namespaces)) if marker == Namespace.MAX or end_marker == Namespace.MIN: - # MIN and MAX are both Falsy so not handled by shard_range_filter + # MIN and MAX are both Falsy so not handled by namespace_filter return [] - return shard_ranges - - -def modify_priority(conf, logger): - """ - Modify priority by nice and ionice. 
- """ - - global _libc_setpriority - if _libc_setpriority is None: - _libc_setpriority = load_libc_function('setpriority', - errcheck=True) - - def _setpriority(nice_priority): - """ - setpriority for this pid - - :param nice_priority: valid values are -19 to 20 - """ - try: - _libc_setpriority(PRIO_PROCESS, os.getpid(), - int(nice_priority)) - except (ValueError, OSError): - print(_("WARNING: Unable to modify scheduling priority of process." - " Keeping unchanged! Check logs for more info. ")) - logger.exception('Unable to modify nice priority') - else: - logger.debug('set nice priority to %s' % nice_priority) - - nice_priority = conf.get('nice_priority') - if nice_priority is not None: - _setpriority(nice_priority) - - global _posix_syscall - if _posix_syscall is None: - _posix_syscall = load_libc_function('syscall', errcheck=True) - - def _ioprio_set(io_class, io_priority): - """ - ioprio_set for this process - - :param io_class: the I/O class component, can be - IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, - or IOPRIO_CLASS_IDLE - :param io_priority: priority value in the I/O class - """ - try: - io_class = IO_CLASS_ENUM[io_class] - io_priority = int(io_priority) - _posix_syscall(NR_ioprio_set(), - IOPRIO_WHO_PROCESS, - os.getpid(), - IOPRIO_PRIO_VALUE(io_class, io_priority)) - except (KeyError, ValueError, OSError): - print(_("WARNING: Unable to modify I/O scheduling class " - "and priority of process. Keeping unchanged! 
" - "Check logs for more info.")) - logger.exception("Unable to modify ionice priority") - else: - logger.debug('set ionice class %s priority %s', - io_class, io_priority) - - io_class = conf.get("ionice_class") - if io_class is None: - return - io_priority = conf.get("ionice_priority", 0) - _ioprio_set(io_class, io_priority) + return namespaces def o_tmpfile_in_path_supported(dirpath): @@ -6995,14 +6229,15 @@ class Watchdog(object): :param timeout: duration before the timeout expires :param exc: exception to throw when the timeout expire, must inherit - from eventlet.timeouts.Timeout + from eventlet.Timeout :param timeout_at: allow to force the expiration timestamp :return: id of the scheduled timeout, needed to cancel it """ + now = time.time() if not timeout_at: - timeout_at = time.time() + timeout + timeout_at = now + timeout gth = eventlet.greenthread.getcurrent() - timeout_definition = (timeout, timeout_at, gth, exc) + timeout_definition = (timeout, timeout_at, gth, exc, now) key = id(timeout_definition) self._timeouts[key] = timeout_definition @@ -7025,8 +6260,7 @@ class Watchdog(object): :param key: timeout id, as returned by start() """ try: - if key in self._timeouts: - del(self._timeouts[key]) + del(self._timeouts[key]) except KeyError: pass @@ -7046,15 +6280,14 @@ class Watchdog(object): self._next_expiration = None if self._evt.ready(): self._evt.reset() - for k, (timeout, timeout_at, gth, exc) in list(self._timeouts.items()): + for k, (timeout, timeout_at, gth, exc, + created_at) in list(self._timeouts.items()): if timeout_at <= now: - try: - if k in self._timeouts: - del(self._timeouts[k]) - except KeyError: - pass + self.stop(k) e = exc() + # set this after __init__ to keep it off the eventlet scheduler e.seconds = timeout + e.created_at = created_at eventlet.hubs.get_hub().schedule_call_global(0, gth.throw, e) else: if (self._next_expiration is None |