diff options
Diffstat (limited to 'swift/common/utils/__init__.py')
-rw-r--r-- | swift/common/utils/__init__.py | 1013 |
1 files changed, 123 insertions, 890 deletions
diff --git a/swift/common/utils/__init__.py b/swift/common/utils/__init__.py index 3b4db177e..ef6b0180e 100644 --- a/swift/common/utils/__init__.py +++ b/swift/common/utils/__init__.py @@ -26,7 +26,6 @@ import fcntl import grp import hashlib import json -import math import operator import os import pwd @@ -37,12 +36,9 @@ import sys import time import uuid import functools -import platform import email.parser from random import random, shuffle from contextlib import contextmanager, closing -import ctypes -import ctypes.util from optparse import OptionParser import traceback import warnings @@ -97,90 +93,36 @@ from swift.common.linkat import linkat # For backwards compatability with 3rd party middlewares from swift.common.registry import register_swift_info, get_swift_info # noqa +from swift.common.utils.libc import ( # noqa + F_SETPIPE_SZ, + load_libc_function, + config_fallocate_value, + disable_fallocate, + fallocate, + punch_hole, + drop_buffer_cache, + get_md5_socket, + modify_priority, +) +from swift.common.utils.timestamp import ( # noqa + NORMAL_FORMAT, + INTERNAL_FORMAT, + SHORT_FORMAT, + MAX_OFFSET, + PRECISION, + Timestamp, + encode_timestamps, + decode_timestamps, + normalize_timestamp, + EPOCH, + last_modified_date_to_timestamp, + normalize_delete_at_timestamp, +) -# logging doesn't import patched as cleanly as one would like from logging.handlers import SysLogHandler import logging -logging.thread = eventlet.green.thread -logging.threading = eventlet.green.threading -logging._lock = logging.threading.RLock() -# setup notice level logging -NOTICE = 25 -logging.addLevelName(NOTICE, 'NOTICE') -SysLogHandler.priority_map['NOTICE'] = 'notice' - -# These are lazily pulled from libc elsewhere -_sys_fallocate = None -_posix_fadvise = None -_libc_socket = None -_libc_bind = None -_libc_accept = None -# see man -s 2 setpriority -_libc_setpriority = None -# see man -s 2 syscall -_posix_syscall = None - -# If set to non-zero, fallocate routines will fail based on 
free space -# available being at or below this amount, in bytes. -FALLOCATE_RESERVE = 0 -# Indicates if FALLOCATE_RESERVE is the percentage of free space (True) or -# the number of bytes (False). -FALLOCATE_IS_PERCENT = False - -# from /usr/include/linux/falloc.h -FALLOC_FL_KEEP_SIZE = 1 -FALLOC_FL_PUNCH_HOLE = 2 - -# from /usr/src/linux-headers-*/include/uapi/linux/resource.h -PRIO_PROCESS = 0 - - -# /usr/include/x86_64-linux-gnu/asm/unistd_64.h defines syscalls there -# are many like it, but this one is mine, see man -s 2 ioprio_set -def NR_ioprio_set(): - """Give __NR_ioprio_set value for your system.""" - architecture = os.uname()[4] - arch_bits = platform.architecture()[0] - # check if supported system, now support x86_64 and AArch64 - if architecture == 'x86_64' and arch_bits == '64bit': - return 251 - elif architecture == 'aarch64' and arch_bits == '64bit': - return 30 - raise OSError("Swift doesn't support ionice priority for %s %s" % - (architecture, arch_bits)) - - -# this syscall integer probably only works on x86_64 linux systems, you -# can check if it's correct on yours with something like this: -""" -#include <stdio.h> -#include <sys/syscall.h> - -int main(int argc, const char* argv[]) { - printf("%d\n", __NR_ioprio_set); - return 0; -} -""" - -# this is the value for "which" that says our who value will be a pid -# pulled out of /usr/src/linux-headers-*/include/linux/ioprio.h -IOPRIO_WHO_PROCESS = 1 - - -IO_CLASS_ENUM = { - 'IOPRIO_CLASS_RT': 1, - 'IOPRIO_CLASS_BE': 2, - 'IOPRIO_CLASS_IDLE': 3, -} - -# the IOPRIO_PRIO_VALUE "macro" is also pulled from -# /usr/src/linux-headers-*/include/linux/ioprio.h -IOPRIO_CLASS_SHIFT = 13 - - -def IOPRIO_PRIO_VALUE(class_, data): - return (((class_) << IOPRIO_CLASS_SHIFT) | data) +NOTICE = 25 # Used by hash_path to offer a bit more security when generating hashes for # paths. 
It simply appends this value to all paths; guessing the hash a path @@ -190,12 +132,6 @@ HASH_PATH_PREFIX = b'' SWIFT_CONF_FILE = '/etc/swift/swift.conf' -# These constants are Linux-specific, and Python doesn't seem to know -# about them. We ask anyway just in case that ever gets fixed. -# -# The values were copied from the Linux 3.x kernel headers. -AF_ALG = getattr(socket, 'AF_ALG', 38) -F_SETPIPE_SZ = getattr(fcntl, 'F_SETPIPE_SZ', 1031) O_TMPFILE = getattr(os, 'O_TMPFILE', 0o20000000 | os.O_DIRECTORY) # Used by the parse_socket_string() function to validate IPv6 addresses @@ -500,6 +436,17 @@ def config_read_prefixed_options(conf, prefix_name, defaults): return params +def logging_monkey_patch(): + # explicitly patch the logging lock + logging._lock = logging.threading.RLock() + # setup notice level logging + logging.addLevelName(NOTICE, 'NOTICE') + SysLogHandler.priority_map['NOTICE'] = 'notice' + # Trying to log threads while monkey-patched can lead to deadlocks; see + # https://bugs.launchpad.net/swift/+bug/1895739 + logging.logThreads = 0 + + def eventlet_monkey_patch(): """ Install the appropriate Eventlet monkey patches. @@ -510,13 +457,14 @@ def eventlet_monkey_patch(): # if thread is monkey-patched. eventlet.patcher.monkey_patch(all=False, socket=True, select=True, thread=True) - # Trying to log threads while monkey-patched can lead to deadlocks; see - # https://bugs.launchpad.net/swift/+bug/1895739 - logging.logThreads = 0 -def noop_libc_function(*args): - return 0 +def monkey_patch(): + """ + Apply all swift monkey patching consistently in one place. + """ + eventlet_monkey_patch() + logging_monkey_patch() def validate_configuration(): @@ -526,39 +474,6 @@ def validate_configuration(): sys.exit("Error: %s" % e) -def load_libc_function(func_name, log_error=True, - fail_if_missing=False, errcheck=False): - """ - Attempt to find the function in libc, otherwise return a no-op func. - - :param func_name: name of the function to pull from libc. 
- :param log_error: log an error when a function can't be found - :param fail_if_missing: raise an exception when a function can't be found. - Default behavior is to return a no-op function. - :param errcheck: boolean, if true install a wrapper on the function - to check for a return values of -1 and call - ctype.get_errno and raise an OSError - """ - try: - libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True) - func = getattr(libc, func_name) - except AttributeError: - if fail_if_missing: - raise - if log_error: - logging.warning(_("Unable to locate %s in libc. Leaving as a " - "no-op."), func_name) - return noop_libc_function - if errcheck: - def _errcheck(result, f, args): - if result == -1: - errcode = ctypes.get_errno() - raise OSError(errcode, os.strerror(errcode)) - return result - func.errcheck = _errcheck - return func - - def generate_trans_id(trans_id_suffix): return 'tx%s-%010x%s' % ( uuid.uuid4().hex[:21], int(time.time()), quote(trans_id_suffix)) @@ -755,25 +670,6 @@ def get_trans_id_time(trans_id): return None -def config_fallocate_value(reserve_value): - """ - Returns fallocate reserve_value as an int or float. - Returns is_percent as a boolean. - Returns a ValueError on invalid fallocate value. - """ - try: - if str(reserve_value[-1:]) == '%': - reserve_value = float(reserve_value[:-1]) - is_percent = True - else: - reserve_value = int(reserve_value) - is_percent = False - except ValueError: - raise ValueError('Error: %s is an invalid value for fallocate' - '_reserve.' % reserve_value) - return reserve_value, is_percent - - class FileLikeIter(object): def __init__(self, iterable): @@ -924,164 +820,6 @@ def fs_has_free_space(fs_path, space_needed, is_percent): return free_bytes >= space_needed -class _LibcWrapper(object): - """ - A callable object that forwards its calls to a C function from libc. - - These objects are lazy. libc will not be checked until someone tries to - either call the function or check its availability. 
- - _LibcWrapper objects have an "available" property; if true, then libc - has the function of that name. If false, then calls will fail with a - NotImplementedError. - """ - - def __init__(self, func_name): - self._func_name = func_name - self._func_handle = None - self._loaded = False - - def _ensure_loaded(self): - if not self._loaded: - func_name = self._func_name - try: - # Keep everything in this try-block in local variables so - # that a typo in self.some_attribute_name doesn't raise a - # spurious AttributeError. - func_handle = load_libc_function( - func_name, fail_if_missing=True) - self._func_handle = func_handle - except AttributeError: - # We pass fail_if_missing=True to load_libc_function and - # then ignore the error. It's weird, but otherwise we have - # to check if self._func_handle is noop_libc_function, and - # that's even weirder. - pass - self._loaded = True - - @property - def available(self): - self._ensure_loaded() - return bool(self._func_handle) - - def __call__(self, *args): - if self.available: - return self._func_handle(*args) - else: - raise NotImplementedError( - "No function %r found in libc" % self._func_name) - - -_fallocate_enabled = True -_fallocate_warned_about_missing = False -_sys_fallocate = _LibcWrapper('fallocate') -_sys_posix_fallocate = _LibcWrapper('posix_fallocate') - - -def disable_fallocate(): - global _fallocate_enabled - _fallocate_enabled = False - - -def fallocate(fd, size, offset=0): - """ - Pre-allocate disk space for a file. - - This function can be disabled by calling disable_fallocate(). If no - suitable C function is available in libc, this function is a no-op. 
- - :param fd: file descriptor - :param size: size to allocate (in bytes) - """ - global _fallocate_enabled - if not _fallocate_enabled: - return - - if size < 0: - size = 0 # Done historically; not really sure why - if size >= (1 << 63): - raise ValueError('size must be less than 2 ** 63') - if offset < 0: - raise ValueError('offset must be non-negative') - if offset >= (1 << 63): - raise ValueError('offset must be less than 2 ** 63') - - # Make sure there's some (configurable) amount of free space in - # addition to the number of bytes we're allocating. - if FALLOCATE_RESERVE: - st = os.fstatvfs(fd) - free = st.f_frsize * st.f_bavail - size - if FALLOCATE_IS_PERCENT: - free = (float(free) / float(st.f_frsize * st.f_blocks)) * 100 - if float(free) <= float(FALLOCATE_RESERVE): - raise OSError( - errno.ENOSPC, - 'FALLOCATE_RESERVE fail %g <= %g' % - (free, FALLOCATE_RESERVE)) - - if _sys_fallocate.available: - # Parameters are (fd, mode, offset, length). - # - # mode=FALLOC_FL_KEEP_SIZE pre-allocates invisibly (without - # affecting the reported file size). - ret = _sys_fallocate( - fd, FALLOC_FL_KEEP_SIZE, ctypes.c_uint64(offset), - ctypes.c_uint64(size)) - err = ctypes.get_errno() - elif _sys_posix_fallocate.available: - # Parameters are (fd, offset, length). - ret = _sys_posix_fallocate(fd, ctypes.c_uint64(offset), - ctypes.c_uint64(size)) - err = ctypes.get_errno() - else: - # No suitable fallocate-like function is in our libc. Warn about it, - # but just once per process, and then do nothing. - global _fallocate_warned_about_missing - if not _fallocate_warned_about_missing: - logging.warning(_("Unable to locate fallocate, posix_fallocate in " - "libc. Leaving as a no-op.")) - _fallocate_warned_about_missing = True - return - - if ret and err not in (0, errno.ENOSYS, errno.EOPNOTSUPP, - errno.EINVAL): - raise OSError(err, 'Unable to fallocate(%s)' % size) - - -def punch_hole(fd, offset, length): - """ - De-allocate disk space in the middle of a file. 
- - :param fd: file descriptor - :param offset: index of first byte to de-allocate - :param length: number of bytes to de-allocate - """ - if offset < 0: - raise ValueError('offset must be non-negative') - if offset >= (1 << 63): - raise ValueError('offset must be less than 2 ** 63') - if length <= 0: - raise ValueError('length must be positive') - if length >= (1 << 63): - raise ValueError('length must be less than 2 ** 63') - - if _sys_fallocate.available: - # Parameters are (fd, mode, offset, length). - ret = _sys_fallocate( - fd, - FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, - ctypes.c_uint64(offset), - ctypes.c_uint64(length)) - err = ctypes.get_errno() - if ret and err: - mode_str = "FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE" - raise OSError(err, "Unable to fallocate(%d, %s, %d, %d)" % ( - fd, mode_str, offset, length)) - else: - raise OSError(errno.ENOTSUP, - 'No suitable C function found for hole punching') - - def fsync(fd): """ Sync modified file data and metadata to disk. @@ -1131,402 +869,6 @@ def fsync_dir(dirpath): os.close(dirfd) -def drop_buffer_cache(fd, offset, length): - """ - Drop 'buffer' cache for the given range of the given file. - - :param fd: file descriptor - :param offset: start offset - :param length: length - """ - global _posix_fadvise - if _posix_fadvise is None: - _posix_fadvise = load_libc_function('posix_fadvise64') - # 4 means "POSIX_FADV_DONTNEED" - ret = _posix_fadvise(fd, ctypes.c_uint64(offset), - ctypes.c_uint64(length), 4) - if ret != 0: - logging.warning("posix_fadvise64(%(fd)s, %(offset)s, %(length)s, 4) " - "-> %(ret)s", {'fd': fd, 'offset': offset, - 'length': length, 'ret': ret}) - - -NORMAL_FORMAT = "%016.05f" -INTERNAL_FORMAT = NORMAL_FORMAT + '_%016x' -SHORT_FORMAT = NORMAL_FORMAT + '_%x' -MAX_OFFSET = (16 ** 16) - 1 -PRECISION = 1e-5 -# Setting this to True will cause the internal format to always display -# extended digits - even when the value is equivalent to the normalized form. 
-# This isn't ideal during an upgrade when some servers might not understand -# the new time format - but flipping it to True works great for testing. -FORCE_INTERNAL = False # or True - - -@functools.total_ordering -class Timestamp(object): - """ - Internal Representation of Swift Time. - - The normalized form of the X-Timestamp header looks like a float - with a fixed width to ensure stable string sorting - normalized - timestamps look like "1402464677.04188" - - To support overwrites of existing data without modifying the original - timestamp but still maintain consistency a second internal offset vector - is append to the normalized timestamp form which compares and sorts - greater than the fixed width float format but less than a newer timestamp. - The internalized format of timestamps looks like - "1402464677.04188_0000000000000000" - the portion after the underscore is - the offset and is a formatted hexadecimal integer. - - The internalized form is not exposed to clients in responses from - Swift. Normal client operations will not create a timestamp with an - offset. - - The Timestamp class in common.utils supports internalized and - normalized formatting of timestamps and also comparison of timestamp - values. When the offset value of a Timestamp is 0 - it's considered - insignificant and need not be represented in the string format; to - support backwards compatibility during a Swift upgrade the - internalized and normalized form of a Timestamp with an - insignificant offset are identical. When a timestamp includes an - offset it will always be represented in the internalized form, but - is still excluded from the normalized form. Timestamps with an - equivalent timestamp portion (the float part) will compare and order - by their offset. Timestamps with a greater timestamp portion will - always compare and order greater than a Timestamp with a lesser - timestamp regardless of it's offset. 
String comparison and ordering - is guaranteed for the internalized string format, and is backwards - compatible for normalized timestamps which do not include an offset. - """ - - def __init__(self, timestamp, offset=0, delta=0, check_bounds=True): - """ - Create a new Timestamp. - - :param timestamp: time in seconds since the Epoch, may be any of: - - * a float or integer - * normalized/internalized string - * another instance of this class (offset is preserved) - - :param offset: the second internal offset vector, an int - :param delta: deca-microsecond difference from the base timestamp - param, an int - """ - if isinstance(timestamp, bytes): - timestamp = timestamp.decode('ascii') - if isinstance(timestamp, six.string_types): - base, base_offset = timestamp.partition('_')[::2] - self.timestamp = float(base) - if '_' in base_offset: - raise ValueError('invalid literal for int() with base 16: ' - '%r' % base_offset) - if base_offset: - self.offset = int(base_offset, 16) - else: - self.offset = 0 - else: - self.timestamp = float(timestamp) - self.offset = getattr(timestamp, 'offset', 0) - # increment offset - if offset >= 0: - self.offset += offset - else: - raise ValueError('offset must be non-negative') - if self.offset > MAX_OFFSET: - raise ValueError('offset must be smaller than %d' % MAX_OFFSET) - self.raw = int(round(self.timestamp / PRECISION)) - # add delta - if delta: - self.raw = self.raw + delta - if self.raw <= 0: - raise ValueError( - 'delta must be greater than %d' % (-1 * self.raw)) - self.timestamp = float(self.raw * PRECISION) - if check_bounds: - if self.timestamp < 0: - raise ValueError('timestamp cannot be negative') - if self.timestamp >= 10000000000: - raise ValueError('timestamp too large') - - @classmethod - def now(cls, offset=0, delta=0): - return cls(time.time(), offset=offset, delta=delta) - - def __repr__(self): - return INTERNAL_FORMAT % (self.timestamp, self.offset) - - def __str__(self): - raise TypeError('You must specify which 
string format is required') - - def __float__(self): - return self.timestamp - - def __int__(self): - return int(self.timestamp) - - def __nonzero__(self): - return bool(self.timestamp or self.offset) - - def __bool__(self): - return self.__nonzero__() - - @property - def normal(self): - return NORMAL_FORMAT % self.timestamp - - @property - def internal(self): - if self.offset or FORCE_INTERNAL: - return INTERNAL_FORMAT % (self.timestamp, self.offset) - else: - return self.normal - - @property - def short(self): - if self.offset or FORCE_INTERNAL: - return SHORT_FORMAT % (self.timestamp, self.offset) - else: - return self.normal - - @property - def isoformat(self): - """ - Get an isoformat string representation of the 'normal' part of the - Timestamp with microsecond precision and no trailing timezone, for - example:: - - 1970-01-01T00:00:00.000000 - - :return: an isoformat string - """ - t = float(self.normal) - if six.PY3: - # On Python 3, round manually using ROUND_HALF_EVEN rounding - # method, to use the same rounding method than Python 2. Python 3 - # used a different rounding method, but Python 3.4.4 and 3.5.1 use - # again ROUND_HALF_EVEN as Python 2. - # See https://bugs.python.org/issue23517 - frac, t = math.modf(t) - us = round(frac * 1e6) - if us >= 1000000: - t += 1 - us -= 1000000 - elif us < 0: - t -= 1 - us += 1000000 - dt = datetime.datetime.utcfromtimestamp(t) - dt = dt.replace(microsecond=us) - else: - dt = datetime.datetime.utcfromtimestamp(t) - - isoformat = dt.isoformat() - # python isoformat() doesn't include msecs when zero - if len(isoformat) < len("1970-01-01T00:00:00.000000"): - isoformat += ".000000" - return isoformat - - @classmethod - def from_isoformat(cls, date_string): - """ - Parse an isoformat string representation of time to a Timestamp object. - - :param date_string: a string formatted as per an Timestamp.isoformat - property. - :return: an instance of this class. 
- """ - start = datetime.datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%f") - delta = start - EPOCH - # This calculation is based on Python 2.7's Modules/datetimemodule.c, - # function delta_to_microseconds(), but written in Python. - return cls(delta.total_seconds()) - - def ceil(self): - """ - Return the 'normal' part of the timestamp rounded up to the nearest - integer number of seconds. - - This value should be used whenever the second-precision Last-Modified - time of a resource is required. - - :return: a float value with second precision. - """ - return math.ceil(float(self)) - - def __eq__(self, other): - if other is None: - return False - if not isinstance(other, Timestamp): - try: - other = Timestamp(other, check_bounds=False) - except ValueError: - return False - return self.internal == other.internal - - def __ne__(self, other): - return not (self == other) - - def __lt__(self, other): - if other is None: - return False - if not isinstance(other, Timestamp): - other = Timestamp(other, check_bounds=False) - if other.timestamp < 0: - return False - if other.timestamp >= 10000000000: - return True - return self.internal < other.internal - - def __hash__(self): - return hash(self.internal) - - def __invert__(self): - if self.offset: - raise ValueError('Cannot invert timestamps with offsets') - return Timestamp((999999999999999 - self.raw) * PRECISION) - - -def encode_timestamps(t1, t2=None, t3=None, explicit=False): - """ - Encode up to three timestamps into a string. Unlike a Timestamp object, the - encoded string does NOT used fixed width fields and consequently no - relative chronology of the timestamps can be inferred from lexicographic - sorting of encoded timestamp strings. - - The format of the encoded string is: - <t1>[<+/-><t2 - t1>[<+/-><t3 - t2>]] - - i.e. if t1 = t2 = t3 then just the string representation of t1 is returned, - otherwise the time offsets for t2 and t3 are appended. 
If explicit is True - then the offsets for t2 and t3 are always appended even if zero. - - Note: any offset value in t1 will be preserved, but offsets on t2 and t3 - are not preserved. In the anticipated use cases for this method (and the - inverse decode_timestamps method) the timestamps passed as t2 and t3 are - not expected to have offsets as they will be timestamps associated with a - POST request. In the case where the encoding is used in a container objects - table row, t1 could be the PUT or DELETE time but t2 and t3 represent the - content type and metadata times (if different from the data file) i.e. - correspond to POST timestamps. In the case where the encoded form is used - in a .meta file name, t1 and t2 both correspond to POST timestamps. - """ - form = '{0}' - values = [t1.short] - if t2 is not None: - t2_t1_delta = t2.raw - t1.raw - explicit = explicit or (t2_t1_delta != 0) - values.append(t2_t1_delta) - if t3 is not None: - t3_t2_delta = t3.raw - t2.raw - explicit = explicit or (t3_t2_delta != 0) - values.append(t3_t2_delta) - if explicit: - form += '{1:+x}' - if t3 is not None: - form += '{2:+x}' - return form.format(*values) - - -def decode_timestamps(encoded, explicit=False): - """ - Parses a string of the form generated by encode_timestamps and returns - a tuple of the three component timestamps. If explicit is False, component - timestamps that are not explicitly encoded will be assumed to have zero - delta from the previous component and therefore take the value of the - previous component. If explicit is True, component timestamps that are - not explicitly encoded will be returned with value None. - """ - # TODO: some tests, e.g. in test_replicator, put float timestamps values - # into container db's, hence this defensive check, but in real world - # this may never happen. 
- if not isinstance(encoded, six.string_types): - ts = Timestamp(encoded) - return ts, ts, ts - - parts = [] - signs = [] - pos_parts = encoded.split('+') - for part in pos_parts: - # parse time components and their signs - # e.g. x-y+z --> parts = [x, y, z] and signs = [+1, -1, +1] - neg_parts = part.split('-') - parts = parts + neg_parts - signs = signs + [1] + [-1] * (len(neg_parts) - 1) - t1 = Timestamp(parts[0]) - t2 = t3 = None - if len(parts) > 1: - t2 = t1 - delta = signs[1] * int(parts[1], 16) - # if delta = 0 we want t2 = t3 = t1 in order to - # preserve any offset in t1 - only construct a distinct - # timestamp if there is a non-zero delta. - if delta: - t2 = Timestamp((t1.raw + delta) * PRECISION) - elif not explicit: - t2 = t1 - if len(parts) > 2: - t3 = t2 - delta = signs[2] * int(parts[2], 16) - if delta: - t3 = Timestamp((t2.raw + delta) * PRECISION) - elif not explicit: - t3 = t2 - return t1, t2, t3 - - -def normalize_timestamp(timestamp): - """ - Format a timestamp (string or numeric) into a standardized - xxxxxxxxxx.xxxxx (10.5) format. - - Note that timestamps using values greater than or equal to November 20th, - 2286 at 17:46 UTC will use 11 digits to represent the number of - seconds. - - :param timestamp: unix timestamp - :returns: normalized timestamp as a string - """ - return Timestamp(timestamp).normal - - -EPOCH = datetime.datetime(1970, 1, 1) - - -def last_modified_date_to_timestamp(last_modified_date_str): - """ - Convert a last modified date (like you'd get from a container listing, - e.g. 2014-02-28T23:22:36.698390) to a float. - """ - return Timestamp.from_isoformat(last_modified_date_str) - - -def normalize_delete_at_timestamp(timestamp, high_precision=False): - """ - Format a timestamp (string or numeric) into a standardized - xxxxxxxxxx (10) or xxxxxxxxxx.xxxxx (10.5) format. 
- - Note that timestamps less than 0000000000 are raised to - 0000000000 and values greater than November 20th, 2286 at - 17:46:39 UTC will be capped at that date and time, resulting in - no return value exceeding 9999999999.99999 (or 9999999999 if - using low-precision). - - This cap is because the expirer is already working through a - sorted list of strings that were all a length of 10. Adding - another digit would mess up the sort and cause the expirer to - break from processing early. By 2286, this problem will need to - be fixed, probably by creating an additional .expiring_objects - account to work from with 11 (or more) digit container names. - - :param timestamp: unix timestamp - :returns: normalized timestamp as a string - """ - fmt = '%016.5f' if high_precision else '%010d' - return fmt % min(max(0, float(timestamp)), 9999999999.99999) - - def mkdirs(path): """ Ensures the path is a directory or makes it if not. Errors if the path @@ -2073,6 +1415,11 @@ class SwiftLoggerAdapter(logging.LoggerAdapter): process() method to accomplish anything useful. """ + @property + def name(self): + # py3 does this for us already; add it for py2 + return self.logger.name + def get_metric_name(self, metric): # subclasses may override this method to annotate the metric name return metric @@ -2274,8 +1621,10 @@ class LogAdapter(logging.LoggerAdapter, object): emsg = '%s: %s' % (exc.__class__.__name__, exc.line) elif isinstance(exc, eventlet.Timeout): emsg = exc.__class__.__name__ - if hasattr(exc, 'seconds'): - emsg += ' (%ss)' % exc.seconds + detail = '%ss' % exc.seconds + if hasattr(exc, 'created_at'): + detail += ' after %0.2fs' % (time.time() - exc.created_at) + emsg += ' (%s)' % detail if isinstance(exc, swift.common.exceptions.MessageTimeout): if exc.msg: emsg += ' %s' % exc.msg @@ -3205,6 +2554,7 @@ def readconf(conf_path, section_name=None, log_name=None, defaults=None, # values like "1%" (which we want to support for # fallocate_reserve). 
c = ConfigParser(defaults, interpolation=NicerInterpolation()) + c.optionxform = str # Don't lower-case keys if hasattr(conf_path, 'readline'): if hasattr(conf_path, 'seek'): @@ -5107,87 +4457,6 @@ def parse_content_disposition(header): return header, attributes -class sockaddr_alg(ctypes.Structure): - _fields_ = [("salg_family", ctypes.c_ushort), - ("salg_type", ctypes.c_ubyte * 14), - ("salg_feat", ctypes.c_uint), - ("salg_mask", ctypes.c_uint), - ("salg_name", ctypes.c_ubyte * 64)] - - -_bound_md5_sockfd = None - - -def get_md5_socket(): - """ - Get an MD5 socket file descriptor. One can MD5 data with it by writing it - to the socket with os.write, then os.read the 16 bytes of the checksum out - later. - - NOTE: It is the caller's responsibility to ensure that os.close() is - called on the returned file descriptor. This is a bare file descriptor, - not a Python object. It doesn't close itself. - """ - - # Linux's AF_ALG sockets work like this: - # - # First, initialize a socket with socket() and bind(). This tells the - # socket what algorithm to use, as well as setting up any necessary bits - # like crypto keys. Of course, MD5 doesn't need any keys, so it's just the - # algorithm name. - # - # Second, to hash some data, get a second socket by calling accept() on - # the first socket. Write data to the socket, then when finished, read the - # checksum from the socket and close it. This lets you checksum multiple - # things without repeating all the setup code each time. - # - # Since we only need to bind() one socket, we do that here and save it for - # future re-use. That way, we only use one file descriptor to get an MD5 - # socket instead of two, and we also get to save some syscalls. 
- - global _bound_md5_sockfd - global _libc_socket - global _libc_bind - global _libc_accept - - if _libc_accept is None: - _libc_accept = load_libc_function('accept', fail_if_missing=True) - if _libc_socket is None: - _libc_socket = load_libc_function('socket', fail_if_missing=True) - if _libc_bind is None: - _libc_bind = load_libc_function('bind', fail_if_missing=True) - - # Do this at first call rather than at import time so that we don't use a - # file descriptor on systems that aren't using any MD5 sockets. - if _bound_md5_sockfd is None: - sockaddr_setup = sockaddr_alg( - AF_ALG, - (ord('h'), ord('a'), ord('s'), ord('h'), 0), - 0, 0, - (ord('m'), ord('d'), ord('5'), 0)) - hash_sockfd = _libc_socket(ctypes.c_int(AF_ALG), - ctypes.c_int(socket.SOCK_SEQPACKET), - ctypes.c_int(0)) - if hash_sockfd < 0: - raise IOError(ctypes.get_errno(), - "Failed to initialize MD5 socket") - - bind_result = _libc_bind(ctypes.c_int(hash_sockfd), - ctypes.pointer(sockaddr_setup), - ctypes.c_int(ctypes.sizeof(sockaddr_alg))) - if bind_result < 0: - os.close(hash_sockfd) - raise IOError(ctypes.get_errno(), "Failed to bind MD5 socket") - - _bound_md5_sockfd = hash_sockfd - - md5_sockfd = _libc_accept(ctypes.c_int(_bound_md5_sockfd), None, 0) - if md5_sockfd < 0: - raise IOError(ctypes.get_errno(), "Failed to accept MD5 socket") - - return md5_sockfd - - try: _test_md5 = hashlib.md5(usedforsecurity=False) # nosec @@ -5443,6 +4712,12 @@ class NamespaceBoundList(object): """ self.bounds = [] if bounds is None else bounds + def __eq__(self, other): + # test for equality of NamespaceBoundList objects only + if not isinstance(other, NamespaceBoundList): + return False + return self.bounds == other.bounds + @classmethod def parse(cls, namespaces): """ @@ -5498,7 +4773,12 @@ class NamespaceBoundList(object): def get_namespace(self, item): """ - Get a Namespace instance that contains ``item``. + Get a Namespace instance that contains ``item`` by bisecting on the + lower bounds directly. 
This function is used for performance sensitive + path, for example, '_get_update_shard' in proxy object controller. For + normal paths, convert NamespaceBoundList to a list of Namespaces, and + use `~swift.common.utils.find_namespace` or + `~swift.common.utils.filter_namespaces`. :param item: The item for a which a Namespace is to be found. :return: the Namespace that contains ``item``. @@ -5509,6 +4789,24 @@ class NamespaceBoundList(object): else self.bounds[pos + 1][0]) return Namespace(name, lower, upper) + def get_namespaces(self): + """ + Get the contained namespaces as a list of contiguous Namespaces ordered + by lower bound. + + :return: A list of Namespace objects which are ordered by + ``lower bound``. + """ + if not self.bounds: + return [] + namespaces = [] + num_ns = len(self.bounds) + for i in range(num_ns): + lower, name = self.bounds[i] + upper = ('' if i + 1 == num_ns else self.bounds[i + 1][0]) + namespaces.append(Namespace(name, lower, upper)) + return namespaces + class ShardName(object): """ @@ -5693,11 +4991,11 @@ class ShardRange(Namespace): '_deleted', '_state', '_count', '_bytes', '_tombstones', '_reported') - def __init__(self, name, timestamp, + def __init__(self, name, timestamp=0, lower=Namespace.MIN, upper=Namespace.MAX, object_count=0, bytes_used=0, meta_timestamp=None, deleted=False, state=None, state_timestamp=None, epoch=None, - reported=False, tombstones=-1): + reported=False, tombstones=-1, **kwargs): super(ShardRange, self).__init__(name=name, lower=lower, upper=upper) self.account = self.container = self._timestamp = \ self._meta_timestamp = self._state_timestamp = self._epoch = None @@ -5720,7 +5018,8 @@ class ShardRange(Namespace): def sort_key(cls, sr): # defines the sort order for shard ranges # note if this ever changes to *not* sort by upper first then it breaks - # a key assumption for bisect, which is used by utils.find_shard_range + # a key assumption for bisect, which is used by utils.find_namespace + # with shard 
ranges.
        return sr.upper, sr.state, sr.lower, sr.name
 
     def is_child_of(self, parent):
@@ -6276,7 +5575,7 @@ class ShardRangeList(UserList):
             containing the filtered shard ranges.
         """
         return ShardRangeList(
-            filter_shard_ranges(self, includes, marker, end_marker))
+            filter_namespaces(self, includes, marker, end_marker))
 
     def find_lower(self, condition):
         """
@@ -6297,44 +5596,45 @@ class ShardRangeList(UserList):
         return self.upper
 
 
-def find_shard_range(item, ranges):
+def find_namespace(item, namespaces):
     """
-    Find a ShardRange in given list of ``shard_ranges`` whose namespace
+    Find a Namespace/ShardRange in given list of ``namespaces`` whose namespace
     contains ``item``.
 
-    :param item: The item for a which a ShardRange is to be found.
-    :param ranges: a sorted list of ShardRanges.
-    :return: the ShardRange whose namespace contains ``item``, or None if
-        no suitable range is found.
+    :param item: The item for which a Namespace is to be found.
+    :param namespaces: a sorted list of Namespaces.
+    :return: the Namespace/ShardRange whose namespace contains ``item``, or
+        None if no suitable Namespace is found.
     """
-    index = bisect.bisect_left(ranges, item)
-    if index != len(ranges) and item in ranges[index]:
-        return ranges[index]
+    index = bisect.bisect_left(namespaces, item)
+    if index != len(namespaces) and item in namespaces[index]:
+        return namespaces[index]
     return None
 
 
-def filter_shard_ranges(shard_ranges, includes, marker, end_marker):
+def filter_namespaces(namespaces, includes, marker, end_marker):
     """
-    Filter the given shard ranges to those whose namespace includes the
-    ``includes`` name or any part of the namespace between ``marker`` and
+    Filter the given Namespaces/ShardRanges to those whose namespace includes
+    the ``includes`` name or any part of the namespace between ``marker`` and
     ``end_marker``. If none of ``includes``, ``marker`` or ``end_marker`` are
-    specified then all shard ranges will be returned.
+    specified then all Namespaces will be returned.
- :param shard_ranges: A list of :class:`~swift.common.utils.ShardRange`. - :param includes: a string; if not empty then only the shard range, if any, - whose namespace includes this string will be returned, and ``marker`` - and ``end_marker`` will be ignored. + :param namespaces: A list of :class:`~swift.common.utils.Namespace` or + :class:`~swift.common.utils.ShardRange`. + :param includes: a string; if not empty then only the Namespace, + if any, whose namespace includes this string will be returned, + ``marker`` and ``end_marker`` will be ignored. :param marker: if specified then only shard ranges whose upper bound is greater than this value will be returned. :param end_marker: if specified then only shard ranges whose lower bound is less than this value will be returned. - :return: A filtered list of :class:`~swift.common.utils.ShardRange`. + :return: A filtered list of :class:`~swift.common.utils.Namespace`. """ if includes: - shard_range = find_shard_range(includes, shard_ranges) - return [shard_range] if shard_range else [] + namespace = find_namespace(includes, namespaces) + return [namespace] if namespace else [] - def shard_range_filter(sr): + def namespace_filter(sr): end = start = True if end_marker: end = end_marker > sr.lower @@ -6343,79 +5643,13 @@ def filter_shard_ranges(shard_ranges, includes, marker, end_marker): return start and end if marker or end_marker: - return list(filter(shard_range_filter, shard_ranges)) + return list(filter(namespace_filter, namespaces)) if marker == Namespace.MAX or end_marker == Namespace.MIN: - # MIN and MAX are both Falsy so not handled by shard_range_filter + # MIN and MAX are both Falsy so not handled by namespace_filter return [] - return shard_ranges - - -def modify_priority(conf, logger): - """ - Modify priority by nice and ionice. 
- """ - - global _libc_setpriority - if _libc_setpriority is None: - _libc_setpriority = load_libc_function('setpriority', - errcheck=True) - - def _setpriority(nice_priority): - """ - setpriority for this pid - - :param nice_priority: valid values are -19 to 20 - """ - try: - _libc_setpriority(PRIO_PROCESS, os.getpid(), - int(nice_priority)) - except (ValueError, OSError): - print(_("WARNING: Unable to modify scheduling priority of process." - " Keeping unchanged! Check logs for more info. ")) - logger.exception('Unable to modify nice priority') - else: - logger.debug('set nice priority to %s' % nice_priority) - - nice_priority = conf.get('nice_priority') - if nice_priority is not None: - _setpriority(nice_priority) - - global _posix_syscall - if _posix_syscall is None: - _posix_syscall = load_libc_function('syscall', errcheck=True) - - def _ioprio_set(io_class, io_priority): - """ - ioprio_set for this process - - :param io_class: the I/O class component, can be - IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, - or IOPRIO_CLASS_IDLE - :param io_priority: priority value in the I/O class - """ - try: - io_class = IO_CLASS_ENUM[io_class] - io_priority = int(io_priority) - _posix_syscall(NR_ioprio_set(), - IOPRIO_WHO_PROCESS, - os.getpid(), - IOPRIO_PRIO_VALUE(io_class, io_priority)) - except (KeyError, ValueError, OSError): - print(_("WARNING: Unable to modify I/O scheduling class " - "and priority of process. Keeping unchanged! 
" - "Check logs for more info.")) - logger.exception("Unable to modify ionice priority") - else: - logger.debug('set ionice class %s priority %s', - io_class, io_priority) - - io_class = conf.get("ionice_class") - if io_class is None: - return - io_priority = conf.get("ionice_priority", 0) - _ioprio_set(io_class, io_priority) + return namespaces def o_tmpfile_in_path_supported(dirpath): @@ -6995,14 +6229,15 @@ class Watchdog(object): :param timeout: duration before the timeout expires :param exc: exception to throw when the timeout expire, must inherit - from eventlet.timeouts.Timeout + from eventlet.Timeout :param timeout_at: allow to force the expiration timestamp :return: id of the scheduled timeout, needed to cancel it """ + now = time.time() if not timeout_at: - timeout_at = time.time() + timeout + timeout_at = now + timeout gth = eventlet.greenthread.getcurrent() - timeout_definition = (timeout, timeout_at, gth, exc) + timeout_definition = (timeout, timeout_at, gth, exc, now) key = id(timeout_definition) self._timeouts[key] = timeout_definition @@ -7025,8 +6260,7 @@ class Watchdog(object): :param key: timeout id, as returned by start() """ try: - if key in self._timeouts: - del(self._timeouts[key]) + del(self._timeouts[key]) except KeyError: pass @@ -7046,15 +6280,14 @@ class Watchdog(object): self._next_expiration = None if self._evt.ready(): self._evt.reset() - for k, (timeout, timeout_at, gth, exc) in list(self._timeouts.items()): + for k, (timeout, timeout_at, gth, exc, + created_at) in list(self._timeouts.items()): if timeout_at <= now: - try: - if k in self._timeouts: - del(self._timeouts[k]) - except KeyError: - pass + self.stop(k) e = exc() + # set this after __init__ to keep it off the eventlet scheduler e.seconds = timeout + e.created_at = created_at eventlet.hubs.get_hub().schedule_call_global(0, gth.throw, e) else: if (self._next_expiration is None |