| author | Yikun Jiang <yikunkero@gmail.com> | 2018-03-29 18:29:17 +0800 |
|---|---|---|
| committer | Yikun Jiang <yikunkero@gmail.com> | 2018-04-13 09:26:12 +0800 |
| commit | 4c205341797da9e00e8fa176ca6dde3cbd400ca1 (patch) | |
| tree | fb497f2e3a622b25da2bec5ecc239424a774eb37 | |
| parent | 85cf42e841bb59ddf294ec39286ce97e57a9faae (diff) | |
| download | oslo-db-4c205341797da9e00e8fa176ca6dde3cbd400ca1.tar.gz | |
Improve exponential backoff for wrap_db_retry
The @oslo_db.api.wrap_db_retry decorator is used to retry db.api
methods. If a db_error is raised, the decorator calls the method
again after a delay (precisely, 2 ** retry_times seconds).

If the db_error is a deadlock error, this fixed schedule is no longer
suitable: some deadlocks arise because several methods (transactions)
run concurrently, and if every caller sleeps for exactly the same
2 ** retry_times seconds, the methods are re-invoked concurrently
and can deadlock again.

To minimize the chance of regenerating a deadlock and to reduce the
average sleep time, add random jitter to the delay period by default
when a deadlock error is detected. The jittered sleep is drawn
uniformly from [0, 2 ** retry_times), so its expected value is half
the fixed delay.
Change-Id: I206745708570f1f292529ff58eee9b83fc09a9f2
Closes-bug: #1737869
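
To make the schedule change concrete, here is a minimal standalone
sketch (illustrative only, not the patched oslo.db code; the 1-second
base interval and 10-second cap mirror wrap_db_retry's defaults):

    import random

    def backoff_intervals(retries, jitter, max_interval=10):
        # Yield the sleep time before each retry attempt.
        interval = 1  # doubles every retry: 2 ** retry_times
        for _ in range(retries):
            if jitter:
                # Uniform in [0, interval): callers that deadlocked
                # together are unlikely to wake in lockstep, and the
                # average sleep is halved.
                yield min(random.uniform(0, interval), max_interval)
            else:
                yield min(interval, max_interval)
            interval *= 2

    print(list(backoff_intervals(5, jitter=False)))  # [1, 2, 4, 8, 10]
    print(list(backoff_intervals(5, jitter=True)))   # e.g. [0.4, 1.1, 0.6, 5.9, 8.2]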
-rw-r--r-- | oslo_db/api.py | 41 |
-rw-r--r-- | oslo_db/tests/test_api.py | 56 |
2 files changed, 90 insertions, 7 deletions
diff --git a/oslo_db/api.py b/oslo_db/api.py
index 62cf889..7f92756 100644
--- a/oslo_db/api.py
+++ b/oslo_db/api.py
@@ -24,6 +24,7 @@ API methods.
 """

 import logging
+import random
 import threading
 import time

@@ -103,15 +104,21 @@ class wrap_db_retry(object):

     :param exception_checker: checks if an exception should trigger a retry
     :type exception_checker: callable
+
+    :param jitter: determine increase retry interval use jitter or not, jitter
+        is always interpreted as True for a DBDeadlockError
+    :type jitter: bool
     """

     def __init__(self, retry_interval=1, max_retries=20,
                  inc_retry_interval=True,
                  max_retry_interval=10, retry_on_disconnect=False,
                  retry_on_deadlock=False,
-                 exception_checker=lambda exc: False):
+                 exception_checker=lambda exc: False,
+                 jitter=False):
         super(wrap_db_retry, self).__init__()

+        self.jitter = jitter
         self.db_error = (exception.RetryRequest, )
         # default is that we re-raise anything unexpected
         self.exception_checker = exception_checker
@@ -127,7 +134,7 @@ class wrap_db_retry(object):
     def __call__(self, f):
         @six.wraps(f)
         def wrapper(*args, **kwargs):
-            next_interval = self.retry_interval
+            sleep_time = next_interval = self.retry_interval
             remaining = self.max_retries

             while True:
@@ -150,12 +157,20 @@ class wrap_db_retry(object):
                     # NOTE(vsergeyev): We are using patched time module, so
                     #                  this effectively yields the execution
                     #                  context to another green thread.
-                    time.sleep(next_interval)
+                    time.sleep(sleep_time)
                     if self.inc_retry_interval:
-                        next_interval = min(
-                            next_interval * 2,
-                            self.max_retry_interval
-                        )
+                        # NOTE(jiangyikun): In order to minimize the chance of
+                        # regenerating a deadlock and reduce the average sleep
+                        # time, we are using jitter by default when the
+                        # deadlock is detected. With the jitter,
+                        # sleep_time = [0, next_interval), otherwise, without
+                        # the jitter, sleep_time = next_interval.
+                        if isinstance(e, exception.DBDeadlock):
+                            jitter = True
+                        else:
+                            jitter = self.jitter
+                        sleep_time, next_interval = self._get_inc_interval(
+                            next_interval, jitter)
                     remaining -= 1

         return wrapper
@@ -170,6 +185,18 @@ class wrap_db_retry(object):
                 return True
         return self.exception_checker(exc)

+    def _get_inc_interval(self, n, jitter):
+        # NOTE(jiangyikun): The "n" help us to record the 2 ** retry_times.
+        # The "sleep_time" means the real time to sleep:
+        #   - Without jitter: sleep_time = 2 ** retry_times = n
+        #   - With jitter: sleep_time = [0, 2 ** retry_times) < n
+        n = n * 2
+        if jitter:
+            sleep_time = random.uniform(0, n)
+        else:
+            sleep_time = n
+        return min(sleep_time, self.max_retry_interval), n
+

 class DBAPI(object):
     """Initialize the chosen DB API backend.
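For context, a hypothetical caller could opt in to jitter like this
once the patch is applied (the decorated function and its arguments
are illustrative, not part of the change):

    from oslo_db import api as oslo_db_api

    @oslo_db_api.wrap_db_retry(max_retries=5, retry_on_deadlock=True,
                               jitter=True)
    def update_resource(context, resource_id, values):
        # Illustrative body: a DBDeadlock or RetryRequest raised here is
        # retried after a jittered sleep in [0, 2 ** retry_times), capped
        # at max_retry_interval.
        ...

Note that deadlock-driven retries are jittered regardless of the flag;
jitter=True merely extends the behavior to the other retriable errors.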
diff --git a/oslo_db/tests/test_api.py b/oslo_db/tests/test_api.py
index 6863790..7aafba6 100644
--- a/oslo_db/tests/test_api.py
+++ b/oslo_db/tests/test_api.py
@@ -253,3 +253,59 @@ class DBRetryRequestCase(DBAPITestCase):

         self.assertRaises(AttributeError, some_method)
         self.assertFalse(mock_log.called)
+
+    @mock.patch('oslo_db.api.time.sleep', return_value=None)
+    def test_retry_wrapper_deadlock(self, mock_sleep):
+
+        # Tests that jitter is False, if the retry wrapper hits a
+        # non-deadlock error
+        @api.wrap_db_retry(max_retries=1, retry_on_deadlock=True)
+        def some_method_no_deadlock():
+            raise exception.RetryRequest(ValueError())
+        with mock.patch(
+                'oslo_db.api.wrap_db_retry._get_inc_interval') as mock_get:
+            mock_get.return_value = 2, 2
+            self.assertRaises(ValueError, some_method_no_deadlock)
+            mock_get.assert_called_once_with(1, False)
+
+        # Tests that jitter is True, if the retry wrapper hits a deadlock
+        # error.
+        @api.wrap_db_retry(max_retries=1, retry_on_deadlock=True)
+        def some_method_deadlock():
+            raise exception.DBDeadlock('test')
+        with mock.patch(
+                'oslo_db.api.wrap_db_retry._get_inc_interval') as mock_get:
+            mock_get.return_value = 0.1, 2
+            self.assertRaises(exception.DBDeadlock, some_method_deadlock)
+            mock_get.assert_called_once_with(1, True)
+
+        # Tests that jitter is True, if the jitter is enable by user
+        @api.wrap_db_retry(max_retries=1, retry_on_deadlock=True, jitter=True)
+        def some_method_no_deadlock_exp():
+            raise exception.RetryRequest(ValueError())
+        with mock.patch(
+                'oslo_db.api.wrap_db_retry._get_inc_interval') as mock_get:
+            mock_get.return_value = 0.1, 2
+            self.assertRaises(ValueError, some_method_no_deadlock_exp)
+            mock_get.assert_called_once_with(1, True)
+
+    def test_wrap_db_retry_get_interval(self):
+        x = api.wrap_db_retry(max_retries=5, retry_on_deadlock=True,
+                              max_retry_interval=11)
+        self.assertEqual(11, x.max_retry_interval)
+        for i in (1, 2, 4):
+            # With jitter: sleep_time = [0, 2 ** retry_times)
+            sleep_time, n = x._get_inc_interval(i, True)
+            self.assertEqual(2 * i, n)
+            self.assertTrue(2 * i > sleep_time)
+            # Without jitter: sleep_time = 2 ** retry_times
+            sleep_time, n = x._get_inc_interval(i, False)
+            self.assertEqual(2 * i, n)
+            self.assertEqual(2 * i, sleep_time)
+        for i in (8, 16, 32):
+            sleep_time, n = x._get_inc_interval(i, False)
+            self.assertEqual(x.max_retry_interval, sleep_time)
+            self.assertEqual(2 * i, n)
+            sleep_time, n = x._get_inc_interval(i, True)
+            self.assertTrue(x.max_retry_interval >= sleep_time)
+            self.assertEqual(2 * i, n)
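As a closing illustration of why the jitter helps, a standalone sketch
(not part of the patch) simulating two workers that deadlocked together
and retry on the same schedule:

    import random

    def colliding_retries(jitter, attempts=5, window=0.1):
        # Count how often the two workers wake within `window` seconds
        # of each other -- a rough proxy for re-deadlocking.
        collisions = 0
        interval = 1
        for _ in range(attempts):
            if jitter:
                a = random.uniform(0, interval)
                b = random.uniform(0, interval)
            else:
                a = b = interval
            collisions += abs(a - b) < window
            interval *= 2
        return collisions

    print(colliding_retries(jitter=False))  # always 5: every retry collides
    print(colliding_retries(jitter=True))   # usually 0 or 1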