author     Yikun Jiang <yikunkero@gmail.com>  2018-03-29 18:29:17 +0800
committer  Yikun Jiang <yikunkero@gmail.com>  2018-04-13 09:26:12 +0800
commit     4c205341797da9e00e8fa176ca6dde3cbd400ca1 (patch)
tree       fb497f2e3a622b25da2bec5ecc239424a774eb37
parent     85cf42e841bb59ddf294ec39286ce97e57a9faae (diff)
download   oslo-db-4c205341797da9e00e8fa176ca6dde3cbd400ca1.tar.gz
Improve exponential backoff for wrap_db_retry
The @oslo_db.api.wrap_db_retry decorator is used to retry db.api methods. If a db_error is raised, the decorator calls the method again after a few seconds (2 ** retry_times seconds, to be precise). When the db_error is a deadlock error, this fixed backoff is no longer suitable: some deadlocks occur because we call certain methods (transactions) concurrently, and if every caller retries after exactly the same 2 ** retry_times seconds, the methods are called concurrently again. To minimize the chance of regenerating a deadlock and to reduce the average sleep time, add some random jitter to the delay period by default when a deadlock error is detected.

Change-Id: I206745708570f1f292529ff58eee9b83fc09a9f2
Closes-Bug: #1737869
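For a rough feel of the behaviour described above, the standalone sketch below reproduces the backoff sequence (illustrative only; backoff_intervals is a made-up helper and is not part of this patch): without jitter the delays are the fixed 1, 2, 4, ... seconds capped at max_retry_interval, while with jitter each delay after the first is drawn uniformly from [0, 2 ** retry_times) and then capped.

import random


def backoff_intervals(retry_interval=1, max_retry_interval=10,
                      max_retries=5, jitter=False):
    """Yield the sleep time used before each retry attempt."""
    sleep_time = next_interval = retry_interval
    for _ in range(max_retries):
        yield sleep_time
        # The upper bound doubles on every retry (exponential backoff).
        next_interval *= 2
        if jitter:
            # Pick a random point below the bound so that concurrent
            # callers do not all wake up at the same moment.
            sleep_time = random.uniform(0, next_interval)
        else:
            sleep_time = next_interval
        sleep_time = min(sleep_time, max_retry_interval)


print(list(backoff_intervals()))             # [1, 2, 4, 8, 10]
print(list(backoff_intervals(jitter=True)))  # e.g. [1, 1.6, 3.1, 0.5, 9.8]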
-rw-r--r--  oslo_db/api.py             41
-rw-r--r--  oslo_db/tests/test_api.py  56
2 files changed, 90 insertions(+), 7 deletions(-)
diff --git a/oslo_db/api.py b/oslo_db/api.py
index 62cf889..7f92756 100644
--- a/oslo_db/api.py
+++ b/oslo_db/api.py
@@ -24,6 +24,7 @@ API methods.
"""
import logging
+import random
import threading
import time
@@ -103,15 +104,21 @@ class wrap_db_retry(object):
:param exception_checker: checks if an exception should trigger a retry
:type exception_checker: callable
+
+ :param jitter: whether to use random jitter when increasing the retry
+ interval; jitter is always interpreted as True for a DBDeadlockError
+ :type jitter: bool
"""
def __init__(self, retry_interval=1, max_retries=20,
inc_retry_interval=True,
max_retry_interval=10, retry_on_disconnect=False,
retry_on_deadlock=False,
- exception_checker=lambda exc: False):
+ exception_checker=lambda exc: False,
+ jitter=False):
super(wrap_db_retry, self).__init__()
+ self.jitter = jitter
self.db_error = (exception.RetryRequest, )
# default is that we re-raise anything unexpected
self.exception_checker = exception_checker
@@ -127,7 +134,7 @@ class wrap_db_retry(object):
def __call__(self, f):
@six.wraps(f)
def wrapper(*args, **kwargs):
- next_interval = self.retry_interval
+ sleep_time = next_interval = self.retry_interval
remaining = self.max_retries
while True:
@@ -150,12 +157,20 @@ class wrap_db_retry(object):
# NOTE(vsergeyev): We are using patched time module, so
# this effectively yields the execution
# context to another green thread.
- time.sleep(next_interval)
+ time.sleep(sleep_time)
if self.inc_retry_interval:
- next_interval = min(
- next_interval * 2,
- self.max_retry_interval
- )
+ # NOTE(jiangyikun): In order to minimize the chance of
+ # regenerating a deadlock and to reduce the average sleep
+ # time, we use jitter by default when a deadlock is
+ # detected. With jitter, sleep_time is in
+ # [0, next_interval); without it,
+ # sleep_time = next_interval.
+ if isinstance(e, exception.DBDeadlock):
+ jitter = True
+ else:
+ jitter = self.jitter
+ sleep_time, next_interval = self._get_inc_interval(
+ next_interval, jitter)
remaining -= 1
return wrapper
@@ -170,6 +185,18 @@ class wrap_db_retry(object):
return True
return self.exception_checker(exc)
+ def _get_inc_interval(self, n, jitter):
+ # NOTE(jiangyikun): "n" records 2 ** retry_times, doubling on
+ # every call. "sleep_time" is the actual time to sleep:
+ # - Without jitter: sleep_time = 2 ** retry_times = n
+ # - With jitter: sleep_time is in [0, 2 ** retry_times) = [0, n)
+ n = n * 2
+ if jitter:
+ sleep_time = random.uniform(0, n)
+ else:
+ sleep_time = n
+ return min(sleep_time, self.max_retry_interval), n
+
class DBAPI(object):
"""Initialize the chosen DB API backend.
diff --git a/oslo_db/tests/test_api.py b/oslo_db/tests/test_api.py
index 6863790..7aafba6 100644
--- a/oslo_db/tests/test_api.py
+++ b/oslo_db/tests/test_api.py
@@ -253,3 +253,59 @@ class DBRetryRequestCase(DBAPITestCase):
self.assertRaises(AttributeError, some_method)
self.assertFalse(mock_log.called)
+
+ @mock.patch('oslo_db.api.time.sleep', return_value=None)
+ def test_retry_wrapper_deadlock(self, mock_sleep):
+
+ # Tests that jitter is False if the retry wrapper hits a
+ # non-deadlock error.
+ @api.wrap_db_retry(max_retries=1, retry_on_deadlock=True)
+ def some_method_no_deadlock():
+ raise exception.RetryRequest(ValueError())
+ with mock.patch(
+ 'oslo_db.api.wrap_db_retry._get_inc_interval') as mock_get:
+ mock_get.return_value = 2, 2
+ self.assertRaises(ValueError, some_method_no_deadlock)
+ mock_get.assert_called_once_with(1, False)
+
+ # Tests that jitter is True if the retry wrapper hits a deadlock
+ # error.
+ @api.wrap_db_retry(max_retries=1, retry_on_deadlock=True)
+ def some_method_deadlock():
+ raise exception.DBDeadlock('test')
+ with mock.patch(
+ 'oslo_db.api.wrap_db_retry._get_inc_interval') as mock_get:
+ mock_get.return_value = 0.1, 2
+ self.assertRaises(exception.DBDeadlock, some_method_deadlock)
+ mock_get.assert_called_once_with(1, True)
+
+ # Tests that jitter is True if jitter is enabled by the user.
+ @api.wrap_db_retry(max_retries=1, retry_on_deadlock=True, jitter=True)
+ def some_method_no_deadlock_exp():
+ raise exception.RetryRequest(ValueError())
+ with mock.patch(
+ 'oslo_db.api.wrap_db_retry._get_inc_interval') as mock_get:
+ mock_get.return_value = 0.1, 2
+ self.assertRaises(ValueError, some_method_no_deadlock_exp)
+ mock_get.assert_called_once_with(1, True)
+
+ def test_wrap_db_retry_get_interval(self):
+ x = api.wrap_db_retry(max_retries=5, retry_on_deadlock=True,
+ max_retry_interval=11)
+ self.assertEqual(11, x.max_retry_interval)
+ for i in (1, 2, 4):
+ # With jitter: sleep_time = [0, 2 ** retry_times)
+ sleep_time, n = x._get_inc_interval(i, True)
+ self.assertEqual(2 * i, n)
+ self.assertTrue(2 * i > sleep_time)
+ # Without jitter: sleep_time = 2 ** retry_times
+ sleep_time, n = x._get_inc_interval(i, False)
+ self.assertEqual(2 * i, n)
+ self.assertEqual(2 * i, sleep_time)
+ for i in (8, 16, 32):
+ sleep_time, n = x._get_inc_interval(i, False)
+ self.assertEqual(x.max_retry_interval, sleep_time)
+ self.assertEqual(2 * i, n)
+ sleep_time, n = x._get_inc_interval(i, True)
+ self.assertTrue(x.max_retry_interval >= sleep_time)
+ self.assertEqual(2 * i, n)