summaryrefslogtreecommitdiff
path: root/nova
diff options
context:
space:
mode:
authormelanie witt <melwittt@gmail.com>2017-05-16 10:25:42 +0000
committermelanie witt <melwittt@gmail.com>2017-05-18 18:17:06 +0000
commit47fa88d94754fcdad6bb132b45196b4d44c0f4cd (patch)
treef6534e1654234bc143e58b33b5c5c2b94cd1a760 /nova
parent902b7bf6f5425824a0661b8e4beac4a894749c03 (diff)
downloadnova-47fa88d94754fcdad6bb132b45196b4d44c0f4cd.tar.gz
Cache database and message queue connection objects
Recently in the gate we have seen a trace on some work-in-progress
patches:

  OperationalError: (pymysql.err.OperationalError)
  (1040, u'Too many connections')

and at least one operator has reported that the number of database
connections increased significantly going from Mitaka to Newton.

It was suspected that the increase was caused by creating new oslo.db
transaction context managers on-the-fly when switching database
connections for cells. Comparing the dstat --tcp output of runs of the
gate-tempest-dsvm-neutron-full-ubuntu-xenial job with and without
caching of the database connections showed a difference of 445 active
TCP connections and 1495 active TCP connections, respectively [1].

This adds caching of the oslo.db transaction context managers and the
oslo.messaging transports to avoid creating a large number of objects
that are not being garbage-collected as expected.

Closes-Bug: #1691545

[1] https://docs.google.com/spreadsheets/d/1DIfFfX3kaA_SRoCM-aO7BN4IBEShChXLztOBFeKryt4/edit?usp=sharing

Change-Id: I17e0eb836dd87aac5859f506e7d771d42753d31a
Diffstat (limited to 'nova')
-rw-r--r--nova/context.py30
-rw-r--r--nova/test.py1
-rw-r--r--nova/tests/unit/test_context.py28
3 files changed, 53 insertions, 6 deletions
diff --git a/nova/context.py b/nova/context.py
index be49d2945c..a22185dcbf 100644
--- a/nova/context.py
+++ b/nova/context.py
@@ -34,6 +34,10 @@ from nova import policy
from nova import utils
LOG = logging.getLogger(__name__)
+# TODO(melwitt): This cache should be cleared whenever WSGIService receives a
+# SIGHUP and periodically based on an expiration time. Currently, none of the
+# cell caches are purged, so neither is this one, for now.
+CELL_CACHE = {}
class _ContextAuthPlugin(plugin.BaseAuthPlugin):
@@ -370,15 +374,31 @@ def set_target_cell(context, cell_mapping):
:param context: The RequestContext to add connection information
:param cell_mapping: An objects.CellMapping object or None
"""
+ global CELL_CACHE
if cell_mapping is not None:
# avoid circular import
from nova import db
from nova import rpc
- db_connection_string = cell_mapping.database_connection
- context.db_connection = db.create_context_manager(db_connection_string)
- if not cell_mapping.transport_url.startswith('none'):
- context.mq_connection = rpc.create_transport(
- cell_mapping.transport_url)
+
+ # Synchronize access to the cache by multiple API workers.
+ @utils.synchronized(cell_mapping.uuid)
+ def get_or_set_cached_cell_and_set_connections():
+ try:
+ cell_tuple = CELL_CACHE[cell_mapping.uuid]
+ except KeyError:
+ db_connection_string = cell_mapping.database_connection
+ context.db_connection = db.create_context_manager(
+ db_connection_string)
+ if not cell_mapping.transport_url.startswith('none'):
+ context.mq_connection = rpc.create_transport(
+ cell_mapping.transport_url)
+ CELL_CACHE[cell_mapping.uuid] = (context.db_connection,
+ context.mq_connection)
+ else:
+ context.db_connection = cell_tuple[0]
+ context.mq_connection = cell_tuple[1]
+
+ get_or_set_cached_cell_and_set_connections()
else:
context.db_connection = None
context.mq_connection = None
diff --git a/nova/test.py b/nova/test.py
index d6e4d59a62..64d5486405 100644
--- a/nova/test.py
+++ b/nova/test.py
@@ -242,6 +242,7 @@ class TestCase(testtools.TestCase):
# NOTE(danms): Reset the cached list of cells
from nova.compute import api
api.CELLS = []
+ context.CELL_CACHE = {}
self.cell_mappings = {}
self.host_mappings = {}
diff --git a/nova/tests/unit/test_context.py b/nova/tests/unit/test_context.py
index 89a1db2372..e44a730eee 100644
--- a/nova/tests/unit/test_context.py
+++ b/nova/tests/unit/test_context.py
@@ -20,6 +20,7 @@ from nova import context
from nova import exception
from nova import objects
from nova import test
+from nova.tests import uuidsentinel as uuids
class ContextTestCase(test.NoDBTestCase):
@@ -302,7 +303,8 @@ class ContextTestCase(test.NoDBTestCase):
ctxt.db_connection = mock.sentinel.db_conn
ctxt.mq_connection = mock.sentinel.mq_conn
mapping = objects.CellMapping(database_connection='fake://',
- transport_url='fake://')
+ transport_url='fake://',
+ uuid=uuids.cell)
with context.target_cell(ctxt, mapping):
self.assertEqual(ctxt.db_connection, mock.sentinel.cdb)
self.assertEqual(ctxt.mq_connection, mock.sentinel.cmq)
@@ -333,3 +335,27 @@ class ContextTestCase(test.NoDBTestCase):
self.assertIsNone(ctxt.user_id)
self.assertIsNone(ctxt.project_id)
self.assertFalse(ctxt.is_admin)
+
+ @mock.patch('nova.rpc.create_transport')
+ @mock.patch('nova.db.create_context_manager')
+ def test_target_cell_caching(self, mock_create_cm, mock_create_tport):
+ mock_create_cm.return_value = mock.sentinel.db_conn_obj
+ mock_create_tport.return_value = mock.sentinel.mq_conn_obj
+ ctxt = context.get_context()
+ mapping = objects.CellMapping(database_connection='fake://db',
+ transport_url='fake://mq',
+ uuid=uuids.cell)
+ # First call should create new connection objects.
+ with context.target_cell(ctxt, mapping):
+ self.assertEqual(mock.sentinel.db_conn_obj, ctxt.db_connection)
+ self.assertEqual(mock.sentinel.mq_conn_obj, ctxt.mq_connection)
+ mock_create_cm.assert_called_once_with('fake://db')
+ mock_create_tport.assert_called_once_with('fake://mq')
+ # Second call should use cached objects.
+ mock_create_cm.reset_mock()
+ mock_create_tport.reset_mock()
+ with context.target_cell(ctxt, mapping):
+ self.assertEqual(mock.sentinel.db_conn_obj, ctxt.db_connection)
+ self.assertEqual(mock.sentinel.mq_conn_obj, ctxt.mq_connection)
+ mock_create_cm.assert_not_called()
+ mock_create_tport.assert_not_called()