summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIlya Shakhat <shakhat@gmail.com>2018-12-14 10:43:04 +0100
committerIlya Shakhat <shakhat@gmail.com>2019-04-09 13:27:18 +0200
commit147effea78e92ae341fd048b1c9c1ff54c0ced0f (patch)
tree8d0d5e948af1ba8f7b30133f03e3287394fc5753
parenta9d7cb1842e6843dcd9f31afe9f300f9dcc5a357 (diff)
downloadosprofiler-147effea78e92ae341fd048b1c9c1ff54c0ced0f.tar.gz
Optimize storage schema for Redis driver
The original Redis driver stored each tracing event under its own key; as a result, both list and get operations required a full scan of the database. With this patch, traces are stored as Redis lists under a key equal to the trace id. So the list operation iterates only over unique trace ids and the get operation retrieves the content of the specified list. Note that the list operation still needs to retrieve at least 1 event from the trace in order to get a timestamp. The performance test was executed with 1000 traces, each consisting of 1000 events: * existing driver: * write: 48 sec * list: 41 sec * get: 3.6 sec * optimized driver: * write: 44 sec * list: 4.1 sec * get: 0.01 sec Change-Id: I09a122f91e3d26531965fe53d6df7c66f502de4c
-rw-r--r--osprofiler/drivers/redis_driver.py32
-rw-r--r--releasenotes/notes/redis-improvement-d4c91683fc89f570.yaml16
2 files changed, 42 insertions, 6 deletions
diff --git a/osprofiler/drivers/redis_driver.py b/osprofiler/drivers/redis_driver.py
index 250a81c..b8101de 100644
--- a/osprofiler/drivers/redis_driver.py
+++ b/osprofiler/drivers/redis_driver.py
@@ -47,7 +47,8 @@ class Redis(base.Driver):
# only connection over network is supported with schema
# redis://[:password]@host[:port][/db]
self.db = StrictRedis.from_url(self.connection_str)
- self.namespace = "osprofiler:"
+ self.namespace_opt = "osprofiler_opt:"
+ self.namespace = "osprofiler:" # legacy
self.namespace_error = "osprofiler_error:"
@classmethod
@@ -73,9 +74,8 @@ class Redis(base.Driver):
data = info.copy()
data["project"] = self.project
data["service"] = self.service
- key = self.namespace + data["base_id"] + "_" + data["trace_id"] + "_" + \
- data["timestamp"]
- self.db.set(key, jsonutils.dumps(data))
+ key = self.namespace_opt + data["base_id"]
+ self.db.lpush(key, jsonutils.dumps(data))
if (self.filter_error_trace
and data.get("info", {}).get("etype") is not None):
@@ -100,6 +100,19 @@ class Redis(base.Driver):
"""
fields = set(fields or self.default_trace_fields)
+ # first get legacy events
+ result = self._list_traces_legacy(fields)
+
+ # with optimized schema trace events are stored in a list
+ ids = self.db.scan_iter(match=self.namespace_opt + "*")
+ for i in ids:
+ # for each trace query the first event to have a timestamp
+ first_event = jsonutils.loads(self.db.lindex(i, 1))
+ result.append({key: value for key, value in first_event.items()
+ if key in fields})
+ return result
+
+ def _list_traces_legacy(self, fields):
# With current schema every event is stored under its own unique key
# To query all traces we first need to get all keys, then
# get all events, sort them and pick up only the first one
@@ -134,8 +147,15 @@ class Redis(base.Driver):
:param base_id: Base id of trace elements.
"""
- for key in self.db.scan_iter(match=self.namespace + base_id + "*"):
- data = self.db.get(key)
+ def iterate_events():
+ for key in self.db.scan_iter(
+ match=self.namespace + base_id + "*"): # legacy
+ yield self.db.get(key)
+
+ for event in self.db.lrange(self.namespace_opt + base_id, 0, -1):
+ yield event
+
+ for data in iterate_events():
n = jsonutils.loads(data)
trace_id = n["trace_id"]
parent_id = n["parent_id"]
diff --git a/releasenotes/notes/redis-improvement-d4c91683fc89f570.yaml b/releasenotes/notes/redis-improvement-d4c91683fc89f570.yaml
new file mode 100644
index 0000000..c2ee2b9
--- /dev/null
+++ b/releasenotes/notes/redis-improvement-d4c91683fc89f570.yaml
@@ -0,0 +1,16 @@
+---
+features:
+ - |
+ The Redis storage schema is optimized for higher performance.
+ Previously the Redis driver stored each tracing event under its own key;
+ as a result, both list and get operations required a full scan of the database.
+ With the optimized schema, traces are stored as Redis lists under a key
+ equal to the trace id. So the list operation iterates only over unique
+ trace ids and the get operation retrieves the content of the specified list.
+ Note that the list operation still needs to retrieve at least 1 event
+ from the trace to get a timestamp.
+upgrade:
+ - |
+ The optimized Redis driver is backward compatible: while new events are stored
+ using the new schema, the driver can still retrieve existing events stored
+ using either the old or the new schema.