diff options
author | Ilya Shakhat <shakhat@gmail.com> | 2018-12-14 10:43:04 +0100 |
---|---|---|
committer | Ilya Shakhat <shakhat@gmail.com> | 2019-04-09 13:27:18 +0200 |
commit | 147effea78e92ae341fd048b1c9c1ff54c0ced0f (patch) | |
tree | 8d0d5e948af1ba8f7b30133f03e3287394fc5753 | |
parent | a9d7cb1842e6843dcd9f31afe9f300f9dcc5a357 (diff) | |
download | osprofiler-147effea78e92ae341fd048b1c9c1ff54c0ced0f.tar.gz |
Optimize storage schema for Redis driver
The original Redis driver stored each tracing event under its own key;
as a result, both list and get operations required a full scan of the database.
With this patch, traces are stored as Redis lists under a key equal
to the trace id. So the list operation iterates only over unique trace ids
and the get operation retrieves the content of a specified list. Note that
the list operation still needs to retrieve at least 1 event from the trace
in order to get a timestamp.
The performance test was executed with 1000 traces, each consisting of 1000 events:
* existing driver:
* write: 48 sec
* list: 41 sec
* get: 3.6 sec
* optimized driver:
* write: 44 sec
* list: 4.1 sec
* get: 0.01 sec
Change-Id: I09a122f91e3d26531965fe53d6df7c66f502de4c
-rw-r--r-- | osprofiler/drivers/redis_driver.py | 32 | ||||
-rw-r--r-- | releasenotes/notes/redis-improvement-d4c91683fc89f570.yaml | 16 |
2 files changed, 42 insertions, 6 deletions
diff --git a/osprofiler/drivers/redis_driver.py b/osprofiler/drivers/redis_driver.py index 250a81c..b8101de 100644 --- a/osprofiler/drivers/redis_driver.py +++ b/osprofiler/drivers/redis_driver.py @@ -47,7 +47,8 @@ class Redis(base.Driver): # only connection over network is supported with schema # redis://[:password]@host[:port][/db] self.db = StrictRedis.from_url(self.connection_str) - self.namespace = "osprofiler:" + self.namespace_opt = "osprofiler_opt:" + self.namespace = "osprofiler:" # legacy self.namespace_error = "osprofiler_error:" @classmethod @@ -73,9 +74,8 @@ class Redis(base.Driver): data = info.copy() data["project"] = self.project data["service"] = self.service - key = self.namespace + data["base_id"] + "_" + data["trace_id"] + "_" + \ - data["timestamp"] - self.db.set(key, jsonutils.dumps(data)) + key = self.namespace_opt + data["base_id"] + self.db.lpush(key, jsonutils.dumps(data)) if (self.filter_error_trace and data.get("info", {}).get("etype") is not None): @@ -100,6 +100,19 @@ class Redis(base.Driver): """ fields = set(fields or self.default_trace_fields) + # first get legacy events + result = self._list_traces_legacy(fields) + + # with optimized schema trace events are stored in a list + ids = self.db.scan_iter(match=self.namespace_opt + "*") + for i in ids: + # for each trace query the first event to have a timestamp + first_event = jsonutils.loads(self.db.lindex(i, 1)) + result.append({key: value for key, value in first_event.items() + if key in fields}) + return result + + def _list_traces_legacy(self, fields): # With current schema every event is stored under its own unique key # To query all traces we first need to get all keys, then # get all events, sort them and pick up only the first one @@ -134,8 +147,15 @@ class Redis(base.Driver): :param base_id: Base id of trace elements. 
""" - for key in self.db.scan_iter(match=self.namespace + base_id + "*"): - data = self.db.get(key) + def iterate_events(): + for key in self.db.scan_iter( + match=self.namespace + base_id + "*"): # legacy + yield self.db.get(key) + + for event in self.db.lrange(self.namespace_opt + base_id, 0, -1): + yield event + + for data in iterate_events(): n = jsonutils.loads(data) trace_id = n["trace_id"] parent_id = n["parent_id"] diff --git a/releasenotes/notes/redis-improvement-d4c91683fc89f570.yaml b/releasenotes/notes/redis-improvement-d4c91683fc89f570.yaml new file mode 100644 index 0000000..c2ee2b9 --- /dev/null +++ b/releasenotes/notes/redis-improvement-d4c91683fc89f570.yaml @@ -0,0 +1,16 @@ +--- +features: + - | + Redis storage schema is optimized for higher performance. + Previously Redis driver stored each tracing event under its own key, + as result both list and get operations required full scan of the database. + With the optimized schema traces are stored as Redis lists under a key + equal to trace id. So list operation iterates only over unique + trace ids and get operation retrieves content of a specified list. + Note that list operation still needs to retrieve at least 1 event + from the trace to get a timestamp. +upgrade: + - | + The optimized Redis driver is backward compatible: while new events are stored + using new schema the driver can retrieve existing events using both old and new + schemas. |