From 147effea78e92ae341fd048b1c9c1ff54c0ced0f Mon Sep 17 00:00:00 2001 From: Ilya Shakhat Date: Fri, 14 Dec 2018 10:43:04 +0100 Subject: Optimize storage schema for Redis driver The original Redis driver stored each tracing event under its own key; as a result, both list and get operations required a full scan of the database. With this patch, traces are stored as Redis lists under a key equal to the trace id. So the list operation iterates only over unique trace ids and the get operation retrieves the content of a specified list. Note that the list operation still needs to retrieve at least 1 event from the trace in order to get a timestamp. The performance test was executed with 1000 traces, each consisting of 1000 events: * existing driver: * write: 48 sec * list: 41 sec * get: 3.6 sec * optimized driver: * write: 44 sec * list: 4.1 sec * get: 0.01 sec Change-Id: I09a122f91e3d26531965fe53d6df7c66f502de4c --- osprofiler/drivers/redis_driver.py | 32 ++++++++++++++++++---- .../notes/redis-improvement-d4c91683fc89f570.yaml | 16 +++++++++++ 2 files changed, 42 insertions(+), 6 deletions(-) create mode 100644 releasenotes/notes/redis-improvement-d4c91683fc89f570.yaml diff --git a/osprofiler/drivers/redis_driver.py b/osprofiler/drivers/redis_driver.py index 250a81c..b8101de 100644 --- a/osprofiler/drivers/redis_driver.py +++ b/osprofiler/drivers/redis_driver.py @@ -47,7 +47,8 @@ class Redis(base.Driver): # only connection over network is supported with schema # redis://[:password]@host[:port][/db] self.db = StrictRedis.from_url(self.connection_str) - self.namespace = "osprofiler:" + self.namespace_opt = "osprofiler_opt:" + self.namespace = "osprofiler:" # legacy self.namespace_error = "osprofiler_error:" @classmethod @@ -73,9 +74,8 @@ class Redis(base.Driver): data = info.copy() data["project"] = self.project data["service"] = self.service - key = self.namespace + data["base_id"] + "_" + data["trace_id"] + "_" + \ - data["timestamp"] - self.db.set(key, jsonutils.dumps(data)) + key = 
self.namespace_opt + data["base_id"] + self.db.lpush(key, jsonutils.dumps(data)) if (self.filter_error_trace and data.get("info", {}).get("etype") is not None): @@ -100,6 +100,19 @@ class Redis(base.Driver): """ fields = set(fields or self.default_trace_fields) + # first get legacy events + result = self._list_traces_legacy(fields) + + # with optimized schema trace events are stored in a list + ids = self.db.scan_iter(match=self.namespace_opt + "*") + for i in ids: + # for each trace query the first event to have a timestamp + first_event = jsonutils.loads(self.db.lindex(i, 1)) + result.append({key: value for key, value in first_event.items() + if key in fields}) + return result + + def _list_traces_legacy(self, fields): # With current schema every event is stored under its own unique key # To query all traces we first need to get all keys, then # get all events, sort them and pick up only the first one @@ -134,8 +147,15 @@ class Redis(base.Driver): :param base_id: Base id of trace elements. """ - for key in self.db.scan_iter(match=self.namespace + base_id + "*"): - data = self.db.get(key) + def iterate_events(): + for key in self.db.scan_iter( + match=self.namespace + base_id + "*"): # legacy + yield self.db.get(key) + + for event in self.db.lrange(self.namespace_opt + base_id, 0, -1): + yield event + + for data in iterate_events(): n = jsonutils.loads(data) trace_id = n["trace_id"] parent_id = n["parent_id"] diff --git a/releasenotes/notes/redis-improvement-d4c91683fc89f570.yaml b/releasenotes/notes/redis-improvement-d4c91683fc89f570.yaml new file mode 100644 index 0000000..c2ee2b9 --- /dev/null +++ b/releasenotes/notes/redis-improvement-d4c91683fc89f570.yaml @@ -0,0 +1,16 @@ +--- +features: + - | + Redis storage schema is optimized for higher performance. + Previously Redis driver stored each tracing event under its own key, + as result both list and get operations required full scan of the database. 
+ With the optimized schema, traces are stored as Redis lists under a key + equal to the trace id. So the list operation iterates only over unique + trace ids and the get operation retrieves the content of a specified list. + Note that the list operation still needs to retrieve at least 1 event + from the trace to get a timestamp. +upgrade: + - | + The optimized Redis driver is backward compatible: while new events are stored + using the new schema, the driver can retrieve existing events using both old and new + schemas. -- cgit v1.2.1