From 048b772fa751f860ebe73e268a8719c9bc8bc35c Mon Sep 17 00:00:00 2001 From: Jordan Cook Date: Fri, 13 Jan 2023 16:08:35 -0600 Subject: Change DynamoDB table to use cache key as partition key --- requests_cache/backends/dynamodb.py | 83 ++++++++++++++-------------------- requests_cache/backends/mongodb.py | 2 +- tests/integration/base_storage_test.py | 4 +- tests/integration/test_dynamodb.py | 14 ++++-- 4 files changed, 49 insertions(+), 54 deletions(-) diff --git a/requests_cache/backends/dynamodb.py b/requests_cache/backends/dynamodb.py index 41021c3..f5d9838 100644 --- a/requests_cache/backends/dynamodb.py +++ b/requests_cache/backends/dynamodb.py @@ -4,16 +4,18 @@ :classes-only: :nosignatures: """ -from typing import Dict, Iterable, Optional +from typing import Iterable, Optional import boto3 from boto3.dynamodb.types import Binary from boto3.resources.base import ServiceResource from botocore.exceptions import ClientError +from requests_cache.backends.base import VT + from .._utils import get_valid_kwargs from ..serializers import SerializerType, dynamodb_document_serializer -from . import BaseCache, BaseStorage +from . import BaseCache, BaseStorage, DictStorage class DynamoDbCache(BaseCache): @@ -22,7 +24,6 @@ class DynamoDbCache(BaseCache): Args: table_name: DynamoDB table name - namespace: Name of DynamoDB hash map connection: :boto3:`DynamoDB Resource ` object to use instead of creating a new one ttl: Use DynamoDB TTL to automatically remove expired items @@ -32,6 +33,7 @@ class DynamoDbCache(BaseCache): def __init__( self, table_name: str = 'http_cache', + *, ttl: bool = True, connection: Optional[ServiceResource] = None, decode_content: bool = True, @@ -42,20 +44,13 @@ class DynamoDbCache(BaseCache): skwargs = {'serializer': serializer, **kwargs} if serializer else kwargs self.responses = DynamoDbDict( table_name, - namespace='responses', ttl=ttl, connection=connection, decode_content=decode_content, **skwargs, ) - self.redirects = DynamoDbDict( - table_name, - namespace='redirects', - ttl=False, - connection=self.responses.connection, - serialzier=None, - **kwargs, - ) + # Redirects will be only stored in memory and not persisted + self.redirects: BaseStorage[str, str] = DictStorage() class DynamoDbDict(BaseStorage): @@ -63,7 +58,6 @@ class DynamoDbDict(BaseStorage): Args: table_name: DynamoDB table name - namespace: Name of DynamoDB hash map connection: :boto3:`DynamoDB Resource ` object to use instead of creating a new one ttl: Use DynamoDB TTL to automatically remove expired items @@ -73,7 +67,6 @@ class DynamoDbDict(BaseStorage): def __init__( self, table_name: str, - namespace: str, ttl: bool = True, connection: Optional[ServiceResource] = None, serializer: Optional[SerializerType] = dynamodb_document_serializer, @@ -84,7 +77,6 @@ class DynamoDbDict(BaseStorage): boto3.Session.__init__, kwargs, extras=['endpoint_url'] ) self.connection = connection or boto3.resource('dynamodb', **connection_kwargs) - self.namespace = namespace self.table_name = table_name self.ttl = ttl @@ -98,13 +90,11 @@ class DynamoDbDict(BaseStorage): try: self.connection.create_table( AttributeDefinitions=[ - {'AttributeName': 'namespace', 'AttributeType': 'S'}, {'AttributeName': 'key', 'AttributeType': 'S'}, ], TableName=self.table_name, KeySchema=[ - {'AttributeName': 'namespace', 'KeyType': 'HASH'}, - {'AttributeName': 'key', 'KeyType': 'RANGE'}, + {'AttributeName': 'key', 'KeyType': 'HASH'}, ], BillingMode='PAY_PER_REQUEST', ) @@ -126,31 +116,14 @@ class DynamoDbDict(BaseStorage): if e.response['Error']['Code'] != 'ValidationException': raise - def _composite_key(self, key: str) -> Dict[str, str]: - return {'namespace': self.namespace, 'key': str(key)} - - def _scan(self): - expression_attribute_values = {':Namespace': self.namespace} - expression_attribute_names = {'#N': 'namespace'} - key_condition_expression = '#N = :Namespace' - return self._table.query( - ExpressionAttributeValues=expression_attribute_values, - ExpressionAttributeNames=expression_attribute_names, - KeyConditionExpression=key_condition_expression, - ) - def __getitem__(self, key): - result = self._table.get_item(Key=self._composite_key(key)) + result = self._table.get_item(Key={'key': key}) if 'Item' not in result: raise KeyError - - # With a custom serializer, the value may be a Binary object - raw_value = result['Item']['value'] - value = raw_value.value if isinstance(raw_value, Binary) else raw_value - return self.deserialize(key, value) + return self.deserialize(key, result['Item']['value']) def __setitem__(self, key, value): - item = {**self._composite_key(key), 'value': self.serialize(value)} + item = {'key': key, 'value': self.serialize(value)} # If enabled, set TTL value as a timestamp in unix format if self.ttl and getattr(value, 'expires_unix', None): @@ -159,28 +132,42 @@ class DynamoDbDict(BaseStorage): self._table.put_item(Item=item) def __delitem__(self, key): - response = self._table.delete_item(Key=self._composite_key(key), ReturnValues='ALL_OLD') + response = self._table.delete_item(Key={'key': key}, ReturnValues='ALL_OLD') if 'Attributes' not in response: raise KeyError def __iter__(self): - response = self._scan() - for item in response['Items']: + # Alias 'key' attribute since it's a reserved keyword + results = self._table.scan( + ProjectionExpression='#k', + ExpressionAttributeNames={'#k': 'key'}, + ) + for item in results['Items']: yield item['key'] def __len__(self): - return self._table.query( - Select='COUNT', - ExpressionAttributeNames={'#N': 'namespace'}, - ExpressionAttributeValues={':Namespace': self.namespace}, - KeyConditionExpression='#N = :Namespace', - )['Count'] + """Get the number of items in the table. + + **Note:** This is an estimate, and is updated every 6 hours. A full table scan will use up + your provisioned throughput, so it's not recommended. + """ + return self._table.item_count def bulk_delete(self, keys: Iterable[str]): """Delete multiple keys from the cache. Does not raise errors for missing keys.""" with self._table.batch_writer() as batch: for key in keys: - batch.delete_item(Key=self._composite_key(key)) + batch.delete_item(Key={'key': key}) def clear(self): self.bulk_delete((k for k in self)) + + def deserialize(self, key, value: VT): + """Handle Binary objects from a custom serializer""" + serialized_value = value.value if isinstance(value, Binary) else value + return super().deserialize(key, serialized_value) + + # TODO: Support pagination + def values(self): + for item in self._table.scan()['Items']: + yield self.deserialize(item['key'], item['value']) diff --git a/requests_cache/backends/mongodb.py b/requests_cache/backends/mongodb.py index 07671e7..76b33be 100644 --- a/requests_cache/backends/mongodb.py +++ b/requests_cache/backends/mongodb.py @@ -50,7 +50,7 @@ class MongoCache(BaseCache): db_name, collection_name='redirects', connection=self.responses.connection, - serialzier=None, + serializer=None, **kwargs, ) diff --git a/tests/integration/base_storage_test.py b/tests/integration/base_storage_test.py index a96ffd9..ce881e8 100644 --- a/tests/integration/base_storage_test.py +++ b/tests/integration/base_storage_test.py @@ -135,8 +135,8 @@ class BaseStorageTest: cache_2 = self.init_cache(connection=getattr(cache_1, 'connection', None)) for i in range(5): - cache_1[i] = f'value_{i}' - cache_2[i] = f'value_{i}' + cache_1[f'key_{i}'] = f'value_{i}' + cache_2[f'key_{i}'] = f'value_{i}' assert len(cache_1) == len(cache_2) == 5 cache_1.clear() diff --git a/tests/integration/test_dynamodb.py b/tests/integration/test_dynamodb.py index 42e22eb..af54e67 100644 --- a/tests/integration/test_dynamodb.py +++ b/tests/integration/test_dynamodb.py @@ -5,7 +5,7 @@ from unittest.mock import patch import pytest from requests_cache.backends import DynamoDbCache, DynamoDbDict -from tests.conftest import fail_if_no_connection +from tests.conftest import CACHE_NAME, fail_if_no_connection from tests.integration.base_cache_test import BaseCacheTest from tests.integration.base_storage_test import BaseStorageTest @@ -31,10 +31,18 @@ class TestDynamoDbDict(BaseStorageTest): storage_class = DynamoDbDict init_kwargs = AWS_OPTIONS + def init_cache(self, cache_name=CACHE_NAME, index=0, clear=True, **kwargs): + """For tests that use multiple tables, make index part of the table name""" + kwargs = {**self.init_kwargs, **kwargs} + cache = self.storage_class(f'{cache_name}_{index}', **kwargs) + if clear: + cache.clear() + return cache + @patch('requests_cache.backends.dynamodb.boto3.resource') def test_connection_kwargs(self, mock_resource): """A spot check to make sure optional connection kwargs gets passed to connection""" - DynamoDbDict('test_table', 'namespace', region_name='us-east-2', invalid_kwarg='???') + DynamoDbDict('test_table', region_name='us-east-2', invalid_kwarg='???') mock_resource.assert_called_with('dynamodb', region_name='us-east-2') def test_create_table_error(self): @@ -69,7 +77,7 @@ class TestDynamoDbDict(BaseStorageTest): # 'ttl' is a reserved word, so to retrieve it we need to alias it item = cache._table.get_item( - Key=cache._composite_key('key'), + Key={'key': 'key'}, ProjectionExpression='#t', ExpressionAttributeNames={'#t': 'ttl'}, ) -- cgit v1.2.1 From dc74f212ebaffdb183eb54e55a5d51e1271c671b Mon Sep 17 00:00:00 2001 From: Jordan Cook Date: Tue, 28 Feb 2023 13:09:49 -0600 Subject: Update docs and screenshots for DynamoDB --- HISTORY.md | 6 +++++- docs/_static/dynamodb_create_table.png | Bin 0 -> 51196 bytes docs/_static/dynamodb_items.png | Bin 69437 -> 37675 bytes docs/_static/dynamodb_response.png | Bin 124237 -> 102427 bytes docs/user_guide/backends/dynamodb.md | 25 +++++++++++++++++++------ examples/cloudformation.yml | 6 +----- 6 files changed, 25 insertions(+), 12 deletions(-) create mode 100644 docs/_static/dynamodb_create_table.png diff --git a/HISTORY.md b/HISTORY.md index 09b6c1b..6ffdfbe 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -31,6 +31,10 @@ 💾 **Backends:** * **DynamoDB**: + * For better read performance and usage of read throughput: + * The cache key is now used as the partition key + * Redirects are now cached only in-memory and not persisted + * Cache size (`len()`) now uses a fast table estimate instead of a full scan * Store responses in plain (human-readable) document format instead of fully serialized binary * Create default table in on-demand mode instead of provisioned * Add optional integration with DynamoDB TTL to improve performance for removing expired responses @@ -132,7 +136,6 @@ replacements are listed below. If this causes problems for you, please open an i ⚠️ **Breaking changes:** -Some breaking changes have been made that are not expected to affect most users. If you encounter a problem not listed here after updating to 1.0, please create a bug report! * The `BaseCache.urls` property has been replaced with a method that returns a list of URLs @@ -141,6 +144,7 @@ If you encounter a problem not listed here after updating to 1.0, please create * The `CachedSession` `backend` argument must be either an instance or string alias. Previously it would also accept a backend class. * After initialization, cache settings can only be accesed and modified via `CachedSession.settings`. Previously, some settings could be modified by setting them on either `CachedSession` or `BaseCache`. In some cases this could silently fail or otherwise have undefined behavior. +* DynamoDB table structure has changed. If you are using DynamoDB, you will need to create a new table when upgrading to 1.0. See [DynamoDB backend docs](https://requests-cache.readthedocs.io/en/stable/user_guide/backends/dynamodb.html#dynamodb) for more details. * The following is relevant for **custom backends** that extend built-in storage classes: * All serializer-specific `BaseStorage` subclasses have been removed, and merged into their respective parent classes. This includes `SQLitePickleDict`, `MongoPickleDict`, and `GridFSPickleDict`. * All `BaseStorage` subclasses now have a `serializer` attribute, which will be unused if diff --git a/docs/_static/dynamodb_create_table.png b/docs/_static/dynamodb_create_table.png new file mode 100644 index 0000000..0fcb7a0 Binary files /dev/null and b/docs/_static/dynamodb_create_table.png differ diff --git a/docs/_static/dynamodb_items.png b/docs/_static/dynamodb_items.png index 3ab4531..68066d0 100644 Binary files a/docs/_static/dynamodb_items.png and b/docs/_static/dynamodb_items.png differ diff --git a/docs/_static/dynamodb_response.png b/docs/_static/dynamodb_response.png index 9e2bae0..e0f4d85 100644 Binary files a/docs/_static/dynamodb_response.png and b/docs/_static/dynamodb_response.png differ diff --git a/docs/user_guide/backends/dynamodb.md b/docs/user_guide/backends/dynamodb.md index 7761903..85192ac 100644 --- a/docs/user_guide/backends/dynamodb.md +++ b/docs/user_guide/backends/dynamodb.md @@ -61,7 +61,12 @@ And here is an example response: ``` ::: -It is also possible query these responses with the [AWS CLI](https://aws.amazon.com/cli), for example: +It is also possible query these responses with the [AWS CLI](https://aws.amazon.com/cli), for +example: +```bash +aws dynamodb query --table-name http_cache > responses.json +``` + ```bash aws dynamodb query \ --table-name http_cache \ @@ -91,15 +96,23 @@ want to quickly test out DynamoDB as a cache backend, but in a production enviro likely want to create the tables yourself, for example with [CloudFormation](https://aws.amazon.com/cloudformation/) or [Terraform](https://www.terraform.io/). -Here are the details you will need: - +You just need a table with a single partition key. A `value` attribute (containing response data) +will be created dynamically once items are added to the table. - Table: `http_cache` (or any other name, as long as it matches the `table_name` parameter for `DynamoDbCache`) - Attributes: - - `namespace`: String - `key`: String - Keys: - - Partition key (aka namespace): `namespace` - - Range key (aka sort key): `key` + - Partition key (aka hash key): `key` + +Example of manually creating a table in the console: +:::{dropdown} Screenshot +:animate: fade-in-slide-down +:color: primary +:icon: file-media + +```{image} ../../_static/dynamodb_create_table.png +``` +::: ### Example CloudFormation Template :::{dropdown} Example diff --git a/examples/cloudformation.yml b/examples/cloudformation.yml index e88e0ab..fa83091 100644 --- a/examples/cloudformation.yml +++ b/examples/cloudformation.yml @@ -15,15 +15,11 @@ Resources: Properties: TableName: !Ref CacheTableName AttributeDefinitions: - - AttributeName: namespace - AttributeType: S - AttributeName: key AttributeType: S KeySchema: - - AttributeName: namespace - KeyType: HASH - AttributeName: key - KeyType: RANGE + KeyType: HASH # BillingMode: PAY_PER_REQUEST # Optional: Use provisioned throughput instead of on-demand -- cgit v1.2.1