#!/usr/bin/env python
"""An example of generating a test database with a large number of semi-randomized responses.
This is useful, for example, for reproducing issues that only occur with large caches.
"""
import logging
from datetime import datetime, timedelta
from os import urandom
from os.path import getsize
from random import random
from time import perf_counter as time

import requests
from rich.progress import Progress

from requests_cache import ALL_METHODS, CachedResponse, CachedSession
from requests_cache.models.response import format_file_size
from tests.conftest import HTTPBIN_FORMATS, HTTPBIN_METHODS

BACKEND = 'sqlite'
CACHE_NAME = 'rubbish_bin'
MAX_EXPIRE_AFTER = 30  # In seconds; set to -1 to disable expiration
MAX_RESPONSE_SIZE = 10000  # In bytes
N_RESPONSES = 100000
N_INVALID_RESPONSES = 10
BASE_RESPONSE = requests.get('https://httpbin.org/get')
HTTPBIN_EXTRA_ENDPOINTS = [
    'anything',
    'bytes/1024',
    'cookies',
    'ip',
    'redirect/5',
    'stream-bytes/1024',
]

logging.basicConfig(level='INFO')
logger = logging.getLogger('requests_cache')


class InvalidResponse(CachedResponse):
    """Response that will raise an exception when deserialized"""

    def __setstate__(self, d):
        raise ValueError


def populate_cache(progress, task):
    session = CachedSession(CACHE_NAME, backend=BACKEND, allowable_methods=ALL_METHODS)
    n_previous_responses = len(session.cache.responses)

    # Cache a variety of different response formats, which may result in different behavior
    urls = [
        ('GET', f'https://httpbin.org/{endpoint}')
        for endpoint in HTTPBIN_FORMATS + HTTPBIN_EXTRA_ENDPOINTS
    ]
    urls += [(method, f'https://httpbin.org/{method.lower()}') for method in HTTPBIN_METHODS]
    for method, url in urls:
        session.request(method, url)
        progress.update(task, advance=1)

    # Cache a large number of responses with randomized content, which will expire at random times
    with session.cache.responses.bulk_commit():
        for i in range(N_RESPONSES):
            new_response = get_randomized_response(i + n_previous_responses)
            if MAX_EXPIRE_AFTER >= 0:
                expires = datetime.now() + timedelta(seconds=random() * MAX_EXPIRE_AFTER)
            else:
                expires = None
            session.cache.save_response(new_response, expires=expires)
            progress.update(task, advance=1)

    # Add some invalid responses, to test error handling during deserialization
    with session.cache.responses.bulk_commit():
        for i in range(N_INVALID_RESPONSES):
            new_response = InvalidResponse.from_response(BASE_RESPONSE)
            new_response.request.url += f'/invalid_response_{i}'
            key = session.cache.create_key(new_response.request)
            session.cache.responses[key] = new_response
            progress.update(task, advance=1)


def get_randomized_response(i=0):
    """Get a response with randomized content and a unique URL"""
    new_response = CachedResponse.from_response(BASE_RESPONSE)
    n_bytes = int(random() * MAX_RESPONSE_SIZE)
    new_response._content = urandom(n_bytes)
    new_response.request.url += f'/response_{i}'
    return new_response


def remove_expired_responses():
    """Delete expired responses from the cache, and log how long it took"""
    logger.setLevel('DEBUG')
    session = CachedSession(CACHE_NAME)
    total_responses = len(session.cache.responses)

    start = time()
    session.cache.delete(expired=True)
    elapsed = time() - start
    n_removed = total_responses - len(session.cache.responses)
    # Guard against division by zero if nothing was removed
    avg_ms = (elapsed / n_removed) * 1000 if n_removed else 0
    logger.info(
        f'Removed {n_removed} expired/invalid responses in {elapsed:.2f} seconds '
        f'(avg {avg_ms:.2f}ms per response)'
    )


def main():
    total_responses = len(HTTPBIN_FORMATS + HTTPBIN_EXTRA_ENDPOINTS + HTTPBIN_METHODS)
    total_responses += N_RESPONSES + N_INVALID_RESPONSES
    with Progress() as progress:
        task = progress.add_task('[cyan]Generating responses...', total=total_responses)
        populate_cache(progress, task)

    actual_total_responses = len(CachedSession(CACHE_NAME).cache.responses)
    logger.info(f'Generated cache with {actual_total_responses} responses')
    if BACKEND == 'sqlite':
        cache_file_size = format_file_size(getsize(f'{CACHE_NAME}.sqlite'))
        logger.info(f'Total cache size: {cache_file_size}')


if __name__ == '__main__':
    main()
    # Remove some responses (with randomized expiration)
    # remove_expired_responses()
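# A minimal sketch of a follow-up check one might run against the generated cache,
# assuming the same BACKEND/CACHE_NAME as above. `check_read_performance` is a
# hypothetical helper (not part of requests-cache or this script): it times reads
# of individual cached responses, skipping the deliberately invalid ones, which
# raise an error on deserialization.
#
# def check_read_performance(n_reads=1000):
#     session = CachedSession(CACHE_NAME, backend=BACKEND)
#     keys = list(session.cache.responses.keys())[:n_reads]
#     start = time()
#     n_errors = 0
#     for key in keys:
#         try:
#             session.cache.responses[key]
#         except Exception:
#             n_errors += 1
#     elapsed = time() - start
#     logger.info(f'Read {len(keys) - n_errors} responses in {elapsed:.2f} seconds')
#
# check_read_performance()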