diff options
author | Alexander Ignatyev <alexander.ignatyev@mongodb.com> | 2022-11-11 11:38:41 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-11-11 12:20:20 +0000 |
commit | ea088bcf555050361ef313edf402025dd37788c2 (patch) | |
tree | 2f24431f747ee5edc5bf75c3a5fd01386ef69174 /buildscripts/cost_model/start.py | |
parent | dd6c21b4f7c6d32161a0c262dbd63a8c86b2a44d (diff) | |
download | mongo-ea088bcf555050361ef313edf402025dd37788c2.tar.gz |
SERVER-71220 Apply additional filters when calibrating Cost Model
Diffstat (limited to 'buildscripts/cost_model/start.py')
-rw-r--r-- | buildscripts/cost_model/start.py | 117 |
1 file changed, 42 insertions, 75 deletions
```diff
diff --git a/buildscripts/cost_model/start.py b/buildscripts/cost_model/start.py
index f95e010ad7f..6dbe4ef2cce 100644
--- a/buildscripts/cost_model/start.py
+++ b/buildscripts/cost_model/start.py
@@ -63,16 +63,41 @@ def save_to_csv(parameters: Mapping[str, Sequence[CostModelParameters]], filepat
                 writer.writerow(fields)
 
 
-async def execute_general(database: DatabaseInstance, collections: Sequence[CollectionInfo]):
+async def execute_index_scan_queries(database: DatabaseInstance,
+                                     collections: Sequence[CollectionInfo]):
+    collection = [ci for ci in collections if ci.name.startswith('index_scan')][0]
+    fields = [f for f in collection.fields if f.name == 'choice']
+
     requests = []
-    for val in distributions['string_choice'].get_values()[::3]:
-        keys_length = len(val) + 2
-        for i in range(1, 5):
+
+    for field in fields:
+        for val in field.distribution.get_values():
+            if val.startswith('_'):
+                continue
+            keys_length = len(val) + 2
             requests.append(
-                Query(pipeline=[{'$match': {f'choice{i}': val}}], keys_length_in_bytes=keys_length))
+                Query(pipeline=[{'$match': {field.name: val}}], keys_length_in_bytes=keys_length,
+                      note='IndexScan'))
 
-    await workload_execution.execute(database, main_config.workload_execution,
-                                     [ci for ci in collections if ci.name.startswith('c_str')],
+    await workload_execution.execute(database, main_config.workload_execution, [collection],
+                                     requests)
+
+
+async def execute_physical_scan_queries(database: DatabaseInstance,
+                                        collections: Sequence[CollectionInfo]):
+    collections = [ci for ci in collections if ci.name.startswith('physical_scan')]
+    fields = [f for f in collections[0].fields if f.name == 'choice']
+    requests = []
+    for field in fields:
+        for val in field.distribution.get_values()[::3]:
+            if val.startswith('_'):
+                continue
+            keys_length = len(val) + 2
+            requests.append(
+                Query(pipeline=[{'$match': {field.name: val}}], keys_length_in_bytes=keys_length,
+                      note='PhysicalScan'))
+
+    await workload_execution.execute(database, main_config.workload_execution, collections,
                                      requests)
 
 
@@ -151,62 +176,6 @@ async def execute_unwind(database: DatabaseInstance, collections: Sequence[Colle
                                      requests)
 
 
-async def execute_small_queries(database: DatabaseInstance, collections: Sequence[CollectionInfo]):
-    # strings
-    requests = []
-    for val in distributions['string_choice_small'].get_values():
-        keys_length = len(val) + 2
-        for i in range(1, 3):
-            requests.append(
-                Query(pipeline=[{'$match': {f'choice{i}': val}}], keys_length_in_bytes=keys_length))
-
-    await workload_execution.execute(database, main_config.workload_execution,
-                                     [ci for ci in collections if ci.name.startswith('c_str_02')],
-                                     requests)
-
-    # index intersection
-    colls = [ci for ci in collections if ci.name.startswith('c_int_05')]
-    requests = []
-
-    for val in distributions['int_choice'].get_values():
-        for val2 in distributions['int_choice'].get_values():
-            requests.append(
-                Query(pipeline=[{'$match': {'in1': val, 'in2': val2}}], keys_length_in_bytes=1))
-
-        requests.append(
-            Query(pipeline=[{'$match': {'in1': val, 'in2': {'$gt': 500}}}], keys_length_in_bytes=1))
-
-        requests.append(
-            Query(pipeline=[{'$match': {'in1': {'$lte': 500}, 'in2': val}}],
-                  keys_length_in_bytes=1))
-
-    await execute_index_intersections_with_requests(database, colls, requests)
-
-    # Evaluation
-    colls = [ci for ci in collections if ci.name.startswith('c_int_05')]
-    requests = []
-
-    for val in distributions['int_choice'].get_values():
-        requests.append(
-            Query(pipeline=[{"$match": {'in1': val}}, {'$project': {'proj1': 1}}],
-                  keys_length_in_bytes=1, number_of_fields=1))
-
-    await workload_execution.execute(database, main_config.workload_execution, colls, requests)
-
-    # Unwind
-    colls = [ci for ci in collections if ci.name.startswith('c_arr_01')]
-    requests = []
-    # average size of arrays in the collection
-    average_size_of_arrays = 10
-
-    for val in distributions['int_choice'].get_values():
-        requests.append(
-            Query(pipeline=[{"$match": {'in1': val}}, {"$unwind": "$as"}],
-                  number_of_fields=average_size_of_arrays))
-
-    await workload_execution.execute(database, main_config.workload_execution, colls, requests)
-
-
 async def main():
     """Entry point function."""
     script_directory = os.path.abspath(os.path.dirname(__file__))
@@ -222,18 +191,16 @@ async def main():
 
     # 3. Collecting data for calibration (optional).
     # It runs the pipelines and stores explains to the database.
-
-    # Run this execute function only to collect calibration data in the "smaller" experiment.
-    # await execute_small_queries(database, generator.collection_infos);
-
-    await execute_general(database, generator.collection_infos)
-    main_config.workload_execution.write_mode = WriteMode.APPEND
-
-    await execute_index_intersections(database, generator.collection_infos)
-
-    await execute_evaluation(database, generator.collection_infos)
-
-    await execute_unwind(database, generator.collection_infos)
+    execution_query_functions = [
+        execute_index_scan_queries,
+        execute_physical_scan_queries,
+        execute_index_intersections,
+        execute_evaluation,
+        execute_unwind,
+    ]
+    for execute_query in execution_query_functions:
+        await execute_query(database, generator.collection_infos)
+        main_config.workload_execution.write_mode = WriteMode.APPEND
 
     # Calibration phase (optional).
     # Reads the explains stored on the previous step (this run and/or previous runs),
```