summaryrefslogtreecommitdiff
path: root/buildscripts/cost_model/start.py
diff options
context:
space:
mode:
Diffstat (limited to 'buildscripts/cost_model/start.py')
-rw-r--r-- buildscripts/cost_model/start.py | 117
1 files changed, 42 insertions, 75 deletions
diff --git a/buildscripts/cost_model/start.py b/buildscripts/cost_model/start.py
index f95e010ad7f..6dbe4ef2cce 100644
--- a/buildscripts/cost_model/start.py
+++ b/buildscripts/cost_model/start.py
@@ -63,16 +63,41 @@ def save_to_csv(parameters: Mapping[str, Sequence[CostModelParameters]], filepat
writer.writerow(fields)
-async def execute_general(database: DatabaseInstance, collections: Sequence[CollectionInfo]):
+async def execute_index_scan_queries(database: DatabaseInstance,
+ collections: Sequence[CollectionInfo]):
+ collection = [ci for ci in collections if ci.name.startswith('index_scan')][0]
+ fields = [f for f in collection.fields if f.name == 'choice']
+
requests = []
- for val in distributions['string_choice'].get_values()[::3]:
- keys_length = len(val) + 2
- for i in range(1, 5):
+
+ for field in fields:
+ for val in field.distribution.get_values():
+ if val.startswith('_'):
+ continue
+ keys_length = len(val) + 2
requests.append(
- Query(pipeline=[{'$match': {f'choice{i}': val}}], keys_length_in_bytes=keys_length))
+ Query(pipeline=[{'$match': {field.name: val}}], keys_length_in_bytes=keys_length,
+ note='IndexScan'))
- await workload_execution.execute(database, main_config.workload_execution,
- [ci for ci in collections if ci.name.startswith('c_str')],
+ await workload_execution.execute(database, main_config.workload_execution, [collection],
+ requests)
+
+
+async def execute_physical_scan_queries(database: DatabaseInstance,
+ collections: Sequence[CollectionInfo]):
+ collections = [ci for ci in collections if ci.name.startswith('physical_scan')]
+ fields = [f for f in collections[0].fields if f.name == 'choice']
+ requests = []
+ for field in fields:
+ for val in field.distribution.get_values()[::3]:
+ if val.startswith('_'):
+ continue
+ keys_length = len(val) + 2
+ requests.append(
+ Query(pipeline=[{'$match': {field.name: val}}], keys_length_in_bytes=keys_length,
+ note='PhysicalScan'))
+
+ await workload_execution.execute(database, main_config.workload_execution, collections,
requests)
@@ -151,62 +176,6 @@ async def execute_unwind(database: DatabaseInstance, collections: Sequence[Colle
requests)
-async def execute_small_queries(database: DatabaseInstance, collections: Sequence[CollectionInfo]):
- # strings
- requests = []
- for val in distributions['string_choice_small'].get_values():
- keys_length = len(val) + 2
- for i in range(1, 3):
- requests.append(
- Query(pipeline=[{'$match': {f'choice{i}': val}}], keys_length_in_bytes=keys_length))
-
- await workload_execution.execute(database, main_config.workload_execution,
- [ci for ci in collections if ci.name.startswith('c_str_02')],
- requests)
-
- # index intersection
- colls = [ci for ci in collections if ci.name.startswith('c_int_05')]
- requests = []
-
- for val in distributions['int_choice'].get_values():
- for val2 in distributions['int_choice'].get_values():
- requests.append(
- Query(pipeline=[{'$match': {'in1': val, 'in2': val2}}], keys_length_in_bytes=1))
-
- requests.append(
- Query(pipeline=[{'$match': {'in1': val, 'in2': {'$gt': 500}}}], keys_length_in_bytes=1))
-
- requests.append(
- Query(pipeline=[{'$match': {'in1': {'$lte': 500}, 'in2': val}}],
- keys_length_in_bytes=1))
-
- await execute_index_intersections_with_requests(database, colls, requests)
-
- # Evaluation
- colls = [ci for ci in collections if ci.name.startswith('c_int_05')]
- requests = []
-
- for val in distributions['int_choice'].get_values():
- requests.append(
- Query(pipeline=[{"$match": {'in1': val}}, {'$project': {'proj1': 1}}],
- keys_length_in_bytes=1, number_of_fields=1))
-
- await workload_execution.execute(database, main_config.workload_execution, colls, requests)
-
- # Unwind
- colls = [ci for ci in collections if ci.name.startswith('c_arr_01')]
- requests = []
- # average size of arrays in the collection
- average_size_of_arrays = 10
-
- for val in distributions['int_choice'].get_values():
- requests.append(
- Query(pipeline=[{"$match": {'in1': val}}, {"$unwind": "$as"}],
- number_of_fields=average_size_of_arrays))
-
- await workload_execution.execute(database, main_config.workload_execution, colls, requests)
-
-
async def main():
"""Entry point function."""
script_directory = os.path.abspath(os.path.dirname(__file__))
@@ -222,18 +191,16 @@ async def main():
# 3. Collecting data for calibration (optional).
# It runs the pipelines and stores explains to the database.
-
- # Run this execute function only to collect calibration data in the "smaller" experiment.
- # await execute_small_queries(database, generator.collection_infos);
-
- await execute_general(database, generator.collection_infos)
- main_config.workload_execution.write_mode = WriteMode.APPEND
-
- await execute_index_intersections(database, generator.collection_infos)
-
- await execute_evaluation(database, generator.collection_infos)
-
- await execute_unwind(database, generator.collection_infos)
+ execution_query_functions = [
+ execute_index_scan_queries,
+ execute_physical_scan_queries,
+ execute_index_intersections,
+ execute_evaluation,
+ execute_unwind,
+ ]
+ for execute_query in execution_query_functions:
+ await execute_query(database, generator.collection_infos)
+ main_config.workload_execution.write_mode = WriteMode.APPEND
# Calibration phase (optional).
# Reads the explains stored on the previous step (this run and/or previous runs),