diff options
author | Alexander Ignatyev <alexander.ignatyev@mongodb.com> | 2022-08-30 08:14:49 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-08-30 09:32:51 +0000 |
commit | 8c0bbf5b254db460344293eca72fb4b2422dc65b (patch) | |
tree | c3221f43aaeb07e5e55d8b3576c5c6433e22b018 | |
parent | 3724e774d67d6ffb3bf1c2f52f7451eac38b5fd9 (diff) | |
download | mongo-8c0bbf5b254db460344293eca72fb4b2422dc65b.tar.gz |
SERVER-69089: Add ability to define indexes in config
-rw-r--r-- | buildscripts/cost_model/config.json | 28 |
-rw-r--r-- | buildscripts/cost_model/config.py | 9 |
-rw-r--r-- | buildscripts/cost_model/data_generator.py | 31 |
3 files changed, 46 insertions, 22 deletions
```diff
diff --git a/buildscripts/cost_model/config.json b/buildscripts/cost_model/config.json
index cf16b3e5ecb..c4e3c51f49e 100644
--- a/buildscripts/cost_model/config.json
+++ b/buildscripts/cost_model/config.json
@@ -31,12 +31,14 @@
                     {
                         "name": "choice1",
                         "type": "str",
-                        "distribution": "string_choice"
+                        "distribution": "string_choice",
+                        "indexed": true
                     },
                     {
                         "name": "mixed1",
                         "type": "str",
-                        "distribution": "string_mixed"
+                        "distribution": "string_mixed",
+                        "indexed": true
                     },
                     {
                         "name": "uniform1",
@@ -53,6 +55,16 @@
                         "type": "str",
                         "distribution": "string_mixed"
                     }
+                ],
+                "compoundIndexes": [
+                    [
+                        "choice1",
+                        "mixed1"
+                    ],
+                    [
+                        "uniform1",
+                        "mixed2"
+                    ]
                 ]
             },
             {
@@ -61,7 +73,8 @@
                     {
                         "name": "choice1",
                         "type": "str",
-                        "distribution": "string_choice"
+                        "distribution": "string_choice",
+                        "indexed": true
                     },
                     {
                         "name": "mixed1",
@@ -76,7 +89,8 @@
                     {
                         "name": "choice2",
                         "type": "str",
-                        "distribution": "string_choice"
+                        "distribution": "string_choice",
+                        "indexed": true
                     },
                     {
                         "name": "mixed2",
@@ -91,7 +105,8 @@
                     {
                         "name": "choice3",
                         "type": "str",
-                        "distribution": "string_choice"
+                        "distribution": "string_choice",
+                        "indexed": true
                     },
                     {
                         "name": "mixed3",
@@ -106,7 +121,8 @@
                     {
                         "name": "choice4",
                         "type": "str",
-                        "distribution": "string_choice"
+                        "distribution": "string_choice",
+                        "indexed": true
                     }
                 ]
             }
diff --git a/buildscripts/cost_model/config.py b/buildscripts/cost_model/config.py
index af9e9607c52..b56d38d233e 100644
--- a/buildscripts/cost_model/config.py
+++ b/buildscripts/cost_model/config.py
@@ -151,13 +151,15 @@ class CollectionTemplate:
 
     name: str
     fields: Sequence[FieldTemplate]
+    compound_indexes: Sequence[Sequence[str]]
 
     @staticmethod
     def create(json_config: dict[str, any]) -> CollectionTemplate:
         """Create new template object from JSON."""
         name = json_config['name']
         fields = [FieldTemplate.create(jc) for jc in json_config['fields']]
-        return CollectionTemplate(name=name, fields=fields)
+        compound_indexes = json_config.get('compoundIndexes', [])
+        return CollectionTemplate(name=name, fields=fields, compound_indexes=compound_indexes)
 
 
 @dataclass
@@ -167,6 +169,7 @@ class FieldTemplate:
     name: str
     data_type: DataType
     distribution: str
+    indexed: bool
 
     @staticmethod
     def create(json_config: dict[str, any]) -> FieldTemplate:
@@ -174,7 +177,9 @@ class FieldTemplate:
         name = json_config['name']
         data_type = DataType.parse(json_config['type'], 'type')
         distribution = json_config['distribution']
-        return FieldTemplate(name=name, data_type=data_type, distribution=distribution)
+        indexed = json_config.get('indexed', False)
+        return FieldTemplate(name=name, data_type=data_type, distribution=distribution,
+                             indexed=indexed)
 
 
 class DataType(Enum):
diff --git a/buildscripts/cost_model/data_generator.py b/buildscripts/cost_model/data_generator.py
index d984d6b298d..5f4601f5fb1 100644
--- a/buildscripts/cost_model/data_generator.py
+++ b/buildscripts/cost_model/data_generator.py
@@ -52,6 +52,7 @@ class FieldInfo:
     name: str
     type: DataType
     distribution: RandomDistribution
+    indexed: bool
 
 
 @dataclass
@@ -61,6 +62,7 @@ class CollectionInfo:
     name: str
     fields: Sequence[FieldInfo]
     documents_count: int
+    compound_indexes: Sequence[Sequence[str]]
 
 
 class DataGenerator:
@@ -95,7 +97,7 @@ class DataGenerator:
             coll.drop()
             self._populate_collection(coll, coll_info)
             create_single_field_indexes(coll, coll_info.fields)
-            create_compound_index(coll, coll_info.fields)
+            create_compound_indexes(coll, coll_info)
 
         t1 = time.time()
         print(f'\npopulate Collections took {t1-t0} s.')
@@ -104,12 +106,13 @@ class DataGenerator:
         for coll_template in self.config.collection_templates:
             fields = [
                 FieldInfo(name=ft.name, type=ft.data_type,
-                          distribution=distributions[ft.distribution])
+                          distribution=distributions[ft.distribution], indexed=ft.indexed)
                 for ft in coll_template.fields
             ]
             for doc_count in self.config.collection_cardinalities:
                 name = f'{coll_template.name}_{doc_count}'
-                yield CollectionInfo(name=name, fields=fields, documents_count=doc_count)
+                yield CollectionInfo(name=name, fields=fields, documents_count=doc_count,
+                                     compound_indexes=coll_template.compound_indexes)
 
     @timer_decorator
     def _populate_collection(self, coll: Collection, coll_info: CollectionInfo) -> None:
@@ -143,25 +146,25 @@ def create_single_field_indexes(coll: Collection, fields: Sequence[FieldInfo]) -
 
     t0 = time.time()
 
-    indexes = [IndexModel([(field.name, pymongo.ASCENDING)]) for field in fields]
-    coll.create_indexes(indexes)
+    indexes = [IndexModel([(field.name, pymongo.ASCENDING)]) for field in fields if field.indexed]
+    if len(indexes) > 0:
+        coll.create_indexes(indexes)
 
     t1 = time.time()
     print(f'createSingleFieldIndexes took {t1 - t0} s.')
 
 
-def create_compound_index(coll: Collection, fields: Sequence[FieldInfo]) -> None:
-    """Create a coumpound index on the given collection."""
-
-    field_names = [fi.name for fi in fields if fi.type != DataType.ARRAY]
-    if len(field_names) < 2:
-        print(f'Collection: {coll.name} not suitable for compound index')
-        return
+def create_compound_indexes(coll: Collection, coll_info: CollectionInfo) -> None:
+    """Create a coumpound indexes on the given collection."""
 
     t0 = time.time()
 
-    index_spec = [(field, pymongo.ASCENDING) for field in field_names]
-    coll.create_index(index_spec)
+    indexes_spec = []
+    for compound_index in coll_info.compound_indexes:
+        index_spec = IndexModel([(field, pymongo.ASCENDING) for field in compound_index])
+        indexes_spec.append(index_spec)
+    if len(indexes_spec) > 0:
+        coll.create_indexes(indexes_spec)
 
     t1 = time.time()
     print(f'createCompoundIndex took {t1 - t0} s.')
```