summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Alexander Ignatyev <alexander.ignatyev@mongodb.com> 2022-08-30 08:14:49 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-08-30 09:32:51 +0000
commit 8c0bbf5b254db460344293eca72fb4b2422dc65b (patch)
tree c3221f43aaeb07e5e55d8b3576c5c6433e22b018
parent 3724e774d67d6ffb3bf1c2f52f7451eac38b5fd9 (diff)
download mongo-8c0bbf5b254db460344293eca72fb4b2422dc65b.tar.gz
SERVER-69089: Add ability to define indexes in config
-rw-r--r-- buildscripts/cost_model/config.json 28
-rw-r--r-- buildscripts/cost_model/config.py 9
-rw-r--r-- buildscripts/cost_model/data_generator.py 31
3 files changed, 46 insertions, 22 deletions
diff --git a/buildscripts/cost_model/config.json b/buildscripts/cost_model/config.json
index cf16b3e5ecb..c4e3c51f49e 100644
--- a/buildscripts/cost_model/config.json
+++ b/buildscripts/cost_model/config.json
@@ -31,12 +31,14 @@
{
"name": "choice1",
"type": "str",
- "distribution": "string_choice"
+ "distribution": "string_choice",
+ "indexed": true
},
{
"name": "mixed1",
"type": "str",
- "distribution": "string_mixed"
+ "distribution": "string_mixed",
+ "indexed": true
},
{
"name": "uniform1",
@@ -53,6 +55,16 @@
"type": "str",
"distribution": "string_mixed"
}
+ ],
+ "compoundIndexes": [
+ [
+ "choice1",
+ "mixed1"
+ ],
+ [
+ "uniform1",
+ "mixed2"
+ ]
]
},
{
@@ -61,7 +73,8 @@
{
"name": "choice1",
"type": "str",
- "distribution": "string_choice"
+ "distribution": "string_choice",
+ "indexed": true
},
{
"name": "mixed1",
@@ -76,7 +89,8 @@
{
"name": "choice2",
"type": "str",
- "distribution": "string_choice"
+ "distribution": "string_choice",
+ "indexed": true
},
{
"name": "mixed2",
@@ -91,7 +105,8 @@
{
"name": "choice3",
"type": "str",
- "distribution": "string_choice"
+ "distribution": "string_choice",
+ "indexed": true
},
{
"name": "mixed3",
@@ -106,7 +121,8 @@
{
"name": "choice4",
"type": "str",
- "distribution": "string_choice"
+ "distribution": "string_choice",
+ "indexed": true
}
]
}
diff --git a/buildscripts/cost_model/config.py b/buildscripts/cost_model/config.py
index af9e9607c52..b56d38d233e 100644
--- a/buildscripts/cost_model/config.py
+++ b/buildscripts/cost_model/config.py
@@ -151,13 +151,15 @@ class CollectionTemplate:
name: str
fields: Sequence[FieldTemplate]
+ compound_indexes: Sequence[Sequence[str]]
@staticmethod
def create(json_config: dict[str, any]) -> CollectionTemplate:
"""Create new template object from JSON."""
name = json_config['name']
fields = [FieldTemplate.create(jc) for jc in json_config['fields']]
- return CollectionTemplate(name=name, fields=fields)
+ compound_indexes = json_config.get('compoundIndexes', [])
+ return CollectionTemplate(name=name, fields=fields, compound_indexes=compound_indexes)
@dataclass
@@ -167,6 +169,7 @@ class FieldTemplate:
name: str
data_type: DataType
distribution: str
+ indexed: bool
@staticmethod
def create(json_config: dict[str, any]) -> FieldTemplate:
@@ -174,7 +177,9 @@ class FieldTemplate:
name = json_config['name']
data_type = DataType.parse(json_config['type'], 'type')
distribution = json_config['distribution']
- return FieldTemplate(name=name, data_type=data_type, distribution=distribution)
+ indexed = json_config.get('indexed', False)
+ return FieldTemplate(name=name, data_type=data_type, distribution=distribution,
+ indexed=indexed)
class DataType(Enum):
diff --git a/buildscripts/cost_model/data_generator.py b/buildscripts/cost_model/data_generator.py
index d984d6b298d..5f4601f5fb1 100644
--- a/buildscripts/cost_model/data_generator.py
+++ b/buildscripts/cost_model/data_generator.py
@@ -52,6 +52,7 @@ class FieldInfo:
name: str
type: DataType
distribution: RandomDistribution
+ indexed: bool
@dataclass
@@ -61,6 +62,7 @@ class CollectionInfo:
name: str
fields: Sequence[FieldInfo]
documents_count: int
+ compound_indexes: Sequence[Sequence[str]]
class DataGenerator:
@@ -95,7 +97,7 @@ class DataGenerator:
coll.drop()
self._populate_collection(coll, coll_info)
create_single_field_indexes(coll, coll_info.fields)
- create_compound_index(coll, coll_info.fields)
+ create_compound_indexes(coll, coll_info)
t1 = time.time()
print(f'\npopulate Collections took {t1-t0} s.')
@@ -104,12 +106,13 @@ class DataGenerator:
for coll_template in self.config.collection_templates:
fields = [
FieldInfo(name=ft.name, type=ft.data_type,
- distribution=distributions[ft.distribution])
+ distribution=distributions[ft.distribution], indexed=ft.indexed)
for ft in coll_template.fields
]
for doc_count in self.config.collection_cardinalities:
name = f'{coll_template.name}_{doc_count}'
- yield CollectionInfo(name=name, fields=fields, documents_count=doc_count)
+ yield CollectionInfo(name=name, fields=fields, documents_count=doc_count,
+ compound_indexes=coll_template.compound_indexes)
@timer_decorator
def _populate_collection(self, coll: Collection, coll_info: CollectionInfo) -> None:
@@ -143,25 +146,25 @@ def create_single_field_indexes(coll: Collection, fields: Sequence[FieldInfo]) -
t0 = time.time()
- indexes = [IndexModel([(field.name, pymongo.ASCENDING)]) for field in fields]
- coll.create_indexes(indexes)
+ indexes = [IndexModel([(field.name, pymongo.ASCENDING)]) for field in fields if field.indexed]
+ if len(indexes) > 0:
+ coll.create_indexes(indexes)
t1 = time.time()
print(f'createSingleFieldIndexes took {t1 - t0} s.')
-def create_compound_index(coll: Collection, fields: Sequence[FieldInfo]) -> None:
- """Create a coumpound index on the given collection."""
-
- field_names = [fi.name for fi in fields if fi.type != DataType.ARRAY]
- if len(field_names) < 2:
- print(f'Collection: {coll.name} not suitable for compound index')
- return
+def create_compound_indexes(coll: Collection, coll_info: CollectionInfo) -> None:
+ """Create a coumpound indexes on the given collection."""
t0 = time.time()
- index_spec = [(field, pymongo.ASCENDING) for field in field_names]
- coll.create_index(index_spec)
+ indexes_spec = []
+ for compound_index in coll_info.compound_indexes:
+ index_spec = IndexModel([(field, pymongo.ASCENDING) for field in compound_index])
+ indexes_spec.append(index_spec)
+ if len(indexes_spec) > 0:
+ coll.create_indexes(indexes_spec)
t1 = time.time()
print(f'createCompoundIndex took {t1 - t0} s.')