summary | refs | log | tree | commit | diff
path: root/buildscripts/cost_model/ce_generate_data.py
diff options
context:
space:
mode:
author: Timour Katchaounov <timour.katchaounov@mongodb.com> 2023-02-15 15:01:33 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2023-02-15 16:32:12 +0000
commit: 5af5e4a1e8bc6930f837776cb533333d053a32f1 (patch)
tree: ce130cdfab062ee2c47856e1050df62cf57ab747 /buildscripts/cost_model/ce_generate_data.py
parent: 9962320d74254ec0517ba4f478386f7f0bcb1f9e (diff)
download: mongo-5af5e4a1e8bc6930f837776cb533333d053a32f1.tar.gz
SERVER-73031 Generate random data with mixed data types
* Added generation of random data with mixed data types
* Generation of random dates and doubles
* Some refactoring of the Python generation framework with respect to types
Diffstat (limited to 'buildscripts/cost_model/ce_generate_data.py')
-rw-r--r-- buildscripts/cost_model/ce_generate_data.py 8
1 files changed, 7 insertions, 1 deletions
diff --git a/buildscripts/cost_model/ce_generate_data.py b/buildscripts/cost_model/ce_generate_data.py
index f9aea10e1a7..01346ff639a 100644
--- a/buildscripts/cost_model/ce_generate_data.py
+++ b/buildscripts/cost_model/ce_generate_data.py
@@ -29,6 +29,7 @@
import asyncio
import dataclasses
+from datetime import datetime
import json
import math
import os
@@ -38,10 +39,11 @@ from pathlib import Path
import seaborn as sns
import bson
import matplotlib.pyplot as plt
-from config import CollectionTemplate, FieldTemplate, DataType
+from config import CollectionTemplate, FieldTemplate
from data_generator import CollectionInfo, DataGenerator
from database_instance import DatabaseInstance
import parameters_extractor
+from random_generator import DataType
from ce_data_settings import database_config, data_generator_config
__all__ = []
@@ -73,6 +75,8 @@ class OidEncoder(json.JSONEncoder):
# Replace the OID with a consecutive int number as needed by the query generator
OidEncoder.cur_oid += 1
return OidEncoder.cur_oid
+ if isinstance(o, datetime):
+ return str(o)
return super(OidEncoder, self).default(o)
@@ -107,6 +111,8 @@ async def generate_histograms(coll_template, coll, dump_path):
doc_count = await coll.count_documents({})
for field in coll_template.fields:
field_data = []
+ if re.match('^mixeddata_.*', field.name):
+ continue
async for doc in coll.find({field.name: {"$exists": True}}, {"_id": 0, field.name: 1}):
field_val = doc[field.name]
if isinstance(field_val, str):