diff options
author | Timour Katchaounov <timour.katchaounov@mongodb.com> | 2023-02-15 15:01:33 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-02-15 16:32:12 +0000 |
commit | 5af5e4a1e8bc6930f837776cb533333d053a32f1 (patch) | |
tree | ce130cdfab062ee2c47856e1050df62cf57ab747 /buildscripts/cost_model/ce_generate_data.py | |
parent | 9962320d74254ec0517ba4f478386f7f0bcb1f9e (diff) | |
download | mongo-5af5e4a1e8bc6930f837776cb533333d053a32f1.tar.gz |
SERVER-73031 Generate random data with mixed data types
* Added generation of random data with mixed data types
* Generation of random dates and doubles
* Some refactoring of the Python generation framework with respect to types
Diffstat (limited to 'buildscripts/cost_model/ce_generate_data.py')
-rw-r--r-- | buildscripts/cost_model/ce_generate_data.py | 8 |
1 files changed, 7 insertions, 1 deletions
```diff
diff --git a/buildscripts/cost_model/ce_generate_data.py b/buildscripts/cost_model/ce_generate_data.py
index f9aea10e1a7..01346ff639a 100644
--- a/buildscripts/cost_model/ce_generate_data.py
+++ b/buildscripts/cost_model/ce_generate_data.py
@@ -29,6 +29,7 @@
 
 import asyncio
 import dataclasses
+from datetime import datetime
 import json
 import math
 import os
@@ -38,10 +39,11 @@ from pathlib import Path
 import seaborn as sns
 import bson
 import matplotlib.pyplot as plt
-from config import CollectionTemplate, FieldTemplate, DataType
+from config import CollectionTemplate, FieldTemplate
 from data_generator import CollectionInfo, DataGenerator
 from database_instance import DatabaseInstance
 import parameters_extractor
+from random_generator import DataType
 from ce_data_settings import database_config, data_generator_config
 
 __all__ = []
@@ -73,6 +75,8 @@ class OidEncoder(json.JSONEncoder):
             # Replace the OID with a consequtive int number as needed by the query generator
             OidEncoder.cur_oid += 1
             return OidEncoder.cur_oid
+        if isinstance(o, datetime):
+            return str(o)
         return super(OidEncoder, self).default(o)
 
 
@@ -107,6 +111,8 @@ async def generate_histograms(coll_template, coll, dump_path):
     doc_count = await coll.count_documents({})
     for field in coll_template.fields:
         field_data = []
+        if re.match('^mixeddata_.*', field.name):
+            continue
         async for doc in coll.find({field.name: {"$exists": True}}, {"_id": 0, field.name: 1}):
             field_val = doc[field.name]
             if isinstance(field_val, str):
```