From c1f5f78e39501d1bfd05c916bc8838ea604d001f Mon Sep 17 00:00:00 2001
From: Charlie Swanson
Date: Mon, 19 Apr 2021 09:44:29 -0400
Subject: Add performance test scripts

---
 data_loader.js     | 148 +++++++++++++++++++++++++++++++++++++++++++
 skunk_2021_perf.js | 182 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 skunk_shared.js    | 122 +++++++++++++++++++++++++++++++++++
 3 files changed, 452 insertions(+)
 create mode 100644 data_loader.js
 create mode 100644 skunk_2021_perf.js
 create mode 100644 skunk_shared.js

diff --git a/data_loader.js b/data_loader.js
new file mode 100644
index 00000000000..51536467b38
--- /dev/null
+++ b/data_loader.js
@@ -0,0 +1,148 @@
+(function() {
+"use strict";
+
+load("skunk_shared.js");  // For 'SharedSkunkState.'
+
+// One collection per schema/index configuration under test.
+const collNaive = db.naive;
+const collAttributePattern = db.attribute_pattern;
+const collEnhancedAttributePattern = db.enhanced_attribute_pattern;
+const collSingleWildCard = db.wildcard;  // Only referenced by the disabled wildcard run below.
+const collCompoundWildCard = db.compound_wildcard;
+
+// Naive configuration: one compound index per attribute.
+const kIndexesForNaive = [
+    {"field1": 1, "field2": 1, "field3": 1, "attributes.attr1": 1},
+    {"field1": 1, "field2": 1, "field3": 1, "attributes.attr2": 1},
+    {"field1": 1, "field2": 1, "field3": 1, "attributes.attr3": 1},
+    {"field1": 1, "field2": 1, "field3": 1, "attributes.attr4": 1},
+    {"field1": 1, "field2": 1, "field3": 1, "attributes.attr5": 1},
+    {"field1": 1, "field2": 1, "field3": 1, "attributes.attr6": 1},
+    {"field1": 1, "field2": 1, "field3": 1, "attributes.attr7": 1},
+    {"field1": 1, "field2": 1, "field3": 1, "attributes.attr8": 1},
+    {"field1": 1, "field2": 1, "field3": 1, "attributes.attr9": 1},
+    {"field1": 1, "field2": 1, "field3": 1, "attributes.attr10": 1}
+];
+
+// Attribute pattern: a single compound index ending in the attribute keys and values.
+const kIndexesForAttributes =
+    [{"field1": 1, "field2": 1, "field3": 1, "attributes.k": 1, "attributes.v": 1}];
+// Enhanced attribute pattern: a single compound index ending in the whole 'attributes' field.
+const kIndexesForEnhancedAttributes = [{"field1": 1, "field2": 1, "field3": 1, "attributes": 1}];
+
+// Only referenced by the disabled wildcard run below.
+const kIndexesForWildcard = [{"attributes.$**": 1}];
+const kIndexesForCompoundWildcard = [{field1: 1, field2: 1, field3: 1, "attributes.$**": 1}];
+
+/**
+ * Generates a document with random values based on a template.
+ *
+ * 'template':
+ * Every field other than 'SharedSkunkState.kAttributesField' must hold a scalar example value; the
+ * output has the same field names with random values of the same type. The attributes field must
+ * be either a sub-document whose fields are all scalar (no arrays, no sub-documents) or an array
+ * of {k, v} entries where 'k' is the attribute name and 'v' is a scalar example value.
+ *
+ * If 'add_id' is true, generates a new ObjectId for the _id.
+ *
+ * Returns an object with random values based on the template.
+ */
+function getBaseDocument(template, add_id) {
+    const output = {};
+    for (const [key, value] of Object.entries(template)) {
+        if (key !== SharedSkunkState.kAttributesField) {
+            // Not the attributes field: get a random value based on the example value's type.
+            output[key] = SharedSkunkState.getRandomValue(value);
+        } else if (Array.isArray(value)) {
+            // Array of {k, v} entries: keep each key and randomize each value.
+            output[key] =
+                value.map(entry => ({k: entry.k, v: SharedSkunkState.getRandomValue(entry.v)}));
+        } else {
+            // Sub-document of attributes: randomize each field.
+            const attributes = {};
+            for (const [attrKey, attrValue] of Object.entries(value)) {
+                attributes[attrKey] = SharedSkunkState.getRandomValue(attrValue);
+            }
+            output[key] = attributes;
+        }
+    }
+    // Overrides any _id taken from the template. Done once after the loop instead of once per
+    // template field.
+    if (add_id) {
+        output["_id"] = new ObjectId();
+    }
+    return output;
+}
+
+/**
+ * Creates 'indexes' on 'collection', then bulk-inserts 'kNumDocs' random documents generated from
+ * 'template'. Only the execution of the bulk insert is timed.
+ *
+ * Returns a pair: the bulk insert time and the 'indexSizes' from $collStats (scale 1024 * 1024).
+ */
+function loadData(collection, indexes, template) {
+    const kNumDocs = 100 * 1000;
+    jsTestLog("Building indexes: " + tojson(indexes));
+    for (const index of indexes) {
+        collection.createIndex(index);
+    }
+    jsTestLog("Building bulk op for insert... Example doc: " + tojson(getBaseDocument(template)));
+    const bulkOp = collection.initializeUnorderedBulkOp();
+    for (let docId = 0; docId < kNumDocs; ++docId) {
+        bulkOp.insert(getBaseDocument(template));
+    }
+    jsTestLog("Starting clock for insert...");
+    const elapsed = Date.timeFunc(() => bulkOp.execute());
+    jsTestLog(`Loading data done: ${elapsed}ms`);
+    const indexStats = collection.aggregate([{$collStats: {storageStats: {scale: 1024 * 1024}}}])
+                           .toArray()[0]
+                           .storageStats.indexSizes;
+    jsTestLog(`Index stats: ${tojson(indexStats)}`);
+    return [elapsed, indexStats];
+}
+
+let allStats = {};
+jsTestLog("Loading data for naive configuration...");
+let [elapsed, indexStats] = loadData(collNaive, kIndexesForNaive, SharedSkunkState.kTemplateDoc);
+allStats.naive = {
+    loadingTime: elapsed,
+    indexStats: indexStats,
+};
+
+jsTestLog("Loading data for attribute configuration...");
+[elapsed, indexStats] =
+    loadData(collAttributePattern, kIndexesForAttributes, SharedSkunkState.kAttributeTemplateDoc);
+allStats.attributePattern = {
+    loadingTime: elapsed,
+    indexStats: indexStats,
+};
+
+jsTestLog("Loading data for enhanced attribute configuration...");
+[elapsed, indexStats] = loadData(collEnhancedAttributePattern,
+                                 kIndexesForEnhancedAttributes,
+                                 SharedSkunkState.kAttributeTemplateDoc);
+allStats.enhancedAttributePattern = {
+    loadingTime: elapsed,
+    indexStats: indexStats
+};
+
+/*
+jsTestLog("Loading data for wildcard configuration...");
+[elapsed, indexStats] =
+    loadData(collSingleWildCard, kIndexesForWildcard, SharedSkunkState.kTemplateDoc);
+allStats.singleWildcard = {
+    loadingTime: elapsed,
+    indexStats: indexStats,
+};
+*/
+
+jsTestLog("Loading data for compound wildcard configuration...");
+[elapsed, indexStats] =
+    loadData(collCompoundWildCard, kIndexesForCompoundWildcard, SharedSkunkState.kTemplateDoc);
+allStats.compoundWildcard = {
+    loadingTime: elapsed,
+    indexStats: indexStats,
+};
+
+jsTestLog("Finished! " + tojson(allStats));
+}());
diff --git a/skunk_2021_perf.js b/skunk_2021_perf.js
new file mode 100644
index 00000000000..ab683086466
--- /dev/null
+++ b/skunk_2021_perf.js
@@ -0,0 +1,182 @@
+(function() {
+"use strict";
+
+load("skunk_shared.js");  // For 'SharedSkunkState.'
+
+const collNaive = db.naive;
+const collAttributePattern = db.attribute_pattern;
+const collEnhancedAttributePattern = db.enhanced_attribute_pattern;
+const collSingleWildCard = db.wildcard;  // Only referenced by the disabled wildcard run below.
+const collCompoundWildCard = db.compound_wildcard;
+
+/**
+ * Appends 'numAttrs' equality predicates, each on a distinct randomly chosen attribute, to
+ * 'andClauses'.
+ *
+ * 'attributes' is the attributes field of a template document: either an array of {k, v} entries
+ * or a sub-document of scalar example values. For the array form, a truthy
+ * 'useEnhancedAttributePattern' matches the whole {k, v} entry by equality instead of using
+ * $elemMatch.
+ */
+function addAttributesToQuery(andClauses, attributes, numAttrs, useEnhancedAttributePattern) {
+    if (Array.isArray(attributes)) {
+        assert.lte(numAttrs, attributes.length);
+        // Be sure to avoid selecting 'numAttrs' with replacement. If we allow specifying the same
+        // attribute twice, we are likely to specify a query which is tautologically false such as
+        // attr1 = 4 and attr1 = 7.
+        const candidates = attributes.slice();  // Make a copy to avoid modifying the template.
+        for (let i = 0; i < numAttrs; i++) {
+            const selectedEntryIndex = Random.randInt(candidates.length);
+            const attrEntry = candidates[selectedEntryIndex];
+            candidates.splice(selectedEntryIndex, 1);  // Take it out to avoid selecting it again.
+            const predicate = {k: attrEntry.k, v: SharedSkunkState.getRandomValue(attrEntry.v)};
+            andClauses.push({
+                [SharedSkunkState.kAttributesField]:
+                    useEnhancedAttributePattern ? predicate : {$elemMatch: predicate}
+            });
+        }
+    } else {
+        const attrEntries = Object.entries(attributes);
+        assert.lte(numAttrs, attrEntries.length);
+        // Same as above: select without replacement. 'attrEntries' is already a fresh array, so
+        // removing entries from it does not modify the template.
+        for (let i = 0; i < numAttrs; i++) {
+            const selectedEntryIndex = Random.randInt(attrEntries.length);
+            const [attrKey, attrVal] = attrEntries[selectedEntryIndex];
+            attrEntries.splice(selectedEntryIndex, 1);  // Take it out to avoid selecting it again.
+            andClauses.push({
+                [`${SharedSkunkState.kAttributesField}.${attrKey}`]:
+                    SharedSkunkState.getRandomValue(attrVal)
+            });
+        }
+    }
+}
+
+// Builds a compound equality query based off the template doc querying 'numFields' top level
+// fields and 'numAttrs' attributes stored within 'SharedSkunkState.kAttributesField'. The fields
+// at the top level matter for which index may be applicable, so they will be applied in order
+// given in the template document. The attributes will be added randomly - so a query on two
+// attributes may be on the 4th and 10th attribute for example.
+function buildQuery(template, numFields, numAttrs, useEnhancedAttributePattern) {
+    const andClauses = [];
+    let fieldsAdded = 0;
+    for (const [topKey, topValue] of Object.entries(template)) {
+        if (fieldsAdded === numFields) {
+            break;
+        }
+        assert(
+            topKey !== SharedSkunkState.kAttributesField,
+            "Added too many top-level fields. Ran out of non-attribute fields in the template (expected non-attribute fields to come first).");
+        andClauses.push({[topKey]: SharedSkunkState.getRandomValue(topValue)});
+        ++fieldsAdded;
+    }
+    const attributes = template[SharedSkunkState.kAttributesField];
+    addAttributesToQuery(andClauses, attributes, numAttrs, useEnhancedAttributePattern);
+    return {$and: andClauses};
+}
+
+/**
+ * Calls 'func' 'runs' times (10 if 'runs' is falsy) and times each call with Date.timeFunc().
+ *
+ * Returns {avg, stdDev, sampStdDev} computed over the measured times.
+ */
+function avgTime(func, runs) {
+    runs = runs || 10;
+    const times = [];
+    for (let i = 0; i < runs; i++) {
+        times.push(Date.timeFunc(func));
+    }
+
+    const out = {avg: Array.avg(times), stdDev: Array.stdDev(times)};
+    // NOTE(review): if Array.stdDev() is the population standard deviation, this is
+    // stdDev / sqrt(runs - 1), i.e. the standard error of the mean rather than a sample standard
+    // deviation (which would be stdDev * sqrt(runs / (runs - 1))). The field name and formula are
+    // kept so reported numbers stay comparable - confirm which one is intended.
+    out.sampStdDev = Math.sqrt((1 / (runs - 1)) * (out.stdDev * out.stdDev));
+    return out;
+}
+
+/**
+ * Returns an array of 'kNumUniqueQueries' queries, each built by buildQuery() with the given
+ * arguments.
+ */
+function buildBatchOfQueries(template, numFields, numAttrs, useEnhancedAttributePattern) {
+    const kNumUniqueQueries = 1000;
+
+    const allQueries = [];
+    for (let i = 0; i < kNumUniqueQueries; i++) {
+        allQueries.push(buildQuery(template, numFields, numAttrs, useEnhancedAttributePattern));
+    }
+    return allQueries;
+}
+
+/**
+ * Runs every query in 'queryBatch' against 'collection', counting the results of each with
+ * itcount(), and prints the average number of results per query.
+ */
+function runQueries(collection, queryBatch) {
+    const numResults = [];
+    for (const query of queryBatch) {
+        numResults.push(collection.find(query).itcount());
+    }
+    print("Average number of results: " + Array.avg(numResults));
+}
+
+/**
+ * For each attribute count from 1 to 'maxNumAttrs', builds a batch of queries on the first three
+ * top-level template fields plus that many attributes and times 5 runs of the whole batch.
+ *
+ * Returns an array with the avgTime() result for each attribute count, in increasing order.
+ */
+function testAllNumAttrs(collection, templateDoc, maxNumAttrs, useEnhancedAttributePattern) {
+    const allTimingInfo = [];
+    for (let numAttrs = 1; numAttrs <= maxNumAttrs; ++numAttrs) {
+        const queries = buildBatchOfQueries(templateDoc, 3, numAttrs, useEnhancedAttributePattern);
+        jsTestLog("Example query: " + tojson(queries[0]));
+        jsTestLog(`About to benchmark with ${numAttrs} attributes...`);
+        const timingInfo = avgTime(() => runQueries(collection, queries), 5);
+        jsTestLog("Avg time: " + tojson(timingInfo));
+        allTimingInfo.push(timingInfo);
+    }
+    return allTimingInfo;
+}
+
+const kMaxNumAttrs = 5;
+let allStats = {};
+jsTestLog("Testing compound wildcard configuration...");
+let allTimingInfo =
+    testAllNumAttrs(collCompoundWildCard, SharedSkunkState.kTemplateDoc, kMaxNumAttrs);
+allStats.compoundWildcard = {
+    timingInfo: allTimingInfo
+};
+
+jsTestLog("Testing enhanced attribute configuration...");
+allTimingInfo = testAllNumAttrs(
+    collEnhancedAttributePattern, SharedSkunkState.kAttributeTemplateDoc, kMaxNumAttrs, true);
+allStats.enhancedAttributePattern = {
+    timingInfo: allTimingInfo
+};
+
+jsTestLog("Testing attribute configuration...");
+allTimingInfo =
+    testAllNumAttrs(collAttributePattern, SharedSkunkState.kAttributeTemplateDoc, kMaxNumAttrs);
+allStats.attributePattern = {
+    timingInfo: allTimingInfo
+};
+
+jsTestLog("Testing naive configuration...");
+allTimingInfo = testAllNumAttrs(collNaive, SharedSkunkState.kTemplateDoc, kMaxNumAttrs);
+allStats.naive = {
+    timingInfo: allTimingInfo
+};
+
+/*
+jsTestLog("Testing wildcard configuration...");
+allTimingInfo = testAllNumAttrs(collSingleWildCard, SharedSkunkState.kTemplateDoc, kMaxNumAttrs);
+allStats.singleWildcard = {
+    timingInfo: allTimingInfo
+};
+*/
+
+jsTestLog("Finished! " + tojson(allStats));
+}());
diff --git a/skunk_shared.js b/skunk_shared.js
new file mode 100644
index 00000000000..a6c7958ea8d
--- /dev/null
+++ b/skunk_shared.js
@@ -0,0 +1,122 @@
+"use strict";
+
+const SharedSkunkState = (function() {
+    // String Catalog
+    const kStringCatalog = [
+        "foo",
+        "bar",
+        "baz",
+        "qux",
+        "quux",
+        "corge",
+        "grault",
+        "garply",
+        "waldo",
+        "fred",
+        "plugh",
+        "xyzzy",
+        "thud"
+    ];
+
+    // Upper bound passed to Random.randInt() for the integer fields, minimum is 0
+    const kMaxInt = 20;
+
+    // Minimum and maximum values for the DateTime fields
+    const kBaseDate = new ISODate("1970-01-01T00:00:00Z");
+    const kMaxDate = new ISODate("2070-01-01T00:00:00Z");
+    const kDateRangeMillis = kMaxDate.getTime() - kBaseDate.getTime();
+
+    // Template whose attributes are stored as a sub-document.
+    const kTemplateDoc = {
+        field1: "",
+        field2: "",
+        field3: "",
+        field4: 1,
+        attributes: {
+            attr1: "",
+            attr2: "",
+            attr3: 1,
+            attr4: 1,
+            attr5: 1,
+            attr6: 1,
+            attr7: 1,
+            attr8: "",
+            attr9: "",
+            attr10: ""
+        }
+    };
+
+    // Same fields as 'kTemplateDoc', with the attributes stored as an array of {k, v} entries.
+    const kAttributeTemplateDoc = {
+        field1: "",
+        field2: "",
+        field3: "",
+        field4: 1,
+        attributes: [
+            {k: "attr1", v: ""},
+            {k: "attr2", v: ""},
+            {k: "attr3", v: 1},
+            {k: "attr4", v: 1},
+            {k: "attr5", v: 1},
+            {k: "attr6", v: 1},
+            {k: "attr7", v: 1},
+            {k: "attr8", v: ""},
+            {k: "attr9", v: ""},
+            {k: "attr10", v: ""},
+        ]
+    };
+
+    // Name of the attribute field
+    const kAttributesField = "attributes";
+
+    /**
+     * Generates a random value of the type dictated by 'exampleValue'. Throws if the type is not
+     * one of number, boolean, string, Date or null.
+     */
+    function getRandomValue(exampleValue) {
+        switch (typeof exampleValue) {
+            case "number":
+                return Random.randInt(kMaxInt);
+
+            case "boolean":
+                // Random.randInt() is always given an explicit bound; 2 yields 0 or 1.
+                return Random.randInt(2) === 0;
+
+            case "object":
+                if (exampleValue === null) {
+                    return null;
+                }
+                if (exampleValue instanceof Date) {
+                    // A random instant between 'kBaseDate' and 'kMaxDate'; both operands are in
+                    // milliseconds.
+                    return new Date(kBaseDate.getTime() + (Random.rand() * kDateRangeMillis));
+                }
+                throw new Error("Unknown type");
+
+            case "string":
+                return kStringCatalog[Random.randInt(kStringCatalog.length)];
+        }
+        throw new Error("Unknown type");
+    }
+
+    // Randomness generator
+    Random.setRandomSeed();
+    return {
+        getRandomValue: getRandomValue,
+        kTemplateDoc: kTemplateDoc,
+        kAttributeTemplateDoc: kAttributeTemplateDoc,
+        kAttributesField: kAttributesField,
+    };
+})();
+
+/*
+let query = {
+    "$and": [
+        {"field1": "qux"},
+        {"field2": "waldo"},
+        {"field3": "baz"},
+        {"attributes": {"$elemMatch": {"k": "attr4", "v": 5}}},
+        {"attributes": {"$elemMatch": {"k": "attr7", "v": 13}}}
+    ]
+};
+*/
-- 
cgit v1.2.1