summaryrefslogtreecommitdiff
path: root/jstests/libs/load_ce_test_data.js
blob: 3f8802e9d6d9003a0c911cbabe876c40c7e6c335 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
load("jstests/libs/ce_stats_utils.js");

/**
 * Build statistics for every field described in 'fields' and create a
 * single-field ascending index on each field whose metadata has the
 * 'indexed' flag set.
 */
function analyzeAndIndexEnabledFields(db, coll, fields) {
    fields.forEach((field) => {
        const analyzeRes = db.runCommand({analyze: coll.getName(), key: field.fieldName});
        assert.commandWorked(analyzeRes);
        if (field.indexed) {
            const indexRes = coll.createIndex({[field.fieldName]: 1});
            assert.commandWorked(indexRes);
        }
    });
}

/**
 * Load a dataset described in the 'dbMetadata' global variable by running
 * 'mongoimport' once per collection, then analyze and index each imported
 * collection's fields.
 *
 * @param {string} dbName - target database name.
 * @param {string} dataDir - directory containing one '<collName>.dat' file per collection.
 * @param {Array} dbMetadata - per-collection metadata ({collectionName, fields}).
 */
function importDataset(dbName, dataDir, dbMetadata) {
    // NOTE(review): relies on the mongo-shell global 'db' being connected;
    // 'dbName' selects the sibling database on that same connection.
    const testDB = db.getSiblingDB(dbName);
    print("Running mongoimport\n");
    for (const collMetadata of dbMetadata) {
        const collName = collMetadata.collectionName;
        const coll = testDB[collName];
        print(`Importing ${collName}\n`);
        // TODO: the target host is hard-coded; consider deriving it from the
        // running test fixture instead.
        const importRC = runProgram('mongoimport',
                                    '--db',
                                    dbName,
                                    '--verbose',
                                    '--host',
                                    'localhost:20000',
                                    '--file',
                                    `${dataDir}${collName}.dat`,
                                    '--drop');
        // mongoimport exits with 0 on success.
        assert.eq(importRC, 0);

        // Create single-field indexes and analyze each field.
        analyzeAndIndexEnabledFields(testDB, coll, collMetadata.fields);
    }
    // Fixed: message previously said "Done mongorestore" although this
    // function runs mongoimport.
    print("Done mongoimport\n");
}

/**
 * Load a JSON dataset stored as an array of names of data files, where each file contains
 * a variable that holds an object with the properties {collName, collData}.
 * For instance:
 * ce_data_20_1 = {collName: "ce_data_20",
 *                 collData: [{"_id": 0, "uniform_int_0-1000-1": 899, ...}, ...]}
 *
 * @param {Object} db - database handle into which the collections are loaded.
 * @param {Array<string>} dataSet - names of chunk files; each name is also the
 *     name of the global variable the chunk file defines.
 * @param {string} dataDir - directory prefix prepended to each chunk file name.
 * @param {Array} dbMetadata - per-collection metadata; only 'collectionName'
 *     is used here (to drop pre-existing collections).
 */
function loadJSONDataset(db, dataSet, dataDir, dbMetadata) {
    // Switch the query engine to the Bonsai framework before loading the data.
    assert.commandWorked(
        db.adminCommand({setParameter: 1, internalQueryFrameworkControl: "tryBonsai"}));

    // Drop every target collection so the load starts from a clean state.
    for (const collMetadata of dbMetadata) {
        let coll = db[collMetadata.collectionName];
        coll.drop();
    }

    for (const chunkName of dataSet) {
        let chunkFilePath = `${dataDir}${chunkName}`;
        print(`Loading chunk file: ${chunkFilePath}\n`);
        load(chunkFilePath);
        // At this point there is a global variable named as the value of
        // chunkName. eval() is used because that variable can only be
        // dereferenced by its runtime name.
        // NOTE(review): globalThis[chunkName] might be a safer alternative —
        // confirm it is supported by the shell versions this runs under.
        let coll = eval(`db[${chunkName}.collName]`);
        eval(`assert.commandWorked(coll.insertMany(${chunkName}.collData, {ordered: false}));`);
        // Free the chunk memory after insertion into the DB.
        eval(`${chunkName} = null`);
    }

    // TODO: Creating single-field indexes and analyzing each field is better
    // done by the CE-testing script because it knows better what fields to
    // analyze. Something like:
    // for (const collMetadata of dbMetadata) {
    //     print(`\nIndexing collection: ${collMetadata.collectionName}`);
    //     coll = db[collMetadata.collectionName];
    //     analyzeAndIndexEnabledFields(db, coll, collMetadata.fields);
    // }
}