// path: root/jstests/perf/mr_bench.js (blob c141e112163dd0ce7837bc5bba85c1a36acc8f42)

// Work against the `mr_bench` collection of the current database, dropping any
// existing copy first so the data loaded below is all it contains.
t = db.mr_bench;
t.drop();

// Returns a random string of L characters. Every character is picked
// independently with Math.random() from the 62 alphanumerics: the digits 0-9,
// the upper-case letters A-Z and the lower-case letters a-z.
// An L of zero (or less) yields the empty string.
function getRandomStr(L) {
    // Index 0-9 -> digit, 10-35 -> A-Z, 36-61 -> a-z.
    var alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
    var out = '';
    while (out.length < L) {
        out += alphabet.charAt(Math.floor(Math.random() * 62));
    }
    return out;
}

// Unique ascending index on `rand`, the random string stored in each document.
// NOTE(review): newer shells expose this as createIndex and may no longer
// provide ensureIndex -- confirm against the shell version this test targets.
t.ensureIndex({rand: 1}, {unique: true});

// Build the payload stored in every document's `str` field: a run of 'a'
// characters exactly 512 long. The 32-character seed is doubled until it is at
// least 512 characters long and is then trimmed to its first 512 characters.
//
// The trim must keep the *leading* 512 characters. The earlier `substr(512)`
// kept only what came after index 512, which is nothing, so every document was
// saved with an empty `str` and the benchmark never moved any real payload.
largeStr = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
while (largeStr.length < 512) {
    largeStr += largeStr;
}
largeStr = largeStr.slice(0, 512);

// Load 100000 documents. Each one gets its own random 20-character `rand`
// value, the constant `same` string shared by all documents, and the common
// `str` payload.
i = 0;
while (i < 100000) {
    t.save({
        rand: getRandomStr(20),
        same: "the same string",
        str: largeStr
    });
    ++i;
}

// Give the shell a global `emit` that simply prints its arguments.
// NOTE(review): presumably so the map functions below can be invoked by hand in
// the shell for debugging -- confirm; nothing in this file calls them directly.
emit = printjson;
// Number of documents actually stored; the map/reduce assertions below compare
// against this value.
count = t.count();

// Debug helper: prints x with printjson. Used below to dump each map/reduce
// command result.
function d(x) {
    printjson(x);
}

// Map function for the "unique key" run: emits one pair per document, keyed by
// the document's `rand` value and carrying its `_id` and `str` payload.
m = function() {
    var key = this.rand;
    var value = {id: this._id, str: this.str};
    emit(key, value);
};

// Map function for the "single key" run: emits one pair per document, keyed by
// the document's `same` field, with the document's `rand` string as the value.
m2 = function() {
    var sharedKey = this.same;
    var value = this.rand;
    emit(sharedKey, value);
};

// Reduce function shared by both runs. Folds the values for key `k` into one
// object used as a set: a string value becomes a property name, and for any
// other value each of its enumerable property names is copied over. Every
// property is set to true. The key `k` itself is not used.
r = function(k, vals) {
    var seen = {};
    var mark = function(name) {
        seen[name] = true;
    };

    vals.forEach(function(val) {
        if (typeof val !== 'string') {
            // Not a raw string: merge the property names it carries.
            for (var field in val) {
                mark(field);
            }
            return;
        }
        mark(val);
    });

    return seen;
};

// The following time limits pass comfortably on a laptop with a debug build,
// so in theory they should always pass unless something is wrong: GC, too much reducing, etc.

// First map/reduce: keys are the random unique `rand` values, so no reduce is involved.
// This should be straightforward for perf, but could lead to OOM if settings are bad.
// Checks that every document was read and that the output collection holds one
// document per input document; 15000 is the limit handed to assert.time.
assert.time(function() {
    var mrCommand = {mapreduce: "mr_bench", map: m, reduce: r, out: "mr_bench_out"};
    res = db.runCommand(mrCommand);
    d(res);
    assert.eq(count, res.counts.input, "A");
    x = db[res.result];
    var outputDocs = x.find().count();
    assert.eq(count, outputDocs, "B");
    return 1;
}, "unique key mr", 15000);

// Second map/reduce: every document emits the same key, and each unique value is
// added as a property name to one shared object.
// If that object is kept in RAM and reduced again and again, this can be really slow.
// Checks that every document was read and that the output collection holds
// exactly one document; 20000 is the limit handed to assert.time.
assert.time(function() {
    var mrCommand = {mapreduce: "mr_bench", map: m2, reduce: r, out: "mr_bench_out"};
    res = db.runCommand(mrCommand);
    d(res);
    assert.eq(count, res.counts.input, "A");
    x = db[res.result];
    var outputDocs = x.find().count();
    assert.eq(1, outputDocs, "B");
    return 1;
}, "single key mr", 20000);