summaryrefslogtreecommitdiff
path: root/jstests/perf/mr_bench.js
blob: 38ef57f0835969a45d943816f4e6871fc8017212 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83

t = db.mr_bench;
t.drop();

function getRandomStr(L){
    var s= '';
    var randomchar=function(){
        var n= Math.floor(Math.random()*62);
        if(n<10) return n; //1-10
        if(n<36) return String.fromCharCode(n+55); //A-Z
        return String.fromCharCode(n+61); //a-z
    };
    while(s.length< L) s+= randomchar();
    return s;
}

t.ensureIndex({rand: 1}, {unique: true});

largeStr = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
while (largeStr.length < 512) {
    largeStr += largeStr;
}
largeStr = largeStr.substr(512);

for ( i = 0; i < 100000; ++i ) {
    t.save({rand: getRandomStr(20), same: "the same string", str: largeStr});
}

emit = printjson;
count = t.count();

function d( x ){
    printjson( x );
}

m = function(){
    emit(this.rand, {id: this._id, str: this.str});
};

m2 = function(){
    emit(this.same, this.rand);
};

r = function(k,vals) {
     var tmp = {};
     vals.forEach(function(i) {
        if(typeof(i) == 'string') {
          tmp[i] = true;
        } else {
          for(var z in i) tmp[z] = true;
        }
     });

     return tmp;
};

// following time limits are passing fine on a laptop with a debug build
// so should always pass in theory unless something is wrong: GC, too much reducing, etc

// 1st MR just uses random unique keys, with no reduce involved
// this should be straightforward for perf, but could lead to OOM if settings are bad
assert.time(
function() { 
res = db.runCommand( { mapreduce : "mr_bench" , map : m , reduce : r , out : "mr_bench_out" } );
d( res );
assert.eq( count , res.counts.input , "A" );
x = db[res.result];
assert.eq( count , x.find().count() , "B" );
return 1;
}, "unique key mr", 15000);

// 2nd MR emits the same key, and a unique value is added as key to same object
// if object is kept in ram and being reduced, this can be really slow
assert.time(
function() { 
res = db.runCommand( { mapreduce : "mr_bench" , map : m2 , reduce : r , out : "mr_bench_out" } );
d( res );
assert.eq( count , res.counts.input , "A" );
x = db[res.result];
assert.eq( 1 , x.find().count() , "B" );
return 1;
}, "single key mr", 20000);