1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
// Start from an empty "mr_bench" collection so documents left by a previous
// run cannot affect the counts checked later in this script.
t = db["mr_bench"];
t.drop();
/**
 * Builds a random string of alphanumeric characters.
 *
 * Each character is drawn uniformly (via Math.random) from the 62 symbols
 * '0'-'9', 'A'-'Z' and 'a'-'z'.
 *
 * @param {number} L - requested length; values <= 0 yield the empty string.
 * @returns {string} a string of exactly L characters when L is a positive integer.
 */
function getRandomStr(L){
    var DIGITS_END = 10; // picks 0-9   map to '0'-'9' (char codes 48-57)
    var UPPER_END = 36;  // picks 10-35 map to 'A'-'Z' (char codes 65-90)
                         // picks 36-61 map to 'a'-'z' (char codes 97-122)
    var out = '';
    while (out.length < L) {
        var pick = Math.floor(Math.random() * 62);
        var code;
        if (pick < DIGITS_END) {
            code = pick + 48;
        } else if (pick < UPPER_END) {
            code = pick + 55;
        } else {
            code = pick + 61;
        }
        out += String.fromCharCode(code);
    }
    return out;
}
// Unique index on `rand`: the first map/reduce below emits `rand` as its key
// and expects one output document per input document.
t.ensureIndex({rand: 1}, {unique: true});

// Build a 512-character filler string by repeated doubling (32 -> 64 -> ... -> 512).
largeStr = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
while (largeStr.length < 512) {
    largeStr += largeStr;
}
// Keep the FIRST 512 characters. The previous `substr(512)` started at index
// 512 of a 512-character string and therefore produced "", so every document
// was stored with an empty `str` and the benchmark moved no bulk data.
// NOTE(review): documents now really carry 512 bytes of payload; re-validate
// the time limits further down on the reference machine.
largeStr = largeStr.substr(0, 512);

// Populate the collection: a random 20-character `rand`, a constant `same`
// field shared by every document, and the filler payload in `str`.
for ( i = 0; i < 100000; ++i ) {
    t.save({rand: getRandomStr(20), same: "the same string", str: largeStr});
}
// Number of documents now in the collection; both map/reduce runs compare
// their reported input count against this value.
count = t.count();
// Alias `emit` to the shell's printjson.
// NOTE(review): presumably so the map functions can be invoked directly in the
// shell for debugging — confirm that nothing else depends on this global.
emit = printjson;
/**
 * Debug helper: prints a value with the shell's printjson.
 *
 * @param {*} x - value to print (used below for map/reduce command results).
 * @returns {undefined}
 */
function d( x ){
    var value = x;
    printjson( value );
}
// Map function for the "unique key" run: keyed by the document's `rand`
// field, emitting the document's `_id` and `str` payload as the value.
// `this` is the document being mapped.
m = function(){
    var payload = {id: this._id, str: this.str};
    emit(this.rand, payload);
};
// Map function for the "single key" run: every document emits under its
// `same` field (identical for all documents inserted above), with the
// document's `rand` string as the value. `this` is the document being mapped.
m2 = function(){
    var sharedKey = this.same;
    var uniqueValue = this.rand;
    emit(sharedKey, uniqueValue);
};
// Reduce function shared by both runs. It folds all values for a key into a
// single object used as a set: each string value becomes a property name, and
// each object value contributes all of its enumerable property names. Every
// collected name maps to `true`. Accepting objects as well as strings lets the
// output of one reduce call be fed back in as an input value of another.
//   k    - the key being reduced (unused)
//   vals - array of values emitted (or previously reduced) for that key
r = function(k,vals) {
    var seen = {};
    for (var idx = 0; idx < vals.length; idx++) {
        var entry = vals[idx];
        if (typeof entry === 'string') {
            seen[entry] = true;
            continue;
        }
        for (var name in entry) {
            seen[name] = true;
        }
    }
    return seen;
};
// The time limits below pass comfortably on a laptop running a debug build,
// so in theory they should always pass unless something is wrong: GC, too much reducing, etc.
// First map/reduce: map function `m` keys every document by its random,
// unique `rand` value, so no reducing should be involved.
// This should be straightforward for performance, but could lead to OOM if
// settings are bad.
// The output collection must hold one document per input document.
// 15000 is the limit handed to assert.time (presumably milliseconds — confirm
// against the shell's assert.time helper).
assert.time(
    function() {
        var cmd = { mapreduce : "mr_bench" , map : m , reduce : r , out : "mr_bench_out" };
        res = db.runCommand( cmd );
        d( res );
        assert.eq( count , res.counts.input , "A" );
        x = db[res.result];
        var outputDocs = x.find().count();
        assert.eq( count , outputDocs , "B" );
        return 1;
    }, "unique key mr", 15000);
// Second map/reduce: map function `m2` emits every document under the same
// key, and the reducer adds each unique `rand` value as a property of one
// accumulating object. If that object is kept in RAM and reduced over and
// over, this can be really slow.
// The output collection must end up with exactly one document.
// 20000 is the limit handed to assert.time (presumably milliseconds — confirm
// against the shell's assert.time helper).
assert.time(
    function() {
        var cmd = { mapreduce : "mr_bench" , map : m2 , reduce : r , out : "mr_bench_out" };
        res = db.runCommand( cmd );
        d( res );
        assert.eq( count , res.counts.input , "A" );
        x = db[res.result];
        var outputDocs = x.find().count();
        assert.eq( 1 , outputDocs , "B" );
        return 1;
    }, "single key mr", 20000);
|