1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
|
// Map/reduce timing test: work on the "mr_bench" collection and drop it first
// so the test starts from an empty collection.
t = db.mr_bench;
t.drop();
// Returns a random string made of the characters 0-9, A-Z and a-z.
// Characters are appended one at a time until the string's length is no longer
// smaller than L, so a non-positive L yields the empty string.
function getRandomStr(L) {
    // 62 symbols: index 0-9 -> digits, 10-35 -> upper case, 36-61 -> lower case
    var ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    var out = "";
    while (out.length < L) {
        var pick = Math.floor(Math.random() * 62);
        out += ALPHABET.charAt(pick);
    }
    return out;
}
// Unique ascending index on "rand": each stored document must carry a distinct random key.
t.ensureIndex({rand: 1}, {unique: true});
// Build the 512-character filler string stored in every document's "str" field.
// A 32-character seed is doubled until it reaches at least 512 characters.
var largeStr = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
while (largeStr.length < 512) {
    largeStr += largeStr;
}
// Keep exactly the first 512 characters. The previous substr(512) call kept
// everything from index 512 onward, which for a 512-character string is "",
// so the documents never carried the intended payload.
// NOTE(review): documents are now genuinely larger; re-validate the time limits
// passed to assert.time further down in this file.
largeStr = largeStr.substring(0, 512);
// Populate the collection with 100000 documents. Each one gets a random
// 20-character "rand" value, the constant "same" value and the shared filler string.
for (i = 0; i < 100000; ++i) {
t.save({rand: getRandomStr(20), same: "the same string", str: largeStr});
}
// Alias a shell-side global "emit" to printjson.
// NOTE(review): presumably so the map functions can be called directly in the shell
// for debugging — confirm.
emit = printjson;
// Snapshot of the number of documents actually stored; the map/reduce assertions compare against it.
count = t.count();
// Debug helper: prints x using printjson.
function d(x) {
printjson(x);
}
// Map function for the unique-key run: emits each document's "rand" value as
// the key, with the document's _id and "str" payload as the value.
m = function() {
    var payload = {id: this._id, str: this.str};
    emit(this.rand, payload);
};
// Map function for the single-key run: emits the document's "same" field as
// the key and its "rand" field as the value.
m2 = function() {
    var sharedKey = this.same;
    var uniqueValue = this.rand;
    emit(sharedKey, uniqueValue);
};
// Reduce function: folds all values for a key into one object used as a set.
// A string value becomes a property name; for any other value, each of its
// enumerable property names is copied over. Every recorded property maps to true.
// k is the key being reduced (unused); vals is the array of values.
r = function(k, vals) {
    var seen = {};
    var mark = function(name) {
        seen[name] = true;
    };
    for (var idx = 0; idx < vals.length; idx++) {
        var value = vals[idx];
        if (typeof value === 'string') {
            mark(value);
            continue;
        }
        // non-string value (e.g. the result of an earlier reduce): merge its keys
        for (var field in value) {
            mark(field);
        }
    }
    return seen;
};
// The following time limits pass comfortably on a laptop with a debug build,
// so in theory they should always pass unless something is wrong: GC, too much reducing, etc.
// The 1st MR just uses random unique keys, with no reduce involved.
// This should be straightforward for perf, but could lead to OOM if settings are bad.
// Run the map/reduce with map function m under assert.time, labelled "unique key mr",
// with a limit of 15000 (presumably milliseconds — confirm against assert.time).
assert.time(function() {
res = db.runCommand({mapreduce: "mr_bench", map: m, reduce: r, out: "mr_bench_out"});
// print the raw command result
d(res);
// the reported input count must equal the number of documents in the collection
assert.eq(count, res.counts.input, "A");
// res.result names the output collection
x = db[res.result];
// m emits the unique "rand" key, so one output document is expected per input document
assert.eq(count, x.find().count(), "B");
return 1;
}, "unique key mr", 15000);
// The 2nd MR emits the same key for every document, and each unique value is added as a key to the same object.
// If the object is kept in RAM while being reduced, this can be really slow.
// Run the map/reduce with map function m2 under assert.time, labelled "single key mr",
// with a limit of 20000 (presumably milliseconds — confirm against assert.time).
// m2 emits the constant "same" field as the key, so all documents reduce under one key.
assert.time(function() {
    res = db.runCommand({mapreduce: "mr_bench", map: m2, reduce: r, out: "mr_bench_out"});
    // print the raw command result
    d(res);
    // Labels differ from the unique-key run's "A"/"B" so a failure message
    // identifies which of the two runs broke.
    assert.eq(count, res.counts.input, "C");
    // res.result names the output collection
    x = db[res.result];
    // a single shared key must collapse into exactly one output document
    assert.eq(1, x.find().count(), "D");
    return 1;
}, "single key mr", 20000);
|