t = db.mr_bench; t.drop(); function getRandomStr(L) { var s = ''; var randomchar = function() { var n = Math.floor(Math.random() * 62); if (n < 10) return n; // 1-10 if (n < 36) return String.fromCharCode(n + 55); // A-Z return String.fromCharCode(n + 61); // a-z }; while (s.length < L) s += randomchar(); return s; } t.ensureIndex({rand: 1}, {unique: true}); largeStr = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; while (largeStr.length < 512) { largeStr += largeStr; } largeStr = largeStr.substr(512); for (i = 0; i < 100000; ++i) { t.save({rand: getRandomStr(20), same: "the same string", str: largeStr}); } emit = printjson; count = t.count(); function d(x) { printjson(x); } m = function() { emit(this.rand, {id: this._id, str: this.str}); }; m2 = function() { emit(this.same, this.rand); }; r = function(k, vals) { var tmp = {}; vals.forEach(function(i) { if (typeof(i) == 'string') { tmp[i] = true; } else { for (var z in i) tmp[z] = true; } }); return tmp; }; // following time limits are passing fine on a laptop with a debug build // so should always pass in theory unless something is wrong: GC, too much reducing, etc // 1st MR just uses random unique keys, with no reduce involved // this should be straightforward for perf, but could lead to OOM if settings are bad assert.time(function() { res = db.runCommand({mapreduce: "mr_bench", map: m, reduce: r, out: "mr_bench_out"}); d(res); assert.eq(count, res.counts.input, "A"); x = db[res.result]; assert.eq(count, x.find().count(), "B"); return 1; }, "unique key mr", 15000); // 2nd MR emits the same key, and a unique value is added as key to same object // if object is kept in ram and being reduced, this can be really slow assert.time(function() { res = db.runCommand({mapreduce: "mr_bench", map: m2, reduce: r, out: "mr_bench_out"}); d(res); assert.eq(count, res.counts.input, "A"); x = db[res.result]; assert.eq(1, x.find().count(), "B"); return 1; }, "single key mr", 20000);