author     agirbal <antoine@10gen.com>    2011-01-25 14:41:16 -0800
committer  agirbal <antoine@10gen.com>    2011-01-25 15:20:07 -0800
commit     34f8e800ce518a3cedee2f2a61cafc7e919e0bb1 (patch)
tree       69a14e081e47f89baf5364827a38de6fcd94a461 /db
parent     1aabdc3d74d83759d40f4b290fc89aa653c494d5 (diff)
download   mongo-34f8e800ce518a3cedee2f2a61cafc7e919e0bb1.tar.gz
[CACHE-2398]: for inline mapreduce, all emitted objects are kept in RAM before the 1st reduce, which can cause high memory usage
Diffstat (limited to 'db')
-rw-r--r--  db/commands/mr.cpp  29
-rw-r--r--  db/commands/mr.h     3
2 files changed, 18 insertions, 14 deletions
diff --git a/db/commands/mr.cpp b/db/commands/mr.cpp
index 532c430a5a7..5fbe2434f1e 100644
--- a/db/commands/mr.cpp
+++ b/db/commands/mr.cpp
@@ -652,6 +652,7 @@ namespace mongo {
InMemory * n = new InMemory(); // for new data
long nSize = 0;
+ long dupCount = 0;
for ( InMemory::iterator i=_temp->begin(); i!=_temp->end(); ++i ) {
BSONObj key = i->first;
@@ -667,19 +668,20 @@ namespace mongo {
}
else {
// add to new map
- _add( n , all[0] , nSize );
+ _add( n , all[0] , nSize, dupCount );
}
}
else if ( all.size() > 1 ) {
// several values, reduce and add to map
BSONObj res = _config.reducer->reduce( all );
- _add( n , res , nSize );
+ _add( n , res , nSize, dupCount );
}
}
// swap maps
_temp.reset( n );
_size = nSize;
+ _dupCount = dupCount;
}
/**
@@ -710,31 +712,32 @@ namespace mongo {
*/
void State::emit( const BSONObj& a ) {
_numEmits++;
- _add( _temp.get() , a , _size );
+ _add( _temp.get() , a , _size, _dupCount );
}
- void State::_add( InMemory* im, const BSONObj& a , long& size ) {
+ void State::_add( InMemory* im, const BSONObj& a , long& size, long& dupCount ) {
BSONList& all = (*im)[a];
all.push_back( a );
size += a.objsize() + 16;
+ if (all.size() > 1)
+ ++dupCount;
}
/**
* this method checks the size of in memory map and potentially flushes to disk
*/
void State::checkSize() {
- if ( ! _onDisk )
- return;
-
- // the limits to flush to disk are rather low, a few KB, may need to increase
- if ( _size < 1024 * 5 )
+ if ( _size < 1024 * 50 )
return;
- long before = _size;
- reduceInMemory();
- log(1) << " mr: did reduceInMemory " << before << " -->> " << _size << endl;
+ // attempt to reduce in memory map, if we've seen duplicates
+ if ( _dupCount > 0) {
+ long before = _size;
+ reduceInMemory();
+ log(1) << " mr: did reduceInMemory " << before << " -->> " << _size << endl;
+ }
- if ( _size < 1024 * 15 )
+ if ( ! _onDisk || _size < 1024 * 100 )
return;
dumpToInc();
diff --git a/db/commands/mr.h b/db/commands/mr.h
index f8ec495ecff..2f3520230f2 100644
--- a/db/commands/mr.h
+++ b/db/commands/mr.h
@@ -268,7 +268,7 @@ namespace mongo {
protected:
void _insertToInc( BSONObj& o );
- static void _add( InMemory* im , const BSONObj& a , long& size );
+ static void _add( InMemory* im , const BSONObj& a , long& size, long& dupCount );
scoped_ptr<Scope> _scope;
const Config& _config;
@@ -278,6 +278,7 @@ namespace mongo {
scoped_ptr<InMemory> _temp;
long _size; // bytes in _temp
+ long _dupCount; // number of duplicate key entries
long long _numEmits;
};
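
The following is a minimal standalone sketch of the duplicate-count heuristic introduced by this patch, for readers who want to see the idea outside of the diff. It is not the MongoDB implementation: BSONObj keys and values are replaced with std::string, the reducer is a toy concatenation, the size accounting and thresholds merely mimic the patch, and the spill to disk (dumpToInc) is omitted.

#include <iostream>
#include <map>
#include <string>
#include <vector>

class ToyState {
public:
    ToyState() : _size(0), _dupCount(0) {}

    // Mirrors State::emit / State::_add: buffer the value under its key and
    // count how many keys currently hold more than one value.
    void emit(const std::string& key, const std::string& value) {
        std::vector<std::string>& all = _temp[key];
        all.push_back(value);
        _size += static_cast<long>(value.size()) + 16;  // rough per-entry overhead
        if (all.size() > 1)
            ++_dupCount;  // a reduce could now shrink this key's entry
        checkSize();
    }

private:
    // Mirrors the patched State::checkSize: only attempt an in-memory reduce
    // when the buffer is large enough AND duplicates have been seen, so a job
    // whose keys are all unique is not re-reduced for nothing.
    void checkSize() {
        if (_size < 1024 * 50)
            return;
        if (_dupCount > 0) {
            long before = _size;
            reduceInMemory();
            std::cout << "reduced in memory " << before << " -> " << _size << "\n";
        }
        // The real code would now spill to disk (dumpToInc) when _onDisk is
        // set and the buffer is still above a higher threshold; omitted here.
    }

    // Toy reduce: collapse each key's values into a single concatenated value.
    void reduceInMemory() {
        long newSize = 0;
        for (auto& kv : _temp) {
            std::string combined;
            for (const std::string& v : kv.second)
                combined += v;
            kv.second.assign(1, combined);
            newSize += static_cast<long>(combined.size()) + 16;
        }
        _size = newSize;
        _dupCount = 0;  // every key now holds exactly one value
    }

    std::map<std::string, std::vector<std::string> > _temp;
    long _size;      // approximate bytes buffered, as in State::_size
    long _dupCount;  // keys holding more than one value, as in State::_dupCount
};

int main() {
    ToyState s;
    for (int i = 0; i < 10000; ++i)
        s.emit("key" + std::to_string(i % 100), "value-value-value");
    return 0;
}

The point of tracking _dupCount is that reduceInMemory() can only shrink the buffer when at least one key holds more than one value; when every key is unique the pass is skipped, and for non-inline jobs the data is still flushed to disk once it crosses the larger threshold.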