From 09da292c09789605ba469662e7775d6779475dbf Mon Sep 17 00:00:00 2001 From: Dwight Date: Fri, 11 Jul 2008 16:47:02 -0400 Subject: lots: new count implementation, segv handler fix, tweak # of objects returned on unlimit()ed find --- db/btree.cpp | 2 +- db/db.cpp | 19 +++++++++----- db/jsobj.cpp | 4 ++- db/query.cpp | 85 ++++++++++++++++++++++++++++++++++++++++-------------------- 4 files changed, 73 insertions(+), 37 deletions(-) diff --git a/db/btree.cpp b/db/btree.cpp index bbe0d1a33f6..f19e9f84df7 100644 --- a/db/btree.cpp +++ b/db/btree.cpp @@ -15,7 +15,7 @@ int insert_debug = 0; DiskLoc maxDiskLoc(0x7fffffff, 0x7fffffff); DiskLoc minDiskLoc(0, 1); -inline KeyNode::KeyNode(BucketBasics& bb, _KeyNode &k) : +KeyNode::KeyNode(BucketBasics& bb, _KeyNode &k) : prevChildBucket(k.prevChildBucket), recordLoc(k.recordLoc), key(bb.data+k.keyDataOfs()) { } diff --git a/db/db.cpp b/db/db.cpp index 55db7028ae4..5241a15f9f4 100644 --- a/db/db.cpp +++ b/db/db.cpp @@ -637,7 +637,7 @@ skip: } catch( AssertionException ) { problem() << "Uncaught AssertionException, terminating" << endl; - exit(-7); + exit(15); } } @@ -696,15 +696,19 @@ void pipeSigHandler( int signal ) { int segvs = 0; void segvhandler(int x) { if( ++segvs > 1 ) { - if( segvs == 2 ) - cout << " got 2nd SIGSEGV" << endl; + signal(x, SIG_DFL); + if( segvs == 2 ) { + cout << "\n\n\n got 2nd SIGSEGV" << endl; + sayDbContext(); + } return; } problem() << "got SIGSEGV " << x << ", terminating :-(" << endl; sayDbContext(); - closeAllSockets(); - MemoryMappedFile::closeAllFiles(); - flushOpLog(); +// closeAllSockets(); +// MemoryMappedFile::closeAllFiles(); +// flushOpLog(); + dbexit(14); } void mysighandler(int x) { @@ -776,6 +780,7 @@ int test2(); int main(int argc, char* argv[], char *envp[] ) { + DEV cout << "warning: DEV mode enabled\n"; /* struct { int x; char ch; @@ -896,7 +901,7 @@ int main(int argc, char* argv[], char *envp[] ) exit(0); } - cout << "10gendb usage:\n"; + cout << "Mongo db usage:\n"; cout << " run 
run db" << endl; cout << " msg end [port] shut down db server listening on port (or default)" << endl; cout << " msg [msg] [port] send a request to the db server listening on port (or default)" << endl; diff --git a/db/jsobj.cpp b/db/jsobj.cpp index 02d00192058..50c7e2ae720 100644 --- a/db/jsobj.cpp +++ b/db/jsobj.cpp @@ -575,7 +575,9 @@ bool JSMatcher::matches(JSObj& jsobj, bool *deep) { /* JSObj ------------------------------------------------------------*/ -string JSObj::toString() const { +string JSObj::toString() const { + if( isEmpty() ) return "{}"; + stringstream s; s << "{ "; JSElemIter i(*this); diff --git a/db/query.cpp b/db/query.cpp index 49b87d90f2f..42b8a4a4ae6 100644 --- a/db/query.cpp +++ b/db/query.cpp @@ -44,9 +44,10 @@ int runCount(const char *ns, JSObj& cmd, string& err); parameters query - the query, e.g., { name: 'joe' } order - order by spec, e.g., { name: 1 } 1=ASC, -1=DESC - + simpleKeyMatch - set to true if the query is purely for a single key value + unchanged otherwise. 
*/ -auto_ptr getIndexCursor(const char *ns, JSObj& query, JSObj& order) { +auto_ptr getIndexCursor(const char *ns, JSObj& query, JSObj& order, bool *simpleKeyMatch = 0) { NamespaceDetails *d = nsdetails(ns); if( d == 0 ) return auto_ptr(); @@ -85,6 +86,7 @@ auto_ptr getIndexCursor(const char *ns, JSObj& query, JSObj& order) { set keyFields; idxKey.getFieldNames(keyFields); if( keyFields == queryFields ) { + bool simple = true; JSObjBuilder b; JSObj q = query.extractFields(idxKey, b); /* regexp: only supported if form is /^text/ */ @@ -115,6 +117,7 @@ auto_ptr getIndexCursor(const char *ns, JSObj& query, JSObj& order) { first = false; if( e.type() == RegEx ) { + simple = false; if( *e.regexFlags() ) goto fail; const char *re = e.regex(); @@ -140,18 +143,18 @@ auto_ptr getIndexCursor(const char *ns, JSObj& query, JSObj& order) { } JSObj q2 = b2.done(); DEV cout << "using index " << d->indexes[i].indexNamespace() << endl; + if( simple && simpleKeyMatch ) *simpleKeyMatch = true; return auto_ptr( new BtreeCursor(d->indexes[i].head, q2, 1, true)); } } fail: + DEV cout << "getIndexCursor fail" << endl; return auto_ptr(); } void deleteObjects(const char *ns, JSObj pattern, bool justOne) { -// cout << "TEMP delete ns:" << ns << " queryobjsize:" << -// pattern.objsize() << endl; if( strstr(ns, ".system.") ) { if( strstr(ns, ".system.namespaces") ){ @@ -176,7 +179,7 @@ void deleteObjects(const char *ns, JSObj pattern, bool justOne) { int temp = 0; int tempd = 0; -DiskLoc _tempDelLoc; + DiskLoc _tempDelLoc; while( c->ok() ) { temp++; @@ -185,7 +188,7 @@ DiskLoc _tempDelLoc; DiskLoc rloc = c->currLoc(); c->advance(); // must advance before deleting as the next ptr will die JSObj js(r); - //cout << "TEMP: " << js.toString() << endl; + bool deep; if( !matcher.matches(js, &deep) ) { if( c->tempStopOnMiss() ) @@ -773,24 +776,52 @@ int runCount(const char *ns, JSObj& cmd, string& err) { JSObj query = cmd.getObjectField("query"); - auto_ptr c; - if( query.isEmpty() ) { - c = 
getIndexCursor(ns, id_obj, empty_obj); - } else { - c = getIndexCursor(ns, query, empty_obj); + if( query.isEmpty() ) { + // count of all objects + return (int) d->nrecords; } - if( c.get() == 0 ) { - cout << "TEMP: table scan" << endl; + auto_ptr c; + + bool simpleKeyToMatch = false; + c = getIndexCursor(ns, query, empty_obj, &simpleKeyToMatch); + + if( c.get() ) { + if( simpleKeyToMatch ) { + /* Here we only look at the btree keys to determine if a match, instead of looking + into the records, which would be much slower. + */ + int count = 0; + BtreeCursor *bc = dynamic_cast(c.get()); + if( c->ok() ) { + while( 1 ) { + if( !(query == bc->currKeyNode().key) ) + break; + count++; + if( !c->advance() ) + break; + } + } + return count; + } + } else { c = findTableScan(ns, empty_obj); } - else - cout << "TEMP: indexed scan" << endl; int count = 0; - if( c->ok() ) { - count++; - while( c->advance() ) count++; + auto_ptr matcher(new JSMatcher(query)); + while( c->ok() ) { + JSObj js = c->current(); + bool deep; + if( !matcher->matches(js, &deep) ) { + if( c->tempStopOnMiss() ) + break; + } + else if( !deep || !c->getsetdup(c->currLoc()) ) { // i.e., check for dups on deep items only + // got a match. + count++; + } + c->advance(); } return count; } @@ -880,7 +911,14 @@ assert( debug.getN() < 5000 ); if( ok ) { n++; if( (ntoreturn>0 && (n >= ntoreturn || b.len() > MaxBytesToReturnToClientAtOnce)) || - (ntoreturn==0 && b.len()>1*1024*1024) ) { + (ntoreturn==0 && (b.len()>1*1024*1024 || n>=101)) ) { + /* if ntoreturn is zero, we return up to 101 objects. on the subsequent getmore, there + is only a size limit. The idea is that on a find() where one doesn't use much results, + we don't return much, but once getmore kicks in, we start pushing significant quantities. + + The n limit (vs. size) is important when someone fetches only one small field from big + objects, which causes massive scanning server-side. 
+ */ /* if only 1 requested, no cursor saved for efficiency...we assume it is findOne() */ if( wantMore && ntoreturn != 1 ) { c->advance(); @@ -909,15 +947,6 @@ assert( debug.getN() < 5000 ); if( client->profile ) ss << " nscanned:" << nscanned << ' '; } - /*catch( AssertionException e ) { - if( n ) - throw e; - if( nCaught++ >= 1000 ) { - cout << "Too many query exceptions, terminating" << endl; - exit(-8); - } - cout << " Assertion running query, returning an empty result" << endl; - }*/ } QueryResult *qr = (QueryResult *) b.buf(); -- cgit v1.2.1