diff options
author | dwight <dwight@10gen.com> | 2010-06-14 12:40:49 -0400 |
---|---|---|
committer | dwight <dwight@10gen.com> | 2010-06-14 12:40:49 -0400 |
commit | 56bdcafd9754c03954bd7bddd056d18a68aaa9de (patch) | |
tree | ee06b6a8f5bd37a8d8cf50e6efe31248bac1aaab | |
parent | c78026c136083d772a5cd641f121e8fb1f0fdfc9 (diff) | |
parent | efbbcc80beb24896387daecd64a8535f26fc0665 (diff) | |
download | mongo-56bdcafd9754c03954bd7bddd056d18a68aaa9de.tar.gz |
Merge branch 'master' of github.com:mongodb/mongo
-rw-r--r-- | db/index_geo2d.cpp | 72 | ||||
-rw-r--r-- | db/json.cpp | 19 | ||||
-rw-r--r-- | db/json.h | 3 | ||||
-rw-r--r-- | jstests/tool/exportimport1.js | 13 | ||||
-rw-r--r-- | s/chunk.cpp | 4 | ||||
-rw-r--r-- | tools/export.cpp | 15 | ||||
-rw-r--r-- | tools/import.cpp | 88 |
7 files changed, 152 insertions, 62 deletions
diff --git a/db/index_geo2d.cpp b/db/index_geo2d.cpp index 54c9e36075a..fa71f0bc50c 100644 --- a/db/index_geo2d.cpp +++ b/db/index_geo2d.cpp @@ -912,11 +912,11 @@ namespace mongo { virtual void add( const KeyNode& node ){ // when looking at other boxes, don't want to look at some object twice - if ( _seen.count( node.recordLoc ) ){ + pair<set<DiskLoc>::iterator,bool> seenBefore = _seen.insert( node.recordLoc ); + if ( ! seenBefore.second ){ GEODEBUG( "\t\t\t\t already seen : " << node.recordLoc.obj()["_id"] ); return; } - _seen.insert( node.recordLoc ); _lookedAt++; // distance check @@ -969,7 +969,7 @@ namespace mongo { GeoHopper( const Geo2dType * g , unsigned max , const GeoHash& n , const BSONObj& filter = BSONObj() , double maxDistance = numeric_limits<double>::max() ) : GeoAccumulator( g , filter ) , _max( max ) , _near( n ), _maxDistance( maxDistance ) { - + _farthest = -1; } virtual bool checkDistance( const GeoHash& h , double& d ){ @@ -986,24 +986,23 @@ namespace mongo { if ( _points.size() > _max ){ _points.erase( --_points.end() ); } - } - double farthest(){ - if ( _points.size() == 0 ) - return -1; - Holder::iterator i = _points.end(); i--; - return i->_distance; + _farthest = i->_distance; + } + + double farthest() const { + return _farthest; } unsigned _max; GeoHash _near; Holder _points; double _maxDistance; - + double _farthest; }; - + struct BtreeLocation { int pos; bool found; @@ -1133,14 +1132,17 @@ namespace mongo { if ( _found && _prefix.constrains() ){ // 2 Point center( _spec , _n ); - double boxSize = _spec->sizeEdge( _prefix ); double farthest = hopper->farthest(); - if ( farthest > boxSize ) - boxSize = farthest; - Box want( center._x - ( boxSize / 2 ) , center._y - ( boxSize / 2 ) , boxSize ); - while ( _spec->sizeEdge( _prefix ) < boxSize ) + Box want( center._x - farthest , center._y - farthest , farthest * 2 ); + _prefix = _n; + while ( _spec->sizeEdge( _prefix ) < ( farthest / 2 ) ){ _prefix = _prefix.up(); - log(1) << "want: " << want << " found:" << _found << " nscanned: " << _nscanned << " hash size:" << _spec->sizeEdge( _prefix ) << endl; + } + + if ( logLevel > 0 ){ + log(1) << "want: " << want << " found:" << _found << " nscanned: " << _nscanned << " hash size:" << _spec->sizeEdge( _prefix ) + << " farthest: " << farthest << " using box: " << Box( _spec , _prefix ).toString() << endl; + } for ( int x=-1; x<=1; x++ ){ for ( int y=-1; y<=1; y++ ){ @@ -1158,34 +1160,37 @@ namespace mongo { void doBox( const IndexDetails& id , const Box& want , const GeoHash& toscan , int depth = 0 ){ Box testBox( _spec , toscan ); - if ( logLevel > 0 ){ - log(1) << "\t"; - for ( int i=0; i<depth; i++ ){ - log(1) << "\t"; - log() << " doBox: " << testBox << "\t" << toscan.toString() << " scanned so far: " << _nscanned << endl; - } + if ( logLevel > 2 ){ + cout << "\t"; + for ( int i=0; i<depth; i++ ) + cout << "\t"; + cout << " doBox: " << testBox.toString() << "\t" << toscan.toString() << " scanned so far: " << _nscanned << endl; } - + double intPer = testBox.intersects( want ); if ( intPer <= 0 ) return; - if ( intPer < .5 && depth < 3 ){ - doBox( id , want , toscan + "00" , depth + 1); - doBox( id , want , toscan + "01" , depth + 1); - doBox( id , want , toscan + "10" , depth + 1); - doBox( id , want , toscan + "11" , depth + 1); - return; - } + bool goDeeper = intPer < .5 && depth < 2; + long long myscanned = 0; + BtreeLocation loc; loc.bucket = id.head.btree()->locate( id , id.head , toscan.wrap() , Ordering::make(_spec->_order) , loc.pos , loc.found , minDiskLoc ); loc.checkCur( _found , _hopper.get() ); - while ( loc.hasPrefix( toscan ) && loc.advance( 1 , _found , _hopper.get() ) ) + while ( loc.hasPrefix( toscan ) && loc.advance( 1 , _found , _hopper.get() ) ){ _nscanned++; - + if ( ++myscanned > 100 && goDeeper ){ + doBox( id , want , toscan + "00" , depth + 1); + doBox( id , want , toscan + "01" , depth + 1); + doBox( id , want , toscan + "10" , depth + 1); + doBox( id , want , toscan + "11" , depth + 1); + return; + } + } + } @@ -1718,6 +1723,7 @@ namespace mongo { stats.appendNumber( "nscanned" , gs._hopper->_lookedAt ); stats.appendNumber( "objectsLoaded" , gs._hopper->_objectsLoaded ); stats.append( "avgDistance" , totalDistance / x ); + stats.append( "maxDistance" , gs._hopper->farthest() ); stats.done(); return true; diff --git a/db/json.cpp b/db/json.cpp index 77983c39c36..208d78f5c0f 100644 --- a/db/json.cpp +++ b/db/json.cpp @@ -557,15 +557,20 @@ public: ObjectBuilder &b; }; - BSONObj fromjson( const char *str ) { - if ( str[0] == '\0' ) + BSONObj fromjson( const char *str , int* len) { + if ( str[0] == '\0' ){ + if (len) *len = 0; return BSONObj(); + } + ObjectBuilder b; JsonGrammar parser( b ); parse_info<> result = parse( str, parser, space_p ); - if ( !result.full ) { - int len = strnlen(result.stop , 10); - massert( 10340 , "Failure parsing JSON string near: " + string( result.stop, len ) , false ); + if (len) { + *len = result.stop - str; + } else if ( !result.full ) { + int limit = strnlen(result.stop , 10); + msgasserted(10340, "Failure parsing JSON string near: " + string( result.stop, limit )); } BSONObj ret = b.pop(); assert( b.empty() ); @@ -576,8 +581,4 @@ public: return fromjson( str.c_str() ); } - BSONObj fromjson( istream &str ) { - return BSONObj(); - } - } // namespace mongo diff --git a/db/json.h b/db/json.h index bbed12c0801..68dae042574 100644 --- a/db/json.h +++ b/db/json.h @@ -35,6 +35,7 @@ namespace mongo { */ BSONObj fromjson(const string &str); - BSONObj fromjson(const char *str); + /** len will be size of JSON object in text chars. */ + BSONObj fromjson(const char *str, int* len=NULL); } // namespace mongo diff --git a/jstests/tool/exportimport1.js b/jstests/tool/exportimport1.js index 22934febb2e..915adcde59b 100644 --- a/jstests/tool/exportimport1.js +++ b/jstests/tool/exportimport1.js @@ -17,4 +17,17 @@ assert.soon( "c.findOne()" , "no data after sleep" ); assert.eq( 1 , c.count() , "after restore 2" ); assert.eq( 22 , c.findOne().a , "after restore 2" ); + +// now with --jsonArray + +t.runTool( "export" , "--jsonArray" , "--out" , t.extFile , "-d" , t.baseName , "-c" , "foo" ); + +c.drop(); +assert.eq( 0 , c.count() , "after drop" , "-d" , t.baseName , "-c" , "foo" );; + +t.runTool( "import" , "--jsonArray" , "--file" , t.extFile , "-d" , t.baseName , "-c" , "foo" ); +assert.soon( "c.findOne()" , "no data after sleep" ); +assert.eq( 1 , c.count() , "after restore 2" ); +assert.eq( 22 , c.findOne().a , "after restore 2" ); + t.stop(); diff --git a/s/chunk.cpp b/s/chunk.cpp index 549d2ad8b85..972558e2b29 100644 --- a/s/chunk.cpp +++ b/s/chunk.cpp @@ -40,7 +40,7 @@ namespace mongo { // ------- Shard -------- - int Chunk::MaxChunkSize = 1024 * 1204 * 200; + int Chunk::MaxChunkSize = 1024 * 1024 * 200; Chunk::Chunk( ChunkManager * manager ) : _manager( manager ){ _modified = false; @@ -230,7 +230,6 @@ namespace mongo { return false; } - // update config db setShard( to ); // need to increment version # for old server @@ -240,6 +239,7 @@ namespace mongo { randomChunkOnOldServer->_markModified(); } + // update config db _manager->save(); BSONObj finishRes; diff --git a/tools/export.cpp b/tools/export.cpp index f6d60cd44ad..56038233082 100644 --- a/tools/export.cpp +++ b/tools/export.cpp @@ -39,6 +39,7 @@ public: ("query,q" , po::value<string>() , "query filter, as a JSON string" ) ("csv","export to csv instead of json") ("out,o", po::value<string>(), "output file; if not specified, stdout is used") + ("jsonArray", "output to a json array rather than one object per line") ; _usesstdout = false; } @@ -46,6 +47,7 @@ public: int run(){ string ns; const bool csv = hasParam( "csv" ); + const bool jsonArray = hasParam( "jsonArray" ); ostream *outPtr = &cout; string outfile = getParam( "out" ); auto_ptr<ofstream> fileStream; @@ -100,6 +102,9 @@ public: out << endl; } + if (jsonArray) + out << '['; + long long num = 0; while ( cursor->more() ) { num++; @@ -116,10 +121,18 @@ public: out << endl; } else { - out << obj.jsonString() << endl; + if (jsonArray && num != 1) + out << ','; + + out << obj.jsonString(); + + if (!jsonArray) + out << endl; } } + if (jsonArray) + out << ']' << endl; cerr << "exported " << num << " records" << endl; diff --git a/tools/import.cpp b/tools/import.cpp index 869d7103e43..3b2a66d10be 100644 --- a/tools/import.cpp +++ b/tools/import.cpp @@ -42,6 +42,8 @@ class Import : public Tool { bool _headerLine; bool _upsert; bool _doimport; + bool _jsonArray; + vector<string> _upsertFields; void _append( BSONObjBuilder& b , const string& fieldName , const string& data ){ if ( b.appendAsNumber( fieldName , data ) ) @@ -143,7 +145,9 @@ public: ("drop", "drop collection first " ) ("headerline","CSV,TSV only - use first line as headers") ("upsert", "insert or update objects that already exist" ) + ("upsertFields", po::value<string>(), "comma-separated fields for the query part of the upsert. You should make sure this is indexed" ) ("stopOnError", "stop importing at first error rather than continuing" ) + ("jsonArray", "load a json array, not one item per line. Currently limited to 4MB." ) ; add_hidden_options() ("noimport", "don't actually import. useful for benchmarking parser" ) @@ -154,6 +158,7 @@ public: _headerLine = false; _upsert = false; _doimport = true; + _jsonArray = false; } int run(){ @@ -197,6 +202,13 @@ public: if ( hasParam( "upsert" ) ){ _upsert = true; + + string uf = getParam("upsertFields"); + if (uf.empty()){ + _upsertFields.push_back("_id"); + } else { + StringSplitter(uf.c_str(), ",").split(_upsertFields); + } } if ( hasParam( "noimport" ) ){ @@ -227,6 +239,10 @@ public: needFields(); } + if (_type == JSON && hasParam("jsonArray")){ + _jsonArray = true; + } + int errors = 0; int num = 0; @@ -237,30 +253,70 @@ public: ProgressMeter pm( fileSize ); const int BUF_SIZE = 1024 * 1024 * 4; boost::scoped_array<char> line(new char[BUF_SIZE+2]); - while ( in->rdstate() == 0 ){ - char * buf = line.get(); - in->getline( buf , BUF_SIZE ); + char * buf = line.get(); + while ( _jsonArray || in->rdstate() == 0 ){ + if (_jsonArray){ + if (buf == line.get()){ //first pass + in->read(buf, BUF_SIZE); + uassert(13295, "JSONArray file too large", (in->rdstate() & ios_base::eofbit)); + buf[ in->gcount() ] = '\0'; + } + } else { + buf = line.get(); + in->getline( buf , BUF_SIZE ); + log(1) << "got line:" << buf << endl; + } uassert( 10263 , "unknown error reading file" , (!(in->rdstate() & ios_base::badbit)) && (!(in->rdstate() & ios_base::failbit) || (in->rdstate() & ios_base::eofbit)) ); - log(1) << "got line:" << buf << endl; - while( isspace( buf[0] ) ) buf++; - - int len = strlen( buf ); - if ( ! len ) - continue; - - buf[len+1] = 0; + int len = 0; + if (_jsonArray){ + while (buf[0] != '{' && buf[0] != '\0') { + len++; + buf++; + } + if (buf[0] == '\0') + break; + } else { + while (isspace( buf[0] )){ + len++; + buf++; + } + if (buf[0] == '\0') + continue; + len += strlen( buf ); + } try { - BSONObj o = parseLine( buf ); + BSONObj o; + if (_jsonArray){ + int jslen; + o = fromjson(buf, &jslen); + len += jslen; + buf += jslen; + } else { + o = parseLine( buf ); + } + if ( _headerLine ){ _headerLine = false; } else if (_doimport) { - BSONElement id = o["_id"]; - if (_upsert && !id.eoo()){ - conn().update( ns, QUERY("_id" << id), o, true); + bool doUpsert = _upsert; + BSONObjBuilder b; + if (_upsert){ + for (vector<string>::const_iterator it=_upsertFields.begin(), end=_upsertFields.end(); it!=end; ++it){ + BSONElement e = o.getFieldDotted(it->c_str()); + if (e.eoo()){ + doUpsert = false; + break; + } + b.appendAs(e, *it); + } + } + + if (doUpsert){ + conn().update(ns, Query(b.obj()), o, true); } else { conn().insert( ns.c_str() , o ); } @@ -273,7 +329,7 @@ public: cout << buf << endl; errors++; - if (hasParam("stopOnError")) + if (hasParam("stopOnError") || _jsonArray) break; } |