author    dwight <dwight@10gen.com>  2010-06-14 12:40:49 -0400
committer dwight <dwight@10gen.com>  2010-06-14 12:40:49 -0400
commit    56bdcafd9754c03954bd7bddd056d18a68aaa9de (patch)
tree      ee06b6a8f5bd37a8d8cf50e6efe31248bac1aaab
parent    c78026c136083d772a5cd641f121e8fb1f0fdfc9 (diff)
parent    efbbcc80beb24896387daecd64a8535f26fc0665 (diff)
download  mongo-56bdcafd9754c03954bd7bddd056d18a68aaa9de.tar.gz
Merge branch 'master' of github.com:mongodb/mongo
-rw-r--r--  db/index_geo2d.cpp             | 72
-rw-r--r--  db/json.cpp                    | 19
-rw-r--r--  db/json.h                      |  3
-rw-r--r--  jstests/tool/exportimport1.js  | 13
-rw-r--r--  s/chunk.cpp                    |  4
-rw-r--r--  tools/export.cpp               | 15
-rw-r--r--  tools/import.cpp               | 88
7 files changed, 152 insertions(+), 62 deletions(-)
diff --git a/db/index_geo2d.cpp b/db/index_geo2d.cpp
index 54c9e36075a..fa71f0bc50c 100644
--- a/db/index_geo2d.cpp
+++ b/db/index_geo2d.cpp
@@ -912,11 +912,11 @@ namespace mongo {
virtual void add( const KeyNode& node ){
// when looking at other boxes, don't want to look at some object twice
- if ( _seen.count( node.recordLoc ) ){
+ pair<set<DiskLoc>::iterator,bool> seenBefore = _seen.insert( node.recordLoc );
+ if ( ! seenBefore.second ){
GEODEBUG( "\t\t\t\t already seen : " << node.recordLoc.obj()["_id"] );
return;
}
- _seen.insert( node.recordLoc );
_lookedAt++;
// distance check
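
The hunk above leans on set::insert's return value: the bool in the returned pair<iterator,bool> is false when the element was already present, so a single tree traversal replaces the old count()-then-insert() pair. A minimal standalone sketch of the idiom:

    #include <iostream>
    #include <set>

    int main() {
        std::set<int> seen;
        const int locs[] = { 7, 3, 7, 9, 3 };
        for (int loc : locs) {
            // .second is false when the value was already in the set,
            // so membership test and insertion share one lookup
            if (!seen.insert(loc).second) {
                std::cout << "already seen: " << loc << "\n";
                continue;
            }
            std::cout << "first visit:  " << loc << "\n";
        }
    }
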
@@ -969,7 +969,7 @@ namespace mongo {
GeoHopper( const Geo2dType * g , unsigned max , const GeoHash& n , const BSONObj& filter = BSONObj() , double maxDistance = numeric_limits<double>::max() )
: GeoAccumulator( g , filter ) , _max( max ) , _near( n ), _maxDistance( maxDistance ) {
-
+ _farthest = -1;
}
virtual bool checkDistance( const GeoHash& h , double& d ){
@@ -986,24 +986,23 @@ namespace mongo {
if ( _points.size() > _max ){
_points.erase( --_points.end() );
}
- }
- double farthest(){
- if ( _points.size() == 0 )
- return -1;
-
Holder::iterator i = _points.end();
i--;
- return i->_distance;
+ _farthest = i->_distance;
+ }
+
+ double farthest() const {
+ return _farthest;
}
unsigned _max;
GeoHash _near;
Holder _points;
double _maxDistance;
-
+ double _farthest;
};
-
+
struct BtreeLocation {
int pos;
bool found;
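
The GeoHopper hunks cache the farthest retained distance in the new _farthest member, refreshed inside add() whenever the bounded point set changes, so farthest() becomes an O(1) const accessor (and still reports -1 before any point arrives, matching the old empty-set behavior). A condensed model of the same bookkeeping, with hypothetical names:

    #include <cstdio>
    #include <set>

    struct Hopper {
        explicit Hopper(unsigned max) : _max(max), _farthest(-1) {}

        void add(double distance) {
            _points.insert(distance);
            if (_points.size() > _max)
                _points.erase(--_points.end()); // evict the current worst
            _farthest = *--_points.end();       // re-cache after every change
        }

        double farthest() const { return _farthest; } // -1 until first add()

        unsigned _max;
        std::multiset<double> _points;
        double _farthest;
    };

    int main() {
        Hopper h(2);
        h.add(3.0); h.add(1.0); h.add(2.0); // keeps the two nearest: 1 and 2
        std::printf("farthest kept: %g\n", h.farthest()); // prints 2
    }
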
@@ -1133,14 +1132,17 @@ namespace mongo {
if ( _found && _prefix.constrains() ){
// 2
Point center( _spec , _n );
- double boxSize = _spec->sizeEdge( _prefix );
double farthest = hopper->farthest();
- if ( farthest > boxSize )
- boxSize = farthest;
- Box want( center._x - ( boxSize / 2 ) , center._y - ( boxSize / 2 ) , boxSize );
- while ( _spec->sizeEdge( _prefix ) < boxSize )
+ Box want( center._x - farthest , center._y - farthest , farthest * 2 );
+ _prefix = _n;
+ while ( _spec->sizeEdge( _prefix ) < ( farthest / 2 ) ){
_prefix = _prefix.up();
- log(1) << "want: " << want << " found:" << _found << " nscanned: " << _nscanned << " hash size:" << _spec->sizeEdge( _prefix ) << endl;
+ }
+
+ if ( logLevel > 0 ){
+ log(1) << "want: " << want << " found:" << _found << " nscanned: " << _nscanned << " hash size:" << _spec->sizeEdge( _prefix )
+ << " farthest: " << farthest << " using box: " << Box( _spec , _prefix ).toString() << endl;
+ }
for ( int x=-1; x<=1; x++ ){
for ( int y=-1; y<=1; y++ ){
@@ -1158,34 +1160,37 @@ namespace mongo {
void doBox( const IndexDetails& id , const Box& want , const GeoHash& toscan , int depth = 0 ){
Box testBox( _spec , toscan );
- if ( logLevel > 0 ){
- log(1) << "\t";
- for ( int i=0; i<depth; i++ ){
- log(1) << "\t";
- log() << " doBox: " << testBox << "\t" << toscan.toString() << " scanned so far: " << _nscanned << endl;
- }
+ if ( logLevel > 2 ){
+ cout << "\t";
+ for ( int i=0; i<depth; i++ )
+ cout << "\t";
+ cout << " doBox: " << testBox.toString() << "\t" << toscan.toString() << " scanned so far: " << _nscanned << endl;
}
-
+
double intPer = testBox.intersects( want );
if ( intPer <= 0 )
return;
- if ( intPer < .5 && depth < 3 ){
- doBox( id , want , toscan + "00" , depth + 1);
- doBox( id , want , toscan + "01" , depth + 1);
- doBox( id , want , toscan + "10" , depth + 1);
- doBox( id , want , toscan + "11" , depth + 1);
- return;
- }
+ bool goDeeper = intPer < .5 && depth < 2;
+ long long myscanned = 0;
+
BtreeLocation loc;
loc.bucket = id.head.btree()->locate( id , id.head , toscan.wrap() , Ordering::make(_spec->_order) ,
loc.pos , loc.found , minDiskLoc );
loc.checkCur( _found , _hopper.get() );
- while ( loc.hasPrefix( toscan ) && loc.advance( 1 , _found , _hopper.get() ) )
+ while ( loc.hasPrefix( toscan ) && loc.advance( 1 , _found , _hopper.get() ) ){
_nscanned++;
-
+ if ( ++myscanned > 100 && goDeeper ){
+ doBox( id , want , toscan + "00" , depth + 1);
+ doBox( id , want , toscan + "01" , depth + 1);
+ doBox( id , want , toscan + "10" , depth + 1);
+ doBox( id , want , toscan + "11" , depth + 1);
+ return;
+ }
+ }
+
}
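
This rewrite inverts doBox()'s strategy: the old code recursed into the four geohash children up front whenever the overlap with the target box was below one half, while the new code scans the cell's btree range first and subdivides only after more than 100 keys have gone by (and only down to depth 2). A toy model of that control flow, with keysIn() standing in for the btree cursor:

    #include <iostream>
    #include <string>

    // stand-in for walking the btree range of one geohash cell;
    // key density shrinks as the prefix grows
    static long long keysIn(const std::string& prefix) {
        return 1000 >> prefix.size();
    }

    static void doBox(const std::string& toscan, double intPer, int depth = 0) {
        bool goDeeper = intPer < .5 && depth < 2;
        long long myscanned = 0;
        long long total = keysIn(toscan);
        for (long long i = 0; i < total; i++) {
            // a dense, poorly-overlapping cell is worth splitting;
            // a sparse one is cheaper to finish scanning flat
            if (++myscanned > 100 && goDeeper) {
                static const char* quads[] = { "00", "01", "10", "11" };
                for (int q = 0; q < 4; q++)
                    doBox(toscan + quads[q], intPer, depth + 1);
                return;
            }
        }
        std::cout << toscan << ": scanned " << myscanned << " keys\n";
    }

    int main() { doBox("", .3); }

When a cell does subdivide, its children rescan keys the parent already visited; the _seen set from the first hunk is what makes that double visit harmless.
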
@@ -1718,6 +1723,7 @@ namespace mongo {
stats.appendNumber( "nscanned" , gs._hopper->_lookedAt );
stats.appendNumber( "objectsLoaded" , gs._hopper->_objectsLoaded );
stats.append( "avgDistance" , totalDistance / x );
+ stats.append( "maxDistance" , gs._hopper->farthest() );
stats.done();
return true;
diff --git a/db/json.cpp b/db/json.cpp
index 77983c39c36..208d78f5c0f 100644
--- a/db/json.cpp
+++ b/db/json.cpp
@@ -557,15 +557,20 @@ public:
ObjectBuilder &b;
};
- BSONObj fromjson( const char *str ) {
- if ( str[0] == '\0' )
+ BSONObj fromjson( const char *str , int* len) {
+ if ( str[0] == '\0' ){
+ if (len) *len = 0;
return BSONObj();
+ }
+
ObjectBuilder b;
JsonGrammar parser( b );
parse_info<> result = parse( str, parser, space_p );
- if ( !result.full ) {
- int len = strnlen(result.stop , 10);
- massert( 10340 , "Failure parsing JSON string near: " + string( result.stop, len ) , false );
+ if (len) {
+ *len = result.stop - str;
+ } else if ( !result.full ) {
+ int limit = strnlen(result.stop , 10);
+ msgasserted(10340, "Failure parsing JSON string near: " + string( result.stop, limit ));
}
BSONObj ret = b.pop();
assert( b.empty() );
@@ -576,8 +581,4 @@ public:
return fromjson( str.c_str() );
}
- BSONObj fromjson( istream &str ) {
- return BSONObj();
- }
-
} // namespace mongo
diff --git a/db/json.h b/db/json.h
index bbed12c0801..68dae042574 100644
--- a/db/json.h
+++ b/db/json.h
@@ -35,6 +35,7 @@ namespace mongo {
*/
BSONObj fromjson(const string &str);
- BSONObj fromjson(const char *str);
+ /** If len is non-NULL, it is set to the length of the parsed JSON text, in chars. */
+ BSONObj fromjson(const char *str, int* len=NULL);
} // namespace mongo
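
A hedged usage sketch of the new out-parameter (assumed to compile inside the mongo tree with db/json.h on the include path; the buffer contents are invented): passing a non-NULL len suppresses the parse-failure assertion and instead reports how many chars were consumed, which is what lets a caller step through a buffer holding several concatenated objects:

    #include "db/json.h"

    void walkConcatenated() {
        const char* buf = "{ a : 1 } { b : 2 }";
        int consumed;
        while (*buf) {
            mongo::BSONObj o = mongo::fromjson(buf, &consumed);
            if (consumed == 0)
                break;           // nothing parseable left
            buf += consumed;     // step past the object just parsed
            // ... use o ...
        }
    }
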
diff --git a/jstests/tool/exportimport1.js b/jstests/tool/exportimport1.js
index 22934febb2e..915adcde59b 100644
--- a/jstests/tool/exportimport1.js
+++ b/jstests/tool/exportimport1.js
@@ -17,4 +17,17 @@ assert.soon( "c.findOne()" , "no data after sleep" );
assert.eq( 1 , c.count() , "after restore 2" );
assert.eq( 22 , c.findOne().a , "after restore 2" );
+
+// now with --jsonArray
+
+t.runTool( "export" , "--jsonArray" , "--out" , t.extFile , "-d" , t.baseName , "-c" , "foo" );
+
+c.drop();
+assert.eq( 0 , c.count() , "after drop" );
+
+t.runTool( "import" , "--jsonArray" , "--file" , t.extFile , "-d" , t.baseName , "-c" , "foo" );
+assert.soon( "c.findOne()" , "no data after sleep" );
+assert.eq( 1 , c.count() , "after restore 2" );
+assert.eq( 22 , c.findOne().a , "after restore 2" );
+
t.stop();
diff --git a/s/chunk.cpp b/s/chunk.cpp
index 549d2ad8b85..972558e2b29 100644
--- a/s/chunk.cpp
+++ b/s/chunk.cpp
@@ -40,7 +40,7 @@ namespace mongo {
// ------- Shard --------
- int Chunk::MaxChunkSize = 1024 * 1204 * 200;
+ int Chunk::MaxChunkSize = 1024 * 1024 * 200;
Chunk::Chunk( ChunkManager * manager ) : _manager( manager ){
_modified = false;
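
The one-character fix above is worth quantifying: 1024 * 1204 * 200 = 246,579,200 bytes (about 235 MiB), while the intended 1024 * 1024 * 200 = 209,715,200 bytes is exactly 200 MiB, so chunks could previously grow roughly 18% past the advertised maximum before splitting.
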
@@ -230,7 +230,6 @@ namespace mongo {
return false;
}
- // update config db
setShard( to );
// need to increment version # for old server
@@ -240,6 +239,7 @@ namespace mongo {
randomChunkOnOldServer->_markModified();
}
+ // update config db
_manager->save();
BSONObj finishRes;
diff --git a/tools/export.cpp b/tools/export.cpp
index f6d60cd44ad..56038233082 100644
--- a/tools/export.cpp
+++ b/tools/export.cpp
@@ -39,6 +39,7 @@ public:
("query,q" , po::value<string>() , "query filter, as a JSON string" )
("csv","export to csv instead of json")
("out,o", po::value<string>(), "output file; if not specified, stdout is used")
+ ("jsonArray", "output to a json array rather than one object per line")
;
_usesstdout = false;
}
@@ -46,6 +47,7 @@ public:
int run(){
string ns;
const bool csv = hasParam( "csv" );
+ const bool jsonArray = hasParam( "jsonArray" );
ostream *outPtr = &cout;
string outfile = getParam( "out" );
auto_ptr<ofstream> fileStream;
@@ -100,6 +102,9 @@ public:
out << endl;
}
+ if (jsonArray)
+ out << '[';
+
long long num = 0;
while ( cursor->more() ) {
num++;
@@ -116,10 +121,18 @@ public:
out << endl;
}
else {
- out << obj.jsonString() << endl;
+ if (jsonArray && num != 1)
+ out << ',';
+
+ out << obj.jsonString();
+
+ if (!jsonArray)
+ out << endl;
}
}
+ if (jsonArray)
+ out << ']' << endl;
cerr << "exported " << num << " records" << endl;
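
Distilled from the hunk above, --jsonArray swaps per-record newlines for an opening bracket, comma separators, and a closing bracket; a self-contained sketch with canned documents standing in for the query cursor:

    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
        std::vector<std::string> docs;
        docs.push_back("{ \"a\" : 22 }");
        docs.push_back("{ \"a\" : 23 }");

        bool jsonArray = true;
        if (jsonArray) std::cout << '[';
        long long num = 0;
        for (size_t i = 0; i < docs.size(); i++) {
            num++;
            if (jsonArray && num != 1) std::cout << ','; // comma before all but the first
            std::cout << docs[i];
            if (!jsonArray) std::cout << '\n';           // line mode: one object per line
        }
        if (jsonArray) std::cout << ']' << std::endl;
        // prints: [{ "a" : 22 },{ "a" : 23 }]
    }
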
diff --git a/tools/import.cpp b/tools/import.cpp
index 869d7103e43..3b2a66d10be 100644
--- a/tools/import.cpp
+++ b/tools/import.cpp
@@ -42,6 +42,8 @@ class Import : public Tool {
bool _headerLine;
bool _upsert;
bool _doimport;
+ bool _jsonArray;
+ vector<string> _upsertFields;
void _append( BSONObjBuilder& b , const string& fieldName , const string& data ){
if ( b.appendAsNumber( fieldName , data ) )
@@ -143,7 +145,9 @@ public:
("drop", "drop collection first " )
("headerline","CSV,TSV only - use first line as headers")
("upsert", "insert or update objects that already exist" )
+ ("upsertFields", po::value<string>(), "comma-separated fields for the query part of the upsert. You should make sure this is indexed" )
("stopOnError", "stop importing at first error rather than continuing" )
+ ("jsonArray", "load a json array, not one item per line. Currently limited to 4MB." )
;
add_hidden_options()
("noimport", "don't actually import. useful for benchmarking parser" )
@@ -154,6 +158,7 @@ public:
_headerLine = false;
_upsert = false;
_doimport = true;
+ _jsonArray = false;
}
int run(){
@@ -197,6 +202,13 @@ public:
if ( hasParam( "upsert" ) ){
_upsert = true;
+
+ string uf = getParam("upsertFields");
+ if (uf.empty()){
+ _upsertFields.push_back("_id");
+ } else {
+ StringSplitter(uf.c_str(), ",").split(_upsertFields);
+ }
}
if ( hasParam( "noimport" ) ){
@@ -227,6 +239,10 @@ public:
needFields();
}
+ if (_type == JSON && hasParam("jsonArray")){
+ _jsonArray = true;
+ }
+
int errors = 0;
int num = 0;
@@ -237,30 +253,70 @@ public:
ProgressMeter pm( fileSize );
const int BUF_SIZE = 1024 * 1024 * 4;
boost::scoped_array<char> line(new char[BUF_SIZE+2]);
- while ( in->rdstate() == 0 ){
- char * buf = line.get();
- in->getline( buf , BUF_SIZE );
+ char * buf = line.get();
+ while ( _jsonArray || in->rdstate() == 0 ){
+ if (_jsonArray){
+ if (buf == line.get()){ //first pass
+ in->read(buf, BUF_SIZE);
+ uassert(13295, "JSONArray file too large", (in->rdstate() & ios_base::eofbit));
+ buf[ in->gcount() ] = '\0';
+ }
+ } else {
+ buf = line.get();
+ in->getline( buf , BUF_SIZE );
+ log(1) << "got line:" << buf << endl;
+ }
uassert( 10263 , "unknown error reading file" ,
(!(in->rdstate() & ios_base::badbit)) &&
(!(in->rdstate() & ios_base::failbit) || (in->rdstate() & ios_base::eofbit)) );
- log(1) << "got line:" << buf << endl;
- while( isspace( buf[0] ) ) buf++;
-
- int len = strlen( buf );
- if ( ! len )
- continue;
-
- buf[len+1] = 0;
+ int len = 0;
+ if (_jsonArray){
+ while (buf[0] != '{' && buf[0] != '\0') {
+ len++;
+ buf++;
+ }
+ if (buf[0] == '\0')
+ break;
+ } else {
+ while (isspace( buf[0] )){
+ len++;
+ buf++;
+ }
+ if (buf[0] == '\0')
+ continue;
+ len += strlen( buf );
+ }
try {
- BSONObj o = parseLine( buf );
+ BSONObj o;
+ if (_jsonArray){
+ int jslen;
+ o = fromjson(buf, &jslen);
+ len += jslen;
+ buf += jslen;
+ } else {
+ o = parseLine( buf );
+ }
+
if ( _headerLine ){
_headerLine = false;
} else if (_doimport) {
- BSONElement id = o["_id"];
- if (_upsert && !id.eoo()){
- conn().update( ns, QUERY("_id" << id), o, true);
+ bool doUpsert = _upsert;
+ BSONObjBuilder b;
+ if (_upsert){
+ for (vector<string>::const_iterator it=_upsertFields.begin(), end=_upsertFields.end(); it!=end; ++it){
+ BSONElement e = o.getFieldDotted(it->c_str());
+ if (e.eoo()){
+ doUpsert = false;
+ break;
+ }
+ b.appendAs(e, *it);
+ }
+ }
+
+ if (doUpsert){
+ conn().update(ns, Query(b.obj()), o, true);
} else {
conn().insert( ns.c_str() , o );
}
@@ -273,7 +329,7 @@ public:
cout << buf << endl;
errors++;
- if (hasParam("stopOnError"))
+ if (hasParam("stopOnError") || _jsonArray)
break;
}