summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Eliot Horowitz <eliot@10gen.com> 2009-10-09 14:55:29 -0400
committer Eliot Horowitz <eliot@10gen.com> 2009-10-09 14:55:29 -0400
commit 0496aa5ac7e1a99b082f5375c38da9871d08847c (patch)
tree 7664d2f0364120be70157fa5d8f9dccf92d1daab
parent 35bb22d27ec01753a2507d19c3b2822cf29a8dbf (diff)
download mongo-0496aa5ac7e1a99b082f5375c38da9871d08847c.tar.gz
changed mongoimportJSON to mongoimport - handles json/tsv/csv
-rw-r--r-- .gitignore 2
-rw-r--r-- SConstruct 2
-rw-r--r-- jstests/tool/tool1.js 4
-rw-r--r-- tools/export.cpp 33
-rw-r--r-- tools/import.cpp 243
-rw-r--r-- tools/importJSON.cpp 116
-rw-r--r-- tools/tool.cpp 19
-rw-r--r-- tools/tool.h 6
8 files changed, 280 insertions, 145 deletions
diff --git a/.gitignore b/.gitignore
index e6a74705040..453057ef7f7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -57,7 +57,7 @@ mongorestore
mongofiles
mongoexport
-mongoimportjson
+mongoimport
mongosniff
mongobridge
diff --git a/SConstruct b/SConstruct
index f37912f2091..135ee402c58 100644
--- a/SConstruct
+++ b/SConstruct
@@ -840,7 +840,7 @@ env.Program( "mongodump" , allToolFiles + [ "tools/dump.cpp" ] )
env.Program( "mongorestore" , allToolFiles + [ "tools/restore.cpp" ] )
env.Program( "mongoexport" , allToolFiles + [ "tools/export.cpp" ] )
-env.Program( "mongoimportjson" , allToolFiles + [ "tools/importJSON.cpp" ] )
+env.Program( "mongoimport" , allToolFiles + [ "tools/import.cpp" ] )
env.Program( "mongofiles" , allToolFiles + [ "tools/files.cpp" ] )
diff --git a/jstests/tool/tool1.js b/jstests/tool/tool1.js
index 173981aa476..00e92e798b7 100644
--- a/jstests/tool/tool1.js
+++ b/jstests/tool/tool1.js
@@ -37,7 +37,7 @@ runMongoProgram( "mongoexport", "--host", "127.0.0.1:" + port, "-d", baseName, "
assert.lt( 10 , fileSize() , "file size changed" );
c.drop();
-runMongoProgram( "mongoimportjson", "--host", "127.0.0.1:" + port, "-d", baseName, "-c", baseName, "--file", externalFile );
+runMongoProgram( "mongoimport", "--host", "127.0.0.1:" + port, "-d", baseName, "-c", baseName, "--file", externalFile );
assert.soon( "c.findOne()" , "mongo import json A" );
assert( c.findOne() && 1 == c.findOne().a , "mongo import json B" );
@@ -57,7 +57,7 @@ resetDbpath( externalPath );
runMongoProgram( "mongoexport", "--dbpath", dbPath, "-d", baseName, "-c", baseName, "--out", externalFile );
resetDbpath( dbPath );
-runMongoProgram( "mongoimportjson", "--dbpath", dbPath, "-d", baseName, "-c", baseName, "--file", externalFile );
+runMongoProgram( "mongoimport", "--dbpath", dbPath, "-d", baseName, "-c", baseName, "--file", externalFile );
m = startMongoProgram( "mongod", "--port", port, "--dbpath", dbPath, "--nohttpinterface", "--bind_ip", "127.0.0.1" );
c = m.getDB( baseName ).getCollection( baseName );
assert.soon( "c.findOne()" , "object missing b" );
diff --git a/tools/export.cpp b/tools/export.cpp
index 16c5b93eb22..f142cefecb4 100644
--- a/tools/export.cpp
+++ b/tools/export.cpp
@@ -26,7 +26,6 @@
#include <iostream>
#include <boost/program_options.hpp>
-#include <pcrecpp.h>
using namespace mongo;
@@ -42,7 +41,7 @@ public:
("out,o", po::value<string>(), "output file; if not specified, stdout is used")
;
}
-
+
int run(){
string ns;
const bool csv = hasParam( "csv" );
@@ -55,8 +54,6 @@ public:
BSONObj * fieldsToReturn = 0;
BSONObj realFieldsToReturn;
- vector<string> fields;
-
try {
ns = getNS();
} catch (...) {
@@ -67,24 +64,12 @@ public:
auth();
if ( hasParam( "fields" ) ){
-
- BSONObjBuilder b;
-
- string fields_arg = getParam("fields");
- pcrecpp::StringPiece input(fields_arg);
-
- string f;
- pcrecpp::RE re("([\\w\\.]+),?" );
- while ( re.Consume( &input, &f ) ){
- fields.push_back( f );
- b.append( f.c_str() , 1 );
- }
-
- realFieldsToReturn = b.obj();
- fieldsToReturn = &realFieldsToReturn;
+ needFields();
+ fieldsToReturn = &_fieldsObj;
}
- if ( csv && fields.size() == 0 ){
+
+ if ( csv && _fields.size() == 0 ){
cerr << "csv mode requires a field list" << endl;
return -1;
}
@@ -93,8 +78,8 @@ public:
auto_ptr<DBClientCursor> cursor = conn().query( ns.c_str() , ((Query)(getParam( "query" , "" ))).snapshot() , 0 , 0 , fieldsToReturn , Option_SlaveOk | Option_NoCursorTimeout );
if ( csv ){
- for ( vector<string>::iterator i=fields.begin(); i != fields.end(); i++ ){
- if ( i != fields.begin() )
+ for ( vector<string>::iterator i=_fields.begin(); i != _fields.end(); i++ ){
+ if ( i != _fields.begin() )
out << ",";
out << *i;
}
@@ -104,8 +89,8 @@ public:
while ( cursor->more() ) {
BSONObj obj = cursor->next();
if ( csv ){
- for ( vector<string>::iterator i=fields.begin(); i != fields.end(); i++ ){
- if ( i != fields.begin() )
+ for ( vector<string>::iterator i=_fields.begin(); i != _fields.end(); i++ ){
+ if ( i != _fields.begin() )
out << ",";
const BSONElement & e = obj.getFieldDotted(i->c_str());
if ( ! e.eoo() ){
diff --git a/tools/import.cpp b/tools/import.cpp
new file mode 100644
index 00000000000..5bca049146e
--- /dev/null
+++ b/tools/import.cpp
@@ -0,0 +1,243 @@
+// import.cpp
+
+/**
+* Copyright (C) 2008 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "stdafx.h"
+#include "client/dbclient.h"
+#include "db/json.h"
+
+#include "tool.h"
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+#include <boost/program_options.hpp>
+
+using namespace mongo;
+
+namespace po = boost::program_options;
+
+/**
+ * mongoimport tool: reads records one line at a time from a file (or stdin)
+ * and inserts each one into the target collection.  A line is either a JSON
+ * document or a row of CSV/TSV values mapped onto the names given in --fields.
+ */
+class Import : public Tool {
+
+    enum Type { JSON , CSV , TSV };
+    Type _type;
+
+    // field separator for CSV/TSV rows; parseLine() never reads it for JSON
+    const char * _sep;
+
+    /**
+     * Appends data to b as a number if it looks like one.
+     * Accepted form: optional leading '-', then digits with at most one '.',
+     * and at least one digit overall.
+     * @return true if a numeric field was appended, false if nothing was appended
+     */
+    bool _appendNumber( BSONObjBuilder& b , const string& fieldName , const string& data ){
+        if ( data.size() == 0 )
+            return false;
+
+        unsigned int pos=0;
+        if ( data[0] == '-' )
+            pos++;
+
+        bool hasDec = false;
+        bool hasDigit = false;
+
+        for ( ; pos<data.size(); pos++ ){
+            // cast first: isdigit() is undefined for negative char values
+            if ( isdigit( static_cast<unsigned char>( data[pos] ) ) ){
+                hasDigit = true;
+                continue;
+            }
+
+            if ( data[pos] == '.' ){
+                if ( hasDec )
+                    return false;
+                hasDec = true;
+                continue;
+            }
+
+            return false;
+        }
+
+        // without this check "-", "." and "-." would be stored as the number 0
+        if ( ! hasDigit )
+            return false;
+
+        if ( hasDec ){
+            double d = atof( data.c_str() );
+            b.append( fieldName.c_str() , d );
+            return true;
+        }
+
+        // short values are converted with atoi, longer ones with atol
+        if ( data.size() < 8 ){
+            b.append( fieldName , atoi( data.c_str() ) );
+            return true;
+        }
+
+        b.append( fieldName , atol( data.c_str() ) );
+        return true;
+    }
+
+    /**
+     * Appends data to b under fieldName: as a number when _appendNumber()
+     * accepts it, otherwise as a string.
+     */
+    void _append( BSONObjBuilder& b , const string& fieldName , const string& data ){
+        if ( _appendNumber( b , fieldName , data ) )
+            return;
+
+        // TODO: other types?
+        b.append( fieldName.c_str() , data );
+    }
+
+    /**
+     * Converts one input line into a BSONObj.
+     * JSON lines are handed to fromjson().  CSV/TSV lines are split on _sep;
+     * the n-th value is stored under _fields[n], or under "field<n>" once the
+     * row has more values than there are configured field names.
+     */
+    BSONObj parseLine( const char * line ){
+        if ( _type == JSON )
+            return fromjson( line );
+
+        BSONObjBuilder b;
+
+        unsigned int pos=0;
+        while ( line[0] ){
+            string name;
+            if ( pos < _fields.size() ){
+                name = _fields[pos];
+            }
+            else {
+                stringstream ss;
+                ss << "field" << pos;
+                name = ss.str();
+            }
+            pos++;
+
+            const char * end = strstr( line , _sep );
+            if ( ! end ){
+                // no further separator: the rest of the line is the last value
+                _append( b , name , string( line ) );
+                break;
+            }
+
+            _append( b , name , string( line , end - line ) );
+            // both separators set in run() are a single character long
+            line = end + 1;
+        }
+
+        return b.obj();
+    }
+
+public:
+    Import() : Tool( "import" ){
+        add_options()
+            ("type",po::value<string>() , "type of file to import.  default: json (json,csv,tsv)")
+            ("fields,f" , po::value<string>() , "comma seperated list of field names e.g. -f name,age" )
+            ("file",po::value<string>() , "file to import from; if not specified stdin is used" )
+            ("drop", "drop collection first " )
+            ;
+        addPositionArg( "file" , 1 );
+        _type = JSON;
+        // give the pointer a defined value; run() replaces it for csv/tsv
+        _sep = ",";
+    }
+
+    /**
+     * Imports every non-blank line of the input into the target namespace.
+     * @return 0 when all lines were imported, -1 on a usage error or when at
+     *         least one line failed to parse or insert
+     */
+    int run(){
+        string filename = getParam( "file" );
+        long long fileSize = -1;
+
+        istream * in = &cin;
+
+        ifstream file( filename.c_str() , ios_base::in | ios_base::binary);
+
+        // an empty name or "-" keeps reading from stdin
+        if ( filename.size() > 0 && filename != "-" ){
+            if ( ! exists( filename ) ){
+                cerr << "file doesn't exist: " << filename << endl;
+                return -1;
+            }
+            in = &file;
+            fileSize = file_size( filename );
+        }
+
+        string ns;
+
+        try {
+            ns = getNS();
+        } catch (...) {
+            printHelp(cerr);
+            return -1;
+        }
+
+        auth();
+
+        if ( hasParam( "drop" ) ){
+            cout << "dropping: " << ns << endl;
+            conn().dropCollection( ns.c_str() );
+        }
+
+        if ( hasParam( "type" ) ){
+            string type = getParam( "type" );
+            if ( type == "json" )
+                _type = JSON;
+            else if ( type == "csv" ){
+                _type = CSV;
+                _sep = ",";
+            }
+            else if ( type == "tsv" ){
+                _type = TSV;
+                _sep = "\t";
+            }
+            else {
+                cerr << "don't know what type [" << type << "] is" << endl;
+                return -1;
+            }
+        }
+
+        if ( _type == CSV || _type == TSV ){
+            if ( ! hasParam( "fields" ) ){
+                cerr << "need to speicfy fields for csv and tsv" << endl;
+                return -1;
+            }
+            needFields();
+        }
+
+        int errors = 0;
+
+        int num = 0;
+
+        time_t start = time(0);
+
+        ProgressMeter pm( fileSize );
+        const int BUF_SIZE = 1024 * 1024 * 4;
+        // heap allocated: a buffer of this size is too large to place on the stack safely
+        vector<char> lineBuf( BUF_SIZE + 128 );
+        char * line = &lineBuf[0];
+        while ( *in ){
+            in->getline( line , BUF_SIZE );
+
+            // skip leading whitespace (cast: isspace() is undefined for negative char values)
+            char * buf = line;
+            while( isspace( static_cast<unsigned char>( buf[0] ) ) ) buf++;
+
+            int len = strlen( buf );
+            if ( ! len )
+                continue;
+
+            // eofbit on its own means the final line had no trailing newline;
+            // the buffer still holds a complete record that must be imported
+            const bool lastLine = ( in->rdstate() == ios_base::eofbit );
+            // any other state (e.g. a line longer than the buffer) is fatal
+            assert( lastLine || in->rdstate() == 0 );
+
+            try {
+                BSONObj o = parseLine( buf );
+                conn().insert( ns.c_str() , o );
+            }
+            catch ( std::exception& e ){
+                cout << "exception:" << e.what() << endl;
+                cout << buf << endl;
+                errors++;
+            }
+
+            num++;
+            if ( pm.hit( len + 1 ) ){
+                // clamp to one second so the rate never divides by zero
+                time_t elapsed = time(0) - start;
+                if ( elapsed < 1 )
+                    elapsed = 1;
+                cout << "\t\t\t" << num << "\t" << ( num / elapsed ) << "/second" << endl;
+            }
+
+            if ( lastLine )
+                break;
+        }
+
+        cout << "imported " << num << " objects" << endl;
+
+        if ( errors == 0 )
+            return 0;
+
+        cerr << "encountered " << errors << " error" << ( errors == 1 ? "" : "s" ) << endl;
+        return -1;
+    }
+};
+
+// program entry point: hands the command line to the Import tool and
+// returns whatever status it reports
+int main( int argc , char ** argv ) {
+    Import importer;
+    const int exitCode = importer.main( argc , argv );
+    return exitCode;
+}
diff --git a/tools/importJSON.cpp b/tools/importJSON.cpp
deleted file mode 100644
index e54c18b0fc3..00000000000
--- a/tools/importJSON.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-// importJSON.cpp
-
-/**
-* Copyright (C) 2008 10gen Inc.
-*
-* This program is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Affero General Public License, version 3,
-* as published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU Affero General Public License for more details.
-*
-* You should have received a copy of the GNU Affero General Public License
-* along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "stdafx.h"
-#include "client/dbclient.h"
-#include "db/json.h"
-
-#include "tool.h"
-
-#include <fstream>
-#include <iostream>
-
-#include <boost/program_options.hpp>
-
-using namespace mongo;
-
-namespace po = boost::program_options;
-
-class ImportJSON : public Tool {
-public:
- ImportJSON() : Tool( "importjson" ){
- add_options()
- ("file",po::value<string>() , "file to import from; if not specified stdin is used" )
- ("drop", "drop collection first " )
- ;
- addPositionArg( "file" , 1 );
- }
-
- int run(){
- string filename = getParam( "file" );
- long long fileSize = -1;
-
- istream * in = &cin;
-
- ifstream file( filename.c_str() , ios_base::in | ios_base::binary);
-
- if ( filename.size() > 0 && filename != "-" ){
- in = &file;
- fileSize = file_size( filename );
- }
-
- string ns;
-
- try {
- ns = getNS();
- } catch (...) {
- printHelp(cerr);
- return -1;
- }
-
- auth();
-
- if ( hasParam( "drop" ) ){
- cout << "dropping: " << ns << endl;
- conn().dropCollection( ns.c_str() );
- }
-
- int num = 0;
-
- time_t start = time(0);
-
- ProgressMeter pm( fileSize );
- const int BUF_SIZE = 1024 * 1024 * 4;
- char line[ (1024 * 1024 * 4) + 128];
- while ( *in ){
- in->getline( line , BUF_SIZE );
-
- char * buf = line;
- while( isspace( buf[0] ) ) buf++;
-
- int len = strlen( buf );
- if ( ! len )
- continue;
-
- if ( in->rdstate() == ios_base::eofbit )
- break;
- assert( in->rdstate() == 0 );
-
- try {
- BSONObj o = fromjson( buf );
- conn().insert( ns.c_str() , o );
- }
- catch ( MsgAssertionException& ma ){
- cout << "exception:" << ma.toString() << endl;
- cout << buf << endl;
- }
-
- num++;
- if ( pm.hit( len + 1 ) ){
- cout << "\t\t\t" << num << "\t" << ( num / ( time(0) - start ) ) << "/second" << endl;
- }
- }
-
- return 0;
- }
-};
-
-int main( int argc , char ** argv ) {
- ImportJSON import;
- return import.main( argc , argv );
-}
diff --git a/tools/tool.cpp b/tools/tool.cpp
index 8aa1cfaac98..5784099a956 100644
--- a/tools/tool.cpp
+++ b/tools/tool.cpp
@@ -5,6 +5,7 @@
#include <iostream>
#include <boost/filesystem/operations.hpp>
+#include <pcrecpp.h>
#include "util/file_allocator.h"
@@ -156,6 +157,24 @@ mongo::DBClientBase& mongo::Tool::conn( bool slaveIfPaired ){
return *_conn;
}
+/**
+ * Parses the --fields argument into _fields and builds the matching
+ * { name : 1 , ... } object in _fieldsObj.
+ * The uassert guards against being called without a --fields parameter.
+ */
+void mongo::Tool::needFields(){
+    uassert( "you need to specify fields" , hasParam( "fields" ) );
+
+    // _fieldsObj is rebuilt from scratch below, so reset _fields as well;
+    // otherwise a repeated call would leave duplicate names in the vector
+    _fields.clear();
+
+    BSONObjBuilder b;
+
+    string fields_arg = getParam("fields");
+    pcrecpp::StringPiece input(fields_arg);
+
+    // each match is a run of word characters and dots, optionally followed by
+    // a comma; the loop ends at the first position where that no longer matches
+    string f;
+    pcrecpp::RE re("([\\w\\.]+),?" );
+    while ( re.Consume( &input, &f ) ){
+        _fields.push_back( f );
+        b.append( f.c_str() , 1 );
+    }
+
+    _fieldsObj = b.obj();
+}
+
void mongo::Tool::auth( string dbname ){
if ( ! dbname.size() )
dbname = _db;
diff --git a/tools/tool.h b/tools/tool.h
index a398ed5a6d8..c6d2db8780a 100644
--- a/tools/tool.h
+++ b/tools/tool.h
@@ -61,7 +61,7 @@ namespace mongo {
mongo::DBClientBase &conn( bool slaveIfPaired = false );
void auth( string db = "" );
-
+
string _name;
string _db;
@@ -70,7 +70,11 @@ namespace mongo {
string _username;
string _password;
+ void needFields();
+ vector<string> _fields;
+ BSONObj _fieldsObj;
+
private:
string _host;
mongo::DBClientBase * _conn;