summaryrefslogtreecommitdiff
path: root/src/mongo/tools/export.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/tools/export.cpp')
-rw-r--r--src/mongo/tools/export.cpp248
1 files changed, 248 insertions, 0 deletions
diff --git a/src/mongo/tools/export.cpp b/src/mongo/tools/export.cpp
new file mode 100644
index 00000000000..0d9f0225da0
--- /dev/null
+++ b/src/mongo/tools/export.cpp
@@ -0,0 +1,248 @@
+// export.cpp
+
+/**
+* Copyright (C) 2008 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "pch.h"
+#include "client/dbclient.h"
+#include "db/json.h"
+
+#include "tool.h"
+
+#include <fstream>
+#include <iostream>
+
+#include <boost/program_options.hpp>
+
+using namespace mongo;
+
+namespace po = boost::program_options;
+
+class Export : public Tool {
+public:
+ Export() : Tool( "export" ) {
+ addFieldOptions();
+ add_options()
+ ("query,q" , po::value<string>() , "query filter, as a JSON string" )
+ ("csv","export to csv instead of json")
+ ("out,o", po::value<string>(), "output file; if not specified, stdout is used")
+ ("jsonArray", "output to a json array rather than one object per line")
+ ("slaveOk,k", po::value<bool>()->default_value(true) , "use secondaries for export if available, default true")
+ ;
+ _usesstdout = false;
+ }
+
+ virtual void preSetup() {
+ string out = getParam("out");
+ if ( out == "-" ) {
+ // write output to standard error to avoid mangling output
+ // must happen early to avoid sending junk to stdout
+ useStandardOutput(false);
+ }
+ }
+
+ virtual void printExtraHelp( ostream & out ) {
+ out << "Export MongoDB data to CSV, TSV or JSON files.\n" << endl;
+ }
+
+ // Turn every double quote character into two double quote characters
+ // If hasSurroundingQuotes is true, doesn't escape the first and last
+ // characters of the string, if it's false, add a double quote character
+ // around the whole string.
+ string csvEscape(string str, bool hasSurroundingQuotes = false) {
+ size_t index = hasSurroundingQuotes ? 1 : 0;
+ while (((index = str.find('"', index)) != string::npos)
+ && (index < (hasSurroundingQuotes ? str.size() - 1 : str.size()))) {
+ str.replace(index, 1, "\"\"");
+ index += 2;
+ }
+ return hasSurroundingQuotes ? str : "\"" + str + "\"";
+ }
+
+ // Gets the string representation of a BSON object that can be correctly written to a CSV file
+ string csvString (const BSONElement& object) {
+ const char* binData; // Only used with BinData type
+
+ switch (object.type()) {
+ case MinKey:
+ return "$MinKey";
+ case MaxKey:
+ return "$MaxKey";
+ case NumberInt:
+ case NumberDouble:
+ case NumberLong:
+ case Bool:
+ return object.toString(false);
+ case String:
+ case Symbol:
+ return csvEscape(object.toString(false), true);
+ case Object:
+ return csvEscape(object.jsonString(Strict, false));
+ case Array:
+ return csvEscape(object.jsonString(Strict, false));
+ case BinData:
+ int len;
+ binData = object.binDataClean(len);
+ return toHex(binData, len);
+ case jstOID:
+ return "ObjectID(" + object.OID().toString() + ")"; // OIDs are always 24 bytes
+ case Date:
+ return timeToISOString(object.Date() / 1000);
+ case Timestamp:
+ return csvEscape(object.jsonString(Strict, false));
+ case RegEx:
+ return csvEscape("/" + string(object.regex()) + "/" + string(object.regexFlags()));
+ case Code:
+ return csvEscape(object.toString(false));
+ case CodeWScope:
+ if (string(object.codeWScopeScopeData()) == "") {
+ return csvEscape(object.toString(false));
+ } else {
+ return csvEscape(object.jsonString(Strict, false));
+ }
+ case EOO:
+ case Undefined:
+ case DBRef:
+ case jstNULL:
+ cerr << "Invalid BSON object type for CSV output: " << object.type() << endl;
+ return "";
+ }
+ // Can never get here
+ assert(false);
+ return "";
+ }
+
+ int run() {
+ string ns;
+ const bool csv = hasParam( "csv" );
+ const bool jsonArray = hasParam( "jsonArray" );
+ ostream *outPtr = &cout;
+ string outfile = getParam( "out" );
+ auto_ptr<ofstream> fileStream;
+ if ( hasParam( "out" ) ) {
+ size_t idx = outfile.rfind( "/" );
+ if ( idx != string::npos ) {
+ string dir = outfile.substr( 0 , idx + 1 );
+ create_directories( dir );
+ }
+ ofstream * s = new ofstream( outfile.c_str() , ios_base::out );
+ fileStream.reset( s );
+ outPtr = s;
+ if ( ! s->good() ) {
+ cerr << "couldn't open [" << outfile << "]" << endl;
+ return -1;
+ }
+ }
+ ostream &out = *outPtr;
+
+ BSONObj * fieldsToReturn = 0;
+ BSONObj realFieldsToReturn;
+
+ try {
+ ns = getNS();
+ }
+ catch (...) {
+ printHelp(cerr);
+ return 1;
+ }
+
+ auth();
+
+ if ( hasParam( "fields" ) || csv ) {
+ needFields();
+
+ // we can't use just _fieldsObj since we support everything getFieldDotted does
+
+ set<string> seen;
+ BSONObjBuilder b;
+
+ BSONObjIterator i( _fieldsObj );
+ while ( i.more() ){
+ BSONElement e = i.next();
+ string f = str::before( e.fieldName() , '.' );
+ if ( seen.insert( f ).second )
+ b.append( f , 1 );
+ }
+
+ realFieldsToReturn = b.obj();
+ fieldsToReturn = &realFieldsToReturn;
+ }
+
+
+ if ( csv && _fields.size() == 0 ) {
+ cerr << "csv mode requires a field list" << endl;
+ return -1;
+ }
+
+ Query q( getParam( "query" , "" ) );
+ if ( q.getFilter().isEmpty() && !hasParam("dbpath"))
+ q.snapshot();
+
+ bool slaveOk = _params["slaveOk"].as<bool>();
+
+ auto_ptr<DBClientCursor> cursor = conn().query( ns.c_str() , q , 0 , 0 , fieldsToReturn , ( slaveOk ? QueryOption_SlaveOk : 0 ) | QueryOption_NoCursorTimeout );
+
+ if ( csv ) {
+ for ( vector<string>::iterator i=_fields.begin(); i != _fields.end(); i++ ) {
+ if ( i != _fields.begin() )
+ out << ",";
+ out << *i;
+ }
+ out << endl;
+ }
+
+ if (jsonArray)
+ out << '[';
+
+ long long num = 0;
+ while ( cursor->more() ) {
+ num++;
+ BSONObj obj = cursor->next();
+ if ( csv ) {
+ for ( vector<string>::iterator i=_fields.begin(); i != _fields.end(); i++ ) {
+ if ( i != _fields.begin() )
+ out << ",";
+ const BSONElement & e = obj.getFieldDotted(i->c_str());
+ if ( ! e.eoo() ) {
+ out << csvString(e);
+ }
+ }
+ out << endl;
+ }
+ else {
+ if (jsonArray && num != 1)
+ out << ',';
+
+ out << obj.jsonString();
+
+ if (!jsonArray)
+ out << endl;
+ }
+ }
+
+ if (jsonArray)
+ out << ']' << endl;
+
+ cerr << "exported " << num << " records" << endl;
+
+ return 0;
+ }
+};
+
+int main( int argc , char ** argv ) {
+ Export e;
+ return e.main( argc , argv );
+}