diff options
Diffstat (limited to 'src/mongo/tools/export.cpp')
-rw-r--r-- | src/mongo/tools/export.cpp | 248 |
1 files changed, 248 insertions, 0 deletions
diff --git a/src/mongo/tools/export.cpp b/src/mongo/tools/export.cpp new file mode 100644 index 00000000000..0d9f0225da0 --- /dev/null +++ b/src/mongo/tools/export.cpp @@ -0,0 +1,248 @@ +// export.cpp + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "pch.h" +#include "client/dbclient.h" +#include "db/json.h" + +#include "tool.h" + +#include <fstream> +#include <iostream> + +#include <boost/program_options.hpp> + +using namespace mongo; + +namespace po = boost::program_options; + +class Export : public Tool { +public: + Export() : Tool( "export" ) { + addFieldOptions(); + add_options() + ("query,q" , po::value<string>() , "query filter, as a JSON string" ) + ("csv","export to csv instead of json") + ("out,o", po::value<string>(), "output file; if not specified, stdout is used") + ("jsonArray", "output to a json array rather than one object per line") + ("slaveOk,k", po::value<bool>()->default_value(true) , "use secondaries for export if available, default true") + ; + _usesstdout = false; + } + + virtual void preSetup() { + string out = getParam("out"); + if ( out == "-" ) { + // write output to standard error to avoid mangling output + // must happen early to avoid sending junk to stdout + useStandardOutput(false); + } + } + + virtual void printExtraHelp( ostream & out ) { + out << "Export MongoDB data to CSV, TSV or JSON files.\n" << endl; + } + + // Turn every double quote character into two double quote characters + // If hasSurroundingQuotes is true, doesn't escape the first and last + // characters of the string, if it's false, add a double quote character + // around the whole string. + string csvEscape(string str, bool hasSurroundingQuotes = false) { + size_t index = hasSurroundingQuotes ? 1 : 0; + while (((index = str.find('"', index)) != string::npos) + && (index < (hasSurroundingQuotes ? str.size() - 1 : str.size()))) { + str.replace(index, 1, "\"\""); + index += 2; + } + return hasSurroundingQuotes ? str : "\"" + str + "\""; + } + + // Gets the string representation of a BSON object that can be correctly written to a CSV file + string csvString (const BSONElement& object) { + const char* binData; // Only used with BinData type + + switch (object.type()) { + case MinKey: + return "$MinKey"; + case MaxKey: + return "$MaxKey"; + case NumberInt: + case NumberDouble: + case NumberLong: + case Bool: + return object.toString(false); + case String: + case Symbol: + return csvEscape(object.toString(false), true); + case Object: + return csvEscape(object.jsonString(Strict, false)); + case Array: + return csvEscape(object.jsonString(Strict, false)); + case BinData: + int len; + binData = object.binDataClean(len); + return toHex(binData, len); + case jstOID: + return "ObjectID(" + object.OID().toString() + ")"; // OIDs are always 24 bytes + case Date: + return timeToISOString(object.Date() / 1000); + case Timestamp: + return csvEscape(object.jsonString(Strict, false)); + case RegEx: + return csvEscape("/" + string(object.regex()) + "/" + string(object.regexFlags())); + case Code: + return csvEscape(object.toString(false)); + case CodeWScope: + if (string(object.codeWScopeScopeData()) == "") { + return csvEscape(object.toString(false)); + } else { + return csvEscape(object.jsonString(Strict, false)); + } + case EOO: + case Undefined: + case DBRef: + case jstNULL: + cerr << "Invalid BSON object type for CSV output: " << object.type() << endl; + return ""; + } + // Can never get here + assert(false); + return ""; + } + + int run() { + string ns; + const bool csv = hasParam( "csv" ); + const bool jsonArray = hasParam( "jsonArray" ); + ostream *outPtr = &cout; + string outfile = getParam( "out" ); + auto_ptr<ofstream> fileStream; + if ( hasParam( "out" ) ) { + size_t idx = outfile.rfind( "/" ); + if ( idx != string::npos ) { + string dir = outfile.substr( 0 , idx + 1 ); + create_directories( dir ); + } + ofstream * s = new ofstream( outfile.c_str() , ios_base::out ); + fileStream.reset( s ); + outPtr = s; + if ( ! s->good() ) { + cerr << "couldn't open [" << outfile << "]" << endl; + return -1; + } + } + ostream &out = *outPtr; + + BSONObj * fieldsToReturn = 0; + BSONObj realFieldsToReturn; + + try { + ns = getNS(); + } + catch (...) { + printHelp(cerr); + return 1; + } + + auth(); + + if ( hasParam( "fields" ) || csv ) { + needFields(); + + // we can't use just _fieldsObj since we support everything getFieldDotted does + + set<string> seen; + BSONObjBuilder b; + + BSONObjIterator i( _fieldsObj ); + while ( i.more() ){ + BSONElement e = i.next(); + string f = str::before( e.fieldName() , '.' ); + if ( seen.insert( f ).second ) + b.append( f , 1 ); + } + + realFieldsToReturn = b.obj(); + fieldsToReturn = &realFieldsToReturn; + } + + + if ( csv && _fields.size() == 0 ) { + cerr << "csv mode requires a field list" << endl; + return -1; + } + + Query q( getParam( "query" , "" ) ); + if ( q.getFilter().isEmpty() && !hasParam("dbpath")) + q.snapshot(); + + bool slaveOk = _params["slaveOk"].as<bool>(); + + auto_ptr<DBClientCursor> cursor = conn().query( ns.c_str() , q , 0 , 0 , fieldsToReturn , ( slaveOk ? QueryOption_SlaveOk : 0 ) | QueryOption_NoCursorTimeout ); + + if ( csv ) { + for ( vector<string>::iterator i=_fields.begin(); i != _fields.end(); i++ ) { + if ( i != _fields.begin() ) + out << ","; + out << *i; + } + out << endl; + } + + if (jsonArray) + out << '['; + + long long num = 0; + while ( cursor->more() ) { + num++; + BSONObj obj = cursor->next(); + if ( csv ) { + for ( vector<string>::iterator i=_fields.begin(); i != _fields.end(); i++ ) { + if ( i != _fields.begin() ) + out << ","; + const BSONElement & e = obj.getFieldDotted(i->c_str()); + if ( ! e.eoo() ) { + out << csvString(e); + } + } + out << endl; + } + else { + if (jsonArray && num != 1) + out << ','; + + out << obj.jsonString(); + + if (!jsonArray) + out << endl; + } + } + + if (jsonArray) + out << ']' << endl; + + cerr << "exported " << num << " records" << endl; + + return 0; + } +}; + +int main( int argc , char ** argv ) { + Export e; + return e.main( argc , argv ); +} |