summaryrefslogtreecommitdiff
path: root/tools/export.cpp
blob: 344fd3cbc2338f7c6e12a8caebcaf71ea9995be5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
// export.cpp

/**
*    Copyright (C) 2008 10gen Inc.
*
*    This program is free software: you can redistribute it and/or  modify
*    it under the terms of the GNU Affero General Public License, version 3,
*    as published by the Free Software Foundation.
*
*    This program is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*    GNU Affero General Public License for more details.
*
*    You should have received a copy of the GNU Affero General Public License
*    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "pch.h"
#include "client/dbclient.h"
#include "db/json.h"

#include "tool.h"

#include <fstream>
#include <iostream>

#include <boost/program_options.hpp>

using namespace mongo;

namespace po = boost::program_options;

class Export : public Tool {
public:
    Export() : Tool( "export" ) {
        addFieldOptions();
        add_options()
        ("query,q" , po::value<string>() , "query filter, as a JSON string" )
        ("csv","export to csv instead of json")
        ("out,o", po::value<string>(), "output file; if not specified, stdout is used")
        ("jsonArray", "output to a json array rather than one object per line")
        ("slaveOk,k", po::value<bool>()->default_value(true) , "use secondaries for export if available, default true")
        ;
        _usesstdout = false;
    }

    virtual void preSetup() {
        string out = getParam("out");
        if ( out == "-" ) {
                // write output to standard error to avoid mangling output
                // must happen early to avoid sending junk to stdout
                useStandardOutput(false);
        }
    }

    virtual void printExtraHelp( ostream & out ) {
        out << "Export MongoDB data to CSV, TSV or JSON files.\n" << endl;
    }

    // Turn every double quote character into two double quote characters
    // If hasSurroundingQuotes is true, doesn't escape the first and last
    // characters of the string, if it's false, add a double quote character
    // around the whole string.
    string csvEscape(string str, bool hasSurroundingQuotes = false) {
        size_t index = hasSurroundingQuotes ? 1 : 0;
        while (((index = str.find('"', index)) != string::npos)
               && (index < (hasSurroundingQuotes ? str.size() - 1 : str.size()))) {
            str.replace(index, 1, "\"\"");
            index += 2;
        }
        return hasSurroundingQuotes ? str : "\"" + str + "\"";
    }

    // Gets the string representation of a BSON object that can be correctly written to a CSV file
    string csvString (const BSONElement& object) {
        const char* binData; // Only used with BinData type

        switch (object.type()) {
        case MinKey:
            return "$MinKey";
        case MaxKey:
            return "$MaxKey";
        case NumberInt:
        case NumberDouble:
        case NumberLong:
        case Bool:
            return object.toString(false);
        case String:
        case Symbol:
            return csvEscape(object.toString(false, true), true);
        case Object:
            return csvEscape(object.jsonString(Strict, false));
        case Array:
            return csvEscape(object.jsonString(Strict, false));
        case BinData:
            int len;
            binData = object.binDataClean(len);
            return toHex(binData, len);
        case jstOID:
            return "ObjectID(" + object.OID().toString() + ")"; // OIDs are always 24 bytes
        case Date:
            return timeToISOString(object.Date() / 1000);
        case Timestamp:
            return csvEscape(object.jsonString(Strict, false));
        case RegEx:
            return csvEscape("/" + string(object.regex()) + "/" + string(object.regexFlags()));
        case Code:
            return csvEscape(object.toString(false));
        case CodeWScope:
            if (string(object.codeWScopeScopeData()) == "") {
                return csvEscape(object.toString(false));
            } else {
                return csvEscape(object.jsonString(Strict, false));
            }
        case EOO:
        case Undefined:
        case DBRef:
        case jstNULL:
            cerr << "Invalid BSON object type for CSV output: " << object.type() << endl;
            return "";
        }
        // Can never get here
        assert(false);
        return "";
    }

    int run() {
        string ns;
        const bool csv = hasParam( "csv" );
        const bool jsonArray = hasParam( "jsonArray" );
        ostream *outPtr = &cout;
        string outfile = getParam( "out" );
        auto_ptr<ofstream> fileStream;
        if ( hasParam( "out" ) ) {
            size_t idx = outfile.rfind( "/" );
            if ( idx != string::npos ) {
                string dir = outfile.substr( 0 , idx + 1 );
                create_directories( dir );
            }
            ofstream * s = new ofstream( outfile.c_str() , ios_base::out );
            fileStream.reset( s );
            outPtr = s;
            if ( ! s->good() ) {
                cerr << "couldn't open [" << outfile << "]" << endl;
                return -1;
            }
        }
        ostream &out = *outPtr;

        BSONObj * fieldsToReturn = 0;
        BSONObj realFieldsToReturn;

        try {
            ns = getNS();
        }
        catch (...) {
            printHelp(cerr);
            return 1;
        }

        auth();

        if ( hasParam( "fields" ) || csv ) {
            needFields();
            
            // we can't use just _fieldsObj since we support everything getFieldDotted does
            
            set<string> seen;
            BSONObjBuilder b;
            
            BSONObjIterator i( _fieldsObj );
            while ( i.more() ){
                BSONElement e = i.next();
                string f = str::before( e.fieldName() , '.' );
                if ( seen.insert( f ).second )
                    b.append( f , 1 );
            }
            
            realFieldsToReturn = b.obj();
            fieldsToReturn = &realFieldsToReturn;
        }
        
        
        if ( csv && _fields.size() == 0 ) {
            cerr << "csv mode requires a field list" << endl;
            return -1;
        }

        Query q( getParam( "query" , "" ) );
        if ( q.getFilter().isEmpty() && !hasParam("dbpath"))
            q.snapshot();

        bool slaveOk = _params["slaveOk"].as<bool>();

        auto_ptr<DBClientCursor> cursor = conn().query( ns.c_str() , q , 0 , 0 , fieldsToReturn , ( slaveOk ? QueryOption_SlaveOk : 0 ) | QueryOption_NoCursorTimeout );

        if ( csv ) {
            for ( vector<string>::iterator i=_fields.begin(); i != _fields.end(); i++ ) {
                if ( i != _fields.begin() )
                    out << ",";
                out << *i;
            }
            out << endl;
        }

        if (jsonArray)
            out << '[';

        long long num = 0;
        while ( cursor->more() ) {
            num++;
            BSONObj obj = cursor->next();
            if ( csv ) {
                for ( vector<string>::iterator i=_fields.begin(); i != _fields.end(); i++ ) {
                    if ( i != _fields.begin() )
                        out << ",";
                    const BSONElement & e = obj.getFieldDotted(i->c_str());
                    if ( ! e.eoo() ) {
                        out << csvString(e);
                    }
                }
                out << endl;
            }
            else {
                if (jsonArray && num != 1)
                    out << ',';

                out << obj.jsonString();

                if (!jsonArray)
                    out << endl;
            }
        }

        if (jsonArray)
            out << ']' << endl;

        cerr << "exported " << num << " records" << endl;

        return 0;
    }
};

int main( int argc , char ** argv ) {
    Export e;
    return e.main( argc , argv );
}