summaryrefslogtreecommitdiff
path: root/src/mongo/gotools/mongoexport
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/gotools/mongoexport')
-rw-r--r--src/mongo/gotools/mongoexport/csv.go148
-rw-r--r--src/mongo/gotools/mongoexport/csv_test.go139
-rw-r--r--src/mongo/gotools/mongoexport/json.go109
-rw-r--r--src/mongo/gotools/mongoexport/json_test.go73
-rw-r--r--src/mongo/gotools/mongoexport/kerberos_test.go44
-rw-r--r--src/mongo/gotools/mongoexport/main/mongoexport.go143
-rw-r--r--src/mongo/gotools/mongoexport/mongoexport.go425
-rw-r--r--src/mongo/gotools/mongoexport/mongoexport_test.go44
-rw-r--r--src/mongo/gotools/mongoexport/options.go79
9 files changed, 1204 insertions, 0 deletions
diff --git a/src/mongo/gotools/mongoexport/csv.go b/src/mongo/gotools/mongoexport/csv.go
new file mode 100644
index 00000000000..165cb560cce
--- /dev/null
+++ b/src/mongo/gotools/mongoexport/csv.go
@@ -0,0 +1,148 @@
+package mongoexport
+
+import (
+ "encoding/csv"
+ "fmt"
+ "github.com/mongodb/mongo-tools/common/bsonutil"
+ "github.com/mongodb/mongo-tools/common/json"
+ "gopkg.in/mgo.v2/bson"
+ "io"
+ "reflect"
+ "strconv"
+ "strings"
+)
+
// marshalDType caches the reflect.Type of bsonutil.MarshalD so the
// reflection-based type comparisons below don't rebuild it on every call.
var marshalDType = reflect.TypeOf(bsonutil.MarshalD{})

// CSVExportOutput is an implementation of ExportOutput that writes documents to the output in CSV format.
type CSVExportOutput struct {
	// Fields is a list of field names in the bson documents to be exported.
	// A field can also use dot-delimited modifiers to address nested structures,
	// for example "location.city" or "addresses.0".
	Fields []string

	// NumExported maintains a running total of the number of documents written.
	NumExported int64

	// NoHeaderLine, if set, will export CSV data without a list of field names at the first line
	NoHeaderLine bool

	// csvWriter performs the actual CSV encoding and buffering of output rows.
	csvWriter *csv.Writer
}
+
+// NewCSVExportOutput returns a CSVExportOutput configured to write output to the
+// given io.Writer, extracting the specified fields only.
+func NewCSVExportOutput(fields []string, noHeaderLine bool, out io.Writer) *CSVExportOutput {
+ return &CSVExportOutput{
+ fields,
+ 0,
+ noHeaderLine,
+ csv.NewWriter(out),
+ }
+}
+
+// WriteHeader writes a comma-delimited list of fields as the output header row.
+func (csvExporter *CSVExportOutput) WriteHeader() error {
+ if !csvExporter.NoHeaderLine {
+ csvExporter.csvWriter.Write(csvExporter.Fields)
+ return csvExporter.csvWriter.Error()
+ }
+ return nil
+}
+
+// WriteFooter is a no-op for CSV export formats.
+func (csvExporter *CSVExportOutput) WriteFooter() error {
+ // no CSV footer
+ return nil
+}
+
+// Flush writes any pending data to the underlying I/O stream.
+func (csvExporter *CSVExportOutput) Flush() error {
+ csvExporter.csvWriter.Flush()
+ return csvExporter.csvWriter.Error()
+}
+
+// ExportDocument writes a line to output with the CSV representation of a document.
+func (csvExporter *CSVExportOutput) ExportDocument(document bson.D) error {
+ rowOut := make([]string, 0, len(csvExporter.Fields))
+ extendedDoc, err := bsonutil.ConvertBSONValueToJSON(document)
+ if err != nil {
+ return err
+ }
+
+ for _, fieldName := range csvExporter.Fields {
+ fieldVal := extractFieldByName(fieldName, extendedDoc)
+ if fieldVal == nil {
+ rowOut = append(rowOut, "")
+ } else if reflect.TypeOf(fieldVal) == reflect.TypeOf(bson.M{}) ||
+ reflect.TypeOf(fieldVal) == reflect.TypeOf(bson.D{}) ||
+ reflect.TypeOf(fieldVal) == marshalDType ||
+ reflect.TypeOf(fieldVal) == reflect.TypeOf([]interface{}{}) {
+ buf, err := json.Marshal(fieldVal)
+ if err != nil {
+ rowOut = append(rowOut, "")
+ } else {
+ rowOut = append(rowOut, string(buf))
+ }
+ } else {
+ rowOut = append(rowOut, fmt.Sprintf("%v", fieldVal))
+ }
+ }
+ csvExporter.csvWriter.Write(rowOut)
+ csvExporter.NumExported++
+ return csvExporter.csvWriter.Error()
+}
+
// extractFieldByName takes a field name and document, and returns a value representing
// the value of that field in the document in a format that can be printed as a string.
// It will also handle dot-delimited field names for nested arrays or documents.
// Any unresolvable path component yields "" (the blank-cell sentinel), not an error.
func extractFieldByName(fieldName string, document interface{}) interface{} {
	// Walk one path component at a time, e.g. "z.1.a" -> ["z", "1", "a"].
	dotParts := strings.Split(fieldName, ".")
	var subdoc interface{} = document

	for _, path := range dotParts {
		docValue := reflect.ValueOf(subdoc)
		// A nil document (or nil intermediate value) cannot be descended into.
		if !docValue.IsValid() {
			return ""
		}
		docType := docValue.Type()
		docKind := docType.Kind()
		if docKind == reflect.Map {
			// Map-like values (e.g. bson.M) are indexed directly by the component.
			subdocVal := docValue.MapIndex(reflect.ValueOf(path))
			if subdocVal.Kind() == reflect.Invalid {
				// Key not present in the map.
				return ""
			}
			subdoc = subdocVal.Interface()
		} else if docKind == reflect.Slice {
			if docType == marshalDType {
				// dive into a D as a document
				asD := bson.D(subdoc.(bsonutil.MarshalD))
				var err error
				subdoc, err = bsonutil.FindValueByKey(path, &asD)
				if err != nil {
					return ""
				}
			} else {
				// check that the path can be converted to int
				arrayIndex, err := strconv.Atoi(path)
				if err != nil {
					return ""
				}
				// bounds check for slice
				if arrayIndex < 0 || arrayIndex >= docValue.Len() {
					return ""
				}
				subdocVal := docValue.Index(arrayIndex)
				if subdocVal.Kind() == reflect.Invalid {
					return ""
				}
				subdoc = subdocVal.Interface()
			}
		} else {
			// trying to index into a non-compound type - just return blank.
			return ""
		}
	}
	return subdoc
}
diff --git a/src/mongo/gotools/mongoexport/csv_test.go b/src/mongo/gotools/mongoexport/csv_test.go
new file mode 100644
index 00000000000..cdf65814e72
--- /dev/null
+++ b/src/mongo/gotools/mongoexport/csv_test.go
@@ -0,0 +1,139 @@
+package mongoexport
+
+import (
+ "bytes"
+ "encoding/csv"
+ "github.com/mongodb/mongo-tools/common/bsonutil"
+ "github.com/mongodb/mongo-tools/common/testutil"
+ . "github.com/smartystreets/goconvey/convey"
+ "gopkg.in/mgo.v2/bson"
+ "strings"
+ "testing"
+)
+
// TestWriteCSV exercises CSVExportOutput end-to-end: header emission,
// header suppression, missing-field handling, and dotted-path indexing
// into nested arrays/documents.
func TestWriteCSV(t *testing.T) {
	testutil.VerifyTestType(t, testutil.UnitTestType)

	Convey("With a CSV export output", t, func() {
		// " y" (leading space) and "z.1.a" (array index then subdocument key)
		// deliberately exercise unusual field names.
		fields := []string{"_id", "x", " y", "z.1.a"}
		out := &bytes.Buffer{}

		Convey("Headers should be written correctly", func() {
			csvExporter := NewCSVExportOutput(fields, false, out)
			err := csvExporter.WriteHeader()
			So(err, ShouldBeNil)
			csvExporter.ExportDocument(bson.D{{"_id", "12345"}})
			csvExporter.WriteFooter()
			csvExporter.Flush()
			// The first CSV record should be the field list itself.
			rec, err := csv.NewReader(strings.NewReader(out.String())).Read()
			So(err, ShouldBeNil)
			So(rec, ShouldResemble, []string{"_id", "x", " y", "z.1.a"})
		})

		Convey("Headers should not be written", func() {
			csvExporter := NewCSVExportOutput(fields, true, out)
			err := csvExporter.WriteHeader()
			So(err, ShouldBeNil)
			csvExporter.ExportDocument(bson.D{{"_id", "12345"}})
			csvExporter.WriteFooter()
			csvExporter.Flush()
			// With NoHeaderLine set, the first record is the document itself.
			rec, err := csv.NewReader(strings.NewReader(out.String())).Read()
			So(err, ShouldBeNil)
			So(rec, ShouldResemble, []string{"12345", "", "", ""})
		})

		Convey("Exported document with missing fields should print as blank", func() {
			csvExporter := NewCSVExportOutput(fields, true, out)
			csvExporter.ExportDocument(bson.D{{"_id", "12345"}})
			csvExporter.WriteFooter()
			csvExporter.Flush()
			rec, err := csv.NewReader(strings.NewReader(out.String())).Read()
			So(err, ShouldBeNil)
			So(rec, ShouldResemble, []string{"12345", "", "", ""})
		})

		Convey("Exported document with index into nested objects should print correctly", func() {
			csvExporter := NewCSVExportOutput(fields, true, out)
			// "z.1.a" addresses element 1 of the array, then key "a".
			z := []interface{}{"x", bson.D{{"a", "T"}, {"B", 1}}}
			csvExporter.ExportDocument(bson.D{{Name: "z", Value: z}})
			csvExporter.WriteFooter()
			csvExporter.Flush()
			rec, err := csv.NewReader(strings.NewReader(out.String())).Read()
			So(err, ShouldBeNil)
			So(rec, ShouldResemble, []string{"", "", "", "T"})
		})

		// Reset the shared output buffer between test cases.
		Reset(func() {
			out.Reset()
		})

	})
}
+
// TestExtractDField exercises extractFieldByName against bsonutil.MarshalD
// documents, covering plain keys, array indices, nested subdocuments, null
// values, and missing paths.
func TestExtractDField(t *testing.T) {
	Convey("With a test bson.D", t, func() {
		b := []interface{}{"inner", bsonutil.MarshalD{{"inner2", 1}}}
		c := bsonutil.MarshalD{{"x", 5}}
		d := bsonutil.MarshalD{{"z", nil}}
		testD := bsonutil.MarshalD{
			{"a", "string"},
			{"b", b},
			{"c", c},
			{"d", d},
		}

		Convey("regular fields should be extracted by name", func() {
			val := extractFieldByName("a", testD)
			So(val, ShouldEqual, "string")
		})

		Convey("array fields should be extracted by name", func() {
			val := extractFieldByName("b.1", testD)
			So(val, ShouldResemble, bsonutil.MarshalD{{"inner2", 1}})
			val = extractFieldByName("b.1.inner2", testD)
			So(val, ShouldEqual, 1)
			val = extractFieldByName("b.0", testD)
			So(val, ShouldEqual, "inner")
		})

		Convey("subdocument fields should be extracted by name", func() {
			val := extractFieldByName("c", testD)
			So(val, ShouldResemble, bsonutil.MarshalD{{"x", 5}})
			val = extractFieldByName("c.x", testD)
			So(val, ShouldEqual, 5)

			Convey("even if they contain null values", func() {
				val := extractFieldByName("d", testD)
				So(val, ShouldResemble, bsonutil.MarshalD{{"z", nil}})
				val = extractFieldByName("d.z", testD)
				So(val, ShouldEqual, nil)
				// Descending through a null value yields the blank sentinel.
				val = extractFieldByName("d.z.nope", testD)
				So(val, ShouldEqual, "")
			})
		})

		Convey(`non-existing fields should return ""`, func() {
			val := extractFieldByName("f", testD)
			So(val, ShouldEqual, "")
			val = extractFieldByName("c.nope", testD)
			So(val, ShouldEqual, "")
			val = extractFieldByName("c.nope.NOPE", testD)
			So(val, ShouldEqual, "")
			// Out-of-bounds array index.
			val = extractFieldByName("b.1000", testD)
			So(val, ShouldEqual, "")
			val = extractFieldByName("b.1.nada", testD)
			So(val, ShouldEqual, "")
		})

	})

	Convey(`Extraction of a non-document should return ""`, t, func() {
		val := extractFieldByName("meh", []interface{}{"meh"})
		So(val, ShouldEqual, "")
	})

	Convey(`Extraction of a nil document should return ""`, t, func() {
		val := extractFieldByName("a", nil)
		So(val, ShouldEqual, "")
	})
}
diff --git a/src/mongo/gotools/mongoexport/json.go b/src/mongo/gotools/mongoexport/json.go
new file mode 100644
index 00000000000..7697b0dc264
--- /dev/null
+++ b/src/mongo/gotools/mongoexport/json.go
@@ -0,0 +1,109 @@
+package mongoexport
+
+import (
+ "bytes"
+ "fmt"
+ "github.com/mongodb/mongo-tools/common/bsonutil"
+ "github.com/mongodb/mongo-tools/common/json"
+ "gopkg.in/mgo.v2/bson"
+ "io"
+)
+
// JSONExportOutput is an implementation of ExportOutput that writes documents
// to the output in JSON format.
type JSONExportOutput struct {
	// ArrayOutput when set to true indicates that the output should be written
	// as a JSON array, where each document is an element in the array.
	ArrayOutput bool
	// Pretty when set to true indicates that the output will be written in pretty mode.
	PrettyOutput bool
	// Encoder writes documents in the plain (non-array, non-pretty) path.
	Encoder *json.Encoder
	// Out is the underlying stream all output is written to.
	Out io.Writer
	// NumExported maintains a running total of the number of documents written.
	NumExported int64
}
+
+// NewJSONExportOutput creates a new JSONExportOutput in array mode if specified,
+// configured to write data to the given io.Writer.
+func NewJSONExportOutput(arrayOutput bool, prettyOutput bool, out io.Writer) *JSONExportOutput {
+ return &JSONExportOutput{
+ arrayOutput,
+ prettyOutput,
+ json.NewEncoder(out),
+ out,
+ 0,
+ }
+}
+
+// WriteHeader writes the opening square bracket if in array mode, otherwise it
+// behaves as a no-op.
+func (jsonExporter *JSONExportOutput) WriteHeader() error {
+ if jsonExporter.ArrayOutput {
+ // TODO check # bytes written?
+ _, err := jsonExporter.Out.Write([]byte{json.ArrayStart})
+ if err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// WriteFooter writes the closing square bracket if in array mode, otherwise it
+// behaves as a no-op.
+func (jsonExporter *JSONExportOutput) WriteFooter() error {
+ if jsonExporter.ArrayOutput {
+ _, err := jsonExporter.Out.Write([]byte{json.ArrayEnd, '\n'})
+ // TODO check # bytes written?
+ if err != nil {
+ return err
+ }
+ }
+ if jsonExporter.PrettyOutput {
+ jsonExporter.Out.Write([]byte("\n"))
+ }
+ return nil
+}
+
+// Flush is a no-op for JSON export formats.
+func (jsonExporter *JSONExportOutput) Flush() error {
+ return nil
+}
+
+// ExportDocument converts the given document to extended JSON, and writes it
+// to the output.
+func (jsonExporter *JSONExportOutput) ExportDocument(document bson.D) error {
+ if jsonExporter.ArrayOutput || jsonExporter.PrettyOutput {
+ if jsonExporter.NumExported >= 1 {
+ if jsonExporter.ArrayOutput {
+ jsonExporter.Out.Write([]byte(","))
+ }
+ if jsonExporter.PrettyOutput {
+ jsonExporter.Out.Write([]byte("\n"))
+ }
+ }
+ extendedDoc, err := bsonutil.ConvertBSONValueToJSON(document)
+ if err != nil {
+ return err
+ }
+ jsonOut, err := json.Marshal(extendedDoc)
+ if err != nil {
+ return fmt.Errorf("error converting BSON to extended JSON: %v", err)
+ }
+ if jsonExporter.PrettyOutput {
+ var jsonFormatted bytes.Buffer
+ json.Indent(&jsonFormatted, jsonOut, "", "\t")
+ jsonOut = jsonFormatted.Bytes()
+ }
+ jsonExporter.Out.Write(jsonOut)
+ } else {
+ extendedDoc, err := bsonutil.ConvertBSONValueToJSON(document)
+ if err != nil {
+ return err
+ }
+ err = jsonExporter.Encoder.Encode(extendedDoc)
+ if err != nil {
+ return err
+ }
+ }
+ jsonExporter.NumExported++
+ return nil
+}
diff --git a/src/mongo/gotools/mongoexport/json_test.go b/src/mongo/gotools/mongoexport/json_test.go
new file mode 100644
index 00000000000..ff988f971c0
--- /dev/null
+++ b/src/mongo/gotools/mongoexport/json_test.go
@@ -0,0 +1,73 @@
+package mongoexport
+
+import (
+ "bytes"
+ "github.com/mongodb/mongo-tools/common/json"
+ "github.com/mongodb/mongo-tools/common/testutil"
+ . "github.com/smartystreets/goconvey/convey"
+ "gopkg.in/mgo.v2/bson"
+ "testing"
+)
+
// TestWriteJSON verifies that special BSON types are serialized in their
// extended-JSON representation.
func TestWriteJSON(t *testing.T) {
	testutil.VerifyTestType(t, testutil.UnitTestType)

	Convey("With a JSON export output", t, func() {
		out := &bytes.Buffer{}

		Convey("Special types should serialize as extended JSON", func() {

			Convey("ObjectId should have an extended JSON format", func() {
				jsonExporter := NewJSONExportOutput(false, false, out)
				objId := bson.NewObjectId()
				err := jsonExporter.WriteHeader()
				So(err, ShouldBeNil)
				err = jsonExporter.ExportDocument(bson.D{{"_id", objId}})
				So(err, ShouldBeNil)
				err = jsonExporter.WriteFooter()
				So(err, ShouldBeNil)
				// ObjectIds render as {"$oid": "<hex>"} per extended JSON.
				So(out.String(), ShouldEqual, `{"_id":{"$oid":"`+objId.Hex()+`"}}`+"\n")
			})

			// Reset the shared buffer between cases.
			Reset(func() {
				out.Reset()
			})
		})

	})
}
+
// TestJSONArray verifies that array-mode output produces a single valid JSON
// array containing one element per exported document.
func TestJSONArray(t *testing.T) {
	testutil.VerifyTestType(t, testutil.UnitTestType)

	Convey("With a JSON export output in array mode", t, func() {
		out := &bytes.Buffer{}
		Convey("exporting a bunch of documents should produce valid json", func() {
			jsonExporter := NewJSONExportOutput(true, false, out)
			err := jsonExporter.WriteHeader()
			So(err, ShouldBeNil)

			// Export a few docs of various types

			testObjs := []interface{}{bson.NewObjectId(), "asd", 12345, 3.14159, bson.D{{"A", 1}}}
			for _, obj := range testObjs {
				err = jsonExporter.ExportDocument(bson.D{{"_id", obj}})
				So(err, ShouldBeNil)
			}

			err = jsonExporter.WriteFooter()
			So(err, ShouldBeNil)
			// Unmarshal the whole thing, it should be valid json
			fromJSON := []map[string]interface{}{}
			err = json.Unmarshal(out.Bytes(), &fromJSON)
			So(err, ShouldBeNil)
			So(len(fromJSON), ShouldEqual, len(testObjs))

		})

		// Reset the shared buffer between cases.
		Reset(func() {
			out.Reset()
		})

	})
}
diff --git a/src/mongo/gotools/mongoexport/kerberos_test.go b/src/mongo/gotools/mongoexport/kerberos_test.go
new file mode 100644
index 00000000000..06c7ecb0b6e
--- /dev/null
+++ b/src/mongo/gotools/mongoexport/kerberos_test.go
@@ -0,0 +1,44 @@
+package mongoexport
+
+import (
+ "bytes"
+ "github.com/mongodb/mongo-tools/common/db"
+ "github.com/mongodb/mongo-tools/common/json"
+ "github.com/mongodb/mongo-tools/common/testutil"
+ . "github.com/smartystreets/goconvey/convey"
+ "strings"
+ "testing"
+)
+
// TestKerberos runs mongoexport against a Kerberos-enabled deployment and
// checks that the single expected document comes back intact. It only runs
// when the Kerberos test type is enabled.
func TestKerberos(t *testing.T) {
	testutil.VerifyTestType(t, testutil.KerberosTestType)

	Convey("Should be able to run mongoexport with Kerberos auth", t, func() {
		opts, err := testutil.GetKerberosOptions()

		So(err, ShouldBeNil)

		sessionProvider, err := db.NewSessionProvider(*opts)
		So(err, ShouldBeNil)

		export := MongoExport{
			ToolOptions:     *opts,
			OutputOpts:      &OutputFormatOptions{},
			InputOpts:       &InputOptions{},
			SessionProvider: sessionProvider,
		}

		var out bytes.Buffer
		num, err := export.exportInternal(&out)

		So(err, ShouldBeNil)
		So(num, ShouldEqual, 1)
		// Exactly one line of JSON output is expected for the one document.
		outputLines := strings.Split(strings.TrimSpace(out.String()), "\n")
		So(len(outputLines), ShouldEqual, 1)
		outMap := map[string]interface{}{}
		So(json.Unmarshal([]byte(outputLines[0]), &outMap), ShouldBeNil)
		So(outMap["kerberos"], ShouldEqual, true)
		So(outMap["authenticated"], ShouldEqual, "yeah")
		So(outMap["_id"].(map[string]interface{})["$oid"], ShouldEqual, "528fb35afb3a8030e2f643c3")
	})
}
diff --git a/src/mongo/gotools/mongoexport/main/mongoexport.go b/src/mongo/gotools/mongoexport/main/mongoexport.go
new file mode 100644
index 00000000000..c18451f03df
--- /dev/null
+++ b/src/mongo/gotools/mongoexport/main/mongoexport.go
@@ -0,0 +1,143 @@
+// Main package for the mongoexport tool.
+package main
+
+import (
+ "github.com/mongodb/mongo-tools/common/db"
+ "github.com/mongodb/mongo-tools/common/log"
+ "github.com/mongodb/mongo-tools/common/options"
+ "github.com/mongodb/mongo-tools/common/signals"
+ "github.com/mongodb/mongo-tools/common/util"
+ "github.com/mongodb/mongo-tools/mongoexport"
+ "gopkg.in/mgo.v2"
+ "gopkg.in/mgo.v2/bson"
+ "os"
+)
+
+func main() {
+ // initialize command-line opts
+ opts := options.New("mongoexport", mongoexport.Usage,
+ options.EnabledOptions{Auth: true, Connection: true, Namespace: true})
+
+ outputOpts := &mongoexport.OutputFormatOptions{}
+ opts.AddOptions(outputOpts)
+ inputOpts := &mongoexport.InputOptions{}
+ opts.AddOptions(inputOpts)
+
+ args, err := opts.Parse()
+ if err != nil {
+ log.Logvf(log.Always, "error parsing command line options: %v", err)
+ log.Logvf(log.Always, "try 'mongoexport --help' for more information")
+ os.Exit(util.ExitBadOptions)
+ }
+ if len(args) != 0 {
+ log.Logvf(log.Always, "too many positional arguments: %v", args)
+ log.Logvf(log.Always, "try 'mongoexport --help' for more information")
+ os.Exit(util.ExitBadOptions)
+ }
+
+ log.SetVerbosity(opts.Verbosity)
+ signals.Handle()
+
+ // print help, if specified
+ if opts.PrintHelp(false) {
+ return
+ }
+
+ // print version, if specified
+ if opts.PrintVersion() {
+ return
+ }
+
+ // connect directly, unless a replica set name is explicitly specified
+ _, setName := util.ParseConnectionString(opts.Host)
+ opts.Direct = (setName == "")
+ opts.ReplicaSetName = setName
+
+ provider, err := db.NewSessionProvider(*opts)
+
+ // temporarily allow secondary reads for the isMongos check
+ provider.SetReadPreference(mgo.Nearest)
+ isMongos, err := provider.IsMongos()
+ if err != nil {
+ log.Logvf(log.Always, "%v", err)
+ os.Exit(util.ExitError)
+ }
+
+ provider.SetFlags(db.DisableSocketTimeout)
+
+ if inputOpts.SlaveOk {
+ if inputOpts.ReadPreference != "" {
+ log.Logvf(log.Always, "--slaveOk can't be specified when --readPreference is specified")
+ os.Exit(util.ExitBadOptions)
+ }
+ log.Logvf(log.Always, "--slaveOk is deprecated and being internally rewritten as --readPreference=nearest")
+ inputOpts.ReadPreference = "nearest"
+ }
+
+ var mode mgo.Mode
+ if opts.ReplicaSetName != "" || isMongos {
+ mode = mgo.Primary
+ } else {
+ mode = mgo.Nearest
+ }
+ var tags bson.D
+ if inputOpts.ReadPreference != "" {
+ mode, tags, err = db.ParseReadPreference(inputOpts.ReadPreference)
+ if err != nil {
+ log.Logvf(log.Always, "error parsing --ReadPreference: %v", err)
+ os.Exit(util.ExitBadOptions)
+ }
+ if len(tags) > 0 {
+ provider.SetTags(tags)
+ }
+ }
+
+ // warn if we are trying to export from a secondary in a sharded cluster
+ if isMongos && mode != mgo.Primary {
+ log.Logvf(log.Always, db.WarningNonPrimaryMongosConnection)
+ }
+
+ provider.SetReadPreference(mode)
+
+ if err != nil {
+ log.Logvf(log.Always, "error connecting to host: %v", err)
+ os.Exit(util.ExitError)
+ }
+ exporter := mongoexport.MongoExport{
+ ToolOptions: *opts,
+ OutputOpts: outputOpts,
+ InputOpts: inputOpts,
+ SessionProvider: provider,
+ }
+
+ err = exporter.ValidateSettings()
+ if err != nil {
+ log.Logvf(log.Always, "error validating settings: %v", err)
+ log.Logvf(log.Always, "try 'mongoexport --help' for more information")
+ os.Exit(util.ExitBadOptions)
+ }
+
+ writer, err := exporter.GetOutputWriter()
+ if err != nil {
+ log.Logvf(log.Always, "error opening output stream: %v", err)
+ os.Exit(util.ExitError)
+ }
+ if writer == nil {
+ writer = os.Stdout
+ } else {
+ defer writer.Close()
+ }
+
+ numDocs, err := exporter.Export(writer)
+ if err != nil {
+ log.Logvf(log.Always, "Failed: %v", err)
+ os.Exit(util.ExitError)
+ }
+
+ if numDocs == 1 {
+ log.Logvf(log.Always, "exported %v record", numDocs)
+ } else {
+ log.Logvf(log.Always, "exported %v records", numDocs)
+ }
+
+}
diff --git a/src/mongo/gotools/mongoexport/mongoexport.go b/src/mongo/gotools/mongoexport/mongoexport.go
new file mode 100644
index 00000000000..995e252f4df
--- /dev/null
+++ b/src/mongo/gotools/mongoexport/mongoexport.go
@@ -0,0 +1,425 @@
+// Package mongoexport produces a JSON or CSV export of data stored in a MongoDB instance.
+package mongoexport
+
+import (
+ "fmt"
+ "github.com/mongodb/mongo-tools/common/bsonutil"
+ "github.com/mongodb/mongo-tools/common/db"
+ "github.com/mongodb/mongo-tools/common/json"
+ "github.com/mongodb/mongo-tools/common/log"
+ "github.com/mongodb/mongo-tools/common/options"
+ "github.com/mongodb/mongo-tools/common/progress"
+ "github.com/mongodb/mongo-tools/common/util"
+ "gopkg.in/mgo.v2"
+ "gopkg.in/mgo.v2/bson"
+ "io"
+ "os"
+ "path/filepath"
+ "strings"
+ "time"
+)
+
// Output types supported by mongoexport.
const (
	CSV  = "csv"
	JSON = "json"
	// progressBarLength is the bar length handed to progress.Bar
	// (presumably its on-screen width) — TODO confirm against the progress package.
	progressBarLength = 24
	// progressBarWaitTime is the wait interval given to the progress bar manager.
	progressBarWaitTime = time.Second
	// watchProgressorUpdateFrequency: the progress counter is refreshed once
	// every this many exported documents (see exportInternal).
	watchProgressorUpdateFrequency = 8000
)
+
// MongoExport is a container for the user-specified options and
// internal state used for running mongoexport.
type MongoExport struct {
	// generic mongo tool options
	ToolOptions options.ToolOptions

	// OutputOpts controls options for how the exported data should be formatted
	OutputOpts *OutputFormatOptions

	// InputOpts controls the query, sort, skip, and limit applied to the export.
	InputOpts *InputOptions

	// for connecting to the db
	SessionProvider *db.SessionProvider
	// ExportOutput formats and writes each exported document.
	ExportOutput ExportOutput
}
+
// ExportOutput is an interface that specifies how a document should be formatted
// and written to an output stream.
type ExportOutput interface {
	// WriteHeader outputs any pre-record headers that are written once
	// per output file.
	WriteHeader() error

	// ExportDocument writes the given document to the output according to
	// the format supported by the underlying ExportOutput implementation.
	// (Comment previously referred to this method as "WriteRecord".)
	ExportDocument(bson.D) error

	// WriteFooter outputs any post-record headers that are written once per
	// output file.
	WriteFooter() error

	// Flush writes any pending data to the underlying I/O stream.
	Flush() error
}
+
+// ValidateSettings returns an error if any settings specified on the command line
+// were invalid, or nil if they are valid.
+func (exp *MongoExport) ValidateSettings() error {
+ // Namespace must have a valid database if none is specified,
+ // use 'test'
+ if exp.ToolOptions.Namespace.DB == "" {
+ exp.ToolOptions.Namespace.DB = "test"
+ }
+
+ if exp.ToolOptions.Namespace.Collection == "" {
+ return fmt.Errorf("must specify a collection")
+ }
+ if err := util.ValidateCollectionName(exp.ToolOptions.Namespace.Collection); err != nil {
+ return err
+ }
+
+ exp.OutputOpts.Type = strings.ToLower(exp.OutputOpts.Type)
+
+ if exp.OutputOpts.CSVOutputType {
+ log.Logv(log.Always, "csv flag is deprecated; please use --type=csv instead")
+ exp.OutputOpts.Type = CSV
+ }
+
+ if exp.OutputOpts.Type == "" {
+ // special error for an empty type value
+ return fmt.Errorf("--type cannot be empty")
+ }
+ if exp.OutputOpts.Type != CSV && exp.OutputOpts.Type != JSON {
+ return fmt.Errorf("invalid output type '%v', choose 'json' or 'csv'", exp.OutputOpts.Type)
+ }
+
+ if exp.InputOpts.Query != "" && exp.InputOpts.ForceTableScan {
+ return fmt.Errorf("cannot use --forceTableScan when specifying --query")
+ }
+
+ if exp.InputOpts.Query != "" && exp.InputOpts.QueryFile != "" {
+ return fmt.Errorf("either --query or --queryFile can be specified as a query option")
+ }
+
+ if exp.InputOpts != nil && exp.InputOpts.HasQuery() {
+ content, err := exp.InputOpts.GetQuery()
+ if err != nil {
+ return err
+ }
+ _, err2 := getObjectFromByteArg(content)
+ if err2 != nil {
+ return err2
+ }
+ }
+
+ if exp.InputOpts != nil && exp.InputOpts.Sort != "" {
+ _, err := getSortFromArg(exp.InputOpts.Sort)
+ if err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// GetOutputWriter opens and returns an io.WriteCloser for the output
+// options or nil if none is set. The caller is responsible for closing it.
+func (exp *MongoExport) GetOutputWriter() (io.WriteCloser, error) {
+ if exp.OutputOpts.OutputFile != "" {
+ // If the directory in which the output file is to be
+ // written does not exist, create it
+ fileDir := filepath.Dir(exp.OutputOpts.OutputFile)
+ err := os.MkdirAll(fileDir, 0750)
+ if err != nil {
+ return nil, err
+ }
+
+ file, err := os.Create(util.ToUniversalPath(exp.OutputOpts.OutputFile))
+ if err != nil {
+ return nil, err
+ }
+ return file, err
+ }
+ // No writer, so caller should assume Stdout (or some other reasonable default)
+ return nil, nil
+}
+
+// Take a comma-delimited set of field names and build a selector doc for query projection.
+// For fields containing a dot '.', we project the entire top-level portion.
+// e.g. "a,b,c.d.e,f.$" -> {a:1, b:1, "c":1, "f.$": 1}.
+func makeFieldSelector(fields string) bson.M {
+ selector := bson.M{"_id": 1}
+ if fields == "" {
+ return selector
+ }
+
+ for _, field := range strings.Split(fields, ",") {
+ // Projections like "a.0" work fine for nested documents not for arrays
+ // - if passed directly to mongod. To handle this, we have to retrieve
+ // the entire top-level document and then filter afterwards. An exception
+ // is made for '$' projections - which are passed directly to mongod.
+ if i := strings.LastIndex(field, "."); i != -1 && field[i+1:] != "$" {
+ field = field[:strings.Index(field, ".")]
+ }
+ selector[field] = 1
+ }
+ return selector
+}
+
// getCount returns an estimate of how many documents the cursor will fetch.
// It always returns Limit if there is a limit, assuming that in general
// limits will be less than the total possible.
// If there is a query and no limit then it returns 0, because it's too
// expensive to count the query.
// Otherwise it returns the collection count minus the skip.
func (exp *MongoExport) getCount() (c int, err error) {
	session, err := exp.SessionProvider.GetSession()
	if err != nil {
		return 0, err
	}
	defer session.Close()
	if exp.InputOpts != nil && exp.InputOpts.Limit != 0 {
		return exp.InputOpts.Limit, nil
	}
	if exp.InputOpts != nil && exp.InputOpts.Query != "" {
		// Counting a filtered query can be as expensive as the export itself.
		return 0, nil
	}
	q := session.DB(exp.ToolOptions.Namespace.DB).C(exp.ToolOptions.Namespace.Collection).Find(nil)
	c, err = q.Count()
	if err != nil {
		return 0, err
	}
	var skip int
	if exp.InputOpts != nil {
		skip = exp.InputOpts.Skip
	}
	// Skipping past the end of the collection yields zero documents.
	if skip > c {
		c = 0
	} else {
		c -= skip
	}
	return c, nil
}
+
+// getCursor returns a cursor that can be iterated over to get all the documents
+// to export, based on the options given to mongoexport. Also returns the
+// associated session, so that it can be closed once the cursor is used up.
+func (exp *MongoExport) getCursor() (*mgo.Iter, *mgo.Session, error) {
+ sortFields := []string{}
+ if exp.InputOpts != nil && exp.InputOpts.Sort != "" {
+ sortD, err := getSortFromArg(exp.InputOpts.Sort)
+ if err != nil {
+ return nil, nil, err
+ }
+ sortFields, err = bsonutil.MakeSortString(sortD)
+ if err != nil {
+ return nil, nil, err
+ }
+ }
+
+ query := map[string]interface{}{}
+ if exp.InputOpts != nil && exp.InputOpts.HasQuery() {
+ var err error
+ content, err := exp.InputOpts.GetQuery()
+ if err != nil {
+ return nil, nil, err
+ }
+ query, err = getObjectFromByteArg(content)
+ if err != nil {
+ return nil, nil, err
+ }
+ }
+
+ flags := 0
+ if len(query) == 0 && exp.InputOpts != nil &&
+ exp.InputOpts.ForceTableScan != true && exp.InputOpts.Sort == "" {
+ flags = flags | db.Snapshot
+ }
+
+ session, err := exp.SessionProvider.GetSession()
+ if err != nil {
+ return nil, nil, err
+ }
+
+ skip := 0
+ if exp.InputOpts != nil {
+ skip = exp.InputOpts.Skip
+ }
+ limit := 0
+ if exp.InputOpts != nil {
+ limit = exp.InputOpts.Limit
+ }
+
+ if exp.InputOpts.AssertExists {
+ collNames, err := session.DB(exp.ToolOptions.Namespace.DB).CollectionNames()
+ if err != nil {
+ return nil, session, err
+ }
+ if !util.StringSliceContains(collNames, exp.ToolOptions.Namespace.Collection) {
+ return nil, session, fmt.Errorf("collection '%s' does not exist",
+ exp.ToolOptions.Namespace.Collection)
+ }
+ }
+
+ // build the query
+ q := session.DB(exp.ToolOptions.Namespace.DB).
+ C(exp.ToolOptions.Namespace.Collection).Find(query).Sort(sortFields...).
+ Skip(skip).Limit(limit)
+
+ if len(exp.OutputOpts.Fields) > 0 {
+ q.Select(makeFieldSelector(exp.OutputOpts.Fields))
+ }
+
+ q = db.ApplyFlags(q, session, flags)
+
+ return q.Iter(), session, nil
+
+}
+
+// Internal function that handles exporting to the given writer. Used primarily
+// for testing, because it bypasses writing to the file system.
+func (exp *MongoExport) exportInternal(out io.Writer) (int64, error) {
+
+ max, err := exp.getCount()
+ if err != nil {
+ return 0, err
+ }
+
+ progressManager := progress.NewProgressBarManager(log.Writer(0), progressBarWaitTime)
+ progressManager.Start()
+ defer progressManager.Stop()
+
+ watchProgressor := progress.NewCounter(int64(max))
+ bar := &progress.Bar{
+ Name: fmt.Sprintf("%v.%v", exp.ToolOptions.Namespace.DB, exp.ToolOptions.Namespace.Collection),
+ Watching: watchProgressor,
+ BarLength: progressBarLength,
+ }
+ progressManager.Attach(bar)
+ defer progressManager.Detach(bar)
+
+ exportOutput, err := exp.getExportOutput(out)
+ if err != nil {
+ return 0, err
+ }
+
+ cursor, session, err := exp.getCursor()
+ if err != nil {
+ return 0, err
+ }
+ defer session.Close()
+ defer cursor.Close()
+
+ connURL := exp.ToolOptions.Host
+ if connURL == "" {
+ connURL = util.DefaultHost
+ }
+ if exp.ToolOptions.Port != "" {
+ connURL = connURL + ":" + exp.ToolOptions.Port
+ }
+ log.Logvf(log.Always, "connected to: %v", connURL)
+
+ // Write headers
+ err = exportOutput.WriteHeader()
+ if err != nil {
+ return 0, err
+ }
+
+ var result bson.D
+
+ docsCount := int64(0)
+
+ // Write document content
+ for cursor.Next(&result) {
+ err := exportOutput.ExportDocument(result)
+ if err != nil {
+ return docsCount, err
+ }
+ docsCount++
+ if docsCount%watchProgressorUpdateFrequency == 0 {
+ watchProgressor.Set(docsCount)
+ }
+ }
+ watchProgressor.Set(docsCount)
+ if err := cursor.Err(); err != nil {
+ return docsCount, err
+ }
+
+ // Write footers
+ err = exportOutput.WriteFooter()
+ if err != nil {
+ return docsCount, err
+ }
+ exportOutput.Flush()
+ return docsCount, nil
+}
+
+// Export executes the entire export operation. It returns an integer of the count
+// of documents successfully exported, and a non-nil error if something went wrong
+// during the export operation.
+func (exp *MongoExport) Export(out io.Writer) (int64, error) {
+ count, err := exp.exportInternal(out)
+ return count, err
+}
+
+// getExportOutput returns an implementation of ExportOutput which can handle
+// transforming BSON documents into the appropriate output format and writing
+// them to an output stream.
+func (exp *MongoExport) getExportOutput(out io.Writer) (ExportOutput, error) {
+ if exp.OutputOpts.Type == CSV {
+ // TODO what if user specifies *both* --fields and --fieldFile?
+ var fields []string
+ var err error
+ if len(exp.OutputOpts.Fields) > 0 {
+ fields = strings.Split(exp.OutputOpts.Fields, ",")
+ } else if exp.OutputOpts.FieldFile != "" {
+ fields, err = util.GetFieldsFromFile(exp.OutputOpts.FieldFile)
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ return nil, fmt.Errorf("CSV mode requires a field list")
+ }
+
+ exportFields := make([]string, 0, len(fields))
+ for _, field := range fields {
+ // for '$' field projections, exclude '.$' from the field name
+ if i := strings.LastIndex(field, "."); i != -1 && field[i+1:] == "$" {
+ exportFields = append(exportFields, field[:i])
+ } else {
+ exportFields = append(exportFields, field)
+ }
+ }
+
+ return NewCSVExportOutput(exportFields, exp.OutputOpts.NoHeaderLine, out), nil
+ }
+ return NewJSONExportOutput(exp.OutputOpts.JSONArray, exp.OutputOpts.Pretty, out), nil
+}
+
+// getObjectFromByteArg takes an object in extended JSON, and converts it to an object that
+// can be passed straight to db.collection.find(...) as a query or sort critera.
+// Returns an error if the string is not valid JSON, or extended JSON.
+func getObjectFromByteArg(queryRaw []byte) (map[string]interface{}, error) {
+ parsedJSON := map[string]interface{}{}
+ err := json.Unmarshal(queryRaw, &parsedJSON)
+ if err != nil {
+ return nil, fmt.Errorf("query '%v' is not valid JSON: %v", queryRaw, err)
+ }
+
+ err = bsonutil.ConvertJSONDocumentToBSON(parsedJSON)
+ if err != nil {
+ return nil, err
+ }
+ return parsedJSON, nil
+}
+
+// getSortFromArg takes a sort specification in JSON and returns it as a bson.D
+// object which preserves the ordering of the keys as they appear in the input.
+func getSortFromArg(queryRaw string) (bson.D, error) {
+ parsedJSON := bson.D{}
+ err := json.Unmarshal([]byte(queryRaw), &parsedJSON)
+ if err != nil {
+ return nil, fmt.Errorf("query '%v' is not valid JSON: %v", queryRaw, err)
+ }
+ // TODO: verify sort specification before returning a nil error
+ return parsedJSON, nil
+}
diff --git a/src/mongo/gotools/mongoexport/mongoexport_test.go b/src/mongo/gotools/mongoexport/mongoexport_test.go
new file mode 100644
index 00000000000..f5d0e7e2afa
--- /dev/null
+++ b/src/mongo/gotools/mongoexport/mongoexport_test.go
@@ -0,0 +1,44 @@
+package mongoexport
+
+import (
+ "encoding/json"
+ "github.com/mongodb/mongo-tools/common/bsonutil"
+ "github.com/mongodb/mongo-tools/common/testutil"
+ . "github.com/smartystreets/goconvey/convey"
+ "gopkg.in/mgo.v2/bson"
+ "os"
+ "testing"
+)
+
+func TestExtendedJSON(t *testing.T) {
+ testutil.VerifyTestType(t, testutil.UnitTestType)
+
+ Convey("Serializing a doc to extended JSON should work", t, func() {
+ x := bson.M{
+ "_id": bson.NewObjectId(),
+ "hey": "sup",
+ "subdoc": bson.M{
+ "subid": bson.NewObjectId(),
+ },
+ "array": []interface{}{
+ bson.NewObjectId(),
+ bson.Undefined,
+ },
+ }
+ out, err := bsonutil.ConvertBSONValueToJSON(x)
+ So(err, ShouldBeNil)
+
+ jsonEncoder := json.NewEncoder(os.Stdout)
+ jsonEncoder.Encode(out)
+ })
+}
+
+func TestFieldSelect(t *testing.T) {
+ testutil.VerifyTestType(t, testutil.UnitTestType)
+
+ Convey("Using makeFieldSelector should return correct projection doc", t, func() {
+ So(makeFieldSelector("a,b"), ShouldResemble, bson.M{"_id": 1, "a": 1, "b": 1})
+ So(makeFieldSelector(""), ShouldResemble, bson.M{"_id": 1})
+ So(makeFieldSelector("x,foo.baz"), ShouldResemble, bson.M{"_id": 1, "foo": 1, "x": 1})
+ })
+}
diff --git a/src/mongo/gotools/mongoexport/options.go b/src/mongo/gotools/mongoexport/options.go
new file mode 100644
index 00000000000..54896655581
--- /dev/null
+++ b/src/mongo/gotools/mongoexport/options.go
@@ -0,0 +1,79 @@
+package mongoexport
+
+import (
+ "fmt"
+ "io/ioutil"
+)
+
// Usage is the usage text for mongoexport, shown in the command's help output
// ahead of the generated flag descriptions.
var Usage = `<options>

Export data from MongoDB in CSV or JSON format.

See http://docs.mongodb.org/manual/reference/program/mongoexport/ for more information.`
+
// OutputFormatOptions defines the set of options to use in formatting exported
// data. For CSV output, exactly one of Fields or FieldFile must be supplied.
// The struct tags are consumed by the flag-parsing library to generate the
// command-line interface; changing them changes the CLI.
type OutputFormatOptions struct {
	// Fields is an option to directly specify comma-separated fields to export to CSV.
	Fields string `long:"fields" value-name:"<field>[,<field>]*" short:"f" description:"comma separated list of field names (required for exporting CSV) e.g. -f \"name,age\" "`

	// FieldFile is a filename that refers to a list of fields to export, 1 per line.
	FieldFile string `long:"fieldFile" value-name:"<filename>" description:"file with field names - 1 per line"`

	// Type selects the type of output to export as (json or csv).
	Type string `long:"type" value-name:"<type>" default:"json" default-mask:"-" description:"the output format, either json or csv (defaults to 'json')"`

	// CSVOutputType supports the legacy --csv flag as an alias for --type=csv.
	// The flag is hidden from help output.
	//
	// Deprecated: use Type ("--type=csv") instead.
	CSVOutputType bool `long:"csv" default:"false" hidden:"true"`

	// OutputFile specifies an output file path; when empty, stdout is used.
	OutputFile string `long:"out" value-name:"<filename>" short:"o" description:"output file; if not specified, stdout is used"`

	// JSONArray if set will export the documents as an array of JSON documents
	// rather than one object per line.
	JSONArray bool `long:"jsonArray" description:"output to a JSON array rather than one object per line"`

	// Pretty displays JSON data in a human-readable form.
	Pretty bool `long:"pretty" description:"output JSON formatted to be human-readable"`

	// NoHeaderLine, if set, will export CSV data without a list of field names at the first line.
	NoHeaderLine bool `long:"noHeaderLine" description:"export CSV data without a list of field names at the first line"`
}
+
+// Name returns a human-readable group name for output format options.
+func (*OutputFormatOptions) Name() string {
+ return "output"
+}
+
// InputOptions defines the set of options to use in retrieving data from the
// server. The struct tags are consumed by the flag-parsing library to
// generate the command-line interface.
type InputOptions struct {
	// Query is an inline query filter as a JSON string; takes precedence over
	// QueryFile (see GetQuery).
	Query string `long:"query" value-name:"<json>" short:"q" description:"query filter, as a JSON string, e.g., '{x:{$gt:1}}'"`
	// QueryFile is the path to a file containing the JSON query filter.
	QueryFile string `long:"queryFile" value-name:"<filename>" description:"path to a file containing a query filter (JSON)"`
	// SlaveOk allows reads from secondaries.
	SlaveOk bool `long:"slaveOk" short:"k" description:"allow secondary reads if available (default true)" default:"false" default-mask:"-"`
	// ReadPreference is a preference name or a preference JSON object.
	ReadPreference string `long:"readPreference" value-name:"<string>|<json>" description:"specify either a preference name or a preference json object"`
	// ForceTableScan forces a table scan (avoids $snapshot).
	ForceTableScan bool `long:"forceTableScan" description:"force a table scan (do not use $snapshot)"`
	// Skip is the number of documents to skip before exporting.
	Skip int `long:"skip" value-name:"<count>" description:"number of documents to skip"`
	// Limit caps the number of documents to export.
	Limit int `long:"limit" value-name:"<count>" description:"limit the number of documents to export"`
	// Sort is the sort order as a JSON string, e.g. '{x:1}'.
	Sort string `long:"sort" value-name:"<json>" description:"sort order, as a JSON string, e.g. '{x:1}'"`
	// AssertExists makes the export fail if the collection does not exist.
	AssertExists bool `long:"assertExists" default:"false" description:"if specified, export fails if the collection does not exist"`
}
+
+// Name returns a human-readable group name for input options.
+func (*InputOptions) Name() string {
+ return "querying"
+}
+
+func (inputOptions *InputOptions) HasQuery() bool {
+ return inputOptions.Query != "" || inputOptions.QueryFile != ""
+}
+
+func (inputOptions *InputOptions) GetQuery() ([]byte, error) {
+ if inputOptions.Query != "" {
+ return []byte(inputOptions.Query), nil
+ } else if inputOptions.QueryFile != "" {
+ content, err := ioutil.ReadFile(inputOptions.QueryFile)
+ if err != nil {
+ err = fmt.Errorf("error reading queryFile: %s", err)
+ }
+ return content, err
+ }
+ panic("GetQuery can return valid values only for query or queryFile input")
+}