diff options
author | Ramon Fernandez <ramon@mongodb.com> | 2016-08-25 16:34:34 -0400 |
---|---|---|
committer | Ramon Fernandez <ramon@mongodb.com> | 2016-08-25 16:54:18 -0400 |
commit | c330c9991ab45e7d0685d53e699ef26dba065660 (patch) | |
tree | 3dc5cd06b5f6c7eaaa4cb20cbe763504c14a772b /src/mongo/gotools/mongoexport | |
parent | eb62b862d5ebf179a1bcd9f394070e69c30188ab (diff) | |
download | mongo-c330c9991ab45e7d0685d53e699ef26dba065660.tar.gz |
Import tools: 5b883d86fdb4df55036d5dba2ca6f9dfa0750b44 from branch v3.3
ref: 1ac1389bda..5b883d86fd
for: 3.3.12
SERVER-25814 Initial vendor import: tools
Diffstat (limited to 'src/mongo/gotools/mongoexport')
-rw-r--r-- | src/mongo/gotools/mongoexport/csv.go | 148 | ||||
-rw-r--r-- | src/mongo/gotools/mongoexport/csv_test.go | 139 | ||||
-rw-r--r-- | src/mongo/gotools/mongoexport/json.go | 109 | ||||
-rw-r--r-- | src/mongo/gotools/mongoexport/json_test.go | 73 | ||||
-rw-r--r-- | src/mongo/gotools/mongoexport/kerberos_test.go | 44 | ||||
-rw-r--r-- | src/mongo/gotools/mongoexport/main/mongoexport.go | 143 | ||||
-rw-r--r-- | src/mongo/gotools/mongoexport/mongoexport.go | 425 | ||||
-rw-r--r-- | src/mongo/gotools/mongoexport/mongoexport_test.go | 44 | ||||
-rw-r--r-- | src/mongo/gotools/mongoexport/options.go | 79 |
9 files changed, 1204 insertions, 0 deletions
diff --git a/src/mongo/gotools/mongoexport/csv.go b/src/mongo/gotools/mongoexport/csv.go new file mode 100644 index 00000000000..165cb560cce --- /dev/null +++ b/src/mongo/gotools/mongoexport/csv.go @@ -0,0 +1,148 @@ +package mongoexport + +import ( + "encoding/csv" + "fmt" + "github.com/mongodb/mongo-tools/common/bsonutil" + "github.com/mongodb/mongo-tools/common/json" + "gopkg.in/mgo.v2/bson" + "io" + "reflect" + "strconv" + "strings" +) + +// type for reflect code +var marshalDType = reflect.TypeOf(bsonutil.MarshalD{}) + +// CSVExportOutput is an implementation of ExportOutput that writes documents to the output in CSV format. +type CSVExportOutput struct { + // Fields is a list of field names in the bson documents to be exported. + // A field can also use dot-delimited modifiers to address nested structures, + // for example "location.city" or "addresses.0". + Fields []string + + // NumExported maintains a running total of the number of documents written. + NumExported int64 + + // NoHeaderLine, if set, will export CSV data without a list of field names at the first line + NoHeaderLine bool + + csvWriter *csv.Writer +} + +// NewCSVExportOutput returns a CSVExportOutput configured to write output to the +// given io.Writer, extracting the specified fields only. +func NewCSVExportOutput(fields []string, noHeaderLine bool, out io.Writer) *CSVExportOutput { + return &CSVExportOutput{ + fields, + 0, + noHeaderLine, + csv.NewWriter(out), + } +} + +// WriteHeader writes a comma-delimited list of fields as the output header row. +func (csvExporter *CSVExportOutput) WriteHeader() error { + if !csvExporter.NoHeaderLine { + csvExporter.csvWriter.Write(csvExporter.Fields) + return csvExporter.csvWriter.Error() + } + return nil +} + +// WriteFooter is a no-op for CSV export formats. +func (csvExporter *CSVExportOutput) WriteFooter() error { + // no CSV footer + return nil +} + +// Flush writes any pending data to the underlying I/O stream. +func (csvExporter *CSVExportOutput) Flush() error { + csvExporter.csvWriter.Flush() + return csvExporter.csvWriter.Error() +} + +// ExportDocument writes a line to output with the CSV representation of a document. +func (csvExporter *CSVExportOutput) ExportDocument(document bson.D) error { + rowOut := make([]string, 0, len(csvExporter.Fields)) + extendedDoc, err := bsonutil.ConvertBSONValueToJSON(document) + if err != nil { + return err + } + + for _, fieldName := range csvExporter.Fields { + fieldVal := extractFieldByName(fieldName, extendedDoc) + if fieldVal == nil { + rowOut = append(rowOut, "") + } else if reflect.TypeOf(fieldVal) == reflect.TypeOf(bson.M{}) || + reflect.TypeOf(fieldVal) == reflect.TypeOf(bson.D{}) || + reflect.TypeOf(fieldVal) == marshalDType || + reflect.TypeOf(fieldVal) == reflect.TypeOf([]interface{}{}) { + buf, err := json.Marshal(fieldVal) + if err != nil { + rowOut = append(rowOut, "") + } else { + rowOut = append(rowOut, string(buf)) + } + } else { + rowOut = append(rowOut, fmt.Sprintf("%v", fieldVal)) + } + } + csvExporter.csvWriter.Write(rowOut) + csvExporter.NumExported++ + return csvExporter.csvWriter.Error() +} + +// extractFieldByName takes a field name and document, and returns a value representing +// the value of that field in the document in a format that can be printed as a string. +// It will also handle dot-delimited field names for nested arrays or documents. +func extractFieldByName(fieldName string, document interface{}) interface{} { + dotParts := strings.Split(fieldName, ".") + var subdoc interface{} = document + + for _, path := range dotParts { + docValue := reflect.ValueOf(subdoc) + if !docValue.IsValid() { + return "" + } + docType := docValue.Type() + docKind := docType.Kind() + if docKind == reflect.Map { + subdocVal := docValue.MapIndex(reflect.ValueOf(path)) + if subdocVal.Kind() == reflect.Invalid { + return "" + } + subdoc = subdocVal.Interface() + } else if docKind == reflect.Slice { + if docType == marshalDType { + // dive into a D as a document + asD := bson.D(subdoc.(bsonutil.MarshalD)) + var err error + subdoc, err = bsonutil.FindValueByKey(path, &asD) + if err != nil { + return "" + } + } else { + // check that the path can be converted to int + arrayIndex, err := strconv.Atoi(path) + if err != nil { + return "" + } + // bounds check for slice + if arrayIndex < 0 || arrayIndex >= docValue.Len() { + return "" + } + subdocVal := docValue.Index(arrayIndex) + if subdocVal.Kind() == reflect.Invalid { + return "" + } + subdoc = subdocVal.Interface() + } + } else { + // trying to index into a non-compound type - just return blank. + return "" + } + } + return subdoc +} diff --git a/src/mongo/gotools/mongoexport/csv_test.go b/src/mongo/gotools/mongoexport/csv_test.go new file mode 100644 index 00000000000..cdf65814e72 --- /dev/null +++ b/src/mongo/gotools/mongoexport/csv_test.go @@ -0,0 +1,139 @@ +package mongoexport + +import ( + "bytes" + "encoding/csv" + "github.com/mongodb/mongo-tools/common/bsonutil" + "github.com/mongodb/mongo-tools/common/testutil" + . "github.com/smartystreets/goconvey/convey" + "gopkg.in/mgo.v2/bson" + "strings" + "testing" +) + +func TestWriteCSV(t *testing.T) { + testutil.VerifyTestType(t, testutil.UnitTestType) + + Convey("With a CSV export output", t, func() { + fields := []string{"_id", "x", " y", "z.1.a"} + out := &bytes.Buffer{} + + Convey("Headers should be written correctly", func() { + csvExporter := NewCSVExportOutput(fields, false, out) + err := csvExporter.WriteHeader() + So(err, ShouldBeNil) + csvExporter.ExportDocument(bson.D{{"_id", "12345"}}) + csvExporter.WriteFooter() + csvExporter.Flush() + rec, err := csv.NewReader(strings.NewReader(out.String())).Read() + So(err, ShouldBeNil) + So(rec, ShouldResemble, []string{"_id", "x", " y", "z.1.a"}) + }) + + Convey("Headers should not be written", func() { + csvExporter := NewCSVExportOutput(fields, true, out) + err := csvExporter.WriteHeader() + So(err, ShouldBeNil) + csvExporter.ExportDocument(bson.D{{"_id", "12345"}}) + csvExporter.WriteFooter() + csvExporter.Flush() + rec, err := csv.NewReader(strings.NewReader(out.String())).Read() + So(err, ShouldBeNil) + So(rec, ShouldResemble, []string{"12345", "", "", ""}) + }) + + Convey("Exported document with missing fields should print as blank", func() { + csvExporter := NewCSVExportOutput(fields, true, out) + csvExporter.ExportDocument(bson.D{{"_id", "12345"}}) + csvExporter.WriteFooter() + csvExporter.Flush() + rec, err := csv.NewReader(strings.NewReader(out.String())).Read() + So(err, ShouldBeNil) + So(rec, ShouldResemble, []string{"12345", "", "", ""}) + }) + + Convey("Exported document with index into nested objects should print correctly", func() { + csvExporter := NewCSVExportOutput(fields, true, out) + z := []interface{}{"x", bson.D{{"a", "T"}, {"B", 1}}} + csvExporter.ExportDocument(bson.D{{Name: "z", Value: z}}) + csvExporter.WriteFooter() + csvExporter.Flush() + rec, err := csv.NewReader(strings.NewReader(out.String())).Read() + So(err, ShouldBeNil) + So(rec, ShouldResemble, []string{"", "", "", "T"}) + }) + + Reset(func() { + out.Reset() + }) + + }) +} + +func TestExtractDField(t *testing.T) { + Convey("With a test bson.D", t, func() { + b := []interface{}{"inner", bsonutil.MarshalD{{"inner2", 1}}} + c := bsonutil.MarshalD{{"x", 5}} + d := bsonutil.MarshalD{{"z", nil}} + testD := bsonutil.MarshalD{ + {"a", "string"}, + {"b", b}, + {"c", c}, + {"d", d}, + } + + Convey("regular fields should be extracted by name", func() { + val := extractFieldByName("a", testD) + So(val, ShouldEqual, "string") + }) + + Convey("array fields should be extracted by name", func() { + val := extractFieldByName("b.1", testD) + So(val, ShouldResemble, bsonutil.MarshalD{{"inner2", 1}}) + val = extractFieldByName("b.1.inner2", testD) + So(val, ShouldEqual, 1) + val = extractFieldByName("b.0", testD) + So(val, ShouldEqual, "inner") + }) + + Convey("subdocument fields should be extracted by name", func() { + val := extractFieldByName("c", testD) + So(val, ShouldResemble, bsonutil.MarshalD{{"x", 5}}) + val = extractFieldByName("c.x", testD) + So(val, ShouldEqual, 5) + + Convey("even if they contain null values", func() { + val := extractFieldByName("d", testD) + So(val, ShouldResemble, bsonutil.MarshalD{{"z", nil}}) + val = extractFieldByName("d.z", testD) + So(val, ShouldEqual, nil) + val = extractFieldByName("d.z.nope", testD) + So(val, ShouldEqual, "") + }) + }) + + Convey(`non-existing fields should return ""`, func() { + val := extractFieldByName("f", testD) + So(val, ShouldEqual, "") + val = extractFieldByName("c.nope", testD) + So(val, ShouldEqual, "") + val = extractFieldByName("c.nope.NOPE", testD) + So(val, ShouldEqual, "") + val = extractFieldByName("b.1000", testD) + So(val, ShouldEqual, "") + val = extractFieldByName("b.1.nada", testD) + So(val, ShouldEqual, "") + }) + + }) + + Convey(`Extraction of a non-document should return ""`, t, func() { + val := extractFieldByName("meh", []interface{}{"meh"}) + So(val, ShouldEqual, "") + }) + + Convey(`Extraction of a nil document should return ""`, t, func() { + val := extractFieldByName("a", nil) + So(val, ShouldEqual, "") + }) +} diff --git a/src/mongo/gotools/mongoexport/json.go b/src/mongo/gotools/mongoexport/json.go new file mode 100644 index 00000000000..7697b0dc264 --- /dev/null +++ b/src/mongo/gotools/mongoexport/json.go @@ -0,0 +1,109 @@ +package mongoexport + +import ( + "bytes" + "fmt" + "github.com/mongodb/mongo-tools/common/bsonutil" + "github.com/mongodb/mongo-tools/common/json" + "gopkg.in/mgo.v2/bson" + "io" +) + +// JSONExportOutput is an implementation of ExportOutput that writes documents +// to the output in JSON format. +type JSONExportOutput struct { + // ArrayOutput when set to true indicates that the output should be written + // as a JSON array, where each document is an element in the array. + ArrayOutput bool + // Pretty when set to true indicates that the output will be written in pretty mode. + PrettyOutput bool + Encoder *json.Encoder + Out io.Writer + NumExported int64 +} + +// NewJSONExportOutput creates a new JSONExportOutput in array mode if specified, +// configured to write data to the given io.Writer. +func NewJSONExportOutput(arrayOutput bool, prettyOutput bool, out io.Writer) *JSONExportOutput { + return &JSONExportOutput{ + arrayOutput, + prettyOutput, + json.NewEncoder(out), + out, + 0, + } +} + +// WriteHeader writes the opening square bracket if in array mode, otherwise it +// behaves as a no-op. +func (jsonExporter *JSONExportOutput) WriteHeader() error { + if jsonExporter.ArrayOutput { + // TODO check # bytes written? + _, err := jsonExporter.Out.Write([]byte{json.ArrayStart}) + if err != nil { + return err + } + } + return nil +} + +// WriteFooter writes the closing square bracket if in array mode, otherwise it +// behaves as a no-op. +func (jsonExporter *JSONExportOutput) WriteFooter() error { + if jsonExporter.ArrayOutput { + _, err := jsonExporter.Out.Write([]byte{json.ArrayEnd, '\n'}) + // TODO check # bytes written? + if err != nil { + return err + } + } + if jsonExporter.PrettyOutput { + jsonExporter.Out.Write([]byte("\n")) + } + return nil +} + +// Flush is a no-op for JSON export formats. +func (jsonExporter *JSONExportOutput) Flush() error { + return nil +} + +// ExportDocument converts the given document to extended JSON, and writes it +// to the output. +func (jsonExporter *JSONExportOutput) ExportDocument(document bson.D) error { + if jsonExporter.ArrayOutput || jsonExporter.PrettyOutput { + if jsonExporter.NumExported >= 1 { + if jsonExporter.ArrayOutput { + jsonExporter.Out.Write([]byte(",")) + } + if jsonExporter.PrettyOutput { + jsonExporter.Out.Write([]byte("\n")) + } + } + extendedDoc, err := bsonutil.ConvertBSONValueToJSON(document) + if err != nil { + return err + } + jsonOut, err := json.Marshal(extendedDoc) + if err != nil { + return fmt.Errorf("error converting BSON to extended JSON: %v", err) + } + if jsonExporter.PrettyOutput { + var jsonFormatted bytes.Buffer + json.Indent(&jsonFormatted, jsonOut, "", "\t") + jsonOut = jsonFormatted.Bytes() + } + jsonExporter.Out.Write(jsonOut) + } else { + extendedDoc, err := bsonutil.ConvertBSONValueToJSON(document) + if err != nil { + return err + } + err = jsonExporter.Encoder.Encode(extendedDoc) + if err != nil { + return err + } + } + jsonExporter.NumExported++ + return nil +} diff --git a/src/mongo/gotools/mongoexport/json_test.go b/src/mongo/gotools/mongoexport/json_test.go new file mode 100644 index 00000000000..ff988f971c0 --- /dev/null +++ b/src/mongo/gotools/mongoexport/json_test.go @@ -0,0 +1,73 @@ +package mongoexport + +import ( + "bytes" + "github.com/mongodb/mongo-tools/common/json" + "github.com/mongodb/mongo-tools/common/testutil" + . "github.com/smartystreets/goconvey/convey" + "gopkg.in/mgo.v2/bson" + "testing" +) + +func TestWriteJSON(t *testing.T) { + testutil.VerifyTestType(t, testutil.UnitTestType) + + Convey("With a JSON export output", t, func() { + out := &bytes.Buffer{} + + Convey("Special types should serialize as extended JSON", func() { + + Convey("ObjectId should have an extended JSON format", func() { + jsonExporter := NewJSONExportOutput(false, false, out) + objId := bson.NewObjectId() + err := jsonExporter.WriteHeader() + So(err, ShouldBeNil) + err = jsonExporter.ExportDocument(bson.D{{"_id", objId}}) + So(err, ShouldBeNil) + err = jsonExporter.WriteFooter() + So(err, ShouldBeNil) + So(out.String(), ShouldEqual, `{"_id":{"$oid":"`+objId.Hex()+`"}}`+"\n") + }) + + Reset(func() { + out.Reset() + }) + }) + + }) +} + +func TestJSONArray(t *testing.T) { + testutil.VerifyTestType(t, testutil.UnitTestType) + + Convey("With a JSON export output in array mode", t, func() { + out := &bytes.Buffer{} + Convey("exporting a bunch of documents should produce valid json", func() { + jsonExporter := NewJSONExportOutput(true, false, out) + err := jsonExporter.WriteHeader() + So(err, ShouldBeNil) + + // Export a few docs of various types + + testObjs := []interface{}{bson.NewObjectId(), "asd", 12345, 3.14159, bson.D{{"A", 1}}} + for _, obj := range testObjs { + err = jsonExporter.ExportDocument(bson.D{{"_id", obj}}) + So(err, ShouldBeNil) + } + + err = jsonExporter.WriteFooter() + So(err, ShouldBeNil) + // Unmarshal the whole thing, it should be valid json + fromJSON := []map[string]interface{}{} + err = json.Unmarshal(out.Bytes(), &fromJSON) + So(err, ShouldBeNil) + So(len(fromJSON), ShouldEqual, len(testObjs)) + + }) + + Reset(func() { + out.Reset() + }) + + }) +} diff --git a/src/mongo/gotools/mongoexport/kerberos_test.go b/src/mongo/gotools/mongoexport/kerberos_test.go new file mode 100644 index 00000000000..06c7ecb0b6e --- /dev/null +++ b/src/mongo/gotools/mongoexport/kerberos_test.go @@ -0,0 +1,44 @@ +package mongoexport + +import ( + "bytes" + "github.com/mongodb/mongo-tools/common/db" + "github.com/mongodb/mongo-tools/common/json" + "github.com/mongodb/mongo-tools/common/testutil" + . "github.com/smartystreets/goconvey/convey" + "strings" + "testing" +) + +func TestKerberos(t *testing.T) { + testutil.VerifyTestType(t, testutil.KerberosTestType) + + Convey("Should be able to run mongoexport with Kerberos auth", t, func() { + opts, err := testutil.GetKerberosOptions() + + So(err, ShouldBeNil) + + sessionProvider, err := db.NewSessionProvider(*opts) + So(err, ShouldBeNil) + + export := MongoExport{ + ToolOptions: *opts, + OutputOpts: &OutputFormatOptions{}, + InputOpts: &InputOptions{}, + SessionProvider: sessionProvider, + } + + var out bytes.Buffer + num, err := export.exportInternal(&out) + + So(err, ShouldBeNil) + So(num, ShouldEqual, 1) + outputLines := strings.Split(strings.TrimSpace(out.String()), "\n") + So(len(outputLines), ShouldEqual, 1) + outMap := map[string]interface{}{} + So(json.Unmarshal([]byte(outputLines[0]), &outMap), ShouldBeNil) + So(outMap["kerberos"], ShouldEqual, true) + So(outMap["authenticated"], ShouldEqual, "yeah") + So(outMap["_id"].(map[string]interface{})["$oid"], ShouldEqual, "528fb35afb3a8030e2f643c3") + }) +} diff --git a/src/mongo/gotools/mongoexport/main/mongoexport.go b/src/mongo/gotools/mongoexport/main/mongoexport.go new file mode 100644 index 00000000000..c18451f03df --- /dev/null +++ b/src/mongo/gotools/mongoexport/main/mongoexport.go @@ -0,0 +1,143 @@ +// Main package for the mongoexport tool. +package main + +import ( + "github.com/mongodb/mongo-tools/common/db" + "github.com/mongodb/mongo-tools/common/log" + "github.com/mongodb/mongo-tools/common/options" + "github.com/mongodb/mongo-tools/common/signals" + "github.com/mongodb/mongo-tools/common/util" + "github.com/mongodb/mongo-tools/mongoexport" + "gopkg.in/mgo.v2" + "gopkg.in/mgo.v2/bson" + "os" +) + +func main() { + // initialize command-line opts + opts := options.New("mongoexport", mongoexport.Usage, + options.EnabledOptions{Auth: true, Connection: true, Namespace: true}) + + outputOpts := &mongoexport.OutputFormatOptions{} + opts.AddOptions(outputOpts) + inputOpts := &mongoexport.InputOptions{} + opts.AddOptions(inputOpts) + + args, err := opts.Parse() + if err != nil { + log.Logvf(log.Always, "error parsing command line options: %v", err) + log.Logvf(log.Always, "try 'mongoexport --help' for more information") + os.Exit(util.ExitBadOptions) + } + if len(args) != 0 { + log.Logvf(log.Always, "too many positional arguments: %v", args) + log.Logvf(log.Always, "try 'mongoexport --help' for more information") + os.Exit(util.ExitBadOptions) + } + + log.SetVerbosity(opts.Verbosity) + signals.Handle() + + // print help, if specified + if opts.PrintHelp(false) { + return + } + + // print version, if specified + if opts.PrintVersion() { + return + } + + // connect directly, unless a replica set name is explicitly specified + _, setName := util.ParseConnectionString(opts.Host) + opts.Direct = (setName == "") + opts.ReplicaSetName = setName + + provider, err := db.NewSessionProvider(*opts) + + // temporarily allow secondary reads for the isMongos check + provider.SetReadPreference(mgo.Nearest) + isMongos, err := provider.IsMongos() + if err != nil { + log.Logvf(log.Always, "%v", err) + os.Exit(util.ExitError) + } + + provider.SetFlags(db.DisableSocketTimeout) + + if inputOpts.SlaveOk { + if inputOpts.ReadPreference != "" { + log.Logvf(log.Always, "--slaveOk can't be specified when --readPreference is specified") + os.Exit(util.ExitBadOptions) + } + log.Logvf(log.Always, "--slaveOk is deprecated and being internally rewritten as --readPreference=nearest") + inputOpts.ReadPreference = "nearest" + } + + var mode mgo.Mode + if opts.ReplicaSetName != "" || isMongos { + mode = mgo.Primary + } else { + mode = mgo.Nearest + } + var tags bson.D + if inputOpts.ReadPreference != "" { + mode, tags, err = db.ParseReadPreference(inputOpts.ReadPreference) + if err != nil { + log.Logvf(log.Always, "error parsing --ReadPreference: %v", err) + os.Exit(util.ExitBadOptions) + } + if len(tags) > 0 { + provider.SetTags(tags) + } + } + + // warn if we are trying to export from a secondary in a sharded cluster + if isMongos && mode != mgo.Primary { + log.Logvf(log.Always, db.WarningNonPrimaryMongosConnection) + } + + provider.SetReadPreference(mode) + + if err != nil { + log.Logvf(log.Always, "error connecting to host: %v", err) + os.Exit(util.ExitError) + } + exporter := mongoexport.MongoExport{ + ToolOptions: *opts, + OutputOpts: outputOpts, + InputOpts: inputOpts, + SessionProvider: provider, + } + + err = exporter.ValidateSettings() + if err != nil { + log.Logvf(log.Always, "error validating settings: %v", err) + log.Logvf(log.Always, "try 'mongoexport --help' for more information") + os.Exit(util.ExitBadOptions) + } + + writer, err := exporter.GetOutputWriter() + if err != nil { + log.Logvf(log.Always, "error opening output stream: %v", err) + os.Exit(util.ExitError) + } + if writer == nil { + writer = os.Stdout + } else { + defer writer.Close() + } + + numDocs, err := exporter.Export(writer) + if err != nil { + log.Logvf(log.Always, "Failed: %v", err) + os.Exit(util.ExitError) + } + + if numDocs == 1 { + log.Logvf(log.Always, "exported %v record", numDocs) + } else { + log.Logvf(log.Always, "exported %v records", numDocs) + } + +} diff --git a/src/mongo/gotools/mongoexport/mongoexport.go b/src/mongo/gotools/mongoexport/mongoexport.go new file mode 100644 index 00000000000..995e252f4df --- /dev/null +++ b/src/mongo/gotools/mongoexport/mongoexport.go @@ -0,0 +1,425 @@ +// Package mongoexport produces a JSON or CSV export of data stored in a MongoDB instance. +package mongoexport + +import ( + "fmt" + "github.com/mongodb/mongo-tools/common/bsonutil" + "github.com/mongodb/mongo-tools/common/db" + "github.com/mongodb/mongo-tools/common/json" + "github.com/mongodb/mongo-tools/common/log" + "github.com/mongodb/mongo-tools/common/options" + "github.com/mongodb/mongo-tools/common/progress" + "github.com/mongodb/mongo-tools/common/util" + "gopkg.in/mgo.v2" + "gopkg.in/mgo.v2/bson" + "io" + "os" + "path/filepath" + "strings" + "time" +) + +// Output types supported by mongoexport. +const ( + CSV = "csv" + JSON = "json" + progressBarLength = 24 + progressBarWaitTime = time.Second + watchProgressorUpdateFrequency = 8000 +) + +// MongoExport is a container for the user-specified options and +// internal state used for running mongoexport. +type MongoExport struct { + // generic mongo tool options + ToolOptions options.ToolOptions + + // OutputOpts controls options for how the exported data should be formatted + OutputOpts *OutputFormatOptions + + InputOpts *InputOptions + + // for connecting to the db + SessionProvider *db.SessionProvider + ExportOutput ExportOutput +} + +// ExportOutput is an interface that specifies how a document should be formatted +// and written to an output stream. +type ExportOutput interface { + // WriteHeader outputs any pre-record headers that are written once + // per output file. + WriteHeader() error + + // WriteRecord writes the given document to the given io.Writer according to + // the format supported by the underlying ExportOutput implementation. + ExportDocument(bson.D) error + + // WriteFooter outputs any post-record headers that are written once per + // output file. + WriteFooter() error + + // Flush writes any pending data to the underlying I/O stream. + Flush() error +} + +// ValidateSettings returns an error if any settings specified on the command line +// were invalid, or nil if they are valid. +func (exp *MongoExport) ValidateSettings() error { + // Namespace must have a valid database if none is specified, + // use 'test' + if exp.ToolOptions.Namespace.DB == "" { + exp.ToolOptions.Namespace.DB = "test" + } + + if exp.ToolOptions.Namespace.Collection == "" { + return fmt.Errorf("must specify a collection") + } + if err := util.ValidateCollectionName(exp.ToolOptions.Namespace.Collection); err != nil { + return err + } + + exp.OutputOpts.Type = strings.ToLower(exp.OutputOpts.Type) + + if exp.OutputOpts.CSVOutputType { + log.Logv(log.Always, "csv flag is deprecated; please use --type=csv instead") + exp.OutputOpts.Type = CSV + } + + if exp.OutputOpts.Type == "" { + // special error for an empty type value + return fmt.Errorf("--type cannot be empty") + } + if exp.OutputOpts.Type != CSV && exp.OutputOpts.Type != JSON { + return fmt.Errorf("invalid output type '%v', choose 'json' or 'csv'", exp.OutputOpts.Type) + } + + if exp.InputOpts.Query != "" && exp.InputOpts.ForceTableScan { + return fmt.Errorf("cannot use --forceTableScan when specifying --query") + } + + if exp.InputOpts.Query != "" && exp.InputOpts.QueryFile != "" { + return fmt.Errorf("either --query or --queryFile can be specified as a query option") + } + + if exp.InputOpts != nil && exp.InputOpts.HasQuery() { + content, err := exp.InputOpts.GetQuery() + if err != nil { + return err + } + _, err2 := getObjectFromByteArg(content) + if err2 != nil { + return err2 + } + } + + if exp.InputOpts != nil && exp.InputOpts.Sort != "" { + _, err := getSortFromArg(exp.InputOpts.Sort) + if err != nil { + return err + } + } + return nil +} + +// GetOutputWriter opens and returns an io.WriteCloser for the output +// options or nil if none is set. The caller is responsible for closing it. +func (exp *MongoExport) GetOutputWriter() (io.WriteCloser, error) { + if exp.OutputOpts.OutputFile != "" { + // If the directory in which the output file is to be + // written does not exist, create it + fileDir := filepath.Dir(exp.OutputOpts.OutputFile) + err := os.MkdirAll(fileDir, 0750) + if err != nil { + return nil, err + } + + file, err := os.Create(util.ToUniversalPath(exp.OutputOpts.OutputFile)) + if err != nil { + return nil, err + } + return file, err + } + // No writer, so caller should assume Stdout (or some other reasonable default) + return nil, nil +} + +// Take a comma-delimited set of field names and build a selector doc for query projection. +// For fields containing a dot '.', we project the entire top-level portion. +// e.g. "a,b,c.d.e,f.$" -> {a:1, b:1, "c":1, "f.$": 1}. +func makeFieldSelector(fields string) bson.M { + selector := bson.M{"_id": 1} + if fields == "" { + return selector + } + + for _, field := range strings.Split(fields, ",") { + // Projections like "a.0" work fine for nested documents not for arrays + // - if passed directly to mongod. To handle this, we have to retrieve + // the entire top-level document and then filter afterwards. An exception + // is made for '$' projections - which are passed directly to mongod. + if i := strings.LastIndex(field, "."); i != -1 && field[i+1:] != "$" { + field = field[:strings.Index(field, ".")] + } + selector[field] = 1 + } + return selector +} + +// getCount returns an estimate of how many documents the cursor will fetch +// It always returns Limit if there is a limit, assuming that in general +// limits will less then the total possible. +// If there is a query and no limit then it returns 0, because it's too expensive to count the query. +// Otherwise it returns the count minus the skip +func (exp *MongoExport) getCount() (c int, err error) { + session, err := exp.SessionProvider.GetSession() + if err != nil { + return 0, err + } + defer session.Close() + if exp.InputOpts != nil && exp.InputOpts.Limit != 0 { + return exp.InputOpts.Limit, nil + } + if exp.InputOpts != nil && exp.InputOpts.Query != "" { + return 0, nil + } + q := session.DB(exp.ToolOptions.Namespace.DB).C(exp.ToolOptions.Namespace.Collection).Find(nil) + c, err = q.Count() + if err != nil { + return 0, err + } + var skip int + if exp.InputOpts != nil { + skip = exp.InputOpts.Skip + } + if skip > c { + c = 0 + } else { + c -= skip + } + return c, nil +} + +// getCursor returns a cursor that can be iterated over to get all the documents +// to export, based on the options given to mongoexport. Also returns the +// associated session, so that it can be closed once the cursor is used up. +func (exp *MongoExport) getCursor() (*mgo.Iter, *mgo.Session, error) { + sortFields := []string{} + if exp.InputOpts != nil && exp.InputOpts.Sort != "" { + sortD, err := getSortFromArg(exp.InputOpts.Sort) + if err != nil { + return nil, nil, err + } + sortFields, err = bsonutil.MakeSortString(sortD) + if err != nil { + return nil, nil, err + } + } + + query := map[string]interface{}{} + if exp.InputOpts != nil && exp.InputOpts.HasQuery() { + var err error + content, err := exp.InputOpts.GetQuery() + if err != nil { + return nil, nil, err + } + query, err = getObjectFromByteArg(content) + if err != nil { + return nil, nil, err + } + } + + flags := 0 + if len(query) == 0 && exp.InputOpts != nil && + exp.InputOpts.ForceTableScan != true && exp.InputOpts.Sort == "" { + flags = flags | db.Snapshot + } + + session, err := exp.SessionProvider.GetSession() + if err != nil { + return nil, nil, err + } + + skip := 0 + if exp.InputOpts != nil { + skip = exp.InputOpts.Skip + } + limit := 0 + if exp.InputOpts != nil { + limit = exp.InputOpts.Limit + } + + if exp.InputOpts.AssertExists { + collNames, err := session.DB(exp.ToolOptions.Namespace.DB).CollectionNames() + if err != nil { + return nil, session, err + } + if !util.StringSliceContains(collNames, exp.ToolOptions.Namespace.Collection) { + return nil, session, fmt.Errorf("collection '%s' does not exist", + exp.ToolOptions.Namespace.Collection) + } + } + + // build the query + q := session.DB(exp.ToolOptions.Namespace.DB). + C(exp.ToolOptions.Namespace.Collection).Find(query).Sort(sortFields...). + Skip(skip).Limit(limit) + + if len(exp.OutputOpts.Fields) > 0 { + q.Select(makeFieldSelector(exp.OutputOpts.Fields)) + } + + q = db.ApplyFlags(q, session, flags) + + return q.Iter(), session, nil + +} + +// Internal function that handles exporting to the given writer. Used primarily +// for testing, because it bypasses writing to the file system. +func (exp *MongoExport) exportInternal(out io.Writer) (int64, error) { + + max, err := exp.getCount() + if err != nil { + return 0, err + } + + progressManager := progress.NewProgressBarManager(log.Writer(0), progressBarWaitTime) + progressManager.Start() + defer progressManager.Stop() + + watchProgressor := progress.NewCounter(int64(max)) + bar := &progress.Bar{ + Name: fmt.Sprintf("%v.%v", exp.ToolOptions.Namespace.DB, exp.ToolOptions.Namespace.Collection), + Watching: watchProgressor, + BarLength: progressBarLength, + } + progressManager.Attach(bar) + defer progressManager.Detach(bar) + + exportOutput, err := exp.getExportOutput(out) + if err != nil { + return 0, err + } + + cursor, session, err := exp.getCursor() + if err != nil { + return 0, err + } + defer session.Close() + defer cursor.Close() + + connURL := exp.ToolOptions.Host + if connURL == "" { + connURL = util.DefaultHost + } + if exp.ToolOptions.Port != "" { + connURL = connURL + ":" + exp.ToolOptions.Port + } + log.Logvf(log.Always, "connected to: %v", connURL) + + // Write headers + err = exportOutput.WriteHeader() + if err != nil { + return 0, err + } + + var result bson.D + + docsCount := int64(0) + + // Write document content + for cursor.Next(&result) { + err := exportOutput.ExportDocument(result) + if err != nil { + return docsCount, err + } + docsCount++ + if docsCount%watchProgressorUpdateFrequency == 0 { + watchProgressor.Set(docsCount) + } + } + watchProgressor.Set(docsCount) + if err := cursor.Err(); err != nil { + return docsCount, err + } + + // Write footers + err = exportOutput.WriteFooter() + if err != nil { + return docsCount, err + } + exportOutput.Flush() + return docsCount, nil +} + +// Export executes the entire export operation. It returns an integer of the count +// of documents successfully exported, and a non-nil error if something went wrong +// during the export operation. +func (exp *MongoExport) Export(out io.Writer) (int64, error) { + count, err := exp.exportInternal(out) + return count, err +} + +// getExportOutput returns an implementation of ExportOutput which can handle +// transforming BSON documents into the appropriate output format and writing +// them to an output stream. +func (exp *MongoExport) getExportOutput(out io.Writer) (ExportOutput, error) { + if exp.OutputOpts.Type == CSV { + // TODO what if user specifies *both* --fields and --fieldFile? + var fields []string + var err error + if len(exp.OutputOpts.Fields) > 0 { + fields = strings.Split(exp.OutputOpts.Fields, ",") + } else if exp.OutputOpts.FieldFile != "" { + fields, err = util.GetFieldsFromFile(exp.OutputOpts.FieldFile) + if err != nil { + return nil, err + } + } else { + return nil, fmt.Errorf("CSV mode requires a field list") + } + + exportFields := make([]string, 0, len(fields)) + for _, field := range fields { + // for '$' field projections, exclude '.$' from the field name + if i := strings.LastIndex(field, "."); i != -1 && field[i+1:] == "$" { + exportFields = append(exportFields, field[:i]) + } else { + exportFields = append(exportFields, field) + } + } + + return NewCSVExportOutput(exportFields, exp.OutputOpts.NoHeaderLine, out), nil + } + return NewJSONExportOutput(exp.OutputOpts.JSONArray, exp.OutputOpts.Pretty, out), nil +} + +// getObjectFromByteArg takes an object in extended JSON, and converts it to an object that +// can be passed straight to db.collection.find(...) as a query or sort critera. +// Returns an error if the string is not valid JSON, or extended JSON. +func getObjectFromByteArg(queryRaw []byte) (map[string]interface{}, error) { + parsedJSON := map[string]interface{}{} + err := json.Unmarshal(queryRaw, &parsedJSON) + if err != nil { + return nil, fmt.Errorf("query '%v' is not valid JSON: %v", queryRaw, err) + } + + err = bsonutil.ConvertJSONDocumentToBSON(parsedJSON) + if err != nil { + return nil, err + } + return parsedJSON, nil +} + +// getSortFromArg takes a sort specification in JSON and returns it as a bson.D +// object which preserves the ordering of the keys as they appear in the input. +func getSortFromArg(queryRaw string) (bson.D, error) { + parsedJSON := bson.D{} + err := json.Unmarshal([]byte(queryRaw), &parsedJSON) + if err != nil { + return nil, fmt.Errorf("query '%v' is not valid JSON: %v", queryRaw, err) + } + // TODO: verify sort specification before returning a nil error + return parsedJSON, nil +} diff --git a/src/mongo/gotools/mongoexport/mongoexport_test.go b/src/mongo/gotools/mongoexport/mongoexport_test.go new file mode 100644 index 00000000000..f5d0e7e2afa --- /dev/null +++ b/src/mongo/gotools/mongoexport/mongoexport_test.go @@ -0,0 +1,44 @@ +package mongoexport + +import ( + "encoding/json" + "github.com/mongodb/mongo-tools/common/bsonutil" + "github.com/mongodb/mongo-tools/common/testutil" + . "github.com/smartystreets/goconvey/convey" + "gopkg.in/mgo.v2/bson" + "os" + "testing" +) + +func TestExtendedJSON(t *testing.T) { + testutil.VerifyTestType(t, testutil.UnitTestType) + + Convey("Serializing a doc to extended JSON should work", t, func() { + x := bson.M{ + "_id": bson.NewObjectId(), + "hey": "sup", + "subdoc": bson.M{ + "subid": bson.NewObjectId(), + }, + "array": []interface{}{ + bson.NewObjectId(), + bson.Undefined, + }, + } + out, err := bsonutil.ConvertBSONValueToJSON(x) + So(err, ShouldBeNil) + + jsonEncoder := json.NewEncoder(os.Stdout) + jsonEncoder.Encode(out) + }) +} + +func TestFieldSelect(t *testing.T) { + testutil.VerifyTestType(t, testutil.UnitTestType) + + Convey("Using makeFieldSelector should return correct projection doc", t, func() { + So(makeFieldSelector("a,b"), ShouldResemble, bson.M{"_id": 1, "a": 1, "b": 1}) + So(makeFieldSelector(""), ShouldResemble, bson.M{"_id": 1}) + So(makeFieldSelector("x,foo.baz"), ShouldResemble, bson.M{"_id": 1, "foo": 1, "x": 1}) + }) +} diff --git a/src/mongo/gotools/mongoexport/options.go b/src/mongo/gotools/mongoexport/options.go new file mode 100644 index 00000000000..54896655581 --- /dev/null +++ b/src/mongo/gotools/mongoexport/options.go @@ -0,0 +1,79 @@ +package mongoexport + +import ( + "fmt" + "io/ioutil" +) + +var Usage = `<options> + +Export data from MongoDB in CSV or JSON format. + +See http://docs.mongodb.org/manual/reference/program/mongoexport/ for more information.` + +// OutputFormatOptions defines the set of options to use in formatting exported data. +type OutputFormatOptions struct { + // Fields is an option to directly specify comma-separated fields to export to CSV. + Fields string `long:"fields" value-name:"<field>[,<field>]*" short:"f" description:"comma separated list of field names (required for exporting CSV) e.g. -f \"name,age\" "` + + // FieldFile is a filename that refers to a list of fields to export, 1 per line. + FieldFile string `long:"fieldFile" value-name:"<filename>" description:"file with field names - 1 per line"` + + // Type selects the type of output to export as (json or csv). + Type string `long:"type" value-name:"<type>" default:"json" default-mask:"-" description:"the output format, either json or csv (defaults to 'json')"` + + // Deprecated: allow legacy --csv option in place of --type=csv + CSVOutputType bool `long:"csv" default:"false" hidden:"true"` + + // OutputFile specifies an output file path. + OutputFile string `long:"out" value-name:"<filename>" short:"o" description:"output file; if not specified, stdout is used"` + + // JSONArray if set will export the documents an array of JSON documents. + JSONArray bool `long:"jsonArray" description:"output to a JSON array rather than one object per line"` + + // Pretty displays JSON data in a human-readable form. + Pretty bool `long:"pretty" description:"output JSON formatted to be human-readable"` + + // NoHeaderLine, if set, will export CSV data without a list of field names at the first line. + NoHeaderLine bool `long:"noHeaderLine" description:"export CSV data without a list of field names at the first line"` +} + +// Name returns a human-readable group name for output format options. +func (*OutputFormatOptions) Name() string { + return "output" +} + +// InputOptions defines the set of options to use in retrieving data from the server. +type InputOptions struct { + Query string `long:"query" value-name:"<json>" short:"q" description:"query filter, as a JSON string, e.g., '{x:{$gt:1}}'"` + QueryFile string `long:"queryFile" value-name:"<filename>" description:"path to a file containing a query filter (JSON)"` + SlaveOk bool `long:"slaveOk" short:"k" description:"allow secondary reads if available (default true)" default:"false" default-mask:"-"` + ReadPreference string `long:"readPreference" value-name:"<string>|<json>" description:"specify either a preference name or a preference json object"` + ForceTableScan bool `long:"forceTableScan" description:"force a table scan (do not use $snapshot)"` + Skip int `long:"skip" value-name:"<count>" description:"number of documents to skip"` + Limit int `long:"limit" value-name:"<count>" description:"limit the number of documents to export"` + Sort string `long:"sort" value-name:"<json>" description:"sort order, as a JSON string, e.g. '{x:1}'"` + AssertExists bool `long:"assertExists" default:"false" description:"if specified, export fails if the collection does not exist"` +} + +// Name returns a human-readable group name for input options. +func (*InputOptions) Name() string { + return "querying" +} + +func (inputOptions *InputOptions) HasQuery() bool { + return inputOptions.Query != "" || inputOptions.QueryFile != "" +} + +func (inputOptions *InputOptions) GetQuery() ([]byte, error) { + if inputOptions.Query != "" { + return []byte(inputOptions.Query), nil + } else if inputOptions.QueryFile != "" { + content, err := ioutil.ReadFile(inputOptions.QueryFile) + if err != nil { + err = fmt.Errorf("error reading queryFile: %s", err) + } + return content, err + } + panic("GetQuery can return valid values only for query or queryFile input") +} |