diff options
Diffstat (limited to 'src/mongo/gotools/mongorestore/filepath.go')
-rw-r--r-- | src/mongo/gotools/mongorestore/filepath.go | 644 |
1 files changed, 644 insertions, 0 deletions
diff --git a/src/mongo/gotools/mongorestore/filepath.go b/src/mongo/gotools/mongorestore/filepath.go new file mode 100644 index 00000000000..61ccbbfb08f --- /dev/null +++ b/src/mongo/gotools/mongorestore/filepath.go @@ -0,0 +1,644 @@ +package mongorestore + +import ( + "compress/gzip" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "strings" + "sync/atomic" + + "github.com/mongodb/mongo-tools/common" + "github.com/mongodb/mongo-tools/common/archive" + "github.com/mongodb/mongo-tools/common/intents" + "github.com/mongodb/mongo-tools/common/log" + "github.com/mongodb/mongo-tools/common/util" +) + +// FileType describes the various types of restore documents. +type FileType uint + +// File types constants used by mongorestore. +const ( + UnknownFileType FileType = iota + BSONFileType + MetadataFileType +) + +type errorWriter struct{} + +func (errorWriter) Write([]byte) (int, error) { + return 0, os.ErrInvalid +} + +// PosReader is a ReadCloser which maintains the position of what has been +// read from the Reader. +type PosReader interface { + io.ReadCloser + Pos() int64 +} + +// posTrackingReader is a type for reading from a file and being able to determine +// what position the file is at. +type posTrackingReader struct { + pos int64 // updated atomically, aligned at the beginning of the struct + io.ReadCloser +} + +func (f *posTrackingReader) Read(p []byte) (int, error) { + n, err := f.ReadCloser.Read(p) + atomic.AddInt64(&f.pos, int64(n)) + return n, err +} + +func (f *posTrackingReader) Pos() int64 { + return atomic.LoadInt64(&f.pos) +} + +// mixedPosTrackingReader is a type for reading from one file but getting the position of a +// different file. This is useful for compressed files where the appropriate position for progress +// bars is that of the compressed file, but file should be read from the uncompressed file. 
+type mixedPosTrackingReader struct { + readHolder PosReader + posHolder PosReader +} + +func (f *mixedPosTrackingReader) Read(p []byte) (int, error) { + return f.readHolder.Read(p) +} + +func (f *mixedPosTrackingReader) Pos() int64 { + return f.posHolder.Pos() +} + +func (f *mixedPosTrackingReader) Close() error { + err := f.readHolder.Close() + if err != nil { + return err + } + return f.posHolder.Close() +} + +// realBSONFile implements the intents.file interface. It lets intents read from real BSON files +// ok disk via an embedded os.File +// The Read, Write and Close methods of the intents.file interface is implemented here by the +// embedded os.File, the Write will return an error and not succeed +type realBSONFile struct { + path string + PosReader + // errorWrite adds a Write() method to this object allowing it to be an + // intent.file ( a ReadWriteOpenCloser ) + errorWriter + intent *intents.Intent + gzip bool +} + +// Open is part of the intents.file interface. realBSONFiles need to be Opened before Read +// can be called on them. +func (f *realBSONFile) Open() (err error) { + if f.path == "" { + // this error shouldn't happen normally + return fmt.Errorf("error reading BSON file for %v", f.intent.Namespace()) + } + file, err := os.Open(f.path) + if err != nil { + return fmt.Errorf("error reading BSON file %v: %v", f.path, err) + } + posFile := &posTrackingReader{0, file} + if f.gzip { + gzFile, err := gzip.NewReader(posFile) + posUncompressedFile := &posTrackingReader{0, gzFile} + if err != nil { + return fmt.Errorf("error decompressing compresed BSON file %v: %v", f.path, err) + } + f.PosReader = &mixedPosTrackingReader{ + readHolder: posUncompressedFile, + posHolder: posFile} + } else { + f.PosReader = posFile + } + return nil +} + +// realMetadataFile implements the intents.file interface. 
It lets intents read from real +// metadata.json files ok disk via an embedded os.File +// The Read, Write and Close methods of the intents.file interface is implemented here by the +// embedded os.File, the Write will return an error and not succeed +type realMetadataFile struct { + pos int64 // updated atomically, aligned at the beginning of the struct + io.ReadCloser + path string + // errorWrite adds a Write() method to this object allowing it to be an + // intent.file ( a ReadWriteOpenCloser ) + errorWriter + intent *intents.Intent + gzip bool +} + +// Open is part of the intents.file interface. realMetadataFiles need to be Opened before Read +// can be called on them. +func (f *realMetadataFile) Open() (err error) { + if f.path == "" { + return fmt.Errorf("error reading metadata for %v", f.intent.Namespace()) + } + file, err := os.Open(f.path) + if err != nil { + return fmt.Errorf("error reading metadata %v: %v", f.path, err) + } + if f.gzip { + gzFile, err := gzip.NewReader(file) + if err != nil { + return fmt.Errorf("error reading compressed metadata %v: %v", f.path, err) + } + f.ReadCloser = &wrappedReadCloser{gzFile, file} + } else { + f.ReadCloser = file + } + return nil +} + +func (f *realMetadataFile) Read(p []byte) (int, error) { + n, err := f.ReadCloser.Read(p) + atomic.AddInt64(&f.pos, int64(n)) + return n, err +} + +func (f *realMetadataFile) Pos() int64 { + return atomic.LoadInt64(&f.pos) +} + +// stdinFile implements the intents.file interface. They allow intents to read single collections +// from standard input +type stdinFile struct { + pos int64 // updated atomically, aligned at the beginning of the struct + io.Reader + errorWriter +} + +// Open is part of the intents.file interface. stdinFile needs to have Open called on it before +// Read can be called on it. 
+func (f *stdinFile) Open() error { + return nil +} + +func (f *stdinFile) Read(p []byte) (int, error) { + n, err := f.Reader.Read(p) + atomic.AddInt64(&f.pos, int64(n)) + return n, err +} + +func (f *stdinFile) Pos() int64 { + return atomic.LoadInt64(&f.pos) +} + +// Close is part of the intents.file interface. After Close is called, Read will fail. +func (f *stdinFile) Close() error { + f.Reader = nil + return nil +} + +// getInfoFromFilename pulls the base collection name and FileType from a given file. +func (restore *MongoRestore) getInfoFromFilename(filename string) (string, FileType) { + baseFileName := filepath.Base(filename) + // .bin supported for legacy reasons + if strings.HasSuffix(baseFileName, ".bin") { + baseName := strings.TrimSuffix(baseFileName, ".bin") + return baseName, BSONFileType + } + // Gzip indicates that files in a dump directory should have a .gz suffix + // but it does not indicate that the "files" provided by the archive should, + // compressed or otherwise. + if restore.InputOptions.Gzip && restore.InputOptions.Archive == "" { + if strings.HasSuffix(baseFileName, ".metadata.json.gz") { + baseName := strings.TrimSuffix(baseFileName, ".metadata.json.gz") + return baseName, MetadataFileType + } else if strings.HasSuffix(baseFileName, ".bson.gz") { + baseName := strings.TrimSuffix(baseFileName, ".bson.gz") + return baseName, BSONFileType + } + return "", UnknownFileType + } + if strings.HasSuffix(baseFileName, ".metadata.json") { + baseName := strings.TrimSuffix(baseFileName, ".metadata.json") + return baseName, MetadataFileType + } else if strings.HasSuffix(baseFileName, ".bson") { + baseName := strings.TrimSuffix(baseFileName, ".bson") + return baseName, BSONFileType + } + return "", UnknownFileType +} + +// CreateAllIntents drills down into a dump folder, creating intents for all of +// the databases and collections it finds. 
func (restore *MongoRestore) CreateAllIntents(dir archive.DirLike) error {
	log.Logvf(log.DebugHigh, "using %v as dump root directory", dir.Path())
	entries, err := dir.ReadDir()
	if err != nil {
		return fmt.Errorf("error reading root dump folder: %v", err)
	}
	for _, entry := range entries {
		if entry.IsDir() {
			// Each subdirectory of the dump root is treated as one database dump.
			if err = util.ValidateDBName(entry.Name()); err != nil {
				return fmt.Errorf("invalid database name '%v': %v", entry.Name(), err)
			}
			err = restore.CreateIntentsForDB(entry.Name(), entry)
			if err != nil {
				return err
			}
		} else {
			if entry.Name() == "oplog.bson" {
				if restore.InputOptions.OplogReplay {
					log.Logv(log.DebugLow, "found oplog.bson file to replay")
				}
				// NOTE: the oplog intent has no DB set; only C ("oplog") is populated.
				oplogIntent := &intents.Intent{
					C:        "oplog",
					Size:     entry.Size(),
					Location: entry.Path(),
				}
				if !restore.InputOptions.OplogReplay {
					// Not replaying the oplog. When reading from an archive the
					// demultiplexer still needs a consumer registered for this
					// namespace, so attach a muted (discarding) sink; in all
					// cases the intent is NOT handed to the manager.
					if restore.InputOptions.Archive != "" {
						mutedOut := &archive.MutedCollection{
							Intent: oplogIntent,
							Demux:  restore.archive.Demux,
						}
						restore.archive.Demux.Open(
							oplogIntent.Namespace(),
							mutedOut,
						)
					}
					continue
				}
				if restore.InputOptions.Archive != "" {
					// Location is only a human-readable label for archives.
					if restore.InputOptions.Archive == "-" {
						oplogIntent.Location = "archive on stdin"
					} else {
						oplogIntent.Location = fmt.Sprintf("archive '%v'", restore.InputOptions.Archive)
					}

					// no need to check that we want to cache here
					oplogIntent.BSONFile = &archive.RegularCollectionReceiver{
						Intent: oplogIntent,
						Origin: oplogIntent.Namespace(),
						Demux:  restore.archive.Demux,
					}
				} else {
					oplogIntent.BSONFile = &realBSONFile{path: entry.Path(), intent: oplogIntent, gzip: restore.InputOptions.Gzip}
				}
				restore.manager.Put(oplogIntent)
			} else {
				// Regular files other than oplog.bson at the dump root are not
				// part of any dump layout we understand.
				log.Logvf(log.Always, `don't know what to do with file "%v", skipping...`, entry.Path())
			}
		}
	}
	return nil
}

// CreateIntentForOplog creates an intent for a file that we want to treat as an oplog.
+func (restore *MongoRestore) CreateIntentForOplog() error { + target, err := newActualPath(restore.InputOptions.OplogFile) + db := "" + collection := "oplog" + if err != nil { + return err + } + log.Logvf(log.DebugLow, "reading oplog from %v", target.Path()) + + if target.IsDir() { + return fmt.Errorf("file %v is a directory, not a bson file", target.Path()) + } + + // Then create its intent. + intent := &intents.Intent{ + DB: db, + C: collection, + Size: target.Size(), + Location: target.Path(), + } + intent.BSONFile = &realBSONFile{path: target.Path(), intent: intent, gzip: restore.InputOptions.Gzip} + restore.manager.PutOplogIntent(intent, "oplogFile") + return nil +} + +// CreateIntentsForDB drills down into the dir folder, creating intents +// for all of the collection dump files it finds for the db database. +func (restore *MongoRestore) CreateIntentsForDB(db string, dir archive.DirLike) (err error) { + var entries []archive.DirLike + log.Logvf(log.DebugHigh, "reading collections for database %v in %v", db, dir.Name()) + entries, err = dir.ReadDir() + if err != nil { + return fmt.Errorf("error reading db folder %v: %v", db, err) + } + usesMetadataFiles := hasMetadataFiles(entries) + for _, entry := range entries { + if entry.IsDir() { + log.Logvf(log.Always, `don't know what to do with subdirectory "%v", skipping...`, + filepath.Join(dir.Name(), entry.Name())) + } else { + collection, fileType := restore.getInfoFromFilename(entry.Name()) + sourceNS := db + "." + collection + switch fileType { + case BSONFileType: + var skip bool + // Dumps of a single database (i.e. with the -d flag) may contain special + // db-specific collections that start with a "$" (for example, $admin.system.users + // holds the users for a database that was dumped with --dumpDbUsersAndRoles enabled). + // If these special files manage to be included in a dump directory during a full + // (multi-db) restore, we should ignore them. 
+ if restore.NSOptions.DB == "" && strings.HasPrefix(collection, "$") { + log.Logvf(log.DebugLow, "not restoring special collection %v.%v", db, collection) + skip = true + } + // TOOLS-717: disallow restoring to the system.profile collection. + // Server versions >= 3.0.3 disallow user inserts to system.profile so + // it would likely fail anyway. + if collection == "system.profile" { + log.Logvf(log.DebugLow, "skipping restore of system.profile collection", db) + skip = true + } + // skip restoring the indexes collection if we are using metadata + // files to store index information, to eliminate redundancy + if collection == "system.indexes" && usesMetadataFiles { + log.Logvf(log.DebugLow, + "not restoring system.indexes collection because database %v "+ + "has .metadata.json files", db) + skip = true + } + + if !restore.includer.Has(sourceNS) { + log.Logvf(log.DebugLow, "skipping restoring %v.%v, it is not included", db, collection) + skip = true + } + if restore.excluder.Has(sourceNS) { + log.Logvf(log.DebugLow, "skipping restoring %v.%v, it is excluded", db, collection) + skip = true + } + destNS := restore.renamer.Get(sourceNS) + destDB, destC := common.SplitNamespace(destNS) + intent := &intents.Intent{ + DB: destDB, + C: destC, + Size: entry.Size(), + } + if restore.InputOptions.Archive != "" { + if restore.InputOptions.Archive == "-" { + intent.Location = "archive on stdin" + } else { + intent.Location = fmt.Sprintf("archive '%v'", restore.InputOptions.Archive) + } + if skip { + // adding the DemuxOut to the demux, but not adding the intent to the manager + mutedOut := &archive.MutedCollection{Intent: intent, Demux: restore.archive.Demux} + restore.archive.Demux.Open(sourceNS, mutedOut) + continue + } + if intent.IsSpecialCollection() { + specialCollectionCache := archive.NewSpecialCollectionCache(intent, restore.archive.Demux) + intent.BSONFile = specialCollectionCache + restore.archive.Demux.Open(sourceNS, specialCollectionCache) + } else { + 
intent.BSONFile = &archive.RegularCollectionReceiver{ + Origin: sourceNS, + Intent: intent, + Demux: restore.archive.Demux, + } + } + } else { + if skip { + continue + } + intent.Location = entry.Path() + intent.BSONFile = &realBSONFile{path: entry.Path(), intent: intent, gzip: restore.InputOptions.Gzip} + } + log.Logvf(log.Info, "found collection %v bson to restore to %v", sourceNS, destNS) + restore.manager.PutWithNamespace(sourceNS, intent) + case MetadataFileType: + if !restore.includer.Has(sourceNS) { + log.Logvf(log.DebugLow, "skipping restoring %v.%v metadata, it is not included", db, collection) + continue + } + if restore.excluder.Has(sourceNS) { + log.Logvf(log.DebugLow, "skipping restoring %v.%v metadata, it is excluded", db, collection) + continue + } + + usesMetadataFiles = true + destNS := restore.renamer.Get(sourceNS) + rnDB, rnC := common.SplitNamespace(destNS) + intent := &intents.Intent{ + DB: rnDB, + C: rnC, + } + + if restore.InputOptions.Archive != "" { + if restore.InputOptions.Archive == "-" { + intent.MetadataLocation = "archive on stdin" + } else { + intent.MetadataLocation = fmt.Sprintf("archive '%v'", restore.InputOptions.Archive) + } + intent.MetadataFile = &archive.MetadataPreludeFile{Origin: sourceNS, Intent: intent, Prelude: restore.archive.Prelude} + } else { + intent.MetadataLocation = entry.Path() + intent.MetadataFile = &realMetadataFile{path: entry.Path(), intent: intent, gzip: restore.InputOptions.Gzip} + } + log.Logvf(log.Info, "found collection metadata from %v to restore to %v", sourceNS, destNS) + restore.manager.PutWithNamespace(sourceNS, intent) + default: + log.Logvf(log.Always, `don't know what to do with file "%v", skipping...`, + entry.Path()) + } + } + } + return nil +} + +// CreateStdinIntentForCollection builds an intent for the given database and collection name +// that is to be read from standard input +func (restore *MongoRestore) CreateStdinIntentForCollection(db string, collection string) error { + 
	log.Logvf(log.DebugLow, "reading collection %v for database %v from standard input",
		collection, db)
	// Location "-" conventionally denotes standard input.
	intent := &intents.Intent{
		DB:       db,
		C:        collection,
		Location: "-",
	}
	intent.BSONFile = &stdinFile{Reader: restore.stdin}
	restore.manager.Put(intent)
	return nil
}

// CreateIntentForCollection builds an intent for the given database and collection name
// along with a path to a .bson collection file. It searches the file's parent directory
// for a matching metadata file.
//
// This method is not called by CreateIntentsForDB,
// it is only used in the case where --db and --collection flags are set.
func (restore *MongoRestore) CreateIntentForCollection(db string, collection string, dir archive.DirLike) error {
	log.Logvf(log.DebugLow, "reading collection %v for database %v from %v",
		collection, db, dir.Path())
	// first make sure the bson file exists and is valid
	_, err := dir.Stat()
	if err != nil {
		return err
	}
	if dir.IsDir() {
		return fmt.Errorf("file %v is a directory, not a bson file", dir.Path())
	}

	baseName, fileType := restore.getInfoFromFilename(dir.Name())
	if fileType != BSONFileType {
		return fmt.Errorf("file %v does not have .bson extension", dir.Path())
	}

	// then create its intent
	intent := &intents.Intent{
		DB:       db,
		C:        collection,
		Size:     dir.Size(),
		Location: dir.Path(),
	}
	intent.BSONFile = &realBSONFile{path: dir.Path(), intent: intent, gzip: restore.InputOptions.Gzip}

	// finally, check if it has a .metadata.json file in its folder
	log.Logvf(log.DebugLow, "scanning directory %v for metadata", dir.Name())
	entries, err := dir.Parent().ReadDir()
	if err != nil {
		// try and carry on if we can; a missing metadata file only costs us
		// indexes and collection options, not data
		log.Logvf(log.Info, "error attempting to locate metadata for file: %v", err)
		log.Logv(log.Info, "restoring collection without metadata")
		restore.manager.Put(intent)
		return nil
	}
	// the metadata file shares the collection's base name, with a .gz suffix
	// appended when the dump was taken with --gzip
	metadataName := baseName + ".metadata.json"
	if restore.InputOptions.Gzip {
		metadataName += ".gz"
	}
	for _, entry := range entries {
		if entry.Name() == metadataName {
			metadataPath := entry.Path()
			log.Logvf(log.Info, "found metadata for collection at %v", metadataPath)
			intent.MetadataLocation = metadataPath
			intent.MetadataFile = &realMetadataFile{path: metadataPath, intent: intent, gzip: restore.InputOptions.Gzip}
			break
		}
	}

	if intent.MetadataFile == nil {
		log.Logv(log.Info, "restoring collection without metadata")
	}

	restore.manager.Put(intent)

	return nil
}

// helper for searching a list of FileInfo for metadata files
func hasMetadataFiles(files []archive.DirLike) bool {
	for _, file := range files {
		if strings.HasSuffix(file.Name(), ".metadata.json") {
			return true
		}
	}
	return false
}

// handleBSONInsteadOfDirectory updates -d and -c settings based on
// the path to the BSON file passed to mongorestore. This is only
// applicable if the target path points to a .bson file.
//
// As an example, when the user passes 'dump/mydb/col.bson', this method
// will infer that 'mydb' is the database and 'col' is the collection name.
func (restore *MongoRestore) handleBSONInsteadOfDirectory(path string) error {
	// we know we have been given a non-directory, so we should handle it
	// like a bson file and infer as much as we can
	if restore.NSOptions.Collection == "" {
		// if the user did not set -c, use the file name for the collection
		newCollectionName, fileType := restore.getInfoFromFilename(path)
		if fileType != BSONFileType {
			return fmt.Errorf("file %v does not have .bson extension", path)
		}
		restore.NSOptions.Collection = newCollectionName
		log.Logvf(log.DebugLow, "inferred collection '%v' from file", restore.NSOptions.Collection)
	}
	if restore.NSOptions.DB == "" {
		// if the user did not set -d, use the directory containing the target
		// file as the db name (as it would be in a dump directory).
If + // we cannot determine the directory name, use "test" + dirForFile := filepath.Base(filepath.Dir(path)) + if dirForFile == "." || dirForFile == ".." { + dirForFile = "test" + } + restore.NSOptions.DB = dirForFile + log.Logvf(log.DebugLow, "inferred db '%v' from the file's directory", restore.NSOptions.DB) + } + return nil +} + +type actualPath struct { + os.FileInfo + path string + parent *actualPath +} + +func newActualPath(dir string) (*actualPath, error) { + stat, err := os.Stat(dir) + if err != nil { + return nil, err + } + path := filepath.Dir(filepath.Clean(dir)) + parent := &actualPath{} + parentStat, err := os.Stat(path) + if err == nil { + parent.FileInfo = parentStat + parent.path = filepath.Dir(path) + } + ap := &actualPath{ + FileInfo: stat, + path: path, + parent: parent, + } + return ap, nil +} + +func (ap actualPath) Path() string { + return filepath.Join(ap.path, ap.Name()) +} + +func (ap actualPath) Parent() archive.DirLike { + // returns nil if there is no parent + return ap.parent +} + +func (ap actualPath) ReadDir() ([]archive.DirLike, error) { + entries, err := ioutil.ReadDir(ap.Path()) + if err != nil { + return nil, err + } + var returnFileInfo = make([]archive.DirLike, 0, len(entries)) + for _, entry := range entries { + returnFileInfo = append(returnFileInfo, + actualPath{ + FileInfo: entry, + path: ap.Path(), + parent: &ap, + }) + } + return returnFileInfo, nil +} + +func (ap actualPath) Stat() (archive.DirLike, error) { + stat, err := os.Stat(ap.Path()) + if err != nil { + return nil, err + } + return &actualPath{FileInfo: stat, path: ap.Path()}, nil +} + +func (ap actualPath) IsDir() bool { + stat, err := os.Stat(ap.Path()) + if err != nil { + return false + } + return stat.IsDir() +} |