/**
 *    Copyright (C) 2018-present MongoDB, Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#pragma once

#include <boost/optional.hpp>

#include "mongo/base/owned_pointer_vector.h"
#include "mongo/bson/mutable/damage_vector.h"
#include "mongo/db/exec/collection_scan_common.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/record_id.h"
#include "mongo/db/storage/record_data.h"

namespace mongo {

class CappedCallback;
class Collection;
class MAdvise;
class OperationContext;

class RecordStore;

struct ValidateResults;
class ValidateAdaptor;

/**
 * The data items stored in a RecordStore.
 */
struct Record {
    RecordId id;
    RecordData data;
};

/**
 * Retrieves Records from a RecordStore.
 *
 * A cursor is constructed with a direction flag with the following effects:
 *      - The direction that next() moves.
 *      - If a restore cannot return to the saved position, cursors will be positioned on the
 *        closest position *after* the query in the direction of the scan.
 *
 * A cursor is tied to a transaction, such as the OperationContext or a WriteUnitOfWork
 * inside that context. Any cursor acquired inside a transaction is invalid outside
 * of that transaction, instead use the save and restore methods to reestablish the cursor.
 *
 * Any method other than the save method may throw WriteConflictException. If that happens, the
 * cursor may not be used again until it has been saved and successfully restored. If next() or
 * restore() throw a WCE the cursor's position will be the same as before the call (strong exception
 * guarantee). All other methods leave the cursor in a valid state but with an unspecified position
 * (basic exception guarantee). If any exception other than WCE is thrown, the cursor must be
 * destroyed, which is guaranteed not to leak any resources.
 *
 * Any returned unowned BSON is only valid until the next call to any method on this
 * interface.
 *
 * Implementations may override any default implementation if they can provide a more
 * efficient implementation.
 *
 * Storage engines only need to implement the derived SeekableRecordCursor, but may choose
 * to implement this simpler interface for cursors used for repair or random traversal.
 *
 * IMPORTANT NOTE FOR DOCUMENT-LOCKING ENGINES: If you implement capped collections with a
 * "visibility" system such that documents that exist in your snapshot but were inserted after
 * the last uncommitted document are hidden, you must follow the following rules:
 *   - next() on forward cursors must never return invisible documents.
 *   - If next() on a forward cursor hits an invisible document, it should behave as if it hit
 *     the end of the collection.
 *   - Reverse cursors must ignore the visibility filter. That means that they initially return the
 *     newest committed record in the collection and may skip over uncommitted records.
 *   - SeekableRecordCursor::seekExact() must ignore the visibility filter and return the requested
 *     document even if it is supposed to be invisible.
 * TODO SERVER-18934 Handle this above the storage engine layer so storage engines don't have to
 * deal with capped visibility.
 */
class RecordCursor {
public:
    virtual ~RecordCursor() = default;

    /**
     * Moves forward and returns the new data or boost::none if there is no more data.
     * Continues returning boost::none once it reaches EOF unlike stl iterators.
     */
    virtual boost::optional<Record> next() = 0;

    //
    // Saving and restoring state
    //

    /**
     * Prepares for state changes in underlying data in a way that allows the cursor's
     * current position to be restored.
     *
     * It is safe to call save multiple times in a row.
     * No other method (excluding destructor) may be called until successfully restored.
     */
    virtual void save() = 0;

    /**
     * Recovers from potential state changes in underlying data.
     *
     * Returns false if it is invalid to continue using this Cursor. This usually means that
     * capped deletes have caught up to the position of this Cursor and continuing could
     * result in missed data. Note that Cursors, unlike iterators can continue to iterate past the
     * "end"
     *
     * If the former position no longer exists, but it is safe to continue iterating, the
     * following call to next() will return the next closest position in the direction of the
     * scan, if any.
     *
     * This handles restoring after either save() or SeekableRecordCursor::saveUnpositioned().
     */
    virtual bool restore() = 0;

    /**
     * Detaches from the OperationContext and releases any storage-engine state.
     *
     * It is only legal to call this when in a "saved" state. While in the "detached" state, it is
     * only legal to call reattachToOperationContext or the destructor. It is not legal to call
     * detachFromOperationContext() while already in the detached state.
     */
    virtual void detachFromOperationContext() = 0;

    /**
     * Reattaches to the OperationContext and reacquires any storage-engine state.
     *
     * It is only legal to call this in the "detached" state. On return, the cursor is left in a
     * "saved" state, so callers must still call restoreState to use this object.
     */
    virtual void reattachToOperationContext(OperationContext* opCtx) = 0;
};

/**
 * Adds explicit seeking of records. This functionality is separated out from RecordCursor, because
 * some cursors are not required to support seeking. All storage engines must support detecting the
 * existence of Records.
 */
class SeekableRecordCursor : public RecordCursor {
public:
    /**
     * Seeks to a Record with the provided id.
     *
     * If an exact match can't be found, boost::none will be returned and the resulting position
     * of the cursor is unspecified.
     */
    virtual boost::optional<Record> seekExact(const RecordId& id) = 0;

    /**
     * Prepares for state changes in underlying data without necessarily saving the current
     * state.
     *
     * The cursor's position when restored is unspecified. Caller is expected to seek rather
     * than call next() following the restore.
     *
     * It is safe to call saveUnpositioned multiple times in a row.
     * No other method (excluding destructor) may be called until successfully restored.
     */
    virtual void saveUnpositioned() {
        save();
    }
};

/**
 * An abstraction used for storing documents in a collection or entries in an index.
 *
 * In storage engines implementing the KVEngine, record stores are also used for implementing
 * catalogs.
 *
 * Many methods take an OperationContext parameter. This contains the RecoveryUnit, with
 * all RecordStore specific transaction information, as well as the LockState. Methods that take
 * an OperationContext may throw a WriteConflictException.
 *
 * This class must be thread-safe for document-level locking storage engines. In addition, for
 * storage engines implementing the KVEngine some methods must be thread safe, see DurableCatalog.
 */
class RecordStore {
    RecordStore(const RecordStore&) = delete;
    RecordStore& operator=(const RecordStore&) = delete;

public:
    RecordStore(StringData ns) : _ns(ns.toString()) {}

    virtual ~RecordStore() {}

    // META

    // name of the RecordStore implementation
    virtual const char* name() const = 0;

    const std::string& ns() const {
        return _ns;
    }

    void setNs(NamespaceString ns) {
        _ns = ns.ns();
    }

    bool isTemp() const {
        return ns().size() == 0;
    }

    virtual const std::string& getIdent() const = 0;

    /**
     * The dataSize is an approximation of the sum of the sizes (in bytes) of the
     * documents or entries in the recordStore.
     */
    virtual long long dataSize(OperationContext* opCtx) const = 0;

    /**
     * Total number of records in the RecordStore. You may need to cache it, so this call
     * takes constant time, as it is called often.
     */
    virtual long long numRecords(OperationContext* opCtx) const = 0;

    virtual bool isCapped() const = 0;

    virtual void setCappedCallback(CappedCallback*) {
        MONGO_UNREACHABLE;
    }

    /**
     * @param extraInfo - optional more debug info
     * @param level - optional, level of debug info to put in (higher is more)
     * @return total estimate size (in bytes) on stable storage
     */
    virtual int64_t storageSize(OperationContext* opCtx,
                                BSONObjBuilder* extraInfo = nullptr,
                                int infoLevel = 0) const = 0;

    /**
     * @return file bytes available for reuse
     * A return value of zero can mean either no bytes are available, or that the real value is
     * unknown.
     */
    virtual int64_t freeStorageSize(OperationContext* opCtx) const {
        return 0;
    }

    // CRUD related

    /**
     * Get the RecordData at loc, which must exist.
     *
     * If unowned data is returned, it is only valid until either of these happens:
     *  - The record is modified
     *  - The snapshot from which it was obtained is abandoned
     *  - The lock on the collection is released
     *
     * In general, prefer findRecord or RecordCursor::seekExact since they can tell you if a record
     * has been removed.
     */
    RecordData dataFor(OperationContext* opCtx, const RecordId& loc) const {
        RecordData data;
        invariant(findRecord(opCtx, loc, &data),
                  str::stream() << "Didn't find RecordId " << loc << " in record store " << ns());
        return data;
    }

    /**
     * @param out - If the record exists, the contents of this are set.
     * @return true iff there is a Record for loc
     *
     * If unowned data is returned, it is valid until the next modification of this Record or
     * the lock on this collection is released.
     *
     * In general prefer RecordCursor::seekExact since it can avoid copying data in more
     * storageEngines.
     */
    virtual bool findRecord(OperationContext* opCtx, const RecordId& loc, RecordData* out) const {
        auto cursor = getCursor(opCtx);
        auto record = cursor->seekExact(loc);
        if (!record)
            return false;

        record->data.makeOwned();  // Unowned data expires when cursor goes out of scope.
        *out = std::move(record->data);
        return true;
    }

    virtual void deleteRecord(OperationContext* opCtx, const RecordId& dl) = 0;

    /**
     * Inserts the specified records into this RecordStore by copying the passed-in record data and
     * updates 'inOutRecords' to contain the ids of the inserted records.
     */
    virtual Status insertRecords(OperationContext* opCtx,
                                 std::vector<Record>* inOutRecords,
                                 const std::vector<Timestamp>& timestamps) = 0;

    /**
     * A thin wrapper around insertRecords() to simplify handling of single document inserts.
     */
    StatusWith<RecordId> insertRecord(OperationContext* opCtx,
                                      const char* data,
                                      int len,
                                      Timestamp timestamp) {
        std::vector<Record> inOutRecords{Record{RecordId(), RecordData(data, len)}};
        Status status = insertRecords(opCtx, &inOutRecords, std::vector<Timestamp>{timestamp});
        if (!status.isOK())
            return status;
        return inOutRecords.front().id;
    }

    /**
     * Updates the record with id 'recordId', replacing its contents with those described by
     * 'data' and 'len'.
     */
    virtual Status updateRecord(OperationContext* opCtx,
                                const RecordId& recordId,
                                const char* data,
                                int len) = 0;

    /**
     * @return Returns 'false' if this record store does not implement
     * 'updatewithDamages'. If this method returns false, 'updateWithDamages' must not be
     * called, and all updates must be routed through 'updateRecord' above. This allows the
     * update framework to avoid doing the work of damage tracking if the underlying record
     * store cannot utilize that information.
     */
    virtual bool updateWithDamagesSupported() const = 0;

    /**
     * Updates the record positioned at 'loc' in-place using the deltas described by 'damages'. The
     * 'damages' vector describes contiguous ranges of 'damageSource' from which to copy and apply
     * byte-level changes to the data. Behavior is undefined for calling this on a non-existant loc.
     *
     * @return the updated version of the record. If unowned data is returned, then it is valid
     * until the next modification of this Record or the lock on the collection has been released.
     */
    virtual StatusWith<RecordData> updateWithDamages(OperationContext* opCtx,
                                                     const RecordId& loc,
                                                     const RecordData& oldRec,
                                                     const char* damageSource,
                                                     const mutablebson::DamageVector& damages) = 0;

    /**
     * Returns a new cursor over this record store.
     *
     * The cursor is logically positioned before the first (or last if !forward) Record in the
     * collection so that Record will be returned on the first call to next(). Implementations
     * are allowed to lazily seek to the first Record when next() is called rather than doing
     * it on construction.
     */
    virtual std::unique_ptr<SeekableRecordCursor> getCursor(OperationContext* opCtx,
                                                            bool forward = true) const = 0;

    /**
     * Constructs a cursor over a record store that returns documents in a randomized order, and
     * allows storage engines to provide a more efficient way of random sampling of a record store
     * than MongoDB's default sampling methods, which is used when this method returns {}.
     *
     * This method may be implemented using a pseudo-random walk over B-trees or a similar approach.
     * Different cursors should return documents in a different order. Random cursors may return
     * the same document more than once and, as a result, may return more documents than exist in
     * the record store. Implementations should avoid obvious biases toward older, newer, larger
     * smaller or other specific classes of documents.
     */
    virtual std::unique_ptr<RecordCursor> getRandomCursor(OperationContext* opCtx) const {
        return {};
    }

    // higher level


    /**
     * removes all Records
     */
    virtual Status truncate(OperationContext* opCtx) = 0;

    /**
     * Truncate documents newer than the document at 'end' from the capped
     * collection.  The collection cannot be completely emptied using this
     * function.  An assertion will be thrown if that is attempted.
     * @param inclusive - Truncate 'end' as well iff true
     */
    virtual void cappedTruncateAfter(OperationContext* opCtx, RecordId end, bool inclusive) = 0;

    /**
     * does this RecordStore support the compact operation?
     *
     * If you return true, you must provide implementations of all compact methods.
     */
    virtual bool compactSupported() const {
        return false;
    }

    /**
     * If compact() supports online compaction.
     *
     * Only called if compactSupported() returns true.
     */
    virtual bool supportsOnlineCompaction() const {
        MONGO_UNREACHABLE;
    }

    /**
     * Attempt to reduce the storage space used by this RecordStore.
     *
     * Only called if compactSupported() returns true.
     */
    virtual Status compact(OperationContext* opCtx) {
        MONGO_UNREACHABLE;
    }

    /**
     * Does the RecordStore cursor retrieve its document in RecordId Order?
     *
     * If a subclass overrides the default value to true, the RecordStore cursor must retrieve
     * its documents in RecordId order.
     *
     * This enables your storage engine to run collection validation in the
     * background.
     */
    virtual bool isInRecordIdOrder() const {
        return false;
    }

    /**
     * Performs record store specific validation to ensure consistency of underlying data
     * structures. If corruption is found, details of the errors will be in the results parameter.
     */
    virtual void validate(OperationContext* opCtx,
                          ValidateResults* results,
                          BSONObjBuilder* output) {}

    /**
     * @param scaleSize - amount by which to scale size metrics
     * appends any custom stats from the RecordStore or other unique stats
     */
    virtual void appendCustomStats(OperationContext* opCtx,
                                   BSONObjBuilder* result,
                                   double scale) const = 0;

    /**
     * Return the RecordId of an oplog entry as close to startingPosition as possible without
     * being higher. If there are no entries <= startingPosition, return RecordId().
     *
     * If you don't implement the oplogStartHack, just use the default implementation which
     * returns boost::none.
     */
    virtual boost::optional<RecordId> oplogStartHack(OperationContext* opCtx,
                                                     const RecordId& startingPosition) const {
        return boost::none;
    }

    /**
     * When we write to an oplog, we call this so that if the storage engine
     * supports doc locking, it can manage the visibility of oplog entries to ensure
     * they are ordered.
     *
     * Since this is called inside of a WriteUnitOfWork while holding a std::mutex, it is
     * illegal to acquire any LockManager locks inside of this function.
     *
     * If `orderedCommit` is true, the storage engine can assume the input `opTime` has become
     * visible in the oplog. Otherwise the storage engine must continue to maintain its own
     * visibility management. Calls with `orderedCommit` true will not be concurrent with calls of
     * `orderedCommit` false.
     */
    virtual Status oplogDiskLocRegister(OperationContext* opCtx,
                                        const Timestamp& opTime,
                                        bool orderedCommit) {
        return Status::OK();
    }

    /**
     * Waits for all writes that completed before this call to be visible to forward scans.
     * See the comment on RecordCursor for more details about the visibility rules.
     *
     * It is only legal to call this on an oplog. It is illegal to call this inside a
     * WriteUnitOfWork.
     */
    virtual void waitForAllEarlierOplogWritesToBeVisible(OperationContext* opCtx) const = 0;

    /**
     * Called after a repair operation is run with the recomputed numRecords and dataSize.
     */
    virtual void updateStatsAfterRepair(OperationContext* opCtx,
                                        long long numRecords,
                                        long long dataSize) = 0;

    /**
     * used to support online change oplog size.
     */
    virtual Status updateCappedSize(OperationContext* opCtx, long long cappedSize) {
        return Status(ErrorCodes::CommandNotSupported,
                      "this storage engine does not support updateCappedSize");
    }

    /**
     * Returns false if the oplog was dropped while waiting for a deletion request.
     * This should only be called if StorageEngine::supportsOplogStones() is true.
     * Storage engines supporting oplog stones must implement this function.
     */
    virtual bool yieldAndAwaitOplogDeletionRequest(OperationContext* opCtx) {
        MONGO_UNREACHABLE;
    }

    /**
     * This should only be called if StorageEngine::supportsOplogStones() is true.
     * Storage engines supporting oplog stones must implement this function.
     */
    virtual void reclaimOplog(OperationContext* opCtx) {
        MONGO_UNREACHABLE;
    }

    /**
     * This should only be called if StorageEngine::supportsOplogStones() is true.
     * Storage engines supporting oplog stones must implement this function.
     * Populates `builder` with various statistics pertaining to oplog stones and oplog truncation.
     */
    virtual void getOplogTruncateStats(BSONObjBuilder& builder) const {
        MONGO_UNREACHABLE;
    }

    /**
     * If supported, this method returns the timestamp value for the latest storage engine committed
     * oplog document. Note that this method will not include uncommitted writes on the input
     * OperationContext. A new transaction may be created and destroyed to service this call.
     *
     * Unsupported RecordStores return the OplogOperationUnsupported error code.
     */
    virtual StatusWith<Timestamp> getLatestOplogTimestamp(OperationContext* opCtx) const {
        return Status(ErrorCodes::OplogOperationUnsupported,
                      "The current storage engine doesn't support an optimized implementation for "
                      "getting the latest oplog timestamp.");
    }

    /**
     * If supported, this method returns the timestamp value for the earliest storage engine
     * committed oplog document.
     *
     * Unsupported RecordStores return the OplogOperationUnsupported error code.
     */
    virtual StatusWith<Timestamp> getEarliestOplogTimestamp(OperationContext* opCtx) {
        return Status(ErrorCodes::OplogOperationUnsupported,
                      "The current storage engine doesn't support an optimized implementation for "
                      "getting the earliest oplog timestamp.");
    }

protected:
    std::string _ns;
};

struct ValidateResults {
    ValidateResults() {
        valid = true;
    }
    bool valid;
    std::vector<std::string> errors;
    std::vector<std::string> warnings;
    std::vector<BSONObj> extraIndexEntries;
    std::vector<BSONObj> missingIndexEntries;
};
}  // namespace mongo