/*-
 * Copyright (c) 2014-2019 MongoDB, Inc.
 * Copyright (c) 2008-2014 WiredTiger, Inc.
 *	All rights reserved.
 *
 * See the file LICENSE for redistribution information.
 */

#include "wt_internal.h"

/*
 * __key_return --
 *     Point the cursor's key (or record number) at the internal key for the cursor's position.
 */
static inline int
__key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
{
    WT_CURSOR *cursor;
    WT_ITEM *swap;
    WT_PAGE *page;
    WT_ROW *row;

    cursor = &cbt->iface;
    page = cbt->ref->page;

    /*
     * WT_PAGE_COL_FIX, WT_PAGE_COL_VAR:
     *	Column-store only needs the record number. The interface cursor's record has usually
     * been set already, but not always: cursor.search_near may call here without first setting
     * it, so copy it unconditionally.
     */
    if (page->type != WT_PAGE_ROW_LEAF) {
        cursor->recno = cbt->recno;
        return (0);
    }

    /*
     * Row-store, in order of preference: the key of a referenced WT_INSERT item, the key copied
     * by the search function on an exact match, and finally the key from the original page.
     */
    if (cbt->ins != NULL) {
        cursor->key.data = WT_INSERT_KEY(cbt->ins);
        cursor->key.size = WT_INSERT_KEY_SIZE(cbt->ins);
        return (0);
    }

    if (cbt->compare != 0) {
        /* Not an exact match: build the key from the on-page row. */
        row = &page->pg_row[cbt->slot];
        return (__wt_row_leaf_key(session, page, row, &cursor->key, false));
    }

    /*
     * Exact match outside of an insert list: the row-store search function built the key we want
     * in the cursor's temporary buffer. Exchange the search-key and temporary buffers rather than
     * returning the temporary buffer directly; our caller might search this table again using
     * the returned key, and a search that uses the temporary buffer would then corrupt its own
     * search key.
     */
    swap = cbt->row_key;
    cbt->row_key = cbt->tmp;
    cbt->tmp = swap;

    cursor->key.data = cbt->row_key->data;
    cursor->key.size = cbt->row_key->size;
    return (0);
}

/*
 * __value_return --
 *     Point the cursor's value at the value stored on the original page for the cursor's slot.
 */
static inline int
__value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
{
    WT_BTREE *btree;
    WT_CELL *cell;
    WT_CELL_UNPACK unpack;
    WT_CURSOR *cursor;
    WT_PAGE *page;
    WT_ROW *rip;
    uint8_t v;

    btree = S2BT(session);

    cursor = &cbt->iface;
    page = cbt->ref->page;

    /*
     * WT_PAGE_COL_FIX: anything that isn't a row-store leaf or variable-length column-store page
     * is handled as fixed-length: fetch the bit-field for the record and copy it into the
     * cursor's value buffer.
     */
    if (page->type != WT_PAGE_ROW_LEAF && page->type != WT_PAGE_COL_VAR) {
        v = __bit_getv_recno(cbt->ref, cursor->recno, btree->bitcnt);
        return (__wt_buf_set(session, &cursor->value, &v, 1));
    }

    if (page->type == WT_PAGE_ROW_LEAF) {
        rip = &page->pg_row[cbt->slot];

        /* Simple values have their location encoded in the WT_ROW, we're done if that works. */
        if (__wt_row_leaf_value(page, rip, &cursor->value))
            return (0);

        /* Otherwise unpack the row's value cell from the original page. */
        __wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
    } else {
        /* WT_PAGE_COL_VAR: unpack the slot's cell from the original page. */
        cell = WT_COL_PTR(page, &page->pg_var[cbt->slot]);
        __wt_cell_unpack(session, page, cell, &unpack);
    }

    /* Both cell-based paths finish by referencing the unpacked cell's data. */
    return (__wt_page_cell_data_ref(session, page, &unpack, &cursor->value));
}

/*
 * When threads race modifying a record, we can end up with more than the usual maximum number of
 * modifications in an update list. We'd prefer not to allocate memory in a return path, so add a
 * few additional slots to the on-stack array __wt_value_return_upd uses to build up its list of
 * modify records to apply. Lists that still don't fit are handled there by switching to a
 * heap-allocated array.
 */
#define WT_MODIFY_ARRAY_SIZE (WT_MAX_MODIFY_UPDATE + 10)

/*
 * __wt_value_return_upd --
 *     Change the cursor to reference an internal update structure return value. A standard update
 *     is referenced directly; a modify update is resolved by finding a base value and applying
 *     the collected modify updates to it, oldest first. If ignore_visibility is set, updates are
 *     not checked with __wt_txn_upd_visible.
 */
int
__wt_value_return_upd(
  WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd, bool ignore_visibility)
{
    WT_CURSOR *cursor;
    WT_DECL_RET;
    WT_UPDATE **modifies, *stack_modifies[WT_MODIFY_ARRAY_SIZE];
    size_t heap_bytes;
    u_int nmodify;
    bool birthmark_hidden;

    cursor = &cbt->iface;
    heap_bytes = 0;

    /*
     * We're passed a "standard" or "modify" update that's visible to us. Our caller should have
     * already checked for deleted items (we're too far down the call stack to return not-found).
     *
     * Fast path: a standard item is referenced in place. Anything else must be a modify, assert
     * our caller's behavior.
     */
    if (upd->type == WT_UPDATE_STANDARD) {
        cursor->value.data = upd->data;
        cursor->value.size = upd->size;
        return (0);
    }
    WT_ASSERT(session, upd->type == WT_UPDATE_MODIFY);

    /* Start out collecting modify updates in the on-stack array. */
    modifies = stack_modifies;
    nmodify = 0;
    birthmark_hidden = false;

    /*
     * Walk the update list looking for a complete update that's visible to us, remembering the
     * modify updates we pass on the way.
     */
    for (; upd != NULL; upd = upd->next) {
        if (upd->txnid == WT_TXN_ABORTED)
            continue;

        if (!ignore_visibility && !__wt_txn_upd_visible(session, upd)) {
            /* Remember skipping a birthmark, it changes the choice of base item below. */
            if (upd->type == WT_UPDATE_BIRTHMARK)
                birthmark_hidden = true;
            continue;
        }

        /* A visible birthmark ends the walk without a base update. */
        if (upd->type == WT_UPDATE_BIRTHMARK) {
            upd = NULL;
            break;
        }

        /* A visible data value is the base update. */
        if (WT_UPDATE_DATA_VALUE(upd))
            break;

        if (upd->type != WT_UPDATE_MODIFY)
            continue;

        /*
         * Update lists are expected to be short, but it's not guaranteed. There's sufficient room
         * on the stack to avoid memory allocation in normal cases, but we have to handle the edge
         * cases too.
         */
        if (nmodify == WT_MODIFY_ARRAY_SIZE) {
            /*
             * The stack array just filled up: allocate a heap array from scratch and carry over
             * everything collected so far.
             */
            modifies = NULL;
            WT_ERR(__wt_realloc_def(session, &heap_bytes, nmodify + 1, &modifies));
            memcpy(modifies, stack_modifies, sizeof(stack_modifies));
        } else if (nmodify > WT_MODIFY_ARRAY_SIZE)
            /* Already on the heap: make sure there's room for one more entry. */
            WT_ERR(__wt_realloc_def(session, &heap_bytes, nmodify + 1, &modifies));
        modifies[nmodify++] = upd;

        /*
         * Once a modify is found, all previously committed modifications should be applied
         * regardless of visibility.
         */
        ignore_visibility = true;
    }

    /*
     * Choose the base item:
     *  - a visible update that isn't a tombstone supplies its own data;
     *  - a visible tombstone, or no visible update after skipping a birthmark, means an empty
     *    item (birthmarks we can't read act as tombstones);
     *  - no visible update and no skipped birthmark means the on-page item, which must be
     *    globally visible.
     */
    if (upd != NULL && upd->type != WT_UPDATE_TOMBSTONE)
        WT_ERR(__wt_buf_set(session, &cursor->value, upd->data, upd->size));
    else if (upd != NULL || birthmark_hidden)
        WT_ERR(__wt_buf_set(session, &cursor->value, "", 0));
    else {
        /*
         * Callers of this function set the cursor slot to an impossible value to check we don't
         * try and return on-page values when the update list should have been sufficient (which
         * happens, for example, if an update list was truncated, deleting some standard update
         * required by a previous modify update). Assert the case.
         */
        WT_ASSERT(session, cbt->slot != UINT32_MAX);

        WT_ERR(__value_return(session, cbt));
    }

    /*
     * With a base item in hand, roll forward through the collected modify updates, applying them
     * in the reverse of the order in which they were collected.
     */
    for (; nmodify > 0; --nmodify)
        WT_ERR(__wt_modify_apply(cursor, modifies[nmodify - 1]->data));

err:
    /* Only a heap-allocated array needs to be released. */
    if (heap_bytes != 0)
        __wt_free(session, modifies);
    return (ret);
}

/*
 * __wt_key_return --
 *     Change the cursor to reference an internal return key, unless it already references one.
 */
int
__wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
{
    WT_CURSOR *cursor;

    cursor = &cbt->iface;

    /* Whatever happens below, the cursor no longer references an external key. */
    F_CLR(cursor, WT_CURSTD_KEY_EXT);

    /*
     * If the cursor already has an internal key, leave it alone: the cursor may not be set up to
     * get another copy. Consider a cursor search followed by an update: the update doesn't repeat
     * the search, it simply updates the currently referenced key's value. We end up here with the
     * correct internal key, but can't "return" the key again even if we wanted to do the
     * additional work, because we didn't just complete a search.
     */
    if (F_ISSET(cursor, WT_CURSTD_KEY_INT))
        return (0);

    WT_RET(__key_return(session, cbt));
    F_SET(cursor, WT_CURSTD_KEY_INT);
    return (0);
}

/*
 * __wt_value_return --
 *     Change the cursor to reference an internal return value: taken from the original page when
 *     no update is supplied, otherwise built from the update.
 */
int
__wt_value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
{
    WT_CURSOR *cursor;

    cursor = &cbt->iface;

    /* The external-value flag is cleared up front, the internal flag is set only on success. */
    F_CLR(cursor, WT_CURSTD_VALUE_EXT);
    WT_RET(upd == NULL ? __value_return(session, cbt) :
                         __wt_value_return_upd(session, cbt, upd, false));
    F_SET(cursor, WT_CURSTD_VALUE_INT);
    return (0);
}