summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Keith Bostic <keith.bostic@wiredtiger.com> 2012-01-12 16:33:26 +0000
committer: Keith Bostic <keith.bostic@wiredtiger.com> 2012-01-12 16:33:26 +0000
commit: 933cadcb3499d4014c2cd47c2e6801b3517e08bf (patch)
tree: ec254830cb45781a47f918ed7d634cca4d70ef81
parent: f03c0f181f94923cb275f73d4110a1f3633b915b (diff)
download: mongo-933cadcb3499d4014c2cd47c2e6801b3517e08bf.tar.gz
Remove session buffer support: they are no longer used for bulk-load and they
tended to tie down too much memory for long periods in Sesame's workloads.
--HG--
extra : rebase_source : 0e038d8ece0ed6ee929eacac4e3af8bb6fe4586a
-rw-r--r-- dist/filelist 1
-rw-r--r-- dist/s_funcs.list 3
-rw-r--r-- dist/s_string.ok 1
-rw-r--r-- dist/serial.py 14
-rw-r--r-- src/btree/bt_discard.c 8
-rw-r--r-- src/btree/col_modify.c 8
-rw-r--r-- src/btree/row_key.c 6
-rw-r--r-- src/btree/row_modify.c 12
-rw-r--r-- src/include/api.h 3
-rw-r--r-- src/include/btmem.h 6
-rw-r--r-- src/include/btree.h 17
-rw-r--r-- src/include/extern.h 11
-rw-r--r-- src/include/serial_funcs.i 6
-rw-r--r-- src/session/session_api.c 3
-rw-r--r-- src/support/err.c 11
-rw-r--r-- src/support/pow.c 2
-rw-r--r-- src/support/sess_buf.c 229
17 files changed, 31 insertions, 310 deletions
diff --git a/dist/filelist b/dist/filelist
index e79ee98c3d2..2ad8dba7d39 100644
--- a/dist/filelist
+++ b/dist/filelist
@@ -106,6 +106,5 @@ src/support/huffman.c
src/support/pow.c
src/support/rand.c
src/support/scratch.c
-src/support/sess_buf.c
src/support/sess_dump.c
src/support/stat.c
diff --git a/dist/s_funcs.list b/dist/s_funcs.list
index 0f6c0eee29a..a684867267a 100644
--- a/dist/s_funcs.list
+++ b/dist/s_funcs.list
@@ -2,6 +2,7 @@
__bit_ffs
__bit_nclr
__wt_block_dump
+__wt_bm_addr_stderr
__wt_btree_lex_compare
__wt_config_getone
__wt_debug_addr
@@ -9,6 +10,8 @@ __wt_debug_tree
__wt_debug_tree_all
__wt_log_printf
__wt_log_put
+__wt_nlpo2
+__wt_nlpo2_round
__wt_nlpo2_round
__wt_print_huffman_code
__wt_session_dump
diff --git a/dist/s_string.ok b/dist/s_string.ok
index 111bc3e8663..b4f360d2349 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -65,6 +65,7 @@ INIT
INITIALIZER
INSERT's
JPEG
+JSON
KV
Kanowski's
Kounavis
diff --git a/dist/serial.py b/dist/serial.py
index 59c15b8aa20..9fb8d3fed69 100644
--- a/dist/serial.py
+++ b/dist/serial.py
@@ -4,11 +4,10 @@ import textwrap
from dist import compare_srcfile
class SerialArg:
- def __init__(self, typestr, name, sized=0, sb=0):
+ def __init__(self, typestr, name, sized=0):
self.typestr = typestr
self.name = name
self.sized = sized
- self.sb = sb
class Serial:
def __init__(self, name, op, args):
@@ -22,7 +21,7 @@ Serial('col_append', 'WT_SERIAL_FUNC', [
SerialArg('WT_INSERT ***', 'ins_stack'),
SerialArg('WT_INSERT_HEAD **', 'new_inslist', 1),
SerialArg('WT_INSERT_HEAD *', 'new_inshead', 1),
- SerialArg('WT_INSERT *', 'new_ins', 1, 1),
+ SerialArg('WT_INSERT *', 'new_ins', 1),
SerialArg('u_int', 'skipdepth'),
]),
@@ -37,7 +36,7 @@ Serial('insert', 'WT_SERIAL_FUNC', [
SerialArg('WT_INSERT ***', 'ins_stack'),
SerialArg('WT_INSERT_HEAD **', 'new_inslist', 1),
SerialArg('WT_INSERT_HEAD *', 'new_inshead', 1),
- SerialArg('WT_INSERT *', 'new_ins', 1, 1),
+ SerialArg('WT_INSERT *', 'new_ins', 1),
SerialArg('u_int', 'skipdepth'),
]),
@@ -52,7 +51,7 @@ Serial('update', 'WT_SERIAL_FUNC', [
SerialArg('uint32_t', 'write_gen'),
SerialArg('WT_UPDATE **', 'srch_upd'),
SerialArg('WT_UPDATE **', 'new_upd', 1),
- SerialArg('WT_UPDATE *', 'upd', 1, 1),
+ SerialArg('WT_UPDATE *', 'upd', 1),
]),
]
@@ -124,10 +123,7 @@ typedef struct {
if not l.sized:
continue
f.write('\tif (!args->' + l.name + '_taken)\n')
- if l.sb:
- f.write('\t\t__wt_sb_decrement(session, args->' + l.name + '->sb, args->' + l.name + ');\n')
- else:
- f.write('\t\t__wt_free(session, args->' + l.name + ');\n')
+ f.write('\t\t__wt_free(session, args->' + l.name + ');\n')
f.write('\treturn (ret);\n')
f.write('}\n\n')
diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c
index c93f9ba34ca..9108548d5ac 100644
--- a/src/btree/bt_discard.c
+++ b/src/btree/bt_discard.c
@@ -175,7 +175,7 @@ __free_page_row_int(WT_SESSION_IMPL *session, WT_PAGE *page)
*/
WT_REF_FOREACH(page, ref, i) {
if ((ikey = ref->u.key) != NULL)
- __wt_sb_free(session, ikey->sb, ikey);
+ __wt_free(session, ikey);
if (ref->addr != NULL &&
__wt_off_page(page, ref->addr)) {
__wt_free(session, ((WT_ADDR *)ref->addr)->addr);
@@ -207,7 +207,7 @@ __free_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
*/
WT_ROW_FOREACH(page, rip, i)
if ((ikey = rip->key) != NULL && __wt_off_page(page, ikey))
- __wt_sb_free(session, ikey->sb, ikey);
+ __wt_free(session, ikey);
__wt_free(session, page->u.row.d);
/*
@@ -263,7 +263,7 @@ __free_insert_list(WT_SESSION_IMPL *session, WT_INSERT *ins)
__free_update_list(session, ins->upd);
next = WT_SKIP_NEXT(ins);
- __wt_sb_free(session, ins->sb, ins);
+ __wt_free(session, ins);
} while ((ins = next) != NULL);
}
@@ -301,6 +301,6 @@ __free_update_list(WT_SESSION_IMPL *session, WT_UPDATE *upd)
do {
next = upd->next;
- __wt_sb_free(session, upd->sb, upd);
+ __wt_free(session, upd);
} while ((upd = next) != NULL);
}
diff --git a/src/btree/col_modify.c b/src/btree/col_modify.c
index 842ff3d8282..17be4e5c899 100644
--- a/src/btree/col_modify.c
+++ b/src/btree/col_modify.c
@@ -175,9 +175,9 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int op)
if (ret != 0) {
err: if (ins != NULL)
- __wt_sb_decrement(session, ins->sb, ins);
+ __wt_free(session, ins);
if (upd != NULL)
- __wt_sb_decrement(session, upd->sb, upd);
+ __wt_free(session, upd);
}
__wt_free(session, new_inslist);
@@ -195,7 +195,6 @@ static int
__col_insert_alloc(WT_SESSION_IMPL *session,
uint64_t recno, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep)
{
- WT_SESSION_BUFFER *sb;
WT_INSERT *ins;
size_t ins_size;
@@ -204,9 +203,8 @@ __col_insert_alloc(WT_SESSION_IMPL *session,
* the record number into place.
*/
ins_size = sizeof(WT_INSERT) + skipdepth * sizeof(WT_INSERT *);
- WT_RET(__wt_sb_alloc(session, ins_size, &ins, &sb));
+ WT_RET(__wt_calloc(session, 1, ins_size, &ins));
- ins->sb = sb;
WT_INSERT_RECNO(ins) = recno;
*insp = ins;
diff --git a/src/btree/row_key.c b/src/btree/row_key.c
index 2bacd9e840f..e7af7af0a0b 100644
--- a/src/btree/row_key.c
+++ b/src/btree/row_key.c
@@ -330,7 +330,7 @@ next: switch (direction) {
* the key.
*/
if (rip_arg->key != ikey)
- __wt_sb_decrement(session, ikey->sb, ikey);
+ __wt_free(session, ikey);
__wt_scr_free(&retb);
@@ -396,14 +396,12 @@ __wt_row_ikey_alloc(WT_SESSION_IMPL *session,
uint32_t cell_offset, const void *key, uint32_t size, WT_IKEY **ikeyp)
{
WT_IKEY *ikey;
- WT_SESSION_BUFFER *sb;
/*
* Allocate the WT_IKEY structure and room for the value, then copy
* the value into place.
*/
- WT_RET(__wt_sb_alloc(session, sizeof(WT_IKEY) + size, &ikey, &sb));
- ikey->sb = sb;
+ WT_RET(__wt_calloc(session, 1, sizeof(WT_IKEY) + size, &ikey));
ikey->size = size;
ikey->cell_offset = cell_offset;
memcpy(WT_IKEY_DATA(ikey), key, size);
diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c
index 98f1b36f2f9..5dd6c7718b4 100644
--- a/src/btree/row_modify.c
+++ b/src/btree/row_modify.c
@@ -136,9 +136,9 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int is_remove)
if (ret != 0) {
err: if (ins != NULL)
- __wt_sb_decrement(session, ins->sb, ins);
+ __wt_free(session, ins);
if (upd != NULL)
- __wt_sb_decrement(session, upd->sb, upd);
+ __wt_free(session, upd);
}
/* Free any insert, update arrays. */
@@ -158,7 +158,6 @@ int
__wt_row_insert_alloc(WT_SESSION_IMPL *session,
WT_BUF *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep)
{
- WT_SESSION_BUFFER *sb;
WT_INSERT *ins;
size_t ins_size;
@@ -168,9 +167,8 @@ __wt_row_insert_alloc(WT_SESSION_IMPL *session,
*/
ins_size = sizeof(WT_INSERT) +
skipdepth * sizeof(WT_INSERT *) + key->size;
- WT_RET(__wt_sb_alloc(session, ins_size, &ins, &sb));
+ WT_RET(__wt_calloc(session, 1, ins_size, &ins));
- ins->sb = sb;
ins->u.key.offset = WT_STORE_SIZE(ins_size - key->size);
WT_INSERT_KEY_SIZE(ins) = key->size;
memcpy(WT_INSERT_KEY(ins), key->data, key->size);
@@ -257,7 +255,6 @@ int
__wt_update_alloc(WT_SESSION_IMPL *session,
WT_BUF *value, WT_UPDATE **updp, size_t *sizep)
{
- WT_SESSION_BUFFER *sb;
WT_UPDATE *upd;
size_t size;
@@ -266,8 +263,7 @@ __wt_update_alloc(WT_SESSION_IMPL *session,
* the value into place.
*/
size = value == NULL ? 0 : value->size;
- WT_RET(__wt_sb_alloc(session, sizeof(WT_UPDATE) + size, &upd, &sb));
- upd->sb = sb;
+ WT_RET(__wt_calloc(session, 1, sizeof(WT_UPDATE) + size, &upd));
if (value == NULL)
WT_UPDATE_DELETED_SET(upd);
else {
diff --git a/src/include/api.h b/src/include/api.h
index dd5e0367a82..3f3c80450f3 100644
--- a/src/include/api.h
+++ b/src/include/api.h
@@ -103,9 +103,6 @@ struct __wt_session_impl {
void *reconcile; /* Reconciliation structure */
- WT_SESSION_BUFFER *sb; /* Per-thread update buffer */
- uint32_t update_alloc_size; /* Allocation size */
-
uint32_t flags;
};
diff --git a/src/include/btmem.h b/src/include/btmem.h
index 1df75a04cf3..0a7ec76fdc2 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -474,8 +474,6 @@ struct __wt_col_rle {
* structure.
*/
struct __wt_ikey {
- WT_SESSION_BUFFER *sb; /* Session buffer holding the WT_IKEY */
-
uint32_t size; /* Key length */
/*
@@ -504,8 +502,6 @@ struct __wt_ikey {
* list.
*/
struct __wt_update {
- WT_SESSION_BUFFER *sb; /* session buffer holding this update */
-
WT_UPDATE *next; /* forward-linked list */
/*
@@ -556,8 +552,6 @@ struct __wt_update {
* scale and it isn't useful enough to re-implement, IMNSHO.)
*/
struct __wt_insert {
- WT_SESSION_BUFFER *sb; /* insert session buffer */
-
WT_UPDATE *upd; /* value */
union {
diff --git a/src/include/btree.h b/src/include/btree.h
index 361c8983448..780efd56fe6 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -9,6 +9,10 @@
#define WT_BTREE_MINOR_VERSION 0
/*
+ * Key and data item lengths are stored in 32-bit unsigned integers, meaning
+ * the largest key or data item is 4GB. Record numbers are stored in 64-bit
+ * unsigned integers, meaning the largest record number is "really, really big".
+ *
* The minimum btree leaf and internal page sizes are 512B, the maximum 512MB.
* (The maximum of 512MB is enforced by the software, it could be set as high
* as 4GB.)
@@ -25,19 +29,6 @@
WT_ALIGN(((uintmax_t)(pagesize) * (pct)) / 100, allocsize)
/*
- * Limit the maximum size of a single object to 4GB - 512B: in some places we
- * allocate memory to store objects plus associated data structures. 512B is
- * far more space than we ever need, but I'm not eager to debug any off-by-ones,
- * and storing a 4GB object in the file is flatly insane, anyway.
- *
- * Key and data item lengths are stored in 32-bit unsigned integers, meaning
- * the largest key or data item is 4GB (minus a few bytes). Record numbers
- * are stored in 64-bit unsigned integers, meaning the largest record number
- * is "really, really big".
- */
-#define WT_BTREE_OBJECT_SIZE_MAX (UINT32_MAX - 512)
-
-/*
* XXX
* The server threads use their own WT_SESSION_IMPL handles because they may
* want to block (for example, the eviction server calls reconciliation, and
diff --git a/src/include/extern.h b/src/include/extern.h
index e2f5913118b..4386de0ad6d 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -765,7 +765,6 @@ extern int __wt_assert(WT_SESSION_IMPL *session,
5,
6)));
extern int __wt_illegal_value(WT_SESSION_IMPL *session);
-extern int __wt_file_item_too_big(WT_SESSION_IMPL *session);
extern int __wt_unknown_object_type(WT_SESSION_IMPL *session, const char *uri);
extern int __wt_filename(WT_SESSION_IMPL *session,
const char *name,
@@ -850,16 +849,6 @@ extern void __wt_scr_free(WT_BUF **bufp);
extern void __wt_scr_discard(WT_SESSION_IMPL *session);
extern void *__wt_scr_alloc_ext(WT_SESSION *wt_session, size_t size);
extern void __wt_scr_free_ext(WT_SESSION *wt_session, void *p);
-extern int __wt_sb_alloc( WT_SESSION_IMPL *session,
- size_t size,
- void *retp,
- WT_SESSION_BUFFER **sbp);
-extern void __wt_sb_free(WT_SESSION_IMPL *session,
- WT_SESSION_BUFFER *sb,
- void *p);
-extern void __wt_sb_decrement(WT_SESSION_IMPL *session,
- WT_SESSION_BUFFER *sb,
- void *p);
extern void __wt_session_dump_all(WT_SESSION_IMPL *session);
extern void __wt_session_dump(WT_SESSION_IMPL *session);
extern int __wt_stat_alloc_btree_stats(WT_SESSION_IMPL *session,
diff --git a/src/include/serial_funcs.i b/src/include/serial_funcs.i
index 2426dc02971..3f30c199f36 100644
--- a/src/include/serial_funcs.i
+++ b/src/include/serial_funcs.i
@@ -66,7 +66,7 @@ __wt_col_append_serial(
if (!args->new_inshead_taken)
__wt_free(session, args->new_inshead);
if (!args->new_ins_taken)
- __wt_sb_decrement(session, args->new_ins->sb, args->new_ins);
+ __wt_free(session, args->new_ins);
return (ret);
}
@@ -225,7 +225,7 @@ __wt_insert_serial(
if (!args->new_inshead_taken)
__wt_free(session, args->new_inshead);
if (!args->new_ins_taken)
- __wt_sb_decrement(session, args->new_ins->sb, args->new_ins);
+ __wt_free(session, args->new_ins);
return (ret);
}
@@ -375,7 +375,7 @@ __wt_update_serial(
if (!args->new_upd_taken)
__wt_free(session, args->new_upd);
if (!args->upd_taken)
- __wt_sb_decrement(session, args->upd->sb, args->upd);
+ __wt_free(session, args->upd);
return (ret);
}
diff --git a/src/session/session_api.c b/src/session/session_api.c
index 24e78df0cb2..896bf2ffe16 100644
--- a/src/session/session_api.c
+++ b/src/session/session_api.c
@@ -44,9 +44,6 @@ __session_close(WT_SESSION *wt_session, const char *config)
WT_TRET(__wt_schema_close_tables(session));
__wt_spin_lock(session, &conn->spinlock);
- /* Unpin the current session buffer. */
- if (session->sb != NULL)
- __wt_sb_decrement(session, session->sb, NULL);
/* Discard scratch buffers. */
__wt_scr_discard(session);
diff --git a/src/support/err.c b/src/support/err.c
index eb5ec59b7ca..18454629d32 100644
--- a/src/support/err.c
+++ b/src/support/err.c
@@ -176,17 +176,6 @@ __wt_illegal_value(WT_SESSION_IMPL *session)
}
/*
- * __wt_file_item_too_big --
- * Print a standard error message when an element is too large to store.
- */
-int
-__wt_file_item_too_big(WT_SESSION_IMPL *session)
-{
- WT_RET_MSG(session, WT_ERROR,
- "the item is too large for the file to store");
-}
-
-/*
* __wt_unknown_object_type --
* Print a standard error message when given an unknown object type.
*/
diff --git a/src/support/pow.c b/src/support/pow.c
index 33b58d8b46c..5b73ec11d08 100644
--- a/src/support/pow.c
+++ b/src/support/pow.c
@@ -7,6 +7,7 @@
#include "wt_internal.h"
+#ifdef __WIREDTIGER_UNUSED__
/*
* __wt_nlpo2_round --
* Round up to the next-largest power-of-two for a 32-bit unsigned value.
@@ -52,6 +53,7 @@ __wt_nlpo2(uint32_t v)
v |= v >> 16;
return (v + 1);
}
+#endif /* __WIREDTIGER_UNUSED__ */
/*
* __wt_ispo2 --
diff --git a/src/support/sess_buf.c b/src/support/sess_buf.c
deleted file mode 100644
index bd7e109cb70..00000000000
--- a/src/support/sess_buf.c
+++ /dev/null
@@ -1,229 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 2008-2011 WiredTiger, Inc.
- * All rights reserved.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_sb_alloc --
- * Allocate memory from the WT_SESSION_IMPL's buffer and fill it in.
- */
-int
-__wt_sb_alloc(
- WT_SESSION_IMPL *session, size_t size, void *retp, WT_SESSION_BUFFER **sbp)
-{
-#ifndef HAVE_SESSION_BUFFERS
- *sbp = NULL;
- return (__wt_calloc(session, 1, size, retp));
-#else
- WT_SESSION_BUFFER *sb;
- size_t alloc_size;
- uint32_t align_size;
- int single_use;
-
- /*
- * Allocate memory for an insert or change; there's a buffer in the
- * WT_SESSION_IMPL structure for allocation of chunks of memory to hold
- * changed or inserted values.
- *
- * We align allocations because we directly access WT_UPDATE structure
- * fields in the memory (the x86 handles unaligned accesses, but I don't
- * want to have to find and fix this code for a port to a system that
- * doesn't handle unaligned accesses). It wastes space, but this memory
- * is never written to disk and there are fewer concerns about memory
- * than with on-disk structures. Any other code allocating memory from
- * this buffer needs to align its allocations as well.
- *
- * The first thing in each chunk of memory is a WT_SESSION_BUFFER
- * structure (check to be a multiple of 4B during initialization);
- * then one or more WT_UPDATE structure plus value chunk pairs.
- *
- * Figure out how much space we need: this code limits the maximum size
- * of a data item stored in the file. In summary, for a big item we
- * have to store a WT_SESSION_BUFFER structure, the WT_UPDATE structure
- * and the data, all in an allocated buffer. We only pass a 32-bit
- * value to our allocation routine, so we can't store an item bigger
- * than the maximum 32-bit value minus the sizes of those two
- * structures, where the WT_UPDATE structure and data item are aligned
- * to a 32-bit boundary. We could fix this, but it's unclear it's
- * worth the effort: document you can store a (4GB - 512B) item max,
- * it's insane to store 4GB items in the file anyway.
- */
- if (size > WT_BTREE_OBJECT_SIZE_MAX)
- return (__wt_file_item_too_big(session));
- align_size = WT_ALIGN(size + sizeof(WT_UPDATE), sizeof(uint32_t));
-
- /* If we already have a buffer and the data fits, we're done. */
- sb = session->sb;
- if (sb != NULL && align_size <= sb->space_avail)
- goto no_allocation;
-
- /*
- * We start by allocating 4KB for the thread, then every time we have
- * to re-allocate the buffer, we double the allocation size, up to a
- * total of 8MB, so any thread doing a lot of updates won't re-allocate
- * new chunks of memory that often.
- */
- if (session->update_alloc_size == 0) {
- /*
- * 2KB is correct, we're going to double it to 4KB when we
- * calculate a new allocation size.
- */
- session->update_alloc_size = 2 * 1024;
-
- /*
- * We don't want to never aggregate changes because records are
- * initially relatively large, compared to the allocation size,
- * that is, if the application is loading many 4KB records, we'd
- * like to handle that reasonably. This code adjusts for that
- * case.
- *
- * If we get nothing but 256KB inserts, this code will allocate
- * each of them individually, without aggregation, never growing
- * the aggregation buffer size. That doesn't seem all that bad,
- * aggregation isn't intended for lots of large records, rather
- * it's intended for lots of small records.
- */
- if (align_size > session->update_alloc_size &&
- align_size < 128 * 1024)
- session->update_alloc_size = 128 * 1024;
- }
-
- /*
- * Decide how much memory to allocate: if it's a one-off (that is, the
- * value is bigger than anything we'll aggregate into these buffers),
- * allocate just enough memory. Else, allocate the next power-of-two
- * larger, up to 8MB.
- */
- if (align_size > session->update_alloc_size) {
- alloc_size = sizeof(WT_SESSION_BUFFER) + align_size;
- single_use = 1;
- } else {
- if (session->update_alloc_size < 8 * WT_MEGABYTE)
- session->update_alloc_size =
- __wt_nlpo2(session->update_alloc_size);
- alloc_size = session->update_alloc_size;
- single_use = 0;
- }
-
- WT_RET(__wt_calloc(session, 1, alloc_size, &sb));
- sb->len = WT_STORE_SIZE(alloc_size);
- sb->space_avail = WT_STORE_SIZE(alloc_size - sizeof(WT_SESSION_BUFFER));
- sb->first_free = (uint8_t *)sb + sizeof(WT_SESSION_BUFFER);
-
- /*
- * If it's a single use allocation, ignore any current buffer in the
- * session; else, release the old session buffer and replace it with
- * the new one.
- */
- if (!single_use) {
- /*
- * The "in" reference count is artificially incremented by 1 as
- * long as a session buffer is referenced by the session
- * handle; we do not want session buffers freed because a page
- * was evicted and the count went to 0 while the buffer might
- * still be used for future K/V inserts or modifications.
- */
- if (session->sb != NULL)
- __wt_sb_decrement(session, session->sb, NULL);
- session->sb = sb;
-
- sb->in = 1;
- }
-
-no_allocation:
- *(void **)retp = sb->first_free;
- *sbp = sb;
-
- sb->first_free += align_size;
- sb->space_avail -= align_size;
- ++sb->in;
- WT_ASSERT(session, sb->in != 0);
-
- return (0);
-#endif
-}
-
-/*
- * __wt_sb_free --
- * Free a chunk of memory from a per-WT_SESSION_IMPL buffer.
- */
-void
-__wt_sb_free(WT_SESSION_IMPL *session, WT_SESSION_BUFFER *sb, void *p)
-{
-#ifndef HAVE_SESSION_BUFFERS
- WT_UNUSED(sb);
-
- __wt_free(session, p);
-#else
- WT_UNUSED(p);
-
- WT_ASSERT(session, sb->out < sb->in);
-
- if (++sb->out == sb->in)
- __wt_free(session, sb);
-#endif
-}
-
-/*
- * __wt_sb_decrement --
- * Decrement the "insert" value of a per-WT_SESSION_IMPL buffer.
- */
-void
-__wt_sb_decrement(WT_SESSION_IMPL *session, WT_SESSION_BUFFER *sb, void *p)
-{
-#ifndef HAVE_SESSION_BUFFERS
- WT_UNUSED(sb);
-
- __wt_free(session, p);
-#else
- WT_UNUSED(p);
-
- WT_ASSERT(session, sb->out < sb->in);
-
- /*
- * This function is used for two reasons.
- *
- * #1: it's possible we allocated memory from the session buffer, but
- * then an error occurred. In this case we don't try and clean up the
- * session buffer, it's simpler to decrement the counters and pretend
- * the memory is no longer in use. We're still in the allocation path
- * so we decrement the "in" field instead of incrementing the "out"
- * field, if the eviction thread were to update the "out" field at the
- * same time, we could race.
- *
- * #2: the "in" reference count is artificially incremented by 1 as
- * long as a session buffer is referenced by the session handle; we do
- * not want session buffers freed because a page was evicted and the
- * count went to 0 while the buffer might still be used for future K/V
- * inserts or modifications.
- */
- --sb->in;
-
- /*
- * In the above case #1, if the session buffer was a one-off (allocated
- * for a single use), we have to free it here, it's not linked to any
- * WT_PAGE in the system.
- *
- * In the above case #2, our artificial increment might be the last
- * reference, if all of the WT_PAGE's referencing this buffer have been
- * reconciled since the K/V inserts or modifications.
- *
- * In both of these cases, sb->in == sb->out, and we need to free the
- * buffer.
- *
- * XXX
- * There's a race here in the above case #2: if this code, and the page
- * discard code race, it's possible neither will realize the buffer is
- * no longer needed and free it. The fix is to involve the eviction
- * thread: it may need a linked list of buffers they review to ensure
- * it never happens. I'm living with this now: it's an unlikely
- * race, and it's a memory leak if it ever happens.
- */
- if (sb->in == sb->out)
- __wt_free(session, sb);
-#endif
-}