diff options
author | Keith Bostic <keith.bostic@wiredtiger.com> | 2012-01-12 16:33:26 +0000 |
---|---|---|
committer | Keith Bostic <keith.bostic@wiredtiger.com> | 2012-01-12 16:33:26 +0000 |
commit | 933cadcb3499d4014c2cd47c2e6801b3517e08bf (patch) | |
tree | ec254830cb45781a47f918ed7d634cca4d70ef81 | |
parent | f03c0f181f94923cb275f73d4110a1f3633b915b (diff) | |
download | mongo-933cadcb3499d4014c2cd47c2e6801b3517e08bf.tar.gz |
Remove session buffer support: session buffers are no longer used for bulk-load,
and they tended to tie down too much memory for long periods in Sesame's workloads.
--HG--
extra : rebase_source : 0e038d8ece0ed6ee929eacac4e3af8bb6fe4586a
-rw-r--r-- | dist/filelist | 1 | ||||
-rw-r--r-- | dist/s_funcs.list | 3 | ||||
-rw-r--r-- | dist/s_string.ok | 1 | ||||
-rw-r--r-- | dist/serial.py | 14 | ||||
-rw-r--r-- | src/btree/bt_discard.c | 8 | ||||
-rw-r--r-- | src/btree/col_modify.c | 8 | ||||
-rw-r--r-- | src/btree/row_key.c | 6 | ||||
-rw-r--r-- | src/btree/row_modify.c | 12 | ||||
-rw-r--r-- | src/include/api.h | 3 | ||||
-rw-r--r-- | src/include/btmem.h | 6 | ||||
-rw-r--r-- | src/include/btree.h | 17 | ||||
-rw-r--r-- | src/include/extern.h | 11 | ||||
-rw-r--r-- | src/include/serial_funcs.i | 6 | ||||
-rw-r--r-- | src/session/session_api.c | 3 | ||||
-rw-r--r-- | src/support/err.c | 11 | ||||
-rw-r--r-- | src/support/pow.c | 2 | ||||
-rw-r--r-- | src/support/sess_buf.c | 229 |
17 files changed, 31 insertions, 310 deletions
diff --git a/dist/filelist b/dist/filelist index e79ee98c3d2..2ad8dba7d39 100644 --- a/dist/filelist +++ b/dist/filelist @@ -106,6 +106,5 @@ src/support/huffman.c src/support/pow.c src/support/rand.c src/support/scratch.c -src/support/sess_buf.c src/support/sess_dump.c src/support/stat.c diff --git a/dist/s_funcs.list b/dist/s_funcs.list index 0f6c0eee29a..a684867267a 100644 --- a/dist/s_funcs.list +++ b/dist/s_funcs.list @@ -2,6 +2,7 @@ __bit_ffs __bit_nclr __wt_block_dump +__wt_bm_addr_stderr __wt_btree_lex_compare __wt_config_getone __wt_debug_addr @@ -9,6 +10,8 @@ __wt_debug_tree __wt_debug_tree_all __wt_log_printf __wt_log_put +__wt_nlpo2 +__wt_nlpo2_round __wt_nlpo2_round __wt_print_huffman_code __wt_session_dump diff --git a/dist/s_string.ok b/dist/s_string.ok index 111bc3e8663..b4f360d2349 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -65,6 +65,7 @@ INIT INITIALIZER INSERT's JPEG +JSON KV Kanowski's Kounavis diff --git a/dist/serial.py b/dist/serial.py index 59c15b8aa20..9fb8d3fed69 100644 --- a/dist/serial.py +++ b/dist/serial.py @@ -4,11 +4,10 @@ import textwrap from dist import compare_srcfile class SerialArg: - def __init__(self, typestr, name, sized=0, sb=0): + def __init__(self, typestr, name, sized=0): self.typestr = typestr self.name = name self.sized = sized - self.sb = sb class Serial: def __init__(self, name, op, args): @@ -22,7 +21,7 @@ Serial('col_append', 'WT_SERIAL_FUNC', [ SerialArg('WT_INSERT ***', 'ins_stack'), SerialArg('WT_INSERT_HEAD **', 'new_inslist', 1), SerialArg('WT_INSERT_HEAD *', 'new_inshead', 1), - SerialArg('WT_INSERT *', 'new_ins', 1, 1), + SerialArg('WT_INSERT *', 'new_ins', 1), SerialArg('u_int', 'skipdepth'), ]), @@ -37,7 +36,7 @@ Serial('insert', 'WT_SERIAL_FUNC', [ SerialArg('WT_INSERT ***', 'ins_stack'), SerialArg('WT_INSERT_HEAD **', 'new_inslist', 1), SerialArg('WT_INSERT_HEAD *', 'new_inshead', 1), - SerialArg('WT_INSERT *', 'new_ins', 1, 1), + SerialArg('WT_INSERT *', 'new_ins', 1), SerialArg('u_int', 
'skipdepth'), ]), @@ -52,7 +51,7 @@ Serial('update', 'WT_SERIAL_FUNC', [ SerialArg('uint32_t', 'write_gen'), SerialArg('WT_UPDATE **', 'srch_upd'), SerialArg('WT_UPDATE **', 'new_upd', 1), - SerialArg('WT_UPDATE *', 'upd', 1, 1), + SerialArg('WT_UPDATE *', 'upd', 1), ]), ] @@ -124,10 +123,7 @@ typedef struct { if not l.sized: continue f.write('\tif (!args->' + l.name + '_taken)\n') - if l.sb: - f.write('\t\t__wt_sb_decrement(session, args->' + l.name + '->sb, args->' + l.name + ');\n') - else: - f.write('\t\t__wt_free(session, args->' + l.name + ');\n') + f.write('\t\t__wt_free(session, args->' + l.name + ');\n') f.write('\treturn (ret);\n') f.write('}\n\n') diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c index c93f9ba34ca..9108548d5ac 100644 --- a/src/btree/bt_discard.c +++ b/src/btree/bt_discard.c @@ -175,7 +175,7 @@ __free_page_row_int(WT_SESSION_IMPL *session, WT_PAGE *page) */ WT_REF_FOREACH(page, ref, i) { if ((ikey = ref->u.key) != NULL) - __wt_sb_free(session, ikey->sb, ikey); + __wt_free(session, ikey); if (ref->addr != NULL && __wt_off_page(page, ref->addr)) { __wt_free(session, ((WT_ADDR *)ref->addr)->addr); @@ -207,7 +207,7 @@ __free_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page) */ WT_ROW_FOREACH(page, rip, i) if ((ikey = rip->key) != NULL && __wt_off_page(page, ikey)) - __wt_sb_free(session, ikey->sb, ikey); + __wt_free(session, ikey); __wt_free(session, page->u.row.d); /* @@ -263,7 +263,7 @@ __free_insert_list(WT_SESSION_IMPL *session, WT_INSERT *ins) __free_update_list(session, ins->upd); next = WT_SKIP_NEXT(ins); - __wt_sb_free(session, ins->sb, ins); + __wt_free(session, ins); } while ((ins = next) != NULL); } @@ -301,6 +301,6 @@ __free_update_list(WT_SESSION_IMPL *session, WT_UPDATE *upd) do { next = upd->next; - __wt_sb_free(session, upd->sb, upd); + __wt_free(session, upd); } while ((upd = next) != NULL); } diff --git a/src/btree/col_modify.c b/src/btree/col_modify.c index 842ff3d8282..17be4e5c899 100644 --- 
a/src/btree/col_modify.c +++ b/src/btree/col_modify.c @@ -175,9 +175,9 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int op) if (ret != 0) { err: if (ins != NULL) - __wt_sb_decrement(session, ins->sb, ins); + __wt_free(session, ins); if (upd != NULL) - __wt_sb_decrement(session, upd->sb, upd); + __wt_free(session, upd); } __wt_free(session, new_inslist); @@ -195,7 +195,6 @@ static int __col_insert_alloc(WT_SESSION_IMPL *session, uint64_t recno, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep) { - WT_SESSION_BUFFER *sb; WT_INSERT *ins; size_t ins_size; @@ -204,9 +203,8 @@ __col_insert_alloc(WT_SESSION_IMPL *session, * the record number into place. */ ins_size = sizeof(WT_INSERT) + skipdepth * sizeof(WT_INSERT *); - WT_RET(__wt_sb_alloc(session, ins_size, &ins, &sb)); + WT_RET(__wt_calloc(session, 1, ins_size, &ins)); - ins->sb = sb; WT_INSERT_RECNO(ins) = recno; *insp = ins; diff --git a/src/btree/row_key.c b/src/btree/row_key.c index 2bacd9e840f..e7af7af0a0b 100644 --- a/src/btree/row_key.c +++ b/src/btree/row_key.c @@ -330,7 +330,7 @@ next: switch (direction) { * the key. */ if (rip_arg->key != ikey) - __wt_sb_decrement(session, ikey->sb, ikey); + __wt_free(session, ikey); __wt_scr_free(&retb); @@ -396,14 +396,12 @@ __wt_row_ikey_alloc(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, uint32_t size, WT_IKEY **ikeyp) { WT_IKEY *ikey; - WT_SESSION_BUFFER *sb; /* * Allocate the WT_IKEY structure and room for the value, then copy * the value into place. 
*/ - WT_RET(__wt_sb_alloc(session, sizeof(WT_IKEY) + size, &ikey, &sb)); - ikey->sb = sb; + WT_RET(__wt_calloc(session, 1, sizeof(WT_IKEY) + size, &ikey)); ikey->size = size; ikey->cell_offset = cell_offset; memcpy(WT_IKEY_DATA(ikey), key, size); diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c index 98f1b36f2f9..5dd6c7718b4 100644 --- a/src/btree/row_modify.c +++ b/src/btree/row_modify.c @@ -136,9 +136,9 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int is_remove) if (ret != 0) { err: if (ins != NULL) - __wt_sb_decrement(session, ins->sb, ins); + __wt_free(session, ins); if (upd != NULL) - __wt_sb_decrement(session, upd->sb, upd); + __wt_free(session, upd); } /* Free any insert, update arrays. */ @@ -158,7 +158,6 @@ int __wt_row_insert_alloc(WT_SESSION_IMPL *session, WT_BUF *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep) { - WT_SESSION_BUFFER *sb; WT_INSERT *ins; size_t ins_size; @@ -168,9 +167,8 @@ __wt_row_insert_alloc(WT_SESSION_IMPL *session, */ ins_size = sizeof(WT_INSERT) + skipdepth * sizeof(WT_INSERT *) + key->size; - WT_RET(__wt_sb_alloc(session, ins_size, &ins, &sb)); + WT_RET(__wt_calloc(session, 1, ins_size, &ins)); - ins->sb = sb; ins->u.key.offset = WT_STORE_SIZE(ins_size - key->size); WT_INSERT_KEY_SIZE(ins) = key->size; memcpy(WT_INSERT_KEY(ins), key->data, key->size); @@ -257,7 +255,6 @@ int __wt_update_alloc(WT_SESSION_IMPL *session, WT_BUF *value, WT_UPDATE **updp, size_t *sizep) { - WT_SESSION_BUFFER *sb; WT_UPDATE *upd; size_t size; @@ -266,8 +263,7 @@ __wt_update_alloc(WT_SESSION_IMPL *session, * the value into place. */ size = value == NULL ? 
0 : value->size; - WT_RET(__wt_sb_alloc(session, sizeof(WT_UPDATE) + size, &upd, &sb)); - upd->sb = sb; + WT_RET(__wt_calloc(session, 1, sizeof(WT_UPDATE) + size, &upd)); if (value == NULL) WT_UPDATE_DELETED_SET(upd); else { diff --git a/src/include/api.h b/src/include/api.h index dd5e0367a82..3f3c80450f3 100644 --- a/src/include/api.h +++ b/src/include/api.h @@ -103,9 +103,6 @@ struct __wt_session_impl { void *reconcile; /* Reconciliation structure */ - WT_SESSION_BUFFER *sb; /* Per-thread update buffer */ - uint32_t update_alloc_size; /* Allocation size */ - uint32_t flags; }; diff --git a/src/include/btmem.h b/src/include/btmem.h index 1df75a04cf3..0a7ec76fdc2 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -474,8 +474,6 @@ struct __wt_col_rle { * structure. */ struct __wt_ikey { - WT_SESSION_BUFFER *sb; /* Session buffer holding the WT_IKEY */ - uint32_t size; /* Key length */ /* @@ -504,8 +502,6 @@ struct __wt_ikey { * list. */ struct __wt_update { - WT_SESSION_BUFFER *sb; /* session buffer holding this update */ - WT_UPDATE *next; /* forward-linked list */ /* @@ -556,8 +552,6 @@ struct __wt_update { * scale and it isn't useful enough to re-implement, IMNSHO.) */ struct __wt_insert { - WT_SESSION_BUFFER *sb; /* insert session buffer */ - WT_UPDATE *upd; /* value */ union { diff --git a/src/include/btree.h b/src/include/btree.h index 361c8983448..780efd56fe6 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -9,6 +9,10 @@ #define WT_BTREE_MINOR_VERSION 0 /* + * Key and data item lengths are stored in 32-bit unsigned integers, meaning + * the largest key or data item is 4GB. Record numbers are stored in 64-bit + * unsigned integers, meaning the largest record number is "really, really big". + * * The minimum btree leaf and internal page sizes are 512B, the maximum 512MB. * (The maximum of 512MB is enforced by the software, it could be set as high * as 4GB.) 
@@ -25,19 +29,6 @@ WT_ALIGN(((uintmax_t)(pagesize) * (pct)) / 100, allocsize) /* - * Limit the maximum size of a single object to 4GB - 512B: in some places we - * allocate memory to store objects plus associated data structures. 512B is - * far more space than we ever need, but I'm not eager to debug any off-by-ones, - * and storing a 4GB object in the file is flatly insane, anyway. - * - * Key and data item lengths are stored in 32-bit unsigned integers, meaning - * the largest key or data item is 4GB (minus a few bytes). Record numbers - * are stored in 64-bit unsigned integers, meaning the largest record number - * is "really, really big". - */ -#define WT_BTREE_OBJECT_SIZE_MAX (UINT32_MAX - 512) - -/* * XXX * The server threads use their own WT_SESSION_IMPL handles because they may * want to block (for example, the eviction server calls reconciliation, and diff --git a/src/include/extern.h b/src/include/extern.h index e2f5913118b..4386de0ad6d 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -765,7 +765,6 @@ extern int __wt_assert(WT_SESSION_IMPL *session, 5, 6))); extern int __wt_illegal_value(WT_SESSION_IMPL *session); -extern int __wt_file_item_too_big(WT_SESSION_IMPL *session); extern int __wt_unknown_object_type(WT_SESSION_IMPL *session, const char *uri); extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, @@ -850,16 +849,6 @@ extern void __wt_scr_free(WT_BUF **bufp); extern void __wt_scr_discard(WT_SESSION_IMPL *session); extern void *__wt_scr_alloc_ext(WT_SESSION *wt_session, size_t size); extern void __wt_scr_free_ext(WT_SESSION *wt_session, void *p); -extern int __wt_sb_alloc( WT_SESSION_IMPL *session, - size_t size, - void *retp, - WT_SESSION_BUFFER **sbp); -extern void __wt_sb_free(WT_SESSION_IMPL *session, - WT_SESSION_BUFFER *sb, - void *p); -extern void __wt_sb_decrement(WT_SESSION_IMPL *session, - WT_SESSION_BUFFER *sb, - void *p); extern void __wt_session_dump_all(WT_SESSION_IMPL *session); extern void 
__wt_session_dump(WT_SESSION_IMPL *session); extern int __wt_stat_alloc_btree_stats(WT_SESSION_IMPL *session, diff --git a/src/include/serial_funcs.i b/src/include/serial_funcs.i index 2426dc02971..3f30c199f36 100644 --- a/src/include/serial_funcs.i +++ b/src/include/serial_funcs.i @@ -66,7 +66,7 @@ __wt_col_append_serial( if (!args->new_inshead_taken) __wt_free(session, args->new_inshead); if (!args->new_ins_taken) - __wt_sb_decrement(session, args->new_ins->sb, args->new_ins); + __wt_free(session, args->new_ins); return (ret); } @@ -225,7 +225,7 @@ __wt_insert_serial( if (!args->new_inshead_taken) __wt_free(session, args->new_inshead); if (!args->new_ins_taken) - __wt_sb_decrement(session, args->new_ins->sb, args->new_ins); + __wt_free(session, args->new_ins); return (ret); } @@ -375,7 +375,7 @@ __wt_update_serial( if (!args->new_upd_taken) __wt_free(session, args->new_upd); if (!args->upd_taken) - __wt_sb_decrement(session, args->upd->sb, args->upd); + __wt_free(session, args->upd); return (ret); } diff --git a/src/session/session_api.c b/src/session/session_api.c index 24e78df0cb2..896bf2ffe16 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -44,9 +44,6 @@ __session_close(WT_SESSION *wt_session, const char *config) WT_TRET(__wt_schema_close_tables(session)); __wt_spin_lock(session, &conn->spinlock); - /* Unpin the current session buffer. */ - if (session->sb != NULL) - __wt_sb_decrement(session, session->sb, NULL); /* Discard scratch buffers. */ __wt_scr_discard(session); diff --git a/src/support/err.c b/src/support/err.c index eb5ec59b7ca..18454629d32 100644 --- a/src/support/err.c +++ b/src/support/err.c @@ -176,17 +176,6 @@ __wt_illegal_value(WT_SESSION_IMPL *session) } /* - * __wt_file_item_too_big -- - * Print a standard error message when an element is too large to store. 
- */ -int -__wt_file_item_too_big(WT_SESSION_IMPL *session) -{ - WT_RET_MSG(session, WT_ERROR, - "the item is too large for the file to store"); -} - -/* * __wt_unknown_object_type -- * Print a standard error message when given an unknown object type. */ diff --git a/src/support/pow.c b/src/support/pow.c index 33b58d8b46c..5b73ec11d08 100644 --- a/src/support/pow.c +++ b/src/support/pow.c @@ -7,6 +7,7 @@ #include "wt_internal.h" +#ifdef __WIREDTIGER_UNUSED__ /* * __wt_nlpo2_round -- * Round up to the next-largest power-of-two for a 32-bit unsigned value. @@ -52,6 +53,7 @@ __wt_nlpo2(uint32_t v) v |= v >> 16; return (v + 1); } +#endif /* __WIREDTIGER_UNUSED__ */ /* * __wt_ispo2 -- diff --git a/src/support/sess_buf.c b/src/support/sess_buf.c deleted file mode 100644 index bd7e109cb70..00000000000 --- a/src/support/sess_buf.c +++ /dev/null @@ -1,229 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 2008-2011 WiredTiger, Inc. - * All rights reserved. - */ - -#include "wt_internal.h" - -/* - * __wt_sb_alloc -- - * Allocate memory from the WT_SESSION_IMPL's buffer and fill it in. - */ -int -__wt_sb_alloc( - WT_SESSION_IMPL *session, size_t size, void *retp, WT_SESSION_BUFFER **sbp) -{ -#ifndef HAVE_SESSION_BUFFERS - *sbp = NULL; - return (__wt_calloc(session, 1, size, retp)); -#else - WT_SESSION_BUFFER *sb; - size_t alloc_size; - uint32_t align_size; - int single_use; - - /* - * Allocate memory for an insert or change; there's a buffer in the - * WT_SESSION_IMPL structure for allocation of chunks of memory to hold - * changed or inserted values. - * - * We align allocations because we directly access WT_UPDATE structure - * fields in the memory (the x86 handles unaligned accesses, but I don't - * want to have to find and fix this code for a port to a system that - * doesn't handle unaligned accesses). 
It wastes space, but this memory - * is never written to disk and there are fewer concerns about memory - * than with on-disk structures. Any other code allocating memory from - * this buffer needs to align its allocations as well. - * - * The first thing in each chunk of memory is a WT_SESSION_BUFFER - * structure (check to be a multiple of 4B during initialization); - * then one or more WT_UPDATE structure plus value chunk pairs. - * - * Figure out how much space we need: this code limits the maximum size - * of a data item stored in the file. In summary, for a big item we - * have to store a WT_SESSION_BUFFER structure, the WT_UPDATE structure - * and the data, all in an allocated buffer. We only pass a 32-bit - * value to our allocation routine, so we can't store an item bigger - * than the maximum 32-bit value minus the sizes of those two - * structures, where the WT_UPDATE structure and data item are aligned - * to a 32-bit boundary. We could fix this, but it's unclear it's - * worth the effort: document you can store a (4GB - 512B) item max, - * it's insane to store 4GB items in the file anyway. - */ - if (size > WT_BTREE_OBJECT_SIZE_MAX) - return (__wt_file_item_too_big(session)); - align_size = WT_ALIGN(size + sizeof(WT_UPDATE), sizeof(uint32_t)); - - /* If we already have a buffer and the data fits, we're done. */ - sb = session->sb; - if (sb != NULL && align_size <= sb->space_avail) - goto no_allocation; - - /* - * We start by allocating 4KB for the thread, then every time we have - * to re-allocate the buffer, we double the allocation size, up to a - * total of 8MB, so any thread doing a lot of updates won't re-allocate - * new chunks of memory that often. - */ - if (session->update_alloc_size == 0) { - /* - * 2KB is correct, we're going to double it to 4KB when we - * calculate a new allocation size. 
- */ - session->update_alloc_size = 2 * 1024; - - /* - * We don't want to never aggregate changes because records are - * initially relatively large, compared to the allocation size, - * that is, if the application is loading many 4KB records, we'd - * like to handle that reasonably. This code adjusts for that - * case. - * - * If we get nothing but 256KB inserts, this code will allocate - * each of them individually, without aggregation, never growing - * the aggregation buffer size. That doesn't seem all that bad, - * aggregation isn't intended for lots of large records, rather - * it's intended for lots of small records. - */ - if (align_size > session->update_alloc_size && - align_size < 128 * 1024) - session->update_alloc_size = 128 * 1024; - } - - /* - * Decide how much memory to allocate: if it's a one-off (that is, the - * value is bigger than anything we'll aggregate into these buffers), - * allocate just enough memory. Else, allocate the next power-of-two - * larger, up to 8MB. - */ - if (align_size > session->update_alloc_size) { - alloc_size = sizeof(WT_SESSION_BUFFER) + align_size; - single_use = 1; - } else { - if (session->update_alloc_size < 8 * WT_MEGABYTE) - session->update_alloc_size = - __wt_nlpo2(session->update_alloc_size); - alloc_size = session->update_alloc_size; - single_use = 0; - } - - WT_RET(__wt_calloc(session, 1, alloc_size, &sb)); - sb->len = WT_STORE_SIZE(alloc_size); - sb->space_avail = WT_STORE_SIZE(alloc_size - sizeof(WT_SESSION_BUFFER)); - sb->first_free = (uint8_t *)sb + sizeof(WT_SESSION_BUFFER); - - /* - * If it's a single use allocation, ignore any current buffer in the - * session; else, release the old session buffer and replace it with - * the new one. 
- */ - if (!single_use) { - /* - * The "in" reference count is artificially incremented by 1 as - * long as a session buffer is referenced by the session - * handle; we do not want session buffers freed because a page - * was evicted and the count went to 0 while the buffer might - * still be used for future K/V inserts or modifications. - */ - if (session->sb != NULL) - __wt_sb_decrement(session, session->sb, NULL); - session->sb = sb; - - sb->in = 1; - } - -no_allocation: - *(void **)retp = sb->first_free; - *sbp = sb; - - sb->first_free += align_size; - sb->space_avail -= align_size; - ++sb->in; - WT_ASSERT(session, sb->in != 0); - - return (0); -#endif -} - -/* - * __wt_sb_free -- - * Free a chunk of memory from a per-WT_SESSION_IMPL buffer. - */ -void -__wt_sb_free(WT_SESSION_IMPL *session, WT_SESSION_BUFFER *sb, void *p) -{ -#ifndef HAVE_SESSION_BUFFERS - WT_UNUSED(sb); - - __wt_free(session, p); -#else - WT_UNUSED(p); - - WT_ASSERT(session, sb->out < sb->in); - - if (++sb->out == sb->in) - __wt_free(session, sb); -#endif -} - -/* - * __wt_sb_decrement -- - * Decrement the "insert" value of a per-WT_SESSION_IMPL buffer. - */ -void -__wt_sb_decrement(WT_SESSION_IMPL *session, WT_SESSION_BUFFER *sb, void *p) -{ -#ifndef HAVE_SESSION_BUFFERS - WT_UNUSED(sb); - - __wt_free(session, p); -#else - WT_UNUSED(p); - - WT_ASSERT(session, sb->out < sb->in); - - /* - * This function is used for two reasons. - * - * #1: it's possible we allocated memory from the session buffer, but - * then an error occurred. In this case we don't try and clean up the - * session buffer, it's simpler to decrement the counters and pretend - * the memory is no longer in use. We're still in the allocation path - * so we decrement the "in" field instead of incrementing the "out" - * field, if the eviction thread were to update the "out" field at the - * same time, we could race. 
- * - * #2: the "in" reference count is artificially incremented by 1 as - * long as a session buffer is referenced by the session handle; we do - * not want session buffers freed because a page was evicted and the - * count went to 0 while the buffer might still be used for future K/V - * inserts or modifications. - */ - --sb->in; - - /* - * In the above case #1, if the session buffer was a one-off (allocated - * for a single use), we have to free it here, it's not linked to any - * WT_PAGE in the system. - * - * In the above case #2, our artificial increment might be the last - * reference, if all of the WT_PAGE's referencing this buffer have been - * reconciled since the K/V inserts or modifications. - * - * In both of these cases, sb->in == sb->out, and we need to free the - * buffer. - * - * XXX - * There's a race here in the above case #2: if this code, and the page - * discard code race, it's possible neither will realize the buffer is - * no longer needed and free it. The fix is to involve the eviction - * thread: it may need a linked list of buffers they review to ensure - * it never happens. I'm living with this now: it's an unlikely - * race, and it's a memory leak if it ever happens. - */ - if (sb->in == sb->out) - __wt_free(session, sb); -#endif -} |