summaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
Diffstat (limited to 'src/include')
-rw-r--r--src/include/btmem.h27
-rw-r--r--src/include/connection.h15
-rw-r--r--src/include/extern.h7
-rw-r--r--src/include/extern_posix.h2
-rw-r--r--src/include/extern_win.h2
-rw-r--r--src/include/mutex.h31
-rw-r--r--src/include/serial.i30
-rw-r--r--src/include/session.h4
-rw-r--r--src/include/verify_build.h1
-rw-r--r--src/include/wt_internal.h4
10 files changed, 61 insertions, 62 deletions
diff --git a/src/include/btmem.h b/src/include/btmem.h
index f1bb08d2699..eb523c01ad7 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -414,18 +414,20 @@ struct __wt_page_modify {
size_t discard_allocated;
} *ovfl_track;
+#define WT_PAGE_LOCK(s, p) \
+ __wt_spin_lock((s), &(p)->modify->page_lock)
+#define WT_PAGE_TRYLOCK(s, p) \
+ __wt_spin_trylock((s), &(p)->modify->page_lock)
+#define WT_PAGE_UNLOCK(s, p) \
+ __wt_spin_unlock((s), &(p)->modify->page_lock)
+ WT_SPINLOCK page_lock; /* Page's spinlock */
+
/*
* The write generation is incremented when a page is modified, a page
* is clean if the write generation is 0.
*/
uint32_t write_gen;
-#define WT_PAGE_LOCK(s, p) \
- __wt_spin_lock((s), &S2C(s)->page_lock[(p)->modify->page_lock])
-#define WT_PAGE_UNLOCK(s, p) \
- __wt_spin_unlock((s), &S2C(s)->page_lock[(p)->modify->page_lock])
- uint8_t page_lock; /* Page's spinlock */
-
#define WT_PM_REC_EMPTY 1 /* Reconciliation: no replacement */
#define WT_PM_REC_MULTIBLOCK 2 /* Reconciliation: multiple blocks */
#define WT_PM_REC_REPLACE 3 /* Reconciliation: single block */
@@ -603,13 +605,6 @@ struct __wt_page {
uint8_t unused[2]; /* Unused padding */
/*
- * Used to protect and co-ordinate splits for internal pages and
- * reconciliation for all pages. Only used to co-ordinate among the
- * uncommon cases that require exclusive access to a page.
- */
- WT_RWLOCK page_lock;
-
- /*
* The page's read generation acts as an LRU value for each page in the
* tree; it is used by the eviction server thread to select pages to be
* discarded from the in-memory tree.
@@ -635,8 +630,6 @@ struct __wt_page {
#define WT_READGEN_STEP 100
uint64_t read_gen;
- uint64_t evict_pass_gen; /* Eviction pass generation */
-
size_t memory_footprint; /* Memory attached to the page */
/* Page's on-disk representation: NULL for pages created in memory. */
@@ -644,6 +637,10 @@ struct __wt_page {
/* If/when the page is modified, we need lots more information. */
WT_PAGE_MODIFY *modify;
+
+ /* This is the 64 byte boundary, try to keep hot fields above here. */
+
+ uint64_t evict_pass_gen; /* Eviction pass generation */
};
/*
diff --git a/src/include/connection.h b/src/include/connection.h
index 6c23492e926..f74732684f5 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -175,21 +175,6 @@ struct __wt_connection_impl {
WT_SPINLOCK turtle_lock; /* Turtle file spinlock */
WT_RWLOCK dhandle_lock; /* Data handle list lock */
- /*
- * We distribute the btree page locks across a set of spin locks. Don't
- * use too many: they are only held for very short operations, each one
- * is 64 bytes, so 256 will fill the L1 cache on most CPUs.
- *
- * Use a prime number of buckets rather than assuming a good hash
- * (Reference Sedgewick, Algorithms in C, "Hash Functions").
- *
- * Note: this can't be an array, we impose cache-line alignment and gcc
- * doesn't support that for arrays smaller than the alignment.
- */
-#define WT_PAGE_LOCKS 17
- WT_SPINLOCK *page_lock; /* Btree page spinlocks */
- u_int page_lock_cnt; /* Next spinlock to use */
-
/* Connection queue */
TAILQ_ENTRY(__wt_connection_impl) q;
/* Cache pool queue */
diff --git a/src/include/extern.h b/src/include/extern.h
index 0cfc284b313..bf3279d0f94 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -182,7 +182,7 @@ extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *b
extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_tree_walk_skip(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_row_leaf_key_copy( WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -191,7 +191,7 @@ extern int __wt_row_ikey_alloc(WT_SESSION_IMPL *session, uint32_t cell_offset, c
extern int __wt_row_ikey_incr(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_row_ikey(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, WT_ITEM *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_update_alloc( WT_SESSION_IMPL *session, WT_ITEM *value, WT_UPDATE **updp, size_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd);
@@ -674,10 +674,9 @@ extern void __wt_huffman_close(WT_SESSION_IMPL *session, void *huffman_arg);
extern void __wt_print_huffman_code(void *huffman_arg, uint16_t symbol);
extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern void __wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l);
+extern int __wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK *l);
extern int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern void __wt_readlock_spin(WT_SESSION_IMPL *session, WT_RWLOCK *l);
extern void __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l);
extern void __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l);
extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/include/extern_posix.h b/src/include/extern_posix.h
index 3afffef687b..c0ed056c7b6 100644
--- a/src/include/extern_posix.h
+++ b/src/include/extern_posix.h
@@ -15,7 +15,7 @@ extern int __wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *ma
extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled);
extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond);
-extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp);
extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_absolute_path(const char *path);
diff --git a/src/include/extern_win.h b/src/include/extern_win.h
index 4e232a2df80..d548ee0b2ec 100644
--- a/src/include/extern_win.h
+++ b/src/include/extern_win.h
@@ -13,7 +13,7 @@ extern int __wt_win_unmap(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, v
extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled);
extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond);
-extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp);
extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_absolute_path(const char *path);
diff --git a/src/include/mutex.h b/src/include/mutex.h
index 910eb7af5b9..36acea810d9 100644
--- a/src/include/mutex.h
+++ b/src/include/mutex.h
@@ -37,17 +37,21 @@ struct __wt_condvar {
* Don't modify this structure without understanding the read/write locking
* functions.
*/
-union __wt_rwlock { /* Read/write lock */
- uint64_t u;
- struct {
- uint32_t wr; /* Writers and readers */
- } i;
- struct {
- uint16_t writers; /* Now serving for writers */
- uint16_t readers; /* Now serving for readers */
- uint16_t next; /* Next available ticket number */
- uint16_t writers_active;/* Count of active writers */
- } s;
+struct __wt_rwlock { /* Read/write lock */
+ volatile union {
+ uint64_t v; /* Full 64-bit value */
+ struct {
+ uint8_t current; /* Current ticket */
+ uint8_t next; /* Next available ticket */
+ uint8_t reader; /* Read queue ticket */
+ uint8_t __notused; /* Padding */
+ uint16_t readers_active;/* Count of active readers */
+ uint16_t readers_queued;/* Count of queued readers */
+ } s;
+ } u;
+
+ WT_CONDVAR *cond_readers; /* Blocking readers */
+ WT_CONDVAR *cond_writers; /* Blocking writers */
};
/*
@@ -63,8 +67,8 @@ union __wt_rwlock { /* Read/write lock */
#define SPINLOCK_PTHREAD_MUTEX_ADAPTIVE 3
struct __wt_spinlock {
- WT_CACHE_LINE_PAD_BEGIN
#if SPINLOCK_TYPE == SPINLOCK_GCC
+ WT_CACHE_LINE_PAD_BEGIN
volatile int lock;
#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\
SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE ||\
@@ -87,5 +91,8 @@ struct __wt_spinlock {
int16_t stat_int_usecs_off; /* waiting server threads offset */
int8_t initialized; /* Lock initialized, for cleanup */
+
+#if SPINLOCK_TYPE == SPINLOCK_GCC
WT_CACHE_LINE_PAD_END
+#endif
};
diff --git a/src/include/serial.i b/src/include/serial.i
index 982f196b0b8..0134e1a9c20 100644
--- a/src/include/serial.i
+++ b/src/include/serial.i
@@ -154,7 +154,7 @@ __col_append_serial_func(WT_SESSION_IMPL *session, WT_INSERT_HEAD *ins_head,
static inline int
__wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp,
- size_t new_ins_size, uint64_t *recnop, u_int skipdepth)
+ size_t new_ins_size, uint64_t *recnop, u_int skipdepth, bool exclusive)
{
WT_INSERT *new_ins = *new_insp;
WT_DECL_RET;
@@ -165,11 +165,16 @@ __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
/* Clear references to memory we now own and must free on error. */
*new_insp = NULL;
- /* Acquire the page's spinlock, call the worker function. */
- WT_PAGE_LOCK(session, page);
+ /*
+ * Acquire the page's spinlock unless we already have exclusive access.
+ * Then call the worker function.
+ */
+ if (!exclusive)
+ WT_PAGE_LOCK(session, page);
ret = __col_append_serial_func(
session, ins_head, ins_stack, new_ins, recnop, skipdepth);
- WT_PAGE_UNLOCK(session, page);
+ if (!exclusive)
+ WT_PAGE_UNLOCK(session, page);
if (ret != 0) {
/* Free unused memory on error. */
@@ -198,7 +203,7 @@ __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
static inline int
__wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp,
- size_t new_ins_size, u_int skipdepth)
+ size_t new_ins_size, u_int skipdepth, bool exclusive)
{
WT_INSERT *new_ins = *new_insp;
WT_DECL_RET;
@@ -220,10 +225,12 @@ __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
ret = __insert_simple_func(
session, ins_stack, new_ins, skipdepth);
else {
- WT_PAGE_LOCK(session, page);
+ if (!exclusive)
+ WT_PAGE_LOCK(session, page);
ret = __insert_serial_func(
session, ins_head, ins_stack, new_ins, skipdepth);
- WT_PAGE_UNLOCK(session, page);
+ if (!exclusive)
+ WT_PAGE_UNLOCK(session, page);
}
if (ret != 0) {
@@ -252,7 +259,8 @@ __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
*/
static inline int
__wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
- WT_UPDATE **srch_upd, WT_UPDATE **updp, size_t upd_size)
+ WT_UPDATE **srch_upd, WT_UPDATE **updp, size_t upd_size,
+ bool exclusive)
{
WT_DECL_RET;
WT_UPDATE *obsolete, *upd = *updp;
@@ -295,7 +303,7 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
/*
* If there are no subsequent WT_UPDATE structures we are done here.
*/
- if (upd->next == NULL)
+ if (upd->next == NULL || exclusive)
return (0);
/*
@@ -316,11 +324,11 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
}
/* If we can't lock it, don't scan, that's okay. */
- if (__wt_try_writelock(session, &page->page_lock) != 0)
+ if (WT_PAGE_TRYLOCK(session, page) != 0)
return (0);
obsolete = __wt_update_obsolete_check(session, page, upd->next);
- __wt_writeunlock(session, &page->page_lock);
+ WT_PAGE_UNLOCK(session, page);
if (obsolete != NULL)
__wt_update_obsolete_free(session, page, obsolete);
diff --git a/src/include/session.h b/src/include/session.h
index 674e92671b1..1b2dfd1ed2b 100644
--- a/src/include/session.h
+++ b/src/include/session.h
@@ -97,6 +97,10 @@ struct __wt_session_impl {
*/
TAILQ_HEAD(__tables, __wt_table) tables;
+ /* Current rwlock for callback. */
+ WT_RWLOCK *current_rwlock;
+ uint8_t current_rwticket;
+
WT_ITEM **scratch; /* Temporary memory for any function */
u_int scratch_alloc; /* Currently allocated */
size_t scratch_cached; /* Scratch bytes cached */
diff --git a/src/include/verify_build.h b/src/include/verify_build.h
index 640f5e4cf5f..a657b9ac460 100644
--- a/src/include/verify_build.h
+++ b/src/include/verify_build.h
@@ -59,7 +59,6 @@ __wt_verify_build(void)
sizeof(s) > WT_CACHE_LINE_ALIGNMENT || \
sizeof(s) % WT_CACHE_LINE_ALIGNMENT == 0)
WT_PADDING_CHECK(WT_LOGSLOT);
- WT_PADDING_CHECK(WT_SPINLOCK);
WT_PADDING_CHECK(WT_TXN_STATE);
/*
diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h
index da318ad8a86..cf79578985b 100644
--- a/src/include/wt_internal.h
+++ b/src/include/wt_internal.h
@@ -268,6 +268,8 @@ struct __wt_ref;
typedef struct __wt_ref WT_REF;
struct __wt_row;
typedef struct __wt_row WT_ROW;
+struct __wt_rwlock;
+ typedef struct __wt_rwlock WT_RWLOCK;
struct __wt_salvage_cookie;
typedef struct __wt_salvage_cookie WT_SALVAGE_COOKIE;
struct __wt_save_upd;
@@ -302,8 +304,6 @@ union __wt_lsn;
typedef union __wt_lsn WT_LSN;
union __wt_rand_state;
typedef union __wt_rand_state WT_RAND_STATE;
-union __wt_rwlock;
- typedef union __wt_rwlock WT_RWLOCK;
/*
* Forward type declarations for internal types: END
* DO NOT EDIT: automatically built by dist/s_typedef.