diff options
Diffstat (limited to 'src/include')
-rw-r--r-- | src/include/btmem.h | 27 | ||||
-rw-r--r-- | src/include/connection.h | 15 | ||||
-rw-r--r-- | src/include/extern.h | 7 | ||||
-rw-r--r-- | src/include/extern_posix.h | 2 | ||||
-rw-r--r-- | src/include/extern_win.h | 2 | ||||
-rw-r--r-- | src/include/mutex.h | 31 | ||||
-rw-r--r-- | src/include/serial.i | 30 | ||||
-rw-r--r-- | src/include/session.h | 4 | ||||
-rw-r--r-- | src/include/verify_build.h | 1 | ||||
-rw-r--r-- | src/include/wt_internal.h | 4 |
10 files changed, 61 insertions, 62 deletions
diff --git a/src/include/btmem.h b/src/include/btmem.h index f1bb08d2699..eb523c01ad7 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -414,18 +414,20 @@ struct __wt_page_modify { size_t discard_allocated; } *ovfl_track; +#define WT_PAGE_LOCK(s, p) \ + __wt_spin_lock((s), &(p)->modify->page_lock) +#define WT_PAGE_TRYLOCK(s, p) \ + __wt_spin_trylock((s), &(p)->modify->page_lock) +#define WT_PAGE_UNLOCK(s, p) \ + __wt_spin_unlock((s), &(p)->modify->page_lock) + WT_SPINLOCK page_lock; /* Page's spinlock */ + /* * The write generation is incremented when a page is modified, a page * is clean if the write generation is 0. */ uint32_t write_gen; -#define WT_PAGE_LOCK(s, p) \ - __wt_spin_lock((s), &S2C(s)->page_lock[(p)->modify->page_lock]) -#define WT_PAGE_UNLOCK(s, p) \ - __wt_spin_unlock((s), &S2C(s)->page_lock[(p)->modify->page_lock]) - uint8_t page_lock; /* Page's spinlock */ - #define WT_PM_REC_EMPTY 1 /* Reconciliation: no replacement */ #define WT_PM_REC_MULTIBLOCK 2 /* Reconciliation: multiple blocks */ #define WT_PM_REC_REPLACE 3 /* Reconciliation: single block */ @@ -603,13 +605,6 @@ struct __wt_page { uint8_t unused[2]; /* Unused padding */ /* - * Used to protect and co-ordinate splits for internal pages and - * reconciliation for all pages. Only used to co-ordinate among the - * uncommon cases that require exclusive access to a page. - */ - WT_RWLOCK page_lock; - - /* * The page's read generation acts as an LRU value for each page in the * tree; it is used by the eviction server thread to select pages to be * discarded from the in-memory tree. @@ -635,8 +630,6 @@ struct __wt_page { #define WT_READGEN_STEP 100 uint64_t read_gen; - uint64_t evict_pass_gen; /* Eviction pass generation */ - size_t memory_footprint; /* Memory attached to the page */ /* Page's on-disk representation: NULL for pages created in memory. */ @@ -644,6 +637,10 @@ struct __wt_page { /* If/when the page is modified, we need lots more information. */ WT_PAGE_MODIFY *modify; + + /* This is the 64 byte boundary, try to keep hot fields above here. */ + + uint64_t evict_pass_gen; /* Eviction pass generation */ }; /* diff --git a/src/include/connection.h b/src/include/connection.h index 6c23492e926..f74732684f5 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -175,21 +175,6 @@ struct __wt_connection_impl { WT_SPINLOCK turtle_lock; /* Turtle file spinlock */ WT_RWLOCK dhandle_lock; /* Data handle list lock */ - /* - * We distribute the btree page locks across a set of spin locks. Don't - * use too many: they are only held for very short operations, each one - * is 64 bytes, so 256 will fill the L1 cache on most CPUs. - * - * Use a prime number of buckets rather than assuming a good hash - * (Reference Sedgewick, Algorithms in C, "Hash Functions"). - * - * Note: this can't be an array, we impose cache-line alignment and gcc - * doesn't support that for arrays smaller than the alignment. - */ -#define WT_PAGE_LOCKS 17 - WT_SPINLOCK *page_lock; /* Btree page spinlocks */ - u_int page_lock_cnt; /* Next spinlock to use */ - /* Connection queue */ TAILQ_ENTRY(__wt_connection_impl) q; /* Cache pool queue */ diff --git a/src/include/extern.h b/src/include/extern.h index 0cfc284b313..bf3279d0f94 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -182,7 +182,7 @@ extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *b extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_tree_walk_skip(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_leaf_key_copy( WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -191,7 +191,7 @@ extern int __wt_row_ikey_alloc(WT_SESSION_IMPL *session, uint32_t cell_offset, c extern int __wt_row_ikey_incr(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_ikey(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, WT_ITEM *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_update_alloc( WT_SESSION_IMPL *session, WT_ITEM *value, WT_UPDATE **updp, size_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd); @@ -674,10 +674,9 @@ extern void __wt_huffman_close(WT_SESSION_IMPL *session, void *huffman_arg); extern void __wt_print_huffman_code(void *huffman_arg, uint16_t symbol); extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern void __wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l); +extern int __wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK *l); extern int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern void __wt_readlock_spin(WT_SESSION_IMPL *session, WT_RWLOCK *l); extern void __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l); extern void __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l); extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/include/extern_posix.h b/src/include/extern_posix.h index 3afffef687b..c0ed056c7b6 100644 --- a/src/include/extern_posix.h +++ b/src/include/extern_posix.h @@ -15,7 +15,7 @@ extern int __wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *ma extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled); extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); -extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp); extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern bool __wt_absolute_path(const char *path); diff --git a/src/include/extern_win.h b/src/include/extern_win.h index 4e232a2df80..d548ee0b2ec 100644 --- a/src/include/extern_win.h +++ b/src/include/extern_win.h @@ -13,7 +13,7 @@ extern int __wt_win_unmap(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, v extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled); extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); -extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp); extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern bool __wt_absolute_path(const char *path); diff --git a/src/include/mutex.h b/src/include/mutex.h index 910eb7af5b9..36acea810d9 100644 --- a/src/include/mutex.h +++ b/src/include/mutex.h @@ -37,17 +37,21 @@ struct __wt_condvar { * Don't modify this structure without understanding the read/write locking * functions. */ -union __wt_rwlock { /* Read/write lock */ - uint64_t u; - struct { - uint32_t wr; /* Writers and readers */ - } i; - struct { - uint16_t writers; /* Now serving for writers */ - uint16_t readers; /* Now serving for readers */ - uint16_t next; /* Next available ticket number */ - uint16_t writers_active;/* Count of active writers */ - } s; +struct __wt_rwlock { /* Read/write lock */ + volatile union { + uint64_t v; /* Full 64-bit value */ + struct { + uint8_t current; /* Current ticket */ + uint8_t next; /* Next available ticket */ + uint8_t reader; /* Read queue ticket */ + uint8_t __notused; /* Padding */ + uint16_t readers_active;/* Count of active readers */ + uint16_t readers_queued;/* Count of queued readers */ + } s; + } u; + + WT_CONDVAR *cond_readers; /* Blocking readers */ + WT_CONDVAR *cond_writers; /* Blocking writers */ }; /* @@ -63,8 +67,8 @@ union __wt_rwlock { /* Read/write lock */ #define SPINLOCK_PTHREAD_MUTEX_ADAPTIVE 3 struct __wt_spinlock { - WT_CACHE_LINE_PAD_BEGIN #if SPINLOCK_TYPE == SPINLOCK_GCC + WT_CACHE_LINE_PAD_BEGIN volatile int lock; #elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\ SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE ||\ @@ -87,5 +91,8 @@ struct __wt_spinlock { int16_t stat_int_usecs_off; /* waiting server threads offset */ int8_t initialized; /* Lock initialized, for cleanup */ + +#if SPINLOCK_TYPE == SPINLOCK_GCC WT_CACHE_LINE_PAD_END +#endif }; diff --git a/src/include/serial.i b/src/include/serial.i index 982f196b0b8..0134e1a9c20 100644 --- a/src/include/serial.i +++ b/src/include/serial.i @@ -154,7 +154,7 @@ __col_append_serial_func(WT_SESSION_IMPL *session, WT_INSERT_HEAD *ins_head, static inline int __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp, - size_t new_ins_size, uint64_t *recnop, u_int skipdepth) + size_t new_ins_size, uint64_t *recnop, u_int skipdepth, bool exclusive) { WT_INSERT *new_ins = *new_insp; WT_DECL_RET; @@ -165,11 +165,16 @@ __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page, /* Clear references to memory we now own and must free on error. */ *new_insp = NULL; - /* Acquire the page's spinlock, call the worker function. */ - WT_PAGE_LOCK(session, page); + /* + * Acquire the page's spinlock unless we already have exclusive access. + * Then call the worker function. + */ + if (!exclusive) + WT_PAGE_LOCK(session, page); ret = __col_append_serial_func( session, ins_head, ins_stack, new_ins, recnop, skipdepth); - WT_PAGE_UNLOCK(session, page); + if (!exclusive) + WT_PAGE_UNLOCK(session, page); if (ret != 0) { /* Free unused memory on error. */ @@ -198,7 +203,7 @@ __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page, static inline int __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp, - size_t new_ins_size, u_int skipdepth) + size_t new_ins_size, u_int skipdepth, bool exclusive) { WT_INSERT *new_ins = *new_insp; WT_DECL_RET; @@ -220,10 +225,12 @@ __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page, ret = __insert_simple_func( session, ins_stack, new_ins, skipdepth); else { - WT_PAGE_LOCK(session, page); + if (!exclusive) + WT_PAGE_LOCK(session, page); ret = __insert_serial_func( session, ins_head, ins_stack, new_ins, skipdepth); - WT_PAGE_UNLOCK(session, page); + if (!exclusive) + WT_PAGE_UNLOCK(session, page); } if (ret != 0) { @@ -252,7 +259,8 @@ __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page, */ static inline int __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, - WT_UPDATE **srch_upd, WT_UPDATE **updp, size_t upd_size) + WT_UPDATE **srch_upd, WT_UPDATE **updp, size_t upd_size, + bool exclusive) { WT_DECL_RET; WT_UPDATE *obsolete, *upd = *updp; @@ -295,7 +303,7 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, /* * If there are no subsequent WT_UPDATE structures we are done here. */ - if (upd->next == NULL) + if (upd->next == NULL || exclusive) return (0); /* @@ -316,11 +324,11 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, } /* If we can't lock it, don't scan, that's okay. */ - if (__wt_try_writelock(session, &page->page_lock) != 0) + if (WT_PAGE_TRYLOCK(session, page) != 0) return (0); obsolete = __wt_update_obsolete_check(session, page, upd->next); - __wt_writeunlock(session, &page->page_lock); + WT_PAGE_UNLOCK(session, page); if (obsolete != NULL) __wt_update_obsolete_free(session, page, obsolete); diff --git a/src/include/session.h b/src/include/session.h index 674e92671b1..1b2dfd1ed2b 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -97,6 +97,10 @@ struct __wt_session_impl { */ TAILQ_HEAD(__tables, __wt_table) tables; + /* Current rwlock for callback. */ + WT_RWLOCK *current_rwlock; + uint8_t current_rwticket; + WT_ITEM **scratch; /* Temporary memory for any function */ u_int scratch_alloc; /* Currently allocated */ size_t scratch_cached; /* Scratch bytes cached */ diff --git a/src/include/verify_build.h b/src/include/verify_build.h index 640f5e4cf5f..a657b9ac460 100644 --- a/src/include/verify_build.h +++ b/src/include/verify_build.h @@ -59,7 +59,6 @@ __wt_verify_build(void) sizeof(s) > WT_CACHE_LINE_ALIGNMENT || \ sizeof(s) % WT_CACHE_LINE_ALIGNMENT == 0) WT_PADDING_CHECK(WT_LOGSLOT); - WT_PADDING_CHECK(WT_SPINLOCK); WT_PADDING_CHECK(WT_TXN_STATE); /* diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h index da318ad8a86..cf79578985b 100644 --- a/src/include/wt_internal.h +++ b/src/include/wt_internal.h @@ -268,6 +268,8 @@ struct __wt_ref; typedef struct __wt_ref WT_REF; struct __wt_row; typedef struct __wt_row WT_ROW; +struct __wt_rwlock; + typedef struct __wt_rwlock WT_RWLOCK; struct __wt_salvage_cookie; typedef struct __wt_salvage_cookie WT_SALVAGE_COOKIE; struct __wt_save_upd; @@ -302,8 +304,6 @@ union __wt_lsn; typedef union __wt_lsn WT_LSN; union __wt_rand_state; typedef union __wt_rand_state WT_RAND_STATE; -union __wt_rwlock; - typedef union __wt_rwlock WT_RWLOCK; /* * Forward type declarations for internal types: END * DO NOT EDIT: automatically built by dist/s_typedef. |