summaryrefslogtreecommitdiff
path: root/storage/innobase/buf/buf0buddy.cc
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/buf/buf0buddy.cc')
-rw-r--r--storage/innobase/buf/buf0buddy.cc536
1 files changed, 536 insertions, 0 deletions
diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc
new file mode 100644
index 00000000000..b6774aede8e
--- /dev/null
+++ b/storage/innobase/buf/buf0buddy.cc
@@ -0,0 +1,536 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file buf/buf0buddy.cc
+Binary buddy allocator for compressed pages
+
+Created December 2006 by Marko Makela
+*******************************************************/
+
+#define THIS_MODULE
+#include "buf0buddy.h"
+#ifdef UNIV_NONINL
+# include "buf0buddy.ic"
+#endif
+#undef THIS_MODULE
+#include "buf0buf.h"
+#include "buf0lru.h"
+#include "buf0flu.h"
+#include "page0zip.h"
+
+/**********************************************************************//**
+Get the offset of the buddy of a compressed page frame.
+@return the buddy relative of page */
+UNIV_INLINE
+byte*
+buf_buddy_get(
+/*==========*/
+ byte* page, /*!< in: compressed page */
+ ulint size) /*!< in: page size in bytes */
+{
+ ut_ad(ut_is_2pow(size));
+ ut_ad(size >= BUF_BUDDY_LOW);
+ ut_ad(BUF_BUDDY_LOW <= UNIV_ZIP_SIZE_MIN);
+ ut_ad(size < BUF_BUDDY_HIGH);
+ ut_ad(BUF_BUDDY_HIGH == UNIV_PAGE_SIZE);
+ ut_ad(!ut_align_offset(page, size));
+
+ if (((ulint) page) & size) {
+ return(page - size);
+ } else {
+ return(page + size);
+ }
+}
+
+/** Validate a given zip_free list. */
+struct CheckZipFree {
+ void operator()(const buf_page_t* elem) const
+ {
+ ut_a(buf_page_get_state(elem) == BUF_BLOCK_ZIP_FREE);
+ }
+};
+
+#define BUF_BUDDY_LIST_VALIDATE(bp, i) \
+ UT_LIST_VALIDATE(list, buf_page_t, bp->zip_free[i], CheckZipFree())
+
+/**********************************************************************//**
+Add a block to the head of the appropriate buddy free list. */
+UNIV_INLINE
+void
+buf_buddy_add_to_free(
+/*==================*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ buf_page_t* bpage, /*!< in,own: block to be freed */
+ ulint i) /*!< in: index of
+ buf_pool->zip_free[] */
+{
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
+ ut_ad(buf_pool->zip_free[i].start != bpage);
+ UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
+}
+
+/**********************************************************************//**
+Remove a block from the appropriate buddy free list. */
+UNIV_INLINE
+void
+buf_buddy_remove_from_free(
+/*=======================*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ buf_page_t* bpage, /*!< in: block to be removed */
+ ulint i) /*!< in: index of
+ buf_pool->zip_free[] */
+{
+#ifdef UNIV_DEBUG
+ buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
+ buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
+
+ ut_ad(!prev || buf_page_get_state(prev) == BUF_BLOCK_ZIP_FREE);
+ ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
+#endif /* UNIV_DEBUG */
+
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
+ UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
+}
+
+/**********************************************************************//**
+Try to allocate a block from buf_pool->zip_free[].
+@return allocated block, or NULL if buf_pool->zip_free[] was empty */
+static
+void*
+buf_buddy_alloc_zip(
+/*================*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ ulint i) /*!< in: index of buf_pool->zip_free[] */
+{
+ buf_page_t* bpage;
+
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_a(i < BUF_BUDDY_SIZES);
+ ut_a(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
+
+ ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
+
+ bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
+
+ if (bpage) {
+ ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
+
+ buf_buddy_remove_from_free(buf_pool, bpage, i);
+ } else if (i + 1 < BUF_BUDDY_SIZES) {
+ /* Attempt to split. */
+ bpage = (buf_page_t*) buf_buddy_alloc_zip(buf_pool, i + 1);
+
+ if (bpage) {
+ buf_page_t* buddy = (buf_page_t*)
+ (((char*) bpage) + (BUF_BUDDY_LOW << i));
+
+ ut_ad(!buf_pool_contains_zip(buf_pool, buddy));
+ ut_d(memset(buddy, i, BUF_BUDDY_LOW << i));
+ buddy->state = BUF_BLOCK_ZIP_FREE;
+ buf_buddy_add_to_free(buf_pool, buddy, i);
+ }
+ }
+
+ if (bpage) {
+ ut_d(memset(bpage, ~i, BUF_BUDDY_LOW << i));
+ UNIV_MEM_ALLOC(bpage, BUF_BUDDY_SIZES << i);
+ }
+
+ return(bpage);
+}
+
+/**********************************************************************//**
+Deallocate a buffer frame of UNIV_PAGE_SIZE. */
+static
+void
+buf_buddy_block_free(
+/*=================*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ void* buf) /*!< in: buffer frame to deallocate */
+{
+ const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
+ buf_page_t* bpage;
+ buf_block_t* block;
+
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(!mutex_own(&buf_pool->zip_mutex));
+ ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
+
+ HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
+ && bpage->in_zip_hash && !bpage->in_page_hash),
+ ((buf_block_t*) bpage)->frame == buf);
+ ut_a(bpage);
+ ut_a(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY);
+ ut_ad(!bpage->in_page_hash);
+ ut_ad(bpage->in_zip_hash);
+ ut_d(bpage->in_zip_hash = FALSE);
+ HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
+
+ ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
+ UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
+
+ block = (buf_block_t*) bpage;
+ mutex_enter(&block->mutex);
+ buf_LRU_block_free_non_file_page(block);
+ mutex_exit(&block->mutex);
+
+ ut_ad(buf_pool->buddy_n_frames > 0);
+ ut_d(buf_pool->buddy_n_frames--);
+}
+
+/**********************************************************************//**
+Allocate a buffer block to the buddy allocator. */
+static
+void
+buf_buddy_block_register(
+/*=====================*/
+ buf_block_t* block) /*!< in: buffer frame to allocate */
+{
+ buf_pool_t* buf_pool = buf_pool_from_block(block);
+ const ulint fold = BUF_POOL_ZIP_FOLD(block);
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(!mutex_own(&buf_pool->zip_mutex));
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
+
+ buf_block_set_state(block, BUF_BLOCK_MEMORY);
+
+ ut_a(block->frame);
+ ut_a(!ut_align_offset(block->frame, UNIV_PAGE_SIZE));
+
+ ut_ad(!block->page.in_page_hash);
+ ut_ad(!block->page.in_zip_hash);
+ ut_d(block->page.in_zip_hash = TRUE);
+ HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
+
+ ut_d(buf_pool->buddy_n_frames++);
+}
+
+/**********************************************************************//**
+Allocate a block from a bigger object.
+@return allocated block */
+static
+void*
+buf_buddy_alloc_from(
+/*=================*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ void* buf, /*!< in: a block that is free to use */
+ ulint i, /*!< in: index of
+ buf_pool->zip_free[] */
+ ulint j) /*!< in: size of buf as an index
+ of buf_pool->zip_free[] */
+{
+ ulint offs = BUF_BUDDY_LOW << j;
+ ut_ad(j <= BUF_BUDDY_SIZES);
+ ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
+ ut_ad(j >= i);
+ ut_ad(!ut_align_offset(buf, offs));
+
+ /* Add the unused parts of the block to the free lists. */
+ while (j > i) {
+ buf_page_t* bpage;
+
+ offs >>= 1;
+ j--;
+
+ bpage = (buf_page_t*) ((byte*) buf + offs);
+ ut_d(memset(bpage, j, BUF_BUDDY_LOW << j));
+ bpage->state = BUF_BLOCK_ZIP_FREE;
+ ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
+ buf_buddy_add_to_free(buf_pool, bpage, j);
+ }
+
+ return(buf);
+}
+
+/**********************************************************************//**
+Allocate a block. The thread calling this function must hold
+buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex.
+The buf_pool_mutex may be released and reacquired.
+@return allocated block, never NULL */
+UNIV_INTERN
+void*
+buf_buddy_alloc_low(
+/*================*/
+ buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
+ ulint i, /*!< in: index of buf_pool->zip_free[],
+ or BUF_BUDDY_SIZES */
+ ibool* lru) /*!< in: pointer to a variable that
+ will be assigned TRUE if storage was
+ allocated from the LRU list and
+ buf_pool->mutex was temporarily
+ released */
+{
+ buf_block_t* block;
+
+ ut_ad(lru);
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(!mutex_own(&buf_pool->zip_mutex));
+ ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
+
+ if (i < BUF_BUDDY_SIZES) {
+ /* Try to allocate from the buddy system. */
+ block = (buf_block_t*) buf_buddy_alloc_zip(buf_pool, i);
+
+ if (block) {
+ goto func_exit;
+ }
+ }
+
+ /* Try allocating from the buf_pool->free list. */
+ block = buf_LRU_get_free_only(buf_pool);
+
+ if (block) {
+
+ goto alloc_big;
+ }
+
+ /* Try replacing an uncompressed page in the buffer pool. */
+ buf_pool_mutex_exit(buf_pool);
+ block = buf_LRU_get_free_block(buf_pool);
+ *lru = TRUE;
+ buf_pool_mutex_enter(buf_pool);
+
+alloc_big:
+ buf_buddy_block_register(block);
+
+ block = (buf_block_t*) buf_buddy_alloc_from(
+ buf_pool, block->frame, i, BUF_BUDDY_SIZES);
+
+func_exit:
+ buf_pool->buddy_stat[i].used++;
+ return(block);
+}
+
+/**********************************************************************//**
+Try to relocate a block.
+@return TRUE if relocated */
+static
+ibool
+buf_buddy_relocate(
+/*===============*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ void* src, /*!< in: block to relocate */
+ void* dst, /*!< in: free block to relocate to */
+ ulint i) /*!< in: index of
+ buf_pool->zip_free[] */
+{
+ buf_page_t* bpage;
+ const ulint size = BUF_BUDDY_LOW << i;
+ mutex_t* mutex;
+ ulint space;
+ ulint page_no;
+
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(!mutex_own(&buf_pool->zip_mutex));
+ ut_ad(!ut_align_offset(src, size));
+ ut_ad(!ut_align_offset(dst, size));
+ ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
+ UNIV_MEM_ASSERT_W(dst, size);
+
+ /* We assume that all memory from buf_buddy_alloc()
+ is used for compressed page frames. */
+
+ /* We look inside the allocated objects returned by
+ buf_buddy_alloc() and assume that each block is a compressed
+ page that contains a valid space_id and page_no in the page
+ header. Should the fields be invalid, we will be unable to
+ relocate the block. */
+
+ /* The src block may be split into smaller blocks,
+ some of which may be free. Thus, the
+ mach_read_from_4() calls below may attempt to read
+ from free memory. The memory is "owned" by the buddy
+ allocator (and it has been allocated from the buffer
+ pool), so there is nothing wrong about this. The
+ mach_read_from_4() calls here will only trigger bogus
+ Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
+ space = mach_read_from_4((const byte*) src
+ + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+ page_no = mach_read_from_4((const byte*) src
+ + FIL_PAGE_OFFSET);
+ /* Suppress Valgrind warnings about conditional jump
+ on uninitialized value. */
+ UNIV_MEM_VALID(&space, sizeof space);
+ UNIV_MEM_VALID(&page_no, sizeof page_no);
+ bpage = buf_page_hash_get(buf_pool, space, page_no);
+
+ if (!bpage || bpage->zip.data != src) {
+ /* The block has probably been freshly
+ allocated by buf_LRU_get_free_block() but not
+ added to buf_pool->page_hash yet. Obviously,
+ it cannot be relocated. */
+
+ return(FALSE);
+ }
+
+ if (page_zip_get_size(&bpage->zip) != size) {
+ /* The block is of different size. We would
+ have to relocate all blocks covered by src.
+ For the sake of simplicity, give up. */
+ ut_ad(page_zip_get_size(&bpage->zip) < size);
+
+ return(FALSE);
+ }
+
+ /* The block must have been allocated, but it may
+ contain uninitialized data. */
+ UNIV_MEM_ASSERT_W(src, size);
+
+ mutex = buf_page_get_mutex(bpage);
+
+ mutex_enter(mutex);
+
+ if (buf_page_can_relocate(bpage)) {
+ /* Relocate the compressed page. */
+ ullint usec = ut_time_us(NULL);
+ ut_a(bpage->zip.data == src);
+ memcpy(dst, src, size);
+ bpage->zip.data = (page_zip_t*) dst;
+ mutex_exit(mutex);
+ UNIV_MEM_INVALID(src, size);
+ {
+ buf_buddy_stat_t* buddy_stat
+ = &buf_pool->buddy_stat[i];
+ buddy_stat->relocated++;
+ buddy_stat->relocated_usec
+ += ut_time_us(NULL) - usec;
+ }
+ return(TRUE);
+ }
+
+ mutex_exit(mutex);
+ return(FALSE);
+}
+
+/**********************************************************************//**
+Deallocate a block. */
+UNIV_INTERN
+void
+buf_buddy_free_low(
+/*===============*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ void* buf, /*!< in: block to be freed, must not be
+ pointed to by the buffer pool */
+ ulint i) /*!< in: index of buf_pool->zip_free[],
+ or BUF_BUDDY_SIZES */
+{
+ buf_page_t* bpage;
+ buf_page_t* buddy;
+
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(!mutex_own(&buf_pool->zip_mutex));
+ ut_ad(i <= BUF_BUDDY_SIZES);
+ ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
+ ut_ad(buf_pool->buddy_stat[i].used > 0);
+
+ buf_pool->buddy_stat[i].used--;
+recombine:
+ UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i);
+ ((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE;
+
+ if (i == BUF_BUDDY_SIZES) {
+ buf_buddy_block_free(buf_pool, buf);
+ return;
+ }
+
+ ut_ad(i < BUF_BUDDY_SIZES);
+ ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
+ ut_ad(!buf_pool_contains_zip(buf_pool, buf));
+
+ /* Do not recombine blocks if there are few free blocks.
+ We may waste up to 15360*max_len bytes to free blocks
+ (1024 + 2048 + 4096 + 8192 = 15360) */
+ if (UT_LIST_GET_LEN(buf_pool->zip_free[i]) < 16) {
+ goto func_exit;
+ }
+
+ /* Try to combine adjacent blocks. */
+ buddy = (buf_page_t*) buf_buddy_get(((byte*) buf), BUF_BUDDY_LOW << i);
+
+#ifndef UNIV_DEBUG_VALGRIND
+ /* When Valgrind instrumentation is not enabled, we can read
+ buddy->state to quickly determine that a block is not free.
+ When the block is not free, buddy->state belongs to a compressed
+ page frame that may be flagged uninitialized in our Valgrind
+ instrumentation. */
+
+ if (buddy->state != BUF_BLOCK_ZIP_FREE) {
+
+ goto buddy_nonfree;
+ }
+#endif /* !UNIV_DEBUG_VALGRIND */
+
+ for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); bpage; ) {
+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
+
+ if (bpage == buddy) {
+ /* The buddy is free: recombine */
+ buf_buddy_remove_from_free(buf_pool, bpage, i);
+buddy_is_free:
+ ut_ad(buf_page_get_state(buddy) == BUF_BLOCK_ZIP_FREE);
+ ut_ad(!buf_pool_contains_zip(buf_pool, buddy));
+ i++;
+ buf = ut_align_down(buf, BUF_BUDDY_LOW << i);
+
+ goto recombine;
+ }
+
+ ut_a(bpage != buf);
+ UNIV_MEM_ASSERT_W(bpage, BUF_BUDDY_LOW << i);
+ bpage = UT_LIST_GET_NEXT(list, bpage);
+ }
+
+#ifndef UNIV_DEBUG_VALGRIND
+buddy_nonfree:
+#endif /* !UNIV_DEBUG_VALGRIND */
+
+ ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
+
+ /* The buddy is not free. Is there a free block of this size? */
+ bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
+
+ if (bpage) {
+
+ /* Remove the block from the free list, because a successful
+ buf_buddy_relocate() will overwrite bpage->list. */
+ buf_buddy_remove_from_free(buf_pool, bpage, i);
+
+ /* Try to relocate the buddy of buf to the free block. */
+ if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
+
+ buddy->state = BUF_BLOCK_ZIP_FREE;
+ goto buddy_is_free;
+ }
+
+ buf_buddy_add_to_free(buf_pool, bpage, i);
+ }
+
+func_exit:
+ /* Free the block to the buddy list. */
+ bpage = (buf_page_t*) buf;
+
+ /* Fill large blocks with a constant pattern. */
+ ut_d(memset(bpage, i, BUF_BUDDY_LOW << i));
+ UNIV_MEM_INVALID(bpage, BUF_BUDDY_LOW << i);
+ bpage->state = BUF_BLOCK_ZIP_FREE;
+ buf_buddy_add_to_free(buf_pool, bpage, i);
+}