summaryrefslogtreecommitdiff
path: root/innobase/include
diff options
context:
space:
mode:
author heikki@hundin.mysql.fi <> 2003-01-06 22:07:25 +0200
committer heikki@hundin.mysql.fi <> 2003-01-06 22:07:25 +0200
commit b1b47e93b130da5540cc80e0e464041ff37ea57a (patch)
tree 88f03b8d4b471aa94f27b4a3ed510bcd7c2c3ccb /innobase/include
parent edb019aeaf616442d93db2eab0df4b2b09003d14 (diff)
download mariadb-git-b1b47e93b130da5540cc80e0e464041ff37ea57a.tar.gz
buf0buf.c, buf0buf.ic, buf0buf.h:
  Reduce memory usage of the buffer headers
Many files:
  Merge InnoDB-4.1 with AWE support
Diffstat (limited to 'innobase/include')
-rw-r--r-- innobase/include/btr0pcur.h | 3
-rw-r--r-- innobase/include/buf0buf.h | 120
-rw-r--r-- innobase/include/buf0buf.ic | 119
-rw-r--r-- innobase/include/buf0lru.h | 4
-rw-r--r-- innobase/include/log0recv.h | 7
-rw-r--r-- innobase/include/os0proc.h | 70
-rw-r--r-- innobase/include/srv0srv.h | 3
7 files changed, 240 insertions, 86 deletions
diff --git a/innobase/include/btr0pcur.h b/innobase/include/btr0pcur.h
index 9d07dd0de18..81f19af4d40 100644
--- a/innobase/include/btr0pcur.h
+++ b/innobase/include/btr0pcur.h
@@ -466,6 +466,9 @@ struct btr_pcur_struct{
BTR_PCUR_AFTER, depending on whether
cursor was on, before, or after the
old_rec record */
+ buf_block_t* block_when_stored;/* buffer block when the position was
+ stored; note that if AWE is on, frames
+ may move */
dulint modify_clock; /* the modify clock value of the
buffer block when the cursor position
was stored */
diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h
index 395f88a2c7c..c7db3d9bcc9 100644
--- a/innobase/include/buf0buf.h
+++ b/innobase/include/buf0buf.h
@@ -30,6 +30,7 @@ Created 11/5/1995 Heikki Tuuri
#include "sync0rw.h"
#include "hash0hash.h"
#include "ut0byte.h"
+#include "os0proc.h"
/* Flags for flush types */
#define BUF_FLUSH_LRU 1
@@ -58,23 +59,34 @@ extern ibool buf_debug_prints;/* If this is set TRUE, the program
occurs */
/************************************************************************
-Initializes the buffer pool of the database. */
+Creates the buffer pool. */
-void
+buf_pool_t*
buf_pool_init(
/*==========*/
- ulint max_size, /* in: maximum size of the pool in blocks */
- ulint curr_size); /* in: current size to use, must be <=
+ /* out, own: buf_pool object, NULL if not
+ enough memory or error */
+ ulint max_size, /* in: maximum size of the buf_pool in
+ blocks */
+ ulint curr_size, /* in: current size to use, must be <=
+ max_size, currently must be equal to
max_size */
+ ulint n_frames); /* in: number of frames; if AWE is used,
+ this is the size of the address space window
+ where physical memory pages are mapped; if
+ AWE is not used then this must be the same
+ as max_size */
/*************************************************************************
-Gets the current size of buffer pool in bytes. */
+Gets the current size of the buffer pool in bytes. In the case of AWE, this is
+the size of the AWE window (= the frames). */
UNIV_INLINE
ulint
buf_pool_get_curr_size(void);
/*========================*/
/* out: size in bytes */
/*************************************************************************
-Gets the maximum size of buffer pool in bytes. */
+Gets the maximum size of the buffer pool in bytes. In the case of AWE, this is
+the size of the AWE window (= the frames). */
UNIV_INLINE
ulint
buf_pool_get_max_size(void);
@@ -138,8 +150,8 @@ improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */
NOTE! The following macros should be used instead of
buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and
RW_X_LATCH are allowed as LA! */
-#define buf_page_optimistic_get(LA, G, MC, MTR) buf_page_optimistic_get_func(\
- LA, G, MC, IB__FILE__, __LINE__, MTR)
+#define buf_page_optimistic_get(LA, BL, G, MC, MTR) buf_page_optimistic_get_func(\
+ LA, BL, G, MC, IB__FILE__, __LINE__, MTR)
/************************************************************************
This is the general function used to get optimistic access to a database
page. */
@@ -149,7 +161,9 @@ buf_page_optimistic_get_func(
/*=========================*/
/* out: TRUE if success */
ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
- buf_frame_t* guess, /* in: guessed frame */
+ buf_block_t* block, /* in: guessed block */
+ buf_frame_t* guess, /* in: guessed frame; note that AWE may move
+ frames */
dulint modify_clock,/* in: modify clock value if mode is
..._GUESS_ON_CLOCK */
char* file, /* in: file name */
@@ -350,6 +364,16 @@ buf_frame_modify_clock_inc(
/* out: new value */
buf_frame_t* frame); /* in: pointer to a frame */
/************************************************************************
+Increments the modify clock of a block by 1. The caller must either (1) own
+the buf_pool mutex, with the block's bufferfix count being zero, or (2) own an
+x-lock on the block. */
+UNIV_INLINE
+dulint
+buf_block_modify_clock_inc(
+/*=======================*/
+ /* out: new value */
+ buf_block_t* block); /* in: block */
+/************************************************************************
Returns the value of the modify clock. The caller must have an s-lock
or x-lock on the block. */
UNIV_INLINE
@@ -428,7 +452,7 @@ UNIV_INLINE
buf_frame_t*
buf_frame_align(
/*============*/
- /* out: pointer to block */
+ /* out: pointer to frame */
byte* ptr); /* in: pointer to a frame */
/***********************************************************************
Checks if a pointer points to the block array of the buffer pool (blocks, not
@@ -505,6 +529,19 @@ buf_pool_invalidate(void);
--------------------------- LOWER LEVEL ROUTINES -------------------------
=========================================================================*/
+/************************************************************************
+Maps the page of a block to a frame, if not mapped yet. Unmaps some page
+from the end of the awe_LRU_free_mapped list. */
+
+void
+buf_awe_map_page_to_frame(
+/*======================*/
+ buf_block_t* block, /* in: block whose page should be
+ mapped to a frame */
+ ibool add_to_mapped_list);/* in: TRUE if, in the case
+ we need to map the page, we should also
+ add the block to the
+ awe_LRU_free_mapped list */
/*************************************************************************
Adds latch level info for the rw-lock protecting the buffer frame. This
should be called in the debug version after a successful latching of a
@@ -638,7 +675,16 @@ struct buf_block_struct{
byte* frame; /* pointer to buffer frame which
is of size UNIV_PAGE_SIZE, and
aligned to an address divisible by
- UNIV_PAGE_SIZE */
+ UNIV_PAGE_SIZE; if AWE is used, this
+ will be NULL for the pages which are
+ currently not mapped into the virtual
+ address space window of the buffer
+ pool */
+ os_awe_t* awe_info; /* if AWE is used, then an array of
+ awe page infos for
+ UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE
+ (normally = 4) physical memory
+ pages; otherwise NULL */
ulint space; /* space id of the page */
ulint offset; /* page number within the space */
ulint lock_hash_val; /* hashed value of the page address
@@ -691,6 +737,10 @@ struct buf_block_struct{
/* node of the free block list */
UT_LIST_NODE_T(buf_block_t) LRU;
/* node of the LRU list */
+ UT_LIST_NODE_T(buf_block_t) awe_LRU_free_mapped;
+ /* in the AWE version node in the
+ list of free and LRU blocks which are
+ mapped to a frame */
ulint LRU_position; /* value which monotonically
decreases (or may stay constant if
the block is in the old blocks) toward
@@ -758,11 +808,12 @@ struct buf_block_struct{
BTR_SEARCH_RIGHT_SIDE in hash
indexing */
/* 6. Debug fields */
-
+#ifdef UNIV_SYNC_DEBUG
rw_lock_t debug_latch; /* in the debug version, each thread
which bufferfixes the block acquires
an s-latch here; so we can use the
debug utilities in sync0rw */
+#endif
ibool file_page_was_freed;
/* this is set to TRUE when fsp
frees a page in buffer pool */
@@ -781,16 +832,36 @@ struct buf_pool_struct{
struct and control blocks, except the
read-write lock in them */
byte* frame_mem; /* pointer to the memory area which
- was allocated for the frames */
+ was allocated for the frames; in AWE
+ this is the virtual address space
+ window where we map pages stored
+ in physical memory */
byte* frame_zero; /* pointer to the first buffer frame:
this may differ from frame_mem, because
this is aligned by the frame size */
- byte* high_end; /* pointer to the end of the
- buffer pool */
+ byte* high_end; /* pointer to the end of the buffer
+ frames */
+ ulint n_frames; /* number of frames */
buf_block_t* blocks; /* array of buffer control blocks */
+ buf_block_t** blocks_of_frames;/* inverse mapping which can be used
+ to retrieve the buffer control block
+ of a frame; this is an array which
+ lists the blocks of frames in the
+ order frame_zero,
+ frame_zero + UNIV_PAGE_SIZE, ...
+ a control block is always assigned
+ for each frame, even if the frame does
+ not contain any data; note that in AWE
+ there are more control blocks than
+ buffer frames */
+ os_awe_t* awe_info; /* if AWE is used, AWE info for the
+ physical 4 kB memory pages associated
+ with buffer frames */
ulint max_size; /* number of control blocks ==
maximum pool size in pages */
- ulint curr_size; /* current pool size in pages */
+ ulint curr_size; /* current pool size in pages;
+ currently always the same as
+ max_size */
hash_table_t* page_hash; /* hash table of the file pages */
ulint n_pend_reads; /* number of pending read operations */
@@ -802,11 +873,14 @@ struct buf_pool_struct{
ulint n_pages_created;/* number of pages created in the pool
with no read */
ulint n_page_gets; /* number of page gets performed;
- also successful seraches through
+ also successful searches through
the adaptive hash index are
counted as page gets; this field
is NOT protected by the buffer
pool mutex */
+ ulint n_pages_awe_remapped; /* if AWE is enabled, the
+ number of remaps of blocks to
+ buffer frames */
ulint n_page_gets_old;/* n_page_gets when buf_print was
last time called: used to calculate
hit rate */
@@ -815,6 +889,7 @@ struct buf_pool_struct{
ulint n_pages_written_old;/* number write operations */
ulint n_pages_created_old;/* number of pages created in
the pool with no read */
+ ulint n_pages_awe_remapped_old;
/* 2. Page flushing algorithm fields */
UT_LIST_BASE_NODE_T(buf_block_t) flush_list;
@@ -847,7 +922,10 @@ struct buf_pool_struct{
/* 3. LRU replacement algorithm fields */
UT_LIST_BASE_NODE_T(buf_block_t) free;
- /* base node of the free block list */
+ /* base node of the free block list;
+ in the case of AWE, at the start are
+ always free blocks for which the
+ physical memory is mapped to a frame */
UT_LIST_BASE_NODE_T(buf_block_t) LRU;
/* base node of the LRU list */
buf_block_t* LRU_old; /* pointer to the about 3/8 oldest
@@ -859,6 +937,12 @@ struct buf_pool_struct{
see buf0lru.c for the restrictions
on this value; not defined if
LRU_old == NULL */
+ UT_LIST_BASE_NODE_T(buf_block_t) awe_LRU_free_mapped;
+ /* list of those blocks which are
+ in the LRU list or the free list, and
+ where the page is mapped to a frame;
+ thus, frames allocated, e.g., to the
+ lock table, are not in this list */
};
/* States of a control block */
diff --git a/innobase/include/buf0buf.ic b/innobase/include/buf0buf.ic
index 7227c79dc6a..d4e7122f3f9 100644
--- a/innobase/include/buf0buf.ic
+++ b/innobase/include/buf0buf.ic
@@ -36,25 +36,27 @@ buf_block_peek_if_too_old(
}
/*************************************************************************
-Gets the current size of buffer buf_pool in bytes. */
+Gets the current size of the buffer pool in bytes. In the case of AWE, this is
+the size of the AWE window (= the frames). */
UNIV_INLINE
ulint
buf_pool_get_curr_size(void)
/*========================*/
/* out: size in bytes */
{
- return((buf_pool->curr_size) * UNIV_PAGE_SIZE);
+ return((buf_pool->n_frames) * UNIV_PAGE_SIZE);
}
/*************************************************************************
-Gets the maximum size of buffer buf_pool in bytes. */
+Gets the maximum size of the buffer pool in bytes. In the case of AWE, this is
+the size of the AWE window (= the frames). */
UNIV_INLINE
ulint
buf_pool_get_max_size(void)
/*=======================*/
/* out: size in bytes */
{
- return((buf_pool->max_size) * UNIV_PAGE_SIZE);
+ return((buf_pool->n_frames) * UNIV_PAGE_SIZE);
}
/***********************************************************************
@@ -207,54 +209,24 @@ buf_block_align(
frame_zero = buf_pool->frame_zero;
- ut_ad((ulint)ptr >= (ulint)frame_zero);
-
- block = buf_pool_get_nth_block(buf_pool, ((ulint)(ptr - frame_zero))
- >> UNIV_PAGE_SIZE_SHIFT);
- if (block < buf_pool->blocks
- || block >= buf_pool->blocks + buf_pool->max_size) {
+ if ((ulint)ptr < (ulint)frame_zero
+ || (ulint)ptr > (ulint)(buf_pool->high_end)) {
+ ut_print_timestamp(stderr);
fprintf(stderr,
-"InnoDB: Error: trying to access a stray pointer %lx\n"
-"InnoDB: buf pool start is at %lx, number of pages %lu\n", (ulint)ptr,
- (ulint)frame_zero, buf_pool->max_size);
+" InnoDB: Error: trying to access a stray pointer %lx\n"
+"InnoDB: buf pool start is at %lx, end at %lx\n"
+"InnoDB: Probable reason is database corruption or memory\n"
+"InnoDB: corruption. If this happens in an InnoDB database recovery,\n"
+"InnoDB: you can look from section 6.1 at http://www.innodb.com/ibman.html\n"
+"InnoDB: how to force recovery.\n",
+ (ulint)ptr, (ulint)frame_zero,
+ (ulint)(buf_pool->high_end));
ut_a(0);
}
-
- return(block);
-}
-
-/***********************************************************************
-Gets the block to whose frame the pointer is pointing to. Does not
-require a file page to be bufferfixed. */
-UNIV_INLINE
-buf_block_t*
-buf_block_align_low(
-/*================*/
- /* out: pointer to block */
- byte* ptr) /* in: pointer to a frame */
-{
- buf_block_t* block;
- buf_frame_t* frame_zero;
-
- ut_ad(ptr);
-
- frame_zero = buf_pool->frame_zero;
-
- ut_ad((ulint)ptr >= (ulint)frame_zero);
-
- block = buf_pool_get_nth_block(buf_pool, ((ulint)(ptr - frame_zero))
- >> UNIV_PAGE_SIZE_SHIFT);
- if (block < buf_pool->blocks
- || block >= buf_pool->blocks + buf_pool->max_size) {
-
- fprintf(stderr,
-"InnoDB: Error: trying to access a stray pointer %lx\n"
-"InnoDB: buf pool start is at %lx, number of pages %lu\n", (ulint)ptr,
- (ulint)frame_zero, buf_pool->max_size);
- ut_a(0);
- }
-
+
+ block = *(buf_pool->blocks_of_frames + (((ulint)(ptr - frame_zero))
+ >> UNIV_PAGE_SIZE_SHIFT));
return(block);
}
@@ -264,7 +236,7 @@ UNIV_INLINE
buf_frame_t*
buf_frame_align(
/*============*/
- /* out: pointer to block */
+ /* out: pointer to frame */
byte* ptr) /* in: pointer to a frame */
{
buf_frame_t* frame;
@@ -273,14 +245,19 @@ buf_frame_align(
frame = ut_align_down(ptr, UNIV_PAGE_SIZE);
- if (((ulint)frame
- < (ulint)(buf_pool->frame_zero))
- || ((ulint)frame > (ulint)(buf_pool_get_nth_block(buf_pool,
- buf_pool->max_size - 1)->frame))) {
+ if (((ulint)frame < (ulint)(buf_pool->frame_zero))
+ || (ulint)frame >= (ulint)(buf_pool->high_end)) {
+
+ ut_print_timestamp(stderr);
fprintf(stderr,
-"InnoDB: Error: trying to access a stray pointer %lx\n"
-"InnoDB: buf pool start is at %lx, number of pages %lu\n", (ulint)ptr,
- (ulint)(buf_pool->frame_zero), buf_pool->max_size);
+" InnoDB: Error: trying to access a stray pointer %lx\n"
+"InnoDB: buf pool start is at %lx, end at %lx\n"
+"InnoDB: Probable reason is database corruption or memory\n"
+"InnoDB: corruption. If this happens in an InnoDB database recovery,\n"
+"InnoDB: you can look from section 6.1 at http://www.innodb.com/ibman.html\n"
+"InnoDB: how to force recovery.\n",
+ (ulint)ptr, (ulint)(buf_pool->frame_zero),
+ (ulint)(buf_pool->high_end));
ut_a(0);
}
@@ -469,7 +446,7 @@ buf_frame_modify_clock_inc(
ut_ad(frame);
- block = buf_block_align_low(frame);
+ block = buf_block_align(frame);
ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
|| rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
@@ -480,6 +457,25 @@ buf_frame_modify_clock_inc(
}
/************************************************************************
+Increments the modify clock of a block by 1. The caller must either (1) own
+the buf_pool mutex, with the block's bufferfix count being zero, or (2) own an
+x-lock on the block. */
+UNIV_INLINE
+dulint
+buf_block_modify_clock_inc(
+/*=======================*/
+ /* out: new value */
+ buf_block_t* block) /* in: block */
+{
+ ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
+ || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
+
+ UT_DULINT_INC(block->modify_clock);
+
+ return(block->modify_clock);
+}
+
+/************************************************************************
Returns the value of the modify clock. The caller must have an s-lock
or x-lock on the block. */
UNIV_INLINE
@@ -508,15 +504,16 @@ void
buf_block_buf_fix_inc_debug(
/*========================*/
buf_block_t* block, /* in: block to bufferfix */
- char* file, /* in: file name */
- ulint line) /* in: line */
+ char* file __attribute__ ((unused)), /* in: file name */
+ ulint line __attribute__ ((unused))) /* in: line */
{
+#ifdef UNIV_SYNC_DEBUG
ibool ret;
-
+
ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line);
ut_ad(ret == TRUE);
-
+#endif
block->buf_fix_count++;
}
diff --git a/innobase/include/buf0lru.h b/innobase/include/buf0lru.h
index 946b6c4e31d..6a3c948507d 100644
--- a/innobase/include/buf0lru.h
+++ b/innobase/include/buf0lru.h
@@ -53,7 +53,9 @@ LRU list to the free list. */
buf_block_t*
buf_LRU_get_free_block(void);
/*=========================*/
- /* out: the free control block */
+ /* out: the free control block; also if AWE is
+ used, it is guaranteed that the block has its
+ page mapped to a frame when we return */
/**********************************************************************
Puts a block back to the free list. */
diff --git a/innobase/include/log0recv.h b/innobase/include/log0recv.h
index 7418e4abf1b..bef42cfec1c 100644
--- a/innobase/include/log0recv.h
+++ b/innobase/include/log0recv.h
@@ -355,12 +355,7 @@ in the debug version: spaces with an odd number as the id are replicate
spaces */
#define RECV_REPLICA_SPACE_ADD 1
-/* This many blocks must be left free in the buffer pool when we scan
-the log and store the scanned log records in the buffer pool: we will
-use these free blocks to read in pages when we start applying the
-log records to the database. */
-
-#define RECV_POOL_N_FREE_BLOCKS (ut_min(256, buf_pool_get_curr_size() / 8))
+extern ulint recv_n_pool_free_frames;
#ifndef UNIV_NONINL
#include "log0recv.ic"
diff --git a/innobase/include/os0proc.h b/innobase/include/os0proc.h
index 79750e5c1f7..08510db4366 100644
--- a/innobase/include/os0proc.h
+++ b/innobase/include/os0proc.h
@@ -15,6 +15,76 @@ Created 9/30/1995 Heikki Tuuri
typedef void* os_process_t;
typedef unsigned long int os_process_id_t;
+/* The cell type in os_awe_allocate_mem page info */
+#ifdef __NT__
+typedef ULONG_PTR os_awe_t;
+#else
+typedef ulint os_awe_t;
+#endif
+
+/* Physical page size when Windows AWE is used. This is the normal
+page size of an Intel x86 processor. We cannot use AWE with 2 MB or 4 MB
+pages. */
+#define OS_AWE_X86_PAGE_SIZE 4096
+
+/********************************************************************
+Windows AWE support. Tries to enable the "lock pages in memory" privilege for
+the current process so that the current process can allocate memory-locked
+virtual address space to act as the window where AWE maps physical memory. */
+
+ibool
+os_awe_enable_lock_pages_in_mem(void);
+/*=================================*/
+ /* out: TRUE if success, FALSE if error;
+ prints error info to stderr if no success */
+/********************************************************************
+Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86
+processor. */
+
+ibool
+os_awe_allocate_physical_mem(
+/*=========================*/
+ /* out: TRUE if success */
+ os_awe_t** page_info, /* out, own: array of opaque data containing
+ the info for allocated physical memory pages;
+ each allocated 4 kB physical memory page has
+ one slot of type os_awe_t in the array */
+ ulint n_megabytes); /* in: number of megabytes to allocate */
+/********************************************************************
+Allocates a window in the virtual address space where we can then map
+pages of physical memory. */
+
+byte*
+os_awe_allocate_virtual_mem_window(
+/*===============================*/
+ /* out, own: allocated memory, or NULL if did not
+ succeed */
+ ulint size); /* in: virtual memory allocation size in bytes, must
+ be < 2 GB */
+/********************************************************************
+With this function you can map parts of physical memory allocated with
+the ..._allocate_physical_mem to the virtual address space allocated with
+the previous function. Intel implements this so that the process page
+tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP
+showed that this takes < 1 microsecond, much better than the estimated 80 us
+for copying a 16 kB page memory to memory. But, the operation will at least
+partially invalidate the translation lookaside buffer (TLB) of all
+processors. Under a real-world load the performance hit may be bigger. */
+
+ibool
+os_awe_map_physical_mem_to_window(
+/*==============================*/
+ /* out: TRUE if success; the function
+ calls exit(1) in case of an error */
+ byte* ptr, /* in: a page-aligned pointer to
+ somewhere in the virtual address
+ space window; we map the physical mem
+ pages here */
+ ulint n_mem_pages, /* in: number of 4 kB mem pages to
+ map */
+ os_awe_t* page_info); /* in: array of page infos for those
+ pages; each page has one slot in the
+ array */
/********************************************************************
Converts the current process id to a number. It is not guaranteed that the
number is unique. In Linux returns the 'process number' of the current
diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h
index ad6f71f7a3a..bc0960ae023 100644
--- a/innobase/include/srv0srv.h
+++ b/innobase/include/srv0srv.h
@@ -61,6 +61,7 @@ extern ulint srv_flush_log_at_trx_commit;
extern byte srv_latin1_ordering[256];/* The sort order table of the latin1
character set */
extern ulint srv_pool_size;
+extern ulint srv_awe_window_size;
extern ulint srv_mem_pool_size;
extern ulint srv_lock_table_size;
@@ -86,6 +87,8 @@ extern ibool srv_use_doublewrite_buf;
extern ibool srv_set_thread_priorities;
extern int srv_query_thread_priority;
+extern ibool srv_use_awe;
+extern ibool srv_use_adaptive_hash_indexes;
/*-------------------------------------------*/
extern ulint srv_n_rows_inserted;