summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorunknown <kaa@polly.local>2004-12-14 22:26:31 +0300
committerunknown <kaa@polly.local>2004-12-14 22:26:31 +0300
commit526fbcbbc551495d1fed482fbfa9b4f7496a935c (patch)
tree039af5ac48ea9b322014c4c858874b5452e38d9d
parent8d45c5f9d0ed290bb0bb880488b5ff1d5c2c7619 (diff)
downloadmariadb-git-526fbcbbc551495d1fed482fbfa9b4f7496a935c.tar.gz
Forward port of HugeTLB, InnoDB doublewrite and checksums patches to 5.0
BitKeeper/etc/logging_ok: Logging to logging@openlogging.org accepted
-rw-r--r--BitKeeper/etc/logging_ok1
-rw-r--r--configure.in18
-rw-r--r--include/my_sys.h15
-rw-r--r--innobase/buf/buf0buf.c56
-rw-r--r--innobase/buf/buf0flu.c6
-rw-r--r--innobase/include/buf0buf.h2
-rw-r--r--innobase/include/os0proc.h28
-rw-r--r--innobase/include/srv0srv.h1
-rw-r--r--innobase/os/os0proc.c81
-rw-r--r--innobase/srv/srv0srv.c1
-rw-r--r--innobase/trx/trx0sys.c19
-rw-r--r--mysys/Makefile.am2
-rw-r--r--mysys/mf_keycache.c10
-rw-r--r--mysys/my_largepage.c167
-rw-r--r--mysys/my_static.c6
-rw-r--r--sql/ha_innodb.cc10
-rw-r--r--sql/ha_innodb.h4
-rw-r--r--sql/mysql_priv.h2
-rw-r--r--sql/mysqld.cc35
-rw-r--r--sql/set_var.cc4
20 files changed, 435 insertions, 33 deletions
diff --git a/BitKeeper/etc/logging_ok b/BitKeeper/etc/logging_ok
index 9f58011490e..107e4cadacf 100644
--- a/BitKeeper/etc/logging_ok
+++ b/BitKeeper/etc/logging_ok
@@ -96,6 +96,7 @@ joerg@mysql.com
joreland@mysql.com
jorge@linux.jorge.mysql.com
jplindst@t41.(none)
+kaa@polly.local
kaj@work.mysql.com
kent@mysql.com
konstantin@mysql.com
diff --git a/configure.in b/configure.in
index 7a334dee7c1..786a5429064 100644
--- a/configure.in
+++ b/configure.in
@@ -748,7 +748,7 @@ AC_CHECK_HEADERS(fcntl.h float.h floatingpoint.h ieeefp.h limits.h \
strings.h string.h synch.h sys/mman.h sys/socket.h netinet/in.h arpa/inet.h \
sys/timeb.h sys/types.h sys/un.h sys/vadvise.h sys/wait.h term.h \
unistd.h utime.h sys/utime.h termio.h termios.h sched.h crypt.h alloca.h \
- sys/ioctl.h malloc.h sys/malloc.h linux/config.h)
+ sys/ioctl.h malloc.h sys/malloc.h sys/ipc.h sys/shm.h linux/config.h)
#--------------------------------------------------------------------
# Check for system libraries. Adds the library to $LIBS
@@ -775,6 +775,22 @@ AC_CHECK_FUNC(crypt, AC_DEFINE([HAVE_CRYPT], [1], [crypt]))
AC_CHECK_FUNC(sem_init, , AC_CHECK_LIB(posix4, sem_init))
MYSQL_CHECK_ZLIB_WITH_COMPRESS
+# For large pages support
+if test "$IS_LINUX" = "true"
+then
+ # For SHM_HUGETLB on Linux
+ AC_CHECK_DECLS(SHM_HUGETLB,
+ AC_DEFINE([HAVE_LARGE_PAGES], [1],
+ [Define if you have large pages support])
+ AC_DEFINE([HUGETLB_USE_PROC_MEMINFO], [1],
+ [Define if /proc/meminfo shows the huge page size (Linux only)])
+ , ,
+ [
+#include <sys/shm.h>
+ ]
+ )
+fi
+
#--------------------------------------------------------------------
# Check for TCP wrapper support
#--------------------------------------------------------------------
diff --git a/include/my_sys.h b/include/my_sys.h
index 3de3ec9687c..e630c9bdbba 100644
--- a/include/my_sys.h
+++ b/include/my_sys.h
@@ -168,6 +168,16 @@ extern char *my_strdup_with_length(const byte *from, uint length,
#define TRASH(A,B) /* nothing */
#endif
+#ifdef HAVE_LARGE_PAGES
+extern uint my_get_large_page_size(void);
+extern gptr my_large_malloc(uint size, myf my_flags);
+extern void my_large_free(gptr ptr, myf my_flags);
+#else
+#define my_get_large_page_size() (0)
+#define my_large_malloc(A,B) my_malloc_lock((A),(B))
+#define my_large_free(A,B) my_free_lock((A),(B))
+#endif /* HAVE_LARGE_PAGES */
+
#ifdef HAVE_ALLOCA
#if defined(_AIX) && !defined(__GNUC__) && !defined(_AIX43)
#pragma alloca
@@ -213,6 +223,11 @@ extern int (*fatal_error_handler_hook)(uint my_err, const char *str,
myf MyFlags);
extern uint my_file_limit;
+#ifdef HAVE_LARGE_PAGES
+extern my_bool my_use_large_pages;
+extern uint my_large_page_size;
+#endif
+
/* charsets */
extern CHARSET_INFO *default_charset_info;
extern CHARSET_INFO *all_charsets[256];
diff --git a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c
index 3930ea93889..89f851709db 100644
--- a/innobase/buf/buf0buf.c
+++ b/innobase/buf/buf0buf.c
@@ -331,33 +331,43 @@ buf_page_is_corrupted(
}
}
#endif
- old_checksum = buf_calc_page_old_checksum(read_buf);
-
- old_checksum_field = mach_read_from_4(read_buf + UNIV_PAGE_SIZE
+
+ /* If we use checksums validation, make additional check before returning
+ TRUE to ensure that the checksum is not equal to BUF_NO_CHECKSUM_MAGIC which
+ might be stored by InnoDB with checksums disabled.
+ Otherwise, skip checksum calculation and return FALSE */
+
+ if (srv_use_checksums) {
+ old_checksum = buf_calc_page_old_checksum(read_buf);
+
+ old_checksum_field = mach_read_from_4(read_buf + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN_OLD_CHKSUM);
- /* There are 2 valid formulas for old_checksum_field:
- 1. Very old versions of InnoDB only stored 8 byte lsn to the start
- and the end of the page.
- 2. Newer InnoDB versions store the old formula checksum there. */
+ /* There are 2 valid formulas for old_checksum_field:
+ 1. Very old versions of InnoDB only stored 8 byte lsn to the start
+ and the end of the page.
+ 2. Newer InnoDB versions store the old formula checksum there. */
- if (old_checksum_field != mach_read_from_4(read_buf + FIL_PAGE_LSN)
- && old_checksum_field != old_checksum) {
+ if (old_checksum_field != mach_read_from_4(read_buf + FIL_PAGE_LSN)
+ && old_checksum_field != old_checksum
+ && old_checksum_field != BUF_NO_CHECKSUM_MAGIC) {
- return(TRUE);
- }
+ return(TRUE);
+ }
- checksum = buf_calc_page_new_checksum(read_buf);
- checksum_field = mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM);
+ checksum = buf_calc_page_new_checksum(read_buf);
+ checksum_field = mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM);
- /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
- (always equal to 0), to FIL_PAGE_SPACE_SPACE_OR_CHKSUM */
+ /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
+ (always equal to 0), to FIL_PAGE_SPACE_SPACE_OR_CHKSUM */
- if (checksum_field != 0 && checksum_field != checksum) {
-
- return(TRUE);
- }
+ if (checksum_field != 0 && checksum_field != checksum
+ && checksum_field != BUF_NO_CHECKSUM_MAGIC) {
+ return(TRUE);
+ }
+ }
+
return(FALSE);
}
@@ -379,8 +389,10 @@ buf_page_print(
ut_print_buf(stderr, read_buf, UNIV_PAGE_SIZE);
fputs("InnoDB: End of page dump\n", stderr);
- checksum = buf_calc_page_new_checksum(read_buf);
- old_checksum = buf_calc_page_old_checksum(read_buf);
+ checksum = srv_use_checksums ?
+ buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
+ old_checksum = srv_use_checksums ?
+ buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
ut_print_timestamp(stderr);
fprintf(stderr,
@@ -548,7 +560,7 @@ buf_pool_init(
}
/*----------------------------------------*/
} else {
- buf_pool->frame_mem = ut_malloc_low(
+ buf_pool->frame_mem = os_mem_alloc_large(
UNIV_PAGE_SIZE * (n_frames + 1),
TRUE, FALSE);
}
diff --git a/innobase/buf/buf0flu.c b/innobase/buf/buf0flu.c
index aff4fe92a71..a0ca614d9b3 100644
--- a/innobase/buf/buf0flu.c
+++ b/innobase/buf/buf0flu.c
@@ -448,7 +448,8 @@ buf_flush_init_for_writing(
/* Store the new formula checksum */
mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
- buf_calc_page_new_checksum(page));
+ srv_use_checksums ?
+ buf_calc_page_new_checksum(page) : BUF_NO_CHECKSUM_MAGIC);
/* We overwrite the first 4 bytes of the end lsn field to store
the old formula checksum. Since it depends also on the field
@@ -456,7 +457,8 @@ buf_flush_init_for_writing(
new formula checksum. */
mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
- buf_calc_page_old_checksum(page));
+ srv_use_checksums ?
+ buf_calc_page_old_checksum(page) : BUF_NO_CHECKSUM_MAGIC);
}
/************************************************************************
diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h
index 8df1211327f..5ee323f1b1e 100644
--- a/innobase/include/buf0buf.h
+++ b/innobase/include/buf0buf.h
@@ -52,6 +52,8 @@ Created 11/5/1995 Heikki Tuuri
/* Modes for buf_page_get_known_nowait */
#define BUF_MAKE_YOUNG 51
#define BUF_KEEP_OLD 52
+/* Magic value to use instead of checksums when they are disabled */
+#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
extern buf_pool_t* buf_pool; /* The buffer pool of the database */
extern ibool buf_debug_prints;/* If this is set TRUE, the program
diff --git a/innobase/include/os0proc.h b/innobase/include/os0proc.h
index d0d3cf82e38..b0b72e18675 100644
--- a/innobase/include/os0proc.h
+++ b/innobase/include/os0proc.h
@@ -12,6 +12,11 @@ Created 9/30/1995 Heikki Tuuri
#include "univ.i"
+#ifdef UNIV_LINUX
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#endif
+
typedef void* os_process_t;
typedef unsigned long int os_process_id_t;
@@ -27,6 +32,10 @@ page size of an Intel x86 processor. We cannot use AWE with 2 MB or 4 MB
pages. */
#define OS_AWE_X86_PAGE_SIZE 4096
+extern ibool os_use_large_pages;
+/* Large page size. This may be a boot-time option on some platforms */
+extern ulint os_large_page_size;
+
/********************************************************************
Windows AWE support. Tries to enable the "lock pages in memory" privilege for
the current process so that the current process can allocate memory-locked
@@ -103,6 +112,25 @@ os_mem_alloc_nocache(
/* out: allocated memory */
ulint n); /* in: number of bytes */
/********************************************************************
+Allocates large pages memory. */
+
+void*
+os_mem_alloc_large(
+/*=================*/
+ /* out: allocated memory */
+ ulint n, /* in: number of bytes */
+ ibool set_to_zero, /* in: TRUE if allocated memory should be set
+ to zero if UNIV_SET_MEM_TO_ZERO is defined */
+ ibool assert_on_error); /* in: if TRUE, we crash mysqld if the memory
+ cannot be allocated */
+/********************************************************************
+Frees large pages memory. */
+
+void
+os_mem_free_large(
+/*=================*/
+void *ptr); /* in: number of bytes */
+/********************************************************************
Sets the priority boost for threads released from waiting within the current
process. */
diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h
index d4cc7d8222f..84b7d14ca00 100644
--- a/innobase/include/srv0srv.h
+++ b/innobase/include/srv0srv.h
@@ -107,6 +107,7 @@ extern ibool srv_very_fast_shutdown; /* if this TRUE, do not flush the
extern ibool srv_innodb_status;
extern ibool srv_use_doublewrite_buf;
+extern ibool srv_use_checksums;
extern ibool srv_set_thread_priorities;
extern int srv_query_thread_priority;
diff --git a/innobase/os/os0proc.c b/innobase/os/os0proc.c
index 2f155788420..98254ae1055 100644
--- a/innobase/os/os0proc.c
+++ b/innobase/os/os0proc.c
@@ -69,6 +69,10 @@ byte* os_awe_window;
ulint os_awe_window_size;
#endif
+ibool os_use_large_pages;
+/* Large page size. This may be a boot-time option on some platforms */
+ulint os_large_page_size;
+
/********************************************************************
Windows AWE support. Tries to enable the "lock pages in memory" privilege for
the current process so that the current process can allocate memory-locked
@@ -516,6 +520,83 @@ os_mem_alloc_nocache(
}
/********************************************************************
+Allocates large pages memory. */
+
+void*
+os_mem_alloc_large(
+/*=================*/
+ /* out: allocated memory */
+ ulint n, /* in: number of bytes */
+ ibool set_to_zero, /* in: TRUE if allocated memory should be set
+ to zero if UNIV_SET_MEM_TO_ZERO is defined */
+ ibool assert_on_error) /* in: if TRUE, we crash mysqld if the memory
+ cannot be allocated */
+{
+#ifdef UNIV_LINUX
+ ulint size;
+ int shmid;
+ void *ptr = NULL;
+ struct shmid_ds buf;
+
+ if (!os_use_large_pages || !os_large_page_size) {
+ goto skip;
+ }
+
+ /* Align block size to os_large_page_size */
+ size = ((n - 1) & ~(os_large_page_size - 1)) + os_large_page_size;
+
+ shmid = shmget(IPC_PRIVATE, (size_t)size, SHM_HUGETLB | SHM_R | SHM_W);
+ if (shmid < 0) {
+ fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. "
+ "errno %d\n", n, errno);
+ } else {
+ ptr = shmat(shmid, NULL, 0);
+ if (ptr == (void *)-1) {
+ fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to attach shared memory "
+ "segment, errno %d\n", errno);
+ }
+ /*
+ Remove the shared memory segment so that it will be automatically freed
+ after memory is detached or process exits
+ */
+ shmctl(shmid, IPC_RMID, &buf);
+ }
+
+ if (ptr) {
+ if (set_to_zero) {
+#ifdef UNIV_SET_MEM_TO_ZERO
+ memset(ret, '\0', size);
+#endif
+ }
+
+ return(ptr);
+ }
+
+ fprintf(stderr, "InnoDB HugeTLB: Warning: Using conventional memory pool\n");
+#endif
+skip:
+
+ return(ut_malloc_low(n, set_to_zero, assert_on_error));
+}
+
+/********************************************************************
+Frees large pages memory. */
+
+void
+os_mem_free_large(
+/*=================*/
+ void *ptr) /* in: number of bytes */
+{
+#ifdef UNIV_LINUX
+ if (os_use_large_pages && os_large_page_size && !shmdt(ptr)) {
+ return;
+ }
+#endif
+
+ ut_free(ptr);
+}
+
+/********************************************************************
Sets the priority boost for threads released from waiting within the current
process. */
diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c
index 40befae424e..83d4fb4d39d 100644
--- a/innobase/srv/srv0srv.c
+++ b/innobase/srv/srv0srv.c
@@ -313,6 +313,7 @@ ibool srv_very_fast_shutdown = FALSE; /* if this TRUE, do not flush the
ibool srv_innodb_status = FALSE;
ibool srv_use_doublewrite_buf = TRUE;
+ibool srv_use_checksums = TRUE;
ibool srv_set_thread_priorities = TRUE;
int srv_query_thread_priority = 0;
diff --git a/innobase/trx/trx0sys.c b/innobase/trx/trx0sys.c
index 35e18064329..57166e98f45 100644
--- a/innobase/trx/trx0sys.c
+++ b/innobase/trx/trx0sys.c
@@ -125,6 +125,22 @@ trx_doublewrite_init(
}
/********************************************************************
+Frees the doublewrite buffer. */
+static
+void
+trx_doublewrite_free(void)
+/*======================*/
+{
+ mutex_free(&(trx_doublewrite->mutex));
+
+ mem_free(trx_doublewrite->buf_block_arr);
+ ut_free(trx_doublewrite->write_buf_unaligned);
+
+ mem_free(trx_doublewrite);
+ trx_doublewrite = NULL;
+}
+
+/********************************************************************
Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
multiple tablespace format. */
@@ -512,6 +528,9 @@ trx_sys_doublewrite_init_or_restore_pages(
fil_flush_file_spaces(FIL_TABLESPACE);
+ if (!srv_use_doublewrite_buf)
+ trx_doublewrite_free();
+
leave_func:
ut_free(unaligned_read_buf);
}
diff --git a/mysys/Makefile.am b/mysys/Makefile.am
index 6a118df03cc..b0ca1b402ee 100644
--- a/mysys/Makefile.am
+++ b/mysys/Makefile.am
@@ -53,7 +53,7 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \
my_net.c my_semaphore.c my_port.c my_sleep.c \
charset.c charset-def.c my_bitmap.c my_bit.c md5.c \
my_gethostbyname.c rijndael.c my_aes.c sha1.c \
- my_handler.c my_netware.c
+ my_handler.c my_netware.c my_largepage.c
EXTRA_DIST = thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \
thr_mutex.c thr_rwlock.c
libmysys_a_LIBADD = @THREAD_LOBJECTS@
diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c
index 052d6c79ab9..bf7ed7ab6b6 100644
--- a/mysys/mf_keycache.c
+++ b/mysys/mf_keycache.c
@@ -341,8 +341,8 @@ int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
blocks--;
/* Allocate memory for cache page buffers */
if ((keycache->block_mem=
- my_malloc_lock((ulong) blocks * keycache->key_cache_block_size,
- MYF(0))))
+ my_large_malloc((ulong) blocks * keycache->key_cache_block_size,
+ MYF(MY_WME))))
{
/*
Allocate memory for blocks, hash_links and hash entries;
@@ -351,7 +351,7 @@ int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
if ((keycache->block_root= (BLOCK_LINK*) my_malloc((uint) length,
MYF(0))))
break;
- my_free_lock(keycache->block_mem, MYF(0));
+ my_large_free(keycache->block_mem, MYF(0));
}
if (blocks < 8)
{
@@ -421,7 +421,7 @@ err:
keycache->blocks= 0;
if (keycache->block_mem)
{
- my_free_lock((gptr) keycache->block_mem, MYF(0));
+ my_large_free((gptr) keycache->block_mem, MYF(0));
keycache->block_mem= NULL;
}
if (keycache->block_root)
@@ -605,7 +605,7 @@ void end_key_cache(KEY_CACHE *keycache, my_bool cleanup)
{
if (keycache->block_mem)
{
- my_free_lock((gptr) keycache->block_mem, MYF(0));
+ my_large_free((gptr) keycache->block_mem, MYF(0));
keycache->block_mem= NULL;
my_free((gptr) keycache->block_root, MYF(0));
keycache->block_root= NULL;
diff --git a/mysys/my_largepage.c b/mysys/my_largepage.c
new file mode 100644
index 00000000000..0639c360b46
--- /dev/null
+++ b/mysys/my_largepage.c
@@ -0,0 +1,167 @@
+/* Copyright (C) 2004 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "mysys_priv.h"
+
+#ifdef HAVE_LARGE_PAGES
+
+#ifdef HAVE_SYS_IPC_H
+#include <sys/ipc.h>
+#endif
+
+#ifdef HAVE_SYS_SHM_H
+#include <sys/shm.h>
+#endif
+
+static uint my_get_large_page_size_int(void);
+static gptr my_large_malloc_int(uint size, myf my_flags);
+static my_bool my_large_free_int(gptr ptr, myf my_flags);
+
+/* Gets the size of large pages from the OS */
+
+uint my_get_large_page_size(void)
+{
+ uint size;
+ DBUG_ENTER("my_get_large_page_size");
+
+ if (!(size = my_get_large_page_size_int()))
+ fprintf(stderr, "Warning: Failed to determine large page size\n");
+
+ DBUG_RETURN(size);
+}
+
+/*
+ General large pages allocator.
+ Tries to allocate memory from large pages pool and falls back to
+ my_malloc_lock() in case of failure
+*/
+
+gptr my_large_malloc(uint size, myf my_flags)
+{
+ gptr ptr;
+ DBUG_ENTER("my_large_malloc");
+
+ if (my_use_large_pages && my_large_page_size)
+ {
+ if ((ptr = my_large_malloc_int(size, my_flags)) != NULL)
+ DBUG_RETURN(ptr);
+ if (my_flags & MY_WME)
+ fprintf(stderr, "Warning: Using conventional memory pool\n");
+ }
+
+ DBUG_RETURN(my_malloc_lock(size, my_flags));
+}
+
+/*
+ General large pages deallocator.
+ Tries to deallocate memory as if it was from large pages pool and falls back
+ to my_free_lock() in case of failure
+ */
+
+void my_large_free(gptr ptr, myf my_flags __attribute__((unused)))
+{
+ DBUG_ENTER("my_large_free");
+
+ /*
+ my_large_free_int() can only fail if ptr was not allocated with
+ my_large_malloc_int(), i.e. my_malloc_lock() was used so we should free it
+ with my_free_lock()
+ */
+ if (!my_use_large_pages || !my_large_page_size ||
+ !my_large_free_int(ptr, my_flags))
+ my_free_lock(ptr, my_flags);
+
+ DBUG_VOID_RETURN;
+}
+
+#ifdef HUGETLB_USE_PROC_MEMINFO
+/* Linux-specific function to determine the size of large pages */
+
+uint my_get_large_page_size_int(void)
+{
+ FILE *f;
+ uint size = 0;
+ char buf[256];
+ DBUG_ENTER("my_get_large_page_size_int");
+
+ if (!(f = my_fopen("/proc/meminfo", O_RDONLY, MYF(MY_WME))))
+ goto finish;
+
+ while (fgets(buf, sizeof(buf), f))
+ if (sscanf(buf, "Hugepagesize: %u kB", &size))
+ break;
+
+ my_fclose(f, MYF(MY_WME));
+
+finish:
+ DBUG_RETURN(size * 1024);
+}
+#endif /* HUGETLB_USE_PROC_MEMINFO */
+
+#if HAVE_DECL_SHM_HUGETLB
+/* Linux-specific large pages allocator */
+
+gptr my_large_malloc_int(uint size, myf my_flags)
+{
+ int shmid;
+ gptr ptr;
+ struct shmid_ds buf;
+ DBUG_ENTER("my_large_malloc_int");
+
+ /* Align block size to my_large_page_size */
+ size = ((size - 1) & ~(my_large_page_size - 1)) + my_large_page_size;
+
+ shmid = shmget(IPC_PRIVATE, (size_t)size, SHM_HUGETLB | SHM_R | SHM_W);
+ if (shmid < 0)
+ {
+ if (my_flags & MY_WME)
+ fprintf(stderr,
+ "Warning: Failed to allocate %d bytes from HugeTLB memory."
+ " errno %d\n", size, errno);
+
+ DBUG_RETURN(NULL);
+ }
+
+ ptr = shmat(shmid, NULL, 0);
+ if (ptr == (void *)-1)
+ {
+ if (my_flags& MY_WME)
+ fprintf(stderr, "Warning: Failed to attach shared memory segment,"
+ " errno %d\n", errno);
+ shmctl(shmid, IPC_RMID, &buf);
+
+ DBUG_RETURN(NULL);
+ }
+
+ /*
+ Remove the shared memory segment so that it will be automatically freed
+ after memory is detached or process exits
+ */
+ shmctl(shmid, IPC_RMID, &buf);
+
+ DBUG_RETURN(ptr);
+}
+
+/* Linux-specific large pages deallocator */
+
+my_bool my_large_free_int(byte *ptr, myf my_flags __attribute__((unused)))
+{
+ DBUG_ENTER("my_large_free_int");
+ DBUG_RETURN(shmdt(ptr) == 0);
+}
+#endif /* HAVE_DECL_SHM_HUGETLB */
+
+#endif /* HAVE_LARGE_PAGES */
diff --git a/mysys/my_static.c b/mysys/my_static.c
index 5f034555156..57d41676390 100644
--- a/mysys/my_static.c
+++ b/mysys/my_static.c
@@ -61,6 +61,12 @@ const char *soundex_map= "01230120022455012623010202";
USED_MEM* my_once_root_block=0; /* pointer to first block */
uint my_once_extra=ONCE_ALLOC_INIT; /* Memory to alloc / block */
+ /* from my_largepage.c */
+#ifdef HAVE_LARGE_PAGES
+my_bool my_use_large_pages= 0;
+uint my_large_page_size= 0;
+#endif
+
/* from my_tempnam */
#if !defined(HAVE_TEMPNAM) || defined(HPUX11)
int _my_tempnam_used=0;
diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc
index 0c7fc23ee49..a2859384454 100644
--- a/sql/ha_innodb.cc
+++ b/sql/ha_innodb.cc
@@ -88,6 +88,7 @@ extern "C" {
uint innobase_init_flags = 0;
ulong innobase_cache_size = 0;
+ulong innobase_large_page_size = 0;
/* The default values for the following, type long, start-up parameters
are declared in mysqld.cc: */
@@ -116,6 +117,9 @@ values */
uint innobase_flush_log_at_trx_commit = 1;
my_bool innobase_log_archive = FALSE;/* unused */
+my_bool innobase_use_doublewrite = TRUE;
+my_bool innobase_use_checksums = TRUE;
+my_bool innobase_use_large_pages = FALSE;
my_bool innobase_use_native_aio = FALSE;
my_bool innobase_fast_shutdown = TRUE;
my_bool innobase_very_fast_shutdown = FALSE; /* this can be set to
@@ -1123,6 +1127,12 @@ innobase_init(void)
srv_fast_shutdown = (ibool) innobase_fast_shutdown;
+ srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
+ srv_use_checksums = (ibool) innobase_use_checksums;
+
+ os_use_large_pages = (ibool) innobase_use_large_pages;
+ os_large_page_size = (ulint) innobase_large_page_size;
+
srv_file_per_table = (ibool) innobase_file_per_table;
srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h
index b3b8d1a29e8..bc4e3db7467 100644
--- a/sql/ha_innodb.h
+++ b/sql/ha_innodb.h
@@ -181,6 +181,7 @@ extern struct show_var_st innodb_status_variables[];
extern uint innobase_init_flags, innobase_lock_type;
extern uint innobase_flush_log_at_trx_commit;
extern ulong innobase_cache_size;
+extern ulong innobase_large_page_size;
extern char *innobase_home, *innobase_tmpdir, *innobase_logdir;
extern long innobase_lock_scan_time;
extern long innobase_mirrored_log_groups, innobase_log_files_in_group;
@@ -195,6 +196,9 @@ extern char *innobase_log_group_home_dir, *innobase_log_arch_dir;
extern char *innobase_unix_file_flush_method;
/* The following variables have to be my_bool for SHOW VARIABLES to work */
extern my_bool innobase_log_archive,
+ innobase_use_doublewrite,
+ innobase_use_checksums,
+ innobase_use_large_pages,
innobase_use_native_aio, innobase_fast_shutdown,
innobase_file_per_table, innobase_locks_unsafe_for_binlog,
innobase_create_status_file;
diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h
index 91d15dc1125..e277cedb336 100644
--- a/sql/mysql_priv.h
+++ b/sql/mysql_priv.h
@@ -1027,6 +1027,8 @@ extern uint opt_crash_binlog_innodb;
extern char *shared_memory_base_name, *mysqld_unix_port;
extern bool opt_enable_shared_memory;
extern char *default_tz_name;
+extern my_bool opt_large_pages;
+extern uint opt_large_page_size;
extern MYSQL_LOG mysql_log,mysql_slow_log,mysql_bin_log;
extern FILE *bootstrap_file;
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 42ae6982eb0..85b4c41be04 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -299,6 +299,8 @@ my_bool opt_short_log_format= 0;
my_bool opt_log_queries_not_using_indexes= 0;
my_bool lower_case_file_system= 0;
my_bool opt_innodb_safe_binlog= 0;
+my_bool opt_large_pages= 0;
+uint opt_large_page_size= 0;
volatile bool mqh_used = 0;
uint mysqld_port, test_flags, select_errors, dropping_tables, ha_open_options;
@@ -2423,6 +2425,19 @@ static int init_common_variables(const char *conf_file_name, int argc,
DBUG_PRINT("info",("%s Ver %s for %s on %s\n",my_progname,
server_version, SYSTEM_TYPE,MACHINE_TYPE));
+#ifdef HAVE_LARGE_PAGES
+ /* Initialize large page size */
+ if (opt_large_pages && (opt_large_page_size= my_get_large_page_size()))
+ {
+ my_use_large_pages= 1;
+ my_large_page_size= opt_large_page_size;
+#ifdef HAVE_INNOBASE_DB
+ innobase_use_large_pages= 1;
+ innobase_large_page_size= opt_large_page_size;
+#endif
+ }
+#endif /* HAVE_LARGE_PAGES */
+
/* connections and databases needs lots of files */
{
uint files, wanted_files;
@@ -4086,6 +4101,8 @@ enum options_mysqld
OPT_INNODB_LOG_ARCHIVE,
OPT_INNODB_FLUSH_LOG_AT_TRX_COMMIT,
OPT_INNODB_FLUSH_METHOD,
+ OPT_INNODB_DOUBLEWRITE,
+ OPT_INNODB_CHECKSUMS,
OPT_INNODB_FAST_SHUTDOWN,
OPT_INNODB_FILE_PER_TABLE, OPT_CRASH_BINLOG_INNODB,
OPT_INNODB_LOCKS_UNSAFE_FOR_BINLOG,
@@ -4184,7 +4201,8 @@ enum options_mysqld
OPT_OPTIMIZER_SEARCH_DEPTH,
OPT_OPTIMIZER_PRUNE_LEVEL,
OPT_UPDATABLE_VIEWS_WITH_LIMIT,
- OPT_AUTO_INCREMENT, OPT_AUTO_INCREMENT_OFFSET
+ OPT_AUTO_INCREMENT, OPT_AUTO_INCREMENT_OFFSET,
+ OPT_ENABLE_LARGE_PAGES
};
@@ -4343,6 +4361,12 @@ Disable with --skip-bdb (will save memory).",
"Set up signals usable for debugging",
(gptr*) &opt_debugging, (gptr*) &opt_debugging,
0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+#ifdef HAVE_LARGE_PAGES
+ {"large-pages", OPT_ENABLE_LARGE_PAGES, "Enable support for large pages. \
+Disable with --skip-large-pages.",
+ (gptr*) &opt_large_pages, (gptr*) &opt_large_pages, 0, GET_BOOL, NO_ARG, 0, 0, 0,
+ 0, 0, 0},
+#endif
{"init-connect", OPT_INIT_CONNECT, "Command(s) that are executed for each new connection",
(gptr*) &opt_init_connect, (gptr*) &opt_init_connect, 0, GET_STR_ALLOC,
REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
@@ -4366,6 +4390,12 @@ Disable with --skip-innodb (will save memory).",
"The common part for InnoDB table spaces.", (gptr*) &innobase_data_home_dir,
(gptr*) &innobase_data_home_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0,
0},
+ {"innodb_doublewrite", OPT_INNODB_DOUBLEWRITE, "Enable InnoDB doublewrite buffer (enabled by default). \
+Disable with --skip-innodb-doublewrite.", (gptr*) &innobase_use_doublewrite,
+ (gptr*) &innobase_use_doublewrite, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0},
+ {"innodb_checksums", OPT_INNODB_CHECKSUMS, "Enable InnoDB checksums validation (enabled by default). \
+Disable with --skip-innodb-checksums.", (gptr*) &innobase_use_checksums,
+ (gptr*) &innobase_use_checksums, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0},
{"innodb_fast_shutdown", OPT_INNODB_FAST_SHUTDOWN,
"Speeds up server shutdown process.", (gptr*) &innobase_fast_shutdown,
(gptr*) &innobase_fast_shutdown, 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
@@ -5687,7 +5717,8 @@ static void mysql_init_variables(void)
mysqld_unix_port= opt_mysql_tmpdir= my_bind_addr_str= NullS;
bzero((gptr) &mysql_tmpdir_list, sizeof(mysql_tmpdir_list));
bzero((char *) &global_status_var, sizeof(global_status_var));
-
+ opt_large_pages= 0;
+
/* Character sets */
system_charset_info= &my_charset_utf8_general_ci;
files_charset_info= &my_charset_utf8_general_ci;
diff --git a/sql/set_var.cc b/sql/set_var.cc
index 234ec6617c3..25935dbc31f 100644
--- a/sql/set_var.cc
+++ b/sql/set_var.cc
@@ -735,6 +735,8 @@ struct show_var_st init_vars[]= {
{"innodb_buffer_pool_size", (char*) &innobase_buffer_pool_size, SHOW_LONG },
{"innodb_data_file_path", (char*) &innobase_data_file_path, SHOW_CHAR_PTR},
{"innodb_data_home_dir", (char*) &innobase_data_home_dir, SHOW_CHAR_PTR},
+ {"innodb_doublewrite", (char*) &innobase_use_doublewrite, SHOW_MY_BOOL},
+ {"innodb_checksums", (char*) &innobase_use_checksums, SHOW_MY_BOOL},
{"innodb_fast_shutdown", (char*) &innobase_fast_shutdown, SHOW_MY_BOOL},
{"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG },
{"innodb_file_per_table", (char*) &innobase_file_per_table, SHOW_MY_BOOL},
@@ -768,6 +770,8 @@ struct show_var_st init_vars[]= {
SHOW_SYS},
{"language", language, SHOW_CHAR},
{"large_files_support", (char*) &opt_large_files, SHOW_BOOL},
+ {"large_pages", (char*) &opt_large_pages, SHOW_MY_BOOL},
+ {"large_page_size", (char*) &opt_large_page_size, SHOW_INT},
{sys_license.name, (char*) &sys_license, SHOW_SYS},
{sys_local_infile.name, (char*) &sys_local_infile, SHOW_SYS},
#ifdef HAVE_MLOCKALL