summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Marko Mäkelä <marko.makela@mariadb.com> 2017-02-21 16:52:41 +0200
committer Marko Mäkelä <marko.makela@mariadb.com> 2017-02-22 12:21:44 +0200
commit 81695ab8b53013c302b50e015836d38146436cba (patch)
tree b84d73ed2125a46660a090217794d12d0c573efe
parent 6dc00f97b7db13975a26fd92ffd08aaa60339b87 (diff)
download mariadb-git-81695ab8b53013c302b50e015836d38146436cba.tar.gz
MDEV-11520 Extending an InnoDB data file unnecessarily allocates a large memory buffer on Windows

fil_extend_space_to_desired_size(), os_file_set_size(): Use calloc() for memory allocation, and handle failures. Properly check the return status of posix_fallocate(), and pass the correct arguments to posix_fallocate().

On Windows, instead of extending the file by at most 1 megabyte at a time, write a zero-filled page at the end of the file. According to the Microsoft blog post https://blogs.msdn.microsoft.com/oldnewthing/20110922-00/?p=9573 this will physically extend the file by writing zero bytes. (InnoDB never uses DeviceIoControl() to set the file sparse.)

I tested that the file extension works properly with a multi-file system tablespace, both with --innodb-use-fallocate and --skip-innodb-use-fallocate (the default):

./mtr \
--mysqld=--innodb-use-fallocate \
--mysqld=--innodb-autoextend-increment=1 \
--mysqld=--innodb-data-file-path='ibdata1:5M;ibdata2:5M:autoextend' \
--parallel=auto --force --retry=0 --suite=innodb &
ls -lsh mysql-test/var/*/mysqld.1/data/ibdata2
(several samples while running the test)
-rw-r--r-- storage/innobase/fil/fil0fil.cc 99
-rw-r--r-- storage/innobase/os/os0file.cc 79
-rw-r--r-- storage/xtradb/fil/fil0fil.cc 97
-rw-r--r-- storage/xtradb/os/os0file.cc 79
4 files changed, 153 insertions, 201 deletions
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 9d471f9dbd3..874997cb005 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2014, 2017, MariaDB Corporation. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -4953,15 +4954,12 @@ fil_extend_space_to_desired_size(
byte* buf;
ulint buf_size;
ulint start_page_no;
- ulint file_start_page_no;
ulint page_size;
- ulint pages_added;
ibool success;
ut_ad(!srv_read_only_mode);
retry:
- pages_added = 0;
success = TRUE;
fil_mutex_enter_and_prepare_for_io(space_id);
@@ -5015,29 +5013,29 @@ retry:
mutex_exit(&fil_system->mutex);
start_page_no = space->size;
- file_start_page_no = space->size - node->size;
-
+ const ulint file_start_page_no = space->size - node->size;
#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
- os_offset_t start_offset = start_page_no * page_size;
- os_offset_t n_pages = (size_after_extend - start_page_no);
- os_offset_t len = n_pages * page_size;
-
- if (posix_fallocate(node->handle, start_offset, len) == -1) {
- ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file "
- "space for file \'%s\' failed. Current size "
- INT64PF ", desired size " INT64PF "\n",
- node->name, start_offset, len+start_offset);
- os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE);
- success = FALSE;
- } else {
- success = TRUE;
+ os_offset_t start_offset
+ = (start_page_no - file_start_page_no) * page_size;
+ ulint n_pages = size_after_extend - start_page_no;
+ os_offset_t len = os_offset_t(n_pages) * page_size;
+
+ int err = posix_fallocate(node->handle, start_offset, len);
+ success = !err;
+ if (!success) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "extending file %s"
+ " from " INT64PF " to " INT64PF " bytes"
+ " failed with error %d",
+ node->name, start_offset, len + start_offset,
+ err);
}
DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
- success = FALSE; errno = 28; os_has_said_disk_full = TRUE;);
+ success = FALSE; os_has_said_disk_full = TRUE;);
mutex_enter(&fil_system->mutex);
+
if (success) {
node->size += n_pages;
space->size += n_pages;
@@ -5054,14 +5052,24 @@ retry:
}
#endif
+#ifdef _WIN32
+ /* Write 1 page of zeroes at the desired end. */
+ start_page_no = size_after_extend - 1;
+ buf_size = page_size;
+#else
/* Extend at most 64 pages at a time */
buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
- buf2 = static_cast<byte*>(mem_alloc(buf_size + page_size));
+#endif
+ buf2 = static_cast<byte*>(calloc(1, buf_size + page_size));
+ if (!buf2) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "Cannot allocate " ULINTPF
+ " bytes to extend file",
+ buf_size + page_size);
+ success = FALSE;
+ }
buf = static_cast<byte*>(ut_align(buf2, page_size));
- memset(buf, 0, buf_size);
-
- while (start_page_no < size_after_extend) {
+ while (success && start_page_no < size_after_extend) {
ulint n_pages
= ut_min(buf_size / page_size,
size_after_extend - start_page_no);
@@ -5070,56 +5078,47 @@ retry:
= ((os_offset_t) (start_page_no - file_start_page_no))
* page_size;
- const char* name = node->name == NULL ? space->name : node->name;
-
#ifdef UNIV_HOTBACKUP
- success = os_file_write(name, node->handle, buf,
+ success = os_file_write(node->name, node->handle, buf,
offset, page_size * n_pages);
#else
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
- name, node->handle, buf,
+ node->name, node->handle, buf,
offset, page_size * n_pages,
NULL, NULL);
#endif /* UNIV_HOTBACKUP */
-
DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
- success = FALSE; errno = 28; os_has_said_disk_full = TRUE;);
-
- if (success) {
- os_has_said_disk_full = FALSE;
- } else {
- /* Let us measure the size of the file to determine
- how much we were able to extend it */
- os_offset_t size;
-
- size = os_file_get_size(node->handle);
- ut_a(size != (os_offset_t) -1);
+ success = FALSE; os_has_said_disk_full = TRUE;);
- n_pages = ((ulint) (size / page_size))
- - node->size - pages_added;
+ /* Let us measure the size of the file to determine
+ how much we were able to extend it */
+ os_offset_t size = os_file_get_size(node->handle);
+ ut_a(size != (os_offset_t) -1);
- pages_added += n_pages;
- break;
- }
-
- start_page_no += n_pages;
- pages_added += n_pages;
+ start_page_no = (ulint) (size / page_size)
+ + file_start_page_no;
}
- mem_free(buf2);
+ free(buf2);
mutex_enter(&fil_system->mutex);
ut_a(node->being_extended);
+ ut_a(start_page_no - file_start_page_no >= node->size);
- space->size += pages_added;
- node->size += pages_added;
+ if (buf) {
+ ulint file_size = start_page_no - file_start_page_no;
+ space->size += file_size - node->size;
+ node->size = file_size;
+ }
fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
/* At this point file has been extended */
+#ifdef HAVE_POSIX_FALLOCATE
file_extended:
+#endif /* HAVE_POSIX_FALLOCATE */
node->being_extended = FALSE;
*actual_size = space->size;
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index 109865e8e30..9fdd52bf736 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -2,7 +2,7 @@
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
-Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
+Copyright (c) 2012, 2017, MariaDB Corporation. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
@@ -2123,48 +2123,47 @@ os_file_set_size(
os_file_t file, /*!< in: handle to a file */
os_offset_t size) /*!< in: file size */
{
- os_offset_t current_size;
ibool ret;
byte* buf;
byte* buf2;
ulint buf_size;
- current_size = 0;
-
#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
-
- if (posix_fallocate(file, current_size, size) == -1) {
-
- fprintf(stderr, "InnoDB: Error: preallocating file "
- "space for file \'%s\' failed. Current size "
- "%lu, desired size %lu\n",
- name, (long unsigned) current_size, (long unsigned) size);
- os_file_handle_error_no_exit(name, "posix_fallocate", FALSE);
- return(FALSE);
+ int err = posix_fallocate(file, 0, size);
+ if (err) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "preallocating " INT64PF " bytes for"
+ "file %s failed with error %d",
+ size, name, err);
}
- return(TRUE);
+ return(!err);
}
#endif
-
+#ifdef _WIN32
+ /* Write 1 page of zeroes at the desired end. */
+ buf_size = UNIV_PAGE_SIZE;
+ os_offset_t current_size = size - buf_size;
+#else
/* Write up to 1 megabyte at a time. */
buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE))
* UNIV_PAGE_SIZE;
- buf2 = static_cast<byte*>(ut_malloc(buf_size + UNIV_PAGE_SIZE));
+ os_offset_t current_size = 0;
+#endif
+ buf2 = static_cast<byte*>(calloc(1, buf_size + UNIV_PAGE_SIZE));
+
+ if (!buf2) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot allocate " ULINTPF " bytes to extend file\n",
+ buf_size + UNIV_PAGE_SIZE);
+ return(FALSE);
+ }
/* Align the buffer for possible raw i/o */
buf = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
- /* Write buffer full of zeros */
- memset(buf, 0, buf_size);
-
- if (size >= (os_offset_t) 100 << 20) {
-
- fprintf(stderr, "InnoDB: Progress in MB:");
- }
-
- while (current_size < size) {
+ do {
ulint n_bytes;
if (size - current_size < (os_offset_t) buf_size) {
@@ -2175,37 +2174,15 @@ os_file_set_size(
ret = os_file_write(name, file, buf, current_size, n_bytes);
if (!ret) {
- ut_free(buf2);
- goto error_handling;
- }
-
- /* Print about progress for each 100 MB written */
- if ((current_size + n_bytes) / (100 << 20)
- != current_size / (100 << 20)) {
-
- fprintf(stderr, " %lu00",
- (ulong) ((current_size + n_bytes)
- / (100 << 20)));
+ break;
}
current_size += n_bytes;
- }
+ } while (current_size < size);
- if (size >= (os_offset_t) 100 << 20) {
-
- fprintf(stderr, "\n");
- }
+ free(buf2);
- ut_free(buf2);
-
- ret = os_file_flush(file);
-
- if (ret) {
- return(TRUE);
- }
-
-error_handling:
- return(FALSE);
+ return(ret && os_file_flush(file));
}
/***********************************************************************//**
diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc
index a7b0377d2a4..de6eef8f1d0 100644
--- a/storage/xtradb/fil/fil0fil.cc
+++ b/storage/xtradb/fil/fil0fil.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2014, 2017, MariaDB Corporation. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -4993,15 +4994,12 @@ fil_extend_space_to_desired_size(
byte* buf;
ulint buf_size;
ulint start_page_no;
- ulint file_start_page_no;
ulint page_size;
- ulint pages_added;
ibool success;
ut_ad(!srv_read_only_mode);
retry:
- pages_added = 0;
success = TRUE;
fil_mutex_enter_and_prepare_for_io(space_id);
@@ -5055,27 +5053,26 @@ retry:
mutex_exit(&fil_system->mutex);
start_page_no = space->size;
- file_start_page_no = space->size - node->size;
-
+ const ulint file_start_page_no = space->size - node->size;
#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
- os_offset_t start_offset = start_page_no * page_size;
- os_offset_t n_pages = (size_after_extend - start_page_no);
- os_offset_t len = n_pages * page_size;
-
- if (posix_fallocate(node->handle, start_offset, len) == -1) {
- ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file "
- "space for file \'%s\' failed. Current size "
- INT64PF ", desired size " INT64PF,
- node->name, start_offset, len+start_offset);
- os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE);
- success = FALSE;
- } else {
- success = TRUE;
+ os_offset_t start_offset
+ = (start_page_no - file_start_page_no) * page_size;
+ ulint n_pages = size_after_extend - start_page_no;
+ os_offset_t len = os_offset_t(n_pages) * page_size;
+
+ int err = posix_fallocate(node->handle, start_offset, len);
+ success = !err;
+ if (!success) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "extending file %s"
+ " from " INT64PF " to " INT64PF " bytes"
+ " failed with error %d",
+ node->name, start_offset, len + start_offset,
+ err);
}
DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
- success = FALSE; errno = 28;os_has_said_disk_full = TRUE;);
+ success = FALSE; os_has_said_disk_full = TRUE;);
mutex_enter(&fil_system->mutex);
@@ -5095,14 +5092,24 @@ retry:
}
#endif
+#ifdef _WIN32
+ /* Write 1 page of zeroes at the desired end. */
+ start_page_no = size_after_extend - 1;
+ buf_size = page_size;
+#else
/* Extend at most 64 pages at a time */
buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
- buf2 = static_cast<byte*>(mem_alloc(buf_size + page_size));
+#endif
+ buf2 = static_cast<byte*>(calloc(1, buf_size + page_size));
+ if (!buf2) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "Cannot allocate " ULINTPF
+ " bytes to extend file",
+ buf_size + page_size);
+ success = FALSE;
+ }
buf = static_cast<byte*>(ut_align(buf2, page_size));
- memset(buf, 0, buf_size);
-
- while (start_page_no < size_after_extend) {
+ while (success && start_page_no < size_after_extend) {
ulint n_pages
= ut_min(buf_size / page_size,
size_after_extend - start_page_no);
@@ -5111,55 +5118,47 @@ retry:
= ((os_offset_t) (start_page_no - file_start_page_no))
* page_size;
- const char* name = node->name == NULL ? space->name : node->name;
-
#ifdef UNIV_HOTBACKUP
- success = os_file_write(name, node->handle, buf,
+ success = os_file_write(node->name, node->handle, buf,
offset, page_size * n_pages);
#else
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
- name, node->handle, buf,
+ node->name, node->handle, buf,
offset, page_size * n_pages,
NULL, NULL, space_id, NULL);
#endif /* UNIV_HOTBACKUP */
DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
- success = FALSE; errno = 28; os_has_said_disk_full = TRUE;);
+ success = FALSE; os_has_said_disk_full = TRUE;);
- if (success) {
- os_has_said_disk_full = FALSE;
- } else {
- /* Let us measure the size of the file to determine
- how much we were able to extend it */
- os_offset_t size;
-
- size = os_file_get_size(node->handle);
- ut_a(size != (os_offset_t) -1);
-
- n_pages = ((ulint) (size / page_size))
- - node->size - pages_added;
+ /* Let us measure the size of the file to determine
+ how much we were able to extend it */
+ os_offset_t size = os_file_get_size(node->handle);
+ ut_a(size != (os_offset_t) -1);
- pages_added += n_pages;
- break;
- }
-
- start_page_no += n_pages;
- pages_added += n_pages;
+ start_page_no = (ulint) (size / page_size)
+ + file_start_page_no;
}
- mem_free(buf2);
+ free(buf2);
mutex_enter(&fil_system->mutex);
ut_a(node->being_extended);
+ ut_a(start_page_no - file_start_page_no >= node->size);
- space->size += pages_added;
- node->size += pages_added;
+ if (buf) {
+ ulint file_size = start_page_no - file_start_page_no;
+ space->size += file_size - node->size;
+ node->size = file_size;
+ }
fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
/* At this point file has been extended */
+#ifdef HAVE_POSIX_FALLOCATE
file_extended:
+#endif /* HAVE_POSIX_FALLOCATE */
node->being_extended = FALSE;
*actual_size = space->size;
diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc
index bf957d24ea7..5e7a0251a00 100644
--- a/storage/xtradb/os/os0file.cc
+++ b/storage/xtradb/os/os0file.cc
@@ -2,7 +2,7 @@
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
-Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
+Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
@@ -2341,48 +2341,47 @@ os_file_set_size(
os_file_t file, /*!< in: handle to a file */
os_offset_t size) /*!< in: file size */
{
- os_offset_t current_size;
ibool ret;
byte* buf;
byte* buf2;
ulint buf_size;
- current_size = 0;
-
#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
-
- if (posix_fallocate(file, current_size, size) == -1) {
-
- ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file "
- "space for file \'%s\' failed. Current size "
- INT64PF ", desired size " INT64PF,
- name, current_size, size);
- os_file_handle_error_no_exit (name, "posix_fallocate",
- FALSE);
- return(FALSE);
+ int err = posix_fallocate(file, 0, size);
+ if (err) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "preallocating " INT64PF " bytes for"
+ "file %s failed with error %d",
+ size, name, err);
}
- return(TRUE);
+ return(!err);
}
#endif
+#ifdef _WIN32
+ /* Write 1 page of zeroes at the desired end. */
+ buf_size = UNIV_PAGE_SIZE;
+ os_offset_t current_size = size - buf_size;
+#else
/* Write up to 1 megabyte at a time. */
buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE))
* UNIV_PAGE_SIZE;
- buf2 = static_cast<byte*>(ut_malloc(buf_size + UNIV_PAGE_SIZE));
+ os_offset_t current_size = 0;
+#endif
+ buf2 = static_cast<byte*>(calloc(1, buf_size + UNIV_PAGE_SIZE));
+
+ if (!buf2) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot allocate " ULINTPF " bytes to extend file\n",
+ buf_size + UNIV_PAGE_SIZE);
+ return(FALSE);
+ }
/* Align the buffer for possible raw i/o */
buf = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
- /* Write buffer full of zeros */
- memset(buf, 0, buf_size);
-
- if (size >= (os_offset_t) 100 << 20) {
-
- fprintf(stderr, "InnoDB: Progress in MB:");
- }
-
- while (current_size < size) {
+ do {
ulint n_bytes;
if (size - current_size < (os_offset_t) buf_size) {
@@ -2393,37 +2392,15 @@ os_file_set_size(
ret = os_file_write(name, file, buf, current_size, n_bytes);
if (!ret) {
- ut_free(buf2);
- goto error_handling;
- }
-
- /* Print about progress for each 100 MB written */
- if ((current_size + n_bytes) / (100 << 20)
- != current_size / (100 << 20)) {
-
- fprintf(stderr, " %lu00",
- (ulong) ((current_size + n_bytes)
- / (100 << 20)));
+ break;
}
current_size += n_bytes;
- }
-
- if (size >= (os_offset_t) 100 << 20) {
+ } while (current_size < size);
- fprintf(stderr, "\n");
- }
-
- ut_free(buf2);
-
- ret = os_file_flush(file);
-
- if (ret) {
- return(TRUE);
- }
+ free(buf2);
-error_handling:
- return(FALSE);
+ return(ret && os_file_flush(file));
}
/***********************************************************************//**