diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2017-02-21 16:52:41 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2017-02-22 12:21:44 +0200 |
commit | 81695ab8b53013c302b50e015836d38146436cba (patch) | |
tree | b84d73ed2125a46660a090217794d12d0c573efe /storage | |
parent | 6dc00f97b7db13975a26fd92ffd08aaa60339b87 (diff) | |
download | mariadb-git-81695ab8b53013c302b50e015836d38146436cba.tar.gz |
MDEV-11520 Extending an InnoDB data file unnecessarily allocates
a large memory buffer on Windows
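fil_extend_space_to_desired_size(), os_file_set_size(): Use calloc()
for memory allocation, and handle allocation failures. Properly check the
return status of posix_fallocate(): it returns an error number on failure,
not -1 with errno set. Also pass the correct arguments to posix_fallocate():
the offset must be relative to the start of the individual data file,
not to the start of the tablespace.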
On Windows, instead of extending the file by at most 1 megabyte at a time
(which required a correspondingly large zero-filled buffer),
write a single zero-filled page at the desired new end of the file.
According to the Microsoft blog post
https://blogs.msdn.microsoft.com/oldnewthing/20110922-00/?p=9573
writing beyond the current end of a file makes the file system physically
extend the file, filling the gap in between with zero bytes.
(InnoDB never uses DeviceIoControl() to set the file sparse.)
I tested that the file extension works properly with a multi-file
system tablespace, both with --innodb-use-fallocate and
--skip-innodb-use-fallocate (the default):
./mtr \
--mysqld=--innodb-use-fallocate \
--mysqld=--innodb-autoextend-increment=1 \
--mysqld=--innodb-data-file-path='ibdata1:5M;ibdata2:5M:autoextend' \
--parallel=auto --force --retry=0 --suite=innodb &
ls -lsh mysql-test/var/*/mysqld.1/data/ibdata2
(several samples while running the test)
Diffstat (limited to 'storage')
-rw-r--r-- | storage/innobase/fil/fil0fil.cc | 99 | ||||
-rw-r--r-- | storage/innobase/os/os0file.cc | 79 | ||||
-rw-r--r-- | storage/xtradb/fil/fil0fil.cc | 97 | ||||
-rw-r--r-- | storage/xtradb/os/os0file.cc | 79 |
4 files changed, 153 insertions, 201 deletions
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 9d471f9dbd3..874997cb005 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2014, 2017, MariaDB Corporation. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -4953,15 +4954,12 @@ fil_extend_space_to_desired_size( byte* buf; ulint buf_size; ulint start_page_no; - ulint file_start_page_no; ulint page_size; - ulint pages_added; ibool success; ut_ad(!srv_read_only_mode); retry: - pages_added = 0; success = TRUE; fil_mutex_enter_and_prepare_for_io(space_id); @@ -5015,29 +5013,29 @@ retry: mutex_exit(&fil_system->mutex); start_page_no = space->size; - file_start_page_no = space->size - node->size; - + const ulint file_start_page_no = space->size - node->size; #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { - os_offset_t start_offset = start_page_no * page_size; - os_offset_t n_pages = (size_after_extend - start_page_no); - os_offset_t len = n_pages * page_size; - - if (posix_fallocate(node->handle, start_offset, len) == -1) { - ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file " - "space for file \'%s\' failed. 
Current size " - INT64PF ", desired size " INT64PF "\n", - node->name, start_offset, len+start_offset); - os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE); - success = FALSE; - } else { - success = TRUE; + os_offset_t start_offset + = (start_page_no - file_start_page_no) * page_size; + ulint n_pages = size_after_extend - start_page_no; + os_offset_t len = os_offset_t(n_pages) * page_size; + + int err = posix_fallocate(node->handle, start_offset, len); + success = !err; + if (!success) { + ib_logf(IB_LOG_LEVEL_ERROR, "extending file %s" + " from " INT64PF " to " INT64PF " bytes" + " failed with error %d", + node->name, start_offset, len + start_offset, + err); } DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", - success = FALSE; errno = 28; os_has_said_disk_full = TRUE;); + success = FALSE; os_has_said_disk_full = TRUE;); mutex_enter(&fil_system->mutex); + if (success) { node->size += n_pages; space->size += n_pages; @@ -5054,14 +5052,24 @@ retry: } #endif +#ifdef _WIN32 + /* Write 1 page of zeroes at the desired end. */ + start_page_no = size_after_extend - 1; + buf_size = page_size; +#else /* Extend at most 64 pages at a time */ buf_size = ut_min(64, size_after_extend - start_page_no) * page_size; - buf2 = static_cast<byte*>(mem_alloc(buf_size + page_size)); +#endif + buf2 = static_cast<byte*>(calloc(1, buf_size + page_size)); + if (!buf2) { + ib_logf(IB_LOG_LEVEL_ERROR, "Cannot allocate " ULINTPF + " bytes to extend file", + buf_size + page_size); + success = FALSE; + } buf = static_cast<byte*>(ut_align(buf2, page_size)); - memset(buf, 0, buf_size); - - while (start_page_no < size_after_extend) { + while (success && start_page_no < size_after_extend) { ulint n_pages = ut_min(buf_size / page_size, size_after_extend - start_page_no); @@ -5070,56 +5078,47 @@ retry: = ((os_offset_t) (start_page_no - file_start_page_no)) * page_size; - const char* name = node->name == NULL ? 
space->name : node->name; - #ifdef UNIV_HOTBACKUP - success = os_file_write(name, node->handle, buf, + success = os_file_write(node->name, node->handle, buf, offset, page_size * n_pages); #else success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC, - name, node->handle, buf, + node->name, node->handle, buf, offset, page_size * n_pages, NULL, NULL); #endif /* UNIV_HOTBACKUP */ - DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", - success = FALSE; errno = 28; os_has_said_disk_full = TRUE;); - - if (success) { - os_has_said_disk_full = FALSE; - } else { - /* Let us measure the size of the file to determine - how much we were able to extend it */ - os_offset_t size; - - size = os_file_get_size(node->handle); - ut_a(size != (os_offset_t) -1); + success = FALSE; os_has_said_disk_full = TRUE;); - n_pages = ((ulint) (size / page_size)) - - node->size - pages_added; + /* Let us measure the size of the file to determine + how much we were able to extend it */ + os_offset_t size = os_file_get_size(node->handle); + ut_a(size != (os_offset_t) -1); - pages_added += n_pages; - break; - } - - start_page_no += n_pages; - pages_added += n_pages; + start_page_no = (ulint) (size / page_size) + + file_start_page_no; } - mem_free(buf2); + free(buf2); mutex_enter(&fil_system->mutex); ut_a(node->being_extended); + ut_a(start_page_no - file_start_page_no >= node->size); - space->size += pages_added; - node->size += pages_added; + if (buf) { + ulint file_size = start_page_no - file_start_page_no; + space->size += file_size - node->size; + node->size = file_size; + } fil_node_complete_io(node, fil_system, OS_FILE_WRITE); /* At this point file has been extended */ +#ifdef HAVE_POSIX_FALLOCATE file_extended: +#endif /* HAVE_POSIX_FALLOCATE */ node->being_extended = FALSE; *actual_size = space->size; diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 109865e8e30..9fdd52bf736 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -2,7 +2,7 @@ 
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. -Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. +Copyright (c) 2012, 2017, MariaDB Corporation. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Percona Inc.. Those modifications are @@ -2123,48 +2123,47 @@ os_file_set_size( os_file_t file, /*!< in: handle to a file */ os_offset_t size) /*!< in: file size */ { - os_offset_t current_size; ibool ret; byte* buf; byte* buf2; ulint buf_size; - current_size = 0; - #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { - - if (posix_fallocate(file, current_size, size) == -1) { - - fprintf(stderr, "InnoDB: Error: preallocating file " - "space for file \'%s\' failed. Current size " - "%lu, desired size %lu\n", - name, (long unsigned) current_size, (long unsigned) size); - os_file_handle_error_no_exit(name, "posix_fallocate", FALSE); - return(FALSE); + int err = posix_fallocate(file, 0, size); + if (err) { + ib_logf(IB_LOG_LEVEL_ERROR, + "preallocating " INT64PF " bytes for" + "file %s failed with error %d", + size, name, err); } - return(TRUE); + return(!err); } #endif - +#ifdef _WIN32 + /* Write 1 page of zeroes at the desired end. */ + buf_size = UNIV_PAGE_SIZE; + os_offset_t current_size = size - buf_size; +#else /* Write up to 1 megabyte at a time. 
*/ buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE)) * UNIV_PAGE_SIZE; - buf2 = static_cast<byte*>(ut_malloc(buf_size + UNIV_PAGE_SIZE)); + os_offset_t current_size = 0; +#endif + buf2 = static_cast<byte*>(calloc(1, buf_size + UNIV_PAGE_SIZE)); + + if (!buf2) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Cannot allocate " ULINTPF " bytes to extend file\n", + buf_size + UNIV_PAGE_SIZE); + return(FALSE); + } /* Align the buffer for possible raw i/o */ buf = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE)); - /* Write buffer full of zeros */ - memset(buf, 0, buf_size); - - if (size >= (os_offset_t) 100 << 20) { - - fprintf(stderr, "InnoDB: Progress in MB:"); - } - - while (current_size < size) { + do { ulint n_bytes; if (size - current_size < (os_offset_t) buf_size) { @@ -2175,37 +2174,15 @@ os_file_set_size( ret = os_file_write(name, file, buf, current_size, n_bytes); if (!ret) { - ut_free(buf2); - goto error_handling; - } - - /* Print about progress for each 100 MB written */ - if ((current_size + n_bytes) / (100 << 20) - != current_size / (100 << 20)) { - - fprintf(stderr, " %lu00", - (ulong) ((current_size + n_bytes) - / (100 << 20))); + break; } current_size += n_bytes; - } + } while (current_size < size); - if (size >= (os_offset_t) 100 << 20) { - - fprintf(stderr, "\n"); - } + free(buf2); - ut_free(buf2); - - ret = os_file_flush(file); - - if (ret) { - return(TRUE); - } - -error_handling: - return(FALSE); + return(ret && os_file_flush(file)); } /***********************************************************************//** diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index a7b0377d2a4..de6eef8f1d0 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2014, 2017, MariaDB Corporation. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -4993,15 +4994,12 @@ fil_extend_space_to_desired_size( byte* buf; ulint buf_size; ulint start_page_no; - ulint file_start_page_no; ulint page_size; - ulint pages_added; ibool success; ut_ad(!srv_read_only_mode); retry: - pages_added = 0; success = TRUE; fil_mutex_enter_and_prepare_for_io(space_id); @@ -5055,27 +5053,26 @@ retry: mutex_exit(&fil_system->mutex); start_page_no = space->size; - file_start_page_no = space->size - node->size; - + const ulint file_start_page_no = space->size - node->size; #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { - os_offset_t start_offset = start_page_no * page_size; - os_offset_t n_pages = (size_after_extend - start_page_no); - os_offset_t len = n_pages * page_size; - - if (posix_fallocate(node->handle, start_offset, len) == -1) { - ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file " - "space for file \'%s\' failed. 
Current size " - INT64PF ", desired size " INT64PF, - node->name, start_offset, len+start_offset); - os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE); - success = FALSE; - } else { - success = TRUE; + os_offset_t start_offset + = (start_page_no - file_start_page_no) * page_size; + ulint n_pages = size_after_extend - start_page_no; + os_offset_t len = os_offset_t(n_pages) * page_size; + + int err = posix_fallocate(node->handle, start_offset, len); + success = !err; + if (!success) { + ib_logf(IB_LOG_LEVEL_ERROR, "extending file %s" + " from " INT64PF " to " INT64PF " bytes" + " failed with error %d", + node->name, start_offset, len + start_offset, + err); } DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", - success = FALSE; errno = 28;os_has_said_disk_full = TRUE;); + success = FALSE; os_has_said_disk_full = TRUE;); mutex_enter(&fil_system->mutex); @@ -5095,14 +5092,24 @@ retry: } #endif +#ifdef _WIN32 + /* Write 1 page of zeroes at the desired end. */ + start_page_no = size_after_extend - 1; + buf_size = page_size; +#else /* Extend at most 64 pages at a time */ buf_size = ut_min(64, size_after_extend - start_page_no) * page_size; - buf2 = static_cast<byte*>(mem_alloc(buf_size + page_size)); +#endif + buf2 = static_cast<byte*>(calloc(1, buf_size + page_size)); + if (!buf2) { + ib_logf(IB_LOG_LEVEL_ERROR, "Cannot allocate " ULINTPF + " bytes to extend file", + buf_size + page_size); + success = FALSE; + } buf = static_cast<byte*>(ut_align(buf2, page_size)); - memset(buf, 0, buf_size); - - while (start_page_no < size_after_extend) { + while (success && start_page_no < size_after_extend) { ulint n_pages = ut_min(buf_size / page_size, size_after_extend - start_page_no); @@ -5111,55 +5118,47 @@ retry: = ((os_offset_t) (start_page_no - file_start_page_no)) * page_size; - const char* name = node->name == NULL ? 
space->name : node->name; - #ifdef UNIV_HOTBACKUP - success = os_file_write(name, node->handle, buf, + success = os_file_write(node->name, node->handle, buf, offset, page_size * n_pages); #else success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC, - name, node->handle, buf, + node->name, node->handle, buf, offset, page_size * n_pages, NULL, NULL, space_id, NULL); #endif /* UNIV_HOTBACKUP */ DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", - success = FALSE; errno = 28; os_has_said_disk_full = TRUE;); + success = FALSE; os_has_said_disk_full = TRUE;); - if (success) { - os_has_said_disk_full = FALSE; - } else { - /* Let us measure the size of the file to determine - how much we were able to extend it */ - os_offset_t size; - - size = os_file_get_size(node->handle); - ut_a(size != (os_offset_t) -1); - - n_pages = ((ulint) (size / page_size)) - - node->size - pages_added; + /* Let us measure the size of the file to determine + how much we were able to extend it */ + os_offset_t size = os_file_get_size(node->handle); + ut_a(size != (os_offset_t) -1); - pages_added += n_pages; - break; - } - - start_page_no += n_pages; - pages_added += n_pages; + start_page_no = (ulint) (size / page_size) + + file_start_page_no; } - mem_free(buf2); + free(buf2); mutex_enter(&fil_system->mutex); ut_a(node->being_extended); + ut_a(start_page_no - file_start_page_no >= node->size); - space->size += pages_added; - node->size += pages_added; + if (buf) { + ulint file_size = start_page_no - file_start_page_no; + space->size += file_size - node->size; + node->size = file_size; + } fil_node_complete_io(node, fil_system, OS_FILE_WRITE); /* At this point file has been extended */ +#ifdef HAVE_POSIX_FALLOCATE file_extended: +#endif /* HAVE_POSIX_FALLOCATE */ node->being_extended = FALSE; *actual_size = space->size; diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index bf957d24ea7..5e7a0251a00 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -2,7 +2,7 
@@ Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. -Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. +Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Percona Inc.. Those modifications are @@ -2341,48 +2341,47 @@ os_file_set_size( os_file_t file, /*!< in: handle to a file */ os_offset_t size) /*!< in: file size */ { - os_offset_t current_size; ibool ret; byte* buf; byte* buf2; ulint buf_size; - current_size = 0; - #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { - - if (posix_fallocate(file, current_size, size) == -1) { - - ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file " - "space for file \'%s\' failed. Current size " - INT64PF ", desired size " INT64PF, - name, current_size, size); - os_file_handle_error_no_exit (name, "posix_fallocate", - FALSE); - return(FALSE); + int err = posix_fallocate(file, 0, size); + if (err) { + ib_logf(IB_LOG_LEVEL_ERROR, + "preallocating " INT64PF " bytes for" + "file %s failed with error %d", + size, name, err); } - return(TRUE); + return(!err); } #endif +#ifdef _WIN32 + /* Write 1 page of zeroes at the desired end. */ + buf_size = UNIV_PAGE_SIZE; + os_offset_t current_size = size - buf_size; +#else /* Write up to 1 megabyte at a time. 
*/ buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE)) * UNIV_PAGE_SIZE; - buf2 = static_cast<byte*>(ut_malloc(buf_size + UNIV_PAGE_SIZE)); + os_offset_t current_size = 0; +#endif + buf2 = static_cast<byte*>(calloc(1, buf_size + UNIV_PAGE_SIZE)); + + if (!buf2) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Cannot allocate " ULINTPF " bytes to extend file\n", + buf_size + UNIV_PAGE_SIZE); + return(FALSE); + } /* Align the buffer for possible raw i/o */ buf = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE)); - /* Write buffer full of zeros */ - memset(buf, 0, buf_size); - - if (size >= (os_offset_t) 100 << 20) { - - fprintf(stderr, "InnoDB: Progress in MB:"); - } - - while (current_size < size) { + do { ulint n_bytes; if (size - current_size < (os_offset_t) buf_size) { @@ -2393,37 +2392,15 @@ os_file_set_size( ret = os_file_write(name, file, buf, current_size, n_bytes); if (!ret) { - ut_free(buf2); - goto error_handling; - } - - /* Print about progress for each 100 MB written */ - if ((current_size + n_bytes) / (100 << 20) - != current_size / (100 << 20)) { - - fprintf(stderr, " %lu00", - (ulong) ((current_size + n_bytes) - / (100 << 20))); + break; } current_size += n_bytes; - } - - if (size >= (os_offset_t) 100 << 20) { + } while (current_size < size); - fprintf(stderr, "\n"); - } - - ut_free(buf2); - - ret = os_file_flush(file); - - if (ret) { - return(TRUE); - } + free(buf2); -error_handling: - return(FALSE); + return(ret && os_file_flush(file)); } /***********************************************************************//** |