diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2017-02-20 17:58:42 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2017-02-21 16:45:03 +0200 |
commit | 978179a9d4933d3d8d2ac99028798e8a07095dd4 (patch) | |
tree | a4f21dbbd812e8c347413f7ead578b1677d08691 | |
parent | 2bfe83adec576a27aed2d87ff65cebddc3430d2e (diff) | |
download | mariadb-git-978179a9d4933d3d8d2ac99028798e8a07095dd4.tar.gz |
MDEV-11520 Extending an InnoDB data file unnecessarily allocates
a large memory buffer on Windows
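fil_extend_space_to_desired_size(), os_file_set_size(): Use calloc()
for memory allocation, and handle failures. Properly check the return
status of posix_fallocate(): it returns an error number on failure
(and 0 on success) instead of returning -1 and setting errno.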
On Windows, instead of extending the file by at most 1 megabyte at a time,
write a zero-filled page at the end of the file.
According to the Microsoft blog post
https://blogs.msdn.microsoft.com/oldnewthing/20110922-00/?p=9573
writing past the current end of a file makes the file system physically
extend the file, filling the gap with zero bytes.
(InnoDB never uses DeviceIoControl() to set the file sparse.)
For innodb_plugin, port the XtraDB fix for MySQL Bug#56433
(introducing fil_system->file_extend_mutex). The bug was
fixed differently in MySQL 5.6 (and MariaDB Server 10.0).
-rw-r--r-- | storage/innobase/fil/fil0fil.c | 85 | ||||
-rw-r--r-- | storage/innobase/include/sync0sync.h | 1 | ||||
-rw-r--r-- | storage/innobase/os/os0file.c | 82 | ||||
-rw-r--r-- | storage/xtradb/fil/fil0fil.c | 84 | ||||
-rw-r--r-- | storage/xtradb/os/os0file.c | 76 |
5 files changed, 162 insertions, 166 deletions
diff --git a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c index d7ac3dd14eb..195fa7adde9 100644 --- a/storage/innobase/fil/fil0fil.c +++ b/storage/innobase/fil/fil0fil.c @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2014, 2017, MariaDB Corporation. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -248,6 +249,7 @@ the ib_logfiles form a 'space' and it is handled here */ struct fil_system_struct { #ifndef UNIV_HOTBACKUP mutex_t mutex; /*!< The mutex protecting the cache */ + mutex_t file_extend_mutex; #endif /* !UNIV_HOTBACKUP */ hash_table_t* spaces; /*!< The hash table of spaces in the system; they are hashed on the space @@ -1658,6 +1660,8 @@ fil_init( mutex_create(fil_system_mutex_key, &fil_system->mutex, SYNC_ANY_LATCH); + mutex_create(fil_system_mutex_key, + &fil_system->file_extend_mutex, SYNC_OUTER_ANY_LATCH); fil_system->spaces = hash_create(hash_size); fil_system->name_hash = hash_create(hash_size); @@ -4096,6 +4100,10 @@ fil_extend_space_to_desired_size( ulint page_size; ibool success = TRUE; + /* fil_system->file_extend_mutex is for http://bugs.mysql.com/56433 + to prevent concurrent fil_extend_space_to_desired_size() + while fil_system->mutex is temporarily released */ + mutex_enter(&fil_system->file_extend_mutex); fil_mutex_enter_and_prepare_for_io(space_id); space = fil_space_get_by_id(space_id); @@ -4107,6 +4115,7 @@ fil_extend_space_to_desired_size( *actual_size = space->size; mutex_exit(&fil_system->mutex); + mutex_exit(&fil_system->file_extend_mutex); return(TRUE); } @@ -4123,22 +4132,24 @@ fil_extend_space_to_desired_size( start_page_no = space->size; file_start_page_no = space->size - node->size; + mutex_exit(&fil_system->mutex); + #ifdef HAVE_POSIX_FALLOCATE 
if (srv_use_posix_fallocate) { ib_int64_t start_offset = start_page_no * page_size; ib_int64_t end_offset = (size_after_extend - start_page_no) * page_size; ib_int64_t desired_size = size_after_extend*page_size; + int err = posix_fallocate( + node->handle, start_offset, end_offset); - mutex_exit(&fil_system->mutex); + success = !err; - if (posix_fallocate(node->handle, start_offset, end_offset) == -1) { - fprintf(stderr, "InnoDB: Error: preallocating file " - "space for file \'%s\' failed. Current size " - " %lld, len %lld, desired size %lld\n", - node->name, start_offset, end_offset, desired_size); - success = FALSE; - } else { - success = TRUE; + if (!success) { + fprintf(stderr, + "InnoDB: Error: extending file %s" + " from %lld to %lld bytes" + " failed with error %d\n", + node->name, start_offset, end_offset, err); } mutex_enter(&fil_system->mutex); @@ -4154,14 +4165,25 @@ fil_extend_space_to_desired_size( } #endif +#ifdef _WIN32 + /* Write 1 page of zeroes at the desired end. */ + start_page_no = size_after_extend - 1; + buf_size = page_size; +#else /* Extend at most 64 pages at a time */ buf_size = ut_min(64, size_after_extend - start_page_no) * page_size; - buf2 = mem_alloc(buf_size + page_size); +#endif + buf2 = calloc(1, buf_size + page_size); + if (!buf2) { + fprintf(stderr, "InnoDB: Cannot allocate " ULINTPF + " bytes to extend file\n", + buf_size + page_size); + mutex_exit(&fil_system->file_extend_mutex); + return(FALSE); + } buf = ut_align(buf2, page_size); - memset(buf, 0, buf_size); - - while (start_page_no < size_after_extend) { + for (;;) { ulint n_pages = ut_min(buf_size / page_size, size_after_extend - start_page_no); @@ -4170,6 +4192,7 @@ fil_extend_space_to_desired_size( offset_low = ((start_page_no - file_start_page_no) % (4096 * ((1024 * 1024) / page_size))) * page_size; + #ifdef UNIV_HOTBACKUP success = os_file_write(node->name, node->handle, buf, offset_low, offset_high, @@ -4181,34 +4204,37 @@ fil_extend_space_to_desired_size( page_size * 
n_pages, NULL, NULL); #endif - if (success) { - node->size += n_pages; - space->size += n_pages; - os_has_said_disk_full = FALSE; - } else { - /* Let us measure the size of the file to determine - how much we were able to extend it */ + /* Let us measure the size of the file to determine + how much we were able to extend it */ - n_pages = ((ulint) - (os_file_get_size_as_iblonglong( - node->handle) - / page_size)) - node->size; + n_pages = (ulint) (os_file_get_size_as_iblonglong(node->handle) + / page_size); - node->size += n_pages; - space->size += n_pages; + mutex_enter(&fil_system->mutex); + ut_a(n_pages >= node->size); + + start_page_no += n_pages - node->size; + space->size += n_pages - node->size; + node->size = n_pages; + if (success) { + os_has_said_disk_full = FALSE; + } + + if (!success || start_page_no >= size_after_extend) { break; } - start_page_no += n_pages; + mutex_exit(&fil_system->mutex); } - mem_free(buf2); - + free(buf2); fil_node_complete_io(node, fil_system, OS_FILE_WRITE); +#ifdef HAVE_POSIX_FALLOCATE complete_io: +#endif /* HAVE_POSIX_FALLOCATE */ *actual_size = space->size; @@ -4228,6 +4254,7 @@ complete_io: printf("Extended %s to %lu, actual size %lu pages\n", space->name, size_after_extend, *actual_size); */ mutex_exit(&fil_system->mutex); + mutex_exit(&fil_system->file_extend_mutex); fil_flush(space_id); diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h index bc8d0d27be3..f074ca2f189 100644 --- a/storage/innobase/include/sync0sync.h +++ b/storage/innobase/include/sync0sync.h @@ -675,6 +675,7 @@ or row lock! 
*/ #define SYNC_BUF_BLOCK 146 /* Block mutex */ #define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */ #define SYNC_DOUBLEWRITE 140 +#define SYNC_OUTER_ANY_LATCH 136 #define SYNC_ANY_LATCH 135 #define SYNC_MEM_HASH 131 #define SYNC_MEM_POOL 130 diff --git a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c index d792e7a61d8..72b9651f596 100644 --- a/storage/innobase/os/os0file.c +++ b/storage/innobase/os/os0file.c @@ -2,6 +2,7 @@ Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. +Copyright (c) 2012, 2017, MariaDB Corporation. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Percona Inc.. Those modifications are @@ -2027,48 +2028,44 @@ os_file_set_size( ut_a(size == (size & 0xFFFFFFFF)); - current_size = 0; desired_size = (ib_int64_t)size + (((ib_int64_t)size_high) << 32); #ifdef HAVE_POSIX_FALLOCATE - if (srv_use_posix_fallocate) { - if (posix_fallocate(file, current_size, desired_size) == -1) { + if (srv_use_posix_fallocate) { + int err = posix_fallocate(file, 0, desired_size); + if (err) { fprintf(stderr, - "InnoDB: Error: preallocating data for" - " file %s failed at\n" - "InnoDB: offset 0 size %lld %lld. Operating system" - " error number %d.\n" - "InnoDB: Check that the disk is not full" - " or a disk quota exceeded.\n" - "InnoDB: Some operating system error numbers" - " are described at\n" - "InnoDB: " - REFMAN "operating-system-error-codes.html\n", - name, (long long)size_high, (long long)size, errno); - - return (FALSE); + "InnoDB: Error: preallocating %lld bytes for" + " file %s failed with error %d.\n", + desired_size, name, err); } - return (TRUE); + return(!err); } #endif +#ifdef _WIN32 + /* Write 1 page of zeroes at the desired end. */ + buf_size = UNIV_PAGE_SIZE; + current_size = desired_size - buf_size; +#else /* Write up to 1 megabyte at a time. 
*/ buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE)) * UNIV_PAGE_SIZE; - buf2 = ut_malloc(buf_size + UNIV_PAGE_SIZE); + current_size = 0; +#endif + buf2 = calloc(1, buf_size + UNIV_PAGE_SIZE); + + if (!buf2) { + fprintf(stderr, "InnoDB: Cannot allocate " ULINTPF + " bytes to extend file\n", + buf_size + UNIV_PAGE_SIZE); + return(FALSE); + } /* Align the buffer for possible raw i/o */ buf = ut_align(buf2, UNIV_PAGE_SIZE); - /* Write buffer full of zeros */ - memset(buf, 0, buf_size); - - if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) { - - fprintf(stderr, "InnoDB: Progress in MB:"); - } - - while (current_size < desired_size) { + do { ulint n_bytes; if (desired_size - current_size < (ib_int64_t) buf_size) { @@ -2082,37 +2079,14 @@ os_file_set_size( (ulint)(current_size >> 32), n_bytes); if (!ret) { - ut_free(buf2); - goto error_handling; - } - - /* Print about progress for each 100 MB written */ - if ((ib_int64_t) (current_size + n_bytes) / (ib_int64_t)(100 * 1024 * 1024) - != current_size / (ib_int64_t)(100 * 1024 * 1024)) { - - fprintf(stderr, " %lu00", - (ulong) ((current_size + n_bytes) - / (ib_int64_t)(100 * 1024 * 1024))); + break; } current_size += n_bytes; - } - - if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) { - - fprintf(stderr, "\n"); - } + } while (current_size < desired_size); - ut_free(buf2); - - ret = os_file_flush(file); - - if (ret) { - return(TRUE); - } - -error_handling: - return(FALSE); + free(buf2); + return(ret && os_file_flush(file)); } /***********************************************************************//** diff --git a/storage/xtradb/fil/fil0fil.c b/storage/xtradb/fil/fil0fil.c index 86e00dc22e4..3f9103c521f 100644 --- a/storage/xtradb/fil/fil0fil.c +++ b/storage/xtradb/fil/fil0fil.c @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2014, 2017, MariaDB Corporation. 
All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -4934,9 +4935,9 @@ fil_extend_space_to_desired_size( ulint page_size; ibool success = TRUE; - /* file_extend_mutex is for http://bugs.mysql.com/56433 */ - /* to protect from the other fil_extend_space_to_desired_size() */ - /* during temprary releasing &fil_system->mutex */ + /* fil_system->file_extend_mutex is for http://bugs.mysql.com/56433 + to prevent concurrent fil_extend_space_to_desired_size() + while fil_system->mutex is temporarily released */ mutex_enter(&fil_system->file_extend_mutex); fil_mutex_enter_and_prepare_for_io(space_id); @@ -4966,6 +4967,8 @@ fil_extend_space_to_desired_size( start_page_no = space->size; file_start_page_no = space->size - node->size; + mutex_exit(&fil_system->mutex); + #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { @@ -4973,19 +4976,19 @@ fil_extend_space_to_desired_size( = file_start_page_no * page_size; ib_int64_t end_offset = (size_after_extend - file_start_page_no) * page_size; + int err = posix_fallocate( + node->handle, start_offset, end_offset); - mutex_exit(&fil_system->mutex); - success = (posix_fallocate(node->handle, start_offset, - end_offset) == 0); - if (!success) - { + success = !err; + + if (!success) { fprintf(stderr, - "InnoDB: Error: preallocating file space for " - "file \'%s\' failed. Current size %lld, " - "len %lld, desired size %lld\n", node->name, - start_offset, end_offset, - start_offset + end_offset); + "InnoDB: Error: extending file %s" + " from %lld to %lld bytes" + " failed with error %d\n", + node->name, start_offset, end_offset, err); } + mutex_enter(&fil_system->mutex); if (success) { @@ -4999,14 +5002,25 @@ fil_extend_space_to_desired_size( } #endif +#ifdef _WIN32 + /* Write 1 page of zeroes at the desired end. 
*/ + start_page_no = size_after_extend - 1; + buf_size = page_size; +#else /* Extend at most 64 pages at a time */ buf_size = ut_min(64, size_after_extend - start_page_no) * page_size; - buf2 = mem_alloc(buf_size + page_size); +#endif + buf2 = calloc(1, buf_size + page_size); + if (!buf2) { + fprintf(stderr, "InnoDB: Cannot allocate " ULINTPF + " bytes to extend file\n", + buf_size + page_size); + mutex_exit(&fil_system->file_extend_mutex); + return(FALSE); + } buf = ut_align(buf2, page_size); - memset(buf, 0, buf_size); - - while (start_page_no < size_after_extend) { + for (;;) { ulint n_pages = ut_min(buf_size / page_size, size_after_extend - start_page_no); @@ -5016,7 +5030,6 @@ fil_extend_space_to_desired_size( % (4096 * ((1024 * 1024) / page_size))) * page_size; - mutex_exit(&fil_system->mutex); #ifdef UNIV_HOTBACKUP success = os_file_write(node->name, node->handle, buf, offset_low, offset_high, @@ -5028,36 +5041,37 @@ fil_extend_space_to_desired_size( page_size * n_pages, NULL, NULL, space_id, NULL); #endif - mutex_enter(&fil_system->mutex); - if (success) { - node->size += n_pages; - space->size += n_pages; + /* Let us measure the size of the file to determine + how much we were able to extend it */ - os_has_said_disk_full = FALSE; - } else { - /* Let us measure the size of the file to determine - how much we were able to extend it */ + n_pages = (ulint) (os_file_get_size_as_iblonglong(node->handle) + / page_size); - n_pages = ((ulint) - (os_file_get_size_as_iblonglong( - node->handle) - / page_size)) - node->size; + mutex_enter(&fil_system->mutex); + ut_a(n_pages >= node->size); - node->size += n_pages; - space->size += n_pages; + start_page_no += n_pages - node->size; + space->size += n_pages - node->size; + node->size = n_pages; + if (success) { + os_has_said_disk_full = FALSE; + } + + if (!success || start_page_no >= size_after_extend) { break; } - start_page_no += n_pages; + mutex_exit(&fil_system->mutex); } - mem_free(buf2); - + free(buf2); 
fil_node_complete_io(node, fil_system, OS_FILE_WRITE); +#ifdef HAVE_POSIX_FALLOCATE complete_io: +#endif /* HAVE_POSIX_FALLOCATE */ *actual_size = space->size; diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c index cca5ffa4772..201e4487ada 100644 --- a/storage/xtradb/os/os0file.c +++ b/storage/xtradb/os/os0file.c @@ -2,6 +2,7 @@ Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. +Copyright (c) 2011, 2017, MariaDB Corporation. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Percona Inc.. Those modifications are @@ -2184,42 +2185,44 @@ os_file_set_size( ut_a(size == (size & 0xFFFFFFFF)); - current_size = 0; desired_size = (ib_int64_t)size + (((ib_int64_t)size_high) << 32); #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { - - if (posix_fallocate(file, current_size, desired_size) == -1) { - - fprintf(stderr, "InnoDB: Error: preallocating file " - "space for file \'%s\' failed. Current size " - "%lld, desired size %lld\n", - name, current_size, desired_size); - os_file_handle_error_no_exit(name, "posix_fallocate"); - return(FALSE); + int err = posix_fallocate(file, 0, desired_size); + if (err) { + fprintf(stderr, + "InnoDB: Error: preallocating %lld bytes for" + " file %s failed with error %d.\n", + desired_size, name, err); } - return(TRUE); + return(!err); } #endif +#ifdef _WIN32 + /* Write 1 page of zeroes at the desired end. */ + buf_size = UNIV_PAGE_SIZE; + current_size = desired_size - buf_size; +#else /* Write up to 1 megabyte at a time. 
*/ buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE)) * UNIV_PAGE_SIZE; - buf2 = ut_malloc(buf_size + UNIV_PAGE_SIZE); + current_size = 0; +#endif + buf2 = calloc(1, buf_size + UNIV_PAGE_SIZE); + + if (!buf2) { + fprintf(stderr, "InnoDB: Cannot allocate " ULINTPF + " bytes to extend file\n", + buf_size + UNIV_PAGE_SIZE); + return(FALSE); + } /* Align the buffer for possible raw i/o */ buf = ut_align(buf2, UNIV_PAGE_SIZE); - /* Write buffer full of zeros */ - memset(buf, 0, buf_size); - - if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) { - - fprintf(stderr, "InnoDB: Progress in MB:"); - } - - while (current_size < desired_size) { + do { ulint n_bytes; if (desired_size - current_size < (ib_int64_t) buf_size) { @@ -2233,37 +2236,14 @@ os_file_set_size( (ulint)(current_size >> 32), n_bytes); if (!ret) { - ut_free(buf2); - goto error_handling; - } - - /* Print about progress for each 100 MB written */ - if ((ib_int64_t) (current_size + n_bytes) / (ib_int64_t)(100 * 1024 * 1024) - != current_size / (ib_int64_t)(100 * 1024 * 1024)) { - - fprintf(stderr, " %lu00", - (ulong) ((current_size + n_bytes) - / (ib_int64_t)(100 * 1024 * 1024))); + break; } current_size += n_bytes; - } - - if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) { - - fprintf(stderr, "\n"); - } - - ut_free(buf2); - - ret = os_file_flush(file, TRUE); - - if (ret) { - return(TRUE); - } + } while (current_size < desired_size); -error_handling: - return(FALSE); + free(buf2); + return(ret && os_file_flush(file, TRUE)); } /***********************************************************************//** |