summaryrefslogtreecommitdiff
path: root/storage/xtradb
diff options
context:
space:
mode:
author Marko Mäkelä <marko.makela@mariadb.com> 2017-02-22 17:17:00 +0200
committer Marko Mäkelä <marko.makela@mariadb.com> 2017-02-22 22:29:56 +0200
commit ec4cf111c0d8599c63c486dd96b180d5e7cff8ea (patch)
tree 7e23c566d0b4579b0df6d766f6ad5994b481467b /storage/xtradb
parent e1e920bf63550304eccee057edc568479ecf79ac (diff)
download mariadb-git-ec4cf111c0d8599c63c486dd96b180d5e7cff8ea.tar.gz
MDEV-11520 after-merge fix for 10.1: Use sparse files.
If page_compression (introduced in MariaDB Server 10.1) is enabled, the logical action is to not preallocate space to the data files, but to only logically extend the files with zeroes. fil_create_new_single_table_tablespace(): Create smaller files for ROW_FORMAT=COMPRESSED tables, but adhere to the minimum file size of 4*innodb_page_size. fil_space_extend_must_retry(), os_file_set_size(): On Windows, use SetFileInformationByHandle() and FILE_END_OF_FILE_INFO, which depends on bumping _WIN32_WINNT to 0x0600. FIXME: The files are not yet set up as sparse, so this will currently end up physically extending (preallocating) the files, wasting storage for unused pages. os_file_set_size(): Add the parameter "bool is_sparse=false" to declare that the file is to be extended logically, instead of being preallocated. The only caller with is_sparse=true is fil_create_new_single_table_tablespace(). (The system tablespace cannot be created with page_compression.) fil_space_extend_must_retry(), os_file_set_size(): Outside Windows, use ftruncate() to extend files that are supposed to be sparse. On systems where ftruncate() is limited to files less than 4GiB (if there are any), fil_space_extend_must_retry() retains the old logic of physically extending the file.
Diffstat (limited to 'storage/xtradb')
-rw-r--r--storage/xtradb/fil/fil0fil.cc118
-rw-r--r--storage/xtradb/include/os0file.h20
-rw-r--r--storage/xtradb/os/os0file.cc71
-rw-r--r--storage/xtradb/srv/srv0start.cc18
4 files changed, 154 insertions, 73 deletions
diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc
index 7976f65df31..848406d9061 100644
--- a/storage/xtradb/fil/fil0fil.cc
+++ b/storage/xtradb/fil/fil0fil.cc
@@ -694,12 +694,10 @@ fil_node_open_file(
return(false);
}
- if (!fsp_flags_is_compressed(flags)) {
- node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
+ if (ulint zip_size = fsp_flags_get_zip_size(flags)) {
+ node->size = ulint(size_bytes / zip_size);
} else {
- node->size = (ulint)
- (size_bytes
- / fsp_flags_get_zip_size(flags));
+ node->size = ulint(size_bytes / UNIV_PAGE_SIZE);
}
#ifdef UNIV_HOTBACKUP
@@ -1044,20 +1042,57 @@ fil_space_extend_must_retry(
}
ulint page_size = fsp_flags_get_zip_size(space->flags);
-
if (!page_size) {
page_size = UNIV_PAGE_SIZE;
}
-#ifdef HAVE_POSIX_FALLOCATE
+#ifdef _WIN32
+ const ulint io_completion_type = OS_FILE_READ;
+ /* Logically or physically extend the file with zero bytes,
+ depending on whether it is sparse. */
+
+ /* FIXME: Call DeviceIoControl(node->handle, FSCTL_SET_SPARSE, ...)
+ when opening a file when FSP_FLAGS_HAS_PAGE_COMPRESSION(). */
+ {
+ FILE_END_OF_FILE_INFO feof;
+ /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes.
+ fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes.
+ Do not shrink short ROW_FORMAT=COMPRESSED files. */
+ feof.EndOfFile.QuadPart = std::max(
+ os_offset_t(size - file_start_page_no) * page_size,
+ os_offset_t(FIL_IBD_FILE_INITIAL_SIZE
+ * UNIV_PAGE_SIZE));
+ *success = SetFileInformationByHandle(node->handle,
+ FileEndOfFileInfo,
+ &feof, sizeof feof);
+ if (!*success) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "extending file %s"
+ " from " INT64PF
+ " to " INT64PF " bytes failed with %u",
+ node->name,
+ os_offset_t(node->size) * page_size,
+ feof.EndOfFile.QuadPart, GetLastError());
+ } else {
+ start_page_no = size;
+ }
+ }
+#else
+ /* We will logically extend the file with ftruncate() if
+ page_compression is enabled, because the file is expected to
+ be sparse in that case. Make sure that ftruncate() can deal
+ with large files. */
+ const bool is_sparse = sizeof(off_t) >= 8
+ && FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags);
+
+# ifdef HAVE_POSIX_FALLOCATE
/* We must complete the I/O request after invoking
posix_fallocate() to avoid an assertion failure at shutdown.
Because no actual writes were dispatched, a read operation
will suffice. */
const ulint io_completion_type = srv_use_posix_fallocate
- ? OS_FILE_READ : OS_FILE_WRITE;
+ || is_sparse ? OS_FILE_READ : OS_FILE_WRITE;
- if (srv_use_posix_fallocate) {
+ if (srv_use_posix_fallocate && !is_sparse) {
const os_offset_t start_offset
= os_offset_t(start_page_no - file_start_page_no)
* page_size;
@@ -1083,19 +1118,33 @@ fil_space_extend_must_retry(
start_page_no = size;
}
} else
-#else
- const ulint io_completion_type = OS_FILE_WRITE;
-#endif
- {
-#ifdef _WIN32
- /* Write 1 page of zeroes at the desired end. */
- ulint buf_size = page_size;
- start_page_no = size - 1;
-#else
+# else
+ const ulint io_completion_type = is_sparse
+ ? OS_FILE_READ : OS_FILE_WRITE;
+# endif
+ if (is_sparse) {
+ /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes.
+ fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes.
+ Do not shrink short ROW_FORMAT=COMPRESSED files. */
+ off_t s = std::max(off_t(size - file_start_page_no)
+ * off_t(page_size),
+ off_t(FIL_IBD_FILE_INITIAL_SIZE
+ * UNIV_PAGE_SIZE));
+ *success = !ftruncate(node->handle, s);
+ if (!*success) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "ftruncate of file %s"
+ " from " INT64PF " to " INT64PF " bytes"
+ " failed with error %d",
+ node->name,
+ os_offset_t(start_page_no - file_start_page_no)
+ * page_size, os_offset_t(s), errno);
+ } else {
+ start_page_no = size;
+ }
+ } else {
/* Extend at most 64 pages at a time */
ulint buf_size = ut_min(64, size - start_page_no)
* page_size;
-#endif
byte* buf2 = static_cast<byte*>(
calloc(1, buf_size + page_size));
*success = buf2 != NULL;
@@ -1141,7 +1190,7 @@ fil_space_extend_must_retry(
free(buf2);
}
-
+#endif
mutex_enter(&fil_system->mutex);
ut_a(node->being_extended);
@@ -3840,7 +3889,23 @@ fil_create_new_single_table_tablespace(
goto error_exit_3;
}
- ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE);
+ {
+ /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes.
+ fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes.
+ Do not create too short ROW_FORMAT=COMPRESSED files. */
+ const ulint zip_size = fsp_flags_get_zip_size(flags);
+ const ulint page_size = zip_size ? zip_size : UNIV_PAGE_SIZE;
+ const os_offset_t fsize = std::max(
+ os_offset_t(size) * page_size,
+ os_offset_t(FIL_IBD_FILE_INITIAL_SIZE
+ * UNIV_PAGE_SIZE));
+ /* ROW_FORMAT=COMPRESSED files never use page_compression
+ (are never sparse). */
+ ut_ad(!zip_size || !FSP_FLAGS_HAS_PAGE_COMPRESSION(flags));
+
+ ret = os_file_set_size(path, file, fsize,
+ FSP_FLAGS_HAS_PAGE_COMPRESSION(flags));
+ }
if (!ret) {
err = DB_OUT_OF_FILE_SPACE;
@@ -3868,14 +3933,8 @@ fil_create_new_single_table_tablespace(
fsp_header_init_fields(page, space_id, flags);
mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
- if (!(fsp_flags_is_compressed(flags))) {
- buf_flush_init_for_writing(page, NULL, 0);
- ret = os_file_write(path, file, page, 0, UNIV_PAGE_SIZE);
- } else {
+ if (const ulint zip_size = fsp_flags_get_zip_size(flags)) {
page_zip_des_t page_zip;
- ulint zip_size;
-
- zip_size = fsp_flags_get_zip_size(flags);
page_zip_set_size(&page_zip, zip_size);
page_zip.data = page + UNIV_PAGE_SIZE;
@@ -3886,6 +3945,9 @@ fil_create_new_single_table_tablespace(
page_zip.n_blobs = 0;
buf_flush_init_for_writing(page, &page_zip, 0);
ret = os_file_write(path, file, page_zip.data, 0, zip_size);
+ } else {
+ buf_flush_init_for_writing(page, NULL, 0);
+ ret = os_file_write(path, file, page, 0, UNIV_PAGE_SIZE);
}
ut_free(buf2);
diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h
index 933fc971926..f590c5f16ce 100644
--- a/storage/xtradb/include/os0file.h
+++ b/storage/xtradb/include/os0file.h
@@ -904,17 +904,19 @@ os_file_get_size(
/*=============*/
os_file_t file) /*!< in: handle to a file */
MY_ATTRIBUTE((warn_unused_result));
-/***********************************************************************//**
-Write the specified number of zeros to a newly created file.
-@return TRUE if success */
+/** Set the size of a newly created file.
+@param[in] name file name
+@param[in] file file handle
+@param[in] size desired file size
+@param[in] is_sparse whether to create a sparse file (no preallocating)
+@return whether the operation succeeded */
UNIV_INTERN
-ibool
+bool
os_file_set_size(
-/*=============*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- os_file_t file, /*!< in: handle to a file */
- os_offset_t size) /*!< in: file size */
+ const char* name,
+ os_file_t file,
+ os_offset_t size,
+ bool is_sparse = false)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/***********************************************************************//**
Truncates a file at its current position.
diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc
index e96ec77af39..d9445f6959a 100644
--- a/storage/xtradb/os/os0file.cc
+++ b/storage/xtradb/os/os0file.cc
@@ -355,7 +355,7 @@ UNIV_INTERN ulint os_n_pending_writes = 0;
UNIV_INTERN ulint os_n_pending_reads = 0;
/** After first fallocate failure we will disable os_file_trim */
-UNIV_INTERN ibool os_fallocate_failed = FALSE;
+static ibool os_fallocate_failed;
/**********************************************************************//**
Directly manipulate the allocated disk space by deallocating for the file referred to
@@ -364,7 +364,7 @@ Within the specified range, partial file system blocks are zeroed, and whole
file system blocks are removed from the file. After a successful call,
subsequent reads from this range will return zeroes.
@return true if success, false if error */
-UNIV_INTERN
+static
ibool
os_file_trim(
/*=========*/
@@ -2541,24 +2541,44 @@ os_file_get_size(
#endif /* __WIN__ */
}
-/***********************************************************************//**
-Write the specified number of zeros to a newly created file.
-@return TRUE if success */
+/** Set the size of a newly created file.
+@param[in] name file name
+@param[in] file file handle
+@param[in] size desired file size
+@param[in] is_sparse whether to create a sparse file (no preallocating)
+@return whether the operation succeeded */
UNIV_INTERN
-ibool
+bool
os_file_set_size(
-/*=============*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- os_file_t file, /*!< in: handle to a file */
- os_offset_t size) /*!< in: file size */
+ const char* name,
+ os_file_t file,
+ os_offset_t size,
+ bool is_sparse)
{
- ibool ret;
- byte* buf;
- byte* buf2;
- ulint buf_size;
+#ifdef _WIN32
+ FILE_END_OF_FILE_INFO feof;
+ feof.EndOfFile.QuadPart = size;
+ bool success = SetFileInformationByHandle(file,
+ FileEndOfFileInfo,
+ &feof, sizeof feof);
+ if (!success) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "os_file_set_size() of file %s"
+ " to " INT64PF " bytes failed with %u",
+ name, size, GetLastError());
+ }
+ return(success);
+#else
+ if (is_sparse) {
+ bool success = !ftruncate(file, size);
+ if (!success) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "ftruncate of file %s"
+ " to " INT64PF " bytes failed with error %d",
+ name, size, errno);
+ }
+ return(success);
+ }
-#ifdef HAVE_POSIX_FALLOCATE
+# ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
int err = posix_fallocate(file, 0, size);
if (err) {
@@ -2569,29 +2589,25 @@ os_file_set_size(
}
return(!err);
}
-#endif
+# endif
-#ifdef _WIN32
- /* Write 1 page of zeroes at the desired end. */
- buf_size = UNIV_PAGE_SIZE;
- os_offset_t current_size = size - buf_size;
-#else
/* Write up to 1 megabyte at a time. */
- buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE))
+ ulint buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE))
* UNIV_PAGE_SIZE;
os_offset_t current_size = 0;
-#endif
- buf2 = static_cast<byte*>(calloc(1, buf_size + UNIV_PAGE_SIZE));
+
+ byte* buf2 = static_cast<byte*>(calloc(1, buf_size + UNIV_PAGE_SIZE));
if (!buf2) {
ib_logf(IB_LOG_LEVEL_ERROR,
"Cannot allocate " ULINTPF " bytes to extend file\n",
buf_size + UNIV_PAGE_SIZE);
- return(FALSE);
+ return(false);
}
/* Align the buffer for possible raw i/o */
- buf = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
+ byte* buf = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
+ bool ret;
do {
ulint n_bytes;
@@ -2614,6 +2630,7 @@ os_file_set_size(
free(buf2);
return(ret && os_file_flush(file));
+#endif
}
/***********************************************************************//**
diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc
index a04f8dc732c..679913959c9 100644
--- a/storage/xtradb/srv/srv0start.cc
+++ b/storage/xtradb/srv/srv0start.cc
@@ -1162,14 +1162,13 @@ check_first_page:
(ulong) (srv_data_file_sizes[i]
>> (20 - UNIV_PAGE_SIZE_SHIFT)));
- ib_logf(IB_LOG_LEVEL_INFO,
- "Database physically writes the"
- " file full: wait...");
-
ret = os_file_set_size(
name, files[i],
(os_offset_t) srv_data_file_sizes[i]
- << UNIV_PAGE_SIZE_SHIFT);
+ << UNIV_PAGE_SIZE_SHIFT
+ /* TODO: enable page_compression on the
+ system tablespace and add
+ , FSP_FLAGS_HAS_PAGE_COMPRESSION(flags)*/);
if (!ret) {
ib_logf(IB_LOG_LEVEL_ERROR,
@@ -1266,10 +1265,11 @@ srv_undo_tablespace_create(
"Setting file %s size to %lu MB",
name, size >> (20 - UNIV_PAGE_SIZE_SHIFT));
- ib_logf(IB_LOG_LEVEL_INFO,
- "Database physically writes the file full: wait...");
-
- ret = os_file_set_size(name, fh, size << UNIV_PAGE_SIZE_SHIFT);
+ ret = os_file_set_size(name, fh, size << UNIV_PAGE_SIZE_SHIFT
+ /* TODO: enable page_compression on the
+ system tablespace and add
+ FSP_FLAGS_HAS_PAGE_COMPRESSION(flags)
+ */);
if (!ret) {
ib_logf(IB_LOG_LEVEL_INFO,