summaryrefslogtreecommitdiff
path: root/numpy/core
diff options
context:
space:
mode:
authorJulian Taylor <jtaylor.debian@googlemail.com>2015-11-16 23:09:57 +0100
committerCharles Harris <charlesr.harris@gmail.com>2015-12-17 13:18:41 -0700
commit1350b46714ac8f6f04646ae637b84ef23c2ac917 (patch)
treee5d5bfd6ac6b6d84978b8a6f67fb1b5788d44382 /numpy/core
parent316c19836aa86e3fe26a041877aabbce432ec554 (diff)
downloadnumpy-1350b46714ac8f6f04646ae637b84ef23c2ac917.tar.gz
ENH: use linux fallocate to reserve diskspace in array.tofile
fallocate allows the filesystem to make smarter decisions about space allocation and gives a fast failure path for insufficient space. This is very important for filesystems that suffer a lot from fragmentation, like btrfs. Restricted to Linux only, as that is the only system whose behavior I know. Other systems might also have this system call, but we don't want to accidentally trigger explicit zeroing behavior, as e.g. posix_fallocate would when there is no support for a real fallocate.
Diffstat (limited to 'numpy/core')
-rw-r--r--numpy/core/setup_common.py2
-rw-r--r--numpy/core/src/multiarray/convert.c41
-rw-r--r--numpy/core/tests/test_multiarray.py13
3 files changed, 55 insertions, 1 deletions
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index d93e475e3..e0cb3f630 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -104,7 +104,7 @@ MANDATORY_FUNCS = ["sin", "cos", "tan", "sinh", "cosh", "tanh", "fabs",
OPTIONAL_STDFUNCS = ["expm1", "log1p", "acosh", "asinh", "atanh",
"rint", "trunc", "exp2", "log2", "hypot", "atan2", "pow",
"copysign", "nextafter", "ftello", "fseeko",
- "strtoll", "strtoull", "cbrt", "strtold_l",]
+ "strtoll", "strtoull", "cbrt", "strtold_l", "fallocate"]
OPTIONAL_HEADERS = [
diff --git a/numpy/core/src/multiarray/convert.c b/numpy/core/src/multiarray/convert.c
index 7cb27581a..805adec8f 100644
--- a/numpy/core/src/multiarray/convert.c
+++ b/numpy/core/src/multiarray/convert.c
@@ -2,6 +2,8 @@
#include <Python.h>
#include "structmember.h"
+#include <npy_config.h>
+
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE
#include "numpy/arrayobject.h"
@@ -19,6 +21,42 @@
#include "convert.h"
+#if defined(HAVE_FALLOCATE) && defined(__linux__)
+/*
+ * explicit prototype: glibc only exposes fallocate from <fcntl.h> when
+ * _GNU_SOURCE is defined; only needed where npy_fallocate actually calls it
+ */
+int fallocate(int fd, int mode, off_t offset, off_t len);
+#endif
+
+/*
+ * reserve nbytes of diskspace for file fp, starting at its current position
+ * this allows the filesystem to make smarter allocation decisions and gives a
+ * fast exit on not enough free space
+ *
+ * the reservation never changes the file size, so the data written
+ * afterwards lands where it would have without the reservation (this matters
+ * for streams opened in append mode)
+ *
+ * returns 0 on success, when the reservation is skipped (small request or
+ * unsupported platform) and on all errors other than ENOSPC
+ * returns -1 and raises IOError on no space
+ */
+static int npy_fallocate(npy_intp nbytes, FILE * fp)
+{
+    /*
+     * unknown behavior on non-linux so don't try it
+     * we don't want explicit zeroing to happen
+     */
+#if defined(HAVE_FALLOCATE) && defined(__linux__)
+    int r;
+    /* small files not worth the system call */
+    if (nbytes < 16 * 1024 * 1024) {
+        return 0;
+    }
+    /* btrfs can take a while to allocate making release worthwhile */
+    NPY_BEGIN_ALLOW_THREADS;
+    /*
+     * flush first so data still sitting in the stdio buffer cannot interact
+     * with the raw file descriptor call below; a failing flush is not
+     * reported here, the same error shows up again during fwrite
+     */
+    fflush(fp);
+    /*
+     * mode 1 is FALLOC_FL_KEEP_SIZE: reserve the blocks without growing the
+     * file. With mode 0 the file is extended by nbytes right away, and a
+     * stream opened in append mode would then write its data *after* the
+     * reserved region, leaving nbytes of zeros in front of it.
+     */
+    r = fallocate(fileno(fp), 1, npy_ftell(fp), nbytes);
+    NPY_END_ALLOW_THREADS;
+    /*
+     * early exit on no space, other errors will also get found during fwrite
+     */
+    if (r == -1 && errno == ENOSPC) {
+        PyErr_Format(PyExc_IOError, "Not enough free space to write "
+                     "%"NPY_INTP_FMT" bytes", nbytes);
+        return -1;
+    }
+#endif
+    return 0;
+}
+
+
/*
* Converts a subarray of 'self' into lists, with starting data pointer
* 'dataptr' and from dimension 'startdim' to the last dimension of 'self'.
@@ -92,6 +130,9 @@ PyArray_ToFile(PyArrayObject *self, FILE *fp, char *sep, char *format)
"cannot write object arrays to a file in binary mode");
return -1;
}
+ if (npy_fallocate(PyArray_NBYTES(self), fp) != 0) {
+ return -1;
+ }
if (PyArray_ISCONTIGUOUS(self)) {
size = PyArray_SIZE(self);
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 593607954..d03c5f547 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -9,6 +9,7 @@ import operator
import io
import itertools
import ctypes
+import os
if sys.version_info[0] >= 3:
import builtins
else:
@@ -3377,6 +3378,18 @@ class TestIO(object):
y = np.fromfile(self.filename, dtype=self.dtype)
assert_array_equal(y, self.x.flat)
+ def test_largish_file(self):
+ # check the fallocate path on files > 16MB
+ d = np.zeros(4 * 1024 ** 2)
+ d.tofile(self.filename)
+ assert_equal(os.path.getsize(self.filename), d.nbytes)
+ assert_array_equal(d, np.fromfile(self.filename));
+ # check offset
+ with open(self.filename, "r+b") as f:
+ f.seek(d.nbytes)
+ d.tofile(f)
+ assert_equal(os.path.getsize(self.filename), d.nbytes * 2)
+
def test_file_position_after_fromfile(self):
# gh-4118
sizes = [io.DEFAULT_BUFFER_SIZE//8,