diff options
author | Julian Taylor <jtaylor.debian@googlemail.com> | 2015-11-16 23:09:57 +0100 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2015-12-17 13:18:41 -0700 |
commit | 1350b46714ac8f6f04646ae637b84ef23c2ac917 (patch) | |
tree | e5d5bfd6ac6b6d84978b8a6f67fb1b5788d44382 /numpy/core | |
parent | 316c19836aa86e3fe26a041877aabbce432ec554 (diff) | |
download | numpy-1350b46714ac8f6f04646ae637b84ef23c2ac917.tar.gz |
ENH: use Linux fallocate to reserve disk space in array.tofile
fallocate allows the filesystem to make smarter decisions about space
allocation and gives a fast failure path for insufficient space.
This is very important for filesystems that suffer a lot from
fragmentation, such as btrfs.
Restricted to Linux only, as that is the only system whose behavior I
know. Other systems might also have this system call, but we don't want to
accidentally trigger explicit zeroing behavior, as e.g. posix_fallocate
would when there is no support for a real fallocate.
Diffstat (limited to 'numpy/core')
-rw-r--r-- | numpy/core/setup_common.py | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/convert.c | 41 | ||||
-rw-r--r-- | numpy/core/tests/test_multiarray.py | 13 |
3 files changed, 55 insertions, 1 deletions
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index d93e475e3..e0cb3f630 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -104,7 +104,7 @@ MANDATORY_FUNCS = ["sin", "cos", "tan", "sinh", "cosh", "tanh", "fabs", OPTIONAL_STDFUNCS = ["expm1", "log1p", "acosh", "asinh", "atanh", "rint", "trunc", "exp2", "log2", "hypot", "atan2", "pow", "copysign", "nextafter", "ftello", "fseeko", - "strtoll", "strtoull", "cbrt", "strtold_l",] + "strtoll", "strtoull", "cbrt", "strtold_l", "fallocate"] OPTIONAL_HEADERS = [ diff --git a/numpy/core/src/multiarray/convert.c b/numpy/core/src/multiarray/convert.c index 7cb27581a..805adec8f 100644 --- a/numpy/core/src/multiarray/convert.c +++ b/numpy/core/src/multiarray/convert.c @@ -2,6 +2,8 @@ #include <Python.h> #include "structmember.h" +#include <npy_config.h> + #define NPY_NO_DEPRECATED_API NPY_API_VERSION #define _MULTIARRAYMODULE #include "numpy/arrayobject.h" @@ -19,6 +21,42 @@ #include "convert.h" +int fallocate(int fd, int mode, off_t offset, off_t len); + +/* + * allocate nbytes of diskspace for file fp + * this allows the filesystem to make smarter allocation decisions and gives a + * fast exit on not enough free space + * returns -1 and raises exception on no space, ignores all other errors + */ +static int npy_fallocate(npy_intp nbytes, FILE * fp) +{ + /* + * unknown behavior on non-linux so don't try it + * we don't want explicit zeroing to happen + */ +#if defined(HAVE_FALLOCATE) && defined(__linux__) + int r; + /* small files not worth the system call */ + if (nbytes < 16 * 1024 * 1024) { + return 0; + } + /* btrfs can take a while to allocate making release worthwhile */ + NPY_BEGIN_ALLOW_THREADS; + r = fallocate(fileno(fp), 0, npy_ftell(fp), nbytes); + NPY_END_ALLOW_THREADS; + /* + * early exit on no space, other errors will also get found during fwrite + */ + if (r == -1 && errno == ENOSPC) { + PyErr_Format(PyExc_IOError, "Not enough free space to write " + 
"%"NPY_INTP_FMT" bytes", nbytes); + return -1; + } +#endif + return 0; +} + /* * Converts a subarray of 'self' into lists, with starting data pointer * 'dataptr' and from dimension 'startdim' to the last dimension of 'self'. @@ -92,6 +130,9 @@ PyArray_ToFile(PyArrayObject *self, FILE *fp, char *sep, char *format) "cannot write object arrays to a file in binary mode"); return -1; } + if (npy_fallocate(PyArray_NBYTES(self), fp) != 0) { + return -1; + } if (PyArray_ISCONTIGUOUS(self)) { size = PyArray_SIZE(self); diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 593607954..d03c5f547 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -9,6 +9,7 @@ import operator import io import itertools import ctypes +import os if sys.version_info[0] >= 3: import builtins else: @@ -3377,6 +3378,18 @@ class TestIO(object): y = np.fromfile(self.filename, dtype=self.dtype) assert_array_equal(y, self.x.flat) + def test_largish_file(self): + # check the fallocate path on files > 16MB + d = np.zeros(4 * 1024 ** 2) + d.tofile(self.filename) + assert_equal(os.path.getsize(self.filename), d.nbytes) + assert_array_equal(d, np.fromfile(self.filename)); + # check offset + with open(self.filename, "r+b") as f: + f.seek(d.nbytes) + d.tofile(f) + assert_equal(os.path.getsize(self.filename), d.nbytes * 2) + def test_file_position_after_fromfile(self): # gh-4118 sizes = [io.DEFAULT_BUFFER_SIZE//8, |