diff options
author | Antony Lee <anntzer.lee@gmail.com> | 2021-08-17 20:56:19 +0200 |
---|---|---|
committer | Antony Lee <anntzer.lee@gmail.com> | 2021-08-17 21:00:07 +0200 |
commit | 4e5b2e1f0bdcce5c0d030f0e539c19dda518d765 (patch) | |
tree | cf8f5c14313f078c724d3704c71ce5dc3c800c36 /numpy/lib/npyio.py | |
parent | db1a3432cb347808f297af51ca16a016fef934b3 (diff) | |
download | numpy-4e5b2e1f0bdcce5c0d030f0e539c19dda518d765.tar.gz |
MAINT: In loadtxt, inline read_data.
No speed difference; the point is to avoid an unnecessary inner
generator (which was previously defined quite far away from its point of
use).
Diffstat (limited to 'numpy/lib/npyio.py')
-rw-r--r-- | numpy/lib/npyio.py | 51 |
1 file changed, 18 insertions(+), 33 deletions(-)
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 7a594f25b..00b5918f8 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -977,35 +977,6 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, line = line.strip('\r\n') return line.split(delimiter) if line else [] - def read_data(lineno_words_iter, chunk_size): - """ - Parse each line, including the first. - - Parameters - ---------- - lineno_words_iter : Iterator[tuple[int, list[str]]] - Iterator returning line numbers and non-empty lines already split - into words. - chunk_size : int - At most `chunk_size` lines are read at a time, with iteration - until all lines are read. - """ - X = [] - for lineno, words in lineno_words_iter: - if usecols: - words = [words[j] for j in usecols] - if len(words) != ncols: - raise ValueError(f"Wrong number of columns at line {lineno}") - # Convert each value according to its column, then pack it - # according to the dtype's nesting - items = packer(convert_row(words)) - X.append(items) - if len(X) > chunk_size: - yield X - X = [] - if X: - yield X - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Main body of loadtxt. # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -1171,15 +1142,29 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, # probably not relevant compared to the cost of actually reading and # converting the data X = None - for x in read_data(lineno_words_iter, _loadtxt_chunksize): + while True: + chunk = [] + for lineno, words in itertools.islice( + lineno_words_iter, _loadtxt_chunksize): + if usecols: + words = [words[j] for j in usecols] + if len(words) != ncols: + raise ValueError( + f"Wrong number of columns at line {lineno}") + # Convert each value according to its column, then pack it + # according to the dtype's nesting, and store it. + chunk.append(packer(convert_row(words))) + if not chunk: # The islice is empty, i.e. we're done. 
+ break + if X is None: - X = np.array(x, dtype) + X = np.array(chunk, dtype) else: nshape = list(X.shape) pos = nshape[0] - nshape[0] += len(x) + nshape[0] += len(chunk) X.resize(nshape, refcheck=False) - X[pos:, ...] = x + X[pos:, ...] = chunk finally: if fown: fh.close() |