path: root/numpy/lib/npyio.py
author     Antony Lee <anntzer.lee@gmail.com>  2021-08-17 20:56:19 +0200
committer  Antony Lee <anntzer.lee@gmail.com>  2021-08-17 21:00:07 +0200
commit     4e5b2e1f0bdcce5c0d030f0e539c19dda518d765 (patch)
tree       cf8f5c14313f078c724d3704c71ce5dc3c800c36 /numpy/lib/npyio.py
parent     db1a3432cb347808f297af51ca16a016fef934b3 (diff)
download   numpy-4e5b2e1f0bdcce5c0d030f0e539c19dda518d765.tar.gz
MAINT: In loadtxt, inline read_data.
No speed difference; the point is to avoid an unnecessary inner generator (which was previously defined quite far away from its point of use).
Diffstat (limited to 'numpy/lib/npyio.py')
-rw-r--r--  numpy/lib/npyio.py  51
1 file changed, 18 insertions(+), 33 deletions(-)
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 7a594f25b..00b5918f8 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -977,35 +977,6 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
line = line.strip('\r\n')
return line.split(delimiter) if line else []
- def read_data(lineno_words_iter, chunk_size):
- """
- Parse each line, including the first.
-
- Parameters
- ----------
- lineno_words_iter : Iterator[tuple[int, list[str]]]
- Iterator returning line numbers and non-empty lines already split
- into words.
- chunk_size : int
- At most `chunk_size` lines are read at a time, with iteration
- until all lines are read.
- """
- X = []
- for lineno, words in lineno_words_iter:
- if usecols:
- words = [words[j] for j in usecols]
- if len(words) != ncols:
- raise ValueError(f"Wrong number of columns at line {lineno}")
- # Convert each value according to its column, then pack it
- # according to the dtype's nesting
- items = packer(convert_row(words))
- X.append(items)
- if len(X) > chunk_size:
- yield X
- X = []
- if X:
- yield X
-
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Main body of loadtxt.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@@ -1171,15 +1142,29 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
# probably not relevant compared to the cost of actually reading and
# converting the data
X = None
- for x in read_data(lineno_words_iter, _loadtxt_chunksize):
+ while True:
+ chunk = []
+ for lineno, words in itertools.islice(
+ lineno_words_iter, _loadtxt_chunksize):
+ if usecols:
+ words = [words[j] for j in usecols]
+ if len(words) != ncols:
+ raise ValueError(
+ f"Wrong number of columns at line {lineno}")
+ # Convert each value according to its column, then pack it
+ # according to the dtype's nesting, and store it.
+ chunk.append(packer(convert_row(words)))
+ if not chunk: # The islice is empty, i.e. we're done.
+ break
+
if X is None:
- X = np.array(x, dtype)
+ X = np.array(chunk, dtype)
else:
nshape = list(X.shape)
pos = nshape[0]
- nshape[0] += len(x)
+ nshape[0] += len(chunk)
X.resize(nshape, refcheck=False)
- X[pos:, ...] = x
+ X[pos:, ...] = chunk
finally:
if fown:
fh.close()
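
As an aside, the islice-based chunking plus in-place resize that this commit inlines can be shown as a small standalone sketch. The parse_rows helper, CHUNK_SIZE constant, and read_chunked function below are illustrative stand-ins (the real loadtxt parses via its own word-splitting and converter machinery and uses a much larger _loadtxt_chunksize), so this is a sketch of the pattern, not the actual implementation:

import itertools
import numpy as np

CHUNK_SIZE = 4  # stand-in for numpy's _loadtxt_chunksize

def parse_rows(lines):
    # Yield one parsed row (a list of floats) per non-empty line,
    # playing the role of lineno_words_iter + convert_row above.
    for line in lines:
        line = line.strip()
        if line:
            yield [float(word) for word in line.split()]

def read_chunked(lines, dtype=float):
    rows = parse_rows(lines)
    X = None
    while True:
        # islice pulls at most CHUNK_SIZE rows per pass; an empty
        # chunk means the underlying iterator is exhausted.
        chunk = list(itertools.islice(rows, CHUNK_SIZE))
        if not chunk:
            break
        if X is None:
            X = np.array(chunk, dtype)
        else:
            # Grow the existing array in place rather than
            # concatenating, as loadtxt does with
            # resize(..., refcheck=False).
            pos = X.shape[0]
            X.resize((pos + len(chunk),) + X.shape[1:], refcheck=False)
            X[pos:, ...] = chunk
    return X

print(read_chunked(["1 2", "3 4", "", "5 6", "7 8", "9 10"]))
# [[ 1.  2.]
#  [ 3.  4.]
#  [ 5.  6.]
#  [ 7.  8.]
#  [ 9. 10.]]

Keeping the whole loop in one place, as the patch does, also makes it obvious that the chunk list is rebuilt from scratch on every pass, which the old read_data generator obscured by yielding and resetting X internally.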