summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2016-03-08 18:35:19 +0200
committer Serhiy Storchaka <storchaka@gmail.com> 2016-03-08 18:35:19 +0200
commit 382e80c85505be5909d3d738fd936e039a6c6c3a (patch)
tree 732ef046c273423effb170fa83eb63cdc7418499
parent 0281a15c6d744e820d141982bc1b65563cc9cfe2 (diff)
parent 9d2efe46b0b370a51cfc44bac527782ad89be94e (diff)
download cpython-382e80c85505be5909d3d738fd936e039a6c6c3a.tar.gz
Issue #15068: Got rid of excessive buffering in fileinput.
The bufsize parameter is now deprecated and ignored.
-rw-r--r-- Doc/library/fileinput.rst 7
-rw-r--r-- Lib/fileinput.py 166
-rw-r--r-- Lib/test/test_fileinput.py 79
-rw-r--r-- Misc/NEWS 3
4 files changed, 160 insertions, 95 deletions
diff --git a/Doc/library/fileinput.rst b/Doc/library/fileinput.rst
index ee06830ad8..6ca4008399 100644
--- a/Doc/library/fileinput.rst
+++ b/Doc/library/fileinput.rst
@@ -71,6 +71,8 @@ The following function is the primary interface of this module:
.. versionchanged:: 3.2
Can be used as a context manager.
+ .. deprecated-removed:: 3.6 3.8
+ The *bufsize* parameter.
The following functions use the global state created by :func:`fileinput.input`;
if there is no active state, :exc:`RuntimeError` is raised.
@@ -161,7 +163,10 @@ available for subclassing as well:
Can be used as a context manager.
.. deprecated:: 3.4
- The ``'rU'`` and ``'U'`` modes.
+ The ``'rU'`` and ``'U'`` modes.
+
+ .. deprecated-removed:: 3.6 3.8
+ The *bufsize* parameter.
**Optional in-place filtering:** if the keyword argument ``inplace=True`` is
diff --git a/Lib/fileinput.py b/Lib/fileinput.py
index 3543653f26..4286156991 100644
--- a/Lib/fileinput.py
+++ b/Lib/fileinput.py
@@ -64,13 +64,6 @@ deleted when the output file is closed. In-place filtering is
disabled when standard input is read. XXX The current implementation
does not work for MS-DOS 8+3 filesystems.
-Performance: this module is unfortunately one of the slower ways of
-processing large numbers of input lines. Nevertheless, a significant
-speed-up has been obtained by using readlines(bufsize) instead of
-readline(). A new keyword argument, bufsize=N, is present on the
-input() function and the FileInput() class to override the default
-buffer size.
-
XXX Possible additions:
- optional getopt argument processing
@@ -87,8 +80,6 @@ __all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
_state = None
-DEFAULT_BUFSIZE = 8*1024
-
def input(files=None, inplace=False, backup="", bufsize=0,
mode="r", openhook=None):
"""Return an instance of the FileInput class, which can be iterated.
@@ -208,17 +199,19 @@ class FileInput:
self._files = files
self._inplace = inplace
self._backup = backup
- self._bufsize = bufsize or DEFAULT_BUFSIZE
+ if bufsize:
+ import warnings
+ warnings.warn('bufsize is deprecated and ignored',
+ DeprecationWarning, stacklevel=2)
self._savestdout = None
self._output = None
self._filename = None
- self._lineno = 0
+ self._startlineno = 0
self._filelineno = 0
self._file = None
+ self._readline = self._start_readline
self._isstdin = False
self._backupfilename = None
- self._buffer = []
- self._bufindex = 0
# restrict mode argument to reading modes
if mode not in ('r', 'rU', 'U', 'rb'):
raise ValueError("FileInput opening mode must be one of "
@@ -254,22 +247,18 @@ class FileInput:
return self
def __next__(self):
- try:
- line = self._buffer[self._bufindex]
- except IndexError:
- pass
- else:
- self._bufindex += 1
- self._lineno += 1
+ line = self._readline()
+ if line:
self._filelineno += 1
return line
- line = self.readline()
- if not line:
+ if not self._file:
raise StopIteration
- return line
+ self.nextfile()
+ # Recursive call
+ return self.__next__()
def __getitem__(self, i):
- if i != self._lineno:
+ if i != self.lineno():
raise RuntimeError("accessing lines out of order")
try:
return self.__next__()
@@ -290,6 +279,7 @@ class FileInput:
finally:
file = self._file
self._file = None
+ self._readline = self._start_readline
try:
if file and not self._isstdin:
file.close()
@@ -301,85 +291,81 @@ class FileInput:
except OSError: pass
self._isstdin = False
- self._buffer = []
- self._bufindex = 0
def readline(self):
- try:
- line = self._buffer[self._bufindex]
- except IndexError:
- pass
+ while True:
+ line = self._readline()
+ if line:
+ self._filelineno += 1
+ return line
+ if not self._file:
+ return line
+ self.nextfile()
+ # repeat with next file
+
+ def _start_readline(self):
+ if not self._files:
+ if 'b' in self._mode:
+ return b''
+ else:
+ return ''
+ self._filename = self._files[0]
+ self._files = self._files[1:]
+ self._startlineno = self.lineno()
+ self._filelineno = 0
+ self._file = None
+ self._isstdin = False
+ self._backupfilename = 0
+ if self._filename == '-':
+ self._filename = '<stdin>'
+ if 'b' in self._mode:
+ self._file = getattr(sys.stdin, 'buffer', sys.stdin)
+ else:
+ self._file = sys.stdin
+ self._isstdin = True
else:
- self._bufindex += 1
- self._lineno += 1
- self._filelineno += 1
- return line
- if not self._file:
- if not self._files:
- if 'b' in self._mode:
- return b''
+ if self._inplace:
+ self._backupfilename = (
+ self._filename + (self._backup or ".bak"))
+ try:
+ os.unlink(self._backupfilename)
+ except OSError:
+ pass
+ # The next few lines may raise OSError
+ os.rename(self._filename, self._backupfilename)
+ self._file = open(self._backupfilename, self._mode)
+ try:
+ perm = os.fstat(self._file.fileno()).st_mode
+ except OSError:
+ self._output = open(self._filename, "w")
else:
- return ''
- self._filename = self._files[0]
- self._files = self._files[1:]
- self._filelineno = 0
- self._file = None
- self._isstdin = False
- self._backupfilename = 0
- if self._filename == '-':
- self._filename = '<stdin>'
- if 'b' in self._mode:
- self._file = getattr(sys.stdin, 'buffer', sys.stdin)
- else:
- self._file = sys.stdin
- self._isstdin = True
- else:
- if self._inplace:
- self._backupfilename = (
- self._filename + (self._backup or ".bak"))
+ mode = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
+ if hasattr(os, 'O_BINARY'):
+ mode |= os.O_BINARY
+
+ fd = os.open(self._filename, mode, perm)
+ self._output = os.fdopen(fd, "w")
try:
- os.unlink(self._backupfilename)
+ if hasattr(os, 'chmod'):
+ os.chmod(self._filename, perm)
except OSError:
pass
- # The next few lines may raise OSError
- os.rename(self._filename, self._backupfilename)
- self._file = open(self._backupfilename, self._mode)
- try:
- perm = os.fstat(self._file.fileno()).st_mode
- except OSError:
- self._output = open(self._filename, "w")
- else:
- mode = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
- if hasattr(os, 'O_BINARY'):
- mode |= os.O_BINARY
-
- fd = os.open(self._filename, mode, perm)
- self._output = os.fdopen(fd, "w")
- try:
- if hasattr(os, 'chmod'):
- os.chmod(self._filename, perm)
- except OSError:
- pass
- self._savestdout = sys.stdout
- sys.stdout = self._output
+ self._savestdout = sys.stdout
+ sys.stdout = self._output
+ else:
+ # This may raise OSError
+ if self._openhook:
+ self._file = self._openhook(self._filename, self._mode)
else:
- # This may raise OSError
- if self._openhook:
- self._file = self._openhook(self._filename, self._mode)
- else:
- self._file = open(self._filename, self._mode)
- self._buffer = self._file.readlines(self._bufsize)
- self._bufindex = 0
- if not self._buffer:
- self.nextfile()
- # Recursive call
- return self.readline()
+ self._file = open(self._filename, self._mode)
+ self._readline = self._file.readline
+ return self._readline()
def filename(self):
return self._filename
def lineno(self):
- return self._lineno
+ return self._startlineno + self._filelineno
def filelineno(self):
return self._filelineno
diff --git a/Lib/test/test_fileinput.py b/Lib/test/test_fileinput.py
index ad8130468b..bdf4252d8d 100644
--- a/Lib/test/test_fileinput.py
+++ b/Lib/test/test_fileinput.py
@@ -47,6 +47,42 @@ def remove_tempfiles(*names):
if name:
safe_unlink(name)
+class LineReader:
+
+ def __init__(self):
+ self._linesread = []
+
+ @property
+ def linesread(self):
+ try:
+ return self._linesread[:]
+ finally:
+ self._linesread = []
+
+ def openhook(self, filename, mode):
+ self.it = iter(filename.splitlines(True))
+ return self
+
+ def readline(self, size=None):
+ line = next(self.it, '')
+ self._linesread.append(line)
+ return line
+
+ def readlines(self, hint=-1):
+ lines = []
+ size = 0
+ while True:
+ line = self.readline()
+ if not line:
+ return lines
+ lines.append(line)
+ size += len(line)
+ if size >= hint:
+ return lines
+
+ def close(self):
+ pass
+
class BufferSizesTests(unittest.TestCase):
def test_buffer_sizes(self):
# First, run the tests with default and teeny buffer size.
@@ -57,7 +93,11 @@ class BufferSizesTests(unittest.TestCase):
t2 = writeTmp(2, ["Line %s of file 2\n" % (i+1) for i in range(10)])
t3 = writeTmp(3, ["Line %s of file 3\n" % (i+1) for i in range(5)])
t4 = writeTmp(4, ["Line %s of file 4\n" % (i+1) for i in range(1)])
- self.buffer_size_test(t1, t2, t3, t4, bs, round)
+ if bs:
+ with self.assertWarns(DeprecationWarning):
+ self.buffer_size_test(t1, t2, t3, t4, bs, round)
+ else:
+ self.buffer_size_test(t1, t2, t3, t4, bs, round)
finally:
remove_tempfiles(t1, t2, t3, t4)
@@ -290,7 +330,7 @@ class FileInputTests(unittest.TestCase):
self.addCleanup(safe_unlink, TESTFN)
with FileInput(files=TESTFN,
- openhook=hook_encoded('ascii'), bufsize=8) as fi:
+ openhook=hook_encoded('ascii')) as fi:
try:
self.assertEqual(fi.readline(), 'A\n')
self.assertEqual(fi.readline(), 'B\n')
@@ -458,6 +498,38 @@ class FileInputTests(unittest.TestCase):
self.assertEqual(result, -1, "fileno() should return -1")
+ def test_readline_buffering(self):
+ src = LineReader()
+ with FileInput(files=['line1\nline2', 'line3\n'],
+ openhook=src.openhook) as fi:
+ self.assertEqual(src.linesread, [])
+ self.assertEqual(fi.readline(), 'line1\n')
+ self.assertEqual(src.linesread, ['line1\n'])
+ self.assertEqual(fi.readline(), 'line2')
+ self.assertEqual(src.linesread, ['line2'])
+ self.assertEqual(fi.readline(), 'line3\n')
+ self.assertEqual(src.linesread, ['', 'line3\n'])
+ self.assertEqual(fi.readline(), '')
+ self.assertEqual(src.linesread, [''])
+ self.assertEqual(fi.readline(), '')
+ self.assertEqual(src.linesread, [])
+
+ def test_iteration_buffering(self):
+ src = LineReader()
+ with FileInput(files=['line1\nline2', 'line3\n'],
+ openhook=src.openhook) as fi:
+ self.assertEqual(src.linesread, [])
+ self.assertEqual(next(fi), 'line1\n')
+ self.assertEqual(src.linesread, ['line1\n'])
+ self.assertEqual(next(fi), 'line2')
+ self.assertEqual(src.linesread, ['line2'])
+ self.assertEqual(next(fi), 'line3\n')
+ self.assertEqual(src.linesread, ['', 'line3\n'])
+ self.assertRaises(StopIteration, next, fi)
+ self.assertEqual(src.linesread, [''])
+ self.assertRaises(StopIteration, next, fi)
+ self.assertEqual(src.linesread, [])
+
class MockFileInput:
"""A class that mocks out fileinput.FileInput for use during unit tests"""
@@ -917,8 +989,7 @@ class Test_hook_encoded(unittest.TestCase):
class MiscTest(unittest.TestCase):
def test_all(self):
- blacklist = {'DEFAULT_BUFSIZE'}
- support.check__all__(self, fileinput, blacklist=blacklist)
+ support.check__all__(self, fileinput)
if __name__ == "__main__":
diff --git a/Misc/NEWS b/Misc/NEWS
index 734c967aca..cb04c90fd0 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -201,6 +201,9 @@ Core and Builtins
Library
-------
+- Issue #15068: Got rid of excessive buffering in fileinput.
+ The bufsize parameter is now deprecated and ignored.
+
- Issue #19475: Added an optional argument timespec to the datetime
isoformat() method to choose the precision of the time component.