diff options
author | Ryan Petrello <lists@ryanpetrello.com> | 2018-01-15 16:43:10 -0500 |
---|---|---|
committer | Ryan Petrello <lists@ryanpetrello.com> | 2018-02-05 16:07:38 -0500 |
commit | fd7332f59e003a8ee658647f883d4b03018c5e5b (patch) | |
tree | 86f195b5ad8d0ff1ec5663c19c3b9de81c9334e5 | |
parent | 28e82cda5056df20ec55995ea7fe66abd346f6bb (diff) | |
download | pexpect-fd7332f59e003a8ee658647f883d4b03018c5e5b.tar.gz |
optimize pty buffering and searching
Python strings are slow and expensive as buffers because they're
immutable; replace the output buffer with a StringIO/BytesIO
object.
see: https://github.com/pexpect/pexpect/issues/438
-rw-r--r-- | pexpect/_async.py | 4 | ||||
-rw-r--r-- | pexpect/expect.py | 36 | ||||
-rw-r--r-- | pexpect/pty_spawn.py | 8 | ||||
-rw-r--r-- | pexpect/spawnbase.py | 18 | ||||
-rwxr-xr-x | tests/test_expect.py | 8 | ||||
-rwxr-xr-x | tests/test_performance.py | 6 |
6 files changed, 59 insertions, 21 deletions
diff --git a/pexpect/_async.py b/pexpect/_async.py index 3a1a1ad..7a02ed6 100644 --- a/pexpect/_async.py +++ b/pexpect/_async.py @@ -8,7 +8,7 @@ def expect_async(expecter, timeout=None): # First process data that was previously read - if it maches, we don't need # async stuff. previously_read = expecter.spawn.buffer - expecter.spawn.buffer = expecter.spawn.string_type() + expecter.spawn._buffer = expecter.spawn.buffer_type() idx = expecter.new_data(previously_read) if idx is not None: return idx @@ -55,7 +55,7 @@ class PatternWaiter(asyncio.Protocol): spawn._log(s, 'read') if self.fut.done(): - spawn.buffer += s + spawn._buffer.write(s) return try: diff --git a/pexpect/expect.py b/pexpect/expect.py index 3080d6c..4aac640 100644 --- a/pexpect/expect.py +++ b/pexpect/expect.py @@ -9,30 +9,42 @@ class Expecter(object): if searchwindowsize == -1: searchwindowsize = spawn.searchwindowsize self.searchwindowsize = searchwindowsize - + def new_data(self, data): spawn = self.spawn searcher = self.searcher - incoming = spawn.buffer + data - freshlen = len(data) - index = searcher.search(incoming, freshlen, self.searchwindowsize) + pos = spawn._buffer.tell() + spawn._buffer.write(data) + + # determine which chunk of data to search; if a windowsize is + # specified, this is the *new* data + the preceding <windowsize> bytes + if self.searchwindowsize: + spawn._buffer.seek(max(0, pos - self.searchwindowsize)) + window = spawn._buffer.read(self.searchwindowsize + len(data)) + else: + # otherwise, search the whole buffer (really slow for large datasets) + window = spawn.buffer + index = searcher.search(window, len(data)) if index >= 0: - spawn.buffer = incoming[searcher.end:] - spawn.before = incoming[: searcher.start] - spawn.after = incoming[searcher.start: searcher.end] + value = spawn.buffer + spawn._buffer = spawn.buffer_type() + spawn._buffer.write(value[searcher.end:]) + spawn.before = value[: searcher.start] + spawn.after = value[searcher.start: searcher.end] spawn.match = 
searcher.match spawn.match_index = index # Found a match return index - - spawn.buffer = incoming - + elif self.searchwindowsize: + spawn._buffer = spawn.buffer_type() + spawn._buffer.write(window) + def eof(self, err=None): spawn = self.spawn spawn.before = spawn.buffer - spawn.buffer = spawn.string_type() + spawn._buffer = spawn.buffer_type() spawn.after = EOF index = self.searcher.eof_index if index >= 0: @@ -83,7 +95,7 @@ class Expecter(object): try: incoming = spawn.buffer - spawn.buffer = spawn.string_type() # Treat buffer as new data + spawn._buffer = spawn.buffer_type() while True: idx = self.new_data(incoming) # Keep reading until exception or return. diff --git a/pexpect/pty_spawn.py b/pexpect/pty_spawn.py index 4afda6a..9e012e7 100644 --- a/pexpect/pty_spawn.py +++ b/pexpect/pty_spawn.py @@ -205,10 +205,8 @@ class spawn(SpawnBase): s.append(repr(self)) s.append('command: ' + str(self.command)) s.append('args: %r' % (self.args,)) - s.append('buffer (last 100 chars): %r' % ( - self.buffer[-100:] if self.buffer else self.buffer,)) - s.append('before (last 100 chars): %r' % ( - self.before[-100:] if self.before else self.before,)) + s.append('buffer (last 100 chars): %r' % self.buffer[-100:]) + s.append('before (last 100 chars): %r' % self.before[-100:] if self.before else '') s.append('after: %r' % (self.after,)) s.append('match: %r' % (self.match,)) s.append('match_index: ' + str(self.match_index)) @@ -740,7 +738,7 @@ class spawn(SpawnBase): # Flush the buffer. 
self.write_to_stdout(self.buffer) self.stdout.flush() - self.buffer = self.string_type() + self._buffer = self.buffer_type() mode = tty.tcgetattr(self.STDIN_FILENO) tty.setraw(self.STDIN_FILENO) if escape_character is not None and PY3: diff --git a/pexpect/spawnbase.py b/pexpect/spawnbase.py index 9cdcba6..0bb28d1 100644 --- a/pexpect/spawnbase.py +++ b/pexpect/spawnbase.py @@ -1,3 +1,4 @@ +from io import StringIO, BytesIO import codecs import os import sys @@ -57,8 +58,6 @@ class SpawnBase(object): self.logfile_send = None # max bytes to read at one time into buffer self.maxread = maxread - # This is the read buffer. See maxread. - self.buffer = bytes() if (encoding is None) else text_type() # Data before searchwindowsize point is preserved, but not searched. self.searchwindowsize = searchwindowsize # Delay used before sending data to child. Time in seconds. @@ -87,6 +86,7 @@ class SpawnBase(object): # bytes mode (accepts some unicode for backwards compatibility) self._encoder = self._decoder = _NullCoder() self.string_type = bytes + self.buffer_type = BytesIO self.crlf = b'\r\n' if PY3: self.allowed_string_types = (bytes, str) @@ -107,6 +107,7 @@ class SpawnBase(object): self._encoder = codecs.getincrementalencoder(encoding)(codec_errors) self._decoder = codecs.getincrementaldecoder(encoding)(codec_errors) self.string_type = text_type + self.buffer_type = StringIO self.crlf = u'\r\n' self.allowed_string_types = (text_type, ) if PY3: @@ -117,6 +118,8 @@ class SpawnBase(object): self.write_to_stdout = sys.stdout.write # storage for async transport self.async_pw_transport = None + # This is the read buffer. See maxread. 
+ self._buffer = self.buffer_type() def _log(self, s, direction): if self.logfile is not None: @@ -140,6 +143,17 @@ class SpawnBase(object): return s.encode('utf-8') return s + def _get_buffer(self): + return self._buffer.getvalue() + + def _set_buffer(self, value): + self._buffer = self.buffer_type() + self._buffer.write(value) + + # This property is provided for backwards compatability (self.buffer used + # to be a string/bytes object) + buffer = property(_get_buffer, _set_buffer) + def read_nonblocking(self, size=1, timeout=None): """This reads data from the file descriptor. diff --git a/tests/test_expect.py b/tests/test_expect.py index dcf059b..ec55d43 100755 --- a/tests/test_expect.py +++ b/tests/test_expect.py @@ -400,6 +400,14 @@ class ExpectTestCase (PexpectTestCase.PexpectTestCase): else: self.fail ('Expected an EOF exception.') + def test_buffer_interface(self): + p = pexpect.spawn('cat', timeout=5) + p.sendline (b'Hello') + p.expect (b'Hello') + assert len(p.buffer) + p.buffer = b'Testing' + p.sendeof () + def _before_after(self, p): p.timeout = 5 diff --git a/tests/test_performance.py b/tests/test_performance.py index 7be0cf6..63778af 100755 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -23,6 +23,7 @@ from __future__ import print_function import unittest, time, sys import platform import pexpect +import re from . import PexpectTestCase # This isn't exactly a unit test, but it fits in nicely with the rest of the tests. @@ -101,6 +102,11 @@ class PerformanceTestCase (PexpectTestCase.PexpectTestCase): self.faster_range(100000) print("100000 calls to faster_range:", (time.time() - start_time)) + def test_large_stdout_stream(self): + e = pexpect.spawn('openssl rand -base64 {}'.format(1024*1024*25), searchwindowsize=1000) + resp = e.expect(['Password:', pexpect.EOF, pexpect.TIMEOUT]) + assert resp == 1 # index 1 == EOF + if __name__ == "__main__": unittest.main() |