summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRyan Petrello <lists@ryanpetrello.com>2018-01-15 16:43:10 -0500
committerRyan Petrello <lists@ryanpetrello.com>2018-02-05 16:07:38 -0500
commitfd7332f59e003a8ee658647f883d4b03018c5e5b (patch)
tree86f195b5ad8d0ff1ec5663c19c3b9de81c9334e5
parent28e82cda5056df20ec55995ea7fe66abd346f6bb (diff)
downloadpexpect-fd7332f59e003a8ee658647f883d4b03018c5e5b.tar.gz
optimize pty buffering and searching
Python strings are slow and expensive as buffers because they're immutable, so every append copies the whole buffer; replace the output buffer with a StringIO/BytesIO object. See: https://github.com/pexpect/pexpect/issues/438
-rw-r--r--pexpect/_async.py4
-rw-r--r--pexpect/expect.py36
-rw-r--r--pexpect/pty_spawn.py8
-rw-r--r--pexpect/spawnbase.py18
-rwxr-xr-xtests/test_expect.py8
-rwxr-xr-xtests/test_performance.py6
6 files changed, 59 insertions, 21 deletions
diff --git a/pexpect/_async.py b/pexpect/_async.py
index 3a1a1ad..7a02ed6 100644
--- a/pexpect/_async.py
+++ b/pexpect/_async.py
@@ -8,7 +8,7 @@ def expect_async(expecter, timeout=None):
# First process data that was previously read - if it matches, we don't need
# async stuff.
previously_read = expecter.spawn.buffer
- expecter.spawn.buffer = expecter.spawn.string_type()
+ expecter.spawn._buffer = expecter.spawn.buffer_type()
idx = expecter.new_data(previously_read)
if idx is not None:
return idx
@@ -55,7 +55,7 @@ class PatternWaiter(asyncio.Protocol):
spawn._log(s, 'read')
if self.fut.done():
- spawn.buffer += s
+ spawn._buffer.write(s)
return
try:
diff --git a/pexpect/expect.py b/pexpect/expect.py
index 3080d6c..4aac640 100644
--- a/pexpect/expect.py
+++ b/pexpect/expect.py
@@ -9,30 +9,42 @@ class Expecter(object):
if searchwindowsize == -1:
searchwindowsize = spawn.searchwindowsize
self.searchwindowsize = searchwindowsize
-
+
def new_data(self, data):
spawn = self.spawn
searcher = self.searcher
- incoming = spawn.buffer + data
- freshlen = len(data)
- index = searcher.search(incoming, freshlen, self.searchwindowsize)
+ pos = spawn._buffer.tell()
+ spawn._buffer.write(data)
+
+ # determine which chunk of data to search; if a windowsize is
+ # specified, this is the *new* data + the preceding <windowsize> bytes
+ if self.searchwindowsize:
+ spawn._buffer.seek(max(0, pos - self.searchwindowsize))
+ window = spawn._buffer.read(self.searchwindowsize + len(data))
+ else:
+ # otherwise, search the whole buffer (really slow for large datasets)
+ window = spawn.buffer
+ index = searcher.search(window, len(data))
if index >= 0:
- spawn.buffer = incoming[searcher.end:]
- spawn.before = incoming[: searcher.start]
- spawn.after = incoming[searcher.start: searcher.end]
+ value = spawn.buffer
+ spawn._buffer = spawn.buffer_type()
+ spawn._buffer.write(value[searcher.end:])
+ spawn.before = value[: searcher.start]
+ spawn.after = value[searcher.start: searcher.end]
spawn.match = searcher.match
spawn.match_index = index
# Found a match
return index
-
- spawn.buffer = incoming
-
+ elif self.searchwindowsize:
+ spawn._buffer = spawn.buffer_type()
+ spawn._buffer.write(window)
+
def eof(self, err=None):
spawn = self.spawn
spawn.before = spawn.buffer
- spawn.buffer = spawn.string_type()
+ spawn._buffer = spawn.buffer_type()
spawn.after = EOF
index = self.searcher.eof_index
if index >= 0:
@@ -83,7 +95,7 @@ class Expecter(object):
try:
incoming = spawn.buffer
- spawn.buffer = spawn.string_type() # Treat buffer as new data
+ spawn._buffer = spawn.buffer_type()
while True:
idx = self.new_data(incoming)
# Keep reading until exception or return.
diff --git a/pexpect/pty_spawn.py b/pexpect/pty_spawn.py
index 4afda6a..9e012e7 100644
--- a/pexpect/pty_spawn.py
+++ b/pexpect/pty_spawn.py
@@ -205,10 +205,8 @@ class spawn(SpawnBase):
s.append(repr(self))
s.append('command: ' + str(self.command))
s.append('args: %r' % (self.args,))
- s.append('buffer (last 100 chars): %r' % (
- self.buffer[-100:] if self.buffer else self.buffer,))
- s.append('before (last 100 chars): %r' % (
- self.before[-100:] if self.before else self.before,))
+ s.append('buffer (last 100 chars): %r' % self.buffer[-100:])
+ s.append('before (last 100 chars): %r' % self.before[-100:] if self.before else '')
s.append('after: %r' % (self.after,))
s.append('match: %r' % (self.match,))
s.append('match_index: ' + str(self.match_index))
@@ -740,7 +738,7 @@ class spawn(SpawnBase):
# Flush the buffer.
self.write_to_stdout(self.buffer)
self.stdout.flush()
- self.buffer = self.string_type()
+ self._buffer = self.buffer_type()
mode = tty.tcgetattr(self.STDIN_FILENO)
tty.setraw(self.STDIN_FILENO)
if escape_character is not None and PY3:
diff --git a/pexpect/spawnbase.py b/pexpect/spawnbase.py
index 9cdcba6..0bb28d1 100644
--- a/pexpect/spawnbase.py
+++ b/pexpect/spawnbase.py
@@ -1,3 +1,4 @@
+from io import StringIO, BytesIO
import codecs
import os
import sys
@@ -57,8 +58,6 @@ class SpawnBase(object):
self.logfile_send = None
# max bytes to read at one time into buffer
self.maxread = maxread
- # This is the read buffer. See maxread.
- self.buffer = bytes() if (encoding is None) else text_type()
# Data before searchwindowsize point is preserved, but not searched.
self.searchwindowsize = searchwindowsize
# Delay used before sending data to child. Time in seconds.
@@ -87,6 +86,7 @@ class SpawnBase(object):
# bytes mode (accepts some unicode for backwards compatibility)
self._encoder = self._decoder = _NullCoder()
self.string_type = bytes
+ self.buffer_type = BytesIO
self.crlf = b'\r\n'
if PY3:
self.allowed_string_types = (bytes, str)
@@ -107,6 +107,7 @@ class SpawnBase(object):
self._encoder = codecs.getincrementalencoder(encoding)(codec_errors)
self._decoder = codecs.getincrementaldecoder(encoding)(codec_errors)
self.string_type = text_type
+ self.buffer_type = StringIO
self.crlf = u'\r\n'
self.allowed_string_types = (text_type, )
if PY3:
@@ -117,6 +118,8 @@ class SpawnBase(object):
self.write_to_stdout = sys.stdout.write
# storage for async transport
self.async_pw_transport = None
+ # This is the read buffer. See maxread.
+ self._buffer = self.buffer_type()
def _log(self, s, direction):
if self.logfile is not None:
@@ -140,6 +143,17 @@ class SpawnBase(object):
return s.encode('utf-8')
return s
+ def _get_buffer(self):
+ return self._buffer.getvalue()
+
+ def _set_buffer(self, value):
+ self._buffer = self.buffer_type()
+ self._buffer.write(value)
+
+ # This property is provided for backwards compatibility (self.buffer used
+ # to be a string/bytes object)
+ buffer = property(_get_buffer, _set_buffer)
+
def read_nonblocking(self, size=1, timeout=None):
"""This reads data from the file descriptor.
diff --git a/tests/test_expect.py b/tests/test_expect.py
index dcf059b..ec55d43 100755
--- a/tests/test_expect.py
+++ b/tests/test_expect.py
@@ -400,6 +400,14 @@ class ExpectTestCase (PexpectTestCase.PexpectTestCase):
else:
self.fail ('Expected an EOF exception.')
+ def test_buffer_interface(self):
+ p = pexpect.spawn('cat', timeout=5)
+ p.sendline (b'Hello')
+ p.expect (b'Hello')
+ assert len(p.buffer)
+ p.buffer = b'Testing'
+ p.sendeof ()
+
def _before_after(self, p):
p.timeout = 5
diff --git a/tests/test_performance.py b/tests/test_performance.py
index 7be0cf6..63778af 100755
--- a/tests/test_performance.py
+++ b/tests/test_performance.py
@@ -23,6 +23,7 @@ from __future__ import print_function
import unittest, time, sys
import platform
import pexpect
+import re
from . import PexpectTestCase
# This isn't exactly a unit test, but it fits in nicely with the rest of the tests.
@@ -101,6 +102,11 @@ class PerformanceTestCase (PexpectTestCase.PexpectTestCase):
self.faster_range(100000)
print("100000 calls to faster_range:", (time.time() - start_time))
+ def test_large_stdout_stream(self):
+ e = pexpect.spawn('openssl rand -base64 {}'.format(1024*1024*25), searchwindowsize=1000)
+ resp = e.expect(['Password:', pexpect.EOF, pexpect.TIMEOUT])
+ assert resp == 1 # index 1 == EOF
+
if __name__ == "__main__":
unittest.main()