summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjquast <contact@jeffquast.com>2014-05-24 23:29:57 -0700
committerjquast <contact@jeffquast.com>2014-05-24 23:34:48 -0700
commit78b16ad8f06ce20b5274a7a7c17f3a8a042edd46 (patch)
tree9bb2b83b7dbe62bcbfffbe6a9da425e9e3de116a
parentc4359ad421a6d75a4b8859f7fdcb21c568173865 (diff)
downloadpexpect-78b16ad8f06ce20b5274a7a7c17f3a8a042edd46.tar.gz
Issue #20: Strange EOF/TIMEOUT behavior
Problem ------- When running a subprocess that writes a lot of data to stdout, then exits, it is possible to read all data from stdout, then, when calling waitpid(2), to be told the subprocess has not yet exited. pexpect would then call read_nonblocking and block for a full 30 seconds, because there remains no more data on stdout. Solution -------- Add new parameter, poll_exit to read_nonblocking so that select(2) is called at shorter intervals than previously done, but still up until timeout specified, doing a poll for waitpid(2) at regular intervals. This ensures that no longer than (default, 0.15) seconds elapse, instead of the default of 30. Testing ------- Includes travis changes for python3.4, also disabling use_site_packages which doesn't appear supported any longer (most builds have been failing for some time). Additionally, a 'death' flag for checking select(2) after waitpid(2) was necessary only for python3.4 on travis, i could not reproduce locally. All existing unit tests, of course. However, as a race condition, it is not possible to reliably reproduce, it may require anywhere from 1 to 5 minutes of looping to cause it to reproduce, so it was not put into base tests. A simple program was authored:: TIMEOUT=1 def test_2(): while True: stime = time.time() child = pexpect.spawn('ls -lR /dev', timeout=TIMEOUT) child.expect(pexpect.EOF) elapsed = time.time() - stime print('got eof in {0:0.2f} in pid {1}' .format(elapsed, child.pid)) assert math.floor(elapsed) < TIMEOUT, elapsed Without this change, this program will raise an exception after several dozen runs, hitting the race condition. With this change, I have been executing this program for several thousand iterations without failed assertion. PLEASE CONFER ------------- @takluyver, with this change, I think we may also delete the self.__irix_hack and its surrounding hack -- and even the first self.isalive() check just before this change, for "some systems such as Solaris" without any penalty in performance. What do you think?
-rw-r--r--.travis.yml6
-rw-r--r--doc/history.rst7
-rw-r--r--pexpect/__init__.py42
-rwxr-xr-xtests/test_expect.py1
4 files changed, 43 insertions, 13 deletions
diff --git a/.travis.yml b/.travis.yml
index 15ba2a2..2690bce 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,13 +5,12 @@ python:
- 2.7
- 3.2
- 3.3
+ - 3.4
- pypy
-virtualenv:
- system_site_packages: true
-
before_install:
- sudo apt-get install python-yaml python3-yaml
+
install:
- export PYTHONIOENCODING=UTF8
- python setup.py install
@@ -19,6 +18,7 @@ install:
before_script:
- source test.env
+
script:
- coverage run tools/testall.py
diff --git a/doc/history.rst b/doc/history.rst
index 57d2cdf..70fb6f6 100644
--- a/doc/history.rst
+++ b/doc/history.rst
@@ -4,6 +4,13 @@ History
Releases
--------
+Version 3.3
+```````````
+
+* Fix a race codntion between waitpid(2) and select(2) on some operating systems
+ (OSX) where ``expect(EOF)`` might delay up to ``timeout`` even though all
+ output has already been received.
+
Version 3.2
```````````
diff --git a/pexpect/__init__.py b/pexpect/__init__.py
index 942d19f..bdae9a0 100644
--- a/pexpect/__init__.py
+++ b/pexpect/__init__.py
@@ -851,7 +851,7 @@ class spawn(object):
second_log.write(s)
second_log.flush()
- def read_nonblocking(self, size=1, timeout=-1):
+ def read_nonblocking(self, size=1, timeout=-1, poll_exit=0.15):
'''This reads at most size characters from the child application. It
includes a timeout. If the read does not complete within the timeout
period then a TIMEOUT exception is raised. If the end of file is read
@@ -869,6 +869,11 @@ class spawn(object):
available right away then one character will be returned immediately.
It will not wait for 30 seconds for another 99 characters to come in.
+ Keyword argument poll_exit refers to the interval that select.select()
+ is interrupted to check if the child application has exited, which is
+ necessary to prevent very long blocking calls after the child process
+ has already existed on some systems (at least OSX and maybe Irix).
+
This is a wrapper around os.read(). It uses select.select() to
implement the timeout. '''
@@ -877,6 +882,8 @@ class spawn(object):
if timeout == -1:
timeout = self.timeout
+ elif timeout is not None:
+ poll_exit = min(poll_exit, timeout)
# Note that some systems such as Solaris do not give an EOF when
# the child dies. In fact, you can still try to read
@@ -898,17 +905,32 @@ class spawn(object):
self.flag_eof = True
raise EOF('End Of File (EOF). Slow platform.')
- r, w, e = self.__select([self.child_fd], [], [], timeout)
-
- if not r:
- if not self.isalive():
- # Some platforms, such as Irix, will claim that their
- # processes are alive; timeout on the select; and
- # then finally admit that they are not alive.
+ stime = time.time()
+ death = False
+ while True:
+ r, w, e = self.__select([self.child_fd], [], [], poll_exit)
+ elapsed = time.time() - stime
+ if r:
+ break
+ elif not death and not self.isalive():
+ # Under Irix and OSX, even after all output from child_fd has
+ # been received, the parent process has not yet received a
+ # (pid, status) from waitpid(2) until at least one additional
+ # call to select(2) is issued. Therefor, we poll every
+ # `poll_exit` interval for waitpid() which may cause EOF.
+ death = True
+ # poll at least just one more time for output
+ poll_exit = 0
+ elif death:
self.flag_eof = True
raise EOF('End of File (EOF). Very slow platform.')
- else:
- raise TIMEOUT('Timeout exceeded.')
+ elif timeout is not None:
+ if elapsed > timeout:
+ raise TIMEOUT('Timeout exceeded.')
+ else:
+ # if poll_exit is less than the time remaining,
+ # use only time_remaining for next call to select(2)
+ poll_exit = min(timeout - elapsed, poll_exit)
if self.child_fd in r:
try:
diff --git a/tests/test_expect.py b/tests/test_expect.py
index d5d0c9c..bcaed82 100755
--- a/tests/test_expect.py
+++ b/tests/test_expect.py
@@ -460,6 +460,7 @@ class ExpectTestCase (PexpectTestCase.PexpectTestCase):
self.assertEqual(expect([b'1, 2, 3', b'2,']), 1)
def test_greed(self):
+ # fails on travis on occasion .. ?
p = pexpect.spawn(self.PYTHONBIN + ' list100.py')
self._greed(p.expect)