#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vi:ts=4:et
# Retrieves a single URL using the CurlMulti.socket_action calls, using
# select as the I/O polling mechanism:
#
# First, create a Multi object, and set socket and timer callbacks on it.
# Observed side effect: this causes the timer callback to be immediately
# invoked with the zero value for the timeout.
#
# The timer callback is very simple - it stores the timeout value passed
# into it in the global state for future use by the select calls that
# we will be making.
#
# The socket callback is more complicated. Its job is to add and remove
# socket handles to/from the data structure that we use for waiting for
# activity on them. The callback is invoked with a socket handle and the
# needed action (add for reading, add for writing or remove).
# Since this script utilizes the select call for waiting for activity,
# the socket callback updates the list of sockets which we should be
# polling for readability and the list that we should be polling for
# writability, which are then passed to the select call (and both of the
# sets are passed as the sockets to wait for errors/exceptions on).
#
# Next, create a Curl object (mapping to a libcurl easy handle), set the URL
# we are going to retrieve as well as any transfer options. This script sets
# the connection timeout and the low-speed time limit to 5 seconds to be able
# to test failing transfers easily.
#
# Add the Curl object to the Multi object.
#
# Invoke Multi.socket_action to start the retrieval operation.
# Observed side effect: this causes the timer callback to be invoked
# with a greater than zero value for the timeout.
#
# By now we should have initialized our own state, which this script does
# prior to invoking any libcurl functions. Importantly, the state includes
# the timeout value that was communicated to us by libcurl.
#
# Run a loop which waits for activity on any of the sockets used by libcurl.
# The sockets polled are those that the socket callback has registered as of
# the present moment; the timeout is the most recent timeout value received by
# the timer callback.
#
# Importantly, the loop should not simply sleep for the entire
# timeout interval, as that would cause the transfer to take a very long time.
# It is *required* to use something like a select call to wait for activity
# on any of the sockets currently active for *up to* the timeout value.
#
# The loop terminates when the number of active transfers (handles in libcurl
# parlance) reaches zero. This number is provided by each socket_action
# call, which is why each call (even the ones that are called due to
# timeout being reached, as opposed to any socket activity) must update
# the number of running handles.
#
# After the loop terminates, clean up everything: remove the easy object from
# the multi object, close the easy object, close the multi object.
import sys, select
import pycurl
from io import BytesIO
# URL to retrieve: the first command-line argument when one is given,
# otherwise the python.org home page.
url = sys.argv[1] if len(sys.argv) > 1 else 'https://www.python.org'
# Bookkeeping shared between the libcurl callbacks and the I/O loop.
state = {
    # Socket descriptors to poll for readability (maintained by socket_fn).
    'rlist': [],
    # Socket descriptors to poll for writability (maintained by socket_fn).
    'wlist': [],
    # Number of running handles reported by the latest socket_action call
    # made from work(); None until the first such call.
    'running': None,
    # Most recent timeout handed to timer_fn, converted to seconds.
    'timeout': None,
    # True once the transfer succeeded, False once it failed, None before.
    'result': None,
    # If the transfer failed, code and msg will be filled in.
    'code': None,
    'msg': None,
}
def socket_fn(what, sock_fd, multi, socketp):
    """Socket callback: record which events libcurl wants for ``sock_fd``.

    Each invocation describes the complete set of events libcurl is now
    interested in for the socket, so both polling lists are brought in line
    with it: the descriptor is present in ``state['rlist']`` only when
    reads are wanted and in ``state['wlist']`` only when writes are wanted,
    and it never appears twice in either list.

    Args:
        what: One of ``pycurl.POLL_IN``, ``pycurl.POLL_OUT``,
            ``pycurl.POLL_INOUT`` or ``pycurl.POLL_REMOVE``.
        sock_fd: The socket descriptor the request applies to.
        multi: Passed by the caller; unused here.
        socketp: Passed by the caller; unused here.

    Raises:
        Exception: If ``what`` is none of the values listed above.
    """
    known = (pycurl.POLL_IN, pycurl.POLL_OUT,
             pycurl.POLL_INOUT, pycurl.POLL_REMOVE)
    if what not in known:
        raise Exception("Unknown value of what: %s" % what)

    # POLL_INOUT asks for both directions; POLL_REMOVE asks for neither.
    want_read = what in (pycurl.POLL_IN, pycurl.POLL_INOUT)
    want_write = what in (pycurl.POLL_OUT, pycurl.POLL_INOUT)

    for wanted, fds in ((want_read, state['rlist']),
                        (want_write, state['wlist'])):
        if wanted:
            # Avoid duplicates: a later remove() would only drop one copy
            # and leave a stale descriptor behind for select().
            if sock_fd not in fds:
                fds.append(sock_fd)
        elif sock_fd in fds:
            # The socket is no longer wanted for this direction.
            fds.remove(sock_fd)
def work(timeout):
    """Wait for socket activity (or the timeout) and let libcurl act on it.

    Blocks in ``select`` for up to ``timeout`` seconds on the descriptors
    currently recorded in ``state['rlist']`` and ``state['wlist']``, then
    reports what happened to libcurl through ``multi.socket_action``.
    When the number of running handles differs from the previously stored
    one, the transfer outcome is read with ``multi.info_read`` and stored
    in ``state['result']`` (plus ``state['code']`` and ``state['msg']`` on
    failure). ``state['running']`` is updated before returning.

    Args:
        timeout: Maximum time to wait, in seconds; passed straight to
            ``select.select``.
    """
    rlist, wlist = state['rlist'], state['wlist']
    rready, wready, xready = select.select(
        rlist, wlist, set(rlist) | set(wlist), timeout)

    # Fold the three ready lists into one event bitmask per descriptor so
    # that each socket is reported to libcurl exactly once, even when it is
    # ready for several kinds of events at the same time.
    events = {}
    for ready, flag in ((rready, pycurl.CSELECT_IN),
                        (wready, pycurl.CSELECT_OUT),
                        (xready, pycurl.CSELECT_ERR)):
        for sock_fd in ready:
            events[sock_fd] = events.get(sock_fd, 0) | flag

    if not events:
        # The number of running handles must be updated after each
        # call to socket_action, which includes those with the SOCKET_TIMEOUT
        # argument (otherwise e.g. a transfer which failed due to
        # exceeding the connection timeout would hang).
        _, running = multi.socket_action(pycurl.SOCKET_TIMEOUT, 0)
    else:
        for sock_fd, ev_bitmask in events.items():
            # socket_action returns a tuple whose first element is always
            # the CURLE_OK value (0), ignore it and use the second element
            # only.
            _, running = multi.socket_action(sock_fd, ev_bitmask)

    # Since we are only performing a single transfer, we could call
    # Multi.info_read after the I/O loop terminates.
    # In practice, you would probably use socket_action with multiple
    # transfers, and you may want to be notified about transfer completion
    # as soon as the result is available.
    #
    # While state['running'] is still None there is no earlier count to
    # compare against, so no completion is looked for.
    if state['running'] is not None and running != state['running']:
        # Some handle has completed.
        #
        # Note that socket_action was potentially called multiple times
        # in this function (e.g. if both a read handle became ready and a
        # different write handle became ready), therefore it is possible
        # that multiple handles have completed. In this particular script
        # we are only performing a single transfer (one
        # Curl object / easy handle), therefore only one transfer can ever
        # possibly complete.
        qmsg, successes, failures = multi.info_read()
        # We should have retrieved all of the available statuses, leaving
        # none in the queue.
        assert qmsg == 0
        # We have only one transfer, so exactly one outcome is expected.
        assert len(successes) + len(failures) == 1
        if successes:
            state['result'] = True
        if failures:
            state['result'] = False
            # The failures array contains tuples of
            # (easy object, CURLE code, error message).
            _easy, state['code'], state['msg'] = failures[0]

    state['running'] = running
def timer_fn(timeout_ms):
    """Timer callback: remember the timeout libcurl asked for.

    Stores the value in ``state['timeout']`` converted to seconds, or
    ``None`` when the value is negative.

    Args:
        timeout_ms: Timeout in milliseconds; negative when no timeout
            applies.
    """
    if timeout_ms < 0:
        # libcurl passes a negative timeout value when no further
        # calls should be made.
        state['timeout'] = None
    else:
        # Milliseconds to (float) seconds.
        state['timeout'] = timeout_ms / 1000.0
# Create the multi object and register the callbacks that feed `state`.
multi = pycurl.CurlMulti()
multi.setopt(pycurl.M_SOCKETFUNCTION, socket_fn)
multi.setopt(pycurl.M_TIMERFUNCTION, timer_fn)

# Create and configure the single transfer.
easy = pycurl.Curl()
easy.setopt(pycurl.URL, url)
# Uncomment to see what libcurl is doing throughout the transfer.
#easy.setopt(pycurl.VERBOSE, 1)
easy.setopt(pycurl.CONNECTTIMEOUT, 5)
easy.setopt(pycurl.LOW_SPEED_TIME, 5)
easy.setopt(pycurl.LOW_SPEED_LIMIT, 1)
# The response body is collected in memory.
_io = BytesIO()
easy.setopt(pycurl.WRITEDATA, _io)

multi.add_handle(easy)
try:
    # Start the transfer. This should invoke the timer function with a
    # timeout value. Keep the reported number of running handles so that
    # the first work() call already has a count to compare against.
    _, state['running'] = multi.socket_action(pycurl.SOCKET_TIMEOUT, 0)

    while state['running'] != 0:
        # By the time we get here, timer function should have been already
        # invoked at least once so that we have a libcurl-supplied
        # timeout value. But in case this hasn't happened, default the
        # timeout to 1 second. The same default is used for a negative
        # value, which select() would reject.
        timeout = state['timeout']
        if timeout is None or timeout < 0:
            timeout = 1.0
        work(timeout)

    if state['result'] is None:
        # The transfer ended without its outcome having been recorded
        # (e.g. it finished during the initial socket_action call), so
        # collect it now, while the easy handle is still attached.
        _, successes, failures = multi.info_read()
        if successes:
            state['result'] = True
        elif failures:
            state['result'] = False
            # The failures array contains tuples of
            # (easy object, CURLE code, error message).
            _easy, state['code'], state['msg'] = failures[0]
finally:
    # Release the libcurl objects even if the loop raised.
    multi.remove_handle(easy)
    easy.close()
    multi.close()

# Uncomment to print the retrieved contents.
#print(_io.getvalue().decode())

if state['result'] is None:
    raise Exception('Script finished without a result!')
if state['result']:
    print('Transfer successful, retrieved %d bytes' % len(_io.getvalue()))
else:
    print('Transfer failed with code %d: %s' % (state['code'], state['msg']))