summaryrefslogtreecommitdiff
path: root/poll/unix/z_asio.c
blob: 6537d490948680530bfef362e91c0c13a5784862 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *
 ******************************************************************************
 *
 * This implementation is based on a design by John Brooks (IBM Pok) which uses
 * the z/OS sockets async i/o facility.  When a
 * socket is added to the pollset, an async poll is issued for that individual
 * socket.  It specifies that the kernel should send an IPC message when the
 * socket becomes ready.  The IPC messages are sent to a single message queue
 * that is part of the pollset.  apr_pollset_poll waits on the arrival of IPC
 * messages or the specified timeout.
 *
 * Since z/OS does not support async i/o for pipes or files at present, this
 * implementation falls back to using ordinary poll() when
 * APR_POLLSET_THREADSAFE is unset.
 *
 * Greg Ames
 * April 2012
 */

#include "apr.h"
#include "apr_hash.h"
#include "apr_poll.h"
#include "apr_time.h"
#include "apr_portable.h"
#include "apr_arch_inherit.h"
#include "apr_arch_file_io.h"
#include "apr_arch_networkio.h"
#include "apr_arch_poll_private.h"

#ifdef HAVE_AIO_MSGQ

#include <sys/msg.h>  	/* msgget etc   */
#include <time.h>     	/* timestruct   */
#include <poll.h>     	/* pollfd       */
#include <limits.h>     /* MAX_INT      */

/*
 * Provider-private state hung off apr_pollset_t::p.
 *
 * A pollset runs in one of two flavors, chosen at create time:
 *   - async i/o (APR_POLLSET_THREADSAFE set): uses msg_q, ring_lock, elems
 *     and the three rings;
 *   - plain poll() (flag not set): uses the pollset / query_set arrays.
 * result_set and size are used by both flavors.
 */
struct apr_pollset_private_t
{
    int             msg_q;              /* IPC message queue. The z/OS kernel sends messages
                                         * to this queue when our async polls on individual
                                         * file descriptors complete
                                         */
    /* Array of 'size' entries filled in by each poll call and handed back
     * to the caller through the 'descriptors' output parameter
     */
    apr_pollfd_t    *result_set;
    /* Capacity requested at create time; bounds result_set and, for the
     * poll() flavor, the pollset and query_set arrays
     */
    apr_uint32_t    size;

#if APR_HAS_THREADS
    /* A thread mutex to protect operations on the rings and the hash */
    apr_thread_mutex_t *ring_lock;
#endif

    /* A hash of all active elements used for O(1) _remove operations */
    apr_hash_t      *elems;

    /* ready_ring:       elems whose async poll completed, gathered during
     *                   the current poll call
     * prior_ready_ring: the ready ring left over from the previous poll
     *                   call; each elem on it is re-polled (or recycled if
     *                   it was removed) at the start of the next poll call
     * free_ring:        elems available for reuse by a later add
     */
    APR_RING_HEAD(ready_ring_t,       asio_elem_t)      ready_ring;
    APR_RING_HEAD(prior_ready_ring_t, asio_elem_t)      prior_ready_ring;
    APR_RING_HEAD(free_ring_t,        asio_elem_t)      free_ring;

    /* for pipes etc with no asio */
    /* pollset[i] is the native pollfd handed to poll(); query_set[i] is the
     * caller's matching apr_pollfd_t.  The two arrays are kept in step.
     */
    struct pollfd   *pollset;
    apr_pollfd_t    *query_set;
};

/*
 * Life-cycle state of one asio_elem_t:
 *   ASIO_INIT     - an async poll has been issued (or is about to be
 *                   re-issued) and has not yet been seen to complete
 *   ASIO_REMOVED  - the caller removed the descriptor; the elem is recycled
 *                   onto the free ring once it is next encountered
 *   ASIO_COMPLETE - the async poll completed and the elem was placed on the
 *                   ready ring
 */
typedef enum {
    ASIO_INIT = 0,
    ASIO_REMOVED,
    ASIO_COMPLETE
} asio_state_e;

/* Forward declaration: asio_msgbuf_t below holds a pointer to this type */
typedef struct asio_elem_t asio_elem_t;

/*
 * Layout of the IPC message posted to the pollset's message queue when an
 * async poll completes.  The message text is a single pointer back to the
 * element the completion belongs to, which is why the send/receive sizes
 * used in this file are sizeof(asio_elem_t *).
 */
struct asio_msgbuf_t {
    long         msg_type;       /* must be > 0 */
    asio_elem_t *msg_elem;       /* the elem whose async poll completed */
};

/*
 * Everything tracked for one descriptor in an async i/o pollset.
 */
struct asio_elem_t
{
    APR_RING_ENTRY(asio_elem_t) link;   /* linkage for the ready / prior_ready / free rings */
    apr_pollfd_t                pfd;    /* copy of the caller's descriptor, returned in results */
    struct pollfd               os_pfd; /* native fd + events; the aiocb's aio_buf points here */
    struct aiocb                a;      /* control block passed to asyncio() */
    asio_state_e                state;  /* see asio_state_e */
    struct asio_msgbuf_t        msg;    /* message the aiocb's aio_msgev_addr points at */
};

/* Compile-time trace level for this file; see the table below */
#define DEBUG 0

/* DEBUG settings: 0 - no debug messages at all,
 *                 1 - should not occur messages,
 *                 2 - apr_pollset_* entry and exit messages,
 *                 3 - state changes, memory usage,
 *                 4 - z/OS, APR, and internal calls,
 *                 5 - everything else except the timer pop path,
 *                 6 - everything, including the Event 1 sec timer pop path
 *
 *  each DEBUG level includes all messages produced by lower numbered levels
 */

#if DEBUG

#include <assert.h>
#include <unistd.h>	/* getpid       */

/* Declares the scratch buffer the DBG_CORE* macros format into.  Every
 * function that uses a DBG* macro starts its declarations with DBG_BUFF.
 */
#define DBG_BUFF char dbg_msg_buff[256];

/* Opens an 'if' block that DBG_END closes; the message is emitted only when
 * its level is <= DEBUG
 */
#define DBG_TEST(lvl) if (lvl <= DEBUG) {

/* DBG_CORE .. DBG_CORE4: format "<pid> <function> <msg>" with 0 to 4 extra
 * arguments into dbg_msg_buff, then write it to stderr.
 * NOTE(review): these paste __FUNCTION__ between string literals, which
 * only compiles where __FUNCTION__ behaves like a string literal - confirm
 * for the target compiler.  sprintf is unbounded, so messages must fit in
 * the 256 byte buffer.
 */
#define DBG_CORE(msg)               sprintf(dbg_msg_buff, "% 8d " __FUNCTION__ \
                                        " "  msg, getpid()),                   \
                                    fprintf(stderr, "%s", dbg_msg_buff);
#define DBG_CORE1(msg, var1)        sprintf(dbg_msg_buff, "% 8d " __FUNCTION__ \
                                        " " msg, getpid(), var1),              \
                                    fprintf(stderr, "%s", dbg_msg_buff);
#define DBG_CORE2(msg, var1, var2)  sprintf(dbg_msg_buff, "% 8d " __FUNCTION__ \
                                        " " msg, getpid(), var1, var2),        \
                                    fprintf(stderr, "%s", dbg_msg_buff);
#define DBG_CORE3(msg, var1, var2, var3)                                       \
                                    sprintf(dbg_msg_buff, "% 8d " __FUNCTION__ \
                                        " " msg, getpid(), var1, var2, var3),  \
                                    fprintf(stderr, "%s", dbg_msg_buff);
#define DBG_CORE4(msg, var1, var2, var3, var4)                                 \
                                    sprintf(dbg_msg_buff, "% 8d " __FUNCTION__ \
                                        " " msg, getpid(), var1, var2, var3, var4),\
                                    fprintf(stderr, "%s", dbg_msg_buff);

#define DBG_END }

/* DBG .. DBG4: the macros the rest of the file uses - a level test wrapped
 * around the matching DBG_CORE* call
 */
#define DBG(lvl, msg)   DBG_TEST(lvl)   \
                        DBG_CORE(msg)   \
                        DBG_END

#define DBG1(lvl, msg, var1)    DBG_TEST(lvl)           \
                                DBG_CORE1(msg, var1)    \
                                DBG_END

#define DBG2(lvl, msg, var1, var2)      DBG_TEST(lvl)               \
                                        DBG_CORE2(msg, var1, var2)  \
                                        DBG_END

#define DBG3(lvl, msg, var1, var2, var3)                        \
                        DBG_TEST(lvl)                           \
                        DBG_CORE3(msg, var1, var2, var3)        \
                        DBG_END

#define DBG4(lvl, msg, var1, var2, var3, var4)                  \
                        DBG_TEST(lvl)                           \
                        DBG_CORE4(msg, var1, var2, var3, var4)  \
                        DBG_END

#else  /* DEBUG is 0 */
/* Tracing compiled out: DBG_BUFF declares nothing and every DBG* call is a
 * no-op expression that does not evaluate its arguments
 */
#define DBG_BUFF
#define DBG(lvl, msg)                            ((void)0)
#define DBG1(lvl, msg, var1)                     ((void)0)
#define DBG2(lvl, msg, var1, var2)               ((void)0)
#define DBG3(lvl, msg, var1, var2, var3)         ((void)0)
#define DBG4(lvl, msg, var1, var2, var3, var4)   ((void)0)

#endif /* DEBUG */

/*
 * Issue one z/OS asynchronous i/o request.
 *
 * a - control block describing the request; it is passed, together with
 *     its size, to the BPX4AIO service when _LP64 is defined and to
 *     BPX1AIO otherwise.
 *
 * Returns the value the service stored in its return-value argument.  The
 * service is also handed &errno and __err2ad() as output locations, so the
 * caller can examine errno after a failing call.
 */
static int asyncio(struct aiocb *a)
{
    DBG_BUFF
    int rv;

#ifdef _LP64
#define AIO BPX4AIO
#else
#define AIO BPX1AIO
#endif

    AIO(sizeof(struct aiocb), a, &rv, &errno, __err2ad());
    DBG2(4, "BPX4AIO aiocb %p rv %d\n",
             a, rv);
    /* DEBUG is always defined (possibly as 0), so this has to be a value
     * test rather than #ifdef to be compiled out along with the tracing
     */
#if DEBUG
    if (rv < 0) {
        DBG2(4, "errno %d errnojr %08x\n",
                 errno, *__err2ad());
    }
#endif
    return rv;
}

/*
 * Translate a mask of APR_POLL* flags into the equivalent mask of native
 * POLL* flags.  Only the six flags listed in the table are carried over.
 */
static apr_int16_t get_event(apr_int16_t event)
{
    DBG_BUFF
    /* each row pairs an APR flag with its native counterpart */
    static const struct {
        apr_int16_t apr_flag;
        apr_int16_t os_flag;
    } flag_map[] = {
        { APR_POLLIN,   POLLIN   },
        { APR_POLLPRI,  POLLPRI  },
        { APR_POLLOUT,  POLLOUT  },
        { APR_POLLERR,  POLLERR  },
        { APR_POLLHUP,  POLLHUP  },
        { APR_POLLNVAL, POLLNVAL }
    };
    apr_int16_t os_events = 0;
    unsigned int k;

    DBG(4, "entered\n");

    for (k = 0; k < sizeof(flag_map) / sizeof(flag_map[0]); k++) {
        if (event & flag_map[k].apr_flag) {
            os_events |= flag_map[k].os_flag;
        }
    }

    DBG(4, "exiting\n");
    return os_events;
}

/*
 * Translate a mask of native POLL* flags (as found in pollfd.revents) into
 * the equivalent mask of APR_POLL* flags.  Native bits other than the six
 * tested here are ignored.
 */
static apr_int16_t get_revent(apr_int16_t event)
{
    DBG_BUFF
    apr_int16_t apr_events;

    DBG(4, "entered\n");

    /* build the whole result in one expression, one term per flag */
    apr_events = (apr_int16_t)(
          ((event & POLLIN)   ? APR_POLLIN   : 0)
        | ((event & POLLPRI)  ? APR_POLLPRI  : 0)
        | ((event & POLLOUT)  ? APR_POLLOUT  : 0)
        | ((event & POLLERR)  ? APR_POLLERR  : 0)
        | ((event & POLLHUP)  ? APR_POLLHUP  : 0)
        | ((event & POLLNVAL) ? APR_POLLNVAL : 0));

    DBG(4, "exiting\n");
    return apr_events;
}

/*
 * Provider "cleanup" hook: remove the IPC message queue owned by the
 * pollset.
 *
 * A message queue is only created for async i/o pollsets
 * (APR_POLLSET_THREADSAFE).  For the poll()-based flavor msg_q was never
 * assigned a queue id, so issuing IPC_RMID on it could act on an unrelated
 * queue; such pollsets have nothing to release here.
 *
 * Returns APR_SUCCESS, or the errno left by a failing msgctl().
 */
static apr_status_t asio_pollset_cleanup(apr_pollset_t *pollset)
{
    DBG_BUFF
    int rv;
    apr_status_t status = APR_SUCCESS;

    DBG(4, "entered\n");

    if (!(pollset->flags & APR_POLLSET_THREADSAFE)) {
        DBG(4, "exiting, no message queue to remove\n");
        return APR_SUCCESS;
    }

    rv = msgctl(pollset->p->msg_q, IPC_RMID, NULL);
    if (rv < 0) {
        /* capture errno before any tracing can disturb it */
        status = errno;
    }

    DBG1(4, "exiting, msgctl(IPC_RMID) returned %d\n", rv);
    return status;
}

/*
 * Provider "create" hook: allocate and initialise the private pollset state.
 *
 * pollset - generic pollset being set up; pollset->p receives the private
 *           state and nelts / flags / pool are filled in here
 * size    - capacity of the result set and, for the poll()-based flavor,
 *           of the native pollfd and query arrays
 * p       - pool that every allocation is taken from
 * flags   - APR_POLLSET_THREADSAFE selects the async i/o flavor (mutex,
 *           message queue, hash and rings); without it the plain poll()
 *           arrays are set up instead
 *
 * Returns APR_SUCCESS; APR_ENOMEM if an allocation returned NULL;
 * APR_ENOTIMPL if a threadsafe pollset is requested without APR thread
 * support; the status of apr_thread_mutex_create(); or the errno left by
 * msgget().  pollset->p is reset to NULL on every failure path.
 */
static apr_status_t asio_pollset_create(apr_pollset_t *pollset,
                                        apr_uint32_t size,
                                        apr_pool_t *p,
                                        apr_uint32_t flags)
{
    DBG_BUFF
    apr_status_t rv;
    apr_pollset_private_t *priv;

    DBG1(2, "entered, flags: %x\n", flags);

    priv = pollset->p = apr_palloc(p, sizeof(*priv));
    if (!priv) {
        return APR_ENOMEM;
    }

    /* apr_palloc() does not clear memory.  Give msg_q a value that can
     * never name a real queue so that nothing acts on a stray queue id
     * when no queue was created for this pollset.
     */
    priv->msg_q = -1;

    if (flags & APR_POLLSET_THREADSAFE) {
#if APR_HAS_THREADS
        if ((rv = apr_thread_mutex_create(&(priv->ring_lock),
                                           APR_THREAD_MUTEX_DEFAULT,
                                           p)) != APR_SUCCESS) {
            DBG1(1, "apr_thread_mutex_create returned %d\n", rv);
            pollset->p = NULL;
            return rv;
        }
        rv = msgget(IPC_PRIVATE, S_IWUSR+S_IRUSR); /* user r/w perms */
        if (rv < 0) {
            /* report the reason for the failure, not msgget's -1 */
            rv = errno;
#if DEBUG
            perror(__FUNCTION__ " msgget returned < 0 ");
#endif
            pollset->p = NULL;
            return rv;
        }

        DBG2(4, "pollset %p msgget was OK, rv=%d\n", pollset, rv);
        priv->msg_q = rv;
        priv->elems   = apr_hash_make(p);

        /* ready_ring is (re)initialised at the start of every poll call */
        APR_RING_INIT(&priv->free_ring, asio_elem_t, link);
        APR_RING_INIT(&priv->prior_ready_ring, asio_elem_t, link);

#else  /* APR doesn't have threads but caller wants a threadsafe pollset */
        pollset->p = NULL;
        return APR_ENOTIMPL;
#endif

    } else {  /* APR_POLLSET_THREADSAFE not set, i.e. no async i/o,
               * init fields only needed in old style pollset
               */

        priv->pollset = apr_palloc(p, size * sizeof(struct pollfd));
        priv->query_set = apr_palloc(p, size * sizeof(apr_pollfd_t));

        if ((!priv->pollset) || (!priv->query_set)) {
            pollset->p = NULL;
            return APR_ENOMEM;
        }
    }

    pollset->nelts   = 0;
    pollset->flags   = flags;
    pollset->pool    = p;
    priv->size    = size;
    priv->result_set = apr_palloc(p, size * sizeof(apr_pollfd_t));
    if (!priv->result_set) {
        if (flags & APR_POLLSET_THREADSAFE) {
            /* don't leave the freshly created queue behind */
            msgctl(priv->msg_q, IPC_RMID, NULL);
        }
        pollset->p = NULL;
        return APR_ENOMEM;
    }

    DBG2(2, "exiting, pollset: %p, type: %s\n",
             pollset,
             flags & APR_POLLSET_THREADSAFE ? "async" : "POSIX");


    return APR_SUCCESS;

} /* end of asio_pollset_create */

/*
 * Add a descriptor to a poll()-based (non async i/o) pollset.
 *
 * The caller's descriptor is copied into query_set and a matching native
 * pollfd is appended to the pollset array at the same index.
 *
 * Returns APR_ENOMEM when the pollset already holds 'size' descriptors,
 * otherwise APR_SUCCESS.
 */
static apr_status_t posix_add(apr_pollset_t      *pollset,
                              const apr_pollfd_t *descriptor)
{
    DBG_BUFF
    int fd;
    apr_pollset_private_t *priv = pollset->p;

    DBG(4, "entered\n");

    if (pollset->nelts == priv->size) {
        return APR_ENOMEM;
    }

    priv->query_set[pollset->nelts] = *descriptor;

    /* pick the native fd out of whichever kind of descriptor this is */
    if (descriptor->desc_type == APR_POLL_SOCKET) {
        fd = descriptor->desc.s->socketdes;
    }
    else {
        fd = descriptor->desc.f->filedes;
    }

    priv->pollset[pollset->nelts].fd = fd;

    priv->pollset[pollset->nelts].events =
        get_event(descriptor->reqevents);

    pollset->nelts++;

    DBG2(4, "exiting, fd %d added to pollset %p\n", fd, pollset);

    return APR_SUCCESS;
}   /* end of posix_add */


/*
 * Provider "add" hook.
 *
 * For an async i/o pollset (APR_POLLSET_THREADSAFE) an element is taken
 * from the free ring, or freshly allocated from the pollset's pool, and an
 * async poll (AIO_SELPOLL) is issued for the socket.  On success the
 * element is entered in the hash keyed by its fd and nelts is bumped.
 * Otherwise the work is delegated to posix_add().
 *
 * Returns APR_SUCCESS, the errno left by a failing asyncio() call, or the
 * status of posix_add().
 */
static apr_status_t asio_pollset_add(apr_pollset_t *pollset,
                                     const apr_pollfd_t *descriptor)
{
    DBG_BUFF
    asio_elem_t *elem;
    apr_status_t rv = APR_SUCCESS;
    apr_pollset_private_t *priv = pollset->p;

    pollset_lock_rings();
    DBG(2, "entered\n");

    if (pollset->flags & APR_POLLSET_THREADSAFE) {

        if (!APR_RING_EMPTY(&(priv->free_ring), asio_elem_t, link)) {
            elem = APR_RING_FIRST(&(priv->free_ring));
            APR_RING_REMOVE(elem, link);
            DBG1(3, "used recycled memory at %08p\n", elem);
            elem->state = ASIO_INIT;
            elem->a.aio_cflags = 0;
        }
        else {
            elem = (asio_elem_t *) apr_pcalloc(pollset->pool, sizeof(asio_elem_t));
            DBG1(3, "alloced new memory at %08p\n", elem);

            /* These fields never change for the life of the elem, so they
             * are only set up on first allocation.  Completion is reported
             * as a message on the pollset's queue whose text is a pointer
             * back to this elem.
             */
            elem->a.aio_notifytype = AIO_MSGQ;
            elem->a.aio_msgev_qid  = priv->msg_q;
            DBG1(5, "aio_msgev_quid = %d \n", elem->a.aio_msgev_qid);
            elem->a.aio_msgev_size = sizeof(asio_elem_t *);
            elem->a.aio_msgev_flag = 0;     /* wait if queue is full */
            elem->a.aio_msgev_addr = &(elem->msg);
            elem->a.aio_buf        = &(elem->os_pfd);
            elem->a.aio_nbytes     = 1;     /* number of pfds to poll */
            elem->msg.msg_type     = 1;
            elem->msg.msg_elem     = elem;
        }

        /* z/OS only supports async I/O for sockets for now */
        elem->os_pfd.fd = descriptor->desc.s->socketdes;

        APR_RING_ELEM_INIT(elem, link);
        elem->a.aio_cmd       = AIO_SELPOLL;
        elem->a.aio_cflags    &= ~AIO_OK2COMPIMD; /* not OK to complete inline*/
        elem->pfd             = *descriptor;
        elem->os_pfd.events   = get_event(descriptor->reqevents);

        if (0 != asyncio(&elem->a)) {
            rv = errno;
            DBG3(4, "pollset %p asio failed fd %d, errno %d\n",
                     pollset, elem->os_pfd.fd, rv);
#if DEBUG
            perror(__FUNCTION__ " asio failure");
#endif
            /* The elem was never entered in the hash, so nothing else
             * refers to it.  Put it back on the free ring instead of
             * leaking pool memory each time an add fails.
             */
            APR_RING_INSERT_TAIL(&(priv->free_ring), elem,
                                 asio_elem_t, link);
        }
        else {
            DBG2(4, "good asio call, adding fd %d to pollset %p\n",
                     elem->os_pfd.fd, pollset);

            pollset->nelts++;
            apr_hash_set(priv->elems, &(elem->os_pfd.fd), sizeof(int), elem);
        }
    }
    else {
        /* APR_POLLSET_THREADSAFE isn't set.  use POSIX poll in case
         * pipes or files are used with this pollset
         */

        rv = posix_add(pollset, descriptor);
    }

    DBG1(2, "exiting, rv = %d\n", rv);

    pollset_unlock_rings();
    return rv;
} /* end of asio_pollset_add */

static posix_remove(apr_pollset_t *pollset, const apr_pollfd_t *descriptor)
{
    DBG_BUFF
    apr_uint32_t i;
    apr_pollset_private_t *priv = pollset->p;

    DBG(4, "entered\n");
    for (i = 0; i < pollset->nelts; i++) {
        if (descriptor->desc.s == priv->query_set[i].desc.s) {
            /* Found an instance of the fd: remove this and any other copies */
            apr_uint32_t dst = i;
            apr_uint32_t old_nelts = pollset->nelts;
            pollset->nelts--;
            for (i++; i < old_nelts; i++) {
                if (descriptor->desc.s == priv->query_set[i].desc.s) {
                    pollset->nelts--;
                }
                else {
                    priv->pollset[dst] = priv->pollset[i];
                    priv->query_set[dst] = priv->query_set[i];
                    dst++;
                }
            }
            DBG(4, "returning OK\n");
            return APR_SUCCESS;
        }
    }

    DBG(1, "returning APR_NOTFOUND\n");
    return APR_NOTFOUND;

}   /* end of posix_remove */

/*
 * Provider "remove" hook.
 *
 * poll()-based pollsets are handled by posix_remove().  For async i/o
 * pollsets the element is looked up by fd in the hash and deleted from it;
 * if its async poll is still outstanding (ASIO_INIT) a cancel request is
 * issued for it.  The element is then marked ASIO_REMOVED; its memory is
 * not reused here, the poll path moves it to the free ring later.
 *
 * Returns APR_SUCCESS, APR_NOTFOUND if the fd is not in the hash, or the
 * status of posix_remove().  The outcome of the cancel request itself is
 * only checked by an assert in DEBUG builds.
 */
static apr_status_t asio_pollset_remove(apr_pollset_t *pollset,
                                        const apr_pollfd_t *descriptor)
{
    DBG_BUFF
    asio_elem_t *elem;
    apr_status_t rv = APR_SUCCESS;
    apr_pollset_private_t *priv = pollset->p;
    /* AIO_CANCEL is synchronous, so autodata works fine.  Start from an
     * all-zero control block so that the fields not set below are not
     * passed to the kernel holding stack garbage.
     */
    struct aiocb cancel_a = {0};

    int fd;

    DBG(2, "entered\n");

    if (!(pollset->flags & APR_POLLSET_THREADSAFE)) {
        return posix_remove(pollset, descriptor);
    }

    pollset_lock_rings();

#if DEBUG
    assert(descriptor->desc_type == APR_POLL_SOCKET);
#endif
    /* zOS 1.12 doesn't support files for async i/o */
    fd = descriptor->desc.s->socketdes;

    elem = apr_hash_get(priv->elems, &(fd), sizeof(int));
    if (elem == NULL) {
        DBG1(1, "couldn't find fd %d\n", fd);
        rv = APR_NOTFOUND;
    } else {
        DBG1(5, "hash found fd %d\n", fd);
        /* delete this fd from the hash */
        apr_hash_set(priv->elems, &(fd), sizeof(int), NULL);

        if (elem->state == ASIO_INIT) {
            /* asyncio call to cancel */
            cancel_a.aio_cmd = AIO_CANCEL;
            cancel_a.aio_buf = &elem->a;   /* point to original aiocb */

            cancel_a.aio_cflags  = 0;
            cancel_a.aio_cflags2 = 0;

            /* we want the original aiocb to show up on the pollset message queue 
             * before recycling its memory to eliminate race conditions
             */

            rv = asyncio(&cancel_a);
            DBG1(4, "asyncio returned %d\n", rv);

#if DEBUG
            assert(rv == 1);
#endif
        }
        elem->state = ASIO_REMOVED;
        /* the removal is reported as successful whatever the cancel returned */
        rv = APR_SUCCESS;
    }

    DBG1(2, "exiting, rv: %d\n", rv);

    pollset_unlock_rings();

    return rv;
}   /* end of asio_pollset_remove */

static posix_poll(apr_pollset_t *pollset,
                  apr_interval_time_t timeout,
                  apr_int32_t *num,
                  const apr_pollfd_t **descriptors)
{
    DBG_BUFF
    int rv;
    apr_uint32_t i, j;
    apr_pollset_private_t *priv = pollset->p;

    DBG(4, "entered\n");

    if (timeout > 0) {
        timeout /= 1000;
    }
    rv = poll(priv->pollset, pollset->nelts, timeout);
    (*num) = rv;
    if (rv < 0) {
        return apr_get_netos_error();
    }
    if (rv == 0) {
        return APR_TIMEUP;
    }
    j = 0;
    for (i = 0; i < pollset->nelts; i++) {
        if (priv->pollset[i].revents != 0) {
            priv->result_set[j] = priv->query_set[i];
            priv->result_set[j].rtnevents =
                get_revent(priv->pollset[i].revents);
            j++;
        }
    }
    if (descriptors)
        *descriptors = priv->result_set;

    DBG(4, "exiting ok\n");
    return APR_SUCCESS;

}   /* end of posix_poll */

/*
 * Act on one completion message taken from the pollset's message queue.
 *
 * The message carries a pointer to the element whose async poll finished:
 *   - ASIO_REMOVED: the descriptor was removed in the meantime, so the
 *     element goes to the free ring for reuse;
 *   - ASIO_INIT: a normal completion; the element is marked ASIO_COMPLETE
 *     and appended to the ready ring;
 *   - anything else is unexpected and only traced (asserts in DEBUG builds).
 *
 * Nothing is returned; callers use this purely for its effect on the rings.
 */
static void process_msg(apr_pollset_t *pollset, struct asio_msgbuf_t *msg)
{
    DBG_BUFF
    asio_elem_t *elem = msg->msg_elem;

    switch(elem->state) {
    case ASIO_REMOVED:
        DBG2(5, "for cancelled elem, recycling memory - elem %08p, fd %d\n",
                elem, elem->os_pfd.fd);
        APR_RING_INSERT_TAIL(&(pollset->p->free_ring), elem,
                             asio_elem_t, link);
        break;
    case ASIO_INIT:
        DBG2(4, "adding to ready ring: elem %08p, fd %d\n",
                elem, elem->os_pfd.fd);
        elem->state = ASIO_COMPLETE;
        APR_RING_INSERT_TAIL(&(pollset->p->ready_ring), elem,
                             asio_elem_t, link);
        break;
    default:
        DBG3(1, "unexpected state: elem %08p, fd %d, state %d\n",
            elem, elem->os_pfd.fd, elem->state);
#if DEBUG
        assert(0);
#endif
        break;
    }
}

/*
 * Provider "poll" hook.
 *
 * poll()-based pollsets are handed to posix_poll().  For async i/o
 * pollsets the call proceeds in four steps:
 *   1. every element left on prior_ready_ring by the previous call is
 *      either recycled (if it was removed) or has its async poll
 *      re-issued; re-issues that complete inline go straight to the ready
 *      ring;
 *   2. completion messages already waiting on the message queue are
 *      drained without blocking;
 *   3. if nothing is ready, the ring lock is dropped and the call blocks
 *      for one message, up to 'timeout';
 *   4. up to 'size' ready elements are copied to result_set, and the whole
 *      ready ring becomes prior_ready_ring for the next call.
 *
 * timeout     - wait limit, split with apr_time_sec / apr_time_usec;
 *               negative blocks until something is ready
 * num         - receives the number of entries placed in result_set; it is
 *               0 on the timeout and error returns
 * descriptors - if non-NULL, receives the result_set array
 *
 * Returns APR_SUCCESS, APR_TIMEUP when the timed receive fails with
 * EAGAIN, or the errno of a failing asyncio() / __msgrcv_timed() call.
 */
static apr_status_t asio_pollset_poll(apr_pollset_t *pollset,
                                      apr_interval_time_t timeout,
                                      apr_int32_t *num,
                                      const apr_pollfd_t **descriptors)
{
    DBG_BUFF
    apr_uint32_t i;
    int ret;
    asio_elem_t *elem;
    struct asio_msgbuf_t msg_buff;
    struct timespec tv;
    apr_status_t rv = APR_SUCCESS;
    apr_pollset_private_t *priv = pollset->p;

    DBG(6, "entered\n"); /* chatty - traces every second w/Event */

    if ((pollset->flags & APR_POLLSET_THREADSAFE) == 0 ) {
        return posix_poll(pollset, timeout, num, descriptors);
    }

    /* give the caller a defined count on the early (timeout/error) returns */
    (*num) = 0;

    pollset_lock_rings();
    APR_RING_INIT(&(priv->ready_ring), asio_elem_t, link);

    while (!APR_RING_EMPTY(&(priv->prior_ready_ring), asio_elem_t, link)) {
        elem = APR_RING_FIRST(&(priv->prior_ready_ring));
        DBG3(5, "pollset %p elem %p fd %d on prior ready ring\n",
                pollset,
                elem,
                elem->os_pfd.fd);

        APR_RING_REMOVE(elem, link);

        /*
         * since USS does not remember what's in our pollset, we have
         * to re-add fds which have not been apr_pollset_remove'd
         *
         * there may have been too many ready fd's to return in the
         * result set last time. re-poll inline for both cases
         */

        if (elem->state == ASIO_REMOVED) {

            /*
             * async i/o is done since it was found on prior_ready
             * the state says the caller is done with it too
             * so recycle the elem
             */

            APR_RING_INSERT_TAIL(&(priv->free_ring), elem,
                                 asio_elem_t, link);
            continue;  /* do not re-add if it has been _removed */
        }

        elem->state = ASIO_INIT;
        elem->a.aio_cflags     = AIO_OK2COMPIMD;

        if (0 != (ret = asyncio(&elem->a))) {
            if (ret == 1) {
                DBG(4, "asyncio() completed inline\n");
                /* it's ready now */
                elem->state = ASIO_COMPLETE;
                APR_RING_INSERT_TAIL(&(priv->ready_ring), elem, asio_elem_t,
                                     link);
            }
            else {
                /* save errno before tracing or unlocking can disturb it */
                rv = errno;
                DBG2(1, "asyncio() failed, ret: %d, errno: %d\n",
                        ret, rv);
                pollset_unlock_rings();
                return rv;
            }
        }
        DBG1(4, "asyncio() completed rc %d\n", ret);
    }

    DBG(6, "after prior ready loop\n"); /* chatty w/timeouts, hence 6 */

    /* Gather async poll completions that have occurred since the last call */
    while (0 < msgrcv(priv->msg_q, &msg_buff, sizeof(asio_elem_t *), 0,
                      IPC_NOWAIT)) {
        process_msg(pollset, &msg_buff);
    }

    /* Suspend if nothing is ready yet. */
    if (APR_RING_EMPTY(&(priv->ready_ring), asio_elem_t, link)) {

        if (timeout >= 0) {
            tv.tv_sec  = apr_time_sec(timeout);
            tv.tv_nsec = apr_time_usec(timeout) * 1000;
        } else {
            tv.tv_sec  = INT_MAX;  /* block until something is ready */
            tv.tv_nsec = 0;        /* don't pass an indeterminate value */
        }

        DBG2(6, "nothing on the ready ring "
                "- blocking for %d seconds %d ns\n",
                tv.tv_sec, tv.tv_nsec);

        pollset_unlock_rings();   /* allow other apr_pollset_* calls while blocked */

        if (0 >= (ret = __msgrcv_timed(priv->msg_q, &msg_buff,
                                       sizeof(asio_elem_t *), 0, NULL, &tv))) {
            /* save errno before the tracing below can disturb it */
            rv = errno;
#if DEBUG
            if (rv == EAGAIN) {
                DBG(6, "__msgrcv_timed timed out\n"); /* timeout path, so 6 */
            }
            else {
                DBG(1, "__msgrcv_timed failed!\n");
            }
#endif
            /* the ring lock is not held here, so there is nothing to release */
            return (rv == EAGAIN) ? APR_TIMEUP : rv;
        }

        pollset_lock_rings();

        process_msg(pollset, &msg_buff);
    }

    APR_RING_INIT(&priv->prior_ready_ring, asio_elem_t, link);

    (*num) = 0;
    elem = APR_RING_FIRST(&(priv->ready_ring));

    /* report ready elems until the ring or the result set is exhausted */
    for (i = 0;
         i < priv->size
             && elem != APR_RING_SENTINEL(&(priv->ready_ring), asio_elem_t, link);
         i++) {
        DBG2(5, "ready ring: elem %08p, fd %d\n", elem, elem->os_pfd.fd);

        priv->result_set[i] = elem->pfd;
        priv->result_set[i].rtnevents
                               = get_revent(elem->os_pfd.revents);
        (*num)++;

        elem = APR_RING_NEXT(elem, link);

#if DEBUG
        if (elem == APR_RING_SENTINEL(&(priv->ready_ring), asio_elem_t, link)) {
            DBG(5, "end of ready ring reached\n");
        }
#endif
    }

    if (descriptors) {
        *descriptors = priv->result_set;
    }

    /* if the result size is too small, remember which descriptors
     * haven't had results reported yet.  we will look
     * at these descriptors on the next apr_pollset_poll call
     *
     * note that the entire ready ring is carried over, reported or not;
     * the next call re-polls each of these elems
     */

    APR_RING_CONCAT(&priv->prior_ready_ring, &(priv->ready_ring), asio_elem_t, link);

    DBG1(2, "exiting, rv = %d\n", rv);

    pollset_unlock_rings();

    return rv;
}  /* end of asio_pollset_poll */

/*
 * Provider table for this implementation: the create, add, remove, poll
 * and cleanup entry points defined above, followed by the provider's name.
 */
static apr_pollset_provider_t impl = {
    asio_pollset_create,
    asio_pollset_add,
    asio_pollset_remove,
    asio_pollset_poll,
    asio_pollset_cleanup,
    "asio"
};

/* The only symbol this file exports: a pointer to the provider table */
apr_pollset_provider_t *apr_pollset_provider_aio_msgq = &impl;

#endif /* HAVE_AIO_MSGQ */