Diffstat (limited to 'libc/nptl/sysdeps/unix/sysv/linux')
-rw-r--r--  libc/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S |  50
-rw-r--r--  libc/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S      | 129
-rw-r--r--  libc/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h                |   2
-rw-r--r--  libc/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h             |   2
-rw-r--r--  libc/nptl/sysdeps/unix/sysv/linux/powerpc/pthread_once.c             |  21
-rw-r--r--  libc/nptl/sysdeps/unix/sysv/linux/s390/lowlevellock.h                |   2
-rw-r--r--  libc/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.h                  |   2
-rw-r--r--  libc/nptl/sysdeps/unix/sysv/linux/sparc/lowlevellock.h               |   2
-rw-r--r--  libc/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h              |   2
-rw-r--r--  libc/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S    |  51
-rw-r--r--  libc/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S         | 121
11 files changed, 200 insertions(+), 184 deletions(-)
diff --git a/libc/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S b/libc/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
index 5f1fd5ddc..6761c136e 100644
--- a/libc/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
+++ b/libc/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
@@ -200,9 +200,11 @@ __pthread_cond_timedwait:
42: leal (%ebp), %esi
movl 28(%esp), %edx
addl $cond_futex, %ebx
+.Ladd_cond_futex_pi:
movl $SYS_futex, %eax
ENTER_KERNEL
subl $cond_futex, %ebx
+.Lsub_cond_futex_pi:
movl %eax, %esi
/* Set the pi-requeued flag only if the kernel has returned 0. The
kernel does not hold the mutex on ETIMEDOUT or any other error. */
@@ -210,8 +212,23 @@ __pthread_cond_timedwait:
sete 24(%esp)
je 41f
- /* Normal and PI futexes dont mix. Use normal futex functions only
- if the kernel does not support the PI futex functions. */
+ /* When a futex syscall with FUTEX_WAIT_REQUEUE_PI returns
+ successfully, it has already locked the mutex for us and the
+ pi_flag (24(%esp)) is set to denote that fact. However, if another
+ thread changed the futex value before we entered the wait, the
+ syscall may return EAGAIN and the mutex is not locked. We treat
+ this as a success anyway, since later we look at the pi_flag to
+ decide whether we got the mutex. The sequence numbers then make
+ sure that only one of the threads actually wakes up. We retry using
+ normal FUTEX_WAIT only if the kernel returned ENOSYS, since normal
+ and PI futexes don't mix.
+
+ Note that we don't check for EAGAIN specifically; we assume that the
+ only other error the futex syscall could return is EAGAIN (barring
+ ETIMEDOUT of course, in the timeout case) since anything else
+ would mean an error in our function. It is too expensive to do that
+ check on every call (and such calls are quite common with a large
+ number of threads), so it has been skipped. */
cmpl $-ENOSYS, %eax
jne 41f
xorl %ecx, %ecx
@@ -271,9 +288,24 @@ __pthread_cond_timedwait:
jne 9f
15: cmpl $-ETIMEDOUT, %esi
- jne 8b
+ je 28f
+
+ /* We need to go back to futex_wait. If we're using requeue_pi, then
+ release the mutex we had acquired and go back. */
+ movl 24(%esp), %edx
+ test %edx, %edx
+ jz 8b
+
+ /* Adjust the mutex values first and then unlock it. The unlock
+ should always succeed or else the kernel did not lock the mutex
+ correctly. */
+ movl dep_mutex(%ebx), %eax
+ call __pthread_mutex_cond_lock_adjust
+ xorl %edx, %edx
+ call __pthread_mutex_unlock_usercnt
+ jmp 8b
- addl $1, wakeup_seq(%ebx)
+28: addl $1, wakeup_seq(%ebx)
adcl $0, wakeup_seq+4(%ebx)
addl $1, cond_futex(%ebx)
movl $ETIMEDOUT, %esi
@@ -638,7 +670,15 @@ __condvar_tw_cleanup:
.uleb128 .Lcstend-.Lcstbegin
.Lcstbegin:
.long .LcleanupSTART-.LSTARTCODE
- .long .Ladd_cond_futex-.LcleanupSTART
+ .long .Ladd_cond_futex_pi-.LcleanupSTART
+ .long __condvar_tw_cleanup-.LSTARTCODE
+ .uleb128 0
+ .long .Ladd_cond_futex_pi-.LSTARTCODE
+ .long .Lsub_cond_futex_pi-.Ladd_cond_futex_pi
+ .long __condvar_tw_cleanup2-.LSTARTCODE
+ .uleb128 0
+ .long .Lsub_cond_futex_pi-.LSTARTCODE
+ .long .Ladd_cond_futex-.Lsub_cond_futex_pi
.long __condvar_tw_cleanup-.LSTARTCODE
.uleb128 0
.long .Ladd_cond_futex-.LSTARTCODE
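
The error handling added above for FUTEX_WAIT_REQUEUE_PI (and mirrored in
the other three condvar files below) amounts to the following control flow.
This is a minimal C sketch under the assumption that a raw futex wrapper is
available; futex_call and every identifier here are illustrative, not
glibc's actual implementation:

    #include <errno.h>
    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <time.h>
    #include <unistd.h>

    static long
    futex_call (int *uaddr, int op, int val, const struct timespec *ts,
                int *uaddr2)
    {
      /* Raw futex syscall: returns 0 on success, -1 with errno set.  */
      return syscall (SYS_futex, uaddr, op, val, ts, uaddr2, 0);
    }

    /* Sets *pi_flag when the kernel has already locked the mutex for us.  */
    static long
    wait_requeue_pi (int *cond_futex, int seq, const struct timespec *abstime,
                     int *mutex_futex, int *pi_flag)
    {
      long err = futex_call (cond_futex,
                             FUTEX_WAIT_REQUEUE_PI | FUTEX_PRIVATE_FLAG,
                             seq, abstime, mutex_futex);
      /* The kernel hands us the mutex only on a 0 return.  */
      *pi_flag = (err == 0);
      if (err != 0 && errno == ENOSYS)
        /* Normal and PI futexes do not mix: fall back to plain FUTEX_WAIT.  */
        err = futex_call (cond_futex, FUTEX_WAIT, seq, abstime, NULL);
      /* EAGAIN (and ETIMEDOUT in the timed case) is treated as a wakeup;
         the sequence numbers decide which waiter actually proceeds.  */
      return err;
    }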
diff --git a/libc/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S b/libc/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
index 2ae7af261..0af06acad 100644
--- a/libc/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
+++ b/libc/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
@@ -136,25 +136,37 @@ __pthread_cond_wait:
cmpl $PI_BIT, %eax
jne 18f
-90:
movl $(FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %ecx
movl %ebp, %edx
xorl %esi, %esi
addl $cond_futex, %ebx
+.Ladd_cond_futex_pi:
movl $SYS_futex, %eax
ENTER_KERNEL
subl $cond_futex, %ebx
+.Lsub_cond_futex_pi:
/* Set the pi-requeued flag only if the kernel has returned 0. The
kernel does not hold the mutex on error. */
cmpl $0, %eax
sete 16(%esp)
je 19f
- cmpl $-EAGAIN, %eax
- je 91f
-
- /* Normal and PI futexes dont mix. Use normal futex functions only
- if the kernel does not support the PI futex functions. */
+ /* When a futex syscall with FUTEX_WAIT_REQUEUE_PI returns
+ successfully, it has already locked the mutex for us and the
+ pi_flag (16(%esp)) is set to denote that fact. However, if another
+ thread changed the futex value before we entered the wait, the
+ syscall may return EAGAIN and the mutex is not locked. We treat
+ this as a success anyway, since later we look at the pi_flag to
+ decide whether we got the mutex. The sequence numbers then make
+ sure that only one of the threads actually wakes up. We retry using
+ normal FUTEX_WAIT only if the kernel returned ENOSYS, since normal
+ and PI futexes don't mix.
+
+ Note that we don't check for EAGAIN specifically; we assume that the
+ only other error the futex syscall could return is EAGAIN since
+ anything else would mean an error in our function. It is too
+ expensive to do that check on every call (and such calls are quite
+ common with a large number of threads), so it has been skipped. */
cmpl $-ENOSYS, %eax
jne 19f
xorl %ecx, %ecx
@@ -204,12 +216,12 @@ __pthread_cond_wait:
cmpl 8(%esp), %edx
jne 7f
cmpl 4(%esp), %edi
- je 8b
+ je 22f
7: cmpl %ecx, %edx
jne 9f
cmp %eax, %edi
- je 8b
+ je 22f
9: addl $1, woken_seq(%ebx)
adcl $0, woken_seq+4(%ebx)
@@ -285,6 +297,22 @@ __pthread_cond_wait:
jmp 20b
cfi_adjust_cfa_offset(-FRAME_SIZE);
+
+ /* We need to go back to futex_wait. If we're using requeue_pi, then
+ release the mutex we had acquired and go back. */
+22: movl 16(%esp), %edx
+ test %edx, %edx
+ jz 8b
+
+ /* Adjust the mutex values first and then unlock it. The unlock
+ should always succeed or else the kernel did not lock the mutex
+ correctly. */
+ movl dep_mutex(%ebx), %eax
+ call __pthread_mutex_cond_lock_adjust
+ xorl %edx, %edx
+ call __pthread_mutex_unlock_usercnt
+ jmp 8b
+
/* Initial locking failed. */
1:
#if cond_lock == 0
@@ -398,77 +426,6 @@ __pthread_cond_wait:
call __lll_unlock_wake
jmp 11b
-91:
-.LcleanupSTART2:
- /* FUTEX_WAIT_REQUEUE_PI returned EAGAIN. We need to
- call it again. */
-
- /* Get internal lock. */
- movl $1, %edx
- xorl %eax, %eax
- LOCK
-#if cond_lock == 0
- cmpxchgl %edx, (%ebx)
-#else
- cmpxchgl %edx, cond_lock(%ebx)
-#endif
- jz 92f
-
-#if cond_lock == 0
- movl %ebx, %edx
-#else
- leal cond_lock(%ebx), %edx
-#endif
-#if (LLL_SHARED-LLL_PRIVATE) > 255
- xorl %ecx, %ecx
-#endif
- cmpl $-1, dep_mutex(%ebx)
- setne %cl
- subl $1, %ecx
- andl $(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
- addl $LLL_PRIVATE, %ecx
-#endif
- call __lll_lock_wait
-
-92:
- /* Increment the cond_futex value again, so it can be used as a new
- expected value. */
- addl $1, cond_futex(%ebx)
- movl cond_futex(%ebx), %ebp
-
- /* Unlock. */
- LOCK
-#if cond_lock == 0
- subl $1, (%ebx)
-#else
- subl $1, cond_lock(%ebx)
-#endif
- je 93f
-#if cond_lock == 0
- movl %ebx, %eax
-#else
- leal cond_lock(%ebx), %eax
-#endif
-#if (LLL_SHARED-LLL_PRIVATE) > 255
- xorl %ecx, %ecx
-#endif
- cmpl $-1, dep_mutex(%ebx)
- setne %cl
- subl $1, %ecx
- andl $(LLL_SHARED-LLL_PRIVATE), %ecx
-#if LLL_PRIVATE != 0
- addl $LLL_PRIVATE, %ecx
-#endif
- call __lll_unlock_wake
-
-93:
- /* Set the rest of SYS_futex args for FUTEX_WAIT_REQUEUE_PI. */
- xorl %ecx, %ecx
- movl dep_mutex(%ebx), %edi
- jmp 90b
-.LcleanupEND2:
-
.size __pthread_cond_wait, .-__pthread_cond_wait
versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
GLIBC_2_3_2)
@@ -630,7 +587,15 @@ __condvar_w_cleanup:
.uleb128 .Lcstend-.Lcstbegin
.Lcstbegin:
.long .LcleanupSTART-.LSTARTCODE
- .long .Ladd_cond_futex-.LcleanupSTART
+ .long .Ladd_cond_futex_pi-.LcleanupSTART
+ .long __condvar_w_cleanup-.LSTARTCODE
+ .uleb128 0
+ .long .Ladd_cond_futex_pi-.LSTARTCODE
+ .long .Lsub_cond_futex_pi-.Ladd_cond_futex_pi
+ .long __condvar_w_cleanup2-.LSTARTCODE
+ .uleb128 0
+ .long .Lsub_cond_futex_pi-.LSTARTCODE
+ .long .Ladd_cond_futex-.Lsub_cond_futex_pi
.long __condvar_w_cleanup-.LSTARTCODE
.uleb128 0
.long .Ladd_cond_futex-.LSTARTCODE
@@ -641,10 +606,6 @@ __condvar_w_cleanup:
.long .LcleanupEND-.Lsub_cond_futex
.long __condvar_w_cleanup-.LSTARTCODE
.uleb128 0
- .long .LcleanupSTART2-.LSTARTCODE
- .long .LcleanupEND2-.LcleanupSTART2
- .long __condvar_w_cleanup-.LSTARTCODE
- .uleb128 0
.long .LcallUR-.LSTARTCODE
.long .LENDCODE-.LcallUR
.long 0
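
The new entries above extend the hand-written LSDA call-site table: the old
single region from .LcleanupSTART to .Ladd_cond_futex is split in three, so
that a cancellation unwind hitting the window between .Ladd_cond_futex_pi and
.Lsub_cond_futex_pi (while %ebx points at cond_futex and the kernel may
already have handed this thread the PI mutex) reaches the new
__condvar_w_cleanup2 landing pad instead of __condvar_w_cleanup. Each record
has four fields; the struct below is only a descriptive rendering of that
layout, and the field names do not come from any header:

    /* One call-site record in the .Lcstbegin table above.  */
    struct callsite_record
    {
      unsigned int start;        /* region start, relative to .LSTARTCODE    */
      unsigned int length;       /* region length in bytes                   */
      unsigned int landing_pad;  /* cleanup handler, relative to .LSTARTCODE */
      unsigned char action;      /* uleb128 action index; 0 = cleanup only   */
    };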
diff --git a/libc/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h b/libc/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h
index 3d805ff43..7b576ca00 100644
--- a/libc/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h
+++ b/libc/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h
@@ -544,7 +544,7 @@ LLL_STUB_UNWIND_INFO_END
#define lll_islocked(futex) \
(futex != LLL_LOCK_INITIALIZER)
-/* The kernel notifies a process with uses CLONE_CLEARTID via futex
+/* The kernel notifies a process which uses CLONE_CHILD_CLEARTID via futex
wakeup when the clone terminates. The memory location contains the
thread ID while the clone is running and is reset to zero
afterwards.
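
The comment corrected here (and in the powerpc, s390, sh, sparc and x86_64
headers below) describes the CLONE_CHILD_CLEARTID exit protocol. Below is a
minimal sketch of a waiter built on that protocol; it illustrates the
mechanism only and is not glibc's lll_wait_tid:

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* The ctid word holds the thread ID while the clone runs; at exit the
       kernel zeroes it and issues a FUTEX_WAKE on it.  */
    static void
    wait_for_thread_exit (volatile int *ctid)
    {
      int tid;
      while ((tid = *ctid) != 0)
        /* Fails immediately with EAGAIN if *ctid changed between the
           load and the syscall, i.e. if the thread already exited.  */
        syscall (SYS_futex, (int *) ctid, FUTEX_WAIT, tid, NULL, NULL, 0);
    }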
diff --git a/libc/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h b/libc/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
index 234e04234..406c290d7 100644
--- a/libc/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
+++ b/libc/nptl/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h
@@ -288,7 +288,7 @@ extern int __lll_robust_timedlock_wait
1 - taken by one user
>1 - taken by more users */
-/* The kernel notifies a process which uses CLONE_CLEARTID via futex
+/* The kernel notifies a process which uses CLONE_CHILD_CLEARTID via futex
wakeup when the clone terminates. The memory location contains the
thread ID while the clone is running and is reset to zero
afterwards. */
diff --git a/libc/nptl/sysdeps/unix/sysv/linux/powerpc/pthread_once.c b/libc/nptl/sysdeps/unix/sysv/linux/powerpc/pthread_once.c
index 4e3d7bd49..52ab53f0a 100644
--- a/libc/nptl/sysdeps/unix/sysv/linux/powerpc/pthread_once.c
+++ b/libc/nptl/sysdeps/unix/sysv/linux/powerpc/pthread_once.c
@@ -28,6 +28,7 @@ clear_once_control (void *arg)
{
pthread_once_t *once_control = (pthread_once_t *) arg;
+ __asm __volatile (__lll_rel_instr);
*once_control = 0;
lll_futex_wake (once_control, INT_MAX, LLL_PRIVATE);
}
@@ -47,15 +48,15 @@ __pthread_once (pthread_once_t *once_control, void (*init_routine) (void))
oldval = *once_control;
if ((oldval & 2) == 0)
*once_control = newval;
- Do this atomically.
+ Do this atomically with an acquire barrier.
*/
newval = __fork_generation | 1;
- __asm __volatile ("1: lwarx %0,0,%3\n"
+ __asm __volatile ("1: lwarx %0,0,%3" MUTEX_HINT_ACQ "\n"
" andi. %1,%0,2\n"
" bne 2f\n"
" stwcx. %4,0,%3\n"
" bne 1b\n"
- "2: isync"
+ "2: " __lll_acq_instr
: "=&r" (oldval), "=&r" (tmp), "=m" (*once_control)
: "r" (once_control), "r" (newval), "m" (*once_control)
: "cr0");
@@ -87,8 +88,18 @@ __pthread_once (pthread_once_t *once_control, void (*init_routine) (void))
pthread_cleanup_pop (0);
- /* Add one to *once_control to take the bottom 2 bits from 01 to 10. */
- atomic_increment (once_control);
+ /* Add one to *once_control to take the bottom 2 bits from 01 to 10.
+ A release barrier is needed to ensure memory written by init_routine
+ is seen in other threads before *once_control changes. */
+ int tmp;
+ __asm __volatile (__lll_rel_instr "\n"
+ "1: lwarx %0,0,%2" MUTEX_HINT_REL "\n"
+ " addi %0,%0,1\n"
+ " stwcx. %0,0,%2\n"
+ " bne- 1b"
+ : "=&b" (tmp), "=m" (*once_control)
+ : "r" (once_control), "m" (*once_control)
+ : "cr0");
/* Wake up all other threads. */
lll_futex_wake (once_control, INT_MAX, LLL_PRIVATE);
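
The pthread_once change above pairs an acquire barrier on the lock path with
a release barrier before the final state change. Here is a simplified sketch
of the same discipline in C11 atomics, with an illustrative 0/1/2 state
encoding rather than glibc's real bit layout (fork-generation and
cancellation handling omitted):

    #include <stdatomic.h>

    static atomic_int once_state;  /* 0 = new, 1 = in progress, 2 = done */

    static void
    once (void (*init_routine) (void))
    {
      /* Acquire load: a thread that observes the done state must also
         observe init_routine's writes (the added isync provides this
         ordering on PowerPC).  */
      if (atomic_load_explicit (&once_state, memory_order_acquire) == 2)
        return;

      int expected = 0;
      if (atomic_compare_exchange_strong_explicit (&once_state, &expected, 1,
                                                   memory_order_acquire,
                                                   memory_order_acquire))
        {
          init_routine ();
          /* Release store: publish init_routine's writes before other
             threads can see the done state (the added lwsync before the
             atomic increment provides this ordering on PowerPC).  */
          atomic_store_explicit (&once_state, 2, memory_order_release);
          /* The real code also does lll_futex_wake here.  */
        }
      else
        /* The real code futex-waits here instead of spinning.  */
        while (atomic_load_explicit (&once_state, memory_order_acquire) != 2)
          ;
    }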
diff --git a/libc/nptl/sysdeps/unix/sysv/linux/s390/lowlevellock.h b/libc/nptl/sysdeps/unix/sysv/linux/s390/lowlevellock.h
index eeb7a72e2..97092823d 100644
--- a/libc/nptl/sysdeps/unix/sysv/linux/s390/lowlevellock.h
+++ b/libc/nptl/sysdeps/unix/sysv/linux/s390/lowlevellock.h
@@ -329,7 +329,7 @@ __lll_robust_timedlock (int *futex, const struct timespec *abstime,
#define LLL_LOCK_INITIALIZER (0)
#define LLL_LOCK_INITIALIZER_LOCKED (1)
-/* The kernel notifies a process with uses CLONE_CLEARTID via futex
+/* The kernel notifies a process which uses CLONE_CHILD_CLEARTID via futex
wakeup when the clone terminates. The memory location contains the
thread ID while the clone is running and is reset to zero
afterwards. */
diff --git a/libc/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.h b/libc/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.h
index f3ef3883a..cfd8d0cec 100644
--- a/libc/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.h
+++ b/libc/nptl/sysdeps/unix/sysv/linux/sh/lowlevellock.h
@@ -389,7 +389,7 @@ extern int __lll_unlock_wake (int *__futex, int private) attribute_hidden;
#define lll_islocked(futex) \
(futex != LLL_LOCK_INITIALIZER)
-/* The kernel notifies a process with uses CLONE_CLEARTID via futex
+/* The kernel notifies a process which uses CLONE_CHILD_CLEARTID via futex
wakeup when the clone terminates. The memory location contains the
thread ID while the clone is running and is reset to zero
afterwards. */
diff --git a/libc/nptl/sysdeps/unix/sysv/linux/sparc/lowlevellock.h b/libc/nptl/sysdeps/unix/sysv/linux/sparc/lowlevellock.h
index 73c3327c0..fafb0873f 100644
--- a/libc/nptl/sysdeps/unix/sysv/linux/sparc/lowlevellock.h
+++ b/libc/nptl/sysdeps/unix/sysv/linux/sparc/lowlevellock.h
@@ -268,7 +268,7 @@ __lll_robust_timedlock (int *futex, const struct timespec *abstime,
#define LLL_LOCK_INITIALIZER (0)
#define LLL_LOCK_INITIALIZER_LOCKED (1)
-/* The kernel notifies a process with uses CLONE_CLEARTID via futex
+/* The kernel notifies a process which uses CLONE_CHILD_CLEARTID via futex
wakeup when the clone terminates. The memory location contains the
thread ID while the clone is running and is reset to zero
afterwards. */
diff --git a/libc/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h b/libc/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
index 3686970f0..5a80ddd8e 100644
--- a/libc/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
+++ b/libc/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
@@ -560,7 +560,7 @@ LLL_STUB_UNWIND_INFO_END
(futex != LLL_LOCK_INITIALIZER)
-/* The kernel notifies a process with uses CLONE_CLEARTID via futex
+/* The kernel notifies a process which uses CLONE_CHILD_CLEARTID via futex
wakeup when the clone terminates. The memory location contains the
thread ID while the clone is running and is reset to zero
afterwards.
diff --git a/libc/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S b/libc/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
index a1c8ca87b..b669abb57 100644
--- a/libc/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
+++ b/libc/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
@@ -103,7 +103,7 @@ __pthread_cond_timedwait:
mov %RSI_LP, dep_mutex(%rdi)
22:
- xorl %r15d, %r15d
+ xorb %r15b, %r15b
#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
# ifdef PIC
@@ -190,18 +190,39 @@ __pthread_cond_timedwait:
movl $SYS_futex, %eax
syscall
- movl $1, %r15d
+ cmpl $0, %eax
+ sete %r15b
+
#ifdef __ASSUME_REQUEUE_PI
jmp 62f
#else
- cmpq $-4095, %rax
- jnae 62f
+ je 62f
+
+ /* When a futex syscall with FUTEX_WAIT_REQUEUE_PI returns
+ successfully, it has already locked the mutex for us and the
+ pi_flag (%r15b) is set to denote that fact. However, if another
+ thread changed the futex value before we entered the wait, the
+ syscall may return EAGAIN and the mutex is not locked. We treat
+ this as a success anyway, since later we look at the pi_flag to
+ decide whether we got the mutex. The sequence numbers then make
+ sure that only one of the threads actually wakes up. We retry using
+ normal FUTEX_WAIT only if the kernel returned ENOSYS, since normal
+ and PI futexes don't mix.
+
+ Note that we don't check for EAGAIN specifically; we assume that the
+ only other error the futex syscall could return is EAGAIN (barring
+ ETIMEDOUT of course, in the timeout case) since anything else
+ would mean an error in our function. It is too expensive to do that
+ check on every call (and such calls are quite common with a large
+ number of threads), so it has been skipped. */
+ cmpl $-ENOSYS, %eax
+ jne 62f
subq $cond_futex, %rdi
#endif
61: movl $(FUTEX_WAIT_BITSET|FUTEX_PRIVATE_FLAG), %esi
-60: xorl %r15d, %r15d
+60: xorb %r15b, %r15b
xorl %eax, %eax
/* The following only works like this because we only support
two clocks, represented using a single bit. */
@@ -248,7 +269,23 @@ __pthread_cond_timedwait:
ja 39f
45: cmpq $-ETIMEDOUT, %r14
- jne 38b
+ je 99f
+
+ /* We need to go back to futex_wait. If we're using requeue_pi, then
+ release the mutex we had acquired and go back. */
+ test %r15b, %r15b
+ jz 38b
+
+ /* Adjust the mutex values first and then unlock it. The unlock
+ should always succeed or else the kernel did not lock the
+ mutex correctly. */
+ movq %r8, %rdi
+ callq __pthread_mutex_cond_lock_adjust
+ xorl %esi, %esi
+ callq __pthread_mutex_unlock_usercnt
+ /* Reload cond_var. */
+ movq 8(%rsp), %rdi
+ jmp 38b
99: incq wakeup_seq(%rdi)
incl cond_futex(%rdi)
@@ -298,7 +335,7 @@ __pthread_cond_timedwait:
/* If requeue_pi is used the kernel performs the locking of the
mutex. */
41: movq 16(%rsp), %rdi
- testl %r15d, %r15d
+ testb %r15b, %r15b
jnz 64f
callq __pthread_mutex_cond_lock
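
The label 45/99 hunk above is the heart of the timed-wait fix: ETIMEDOUT ends
the wait, while any other early return that arrived after the kernel already
granted us the PI mutex must hand the mutex back before looping into another
futex wait. Below is a hedged C sketch of that decision, declaring the two
internal helpers the assembly calls (the declarations are assumptions here):

    #include <errno.h>
    #include <pthread.h>

    /* Internal glibc helpers invoked by the assembly above.  */
    extern void __pthread_mutex_cond_lock_adjust (pthread_mutex_t *mutex);
    extern int __pthread_mutex_unlock_usercnt (pthread_mutex_t *mutex,
                                               int decr);

    /* Returns ETIMEDOUT to finish the wait, 0 to loop back to futex_wait.  */
    static int
    handle_timedwait_wakeup (long err, int pi_flag, pthread_mutex_t *mutex)
    {
      if (err == -ETIMEDOUT)
        return ETIMEDOUT;      /* the caller bumps wakeup_seq and returns */

      if (pi_flag)
        {
          /* The kernel locked the mutex for us; fix up its bookkeeping and
             unlock it, since we are about to wait again.  */
          __pthread_mutex_cond_lock_adjust (mutex);
          __pthread_mutex_unlock_usercnt (mutex, 0);
        }
      return 0;
    }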
diff --git a/libc/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S b/libc/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
index 61948523a..ec403cd9b 100644
--- a/libc/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+++ b/libc/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
@@ -136,19 +136,36 @@ __pthread_cond_wait:
cmpl $PI_BIT, %eax
jne 61f
-90:
movl $(FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %esi
movl $SYS_futex, %eax
syscall
- movl $1, %r8d
- cmpq $-EAGAIN, %rax
- je 91f
+ cmpl $0, %eax
+ sete %r8b
+
#ifdef __ASSUME_REQUEUE_PI
jmp 62f
#else
- cmpq $-4095, %rax
- jnae 62f
+ je 62f
+
+ /* When a futex syscall with FUTEX_WAIT_REQUEUE_PI returns
+ successfully, it has already locked the mutex for us and the
+ pi_flag (%r8b) is set to denote that fact. However, if another
+ thread changed the futex value before we entered the wait, the
+ syscall may return EAGAIN and the mutex is not locked. We treat
+ this as a success anyway, since later we look at the pi_flag to
+ decide whether we got the mutex. The sequence numbers then make
+ sure that only one of the threads actually wakes up. We retry using
+ normal FUTEX_WAIT only if the kernel returned ENOSYS, since normal
+ and PI futexes don't mix.
+
+ Note that we don't check for EAGAIN specifically; we assume that the
+ only other error the futex syscall could return is EAGAIN since
+ anything else would mean an error in our function. It is too
+ expensive to do that check on every call (and such calls are quite
+ common with a large number of threads), so it has been skipped. */
+ cmpl $-ENOSYS, %eax
+ jne 62f
# ifndef __ASSUME_PRIVATE_FUTEX
movl $FUTEX_WAIT, %esi
@@ -161,7 +178,7 @@ __pthread_cond_wait:
#else
orl %fs:PRIVATE_FUTEX, %esi
#endif
-60: xorl %r8d, %r8d
+60: xorb %r8b, %r8b
movl $SYS_futex, %eax
syscall
@@ -191,10 +208,10 @@ __pthread_cond_wait:
jne 16f
cmpq 24(%rsp), %r9
- jbe 8b
+ jbe 19f
cmpq %rax, %r9
- jna 8b
+ jna 19f
incq woken_seq(%rdi)
@@ -236,7 +253,7 @@ __pthread_cond_wait:
/* If requeue_pi is used the kernel performs the locking of the
mutex. */
11: movq 16(%rsp), %rdi
- testl %r8d, %r8d
+ testb %r8b, %r8b
jnz 18f
callq __pthread_mutex_cond_lock
@@ -253,6 +270,23 @@ __pthread_cond_wait:
xorl %eax, %eax
jmp 14b
+ /* We need to go back to futex_wait. If we're using requeue_pi, then
+ release the mutex we had acquired and go back. */
+19: testb %r8b, %r8b
+ jz 8b
+
+ /* Adjust the mutex values first and then unlock it. The unlock
+ should always succeed or else the kernel did not lock the mutex
+ correctly. */
+ movq 16(%rsp), %rdi
+ callq __pthread_mutex_cond_lock_adjust
+ movq %rdi, %r8
+ xorl %esi, %esi
+ callq __pthread_mutex_unlock_usercnt
+ /* Reload cond_var. */
+ movq 8(%rsp), %rdi
+ jmp 8b
+
/* Initial locking failed. */
1:
#if cond_lock != 0
@@ -331,69 +365,6 @@ __pthread_cond_wait:
13: movq %r10, %rax
jmp 14b
-91:
-.LcleanupSTART2:
- /* FUTEX_WAIT_REQUEUE_PI returned EAGAIN. We need to
- call it again. */
- movq 8(%rsp), %rdi
-
- /* Get internal lock. */
- movl $1, %esi
- xorl %eax, %eax
- LOCK
-#if cond_lock == 0
- cmpxchgl %esi, (%rdi)
-#else
- cmpxchgl %esi, cond_lock(%rdi)
-#endif
- jz 92f
-
-#if cond_lock != 0
- addq $cond_lock, %rdi
-#endif
- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
- movl $LLL_PRIVATE, %eax
- movl $LLL_SHARED, %esi
- cmovne %eax, %esi
- callq __lll_lock_wait
-#if cond_lock != 0
- subq $cond_lock, %rdi
-#endif
-92:
- /* Increment the cond_futex value again, so it can be used as a new
- expected value. */
- incl cond_futex(%rdi)
- movl cond_futex(%rdi), %edx
-
- /* Release internal lock. */
- LOCK
-#if cond_lock == 0
- decl (%rdi)
-#else
- decl cond_lock(%rdi)
-#endif
- jz 93f
-
-#if cond_lock != 0
- addq $cond_lock, %rdi
-#endif
- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
- movl $LLL_PRIVATE, %eax
- movl $LLL_SHARED, %esi
- cmovne %eax, %esi
- /* The call preserves %rdx. */
- callq __lll_unlock_wake
-#if cond_lock != 0
- subq $cond_lock, %rdi
-#endif
-93:
- /* Set the rest of SYS_futex args for FUTEX_WAIT_REQUEUE_PI. */
- xorq %r10, %r10
- mov dep_mutex(%rdi), %R8_LP
- leaq cond_futex(%rdi), %rdi
- jmp 90b
-.LcleanupEND2:
-
.size __pthread_cond_wait, .-__pthread_cond_wait
versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
GLIBC_2_3_2)
@@ -547,10 +518,6 @@ __condvar_cleanup1:
.uleb128 .LcleanupEND-.LcleanupSTART
.uleb128 __condvar_cleanup1-.LSTARTCODE
.uleb128 0
- .uleb128 .LcleanupSTART2-.LSTARTCODE
- .uleb128 .LcleanupEND2-.LcleanupSTART2
- .uleb128 __condvar_cleanup1-.LSTARTCODE
- .uleb128 0
.uleb128 .LcallUR-.LSTARTCODE
.uleb128 .LENDCODE-.LcallUR
.uleb128 0