summaryrefslogtreecommitdiff
path: root/nptl
diff options
context:
space:
mode:
authorAdhemerval Zanella <adhemerval.zanella@linaro.org>2019-02-22 10:27:08 -0300
committerAdhemerval Zanella <adhemerval.zanella@linaro.org>2019-05-14 08:41:52 -0300
commitd7e4c642ef13933361c276df87c52e83b2c212a2 (patch)
tree39b40efaf4452a300b27cd10ded65bc70b6dc4be /nptl
parentfdb8a0be9b3fea36e6c8b471c77d42e0b803720e (diff)
downloadglibc-d7e4c642ef13933361c276df87c52e83b2c212a2.tar.gz
Small optimization for lowlevellock
This patch optimizes both __lll_lock_wait_private and __lll_lock_wait by issuing only one lll_futex_wait. Since it is defined as an inlined syscall and inlined syscalls are defined using inlined assembly the compiler usually can not see both calls are equal and optimize accordingly. On aarch64 the resulting binary is change from: 0000000000000060 <__lll_lock_wait>: 60: 2a0103e5 mov w5, w1 64: b9400001 ldr w1, [x0] 68: aa0003e4 mov x4, x0 6c: 7100083f cmp w1, #0x2 70: 540000e1 b.ne 8c <__lll_lock_wait+0x2c> // b.any 74: 521900a1 eor w1, w5, #0x80 78: d2800042 mov x2, #0x2 // #2 7c: 93407c21 sxtw x1, w1 80: d2800003 mov x3, #0x0 // #0 84: d2800c48 mov x8, #0x62 // #98 88: d4000001 svc #0x0 8c: 521900a5 eor w5, w5, #0x80 90: 52800046 mov w6, #0x2 // #2 94: 93407ca5 sxtw x5, w5 98: 14000008 b b8 <__lll_lock_wait+0x58> 9c: d503201f nop a0: aa0403e0 mov x0, x4 a4: aa0503e1 mov x1, x5 a8: d2800042 mov x2, #0x2 // #2 ac: d2800003 mov x3, #0x0 // #0 b0: d2800c48 mov x8, #0x62 // #98 b4: d4000001 svc #0x0 b8: 885ffc80 ldaxr w0, [x4] bc: 88017c86 stxr w1, w6, [x4] c0: 35ffffc1 cbnz w1, b8 <__lll_lock_wait+0x58> c4: 35fffee0 cbnz w0, a0 <__lll_lock_wait+0x40> c8: d65f03c0 ret To: 0000000000000048 <__lll_lock_wait>: 48: aa0003e4 mov x4, x0 4c: 2a0103e5 mov w5, w1 50: b9400000 ldr w0, [x0] 54: 7100081f cmp w0, #0x2 58: 540000c0 b.eq 70 <__lll_lock_wait+0x28> // b.none 5c: 52800041 mov w1, #0x2 // #2 60: 885ffc80 ldaxr w0, [x4] 64: 88027c81 stxr w2, w1, [x4] 68: 35ffffc2 cbnz w2, 60 <__lll_lock_wait+0x18> 6c: 34000120 cbz w0, 90 <__lll_lock_wait+0x48> 70: 521900a1 eor w1, w5, #0x80 74: aa0403e0 mov x0, x4 78: 93407c21 sxtw x1, w1 7c: d2800042 mov x2, #0x2 // #2 80: d2800003 mov x3, #0x0 // #0 84: d2800c48 mov x8, #0x62 // #98 88: d4000001 svc #0x0 8c: 17fffff4 b 5c <__lll_lock_wait+0x14> 90: d65f03c0 ret I see similar changes on powerpc and other architectures. It also aligns with x86_64 implementation by adding the systemtap probes. Checker on aarch64-linux-gnu. * nptl/lowlevellock.c (__lll_lock_wait, __lll_lock_wait_private): Optimize futex call and add systemtap probe.
Diffstat (limited to 'nptl')
-rw-r--r--nptl/lowlevellock.c31
1 files changed, 19 insertions, 12 deletions
diff --git a/nptl/lowlevellock.c b/nptl/lowlevellock.c
index 5eaa3807ea..47548ff121 100644
--- a/nptl/lowlevellock.c
+++ b/nptl/lowlevellock.c
@@ -17,20 +17,23 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <errno.h>
#include <sysdep.h>
#include <lowlevellock.h>
-#include <sys/time.h>
#include <atomic.h>
+#include <stap-probe.h>
void
__lll_lock_wait_private (int *futex)
{
- if (*futex == 2)
- lll_futex_wait (futex, 2, LLL_PRIVATE); /* Wait if *futex == 2. */
-
- while (atomic_exchange_acq (futex, 2) != 0)
- lll_futex_wait (futex, 2, LLL_PRIVATE); /* Wait if *futex == 2. */
+ if (atomic_load_relaxed (futex) == 2)
+ goto futex;
+
+ while (atomic_exchange_acquire (futex, 2) != 0)
+ {
+ futex:
+ LIBC_PROBE (lll_lock_wait_private, 1, futex);
+ lll_futex_wait (futex, 2, LLL_PRIVATE); /* Wait if *futex == 2. */
+ }
}
@@ -39,10 +42,14 @@ __lll_lock_wait_private (int *futex)
void
__lll_lock_wait (int *futex, int private)
{
- if (*futex == 2)
- lll_futex_wait (futex, 2, private); /* Wait if *futex == 2. */
-
- while (atomic_exchange_acq (futex, 2) != 0)
- lll_futex_wait (futex, 2, private); /* Wait if *futex == 2. */
+ if (atomic_load_relaxed (futex) == 2)
+ goto futex;
+
+ while (atomic_exchange_acquire (futex, 2) != 0)
+ {
+ futex:
+ LIBC_PROBE (lll_lock_wait, 1, futex);
+ lll_futex_wait (futex, 2, private); /* Wait if *futex == 2. */
+ }
}
#endif