author    Stephen Rothwell <sfr@canb.auug.org.au>  2023-04-11 12:43:44 +1000
committer Stephen Rothwell <sfr@canb.auug.org.au>  2023-04-11 12:43:45 +1000
commit    86c49f36732ccc13ce6974bf64bd700fd775fb23 (patch)
tree      881ab7b1697eeff11ef1daf3be94cd8ba5c66d41
parent    2c14545170f12460d6b6564ca6922077bfee83f4 (diff)
parent    a5e77e941a5737a9c012e20f75f6723862832ee2 (diff)
download  linux-next-86c49f36732ccc13ce6974bf64bd700fd775fb23.tar.gz
Merge branch 'rcu/next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
-rw-r--r--  Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst | 6
-rw-r--r--  Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst | 2
-rw-r--r--  Documentation/RCU/Design/Requirements/Requirements.rst | 36
-rw-r--r--  Documentation/RCU/RTFP.txt | 10
-rw-r--r--  Documentation/RCU/UP.rst | 4
-rw-r--r--  Documentation/RCU/checklist.rst | 2
-rw-r--r--  Documentation/RCU/lockdep.rst | 2
-rw-r--r--  Documentation/RCU/torture.rst | 4
-rw-r--r--  Documentation/RCU/whatisRCU.rst | 7
-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 139
-rw-r--r--  Documentation/litmus-tests/README | 2
-rw-r--r--  Documentation/litmus-tests/locking/DCL-broken.litmus | 54
-rw-r--r--  Documentation/litmus-tests/locking/DCL-fixed.litmus | 55
-rw-r--r--  Documentation/litmus-tests/locking/RM-broken.litmus | 41
-rw-r--r--  Documentation/litmus-tests/locking/RM-fixed.litmus | 41
-rw-r--r--  MAINTAINERS | 4
-rw-r--r--  arch/arm64/kvm/Kconfig | 1
-rw-r--r--  arch/mips/kvm/Kconfig | 1
-rw-r--r--  arch/powerpc/kvm/Kconfig | 1
-rw-r--r--  arch/riscv/kvm/Kconfig | 1
-rw-r--r--  arch/s390/kvm/Kconfig | 1
-rw-r--r--  arch/x86/Kconfig | 2
-rw-r--r--  arch/x86/kvm/Kconfig | 1
-rw-r--r--  drivers/base/cpu.c | 3
-rw-r--r--  drivers/block/drbd/drbd_nl.c | 6
-rw-r--r--  drivers/block/drbd/drbd_receiver.c | 4
-rw-r--r--  drivers/block/drbd/drbd_state.c | 2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c | 2
-rw-r--r--  drivers/misc/vmw_vmci/vmci_context.c | 2
-rw-r--r--  drivers/misc/vmw_vmci/vmci_event.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 4
-rw-r--r--  fs/ext4/super.c | 2
-rw-r--r--  include/linux/instrumented.h | 63
-rw-r--r--  include/linux/lockdep.h | 8
-rw-r--r--  include/linux/notifier.h | 5
-rw-r--r--  include/linux/rcupdate.h | 25
-rw-r--r--  include/linux/srcu.h | 38
-rw-r--r--  include/linux/srcutiny.h | 6
-rw-r--r--  include/linux/srcutree.h | 94
-rw-r--r--  include/linux/tick.h | 2
-rw-r--r--  include/trace/events/rcu.h | 4
-rw-r--r--  include/trace/events/timer.h | 3
-rw-r--r--  kernel/kcsan/core.c | 17
-rw-r--r--  kernel/locking/lockdep.c | 64
-rw-r--r--  kernel/locking/locktorture.c | 239
-rw-r--r--  kernel/locking/test-ww_mutex.c | 2
-rw-r--r--  kernel/rcu/Kconfig | 21
-rw-r--r--  kernel/rcu/rcu.h | 43
-rw-r--r--  kernel/rcu/rcuscale.c | 206
-rw-r--r--  kernel/rcu/rcutorture.c | 234
-rw-r--r--  kernel/rcu/refscale.c | 2
-rw-r--r--  kernel/rcu/srcutiny.c | 2
-rw-r--r--  kernel/rcu/srcutree.c | 438
-rw-r--r--  kernel/rcu/tasks.h | 38
-rw-r--r--  kernel/rcu/tree.c | 106
-rw-r--r--  kernel/rcu/tree_exp.h | 16
-rw-r--r--  kernel/rcu/tree_nocb.h | 4
-rw-r--r--  kernel/time/tick-sched.c | 16
-rw-r--r--  kernel/trace/trace_osnoise.c | 2
-rw-r--r--  kernel/trace/trace_probe.c | 2
-rw-r--r--  lib/test_vmalloc.c | 2
-rw-r--r--  mm/Kconfig | 1
-rw-r--r--  net/core/sysctl_net_core.c | 4
-rw-r--r--  net/mac802154/scan.c | 4
-rwxr-xr-x  scripts/checkpatch.pl | 9
-rw-r--r--  tools/include/nolibc/.gitignore | 1
-rw-r--r--  tools/include/nolibc/Makefile | 4
-rw-r--r--  tools/include/nolibc/arch-i386.h | 7
-rw-r--r--  tools/include/nolibc/arch-loongarch.h | 200
-rw-r--r--  tools/include/nolibc/arch-x86_64.h | 5
-rw-r--r--  tools/include/nolibc/arch.h | 2
-rw-r--r--  tools/include/nolibc/nolibc.h | 1
-rw-r--r--  tools/include/nolibc/stackprotector.h | 53
-rw-r--r--  tools/include/nolibc/std.h | 15
-rw-r--r--  tools/include/nolibc/stdint.h | 99
-rw-r--r--  tools/include/nolibc/stdio.h | 95
-rw-r--r--  tools/include/nolibc/sys.h | 123
-rw-r--r--  tools/include/nolibc/types.h | 30
-rw-r--r--  tools/include/nolibc/unistd.h | 5
-rw-r--r--  tools/memory-model/Documentation/explanation.txt | 178
-rw-r--r--  tools/memory-model/Documentation/litmus-tests.txt | 27
-rw-r--r--  tools/memory-model/Documentation/locking.txt | 298
-rw-r--r--  tools/memory-model/linux-kernel.bell | 30
-rw-r--r--  tools/memory-model/linux-kernel.cat | 20
-rw-r--r--  tools/memory-model/linux-kernel.def | 7
-rw-r--r--  tools/memory-model/litmus-tests/.gitignore | 2
-rw-r--r--  tools/memory-model/lock.cat | 6
-rw-r--r--  tools/memory-model/scripts/README | 48
-rwxr-xr-x  tools/memory-model/scripts/checkalllitmus.sh | 29
-rwxr-xr-x  tools/memory-model/scripts/checkghlitmus.sh | 15
-rwxr-xr-x  tools/memory-model/scripts/checklitmus.sh | 25
-rwxr-xr-x  tools/memory-model/scripts/checklitmushist.sh | 2
-rwxr-xr-x  tools/memory-model/scripts/checktheselitmus.sh | 43
-rwxr-xr-x  tools/memory-model/scripts/cmplitmushist.sh | 49
-rwxr-xr-x  tools/memory-model/scripts/hwfnseg.sh | 20
-rwxr-xr-x  tools/memory-model/scripts/initlitmushist.sh | 2
-rwxr-xr-x  tools/memory-model/scripts/judgelitmus.sh | 120
-rwxr-xr-x  tools/memory-model/scripts/newlitmushist.sh | 4
-rwxr-xr-x  tools/memory-model/scripts/parseargs.sh | 21
-rwxr-xr-x  tools/memory-model/scripts/runlitmus.sh | 80
-rwxr-xr-x  tools/memory-model/scripts/runlitmushist.sh | 29
-rwxr-xr-x  tools/memory-model/scripts/simpletest.sh | 35
-rwxr-xr-x [-rw-r--r--]  tools/rcu/extract-stall.sh | 26
-rw-r--r--  tools/testing/selftests/nolibc/.gitignore | 1
-rw-r--r--  tools/testing/selftests/nolibc/Makefile | 96
-rw-r--r--  tools/testing/selftests/nolibc/nolibc-test.c | 306
-rw-r--r--  tools/testing/selftests/rcutorture/bin/functions.sh | 2
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-again.sh | 2
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh | 78
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/torture.sh | 6
-rw-r--r--  tools/testing/selftests/rcutorture/configs/lock/CFLIST | 2
-rw-r--r--  tools/testing/selftests/rcutorture/configs/lock/LOCK08 | 6
-rw-r--r--  tools/testing/selftests/rcutorture/configs/lock/LOCK08.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/lock/LOCK09 | 6
-rw-r--r--  tools/testing/selftests/rcutorture/configs/lock/LOCK09.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot | 2
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE01 | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot | 2
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE04 | 1
-rw-r--r--  tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt | 4
121 files changed, 3457 insertions, 982 deletions
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
index c9c957c85bac..93d899d53258 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
@@ -277,7 +277,7 @@ the following access functions:
Again, only one request in a given batch need actually carry out a
grace-period operation, which means there must be an efficient way to
-identify which of many concurrent reqeusts will initiate the grace
+identify which of many concurrent requests will initiate the grace
period, and that there be an efficient way for the remaining requests to
wait for that grace period to complete. However, that is the topic of
the next section.
@@ -405,7 +405,7 @@ Use of Workqueues
In earlier implementations, the task requesting the expedited grace
period also drove it to completion. This straightforward approach had
the disadvantage of needing to account for POSIX signals sent to user
-tasks, so more recent implemementations use the Linux kernel's
+tasks, so more recent implementations use the Linux kernel's
workqueues (see Documentation/core-api/workqueue.rst).
The requesting task still does counter snapshotting and funnel-lock
@@ -465,7 +465,7 @@ corresponding disadvantage that workqueues cannot be used until they are
initialized, which does not happen until some time after the scheduler
spawns the first task. Given that there are parts of the kernel that
really do want to execute grace periods during this mid-boot “dead
-zone”, expedited grace periods must do something else during thie time.
+zone”, expedited grace periods must do something else during this time.
What they do is to fall back to the old practice of requiring that the
requesting task drive the expedited grace period, as was the case before
diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
index 7fdf151a8680..5750f125361b 100644
--- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
+++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
@@ -168,7 +168,7 @@ an ``atomic_add_return()`` of zero) to detect idle CPUs.
+-----------------------------------------------------------------------+
The approach must be extended to handle one final case, that of waking a
-task blocked in ``synchronize_rcu()``. This task might be affinitied to
+task blocked in ``synchronize_rcu()``. This task might be affined to
a CPU that is not yet aware that the grace period has ended, and thus
might not yet be subject to the grace period's memory ordering.
Therefore, there is an ``smp_mb()`` after the return from
diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst
index 49387d823619..77155b51d4c2 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.rst
+++ b/Documentation/RCU/Design/Requirements/Requirements.rst
@@ -2071,41 +2071,7 @@ call.
Because RCU avoids interrupting idle CPUs, it is illegal to execute an
RCU read-side critical section on an idle CPU. (Kernels built with
-``CONFIG_PROVE_RCU=y`` will splat if you try it.) The RCU_NONIDLE()
-macro and ``_rcuidle`` event tracing is provided to work around this
-restriction. In addition, rcu_is_watching() may be used to test
-whether or not it is currently legal to run RCU read-side critical
-sections on this CPU. I learned of the need for diagnostics on the one
-hand and RCU_NONIDLE() on the other while inspecting idle-loop code.
-Steven Rostedt supplied ``_rcuidle`` event tracing, which is used quite
-heavily in the idle loop. However, there are some restrictions on the
-code placed within RCU_NONIDLE():
-
-#. Blocking is prohibited. In practice, this is not a serious
- restriction given that idle tasks are prohibited from blocking to
- begin with.
-#. Although nesting RCU_NONIDLE() is permitted, they cannot nest
- indefinitely deeply. However, given that they can be nested on the
- order of a million deep, even on 32-bit systems, this should not be a
- serious restriction. This nesting limit would probably be reached
- long after the compiler OOMed or the stack overflowed.
-#. Any code path that enters RCU_NONIDLE() must sequence out of that
- same RCU_NONIDLE(). For example, the following is grossly
- illegal:
-
- ::
-
- 1 RCU_NONIDLE({
- 2 do_something();
- 3 goto bad_idea; /* BUG!!! */
- 4 do_something_else();});
- 5 bad_idea:
-
-
- It is just as illegal to transfer control into the middle of
- RCU_NONIDLE()'s argument. Yes, in theory, you could transfer in
- as long as you also transferred out, but in practice you could also
- expect to get sharply worded review comments.
+``CONFIG_PROVE_RCU=y`` will splat if you try it.)
It is similarly socially unacceptable to interrupt an ``nohz_full`` CPU
running in userspace. RCU must therefore track ``nohz_full`` userspace
diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt
index 588d97366a46..db8f16b392aa 100644
--- a/Documentation/RCU/RTFP.txt
+++ b/Documentation/RCU/RTFP.txt
@@ -201,7 +201,7 @@ work looked at debugging uses of RCU [Seyster:2011:RFA:2075416.2075425].
In 2012, Josh Triplett received his Ph.D. with his dissertation
covering RCU-protected resizable hash tables and the relationship
between memory barriers and read-side traversal order: If the updater
-is making changes in the opposite direction from the read-side traveral
+is making changes in the opposite direction from the read-side traversal
order, the updater need only execute a memory-barrier instruction,
but if in the same direction, the updater needs to wait for a grace
period between the individual updates [JoshTriplettPhD]. Also in 2012,
@@ -1245,7 +1245,7 @@ Oregon Health and Sciences University"
[Viewed September 5, 2005]"
,annotation={
First posting showing how RCU can be safely adapted for
- preemptable RCU read side critical sections.
+ preemptible RCU read side critical sections.
}
}
@@ -1888,7 +1888,7 @@ Revised:
\url{https://lore.kernel.org/r/20070910183004.GA3299@linux.vnet.ibm.com}
[Viewed October 25, 2007]"
,annotation={
- Final patch for preemptable RCU to -rt. (Later patches were
+ Final patch for preemptible RCU to -rt. (Later patches were
to mainline, eventually incorporated.)
}
}
@@ -2275,7 +2275,7 @@ lot of {Linux} into your technology!!!"
\url{https://lore.kernel.org/r/20090724001429.GA17374@linux.vnet.ibm.com}
[Viewed August 15, 2009]"
,annotation={
- First posting of simple and fast preemptable RCU.
+ First posting of simple and fast preemptible RCU.
}
}
@@ -2639,7 +2639,7 @@ lot of {Linux} into your technology!!!"
RCU-protected hash tables, barriers vs. read-side traversal order.
.
If the updater is making changes in the opposite direction from
- the read-side traveral order, the updater need only execute a
+ the read-side traversal order, the updater need only execute a
memory-barrier instruction, but if in the same direction, the
updater needs to wait for a grace period between the individual
updates.
diff --git a/Documentation/RCU/UP.rst b/Documentation/RCU/UP.rst
index 8b20fd45f255..4060d7a2f62a 100644
--- a/Documentation/RCU/UP.rst
+++ b/Documentation/RCU/UP.rst
@@ -107,7 +107,7 @@ UP systems, including PREEMPT SMP builds running on UP systems.
Quick Quiz #3:
Why can't synchronize_rcu() return immediately on UP systems running
- preemptable RCU?
+ preemptible RCU?
.. _answer_quick_quiz_up:
@@ -143,7 +143,7 @@ Answer to Quick Quiz #2:
Answer to Quick Quiz #3:
Why can't synchronize_rcu() return immediately on UP systems
- running preemptable RCU?
+ running preemptible RCU?
Because some other task might have been preempted in the middle
of an RCU read-side critical section. If synchronize_rcu()
diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst
index cc361fb01ed4..bd3c58c44bef 100644
--- a/Documentation/RCU/checklist.rst
+++ b/Documentation/RCU/checklist.rst
@@ -70,7 +70,7 @@ over a rather long period of time, but improvements are always welcome!
can serve as rcu_read_lock_sched(), but is less readable and
prevents lockdep from detecting locking issues.
- Please not that you *cannot* rely on code known to be built
+ Please note that you *cannot* rely on code known to be built
only in non-preemptible kernels. Such code can and will break,
especially in kernels built with CONFIG_PREEMPT_COUNT=y.
diff --git a/Documentation/RCU/lockdep.rst b/Documentation/RCU/lockdep.rst
index 2749f43ec1b0..69e73a39bd11 100644
--- a/Documentation/RCU/lockdep.rst
+++ b/Documentation/RCU/lockdep.rst
@@ -65,7 +65,7 @@ checking of rcu_dereference() primitives:
rcu_access_pointer(p):
Return the value of the pointer and omit all barriers,
but retain the compiler constraints that prevent duplicating
- or coalescsing. This is useful when testing the
+ or coalescing. This is useful when testing the
value of the pointer itself, for example, against NULL.
The rcu_dereference_check() check expression can be any boolean
diff --git a/Documentation/RCU/torture.rst b/Documentation/RCU/torture.rst
index 0316ba0c6922..b3b6dfa85248 100644
--- a/Documentation/RCU/torture.rst
+++ b/Documentation/RCU/torture.rst
@@ -216,7 +216,7 @@ Kernel boot arguments can also be supplied, for example, to control
rcutorture's module parameters. For example, to test a change to RCU's
CPU stall-warning code, use "--bootargs 'rcutorture.stall_cpu=30'".
This will of course result in the scripting reporting a failure, namely
-the resuling RCU CPU stall warning. As noted above, reducing memory may
+the resulting RCU CPU stall warning. As noted above, reducing memory may
require disabling rcutorture's callback-flooding tests::
kvm.sh --cpus 448 --configs '56*TREE04' --memory 128M \
@@ -370,5 +370,5 @@ You can also re-run a previous remote run in a manner similar to kvm.sh:
tools/testing/selftests/rcutorture/res/2022.11.03-11.26.28-remote \
--duration 24h
-In this case, most of the kvm-again.sh parmeters may be supplied following
+In this case, most of the kvm-again.sh parameters may be supplied following
the pathname of the old run-results directory.
diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst
index 2c5563a91998..e488c8e557a9 100644
--- a/Documentation/RCU/whatisRCU.rst
+++ b/Documentation/RCU/whatisRCU.rst
@@ -597,10 +597,10 @@ to avoid having to write your own callback::
If the occasional sleep is permitted, the single-argument form may
be used, omitting the rcu_head structure from struct foo.
- kfree_rcu(old_fp);
+ kfree_rcu_mightsleep(old_fp);
-This variant of kfree_rcu() almost never blocks, but might do so by
-invoking synchronize_rcu() in response to memory-allocation failure.
+This variant almost never blocks, but might do so by invoking
+synchronize_rcu() in response to memory-allocation failure.
Again, see checklist.rst for additional rules governing the use of RCU.
@@ -1117,7 +1117,6 @@ All: lockdep-checked RCU utility APIs::
RCU_LOCKDEP_WARN
rcu_sleep_check
- RCU_NONIDLE
All: Unchecked RCU-protected pointer access::
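
As a quick illustration of the two forms discussed in the whatisRCU.rst hunk
above, a minimal sketch (the struct layout and function names here are
illustrative only, not part of the patch):

#include <linux/slab.h>
#include <linux/rcupdate.h>

struct foo {
	int a;
	struct rcu_head rcu;		/* needed only by the two-argument form */
};

static void release_foo(struct foo *old_fp)
{
	/* Two-argument form: never blocks; frees old_fp after a grace period. */
	kfree_rcu(old_fp, rcu);
}

static void release_foo_mightsleep(struct foo *old_fp)
{
	/*
	 * Single-argument form: no rcu_head needed, but it may block by
	 * invoking synchronize_rcu() on memory-allocation failure, so the
	 * caller must be allowed to sleep.
	 */
	kfree_rcu_mightsleep(old_fp);
}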
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6a6ede750c5c..2c94b94b259b 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4713,43 +4713,6 @@
the propagation of recent CPU-hotplug changes up
the rcu_node combining tree.
- rcutree.use_softirq= [KNL]
- If set to zero, move all RCU_SOFTIRQ processing to
- per-CPU rcuc kthreads. Defaults to a non-zero
- value, meaning that RCU_SOFTIRQ is used by default.
- Specify rcutree.use_softirq=0 to use rcuc kthreads.
-
- But note that CONFIG_PREEMPT_RT=y kernels disable
- this kernel boot parameter, forcibly setting it
- to zero.
-
- rcutree.rcu_fanout_exact= [KNL]
- Disable autobalancing of the rcu_node combining
- tree. This is used by rcutorture, and might
- possibly be useful for architectures having high
- cache-to-cache transfer latencies.
-
- rcutree.rcu_fanout_leaf= [KNL]
- Change the number of CPUs assigned to each
- leaf rcu_node structure. Useful for very
- large systems, which will choose the value 64,
- and for NUMA systems with large remote-access
- latencies, which will choose a value aligned
- with the appropriate hardware boundaries.
-
- rcutree.rcu_min_cached_objs= [KNL]
- Minimum number of objects which are cached and
- maintained per one CPU. Object size is equal
- to PAGE_SIZE. The cache allows to reduce the
- pressure to page allocator, also it makes the
- whole algorithm to behave better in low memory
- condition.
-
- rcutree.rcu_delay_page_cache_fill_msec= [KNL]
- Set the page-cache refill delay (in milliseconds)
- in response to low-memory conditions. The range
- of permitted values is in the range 0:100000.
-
rcutree.jiffies_till_first_fqs= [KNL]
Set delay from grace-period initialization to
first attempt to force quiescent states.
@@ -4788,21 +4751,6 @@
When RCU_NOCB_CPU is set, also adjust the
priority of NOCB callback kthreads.
- rcutree.rcu_divisor= [KNL]
- Set the shift-right count to use to compute
- the callback-invocation batch limit bl from
- the number of callbacks queued on this CPU.
- The result will be bounded below by the value of
- the rcutree.blimit kernel parameter. Every bl
- callbacks, the softirq handler will exit in
- order to allow the CPU to do other work.
-
- Please note that this callback-invocation batch
- limit applies only to non-offloaded callback
- invocation. Offloaded callbacks are instead
- invoked in the context of an rcuoc kthread, which
- scheduler will preempt as it does any other task.
-
rcutree.nocb_nobypass_lim_per_jiffy= [KNL]
On callback-offloaded (rcu_nocbs) CPUs,
RCU reduces the lock contention that would
@@ -4816,14 +4764,6 @@
the ->nocb_bypass queue. The definition of "too
many" is supplied by this kernel boot parameter.
- rcutree.rcu_nocb_gp_stride= [KNL]
- Set the number of NOCB callback kthreads in
- each group, which defaults to the square root
- of the number of CPUs. Larger numbers reduce
- the wakeup overhead on the global grace-period
- kthread, but increases that same overhead on
- each group's NOCB grace-period kthread.
-
rcutree.qhimark= [KNL]
Set threshold of queued RCU callbacks beyond which
batch limiting is disabled.
@@ -4841,6 +4781,56 @@
on rcutree.qhimark at boot time and to zero to
disable more aggressive help enlistment.
+ rcutree.rcu_delay_page_cache_fill_msec= [KNL]
+ Set the page-cache refill delay (in milliseconds)
+ in response to low-memory conditions. The range
+ of permitted values is in the range 0:100000.
+
+ rcutree.rcu_divisor= [KNL]
+ Set the shift-right count to use to compute
+ the callback-invocation batch limit bl from
+ the number of callbacks queued on this CPU.
+ The result will be bounded below by the value of
+ the rcutree.blimit kernel parameter. Every bl
+ callbacks, the softirq handler will exit in
+ order to allow the CPU to do other work.
+
+ Please note that this callback-invocation batch
+ limit applies only to non-offloaded callback
+ invocation. Offloaded callbacks are instead
+ invoked in the context of an rcuoc kthread, which
+ scheduler will preempt as it does any other task.
+
+ rcutree.rcu_fanout_exact= [KNL]
+ Disable autobalancing of the rcu_node combining
+ tree. This is used by rcutorture, and might
+ possibly be useful for architectures having high
+ cache-to-cache transfer latencies.
+
+ rcutree.rcu_fanout_leaf= [KNL]
+ Change the number of CPUs assigned to each
+ leaf rcu_node structure. Useful for very
+ large systems, which will choose the value 64,
+ and for NUMA systems with large remote-access
+ latencies, which will choose a value aligned
+ with the appropriate hardware boundaries.
+
+ rcutree.rcu_min_cached_objs= [KNL]
+ Minimum number of objects which are cached and
+ maintained per one CPU. Object size is equal
+ to PAGE_SIZE. The cache allows to reduce the
+ pressure to page allocator, also it makes the
+ whole algorithm to behave better in low memory
+ condition.
+
+ rcutree.rcu_nocb_gp_stride= [KNL]
+ Set the number of NOCB callback kthreads in
+ each group, which defaults to the square root
+ of the number of CPUs. Larger numbers reduce
+ the wakeup overhead on the global grace-period
+ kthread, but increases that same overhead on
+ each group's NOCB grace-period kthread.
+
rcutree.rcu_kick_kthreads= [KNL]
Cause the grace-period kthread to get an extra
wake_up() if it sleeps three times longer than
@@ -4848,6 +4838,13 @@
This wake_up() will be accompanied by a
WARN_ONCE() splat and an ftrace_dump().
+ rcutree.rcu_resched_ns= [KNL]
+ Limit the time spend invoking a batch of RCU
+ callbacks to the specified number of nanoseconds.
+ By default, this limit is checked only once
+ every 32 callbacks in order to limit the pain
+ inflicted by local_clock() overhead.
+
rcutree.rcu_unlock_delay= [KNL]
In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels,
this specifies an rcu_read_unlock()-time delay
@@ -4862,6 +4859,16 @@
rcu_node tree with an eye towards determining
why a new grace period has not yet started.
+ rcutree.use_softirq= [KNL]
+ If set to zero, move all RCU_SOFTIRQ processing to
+ per-CPU rcuc kthreads. Defaults to a non-zero
+ value, meaning that RCU_SOFTIRQ is used by default.
+ Specify rcutree.use_softirq=0 to use rcuc kthreads.
+
+ But note that CONFIG_PREEMPT_RT=y kernels disable
+ this kernel boot parameter, forcibly setting it
+ to zero.
+
rcuscale.gp_async= [KNL]
Measure performance of asynchronous
grace-period primitives such as call_rcu().
@@ -5064,8 +5071,16 @@
rcutorture.stall_cpu_block= [KNL]
Sleep while stalling if set. This will result
- in warnings from preemptible RCU in addition
- to any other stall-related activity.
+ in warnings from preemptible RCU in addition to
+ any other stall-related activity. Note that
+ in kernels built with CONFIG_PREEMPTION=n and
+ CONFIG_PREEMPT_COUNT=y, this parameter will
+ cause the CPU to pass through a quiescent state.
+ Any such quiescent states will suppress RCU CPU
+ stall warnings, but the time-based sleep will
+ also result in scheduling-while-atomic splats.
+ Which might or might not be what you want.
+
rcutorture.stall_cpu_holdoff= [KNL]
Time to wait (s) after boot before inducing stall.
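
As a rough sketch of the batch-limit computation described above for
rcutree.rcu_divisor (illustrative only; the real logic lives in
rcu_do_batch() in kernel/rcu/tree.c):

/*
 * Sketch: the callback-invocation batch limit "bl" is the number of
 * queued callbacks shifted right by rcutree.rcu_divisor, bounded below
 * by rcutree.blimit.
 */
static long sketch_batch_limit(long qlen, int rcu_divisor, long blimit)
{
	long bl = qlen >> rcu_divisor;

	return bl < blimit ? blimit : bl;
}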
diff --git a/Documentation/litmus-tests/README b/Documentation/litmus-tests/README
index 7f5c6c3ed6c3..658d37860d39 100644
--- a/Documentation/litmus-tests/README
+++ b/Documentation/litmus-tests/README
@@ -9,7 +9,7 @@ a kernel test module based on a litmus test, please see
tools/memory-model/README.
-atomic (/atomic derectory)
+atomic (/atomic directory)
--------------------------
Atomic-RMW+mb__after_atomic-is-stronger-than-acquire.litmus
diff --git a/Documentation/litmus-tests/locking/DCL-broken.litmus b/Documentation/litmus-tests/locking/DCL-broken.litmus
new file mode 100644
index 000000000000..bfb7ba4316d6
--- /dev/null
+++ b/Documentation/litmus-tests/locking/DCL-broken.litmus
@@ -0,0 +1,54 @@
+C DCL-broken
+
+(*
+ * Result: Sometimes
+ *
+ * This litmus test demonstrates more than just locking is required to
+ * correctly implement double-checked locking.
+ *)
+
+{
+ int flag;
+ int data;
+}
+
+P0(int *flag, int *data, spinlock_t *lck)
+{
+ int r0;
+ int r1;
+ int r2;
+
+ r0 = READ_ONCE(*flag);
+ if (r0 == 0) {
+ spin_lock(lck);
+ r1 = READ_ONCE(*flag);
+ if (r1 == 0) {
+ WRITE_ONCE(*data, 1);
+ WRITE_ONCE(*flag, 1);
+ }
+ spin_unlock(lck);
+ }
+ r2 = READ_ONCE(*data);
+}
+
+P1(int *flag, int *data, spinlock_t *lck)
+{
+ int r0;
+ int r1;
+ int r2;
+
+ r0 = READ_ONCE(*flag);
+ if (r0 == 0) {
+ spin_lock(lck);
+ r1 = READ_ONCE(*flag);
+ if (r1 == 0) {
+ WRITE_ONCE(*data, 1);
+ WRITE_ONCE(*flag, 1);
+ }
+ spin_unlock(lck);
+ }
+ r2 = READ_ONCE(*data);
+}
+
+locations [flag;data;0:r0;0:r1;1:r0;1:r1]
+exists (0:r2=0 \/ 1:r2=0)
diff --git a/Documentation/litmus-tests/locking/DCL-fixed.litmus b/Documentation/litmus-tests/locking/DCL-fixed.litmus
new file mode 100644
index 000000000000..d1b60bcb0c8f
--- /dev/null
+++ b/Documentation/litmus-tests/locking/DCL-fixed.litmus
@@ -0,0 +1,55 @@
+C DCL-fixed
+
+(*
+ * Result: Never
+ *
+ * This litmus test demonstrates that double-checked locking can be
+ * reliable given proper use of smp_load_acquire() and smp_store_release()
+ * in addition to the locking.
+ *)
+
+{
+ int flag;
+ int data;
+}
+
+P0(int *flag, int *data, spinlock_t *lck)
+{
+ int r0;
+ int r1;
+ int r2;
+
+ r0 = smp_load_acquire(flag);
+ if (r0 == 0) {
+ spin_lock(lck);
+ r1 = READ_ONCE(*flag);
+ if (r1 == 0) {
+ WRITE_ONCE(*data, 1);
+ smp_store_release(flag, 1);
+ }
+ spin_unlock(lck);
+ }
+ r2 = READ_ONCE(*data);
+}
+
+P1(int *flag, int *data, spinlock_t *lck)
+{
+ int r0;
+ int r1;
+ int r2;
+
+ r0 = smp_load_acquire(flag);
+ if (r0 == 0) {
+ spin_lock(lck);
+ r1 = READ_ONCE(*flag);
+ if (r1 == 0) {
+ WRITE_ONCE(*data, 1);
+ smp_store_release(flag, 1);
+ }
+ spin_unlock(lck);
+ }
+ r2 = READ_ONCE(*data);
+}
+
+locations [flag;data;0:r0;0:r1;1:r0;1:r1]
+exists (0:r2=0 \/ 1:r2=0)
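
In ordinary kernel C, the pattern modeled by DCL-fixed corresponds roughly
to the following sketch (names are illustrative; the litmus test above is
the authoritative model):

#include <linux/spinlock.h>

static int data;
static int flag;
static DEFINE_SPINLOCK(lck);

static int get_data(void)
{
	/*
	 * The acquire load pairs with the release store below, so any
	 * reader that observes flag != 0 is also guaranteed to observe
	 * the initialized value of data.
	 */
	if (!smp_load_acquire(&flag)) {
		spin_lock(&lck);
		if (!READ_ONCE(flag)) {
			WRITE_ONCE(data, 1);
			smp_store_release(&flag, 1);
		}
		spin_unlock(&lck);
	}
	return READ_ONCE(data);
}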
diff --git a/Documentation/litmus-tests/locking/RM-broken.litmus b/Documentation/litmus-tests/locking/RM-broken.litmus
new file mode 100644
index 000000000000..b7ef30cedfe5
--- /dev/null
+++ b/Documentation/litmus-tests/locking/RM-broken.litmus
@@ -0,0 +1,41 @@
+C RM-broken
+
+(*
+ * Result: DEADLOCK
+ *
+ * This litmus test demonstrates that the old "roach motel" approach
+ * to locking, where code can be freely moved into critical sections,
+ * cannot be used in the Linux kernel.
+ *)
+
+{
+ int x;
+ atomic_t y;
+}
+
+P0(int *x, atomic_t *y, spinlock_t *lck)
+{
+ int r2;
+
+ spin_lock(lck);
+ r2 = atomic_inc_return(y);
+ WRITE_ONCE(*x, 1);
+ spin_unlock(lck);
+}
+
+P1(int *x, atomic_t *y, spinlock_t *lck)
+{
+ int r0;
+ int r1;
+ int r2;
+
+ spin_lock(lck);
+ r0 = READ_ONCE(*x);
+ r1 = READ_ONCE(*x);
+ r2 = atomic_inc_return(y);
+ spin_unlock(lck);
+}
+
+locations [x;0:r2;1:r0;1:r1;1:r2]
+filter (1:r0=0 /\ 1:r1=1)
+exists (1:r2=1)
diff --git a/Documentation/litmus-tests/locking/RM-fixed.litmus b/Documentation/litmus-tests/locking/RM-fixed.litmus
new file mode 100644
index 000000000000..b62817559616
--- /dev/null
+++ b/Documentation/litmus-tests/locking/RM-fixed.litmus
@@ -0,0 +1,41 @@
+C RM-fixed
+
+(*
+ * Result: Never
+ *
+ * This litmus test demonstrates that the old "roach motel" approach
+ * to locking, where code can be freely moved into critical sections,
+ * cannot be used in the Linux kernel.
+ *)
+
+{
+ int x;
+ atomic_t y;
+}
+
+P0(int *x, atomic_t *y, spinlock_t *lck)
+{
+ int r2;
+
+ spin_lock(lck);
+ r2 = atomic_inc_return(y);
+ WRITE_ONCE(*x, 1);
+ spin_unlock(lck);
+}
+
+P1(int *x, atomic_t *y, spinlock_t *lck)
+{
+ int r0;
+ int r1;
+ int r2;
+
+ r0 = READ_ONCE(*x);
+ r1 = READ_ONCE(*x);
+ spin_lock(lck);
+ r2 = atomic_inc_return(y);
+ spin_unlock(lck);
+}
+
+locations [x;0:r2;1:r0;1:r1;1:r2]
+filter (1:r0=0 /\ 1:r1=1)
+exists (1:r2=1)
diff --git a/MAINTAINERS b/MAINTAINERS
index 49321684cd8e..9754e91c7cd8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17736,11 +17736,13 @@ READ-COPY UPDATE (RCU)
M: "Paul E. McKenney" <paulmck@kernel.org>
M: Frederic Weisbecker <frederic@kernel.org> (kernel/rcu/tree_nocb.h)
M: Neeraj Upadhyay <quic_neeraju@quicinc.com> (kernel/rcu/tasks.h)
+M: Joel Fernandes <joel@joelfernandes.org>
M: Josh Triplett <josh@joshtriplett.org>
+M: Boqun Feng <boqun.feng@gmail.com>
R: Steven Rostedt <rostedt@goodmis.org>
R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
R: Lai Jiangshan <jiangshanlai@gmail.com>
-R: Joel Fernandes <joel@joelfernandes.org>
+R: Zqiang <qiang1.zhang@intel.com>
L: rcu@vger.kernel.org
S: Supported
W: http://www.rdrop.com/users/paulmck/RCU/
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index ca6eadeb7d1a..f531da6b362e 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -29,7 +29,6 @@ menuconfig KVM
select KVM_MMIO
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_XFER_TO_GUEST_WORK
- select SRCU
select KVM_VFIO
select HAVE_KVM_EVENTFD
select HAVE_KVM_IRQFD
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig
index 29e51649203b..a8cdba75f98d 100644
--- a/arch/mips/kvm/Kconfig
+++ b/arch/mips/kvm/Kconfig
@@ -26,7 +26,6 @@ config KVM
select HAVE_KVM_VCPU_ASYNC_IOCTL
select KVM_MMIO
select MMU_NOTIFIER
- select SRCU
select INTERVAL_TREE
select KVM_GENERIC_HARDWARE_ENABLING
help
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index a9f57dad6d91..902611954200 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -22,7 +22,6 @@ config KVM
select PREEMPT_NOTIFIERS
select HAVE_KVM_EVENTFD
select HAVE_KVM_VCPU_ASYNC_IOCTL
- select SRCU
select KVM_VFIO
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
index d5a658a047a7..5682d8c017b3 100644
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -28,7 +28,6 @@ config KVM
select KVM_XFER_TO_GUEST_WORK
select HAVE_KVM_VCPU_ASYNC_IOCTL
select HAVE_KVM_EVENTFD
- select SRCU
help
Support hosting virtualized guest machines.
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 33f4ff909476..45fdf2a9b2e3 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -31,7 +31,6 @@ config KVM
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_INVALID_WAKEUPS
select HAVE_KVM_NO_POLL
- select SRCU
select KVM_VFIO
select INTERVAL_TREE
select MMU_NOTIFIER
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5254225aea73..ebf2837c17f8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -286,7 +286,6 @@ config X86
select RTC_LIB
select RTC_MC146818_LIB
select SPARSE_IRQ
- select SRCU
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
select TRACE_IRQFLAGS_SUPPORT
@@ -1947,7 +1946,6 @@ config X86_SGX
depends on X86_64 && CPU_SUP_INTEL && X86_X2APIC
depends on CRYPTO=y
depends on CRYPTO_SHA256=y
- select SRCU
select MMU_NOTIFIER
select NUMA_KEEP_MEMINFO if NUMA
select XARRAY_MULTI
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 8e578311ca9d..89ca7f4c1464 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -46,7 +46,6 @@ config KVM
select KVM_XFER_TO_GUEST_WORK
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_VFIO
- select SRCU
select INTERVAL_TREE
select HAVE_KVM_PM_NOTIFIER if PM
select KVM_GENERIC_HARDWARE_ENABLING
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 182c6122f815..c1815b9dae68 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -487,7 +487,8 @@ static const struct attribute_group *cpu_root_attr_groups[] = {
bool cpu_is_hotpluggable(unsigned int cpu)
{
struct device *dev = get_cpu_device(cpu);
- return dev && container_of(dev, struct cpu, dev)->hotpluggable;
+ return dev && container_of(dev, struct cpu, dev)->hotpluggable
+ && tick_nohz_cpu_hotpluggable(cpu);
}
EXPORT_SYMBOL_GPL(cpu_is_hotpluggable);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 8967298968f3..1a5d3d72d91d 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1615,7 +1615,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
drbd_send_sync_param(peer_device);
}
- kvfree_rcu(old_disk_conf);
+ kvfree_rcu_mightsleep(old_disk_conf);
kfree(old_plan);
mod_timer(&device->request_timer, jiffies + HZ);
goto success;
@@ -2448,7 +2448,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
mutex_unlock(&connection->resource->conf_update);
mutex_unlock(&connection->data.mutex);
- kvfree_rcu(old_net_conf);
+ kvfree_rcu_mightsleep(old_net_conf);
if (connection->cstate >= C_WF_REPORT_PARAMS) {
struct drbd_peer_device *peer_device;
@@ -2862,7 +2862,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
new_disk_conf->disk_size = (sector_t)rs.resize_size;
rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
mutex_unlock(&device->resource->conf_update);
- kvfree_rcu(old_disk_conf);
+ kvfree_rcu_mightsleep(old_disk_conf);
new_disk_conf = NULL;
}
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index e54404c632e7..719a7260c22b 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3762,7 +3762,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in
drbd_info(connection, "peer data-integrity-alg: %s\n",
integrity_alg[0] ? integrity_alg : "(none)");
- kvfree_rcu(old_net_conf);
+ kvfree_rcu_mightsleep(old_net_conf);
return 0;
disconnect_rcu_unlock:
@@ -4130,7 +4130,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
mutex_unlock(&connection->resource->conf_update);
- kvfree_rcu(old_disk_conf);
+ kvfree_rcu_mightsleep(old_disk_conf);
drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
(unsigned long)p_usize, (unsigned long)my_usize);
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 563e67f1ead9..287a8d1d3f70 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -2076,7 +2076,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
conn_free_crypto(connection);
mutex_unlock(&connection->resource->conf_update);
- kvfree_rcu(old_conf);
+ kvfree_rcu_mightsleep(old_conf);
}
if (ns_max.susp_fen) {
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 4e2db7c2e4ed..39f3367cec6c 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1326,7 +1326,7 @@ static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
if (cleanup_err)
rxe_err_mr(mr, "cleanup failed, err = %d", cleanup_err);
- kfree_rcu(mr);
+ kfree(mr);
return 0;
err_out:
diff --git a/drivers/misc/vmw_vmci/vmci_context.c b/drivers/misc/vmw_vmci/vmci_context.c
index 172696abce31..f22b44827e92 100644
--- a/drivers/misc/vmw_vmci/vmci_context.c
+++ b/drivers/misc/vmw_vmci/vmci_context.c
@@ -687,7 +687,7 @@ int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid)
spin_unlock(&context->lock);
if (notifier)
- kvfree_rcu(notifier);
+ kvfree_rcu_mightsleep(notifier);
vmci_ctx_put(context);
diff --git a/drivers/misc/vmw_vmci/vmci_event.c b/drivers/misc/vmw_vmci/vmci_event.c
index 2100297c94ad..5d7ac07623c2 100644
--- a/drivers/misc/vmw_vmci/vmci_event.c
+++ b/drivers/misc/vmw_vmci/vmci_event.c
@@ -209,7 +209,7 @@ int vmci_event_unsubscribe(u32 sub_id)
if (!s)
return VMCI_ERROR_NOT_FOUND;
- kvfree_rcu(s);
+ kvfree_rcu_mightsleep(s);
return VMCI_SUCCESS;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c
index ca834bbcb44f..8afcec0c5d3c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c
@@ -242,7 +242,7 @@ mlx5e_int_port_remove(struct mlx5e_tc_int_port_priv *priv,
mlx5_del_flow_rules(int_port->rx_rule);
mapping_remove(ctx, int_port->mapping);
mlx5e_int_port_metadata_free(priv, int_port->match_metadata);
- kfree_rcu(int_port);
+ kfree_rcu_mightsleep(int_port);
priv->num_ports--;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
index 33b3620ea45c..51f1cd8364c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
@@ -670,7 +670,7 @@ static int mlx5e_macsec_del_txsa(struct macsec_context *ctx)
mlx5e_macsec_cleanup_sa(macsec, tx_sa, true);
mlx5_destroy_encryption_key(macsec->mdev, tx_sa->enc_key_id);
- kfree_rcu(tx_sa);
+ kfree_rcu_mightsleep(tx_sa);
macsec_device->tx_sa[assoc_num] = NULL;
out:
@@ -849,7 +849,7 @@ static void macsec_del_rxsc_ctx(struct mlx5e_macsec *macsec, struct mlx5e_macsec
xa_erase(&macsec->sc_xarray, rx_sc->sc_xarray_element->fs_id);
metadata_dst_free(rx_sc->md_dst);
kfree(rx_sc->sc_xarray_element);
- kfree_rcu(rx_sc);
+ kfree_rcu_mightsleep(rx_sc);
}
static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f226f8ab469b..e6d84c1e34a4 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2500,7 +2500,7 @@ static void ext4_apply_quota_options(struct fs_context *fc,
qname = rcu_replace_pointer(sbi->s_qf_names[i], qname,
lockdep_is_held(&sb->s_umount));
if (qname)
- kfree_rcu(qname);
+ kfree_rcu_mightsleep(qname);
}
}
diff --git a/include/linux/instrumented.h b/include/linux/instrumented.h
index 501fa8486749..1b608e00290a 100644
--- a/include/linux/instrumented.h
+++ b/include/linux/instrumented.h
@@ -15,12 +15,11 @@
/**
* instrument_read - instrument regular read access
+ * @v: address of access
+ * @size: size of access
*
* Instrument a regular read access. The instrumentation should be inserted
* before the actual read happens.
- *
- * @ptr address of access
- * @size size of access
*/
static __always_inline void instrument_read(const volatile void *v, size_t size)
{
@@ -30,12 +29,11 @@ static __always_inline void instrument_read(const volatile void *v, size_t size)
/**
* instrument_write - instrument regular write access
+ * @v: address of access
+ * @size: size of access
*
* Instrument a regular write access. The instrumentation should be inserted
* before the actual write happens.
- *
- * @ptr address of access
- * @size size of access
*/
static __always_inline void instrument_write(const volatile void *v, size_t size)
{
@@ -45,12 +43,11 @@ static __always_inline void instrument_write(const volatile void *v, size_t size
/**
* instrument_read_write - instrument regular read-write access
+ * @v: address of access
+ * @size: size of access
*
* Instrument a regular write access. The instrumentation should be inserted
* before the actual write happens.
- *
- * @ptr address of access
- * @size size of access
*/
static __always_inline void instrument_read_write(const volatile void *v, size_t size)
{
@@ -60,12 +57,11 @@ static __always_inline void instrument_read_write(const volatile void *v, size_t
/**
* instrument_atomic_read - instrument atomic read access
+ * @v: address of access
+ * @size: size of access
*
* Instrument an atomic read access. The instrumentation should be inserted
* before the actual read happens.
- *
- * @ptr address of access
- * @size size of access
*/
static __always_inline void instrument_atomic_read(const volatile void *v, size_t size)
{
@@ -75,12 +71,11 @@ static __always_inline void instrument_atomic_read(const volatile void *v, size_
/**
* instrument_atomic_write - instrument atomic write access
+ * @v: address of access
+ * @size: size of access
*
* Instrument an atomic write access. The instrumentation should be inserted
* before the actual write happens.
- *
- * @ptr address of access
- * @size size of access
*/
static __always_inline void instrument_atomic_write(const volatile void *v, size_t size)
{
@@ -90,12 +85,11 @@ static __always_inline void instrument_atomic_write(const volatile void *v, size
/**
* instrument_atomic_read_write - instrument atomic read-write access
+ * @v: address of access
+ * @size: size of access
*
* Instrument an atomic read-write access. The instrumentation should be
* inserted before the actual write happens.
- *
- * @ptr address of access
- * @size size of access
*/
static __always_inline void instrument_atomic_read_write(const volatile void *v, size_t size)
{
@@ -105,13 +99,12 @@ static __always_inline void instrument_atomic_read_write(const volatile void *v,
/**
* instrument_copy_to_user - instrument reads of copy_to_user
+ * @to: destination address
+ * @from: source address
+ * @n: number of bytes to copy
*
* Instrument reads from kernel memory, that are due to copy_to_user (and
* variants). The instrumentation must be inserted before the accesses.
- *
- * @to destination address
- * @from source address
- * @n number of bytes to copy
*/
static __always_inline void
instrument_copy_to_user(void __user *to, const void *from, unsigned long n)
@@ -123,13 +116,12 @@ instrument_copy_to_user(void __user *to, const void *from, unsigned long n)
/**
* instrument_copy_from_user_before - add instrumentation before copy_from_user
+ * @to: destination address
+ * @from: source address
+ * @n: number of bytes to copy
*
* Instrument writes to kernel memory, that are due to copy_from_user (and
* variants). The instrumentation should be inserted before the accesses.
- *
- * @to destination address
- * @from source address
- * @n number of bytes to copy
*/
static __always_inline void
instrument_copy_from_user_before(const void *to, const void __user *from, unsigned long n)
@@ -140,14 +132,13 @@ instrument_copy_from_user_before(const void *to, const void __user *from, unsign
/**
* instrument_copy_from_user_after - add instrumentation after copy_from_user
+ * @to: destination address
+ * @from: source address
+ * @n: number of bytes to copy
+ * @left: number of bytes not copied (as returned by copy_from_user)
*
* Instrument writes to kernel memory, that are due to copy_from_user (and
* variants). The instrumentation should be inserted after the accesses.
- *
- * @to destination address
- * @from source address
- * @n number of bytes to copy
- * @left number of bytes not copied (as returned by copy_from_user)
*/
static __always_inline void
instrument_copy_from_user_after(const void *to, const void __user *from,
@@ -158,12 +149,11 @@ instrument_copy_from_user_after(const void *to, const void __user *from,
/**
* instrument_get_user() - add instrumentation to get_user()-like macros
+ * @to: destination variable, may not be address-taken
*
* get_user() and friends are fragile, so it may depend on the implementation
* whether the instrumentation happens before or after the data is copied from
* the userspace.
- *
- * @to destination variable, may not be address-taken
*/
#define instrument_get_user(to) \
({ \
@@ -175,14 +165,13 @@ instrument_copy_from_user_after(const void *to, const void __user *from,
/**
* instrument_put_user() - add instrumentation to put_user()-like macros
+ * @from: source address
+ * @ptr: userspace pointer to copy to
+ * @size: number of bytes to copy
*
* put_user() and friends are fragile, so it may depend on the implementation
* whether the instrumentation happens before or after the data is copied from
* the userspace.
- *
- * @from source address
- * @ptr userspace pointer to copy to
- * @size number of bytes to copy
*/
#define instrument_put_user(from, ptr, size) \
({ \
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 1023f349af71..b32256e9e944 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -134,7 +134,8 @@ struct held_lock {
unsigned int read:2; /* see lock_acquire() comment */
unsigned int check:1; /* see lock_acquire() comment */
unsigned int hardirqs_off:1;
- unsigned int references:12; /* 32 bits */
+ unsigned int sync:1;
+ unsigned int references:11; /* 32 bits */
unsigned int pin_count;
};
@@ -268,6 +269,10 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
extern void lock_release(struct lockdep_map *lock, unsigned long ip);
+extern void lock_sync(struct lockdep_map *lock, unsigned int subclass,
+ int read, int check, struct lockdep_map *nest_lock,
+ unsigned long ip);
+
/* lock_is_held_type() returns */
#define LOCK_STATE_UNKNOWN -1
#define LOCK_STATE_NOT_HELD 0
@@ -554,6 +559,7 @@ do { \
#define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_)
#define lock_map_acquire_tryread(l) lock_acquire_shared_recursive(l, 0, 1, NULL, _THIS_IP_)
#define lock_map_release(l) lock_release(l, _THIS_IP_)
+#define lock_map_sync(l) lock_sync(l, 0, 0, 1, NULL, _THIS_IP_)
#ifdef CONFIG_PROVE_LOCKING
# define might_lock(lock) \
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index aef88c2d1173..2aba75145144 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -73,6 +73,9 @@ struct raw_notifier_head {
struct srcu_notifier_head {
struct mutex mutex;
+#ifdef CONFIG_TREE_SRCU
+ struct srcu_usage srcuu;
+#endif
struct srcu_struct srcu;
struct notifier_block __rcu *head;
};
@@ -107,7 +110,7 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
{ \
.mutex = __MUTEX_INITIALIZER(name.mutex), \
.head = NULL, \
- .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \
+ .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
}
#define ATOMIC_NOTIFIER_HEAD(name) \
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 094321c17e48..ddd42efc6224 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -156,31 +156,6 @@ static inline int rcu_nocb_cpu_deoffload(int cpu) { return 0; }
static inline void rcu_nocb_flush_deferred_wakeup(void) { }
#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
-/**
- * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers
- * @a: Code that RCU needs to pay attention to.
- *
- * RCU read-side critical sections are forbidden in the inner idle loop,
- * that is, between the ct_idle_enter() and the ct_idle_exit() -- RCU
- * will happily ignore any such read-side critical sections. However,
- * things like powertop need tracepoints in the inner idle loop.
- *
- * This macro provides the way out: RCU_NONIDLE(do_something_with_RCU())
- * will tell RCU that it needs to pay attention, invoke its argument
- * (in this example, calling the do_something_with_RCU() function),
- * and then tell RCU to go back to ignoring this CPU. It is permissible
- * to nest RCU_NONIDLE() wrappers, but not indefinitely (but the limit is
- * on the order of a million or so, even on 32-bit systems). It is
- * not legal to block within RCU_NONIDLE(), nor is it permissible to
- * transfer control either into or out of RCU_NONIDLE()'s statement.
- */
-#define RCU_NONIDLE(a) \
- do { \
- ct_irq_enter_irqson(); \
- do { a; } while (0); \
- ct_irq_exit_irqson(); \
- } while (0)
-
/*
* Note a quasi-voluntary context switch for RCU-tasks's benefit.
* This is a macro rather than an inline function to avoid #include hell.
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 74796cd7e7a9..eb92a50a4599 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -102,6 +102,32 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp)
return lock_is_held(&ssp->dep_map);
}
+/*
+ * Annotations provide deadlock detection for SRCU.
+ *
+ * Similar to other lockdep annotations, except there is an additional
+ * srcu_lock_sync(), which is basically an empty *write*-side critical section,
+ * see lock_sync() for more information.
+ */
+
+/* Annotates a srcu_read_lock() */
+static inline void srcu_lock_acquire(struct lockdep_map *map)
+{
+ lock_map_acquire_read(map);
+}
+
+/* Annotates a srcu_read_lock() */
+static inline void srcu_lock_release(struct lockdep_map *map)
+{
+ lock_map_release(map);
+}
+
+/* Annotates a synchronize_srcu() */
+static inline void srcu_lock_sync(struct lockdep_map *map)
+{
+ lock_map_sync(map);
+}
+
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
static inline int srcu_read_lock_held(const struct srcu_struct *ssp)
@@ -109,6 +135,10 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp)
return 1;
}
+#define srcu_lock_acquire(m) do { } while (0)
+#define srcu_lock_release(m) do { } while (0)
+#define srcu_lock_sync(m) do { } while (0)
+
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
#define SRCU_NMI_UNKNOWN 0x0
@@ -182,7 +212,7 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
srcu_check_nmi_safety(ssp, false);
retval = __srcu_read_lock(ssp);
- rcu_lock_acquire(&(ssp)->dep_map);
+ srcu_lock_acquire(&ssp->dep_map);
return retval;
}
@@ -199,7 +229,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp
srcu_check_nmi_safety(ssp, true);
retval = __srcu_read_lock_nmisafe(ssp);
- rcu_lock_acquire(&(ssp)->dep_map);
+ rcu_lock_acquire(&ssp->dep_map);
return retval;
}
@@ -254,7 +284,7 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
{
WARN_ON_ONCE(idx & ~0x1);
srcu_check_nmi_safety(ssp, false);
- rcu_lock_release(&(ssp)->dep_map);
+ srcu_lock_release(&ssp->dep_map);
__srcu_read_unlock(ssp, idx);
}
@@ -270,7 +300,7 @@ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
{
WARN_ON_ONCE(idx & ~0x1);
srcu_check_nmi_safety(ssp, true);
- rcu_lock_release(&(ssp)->dep_map);
+ rcu_lock_release(&ssp->dep_map);
__srcu_read_unlock_nmisafe(ssp, idx);
}
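
As a rough illustration of the deadlock these srcu_lock_*() annotations are
meant to catch, consider a synchronize_srcu() that can be reached from
within a read-side critical section of the same srcu_struct (hypothetical
code, for illustration only; update_path() is a stand-in):

DEFINE_STATIC_SRCU(my_srcu);

static void reader(void)
{
	int idx;

	idx = srcu_read_lock(&my_srcu);		/* srcu_lock_acquire() */
	update_path();				/* may reach updater() below */
	srcu_read_unlock(&my_srcu, idx);	/* srcu_lock_release() */
}

static void updater(void)
{
	/*
	 * srcu_lock_sync() records this as an empty write-side critical
	 * section.  If it can run inside reader()'s read-side critical
	 * section on the same srcu_struct, the grace period can never
	 * end, and lockdep can now report the dependency cycle.
	 */
	synchronize_srcu(&my_srcu);
}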
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 5aa5e0faf6a1..ebd72491af99 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -31,7 +31,7 @@ struct srcu_struct {
void srcu_drive_gp(struct work_struct *wp);
-#define __SRCU_STRUCT_INIT(name, __ignored) \
+#define __SRCU_STRUCT_INIT(name, __ignored, ___ignored) \
{ \
.srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq), \
.srcu_cb_tail = &name.srcu_cb_head, \
@@ -44,9 +44,9 @@ void srcu_drive_gp(struct work_struct *wp);
* Tree SRCU, which needs some per-CPU data.
*/
#define DEFINE_SRCU(name) \
- struct srcu_struct name = __SRCU_STRUCT_INIT(name, name)
+ struct srcu_struct name = __SRCU_STRUCT_INIT(name, name, name)
#define DEFINE_STATIC_SRCU(name) \
- static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name)
+ static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name, name)
void synchronize_srcu(struct srcu_struct *ssp);
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 558057b517b7..8f3f72480e78 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -58,9 +58,9 @@ struct srcu_node {
};
/*
- * Per-SRCU-domain structure, similar in function to rcu_state.
+ * Per-SRCU-domain structure, update-side data linked from srcu_struct.
*/
-struct srcu_struct {
+struct srcu_usage {
struct srcu_node *node; /* Combining tree. */
struct srcu_node *level[RCU_NUM_LVLS + 1];
/* First node at each level. */
@@ -68,7 +68,6 @@ struct srcu_struct {
struct mutex srcu_cb_mutex; /* Serialize CB preparation. */
spinlock_t __private lock; /* Protect counters and size state. */
struct mutex srcu_gp_mutex; /* Serialize GP work. */
- unsigned int srcu_idx; /* Current rdr array element. */
unsigned long srcu_gp_seq; /* Grace-period seq #. */
unsigned long srcu_gp_seq_needed; /* Latest gp_seq needed. */
unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */
@@ -77,7 +76,6 @@ struct srcu_struct {
unsigned long srcu_size_jiffies; /* Current contention-measurement interval. */
unsigned long srcu_n_lock_retries; /* Contention events in current interval. */
unsigned long srcu_n_exp_nodelay; /* # expedited no-delays in current GP phase. */
- struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */
bool sda_is_static; /* May ->sda be passed to free_percpu()? */
unsigned long srcu_barrier_seq; /* srcu_barrier seq #. */
struct mutex srcu_barrier_mutex; /* Serialize barrier ops. */
@@ -89,32 +87,68 @@ struct srcu_struct {
unsigned long reschedule_jiffies;
unsigned long reschedule_count;
struct delayed_work work;
+ struct srcu_struct *srcu_ssp;
+};
+
+/*
+ * Per-SRCU-domain structure, similar in function to rcu_state.
+ */
+struct srcu_struct {
+ unsigned int srcu_idx; /* Current rdr array element. */
+ struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */
struct lockdep_map dep_map;
+ struct srcu_usage *srcu_sup; /* Update-side data. */
};
-/* Values for size state variable (->srcu_size_state). */
-#define SRCU_SIZE_SMALL 0
-#define SRCU_SIZE_ALLOC 1
-#define SRCU_SIZE_WAIT_BARRIER 2
-#define SRCU_SIZE_WAIT_CALL 3
-#define SRCU_SIZE_WAIT_CBS1 4
-#define SRCU_SIZE_WAIT_CBS2 5
-#define SRCU_SIZE_WAIT_CBS3 6
-#define SRCU_SIZE_WAIT_CBS4 7
-#define SRCU_SIZE_BIG 8
+// Values for size state variable (->srcu_size_state). Once the state
+// has been set to SRCU_SIZE_ALLOC, the grace-period code advances through
+// this state machine one step per grace period until the SRCU_SIZE_BIG state
+// is reached. Otherwise, the state machine remains in the SRCU_SIZE_SMALL
+// state indefinitely.
+#define SRCU_SIZE_SMALL 0 // No srcu_node combining tree, ->node == NULL
+#define SRCU_SIZE_ALLOC 1 // An srcu_node tree is being allocated, initialized,
+ // and then referenced by ->node. It will not be used.
+#define SRCU_SIZE_WAIT_BARRIER 2 // The srcu_node tree starts being used by everything
+ // except call_srcu(), especially by srcu_barrier().
+ // By the end of this state, all CPUs and threads
+ // are aware of this tree's existence.
+#define SRCU_SIZE_WAIT_CALL 3 // The srcu_node tree starts being used by call_srcu().
+ // By the end of this state, all of the call_srcu()
+ // invocations that were running on a non-boot CPU
+ // and using the boot CPU's callback queue will have
+ // completed.
+#define SRCU_SIZE_WAIT_CBS1 4 // Don't trust the ->srcu_have_cbs[] grace-period
+#define SRCU_SIZE_WAIT_CBS2 5 // sequence elements or the ->srcu_data_have_cbs[]
+#define SRCU_SIZE_WAIT_CBS3 6 // CPU-bitmask elements until all four elements of
+#define SRCU_SIZE_WAIT_CBS4 7 // each array have been initialized.
+#define SRCU_SIZE_BIG 8 // The srcu_node combining tree is fully initialized
+ // and all aspects of it are being put to use.
/* Values for state variable (bottom bits of ->srcu_gp_seq). */
#define SRCU_STATE_IDLE 0
#define SRCU_STATE_SCAN1 1
#define SRCU_STATE_SCAN2 2
-#define __SRCU_STRUCT_INIT(name, pcpu_name) \
-{ \
- .sda = &pcpu_name, \
- .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
- .srcu_gp_seq_needed = -1UL, \
- .work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0), \
- __SRCU_DEP_MAP_INIT(name) \
+#define __SRCU_USAGE_INIT(name) \
+{ \
+ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
+ .srcu_gp_seq_needed = -1UL, \
+ .work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0), \
+}
+
+#define __SRCU_STRUCT_INIT_COMMON(name, usage_name) \
+ .srcu_sup = &usage_name, \
+ __SRCU_DEP_MAP_INIT(name)
+
+#define __SRCU_STRUCT_INIT_MODULE(name, usage_name) \
+{ \
+ __SRCU_STRUCT_INIT_COMMON(name, usage_name) \
+}
+
+#define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name) \
+{ \
+ .sda = &pcpu_name, \
+ __SRCU_STRUCT_INIT_COMMON(name, usage_name) \
}
/*
@@ -137,16 +171,18 @@ struct srcu_struct {
* See include/linux/percpu-defs.h for the rules on per-CPU variables.
*/
#ifdef MODULE
-# define __DEFINE_SRCU(name, is_static) \
- is_static struct srcu_struct name; \
- extern struct srcu_struct * const __srcu_struct_##name; \
- struct srcu_struct * const __srcu_struct_##name \
+# define __DEFINE_SRCU(name, is_static) \
+ static struct srcu_usage name##_srcu_usage = __SRCU_USAGE_INIT(name##_srcu_usage); \
+ is_static struct srcu_struct name = __SRCU_STRUCT_INIT_MODULE(name, name##_srcu_usage); \
+ extern struct srcu_struct * const __srcu_struct_##name; \
+ struct srcu_struct * const __srcu_struct_##name \
__section("___srcu_struct_ptrs") = &name
#else
-# define __DEFINE_SRCU(name, is_static) \
- static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data); \
- is_static struct srcu_struct name = \
- __SRCU_STRUCT_INIT(name, name##_srcu_data)
+# define __DEFINE_SRCU(name, is_static) \
+ static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data); \
+ static struct srcu_usage name##_srcu_usage = __SRCU_USAGE_INIT(name##_srcu_usage); \
+ is_static struct srcu_struct name = \
+ __SRCU_STRUCT_INIT(name, name##_srcu_usage, name##_srcu_data)
#endif
#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */)
#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static)
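Illustrative usage sketch (not from this patch): callers of DEFINE_STATIC_SRCU() are unaffected by the srcu_usage split, because the companion structure is created and wired up inside the macro. This is a hedged, kernel-style example; my_config, my_cfg, and both functions are made-up names, includes and error handling are elided, and my_cfg is assumed to have been populated before the first reader runs.

struct my_config {
	int threshold;
};

static struct my_config __rcu *my_cfg;	/* assumed set up during init */
DEFINE_STATIC_SRCU(my_srcu);

int my_read_threshold(void)
{
	int idx, val;

	idx = srcu_read_lock(&my_srcu);			/* sleeping is allowed inside the section */
	val = srcu_dereference(my_cfg, &my_srcu)->threshold;
	srcu_read_unlock(&my_srcu, idx);
	return val;
}

void my_replace_config(struct my_config *newcfg)
{
	/* Update-side mutual exclusion is assumed to be provided by the caller. */
	struct my_config *old = rcu_dereference_protected(my_cfg, 1);

	rcu_assign_pointer(my_cfg, newcfg);
	synchronize_srcu(&my_srcu);	/* wait for pre-existing readers to finish */
	kfree(old);
}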
diff --git a/include/linux/tick.h b/include/linux/tick.h
index bfd571f18cfd..9459fef5b857 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -216,6 +216,7 @@ extern void tick_nohz_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit);
+extern bool tick_nohz_cpu_hotpluggable(unsigned int cpu);
/*
* The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases
@@ -280,6 +281,7 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
static inline void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
static inline void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
+static inline bool tick_nohz_cpu_hotpluggable(unsigned int cpu) { return true; }
static inline void tick_dep_set(enum tick_dep_bits bit) { }
static inline void tick_dep_clear(enum tick_dep_bits bit) { }
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 012fa0d171b2..2ef9c719772a 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -776,9 +776,7 @@ TRACE_EVENT_RCU(rcu_torture_read,
),
TP_fast_assign(
- strncpy(__entry->rcutorturename, rcutorturename,
- RCUTORTURENAME_LEN);
- __entry->rcutorturename[RCUTORTURENAME_LEN - 1] = 0;
+ strscpy(__entry->rcutorturename, rcutorturename, RCUTORTURENAME_LEN);
__entry->rhp = rhp;
__entry->secs = secs;
__entry->c_old = c_old;
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 2e713a7d9aa3..3e8619c72f77 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -371,7 +371,8 @@ TRACE_EVENT(itimer_expire,
tick_dep_name(PERF_EVENTS) \
tick_dep_name(SCHED) \
tick_dep_name(CLOCK_UNSTABLE) \
- tick_dep_name_end(RCU)
+ tick_dep_name(RCU) \
+ tick_dep_name_end(RCU_EXP)
#undef tick_dep_name
#undef tick_dep_mask_name
diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index 54d077e1a2dc..5a60cc52adc0 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -337,11 +337,20 @@ static void delay_access(int type)
*/
static __always_inline u64 read_instrumented_memory(const volatile void *ptr, size_t size)
{
+ /*
+ * In the below we don't necessarily need the read of the location to
+ * be atomic, and we don't use READ_ONCE(), since all we need for race
+ * detection is to observe 2 different values.
+ *
+ * Furthermore, on certain architectures (such as arm64), READ_ONCE()
+	 * may turn into more complex instructions than a plain load, and those
+	 * instructions cannot do unaligned accesses.
+ */
switch (size) {
- case 1: return READ_ONCE(*(const u8 *)ptr);
- case 2: return READ_ONCE(*(const u16 *)ptr);
- case 4: return READ_ONCE(*(const u32 *)ptr);
- case 8: return READ_ONCE(*(const u64 *)ptr);
+ case 1: return *(const volatile u8 *)ptr;
+ case 2: return *(const volatile u16 *)ptr;
+ case 4: return *(const volatile u32 *)ptr;
+ case 8: return *(const volatile u64 *)ptr;
default: return 0; /* Ignore; we do not diff the values. */
}
}
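Illustrative sketch (not from this patch): a standalone model of the size-dispatched plain load above. Race detection only needs two loads of the same location to compare, so neither atomicity nor READ_ONCE() is required; the volatile casts merely keep the compiler from caching the value.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Plain (volatile) load of 1/2/4/8 bytes, mirroring the switch statement above. */
static uint64_t read_mem(const volatile void *ptr, size_t size)
{
	switch (size) {
	case 1: return *(const volatile uint8_t *)ptr;
	case 2: return *(const volatile uint16_t *)ptr;
	case 4: return *(const volatile uint32_t *)ptr;
	case 8: return *(const volatile uint64_t *)ptr;
	default: return 0;	/* sizes whose values are not compared */
	}
}

int main(void)
{
	uint32_t x = 0x12345678;
	uint64_t before = read_mem(&x, sizeof(x));

	x = 0xdeadbeef;		/* stand-in for a concurrent writer */
	printf("value changed: %d\n", read_mem(&x, sizeof(x)) != before);
	return 0;
}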
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 50d4863974e7..dcd1d5bfc1e0 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1881,6 +1881,8 @@ print_circular_lock_scenario(struct held_lock *src,
struct lock_class *source = hlock_class(src);
struct lock_class *target = hlock_class(tgt);
struct lock_class *parent = prt->class;
+ int src_read = src->read;
+ int tgt_read = tgt->read;
/*
* A direct locking problem where unsafe_class lock is taken
@@ -1908,7 +1910,10 @@ print_circular_lock_scenario(struct held_lock *src,
printk(" Possible unsafe locking scenario:\n\n");
printk(" CPU0 CPU1\n");
printk(" ---- ----\n");
- printk(" lock(");
+ if (tgt_read != 0)
+ printk(" rlock(");
+ else
+ printk(" lock(");
__print_lock_name(target);
printk(KERN_CONT ");\n");
printk(" lock(");
@@ -1917,7 +1922,12 @@ print_circular_lock_scenario(struct held_lock *src,
printk(" lock(");
__print_lock_name(target);
printk(KERN_CONT ");\n");
- printk(" lock(");
+ if (src_read != 0)
+ printk(" rlock(");
+ else if (src->sync)
+ printk(" sync(");
+ else
+ printk(" lock(");
__print_lock_name(source);
printk(KERN_CONT ");\n");
printk("\n *** DEADLOCK ***\n\n");
@@ -4531,7 +4541,13 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
return 0;
}
}
- if (!hlock->hardirqs_off) {
+
+ /*
+ * For lock_sync(), don't mark the ENABLED usage, since lock_sync()
+ * creates no critical section and no extra dependency can be introduced
+ * by interrupts
+ */
+ if (!hlock->hardirqs_off && !hlock->sync) {
if (hlock->read) {
if (!mark_lock(curr, hlock,
LOCK_ENABLED_HARDIRQ_READ))
@@ -4910,7 +4926,7 @@ static int __lock_is_held(const struct lockdep_map *lock, int read);
static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
int trylock, int read, int check, int hardirqs_off,
struct lockdep_map *nest_lock, unsigned long ip,
- int references, int pin_count)
+ int references, int pin_count, int sync)
{
struct task_struct *curr = current;
struct lock_class *class = NULL;
@@ -4961,7 +4977,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
class_idx = class - lock_classes;
- if (depth) { /* we're holding locks */
+ if (depth && !sync) {
+ /* we're holding locks and the new held lock is not a sync */
hlock = curr->held_locks + depth - 1;
if (hlock->class_idx == class_idx && nest_lock) {
if (!references)
@@ -4995,6 +5012,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
hlock->trylock = trylock;
hlock->read = read;
hlock->check = check;
+ hlock->sync = !!sync;
hlock->hardirqs_off = !!hardirqs_off;
hlock->references = references;
#ifdef CONFIG_LOCK_STAT
@@ -5056,6 +5074,10 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
if (!validate_chain(curr, hlock, chain_head, chain_key))
return 0;
+ /* For lock_sync(), we are done here since no actual critical section */
+ if (hlock->sync)
+ return 1;
+
curr->curr_chain_key = chain_key;
curr->lockdep_depth++;
check_chain_key(curr);
@@ -5197,7 +5219,7 @@ static int reacquire_held_locks(struct task_struct *curr, unsigned int depth,
hlock->read, hlock->check,
hlock->hardirqs_off,
hlock->nest_lock, hlock->acquire_ip,
- hlock->references, hlock->pin_count)) {
+ hlock->references, hlock->pin_count, 0)) {
case 0:
return 1;
case 1:
@@ -5667,7 +5689,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
lockdep_recursion_inc();
__lock_acquire(lock, subclass, trylock, read, check,
- irqs_disabled_flags(flags), nest_lock, ip, 0, 0);
+ irqs_disabled_flags(flags), nest_lock, ip, 0, 0, 0);
lockdep_recursion_finish();
raw_local_irq_restore(flags);
}
@@ -5693,6 +5715,34 @@ void lock_release(struct lockdep_map *lock, unsigned long ip)
}
EXPORT_SYMBOL_GPL(lock_release);
+/*
+ * lock_sync() - A special annotation for synchronize_{s,}rcu()-like API.
+ *
+ * No actual critical section is created by the APIs annotated with this: these
+ * APIs are used to wait for one or multiple critical sections (on other CPUs
+ * or threads), and it means that calling these APIs inside these critical
+ * sections is potential deadlock.
+ */
+void lock_sync(struct lockdep_map *lock, unsigned subclass, int read,
+ int check, struct lockdep_map *nest_lock, unsigned long ip)
+{
+ unsigned long flags;
+
+ if (unlikely(!lockdep_enabled()))
+ return;
+
+ raw_local_irq_save(flags);
+ check_flags(flags);
+
+ lockdep_recursion_inc();
+ __lock_acquire(lock, subclass, 0, read, check,
+ irqs_disabled_flags(flags), nest_lock, ip, 0, 0, 1);
+ check_chain_key(current);
+ lockdep_recursion_finish();
+ raw_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(lock_sync);
+
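Illustrative sketch (not from this patch): one way a wait-for-readers primitive might use the new annotation. The read side acquires and releases its lockdep map as usual, while the grace-period wait only records the dependency through lock_sync(), whose argument order (lock, subclass, read, check, nest_lock, ip) matches the definition above. my_sync_map and both helpers are hypothetical; lock_map_acquire_read(), lock_map_release(), lock_sync(), and _THIS_IP_ are the only existing kernel symbols relied on, and the map is assumed to have been set up with lockdep_init_map().

static struct lockdep_map my_sync_map;	/* assumed initialized via lockdep_init_map() */

static void my_read_section(void)
{
	lock_map_acquire_read(&my_sync_map);	/* reader enters its critical section */
	/* ... read-side accesses ... */
	lock_map_release(&my_sync_map);
}

static void my_synchronize(void)
{
	/*
	 * Annotation only: no critical section is entered here, but lockdep
	 * now knows that calling my_synchronize() from inside
	 * my_read_section() would be a potential deadlock.
	 */
	lock_sync(&my_sync_map, 0, 0, 1, NULL, _THIS_IP_);
	/* ... actually wait for all pre-existing readers ... */
}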
noinstr int lock_is_held_type(const struct lockdep_map *lock, int read)
{
unsigned long flags;
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index f04b1978899d..949d3deae506 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -33,26 +33,24 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");
-torture_param(int, nwriters_stress, -1,
- "Number of write-locking stress-test threads");
-torture_param(int, nreaders_stress, -1,
- "Number of read-locking stress-test threads");
+torture_param(int, nwriters_stress, -1, "Number of write-locking stress-test threads");
+torture_param(int, nreaders_stress, -1, "Number of read-locking stress-test threads");
+torture_param(int, long_hold, 100, "Do occasional long hold of lock (ms), 0=disable");
torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
-torture_param(int, onoff_interval, 0,
- "Time between CPU hotplugs (s), 0=disable");
-torture_param(int, shuffle_interval, 3,
- "Number of jiffies between shuffles, 0=disable");
+torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (s), 0=disable");
+torture_param(int, shuffle_interval, 3, "Number of jiffies between shuffles, 0=disable");
torture_param(int, shutdown_secs, 0, "Shutdown time (j), <= zero to disable.");
-torture_param(int, stat_interval, 60,
- "Number of seconds between stats printk()s");
+torture_param(int, stat_interval, 60, "Number of seconds between stats printk()s");
torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable");
torture_param(int, rt_boost, 2,
- "Do periodic rt-boost. 0=Disable, 1=Only for rt_mutex, 2=For all lock types.");
+ "Do periodic rt-boost. 0=Disable, 1=Only for rt_mutex, 2=For all lock types.");
torture_param(int, rt_boost_factor, 50, "A factor determining how often rt-boost happens.");
-torture_param(int, verbose, 1,
- "Enable verbose debugging printk()s");
+torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
+torture_param(int, nested_locks, 0, "Number of nested locks (max = 8)");
+/* Going much higher trips "BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!" errors */
+#define MAX_NESTED_LOCKS 8
-static char *torture_type = "spin_lock";
+static char *torture_type = IS_ENABLED(CONFIG_PREEMPT_RT) ? "raw_spin_lock" : "spin_lock";
module_param(torture_type, charp, 0444);
MODULE_PARM_DESC(torture_type,
"Type of lock to torture (spin_lock, spin_lock_irq, mutex_lock, ...)");
@@ -79,10 +77,12 @@ static void lock_torture_cleanup(void);
struct lock_torture_ops {
void (*init)(void);
void (*exit)(void);
+ int (*nested_lock)(int tid, u32 lockset);
int (*writelock)(int tid);
void (*write_delay)(struct torture_random_state *trsp);
void (*task_boost)(struct torture_random_state *trsp);
void (*writeunlock)(int tid);
+ void (*nested_unlock)(int tid, u32 lockset);
int (*readlock)(int tid);
void (*read_delay)(struct torture_random_state *trsp);
void (*readunlock)(int tid);
@@ -115,7 +115,7 @@ static int torture_lock_busted_write_lock(int tid __maybe_unused)
static void torture_lock_busted_write_delay(struct torture_random_state *trsp)
{
- const unsigned long longdelay_ms = 100;
+ const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
/* We want a long delay occasionally to force massive contention. */
if (!(torture_random(trsp) %
@@ -193,16 +193,18 @@ __acquires(torture_spinlock)
static void torture_spin_lock_write_delay(struct torture_random_state *trsp)
{
const unsigned long shortdelay_us = 2;
- const unsigned long longdelay_ms = 100;
+ const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
+ unsigned long j;
/* We want a short delay mostly to emulate likely code, and
* we want a long delay occasionally to force massive contention.
*/
- if (!(torture_random(trsp) %
- (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
+ if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 2000 * longdelay_ms))) {
+ j = jiffies;
mdelay(longdelay_ms);
- if (!(torture_random(trsp) %
- (cxt.nrealwriters_stress * 2 * shortdelay_us)))
+ pr_alert("%s: delay = %lu jiffies.\n", __func__, jiffies - j);
+ }
+ if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 200 * shortdelay_us)))
udelay(shortdelay_us);
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
torture_preempt_schedule(); /* Allow test to be preempted. */
@@ -252,6 +254,59 @@ static struct lock_torture_ops spin_lock_irq_ops = {
.name = "spin_lock_irq"
};
+static DEFINE_RAW_SPINLOCK(torture_raw_spinlock);
+
+static int torture_raw_spin_lock_write_lock(int tid __maybe_unused)
+__acquires(torture_raw_spinlock)
+{
+ raw_spin_lock(&torture_raw_spinlock);
+ return 0;
+}
+
+static void torture_raw_spin_lock_write_unlock(int tid __maybe_unused)
+__releases(torture_raw_spinlock)
+{
+ raw_spin_unlock(&torture_raw_spinlock);
+}
+
+static struct lock_torture_ops raw_spin_lock_ops = {
+ .writelock = torture_raw_spin_lock_write_lock,
+ .write_delay = torture_spin_lock_write_delay,
+ .task_boost = torture_rt_boost,
+ .writeunlock = torture_raw_spin_lock_write_unlock,
+ .readlock = NULL,
+ .read_delay = NULL,
+ .readunlock = NULL,
+ .name = "raw_spin_lock"
+};
+
+static int torture_raw_spin_lock_write_lock_irq(int tid __maybe_unused)
+__acquires(torture_raw_spinlock)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&torture_raw_spinlock, flags);
+ cxt.cur_ops->flags = flags;
+ return 0;
+}
+
+static void torture_raw_spin_lock_write_unlock_irq(int tid __maybe_unused)
+__releases(torture_raw_spinlock)
+{
+ raw_spin_unlock_irqrestore(&torture_raw_spinlock, cxt.cur_ops->flags);
+}
+
+static struct lock_torture_ops raw_spin_lock_irq_ops = {
+ .writelock = torture_raw_spin_lock_write_lock_irq,
+ .write_delay = torture_spin_lock_write_delay,
+ .task_boost = torture_rt_boost,
+ .writeunlock = torture_raw_spin_lock_write_unlock_irq,
+ .readlock = NULL,
+ .read_delay = NULL,
+ .readunlock = NULL,
+ .name = "raw_spin_lock_irq"
+};
+
static DEFINE_RWLOCK(torture_rwlock);
static int torture_rwlock_write_lock(int tid __maybe_unused)
@@ -264,7 +319,7 @@ __acquires(torture_rwlock)
static void torture_rwlock_write_delay(struct torture_random_state *trsp)
{
const unsigned long shortdelay_us = 2;
- const unsigned long longdelay_ms = 100;
+ const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
/* We want a short delay mostly to emulate likely code, and
* we want a long delay occasionally to force massive contention.
@@ -365,6 +420,28 @@ static struct lock_torture_ops rw_lock_irq_ops = {
};
static DEFINE_MUTEX(torture_mutex);
+static struct mutex torture_nested_mutexes[MAX_NESTED_LOCKS];
+static struct lock_class_key nested_mutex_keys[MAX_NESTED_LOCKS];
+
+static void torture_mutex_init(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_NESTED_LOCKS; i++)
+ __mutex_init(&torture_nested_mutexes[i], __func__,
+ &nested_mutex_keys[i]);
+}
+
+static int torture_mutex_nested_lock(int tid __maybe_unused,
+ u32 lockset)
+{
+ int i;
+
+ for (i = 0; i < nested_locks; i++)
+ if (lockset & (1 << i))
+ mutex_lock(&torture_nested_mutexes[i]);
+ return 0;
+}
static int torture_mutex_lock(int tid __maybe_unused)
__acquires(torture_mutex)
@@ -375,14 +452,12 @@ __acquires(torture_mutex)
static void torture_mutex_delay(struct torture_random_state *trsp)
{
- const unsigned long longdelay_ms = 100;
+ const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
/* We want a long delay occasionally to force massive contention. */
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms * 5);
- else
- mdelay(longdelay_ms / 5);
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
torture_preempt_schedule(); /* Allow test to be preempted. */
}
@@ -393,11 +468,24 @@ __releases(torture_mutex)
mutex_unlock(&torture_mutex);
}
+static void torture_mutex_nested_unlock(int tid __maybe_unused,
+ u32 lockset)
+{
+ int i;
+
+ for (i = nested_locks - 1; i >= 0; i--)
+ if (lockset & (1 << i))
+ mutex_unlock(&torture_nested_mutexes[i]);
+}
+
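Illustrative sketch (not from this patch): a standalone model of the lockset handling above. Bit i of a random mask selects nested lock i; locks are taken in ascending order and released in descending order so every thread uses the same locking order. The pthread mutexes are stand-ins for the kernel mutexes (build with -lpthread).

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_NESTED 8

static pthread_mutex_t nested[MAX_NESTED];

static void nested_lock(uint32_t lockset, int nlocks)
{
	int i;

	for (i = 0; i < nlocks; i++)		/* ascending: consistent acquire order */
		if (lockset & (1U << i))
			pthread_mutex_lock(&nested[i]);
}

static void nested_unlock(uint32_t lockset, int nlocks)
{
	int i;

	for (i = nlocks - 1; i >= 0; i--)	/* descending: reverse of acquisition */
		if (lockset & (1U << i))
			pthread_mutex_unlock(&nested[i]);
}

int main(void)
{
	uint32_t mask = 0xa5;	/* stand-in for torture_random() */
	int i;

	for (i = 0; i < MAX_NESTED; i++)
		pthread_mutex_init(&nested[i], NULL);
	nested_lock(mask, MAX_NESTED);
	printf("holding the locks selected by mask 0x%x\n", mask);
	nested_unlock(mask, MAX_NESTED);
	return 0;
}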
static struct lock_torture_ops mutex_lock_ops = {
+ .init = torture_mutex_init,
+ .nested_lock = torture_mutex_nested_lock,
.writelock = torture_mutex_lock,
.write_delay = torture_mutex_delay,
.task_boost = torture_rt_boost,
.writeunlock = torture_mutex_unlock,
+ .nested_unlock = torture_mutex_nested_unlock,
.readlock = NULL,
.read_delay = NULL,
.readunlock = NULL,
@@ -504,6 +592,28 @@ static struct lock_torture_ops ww_mutex_lock_ops = {
#ifdef CONFIG_RT_MUTEXES
static DEFINE_RT_MUTEX(torture_rtmutex);
+static struct rt_mutex torture_nested_rtmutexes[MAX_NESTED_LOCKS];
+static struct lock_class_key nested_rtmutex_keys[MAX_NESTED_LOCKS];
+
+static void torture_rtmutex_init(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_NESTED_LOCKS; i++)
+ __rt_mutex_init(&torture_nested_rtmutexes[i], __func__,
+ &nested_rtmutex_keys[i]);
+}
+
+static int torture_rtmutex_nested_lock(int tid __maybe_unused,
+ u32 lockset)
+{
+ int i;
+
+ for (i = 0; i < nested_locks; i++)
+ if (lockset & (1 << i))
+ rt_mutex_lock(&torture_nested_rtmutexes[i]);
+ return 0;
+}
static int torture_rtmutex_lock(int tid __maybe_unused)
__acquires(torture_rtmutex)
@@ -515,7 +625,7 @@ __acquires(torture_rtmutex)
static void torture_rtmutex_delay(struct torture_random_state *trsp)
{
const unsigned long shortdelay_us = 2;
- const unsigned long longdelay_ms = 100;
+ const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
/*
* We want a short delay mostly to emulate likely code, and
@@ -525,7 +635,7 @@ static void torture_rtmutex_delay(struct torture_random_state *trsp)
(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms);
if (!(torture_random(trsp) %
- (cxt.nrealwriters_stress * 2 * shortdelay_us)))
+ (cxt.nrealwriters_stress * 200 * shortdelay_us)))
udelay(shortdelay_us);
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
torture_preempt_schedule(); /* Allow test to be preempted. */
@@ -545,11 +655,24 @@ static void torture_rt_boost_rtmutex(struct torture_random_state *trsp)
__torture_rt_boost(trsp);
}
+static void torture_rtmutex_nested_unlock(int tid __maybe_unused,
+ u32 lockset)
+{
+ int i;
+
+ for (i = nested_locks - 1; i >= 0; i--)
+ if (lockset & (1 << i))
+ rt_mutex_unlock(&torture_nested_rtmutexes[i]);
+}
+
static struct lock_torture_ops rtmutex_lock_ops = {
+ .init = torture_rtmutex_init,
+ .nested_lock = torture_rtmutex_nested_lock,
.writelock = torture_rtmutex_lock,
.write_delay = torture_rtmutex_delay,
.task_boost = torture_rt_boost_rtmutex,
.writeunlock = torture_rtmutex_unlock,
+ .nested_unlock = torture_rtmutex_nested_unlock,
.readlock = NULL,
.read_delay = NULL,
.readunlock = NULL,
@@ -567,14 +690,12 @@ __acquires(torture_rwsem)
static void torture_rwsem_write_delay(struct torture_random_state *trsp)
{
- const unsigned long longdelay_ms = 100;
+ const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
/* We want a long delay occasionally to force massive contention. */
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms * 10);
- else
- mdelay(longdelay_ms / 10);
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
torture_preempt_schedule(); /* Allow test to be preempted. */
}
@@ -684,6 +805,8 @@ static int lock_torture_writer(void *arg)
struct lock_stress_stats *lwsp = arg;
int tid = lwsp - cxt.lwsa;
DEFINE_TORTURE_RANDOM(rand);
+ u32 lockset_mask;
+ bool skip_main_lock;
VERBOSE_TOROUT_STRING("lock_torture_writer task started");
set_user_nice(current, MAX_NICE);
@@ -692,19 +815,40 @@ static int lock_torture_writer(void *arg)
if ((torture_random(&rand) & 0xfffff) == 0)
schedule_timeout_uninterruptible(1);
- cxt.cur_ops->task_boost(&rand);
- cxt.cur_ops->writelock(tid);
- if (WARN_ON_ONCE(lock_is_write_held))
- lwsp->n_lock_fail++;
- lock_is_write_held = true;
- if (WARN_ON_ONCE(atomic_read(&lock_is_read_held)))
- lwsp->n_lock_fail++; /* rare, but... */
+ lockset_mask = torture_random(&rand);
+ /*
+ * When using nested_locks, we want to occasionally
+ * skip the main lock so we can avoid always serializing
+ * the lock chains on that central lock. By skipping the
+ * main lock occasionally, we can create different
+ * contention patterns (allowing for multiple disjoint
+ * blocked trees)
+ */
+ skip_main_lock = (nested_locks &&
+ !(torture_random(&rand) % 100));
- lwsp->n_lock_acquired++;
- cxt.cur_ops->write_delay(&rand);
- lock_is_write_held = false;
- WRITE_ONCE(last_lock_release, jiffies);
- cxt.cur_ops->writeunlock(tid);
+ cxt.cur_ops->task_boost(&rand);
+ if (cxt.cur_ops->nested_lock)
+ cxt.cur_ops->nested_lock(tid, lockset_mask);
+
+ if (!skip_main_lock) {
+ cxt.cur_ops->writelock(tid);
+ if (WARN_ON_ONCE(lock_is_write_held))
+ lwsp->n_lock_fail++;
+ lock_is_write_held = true;
+ if (WARN_ON_ONCE(atomic_read(&lock_is_read_held)))
+ lwsp->n_lock_fail++; /* rare, but... */
+
+ lwsp->n_lock_acquired++;
+ }
+ if (!skip_main_lock) {
+ cxt.cur_ops->write_delay(&rand);
+ lock_is_write_held = false;
+ WRITE_ONCE(last_lock_release, jiffies);
+ cxt.cur_ops->writeunlock(tid);
+ }
+ if (cxt.cur_ops->nested_unlock)
+ cxt.cur_ops->nested_unlock(tid, lockset_mask);
stutter_wait("lock_torture_writer");
} while (!torture_must_stop());
@@ -845,11 +989,11 @@ lock_torture_print_module_parms(struct lock_torture_ops *cur_ops,
const char *tag)
{
pr_alert("%s" TORTURE_FLAG
- "--- %s%s: nwriters_stress=%d nreaders_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n",
+ "--- %s%s: nwriters_stress=%d nreaders_stress=%d nested_locks=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n",
torture_type, tag, cxt.debug_lock ? " [debug]": "",
- cxt.nrealwriters_stress, cxt.nrealreaders_stress, stat_interval,
- verbose, shuffle_interval, stutter, shutdown_secs,
- onoff_interval, onoff_holdoff);
+ cxt.nrealwriters_stress, cxt.nrealreaders_stress,
+ nested_locks, stat_interval, verbose, shuffle_interval,
+ stutter, shutdown_secs, onoff_interval, onoff_holdoff);
}
static void lock_torture_cleanup(void)
@@ -919,6 +1063,7 @@ static int __init lock_torture_init(void)
static struct lock_torture_ops *torture_ops[] = {
&lock_busted_ops,
&spin_lock_ops, &spin_lock_irq_ops,
+ &raw_spin_lock_ops, &raw_spin_lock_irq_ops,
&rw_lock_ops, &rw_lock_irq_ops,
&mutex_lock_ops,
&ww_mutex_lock_ops,
@@ -1068,6 +1213,10 @@ static int __init lock_torture_init(void)
}
}
+ /* cap nested_locks to MAX_NESTED_LOCKS */
+ if (nested_locks > MAX_NESTED_LOCKS)
+ nested_locks = MAX_NESTED_LOCKS;
+
if (cxt.cur_ops->readlock) {
reader_tasks = kcalloc(cxt.nrealreaders_stress,
sizeof(reader_tasks[0]),
diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c
index 29dc253d03af..93cca6e69860 100644
--- a/kernel/locking/test-ww_mutex.c
+++ b/kernel/locking/test-ww_mutex.c
@@ -659,7 +659,7 @@ static int __init test_ww_mutex_init(void)
if (ret)
return ret;
- ret = stress(4095, hweight32(STRESS_ALL)*ncpus, STRESS_ALL);
+ ret = stress(2047, hweight32(STRESS_ALL)*ncpus, STRESS_ALL);
if (ret)
return ret;
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index ab62074174c3..bdd7eadb33d8 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -53,9 +53,6 @@ config RCU_EXPERT
Say N if you are unsure.
-config SRCU
- def_bool y
-
config TINY_SRCU
bool
default y if TINY_RCU
@@ -317,4 +314,22 @@ config RCU_LAZY
To save power, batch RCU callbacks and flush after delay, memory
pressure, or callback list growing too big.
+config RCU_DOUBLE_CHECK_CB_TIME
+ bool "RCU callback-batch backup time check"
+ depends on RCU_EXPERT
+ default n
+ help
+ Use this option to provide more precise enforcement of the
+ rcutree.rcu_resched_ns module parameter in situations where
+ a single RCU callback might run for hundreds of microseconds,
+ thus defeating the 32-callback batching used to amortize the
+ cost of the fine-grained but expensive local_clock() function.
+
+ This option rounds rcutree.rcu_resched_ns up to the next
+ jiffy, and overrides the 32-callback batching if this limit
+ is exceeded.
+
+ Say Y here if you need tighter callback-limit enforcement.
+ Say N here if you are unsure.
+
endmenu # "RCU Subsystem"
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 115616ac3bfa..4a1b9622598b 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -14,6 +14,43 @@
/*
* Grace-period counter management.
+ *
+ * The two least significant bits contain the control flags.
+ * The most significant bits contain the grace-period sequence counter.
+ *
+ * When both control flags are zero, no grace period is in progress.
+ * When either bit is non-zero, a grace period has started and is in
+ * progress. When the grace period completes, the control flags are reset
+ * to 0 and the grace-period sequence counter is incremented.
+ *
+ * However, some specific RCU usages make use of custom values.
+ *
+ * SRCU special control values:
+ *
+ * SRCU_SNP_INIT_SEQ : Invalid/init value set when SRCU node
+ * is initialized.
+ *
+ * SRCU_STATE_IDLE : No SRCU gp is in progress
+ *
+ * SRCU_STATE_SCAN1 : State set by rcu_seq_start(). Indicates
+ * we are scanning the readers on the slot
+ * defined as inactive (there might well
+ * be pending readers that will use that
+ * index, but their number is bounded).
+ *
+ * SRCU_STATE_SCAN2 : State set manually via rcu_seq_set_state()
+ * Indicates we are flipping the readers
+ * index and then scanning the readers on the
+ * slot newly designated as inactive (again,
+ * the number of pending readers that will use
+ * this inactive index is bounded).
+ *
+ * RCU polled GP special control value:
+ *
+ * RCU_GET_STATE_COMPLETED : State value indicating that the polled GP
+ * has already completed. This value covers
+ * both the state and the counter of the
+ * grace-period sequence number.
*/
#define RCU_SEQ_CTR_SHIFT 2
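Illustrative sketch (not from this patch): a standalone model of the encoding described above, with the two low-order bits holding the state and the remaining bits the counter (RCU_SEQ_CTR_SHIFT == 2). The helpers loosely mirror rcu_seq_ctr(), rcu_seq_state(), rcu_seq_start(), and rcu_seq_end().

#include <stdio.h>

#define SEQ_CTR_SHIFT	2
#define SEQ_STATE_MASK	((1UL << SEQ_CTR_SHIFT) - 1)

static unsigned long seq_ctr(unsigned long s)   { return s >> SEQ_CTR_SHIFT; }
static unsigned long seq_state(unsigned long s) { return s & SEQ_STATE_MASK; }

int main(void)
{
	unsigned long seq = 0;			/* idle: ctr=0, state=0 */

	seq |= 1;				/* "start": a grace period is now in progress */
	printf("ctr=%lu state=%lu\n", seq_ctr(seq), seq_state(seq));

	/* "end": clear the state bits and advance the counter by one. */
	seq = (seq_ctr(seq) + 1) << SEQ_CTR_SHIFT;
	printf("ctr=%lu state=%lu\n", seq_ctr(seq), seq_state(seq));
	return 0;
}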
@@ -341,11 +378,13 @@ extern void rcu_init_geometry(void);
* specified state structure (for SRCU) or the only rcu_state structure
* (for RCU).
*/
-#define srcu_for_each_node_breadth_first(sp, rnp) \
+#define _rcu_for_each_node_breadth_first(sp, rnp) \
for ((rnp) = &(sp)->node[0]; \
(rnp) < &(sp)->node[rcu_num_nodes]; (rnp)++)
#define rcu_for_each_node_breadth_first(rnp) \
- srcu_for_each_node_breadth_first(&rcu_state, rnp)
+ _rcu_for_each_node_breadth_first(&rcu_state, rnp)
+#define srcu_for_each_node_breadth_first(ssp, rnp) \
+ _rcu_for_each_node_breadth_first(ssp->srcu_sup, rnp)
/*
* Scan the leaves of the rcu_node hierarchy for the rcu_state structure.
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 91fb5905a008..d1221731c7cf 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -522,89 +522,6 @@ rcu_scale_print_module_parms(struct rcu_scale_ops *cur_ops, const char *tag)
scale_type, tag, nrealreaders, nrealwriters, verbose, shutdown);
}
-static void
-rcu_scale_cleanup(void)
-{
- int i;
- int j;
- int ngps = 0;
- u64 *wdp;
- u64 *wdpp;
-
- /*
- * Would like warning at start, but everything is expedited
- * during the mid-boot phase, so have to wait till the end.
- */
- if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp)
- SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
- if (rcu_gp_is_normal() && gp_exp)
- SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
- if (gp_exp && gp_async)
- SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
-
- if (torture_cleanup_begin())
- return;
- if (!cur_ops) {
- torture_cleanup_end();
- return;
- }
-
- if (reader_tasks) {
- for (i = 0; i < nrealreaders; i++)
- torture_stop_kthread(rcu_scale_reader,
- reader_tasks[i]);
- kfree(reader_tasks);
- }
-
- if (writer_tasks) {
- for (i = 0; i < nrealwriters; i++) {
- torture_stop_kthread(rcu_scale_writer,
- writer_tasks[i]);
- if (!writer_n_durations)
- continue;
- j = writer_n_durations[i];
- pr_alert("%s%s writer %d gps: %d\n",
- scale_type, SCALE_FLAG, i, j);
- ngps += j;
- }
- pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n",
- scale_type, SCALE_FLAG,
- t_rcu_scale_writer_started, t_rcu_scale_writer_finished,
- t_rcu_scale_writer_finished -
- t_rcu_scale_writer_started,
- ngps,
- rcuscale_seq_diff(b_rcu_gp_test_finished,
- b_rcu_gp_test_started));
- for (i = 0; i < nrealwriters; i++) {
- if (!writer_durations)
- break;
- if (!writer_n_durations)
- continue;
- wdpp = writer_durations[i];
- if (!wdpp)
- continue;
- for (j = 0; j < writer_n_durations[i]; j++) {
- wdp = &wdpp[j];
- pr_alert("%s%s %4d writer-duration: %5d %llu\n",
- scale_type, SCALE_FLAG,
- i, j, *wdp);
- if (j % 100 == 0)
- schedule_timeout_uninterruptible(1);
- }
- kfree(writer_durations[i]);
- }
- kfree(writer_tasks);
- kfree(writer_durations);
- kfree(writer_n_durations);
- }
-
- /* Do torture-type-specific cleanup operations. */
- if (cur_ops->cleanup != NULL)
- cur_ops->cleanup();
-
- torture_cleanup_end();
-}
-
/*
* Return the number if non-negative. If -1, the number of CPUs.
* If less than -1, that much less than the number of CPUs, but
@@ -625,21 +542,6 @@ static int compute_real(int n)
}
/*
- * RCU scalability shutdown kthread. Just waits to be awakened, then shuts
- * down system.
- */
-static int
-rcu_scale_shutdown(void *arg)
-{
- wait_event(shutdown_wq,
- atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters);
- smp_mb(); /* Wake before output. */
- rcu_scale_cleanup();
- kernel_power_off();
- return -EINVAL;
-}
-
-/*
* kfree_rcu() scalability tests: Start a kfree_rcu() loop on all CPUs for number
* of iterations and measure total time and number of GP for all iterations to complete.
*/
@@ -716,7 +618,7 @@ kfree_scale_thread(void *arg)
// is tested.
if ((kfree_rcu_test_single && !kfree_rcu_test_double) ||
(kfree_rcu_test_both && torture_random(&tr) & 0x800))
- kfree_rcu(alloc_ptr);
+ kfree_rcu_mightsleep(alloc_ptr);
else
kfree_rcu(alloc_ptr, rh);
}
@@ -771,8 +673,8 @@ kfree_scale_cleanup(void)
static int
kfree_scale_shutdown(void *arg)
{
- wait_event(shutdown_wq,
- atomic_read(&n_kfree_scale_thread_ended) >= kfree_nrealthreads);
+ wait_event_idle(shutdown_wq,
+ atomic_read(&n_kfree_scale_thread_ended) >= kfree_nrealthreads);
smp_mb(); /* Wake before output. */
@@ -875,6 +777,108 @@ unwind:
return firsterr;
}
+static void
+rcu_scale_cleanup(void)
+{
+ int i;
+ int j;
+ int ngps = 0;
+ u64 *wdp;
+ u64 *wdpp;
+
+ /*
+ * Would like warning at start, but everything is expedited
+ * during the mid-boot phase, so have to wait till the end.
+ */
+ if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp)
+ SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
+ if (rcu_gp_is_normal() && gp_exp)
+ SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
+ if (gp_exp && gp_async)
+ SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
+
+ if (kfree_rcu_test) {
+ kfree_scale_cleanup();
+ return;
+ }
+
+ if (torture_cleanup_begin())
+ return;
+ if (!cur_ops) {
+ torture_cleanup_end();
+ return;
+ }
+
+ if (reader_tasks) {
+ for (i = 0; i < nrealreaders; i++)
+ torture_stop_kthread(rcu_scale_reader,
+ reader_tasks[i]);
+ kfree(reader_tasks);
+ }
+
+ if (writer_tasks) {
+ for (i = 0; i < nrealwriters; i++) {
+ torture_stop_kthread(rcu_scale_writer,
+ writer_tasks[i]);
+ if (!writer_n_durations)
+ continue;
+ j = writer_n_durations[i];
+ pr_alert("%s%s writer %d gps: %d\n",
+ scale_type, SCALE_FLAG, i, j);
+ ngps += j;
+ }
+ pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n",
+ scale_type, SCALE_FLAG,
+ t_rcu_scale_writer_started, t_rcu_scale_writer_finished,
+ t_rcu_scale_writer_finished -
+ t_rcu_scale_writer_started,
+ ngps,
+ rcuscale_seq_diff(b_rcu_gp_test_finished,
+ b_rcu_gp_test_started));
+ for (i = 0; i < nrealwriters; i++) {
+ if (!writer_durations)
+ break;
+ if (!writer_n_durations)
+ continue;
+ wdpp = writer_durations[i];
+ if (!wdpp)
+ continue;
+ for (j = 0; j < writer_n_durations[i]; j++) {
+ wdp = &wdpp[j];
+ pr_alert("%s%s %4d writer-duration: %5d %llu\n",
+ scale_type, SCALE_FLAG,
+ i, j, *wdp);
+ if (j % 100 == 0)
+ schedule_timeout_uninterruptible(1);
+ }
+ kfree(writer_durations[i]);
+ }
+ kfree(writer_tasks);
+ kfree(writer_durations);
+ kfree(writer_n_durations);
+ }
+
+ /* Do torture-type-specific cleanup operations. */
+ if (cur_ops->cleanup != NULL)
+ cur_ops->cleanup();
+
+ torture_cleanup_end();
+}
+
+/*
+ * RCU scalability shutdown kthread. Just waits to be awakened, then shuts
+ * down system.
+ */
+static int
+rcu_scale_shutdown(void *arg)
+{
+ wait_event_idle(shutdown_wq, atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters);
+ smp_mb(); /* Wake before output. */
+ rcu_scale_cleanup();
+ kernel_power_off();
+ return -EINVAL;
+}
+
static int __init
rcu_scale_init(void)
{
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 8e6c023212cb..147551c23baf 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -119,7 +119,9 @@ torture_param(int, stutter, 5, "Number of seconds to run/halt test");
torture_param(int, test_boost, 1, "Test RCU prio boost: 0=no, 1=maybe, 2=yes.");
torture_param(int, test_boost_duration, 4, "Duration of each boost test, seconds.");
torture_param(int, test_boost_interval, 7, "Interval between boost tests, seconds.");
+torture_param(int, test_nmis, 0, "End-test NMI tests, 0 to disable.");
torture_param(bool, test_no_idle_hz, true, "Test support for tickless idle CPUs");
+torture_param(int, test_srcu_lockdep, 0, "Test specified SRCU deadlock scenario.");
torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
static char *torture_type = "rcu";
@@ -179,7 +181,6 @@ static atomic_t n_rcu_torture_mbchk_tries;
static atomic_t n_rcu_torture_error;
static long n_rcu_torture_barrier_error;
static long n_rcu_torture_boost_ktrerror;
-static long n_rcu_torture_boost_rterror;
static long n_rcu_torture_boost_failure;
static long n_rcu_torture_boosts;
static atomic_long_t n_rcu_torture_timers;
@@ -2194,12 +2195,11 @@ rcu_torture_stats_print(void)
atomic_read(&n_rcu_torture_alloc),
atomic_read(&n_rcu_torture_alloc_fail),
atomic_read(&n_rcu_torture_free));
- pr_cont("rtmbe: %d rtmbkf: %d/%d rtbe: %ld rtbke: %ld rtbre: %ld ",
+ pr_cont("rtmbe: %d rtmbkf: %d/%d rtbe: %ld rtbke: %ld ",
atomic_read(&n_rcu_torture_mberror),
atomic_read(&n_rcu_torture_mbchk_fail), atomic_read(&n_rcu_torture_mbchk_tries),
n_rcu_torture_barrier_error,
- n_rcu_torture_boost_ktrerror,
- n_rcu_torture_boost_rterror);
+ n_rcu_torture_boost_ktrerror);
pr_cont("rtbf: %ld rtb: %ld nt: %ld ",
n_rcu_torture_boost_failure,
n_rcu_torture_boosts,
@@ -2217,15 +2217,13 @@ rcu_torture_stats_print(void)
if (atomic_read(&n_rcu_torture_mberror) ||
atomic_read(&n_rcu_torture_mbchk_fail) ||
n_rcu_torture_barrier_error || n_rcu_torture_boost_ktrerror ||
- n_rcu_torture_boost_rterror || n_rcu_torture_boost_failure ||
- i > 1) {
+ n_rcu_torture_boost_failure || i > 1) {
pr_cont("%s", "!!! ");
atomic_inc(&n_rcu_torture_error);
WARN_ON_ONCE(atomic_read(&n_rcu_torture_mberror));
WARN_ON_ONCE(atomic_read(&n_rcu_torture_mbchk_fail));
WARN_ON_ONCE(n_rcu_torture_barrier_error); // rcu_barrier()
WARN_ON_ONCE(n_rcu_torture_boost_ktrerror); // no boost kthread
- WARN_ON_ONCE(n_rcu_torture_boost_rterror); // can't set RT prio
WARN_ON_ONCE(n_rcu_torture_boost_failure); // boost failed (TIMER_SOFTIRQ RT prio?)
WARN_ON_ONCE(i > 1); // Too-short grace period
}
@@ -2358,7 +2356,8 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
"n_barrier_cbs=%d "
"onoff_interval=%d onoff_holdoff=%d "
"read_exit_delay=%d read_exit_burst=%d "
- "nocbs_nthreads=%d nocbs_toggle=%d\n",
+ "nocbs_nthreads=%d nocbs_toggle=%d "
+ "test_nmis=%d\n",
torture_type, tag, nrealreaders, nfakewriters,
stat_interval, verbose, test_no_idle_hz, shuffle_interval,
stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
@@ -2369,7 +2368,8 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
n_barrier_cbs,
onoff_interval, onoff_holdoff,
read_exit_delay, read_exit_burst,
- nocbs_nthreads, nocbs_toggle);
+ nocbs_nthreads, nocbs_toggle,
+ test_nmis);
}
static int rcutorture_booster_cleanup(unsigned int cpu)
@@ -3273,6 +3273,29 @@ static void rcu_torture_read_exit_cleanup(void)
torture_stop_kthread(rcutorture_read_exit, read_exit_task);
}
+static void rcutorture_test_nmis(int n)
+{
+#if IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)
+ int cpu;
+ int dumpcpu;
+ int i;
+
+ for (i = 0; i < n; i++) {
+ preempt_disable();
+ cpu = smp_processor_id();
+ dumpcpu = cpu + 1;
+ if (dumpcpu >= nr_cpu_ids)
+ dumpcpu = 0;
+ pr_alert("%s: CPU %d invoking dump_cpu_task(%d)\n", __func__, cpu, dumpcpu);
+ dump_cpu_task(dumpcpu);
+ preempt_enable();
+ schedule_timeout_uninterruptible(15 * HZ);
+ }
+#else // #if IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)
+ WARN_ONCE(n, "Non-zero rcutorture.test_nmis=%d permitted only when rcutorture is built in.\n", test_nmis);
+#endif // #else // #if IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)
+}
+
static enum cpuhp_state rcutor_hp;
static void
@@ -3297,6 +3320,8 @@ rcu_torture_cleanup(void)
return;
}
+ rcutorture_test_nmis(test_nmis);
+
if (cur_ops->gp_kthread_dbg)
cur_ops->gp_kthread_dbg();
rcu_torture_read_exit_cleanup();
@@ -3463,6 +3488,188 @@ static void rcutorture_sync(void)
cur_ops->sync();
}
+static DEFINE_MUTEX(mut0);
+static DEFINE_MUTEX(mut1);
+static DEFINE_MUTEX(mut2);
+static DEFINE_MUTEX(mut3);
+static DEFINE_MUTEX(mut4);
+static DEFINE_MUTEX(mut5);
+static DEFINE_MUTEX(mut6);
+static DEFINE_MUTEX(mut7);
+static DEFINE_MUTEX(mut8);
+static DEFINE_MUTEX(mut9);
+
+static DECLARE_RWSEM(rwsem0);
+static DECLARE_RWSEM(rwsem1);
+static DECLARE_RWSEM(rwsem2);
+static DECLARE_RWSEM(rwsem3);
+static DECLARE_RWSEM(rwsem4);
+static DECLARE_RWSEM(rwsem5);
+static DECLARE_RWSEM(rwsem6);
+static DECLARE_RWSEM(rwsem7);
+static DECLARE_RWSEM(rwsem8);
+static DECLARE_RWSEM(rwsem9);
+
+DEFINE_STATIC_SRCU(srcu0);
+DEFINE_STATIC_SRCU(srcu1);
+DEFINE_STATIC_SRCU(srcu2);
+DEFINE_STATIC_SRCU(srcu3);
+DEFINE_STATIC_SRCU(srcu4);
+DEFINE_STATIC_SRCU(srcu5);
+DEFINE_STATIC_SRCU(srcu6);
+DEFINE_STATIC_SRCU(srcu7);
+DEFINE_STATIC_SRCU(srcu8);
+DEFINE_STATIC_SRCU(srcu9);
+
+static int srcu_lockdep_next(const char *f, const char *fl, const char *fs, const char *fu, int i,
+ int cyclelen, int deadlock)
+{
+ int j = i + 1;
+
+ if (j >= cyclelen)
+ j = deadlock ? 0 : -1;
+ if (j >= 0)
+ pr_info("%s: %s(%d), %s(%d), %s(%d)\n", f, fl, i, fs, j, fu, i);
+ else
+ pr_info("%s: %s(%d), %s(%d)\n", f, fl, i, fu, i);
+ return j;
+}
+
+// Test lockdep on SRCU-based deadlock scenarios.
+static void rcu_torture_init_srcu_lockdep(void)
+{
+ int cyclelen;
+ int deadlock;
+ bool err = false;
+ int i;
+ int j;
+ int idx;
+ struct mutex *muts[] = { &mut0, &mut1, &mut2, &mut3, &mut4,
+ &mut5, &mut6, &mut7, &mut8, &mut9 };
+ struct rw_semaphore *rwsems[] = { &rwsem0, &rwsem1, &rwsem2, &rwsem3, &rwsem4,
+ &rwsem5, &rwsem6, &rwsem7, &rwsem8, &rwsem9 };
+ struct srcu_struct *srcus[] = { &srcu0, &srcu1, &srcu2, &srcu3, &srcu4,
+ &srcu5, &srcu6, &srcu7, &srcu8, &srcu9 };
+ int testtype;
+
+ if (!test_srcu_lockdep)
+ return;
+
+ deadlock = test_srcu_lockdep / 1000;
+ testtype = (test_srcu_lockdep / 10) % 100;
+ cyclelen = test_srcu_lockdep % 10;
+ WARN_ON_ONCE(ARRAY_SIZE(muts) != ARRAY_SIZE(srcus));
+ if (WARN_ONCE(deadlock != !!deadlock,
+ "%s: test_srcu_lockdep=%d and deadlock digit %d must be zero or one.\n",
+ __func__, test_srcu_lockdep, deadlock))
+ err = true;
+ if (WARN_ONCE(cyclelen <= 0,
+ "%s: test_srcu_lockdep=%d and cycle-length digit %d must be greater than zero.\n",
+ __func__, test_srcu_lockdep, cyclelen))
+ err = true;
+ if (err)
+ goto err_out;
+
+ if (testtype == 0) {
+ pr_info("%s: test_srcu_lockdep = %05d: SRCU %d-way %sdeadlock.\n",
+ __func__, test_srcu_lockdep, cyclelen, deadlock ? "" : "non-");
+ if (deadlock && cyclelen == 1)
+ pr_info("%s: Expect hang.\n", __func__);
+ for (i = 0; i < cyclelen; i++) {
+ j = srcu_lockdep_next(__func__, "srcu_read_lock", "synchronize_srcu",
+ "srcu_read_unlock", i, cyclelen, deadlock);
+ idx = srcu_read_lock(srcus[i]);
+ if (j >= 0)
+ synchronize_srcu(srcus[j]);
+ srcu_read_unlock(srcus[i], idx);
+ }
+ return;
+ }
+
+ if (testtype == 1) {
+ pr_info("%s: test_srcu_lockdep = %05d: SRCU/mutex %d-way %sdeadlock.\n",
+ __func__, test_srcu_lockdep, cyclelen, deadlock ? "" : "non-");
+ for (i = 0; i < cyclelen; i++) {
+ pr_info("%s: srcu_read_lock(%d), mutex_lock(%d), mutex_unlock(%d), srcu_read_unlock(%d)\n",
+ __func__, i, i, i, i);
+ idx = srcu_read_lock(srcus[i]);
+ mutex_lock(muts[i]);
+ mutex_unlock(muts[i]);
+ srcu_read_unlock(srcus[i], idx);
+
+ j = srcu_lockdep_next(__func__, "mutex_lock", "synchronize_srcu",
+ "mutex_unlock", i, cyclelen, deadlock);
+ mutex_lock(muts[i]);
+ if (j >= 0)
+ synchronize_srcu(srcus[j]);
+ mutex_unlock(muts[i]);
+ }
+ return;
+ }
+
+ if (testtype == 2) {
+ pr_info("%s: test_srcu_lockdep = %05d: SRCU/rwsem %d-way %sdeadlock.\n",
+ __func__, test_srcu_lockdep, cyclelen, deadlock ? "" : "non-");
+ for (i = 0; i < cyclelen; i++) {
+ pr_info("%s: srcu_read_lock(%d), down_read(%d), up_read(%d), srcu_read_unlock(%d)\n",
+ __func__, i, i, i, i);
+ idx = srcu_read_lock(srcus[i]);
+ down_read(rwsems[i]);
+ up_read(rwsems[i]);
+ srcu_read_unlock(srcus[i], idx);
+
+ j = srcu_lockdep_next(__func__, "down_write", "synchronize_srcu",
+ "up_write", i, cyclelen, deadlock);
+ down_write(rwsems[i]);
+ if (j >= 0)
+ synchronize_srcu(srcus[j]);
+ up_write(rwsems[i]);
+ }
+ return;
+ }
+
+#ifdef CONFIG_TASKS_TRACE_RCU
+ if (testtype == 3) {
+ pr_info("%s: test_srcu_lockdep = %05d: SRCU and Tasks Trace RCU %d-way %sdeadlock.\n",
+ __func__, test_srcu_lockdep, cyclelen, deadlock ? "" : "non-");
+ if (deadlock && cyclelen == 1)
+ pr_info("%s: Expect hang.\n", __func__);
+ for (i = 0; i < cyclelen; i++) {
+ char *fl = i == 0 ? "rcu_read_lock_trace" : "srcu_read_lock";
+ char *fs = i == cyclelen - 1 ? "synchronize_rcu_tasks_trace"
+ : "synchronize_srcu";
+ char *fu = i == 0 ? "rcu_read_unlock_trace" : "srcu_read_unlock";
+
+ j = srcu_lockdep_next(__func__, fl, fs, fu, i, cyclelen, deadlock);
+ if (i == 0)
+ rcu_read_lock_trace();
+ else
+ idx = srcu_read_lock(srcus[i]);
+ if (j >= 0) {
+ if (i == cyclelen - 1)
+ synchronize_rcu_tasks_trace();
+ else
+ synchronize_srcu(srcus[j]);
+ }
+ if (i == 0)
+ rcu_read_unlock_trace();
+ else
+ srcu_read_unlock(srcus[i], idx);
+ }
+ return;
+ }
+#endif // #ifdef CONFIG_TASKS_TRACE_RCU
+
+err_out:
+ pr_info("%s: test_srcu_lockdep = %05d does nothing.\n", __func__, test_srcu_lockdep);
+ pr_info("%s: test_srcu_lockdep = DNNL.\n", __func__);
+ pr_info("%s: D: Deadlock if nonzero.\n", __func__);
+ pr_info("%s: NN: Test number, 0=SRCU, 1=SRCU/mutex, 2=SRCU/rwsem, 3=SRCU/Tasks Trace RCU.\n", __func__);
+ pr_info("%s: L: Cycle length.\n", __func__);
+ if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU))
+ pr_info("%s: NN=3 disallowed because kernel is built with CONFIG_TASKS_TRACE_RCU=n\n", __func__);
+}
+
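Illustrative sketch (not from this patch): a standalone decoder for the DNNL encoding of rcutorture.test_srcu_lockdep, using exactly the arithmetic above (thousands digit = deadlock, next two digits = test number, last digit = cycle length).

#include <stdio.h>

static void decode(int v)
{
	int deadlock = v / 1000;	/* D:  nonzero means a deadlock is expected */
	int testtype = (v / 10) % 100;	/* NN: 0=SRCU, 1=SRCU/mutex, 2=SRCU/rwsem, 3=SRCU/Tasks Trace RCU */
	int cyclelen = v % 10;		/* L:  length of the lock/sync cycle */

	printf("%05d -> deadlock=%d type=%d cyclelen=%d\n",
	       v, deadlock, testtype, cyclelen);
}

int main(void)
{
	decode(2);	/* non-deadlocking 2-way SRCU-only cycle */
	decode(1013);	/* deadlocking 3-way SRCU/mutex cycle */
	return 0;
}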
static int __init
rcu_torture_init(void)
{
@@ -3501,9 +3708,17 @@ rcu_torture_init(void)
pr_alert("rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n");
fqs_duration = 0;
}
+ if (nocbs_nthreads != 0 && (cur_ops != &rcu_ops ||
+ !IS_ENABLED(CONFIG_RCU_NOCB_CPU))) {
+ pr_alert("rcu-torture types: %s and CONFIG_RCU_NOCB_CPU=%d, nocb toggle disabled.\n",
+ cur_ops->name, IS_ENABLED(CONFIG_RCU_NOCB_CPU));
+ nocbs_nthreads = 0;
+ }
if (cur_ops->init)
cur_ops->init();
+ rcu_torture_init_srcu_lockdep();
+
if (nreaders >= 0) {
nrealreaders = nreaders;
} else {
@@ -3540,7 +3755,6 @@ rcu_torture_init(void)
atomic_set(&n_rcu_torture_error, 0);
n_rcu_torture_barrier_error = 0;
n_rcu_torture_boost_ktrerror = 0;
- n_rcu_torture_boost_rterror = 0;
n_rcu_torture_boost_failure = 0;
n_rcu_torture_boosts = 0;
for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
index afa3e1a2f690..1970ce5f22d4 100644
--- a/kernel/rcu/refscale.c
+++ b/kernel/rcu/refscale.c
@@ -1031,7 +1031,7 @@ ref_scale_cleanup(void)
static int
ref_scale_shutdown(void *arg)
{
- wait_event(shutdown_wq, shutdown_start);
+ wait_event_idle(shutdown_wq, shutdown_start);
smp_mb(); // Wake before output.
ref_scale_cleanup();
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index b12fb0cec44d..336af24e0fe3 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -197,6 +197,8 @@ void synchronize_srcu(struct srcu_struct *ssp)
{
struct rcu_synchronize rs;
+ srcu_lock_sync(&ssp->dep_map);
+
RCU_LOCKDEP_WARN(lockdep_is_held(ssp) ||
lock_is_held(&rcu_bh_lock_map) ||
lock_is_held(&rcu_lock_map) ||
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index ab4ee58af84b..20d7a238d675 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -103,7 +103,7 @@ do { \
#define spin_trylock_irqsave_rcu_node(p, flags) \
({ \
- bool ___locked = spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
+ bool ___locked = spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
\
if (___locked) \
smp_mb__after_unlock_lock(); \
@@ -135,8 +135,8 @@ static void init_srcu_struct_data(struct srcu_struct *ssp)
spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
rcu_segcblist_init(&sdp->srcu_cblist);
sdp->srcu_cblist_invoking = false;
- sdp->srcu_gp_seq_needed = ssp->srcu_gp_seq;
- sdp->srcu_gp_seq_needed_exp = ssp->srcu_gp_seq;
+ sdp->srcu_gp_seq_needed = ssp->srcu_sup->srcu_gp_seq;
+ sdp->srcu_gp_seq_needed_exp = ssp->srcu_sup->srcu_gp_seq;
sdp->mynode = NULL;
sdp->cpu = cpu;
INIT_WORK(&sdp->work, srcu_invoke_callbacks);
@@ -173,14 +173,14 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags)
/* Initialize geometry if it has not already been initialized. */
rcu_init_geometry();
- ssp->node = kcalloc(rcu_num_nodes, sizeof(*ssp->node), gfp_flags);
- if (!ssp->node)
+ ssp->srcu_sup->node = kcalloc(rcu_num_nodes, sizeof(*ssp->srcu_sup->node), gfp_flags);
+ if (!ssp->srcu_sup->node)
return false;
/* Work out the overall tree geometry. */
- ssp->level[0] = &ssp->node[0];
+ ssp->srcu_sup->level[0] = &ssp->srcu_sup->node[0];
for (i = 1; i < rcu_num_lvls; i++)
- ssp->level[i] = ssp->level[i - 1] + num_rcu_lvl[i - 1];
+ ssp->srcu_sup->level[i] = ssp->srcu_sup->level[i - 1] + num_rcu_lvl[i - 1];
rcu_init_levelspread(levelspread, num_rcu_lvl);
/* Each pass through this loop initializes one srcu_node structure. */
@@ -195,17 +195,17 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags)
snp->srcu_gp_seq_needed_exp = SRCU_SNP_INIT_SEQ;
snp->grplo = -1;
snp->grphi = -1;
- if (snp == &ssp->node[0]) {
+ if (snp == &ssp->srcu_sup->node[0]) {
/* Root node, special case. */
snp->srcu_parent = NULL;
continue;
}
/* Non-root node. */
- if (snp == ssp->level[level + 1])
+ if (snp == ssp->srcu_sup->level[level + 1])
level++;
- snp->srcu_parent = ssp->level[level - 1] +
- (snp - ssp->level[level]) /
+ snp->srcu_parent = ssp->srcu_sup->level[level - 1] +
+ (snp - ssp->srcu_sup->level[level]) /
levelspread[level - 1];
}
@@ -214,7 +214,7 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags)
* leaves of the srcu_node tree.
*/
level = rcu_num_lvls - 1;
- snp_first = ssp->level[level];
+ snp_first = ssp->srcu_sup->level[level];
for_each_possible_cpu(cpu) {
sdp = per_cpu_ptr(ssp->sda, cpu);
sdp->mynode = &snp_first[cpu / levelspread[level]];
@@ -225,7 +225,7 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags)
}
sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
}
- smp_store_release(&ssp->srcu_size_state, SRCU_SIZE_WAIT_BARRIER);
+ smp_store_release(&ssp->srcu_sup->srcu_size_state, SRCU_SIZE_WAIT_BARRIER);
return true;
}
@@ -236,36 +236,47 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags)
*/
static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
{
- ssp->srcu_size_state = SRCU_SIZE_SMALL;
- ssp->node = NULL;
- mutex_init(&ssp->srcu_cb_mutex);
- mutex_init(&ssp->srcu_gp_mutex);
+ if (!is_static)
+ ssp->srcu_sup = kzalloc(sizeof(*ssp->srcu_sup), GFP_KERNEL);
+ if (!ssp->srcu_sup)
+ return -ENOMEM;
+ if (!is_static)
+ spin_lock_init(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
+ ssp->srcu_sup->srcu_size_state = SRCU_SIZE_SMALL;
+ ssp->srcu_sup->node = NULL;
+ mutex_init(&ssp->srcu_sup->srcu_cb_mutex);
+ mutex_init(&ssp->srcu_sup->srcu_gp_mutex);
ssp->srcu_idx = 0;
- ssp->srcu_gp_seq = 0;
- ssp->srcu_barrier_seq = 0;
- mutex_init(&ssp->srcu_barrier_mutex);
- atomic_set(&ssp->srcu_barrier_cpu_cnt, 0);
- INIT_DELAYED_WORK(&ssp->work, process_srcu);
- ssp->sda_is_static = is_static;
+ ssp->srcu_sup->srcu_gp_seq = 0;
+ ssp->srcu_sup->srcu_barrier_seq = 0;
+ mutex_init(&ssp->srcu_sup->srcu_barrier_mutex);
+ atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
+ INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
+ ssp->srcu_sup->sda_is_static = is_static;
if (!is_static)
ssp->sda = alloc_percpu(struct srcu_data);
- if (!ssp->sda)
+ if (!ssp->sda) {
+ if (!is_static)
+ kfree(ssp->srcu_sup);
return -ENOMEM;
+ }
init_srcu_struct_data(ssp);
- ssp->srcu_gp_seq_needed_exp = 0;
- ssp->srcu_last_gp_end = ktime_get_mono_fast_ns();
- if (READ_ONCE(ssp->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) {
+ ssp->srcu_sup->srcu_gp_seq_needed_exp = 0;
+ ssp->srcu_sup->srcu_last_gp_end = ktime_get_mono_fast_ns();
+ if (READ_ONCE(ssp->srcu_sup->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) {
if (!init_srcu_struct_nodes(ssp, GFP_ATOMIC)) {
- if (!ssp->sda_is_static) {
+ if (!ssp->srcu_sup->sda_is_static) {
free_percpu(ssp->sda);
ssp->sda = NULL;
+ kfree(ssp->srcu_sup);
return -ENOMEM;
}
} else {
- WRITE_ONCE(ssp->srcu_size_state, SRCU_SIZE_BIG);
+ WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG);
}
}
- smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */
+ ssp->srcu_sup->srcu_ssp = ssp;
+ smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed, 0); /* Init done. */
return 0;
}
@@ -277,7 +288,6 @@ int __init_srcu_struct(struct srcu_struct *ssp, const char *name,
/* Don't re-initialize a lock while it is held. */
debug_check_no_locks_freed((void *)ssp, sizeof(*ssp));
lockdep_init_map(&ssp->dep_map, name, key, 0);
- spin_lock_init(&ACCESS_PRIVATE(ssp, lock));
return init_srcu_struct_fields(ssp, false);
}
EXPORT_SYMBOL_GPL(__init_srcu_struct);
@@ -294,7 +304,6 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct);
*/
int init_srcu_struct(struct srcu_struct *ssp)
{
- spin_lock_init(&ACCESS_PRIVATE(ssp, lock));
return init_srcu_struct_fields(ssp, false);
}
EXPORT_SYMBOL_GPL(init_srcu_struct);
@@ -306,8 +315,8 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
*/
static void __srcu_transition_to_big(struct srcu_struct *ssp)
{
- lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock));
- smp_store_release(&ssp->srcu_size_state, SRCU_SIZE_ALLOC);
+ lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
+ smp_store_release(&ssp->srcu_sup->srcu_size_state, SRCU_SIZE_ALLOC);
}
/*
@@ -318,15 +327,15 @@ static void srcu_transition_to_big(struct srcu_struct *ssp)
unsigned long flags;
/* Double-checked locking on ->srcu_size-state. */
- if (smp_load_acquire(&ssp->srcu_size_state) != SRCU_SIZE_SMALL)
+ if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL)
return;
- spin_lock_irqsave_rcu_node(ssp, flags);
- if (smp_load_acquire(&ssp->srcu_size_state) != SRCU_SIZE_SMALL) {
- spin_unlock_irqrestore_rcu_node(ssp, flags);
+ spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
+ if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) {
+ spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
return;
}
__srcu_transition_to_big(ssp);
- spin_unlock_irqrestore_rcu_node(ssp, flags);
+ spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
}
/*
@@ -337,14 +346,14 @@ static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp)
{
unsigned long j;
- if (!SRCU_SIZING_IS_CONTEND() || ssp->srcu_size_state)
+ if (!SRCU_SIZING_IS_CONTEND() || ssp->srcu_sup->srcu_size_state)
return;
j = jiffies;
- if (ssp->srcu_size_jiffies != j) {
- ssp->srcu_size_jiffies = j;
- ssp->srcu_n_lock_retries = 0;
+ if (ssp->srcu_sup->srcu_size_jiffies != j) {
+ ssp->srcu_sup->srcu_size_jiffies = j;
+ ssp->srcu_sup->srcu_n_lock_retries = 0;
}
- if (++ssp->srcu_n_lock_retries <= small_contention_lim)
+ if (++ssp->srcu_sup->srcu_n_lock_retries <= small_contention_lim)
return;
__srcu_transition_to_big(ssp);
}
@@ -361,9 +370,9 @@ static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned lon
if (spin_trylock_irqsave_rcu_node(sdp, *flags))
return;
- spin_lock_irqsave_rcu_node(ssp, *flags);
+ spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags);
spin_lock_irqsave_check_contention(ssp);
- spin_unlock_irqrestore_rcu_node(ssp, *flags);
+ spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, *flags);
spin_lock_irqsave_rcu_node(sdp, *flags);
}
@@ -375,9 +384,9 @@ static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned lon
*/
static void spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags)
{
- if (spin_trylock_irqsave_rcu_node(ssp, *flags))
+ if (spin_trylock_irqsave_rcu_node(ssp->srcu_sup, *flags))
return;
- spin_lock_irqsave_rcu_node(ssp, *flags);
+ spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags);
spin_lock_irqsave_check_contention(ssp);
}
@@ -394,15 +403,15 @@ static void check_init_srcu_struct(struct srcu_struct *ssp)
unsigned long flags;
/* The smp_load_acquire() pairs with the smp_store_release(). */
- if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq_needed))) /*^^^*/
+ if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed))) /*^^^*/
return; /* Already initialized. */
- spin_lock_irqsave_rcu_node(ssp, flags);
- if (!rcu_seq_state(ssp->srcu_gp_seq_needed)) {
- spin_unlock_irqrestore_rcu_node(ssp, flags);
+ spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
+ if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq_needed)) {
+ spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
return;
}
init_srcu_struct_fields(ssp, true);
- spin_unlock_irqrestore_rcu_node(ssp, flags);
+ spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
}
/*
@@ -607,17 +616,18 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp)
unsigned long gpstart;
unsigned long j;
unsigned long jbase = SRCU_INTERVAL;
+ struct srcu_usage *sup = ssp->srcu_sup;
- if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
+ if (ULONG_CMP_LT(READ_ONCE(sup->srcu_gp_seq), READ_ONCE(sup->srcu_gp_seq_needed_exp)))
jbase = 0;
- if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) {
+ if (rcu_seq_state(READ_ONCE(sup->srcu_gp_seq))) {
j = jiffies - 1;
- gpstart = READ_ONCE(ssp->srcu_gp_start);
+ gpstart = READ_ONCE(sup->srcu_gp_start);
if (time_after(j, gpstart))
jbase += j - gpstart;
if (!jbase) {
- WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
- if (READ_ONCE(ssp->srcu_n_exp_nodelay) > srcu_max_nodelay_phase)
+ WRITE_ONCE(sup->srcu_n_exp_nodelay, READ_ONCE(sup->srcu_n_exp_nodelay) + 1);
+ if (READ_ONCE(sup->srcu_n_exp_nodelay) > srcu_max_nodelay_phase)
jbase = 1;
}
}
@@ -634,12 +644,13 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp)
void cleanup_srcu_struct(struct srcu_struct *ssp)
{
int cpu;
+ struct srcu_usage *sup = ssp->srcu_sup;
if (WARN_ON(!srcu_get_delay(ssp)))
return; /* Just leak it! */
if (WARN_ON(srcu_readers_active(ssp)))
return; /* Just leak it! */
- flush_delayed_work(&ssp->work);
+ flush_delayed_work(&sup->work);
for_each_possible_cpu(cpu) {
struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
@@ -648,21 +659,23 @@ void cleanup_srcu_struct(struct srcu_struct *ssp)
if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist)))
return; /* Forgot srcu_barrier(), so just leak it! */
}
- if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
- WARN_ON(rcu_seq_current(&ssp->srcu_gp_seq) != ssp->srcu_gp_seq_needed) ||
+ if (WARN_ON(rcu_seq_state(READ_ONCE(sup->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
+ WARN_ON(rcu_seq_current(&sup->srcu_gp_seq) != sup->srcu_gp_seq_needed) ||
WARN_ON(srcu_readers_active(ssp))) {
pr_info("%s: Active srcu_struct %p read state: %d gp state: %lu/%lu\n",
- __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)),
- rcu_seq_current(&ssp->srcu_gp_seq), ssp->srcu_gp_seq_needed);
+ __func__, ssp, rcu_seq_state(READ_ONCE(sup->srcu_gp_seq)),
+ rcu_seq_current(&sup->srcu_gp_seq), sup->srcu_gp_seq_needed);
return; /* Caller forgot to stop doing call_srcu()? */
}
- if (!ssp->sda_is_static) {
+ kfree(sup->node);
+ sup->node = NULL;
+ sup->srcu_size_state = SRCU_SIZE_SMALL;
+ if (!sup->sda_is_static) {
free_percpu(ssp->sda);
ssp->sda = NULL;
+ kfree(sup);
+ ssp->srcu_sup = NULL;
}
- kfree(ssp->node);
- ssp->node = NULL;
- ssp->srcu_size_state = SRCU_SIZE_SMALL;
}
EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
@@ -760,23 +773,23 @@ static void srcu_gp_start(struct srcu_struct *ssp)
struct srcu_data *sdp;
int state;
- if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
+ if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id());
else
sdp = this_cpu_ptr(ssp->sda);
- lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock));
- WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed));
+ lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
+ WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed));
spin_lock_rcu_node(sdp); /* Interrupts already disabled. */
rcu_segcblist_advance(&sdp->srcu_cblist,
- rcu_seq_current(&ssp->srcu_gp_seq));
+ rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
- rcu_seq_snap(&ssp->srcu_gp_seq));
+ rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq));
spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */
- WRITE_ONCE(ssp->srcu_gp_start, jiffies);
- WRITE_ONCE(ssp->srcu_n_exp_nodelay, 0);
+ WRITE_ONCE(ssp->srcu_sup->srcu_gp_start, jiffies);
+ WRITE_ONCE(ssp->srcu_sup->srcu_n_exp_nodelay, 0);
smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
- rcu_seq_start(&ssp->srcu_gp_seq);
- state = rcu_seq_state(ssp->srcu_gp_seq);
+ rcu_seq_start(&ssp->srcu_sup->srcu_gp_seq);
+ state = rcu_seq_state(ssp->srcu_sup->srcu_gp_seq);
WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
}
@@ -849,28 +862,29 @@ static void srcu_gp_end(struct srcu_struct *ssp)
unsigned long sgsne;
struct srcu_node *snp;
int ss_state;
+ struct srcu_usage *sup = ssp->srcu_sup;
/* Prevent more than one additional grace period. */
- mutex_lock(&ssp->srcu_cb_mutex);
+ mutex_lock(&sup->srcu_cb_mutex);
/* End the current grace period. */
- spin_lock_irq_rcu_node(ssp);
- idx = rcu_seq_state(ssp->srcu_gp_seq);
+ spin_lock_irq_rcu_node(sup);
+ idx = rcu_seq_state(sup->srcu_gp_seq);
WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
- if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
+ if (ULONG_CMP_LT(READ_ONCE(sup->srcu_gp_seq), READ_ONCE(sup->srcu_gp_seq_needed_exp)))
cbdelay = 0;
- WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
- rcu_seq_end(&ssp->srcu_gp_seq);
- gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
- if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq))
- WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, gpseq);
- spin_unlock_irq_rcu_node(ssp);
- mutex_unlock(&ssp->srcu_gp_mutex);
+ WRITE_ONCE(sup->srcu_last_gp_end, ktime_get_mono_fast_ns());
+ rcu_seq_end(&sup->srcu_gp_seq);
+ gpseq = rcu_seq_current(&sup->srcu_gp_seq);
+ if (ULONG_CMP_LT(sup->srcu_gp_seq_needed_exp, gpseq))
+ WRITE_ONCE(sup->srcu_gp_seq_needed_exp, gpseq);
+ spin_unlock_irq_rcu_node(sup);
+ mutex_unlock(&sup->srcu_gp_mutex);
/* A new grace period can start at this point. But only one. */
/* Initiate callback invocation as needed. */
- ss_state = smp_load_acquire(&ssp->srcu_size_state);
+ ss_state = smp_load_acquire(&sup->srcu_size_state);
if (ss_state < SRCU_SIZE_WAIT_BARRIER) {
srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, get_boot_cpu_id()),
cbdelay);
@@ -879,7 +893,7 @@ static void srcu_gp_end(struct srcu_struct *ssp)
srcu_for_each_node_breadth_first(ssp, snp) {
spin_lock_irq_rcu_node(snp);
cbs = false;
- last_lvl = snp >= ssp->level[rcu_num_lvls - 1];
+ last_lvl = snp >= sup->level[rcu_num_lvls - 1];
if (last_lvl)
cbs = ss_state < SRCU_SIZE_BIG || snp->srcu_have_cbs[idx] == gpseq;
snp->srcu_have_cbs[idx] = gpseq;
@@ -911,18 +925,18 @@ static void srcu_gp_end(struct srcu_struct *ssp)
}
/* Callback initiation done, allow grace periods after next. */
- mutex_unlock(&ssp->srcu_cb_mutex);
+ mutex_unlock(&sup->srcu_cb_mutex);
/* Start a new grace period if needed. */
- spin_lock_irq_rcu_node(ssp);
- gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
+ spin_lock_irq_rcu_node(sup);
+ gpseq = rcu_seq_current(&sup->srcu_gp_seq);
if (!rcu_seq_state(gpseq) &&
- ULONG_CMP_LT(gpseq, ssp->srcu_gp_seq_needed)) {
+ ULONG_CMP_LT(gpseq, sup->srcu_gp_seq_needed)) {
srcu_gp_start(ssp);
- spin_unlock_irq_rcu_node(ssp);
+ spin_unlock_irq_rcu_node(sup);
srcu_reschedule(ssp, 0);
} else {
- spin_unlock_irq_rcu_node(ssp);
+ spin_unlock_irq_rcu_node(sup);
}
/* Transition to big if needed. */
@@ -930,7 +944,7 @@ static void srcu_gp_end(struct srcu_struct *ssp)
if (ss_state == SRCU_SIZE_ALLOC)
init_srcu_struct_nodes(ssp, GFP_KERNEL);
else
- smp_store_release(&ssp->srcu_size_state, ss_state + 1);
+ smp_store_release(&sup->srcu_size_state, ss_state + 1);
}
}
@@ -950,7 +964,7 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp
if (snp)
for (; snp != NULL; snp = snp->srcu_parent) {
sgsne = READ_ONCE(snp->srcu_gp_seq_needed_exp);
- if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) ||
+ if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) ||
(!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)))
return;
spin_lock_irqsave_rcu_node(snp, flags);
@@ -963,9 +977,9 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp
spin_unlock_irqrestore_rcu_node(snp, flags);
}
spin_lock_irqsave_ssp_contention(ssp, &flags);
- if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s))
- WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s);
- spin_unlock_irqrestore_rcu_node(ssp, flags);
+ if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, s))
+ WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, s);
+ spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
}
/*
@@ -990,9 +1004,10 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
struct srcu_node *snp;
struct srcu_node *snp_leaf;
unsigned long snp_seq;
+ struct srcu_usage *sup = ssp->srcu_sup;
/* Ensure that snp node tree is fully initialized before traversing it */
- if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
+ if (smp_load_acquire(&sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
snp_leaf = NULL;
else
snp_leaf = sdp->mynode;
@@ -1000,7 +1015,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
if (snp_leaf)
/* Each pass through the loop does one level of the srcu_node tree. */
for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) {
- if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) && snp != snp_leaf)
+ if (WARN_ON_ONCE(rcu_seq_done(&sup->srcu_gp_seq, s)) && snp != snp_leaf)
return; /* GP already done and CBs recorded. */
spin_lock_irqsave_rcu_node(snp, flags);
snp_seq = snp->srcu_have_cbs[idx];
@@ -1027,20 +1042,20 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
/* Top of tree, must ensure the grace period will be started. */
spin_lock_irqsave_ssp_contention(ssp, &flags);
- if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed, s)) {
+ if (ULONG_CMP_LT(sup->srcu_gp_seq_needed, s)) {
/*
* Record need for grace period s. Pair with load
* acquire setting up for initialization.
*/
- smp_store_release(&ssp->srcu_gp_seq_needed, s); /*^^^*/
+ smp_store_release(&sup->srcu_gp_seq_needed, s); /*^^^*/
}
- if (!do_norm && ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s))
- WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s);
+ if (!do_norm && ULONG_CMP_LT(sup->srcu_gp_seq_needed_exp, s))
+ WRITE_ONCE(sup->srcu_gp_seq_needed_exp, s);
/* If grace period not already in progress, start it. */
- if (!WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) &&
- rcu_seq_state(ssp->srcu_gp_seq) == SRCU_STATE_IDLE) {
- WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed));
+ if (!WARN_ON_ONCE(rcu_seq_done(&sup->srcu_gp_seq, s)) &&
+ rcu_seq_state(sup->srcu_gp_seq) == SRCU_STATE_IDLE) {
+ WARN_ON_ONCE(ULONG_CMP_GE(sup->srcu_gp_seq, sup->srcu_gp_seq_needed));
srcu_gp_start(ssp);
// And how can that list_add() in the "else" clause
@@ -1049,12 +1064,12 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
// can only be executed during early boot when there is only
// the one boot CPU running with interrupts still disabled.
if (likely(srcu_init_done))
- queue_delayed_work(rcu_gp_wq, &ssp->work,
+ queue_delayed_work(rcu_gp_wq, &sup->work,
!!srcu_get_delay(ssp));
- else if (list_empty(&ssp->work.work.entry))
- list_add(&ssp->work.work.entry, &srcu_boot_list);
+ else if (list_empty(&sup->work.work.entry))
+ list_add(&sup->work.work.entry, &srcu_boot_list);
}
- spin_unlock_irqrestore_rcu_node(ssp, flags);
+ spin_unlock_irqrestore_rcu_node(sup, flags);
}
/*
@@ -1085,16 +1100,36 @@ static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount)
static void srcu_flip(struct srcu_struct *ssp)
{
/*
- * Ensure that if this updater saw a given reader's increment
- * from __srcu_read_lock(), that reader was using an old value
- * of ->srcu_idx. Also ensure that if a given reader sees the
- * new value of ->srcu_idx, this updater's earlier scans cannot
- * have seen that reader's increments (which is OK, because this
- * grace period need not wait on that reader).
+ * Because the flip of ->srcu_idx is executed only if the
+ * preceding call to srcu_readers_active_idx_check() found that
+ * the ->srcu_unlock_count[] and ->srcu_lock_count[] sums matched
+ * and because that summing uses atomic_long_read(), there is
+ * ordering due to a control dependency between that summing and
+ * the WRITE_ONCE() in this call to srcu_flip(). This ordering
+ * ensures that if this updater saw a given reader's increment from
+ * __srcu_read_lock(), that reader was using a value of ->srcu_idx
+ * from before the previous call to srcu_flip(), which should be
+ * quite rare. This ordering thus helps forward progress because
+ * the grace period could otherwise be delayed by additional
+ * calls to __srcu_read_lock() using that old (soon to be new)
+ * value of ->srcu_idx.
+ *
+ * This sum-equality check and ordering also ensures that if
+ * a given call to __srcu_read_lock() uses the new value of
+ * ->srcu_idx, this updater's earlier scans cannot have seen
+ * that reader's increments, which is all to the good, because
+ * this grace period need not wait on that reader. After all,
+ * if those earlier scans had seen that reader, there would have
+ * been a sum mismatch and this code would not be reached.
+ *
+ * This means that the following smp_mb() is redundant, but
+ * it stays until either (1) Compilers learn about this sort of
+ * control dependency or (2) Some production workload running on
+ * a production system is unduly delayed by this slowpath smp_mb().
*/
smp_mb(); /* E */ /* Pairs with B and C. */
- WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1);
+ WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); // Flip the counter.
/*
* Ensure that if the updater misses an __srcu_read_unlock()
@@ -1154,18 +1189,18 @@ static bool srcu_might_be_idle(struct srcu_struct *ssp)
/* First, see if enough time has passed since the last GP. */
t = ktime_get_mono_fast_ns();
- tlast = READ_ONCE(ssp->srcu_last_gp_end);
+ tlast = READ_ONCE(ssp->srcu_sup->srcu_last_gp_end);
if (exp_holdoff == 0 ||
time_in_range_open(t, tlast, tlast + exp_holdoff))
return false; /* Too soon after last GP. */
/* Next, check for probable idleness. */
- curseq = rcu_seq_current(&ssp->srcu_gp_seq);
+ curseq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq);
smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */
- if (ULONG_CMP_LT(curseq, READ_ONCE(ssp->srcu_gp_seq_needed)))
+ if (ULONG_CMP_LT(curseq, READ_ONCE(ssp->srcu_sup->srcu_gp_seq_needed)))
return false; /* Grace period in progress, so not idle. */
smp_mb(); /* Order ->srcu_gp_seq with prior access. */
- if (curseq != rcu_seq_current(&ssp->srcu_gp_seq))
+ if (curseq != rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq))
return false; /* GP # changed, so not idle. */
return true; /* With reasonable probability, idle! */
}
@@ -1199,7 +1234,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
* sequence number cannot wrap around in the meantime.
*/
idx = __srcu_read_lock_nmisafe(ssp);
- ss_state = smp_load_acquire(&ssp->srcu_size_state);
+ ss_state = smp_load_acquire(&ssp->srcu_sup->srcu_size_state);
if (ss_state < SRCU_SIZE_WAIT_CALL)
sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id());
else
@@ -1208,8 +1243,8 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
if (rhp)
rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp);
rcu_segcblist_advance(&sdp->srcu_cblist,
- rcu_seq_current(&ssp->srcu_gp_seq));
- s = rcu_seq_snap(&ssp->srcu_gp_seq);
+ rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
+ s = rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq);
(void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s);
if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) {
sdp->srcu_gp_seq_needed = s;
@@ -1307,6 +1342,8 @@ static void __synchronize_srcu(struct srcu_struct *ssp, bool do_norm)
{
struct rcu_synchronize rcu;
+ srcu_lock_sync(&ssp->dep_map);
+
RCU_LOCKDEP_WARN(lockdep_is_held(ssp) ||
lock_is_held(&rcu_bh_lock_map) ||
lock_is_held(&rcu_lock_map) ||
@@ -1420,7 +1457,7 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp)
// Any prior manipulation of SRCU-protected data must happen
// before the load from ->srcu_gp_seq.
smp_mb();
- return rcu_seq_snap(&ssp->srcu_gp_seq);
+ return rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq);
}
EXPORT_SYMBOL_GPL(get_state_synchronize_srcu);
@@ -1467,7 +1504,7 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu);
*/
bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
{
- if (!rcu_seq_done(&ssp->srcu_gp_seq, cookie))
+ if (!rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, cookie))
return false;
// Ensure that the end of the SRCU grace period happens before
// any subsequent code that the caller might execute.
@@ -1486,8 +1523,8 @@ static void srcu_barrier_cb(struct rcu_head *rhp)
sdp = container_of(rhp, struct srcu_data, srcu_barrier_head);
ssp = sdp->ssp;
- if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))
- complete(&ssp->srcu_barrier_completion);
+ if (atomic_dec_and_test(&ssp->srcu_sup->srcu_barrier_cpu_cnt))
+ complete(&ssp->srcu_sup->srcu_barrier_completion);
}
/*
@@ -1501,13 +1538,13 @@ static void srcu_barrier_cb(struct rcu_head *rhp)
static void srcu_barrier_one_cpu(struct srcu_struct *ssp, struct srcu_data *sdp)
{
spin_lock_irq_rcu_node(sdp);
- atomic_inc(&ssp->srcu_barrier_cpu_cnt);
+ atomic_inc(&ssp->srcu_sup->srcu_barrier_cpu_cnt);
sdp->srcu_barrier_head.func = srcu_barrier_cb;
debug_rcu_head_queue(&sdp->srcu_barrier_head);
if (!rcu_segcblist_entrain(&sdp->srcu_cblist,
&sdp->srcu_barrier_head)) {
debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
- atomic_dec(&ssp->srcu_barrier_cpu_cnt);
+ atomic_dec(&ssp->srcu_sup->srcu_barrier_cpu_cnt);
}
spin_unlock_irq_rcu_node(sdp);
}
@@ -1520,23 +1557,23 @@ void srcu_barrier(struct srcu_struct *ssp)
{
int cpu;
int idx;
- unsigned long s = rcu_seq_snap(&ssp->srcu_barrier_seq);
+ unsigned long s = rcu_seq_snap(&ssp->srcu_sup->srcu_barrier_seq);
check_init_srcu_struct(ssp);
- mutex_lock(&ssp->srcu_barrier_mutex);
- if (rcu_seq_done(&ssp->srcu_barrier_seq, s)) {
+ mutex_lock(&ssp->srcu_sup->srcu_barrier_mutex);
+ if (rcu_seq_done(&ssp->srcu_sup->srcu_barrier_seq, s)) {
smp_mb(); /* Force ordering following return. */
- mutex_unlock(&ssp->srcu_barrier_mutex);
+ mutex_unlock(&ssp->srcu_sup->srcu_barrier_mutex);
return; /* Someone else did our work for us. */
}
- rcu_seq_start(&ssp->srcu_barrier_seq);
- init_completion(&ssp->srcu_barrier_completion);
+ rcu_seq_start(&ssp->srcu_sup->srcu_barrier_seq);
+ init_completion(&ssp->srcu_sup->srcu_barrier_completion);
/* Initial count prevents reaching zero until all CBs are posted. */
- atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
+ atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 1);
idx = __srcu_read_lock_nmisafe(ssp);
- if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
+ if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, get_boot_cpu_id()));
else
for_each_possible_cpu(cpu)
@@ -1544,12 +1581,12 @@ void srcu_barrier(struct srcu_struct *ssp)
__srcu_read_unlock_nmisafe(ssp, idx);
/* Remove the initial count, at which point reaching zero can happen. */
- if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))
- complete(&ssp->srcu_barrier_completion);
- wait_for_completion(&ssp->srcu_barrier_completion);
+ if (atomic_dec_and_test(&ssp->srcu_sup->srcu_barrier_cpu_cnt))
+ complete(&ssp->srcu_sup->srcu_barrier_completion);
+ wait_for_completion(&ssp->srcu_sup->srcu_barrier_completion);
- rcu_seq_end(&ssp->srcu_barrier_seq);
- mutex_unlock(&ssp->srcu_barrier_mutex);
+ rcu_seq_end(&ssp->srcu_sup->srcu_barrier_seq);
+ mutex_unlock(&ssp->srcu_sup->srcu_barrier_mutex);
}
EXPORT_SYMBOL_GPL(srcu_barrier);
@@ -1575,7 +1612,7 @@ static void srcu_advance_state(struct srcu_struct *ssp)
{
int idx;
- mutex_lock(&ssp->srcu_gp_mutex);
+ mutex_lock(&ssp->srcu_sup->srcu_gp_mutex);
/*
* Because readers might be delayed for an extended period after
@@ -1587,39 +1624,39 @@ static void srcu_advance_state(struct srcu_struct *ssp)
* The load-acquire ensures that we see the accesses performed
* by the prior grace period.
*/
- idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq)); /* ^^^ */
+ idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq)); /* ^^^ */
if (idx == SRCU_STATE_IDLE) {
- spin_lock_irq_rcu_node(ssp);
- if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) {
- WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq));
- spin_unlock_irq_rcu_node(ssp);
- mutex_unlock(&ssp->srcu_gp_mutex);
+ spin_lock_irq_rcu_node(ssp->srcu_sup);
+ if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) {
+ WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq));
+ spin_unlock_irq_rcu_node(ssp->srcu_sup);
+ mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
return;
}
- idx = rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq));
+ idx = rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq));
if (idx == SRCU_STATE_IDLE)
srcu_gp_start(ssp);
- spin_unlock_irq_rcu_node(ssp);
+ spin_unlock_irq_rcu_node(ssp->srcu_sup);
if (idx != SRCU_STATE_IDLE) {
- mutex_unlock(&ssp->srcu_gp_mutex);
+ mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
return; /* Someone else started the grace period. */
}
}
- if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
+ if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
idx = 1 ^ (ssp->srcu_idx & 1);
if (!try_check_zero(ssp, idx, 1)) {
- mutex_unlock(&ssp->srcu_gp_mutex);
+ mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
return; /* readers present, retry later. */
}
srcu_flip(ssp);
- spin_lock_irq_rcu_node(ssp);
- rcu_seq_set_state(&ssp->srcu_gp_seq, SRCU_STATE_SCAN2);
- ssp->srcu_n_exp_nodelay = 0;
- spin_unlock_irq_rcu_node(ssp);
+ spin_lock_irq_rcu_node(ssp->srcu_sup);
+ rcu_seq_set_state(&ssp->srcu_sup->srcu_gp_seq, SRCU_STATE_SCAN2);
+ ssp->srcu_sup->srcu_n_exp_nodelay = 0;
+ spin_unlock_irq_rcu_node(ssp->srcu_sup);
}
- if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
+ if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
/*
* SRCU read-side critical sections are normally short,
@@ -1627,10 +1664,10 @@ static void srcu_advance_state(struct srcu_struct *ssp)
*/
idx = 1 ^ (ssp->srcu_idx & 1);
if (!try_check_zero(ssp, idx, 2)) {
- mutex_unlock(&ssp->srcu_gp_mutex);
+ mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
return; /* readers present, retry later. */
}
- ssp->srcu_n_exp_nodelay = 0;
+ ssp->srcu_sup->srcu_n_exp_nodelay = 0;
srcu_gp_end(ssp); /* Releases ->srcu_gp_mutex. */
}
}
@@ -1656,7 +1693,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
rcu_cblist_init(&ready_cbs);
spin_lock_irq_rcu_node(sdp);
rcu_segcblist_advance(&sdp->srcu_cblist,
- rcu_seq_current(&ssp->srcu_gp_seq));
+ rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
if (sdp->srcu_cblist_invoking ||
!rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
spin_unlock_irq_rcu_node(sdp);
@@ -1684,7 +1721,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
spin_lock_irq_rcu_node(sdp);
rcu_segcblist_add_len(&sdp->srcu_cblist, -len);
(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
- rcu_seq_snap(&ssp->srcu_gp_seq));
+ rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq));
sdp->srcu_cblist_invoking = false;
more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
spin_unlock_irq_rcu_node(sdp);
@@ -1700,20 +1737,20 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay)
{
bool pushgp = true;
- spin_lock_irq_rcu_node(ssp);
- if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) {
- if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq))) {
+ spin_lock_irq_rcu_node(ssp->srcu_sup);
+ if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) {
+ if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq))) {
/* All requests fulfilled, time to go idle. */
pushgp = false;
}
- } else if (!rcu_seq_state(ssp->srcu_gp_seq)) {
+ } else if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq)) {
/* Outstanding request and no GP. Start one. */
srcu_gp_start(ssp);
}
- spin_unlock_irq_rcu_node(ssp);
+ spin_unlock_irq_rcu_node(ssp->srcu_sup);
if (pushgp)
- queue_delayed_work(rcu_gp_wq, &ssp->work, delay);
+ queue_delayed_work(rcu_gp_wq, &ssp->srcu_sup->work, delay);
}
/*
@@ -1724,22 +1761,24 @@ static void process_srcu(struct work_struct *work)
unsigned long curdelay;
unsigned long j;
struct srcu_struct *ssp;
+ struct srcu_usage *sup;
- ssp = container_of(work, struct srcu_struct, work.work);
+ sup = container_of(work, struct srcu_usage, work.work);
+ ssp = sup->srcu_ssp;
srcu_advance_state(ssp);
curdelay = srcu_get_delay(ssp);
if (curdelay) {
- WRITE_ONCE(ssp->reschedule_count, 0);
+ WRITE_ONCE(sup->reschedule_count, 0);
} else {
j = jiffies;
- if (READ_ONCE(ssp->reschedule_jiffies) == j) {
- WRITE_ONCE(ssp->reschedule_count, READ_ONCE(ssp->reschedule_count) + 1);
- if (READ_ONCE(ssp->reschedule_count) > srcu_max_nodelay)
+ if (READ_ONCE(sup->reschedule_jiffies) == j) {
+ WRITE_ONCE(sup->reschedule_count, READ_ONCE(sup->reschedule_count) + 1);
+ if (READ_ONCE(sup->reschedule_count) > srcu_max_nodelay)
curdelay = 1;
} else {
- WRITE_ONCE(ssp->reschedule_count, 1);
- WRITE_ONCE(ssp->reschedule_jiffies, j);
+ WRITE_ONCE(sup->reschedule_count, 1);
+ WRITE_ONCE(sup->reschedule_jiffies, j);
}
}
srcu_reschedule(ssp, curdelay);
@@ -1752,7 +1791,7 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type,
if (test_type != SRCU_FLAVOR)
return;
*flags = 0;
- *gp_seq = rcu_seq_current(&ssp->srcu_gp_seq);
+ *gp_seq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq);
}
EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);
@@ -1774,14 +1813,14 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
int cpu;
int idx;
unsigned long s0 = 0, s1 = 0;
- int ss_state = READ_ONCE(ssp->srcu_size_state);
+ int ss_state = READ_ONCE(ssp->srcu_sup->srcu_size_state);
int ss_state_idx = ss_state;
idx = ssp->srcu_idx & 0x1;
if (ss_state < 0 || ss_state >= ARRAY_SIZE(srcu_size_state_name))
ss_state_idx = ARRAY_SIZE(srcu_size_state_name) - 1;
pr_alert("%s%s Tree SRCU g%ld state %d (%s)",
- tt, tf, rcu_seq_current(&ssp->srcu_gp_seq), ss_state,
+ tt, tf, rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq), ss_state,
srcu_size_state_name[ss_state_idx]);
if (!ssp->sda) {
// Called after cleanup_srcu_struct(), perhaps.
@@ -1838,7 +1877,7 @@ early_initcall(srcu_bootup_announce);
void __init srcu_init(void)
{
- struct srcu_struct *ssp;
+ struct srcu_usage *sup;
/* Decide on srcu_struct-size strategy. */
if (SRCU_SIZING_IS(SRCU_SIZING_AUTO)) {
@@ -1858,12 +1897,13 @@ void __init srcu_init(void)
*/
srcu_init_done = true;
while (!list_empty(&srcu_boot_list)) {
- ssp = list_first_entry(&srcu_boot_list, struct srcu_struct,
+ sup = list_first_entry(&srcu_boot_list, struct srcu_usage,
work.work.entry);
- list_del_init(&ssp->work.work.entry);
- if (SRCU_SIZING_IS(SRCU_SIZING_INIT) && ssp->srcu_size_state == SRCU_SIZE_SMALL)
- ssp->srcu_size_state = SRCU_SIZE_ALLOC;
- queue_work(rcu_gp_wq, &ssp->work.work);
+ list_del_init(&sup->work.work.entry);
+ if (SRCU_SIZING_IS(SRCU_SIZING_INIT) &&
+ sup->srcu_size_state == SRCU_SIZE_SMALL)
+ sup->srcu_size_state = SRCU_SIZE_ALLOC;
+ queue_work(rcu_gp_wq, &sup->work.work);
}
}
@@ -1873,13 +1913,14 @@ void __init srcu_init(void)
static int srcu_module_coming(struct module *mod)
{
int i;
+ struct srcu_struct *ssp;
struct srcu_struct **sspp = mod->srcu_struct_ptrs;
- int ret;
for (i = 0; i < mod->num_srcu_structs; i++) {
- ret = init_srcu_struct(*(sspp++));
- if (WARN_ON_ONCE(ret))
- return ret;
+ ssp = *(sspp++);
+ ssp->sda = alloc_percpu(struct srcu_data);
+ if (WARN_ON_ONCE(!ssp->sda))
+ return -ENOMEM;
}
return 0;
}
@@ -1888,10 +1929,17 @@ static int srcu_module_coming(struct module *mod)
static void srcu_module_going(struct module *mod)
{
int i;
+ struct srcu_struct *ssp;
struct srcu_struct **sspp = mod->srcu_struct_ptrs;
- for (i = 0; i < mod->num_srcu_structs; i++)
- cleanup_srcu_struct(*(sspp++));
+ for (i = 0; i < mod->num_srcu_structs; i++) {
+ ssp = *(sspp++);
+ if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed)) &&
+ !WARN_ON_ONCE(!ssp->srcu_sup->sda_is_static))
+ cleanup_srcu_struct(ssp);
+ if (!WARN_ON(srcu_readers_active(ssp)))
+ free_percpu(ssp->sda);
+ }
}
/* Handle one module, either coming or going. */
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index bfb5e1549f2b..cf7b00af9474 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -136,8 +136,16 @@ static struct rcu_tasks rt_name = \
.kname = #rt_name, \
}
+#ifdef CONFIG_TASKS_RCU
/* Track exiting tasks in order to allow them to be waited for. */
DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
+#endif
+
+#ifdef CONFIG_TASKS_RCU
+/* Report delay in synchronize_srcu() completion in rcu_tasks_postscan(). */
+static void tasks_rcu_exit_srcu_stall(struct timer_list *unused);
+static DEFINE_TIMER(tasks_rcu_exit_srcu_stall_timer, tasks_rcu_exit_srcu_stall);
+#endif
/* Avoid IPIing CPUs early in the grace period. */
#define RCU_TASK_IPI_DELAY (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) ? HZ / 2 : 0)
@@ -233,7 +241,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
if (rcu_task_enqueue_lim < 0) {
rcu_task_enqueue_lim = 1;
rcu_task_cb_adjust = true;
- pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
} else if (rcu_task_enqueue_lim == 0) {
rcu_task_enqueue_lim = 1;
}
@@ -264,7 +271,9 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
}
raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
- pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim));
+
+ pr_info("%s: Setting shift to %d and lim to %d rcu_task_cb_adjust=%d.\n", rtp->name,
+ data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim), rcu_task_cb_adjust);
}
// IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic().
@@ -830,6 +839,13 @@ static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop)
/* Processing between scanning taskslist and draining the holdout list. */
static void rcu_tasks_postscan(struct list_head *hop)
{
+ int rtsi = READ_ONCE(rcu_task_stall_info);
+
+ if (!IS_ENABLED(CONFIG_TINY_RCU)) {
+ tasks_rcu_exit_srcu_stall_timer.expires = jiffies + rtsi;
+ add_timer(&tasks_rcu_exit_srcu_stall_timer);
+ }
+
/*
* Exiting tasks may escape the tasklist scan. Those are vulnerable
* until their final schedule() with TASK_DEAD state. To cope with
@@ -848,6 +864,9 @@ static void rcu_tasks_postscan(struct list_head *hop)
* call to synchronize_rcu().
*/
synchronize_srcu(&tasks_rcu_exit_srcu);
+
+ if (!IS_ENABLED(CONFIG_TINY_RCU))
+ del_timer_sync(&tasks_rcu_exit_srcu_stall_timer);
}
/* See if tasks are still holding out, complain if so. */
@@ -923,6 +942,21 @@ static void rcu_tasks_postgp(struct rcu_tasks *rtp)
void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func);
DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks");
+static void tasks_rcu_exit_srcu_stall(struct timer_list *unused)
+{
+#ifndef CONFIG_TINY_RCU
+ int rtsi;
+
+ rtsi = READ_ONCE(rcu_task_stall_info);
+ pr_info("%s: %s grace period number %lu (since boot) gp_state: %s is %lu jiffies old.\n",
+ __func__, rcu_tasks.kname, rcu_tasks.tasks_gp_seq,
+ tasks_gp_state_getname(&rcu_tasks), jiffies - rcu_tasks.gp_jiffies);
+ pr_info("Please check any exiting tasks stuck between calls to exit_tasks_rcu_start() and exit_tasks_rcu_finish()\n");
+ tasks_rcu_exit_srcu_stall_timer.expires = jiffies + rtsi;
+ add_timer(&tasks_rcu_exit_srcu_stall_timer);
+#endif // #ifndef CONFIG_TINY_RCU
+}
+
/**
* call_rcu_tasks() - Queue an RCU for invocation task-based grace period
* @rhp: structure to be used for queueing the RCU updates.
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 7b95ee98a1a5..9cc0a7766fd2 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -640,6 +640,7 @@ void __rcu_irq_enter_check_tick(void)
}
raw_spin_unlock_rcu_node(rdp->mynode);
}
+NOKPROBE_SYMBOL(__rcu_irq_enter_check_tick);
#endif /* CONFIG_NO_HZ_FULL */
/*
@@ -1955,7 +1956,6 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
{
unsigned long flags;
unsigned long mask;
- bool needwake = false;
bool needacc = false;
struct rcu_node *rnp;
@@ -1987,7 +1987,12 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
* NOCB kthreads have their own way to deal with that...
*/
if (!rcu_rdp_is_offloaded(rdp)) {
- needwake = rcu_accelerate_cbs(rnp, rdp);
+ /*
+ * The current GP has not yet ended, so it
+ * should not be possible for rcu_accelerate_cbs()
+ * to return true. So complain, but don't awaken.
+ */
+ WARN_ON_ONCE(rcu_accelerate_cbs(rnp, rdp));
} else if (!rcu_segcblist_completely_offloaded(&rdp->cblist)) {
/*
* ...but NOCB kthreads may miss or delay callbacks acceleration
@@ -1999,8 +2004,6 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
rcu_disable_urgency_upon_qs(rdp);
rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
/* ^^^ Released rnp->lock */
- if (needwake)
- rcu_gp_kthread_wake();
if (needacc) {
rcu_nocb_lock_irqsave(rdp, flags);
@@ -2043,19 +2046,35 @@ rcu_check_quiescent_state(struct rcu_data *rdp)
rcu_report_qs_rdp(rdp);
}
+/* Return true if callback-invocation time limit exceeded. */
+static bool rcu_do_batch_check_time(long count, long tlimit,
+ bool jlimit_check, unsigned long jlimit)
+{
+ // Invoke local_clock() only once per 32 consecutive callbacks.
+ return unlikely(tlimit) &&
+ (!likely(count & 31) ||
+ (IS_ENABLED(CONFIG_RCU_DOUBLE_CHECK_CB_TIME) &&
+ jlimit_check && time_after(jiffies, jlimit))) &&
+ local_clock() >= tlimit;
+}
+
/*
* Invoke any RCU callbacks that have made it to the end of their grace
* period. Throttle as specified by rdp->blimit.
*/
static void rcu_do_batch(struct rcu_data *rdp)
{
+ long bl;
+ long count = 0;
int div;
bool __maybe_unused empty;
unsigned long flags;
- struct rcu_head *rhp;
+ unsigned long jlimit;
+ bool jlimit_check = false;
+ long pending;
struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
- long bl, count = 0;
- long pending, tlimit = 0;
+ struct rcu_head *rhp;
+ long tlimit = 0;
/* If no callbacks are ready, just return. */
if (!rcu_segcblist_ready_cbs(&rdp->cblist)) {
@@ -2079,11 +2098,15 @@ static void rcu_do_batch(struct rcu_data *rdp)
div = READ_ONCE(rcu_divisor);
div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div;
bl = max(rdp->blimit, pending >> div);
- if (in_serving_softirq() && unlikely(bl > 100)) {
+ if ((in_serving_softirq() || rdp->rcu_cpu_kthread_status == RCU_KTHREAD_RUNNING) &&
+ (IS_ENABLED(CONFIG_RCU_DOUBLE_CHECK_CB_TIME) || unlikely(bl > 100))) {
+ const long npj = NSEC_PER_SEC / HZ;
long rrn = READ_ONCE(rcu_resched_ns);
rrn = rrn < NSEC_PER_MSEC ? NSEC_PER_MSEC : rrn > NSEC_PER_SEC ? NSEC_PER_SEC : rrn;
tlimit = local_clock() + rrn;
+ jlimit = jiffies + (rrn + npj + 1) / npj;
+ jlimit_check = true;
}
trace_rcu_batch_start(rcu_state.name,
rcu_segcblist_n_cbs(&rdp->cblist), bl);
@@ -2123,19 +2146,23 @@ static void rcu_do_batch(struct rcu_data *rdp)
* Make sure we don't spend too much time here and deprive other
* softirq vectors of CPU cycles.
*/
- if (unlikely(tlimit)) {
- /* only call local_clock() every 32 callbacks */
- if (likely((count & 31) || local_clock() < tlimit))
- continue;
- /* Exceeded the time limit, so leave. */
+ if (rcu_do_batch_check_time(count, tlimit, jlimit_check, jlimit))
break;
- }
} else {
+ // In rcuc/rcuoc context, so no worries about
+ // depriving other softirq vectors of CPU cycles.
local_bh_enable();
lockdep_assert_irqs_enabled();
cond_resched_tasks_rcu_qs();
lockdep_assert_irqs_enabled();
local_bh_disable();
+ // But rcuc kthreads can delay quiescent-state
+ // reporting, so check time limits for them.
+ if (rdp->rcu_cpu_kthread_status == RCU_KTHREAD_RUNNING &&
+ rcu_do_batch_check_time(count, tlimit, jlimit_check, jlimit)) {
+ rdp->rcu_cpu_has_work = 1;
+ break;
+ }
}
}
@@ -2751,7 +2778,7 @@ EXPORT_SYMBOL_GPL(call_rcu);
*/
struct kvfree_rcu_bulk_data {
struct list_head list;
- unsigned long gp_snap;
+ struct rcu_gp_oldstate gp_snap;
unsigned long nr_records;
void *records[];
};
@@ -2775,6 +2802,7 @@ struct kvfree_rcu_bulk_data {
struct kfree_rcu_cpu_work {
struct rcu_work rcu_work;
struct rcu_head *head_free;
+ struct rcu_gp_oldstate head_free_gp_snap;
struct list_head bulk_head_free[FREE_N_CHANNELS];
struct kfree_rcu_cpu *krcp;
};
@@ -2915,24 +2943,25 @@ kvfree_rcu_bulk(struct kfree_rcu_cpu *krcp,
unsigned long flags;
int i;
- debug_rcu_bhead_unqueue(bnode);
-
- rcu_lock_acquire(&rcu_callback_map);
- if (idx == 0) { // kmalloc() / kfree().
- trace_rcu_invoke_kfree_bulk_callback(
- rcu_state.name, bnode->nr_records,
- bnode->records);
-
- kfree_bulk(bnode->nr_records, bnode->records);
- } else { // vmalloc() / vfree().
- for (i = 0; i < bnode->nr_records; i++) {
- trace_rcu_invoke_kvfree_callback(
- rcu_state.name, bnode->records[i], 0);
-
- vfree(bnode->records[i]);
+ if (!WARN_ON_ONCE(!poll_state_synchronize_rcu_full(&bnode->gp_snap))) {
+ debug_rcu_bhead_unqueue(bnode);
+ rcu_lock_acquire(&rcu_callback_map);
+ if (idx == 0) { // kmalloc() / kfree().
+ trace_rcu_invoke_kfree_bulk_callback(
+ rcu_state.name, bnode->nr_records,
+ bnode->records);
+
+ kfree_bulk(bnode->nr_records, bnode->records);
+ } else { // vmalloc() / vfree().
+ for (i = 0; i < bnode->nr_records; i++) {
+ trace_rcu_invoke_kvfree_callback(
+ rcu_state.name, bnode->records[i], 0);
+
+ vfree(bnode->records[i]);
+ }
}
+ rcu_lock_release(&rcu_callback_map);
}
- rcu_lock_release(&rcu_callback_map);
raw_spin_lock_irqsave(&krcp->lock, flags);
if (put_cached_bnode(krcp, bnode))
@@ -2979,6 +3008,7 @@ static void kfree_rcu_work(struct work_struct *work)
struct rcu_head *head;
struct kfree_rcu_cpu *krcp;
struct kfree_rcu_cpu_work *krwp;
+ struct rcu_gp_oldstate head_gp_snap;
int i;
krwp = container_of(to_rcu_work(work),
@@ -2993,6 +3023,7 @@ static void kfree_rcu_work(struct work_struct *work)
// Channel 3.
head = krwp->head_free;
krwp->head_free = NULL;
+ head_gp_snap = krwp->head_free_gp_snap;
raw_spin_unlock_irqrestore(&krcp->lock, flags);
// Handle the first two channels.
@@ -3009,7 +3040,8 @@ static void kfree_rcu_work(struct work_struct *work)
* queued on a linked list through their rcu_head structures.
* This list is named "Channel 3".
*/
- kvfree_rcu_list(head);
+ if (head && !WARN_ON_ONCE(!poll_state_synchronize_rcu_full(&head_gp_snap)))
+ kvfree_rcu_list(head);
}
static bool
@@ -3076,7 +3108,7 @@ kvfree_rcu_drain_ready(struct kfree_rcu_cpu *krcp)
INIT_LIST_HEAD(&bulk_ready[i]);
list_for_each_entry_safe_reverse(bnode, n, &krcp->bulk_head[i], list) {
- if (!poll_state_synchronize_rcu(bnode->gp_snap))
+ if (!poll_state_synchronize_rcu_full(&bnode->gp_snap))
break;
atomic_sub(bnode->nr_records, &krcp->bulk_count[i]);
@@ -3141,6 +3173,7 @@ static void kfree_rcu_monitor(struct work_struct *work)
// objects queued on the linked list.
if (!krwp->head_free) {
krwp->head_free = krcp->head;
+ get_state_synchronize_rcu_full(&krwp->head_free_gp_snap);
atomic_set(&krcp->head_count, 0);
WRITE_ONCE(krcp->head, NULL);
}
@@ -3280,7 +3313,7 @@ add_ptr_to_bulk_krc_lock(struct kfree_rcu_cpu **krcp,
// Finally insert and update the GP for this page.
bnode->records[bnode->nr_records++] = ptr;
- bnode->gp_snap = get_state_synchronize_rcu();
+ get_state_synchronize_rcu_full(&bnode->gp_snap);
atomic_inc(&(*krcp)->bulk_count[idx]);
return true;
@@ -4951,9 +4984,8 @@ void __init rcu_init(void)
else
qovld_calc = qovld;
- // Kick-start any polled grace periods that started early.
- if (!(per_cpu_ptr(&rcu_data, cpu)->mynode->exp_seq_poll_rq & 0x1))
- (void)start_poll_synchronize_rcu_expedited();
+ // Kick-start in case any polled grace periods started early.
+ (void)start_poll_synchronize_rcu_expedited();
rcu_test_sync_prims();
}
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 249c2967d9e6..3b7abb58157d 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -594,6 +594,7 @@ static void synchronize_rcu_expedited_wait(void)
struct rcu_data *rdp;
struct rcu_node *rnp;
struct rcu_node *rnp_root = rcu_get_root();
+ unsigned long flags;
trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
jiffies_stall = rcu_exp_jiffies_till_stall_check();
@@ -602,17 +603,17 @@ static void synchronize_rcu_expedited_wait(void)
if (synchronize_rcu_expedited_wait_once(1))
return;
rcu_for_each_leaf_node(rnp) {
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
mask = READ_ONCE(rnp->expmask);
for_each_leaf_node_cpu_mask(rnp, cpu, mask) {
rdp = per_cpu_ptr(&rcu_data, cpu);
if (rdp->rcu_forced_tick_exp)
continue;
rdp->rcu_forced_tick_exp = true;
- preempt_disable();
if (cpu_online(cpu))
tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
- preempt_enable();
}
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
j = READ_ONCE(jiffies_till_first_fqs);
if (synchronize_rcu_expedited_wait_once(j + HZ))
@@ -802,9 +803,11 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
int ndetected = 0;
struct task_struct *t;
- if (!READ_ONCE(rnp->exp_tasks))
- return 0;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ if (!rnp->exp_tasks) {
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ return 0;
+ }
t = list_entry(rnp->exp_tasks->prev,
struct task_struct, rcu_node_entry);
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
@@ -1065,9 +1068,10 @@ unsigned long start_poll_synchronize_rcu_expedited(void)
if (rcu_init_invoked())
raw_spin_lock_irqsave(&rnp->exp_poll_lock, flags);
if (!poll_state_synchronize_rcu(s)) {
- rnp->exp_seq_poll_rq = s;
- if (rcu_init_invoked())
+ if (rcu_init_invoked()) {
+ rnp->exp_seq_poll_rq = s;
queue_work(rcu_gp_wq, &rnp->exp_poll_wq);
+ }
}
if (rcu_init_invoked())
raw_spin_unlock_irqrestore(&rnp->exp_poll_lock, flags);
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 9e1c8caec5ce..f2280616f9d5 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1312,6 +1312,7 @@ int rcu_nocb_cpu_offload(int cpu)
}
EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload);
+#ifdef CONFIG_RCU_LAZY
static unsigned long
lazy_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
@@ -1360,6 +1361,7 @@ static struct shrinker lazy_rcu_shrinker = {
.batch = 0,
.seeks = DEFAULT_SEEKS,
};
+#endif // #ifdef CONFIG_RCU_LAZY
void __init rcu_init_nohz(void)
{
@@ -1391,8 +1393,10 @@ void __init rcu_init_nohz(void)
if (!rcu_state.nocb_is_setup)
return;
+#ifdef CONFIG_RCU_LAZY
if (register_shrinker(&lazy_rcu_shrinker, "rcu-lazy"))
pr_err("Failed to register lazy_rcu shrinker!\n");
+#endif // #ifdef CONFIG_RCU_LAZY
if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n");
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index b0e3c9205946..a46506f7ec6d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -281,6 +281,11 @@ static bool check_tick_dependency(atomic_t *dep)
return true;
}
+ if (val & TICK_DEP_MASK_RCU_EXP) {
+ trace_tick_stop(0, TICK_DEP_MASK_RCU_EXP);
+ return true;
+ }
+
return false;
}
@@ -527,7 +532,7 @@ void __init tick_nohz_full_setup(cpumask_var_t cpumask)
tick_nohz_full_running = true;
}
-static int tick_nohz_cpu_down(unsigned int cpu)
+bool tick_nohz_cpu_hotpluggable(unsigned int cpu)
{
/*
* The tick_do_timer_cpu CPU handles housekeeping duty (unbound
@@ -535,8 +540,13 @@ static int tick_nohz_cpu_down(unsigned int cpu)
* CPUs. It must remain online when nohz full is enabled.
*/
if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
- return -EBUSY;
- return 0;
+ return false;
+ return true;
+}
+
+static int tick_nohz_cpu_down(unsigned int cpu)
+{
+ return tick_nohz_cpu_hotpluggable(cpu) ? 0 : -EBUSY;
}
void __init tick_nohz_init(void)
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 4496975f2029..efbbec2caff8 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -159,7 +159,7 @@ static void osnoise_unregister_instance(struct trace_array *tr)
if (!found)
return;
- kvfree_rcu(inst);
+ kvfree_rcu_mightsleep(inst);
}
/*
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 20d0c4a97633..2d2616678295 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -1172,7 +1172,7 @@ int trace_probe_remove_file(struct trace_probe *tp,
return -ENOENT;
list_del_rcu(&link->list);
- kvfree_rcu(link);
+ kvfree_rcu_mightsleep(link);
if (list_empty(&tp->event->files))
trace_probe_clear_flag(tp, TP_FLAG_TRACE);
diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c
index 84c124f097b7..9dd9745d365f 100644
--- a/lib/test_vmalloc.c
+++ b/lib/test_vmalloc.c
@@ -335,7 +335,7 @@ kvfree_rcu_1_arg_vmalloc_test(void)
return -1;
p->array[0] = 'a';
- kvfree_rcu(p);
+ kvfree_rcu_mightsleep(p);
}
return 0;
diff --git a/mm/Kconfig b/mm/Kconfig
index b9da19fdf30d..aa1b3ed29966 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -689,7 +689,6 @@ config BOUNCE
config MMU_NOTIFIER
bool
- select SRCU
select INTERVAL_TREE
config KSM
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 74842b453407..782273bb93c2 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -177,7 +177,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
if (orig_sock_table) {
static_branch_dec(&rps_needed);
static_branch_dec(&rfs_needed);
- kvfree_rcu(orig_sock_table);
+ kvfree_rcu_mightsleep(orig_sock_table);
}
}
}
@@ -215,7 +215,7 @@ static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
lockdep_is_held(&flow_limit_update_mutex));
if (cur && !cpumask_test_cpu(i, mask)) {
RCU_INIT_POINTER(sd->flow_limit, NULL);
- kfree_rcu(cur);
+ kfree_rcu_mightsleep(cur);
} else if (!cur && cpumask_test_cpu(i, mask)) {
cur = kzalloc_node(len, GFP_KERNEL,
cpu_to_node(i));
diff --git a/net/mac802154/scan.c b/net/mac802154/scan.c
index 9b0933a185eb..5c191bedd72c 100644
--- a/net/mac802154/scan.c
+++ b/net/mac802154/scan.c
@@ -52,7 +52,7 @@ static int mac802154_scan_cleanup_locked(struct ieee802154_local *local,
request = rcu_replace_pointer(local->scan_req, NULL, 1);
if (!request)
return 0;
- kfree_rcu(request);
+ kvfree_rcu_mightsleep(request);
/* Advertize first, while we know the devices cannot be removed */
if (aborted)
@@ -403,7 +403,7 @@ int mac802154_stop_beacons_locked(struct ieee802154_local *local,
request = rcu_replace_pointer(local->beacon_req, NULL, 1);
if (!request)
return 0;
- kfree_rcu(request);
+ kvfree_rcu_mightsleep(request);
nl802154_beaconing_done(wpan_dev);
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 3e6f5a8614d3..a6e27ecec3c6 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -6417,6 +6417,15 @@ sub process {
}
}
+# check for soon-to-be-deprecated single-argument k[v]free_rcu() API
+ if ($line =~ /\bk[v]?free_rcu\s*\([^(]+\)/) {
+ if ($line =~ /\bk[v]?free_rcu\s*\([^,]+\)/) {
+ ERROR("DEPRECATED_API",
+ "Single-argument k[v]free_rcu() API is deprecated, please pass rcu_head object or call k[v]free_rcu_mightsleep()." . $herecurr);
+ }
+ }
+
+
# check for unnecessary "Out of Memory" messages
if ($line =~ /^\+.*\b$logFunctions\s*\(/ &&
$prevline =~ /^[ \+]\s*if\s*\(\s*(\!\s*|NULL\s*==\s*)?($Lval)(\s*==\s*NULL\s*)?\s*\)/ &&
diff --git a/tools/include/nolibc/.gitignore b/tools/include/nolibc/.gitignore
new file mode 100644
index 000000000000..dea22eaaed2b
--- /dev/null
+++ b/tools/include/nolibc/.gitignore
@@ -0,0 +1 @@
+sysroot
diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile
index cfd06764b5ae..9839feafd38a 100644
--- a/tools/include/nolibc/Makefile
+++ b/tools/include/nolibc/Makefile
@@ -25,8 +25,8 @@ endif
nolibc_arch := $(patsubst arm64,aarch64,$(ARCH))
arch_file := arch-$(nolibc_arch).h
-all_files := ctype.h errno.h nolibc.h signal.h std.h stdio.h stdlib.h string.h \
- sys.h time.h types.h unistd.h
+all_files := ctype.h errno.h nolibc.h signal.h stackprotector.h std.h stdint.h \
+ stdio.h stdlib.h string.h sys.h time.h types.h unistd.h
# install all headers needed to support a bare-metal compiler
all: headers
diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
index e8d0cf545bf1..2d98d78fd3f3 100644
--- a/tools/include/nolibc/arch-i386.h
+++ b/tools/include/nolibc/arch-i386.h
@@ -181,6 +181,8 @@ struct sys_stat_struct {
char **environ __attribute__((weak));
const unsigned long *_auxv __attribute__((weak));
+#define __ARCH_SUPPORTS_STACK_PROTECTOR
+
/* startup code */
/*
* i386 System V ABI mandates:
@@ -188,9 +190,12 @@ const unsigned long *_auxv __attribute__((weak));
* 2) The deepest stack frame should be set to zero
*
*/
-void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) _start(void)
+void __attribute__((weak,noreturn,optimize("omit-frame-pointer"),no_stack_protector)) _start(void)
{
__asm__ volatile (
+#ifdef NOLIBC_STACKPROTECTOR
+ "call __stack_chk_init\n" // initialize stack protector
+#endif
"pop %eax\n" // argc (first arg, %eax)
"mov %esp, %ebx\n" // argv[] (second arg, %ebx)
"lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx)
diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h
new file mode 100644
index 000000000000..029ee3cd6baf
--- /dev/null
+++ b/tools/include/nolibc/arch-loongarch.h
@@ -0,0 +1,200 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * LoongArch specific definitions for NOLIBC
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#ifndef _NOLIBC_ARCH_LOONGARCH_H
+#define _NOLIBC_ARCH_LOONGARCH_H
+
+/* Syscalls for LoongArch :
+ * - stack is 16-byte aligned
+ * - syscall number is passed in a7
+ * - arguments are in a0, a1, a2, a3, a4, a5
+ * - the system call is performed by calling "syscall 0"
+ * - syscall return comes in a0
+ * - the arguments are cast to long and assigned into the target
+ * registers which are then simply passed as registers to the asm code,
+ * so that we don't have to experience issues with register constraints.
+ *
+ * On LoongArch, select() is not implemented so we have to use pselect6().
+ */
+#define __ARCH_WANT_SYS_PSELECT6
+
+#define my_syscall0(num) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0"); \
+ \
+ __asm__ volatile ( \
+ "syscall 0\n" \
+ : "=r"(_arg1) \
+ : "r"(_num) \
+ : "memory", "$t0", "$t1", "$t2", "$t3", \
+ "$t4", "$t5", "$t6", "$t7", "$t8" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ "syscall 0\n" \
+ : "+r"(_arg1) \
+ : "r"(_num) \
+ : "memory", "$t0", "$t1", "$t2", "$t3", \
+ "$t4", "$t5", "$t6", "$t7", "$t8" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ "syscall 0\n" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), \
+ "r"(_num) \
+ : "memory", "$t0", "$t1", "$t2", "$t3", \
+ "$t4", "$t5", "$t6", "$t7", "$t8" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ "syscall 0\n" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), \
+ "r"(_num) \
+ : "memory", "$t0", "$t1", "$t2", "$t3", \
+ "$t4", "$t5", "$t6", "$t7", "$t8" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "syscall 0\n" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "r"(_num) \
+ : "memory", "$t0", "$t1", "$t2", "$t3", \
+ "$t4", "$t5", "$t6", "$t7", "$t8" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3") = (long)(arg4); \
+ register long _arg5 __asm__ ("a4") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "syscall 0\n" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_num) \
+ : "memory", "$t0", "$t1", "$t2", "$t3", \
+ "$t4", "$t5", "$t6", "$t7", "$t8" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3") = (long)(arg4); \
+ register long _arg5 __asm__ ("a4") = (long)(arg5); \
+ register long _arg6 __asm__ ("a5") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ "syscall 0\n" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
+ "r"(_num) \
+ : "memory", "$t0", "$t1", "$t2", "$t3", \
+ "$t4", "$t5", "$t6", "$t7", "$t8" \
+ ); \
+ _arg1; \
+})
+
+char **environ __attribute__((weak));
+const unsigned long *_auxv __attribute__((weak));
+
+#if __loongarch_grlen == 32
+#define LONGLOG "2"
+#define SZREG "4"
+#define REG_L "ld.w"
+#define LONG_S "st.w"
+#define LONG_ADD "add.w"
+#define LONG_ADDI "addi.w"
+#define LONG_SLL "slli.w"
+#define LONG_BSTRINS "bstrins.w"
+#else // __loongarch_grlen == 64
+#define LONGLOG "3"
+#define SZREG "8"
+#define REG_L "ld.d"
+#define LONG_S "st.d"
+#define LONG_ADD "add.d"
+#define LONG_ADDI "addi.d"
+#define LONG_SLL "slli.d"
+#define LONG_BSTRINS "bstrins.d"
+#endif
+
+/* startup code */
+void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) _start(void)
+{
+ __asm__ volatile (
+ REG_L " $a0, $sp, 0\n" // argc (a0) was in the stack
+ LONG_ADDI " $a1, $sp, "SZREG"\n" // argv (a1) = sp + SZREG
+ LONG_SLL " $a2, $a0, "LONGLOG"\n" // envp (a2) = SZREG*argc ...
+ LONG_ADDI " $a2, $a2, "SZREG"\n" // + SZREG (skip null)
+ LONG_ADD " $a2, $a2, $a1\n" // + argv
+
+ "move $a3, $a2\n" // iterate a3 over envp to find auxv (after NULL)
+ "0:\n" // do {
+ REG_L " $a4, $a3, 0\n" // a4 = *a3;
+ LONG_ADDI " $a3, $a3, "SZREG"\n" // a3 += sizeof(void*);
+ "bne $a4, $zero, 0b\n" // } while (a4);
+ "la.pcrel $a4, _auxv\n" // a4 = &_auxv
+ LONG_S " $a3, $a4, 0\n" // store a3 into _auxv
+
+ "la.pcrel $a3, environ\n" // a3 = &environ
+ LONG_S " $a2, $a3, 0\n" // store envp(a2) into environ
+ LONG_BSTRINS " $sp, $zero, 3, 0\n" // sp must be 16-byte aligned
+ "bl main\n" // main() returns the status code, we'll exit with it.
+ "li.w $a7, 93\n" // NR_exit == 93
+ "syscall 0\n"
+ );
+ __builtin_unreachable();
+}
+
+#endif // _NOLIBC_ARCH_LOONGARCH_H
diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
index 17f6751208e7..f7f2a11d4c3b 100644
--- a/tools/include/nolibc/arch-x86_64.h
+++ b/tools/include/nolibc/arch-x86_64.h
@@ -181,6 +181,8 @@ struct sys_stat_struct {
char **environ __attribute__((weak));
const unsigned long *_auxv __attribute__((weak));
+#define __ARCH_SUPPORTS_STACK_PROTECTOR
+
/* startup code */
/*
* x86-64 System V ABI mandates:
@@ -191,6 +193,9 @@ const unsigned long *_auxv __attribute__((weak));
void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) _start(void)
{
__asm__ volatile (
+#ifdef NOLIBC_STACKPROTECTOR
+ "call __stack_chk_init\n" // initialize stack protector
+#endif
"pop %rdi\n" // argc (first arg, %rdi)
"mov %rsp, %rsi\n" // argv[] (second arg, %rsi)
"lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx)
diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h
index 78b067a4fa47..2d5386a8d6aa 100644
--- a/tools/include/nolibc/arch.h
+++ b/tools/include/nolibc/arch.h
@@ -29,6 +29,8 @@
#include "arch-riscv.h"
#elif defined(__s390x__)
#include "arch-s390.h"
+#elif defined(__loongarch__)
+#include "arch-loongarch.h"
#endif
#endif /* _NOLIBC_ARCH_H */
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index b2bc48d3cfe4..04739a6293c4 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -104,6 +104,7 @@
#include "string.h"
#include "time.h"
#include "unistd.h"
+#include "stackprotector.h"
/* Used by programs to avoid std includes */
#define NOLIBC
diff --git a/tools/include/nolibc/stackprotector.h b/tools/include/nolibc/stackprotector.h
new file mode 100644
index 000000000000..d119cbbbc256
--- /dev/null
+++ b/tools/include/nolibc/stackprotector.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Stack protector support for NOLIBC
+ * Copyright (C) 2023 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+#ifndef _NOLIBC_STACKPROTECTOR_H
+#define _NOLIBC_STACKPROTECTOR_H
+
+#include "arch.h"
+
+#if defined(NOLIBC_STACKPROTECTOR)
+
+#if !defined(__ARCH_SUPPORTS_STACK_PROTECTOR)
+#error "nolibc does not support stack protectors on this arch"
+#endif
+
+#include "sys.h"
+#include "stdlib.h"
+
+/* The functions in this header are using raw syscall macros to avoid
+ * triggering stack protector errors themselves
+ */
+
+__attribute__((weak,noreturn,section(".text.nolibc_stack_chk")))
+void __stack_chk_fail(void)
+{
+ pid_t pid;
+ my_syscall3(__NR_write, STDERR_FILENO, "!!Stack smashing detected!!\n", 28);
+ pid = my_syscall0(__NR_getpid);
+ my_syscall2(__NR_kill, pid, SIGABRT);
+ for (;;);
+}
+
+__attribute__((weak,noreturn,section(".text.nolibc_stack_chk")))
+void __stack_chk_fail_local(void)
+{
+ __stack_chk_fail();
+}
+
+__attribute__((weak,section(".data.nolibc_stack_chk")))
+uintptr_t __stack_chk_guard;
+
+__attribute__((weak,no_stack_protector,section(".text.nolibc_stack_chk")))
+void __stack_chk_init(void)
+{
+ my_syscall3(__NR_getrandom, &__stack_chk_guard, sizeof(__stack_chk_guard), 0);
+ /* a bit more randomness in case getrandom() fails */
+ __stack_chk_guard ^= (uintptr_t) &__stack_chk_guard;
+}
+#endif // defined(NOLIBC_STACKPROTECTOR)
+
+#endif // _NOLIBC_STACKPROTECTOR_H
diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h
index 1747ae125392..933bc0be7e1c 100644
--- a/tools/include/nolibc/std.h
+++ b/tools/include/nolibc/std.h
@@ -18,20 +18,7 @@
#define NULL ((void *)0)
#endif
-/* stdint types */
-typedef unsigned char uint8_t;
-typedef signed char int8_t;
-typedef unsigned short uint16_t;
-typedef signed short int16_t;
-typedef unsigned int uint32_t;
-typedef signed int int32_t;
-typedef unsigned long long uint64_t;
-typedef signed long long int64_t;
-typedef unsigned long size_t;
-typedef signed long ssize_t;
-typedef unsigned long uintptr_t;
-typedef signed long intptr_t;
-typedef signed long ptrdiff_t;
+#include "stdint.h"
/* those are commonly provided by sys/types.h */
typedef unsigned int dev_t;
diff --git a/tools/include/nolibc/stdint.h b/tools/include/nolibc/stdint.h
new file mode 100644
index 000000000000..c1ce4f5e0603
--- /dev/null
+++ b/tools/include/nolibc/stdint.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Standard definitions and types for NOLIBC
+ * Copyright (C) 2023 Vincent Dagonneau <v@vda.io>
+ */
+
+#ifndef _NOLIBC_STDINT_H
+#define _NOLIBC_STDINT_H
+
+typedef unsigned char uint8_t;
+typedef signed char int8_t;
+typedef unsigned short uint16_t;
+typedef signed short int16_t;
+typedef unsigned int uint32_t;
+typedef signed int int32_t;
+typedef unsigned long long uint64_t;
+typedef signed long long int64_t;
+typedef unsigned long size_t;
+typedef signed long ssize_t;
+typedef unsigned long uintptr_t;
+typedef signed long intptr_t;
+typedef signed long ptrdiff_t;
+
+typedef int8_t int_least8_t;
+typedef uint8_t uint_least8_t;
+typedef int16_t int_least16_t;
+typedef uint16_t uint_least16_t;
+typedef int32_t int_least32_t;
+typedef uint32_t uint_least32_t;
+typedef int64_t int_least64_t;
+typedef uint64_t uint_least64_t;
+
+typedef int8_t int_fast8_t;
+typedef uint8_t uint_fast8_t;
+typedef ssize_t int_fast16_t;
+typedef size_t uint_fast16_t;
+typedef ssize_t int_fast32_t;
+typedef size_t uint_fast32_t;
+typedef ssize_t int_fast64_t;
+typedef size_t uint_fast64_t;
+
+typedef int64_t intmax_t;
+typedef uint64_t uintmax_t;
+
+/* limits of integral types */
+
+#define INT8_MIN (-128)
+#define INT16_MIN (-32767-1)
+#define INT32_MIN (-2147483647-1)
+#define INT64_MIN (-9223372036854775807LL-1)
+
+#define INT8_MAX (127)
+#define INT16_MAX (32767)
+#define INT32_MAX (2147483647)
+#define INT64_MAX (9223372036854775807LL)
+
+#define UINT8_MAX (255)
+#define UINT16_MAX (65535)
+#define UINT32_MAX (4294967295U)
+#define UINT64_MAX (18446744073709551615ULL)
+
+#define INT_LEAST8_MIN INT8_MIN
+#define INT_LEAST16_MIN INT16_MIN
+#define INT_LEAST32_MIN INT32_MIN
+#define INT_LEAST64_MIN INT64_MIN
+
+#define INT_LEAST8_MAX INT8_MAX
+#define INT_LEAST16_MAX INT16_MAX
+#define INT_LEAST32_MAX INT32_MAX
+#define INT_LEAST64_MAX INT64_MAX
+
+#define UINT_LEAST8_MAX UINT8_MAX
+#define UINT_LEAST16_MAX UINT16_MAX
+#define UINT_LEAST32_MAX UINT32_MAX
+#define UINT_LEAST64_MAX UINT64_MAX
+
+#define SIZE_MAX ((size_t)(__LONG_MAX__) * 2 + 1)
+#define INTPTR_MIN (-__LONG_MAX__ - 1)
+#define INTPTR_MAX __LONG_MAX__
+#define PTRDIFF_MIN INTPTR_MIN
+#define PTRDIFF_MAX INTPTR_MAX
+#define UINTPTR_MAX SIZE_MAX
+
+#define INT_FAST8_MIN INT8_MIN
+#define INT_FAST16_MIN INTPTR_MIN
+#define INT_FAST32_MIN INTPTR_MIN
+#define INT_FAST64_MIN INTPTR_MIN
+
+#define INT_FAST8_MAX INT8_MAX
+#define INT_FAST16_MAX INTPTR_MAX
+#define INT_FAST32_MAX INTPTR_MAX
+#define INT_FAST64_MAX INTPTR_MAX
+
+#define UINT_FAST8_MAX UINT8_MAX
+#define UINT_FAST16_MAX SIZE_MAX
+#define UINT_FAST32_MAX SIZE_MAX
+#define UINT_FAST64_MAX SIZE_MAX
+
+#endif /* _NOLIBC_STDINT_H */
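
A brief usage sketch (illustrative only, assuming the nolibc headers are pulled in via -include nolibc.h): the fixed-width, least and fast types and their limits behave as on a conventional libc, with the fast types mapping to size_t/ssize_t in this implementation.

    /* Sum n bytes into a "fast" accumulator and clamp the result to UINT32_MAX. */
    static uint32_t sum_bytes(const uint8_t *p, size_t n)
    {
        uint_fast32_t sum = 0;          /* unsigned long here, per the typedefs above */
        size_t i;

        for (i = 0; i < n; i++) {
            sum += p[i];
            if (sum > UINT32_MAX)       /* only possible when sizeof(long) == 8 */
                return UINT32_MAX;
        }
        return (uint32_t)sum;
    }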
diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
index 96ac8afc5aee..4add736c07aa 100644
--- a/tools/include/nolibc/stdio.h
+++ b/tools/include/nolibc/stdio.h
@@ -21,17 +21,75 @@
#define EOF (-1)
#endif
-/* just define FILE as a non-empty type */
+/* just define FILE as a non-empty type. The value of the pointer gives
+ * the FD: FILE=~fd for fd>=0 or NULL for fd<0. This way positive FILE
+ * pointers are immediately identified as abnormal entries (i.e. possible
+ * copies of valid pointers to something else).
+ */
typedef struct FILE {
char dummy[1];
} FILE;
-/* We define the 3 common stdio files as constant invalid pointers that
- * are easily recognized.
- */
-static __attribute__((unused)) FILE* const stdin = (FILE*)-3;
-static __attribute__((unused)) FILE* const stdout = (FILE*)-2;
-static __attribute__((unused)) FILE* const stderr = (FILE*)-1;
+static __attribute__((unused)) FILE* const stdin = (FILE*)(intptr_t)~STDIN_FILENO;
+static __attribute__((unused)) FILE* const stdout = (FILE*)(intptr_t)~STDOUT_FILENO;
+static __attribute__((unused)) FILE* const stderr = (FILE*)(intptr_t)~STDERR_FILENO;
+
+/* provides a FILE* equivalent of fd. The mode is ignored. */
+static __attribute__((unused))
+FILE *fdopen(int fd, const char *mode __attribute__((unused)))
+{
+ if (fd < 0) {
+ SET_ERRNO(EBADF);
+ return NULL;
+ }
+ return (FILE*)(intptr_t)~fd;
+}
+
+/* provides the fd of stream. */
+static __attribute__((unused))
+int fileno(FILE *stream)
+{
+ intptr_t i = (intptr_t)stream;
+
+ if (i >= 0) {
+ SET_ERRNO(EBADF);
+ return -1;
+ }
+ return ~i;
+}
+
+/* flush a stream. */
+static __attribute__((unused))
+int fflush(FILE *stream)
+{
+ intptr_t i = (intptr_t)stream;
+
+ /* NULL is valid here. */
+ if (i > 0) {
+ SET_ERRNO(EBADF);
+ return -1;
+ }
+
+ /* Don't do anything, nolibc does not support buffering. */
+ return 0;
+}
+
+/* close a stream. */
+static __attribute__((unused))
+int fclose(FILE *stream)
+{
+ intptr_t i = (intptr_t)stream;
+
+ if (i >= 0) {
+ SET_ERRNO(EBADF);
+ return -1;
+ }
+
+ if (close(~i))
+ return EOF;
+
+ return 0;
+}
/* getc(), fgetc(), getchar() */
@@ -41,14 +99,8 @@ static __attribute__((unused))
int fgetc(FILE* stream)
{
unsigned char ch;
- int fd;
- if (stream < stdin || stream > stderr)
- return EOF;
-
- fd = 3 + (long)stream;
-
- if (read(fd, &ch, 1) <= 0)
+ if (read(fileno(stream), &ch, 1) <= 0)
return EOF;
return ch;
}
@@ -68,14 +120,8 @@ static __attribute__((unused))
int fputc(int c, FILE* stream)
{
unsigned char ch = c;
- int fd;
-
- if (stream < stdin || stream > stderr)
- return EOF;
-
- fd = 3 + (long)stream;
- if (write(fd, &ch, 1) <= 0)
+ if (write(fileno(stream), &ch, 1) <= 0)
return EOF;
return ch;
}
@@ -96,12 +142,7 @@ static __attribute__((unused))
int _fwrite(const void *buf, size_t size, FILE *stream)
{
ssize_t ret;
- int fd;
-
- if (stream < stdin || stream > stderr)
- return EOF;
-
- fd = 3 + (long)stream;
+ int fd = fileno(stream);
while (size) {
ret = write(fd, buf, size);
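
To make the ~fd encoding concrete, here is a hypothetical round-trip check (not from the patch): fdopen() stores the bitwise complement of the descriptor in the pointer and fileno() recovers it, so the three standard streams decode back to descriptors 0, 1 and 2.

    /* Illustrative only. */
    int check_stream_encoding(void)
    {
        FILE *f = fdopen(2, "w");               /* "w" is ignored */

        if (!f || fileno(f) != 2)               /* f is (FILE *)~2, same as stderr */
            return 1;
        if (fileno(stdin) != 0 || fileno(stdout) != 1)
            return 1;
        if (fputc('x', f) != 'x')               /* one write(2, "x", 1) under the hood */
            return 1;
        return fflush(f);                       /* no buffering, always returns 0 */
    }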
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index b5f8cd35c03b..bea9760dbd16 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -11,7 +11,6 @@
#include "std.h"
/* system includes */
-#include <asm/fcntl.h> // for O_*
#include <asm/unistd.h>
#include <asm/signal.h> // for SIGCHLD
#include <asm/ioctls.h>
@@ -20,6 +19,8 @@
#include <linux/loop.h>
#include <linux/time.h>
#include <linux/auxvec.h>
+#include <linux/fcntl.h> // for O_* and AT_*
+#include <linux/stat.h> // for statx()
#include "arch.h"
#include "errno.h"
@@ -411,6 +412,27 @@ int getdents64(int fd, struct linux_dirent64 *dirp, int count)
/*
+ * uid_t geteuid(void);
+ */
+
+static __attribute__((unused))
+uid_t sys_geteuid(void)
+{
+#ifdef __NR_geteuid32
+ return my_syscall0(__NR_geteuid32);
+#else
+ return my_syscall0(__NR_geteuid);
+#endif
+}
+
+static __attribute__((unused))
+uid_t geteuid(void)
+{
+ return sys_geteuid();
+}
+
+
+/*
* pid_t getpgid(pid_t pid);
*/
@@ -545,6 +567,27 @@ int gettimeofday(struct timeval *tv, struct timezone *tz)
/*
+ * uid_t getuid(void);
+ */
+
+static __attribute__((unused))
+uid_t sys_getuid(void)
+{
+#ifdef __NR_getuid32
+ return my_syscall0(__NR_getuid32);
+#else
+ return my_syscall0(__NR_getuid);
+#endif
+}
+
+static __attribute__((unused))
+uid_t getuid(void)
+{
+ return sys_getuid();
+}
+
+
+/*
* int ioctl(int fd, unsigned long req, void *value);
*/
@@ -1048,12 +1091,66 @@ pid_t setsid(void)
return ret;
}
+#if defined(__NR_statx)
+/*
+ * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf);
+ */
+
+static __attribute__((unused))
+int sys_statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf)
+{
+ return my_syscall5(__NR_statx, fd, path, flags, mask, buf);
+}
+
+static __attribute__((unused))
+int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf)
+{
+ int ret = sys_statx(fd, path, flags, mask, buf);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+#endif
/*
* int stat(const char *path, struct stat *buf);
* Warning: the struct stat's layout is arch-dependent.
*/
+#if defined(__NR_statx) && !defined(__NR_newfstatat) && !defined(__NR_stat)
+/*
+ * Maybe we can just use statx() when available for all architectures?
+ */
+static __attribute__((unused))
+int sys_stat(const char *path, struct stat *buf)
+{
+ struct statx statx;
+ long ret;
+
+ ret = sys_statx(AT_FDCWD, path, AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx);
+ buf->st_dev = ((statx.stx_dev_minor & 0xff)
+ | (statx.stx_dev_major << 8)
+ | ((statx.stx_dev_minor & ~0xff) << 12));
+ buf->st_ino = statx.stx_ino;
+ buf->st_mode = statx.stx_mode;
+ buf->st_nlink = statx.stx_nlink;
+ buf->st_uid = statx.stx_uid;
+ buf->st_gid = statx.stx_gid;
+ buf->st_rdev = ((statx.stx_rdev_minor & 0xff)
+ | (statx.stx_rdev_major << 8)
+ | ((statx.stx_rdev_minor & ~0xff) << 12));
+ buf->st_size = statx.stx_size;
+ buf->st_blksize = statx.stx_blksize;
+ buf->st_blocks = statx.stx_blocks;
+ buf->st_atime = statx.stx_atime.tv_sec;
+ buf->st_mtime = statx.stx_mtime.tv_sec;
+ buf->st_ctime = statx.stx_ctime.tv_sec;
+ return ret;
+}
+#else
static __attribute__((unused))
int sys_stat(const char *path, struct stat *buf)
{
@@ -1083,6 +1180,7 @@ int sys_stat(const char *path, struct stat *buf)
buf->st_ctime = stat.st_ctime;
return ret;
}
+#endif
static __attribute__((unused))
int stat(const char *path, struct stat *buf)
@@ -1267,6 +1365,29 @@ ssize_t write(int fd, const void *buf, size_t count)
return ret;
}
+
+/*
+ * int memfd_create(const char *name, unsigned int flags);
+ */
+
+static __attribute__((unused))
+int sys_memfd_create(const char *name, unsigned int flags)
+{
+ return my_syscall2(__NR_memfd_create, name, flags);
+}
+
+static __attribute__((unused))
+int memfd_create(const char *name, unsigned int flags)
+{
+ ssize_t ret = sys_memfd_create(name, flags);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
/* make sure to include all global symbols */
#include "nolibc.h"
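
One detail worth calling out in the statx()-based sys_stat() fallback above is the device-number packing: it recombines the statx major/minor pair into the usual huge dev_t layout (low 8 bits of minor, then the major shifted up by 8, then the remaining minor bits shifted past it). A hypothetical helper showing the same computation:

    /* Same packing as in sys_stat() above; e.g. major 8, minor 300 -> 0x10082c. */
    static unsigned long pack_dev(unsigned int major, unsigned int minor)
    {
        return (minor & 0xff) | (major << 8) | ((minor & ~0xffUL) << 12);
    }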
diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h
index fbbc0e68c001..aedd7d9e3f64 100644
--- a/tools/include/nolibc/types.h
+++ b/tools/include/nolibc/types.h
@@ -9,6 +9,7 @@
#include "std.h"
#include <linux/time.h>
+#include <linux/stat.h>
/* Only the generic macros and types may be defined here. The arch-specific
@@ -16,7 +17,11 @@
* the layout of sys_stat_struct must not be defined here.
*/
-/* stat flags (WARNING, octal here) */
+/* stat flags (WARNING, octal here). We need to check for an existing
+ * definition because linux/stat.h may not define these if it finds
+ * that any glibc header was already included.
+ */
+#if !defined(S_IFMT)
#define S_IFDIR 0040000
#define S_IFCHR 0020000
#define S_IFBLK 0060000
@@ -34,6 +39,22 @@
#define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK)
#define S_ISSOCK(mode) (((mode) & S_IFMT) == S_IFSOCK)
+#define S_IRWXU 00700
+#define S_IRUSR 00400
+#define S_IWUSR 00200
+#define S_IXUSR 00100
+
+#define S_IRWXG 00070
+#define S_IRGRP 00040
+#define S_IWGRP 00020
+#define S_IXGRP 00010
+
+#define S_IRWXO 00007
+#define S_IROTH 00004
+#define S_IWOTH 00002
+#define S_IXOTH 00001
+#endif
+
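As a small illustration (hypothetical, not part of the patch), the permission bits compose as usual, e.g. 0644 == S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH, and can be tested against the st_mode returned by stat():

    /* Illustrative only: report whether "others" may read the given path. */
    static int is_world_readable(const char *path)
    {
        struct stat st;

        if (stat(path, &st) < 0)
            return 0;
        return !!(st.st_mode & S_IROTH);
    }
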
/* dirent types */
#define DT_UNKNOWN 0x0
#define DT_FIFO 0x1
@@ -60,11 +81,6 @@
#define MAXPATHLEN (PATH_MAX)
#endif
-/* Special FD used by all the *at functions */
-#ifndef AT_FDCWD
-#define AT_FDCWD (-100)
-#endif
-
/* whence values for lseek() */
#define SEEK_SET 0
#define SEEK_CUR 1
@@ -81,6 +97,8 @@
/* Macros used on waitpid()'s return status */
#define WEXITSTATUS(status) (((status) & 0xff00) >> 8)
#define WIFEXITED(status) (((status) & 0x7f) == 0)
+#define WTERMSIG(status) ((status) & 0x7f)
+#define WIFSIGNALED(status) ((status) - 1 < 0xff)
/* waitpid() flags */
#define WNOHANG 1
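
The two new macros complement the existing WIFEXITED()/WEXITSTATUS(); a hedged usage sketch follows (fork() and waitpid() are assumed to be the ones nolibc already provides):

    /* Illustrative only: return the child's exit code, or 128+signal if killed. */
    static int reap_child(pid_t pid)
    {
        int status;

        if (waitpid(pid, &status, 0) < 0)
            return -1;
        if (WIFEXITED(status))
            return WEXITSTATUS(status);
        if (WIFSIGNALED(status))
            return 128 + WTERMSIG(status);      /* shell-style convention */
        return -1;
    }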
diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h
index 1cfcd52106a4..ac7d53d986cd 100644
--- a/tools/include/nolibc/unistd.h
+++ b/tools/include/nolibc/unistd.h
@@ -13,6 +13,11 @@
#include "sys.h"
+#define STDIN_FILENO 0
+#define STDOUT_FILENO 1
+#define STDERR_FILENO 2
+
+
static __attribute__((unused))
int msleep(unsigned int msecs)
{
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index 8e7085238470..6dc8b3642458 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -28,9 +28,10 @@ Explanation of the Linux-Kernel Memory Consistency Model
20. THE HAPPENS-BEFORE RELATION: hb
21. THE PROPAGATES-BEFORE RELATION: pb
22. RCU RELATIONS: rcu-link, rcu-gp, rcu-rscsi, rcu-order, rcu-fence, and rb
- 23. LOCKING
- 24. PLAIN ACCESSES AND DATA RACES
- 25. ODDS AND ENDS
+ 23. SRCU READ-SIDE CRITICAL SECTIONS
+ 24. LOCKING
+ 25. PLAIN ACCESSES AND DATA RACES
+ 26. ODDS AND ENDS
@@ -1848,14 +1849,169 @@ section in P0 both starts before P1's grace period does and ends
before it does, and the critical section in P2 both starts after P1's
grace period does and ends after it does.
-Addendum: The LKMM now supports SRCU (Sleepable Read-Copy-Update) in
-addition to normal RCU. The ideas involved are much the same as
-above, with new relations srcu-gp and srcu-rscsi added to represent
-SRCU grace periods and read-side critical sections. There is a
-restriction on the srcu-gp and srcu-rscsi links that can appear in an
-rcu-order sequence (the srcu-rscsi links must be paired with srcu-gp
-links having the same SRCU domain with proper nesting); the details
-are relatively unimportant.
+The LKMM supports SRCU (Sleepable Read-Copy-Update) in addition to
+normal RCU. The ideas involved are much the same as above, with new
+relations srcu-gp and srcu-rscsi added to represent SRCU grace periods
+and read-side critical sections. However, there are some significant
+differences between RCU read-side critical sections and their SRCU
+counterparts, as described in the next section.
+
+
+SRCU READ-SIDE CRITICAL SECTIONS
+--------------------------------
+
+The LKMM uses the srcu-rscsi relation to model SRCU read-side critical
+sections. They differ from RCU read-side critical sections in the
+following respects:
+
+1. Unlike the analogous RCU primitives, synchronize_srcu(),
+ srcu_read_lock(), and srcu_read_unlock() take a pointer to a
+ struct srcu_struct as an argument. This structure is called
+ an SRCU domain, and calls linked by srcu-rscsi must have the
+ same domain. Read-side critical sections and grace periods
+ associated with different domains are independent of one
+ another; the SRCU version of the RCU Guarantee applies only
+ to pairs of critical sections and grace periods having the
+ same domain.
+
+2. srcu_read_lock() returns a value, called the index, which must
+ be passed to the matching srcu_read_unlock() call. Unlike
+ rcu_read_lock() and rcu_read_unlock(), an srcu_read_lock()
+ call does not always have to match the next unpaired
+ srcu_read_unlock(). In fact, it is possible for two SRCU
+ read-side critical sections to overlap partially, as in the
+ following example (where s is an srcu_struct and idx1 and idx2
+ are integer variables):
+
+ idx1 = srcu_read_lock(&s); // Start of first RSCS
+ idx2 = srcu_read_lock(&s); // Start of second RSCS
+ srcu_read_unlock(&s, idx1); // End of first RSCS
+ srcu_read_unlock(&s, idx2); // End of second RSCS
+
+ The matching is determined entirely by the domain pointer and
+ index value. By contrast, if the calls had been
+ rcu_read_lock() and rcu_read_unlock() then they would have
+ created two nested (fully overlapping) read-side critical
+ sections: an inner one and an outer one.
+
+3. The srcu_down_read() and srcu_up_read() primitives work
+ exactly like srcu_read_lock() and srcu_read_unlock(), except
+ that matching calls don't have to execute on the same CPU.
+ (The names are meant to be suggestive of operations on
+ semaphores.) Since the matching is determined by the domain
+ pointer and index value, these primitives make it possible for
+ an SRCU read-side critical section to start on one CPU and end
+ on another, so to speak.
+
+In order to account for these properties of SRCU, the LKMM models
+srcu_read_lock() as a special type of load event (which is
+appropriate, since it takes a memory location as argument and returns
+a value, just as a load does) and srcu_read_unlock() as a special type
+of store event (again appropriate, since it takes as arguments a
+memory location and a value). These loads and stores are annotated as
+belonging to the "srcu-lock" and "srcu-unlock" event classes
+respectively.
+
+This approach allows the LKMM to tell whether two events are
+associated with the same SRCU domain, simply by checking whether they
+access the same memory location (i.e., they are linked by the loc
+relation). It also gives a way to tell which unlock matches a
+particular lock, by checking for the presence of a data dependency
+from the load (srcu-lock) to the store (srcu-unlock). For example,
+given the situation outlined earlier (with statement labels added):
+
+ A: idx1 = srcu_read_lock(&s);
+ B: idx2 = srcu_read_lock(&s);
+ C: srcu_read_unlock(&s, idx1);
+ D: srcu_read_unlock(&s, idx2);
+
+the LKMM will treat A and B as loads from s yielding values saved in
+idx1 and idx2 respectively. Similarly, it will treat C and D as
+though they stored the values from idx1 and idx2 in s. The end result
+is much as if we had written:
+
+ A: idx1 = READ_ONCE(s);
+ B: idx2 = READ_ONCE(s);
+ C: WRITE_ONCE(s, idx1);
+ D: WRITE_ONCE(s, idx2);
+
+except for the presence of the special srcu-lock and srcu-unlock
+annotations. You can see at once that we have A ->data C and
+B ->data D. These dependencies tell the LKMM that C is the
+srcu-unlock event matching srcu-lock event A, and D is the
+srcu-unlock event matching srcu-lock event B.
+
+This approach is admittedly a hack, and it has the potential to lead
+to problems. For example, in:
+
+ idx1 = srcu_read_lock(&s);
+ srcu_read_unlock(&s, idx1);
+ idx2 = srcu_read_lock(&s);
+ srcu_read_unlock(&s, idx2);
+
+the LKMM will believe that idx2 must have the same value as idx1,
+since it reads from the immediately preceding store of idx1 in s.
+Fortunately this won't matter, assuming that litmus tests never do
+anything with SRCU index values other than pass them to
+srcu_read_unlock() or srcu_up_read() calls.
+
+However, sometimes it is necessary to store an index value in a
+shared variable temporarily. In fact, this is the only way for
+srcu_down_read() to pass the index it gets to an srcu_up_read() call
+on a different CPU. In more detail, we might have something like:
+
+ struct srcu_struct s;
+ int x;
+
+ P0()
+ {
+ int r0;
+
+ A: r0 = srcu_down_read(&s);
+ B: WRITE_ONCE(x, r0);
+ }
+
+ P1()
+ {
+ int r1;
+
+ C: r1 = READ_ONCE(x);
+ D: srcu_up_read(&s, r1);
+ }
+
+Assuming that P1 executes after P0 and does read the index value
+stored in x, we can write this (using brackets to represent event
+annotations) as:
+
+ A[srcu-lock] ->data B[once] ->rf C[once] ->data D[srcu-unlock].
+
+The LKMM defines a carry-srcu-data relation to express this pattern;
+it permits an arbitrarily long sequence of
+
+ data ; rf
+
+pairs (that is, a data link followed by an rf link) to occur between
+an srcu-lock event and the final data dependency leading to the
+matching srcu-unlock event. carry-srcu-data is complicated by the
+need to ensure that none of the intermediate store events in this
+sequence are instances of srcu-unlock. This is necessary because in a
+pattern like the one above:
+
+ A: idx1 = srcu_read_lock(&s);
+ B: srcu_read_unlock(&s, idx1);
+ C: idx2 = srcu_read_lock(&s);
+ D: srcu_read_unlock(&s, idx2);
+
+the LKMM treats B as a store to the variable s and C as a load from
+that variable, creating an undesirable rf link from B to C:
+
+ A ->data B ->rf C ->data D.
+
+This would cause carry-srcu-data to mistakenly extend a data
+dependency from A to D, giving the impression that D was the
+srcu-unlock event matching A's srcu-lock. To avoid such problems,
+carry-srcu-data does not accept sequences in which the end of any of
+the intermediate ->data links (B above) is an srcu-unlock event.
LOCKING
diff --git a/tools/memory-model/Documentation/litmus-tests.txt b/tools/memory-model/Documentation/litmus-tests.txt
index 26554b1c5575..acac527328a1 100644
--- a/tools/memory-model/Documentation/litmus-tests.txt
+++ b/tools/memory-model/Documentation/litmus-tests.txt
@@ -1028,32 +1028,7 @@ Limitations of the Linux-kernel memory model (LKMM) include:
additional call_rcu() process to the site of the
emulated rcu-barrier().
- e. Although sleepable RCU (SRCU) is now modeled, there
- are some subtle differences between its semantics and
- those in the Linux kernel. For example, the kernel
- might interpret the following sequence as two partially
- overlapping SRCU read-side critical sections:
-
- 1 r1 = srcu_read_lock(&my_srcu);
- 2 do_something_1();
- 3 r2 = srcu_read_lock(&my_srcu);
- 4 do_something_2();
- 5 srcu_read_unlock(&my_srcu, r1);
- 6 do_something_3();
- 7 srcu_read_unlock(&my_srcu, r2);
-
- In contrast, LKMM will interpret this as a nested pair of
- SRCU read-side critical sections, with the outer critical
- section spanning lines 1-7 and the inner critical section
- spanning lines 3-5.
-
- This difference would be more of a concern had anyone
- identified a reasonable use case for partially overlapping
- SRCU read-side critical sections. For more information
- on the trickiness of such overlapping, please see:
- https://paulmck.livejournal.com/40593.html
-
- f. Reader-writer locking is not modeled. It can be
+ e. Reader-writer locking is not modeled. It can be
emulated in litmus tests using atomic read-modify-write
operations.
diff --git a/tools/memory-model/Documentation/locking.txt b/tools/memory-model/Documentation/locking.txt
new file mode 100644
index 000000000000..65c898c64a93
--- /dev/null
+++ b/tools/memory-model/Documentation/locking.txt
@@ -0,0 +1,298 @@
+Locking
+=======
+
+Locking is well-known and the common use cases are straightforward: Any
+CPU holding a given lock sees any changes previously seen or made by any
+CPU before it previously released that same lock. This last sentence
+is the only part of this document that most developers will need to read.
+
+However, developers who would like to also access lock-protected shared
+variables outside of their corresponding locks should continue reading.
+
+
+Locking and Prior Accesses
+--------------------------
+
+The basic rule of locking is worth repeating:
+
+ Any CPU holding a given lock sees any changes previously seen
+ or made by any CPU before it previously released that same lock.
+
+Note that this statement is a bit stronger than "Any CPU holding a
+given lock sees all changes made by any CPU during the time that CPU was
+previously holding this same lock". For example, consider the following
+pair of code fragments:
+
+ /* See MP+polocks.litmus. */
+ void CPU0(void)
+ {
+ WRITE_ONCE(x, 1);
+ spin_lock(&mylock);
+ WRITE_ONCE(y, 1);
+ spin_unlock(&mylock);
+ }
+
+ void CPU1(void)
+ {
+ spin_lock(&mylock);
+ r0 = READ_ONCE(y);
+ spin_unlock(&mylock);
+ r1 = READ_ONCE(x);
+ }
+
+The basic rule guarantees that if CPU0() acquires mylock before CPU1(),
+then both r0 and r1 must be set to the value 1. This also has the
+consequence that if the final value of r0 is equal to 1, then the final
+value of r1 must also be equal to 1. In contrast, the weaker rule would
+say nothing about the final value of r1.
+
+
+Locking and Subsequent Accesses
+-------------------------------
+
+The converse to the basic rule also holds: Any CPU holding a given
+lock will not see any changes that will be made by any CPU after it
+subsequently acquires this same lock. This converse statement is
+illustrated by the following litmus test:
+
+ /* See MP+porevlocks.litmus. */
+ void CPU0(void)
+ {
+ r0 = READ_ONCE(y);
+ spin_lock(&mylock);
+ r1 = READ_ONCE(x);
+ spin_unlock(&mylock);
+ }
+
+ void CPU1(void)
+ {
+ spin_lock(&mylock);
+ WRITE_ONCE(x, 1);
+ spin_unlock(&mylock);
+ WRITE_ONCE(y, 1);
+ }
+
+This converse to the basic rule guarantees that if CPU0() acquires
+mylock before CPU1(), then both r0 and r1 must be set to the value 0.
+This also has the consequence that if the final value of r1 is equal
+to 0, then the final value of r0 must also be equal to 0. In contrast,
+the weaker rule would say nothing about the final value of r0.
+
+These examples show only a single pair of CPUs, but the effects of the
+locking basic rule extend across multiple acquisitions of a given lock
+across multiple CPUs.
+
+
+Double-Checked Locking
+----------------------
+
+It is well known that more than just a lock is required to make
+double-checked locking work correctly. This litmus test illustrates
+one incorrect approach:
+
+ /* See Documentation/litmus-tests/locking/DCL-broken.litmus. */
+ void CPU0(void)
+ {
+ r0 = READ_ONCE(flag);
+ if (r0 == 0) {
+ spin_lock(&lck);
+ r1 = READ_ONCE(flag);
+ if (r1 == 0) {
+ WRITE_ONCE(data, 1);
+ WRITE_ONCE(flag, 1);
+ }
+ spin_unlock(&lck);
+ }
+ r2 = READ_ONCE(data);
+ }
+ /* CPU1() is exactly the same as CPU0(). */
+
+There are two problems. First, there is no ordering between the first
+READ_ONCE() of "flag" and the READ_ONCE() of "data". Second, there is
+no ordering between the two WRITE_ONCE() calls. It should therefore be
+no surprise that "r2" can be zero, and a quick herd7 run confirms this.
+
+One way to fix this is to use smp_load_acquire() and smp_store_release()
+as shown in this corrected version:
+
+ /* See Documentation/litmus-tests/locking/DCL-fixed.litmus. */
+ void CPU0(void)
+ {
+ r0 = smp_load_acquire(&flag);
+ if (r0 == 0) {
+ spin_lock(&lck);
+ r1 = READ_ONCE(flag);
+ if (r1 == 0) {
+ WRITE_ONCE(data, 1);
+ smp_store_release(&flag, 1);
+ }
+ spin_unlock(&lck);
+ }
+ r2 = READ_ONCE(data);
+ }
+ /* CPU1() is exactly the same as CPU0(). */
+
+The smp_load_acquire() guarantees that its load from "flag" will
+be ordered before the READ_ONCE() from "data", thus solving the first
+problem. The smp_store_release() guarantees that its store will be
+ordered after the WRITE_ONCE() to "data", solving the second problem.
+The smp_store_release() pairs with the smp_load_acquire(), thus ensuring
+that the ordering provided by each actually takes effect. Again, a
+quick herd7 run confirms this.
+
+In short, if you access a lock-protected variable without holding the
+corresponding lock, you will need to provide additional ordering, in
+this case, via the smp_load_acquire() and the smp_store_release().
+
+
+Ordering Provided by a Lock to CPUs Not Holding That Lock
+---------------------------------------------------------
+
+It is not necessarily the case that accesses ordered by locking will be
+seen as ordered by CPUs not holding that lock. Consider this example:
+
+ /* See Z6.0+pooncelock+pooncelock+pombonce.litmus. */
+ void CPU0(void)
+ {
+ spin_lock(&mylock);
+ WRITE_ONCE(x, 1);
+ WRITE_ONCE(y, 1);
+ spin_unlock(&mylock);
+ }
+
+ void CPU1(void)
+ {
+ spin_lock(&mylock);
+ r0 = READ_ONCE(y);
+ WRITE_ONCE(z, 1);
+ spin_unlock(&mylock);
+ }
+
+ void CPU2(void)
+ {
+ WRITE_ONCE(z, 2);
+ smp_mb();
+ r1 = READ_ONCE(x);
+ }
+
+Counter-intuitive though it might be, it is quite possible to have
+the final value of r0 be 1, the final value of z be 2, and the final
+value of r1 be 0. The reason for this surprising outcome is that CPU2()
+never acquired the lock, and thus did not fully benefit from the lock's
+ordering properties.
+
+Ordering can be extended to CPUs not holding the lock by careful use
+of smp_mb__after_spinlock():
+
+ /* See Z6.0+pooncelock+poonceLock+pombonce.litmus. */
+ void CPU0(void)
+ {
+ spin_lock(&mylock);
+ WRITE_ONCE(x, 1);
+ WRITE_ONCE(y, 1);
+ spin_unlock(&mylock);
+ }
+
+ void CPU1(void)
+ {
+ spin_lock(&mylock);
+ smp_mb__after_spinlock();
+ r0 = READ_ONCE(y);
+ WRITE_ONCE(z, 1);
+ spin_unlock(&mylock);
+ }
+
+ void CPU2(void)
+ {
+ WRITE_ONCE(z, 2);
+ smp_mb();
+ r1 = READ_ONCE(x);
+ }
+
+This addition of smp_mb__after_spinlock() strengthens the lock
+acquisition sufficiently to rule out the counter-intuitive outcome.
+In other words, the addition of the smp_mb__after_spinlock() prohibits
+the counter-intuitive result where the final value of r0 is 1, the final
+value of z is 2, and the final value of r1 is 0.
+
+
+No Roach-Motel Locking!
+-----------------------
+
+This example requires familiarity with the herd7 "filter" clause, so
+please read up on that topic in litmus-tests.txt.
+
+It is tempting to allow memory-reference instructions to be pulled
+into a critical section, but this cannot be allowed in the general case.
+For example, consider a spin loop preceding a lock-based critical section.
+Now, herd7 does not model spin loops, but we can emulate one with two
+loads, with a "filter" clause to constrain the first to return the
+initial value and the second to return the updated value, as shown below:
+
+ /* See Documentation/litmus-tests/locking/RM-fixed.litmus. */
+ void CPU0(void)
+ {
+ spin_lock(&lck);
+ r2 = atomic_inc_return(&y);
+ WRITE_ONCE(x, 1);
+ spin_unlock(&lck);
+ }
+
+ void CPU1(void)
+ {
+ r0 = READ_ONCE(x);
+ r1 = READ_ONCE(x);
+ spin_lock(&lck);
+ r2 = atomic_inc_return(&y);
+ spin_unlock(&lck);
+ }
+
+ filter (1:r0=0 /\ 1:r1=1)
+ exists (1:r2=1)
+
+The variable "x" is the control variable for the emulated spin loop.
+CPU0() sets it to "1" while holding the lock, and CPU1() emulates the
+spin loop by reading it twice, first into "1:r0" (which should get the
+initial value "0") and then into "1:r1" (which should get the updated
+value "1").
+
+The "filter" clause takes this into account, constraining "1:r0" to
+equal "0" and "1:r1" to equal 1.
+
+Then the "exists" clause checks to see if CPU1() acquired its lock first,
+which should not happen given the filter clause because CPU0() updates
+"x" while holding the lock. And herd7 confirms this.
+
+But suppose that the compiler was permitted to reorder the spin loop
+into CPU1()'s critical section, like this:
+
+ /* See Documentation/litmus-tests/locking/RM-broken.litmus. */
+ void CPU0(void)
+ {
+ int r2;
+
+ spin_lock(&lck);
+ r2 = atomic_inc_return(&y);
+ WRITE_ONCE(x, 1);
+ spin_unlock(&lck);
+ }
+
+ void CPU1(void)
+ {
+ spin_lock(&lck);
+ r0 = READ_ONCE(x);
+ r1 = READ_ONCE(x);
+ r2 = atomic_inc_return(&y);
+ spin_unlock(&lck);
+ }
+
+ filter (1:r0=0 /\ 1:r1=1)
+ exists (1:r2=1)
+
+If "1:r0" is equal to "0", "1:r1" can never equal "1" because CPU0()
+cannot update "x" while CPU1() holds the lock. And herd7 confirms this,
+showing zero executions matching the "filter" criteria.
+
+And this is why Linux-kernel lock and unlock primitives must prevent
+code from entering critical sections. It is not sufficient to only
+prevent code from leaving them.
diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell
index 70a9073dec3e..ce068700939c 100644
--- a/tools/memory-model/linux-kernel.bell
+++ b/tools/memory-model/linux-kernel.bell
@@ -31,7 +31,8 @@ enum Barriers = 'wmb (*smp_wmb*) ||
'before-atomic (*smp_mb__before_atomic*) ||
'after-atomic (*smp_mb__after_atomic*) ||
'after-spinlock (*smp_mb__after_spinlock*) ||
- 'after-unlock-lock (*smp_mb__after_unlock_lock*)
+ 'after-unlock-lock (*smp_mb__after_unlock_lock*) ||
+ 'after-srcu-read-unlock (*smp_mb__after_srcu_read_unlock*)
instructions F[Barriers]
(* SRCU *)
@@ -53,38 +54,31 @@ let rcu-rscs = let rec
in matched
(* Validate nesting *)
-flag ~empty Rcu-lock \ domain(rcu-rscs) as unbalanced-rcu-locking
-flag ~empty Rcu-unlock \ range(rcu-rscs) as unbalanced-rcu-locking
+flag ~empty Rcu-lock \ domain(rcu-rscs) as unmatched-rcu-lock
+flag ~empty Rcu-unlock \ range(rcu-rscs) as unmatched-rcu-unlock
(* Compute matching pairs of nested Srcu-lock and Srcu-unlock *)
-let srcu-rscs = let rec
- unmatched-locks = Srcu-lock \ domain(matched)
- and unmatched-unlocks = Srcu-unlock \ range(matched)
- and unmatched = unmatched-locks | unmatched-unlocks
- and unmatched-po = ([unmatched] ; po ; [unmatched]) & loc
- and unmatched-locks-to-unlocks =
- ([unmatched-locks] ; po ; [unmatched-unlocks]) & loc
- and matched = matched | (unmatched-locks-to-unlocks \
- (unmatched-po ; unmatched-po))
- in matched
+let carry-srcu-data = (data ; [~ Srcu-unlock] ; rf)*
+let srcu-rscs = ([Srcu-lock] ; carry-srcu-data ; data ; [Srcu-unlock]) & loc
(* Validate nesting *)
-flag ~empty Srcu-lock \ domain(srcu-rscs) as unbalanced-srcu-locking
-flag ~empty Srcu-unlock \ range(srcu-rscs) as unbalanced-srcu-locking
+flag ~empty Srcu-lock \ domain(srcu-rscs) as unmatched-srcu-lock
+flag ~empty Srcu-unlock \ range(srcu-rscs) as unmatched-srcu-unlock
+flag ~empty (srcu-rscs^-1 ; srcu-rscs) \ id as multiple-srcu-matches
(* Check for use of synchronize_srcu() inside an RCU critical section *)
flag ~empty rcu-rscs & (po ; [Sync-srcu] ; po) as invalid-sleep
(* Validate SRCU dynamic match *)
-flag ~empty different-values(srcu-rscs) as srcu-bad-nesting
+flag ~empty different-values(srcu-rscs) as srcu-bad-value-match
(* Compute marked and plain memory accesses *)
let Marked = (~M) | IW | Once | Release | Acquire | domain(rmw) | range(rmw) |
- LKR | LKW | UL | LF | RL | RU
+ LKR | LKW | UL | LF | RL | RU | Srcu-lock | Srcu-unlock
let Plain = M \ Marked
(* Redefine dependencies to include those carried through plain accesses *)
-let carry-dep = (data ; rfi)*
+let carry-dep = (data ; [~ Srcu-unlock] ; rfi)*
let addr = carry-dep ; addr
let ctrl = carry-dep ; ctrl
let data = carry-dep ; data
diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat
index 07f884f9b2bf..adf3c4f41229 100644
--- a/tools/memory-model/linux-kernel.cat
+++ b/tools/memory-model/linux-kernel.cat
@@ -37,8 +37,20 @@ let mb = ([M] ; fencerel(Mb) ; [M]) |
([M] ; fencerel(Before-atomic) ; [RMW] ; po? ; [M]) |
([M] ; po? ; [RMW] ; fencerel(After-atomic) ; [M]) |
([M] ; po? ; [LKW] ; fencerel(After-spinlock) ; [M]) |
- ([M] ; po ; [UL] ; (co | po) ; [LKW] ;
- fencerel(After-unlock-lock) ; [M])
+(*
+ * Note: The po-unlock-lock-po relation only passes the lock to the direct
+ * successor, perhaps giving the impression that the ordering of the
+ * smp_mb__after_unlock_lock() fence only affects a single lock handover.
+ * However, in a longer sequence of lock handovers, the implicit
+ * A-cumulative release fences of lock-release ensure that any stores that
+ * propagate to one of the involved CPUs before it hands over the lock to
+ * the next CPU will also propagate to the final CPU handing over the lock
+ * to the CPU that executes the fence. Therefore, all those stores are
+ * also affected by the fence.
+ *)
+ ([M] ; po-unlock-lock-po ;
+ [After-unlock-lock] ; po ; [M]) |
+ ([M] ; po? ; [Srcu-unlock] ; fencerel(After-srcu-read-unlock) ; [M])
let gp = po ; [Sync-rcu | Sync-srcu] ; po?
let strong-fence = mb | gp
@@ -69,8 +81,8 @@ let dep = addr | data
let rwdep = (dep | ctrl) ; [W]
let overwrite = co | fr
let to-w = rwdep | (overwrite & int) | (addr ; [Plain] ; wmb)
-let to-r = addr | (dep ; [Marked] ; rfi)
-let ppo = to-r | to-w | fence | (po-unlock-lock-po & int)
+let to-r = (addr ; [R]) | (dep ; [Marked] ; rfi)
+let ppo = to-r | to-w | (fence & int) | (po-unlock-lock-po & int)
(* Propagation: Ordering from release operations and strong fences. *)
let A-cumul(r) = (rfe ; [Marked])? ; r
diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def
index ef0f3c1850de..88a39601f525 100644
--- a/tools/memory-model/linux-kernel.def
+++ b/tools/memory-model/linux-kernel.def
@@ -24,6 +24,7 @@ smp_mb__before_atomic() { __fence{before-atomic}; }
smp_mb__after_atomic() { __fence{after-atomic}; }
smp_mb__after_spinlock() { __fence{after-spinlock}; }
smp_mb__after_unlock_lock() { __fence{after-unlock-lock}; }
+smp_mb__after_srcu_read_unlock() { __fence{after-srcu-read-unlock}; }
barrier() { __fence{barrier}; }
// Exchange
@@ -49,8 +50,10 @@ synchronize_rcu() { __fence{sync-rcu}; }
synchronize_rcu_expedited() { __fence{sync-rcu}; }
// SRCU
-srcu_read_lock(X) __srcu{srcu-lock}(X)
-srcu_read_unlock(X,Y) { __srcu{srcu-unlock}(X,Y); }
+srcu_read_lock(X) __load{srcu-lock}(*X)
+srcu_read_unlock(X,Y) { __store{srcu-unlock}(*X,Y); }
+srcu_down_read(X) __load{srcu-lock}(*X)
+srcu_up_read(X,Y) { __store{srcu-unlock}(*X,Y); }
synchronize_srcu(X) { __srcu{sync-srcu}(X); }
synchronize_srcu_expedited(X) { __srcu{sync-srcu}(X); }
diff --git a/tools/memory-model/litmus-tests/.gitignore b/tools/memory-model/litmus-tests/.gitignore
index c492a1ddad91..19c379cf069d 100644
--- a/tools/memory-model/litmus-tests/.gitignore
+++ b/tools/memory-model/litmus-tests/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-*.litmus.out
+*.litmus.*
diff --git a/tools/memory-model/lock.cat b/tools/memory-model/lock.cat
index 6b52f365d73a..53b5a492739d 100644
--- a/tools/memory-model/lock.cat
+++ b/tools/memory-model/lock.cat
@@ -36,9 +36,9 @@ let RU = try RU with emptyset
(* Treat RL as a kind of LF: a read with no ordering properties *)
let LF = LF | RL
-(* There should be no ordinary R or W accesses to spinlocks *)
-let ALL-LOCKS = LKR | LKW | UL | LF | RU
-flag ~empty [M \ IW] ; loc ; [ALL-LOCKS] as mixed-lock-accesses
+(* There should be no ordinary R or W accesses to spinlocks or SRCU structs *)
+let ALL-LOCKS = LKR | LKW | UL | LF | RU | Srcu-lock | Srcu-unlock | Sync-srcu
+flag ~empty [M \ IW \ ALL-LOCKS] ; loc ; [ALL-LOCKS] as mixed-lock-accesses
(* Link Lock-Reads to their RMW-partner Lock-Writes *)
let lk-rmw = ([LKR] ; po-loc ; [LKW]) \ (po ; po)
diff --git a/tools/memory-model/scripts/README b/tools/memory-model/scripts/README
index 095c7eb36f9f..fb39bd0fd1b9 100644
--- a/tools/memory-model/scripts/README
+++ b/tools/memory-model/scripts/README
@@ -27,6 +27,14 @@ checklitmushist.sh
checklitmus.sh
Check a single litmus test against its "Result:" expected result.
+ Not intended for manual use.
+
+checktheselitmus.sh
+
+ Check the specified list of litmus tests against their "Result:"
+ expected results. This takes optional parseargs.sh arguments,
+ followed by "--" followed by pathnames starting from the current
+ directory.
cmplitmushist.sh
@@ -43,10 +51,10 @@ initlitmushist.sh
judgelitmus.sh
- Given a .litmus file and its .litmus.out herd7 output, check the
- .litmus.out file against the .litmus file's "Result:" comment to
- judge whether the test ran correctly. Not normally run manually,
- provided instead for use by other scripts.
+ Given a .litmus file and its herd7 output, check the output file
+ against the .litmus file's "Result:" comment to judge whether
+ the test ran correctly. Not normally run manually, provided
+ instead for use by other scripts.
newlitmushist.sh
@@ -68,3 +76,35 @@ runlitmushist.sh
README
This file
+
+Testing a change to LKMM might go as follows:
+
+ # Populate expected results without that change; this runs
+ # for about an hour on an 8-CPU x86 system:
+ scripts/initlitmushist.sh --timeout 10m --procs 10
+ # Incorporate the change:
+ git am -s -3 /path/to/patch # Or whatever it takes.
+
+ # Test the new version of LKMM as follows...
+
+ # Runs in seconds, good smoke test:
+ scripts/checkalllitmus.sh
+
+ # Compares results to those produced by initlitmushist.sh,
+ # and runs for about an hour on an 8-CPU x86 system:
+ scripts/checklitmushist.sh --timeout 10m --procs 10
+
+ # Checks results against Result tags, runs in minutes:
+ scripts/checkghlitmus.sh --timeout 10m --procs 10
+
+The checkghlitmus.sh script should not report errors in cases where
+the checklitmushist.sh script did not also report a change. This check
+is nevertheless valuable because it can find errors in the original
+version of LKMM. Note, however, that given the above procedure,
+an error in the original LKMM version that is fixed by the patch will
+be reported both as a mismatch by checklitmushist.sh and as an error
+by checkghlitmus.sh. One exception to this rule of thumb is when the
+test fails completely on the original version of LKMM and passes on the
+new version. In this case, checklitmushist.sh will report a mismatch
+and checkghlitmus.sh will report success. This happens when the change
+to LKMM introduces a new primitive for which litmus tests already existed.
diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh
index 3c0c7fbbd223..2d3ee850a839 100755
--- a/tools/memory-model/scripts/checkalllitmus.sh
+++ b/tools/memory-model/scripts/checkalllitmus.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
#
# Run herd7 tests on all .litmus files in the litmus-tests directory
@@ -8,6 +8,11 @@
# "^^^". It also outputs verification results to a file whose name is
# that of the specified litmus test, but with ".out" appended.
#
+# If the --hw argument is specified, this script translates the .litmus
+# C-language file to the specified type of assembly and verifies that.
+# But in this case, litmus tests using complex synchronization (such as
+# locking, RCU, and SRCU) are cheerfully ignored.
+#
# Usage:
# checkalllitmus.sh
#
@@ -17,7 +22,7 @@
#
# Copyright IBM Corporation, 2018
#
-# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
. scripts/parseargs.sh
@@ -30,29 +35,23 @@ else
exit 255
fi
-# Create any new directories that have appeared in the github litmus
-# repo since the last run.
+# Create any new directories that have appeared in the litmus-tests
+# directory since the last run.
if test "$LKMM_DESTDIR" != "."
then
find $litmusdir -type d -print |
( cd "$LKMM_DESTDIR"; sed -e 's/^/mkdir -p /' | sh )
fi
-# Find the checklitmus script. If it is not where we expect it, then
-# assume that the caller has the PATH environment variable set
-# appropriately.
-if test -x scripts/checklitmus.sh
-then
- clscript=scripts/checklitmus.sh
-else
- clscript=checklitmus.sh
-fi
-
# Run the script on all the litmus tests in the specified directory
ret=0
for i in $litmusdir/*.litmus
do
- if ! $clscript $i
+ if test -n "$LKMM_HW_MAP_FILE" && ! scripts/simpletest.sh $i
+ then
+ continue
+ fi
+ if ! scripts/checklitmus.sh $i
then
ret=1
fi
diff --git a/tools/memory-model/scripts/checkghlitmus.sh b/tools/memory-model/scripts/checkghlitmus.sh
index 6589fbb6f653..d3dfb321259f 100755
--- a/tools/memory-model/scripts/checkghlitmus.sh
+++ b/tools/memory-model/scripts/checkghlitmus.sh
@@ -10,6 +10,7 @@
# parseargs.sh scripts for arguments.
. scripts/parseargs.sh
+. scripts/hwfnseg.sh
T=/tmp/checkghlitmus.sh.$$
trap 'rm -rf $T' 0
@@ -32,19 +33,19 @@ then
( cd "$LKMM_DESTDIR"; sed -e 's/^/mkdir -p /' | sh )
fi
-# Create a list of the C-language litmus tests previously run.
-( cd $LKMM_DESTDIR; find litmus -name '*.litmus.out' -print ) |
- sed -e 's/\.out$//' |
- xargs -r egrep -l '^ \* Result: (Never|Sometimes|Always|DEADLOCK)' |
+# Create a list of the specified litmus tests previously run.
+( cd $LKMM_DESTDIR; find litmus -name "*.litmus${hwfnseg}.out" -print ) |
+ sed -e "s/${hwfnseg}"'\.out$//' |
+ xargs -r grep -E -l '^ \* Result: (Never|Sometimes|Always|DEADLOCK)' |
xargs -r grep -L "^P${LKMM_PROCS}"> $T/list-C-already
# Create a list of C-language litmus tests with "Result:" commands and
# no more than the specified number of processes.
-find litmus -name '*.litmus' -exec grep -l -m 1 "^C " {} \; > $T/list-C
-xargs < $T/list-C -r egrep -l '^ \* Result: (Never|Sometimes|Always|DEADLOCK)' > $T/list-C-result
+find litmus -name '*.litmus' -print | mselect7 -arch C > $T/list-C
+xargs < $T/list-C -r grep -E -l '^ \* Result: (Never|Sometimes|Always|DEADLOCK)' > $T/list-C-result
xargs < $T/list-C-result -r grep -L "^P${LKMM_PROCS}" > $T/list-C-result-short
-# Form list of tests without corresponding .litmus.out files
+# Form list of tests without corresponding .out files
sort $T/list-C-already $T/list-C-result-short | uniq -u > $T/list-C-needed
# Run any needed tests.
diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh
index 11461ed40b5e..4c1d0cf0ddad 100755
--- a/tools/memory-model/scripts/checklitmus.sh
+++ b/tools/memory-model/scripts/checklitmus.sh
@@ -1,10 +1,8 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0+
#
-# Run a herd7 test and invokes judgelitmus.sh to check the result against
-# a "Result:" comment within the litmus test. It also outputs verification
-# results to a file whose name is that of the specified litmus test, but
-# with ".out" appended.
+# Invokes runlitmus.sh and judgelitmus.sh on its arguments to run the
+# specified litmus test and pass judgment on the results.
#
# Usage:
# checklitmus.sh file.litmus
@@ -15,20 +13,7 @@
#
# Copyright IBM Corporation, 2018
#
-# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
-litmus=$1
-herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg}
-
-if test -f "$litmus" -a -r "$litmus"
-then
- :
-else
- echo ' --- ' error: \"$litmus\" is not a readable file
- exit 255
-fi
-
-echo Herd options: $herdoptions > $LKMM_DESTDIR/$litmus.out
-/usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1
-
-scripts/judgelitmus.sh $litmus
+scripts/runlitmus.sh $1
+scripts/judgelitmus.sh $1
diff --git a/tools/memory-model/scripts/checklitmushist.sh b/tools/memory-model/scripts/checklitmushist.sh
index 1d210ffb7c8a..406ecfc0aee4 100755
--- a/tools/memory-model/scripts/checklitmushist.sh
+++ b/tools/memory-model/scripts/checklitmushist.sh
@@ -12,7 +12,7 @@
#
# Copyright IBM Corporation, 2018
#
-# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
. scripts/parseargs.sh
diff --git a/tools/memory-model/scripts/checktheselitmus.sh b/tools/memory-model/scripts/checktheselitmus.sh
new file mode 100755
index 000000000000..10eeb5ecea6d
--- /dev/null
+++ b/tools/memory-model/scripts/checktheselitmus.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Invokes checklitmus.sh on its arguments to run the specified litmus
+# test and pass judgment on the results.
+#
+# Usage:
+# checktheselitmus.sh -- [ file1.litmus [ file2.litmus ... ] ]
+#
+# Run this in the directory containing the memory model, specifying the
+# pathname of the litmus test to check. The usual parseargs.sh arguments
+# can be specified prior to the "--".
+#
+# This script is intended for use with pathnames that start from the
+# tools/memory-model directory. If some of the pathnames instead start at
+# the root directory, they all must do so and the "--destdir /" parseargs.sh
+# argument must be specified prior to the "--". Alternatively, some other
+# "--destdir" argument can be supplied as long as the needed subdirectories
+# are populated.
+#
+# Copyright IBM Corporation, 2018
+#
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
+
+. scripts/parseargs.sh
+
+ret=0
+for i in "$@"
+do
+ if scripts/checklitmus.sh $i
+ then
+ :
+ else
+ ret=1
+ fi
+done
+if test "$ret" -ne 0
+then
+ echo " ^^^ VERIFICATION MISMATCHES" 1>&2
+else
+ echo All litmus tests verified as was expected. 1>&2
+fi
+exit $ret
diff --git a/tools/memory-model/scripts/cmplitmushist.sh b/tools/memory-model/scripts/cmplitmushist.sh
index 0f498aeeccf5..ca1ac8b64614 100755
--- a/tools/memory-model/scripts/cmplitmushist.sh
+++ b/tools/memory-model/scripts/cmplitmushist.sh
@@ -12,12 +12,49 @@ trap 'rm -rf $T' 0
mkdir $T
# comparetest oldpath newpath
+badmacnam=0
+timedout=0
perfect=0
obsline=0
noobsline=0
obsresult=0
badcompare=0
comparetest () {
+ if grep -q ': Unknown macro ' $1 || grep -q ': Unknown macro ' $2
+ then
+ if grep -q ': Unknown macro ' $1
+ then
+ badname=`grep ': Unknown macro ' $1 |
+ sed -e 's/^.*: Unknown macro //' |
+ sed -e 's/ (User error).*$//'`
+ echo 'Current LKMM version does not know "'$badname'"' $1
+ fi
+ if grep -q ': Unknown macro ' $2
+ then
+ badname=`grep ': Unknown macro ' $2 |
+ sed -e 's/^.*: Unknown macro //' |
+ sed -e 's/ (User error).*$//'`
+ echo 'Current LKMM version does not know "'$badname'"' $2
+ fi
+ badmacnam=`expr "$badmacnam" + 1`
+ return 0
+ elif grep -q '^Command exited with non-zero status 124' $1 ||
+ grep -q '^Command exited with non-zero status 124' $2
+ then
+ if grep -q '^Command exited with non-zero status 124' $1 &&
+ grep -q '^Command exited with non-zero status 124' $2
+ then
+ echo Both runs timed out: $2
+ elif grep -q '^Command exited with non-zero status 124' $1
+ then
+ echo Old run timed out: $2
+ elif grep -q '^Command exited with non-zero status 124' $2
+ then
+ echo New run timed out: $2
+ fi
+ timedout=`expr "$timedout" + 1`
+ return 0
+ fi
grep -v 'maxresident)k\|minor)pagefaults\|^Time' $1 > $T/oldout
grep -v 'maxresident)k\|minor)pagefaults\|^Time' $2 > $T/newout
if cmp -s $T/oldout $T/newout && grep -q '^Observation' $1
@@ -38,7 +75,7 @@ comparetest () {
return 0
fi
else
- echo Missing Observation line "(e.g., herd7 timeout)": $2
+ echo Missing Observation line "(e.g., syntax error)": $2
noobsline=`expr "$noobsline" + 1`
return 0
fi
@@ -72,12 +109,20 @@ then
fi
if test "$noobsline" -ne 0
then
- echo Missing Observation line "(e.g., herd7 timeout)": $noobsline 1>&2
+ echo Missing Observation line "(e.g., syntax error)": $noobsline 1>&2
fi
if test "$obsresult" -ne 0
then
echo Matching Observation Always/Sometimes/Never result: $obsresult 1>&2
fi
+if test "$timedout" -ne 0
+then
+ echo "!!!" Timed out: $timedout 1>&2
+fi
+if test "$badmacnam" -ne 0
+then
+ echo "!!!" Unknown primitive: $badmacnam 1>&2
+fi
if test "$badcompare" -ne 0
then
echo "!!!" Result changed: $badcompare 1>&2
diff --git a/tools/memory-model/scripts/hwfnseg.sh b/tools/memory-model/scripts/hwfnseg.sh
new file mode 100755
index 000000000000..580c3281181c
--- /dev/null
+++ b/tools/memory-model/scripts/hwfnseg.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Generate the hardware extension to the litmus-test filename, or the
+# empty string if this is an LKMM run. The extension is placed in
+# the shell variable hwfnseg.
+#
+# Usage:
+# . hwfnseg.sh
+#
+# Copyright IBM Corporation, 2019
+#
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
+
+if test -z "$LKMM_HW_MAP_FILE"
+then
+ hwfnseg=
+else
+ hwfnseg=".$LKMM_HW_MAP_FILE"
+fi
diff --git a/tools/memory-model/scripts/initlitmushist.sh b/tools/memory-model/scripts/initlitmushist.sh
index 956b6957484d..31ea782955d3 100755
--- a/tools/memory-model/scripts/initlitmushist.sh
+++ b/tools/memory-model/scripts/initlitmushist.sh
@@ -60,7 +60,7 @@ fi
# Create a list of the C-language litmus tests with no more than the
# specified number of processes (per the --procs argument).
-find litmus -name '*.litmus' -exec grep -l -m 1 "^C " {} \; > $T/list-C
+find litmus -name '*.litmus' -print | mselect7 -arch C > $T/list-C
xargs < $T/list-C -r grep -L "^P${LKMM_PROCS}" > $T/list-C-short
scripts/runlitmushist.sh < $T/list-C-short
diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh
index 0cc63875e395..1ec5d89fcfbb 100755
--- a/tools/memory-model/scripts/judgelitmus.sh
+++ b/tools/memory-model/scripts/judgelitmus.sh
@@ -1,9 +1,22 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0+
#
-# Given a .litmus test and the corresponding .litmus.out file, check
-# the .litmus.out file against the "Result:" comment to judge whether
-# the test ran correctly.
+# Given a .litmus test and the corresponding litmus output file, check
+# the .litmus.out file against the "Result:" comment to judge whether the
+# test ran correctly. If the --hw argument is omitted, check against the
+# LKMM output, which is assumed to be in file.litmus.out. If either a
+# "DATARACE" marker in the "Result:" comment or a "Flag data-race" marker
+# in the LKMM output is present, the other must also be as well, at least
+# for litmus tests having a "Result:" comment. In this case, a failure of
+# the Always/Sometimes/Never portion of the "Result:" prediction will be
+# noted, but forgiven.
+#
+# If the --hw argument is provided, this is assumed to be a hardware
+# test, and the output is assumed to be in file.litmus.HW.out, where
+# "HW" is the --hw argument. In addition, non-Sometimes verification
+# results will be noted, but forgiven. Furthermore, if there is no
+# "Result:" comment but there is an LKMM .litmus.out file, the observation
+# in that file will be used to judge the assembly-language verification.
#
# Usage:
# judgelitmus.sh file.litmus
@@ -13,7 +26,7 @@
#
# Copyright IBM Corporation, 2018
#
-# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
litmus=$1
@@ -24,55 +37,120 @@ else
echo ' --- ' error: \"$litmus\" is not a readable file
exit 255
fi
-if test -f "$LKMM_DESTDIR/$litmus".out -a -r "$LKMM_DESTDIR/$litmus".out
+if test -z "$LKMM_HW_MAP_FILE"
+then
+ litmusout=$litmus.out
+ lkmmout=
+else
+ litmusout="`echo $litmus |
+ sed -e 's/\.litmus$/.litmus.'${LKMM_HW_MAP_FILE}'/'`.out"
+ lkmmout=$litmus.out
+fi
+if test -f "$LKMM_DESTDIR/$litmusout" -a -r "$LKMM_DESTDIR/$litmusout"
then
:
else
- echo ' --- ' error: \"$LKMM_DESTDIR/$litmus\".out is not a readable file
+ echo ' --- ' error: \"$LKMM_DESTDIR/$litmusout\" is not a readable file
exit 255
fi
-if grep -q '^ \* Result: ' $litmus
+if grep -q '^Flag data-race$' "$LKMM_DESTDIR/$litmusout"
+then
+ datarace_modeled=1
+fi
+if grep -q '^[( ]\* Result: ' $litmus
+then
+ outcome=`grep -m 1 '^[( ]\* Result: ' $litmus | awk '{ print $3 }'`
+ if grep -m1 '^[( ]\* Result: .* DATARACE' $litmus
+ then
+ datarace_predicted=1
+ fi
+ if test -n "$datarace_predicted" -a -z "$datarace_modeled" -a -z "$LKMM_HW_MAP_FILE"
+ then
+ echo '!!! Predicted data race not modeled' $litmus
+ exit 252
+ elif test -z "$datarace_predicted" -a -n "$datarace_modeled"
+ then
+ # Note that hardware models currently don't model data races
+ echo '!!! Unexpected data race modeled' $litmus
+ exit 253
+ fi
+elif test -n "$LKMM_HW_MAP_FILE" && grep -q '^Observation' $LKMM_DESTDIR/$lkmmout > /dev/null 2>&1
then
- outcome=`grep -m 1 '^ \* Result: ' $litmus | awk '{ print $3 }'`
+ outcome=`grep -m 1 '^Observation ' $LKMM_DESTDIR/$lkmmout | awk '{ print $3 }'`
else
outcome=specified
fi
-grep '^Observation' $LKMM_DESTDIR/$litmus.out
-if grep -q '^Observation' $LKMM_DESTDIR/$litmus.out
+grep '^Observation' $LKMM_DESTDIR/$litmusout
+if grep -q '^Observation' $LKMM_DESTDIR/$litmusout
then
:
+elif grep ': Unknown macro ' $LKMM_DESTDIR/$litmusout
+then
+ badname=`grep ': Unknown macro ' $LKMM_DESTDIR/$litmusout |
+ sed -e 's/^.*: Unknown macro //' |
+ sed -e 's/ (User error).*$//'`
+ badmsg=' !!! Current LKMM version does not know "'$badname'"'" $litmus"
+ echo $badmsg
+ if ! grep -q '!!!' $LKMM_DESTDIR/$litmusout
+ then
+ echo ' !!! '$badmsg >> $LKMM_DESTDIR/$litmusout 2>&1
+ fi
+ exit 254
+elif grep '^Command exited with non-zero status 124' $LKMM_DESTDIR/$litmusout
+then
+ echo ' !!! Timeout' $litmus
+ if ! grep -q '!!!' $LKMM_DESTDIR/$litmusout
+ then
+ echo ' !!! Timeout' >> $LKMM_DESTDIR/$litmusout 2>&1
+ fi
+ exit 124
else
echo ' !!! Verification error' $litmus
- if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out
+ if ! grep -q '!!!' $LKMM_DESTDIR/$litmusout
then
- echo ' !!! Verification error' >> $LKMM_DESTDIR/$litmus.out 2>&1
+ echo ' !!! Verification error' >> $LKMM_DESTDIR/$litmusout 2>&1
fi
exit 255
fi
if test "$outcome" = DEADLOCK
then
- if grep '^Observation' $LKMM_DESTDIR/$litmus.out | grep -q 'Never 0 0$'
+ if grep '^Observation' $LKMM_DESTDIR/$litmusout | grep -q 'Never 0 0$'
then
ret=0
else
echo " !!! Unexpected non-$outcome verification" $litmus
- if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out
+ if ! grep -q '!!!' $LKMM_DESTDIR/$litmusout
then
- echo " !!! Unexpected non-$outcome verification" >> $LKMM_DESTDIR/$litmus.out 2>&1
+ echo " !!! Unexpected non-$outcome verification" >> $LKMM_DESTDIR/$litmusout 2>&1
fi
ret=1
fi
-elif grep '^Observation' $LKMM_DESTDIR/$litmus.out | grep -q $outcome || test "$outcome" = Maybe
+elif grep '^Observation' $LKMM_DESTDIR/$litmusout | grep -q 'Never 0 0$'
+then
+ echo " !!! Unexpected non-$outcome deadlock" $litmus
+ if ! grep -q '!!!' $LKMM_DESTDIR/$litmusout
+ then
+ echo " !!! Unexpected non-$outcome deadlock" $litmus >> $LKMM_DESTDIR/$litmusout 2>&1
+ fi
+ ret=1
+elif grep '^Observation' $LKMM_DESTDIR/$litmusout | grep -q $outcome || test "$outcome" = Maybe
then
ret=0
else
- echo " !!! Unexpected non-$outcome verification" $litmus
- if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out
+ if test \( -n "$LKMM_HW_MAP_FILE" -a "$outcome" = Sometimes \) -o -n "$datarace_modeled"
then
- echo " !!! Unexpected non-$outcome verification" >> $LKMM_DESTDIR/$litmus.out 2>&1
+ flag="--- Forgiven"
+ ret=0
+ else
+ flag="!!! Unexpected"
+ ret=1
+ fi
+ echo " $flag non-$outcome verification" $litmus
+ if ! grep -qe "$flag" $LKMM_DESTDIR/$litmusout
+ then
+ echo " $flag non-$outcome verification" >> $LKMM_DESTDIR/$litmusout 2>&1
fi
- ret=1
fi
-tail -2 $LKMM_DESTDIR/$litmus.out | head -1
+tail -2 $LKMM_DESTDIR/$litmusout | head -1
exit $ret
diff --git a/tools/memory-model/scripts/newlitmushist.sh b/tools/memory-model/scripts/newlitmushist.sh
index 991f8f814881..25235e2049cf 100755
--- a/tools/memory-model/scripts/newlitmushist.sh
+++ b/tools/memory-model/scripts/newlitmushist.sh
@@ -12,7 +12,7 @@
#
# Copyright IBM Corporation, 2018
#
-# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
. scripts/parseargs.sh
@@ -43,7 +43,7 @@ fi
# Form full list of litmus tests with no more than the specified
# number of processes (per the --procs argument).
-find litmus -name '*.litmus' -exec grep -l -m 1 "^C " {} \; > $T/list-C-all
+find litmus -name '*.litmus' -print | mselect7 -arch C > $T/list-C-all
xargs < $T/list-C-all -r grep -L "^P${LKMM_PROCS}" > $T/list-C-short
# Form list of new tests. Note: This does not handle litmus-test deletion!
diff --git a/tools/memory-model/scripts/parseargs.sh b/tools/memory-model/scripts/parseargs.sh
index 40f52080fdbd..08ded5909860 100755
--- a/tools/memory-model/scripts/parseargs.sh
+++ b/tools/memory-model/scripts/parseargs.sh
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0+
#
-# the corresponding .litmus.out file, and does not judge the result.
+# Parse arguments common to the various scripts.
#
# . scripts/parseargs.sh
#
@@ -9,7 +9,7 @@
#
# Copyright IBM Corporation, 2018
#
-# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
T=/tmp/parseargs.sh.$$
mkdir $T
@@ -27,6 +27,7 @@ initparam () {
initparam LKMM_DESTDIR "."
initparam LKMM_HERD_OPTIONS "-conf linux-kernel.cfg"
+initparam LKMM_HW_MAP_FILE ""
initparam LKMM_JOBS `getconf _NPROCESSORS_ONLN`
initparam LKMM_PROCS "3"
initparam LKMM_TIMEOUT "1m"
@@ -37,10 +38,11 @@ usagehelp () {
echo "Usage $scriptname [ arguments ]"
echo " --destdir path (place for .litmus.out, default by .litmus)"
echo " --herdopts -conf linux-kernel.cfg ..."
+ echo " --hw AArch64"
echo " --jobs N (number of jobs, default one per CPU)"
echo " --procs N (litmus tests with at most this many processes)"
echo " --timeout N (herd7 timeout (e.g., 10s, 1m, 2hr, 1d, '')"
- echo "Defaults: --destdir '$LKMM_DESTDIR_DEF' --herdopts '$LKMM_HERD_OPTIONS_DEF' --jobs '$LKMM_JOBS_DEF' --procs '$LKMM_PROCS_DEF' --timeout '$LKMM_TIMEOUT_DEF'"
+ echo "Defaults: --destdir '$LKMM_DESTDIR_DEF' --herdopts '$LKMM_HERD_OPTIONS_DEF' --hw '$LKMM_HW_MAP_FILE' --jobs '$LKMM_JOBS_DEF' --procs '$LKMM_PROCS_DEF' --timeout '$LKMM_TIMEOUT_DEF'"
exit 1
}
@@ -81,7 +83,7 @@ do
echo "Cannot create directory --destdir '$LKMM_DESTDIR'"
usage
fi
- if test -d "$LKMM_DESTDIR" -a -w "$LKMM_DESTDIR" -a -x "$LKMM_DESTDIR"
+ if test -d "$LKMM_DESTDIR" -a -x "$LKMM_DESTDIR"
then
:
else
@@ -95,6 +97,11 @@ do
LKMM_HERD_OPTIONS="$2"
shift
;;
+ --hw)
+ checkarg --hw "(.map file architecture name)" "$#" "$2" '^[A-Za-z0-9_-]\+' '^--'
+ LKMM_HW_MAP_FILE="$2"
+ shift
+ ;;
-j[1-9]*)
njobs="`echo $1 | sed -e 's/^-j//'`"
trailchars="`echo $njobs | sed -e 's/[0-9]\+\(.*\)$/\1/'`"
@@ -106,7 +113,7 @@ do
LKMM_JOBS="`echo $njobs | sed -e 's/^\([0-9]\+\).*$/\1/'`"
;;
--jobs|--job|-j)
- checkarg --jobs "(number)" "$#" "$2" '^[1-9][0-9]\+$' '^--'
+ checkarg --jobs "(number)" "$#" "$2" '^[1-9][0-9]*$' '^--'
LKMM_JOBS="$2"
shift
;;
@@ -120,6 +127,10 @@ do
LKMM_TIMEOUT="$2"
shift
;;
+ --)
+ shift
+ break
+ ;;
*)
echo Unknown argument $1
usage
diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh
new file mode 100755
index 000000000000..94608d4b6502
--- /dev/null
+++ b/tools/memory-model/scripts/runlitmus.sh
@@ -0,0 +1,80 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Without the --hw argument, runs a herd7 test and outputs verification
+# results to a file whose name is that of the specified litmus test,
+# but with ".out" appended.
+#
+# If the --hw argument is specified, this script translates the .litmus
+# C-language file to the specified type of assembly and verifies that.
+# But in this case, litmus tests using complex synchronization (such as
+# locking, RCU, and SRCU) are cheerfully ignored.
+#
+# Either way, return the status of the herd7 command.
+#
+# Usage:
+# runlitmus.sh file.litmus
+#
+# Run this in the directory containing the memory model, specifying the
+# pathname of the litmus test to check. The caller is expected to have
+# properly set up the LKMM environment variables.
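+#
+# Example (hypothetical test name; assumes the LKMM_* environment
+# variables have already been exported, e.g., by parseargs.sh):
+#	scripts/runlitmus.sh litmus/auto/C-example.litmus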
+#
+# Copyright IBM Corporation, 2019
+#
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
+
+litmus=$1
+if test -f "$litmus" -a -r "$litmus"
+then
+ :
+else
+ echo ' !!! ' error: \"$litmus\" is not a readable file
+ exit 255
+fi
+
+if test -z "$LKMM_HW_MAP_FILE" -o ! -e $LKMM_DESTDIR/$litmus.out
+then
+ # LKMM run
+ herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg}
+ echo Herd options: $herdoptions > $LKMM_DESTDIR/$litmus.out
+ /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1
+ ret=$?
+ if test -z "$LKMM_HW_MAP_FILE"
+ then
+ exit $ret
+ fi
+ echo " --- " Automatically generated LKMM output for '"'--hw $LKMM_HW_MAP_FILE'"' run
+fi
+
+# Hardware run
+
+T=/tmp/runlitmus.sh.$$
+trap 'rm -rf $T' 0 2
+mkdir $T
+
+# Generate filenames
+mapfile="Linux2${LKMM_HW_MAP_FILE}.map"
+themefile="$T/${LKMM_HW_MAP_FILE}.theme"
+herdoptions="-model $LKMM_HW_CAT_FILE"
+hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.litmus.'${LKMM_HW_MAP_FILE}'/'`
+hwlitmusfile=`echo $hwlitmus | sed -e 's,^.*/,,'`
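+# For example, "--hw AArch64" selects the Linux2AArch64.map mapping file
+# and turns foo.litmus into foo.litmus.AArch64.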
+
+# Don't run on litmus tests with complex synchronization
+if ! scripts/simpletest.sh $litmus
+then
+ echo ' --- ' error: \"$litmus\" contains locking, RCU, or SRCU
+ exit 254
+fi
+
+# Generate the assembly code and run herd7 on it.
+gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile
+jingle7 -v -theme $themefile $litmus > $LKMM_DESTDIR/$hwlitmus 2> $T/$hwlitmusfile.jingle7.out
+if grep -q "Generated 0 tests" $T/$hwlitmusfile.jingle7.out
+then
+ echo ' !!! ' jingle7 failed, errors in $hwlitmus.err
+ cp $T/$hwlitmusfile.jingle7.out $LKMM_DESTDIR/$hwlitmus.err
+ exit 253
+fi
+/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -unroll 0 $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1
+
+exit $?
diff --git a/tools/memory-model/scripts/runlitmushist.sh b/tools/memory-model/scripts/runlitmushist.sh
index 6ed376f495bb..c6c2bdc67a50 100755
--- a/tools/memory-model/scripts/runlitmushist.sh
+++ b/tools/memory-model/scripts/runlitmushist.sh
@@ -13,7 +13,9 @@
#
# Copyright IBM Corporation, 2018
#
-# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
+
+. scripts/hwfnseg.sh
T=/tmp/runlitmushist.sh.$$
trap 'rm -rf $T' 0
@@ -30,15 +32,12 @@ fi
# Prefixes for per-CPU scripts
for ((i=0;i<$LKMM_JOBS;i++))
do
- echo dir="$LKMM_DESTDIR" > $T/$i.sh
echo T=$T >> $T/$i.sh
- echo herdoptions=\"$LKMM_HERD_OPTIONS\" >> $T/$i.sh
cat << '___EOF___' >> $T/$i.sh
runtest () {
- echo ' ... ' /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $1 '>' $dir/$1.out '2>&1'
- if /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $1 > $dir/$1.out 2>&1
+ if scripts/runlitmus.sh $1
then
- if ! grep -q '^Observation ' $dir/$1.out
+ if ! grep -q '^Observation ' $LKMM_DESTDIR/$1$2.out
then
echo ' !!! Herd failed, no Observation:' $1
fi
@@ -47,10 +46,16 @@ do
if test "$exitcode" -eq 124
then
exitmsg="timed out"
+ elif test "$exitcode" -eq 253
+ then
+ exitmsg=
else
exitmsg="failed, exit code $exitcode"
fi
- echo ' !!! Herd' ${exitmsg}: $1
+ if test -n "$exitmsg"
+ then
+ echo ' !!! Herd' ${exitmsg}: $1
+ fi
fi
}
___EOF___
@@ -59,11 +64,13 @@ done
awk -v q="'" -v b='\\' '
{
print "echo `grep " q "^P[0-9]" b "+(" q " " $0 " | tail -1 | sed -e " q "s/^P" b "([0-9]" b "+" b ")(.*$/" b "1/" q "` " $0
-}' | bash |
-sort -k1n |
-awk -v ncpu=$LKMM_JOBS -v t=$T '
+}' | sh | sort -k1n |
+awk -v dq='"' -v hwfnseg="$hwfnseg" -v ncpu="$LKMM_JOBS" -v t="$T" '
{
- print "runtest " $2 >> t "/" NR % ncpu ".sh";
+ print "if test -z " dq hwfnseg dq " || scripts/simpletest.sh " dq $2 dq
+ print "then"
+ print "\techo runtest " dq $2 dq " " hwfnseg " >> " t "/" NR % ncpu ".sh";
+ print "fi"
}
END {
diff --git a/tools/memory-model/scripts/simpletest.sh b/tools/memory-model/scripts/simpletest.sh
new file mode 100755
index 000000000000..7edc5d361665
--- /dev/null
+++ b/tools/memory-model/scripts/simpletest.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Give zero status if this is a simple test and non-zero otherwise.
+# Simple tests do not contain locking, RCU, or SRCU.
+#
+# Usage:
+# simpletest.sh file.litmus
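+#
+# Example (hypothetical test name):
+#	scripts/simpletest.sh litmus/auto/C-example.litmus && echo simple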
+#
+# Copyright IBM Corporation, 2019
+#
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
+
+
+litmus=$1
+
+if test -f "$litmus" -a -r "$litmus"
+then
+ :
+else
+ echo ' --- ' error: \"$litmus\" is not a readable file
+ exit 255
+fi
+exclude="^[[:space:]]*\("
+exclude="${exclude}spin_lock(\|spin_unlock(\|spin_trylock(\|spin_is_locked("
+exclude="${exclude}\|rcu_read_lock(\|rcu_read_unlock("
+exclude="${exclude}\|synchronize_rcu(\|synchronize_rcu_expedited("
+exclude="${exclude}\|srcu_read_lock(\|srcu_read_unlock("
+exclude="${exclude}\|synchronize_srcu(\|synchronize_srcu_expedited("
+exclude="${exclude}\)"
+if grep -q $exclude $litmus
+then
+ exit 255
+fi
+exit 0
diff --git a/tools/rcu/extract-stall.sh b/tools/rcu/extract-stall.sh
index e565697c9f90..08a39ad44320 100644..100755
--- a/tools/rcu/extract-stall.sh
+++ b/tools/rcu/extract-stall.sh
@@ -1,11 +1,25 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0+
-#
-# Extract any RCU CPU stall warnings present in specified file.
-# Filter out clocksource lines. Note that preceding-lines excludes the
-# initial line of the stall warning but trailing-lines includes it.
-#
-# Usage: extract-stall.sh dmesg-file [ preceding-lines [ trailing-lines ] ]
+
+usage() {
+ echo Extract any RCU CPU stall warnings present in the specified file.
+ echo Filter out clocksource lines. Note that preceding-lines excludes the
+ echo initial line of the stall warning but trailing-lines includes it.
+ echo
+ echo Usage: $(basename $0) dmesg-file [ preceding-lines [ trailing-lines ] ]
+ echo
+ echo Error: $1
+}
+
+# Terminate the script if the console log file is missing or unreadable
+
+if test -f "$1" && test -r "$1"
+then
+ :
+else
+ usage "Console log file \"$1\" missing or unreadable."
+ exit 1
+fi
echo $1
preceding_lines="${2-3}"
diff --git a/tools/testing/selftests/nolibc/.gitignore b/tools/testing/selftests/nolibc/.gitignore
index 4696df589d68..52f613cdad54 100644
--- a/tools/testing/selftests/nolibc/.gitignore
+++ b/tools/testing/selftests/nolibc/.gitignore
@@ -1,4 +1,5 @@
/initramfs/
+/libc-test
/nolibc-test
/run.out
/sysroot/
diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 8fe61d3e3cce..0cbe13809b37 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for nolibc tests
include ../../../scripts/Makefile.include
+# We need this for the "cc-option" macro.
+include ../../../build/Build.include
# we're in ".../tools/testing/selftests/nolibc"
ifeq ($(srctree),)
@@ -13,52 +15,56 @@ ARCH = $(SUBARCH)
endif
# kernel image names by architecture
-IMAGE_i386 = arch/x86/boot/bzImage
-IMAGE_x86_64 = arch/x86/boot/bzImage
-IMAGE_x86 = arch/x86/boot/bzImage
-IMAGE_arm64 = arch/arm64/boot/Image
-IMAGE_arm = arch/arm/boot/zImage
-IMAGE_mips = vmlinuz
-IMAGE_riscv = arch/riscv/boot/Image
-IMAGE_s390 = arch/s390/boot/bzImage
-IMAGE = $(IMAGE_$(ARCH))
-IMAGE_NAME = $(notdir $(IMAGE))
+IMAGE_i386 = arch/x86/boot/bzImage
+IMAGE_x86_64 = arch/x86/boot/bzImage
+IMAGE_x86 = arch/x86/boot/bzImage
+IMAGE_arm64 = arch/arm64/boot/Image
+IMAGE_arm = arch/arm/boot/zImage
+IMAGE_mips = vmlinuz
+IMAGE_riscv = arch/riscv/boot/Image
+IMAGE_s390 = arch/s390/boot/bzImage
+IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi
+IMAGE = $(IMAGE_$(ARCH))
+IMAGE_NAME = $(notdir $(IMAGE))
# default kernel configurations that appear to be usable
-DEFCONFIG_i386 = defconfig
-DEFCONFIG_x86_64 = defconfig
-DEFCONFIG_x86 = defconfig
-DEFCONFIG_arm64 = defconfig
-DEFCONFIG_arm = multi_v7_defconfig
-DEFCONFIG_mips = malta_defconfig
-DEFCONFIG_riscv = defconfig
-DEFCONFIG_s390 = defconfig
-DEFCONFIG = $(DEFCONFIG_$(ARCH))
+DEFCONFIG_i386 = defconfig
+DEFCONFIG_x86_64 = defconfig
+DEFCONFIG_x86 = defconfig
+DEFCONFIG_arm64 = defconfig
+DEFCONFIG_arm = multi_v7_defconfig
+DEFCONFIG_mips = malta_defconfig
+DEFCONFIG_riscv = defconfig
+DEFCONFIG_s390 = defconfig
+DEFCONFIG_loongarch = defconfig
+DEFCONFIG = $(DEFCONFIG_$(ARCH))
# optional tests to run (default = all)
TEST =
# QEMU_ARCH: arch names used by qemu
-QEMU_ARCH_i386 = i386
-QEMU_ARCH_x86_64 = x86_64
-QEMU_ARCH_x86 = x86_64
-QEMU_ARCH_arm64 = aarch64
-QEMU_ARCH_arm = arm
-QEMU_ARCH_mips = mipsel # works with malta_defconfig
-QEMU_ARCH_riscv = riscv64
-QEMU_ARCH_s390 = s390x
-QEMU_ARCH = $(QEMU_ARCH_$(ARCH))
+QEMU_ARCH_i386 = i386
+QEMU_ARCH_x86_64 = x86_64
+QEMU_ARCH_x86 = x86_64
+QEMU_ARCH_arm64 = aarch64
+QEMU_ARCH_arm = arm
+QEMU_ARCH_mips = mipsel # works with malta_defconfig
+QEMU_ARCH_riscv = riscv64
+QEMU_ARCH_s390 = s390x
+QEMU_ARCH_loongarch = loongarch64
+QEMU_ARCH = $(QEMU_ARCH_$(ARCH))
# QEMU_ARGS : some arch-specific args to pass to qemu
-QEMU_ARGS_i386 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_mips = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_s390 = -M s390-ccw-virtio -m 1G -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS = $(QEMU_ARGS_$(ARCH))
+QEMU_ARGS_i386 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_mips = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_s390 = -M s390-ccw-virtio -m 1G -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS = $(QEMU_ARGS_$(ARCH))
# OUTPUT is only set when run from the main makefile, otherwise
# it defaults to this nolibc directory.
@@ -70,8 +76,16 @@ else
Q=@
endif
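+# Stack-protector flags, applied only for the architectures listed below
+# and only when the compiler accepts them (cc-option drops the rest).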
+CFLAGS_STACKPROTECTOR = -DNOLIBC_STACKPROTECTOR \
+ $(call cc-option,-mstack-protector-guard=global) \
+ $(call cc-option,-fstack-protector-all)
+CFLAGS_STKP_i386 = $(CFLAGS_STACKPROTECTOR)
+CFLAGS_STKP_x86_64 = $(CFLAGS_STACKPROTECTOR)
+CFLAGS_STKP_x86 = $(CFLAGS_STACKPROTECTOR)
CFLAGS_s390 = -m64
-CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables $(CFLAGS_$(ARCH))
+CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables \
+ $(call cc-option,-fno-stack-protector) \
+ $(CFLAGS_STKP_$(ARCH)) $(CFLAGS_$(ARCH))
LDFLAGS := -s
help:
@@ -80,6 +94,7 @@ help:
@echo " help this help"
@echo " sysroot create the nolibc sysroot here (uses \$$ARCH)"
@echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)"
+ @echo " libc-test build an executable using the compiler's default libc instead"
@echo " run-user runs the executable under QEMU (uses \$$ARCH, \$$TEST)"
@echo " initramfs prepare the initramfs with nolibc-test"
@echo " defconfig create a fresh new default config (uses \$$ARCH)"
@@ -114,6 +129,9 @@ nolibc-test: nolibc-test.c sysroot/$(ARCH)/include
$(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
-nostdlib -static -Isysroot/$(ARCH)/include $< -lgcc
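+# Build the same test program against the toolchain's regular libc, for
+# comparison with the nolibc build.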
+libc-test: nolibc-test.c
+ $(QUIET_CC)$(CC) -o $@ $<
+
# qemu user-land test
run-user: nolibc-test
$(Q)qemu-$(QEMU_ARCH) ./nolibc-test > "$(CURDIR)/run.out" || :
@@ -145,6 +163,8 @@ clean:
$(Q)rm -rf sysroot
$(call QUIET_CLEAN, nolibc-test)
$(Q)rm -f nolibc-test
+ $(call QUIET_CLEAN, libc-test)
+ $(Q)rm -f libc-test
$(call QUIET_CLEAN, initramfs)
$(Q)rm -rf initramfs
$(call QUIET_CLEAN, run.out)
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index c4a0c915139c..888da60eb5ba 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -20,6 +20,7 @@
#include <linux/reboot.h>
#include <sys/io.h>
#include <sys/ioctl.h>
+#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/reboot.h>
#include <sys/stat.h>
@@ -34,6 +35,8 @@
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
#include <unistd.h>
#endif
#endif
@@ -130,111 +133,111 @@ static int pad_spc(int llen, int cnt, const char *fmt, ...)
*/
#define EXPECT_ZR(cond, expr) \
- do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_zr(expr, llen); } while (0)
+ do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_zr(expr, llen); } while (0)
static int expect_zr(int expr, int llen)
{
int ret = !(expr == 0);
llen += printf(" = %d ", expr);
- pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n");
+ pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
return ret;
}
#define EXPECT_NZ(cond, expr, val) \
- do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_nz(expr, llen; } while (0)
+ do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_nz(expr, llen); } while (0)
static int expect_nz(int expr, int llen)
{
int ret = !(expr != 0);
llen += printf(" = %d ", expr);
- pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n");
+ pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
return ret;
}
#define EXPECT_EQ(cond, expr, val) \
- do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_eq(expr, llen, val); } while (0)
+ do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_eq(expr, llen, val); } while (0)
-static int expect_eq(int expr, int llen, int val)
+static int expect_eq(uint64_t expr, int llen, uint64_t val)
{
int ret = !(expr == val);
- llen += printf(" = %d ", expr);
- pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n");
+ llen += printf(" = %lld ", expr);
+ pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
return ret;
}
#define EXPECT_NE(cond, expr, val) \
- do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_ne(expr, llen, val); } while (0)
+ do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ne(expr, llen, val); } while (0)
static int expect_ne(int expr, int llen, int val)
{
int ret = !(expr != val);
llen += printf(" = %d ", expr);
- pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n");
+ pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
return ret;
}
#define EXPECT_GE(cond, expr, val) \
- do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_ge(expr, llen, val); } while (0)
+ do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ge(expr, llen, val); } while (0)
static int expect_ge(int expr, int llen, int val)
{
int ret = !(expr >= val);
llen += printf(" = %d ", expr);
- pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n");
+ pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
return ret;
}
#define EXPECT_GT(cond, expr, val) \
- do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_gt(expr, llen, val); } while (0)
+ do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_gt(expr, llen, val); } while (0)
static int expect_gt(int expr, int llen, int val)
{
int ret = !(expr > val);
llen += printf(" = %d ", expr);
- pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n");
+ pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
return ret;
}
#define EXPECT_LE(cond, expr, val) \
- do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_le(expr, llen, val); } while (0)
+ do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_le(expr, llen, val); } while (0)
static int expect_le(int expr, int llen, int val)
{
int ret = !(expr <= val);
llen += printf(" = %d ", expr);
- pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n");
+ pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
return ret;
}
#define EXPECT_LT(cond, expr, val) \
- do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_lt(expr, llen, val); } while (0)
+ do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_lt(expr, llen, val); } while (0)
static int expect_lt(int expr, int llen, int val)
{
int ret = !(expr < val);
llen += printf(" = %d ", expr);
- pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n");
+ pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
return ret;
}
#define EXPECT_SYSZR(cond, expr) \
- do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_syszr(expr, llen); } while (0)
+ do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_syszr(expr, llen); } while (0)
static int expect_syszr(int expr, int llen)
{
@@ -243,17 +246,17 @@ static int expect_syszr(int expr, int llen)
if (expr) {
ret = 1;
llen += printf(" = %d %s ", expr, errorname(errno));
- llen += pad_spc(llen, 40, "[FAIL]\n");
+ llen += pad_spc(llen, 64, "[FAIL]\n");
} else {
llen += printf(" = %d ", expr);
- llen += pad_spc(llen, 40, " [OK]\n");
+ llen += pad_spc(llen, 64, " [OK]\n");
}
return ret;
}
#define EXPECT_SYSEQ(cond, expr, val) \
- do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_syseq(expr, llen, val); } while (0)
+ do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_syseq(expr, llen, val); } while (0)
static int expect_syseq(int expr, int llen, int val)
{
@@ -262,17 +265,17 @@ static int expect_syseq(int expr, int llen, int val)
if (expr != val) {
ret = 1;
llen += printf(" = %d %s ", expr, errorname(errno));
- llen += pad_spc(llen, 40, "[FAIL]\n");
+ llen += pad_spc(llen, 64, "[FAIL]\n");
} else {
llen += printf(" = %d ", expr);
- llen += pad_spc(llen, 40, " [OK]\n");
+ llen += pad_spc(llen, 64, " [OK]\n");
}
return ret;
}
#define EXPECT_SYSNE(cond, expr, val) \
- do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_sysne(expr, llen, val); } while (0)
+ do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_sysne(expr, llen, val); } while (0)
static int expect_sysne(int expr, int llen, int val)
{
@@ -281,17 +284,17 @@ static int expect_sysne(int expr, int llen, int val)
if (expr == val) {
ret = 1;
llen += printf(" = %d %s ", expr, errorname(errno));
- llen += pad_spc(llen, 40, "[FAIL]\n");
+ llen += pad_spc(llen, 64, "[FAIL]\n");
} else {
llen += printf(" = %d ", expr);
- llen += pad_spc(llen, 40, " [OK]\n");
+ llen += pad_spc(llen, 64, " [OK]\n");
}
return ret;
}
#define EXPECT_SYSER(cond, expr, expret, experr) \
- do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_syserr(expr, expret, experr, llen); } while (0)
+ do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_syserr(expr, expret, experr, llen); } while (0)
static int expect_syserr(int expr, int expret, int experr, int llen)
{
@@ -302,16 +305,16 @@ static int expect_syserr(int expr, int expret, int experr, int llen)
if (expr != expret || _errno != experr) {
ret = 1;
llen += printf(" != (%d %s) ", expret, errorname(experr));
- llen += pad_spc(llen, 40, "[FAIL]\n");
+ llen += pad_spc(llen, 64, "[FAIL]\n");
} else {
- llen += pad_spc(llen, 40, " [OK]\n");
+ llen += pad_spc(llen, 64, " [OK]\n");
}
return ret;
}
#define EXPECT_PTRZR(cond, expr) \
- do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_ptrzr(expr, llen); } while (0)
+ do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptrzr(expr, llen); } while (0)
static int expect_ptrzr(const void *expr, int llen)
{
@@ -320,16 +323,16 @@ static int expect_ptrzr(const void *expr, int llen)
llen += printf(" = <%p> ", expr);
if (expr) {
ret = 1;
- llen += pad_spc(llen, 40, "[FAIL]\n");
+ llen += pad_spc(llen, 64, "[FAIL]\n");
} else {
- llen += pad_spc(llen, 40, " [OK]\n");
+ llen += pad_spc(llen, 64, " [OK]\n");
}
return ret;
}
#define EXPECT_PTRNZ(cond, expr) \
- do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_ptrnz(expr, llen); } while (0)
+ do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptrnz(expr, llen); } while (0)
static int expect_ptrnz(const void *expr, int llen)
{
@@ -338,16 +341,16 @@ static int expect_ptrnz(const void *expr, int llen)
llen += printf(" = <%p> ", expr);
if (!expr) {
ret = 1;
- llen += pad_spc(llen, 40, "[FAIL]\n");
+ llen += pad_spc(llen, 64, "[FAIL]\n");
} else {
- llen += pad_spc(llen, 40, " [OK]\n");
+ llen += pad_spc(llen, 64, " [OK]\n");
}
return ret;
}
#define EXPECT_STRZR(cond, expr) \
- do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_strzr(expr, llen); } while (0)
+ do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_strzr(expr, llen); } while (0)
static int expect_strzr(const char *expr, int llen)
{
@@ -356,16 +359,16 @@ static int expect_strzr(const char *expr, int llen)
llen += printf(" = <%s> ", expr);
if (expr) {
ret = 1;
- llen += pad_spc(llen, 40, "[FAIL]\n");
+ llen += pad_spc(llen, 64, "[FAIL]\n");
} else {
- llen += pad_spc(llen, 40, " [OK]\n");
+ llen += pad_spc(llen, 64, " [OK]\n");
}
return ret;
}
#define EXPECT_STRNZ(cond, expr) \
- do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_strnz(expr, llen); } while (0)
+ do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_strnz(expr, llen); } while (0)
static int expect_strnz(const char *expr, int llen)
{
@@ -374,16 +377,16 @@ static int expect_strnz(const char *expr, int llen)
llen += printf(" = <%s> ", expr);
if (!expr) {
ret = 1;
- llen += pad_spc(llen, 40, "[FAIL]\n");
+ llen += pad_spc(llen, 64, "[FAIL]\n");
} else {
- llen += pad_spc(llen, 40, " [OK]\n");
+ llen += pad_spc(llen, 64, " [OK]\n");
}
return ret;
}
#define EXPECT_STREQ(cond, expr, cmp) \
- do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_streq(expr, llen, cmp); } while (0)
+ do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_streq(expr, llen, cmp); } while (0)
static int expect_streq(const char *expr, int llen, const char *cmp)
{
@@ -392,16 +395,16 @@ static int expect_streq(const char *expr, int llen, const char *cmp)
llen += printf(" = <%s> ", expr);
if (strcmp(expr, cmp) != 0) {
ret = 1;
- llen += pad_spc(llen, 40, "[FAIL]\n");
+ llen += pad_spc(llen, 64, "[FAIL]\n");
} else {
- llen += pad_spc(llen, 40, " [OK]\n");
+ llen += pad_spc(llen, 64, " [OK]\n");
}
return ret;
}
#define EXPECT_STRNE(cond, expr, cmp) \
- do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_strne(expr, llen, cmp); } while (0)
+ do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_strne(expr, llen, cmp); } while (0)
static int expect_strne(const char *expr, int llen, const char *cmp)
{
@@ -410,9 +413,9 @@ static int expect_strne(const char *expr, int llen, const char *cmp)
llen += printf(" = <%s> ", expr);
if (strcmp(expr, cmp) == 0) {
ret = 1;
- llen += pad_spc(llen, 40, "[FAIL]\n");
+ llen += pad_spc(llen, 64, "[FAIL]\n");
} else {
- llen += pad_spc(llen, 40, " [OK]\n");
+ llen += pad_spc(llen, 64, " [OK]\n");
}
return ret;
}
@@ -477,6 +480,7 @@ static int test_getpagesize(void)
int run_syscall(int min, int max)
{
struct stat stat_buf;
+ int euid0;
int proc;
int test;
int tmp;
@@ -486,6 +490,9 @@ int run_syscall(int min, int max)
/* <proc> indicates whether or not /proc is mounted */
proc = stat("/proc", &stat_buf) == 0;
+ /* this will be used to skip certain tests that can't be run unprivileged */
+ euid0 = geteuid() == 0;
+
for (test = min; test >= 0 && test <= max; test++) {
int llen = 0; // line length
@@ -511,7 +518,7 @@ int run_syscall(int min, int max)
CASE_TEST(chmod_net); EXPECT_SYSZR(proc, chmod("/proc/self/net", 0555)); break;
CASE_TEST(chmod_self); EXPECT_SYSER(proc, chmod("/proc/self", 0555), -1, EPERM); break;
CASE_TEST(chown_self); EXPECT_SYSER(proc, chown("/proc/self", 0, 0), -1, EPERM); break;
- CASE_TEST(chroot_root); EXPECT_SYSZR(1, chroot("/")); break;
+ CASE_TEST(chroot_root); EXPECT_SYSZR(euid0, chroot("/")); break;
CASE_TEST(chroot_blah); EXPECT_SYSER(1, chroot("/proc/self/blah"), -1, ENOENT); break;
CASE_TEST(chroot_exe); EXPECT_SYSER(proc, chroot("/proc/self/exe"), -1, ENOTDIR); break;
CASE_TEST(close_m1); EXPECT_SYSER(1, close(-1), -1, EBADF); break;
@@ -536,7 +543,7 @@ int run_syscall(int min, int max)
CASE_TEST(ioctl_tiocinq); EXPECT_SYSZR(1, ioctl(0, TIOCINQ, &tmp)); break;
CASE_TEST(link_root1); EXPECT_SYSER(1, link("/", "/"), -1, EEXIST); break;
CASE_TEST(link_blah); EXPECT_SYSER(1, link("/proc/self/blah", "/blah"), -1, ENOENT); break;
- CASE_TEST(link_dir); EXPECT_SYSER(1, link("/", "/blah"), -1, EPERM); break;
+ CASE_TEST(link_dir); EXPECT_SYSER(euid0, link("/", "/blah"), -1, EPERM); break;
CASE_TEST(link_cross); EXPECT_SYSER(proc, link("/proc/self/net", "/blah"), -1, EXDEV); break;
CASE_TEST(lseek_m1); EXPECT_SYSER(1, lseek(-1, 0, SEEK_SET), -1, EBADF); break;
CASE_TEST(lseek_0); EXPECT_SYSER(1, lseek(0, 0, SEEK_SET), -1, ESPIPE); break;
@@ -602,6 +609,59 @@ int run_stdlib(int min, int max)
CASE_TEST(memcmp_e0_20); EXPECT_GT(1, memcmp("aaa\xe0", "aaa\x20", 4), 0); break;
CASE_TEST(memcmp_80_e0); EXPECT_LT(1, memcmp("aaa\x80", "aaa\xe0", 4), 0); break;
CASE_TEST(memcmp_e0_80); EXPECT_GT(1, memcmp("aaa\xe0", "aaa\x80", 4), 0); break;
+ CASE_TEST(limit_int8_max); EXPECT_EQ(1, INT8_MAX, (int8_t) 0x7f); break;
+ CASE_TEST(limit_int8_min); EXPECT_EQ(1, INT8_MIN, (int8_t) 0x80); break;
+ CASE_TEST(limit_uint8_max); EXPECT_EQ(1, UINT8_MAX, (uint8_t) 0xff); break;
+ CASE_TEST(limit_int16_max); EXPECT_EQ(1, INT16_MAX, (int16_t) 0x7fff); break;
+ CASE_TEST(limit_int16_min); EXPECT_EQ(1, INT16_MIN, (int16_t) 0x8000); break;
+ CASE_TEST(limit_uint16_max); EXPECT_EQ(1, UINT16_MAX, (uint16_t) 0xffff); break;
+ CASE_TEST(limit_int32_max); EXPECT_EQ(1, INT32_MAX, (int32_t) 0x7fffffff); break;
+ CASE_TEST(limit_int32_min); EXPECT_EQ(1, INT32_MIN, (int32_t) 0x80000000); break;
+ CASE_TEST(limit_uint32_max); EXPECT_EQ(1, UINT32_MAX, (uint32_t) 0xffffffff); break;
+ CASE_TEST(limit_int64_max); EXPECT_EQ(1, INT64_MAX, (int64_t) 0x7fffffffffffffff); break;
+ CASE_TEST(limit_int64_min); EXPECT_EQ(1, INT64_MIN, (int64_t) 0x8000000000000000); break;
+ CASE_TEST(limit_uint64_max); EXPECT_EQ(1, UINT64_MAX, (uint64_t) 0xffffffffffffffff); break;
+ CASE_TEST(limit_int_least8_max); EXPECT_EQ(1, INT_LEAST8_MAX, (int_least8_t) 0x7f); break;
+ CASE_TEST(limit_int_least8_min); EXPECT_EQ(1, INT_LEAST8_MIN, (int_least8_t) 0x80); break;
+ CASE_TEST(limit_uint_least8_max); EXPECT_EQ(1, UINT_LEAST8_MAX, (uint_least8_t) 0xff); break;
+ CASE_TEST(limit_int_least16_max); EXPECT_EQ(1, INT_LEAST16_MAX, (int_least16_t) 0x7fff); break;
+ CASE_TEST(limit_int_least16_min); EXPECT_EQ(1, INT_LEAST16_MIN, (int_least16_t) 0x8000); break;
+ CASE_TEST(limit_uint_least16_max); EXPECT_EQ(1, UINT_LEAST16_MAX, (uint_least16_t) 0xffff); break;
+ CASE_TEST(limit_int_least32_max); EXPECT_EQ(1, INT_LEAST32_MAX, (int_least32_t) 0x7fffffff); break;
+ CASE_TEST(limit_int_least32_min); EXPECT_EQ(1, INT_LEAST32_MIN, (int_least32_t) 0x80000000); break;
+ CASE_TEST(limit_uint_least32_max); EXPECT_EQ(1, UINT_LEAST32_MAX, (uint_least32_t) 0xffffffffU); break;
+ CASE_TEST(limit_int_least64_min); EXPECT_EQ(1, INT_LEAST64_MIN, (int_least64_t) 0x8000000000000000LL); break;
+ CASE_TEST(limit_int_least64_max); EXPECT_EQ(1, INT_LEAST64_MAX, (int_least64_t) 0x7fffffffffffffffLL); break;
+ CASE_TEST(limit_uint_least64_max); EXPECT_EQ(1, UINT_LEAST64_MAX, (uint_least64_t) 0xffffffffffffffffULL); break;
+ CASE_TEST(limit_int_fast8_max); EXPECT_EQ(1, INT_FAST8_MAX, (int_fast8_t) 0x7f); break;
+ CASE_TEST(limit_int_fast8_min); EXPECT_EQ(1, INT_FAST8_MIN, (int_fast8_t) 0x80); break;
+ CASE_TEST(limit_uint_fast8_max); EXPECT_EQ(1, UINT_FAST8_MAX, (uint_fast8_t) 0xff); break;
+ CASE_TEST(limit_int_fast16_min); EXPECT_EQ(1, INT_FAST16_MIN, (int_fast16_t) INTPTR_MIN); break;
+ CASE_TEST(limit_int_fast16_max); EXPECT_EQ(1, INT_FAST16_MAX, (int_fast16_t) INTPTR_MAX); break;
+ CASE_TEST(limit_uint_fast16_max); EXPECT_EQ(1, UINT_FAST16_MAX, (uint_fast16_t) UINTPTR_MAX); break;
+ CASE_TEST(limit_int_fast32_min); EXPECT_EQ(1, INT_FAST32_MIN, (int_fast32_t) INTPTR_MIN); break;
+ CASE_TEST(limit_int_fast32_max); EXPECT_EQ(1, INT_FAST32_MAX, (int_fast32_t) INTPTR_MAX); break;
+ CASE_TEST(limit_uint_fast32_max); EXPECT_EQ(1, UINT_FAST32_MAX, (uint_fast32_t) UINTPTR_MAX); break;
+ CASE_TEST(limit_int_fast64_min); EXPECT_EQ(1, INT_FAST64_MIN, (int_fast64_t) INTPTR_MIN); break;
+ CASE_TEST(limit_int_fast64_max); EXPECT_EQ(1, INT_FAST64_MAX, (int_fast64_t) INTPTR_MAX); break;
+ CASE_TEST(limit_uint_fast64_max); EXPECT_EQ(1, UINT_FAST64_MAX, (uint_fast64_t) UINTPTR_MAX); break;
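+ /* The limits below depend on the native word size (__SIZEOF_LONG__). */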
+#if __SIZEOF_LONG__ == 8
+ CASE_TEST(limit_intptr_min); EXPECT_EQ(1, INTPTR_MIN, (intptr_t) 0x8000000000000000LL); break;
+ CASE_TEST(limit_intptr_max); EXPECT_EQ(1, INTPTR_MAX, (intptr_t) 0x7fffffffffffffffLL); break;
+ CASE_TEST(limit_uintptr_max); EXPECT_EQ(1, UINTPTR_MAX, (uintptr_t) 0xffffffffffffffffULL); break;
+ CASE_TEST(limit_ptrdiff_min); EXPECT_EQ(1, PTRDIFF_MIN, (ptrdiff_t) 0x8000000000000000LL); break;
+ CASE_TEST(limit_ptrdiff_max); EXPECT_EQ(1, PTRDIFF_MAX, (ptrdiff_t) 0x7fffffffffffffffLL); break;
+ CASE_TEST(limit_size_max); EXPECT_EQ(1, SIZE_MAX, (size_t) 0xffffffffffffffffULL); break;
+#elif __SIZEOF_LONG__ == 4
+ CASE_TEST(limit_intptr_min); EXPECT_EQ(1, INTPTR_MIN, (intptr_t) 0x80000000); break;
+ CASE_TEST(limit_intptr_max); EXPECT_EQ(1, INTPTR_MAX, (intptr_t) 0x7fffffff); break;
+ CASE_TEST(limit_uintptr_max); EXPECT_EQ(1, UINTPTR_MAX, (uintptr_t) 0xffffffffU); break;
+ CASE_TEST(limit_ptrdiff_min); EXPECT_EQ(1, PTRDIFF_MIN, (ptrdiff_t) 0x80000000); break;
+ CASE_TEST(limit_ptrdiff_max); EXPECT_EQ(1, PTRDIFF_MAX, (ptrdiff_t) 0x7fffffff); break;
+ CASE_TEST(limit_size_max); EXPECT_EQ(1, SIZE_MAX, (size_t) 0xffffffffU); break;
+#else
+# warning "__SIZEOF_LONG__ is undefined"
+#endif /* __SIZEOF_LONG__ */
case __LINE__:
return ret; /* must be last */
/* note: do not set any defaults so as to permit holes above */
@@ -610,6 +670,144 @@ int run_stdlib(int min, int max)
return ret;
}
+#define EXPECT_VFPRINTF(c, expected, fmt, ...) \
+ ret += expect_vfprintf(llen, c, expected, fmt, ##__VA_ARGS__)
+
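+/* Verify that vfprintf() of fmt/args writes exactly <c> bytes matching
+ * <expected>, using a memfd-backed FILE as the output stream.
+ */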
+static int expect_vfprintf(int llen, size_t c, const char *expected, const char *fmt, ...)
+{
+ int ret, fd, w, r;
+ char buf[100];
+ FILE *memfile;
+ va_list args;
+
+ fd = memfd_create("vfprintf", 0);
+ if (fd == -1) {
+ pad_spc(llen, 64, "[FAIL]\n");
+ return 1;
+ }
+
+ memfile = fdopen(fd, "w+");
+ if (!memfile) {
+ pad_spc(llen, 64, "[FAIL]\n");
+ return 1;
+ }
+
+ va_start(args, fmt);
+ w = vfprintf(memfile, fmt, args);
+ va_end(args);
+
+ if (w != c) {
+ llen += printf(" written(%d) != %d", w, (int) c);
+ pad_spc(llen, 64, "[FAIL]\n");
+ return 1;
+ }
+
+ fflush(memfile);
+ lseek(fd, 0, SEEK_SET);
+
+ r = read(fd, buf, sizeof(buf) - 1);
+ buf[r] = '\0';
+
+ fclose(memfile);
+
+ if (r != w) {
+ llen += printf(" written(%d) != read(%d)", w, r);
+ pad_spc(llen, 64, "[FAIL]\n");
+ return 1;
+ }
+
+ llen += printf(" \"%s\" = \"%s\"", expected, buf);
+ ret = strncmp(expected, buf, c);
+
+ pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
+ return ret;
+}
+
+static int run_vfprintf(int min, int max)
+{
+ int test;
+ int tmp;
+ int ret = 0;
+ void *p1, *p2;
+
+ for (test = min; test >= 0 && test <= max; test++) {
+ int llen = 0; // line length
+
+ /* avoid leaving empty lines below, this will insert holes into
+ * test numbers.
+ */
+ switch (test + __LINE__ + 1) {
+ CASE_TEST(empty); EXPECT_VFPRINTF(0, "", ""); break;
+ CASE_TEST(simple); EXPECT_VFPRINTF(3, "foo", "foo"); break;
+ CASE_TEST(string); EXPECT_VFPRINTF(3, "foo", "%s", "foo"); break;
+ CASE_TEST(number); EXPECT_VFPRINTF(4, "1234", "%d", 1234); break;
+ CASE_TEST(negnumber); EXPECT_VFPRINTF(5, "-1234", "%d", -1234); break;
+ CASE_TEST(unsigned); EXPECT_VFPRINTF(5, "12345", "%u", 12345); break;
+ CASE_TEST(char); EXPECT_VFPRINTF(1, "c", "%c", 'c'); break;
+ CASE_TEST(hex); EXPECT_VFPRINTF(1, "f", "%x", 0xf); break;
+ CASE_TEST(pointer); EXPECT_VFPRINTF(3, "0x1", "%p", (void *) 0x1); break;
+ case __LINE__:
+ return ret; /* must be last */
+ /* note: do not set any defaults so as to permit holes above */
+ }
+ }
+ return ret;
+}
+
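+/* Deliberately overrun a stack buffer so that, with the stack protector
+ * enabled, the child forked by run_protection() dies with SIGABRT.
+ */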
+static int smash_stack(void)
+{
+ char buf[100];
+ volatile char *ptr = buf;
+ size_t i;
+
+ for (i = 0; i < 200; i++)
+ ptr[i] = 'P';
+
+ return 1;
+}
+
+static int run_protection(int min, int max)
+{
+ pid_t pid;
+ int llen = 0, status;
+
+ llen += printf("0 -fstackprotector ");
+
+#if !defined(NOLIBC_STACKPROTECTOR)
+ llen += printf("not supported");
+ pad_spc(llen, 64, "[SKIPPED]\n");
+ return 0;
+#endif
+
+ pid = -1;
+ pid = fork();
+
+ switch (pid) {
+ case -1:
+ llen += printf("fork()");
+ pad_spc(llen, 64, "[FAIL]\n");
+ return 1;
+
+ case 0:
+ close(STDOUT_FILENO);
+ close(STDERR_FILENO);
+
+ smash_stack();
+ return 1;
+
+ default:
+ pid = waitpid(pid, &status, 0);
+
+ if (pid == -1 || !WIFSIGNALED(status) || WTERMSIG(status) != SIGABRT) {
+ llen += printf("waitpid()");
+ pad_spc(llen, 64, "[FAIL]\n");
+ return 1;
+ }
+ pad_spc(llen, 64, " [OK]\n");
+ return 0;
+ }
+}
+
/* prepare what needs to be prepared for pid 1 (stdio, /dev, /proc, etc) */
int prepare(void)
{
@@ -660,10 +858,12 @@ int prepare(void)
}
/* This is the definition of known test names, with their functions */
-static struct test test_names[] = {
+static const struct test test_names[] = {
/* add new tests here */
- { .name = "syscall", .func = run_syscall },
- { .name = "stdlib", .func = run_stdlib },
+ { .name = "syscall", .func = run_syscall },
+ { .name = "stdlib", .func = run_stdlib },
+ { .name = "vfprintf", .func = run_vfprintf },
+ { .name = "protection", .func = run_protection },
{ 0 }
};
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index b52d5069563c..48b9147e8c91 100644
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -250,7 +250,7 @@ identify_qemu_args () {
echo -machine virt,gic-version=host -cpu host
;;
qemu-system-ppc64)
- echo -enable-kvm -M pseries -nodefaults
+ echo -M pseries -nodefaults
echo -device spapr-vscsi
if test -n "$TORTURE_QEMU_INTERACTIVE" -a -n "$TORTURE_QEMU_MAC"
then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
index 8a968fbda02c..88ca4e368489 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
@@ -193,7 +193,7 @@ do
qemu_cmd_dir="`dirname "$i"`"
kernel_dir="`echo $qemu_cmd_dir | sed -e 's/\.[0-9]\+$//'`"
jitter_dir="`dirname "$kernel_dir"`"
- kvm-transform.sh "$kernel_dir/bzImage" "$qemu_cmd_dir/console.log" "$jitter_dir" $dur "$bootargs" < $T/qemu-cmd > $i
+ kvm-transform.sh "$kernel_dir/bzImage" "$qemu_cmd_dir/console.log" "$jitter_dir" "$dur" "$bootargs" < $T/qemu-cmd > $i
if test -n "$arg_remote"
then
echo "# TORTURE_KCONFIG_GDB_ARG=''" >> $i
diff --git a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh
new file mode 100755
index 000000000000..2e63ef009d59
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Run SRCU-lockdep tests and report any that fail to meet expectations.
+#
+# Copyright (C) 2021 Meta Platforms, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+usage () {
+ echo "Usage: $scriptname optional arguments:"
+ echo " --datestamp string"
+ exit 1
+}
+
+ds=`date +%Y.%m.%d-%H.%M.%S`-srcu_lockdep
+scriptname="$0"
+
+T="`mktemp -d ${TMPDIR-/tmp}/srcu_lockdep.sh.XXXXXX`"
+trap 'rm -rf $T' 0
+
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
+. functions.sh
+
+while test $# -gt 0
+do
+ case "$1" in
+ --datestamp)
+ checkarg --datestamp "(relative pathname)" "$#" "$2" '^[a-zA-Z0-9._/-]*$' '^--'
+ ds=$2
+ shift
+ ;;
+ *)
+ echo Unknown argument $1
+ usage
+ ;;
+ esac
+ shift
+done
+
+err=
+nerrs=0
+for d in 0 1
+do
+ for t in 0 1 2
+ do
+ for c in 1 2 3
+ do
+ err=
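+ # Encode the scenario for rcutorture.test_srcu_lockdep: d selects whether
+ # a lockdep complaint (and thus a kvm.sh failure) is expected, while t and
+ # c select the test variant.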
+ val=$((d*1000+t*10+c))
+ tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --bootargs "rcutorture.test_srcu_lockdep=$val" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1
+ ret=$?
+ mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val"
+ if test "$d" -ne 0 && test "$ret" -eq 0
+ then
+ err=1
+ echo -n Unexpected success for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err"
+ fi
+ if test "$d" -eq 0 && test "$ret" -ne 0
+ then
+ err=1
+ echo -n Unexpected failure for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err"
+ fi
+ if test -n "$err"
+ then
+ grep "rcu_torture_init_srcu_lockdep: test_srcu_lockdep = " "$RCUTORTURE/res/$ds/$val/SRCU-P/console.log" | sed -e 's/^.*rcu_torture_init_srcu_lockdep://' >> "$RCUTORTURE/res/$ds/$val/kvm.sh.err"
+ cat "$RCUTORTURE/res/$ds/$val/kvm.sh.err"
+ nerrs=$((nerrs+1))
+ fi
+ done
+ done
+done
+if test "$nerrs" -ne 0
+then
+ exit 1
+fi
+exit 0
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index 130d0de4c3bb..5a2ae2264403 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -497,16 +497,16 @@ fi
if test "$do_clocksourcewd" = "yes"
then
- torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000"
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 tsc=watchdog"
torture_set "clocksourcewd-1" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --kconfig "CONFIG_TEST_CLOCKSOURCE_WATCHDOG=y" --trust-make
- torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 clocksource.max_cswd_read_retries=1"
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 clocksource.max_cswd_read_retries=1 tsc=watchdog"
torture_set "clocksourcewd-2" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --kconfig "CONFIG_TEST_CLOCKSOURCE_WATCHDOG=y" --trust-make
# In case our work is already done...
if test "$do_rcutorture" != "yes"
then
- torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000"
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 tsc=watchdog"
torture_set "clocksourcewd-3" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --trust-make
fi
fi
diff --git a/tools/testing/selftests/rcutorture/configs/lock/CFLIST b/tools/testing/selftests/rcutorture/configs/lock/CFLIST
index 41bae5824339..28e23d05d5a5 100644
--- a/tools/testing/selftests/rcutorture/configs/lock/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/lock/CFLIST
@@ -5,3 +5,5 @@ LOCK04
LOCK05
LOCK06
LOCK07
+LOCK08
+LOCK09
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK08 b/tools/testing/selftests/rcutorture/configs/lock/LOCK08
new file mode 100644
index 000000000000..1d1da1477fc3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK08
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK08.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK08.boot
new file mode 100644
index 000000000000..b8b6caebb89e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK08.boot
@@ -0,0 +1 @@
+locktorture.torture_type=mutex_lock locktorture.nested_locks=8
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK09 b/tools/testing/selftests/rcutorture/configs/lock/LOCK09
new file mode 100644
index 000000000000..1d1da1477fc3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK09
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK09.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK09.boot
new file mode 100644
index 000000000000..fd5eff148a93
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK09.boot
@@ -0,0 +1 @@
+locktorture.torture_type=rtmutex_lock locktorture.nested_locks=8
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot
index f57720c52c0f..84f6bb98ce99 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot
@@ -5,4 +5,4 @@ rcutree.gp_init_delay=3
rcutree.gp_cleanup_delay=3
rcutree.kthread_prio=2
threadirqs
-tree.use_softirq=0
+rcutree.use_softirq=0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
index 8ae41d5f81a3..04831ef1f9b5 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
@@ -15,3 +15,4 @@ CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y
+CONFIG_BOOTPARAM_HOTPLUG_CPU0=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
index 64f864f1f361..8e50bfd4b710 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
@@ -4,4 +4,4 @@ rcutree.gp_init_delay=3
rcutree.gp_cleanup_delay=3
rcutree.kthread_prio=2
threadirqs
-tree.use_softirq=0
+rcutree.use_softirq=0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
index ae395981b5e5..dc4985064b3a 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -15,3 +15,4 @@ CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y
CONFIG_RCU_EQS_DEBUG=y
+CONFIG_RCU_LAZY=y
diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
index 42acb1a64ce1..3f5fb66f16df 100644
--- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -71,9 +71,5 @@ CONFIG_TASKS_RCU
These are controlled by CONFIG_PREEMPT and/or CONFIG_SMP.
-CONFIG_SRCU
-
- Selected by CONFIG_RCU_TORTURE_TEST, so cannot disable.
-
boot parameters ignored: TBD