From 2e672ab2d491713541963afca3a5967ccc2376e9 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 2 Oct 2017 16:54:49 -0700
Subject: rcu: Avoid ->dynticks_nmi_nesting store tearing

NMIs can nest, and store tearing could in theory happen on carries
from one byte to the next.  This commit therefore adds the WRITE_ONCE()
macros preventing this.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f9c0ca2ccf0c..c5d960f86cf8 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1103,7 +1103,8 @@ void rcu_nmi_enter(void)
 		rcu_dynticks_eqs_exit();
 		incby = 1;
 	}
-	rdtp->dynticks_nmi_nesting += incby;
+	WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */
+		   rdtp->dynticks_nmi_nesting + incby);
 	barrier();
 }
 
@@ -1135,12 +1136,13 @@ void rcu_nmi_exit(void)
 	 * leave it in non-RCU-idle state.
 	 */
 	if (rdtp->dynticks_nmi_nesting != 1) {
-		rdtp->dynticks_nmi_nesting -= 2;
+		WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */
+			   rdtp->dynticks_nmi_nesting - 2);
 		return;
 	}
 
 	/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
-	rdtp->dynticks_nmi_nesting = 0;
+	WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
 	rcu_dynticks_eqs_enter();
 }
 
-- 
cgit v1.2.1


From a0eb22bf64a755bb162b421120b9fbe7d012b85f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 2 Oct 2017 19:45:10 -0700
Subject: rcu: Reduce dyntick-idle state space

Both extended-quiescent-state entry and exit first update the nesting
counter and then adjust the dyntick-idle state.  This means that there
are four states: (1) Both nesting and dyntick idle indicate idle,
(2) Nesting indicates idle but dyntick idle does not, (3) Nesting indicates
non-idle and dyntick idle does not, and (4) Both nesting and dyntick
idle indicate non-idle.  This commit simplifies the state space by
eliminating #3, reversing the order of updates on exit from extended
quiescent state.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index c5d960f86cf8..49f661bb8ffe 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -928,21 +928,21 @@ void rcu_irq_exit_irqson(void)
  * we really have exited idle, and must do the appropriate accounting.
  * The caller must have disabled interrupts.
  */
-static void rcu_eqs_exit_common(long long oldval, int user)
+static void rcu_eqs_exit_common(long long newval, int user)
 {
 	RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);)
 
 	rcu_dynticks_task_exit();
 	rcu_dynticks_eqs_exit();
 	rcu_cleanup_after_idle();
-	trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
+	trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, newval);
 	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
 	    !user && !is_idle_task(current)) {
 		struct task_struct *idle __maybe_unused =
 			idle_task(smp_processor_id());
 
 		trace_rcu_dyntick(TPS("Error on exit: not idle task"),
-				  oldval, rdtp->dynticks_nesting);
+				  rdtp->dynticks_nesting, newval);
 		rcu_ftrace_dump(DUMP_ORIG);
 		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
 			  current->pid, current->comm,
@@ -967,8 +967,8 @@ static void rcu_eqs_exit(bool user)
 		rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
 	} else {
 		__this_cpu_inc(disable_rcu_irq_enter);
+		rcu_eqs_exit_common(DYNTICK_TASK_EXIT_IDLE, user);
 		rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-		rcu_eqs_exit_common(oldval, user);
 		__this_cpu_dec(disable_rcu_irq_enter);
 	}
 }
@@ -1037,7 +1037,7 @@ void rcu_user_exit(void)
 void rcu_irq_enter(void)
 {
 	struct rcu_dynticks *rdtp;
-	long long oldval;
+	long long newval;
 
 	lockdep_assert_irqs_disabled();
 	rdtp = this_cpu_ptr(&rcu_dynticks);
@@ -1046,14 +1046,13 @@ void rcu_irq_enter(void)
 	if (rdtp->dynticks_nmi_nesting)
 		return;
 
-	oldval = rdtp->dynticks_nesting;
-	rdtp->dynticks_nesting++;
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
-		     rdtp->dynticks_nesting == 0);
-	if (oldval)
-		trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
+	newval = rdtp->dynticks_nesting + 1;
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && newval == 0);
+	if (rdtp->dynticks_nesting)
+		trace_rcu_dyntick(TPS("++="), rdtp->dynticks_nesting, newval);
 	else
-		rcu_eqs_exit_common(oldval, true);
+		rcu_eqs_exit_common(newval, true);
+	rdtp->dynticks_nesting++;
 }
 
 /*
-- 
cgit v1.2.1


From fd581a91ac16187625ec509414d08d37827472c4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 2 Oct 2017 21:56:20 -0700
Subject: rcu: Move rcu_nmi_{enter,exit}() to prepare for consolidation

This is a code-motion-only commit that prepares to define rcu_irq_enter()
in terms of rcu_nmi_enter() and rcu_irq_exit() in terms of rcu_irq_exit().

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 150 +++++++++++++++++++++++++++---------------------------
 1 file changed, 75 insertions(+), 75 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 49f661bb8ffe..419f3c38e1b6 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -866,6 +866,44 @@ void rcu_user_enter(void)
 }
 #endif /* CONFIG_NO_HZ_FULL */
 
+/**
+ * rcu_nmi_exit - inform RCU of exit from NMI context
+ *
+ * If we are returning from the outermost NMI handler that interrupted an
+ * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting
+ * to let the RCU grace-period handling know that the CPU is back to
+ * being RCU-idle.
+ *
+ * If you add or remove a call to rcu_nmi_exit(), be sure to test
+ * with CONFIG_RCU_EQS_DEBUG=y.
+ */
+void rcu_nmi_exit(void)
+{
+	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
+
+	/*
+	 * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
+	 * (We are exiting an NMI handler, so RCU better be paying attention
+	 * to us!)
+	 */
+	WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0);
+	WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs());
+
+	/*
+	 * If the nesting level is not 1, the CPU wasn't RCU-idle, so
+	 * leave it in non-RCU-idle state.
+	 */
+	if (rdtp->dynticks_nmi_nesting != 1) {
+		WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */
+			   rdtp->dynticks_nmi_nesting - 2);
+		return;
+	}
+
+	/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
+	WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
+	rcu_dynticks_eqs_enter();
+}
+
 /**
  * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
  *
@@ -1012,6 +1050,43 @@ void rcu_user_exit(void)
 }
 #endif /* CONFIG_NO_HZ_FULL */
 
+/**
+ * rcu_nmi_enter - inform RCU of entry to NMI context
+ *
+ * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and
+ * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling know
+ * that the CPU is active.  This implementation permits nested NMIs, as
+ * long as the nesting level does not overflow an int.  (You will probably
+ * run out of stack space first.)
+ *
+ * If you add or remove a call to rcu_nmi_enter(), be sure to test
+ * with CONFIG_RCU_EQS_DEBUG=y.
+ */
+void rcu_nmi_enter(void)
+{
+	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
+	int incby = 2;
+
+	/* Complain about underflow. */
+	WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0);
+
+	/*
+	 * If idle from RCU viewpoint, atomically increment ->dynticks
+	 * to mark non-idle and increment ->dynticks_nmi_nesting by one.
+	 * Otherwise, increment ->dynticks_nmi_nesting by two.  This means
+	 * if ->dynticks_nmi_nesting is equal to one, we are guaranteed
+	 * to be in the outermost NMI handler that interrupted an RCU-idle
+	 * period (observation due to Andy Lutomirski).
+	 */
+	if (rcu_dynticks_curr_cpu_in_eqs()) {
+		rcu_dynticks_eqs_exit();
+		incby = 1;
+	}
+	WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */
+		   rdtp->dynticks_nmi_nesting + incby);
+	barrier();
+}
+
 /**
  * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
  *
@@ -1070,81 +1145,6 @@ void rcu_irq_enter_irqson(void)
 	local_irq_restore(flags);
 }
 
-/**
- * rcu_nmi_enter - inform RCU of entry to NMI context
- *
- * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and
- * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling know
- * that the CPU is active.  This implementation permits nested NMIs, as
- * long as the nesting level does not overflow an int.  (You will probably
- * run out of stack space first.)
- *
- * If you add or remove a call to rcu_nmi_enter(), be sure to test
- * with CONFIG_RCU_EQS_DEBUG=y.
- */
-void rcu_nmi_enter(void)
-{
-	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-	int incby = 2;
-
-	/* Complain about underflow. */
-	WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0);
-
-	/*
-	 * If idle from RCU viewpoint, atomically increment ->dynticks
-	 * to mark non-idle and increment ->dynticks_nmi_nesting by one.
-	 * Otherwise, increment ->dynticks_nmi_nesting by two.  This means
-	 * if ->dynticks_nmi_nesting is equal to one, we are guaranteed
-	 * to be in the outermost NMI handler that interrupted an RCU-idle
-	 * period (observation due to Andy Lutomirski).
-	 */
-	if (rcu_dynticks_curr_cpu_in_eqs()) {
-		rcu_dynticks_eqs_exit();
-		incby = 1;
-	}
-	WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */
-		   rdtp->dynticks_nmi_nesting + incby);
-	barrier();
-}
-
-/**
- * rcu_nmi_exit - inform RCU of exit from NMI context
- *
- * If we are returning from the outermost NMI handler that interrupted an
- * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting
- * to let the RCU grace-period handling know that the CPU is back to
- * being RCU-idle.
- *
- * If you add or remove a call to rcu_nmi_exit(), be sure to test
- * with CONFIG_RCU_EQS_DEBUG=y.
- */
-void rcu_nmi_exit(void)
-{
-	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-
-	/*
-	 * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
-	 * (We are exiting an NMI handler, so RCU better be paying attention
-	 * to us!)
-	 */
-	WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0);
-	WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs());
-
-	/*
-	 * If the nesting level is not 1, the CPU wasn't RCU-idle, so
-	 * leave it in non-RCU-idle state.
-	 */
-	if (rdtp->dynticks_nmi_nesting != 1) {
-		WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */
-			   rdtp->dynticks_nmi_nesting - 2);
-		return;
-	}
-
-	/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
-	WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
-	rcu_dynticks_eqs_enter();
-}
-
 /**
  * rcu_is_watching - see if RCU thinks that the current CPU is idle
  *
-- 
cgit v1.2.1


From 6136d6e48a0138f6be5bb3427dbeb0ba07a546a4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 3 Oct 2017 08:28:04 -0700
Subject: rcu: Clamp ->dynticks_nmi_nesting at eqs entry/exit

In preparation for merging dyntick-idle irq handling into the NMI
algorithm, clamp ->dynticks_nmi_nesting value to allow for interrupts
that enter but never leave and vice versa.

It is important that the clamping happen outside of the extended quiescent
state.  Otherwise, there will be short windows where irqs and NMIs fail
to convince RCU to start watching.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/rcu.h  | 2 ++
 kernel/rcu/tree.c | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'kernel')

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 59c471de342a..f4a411964c41 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -56,6 +56,8 @@
 #define DYNTICK_TASK_EXIT_IDLE	   (DYNTICK_TASK_NEST_VALUE + \
 				    DYNTICK_TASK_FLAG)
 
+#define DYNTICK_IRQ_NONIDLE	((INT_MAX / 2) + 1)
+
 
 /*
  * Grace-period counter management.
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 419f3c38e1b6..142cdd4a50c9 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -818,6 +818,7 @@ static void rcu_eqs_enter(bool user)
 	struct rcu_dynticks *rdtp;
 
 	rdtp = this_cpu_ptr(&rcu_dynticks);
+	WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0);
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
 		     (rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0);
 	if ((rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
@@ -1008,6 +1009,7 @@ static void rcu_eqs_exit(bool user)
 		rcu_eqs_exit_common(DYNTICK_TASK_EXIT_IDLE, user);
 		rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
 		__this_cpu_dec(disable_rcu_irq_enter);
+		WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
 	}
 }
 
-- 
cgit v1.2.1


From 58721f5da4bcd5187566f4159a4fc88f70bf74f6 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 3 Oct 2017 10:42:22 -0700
Subject: rcu: Define rcu_irq_{enter,exit}() in terms of rcu_nmi_{enter,exit}()

RCU currently uses two different mechanisms for tracking irqs and NMIs.
This is unnecessary complexity: Given that NMIs can nest and given that
RCU's tracking handles such nesting, the NMI tracking mechanism can also
be used to track irqs.  This commit therefore defines rcu_irq_enter()
in terms of rcu_nmi_enter() and rcu_irq_exit() in terms of rcu_nmi_exit().

Unfortunately, callers must still distinguish between the irq and NMI
functions because additional actions are taken when an irq interrupts
idle or nohz_full usermode execution, and these actions cannot always
be taken from NMI handlers.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 59 ++++++++++++++++++++-----------------------------------
 1 file changed, 21 insertions(+), 38 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 142cdd4a50c9..fde0e840563f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -266,6 +266,7 @@ void rcu_bh_qs(void)
 
 static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
+	.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
 	.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
 };
 
@@ -914,8 +915,8 @@ void rcu_nmi_exit(void)
  *
  * This code assumes that the idle loop never does anything that might
  * result in unbalanced calls to irq_enter() and irq_exit().  If your
- * architecture violates this assumption, RCU will give you what you
- * deserve, good and hard.  But very infrequently and irreproducibly.
+ * architecture's idle loop violates this assumption, RCU will give you what
+ * you deserve, good and hard.  But very infrequently and irreproducibly.
  *
  * Use things like work queues to work around this limitation.
  *
@@ -926,23 +927,14 @@ void rcu_nmi_exit(void)
  */
 void rcu_irq_exit(void)
 {
-	struct rcu_dynticks *rdtp;
+	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 
 	lockdep_assert_irqs_disabled();
-	rdtp = this_cpu_ptr(&rcu_dynticks);
-
-	/* Page faults can happen in NMI handlers, so check... */
-	if (rdtp->dynticks_nmi_nesting)
-		return;
-
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
-		     rdtp->dynticks_nesting < 1);
-	if (rdtp->dynticks_nesting <= 1) {
-		rcu_eqs_enter_common(true);
-	} else {
-		trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nesting, rdtp->dynticks_nesting - 1);
-		rdtp->dynticks_nesting--;
-	}
+	if (rdtp->dynticks_nmi_nesting == 1)
+		rcu_prepare_for_idle();
+	rcu_nmi_exit();
+	if (rdtp->dynticks_nmi_nesting == 0)
+		rcu_dynticks_task_enter();
 }
 
 /*
@@ -1097,12 +1089,12 @@ void rcu_nmi_enter(void)
  * sections can occur.  The caller must have disabled interrupts.
  *
  * Note that the Linux kernel is fully capable of entering an interrupt
- * handler that it never exits, for example when doing upcalls to
- * user mode!  This code assumes that the idle loop never does upcalls to
- * user mode.  If your architecture does do upcalls from the idle loop (or
- * does anything else that results in unbalanced calls to the irq_enter()
- * and irq_exit() functions), RCU will give you what you deserve, good
- * and hard.  But very infrequently and irreproducibly.
+ * handler that it never exits, for example when doing upcalls to user mode!
+ * This code assumes that the idle loop never does upcalls to user mode.
+ * If your architecture's idle loop does do upcalls to user mode (or does
+ * anything else that results in unbalanced calls to the irq_enter() and
+ * irq_exit() functions), RCU will give you what you deserve, good and hard.
+ * But very infrequently and irreproducibly.
  *
  * Use things like work queues to work around this limitation.
  *
@@ -1113,23 +1105,14 @@ void rcu_nmi_enter(void)
  */
 void rcu_irq_enter(void)
 {
-	struct rcu_dynticks *rdtp;
-	long long newval;
+	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 
 	lockdep_assert_irqs_disabled();
-	rdtp = this_cpu_ptr(&rcu_dynticks);
-
-	/* Page faults can happen in NMI handlers, so check... */
-	if (rdtp->dynticks_nmi_nesting)
-		return;
-
-	newval = rdtp->dynticks_nesting + 1;
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && newval == 0);
-	if (rdtp->dynticks_nesting)
-		trace_rcu_dyntick(TPS("++="), rdtp->dynticks_nesting, newval);
-	else
-		rcu_eqs_exit_common(newval, true);
-	rdtp->dynticks_nesting++;
+	if (rdtp->dynticks_nmi_nesting == 0)
+		rcu_dynticks_task_exit();
+	rcu_nmi_enter();
+	if (rdtp->dynticks_nmi_nesting == 1)
+		rcu_cleanup_after_idle();
 }
 
 /*
-- 
cgit v1.2.1


From 51a1fd30f13090be7750fed86cf3728afaf4e394 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 3 Oct 2017 14:43:40 -0700
Subject: rcu: Make ->dynticks_nesting be a simple counter

Now that ->dynticks_nesting counts only process-level dyntick-idle
entry and exit, there is no need for the elaborate segmented counter
with its guard fields and overflow checking.  This commit therefore
makes ->dynticks_nesting be a simple counter.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/rcu.h  | 27 +--------------------------
 kernel/rcu/tree.c | 40 ++++++++++++++++++++--------------------
 kernel/rcu/tree.h |  1 -
 3 files changed, 21 insertions(+), 47 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index f4a411964c41..afe0559d1867 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -30,32 +30,7 @@
 #define RCU_TRACE(stmt)
 #endif /* #else #ifdef CONFIG_RCU_TRACE */
 
-/*
- * Process-level increment to ->dynticks_nesting field.  This allows for
- * architectures that use half-interrupts and half-exceptions from
- * process context.
- *
- * DYNTICK_TASK_NEST_MASK defines a field of width DYNTICK_TASK_NEST_WIDTH
- * that counts the number of process-based reasons why RCU cannot
- * consider the corresponding CPU to be idle, and DYNTICK_TASK_NEST_VALUE
- * is the value used to increment or decrement this field.
- *
- * The rest of the bits could in principle be used to count interrupts,
- * but this would mean that a negative-one value in the interrupt
- * field could incorrectly zero out the DYNTICK_TASK_NEST_MASK field.
- * We therefore provide a two-bit guard field defined by DYNTICK_TASK_MASK
- * that is set to DYNTICK_TASK_FLAG upon initial exit from idle.
- * The DYNTICK_TASK_EXIT_IDLE value is thus the combined value used upon
- * initial exit from idle.
- */
-#define DYNTICK_TASK_NEST_WIDTH 7
-#define DYNTICK_TASK_NEST_VALUE ((LLONG_MAX >> DYNTICK_TASK_NEST_WIDTH) + 1)
-#define DYNTICK_TASK_NEST_MASK  (LLONG_MAX - DYNTICK_TASK_NEST_VALUE + 1)
-#define DYNTICK_TASK_FLAG	   ((DYNTICK_TASK_NEST_VALUE / 8) * 2)
-#define DYNTICK_TASK_MASK	   ((DYNTICK_TASK_NEST_VALUE / 8) * 3)
-#define DYNTICK_TASK_EXIT_IDLE	   (DYNTICK_TASK_NEST_VALUE + \
-				    DYNTICK_TASK_FLAG)
-
+/* Offset to allow for unmatched rcu_irq_{enter,exit}(). */
 #define DYNTICK_IRQ_NONIDLE	((INT_MAX / 2) + 1)
 
 
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index fde0e840563f..d123474fe829 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -265,7 +265,7 @@ void rcu_bh_qs(void)
 #endif
 
 static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
-	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
+	.dynticks_nesting = 1,
 	.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
 	.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
 };
@@ -813,6 +813,10 @@ static void rcu_eqs_enter_common(bool user)
 /*
  * Enter an RCU extended quiescent state, which can be either the
  * idle loop or adaptive-tickless usermode execution.
+ *
+ * We crowbar the ->dynticks_nmi_nesting field to zero to allow for
+ * the possibility of usermode upcalls having messed up our count
+ * of interrupt nesting level during the prior busy period.
  */
 static void rcu_eqs_enter(bool user)
 {
@@ -821,11 +825,11 @@ static void rcu_eqs_enter(bool user)
 	rdtp = this_cpu_ptr(&rcu_dynticks);
 	WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0);
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
-		     (rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0);
-	if ((rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
+		     rdtp->dynticks_nesting == 0);
+	if (rdtp->dynticks_nesting == 1)
 		rcu_eqs_enter_common(user);
 	else
-		rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
+		rdtp->dynticks_nesting--;
 }
 
 /**
@@ -836,10 +840,6 @@ static void rcu_eqs_enter(bool user)
  * critical sections can occur in irq handlers in idle, a possibility
  * handled by irq_enter() and irq_exit().)
  *
- * We crowbar the ->dynticks_nesting field to zero to allow for
- * the possibility of usermode upcalls having messed up our count
- * of interrupt nesting level during the prior busy period.
- *
  * If you add or remove a call to rcu_idle_enter(), be sure to test with
  * CONFIG_RCU_EQS_DEBUG=y.
  */
@@ -984,6 +984,10 @@ static void rcu_eqs_exit_common(long long newval, int user)
 /*
  * Exit an RCU extended quiescent state, which can be either the
  * idle loop or adaptive-tickless usermode execution.
+ *
+ * We crowbar the ->dynticks_nmi_nesting field to DYNTICK_IRQ_NONIDLE to
+ * allow for the possibility of usermode upcalls messing up our count of
+ * interrupt nesting level during the busy period that is just now starting.
  */
 static void rcu_eqs_exit(bool user)
 {
@@ -994,12 +998,12 @@ static void rcu_eqs_exit(bool user)
 	rdtp = this_cpu_ptr(&rcu_dynticks);
 	oldval = rdtp->dynticks_nesting;
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
-	if (oldval & DYNTICK_TASK_NEST_MASK) {
-		rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
+	if (oldval) {
+		rdtp->dynticks_nesting++;
 	} else {
 		__this_cpu_inc(disable_rcu_irq_enter);
-		rcu_eqs_exit_common(DYNTICK_TASK_EXIT_IDLE, user);
-		rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
+		rcu_eqs_exit_common(1, user);
+		rdtp->dynticks_nesting = 1;
 		__this_cpu_dec(disable_rcu_irq_enter);
 		WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
 	}
@@ -1011,11 +1015,6 @@ static void rcu_eqs_exit(bool user)
  * Exit idle mode, in other words, -enter- the mode in which RCU
  * read-side critical sections can occur.
  *
- * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NEST to
- * allow for the possibility of usermode upcalls messing up our count
- * of interrupt nesting level during the busy period that is just
- * now starting.
- *
  * If you add or remove a call to rcu_idle_exit(), be sure to test with
  * CONFIG_RCU_EQS_DEBUG=y.
  */
@@ -1219,7 +1218,8 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
  */
 static int rcu_is_cpu_rrupt_from_idle(void)
 {
-	return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 1;
+	return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 0 &&
+	       __this_cpu_read(rcu_dynticks.dynticks_nmi_nesting) <= 1;
 }
 
 /*
@@ -3709,7 +3709,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
 	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
-	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
+	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != 1);
 	WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp->dynticks)));
 	rdp->cpu = cpu;
 	rdp->rsp = rsp;
@@ -3738,7 +3738,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */
 	    !init_nocb_callback_list(rdp))
 		rcu_segcblist_init(&rdp->cblist);  /* Re-enable callbacks. */
-	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
+	rdp->dynticks->dynticks_nesting = 1;
 	rcu_dynticks_eqs_online();
 	raw_spin_unlock_rcu_node(rnp);		/* irqs remain disabled. */
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 46a5d1991450..dbd7e3753bed 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -39,7 +39,6 @@
  */
 struct rcu_dynticks {
 	long long dynticks_nesting; /* Track irq/process nesting level. */
-				    /* Process level is worth LLONG_MAX/2. */
 	int dynticks_nmi_nesting;   /* Track NMI nesting level. */
 	atomic_t dynticks;	    /* Even value for idle, else odd. */
 	bool rcu_need_heavy_qs;     /* GP old, need heavy quiescent state. */
-- 
cgit v1.2.1


From 844ccdd7dce2c1a6ea9b437fcf8c3265b136e4a5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 3 Oct 2017 16:51:47 -0700
Subject: rcu: Eliminate rcu_irq_enter_disabled()

Now that the irq path uses the rcu_nmi_{enter,exit}() algorithm,
rcu_irq_enter() and rcu_irq_exit() may be used from any context.  There is
thus no need for rcu_irq_enter_disabled() and for the checks using it.
This commit therefore eliminates rcu_irq_enter_disabled().

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c    | 22 ++--------------------
 kernel/trace/trace.c | 11 -----------
 2 files changed, 2 insertions(+), 31 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index d123474fe829..444aa2b3f24d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -270,20 +270,6 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 	.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
 };
 
-/*
- * There's a few places, currently just in the tracing infrastructure,
- * that uses rcu_irq_enter() to make sure RCU is watching. But there's
- * a small location where that will not even work. In those cases
- * rcu_irq_enter_disabled() needs to be checked to make sure rcu_irq_enter()
- * can be called.
- */
-static DEFINE_PER_CPU(bool, disable_rcu_irq_enter);
-
-bool rcu_irq_enter_disabled(void)
-{
-	return this_cpu_read(disable_rcu_irq_enter);
-}
-
 /*
  * Record entry into an extended quiescent state.  This is only to be
  * called when not already in an extended quiescent state.
@@ -792,10 +778,8 @@ static void rcu_eqs_enter_common(bool user)
 		do_nocb_deferred_wakeup(rdp);
 	}
 	rcu_prepare_for_idle();
-	__this_cpu_inc(disable_rcu_irq_enter);
-	rdtp->dynticks_nesting = 0; /* Breaks tracing momentarily. */
-	rcu_dynticks_eqs_enter(); /* After this, tracing works again. */
-	__this_cpu_dec(disable_rcu_irq_enter);
+	rdtp->dynticks_nesting = 0;
+	rcu_dynticks_eqs_enter();
 	rcu_dynticks_task_enter();
 
 	/*
@@ -1001,10 +985,8 @@ static void rcu_eqs_exit(bool user)
 	if (oldval) {
 		rdtp->dynticks_nesting++;
 	} else {
-		__this_cpu_inc(disable_rcu_irq_enter);
 		rcu_eqs_exit_common(1, user);
 		rdtp->dynticks_nesting = 1;
-		__this_cpu_dec(disable_rcu_irq_enter);
 		WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
 	}
 }
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 73e67b68c53b..dbce1be3bab8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2682,17 +2682,6 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
 	if (unlikely(in_nmi()))
 		return;
 
-	/*
-	 * It is possible that a function is being traced in a
-	 * location that RCU is not watching. A call to
-	 * rcu_irq_enter() will make sure that it is, but there's
-	 * a few internal rcu functions that could be traced
-	 * where that wont work either. In those cases, we just
-	 * do nothing.
-	 */
-	if (unlikely(rcu_irq_enter_disabled()))
-		return;
-
 	rcu_irq_enter_irqson();
 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
 	rcu_irq_exit_irqson();
-- 
cgit v1.2.1


From bd2b879a1ca55486fdb9dcac691bfd3dd79c83d6 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 4 Oct 2017 12:29:01 -0700
Subject: rcu: Add tracing to irq/NMI dyntick-idle transitions

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'kernel')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 444aa2b3f24d..d069ba2d8412 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -880,12 +880,15 @@ void rcu_nmi_exit(void)
 	 * leave it in non-RCU-idle state.
 	 */
 	if (rdtp->dynticks_nmi_nesting != 1) {
+		trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nmi_nesting,
+				  rdtp->dynticks_nmi_nesting - 2);
 		WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */
 			   rdtp->dynticks_nmi_nesting - 2);
 		return;
 	}
 
 	/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
+	trace_rcu_dyntick(TPS("Startirq"), rdtp->dynticks_nmi_nesting, 0);
 	WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
 	rcu_dynticks_eqs_enter();
 }
@@ -1057,6 +1060,9 @@ void rcu_nmi_enter(void)
 		rcu_dynticks_eqs_exit();
 		incby = 1;
 	}
+	trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
+			  rdtp->dynticks_nmi_nesting,
+			  rdtp->dynticks_nmi_nesting + incby);
 	WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */
 		   rdtp->dynticks_nmi_nesting + incby);
 	barrier();
-- 
cgit v1.2.1


From 84585aa8b6ad24e5bdfba9db4a320a6aeed192ab Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 4 Oct 2017 15:55:16 -0700
Subject: rcu: Shrink ->dynticks_{nmi_,}nesting from long long to long

Because the ->dynticks_nesting field now only contains the process-based
nesting level instead of a value encoding both the process nesting level
and the irq "nesting" level, we no longer need a long long, even on
32-bit systems.  This commit therefore changes both the ->dynticks_nesting
and ->dynticks_nmi_nesting fields to long.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/rcu.h         | 2 +-
 kernel/rcu/tree.c        | 6 +++---
 kernel/rcu/tree.h        | 4 ++--
 kernel/rcu/tree_plugin.h | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index afe0559d1867..6334f2c1abd0 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -31,7 +31,7 @@
 #endif /* #else #ifdef CONFIG_RCU_TRACE */
 
 /* Offset to allow for unmatched rcu_irq_{enter,exit}(). */
-#define DYNTICK_IRQ_NONIDLE	((INT_MAX / 2) + 1)
+#define DYNTICK_IRQ_NONIDLE	((LONG_MAX / 2) + 1)
 
 
 /*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index d069ba2d8412..92de3bacda07 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -946,7 +946,7 @@ void rcu_irq_exit_irqson(void)
  * we really have exited idle, and must do the appropriate accounting.
  * The caller must have disabled interrupts.
  */
-static void rcu_eqs_exit_common(long long newval, int user)
+static void rcu_eqs_exit_common(long newval, int user)
 {
 	RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);)
 
@@ -979,7 +979,7 @@ static void rcu_eqs_exit_common(long long newval, int user)
 static void rcu_eqs_exit(bool user)
 {
 	struct rcu_dynticks *rdtp;
-	long long oldval;
+	long oldval;
 
 	lockdep_assert_irqs_disabled();
 	rdtp = this_cpu_ptr(&rcu_dynticks);
@@ -1043,7 +1043,7 @@ void rcu_user_exit(void)
 void rcu_nmi_enter(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-	int incby = 2;
+	long incby = 2;
 
 	/* Complain about underflow. */
 	WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index dbd7e3753bed..6488a3b0e729 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -38,8 +38,8 @@
  * Dynticks per-CPU state.
  */
 struct rcu_dynticks {
-	long long dynticks_nesting; /* Track irq/process nesting level. */
-	int dynticks_nmi_nesting;   /* Track NMI nesting level. */
+	long dynticks_nesting;      /* Track process nesting level. */
+	long dynticks_nmi_nesting;  /* Track irq/NMI nesting level. */
 	atomic_t dynticks;	    /* Even value for idle, else odd. */
 	bool rcu_need_heavy_qs;     /* GP old, need heavy quiescent state. */
 	unsigned long rcu_qs_ctr;   /* Light universal quiescent state ctr. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index db85ca3975f1..e94e754464cd 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1687,7 +1687,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
 	}
 	print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
 	delta = rdp->mynode->gpnum - rdp->rcu_iw_gpnum;
-	pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n",
+	pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%ld softirq=%u/%u fqs=%ld %s\n",
 	       cpu,
 	       "O."[!!cpu_online(cpu)],
 	       "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
-- 
cgit v1.2.1


From dec98900eae1e22467182e58688abe5fae98bd5f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 4 Oct 2017 16:24:29 -0700
Subject: rcu: Add ->dynticks field to rcu_dyntick trace event

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 92de3bacda07..5febb76809f6 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -761,13 +761,13 @@ static void rcu_eqs_enter_common(bool user)
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 
 	lockdep_assert_irqs_disabled();
-	trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0);
+	trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0, rdtp->dynticks);
 	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
 	    !user && !is_idle_task(current)) {
 		struct task_struct *idle __maybe_unused =
 			idle_task(smp_processor_id());
 
-		trace_rcu_dyntick(TPS("Error on entry: not idle task"), rdtp->dynticks_nesting, 0);
+		trace_rcu_dyntick(TPS("Error on entry: not idle task"), rdtp->dynticks_nesting, 0, rdtp->dynticks);
 		rcu_ftrace_dump(DUMP_ORIG);
 		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
 			  current->pid, current->comm,
@@ -880,15 +880,14 @@ void rcu_nmi_exit(void)
 	 * leave it in non-RCU-idle state.
 	 */
 	if (rdtp->dynticks_nmi_nesting != 1) {
-		trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nmi_nesting,
-				  rdtp->dynticks_nmi_nesting - 2);
+		trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nmi_nesting, rdtp->dynticks_nmi_nesting - 2, rdtp->dynticks);
 		WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */
 			   rdtp->dynticks_nmi_nesting - 2);
 		return;
 	}
 
 	/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
-	trace_rcu_dyntick(TPS("Startirq"), rdtp->dynticks_nmi_nesting, 0);
+	trace_rcu_dyntick(TPS("Startirq"), rdtp->dynticks_nmi_nesting, 0, rdtp->dynticks);
 	WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
 	rcu_dynticks_eqs_enter();
 }
@@ -953,14 +952,13 @@ static void rcu_eqs_exit_common(long newval, int user)
 	rcu_dynticks_task_exit();
 	rcu_dynticks_eqs_exit();
 	rcu_cleanup_after_idle();
-	trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, newval);
+	trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, newval, rdtp->dynticks);
 	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
 	    !user && !is_idle_task(current)) {
 		struct task_struct *idle __maybe_unused =
 			idle_task(smp_processor_id());
 
-		trace_rcu_dyntick(TPS("Error on exit: not idle task"),
-				  rdtp->dynticks_nesting, newval);
+		trace_rcu_dyntick(TPS("Error on exit: not idle task"), rdtp->dynticks_nesting, newval, rdtp->dynticks);
 		rcu_ftrace_dump(DUMP_ORIG);
 		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
 			  current->pid, current->comm,
@@ -1062,7 +1060,7 @@ void rcu_nmi_enter(void)
 	}
 	trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
 			  rdtp->dynticks_nmi_nesting,
-			  rdtp->dynticks_nmi_nesting + incby);
+			  rdtp->dynticks_nmi_nesting + incby, rdtp->dynticks);
 	WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */
 		   rdtp->dynticks_nmi_nesting + incby);
 	barrier();
-- 
cgit v1.2.1


From 914955e18ca09fc404d7fc3614bb04c96a03692c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 5 Oct 2017 13:50:57 -0700
Subject: rcu: Stop duplicating lockdep checks in RCU's idle-entry code

The three RCU_LOCKDEP_WARN() calls in rcu_eqs_enter_common() are
redundant with other lockdep checks, so this commit removes them.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 5febb76809f6..80cada11f544 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -781,17 +781,6 @@ static void rcu_eqs_enter_common(bool user)
 	rdtp->dynticks_nesting = 0;
 	rcu_dynticks_eqs_enter();
 	rcu_dynticks_task_enter();
-
-	/*
-	 * It is illegal to enter an extended quiescent state while
-	 * in an RCU read-side critical section.
-	 */
-	RCU_LOCKDEP_WARN(lock_is_held(&rcu_lock_map),
-			 "Illegal idle entry in RCU read-side critical section.");
-	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map),
-			 "Illegal idle entry in RCU-bh read-side critical section.");
-	RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map),
-			 "Illegal idle entry in RCU-sched read-side critical section.");
 }
 
 /*
-- 
cgit v1.2.1


From 2342172fd6c148506456862d795c7f155baf6797 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 5 Oct 2017 15:03:10 -0700
Subject: rcu: Avoid ->dynticks_nesting store tearing

Although ->dynticks_nesting is updated only by process level, it is
accessed from hardirq to check for interrupt-from-idle quiescent states.
Store tearing is thus possible, so this commit applies WRITE_ONCE()
to ->dynticks_nesting stores.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 80cada11f544..b2ded4d436c6 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -778,7 +778,7 @@ static void rcu_eqs_enter_common(bool user)
 		do_nocb_deferred_wakeup(rdp);
 	}
 	rcu_prepare_for_idle();
-	rdtp->dynticks_nesting = 0;
+	WRITE_ONCE(rdtp->dynticks_nesting, 0); /* Avoid irq-access tearing. */
 	rcu_dynticks_eqs_enter();
 	rcu_dynticks_task_enter();
 }
@@ -976,7 +976,7 @@ static void rcu_eqs_exit(bool user)
 		rdtp->dynticks_nesting++;
 	} else {
 		rcu_eqs_exit_common(1, user);
-		rdtp->dynticks_nesting = 1;
+		WRITE_ONCE(rdtp->dynticks_nesting, 1);
 		WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
 	}
 }
@@ -3713,7 +3713,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */
 	    !init_nocb_callback_list(rdp))
 		rcu_segcblist_init(&rdp->cblist);  /* Re-enable callbacks. */
-	rdp->dynticks->dynticks_nesting = 1;
+	rdp->dynticks->dynticks_nesting = 1;	/* CPU not up, no tearing. */
 	rcu_dynticks_eqs_online();
 	raw_spin_unlock_rcu_node(rnp);		/* irqs remain disabled. */
 
-- 
cgit v1.2.1


From 215bba9f59e35e64b9936da62632b2fa3ede647c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 5 Oct 2017 16:37:03 -0700
Subject: rcu: Fold rcu_eqs_enter_common() into rcu_eqs_enter()

There is now only one call to rcu_eqs_enter_common() and there is no other
reason to keep it separate.  This commit therefore inlines it into its
sole call site, saving a few lines of code in the process.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 43 ++++++++++++++++---------------------------
 1 file changed, 16 insertions(+), 27 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index b2ded4d436c6..5c8a5796c71f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -749,16 +749,27 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
 }
 
 /*
- * rcu_eqs_enter_common - current CPU is entering an extended quiescent state
+ * Enter an RCU extended quiescent state, which can be either the
+ * idle loop or adaptive-tickless usermode execution.
  *
- * Enter idle, doing appropriate accounting.  The caller must have
- * disabled interrupts.
+ * We crowbar the ->dynticks_nmi_nesting field to zero to allow for
+ * the possibility of usermode upcalls having messed up our count
+ * of interrupt nesting level during the prior busy period.
  */
-static void rcu_eqs_enter_common(bool user)
+static void rcu_eqs_enter(bool user)
 {
 	struct rcu_state *rsp;
 	struct rcu_data *rdp;
-	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
+	struct rcu_dynticks *rdtp;
+
+	rdtp = this_cpu_ptr(&rcu_dynticks);
+	WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0);
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+		     rdtp->dynticks_nesting == 0);
+	if (rdtp->dynticks_nesting != 1) {
+		rdtp->dynticks_nesting--;
+		return;
+	}
 
 	lockdep_assert_irqs_disabled();
 	trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0, rdtp->dynticks);
@@ -783,28 +794,6 @@ static void rcu_eqs_enter_common(bool user)
 	rcu_dynticks_task_enter();
 }
 
-/*
- * Enter an RCU extended quiescent state, which can be either the
- * idle loop or adaptive-tickless usermode execution.
- *
- * We crowbar the ->dynticks_nmi_nesting field to zero to allow for
- * the possibility of usermode upcalls having messed up our count
- * of interrupt nesting level during the prior busy period.
- */
-static void rcu_eqs_enter(bool user)
-{
-	struct rcu_dynticks *rdtp;
-
-	rdtp = this_cpu_ptr(&rcu_dynticks);
-	WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0);
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
-		     rdtp->dynticks_nesting == 0);
-	if (rdtp->dynticks_nesting == 1)
-		rcu_eqs_enter_common(user);
-	else
-		rdtp->dynticks_nesting--;
-}
-
 /**
  * rcu_idle_enter - inform RCU that current CPU is entering idle
  *
-- 
cgit v1.2.1


From 9dd238e28640d5514bbd0ff2d425f32409981d85 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 5 Oct 2017 16:56:26 -0700
Subject: rcu: Fold rcu_eqs_exit_common() into rcu_eqs_exit()

There is now only one call to rcu_eqs_exit_common() and there is no other
reason to keep it separate.  This commit therefore inlines it into its
sole call site, saving a few lines of code in the process.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 50 ++++++++++++++++++--------------------------------
 1 file changed, 18 insertions(+), 32 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 5c8a5796c71f..46a8e06bf03e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -916,34 +916,6 @@ void rcu_irq_exit_irqson(void)
 	local_irq_restore(flags);
 }
 
-/*
- * rcu_eqs_exit_common - current CPU moving away from extended quiescent state
- *
- * If the new value of the ->dynticks_nesting counter was previously zero,
- * we really have exited idle, and must do the appropriate accounting.
- * The caller must have disabled interrupts.
- */
-static void rcu_eqs_exit_common(long newval, int user)
-{
-	RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);)
-
-	rcu_dynticks_task_exit();
-	rcu_dynticks_eqs_exit();
-	rcu_cleanup_after_idle();
-	trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, newval, rdtp->dynticks);
-	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
-	    !user && !is_idle_task(current)) {
-		struct task_struct *idle __maybe_unused =
-			idle_task(smp_processor_id());
-
-		trace_rcu_dyntick(TPS("Error on exit: not idle task"), rdtp->dynticks_nesting, newval, rdtp->dynticks);
-		rcu_ftrace_dump(DUMP_ORIG);
-		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
-			  current->pid, current->comm,
-			  idle->pid, idle->comm); /* must be idle task! */
-	}
-}
-
 /*
  * Exit an RCU extended quiescent state, which can be either the
  * idle loop or adaptive-tickless usermode execution.
@@ -963,11 +935,25 @@ static void rcu_eqs_exit(bool user)
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
 	if (oldval) {
 		rdtp->dynticks_nesting++;
-	} else {
-		rcu_eqs_exit_common(1, user);
-		WRITE_ONCE(rdtp->dynticks_nesting, 1);
-		WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
+		return;
+	}
+	rcu_dynticks_task_exit();
+	rcu_dynticks_eqs_exit();
+	rcu_cleanup_after_idle();
+	trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, 1, rdtp->dynticks);
+	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+	    !user && !is_idle_task(current)) {
+		struct task_struct *idle __maybe_unused =
+			idle_task(smp_processor_id());
+
+		trace_rcu_dyntick(TPS("Error on exit: not idle task"), rdtp->dynticks_nesting, 1, rdtp->dynticks);
+		rcu_ftrace_dump(DUMP_ORIG);
+		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
+			  current->pid, current->comm,
+			  idle->pid, idle->comm); /* must be idle task! */
 	}
+	WRITE_ONCE(rdtp->dynticks_nesting, 1);
+	WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
 }
 
 /**
-- 
cgit v1.2.1


From e68bbb266dcfed201d8d54a2828ef820d747f083 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 5 Oct 2017 19:55:31 -0700
Subject: rcu: Simplify rcu_eqs_{enter,exit}() non-idle task debug code

The code that checks for non-idle non-nohz_idle-usermode tasks invoking
rcu_eqs_enter() and rcu_eqs_exit() prints a considerable quantity of
helpful information.  However, these checks fire rarely, so the extra
complexity is no longer worth it.  This commit therefore replaces this
debug code with simple WARN_ON_ONCE() statements.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 24 ++----------------------
 1 file changed, 2 insertions(+), 22 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 46a8e06bf03e..4d374d2bc925 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -773,17 +773,7 @@ static void rcu_eqs_enter(bool user)
 
 	lockdep_assert_irqs_disabled();
 	trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0, rdtp->dynticks);
-	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
-	    !user && !is_idle_task(current)) {
-		struct task_struct *idle __maybe_unused =
-			idle_task(smp_processor_id());
-
-		trace_rcu_dyntick(TPS("Error on entry: not idle task"), rdtp->dynticks_nesting, 0, rdtp->dynticks);
-		rcu_ftrace_dump(DUMP_ORIG);
-		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
-			  current->pid, current->comm,
-			  idle->pid, idle->comm); /* must be idle task! */
-	}
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
 	for_each_rcu_flavor(rsp) {
 		rdp = this_cpu_ptr(rsp->rda);
 		do_nocb_deferred_wakeup(rdp);
@@ -941,17 +931,7 @@ static void rcu_eqs_exit(bool user)
 	rcu_dynticks_eqs_exit();
 	rcu_cleanup_after_idle();
 	trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, 1, rdtp->dynticks);
-	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
-	    !user && !is_idle_task(current)) {
-		struct task_struct *idle __maybe_unused =
-			idle_task(smp_processor_id());
-
-		trace_rcu_dyntick(TPS("Error on exit: not idle task"), rdtp->dynticks_nesting, 1, rdtp->dynticks);
-		rcu_ftrace_dump(DUMP_ORIG);
-		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
-			  current->pid, current->comm,
-			  idle->pid, idle->comm); /* must be idle task! */
-	}
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
 	WRITE_ONCE(rdtp->dynticks_nesting, 1);
 	WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
 }
-- 
cgit v1.2.1


From d633198088bd9e358566c470ed182994403acc7a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 10 Oct 2017 13:52:30 -0700
Subject: srcu: Prohibit call_srcu() use under raw spinlocks

Invoking queue_delayed_work() while holding a raw spinlock is forbidden
in -rt kernels, which is exactly what __call_srcu() does, indirectly via
srcu_funnel_gp_start().  This commit therefore downgrades Tree SRCU's
locking from raw to non-raw spinlocks, which works because call_srcu()
is not ever called while holding a raw spinlock.

Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/srcutree.c | 109 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 68 insertions(+), 41 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 6d5880089ff6..d5cea81378cc 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -53,6 +53,33 @@ static void srcu_invoke_callbacks(struct work_struct *work);
 static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
 static void process_srcu(struct work_struct *work);
 
+/* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
+#define spin_lock_rcu_node(p)					\
+do {									\
+	spin_lock(&ACCESS_PRIVATE(p, lock));			\
+	smp_mb__after_unlock_lock();					\
+} while (0)
+
+#define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock))
+
+#define spin_lock_irq_rcu_node(p)					\
+do {									\
+	spin_lock_irq(&ACCESS_PRIVATE(p, lock));			\
+	smp_mb__after_unlock_lock();					\
+} while (0)
+
+#define spin_unlock_irq_rcu_node(p)					\
+	spin_unlock_irq(&ACCESS_PRIVATE(p, lock))
+
+#define spin_lock_irqsave_rcu_node(p, flags)			\
+do {									\
+	spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags);	\
+	smp_mb__after_unlock_lock();					\
+} while (0)
+
+#define spin_unlock_irqrestore_rcu_node(p, flags)			\
+	spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags)	\
+
 /*
  * Initialize SRCU combining tree.  Note that statically allocated
  * srcu_struct structures might already have srcu_read_lock() and
@@ -77,7 +104,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
 
 	/* Each pass through this loop initializes one srcu_node structure. */
 	rcu_for_each_node_breadth_first(sp, snp) {
-		raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock));
+		spin_lock_init(&ACCESS_PRIVATE(snp, lock));
 		WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) !=
 			     ARRAY_SIZE(snp->srcu_data_have_cbs));
 		for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
@@ -111,7 +138,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
 	snp_first = sp->level[level];
 	for_each_possible_cpu(cpu) {
 		sdp = per_cpu_ptr(sp->sda, cpu);
-		raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
+		spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
 		rcu_segcblist_init(&sdp->srcu_cblist);
 		sdp->srcu_cblist_invoking = false;
 		sdp->srcu_gp_seq_needed = sp->srcu_gp_seq;
@@ -170,7 +197,7 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name,
 	/* Don't re-initialize a lock while it is held. */
 	debug_check_no_locks_freed((void *)sp, sizeof(*sp));
 	lockdep_init_map(&sp->dep_map, name, key, 0);
-	raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock));
+	spin_lock_init(&ACCESS_PRIVATE(sp, lock));
 	return init_srcu_struct_fields(sp, false);
 }
 EXPORT_SYMBOL_GPL(__init_srcu_struct);
@@ -187,7 +214,7 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct);
  */
 int init_srcu_struct(struct srcu_struct *sp)
 {
-	raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock));
+	spin_lock_init(&ACCESS_PRIVATE(sp, lock));
 	return init_srcu_struct_fields(sp, false);
 }
 EXPORT_SYMBOL_GPL(init_srcu_struct);
@@ -210,13 +237,13 @@ static void check_init_srcu_struct(struct srcu_struct *sp)
 	/* The smp_load_acquire() pairs with the smp_store_release(). */
 	if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/
 		return; /* Already initialized. */
-	raw_spin_lock_irqsave_rcu_node(sp, flags);
+	spin_lock_irqsave_rcu_node(sp, flags);
 	if (!rcu_seq_state(sp->srcu_gp_seq_needed)) {
-		raw_spin_unlock_irqrestore_rcu_node(sp, flags);
+		spin_unlock_irqrestore_rcu_node(sp, flags);
 		return;
 	}
 	init_srcu_struct_fields(sp, true);
-	raw_spin_unlock_irqrestore_rcu_node(sp, flags);
+	spin_unlock_irqrestore_rcu_node(sp, flags);
 }
 
 /*
@@ -513,7 +540,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
 	mutex_lock(&sp->srcu_cb_mutex);
 
 	/* End the current grace period. */
-	raw_spin_lock_irq_rcu_node(sp);
+	spin_lock_irq_rcu_node(sp);
 	idx = rcu_seq_state(sp->srcu_gp_seq);
 	WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
 	cbdelay = srcu_get_delay(sp);
@@ -522,7 +549,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
 	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
 	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq))
 		sp->srcu_gp_seq_needed_exp = gpseq;
-	raw_spin_unlock_irq_rcu_node(sp);
+	spin_unlock_irq_rcu_node(sp);
 	mutex_unlock(&sp->srcu_gp_mutex);
 	/* A new grace period can start at this point.  But only one. */
 
@@ -530,7 +557,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
 	idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
 	idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs);
 	rcu_for_each_node_breadth_first(sp, snp) {
-		raw_spin_lock_irq_rcu_node(snp);
+		spin_lock_irq_rcu_node(snp);
 		cbs = false;
 		if (snp >= sp->level[rcu_num_lvls - 1])
 			cbs = snp->srcu_have_cbs[idx] == gpseq;
@@ -540,7 +567,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
 			snp->srcu_gp_seq_needed_exp = gpseq;
 		mask = snp->srcu_data_have_cbs[idx];
 		snp->srcu_data_have_cbs[idx] = 0;
-		raw_spin_unlock_irq_rcu_node(snp);
+		spin_unlock_irq_rcu_node(snp);
 		if (cbs)
 			srcu_schedule_cbs_snp(sp, snp, mask, cbdelay);
 
@@ -548,11 +575,11 @@ static void srcu_gp_end(struct srcu_struct *sp)
 		if (!(gpseq & counter_wrap_check))
 			for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
 				sdp = per_cpu_ptr(sp->sda, cpu);
-				raw_spin_lock_irqsave_rcu_node(sdp, flags);
+				spin_lock_irqsave_rcu_node(sdp, flags);
 				if (ULONG_CMP_GE(gpseq,
 						 sdp->srcu_gp_seq_needed + 100))
 					sdp->srcu_gp_seq_needed = gpseq;
-				raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
+				spin_unlock_irqrestore_rcu_node(sdp, flags);
 			}
 	}
 
@@ -560,17 +587,17 @@ static void srcu_gp_end(struct srcu_struct *sp)
 	mutex_unlock(&sp->srcu_cb_mutex);
 
 	/* Start a new grace period if needed. */
-	raw_spin_lock_irq_rcu_node(sp);
+	spin_lock_irq_rcu_node(sp);
 	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
 	if (!rcu_seq_state(gpseq) &&
 	    ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) {
 		srcu_gp_start(sp);
-		raw_spin_unlock_irq_rcu_node(sp);
+		spin_unlock_irq_rcu_node(sp);
 		/* Throttle expedited grace periods: Should be rare! */
 		srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff
 				    ? 0 : SRCU_INTERVAL);
 	} else {
-		raw_spin_unlock_irq_rcu_node(sp);
+		spin_unlock_irq_rcu_node(sp);
 	}
 }
 
@@ -590,18 +617,18 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp,
 		if (rcu_seq_done(&sp->srcu_gp_seq, s) ||
 		    ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s))
 			return;
-		raw_spin_lock_irqsave_rcu_node(snp, flags);
+		spin_lock_irqsave_rcu_node(snp, flags);
 		if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) {
-			raw_spin_unlock_irqrestore_rcu_node(snp, flags);
+			spin_unlock_irqrestore_rcu_node(snp, flags);
 			return;
 		}
 		WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
-		raw_spin_unlock_irqrestore_rcu_node(snp, flags);
+		spin_unlock_irqrestore_rcu_node(snp, flags);
 	}
-	raw_spin_lock_irqsave_rcu_node(sp, flags);
+	spin_lock_irqsave_rcu_node(sp, flags);
 	if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
 		sp->srcu_gp_seq_needed_exp = s;
-	raw_spin_unlock_irqrestore_rcu_node(sp, flags);
+	spin_unlock_irqrestore_rcu_node(sp, flags);
 }
 
 /*
@@ -623,12 +650,12 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
 	for (; snp != NULL; snp = snp->srcu_parent) {
 		if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode)
 			return; /* GP already done and CBs recorded. */
-		raw_spin_lock_irqsave_rcu_node(snp, flags);
+		spin_lock_irqsave_rcu_node(snp, flags);
 		if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) {
 			snp_seq = snp->srcu_have_cbs[idx];
 			if (snp == sdp->mynode && snp_seq == s)
 				snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
-			raw_spin_unlock_irqrestore_rcu_node(snp, flags);
+			spin_unlock_irqrestore_rcu_node(snp, flags);
 			if (snp == sdp->mynode && snp_seq != s) {
 				srcu_schedule_cbs_sdp(sdp, do_norm
 							   ? SRCU_INTERVAL
@@ -644,11 +671,11 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
 			snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
 		if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s))
 			snp->srcu_gp_seq_needed_exp = s;
-		raw_spin_unlock_irqrestore_rcu_node(snp, flags);
+		spin_unlock_irqrestore_rcu_node(snp, flags);
 	}
 
 	/* Top of tree, must ensure the grace period will be started. */
-	raw_spin_lock_irqsave_rcu_node(sp, flags);
+	spin_lock_irqsave_rcu_node(sp, flags);
 	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) {
 		/*
 		 * Record need for grace period s.  Pair with load
@@ -667,7 +694,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
 		queue_delayed_work(system_power_efficient_wq, &sp->work,
 				   srcu_get_delay(sp));
 	}
-	raw_spin_unlock_irqrestore_rcu_node(sp, flags);
+	spin_unlock_irqrestore_rcu_node(sp, flags);
 }
 
 /*
@@ -830,7 +857,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 	rhp->func = func;
 	local_irq_save(flags);
 	sdp = this_cpu_ptr(sp->sda);
-	raw_spin_lock_rcu_node(sdp);
+	spin_lock_rcu_node(sdp);
 	rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false);
 	rcu_segcblist_advance(&sdp->srcu_cblist,
 			      rcu_seq_current(&sp->srcu_gp_seq));
@@ -844,7 +871,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 		sdp->srcu_gp_seq_needed_exp = s;
 		needexp = true;
 	}
-	raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
+	spin_unlock_irqrestore_rcu_node(sdp, flags);
 	if (needgp)
 		srcu_funnel_gp_start(sp, sdp, s, do_norm);
 	else if (needexp)
@@ -900,7 +927,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm)
 
 	/*
 	 * Make sure that later code is ordered after the SRCU grace
-	 * period.  This pairs with the raw_spin_lock_irq_rcu_node()
+	 * period.  This pairs with the spin_lock_irq_rcu_node()
 	 * in srcu_invoke_callbacks().  Unlike Tree RCU, this is needed
 	 * because the current CPU might have been totally uninvolved with
 	 * (and thus unordered against) that grace period.
@@ -1024,7 +1051,7 @@ void srcu_barrier(struct srcu_struct *sp)
 	 */
 	for_each_possible_cpu(cpu) {
 		sdp = per_cpu_ptr(sp->sda, cpu);
-		raw_spin_lock_irq_rcu_node(sdp);
+		spin_lock_irq_rcu_node(sdp);
 		atomic_inc(&sp->srcu_barrier_cpu_cnt);
 		sdp->srcu_barrier_head.func = srcu_barrier_cb;
 		debug_rcu_head_queue(&sdp->srcu_barrier_head);
@@ -1033,7 +1060,7 @@ void srcu_barrier(struct srcu_struct *sp)
 			debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
 			atomic_dec(&sp->srcu_barrier_cpu_cnt);
 		}
-		raw_spin_unlock_irq_rcu_node(sdp);
+		spin_unlock_irq_rcu_node(sdp);
 	}
 
 	/* Remove the initial count, at which point reaching zero can happen. */
@@ -1082,17 +1109,17 @@ static void srcu_advance_state(struct srcu_struct *sp)
 	 */
 	idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */
 	if (idx == SRCU_STATE_IDLE) {
-		raw_spin_lock_irq_rcu_node(sp);
+		spin_lock_irq_rcu_node(sp);
 		if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
 			WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq));
-			raw_spin_unlock_irq_rcu_node(sp);
+			spin_unlock_irq_rcu_node(sp);
 			mutex_unlock(&sp->srcu_gp_mutex);
 			return;
 		}
 		idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
 		if (idx == SRCU_STATE_IDLE)
 			srcu_gp_start(sp);
-		raw_spin_unlock_irq_rcu_node(sp);
+		spin_unlock_irq_rcu_node(sp);
 		if (idx != SRCU_STATE_IDLE) {
 			mutex_unlock(&sp->srcu_gp_mutex);
 			return; /* Someone else started the grace period. */
@@ -1141,19 +1168,19 @@ static void srcu_invoke_callbacks(struct work_struct *work)
 	sdp = container_of(work, struct srcu_data, work.work);
 	sp = sdp->sp;
 	rcu_cblist_init(&ready_cbs);
-	raw_spin_lock_irq_rcu_node(sdp);
+	spin_lock_irq_rcu_node(sdp);
 	rcu_segcblist_advance(&sdp->srcu_cblist,
 			      rcu_seq_current(&sp->srcu_gp_seq));
 	if (sdp->srcu_cblist_invoking ||
 	    !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
-		raw_spin_unlock_irq_rcu_node(sdp);
+		spin_unlock_irq_rcu_node(sdp);
 		return;  /* Someone else on the job or nothing to do. */
 	}
 
 	/* We are on the job!  Extract and invoke ready callbacks. */
 	sdp->srcu_cblist_invoking = true;
 	rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
-	raw_spin_unlock_irq_rcu_node(sdp);
+	spin_unlock_irq_rcu_node(sdp);
 	rhp = rcu_cblist_dequeue(&ready_cbs);
 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
 		debug_rcu_head_unqueue(rhp);
@@ -1166,13 +1193,13 @@ static void srcu_invoke_callbacks(struct work_struct *work)
 	 * Update counts, accelerate new callbacks, and if needed,
 	 * schedule another round of callback invocation.
 	 */
-	raw_spin_lock_irq_rcu_node(sdp);
+	spin_lock_irq_rcu_node(sdp);
 	rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs);
 	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
 				       rcu_seq_snap(&sp->srcu_gp_seq));
 	sdp->srcu_cblist_invoking = false;
 	more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
-	raw_spin_unlock_irq_rcu_node(sdp);
+	spin_unlock_irq_rcu_node(sdp);
 	if (more)
 		srcu_schedule_cbs_sdp(sdp, 0);
 }
@@ -1185,7 +1212,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
 {
 	bool pushgp = true;
 
-	raw_spin_lock_irq_rcu_node(sp);
+	spin_lock_irq_rcu_node(sp);
 	if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
 		if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) {
 			/* All requests fulfilled, time to go idle. */
@@ -1195,7 +1222,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
 		/* Outstanding request and no GP.  Start one. */
 		srcu_gp_start(sp);
 	}
-	raw_spin_unlock_irq_rcu_node(sp);
+	spin_unlock_irq_rcu_node(sp);
 
 	if (pushgp)
 		queue_delayed_work(system_power_efficient_wq, &sp->work, delay);
-- 
cgit v1.2.1


From dac95906003fec1b4801115830cc14ec61c74960 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 4 Oct 2017 11:23:10 -0700
Subject: torture: Suppress CPU stall warnings during shutdown ftrace dump

The torture_shutdown() function directly invokes ftrace_dump(), which
can result in RCU CPU stall warnings when the ftrace buffer is large,
which it usually is.  This commit therefore invoks rcu_ftrace_dump()
in place of ftrace_dump(), suppressing RCU CPU stall warnings during
this time.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/torture.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/torture.c b/kernel/torture.c
index 637e172835d8..52781e838541 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -47,6 +47,7 @@
 #include <linux/ktime.h>
 #include <asm/byteorder.h>
 #include <linux/torture.h>
+#include "rcu/rcu.h"
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com>");
@@ -500,7 +501,7 @@ static int torture_shutdown(void *arg)
 		torture_shutdown_hook();
 	else
 		VERBOSE_TOROUT_STRING("No torture_shutdown_hook(), skipping.");
-	ftrace_dump(DUMP_ALL);
+	rcu_ftrace_dump(DUMP_ALL);
 	kernel_power_off();	/* Shut down the system. */
 	return 0;
 }
-- 
cgit v1.2.1


From a0982dfa03efca6c239c52cabebcea4afb93ea6b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 13 Oct 2017 16:24:28 -0700
Subject: sched: Stop resched_cpu() from sending IPIs to offline CPUs

The rcutorture test suite occasionally provokes a splat due to invoking
resched_cpu() on an offline CPU:

WARNING: CPU: 2 PID: 8 at /home/paulmck/public_git/linux-rcu/arch/x86/kernel/smp.c:128 native_smp_send_reschedule+0x37/0x40
Modules linked in:
CPU: 2 PID: 8 Comm: rcu_preempt Not tainted 4.14.0-rc4+ #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
task: ffff902ede9daf00 task.stack: ffff96c50010c000
RIP: 0010:native_smp_send_reschedule+0x37/0x40
RSP: 0018:ffff96c50010fdb8 EFLAGS: 00010096
RAX: 000000000000002e RBX: ffff902edaab4680 RCX: 0000000000000003
RDX: 0000000080000003 RSI: 0000000000000000 RDI: 00000000ffffffff
RBP: ffff96c50010fdb8 R08: 0000000000000000 R09: 0000000000000001
R10: 0000000000000000 R11: 00000000299f36ae R12: 0000000000000001
R13: ffffffff9de64240 R14: 0000000000000001 R15: ffffffff9de64240
FS:  0000000000000000(0000) GS:ffff902edfc80000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000f7d4c642 CR3: 000000001e0e2000 CR4: 00000000000006e0
Call Trace:
 resched_curr+0x8f/0x1c0
 resched_cpu+0x2c/0x40
 rcu_implicit_dynticks_qs+0x152/0x220
 force_qs_rnp+0x147/0x1d0
 ? sync_rcu_exp_select_cpus+0x450/0x450
 rcu_gp_kthread+0x5a9/0x950
 kthread+0x142/0x180
 ? force_qs_rnp+0x1d0/0x1d0
 ? kthread_create_on_node+0x40/0x40
 ret_from_fork+0x27/0x40
Code: 14 01 0f 92 c0 84 c0 74 14 48 8b 05 14 4f f4 00 be fd 00 00 00 ff 90 a0 00 00 00 5d c3 89 fe 48 c7 c7 38 89 ca 9d e8 e5 56 08 00 <0f> ff 5d c3 0f 1f 44 00 00 8b 05 52 9e 37 02 85 c0 75 38 55 48
---[ end trace 26df9e5df4bba4ac ]---

This splat cannot be generated by expedited grace periods because they
always invoke resched_cpu() on the current CPU, which is good because
expedited grace periods require that resched_cpu() unconditionally
succeed.  However, other parts of RCU can tolerate resched_cpu() acting
as a no-op, at least as long as it doesn't happen too often.

This commit therefore makes resched_cpu() invoke resched_curr() only if
the CPU is either online or is the current CPU.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 kernel/sched/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 75554f366fd3..c85dfb746f8c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -508,7 +508,8 @@ void resched_cpu(int cpu)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&rq->lock, flags);
-	resched_curr(rq);
+	if (cpu_online(cpu) || cpu == smp_processor_id())
+		resched_curr(rq);
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
-- 
cgit v1.2.1


From 2fe2582649aa2355f79acddb86bd4d6c5363eb63 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 13 Oct 2017 17:00:18 -0700
Subject: sched: Stop switched_to_rt() from sending IPIs to offline CPUs

The rcutorture test suite occasionally provokes a splat due to invoking
rt_mutex_lock() which needs to boost the priority of a task currently
sitting on a runqueue that belongs to an offline CPU:

WARNING: CPU: 0 PID: 12 at /home/paulmck/public_git/linux-rcu/arch/x86/kernel/smp.c:128 native_smp_send_reschedule+0x37/0x40
Modules linked in:
CPU: 0 PID: 12 Comm: rcub/7 Not tainted 4.14.0-rc4+ #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
task: ffff9ed3de5f8cc0 task.stack: ffffbbf80012c000
RIP: 0010:native_smp_send_reschedule+0x37/0x40
RSP: 0018:ffffbbf80012fd10 EFLAGS: 00010082
RAX: 000000000000002f RBX: ffff9ed3dd9cb300 RCX: 0000000000000004
RDX: 0000000080000004 RSI: 0000000000000086 RDI: 00000000ffffffff
RBP: ffffbbf80012fd10 R08: 000000000009da7a R09: 0000000000007b9d
R10: 0000000000000001 R11: ffffffffbb57c2cd R12: 000000000000000d
R13: ffff9ed3de5f8cc0 R14: 0000000000000061 R15: ffff9ed3ded59200
FS:  0000000000000000(0000) GS:ffff9ed3dea00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000080686f0 CR3: 000000001b9e0000 CR4: 00000000000006f0
Call Trace:
 resched_curr+0x61/0xd0
 switched_to_rt+0x8f/0xa0
 rt_mutex_setprio+0x25c/0x410
 task_blocks_on_rt_mutex+0x1b3/0x1f0
 rt_mutex_slowlock+0xa9/0x1e0
 rt_mutex_lock+0x29/0x30
 rcu_boost_kthread+0x127/0x3c0
 kthread+0x104/0x140
 ? rcu_report_unblock_qs_rnp+0x90/0x90
 ? kthread_create_on_node+0x40/0x40
 ret_from_fork+0x22/0x30
Code: f0 00 0f 92 c0 84 c0 74 14 48 8b 05 34 74 c5 00 be fd 00 00 00 ff 90 a0 00 00 00 5d c3 89 fe 48 c7 c7 a0 c6 fc b9 e8 d5 b5 06 00 <0f> ff 5d c3 0f 1f 44 00 00 8b 05 a2 d1 13 02 85 c0 75 38 55 48

But the target task's priority has already been adjusted, so the only
purpose of switched_to_rt() invoking resched_curr() is to wake up the
CPU running some task that needs to be preempted by the boosted task.
But the CPU is offline, which presumably means that the task must be
migrated to some other CPU, and that this other CPU will undertake any
needed preemption at the time of migration.  Because the runqueue lock
is held when resched_curr() is invoked, we know that the boosted task
cannot go anywhere, so it is not necessary to invoke resched_curr()
in this particular case.

This commit therefore makes switched_to_rt() refrain from invoking
resched_curr() when the target CPU is offline.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 kernel/sched/rt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 4056c19ca3f0..f242f642ef53 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2206,7 +2206,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
 		if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
 			queue_push_tasks(rq);
 #endif /* CONFIG_SMP */
-		if (p->prio < rq->curr->prio)
+		if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
 			resched_curr(rq);
 	}
 }
-- 
cgit v1.2.1


From a7e6425ea5816947d3cb51fbf57351207c074383 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 24 Oct 2017 08:25:02 -0700
Subject: workqueue: Eliminate cond_resched_rcu_qs() in favor of cond_resched()

Now that cond_resched() also provides RCU quiescent states when
needed, it can be used in place of cond_resched_rcu_qs().  This
commit therefore makes this change.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Tejun Heo <tj@kernel.org>
Reviewed-by: Lai Jiangshan <jiangshanlai@gmail.com>
---
 kernel/workqueue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 8fdb710bfdd7..aee7eaab05cb 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2135,7 +2135,7 @@ __acquires(&pool->lock)
 	 * stop_machine. At the same time, report a quiescent RCU state so
 	 * the same condition doesn't freeze RCU.
 	 */
-	cond_resched_rcu_qs();
+	cond_resched();
 
 	spin_lock_irq(&pool->lock);
 
-- 
cgit v1.2.1


From e31d28b6ab8f0431e2288edb02723269f54d1471 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 24 Oct 2017 08:26:32 -0700
Subject: trace: Eliminate cond_resched_rcu_qs() in favor of cond_resched()

Now that cond_resched() also provides RCU quiescent states when
needed, it can be used in place of cond_resched_rcu_qs().  This
commit therefore makes this change.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@redhat.com>
---
 kernel/trace/trace_benchmark.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c
index 79f838a75077..22fee766081b 100644
--- a/kernel/trace/trace_benchmark.c
+++ b/kernel/trace/trace_benchmark.c
@@ -165,7 +165,7 @@ static int benchmark_event_kthread(void *arg)
 		 * this thread will never voluntarily schedule which would
 		 * block synchronize_rcu_tasks() indefinitely.
 		 */
-		cond_resched_rcu_qs();
+		cond_resched();
 	}
 
 	return 0;
-- 
cgit v1.2.1


From edf22f4ca26babcd8cba4f049c6be53f0e73bcc1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 24 Oct 2017 08:31:12 -0700
Subject: softirq: Eliminate cond_resched_rcu_qs() in favor of cond_resched()

Now that cond_resched() also provides RCU quiescent states when
needed, it can be used in place of cond_resched_rcu_qs().  This
commit therefore makes this change.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: NeilBrown <neilb@suse.com>
Cc: Ingo Molnar <mingo@kernel.org>
---
 kernel/softirq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/softirq.c b/kernel/softirq.c
index 2f5e87f1bae2..24d243ef8e71 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -665,7 +665,7 @@ static void run_ksoftirqd(unsigned int cpu)
 		 */
 		__do_softirq();
 		local_irq_enable();
-		cond_resched_rcu_qs();
+		cond_resched();
 		return;
 	}
 	local_irq_enable();
-- 
cgit v1.2.1


From 5c6338b4877038d28148fcfe1e7f862970ebaad1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 9 Oct 2017 11:08:53 -0700
Subject: uprobes: Remove now-redundant smp_read_barrier_depends()

Now that READ_ONCE() implies smp_read_barrier_depends(), the
get_xol_area() and get_trampoline_vaddr() no longer need their
smp_read_barrier_depends() calls, which this commit removes.
While we are here, convert the corresponding smp_wmb() to an
smp_store_release().

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
---
 kernel/events/uprobes.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 267f6ef91d97..ce6848e46e94 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1167,8 +1167,8 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
 	}
 
 	ret = 0;
-	smp_wmb();	/* pairs with get_xol_area() */
-	mm->uprobes_state.xol_area = area;
+	/* pairs with get_xol_area() */
+	smp_store_release(&mm->uprobes_state.xol_area, area); /* ^^^ */
  fail:
 	up_write(&mm->mmap_sem);
 
@@ -1230,8 +1230,8 @@ static struct xol_area *get_xol_area(void)
 	if (!mm->uprobes_state.xol_area)
 		__create_xol_area(0);
 
-	area = mm->uprobes_state.xol_area;
-	smp_read_barrier_depends();	/* pairs with wmb in xol_add_vma() */
+	/* Pairs with xol_add_vma() smp_store_release() */
+	area = READ_ONCE(mm->uprobes_state.xol_area); /* ^^^ */
 	return area;
 }
 
@@ -1528,8 +1528,8 @@ static unsigned long get_trampoline_vaddr(void)
 	struct xol_area *area;
 	unsigned long trampoline_vaddr = -1;
 
-	area = current->mm->uprobes_state.xol_area;
-	smp_read_barrier_depends();
+	/* Pairs with xol_add_vma() smp_store_release() */
+	area = READ_ONCE(current->mm->uprobes_state.xol_area); /* ^^^ */
 	if (area)
 		trampoline_vaddr = area->vaddr;
 
-- 
cgit v1.2.1


From 548095dea63ffc016d39c35b32c628d033638aca Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 9 Oct 2017 11:22:50 -0700
Subject: locking: Remove smp_read_barrier_depends() from
 queued_spin_lock_slowpath()

Queued spinlocks are not used by DEC Alpha, and furthermore operations
such as READ_ONCE() and release/relaxed RMW atomics are being changed
to imply smp_read_barrier_depends().  This commit therefore removes the
now-redundant smp_read_barrier_depends() from queued_spin_lock_slowpath(),
and adjusts the comments accordingly.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
---
 kernel/locking/qspinlock.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 294294c71ba4..38ece035039e 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -170,7 +170,7 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
  * @tail : The new queue tail code word
  * Return: The previous queue tail code word
  *
- * xchg(lock, tail)
+ * xchg(lock, tail), which heads an address dependency
  *
  * p,*,* -> n,*,* ; prev = xchg(lock, node)
  */
@@ -409,13 +409,11 @@ queue:
 	if (old & _Q_TAIL_MASK) {
 		prev = decode_tail(old);
 		/*
-		 * The above xchg_tail() is also a load of @lock which generates,
-		 * through decode_tail(), a pointer.
-		 *
-		 * The address dependency matches the RELEASE of xchg_tail()
-		 * such that the access to @prev must happen after.
+		 * The above xchg_tail() is also a load of @lock which
+		 * generates, through decode_tail(), a pointer.  The address
+		 * dependency matches the RELEASE of xchg_tail() such that
+		 * the subsequent access to @prev happens after.
 		 */
-		smp_read_barrier_depends();
 
 		WRITE_ONCE(prev->next, node);
 
-- 
cgit v1.2.1


From 243d1a7977ae0814aa1ccb8bb87f8a4e0822ca8f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 9 Oct 2017 11:30:11 -0700
Subject: tracepoint: Remove smp_read_barrier_depends() from comment

The comment in tracepoint_add_func() mentions smp_read_barrier_depends(),
whose use should be quite restricted.  This commit updates the comment
to instead mention the smp_store_release() and rcu_dereference_sched()
that the current code actually uses.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
 kernel/tracepoint.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 685c50ae6300..671b13457387 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -212,11 +212,10 @@ static int tracepoint_add_func(struct tracepoint *tp,
 	}
 
 	/*
-	 * rcu_assign_pointer has a smp_wmb() which makes sure that the new
-	 * probe callbacks array is consistent before setting a pointer to it.
-	 * This array is referenced by __DO_TRACE from
-	 * include/linux/tracepoints.h. A matching smp_read_barrier_depends()
-	 * is used.
+	 * rcu_assign_pointer has as smp_store_release() which makes sure
+	 * that the new probe callbacks array is consistent before setting
+	 * a pointer to it.  This array is referenced by __DO_TRACE from
+	 * include/linux/tracepoint.h using rcu_dereference_sched().
 	 */
 	rcu_assign_pointer(tp->funcs, tp_funcs);
 	if (!static_key_enabled(&tp->key))
-- 
cgit v1.2.1


From efd88b02bb9e6b8b73a20ea611e5d07ed6d4af34 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 19 Oct 2017 14:52:41 -0700
Subject: rcu: Add comment giving debug strategy for double call_rcu()

The following statement has for some reason proven non-intuitive:

	WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) != (count == 0));

This commit therefore adds a comment that states that this warning
usually triggers in response to a double call_rcu(), which is sort
of like a double free.  The comment also suggests building with
CONFIG_DEBUG_OBJECTS_RCU_HEAD=y to track down the double call_rcu().

Reported-by: David Howells <dhowells@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'kernel')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f9c0ca2ccf0c..1bdc0481aaf1 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2789,6 +2789,11 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 		rdp->n_force_qs_snap = rsp->n_force_qs;
 	} else if (count < rdp->qlen_last_fqs_check - qhimark)
 		rdp->qlen_last_fqs_check = count;
+
+	/*
+	 * The following usually indicates a double call_rcu().  To track
+	 * this down, try building with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y.
+	 */
 	WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) != (count == 0));
 
 	local_irq_restore(flags);
-- 
cgit v1.2.1


From 84b12b752f41cd3d25d75692c2145d816e42926c Mon Sep 17 00:00:00 2001
From: Rakib Mullick <rakib.mullick@gmail.com>
Date: Fri, 17 Nov 2017 21:40:15 +0600
Subject: rcu: Remove have_rcu_nocb_mask from tree_plugin.h

Currently have_rcu_nocb_mask is used to avoid double allocation of
rcu_nocb_mask during boot up. Due to different representation of
cpumask_var_t on different kernel config CPUMASK=y(or n) it was okay.
But now we have a helper cpumask_available(), which can be utilized
to check whether rcu_nocb_mask has been allocated or not without using
a variable.

Removing the variable also reduces vmlinux size.

Unpatched version:
text	   data	    bss	    dec	    hex	filename
13050393	7852470	14543408	35446271	21cddff	vmlinux

Patched version:
 text	   data	    bss	    dec	    hex	filename
13050390	7852438	14543408	35446236	21cdddc	vmlinux

Signed-off-by: Rakib Mullick <rakib.mullick@gmail.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree_plugin.h | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index db85ca3975f1..13a8e08f1998 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -61,7 +61,6 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work);
 
 #ifdef CONFIG_RCU_NOCB_CPU
 static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
-static bool have_rcu_nocb_mask;	    /* Was rcu_nocb_mask allocated? */
 static bool __read_mostly rcu_nocb_poll;    /* Offload kthread are to poll. */
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 
@@ -1752,7 +1751,6 @@ static void increment_cpu_stall_ticks(void)
 static int __init rcu_nocb_setup(char *str)
 {
 	alloc_bootmem_cpumask_var(&rcu_nocb_mask);
-	have_rcu_nocb_mask = true;
 	cpulist_parse(str, rcu_nocb_mask);
 	return 1;
 }
@@ -1801,7 +1799,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
 /* Is the specified CPU a no-CBs CPU? */
 bool rcu_is_nocb_cpu(int cpu)
 {
-	if (have_rcu_nocb_mask)
+	if (cpumask_available(rcu_nocb_mask))
 		return cpumask_test_cpu(cpu, rcu_nocb_mask);
 	return false;
 }
@@ -2295,14 +2293,13 @@ void __init rcu_init_nohz(void)
 		need_rcu_nocb_mask = true;
 #endif /* #if defined(CONFIG_NO_HZ_FULL) */
 
-	if (!have_rcu_nocb_mask && need_rcu_nocb_mask) {
+	if (!cpumask_available(rcu_nocb_mask) && need_rcu_nocb_mask) {
 		if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
 			pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
 			return;
 		}
-		have_rcu_nocb_mask = true;
 	}
-	if (!have_rcu_nocb_mask)
+	if (!cpumask_available(rcu_nocb_mask))
 		return;
 
 #if defined(CONFIG_NO_HZ_FULL)
@@ -2428,7 +2425,7 @@ static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp)
 	struct rcu_data *rdp_leader = NULL;  /* Suppress misguided gcc warn. */
 	struct rcu_data *rdp_prev = NULL;
 
-	if (!have_rcu_nocb_mask)
+	if (!cpumask_available(rcu_nocb_mask))
 		return;
 	if (ls == -1) {
 		ls = int_sqrt(nr_cpu_ids);
-- 
cgit v1.2.1


From cc1321c96f855525fbd847fec130f000daa1bb1f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 16 Oct 2017 11:05:03 -0700
Subject: torture: Reduce #ifdefs for preempt_schedule()

This commit adds a torture_preempt_schedule() that is nothingness
in !PREEMPT builds and is preempt_schedule() otherwise.  Then
torture_preempt_schedule() is used to eliminate several ugly #ifdefs,
both in rcutorture and in locktorture.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/locking/locktorture.c | 24 ++++++------------------
 kernel/rcu/rcutorture.c      |  4 +---
 2 files changed, 7 insertions(+), 21 deletions(-)

(limited to 'kernel')

diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index f24582d4dad3..617cea2520b3 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -130,10 +130,8 @@ static void torture_lock_busted_write_delay(struct torture_random_state *trsp)
 	if (!(torture_random(trsp) %
 	      (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
 		mdelay(longdelay_ms);
-#ifdef CONFIG_PREEMPT
 	if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
-		preempt_schedule();  /* Allow test to be preempted. */
-#endif
+		torture_preempt_schedule();  /* Allow test to be preempted. */
 }
 
 static void torture_lock_busted_write_unlock(void)
@@ -179,10 +177,8 @@ static void torture_spin_lock_write_delay(struct torture_random_state *trsp)
 	if (!(torture_random(trsp) %
 	      (cxt.nrealwriters_stress * 2 * shortdelay_us)))
 		udelay(shortdelay_us);
-#ifdef CONFIG_PREEMPT
 	if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
-		preempt_schedule();  /* Allow test to be preempted. */
-#endif
+		torture_preempt_schedule();  /* Allow test to be preempted. */
 }
 
 static void torture_spin_lock_write_unlock(void) __releases(torture_spinlock)
@@ -352,10 +348,8 @@ static void torture_mutex_delay(struct torture_random_state *trsp)
 		mdelay(longdelay_ms * 5);
 	else
 		mdelay(longdelay_ms / 5);
-#ifdef CONFIG_PREEMPT
 	if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
-		preempt_schedule();  /* Allow test to be preempted. */
-#endif
+		torture_preempt_schedule();  /* Allow test to be preempted. */
 }
 
 static void torture_mutex_unlock(void) __releases(torture_mutex)
@@ -507,10 +501,8 @@ static void torture_rtmutex_delay(struct torture_random_state *trsp)
 	if (!(torture_random(trsp) %
 	      (cxt.nrealwriters_stress * 2 * shortdelay_us)))
 		udelay(shortdelay_us);
-#ifdef CONFIG_PREEMPT
 	if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
-		preempt_schedule();  /* Allow test to be preempted. */
-#endif
+		torture_preempt_schedule();  /* Allow test to be preempted. */
 }
 
 static void torture_rtmutex_unlock(void) __releases(torture_rtmutex)
@@ -547,10 +539,8 @@ static void torture_rwsem_write_delay(struct torture_random_state *trsp)
 		mdelay(longdelay_ms * 10);
 	else
 		mdelay(longdelay_ms / 10);
-#ifdef CONFIG_PREEMPT
 	if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
-		preempt_schedule();  /* Allow test to be preempted. */
-#endif
+		torture_preempt_schedule();  /* Allow test to be preempted. */
 }
 
 static void torture_rwsem_up_write(void) __releases(torture_rwsem)
@@ -574,10 +564,8 @@ static void torture_rwsem_read_delay(struct torture_random_state *trsp)
 		mdelay(longdelay_ms * 2);
 	else
 		mdelay(longdelay_ms / 2);
-#ifdef CONFIG_PREEMPT
 	if (!(torture_random(trsp) % (cxt.nrealreaders_stress * 20000)))
-		preempt_schedule();  /* Allow test to be preempted. */
-#endif
+		torture_preempt_schedule();  /* Allow test to be preempted. */
 }
 
 static void torture_rwsem_up_read(void) __releases(torture_rwsem)
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 74f6b0146b98..e7d3cce84214 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -315,11 +315,9 @@ static void rcu_read_delay(struct torture_random_state *rrsp)
 	}
 	if (!(torture_random(rrsp) % (nrealreaders * 2 * shortdelay_us)))
 		udelay(shortdelay_us);
-#ifdef CONFIG_PREEMPT
 	if (!preempt_count() &&
 	    !(torture_random(rrsp) % (nrealreaders * 20000)))
-		preempt_schedule();  /* No QS if preempt_disable() in effect */
-#endif
+		torture_preempt_schedule();  /* QS only if preemptible. */
 }
 
 static void rcu_torture_read_unlock(int idx) __releases(RCU)
-- 
cgit v1.2.1


From e8302739aa2204d52dacf9e9619cb6e755fa997a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 16 Oct 2017 11:23:42 -0700
Subject: rcutorture: Preempt RCU-preempt readers more vigorously

This commit attempts to make a very rare rcutorture failure happen
more often by increasing the fraction of RCU-preempt read-side critical
sections that are preempted.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/rcutorture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index e7d3cce84214..1074ecc3f72f 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -316,7 +316,7 @@ static void rcu_read_delay(struct torture_random_state *rrsp)
 	if (!(torture_random(rrsp) % (nrealreaders * 2 * shortdelay_us)))
 		udelay(shortdelay_us);
 	if (!preempt_count() &&
-	    !(torture_random(rrsp) % (nrealreaders * 20000)))
+	    !(torture_random(rrsp) % (nrealreaders * 500)))
 		torture_preempt_schedule();  /* QS only if preemptible. */
 }
 
-- 
cgit v1.2.1


From f2f762608f45353b0b8c37507824f95bb716c3d5 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Mon, 15 May 2017 02:07:22 -0700
Subject: locking/locktorture: Fix rwsem reader_delay

We should account for nreader threads, not writers in this
callback. Could even trigger a div by 0 if the user explicitly
disables writers.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/locking/locktorture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 617cea2520b3..a307a79e6b0b 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -560,7 +560,7 @@ static void torture_rwsem_read_delay(struct torture_random_state *trsp)
 
 	/* We want a long delay occasionally to force massive contention.  */
 	if (!(torture_random(trsp) %
-	      (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
+	      (cxt.nrealreaders_stress * 2000 * longdelay_ms)))
 		mdelay(longdelay_ms * 2);
 	else
 		mdelay(longdelay_ms / 2);
-- 
cgit v1.2.1


From 2ce77d16db4240dd2e422fc0a5c26d3e2ec03446 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Mon, 15 May 2017 02:07:23 -0700
Subject: locking/locktorture: Fix num reader/writer corner cases

Things can explode for locktorture if the user does combinations
of nwriters_stress=0 nreaders_stress=0. Fix this by not assuming
we always want to torture writer threads.

Reported-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Jeremy Linton <jeremy.linton@arm.com>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
---
 kernel/locking/locktorture.c | 76 +++++++++++++++++++++++++-------------------
 1 file changed, 44 insertions(+), 32 deletions(-)

(limited to 'kernel')

diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index a307a79e6b0b..2a1fc2a58910 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -703,8 +703,7 @@ static void __torture_print_stats(char *page,
 {
 	bool fail = 0;
 	int i, n_stress;
-	long max = 0;
-	long min = statp[0].n_lock_acquired;
+	long max = 0, min = statp ? statp[0].n_lock_acquired : 0;
 	long long sum = 0;
 
 	n_stress = write ? cxt.nrealwriters_stress : cxt.nrealreaders_stress;
@@ -811,7 +810,7 @@ static void lock_torture_cleanup(void)
 	 * such, only perform the underlying torture-specific cleanups,
 	 * and avoid anything related to locktorture.
 	 */
-	if (!cxt.lwsa)
+	if (!cxt.lwsa && !cxt.lrsa)
 		goto end;
 
 	if (writer_tasks) {
@@ -886,6 +885,13 @@ static int __init lock_torture_init(void)
 		firsterr = -EINVAL;
 		goto unwind;
 	}
+
+	if (nwriters_stress == 0 && nreaders_stress == 0) {
+		pr_alert("lock-torture: must run at least one locking thread\n");
+		firsterr = -EINVAL;
+		goto unwind;
+	}
+
 	if (cxt.cur_ops->init)
 		cxt.cur_ops->init();
 
@@ -909,17 +915,19 @@ static int __init lock_torture_init(void)
 #endif
 
 	/* Initialize the statistics so that each run gets its own numbers. */
+	if (nwriters_stress) {
+		lock_is_write_held = 0;
+		cxt.lwsa = kmalloc(sizeof(*cxt.lwsa) * cxt.nrealwriters_stress, GFP_KERNEL);
+		if (cxt.lwsa == NULL) {
+			VERBOSE_TOROUT_STRING("cxt.lwsa: Out of memory");
+			firsterr = -ENOMEM;
+			goto unwind;
+		}
 
-	lock_is_write_held = 0;
-	cxt.lwsa = kmalloc(sizeof(*cxt.lwsa) * cxt.nrealwriters_stress, GFP_KERNEL);
-	if (cxt.lwsa == NULL) {
-		VERBOSE_TOROUT_STRING("cxt.lwsa: Out of memory");
-		firsterr = -ENOMEM;
-		goto unwind;
-	}
-	for (i = 0; i < cxt.nrealwriters_stress; i++) {
-		cxt.lwsa[i].n_lock_fail = 0;
-		cxt.lwsa[i].n_lock_acquired = 0;
+		for (i = 0; i < cxt.nrealwriters_stress; i++) {
+			cxt.lwsa[i].n_lock_fail = 0;
+			cxt.lwsa[i].n_lock_acquired = 0;
+		}
 	}
 
 	if (cxt.cur_ops->readlock) {
@@ -936,19 +944,21 @@ static int __init lock_torture_init(void)
 			cxt.nrealreaders_stress = cxt.nrealwriters_stress;
 		}
 
-		lock_is_read_held = 0;
-		cxt.lrsa = kmalloc(sizeof(*cxt.lrsa) * cxt.nrealreaders_stress, GFP_KERNEL);
-		if (cxt.lrsa == NULL) {
-			VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory");
-			firsterr = -ENOMEM;
-			kfree(cxt.lwsa);
-			cxt.lwsa = NULL;
-			goto unwind;
-		}
-
-		for (i = 0; i < cxt.nrealreaders_stress; i++) {
-			cxt.lrsa[i].n_lock_fail = 0;
-			cxt.lrsa[i].n_lock_acquired = 0;
+		if (nreaders_stress) {
+			lock_is_read_held = 0;
+			cxt.lrsa = kmalloc(sizeof(*cxt.lrsa) * cxt.nrealreaders_stress, GFP_KERNEL);
+			if (cxt.lrsa == NULL) {
+				VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory");
+				firsterr = -ENOMEM;
+				kfree(cxt.lwsa);
+				cxt.lwsa = NULL;
+				goto unwind;
+			}
+
+			for (i = 0; i < cxt.nrealreaders_stress; i++) {
+				cxt.lrsa[i].n_lock_fail = 0;
+				cxt.lrsa[i].n_lock_acquired = 0;
+			}
 		}
 	}
 
@@ -978,12 +988,14 @@ static int __init lock_torture_init(void)
 			goto unwind;
 	}
 
-	writer_tasks = kzalloc(cxt.nrealwriters_stress * sizeof(writer_tasks[0]),
-			       GFP_KERNEL);
-	if (writer_tasks == NULL) {
-		VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory");
-		firsterr = -ENOMEM;
-		goto unwind;
+	if (nwriters_stress) {
+		writer_tasks = kzalloc(cxt.nrealwriters_stress * sizeof(writer_tasks[0]),
+				       GFP_KERNEL);
+		if (writer_tasks == NULL) {
+			VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory");
+			firsterr = -ENOMEM;
+			goto unwind;
+		}
 	}
 
 	if (cxt.cur_ops->readlock) {
-- 
cgit v1.2.1


From 4ced3314fd3a73dabac4e8a41747883eff36c3e8 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 21 Nov 2017 15:01:02 -0800
Subject: torture: Make stutter less vulnerable to compilers and races

The stutter_wait() function repeatedly fetched stutter_pause_test, and
should really just fetch it once on each pass.  The races should be
harmless, but why have the races?  Also, the whole point of the value
"2" for stutter_pause_test is to get everyone to start at very nearly
the same time, but the value "2" was the first jiffy of the stutter
rather than the last jiffy of the stutter.

This commit rearranges the code to be more sensible.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/torture.c | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

(limited to 'kernel')

diff --git a/kernel/torture.c b/kernel/torture.c
index 52781e838541..3bcbd4fbfe18 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -573,18 +573,21 @@ static int stutter;
  */
 void stutter_wait(const char *title)
 {
+	int spt;
+
 	cond_resched_rcu_qs();
-	while (READ_ONCE(stutter_pause_test) ||
-	       (torture_runnable && !READ_ONCE(*torture_runnable))) {
-		if (stutter_pause_test)
-			if (READ_ONCE(stutter_pause_test) == 1)
-				schedule_timeout_interruptible(1);
-			else
-				while (READ_ONCE(stutter_pause_test))
-					cond_resched();
-		else
+	spt = READ_ONCE(stutter_pause_test);
+	while (spt || (torture_runnable && !READ_ONCE(*torture_runnable))) {
+		if (spt == 1) {
+			schedule_timeout_interruptible(1);
+		} else if (spt == 2) {
+			while (READ_ONCE(stutter_pause_test))
+				cond_resched();
+		} else {
 			schedule_timeout_interruptible(round_jiffies_relative(HZ));
+		}
 		torture_shutdown_absorb(title);
+		spt = READ_ONCE(stutter_pause_test);
 	}
 }
 EXPORT_SYMBOL_GPL(stutter_wait);
@@ -597,17 +600,15 @@ static int torture_stutter(void *arg)
 {
 	VERBOSE_TOROUT_STRING("torture_stutter task started");
 	do {
-		if (!torture_must_stop()) {
-			if (stutter > 1) {
-				schedule_timeout_interruptible(stutter - 1);
-				WRITE_ONCE(stutter_pause_test, 2);
-			}
-			schedule_timeout_interruptible(1);
+		if (!torture_must_stop() && stutter > 1) {
 			WRITE_ONCE(stutter_pause_test, 1);
+			schedule_timeout_interruptible(stutter - 1);
+			WRITE_ONCE(stutter_pause_test, 2);
+			schedule_timeout_interruptible(1);
 		}
+		WRITE_ONCE(stutter_pause_test, 0);
 		if (!torture_must_stop())
 			schedule_timeout_interruptible(stutter);
-		WRITE_ONCE(stutter_pause_test, 0);
 		torture_shutdown_absorb("torture_stutter");
 	} while (!torture_must_stop());
 	torture_kthread_stopping("torture_stutter");
-- 
cgit v1.2.1


From a2f2577d96ad060b65eb909dd39b57d676754119 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 21 Nov 2017 20:19:17 -0800
Subject: torture: Eliminate torture_runnable and perf_runnable

The purpose of torture_runnable is to allow rcutorture and locktorture
to be started and stopped via sysfs when they are built into the kernel
(as in not compiled as loadable modules).  However, the 0444 permissions
for both instances of torture_runnable prevent this use case from ever
being put into practice.  Given that there have been no complaints
about this deficiency, it is reasonable to conclude that no one actually
makes use of this sysfs capability.  The perf_runnable module parameter
for rcuperf is in the same situation.

This commit therefore removes both torture_runnable instances as well
as perf_runnable.

Reported-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/locking/locktorture.c | 6 +-----
 kernel/rcu/rcuperf.c         | 6 +-----
 kernel/rcu/rcutorture.c      | 6 +-----
 kernel/torture.c             | 6 ++----
 4 files changed, 5 insertions(+), 19 deletions(-)

(limited to 'kernel')

diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 2a1fc2a58910..6850ffd69125 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -77,10 +77,6 @@ struct lock_stress_stats {
 	long n_lock_acquired;
 };
 
-int torture_runnable = IS_ENABLED(MODULE);
-module_param(torture_runnable, int, 0444);
-MODULE_PARM_DESC(torture_runnable, "Start locktorture at module init");
-
 /* Forward reference. */
 static void lock_torture_cleanup(void);
 
@@ -866,7 +862,7 @@ static int __init lock_torture_init(void)
 		&percpu_rwsem_lock_ops,
 	};
 
-	if (!torture_init_begin(torture_type, verbose, &torture_runnable))
+	if (!torture_init_begin(torture_type, verbose))
 		return -EBUSY;
 
 	/* Process args and tell the world that the torturer is on the job. */
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index 1f87a02c3399..d1ebdf9868bb 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -106,10 +106,6 @@ static int rcu_perf_writer_state;
 #define MAX_MEAS 10000
 #define MIN_MEAS 100
 
-static int perf_runnable = IS_ENABLED(MODULE);
-module_param(perf_runnable, int, 0444);
-MODULE_PARM_DESC(perf_runnable, "Start rcuperf at boot");
-
 /*
  * Operations vector for selecting different types of tests.
  */
@@ -646,7 +642,7 @@ rcu_perf_init(void)
 		&tasks_ops,
 	};
 
-	if (!torture_init_begin(perf_type, verbose, &perf_runnable))
+	if (!torture_init_begin(perf_type, verbose))
 		return -EBUSY;
 
 	/* Process args and tell the world that the perf'er is on the job. */
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 1074ecc3f72f..308e6fdbced8 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -187,10 +187,6 @@ static const char *rcu_torture_writer_state_getname(void)
 	return rcu_torture_writer_state_names[i];
 }
 
-static int torture_runnable = IS_ENABLED(MODULE);
-module_param(torture_runnable, int, 0444);
-MODULE_PARM_DESC(torture_runnable, "Start rcutorture at boot");
-
 #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU)
 #define rcu_can_boost() 1
 #else /* #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
@@ -1729,7 +1725,7 @@ rcu_torture_init(void)
 		&sched_ops, &tasks_ops,
 	};
 
-	if (!torture_init_begin(torture_type, verbose, &torture_runnable))
+	if (!torture_init_begin(torture_type, verbose))
 		return -EBUSY;
 
 	/* Process args and tell the world that the torturer is on the job. */
diff --git a/kernel/torture.c b/kernel/torture.c
index 3bcbd4fbfe18..572576ad9f58 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -61,7 +61,6 @@ static bool verbose;
 #define FULLSTOP_RMMOD    2	/* Normal rmmod of torture. */
 static int fullstop = FULLSTOP_RMMOD;
 static DEFINE_MUTEX(fullstop_mutex);
-static int *torture_runnable;
 
 #ifdef CONFIG_HOTPLUG_CPU
 
@@ -577,7 +576,7 @@ void stutter_wait(const char *title)
 
 	cond_resched_rcu_qs();
 	spt = READ_ONCE(stutter_pause_test);
-	while (spt || (torture_runnable && !READ_ONCE(*torture_runnable))) {
+	while (spt) {
 		if (spt == 1) {
 			schedule_timeout_interruptible(1);
 		} else if (spt == 2) {
@@ -649,7 +648,7 @@ static void torture_stutter_cleanup(void)
  * The runnable parameter points to a flag that controls whether or not
  * the test is currently runnable.  If there is no such flag, pass in NULL.
  */
-bool torture_init_begin(char *ttype, bool v, int *runnable)
+bool torture_init_begin(char *ttype, bool v)
 {
 	mutex_lock(&fullstop_mutex);
 	if (torture_type != NULL) {
@@ -661,7 +660,6 @@ bool torture_init_begin(char *ttype, bool v, int *runnable)
 	}
 	torture_type = ttype;
 	verbose = v;
-	torture_runnable = runnable;
 	fullstop = FULLSTOP_DONTSTOP;
 	return true;
 }
-- 
cgit v1.2.1


From 29d3939084583b26a5487be64b9523e61468f1be Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 21 Nov 2017 22:07:59 -0800
Subject: torture: Save a line in stutter_wait(): while -> for

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/torture.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/torture.c b/kernel/torture.c
index 572576ad9f58..37b94012a3f8 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -576,7 +576,7 @@ void stutter_wait(const char *title)
 
 	cond_resched_rcu_qs();
 	spt = READ_ONCE(stutter_pause_test);
-	while (spt) {
+	for (; spt; spt = READ_ONCE(stutter_pause_test)) {
 		if (spt == 1) {
 			schedule_timeout_interruptible(1);
 		} else if (spt == 2) {
@@ -586,7 +586,6 @@ void stutter_wait(const char *title)
 			schedule_timeout_interruptible(round_jiffies_relative(HZ));
 		}
 		torture_shutdown_absorb(title);
-		spt = READ_ONCE(stutter_pause_test);
 	}
 }
 EXPORT_SYMBOL_GPL(stutter_wait);
-- 
cgit v1.2.1