diff options
Diffstat (limited to 'include/linux')
77 files changed, 1998 insertions, 284 deletions
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 535ab2e13d2e..cfc246899473 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -209,7 +209,7 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) return unique_tag & BLK_MQ_UNIQUE_TAG_MASK; } - +void __blk_mq_complete_request_remote_work(struct work_struct *work); int blk_mq_request_started(struct request *rq); void blk_mq_start_request(struct request *rq); void blk_mq_end_request(struct request *rq, int error); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2fc4ba6fa07f..dd8e0bdbb40f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -24,6 +24,7 @@ #include <linux/rcupdate.h> #include <linux/percpu-refcount.h> #include <linux/scatterlist.h> +#include <linux/swork.h> struct module; struct scsi_ioctl_command; @@ -89,6 +90,7 @@ struct request { struct list_head queuelist; union { struct call_single_data csd; + struct work_struct work; u64 fifo_time; }; @@ -473,6 +475,7 @@ struct request_queue { #endif struct rcu_head rcu_head; wait_queue_head_t mq_freeze_wq; + struct swork_event mq_pcpu_wake; struct percpu_ref q_usage_counter; struct list_head all_q_node; diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h index 8fdcb783197d..d07dbeec7bc1 100644 --- a/include/linux/bottom_half.h +++ b/include/linux/bottom_half.h @@ -3,6 +3,39 @@ #include <linux/preempt.h> +#ifdef CONFIG_PREEMPT_RT_FULL + +extern void __local_bh_disable(void); +extern void _local_bh_enable(void); +extern void __local_bh_enable(void); + +static inline void local_bh_disable(void) +{ + __local_bh_disable(); +} + +static inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) +{ + __local_bh_disable(); +} + +static inline void local_bh_enable(void) +{ + __local_bh_enable(); +} + +static inline void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) +{ + __local_bh_enable(); +} + +static inline void local_bh_enable_ip(unsigned long ip) +{ + __local_bh_enable(); +} + +#else + #ifdef CONFIG_TRACE_IRQFLAGS extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); #else @@ -30,5 +63,6 @@ static inline void local_bh_enable(void) { __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); } +#endif #endif /* _LINUX_BH_H */ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 4431ea2c8802..0744157a97ca 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -75,8 +75,50 @@ struct buffer_head { struct address_space *b_assoc_map; /* mapping this buffer is associated with */ atomic_t b_count; /* users using this buffer_head */ +#ifdef CONFIG_PREEMPT_RT_BASE + spinlock_t b_uptodate_lock; +#if IS_ENABLED(CONFIG_JBD2) + spinlock_t b_state_lock; + spinlock_t b_journal_head_lock; +#endif +#endif }; +static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh) +{ + unsigned long flags; + +#ifndef CONFIG_PREEMPT_RT_BASE + local_irq_save(flags); + bit_spin_lock(BH_Uptodate_Lock, &bh->b_state); +#else + spin_lock_irqsave(&bh->b_uptodate_lock, flags); +#endif + return flags; +} + +static inline void +bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags) +{ +#ifndef CONFIG_PREEMPT_RT_BASE + bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state); + local_irq_restore(flags); +#else + spin_unlock_irqrestore(&bh->b_uptodate_lock, flags); +#endif +} + +static inline void buffer_head_init_locks(struct buffer_head *bh) +{ +#ifdef CONFIG_PREEMPT_RT_BASE + spin_lock_init(&bh->b_uptodate_lock); +#if IS_ENABLED(CONFIG_JBD2) + spin_lock_init(&bh->b_state_lock); + spin_lock_init(&bh->b_journal_head_lock); +#endif +#endif +} + /* * macro tricks to expand the set_buffer_foo(), clear_buffer_foo() * and buffer_foo() functions. diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1619a3213af5..3281593f6299 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -16,6 +16,7 @@ #include <linux/percpu-refcount.h> #include <linux/percpu-rwsem.h> #include <linux/workqueue.h> +#include <linux/swork.h> #ifdef CONFIG_CGROUPS @@ -138,6 +139,7 @@ struct cgroup_subsys_state { /* percpu_ref killing and RCU release */ struct rcu_head rcu_head; struct work_struct destroy_work; + struct swork_event destroy_swork; }; /* diff --git a/include/linux/completion.h b/include/linux/completion.h index 5d5aaae3af43..3bca1590e29f 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -7,8 +7,7 @@ * Atomic wait-for-completion handler data structures. * See kernel/sched/completion.c for details. */ - -#include <linux/wait.h> +#include <linux/swait.h> /* * struct completion - structure used to maintain state for a "completion" @@ -24,11 +23,11 @@ */ struct completion { unsigned int done; - wait_queue_head_t wait; + struct swait_queue_head wait; }; #define COMPLETION_INITIALIZER(work) \ - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } + { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) } #define COMPLETION_INITIALIZER_ONSTACK(work) \ ({ init_completion(&work); work; }) @@ -73,7 +72,7 @@ struct completion { static inline void init_completion(struct completion *x) { x->done = 0; - init_waitqueue_head(&x->wait); + init_swait_queue_head(&x->wait); } /** diff --git a/include/linux/cpu.h b/include/linux/cpu.h index e19bbc38a722..dd888fe6ec9b 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -200,6 +200,8 @@ extern void get_online_cpus(void); extern void put_online_cpus(void); extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); +extern void pin_current_cpu(void); +extern void unpin_current_cpu(void); #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri) #define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri) #define register_hotcpu_notifier(nb) register_cpu_notifier(nb) @@ -217,6 +219,8 @@ static inline void cpu_hotplug_done(void) {} #define put_online_cpus() do { } while (0) #define cpu_hotplug_disable() do { } while (0) #define cpu_hotplug_enable() do { } while (0) +static inline void pin_current_cpu(void) { } +static inline void unpin_current_cpu(void) { } #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) #define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) /* These aren't inline functions due to a GCC bug. */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index b757ee42bc63..239db4827cfb 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -11,6 +11,7 @@ #include <linux/rcupdate.h> #include <linux/lockref.h> #include <linux/stringhash.h> +#include <linux/wait.h> struct path; struct vfsmount; @@ -100,7 +101,7 @@ struct dentry { union { struct list_head d_lru; /* LRU list */ - wait_queue_head_t *d_wait; /* in-lookup ones only */ + struct swait_queue_head *d_wait; /* in-lookup ones only */ }; struct list_head d_child; /* child of parent list */ struct list_head d_subdirs; /* our children */ @@ -231,7 +232,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, - wait_queue_head_t *); + struct swait_queue_head *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern struct dentry * d_exact_alias(struct dentry *, struct inode *); diff --git a/include/linux/delay.h b/include/linux/delay.h index a6ecb34cf547..37caab306336 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -52,4 +52,10 @@ static inline void ssleep(unsigned int seconds) msleep(seconds * 1000); } +#ifdef CONFIG_PREEMPT_RT_FULL +extern void cpu_chill(void); +#else +# define cpu_chill() cpu_relax() +#endif + #endif /* defined(_LINUX_DELAY_H) */ diff --git a/include/linux/fs.h b/include/linux/fs.h index c2c04f891785..26dcf677fe8f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -695,7 +695,7 @@ struct inode { struct block_device *i_bdev; struct cdev *i_cdev; char *i_link; - unsigned i_dir_seq; + unsigned __i_dir_seq; }; __u32 i_generation; diff --git a/include/linux/highmem.h b/include/linux/highmem.h index bb3f3297062a..a117a33ef72c 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -7,6 +7,7 @@ #include <linux/mm.h> #include <linux/uaccess.h> #include <linux/hardirq.h> +#include <linux/sched.h> #include <asm/cacheflush.h> @@ -65,7 +66,7 @@ static inline void kunmap(struct page *page) static inline void *kmap_atomic(struct page *page) { - preempt_disable(); + preempt_disable_nort(); pagefault_disable(); return page_address(page); } @@ -74,7 +75,7 @@ static inline void *kmap_atomic(struct page *page) static inline void __kunmap_atomic(void *addr) { pagefault_enable(); - preempt_enable(); + preempt_enable_nort(); } #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) @@ -86,32 +87,51 @@ static inline void __kunmap_atomic(void *addr) #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) +#ifndef CONFIG_PREEMPT_RT_FULL DECLARE_PER_CPU(int, __kmap_atomic_idx); +#endif static inline int kmap_atomic_idx_push(void) { +#ifndef CONFIG_PREEMPT_RT_FULL int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1; -#ifdef CONFIG_DEBUG_HIGHMEM +# ifdef CONFIG_DEBUG_HIGHMEM WARN_ON_ONCE(in_irq() && !irqs_disabled()); BUG_ON(idx >= KM_TYPE_NR); -#endif +# endif return idx; +#else + current->kmap_idx++; + BUG_ON(current->kmap_idx > KM_TYPE_NR); + return current->kmap_idx - 1; +#endif } static inline int kmap_atomic_idx(void) { +#ifndef CONFIG_PREEMPT_RT_FULL return __this_cpu_read(__kmap_atomic_idx) - 1; +#else + return current->kmap_idx - 1; +#endif } static inline void kmap_atomic_idx_pop(void) { -#ifdef CONFIG_DEBUG_HIGHMEM +#ifndef CONFIG_PREEMPT_RT_FULL +# ifdef CONFIG_DEBUG_HIGHMEM int idx = __this_cpu_dec_return(__kmap_atomic_idx); BUG_ON(idx < 0); -#else +# else __this_cpu_dec(__kmap_atomic_idx); +# endif +#else + current->kmap_idx--; +# ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(current->kmap_idx < 0); +# endif #endif } diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 5ae70d2f9a2f..cf423f1570c5 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -87,6 +87,9 @@ enum hrtimer_restart { * @function: timer expiry callback function * @base: pointer to the timer base (per cpu and per clock) * @state: state information (See bit values above) + * @cb_entry: list entry to defer timers from hardirq context + * @irqsafe: timer can run in hardirq context + * @praecox: timer expiry time if expired at the time of programming * @is_rel: Set if the timer was armed relative * @start_pid: timer statistics field to store the pid of the task which * started the timer @@ -103,6 +106,11 @@ struct hrtimer { enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; u8 state; + struct list_head cb_entry; + int irqsafe; +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST + ktime_t praecox; +#endif u8 is_rel; #ifdef CONFIG_TIMER_STATS int start_pid; @@ -123,11 +131,7 @@ struct hrtimer_sleeper { struct task_struct *task; }; -#ifdef CONFIG_64BIT # define HRTIMER_CLOCK_BASE_ALIGN 64 -#else -# define HRTIMER_CLOCK_BASE_ALIGN 32 -#endif /** * struct hrtimer_clock_base - the timer base for a specific clock @@ -136,6 +140,7 @@ struct hrtimer_sleeper { * timer to a base on another cpu. * @clockid: clock id for per_cpu support * @active: red black tree root node for the active timers + * @expired: list head for deferred timers. * @get_time: function to retrieve the current time of the clock * @offset: offset of this clock to the monotonic base */ @@ -144,6 +149,7 @@ struct hrtimer_clock_base { int index; clockid_t clockid; struct timerqueue_head active; + struct list_head expired; ktime_t (*get_time)(void); ktime_t offset; } __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN))); @@ -187,6 +193,7 @@ struct hrtimer_cpu_base { raw_spinlock_t lock; seqcount_t seq; struct hrtimer *running; + struct hrtimer *running_soft; unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; @@ -203,6 +210,9 @@ struct hrtimer_cpu_base { unsigned int nr_hangs; unsigned int max_hang_time; #endif +#ifdef CONFIG_PREEMPT_RT_BASE + wait_queue_head_t wait; +#endif struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; } ____cacheline_aligned; @@ -412,6 +422,13 @@ static inline void hrtimer_restart(struct hrtimer *timer) hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } +/* Softirq preemption could deadlock timer removal */ +#ifdef CONFIG_PREEMPT_RT_BASE + extern void hrtimer_wait_for_timer(const struct hrtimer *timer); +#else +# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0) +#endif + /* Query timers: */ extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust); @@ -442,9 +459,15 @@ static inline bool hrtimer_is_queued(struct hrtimer *timer) * Helper function to check, whether the timer is running the callback * function */ -static inline int hrtimer_callback_running(struct hrtimer *timer) +static inline int hrtimer_callback_running(const struct hrtimer *timer) { - return timer->base->cpu_base->running == timer; + if (timer->base->cpu_base->running == timer) + return 1; +#ifdef CONFIG_PREEMPT_RT_BASE + if (timer->base->cpu_base->running_soft == timer) + return 1; +#endif + return 0; } /* Forward a hrtimer so it expires after now: */ diff --git a/include/linux/idr.h b/include/linux/idr.h index 083d61e92706..5899796f50cb 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -95,10 +95,14 @@ bool idr_is_empty(struct idr *idp); * Each idr_preload() should be matched with an invocation of this * function. See idr_preload() for details. */ +#ifdef CONFIG_PREEMPT_RT_FULL +void idr_preload_end(void); +#else static inline void idr_preload_end(void) { preempt_enable(); } +#endif /** * idr_find - return pointer for given id diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 325f649d77ff..a56e263f5005 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -150,6 +150,12 @@ extern struct task_group root_task_group; # define INIT_PERF_EVENTS(tsk) #endif +#ifdef CONFIG_PREEMPT_RT_BASE +# define INIT_TIMER_LIST .posix_timer_list = NULL, +#else +# define INIT_TIMER_LIST +#endif + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN # define INIT_VTIME(tsk) \ .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \ @@ -164,6 +170,7 @@ extern struct task_group root_task_group; #ifdef CONFIG_RT_MUTEXES # define INIT_RT_MUTEXES(tsk) \ .pi_waiters = RB_ROOT, \ + .pi_top_task = NULL, \ .pi_waiters_leftmost = NULL, #else # define INIT_RT_MUTEXES(tsk) @@ -250,6 +257,7 @@ extern struct task_group root_task_group; .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ .timer_slack_ns = 50000, /* 50 usec default slack */ \ + INIT_TIMER_LIST \ .pids = { \ [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 72f0721f75e7..480972ae47d3 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -14,6 +14,7 @@ #include <linux/hrtimer.h> #include <linux/kref.h> #include <linux/workqueue.h> +#include <linux/swork.h> #include <linux/atomic.h> #include <asm/ptrace.h> @@ -61,6 +62,7 @@ * interrupt handler after suspending interrupts. For system * wakeup devices users need to implement wakeup detection in * their interrupt handlers. + * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT) */ #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 @@ -74,6 +76,7 @@ #define IRQF_NO_THREAD 0x00010000 #define IRQF_EARLY_RESUME 0x00020000 #define IRQF_COND_SUSPEND 0x00040000 +#define IRQF_NO_SOFTIRQ_CALL 0x00080000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) @@ -196,7 +199,7 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); #ifdef CONFIG_LOCKDEP # define local_irq_enable_in_hardirq() do { } while (0) #else -# define local_irq_enable_in_hardirq() local_irq_enable() +# define local_irq_enable_in_hardirq() local_irq_enable_nort() #endif extern void disable_irq_nosync(unsigned int irq); @@ -216,6 +219,7 @@ extern void resume_device_irqs(void); * struct irq_affinity_notify - context for notification of IRQ affinity changes * @irq: Interrupt to which notification applies * @kref: Reference count, for internal use + * @swork: Swork item, for internal use * @work: Work item, for internal use * @notify: Function to be called on change. This will be * called in process context. @@ -227,7 +231,11 @@ extern void resume_device_irqs(void); struct irq_affinity_notify { unsigned int irq; struct kref kref; +#ifdef CONFIG_PREEMPT_RT_BASE + struct swork_event swork; +#else struct work_struct work; +#endif void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask); void (*release)(struct kref *ref); }; @@ -406,9 +414,13 @@ extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, bool state); #ifdef CONFIG_IRQ_FORCED_THREADING +# ifndef CONFIG_PREEMPT_RT_BASE extern bool force_irqthreads; +# else +# define force_irqthreads (true) +# endif #else -#define force_irqthreads (0) +#define force_irqthreads (false) #endif #ifndef __ARCH_SET_SOFTIRQ_PENDING @@ -465,9 +477,10 @@ struct softirq_action void (*action)(struct softirq_action *); }; +#ifndef CONFIG_PREEMPT_RT_FULL asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); - +static inline void thread_do_softirq(void) { do_softirq(); } #ifdef __ARCH_HAS_DO_SOFTIRQ void do_softirq_own_stack(void); #else @@ -476,13 +489,25 @@ static inline void do_softirq_own_stack(void) __do_softirq(); } #endif +#else +extern void thread_do_softirq(void); +#endif extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); extern void __raise_softirq_irqoff(unsigned int nr); +#ifdef CONFIG_PREEMPT_RT_FULL +extern void __raise_softirq_irqoff_ksoft(unsigned int nr); +#else +static inline void __raise_softirq_irqoff_ksoft(unsigned int nr) +{ + __raise_softirq_irqoff(nr); +} +#endif extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); +extern void softirq_check_pending_idle(void); DECLARE_PER_CPU(struct task_struct *, ksoftirqd); @@ -504,8 +529,9 @@ static inline struct task_struct *this_cpu_ksoftirqd(void) to be executed on some cpu at least once after this. * If the tasklet is already scheduled, but its execution is still not started, it will be executed only once. - * If this tasklet is already running on another CPU (or schedule is called - from tasklet itself), it is rescheduled for later. + * If this tasklet is already running on another CPU, it is rescheduled + for later. + * Schedule must not be called from the tasklet itself (a lockup occurs) * Tasklet is strictly serialized wrt itself, but not wrt another tasklets. If client needs some intertask synchronization, he makes it with spinlocks. @@ -530,27 +556,36 @@ struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data } enum { TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */ - TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ + TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */ + TASKLET_STATE_PENDING /* Tasklet is pending */ }; -#ifdef CONFIG_SMP +#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED) +#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN) +#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING) + +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) static inline int tasklet_trylock(struct tasklet_struct *t) { return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); } +static inline int tasklet_tryunlock(struct tasklet_struct *t) +{ + return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN; +} + static inline void tasklet_unlock(struct tasklet_struct *t) { smp_mb__before_atomic(); clear_bit(TASKLET_STATE_RUN, &(t)->state); } -static inline void tasklet_unlock_wait(struct tasklet_struct *t) -{ - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } -} +extern void tasklet_unlock_wait(struct tasklet_struct *t); + #else #define tasklet_trylock(t) 1 +#define tasklet_tryunlock(t) 1 #define tasklet_unlock_wait(t) do { } while (0) #define tasklet_unlock(t) do { } while (0) #endif @@ -599,12 +634,7 @@ static inline void tasklet_disable(struct tasklet_struct *t) smp_mb(); } -static inline void tasklet_enable(struct tasklet_struct *t) -{ - smp_mb__before_atomic(); - atomic_dec(&t->count); -} - +extern void tasklet_enable(struct tasklet_struct *t); extern void tasklet_kill(struct tasklet_struct *t); extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); extern void tasklet_init(struct tasklet_struct *t, @@ -635,6 +665,12 @@ void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) tasklet_kill(&ttimer->tasklet); } +#ifdef CONFIG_PREEMPT_RT_FULL +extern void softirq_early_init(void); +#else +static inline void softirq_early_init(void) { } +#endif + /* * Autoprobing for irqs: * diff --git a/include/linux/irq.h b/include/linux/irq.h index 39e3254e5769..8ebac94fbb9f 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -72,6 +72,7 @@ enum irqchip_irq_state; * IRQ_IS_POLLED - Always polled by another interrupt. Exclude * it from the spurious interrupt detection * mechanism and from core side polling. + * IRQ_NO_SOFTIRQ_CALL - No softirq processing in the irq thread context (RT) * IRQ_DISABLE_UNLAZY - Disable lazy irq disable */ enum { @@ -99,13 +100,14 @@ enum { IRQ_PER_CPU_DEVID = (1 << 17), IRQ_IS_POLLED = (1 << 18), IRQ_DISABLE_UNLAZY = (1 << 19), + IRQ_NO_SOFTIRQ_CALL = (1 << 20), }; #define IRQF_MODIFY_MASK \ (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \ - IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY) + IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_NO_SOFTIRQ_CALL) #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 47b9ebd4a74f..2543aab05daa 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -16,6 +16,7 @@ #define IRQ_WORK_BUSY 2UL #define IRQ_WORK_FLAGS 3UL #define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */ +#define IRQ_WORK_HARD_IRQ 8UL /* Run hard IRQ context, even on RT */ struct irq_work { unsigned long flags; @@ -51,4 +52,10 @@ static inline bool irq_work_needs_cpu(void) { return false; } static inline void irq_work_run(void) { } #endif +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL) +void irq_work_tick_soft(void); +#else +static inline void irq_work_tick_soft(void) { } +#endif + #endif /* _LINUX_IRQ_WORK_H */ diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index bb5547a83daf..529da7ef55e3 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -67,6 +67,7 @@ struct irq_desc { unsigned int irqs_unhandled; atomic_t threads_handled; int threads_handled_last; + u64 random_ip; raw_spinlock_t lock; struct cpumask *percpu_enabled; const struct cpumask *percpu_affinity; diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 5dd1272d1ab2..9b77034f7c5e 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -25,8 +25,6 @@ # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) # define trace_hardirq_enter() do { current->hardirq_context++; } while (0) # define trace_hardirq_exit() do { current->hardirq_context--; } while (0) -# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) -# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, #else # define trace_hardirqs_on() do { } while (0) @@ -39,9 +37,15 @@ # define trace_softirqs_enabled(p) 0 # define trace_hardirq_enter() do { } while (0) # define trace_hardirq_exit() do { } while (0) +# define INIT_TRACE_IRQFLAGS +#endif + +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL) +# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) +# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) +#else # define lockdep_softirq_enter() do { } while (0) # define lockdep_softirq_exit() do { } while (0) -# define INIT_TRACE_IRQFLAGS #endif #if defined(CONFIG_IRQSOFF_TRACER) || \ @@ -148,4 +152,23 @@ #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) +/* + * local_irq* variants depending on RT/!RT + */ +#ifdef CONFIG_PREEMPT_RT_FULL +# define local_irq_disable_nort() do { } while (0) +# define local_irq_enable_nort() do { } while (0) +# define local_irq_save_nort(flags) local_save_flags(flags) +# define local_irq_restore_nort(flags) (void)(flags) +# define local_irq_disable_rt() local_irq_disable() +# define local_irq_enable_rt() local_irq_enable() +#else +# define local_irq_disable_nort() local_irq_disable() +# define local_irq_enable_nort() local_irq_enable() +# define local_irq_save_nort(flags) local_irq_save(flags) +# define local_irq_restore_nort(flags) local_irq_restore(flags) +# define local_irq_disable_rt() do { } while (0) +# define local_irq_enable_rt() do { } while (0) +#endif + #endif diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 344eb873f6f5..757524c8103e 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -347,32 +347,56 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh) static inline void jbd_lock_bh_state(struct buffer_head *bh) { +#ifndef CONFIG_PREEMPT_RT_BASE bit_spin_lock(BH_State, &bh->b_state); +#else + spin_lock(&bh->b_state_lock); +#endif } static inline int jbd_trylock_bh_state(struct buffer_head *bh) { +#ifndef CONFIG_PREEMPT_RT_BASE return bit_spin_trylock(BH_State, &bh->b_state); +#else + return spin_trylock(&bh->b_state_lock); +#endif } static inline int jbd_is_locked_bh_state(struct buffer_head *bh) { +#ifndef CONFIG_PREEMPT_RT_BASE return bit_spin_is_locked(BH_State, &bh->b_state); +#else + return spin_is_locked(&bh->b_state_lock); +#endif } static inline void jbd_unlock_bh_state(struct buffer_head *bh) { +#ifndef CONFIG_PREEMPT_RT_BASE bit_spin_unlock(BH_State, &bh->b_state); +#else + spin_unlock(&bh->b_state_lock); +#endif } static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) { +#ifndef CONFIG_PREEMPT_RT_BASE bit_spin_lock(BH_JournalHead, &bh->b_state); +#else + spin_lock(&bh->b_journal_head_lock); +#endif } static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) { +#ifndef CONFIG_PREEMPT_RT_BASE bit_spin_unlock(BH_JournalHead, &bh->b_state); +#else + spin_unlock(&bh->b_journal_head_lock); +#endif } #define J_ASSERT(assert) BUG_ON(!(assert)) diff --git a/include/linux/kdb.h b/include/linux/kdb.h index 410decacff8f..0861bebfc188 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -167,6 +167,7 @@ extern __printf(2, 0) int vkdb_printf(enum kdb_msgsrc src, const char *fmt, extern __printf(1, 2) int kdb_printf(const char *, ...); typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...); +#define in_kdb_printk() (kdb_trap_printk) extern void kdb_init(int level); /* Access to kdb specific polling devices */ @@ -201,6 +202,7 @@ extern int kdb_register_flags(char *, kdb_func_t, char *, char *, extern int kdb_unregister(char *); #else /* ! CONFIG_KGDB_KDB */ static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; } +#define in_kdb_printk() (0) static inline void kdb_init(int level) {} static inline int kdb_register(char *cmd, kdb_func_t func, char *usage, char *help, short minlen) { return 0; } diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d83fc669eeac..1699cbdb9c97 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -195,6 +195,9 @@ extern int _cond_resched(void); */ # define might_sleep() \ do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) + +# define might_sleep_no_state_check() \ + do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) # define sched_annotate_sleep() (current->task_state_change = 0) #else static inline void ___might_sleep(const char *file, int line, @@ -202,6 +205,7 @@ extern int _cond_resched(void); static inline void __might_sleep(const char *file, int line, int preempt_offset) { } # define might_sleep() do { might_resched(); } while (0) +# define might_sleep_no_state_check() do { might_resched(); } while (0) # define sched_annotate_sleep() do { } while (0) #endif @@ -489,6 +493,7 @@ extern enum system_states { SYSTEM_HALT, SYSTEM_POWER_OFF, SYSTEM_RESTART, + SYSTEM_SUSPEND, } system_state; #define TAINT_PROPRIETARY_MODULE 0 diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index cb483305e1f5..4e5062316bb6 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -2,6 +2,7 @@ #define _LINUX_LIST_BL_H #include <linux/list.h> +#include <linux/spinlock.h> #include <linux/bit_spinlock.h> /* @@ -32,13 +33,24 @@ struct hlist_bl_head { struct hlist_bl_node *first; +#ifdef CONFIG_PREEMPT_RT_BASE + raw_spinlock_t lock; +#endif }; struct hlist_bl_node { struct hlist_bl_node *next, **pprev; }; -#define INIT_HLIST_BL_HEAD(ptr) \ - ((ptr)->first = NULL) + +#ifdef CONFIG_PREEMPT_RT_BASE +#define INIT_HLIST_BL_HEAD(h) \ +do { \ + (h)->first = NULL; \ + raw_spin_lock_init(&(h)->lock); \ +} while (0) +#else +#define INIT_HLIST_BL_HEAD(h) (h)->first = NULL +#endif static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h) { @@ -118,12 +130,26 @@ static inline void hlist_bl_del_init(struct hlist_bl_node *n) static inline void hlist_bl_lock(struct hlist_bl_head *b) { +#ifndef CONFIG_PREEMPT_RT_BASE bit_spin_lock(0, (unsigned long *)b); +#else + raw_spin_lock(&b->lock); +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) + __set_bit(0, (unsigned long *)b); +#endif +#endif } static inline void hlist_bl_unlock(struct hlist_bl_head *b) { +#ifndef CONFIG_PREEMPT_RT_BASE __bit_spin_unlock(0, (unsigned long *)b); +#else +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) + __clear_bit(0, (unsigned long *)b); +#endif + raw_spin_unlock(&b->lock); +#endif } static inline bool hlist_bl_is_locked(struct hlist_bl_head *b) diff --git a/include/linux/locallock.h b/include/linux/locallock.h new file mode 100644 index 000000000000..0c3ff5b23f6a --- /dev/null +++ b/include/linux/locallock.h @@ -0,0 +1,297 @@ +#ifndef _LINUX_LOCALLOCK_H +#define _LINUX_LOCALLOCK_H + +#include <linux/percpu.h> +#include <linux/spinlock.h> + +#ifdef CONFIG_PREEMPT_RT_BASE + +#ifdef CONFIG_DEBUG_SPINLOCK +# define LL_WARN(cond) WARN_ON(cond) +#else +# define LL_WARN(cond) do { } while (0) +#endif + +/* + * per cpu lock based substitute for local_irq_*() + */ +struct local_irq_lock { + spinlock_t lock; + struct task_struct *owner; + int nestcnt; + unsigned long flags; +}; + +#define DEFINE_LOCAL_IRQ_LOCK(lvar) \ + DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \ + .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) } + +#define DECLARE_LOCAL_IRQ_LOCK(lvar) \ + DECLARE_PER_CPU(struct local_irq_lock, lvar) + +#define local_irq_lock_init(lvar) \ + do { \ + int __cpu; \ + for_each_possible_cpu(__cpu) \ + spin_lock_init(&per_cpu(lvar, __cpu).lock); \ + } while (0) + +/* + * spin_lock|trylock|unlock_local flavour that does not migrate disable + * used for __local_lock|trylock|unlock where get_local_var/put_local_var + * already takes care of the migrate_disable/enable + * for CONFIG_PREEMPT_BASE map to the normal spin_* calls. + */ +#ifdef CONFIG_PREEMPT_RT_FULL +# define spin_lock_local(lock) rt_spin_lock__no_mg(lock) +# define spin_trylock_local(lock) rt_spin_trylock__no_mg(lock) +# define spin_unlock_local(lock) rt_spin_unlock__no_mg(lock) +#else +# define spin_lock_local(lock) spin_lock(lock) +# define spin_trylock_local(lock) spin_trylock(lock) +# define spin_unlock_local(lock) spin_unlock(lock) +#endif + +static inline void __local_lock(struct local_irq_lock *lv) +{ + if (lv->owner != current) { + spin_lock_local(&lv->lock); + LL_WARN(lv->owner); + LL_WARN(lv->nestcnt); + lv->owner = current; + } + lv->nestcnt++; +} + +#define local_lock(lvar) \ + do { __local_lock(&get_local_var(lvar)); } while (0) + +#define local_lock_on(lvar, cpu) \ + do { __local_lock(&per_cpu(lvar, cpu)); } while (0) + +static inline int __local_trylock(struct local_irq_lock *lv) +{ + if (lv->owner != current && spin_trylock_local(&lv->lock)) { + LL_WARN(lv->owner); + LL_WARN(lv->nestcnt); + lv->owner = current; + lv->nestcnt = 1; + return 1; + } else if (lv->owner == current) { + lv->nestcnt++; + return 1; + } + return 0; +} + +#define local_trylock(lvar) \ + ({ \ + int __locked; \ + __locked = __local_trylock(&get_local_var(lvar)); \ + if (!__locked) \ + put_local_var(lvar); \ + __locked; \ + }) + +static inline void __local_unlock(struct local_irq_lock *lv) +{ + LL_WARN(lv->nestcnt == 0); + LL_WARN(lv->owner != current); + if (--lv->nestcnt) + return; + + lv->owner = NULL; + spin_unlock_local(&lv->lock); +} + +#define local_unlock(lvar) \ + do { \ + __local_unlock(this_cpu_ptr(&lvar)); \ + put_local_var(lvar); \ + } while (0) + +#define local_unlock_on(lvar, cpu) \ + do { __local_unlock(&per_cpu(lvar, cpu)); } while (0) + +static inline void __local_lock_irq(struct local_irq_lock *lv) +{ + spin_lock_irqsave(&lv->lock, lv->flags); + LL_WARN(lv->owner); + LL_WARN(lv->nestcnt); + lv->owner = current; + lv->nestcnt = 1; +} + +#define local_lock_irq(lvar) \ + do { __local_lock_irq(&get_local_var(lvar)); } while (0) + +#define local_lock_irq_on(lvar, cpu) \ + do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0) + +static inline void __local_unlock_irq(struct local_irq_lock *lv) +{ + LL_WARN(!lv->nestcnt); + LL_WARN(lv->owner != current); + lv->owner = NULL; + lv->nestcnt = 0; + spin_unlock_irq(&lv->lock); +} + +#define local_unlock_irq(lvar) \ + do { \ + __local_unlock_irq(this_cpu_ptr(&lvar)); \ + put_local_var(lvar); \ + } while (0) + +#define local_unlock_irq_on(lvar, cpu) \ + do { \ + __local_unlock_irq(&per_cpu(lvar, cpu)); \ + } while (0) + +static inline int __local_lock_irqsave(struct local_irq_lock *lv) +{ + if (lv->owner != current) { + __local_lock_irq(lv); + return 0; + } else { + lv->nestcnt++; + return 1; + } +} + +#define local_lock_irqsave(lvar, _flags) \ + do { \ + if (__local_lock_irqsave(&get_local_var(lvar))) \ + put_local_var(lvar); \ + _flags = __this_cpu_read(lvar.flags); \ + } while (0) + +#define local_lock_irqsave_on(lvar, _flags, cpu) \ + do { \ + __local_lock_irqsave(&per_cpu(lvar, cpu)); \ + _flags = per_cpu(lvar, cpu).flags; \ + } while (0) + +static inline int __local_unlock_irqrestore(struct local_irq_lock *lv, + unsigned long flags) +{ + LL_WARN(!lv->nestcnt); + LL_WARN(lv->owner != current); + if (--lv->nestcnt) + return 0; + + lv->owner = NULL; + spin_unlock_irqrestore(&lv->lock, lv->flags); + return 1; +} + +#define local_unlock_irqrestore(lvar, flags) \ + do { \ + if (__local_unlock_irqrestore(this_cpu_ptr(&lvar), flags)) \ + put_local_var(lvar); \ + } while (0) + +#define local_unlock_irqrestore_on(lvar, flags, cpu) \ + do { \ + __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \ + } while (0) + +#define local_spin_trylock_irq(lvar, lock) \ + ({ \ + int __locked; \ + local_lock_irq(lvar); \ + __locked = spin_trylock(lock); \ + if (!__locked) \ + local_unlock_irq(lvar); \ + __locked; \ + }) + +#define local_spin_lock_irq(lvar, lock) \ + do { \ + local_lock_irq(lvar); \ + spin_lock(lock); \ + } while (0) + +#define local_spin_unlock_irq(lvar, lock) \ + do { \ + spin_unlock(lock); \ + local_unlock_irq(lvar); \ + } while (0) + +#define local_spin_lock_irqsave(lvar, lock, flags) \ + do { \ + local_lock_irqsave(lvar, flags); \ + spin_lock(lock); \ + } while (0) + +#define local_spin_unlock_irqrestore(lvar, lock, flags) \ + do { \ + spin_unlock(lock); \ + local_unlock_irqrestore(lvar, flags); \ + } while (0) + +#define get_locked_var(lvar, var) \ + (*({ \ + local_lock(lvar); \ + this_cpu_ptr(&var); \ + })) + +#define put_locked_var(lvar, var) local_unlock(lvar); + +#define get_locked_ptr(lvar, var) \ + ({ \ + local_lock(lvar); \ + this_cpu_ptr(var); \ + }) + +#define put_locked_ptr(lvar, var) local_unlock(lvar); + +#define local_lock_cpu(lvar) \ + ({ \ + local_lock(lvar); \ + smp_processor_id(); \ + }) + +#define local_unlock_cpu(lvar) local_unlock(lvar) + +#else /* PREEMPT_RT_BASE */ + +#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar +#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar + +static inline void local_irq_lock_init(int lvar) { } + +#define local_trylock(lvar) \ + ({ \ + preempt_disable(); \ + 1; \ + }) + +#define local_lock(lvar) preempt_disable() +#define local_unlock(lvar) preempt_enable() +#define local_lock_irq(lvar) local_irq_disable() +#define local_lock_irq_on(lvar, cpu) local_irq_disable() +#define local_unlock_irq(lvar) local_irq_enable() +#define local_unlock_irq_on(lvar, cpu) local_irq_enable() +#define local_lock_irqsave(lvar, flags) local_irq_save(flags) +#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags) + +#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock) +#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock) +#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock) +#define local_spin_lock_irqsave(lvar, lock, flags) \ + spin_lock_irqsave(lock, flags) +#define local_spin_unlock_irqrestore(lvar, lock, flags) \ + spin_unlock_irqrestore(lock, flags) + +#define get_locked_var(lvar, var) get_cpu_var(var) +#define put_locked_var(lvar, var) put_cpu_var(var) +#define get_locked_ptr(lvar, var) get_cpu_ptr(var) +#define put_locked_ptr(lvar, var) put_cpu_ptr(var) + +#define local_lock_cpu(lvar) get_cpu() +#define local_unlock_cpu(lvar) put_cpu() + +#endif + +#endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 21b18b755c0e..89542dc0752c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -11,6 +11,7 @@ #include <linux/completion.h> #include <linux/cpumask.h> #include <linux/uprobes.h> +#include <linux/rcupdate.h> #include <linux/page-flags-layout.h> #include <linux/workqueue.h> #include <asm/page.h> @@ -518,6 +519,9 @@ struct mm_struct { bool tlb_flush_batched; #endif struct uprobes_state uprobes_state; +#ifdef CONFIG_PREEMPT_RT_BASE + struct rcu_head delayed_drop; +#endif #ifdef CONFIG_X86_INTEL_MPX /* address of the bounds directory */ void __user *bd_addr; diff --git a/include/linux/module.h b/include/linux/module.h index 99f330ae13da..88a79e17fad1 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -496,6 +496,7 @@ static inline int module_is_live(struct module *mod) struct module *__module_text_address(unsigned long addr); struct module *__module_address(unsigned long addr); bool is_module_address(unsigned long addr); +bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr); bool is_module_percpu_address(unsigned long addr); bool is_module_text_address(unsigned long addr); @@ -663,6 +664,11 @@ static inline bool is_module_percpu_address(unsigned long addr) return false; } +static inline bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) +{ + return false; +} + static inline bool is_module_text_address(unsigned long addr) { return false; diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 2cb7531e7d7a..b3fdfc820216 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -19,6 +19,17 @@ #include <asm/processor.h> #include <linux/osq_lock.h> +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ + , .dep_map = { .name = #lockname } +#else +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) +#endif + +#ifdef CONFIG_PREEMPT_RT_FULL +# include <linux/mutex_rt.h> +#else + /* * Simple, straightforward mutexes with strict semantics: * @@ -99,13 +110,6 @@ do { \ static inline void mutex_destroy(struct mutex *lock) {} #endif -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ - , .dep_map = { .name = #lockname } -#else -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) -#endif - #define __MUTEX_INITIALIZER(lockname) \ { .count = ATOMIC_INIT(1) \ , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ @@ -173,6 +177,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); +#endif /* !PREEMPT_RT_FULL */ + extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); #endif /* __LINUX_MUTEX_H */ diff --git a/include/linux/mutex_rt.h b/include/linux/mutex_rt.h new file mode 100644 index 000000000000..e0284edec655 --- /dev/null +++ b/include/linux/mutex_rt.h @@ -0,0 +1,89 @@ +#ifndef __LINUX_MUTEX_RT_H +#define __LINUX_MUTEX_RT_H + +#ifndef __LINUX_MUTEX_H +#error "Please include mutex.h" +#endif + +#include <linux/rtmutex.h> + +/* FIXME: Just for __lockfunc */ +#include <linux/spinlock.h> + +struct mutex { + struct rt_mutex lock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#define __MUTEX_INITIALIZER(mutexname) \ + { \ + .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \ + __DEP_MAP_MUTEX_INITIALIZER(mutexname) \ + } + +#define DEFINE_MUTEX(mutexname) \ + struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) + +extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key); +extern void __lockfunc _mutex_lock(struct mutex *lock); +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock); +extern int __lockfunc _mutex_lock_killable(struct mutex *lock); +extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass); +extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); +extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass); +extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass); +extern int __lockfunc _mutex_trylock(struct mutex *lock); +extern void __lockfunc _mutex_unlock(struct mutex *lock); + +#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock) +#define mutex_lock(l) _mutex_lock(l) +#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l) +#define mutex_lock_killable(l) _mutex_lock_killable(l) +#define mutex_trylock(l) _mutex_trylock(l) +#define mutex_unlock(l) _mutex_unlock(l) + +#ifdef CONFIG_DEBUG_MUTEXES +#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock) +#else +static inline void mutex_destroy(struct mutex *lock) {} +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s) +# define mutex_lock_interruptible_nested(l, s) \ + _mutex_lock_interruptible_nested(l, s) +# define mutex_lock_killable_nested(l, s) \ + _mutex_lock_killable_nested(l, s) + +# define mutex_lock_nest_lock(lock, nest_lock) \ +do { \ + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ + _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ +} while (0) + +#else +# define mutex_lock_nested(l, s) _mutex_lock(l) +# define mutex_lock_interruptible_nested(l, s) \ + _mutex_lock_interruptible(l) +# define mutex_lock_killable_nested(l, s) \ + _mutex_lock_killable(l) +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) +#endif + +# define mutex_init(mutex) \ +do { \ + static struct lock_class_key __key; \ + \ + rt_mutex_init(&(mutex)->lock); \ + __mutex_do_init((mutex), #mutex, &__key); \ +} while (0) + +# define __mutex_init(mutex, name, key) \ +do { \ + rt_mutex_init(&(mutex)->lock); \ + __mutex_do_init((mutex), name, key); \ +} while (0) + +#endif diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 81c85ba6e2b8..5decbf0980b0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -396,7 +396,19 @@ typedef enum rx_handler_result rx_handler_result_t; typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb); void __napi_schedule(struct napi_struct *n); + +/* + * When PREEMPT_RT_FULL is defined, all device interrupt handlers + * run as threads, and they can also be preempted (without PREEMPT_RT + * interrupt threads can not be preempted). Which means that calling + * __napi_schedule_irqoff() from an interrupt handler can be preempted + * and can corrupt the napi->poll_list. + */ +#ifdef CONFIG_PREEMPT_RT_FULL +#define __napi_schedule_irqoff(n) __napi_schedule(n) +#else void __napi_schedule_irqoff(struct napi_struct *n); +#endif static inline bool napi_disable_pending(struct napi_struct *n) { @@ -582,7 +594,11 @@ struct netdev_queue { * write-mostly part */ spinlock_t _xmit_lock ____cacheline_aligned_in_smp; +#ifdef CONFIG_PREEMPT_RT_FULL + struct task_struct *xmit_lock_owner; +#else int xmit_lock_owner; +#endif /* * Time (in jiffies) of last Tx */ @@ -2479,14 +2495,53 @@ void netdev_freemem(struct net_device *dev); void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); -DECLARE_PER_CPU(int, xmit_recursion); #define XMIT_RECURSION_LIMIT 10 +#ifdef CONFIG_PREEMPT_RT_FULL +static inline int dev_recursion_level(void) +{ + return current->xmit_recursion; +} + +static inline int xmit_rec_read(void) +{ + return current->xmit_recursion; +} + +static inline void xmit_rec_inc(void) +{ + current->xmit_recursion++; +} + +static inline void xmit_rec_dec(void) +{ + current->xmit_recursion--; +} + +#else + +DECLARE_PER_CPU(int, xmit_recursion); static inline int dev_recursion_level(void) { return this_cpu_read(xmit_recursion); } +static inline int xmit_rec_read(void) +{ + return __this_cpu_read(xmit_recursion); +} + +static inline void xmit_rec_inc(void) +{ + __this_cpu_inc(xmit_recursion); +} + +static inline void xmit_rec_dec(void) +{ + __this_cpu_dec(xmit_recursion); +} +#endif + struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); @@ -2871,6 +2926,7 @@ struct softnet_data { unsigned int dropped; struct sk_buff_head input_pkt_queue; struct napi_struct backlog; + struct sk_buff_head tofree_queue; }; @@ -3573,41 +3629,79 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) return (1U << debug_value) - 1; } +#ifdef CONFIG_PREEMPT_RT_FULL +static inline void netdev_queue_set_owner(struct netdev_queue *txq, int cpu) +{ + txq->xmit_lock_owner = current; +} + +static inline void netdev_queue_clear_owner(struct netdev_queue *txq) +{ + txq->xmit_lock_owner = NULL; +} + +static inline bool netdev_queue_has_owner(struct netdev_queue *txq) +{ + if (txq->xmit_lock_owner != NULL) + return true; + return false; +} + +#else + +static inline void netdev_queue_set_owner(struct netdev_queue *txq, int cpu) +{ + txq->xmit_lock_owner = cpu; +} + +static inline void netdev_queue_clear_owner(struct netdev_queue *txq) +{ + txq->xmit_lock_owner = -1; +} + +static inline bool netdev_queue_has_owner(struct netdev_queue *txq) +{ + if (txq->xmit_lock_owner != -1) + return true; + return false; +} +#endif + static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) { spin_lock(&txq->_xmit_lock); - txq->xmit_lock_owner = cpu; + netdev_queue_set_owner(txq, cpu); } static inline void __netif_tx_lock_bh(struct netdev_queue *txq) { spin_lock_bh(&txq->_xmit_lock); - txq->xmit_lock_owner = smp_processor_id(); + netdev_queue_set_owner(txq, smp_processor_id()); } static inline bool __netif_tx_trylock(struct netdev_queue *txq) { bool ok = spin_trylock(&txq->_xmit_lock); if (likely(ok)) - txq->xmit_lock_owner = smp_processor_id(); + netdev_queue_set_owner(txq, smp_processor_id()); return ok; } static inline void __netif_tx_unlock(struct netdev_queue *txq) { - txq->xmit_lock_owner = -1; + netdev_queue_clear_owner(txq); spin_unlock(&txq->_xmit_lock); } static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) { - txq->xmit_lock_owner = -1; + netdev_queue_clear_owner(txq); spin_unlock_bh(&txq->_xmit_lock); } static inline void txq_trans_update(struct netdev_queue *txq) { - if (txq->xmit_lock_owner != -1) + if (netdev_queue_has_owner(txq)) txq->trans_start = jiffies; } diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 69111fa2e578..e64c02af5894 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -4,6 +4,7 @@ #include <linux/netdevice.h> #include <linux/static_key.h> +#include <linux/locallock.h> #include <uapi/linux/netfilter/x_tables.h> /* Test a struct->invflags and a boolean for inequality */ @@ -302,6 +303,8 @@ void xt_free_table_info(struct xt_table_info *info); */ DECLARE_PER_CPU(seqcount_t, xt_recseq); +DECLARE_LOCAL_IRQ_LOCK(xt_write_lock); + /* xt_tee_enabled - true if x_tables needs to handle reentrancy * * Enabled if current ip(6)tables ruleset has at least one -j TEE rule. @@ -322,6 +325,9 @@ static inline unsigned int xt_write_recseq_begin(void) { unsigned int addend; + /* RT protection */ + local_lock(xt_write_lock); + /* * Low order bit of sequence is set if we already * called xt_write_recseq_begin(). @@ -352,6 +358,7 @@ static inline void xt_write_recseq_end(unsigned int addend) /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */ smp_wmb(); __this_cpu_add(xt_recseq.sequence, addend); + local_unlock(xt_write_lock); } /* diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 810124b33327..d54ca43d571f 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -165,7 +165,11 @@ struct nfs_inode { /* Readers: in-flight sillydelete RPC calls */ /* Writers: rmdir */ +#ifdef CONFIG_PREEMPT_RT_BASE + struct semaphore rmdir_sem; +#else struct rw_semaphore rmdir_sem; +#endif #if IS_ENABLED(CONFIG_NFS_V4) struct nfs4_cached_acl *nfs4_acl; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 3bf867a0c3b3..71c6bdd14c8a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1490,7 +1490,7 @@ struct nfs_unlinkdata { struct nfs_removeargs args; struct nfs_removeres res; struct dentry *dentry; - wait_queue_head_t wq; + struct swait_queue_head wq; struct rpc_cred *cred; struct nfs_fattr dir_attr; long timeout; diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 4149868de4e6..babe5b9bcb91 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -6,7 +6,7 @@ * * Alan Cox <Alan.Cox@linux.org> */ - + #ifndef _LINUX_NOTIFIER_H #define _LINUX_NOTIFIER_H #include <linux/errno.h> @@ -42,9 +42,7 @@ * in srcu_notifier_call_chain(): no cache bounces and no memory barriers. * As compensation, srcu_notifier_chain_unregister() is rather expensive. * SRCU notifier chains should be used when the chain will be called very - * often but notifier_blocks will seldom be removed. Also, SRCU notifier - * chains are slightly more difficult to use because they require special - * runtime initialization. + * often but notifier_blocks will seldom be removed. */ struct notifier_block; @@ -90,7 +88,7 @@ struct srcu_notifier_head { (name)->head = NULL; \ } while (0) -/* srcu_notifier_heads must be initialized and cleaned up dynamically */ +/* srcu_notifier_heads must be cleaned up dynamically */ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); #define srcu_cleanup_notifier_head(name) \ cleanup_srcu_struct(&(name)->srcu); @@ -103,7 +101,13 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); .head = NULL } #define RAW_NOTIFIER_INIT(name) { \ .head = NULL } -/* srcu_notifier_heads cannot be initialized statically */ + +#define SRCU_NOTIFIER_INIT(name, pcpu) \ + { \ + .mutex = __MUTEX_INITIALIZER(name.mutex), \ + .head = NULL, \ + .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \ + } #define ATOMIC_NOTIFIER_HEAD(name) \ struct atomic_notifier_head name = \ @@ -115,6 +119,18 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); struct raw_notifier_head name = \ RAW_NOTIFIER_INIT(name) +#define _SRCU_NOTIFIER_HEAD(name, mod) \ + static DEFINE_PER_CPU(struct srcu_struct_array, \ + name##_head_srcu_array); \ + mod struct srcu_notifier_head name = \ + SRCU_NOTIFIER_INIT(name, name##_head_srcu_array) + +#define SRCU_NOTIFIER_HEAD(name) \ + _SRCU_NOTIFIER_HEAD(name, ) + +#define SRCU_NOTIFIER_HEAD_STATIC(name) \ + _SRCU_NOTIFIER_HEAD(name, static) + #ifdef __KERNEL__ extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh, @@ -184,12 +200,12 @@ static inline int notifier_to_errno(int ret) /* * Declared notifiers so far. I can imagine quite a few more chains - * over time (eg laptop power reset chains, reboot chain (to clean + * over time (eg laptop power reset chains, reboot chain (to clean * device units up), device [un]mount chain, module load/unload chain, - * low memory chain, screenblank chain (for plug in modular screenblankers) + * low memory chain, screenblank chain (for plug in modular screenblankers) * VC switch chains (for loadable kernel svgalib VC switch helpers) etc... */ - + /* CPU notfiers are defined in include/linux/cpu.h. */ /* netdevice notifiers are defined in include/linux/netdevice.h */ diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 5b2e6159b744..ea940f451606 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -4,7 +4,7 @@ #include <linux/atomic.h> #include <linux/rwsem.h> #include <linux/percpu.h> -#include <linux/wait.h> +#include <linux/swait.h> #include <linux/rcu_sync.h> #include <linux/lockdep.h> @@ -12,7 +12,7 @@ struct percpu_rw_semaphore { struct rcu_sync rss; unsigned int __percpu *read_count; struct rw_semaphore rw_sem; - wait_queue_head_t writer; + struct swait_queue_head writer; int readers_block; }; @@ -22,13 +22,13 @@ static struct percpu_rw_semaphore name = { \ .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \ .read_count = &__percpu_rwsem_rc_##name, \ .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \ - .writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer), \ + .writer = __SWAIT_QUEUE_HEAD_INITIALIZER(name.writer), \ } extern int __percpu_down_read(struct percpu_rw_semaphore *, int); extern void __percpu_up_read(struct percpu_rw_semaphore *); -static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore *sem) +static inline void percpu_down_read(struct percpu_rw_semaphore *sem) { might_sleep(); @@ -46,16 +46,10 @@ static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore * __this_cpu_inc(*sem->read_count); if (unlikely(!rcu_sync_is_idle(&sem->rss))) __percpu_down_read(sem, false); /* Unconditional memory barrier */ - barrier(); /* - * The barrier() prevents the compiler from + * The preempt_enable() prevents the compiler from * bleeding the critical section out. */ -} - -static inline void percpu_down_read(struct percpu_rw_semaphore *sem) -{ - percpu_down_read_preempt_disable(sem); preempt_enable(); } @@ -82,13 +76,9 @@ static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem) return ret; } -static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem) +static inline void percpu_up_read(struct percpu_rw_semaphore *sem) { - /* - * The barrier() prevents the compiler from - * bleeding the critical section out. - */ - barrier(); + preempt_disable(); /* * Same as in percpu_down_read(). */ @@ -101,12 +91,6 @@ static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_); } -static inline void percpu_up_read(struct percpu_rw_semaphore *sem) -{ - preempt_disable(); - percpu_up_read_preempt_enable(sem); -} - extern void percpu_down_write(struct percpu_rw_semaphore *); extern void percpu_up_write(struct percpu_rw_semaphore *); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 56939d3f6e53..b988bf40ad3e 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -18,6 +18,35 @@ #define PERCPU_MODULE_RESERVE 0 #endif +#ifdef CONFIG_PREEMPT_RT_FULL + +#define get_local_var(var) (*({ \ + migrate_disable(); \ + this_cpu_ptr(&var); })) + +#define put_local_var(var) do { \ + (void)&(var); \ + migrate_enable(); \ +} while (0) + +# define get_local_ptr(var) ({ \ + migrate_disable(); \ + this_cpu_ptr(var); }) + +# define put_local_ptr(var) do { \ + (void)(var); \ + migrate_enable(); \ +} while (0) + +#else + +#define get_local_var(var) get_cpu_var(var) +#define put_local_var(var) put_cpu_var(var) +#define get_local_ptr(var) get_cpu_ptr(var) +#define put_local_ptr(var) put_cpu_ptr(var) + +#endif + /* minimum unit size, also is the maximum supported allocation size */ #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) @@ -110,6 +139,7 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, #endif extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); +extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr); extern bool is_kernel_percpu_address(unsigned long addr); #if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) diff --git a/include/linux/pid.h b/include/linux/pid.h index 97b745ddece5..01a5460a0c85 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -2,6 +2,7 @@ #define _LINUX_PID_H #include <linux/rcupdate.h> +#include <linux/atomic.h> enum pid_type { diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index e4b8678183f5..7bebe3286c4e 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -92,8 +92,8 @@ struct k_itimer { struct alarm alarmtimer; ktime_t interval; } alarm; - struct rcu_head rcu; } it; + struct rcu_head rcu; }; struct k_clock { diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 7eeceac52dea..f97c54265904 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -50,7 +50,11 @@ #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) #define NMI_OFFSET (1UL << NMI_SHIFT) -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) +#ifndef CONFIG_PREEMPT_RT_FULL +# define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) +#else +# define SOFTIRQ_DISABLE_OFFSET (0) +#endif /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 @@ -59,9 +63,15 @@ #include <asm/preempt.h> #define hardirq_count() (preempt_count() & HARDIRQ_MASK) -#define softirq_count() (preempt_count() & SOFTIRQ_MASK) #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ | NMI_MASK)) +#ifndef CONFIG_PREEMPT_RT_FULL +# define softirq_count() (preempt_count() & SOFTIRQ_MASK) +# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) +#else +# define softirq_count() (0UL) +extern int in_serving_softirq(void); +#endif /* * Are we doing bottom half or hardware interrupt processing? @@ -79,7 +89,6 @@ #define in_irq() (hardirq_count()) #define in_softirq() (softirq_count()) #define in_interrupt() (irq_count()) -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) #define in_nmi() (preempt_count() & NMI_MASK) #define in_task() (!(preempt_count() & \ (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) @@ -96,7 +105,11 @@ /* * The preempt_count offset after spin_lock() */ +#if !defined(CONFIG_PREEMPT_RT_FULL) #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET +#else +#define PREEMPT_LOCK_OFFSET 0 +#endif /* * The preempt_count offset needed for things like: @@ -145,6 +158,20 @@ extern void preempt_count_sub(int val); #define preempt_count_inc() preempt_count_add(1) #define preempt_count_dec() preempt_count_sub(1) +#ifdef CONFIG_PREEMPT_LAZY +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0) +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0) +#define inc_preempt_lazy_count() add_preempt_lazy_count(1) +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1) +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count) +#else +#define add_preempt_lazy_count(val) do { } while (0) +#define sub_preempt_lazy_count(val) do { } while (0) +#define inc_preempt_lazy_count() do { } while (0) +#define dec_preempt_lazy_count() do { } while (0) +#define preempt_lazy_count() (0) +#endif + #ifdef CONFIG_PREEMPT_COUNT #define preempt_disable() \ @@ -153,13 +180,25 @@ do { \ barrier(); \ } while (0) +#define preempt_lazy_disable() \ +do { \ + inc_preempt_lazy_count(); \ + barrier(); \ +} while (0) + #define sched_preempt_enable_no_resched() \ do { \ barrier(); \ preempt_count_dec(); \ } while (0) -#define preempt_enable_no_resched() sched_preempt_enable_no_resched() +#ifdef CONFIG_PREEMPT_RT_BASE +# define preempt_enable_no_resched() sched_preempt_enable_no_resched() +# define preempt_check_resched_rt() preempt_check_resched() +#else +# define preempt_enable_no_resched() preempt_enable() +# define preempt_check_resched_rt() barrier(); +#endif #define preemptible() (preempt_count() == 0 && !irqs_disabled()) @@ -184,6 +223,13 @@ do { \ __preempt_schedule(); \ } while (0) +#define preempt_lazy_enable() \ +do { \ + dec_preempt_lazy_count(); \ + barrier(); \ + preempt_check_resched(); \ +} while (0) + #else /* !CONFIG_PREEMPT */ #define preempt_enable() \ do { \ @@ -229,6 +275,7 @@ do { \ #define preempt_disable_notrace() barrier() #define preempt_enable_no_resched_notrace() barrier() #define preempt_enable_notrace() barrier() +#define preempt_check_resched_rt() barrier() #define preemptible() 0 #endif /* CONFIG_PREEMPT_COUNT */ @@ -249,10 +296,31 @@ do { \ } while (0) #define preempt_fold_need_resched() \ do { \ - if (tif_need_resched()) \ + if (tif_need_resched_now()) \ set_preempt_need_resched(); \ } while (0) +#ifdef CONFIG_PREEMPT_RT_FULL +# define preempt_disable_rt() preempt_disable() +# define preempt_enable_rt() preempt_enable() +# define preempt_disable_nort() barrier() +# define preempt_enable_nort() barrier() +# ifdef CONFIG_SMP + extern void migrate_disable(void); + extern void migrate_enable(void); +# else /* CONFIG_SMP */ +# define migrate_disable() barrier() +# define migrate_enable() barrier() +# endif /* CONFIG_SMP */ +#else +# define preempt_disable_rt() barrier() +# define preempt_enable_rt() barrier() +# define preempt_disable_nort() preempt_disable() +# define preempt_enable_nort() preempt_enable() +# define migrate_disable() preempt_disable() +# define migrate_enable() preempt_enable() +#endif + #ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier; diff --git a/include/linux/printk.h b/include/linux/printk.h index eac1af8502bb..37e647af0b0b 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -126,9 +126,11 @@ struct va_format { #ifdef CONFIG_EARLY_PRINTK extern asmlinkage __printf(1, 2) void early_printk(const char *fmt, ...); +extern void printk_kill(void); #else static inline __printf(1, 2) __cold void early_printk(const char *s, ...) { } +static inline void printk_kill(void) { } #endif #ifdef CONFIG_PRINTK_NMI diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index af3581b8a451..277295039c8f 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -292,6 +292,8 @@ unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root, int radix_tree_preload(gfp_t gfp_mask); int radix_tree_maybe_preload(gfp_t gfp_mask); int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order); +void radix_tree_preload_end(void); + void radix_tree_init(void); void *radix_tree_tag_set(struct radix_tree_root *root, unsigned long index, unsigned int tag); @@ -314,11 +316,6 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item); -static inline void radix_tree_preload_end(void) -{ - preempt_enable(); -} - /** * struct radix_tree_iter - radix tree iterator state * diff --git a/include/linux/random.h b/include/linux/random.h index 16ab429735a7..9d0fecb5b6c2 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -31,7 +31,7 @@ static inline void add_latent_entropy(void) {} extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; -extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy; +extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip) __latent_entropy; extern void get_random_bytes(void *buf, int nbytes); extern int add_random_ready_callback(struct random_ready_callback *rdy); diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index e585018498d5..25c64474fc27 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -31,7 +31,7 @@ #include <linux/kernel.h> #include <linux/stddef.h> -#include <linux/rcupdate.h> +#include <linux/rcu_assign_pointer.h> struct rb_node { unsigned long __rb_parent_color; diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index d076183e49be..36bfb4dd57ae 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -26,6 +26,7 @@ #include <linux/compiler.h> #include <linux/rbtree.h> +#include <linux/rcupdate.h> /* * Please note - only struct rb_augment_callbacks and the prototypes for diff --git a/include/linux/rcu_assign_pointer.h b/include/linux/rcu_assign_pointer.h new file mode 100644 index 000000000000..7066962a4379 --- /dev/null +++ b/include/linux/rcu_assign_pointer.h @@ -0,0 +1,54 @@ +#ifndef __LINUX_RCU_ASSIGN_POINTER_H__ +#define __LINUX_RCU_ASSIGN_POINTER_H__ +#include <linux/compiler.h> +#include <asm/barrier.h> + +/** + * RCU_INITIALIZER() - statically initialize an RCU-protected global variable + * @v: The value to statically initialize with. + */ +#define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) + +/** + * rcu_assign_pointer() - assign to RCU-protected pointer + * @p: pointer to assign to + * @v: value to assign (publish) + * + * Assigns the specified value to the specified RCU-protected + * pointer, ensuring that any concurrent RCU readers will see + * any prior initialization. + * + * Inserts memory barriers on architectures that require them + * (which is most of them), and also prevents the compiler from + * reordering the code that initializes the structure after the pointer + * assignment. More importantly, this call documents which pointers + * will be dereferenced by RCU read-side code. + * + * In some special cases, you may use RCU_INIT_POINTER() instead + * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due + * to the fact that it does not constrain either the CPU or the compiler. + * That said, using RCU_INIT_POINTER() when you should have used + * rcu_assign_pointer() is a very bad thing that results in + * impossible-to-diagnose memory corruption. So please be careful. + * See the RCU_INIT_POINTER() comment header for details. + * + * Note that rcu_assign_pointer() evaluates each of its arguments only + * once, appearances notwithstanding. One of the "extra" evaluations + * is in typeof() and the other visible only to sparse (__CHECKER__), + * neither of which actually execute the argument. As with most cpp + * macros, this execute-arguments-only-once property is important, so + * please be careful when making changes to rcu_assign_pointer() and the + * other macros that it invokes. + */ +#define rcu_assign_pointer(p, v) \ +({ \ + uintptr_t _r_a_p__v = (uintptr_t)(v); \ + \ + if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ + WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ + else \ + smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ + _r_a_p__v; \ +}) + +#endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 96037ba940ee..790740d4a3e1 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -46,6 +46,7 @@ #include <linux/compiler.h> #include <linux/ktime.h> #include <linux/irqflags.h> +#include <linux/rcu_assign_pointer.h> #include <asm/barrier.h> @@ -178,6 +179,9 @@ void call_rcu(struct rcu_head *head, #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_PREEMPT_RT_FULL +#define call_rcu_bh call_rcu +#else /** * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. @@ -201,6 +205,7 @@ void call_rcu(struct rcu_head *head, */ void call_rcu_bh(struct rcu_head *head, rcu_callback_t func); +#endif /** * call_rcu_sched() - Queue an RCU for invocation after sched grace period. @@ -301,6 +306,11 @@ void synchronize_rcu(void); * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. */ #define rcu_preempt_depth() (current->rcu_read_lock_nesting) +#ifndef CONFIG_PREEMPT_RT_FULL +#define sched_rcu_preempt_depth() rcu_preempt_depth() +#else +static inline int sched_rcu_preempt_depth(void) { return 0; } +#endif #else /* #ifdef CONFIG_PREEMPT_RCU */ @@ -324,6 +334,8 @@ static inline int rcu_preempt_depth(void) return 0; } +#define sched_rcu_preempt_depth() rcu_preempt_depth() + #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ /* Internal to kernel */ @@ -503,7 +515,14 @@ extern struct lockdep_map rcu_callback_map; int debug_lockdep_rcu_enabled(void); int rcu_read_lock_held(void); +#ifdef CONFIG_PREEMPT_RT_FULL +static inline int rcu_read_lock_bh_held(void) +{ + return rcu_read_lock_held(); +} +#else int rcu_read_lock_bh_held(void); +#endif /** * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? @@ -624,54 +643,6 @@ static inline void rcu_preempt_sleep_check(void) }) /** - * RCU_INITIALIZER() - statically initialize an RCU-protected global variable - * @v: The value to statically initialize with. - */ -#define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) - -/** - * rcu_assign_pointer() - assign to RCU-protected pointer - * @p: pointer to assign to - * @v: value to assign (publish) - * - * Assigns the specified value to the specified RCU-protected - * pointer, ensuring that any concurrent RCU readers will see - * any prior initialization. - * - * Inserts memory barriers on architectures that require them - * (which is most of them), and also prevents the compiler from - * reordering the code that initializes the structure after the pointer - * assignment. More importantly, this call documents which pointers - * will be dereferenced by RCU read-side code. - * - * In some special cases, you may use RCU_INIT_POINTER() instead - * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due - * to the fact that it does not constrain either the CPU or the compiler. - * That said, using RCU_INIT_POINTER() when you should have used - * rcu_assign_pointer() is a very bad thing that results in - * impossible-to-diagnose memory corruption. So please be careful. - * See the RCU_INIT_POINTER() comment header for details. - * - * Note that rcu_assign_pointer() evaluates each of its arguments only - * once, appearances notwithstanding. One of the "extra" evaluations - * is in typeof() and the other visible only to sparse (__CHECKER__), - * neither of which actually execute the argument. As with most cpp - * macros, this execute-arguments-only-once property is important, so - * please be careful when making changes to rcu_assign_pointer() and the - * other macros that it invokes. - */ -#define rcu_assign_pointer(p, v) \ -({ \ - uintptr_t _r_a_p__v = (uintptr_t)(v); \ - \ - if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ - WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ - else \ - smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ - _r_a_p__v; \ -}) - -/** * rcu_access_pointer() - fetch RCU pointer with no dereferencing * @p: The pointer to read * @@ -949,10 +920,14 @@ static inline void rcu_read_unlock(void) static inline void rcu_read_lock_bh(void) { local_bh_disable(); +#ifdef CONFIG_PREEMPT_RT_FULL + rcu_read_lock(); +#else __acquire(RCU_BH); rcu_lock_acquire(&rcu_bh_lock_map); RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_lock_bh() used illegally while idle"); +#endif } /* @@ -962,10 +937,14 @@ static inline void rcu_read_lock_bh(void) */ static inline void rcu_read_unlock_bh(void) { +#ifdef CONFIG_PREEMPT_RT_FULL + rcu_read_unlock(); +#else RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_unlock_bh() used illegally while idle"); rcu_lock_release(&rcu_bh_lock_map); __release(RCU_BH); +#endif local_bh_enable(); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 63a4e4cf40a5..08ab12df2863 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -44,7 +44,11 @@ static inline void rcu_virt_note_context_switch(int cpu) rcu_note_context_switch(); } +#ifdef CONFIG_PREEMPT_RT_FULL +# define synchronize_rcu_bh synchronize_rcu +#else void synchronize_rcu_bh(void); +#endif void synchronize_sched_expedited(void); void synchronize_rcu_expedited(void); @@ -72,7 +76,11 @@ static inline void synchronize_rcu_bh_expedited(void) } void rcu_barrier(void); +#ifdef CONFIG_PREEMPT_RT_FULL +# define rcu_barrier_bh rcu_barrier +#else void rcu_barrier_bh(void); +#endif void rcu_barrier_sched(void); unsigned long get_state_synchronize_rcu(void); void cond_synchronize_rcu(unsigned long oldstate); @@ -82,17 +90,14 @@ void cond_synchronize_sched(unsigned long oldstate); extern unsigned long rcutorture_testseq; extern unsigned long rcutorture_vernum; unsigned long rcu_batches_started(void); -unsigned long rcu_batches_started_bh(void); unsigned long rcu_batches_started_sched(void); unsigned long rcu_batches_completed(void); -unsigned long rcu_batches_completed_bh(void); unsigned long rcu_batches_completed_sched(void); unsigned long rcu_exp_batches_completed(void); unsigned long rcu_exp_batches_completed_sched(void); void show_rcu_gp_kthreads(void); void rcu_force_quiescent_state(void); -void rcu_bh_force_quiescent_state(void); void rcu_sched_force_quiescent_state(void); void rcu_idle_enter(void); @@ -109,6 +114,16 @@ extern int rcu_scheduler_active __read_mostly; bool rcu_is_watching(void); +#ifndef CONFIG_PREEMPT_RT_FULL +void rcu_bh_force_quiescent_state(void); +unsigned long rcu_batches_started_bh(void); +unsigned long rcu_batches_completed_bh(void); +#else +# define rcu_bh_force_quiescent_state rcu_force_quiescent_state +# define rcu_batches_completed_bh rcu_batches_completed +# define rcu_batches_started_bh rcu_batches_completed +#endif + void rcu_all_qs(void); /* RCUtree hotplug events */ diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 1abba5ce2a2f..294a8b4875f1 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -13,11 +13,15 @@ #define __LINUX_RT_MUTEX_H #include <linux/linkage.h> +#include <linux/spinlock_types_raw.h> #include <linux/rbtree.h> -#include <linux/spinlock_types.h> extern int max_lock_depth; /* for sysctl */ +#ifdef CONFIG_DEBUG_MUTEXES +#include <linux/debug_locks.h> +#endif + /** * The rt_mutex structure * @@ -31,8 +35,8 @@ struct rt_mutex { struct rb_root waiters; struct rb_node *waiters_leftmost; struct task_struct *owner; -#ifdef CONFIG_DEBUG_RT_MUTEXES int save_state; +#ifdef CONFIG_DEBUG_RT_MUTEXES const char *name, *file; int line; void *magic; @@ -55,22 +59,33 @@ struct hrtimer_sleeper; # define rt_mutex_debug_check_no_locks_held(task) do { } while (0) #endif +# define rt_mutex_init(mutex) \ + do { \ + raw_spin_lock_init(&(mutex)->wait_lock); \ + __rt_mutex_init(mutex, #mutex); \ + } while (0) + #ifdef CONFIG_DEBUG_RT_MUTEXES # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ , .name = #mutexname, .file = __FILE__, .line = __LINE__ -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__) extern void rt_mutex_debug_task_free(struct task_struct *tsk); #else # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL) # define rt_mutex_debug_task_free(t) do { } while (0) #endif -#define __RT_MUTEX_INITIALIZER(mutexname) \ - { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ +#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ , .waiters = RB_ROOT \ , .owner = NULL \ - __DEBUG_RT_MUTEX_INITIALIZER(mutexname)} + __DEBUG_RT_MUTEX_INITIALIZER(mutexname) + +#define __RT_MUTEX_INITIALIZER(mutexname) \ + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) } + +#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \ + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ + , .save_state = 1 } #define DEFINE_RT_MUTEX(mutexname) \ struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) @@ -90,7 +105,9 @@ extern void __rt_mutex_init(struct rt_mutex *lock, const char *name); extern void rt_mutex_destroy(struct rt_mutex *lock); extern void rt_mutex_lock(struct rt_mutex *lock); +extern int rt_mutex_lock_state(struct rt_mutex *lock, int state); extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); +extern int rt_mutex_lock_killable(struct rt_mutex *lock); extern int rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout); diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h new file mode 100644 index 000000000000..49ed2d45d3be --- /dev/null +++ b/include/linux/rwlock_rt.h @@ -0,0 +1,99 @@ +#ifndef __LINUX_RWLOCK_RT_H +#define __LINUX_RWLOCK_RT_H + +#ifndef __LINUX_SPINLOCK_H +#error Do not include directly. Use spinlock.h +#endif + +#define rwlock_init(rwl) \ +do { \ + static struct lock_class_key __key; \ + \ + rt_mutex_init(&(rwl)->lock); \ + __rt_rwlock_init(rwl, #rwl, &__key); \ +} while (0) + +extern void __lockfunc rt_write_lock(rwlock_t *rwlock); +extern void __lockfunc rt_read_lock(rwlock_t *rwlock); +extern int __lockfunc rt_write_trylock(rwlock_t *rwlock); +extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, unsigned long *flags); +extern int __lockfunc rt_read_trylock(rwlock_t *rwlock); +extern void __lockfunc rt_write_unlock(rwlock_t *rwlock); +extern void __lockfunc rt_read_unlock(rwlock_t *rwlock); +extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock); +extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock); +extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key); + +#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock)) +#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock)) + +#define write_trylock_irqsave(lock, flags) \ + __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags)) + +#define read_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = rt_read_lock_irqsave(lock); \ + } while (0) + +#define write_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = rt_write_lock_irqsave(lock); \ + } while (0) + +#define read_lock(lock) rt_read_lock(lock) + +#define read_lock_bh(lock) \ + do { \ + local_bh_disable(); \ + rt_read_lock(lock); \ + } while (0) + +#define read_lock_irq(lock) read_lock(lock) + +#define write_lock(lock) rt_write_lock(lock) + +#define write_lock_bh(lock) \ + do { \ + local_bh_disable(); \ + rt_write_lock(lock); \ + } while (0) + +#define write_lock_irq(lock) write_lock(lock) + +#define read_unlock(lock) rt_read_unlock(lock) + +#define read_unlock_bh(lock) \ + do { \ + rt_read_unlock(lock); \ + local_bh_enable(); \ + } while (0) + +#define read_unlock_irq(lock) read_unlock(lock) + +#define write_unlock(lock) rt_write_unlock(lock) + +#define write_unlock_bh(lock) \ + do { \ + rt_write_unlock(lock); \ + local_bh_enable(); \ + } while (0) + +#define write_unlock_irq(lock) write_unlock(lock) + +#define read_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + (void) flags; \ + rt_read_unlock(lock); \ + } while (0) + +#define write_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + (void) flags; \ + rt_write_unlock(lock); \ + } while (0) + +#endif diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h index cc0072e93e36..5317cd957292 100644 --- a/include/linux/rwlock_types.h +++ b/include/linux/rwlock_types.h @@ -1,6 +1,10 @@ #ifndef __LINUX_RWLOCK_TYPES_H #define __LINUX_RWLOCK_TYPES_H +#if !defined(__LINUX_SPINLOCK_TYPES_H) +# error "Do not include directly, include spinlock_types.h" +#endif + /* * include/linux/rwlock_types.h - generic rwlock type definitions * and initializers diff --git a/include/linux/rwlock_types_rt.h b/include/linux/rwlock_types_rt.h new file mode 100644 index 000000000000..51b28d775fe1 --- /dev/null +++ b/include/linux/rwlock_types_rt.h @@ -0,0 +1,33 @@ +#ifndef __LINUX_RWLOCK_TYPES_RT_H +#define __LINUX_RWLOCK_TYPES_RT_H + +#ifndef __LINUX_SPINLOCK_TYPES_H +#error "Do not include directly. Include spinlock_types.h instead" +#endif + +/* + * rwlocks - rtmutex which allows single reader recursion + */ +typedef struct { + struct rt_mutex lock; + int read_depth; + unsigned int break_lock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +} rwlock_t; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } +#else +# define RW_DEP_MAP_INIT(lockname) +#endif + +#define __RW_LOCK_UNLOCKED(name) \ + { .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \ + RW_DEP_MAP_INIT(name) } + +#define DEFINE_RWLOCK(name) \ + rwlock_t name = __RW_LOCK_UNLOCKED(name) + +#endif diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index dd1d14250340..aa2ac1f65c2d 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -19,6 +19,10 @@ #include <linux/osq_lock.h> #endif +#ifdef CONFIG_PREEMPT_RT_FULL +#include <linux/rwsem_rt.h> +#else /* PREEMPT_RT_FULL */ + struct rw_semaphore; #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK @@ -106,6 +110,13 @@ static inline int rwsem_is_contended(struct rw_semaphore *sem) return !list_empty(&sem->wait_list); } +#endif /* !PREEMPT_RT_FULL */ + +/* + * The functions below are the same for all rwsem implementations including + * the RT specific variant. + */ + /* * lock for reading */ diff --git a/include/linux/rwsem_rt.h b/include/linux/rwsem_rt.h new file mode 100644 index 000000000000..2ffbf093ae92 --- /dev/null +++ b/include/linux/rwsem_rt.h @@ -0,0 +1,67 @@ +#ifndef _LINUX_RWSEM_RT_H +#define _LINUX_RWSEM_RT_H + +#ifndef _LINUX_RWSEM_H +#error "Include rwsem.h" +#endif + +#include <linux/rtmutex.h> +#include <linux/swait.h> + +#define READER_BIAS (1U << 31) +#define WRITER_BIAS (1U << 30) + +struct rw_semaphore { + atomic_t readers; + struct rt_mutex rtmutex; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#define __RWSEM_INITIALIZER(name) \ +{ \ + .readers = ATOMIC_INIT(READER_BIAS), \ + .rtmutex = __RT_MUTEX_INITIALIZER(name.rtmutex), \ + RW_DEP_MAP_INIT(name) \ +} + +#define DECLARE_RWSEM(lockname) \ + struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) + +extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name, + struct lock_class_key *key); + +#define __init_rwsem(sem, name, key) \ +do { \ + rt_mutex_init(&(sem)->rtmutex); \ + __rwsem_init((sem), (name), (key)); \ +} while (0) + +#define init_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_rwsem((sem), #sem, &__key); \ +} while (0) + +static inline int rwsem_is_locked(struct rw_semaphore *sem) +{ + return atomic_read(&sem->readers) != READER_BIAS; +} + +static inline int rwsem_is_contended(struct rw_semaphore *sem) +{ + return atomic_read(&sem->readers) > 0; +} + +extern void __down_read(struct rw_semaphore *sem); +extern int __down_read_trylock(struct rw_semaphore *sem); +extern void __down_write(struct rw_semaphore *sem); +extern int __must_check __down_write_killable(struct rw_semaphore *sem); +extern int __down_write_trylock(struct rw_semaphore *sem); +extern void __up_read(struct rw_semaphore *sem); +extern void __up_write(struct rw_semaphore *sem); +extern void __downgrade_write(struct rw_semaphore *sem); + +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 275511b60978..629e562c5fe8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -26,6 +26,7 @@ struct sched_param { #include <linux/nodemask.h> #include <linux/mm_types.h> #include <linux/preempt.h> +#include <asm/kmap_types.h> #include <asm/page.h> #include <asm/ptrace.h> @@ -236,17 +237,13 @@ extern char ___assert_task_state[1 - 2*!!( /* Convenience macros for the sake of wake_up */ #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) -#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) /* get_task_state() */ #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) -#define task_is_stopped_or_traced(task) \ - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) #define task_contributes_to_load(task) \ ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ (task->flags & PF_FROZEN) == 0 && \ @@ -312,6 +309,11 @@ extern char ___assert_task_state[1 - 2*!!( #endif +#define __set_current_state_no_track(state_value) \ + do { current->state = (state_value); } while (0) +#define set_current_state_no_track(state_value) \ + set_mb(current->state, (state_value)) + /* Task command name length */ #define TASK_COMM_LEN 16 @@ -1023,9 +1025,31 @@ struct wake_q_head { #define WAKE_Q(name) \ struct wake_q_head name = { WAKE_Q_TAIL, &name.first } -extern void wake_q_add(struct wake_q_head *head, - struct task_struct *task); -extern void wake_up_q(struct wake_q_head *head); +extern void __wake_q_add(struct wake_q_head *head, + struct task_struct *task, bool sleeper); +static inline void wake_q_add(struct wake_q_head *head, + struct task_struct *task) +{ + __wake_q_add(head, task, false); +} + +static inline void wake_q_add_sleeper(struct wake_q_head *head, + struct task_struct *task) +{ + __wake_q_add(head, task, true); +} + +extern void __wake_up_q(struct wake_q_head *head, bool sleeper); + +static inline void wake_up_q(struct wake_q_head *head) +{ + __wake_up_q(head, false); +} + +static inline void wake_up_q_sleeper(struct wake_q_head *head) +{ + __wake_up_q(head, true); +} /* * sched-domains (multiprocessor balancing) declarations: @@ -1493,6 +1517,7 @@ struct task_struct { struct thread_info thread_info; #endif volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ + volatile long saved_state; /* saved state for "spinlock sleepers" */ void *stack; atomic_t usage; unsigned int flags; /* per process flags, defined below */ @@ -1532,6 +1557,13 @@ struct task_struct { #endif unsigned int policy; +#ifdef CONFIG_PREEMPT_RT_FULL + int migrate_disable; + int migrate_disable_update; +# ifdef CONFIG_SCHED_DEBUG + int migrate_disable_atomic; +# endif +#endif int nr_cpus_allowed; cpumask_t cpus_allowed; @@ -1670,6 +1702,9 @@ struct task_struct { struct task_cputime cputime_expires; struct list_head cpu_timers[3]; +#ifdef CONFIG_PREEMPT_RT_BASE + struct task_struct *posix_timer_list; +#endif /* process credentials */ const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ @@ -1701,10 +1736,15 @@ struct task_struct { /* signal handlers */ struct signal_struct *signal; struct sighand_struct *sighand; + struct sigqueue *sigqueue_cache; sigset_t blocked, real_blocked; sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ struct sigpending pending; +#ifdef CONFIG_PREEMPT_RT_FULL + /* TODO: move me into ->restart_block ? */ + struct siginfo forced_info; +#endif unsigned long sas_ss_sp; size_t sas_ss_size; @@ -1730,11 +1770,14 @@ struct task_struct { raw_spinlock_t pi_lock; struct wake_q_node wake_q; + struct wake_q_node wake_q_sleeper; #ifdef CONFIG_RT_MUTEXES /* PI waiters blocked on a rt_mutex held by this task */ struct rb_root pi_waiters; struct rb_node *pi_waiters_leftmost; + /* Updated under owner's pi_lock and rq lock */ + struct task_struct *pi_top_task; /* Deadlock detection and priority inheritance handling */ struct rt_mutex_waiter *pi_blocked_on; #endif @@ -1933,6 +1976,12 @@ struct task_struct { /* bitmask and counter of trace recursion */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ +#ifdef CONFIG_WAKEUP_LATENCY_HIST + u64 preempt_timestamp_hist; +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST + long timer_offset; +#endif +#endif #ifdef CONFIG_KCOV /* Coverage collection mode enabled for this task (0 if disabled). */ enum kcov_mode kcov_mode; @@ -1958,9 +2007,23 @@ struct task_struct { unsigned int sequential_io; unsigned int sequential_io_avg; #endif +#ifdef CONFIG_PREEMPT_RT_BASE + struct rcu_head put_rcu; + int softirq_nestcnt; + unsigned int softirqs_raised; +#endif +#ifdef CONFIG_PREEMPT_RT_FULL +# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32 + int kmap_idx; + pte_t kmap_pte[KM_TYPE_NR]; +# endif +#endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; #endif +#ifdef CONFIG_PREEMPT_RT_FULL + int xmit_recursion; +#endif int pagefault_disabled; #ifdef CONFIG_MMU struct task_struct *oom_reaper_list; @@ -2000,14 +2063,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) } #endif -/* Future-safe accessor for struct task_struct's cpus_allowed. */ -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) - -static inline int tsk_nr_cpus_allowed(struct task_struct *p) -{ - return p->nr_cpus_allowed; -} - #define TNF_MIGRATED 0x01 #define TNF_NO_GROUP 0x02 #define TNF_SHARED 0x04 @@ -2227,6 +2282,15 @@ extern struct pid *cad_pid; extern void free_task(struct task_struct *tsk); #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) +#ifdef CONFIG_PREEMPT_RT_BASE +extern void __put_task_struct_cb(struct rcu_head *rhp); + +static inline void put_task_struct(struct task_struct *t) +{ + if (atomic_dec_and_test(&t->usage)) + call_rcu(&t->put_rcu, __put_task_struct_cb); +} +#else extern void __put_task_struct(struct task_struct *t); static inline void put_task_struct(struct task_struct *t) @@ -2234,6 +2298,7 @@ static inline void put_task_struct(struct task_struct *t) if (atomic_dec_and_test(&t->usage)) __put_task_struct(t); } +#endif struct task_struct *task_rcu_dereference(struct task_struct **ptask); struct task_struct *try_get_task_struct(struct task_struct **ptask); @@ -2275,6 +2340,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, /* * Per process flags */ +#define PF_IN_SOFTIRQ 0x00000001 /* Task is serving softirq */ #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ @@ -2461,6 +2527,10 @@ extern void do_set_cpus_allowed(struct task_struct *p, extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); +int migrate_me(void); +void tell_sched_cpu_down_begin(int cpu); +void tell_sched_cpu_down_done(int cpu); + #else static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) @@ -2473,6 +2543,9 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, return -EINVAL; return 0; } +static inline int migrate_me(void) { return 0; } +static inline void tell_sched_cpu_down_begin(int cpu) { } +static inline void tell_sched_cpu_down_done(int cpu) { } #endif #ifdef CONFIG_NO_HZ_COMMON @@ -2711,6 +2784,7 @@ extern void xtime_update(unsigned long ticks); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); +extern int wake_up_lock_sleeper(struct task_struct * tsk); extern void wake_up_new_task(struct task_struct *tsk); #ifdef CONFIG_SMP extern void kick_process(struct task_struct *tsk); @@ -2919,6 +2993,17 @@ static inline void mmdrop(struct mm_struct *mm) __mmdrop(mm); } +#ifdef CONFIG_PREEMPT_RT_BASE +extern void __mmdrop_delayed(struct rcu_head *rhp); +static inline void mmdrop_delayed(struct mm_struct *mm) +{ + if (atomic_dec_and_test(&mm->mm_count)) + call_rcu(&mm->delayed_drop, __mmdrop_delayed); +} +#else +# define mmdrop_delayed(mm) mmdrop(mm) +#endif + static inline void mmdrop_async_fn(struct work_struct *work) { struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); @@ -3315,6 +3400,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } +#ifdef CONFIG_PREEMPT_LAZY +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk) +{ + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); +} + +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) +{ + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); +} + +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk) +{ + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY)); +} + +static inline int need_resched_lazy(void) +{ + return test_thread_flag(TIF_NEED_RESCHED_LAZY); +} + +static inline int need_resched_now(void) +{ + return test_thread_flag(TIF_NEED_RESCHED); +} + +#else +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { } +static inline int need_resched_lazy(void) { return 0; } + +static inline int need_resched_now(void) +{ + return test_thread_flag(TIF_NEED_RESCHED); +} + +#endif + static inline int restart_syscall(void) { set_tsk_thread_flag(current, TIF_SIGPENDING); @@ -3346,6 +3468,51 @@ static inline int signal_pending_state(long state, struct task_struct *p) return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); } +static inline bool __task_is_stopped_or_traced(struct task_struct *task) +{ + if (task->state & (__TASK_STOPPED | __TASK_TRACED)) + return true; +#ifdef CONFIG_PREEMPT_RT_FULL + if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED)) + return true; +#endif + return false; +} + +static inline bool task_is_stopped_or_traced(struct task_struct *task) +{ + bool traced_stopped; + +#ifdef CONFIG_PREEMPT_RT_FULL + unsigned long flags; + + raw_spin_lock_irqsave(&task->pi_lock, flags); + traced_stopped = __task_is_stopped_or_traced(task); + raw_spin_unlock_irqrestore(&task->pi_lock, flags); +#else + traced_stopped = __task_is_stopped_or_traced(task); +#endif + return traced_stopped; +} + +static inline bool task_is_traced(struct task_struct *task) +{ + bool traced = false; + + if (task->state & __TASK_TRACED) + return true; +#ifdef CONFIG_PREEMPT_RT_FULL + /* in case the task is sleeping on tasklist_lock */ + raw_spin_lock_irq(&task->pi_lock); + if (task->state & __TASK_TRACED) + traced = true; + else if (task->saved_state & __TASK_TRACED) + traced = true; + raw_spin_unlock_irq(&task->pi_lock); +#endif + return traced; +} + /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. The return @@ -3371,12 +3538,16 @@ extern int __cond_resched_lock(spinlock_t *lock); __cond_resched_lock(lock); \ }) +#ifndef CONFIG_PREEMPT_RT_FULL extern int __cond_resched_softirq(void); #define cond_resched_softirq() ({ \ ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \ __cond_resched_softirq(); \ }) +#else +# define cond_resched_softirq() cond_resched() +#endif static inline void cond_resched_rcu(void) { @@ -3551,6 +3722,31 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #endif /* CONFIG_SMP */ +static inline int __migrate_disabled(struct task_struct *p) +{ +#ifdef CONFIG_PREEMPT_RT_FULL + return p->migrate_disable; +#else + return 0; +#endif +} + +/* Future-safe accessor for struct task_struct's cpus_allowed. */ +static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p) +{ + if (__migrate_disabled(p)) + return cpumask_of(task_cpu(p)); + + return &p->cpus_allowed; +} + +static inline int tsk_nr_cpus_allowed(struct task_struct *p) +{ + if (__migrate_disabled(p)) + return 1; + return p->nr_cpus_allowed; +} + extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index a30b172df6e1..db3e91f2bc03 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -16,27 +16,20 @@ static inline int rt_task(struct task_struct *p) } #ifdef CONFIG_RT_MUTEXES -extern int rt_mutex_getprio(struct task_struct *p); -extern void rt_mutex_setprio(struct task_struct *p, int prio); -extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio); -extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task); +/* + * Must hold either p->pi_lock or task_rq(p)->lock. + */ +static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p) +{ + return p->pi_top_task; +} +extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); extern void rt_mutex_adjust_pi(struct task_struct *p); static inline bool tsk_is_pi_blocked(struct task_struct *tsk) { return tsk->pi_blocked_on != NULL; } #else -static inline int rt_mutex_getprio(struct task_struct *p) -{ - return p->normal_prio; -} - -static inline int rt_mutex_get_effective_prio(struct task_struct *task, - int newprio) -{ - return newprio; -} - static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) { return NULL; diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index ead97654c4e9..de04d6d0face 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -220,20 +220,30 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) return __read_seqcount_retry(s, start); } - - -static inline void raw_write_seqcount_begin(seqcount_t *s) +static inline void __raw_write_seqcount_begin(seqcount_t *s) { s->sequence++; smp_wmb(); } -static inline void raw_write_seqcount_end(seqcount_t *s) +static inline void raw_write_seqcount_begin(seqcount_t *s) +{ + preempt_disable_rt(); + __raw_write_seqcount_begin(s); +} + +static inline void __raw_write_seqcount_end(seqcount_t *s) { smp_wmb(); s->sequence++; } +static inline void raw_write_seqcount_end(seqcount_t *s) +{ + __raw_write_seqcount_end(s); + preempt_enable_rt(); +} + /** * raw_write_seqcount_barrier - do a seq write barrier * @s: pointer to seqcount_t @@ -428,10 +438,33 @@ typedef struct { /* * Read side functions for starting and finalizing a read side section. */ +#ifndef CONFIG_PREEMPT_RT_FULL static inline unsigned read_seqbegin(const seqlock_t *sl) { return read_seqcount_begin(&sl->seqcount); } +#else +/* + * Starvation safe read side for RT + */ +static inline unsigned read_seqbegin(seqlock_t *sl) +{ + unsigned ret; + +repeat: + ret = ACCESS_ONCE(sl->seqcount.sequence); + if (unlikely(ret & 1)) { + /* + * Take the lock and let the writer proceed (i.e. evtl + * boost it), otherwise we could loop here forever. + */ + spin_unlock_wait(&sl->lock); + goto repeat; + } + smp_rmb(); + return ret; +} +#endif static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) { @@ -446,36 +479,45 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) static inline void write_seqlock(seqlock_t *sl) { spin_lock(&sl->lock); - write_seqcount_begin(&sl->seqcount); + __raw_write_seqcount_begin(&sl->seqcount); +} + +static inline int try_write_seqlock(seqlock_t *sl) +{ + if (spin_trylock(&sl->lock)) { + __raw_write_seqcount_begin(&sl->seqcount); + return 1; + } + return 0; } static inline void write_sequnlock(seqlock_t *sl) { - write_seqcount_end(&sl->seqcount); + __raw_write_seqcount_end(&sl->seqcount); spin_unlock(&sl->lock); } static inline void write_seqlock_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); - write_seqcount_begin(&sl->seqcount); + __raw_write_seqcount_begin(&sl->seqcount); } static inline void write_sequnlock_bh(seqlock_t *sl) { - write_seqcount_end(&sl->seqcount); + __raw_write_seqcount_end(&sl->seqcount); spin_unlock_bh(&sl->lock); } static inline void write_seqlock_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); - write_seqcount_begin(&sl->seqcount); + __raw_write_seqcount_begin(&sl->seqcount); } static inline void write_sequnlock_irq(seqlock_t *sl) { - write_seqcount_end(&sl->seqcount); + __raw_write_seqcount_end(&sl->seqcount); spin_unlock_irq(&sl->lock); } @@ -484,7 +526,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) unsigned long flags; spin_lock_irqsave(&sl->lock, flags); - write_seqcount_begin(&sl->seqcount); + __raw_write_seqcount_begin(&sl->seqcount); return flags; } @@ -494,7 +536,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) static inline void write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) { - write_seqcount_end(&sl->seqcount); + __raw_write_seqcount_end(&sl->seqcount); spin_unlock_irqrestore(&sl->lock, flags); } diff --git a/include/linux/signal.h b/include/linux/signal.h index ffa58ff53e22..0d7faf67a464 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -250,6 +250,7 @@ static inline void init_sigpending(struct sigpending *sig) } extern void flush_sigqueue(struct sigpending *queue); +extern void flush_task_sigqueue(struct task_struct *tsk); /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ static inline int valid_signal(unsigned long sig) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e37112ac332f..b9a54b02f63d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -284,6 +284,7 @@ struct sk_buff_head { __u32 qlen; spinlock_t lock; + raw_spinlock_t raw_lock; }; struct sk_buff; @@ -1579,6 +1580,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list) __skb_queue_head_init(list); } +static inline void skb_queue_head_init_raw(struct sk_buff_head *list) +{ + raw_spin_lock_init(&list->raw_lock); + __skb_queue_head_init(list); +} + static inline void skb_queue_head_init_class(struct sk_buff_head *list, struct lock_class_key *class) { diff --git a/include/linux/smp.h b/include/linux/smp.h index 8e0cb7a0f836..891c533724f5 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -120,6 +120,13 @@ extern unsigned int setup_max_cpus; extern void __init setup_nr_cpu_ids(void); extern void __init smp_init(void); +extern int __boot_cpu_id; + +static inline int get_boot_cpu_id(void) +{ + return __boot_cpu_id; +} + #else /* !SMP */ static inline void smp_send_stop(void) { } @@ -158,6 +165,11 @@ static inline void smp_init(void) { up_late_init(); } static inline void smp_init(void) { } #endif +static inline int get_boot_cpu_id(void) +{ + return 0; +} + #endif /* !SMP */ /* @@ -185,6 +197,9 @@ static inline void smp_init(void) { } #define get_cpu() ({ preempt_disable(); smp_processor_id(); }) #define put_cpu() preempt_enable() +#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); }) +#define put_cpu_light() migrate_enable() + /* * Callback to arch code if there's nosmp or maxcpus=0 on the * boot command line: diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 47dd0cebd204..b241cc044bd3 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -271,7 +271,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock)) /* Include rwlock functions */ -#include <linux/rwlock.h> +#ifdef CONFIG_PREEMPT_RT_FULL +# include <linux/rwlock_rt.h> +#else +# include <linux/rwlock.h> +#endif /* * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: @@ -282,6 +286,10 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) # include <linux/spinlock_api_up.h> #endif +#ifdef CONFIG_PREEMPT_RT_FULL +# include <linux/spinlock_rt.h> +#else /* PREEMPT_RT_FULL */ + /* * Map the spin_lock functions to the raw variants for PREEMPT_RT=n */ @@ -416,4 +424,6 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); #define atomic_dec_and_lock(atomic, lock) \ __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) +#endif /* !PREEMPT_RT_FULL */ + #endif /* __LINUX_SPINLOCK_H */ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 5344268e6e62..043263f30e81 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -189,6 +189,8 @@ static inline int __raw_spin_trylock_bh(raw_spinlock_t *lock) return 0; } -#include <linux/rwlock_api_smp.h> +#ifndef CONFIG_PREEMPT_RT_FULL +# include <linux/rwlock_api_smp.h> +#endif #endif /* __LINUX_SPINLOCK_API_SMP_H */ diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h new file mode 100644 index 000000000000..43ca841b913a --- /dev/null +++ b/include/linux/spinlock_rt.h @@ -0,0 +1,162 @@ +#ifndef __LINUX_SPINLOCK_RT_H +#define __LINUX_SPINLOCK_RT_H + +#ifndef __LINUX_SPINLOCK_H +#error Do not include directly. Use spinlock.h +#endif + +#include <linux/bug.h> + +extern void +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key); + +#define spin_lock_init(slock) \ +do { \ + static struct lock_class_key __key; \ + \ + rt_mutex_init(&(slock)->lock); \ + __rt_spin_lock_init(slock, #slock, &__key); \ +} while (0) + +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock); +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock); +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock); + +extern void __lockfunc rt_spin_lock(spinlock_t *lock); +extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock); +extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass); +extern void __lockfunc rt_spin_unlock(spinlock_t *lock); +extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock); +extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags); +extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock); +extern int __lockfunc rt_spin_trylock(spinlock_t *lock); +extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock); + +/* + * lockdep-less calls, for derived types like rwlock: + * (for trylock they can use rt_mutex_trylock() directly. + */ +extern void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock); +extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock); +extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock); + +#define spin_lock(lock) rt_spin_lock(lock) + +#define spin_lock_bh(lock) \ + do { \ + local_bh_disable(); \ + rt_spin_lock(lock); \ + } while (0) + +#define spin_lock_irq(lock) spin_lock(lock) + +#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock)) + +#define spin_trylock(lock) \ +({ \ + int __locked; \ + __locked = spin_do_trylock(lock); \ + __locked; \ +}) + +#ifdef CONFIG_LOCKDEP +# define spin_lock_nested(lock, subclass) \ + do { \ + rt_spin_lock_nested(lock, subclass); \ + } while (0) + +#define spin_lock_bh_nested(lock, subclass) \ + do { \ + local_bh_disable(); \ + rt_spin_lock_nested(lock, subclass); \ + } while (0) + +# define spin_lock_irqsave_nested(lock, flags, subclass) \ + do { \ + typecheck(unsigned long, flags); \ + flags = 0; \ + rt_spin_lock_nested(lock, subclass); \ + } while (0) +#else +# define spin_lock_nested(lock, subclass) spin_lock(lock) +# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(lock) + +# define spin_lock_irqsave_nested(lock, flags, subclass) \ + do { \ + typecheck(unsigned long, flags); \ + flags = 0; \ + spin_lock(lock); \ + } while (0) +#endif + +#define spin_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = 0; \ + spin_lock(lock); \ + } while (0) + +static inline unsigned long spin_lock_trace_flags(spinlock_t *lock) +{ + unsigned long flags = 0; +#ifdef CONFIG_TRACE_IRQFLAGS + flags = rt_spin_lock_trace_flags(lock); +#else + spin_lock(lock); /* lock_local */ +#endif + return flags; +} + +/* FIXME: we need rt_spin_lock_nest_lock */ +#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0) + +#define spin_unlock(lock) rt_spin_unlock(lock) + +#define spin_unlock_bh(lock) \ + do { \ + rt_spin_unlock(lock); \ + local_bh_enable(); \ + } while (0) + +#define spin_unlock_irq(lock) spin_unlock(lock) + +#define spin_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + (void) flags; \ + spin_unlock(lock); \ + } while (0) + +#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock)) +#define spin_trylock_irq(lock) spin_trylock(lock) + +#define spin_trylock_irqsave(lock, flags) \ + rt_spin_trylock_irqsave(lock, &(flags)) + +#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock) + +#ifdef CONFIG_GENERIC_LOCKBREAK +# define spin_is_contended(lock) ((lock)->break_lock) +#else +# define spin_is_contended(lock) (((void)(lock), 0)) +#endif + +static inline int spin_can_lock(spinlock_t *lock) +{ + return !rt_mutex_is_locked(&lock->lock); +} + +static inline int spin_is_locked(spinlock_t *lock) +{ + return rt_mutex_is_locked(&lock->lock); +} + +static inline void assert_spin_locked(spinlock_t *lock) +{ + BUG_ON(!spin_is_locked(lock)); +} + +#define atomic_dec_and_lock(atomic, lock) \ + atomic_dec_and_spin_lock(atomic, lock) + +#endif diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index 73548eb13a5d..10bac715ea96 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -9,80 +9,15 @@ * Released under the General Public License (GPL). */ -#if defined(CONFIG_SMP) -# include <asm/spinlock_types.h> -#else -# include <linux/spinlock_types_up.h> -#endif - -#include <linux/lockdep.h> - -typedef struct raw_spinlock { - arch_spinlock_t raw_lock; -#ifdef CONFIG_GENERIC_LOCKBREAK - unsigned int break_lock; -#endif -#ifdef CONFIG_DEBUG_SPINLOCK - unsigned int magic, owner_cpu; - void *owner; -#endif -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -} raw_spinlock_t; - -#define SPINLOCK_MAGIC 0xdead4ead - -#define SPINLOCK_OWNER_INIT ((void *)-1L) - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } -#else -# define SPIN_DEP_MAP_INIT(lockname) -#endif +#include <linux/spinlock_types_raw.h> -#ifdef CONFIG_DEBUG_SPINLOCK -# define SPIN_DEBUG_INIT(lockname) \ - .magic = SPINLOCK_MAGIC, \ - .owner_cpu = -1, \ - .owner = SPINLOCK_OWNER_INIT, +#ifndef CONFIG_PREEMPT_RT_FULL +# include <linux/spinlock_types_nort.h> +# include <linux/rwlock_types.h> #else -# define SPIN_DEBUG_INIT(lockname) +# include <linux/rtmutex.h> +# include <linux/spinlock_types_rt.h> +# include <linux/rwlock_types_rt.h> #endif -#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ - { \ - .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ - SPIN_DEBUG_INIT(lockname) \ - SPIN_DEP_MAP_INIT(lockname) } - -#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ - (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) - -#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) - -typedef struct spinlock { - union { - struct raw_spinlock rlock; - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) - struct { - u8 __padding[LOCK_PADSIZE]; - struct lockdep_map dep_map; - }; -#endif - }; -} spinlock_t; - -#define __SPIN_LOCK_INITIALIZER(lockname) \ - { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } } - -#define __SPIN_LOCK_UNLOCKED(lockname) \ - (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) - -#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) - -#include <linux/rwlock_types.h> - #endif /* __LINUX_SPINLOCK_TYPES_H */ diff --git a/include/linux/spinlock_types_nort.h b/include/linux/spinlock_types_nort.h new file mode 100644 index 000000000000..f1dac1fb1d6a --- /dev/null +++ b/include/linux/spinlock_types_nort.h @@ -0,0 +1,33 @@ +#ifndef __LINUX_SPINLOCK_TYPES_NORT_H +#define __LINUX_SPINLOCK_TYPES_NORT_H + +#ifndef __LINUX_SPINLOCK_TYPES_H +#error "Do not include directly. Include spinlock_types.h instead" +#endif + +/* + * The non RT version maps spinlocks to raw_spinlocks + */ +typedef struct spinlock { + union { + struct raw_spinlock rlock; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) + struct { + u8 __padding[LOCK_PADSIZE]; + struct lockdep_map dep_map; + }; +#endif + }; +} spinlock_t; + +#define __SPIN_LOCK_INITIALIZER(lockname) \ + { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } } + +#define __SPIN_LOCK_UNLOCKED(lockname) \ + (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) + +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) + +#endif diff --git a/include/linux/spinlock_types_raw.h b/include/linux/spinlock_types_raw.h new file mode 100644 index 000000000000..03235b475b77 --- /dev/null +++ b/include/linux/spinlock_types_raw.h @@ -0,0 +1,58 @@ +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H +#define __LINUX_SPINLOCK_TYPES_RAW_H + +#include <linux/types.h> + +#if defined(CONFIG_SMP) +# include <asm/spinlock_types.h> +#else +# include <linux/spinlock_types_up.h> +#endif + +#include <linux/lockdep.h> + +typedef struct raw_spinlock { + arch_spinlock_t raw_lock; +#ifdef CONFIG_GENERIC_LOCKBREAK + unsigned int break_lock; +#endif +#ifdef CONFIG_DEBUG_SPINLOCK + unsigned int magic, owner_cpu; + void *owner; +#endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +} raw_spinlock_t; + +#define SPINLOCK_MAGIC 0xdead4ead + +#define SPINLOCK_OWNER_INIT ((void *)-1L) + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } +#else +# define SPIN_DEP_MAP_INIT(lockname) +#endif + +#ifdef CONFIG_DEBUG_SPINLOCK +# define SPIN_DEBUG_INIT(lockname) \ + .magic = SPINLOCK_MAGIC, \ + .owner_cpu = -1, \ + .owner = SPINLOCK_OWNER_INIT, +#else +# define SPIN_DEBUG_INIT(lockname) +#endif + +#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ + { \ + .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ + SPIN_DEBUG_INIT(lockname) \ + SPIN_DEP_MAP_INIT(lockname) } + +#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ + (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) + +#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) + +#endif diff --git a/include/linux/spinlock_types_rt.h b/include/linux/spinlock_types_rt.h new file mode 100644 index 000000000000..3e3d8c5f7a9a --- /dev/null +++ b/include/linux/spinlock_types_rt.h @@ -0,0 +1,48 @@ +#ifndef __LINUX_SPINLOCK_TYPES_RT_H +#define __LINUX_SPINLOCK_TYPES_RT_H + +#ifndef __LINUX_SPINLOCK_TYPES_H +#error "Do not include directly. Include spinlock_types.h instead" +#endif + +#include <linux/cache.h> + +/* + * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field: + */ +typedef struct spinlock { + struct rt_mutex lock; + unsigned int break_lock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +} spinlock_t; + +#ifdef CONFIG_DEBUG_RT_MUTEXES +# define __RT_SPIN_INITIALIZER(name) \ + { \ + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ + .save_state = 1, \ + .file = __FILE__, \ + .line = __LINE__ , \ + } +#else +# define __RT_SPIN_INITIALIZER(name) \ + { \ + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ + .save_state = 1, \ + } +#endif + +/* +.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock) +*/ + +#define __SPIN_LOCK_UNLOCKED(name) \ + { .lock = __RT_SPIN_INITIALIZER(name.lock), \ + SPIN_DEP_MAP_INIT(name) } + +#define DEFINE_SPINLOCK(name) \ + spinlock_t name = __SPIN_LOCK_UNLOCKED(name) + +#endif diff --git a/include/linux/srcu.h b/include/linux/srcu.h index dc8eb63c6568..e793d3a257da 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -84,10 +84,10 @@ int init_srcu_struct(struct srcu_struct *sp); void process_srcu(struct work_struct *work); -#define __SRCU_STRUCT_INIT(name) \ +#define __SRCU_STRUCT_INIT(name, pcpu_name) \ { \ .completed = -300, \ - .per_cpu_ref = &name##_srcu_array, \ + .per_cpu_ref = &pcpu_name, \ .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ .running = false, \ .batch_queue = RCU_BATCH_INIT(name.batch_queue), \ @@ -119,7 +119,7 @@ void process_srcu(struct work_struct *work); */ #define __DEFINE_SRCU(name, is_static) \ static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_array) #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 249dafce2788..3b53173d0ce6 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -193,6 +193,12 @@ struct platform_freeze_ops { void (*end)(void); }; +#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION) +extern bool pm_in_action; +#else +# define pm_in_action false +#endif + #ifdef CONFIG_SUSPEND /** * suspend_set_ops - set platform dependent suspend operations diff --git a/include/linux/swait.h b/include/linux/swait.h index c1f9c62a8a50..83f004a72320 100644 --- a/include/linux/swait.h +++ b/include/linux/swait.h @@ -87,6 +87,7 @@ static inline int swait_active(struct swait_queue_head *q) extern void swake_up(struct swait_queue_head *q); extern void swake_up_all(struct swait_queue_head *q); extern void swake_up_locked(struct swait_queue_head *q); +extern void swake_up_all_locked(struct swait_queue_head *q); extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state); diff --git a/include/linux/swap.h b/include/linux/swap.h index d13617c7bcc4..0986230f3c7d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -11,6 +11,7 @@ #include <linux/fs.h> #include <linux/atomic.h> #include <linux/page-flags.h> +#include <linux/locallock.h> #include <asm/page.h> struct notifier_block; @@ -247,7 +248,8 @@ struct swap_info_struct { void *workingset_eviction(struct address_space *mapping, struct page *page); bool workingset_refault(void *shadow); void workingset_activation(struct page *page); -extern struct list_lru workingset_shadow_nodes; +extern struct list_lru __workingset_shadow_nodes; +DECLARE_LOCAL_IRQ_LOCK(workingset_shadow_lock); static inline unsigned int workingset_node_pages(struct radix_tree_node *node) { @@ -292,6 +294,7 @@ extern unsigned long nr_free_pagecache_pages(void); /* linux/mm/swap.c */ +DECLARE_LOCAL_IRQ_LOCK(swapvec_lock); extern void lru_cache_add(struct page *); extern void lru_cache_add_anon(struct page *page); extern void lru_cache_add_file(struct page *page); diff --git a/include/linux/swork.h b/include/linux/swork.h new file mode 100644 index 000000000000..f175fa9a6016 --- /dev/null +++ b/include/linux/swork.h @@ -0,0 +1,24 @@ +#ifndef _LINUX_SWORK_H +#define _LINUX_SWORK_H + +#include <linux/list.h> + +struct swork_event { + struct list_head item; + unsigned long flags; + void (*func)(struct swork_event *); +}; + +static inline void INIT_SWORK(struct swork_event *event, + void (*func)(struct swork_event *)) +{ + event->flags = 0; + event->func = func; +} + +bool swork_queue(struct swork_event *sev); + +int swork_get(void); +void swork_put(void); + +#endif /* _LINUX_SWORK_H */ diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 5e6436741f96..4f857aa29174 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -102,7 +102,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) #define test_thread_flag(flag) \ test_ti_thread_flag(current_thread_info(), flag) -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) +#ifdef CONFIG_PREEMPT_LAZY +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \ + test_thread_flag(TIF_NEED_RESCHED_LAZY)) +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED)) +#define tif_need_resched_lazy() test_thread_flag(TIF_NEED_RESCHED_LAZY)) + +#else +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) +#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED) +#define tif_need_resched_lazy() 0 +#endif #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES static inline int arch_within_stack_frames(const void * const stack, diff --git a/include/linux/timer.h b/include/linux/timer.h index ec86e4e55ea3..8e5b680d1275 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -241,7 +241,7 @@ extern void add_timer(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) extern int del_timer_sync(struct timer_list *timer); #else # define del_timer_sync(t) del_timer(t) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index ba57266d9e80..5c36934ec2bc 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -56,6 +56,9 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; + unsigned short migrate_disable; + unsigned short padding; + unsigned char preempt_lazy_count; }; #define TRACE_EVENT_TYPE_MAX \ diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 9442423979c1..e507ceae4537 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -27,6 +27,7 @@ static __always_inline void pagefault_disabled_dec(void) */ static inline void pagefault_disable(void) { + migrate_disable(); pagefault_disabled_inc(); /* * make sure to have issued the store before a pagefault @@ -43,6 +44,7 @@ static inline void pagefault_enable(void) */ barrier(); pagefault_disabled_dec(); + migrate_enable(); } /* diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 4a29c75b146e..0a294e950df8 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -27,6 +27,7 @@ #include <linux/errno.h> #include <linux/rbtree.h> #include <linux/types.h> +#include <linux/wait.h> struct vm_area_struct; struct mm_struct; diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 613771909b6e..e28c5a43229d 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -33,7 +33,9 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states); */ static inline void __count_vm_event(enum vm_event_item item) { + preempt_disable_rt(); raw_cpu_inc(vm_event_states.event[item]); + preempt_enable_rt(); } static inline void count_vm_event(enum vm_event_item item) @@ -43,7 +45,9 @@ static inline void count_vm_event(enum vm_event_item item) static inline void __count_vm_events(enum vm_event_item item, long delta) { + preempt_disable_rt(); raw_cpu_add(vm_event_states.event[item], delta); + preempt_enable_rt(); } static inline void count_vm_events(enum vm_event_item item, long delta) diff --git a/include/linux/wait.h b/include/linux/wait.h index 2408e8d5c05c..db50d6609195 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -8,6 +8,7 @@ #include <linux/spinlock.h> #include <asm/current.h> #include <uapi/linux/wait.h> +#include <linux/atomic.h> typedef struct __wait_queue wait_queue_t; typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key); |