From da77ceac3d20f27310a07a7c346a4ee6b40d6c28 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 17 Dec 2019 21:59:50 +0100 Subject: [ANNOUNCE] v5.4.3-rt1 Dear RT folks! I'm pleased to announce the v5.4.3-rt1 patch set. Changes since v5.2.21-rt15: - Rebase to v5.4 - CONFIG_PREEMPT_RT is part of v5.4. While rebasing I merged CONFIG_PREEMPT_RT_BASE and CONFIG_PREEMPT_RT_FULL into CONFIG_PREEMPT_RT. This switch depends on ARCH_SUPPORTS_RT (which is currently provided by arm, powerpc and x86) and EXPERT. Known issues - None You can get this release via the git tree at: git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.4.3-rt1 The RT patch against v5.4.3 can be found here: https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.4/older/patch-5.4.3-rt1.patch.xz The split quilt queue is available at: https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.4/older/patches-5.4.3-rt1.tar.xz Sebastian Signed-off-by: Sebastian Andrzej Siewior --- ...ff-and-zone-lock-while-freeing-pages-from.patch | 14 +- patches/0001-cgroup-Remove-css_rstat_flush.patch | 116 ++ ...-READ_ONCE-to-access-timer-base-in-hrimer.patch | 32 - .../0001-jbd2-Simplify-journal_unmap_buffer.patch | 57 + ...001-mm-page_alloc-Split-drain_local_pages.patch | 56 + ...on-t-assume-that-the-callback-has-interru.patch | 35 + ...ff-and-zone-lock-while-freeing-pages-from.patch | 20 +- ...up-Consolidate-users-of-cgroup_rstat_lock.patch | 68 + ...-t-grab-the-expiry-lock-for-non-soft-hrti.patch | 30 - .../0002-jbd2-Remove-jbd_trylock_bh_state.patch | 30 + ...-Add-static-key-dependent-pagevec-locking.patch | 418 ++++++ .../0002-printk-rb-add-prb-locking-functions.patch | 2 +- ...0002-sched-swait-Add-swait_event_lock_irq.patch | 33 + ...ve-may_sleep-from-cgroup_rstat_flush_lock.patch | 55 + ...vent-using-hrtimer_grab_expiry_lock-on-mi.patch | 33 - ...ropping-of-jh-reference-out-of-un-re-fili.patch | 150 ++ ...m-SLxB-change-list_lock-to-raw_spinlock_t.patch | 112 +- ...03-mm-swap-Access-struct-pagevec-remotely.patch | 136 ++ ...3-workqueue-Use-swait-for-wq_manager_wait.patch | 53 + ...ire-cgroup_rstat_lock-with-enabled-interr.patch | 71 + ...nnecessary-branch-from-jbd2_journal_forge.patch | 27 + ...ay-giving-back-empty-slubs-to-IRQ-enabled.patch | 36 +- ...-Enable-use_pvec_lock-nohz_full-dependent.patch | 56 + ...4-workqueue-Convert-the-locks-to-raw-type.patch | 663 +++++++++ ...5-jbd2-Don-t-call-__bforget-unnecessarily.patch | 58 + patches/0006-jbd2-Make-state-lock-a-spinlock.patch | 675 +++++++++ ...ree-journal-head-outside-of-locked-region.patch | 88 ++ .../0008-printk-add-ring-buffer-and-kthread.patch | 8 +- ...0009-printk-remove-exclusive-console-hack.patch | 14 +- ...ntk-redirect-emit-store-to-new-ringbuffer.patch | 16 +- .../0011-printk_safe-remove-printk-safe-code.patch | 16 +- ...k-minimize-console-locking-implementation.patch | 12 +- patches/0013-printk-track-seq-per-console.patch | 10 +- ...tk-do-boot_delay_msec-inside-printk_delay.patch | 6 +- ...015-printk-print-history-for-new-consoles.patch | 4 +- .../0016-printk-implement-CON_PRINTBUFFER.patch | 12 +- ...017-printk-add-processor-number-to-output.patch | 18 +- .../0018-console-add-write_atomic-interface.patch | 2 +- .../0019-printk-introduce-emergency-messages.patch | 16 +- .../0020-serial-8250-implement-write_atomic.patch | 148 +- patches/0021-printk-implement-KERN_CONT.patch | 8 +- patches/0022-printk-implement-dev-kmsg.patch | 12 +- patches/0023-printk-implement-syslog.patch | 12 +- patches/0024-printk-implement-kmsg_dump.patch | 20 +- 
patches/0025-printk-remove-unused-code.patch | 24 +- patches/ARM-Allow-to-enable-RT.patch | 21 + ...irq-in-translation-section-permission-fau.patch | 6 +- patches/ARM64-Allow-to-enable-RT.patch | 21 + patches/BPF-Disable-on-PREEMPT_RT.patch | 35 + ...-hv-vmbus-include-header-for-get_irq_regs.patch | 33 - ...64-downgrade-preempt_disable-d-region-to-.patch | 6 +- ...NFSv4-replace-seqcount_t-with-a-seqlock_t.patch | 33 +- patches/POWERPC-Allow-to-enable-RT.patch | 21 + patches/Use-CONFIG_PREEMPTION.patch | 1523 ++++++++++++++++++++ patches/add_migrate_disable.patch | 648 +++++++-- ...r-use-a-locallock-instead-preempt_disable.patch | 2 +- patches/arch-arm64-Add-lazy-preempt-support.patch | 37 +- patches/arm-disable-NEON-in-kernel-mode.patch | 53 +- patches/arm-enable-highmem-for-rt.patch | 10 +- patches/arm-imx6-cpuidle-Use-raw_spinlock_t.patch | 42 - patches/arm-preempt-lazy-support.patch | 6 +- patches/arm-remove-printk_nmi_.patch | 2 +- ...ompute_layout-before-altenates-are-applie.patch | 78 +- ...d-use-preemp_disable-in-addition-to-local.patch | 128 +- ...pt-Check-preemption-level-before-looking-.patch | 24 - patches/arm64-preempt-Fixup-lazy-preempt.patch | 26 - patches/at91_dont_enable_disable_clock.patch | 12 +- ...n-t-disable-interrupts-in-trigger_softirq.patch | 35 + ...q-move-blk_queue_usage_counter_release-in.patch | 102 -- .../block-mq-don-t-complete-requests-via-IPI.patch | 4 +- patches/block-mq-drop-preempt-disable.patch | 6 +- patches/block-mq-use-cpu_light.patch | 29 - patches/block-use-cpu-chill.patch | 2 + ...clocksource-tclib-allow-higher-clockrates.patch | 24 +- patches/completion-use-simple-wait-queues.patch | 57 +- patches/cond-resched-lock-rt-tweak.patch | 2 +- patches/cpu-hotplug--Implement-CPU-pinning.patch | 108 -- patches/cpumask-disable-offstack-on-rt.patch | 8 +- ...t-Convert-callback_lock-to-raw_spinlock_t.patch | 58 +- ...educe-preempt-disabled-regions-more-algos.patch | 48 +- ...td-add-a-lock-instead-preempt_disable-loc.patch | 14 +- .../crypto-limit-more-FPU-enabled-sections.patch | 4 +- patches/debugobjects-rt.patch | 4 +- ...e-seqlock_t-instread-disabling-preemption.patch | 171 ++- ...ck-zram-Replace-bit-spinlocks-with-rtmute.patch | 4 +- .../drivers-tty-pl011-irq-disable-madness.patch | 4 +- ...m-Don-t-disable-preemption-in-zcomp_strea.patch | 6 +- ...n-t-disable-interrupts-for-intel_engine_b.patch | 75 +- ...n-t-disable-interrupts-independently-of-t.patch | 63 +- patches/drm-i915-Drop-the-IRQ-off-asserts.patch | 20 +- patches/drm-i915-disable-tracing-on-RT.patch | 2 +- ...ip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch | 2 +- ...ock_irq()_in_intel_pipe_update_startend().patch | 18 +- ...empt_disableenable_rt()_where_recommended.patch | 8 +- patches/efi-Allow-efi-runtime.patch | 6 +- patches/efi-Disable-runtime-services-on-RT.patch | 4 +- patches/fs-aio-simple-simple-work.patch | 66 - ...ake-BH_Uptodate_Lock-bit_spin_lock-a-regu.patch | 192 +++ ...bring-back-explicit-INIT_HLIST_BL_HEAD-in.patch | 6 +- ...isable-preemption-on-i_dir_seq-s-write-si.patch | 43 +- .../fs-dcache-use-cpu-chill-in-trylock-loops.patch | 58 - ...ache-use-swait_queue-instead-of-waitqueue.patch | 20 +- patches/fs-jbd-replace-bh_state-lock.patch | 96 -- ...-namespace-use-cpu-chill-in-trylock-loops.patch | 38 + .../fs-nfs-turn-rmdir_sem-into-a-semaphore.patch | 16 +- patches/fs-replace-bh_uptodate_lock-for-rt.patch | 184 --- patches/ftrace-Fix-trace-header-alignment.patch | 45 - patches/ftrace-migrate-disable-tracing.patch | 6 +- ...e-lock-unlock-symetry-versus-pi_lock-and-.patch | 2 +- 
patches/futex-requeue-pi-fix.patch | 6 +- ...round-migrate_disable-enable-in-different.patch | 20 +- patches/genirq-disable-irqpoll-on-rt.patch | 8 +- patches/genirq-force-threading.patch | 45 - ...pdate-irq_set_irqchip_state-documentation.patch | 2 +- ...plug-duct-tape-RT-rwlock-usage-for-non-RT.patch | 95 -- patches/hotplug-light-get-online-cpus.patch | 88 -- ...-a-missing-bracket-and-hide-migration_bas.patch | 65 - .../hrtimer-Allow-raw-wakeups-during-boot.patch | 33 + patches/hrtimer-Introduce-expiry-spin-lock.patch | 102 -- ...imers-by-default-into-the-softirq-context.patch | 209 --- ...solidate-hrtimer_init-hrtimer_init_sleepe.patch | 270 ---- ...e-state-change-before-hrtimer_cancel-in-d.patch | 2 +- patches/i2c-exynos5-Remove-IRQF_ONESHOT.patch | 38 - patches/i2c-hix5hd2-Remove-IRQF_ONESHOT.patch | 32 - ...qwork-push_most_work_into_softirq_context.patch | 18 +- patches/jump-label-rt.patch | 4 +- patches/kconfig-disable-a-few-options-rt.patch | 8 +- patches/kconfig-preempt-rt-full.patch | 58 - ...d-Provide-a-pointer-to-the-valid-CPU-mask.patch | 730 ---------- ...d-move-stack-kprobe-clean-up-to-__put_tas.patch | 21 +- ...ange-the-lock-of-kmemleak_object-to-raw_s.patch | 74 +- ...-Turn-kmemleak_lock-to-raw-spinlock-on-RT.patch | 54 +- .../leds-trigger-disable-CPU-trigger-on-RT.patch | 2 +- ...cessor_id-Adjust-check_preemption_disable.patch | 30 - ...-smp_processor_id-Don-t-use-cpumask_equal.patch | 2 +- patches/list_bl-fixup-bogus-lockdep-warning.patch | 97 -- .../list_bl.h-make-list-head-locking-RT-safe.patch | 30 +- ...lock-provide-get-put-_locked_ptr-variants.patch | 42 - patches/localversion.patch | 2 +- patches/lockdep-disable-self-test.patch | 4 +- patches/lockdep-no-softirq-accounting-on-rt.patch | 8 +- ...ftest-fix-warnings-due-to-missing-PREEMPT.patch | 18 +- ...-do-hardirq-context-test-for-raw-spinlock.patch | 2 +- ...e-spinlock_t-and-rwlock_t-a-RCU-section-o.patch | 12 +- ...kdep-Don-t-complain-about-incorrect-name-.patch | 43 - ...ktorture-Do-NOT-include-rwlock.h-directly.patch | 2 +- ...mutex-fix-deadlock-in-device-mapper-block.patch | 6 +- ...utex-Clean-pi_blocked_on-in-the-error-cas.patch | 8 +- ...utex-re-init-the-wait_lock-in-rt_mutex_in.patch | 2 +- patches/md-disable-bcache.patch | 3 +- patches/md-raid5-percpu-handling-rt-aware.patch | 4 +- patches/mips-disable-highmem-on-rt.patch | 4 +- ...on-Disable-compact_unevictable_allowed-on.patch | 54 + patches/mm-disable-sloub-rt.patch | 21 +- patches/mm-enable-slub.patch | 6 +- ...ol-Don-t-call-schedule_work_on-in-preempt.patch | 6 +- patches/mm-memcontrol-do_not_disable_irq.patch | 22 +- .../mm-page_alloc-rt-friendly-per-cpu-pages.patch | 77 +- .../mm-perform-lru_add_drain_all-remotely.patch | 6 +- patches/mm-protect-activate-switch-mm.patch | 71 - patches/mm-rt-kmap-atomic-scheduling.patch | 36 +- .../mm-scatterlist-dont-disable-irqs-on-RT.patch | 2 +- patches/mm-swap-Enable-use-pvec-lock-on-RT.patch | 41 + ...remove-preempt_disable-enable-when-doing-.patch | 105 ++ patches/mm-vmalloc-use-get-cpu-light.patch | 10 +- ...-not-disable-preemption-in-zswap_frontswa.patch | 6 +- ...smalloc_copy_with_get_cpu_var_and_locking.patch | 38 +- patches/mutex-no-spin-on-rt.patch | 28 - ...napi_schedule_irqoff-disable-interrupts-o.patch | 68 - .../net-Qdisc-use-a-seqlock-instead-seqcount.patch | 51 +- ...r-local-irq-disable-alloc-atomic-headache.patch | 58 - ...otect-users-of-napi_alloc_cache-against-r.patch | 112 -- ...-core-use-local_bh_disable-in-netif_rx_ni.patch | 2 +- ...ays-take-qdisc-s-busylock-in-__dev_xmit_s.patch | 4 +- 
...-iptable-xt-write-recseq-begin-rt-fallout.patch | 75 - patches/net-make-devnet_rename_seq-a-mutex.patch | 12 +- ...ev_deactivate_many-use-msleep-1-instead-o.patch | 2 +- patches/net-use-cpu-chill.patch | 22 +- patches/net_disable_NET_RX_BUSY_POLL.patch | 29 +- ...nd-simplify-phandle-cache-to-use-a-fixed-.patch | 314 ++++ ...-free-phandle-cache-outside-of-the-devtre.patch | 95 -- patches/oleg-signal-rt-fix.patch | 68 +- patches/panic-disable-random-on-rt.patch | 4 +- ...itchtec-Don-t-use-completion-s-wait-queue.patch | 16 +- ...-refcount-use-normal-instead-of-RCU-sched.patch | 100 ++ ...re-Add-SRCU-annotation-for-pmus-list-walk.patch | 30 + patches/pid.h-include-atomic.h.patch | 4 +- ...rs-Unlock-expiry-lock-in-the-early-return.patch | 31 - patches/posix-timers-expiry-lock.patch | 241 +--- patches/posix-timers-move-rcu-out-of-union.patch | 51 - ...osix-timers-thread-posix-cpu-timers-on-rt.patch | 172 ++- patches/power-disable-highmem-on-rt.patch | 4 +- .../powerpc-Fixup-compile-and-lazy-preempt.patch | 46 - ...-Disable-in-kernel-MPIC-emulation-for-PRE.patch | 6 +- patches/powerpc-preempt-lazy-support.patch | 22 +- ...es-iommu-Use-a-locallock-instead-local_ir.patch | 12 +- ...ckprotector-work-around-stack-guard-init-.patch | 2 +- patches/preempt-lazy-support.patch | 134 +- patches/preempt-nort-rt-variants.patch | 6 +- ...-devkmsg-llseek-reset-clear-if-it-is-lost.patch | 6 +- ...msg-read-Return-EPIPE-when-the-first-mess.patch | 2 +- .../printk-hack-out-emergency-loglevel-usage.patch | 6 +- ...tk-handle-iterating-while-buffer-changing.patch | 4 +- patches/printk-kmsg_dump-remove-mutex-usage.patch | 10 +- ...tk-only-allow-kernel-to-emergency-message.patch | 8 +- ...deferred-to-default-loglevel-enforce-mask.patch | 4 +- .../ptrace-fix-ptrace-vs-tasklist_lock-race.patch | 18 +- patches/random-make-it-work-on-rt.patch | 76 +- .../rcu-Acquire-RCU-lock-when-disabling-BHs.patch | 56 - ...Eliminate-softirq-processing-from-rcutree.patch | 451 ------ ...-Use-rcuc-threads-on-PREEMPT_RT-as-we-did.patch | 6 +- ...e-rcu_normal_after_boot-by-default-for-RT.patch | 6 +- patches/rcu-make-RCU_BOOST-default-on-RT.patch | 4 +- ...Avoid-problematic-critical-section-nestin.patch | 8 +- ...-decrease-the-nr-of-migratory-tasks-when-.patch | 154 -- patches/rt-introduce-cpu-chill.patch | 6 +- patches/rt-local-irq-lock.patch | 22 +- patches/rt-preempt-base-config.patch | 55 - patches/rt-serial-warn-fix.patch | 4 +- patches/rtmutex-Make-lock_killable-work.patch | 2 +- .../rtmutex-Provide-rt_mutex_slowlock_locked.patch | 6 +- .../rtmutex-add-sleeping-lock-implementation.patch | 84 +- .../rtmutex-add-ww_mutex-addon-for-mutex-rt.patch | 34 +- .../rtmutex-annotate-sleeping-lock-context.patch | 293 ---- ...ort-lockdep-less-version-of-rt_mutex-s-lo.patch | 26 +- patches/rtmutex-futex-prepare-rt.patch | 24 +- patches/rtmutex-lock-killable.patch | 2 +- patches/rtmutex-trylock-is-okay-on-RT.patch | 10 +- patches/rtmutex-wire-up-RT-s-locking.patch | 201 ++- patches/rtmutex_dont_include_rcu.patch | 40 +- ...-pinned-user-tasks-to-be-awakened-to-the-.patch | 32 - .../sched-Lazy-migrate_disable-processing.patch | 591 -------- ...ched-Remove-dead-__migrate_disabled-check.patch | 29 - ..._cpus_allowed_ptr-Check-cpus_mask-not-cpu.patch | 6 +- ...etion-Fix-a-lockup-in-wait_for_completion.patch | 60 - ...migrate_enable-must-access-takedown_cpu_t.patch | 6 +- ...ine-Ensure-inactive_timer-runs-in-hardirq.patch | 40 - patches/sched-delay-put-task.patch | 85 -- patches/sched-disable-rt-group-sched-on-rt.patch | 4 +- 
patches/sched-disable-ttwu-queue.patch | 4 +- ...hed-fair-Make-the-hrtimers-non-hard-again.patch | 27 - patches/sched-limit-nr-migrate.patch | 4 +- ...ched-might-sleep-do-not-account-rcu-depth.patch | 4 +- ...igrate-disable-Protect-cpus_ptr-with-lock.patch | 37 - ...te_dis-enable-Use-sleeping_lock-to-annota.patch | 46 - ...te_disable-Add-export_symbol_gpl-for-__mi.patch | 31 - ...te_disable-fallback-to-preempt_disable-in.patch | 182 --- ...te_enable-Busy-loop-until-the-migration-r.patch | 47 - ...hed-migrate_enable-Use-select_fallback_rq.patch | 56 - ...ed-migrate_enable-Use-stop_one_cpu_nowait.patch | 28 +- patches/sched-mmdrop-delayed.patch | 18 +- patches/sched-rt-mutex-wakeup.patch | 50 +- patches/sched-swait-Add-swait_event_lock_irq.patch | 32 - patches/scsi-fcoe-rt-aware.patch | 8 +- patches/seqlock-prevent-rt-starvation.patch | 2 +- ...-export-symbols-which-are-used-by-symbols.patch | 6 +- ...-remove-that-trylock-in-serial8250_consol.patch | 4 +- patches/series | 473 +++--- patches/signal-revert-ptrace-preempt-magic.patch | 5 +- ...low-rt-tasks-to-cache-one-sigqueue-struct.patch | 20 +- patches/skbufhead-raw-lock.patch | 22 +- patches/slub-disable-SLUB_CPU_PARTIAL.patch | 6 +- patches/slub-enable-irqs-for-no-wait.patch | 8 +- patches/softirq-Add-preemptible-softirq.patch | 85 +- ...id-a-cancel-dead-lock-in-tasklet-handling.patch | 4 +- .../softirq-disable-softirq-stacks-for-rt.patch | 24 +- patches/softirq-preempt-fix-3-re.patch | 36 +- ...cu-replace-local_irqsave-with-a-locallock.patch | 4 +- patches/sysfs-realtime-entry.patch | 4 +- ..._pkg_temp-make-pkg_temp_lock-a-raw-spinlo.patch | 46 +- patches/timekeeping-split-jiffies-lock.patch | 4 +- ...p-expiry-lock-after-each-timer-invocation.patch | 49 - patches/timers-Introduce-expiry-spin-lock.patch | 152 -- patches/timers-prepare-for-full-preemption.patch | 47 - patches/tpm_tis-fix-stall-after-iowrite-s.patch | 4 +- ...-serial-pl011-warning-about-uninitialized.patch | 2 +- ...t-remove-preemption-disabling-in-netif_rx.patch | 6 +- ...revent-deferral-of-watchdogd-wakeup-on-RT.patch | 16 +- ...onvert-for_each_wq-to-use-built-in-list-c.patch | 45 + .../workqueue-Convert-the-locks-to-raw-type.patch | 696 --------- ...ake-alloc-apply-free_workqueue_attrs-stat.patch | 64 - ...emove-GPF-argument-from-alloc_workqueue_a.patch | 105 -- patches/x86-Disable-HAVE_ARCH_JUMP_LABEL.patch | 6 +- patches/x86-Enable-RT-also-on-32bit.patch | 27 + patches/x86-Enable-RT.patch | 21 + ...86-crypto-reduce-preempt-disabled-regions.patch | 10 +- ...on-t-cache-access-to-fpu_fpregs_owner_ctx.patch | 107 -- .../x86-highmem-add-a-already-used-pte-check.patch | 2 +- ...Don-t-let-setaffinity-unmask-threaded-EOI.patch | 105 -- ...Prevent-inconsistent-state-when-moving-an.patch | 72 + patches/x86-ioapic-Rename-misnamed-functions.patch | 86 ++ patches/x86-kvm-require-const-tsc-for-rt.patch | 4 +- ...t-Initialize-the-context-lock-for-init_mm.patch | 27 - ...-Check-preemption-level-before-looking-at.patch | 24 - patches/x86-preempt-lazy.patch | 48 +- ...x86-signal-delay-calling-signals-on-32bit.patch | 42 - patches/x86-stackprot-no-random-on-rt.patch | 2 +- 297 files changed, 8478 insertions(+), 9633 deletions(-) create mode 100644 patches/0001-cgroup-Remove-css_rstat_flush.patch delete mode 100644 patches/0001-hrtimer-Use-READ_ONCE-to-access-timer-base-in-hrimer.patch create mode 100644 patches/0001-jbd2-Simplify-journal_unmap_buffer.patch create mode 100644 patches/0001-mm-page_alloc-Split-drain_local_pages.patch create mode 100644 
patches/0001-workqueue-Don-t-assume-that-the-callback-has-interru.patch create mode 100644 patches/0002-cgroup-Consolidate-users-of-cgroup_rstat_lock.patch delete mode 100644 patches/0002-hrtimer-Don-t-grab-the-expiry-lock-for-non-soft-hrti.patch create mode 100644 patches/0002-jbd2-Remove-jbd_trylock_bh_state.patch create mode 100644 patches/0002-mm-swap-Add-static-key-dependent-pagevec-locking.patch create mode 100644 patches/0002-sched-swait-Add-swait_event_lock_irq.patch create mode 100644 patches/0003-cgroup-Remove-may_sleep-from-cgroup_rstat_flush_lock.patch delete mode 100644 patches/0003-hrtimer-Prevent-using-hrtimer_grab_expiry_lock-on-mi.patch create mode 100644 patches/0003-jbd2-Move-dropping-of-jh-reference-out-of-un-re-fili.patch create mode 100644 patches/0003-mm-swap-Access-struct-pagevec-remotely.patch create mode 100644 patches/0003-workqueue-Use-swait-for-wq_manager_wait.patch create mode 100644 patches/0004-cgroup-Acquire-cgroup_rstat_lock-with-enabled-interr.patch create mode 100644 patches/0004-jbd2-Drop-unnecessary-branch-from-jbd2_journal_forge.patch create mode 100644 patches/0004-mm-swap-Enable-use_pvec_lock-nohz_full-dependent.patch create mode 100644 patches/0004-workqueue-Convert-the-locks-to-raw-type.patch create mode 100644 patches/0005-jbd2-Don-t-call-__bforget-unnecessarily.patch create mode 100644 patches/0006-jbd2-Make-state-lock-a-spinlock.patch create mode 100644 patches/0007-jbd2-Free-journal-head-outside-of-locked-region.patch create mode 100644 patches/ARM-Allow-to-enable-RT.patch create mode 100644 patches/ARM64-Allow-to-enable-RT.patch create mode 100644 patches/BPF-Disable-on-PREEMPT_RT.patch delete mode 100644 patches/Drivers-hv-vmbus-include-header-for-get_irq_regs.patch create mode 100644 patches/POWERPC-Allow-to-enable-RT.patch create mode 100644 patches/Use-CONFIG_PREEMPTION.patch delete mode 100644 patches/arm-imx6-cpuidle-Use-raw_spinlock_t.patch delete mode 100644 patches/arm64-preempt-Check-preemption-level-before-looking-.patch delete mode 100644 patches/arm64-preempt-Fixup-lazy-preempt.patch create mode 100644 patches/block-Don-t-disable-interrupts-in-trigger_softirq.patch delete mode 100644 patches/block-blk-mq-move-blk_queue_usage_counter_release-in.patch delete mode 100644 patches/block-mq-use-cpu_light.patch delete mode 100644 patches/cpu-hotplug--Implement-CPU-pinning.patch delete mode 100644 patches/fs-aio-simple-simple-work.patch create mode 100644 patches/fs-buffer-Make-BH_Uptodate_Lock-bit_spin_lock-a-regu.patch delete mode 100644 patches/fs-dcache-use-cpu-chill-in-trylock-loops.patch delete mode 100644 patches/fs-jbd-replace-bh_state-lock.patch create mode 100644 patches/fs-namespace-use-cpu-chill-in-trylock-loops.patch delete mode 100644 patches/fs-replace-bh_uptodate_lock-for-rt.patch delete mode 100644 patches/ftrace-Fix-trace-header-alignment.patch delete mode 100644 patches/genirq-force-threading.patch delete mode 100644 patches/hotplug-duct-tape-RT-rwlock-usage-for-non-RT.patch delete mode 100644 patches/hotplug-light-get-online-cpus.patch delete mode 100644 patches/hrtimer-Add-a-missing-bracket-and-hide-migration_bas.patch create mode 100644 patches/hrtimer-Allow-raw-wakeups-during-boot.patch delete mode 100644 patches/hrtimer-Introduce-expiry-spin-lock.patch delete mode 100644 patches/hrtimer-by-timers-by-default-into-the-softirq-context.patch delete mode 100644 patches/hrtimer-consolidate-hrtimer_init-hrtimer_init_sleepe.patch delete mode 100644 patches/i2c-exynos5-Remove-IRQF_ONESHOT.patch delete mode 100644 
patches/i2c-hix5hd2-Remove-IRQF_ONESHOT.patch delete mode 100644 patches/kconfig-preempt-rt-full.patch delete mode 100644 patches/kernel-sched-Provide-a-pointer-to-the-valid-CPU-mask.patch delete mode 100644 patches/lib-smp_processor_id-Adjust-check_preemption_disable.patch delete mode 100644 patches/list_bl-fixup-bogus-lockdep-warning.patch delete mode 100644 patches/locallock-provide-get-put-_locked_ptr-variants.patch delete mode 100644 patches/locking-lockdep-Don-t-complain-about-incorrect-name-.patch create mode 100644 patches/mm-compaction-Disable-compact_unevictable_allowed-on.patch delete mode 100644 patches/mm-protect-activate-switch-mm.patch create mode 100644 patches/mm-swap-Enable-use-pvec-lock-on-RT.patch create mode 100644 patches/mm-vmalloc-remove-preempt_disable-enable-when-doing-.patch delete mode 100644 patches/mutex-no-spin-on-rt.patch delete mode 100644 patches/net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch delete mode 100644 patches/net-another-local-irq-disable-alloc-atomic-headache.patch delete mode 100644 patches/net-core-protect-users-of-napi_alloc_cache-against-r.patch delete mode 100644 patches/net-fix-iptable-xt-write-recseq-begin-rt-fallout.patch create mode 100644 patches/of-Rework-and-simplify-phandle-cache-to-use-a-fixed-.patch delete mode 100644 patches/of-allocate-free-phandle-cache-outside-of-the-devtre.patch create mode 100644 patches/percpu-refcount-use-normal-instead-of-RCU-sched.patch create mode 100644 patches/perf-core-Add-SRCU-annotation-for-pmus-list-walk.patch delete mode 100644 patches/posix-timers-Unlock-expiry-lock-in-the-early-return.patch delete mode 100644 patches/posix-timers-move-rcu-out-of-union.patch delete mode 100644 patches/powerpc-Fixup-compile-and-lazy-preempt.patch delete mode 100644 patches/rcu-Acquire-RCU-lock-when-disabling-BHs.patch delete mode 100644 patches/rcu-Eliminate-softirq-processing-from-rcutree.patch delete mode 100644 patches/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch delete mode 100644 patches/rt-preempt-base-config.patch delete mode 100644 patches/rtmutex-annotate-sleeping-lock-context.patch delete mode 100644 patches/sched-Allow-pinned-user-tasks-to-be-awakened-to-the-.patch delete mode 100644 patches/sched-Lazy-migrate_disable-processing.patch delete mode 100644 patches/sched-Remove-dead-__migrate_disabled-check.patch delete mode 100644 patches/sched-completion-Fix-a-lockup-in-wait_for_completion.patch delete mode 100644 patches/sched-deadline-Ensure-inactive_timer-runs-in-hardirq.patch delete mode 100644 patches/sched-delay-put-task.patch delete mode 100644 patches/sched-fair-Make-the-hrtimers-non-hard-again.patch delete mode 100644 patches/sched-migrate-disable-Protect-cpus_ptr-with-lock.patch delete mode 100644 patches/sched-migrate_dis-enable-Use-sleeping_lock-to-annota.patch delete mode 100644 patches/sched-migrate_disable-Add-export_symbol_gpl-for-__mi.patch delete mode 100644 patches/sched-migrate_disable-fallback-to-preempt_disable-in.patch delete mode 100644 patches/sched-migrate_enable-Busy-loop-until-the-migration-r.patch delete mode 100644 patches/sched-migrate_enable-Use-select_fallback_rq.patch delete mode 100644 patches/sched-swait-Add-swait_event_lock_irq.patch delete mode 100644 patches/timers-Drop-expiry-lock-after-each-timer-invocation.patch delete mode 100644 patches/timers-Introduce-expiry-spin-lock.patch delete mode 100644 patches/timers-prepare-for-full-preemption.patch create mode 100644 patches/workqueue-Convert-for_each_wq-to-use-built-in-list-c.patch delete 
mode 100644 patches/workqueue-Convert-the-locks-to-raw-type.patch delete mode 100644 patches/workqueue-Make-alloc-apply-free_workqueue_attrs-stat.patch delete mode 100644 patches/workqueue-Remove-GPF-argument-from-alloc_workqueue_a.patch create mode 100644 patches/x86-Enable-RT-also-on-32bit.patch create mode 100644 patches/x86-Enable-RT.patch delete mode 100644 patches/x86-fpu-Don-t-cache-access-to-fpu_fpregs_owner_ctx.patch delete mode 100644 patches/x86-ioapic-Don-t-let-setaffinity-unmask-threaded-EOI.patch create mode 100644 patches/x86-ioapic-Prevent-inconsistent-state-when-moving-an.patch create mode 100644 patches/x86-ioapic-Rename-misnamed-functions.patch delete mode 100644 patches/x86-ldt-Initialize-the-context-lock-for-init_mm.patch delete mode 100644 patches/x86-preempt-Check-preemption-level-before-looking-at.patch delete mode 100644 patches/x86-signal-delay-calling-signals-on-32bit.patch diff --git a/patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch b/patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch index c85197db2806..4d6188c179ee 100644 --- a/patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch +++ b/patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch @@ -17,7 +17,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -1182,7 +1182,7 @@ static inline void prefetch_buddy(struct +@@ -1243,7 +1243,7 @@ static inline void prefetch_buddy(struct } /* @@ -26,7 +26,7 @@ Signed-off-by: Sebastian Andrzej Siewior * Assumes all pages on list are in same zone, and of same order. * count is the number of pages to free. * -@@ -1193,14 +1193,41 @@ static inline void prefetch_buddy(struct +@@ -1254,14 +1254,41 @@ static inline void prefetch_buddy(struct * pinned" detection logic. 
*/ static void free_pcppages_bulk(struct zone *zone, int count, @@ -72,7 +72,7 @@ Signed-off-by: Sebastian Andrzej Siewior while (count) { struct list_head *list; -@@ -1232,7 +1259,7 @@ static void free_pcppages_bulk(struct zo +@@ -1293,7 +1320,7 @@ static void free_pcppages_bulk(struct zo if (bulkfree_pcp_prepare(page)) continue; @@ -81,7 +81,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * We are going to put the page back to the global -@@ -1247,26 +1274,6 @@ static void free_pcppages_bulk(struct zo +@@ -1308,26 +1335,6 @@ static void free_pcppages_bulk(struct zo prefetch_buddy(page); } while (--count && --batch_free && !list_empty(list)); } @@ -108,7 +108,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static void free_one_page(struct zone *zone, -@@ -2713,13 +2720,18 @@ void drain_zone_pages(struct zone *zone, +@@ -2799,13 +2806,18 @@ void drain_zone_pages(struct zone *zone, { unsigned long flags; int to_drain, batch; @@ -128,7 +128,7 @@ Signed-off-by: Sebastian Andrzej Siewior } #endif -@@ -2735,14 +2747,21 @@ static void drain_pages_zone(unsigned in +@@ -2821,14 +2833,21 @@ static void drain_pages_zone(unsigned in unsigned long flags; struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; @@ -152,7 +152,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* -@@ -2970,7 +2989,10 @@ static void free_unref_page_commit(struc +@@ -3056,7 +3075,10 @@ static void free_unref_page_commit(struc pcp->count++; if (pcp->count >= pcp->high) { unsigned long batch = READ_ONCE(pcp->batch); diff --git a/patches/0001-cgroup-Remove-css_rstat_flush.patch b/patches/0001-cgroup-Remove-css_rstat_flush.patch new file mode 100644 index 000000000000..03382aec6302 --- /dev/null +++ b/patches/0001-cgroup-Remove-css_rstat_flush.patch @@ -0,0 +1,116 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 15 Aug 2019 18:14:16 +0200 +Subject: [PATCH 1/4] cgroup: Remove ->css_rstat_flush() + +I was looking at the lifetime of the the ->css_rstat_flush() to see if +cgroup_rstat_cpu_lock should remain a raw_spinlock_t. I didn't find any +users and is unused since it was introduced in commit + 8f53470bab042 ("cgroup: Add cgroup_subsys->css_rstat_flush()") + +Remove the css_rstat_flush callback because it has no users. + +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/cgroup-defs.h | 5 ----- + kernel/cgroup/cgroup.c | 12 ------------ + kernel/cgroup/rstat.c | 10 +--------- + 3 files changed, 1 insertion(+), 26 deletions(-) + +--- a/include/linux/cgroup-defs.h ++++ b/include/linux/cgroup-defs.h +@@ -144,9 +144,6 @@ struct cgroup_subsys_state { + struct list_head sibling; + struct list_head children; + +- /* flush target list anchored at cgrp->rstat_css_list */ +- struct list_head rstat_css_node; +- + /* + * PI: Subsys-unique ID. 0 is unused and root is always 1. The + * matching css can be looked up using css_from_id(). 
+@@ -455,7 +452,6 @@ struct cgroup { + + /* per-cpu recursive resource statistics */ + struct cgroup_rstat_cpu __percpu *rstat_cpu; +- struct list_head rstat_css_list; + + /* cgroup basic resource statistics */ + struct cgroup_base_stat pending_bstat; /* pending from children */ +@@ -633,7 +629,6 @@ struct cgroup_subsys { + void (*css_released)(struct cgroup_subsys_state *css); + void (*css_free)(struct cgroup_subsys_state *css); + void (*css_reset)(struct cgroup_subsys_state *css); +- void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu); + int (*css_extra_stat_show)(struct seq_file *seq, + struct cgroup_subsys_state *css); + +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -1957,7 +1957,6 @@ static void init_cgroup_housekeeping(str + cgrp->dom_cgrp = cgrp; + cgrp->max_descendants = INT_MAX; + cgrp->max_depth = INT_MAX; +- INIT_LIST_HEAD(&cgrp->rstat_css_list); + prev_cputime_init(&cgrp->prev_cputime); + + for_each_subsys(ss, ssid) +@@ -5013,12 +5012,6 @@ static void css_release_work_fn(struct w + list_del_rcu(&css->sibling); + + if (ss) { +- /* css release path */ +- if (!list_empty(&css->rstat_css_node)) { +- cgroup_rstat_flush(cgrp); +- list_del_rcu(&css->rstat_css_node); +- } +- + cgroup_idr_replace(&ss->css_idr, NULL, css->id); + if (ss->css_released) + ss->css_released(css); +@@ -5080,7 +5073,6 @@ static void init_and_link_css(struct cgr + css->id = -1; + INIT_LIST_HEAD(&css->sibling); + INIT_LIST_HEAD(&css->children); +- INIT_LIST_HEAD(&css->rstat_css_node); + css->serial_nr = css_serial_nr_next++; + atomic_set(&css->online_cnt, 0); + +@@ -5089,9 +5081,6 @@ static void init_and_link_css(struct cgr + css_get(css->parent); + } + +- if (cgroup_on_dfl(cgrp) && ss->css_rstat_flush) +- list_add_rcu(&css->rstat_css_node, &cgrp->rstat_css_list); +- + BUG_ON(cgroup_css(cgrp, ss)); + } + +@@ -5193,7 +5182,6 @@ static struct cgroup_subsys_state *css_c + err_list_del: + list_del_rcu(&css->sibling); + err_free_css: +- list_del_rcu(&css->rstat_css_node); + INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); + queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); + return ERR_PTR(err); +--- a/kernel/cgroup/rstat.c ++++ b/kernel/cgroup/rstat.c +@@ -162,17 +162,9 @@ static void cgroup_rstat_flush_locked(st + struct cgroup *pos = NULL; + + raw_spin_lock(cpu_lock); +- while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) { +- struct cgroup_subsys_state *css; +- ++ while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) + cgroup_base_stat_flush(pos, cpu); + +- rcu_read_lock(); +- list_for_each_entry_rcu(css, &pos->rstat_css_list, +- rstat_css_node) +- css->ss->css_rstat_flush(css, cpu); +- rcu_read_unlock(); +- } + raw_spin_unlock(cpu_lock); + + /* if @may_sleep, play nice and yield if necessary */ diff --git a/patches/0001-hrtimer-Use-READ_ONCE-to-access-timer-base-in-hrimer.patch b/patches/0001-hrtimer-Use-READ_ONCE-to-access-timer-base-in-hrimer.patch deleted file mode 100644 index 85df2daa9140..000000000000 --- a/patches/0001-hrtimer-Use-READ_ONCE-to-access-timer-base-in-hrimer.patch +++ /dev/null @@ -1,32 +0,0 @@ -From: Julien Grall -Date: Wed, 21 Aug 2019 10:24:07 +0100 -Subject: [PATCH 1/3] hrtimer: Use READ_ONCE to access timer->base in - hrimer_grab_expiry_lock() - -The update to timer->base is protected by the base->cpu_base->lock(). -However, hrtimer_grab_expirty_lock() does not access it with the lock. - -So it would theorically be possible to have timer->base changed under -our feet. 
We need to prevent the compiler to refetch timer->base so the -check and the access is performed on the same base. - -Other access of timer->base are either done with a lock or protected -with READ_ONCE(). So use READ_ONCE() in hrtimer_grab_expirty_lock(). - -Signed-off-by: Julien Grall -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/time/hrtimer.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/time/hrtimer.c -+++ b/kernel/time/hrtimer.c -@@ -932,7 +932,7 @@ EXPORT_SYMBOL_GPL(hrtimer_forward); - - void hrtimer_grab_expiry_lock(const struct hrtimer *timer) - { -- struct hrtimer_clock_base *base = timer->base; -+ struct hrtimer_clock_base *base = READ_ONCE(timer->base); - - if (base && base->cpu_base) { - spin_lock(&base->cpu_base->softirq_expiry_lock); diff --git a/patches/0001-jbd2-Simplify-journal_unmap_buffer.patch b/patches/0001-jbd2-Simplify-journal_unmap_buffer.patch new file mode 100644 index 000000000000..9f5b0476a086 --- /dev/null +++ b/patches/0001-jbd2-Simplify-journal_unmap_buffer.patch @@ -0,0 +1,57 @@ +From: Thomas Gleixner +Date: Fri, 9 Aug 2019 14:42:27 +0200 +Subject: [PATCH 1/7] jbd2: Simplify journal_unmap_buffer() + +journal_unmap_buffer() checks first whether the buffer head is a journal. +If so it takes locks and then invokes jbd2_journal_grab_journal_head() +followed by another check whether this is journal head buffer. + +The double checking is pointless. + +Replace the initial check with jbd2_journal_grab_journal_head() which +alredy checks whether the buffer head is actually a journal. + +Allows also early access to the journal head pointer for the upcoming +conversion of state lock to a regular spinlock. + +Signed-off-by: Thomas Gleixner +Reviewed-by: Jan Kara +Cc: linux-ext4@vger.kernel.org +Cc: "Theodore Ts'o" +Signed-off-by: Jan Kara +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/jbd2/transaction.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/fs/jbd2/transaction.c ++++ b/fs/jbd2/transaction.c +@@ -2199,7 +2199,8 @@ static int journal_unmap_buffer(journal_ + * holding the page lock. --sct + */ + +- if (!buffer_jbd(bh)) ++ jh = jbd2_journal_grab_journal_head(bh); ++ if (!jh) + goto zap_buffer_unlocked; + + /* OK, we have data buffer in journaled mode */ +@@ -2207,10 +2208,6 @@ static int journal_unmap_buffer(journal_ + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); + +- jh = jbd2_journal_grab_journal_head(bh); +- if (!jh) +- goto zap_buffer_no_jh; +- + /* + * We cannot remove the buffer from checkpoint lists until the + * transaction adding inode to orphan list (let's call it T) +@@ -2332,7 +2329,6 @@ static int journal_unmap_buffer(journal_ + */ + jh->b_modified = 0; + jbd2_journal_put_journal_head(jh); +-zap_buffer_no_jh: + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + write_unlock(&journal->j_state_lock); diff --git a/patches/0001-mm-page_alloc-Split-drain_local_pages.patch b/patches/0001-mm-page_alloc-Split-drain_local_pages.patch new file mode 100644 index 000000000000..49a825886823 --- /dev/null +++ b/patches/0001-mm-page_alloc-Split-drain_local_pages.patch @@ -0,0 +1,56 @@ +From: Anna-Maria Gleixner +Date: Thu, 18 Apr 2019 11:09:04 +0200 +Subject: [PATCH 1/4] mm/page_alloc: Split drain_local_pages() + +Splitting the functionality of drain_local_pages() into a separate +function. This is a preparatory work for introducing the static key +dependend locking mechanism. + +No functional change. 
+ +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/gfp.h | 1 + + mm/page_alloc.c | 13 +++++++++---- + 2 files changed, 10 insertions(+), 4 deletions(-) + +--- a/include/linux/gfp.h ++++ b/include/linux/gfp.h +@@ -580,6 +580,7 @@ extern void page_frag_free(void *addr); + void page_alloc_init(void); + void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); + void drain_all_pages(struct zone *zone); ++void drain_cpu_pages(unsigned int cpu, struct zone *zone); + void drain_local_pages(struct zone *zone); + + void page_alloc_init_late(void); +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -2894,6 +2894,14 @@ static void drain_pages(unsigned int cpu + } + } + ++void drain_cpu_pages(unsigned int cpu, struct zone *zone) ++{ ++ if (zone) ++ drain_pages_zone(cpu, zone); ++ else ++ drain_pages(cpu); ++} ++ + /* + * Spill all of this CPU's per-cpu pages back into the buddy allocator. + * +@@ -2904,10 +2912,7 @@ void drain_local_pages(struct zone *zone + { + int cpu = smp_processor_id(); + +- if (zone) +- drain_pages_zone(cpu, zone); +- else +- drain_pages(cpu); ++ drain_cpu_pages(cpu, zone); + } + + static void drain_local_pages_wq(struct work_struct *work) diff --git a/patches/0001-workqueue-Don-t-assume-that-the-callback-has-interru.patch b/patches/0001-workqueue-Don-t-assume-that-the-callback-has-interru.patch new file mode 100644 index 000000000000..09df6b96aff8 --- /dev/null +++ b/patches/0001-workqueue-Don-t-assume-that-the-callback-has-interru.patch @@ -0,0 +1,35 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 11 Jun 2019 11:21:02 +0200 +Subject: [PATCH 1/4] workqueue: Don't assume that the callback has interrupts + disabled + +Due to the TIMER_IRQSAFE flag, the timer callback is invoked with +disabled interrupts. On -RT the callback is invoked in softirq context +with enabled interrupts. Since the interrupts are threaded, there are +are no in_irq() users. The local_bh_disable() around the threaded +handler ensures that there is either a timer or a threaded handler +active on the CPU. + +Disable interrupts before __queue_work() is invoked from the timer +callback. + +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/workqueue.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -1605,9 +1605,11 @@ EXPORT_SYMBOL_GPL(queue_work_node); + void delayed_work_timer_fn(struct timer_list *t) + { + struct delayed_work *dwork = from_timer(dwork, t, timer); ++ unsigned long flags; + +- /* should have been called from irqsafe timer with irq already off */ ++ local_irq_save(flags); + __queue_work(dwork->cpu, dwork->wq, &dwork->work); ++ local_irq_restore(flags); + } + EXPORT_SYMBOL(delayed_work_timer_fn); + diff --git a/patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch b/patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch index 3b3590699604..7da1092ba766 100644 --- a/patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch +++ b/patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch @@ -17,7 +17,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -1192,8 +1192,8 @@ static inline void prefetch_buddy(struct +@@ -1253,8 +1253,8 @@ static inline void prefetch_buddy(struct * And clear the zone's pages_scanned counter, to hold off the "all pages are * pinned" detection logic. 
*/ @@ -28,7 +28,7 @@ Signed-off-by: Sebastian Andrzej Siewior { bool isolated_pageblocks; struct page *page, *tmp; -@@ -1208,12 +1208,27 @@ static void free_pcppages_bulk(struct zo +@@ -1269,12 +1269,27 @@ static void free_pcppages_bulk(struct zo */ list_for_each_entry_safe(page, tmp, head, lru) { int mt = get_pcppage_migratetype(page); @@ -56,7 +56,7 @@ Signed-off-by: Sebastian Andrzej Siewior __free_one_page(page, page_to_pfn(page), zone, 0, mt); trace_mm_page_pcpu_drain(page, 0, mt); } -@@ -2731,7 +2746,7 @@ void drain_zone_pages(struct zone *zone, +@@ -2817,7 +2832,7 @@ void drain_zone_pages(struct zone *zone, local_irq_restore(flags); if (to_drain > 0) @@ -65,7 +65,7 @@ Signed-off-by: Sebastian Andrzej Siewior } #endif -@@ -2761,7 +2776,7 @@ static void drain_pages_zone(unsigned in +@@ -2847,7 +2862,7 @@ static void drain_pages_zone(unsigned in local_irq_restore(flags); if (count) @@ -74,7 +74,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* -@@ -2960,7 +2975,8 @@ static bool free_unref_page_prepare(stru +@@ -3046,7 +3061,8 @@ static bool free_unref_page_prepare(stru return true; } @@ -84,7 +84,7 @@ Signed-off-by: Sebastian Andrzej Siewior { struct zone *zone = page_zone(page); struct per_cpu_pages *pcp; -@@ -2989,10 +3005,8 @@ static void free_unref_page_commit(struc +@@ -3075,10 +3091,8 @@ static void free_unref_page_commit(struc pcp->count++; if (pcp->count >= pcp->high) { unsigned long batch = READ_ONCE(pcp->batch); @@ -96,7 +96,7 @@ Signed-off-by: Sebastian Andrzej Siewior } } -@@ -3003,13 +3017,17 @@ void free_unref_page(struct page *page) +@@ -3089,13 +3103,17 @@ void free_unref_page(struct page *page) { unsigned long flags; unsigned long pfn = page_to_pfn(page); @@ -115,7 +115,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* -@@ -3020,6 +3038,11 @@ void free_unref_page_list(struct list_he +@@ -3106,6 +3124,11 @@ void free_unref_page_list(struct list_he struct page *page, *next; unsigned long flags, pfn; int batch_count = 0; @@ -127,7 +127,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* Prepare pages for freeing */ list_for_each_entry_safe(page, next, list, lru) { -@@ -3032,10 +3055,12 @@ void free_unref_page_list(struct list_he +@@ -3118,10 +3141,12 @@ void free_unref_page_list(struct list_he local_irq_save(flags); list_for_each_entry_safe(page, next, list, lru) { unsigned long pfn = page_private(page); @@ -141,7 +141,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Guard against excessive IRQ disabled times when we get -@@ -3048,6 +3073,21 @@ void free_unref_page_list(struct list_he +@@ -3134,6 +3159,21 @@ void free_unref_page_list(struct list_he } } local_irq_restore(flags); diff --git a/patches/0002-cgroup-Consolidate-users-of-cgroup_rstat_lock.patch b/patches/0002-cgroup-Consolidate-users-of-cgroup_rstat_lock.patch new file mode 100644 index 000000000000..6136ad721957 --- /dev/null +++ b/patches/0002-cgroup-Consolidate-users-of-cgroup_rstat_lock.patch @@ -0,0 +1,68 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 16 Aug 2019 12:20:42 +0200 +Subject: [PATCH 2/4] cgroup: Consolidate users of cgroup_rstat_lock. + +cgroup_rstat_flush_irqsafe() has no users, remove it. +cgroup_rstat_flush_hold() and cgroup_rstat_flush_release() are only used within +this file. Make it static. 
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/cgroup.h | 3 --- + kernel/cgroup/rstat.c | 19 ++----------------- + 2 files changed, 2 insertions(+), 20 deletions(-) + +--- a/include/linux/cgroup.h ++++ b/include/linux/cgroup.h +@@ -750,9 +750,6 @@ static inline void cgroup_path_from_kern + */ + void cgroup_rstat_updated(struct cgroup *cgrp, int cpu); + void cgroup_rstat_flush(struct cgroup *cgrp); +-void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp); +-void cgroup_rstat_flush_hold(struct cgroup *cgrp); +-void cgroup_rstat_flush_release(void); + + /* + * Basic resource stats. +--- a/kernel/cgroup/rstat.c ++++ b/kernel/cgroup/rstat.c +@@ -201,21 +201,6 @@ void cgroup_rstat_flush(struct cgroup *c + } + + /** +- * cgroup_rstat_flush_irqsafe - irqsafe version of cgroup_rstat_flush() +- * @cgrp: target cgroup +- * +- * This function can be called from any context. +- */ +-void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp) +-{ +- unsigned long flags; +- +- spin_lock_irqsave(&cgroup_rstat_lock, flags); +- cgroup_rstat_flush_locked(cgrp, false); +- spin_unlock_irqrestore(&cgroup_rstat_lock, flags); +-} +- +-/** + * cgroup_rstat_flush_begin - flush stats in @cgrp's subtree and hold + * @cgrp: target cgroup + * +@@ -224,7 +209,7 @@ void cgroup_rstat_flush_irqsafe(struct c + * + * This function may block. + */ +-void cgroup_rstat_flush_hold(struct cgroup *cgrp) ++static void cgroup_rstat_flush_hold(struct cgroup *cgrp) + __acquires(&cgroup_rstat_lock) + { + might_sleep(); +@@ -235,7 +220,7 @@ void cgroup_rstat_flush_hold(struct cgro + /** + * cgroup_rstat_flush_release - release cgroup_rstat_flush_hold() + */ +-void cgroup_rstat_flush_release(void) ++static void cgroup_rstat_flush_release(void) + __releases(&cgroup_rstat_lock) + { + spin_unlock_irq(&cgroup_rstat_lock); diff --git a/patches/0002-hrtimer-Don-t-grab-the-expiry-lock-for-non-soft-hrti.patch b/patches/0002-hrtimer-Don-t-grab-the-expiry-lock-for-non-soft-hrti.patch deleted file mode 100644 index 43992c497c2c..000000000000 --- a/patches/0002-hrtimer-Don-t-grab-the-expiry-lock-for-non-soft-hrti.patch +++ /dev/null @@ -1,30 +0,0 @@ -From: Julien Grall -Date: Wed, 21 Aug 2019 10:24:08 +0100 -Subject: [PATCH 2/3] hrtimer: Don't grab the expiry lock for non-soft hrtimer - -Acquiring the lock in hrtimer_grab_expiry_lock() is designed for -sleeping-locks and should not be used with disabled interrupts. -hrtimer_cancel() may invoke hrtimer_grab_expiry_lock() also for locks -which expire in hard-IRQ context. - -Let hrtimer_cancel() invoke hrtimer_grab_expiry_lock() only for locks -which expire in softirq context. 
- -Signed-off-by: Julien Grall -[bigeasy: rewrite changelog] -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/time/hrtimer.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/time/hrtimer.c -+++ b/kernel/time/hrtimer.c -@@ -934,7 +934,7 @@ void hrtimer_grab_expiry_lock(const stru - { - struct hrtimer_clock_base *base = READ_ONCE(timer->base); - -- if (base && base->cpu_base) { -+ if (timer->is_soft && base && base->cpu_base) { - spin_lock(&base->cpu_base->softirq_expiry_lock); - spin_unlock(&base->cpu_base->softirq_expiry_lock); - } diff --git a/patches/0002-jbd2-Remove-jbd_trylock_bh_state.patch b/patches/0002-jbd2-Remove-jbd_trylock_bh_state.patch new file mode 100644 index 000000000000..bce5d83a7991 --- /dev/null +++ b/patches/0002-jbd2-Remove-jbd_trylock_bh_state.patch @@ -0,0 +1,30 @@ +From: Thomas Gleixner +Date: Fri, 9 Aug 2019 14:42:28 +0200 +Subject: [PATCH 2/7] jbd2: Remove jbd_trylock_bh_state() + +No users. + +Signed-off-by: Thomas Gleixner +Reviewed-by: Jan Kara +Cc: linux-ext4@vger.kernel.org +Cc: "Theodore Ts'o" +Signed-off-by: Jan Kara +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/jbd2.h | 5 ----- + 1 file changed, 5 deletions(-) + +--- a/include/linux/jbd2.h ++++ b/include/linux/jbd2.h +@@ -347,11 +347,6 @@ static inline void jbd_lock_bh_state(str + bit_spin_lock(BH_State, &bh->b_state); + } + +-static inline int jbd_trylock_bh_state(struct buffer_head *bh) +-{ +- return bit_spin_trylock(BH_State, &bh->b_state); +-} +- + static inline int jbd_is_locked_bh_state(struct buffer_head *bh) + { + return bit_spin_is_locked(BH_State, &bh->b_state); diff --git a/patches/0002-mm-swap-Add-static-key-dependent-pagevec-locking.patch b/patches/0002-mm-swap-Add-static-key-dependent-pagevec-locking.patch new file mode 100644 index 000000000000..4bb0d7ef15f7 --- /dev/null +++ b/patches/0002-mm-swap-Add-static-key-dependent-pagevec-locking.patch @@ -0,0 +1,418 @@ +From: Thomas Gleixner +Date: Thu, 18 Apr 2019 11:09:05 +0200 +Subject: [PATCH 2/4] mm/swap: Add static key dependent pagevec locking + +The locking of struct pagevec is done by disabling preemption. In case the +struct has be accessed form interrupt context then interrupts are +disabled. This means the struct can only be accessed locally from the +CPU. There is also no lockdep coverage which would scream during if it +accessed from wrong context. + +Create struct swap_pagevec which contains of a pagevec member and a +spin_lock_t. Introduce a static key, which changes the locking behavior +only if the key is set in the following way: Before the struct is accessed +the spin_lock has to be acquired instead of using preempt_disable(). Since +the struct is used CPU-locally there is no spinning on the lock but the +lock is acquired immediately. If the struct is accessed from interrupt +context, spin_lock_irqsave() is used. + +No functional change yet because static key is not enabled. 
+ +[anna-maria: introduce static key] +Signed-off-by: Thomas Gleixner +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + mm/compaction.c | 14 ++- + mm/internal.h | 2 + mm/swap.c | 202 +++++++++++++++++++++++++++++++++++++++++++++----------- + 3 files changed, 176 insertions(+), 42 deletions(-) + +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -2244,10 +2244,16 @@ compact_zone(struct compact_control *cc, + block_start_pfn(cc->migrate_pfn, cc->order); + + if (last_migrated_pfn < current_block_start) { +- cpu = get_cpu(); +- lru_add_drain_cpu(cpu); +- drain_local_pages(cc->zone); +- put_cpu(); ++ if (static_branch_likely(&use_pvec_lock)) { ++ cpu = raw_smp_processor_id(); ++ lru_add_drain_cpu(cpu); ++ drain_cpu_pages(cpu, cc->zone); ++ } else { ++ cpu = get_cpu(); ++ lru_add_drain_cpu(cpu); ++ drain_local_pages(cc->zone); ++ put_cpu(); ++ } + /* No more flushing until we migrate again */ + last_migrated_pfn = 0; + } +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -32,6 +32,8 @@ + /* Do not use these with a slab allocator */ + #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK) + ++extern struct static_key_false use_pvec_lock; ++ + void page_writeback_init(void); + + vm_fault_t do_swap_page(struct vm_fault *vmf); +--- a/mm/swap.c ++++ b/mm/swap.c +@@ -44,15 +44,108 @@ + /* How many pages do we try to swap or page in/out together? */ + int page_cluster; + +-static DEFINE_PER_CPU(struct pagevec, lru_add_pvec); +-static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); +-static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs); +-static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs); +-static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs); ++DEFINE_STATIC_KEY_FALSE(use_pvec_lock); ++ ++struct swap_pagevec { ++ spinlock_t lock; ++ struct pagevec pvec; ++}; ++ ++#define DEFINE_PER_CPU_PAGEVEC(lvar) \ ++ DEFINE_PER_CPU(struct swap_pagevec, lvar) = { \ ++ .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) } ++ ++static DEFINE_PER_CPU_PAGEVEC(lru_add_pvec); ++static DEFINE_PER_CPU_PAGEVEC(lru_rotate_pvecs); ++static DEFINE_PER_CPU_PAGEVEC(lru_deactivate_file_pvecs); ++static DEFINE_PER_CPU_PAGEVEC(lru_deactivate_pvecs); ++static DEFINE_PER_CPU_PAGEVEC(lru_lazyfree_pvecs); + #ifdef CONFIG_SMP +-static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs); ++static DEFINE_PER_CPU_PAGEVEC(activate_page_pvecs); + #endif + ++static inline ++struct swap_pagevec *lock_swap_pvec(struct swap_pagevec __percpu *p) ++{ ++ struct swap_pagevec *swpvec; ++ ++ if (static_branch_likely(&use_pvec_lock)) { ++ swpvec = raw_cpu_ptr(p); ++ ++ spin_lock(&swpvec->lock); ++ } else { ++ swpvec = &get_cpu_var(*p); ++ } ++ return swpvec; ++} ++ ++static inline struct swap_pagevec * ++lock_swap_pvec_cpu(struct swap_pagevec __percpu *p, int cpu) ++{ ++ struct swap_pagevec *swpvec = per_cpu_ptr(p, cpu); ++ ++ if (static_branch_likely(&use_pvec_lock)) ++ spin_lock(&swpvec->lock); ++ ++ return swpvec; ++} ++ ++static inline struct swap_pagevec * ++lock_swap_pvec_irqsave(struct swap_pagevec __percpu *p, unsigned long *flags) ++{ ++ struct swap_pagevec *swpvec; ++ ++ if (static_branch_likely(&use_pvec_lock)) { ++ swpvec = raw_cpu_ptr(p); ++ ++ spin_lock_irqsave(&swpvec->lock, (*flags)); ++ } else { ++ local_irq_save(*flags); ++ ++ swpvec = this_cpu_ptr(p); ++ } ++ return swpvec; ++} ++ ++static inline struct swap_pagevec * ++lock_swap_pvec_cpu_irqsave(struct swap_pagevec __percpu *p, int cpu, ++ unsigned long *flags) ++{ ++ struct swap_pagevec *swpvec = per_cpu_ptr(p, cpu); ++ 
++ if (static_branch_likely(&use_pvec_lock)) ++ spin_lock_irqsave(&swpvec->lock, *flags); ++ else ++ local_irq_save(*flags); ++ ++ return swpvec; ++} ++ ++static inline void unlock_swap_pvec(struct swap_pagevec *swpvec, ++ struct swap_pagevec __percpu *p) ++{ ++ if (static_branch_likely(&use_pvec_lock)) ++ spin_unlock(&swpvec->lock); ++ else ++ put_cpu_var(*p); ++ ++} ++ ++static inline void unlock_swap_pvec_cpu(struct swap_pagevec *swpvec) ++{ ++ if (static_branch_likely(&use_pvec_lock)) ++ spin_unlock(&swpvec->lock); ++} ++ ++static inline void ++unlock_swap_pvec_irqrestore(struct swap_pagevec *swpvec, unsigned long flags) ++{ ++ if (static_branch_likely(&use_pvec_lock)) ++ spin_unlock_irqrestore(&swpvec->lock, flags); ++ else ++ local_irq_restore(flags); ++} ++ + /* + * This path almost never happens for VM activity - pages are normally + * freed via pagevecs. But it gets used by networking. +@@ -250,15 +343,17 @@ void rotate_reclaimable_page(struct page + { + if (!PageLocked(page) && !PageDirty(page) && + !PageUnevictable(page) && PageLRU(page)) { ++ struct swap_pagevec *swpvec; + struct pagevec *pvec; + unsigned long flags; + + get_page(page); +- local_irq_save(flags); +- pvec = this_cpu_ptr(&lru_rotate_pvecs); ++ ++ swpvec = lock_swap_pvec_irqsave(&lru_rotate_pvecs, &flags); ++ pvec = &swpvec->pvec; + if (!pagevec_add(pvec, page) || PageCompound(page)) + pagevec_move_tail(pvec); +- local_irq_restore(flags); ++ unlock_swap_pvec_irqrestore(swpvec, flags); + } + } + +@@ -293,27 +388,32 @@ static void __activate_page(struct page + #ifdef CONFIG_SMP + static void activate_page_drain(int cpu) + { +- struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu); ++ struct swap_pagevec *swpvec = lock_swap_pvec_cpu(&activate_page_pvecs, cpu); ++ struct pagevec *pvec = &swpvec->pvec; + + if (pagevec_count(pvec)) + pagevec_lru_move_fn(pvec, __activate_page, NULL); ++ unlock_swap_pvec_cpu(swpvec); + } + + static bool need_activate_page_drain(int cpu) + { +- return pagevec_count(&per_cpu(activate_page_pvecs, cpu)) != 0; ++ return pagevec_count(per_cpu_ptr(&activate_page_pvecs.pvec, cpu)) != 0; + } + + void activate_page(struct page *page) + { + page = compound_head(page); + if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { +- struct pagevec *pvec = &get_cpu_var(activate_page_pvecs); ++ struct swap_pagevec *swpvec; ++ struct pagevec *pvec; + + get_page(page); ++ swpvec = lock_swap_pvec(&activate_page_pvecs); ++ pvec = &swpvec->pvec; + if (!pagevec_add(pvec, page) || PageCompound(page)) + pagevec_lru_move_fn(pvec, __activate_page, NULL); +- put_cpu_var(activate_page_pvecs); ++ unlock_swap_pvec(swpvec, &activate_page_pvecs); + } + } + +@@ -335,7 +435,8 @@ void activate_page(struct page *page) + + static void __lru_cache_activate_page(struct page *page) + { +- struct pagevec *pvec = &get_cpu_var(lru_add_pvec); ++ struct swap_pagevec *swpvec = lock_swap_pvec(&lru_add_pvec); ++ struct pagevec *pvec = &swpvec->pvec; + int i; + + /* +@@ -357,7 +458,7 @@ static void __lru_cache_activate_page(st + } + } + +- put_cpu_var(lru_add_pvec); ++ unlock_swap_pvec(swpvec, &lru_add_pvec); + } + + /* +@@ -399,12 +500,13 @@ EXPORT_SYMBOL(mark_page_accessed); + + static void __lru_cache_add(struct page *page) + { +- struct pagevec *pvec = &get_cpu_var(lru_add_pvec); ++ struct swap_pagevec *swpvec = lock_swap_pvec(&lru_add_pvec); ++ struct pagevec *pvec = &swpvec->pvec; + + get_page(page); + if (!pagevec_add(pvec, page) || PageCompound(page)) + __pagevec_lru_add(pvec); +- put_cpu_var(lru_add_pvec); ++ 
unlock_swap_pvec(swpvec, &lru_add_pvec); + } + + /** +@@ -588,32 +690,40 @@ static void lru_lazyfree_fn(struct page + */ + void lru_add_drain_cpu(int cpu) + { +- struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu); ++ struct swap_pagevec *swpvec = lock_swap_pvec_cpu(&lru_add_pvec, cpu); ++ struct pagevec *pvec = &swpvec->pvec; ++ unsigned long flags; + + if (pagevec_count(pvec)) + __pagevec_lru_add(pvec); ++ unlock_swap_pvec_cpu(swpvec); + +- pvec = &per_cpu(lru_rotate_pvecs, cpu); ++ swpvec = lock_swap_pvec_cpu_irqsave(&lru_rotate_pvecs, cpu, &flags); ++ pvec = &swpvec->pvec; + if (pagevec_count(pvec)) { +- unsigned long flags; + + /* No harm done if a racing interrupt already did this */ +- local_irq_save(flags); + pagevec_move_tail(pvec); +- local_irq_restore(flags); + } ++ unlock_swap_pvec_irqrestore(swpvec, flags); + +- pvec = &per_cpu(lru_deactivate_file_pvecs, cpu); ++ swpvec = lock_swap_pvec_cpu(&lru_deactivate_file_pvecs, cpu); ++ pvec = &swpvec->pvec; + if (pagevec_count(pvec)) + pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL); ++ unlock_swap_pvec_cpu(swpvec); + +- pvec = &per_cpu(lru_deactivate_pvecs, cpu); ++ swpvec = lock_swap_pvec_cpu(&lru_deactivate_pvecs, cpu); ++ pvec = &swpvec->pvec; + if (pagevec_count(pvec)) + pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); ++ unlock_swap_pvec_cpu(swpvec); + +- pvec = &per_cpu(lru_lazyfree_pvecs, cpu); ++ swpvec = lock_swap_pvec_cpu(&lru_lazyfree_pvecs, cpu); ++ pvec = &swpvec->pvec; + if (pagevec_count(pvec)) + pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL); ++ unlock_swap_pvec_cpu(swpvec); + + activate_page_drain(cpu); + } +@@ -628,6 +738,9 @@ void lru_add_drain_cpu(int cpu) + */ + void deactivate_file_page(struct page *page) + { ++ struct swap_pagevec *swpvec; ++ struct pagevec *pvec; ++ + /* + * In a workload with many unevictable page such as mprotect, + * unevictable page deactivation for accelerating reclaim is pointless. 
+@@ -636,11 +749,12 @@ void deactivate_file_page(struct page *p + return; + + if (likely(get_page_unless_zero(page))) { +- struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs); ++ swpvec = lock_swap_pvec(&lru_deactivate_file_pvecs); ++ pvec = &swpvec->pvec; + + if (!pagevec_add(pvec, page) || PageCompound(page)) + pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL); +- put_cpu_var(lru_deactivate_file_pvecs); ++ unlock_swap_pvec(swpvec, &lru_deactivate_file_pvecs); + } + } + +@@ -655,12 +769,16 @@ void deactivate_file_page(struct page *p + void deactivate_page(struct page *page) + { + if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { +- struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs); ++ struct swap_pagevec *swpvec; ++ struct pagevec *pvec; ++ ++ swpvec = lock_swap_pvec(&lru_deactivate_pvecs); ++ pvec = &swpvec->pvec; + + get_page(page); + if (!pagevec_add(pvec, page) || PageCompound(page)) + pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); +- put_cpu_var(lru_deactivate_pvecs); ++ unlock_swap_pvec(swpvec, &lru_deactivate_pvecs); + } + } + +@@ -673,21 +791,29 @@ void deactivate_page(struct page *page) + */ + void mark_page_lazyfree(struct page *page) + { ++ struct swap_pagevec *swpvec; ++ struct pagevec *pvec; ++ + if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && + !PageSwapCache(page) && !PageUnevictable(page)) { +- struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs); ++ swpvec = lock_swap_pvec(&lru_lazyfree_pvecs); ++ pvec = &swpvec->pvec; + + get_page(page); + if (!pagevec_add(pvec, page) || PageCompound(page)) + pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL); +- put_cpu_var(lru_lazyfree_pvecs); ++ unlock_swap_pvec(swpvec, &lru_lazyfree_pvecs); + } + } + + void lru_add_drain(void) + { +- lru_add_drain_cpu(get_cpu()); +- put_cpu(); ++ if (static_branch_likely(&use_pvec_lock)) { ++ lru_add_drain_cpu(raw_smp_processor_id()); ++ } else { ++ lru_add_drain_cpu(get_cpu()); ++ put_cpu(); ++ } + } + + #ifdef CONFIG_SMP +@@ -725,11 +851,11 @@ void lru_add_drain_all(void) + for_each_online_cpu(cpu) { + struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); + +- if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) || +- pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) || +- pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) || +- pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || +- pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) || ++ if (pagevec_count(&per_cpu(lru_add_pvec.pvec, cpu)) || ++ pagevec_count(&per_cpu(lru_rotate_pvecs.pvec, cpu)) || ++ pagevec_count(&per_cpu(lru_deactivate_file_pvecs.pvec, cpu)) || ++ pagevec_count(&per_cpu(lru_deactivate_pvecs.pvec, cpu)) || ++ pagevec_count(&per_cpu(lru_lazyfree_pvecs.pvec, cpu)) || + need_activate_page_drain(cpu)) { + INIT_WORK(work, lru_add_drain_per_cpu); + queue_work_on(cpu, mm_percpu_wq, work); diff --git a/patches/0002-printk-rb-add-prb-locking-functions.patch b/patches/0002-printk-rb-add-prb-locking-functions.patch index 9ec09b8643b4..29e2b17ff94f 100644 --- a/patches/0002-printk-rb-add-prb-locking-functions.patch +++ b/patches/0002-printk-rb-add-prb-locking-functions.patch @@ -67,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior +#endif /*_LINUX_PRINTK_RINGBUFFER_H */ --- a/lib/Makefile +++ b/lib/Makefile -@@ -30,7 +30,7 @@ endif +@@ -26,7 +26,7 @@ endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o timerqueue.o xarray.o \ diff --git a/patches/0002-sched-swait-Add-swait_event_lock_irq.patch 
b/patches/0002-sched-swait-Add-swait_event_lock_irq.patch new file mode 100644 index 000000000000..9e24a802c289 --- /dev/null +++ b/patches/0002-sched-swait-Add-swait_event_lock_irq.patch @@ -0,0 +1,33 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 22 May 2019 12:42:26 +0200 +Subject: [PATCH 2/4] sched/swait: Add swait_event_lock_irq() + +The swait_event_lock_irq() is inspired by wait_event_lock_irq(). This is +required by the workqueue code once it switches to swait. + +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/swait.h | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +--- a/include/linux/swait.h ++++ b/include/linux/swait.h +@@ -297,4 +297,18 @@ do { \ + __ret; \ + }) + ++#define __swait_event_lock_irq(wq, condition, lock, cmd) \ ++ ___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \ ++ raw_spin_unlock_irq(&lock); \ ++ cmd; \ ++ schedule(); \ ++ raw_spin_lock_irq(&lock)) ++ ++#define swait_event_lock_irq(wq_head, condition, lock) \ ++ do { \ ++ if (condition) \ ++ break; \ ++ __swait_event_lock_irq(wq_head, condition, lock, ); \ ++ } while (0) ++ + #endif /* _LINUX_SWAIT_H */ diff --git a/patches/0003-cgroup-Remove-may_sleep-from-cgroup_rstat_flush_lock.patch b/patches/0003-cgroup-Remove-may_sleep-from-cgroup_rstat_flush_lock.patch new file mode 100644 index 000000000000..b5f81326bc18 --- /dev/null +++ b/patches/0003-cgroup-Remove-may_sleep-from-cgroup_rstat_flush_lock.patch @@ -0,0 +1,55 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 16 Aug 2019 12:25:35 +0200 +Subject: [PATCH 3/4] cgroup: Remove `may_sleep' from + cgroup_rstat_flush_locked() + +cgroup_rstat_flush_locked() is always invoked with `may_sleep' set to +true so that this case can be made default and the parameter removed. + +Remove the `may_sleep' parameter. 
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/cgroup/rstat.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +--- a/kernel/cgroup/rstat.c ++++ b/kernel/cgroup/rstat.c +@@ -149,7 +149,7 @@ static struct cgroup *cgroup_rstat_cpu_p + } + + /* see cgroup_rstat_flush() */ +-static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep) ++static void cgroup_rstat_flush_locked(struct cgroup *cgrp) + __releases(&cgroup_rstat_lock) __acquires(&cgroup_rstat_lock) + { + int cpu; +@@ -167,9 +167,7 @@ static void cgroup_rstat_flush_locked(st + + raw_spin_unlock(cpu_lock); + +- /* if @may_sleep, play nice and yield if necessary */ +- if (may_sleep && (need_resched() || +- spin_needbreak(&cgroup_rstat_lock))) { ++ if (need_resched() || spin_needbreak(&cgroup_rstat_lock)) { + spin_unlock_irq(&cgroup_rstat_lock); + if (!cond_resched()) + cpu_relax(); +@@ -196,7 +194,7 @@ void cgroup_rstat_flush(struct cgroup *c + might_sleep(); + + spin_lock_irq(&cgroup_rstat_lock); +- cgroup_rstat_flush_locked(cgrp, true); ++ cgroup_rstat_flush_locked(cgrp); + spin_unlock_irq(&cgroup_rstat_lock); + } + +@@ -214,7 +212,7 @@ static void cgroup_rstat_flush_hold(stru + { + might_sleep(); + spin_lock_irq(&cgroup_rstat_lock); +- cgroup_rstat_flush_locked(cgrp, true); ++ cgroup_rstat_flush_locked(cgrp); + } + + /** diff --git a/patches/0003-hrtimer-Prevent-using-hrtimer_grab_expiry_lock-on-mi.patch b/patches/0003-hrtimer-Prevent-using-hrtimer_grab_expiry_lock-on-mi.patch deleted file mode 100644 index e2d0d2242e24..000000000000 --- a/patches/0003-hrtimer-Prevent-using-hrtimer_grab_expiry_lock-on-mi.patch +++ /dev/null @@ -1,33 +0,0 @@ -From: Julien Grall -Date: Wed, 21 Aug 2019 10:24:09 +0100 -Subject: [PATCH 3/3] hrtimer: Prevent using hrtimer_grab_expiry_lock() on - migration_base - -As tglx puts it: -|If base == migration_base then there is no point to lock soft_expiry_lock -|simply because the timer is not executing the callback in soft irq context -|and the whole lock/unlock dance can be avoided. - -Furthermore, all the path leading to hrtimer_grab_expiry_lock() assumes -timer->base and timer->base->cpu_base are always non-NULL. So it is safe -to remove the NULL checks here. - -Signed-off-by: Julien Grall -Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1908211557420.2223@nanos.tec.linutronix.de -[bigeasy: rewrite changelog] -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/time/hrtimer.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/time/hrtimer.c -+++ b/kernel/time/hrtimer.c -@@ -934,7 +934,7 @@ void hrtimer_grab_expiry_lock(const stru - { - struct hrtimer_clock_base *base = READ_ONCE(timer->base); - -- if (timer->is_soft && base && base->cpu_base) { -+ if (timer->is_soft && base != &migration_base) { - spin_lock(&base->cpu_base->softirq_expiry_lock); - spin_unlock(&base->cpu_base->softirq_expiry_lock); - } diff --git a/patches/0003-jbd2-Move-dropping-of-jh-reference-out-of-un-re-fili.patch b/patches/0003-jbd2-Move-dropping-of-jh-reference-out-of-un-re-fili.patch new file mode 100644 index 000000000000..82cb224af25f --- /dev/null +++ b/patches/0003-jbd2-Move-dropping-of-jh-reference-out-of-un-re-fili.patch @@ -0,0 +1,150 @@ +From: Jan Kara +Date: Fri, 9 Aug 2019 14:42:29 +0200 +Subject: [PATCH 3/7] jbd2: Move dropping of jh reference out of un/re-filing + functions + +__jbd2_journal_unfile_buffer() and __jbd2_journal_refile_buffer() drop +transaction's jh reference when they remove jh from a transaction. 
This +will be however inconvenient once we move state lock into journal_head +itself as we still need to unlock it and we'd need to grab jh reference +just for that. Move dropping of jh reference out of these functions into +the few callers. + +Signed-off-by: Jan Kara +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/jbd2/commit.c | 5 ++++- + fs/jbd2/transaction.c | 23 +++++++++++++++-------- + include/linux/jbd2.h | 2 +- + 3 files changed, 20 insertions(+), 10 deletions(-) + +--- a/fs/jbd2/commit.c ++++ b/fs/jbd2/commit.c +@@ -918,6 +918,7 @@ void jbd2_journal_commit_transaction(jou + transaction_t *cp_transaction; + struct buffer_head *bh; + int try_to_free = 0; ++ bool drop_ref; + + jh = commit_transaction->t_forget; + spin_unlock(&journal->j_list_lock); +@@ -1022,8 +1023,10 @@ void jbd2_journal_commit_transaction(jou + try_to_free = 1; + } + JBUFFER_TRACE(jh, "refile or unfile buffer"); +- __jbd2_journal_refile_buffer(jh); ++ drop_ref = __jbd2_journal_refile_buffer(jh); + jbd_unlock_bh_state(bh); ++ if (drop_ref) ++ jbd2_journal_put_journal_head(jh); + if (try_to_free) + release_buffer_page(bh); /* Drops bh reference */ + else +--- a/fs/jbd2/transaction.c ++++ b/fs/jbd2/transaction.c +@@ -1598,6 +1598,7 @@ int jbd2_journal_forget (handle_t *handl + __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); + } else { + __jbd2_journal_unfile_buffer(jh); ++ jbd2_journal_put_journal_head(jh); + if (!buffer_jbd(bh)) { + spin_unlock(&journal->j_list_lock); + goto not_jbd; +@@ -1971,17 +1972,15 @@ static void __jbd2_journal_temp_unlink_b + } + + /* +- * Remove buffer from all transactions. ++ * Remove buffer from all transactions. The caller is responsible for dropping ++ * the jh reference that belonged to the transaction. + * + * Called with bh_state lock and j_list_lock +- * +- * jh and bh may be already freed when this function returns. + */ + static void __jbd2_journal_unfile_buffer(struct journal_head *jh) + { + __jbd2_journal_temp_unlink_buffer(jh); + jh->b_transaction = NULL; +- jbd2_journal_put_journal_head(jh); + } + + void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) +@@ -1995,6 +1994,7 @@ void jbd2_journal_unfile_buffer(journal_ + __jbd2_journal_unfile_buffer(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); ++ jbd2_journal_put_journal_head(jh); + __brelse(bh); + } + +@@ -2133,6 +2133,7 @@ static int __dispose_buffer(struct journ + } else { + JBUFFER_TRACE(jh, "on running transaction"); + __jbd2_journal_unfile_buffer(jh); ++ jbd2_journal_put_journal_head(jh); + } + return may_free; + } +@@ -2496,9 +2497,11 @@ void jbd2_journal_file_buffer(struct jou + * Called under j_list_lock + * Called under jbd_lock_bh_state(jh2bh(jh)) + * +- * jh and bh may be already free when this function returns ++ * When this function returns true, there's no next transaction to refile to ++ * and the caller has to drop jh reference through ++ * jbd2_journal_put_journal_head(). + */ +-void __jbd2_journal_refile_buffer(struct journal_head *jh) ++bool __jbd2_journal_refile_buffer(struct journal_head *jh) + { + int was_dirty, jlist; + struct buffer_head *bh = jh2bh(jh); +@@ -2510,7 +2513,7 @@ void __jbd2_journal_refile_buffer(struct + /* If the buffer is now unused, just drop it. 
*/ + if (jh->b_next_transaction == NULL) { + __jbd2_journal_unfile_buffer(jh); +- return; ++ return true; + } + + /* +@@ -2538,6 +2541,7 @@ void __jbd2_journal_refile_buffer(struct + + if (was_dirty) + set_buffer_jbddirty(bh); ++ return false; + } + + /* +@@ -2549,15 +2553,18 @@ void __jbd2_journal_refile_buffer(struct + void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) + { + struct buffer_head *bh = jh2bh(jh); ++ bool drop; + + /* Get reference so that buffer cannot be freed before we unlock it */ + get_bh(bh); + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); +- __jbd2_journal_refile_buffer(jh); ++ drop = __jbd2_journal_refile_buffer(jh); + jbd_unlock_bh_state(bh); + spin_unlock(&journal->j_list_lock); + __brelse(bh); ++ if (drop) ++ jbd2_journal_put_journal_head(jh); + } + + /* +--- a/include/linux/jbd2.h ++++ b/include/linux/jbd2.h +@@ -1252,7 +1252,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM + + /* Filing buffers */ + extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); +-extern void __jbd2_journal_refile_buffer(struct journal_head *); ++extern bool __jbd2_journal_refile_buffer(struct journal_head *); + extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *); + extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); + extern void __journal_free_buffer(struct journal_head *bh); diff --git a/patches/0003-mm-SLxB-change-list_lock-to-raw_spinlock_t.patch b/patches/0003-mm-SLxB-change-list_lock-to-raw_spinlock_t.patch index 355f2de0ac87..aa5ae3a17236 100644 --- a/patches/0003-mm-SLxB-change-list_lock-to-raw_spinlock_t.patch +++ b/patches/0003-mm-SLxB-change-list_lock-to-raw_spinlock_t.patch @@ -27,7 +27,7 @@ Signed-off-by: Sebastian Andrzej Siewior parent->free_objects = 0; parent->free_touched = 0; } -@@ -564,9 +564,9 @@ static noinline void cache_free_pfmemall +@@ -558,9 +558,9 @@ static noinline void cache_free_pfmemall page_node = page_to_nid(page); n = get_node(cachep, page_node); @@ -39,7 +39,7 @@ Signed-off-by: Sebastian Andrzej Siewior slabs_destroy(cachep, &list); } -@@ -694,7 +694,7 @@ static void __drain_alien_cache(struct k +@@ -688,7 +688,7 @@ static void __drain_alien_cache(struct k struct kmem_cache_node *n = get_node(cachep, node); if (ac->avail) { @@ -48,7 +48,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Stuff objects into the remote nodes shared array first. 
* That way we could avoid the overhead of putting the objects -@@ -705,7 +705,7 @@ static void __drain_alien_cache(struct k +@@ -699,7 +699,7 @@ static void __drain_alien_cache(struct k free_block(cachep, ac->entry, ac->avail, node, list); ac->avail = 0; @@ -57,7 +57,7 @@ Signed-off-by: Sebastian Andrzej Siewior } } -@@ -778,9 +778,9 @@ static int __cache_free_alien(struct kme +@@ -772,9 +772,9 @@ static int __cache_free_alien(struct kme slabs_destroy(cachep, &list); } else { n = get_node(cachep, page_node); @@ -69,7 +69,7 @@ Signed-off-by: Sebastian Andrzej Siewior slabs_destroy(cachep, &list); } return 1; -@@ -821,10 +821,10 @@ static int init_cache_node(struct kmem_c +@@ -815,10 +815,10 @@ static int init_cache_node(struct kmem_c */ n = get_node(cachep, node); if (n) { @@ -82,7 +82,7 @@ Signed-off-by: Sebastian Andrzej Siewior return 0; } -@@ -903,7 +903,7 @@ static int setup_kmem_cache_node(struct +@@ -897,7 +897,7 @@ static int setup_kmem_cache_node(struct goto fail; n = get_node(cachep, node); @@ -91,7 +91,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (n->shared && force_change) { free_block(cachep, n->shared->entry, n->shared->avail, node, &list); -@@ -921,7 +921,7 @@ static int setup_kmem_cache_node(struct +@@ -915,7 +915,7 @@ static int setup_kmem_cache_node(struct new_alien = NULL; } @@ -100,7 +100,7 @@ Signed-off-by: Sebastian Andrzej Siewior slabs_destroy(cachep, &list); /* -@@ -960,7 +960,7 @@ static void cpuup_canceled(long cpu) +@@ -954,7 +954,7 @@ static void cpuup_canceled(long cpu) if (!n) continue; @@ -109,7 +109,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* Free limit for this kmem_cache_node */ n->free_limit -= cachep->batchcount; -@@ -971,7 +971,7 @@ static void cpuup_canceled(long cpu) +@@ -965,7 +965,7 @@ static void cpuup_canceled(long cpu) nc->avail = 0; if (!cpumask_empty(mask)) { @@ -118,7 +118,7 @@ Signed-off-by: Sebastian Andrzej Siewior goto free_slab; } -@@ -985,7 +985,7 @@ static void cpuup_canceled(long cpu) +@@ -979,7 +979,7 @@ static void cpuup_canceled(long cpu) alien = n->alien; n->alien = NULL; @@ -127,7 +127,7 @@ Signed-off-by: Sebastian Andrzej Siewior kfree(shared); if (alien) { -@@ -1169,7 +1169,7 @@ static void __init init_list(struct kmem +@@ -1163,7 +1163,7 @@ static void __init init_list(struct kmem /* * Do not assume that spinlocks can be initialized via memcpy: */ @@ -136,7 +136,7 @@ Signed-off-by: Sebastian Andrzej Siewior MAKE_ALL_LISTS(cachep, ptr, nodeid); cachep->node[nodeid] = ptr; -@@ -1340,11 +1340,11 @@ slab_out_of_memory(struct kmem_cache *ca +@@ -1334,11 +1334,11 @@ slab_out_of_memory(struct kmem_cache *ca for_each_kmem_cache_node(cachep, node, n) { unsigned long total_slabs, free_slabs, free_objs; @@ -150,7 +150,7 @@ Signed-off-by: Sebastian Andrzej Siewior pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld\n", node, total_slabs - free_slabs, total_slabs, -@@ -2107,7 +2107,7 @@ static void check_spinlock_acquired(stru +@@ -2096,7 +2096,7 @@ static void check_spinlock_acquired(stru { #ifdef CONFIG_SMP check_irq_off(); @@ -159,7 +159,7 @@ Signed-off-by: Sebastian Andrzej Siewior #endif } -@@ -2115,7 +2115,7 @@ static void check_spinlock_acquired_node +@@ -2104,7 +2104,7 @@ static void check_spinlock_acquired_node { #ifdef CONFIG_SMP check_irq_off(); @@ -168,7 +168,7 @@ Signed-off-by: Sebastian Andrzej Siewior #endif } -@@ -2155,9 +2155,9 @@ static void do_drain(void *arg) +@@ -2144,9 +2144,9 @@ static void do_drain(void *arg) check_irq_off(); ac = cpu_cache_get(cachep); n = get_node(cachep, node); @@ -180,7 +180,7 @@ 
Signed-off-by: Sebastian Andrzej Siewior slabs_destroy(cachep, &list); ac->avail = 0; } -@@ -2175,9 +2175,9 @@ static void drain_cpu_caches(struct kmem +@@ -2164,9 +2164,9 @@ static void drain_cpu_caches(struct kmem drain_alien_cache(cachep, n->alien); for_each_kmem_cache_node(cachep, node, n) { @@ -192,7 +192,7 @@ Signed-off-by: Sebastian Andrzej Siewior slabs_destroy(cachep, &list); } -@@ -2199,10 +2199,10 @@ static int drain_freelist(struct kmem_ca +@@ -2188,10 +2188,10 @@ static int drain_freelist(struct kmem_ca nr_freed = 0; while (nr_freed < tofree && !list_empty(&n->slabs_free)) { @@ -205,7 +205,7 @@ Signed-off-by: Sebastian Andrzej Siewior goto out; } -@@ -2215,7 +2215,7 @@ static int drain_freelist(struct kmem_ca +@@ -2204,7 +2204,7 @@ static int drain_freelist(struct kmem_ca * to the cache. */ n->free_objects -= cache->num; @@ -214,7 +214,7 @@ Signed-off-by: Sebastian Andrzej Siewior slab_destroy(cache, page); nr_freed++; } -@@ -2664,7 +2664,7 @@ static void cache_grow_end(struct kmem_c +@@ -2657,7 +2657,7 @@ static void cache_grow_end(struct kmem_c INIT_LIST_HEAD(&page->slab_list); n = get_node(cachep, page_to_nid(page)); @@ -223,7 +223,7 @@ Signed-off-by: Sebastian Andrzej Siewior n->total_slabs++; if (!page->active) { list_add_tail(&page->slab_list, &n->slabs_free); -@@ -2674,7 +2674,7 @@ static void cache_grow_end(struct kmem_c +@@ -2667,7 +2667,7 @@ static void cache_grow_end(struct kmem_c STATS_INC_GROWN(cachep); n->free_objects += cachep->num - page->active; @@ -232,7 +232,7 @@ Signed-off-by: Sebastian Andrzej Siewior fixup_objfreelist_debug(cachep, &list); } -@@ -2840,7 +2840,7 @@ static struct page *get_first_slab(struc +@@ -2833,7 +2833,7 @@ static struct page *get_first_slab(struc { struct page *page; @@ -241,7 +241,7 @@ Signed-off-by: Sebastian Andrzej Siewior page = list_first_entry_or_null(&n->slabs_partial, struct page, slab_list); if (!page) { -@@ -2867,10 +2867,10 @@ static noinline void *cache_alloc_pfmema +@@ -2860,10 +2860,10 @@ static noinline void *cache_alloc_pfmema if (!gfp_pfmemalloc_allowed(flags)) return NULL; @@ -254,7 +254,7 @@ Signed-off-by: Sebastian Andrzej Siewior return NULL; } -@@ -2879,7 +2879,7 @@ static noinline void *cache_alloc_pfmema +@@ -2872,7 +2872,7 @@ static noinline void *cache_alloc_pfmema fixup_slab_list(cachep, n, page, &list); @@ -263,7 +263,7 @@ Signed-off-by: Sebastian Andrzej Siewior fixup_objfreelist_debug(cachep, &list); return obj; -@@ -2938,7 +2938,7 @@ static void *cache_alloc_refill(struct k +@@ -2931,7 +2931,7 @@ static void *cache_alloc_refill(struct k if (!n->free_objects && (!shared || !shared->avail)) goto direct_grow; @@ -272,7 +272,7 @@ Signed-off-by: Sebastian Andrzej Siewior shared = READ_ONCE(n->shared); /* See if we can refill from the shared array */ -@@ -2962,7 +2962,7 @@ static void *cache_alloc_refill(struct k +@@ -2955,7 +2955,7 @@ static void *cache_alloc_refill(struct k must_grow: n->free_objects -= ac->avail; alloc_done: @@ -281,7 +281,7 @@ Signed-off-by: Sebastian Andrzej Siewior fixup_objfreelist_debug(cachep, &list); direct_grow: -@@ -3187,7 +3187,7 @@ static void *____cache_alloc_node(struct +@@ -3180,7 +3180,7 @@ static void *____cache_alloc_node(struct BUG_ON(!n); check_irq_off(); @@ -290,7 +290,7 @@ Signed-off-by: Sebastian Andrzej Siewior page = get_first_slab(n, false); if (!page) goto must_grow; -@@ -3205,12 +3205,12 @@ static void *____cache_alloc_node(struct +@@ -3198,12 +3198,12 @@ static void *____cache_alloc_node(struct fixup_slab_list(cachep, n, page, &list); @@ -305,7 +305,7 @@ 
Signed-off-by: Sebastian Andrzej Siewior page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid); if (page) { /* This slab isn't counted yet so don't update free_objects */ -@@ -3386,7 +3386,7 @@ static void cache_flusharray(struct kmem +@@ -3379,7 +3379,7 @@ static void cache_flusharray(struct kmem check_irq_off(); n = get_node(cachep, node); @@ -314,7 +314,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (n->shared) { struct array_cache *shared_array = n->shared; int max = shared_array->limit - shared_array->avail; -@@ -3415,7 +3415,7 @@ static void cache_flusharray(struct kmem +@@ -3408,7 +3408,7 @@ static void cache_flusharray(struct kmem STATS_SET_FREEABLE(cachep, i); } #endif @@ -323,7 +323,7 @@ Signed-off-by: Sebastian Andrzej Siewior slabs_destroy(cachep, &list); ac->avail -= batchcount; memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); -@@ -3829,9 +3829,9 @@ static int __do_tune_cpucache(struct kme +@@ -3830,9 +3830,9 @@ static int __do_tune_cpucache(struct kme node = cpu_to_mem(cpu); n = get_node(cachep, node); @@ -335,7 +335,7 @@ Signed-off-by: Sebastian Andrzej Siewior slabs_destroy(cachep, &list); } free_percpu(prev); -@@ -3956,9 +3956,9 @@ static void drain_array(struct kmem_cach +@@ -3957,9 +3957,9 @@ static void drain_array(struct kmem_cach return; } @@ -347,7 +347,7 @@ Signed-off-by: Sebastian Andrzej Siewior slabs_destroy(cachep, &list); } -@@ -4042,7 +4042,7 @@ void get_slabinfo(struct kmem_cache *cac +@@ -4043,7 +4043,7 @@ void get_slabinfo(struct kmem_cache *cac for_each_kmem_cache_node(cachep, node, n) { check_irq_on(); @@ -356,7 +356,7 @@ Signed-off-by: Sebastian Andrzej Siewior total_slabs += n->total_slabs; free_slabs += n->free_slabs; -@@ -4051,7 +4051,7 @@ void get_slabinfo(struct kmem_cache *cac +@@ -4052,7 +4052,7 @@ void get_slabinfo(struct kmem_cache *cac if (n->shared) shared_avail += n->shared->avail; @@ -367,7 +367,7 @@ Signed-off-by: Sebastian Andrzej Siewior active_slabs = total_slabs - free_slabs; --- a/mm/slab.h +++ b/mm/slab.h -@@ -449,7 +449,7 @@ static inline void slab_post_alloc_hook( +@@ -596,7 +596,7 @@ static inline void slab_post_alloc_hook( * The slab lists for all objects. 
*/ struct kmem_cache_node { @@ -378,7 +378,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct list_head slabs_partial; /* partial list first, better asm code */ --- a/mm/slub.c +++ b/mm/slub.c -@@ -1175,7 +1175,7 @@ static noinline int free_debug_processin +@@ -1176,7 +1176,7 @@ static noinline int free_debug_processin unsigned long uninitialized_var(flags); int ret = 0; @@ -387,7 +387,7 @@ Signed-off-by: Sebastian Andrzej Siewior slab_lock(page); if (s->flags & SLAB_CONSISTENCY_CHECKS) { -@@ -1210,7 +1210,7 @@ static noinline int free_debug_processin +@@ -1211,7 +1211,7 @@ static noinline int free_debug_processin bulk_cnt, cnt); slab_unlock(page); @@ -396,7 +396,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!ret) slab_fix(s, "Object at 0x%p not freed", object); return ret; -@@ -1854,7 +1854,7 @@ static void *get_partial_node(struct kme +@@ -1849,7 +1849,7 @@ static void *get_partial_node(struct kme if (!n || !n->nr_partial) return NULL; @@ -405,7 +405,7 @@ Signed-off-by: Sebastian Andrzej Siewior list_for_each_entry_safe(page, page2, &n->partial, slab_list) { void *t; -@@ -1879,7 +1879,7 @@ static void *get_partial_node(struct kme +@@ -1874,7 +1874,7 @@ static void *get_partial_node(struct kme break; } @@ -414,7 +414,7 @@ Signed-off-by: Sebastian Andrzej Siewior return object; } -@@ -2125,7 +2125,7 @@ static void deactivate_slab(struct kmem_ +@@ -2122,7 +2122,7 @@ static void deactivate_slab(struct kmem_ * that acquire_slab() will see a slab page that * is frozen */ @@ -423,7 +423,7 @@ Signed-off-by: Sebastian Andrzej Siewior } } else { m = M_FULL; -@@ -2136,7 +2136,7 @@ static void deactivate_slab(struct kmem_ +@@ -2133,7 +2133,7 @@ static void deactivate_slab(struct kmem_ * slabs from diagnostic functions will not see * any frozen slabs. */ @@ -432,7 +432,7 @@ Signed-off-by: Sebastian Andrzej Siewior } } -@@ -2160,7 +2160,7 @@ static void deactivate_slab(struct kmem_ +@@ -2157,7 +2157,7 @@ static void deactivate_slab(struct kmem_ goto redo; if (lock) @@ -441,7 +441,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (m == M_PARTIAL) stat(s, tail); -@@ -2199,10 +2199,10 @@ static void unfreeze_partials(struct kme +@@ -2196,10 +2196,10 @@ static void unfreeze_partials(struct kme n2 = get_node(s, page_to_nid(page)); if (n != n2) { if (n) @@ -454,7 +454,7 @@ Signed-off-by: Sebastian Andrzej Siewior } do { -@@ -2231,7 +2231,7 @@ static void unfreeze_partials(struct kme +@@ -2228,7 +2228,7 @@ static void unfreeze_partials(struct kme } if (n) @@ -463,7 +463,7 @@ Signed-off-by: Sebastian Andrzej Siewior while (discard_page) { page = discard_page; -@@ -2398,10 +2398,10 @@ static unsigned long count_partial(struc +@@ -2395,10 +2395,10 @@ static unsigned long count_partial(struc unsigned long x = 0; struct page *page; @@ -476,7 +476,7 @@ Signed-off-by: Sebastian Andrzej Siewior return x; } #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */ -@@ -2835,7 +2835,7 @@ static void __slab_free(struct kmem_cach +@@ -2845,7 +2845,7 @@ static void __slab_free(struct kmem_cach do { if (unlikely(n)) { @@ -485,7 +485,7 @@ Signed-off-by: Sebastian Andrzej Siewior n = NULL; } prior = page->freelist; -@@ -2867,7 +2867,7 @@ static void __slab_free(struct kmem_cach +@@ -2877,7 +2877,7 @@ static void __slab_free(struct kmem_cach * Otherwise the list_lock will synchronize with * other processors updating the list of slabs. 
*/ @@ -494,7 +494,7 @@ Signed-off-by: Sebastian Andrzej Siewior } } -@@ -2908,7 +2908,7 @@ static void __slab_free(struct kmem_cach +@@ -2918,7 +2918,7 @@ static void __slab_free(struct kmem_cach add_partial(n, page, DEACTIVATE_TO_TAIL); stat(s, FREE_ADD_PARTIAL); } @@ -503,7 +503,7 @@ Signed-off-by: Sebastian Andrzej Siewior return; slab_empty: -@@ -2923,7 +2923,7 @@ static void __slab_free(struct kmem_cach +@@ -2933,7 +2933,7 @@ static void __slab_free(struct kmem_cach remove_full(s, n, page); } @@ -512,7 +512,7 @@ Signed-off-by: Sebastian Andrzej Siewior stat(s, FREE_SLAB); discard_slab(s, page); } -@@ -3310,7 +3310,7 @@ static void +@@ -3323,7 +3323,7 @@ static void init_kmem_cache_node(struct kmem_cache_node *n) { n->nr_partial = 0; @@ -521,7 +521,7 @@ Signed-off-by: Sebastian Andrzej Siewior INIT_LIST_HEAD(&n->partial); #ifdef CONFIG_SLUB_DEBUG atomic_long_set(&n->nr_slabs, 0); -@@ -3695,7 +3695,7 @@ static void free_partial(struct kmem_cac +@@ -3704,7 +3704,7 @@ static void free_partial(struct kmem_cac struct page *page, *h; BUG_ON(irqs_disabled()); @@ -530,7 +530,7 @@ Signed-off-by: Sebastian Andrzej Siewior list_for_each_entry_safe(page, h, &n->partial, slab_list) { if (!page->inuse) { remove_partial(n, page); -@@ -3705,7 +3705,7 @@ static void free_partial(struct kmem_cac +@@ -3714,7 +3714,7 @@ static void free_partial(struct kmem_cac "Objects remaining in %s on __kmem_cache_shutdown()"); } } @@ -539,7 +539,7 @@ Signed-off-by: Sebastian Andrzej Siewior list_for_each_entry_safe(page, h, &discard, slab_list) discard_slab(s, page); -@@ -3979,7 +3979,7 @@ int __kmem_cache_shrink(struct kmem_cach +@@ -3986,7 +3986,7 @@ int __kmem_cache_shrink(struct kmem_cach for (i = 0; i < SHRINK_PROMOTE_MAX; i++) INIT_LIST_HEAD(promote + i); @@ -548,7 +548,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Build lists of slabs to discard or promote. -@@ -4010,7 +4010,7 @@ int __kmem_cache_shrink(struct kmem_cach +@@ -4017,7 +4017,7 @@ int __kmem_cache_shrink(struct kmem_cach for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--) list_splice(promote + i, &n->partial); @@ -557,7 +557,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* Release empty slabs */ list_for_each_entry_safe(page, t, &discard, slab_list) -@@ -4424,7 +4424,7 @@ static int validate_slab_node(struct kme +@@ -4425,7 +4425,7 @@ static int validate_slab_node(struct kme struct page *page; unsigned long flags; @@ -566,7 +566,7 @@ Signed-off-by: Sebastian Andrzej Siewior list_for_each_entry(page, &n->partial, slab_list) { validate_slab_slab(s, page, map); -@@ -4446,7 +4446,7 @@ static int validate_slab_node(struct kme +@@ -4447,7 +4447,7 @@ static int validate_slab_node(struct kme s->name, count, atomic_long_read(&n->nr_slabs)); out: @@ -575,7 +575,7 @@ Signed-off-by: Sebastian Andrzej Siewior return count; } -@@ -4632,12 +4632,12 @@ static int list_locations(struct kmem_ca +@@ -4633,12 +4633,12 @@ static int list_locations(struct kmem_ca if (!atomic_long_read(&n->nr_slabs)) continue; diff --git a/patches/0003-mm-swap-Access-struct-pagevec-remotely.patch b/patches/0003-mm-swap-Access-struct-pagevec-remotely.patch new file mode 100644 index 000000000000..e0bac5c753a6 --- /dev/null +++ b/patches/0003-mm-swap-Access-struct-pagevec-remotely.patch @@ -0,0 +1,136 @@ +From: Thomas Gleixner +Date: Thu, 18 Apr 2019 11:09:06 +0200 +Subject: [PATCH 3/4] mm/swap: Access struct pagevec remotely + +When the newly introduced static key would be enabled, struct pagevec is +locked during access. So it is possible to access it from a remote CPU. 
The +advantage is that the work can be done from the "requesting" CPU without +firing a worker on a remote CPU and waiting for it to complete the work. + +No functional change because static key is not enabled. + +Signed-off-by: Thomas Gleixner +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + mm/page_alloc.c | 19 ++++++++------ + mm/swap.c | 75 +++++++++++++++++++++++++++++++++----------------------- + 2 files changed, 57 insertions(+), 37 deletions(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -2999,15 +2999,20 @@ void drain_all_pages(struct zone *zone) + cpumask_clear_cpu(cpu, &cpus_with_pcps); + } + +- for_each_cpu(cpu, &cpus_with_pcps) { +- struct pcpu_drain *drain = per_cpu_ptr(&pcpu_drain, cpu); ++ if (static_branch_likely(&use_pvec_lock)) { ++ for_each_cpu(cpu, &cpus_with_pcps) ++ drain_cpu_pages(cpu, zone); ++ } else { ++ for_each_cpu(cpu, &cpus_with_pcps) { ++ struct pcpu_drain *drain = per_cpu_ptr(&pcpu_drain, cpu); + +- drain->zone = zone; +- INIT_WORK(&drain->work, drain_local_pages_wq); +- queue_work_on(cpu, mm_percpu_wq, &drain->work); ++ drain->zone = zone; ++ INIT_WORK(&drain->work, drain_local_pages_wq); ++ queue_work_on(cpu, mm_percpu_wq, &drain->work); ++ } ++ for_each_cpu(cpu, &cpus_with_pcps) ++ flush_work(&per_cpu_ptr(&pcpu_drain, cpu)->work); + } +- for_each_cpu(cpu, &cpus_with_pcps) +- flush_work(&per_cpu_ptr(&pcpu_drain, cpu)->work); + + mutex_unlock(&pcpu_drain_mutex); + } +--- a/mm/swap.c ++++ b/mm/swap.c +@@ -834,39 +834,54 @@ static void lru_add_drain_per_cpu(struct + */ + void lru_add_drain_all(void) + { +- static DEFINE_MUTEX(lock); +- static struct cpumask has_work; +- int cpu; +- +- /* +- * Make sure nobody triggers this path before mm_percpu_wq is fully +- * initialized. +- */ +- if (WARN_ON(!mm_percpu_wq)) +- return; +- +- mutex_lock(&lock); +- cpumask_clear(&has_work); +- +- for_each_online_cpu(cpu) { +- struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); +- +- if (pagevec_count(&per_cpu(lru_add_pvec.pvec, cpu)) || +- pagevec_count(&per_cpu(lru_rotate_pvecs.pvec, cpu)) || +- pagevec_count(&per_cpu(lru_deactivate_file_pvecs.pvec, cpu)) || +- pagevec_count(&per_cpu(lru_deactivate_pvecs.pvec, cpu)) || +- pagevec_count(&per_cpu(lru_lazyfree_pvecs.pvec, cpu)) || +- need_activate_page_drain(cpu)) { +- INIT_WORK(work, lru_add_drain_per_cpu); +- queue_work_on(cpu, mm_percpu_wq, work); +- cpumask_set_cpu(cpu, &has_work); ++ if (static_branch_likely(&use_pvec_lock)) { ++ int cpu; ++ ++ for_each_online_cpu(cpu) { ++ if (pagevec_count(&per_cpu(lru_add_pvec.pvec, cpu)) || ++ pagevec_count(&per_cpu(lru_rotate_pvecs.pvec, cpu)) || ++ pagevec_count(&per_cpu(lru_deactivate_file_pvecs.pvec, cpu)) || ++ pagevec_count(&per_cpu(lru_deactivate_pvecs.pvec, cpu)) || ++ pagevec_count(&per_cpu(lru_lazyfree_pvecs.pvec, cpu)) || ++ need_activate_page_drain(cpu)) { ++ lru_add_drain_cpu(cpu); ++ } ++ } ++ } else { ++ static DEFINE_MUTEX(lock); ++ static struct cpumask has_work; ++ int cpu; ++ ++ /* ++ * Make sure nobody triggers this path before mm_percpu_wq ++ * is fully initialized. 
++ */ ++ if (WARN_ON(!mm_percpu_wq)) ++ return; ++ ++ mutex_lock(&lock); ++ cpumask_clear(&has_work); ++ ++ for_each_online_cpu(cpu) { ++ struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); ++ ++ if (pagevec_count(&per_cpu(lru_add_pvec.pvec, cpu)) || ++ pagevec_count(&per_cpu(lru_rotate_pvecs.pvec, cpu)) || ++ pagevec_count(&per_cpu(lru_deactivate_file_pvecs.pvec, cpu)) || ++ pagevec_count(&per_cpu(lru_deactivate_pvecs.pvec, cpu)) || ++ pagevec_count(&per_cpu(lru_lazyfree_pvecs.pvec, cpu)) || ++ need_activate_page_drain(cpu)) { ++ INIT_WORK(work, lru_add_drain_per_cpu); ++ queue_work_on(cpu, mm_percpu_wq, work); ++ cpumask_set_cpu(cpu, &has_work); ++ } + } +- } + +- for_each_cpu(cpu, &has_work) +- flush_work(&per_cpu(lru_add_drain_work, cpu)); ++ for_each_cpu(cpu, &has_work) ++ flush_work(&per_cpu(lru_add_drain_work, cpu)); + +- mutex_unlock(&lock); ++ mutex_unlock(&lock); ++ } + } + #else + void lru_add_drain_all(void) diff --git a/patches/0003-workqueue-Use-swait-for-wq_manager_wait.patch b/patches/0003-workqueue-Use-swait-for-wq_manager_wait.patch new file mode 100644 index 000000000000..cacead32defd --- /dev/null +++ b/patches/0003-workqueue-Use-swait-for-wq_manager_wait.patch @@ -0,0 +1,53 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 11 Jun 2019 11:21:09 +0200 +Subject: [PATCH 3/4] workqueue: Use swait for wq_manager_wait + +In order for the workqueue code use raw_spinlock_t typed locking there +must not be a spinlock_t typed lock be acquired. A wait_queue_head uses +a spinlock_t lock for its list protection. + +Use a swait based queue head to avoid raw_spinlock_t -> spinlock_t +locking. + +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/workqueue.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -50,6 +50,7 @@ + #include + #include + #include ++#include + + #include "workqueue_internal.h" + +@@ -301,7 +302,7 @@ static struct workqueue_attrs *wq_update + static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */ + static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */ + static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ +-static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */ ++static DECLARE_SWAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */ + + static LIST_HEAD(workqueues); /* PR: list of all workqueues */ + static bool workqueue_freezing; /* PL: have wqs started freezing? */ +@@ -2137,7 +2138,7 @@ static bool manage_workers(struct worker + + pool->manager = NULL; + pool->flags &= ~POOL_MANAGER_ACTIVE; +- wake_up(&wq_manager_wait); ++ swake_up_one(&wq_manager_wait); + return true; + } + +@@ -3532,7 +3533,7 @@ static void put_unbound_pool(struct work + * manager and @pool gets freed with the flag set. 
+ */ + spin_lock_irq(&pool->lock); +- wait_event_lock_irq(wq_manager_wait, ++ swait_event_lock_irq(wq_manager_wait, + !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock); + pool->flags |= POOL_MANAGER_ACTIVE; + diff --git a/patches/0004-cgroup-Acquire-cgroup_rstat_lock-with-enabled-interr.patch b/patches/0004-cgroup-Acquire-cgroup_rstat_lock-with-enabled-interr.patch new file mode 100644 index 000000000000..0262605df378 --- /dev/null +++ b/patches/0004-cgroup-Acquire-cgroup_rstat_lock-with-enabled-interr.patch @@ -0,0 +1,71 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 16 Aug 2019 12:49:36 +0200 +Subject: [PATCH 4/4] cgroup: Acquire cgroup_rstat_lock with enabled interrupts + +There is no need to disable interrupts while cgroup_rstat_lock is +acquired. The lock is never used in-IRQ context so a simple spin_lock() +is enough for synchronisation purpose. + +Acquire cgroup_rstat_lock without disabling interrupts and ensure that +cgroup_rstat_cpu_lock is acquired with disabled interrupts (this one is +acquired in-IRQ context). + +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/cgroup/rstat.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +--- a/kernel/cgroup/rstat.c ++++ b/kernel/cgroup/rstat.c +@@ -161,17 +161,17 @@ static void cgroup_rstat_flush_locked(st + cpu); + struct cgroup *pos = NULL; + +- raw_spin_lock(cpu_lock); ++ raw_spin_lock_irq(cpu_lock); + while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) + cgroup_base_stat_flush(pos, cpu); + +- raw_spin_unlock(cpu_lock); ++ raw_spin_unlock_irq(cpu_lock); + + if (need_resched() || spin_needbreak(&cgroup_rstat_lock)) { +- spin_unlock_irq(&cgroup_rstat_lock); ++ spin_unlock(&cgroup_rstat_lock); + if (!cond_resched()) + cpu_relax(); +- spin_lock_irq(&cgroup_rstat_lock); ++ spin_lock(&cgroup_rstat_lock); + } + } + } +@@ -193,9 +193,9 @@ void cgroup_rstat_flush(struct cgroup *c + { + might_sleep(); + +- spin_lock_irq(&cgroup_rstat_lock); ++ spin_lock(&cgroup_rstat_lock); + cgroup_rstat_flush_locked(cgrp); +- spin_unlock_irq(&cgroup_rstat_lock); ++ spin_unlock(&cgroup_rstat_lock); + } + + /** +@@ -211,7 +211,7 @@ static void cgroup_rstat_flush_hold(stru + __acquires(&cgroup_rstat_lock) + { + might_sleep(); +- spin_lock_irq(&cgroup_rstat_lock); ++ spin_lock(&cgroup_rstat_lock); + cgroup_rstat_flush_locked(cgrp); + } + +@@ -221,7 +221,7 @@ static void cgroup_rstat_flush_hold(stru + static void cgroup_rstat_flush_release(void) + __releases(&cgroup_rstat_lock) + { +- spin_unlock_irq(&cgroup_rstat_lock); ++ spin_unlock(&cgroup_rstat_lock); + } + + int cgroup_rstat_init(struct cgroup *cgrp) diff --git a/patches/0004-jbd2-Drop-unnecessary-branch-from-jbd2_journal_forge.patch b/patches/0004-jbd2-Drop-unnecessary-branch-from-jbd2_journal_forge.patch new file mode 100644 index 000000000000..4238849af1cf --- /dev/null +++ b/patches/0004-jbd2-Drop-unnecessary-branch-from-jbd2_journal_forge.patch @@ -0,0 +1,27 @@ +From: Jan Kara +Date: Fri, 9 Aug 2019 14:42:30 +0200 +Subject: [PATCH 4/7] jbd2: Drop unnecessary branch from jbd2_journal_forget() + +We have cleared both dirty & jbddirty bits from the bh. So there's no +difference between bforget() and brelse(). Thus there's no point jumping +to no_jbd branch. 
+ +Signed-off-by: Jan Kara +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/jbd2/transaction.c | 4 ---- + 1 file changed, 4 deletions(-) + +--- a/fs/jbd2/transaction.c ++++ b/fs/jbd2/transaction.c +@@ -1599,10 +1599,6 @@ int jbd2_journal_forget (handle_t *handl + } else { + __jbd2_journal_unfile_buffer(jh); + jbd2_journal_put_journal_head(jh); +- if (!buffer_jbd(bh)) { +- spin_unlock(&journal->j_list_lock); +- goto not_jbd; +- } + } + spin_unlock(&journal->j_list_lock); + } else if (jh->b_transaction) { diff --git a/patches/0004-mm-SLUB-delay-giving-back-empty-slubs-to-IRQ-enabled.patch b/patches/0004-mm-SLUB-delay-giving-back-empty-slubs-to-IRQ-enabled.patch index a6d92a9520c7..a58ef462ea5f 100644 --- a/patches/0004-mm-SLUB-delay-giving-back-empty-slubs-to-IRQ-enabled.patch +++ b/patches/0004-mm-SLUB-delay-giving-back-empty-slubs-to-IRQ-enabled.patch @@ -17,7 +17,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/mm/slub.c +++ b/mm/slub.c -@@ -1378,6 +1378,12 @@ static inline void dec_slabs_node(struct +@@ -1381,6 +1381,12 @@ static inline void dec_slabs_node(struct #endif /* CONFIG_SLUB_DEBUG */ @@ -30,7 +30,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Hooks for other subsystems that check memory allocations. In a typical * production configuration these hooks all should produce no code at all. -@@ -1736,6 +1742,16 @@ static void __free_slab(struct kmem_cach +@@ -1731,6 +1737,16 @@ static void __free_slab(struct kmem_cach __free_pages(page, order); } @@ -47,7 +47,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void rcu_free_slab(struct rcu_head *h) { struct page *page = container_of(h, struct page, rcu_head); -@@ -1747,6 +1763,12 @@ static void free_slab(struct kmem_cache +@@ -1742,6 +1758,12 @@ static void free_slab(struct kmem_cache { if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) { call_rcu(&page->rcu_head, rcu_free_slab); @@ -60,7 +60,7 @@ Signed-off-by: Sebastian Andrzej Siewior } else __free_slab(s, page); } -@@ -2268,14 +2290,21 @@ static void put_cpu_partial(struct kmem_ +@@ -2265,14 +2287,21 @@ static void put_cpu_partial(struct kmem_ pobjects = oldpage->pobjects; pages = oldpage->pages; if (drain && pobjects > s->cpu_partial) { @@ -82,7 +82,7 @@ Signed-off-by: Sebastian Andrzej Siewior oldpage = NULL; pobjects = 0; pages = 0; -@@ -2343,7 +2372,22 @@ static bool has_cpu_slab(int cpu, void * +@@ -2340,7 +2369,22 @@ static bool has_cpu_slab(int cpu, void * static void flush_all(struct kmem_cache *s) { @@ -105,7 +105,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* -@@ -2540,8 +2584,10 @@ static inline void *get_freelist(struct +@@ -2537,8 +2581,10 @@ static inline void *get_freelist(struct * already disabled (which is the case for bulk allocation). 
*/ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, @@ -117,7 +117,7 @@ Signed-off-by: Sebastian Andrzej Siewior void *freelist; struct page *page; -@@ -2597,6 +2643,13 @@ static void *___slab_alloc(struct kmem_c +@@ -2594,6 +2640,13 @@ static void *___slab_alloc(struct kmem_c VM_BUG_ON(!c->page->frozen); c->freelist = get_freepointer(s, freelist); c->tid = next_tid(c->tid); @@ -131,7 +131,7 @@ Signed-off-by: Sebastian Andrzej Siewior return freelist; new_slab: -@@ -2612,7 +2665,7 @@ static void *___slab_alloc(struct kmem_c +@@ -2609,7 +2662,7 @@ static void *___slab_alloc(struct kmem_c if (unlikely(!freelist)) { slab_out_of_memory(s, gfpflags, node); @@ -140,7 +140,7 @@ Signed-off-by: Sebastian Andrzej Siewior } page = c->page; -@@ -2625,7 +2678,7 @@ static void *___slab_alloc(struct kmem_c +@@ -2622,7 +2675,7 @@ static void *___slab_alloc(struct kmem_c goto new_slab; /* Slab failed checks. Next slab needed */ deactivate_slab(s, page, get_freepointer(s, freelist), c); @@ -149,15 +149,15 @@ Signed-off-by: Sebastian Andrzej Siewior } /* -@@ -2637,6 +2690,7 @@ static void *__slab_alloc(struct kmem_ca +@@ -2634,6 +2687,7 @@ static void *__slab_alloc(struct kmem_ca { void *p; unsigned long flags; + LIST_HEAD(tofree); local_irq_save(flags); - #ifdef CONFIG_PREEMPT -@@ -2648,8 +2702,9 @@ static void *__slab_alloc(struct kmem_ca + #ifdef CONFIG_PREEMPTION +@@ -2645,8 +2699,9 @@ static void *__slab_alloc(struct kmem_ca c = this_cpu_ptr(s->cpu_slab); #endif @@ -168,7 +168,7 @@ Signed-off-by: Sebastian Andrzej Siewior return p; } -@@ -3126,6 +3181,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca +@@ -3136,6 +3191,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca void **p) { struct kmem_cache_cpu *c; @@ -176,7 +176,7 @@ Signed-off-by: Sebastian Andrzej Siewior int i; /* memcg and kmem_cache debug support */ -@@ -3149,7 +3205,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca +@@ -3159,7 +3215,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca * of re-populating per CPU c->freelist */ p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, @@ -185,15 +185,15 @@ Signed-off-by: Sebastian Andrzej Siewior if (unlikely(!p[i])) goto error; -@@ -3161,6 +3217,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca +@@ -3174,6 +3230,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca } c->tid = next_tid(c->tid); local_irq_enable(); + free_delayed(&to_free); /* Clear memory outside IRQ disabled fastpath loop */ - if (unlikely(flags & __GFP_ZERO)) { -@@ -3175,6 +3232,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca + if (unlikely(slab_want_init_on_alloc(flags, s))) { +@@ -3188,6 +3245,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca return i; error: local_irq_enable(); @@ -201,7 +201,7 @@ Signed-off-by: Sebastian Andrzej Siewior slab_post_alloc_hook(s, flags, i, p); __kmem_cache_free_bulk(s, i, p); return 0; -@@ -4223,6 +4281,12 @@ void __init kmem_cache_init(void) +@@ -4224,6 +4282,12 @@ void __init kmem_cache_init(void) { static __initdata struct kmem_cache boot_kmem_cache, boot_kmem_cache_node; diff --git a/patches/0004-mm-swap-Enable-use_pvec_lock-nohz_full-dependent.patch b/patches/0004-mm-swap-Enable-use_pvec_lock-nohz_full-dependent.patch new file mode 100644 index 000000000000..4ca49256f3ee --- /dev/null +++ b/patches/0004-mm-swap-Enable-use_pvec_lock-nohz_full-dependent.patch @@ -0,0 +1,56 @@ +From: Anna-Maria Gleixner +Date: Thu, 18 Apr 2019 11:09:07 +0200 +Subject: [PATCH 4/4] mm/swap: Enable "use_pvec_lock" nohz_full dependent + +When a system runs with CONFIG_NO_HZ_FULL enabled, the tick of CPUs listed +in 'nohz_full=' 
kernel command line parameter should be stopped whenever +possible. The tick stays longer stopped, when work for this CPU is handled +by another CPU. + +With the already introduced static key 'use_pvec_lock' there is the +possibility to prevent firing a worker for mm/swap work on a remote CPU +with a stopped tick. + +Therefore enabling the static key in case kernel command line parameter +'nohz_full=' setup was successful, which implies that CONFIG_NO_HZ_FULL is +set. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/sched/isolation.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +--- a/kernel/sched/isolation.c ++++ b/kernel/sched/isolation.c +@@ -8,6 +8,7 @@ + * + */ + #include "sched.h" ++#include "../../mm/internal.h" + + DEFINE_STATIC_KEY_FALSE(housekeeping_overridden); + EXPORT_SYMBOL_GPL(housekeeping_overridden); +@@ -139,10 +140,21 @@ static int __init housekeeping_setup(cha + static int __init housekeeping_nohz_full_setup(char *str) + { + unsigned int flags; ++ int ret; + + flags = HK_FLAG_TICK | HK_FLAG_WQ | HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC; + +- return housekeeping_setup(str, flags); ++ ret = housekeeping_setup(str, flags); ++ ++ /* ++ * Protect struct pagevec with a lock instead using preemption disable; ++ * with lock protection, remote handling of events instead of queue ++ * work on remote cpu is default behavior. ++ */ ++ if (ret) ++ static_branch_enable(&use_pvec_lock); ++ ++ return ret; + } + __setup("nohz_full=", housekeeping_nohz_full_setup); + diff --git a/patches/0004-workqueue-Convert-the-locks-to-raw-type.patch b/patches/0004-workqueue-Convert-the-locks-to-raw-type.patch new file mode 100644 index 000000000000..d731cf7d6eb9 --- /dev/null +++ b/patches/0004-workqueue-Convert-the-locks-to-raw-type.patch @@ -0,0 +1,663 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 22 May 2019 12:43:56 +0200 +Subject: [PATCH 4/4] workqueue: Convert the locks to raw type + +After all the workqueue and the timer rework, we can finally make the +worker_pool lock raw. +The lock is not held over an unbounded period of time/iterations. + +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/workqueue.c | 164 ++++++++++++++++++++++++++--------------------------- + 1 file changed, 82 insertions(+), 82 deletions(-) + +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -146,7 +146,7 @@ enum { + /* struct worker is defined in workqueue_internal.h */ + + struct worker_pool { +- spinlock_t lock; /* the pool lock */ ++ raw_spinlock_t lock; /* the pool lock */ + int cpu; /* I: the associated cpu */ + int node; /* I: the associated node ID */ + int id; /* I: pool ID */ +@@ -301,7 +301,7 @@ static struct workqueue_attrs *wq_update + + static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */ + static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */ +-static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ ++static DEFINE_RAW_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ + static DECLARE_SWAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */ + + static LIST_HEAD(workqueues); /* PR: list of all workqueues */ +@@ -826,7 +826,7 @@ static struct worker *first_idle_worker( + * Wake up the first idle worker of @pool. + * + * CONTEXT: +- * spin_lock_irq(pool->lock). ++ * raw_spin_lock_irq(pool->lock). 
+ */ + static void wake_up_worker(struct worker_pool *pool) + { +@@ -879,7 +879,7 @@ void wq_worker_sleeping(struct task_stru + return; + + worker->sleeping = 1; +- spin_lock_irq(&pool->lock); ++ raw_spin_lock_irq(&pool->lock); + + /* + * The counterpart of the following dec_and_test, implied mb, +@@ -898,7 +898,7 @@ void wq_worker_sleeping(struct task_stru + if (next) + wake_up_process(next->task); + } +- spin_unlock_irq(&pool->lock); ++ raw_spin_unlock_irq(&pool->lock); + } + + /** +@@ -909,7 +909,7 @@ void wq_worker_sleeping(struct task_stru + * the scheduler to get a worker's last known identity. + * + * CONTEXT: +- * spin_lock_irq(rq->lock) ++ * raw_spin_lock_irq(rq->lock) + * + * This function is called during schedule() when a kworker is going + * to sleep. It's used by psi to identify aggregation workers during +@@ -940,7 +940,7 @@ work_func_t wq_worker_last_func(struct t + * Set @flags in @worker->flags and adjust nr_running accordingly. + * + * CONTEXT: +- * spin_lock_irq(pool->lock) ++ * raw_spin_lock_irq(pool->lock) + */ + static inline void worker_set_flags(struct worker *worker, unsigned int flags) + { +@@ -965,7 +965,7 @@ static inline void worker_set_flags(stru + * Clear @flags in @worker->flags and adjust nr_running accordingly. + * + * CONTEXT: +- * spin_lock_irq(pool->lock) ++ * raw_spin_lock_irq(pool->lock) + */ + static inline void worker_clr_flags(struct worker *worker, unsigned int flags) + { +@@ -1013,7 +1013,7 @@ static inline void worker_clr_flags(stru + * actually occurs, it should be easy to locate the culprit work function. + * + * CONTEXT: +- * spin_lock_irq(pool->lock). ++ * raw_spin_lock_irq(pool->lock). + * + * Return: + * Pointer to worker which is executing @work if found, %NULL +@@ -1048,7 +1048,7 @@ static struct worker *find_worker_execut + * nested inside outer list_for_each_entry_safe(). + * + * CONTEXT: +- * spin_lock_irq(pool->lock). ++ * raw_spin_lock_irq(pool->lock). + */ + static void move_linked_works(struct work_struct *work, struct list_head *head, + struct work_struct **nextp) +@@ -1126,9 +1126,9 @@ static void put_pwq_unlocked(struct pool + * As both pwqs and pools are RCU protected, the + * following lock operations are safe. + */ +- spin_lock_irq(&pwq->pool->lock); ++ raw_spin_lock_irq(&pwq->pool->lock); + put_pwq(pwq); +- spin_unlock_irq(&pwq->pool->lock); ++ raw_spin_unlock_irq(&pwq->pool->lock); + } + } + +@@ -1161,7 +1161,7 @@ static void pwq_activate_first_delayed(s + * decrement nr_in_flight of its pwq and handle workqueue flushing. + * + * CONTEXT: +- * spin_lock_irq(pool->lock). ++ * raw_spin_lock_irq(pool->lock). + */ + static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color) + { +@@ -1260,7 +1260,7 @@ static int try_to_grab_pending(struct wo + if (!pool) + goto fail; + +- spin_lock(&pool->lock); ++ raw_spin_lock(&pool->lock); + /* + * work->data is guaranteed to point to pwq only while the work + * item is queued on pwq->wq, and both updating work->data to point +@@ -1289,11 +1289,11 @@ static int try_to_grab_pending(struct wo + /* work->data points to pwq iff queued, point to pool */ + set_work_pool_and_keep_pending(work, pool->id); + +- spin_unlock(&pool->lock); ++ raw_spin_unlock(&pool->lock); + rcu_read_unlock(); + return 1; + } +- spin_unlock(&pool->lock); ++ raw_spin_unlock(&pool->lock); + fail: + rcu_read_unlock(); + local_irq_restore(*flags); +@@ -1314,7 +1314,7 @@ static int try_to_grab_pending(struct wo + * work_struct flags. + * + * CONTEXT: +- * spin_lock_irq(pool->lock). 
++ * raw_spin_lock_irq(pool->lock). + */ + static void insert_work(struct pool_workqueue *pwq, struct work_struct *work, + struct list_head *head, unsigned int extra_flags) +@@ -1429,7 +1429,7 @@ static void __queue_work(int cpu, struct + if (last_pool && last_pool != pwq->pool) { + struct worker *worker; + +- spin_lock(&last_pool->lock); ++ raw_spin_lock(&last_pool->lock); + + worker = find_worker_executing_work(last_pool, work); + +@@ -1437,11 +1437,11 @@ static void __queue_work(int cpu, struct + pwq = worker->current_pwq; + } else { + /* meh... not running there, queue here */ +- spin_unlock(&last_pool->lock); +- spin_lock(&pwq->pool->lock); ++ raw_spin_unlock(&last_pool->lock); ++ raw_spin_lock(&pwq->pool->lock); + } + } else { +- spin_lock(&pwq->pool->lock); ++ raw_spin_lock(&pwq->pool->lock); + } + + /* +@@ -1454,7 +1454,7 @@ static void __queue_work(int cpu, struct + */ + if (unlikely(!pwq->refcnt)) { + if (wq->flags & WQ_UNBOUND) { +- spin_unlock(&pwq->pool->lock); ++ raw_spin_unlock(&pwq->pool->lock); + cpu_relax(); + goto retry; + } +@@ -1486,7 +1486,7 @@ static void __queue_work(int cpu, struct + insert_work(pwq, work, worklist, work_flags); + + out: +- spin_unlock(&pwq->pool->lock); ++ raw_spin_unlock(&pwq->pool->lock); + rcu_read_unlock(); + } + +@@ -1757,7 +1757,7 @@ EXPORT_SYMBOL(queue_rcu_work); + * necessary. + * + * LOCKING: +- * spin_lock_irq(pool->lock). ++ * raw_spin_lock_irq(pool->lock). + */ + static void worker_enter_idle(struct worker *worker) + { +@@ -1797,7 +1797,7 @@ static void worker_enter_idle(struct wor + * @worker is leaving idle state. Update stats. + * + * LOCKING: +- * spin_lock_irq(pool->lock). ++ * raw_spin_lock_irq(pool->lock). + */ + static void worker_leave_idle(struct worker *worker) + { +@@ -1935,11 +1935,11 @@ static struct worker *create_worker(stru + worker_attach_to_pool(worker, pool); + + /* start the newly created worker */ +- spin_lock_irq(&pool->lock); ++ raw_spin_lock_irq(&pool->lock); + worker->pool->nr_workers++; + worker_enter_idle(worker); + wake_up_process(worker->task); +- spin_unlock_irq(&pool->lock); ++ raw_spin_unlock_irq(&pool->lock); + + return worker; + +@@ -1958,7 +1958,7 @@ static struct worker *create_worker(stru + * be idle. + * + * CONTEXT: +- * spin_lock_irq(pool->lock). ++ * raw_spin_lock_irq(pool->lock). 
+ */ + static void destroy_worker(struct worker *worker) + { +@@ -1984,7 +1984,7 @@ static void idle_worker_timeout(struct t + { + struct worker_pool *pool = from_timer(pool, t, idle_timer); + +- spin_lock_irq(&pool->lock); ++ raw_spin_lock_irq(&pool->lock); + + while (too_many_workers(pool)) { + struct worker *worker; +@@ -2002,7 +2002,7 @@ static void idle_worker_timeout(struct t + destroy_worker(worker); + } + +- spin_unlock_irq(&pool->lock); ++ raw_spin_unlock_irq(&pool->lock); + } + + static void send_mayday(struct work_struct *work) +@@ -2033,8 +2033,8 @@ static void pool_mayday_timeout(struct t + struct worker_pool *pool = from_timer(pool, t, mayday_timer); + struct work_struct *work; + +- spin_lock_irq(&pool->lock); +- spin_lock(&wq_mayday_lock); /* for wq->maydays */ ++ raw_spin_lock_irq(&pool->lock); ++ raw_spin_lock(&wq_mayday_lock); /* for wq->maydays */ + + if (need_to_create_worker(pool)) { + /* +@@ -2047,8 +2047,8 @@ static void pool_mayday_timeout(struct t + send_mayday(work); + } + +- spin_unlock(&wq_mayday_lock); +- spin_unlock_irq(&pool->lock); ++ raw_spin_unlock(&wq_mayday_lock); ++ raw_spin_unlock_irq(&pool->lock); + + mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL); + } +@@ -2067,7 +2067,7 @@ static void pool_mayday_timeout(struct t + * may_start_working() %true. + * + * LOCKING: +- * spin_lock_irq(pool->lock) which may be released and regrabbed ++ * raw_spin_lock_irq(pool->lock) which may be released and regrabbed + * multiple times. Does GFP_KERNEL allocations. Called only from + * manager. + */ +@@ -2076,7 +2076,7 @@ static void maybe_create_worker(struct w + __acquires(&pool->lock) + { + restart: +- spin_unlock_irq(&pool->lock); ++ raw_spin_unlock_irq(&pool->lock); + + /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */ + mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT); +@@ -2092,7 +2092,7 @@ static void maybe_create_worker(struct w + } + + del_timer_sync(&pool->mayday_timer); +- spin_lock_irq(&pool->lock); ++ raw_spin_lock_irq(&pool->lock); + /* + * This is necessary even after a new worker was just successfully + * created as @pool->lock was dropped and the new worker might have +@@ -2115,7 +2115,7 @@ static void maybe_create_worker(struct w + * and may_start_working() is true. + * + * CONTEXT: +- * spin_lock_irq(pool->lock) which may be released and regrabbed ++ * raw_spin_lock_irq(pool->lock) which may be released and regrabbed + * multiple times. Does GFP_KERNEL allocations. + * + * Return: +@@ -2154,7 +2154,7 @@ static bool manage_workers(struct worker + * call this function to process a work. + * + * CONTEXT: +- * spin_lock_irq(pool->lock) which is released and regrabbed. ++ * raw_spin_lock_irq(pool->lock) which is released and regrabbed. + */ + static void process_one_work(struct worker *worker, struct work_struct *work) + __releases(&pool->lock) +@@ -2236,7 +2236,7 @@ static void process_one_work(struct work + */ + set_work_pool_and_clear_pending(work, pool->id); + +- spin_unlock_irq(&pool->lock); ++ raw_spin_unlock_irq(&pool->lock); + + lock_map_acquire(&pwq->wq->lockdep_map); + lock_map_acquire(&lockdep_map); +@@ -2291,7 +2291,7 @@ static void process_one_work(struct work + */ + cond_resched(); + +- spin_lock_irq(&pool->lock); ++ raw_spin_lock_irq(&pool->lock); + + /* clear cpu intensive status */ + if (unlikely(cpu_intensive)) +@@ -2317,7 +2317,7 @@ static void process_one_work(struct work + * fetches a work from the top and executes it. 
+ * + * CONTEXT: +- * spin_lock_irq(pool->lock) which may be released and regrabbed ++ * raw_spin_lock_irq(pool->lock) which may be released and regrabbed + * multiple times. + */ + static void process_scheduled_works(struct worker *worker) +@@ -2359,11 +2359,11 @@ static int worker_thread(void *__worker) + /* tell the scheduler that this is a workqueue worker */ + set_pf_worker(true); + woke_up: +- spin_lock_irq(&pool->lock); ++ raw_spin_lock_irq(&pool->lock); + + /* am I supposed to die? */ + if (unlikely(worker->flags & WORKER_DIE)) { +- spin_unlock_irq(&pool->lock); ++ raw_spin_unlock_irq(&pool->lock); + WARN_ON_ONCE(!list_empty(&worker->entry)); + set_pf_worker(false); + +@@ -2429,7 +2429,7 @@ static int worker_thread(void *__worker) + */ + worker_enter_idle(worker); + __set_current_state(TASK_IDLE); +- spin_unlock_irq(&pool->lock); ++ raw_spin_unlock_irq(&pool->lock); + schedule(); + goto woke_up; + } +@@ -2483,7 +2483,7 @@ static int rescuer_thread(void *__rescue + should_stop = kthread_should_stop(); + + /* see whether any pwq is asking for help */ +- spin_lock_irq(&wq_mayday_lock); ++ raw_spin_lock_irq(&wq_mayday_lock); + + while (!list_empty(&wq->maydays)) { + struct pool_workqueue *pwq = list_first_entry(&wq->maydays, +@@ -2495,11 +2495,11 @@ static int rescuer_thread(void *__rescue + __set_current_state(TASK_RUNNING); + list_del_init(&pwq->mayday_node); + +- spin_unlock_irq(&wq_mayday_lock); ++ raw_spin_unlock_irq(&wq_mayday_lock); + + worker_attach_to_pool(rescuer, pool); + +- spin_lock_irq(&pool->lock); ++ raw_spin_lock_irq(&pool->lock); + + /* + * Slurp in all works issued via this workqueue and +@@ -2528,10 +2528,10 @@ static int rescuer_thread(void *__rescue + * incur MAYDAY_INTERVAL delay inbetween. + */ + if (need_to_create_worker(pool)) { +- spin_lock(&wq_mayday_lock); ++ raw_spin_lock(&wq_mayday_lock); + get_pwq(pwq); + list_move_tail(&pwq->mayday_node, &wq->maydays); +- spin_unlock(&wq_mayday_lock); ++ raw_spin_unlock(&wq_mayday_lock); + } + } + +@@ -2549,14 +2549,14 @@ static int rescuer_thread(void *__rescue + if (need_more_worker(pool)) + wake_up_worker(pool); + +- spin_unlock_irq(&pool->lock); ++ raw_spin_unlock_irq(&pool->lock); + + worker_detach_from_pool(rescuer); + +- spin_lock_irq(&wq_mayday_lock); ++ raw_spin_lock_irq(&wq_mayday_lock); + } + +- spin_unlock_irq(&wq_mayday_lock); ++ raw_spin_unlock_irq(&wq_mayday_lock); + + if (should_stop) { + __set_current_state(TASK_RUNNING); +@@ -2636,7 +2636,7 @@ static void wq_barrier_func(struct work_ + * underneath us, so we can't reliably determine pwq from @target. + * + * CONTEXT: +- * spin_lock_irq(pool->lock). ++ * raw_spin_lock_irq(pool->lock). 
+ */ + static void insert_wq_barrier(struct pool_workqueue *pwq, + struct wq_barrier *barr, +@@ -2723,7 +2723,7 @@ static bool flush_workqueue_prep_pwqs(st + for_each_pwq(pwq, wq) { + struct worker_pool *pool = pwq->pool; + +- spin_lock_irq(&pool->lock); ++ raw_spin_lock_irq(&pool->lock); + + if (flush_color >= 0) { + WARN_ON_ONCE(pwq->flush_color != -1); +@@ -2740,7 +2740,7 @@ static bool flush_workqueue_prep_pwqs(st + pwq->work_color = work_color; + } + +- spin_unlock_irq(&pool->lock); ++ raw_spin_unlock_irq(&pool->lock); + } + + if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush)) +@@ -2940,9 +2940,9 @@ void drain_workqueue(struct workqueue_st + for_each_pwq(pwq, wq) { + bool drained; + +- spin_lock_irq(&pwq->pool->lock); ++ raw_spin_lock_irq(&pwq->pool->lock); + drained = !pwq->nr_active && list_empty(&pwq->delayed_works); +- spin_unlock_irq(&pwq->pool->lock); ++ raw_spin_unlock_irq(&pwq->pool->lock); + + if (drained) + continue; +@@ -2978,7 +2978,7 @@ static bool start_flush_work(struct work + return false; + } + +- spin_lock_irq(&pool->lock); ++ raw_spin_lock_irq(&pool->lock); + /* see the comment in try_to_grab_pending() with the same code */ + pwq = get_work_pwq(work); + if (pwq) { +@@ -2994,7 +2994,7 @@ static bool start_flush_work(struct work + check_flush_dependency(pwq->wq, work); + + insert_wq_barrier(pwq, barr, work, worker); +- spin_unlock_irq(&pool->lock); ++ raw_spin_unlock_irq(&pool->lock); + + /* + * Force a lock recursion deadlock when using flush_work() inside a +@@ -3013,7 +3013,7 @@ static bool start_flush_work(struct work + rcu_read_unlock(); + return true; + already_gone: +- spin_unlock_irq(&pool->lock); ++ raw_spin_unlock_irq(&pool->lock); + rcu_read_unlock(); + return false; + } +@@ -3406,7 +3406,7 @@ static bool wqattrs_equal(const struct w + */ + static int init_worker_pool(struct worker_pool *pool) + { +- spin_lock_init(&pool->lock); ++ raw_spin_lock_init(&pool->lock); + pool->id = -1; + pool->cpu = -1; + pool->node = NUMA_NO_NODE; +@@ -3532,7 +3532,7 @@ static void put_unbound_pool(struct work + * @pool's workers from blocking on attach_mutex. We're the last + * manager and @pool gets freed with the flag set. 
+ */ +- spin_lock_irq(&pool->lock); ++ raw_spin_lock_irq(&pool->lock); + swait_event_lock_irq(wq_manager_wait, + !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock); + pool->flags |= POOL_MANAGER_ACTIVE; +@@ -3540,7 +3540,7 @@ static void put_unbound_pool(struct work + while ((worker = first_idle_worker(pool))) + destroy_worker(worker); + WARN_ON(pool->nr_workers || pool->nr_idle); +- spin_unlock_irq(&pool->lock); ++ raw_spin_unlock_irq(&pool->lock); + + mutex_lock(&wq_pool_attach_mutex); + if (!list_empty(&pool->workers)) +@@ -3696,7 +3696,7 @@ static void pwq_adjust_max_active(struct + return; + + /* this function can be called during early boot w/ irq disabled */ +- spin_lock_irqsave(&pwq->pool->lock, flags); ++ raw_spin_lock_irqsave(&pwq->pool->lock, flags); + + /* + * During [un]freezing, the caller is responsible for ensuring that +@@ -3719,7 +3719,7 @@ static void pwq_adjust_max_active(struct + pwq->max_active = 0; + } + +- spin_unlock_irqrestore(&pwq->pool->lock, flags); ++ raw_spin_unlock_irqrestore(&pwq->pool->lock, flags); + } + + /* initialize newly alloced @pwq which is associated with @wq and @pool */ +@@ -4121,9 +4121,9 @@ static void wq_update_unbound_numa(struc + + use_dfl_pwq: + mutex_lock(&wq->mutex); +- spin_lock_irq(&wq->dfl_pwq->pool->lock); ++ raw_spin_lock_irq(&wq->dfl_pwq->pool->lock); + get_pwq(wq->dfl_pwq); +- spin_unlock_irq(&wq->dfl_pwq->pool->lock); ++ raw_spin_unlock_irq(&wq->dfl_pwq->pool->lock); + old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq); + out_unlock: + mutex_unlock(&wq->mutex); +@@ -4519,10 +4519,10 @@ unsigned int work_busy(struct work_struc + rcu_read_lock(); + pool = get_work_pool(work); + if (pool) { +- spin_lock_irqsave(&pool->lock, flags); ++ raw_spin_lock_irqsave(&pool->lock, flags); + if (find_worker_executing_work(pool, work)) + ret |= WORK_BUSY_RUNNING; +- spin_unlock_irqrestore(&pool->lock, flags); ++ raw_spin_unlock_irqrestore(&pool->lock, flags); + } + rcu_read_unlock(); + +@@ -4728,10 +4728,10 @@ void show_workqueue_state(void) + pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags); + + for_each_pwq(pwq, wq) { +- spin_lock_irqsave(&pwq->pool->lock, flags); ++ raw_spin_lock_irqsave(&pwq->pool->lock, flags); + if (pwq->nr_active || !list_empty(&pwq->delayed_works)) + show_pwq(pwq); +- spin_unlock_irqrestore(&pwq->pool->lock, flags); ++ raw_spin_unlock_irqrestore(&pwq->pool->lock, flags); + /* + * We could be printing a lot from atomic context, e.g. + * sysrq-t -> show_workqueue_state(). Avoid triggering +@@ -4745,7 +4745,7 @@ void show_workqueue_state(void) + struct worker *worker; + bool first = true; + +- spin_lock_irqsave(&pool->lock, flags); ++ raw_spin_lock_irqsave(&pool->lock, flags); + if (pool->nr_workers == pool->nr_idle) + goto next_pool; + +@@ -4764,7 +4764,7 @@ void show_workqueue_state(void) + } + pr_cont("\n"); + next_pool: +- spin_unlock_irqrestore(&pool->lock, flags); ++ raw_spin_unlock_irqrestore(&pool->lock, flags); + /* + * We could be printing a lot from atomic context, e.g. + * sysrq-t -> show_workqueue_state(). Avoid triggering +@@ -4794,7 +4794,7 @@ void wq_worker_comm(char *buf, size_t si + struct worker_pool *pool = worker->pool; + + if (pool) { +- spin_lock_irq(&pool->lock); ++ raw_spin_lock_irq(&pool->lock); + /* + * ->desc tracks information (wq name or + * set_worker_desc()) for the latest execution. 
If +@@ -4808,7 +4808,7 @@ void wq_worker_comm(char *buf, size_t si + scnprintf(buf + off, size - off, "-%s", + worker->desc); + } +- spin_unlock_irq(&pool->lock); ++ raw_spin_unlock_irq(&pool->lock); + } + } + +@@ -4839,7 +4839,7 @@ static void unbind_workers(int cpu) + + for_each_cpu_worker_pool(pool, cpu) { + mutex_lock(&wq_pool_attach_mutex); +- spin_lock_irq(&pool->lock); ++ raw_spin_lock_irq(&pool->lock); + + /* + * We've blocked all attach/detach operations. Make all workers +@@ -4853,7 +4853,7 @@ static void unbind_workers(int cpu) + + pool->flags |= POOL_DISASSOCIATED; + +- spin_unlock_irq(&pool->lock); ++ raw_spin_unlock_irq(&pool->lock); + mutex_unlock(&wq_pool_attach_mutex); + + /* +@@ -4879,9 +4879,9 @@ static void unbind_workers(int cpu) + * worker blocking could lead to lengthy stalls. Kick off + * unbound chain execution of currently pending work items. + */ +- spin_lock_irq(&pool->lock); ++ raw_spin_lock_irq(&pool->lock); + wake_up_worker(pool); +- spin_unlock_irq(&pool->lock); ++ raw_spin_unlock_irq(&pool->lock); + } + } + +@@ -4908,7 +4908,7 @@ static void rebind_workers(struct worker + WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, + pool->attrs->cpumask) < 0); + +- spin_lock_irq(&pool->lock); ++ raw_spin_lock_irq(&pool->lock); + + pool->flags &= ~POOL_DISASSOCIATED; + +@@ -4947,7 +4947,7 @@ static void rebind_workers(struct worker + WRITE_ONCE(worker->flags, worker_flags); + } + +- spin_unlock_irq(&pool->lock); ++ raw_spin_unlock_irq(&pool->lock); + } + + /** diff --git a/patches/0005-jbd2-Don-t-call-__bforget-unnecessarily.patch b/patches/0005-jbd2-Don-t-call-__bforget-unnecessarily.patch new file mode 100644 index 000000000000..fe92c3b6d922 --- /dev/null +++ b/patches/0005-jbd2-Don-t-call-__bforget-unnecessarily.patch @@ -0,0 +1,58 @@ +From: Jan Kara +Date: Fri, 9 Aug 2019 14:42:31 +0200 +Subject: [PATCH 5/7] jbd2: Don't call __bforget() unnecessarily + +jbd2_journal_forget() jumps to 'not_jbd' branch which calls __bforget() +in cases where the buffer is clean which is pointless. In case of failed +assertion, it can be even argued that it is safer not to touch buffer's +dirty bits. Also logically it makes more sense to just jump to 'drop' +and that will make logic also simpler when we switch bh_state_lock to a +spinlock. 
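In reduced form the exit-path change is easy to see; the following is a simplified sketch of the two labels only (not the full jbd2_journal_forget() body), using the names from the hunks below:

	/* old: clean/unjournaled cases went through not_jbd */
	not_jbd:
		jbd_unlock_bh_state(bh);
		__bforget(bh);          /* also discards the buffer's dirty state */
		goto drop;

	/* new: those cases jump straight to the common exit */
	drop:
		jbd_unlock_bh_state(bh);
		__brelse(bh);           /* only drops the reference */
		if (drop_reserve)
			handle->h_buffer_credits++;
		return err;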
+ +Signed-off-by: Jan Kara +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/jbd2/transaction.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/fs/jbd2/transaction.c ++++ b/fs/jbd2/transaction.c +@@ -1550,7 +1550,7 @@ int jbd2_journal_forget (handle_t *handl + if (!J_EXPECT_JH(jh, !jh->b_committed_data, + "inconsistent data on disk")) { + err = -EIO; +- goto not_jbd; ++ goto drop; + } + + /* keep track of whether or not this transaction modified us */ +@@ -1640,7 +1640,7 @@ int jbd2_journal_forget (handle_t *handl + if (!jh->b_cp_transaction) { + JBUFFER_TRACE(jh, "belongs to none transaction"); + spin_unlock(&journal->j_list_lock); +- goto not_jbd; ++ goto drop; + } + + /* +@@ -1650,7 +1650,7 @@ int jbd2_journal_forget (handle_t *handl + if (!buffer_dirty(bh)) { + __jbd2_journal_remove_checkpoint(jh); + spin_unlock(&journal->j_list_lock); +- goto not_jbd; ++ goto drop; + } + + /* +@@ -1663,10 +1663,9 @@ int jbd2_journal_forget (handle_t *handl + __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); + spin_unlock(&journal->j_list_lock); + } +- ++drop: + jbd_unlock_bh_state(bh); + __brelse(bh); +-drop: + if (drop_reserve) { + /* no need to reserve log space for this block -bzzz */ + handle->h_buffer_credits++; diff --git a/patches/0006-jbd2-Make-state-lock-a-spinlock.patch b/patches/0006-jbd2-Make-state-lock-a-spinlock.patch new file mode 100644 index 000000000000..67433727d495 --- /dev/null +++ b/patches/0006-jbd2-Make-state-lock-a-spinlock.patch @@ -0,0 +1,675 @@ +From: Thomas Gleixner +Date: Fri, 9 Aug 2019 14:42:32 +0200 +Subject: [PATCH 6/7] jbd2: Make state lock a spinlock + +Bit-spinlocks are problematic on PREEMPT_RT if functions which might sleep +on RT, e.g. spin_lock(), alloc/free(), are invoked inside the lock held +region because bit spinlocks disable preemption even on RT. + +A first attempt was to replace state lock with a spinlock placed in struct +buffer_head and make the locking conditional on PREEMPT_RT and +DEBUG_BIT_SPINLOCKS. + +Jan pointed out that there is a 4 byte hole in struct journal_head where a +regular spinlock fits in and he would not object to convert the state lock +to a spinlock unconditionally. + +Aside of solving the RT problem, this also gains lockdep coverage for the +journal head state lock (bit-spinlocks are not covered by lockdep as it's +hard to fit a lockdep map into a single bit). + +The trivial change would have been to convert the jbd_*lock_bh_state() +inlines, but that comes with the downside that these functions take a +buffer head pointer which needs to be converted to a journal head pointer +which adds another level of indirection. + +As almost all functions which use this lock have a journal head pointer +readily available, it makes more sense to remove the lock helper inlines +and write out spin_*lock() at all call sites. + +Fixup all locking comments as well. 
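As a minimal illustration of what the conversion buys on PREEMPT_RT (a sketch only; state_lock_old()/state_lock_new() are hypothetical wrappers, not functions from the patch):

	/* old scheme: state serialized by a bit spinlock in bh->b_state */
	static void state_lock_old(struct buffer_head *bh)
	{
		bit_spin_lock(BH_State, &bh->b_state);   /* disables preemption, even on RT */
		/* ... nothing in here may sleep on PREEMPT_RT ... */
		bit_spin_unlock(BH_State, &bh->b_state);
	}

	/* new scheme: a regular spinlock_t in the 4 byte hole of journal_head,
	 * initialized with spin_lock_init() when the journal head is allocated */
	static void state_lock_new(struct journal_head *jh)
	{
		spin_lock(&jh->b_state_lock);            /* sleeping rtmutex-based lock on RT */
		lockdep_assert_held(&jh->b_state_lock);  /* and lockdep can now track it */
		spin_unlock(&jh->b_state_lock);
	}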
+ +Suggested-by: Jan Kara +Signed-off-by: Thomas Gleixner +Signed-off-by: Jan Kara +Cc: "Theodore Ts'o" +Cc: Mark Fasheh +Cc: Joseph Qi +Cc: Joel Becker +Cc: Jan Kara +Cc: linux-ext4@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/jbd2/commit.c | 8 +-- + fs/jbd2/journal.c | 10 ++-- + fs/jbd2/transaction.c | 100 ++++++++++++++++++++----------------------- + fs/ocfs2/suballoc.c | 19 ++++---- + include/linux/jbd2.h | 20 -------- + include/linux/journal-head.h | 21 ++++++--- + 6 files changed, 84 insertions(+), 94 deletions(-) + +--- a/fs/jbd2/commit.c ++++ b/fs/jbd2/commit.c +@@ -482,10 +482,10 @@ void jbd2_journal_commit_transaction(jou + if (jh->b_committed_data) { + struct buffer_head *bh = jh2bh(jh); + +- jbd_lock_bh_state(bh); ++ spin_lock(&jh->b_state_lock); + jbd2_free(jh->b_committed_data, bh->b_size); + jh->b_committed_data = NULL; +- jbd_unlock_bh_state(bh); ++ spin_unlock(&jh->b_state_lock); + } + jbd2_journal_refile_buffer(journal, jh); + } +@@ -928,7 +928,7 @@ void jbd2_journal_commit_transaction(jou + * done with it. + */ + get_bh(bh); +- jbd_lock_bh_state(bh); ++ spin_lock(&jh->b_state_lock); + J_ASSERT_JH(jh, jh->b_transaction == commit_transaction); + + /* +@@ -1024,7 +1024,7 @@ void jbd2_journal_commit_transaction(jou + } + JBUFFER_TRACE(jh, "refile or unfile buffer"); + drop_ref = __jbd2_journal_refile_buffer(jh); +- jbd_unlock_bh_state(bh); ++ spin_unlock(&jh->b_state_lock); + if (drop_ref) + jbd2_journal_put_journal_head(jh); + if (try_to_free) +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -363,7 +363,7 @@ int jbd2_journal_write_metadata_buffer(t + /* keep subsequent assertions sane */ + atomic_set(&new_bh->b_count, 1); + +- jbd_lock_bh_state(bh_in); ++ spin_lock(&jh_in->b_state_lock); + repeat: + /* + * If a new transaction has already done a buffer copy-out, then +@@ -405,13 +405,13 @@ int jbd2_journal_write_metadata_buffer(t + if (need_copy_out && !done_copy_out) { + char *tmp; + +- jbd_unlock_bh_state(bh_in); ++ spin_unlock(&jh_in->b_state_lock); + tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); + if (!tmp) { + brelse(new_bh); + return -ENOMEM; + } +- jbd_lock_bh_state(bh_in); ++ spin_lock(&jh_in->b_state_lock); + if (jh_in->b_frozen_data) { + jbd2_free(tmp, bh_in->b_size); + goto repeat; +@@ -464,7 +464,7 @@ int jbd2_journal_write_metadata_buffer(t + __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); + spin_unlock(&journal->j_list_lock); + set_buffer_shadow(bh_in); +- jbd_unlock_bh_state(bh_in); ++ spin_unlock(&jh_in->b_state_lock); + + return do_escape | (done_copy_out << 1); + } +@@ -2410,6 +2410,8 @@ static struct journal_head *journal_allo + ret = kmem_cache_zalloc(jbd2_journal_head_cache, + GFP_NOFS | __GFP_NOFAIL); + } ++ if (ret) ++ spin_lock_init(&ret->b_state_lock); + return ret; + } + +--- a/fs/jbd2/transaction.c ++++ b/fs/jbd2/transaction.c +@@ -879,7 +879,7 @@ do_get_write_access(handle_t *handle, st + + start_lock = jiffies; + lock_buffer(bh); +- jbd_lock_bh_state(bh); ++ spin_lock(&jh->b_state_lock); + + /* If it takes too long to lock the buffer, trace it */ + time_lock = jbd2_time_diff(start_lock, jiffies); +@@ -929,7 +929,7 @@ do_get_write_access(handle_t *handle, st + + error = -EROFS; + if (is_handle_aborted(handle)) { +- jbd_unlock_bh_state(bh); ++ spin_unlock(&jh->b_state_lock); + goto out; + } + error = 0; +@@ -993,7 +993,7 @@ do_get_write_access(handle_t *handle, st + */ + if (buffer_shadow(bh)) { + JBUFFER_TRACE(jh, "on shadow: sleep"); +- jbd_unlock_bh_state(bh); ++ spin_unlock(&jh->b_state_lock); + 
wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE); + goto repeat; + } +@@ -1014,7 +1014,7 @@ do_get_write_access(handle_t *handle, st + JBUFFER_TRACE(jh, "generate frozen data"); + if (!frozen_buffer) { + JBUFFER_TRACE(jh, "allocate memory for buffer"); +- jbd_unlock_bh_state(bh); ++ spin_unlock(&jh->b_state_lock); + frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size, + GFP_NOFS | __GFP_NOFAIL); + goto repeat; +@@ -1033,7 +1033,7 @@ do_get_write_access(handle_t *handle, st + jh->b_next_transaction = transaction; + + done: +- jbd_unlock_bh_state(bh); ++ spin_unlock(&jh->b_state_lock); + + /* + * If we are about to journal a buffer, then any revoke pending on it is +@@ -1172,7 +1172,7 @@ int jbd2_journal_get_create_access(handl + * that case: the transaction must have deleted the buffer for it to be + * reused here. + */ +- jbd_lock_bh_state(bh); ++ spin_lock(&jh->b_state_lock); + J_ASSERT_JH(jh, (jh->b_transaction == transaction || + jh->b_transaction == NULL || + (jh->b_transaction == journal->j_committing_transaction && +@@ -1207,7 +1207,7 @@ int jbd2_journal_get_create_access(handl + jh->b_next_transaction = transaction; + spin_unlock(&journal->j_list_lock); + } +- jbd_unlock_bh_state(bh); ++ spin_unlock(&jh->b_state_lock); + + /* + * akpm: I added this. ext3_alloc_branch can pick up new indirect +@@ -1275,13 +1275,13 @@ int jbd2_journal_get_undo_access(handle_ + committed_data = jbd2_alloc(jh2bh(jh)->b_size, + GFP_NOFS|__GFP_NOFAIL); + +- jbd_lock_bh_state(bh); ++ spin_lock(&jh->b_state_lock); + if (!jh->b_committed_data) { + /* Copy out the current buffer contents into the + * preserved, committed copy. */ + JBUFFER_TRACE(jh, "generate b_committed data"); + if (!committed_data) { +- jbd_unlock_bh_state(bh); ++ spin_unlock(&jh->b_state_lock); + goto repeat; + } + +@@ -1289,7 +1289,7 @@ int jbd2_journal_get_undo_access(handle_ + committed_data = NULL; + memcpy(jh->b_committed_data, bh->b_data, bh->b_size); + } +- jbd_unlock_bh_state(bh); ++ spin_unlock(&jh->b_state_lock); + out: + jbd2_journal_put_journal_head(jh); + if (unlikely(committed_data)) +@@ -1390,16 +1390,16 @@ int jbd2_journal_dirty_metadata(handle_t + */ + if (jh->b_transaction != transaction && + jh->b_next_transaction != transaction) { +- jbd_lock_bh_state(bh); ++ spin_lock(&jh->b_state_lock); + J_ASSERT_JH(jh, jh->b_transaction == transaction || + jh->b_next_transaction == transaction); +- jbd_unlock_bh_state(bh); ++ spin_unlock(&jh->b_state_lock); + } + if (jh->b_modified == 1) { + /* If it's in our transaction it must be in BJ_Metadata list. 
*/ + if (jh->b_transaction == transaction && + jh->b_jlist != BJ_Metadata) { +- jbd_lock_bh_state(bh); ++ spin_lock(&jh->b_state_lock); + if (jh->b_transaction == transaction && + jh->b_jlist != BJ_Metadata) + pr_err("JBD2: assertion failure: h_type=%u " +@@ -1409,13 +1409,13 @@ int jbd2_journal_dirty_metadata(handle_t + jh->b_jlist); + J_ASSERT_JH(jh, jh->b_transaction != transaction || + jh->b_jlist == BJ_Metadata); +- jbd_unlock_bh_state(bh); ++ spin_unlock(&jh->b_state_lock); + } + goto out; + } + + journal = transaction->t_journal; +- jbd_lock_bh_state(bh); ++ spin_lock(&jh->b_state_lock); + + if (jh->b_modified == 0) { + /* +@@ -1501,7 +1501,7 @@ int jbd2_journal_dirty_metadata(handle_t + __jbd2_journal_file_buffer(jh, transaction, BJ_Metadata); + spin_unlock(&journal->j_list_lock); + out_unlock_bh: +- jbd_unlock_bh_state(bh); ++ spin_unlock(&jh->b_state_lock); + out: + JBUFFER_TRACE(jh, "exit"); + return ret; +@@ -1539,11 +1539,13 @@ int jbd2_journal_forget (handle_t *handl + + BUFFER_TRACE(bh, "entry"); + +- jbd_lock_bh_state(bh); ++ jh = jbd2_journal_grab_journal_head(bh); ++ if (!jh) { ++ __bforget(bh); ++ return 0; ++ } + +- if (!buffer_jbd(bh)) +- goto not_jbd; +- jh = bh2jh(bh); ++ spin_lock(&jh->b_state_lock); + + /* Critical error: attempting to delete a bitmap buffer, maybe? + * Don't do any jbd operations, and return an error. */ +@@ -1664,18 +1666,14 @@ int jbd2_journal_forget (handle_t *handl + spin_unlock(&journal->j_list_lock); + } + drop: +- jbd_unlock_bh_state(bh); + __brelse(bh); ++ spin_unlock(&jh->b_state_lock); ++ jbd2_journal_put_journal_head(jh); + if (drop_reserve) { + /* no need to reserve log space for this block -bzzz */ + handle->h_buffer_credits++; + } + return err; +- +-not_jbd: +- jbd_unlock_bh_state(bh); +- __bforget(bh); +- goto drop; + } + + /** +@@ -1874,7 +1872,7 @@ int jbd2_journal_stop(handle_t *handle) + * + * j_list_lock is held. + * +- * jbd_lock_bh_state(jh2bh(jh)) is held. ++ * jh->b_state_lock is held. + */ + + static inline void +@@ -1898,7 +1896,7 @@ static inline void + * + * Called with j_list_lock held, and the journal may not be locked. + * +- * jbd_lock_bh_state(jh2bh(jh)) is held. ++ * jh->b_state_lock is held. + */ + + static inline void +@@ -1930,7 +1928,7 @@ static void __jbd2_journal_temp_unlink_b + transaction_t *transaction; + struct buffer_head *bh = jh2bh(jh); + +- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); ++ lockdep_assert_held(&jh->b_state_lock); + transaction = jh->b_transaction; + if (transaction) + assert_spin_locked(&transaction->t_journal->j_list_lock); +@@ -1984,11 +1982,11 @@ void jbd2_journal_unfile_buffer(journal_ + + /* Get reference so that buffer cannot be freed before we unlock it */ + get_bh(bh); +- jbd_lock_bh_state(bh); ++ spin_lock(&jh->b_state_lock); + spin_lock(&journal->j_list_lock); + __jbd2_journal_unfile_buffer(jh); + spin_unlock(&journal->j_list_lock); +- jbd_unlock_bh_state(bh); ++ spin_unlock(&jh->b_state_lock); + jbd2_journal_put_journal_head(jh); + __brelse(bh); + } +@@ -1996,7 +1994,7 @@ void jbd2_journal_unfile_buffer(journal_ + /* + * Called from jbd2_journal_try_to_free_buffers(). 
+ * +- * Called under jbd_lock_bh_state(bh) ++ * Called under jh->b_state_lock + */ + static void + __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) +@@ -2083,10 +2081,10 @@ int jbd2_journal_try_to_free_buffers(jou + if (!jh) + continue; + +- jbd_lock_bh_state(bh); ++ spin_lock(&jh->b_state_lock); + __journal_try_to_free_buffer(journal, bh); ++ spin_unlock(&jh->b_state_lock); + jbd2_journal_put_journal_head(jh); +- jbd_unlock_bh_state(bh); + if (buffer_jbd(bh)) + goto busy; + } while ((bh = bh->b_this_page) != head); +@@ -2107,7 +2105,7 @@ int jbd2_journal_try_to_free_buffers(jou + * + * Called under j_list_lock. + * +- * Called under jbd_lock_bh_state(bh). ++ * Called under jh->b_state_lock. + */ + static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) + { +@@ -2201,7 +2199,7 @@ static int journal_unmap_buffer(journal_ + + /* OK, we have data buffer in journaled mode */ + write_lock(&journal->j_state_lock); +- jbd_lock_bh_state(bh); ++ spin_lock(&jh->b_state_lock); + spin_lock(&journal->j_list_lock); + + /* +@@ -2282,10 +2280,10 @@ static int journal_unmap_buffer(journal_ + * for commit and try again. + */ + if (partial_page) { +- jbd2_journal_put_journal_head(jh); + spin_unlock(&journal->j_list_lock); +- jbd_unlock_bh_state(bh); ++ spin_unlock(&jh->b_state_lock); + write_unlock(&journal->j_state_lock); ++ jbd2_journal_put_journal_head(jh); + return -EBUSY; + } + /* +@@ -2297,10 +2295,10 @@ static int journal_unmap_buffer(journal_ + set_buffer_freed(bh); + if (journal->j_running_transaction && buffer_jbddirty(bh)) + jh->b_next_transaction = journal->j_running_transaction; +- jbd2_journal_put_journal_head(jh); + spin_unlock(&journal->j_list_lock); +- jbd_unlock_bh_state(bh); ++ spin_unlock(&jh->b_state_lock); + write_unlock(&journal->j_state_lock); ++ jbd2_journal_put_journal_head(jh); + return 0; + } else { + /* Good, the buffer belongs to the running transaction. +@@ -2324,10 +2322,10 @@ static int journal_unmap_buffer(journal_ + * here. + */ + jh->b_modified = 0; +- jbd2_journal_put_journal_head(jh); + spin_unlock(&journal->j_list_lock); +- jbd_unlock_bh_state(bh); ++ spin_unlock(&jh->b_state_lock); + write_unlock(&journal->j_state_lock); ++ jbd2_journal_put_journal_head(jh); + zap_buffer_unlocked: + clear_buffer_dirty(bh); + J_ASSERT_BH(bh, !buffer_jbddirty(bh)); +@@ -2414,7 +2412,7 @@ void __jbd2_journal_file_buffer(struct j + int was_dirty = 0; + struct buffer_head *bh = jh2bh(jh); + +- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); ++ lockdep_assert_held(&jh->b_state_lock); + assert_spin_locked(&transaction->t_journal->j_list_lock); + + J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); +@@ -2476,11 +2474,11 @@ void __jbd2_journal_file_buffer(struct j + void jbd2_journal_file_buffer(struct journal_head *jh, + transaction_t *transaction, int jlist) + { +- jbd_lock_bh_state(jh2bh(jh)); ++ spin_lock(&jh->b_state_lock); + spin_lock(&transaction->t_journal->j_list_lock); + __jbd2_journal_file_buffer(jh, transaction, jlist); + spin_unlock(&transaction->t_journal->j_list_lock); +- jbd_unlock_bh_state(jh2bh(jh)); ++ spin_unlock(&jh->b_state_lock); + } + + /* +@@ -2490,7 +2488,7 @@ void jbd2_journal_file_buffer(struct jou + * buffer on that transaction's metadata list. 
+ * + * Called under j_list_lock +- * Called under jbd_lock_bh_state(jh2bh(jh)) ++ * Called under jh->b_state_lock + * + * When this function returns true, there's no next transaction to refile to + * and the caller has to drop jh reference through +@@ -2501,7 +2499,7 @@ bool __jbd2_journal_refile_buffer(struct + int was_dirty, jlist; + struct buffer_head *bh = jh2bh(jh); + +- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); ++ lockdep_assert_held(&jh->b_state_lock); + if (jh->b_transaction) + assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock); + +@@ -2547,17 +2545,13 @@ bool __jbd2_journal_refile_buffer(struct + */ + void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) + { +- struct buffer_head *bh = jh2bh(jh); + bool drop; + +- /* Get reference so that buffer cannot be freed before we unlock it */ +- get_bh(bh); +- jbd_lock_bh_state(bh); ++ spin_lock(&jh->b_state_lock); + spin_lock(&journal->j_list_lock); + drop = __jbd2_journal_refile_buffer(jh); +- jbd_unlock_bh_state(bh); ++ spin_unlock(&jh->b_state_lock); + spin_unlock(&journal->j_list_lock); +- __brelse(bh); + if (drop) + jbd2_journal_put_journal_head(jh); + } +--- a/fs/ocfs2/suballoc.c ++++ b/fs/ocfs2/suballoc.c +@@ -1252,6 +1252,7 @@ static int ocfs2_test_bg_bit_allocatable + int nr) + { + struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; ++ struct journal_head *jh; + int ret; + + if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) +@@ -1260,13 +1261,14 @@ static int ocfs2_test_bg_bit_allocatable + if (!buffer_jbd(bg_bh)) + return 1; + +- jbd_lock_bh_state(bg_bh); +- bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data; ++ jh = bh2jh(bg_bh); ++ spin_lock(&jh->b_state_lock); ++ bg = (struct ocfs2_group_desc *) jh->b_committed_data; + if (bg) + ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); + else + ret = 1; +- jbd_unlock_bh_state(bg_bh); ++ spin_unlock(&jh->b_state_lock); + + return ret; + } +@@ -2387,6 +2389,7 @@ static int ocfs2_block_group_clear_bits( + int status; + unsigned int tmp; + struct ocfs2_group_desc *undo_bg = NULL; ++ struct journal_head *jh; + + /* The caller got this descriptor from + * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ +@@ -2405,10 +2408,10 @@ static int ocfs2_block_group_clear_bits( + goto bail; + } + ++ jh = bh2jh(group_bh); + if (undo_fn) { +- jbd_lock_bh_state(group_bh); +- undo_bg = (struct ocfs2_group_desc *) +- bh2jh(group_bh)->b_committed_data; ++ spin_lock(&jh->b_state_lock); ++ undo_bg = (struct ocfs2_group_desc *) jh->b_committed_data; + BUG_ON(!undo_bg); + } + +@@ -2423,7 +2426,7 @@ static int ocfs2_block_group_clear_bits( + le16_add_cpu(&bg->bg_free_bits_count, num_bits); + if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { + if (undo_fn) +- jbd_unlock_bh_state(group_bh); ++ spin_unlock(&jh->b_state_lock); + return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. 
num_bits %d\n", + (unsigned long long)le64_to_cpu(bg->bg_blkno), + le16_to_cpu(bg->bg_bits), +@@ -2432,7 +2435,7 @@ static int ocfs2_block_group_clear_bits( + } + + if (undo_fn) +- jbd_unlock_bh_state(group_bh); ++ spin_unlock(&jh->b_state_lock); + + ocfs2_journal_dirty(handle, group_bh); + bail: +--- a/include/linux/jbd2.h ++++ b/include/linux/jbd2.h +@@ -313,7 +313,6 @@ enum jbd_state_bits { + BH_Revoked, /* Has been revoked from the log */ + BH_RevokeValid, /* Revoked flag is valid */ + BH_JBDDirty, /* Is dirty but journaled */ +- BH_State, /* Pins most journal_head state */ + BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ + BH_Shadow, /* IO on shadow buffer is running */ + BH_Verified, /* Metadata block has been verified ok */ +@@ -342,21 +341,6 @@ static inline struct journal_head *bh2jh + return bh->b_private; + } + +-static inline void jbd_lock_bh_state(struct buffer_head *bh) +-{ +- bit_spin_lock(BH_State, &bh->b_state); +-} +- +-static inline int jbd_is_locked_bh_state(struct buffer_head *bh) +-{ +- return bit_spin_is_locked(BH_State, &bh->b_state); +-} +- +-static inline void jbd_unlock_bh_state(struct buffer_head *bh) +-{ +- bit_spin_unlock(BH_State, &bh->b_state); +-} +- + static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) + { + bit_spin_lock(BH_JournalHead, &bh->b_state); +@@ -551,9 +535,9 @@ struct transaction_chp_stats_s { + * ->jbd_lock_bh_journal_head() (This is "innermost") + * + * j_state_lock +- * ->jbd_lock_bh_state() ++ * ->b_state_lock + * +- * jbd_lock_bh_state() ++ * b_state_lock + * ->j_list_lock + * + * j_state_lock +--- a/include/linux/journal-head.h ++++ b/include/linux/journal-head.h +@@ -11,6 +11,8 @@ + #ifndef JOURNAL_HEAD_H_INCLUDED + #define JOURNAL_HEAD_H_INCLUDED + ++#include ++ + typedef unsigned int tid_t; /* Unique transaction ID */ + typedef struct transaction_s transaction_t; /* Compound transaction type */ + +@@ -24,13 +26,18 @@ struct journal_head { + struct buffer_head *b_bh; + + /* ++ * Protect the buffer head state ++ */ ++ spinlock_t b_state_lock; ++ ++ /* + * Reference count - see description in journal.c + * [jbd_lock_bh_journal_head()] + */ + int b_jcount; + + /* +- * Journalling list for this buffer [jbd_lock_bh_state()] ++ * Journalling list for this buffer [b_state_lock] + * NOTE: We *cannot* combine this with b_modified into a bitfield + * as gcc would then (which the C standard allows but which is + * very unuseful) make 64-bit accesses to the bitfield and clobber +@@ -41,20 +48,20 @@ struct journal_head { + /* + * This flag signals the buffer has been modified by + * the currently running transaction +- * [jbd_lock_bh_state()] ++ * [b_state_lock] + */ + unsigned b_modified; + + /* + * Copy of the buffer data frozen for writing to the log. +- * [jbd_lock_bh_state()] ++ * [b_state_lock] + */ + char *b_frozen_data; + + /* + * Pointer to a saved copy of the buffer containing no uncommitted + * deallocation references, so that allocations can avoid overwriting +- * uncommitted deletes. [jbd_lock_bh_state()] ++ * uncommitted deletes. [b_state_lock] + */ + char *b_committed_data; + +@@ -63,7 +70,7 @@ struct journal_head { + * metadata: either the running transaction or the committing + * transaction (if there is one). Only applies to buffers on a + * transaction's data or metadata journaling list. +- * [j_list_lock] [jbd_lock_bh_state()] ++ * [j_list_lock] [b_state_lock] + * Either of these locks is enough for reading, both are needed for + * changes. 
+ */ +@@ -73,13 +80,13 @@ struct journal_head { + * Pointer to the running compound transaction which is currently + * modifying the buffer's metadata, if there was already a transaction + * committing it when the new transaction touched it. +- * [t_list_lock] [jbd_lock_bh_state()] ++ * [t_list_lock] [b_state_lock] + */ + transaction_t *b_next_transaction; + + /* + * Doubly-linked list of buffers on a transaction's data, metadata or +- * forget queue. [t_list_lock] [jbd_lock_bh_state()] ++ * forget queue. [t_list_lock] [b_state_lock] + */ + struct journal_head *b_tnext, *b_tprev; + diff --git a/patches/0007-jbd2-Free-journal-head-outside-of-locked-region.patch b/patches/0007-jbd2-Free-journal-head-outside-of-locked-region.patch new file mode 100644 index 000000000000..9b06a971884d --- /dev/null +++ b/patches/0007-jbd2-Free-journal-head-outside-of-locked-region.patch @@ -0,0 +1,88 @@ +From: Thomas Gleixner +Date: Fri, 9 Aug 2019 14:42:33 +0200 +Subject: [PATCH 7/7] jbd2: Free journal head outside of locked region + +On PREEMPT_RT bit-spinlocks have the same semantics as on PREEMPT_RT=n, +i.e. they disable preemption. That means functions which are not safe to be +called in preempt disabled context on RT trigger a might_sleep() assert. + +The journal head bit spinlock is mostly held for short code sequences with +trivial RT safe functionality, except for one place: + +jbd2_journal_put_journal_head() invokes __journal_remove_journal_head() +with the journal head bit spinlock held. __journal_remove_journal_head() +invokes kmem_cache_free() which must not be called with preemption disabled +on RT. + +Jan suggested to rework the removal function so the actual free happens +outside the bit-spinlocked region. + +Split it into two parts: + + - Do the sanity checks and the buffer head detach under the lock + + - Do the actual free after dropping the lock + +There is error case handling in the free part which needs to dereference +the b_size field of the now detached buffer head. Due to paranoia (caused +by ignorance) the size is retrieved in the detach function and handed into +the free function. Might be over-engineered, but better safe than sorry. + +This makes the journal head bit-spinlock usage RT compliant and also avoids +nested locking which is not covered by lockdep. 
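The resulting shape of the final-put path is roughly the following (a sketch of jbd2_journal_put_journal_head() after the split, condensed from the hunks below):

	jbd_lock_bh_journal_head(bh);               /* bit spinlock: preemption disabled */
	if (!jh->b_jcount) {
		__journal_remove_journal_head(bh);  /* sanity checks and detach only */
		jbd_unlock_bh_journal_head(bh);
		/* frees b_frozen_data, b_committed_data and the journal_head itself;
		 * may sleep on PREEMPT_RT, which is fine now that the lock is dropped */
		journal_release_journal_head(jh, bh->b_size);
		__brelse(bh);
	} else {
		jbd_unlock_bh_journal_head(bh);
	}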
+ +Suggested-by: Jan Kara +Signed-off-by: Thomas Gleixner +Cc: linux-ext4@vger.kernel.org +Cc: "Theodore Ts'o" +Cc: Jan Kara +Signed-off-by: Jan Kara +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/jbd2/journal.c | 20 ++++++++++++++------ + 1 file changed, 14 insertions(+), 6 deletions(-) + +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -2531,17 +2531,23 @@ static void __journal_remove_journal_hea + J_ASSERT_BH(bh, buffer_jbd(bh)); + J_ASSERT_BH(bh, jh2bh(jh) == bh); + BUFFER_TRACE(bh, "remove journal_head"); ++ ++ /* Unlink before dropping the lock */ ++ bh->b_private = NULL; ++ jh->b_bh = NULL; /* debug, really */ ++ clear_buffer_jbd(bh); ++} ++ ++static void journal_release_journal_head(struct journal_head *jh, size_t b_size) ++{ + if (jh->b_frozen_data) { + printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); +- jbd2_free(jh->b_frozen_data, bh->b_size); ++ jbd2_free(jh->b_frozen_data, b_size); + } + if (jh->b_committed_data) { + printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); +- jbd2_free(jh->b_committed_data, bh->b_size); ++ jbd2_free(jh->b_committed_data, b_size); + } +- bh->b_private = NULL; +- jh->b_bh = NULL; /* debug, really */ +- clear_buffer_jbd(bh); + journal_free_journal_head(jh); + } + +@@ -2559,9 +2565,11 @@ void jbd2_journal_put_journal_head(struc + if (!jh->b_jcount) { + __journal_remove_journal_head(bh); + jbd_unlock_bh_journal_head(bh); ++ journal_release_journal_head(jh, bh->b_size); + __brelse(bh); +- } else ++ } else { + jbd_unlock_bh_journal_head(bh); ++ } + } + + /* diff --git a/patches/0008-printk-add-ring-buffer-and-kthread.patch b/patches/0008-printk-add-ring-buffer-and-kthread.patch index 51624ec6144e..4bd53be6501a 100644 --- a/patches/0008-printk-add-ring-buffer-and-kthread.patch +++ b/patches/0008-printk-add-ring-buffer-and-kthread.patch @@ -34,7 +34,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include #include #include -@@ -407,7 +409,12 @@ DEFINE_RAW_SPINLOCK(logbuf_lock); +@@ -417,7 +419,12 @@ DEFINE_RAW_SPINLOCK(logbuf_lock); printk_safe_exit_irqrestore(flags); \ } while (0) @@ -47,7 +47,7 @@ Signed-off-by: Sebastian Andrzej Siewior DECLARE_WAIT_QUEUE_HEAD(log_wait); /* the next printk record to read by syslog(READ) or /proc/kmsg */ static u64 syslog_seq; -@@ -770,6 +777,10 @@ static ssize_t msg_print_ext_body(char * +@@ -780,6 +787,10 @@ static ssize_t msg_print_ext_body(char * return p - buf; } @@ -58,7 +58,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* /dev/kmsg - userspace message inject/listen interface */ struct devkmsg_user { u64 seq; -@@ -1610,6 +1621,34 @@ SYSCALL_DEFINE3(syslog, int, type, char +@@ -1620,6 +1631,34 @@ SYSCALL_DEFINE3(syslog, int, type, char return do_syslog(type, buf, len, SYSLOG_FROM_READER); } @@ -93,7 +93,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Special console_lock variants that help to reduce the risk of soft-lockups. * They allow to pass console_lock to another printk() call using a busy wait. 
-@@ -2964,6 +3003,72 @@ void wake_up_klogd(void) +@@ -2974,6 +3013,72 @@ void wake_up_klogd(void) preempt_enable(); } diff --git a/patches/0009-printk-remove-exclusive-console-hack.patch b/patches/0009-printk-remove-exclusive-console-hack.patch index c3ceb4961da2..bb7249de8a4c 100644 --- a/patches/0009-printk-remove-exclusive-console-hack.patch +++ b/patches/0009-printk-remove-exclusive-console-hack.patch @@ -20,7 +20,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -259,11 +259,6 @@ static void __up_console_sem(unsigned lo +@@ -269,11 +269,6 @@ static void __up_console_sem(unsigned lo static int console_locked, console_suspended; /* @@ -32,7 +32,7 @@ Signed-off-by: Sebastian Andrzej Siewior * Array of consoles built from command line options (console=) */ -@@ -433,7 +428,6 @@ static u32 log_next_idx; +@@ -443,7 +438,6 @@ static u32 log_next_idx; /* the next printk record to write to the console */ static u64 console_seq; static u32 console_idx; @@ -40,7 +40,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* the next printk record to read after the last 'clear' command */ static u64 clear_seq; -@@ -1805,8 +1799,6 @@ static void call_console_drivers(const c +@@ -1815,8 +1809,6 @@ static void call_console_drivers(const c return; for_each_console(con) { @@ -49,7 +49,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!(con->flags & CON_ENABLED)) continue; if (!con->write) -@@ -2099,7 +2091,6 @@ static u64 syslog_seq; +@@ -2109,7 +2101,6 @@ static u64 syslog_seq; static u32 syslog_idx; static u64 console_seq; static u32 console_idx; @@ -57,7 +57,7 @@ Signed-off-by: Sebastian Andrzej Siewior static u64 log_first_seq; static u32 log_first_idx; static u64 log_next_seq; -@@ -2468,12 +2459,6 @@ void console_unlock(void) +@@ -2478,12 +2469,6 @@ void console_unlock(void) goto skip; } @@ -70,7 +70,7 @@ Signed-off-by: Sebastian Andrzej Siewior len += msg_print_text(msg, console_msg_format & MSG_FORMAT_SYSLOG, printk_time, text + len, sizeof(text) - len); -@@ -2801,17 +2786,6 @@ void register_console(struct console *ne +@@ -2811,17 +2796,6 @@ void register_console(struct console *ne logbuf_lock_irqsave(flags); console_seq = syslog_seq; console_idx = syslog_idx; @@ -88,7 +88,7 @@ Signed-off-by: Sebastian Andrzej Siewior logbuf_unlock_irqrestore(flags); } console_unlock(); -@@ -2823,6 +2797,10 @@ void register_console(struct console *ne +@@ -2833,6 +2807,10 @@ void register_console(struct console *ne * boot consoles, real consoles, etc - this is to ensure that end * users know there might be something in the kernel's log buffer that * went to the bootconsole (that they do not see on the real console) diff --git a/patches/0010-printk-redirect-emit-store-to-new-ringbuffer.patch b/patches/0010-printk-redirect-emit-store-to-new-ringbuffer.patch index 0da7b1ed74e0..0355541c4935 100644 --- a/patches/0010-printk-redirect-emit-store-to-new-ringbuffer.patch +++ b/patches/0010-printk-redirect-emit-store-to-new-ringbuffer.patch @@ -25,7 +25,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -507,90 +507,6 @@ static u32 log_next(u32 idx) +@@ -517,90 +517,6 @@ static u32 log_next(u32 idx) return idx + msg->len; } @@ -116,7 +116,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* insert record into the buffer, discard old ones, update heads */ static int log_store(u32 caller_id, int facility, int level, enum log_flags flags, u64 ts_nsec, -@@ -598,57 +514,39 @@ static int log_store(u32 caller_id, int +@@ -608,57 
+524,39 @@ static int log_store(u32 caller_id, int const char *text, u16 text_len) { struct printk_log *msg; @@ -187,7 +187,7 @@ Signed-off-by: Sebastian Andrzej Siewior return msg->text_len; } -@@ -1719,70 +1617,6 @@ static int console_lock_spinning_disable +@@ -1729,70 +1627,6 @@ static int console_lock_spinning_disable return 1; } @@ -258,7 +258,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Call the console drivers, asking them to write out * log_buf[start] to log_buf[end - 1]. -@@ -1803,7 +1637,7 @@ static void call_console_drivers(const c +@@ -1813,7 +1647,7 @@ static void call_console_drivers(const c continue; if (!con->write) continue; @@ -267,7 +267,7 @@ Signed-off-by: Sebastian Andrzej Siewior !(con->flags & CON_ANYTIME)) continue; if (con->flags & CON_EXTENDED) -@@ -1833,6 +1667,8 @@ static inline u32 printk_caller_id(void) +@@ -1843,6 +1677,8 @@ static inline u32 printk_caller_id(void) 0x80000000 + raw_smp_processor_id(); } @@ -276,7 +276,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Continuation lines are buffered, and not committed to the record buffer * until the line is complete, or a race forces it. The line fragments -@@ -1888,56 +1724,45 @@ static bool cont_add(u32 caller_id, int +@@ -1898,56 +1734,45 @@ static bool cont_add(u32 caller_id, int return true; } @@ -364,7 +364,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (text_len && text[text_len-1] == '\n') { text_len--; lflags |= LOG_NEWLINE; -@@ -1968,58 +1793,10 @@ int vprintk_store(int facility, int leve +@@ -1978,58 +1803,10 @@ int vprintk_store(int facility, int leve if (dict) lflags |= LOG_NEWLINE; @@ -426,7 +426,7 @@ Signed-off-by: Sebastian Andrzej Siewior return printed_len; } EXPORT_SYMBOL(vprintk_emit); -@@ -2484,7 +2261,7 @@ void console_unlock(void) +@@ -2494,7 +2271,7 @@ void console_unlock(void) console_lock_spinning_enable(); stop_critical_timings(); /* don't trace print latency */ diff --git a/patches/0011-printk_safe-remove-printk-safe-code.patch b/patches/0011-printk_safe-remove-printk-safe-code.patch index baedc972b923..504e3a3a5344 100644 --- a/patches/0011-printk_safe-remove-printk-safe-code.patch +++ b/patches/0011-printk_safe-remove-printk-safe-code.patch @@ -120,7 +120,7 @@ Signed-off-by: Sebastian Andrzej Siewior extern int kptr_restrict; --- a/init/main.c +++ b/init/main.c -@@ -669,7 +669,6 @@ asmlinkage __visible void __init start_k +@@ -693,7 +693,6 @@ asmlinkage __visible void __init start_k boot_init_stack_canary(); time_init(); @@ -140,7 +140,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* --- a/kernel/panic.c +++ b/kernel/panic.c -@@ -228,7 +228,6 @@ void panic(const char *fmt, ...) +@@ -237,7 +237,6 @@ void panic(const char *fmt, ...) * Bypass the panic_cpu check and call __crash_kexec directly. */ if (!_crash_kexec_post_notifiers) { @@ -148,7 +148,7 @@ Signed-off-by: Sebastian Andrzej Siewior __crash_kexec(NULL); /* -@@ -252,8 +251,6 @@ void panic(const char *fmt, ...) +@@ -261,8 +260,6 @@ void panic(const char *fmt, ...) 
*/ atomic_notifier_call_chain(&panic_notifier_list, 0, buf); @@ -214,7 +214,7 @@ Signed-off-by: Sebastian Andrzej Siewior -#endif /* CONFIG_PRINTK */ --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1726,13 +1726,6 @@ static bool cont_add(u32 caller_id, int +@@ -1736,13 +1736,6 @@ static bool cont_add(u32 caller_id, int } #endif /* 0 */ @@ -228,7 +228,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* ring buffer used as memory allocator for temporary sprint buffers */ DECLARE_STATIC_PRINTKRB(sprint_rb, ilog2(PRINTK_RECORD_MAX + sizeof(struct prb_entry) + -@@ -1801,6 +1794,11 @@ asmlinkage int vprintk_emit(int facility +@@ -1811,6 +1804,11 @@ asmlinkage int vprintk_emit(int facility } EXPORT_SYMBOL(vprintk_emit); @@ -240,7 +240,7 @@ Signed-off-by: Sebastian Andrzej Siewior asmlinkage int vprintk(const char *fmt, va_list args) { return vprintk_func(fmt, args); -@@ -3201,5 +3199,4 @@ void kmsg_dump_rewind(struct kmsg_dumper +@@ -3211,5 +3209,4 @@ void kmsg_dump_rewind(struct kmsg_dumper logbuf_unlock_irqrestore(flags); } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); @@ -666,7 +666,7 @@ Signed-off-by: Sebastian Andrzej Siewior -} --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -8874,7 +8874,6 @@ void ftrace_dump(enum ftrace_dump_mode o +@@ -8908,7 +8908,6 @@ void ftrace_dump(enum ftrace_dump_mode o tracing_off(); local_irq_save(flags); @@ -674,7 +674,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* Simulate the iterator */ trace_init_global_iter(&iter); -@@ -8951,7 +8950,6 @@ void ftrace_dump(enum ftrace_dump_mode o +@@ -8985,7 +8984,6 @@ void ftrace_dump(enum ftrace_dump_mode o atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); } atomic_dec(&dump_running); diff --git a/patches/0012-printk-minimize-console-locking-implementation.patch b/patches/0012-printk-minimize-console-locking-implementation.patch index e05e800d7a12..c509bd4e5af5 100644 --- a/patches/0012-printk-minimize-console-locking-implementation.patch +++ b/patches/0012-printk-minimize-console-locking-implementation.patch @@ -16,7 +16,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -217,19 +217,7 @@ static int nr_ext_console_drivers; +@@ -227,19 +227,7 @@ static int nr_ext_console_drivers; static int __down_trylock_console_sem(unsigned long ip) { @@ -37,7 +37,7 @@ Signed-off-by: Sebastian Andrzej Siewior return 1; mutex_acquire(&console_lock_dep_map, 0, 1, ip); return 0; -@@ -238,13 +226,9 @@ static int __down_trylock_console_sem(un +@@ -248,13 +236,9 @@ static int __down_trylock_console_sem(un static void __up_console_sem(unsigned long ip) { @@ -51,7 +51,7 @@ Signed-off-by: Sebastian Andrzej Siewior } #define up_console_sem() __up_console_sem(_RET_IP_) -@@ -1542,82 +1526,6 @@ static void format_text(struct printk_lo +@@ -1552,82 +1536,6 @@ static void format_text(struct printk_lo } /* @@ -134,7 +134,7 @@ Signed-off-by: Sebastian Andrzej Siewior * Call the console drivers, asking them to write out * log_buf[start] to log_buf[end - 1]. * The console_lock must be held. 
-@@ -1879,8 +1787,6 @@ static ssize_t msg_print_ext_header(char +@@ -1889,8 +1797,6 @@ static ssize_t msg_print_ext_header(char static ssize_t msg_print_ext_body(char *buf, size_t size, char *dict, size_t dict_len, char *text, size_t text_len) { return 0; } @@ -143,7 +143,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void call_console_drivers(const char *ext_text, size_t ext_len, const char *text, size_t len) {} static size_t msg_print_text(const struct printk_log *msg, bool syslog, -@@ -2115,35 +2021,6 @@ int is_console_locked(void) +@@ -2125,35 +2031,6 @@ int is_console_locked(void) { return console_locked; } @@ -179,7 +179,7 @@ Signed-off-by: Sebastian Andrzej Siewior /** * console_unlock - unlock the console system -@@ -2151,147 +2028,17 @@ static inline int can_use_console(void) +@@ -2161,147 +2038,17 @@ static inline int can_use_console(void) * Releases the console_lock which the caller holds on the console system * and the console driver list. * diff --git a/patches/0013-printk-track-seq-per-console.patch b/patches/0013-printk-track-seq-per-console.patch index 020a572cc082..41a6f3d94b61 100644 --- a/patches/0013-printk-track-seq-per-console.patch +++ b/patches/0013-printk-track-seq-per-console.patch @@ -24,7 +24,7 @@ Signed-off-by: Sebastian Andrzej Siewior }; --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1497,6 +1497,16 @@ SYSCALL_DEFINE3(syslog, int, type, char +@@ -1507,6 +1507,16 @@ SYSCALL_DEFINE3(syslog, int, type, char return do_syslog(type, buf, len, SYSLOG_FROM_READER); } @@ -41,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void format_text(struct printk_log *msg, u64 seq, char *ext_text, size_t *ext_len, char *text, size_t *len, bool time) -@@ -1530,7 +1540,7 @@ static void format_text(struct printk_lo +@@ -1540,7 +1550,7 @@ static void format_text(struct printk_lo * log_buf[start] to log_buf[end - 1]. * The console_lock must be held. 
*/ @@ -50,7 +50,7 @@ Signed-off-by: Sebastian Andrzej Siewior const char *text, size_t len) { struct console *con; -@@ -1548,6 +1558,19 @@ static void call_console_drivers(const c +@@ -1558,6 +1568,19 @@ static void call_console_drivers(const c if (!cpu_online(raw_smp_processor_id()) && !(con->flags & CON_ANYTIME)) continue; @@ -70,7 +70,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (con->flags & CON_EXTENDED) con->write(con, ext_text, ext_len); else -@@ -1787,7 +1810,7 @@ static ssize_t msg_print_ext_header(char +@@ -1797,7 +1820,7 @@ static ssize_t msg_print_ext_header(char static ssize_t msg_print_ext_body(char *buf, size_t size, char *dict, size_t dict_len, char *text, size_t text_len) { return 0; } @@ -79,7 +79,7 @@ Signed-off-by: Sebastian Andrzej Siewior const char *text, size_t len) {} static size_t msg_print_text(const struct printk_log *msg, bool syslog, bool time, char *buf, size_t size) { return 0; } -@@ -2540,8 +2563,9 @@ static int printk_kthread_func(void *dat +@@ -2550,8 +2573,9 @@ static int printk_kthread_func(void *dat &len, printk_time); console_lock(); diff --git a/patches/0014-printk-do-boot_delay_msec-inside-printk_delay.patch b/patches/0014-printk-do-boot_delay_msec-inside-printk_delay.patch index 37b41e738682..0a12b5135740 100644 --- a/patches/0014-printk-do-boot_delay_msec-inside-printk_delay.patch +++ b/patches/0014-printk-do-boot_delay_msec-inside-printk_delay.patch @@ -13,7 +13,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1497,6 +1497,21 @@ SYSCALL_DEFINE3(syslog, int, type, char +@@ -1507,6 +1507,21 @@ SYSCALL_DEFINE3(syslog, int, type, char return do_syslog(type, buf, len, SYSLOG_FROM_READER); } @@ -35,7 +35,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void print_console_dropped(struct console *con, u64 count) { char text[64]; -@@ -1578,20 +1593,6 @@ static void call_console_drivers(u64 seq +@@ -1588,20 +1603,6 @@ static void call_console_drivers(u64 seq } } @@ -56,7 +56,7 @@ Signed-off-by: Sebastian Andrzej Siewior static inline u32 printk_caller_id(void) { return in_task() ? task_pid_nr(current) : -@@ -2565,10 +2566,8 @@ static int printk_kthread_func(void *dat +@@ -2575,10 +2576,8 @@ static int printk_kthread_func(void *dat console_lock(); call_console_drivers(master_seq, ext_text, ext_len, text, len); diff --git a/patches/0015-printk-print-history-for-new-consoles.patch b/patches/0015-printk-print-history-for-new-consoles.patch index e55209f5b064..1b222c55b836 100644 --- a/patches/0015-printk-print-history-for-new-consoles.patch +++ b/patches/0015-printk-print-history-for-new-consoles.patch @@ -27,7 +27,7 @@ Signed-off-by: Sebastian Andrzej Siewior }; --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1550,6 +1550,77 @@ static void format_text(struct printk_lo +@@ -1560,6 +1560,77 @@ static void format_text(struct printk_lo } } @@ -105,7 +105,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Call the console drivers, asking them to write out * log_buf[start] to log_buf[end - 1]. 
-@@ -1568,6 +1639,10 @@ static void call_console_drivers(u64 seq +@@ -1578,6 +1649,10 @@ static void call_console_drivers(u64 seq for_each_console(con) { if (!(con->flags & CON_ENABLED)) continue; diff --git a/patches/0016-printk-implement-CON_PRINTBUFFER.patch b/patches/0016-printk-implement-CON_PRINTBUFFER.patch index 519e75d7d017..ed15f6624856 100644 --- a/patches/0016-printk-implement-CON_PRINTBUFFER.patch +++ b/patches/0016-printk-implement-CON_PRINTBUFFER.patch @@ -13,7 +13,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -409,10 +409,6 @@ static u32 log_first_idx; +@@ -419,10 +419,6 @@ static u32 log_first_idx; static u64 log_next_seq; static u32 log_next_idx; @@ -24,7 +24,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* the next printk record to read after the last 'clear' command */ static u64 clear_seq; static u32 clear_idx; -@@ -1640,8 +1636,12 @@ static void call_console_drivers(u64 seq +@@ -1650,8 +1646,12 @@ static void call_console_drivers(u64 seq if (!(con->flags & CON_ENABLED)) continue; if (!con->wrote_history) { @@ -39,7 +39,7 @@ Signed-off-by: Sebastian Andrzej Siewior } if (!con->write) continue; -@@ -1871,8 +1871,6 @@ EXPORT_SYMBOL(printk); +@@ -1881,8 +1881,6 @@ EXPORT_SYMBOL(printk); static u64 syslog_seq; static u32 syslog_idx; @@ -48,7 +48,7 @@ Signed-off-by: Sebastian Andrzej Siewior static u64 log_first_seq; static u32 log_first_idx; static u64 log_next_seq; -@@ -2196,15 +2194,6 @@ void console_flush_on_panic(enum con_flu +@@ -2206,15 +2204,6 @@ void console_flush_on_panic(enum con_flu */ console_trylock(); console_may_schedule = 0; @@ -64,7 +64,7 @@ Signed-off-by: Sebastian Andrzej Siewior console_unlock(); } -@@ -2283,7 +2272,6 @@ early_param("keep_bootcon", keep_bootcon +@@ -2293,7 +2282,6 @@ early_param("keep_bootcon", keep_bootcon void register_console(struct console *newcon) { int i; @@ -72,7 +72,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct console *bcon = NULL; struct console_cmdline *c; static bool has_preferred; -@@ -2399,16 +2387,6 @@ void register_console(struct console *ne +@@ -2409,16 +2397,6 @@ void register_console(struct console *ne if (newcon->flags & CON_EXTENDED) nr_ext_console_drivers++; diff --git a/patches/0017-printk-add-processor-number-to-output.patch b/patches/0017-printk-add-processor-number-to-output.patch index 405f17ad8b16..d46699c6c34a 100644 --- a/patches/0017-printk-add-processor-number-to-output.patch +++ b/patches/0017-printk-add-processor-number-to-output.patch @@ -14,7 +14,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -338,6 +338,7 @@ enum log_flags { +@@ -348,6 +348,7 @@ enum log_flags { struct printk_log { u64 ts_nsec; /* timestamp in nanoseconds */ @@ -22,7 +22,7 @@ Signed-off-by: Sebastian Andrzej Siewior u16 len; /* length of entire record */ u16 text_len; /* length of text buffer */ u16 dict_len; /* length of dictionary buffer */ -@@ -489,7 +490,7 @@ static u32 log_next(u32 idx) +@@ -499,7 +500,7 @@ static u32 log_next(u32 idx) /* insert record into the buffer, discard old ones, update heads */ static int log_store(u32 caller_id, int facility, int level, @@ -31,7 +31,7 @@ Signed-off-by: Sebastian Andrzej Siewior const char *dict, u16 dict_len, const char *text, u16 text_len) { -@@ -523,6 +524,7 @@ static int log_store(u32 caller_id, int +@@ -533,6 +534,7 @@ static int log_store(u32 caller_id, int #ifdef CONFIG_PRINTK_CALLER msg->caller_id = caller_id; #endif @@ -39,7 +39,7 @@ Signed-off-by: 
Sebastian Andrzej Siewior msg->len = size; /* insert message */ -@@ -596,9 +598,9 @@ static ssize_t msg_print_ext_header(char +@@ -606,9 +608,9 @@ static ssize_t msg_print_ext_header(char do_div(ts_usec, 1000); @@ -51,7 +51,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static ssize_t msg_print_ext_body(char *buf, size_t size, -@@ -1132,6 +1134,11 @@ static inline void boot_delay_msec(int l +@@ -1142,6 +1144,11 @@ static inline void boot_delay_msec(int l static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME); module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); @@ -63,7 +63,7 @@ Signed-off-by: Sebastian Andrzej Siewior static size_t print_syslog(unsigned int level, char *buf) { return sprintf(buf, "<%u>", level); -@@ -1175,6 +1182,7 @@ static size_t print_prefix(const struct +@@ -1185,6 +1192,7 @@ static size_t print_prefix(const struct buf[len++] = ' '; buf[len] = '\0'; } @@ -71,7 +71,7 @@ Signed-off-by: Sebastian Andrzej Siewior return len; } -@@ -1750,6 +1758,7 @@ asmlinkage int vprintk_emit(int facility +@@ -1760,6 +1768,7 @@ asmlinkage int vprintk_emit(int facility u64 ts_nsec; char *text; char *rbuf; @@ -79,7 +79,7 @@ Signed-off-by: Sebastian Andrzej Siewior ts_nsec = local_clock(); -@@ -1759,6 +1768,8 @@ asmlinkage int vprintk_emit(int facility +@@ -1769,6 +1778,8 @@ asmlinkage int vprintk_emit(int facility return printed_len; } @@ -88,7 +88,7 @@ Signed-off-by: Sebastian Andrzej Siewior text = rbuf; text_len = vscnprintf(text, PRINTK_SPRINT_MAX, fmt, args); -@@ -1793,7 +1804,7 @@ asmlinkage int vprintk_emit(int facility +@@ -1803,7 +1814,7 @@ asmlinkage int vprintk_emit(int facility if (dict) lflags |= LOG_NEWLINE; diff --git a/patches/0018-console-add-write_atomic-interface.patch b/patches/0018-console-add-write_atomic-interface.patch index f9ba2e42d31f..6de18c0d8d1a 100644 --- a/patches/0018-console-add-write_atomic-interface.patch +++ b/patches/0018-console-add-write_atomic-interface.patch @@ -46,7 +46,7 @@ Signed-off-by: Sebastian Andrzej Siewior #endif /* _LINUX_CONSOLE_H */ --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -3034,3 +3034,15 @@ void kmsg_dump_rewind(struct kmsg_dumper +@@ -3044,3 +3044,15 @@ void kmsg_dump_rewind(struct kmsg_dumper } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); #endif diff --git a/patches/0019-printk-introduce-emergency-messages.patch b/patches/0019-printk-introduce-emergency-messages.patch index 784381d7f5ff..21e12d3a7fec 100644 --- a/patches/0019-printk-introduce-emergency-messages.patch +++ b/patches/0019-printk-introduce-emergency-messages.patch @@ -81,7 +81,7 @@ Signed-off-by: Sebastian Andrzej Siewior }; EXPORT_SYMBOL_GPL(console_printk); -@@ -488,6 +490,9 @@ static u32 log_next(u32 idx) +@@ -498,6 +500,9 @@ static u32 log_next(u32 idx) return idx + msg->len; } @@ -91,7 +91,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* insert record into the buffer, discard old ones, update heads */ static int log_store(u32 caller_id, int facility, int level, enum log_flags flags, u64 ts_nsec, u16 cpu, -@@ -1631,7 +1636,7 @@ static void printk_write_history(struct +@@ -1641,7 +1646,7 @@ static void printk_write_history(struct * The console_lock must be held. 
*/ static void call_console_drivers(u64 seq, const char *ext_text, size_t ext_len, @@ -100,7 +100,7 @@ Signed-off-by: Sebastian Andrzej Siewior { struct console *con; -@@ -1651,6 +1656,18 @@ static void call_console_drivers(u64 seq +@@ -1661,6 +1666,18 @@ static void call_console_drivers(u64 seq con->wrote_history = 1; con->printk_seq = seq - 1; } @@ -119,7 +119,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!con->write) continue; if (!cpu_online(raw_smp_processor_id()) && -@@ -1770,8 +1787,12 @@ asmlinkage int vprintk_emit(int facility +@@ -1780,8 +1797,12 @@ asmlinkage int vprintk_emit(int facility cpu = raw_smp_processor_id(); @@ -134,7 +134,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* strip and flag a trailing newline */ if (text_len && text[text_len-1] == '\n') { -@@ -1804,6 +1825,14 @@ asmlinkage int vprintk_emit(int facility +@@ -1814,6 +1835,14 @@ asmlinkage int vprintk_emit(int facility if (dict) lflags |= LOG_NEWLINE; @@ -149,7 +149,7 @@ Signed-off-by: Sebastian Andrzej Siewior printed_len = log_store(caller_id, facility, level, lflags, ts_nsec, cpu, dict, dictlen, text, text_len); -@@ -1896,7 +1925,7 @@ static ssize_t msg_print_ext_body(char * +@@ -1906,7 +1935,7 @@ static ssize_t msg_print_ext_body(char * char *dict, size_t dict_len, char *text, size_t text_len) { return 0; } static void call_console_drivers(u64 seq, const char *ext_text, size_t ext_len, @@ -158,7 +158,7 @@ Signed-off-by: Sebastian Andrzej Siewior static size_t msg_print_text(const struct printk_log *msg, bool syslog, bool time, char *buf, size_t size) { return 0; } static bool suppress_message_printing(int level) { return false; } -@@ -2629,7 +2658,7 @@ static int printk_kthread_func(void *dat +@@ -2639,7 +2668,7 @@ static int printk_kthread_func(void *dat console_lock(); call_console_drivers(master_seq, ext_text, @@ -167,7 +167,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (len > 0 || ext_len > 0) printk_delay(msg->level); console_unlock(); -@@ -3033,6 +3062,76 @@ void kmsg_dump_rewind(struct kmsg_dumper +@@ -3043,6 +3072,76 @@ void kmsg_dump_rewind(struct kmsg_dumper logbuf_unlock_irqrestore(flags); } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); diff --git a/patches/0020-serial-8250-implement-write_atomic.patch b/patches/0020-serial-8250-implement-write_atomic.patch index 12ce24e8595a..4445ea7cdec2 100644 --- a/patches/0020-serial-8250-implement-write_atomic.patch +++ b/patches/0020-serial-8250-implement-write_atomic.patch @@ -15,26 +15,61 @@ preempted write_atomic. 
Signed-off-by: John Ogness Signed-off-by: Sebastian Andrzej Siewior --- - drivers/tty/serial/8250/8250.h | 4 + drivers/tty/serial/8250/8250.h | 22 +++++ drivers/tty/serial/8250/8250_core.c | 19 +++- - drivers/tty/serial/8250/8250_dma.c | 5 - + drivers/tty/serial/8250/8250_dma.c | 4 drivers/tty/serial/8250/8250_port.c | 154 ++++++++++++++++++++++++++---------- include/linux/serial_8250.h | 5 + - 5 files changed, 139 insertions(+), 48 deletions(-) + 5 files changed, 157 insertions(+), 47 deletions(-) --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h -@@ -255,3 +255,7 @@ static inline int serial_index(struct ua - { - return port->minor - 64; - } -+ +@@ -96,6 +96,10 @@ struct serial8250_config { + #define SERIAL8250_SHARE_IRQS 0 + #endif + +void set_ier(struct uart_8250_port *up, unsigned char ier); +void clear_ier(struct uart_8250_port *up); +void restore_ier(struct uart_8250_port *up); ++ + #define SERIAL8250_PORT_FLAGS(_base, _irq, _flags) \ + { \ + .iobase = _base, \ +@@ -139,6 +143,15 @@ static inline bool serial8250_set_THRI(s + return true; + } + ++static inline bool serial8250_set_THRI_sier(struct uart_8250_port *up) ++{ ++ if (up->ier & UART_IER_THRI) ++ return false; ++ up->ier |= UART_IER_THRI; ++ set_ier(up, up->ier); ++ return true; ++} ++ + static inline bool serial8250_clear_THRI(struct uart_8250_port *up) + { + if (!(up->ier & UART_IER_THRI)) +@@ -148,6 +161,15 @@ static inline bool serial8250_clear_THRI + return true; + } + ++static inline bool serial8250_clear_THRI_sier(struct uart_8250_port *up) ++{ ++ if (!(up->ier & UART_IER_THRI)) ++ return false; ++ up->ier &= ~UART_IER_THRI; ++ set_ier(up, up->ier); ++ return true; ++} ++ + struct uart_8250_port *serial8250_get_port(int line); + + void serial8250_rpm_get(struct uart_8250_port *p); --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c -@@ -265,7 +265,7 @@ static void serial8250_timeout(struct ti +@@ -266,7 +266,7 @@ static void serial8250_timeout(struct ti static void serial8250_backup_timeout(struct timer_list *t) { struct uart_8250_port *up = from_timer(up, t, timer); @@ -43,7 +78,7 @@ Signed-off-by: Sebastian Andrzej Siewior unsigned long flags; spin_lock_irqsave(&up->port.lock, flags); -@@ -274,10 +274,8 @@ static void serial8250_backup_timeout(st +@@ -275,10 +275,8 @@ static void serial8250_backup_timeout(st * Must disable interrupts or else we risk racing with the interrupt * based handler. 
*/ @@ -56,7 +91,7 @@ Signed-off-by: Sebastian Andrzej Siewior iir = serial_in(up, UART_IIR); -@@ -300,7 +298,7 @@ static void serial8250_backup_timeout(st +@@ -301,7 +299,7 @@ static void serial8250_backup_timeout(st serial8250_tx_chars(up); if (up->port.irq) @@ -65,7 +100,7 @@ Signed-off-by: Sebastian Andrzej Siewior spin_unlock_irqrestore(&up->port.lock, flags); -@@ -578,6 +576,14 @@ serial8250_register_ports(struct uart_dr +@@ -579,6 +577,14 @@ serial8250_register_ports(struct uart_dr #ifdef CONFIG_SERIAL_8250_CONSOLE @@ -80,7 +115,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void univ8250_console_write(struct console *co, const char *s, unsigned int count) { -@@ -663,6 +669,7 @@ static int univ8250_console_match(struct +@@ -664,6 +670,7 @@ static int univ8250_console_match(struct static struct console univ8250_console = { .name = "ttyS", @@ -90,28 +125,27 @@ Signed-off-by: Sebastian Andrzej Siewior .setup = univ8250_console_setup, --- a/drivers/tty/serial/8250/8250_dma.c +++ b/drivers/tty/serial/8250/8250_dma.c -@@ -36,7 +36,7 @@ static void __dma_tx_complete(void *para +@@ -35,7 +35,7 @@ static void __dma_tx_complete(void *para + ret = serial8250_tx_dma(p); - if (ret) { - p->ier |= UART_IER_THRI; -- serial_port_out(&p->port, UART_IER, p->ier); -+ set_ier(p, p->ier); - } + if (ret) +- serial8250_set_THRI(p); ++ serial8250_set_THRI_sier(p); spin_unlock_irqrestore(&p->port.lock, flags); -@@ -101,8 +101,7 @@ int serial8250_tx_dma(struct uart_8250_p + } +@@ -98,7 +98,7 @@ int serial8250_tx_dma(struct uart_8250_p + dma_async_issue_pending(dma->txchan); if (dma->tx_err) { dma->tx_err = 0; - if (p->ier & UART_IER_THRI) { -- p->ier &= ~UART_IER_THRI; -- serial_out(p, UART_IER, p->ier); -+ set_ier(p, p->ier); - } +- serial8250_clear_THRI(p); ++ serial8250_clear_THRI_sier(p); } return 0; + err: --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c -@@ -731,7 +731,7 @@ static void serial8250_set_sleep(struct +@@ -721,7 +721,7 @@ static void serial8250_set_sleep(struct serial_out(p, UART_EFR, UART_EFR_ECB); serial_out(p, UART_LCR, 0); } @@ -120,7 +154,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (p->capabilities & UART_CAP_EFR) { serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(p, UART_EFR, efr); -@@ -1433,7 +1433,7 @@ static void serial8250_stop_rx(struct ua +@@ -1390,7 +1390,7 @@ static void serial8250_stop_rx(struct ua up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); up->port.read_status_mask &= ~UART_LSR_DR; @@ -129,7 +163,7 @@ Signed-off-by: Sebastian Andrzej Siewior serial8250_rpm_put(up); } -@@ -1451,7 +1451,7 @@ static void __do_stop_tx_rs485(struct ua +@@ -1408,7 +1408,7 @@ static void __do_stop_tx_rs485(struct ua serial8250_clear_and_reinit_fifos(p); p->ier |= UART_IER_RLSI | UART_IER_RDI; @@ -138,26 +172,26 @@ Signed-off-by: Sebastian Andrzej Siewior } } static enum hrtimer_restart serial8250_em485_handle_stop_tx(struct hrtimer *t) -@@ -1504,7 +1504,7 @@ static inline void __do_stop_tx(struct u +@@ -1459,7 +1459,7 @@ static void __stop_tx_rs485(struct uart_ + + static inline void __do_stop_tx(struct uart_8250_port *p) { - if (p->ier & UART_IER_THRI) { - p->ier &= ~UART_IER_THRI; -- serial_out(p, UART_IER, p->ier); -+ set_ier(p, p->ier); +- if (serial8250_clear_THRI(p)) ++ if (serial8250_clear_THRI_sier(p)) serial8250_rpm_put_tx(p); - } } -@@ -1557,7 +1557,7 @@ static inline void __start_tx(struct uar - if (!(up->ier & UART_IER_THRI)) { - up->ier |= UART_IER_THRI; -- serial_port_out(port, UART_IER, up->ier); -+ set_ier(up, up->ier); +@@ 
-1509,7 +1509,7 @@ static inline void __start_tx(struct uar + if (up->dma && !up->dma->tx_dma(up)) + return; +- if (serial8250_set_THRI(up)) { ++ if (serial8250_set_THRI_sier(up)) { if (up->bugs & UART_BUG_TXEN) { unsigned char lsr; -@@ -1663,7 +1663,7 @@ static void serial8250_disable_ms(struct - return; + +@@ -1616,7 +1616,7 @@ static void serial8250_disable_ms(struct + mctrl_gpio_disable_ms(up->gpios); up->ier &= ~UART_IER_MSI; - serial_port_out(port, UART_IER, up->ier); @@ -165,7 +199,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static void serial8250_enable_ms(struct uart_port *port) -@@ -1677,7 +1677,7 @@ static void serial8250_enable_ms(struct +@@ -1632,7 +1632,7 @@ static void serial8250_enable_ms(struct up->ier |= UART_IER_MSI; serial8250_rpm_get(up); @@ -174,7 +208,7 @@ Signed-off-by: Sebastian Andrzej Siewior serial8250_rpm_put(up); } -@@ -2050,6 +2050,52 @@ static void wait_for_xmitr(struct uart_8 +@@ -1991,6 +1991,52 @@ static void wait_for_xmitr(struct uart_8 } } @@ -227,7 +261,7 @@ Signed-off-by: Sebastian Andrzej Siewior #ifdef CONFIG_CONSOLE_POLL /* * Console polling routines for writing and reading from the uart while -@@ -2081,18 +2127,10 @@ static int serial8250_get_poll_char(stru +@@ -2022,18 +2068,10 @@ static int serial8250_get_poll_char(stru static void serial8250_put_poll_char(struct uart_port *port, unsigned char c) { @@ -247,7 +281,7 @@ Signed-off-by: Sebastian Andrzej Siewior wait_for_xmitr(up, BOTH_EMPTY); /* -@@ -2105,7 +2143,7 @@ static void serial8250_put_poll_char(str +@@ -2046,7 +2084,7 @@ static void serial8250_put_poll_char(str * and restore the IER */ wait_for_xmitr(up, BOTH_EMPTY); @@ -256,7 +290,7 @@ Signed-off-by: Sebastian Andrzej Siewior serial8250_rpm_put(up); } -@@ -2417,7 +2455,7 @@ void serial8250_do_shutdown(struct uart_ +@@ -2354,7 +2392,7 @@ void serial8250_do_shutdown(struct uart_ */ spin_lock_irqsave(&port->lock, flags); up->ier = 0; @@ -265,7 +299,7 @@ Signed-off-by: Sebastian Andrzej Siewior spin_unlock_irqrestore(&port->lock, flags); synchronize_irq(port->irq); -@@ -2728,7 +2766,7 @@ serial8250_do_set_termios(struct uart_po +@@ -2639,7 +2677,7 @@ serial8250_do_set_termios(struct uart_po if (up->capabilities & UART_CAP_RTOIE) up->ier |= UART_IER_RTOIE; @@ -274,7 +308,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (up->capabilities & UART_CAP_EFR) { unsigned char efr = 0; -@@ -3192,7 +3230,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_default +@@ -3103,7 +3141,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_default #ifdef CONFIG_SERIAL_8250_CONSOLE @@ -283,7 +317,7 @@ Signed-off-by: Sebastian Andrzej Siewior { struct uart_8250_port *up = up_to_u8250p(port); -@@ -3200,6 +3238,18 @@ static void serial8250_console_putchar(s +@@ -3111,6 +3149,18 @@ static void serial8250_console_putchar(s serial_port_out(port, UART_TX, ch); } @@ -302,7 +336,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Restore serial console when h/w power-off detected */ -@@ -3221,6 +3271,42 @@ static void serial8250_console_restore(s +@@ -3132,6 +3182,42 @@ static void serial8250_console_restore(s serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS); } @@ -345,7 +379,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Print a string to the serial port trying not to disturb * any possible real use of the port... 
-@@ -3232,27 +3318,13 @@ void serial8250_console_write(struct uar +@@ -3143,27 +3229,13 @@ void serial8250_console_write(struct uar { struct uart_port *port = &up->port; unsigned long flags; @@ -375,7 +409,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* check scratch reg to see if port powered off during system sleep */ if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { -@@ -3260,14 +3332,16 @@ void serial8250_console_write(struct uar +@@ -3171,14 +3243,16 @@ void serial8250_console_write(struct uar up->canary = 0; } @@ -393,7 +427,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * The receive handling will happen properly because the -@@ -3279,8 +3353,7 @@ void serial8250_console_write(struct uar +@@ -3190,8 +3264,7 @@ void serial8250_console_write(struct uar if (up->msr_saved_flags) serial8250_modem_status(up); @@ -403,7 +437,7 @@ Signed-off-by: Sebastian Andrzej Siewior serial8250_rpm_put(up); } -@@ -3301,6 +3374,7 @@ static unsigned int probe_baud(struct ua +@@ -3212,6 +3285,7 @@ static unsigned int probe_baud(struct ua int serial8250_console_setup(struct uart_port *port, char *options, bool probe) { @@ -411,7 +445,7 @@ Signed-off-by: Sebastian Andrzej Siewior int baud = 9600; int bits = 8; int parity = 'n'; -@@ -3309,6 +3383,8 @@ int serial8250_console_setup(struct uart +@@ -3220,6 +3294,8 @@ int serial8250_console_setup(struct uart if (!port->iobase && !port->membase) return -ENODEV; @@ -430,7 +464,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include #include #include -@@ -122,6 +123,8 @@ struct uart_8250_port { +@@ -123,6 +124,8 @@ struct uart_8250_port { #define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA unsigned char msr_saved_flags; @@ -439,7 +473,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct uart_8250_dma *dma; const struct uart_8250_ops *ops; -@@ -173,6 +176,8 @@ void serial8250_init_port(struct uart_82 +@@ -174,6 +177,8 @@ void serial8250_init_port(struct uart_82 void serial8250_set_defaults(struct uart_8250_port *up); void serial8250_console_write(struct uart_8250_port *up, const char *s, unsigned int count); diff --git a/patches/0021-printk-implement-KERN_CONT.patch b/patches/0021-printk-implement-KERN_CONT.patch index 0559dd12f0cd..f2c03f9f27ca 100644 --- a/patches/0021-printk-implement-KERN_CONT.patch +++ b/patches/0021-printk-implement-KERN_CONT.patch @@ -15,7 +15,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1699,8 +1699,6 @@ static inline u32 printk_caller_id(void) +@@ -1709,8 +1709,6 @@ static inline u32 printk_caller_id(void) 0x80000000 + raw_smp_processor_id(); } @@ -24,7 +24,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Continuation lines are buffered, and not committed to the record buffer * until the line is complete, or a race forces it. 
The line fragments -@@ -1711,52 +1709,55 @@ static struct cont { +@@ -1721,52 +1719,55 @@ static struct cont { char buf[LOG_LINE_MAX]; size_t len; /* length == 0 means unused buffer */ u32 caller_id; /* printk_caller_id() of first print */ @@ -104,7 +104,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* ring buffer used as memory allocator for temporary sprint buffers */ DECLARE_STATIC_PRINTKRB(sprint_rb, -@@ -1768,6 +1769,7 @@ asmlinkage int vprintk_emit(int facility +@@ -1778,6 +1779,7 @@ asmlinkage int vprintk_emit(int facility const char *fmt, va_list args) { const u32 caller_id = printk_caller_id(); @@ -112,7 +112,7 @@ Signed-off-by: Sebastian Andrzej Siewior enum log_flags lflags = 0; int printed_len = 0; struct prb_handle h; -@@ -1833,8 +1835,15 @@ asmlinkage int vprintk_emit(int facility +@@ -1843,8 +1845,15 @@ asmlinkage int vprintk_emit(int facility */ printk_emergency(rbuf, level, ts_nsec, cpu, text, text_len); diff --git a/patches/0022-printk-implement-dev-kmsg.patch b/patches/0022-printk-implement-dev-kmsg.patch index cc982e9416a9..411044a17029 100644 --- a/patches/0022-printk-implement-dev-kmsg.patch +++ b/patches/0022-printk-implement-dev-kmsg.patch @@ -45,7 +45,7 @@ Signed-off-by: Sebastian Andrzej Siewior int vprintk(const char *s, va_list args) --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -663,10 +663,11 @@ static ssize_t msg_print_ext_body(char * +@@ -673,10 +673,11 @@ static ssize_t msg_print_ext_body(char * /* /dev/kmsg - userspace message inject/listen interface */ struct devkmsg_user { u64 seq; @@ -58,7 +58,7 @@ Signed-off-by: Sebastian Andrzej Siewior }; static __printf(3, 4) __cold -@@ -749,9 +750,11 @@ static ssize_t devkmsg_read(struct file +@@ -759,9 +760,11 @@ static ssize_t devkmsg_read(struct file size_t count, loff_t *ppos) { struct devkmsg_user *user = file->private_data; @@ -71,7 +71,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!user) return -EBADF; -@@ -760,52 +763,67 @@ static ssize_t devkmsg_read(struct file +@@ -770,52 +773,67 @@ static ssize_t devkmsg_read(struct file if (ret) return ret; @@ -165,7 +165,7 @@ Signed-off-by: Sebastian Andrzej Siewior out: mutex_unlock(&user->lock); return ret; -@@ -814,19 +832,21 @@ static ssize_t devkmsg_read(struct file +@@ -824,19 +842,21 @@ static ssize_t devkmsg_read(struct file static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) { struct devkmsg_user *user = file->private_data; @@ -191,7 +191,7 @@ Signed-off-by: Sebastian Andrzej Siewior break; case SEEK_DATA: /* -@@ -834,40 +854,83 @@ static loff_t devkmsg_llseek(struct file +@@ -844,40 +864,83 @@ static loff_t devkmsg_llseek(struct file * like issued by 'dmesg -c'. Reading /dev/kmsg itself * changes no global state, and does not clear anything. 
*/ @@ -290,7 +290,7 @@ Signed-off-by: Sebastian Andrzej Siewior return ret; } -@@ -897,10 +960,7 @@ static int devkmsg_open(struct inode *in +@@ -907,10 +970,7 @@ static int devkmsg_open(struct inode *in mutex_init(&user->lock); diff --git a/patches/0023-printk-implement-syslog.patch b/patches/0023-printk-implement-syslog.patch index 7426238f38e1..c5af3128e640 100644 --- a/patches/0023-printk-implement-syslog.patch +++ b/patches/0023-printk-implement-syslog.patch @@ -13,7 +13,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -397,10 +397,12 @@ DECLARE_STATIC_PRINTKRB_CPULOCK(printk_c +@@ -407,10 +407,12 @@ DECLARE_STATIC_PRINTKRB_CPULOCK(printk_c /* record buffer */ DECLARE_STATIC_PRINTKRB(printk_rb, CONFIG_LOG_BUF_SHIFT, &printk_cpulock); @@ -27,7 +27,7 @@ Signed-off-by: Sebastian Andrzej Siewior static size_t syslog_partial; static bool syslog_time; -@@ -1293,30 +1295,42 @@ static size_t msg_print_text(const struc +@@ -1303,30 +1305,42 @@ static size_t msg_print_text(const struc return len; } @@ -84,7 +84,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* -@@ -1326,131 +1340,212 @@ static int syslog_print(char __user *buf +@@ -1336,131 +1350,212 @@ static int syslog_print(char __user *buf if (!syslog_partial) syslog_time = printk_time; @@ -359,7 +359,7 @@ Signed-off-by: Sebastian Andrzej Siewior error = check_syslog_permissions(type, source); if (error) -@@ -1468,11 +1563,49 @@ int do_syslog(int type, char __user *buf +@@ -1478,11 +1573,49 @@ int do_syslog(int type, char __user *buf return 0; if (!access_ok(buf, len)) return -EFAULT; @@ -413,7 +413,7 @@ Signed-off-by: Sebastian Andrzej Siewior break; /* Read/clear last kernel messages */ case SYSLOG_ACTION_READ_CLEAR: -@@ -1517,47 +1650,45 @@ int do_syslog(int type, char __user *buf +@@ -1527,47 +1660,45 @@ int do_syslog(int type, char __user *buf break; /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: @@ -483,7 +483,7 @@ Signed-off-by: Sebastian Andrzej Siewior return error; } -@@ -1979,7 +2110,6 @@ EXPORT_SYMBOL(printk); +@@ -1989,7 +2120,6 @@ EXPORT_SYMBOL(printk); #define printk_time false static u64 syslog_seq; diff --git a/patches/0024-printk-implement-kmsg_dump.patch b/patches/0024-printk-implement-kmsg_dump.patch index 5869c8594f92..4de0076408f1 100644 --- a/patches/0024-printk-implement-kmsg_dump.patch +++ b/patches/0024-printk-implement-kmsg_dump.patch @@ -29,7 +29,7 @@ Signed-off-by: Sebastian Andrzej Siewior #ifdef CONFIG_PRINTK --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -407,13 +407,13 @@ static size_t syslog_partial; +@@ -417,13 +417,13 @@ static size_t syslog_partial; static bool syslog_time; /* index and sequence number of the first record stored in the buffer */ @@ -45,7 +45,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* the next printk record to read after the last 'clear' command */ static u64 clear_seq; static u32 clear_idx; -@@ -460,38 +460,6 @@ static char *log_dict(const struct print +@@ -470,38 +470,6 @@ static char *log_dict(const struct print return (char *)msg + sizeof(struct printk_log) + msg->text_len; } @@ -84,7 +84,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void printk_emergency(char *buffer, int level, u64 ts_nsec, u16 cpu, char *text, u16 text_len); -@@ -2110,9 +2078,7 @@ EXPORT_SYMBOL(printk); +@@ -2120,9 +2088,7 @@ EXPORT_SYMBOL(printk); #define printk_time false static u64 syslog_seq; @@ -94,7 +94,7 @@ Signed-off-by: Sebastian Andrzej Siewior static char *log_text(const struct printk_log 
*msg) { return NULL; } static char *log_dict(const struct printk_log *msg) { return NULL; } static struct printk_log *log_from_idx(u32 idx) { return NULL; } -@@ -3022,7 +2988,6 @@ module_param_named(always_kmsg_dump, alw +@@ -3032,7 +2998,6 @@ module_param_named(always_kmsg_dump, alw void kmsg_dump(enum kmsg_dump_reason reason) { struct kmsg_dumper *dumper; @@ -102,7 +102,7 @@ Signed-off-by: Sebastian Andrzej Siewior if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump) return; -@@ -3035,12 +3000,7 @@ void kmsg_dump(enum kmsg_dump_reason rea +@@ -3045,12 +3010,7 @@ void kmsg_dump(enum kmsg_dump_reason rea /* initialize iterator with data about the stored records */ dumper->active = true; @@ -116,7 +116,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* invoke dumper which will iterate over records */ dumper->dump(dumper, reason); -@@ -3073,33 +3033,67 @@ void kmsg_dump(enum kmsg_dump_reason rea +@@ -3083,33 +3043,67 @@ void kmsg_dump(enum kmsg_dump_reason rea bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, char *line, size_t size, size_t *len) { @@ -199,7 +199,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /** -@@ -3122,12 +3116,11 @@ bool kmsg_dump_get_line_nolock(struct km +@@ -3132,12 +3126,11 @@ bool kmsg_dump_get_line_nolock(struct km bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, char *line, size_t size, size_t *len) { @@ -214,7 +214,7 @@ Signed-off-by: Sebastian Andrzej Siewior return ret; } -@@ -3155,74 +3148,101 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); +@@ -3165,74 +3158,101 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, char *buf, size_t size, size_t *len) { @@ -368,7 +368,7 @@ Signed-off-by: Sebastian Andrzej Siewior } EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); -@@ -3238,10 +3258,8 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); +@@ -3248,10 +3268,8 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); */ void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) { @@ -381,7 +381,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /** -@@ -3254,11 +3272,9 @@ void kmsg_dump_rewind_nolock(struct kmsg +@@ -3264,11 +3282,9 @@ void kmsg_dump_rewind_nolock(struct kmsg */ void kmsg_dump_rewind(struct kmsg_dumper *dumper) { diff --git a/patches/0025-printk-remove-unused-code.patch b/patches/0025-printk-remove-unused-code.patch index a9dfe658d943..31dba1a776e7 100644 --- a/patches/0025-printk-remove-unused-code.patch +++ b/patches/0025-printk-remove-unused-code.patch @@ -68,7 +68,7 @@ Signed-off-by: Sebastian Andrzej Siewior int console_printk[5] = { CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ -@@ -356,41 +355,6 @@ struct printk_log { +@@ -366,41 +365,6 @@ struct printk_log { #endif ; @@ -110,7 +110,7 @@ Signed-off-by: Sebastian Andrzej Siewior DECLARE_STATIC_PRINTKRB_CPULOCK(printk_cpulock); #ifdef CONFIG_PRINTK -@@ -400,23 +364,15 @@ DECLARE_STATIC_PRINTKRB(printk_rb, CONFI +@@ -410,23 +374,15 @@ DECLARE_STATIC_PRINTKRB(printk_rb, CONFI static DEFINE_MUTEX(syslog_lock); DECLARE_STATIC_PRINTKRB_ITER(syslog_iter, &printk_rb); @@ -135,7 +135,7 @@ Signed-off-by: Sebastian Andrzej Siewior #ifdef CONFIG_PRINTK_CALLER #define PREFIX_MAX 48 -@@ -428,24 +384,16 @@ static u32 clear_idx; +@@ -438,24 +394,16 @@ static u32 clear_idx; #define LOG_LEVEL(v) ((v) & 0x07) #define LOG_FACILITY(v) ((v) >> 3 & 0xff) @@ -162,7 +162,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* human readable text of the record */ -@@ -970,11 +918,6 @@ const struct file_operations kmsg_fops = +@@ -980,11 +928,6 @@ const struct 
file_operations kmsg_fops = */ void log_buf_vmcoreinfo_setup(void) { @@ -174,7 +174,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Export struct printk_log size and field offsets. User space tools can * parse it and detect any changes to structure down the line. -@@ -990,6 +933,8 @@ void log_buf_vmcoreinfo_setup(void) +@@ -1000,6 +943,8 @@ void log_buf_vmcoreinfo_setup(void) } #endif @@ -183,7 +183,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* requested log_buf_len from kernel cmdline */ static unsigned long __initdata new_log_buf_len; -@@ -1055,9 +1000,12 @@ static void __init log_buf_add_cpu(void) +@@ -1065,9 +1010,12 @@ static void __init log_buf_add_cpu(void) #else /* !CONFIG_SMP */ static inline void log_buf_add_cpu(void) {} #endif /* CONFIG_SMP */ @@ -196,7 +196,7 @@ Signed-off-by: Sebastian Andrzej Siewior unsigned long flags; char *new_log_buf; unsigned int free; -@@ -1089,6 +1037,7 @@ void __init setup_log_buf(int early) +@@ -1099,6 +1047,7 @@ void __init setup_log_buf(int early) pr_info("log_buf_len: %u bytes\n", log_buf_len); pr_info("early log buf free: %u(%u%%)\n", free, (free * 100) / __LOG_BUF_LEN); @@ -204,7 +204,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static bool __read_mostly ignore_loglevel; -@@ -2009,7 +1958,7 @@ asmlinkage int vprintk_emit(int facility +@@ -2019,7 +1968,7 @@ asmlinkage int vprintk_emit(int facility } EXPORT_SYMBOL(vprintk_emit); @@ -213,7 +213,7 @@ Signed-off-by: Sebastian Andrzej Siewior { return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); } -@@ -2070,31 +2019,6 @@ asmlinkage __visible int printk(const ch +@@ -2080,31 +2029,6 @@ asmlinkage __visible int printk(const ch return r; } EXPORT_SYMBOL(printk); @@ -245,7 +245,7 @@ Signed-off-by: Sebastian Andrzej Siewior #endif /* CONFIG_PRINTK */ #ifdef CONFIG_EARLY_PRINTK -@@ -2391,15 +2315,10 @@ void console_unblank(void) +@@ -2401,15 +2325,10 @@ void console_unblank(void) void console_flush_on_panic(enum con_flush_mode mode) { /* @@ -264,7 +264,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* -@@ -2748,43 +2667,6 @@ static int __init printk_late_init(void) +@@ -2758,43 +2677,6 @@ static int __init printk_late_init(void) late_initcall(printk_late_init); #if defined CONFIG_PRINTK @@ -308,7 +308,7 @@ Signed-off-by: Sebastian Andrzej Siewior static int printk_kthread_func(void *data) { struct prb_iterator iter; -@@ -2850,22 +2732,9 @@ static int __init init_printk_kthread(vo +@@ -2860,22 +2742,9 @@ static int __init init_printk_kthread(vo } late_initcall(init_printk_kthread); diff --git a/patches/ARM-Allow-to-enable-RT.patch b/patches/ARM-Allow-to-enable-RT.patch new file mode 100644 index 000000000000..1721a11eb9c5 --- /dev/null +++ b/patches/ARM-Allow-to-enable-RT.patch @@ -0,0 +1,21 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 11 Oct 2019 13:14:29 +0200 +Subject: [PATCH] ARM: Allow to enable RT + +Allow to select RT. 
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/arm/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/arm/Kconfig ++++ b/arch/arm/Kconfig +@@ -32,6 +32,7 @@ config ARM + select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 + select ARCH_SUPPORTS_ATOMIC_RMW ++ select ARCH_SUPPORTS_RT + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU diff --git a/patches/ARM-enable-irq-in-translation-section-permission-fau.patch b/patches/ARM-enable-irq-in-translation-section-permission-fau.patch index d90d23403814..9e083f041c94 100644 --- a/patches/ARM-enable-irq-in-translation-section-permission-fau.patch +++ b/patches/ARM-enable-irq-in-translation-section-permission-fau.patch @@ -3,7 +3,7 @@ Date: Wed, 10 Dec 2014 10:32:09 +0800 Subject: ARM: enable irq in translation/section permission fault handlers Probably happens on all ARM, with -CONFIG_PREEMPT_RT_FULL +CONFIG_PREEMPT_RT CONFIG_DEBUG_ATOMIC_SLEEP This simple program.... @@ -63,7 +63,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c -@@ -434,6 +434,9 @@ do_translation_fault(unsigned long addr, +@@ -414,6 +414,9 @@ do_translation_fault(unsigned long addr, if (addr < TASK_SIZE) return do_page_fault(addr, fsr, regs); @@ -73,7 +73,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (user_mode(regs)) goto bad_area; -@@ -501,6 +504,9 @@ do_translation_fault(unsigned long addr, +@@ -481,6 +484,9 @@ do_translation_fault(unsigned long addr, static int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { diff --git a/patches/ARM64-Allow-to-enable-RT.patch b/patches/ARM64-Allow-to-enable-RT.patch new file mode 100644 index 000000000000..42e9db67ac86 --- /dev/null +++ b/patches/ARM64-Allow-to-enable-RT.patch @@ -0,0 +1,21 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 11 Oct 2019 13:14:35 +0200 +Subject: [PATCH] ARM64: Allow to enable RT + +Allow to select RT. 
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/arm64/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -69,6 +69,7 @@ config ARM64 + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 || CC_IS_CLANG + select ARCH_SUPPORTS_NUMA_BALANCING ++ select ARCH_SUPPORTS_RT + select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT + select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT + select ARCH_WANT_FRAME_POINTERS diff --git a/patches/BPF-Disable-on-PREEMPT_RT.patch b/patches/BPF-Disable-on-PREEMPT_RT.patch new file mode 100644 index 000000000000..a44722ca8a29 --- /dev/null +++ b/patches/BPF-Disable-on-PREEMPT_RT.patch @@ -0,0 +1,35 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 10 Oct 2019 16:54:45 +0200 +Subject: [PATCH] BPF: Disable on PREEMPT_RT + +Disable BPF on PREEMPT_RT because +- it allocates and frees memory in atomic context +- it uses up_read_non_owner() +- BPF_PROG_RUN() expects to be invoked in non-preemptible context + +Signed-off-by: Sebastian Andrzej Siewior +--- + init/Kconfig | 1 + + net/kcm/Kconfig | 1 + + 2 files changed, 2 insertions(+) + +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -1629,6 +1629,7 @@ config KALLSYMS_BASE_RELATIVE + # syscall, maps, verifier + config BPF_SYSCALL + bool "Enable bpf() system call" ++ depends on !PREEMPT_RT + select BPF + select IRQ_WORK + default n +--- a/net/kcm/Kconfig ++++ b/net/kcm/Kconfig +@@ -3,6 +3,7 @@ + config AF_KCM + tristate "KCM sockets" + depends on INET ++ depends on !PREEMPT_RT + select BPF_SYSCALL + select STREAM_PARSER + ---help--- diff --git a/patches/Drivers-hv-vmbus-include-header-for-get_irq_regs.patch b/patches/Drivers-hv-vmbus-include-header-for-get_irq_regs.patch deleted file mode 100644 index 33d7d5c132cf..000000000000 --- a/patches/Drivers-hv-vmbus-include-header-for-get_irq_regs.patch +++ /dev/null @@ -1,33 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Wed, 29 Aug 2018 21:59:04 +0200 -Subject: [PATCH] Drivers: hv: vmbus: include header for get_irq_regs() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -On !RT the header file get_irq_regs() gets pulled in via other header files. On -RT it does not and the build fails: - - drivers/hv/vmbus_drv.c:975 implicit declaration of function ‘get_irq_regs’ [-Werror=implicit-function-declaration] - drivers/hv/hv.c:115 implicit declaration of function ‘get_irq_regs’ [-Werror=implicit-function-declaration] - -Add the header file for get_irq_regs() in a common header so it used by -vmbus_drv.c by hv.c for their get_irq_regs() usage. 
- -Reported-by: Bernhard Landauer -Reported-by: Ralf Ramsauer -Signed-off-by: Sebastian Andrzej Siewior ---- - drivers/hv/hyperv_vmbus.h | 1 + - 1 file changed, 1 insertion(+) - ---- a/drivers/hv/hyperv_vmbus.h -+++ b/drivers/hv/hyperv_vmbus.h -@@ -18,6 +18,7 @@ - #include - #include - #include -+#include - - #include "hv_trace.h" - diff --git a/patches/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch b/patches/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch index 50708ff16c22..818944745bec 100644 --- a/patches/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch +++ b/patches/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch @@ -22,7 +22,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c -@@ -702,7 +702,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v +@@ -700,7 +700,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v * involves poking the GIC, which must be done in a * non-preemptible context. */ @@ -31,7 +31,7 @@ Signed-off-by: Sebastian Andrzej Siewior kvm_pmu_flush_hwstate(vcpu); -@@ -751,7 +751,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v +@@ -749,7 +749,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v kvm_timer_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu); local_irq_enable(); @@ -40,7 +40,7 @@ Signed-off-by: Sebastian Andrzej Siewior continue; } -@@ -829,7 +829,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v +@@ -827,7 +827,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v /* Exit types that need handling before we can be preempted */ handle_exit_early(vcpu, run, ret); diff --git a/patches/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch b/patches/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch index 11030c3d408c..a503b934ea44 100644 --- a/patches/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch +++ b/patches/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch @@ -6,20 +6,21 @@ Cc: Anna Schumaker , tglx@linutronix.de Subject: NFSv4: replace seqcount_t with a seqlock_t -The raw_write_seqcount_begin() in nfs4_reclaim_open_state() bugs me -because it maps to preempt_disable() in -RT which I can't have at this -point. So I took a look at the code. -It the lockdep part was removed in commit abbec2da13f0 ("NFS: Use -raw_write_seqcount_begin/end int nfs4_reclaim_open_state") because -lockdep complained. The whole seqcount thing was introduced in commit -c137afabe330 ("NFSv4: Allow the state manager to mark an open_owner as -being recovered"). +The raw_write_seqcount_begin() in nfs4_reclaim_open_state() causes a +preempt_disable() on -RT. The spin_lock()/spin_unlock() in that section does +not work. +The lockdep part was removed in commit + abbec2da13f0 ("NFS: Use raw_write_seqcount_begin/end int nfs4_reclaim_open_state") +because lockdep complained. +The whole seqcount thing was introduced in commit + c137afabe330 ("NFSv4: Allow the state manager to mark an open_owner as being recovered"). The recovery threads runs only once. write_seqlock() does not work on !RT because it disables preemption and it the writer side is preemptible (has to remain so despite the fact that it will block readers). 
Reported-by: kernel test robot +Link: https://lkml.kernel.org/r/20161021164727.24485-1-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior --- fs/nfs/delegation.c | 4 ++-- @@ -30,7 +31,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c -@@ -152,11 +152,11 @@ static int nfs_delegation_claim_opens(st +@@ -162,11 +162,11 @@ static int nfs_delegation_claim_opens(st sp = state->owner; /* Block nfs4_proc_unlck */ mutex_lock(&sp->so_delegreturn_mutex); @@ -57,7 +58,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c -@@ -2926,7 +2926,7 @@ static int _nfs4_open_and_get_state(stru +@@ -2956,7 +2956,7 @@ static int _nfs4_open_and_get_state(stru unsigned int seq; int ret; @@ -66,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior ret = _nfs4_proc_open(opendata, ctx); if (ret != 0) -@@ -2967,7 +2967,7 @@ static int _nfs4_open_and_get_state(stru +@@ -2998,7 +2998,7 @@ static int _nfs4_open_and_get_state(stru if (d_inode(dentry) == state->inode) { nfs_inode_attach_open_context(ctx); @@ -86,11 +87,11 @@ Signed-off-by: Sebastian Andrzej Siewior mutex_init(&sp->so_delegreturn_mutex); return sp; } -@@ -1633,8 +1633,12 @@ static int nfs4_reclaim_open_state(struc +@@ -1618,8 +1618,12 @@ static int nfs4_reclaim_open_state(struc * recovering after a network partition or a reboot from a * server that doesn't support a grace period. */ -+#ifdef CONFIG_PREEMPT_RT_FULL ++#ifdef CONFIG_PREEMPT_RT + write_seqlock(&sp->so_reclaim_seqlock); +#else + write_seqcount_begin(&sp->so_reclaim_seqlock.seqcount); @@ -100,13 +101,13 @@ Signed-off-by: Sebastian Andrzej Siewior restart: list_for_each_entry(state, &sp->so_states, open_states) { if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) -@@ -1695,14 +1699,20 @@ static int nfs4_reclaim_open_state(struc +@@ -1680,14 +1684,20 @@ static int nfs4_reclaim_open_state(struc spin_lock(&sp->so_lock); goto restart; } - raw_write_seqcount_end(&sp->so_reclaim_seqcount); spin_unlock(&sp->so_lock); -+#ifdef CONFIG_PREEMPT_RT_FULL ++#ifdef CONFIG_PREEMPT_RT + write_sequnlock(&sp->so_reclaim_seqlock); +#else + write_seqcount_end(&sp->so_reclaim_seqlock.seqcount); @@ -117,7 +118,7 @@ Signed-off-by: Sebastian Andrzej Siewior - spin_lock(&sp->so_lock); - raw_write_seqcount_end(&sp->so_reclaim_seqcount); - spin_unlock(&sp->so_lock); -+#ifdef CONFIG_PREEMPT_RT_FULL ++#ifdef CONFIG_PREEMPT_RT + write_sequnlock(&sp->so_reclaim_seqlock); +#else + write_seqcount_end(&sp->so_reclaim_seqlock.seqcount); diff --git a/patches/POWERPC-Allow-to-enable-RT.patch b/patches/POWERPC-Allow-to-enable-RT.patch new file mode 100644 index 000000000000..97bb509b0ed9 --- /dev/null +++ b/patches/POWERPC-Allow-to-enable-RT.patch @@ -0,0 +1,21 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 11 Oct 2019 13:14:41 +0200 +Subject: [PATCH] POWERPC: Allow to enable RT + +Allow to select RT. 
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/powerpc/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/powerpc/Kconfig ++++ b/arch/powerpc/Kconfig +@@ -144,6 +144,7 @@ config PPC + select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_SUPPORTS_ATOMIC_RMW ++ select ARCH_SUPPORTS_RT + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF if PPC64 + select ARCH_WANT_IPC_PARSE_VERSION diff --git a/patches/Use-CONFIG_PREEMPTION.patch b/patches/Use-CONFIG_PREEMPTION.patch new file mode 100644 index 000000000000..ca2dc098bef4 --- /dev/null +++ b/patches/Use-CONFIG_PREEMPTION.patch @@ -0,0 +1,1523 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 26 Jul 2019 11:30:49 +0200 +Subject: [PATCH] Use CONFIG_PREEMPTION + +Thisi is an all-in-one patch of the current `PREEMPTION' branch. + +Signed-off-by: Sebastian Andrzej Siewior +--- + Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html | 8 - + Documentation/RCU/Design/Requirements/Requirements.html | 24 ++-- + Documentation/RCU/checklist.txt | 4 + Documentation/RCU/rcubarrier.txt | 8 - + Documentation/RCU/stallwarn.txt | 4 + Documentation/RCU/whatisRCU.txt | 7 - + Documentation/trace/ftrace-uses.rst | 2 + arch/arc/kernel/entry.S | 6 - + arch/arm/include/asm/switch_to.h | 2 + arch/arm/kernel/entry-armv.S | 4 + arch/arm/kernel/traps.c | 2 + arch/arm/mm/cache-v7.S | 4 + arch/arm/mm/cache-v7m.S | 4 + arch/arm64/Kconfig | 52 +++++----- + arch/arm64/crypto/sha256-glue.c | 2 + arch/arm64/include/asm/assembler.h | 6 - + arch/arm64/include/asm/preempt.h | 4 + arch/arm64/kernel/entry.S | 2 + arch/arm64/kernel/traps.c | 3 + arch/c6x/kernel/entry.S | 8 - + arch/csky/kernel/entry.S | 4 + arch/h8300/kernel/entry.S | 6 - + arch/hexagon/kernel/vm_entry.S | 6 - + arch/ia64/kernel/entry.S | 12 +- + arch/ia64/kernel/kprobes.c | 2 + arch/m68k/coldfire/entry.S | 2 + arch/microblaze/kernel/entry.S | 2 + arch/mips/include/asm/asmmacro.h | 4 + arch/mips/kernel/entry.S | 6 - + arch/nds32/Kconfig | 2 + arch/nds32/kernel/ex-exit.S | 4 + arch/nios2/kernel/entry.S | 2 + arch/parisc/Kconfig | 2 + arch/parisc/kernel/entry.S | 10 - + arch/powerpc/Kconfig | 2 + arch/powerpc/kernel/entry_32.S | 4 + arch/powerpc/kernel/entry_64.S | 4 + arch/powerpc/kernel/traps.c | 7 + + arch/riscv/kernel/entry.S | 4 + arch/s390/Kconfig | 2 + arch/s390/include/asm/preempt.h | 4 + arch/s390/kernel/dumpstack.c | 2 + arch/s390/kernel/entry.S | 2 + arch/sh/Kconfig | 2 + arch/sh/kernel/cpu/sh5/entry.S | 4 + arch/sh/kernel/entry-common.S | 4 + arch/sparc/Kconfig | 2 + arch/sparc/kernel/rtrap_64.S | 2 + arch/xtensa/kernel/entry.S | 2 + arch/xtensa/kernel/traps.c | 7 - + drivers/gpu/drm/Kconfig | 2 + drivers/media/platform/Kconfig | 2 + drivers/video/backlight/Kconfig | 4 + drivers/xen/preempt.c | 4 + fs/btrfs/volumes.h | 2 + fs/stack.c | 6 - + include/linux/fs.h | 4 + include/linux/genhd.h | 6 - + include/linux/rcupdate.h | 4 + include/xen/xen-ops.h | 4 + kernel/Kconfig.locks | 12 +- + kernel/rcu/Kconfig | 4 + kernel/rcu/rcutorture.c | 2 + kernel/rcu/srcutiny.c | 2 + kernel/rcu/tree.c | 4 + kernel/rcu/tree_exp.h | 2 + kernel/rcu/tree_plugin.h | 4 + kernel/trace/trace.c | 2 + kernel/workqueue.c | 2 + lib/Kconfig.debug | 2 + mm/memory.c | 2 + mm/slub.c | 12 +- + net/core/dev.c | 2 + 73 files changed, 191 insertions(+), 173 deletions(-) + +--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html ++++ 
b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html +@@ -56,8 +56,8 @@ sections. + RCU-preempt Expedited Grace Periods + +

+-CONFIG_PREEMPT=y kernels implement RCU-preempt. +-The overall flow of the handling of a given CPU by an RCU-preempt ++CONFIG_PREEMPT=y and CONFIG_PREEMPT_RT=y kernels implement ++RCU-preempt. The overall flow of the handling of a given CPU by an RCU-preempt + expedited grace period is shown in the following diagram: + +

ExpRCUFlow.svg +@@ -140,8 +140,8 @@ or offline, among other things. + RCU-sched Expedited Grace Periods + +

+-CONFIG_PREEMPT=n kernels implement RCU-sched. +-The overall flow of the handling of a given CPU by an RCU-sched ++CONFIG_PREEMPT=n and CONFIG_PREEMPT_RT=n kernels implement ++RCU-sched. The overall flow of the handling of a given CPU by an RCU-sched + expedited grace period is shown in the following diagram: + +

ExpSchedFlow.svg +--- a/Documentation/RCU/Design/Requirements/Requirements.html ++++ b/Documentation/RCU/Design/Requirements/Requirements.html +@@ -106,7 +106,7 @@ big RCU read-side critical section. + Production-quality implementations of rcu_read_lock() and + rcu_read_unlock() are extremely lightweight, and in + fact have exactly zero overhead in Linux kernels built for production +-use with CONFIG_PREEMPT=n. ++use with CONFIG_PREEMPTION=n. + +

+ This guarantee allows ordering to be enforced with extremely low +@@ -1499,7 +1499,7 @@ costs have plummeted. + However, as I learned from Matt Mackall's + bloatwatch + efforts, memory footprint is critically important on single-CPU systems with +-non-preemptible (CONFIG_PREEMPT=n) kernels, and thus ++non-preemptible (CONFIG_PREEMPTION=n) kernels, and thus + tiny RCU + was born. + Josh Triplett has since taken over the small-memory banner with his +@@ -1887,7 +1887,7 @@ constructs, there are limitations. +

+ Implementations of RCU for which rcu_read_lock() + and rcu_read_unlock() generate no code, such as +-Linux-kernel RCU when CONFIG_PREEMPT=n, can be ++Linux-kernel RCU when CONFIG_PREEMPTION=n, can be + nested arbitrarily deeply. + After all, there is no overhead. + Except that if all these instances of rcu_read_lock() +@@ -2229,7 +2229,7 @@ be a no-op. +

+ However, once the scheduler has spawned its first kthread, this early + boot trick fails for synchronize_rcu() (as well as for +-synchronize_rcu_expedited()) in CONFIG_PREEMPT=y ++synchronize_rcu_expedited()) in CONFIG_PREEMPTION=y + kernels. + The reason is that an RCU read-side critical section might be preempted, + which means that a subsequent synchronize_rcu() really does have +@@ -2568,7 +2568,7 @@ The compiler must not be permitted to tr + +

+ If the compiler did make this transformation in a +-CONFIG_PREEMPT=n kernel build, and if get_user() did ++CONFIG_PREEMPTION=n kernel build, and if get_user() did + page fault, the result would be a quiescent state in the middle + of an RCU read-side critical section. + This misplaced quiescent state could result in line 4 being +@@ -2906,7 +2906,7 @@ in conjunction with the + The real-time-latency response requirements are such that the + traditional approach of disabling preemption across RCU + read-side critical sections is inappropriate. +-Kernels built with CONFIG_PREEMPT=y therefore ++Kernels built with CONFIG_PREEMPTION=y therefore + use an RCU implementation that allows RCU read-side critical + sections to be preempted. + This requirement made its presence known after users made it +@@ -3064,7 +3064,7 @@ includes + rcu_barrier_bh(), and + rcu_read_lock_bh_held(). + However, the update-side APIs are now simple wrappers for other RCU +-flavors, namely RCU-sched in CONFIG_PREEMPT=n kernels and RCU-preempt ++flavors, namely RCU-sched in CONFIG_PREEMPTION=n kernels and RCU-preempt + otherwise. + +

Sched Flavor (Historical)

+@@ -3088,12 +3088,12 @@ of an RCU read-side critical section can + Therefore, RCU-sched was created, which follows “classic” + RCU in that an RCU-sched grace period waits for for pre-existing + interrupt and NMI handlers. +-In kernels built with CONFIG_PREEMPT=n, the RCU and RCU-sched ++In kernels built with CONFIG_PREEMPTION=n, the RCU and RCU-sched + APIs have identical implementations, while kernels built with +-CONFIG_PREEMPT=y provide a separate implementation for each. ++CONFIG_PREEMPTION=y provide a separate implementation for each. + +

+-Note well that in CONFIG_PREEMPT=y kernels, ++Note well that in CONFIG_PREEMPTION=y kernels, + rcu_read_lock_sched() and rcu_read_unlock_sched() + disable and re-enable preemption, respectively. + This means that if there was a preemption attempt during the +@@ -3302,12 +3302,12 @@ The tasks-RCU API is quite compact, cons + call_rcu_tasks(), + synchronize_rcu_tasks(), and + rcu_barrier_tasks(). +-In CONFIG_PREEMPT=n kernels, trampolines cannot be preempted, ++In CONFIG_PREEMPTION=n kernels, trampolines cannot be preempted, + so these APIs map to + call_rcu(), + synchronize_rcu(), and + rcu_barrier(), respectively. +-In CONFIG_PREEMPT=y kernels, trampolines can be preempted, ++In CONFIG_PREEMPTION=y kernels, trampolines can be preempted, + and these three APIs are therefore implemented by separate functions + that check for voluntary context switches. + +--- a/Documentation/RCU/checklist.txt ++++ b/Documentation/RCU/checklist.txt +@@ -210,8 +210,8 @@ over a rather long period of time, but i + the rest of the system. + + 7. As of v4.20, a given kernel implements only one RCU flavor, +- which is RCU-sched for PREEMPT=n and RCU-preempt for PREEMPT=y. +- If the updater uses call_rcu() or synchronize_rcu(), ++ which is RCU-sched for PREEMPTION=n and RCU-preempt for ++ PREEMPTION=y. If the updater uses call_rcu() or synchronize_rcu(), + then the corresponding readers my use rcu_read_lock() and + rcu_read_unlock(), rcu_read_lock_bh() and rcu_read_unlock_bh(), + or any pair of primitives that disables and re-enables preemption, +--- a/Documentation/RCU/rcubarrier.txt ++++ b/Documentation/RCU/rcubarrier.txt +@@ -6,8 +6,8 @@ RCU (read-copy update) is a synchronizat + of as a replacement for read-writer locking (among other things), but with + very low-overhead readers that are immune to deadlock, priority inversion, + and unbounded latency. RCU read-side critical sections are delimited +-by rcu_read_lock() and rcu_read_unlock(), which, in non-CONFIG_PREEMPT +-kernels, generate no code whatsoever. ++by rcu_read_lock() and rcu_read_unlock(), which, in ++non-CONFIG_PREEMPTION kernels, generate no code whatsoever. + + This means that RCU writers are unaware of the presence of concurrent + readers, so that RCU updates to shared data must be undertaken quite +@@ -303,10 +303,10 @@ Answer: This cannot happen. The reason i + to smp_call_function() and further to smp_call_function_on_cpu(), + causing this latter to spin until the cross-CPU invocation of + rcu_barrier_func() has completed. This by itself would prevent +- a grace period from completing on non-CONFIG_PREEMPT kernels, ++ a grace period from completing on non-CONFIG_PREEMPTION kernels, + since each CPU must undergo a context switch (or other quiescent + state) before the grace period can complete. However, this is +- of no use in CONFIG_PREEMPT kernels. ++ of no use in CONFIG_PREEMPTION kernels. + + Therefore, on_each_cpu() disables preemption across its call + to smp_call_function() and also across the local call to +--- a/Documentation/RCU/stallwarn.txt ++++ b/Documentation/RCU/stallwarn.txt +@@ -20,7 +20,7 @@ o A CPU looping with preemption disabled + + o A CPU looping with bottom halves disabled. + +-o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel ++o For !CONFIG_PREEMPTION kernels, a CPU looping anywhere in the kernel + without invoking schedule(). If the looping in the kernel is + really expected and desirable behavior, you might need to add + some calls to cond_resched(). 
+@@ -39,7 +39,7 @@ o Anything that prevents RCU's grace-per + result in the "rcu_.*kthread starved for" console-log message, + which will include additional debugging information. + +-o A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might ++o A CPU-bound real-time task in a CONFIG_PREEMPTION kernel, which might + happen to preempt a low-priority task in the middle of an RCU + read-side critical section. This is especially damaging if + that low-priority task is not permitted to run on any other CPU, +--- a/Documentation/RCU/whatisRCU.txt ++++ b/Documentation/RCU/whatisRCU.txt +@@ -648,9 +648,10 @@ Quick Quiz #1: Why is this argument naiv + + This section presents a "toy" RCU implementation that is based on + "classic RCU". It is also short on performance (but only for updates) and +-on features such as hotplug CPU and the ability to run in CONFIG_PREEMPT +-kernels. The definitions of rcu_dereference() and rcu_assign_pointer() +-are the same as those shown in the preceding section, so they are omitted. ++on features such as hotplug CPU and the ability to run in ++CONFIG_PREEMPTION kernels. The definitions of rcu_dereference() and ++rcu_assign_pointer() are the same as those shown in the preceding ++section, so they are omitted. + + void rcu_read_lock(void) { } + +--- a/Documentation/trace/ftrace-uses.rst ++++ b/Documentation/trace/ftrace-uses.rst +@@ -146,7 +146,7 @@ FTRACE_OPS_FL_RECURSION_SAFE + itself or any nested functions that those functions call. + + If this flag is set, it is possible that the callback will also +- be called with preemption enabled (when CONFIG_PREEMPT is set), ++ be called with preemption enabled (when CONFIG_PREEMPTION is set), + but this is not guaranteed. + + FTRACE_OPS_FL_IPMODIFY +--- a/arch/arc/kernel/entry.S ++++ b/arch/arc/kernel/entry.S +@@ -337,11 +337,11 @@ ENTRY(ret_from_exception) + resume_kernel_mode: + + ; Disable Interrupts from this point on +- ; CONFIG_PREEMPT: This is a must for preempt_schedule_irq() +- ; !CONFIG_PREEMPT: To ensure restore_regs is intr safe ++ ; CONFIG_PREEMPTION: This is a must for preempt_schedule_irq() ++ ; !CONFIG_PREEMPTION: To ensure restore_regs is intr safe + IRQ_DISABLE r9 + +-#ifdef CONFIG_PREEMPT ++#ifdef CONFIG_PREEMPTION + + ; Can't preempt if preemption disabled + GET_CURR_THR_INFO_FROM_SP r10 +--- a/arch/arm/include/asm/switch_to.h ++++ b/arch/arm/include/asm/switch_to.h +@@ -10,7 +10,7 @@ + * to ensure that the maintenance completes in case we migrate to another + * CPU. 
+ */ +-#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) && defined(CONFIG_CPU_V7) ++#if defined(CONFIG_PREEMPTION) && defined(CONFIG_SMP) && defined(CONFIG_CPU_V7) + #define __complete_pending_tlbi() dsb(ish) + #else + #define __complete_pending_tlbi() +--- a/arch/arm/kernel/entry-armv.S ++++ b/arch/arm/kernel/entry-armv.S +@@ -211,7 +211,7 @@ ENDPROC(__dabt_svc) + svc_entry + irq_handler + +-#ifdef CONFIG_PREEMPT ++#ifdef CONFIG_PREEMPTION + ldr r8, [tsk, #TI_PREEMPT] @ get preempt count + ldr r0, [tsk, #TI_FLAGS] @ get flags + teq r8, #0 @ if preempt count != 0 +@@ -226,7 +226,7 @@ ENDPROC(__irq_svc) + + .ltorg + +-#ifdef CONFIG_PREEMPT ++#ifdef CONFIG_PREEMPTION + svc_preempt: + mov r8, lr + 1: bl preempt_schedule_irq @ irq en/disable is done inside +--- a/arch/arm/kernel/traps.c ++++ b/arch/arm/kernel/traps.c +@@ -248,6 +248,8 @@ void show_stack(struct task_struct *tsk, + + #ifdef CONFIG_PREEMPT + #define S_PREEMPT " PREEMPT" ++#elif defined(CONFIG_PREEMPT_RT) ++#define S_PREEMPT " PREEMPT_RT" + #else + #define S_PREEMPT "" + #endif +--- a/arch/arm/mm/cache-v7.S ++++ b/arch/arm/mm/cache-v7.S +@@ -135,13 +135,13 @@ ENTRY(v7_flush_dcache_all) + and r1, r1, #7 @ mask of the bits for current cache only + cmp r1, #2 @ see what cache we have at this level + blt skip @ skip if no cache, or just i-cache +-#ifdef CONFIG_PREEMPT ++#ifdef CONFIG_PREEMPTION + save_and_disable_irqs_notrace r9 @ make cssr&csidr read atomic + #endif + mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr + isb @ isb to sych the new cssr&csidr + mrc p15, 1, r1, c0, c0, 0 @ read the new csidr +-#ifdef CONFIG_PREEMPT ++#ifdef CONFIG_PREEMPTION + restore_irqs_notrace r9 + #endif + and r2, r1, #7 @ extract the length of the cache lines +--- a/arch/arm/mm/cache-v7m.S ++++ b/arch/arm/mm/cache-v7m.S +@@ -183,13 +183,13 @@ ENTRY(v7m_flush_dcache_all) + and r1, r1, #7 @ mask of the bits for current cache only + cmp r1, #2 @ see what cache we have at this level + blt skip @ skip if no cache, or just i-cache +-#ifdef CONFIG_PREEMPT ++#ifdef CONFIG_PREEMPTION + save_and_disable_irqs_notrace r9 @ make cssr&csidr read atomic + #endif + write_csselr r10, r1 @ set current cache level + isb @ isb to sych the new cssr&csidr + read_ccsidr r1 @ read the new csidr +-#ifdef CONFIG_PREEMPT ++#ifdef CONFIG_PREEMPTION + restore_irqs_notrace r9 + #endif + and r2, r1, #7 @ extract the length of the cache lines +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -35,32 +35,32 @@ config ARM64 + select ARCH_HAS_TEARDOWN_DMA_OPS if IOMMU_SUPPORT + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAVE_NMI_SAFE_CMPXCHG +- select ARCH_INLINE_READ_LOCK if !PREEMPT +- select ARCH_INLINE_READ_LOCK_BH if !PREEMPT +- select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPT +- select ARCH_INLINE_READ_LOCK_IRQSAVE if !PREEMPT +- select ARCH_INLINE_READ_UNLOCK if !PREEMPT +- select ARCH_INLINE_READ_UNLOCK_BH if !PREEMPT +- select ARCH_INLINE_READ_UNLOCK_IRQ if !PREEMPT +- select ARCH_INLINE_READ_UNLOCK_IRQRESTORE if !PREEMPT +- select ARCH_INLINE_WRITE_LOCK if !PREEMPT +- select ARCH_INLINE_WRITE_LOCK_BH if !PREEMPT +- select ARCH_INLINE_WRITE_LOCK_IRQ if !PREEMPT +- select ARCH_INLINE_WRITE_LOCK_IRQSAVE if !PREEMPT +- select ARCH_INLINE_WRITE_UNLOCK if !PREEMPT +- select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPT +- select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPT +- select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPT +- select ARCH_INLINE_SPIN_TRYLOCK if !PREEMPT +- select ARCH_INLINE_SPIN_TRYLOCK_BH if !PREEMPT +- select 
ARCH_INLINE_SPIN_LOCK if !PREEMPT +- select ARCH_INLINE_SPIN_LOCK_BH if !PREEMPT +- select ARCH_INLINE_SPIN_LOCK_IRQ if !PREEMPT +- select ARCH_INLINE_SPIN_LOCK_IRQSAVE if !PREEMPT +- select ARCH_INLINE_SPIN_UNLOCK if !PREEMPT +- select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPT +- select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPT +- select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPT ++ select ARCH_INLINE_READ_LOCK if !PREEMPTION ++ select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION ++ select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPTION ++ select ARCH_INLINE_READ_LOCK_IRQSAVE if !PREEMPTION ++ select ARCH_INLINE_READ_UNLOCK if !PREEMPTION ++ select ARCH_INLINE_READ_UNLOCK_BH if !PREEMPTION ++ select ARCH_INLINE_READ_UNLOCK_IRQ if !PREEMPTION ++ select ARCH_INLINE_READ_UNLOCK_IRQRESTORE if !PREEMPTION ++ select ARCH_INLINE_WRITE_LOCK if !PREEMPTION ++ select ARCH_INLINE_WRITE_LOCK_BH if !PREEMPTION ++ select ARCH_INLINE_WRITE_LOCK_IRQ if !PREEMPTION ++ select ARCH_INLINE_WRITE_LOCK_IRQSAVE if !PREEMPTION ++ select ARCH_INLINE_WRITE_UNLOCK if !PREEMPTION ++ select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPTION ++ select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPTION ++ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPTION ++ select ARCH_INLINE_SPIN_TRYLOCK if !PREEMPTION ++ select ARCH_INLINE_SPIN_TRYLOCK_BH if !PREEMPTION ++ select ARCH_INLINE_SPIN_LOCK if !PREEMPTION ++ select ARCH_INLINE_SPIN_LOCK_BH if !PREEMPTION ++ select ARCH_INLINE_SPIN_LOCK_IRQ if !PREEMPTION ++ select ARCH_INLINE_SPIN_LOCK_IRQSAVE if !PREEMPTION ++ select ARCH_INLINE_SPIN_UNLOCK if !PREEMPTION ++ select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPTION ++ select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION ++ select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION + select ARCH_KEEP_MEMBLOCK + select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_USE_QUEUED_RWLOCKS +--- a/arch/arm64/crypto/sha256-glue.c ++++ b/arch/arm64/crypto/sha256-glue.c +@@ -97,7 +97,7 @@ static int sha256_update_neon(struct sha + * input when running on a preemptible kernel, but process the + * data block by block instead. + */ +- if (IS_ENABLED(CONFIG_PREEMPT) && ++ if (IS_ENABLED(CONFIG_PREEMPTION) && + chunk + sctx->count % SHA256_BLOCK_SIZE > SHA256_BLOCK_SIZE) + chunk = SHA256_BLOCK_SIZE - + sctx->count % SHA256_BLOCK_SIZE; +--- a/arch/arm64/include/asm/assembler.h ++++ b/arch/arm64/include/asm/assembler.h +@@ -699,8 +699,8 @@ USER(\label, ic ivau, \tmp2) // invali + * where