summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt9
-rw-r--r--Documentation/admin-guide/perf/arm-cmn.rst2
-rw-r--r--Documentation/devicetree/bindings/arm/pmu.yaml1
-rw-r--r--Documentation/virt/kvm/arm/hyp-abi.rst9
-rw-r--r--arch/arm/include/asm/archrandom.h10
-rw-r--r--arch/arm64/Kconfig11
-rw-r--r--arch/arm64/Makefile10
-rw-r--r--arch/arm64/include/asm/archrandom.h82
-rw-r--r--arch/arm64/include/asm/asm-uaccess.h4
-rw-r--r--arch/arm64/include/asm/assembler.h33
-rw-r--r--arch/arm64/include/asm/cacheflush.h5
-rw-r--r--arch/arm64/include/asm/cpufeature.h11
-rw-r--r--arch/arm64/include/asm/el2_setup.h60
-rw-r--r--arch/arm64/include/asm/kexec.h5
-rw-r--r--arch/arm64/include/asm/kvm_asm.h8
-rw-r--r--arch/arm64/include/asm/memory.h12
-rw-r--r--arch/arm64/include/asm/mmu_context.h7
-rw-r--r--arch/arm64/include/asm/pgtable.h12
-rw-r--r--arch/arm64/include/asm/pointer_auth.h10
-rw-r--r--arch/arm64/include/asm/setup.h11
-rw-r--r--arch/arm64/include/asm/sparsemem.h23
-rw-r--r--arch/arm64/include/asm/stackprotector.h1
-rw-r--r--arch/arm64/include/asm/sysreg.h9
-rw-r--r--arch/arm64/include/asm/trans_pgd.h39
-rw-r--r--arch/arm64/include/asm/uaccess.h2
-rw-r--r--arch/arm64/include/asm/virt.h7
-rw-r--r--arch/arm64/kernel/Makefile7
-rw-r--r--arch/arm64/kernel/alternative.c2
-rw-r--r--arch/arm64/kernel/asm-offsets.c3
-rw-r--r--arch/arm64/kernel/cpu_errata.c2
-rw-r--r--arch/arm64/kernel/cpufeature.c75
-rw-r--r--arch/arm64/kernel/entry-common.c54
-rw-r--r--arch/arm64/kernel/entry.S14
-rw-r--r--arch/arm64/kernel/head.S75
-rw-r--r--arch/arm64/kernel/hibernate.c271
-rw-r--r--arch/arm64/kernel/hyp-stub.S115
-rw-r--r--arch/arm64/kernel/idreg-override.c216
-rw-r--r--arch/arm64/kernel/kaslr.c43
-rw-r--r--arch/arm64/kernel/machine_kexec.c57
-rw-r--r--arch/arm64/kernel/module-plts.c2
-rw-r--r--arch/arm64/kernel/perf_event.c15
-rw-r--r--arch/arm64/kernel/process.c4
-rw-r--r--arch/arm64/kernel/ptrace.c1
-rw-r--r--arch/arm64/kernel/relocate_kernel.S48
-rw-r--r--arch/arm64/kernel/setup.c15
-rw-r--r--arch/arm64/kernel/sleep.S1
-rw-r--r--arch/arm64/kernel/stacktrace.c13
-rw-r--r--arch/arm64/kernel/syscall.c30
-rw-r--r--arch/arm64/kernel/topology.c115
-rw-r--r--arch/arm64/kernel/traps.c2
-rw-r--r--arch/arm64/kernel/vdso-wrap.S (renamed from arch/arm64/kernel/vdso/vdso.S)0
-rw-r--r--arch/arm64/kernel/vdso/Makefile1
-rwxr-xr-xarch/arm64/kernel/vdso/gen_vdso_offsets.sh2
-rw-r--r--arch/arm64/kernel/vdso32-wrap.S (renamed from arch/arm64/kernel/vdso32/vdso.S)0
-rw-r--r--arch/arm64/kernel/vdso32/Makefile1
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S8
-rw-r--r--arch/arm64/kvm/arm.c3
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-init.S2
-rw-r--r--arch/arm64/mm/Makefile1
-rw-r--r--arch/arm64/mm/fault.c34
-rw-r--r--arch/arm64/mm/mmap.c15
-rw-r--r--arch/arm64/mm/mmu.c4
-rw-r--r--arch/arm64/mm/proc.S16
-rw-r--r--arch/arm64/mm/ptdump.c1
-rw-r--r--arch/arm64/mm/trans_pgd.c324
-rw-r--r--drivers/char/random.c15
-rw-r--r--drivers/firmware/smccc/smccc.c6
-rw-r--r--drivers/perf/arm-cci.c7
-rw-r--r--drivers/perf/arm-cmn.c19
-rw-r--r--drivers/perf/arm_dmc620_pmu.c5
-rw-r--r--drivers/perf/arm_pmu.c2
-rw-r--r--drivers/perf/arm_smmuv3_pmu.c8
-rw-r--r--drivers/perf/arm_spe_pmu.c23
-rw-r--r--drivers/perf/fsl_imx8_ddr_perf.c10
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c2
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_hha_pmu.c2
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c2
-rw-r--r--drivers/perf/qcom_l2_pmu.c6
-rw-r--r--drivers/perf/qcom_l3_pmu.c6
-rw-r--r--drivers/perf/xgene_pmu.c5
-rw-r--r--fs/xfs/xfs_file.c6
-rw-r--r--include/linux/arm-smccc.h31
-rw-r--r--include/linux/mm.h25
-rw-r--r--include/linux/pgtable.h11
-rw-r--r--mm/filemap.c179
-rw-r--r--mm/khugepaged.c37
-rw-r--r--mm/memory.c223
-rw-r--r--mm/nommu.c3
-rw-r--r--mm/shmem.c6
-rw-r--r--mm/swapfile.c11
-rw-r--r--tools/testing/selftests/arm64/mte/check_buffer_fill.c2
91 files changed, 1652 insertions, 985 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 46c60ccb650d..07f9372b494a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -373,6 +373,12 @@
arcrimi= [HW,NET] ARCnet - "RIM I" (entirely mem-mapped) cards
Format: <io>,<irq>,<nodeID>
+ arm64.nobti [ARM64] Unconditionally disable Branch Target
+ Identification support
+
+ arm64.nopauth [ARM64] Unconditionally disable Pointer Authentication
+ support
+
ataflop= [HW,M68k]
atarimouse= [HW,MOUSE] Atari Mouse
@@ -2252,6 +2258,9 @@
kvm-arm.mode=
[KVM,ARM] Select one of KVM/arm64's modes of operation.
+ nvhe: Standard nVHE-based mode, without support for
+ protected guests.
+
protected: nVHE-based mode with support for guests whose
state is kept private from the host.
Not valid if the kernel is running in EL2.
diff --git a/Documentation/admin-guide/perf/arm-cmn.rst b/Documentation/admin-guide/perf/arm-cmn.rst
index 0e4809346014..796e25b7027b 100644
--- a/Documentation/admin-guide/perf/arm-cmn.rst
+++ b/Documentation/admin-guide/perf/arm-cmn.rst
@@ -17,7 +17,7 @@ PMU events
----------
The PMU driver registers a single PMU device for the whole interconnect,
-see /sys/bus/event_source/devices/arm_cmn. Multi-chip systems may link
+see /sys/bus/event_source/devices/arm_cmn_0. Multi-chip systems may link
more than one CMN together via external CCIX links - in this situation,
each mesh counts its own events entirely independently, and additional
PMU devices will be named arm_cmn_{1..n}.
diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml
index 693ef3f185a8..e17ac049e890 100644
--- a/Documentation/devicetree/bindings/arm/pmu.yaml
+++ b/Documentation/devicetree/bindings/arm/pmu.yaml
@@ -43,6 +43,7 @@ properties:
- arm,cortex-a75-pmu
- arm,cortex-a76-pmu
- arm,cortex-a77-pmu
+ - arm,cortex-a78-pmu
- arm,neoverse-e1-pmu
- arm,neoverse-n1-pmu
- brcm,vulcan-pmu
diff --git a/Documentation/virt/kvm/arm/hyp-abi.rst b/Documentation/virt/kvm/arm/hyp-abi.rst
index 83cadd8186fa..4d43fbc25195 100644
--- a/Documentation/virt/kvm/arm/hyp-abi.rst
+++ b/Documentation/virt/kvm/arm/hyp-abi.rst
@@ -58,6 +58,15 @@ these functions (see arch/arm{,64}/include/asm/virt.h):
into place (arm64 only), and jump to the restart address while at HYP/EL2.
This hypercall is not expected to return to its caller.
+* ::
+
+ x0 = HVC_VHE_RESTART (arm64 only)
+
+ Attempt to upgrade the kernel's exception level from EL1 to EL2 by enabling
+ the VHE mode. This is conditioned by the CPU supporting VHE, the EL2 MMU
+ being off, and VHE not being disabled by any other means (command line
+ option, for example).
+
Any other value of r0/x0 triggers a hypervisor-specific handling,
which is not documented here.
diff --git a/arch/arm/include/asm/archrandom.h b/arch/arm/include/asm/archrandom.h
new file mode 100644
index 000000000000..a8e84ca5c2ee
--- /dev/null
+++ b/arch/arm/include/asm/archrandom.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARCHRANDOM_H
+#define _ASM_ARCHRANDOM_H
+
+static inline bool __init smccc_probe_trng(void)
+{
+ return false;
+}
+
+#endif /* _ASM_ARCHRANDOM_H */
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 52bc8f6206b7..1b8cecdc028e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -522,7 +522,7 @@ config ARM64_ERRATUM_1024718
help
This option adds a workaround for ARM Cortex-A55 Erratum 1024718.
- Affected Cortex-A55 cores (r0p0, r0p1, r1p0) could cause incorrect
+ Affected Cortex-A55 cores (all revisions) could cause incorrect
update of the hardware dirty bit when the DBM/AP bits are updated
without a break-before-make. The workaround is to disable the usage
of hardware DBM locally on the affected cores. CPUs not affected by
@@ -952,8 +952,9 @@ choice
that is selected here.
config CPU_BIG_ENDIAN
- bool "Build big-endian kernel"
- help
+ bool "Build big-endian kernel"
+ depends on !LD_IS_LLD || LLD_VERSION >= 130000
+ help
Say Y if you plan on running a kernel with a big-endian userspace.
config CPU_LITTLE_ENDIAN
@@ -1132,6 +1133,10 @@ config CRASH_DUMP
For more details see Documentation/admin-guide/kdump/kdump.rst
+config TRANS_TABLE
+ def_bool y
+ depends on HIBERNATION
+
config XEN_DOM0
def_bool y
depends on XEN
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 90309208bb28..5b84aec31ed3 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -188,10 +188,12 @@ ifeq ($(KBUILD_EXTMOD),)
# this hack.
prepare: vdso_prepare
vdso_prepare: prepare0
- $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h
- $(if $(CONFIG_COMPAT_VDSO),$(Q)$(MAKE) \
- $(build)=arch/arm64/kernel/vdso32 \
- include/generated/vdso32-offsets.h)
+ $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso \
+ include/generated/vdso-offsets.h arch/arm64/kernel/vdso/vdso.so
+ifdef CONFIG_COMPAT_VDSO
+ $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 \
+ include/generated/vdso32-offsets.h arch/arm64/kernel/vdso32/vdso.so
+endif
endif
define archhelp
diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h
index ffb1a40d5475..09e43272ccb0 100644
--- a/arch/arm64/include/asm/archrandom.h
+++ b/arch/arm64/include/asm/archrandom.h
@@ -4,10 +4,26 @@
#ifdef CONFIG_ARCH_RANDOM
+#include <linux/arm-smccc.h>
#include <linux/bug.h>
#include <linux/kernel.h>
#include <asm/cpufeature.h>
+#define ARM_SMCCC_TRNG_MIN_VERSION 0x10000UL
+
+extern bool smccc_trng_available;
+
+static inline bool __init smccc_probe_trng(void)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_VERSION, &res);
+ if ((s32)res.a0 < 0)
+ return false;
+
+ return res.a0 >= ARM_SMCCC_TRNG_MIN_VERSION;
+}
+
static inline bool __arm64_rndr(unsigned long *v)
{
bool ok;
@@ -38,26 +54,55 @@ static inline bool __must_check arch_get_random_int(unsigned int *v)
static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
{
+ struct arm_smccc_res res;
+
+ /*
+ * We prefer the SMCCC call, since its semantics (return actual
+ * hardware backed entropy) is closer to the idea behind this
+ * function here than what even the RNDRSS register provides
+ * (the output of a pseudo RNG freshly seeded by a TRNG).
+ */
+ if (smccc_trng_available) {
+ arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND64, 64, &res);
+ if ((int)res.a0 >= 0) {
+ *v = res.a3;
+ return true;
+ }
+ }
+
/*
* Only support the generic interface after we have detected
* the system wide capability, avoiding complexity with the
* cpufeature code and with potential scheduling between CPUs
* with and without the feature.
*/
- if (!cpus_have_const_cap(ARM64_HAS_RNG))
- return false;
+ if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v))
+ return true;
- return __arm64_rndr(v);
+ return false;
}
-
static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
{
+ struct arm_smccc_res res;
unsigned long val;
- bool ok = arch_get_random_seed_long(&val);
- *v = val;
- return ok;
+ if (smccc_trng_available) {
+ arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND64, 32, &res);
+ if ((int)res.a0 >= 0) {
+ *v = res.a3 & GENMASK(31, 0);
+ return true;
+ }
+ }
+
+ if (cpus_have_const_cap(ARM64_HAS_RNG)) {
+ if (__arm64_rndr(&val)) {
+ *v = val;
+ return true;
+ }
+ }
+
+ return false;
}
static inline bool __init __early_cpu_has_rndr(void)
@@ -72,12 +117,29 @@ arch_get_random_seed_long_early(unsigned long *v)
{
WARN_ON(system_state != SYSTEM_BOOTING);
- if (!__early_cpu_has_rndr())
- return false;
+ if (smccc_trng_available) {
+ struct arm_smccc_res res;
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND64, 64, &res);
+ if ((int)res.a0 >= 0) {
+ *v = res.a3;
+ return true;
+ }
+ }
+
+ if (__early_cpu_has_rndr() && __arm64_rndr(v))
+ return true;
- return __arm64_rndr(v);
+ return false;
}
#define arch_get_random_seed_long_early arch_get_random_seed_long_early
+#else /* !CONFIG_ARCH_RANDOM */
+
+static inline bool __init smccc_probe_trng(void)
+{
+ return false;
+}
+
#endif /* CONFIG_ARCH_RANDOM */
#endif /* _ASM_ARCHRANDOM_H */
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index 9990059be106..ccedf548dac9 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -15,10 +15,10 @@
.macro __uaccess_ttbr0_disable, tmp1
mrs \tmp1, ttbr1_el1 // swapper_pg_dir
bic \tmp1, \tmp1, #TTBR_ASID_MASK
- sub \tmp1, \tmp1, #PAGE_SIZE // reserved_pg_dir just before swapper_pg_dir
+ sub \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET // reserved_pg_dir
msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1
isb
- add \tmp1, \tmp1, #PAGE_SIZE
+ add \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET
msr ttbr1_el1, \tmp1 // set reserved ASID
isb
.endm
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index bf125c591116..ca31594d3d6c 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -676,6 +676,23 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
.endm
/*
+ * Set SCTLR_EL1 to the passed value, and invalidate the local icache
+ * in the process. This is called when setting the MMU on.
+ */
+.macro set_sctlr_el1, reg
+ msr sctlr_el1, \reg
+ isb
+ /*
+ * Invalidate the local I-cache so that any instructions fetched
+ * speculatively from the PoC are discarded, since they may have
+ * been dynamically patched at the PoU.
+ */
+ ic iallu
+ dsb nsh
+ isb
+.endm
+
+/*
* Check whether to yield to another runnable task from kernel mode NEON code
* (which runs with preemption disabled).
*
@@ -745,6 +762,22 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
.Lyield_out_\@ :
.endm
+ /*
+ * Check whether preempt-disabled code should yield as soon as it
+ * is able. This is the case if re-enabling preemption a single
+ * time results in a preempt count of zero, and the TIF_NEED_RESCHED
+ * flag is set. (Note that the latter is stored negated in the
+ * top word of the thread_info::preempt_count field)
+ */
+ .macro cond_yield, lbl:req, tmp:req
+#ifdef CONFIG_PREEMPTION
+ get_current_task \tmp
+ ldr \tmp, [\tmp, #TSK_TI_PREEMPT]
+ sub \tmp, \tmp, #PREEMPT_DISABLE_OFFSET
+ cbz \tmp, \lbl
+#endif
+ .endm
+
/*
* This macro emits a program property note section identifying
* architecture features which require special handling, mainly for
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 45217f21f1fe..52e5c1623224 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -30,11 +30,6 @@
* the implementation assumes non-aliasing VIPT D-cache and (aliasing)
* VIPT I-cache.
*
- * flush_cache_mm(mm)
- *
- * Clean and invalidate all user space cache entries
- * before a change of page tables.
- *
* flush_icache_range(start, end)
*
* Ensure coherency between the I-cache and the D-cache in the
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 9a555809b89c..61177bac49fa 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -63,6 +63,11 @@ struct arm64_ftr_bits {
s64 safe_val; /* safe value for FTR_EXACT features */
};
+struct arm64_ftr_override {
+ u64 val;
+ u64 mask;
+};
+
/*
* @arm64_ftr_reg - Feature register
* @strict_mask Bits which should match across all CPUs for sanity.
@@ -74,6 +79,7 @@ struct arm64_ftr_reg {
u64 user_mask;
u64 sys_val;
u64 user_val;
+ struct arm64_ftr_override *override;
const struct arm64_ftr_bits *ftr_bits;
};
@@ -600,6 +606,7 @@ void __init setup_cpu_features(void);
void check_local_cpu_capabilities(void);
u64 read_sanitised_ftr_reg(u32 id);
+u64 __read_sysreg_by_encoding(u32 sys_id);
static inline bool cpu_supports_mixed_endian_el0(void)
{
@@ -811,6 +818,10 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
return 8;
}
+extern struct arm64_ftr_override id_aa64mmfr1_override;
+extern struct arm64_ftr_override id_aa64pfr1_override;
+extern struct arm64_ftr_override id_aa64isar1_override;
+
u32 get_kvm_ipa_limit(void);
void dump_cpu_features(void);
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index a7f5a1bbc8ac..d77d358f9395 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -32,46 +32,39 @@
* to transparently mess with the EL0 bits via CNTKCTL_EL1 access in
* EL2.
*/
-.macro __init_el2_timers mode
-.ifeqs "\mode", "nvhe"
+.macro __init_el2_timers
mrs x0, cnthctl_el2
orr x0, x0, #3 // Enable EL1 physical timers
msr cnthctl_el2, x0
-.endif
msr cntvoff_el2, xzr // Clear virtual offset
.endm
-.macro __init_el2_debug mode
+.macro __init_el2_debug
mrs x1, id_aa64dfr0_el1
sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4
cmp x0, #1
- b.lt 1f // Skip if no PMU present
+ b.lt .Lskip_pmu_\@ // Skip if no PMU present
mrs x0, pmcr_el0 // Disable debug access traps
ubfx x0, x0, #11, #5 // to EL2 and allow access to
-1:
+.Lskip_pmu_\@:
csel x2, xzr, x0, lt // all PMU counters from EL1
/* Statistical profiling */
ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
- cbz x0, 3f // Skip if SPE not present
+ cbz x0, .Lskip_spe_\@ // Skip if SPE not present
-.ifeqs "\mode", "nvhe"
mrs_s x0, SYS_PMBIDR_EL1 // If SPE available at EL2,
and x0, x0, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
- cbnz x0, 2f // then permit sampling of physical
+ cbnz x0, .Lskip_spe_el2_\@ // then permit sampling of physical
mov x0, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
1 << SYS_PMSCR_EL2_PA_SHIFT)
msr_s SYS_PMSCR_EL2, x0 // addresses and physical counter
-2:
+.Lskip_spe_el2_\@:
mov x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
orr x2, x2, x0 // If we don't have VHE, then
// use EL1&0 translation.
-.else
- orr x2, x2, #MDCR_EL2_TPMS // For VHE, use EL2 translation
- // and disable access from EL1
-.endif
-3:
+.Lskip_spe_\@:
msr mdcr_el2, x2 // Configure debug traps
.endm
@@ -79,9 +72,9 @@
.macro __init_el2_lor
mrs x1, id_aa64mmfr1_el1
ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
- cbz x0, 1f
+ cbz x0, .Lskip_lor_\@
msr_s SYS_LORC_EL1, xzr
-1:
+.Lskip_lor_\@:
.endm
/* Stage-2 translation */
@@ -93,7 +86,7 @@
.macro __init_el2_gicv3
mrs x0, id_aa64pfr0_el1
ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4
- cbz x0, 1f
+ cbz x0, .Lskip_gicv3_\@
mrs_s x0, SYS_ICC_SRE_EL2
orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1
@@ -103,7 +96,7 @@
mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back,
tbz x0, #0, 1f // and check that it sticks
msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults
-1:
+.Lskip_gicv3_\@:
.endm
.macro __init_el2_hstr
@@ -128,14 +121,14 @@
.macro __init_el2_nvhe_sve
mrs x1, id_aa64pfr0_el1
ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
- cbz x1, 1f
+ cbz x1, .Lskip_sve_\@
bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps
msr cptr_el2, x0 // Disable copro. traps to EL2
isb
mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
msr_s SYS_ZCR_EL2, x1 // length for EL1.
-1:
+.Lskip_sve_\@:
.endm
.macro __init_el2_nvhe_prepare_eret
@@ -145,37 +138,24 @@
/**
* Initialize EL2 registers to sane values. This should be called early on all
- * cores that were booted in EL2.
+ * cores that were booted in EL2. Note that everything gets initialised as
+ * if VHE was not evailable. The kernel context will be upgraded to VHE
+ * if possible later on in the boot process
*
* Regs: x0, x1 and x2 are clobbered.
*/
-.macro init_el2_state mode
-.ifnes "\mode", "vhe"
-.ifnes "\mode", "nvhe"
-.error "Invalid 'mode' argument"
-.endif
-.endif
-
+.macro init_el2_state
__init_el2_sctlr
- __init_el2_timers \mode
- __init_el2_debug \mode
+ __init_el2_timers
+ __init_el2_debug
__init_el2_lor
__init_el2_stage2
__init_el2_gicv3
__init_el2_hstr
-
- /*
- * When VHE is not in use, early init of EL2 needs to be done here.
- * When VHE _is_ in use, EL1 will not be used in the host and
- * requires no configuration, and all non-hyp-specific EL2 setup
- * will be done via the _EL1 system register aliases in __cpu_setup.
- */
-.ifeqs "\mode", "nvhe"
__init_el2_nvhe_idregs
__init_el2_nvhe_cptr
__init_el2_nvhe_sve
__init_el2_nvhe_prepare_eret
-.endif
.endm
#endif /* __ARM_KVM_INIT_H__ */
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index d24b527e8c00..9befcd87e9a8 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -90,18 +90,19 @@ static inline void crash_prepare_suspend(void) {}
static inline void crash_post_resume(void) {}
#endif
-#ifdef CONFIG_KEXEC_FILE
#define ARCH_HAS_KIMAGE_ARCH
struct kimage_arch {
void *dtb;
- unsigned long dtb_mem;
+ phys_addr_t dtb_mem;
+ phys_addr_t kern_reloc;
/* Core ELF header buffer */
void *elf_headers;
unsigned long elf_headers_mem;
unsigned long elf_headers_sz;
};
+#ifdef CONFIG_KEXEC_FILE
extern const struct kexec_file_ops kexec_image_ops;
struct kimage;
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 8a33d83ea843..7ccf770c53d9 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -199,12 +199,6 @@ extern void __vgic_v3_init_lrs(void);
extern u32 __kvm_get_mdcr_el2(void);
-#if defined(GCC_VERSION) && GCC_VERSION < 50000
-#define SYM_CONSTRAINT "i"
-#else
-#define SYM_CONSTRAINT "S"
-#endif
-
/*
* Obtain the PC-relative address of a kernel symbol
* s: symbol
@@ -221,7 +215,7 @@ extern u32 __kvm_get_mdcr_el2(void);
typeof(s) *addr; \
asm("adrp %0, %1\n" \
"add %0, %0, :lo12:%1\n" \
- : "=r" (addr) : SYM_CONSTRAINT (&s)); \
+ : "=r" (addr) : "S" (&s)); \
addr; \
})
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index ff4732785c32..bc09af26c1b8 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -159,6 +159,18 @@
#define IOREMAP_MAX_ORDER (PMD_SHIFT)
#endif
+/*
+ * Open-coded (swapper_pg_dir - reserved_pg_dir) as this cannot be calculated
+ * until link time.
+ */
+#define RESERVED_SWAPPER_OFFSET (PAGE_SIZE)
+
+/*
+ * Open-coded (swapper_pg_dir - tramp_pg_dir) as this cannot be calculated
+ * until link time.
+ */
+#define TRAMP_SWAPPER_OFFSET (2 * PAGE_SIZE)
+
#ifndef __ASSEMBLY__
#include <linux/bitops.h>
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 0b3079fd28eb..70ce8c1d2b07 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -81,16 +81,15 @@ static inline bool __cpu_uses_extended_idmap_level(void)
}
/*
- * Set TCR.T0SZ to its default value (based on VA_BITS)
+ * Ensure TCR.T0SZ is set to the provided value.
*/
static inline void __cpu_set_tcr_t0sz(unsigned long t0sz)
{
- unsigned long tcr;
+ unsigned long tcr = read_sysreg(tcr_el1);
- if (!__cpu_uses_extended_idmap())
+ if ((tcr & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET == t0sz)
return;
- tcr = read_sysreg(tcr_el1);
tcr &= ~TCR_T0SZ_MASK;
tcr |= t0sz << TCR_T0SZ_OFFSET;
write_sysreg(tcr, tcr_el1);
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 501562793ce2..e17b96d0e4b5 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -980,7 +980,17 @@ static inline bool arch_faults_on_old_pte(void)
return !cpu_has_hw_af();
}
-#define arch_faults_on_old_pte arch_faults_on_old_pte
+#define arch_faults_on_old_pte arch_faults_on_old_pte
+
+/*
+ * Experimentally, it's cheap to set the access flag in hardware and we
+ * benefit from prefaulting mappings as 'old' to start with.
+ */
+static inline bool arch_wants_old_prefaulted_pte(void)
+{
+ return !arch_faults_on_old_pte();
+}
+#define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte
#endif /* !__ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
index c6b4f0603024..b112a11e9302 100644
--- a/arch/arm64/include/asm/pointer_auth.h
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -76,6 +76,15 @@ static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
return ptrauth_clear_pac(ptr);
}
+static __always_inline void ptrauth_enable(void)
+{
+ if (!system_supports_address_auth())
+ return;
+ sysreg_clear_set(sctlr_el1, 0, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB |
+ SCTLR_ELx_ENDA | SCTLR_ELx_ENDB));
+ isb();
+}
+
#define ptrauth_thread_init_user(tsk) \
ptrauth_keys_init_user(&(tsk)->thread.keys_user)
#define ptrauth_thread_init_kernel(tsk) \
@@ -84,6 +93,7 @@ static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
ptrauth_keys_switch_kernel(&(tsk)->thread.keys_kernel)
#else /* CONFIG_ARM64_PTR_AUTH */
+#define ptrauth_enable()
#define ptrauth_prctl_reset_keys(tsk, arg) (-EINVAL)
#define ptrauth_strip_insn_pac(lr) (lr)
#define ptrauth_thread_init_user(tsk)
diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h
new file mode 100644
index 000000000000..d3320618ed14
--- /dev/null
+++ b/arch/arm64/include/asm/setup.h
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef __ARM64_ASM_SETUP_H
+#define __ARM64_ASM_SETUP_H
+
+#include <uapi/asm/setup.h>
+
+void *get_early_fdt_ptr(void);
+void early_fdt_map(u64 dt_phys);
+
+#endif
diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h
index 1f43fcc79738..eb4a75d720ed 100644
--- a/arch/arm64/include/asm/sparsemem.h
+++ b/arch/arm64/include/asm/sparsemem.h
@@ -7,7 +7,26 @@
#ifdef CONFIG_SPARSEMEM
#define MAX_PHYSMEM_BITS CONFIG_ARM64_PA_BITS
-#define SECTION_SIZE_BITS 30
-#endif
+
+/*
+ * Section size must be at least 512MB for 64K base
+ * page size config. Otherwise it will be less than
+ * (MAX_ORDER - 1) and the build process will fail.
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define SECTION_SIZE_BITS 29
+
+#else
+
+/*
+ * Section size must be at least 128MB for 4K base
+ * page size config. Otherwise PMD based huge page
+ * entries could not be created for vmemmap mappings.
+ * 16K follows 4K for simplicity.
+ */
+#define SECTION_SIZE_BITS 27
+#endif /* CONFIG_ARM64_64K_PAGES */
+
+#endif /* CONFIG_SPARSEMEM*/
#endif
diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h
index 7263e0bac680..33f1bb453150 100644
--- a/arch/arm64/include/asm/stackprotector.h
+++ b/arch/arm64/include/asm/stackprotector.h
@@ -41,6 +41,7 @@ static __always_inline void boot_init_stack_canary(void)
#endif
ptrauth_thread_init_kernel(current);
ptrauth_thread_switch_kernel(current);
+ ptrauth_enable();
}
#endif /* _ASM_STACKPROTECTOR_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 8b5e7e5c3cc8..767bb2d47be9 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -291,7 +291,11 @@
#define SYS_PMSFCR_EL1_ST_SHIFT 18
#define SYS_PMSEVFR_EL1 sys_reg(3, 0, 9, 9, 5)
-#define SYS_PMSEVFR_EL1_RES0 0x0000ffff00ff0f55UL
+#define SYS_PMSEVFR_EL1_RES0_8_2 \
+ (GENMASK_ULL(47, 32) | GENMASK_ULL(23, 16) | GENMASK_ULL(11, 8) |\
+ BIT_ULL(6) | BIT_ULL(4) | BIT_ULL(2) | BIT_ULL(0))
+#define SYS_PMSEVFR_EL1_RES0_8_3 \
+ (SYS_PMSEVFR_EL1_RES0_8_2 & ~(BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11)))
#define SYS_PMSLATFR_EL1 sys_reg(3, 0, 9, 9, 6)
#define SYS_PMSLATFR_EL1_MINLAT_SHIFT 0
@@ -844,6 +848,9 @@
#define ID_AA64DFR0_PMUVER_8_5 0x6
#define ID_AA64DFR0_PMUVER_IMP_DEF 0xf
+#define ID_AA64DFR0_PMSVER_8_2 0x1
+#define ID_AA64DFR0_PMSVER_8_3 0x2
+
#define ID_DFR0_PERFMON_SHIFT 24
#define ID_DFR0_PERFMON_8_1 0x4
diff --git a/arch/arm64/include/asm/trans_pgd.h b/arch/arm64/include/asm/trans_pgd.h
new file mode 100644
index 000000000000..5d08e5adf3d5
--- /dev/null
+++ b/arch/arm64/include/asm/trans_pgd.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2020, Microsoft Corporation.
+ * Pavel Tatashin <pasha.tatashin@soleen.com>
+ */
+
+#ifndef _ASM_TRANS_TABLE_H
+#define _ASM_TRANS_TABLE_H
+
+#include <linux/bits.h>
+#include <linux/types.h>
+#include <asm/pgtable-types.h>
+
+/*
+ * trans_alloc_page
+ * - Allocator that should return exactly one zeroed page, if this
+ * allocator fails, trans_pgd_create_copy() and trans_pgd_map_page()
+ * return -ENOMEM error.
+ *
+ * trans_alloc_arg
+ * - Passed to trans_alloc_page as an argument
+ */
+
+struct trans_pgd_info {
+ void * (*trans_alloc_page)(void *arg);
+ void *trans_alloc_arg;
+};
+
+int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **trans_pgd,
+ unsigned long start, unsigned long end);
+
+int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd,
+ void *page, unsigned long dst_addr, pgprot_t pgprot);
+
+int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0,
+ unsigned long *t0sz, void *page);
+
+#endif /* _ASM_TRANS_TABLE_H */
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index f0fe0cc6abe0..0deb88467111 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -87,7 +87,7 @@ static inline void __uaccess_ttbr0_disable(void)
ttbr = read_sysreg(ttbr1_el1);
ttbr &= ~TTBR_ASID_MASK;
/* reserved_pg_dir placed before swapper_pg_dir */
- write_sysreg(ttbr - PAGE_SIZE, ttbr0_el1);
+ write_sysreg(ttbr - RESERVED_SWAPPER_OFFSET, ttbr0_el1);
isb();
/* Set reserved ASID */
write_sysreg(ttbr, ttbr1_el1);
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index ee6a48df89d9..7379f35ae2c6 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -35,8 +35,13 @@
*/
#define HVC_RESET_VECTORS 2
+/*
+ * HVC_VHE_RESTART - Upgrade the CPU from EL1 to EL2, if possible
+ */
+#define HVC_VHE_RESTART 3
+
/* Max number of HYP stub hypercalls */
-#define HVC_STUB_HCALL_NR 3
+#define HVC_STUB_HCALL_NR 4
/* Error returned when an invalid stub number is passed into x0 */
#define HVC_STUB_ERR 0xbadca11
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 86364ab6f13f..ed65576ce710 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -17,7 +17,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
return_address.o cpuinfo.o cpu_errata.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
- syscall.o proton-pack.o
+ syscall.o proton-pack.o idreg-override.o
targets += efi-entry.o
@@ -59,9 +59,10 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
obj-$(CONFIG_ARM64_MTE) += mte.o
+obj-y += vdso-wrap.o
+obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o
-obj-y += vdso/ probes/
-obj-$(CONFIG_COMPAT_VDSO) += vdso32/
+obj-y += probes/
head-y := head.o
extra-y += $(head-y) vmlinux.lds
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index a57cffb752e8..1184c44ea2c7 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -17,7 +17,7 @@
#include <asm/sections.h>
#include <linux/stop_machine.h>
-#define __ALT_PTR(a,f) ((void *)&(a)->f + (a)->f)
+#define __ALT_PTR(a, f) ((void *)&(a)->f + (a)->f)
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset)
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 301784463587..a36e2fc330d4 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -99,6 +99,9 @@ int main(void)
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();
+ DEFINE(FTR_OVR_VAL_OFFSET, offsetof(struct arm64_ftr_override, val));
+ DEFINE(FTR_OVR_MASK_OFFSET, offsetof(struct arm64_ftr_override, mask));
+ BLANK();
#ifdef CONFIG_KVM
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index a63428301f42..506a1cd37973 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -107,8 +107,6 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap)
}
#ifdef CONFIG_ARM64_ERRATUM_1463225
-DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
-
static bool
has_cortex_a76_erratum_1463225(const struct arm64_cpu_capabilities *entry,
int scope)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 3e6331b64932..066030717a4c 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -352,9 +352,12 @@ static const struct arm64_ftr_bits ftr_ctr[] = {
ARM64_FTR_END,
};
+static struct arm64_ftr_override __ro_after_init no_override = { };
+
struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = {
.name = "SYS_CTR_EL0",
- .ftr_bits = ftr_ctr
+ .ftr_bits = ftr_ctr,
+ .override = &no_override,
};
static const struct arm64_ftr_bits ftr_id_mmfr0[] = {
@@ -544,13 +547,20 @@ static const struct arm64_ftr_bits ftr_raz[] = {
ARM64_FTR_END,
};
-#define ARM64_FTR_REG(id, table) { \
- .sys_id = id, \
- .reg = &(struct arm64_ftr_reg){ \
- .name = #id, \
- .ftr_bits = &((table)[0]), \
+#define ARM64_FTR_REG_OVERRIDE(id, table, ovr) { \
+ .sys_id = id, \
+ .reg = &(struct arm64_ftr_reg){ \
+ .name = #id, \
+ .override = (ovr), \
+ .ftr_bits = &((table)[0]), \
}}
+#define ARM64_FTR_REG(id, table) ARM64_FTR_REG_OVERRIDE(id, table, &no_override)
+
+struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
+struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
+struct arm64_ftr_override __ro_after_init id_aa64isar1_override;
+
static const struct __ftr_reg_entry {
u32 sys_id;
struct arm64_ftr_reg *reg;
@@ -585,7 +595,8 @@ static const struct __ftr_reg_entry {
/* Op1 = 0, CRn = 0, CRm = 4 */
ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
- ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1,
+ &id_aa64pfr1_override),
ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0),
/* Op1 = 0, CRn = 0, CRm = 5 */
@@ -594,11 +605,13 @@ static const struct __ftr_reg_entry {
/* Op1 = 0, CRn = 0, CRm = 6 */
ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
- ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1,
+ &id_aa64isar1_override),
/* Op1 = 0, CRn = 0, CRm = 7 */
ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
- ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1,
+ &id_aa64mmfr1_override),
ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
/* Op1 = 0, CRn = 1, CRm = 2 */
@@ -770,6 +783,33 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
u64 ftr_mask = arm64_ftr_mask(ftrp);
s64 ftr_new = arm64_ftr_value(ftrp, new);
+ s64 ftr_ovr = arm64_ftr_value(ftrp, reg->override->val);
+
+ if ((ftr_mask & reg->override->mask) == ftr_mask) {
+ s64 tmp = arm64_ftr_safe_value(ftrp, ftr_ovr, ftr_new);
+ char *str = NULL;
+
+ if (ftr_ovr != tmp) {
+ /* Unsafe, remove the override */
+ reg->override->mask &= ~ftr_mask;
+ reg->override->val &= ~ftr_mask;
+ tmp = ftr_ovr;
+ str = "ignoring override";
+ } else if (ftr_new != tmp) {
+ /* Override was valid */
+ ftr_new = tmp;
+ str = "forced";
+ } else if (ftr_ovr == tmp) {
+ /* Override was the safe value */
+ str = "already set";
+ }
+
+ if (str)
+ pr_warn("%s[%d:%d]: %s to %llx\n",
+ reg->name,
+ ftrp->shift + ftrp->width - 1,
+ ftrp->shift, str, tmp);
+ }
val = arm64_ftr_set_value(ftrp, val, ftr_new);
@@ -1115,14 +1155,17 @@ u64 read_sanitised_ftr_reg(u32 id)
EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg);
#define read_sysreg_case(r) \
- case r: return read_sysreg_s(r)
+ case r: val = read_sysreg_s(r); break;
/*
* __read_sysreg_by_encoding() - Used by a STARTING cpu before cpuinfo is populated.
* Read the system register on the current CPU
*/
-static u64 __read_sysreg_by_encoding(u32 sys_id)
+u64 __read_sysreg_by_encoding(u32 sys_id)
{
+ struct arm64_ftr_reg *regp;
+ u64 val;
+
switch (sys_id) {
read_sysreg_case(SYS_ID_PFR0_EL1);
read_sysreg_case(SYS_ID_PFR1_EL1);
@@ -1165,6 +1208,14 @@ static u64 __read_sysreg_by_encoding(u32 sys_id)
BUG();
return 0;
}
+
+ regp = get_arm64_ftr_reg(sys_id);
+ if (regp) {
+ val &= ~regp->override->mask;
+ val |= (regp->override->val & regp->override->mask);
+ }
+
+ return val;
}
#include <linux/irqchip/arm-gic-v3.h>
@@ -1455,7 +1506,7 @@ static bool cpu_has_broken_dbm(void)
/* List of CPUs which have broken DBM support. */
static const struct midr_range cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_1024718
- MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 1, 0), // A55 r0p0 -r1p0
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
/* Kryo4xx Silver (rdpe => r1p0) */
MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe),
#endif
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 5346953e4382..9d3588450473 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -109,6 +109,55 @@ asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
exit_to_kernel_mode(regs);
}
+#ifdef CONFIG_ARM64_ERRATUM_1463225
+static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
+
+static void cortex_a76_erratum_1463225_svc_handler(void)
+{
+ u32 reg, val;
+
+ if (!unlikely(test_thread_flag(TIF_SINGLESTEP)))
+ return;
+
+ if (!unlikely(this_cpu_has_cap(ARM64_WORKAROUND_1463225)))
+ return;
+
+ __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1);
+ reg = read_sysreg(mdscr_el1);
+ val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE;
+ write_sysreg(val, mdscr_el1);
+ asm volatile("msr daifclr, #8");
+ isb();
+
+ /* We will have taken a single-step exception by this point */
+
+ write_sysreg(reg, mdscr_el1);
+ __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 0);
+}
+
+static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
+{
+ if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa))
+ return false;
+
+ /*
+ * We've taken a dummy step exception from the kernel to ensure
+ * that interrupts are re-enabled on the syscall path. Return back
+ * to cortex_a76_erratum_1463225_svc_handler() with debug exceptions
+ * masked so that we can safely restore the mdscr and get on with
+ * handling the syscall.
+ */
+ regs->pstate |= PSR_D_BIT;
+ return true;
+}
+#else /* CONFIG_ARM64_ERRATUM_1463225 */
+static void cortex_a76_erratum_1463225_svc_handler(void) { }
+static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
+{
+ return false;
+}
+#endif /* CONFIG_ARM64_ERRATUM_1463225 */
+
static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
@@ -186,7 +235,8 @@ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
arm64_enter_el1_dbg(regs);
- do_debug_exception(far, esr, regs);
+ if (!cortex_a76_erratum_1463225_debug_handler(regs))
+ do_debug_exception(far, esr, regs);
arm64_exit_el1_dbg(regs);
}
@@ -362,6 +412,7 @@ static void noinstr el0_svc(struct pt_regs *regs)
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
enter_from_user_mode();
+ cortex_a76_erratum_1463225_svc_handler();
do_el0_svc(regs);
}
@@ -439,6 +490,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs)
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
enter_from_user_mode();
+ cortex_a76_erratum_1463225_svc_handler();
do_el0_svc_compat(regs);
}
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index c9bae73f2621..a31a0a713c85 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -261,16 +261,16 @@ alternative_else_nop_endif
stp lr, x21, [sp, #S_LR]
/*
- * In order to be able to dump the contents of struct pt_regs at the
- * time the exception was taken (in case we attempt to walk the call
- * stack later), chain it together with the stack frames.
+ * For exceptions from EL0, terminate the callchain here.
+ * For exceptions from EL1, create a synthetic frame record so the
+ * interrupted code shows up in the backtrace.
*/
.if \el == 0
- stp xzr, xzr, [sp, #S_STACKFRAME]
+ mov x29, xzr
.else
stp x29, x22, [sp, #S_STACKFRAME]
- .endif
add x29, sp, #S_STACKFRAME
+ .endif
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
alternative_if_not ARM64_HAS_PAN
@@ -805,7 +805,7 @@ SYM_CODE_END(ret_to_user)
// Move from tramp_pg_dir to swapper_pg_dir
.macro tramp_map_kernel, tmp
mrs \tmp, ttbr1_el1
- add \tmp, \tmp, #(2 * PAGE_SIZE)
+ add \tmp, \tmp, #TRAMP_SWAPPER_OFFSET
bic \tmp, \tmp, #USER_ASID_FLAG
msr ttbr1_el1, \tmp
#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
@@ -825,7 +825,7 @@ alternative_else_nop_endif
// Move from swapper_pg_dir to tramp_pg_dir
.macro tramp_unmap_kernel, tmp
mrs \tmp, ttbr1_el1
- sub \tmp, \tmp, #(2 * PAGE_SIZE)
+ sub \tmp, \tmp, #TRAMP_SWAPPER_OFFSET
orr \tmp, \tmp, #USER_ASID_FLAG
msr ttbr1_el1, \tmp
/*
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index a0dc987724ed..1e30b5550d2a 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -404,10 +404,6 @@ SYM_FUNC_START_LOCAL(__primary_switched)
adr_l x5, init_task
msr sp_el0, x5 // Save thread_info
-#ifdef CONFIG_ARM64_PTR_AUTH
- __ptrauth_keys_init_cpu x5, x6, x7, x8
-#endif
-
adr_l x8, vectors // load VBAR_EL1 with virtual
msr vbar_el1, x8 // vector table address
isb
@@ -436,10 +432,12 @@ SYM_FUNC_START_LOCAL(__primary_switched)
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
bl kasan_early_init
#endif
+ mov x0, x21 // pass FDT address in x0
+ bl early_fdt_map // Try mapping the FDT early
+ bl init_feature_override // Parse cpu feature overrides
#ifdef CONFIG_RANDOMIZE_BASE
tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized?
b.ne 0f
- mov x0, x21 // pass FDT address in x0
bl kaslr_early_init // parse FDT for KASLR options
cbz x0, 0f // KASLR disabled? just proceed
orr x23, x23, x0 // record KASLR offset
@@ -447,6 +445,7 @@ SYM_FUNC_START_LOCAL(__primary_switched)
ret // to __primary_switch()
0:
#endif
+ bl switch_to_vhe // Prefer VHE if possible
add sp, sp, #16
mov x29, #0
mov x30, #0
@@ -478,13 +477,14 @@ EXPORT_SYMBOL(kimage_vaddr)
* booted in EL1 or EL2 respectively.
*/
SYM_FUNC_START(init_kernel_el)
+ mov_q x0, INIT_SCTLR_EL1_MMU_OFF
+ msr sctlr_el1, x0
+
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL2
b.eq init_el2
SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
- mov_q x0, INIT_SCTLR_EL1_MMU_OFF
- msr sctlr_el1, x0
isb
mov_q x0, INIT_PSTATE_EL1
msr spsr_el1, x0
@@ -493,50 +493,11 @@ SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
eret
SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
-#ifdef CONFIG_ARM64_VHE
- /*
- * Check for VHE being present. x2 being non-zero indicates that we
- * do have VHE, and that the kernel is intended to run at EL2.
- */
- mrs x2, id_aa64mmfr1_el1
- ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
-#else
- mov x2, xzr
-#endif
- cbz x2, init_el2_nvhe
-
- /*
- * When VHE _is_ in use, EL1 will not be used in the host and
- * requires no configuration, and all non-hyp-specific EL2 setup
- * will be done via the _EL1 system register aliases in __cpu_setup.
- */
- mov_q x0, HCR_HOST_VHE_FLAGS
- msr hcr_el2, x0
- isb
-
- init_el2_state vhe
-
- isb
-
- mov_q x0, INIT_PSTATE_EL2
- msr spsr_el2, x0
- msr elr_el2, lr
- mov w0, #BOOT_CPU_MODE_EL2
- eret
-
-SYM_INNER_LABEL(init_el2_nvhe, SYM_L_LOCAL)
- /*
- * When VHE is not in use, early init of EL2 and EL1 needs to be
- * done here.
- */
- mov_q x0, INIT_SCTLR_EL1_MMU_OFF
- msr sctlr_el1, x0
-
mov_q x0, HCR_HOST_NVHE_FLAGS
msr hcr_el2, x0
isb
- init_el2_state nvhe
+ init_el2_state
/* Hypervisor stub */
adr_l x0, __hyp_stub_vectors
@@ -623,6 +584,7 @@ SYM_FUNC_START_LOCAL(secondary_startup)
/*
* Common entry point for secondary CPUs.
*/
+ bl switch_to_vhe
bl __cpu_secondary_check52bitva
bl __cpu_setup // initialise processor
adrp x1, swapper_pg_dir
@@ -703,16 +665,9 @@ SYM_FUNC_START(__enable_mmu)
offset_ttbr1 x1, x3
msr ttbr1_el1, x1 // load TTBR1
isb
- msr sctlr_el1, x0
- isb
- /*
- * Invalidate the local I-cache so that any instructions fetched
- * speculatively from the PoC are discarded, since they may have
- * been dynamically patched at the PoU.
- */
- ic iallu
- dsb nsh
- isb
+
+ set_sctlr_el1 x0
+
ret
SYM_FUNC_END(__enable_mmu)
@@ -883,11 +838,7 @@ SYM_FUNC_START_LOCAL(__primary_switch)
tlbi vmalle1 // Remove any stale TLB entries
dsb nsh
- msr sctlr_el1, x19 // re-enable the MMU
- isb
- ic iallu // flush instructions fetched
- dsb nsh // via old mapping
- isb
+ set_sctlr_el1 x19 // re-enable the MMU
bl __relocate_kernel
#endif
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 9c9f47e9f7f4..b1cef371df2b 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -16,7 +16,6 @@
#define pr_fmt(x) "hibernate: " x
#include <linux/cpu.h>
#include <linux/kvm_host.h>
-#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/sched.h>
#include <linux/suspend.h>
@@ -31,13 +30,12 @@
#include <asm/memory.h>
#include <asm/mmu_context.h>
#include <asm/mte.h>
-#include <asm/pgalloc.h>
-#include <asm/pgtable-hwdef.h>
#include <asm/sections.h>
#include <asm/smp.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
#include <asm/sysreg.h>
+#include <asm/trans_pgd.h>
#include <asm/virt.h>
/*
@@ -178,52 +176,9 @@ int arch_hibernation_header_restore(void *addr)
}
EXPORT_SYMBOL(arch_hibernation_header_restore);
-static int trans_pgd_map_page(pgd_t *trans_pgd, void *page,
- unsigned long dst_addr,
- pgprot_t pgprot)
+static void *hibernate_page_alloc(void *arg)
{
- pgd_t *pgdp;
- p4d_t *p4dp;
- pud_t *pudp;
- pmd_t *pmdp;
- pte_t *ptep;
-
- pgdp = pgd_offset_pgd(trans_pgd, dst_addr);
- if (pgd_none(READ_ONCE(*pgdp))) {
- pudp = (void *)get_safe_page(GFP_ATOMIC);
- if (!pudp)
- return -ENOMEM;
- pgd_populate(&init_mm, pgdp, pudp);
- }
-
- p4dp = p4d_offset(pgdp, dst_addr);
- if (p4d_none(READ_ONCE(*p4dp))) {
- pudp = (void *)get_safe_page(GFP_ATOMIC);
- if (!pudp)
- return -ENOMEM;
- p4d_populate(&init_mm, p4dp, pudp);
- }
-
- pudp = pud_offset(p4dp, dst_addr);
- if (pud_none(READ_ONCE(*pudp))) {
- pmdp = (void *)get_safe_page(GFP_ATOMIC);
- if (!pmdp)
- return -ENOMEM;
- pud_populate(&init_mm, pudp, pmdp);
- }
-
- pmdp = pmd_offset(pudp, dst_addr);
- if (pmd_none(READ_ONCE(*pmdp))) {
- ptep = (void *)get_safe_page(GFP_ATOMIC);
- if (!ptep)
- return -ENOMEM;
- pmd_populate_kernel(&init_mm, pmdp, ptep);
- }
-
- ptep = pte_offset_kernel(pmdp, dst_addr);
- set_pte(ptep, pfn_pte(virt_to_pfn(page), PAGE_KERNEL_EXEC));
-
- return 0;
+ return (void *)get_safe_page((__force gfp_t)(unsigned long)arg);
}
/*
@@ -239,11 +194,16 @@ static int trans_pgd_map_page(pgd_t *trans_pgd, void *page,
* page system.
*/
static int create_safe_exec_page(void *src_start, size_t length,
- unsigned long dst_addr,
phys_addr_t *phys_dst_addr)
{
+ struct trans_pgd_info trans_info = {
+ .trans_alloc_page = hibernate_page_alloc,
+ .trans_alloc_arg = (__force void *)GFP_ATOMIC,
+ };
+
void *page = (void *)get_safe_page(GFP_ATOMIC);
- pgd_t *trans_pgd;
+ phys_addr_t trans_ttbr0;
+ unsigned long t0sz;
int rc;
if (!page)
@@ -251,13 +211,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
memcpy(page, src_start, length);
__flush_icache_range((unsigned long)page, (unsigned long)page + length);
-
- trans_pgd = (void *)get_safe_page(GFP_ATOMIC);
- if (!trans_pgd)
- return -ENOMEM;
-
- rc = trans_pgd_map_page(trans_pgd, page, dst_addr,
- PAGE_KERNEL_EXEC);
+ rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page);
if (rc)
return rc;
@@ -270,12 +224,15 @@ static int create_safe_exec_page(void *src_start, size_t length,
* page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
* runtime services), while for a userspace-driven test_resume cycle it
* points to userspace page tables (and we must point it at a zero page
- * ourselves). Elsewhere we only (un)install the idmap with preemption
- * disabled, so T0SZ should be as required regardless.
+ * ourselves).
+ *
+ * We change T0SZ as part of installing the idmap. This is undone by
+ * cpu_uninstall_idmap() in __cpu_suspend_exit().
*/
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
- write_sysreg(phys_to_ttbr(virt_to_phys(trans_pgd)), ttbr0_el1);
+ __cpu_set_tcr_t0sz(t0sz);
+ write_sysreg(trans_ttbr0, ttbr0_el1);
isb();
*phys_dst_addr = virt_to_phys(page);
@@ -462,182 +419,6 @@ int swsusp_arch_suspend(void)
return ret;
}
-static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
-{
- pte_t pte = READ_ONCE(*src_ptep);
-
- if (pte_valid(pte)) {
- /*
- * Resume will overwrite areas that may be marked
- * read only (code, rodata). Clear the RDONLY bit from
- * the temporary mappings we use during restore.
- */
- set_pte(dst_ptep, pte_mkwrite(pte));
- } else if (debug_pagealloc_enabled() && !pte_none(pte)) {
- /*
- * debug_pagealloc will removed the PTE_VALID bit if
- * the page isn't in use by the resume kernel. It may have
- * been in use by the original kernel, in which case we need
- * to put it back in our copy to do the restore.
- *
- * Before marking this entry valid, check the pfn should
- * be mapped.
- */
- BUG_ON(!pfn_valid(pte_pfn(pte)));
-
- set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
- }
-}
-
-static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
- unsigned long end)
-{
- pte_t *src_ptep;
- pte_t *dst_ptep;
- unsigned long addr = start;
-
- dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
- if (!dst_ptep)
- return -ENOMEM;
- pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
- dst_ptep = pte_offset_kernel(dst_pmdp, start);
-
- src_ptep = pte_offset_kernel(src_pmdp, start);
- do {
- _copy_pte(dst_ptep, src_ptep, addr);
- } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
-
- return 0;
-}
-
-static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
- unsigned long end)
-{
- pmd_t *src_pmdp;
- pmd_t *dst_pmdp;
- unsigned long next;
- unsigned long addr = start;
-
- if (pud_none(READ_ONCE(*dst_pudp))) {
- dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
- if (!dst_pmdp)
- return -ENOMEM;
- pud_populate(&init_mm, dst_pudp, dst_pmdp);
- }
- dst_pmdp = pmd_offset(dst_pudp, start);
-
- src_pmdp = pmd_offset(src_pudp, start);
- do {
- pmd_t pmd = READ_ONCE(*src_pmdp);
-
- next = pmd_addr_end(addr, end);
- if (pmd_none(pmd))
- continue;
- if (pmd_table(pmd)) {
- if (copy_pte(dst_pmdp, src_pmdp, addr, next))
- return -ENOMEM;
- } else {
- set_pmd(dst_pmdp,
- __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
- }
- } while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
-
- return 0;
-}
-
-static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start,
- unsigned long end)
-{
- pud_t *dst_pudp;
- pud_t *src_pudp;
- unsigned long next;
- unsigned long addr = start;
-
- if (p4d_none(READ_ONCE(*dst_p4dp))) {
- dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
- if (!dst_pudp)
- return -ENOMEM;
- p4d_populate(&init_mm, dst_p4dp, dst_pudp);
- }
- dst_pudp = pud_offset(dst_p4dp, start);
-
- src_pudp = pud_offset(src_p4dp, start);
- do {
- pud_t pud = READ_ONCE(*src_pudp);
-
- next = pud_addr_end(addr, end);
- if (pud_none(pud))
- continue;
- if (pud_table(pud)) {
- if (copy_pmd(dst_pudp, src_pudp, addr, next))
- return -ENOMEM;
- } else {
- set_pud(dst_pudp,
- __pud(pud_val(pud) & ~PUD_SECT_RDONLY));
- }
- } while (dst_pudp++, src_pudp++, addr = next, addr != end);
-
- return 0;
-}
-
-static int copy_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
- unsigned long end)
-{
- p4d_t *dst_p4dp;
- p4d_t *src_p4dp;
- unsigned long next;
- unsigned long addr = start;
-
- dst_p4dp = p4d_offset(dst_pgdp, start);
- src_p4dp = p4d_offset(src_pgdp, start);
- do {
- next = p4d_addr_end(addr, end);
- if (p4d_none(READ_ONCE(*src_p4dp)))
- continue;
- if (copy_pud(dst_p4dp, src_p4dp, addr, next))
- return -ENOMEM;
- } while (dst_p4dp++, src_p4dp++, addr = next, addr != end);
-
- return 0;
-}
-
-static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
- unsigned long end)
-{
- unsigned long next;
- unsigned long addr = start;
- pgd_t *src_pgdp = pgd_offset_k(start);
-
- dst_pgdp = pgd_offset_pgd(dst_pgdp, start);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none(READ_ONCE(*src_pgdp)))
- continue;
- if (copy_p4d(dst_pgdp, src_pgdp, addr, next))
- return -ENOMEM;
- } while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
-
- return 0;
-}
-
-static int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start,
- unsigned long end)
-{
- int rc;
- pgd_t *trans_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);
-
- if (!trans_pgd) {
- pr_err("Failed to allocate memory for temporary page tables.\n");
- return -ENOMEM;
- }
-
- rc = copy_page_tables(trans_pgd, start, end);
- if (!rc)
- *dst_pgdp = trans_pgd;
-
- return rc;
-}
-
/*
* Setup then Resume from the hibernate image using swsusp_arch_suspend_exit().
*
@@ -650,16 +431,20 @@ int swsusp_arch_resume(void)
void *zero_page;
size_t exit_size;
pgd_t *tmp_pg_dir;
- phys_addr_t phys_hibernate_exit;
void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
void *, phys_addr_t, phys_addr_t);
+ struct trans_pgd_info trans_info = {
+ .trans_alloc_page = hibernate_page_alloc,
+ .trans_alloc_arg = (void *)GFP_ATOMIC,
+ };
/*
* Restoring the memory image will overwrite the ttbr1 page tables.
* Create a second copy of just the linear map, and use this when
* restoring.
*/
- rc = trans_pgd_create_copy(&tmp_pg_dir, PAGE_OFFSET, PAGE_END);
+ rc = trans_pgd_create_copy(&trans_info, &tmp_pg_dir, PAGE_OFFSET,
+ PAGE_END);
if (rc)
return rc;
@@ -673,19 +458,13 @@ int swsusp_arch_resume(void)
return -ENOMEM;
}
- /*
- * Locate the exit code in the bottom-but-one page, so that *NULL
- * still has disastrous affects.
- */
- hibernate_exit = (void *)PAGE_SIZE;
exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
/*
* Copy swsusp_arch_suspend_exit() to a safe page. This will generate
* a new set of ttbr0 page tables and load them.
*/
rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
- (unsigned long)hibernate_exit,
- &phys_hibernate_exit);
+ (phys_addr_t *)&hibernate_exit);
if (rc) {
pr_err("Failed to create safe executable page for hibernate_exit code.\n");
return rc;
@@ -704,7 +483,7 @@ int swsusp_arch_resume(void)
* We can skip this step if we booted at EL1, or are running with VHE.
*/
if (el2_reset_needed()) {
- phys_addr_t el2_vectors = phys_hibernate_exit; /* base */
+ phys_addr_t el2_vectors = (phys_addr_t)hibernate_exit;
el2_vectors += hibernate_el2_vectors -
__hibernate_exit_text_start; /* offset */
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 160f5881a0b7..678cd2c618ee 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -8,9 +8,9 @@
#include <linux/init.h>
#include <linux/linkage.h>
-#include <linux/irqchip/arm-gic-v3.h>
#include <asm/assembler.h>
+#include <asm/el2_setup.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/ptrace.h>
@@ -47,10 +47,13 @@ SYM_CODE_END(__hyp_stub_vectors)
SYM_CODE_START_LOCAL(el1_sync)
cmp x0, #HVC_SET_VECTORS
- b.ne 2f
+ b.ne 1f
msr vbar_el2, x1
b 9f
+1: cmp x0, #HVC_VHE_RESTART
+ b.eq mutate_to_vhe
+
2: cmp x0, #HVC_SOFT_RESTART
b.ne 3f
mov x0, x2
@@ -70,6 +73,88 @@ SYM_CODE_START_LOCAL(el1_sync)
eret
SYM_CODE_END(el1_sync)
+// nVHE? No way! Give me the real thing!
+SYM_CODE_START_LOCAL(mutate_to_vhe)
+ // Be prepared to fail
+ mov_q x0, HVC_STUB_ERR
+
+ // Sanity check: MMU *must* be off
+ mrs x1, sctlr_el2
+ tbnz x1, #0, 1f
+
+ // Needs to be VHE capable, obviously
+ mrs x1, id_aa64mmfr1_el1
+ ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4
+ cbz x1, 1f
+
+ // Check whether VHE is disabled from the command line
+ adr_l x1, id_aa64mmfr1_override
+ ldr x2, [x1, FTR_OVR_VAL_OFFSET]
+ ldr x1, [x1, FTR_OVR_MASK_OFFSET]
+ ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
+ ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4
+ cmp x1, xzr
+ and x2, x2, x1
+ csinv x2, x2, xzr, ne
+ cbz x2, 1f
+
+ // Engage the VHE magic!
+ mov_q x0, HCR_HOST_VHE_FLAGS
+ msr hcr_el2, x0
+ isb
+
+ // Use the EL1 allocated stack, per-cpu offset
+ mrs x0, sp_el1
+ mov sp, x0
+ mrs x0, tpidr_el1
+ msr tpidr_el2, x0
+
+ // FP configuration, vectors
+ mrs_s x0, SYS_CPACR_EL12
+ msr cpacr_el1, x0
+ mrs_s x0, SYS_VBAR_EL12
+ msr vbar_el1, x0
+
+ // Use EL2 translations for SPE and disable access from EL1
+ mrs x0, mdcr_el2
+ bic x0, x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
+ msr mdcr_el2, x0
+
+ // Transfer the MM state from EL1 to EL2
+ mrs_s x0, SYS_TCR_EL12
+ msr tcr_el1, x0
+ mrs_s x0, SYS_TTBR0_EL12
+ msr ttbr0_el1, x0
+ mrs_s x0, SYS_TTBR1_EL12
+ msr ttbr1_el1, x0
+ mrs_s x0, SYS_MAIR_EL12
+ msr mair_el1, x0
+ isb
+
+ // Invalidate TLBs before enabling the MMU
+ tlbi vmalle1
+ dsb nsh
+
+ // Enable the EL2 S1 MMU, as set up from EL1
+ mrs_s x0, SYS_SCTLR_EL12
+ set_sctlr_el1 x0
+
+ // Disable the EL1 S1 MMU for a good measure
+ mov_q x0, INIT_SCTLR_EL1_MMU_OFF
+ msr_s SYS_SCTLR_EL12, x0
+
+ // Hack the exception return to stay at EL2
+ mrs x0, spsr_el1
+ and x0, x0, #~PSR_MODE_MASK
+ mov x1, #PSR_MODE_EL2h
+ orr x0, x0, x1
+ msr spsr_el1, x0
+
+ mov x0, xzr
+
+1: eret
+SYM_CODE_END(mutate_to_vhe)
+
.macro invalid_vector label
SYM_CODE_START_LOCAL(\label)
b \label
@@ -85,6 +170,8 @@ SYM_CODE_END(\label)
invalid_vector el1_fiq_invalid
invalid_vector el1_error_invalid
+ .popsection
+
/*
* __hyp_set_vectors: Call this after boot to set the initial hypervisor
* vectors as part of hypervisor installation. On an SMP system, this should
@@ -118,3 +205,27 @@ SYM_FUNC_START(__hyp_reset_vectors)
hvc #0
ret
SYM_FUNC_END(__hyp_reset_vectors)
+
+/*
+ * Entry point to switch to VHE if deemed capable
+ */
+SYM_FUNC_START(switch_to_vhe)
+#ifdef CONFIG_ARM64_VHE
+ // Need to have booted at EL2
+ adr_l x1, __boot_cpu_mode
+ ldr w0, [x1]
+ cmp w0, #BOOT_CPU_MODE_EL2
+ b.ne 1f
+
+ // and still be at EL1
+ mrs x0, CurrentEL
+ cmp x0, #CurrentEL_EL1
+ b.ne 1f
+
+ // Turn the world upside down
+ mov x0, #HVC_VHE_RESTART
+ hvc #0
+1:
+#endif
+ ret
+SYM_FUNC_END(switch_to_vhe)
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
new file mode 100644
index 000000000000..dffb16682330
--- /dev/null
+++ b/arch/arm64/kernel/idreg-override.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Early cpufeature override framework
+ *
+ * Copyright (C) 2020 Google LLC
+ * Author: Marc Zyngier <maz@kernel.org>
+ */
+
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/libfdt.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
+#include <asm/setup.h>
+
+#define FTR_DESC_NAME_LEN 20
+#define FTR_DESC_FIELD_LEN 10
+#define FTR_ALIAS_NAME_LEN 30
+#define FTR_ALIAS_OPTION_LEN 80
+
+struct ftr_set_desc {
+ char name[FTR_DESC_NAME_LEN];
+ struct arm64_ftr_override *override;
+ struct {
+ char name[FTR_DESC_FIELD_LEN];
+ u8 shift;
+ } fields[];
+};
+
+static const struct ftr_set_desc mmfr1 __initconst = {
+ .name = "id_aa64mmfr1",
+ .override = &id_aa64mmfr1_override,
+ .fields = {
+ { "vh", ID_AA64MMFR1_VHE_SHIFT },
+ {}
+ },
+};
+
+static const struct ftr_set_desc pfr1 __initconst = {
+ .name = "id_aa64pfr1",
+ .override = &id_aa64pfr1_override,
+ .fields = {
+ { "bt", ID_AA64PFR1_BT_SHIFT },
+ {}
+ },
+};
+
+static const struct ftr_set_desc isar1 __initconst = {
+ .name = "id_aa64isar1",
+ .override = &id_aa64isar1_override,
+ .fields = {
+ { "gpi", ID_AA64ISAR1_GPI_SHIFT },
+ { "gpa", ID_AA64ISAR1_GPA_SHIFT },
+ { "api", ID_AA64ISAR1_API_SHIFT },
+ { "apa", ID_AA64ISAR1_APA_SHIFT },
+ {}
+ },
+};
+
+extern struct arm64_ftr_override kaslr_feature_override;
+
+static const struct ftr_set_desc kaslr __initconst = {
+ .name = "kaslr",
+#ifdef CONFIG_RANDOMIZE_BASE
+ .override = &kaslr_feature_override,
+#endif
+ .fields = {
+ { "disabled", 0 },
+ {}
+ },
+};
+
+static const struct ftr_set_desc * const regs[] __initconst = {
+ &mmfr1,
+ &pfr1,
+ &isar1,
+ &kaslr,
+};
+
+static const struct {
+ char alias[FTR_ALIAS_NAME_LEN];
+ char feature[FTR_ALIAS_OPTION_LEN];
+} aliases[] __initconst = {
+ { "kvm-arm.mode=nvhe", "id_aa64mmfr1.vh=0" },
+ { "kvm-arm.mode=protected", "id_aa64mmfr1.vh=0" },
+ { "arm64.nobti", "id_aa64pfr1.bt=0" },
+ { "arm64.nopauth",
+ "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
+ "id_aa64isar1.api=0 id_aa64isar1.apa=0" },
+ { "nokaslr", "kaslr.disabled=1" },
+};
+
+static int __init find_field(const char *cmdline,
+ const struct ftr_set_desc *reg, int f, u64 *v)
+{
+ char opt[FTR_DESC_NAME_LEN + FTR_DESC_FIELD_LEN + 2];
+ int len;
+
+ len = snprintf(opt, ARRAY_SIZE(opt), "%s.%s=",
+ reg->name, reg->fields[f].name);
+
+ if (!parameqn(cmdline, opt, len))
+ return -1;
+
+ return kstrtou64(cmdline + len, 0, v);
+}
+
+static void __init match_options(const char *cmdline)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(regs); i++) {
+ int f;
+
+ if (!regs[i]->override)
+ continue;
+
+ for (f = 0; strlen(regs[i]->fields[f].name); f++) {
+ u64 shift = regs[i]->fields[f].shift;
+ u64 mask = 0xfUL << shift;
+ u64 v;
+
+ if (find_field(cmdline, regs[i], f, &v))
+ continue;
+
+ regs[i]->override->val &= ~mask;
+ regs[i]->override->val |= (v << shift) & mask;
+ regs[i]->override->mask |= mask;
+
+ return;
+ }
+ }
+}
+
+static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
+{
+ do {
+ char buf[256];
+ size_t len;
+ int i;
+
+ cmdline = skip_spaces(cmdline);
+
+ for (len = 0; cmdline[len] && !isspace(cmdline[len]); len++);
+ if (!len)
+ return;
+
+ len = min(len, ARRAY_SIZE(buf) - 1);
+ strncpy(buf, cmdline, len);
+ buf[len] = 0;
+
+ if (strcmp(buf, "--") == 0)
+ return;
+
+ cmdline += len;
+
+ match_options(buf);
+
+ for (i = 0; parse_aliases && i < ARRAY_SIZE(aliases); i++)
+ if (parameq(buf, aliases[i].alias))
+ __parse_cmdline(aliases[i].feature, false);
+ } while (1);
+}
+
+static __init void parse_cmdline(void)
+{
+ if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
+ const u8 *prop;
+ void *fdt;
+ int node;
+
+ fdt = get_early_fdt_ptr();
+ if (!fdt)
+ goto out;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ goto out;
+
+ prop = fdt_getprop(fdt, node, "bootargs", NULL);
+ if (!prop)
+ goto out;
+
+ __parse_cmdline(prop, true);
+
+ if (!IS_ENABLED(CONFIG_CMDLINE_EXTEND))
+ return;
+ }
+
+out:
+ __parse_cmdline(CONFIG_CMDLINE, true);
+}
+
+/* Keep checkers quiet */
+void init_feature_override(void);
+
+asmlinkage void __init init_feature_override(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(regs); i++) {
+ if (regs[i]->override) {
+ regs[i]->override->val = 0;
+ regs[i]->override->mask = 0;
+ }
+ }
+
+ parse_cmdline();
+
+ for (i = 0; i < ARRAY_SIZE(regs); i++) {
+ if (regs[i]->override)
+ __flush_dcache_area(regs[i]->override,
+ sizeof(*regs[i]->override));
+ }
+}
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 1c74c45b9494..27f8939deb1b 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -19,6 +19,7 @@
#include <asm/memory.h>
#include <asm/mmu.h>
#include <asm/sections.h>
+#include <asm/setup.h>
enum kaslr_status {
KASLR_ENABLED,
@@ -50,39 +51,7 @@ static __init u64 get_kaslr_seed(void *fdt)
return ret;
}
-static __init bool cmdline_contains_nokaslr(const u8 *cmdline)
-{
- const u8 *str;
-
- str = strstr(cmdline, "nokaslr");
- return str == cmdline || (str > cmdline && *(str - 1) == ' ');
-}
-
-static __init bool is_kaslr_disabled_cmdline(void *fdt)
-{
- if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
- int node;
- const u8 *prop;
-
- node = fdt_path_offset(fdt, "/chosen");
- if (node < 0)
- goto out;
-
- prop = fdt_getprop(fdt, node, "bootargs", NULL);
- if (!prop)
- goto out;
-
- if (cmdline_contains_nokaslr(prop))
- return true;
-
- if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
- goto out;
-
- return false;
- }
-out:
- return cmdline_contains_nokaslr(CONFIG_CMDLINE);
-}
+struct arm64_ftr_override kaslr_feature_override __initdata;
/*
* This routine will be executed with the kernel mapped at its default virtual
@@ -92,12 +61,11 @@ out:
* containing function pointers) to be reinitialized, and zero-initialized
* .bss variables will be reset to 0.
*/
-u64 __init kaslr_early_init(u64 dt_phys)
+u64 __init kaslr_early_init(void)
{
void *fdt;
u64 seed, offset, mask, module_range;
unsigned long raw;
- int size;
/*
* Set a reasonable default for module_alloc_base in case
@@ -111,8 +79,7 @@ u64 __init kaslr_early_init(u64 dt_phys)
* and proceed with KASLR disabled. We will make another
* attempt at mapping the FDT in setup_machine()
*/
- early_fixmap_init();
- fdt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
+ fdt = get_early_fdt_ptr();
if (!fdt) {
kaslr_status = KASLR_DISABLED_FDT_REMAP;
return 0;
@@ -127,7 +94,7 @@ u64 __init kaslr_early_init(u64 dt_phys)
* Check if 'nokaslr' appears on the command line, and
* return 0 if that is the case.
*/
- if (is_kaslr_disabled_cmdline(fdt)) {
+ if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) {
kaslr_status = KASLR_DISABLED_CMDLINE;
return 0;
}
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index a0b144cfaea7..90a335c74442 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -42,6 +42,7 @@ static void _kexec_image_info(const char *func, int line,
pr_debug(" start: %lx\n", kimage->start);
pr_debug(" head: %lx\n", kimage->head);
pr_debug(" nr_segments: %lu\n", kimage->nr_segments);
+ pr_debug(" kern_reloc: %pa\n", &kimage->arch.kern_reloc);
for (i = 0; i < kimage->nr_segments; i++) {
pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
@@ -58,6 +59,23 @@ void machine_kexec_cleanup(struct kimage *kimage)
/* Empty routine needed to avoid build errors. */
}
+int machine_kexec_post_load(struct kimage *kimage)
+{
+ void *reloc_code = page_to_virt(kimage->control_code_page);
+
+ memcpy(reloc_code, arm64_relocate_new_kernel,
+ arm64_relocate_new_kernel_size);
+ kimage->arch.kern_reloc = __pa(reloc_code);
+ kexec_image_info(kimage);
+
+ /* Flush the reloc_code in preparation for its execution. */
+ __flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size);
+ flush_icache_range((uintptr_t)reloc_code, (uintptr_t)reloc_code +
+ arm64_relocate_new_kernel_size);
+
+ return 0;
+}
+
/**
* machine_kexec_prepare - Prepare for a kexec reboot.
*
@@ -67,8 +85,6 @@ void machine_kexec_cleanup(struct kimage *kimage)
*/
int machine_kexec_prepare(struct kimage *kimage)
{
- kexec_image_info(kimage);
-
if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
pr_err("Can't kexec: CPUs are stuck in the kernel.\n");
return -EBUSY;
@@ -143,8 +159,6 @@ static void kexec_segment_flush(const struct kimage *kimage)
*/
void machine_kexec(struct kimage *kimage)
{
- phys_addr_t reboot_code_buffer_phys;
- void *reboot_code_buffer;
bool in_kexec_crash = (kimage == kexec_crash_image);
bool stuck_cpus = cpus_are_stuck_in_kernel();
@@ -155,31 +169,6 @@ void machine_kexec(struct kimage *kimage)
WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
"Some CPUs may be stale, kdump will be unreliable.\n");
- reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
- reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
-
- kexec_image_info(kimage);
-
- /*
- * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
- * after the kernel is shut down.
- */
- memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
- arm64_relocate_new_kernel_size);
-
- /* Flush the reboot_code_buffer in preparation for its execution. */
- __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
-
- /*
- * Although we've killed off the secondary CPUs, we don't update
- * the online mask if we're handling a crash kernel and consequently
- * need to avoid flush_icache_range(), which will attempt to IPI
- * the offline CPUs. Therefore, we must use the __* variant here.
- */
- __flush_icache_range((uintptr_t)reboot_code_buffer,
- (uintptr_t)reboot_code_buffer +
- arm64_relocate_new_kernel_size);
-
/* Flush the kimage list and its buffers. */
kexec_list_flush(kimage);
@@ -193,7 +182,7 @@ void machine_kexec(struct kimage *kimage)
/*
* cpu_soft_restart will shutdown the MMU, disable data caches, then
- * transfer control to the reboot_code_buffer which contains a copy of
+ * transfer control to the kern_reloc which contains a copy of
* the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel
* uses physical addressing to relocate the new image to its final
* position and transfers control to the image entry point when the
@@ -203,12 +192,8 @@ void machine_kexec(struct kimage *kimage)
* userspace (kexec-tools).
* In kexec_file case, the kernel starts directly without purgatory.
*/
- cpu_soft_restart(reboot_code_buffer_phys, kimage->head, kimage->start,
-#ifdef CONFIG_KEXEC_FILE
- kimage->arch.dtb_mem);
-#else
- 0);
-#endif
+ cpu_soft_restart(kimage->arch.kern_reloc, kimage->head, kimage->start,
+ kimage->arch.dtb_mem);
BUG(); /* Should never get here. */
}
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index 2e224435c024..e53493d8b208 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -131,7 +131,7 @@ u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
}
#endif
-#define cmp_3way(a,b) ((a) < (b) ? -1 : (a) > (b))
+#define cmp_3way(a, b) ((a) < (b) ? -1 : (a) > (b))
static int cmp_rela(const void *a, const void *b)
{
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 3605f77ad4df..7d2318f80955 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -280,7 +280,7 @@ armv8pmu_event_attr_is_visible(struct kobject *kobj,
return 0;
}
-static struct attribute_group armv8_pmuv3_events_attr_group = {
+static const struct attribute_group armv8_pmuv3_events_attr_group = {
.name = "events",
.attrs = armv8_pmuv3_event_attrs,
.is_visible = armv8pmu_event_attr_is_visible,
@@ -300,7 +300,7 @@ static struct attribute *armv8_pmuv3_format_attrs[] = {
NULL,
};
-static struct attribute_group armv8_pmuv3_format_attr_group = {
+static const struct attribute_group armv8_pmuv3_format_attr_group = {
.name = "format",
.attrs = armv8_pmuv3_format_attrs,
};
@@ -322,7 +322,7 @@ static struct attribute *armv8_pmuv3_caps_attrs[] = {
NULL,
};
-static struct attribute_group armv8_pmuv3_caps_attr_group = {
+static const struct attribute_group armv8_pmuv3_caps_attr_group = {
.name = "caps",
.attrs = armv8_pmuv3_caps_attrs,
};
@@ -810,7 +810,7 @@ static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc,
{
int idx;
- for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx ++) {
+ for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) {
if (!test_and_set_bit(idx, cpuc->used_mask))
return idx;
}
@@ -1188,6 +1188,12 @@ static int armv8_a77_pmu_init(struct arm_pmu *cpu_pmu)
armv8_pmuv3_map_event);
}
+static int armv8_a78_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a78",
+ armv8_pmuv3_map_event);
+}
+
static int armv8_e1_pmu_init(struct arm_pmu *cpu_pmu)
{
return armv8_pmu_init_nogroups(cpu_pmu, "armv8_neoverse_e1",
@@ -1225,6 +1231,7 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
{.compatible = "arm,cortex-a75-pmu", .data = armv8_a75_pmu_init},
{.compatible = "arm,cortex-a76-pmu", .data = armv8_a76_pmu_init},
{.compatible = "arm,cortex-a77-pmu", .data = armv8_a77_pmu_init},
+ {.compatible = "arm,cortex-a78-pmu", .data = armv8_a78_pmu_init},
{.compatible = "arm,neoverse-e1-pmu", .data = armv8_e1_pmu_init},
{.compatible = "arm,neoverse-n1-pmu", .data = armv8_n1_pmu_init},
{.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init},
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 6616486a58fe..4cc1ccc8d6ab 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -304,7 +304,7 @@ void __show_regs(struct pt_regs *regs)
}
}
-void show_regs(struct pt_regs * regs)
+void show_regs(struct pt_regs *regs)
{
__show_regs(regs);
dump_backtrace(regs, NULL, KERN_DEFAULT);
@@ -587,7 +587,7 @@ unsigned long get_wchan(struct task_struct *p)
ret = frame.pc;
goto out;
}
- } while (count ++ < 16);
+ } while (count++ < 16);
out:
put_task_stack(p);
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 8ac487c84e37..3d5c8afca75b 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -194,6 +194,7 @@ static void ptrace_hbptriggered(struct perf_event *bp,
}
arm64_force_sig_ptrace_errno_trap(si_errno, bkpt->trigger,
desc);
+ return;
}
#endif
arm64_force_sig_fault(SIGTRAP, TRAP_HWBKPT, bkpt->trigger, desc);
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index 84eec95ec06c..b78ea5de97a4 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -17,28 +17,24 @@
/*
* arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
*
- * The memory that the old kernel occupies may be overwritten when coping the
+ * The memory that the old kernel occupies may be overwritten when copying the
* new image to its final location. To assure that the
* arm64_relocate_new_kernel routine which does that copy is not overwritten,
* all code and data needed by arm64_relocate_new_kernel must be between the
* symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end. The
* machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
- * control_code_page, a special page which has been set up to be preserved
- * during the copy operation.
+ * safe memory that has been set up to be preserved during the copy operation.
*/
SYM_CODE_START(arm64_relocate_new_kernel)
-
/* Setup the list loop variables. */
mov x18, x2 /* x18 = dtb address */
mov x17, x1 /* x17 = kimage_start */
mov x16, x0 /* x16 = kimage_head */
- raw_dcache_line_size x15, x0 /* x15 = dcache line size */
mov x14, xzr /* x14 = entry ptr */
mov x13, xzr /* x13 = copy dest */
-
/* Check if the new image needs relocation. */
tbnz x16, IND_DONE_BIT, .Ldone
-
+ raw_dcache_line_size x15, x1 /* x15 = dcache line size */
.Lloop:
and x12, x16, PAGE_MASK /* x12 = addr */
@@ -47,44 +43,28 @@ SYM_CODE_START(arm64_relocate_new_kernel)
tbz x16, IND_SOURCE_BIT, .Ltest_indirection
/* Invalidate dest page to PoC. */
- mov x0, x13
- add x20, x0, #PAGE_SIZE
+ mov x2, x13
+ add x20, x2, #PAGE_SIZE
sub x1, x15, #1
- bic x0, x0, x1
-2: dc ivac, x0
- add x0, x0, x15
- cmp x0, x20
+ bic x2, x2, x1
+2: dc ivac, x2
+ add x2, x2, x15
+ cmp x2, x20
b.lo 2b
dsb sy
- mov x20, x13
- mov x21, x12
- copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7
-
- /* dest += PAGE_SIZE */
- add x13, x13, PAGE_SIZE
+ copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8
b .Lnext
-
.Ltest_indirection:
tbz x16, IND_INDIRECTION_BIT, .Ltest_destination
-
- /* ptr = addr */
- mov x14, x12
+ mov x14, x12 /* ptr = addr */
b .Lnext
-
.Ltest_destination:
tbz x16, IND_DESTINATION_BIT, .Lnext
-
- /* dest = addr */
- mov x13, x12
-
+ mov x13, x12 /* dest = addr */
.Lnext:
- /* entry = *ptr++ */
- ldr x16, [x14], #8
-
- /* while (!(entry & DONE)) */
- tbz x16, IND_DONE_BIT, .Lloop
-
+ ldr x16, [x14], #8 /* entry = *ptr++ */
+ tbz x16, IND_DONE_BIT, .Lloop /* while (!(entry & DONE)) */
.Ldone:
/* wait for writes from copy_page to finish */
dsb nsh
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index c18aacde8bb0..61845c0821d9 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -168,6 +168,21 @@ static void __init smp_build_mpidr_hash(void)
pr_warn("Large number of MPIDR hash buckets detected\n");
}
+static void *early_fdt_ptr __initdata;
+
+void __init *get_early_fdt_ptr(void)
+{
+ return early_fdt_ptr;
+}
+
+asmlinkage void __init early_fdt_map(u64 dt_phys)
+{
+ int fdt_size;
+
+ early_fixmap_init();
+ early_fdt_ptr = fixmap_remap_fdt(dt_phys, &fdt_size, PAGE_KERNEL);
+}
+
static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
int size;
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 6bdef7362c0e..5bfd9b87f85d 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -100,6 +100,7 @@ SYM_FUNC_END(__cpu_suspend_enter)
.pushsection ".idmap.text", "awx"
SYM_CODE_START(cpu_resume)
bl init_kernel_el
+ bl switch_to_vhe
bl __cpu_setup
/* enable the MMU early - so we can access sleep_save_stash by va */
adrp x1, swapper_pg_dir
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index fa56af1a59c3..0fb42129b469 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -44,6 +44,10 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
unsigned long fp = frame->fp;
struct stack_info info;
+ /* Terminal record; nothing to unwind */
+ if (!fp)
+ return -EINVAL;
+
if (fp & 0xf)
return -EINVAL;
@@ -104,15 +108,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
frame->pc = ptrauth_strip_insn_pac(frame->pc);
- /*
- * Frames created upon entry from EL0 have NULL FP and PC values, so
- * don't bother reporting these. Frames created by __noreturn functions
- * might have a valid FP even if PC is bogus, so only terminate where
- * both are NULL.
- */
- if (!frame->fp && !frame->pc)
- return -EINVAL;
-
return 0;
}
NOKPROBE_SYMBOL(unwind_frame);
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index c2877c332f2d..b9cf12b271d7 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -65,35 +65,6 @@ static inline bool has_syscall_work(unsigned long flags)
int syscall_trace_enter(struct pt_regs *regs);
void syscall_trace_exit(struct pt_regs *regs);
-#ifdef CONFIG_ARM64_ERRATUM_1463225
-DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
-
-static void cortex_a76_erratum_1463225_svc_handler(void)
-{
- u32 reg, val;
-
- if (!unlikely(test_thread_flag(TIF_SINGLESTEP)))
- return;
-
- if (!unlikely(this_cpu_has_cap(ARM64_WORKAROUND_1463225)))
- return;
-
- __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1);
- reg = read_sysreg(mdscr_el1);
- val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE;
- write_sysreg(val, mdscr_el1);
- asm volatile("msr daifclr, #8");
- isb();
-
- /* We will have taken a single-step exception by this point */
-
- write_sysreg(reg, mdscr_el1);
- __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 0);
-}
-#else
-static void cortex_a76_erratum_1463225_svc_handler(void) { }
-#endif /* CONFIG_ARM64_ERRATUM_1463225 */
-
static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
const syscall_fn_t syscall_table[])
{
@@ -120,7 +91,6 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
* (Similarly for HVC and SMC elsewhere.)
*/
- cortex_a76_erratum_1463225_svc_handler();
local_daif_restore(DAIF_PROCCTX);
if (flags & _TIF_MTE_ASYNC_FAULT) {
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index f6faa697e83e..e08a4126453a 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -199,76 +199,38 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
return 0;
}
-static inline bool
-enable_policy_freq_counters(int cpu, cpumask_var_t valid_cpus)
-{
- struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
-
- if (!policy) {
- pr_debug("CPU%d: No cpufreq policy found.\n", cpu);
- return false;
- }
-
- if (cpumask_subset(policy->related_cpus, valid_cpus))
- cpumask_or(amu_fie_cpus, policy->related_cpus,
- amu_fie_cpus);
-
- cpufreq_cpu_put(policy);
-
- return true;
-}
-
static DEFINE_STATIC_KEY_FALSE(amu_fie_key);
#define amu_freq_invariant() static_branch_unlikely(&amu_fie_key)
-static int __init init_amu_fie(void)
+static void amu_fie_setup(const struct cpumask *cpus)
{
- bool invariance_status = topology_scale_freq_invariant();
- cpumask_var_t valid_cpus;
- bool have_policy = false;
- int ret = 0;
+ bool invariant;
int cpu;
- if (!zalloc_cpumask_var(&valid_cpus, GFP_KERNEL))
- return -ENOMEM;
-
- if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) {
- ret = -ENOMEM;
- goto free_valid_mask;
- }
+ /* We are already set since the last insmod of cpufreq driver */
+ if (unlikely(cpumask_subset(cpus, amu_fie_cpus)))
+ return;
- for_each_present_cpu(cpu) {
+ for_each_cpu(cpu, cpus) {
if (!freq_counters_valid(cpu) ||
freq_inv_set_max_ratio(cpu,
cpufreq_get_hw_max_freq(cpu) * 1000,
arch_timer_get_rate()))
- continue;
-
- cpumask_set_cpu(cpu, valid_cpus);
- have_policy |= enable_policy_freq_counters(cpu, valid_cpus);
+ return;
}
- /*
- * If we are not restricted by cpufreq policies, we only enable
- * the use of the AMU feature for FIE if all CPUs support AMU.
- * Otherwise, enable_policy_freq_counters has already enabled
- * policy cpus.
- */
- if (!have_policy && cpumask_equal(valid_cpus, cpu_present_mask))
- cpumask_or(amu_fie_cpus, amu_fie_cpus, valid_cpus);
+ cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus);
- if (!cpumask_empty(amu_fie_cpus)) {
- pr_info("CPUs[%*pbl]: counters will be used for FIE.",
- cpumask_pr_args(amu_fie_cpus));
- static_branch_enable(&amu_fie_key);
- }
+ invariant = topology_scale_freq_invariant();
- /*
- * If the system is not fully invariant after AMU init, disable
- * partial use of counters for frequency invariance.
- */
- if (!topology_scale_freq_invariant())
- static_branch_disable(&amu_fie_key);
+ /* We aren't fully invariant yet */
+ if (!invariant && !cpumask_equal(amu_fie_cpus, cpu_present_mask))
+ return;
+
+ static_branch_enable(&amu_fie_key);
+
+ pr_debug("CPUs[%*pbl]: counters will be used for FIE.",
+ cpumask_pr_args(cpus));
/*
* Task scheduler behavior depends on frequency invariance support,
@@ -276,15 +238,50 @@ static int __init init_amu_fie(void)
* a result of counter initialisation and use, retrigger the build of
* scheduling domains to ensure the information is propagated properly.
*/
- if (invariance_status != topology_scale_freq_invariant())
+ if (!invariant)
rebuild_sched_domains_energy();
+}
+
+static int init_amu_fie_callback(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct cpufreq_policy *policy = data;
+
+ if (val == CPUFREQ_CREATE_POLICY)
+ amu_fie_setup(policy->related_cpus);
+
+ /*
+ * We don't need to handle CPUFREQ_REMOVE_POLICY event as the AMU
+ * counters don't have any dependency on cpufreq driver once we have
+ * initialized AMU support and enabled invariance. The AMU counters will
+ * keep on working just fine in the absence of the cpufreq driver, and
+ * for the CPUs for which there are no counters available, the last set
+ * value of freq_scale will remain valid as that is the frequency those
+ * CPUs are running at.
+ */
+
+ return 0;
+}
+
+static struct notifier_block init_amu_fie_notifier = {
+ .notifier_call = init_amu_fie_callback,
+};
+
+static int __init init_amu_fie(void)
+{
+ int ret;
+
+ if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL))
+ return -ENOMEM;
-free_valid_mask:
- free_cpumask_var(valid_cpus);
+ ret = cpufreq_register_notifier(&init_amu_fie_notifier,
+ CPUFREQ_POLICY_NOTIFIER);
+ if (ret)
+ free_cpumask_var(amu_fie_cpus);
return ret;
}
-late_initcall_sync(init_amu_fie);
+core_initcall(init_amu_fie);
bool arch_freq_counters_available(const struct cpumask *cpus)
{
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 6895ce777e7f..a05d34f0e82a 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -45,7 +45,7 @@
#include <asm/system_misc.h>
#include <asm/sysreg.h>
-static const char *handler[]= {
+static const char *handler[] = {
"Synchronous Abort",
"IRQ",
"FIQ",
diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso-wrap.S
index c4b1990bf2be..c4b1990bf2be 100644
--- a/arch/arm64/kernel/vdso/vdso.S
+++ b/arch/arm64/kernel/vdso-wrap.S
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index cd9c3fa25902..76c0255ecc91 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -44,7 +44,6 @@ endif
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n
-obj-y += vdso.o
targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
diff --git a/arch/arm64/kernel/vdso/gen_vdso_offsets.sh b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh
index 8b806eacd0a6..0387209d65b1 100755
--- a/arch/arm64/kernel/vdso/gen_vdso_offsets.sh
+++ b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh
@@ -13,4 +13,4 @@
LC_ALL=C
sed -n -e 's/^00*/0/' -e \
-'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2\t0x\1/p'
+'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2 0x\1/p'
diff --git a/arch/arm64/kernel/vdso32/vdso.S b/arch/arm64/kernel/vdso32-wrap.S
index e72ac7bc4c04..e72ac7bc4c04 100644
--- a/arch/arm64/kernel/vdso32/vdso.S
+++ b/arch/arm64/kernel/vdso32-wrap.S
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index a1e0f91e6cea..789ad420f16b 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -155,7 +155,6 @@ c-obj-vdso-gettimeofday := $(addprefix $(obj)/, $(c-obj-vdso-gettimeofday))
asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso))
obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
-obj-y += vdso.o
targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 4c0b0c89ad59..68f76a96c60b 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -316,3 +316,11 @@ ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE,
* If padding is applied before .head.text, virt<->phys conversions will fail.
*/
ASSERT(_text == KIMAGE_VADDR, "HEAD is misaligned")
+
+ASSERT(swapper_pg_dir - reserved_pg_dir == RESERVED_SWAPPER_OFFSET,
+ "RESERVED_SWAPPER_OFFSET is wrong!")
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ASSERT(swapper_pg_dir - tramp_pg_dir == TRAMP_SWAPPER_OFFSET,
+ "TRAMP_SWAPPER_OFFSET is wrong!")
+#endif
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index fe60d25c000e..6804c5673312 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1967,6 +1967,9 @@ static int __init early_kvm_mode_cfg(char *arg)
return 0;
}
+ if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode()))
+ return 0;
+
return -EINVAL;
}
early_param("kvm-arm.mode", early_kvm_mode_cfg);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index b17bf19217f1..882fd40d068e 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -191,7 +191,7 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
2: msr SPsel, #1 // We want to use SP_EL{1,2}
/* Initialize EL2 CPU state to sane values. */
- init_el2_state nvhe // Clobbers x0..x2
+ init_el2_state // Clobbers x0..x2
/* Enable MMU, set vectors and stack. */
mov x0, x28
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 5ead3c3de3b6..77222d92667a 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -6,6 +6,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o
+obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o
obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
obj-$(CONFIG_ARM64_MTE) += mteswap.o
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 35d75c60e2b8..2e339f0bd958 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -564,7 +564,7 @@ retry:
mmap_read_lock(mm);
} else {
/*
- * The above down_read_trylock() might have succeeded in which
+ * The above mmap_read_trylock() might have succeeded in which
* case, we'll have missed the might_sleep() from down_read().
*/
might_sleep();
@@ -875,44 +875,12 @@ static void debug_exception_exit(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(debug_exception_exit);
-#ifdef CONFIG_ARM64_ERRATUM_1463225
-DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
-
-static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
-{
- if (user_mode(regs))
- return 0;
-
- if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa))
- return 0;
-
- /*
- * We've taken a dummy step exception from the kernel to ensure
- * that interrupts are re-enabled on the syscall path. Return back
- * to cortex_a76_erratum_1463225_svc_handler() with debug exceptions
- * masked so that we can safely restore the mdscr and get on with
- * handling the syscall.
- */
- regs->pstate |= PSR_D_BIT;
- return 1;
-}
-#else
-static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
-{
- return 0;
-}
-#endif /* CONFIG_ARM64_ERRATUM_1463225 */
-NOKPROBE_SYMBOL(cortex_a76_erratum_1463225_debug_handler);
-
void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
struct pt_regs *regs)
{
const struct fault_info *inf = esr_to_debug_fault_info(esr);
unsigned long pc = instruction_pointer(regs);
- if (cortex_a76_erratum_1463225_debug_handler(regs))
- return;
-
debug_exception_enter(regs);
if (user_mode(regs) && !is_ttbr0_addr(pc))
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 07937b49cb88..a38f54cd638c 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -5,20 +5,11 @@
* Copyright (C) 2012 ARM Ltd.
*/
-#include <linux/elf.h>
-#include <linux/fs.h>
-#include <linux/memblock.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/export.h>
-#include <linux/shm.h>
-#include <linux/sched/signal.h>
-#include <linux/sched/mm.h>
#include <linux/io.h>
-#include <linux/personality.h>
-#include <linux/random.h>
+#include <linux/memblock.h>
+#include <linux/types.h>
-#include <asm/cputype.h>
+#include <asm/page.h>
/*
* You really shouldn't be using read() or write() on /dev/mem. This might go
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index ae0c3d023824..25af183e4bed 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -628,7 +628,7 @@ static bool arm64_early_this_cpu_has_bti(void)
if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
return false;
- pfr1 = read_sysreg_s(SYS_ID_AA64PFR1_EL1);
+ pfr1 = __read_sysreg_by_encoding(SYS_ID_AA64PFR1_EL1);
return cpuid_feature_extract_unsigned_field(pfr1,
ID_AA64PFR1_BT_SHIFT);
}
@@ -1094,6 +1094,7 @@ static void free_empty_tables(unsigned long addr, unsigned long end,
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
+ WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
return vmemmap_populate_basepages(start, end, node, altmap);
}
#else /* !ARM64_SWAPPER_USES_SECTION_MAPS */
@@ -1107,6 +1108,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
pud_t *pudp;
pmd_t *pmdp;
+ WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
do {
next = pmd_addr_end(addr, end);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 1f7ee8c8b7b8..c967bfd30d2b 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -291,17 +291,7 @@ skip_pgd:
/* We're done: fire up the MMU again */
mrs x17, sctlr_el1
orr x17, x17, #SCTLR_ELx_M
- msr sctlr_el1, x17
- isb
-
- /*
- * Invalidate the local I-cache so that any instructions fetched
- * speculatively from the PoC are discarded, since they may have
- * been dynamically patched at the PoU.
- */
- ic iallu
- dsb nsh
- isb
+ set_sctlr_el1 x17
/* Set the flag to zero to indicate that we're all done */
str wzr, [flag_ptr]
@@ -464,8 +454,8 @@ SYM_FUNC_START(__cpu_setup)
#endif
msr mair_el1, x5
/*
- * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for
- * both user and kernel.
+ * Set/prepare TCR and TTBR. TCR_EL1.T1SZ gets further
+ * adjusted if the kernel is compiled with 52bit VA support.
*/
mov_q x10, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 04137a8f3d2d..0e050d76b83a 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -324,6 +324,7 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
st = (struct pg_state){
.seq = s,
.marker = info->markers,
+ .level = -1,
.ptdump = {
.note_page = note_page,
.range = (struct ptdump_range[]){
diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c
new file mode 100644
index 000000000000..527f0a39c3da
--- /dev/null
+++ b/arch/arm64/mm/trans_pgd.c
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Transitional page tables for kexec and hibernate
+ *
+ * This file derived from: arch/arm64/kernel/hibernate.c
+ *
+ * Copyright (c) 2020, Microsoft Corporation.
+ * Pavel Tatashin <pasha.tatashin@soleen.com>
+ *
+ */
+
+/*
+ * Transitional tables are used during system transferring from one world to
+ * another: such as during hibernate restore, and kexec reboots. During these
+ * phases one cannot rely on page table not being overwritten. This is because
+ * hibernate and kexec can overwrite the current page tables during transition.
+ */
+
+#include <asm/trans_pgd.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <linux/suspend.h>
+#include <linux/bug.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+
+static void *trans_alloc(struct trans_pgd_info *info)
+{
+ return info->trans_alloc_page(info->trans_alloc_arg);
+}
+
+static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
+{
+ pte_t pte = READ_ONCE(*src_ptep);
+
+ if (pte_valid(pte)) {
+ /*
+ * Resume will overwrite areas that may be marked
+ * read only (code, rodata). Clear the RDONLY bit from
+ * the temporary mappings we use during restore.
+ */
+ set_pte(dst_ptep, pte_mkwrite(pte));
+ } else if (debug_pagealloc_enabled() && !pte_none(pte)) {
+ /*
+ * debug_pagealloc will removed the PTE_VALID bit if
+ * the page isn't in use by the resume kernel. It may have
+ * been in use by the original kernel, in which case we need
+ * to put it back in our copy to do the restore.
+ *
+ * Before marking this entry valid, check the pfn should
+ * be mapped.
+ */
+ BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+ set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
+ }
+}
+
+static int copy_pte(struct trans_pgd_info *info, pmd_t *dst_pmdp,
+ pmd_t *src_pmdp, unsigned long start, unsigned long end)
+{
+ pte_t *src_ptep;
+ pte_t *dst_ptep;
+ unsigned long addr = start;
+
+ dst_ptep = trans_alloc(info);
+ if (!dst_ptep)
+ return -ENOMEM;
+ pmd_populate_kernel(NULL, dst_pmdp, dst_ptep);
+ dst_ptep = pte_offset_kernel(dst_pmdp, start);
+
+ src_ptep = pte_offset_kernel(src_pmdp, start);
+ do {
+ _copy_pte(dst_ptep, src_ptep, addr);
+ } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
+
+ return 0;
+}
+
+static int copy_pmd(struct trans_pgd_info *info, pud_t *dst_pudp,
+ pud_t *src_pudp, unsigned long start, unsigned long end)
+{
+ pmd_t *src_pmdp;
+ pmd_t *dst_pmdp;
+ unsigned long next;
+ unsigned long addr = start;
+
+ if (pud_none(READ_ONCE(*dst_pudp))) {
+ dst_pmdp = trans_alloc(info);
+ if (!dst_pmdp)
+ return -ENOMEM;
+ pud_populate(NULL, dst_pudp, dst_pmdp);
+ }
+ dst_pmdp = pmd_offset(dst_pudp, start);
+
+ src_pmdp = pmd_offset(src_pudp, start);
+ do {
+ pmd_t pmd = READ_ONCE(*src_pmdp);
+
+ next = pmd_addr_end(addr, end);
+ if (pmd_none(pmd))
+ continue;
+ if (pmd_table(pmd)) {
+ if (copy_pte(info, dst_pmdp, src_pmdp, addr, next))
+ return -ENOMEM;
+ } else {
+ set_pmd(dst_pmdp,
+ __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
+ }
+ } while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
+
+ return 0;
+}
+
+static int copy_pud(struct trans_pgd_info *info, p4d_t *dst_p4dp,
+ p4d_t *src_p4dp, unsigned long start,
+ unsigned long end)
+{
+ pud_t *dst_pudp;
+ pud_t *src_pudp;
+ unsigned long next;
+ unsigned long addr = start;
+
+ if (p4d_none(READ_ONCE(*dst_p4dp))) {
+ dst_pudp = trans_alloc(info);
+ if (!dst_pudp)
+ return -ENOMEM;
+ p4d_populate(NULL, dst_p4dp, dst_pudp);
+ }
+ dst_pudp = pud_offset(dst_p4dp, start);
+
+ src_pudp = pud_offset(src_p4dp, start);
+ do {
+ pud_t pud = READ_ONCE(*src_pudp);
+
+ next = pud_addr_end(addr, end);
+ if (pud_none(pud))
+ continue;
+ if (pud_table(pud)) {
+ if (copy_pmd(info, dst_pudp, src_pudp, addr, next))
+ return -ENOMEM;
+ } else {
+ set_pud(dst_pudp,
+ __pud(pud_val(pud) & ~PUD_SECT_RDONLY));
+ }
+ } while (dst_pudp++, src_pudp++, addr = next, addr != end);
+
+ return 0;
+}
+
+static int copy_p4d(struct trans_pgd_info *info, pgd_t *dst_pgdp,
+ pgd_t *src_pgdp, unsigned long start,
+ unsigned long end)
+{
+ p4d_t *dst_p4dp;
+ p4d_t *src_p4dp;
+ unsigned long next;
+ unsigned long addr = start;
+
+ dst_p4dp = p4d_offset(dst_pgdp, start);
+ src_p4dp = p4d_offset(src_pgdp, start);
+ do {
+ next = p4d_addr_end(addr, end);
+ if (p4d_none(READ_ONCE(*src_p4dp)))
+ continue;
+ if (copy_pud(info, dst_p4dp, src_p4dp, addr, next))
+ return -ENOMEM;
+ } while (dst_p4dp++, src_p4dp++, addr = next, addr != end);
+
+ return 0;
+}
+
+static int copy_page_tables(struct trans_pgd_info *info, pgd_t *dst_pgdp,
+ unsigned long start, unsigned long end)
+{
+ unsigned long next;
+ unsigned long addr = start;
+ pgd_t *src_pgdp = pgd_offset_k(start);
+
+ dst_pgdp = pgd_offset_pgd(dst_pgdp, start);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(READ_ONCE(*src_pgdp)))
+ continue;
+ if (copy_p4d(info, dst_pgdp, src_pgdp, addr, next))
+ return -ENOMEM;
+ } while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
+
+ return 0;
+}
+
+/*
+ * Create trans_pgd and copy linear map.
+ * info: contains allocator and its argument
+ * dst_pgdp: new page table that is created, and to which map is copied.
+ * start: Start of the interval (inclusive).
+ * end: End of the interval (exclusive).
+ *
+ * Returns 0 on success, and -ENOMEM on failure.
+ */
+int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **dst_pgdp,
+ unsigned long start, unsigned long end)
+{
+ int rc;
+ pgd_t *trans_pgd = trans_alloc(info);
+
+ if (!trans_pgd) {
+ pr_err("Failed to allocate memory for temporary page tables.\n");
+ return -ENOMEM;
+ }
+
+ rc = copy_page_tables(info, trans_pgd, start, end);
+ if (!rc)
+ *dst_pgdp = trans_pgd;
+
+ return rc;
+}
+
+/*
+ * Add map entry to trans_pgd for a base-size page at PTE level.
+ * info: contains allocator and its argument
+ * trans_pgd: page table in which new map is added.
+ * page: page to be mapped.
+ * dst_addr: new VA address for the page
+ * pgprot: protection for the page.
+ *
+ * Returns 0 on success, and -ENOMEM on failure.
+ */
+int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd,
+ void *page, unsigned long dst_addr, pgprot_t pgprot)
+{
+ pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ pgdp = pgd_offset_pgd(trans_pgd, dst_addr);
+ if (pgd_none(READ_ONCE(*pgdp))) {
+ p4dp = trans_alloc(info);
+ if (!pgdp)
+ return -ENOMEM;
+ pgd_populate(NULL, pgdp, p4dp);
+ }
+
+ p4dp = p4d_offset(pgdp, dst_addr);
+ if (p4d_none(READ_ONCE(*p4dp))) {
+ pudp = trans_alloc(info);
+ if (!pudp)
+ return -ENOMEM;
+ p4d_populate(NULL, p4dp, pudp);
+ }
+
+ pudp = pud_offset(p4dp, dst_addr);
+ if (pud_none(READ_ONCE(*pudp))) {
+ pmdp = trans_alloc(info);
+ if (!pmdp)
+ return -ENOMEM;
+ pud_populate(NULL, pudp, pmdp);
+ }
+
+ pmdp = pmd_offset(pudp, dst_addr);
+ if (pmd_none(READ_ONCE(*pmdp))) {
+ ptep = trans_alloc(info);
+ if (!ptep)
+ return -ENOMEM;
+ pmd_populate_kernel(NULL, pmdp, ptep);
+ }
+
+ ptep = pte_offset_kernel(pmdp, dst_addr);
+ set_pte(ptep, pfn_pte(virt_to_pfn(page), pgprot));
+
+ return 0;
+}
+
+/*
+ * The page we want to idmap may be outside the range covered by VA_BITS that
+ * can be built using the kernel's p?d_populate() helpers. As a one off, for a
+ * single page, we build these page tables bottom up and just assume that will
+ * need the maximum T0SZ.
+ *
+ * Returns 0 on success, and -ENOMEM on failure.
+ * On success trans_ttbr0 contains page table with idmapped page, t0sz is set to
+ * maximum T0SZ for this page.
+ */
+int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0,
+ unsigned long *t0sz, void *page)
+{
+ phys_addr_t dst_addr = virt_to_phys(page);
+ unsigned long pfn = __phys_to_pfn(dst_addr);
+ int max_msb = (dst_addr & GENMASK(52, 48)) ? 51 : 47;
+ int bits_mapped = PAGE_SHIFT - 4;
+ unsigned long level_mask, prev_level_entry, *levels[4];
+ int this_level, index, level_lsb, level_msb;
+
+ dst_addr &= PAGE_MASK;
+ prev_level_entry = pte_val(pfn_pte(pfn, PAGE_KERNEL_EXEC));
+
+ for (this_level = 3; this_level >= 0; this_level--) {
+ levels[this_level] = trans_alloc(info);
+ if (!levels[this_level])
+ return -ENOMEM;
+
+ level_lsb = ARM64_HW_PGTABLE_LEVEL_SHIFT(this_level);
+ level_msb = min(level_lsb + bits_mapped, max_msb);
+ level_mask = GENMASK_ULL(level_msb, level_lsb);
+
+ index = (dst_addr & level_mask) >> level_lsb;
+ *(levels[this_level] + index) = prev_level_entry;
+
+ pfn = virt_to_pfn(levels[this_level]);
+ prev_level_entry = pte_val(pfn_pte(pfn,
+ __pgprot(PMD_TYPE_TABLE)));
+
+ if (level_msb == max_msb)
+ break;
+ }
+
+ *trans_ttbr0 = phys_to_ttbr(__pfn_to_phys(pfn));
+ *t0sz = TCR_T0SZ(max_msb + 1);
+
+ return 0;
+}
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 5f3b8ac9d97b..84e24986a97a 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1261,8 +1261,6 @@ void add_interrupt_randomness(int irq, int irq_flags)
cycles_t cycles = random_get_entropy();
__u32 c_high, j_high;
__u64 ip;
- unsigned long seed;
- int credit = 0;
if (cycles == 0)
cycles = get_reg(fast_pool, regs);
@@ -1298,23 +1296,12 @@ void add_interrupt_randomness(int irq, int irq_flags)
fast_pool->last = now;
__mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool));
-
- /*
- * If we have architectural seed generator, produce a seed and
- * add it to the pool. For the sake of paranoia don't let the
- * architectural seed generator dominate the input from the
- * interrupt noise.
- */
- if (arch_get_random_seed_long(&seed)) {
- __mix_pool_bytes(r, &seed, sizeof(seed));
- credit = 1;
- }
spin_unlock(&r->lock);
fast_pool->count = 0;
/* award one bit for the contents of the fast pool */
- credit_entropy_bits(r, credit + 1);
+ credit_entropy_bits(r, 1);
}
EXPORT_SYMBOL_GPL(add_interrupt_randomness);
diff --git a/drivers/firmware/smccc/smccc.c b/drivers/firmware/smccc/smccc.c
index 00c88b809c0c..d52bfc5ed5e4 100644
--- a/drivers/firmware/smccc/smccc.c
+++ b/drivers/firmware/smccc/smccc.c
@@ -5,16 +5,22 @@
#define pr_fmt(fmt) "smccc: " fmt
+#include <linux/cache.h>
#include <linux/init.h>
#include <linux/arm-smccc.h>
+#include <asm/archrandom.h>
static u32 smccc_version = ARM_SMCCC_VERSION_1_0;
static enum arm_smccc_conduit smccc_conduit = SMCCC_CONDUIT_NONE;
+bool __ro_after_init smccc_trng_available = false;
+
void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit)
{
smccc_version = version;
smccc_conduit = conduit;
+
+ smccc_trng_available = smccc_probe_trng();
}
enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void)
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index 87c4be9dd412..f81e2ec90005 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -1026,12 +1026,11 @@ static void pmu_event_set_period(struct perf_event *event)
static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
{
- unsigned long flags;
struct cci_pmu *cci_pmu = dev;
struct cci_pmu_hw_events *events = &cci_pmu->hw_events;
int idx, handled = IRQ_NONE;
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
+ raw_spin_lock(&events->pmu_lock);
/* Disable the PMU while we walk through the counters */
__cci_pmu_disable(cci_pmu);
@@ -1061,7 +1060,7 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
/* Enable the PMU and sync possibly overflowed counters */
__cci_pmu_enable_sync(cci_pmu);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+ raw_spin_unlock(&events->pmu_lock);
return IRQ_RETVAL(handled);
}
@@ -1376,7 +1375,7 @@ static struct attribute *pmu_attrs[] = {
NULL,
};
-static struct attribute_group pmu_attr_group = {
+static const struct attribute_group pmu_attr_group = {
.attrs = pmu_attrs,
};
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index a76ff594f3ca..1328159fe564 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -616,7 +616,7 @@ static struct attribute *arm_cmn_cpumask_attrs[] = {
NULL,
};
-static struct attribute_group arm_cmn_cpumask_attr_group = {
+static const struct attribute_group arm_cmn_cpumask_attr_group = {
.attrs = arm_cmn_cpumask_attrs,
};
@@ -1150,7 +1150,7 @@ static int arm_cmn_commit_txn(struct pmu *pmu)
static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
struct arm_cmn *cmn;
- unsigned int target;
+ unsigned int i, target;
cmn = hlist_entry_safe(node, struct arm_cmn, cpuhp_node);
if (cpu != cmn->cpu)
@@ -1161,6 +1161,8 @@ static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
return 0;
perf_pmu_migrate_context(&cmn->pmu, cpu, target);
+ for (i = 0; i < cmn->num_dtcs; i++)
+ irq_set_affinity_hint(cmn->dtc[i].irq, cpumask_of(target));
cmn->cpu = target;
return 0;
}
@@ -1502,7 +1504,7 @@ static int arm_cmn_probe(struct platform_device *pdev)
struct arm_cmn *cmn;
const char *name;
static atomic_t id;
- int err, rootnode, this_id;
+ int err, rootnode;
cmn = devm_kzalloc(&pdev->dev, sizeof(*cmn), GFP_KERNEL);
if (!cmn)
@@ -1549,14 +1551,9 @@ static int arm_cmn_probe(struct platform_device *pdev)
.cancel_txn = arm_cmn_end_txn,
};
- this_id = atomic_fetch_inc(&id);
- if (this_id == 0) {
- name = "arm_cmn";
- } else {
- name = devm_kasprintf(cmn->dev, GFP_KERNEL, "arm_cmn_%d", this_id);
- if (!name)
- return -ENOMEM;
- }
+ name = devm_kasprintf(cmn->dev, GFP_KERNEL, "arm_cmn_%d", atomic_fetch_inc(&id));
+ if (!name)
+ return -ENOMEM;
err = cpuhp_state_add_instance(arm_cmn_hp_state, &cmn->cpuhp_node);
if (err)
diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index 004930eb4bbb..66ad5b3ece19 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -159,7 +159,7 @@ static struct attribute *dmc620_pmu_events_attrs[] = {
NULL,
};
-static struct attribute_group dmc620_pmu_events_attr_group = {
+static const struct attribute_group dmc620_pmu_events_attr_group = {
.name = "events",
.attrs = dmc620_pmu_events_attrs,
};
@@ -222,7 +222,7 @@ static struct attribute *dmc620_pmu_formats_attrs[] = {
NULL,
};
-static struct attribute_group dmc620_pmu_format_attr_group = {
+static const struct attribute_group dmc620_pmu_format_attr_group = {
.name = "format",
.attrs = dmc620_pmu_formats_attrs,
};
@@ -717,6 +717,7 @@ static struct platform_driver dmc620_pmu_driver = {
.driver = {
.name = DMC620_DRVNAME,
.acpi_match_table = dmc620_acpi_match,
+ .suppress_bind_attrs = true,
},
.probe = dmc620_pmu_device_probe,
.remove = dmc620_pmu_device_remove,
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index cb2f55f450e4..2d10d84fb79c 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -577,7 +577,7 @@ static struct attribute *armpmu_common_attrs[] = {
NULL,
};
-static struct attribute_group armpmu_common_attr_group = {
+static const struct attribute_group armpmu_common_attr_group = {
.attrs = armpmu_common_attrs,
};
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index 74474bb322c3..8ff7a67f691c 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -493,7 +493,7 @@ static struct attribute *smmu_pmu_cpumask_attrs[] = {
NULL
};
-static struct attribute_group smmu_pmu_cpumask_group = {
+static const struct attribute_group smmu_pmu_cpumask_group = {
.attrs = smmu_pmu_cpumask_attrs,
};
@@ -548,7 +548,7 @@ static umode_t smmu_pmu_event_is_visible(struct kobject *kobj,
return 0;
}
-static struct attribute_group smmu_pmu_events_group = {
+static const struct attribute_group smmu_pmu_events_group = {
.name = "events",
.attrs = smmu_pmu_events,
.is_visible = smmu_pmu_event_is_visible,
@@ -583,7 +583,7 @@ static struct attribute *smmu_pmu_identifier_attrs[] = {
NULL
};
-static struct attribute_group smmu_pmu_identifier_group = {
+static const struct attribute_group smmu_pmu_identifier_group = {
.attrs = smmu_pmu_identifier_attrs,
.is_visible = smmu_pmu_identifier_attr_visible,
};
@@ -602,7 +602,7 @@ static struct attribute *smmu_pmu_formats[] = {
NULL
};
-static struct attribute_group smmu_pmu_format_group = {
+static const struct attribute_group smmu_pmu_format_group = {
.name = "format",
.attrs = smmu_pmu_formats,
};
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index cc00915ad6d1..d3929ccebfd2 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -54,7 +54,7 @@ struct arm_spe_pmu {
struct hlist_node hotplug_node;
int irq; /* PPI */
-
+ u16 pmsver;
u16 min_period;
u16 counter_sz;
@@ -146,7 +146,7 @@ static struct attribute *arm_spe_pmu_cap_attr[] = {
NULL,
};
-static struct attribute_group arm_spe_pmu_cap_group = {
+static const struct attribute_group arm_spe_pmu_cap_group = {
.name = "caps",
.attrs = arm_spe_pmu_cap_attr,
};
@@ -227,7 +227,7 @@ static struct attribute *arm_spe_pmu_formats_attr[] = {
NULL,
};
-static struct attribute_group arm_spe_pmu_format_group = {
+static const struct attribute_group arm_spe_pmu_format_group = {
.name = "format",
.attrs = arm_spe_pmu_formats_attr,
};
@@ -247,7 +247,7 @@ static struct attribute *arm_spe_pmu_attrs[] = {
NULL,
};
-static struct attribute_group arm_spe_pmu_group = {
+static const struct attribute_group arm_spe_pmu_group = {
.attrs = arm_spe_pmu_attrs,
};
@@ -655,6 +655,18 @@ static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev)
return IRQ_HANDLED;
}
+static u64 arm_spe_pmsevfr_res0(u16 pmsver)
+{
+ switch (pmsver) {
+ case ID_AA64DFR0_PMSVER_8_2:
+ return SYS_PMSEVFR_EL1_RES0_8_2;
+ case ID_AA64DFR0_PMSVER_8_3:
+ /* Return the highest version we support in default */
+ default:
+ return SYS_PMSEVFR_EL1_RES0_8_3;
+ }
+}
+
/* Perf callbacks */
static int arm_spe_pmu_event_init(struct perf_event *event)
{
@@ -670,7 +682,7 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
!cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus))
return -ENOENT;
- if (arm_spe_event_to_pmsevfr(event) & SYS_PMSEVFR_EL1_RES0)
+ if (arm_spe_event_to_pmsevfr(event) & arm_spe_pmsevfr_res0(spe_pmu->pmsver))
return -EOPNOTSUPP;
if (attr->exclude_idle)
@@ -937,6 +949,7 @@ static void __arm_spe_pmu_dev_probe(void *info)
fld, smp_processor_id());
return;
}
+ spe_pmu->pmsver = (u16)fld;
/* Read PMBIDR first to determine whether or not we have access */
reg = read_sysreg_s(SYS_PMBIDR_EL1);
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index a11bfd8a0823..be1f26b62ddb 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -133,7 +133,7 @@ static struct attribute *ddr_perf_identifier_attrs[] = {
NULL,
};
-static struct attribute_group ddr_perf_identifier_attr_group = {
+static const struct attribute_group ddr_perf_identifier_attr_group = {
.attrs = ddr_perf_identifier_attrs,
.is_visible = ddr_perf_identifier_attr_visible,
};
@@ -188,7 +188,7 @@ static struct attribute *ddr_perf_filter_cap_attr[] = {
NULL,
};
-static struct attribute_group ddr_perf_filter_cap_attr_group = {
+static const struct attribute_group ddr_perf_filter_cap_attr_group = {
.name = "caps",
.attrs = ddr_perf_filter_cap_attr,
};
@@ -209,7 +209,7 @@ static struct attribute *ddr_perf_cpumask_attrs[] = {
NULL,
};
-static struct attribute_group ddr_perf_cpumask_attr_group = {
+static const struct attribute_group ddr_perf_cpumask_attr_group = {
.attrs = ddr_perf_cpumask_attrs,
};
@@ -265,7 +265,7 @@ static struct attribute *ddr_perf_events_attrs[] = {
NULL,
};
-static struct attribute_group ddr_perf_events_attr_group = {
+static const struct attribute_group ddr_perf_events_attr_group = {
.name = "events",
.attrs = ddr_perf_events_attrs,
};
@@ -281,7 +281,7 @@ static struct attribute *ddr_perf_format_attrs[] = {
NULL,
};
-static struct attribute_group ddr_perf_format_attr_group = {
+static const struct attribute_group ddr_perf_format_attr_group = {
.name = "format",
.attrs = ddr_perf_format_attrs,
};
diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
index 5ac6c9113767..ac1a8c120a00 100644
--- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -319,7 +319,7 @@ static struct attribute *hisi_ddrc_pmu_identifier_attrs[] = {
NULL
};
-static struct attribute_group hisi_ddrc_pmu_identifier_group = {
+static const struct attribute_group hisi_ddrc_pmu_identifier_group = {
.attrs = hisi_ddrc_pmu_identifier_attrs,
};
diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
index 41b2dceb5f26..3402f1a395a8 100644
--- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
@@ -331,7 +331,7 @@ static struct attribute *hisi_hha_pmu_identifier_attrs[] = {
NULL
};
-static struct attribute_group hisi_hha_pmu_identifier_group = {
+static const struct attribute_group hisi_hha_pmu_identifier_group = {
.attrs = hisi_hha_pmu_identifier_attrs,
};
diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index 705501d18d03..7d792435c2aa 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -321,7 +321,7 @@ static struct attribute *hisi_l3c_pmu_identifier_attrs[] = {
NULL
};
-static struct attribute_group hisi_l3c_pmu_identifier_group = {
+static const struct attribute_group hisi_l3c_pmu_identifier_group = {
.attrs = hisi_l3c_pmu_identifier_attrs,
};
diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
index 23a0e008dafa..8883af955a2a 100644
--- a/drivers/perf/qcom_l2_pmu.c
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -649,7 +649,7 @@ static struct attribute *l2_cache_pmu_cpumask_attrs[] = {
NULL,
};
-static struct attribute_group l2_cache_pmu_cpumask_group = {
+static const struct attribute_group l2_cache_pmu_cpumask_group = {
.attrs = l2_cache_pmu_cpumask_attrs,
};
@@ -665,7 +665,7 @@ static struct attribute *l2_cache_pmu_formats[] = {
NULL,
};
-static struct attribute_group l2_cache_pmu_format_group = {
+static const struct attribute_group l2_cache_pmu_format_group = {
.name = "format",
.attrs = l2_cache_pmu_formats,
};
@@ -700,7 +700,7 @@ static struct attribute *l2_cache_pmu_events[] = {
NULL
};
-static struct attribute_group l2_cache_pmu_events_group = {
+static const struct attribute_group l2_cache_pmu_events_group = {
.name = "events",
.attrs = l2_cache_pmu_events,
};
diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c
index 9ddb577c542b..fb34b87b9471 100644
--- a/drivers/perf/qcom_l3_pmu.c
+++ b/drivers/perf/qcom_l3_pmu.c
@@ -630,7 +630,7 @@ static struct attribute *qcom_l3_cache_pmu_formats[] = {
NULL,
};
-static struct attribute_group qcom_l3_cache_pmu_format_group = {
+static const struct attribute_group qcom_l3_cache_pmu_format_group = {
.name = "format",
.attrs = qcom_l3_cache_pmu_formats,
};
@@ -663,7 +663,7 @@ static struct attribute *qcom_l3_cache_pmu_events[] = {
NULL
};
-static struct attribute_group qcom_l3_cache_pmu_events_group = {
+static const struct attribute_group qcom_l3_cache_pmu_events_group = {
.name = "events",
.attrs = qcom_l3_cache_pmu_events,
};
@@ -685,7 +685,7 @@ static struct attribute *qcom_l3_cache_pmu_cpumask_attrs[] = {
NULL,
};
-static struct attribute_group qcom_l3_cache_pmu_cpumask_attr_group = {
+static const struct attribute_group qcom_l3_cache_pmu_cpumask_attr_group = {
.attrs = qcom_l3_cache_pmu_cpumask_attrs,
};
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 633cf07ba672..44faa51ba799 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -1234,10 +1234,9 @@ static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)
u32 intr_mcu, intr_mcb, intr_l3c, intr_iob;
struct xgene_pmu_dev_ctx *ctx;
struct xgene_pmu *xgene_pmu = dev_id;
- unsigned long flags;
u32 val;
- raw_spin_lock_irqsave(&xgene_pmu->lock, flags);
+ raw_spin_lock(&xgene_pmu->lock);
/* Get Interrupt PMU source */
val = readl(xgene_pmu->pcppmu_csr + PCPPMU_INTSTATUS_REG);
@@ -1273,7 +1272,7 @@ static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)
}
}
- raw_spin_unlock_irqrestore(&xgene_pmu->lock, flags);
+ raw_spin_unlock(&xgene_pmu->lock);
return IRQ_HANDLED;
}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index dc91973c0b4f..68ca1b40d8c7 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1376,17 +1376,19 @@ xfs_filemap_pfn_mkwrite(
return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
}
-static void
+static vm_fault_t
xfs_filemap_map_pages(
struct vm_fault *vmf,
pgoff_t start_pgoff,
pgoff_t end_pgoff)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
+ vm_fault_t ret;
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- filemap_map_pages(vmf, start_pgoff, end_pgoff);
+ ret = filemap_map_pages(vmf, start_pgoff, end_pgoff);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+ return ret;
}
static const struct vm_operations_struct xfs_file_vm_ops = {
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index f860645f6512..62c54234576c 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -102,6 +102,37 @@
ARM_SMCCC_OWNER_STANDARD_HYP, \
0x21)
+/* TRNG entropy source calls (defined by ARM DEN0098) */
+#define ARM_SMCCC_TRNG_VERSION \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_32, \
+ ARM_SMCCC_OWNER_STANDARD, \
+ 0x50)
+
+#define ARM_SMCCC_TRNG_FEATURES \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_32, \
+ ARM_SMCCC_OWNER_STANDARD, \
+ 0x51)
+
+#define ARM_SMCCC_TRNG_GET_UUID \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_32, \
+ ARM_SMCCC_OWNER_STANDARD, \
+ 0x52)
+
+#define ARM_SMCCC_TRNG_RND32 \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_32, \
+ ARM_SMCCC_OWNER_STANDARD, \
+ 0x53)
+
+#define ARM_SMCCC_TRNG_RND64 \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_64, \
+ ARM_SMCCC_OWNER_STANDARD, \
+ 0x53)
+
/*
* Return codes defined in ARM DEN 0070A
* ARM DEN 0070A is now merged/consolidated into ARM DEN 0028 C
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bd634bc9d256..ab959488bc0f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -514,11 +514,14 @@ static inline bool fault_flag_allow_retry_first(unsigned int flags)
* pgoff should be used in favour of virtual_address, if possible.
*/
struct vm_fault {
- struct vm_area_struct *vma; /* Target VMA */
- unsigned int flags; /* FAULT_FLAG_xxx flags */
- gfp_t gfp_mask; /* gfp mask to be used for allocations */
- pgoff_t pgoff; /* Logical page offset based on vma */
- unsigned long address; /* Faulting virtual address */
+ const struct {
+ struct vm_area_struct *vma; /* Target VMA */
+ gfp_t gfp_mask; /* gfp mask to be used for allocations */
+ pgoff_t pgoff; /* Logical page offset based on vma */
+ unsigned long address; /* Faulting virtual address */
+ };
+ unsigned int flags; /* FAULT_FLAG_xxx flags
+ * XXX: should really be 'const' */
pmd_t *pmd; /* Pointer to pmd entry matching
* the 'address' */
pud_t *pud; /* Pointer to pud entry matching
@@ -542,8 +545,8 @@ struct vm_fault {
* is not NULL, otherwise pmd.
*/
pgtable_t prealloc_pte; /* Pre-allocated pte page table.
- * vm_ops->map_pages() calls
- * alloc_set_pte() from atomic context.
+ * vm_ops->map_pages() sets up a page
+ * table from atomic context.
* do_fault_around() pre-allocates
* page table to avoid allocation from
* atomic context.
@@ -578,7 +581,7 @@ struct vm_operations_struct {
vm_fault_t (*fault)(struct vm_fault *vmf);
vm_fault_t (*huge_fault)(struct vm_fault *vmf,
enum page_entry_size pe_size);
- void (*map_pages)(struct vm_fault *vmf,
+ vm_fault_t (*map_pages)(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
unsigned long (*pagesize)(struct vm_area_struct * area);
@@ -988,7 +991,9 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
return pte;
}
-vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page);
+vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page);
+void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr);
+
vm_fault_t finish_fault(struct vm_fault *vmf);
vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
#endif
@@ -2622,7 +2627,7 @@ extern void truncate_inode_pages_final(struct address_space *);
/* generic vm_area_ops exported for stackable file systems */
extern vm_fault_t filemap_fault(struct vm_fault *vmf);
-extern void filemap_map_pages(struct vm_fault *vmf,
+extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 8fcdfa52eb4b..36eb748f3c97 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1314,6 +1314,17 @@ static inline int pmd_trans_unstable(pmd_t *pmd)
#endif
}
+/*
+ * the ordering of these checks is important for pmds with _page_devmap set.
+ * if we check pmd_trans_unstable() first we will trip the bad_pmd() check
+ * inside of pmd_none_or_trans_huge_or_clear_bad(). this will end up correctly
+ * returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
+ */
+static inline int pmd_devmap_trans_unstable(pmd_t *pmd)
+{
+ return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
+}
+
#ifndef CONFIG_NUMA_BALANCING
/*
* Technically a PTE can be PROTNONE even when not doing NUMA balancing but
diff --git a/mm/filemap.c b/mm/filemap.c
index aa0e0fb04670..6ff2a3fb0dc7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -42,6 +42,8 @@
#include <linux/psi.h>
#include <linux/ramfs.h>
#include <linux/page_idle.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
@@ -2915,74 +2917,163 @@ out_retry:
}
EXPORT_SYMBOL(filemap_fault);
-void filemap_map_pages(struct vm_fault *vmf,
- pgoff_t start_pgoff, pgoff_t end_pgoff)
+static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page)
{
- struct file *file = vmf->vma->vm_file;
+ struct mm_struct *mm = vmf->vma->vm_mm;
+
+ /* Huge page is mapped? No need to proceed. */
+ if (pmd_trans_huge(*vmf->pmd)) {
+ unlock_page(page);
+ put_page(page);
+ return true;
+ }
+
+ if (pmd_none(*vmf->pmd) && PageTransHuge(page)) {
+ vm_fault_t ret = do_set_pmd(vmf, page);
+ if (!ret) {
+ /* The page is mapped successfully, reference consumed. */
+ unlock_page(page);
+ return true;
+ }
+ }
+
+ if (pmd_none(*vmf->pmd)) {
+ vmf->ptl = pmd_lock(mm, vmf->pmd);
+ if (likely(pmd_none(*vmf->pmd))) {
+ mm_inc_nr_ptes(mm);
+ pmd_populate(mm, vmf->pmd, vmf->prealloc_pte);
+ vmf->prealloc_pte = NULL;
+ }
+ spin_unlock(vmf->ptl);
+ }
+
+ /* See comment in handle_pte_fault() */
+ if (pmd_devmap_trans_unstable(vmf->pmd)) {
+ unlock_page(page);
+ put_page(page);
+ return true;
+ }
+
+ return false;
+}
+
+static struct page *next_uptodate_page(struct page *page,
+ struct address_space *mapping,
+ struct xa_state *xas, pgoff_t end_pgoff)
+{
+ unsigned long max_idx;
+
+ do {
+ if (!page)
+ return NULL;
+ if (xas_retry(xas, page))
+ continue;
+ if (xa_is_value(page))
+ continue;
+ if (PageLocked(page))
+ continue;
+ if (!page_cache_get_speculative(page))
+ continue;
+ /* Has the page moved or been split? */
+ if (unlikely(page != xas_reload(xas)))
+ goto skip;
+ if (!PageUptodate(page) || PageReadahead(page))
+ goto skip;
+ if (PageHWPoison(page))
+ goto skip;
+ if (!trylock_page(page))
+ goto skip;
+ if (page->mapping != mapping)
+ goto unlock;
+ if (!PageUptodate(page))
+ goto unlock;
+ max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
+ if (xas->xa_index >= max_idx)
+ goto unlock;
+ return page;
+unlock:
+ unlock_page(page);
+skip:
+ put_page(page);
+ } while ((page = xas_next_entry(xas, end_pgoff)) != NULL);
+
+ return NULL;
+}
+
+static inline struct page *first_map_page(struct address_space *mapping,
+ struct xa_state *xas,
+ pgoff_t end_pgoff)
+{
+ return next_uptodate_page(xas_find(xas, end_pgoff),
+ mapping, xas, end_pgoff);
+}
+
+static inline struct page *next_map_page(struct address_space *mapping,
+ struct xa_state *xas,
+ pgoff_t end_pgoff)
+{
+ return next_uptodate_page(xas_next_entry(xas, end_pgoff),
+ mapping, xas, end_pgoff);
+}
+
+vm_fault_t filemap_map_pages(struct vm_fault *vmf,
+ pgoff_t start_pgoff, pgoff_t end_pgoff)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct file *file = vma->vm_file;
struct address_space *mapping = file->f_mapping;
pgoff_t last_pgoff = start_pgoff;
- unsigned long max_idx;
+ unsigned long addr;
XA_STATE(xas, &mapping->i_pages, start_pgoff);
struct page *head, *page;
unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
+ vm_fault_t ret = 0;
rcu_read_lock();
- xas_for_each(&xas, head, end_pgoff) {
- if (xas_retry(&xas, head))
- continue;
- if (xa_is_value(head))
- goto next;
+ head = first_map_page(mapping, &xas, end_pgoff);
+ if (!head)
+ goto out;
- /*
- * Check for a locked page first, as a speculative
- * reference may adversely influence page migration.
- */
- if (PageLocked(head))
- goto next;
- if (!page_cache_get_speculative(head))
- goto next;
+ if (filemap_map_pmd(vmf, head)) {
+ ret = VM_FAULT_NOPAGE;
+ goto out;
+ }
- /* Has the page moved or been split? */
- if (unlikely(head != xas_reload(&xas)))
- goto skip;
+ addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl);
+ do {
page = find_subpage(head, xas.xa_index);
-
- if (!PageUptodate(head) ||
- PageReadahead(page) ||
- PageHWPoison(page))
- goto skip;
- if (!trylock_page(head))
- goto skip;
-
- if (head->mapping != mapping || !PageUptodate(head))
- goto unlock;
-
- max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
- if (xas.xa_index >= max_idx)
+ if (PageHWPoison(page))
goto unlock;
if (mmap_miss > 0)
mmap_miss--;
- vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
- if (vmf->pte)
- vmf->pte += xas.xa_index - last_pgoff;
+ addr += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
+ vmf->pte += xas.xa_index - last_pgoff;
last_pgoff = xas.xa_index;
- if (alloc_set_pte(vmf, page))
+
+ if (!pte_none(*vmf->pte))
goto unlock;
+
+ /* We're about to handle the fault */
+ if (vmf->address == addr)
+ ret = VM_FAULT_NOPAGE;
+
+ do_set_pte(vmf, page, addr);
+ /* no need to invalidate: a not-present page won't be cached */
+ update_mmu_cache(vma, addr, vmf->pte);
unlock_page(head);
- goto next;
+ continue;
unlock:
unlock_page(head);
-skip:
put_page(head);
-next:
- /* Huge page is mapped? No need to proceed. */
- if (pmd_trans_huge(*vmf->pmd))
- break;
- }
+ } while ((head = next_map_page(mapping, &xas, end_pgoff)) != NULL);
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
+out:
rcu_read_unlock();
WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss);
+ return ret;
}
EXPORT_SYMBOL(filemap_map_pages);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 67ab391a5373..fb0fdaec34d5 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -991,38 +991,41 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
static bool __collapse_huge_page_swapin(struct mm_struct *mm,
struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmd,
+ unsigned long haddr, pmd_t *pmd,
int referenced)
{
int swapped_in = 0;
vm_fault_t ret = 0;
- struct vm_fault vmf = {
- .vma = vma,
- .address = address,
- .flags = FAULT_FLAG_ALLOW_RETRY,
- .pmd = pmd,
- .pgoff = linear_page_index(vma, address),
- };
-
- vmf.pte = pte_offset_map(pmd, address);
- for (; vmf.address < address + HPAGE_PMD_NR*PAGE_SIZE;
- vmf.pte++, vmf.address += PAGE_SIZE) {
+ unsigned long address, end = haddr + (HPAGE_PMD_NR * PAGE_SIZE);
+
+ for (address = haddr; address < end; address += PAGE_SIZE) {
+ struct vm_fault vmf = {
+ .vma = vma,
+ .address = address,
+ .pgoff = linear_page_index(vma, haddr),
+ .flags = FAULT_FLAG_ALLOW_RETRY,
+ .pmd = pmd,
+ };
+
+ vmf.pte = pte_offset_map(pmd, address);
vmf.orig_pte = *vmf.pte;
- if (!is_swap_pte(vmf.orig_pte))
+ if (!is_swap_pte(vmf.orig_pte)) {
+ pte_unmap(vmf.pte);
continue;
+ }
swapped_in++;
ret = do_swap_page(&vmf);
/* do_swap_page returns VM_FAULT_RETRY with released mmap_lock */
if (ret & VM_FAULT_RETRY) {
mmap_read_lock(mm);
- if (hugepage_vma_revalidate(mm, address, &vmf.vma)) {
+ if (hugepage_vma_revalidate(mm, haddr, &vma)) {
/* vma is no longer available, don't continue to swapin */
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
return false;
}
/* check if the pmd is still valid */
- if (mm_find_pmd(mm, address) != pmd) {
+ if (mm_find_pmd(mm, haddr) != pmd) {
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
return false;
}
@@ -1031,11 +1034,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
return false;
}
- /* pte is unmapped now, we need to map it */
- vmf.pte = pte_offset_map(pmd, vmf.address);
}
- vmf.pte--;
- pte_unmap(vmf.pte);
/* Drain LRU add pagevec to remove extra pin on the swapped in pages */
if (swapped_in)
diff --git a/mm/memory.c b/mm/memory.c
index 9e8576a83147..b32f32bf584d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -134,6 +134,18 @@ static inline bool arch_faults_on_old_pte(void)
}
#endif
+#ifndef arch_wants_old_prefaulted_pte
+static inline bool arch_wants_old_prefaulted_pte(void)
+{
+ /*
+ * Transitioning a PTE from 'old' to 'young' can be expensive on
+ * some architectures, even if it's performed in hardware. By
+ * default, "false" means prefaulted entries will be 'young'.
+ */
+ return false;
+}
+#endif
+
static int __init disable_randmaps(char *s)
{
randomize_va_space = 0;
@@ -3503,7 +3515,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
if (pte_alloc(vma->vm_mm, vmf->pmd))
return VM_FAULT_OOM;
- /* See the comment in pte_alloc_one_map() */
+ /* See comment in handle_pte_fault() */
if (unlikely(pmd_trans_unstable(vmf->pmd)))
return 0;
@@ -3643,66 +3655,6 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
return ret;
}
-/*
- * The ordering of these checks is important for pmds with _PAGE_DEVMAP set.
- * If we check pmd_trans_unstable() first we will trip the bad_pmd() check
- * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly
- * returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
- */
-static int pmd_devmap_trans_unstable(pmd_t *pmd)
-{
- return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
-}
-
-static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
-{
- struct vm_area_struct *vma = vmf->vma;
-
- if (!pmd_none(*vmf->pmd))
- goto map_pte;
- if (vmf->prealloc_pte) {
- vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
- if (unlikely(!pmd_none(*vmf->pmd))) {
- spin_unlock(vmf->ptl);
- goto map_pte;
- }
-
- mm_inc_nr_ptes(vma->vm_mm);
- pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
- spin_unlock(vmf->ptl);
- vmf->prealloc_pte = NULL;
- } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
- return VM_FAULT_OOM;
- }
-map_pte:
- /*
- * If a huge pmd materialized under us just retry later. Use
- * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of
- * pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge
- * under us and then back to pmd_none, as a result of MADV_DONTNEED
- * running immediately after a huge pmd fault in a different thread of
- * this mm, in turn leading to a misleading pmd_trans_huge() retval.
- * All we have to ensure is that it is a regular pmd that we can walk
- * with pte_offset_map() and we can do that through an atomic read in
- * C, which is what pmd_trans_unstable() provides.
- */
- if (pmd_devmap_trans_unstable(vmf->pmd))
- return VM_FAULT_NOPAGE;
-
- /*
- * At this point we know that our vmf->pmd points to a page of ptes
- * and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge()
- * for the duration of the fault. If a racing MADV_DONTNEED runs and
- * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still
- * be valid and we will re-check to make sure the vmf->pte isn't
- * pte_none() under vmf->ptl protection when we return to
- * alloc_set_pte().
- */
- vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
- &vmf->ptl);
- return 0;
-}
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void deposit_prealloc_pte(struct vm_fault *vmf)
{
@@ -3717,7 +3669,7 @@ static void deposit_prealloc_pte(struct vm_fault *vmf)
vmf->prealloc_pte = NULL;
}
-static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
+vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
{
struct vm_area_struct *vma = vmf->vma;
bool write = vmf->flags & FAULT_FLAG_WRITE;
@@ -3775,76 +3727,41 @@ out:
return ret;
}
#else
-static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
+vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
{
- BUILD_BUG();
- return 0;
+ return VM_FAULT_FALLBACK;
}
#endif
-/**
- * alloc_set_pte - setup new PTE entry for given page and add reverse page
- * mapping. If needed, the function allocates page table or use pre-allocated.
- *
- * @vmf: fault environment
- * @page: page to map
- *
- * Caller must take care of unlocking vmf->ptl, if vmf->pte is non-NULL on
- * return.
- *
- * Target users are page handler itself and implementations of
- * vm_ops->map_pages.
- *
- * Return: %0 on success, %VM_FAULT_ code in case of error.
- */
-vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page)
+void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
{
struct vm_area_struct *vma = vmf->vma;
bool write = vmf->flags & FAULT_FLAG_WRITE;
+ bool prefault = vmf->address != addr;
pte_t entry;
- vm_fault_t ret;
-
- if (pmd_none(*vmf->pmd) && PageTransCompound(page)) {
- ret = do_set_pmd(vmf, page);
- if (ret != VM_FAULT_FALLBACK)
- return ret;
- }
-
- if (!vmf->pte) {
- ret = pte_alloc_one_map(vmf);
- if (ret)
- return ret;
- }
-
- /* Re-check under ptl */
- if (unlikely(!pte_none(*vmf->pte))) {
- update_mmu_tlb(vma, vmf->address, vmf->pte);
- return VM_FAULT_NOPAGE;
- }
flush_icache_page(vma, page);
entry = mk_pte(page, vma->vm_page_prot);
- entry = pte_sw_mkyoung(entry);
+
+ if (prefault && arch_wants_old_prefaulted_pte())
+ entry = pte_mkold(entry);
+ else
+ entry = pte_sw_mkyoung(entry);
+
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/* copy-on-write page */
if (write && !(vma->vm_flags & VM_SHARED)) {
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
- page_add_new_anon_rmap(page, vma, vmf->address, false);
+ page_add_new_anon_rmap(page, vma, addr, false);
lru_cache_add_inactive_or_unevictable(page, vma);
} else {
inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
page_add_file_rmap(page, false);
}
- set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
-
- /* no need to invalidate: a not-present page won't be cached */
- update_mmu_cache(vma, vmf->address, vmf->pte);
-
- return 0;
+ set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
}
-
/**
* finish_fault - finish page fault once we have prepared the page to fault
*
@@ -3862,12 +3779,12 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page)
*/
vm_fault_t finish_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct page *page;
- vm_fault_t ret = 0;
+ vm_fault_t ret;
/* Did we COW the page? */
- if ((vmf->flags & FAULT_FLAG_WRITE) &&
- !(vmf->vma->vm_flags & VM_SHARED))
+ if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED))
page = vmf->cow_page;
else
page = vmf->page;
@@ -3876,12 +3793,38 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
* check even for read faults because we might have lost our CoWed
* page
*/
- if (!(vmf->vma->vm_flags & VM_SHARED))
- ret = check_stable_address_space(vmf->vma->vm_mm);
- if (!ret)
- ret = alloc_set_pte(vmf, page);
- if (vmf->pte)
- pte_unmap_unlock(vmf->pte, vmf->ptl);
+ if (!(vma->vm_flags & VM_SHARED)) {
+ ret = check_stable_address_space(vma->vm_mm);
+ if (ret)
+ return ret;
+ }
+
+ if (pmd_none(*vmf->pmd)) {
+ if (PageTransCompound(page)) {
+ ret = do_set_pmd(vmf, page);
+ if (ret != VM_FAULT_FALLBACK)
+ return ret;
+ }
+
+ if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd)))
+ return VM_FAULT_OOM;
+ }
+
+ /* See comment in handle_pte_fault() */
+ if (pmd_devmap_trans_unstable(vmf->pmd))
+ return 0;
+
+ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
+ vmf->address, &vmf->ptl);
+ ret = 0;
+ /* Re-check under ptl */
+ if (likely(pte_none(*vmf->pte)))
+ do_set_pte(vmf, page, vmf->address);
+ else
+ ret = VM_FAULT_NOPAGE;
+
+ update_mmu_tlb(vma, vmf->address, vmf->pte);
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
return ret;
}
@@ -3951,13 +3894,12 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
pgoff_t start_pgoff = vmf->pgoff;
pgoff_t end_pgoff;
int off;
- vm_fault_t ret = 0;
nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT;
mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
- vmf->address = max(address & mask, vmf->vma->vm_start);
- off = ((address - vmf->address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+ address = max(address & mask, vmf->vma->vm_start);
+ off = ((vmf->address - address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
start_pgoff -= off;
/*
@@ -3965,7 +3907,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
* the vma or nr_pages from start_pgoff, depending what is nearest.
*/
end_pgoff = start_pgoff -
- ((vmf->address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
+ ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
PTRS_PER_PTE - 1;
end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1,
start_pgoff + nr_pages - 1);
@@ -3973,31 +3915,11 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
if (pmd_none(*vmf->pmd)) {
vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
if (!vmf->prealloc_pte)
- goto out;
+ return VM_FAULT_OOM;
smp_wmb(); /* See comment in __pte_alloc() */
}
- vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
-
- /* Huge page is mapped? Page fault is solved */
- if (pmd_trans_huge(*vmf->pmd)) {
- ret = VM_FAULT_NOPAGE;
- goto out;
- }
-
- /* ->map_pages() haven't done anything useful. Cold page cache? */
- if (!vmf->pte)
- goto out;
-
- /* check if the page fault is solved */
- vmf->pte -= (vmf->address >> PAGE_SHIFT) - (address >> PAGE_SHIFT);
- if (!pte_none(*vmf->pte))
- ret = VM_FAULT_NOPAGE;
- pte_unmap_unlock(vmf->pte, vmf->ptl);
-out:
- vmf->address = address;
- vmf->pte = NULL;
- return ret;
+ return vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
}
static vm_fault_t do_read_fault(struct vm_fault *vmf)
@@ -4353,7 +4275,18 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
*/
vmf->pte = NULL;
} else {
- /* See comment in pte_alloc_one_map() */
+ /*
+ * If a huge pmd materialized under us just retry later. Use
+ * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead
+ * of pmd_trans_huge() to ensure the pmd didn't become
+ * pmd_trans_huge under us and then back to pmd_none, as a
+ * result of MADV_DONTNEED running immediately after a huge pmd
+ * fault in a different thread of this mm, in turn leading to a
+ * misleading pmd_trans_huge() retval. All we have to ensure is
+ * that it is a regular pmd that we can walk with
+ * pte_offset_map() and we can do that through an atomic read
+ * in C, which is what pmd_trans_unstable() provides.
+ */
if (pmd_devmap_trans_unstable(vmf->pmd))
return 0;
/*
diff --git a/mm/nommu.c b/mm/nommu.c
index 870fea12823e..5c9ab799c0e6 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1668,10 +1668,11 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
}
EXPORT_SYMBOL(filemap_fault);
-void filemap_map_pages(struct vm_fault *vmf,
+vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff)
{
BUG();
+ return 0;
}
EXPORT_SYMBOL(filemap_map_pages);
diff --git a/mm/shmem.c b/mm/shmem.c
index 7c6b6d8f6c39..1b254fbfdf52 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1520,11 +1520,11 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
{
struct vm_area_struct pvma;
struct page *page;
- struct vm_fault vmf;
+ struct vm_fault vmf = {
+ .vma = &pvma,
+ };
shmem_pseudo_vma_init(&pvma, info, index);
- vmf.vma = &pvma;
- vmf.address = 0;
page = swap_cluster_readahead(swap, gfp, &vmf);
shmem_pseudo_vma_destroy(&pvma);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 21a98cb8d646..96799a2f6957 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1951,8 +1951,6 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
si = swap_info[type];
pte = pte_offset_map(pmd, addr);
do {
- struct vm_fault vmf;
-
if (!is_swap_pte(*pte))
continue;
@@ -1968,9 +1966,12 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
swap_map = &si->swap_map[offset];
page = lookup_swap_cache(entry, vma, addr);
if (!page) {
- vmf.vma = vma;
- vmf.address = addr;
- vmf.pmd = pmd;
+ struct vm_fault vmf = {
+ .vma = vma,
+ .address = addr,
+ .pmd = pmd,
+ };
+
page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
&vmf);
}
diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
index c9fa141ebdcc..75fc482d63b6 100644
--- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c
+++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
@@ -81,7 +81,7 @@ static int check_buffer_underflow_by_byte(int mem_type, int mode,
last_index = 0;
/* Set some value in tagged memory and make the buffer underflow */
for (j = sizes[i] - 1; (j >= -underflow_range) &&
- (cur_mte_cxt.fault_valid == false); j--) {
+ (!cur_mte_cxt.fault_valid); j--) {
ptr[j] = '1';
last_index = j;
}