diff options
Diffstat (limited to 'arch/riscv')
138 files changed, 5835 insertions, 1500 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c5e42cc37604..348c0fa1fc8c 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -12,6 +12,7 @@ config 32BIT config RISCV def_bool y + select ARCH_DMA_DEFAULT_COHERENT select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE @@ -33,6 +34,7 @@ config RISCV select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAS_VDSO_DATA select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT select ARCH_STACKWALK @@ -44,25 +46,28 @@ config RISCV select ARCH_USE_QUEUED_RWLOCKS select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS - select ARCH_WANT_GENERAL_HUGETLB - select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP + select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT select ARCH_WANT_HUGE_PMD_SHARE if 64BIT select ARCH_WANT_LD_ORPHAN_WARN if !XIP_KERNEL + select ARCH_WANT_OPTIMIZE_VMEMMAP select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU select BUILDTIME_TABLE_SORT if MMU select CLINT_TIMER if !MMU select CLONE_BACKWARDS select COMMON_CLK - select CPU_PM if CPU_IDLE + select CPU_PM if CPU_IDLE || HIBERNATION select EDAC_SUPPORT select GENERIC_ARCH_TOPOLOGY select GENERIC_ATOMIC64 if !64BIT select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_EARLY_IOREMAP + select GENERIC_ENTRY select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO select GENERIC_IDLE_POLL_SETUP select GENERIC_IOREMAP if MMU + select GENERIC_IRQ_IPI if SMP + select GENERIC_IRQ_IPI_MUX if SMP select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL @@ -74,6 +79,7 @@ config RISCV select GENERIC_TIME_VSYSCALL if MMU && 64BIT select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO select HARDIRQS_SW_RESEND + select HAS_IOPORT if MMU select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT && !XIP_KERNEL @@ -118,14 +124,15 @@ config RISCV select HAVE_SYSCALL_TRACEPOINTS select IRQ_DOMAIN select IRQ_FORCED_THREADING + select KASAN_VMALLOC if KASAN select MODULES_USE_ELF_RELA if MODULES select MODULE_SECTIONS if MODULES select OF - select OF_DMA_DEFAULT_COHERENT select OF_EARLY_FLATTREE select OF_IRQ select PCI_DOMAINS_GENERIC if PCI select PCI_MSI if PCI + select RISCV_ALTERNATIVE if !XIP_KERNEL select RISCV_INTC select RISCV_TIMER if RISCV_SBI select SIFIVE_PLIC @@ -135,12 +142,23 @@ config RISCV select TRACE_IRQFLAGS_SUPPORT select UACCESS_MEMCPY if !MMU select ZONE_DMA32 if 64BIT - select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && $(cc-option,-fpatchable-function-entry=8) + select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE) select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION +config CLANG_SUPPORTS_DYNAMIC_FTRACE + def_bool CC_IS_CLANG + # https://github.com/llvm/llvm-project/commit/6ab8927931851bb42b2c93a00801dc499d7d9b1e + depends on CLANG_VERSION >= 130000 + # https://github.com/ClangBuiltLinux/linux/issues/1817 + depends on AS_IS_GNU || (AS_IS_LLVM && (LD_IS_LLD || LD_VERSION >= 23600)) + +config GCC_SUPPORTS_DYNAMIC_FTRACE + def_bool CC_IS_GCC + depends on $(cc-option,-fpatchable-function-entry=8) + config ARCH_MMAP_RND_BITS_MIN default 18 if 64BIT default 8 @@ -177,8 +195,8 @@ config MMU config PAGE_OFFSET hex - default 0xC0000000 if 32BIT - default 0x80000000 if 64BIT && !MMU + default 0xC0000000 if 32BIT && MMU + default 0x80000000 if !MMU default 0xff60000000000000 if 64BIT config KASAN_SHADOW_OFFSET @@ -245,7 +263,7 @@ config AS_HAS_INSN def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero) source "arch/riscv/Kconfig.socs" -source "arch/riscv/Kconfig.erratas" +source "arch/riscv/Kconfig.errata" menu "Platform type" @@ -279,7 +297,6 @@ config ARCH_RV32I select GENERIC_LIB_ASHRDI3 select GENERIC_LIB_LSHRDI3 select GENERIC_LIB_UCMPDI2 - select MMU config ARCH_RV64I bool "RV64I" @@ -320,6 +337,14 @@ config SMP If you don't know what to do here, say N. +config SCHED_MC + bool "Multi-core scheduler support" + depends on SMP + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + config NR_CPUS int "Maximum number of CPUs (2-512)" depends on SMP @@ -378,9 +403,9 @@ config RISCV_ALTERNATIVE depends on !XIP_KERNEL help This Kconfig allows the kernel to automatically patch the - errata required by the execution platform at run time. The - code patching is performed once in the boot stages. It means - that the overhead from this mechanism is just taken once. + erratum or cpufeature required by the execution platform at run + time. The code patching overhead is minimal, as it's only done + once at boot and once on each module load. config RISCV_ALTERNATIVE_EARLY bool @@ -398,14 +423,32 @@ config RISCV_ISA_C If you don't know what to do here, say Y. +config RISCV_ISA_SVNAPOT + bool "Svnapot extension support for supervisor mode NAPOT pages" + depends on 64BIT && MMU + depends on RISCV_ALTERNATIVE + default y + help + Allow kernel to detect the Svnapot ISA-extension dynamically at boot + time and enable its usage. + + The Svnapot extension is used to mark contiguous PTEs as a range + of contiguous virtual-to-physical translations for a naturally + aligned power-of-2 (NAPOT) granularity larger than the base 4KB page + size. When HUGETLBFS is also selected this option unconditionally + allocates some memory for each NAPOT page size supported by the kernel. + When optimizing for low memory consumption and for platforms without + the Svnapot extension, it may be better to say N here. + + If you don't know what to do here, say Y. + config RISCV_ISA_SVPBMT - bool "SVPBMT extension support" + bool "Svpbmt extension support for supervisor mode page-based memory types" depends on 64BIT && MMU - depends on !XIP_KERNEL + depends on RISCV_ALTERNATIVE default y - select RISCV_ALTERNATIVE help - Adds support to dynamically detect the presence of the SVPBMT + Adds support to dynamically detect the presence of the Svpbmt ISA-extension (Supervisor-mode: page-based memory types) and enable its usage. @@ -413,7 +456,7 @@ config RISCV_ISA_SVPBMT that indicate the cacheability, idempotency, and ordering properties for access to that page. - The SVPBMT extension is only available on 64Bit cpus. + The Svpbmt extension is only available on 64-bit cpus. If you don't know what to do here, say Y. @@ -428,8 +471,8 @@ config TOOLCHAIN_HAS_ZBB config RISCV_ISA_ZBB bool "Zbb extension support for bit manipulation instructions" depends on TOOLCHAIN_HAS_ZBB - depends on !XIP_KERNEL && MMU - select RISCV_ALTERNATIVE + depends on MMU + depends on RISCV_ALTERNATIVE default y help Adds support to dynamically detect the presence of the ZBB @@ -443,9 +486,9 @@ config RISCV_ISA_ZBB config RISCV_ISA_ZICBOM bool "Zicbom extension support for non-coherent DMA operation" - depends on !XIP_KERNEL && MMU + depends on MMU + depends on RISCV_ALTERNATIVE default y - select RISCV_ALTERNATIVE select RISCV_DMA_NONCOHERENT help Adds support to dynamically detect the presence of the ZICBOM @@ -457,6 +500,19 @@ config RISCV_ISA_ZICBOM If you don't know what to do here, say Y. +config RISCV_ISA_ZICBOZ + bool "Zicboz extension support for faster zeroing of memory" + depends on MMU + depends on RISCV_ALTERNATIVE + default y + help + Enable the use of the Zicboz extension (cbo.zero instruction) + when available. + + The Zicboz extension is used for faster zeroing of memory. + + If you don't know what to do here, say Y. + config TOOLCHAIN_HAS_ZIHINTPAUSE bool default y @@ -464,6 +520,28 @@ config TOOLCHAIN_HAS_ZIHINTPAUSE depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zihintpause) depends on LLD_VERSION >= 150000 || LD_VERSION >= 23600 +config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI + def_bool y + # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc + depends on AS_IS_GNU && AS_VERSION >= 23800 + help + Newer binutils versions default to ISA spec version 20191213 which + moves some instructions from the I extension to the Zicsr and Zifencei + extensions. + +config TOOLCHAIN_NEEDS_OLD_ISA_SPEC + def_bool y + depends on TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI + # https://github.com/llvm/llvm-project/commit/22e199e6afb1263c943c0c0d4498694e15bf8a16 + depends on CC_IS_CLANG && CLANG_VERSION < 170000 + help + Certain versions of clang do not support zicsr and zifencei via -march + but newer versions of binutils require it for the reasons noted in the + help text of CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI. This + option causes an older ISA spec compatible with these older versions + of clang to be passed to GAS, which has the same result as passing zicsr + and zifencei to -march. + config FPU bool "FPU support" default y @@ -561,6 +639,20 @@ config COMPAT If you want to execute 32-bit userspace applications, say Y. +config RELOCATABLE + bool "Build a relocatable kernel" + depends on MMU && 64BIT && !XIP_KERNEL + help + This builds a kernel as a Position Independent Executable (PIE), + which retains all relocation metadata required to relocate the + kernel binary at runtime to a different virtual address than the + address it was linked at. + Since RISCV uses the RELA relocation format, this requires a + relocation pass at runtime even if the kernel is loaded at the + same address it was linked at. + + If unsure, say N. + endmenu # "Kernel features" menu "Boot options" @@ -707,6 +799,12 @@ menu "Power management options" source "kernel/power/Kconfig" +config ARCH_HIBERNATION_POSSIBLE + def_bool y + +config ARCH_HIBERNATION_HEADER + def_bool HIBERNATION + endmenu # "Power management options" menu "CPU Power Management" diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.errata index 69621ae6d647..0c8f4652cd82 100644 --- a/arch/riscv/Kconfig.erratas +++ b/arch/riscv/Kconfig.errata @@ -2,8 +2,7 @@ menu "CPU errata selection" config ERRATA_SIFIVE bool "SiFive errata" - depends on !XIP_KERNEL - select RISCV_ALTERNATIVE + depends on RISCV_ALTERNATIVE help All SiFive errata Kconfig depend on this Kconfig. Disabling this Kconfig will disable all SiFive errata. Please say "Y" @@ -35,8 +34,7 @@ config ERRATA_SIFIVE_CIP_1200 config ERRATA_THEAD bool "T-HEAD errata" - depends on !XIP_KERNEL - select RISCV_ALTERNATIVE + depends on RISCV_ALTERNATIVE help All T-HEAD errata Kconfig depend on this Kconfig. Disabling this Kconfig will disable all T-HEAD errata. Please say "Y" diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 6203c3378922..0fb256bf8270 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -7,9 +7,13 @@ # OBJCOPYFLAGS := -O binary -LDFLAGS_vmlinux := +LDFLAGS_vmlinux := -z norelro +ifeq ($(CONFIG_RELOCATABLE),y) + LDFLAGS_vmlinux += -shared -Bsymbolic -z notext --emit-relocs + KBUILD_CFLAGS += -fPIE +endif ifeq ($(CONFIG_DYNAMIC_FTRACE),y) - LDFLAGS_vmlinux := --no-relax + LDFLAGS_vmlinux += --no-relax KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY ifeq ($(CONFIG_RISCV_ISA_C),y) CC_FLAGS_FTRACE := -fpatchable-function-entry=4 @@ -57,10 +61,12 @@ riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c -# Newer binutils versions default to ISA spec version 20191213 which moves some -# instructions from the I extension to the Zicsr and Zifencei extensions. -toolchain-need-zicsr-zifencei := $(call cc-option-yn, -march=$(riscv-march-y)_zicsr_zifencei) -riscv-march-$(toolchain-need-zicsr-zifencei) := $(riscv-march-y)_zicsr_zifencei +ifdef CONFIG_TOOLCHAIN_NEEDS_OLD_ISA_SPEC +KBUILD_CFLAGS += -Wa,-misa-spec=2.2 +KBUILD_AFLAGS += -Wa,-misa-spec=2.2 +else +riscv-march-$(CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI) := $(riscv-march-y)_zicsr_zifencei +endif # Check if the toolchain supports Zihintpause extension riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause @@ -84,6 +90,13 @@ endif # Avoid generating .eh_frame sections. KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -fno-unwind-tables +# The RISC-V attributes frequently cause compatibility issues and provide no +# information, so just turn them off. +KBUILD_CFLAGS += $(call cc-option,-mno-riscv-attribute) +KBUILD_AFLAGS += $(call cc-option,-mno-riscv-attribute) +KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mno-arch-attr) +KBUILD_AFLAGS += $(call as-option,-Wa$(comma)-mno-arch-attr) + KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax) @@ -174,3 +187,7 @@ rv64_randconfig: PHONY += rv32_defconfig rv32_defconfig: $(Q)$(MAKE) -f $(srctree)/Makefile defconfig 32-bit.config + +PHONY += rv32_nommu_virt_defconfig +rv32_nommu_virt_defconfig: + $(Q)$(MAKE) -f $(srctree)/Makefile nommu_virt_defconfig 32-bit.config diff --git a/arch/riscv/Makefile.postlink b/arch/riscv/Makefile.postlink new file mode 100644 index 000000000000..a46fc578b30b --- /dev/null +++ b/arch/riscv/Makefile.postlink @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: GPL-2.0 +# =========================================================================== +# Post-link riscv pass +# =========================================================================== +# +# Check that vmlinux relocations look sane + +PHONY := __archpost +__archpost: + +-include include/config/auto.conf +include $(srctree)/scripts/Kbuild.include + +quiet_cmd_relocs_check = CHKREL $@ +cmd_relocs_check = \ + $(CONFIG_SHELL) $(srctree)/arch/riscv/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" + +ifdef CONFIG_RELOCATABLE +quiet_cmd_cp_vmlinux_relocs = CPREL vmlinux.relocs +cmd_cp_vmlinux_relocs = cp vmlinux vmlinux.relocs + +quiet_cmd_relocs_strip = STRIPREL $@ +cmd_relocs_strip = $(OBJCOPY) --remove-section='.rel.*' \ + --remove-section='.rel__*' \ + --remove-section='.rela.*' \ + --remove-section='.rela__*' $@ +endif + +# `@true` prevents complaint when there is nothing to be done + +vmlinux: FORCE + @true +ifdef CONFIG_RELOCATABLE + $(call if_changed,relocs_check) + $(call if_changed,cp_vmlinux_relocs) + $(call if_changed,relocs_strip) +endif + +%.ko: FORCE + @true + +clean: + @true + +PHONY += FORCE clean + +FORCE: + +.PHONY: $(PHONY) diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index c72de7232abb..22b13947bd13 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -33,7 +33,14 @@ $(obj)/xipImage: vmlinux FORCE endif +ifdef CONFIG_RELOCATABLE +vmlinux.relocs: vmlinux + @ (! [ -f vmlinux.relocs ] && echo "vmlinux.relocs can't be found, please remove vmlinux and try again") || true + +$(obj)/Image: vmlinux.relocs FORCE +else $(obj)/Image: vmlinux FORCE +endif $(call if_changed,objcopy) $(obj)/Image.gz: $(obj)/Image FORCE diff --git a/arch/riscv/boot/dts/allwinner/sun20i-d1-nezha.dts b/arch/riscv/boot/dts/allwinner/sun20i-d1-nezha.dts index a0769185be97..4ed33c1e7c9c 100644 --- a/arch/riscv/boot/dts/allwinner/sun20i-d1-nezha.dts +++ b/arch/riscv/boot/dts/allwinner/sun20i-d1-nezha.dts @@ -1,6 +1,25 @@ // SPDX-License-Identifier: (GPL-2.0+ or MIT) // Copyright (C) 2021-2022 Samuel Holland <samuel@sholland.org> +/* + * gpio line names + * + * The Nezha-D1 has a 40-pin IO header. Some of these pins are routed + * directly to pads on the SoC, others come from an 8-bit pcf857x IO + * expander. Therefore, these line names are specified in two places: + * one set for the pcf857x, and one set for the pio controller. + * + * Lines which are routed to the 40-pin header are named as follows: + * <pin#> [<pin name>] + * where: + * <pin#> is the actual pin number of the 40-pin header + * <pin name> is the name of the pin by function/gpio# + * + * For details regarding pin numbers and names see the schematics (under + * "IO EXPAND"): + * http://dl.linux-sunxi.org/D1/D1_Nezha_development_board_schematic_diagram_20210224.pdf + */ + #include <dt-bindings/gpio/gpio.h> #include <dt-bindings/input/input.h> @@ -90,6 +109,15 @@ gpio-controller; #gpio-cells = <2>; #interrupt-cells = <2>; + gpio-line-names = + "pin13 [gpio8]", + "pin16 [gpio10]", + "pin18 [gpio11]", + "pin26 [gpio17]", + "pin22 [gpio14]", + "pin28 [gpio19]", + "pin37 [gpio23]", + "pin11 [gpio6]"; }; }; @@ -164,3 +192,47 @@ usb1_vbus-supply = <®_vcc>; status = "okay"; }; + +&pio { + gpio-line-names = + /* Port A */ + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + /* Port B */ + "pin5 [gpio2/twi2-sck]", + "pin3 [gpio1/twi2-sda]", + "", + "pin38 [gpio24/i2s2-din]", + "pin40 [gpio25/i2s2-dout]", + "pin12 [gpio7/i2s-clk]", + "pin35 [gpio22/i2s2-lrck]", + "", + "pin8 [gpio4/uart0-txd]", + "pin10 [gpio5/uart0-rxd]", + "", + "", + "pin15 [gpio9]", + "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + /* Port C */ + "", + "pin31 [gpio21]", + "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + /* Port D */ + "", "", "", "", "", "", "", "", + "", "", + "pin24 [gpio16/spi1-ce0]", + "pin23 [gpio15/spi1-clk]", + "pin19 [gpio12/spi1-mosi]", + "pin21 [gpio13/spi1-miso]", + "pin27 [gpio18/spi1-hold]", + "pin29 [gpio20/spi1-wp]", + "", "", "", "", "", "", + "pin7 [gpio3/pwm]"; +}; diff --git a/arch/riscv/boot/dts/allwinner/sunxi-d1s-t113.dtsi b/arch/riscv/boot/dts/allwinner/sunxi-d1s-t113.dtsi index 6fadcee7800f..922e8e0e2c09 100644 --- a/arch/riscv/boot/dts/allwinner/sunxi-d1s-t113.dtsi +++ b/arch/riscv/boot/dts/allwinner/sunxi-d1s-t113.dtsi @@ -211,7 +211,7 @@ clocks = <&ccu CLK_BUS_UART0>; resets = <&ccu RST_BUS_UART0>; dmas = <&dma 14>, <&dma 14>; - dma-names = "rx", "tx"; + dma-names = "tx", "rx"; status = "disabled"; }; @@ -224,7 +224,7 @@ clocks = <&ccu CLK_BUS_UART1>; resets = <&ccu RST_BUS_UART1>; dmas = <&dma 15>, <&dma 15>; - dma-names = "rx", "tx"; + dma-names = "tx", "rx"; status = "disabled"; }; @@ -237,7 +237,7 @@ clocks = <&ccu CLK_BUS_UART2>; resets = <&ccu RST_BUS_UART2>; dmas = <&dma 16>, <&dma 16>; - dma-names = "rx", "tx"; + dma-names = "tx", "rx"; status = "disabled"; }; @@ -250,7 +250,7 @@ clocks = <&ccu CLK_BUS_UART3>; resets = <&ccu RST_BUS_UART3>; dmas = <&dma 17>, <&dma 17>; - dma-names = "rx", "tx"; + dma-names = "tx", "rx"; status = "disabled"; }; @@ -263,7 +263,7 @@ clocks = <&ccu CLK_BUS_UART4>; resets = <&ccu RST_BUS_UART4>; dmas = <&dma 18>, <&dma 18>; - dma-names = "rx", "tx"; + dma-names = "tx", "rx"; status = "disabled"; }; @@ -276,7 +276,7 @@ clocks = <&ccu CLK_BUS_UART5>; resets = <&ccu RST_BUS_UART5>; dmas = <&dma 19>, <&dma 19>; - dma-names = "rx", "tx"; + dma-names = "tx", "rx"; status = "disabled"; }; @@ -367,6 +367,18 @@ #size-cells = <1>; }; + crypto: crypto@3040000 { + compatible = "allwinner,sun20i-d1-crypto"; + reg = <0x3040000 0x800>; + interrupts = <SOC_PERIPHERAL_IRQ(52) IRQ_TYPE_LEVEL_HIGH>; + clocks = <&ccu CLK_BUS_CE>, + <&ccu CLK_CE>, + <&ccu CLK_MBUS_CE>, + <&rtc CLK_IOSC>; + clock-names = "bus", "mod", "ram", "trng"; + resets = <&ccu RST_BUS_CE>; + }; + mbus: dram-controller@3102000 { compatible = "allwinner,sun20i-d1-mbus"; reg = <0x3102000 0x1000>, diff --git a/arch/riscv/boot/dts/canaan/k210.dtsi b/arch/riscv/boot/dts/canaan/k210.dtsi index 07e2e2649604..f87c5164d9cf 100644 --- a/arch/riscv/boot/dts/canaan/k210.dtsi +++ b/arch/riscv/boot/dts/canaan/k210.dtsi @@ -259,7 +259,6 @@ <&sysclk K210_CLK_APB0>; clock-names = "ssi_clk", "pclk"; resets = <&sysrst K210_RST_SPI2>; - spi-max-frequency = <25000000>; }; i2s0: i2s@50250000 { diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi index 0a9bb84af438..104504352e99 100644 --- a/arch/riscv/boot/dts/microchip/mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi @@ -234,6 +234,7 @@ reg = <0x0 0x20002000 0x0 0x1000>, <0x0 0x3E001000 0x0 0x1000>; clocks = <&refclk>; #clock-cells = <1>; + #reset-cells = <1>; }; ccc_se: clock-controller@38010000 { @@ -415,7 +416,7 @@ }; mac0: ethernet@20110000 { - compatible = "cdns,macb"; + compatible = "microchip,mpfs-macb", "cdns,macb"; reg = <0x0 0x20110000 0x0 0x2000>; #address-cells = <1>; #size-cells = <0>; @@ -424,11 +425,12 @@ local-mac-address = [00 00 00 00 00 00]; clocks = <&clkcfg CLK_MAC0>, <&clkcfg CLK_AHB>; clock-names = "pclk", "hclk"; + resets = <&clkcfg CLK_MAC0>; status = "disabled"; }; mac1: ethernet@20112000 { - compatible = "cdns,macb"; + compatible = "microchip,mpfs-macb", "cdns,macb"; reg = <0x0 0x20112000 0x0 0x2000>; #address-cells = <1>; #size-cells = <0>; @@ -437,6 +439,7 @@ local-mac-address = [00 00 00 00 00 00]; clocks = <&clkcfg CLK_MAC1>, <&clkcfg CLK_AHB>; clock-names = "pclk", "hclk"; + resets = <&clkcfg CLK_MAC1>; status = "disabled"; }; @@ -498,7 +501,8 @@ mbox: mailbox@37020000 { compatible = "microchip,mpfs-mailbox"; - reg = <0x0 0x37020000 0x0 0x1000>, <0x0 0x2000318C 0x0 0x40>; + reg = <0x0 0x37020000 0x0 0x58>, <0x0 0x2000318C 0x0 0x40>, + <0x0 0x37020800 0x0 0x100>; interrupt-parent = <&plic>; interrupts = <96>; #mbox-cells = <1>; diff --git a/arch/riscv/boot/dts/starfive/Makefile b/arch/riscv/boot/dts/starfive/Makefile index 7b00a48580ca..170956846d49 100644 --- a/arch/riscv/boot/dts/starfive/Makefile +++ b/arch/riscv/boot/dts/starfive/Makefile @@ -1,2 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -dtb-$(CONFIG_ARCH_STARFIVE) += jh7100-beaglev-starlight.dtb jh7100-starfive-visionfive-v1.dtb +dtb-$(CONFIG_ARCH_STARFIVE) += jh7100-beaglev-starlight.dtb +dtb-$(CONFIG_ARCH_STARFIVE) += jh7100-starfive-visionfive-v1.dtb + +dtb-$(CONFIG_ARCH_STARFIVE) += jh7110-starfive-visionfive-2-v1.2a.dtb +dtb-$(CONFIG_ARCH_STARFIVE) += jh7110-starfive-visionfive-2-v1.3b.dtb diff --git a/arch/riscv/boot/dts/starfive/jh7100.dtsi b/arch/riscv/boot/dts/starfive/jh7100.dtsi index 000447482aca..4218621ea3b9 100644 --- a/arch/riscv/boot/dts/starfive/jh7100.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7100.dtsi @@ -238,5 +238,15 @@ #size-cells = <0>; status = "disabled"; }; + + watchdog@12480000 { + compatible = "starfive,jh7100-wdt"; + reg = <0x0 0x12480000 0x0 0x10000>; + clocks = <&clkgen JH7100_CLK_WDTIMER_APB>, + <&clkgen JH7100_CLK_WDT_CORE>; + clock-names = "apb", "core"; + resets = <&rstgen JH7100_RSTN_WDTIMER_APB>, + <&rstgen JH7100_RSTN_WDT>; + }; }; }; diff --git a/arch/riscv/boot/dts/starfive/jh7110-pinfunc.h b/arch/riscv/boot/dts/starfive/jh7110-pinfunc.h new file mode 100644 index 000000000000..fb0139b56723 --- /dev/null +++ b/arch/riscv/boot/dts/starfive/jh7110-pinfunc.h @@ -0,0 +1,308 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2022 Emil Renner Berthing <kernel@esmil.dk> + * Copyright (C) 2022 StarFive Technology Co., Ltd. + */ + +#ifndef __JH7110_PINFUNC_H__ +#define __JH7110_PINFUNC_H__ + +/* + * mux bits: + * | 31 - 24 | 23 - 16 | 15 - 10 | 9 - 8 | 7 - 0 | + * | din | dout | doen | function | gpio nr | + * + * dout: output signal + * doen: output enable signal + * din: optional input signal, 0xff = none + * function: function selector + * gpio nr: gpio number, 0 - 63 + */ +#define GPIOMUX(n, dout, doen, din) ( \ + (((din) & 0xff) << 24) | \ + (((dout) & 0xff) << 16) | \ + (((doen) & 0x3f) << 10) | \ + ((n) & 0x3f)) + +#define PINMUX(n, func) ((1 << 10) | (((func) & 0x3) << 8) | ((n) & 0xff)) + +/* sys_iomux dout */ +#define GPOUT_LOW 0 +#define GPOUT_HIGH 1 +#define GPOUT_SYS_WAVE511_UART_TX 2 +#define GPOUT_SYS_CAN0_STBY 3 +#define GPOUT_SYS_CAN0_TST_NEXT_BIT 4 +#define GPOUT_SYS_CAN0_TST_SAMPLE_POINT 5 +#define GPOUT_SYS_CAN0_TXD 6 +#define GPOUT_SYS_USB_DRIVE_VBUS 7 +#define GPOUT_SYS_QSPI_CS1 8 +#define GPOUT_SYS_SPDIF 9 +#define GPOUT_SYS_HDMI_CEC_SDA 10 +#define GPOUT_SYS_HDMI_DDC_SCL 11 +#define GPOUT_SYS_HDMI_DDC_SDA 12 +#define GPOUT_SYS_WATCHDOG 13 +#define GPOUT_SYS_I2C0_CLK 14 +#define GPOUT_SYS_I2C0_DATA 15 +#define GPOUT_SYS_SDIO0_BACK_END_POWER 16 +#define GPOUT_SYS_SDIO0_CARD_POWER_EN 17 +#define GPOUT_SYS_SDIO0_CCMD_OD_PULLUP_EN 18 +#define GPOUT_SYS_SDIO0_RST 19 +#define GPOUT_SYS_UART0_TX 20 +#define GPOUT_SYS_HIFI4_JTAG_TDO 21 +#define GPOUT_SYS_JTAG_TDO 22 +#define GPOUT_SYS_PDM_MCLK 23 +#define GPOUT_SYS_PWM_CHANNEL0 24 +#define GPOUT_SYS_PWM_CHANNEL1 25 +#define GPOUT_SYS_PWM_CHANNEL2 26 +#define GPOUT_SYS_PWM_CHANNEL3 27 +#define GPOUT_SYS_PWMDAC_LEFT 28 +#define GPOUT_SYS_PWMDAC_RIGHT 29 +#define GPOUT_SYS_SPI0_CLK 30 +#define GPOUT_SYS_SPI0_FSS 31 +#define GPOUT_SYS_SPI0_TXD 32 +#define GPOUT_SYS_GMAC_PHYCLK 33 +#define GPOUT_SYS_I2SRX_BCLK 34 +#define GPOUT_SYS_I2SRX_LRCK 35 +#define GPOUT_SYS_I2STX0_BCLK 36 +#define GPOUT_SYS_I2STX0_LRCK 37 +#define GPOUT_SYS_MCLK 38 +#define GPOUT_SYS_TDM_CLK 39 +#define GPOUT_SYS_TDM_SYNC 40 +#define GPOUT_SYS_TDM_TXD 41 +#define GPOUT_SYS_TRACE_DATA0 42 +#define GPOUT_SYS_TRACE_DATA1 43 +#define GPOUT_SYS_TRACE_DATA2 44 +#define GPOUT_SYS_TRACE_DATA3 45 +#define GPOUT_SYS_TRACE_REF 46 +#define GPOUT_SYS_CAN1_STBY 47 +#define GPOUT_SYS_CAN1_TST_NEXT_BIT 48 +#define GPOUT_SYS_CAN1_TST_SAMPLE_POINT 49 +#define GPOUT_SYS_CAN1_TXD 50 +#define GPOUT_SYS_I2C1_CLK 51 +#define GPOUT_SYS_I2C1_DATA 52 +#define GPOUT_SYS_SDIO1_BACK_END_POWER 53 +#define GPOUT_SYS_SDIO1_CARD_POWER_EN 54 +#define GPOUT_SYS_SDIO1_CLK 55 +#define GPOUT_SYS_SDIO1_CMD_OD_PULLUP_EN 56 +#define GPOUT_SYS_SDIO1_CMD 57 +#define GPOUT_SYS_SDIO1_DATA0 58 +#define GPOUT_SYS_SDIO1_DATA1 59 +#define GPOUT_SYS_SDIO1_DATA2 60 +#define GPOUT_SYS_SDIO1_DATA3 61 +#define GPOUT_SYS_SDIO1_DATA4 63 +#define GPOUT_SYS_SDIO1_DATA5 63 +#define GPOUT_SYS_SDIO1_DATA6 64 +#define GPOUT_SYS_SDIO1_DATA7 65 +#define GPOUT_SYS_SDIO1_RST 66 +#define GPOUT_SYS_UART1_RTS 67 +#define GPOUT_SYS_UART1_TX 68 +#define GPOUT_SYS_I2STX1_SDO0 69 +#define GPOUT_SYS_I2STX1_SDO1 70 +#define GPOUT_SYS_I2STX1_SDO2 71 +#define GPOUT_SYS_I2STX1_SDO3 72 +#define GPOUT_SYS_SPI1_CLK 73 +#define GPOUT_SYS_SPI1_FSS 74 +#define GPOUT_SYS_SPI1_TXD 75 +#define GPOUT_SYS_I2C2_CLK 76 +#define GPOUT_SYS_I2C2_DATA 77 +#define GPOUT_SYS_UART2_RTS 78 +#define GPOUT_SYS_UART2_TX 79 +#define GPOUT_SYS_SPI2_CLK 80 +#define GPOUT_SYS_SPI2_FSS 81 +#define GPOUT_SYS_SPI2_TXD 82 +#define GPOUT_SYS_I2C3_CLK 83 +#define GPOUT_SYS_I2C3_DATA 84 +#define GPOUT_SYS_UART3_TX 85 +#define GPOUT_SYS_SPI3_CLK 86 +#define GPOUT_SYS_SPI3_FSS 87 +#define GPOUT_SYS_SPI3_TXD 88 +#define GPOUT_SYS_I2C4_CLK 89 +#define GPOUT_SYS_I2C4_DATA 90 +#define GPOUT_SYS_UART4_RTS 91 +#define GPOUT_SYS_UART4_TX 92 +#define GPOUT_SYS_SPI4_CLK 93 +#define GPOUT_SYS_SPI4_FSS 94 +#define GPOUT_SYS_SPI4_TXD 95 +#define GPOUT_SYS_I2C5_CLK 96 +#define GPOUT_SYS_I2C5_DATA 97 +#define GPOUT_SYS_UART5_RTS 98 +#define GPOUT_SYS_UART5_TX 99 +#define GPOUT_SYS_SPI5_CLK 100 +#define GPOUT_SYS_SPI5_FSS 101 +#define GPOUT_SYS_SPI5_TXD 102 +#define GPOUT_SYS_I2C6_CLK 103 +#define GPOUT_SYS_I2C6_DATA 104 +#define GPOUT_SYS_SPI6_CLK 105 +#define GPOUT_SYS_SPI6_FSS 106 +#define GPOUT_SYS_SPI6_TXD 107 + +/* aon_iomux dout */ +#define GPOUT_AON_CLK_32K_OUT 2 +#define GPOUT_AON_PTC0_PWM4 3 +#define GPOUT_AON_PTC0_PWM5 4 +#define GPOUT_AON_PTC0_PWM6 5 +#define GPOUT_AON_PTC0_PWM7 6 +#define GPOUT_AON_CLK_GCLK0 7 +#define GPOUT_AON_CLK_GCLK1 8 +#define GPOUT_AON_CLK_GCLK2 9 + +/* sys_iomux doen */ +#define GPOEN_ENABLE 0 +#define GPOEN_DISABLE 1 +#define GPOEN_SYS_HDMI_CEC_SDA 2 +#define GPOEN_SYS_HDMI_DDC_SCL 3 +#define GPOEN_SYS_HDMI_DDC_SDA 4 +#define GPOEN_SYS_I2C0_CLK 5 +#define GPOEN_SYS_I2C0_DATA 6 +#define GPOEN_SYS_HIFI4_JTAG_TDO 7 +#define GPOEN_SYS_JTAG_TDO 8 +#define GPOEN_SYS_PWM0_CHANNEL0 9 +#define GPOEN_SYS_PWM0_CHANNEL1 10 +#define GPOEN_SYS_PWM0_CHANNEL2 11 +#define GPOEN_SYS_PWM0_CHANNEL3 12 +#define GPOEN_SYS_SPI0_NSSPCTL 13 +#define GPOEN_SYS_SPI0_NSSP 14 +#define GPOEN_SYS_TDM_SYNC 15 +#define GPOEN_SYS_TDM_TXD 16 +#define GPOEN_SYS_I2C1_CLK 17 +#define GPOEN_SYS_I2C1_DATA 18 +#define GPOEN_SYS_SDIO1_CMD 19 +#define GPOEN_SYS_SDIO1_DATA0 20 +#define GPOEN_SYS_SDIO1_DATA1 21 +#define GPOEN_SYS_SDIO1_DATA2 22 +#define GPOEN_SYS_SDIO1_DATA3 23 +#define GPOEN_SYS_SDIO1_DATA4 24 +#define GPOEN_SYS_SDIO1_DATA5 25 +#define GPOEN_SYS_SDIO1_DATA6 26 +#define GPOEN_SYS_SDIO1_DATA7 27 +#define GPOEN_SYS_SPI1_NSSPCTL 28 +#define GPOEN_SYS_SPI1_NSSP 29 +#define GPOEN_SYS_I2C2_CLK 30 +#define GPOEN_SYS_I2C2_DATA 31 +#define GPOEN_SYS_SPI2_NSSPCTL 32 +#define GPOEN_SYS_SPI2_NSSP 33 +#define GPOEN_SYS_I2C3_CLK 34 +#define GPOEN_SYS_I2C3_DATA 35 +#define GPOEN_SYS_SPI3_NSSPCTL 36 +#define GPOEN_SYS_SPI3_NSSP 37 +#define GPOEN_SYS_I2C4_CLK 38 +#define GPOEN_SYS_I2C4_DATA 39 +#define GPOEN_SYS_SPI4_NSSPCTL 40 +#define GPOEN_SYS_SPI4_NSSP 41 +#define GPOEN_SYS_I2C5_CLK 42 +#define GPOEN_SYS_I2C5_DATA 43 +#define GPOEN_SYS_SPI5_NSSPCTL 44 +#define GPOEN_SYS_SPI5_NSSP 45 +#define GPOEN_SYS_I2C6_CLK 46 +#define GPOEN_SYS_I2C6_DATA 47 +#define GPOEN_SYS_SPI6_NSSPCTL 48 +#define GPOEN_SYS_SPI6_NSSP 49 + +/* aon_iomux doen */ +#define GPOEN_AON_PTC0_OE_N_4 2 +#define GPOEN_AON_PTC0_OE_N_5 3 +#define GPOEN_AON_PTC0_OE_N_6 4 +#define GPOEN_AON_PTC0_OE_N_7 5 + +/* sys_iomux gin */ +#define GPI_NONE 255 + +#define GPI_SYS_WAVE511_UART_RX 0 +#define GPI_SYS_CAN0_RXD 1 +#define GPI_SYS_USB_OVERCURRENT 2 +#define GPI_SYS_SPDIF 3 +#define GPI_SYS_JTAG_RST 4 +#define GPI_SYS_HDMI_CEC_SDA 5 +#define GPI_SYS_HDMI_DDC_SCL 6 +#define GPI_SYS_HDMI_DDC_SDA 7 +#define GPI_SYS_HDMI_HPD 8 +#define GPI_SYS_I2C0_CLK 9 +#define GPI_SYS_I2C0_DATA 10 +#define GPI_SYS_SDIO0_CD 11 +#define GPI_SYS_SDIO0_INT 12 +#define GPI_SYS_SDIO0_WP 13 +#define GPI_SYS_UART0_RX 14 +#define GPI_SYS_HIFI4_JTAG_TCK 15 +#define GPI_SYS_HIFI4_JTAG_TDI 16 +#define GPI_SYS_HIFI4_JTAG_TMS 17 +#define GPI_SYS_HIFI4_JTAG_RST 18 +#define GPI_SYS_JTAG_TDI 19 +#define GPI_SYS_JTAG_TMS 20 +#define GPI_SYS_PDM_DMIC0 21 +#define GPI_SYS_PDM_DMIC1 22 +#define GPI_SYS_I2SRX_SDIN0 23 +#define GPI_SYS_I2SRX_SDIN1 24 +#define GPI_SYS_I2SRX_SDIN2 25 +#define GPI_SYS_SPI0_CLK 26 +#define GPI_SYS_SPI0_FSS 27 +#define GPI_SYS_SPI0_RXD 28 +#define GPI_SYS_JTAG_TCK 29 +#define GPI_SYS_MCLK_EXT 30 +#define GPI_SYS_I2SRX_BCLK 31 +#define GPI_SYS_I2SRX_LRCK 32 +#define GPI_SYS_I2STX0_BCLK 33 +#define GPI_SYS_I2STX0_LRCK 34 +#define GPI_SYS_TDM_CLK 35 +#define GPI_SYS_TDM_RXD 36 +#define GPI_SYS_TDM_SYNC 37 +#define GPI_SYS_CAN1_RXD 38 +#define GPI_SYS_I2C1_CLK 39 +#define GPI_SYS_I2C1_DATA 40 +#define GPI_SYS_SDIO1_CD 41 +#define GPI_SYS_SDIO1_INT 42 +#define GPI_SYS_SDIO1_WP 43 +#define GPI_SYS_SDIO1_CMD 44 +#define GPI_SYS_SDIO1_DATA0 45 +#define GPI_SYS_SDIO1_DATA1 46 +#define GPI_SYS_SDIO1_DATA2 47 +#define GPI_SYS_SDIO1_DATA3 48 +#define GPI_SYS_SDIO1_DATA4 49 +#define GPI_SYS_SDIO1_DATA5 50 +#define GPI_SYS_SDIO1_DATA6 51 +#define GPI_SYS_SDIO1_DATA7 52 +#define GPI_SYS_SDIO1_STRB 53 +#define GPI_SYS_UART1_CTS 54 +#define GPI_SYS_UART1_RX 55 +#define GPI_SYS_SPI1_CLK 56 +#define GPI_SYS_SPI1_FSS 57 +#define GPI_SYS_SPI1_RXD 58 +#define GPI_SYS_I2C2_CLK 59 +#define GPI_SYS_I2C2_DATA 60 +#define GPI_SYS_UART2_CTS 61 +#define GPI_SYS_UART2_RX 62 +#define GPI_SYS_SPI2_CLK 63 +#define GPI_SYS_SPI2_FSS 64 +#define GPI_SYS_SPI2_RXD 65 +#define GPI_SYS_I2C3_CLK 66 +#define GPI_SYS_I2C3_DATA 67 +#define GPI_SYS_UART3_RX 68 +#define GPI_SYS_SPI3_CLK 69 +#define GPI_SYS_SPI3_FSS 70 +#define GPI_SYS_SPI3_RXD 71 +#define GPI_SYS_I2C4_CLK 72 +#define GPI_SYS_I2C4_DATA 73 +#define GPI_SYS_UART4_CTS 74 +#define GPI_SYS_UART4_RX 75 +#define GPI_SYS_SPI4_CLK 76 +#define GPI_SYS_SPI4_FSS 77 +#define GPI_SYS_SPI4_RXD 78 +#define GPI_SYS_I2C5_CLK 79 +#define GPI_SYS_I2C5_DATA 80 +#define GPI_SYS_UART5_CTS 81 +#define GPI_SYS_UART5_RX 82 +#define GPI_SYS_SPI5_CLK 83 +#define GPI_SYS_SPI5_FSS 84 +#define GPI_SYS_SPI5_RXD 85 +#define GPI_SYS_I2C6_CLK 86 +#define GPI_SYS_I2C6_DATA 87 +#define GPI_SYS_SPI6_CLK 88 +#define GPI_SYS_SPI6_FSS 89 +#define GPI_SYS_SPI6_RXD 90 + +/* aon_iomux gin */ +#define GPI_AON_PMU_GPIO_WAKEUP_0 0 +#define GPI_AON_PMU_GPIO_WAKEUP_1 1 +#define GPI_AON_PMU_GPIO_WAKEUP_2 2 +#define GPI_AON_PMU_GPIO_WAKEUP_3 3 + +#endif diff --git a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2-v1.2a.dts b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2-v1.2a.dts new file mode 100644 index 000000000000..4af3300f3cf3 --- /dev/null +++ b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2-v1.2a.dts @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2022 StarFive Technology Co., Ltd. + * Copyright (C) 2022 Emil Renner Berthing <kernel@esmil.dk> + */ + +/dts-v1/; +#include "jh7110-starfive-visionfive-2.dtsi" + +/ { + model = "StarFive VisionFive 2 v1.2A"; + compatible = "starfive,visionfive-2-v1.2a", "starfive,jh7110"; +}; diff --git a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2-v1.3b.dts b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2-v1.3b.dts new file mode 100644 index 000000000000..9230cc3d8946 --- /dev/null +++ b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2-v1.3b.dts @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2022 StarFive Technology Co., Ltd. + * Copyright (C) 2022 Emil Renner Berthing <kernel@esmil.dk> + */ + +/dts-v1/; +#include "jh7110-starfive-visionfive-2.dtsi" + +/ { + model = "StarFive VisionFive 2 v1.3B"; + compatible = "starfive,visionfive-2-v1.3b", "starfive,jh7110"; +}; diff --git a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi new file mode 100644 index 000000000000..2a6d81609284 --- /dev/null +++ b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2022 StarFive Technology Co., Ltd. + * Copyright (C) 2022 Emil Renner Berthing <kernel@esmil.dk> + */ + +/dts-v1/; +#include "jh7110.dtsi" +#include "jh7110-pinfunc.h" +#include <dt-bindings/gpio/gpio.h> + +/ { + aliases { + i2c0 = &i2c0; + i2c2 = &i2c2; + i2c5 = &i2c5; + i2c6 = &i2c6; + serial0 = &uart0; + }; + + chosen { + stdout-path = "serial0:115200n8"; + }; + + cpus { + timebase-frequency = <4000000>; + }; + + memory@40000000 { + device_type = "memory"; + reg = <0x0 0x40000000 0x1 0x0>; + }; + + gpio-restart { + compatible = "gpio-restart"; + gpios = <&sysgpio 35 GPIO_ACTIVE_HIGH>; + priority = <224>; + }; +}; + +&gmac0_rgmii_rxin { + clock-frequency = <125000000>; +}; + +&gmac0_rmii_refin { + clock-frequency = <50000000>; +}; + +&gmac1_rgmii_rxin { + clock-frequency = <125000000>; +}; + +&gmac1_rmii_refin { + clock-frequency = <50000000>; +}; + +&i2srx_bclk_ext { + clock-frequency = <12288000>; +}; + +&i2srx_lrck_ext { + clock-frequency = <192000>; +}; + +&i2stx_bclk_ext { + clock-frequency = <12288000>; +}; + +&i2stx_lrck_ext { + clock-frequency = <192000>; +}; + +&mclk_ext { + clock-frequency = <12288000>; +}; + +&osc { + clock-frequency = <24000000>; +}; + +&rtc_osc { + clock-frequency = <32768>; +}; + +&tdm_ext { + clock-frequency = <49152000>; +}; + +&i2c0 { + clock-frequency = <100000>; + i2c-sda-hold-time-ns = <300>; + i2c-sda-falling-time-ns = <510>; + i2c-scl-falling-time-ns = <510>; + pinctrl-names = "default"; + pinctrl-0 = <&i2c0_pins>; + status = "okay"; +}; + +&i2c2 { + clock-frequency = <100000>; + i2c-sda-hold-time-ns = <300>; + i2c-sda-falling-time-ns = <510>; + i2c-scl-falling-time-ns = <510>; + pinctrl-names = "default"; + pinctrl-0 = <&i2c2_pins>; + status = "okay"; +}; + +&i2c5 { + clock-frequency = <100000>; + i2c-sda-hold-time-ns = <300>; + i2c-sda-falling-time-ns = <510>; + i2c-scl-falling-time-ns = <510>; + pinctrl-names = "default"; + pinctrl-0 = <&i2c5_pins>; + status = "okay"; +}; + +&i2c6 { + clock-frequency = <100000>; + i2c-sda-hold-time-ns = <300>; + i2c-sda-falling-time-ns = <510>; + i2c-scl-falling-time-ns = <510>; + pinctrl-names = "default"; + pinctrl-0 = <&i2c6_pins>; + status = "okay"; +}; + +&sysgpio { + i2c0_pins: i2c0-0 { + i2c-pins { + pinmux = <GPIOMUX(57, GPOUT_LOW, + GPOEN_SYS_I2C0_CLK, + GPI_SYS_I2C0_CLK)>, + <GPIOMUX(58, GPOUT_LOW, + GPOEN_SYS_I2C0_DATA, + GPI_SYS_I2C0_DATA)>; + bias-disable; /* external pull-up */ + input-enable; + input-schmitt-enable; + }; + }; + + i2c2_pins: i2c2-0 { + i2c-pins { + pinmux = <GPIOMUX(3, GPOUT_LOW, + GPOEN_SYS_I2C2_CLK, + GPI_SYS_I2C2_CLK)>, + <GPIOMUX(2, GPOUT_LOW, + GPOEN_SYS_I2C2_DATA, + GPI_SYS_I2C2_DATA)>; + bias-disable; /* external pull-up */ + input-enable; + input-schmitt-enable; + }; + }; + + i2c5_pins: i2c5-0 { + i2c-pins { + pinmux = <GPIOMUX(19, GPOUT_LOW, + GPOEN_SYS_I2C5_CLK, + GPI_SYS_I2C5_CLK)>, + <GPIOMUX(20, GPOUT_LOW, + GPOEN_SYS_I2C5_DATA, + GPI_SYS_I2C5_DATA)>; + bias-disable; /* external pull-up */ + input-enable; + input-schmitt-enable; + }; + }; + + i2c6_pins: i2c6-0 { + i2c-pins { + pinmux = <GPIOMUX(16, GPOUT_LOW, + GPOEN_SYS_I2C6_CLK, + GPI_SYS_I2C6_CLK)>, + <GPIOMUX(17, GPOUT_LOW, + GPOEN_SYS_I2C6_DATA, + GPI_SYS_I2C6_DATA)>; + bias-disable; /* external pull-up */ + input-enable; + input-schmitt-enable; + }; + }; + + uart0_pins: uart0-0 { + tx-pins { + pinmux = <GPIOMUX(5, GPOUT_SYS_UART0_TX, + GPOEN_ENABLE, + GPI_NONE)>; + bias-disable; + drive-strength = <12>; + input-disable; + input-schmitt-disable; + slew-rate = <0>; + }; + + rx-pins { + pinmux = <GPIOMUX(6, GPOUT_LOW, + GPOEN_DISABLE, + GPI_SYS_UART0_RX)>; + bias-disable; /* external pull-up */ + drive-strength = <2>; + input-enable; + input-schmitt-enable; + slew-rate = <0>; + }; + }; +}; + +&uart0 { + pinctrl-names = "default"; + pinctrl-0 = <&uart0_pins>; + status = "okay"; +}; diff --git a/arch/riscv/boot/dts/starfive/jh7110.dtsi b/arch/riscv/boot/dts/starfive/jh7110.dtsi new file mode 100644 index 000000000000..03c6cc49fa22 --- /dev/null +++ b/arch/riscv/boot/dts/starfive/jh7110.dtsi @@ -0,0 +1,517 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2022 StarFive Technology Co., Ltd. + * Copyright (C) 2022 Emil Renner Berthing <kernel@esmil.dk> + */ + +/dts-v1/; +#include <dt-bindings/clock/starfive,jh7110-crg.h> +#include <dt-bindings/reset/starfive,jh7110-crg.h> + +/ { + compatible = "starfive,jh7110"; + #address-cells = <2>; + #size-cells = <2>; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + S7_0: cpu@0 { + compatible = "sifive,s7", "riscv"; + reg = <0>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <16384>; + next-level-cache = <&ccache>; + riscv,isa = "rv64imac_zba_zbb"; + status = "disabled"; + + cpu0_intc: interrupt-controller { + compatible = "riscv,cpu-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + }; + + U74_1: cpu@1 { + compatible = "sifive,u74-mc", "riscv"; + reg = <1>; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <40>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <40>; + mmu-type = "riscv,sv39"; + next-level-cache = <&ccache>; + riscv,isa = "rv64imafdc_zba_zbb"; + tlb-split; + + cpu1_intc: interrupt-controller { + compatible = "riscv,cpu-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + }; + + U74_2: cpu@2 { + compatible = "sifive,u74-mc", "riscv"; + reg = <2>; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <40>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <40>; + mmu-type = "riscv,sv39"; + next-level-cache = <&ccache>; + riscv,isa = "rv64imafdc_zba_zbb"; + tlb-split; + + cpu2_intc: interrupt-controller { + compatible = "riscv,cpu-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + }; + + U74_3: cpu@3 { + compatible = "sifive,u74-mc", "riscv"; + reg = <3>; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <40>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <40>; + mmu-type = "riscv,sv39"; + next-level-cache = <&ccache>; + riscv,isa = "rv64imafdc_zba_zbb"; + tlb-split; + + cpu3_intc: interrupt-controller { + compatible = "riscv,cpu-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + }; + + U74_4: cpu@4 { + compatible = "sifive,u74-mc", "riscv"; + reg = <4>; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <40>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <40>; + mmu-type = "riscv,sv39"; + next-level-cache = <&ccache>; + riscv,isa = "rv64imafdc_zba_zbb"; + tlb-split; + + cpu4_intc: interrupt-controller { + compatible = "riscv,cpu-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + }; + + cpu-map { + cluster0 { + core0 { + cpu = <&S7_0>; + }; + + core1 { + cpu = <&U74_1>; + }; + + core2 { + cpu = <&U74_2>; + }; + + core3 { + cpu = <&U74_3>; + }; + + core4 { + cpu = <&U74_4>; + }; + }; + }; + }; + + gmac0_rgmii_rxin: gmac0-rgmii-rxin-clock { + compatible = "fixed-clock"; + clock-output-names = "gmac0_rgmii_rxin"; + #clock-cells = <0>; + }; + + gmac0_rmii_refin: gmac0-rmii-refin-clock { + compatible = "fixed-clock"; + clock-output-names = "gmac0_rmii_refin"; + #clock-cells = <0>; + }; + + gmac1_rgmii_rxin: gmac1-rgmii-rxin-clock { + compatible = "fixed-clock"; + clock-output-names = "gmac1_rgmii_rxin"; + #clock-cells = <0>; + }; + + gmac1_rmii_refin: gmac1-rmii-refin-clock { + compatible = "fixed-clock"; + clock-output-names = "gmac1_rmii_refin"; + #clock-cells = <0>; + }; + + i2srx_bclk_ext: i2srx-bclk-ext-clock { + compatible = "fixed-clock"; + clock-output-names = "i2srx_bclk_ext"; + #clock-cells = <0>; + }; + + i2srx_lrck_ext: i2srx-lrck-ext-clock { + compatible = "fixed-clock"; + clock-output-names = "i2srx_lrck_ext"; + #clock-cells = <0>; + }; + + i2stx_bclk_ext: i2stx-bclk-ext-clock { + compatible = "fixed-clock"; + clock-output-names = "i2stx_bclk_ext"; + #clock-cells = <0>; + }; + + i2stx_lrck_ext: i2stx-lrck-ext-clock { + compatible = "fixed-clock"; + clock-output-names = "i2stx_lrck_ext"; + #clock-cells = <0>; + }; + + mclk_ext: mclk-ext-clock { + compatible = "fixed-clock"; + clock-output-names = "mclk_ext"; + #clock-cells = <0>; + }; + + osc: oscillator { + compatible = "fixed-clock"; + clock-output-names = "osc"; + #clock-cells = <0>; + }; + + rtc_osc: rtc-oscillator { + compatible = "fixed-clock"; + clock-output-names = "rtc_osc"; + #clock-cells = <0>; + }; + + tdm_ext: tdm-ext-clock { + compatible = "fixed-clock"; + clock-output-names = "tdm_ext"; + #clock-cells = <0>; + }; + + soc { + compatible = "simple-bus"; + interrupt-parent = <&plic>; + #address-cells = <2>; + #size-cells = <2>; + ranges; + + clint: timer@2000000 { + compatible = "starfive,jh7110-clint", "sifive,clint0"; + reg = <0x0 0x2000000 0x0 0x10000>; + interrupts-extended = <&cpu0_intc 3>, <&cpu0_intc 7>, + <&cpu1_intc 3>, <&cpu1_intc 7>, + <&cpu2_intc 3>, <&cpu2_intc 7>, + <&cpu3_intc 3>, <&cpu3_intc 7>, + <&cpu4_intc 3>, <&cpu4_intc 7>; + }; + + ccache: cache-controller@2010000 { + compatible = "starfive,jh7110-ccache", "sifive,ccache0", "cache"; + reg = <0x0 0x2010000 0x0 0x4000>; + interrupts = <1>, <3>, <4>, <2>; + cache-block-size = <64>; + cache-level = <2>; + cache-sets = <2048>; + cache-size = <2097152>; + cache-unified; + }; + + plic: interrupt-controller@c000000 { + compatible = "starfive,jh7110-plic", "sifive,plic-1.0.0"; + reg = <0x0 0xc000000 0x0 0x4000000>; + interrupts-extended = <&cpu0_intc 11>, + <&cpu1_intc 11>, <&cpu1_intc 9>, + <&cpu2_intc 11>, <&cpu2_intc 9>, + <&cpu3_intc 11>, <&cpu3_intc 9>, + <&cpu4_intc 11>, <&cpu4_intc 9>; + interrupt-controller; + #interrupt-cells = <1>; + #address-cells = <0>; + riscv,ndev = <136>; + }; + + uart0: serial@10000000 { + compatible = "snps,dw-apb-uart"; + reg = <0x0 0x10000000 0x0 0x10000>; + clocks = <&syscrg JH7110_SYSCLK_UART0_CORE>, + <&syscrg JH7110_SYSCLK_UART0_APB>; + clock-names = "baudclk", "apb_pclk"; + resets = <&syscrg JH7110_SYSRST_UART0_APB>; + interrupts = <32>; + reg-io-width = <4>; + reg-shift = <2>; + status = "disabled"; + }; + + uart1: serial@10010000 { + compatible = "snps,dw-apb-uart"; + reg = <0x0 0x10010000 0x0 0x10000>; + clocks = <&syscrg JH7110_SYSCLK_UART1_CORE>, + <&syscrg JH7110_SYSCLK_UART1_APB>; + clock-names = "baudclk", "apb_pclk"; + resets = <&syscrg JH7110_SYSRST_UART1_APB>; + interrupts = <33>; + reg-io-width = <4>; + reg-shift = <2>; + status = "disabled"; + }; + + uart2: serial@10020000 { + compatible = "snps,dw-apb-uart"; + reg = <0x0 0x10020000 0x0 0x10000>; + clocks = <&syscrg JH7110_SYSCLK_UART2_CORE>, + <&syscrg JH7110_SYSCLK_UART2_APB>; + clock-names = "baudclk", "apb_pclk"; + resets = <&syscrg JH7110_SYSRST_UART2_APB>; + interrupts = <34>; + reg-io-width = <4>; + reg-shift = <2>; + status = "disabled"; + }; + + i2c0: i2c@10030000 { + compatible = "snps,designware-i2c"; + reg = <0x0 0x10030000 0x0 0x10000>; + clocks = <&syscrg JH7110_SYSCLK_I2C0_APB>; + clock-names = "ref"; + resets = <&syscrg JH7110_SYSRST_I2C0_APB>; + interrupts = <35>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + + i2c1: i2c@10040000 { + compatible = "snps,designware-i2c"; + reg = <0x0 0x10040000 0x0 0x10000>; + clocks = <&syscrg JH7110_SYSCLK_I2C1_APB>; + clock-names = "ref"; + resets = <&syscrg JH7110_SYSRST_I2C1_APB>; + interrupts = <36>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + + i2c2: i2c@10050000 { + compatible = "snps,designware-i2c"; + reg = <0x0 0x10050000 0x0 0x10000>; + clocks = <&syscrg JH7110_SYSCLK_I2C2_APB>; + clock-names = "ref"; + resets = <&syscrg JH7110_SYSRST_I2C2_APB>; + interrupts = <37>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + + uart3: serial@12000000 { + compatible = "snps,dw-apb-uart"; + reg = <0x0 0x12000000 0x0 0x10000>; + clocks = <&syscrg JH7110_SYSCLK_UART3_CORE>, + <&syscrg JH7110_SYSCLK_UART3_APB>; + clock-names = "baudclk", "apb_pclk"; + resets = <&syscrg JH7110_SYSRST_UART3_APB>; + interrupts = <45>; + reg-io-width = <4>; + reg-shift = <2>; + status = "disabled"; + }; + + uart4: serial@12010000 { + compatible = "snps,dw-apb-uart"; + reg = <0x0 0x12010000 0x0 0x10000>; + clocks = <&syscrg JH7110_SYSCLK_UART4_CORE>, + <&syscrg JH7110_SYSCLK_UART4_APB>; + clock-names = "baudclk", "apb_pclk"; + resets = <&syscrg JH7110_SYSRST_UART4_APB>; + interrupts = <46>; + reg-io-width = <4>; + reg-shift = <2>; + status = "disabled"; + }; + + uart5: serial@12020000 { + compatible = "snps,dw-apb-uart"; + reg = <0x0 0x12020000 0x0 0x10000>; + clocks = <&syscrg JH7110_SYSCLK_UART5_CORE>, + <&syscrg JH7110_SYSCLK_UART5_APB>; + clock-names = "baudclk", "apb_pclk"; + resets = <&syscrg JH7110_SYSRST_UART5_APB>; + interrupts = <47>; + reg-io-width = <4>; + reg-shift = <2>; + status = "disabled"; + }; + + i2c3: i2c@12030000 { + compatible = "snps,designware-i2c"; + reg = <0x0 0x12030000 0x0 0x10000>; + clocks = <&syscrg JH7110_SYSCLK_I2C3_APB>; + clock-names = "ref"; + resets = <&syscrg JH7110_SYSRST_I2C3_APB>; + interrupts = <48>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + + i2c4: i2c@12040000 { + compatible = "snps,designware-i2c"; + reg = <0x0 0x12040000 0x0 0x10000>; + clocks = <&syscrg JH7110_SYSCLK_I2C4_APB>; + clock-names = "ref"; + resets = <&syscrg JH7110_SYSRST_I2C4_APB>; + interrupts = <49>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + + i2c5: i2c@12050000 { + compatible = "snps,designware-i2c"; + reg = <0x0 0x12050000 0x0 0x10000>; + clocks = <&syscrg JH7110_SYSCLK_I2C5_APB>; + clock-names = "ref"; + resets = <&syscrg JH7110_SYSRST_I2C5_APB>; + interrupts = <50>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + + i2c6: i2c@12060000 { + compatible = "snps,designware-i2c"; + reg = <0x0 0x12060000 0x0 0x10000>; + clocks = <&syscrg JH7110_SYSCLK_I2C6_APB>; + clock-names = "ref"; + resets = <&syscrg JH7110_SYSRST_I2C6_APB>; + interrupts = <51>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + + syscrg: clock-controller@13020000 { + compatible = "starfive,jh7110-syscrg"; + reg = <0x0 0x13020000 0x0 0x10000>; + clocks = <&osc>, <&gmac1_rmii_refin>, + <&gmac1_rgmii_rxin>, + <&i2stx_bclk_ext>, <&i2stx_lrck_ext>, + <&i2srx_bclk_ext>, <&i2srx_lrck_ext>, + <&tdm_ext>, <&mclk_ext>; + clock-names = "osc", "gmac1_rmii_refin", + "gmac1_rgmii_rxin", + "i2stx_bclk_ext", "i2stx_lrck_ext", + "i2srx_bclk_ext", "i2srx_lrck_ext", + "tdm_ext", "mclk_ext"; + #clock-cells = <1>; + #reset-cells = <1>; + }; + + sysgpio: pinctrl@13040000 { + compatible = "starfive,jh7110-sys-pinctrl"; + reg = <0x0 0x13040000 0x0 0x10000>; + clocks = <&syscrg JH7110_SYSCLK_IOMUX_APB>; + resets = <&syscrg JH7110_SYSRST_IOMUX_APB>; + interrupts = <86>; + interrupt-controller; + #interrupt-cells = <2>; + gpio-controller; + #gpio-cells = <2>; + }; + + watchdog@13070000 { + compatible = "starfive,jh7110-wdt"; + reg = <0x0 0x13070000 0x0 0x10000>; + clocks = <&syscrg JH7110_SYSCLK_WDT_APB>, + <&syscrg JH7110_SYSCLK_WDT_CORE>; + clock-names = "apb", "core"; + resets = <&syscrg JH7110_SYSRST_WDT_APB>, + <&syscrg JH7110_SYSRST_WDT_CORE>; + }; + + aoncrg: clock-controller@17000000 { + compatible = "starfive,jh7110-aoncrg"; + reg = <0x0 0x17000000 0x0 0x10000>; + clocks = <&osc>, <&gmac0_rmii_refin>, + <&gmac0_rgmii_rxin>, + <&syscrg JH7110_SYSCLK_STG_AXIAHB>, + <&syscrg JH7110_SYSCLK_APB_BUS>, + <&syscrg JH7110_SYSCLK_GMAC0_GTXCLK>, + <&rtc_osc>; + clock-names = "osc", "gmac0_rmii_refin", + "gmac0_rgmii_rxin", "stg_axiahb", + "apb_bus", "gmac0_gtxclk", + "rtc_osc"; + #clock-cells = <1>; + #reset-cells = <1>; + }; + + aongpio: pinctrl@17020000 { + compatible = "starfive,jh7110-aon-pinctrl"; + reg = <0x0 0x17020000 0x0 0x10000>; + resets = <&aoncrg JH7110_AONRST_IOMUX>; + interrupts = <85>; + interrupt-controller; + #interrupt-cells = <2>; + gpio-controller; + #gpio-cells = <2>; + }; + + pwrc: power-controller@17030000 { + compatible = "starfive,jh7110-pmu"; + reg = <0x0 0x17030000 0x0 0x10000>; + interrupts = <111>; + #power-domain-cells = <1>; + }; + }; +}; diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig index 79b3ccd58ff0..e36fffd6fb18 100644 --- a/arch/riscv/configs/nommu_k210_defconfig +++ b/arch/riscv/configs/nommu_k210_defconfig @@ -1,6 +1,5 @@ # CONFIG_CPU_ISOLATION is not set CONFIG_LOG_BUF_SHIFT=13 -CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12 CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_GZIP is not set # CONFIG_RD_BZIP2 is not set diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig index 6b80bb13b8ed..c1ad85f0a4f7 100644 --- a/arch/riscv/configs/nommu_k210_sdcard_defconfig +++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig @@ -1,6 +1,5 @@ # CONFIG_CPU_ISOLATION is not set CONFIG_LOG_BUF_SHIFT=13 -CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12 CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_SYSFS_SYSCALL is not set # CONFIG_FHANDLE is not set diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig index 4cf0f297091e..b794e2f8144e 100644 --- a/arch/riscv/configs/nommu_virt_defconfig +++ b/arch/riscv/configs/nommu_virt_defconfig @@ -1,6 +1,5 @@ # CONFIG_CPU_ISOLATION is not set CONFIG_LOG_BUF_SHIFT=16 -CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12 CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c index da55cb247e89..3d9a32d791f7 100644 --- a/arch/riscv/errata/sifive/errata.c +++ b/arch/riscv/errata/sifive/errata.c @@ -14,7 +14,7 @@ #include <asm/errata_list.h> struct errata_info_t { - char name[ERRATA_STRING_LENGTH_MAX]; + char name[32]; bool (*check_func)(unsigned long arch_id, unsigned long impid); }; @@ -82,11 +82,9 @@ static void __init_or_module warn_miss_errata(u32 miss_errata) pr_warn("----------------------------------------------------------------\n"); } -void __init_or_module sifive_errata_patch_func(struct alt_entry *begin, - struct alt_entry *end, - unsigned long archid, - unsigned long impid, - unsigned int stage) +void sifive_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, + unsigned long archid, unsigned long impid, + unsigned int stage) { struct alt_entry *alt; u32 cpu_req_errata; @@ -101,17 +99,17 @@ void __init_or_module sifive_errata_patch_func(struct alt_entry *begin, for (alt = begin; alt < end; alt++) { if (alt->vendor_id != SIFIVE_VENDOR_ID) continue; - if (alt->errata_id >= ERRATA_SIFIVE_NUMBER) { - WARN(1, "This errata id:%d is not in kernel errata list", alt->errata_id); + if (alt->patch_id >= ERRATA_SIFIVE_NUMBER) { + WARN(1, "This errata id:%d is not in kernel errata list", alt->patch_id); continue; } - tmp = (1U << alt->errata_id); + tmp = (1U << alt->patch_id); if (cpu_req_errata & tmp) { mutex_lock(&text_mutex); patch_text_nosync(ALT_OLD_PTR(alt), ALT_ALT_PTR(alt), alt->alt_len); - mutex_lock(&text_mutex); + mutex_unlock(&text_mutex); cpu_apply_errata |= tmp; } } diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index 3b96a06d3c54..c259dc925ec1 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -11,7 +11,9 @@ #include <linux/uaccess.h> #include <asm/alternative.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> #include <asm/errata_list.h> +#include <asm/hwprobe.h> #include <asm/patch.h> #include <asm/vendorid_list.h> @@ -81,9 +83,9 @@ static u32 thead_errata_probe(unsigned int stage, return cpu_req_errata; } -void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, - unsigned long archid, unsigned long impid, - unsigned int stage) +void thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, + unsigned long archid, unsigned long impid, + unsigned int stage) { struct alt_entry *alt; u32 cpu_req_errata = thead_errata_probe(stage, archid, impid); @@ -93,10 +95,10 @@ void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct al for (alt = begin; alt < end; alt++) { if (alt->vendor_id != THEAD_VENDOR_ID) continue; - if (alt->errata_id >= ERRATA_THEAD_NUMBER) + if (alt->patch_id >= ERRATA_THEAD_NUMBER) continue; - tmp = (1U << alt->errata_id); + tmp = (1U << alt->patch_id); if (cpu_req_errata & tmp) { oldptr = ALT_OLD_PTR(alt); altptr = ALT_ALT_PTR(alt); @@ -115,3 +117,11 @@ void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct al if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) local_flush_icache_all(); } + +void thead_feature_probe_func(unsigned int cpu, + unsigned long archid, + unsigned long impid) +{ + if ((archid == 0) && (impid == 0)) + per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_FAST; +} diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index 51c6867e02f3..b8c55fb3ab2c 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -6,18 +6,18 @@ #ifdef __ASSEMBLY__ -.macro ALT_ENTRY oldptr newptr vendor_id errata_id new_len +.macro ALT_ENTRY oldptr newptr vendor_id patch_id new_len .4byte \oldptr - . .4byte \newptr - . .2byte \vendor_id .2byte \new_len - .4byte \errata_id + .4byte \patch_id .endm -.macro ALT_NEW_CONTENT vendor_id, errata_id, enable = 1, new_c : vararg +.macro ALT_NEW_CONTENT vendor_id, patch_id, enable = 1, new_c .if \enable .pushsection .alternative, "a" - ALT_ENTRY 886b, 888f, \vendor_id, \errata_id, 889f - 888f + ALT_ENTRY 886b, 888f, \vendor_id, \patch_id, 889f - 888f .popsection .subsection 1 888 : @@ -33,7 +33,7 @@ .endif .endm -.macro ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, enable +.macro ALTERNATIVE_CFG old_c, new_c, vendor_id, patch_id, enable 886 : .option push .option norvc @@ -41,13 +41,13 @@ \old_c .option pop 887 : - ALT_NEW_CONTENT \vendor_id, \errata_id, \enable, \new_c + ALT_NEW_CONTENT \vendor_id, \patch_id, \enable, "\new_c" .endm -.macro ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ - new_c_2, vendor_id_2, errata_id_2, enable_2 - ALTERNATIVE_CFG "\old_c", "\new_c_1", \vendor_id_1, \errata_id_1, \enable_1 - ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \enable_2, \new_c_2 +.macro ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, patch_id_1, enable_1, \ + new_c_2, vendor_id_2, patch_id_2, enable_2 + ALTERNATIVE_CFG "\old_c", "\new_c_1", \vendor_id_1, \patch_id_1, \enable_1 + ALT_NEW_CONTENT \vendor_id_2, \patch_id_2, \enable_2, "\new_c_2" .endm #define __ALTERNATIVE_CFG(...) ALTERNATIVE_CFG __VA_ARGS__ @@ -58,17 +58,17 @@ #include <asm/asm.h> #include <linux/stringify.h> -#define ALT_ENTRY(oldptr, newptr, vendor_id, errata_id, newlen) \ +#define ALT_ENTRY(oldptr, newptr, vendor_id, patch_id, newlen) \ ".4byte ((" oldptr ") - .) \n" \ ".4byte ((" newptr ") - .) \n" \ ".2byte " vendor_id "\n" \ ".2byte " newlen "\n" \ - ".4byte " errata_id "\n" + ".4byte " patch_id "\n" -#define ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c) \ +#define ALT_NEW_CONTENT(vendor_id, patch_id, enable, new_c) \ ".if " __stringify(enable) " == 1\n" \ ".pushsection .alternative, \"a\"\n" \ - ALT_ENTRY("886b", "888f", __stringify(vendor_id), __stringify(errata_id), "889f - 888f") \ + ALT_ENTRY("886b", "888f", __stringify(vendor_id), __stringify(patch_id), "889f - 888f") \ ".popsection\n" \ ".subsection 1\n" \ "888 :\n" \ @@ -83,7 +83,7 @@ ".previous\n" \ ".endif\n" -#define __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, enable) \ +#define __ALTERNATIVE_CFG(old_c, new_c, vendor_id, patch_id, enable) \ "886 :\n" \ ".option push\n" \ ".option norvc\n" \ @@ -91,22 +91,22 @@ old_c "\n" \ ".option pop\n" \ "887 :\n" \ - ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c) + ALT_NEW_CONTENT(vendor_id, patch_id, enable, new_c) -#define __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ - new_c_2, vendor_id_2, errata_id_2, enable_2) \ - __ALTERNATIVE_CFG(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1) \ - ALT_NEW_CONTENT(vendor_id_2, errata_id_2, enable_2, new_c_2) +#define __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, patch_id_1, enable_1, \ + new_c_2, vendor_id_2, patch_id_2, enable_2) \ + __ALTERNATIVE_CFG(old_c, new_c_1, vendor_id_1, patch_id_1, enable_1) \ + ALT_NEW_CONTENT(vendor_id_2, patch_id_2, enable_2, new_c_2) #endif /* __ASSEMBLY__ */ -#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ - __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k)) +#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, patch_id, CONFIG_k) \ + __ALTERNATIVE_CFG(old_c, new_c, vendor_id, patch_id, IS_ENABLED(CONFIG_k)) -#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, CONFIG_k_1, \ - new_c_2, vendor_id_2, errata_id_2, CONFIG_k_2) \ - __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, IS_ENABLED(CONFIG_k_1), \ - new_c_2, vendor_id_2, errata_id_2, IS_ENABLED(CONFIG_k_2)) +#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, patch_id_1, CONFIG_k_1, \ + new_c_2, vendor_id_2, patch_id_2, CONFIG_k_2) \ + __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, patch_id_1, IS_ENABLED(CONFIG_k_1), \ + new_c_2, vendor_id_2, patch_id_2, IS_ENABLED(CONFIG_k_2)) #else /* CONFIG_RISCV_ALTERNATIVE */ #ifdef __ASSEMBLY__ @@ -137,19 +137,19 @@ /* * Usage: - * ALTERNATIVE(old_content, new_content, vendor_id, errata_id, CONFIG_k) + * ALTERNATIVE(old_content, new_content, vendor_id, patch_id, CONFIG_k) * in the assembly code. Otherwise, - * asm(ALTERNATIVE(old_content, new_content, vendor_id, errata_id, CONFIG_k)); + * asm(ALTERNATIVE(old_content, new_content, vendor_id, patch_id, CONFIG_k)); * * old_content: The old content which is probably replaced with new content. * new_content: The new content. * vendor_id: The CPU vendor ID. - * errata_id: The errata ID. - * CONFIG_k: The Kconfig of this errata. When Kconfig is disabled, the old + * patch_id: The patch ID (erratum ID or cpufeature ID). + * CONFIG_k: The Kconfig of this patch ID. When Kconfig is disabled, the old * content will alwyas be executed. */ -#define ALTERNATIVE(old_content, new_content, vendor_id, errata_id, CONFIG_k) \ - _ALTERNATIVE_CFG(old_content, new_content, vendor_id, errata_id, CONFIG_k) +#define ALTERNATIVE(old_content, new_content, vendor_id, patch_id, CONFIG_k) \ + _ALTERNATIVE_CFG(old_content, new_content, vendor_id, patch_id, CONFIG_k) /* * A vendor wants to replace an old_content, but another vendor has used @@ -158,9 +158,9 @@ * on the following sample code and then replace ALTERNATIVE() with * ALTERNATIVE_2() to append its customized content. */ -#define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1, \ - new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2) \ - _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1, \ - new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2) +#define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1, patch_id_1, CONFIG_k_1, \ + new_content_2, vendor_id_2, patch_id_2, CONFIG_k_2) \ + _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, patch_id_1, CONFIG_k_1, \ + new_content_2, vendor_id_2, patch_id_2, CONFIG_k_2) #endif diff --git a/arch/riscv/include/asm/alternative.h b/arch/riscv/include/asm/alternative.h index b8648d4f2ac1..6a41537826a7 100644 --- a/arch/riscv/include/asm/alternative.h +++ b/arch/riscv/include/asm/alternative.h @@ -6,8 +6,6 @@ #ifndef __ASM_ALTERNATIVE_H #define __ASM_ALTERNATIVE_H -#define ERRATA_STRING_LENGTH_MAX 32 - #include <asm/alternative-macros.h> #ifndef __ASSEMBLY__ @@ -15,10 +13,14 @@ #ifdef CONFIG_RISCV_ALTERNATIVE #include <linux/init.h> +#include <linux/kernel.h> #include <linux/types.h> #include <linux/stddef.h> #include <asm/hwcap.h> +#define PATCH_ID_CPUFEATURE_ID(p) lower_16_bits(p) +#define PATCH_ID_CPUFEATURE_VALUE(p) upper_16_bits(p) + #define RISCV_ALTERNATIVES_BOOT 0 /* alternatives applied during regular boot */ #define RISCV_ALTERNATIVES_MODULE 1 /* alternatives applied during module-init */ #define RISCV_ALTERNATIVES_EARLY_BOOT 2 /* alternatives applied before mmu start */ @@ -28,6 +30,7 @@ #define ALT_OLD_PTR(a) __ALT_PTR(a, old_offset) #define ALT_ALT_PTR(a) __ALT_PTR(a, alt_offset) +void probe_vendor_features(unsigned int cpu); void __init apply_boot_alternatives(void); void __init apply_early_boot_alternatives(void); void apply_module_alternatives(void *start, size_t length); @@ -38,14 +41,9 @@ void riscv_alternative_fix_offsets(void *alt_ptr, unsigned int len, struct alt_entry { s32 old_offset; /* offset relative to original instruction or data */ s32 alt_offset; /* offset relative to replacement instruction or data */ - u16 vendor_id; /* cpu vendor id */ + u16 vendor_id; /* CPU vendor ID */ u16 alt_len; /* The replacement size */ - u32 errata_id; /* The errata id */ -}; - -struct errata_checkfunc_id { - unsigned long vendor_id; - bool (*func)(struct alt_entry *alt); + u32 patch_id; /* The patch ID (erratum ID or cpufeature ID) */ }; void sifive_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, @@ -55,11 +53,15 @@ void thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, unsigned long archid, unsigned long impid, unsigned int stage); +void thead_feature_probe_func(unsigned int cpu, unsigned long archid, + unsigned long impid); + void riscv_cpufeature_patch_func(struct alt_entry *begin, struct alt_entry *end, unsigned int stage); #else /* CONFIG_RISCV_ALTERNATIVE */ +static inline void probe_vendor_features(unsigned int cpu) { } static inline void apply_boot_alternatives(void) { } static inline void apply_early_boot_alternatives(void) { } static inline void apply_module_alternatives(void *start, size_t length) { } diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index ef386fcf3939..61ba8ed43d8f 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -27,5 +27,7 @@ DECLARE_DO_ERROR_INFO(do_trap_break); asmlinkage unsigned long get_overflow_stack(void); asmlinkage void handle_bad_stack(struct pt_regs *regs); +asmlinkage void do_page_fault(struct pt_regs *regs); +asmlinkage void do_irq(struct pt_regs *regs); #endif /* _ASM_RISCV_PROTOTYPES_H */ diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 816e753de636..114bbadaef41 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -69,6 +69,7 @@ #endif #ifdef __ASSEMBLY__ +#include <asm/asm-offsets.h> /* Common assembly source macros */ @@ -81,6 +82,66 @@ .endr .endm + /* save all GPs except x1 ~ x5 */ + .macro save_from_x6_to_x31 + REG_S x6, PT_T1(sp) + REG_S x7, PT_T2(sp) + REG_S x8, PT_S0(sp) + REG_S x9, PT_S1(sp) + REG_S x10, PT_A0(sp) + REG_S x11, PT_A1(sp) + REG_S x12, PT_A2(sp) + REG_S x13, PT_A3(sp) + REG_S x14, PT_A4(sp) + REG_S x15, PT_A5(sp) + REG_S x16, PT_A6(sp) + REG_S x17, PT_A7(sp) + REG_S x18, PT_S2(sp) + REG_S x19, PT_S3(sp) + REG_S x20, PT_S4(sp) + REG_S x21, PT_S5(sp) + REG_S x22, PT_S6(sp) + REG_S x23, PT_S7(sp) + REG_S x24, PT_S8(sp) + REG_S x25, PT_S9(sp) + REG_S x26, PT_S10(sp) + REG_S x27, PT_S11(sp) + REG_S x28, PT_T3(sp) + REG_S x29, PT_T4(sp) + REG_S x30, PT_T5(sp) + REG_S x31, PT_T6(sp) + .endm + + /* restore all GPs except x1 ~ x5 */ + .macro restore_from_x6_to_x31 + REG_L x6, PT_T1(sp) + REG_L x7, PT_T2(sp) + REG_L x8, PT_S0(sp) + REG_L x9, PT_S1(sp) + REG_L x10, PT_A0(sp) + REG_L x11, PT_A1(sp) + REG_L x12, PT_A2(sp) + REG_L x13, PT_A3(sp) + REG_L x14, PT_A4(sp) + REG_L x15, PT_A5(sp) + REG_L x16, PT_A6(sp) + REG_L x17, PT_A7(sp) + REG_L x18, PT_S2(sp) + REG_L x19, PT_S3(sp) + REG_L x20, PT_S4(sp) + REG_L x21, PT_S5(sp) + REG_L x22, PT_S6(sp) + REG_L x23, PT_S7(sp) + REG_L x24, PT_S8(sp) + REG_L x25, PT_S9(sp) + REG_L x26, PT_S10(sp) + REG_L x27, PT_S11(sp) + REG_L x28, PT_T3(sp) + REG_L x29, PT_T4(sp) + REG_L x30, PT_T5(sp) + REG_L x31, PT_T6(sp) + .endm + #endif /* __ASSEMBLY__ */ #endif /* _ASM_RISCV_ASM_H */ diff --git a/arch/riscv/include/asm/assembler.h b/arch/riscv/include/asm/assembler.h new file mode 100644 index 000000000000..44b1457d3e95 --- /dev/null +++ b/arch/riscv/include/asm/assembler.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 StarFive Technology Co., Ltd. + * + * Author: Jee Heng Sia <jeeheng.sia@starfivetech.com> + */ + +#ifndef __ASSEMBLY__ +#error "Only include this from assembly code" +#endif + +#ifndef __ASM_ASSEMBLER_H +#define __ASM_ASSEMBLER_H + +#include <asm/asm.h> +#include <asm/asm-offsets.h> +#include <asm/csr.h> + +/* + * suspend_restore_csrs - restore CSRs + */ + .macro suspend_restore_csrs + REG_L t0, (SUSPEND_CONTEXT_REGS + PT_EPC)(a0) + csrw CSR_EPC, t0 + REG_L t0, (SUSPEND_CONTEXT_REGS + PT_STATUS)(a0) + csrw CSR_STATUS, t0 + REG_L t0, (SUSPEND_CONTEXT_REGS + PT_BADADDR)(a0) + csrw CSR_TVAL, t0 + REG_L t0, (SUSPEND_CONTEXT_REGS + PT_CAUSE)(a0) + csrw CSR_CAUSE, t0 + .endm + +/* + * suspend_restore_regs - Restore registers (except A0 and T0-T6) + */ + .macro suspend_restore_regs + REG_L ra, (SUSPEND_CONTEXT_REGS + PT_RA)(a0) + REG_L sp, (SUSPEND_CONTEXT_REGS + PT_SP)(a0) + REG_L gp, (SUSPEND_CONTEXT_REGS + PT_GP)(a0) + REG_L tp, (SUSPEND_CONTEXT_REGS + PT_TP)(a0) + REG_L s0, (SUSPEND_CONTEXT_REGS + PT_S0)(a0) + REG_L s1, (SUSPEND_CONTEXT_REGS + PT_S1)(a0) + REG_L a1, (SUSPEND_CONTEXT_REGS + PT_A1)(a0) + REG_L a2, (SUSPEND_CONTEXT_REGS + PT_A2)(a0) + REG_L a3, (SUSPEND_CONTEXT_REGS + PT_A3)(a0) + REG_L a4, (SUSPEND_CONTEXT_REGS + PT_A4)(a0) + REG_L a5, (SUSPEND_CONTEXT_REGS + PT_A5)(a0) + REG_L a6, (SUSPEND_CONTEXT_REGS + PT_A6)(a0) + REG_L a7, (SUSPEND_CONTEXT_REGS + PT_A7)(a0) + REG_L s2, (SUSPEND_CONTEXT_REGS + PT_S2)(a0) + REG_L s3, (SUSPEND_CONTEXT_REGS + PT_S3)(a0) + REG_L s4, (SUSPEND_CONTEXT_REGS + PT_S4)(a0) + REG_L s5, (SUSPEND_CONTEXT_REGS + PT_S5)(a0) + REG_L s6, (SUSPEND_CONTEXT_REGS + PT_S6)(a0) + REG_L s7, (SUSPEND_CONTEXT_REGS + PT_S7)(a0) + REG_L s8, (SUSPEND_CONTEXT_REGS + PT_S8)(a0) + REG_L s9, (SUSPEND_CONTEXT_REGS + PT_S9)(a0) + REG_L s10, (SUSPEND_CONTEXT_REGS + PT_S10)(a0) + REG_L s11, (SUSPEND_CONTEXT_REGS + PT_S11)(a0) + .endm + +/* + * copy_page - copy 1 page (4KB) of data from source to destination + * @a0 - destination + * @a1 - source + */ + .macro copy_page a0, a1 + lui a2, 0x1 + add a2, a2, a0 +1 : + REG_L t0, 0(a1) + REG_L t1, SZREG(a1) + + REG_S t0, 0(a0) + REG_S t1, SZREG(a0) + + addi a0, a0, 2 * SZREG + addi a1, a1, 2 * SZREG + bne a2, a0, 1b + .endm + +#endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index 0dfe9d857a76..bba472928b53 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -261,7 +261,7 @@ c_t arch_atomic##prefix##_xchg_release(atomic##prefix##_t *v, c_t n) \ static __always_inline \ c_t arch_atomic##prefix##_xchg(atomic##prefix##_t *v, c_t n) \ { \ - return __xchg(&(v->counter), n, size); \ + return __arch_xchg(&(v->counter), n, size); \ } \ static __always_inline \ c_t arch_atomic##prefix##_cmpxchg_relaxed(atomic##prefix##_t *v, \ diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 03e3b95ae6da..8091b8bf4883 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -50,7 +50,8 @@ void flush_icache_mm(struct mm_struct *mm, bool local); #endif /* CONFIG_SMP */ extern unsigned int riscv_cbom_block_size; -void riscv_init_cbom_blocksize(void); +extern unsigned int riscv_cboz_block_size; +void riscv_init_cbo_blocksizes(void); #ifdef CONFIG_RISCV_DMA_NONCOHERENT void riscv_noncoherent_supported(void); diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 12debce235e5..2f4726d3cfcc 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -114,7 +114,7 @@ _x_, sizeof(*(ptr))); \ }) -#define __xchg(ptr, new, size) \ +#define __arch_xchg(ptr, new, size) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ __typeof__(new) __new = (new); \ @@ -143,7 +143,7 @@ #define arch_xchg(ptr, x) \ ({ \ __typeof__(*(ptr)) _x_ = (x); \ - (__typeof__(*(ptr))) __xchg((ptr), _x_, sizeof(*(ptr))); \ + (__typeof__(*(ptr))) __arch_xchg((ptr), _x_, sizeof(*(ptr))); \ }) #define xchg32(ptr, x) \ diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h new file mode 100644 index 000000000000..808d5403f2ac --- /dev/null +++ b/arch/riscv/include/asm/cpufeature.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2022-2023 Rivos, Inc + */ + +#ifndef _ASM_CPUFEATURE_H +#define _ASM_CPUFEATURE_H + +/* + * These are probed via a device_initcall(), via either the SBI or directly + * from the corresponding CSRs. + */ +struct riscv_cpuinfo { + unsigned long mvendorid; + unsigned long marchid; + unsigned long mimpid; +}; + +DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); + +DECLARE_PER_CPU(long, misaligned_access_speed); + +#endif diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 0e571f6483d9..b6acb7ed115f 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -7,7 +7,7 @@ #define _ASM_RISCV_CSR_H #include <asm/asm.h> -#include <linux/const.h> +#include <linux/bits.h> /* Status register flags */ #define SR_SIE _AC(0x00000002, UL) /* Supervisor Interrupt Enable */ @@ -40,7 +40,6 @@ #define SR_UXL _AC(0x300000000, UL) /* XLEN mask for U-mode */ #define SR_UXL_32 _AC(0x100000000, UL) /* XLEN = 32 for U-mode */ #define SR_UXL_64 _AC(0x200000000, UL) /* XLEN = 64 for U-mode */ -#define SR_UXL_SHIFT 32 #endif /* SATP flags */ @@ -73,7 +72,10 @@ #define IRQ_S_EXT 9 #define IRQ_VS_EXT 10 #define IRQ_M_EXT 11 +#define IRQ_S_GEXT 12 #define IRQ_PMU_OVF 13 +#define IRQ_LOCAL_MAX (IRQ_PMU_OVF + 1) +#define IRQ_LOCAL_MASK GENMASK((IRQ_LOCAL_MAX - 1), 0) /* Exception causes */ #define EXC_INST_MISALIGNED 0 @@ -128,25 +130,25 @@ #define HGATP32_MODE_SHIFT 31 #define HGATP32_VMID_SHIFT 22 -#define HGATP32_VMID_MASK _AC(0x1FC00000, UL) -#define HGATP32_PPN _AC(0x003FFFFF, UL) +#define HGATP32_VMID GENMASK(28, 22) +#define HGATP32_PPN GENMASK(21, 0) #define HGATP64_MODE_SHIFT 60 #define HGATP64_VMID_SHIFT 44 -#define HGATP64_VMID_MASK _AC(0x03FFF00000000000, UL) -#define HGATP64_PPN _AC(0x00000FFFFFFFFFFF, UL) +#define HGATP64_VMID GENMASK(57, 44) +#define HGATP64_PPN GENMASK(43, 0) #define HGATP_PAGE_SHIFT 12 #ifdef CONFIG_64BIT #define HGATP_PPN HGATP64_PPN #define HGATP_VMID_SHIFT HGATP64_VMID_SHIFT -#define HGATP_VMID_MASK HGATP64_VMID_MASK +#define HGATP_VMID HGATP64_VMID #define HGATP_MODE_SHIFT HGATP64_MODE_SHIFT #else #define HGATP_PPN HGATP32_PPN #define HGATP_VMID_SHIFT HGATP32_VMID_SHIFT -#define HGATP_VMID_MASK HGATP32_VMID_MASK +#define HGATP_VMID HGATP32_VMID #define HGATP_MODE_SHIFT HGATP32_MODE_SHIFT #endif @@ -156,6 +158,27 @@ (_AC(1, UL) << IRQ_S_TIMER) | \ (_AC(1, UL) << IRQ_S_EXT)) +/* AIA CSR bits */ +#define TOPI_IID_SHIFT 16 +#define TOPI_IID_MASK GENMASK(11, 0) +#define TOPI_IPRIO_MASK GENMASK(7, 0) +#define TOPI_IPRIO_BITS 8 + +#define TOPEI_ID_SHIFT 16 +#define TOPEI_ID_MASK GENMASK(10, 0) +#define TOPEI_PRIO_MASK GENMASK(10, 0) + +#define ISELECT_IPRIO0 0x30 +#define ISELECT_IPRIO15 0x3f +#define ISELECT_MASK GENMASK(8, 0) + +#define HVICTL_VTI BIT(30) +#define HVICTL_IID GENMASK(27, 16) +#define HVICTL_IID_SHIFT 16 +#define HVICTL_DPR BIT(9) +#define HVICTL_IPRIOM BIT(8) +#define HVICTL_IPRIO GENMASK(7, 0) + /* xENVCFG flags */ #define ENVCFG_STCE (_AC(1, ULL) << 63) #define ENVCFG_PBMTE (_AC(1, ULL) << 62) @@ -250,6 +273,18 @@ #define CSR_STIMECMP 0x14D #define CSR_STIMECMPH 0x15D +/* Supervisor-Level Window to Indirectly Accessed Registers (AIA) */ +#define CSR_SISELECT 0x150 +#define CSR_SIREG 0x151 + +/* Supervisor-Level Interrupts (AIA) */ +#define CSR_STOPEI 0x15c +#define CSR_STOPI 0xdb0 + +/* Supervisor-Level High-Half CSRs (AIA) */ +#define CSR_SIEH 0x114 +#define CSR_SIPH 0x154 + #define CSR_VSSTATUS 0x200 #define CSR_VSIE 0x204 #define CSR_VSTVEC 0x205 @@ -279,8 +314,32 @@ #define CSR_HGATP 0x680 #define CSR_HGEIP 0xe12 +/* Virtual Interrupts and Interrupt Priorities (H-extension with AIA) */ +#define CSR_HVIEN 0x608 +#define CSR_HVICTL 0x609 +#define CSR_HVIPRIO1 0x646 +#define CSR_HVIPRIO2 0x647 + +/* VS-Level Window to Indirectly Accessed Registers (H-extension with AIA) */ +#define CSR_VSISELECT 0x250 +#define CSR_VSIREG 0x251 + +/* VS-Level Interrupts (H-extension with AIA) */ +#define CSR_VSTOPEI 0x25c +#define CSR_VSTOPI 0xeb0 + +/* Hypervisor and VS-Level High-Half CSRs (H-extension with AIA) */ +#define CSR_HIDELEGH 0x613 +#define CSR_HVIENH 0x618 +#define CSR_HVIPH 0x655 +#define CSR_HVIPRIO1H 0x656 +#define CSR_HVIPRIO2H 0x657 +#define CSR_VSIEH 0x214 +#define CSR_VSIPH 0x254 + #define CSR_MSTATUS 0x300 #define CSR_MISA 0x301 +#define CSR_MIDELEG 0x303 #define CSR_MIE 0x304 #define CSR_MTVEC 0x305 #define CSR_MENVCFG 0x30a @@ -297,6 +356,25 @@ #define CSR_MIMPID 0xf13 #define CSR_MHARTID 0xf14 +/* Machine-Level Window to Indirectly Accessed Registers (AIA) */ +#define CSR_MISELECT 0x350 +#define CSR_MIREG 0x351 + +/* Machine-Level Interrupts (AIA) */ +#define CSR_MTOPEI 0x35c +#define CSR_MTOPI 0xfb0 + +/* Virtual Interrupts for Supervisor Level (AIA) */ +#define CSR_MVIEN 0x308 +#define CSR_MVIP 0x309 + +/* Machine-Level High-Half CSRs (AIA) */ +#define CSR_MIDELEGH 0x313 +#define CSR_MIEH 0x314 +#define CSR_MVIENH 0x318 +#define CSR_MVIPH 0x319 +#define CSR_MIPH 0x354 + #ifdef CONFIG_RISCV_M_MODE # define CSR_STATUS CSR_MSTATUS # define CSR_IE CSR_MIE @@ -307,6 +385,13 @@ # define CSR_TVAL CSR_MTVAL # define CSR_IP CSR_MIP +# define CSR_IEH CSR_MIEH +# define CSR_ISELECT CSR_MISELECT +# define CSR_IREG CSR_MIREG +# define CSR_IPH CSR_MIPH +# define CSR_TOPEI CSR_MTOPEI +# define CSR_TOPI CSR_MTOPI + # define SR_IE SR_MIE # define SR_PIE SR_MPIE # define SR_PP SR_MPP @@ -324,6 +409,13 @@ # define CSR_TVAL CSR_STVAL # define CSR_IP CSR_SIP +# define CSR_IEH CSR_SIEH +# define CSR_ISELECT CSR_SISELECT +# define CSR_IREG CSR_SIREG +# define CSR_IPH CSR_SIPH +# define CSR_TOPEI CSR_STOPEI +# define CSR_TOPI CSR_STOPI + # define SR_IE SR_SIE # define SR_PIE SR_SPIE # define SR_PP SR_SPP diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h new file mode 100644 index 000000000000..6e4dee49d84b --- /dev/null +++ b/arch/riscv/include/asm/entry-common.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_RISCV_ENTRY_COMMON_H +#define _ASM_RISCV_ENTRY_COMMON_H + +#include <asm/stacktrace.h> + +void handle_page_fault(struct pt_regs *regs); +void handle_break(struct pt_regs *regs); + +#endif /* _ASM_RISCV_ENTRY_COMMON_H */ diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h index 5c3e7b97fcc6..0a55099bb734 100644 --- a/arch/riscv/include/asm/fixmap.h +++ b/arch/riscv/include/asm/fixmap.h @@ -22,6 +22,14 @@ */ enum fixed_addresses { FIX_HOLE, + /* + * The fdt fixmap mapping must be PMD aligned and will be mapped + * using PMD entries in fixmap_pmd in 64-bit and a PGD entry in 32-bit. + */ + FIX_FDT_END, + FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1, + + /* Below fixmaps will be mapped using fixmap_pte */ FIX_PTE, FIX_PMD, FIX_PUD, diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 9e73922e1e2e..d47d87c2d7e3 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -109,6 +109,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #define ftrace_init_nop ftrace_init_nop #endif -#endif +#endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* _ASM_RISCV_FTRACE_H */ diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index ec19d6afc896..fe6f23006641 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -2,7 +2,6 @@ #ifndef _ASM_RISCV_HUGETLB_H #define _ASM_RISCV_HUGETLB_H -#include <asm-generic/hugetlb.h> #include <asm/page.h> static inline void arch_clear_hugepage_flags(struct page *page) @@ -11,4 +10,37 @@ static inline void arch_clear_hugepage_flags(struct page *page) } #define arch_clear_hugepage_flags arch_clear_hugepage_flags +#ifdef CONFIG_RISCV_ISA_SVNAPOT +#define __HAVE_ARCH_HUGE_PTE_CLEAR +void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz); + +#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT +void set_huge_pte_at(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, pte_t pte); + +#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); + +#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH +pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); + +#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT +void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); + +#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS +int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty); + +pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags); +#define arch_make_huge_pte arch_make_huge_pte + +#endif /*CONFIG_RISCV_ISA_SVNAPOT*/ + +#include <asm-generic/hugetlb.h> + #endif /* _ASM_RISCV_HUGETLB_H */ diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index e3021b2590de..e0c40a4c63d5 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -42,10 +42,20 @@ #define RISCV_ISA_EXT_ZBB 30 #define RISCV_ISA_EXT_ZICBOM 31 #define RISCV_ISA_EXT_ZIHINTPAUSE 32 +#define RISCV_ISA_EXT_SVNAPOT 33 +#define RISCV_ISA_EXT_ZICBOZ 34 +#define RISCV_ISA_EXT_SMAIA 35 +#define RISCV_ISA_EXT_SSAIA 36 #define RISCV_ISA_EXT_MAX 64 #define RISCV_ISA_EXT_NAME_LEN_MAX 32 +#ifdef CONFIG_RISCV_M_MODE +#define RISCV_ISA_EXT_SxAIA RISCV_ISA_EXT_SMAIA +#else +#define RISCV_ISA_EXT_SxAIA RISCV_ISA_EXT_SSAIA +#endif + #ifndef __ASSEMBLY__ #include <linux/jump_label.h> @@ -57,18 +67,31 @@ struct riscv_isa_ext_data { unsigned int isa_ext_id; }; +unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); + +#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext) + +bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit); +#define riscv_isa_extension_available(isa_bitmap, ext) \ + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) + static __always_inline bool riscv_has_extension_likely(const unsigned long ext) { compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); - asm_volatile_goto( - ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_no); + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + asm_volatile_goto( + ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) + : + : [ext] "i" (ext) + : + : l_no); + } else { + if (!__riscv_isa_extension_available(NULL, ext)) + goto l_no; + } return true; l_no: @@ -81,26 +104,23 @@ riscv_has_extension_unlikely(const unsigned long ext) compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); - asm_volatile_goto( - ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_yes); + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + asm_volatile_goto( + ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) + : + : [ext] "i" (ext) + : + : l_yes); + } else { + if (__riscv_isa_extension_available(NULL, ext)) + goto l_yes; + } return false; l_yes: return true; } -unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); - -#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext) - -bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit); -#define riscv_isa_extension_available(isa_bitmap, ext) \ - __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) - #endif #endif /* _ASM_RISCV_HWCAP_H */ diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h new file mode 100644 index 000000000000..78936f4ff513 --- /dev/null +++ b/arch/riscv/include/asm/hwprobe.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright 2023 Rivos, Inc + */ + +#ifndef _ASM_HWPROBE_H +#define _ASM_HWPROBE_H + +#include <uapi/asm/hwprobe.h> + +#define RISCV_HWPROBE_MAX_KEY 5 + +#endif diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h index e01ab51f50d2..6960beb75f32 100644 --- a/arch/riscv/include/asm/insn-def.h +++ b/arch/riscv/include/asm/insn-def.h @@ -192,4 +192,8 @@ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(2)) +#define CBO_zero(base) \ + INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ + RS1(base), SIMM12(4)) + #endif /* __ASM_INSN_DEF_H */ diff --git a/arch/riscv/include/asm/irq.h b/arch/riscv/include/asm/irq.h index e4c435509983..43b9ebfbd943 100644 --- a/arch/riscv/include/asm/irq.h +++ b/arch/riscv/include/asm/irq.h @@ -12,6 +12,10 @@ #include <asm-generic/irq.h> +void riscv_set_intc_hwnode_fn(struct fwnode_handle *(*fn)(void)); + +struct fwnode_handle *riscv_get_intc_hwnode(void); + extern void __init init_IRQ(void); #endif /* _ASM_RISCV_IRQ_H */ diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h new file mode 100644 index 000000000000..1de0717112e5 --- /dev/null +++ b/arch/riscv/include/asm/kvm_aia.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + * Copyright (C) 2022 Ventana Micro Systems Inc. + * + * Authors: + * Anup Patel <apatel@ventanamicro.com> + */ + +#ifndef __KVM_RISCV_AIA_H +#define __KVM_RISCV_AIA_H + +#include <linux/jump_label.h> +#include <linux/kvm_types.h> +#include <asm/csr.h> + +struct kvm_aia { + /* In-kernel irqchip created */ + bool in_kernel; + + /* In-kernel irqchip initialized */ + bool initialized; +}; + +struct kvm_vcpu_aia_csr { + unsigned long vsiselect; + unsigned long hviprio1; + unsigned long hviprio2; + unsigned long vsieh; + unsigned long hviph; + unsigned long hviprio1h; + unsigned long hviprio2h; +}; + +struct kvm_vcpu_aia { + /* CPU AIA CSR context of Guest VCPU */ + struct kvm_vcpu_aia_csr guest_csr; + + /* CPU AIA CSR context upon Guest VCPU reset */ + struct kvm_vcpu_aia_csr guest_reset_csr; +}; + +#define kvm_riscv_aia_initialized(k) ((k)->arch.aia.initialized) + +#define irqchip_in_kernel(k) ((k)->arch.aia.in_kernel) + +DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available); +#define kvm_riscv_aia_available() \ + static_branch_unlikely(&kvm_riscv_aia_available) + +#define KVM_RISCV_AIA_IMSIC_TOPEI (ISELECT_MASK + 1) +static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, + unsigned long isel, + unsigned long *val, + unsigned long new_val, + unsigned long wr_mask) +{ + return 0; +} + +#ifdef CONFIG_32BIT +void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu); +#else +static inline void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu) +{ +} +static inline void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu) +{ +} +#endif +bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask); + +void kvm_riscv_vcpu_aia_update_hvip(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_aia_load(struct kvm_vcpu *vcpu, int cpu); +void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu); +int kvm_riscv_vcpu_aia_get_csr(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long *out_val); +int kvm_riscv_vcpu_aia_set_csr(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long val); + +int kvm_riscv_vcpu_aia_rmw_topei(struct kvm_vcpu *vcpu, + unsigned int csr_num, + unsigned long *val, + unsigned long new_val, + unsigned long wr_mask); +int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num, + unsigned long *val, unsigned long new_val, + unsigned long wr_mask); +#define KVM_RISCV_VCPU_AIA_CSR_FUNCS \ +{ .base = CSR_SIREG, .count = 1, .func = kvm_riscv_vcpu_aia_rmw_ireg }, \ +{ .base = CSR_STOPEI, .count = 1, .func = kvm_riscv_vcpu_aia_rmw_topei }, + +static inline int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu) +{ + return 1; +} + +static inline void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu) +{ +} + +static inline int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static inline void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu) +{ +} + +static inline void kvm_riscv_aia_init_vm(struct kvm *kvm) +{ +} + +static inline void kvm_riscv_aia_destroy_vm(struct kvm *kvm) +{ +} + +void kvm_riscv_aia_enable(void); +void kvm_riscv_aia_disable(void); +int kvm_riscv_aia_init(void); +void kvm_riscv_aia_exit(void); + +#endif diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index cc7da66ee0c0..ee0acccb1d3b 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -14,6 +14,7 @@ #include <linux/kvm_types.h> #include <linux/spinlock.h> #include <asm/hwcap.h> +#include <asm/kvm_aia.h> #include <asm/kvm_vcpu_fp.h> #include <asm/kvm_vcpu_insn.h> #include <asm/kvm_vcpu_sbi.h> @@ -94,6 +95,9 @@ struct kvm_arch { /* Guest Timer */ struct kvm_guest_timer timer; + + /* AIA Guest/VM context */ + struct kvm_aia aia; }; struct kvm_cpu_trap { @@ -200,8 +204,9 @@ struct kvm_vcpu_arch { * in irqs_pending. Our approach is modeled around multiple producer * and single consumer problem where the consumer is the VCPU itself. */ - unsigned long irqs_pending; - unsigned long irqs_pending_mask; +#define KVM_RISCV_VCPU_NR_IRQS 64 + DECLARE_BITMAP(irqs_pending, KVM_RISCV_VCPU_NR_IRQS); + DECLARE_BITMAP(irqs_pending_mask, KVM_RISCV_VCPU_NR_IRQS); /* VCPU Timer */ struct kvm_vcpu_timer timer; @@ -221,6 +226,9 @@ struct kvm_vcpu_arch { /* SBI context */ struct kvm_vcpu_sbi_context sbi_context; + /* AIA VCPU context */ + struct kvm_vcpu_aia aia_context; + /* Cache pages needed to program page tables with spinlock held */ struct kvm_mmu_memory_cache mmu_page_cache; @@ -327,7 +335,7 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq); int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq); void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu); -bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask); +bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask); void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu); diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h index 8425556af7d1..4278125a38a5 100644 --- a/arch/riscv/include/asm/kvm_vcpu_sbi.h +++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h @@ -16,6 +16,7 @@ struct kvm_vcpu_sbi_context { int return_handled; + bool extension_disabled[KVM_RISCV_SBI_EXT_MAX]; }; struct kvm_vcpu_sbi_return { @@ -45,7 +46,12 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, struct kvm_run *run, u32 type, u64 flags); int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run); -const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid); +int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg); +int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg); +const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext( + struct kvm_vcpu *vcpu, unsigned long extid); int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run); #ifdef CONFIG_RISCV_SBI_V01 diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index 5ff1f19fd45c..0099dc116168 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -19,8 +19,6 @@ typedef struct { #ifdef CONFIG_SMP /* A local icache flush is needed before user execution can resume. */ cpumask_t icache_stale_mask; - /* A local tlb flush is needed before user execution can resume. */ - cpumask_t tlb_stale_mask; #endif } mm_context_t; diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 7fed7c431928..b55ba20903ec 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -16,11 +16,6 @@ #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE - 1)) -#ifdef CONFIG_64BIT -#define HUGE_MAX_HSTATE 2 -#else -#define HUGE_MAX_HSTATE 1 -#endif #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) @@ -49,10 +44,14 @@ #ifndef __ASSEMBLY__ +#ifdef CONFIG_RISCV_ISA_ZICBOZ +void clear_page(void *page); +#else #define clear_page(pgaddr) memset((pgaddr), 0, PAGE_SIZE) +#endif #define copy_page(to, from) memcpy((to), (from), PAGE_SIZE) -#define clear_user_page(pgaddr, vaddr, page) memset((pgaddr), 0, PAGE_SIZE) +#define clear_user_page(pgaddr, vaddr, page) clear_page(pgaddr) #define copy_user_page(vto, vfrom, vaddr, topg) \ memcpy((vto), (vfrom), PAGE_SIZE) @@ -90,9 +89,16 @@ typedef struct page *pgtable_t; #define PTE_FMT "%08lx" #endif +#ifdef CONFIG_64BIT +/* + * We override this value as its generic definition uses __pa too early in + * the boot process (before kernel_map.va_pa_offset is set). + */ +#define MIN_MEMBLOCK_ADDR 0 +#endif + #ifdef CONFIG_MMU -extern unsigned long riscv_pfn_base; -#define ARCH_PFN_OFFSET (riscv_pfn_base) +#define ARCH_PFN_OFFSET (PFN_DOWN((unsigned long)phys_ram_base)) #else #define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT) #endif /* CONFIG_MMU */ @@ -122,7 +128,11 @@ extern phys_addr_t phys_ram_base; #define is_linear_mapping(x) \ ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < PAGE_OFFSET + KERN_VIRT_SIZE)) +#ifndef CONFIG_DEBUG_VIRTUAL #define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset)) +#else +void *linear_mapping_pa_to_va(unsigned long x); +#endif #define kernel_mapping_pa_to_va(y) ({ \ unsigned long _y = (unsigned long)(y); \ (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ? \ @@ -131,7 +141,11 @@ extern phys_addr_t phys_ram_base; }) #define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x) +#ifndef CONFIG_DEBUG_VIRTUAL #define linear_mapping_va_to_pa(x) ((unsigned long)(x) - kernel_map.va_pa_offset) +#else +phys_addr_t linear_mapping_va_to_pa(unsigned long x); +#endif #define kernel_mapping_va_to_pa(y) ({ \ unsigned long _y = (unsigned long)(y); \ (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ? \ diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h index f433121774c0..63c98833d510 100644 --- a/arch/riscv/include/asm/patch.h +++ b/arch/riscv/include/asm/patch.h @@ -9,4 +9,6 @@ int patch_text_nosync(void *addr, const void *insns, size_t len); int patch_text(void *addr, u32 *insns, int ninsns); +extern int riscv_patch_in_stop_machine; + #endif /* _ASM_RISCV_PATCH_H */ diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 42a042c0e13e..7a5097202e15 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -79,6 +79,40 @@ typedef struct { #define _PAGE_PFN_MASK GENMASK(53, 10) /* + * [63] Svnapot definitions: + * 0 Svnapot disabled + * 1 Svnapot enabled + */ +#define _PAGE_NAPOT_SHIFT 63 +#define _PAGE_NAPOT BIT(_PAGE_NAPOT_SHIFT) +/* + * Only 64KB (order 4) napot ptes supported. + */ +#define NAPOT_CONT_ORDER_BASE 4 +enum napot_cont_order { + NAPOT_CONT64KB_ORDER = NAPOT_CONT_ORDER_BASE, + NAPOT_ORDER_MAX, +}; + +#define for_each_napot_order(order) \ + for (order = NAPOT_CONT_ORDER_BASE; order < NAPOT_ORDER_MAX; order++) +#define for_each_napot_order_rev(order) \ + for (order = NAPOT_ORDER_MAX - 1; \ + order >= NAPOT_CONT_ORDER_BASE; order--) +#define napot_cont_order(val) (__builtin_ctzl((val.pte >> _PAGE_PFN_SHIFT) << 1)) + +#define napot_cont_shift(order) ((order) + PAGE_SHIFT) +#define napot_cont_size(order) BIT(napot_cont_shift(order)) +#define napot_cont_mask(order) (~(napot_cont_size(order) - 1UL)) +#define napot_pte_num(order) BIT(order) + +#ifdef CONFIG_RISCV_ISA_SVNAPOT +#define HUGE_MAX_HSTATE (2 + (NAPOT_ORDER_MAX - NAPOT_CONT_ORDER_BASE)) +#else +#define HUGE_MAX_HSTATE 2 +#endif + +/* * [62:61] Svpbmt Memory Type definitions: * * 00 - PMA Normal Cacheable, No change to implied PMA memory type diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index ab05f892d317..2258b27173b0 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -87,9 +87,13 @@ #define FIXADDR_TOP PCI_IO_START #ifdef CONFIG_64BIT -#define FIXADDR_SIZE PMD_SIZE +#define MAX_FDT_SIZE PMD_SIZE +#define FIX_FDT_SIZE (MAX_FDT_SIZE + SZ_2M) +#define FIXADDR_SIZE (PMD_SIZE + FIX_FDT_SIZE) #else -#define FIXADDR_SIZE PGDIR_SIZE +#define MAX_FDT_SIZE PGDIR_SIZE +#define FIX_FDT_SIZE MAX_FDT_SIZE +#define FIXADDR_SIZE (PGDIR_SIZE + FIX_FDT_SIZE) #endif #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) @@ -264,10 +268,47 @@ static inline pte_t pud_pte(pud_t pud) return __pte(pud_val(pud)); } +#ifdef CONFIG_RISCV_ISA_SVNAPOT + +static __always_inline bool has_svnapot(void) +{ + return riscv_has_extension_likely(RISCV_ISA_EXT_SVNAPOT); +} + +static inline unsigned long pte_napot(pte_t pte) +{ + return pte_val(pte) & _PAGE_NAPOT; +} + +static inline pte_t pte_mknapot(pte_t pte, unsigned int order) +{ + int pos = order - 1 + _PAGE_PFN_SHIFT; + unsigned long napot_bit = BIT(pos); + unsigned long napot_mask = ~GENMASK(pos, _PAGE_PFN_SHIFT); + + return __pte((pte_val(pte) & napot_mask) | napot_bit | _PAGE_NAPOT); +} + +#else + +static __always_inline bool has_svnapot(void) { return false; } + +static inline unsigned long pte_napot(pte_t pte) +{ + return 0; +} + +#endif /* CONFIG_RISCV_ISA_SVNAPOT */ + /* Yields the page frame number (PFN) of a page table entry */ static inline unsigned long pte_pfn(pte_t pte) { - return __page_val_to_pfn(pte_val(pte)); + unsigned long res = __page_val_to_pfn(pte_val(pte)); + + if (has_svnapot() && pte_napot(pte)) + res = res & (res - 1UL); + + return res; } #define pte_page(x) pfn_to_page(pte_pfn(x)) diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h index 6ecd461129d2..b5b0adcc85c1 100644 --- a/arch/riscv/include/asm/ptrace.h +++ b/arch/riscv/include/asm/ptrace.h @@ -53,6 +53,9 @@ struct pt_regs { unsigned long orig_a0; }; +#define PTRACE_SYSEMU 0x1f +#define PTRACE_SYSEMU_SINGLESTEP 0x20 + #ifdef CONFIG_64BIT #define REG_FMT "%016lx" #else @@ -121,8 +124,6 @@ extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, unsigned long frame_pointer); -int do_syscall_trace_enter(struct pt_regs *regs); -void do_syscall_trace_exit(struct pt_regs *regs); /** * regs_get_register() - get register value from its offset @@ -172,6 +173,11 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, return 0; } +static inline int regs_irqs_disabled(struct pt_regs *regs) +{ + return !(regs->status & SR_PIE); +} + #endif /* __ASSEMBLY__ */ #endif /* _ASM_RISCV_PTRACE_H */ diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 945b7be249c1..5b4a1bf5f439 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -271,8 +271,7 @@ long sbi_get_marchid(void); long sbi_get_mimpid(void); void sbi_set_timer(uint64_t stime_value); void sbi_shutdown(void); -void sbi_clear_ipi(void); -int sbi_send_ipi(const struct cpumask *cpu_mask); +void sbi_send_ipi(unsigned int cpu); int sbi_remote_fence_i(const struct cpumask *cpu_mask); int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, unsigned long start, @@ -296,7 +295,7 @@ int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long asid); -int sbi_probe_extension(int ext); +long sbi_probe_extension(int ext); /* Check if current SBI specification version is 0.1 or not */ static inline int sbi_spec_is_0_1(void) @@ -335,4 +334,10 @@ unsigned long riscv_cached_mvendorid(unsigned int cpu_id); unsigned long riscv_cached_marchid(unsigned int cpu_id); unsigned long riscv_cached_mimpid(unsigned int cpu_id); +#if IS_ENABLED(CONFIG_SMP) && IS_ENABLED(CONFIG_RISCV_SBI) +void sbi_ipi_init(void); +#else +static inline void sbi_ipi_init(void) { } +#endif + #endif /* _ASM_RISCV_SBI_H */ diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h index a2c14d4b3993..ec11001c3fe0 100644 --- a/arch/riscv/include/asm/set_memory.h +++ b/arch/riscv/include/asm/set_memory.h @@ -56,4 +56,7 @@ bool kernel_page_present(struct page *page); #define SECTION_ALIGN L1_CACHE_BYTES #endif /* CONFIG_STRICT_KERNEL_RWX */ +#define PECOFF_SECTION_ALIGNMENT 0x1000 +#define PECOFF_FILE_ALIGNMENT 0x200 + #endif /* _ASM_RISCV_SET_MEMORY_H */ diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index 3831b638ecab..c4b77017ec58 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -15,12 +15,10 @@ struct seq_file; extern unsigned long boot_cpu_hartid; -struct riscv_ipi_ops { - void (*ipi_inject)(const struct cpumask *target); - void (*ipi_clear)(void); -}; - #ifdef CONFIG_SMP + +#include <linux/jump_label.h> + /* * Mapping between linux logical cpu index and hartid. */ @@ -33,9 +31,6 @@ void show_ipi_stats(struct seq_file *p, int prec); /* SMP initialization hook for setup_arch */ void __init setup_smp(void); -/* Called from C code, this handles an IPI. */ -void handle_IPI(struct pt_regs *regs); - /* Hook for the generic smp_call_function_many() routine. */ void arch_send_call_function_ipi_mask(struct cpumask *mask); @@ -44,11 +39,22 @@ void arch_send_call_function_single_ipi(int cpu); int riscv_hartid_to_cpuid(unsigned long hartid); -/* Set custom IPI operations */ -void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops); +/* Enable IPI for CPU hotplug */ +void riscv_ipi_enable(void); + +/* Disable IPI for CPU hotplug */ +void riscv_ipi_disable(void); -/* Clear IPI for current CPU */ -void riscv_clear_ipi(void); +/* Check if IPI interrupt numbers are available */ +bool riscv_ipi_have_virq_range(void); + +/* Set the IPI interrupt numbers for arch (called by irqchip drivers) */ +void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence); + +/* Check if we can use IPIs for remote FENCEs */ +DECLARE_STATIC_KEY_FALSE(riscv_ipi_for_rfence); +#define riscv_use_ipi_for_rfence() \ + static_branch_unlikely(&riscv_ipi_for_rfence) /* Check other CPUs stop or not */ bool smp_crash_stop_failed(void); @@ -85,14 +91,29 @@ static inline unsigned long cpuid_to_hartid_map(int cpu) return boot_cpu_hartid; } -static inline void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops) +static inline void riscv_ipi_enable(void) +{ +} + +static inline void riscv_ipi_disable(void) { } -static inline void riscv_clear_ipi(void) +static inline bool riscv_ipi_have_virq_range(void) +{ + return false; +} + +static inline void riscv_ipi_set_virq_range(int virq, int nr, + bool use_for_rfence) { } +static inline bool riscv_use_ipi_for_rfence(void) +{ + return false; +} + #endif /* CONFIG_SMP */ #if defined(CONFIG_HOTPLUG_CPU) && (CONFIG_SMP) diff --git a/arch/riscv/include/asm/stacktrace.h b/arch/riscv/include/asm/stacktrace.h index 3450c1912afd..f7e8ef2418b9 100644 --- a/arch/riscv/include/asm/stacktrace.h +++ b/arch/riscv/include/asm/stacktrace.h @@ -16,4 +16,9 @@ extern void notrace walk_stackframe(struct task_struct *task, struct pt_regs *re extern void dump_backtrace(struct pt_regs *regs, struct task_struct *task, const char *loglvl); +static inline bool on_thread_stack(void) +{ + return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1)); +} + #endif /* _ASM_RISCV_STACKTRACE_H */ diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h index 8be391c2aecb..02f87867389a 100644 --- a/arch/riscv/include/asm/suspend.h +++ b/arch/riscv/include/asm/suspend.h @@ -21,6 +21,11 @@ struct suspend_context { #endif }; +/* + * Used by hibernation core and cleared during resume sequence + */ +extern int in_suspend; + /* Low-level CPU suspend entry function */ int __cpu_suspend_enter(struct suspend_context *context); @@ -33,4 +38,21 @@ int cpu_suspend(unsigned long arg, /* Low-level CPU resume entry function */ int __cpu_resume_enter(unsigned long hartid, unsigned long context); +/* Used to save and restore the CSRs */ +void suspend_save_csrs(struct suspend_context *context); +void suspend_restore_csrs(struct suspend_context *context); + +/* Low-level API to support hibernation */ +int swsusp_arch_suspend(void); +int swsusp_arch_resume(void); +int arch_hibernation_header_save(void *addr, unsigned int max_size); +int arch_hibernation_header_restore(void *addr); +int __hibernate_cpu_resume(void); + +/* Used to resume on the CPU we hibernated on */ +int hibernate_resume_nonboot_cpu_disable(void); + +asmlinkage void hibernate_restore_image(unsigned long resume_satp, unsigned long satp_temp, + unsigned long cpu_resume); +asmlinkage int hibernate_core_restore_code(void); #endif diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index 384a63b86420..0148c6bd9675 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -10,6 +10,7 @@ #ifndef _ASM_RISCV_SYSCALL_H #define _ASM_RISCV_SYSCALL_H +#include <asm/hwprobe.h> #include <uapi/linux/audit.h> #include <linux/sched.h> #include <linux/err.h> @@ -74,5 +75,29 @@ static inline int syscall_get_arch(struct task_struct *task) #endif } +typedef long (*syscall_t)(ulong, ulong, ulong, ulong, ulong, ulong, ulong); +static inline void syscall_handler(struct pt_regs *regs, ulong syscall) +{ + syscall_t fn; + +#ifdef CONFIG_COMPAT + if ((regs->status & SR_UXL) == SR_UXL_32) + fn = compat_sys_call_table[syscall]; + else +#endif + fn = sys_call_table[syscall]; + + regs->a0 = fn(regs->orig_a0, regs->a1, regs->a2, + regs->a3, regs->a4, regs->a5, regs->a6); +} + +static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs) +{ + return false; +} + asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t); + +asmlinkage long sys_riscv_hwprobe(struct riscv_hwprobe *, size_t, size_t, + unsigned long *, unsigned int); #endif /* _ASM_RISCV_SYSCALL_H */ diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index f704c8dd57e0..e0d202134b44 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -67,6 +67,7 @@ struct thread_info { long kernel_sp; /* Kernel stack pointer */ long user_sp; /* User stack pointer */ int cpu; + unsigned long syscall_work; /* SYSCALL_WORK_ flags */ }; /* @@ -89,26 +90,18 @@ struct thread_info { * - pending work-to-be-done flags are in lowest half-word * - other flags in upper half-word(s) */ -#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_RESTORE_SIGMASK 4 /* restore signal mask in do_signal() */ #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ -#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ -#define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ -#define TIF_SECCOMP 8 /* syscall secure computing */ #define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */ #define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */ #define TIF_32BIT 11 /* compat-mode 32bit process */ -#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) -#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -#define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_UPROBE (1 << TIF_UPROBE) @@ -116,8 +109,4 @@ struct thread_info { (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \ _TIF_NOTIFY_SIGNAL | _TIF_UPROBE) -#define _TIF_SYSCALL_WORK \ - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP) - #endif /* _ASM_RISCV_THREAD_INFO_H */ diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 907b9efd39a8..a09196f8de68 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -12,6 +12,8 @@ #include <asm/errata_list.h> #ifdef CONFIG_MMU +extern unsigned long asid_mask; + static inline void local_flush_tlb_all(void) { __asm__ __volatile__ ("sfence.vma" : : : "memory"); @@ -22,24 +24,6 @@ static inline void local_flush_tlb_page(unsigned long addr) { ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); } - -static inline void local_flush_tlb_all_asid(unsigned long asid) -{ - __asm__ __volatile__ ("sfence.vma x0, %0" - : - : "r" (asid) - : "memory"); -} - -static inline void local_flush_tlb_page_asid(unsigned long addr, - unsigned long asid) -{ - __asm__ __volatile__ ("sfence.vma %0, %1" - : - : "r" (addr), "r" (asid) - : "memory"); -} - #else /* CONFIG_MMU */ #define local_flush_tlb_all() do { } while (0) #define local_flush_tlb_page(addr) do { } while (0) diff --git a/arch/riscv/include/asm/topology.h b/arch/riscv/include/asm/topology.h new file mode 100644 index 000000000000..e316ab3b77f3 --- /dev/null +++ b/arch/riscv/include/asm/topology.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_TOPOLOGY_H +#define _ASM_RISCV_TOPOLOGY_H + +#include <linux/arch_topology.h> + +/* Replace task scheduler's default frequency-invariant accounting */ +#define arch_scale_freq_tick topology_scale_freq_tick +#define arch_set_freq_scale topology_set_freq_scale +#define arch_scale_freq_capacity topology_get_freq_scale +#define arch_scale_freq_invariant topology_scale_freq_invariant + +/* Replace task scheduler's default cpu-invariant accounting */ +#define arch_scale_cpu_capacity topology_get_cpu_scale + +/* Enable topology flag updates */ +#define arch_update_cpu_topology topology_update_cpu_topology + +#include <asm-generic/topology.h> + +#endif /* _ASM_RISCV_TOPOLOGY_H */ diff --git a/arch/riscv/include/asm/vdso/data.h b/arch/riscv/include/asm/vdso/data.h new file mode 100644 index 000000000000..dc2f76f58b76 --- /dev/null +++ b/arch/riscv/include/asm/vdso/data.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __RISCV_ASM_VDSO_DATA_H +#define __RISCV_ASM_VDSO_DATA_H + +#include <linux/types.h> +#include <vdso/datapage.h> +#include <asm/hwprobe.h> + +struct arch_vdso_data { + /* Stash static answers to the hwprobe queries when all CPUs are selected. */ + __u64 all_cpu_hwprobe_values[RISCV_HWPROBE_MAX_KEY + 1]; + + /* Boolean indicating all CPUs have the same static hwprobe values. */ + __u8 homogeneous_cpus; +}; + +#endif /* __RISCV_ASM_VDSO_DATA_H */ diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h index 77d9c2f721c4..ba3283cf7acc 100644 --- a/arch/riscv/include/asm/vdso/gettimeofday.h +++ b/arch/riscv/include/asm/vdso/gettimeofday.h @@ -9,6 +9,12 @@ #include <asm/csr.h> #include <uapi/linux/time.h> +/* + * 32-bit land is lacking generic time vsyscalls as well as the legacy 32-bit + * time syscalls like gettimeofday. Skip these definitions since on 32-bit. + */ +#ifdef CONFIG_GENERIC_TIME_VSYSCALL + #define VDSO_HAS_CLOCK_GETRES 1 static __always_inline @@ -60,6 +66,8 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) return ret; } +#endif /* CONFIG_GENERIC_TIME_VSYSCALL */ + static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd) { diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h index 48da5371f1e9..58d3e447f191 100644 --- a/arch/riscv/include/asm/vmalloc.h +++ b/arch/riscv/include/asm/vmalloc.h @@ -17,6 +17,65 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot) return true; } -#endif +#ifdef CONFIG_RISCV_ISA_SVNAPOT +#include <linux/pgtable.h> +#define arch_vmap_pte_range_map_size arch_vmap_pte_range_map_size +static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, unsigned long end, + u64 pfn, unsigned int max_page_shift) +{ + unsigned long map_size = PAGE_SIZE; + unsigned long size, order; + + if (!has_svnapot()) + return map_size; + + for_each_napot_order_rev(order) { + if (napot_cont_shift(order) > max_page_shift) + continue; + + size = napot_cont_size(order); + if (end - addr < size) + continue; + + if (!IS_ALIGNED(addr, size)) + continue; + + if (!IS_ALIGNED(PFN_PHYS(pfn), size)) + continue; + + map_size = size; + break; + } + + return map_size; +} + +#define arch_vmap_pte_supported_shift arch_vmap_pte_supported_shift +static inline int arch_vmap_pte_supported_shift(unsigned long size) +{ + int shift = PAGE_SHIFT; + unsigned long order; + + if (!has_svnapot()) + return shift; + + WARN_ON_ONCE(size >= PMD_SIZE); + + for_each_napot_order_rev(order) { + if (napot_cont_size(order) > size) + continue; + + if (!IS_ALIGNED(size, napot_cont_size(order))) + continue; + + shift = napot_cont_shift(order); + break; + } + + return shift; +} + +#endif /* CONFIG_RISCV_ISA_SVNAPOT */ +#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ #endif /* _ASM_RISCV_VMALLOC_H */ diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h new file mode 100644 index 000000000000..8d745a4ad8a2 --- /dev/null +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright 2023 Rivos, Inc + */ + +#ifndef _UAPI_ASM_HWPROBE_H +#define _UAPI_ASM_HWPROBE_H + +#include <linux/types.h> + +/* + * Interface for probing hardware capabilities from userspace, see + * Documentation/riscv/hwprobe.rst for more information. + */ +struct riscv_hwprobe { + __s64 key; + __u64 value; +}; + +#define RISCV_HWPROBE_KEY_MVENDORID 0 +#define RISCV_HWPROBE_KEY_MARCHID 1 +#define RISCV_HWPROBE_KEY_MIMPID 2 +#define RISCV_HWPROBE_KEY_BASE_BEHAVIOR 3 +#define RISCV_HWPROBE_BASE_BEHAVIOR_IMA (1 << 0) +#define RISCV_HWPROBE_KEY_IMA_EXT_0 4 +#define RISCV_HWPROBE_IMA_FD (1 << 0) +#define RISCV_HWPROBE_IMA_C (1 << 1) +#define RISCV_HWPROBE_KEY_CPUPERF_0 5 +#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) +#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) +#define RISCV_HWPROBE_MISALIGNED_SLOW (2 << 0) +#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0) +#define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0) +#define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0) +/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ + +#endif diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 92af6f3f057c..f92790c9481a 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -12,6 +12,7 @@ #ifndef __ASSEMBLY__ #include <linux/types.h> +#include <asm/bitsperlong.h> #include <asm/ptrace.h> #define __KVM_HAVE_READONLY_MEM @@ -52,6 +53,7 @@ struct kvm_riscv_config { unsigned long mvendorid; unsigned long marchid; unsigned long mimpid; + unsigned long zicboz_block_size; }; /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ @@ -64,7 +66,7 @@ struct kvm_riscv_core { #define KVM_RISCV_MODE_S 1 #define KVM_RISCV_MODE_U 0 -/* CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +/* General CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ struct kvm_riscv_csr { unsigned long sstatus; unsigned long sie; @@ -78,6 +80,17 @@ struct kvm_riscv_csr { unsigned long scounteren; }; +/* AIA CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +struct kvm_riscv_aia_csr { + unsigned long siselect; + unsigned long iprio1; + unsigned long iprio2; + unsigned long sieh; + unsigned long siph; + unsigned long iprio1h; + unsigned long iprio2h; +}; + /* TIMER registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ struct kvm_riscv_timer { __u64 frequency; @@ -105,9 +118,29 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_SVINVAL, KVM_RISCV_ISA_EXT_ZIHINTPAUSE, KVM_RISCV_ISA_EXT_ZICBOM, + KVM_RISCV_ISA_EXT_ZICBOZ, + KVM_RISCV_ISA_EXT_ZBB, + KVM_RISCV_ISA_EXT_SSAIA, KVM_RISCV_ISA_EXT_MAX, }; +/* + * SBI extension IDs specific to KVM. This is not the same as the SBI + * extension IDs defined by the RISC-V SBI specification. + */ +enum KVM_RISCV_SBI_EXT_ID { + KVM_RISCV_SBI_EXT_V01 = 0, + KVM_RISCV_SBI_EXT_TIME, + KVM_RISCV_SBI_EXT_IPI, + KVM_RISCV_SBI_EXT_RFENCE, + KVM_RISCV_SBI_EXT_SRST, + KVM_RISCV_SBI_EXT_HSM, + KVM_RISCV_SBI_EXT_PMU, + KVM_RISCV_SBI_EXT_EXPERIMENTAL, + KVM_RISCV_SBI_EXT_VENDOR, + KVM_RISCV_SBI_EXT_MAX, +}; + /* Possible states for kvm_riscv_timer */ #define KVM_RISCV_TIMER_STATE_OFF 0 #define KVM_RISCV_TIMER_STATE_ON 1 @@ -118,6 +151,8 @@ enum KVM_RISCV_ISA_EXT_ID { /* If you need to interpret the index values, here is the key: */ #define KVM_REG_RISCV_TYPE_MASK 0x00000000FF000000 #define KVM_REG_RISCV_TYPE_SHIFT 24 +#define KVM_REG_RISCV_SUBTYPE_MASK 0x0000000000FF0000 +#define KVM_REG_RISCV_SUBTYPE_SHIFT 16 /* Config registers are mapped as type 1 */ #define KVM_REG_RISCV_CONFIG (0x01 << KVM_REG_RISCV_TYPE_SHIFT) @@ -131,8 +166,12 @@ enum KVM_RISCV_ISA_EXT_ID { /* Control and status registers are mapped as type 3 */ #define KVM_REG_RISCV_CSR (0x03 << KVM_REG_RISCV_TYPE_SHIFT) +#define KVM_REG_RISCV_CSR_GENERAL (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT) +#define KVM_REG_RISCV_CSR_AIA (0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT) #define KVM_REG_RISCV_CSR_REG(name) \ (offsetof(struct kvm_riscv_csr, name) / sizeof(unsigned long)) +#define KVM_REG_RISCV_CSR_AIA_REG(name) \ + (offsetof(struct kvm_riscv_aia_csr, name) / sizeof(unsigned long)) /* Timer registers are mapped as type 4 */ #define KVM_REG_RISCV_TIMER (0x04 << KVM_REG_RISCV_TYPE_SHIFT) @@ -152,6 +191,18 @@ enum KVM_RISCV_ISA_EXT_ID { /* ISA Extension registers are mapped as type 7 */ #define KVM_REG_RISCV_ISA_EXT (0x07 << KVM_REG_RISCV_TYPE_SHIFT) +/* SBI extension registers are mapped as type 8 */ +#define KVM_REG_RISCV_SBI_EXT (0x08 << KVM_REG_RISCV_TYPE_SHIFT) +#define KVM_REG_RISCV_SBI_SINGLE (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT) +#define KVM_REG_RISCV_SBI_MULTI_EN (0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT) +#define KVM_REG_RISCV_SBI_MULTI_DIS (0x2 << KVM_REG_RISCV_SUBTYPE_SHIFT) +#define KVM_REG_RISCV_SBI_MULTI_REG(__ext_id) \ + ((__ext_id) / __BITS_PER_LONG) +#define KVM_REG_RISCV_SBI_MULTI_MASK(__ext_id) \ + (1UL << ((__ext_id) % __BITS_PER_LONG)) +#define KVM_REG_RISCV_SBI_MULTI_REG_LAST \ + KVM_REG_RISCV_SBI_MULTI_REG(KVM_RISCV_SBI_EXT_MAX - 1) + #endif #endif /* __LINUX_KVM_RISCV_H */ diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h index 73d7cdd2ec49..950ab3fd4409 100644 --- a/arch/riscv/include/uapi/asm/unistd.h +++ b/arch/riscv/include/uapi/asm/unistd.h @@ -43,3 +43,12 @@ #define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15) #endif __SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache) + +/* + * Allows userspace to query the kernel for CPU architecture and + * microarchitecture details across a given set of CPUs. + */ +#ifndef __NR_riscv_hwprobe +#define __NR_riscv_hwprobe (__NR_arch_specific_syscall + 14) +#endif +__SYSCALL(__NR_riscv_hwprobe, sys_riscv_hwprobe) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 4cf303a779ab..fbdccc21418a 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -9,6 +9,7 @@ CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE) endif CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) +CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,) ifdef CONFIG_KEXEC AFLAGS_kexec_relocate.o := -mcmodel=medany $(call cc-option,-mno-relax) @@ -64,16 +65,16 @@ obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o obj-$(CONFIG_CPU_PM) += suspend_entry.o suspend.o +obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o -obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o - obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o obj-$(CONFIG_RISCV_SBI) += sbi.o ifeq ($(CONFIG_RISCV_SBI), y) +obj-$(CONFIG_SMP) += sbi-ipi.o obj-$(CONFIG_SMP) += cpu_ops_sbi.o endif obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o @@ -89,3 +90,5 @@ obj-$(CONFIG_EFI) += efi.o obj-$(CONFIG_COMPAT) += compat_syscall_table.o obj-$(CONFIG_COMPAT) += compat_signal.o obj-$(CONFIG_COMPAT) += compat_vdso/ + +obj-$(CONFIG_64BIT) += pi/ diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c index 2354c69dc7d1..6b75788c18e6 100644 --- a/arch/riscv/kernel/alternative.c +++ b/arch/riscv/kernel/alternative.c @@ -27,9 +27,11 @@ struct cpu_manufacturer_info_t { void (*patch_func)(struct alt_entry *begin, struct alt_entry *end, unsigned long archid, unsigned long impid, unsigned int stage); + void (*feature_probe_func)(unsigned int cpu, unsigned long archid, + unsigned long impid); }; -static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info) +static void riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info) { #ifdef CONFIG_RISCV_M_MODE cpu_mfr_info->vendor_id = csr_read(CSR_MVENDORID); @@ -41,6 +43,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf cpu_mfr_info->imp_id = sbi_get_mimpid(); #endif + cpu_mfr_info->feature_probe_func = NULL; switch (cpu_mfr_info->vendor_id) { #ifdef CONFIG_ERRATA_SIFIVE case SIFIVE_VENDOR_ID: @@ -50,6 +53,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf #ifdef CONFIG_ERRATA_THEAD case THEAD_VENDOR_ID: cpu_mfr_info->patch_func = thead_errata_patch_func; + cpu_mfr_info->feature_probe_func = thead_feature_probe_func; break; #endif default: @@ -139,6 +143,20 @@ void riscv_alternative_fix_offsets(void *alt_ptr, unsigned int len, } } +/* Called on each CPU as it starts */ +void probe_vendor_features(unsigned int cpu) +{ + struct cpu_manufacturer_info_t cpu_mfr_info; + + riscv_fill_cpu_mfr_info(&cpu_mfr_info); + if (!cpu_mfr_info.feature_probe_func) + return; + + cpu_mfr_info.feature_probe_func(cpu, + cpu_mfr_info.arch_id, + cpu_mfr_info.imp_id); +} + /* * This is called very early in the boot process (directly after we run * a feature detect on the boot CPU). No need to worry about other CPUs @@ -193,6 +211,7 @@ void __init apply_boot_alternatives(void) /* If called on non-boot cpu things could go wrong */ WARN_ON(smp_processor_id() != 0); + probe_vendor_features(0); _apply_alternatives((struct alt_entry *)__alt_start, (struct alt_entry *)__alt_end, RISCV_ALTERNATIVES_BOOT); diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index df9444397908..d6a75aac1d27 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -9,6 +9,7 @@ #include <linux/kbuild.h> #include <linux/mm.h> #include <linux/sched.h> +#include <linux/suspend.h> #include <asm/kvm_host.h> #include <asm/thread_info.h> #include <asm/ptrace.h> @@ -116,6 +117,10 @@ void asm_offsets(void) OFFSET(SUSPEND_CONTEXT_REGS, suspend_context, regs); + OFFSET(HIBERN_PBE_ADDR, pbe, address); + OFFSET(HIBERN_PBE_ORIG, pbe, orig_address); + OFFSET(HIBERN_PBE_NEXT, pbe, next); + OFFSET(KVM_ARCH_GUEST_ZERO, kvm_vcpu_arch, guest_context.zero); OFFSET(KVM_ARCH_GUEST_RA, kvm_vcpu_arch, guest_context.ra); OFFSET(KVM_ARCH_GUEST_SP, kvm_vcpu_arch, guest_context.sp); diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index e3829d2de5d9..09e9b88110d1 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -63,53 +63,12 @@ uintptr_t get_cache_geometry(u32 level, enum cache_type type) 0; } -static void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type, - unsigned int level, unsigned int size, - unsigned int sets, unsigned int line_size) +static void ci_leaf_init(struct cacheinfo *this_leaf, + struct device_node *node, + enum cache_type type, unsigned int level) { this_leaf->level = level; this_leaf->type = type; - this_leaf->size = size; - this_leaf->number_of_sets = sets; - this_leaf->coherency_line_size = line_size; - - /* - * If the cache is fully associative, there is no need to - * check the other properties. - */ - if (sets == 1) - return; - - /* - * Set the ways number for n-ways associative, make sure - * all properties are big than zero. - */ - if (sets > 0 && size > 0 && line_size > 0) - this_leaf->ways_of_associativity = (size / sets) / line_size; -} - -static void fill_cacheinfo(struct cacheinfo **this_leaf, - struct device_node *node, unsigned int level) -{ - unsigned int size, sets, line_size; - - if (!of_property_read_u32(node, "cache-size", &size) && - !of_property_read_u32(node, "cache-block-size", &line_size) && - !of_property_read_u32(node, "cache-sets", &sets)) { - ci_leaf_init((*this_leaf)++, CACHE_TYPE_UNIFIED, level, size, sets, line_size); - } - - if (!of_property_read_u32(node, "i-cache-size", &size) && - !of_property_read_u32(node, "i-cache-sets", &sets) && - !of_property_read_u32(node, "i-cache-block-size", &line_size)) { - ci_leaf_init((*this_leaf)++, CACHE_TYPE_INST, level, size, sets, line_size); - } - - if (!of_property_read_u32(node, "d-cache-size", &size) && - !of_property_read_u32(node, "d-cache-sets", &sets) && - !of_property_read_u32(node, "d-cache-block-size", &line_size)) { - ci_leaf_init((*this_leaf)++, CACHE_TYPE_DATA, level, size, sets, line_size); - } } int populate_cache_leaves(unsigned int cpu) @@ -120,24 +79,29 @@ int populate_cache_leaves(unsigned int cpu) struct device_node *prev = NULL; int levels = 1, level = 1; - /* Level 1 caches in cpu node */ - fill_cacheinfo(&this_leaf, np, level); + if (of_property_read_bool(np, "cache-size")) + ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); + if (of_property_read_bool(np, "i-cache-size")) + ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); + if (of_property_read_bool(np, "d-cache-size")) + ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); - /* Next level caches in cache nodes */ prev = np; while ((np = of_find_next_cache_node(np))) { of_node_put(prev); prev = np; - if (!of_device_is_compatible(np, "cache")) break; if (of_property_read_u32(np, "cache-level", &level)) break; if (level <= levels) break; - - fill_cacheinfo(&this_leaf, np, level); - + if (of_property_read_bool(np, "cache-size")) + ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); + if (of_property_read_bool(np, "i-cache-size")) + ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); + if (of_property_read_bool(np, "d-cache-size")) + ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); levels = level; } of_node_put(np); diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile index 260daf3236d3..189345773e7e 100644 --- a/arch/riscv/kernel/compat_vdso/Makefile +++ b/arch/riscv/kernel/compat_vdso/Makefile @@ -14,6 +14,10 @@ COMPAT_LD := $(LD) COMPAT_CC_FLAGS := -march=rv32g -mabi=ilp32 COMPAT_LD_FLAGS := -melf32lriscv +# Disable attributes, as they're useless and break the build. +COMPAT_CC_FLAGS += $(call cc-option,-mno-riscv-attribute) +COMPAT_CC_FLAGS += $(call as-option,-Wa$(comma)-mno-arch-attr) + # Files to link into the compat_vdso obj-compat_vdso = $(patsubst %, %.o, $(compat_vdso-syms)) note.o @@ -22,7 +26,7 @@ targets := $(obj-compat_vdso) compat_vdso.so compat_vdso.so.dbg compat_vdso.lds obj-compat_vdso := $(addprefix $(obj)/, $(obj-compat_vdso)) obj-y += compat_vdso.o -CPPFLAGS_compat_vdso.lds += -P -C -U$(ARCH) +CPPFLAGS_compat_vdso.lds += -P -C -DCOMPAT_VDSO -U$(ARCH) # Disable profiling and instrumentation for VDSO code GCOV_PROFILE := n diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c index f7a832e3a1d1..a941adc7cbf2 100644 --- a/arch/riscv/kernel/cpu-hotplug.c +++ b/arch/riscv/kernel/cpu-hotplug.c @@ -13,7 +13,7 @@ #include <asm/irq.h> #include <asm/cpu_ops.h> #include <asm/numa.h> -#include <asm/sbi.h> +#include <asm/smp.h> bool cpu_has_hotplug(unsigned int cpu) { @@ -43,6 +43,7 @@ int __cpu_disable(void) remove_cpu_topology(cpu); numa_remove_cpu(cpu); set_cpu_online(cpu, false); + riscv_ipi_disable(); irq_migrate_all_off_this_cpu(); return ret; @@ -71,7 +72,7 @@ void __cpu_die(unsigned int cpu) /* * Called from the idle thread for the CPU which has been shutdown. */ -void arch_cpu_idle_dead(void) +void __noreturn arch_cpu_idle_dead(void) { idle_task_exit(); diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index 8400f0cc9704..c96aa56cf1c7 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -7,6 +7,7 @@ #include <linux/init.h> #include <linux/seq_file.h> #include <linux/of.h> +#include <asm/cpufeature.h> #include <asm/csr.h> #include <asm/hwcap.h> #include <asm/sbi.h> @@ -70,12 +71,7 @@ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid) return -1; } -struct riscv_cpuinfo { - unsigned long mvendorid; - unsigned long marchid; - unsigned long mimpid; -}; -static DEFINE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); +DEFINE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); unsigned long riscv_cached_mvendorid(unsigned int cpu_id) { @@ -186,11 +182,15 @@ arch_initcall(riscv_cpuinfo_init); */ static struct riscv_isa_ext_data isa_ext_arr[] = { __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), + __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB), + __RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA), + __RISCV_ISA_EXT_DATA(ssaia, RISCV_ISA_EXT_SSAIA), __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC), __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL), + __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT), __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT), __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX), }; diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c index 8275f237a59d..eb479a88a954 100644 --- a/arch/riscv/kernel/cpu_ops.c +++ b/arch/riscv/kernel/cpu_ops.c @@ -27,7 +27,7 @@ const struct cpu_operations cpu_ops_spinwait = { void __init cpu_set_ops(int cpuid) { #if IS_ENABLED(CONFIG_RISCV_SBI) - if (sbi_probe_extension(SBI_EXT_HSM) > 0) { + if (sbi_probe_extension(SBI_EXT_HSM)) { if (!cpuid) pr_info("SBI HSM extension detected\n"); cpu_ops[cpuid] = &cpu_ops_sbi; diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 59d58ee0f68d..b1d6b7e4b829 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -8,20 +8,16 @@ #include <linux/bitmap.h> #include <linux/ctype.h> -#include <linux/libfdt.h> #include <linux/log2.h> #include <linux/memory.h> #include <linux/module.h> #include <linux/of.h> #include <asm/alternative.h> #include <asm/cacheflush.h> -#include <asm/errata_list.h> +#include <asm/cpufeature.h> #include <asm/hwcap.h> #include <asm/patch.h> -#include <asm/pgtable.h> #include <asm/processor.h> -#include <asm/smp.h> -#include <asm/switch_to.h> #define NUM_ALPHA_EXTS ('z' - 'a' + 1) @@ -30,6 +26,9 @@ unsigned long elf_hwcap __read_mostly; /* Host ISA bitmap */ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; +/* Performance information */ +DEFINE_PER_CPU(long, misaligned_access_speed); + /** * riscv_isa_extension_base() - Get base extension word * @@ -79,6 +78,15 @@ static bool riscv_isa_extension_check(int id) return false; } return true; + case RISCV_ISA_EXT_ZICBOZ: + if (!riscv_cboz_block_size) { + pr_err("Zicboz detected in ISA string, but no cboz-block-size found\n"); + return false; + } else if (!is_power_of_2(riscv_cboz_block_size)) { + pr_err("cboz-block-size present, but is not a power-of-2\n"); + return false; + } + return true; } return true; @@ -221,12 +229,16 @@ void __init riscv_fill_hwcap(void) } } else { /* sorted alphabetically */ + SET_ISA_EXT_MAP("smaia", RISCV_ISA_EXT_SMAIA); + SET_ISA_EXT_MAP("ssaia", RISCV_ISA_EXT_SSAIA); SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF); SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC); SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL); + SET_ISA_EXT_MAP("svnapot", RISCV_ISA_EXT_SVNAPOT); SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT); SET_ISA_EXT_MAP("zbb", RISCV_ISA_EXT_ZBB); SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM); + SET_ISA_EXT_MAP("zicboz", RISCV_ISA_EXT_ZICBOZ); SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE); } #undef SET_ISA_EXT_MAP @@ -269,12 +281,46 @@ void __init riscv_fill_hwcap(void) } #ifdef CONFIG_RISCV_ALTERNATIVE +/* + * Alternative patch sites consider 48 bits when determining when to patch + * the old instruction sequence with the new. These bits are broken into a + * 16-bit vendor ID and a 32-bit patch ID. A non-zero vendor ID means the + * patch site is for an erratum, identified by the 32-bit patch ID. When + * the vendor ID is zero, the patch site is for a cpufeature. cpufeatures + * further break down patch ID into two 16-bit numbers. The lower 16 bits + * are the cpufeature ID and the upper 16 bits are used for a value specific + * to the cpufeature and patch site. If the upper 16 bits are zero, then it + * implies no specific value is specified. cpufeatures that want to control + * patching on a per-site basis will provide non-zero values and implement + * checks here. The checks return true when patching should be done, and + * false otherwise. + */ +static bool riscv_cpufeature_patch_check(u16 id, u16 value) +{ + if (!value) + return true; + + switch (id) { + case RISCV_ISA_EXT_ZICBOZ: + /* + * Zicboz alternative applications provide the maximum + * supported block size order, or zero when it doesn't + * matter. If the current block size exceeds the maximum, + * then the alternative cannot be applied. + */ + return riscv_cboz_block_size <= (1U << value); + } + + return false; +} + void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, struct alt_entry *end, unsigned int stage) { struct alt_entry *alt; void *oldptr, *altptr; + u16 id, value; if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) return; @@ -282,13 +328,19 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, for (alt = begin; alt < end; alt++) { if (alt->vendor_id != 0) continue; - if (alt->errata_id >= RISCV_ISA_EXT_MAX) { - WARN(1, "This extension id:%d is not in ISA extension list", - alt->errata_id); + + id = PATCH_ID_CPUFEATURE_ID(alt->patch_id); + + if (id >= RISCV_ISA_EXT_MAX) { + WARN(1, "This extension id:%d is not in ISA extension list", id); continue; } - if (!__riscv_isa_extension_available(NULL, alt->errata_id)) + if (!__riscv_isa_extension_available(NULL, id)) + continue; + + value = PATCH_ID_CPUFEATURE_VALUE(alt->patch_id); + if (!riscv_cpufeature_patch_check(id, value)) continue; oldptr = ALT_OLD_PTR(alt); diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S index 8e733aa48ba6..515b2dfbca75 100644 --- a/arch/riscv/kernel/efi-header.S +++ b/arch/riscv/kernel/efi-header.S @@ -6,6 +6,7 @@ #include <linux/pe.h> #include <linux/sizes.h> +#include <asm/set_memory.h> .macro __EFI_PE_HEADER .long PE_MAGIC @@ -33,7 +34,11 @@ optional_header: .byte 0x02 // MajorLinkerVersion .byte 0x14 // MinorLinkerVersion .long __pecoff_text_end - efi_header_end // SizeOfCode - .long __pecoff_data_virt_size // SizeOfInitializedData +#ifdef __clang__ + .long __pecoff_data_virt_size // SizeOfInitializedData +#else + .long __pecoff_data_virt_end - __pecoff_text_end // SizeOfInitializedData +#endif .long 0 // SizeOfUninitializedData .long __efistub_efi_pe_entry - _start // AddressOfEntryPoint .long efi_header_end - _start // BaseOfCode @@ -91,9 +96,17 @@ section_table: IMAGE_SCN_MEM_EXECUTE // Characteristics .ascii ".data\0\0\0" - .long __pecoff_data_virt_size // VirtualSize +#ifdef __clang__ + .long __pecoff_data_virt_size // VirtualSize +#else + .long __pecoff_data_virt_end - __pecoff_text_end // VirtualSize +#endif .long __pecoff_text_end - _start // VirtualAddress - .long __pecoff_data_raw_size // SizeOfRawData +#ifdef __clang__ + .long __pecoff_data_raw_size // SizeOfRawData +#else + .long __pecoff_data_raw_end - __pecoff_text_end // SizeOfRawData +#endif .long __pecoff_text_end - _start // PointerToRawData .long 0 // PointerToRelocations diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 99d38fdf8b18..3fbb100bc9e4 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -14,11 +14,7 @@ #include <asm/asm-offsets.h> #include <asm/errata_list.h> -#if !IS_ENABLED(CONFIG_PREEMPTION) -.set resume_kernel, restore_all -#endif - -ENTRY(handle_exception) +SYM_CODE_START(handle_exception) /* * If coming from userspace, preserve the user thread pointer and load * the kernel thread pointer. If we came from the kernel, the scratch @@ -46,32 +42,7 @@ _save_context: REG_S x1, PT_RA(sp) REG_S x3, PT_GP(sp) REG_S x5, PT_T0(sp) - REG_S x6, PT_T1(sp) - REG_S x7, PT_T2(sp) - REG_S x8, PT_S0(sp) - REG_S x9, PT_S1(sp) - REG_S x10, PT_A0(sp) - REG_S x11, PT_A1(sp) - REG_S x12, PT_A2(sp) - REG_S x13, PT_A3(sp) - REG_S x14, PT_A4(sp) - REG_S x15, PT_A5(sp) - REG_S x16, PT_A6(sp) - REG_S x17, PT_A7(sp) - REG_S x18, PT_S2(sp) - REG_S x19, PT_S3(sp) - REG_S x20, PT_S4(sp) - REG_S x21, PT_S5(sp) - REG_S x22, PT_S6(sp) - REG_S x23, PT_S7(sp) - REG_S x24, PT_S8(sp) - REG_S x25, PT_S9(sp) - REG_S x26, PT_S10(sp) - REG_S x27, PT_S11(sp) - REG_S x28, PT_T3(sp) - REG_S x29, PT_T4(sp) - REG_S x30, PT_T5(sp) - REG_S x31, PT_T6(sp) + save_from_x6_to_x31 /* * Disable user-mode memory access as it should only be set in the @@ -106,19 +77,8 @@ _save_context: .option norelax la gp, __global_pointer$ .option pop - -#ifdef CONFIG_TRACE_IRQFLAGS - call __trace_hardirqs_off -#endif - -#ifdef CONFIG_CONTEXT_TRACKING_USER - /* If previous state is in user mode, call user_exit_callable(). */ - li a0, SR_PP - and a0, s1, a0 - bnez a0, skip_context_tracking - call user_exit_callable -skip_context_tracking: -#endif + move a0, sp /* pt_regs */ + la ra, ret_from_exception /* * MSB of cause differentiates between @@ -126,38 +86,13 @@ skip_context_tracking: */ bge s4, zero, 1f - la ra, ret_from_exception - /* Handle interrupts */ - move a0, sp /* pt_regs */ - la a1, generic_handle_arch_irq - jr a1 + tail do_irq 1: - /* - * Exceptions run with interrupts enabled or disabled depending on the - * state of SR_PIE in m/sstatus. - */ - andi t0, s1, SR_PIE - beqz t0, 1f - /* kprobes, entered via ebreak, must have interrupts disabled. */ - li t0, EXC_BREAKPOINT - beq s4, t0, 1f -#ifdef CONFIG_TRACE_IRQFLAGS - call __trace_hardirqs_on -#endif - csrs CSR_STATUS, SR_IE - -1: - la ra, ret_from_exception - /* Handle syscalls */ - li t0, EXC_SYSCALL - beq s4, t0, handle_syscall - /* Handle other exceptions */ slli t0, s4, RISCV_LGPTR la t1, excp_vect_table la t2, excp_vect_table_end - move a0, sp /* pt_regs */ add t0, t1, t0 /* Check if exception code lies within bounds */ bgeu t0, t2, 1f @@ -165,95 +100,16 @@ skip_context_tracking: jr t0 1: tail do_trap_unknown +SYM_CODE_END(handle_exception) -handle_syscall: -#ifdef CONFIG_RISCV_M_MODE - /* - * When running is M-Mode (no MMU config), MPIE does not get set. - * As a result, we need to force enable interrupts here because - * handle_exception did not do set SR_IE as it always sees SR_PIE - * being cleared. - */ - csrs CSR_STATUS, SR_IE -#endif -#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING_USER) - /* Recover a0 - a7 for system calls */ - REG_L a0, PT_A0(sp) - REG_L a1, PT_A1(sp) - REG_L a2, PT_A2(sp) - REG_L a3, PT_A3(sp) - REG_L a4, PT_A4(sp) - REG_L a5, PT_A5(sp) - REG_L a6, PT_A6(sp) - REG_L a7, PT_A7(sp) -#endif - /* save the initial A0 value (needed in signal handlers) */ - REG_S a0, PT_ORIG_A0(sp) - /* - * Advance SEPC to avoid executing the original - * scall instruction on sret - */ - addi s2, s2, 0x4 - REG_S s2, PT_EPC(sp) - /* Trace syscalls, but only if requested by the user. */ - REG_L t0, TASK_TI_FLAGS(tp) - andi t0, t0, _TIF_SYSCALL_WORK - bnez t0, handle_syscall_trace_enter -check_syscall_nr: - /* Check to make sure we don't jump to a bogus syscall number. */ - li t0, __NR_syscalls - la s0, sys_ni_syscall - /* - * Syscall number held in a7. - * If syscall number is above allowed value, redirect to ni_syscall. - */ - bgeu a7, t0, 3f -#ifdef CONFIG_COMPAT - REG_L s0, PT_STATUS(sp) - srli s0, s0, SR_UXL_SHIFT - andi s0, s0, (SR_UXL >> SR_UXL_SHIFT) - li t0, (SR_UXL_32 >> SR_UXL_SHIFT) - sub t0, s0, t0 - bnez t0, 1f - - /* Call compat_syscall */ - la s0, compat_sys_call_table - j 2f -1: -#endif - /* Call syscall */ - la s0, sys_call_table -2: - slli t0, a7, RISCV_LGPTR - add s0, s0, t0 - REG_L s0, 0(s0) -3: - jalr s0 - -ret_from_syscall: - /* Set user a0 to kernel a0 */ - REG_S a0, PT_A0(sp) - /* - * We didn't execute the actual syscall. - * Seccomp already set return value for the current task pt_regs. - * (If it was configured with SECCOMP_RET_ERRNO/TRACE) - */ -ret_from_syscall_rejected: -#ifdef CONFIG_DEBUG_RSEQ - move a0, sp - call rseq_syscall -#endif - /* Trace syscalls, but only if requested by the user. */ - REG_L t0, TASK_TI_FLAGS(tp) - andi t0, t0, _TIF_SYSCALL_WORK - bnez t0, handle_syscall_trace_exit - +/* + * The ret_from_exception must be called with interrupt disabled. Here is the + * caller list: + * - handle_exception + * - ret_from_fork + */ SYM_CODE_START_NOALIGN(ret_from_exception) REG_L s0, PT_STATUS(sp) - csrc CSR_STATUS, SR_IE -#ifdef CONFIG_TRACE_IRQFLAGS - call __trace_hardirqs_off -#endif #ifdef CONFIG_RISCV_M_MODE /* the MPP value is too large to be used as an immediate arg for addi */ li t0, SR_MPP @@ -261,17 +117,7 @@ SYM_CODE_START_NOALIGN(ret_from_exception) #else andi s0, s0, SR_SPP #endif - bnez s0, resume_kernel -SYM_CODE_END(ret_from_exception) - - /* Interrupts must be disabled here so flags are checked atomically */ - REG_L s0, TASK_TI_FLAGS(tp) /* current_thread_info->flags */ - andi s1, s0, _TIF_WORK_MASK - bnez s1, resume_userspace_slow -resume_userspace: -#ifdef CONFIG_CONTEXT_TRACKING_USER - call user_enter_callable -#endif + bnez s0, 1f /* Save unwound kernel stack pointer in thread_info */ addi s0, sp, PT_SIZE_ON_STACK @@ -282,18 +128,7 @@ resume_userspace: * structures again. */ csrw CSR_SCRATCH, tp - -restore_all: -#ifdef CONFIG_TRACE_IRQFLAGS - REG_L s1, PT_STATUS(sp) - andi t0, s1, SR_PIE - beqz t0, 1f - call __trace_hardirqs_on - j 2f 1: - call __trace_hardirqs_off -2: -#endif REG_L a0, PT_STATUS(sp) /* * The current load reservation is effectively part of the processor's @@ -322,32 +157,7 @@ restore_all: REG_L x3, PT_GP(sp) REG_L x4, PT_TP(sp) REG_L x5, PT_T0(sp) - REG_L x6, PT_T1(sp) - REG_L x7, PT_T2(sp) - REG_L x8, PT_S0(sp) - REG_L x9, PT_S1(sp) - REG_L x10, PT_A0(sp) - REG_L x11, PT_A1(sp) - REG_L x12, PT_A2(sp) - REG_L x13, PT_A3(sp) - REG_L x14, PT_A4(sp) - REG_L x15, PT_A5(sp) - REG_L x16, PT_A6(sp) - REG_L x17, PT_A7(sp) - REG_L x18, PT_S2(sp) - REG_L x19, PT_S3(sp) - REG_L x20, PT_S4(sp) - REG_L x21, PT_S5(sp) - REG_L x22, PT_S6(sp) - REG_L x23, PT_S7(sp) - REG_L x24, PT_S8(sp) - REG_L x25, PT_S9(sp) - REG_L x26, PT_S10(sp) - REG_L x27, PT_S11(sp) - REG_L x28, PT_T3(sp) - REG_L x29, PT_T4(sp) - REG_L x30, PT_T5(sp) - REG_L x31, PT_T6(sp) + restore_from_x6_to_x31 REG_L x2, PT_SP(sp) @@ -356,47 +166,10 @@ restore_all: #else sret #endif - -#if IS_ENABLED(CONFIG_PREEMPTION) -resume_kernel: - REG_L s0, TASK_TI_PREEMPT_COUNT(tp) - bnez s0, restore_all - REG_L s0, TASK_TI_FLAGS(tp) - andi s0, s0, _TIF_NEED_RESCHED - beqz s0, restore_all - call preempt_schedule_irq - j restore_all -#endif - -resume_userspace_slow: - /* Enter slow path for supplementary processing */ - move a0, sp /* pt_regs */ - move a1, s0 /* current_thread_info->flags */ - call do_work_pending - j resume_userspace - -/* Slow paths for ptrace. */ -handle_syscall_trace_enter: - move a0, sp - call do_syscall_trace_enter - move t0, a0 - REG_L a0, PT_A0(sp) - REG_L a1, PT_A1(sp) - REG_L a2, PT_A2(sp) - REG_L a3, PT_A3(sp) - REG_L a4, PT_A4(sp) - REG_L a5, PT_A5(sp) - REG_L a6, PT_A6(sp) - REG_L a7, PT_A7(sp) - bnez t0, ret_from_syscall_rejected - j check_syscall_nr -handle_syscall_trace_exit: - move a0, sp - call do_syscall_trace_exit - j ret_from_exception +SYM_CODE_END(ret_from_exception) #ifdef CONFIG_VMAP_STACK -handle_kernel_stack_overflow: +SYM_CODE_START_LOCAL(handle_kernel_stack_overflow) /* * Takes the psuedo-spinlock for the shadow stack, in case multiple * harts are concurrently overflowing their kernel stacks. We could @@ -464,32 +237,7 @@ restore_caller_reg: REG_S x1, PT_RA(sp) REG_S x3, PT_GP(sp) REG_S x5, PT_T0(sp) - REG_S x6, PT_T1(sp) - REG_S x7, PT_T2(sp) - REG_S x8, PT_S0(sp) - REG_S x9, PT_S1(sp) - REG_S x10, PT_A0(sp) - REG_S x11, PT_A1(sp) - REG_S x12, PT_A2(sp) - REG_S x13, PT_A3(sp) - REG_S x14, PT_A4(sp) - REG_S x15, PT_A5(sp) - REG_S x16, PT_A6(sp) - REG_S x17, PT_A7(sp) - REG_S x18, PT_S2(sp) - REG_S x19, PT_S3(sp) - REG_S x20, PT_S4(sp) - REG_S x21, PT_S5(sp) - REG_S x22, PT_S6(sp) - REG_S x23, PT_S7(sp) - REG_S x24, PT_S8(sp) - REG_S x25, PT_S9(sp) - REG_S x26, PT_S10(sp) - REG_S x27, PT_S11(sp) - REG_S x28, PT_T3(sp) - REG_S x29, PT_T4(sp) - REG_S x30, PT_T5(sp) - REG_S x31, PT_T6(sp) + save_from_x6_to_x31 REG_L s0, TASK_TI_KERNEL_SP(tp) csrr s1, CSR_STATUS @@ -505,23 +253,20 @@ restore_caller_reg: REG_S s5, PT_TP(sp) move a0, sp tail handle_bad_stack +SYM_CODE_END(handle_kernel_stack_overflow) #endif -END(handle_exception) - -ENTRY(ret_from_fork) - la ra, ret_from_exception - tail schedule_tail -ENDPROC(ret_from_fork) - -ENTRY(ret_from_kernel_thread) +SYM_CODE_START(ret_from_fork) call schedule_tail + beqz s0, 1f /* not from kernel thread */ /* Call fn(arg) */ - la ra, ret_from_exception move a0, s1 - jr s0 -ENDPROC(ret_from_kernel_thread) - + jalr s0 +1: + move a0, sp /* pt_regs */ + la ra, ret_from_exception + tail syscall_exit_to_user_mode +SYM_CODE_END(ret_from_fork) /* * Integer register context switch @@ -533,7 +278,7 @@ ENDPROC(ret_from_kernel_thread) * The value of a0 and a1 must be preserved by this function, as that's how * arguments are passed to schedule_tail. */ -ENTRY(__switch_to) +SYM_FUNC_START(__switch_to) /* Save context into prev->thread */ li a4, TASK_THREAD_RA add a3, a0, a4 @@ -570,7 +315,7 @@ ENTRY(__switch_to) /* The offset of thread_info in task_struct is zero. */ move tp, a1 ret -ENDPROC(__switch_to) +SYM_FUNC_END(__switch_to) #ifndef CONFIG_MMU #define do_page_fault do_trap_unknown @@ -579,7 +324,7 @@ ENDPROC(__switch_to) .section ".rodata" .align LGREG /* Exception vector table */ -ENTRY(excp_vect_table) +SYM_CODE_START(excp_vect_table) RISCV_PTR do_trap_insn_misaligned ALT_INSN_FAULT(RISCV_PTR do_trap_insn_fault) RISCV_PTR do_trap_insn_illegal @@ -588,7 +333,7 @@ ENTRY(excp_vect_table) RISCV_PTR do_trap_load_fault RISCV_PTR do_trap_store_misaligned RISCV_PTR do_trap_store_fault - RISCV_PTR do_trap_ecall_u /* system call, gets intercepted */ + RISCV_PTR do_trap_ecall_u /* system call */ RISCV_PTR do_trap_ecall_s RISCV_PTR do_trap_unknown RISCV_PTR do_trap_ecall_m @@ -598,11 +343,11 @@ ENTRY(excp_vect_table) RISCV_PTR do_trap_unknown RISCV_PTR do_page_fault /* store page fault */ excp_vect_table_end: -END(excp_vect_table) +SYM_CODE_END(excp_vect_table) #ifndef CONFIG_MMU -ENTRY(__user_rt_sigreturn) +SYM_CODE_START(__user_rt_sigreturn) li a7, __NR_rt_sigreturn scall -END(__user_rt_sigreturn) +SYM_CODE_END(__user_rt_sigreturn) #endif diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 5bff37af4770..03a6434a8cdd 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -15,10 +15,19 @@ void ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex) { mutex_lock(&text_mutex); + + /* + * The code sequences we use for ftrace can't be patched while the + * kernel is running, so we need to use stop_machine() to modify them + * for now. This doesn't play nice with text_mutex, we use this flag + * to elide the check. + */ + riscv_patch_in_stop_machine = true; } void ftrace_arch_code_modify_post_process(void) __releases(&text_mutex) { + riscv_patch_in_stop_machine = false; mutex_unlock(&text_mutex); } @@ -107,9 +116,9 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) { int out; - ftrace_arch_code_modify_prepare(); + mutex_lock(&text_mutex); out = ftrace_make_nop(mod, rec, MCOUNT_ADDR); - ftrace_arch_code_modify_post_process(); + mutex_unlock(&text_mutex); return out; } diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h index 726731ada534..a556fdaafed9 100644 --- a/arch/riscv/kernel/head.h +++ b/arch/riscv/kernel/head.h @@ -10,7 +10,6 @@ extern atomic_t hart_lottery; -asmlinkage void do_page_fault(struct pt_regs *regs); asmlinkage void __init setup_vm(uintptr_t dtb_pa); #ifdef CONFIG_XIP_KERNEL asmlinkage void __init __copy_data(void); diff --git a/arch/riscv/kernel/hibernate-asm.S b/arch/riscv/kernel/hibernate-asm.S new file mode 100644 index 000000000000..effaf5ca5da0 --- /dev/null +++ b/arch/riscv/kernel/hibernate-asm.S @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Hibernation low level support for RISCV. + * + * Copyright (C) 2023 StarFive Technology Co., Ltd. + * + * Author: Jee Heng Sia <jeeheng.sia@starfivetech.com> + */ + +#include <asm/asm.h> +#include <asm/asm-offsets.h> +#include <asm/assembler.h> +#include <asm/csr.h> + +#include <linux/linkage.h> + +/* + * int __hibernate_cpu_resume(void) + * Switch back to the hibernated image's page table prior to restoring the CPU + * context. + * + * Always returns 0 + */ +ENTRY(__hibernate_cpu_resume) + /* switch to hibernated image's page table. */ + csrw CSR_SATP, s0 + sfence.vma + + REG_L a0, hibernate_cpu_context + + suspend_restore_csrs + suspend_restore_regs + + /* Return zero value. */ + mv a0, zero + + ret +END(__hibernate_cpu_resume) + +/* + * Prepare to restore the image. + * a0: satp of saved page tables. + * a1: satp of temporary page tables. + * a2: cpu_resume. + */ +ENTRY(hibernate_restore_image) + mv s0, a0 + mv s1, a1 + mv s2, a2 + REG_L s4, restore_pblist + REG_L a1, relocated_restore_code + + jalr a1 +END(hibernate_restore_image) + +/* + * The below code will be executed from a 'safe' page. + * It first switches to the temporary page table, then starts to copy the pages + * back to the original memory location. Finally, it jumps to __hibernate_cpu_resume() + * to restore the CPU context. + */ +ENTRY(hibernate_core_restore_code) + /* switch to temp page table. */ + csrw satp, s1 + sfence.vma +.Lcopy: + /* The below code will restore the hibernated image. */ + REG_L a1, HIBERN_PBE_ADDR(s4) + REG_L a0, HIBERN_PBE_ORIG(s4) + + copy_page a0, a1 + + REG_L s4, HIBERN_PBE_NEXT(s4) + bnez s4, .Lcopy + + jalr s2 +END(hibernate_core_restore_code) diff --git a/arch/riscv/kernel/hibernate.c b/arch/riscv/kernel/hibernate.c new file mode 100644 index 000000000000..264b2dcdd67e --- /dev/null +++ b/arch/riscv/kernel/hibernate.c @@ -0,0 +1,427 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Hibernation support for RISCV + * + * Copyright (C) 2023 StarFive Technology Co., Ltd. + * + * Author: Jee Heng Sia <jeeheng.sia@starfivetech.com> + */ + +#include <asm/barrier.h> +#include <asm/cacheflush.h> +#include <asm/mmu_context.h> +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/sections.h> +#include <asm/set_memory.h> +#include <asm/smp.h> +#include <asm/suspend.h> + +#include <linux/cpu.h> +#include <linux/memblock.h> +#include <linux/pm.h> +#include <linux/sched.h> +#include <linux/suspend.h> +#include <linux/utsname.h> + +/* The logical cpu number we should resume on, initialised to a non-cpu number. */ +static int sleep_cpu = -EINVAL; + +/* Pointer to the temporary resume page table. */ +static pgd_t *resume_pg_dir; + +/* CPU context to be saved. */ +struct suspend_context *hibernate_cpu_context; +EXPORT_SYMBOL_GPL(hibernate_cpu_context); + +unsigned long relocated_restore_code; +EXPORT_SYMBOL_GPL(relocated_restore_code); + +/** + * struct arch_hibernate_hdr_invariants - container to store kernel build version. + * @uts_version: to save the build number and date so that we do not resume with + * a different kernel. + */ +struct arch_hibernate_hdr_invariants { + char uts_version[__NEW_UTS_LEN + 1]; +}; + +/** + * struct arch_hibernate_hdr - helper parameters that help us to restore the image. + * @invariants: container to store kernel build version. + * @hartid: to make sure same boot_cpu executes the hibernate/restore code. + * @saved_satp: original page table used by the hibernated image. + * @restore_cpu_addr: the kernel's image address to restore the CPU context. + */ +static struct arch_hibernate_hdr { + struct arch_hibernate_hdr_invariants invariants; + unsigned long hartid; + unsigned long saved_satp; + unsigned long restore_cpu_addr; +} resume_hdr; + +static void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i) +{ + memset(i, 0, sizeof(*i)); + memcpy(i->uts_version, init_utsname()->version, sizeof(i->uts_version)); +} + +/* + * Check if the given pfn is in the 'nosave' section. + */ +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = sym_to_pfn(&__nosave_begin); + unsigned long nosave_end_pfn = sym_to_pfn(&__nosave_end - 1); + + return ((pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn)); +} + +void notrace save_processor_state(void) +{ + WARN_ON(num_online_cpus() != 1); +} + +void notrace restore_processor_state(void) +{ +} + +/* + * Helper parameters need to be saved to the hibernation image header. + */ +int arch_hibernation_header_save(void *addr, unsigned int max_size) +{ + struct arch_hibernate_hdr *hdr = addr; + + if (max_size < sizeof(*hdr)) + return -EOVERFLOW; + + arch_hdr_invariants(&hdr->invariants); + + hdr->hartid = cpuid_to_hartid_map(sleep_cpu); + hdr->saved_satp = csr_read(CSR_SATP); + hdr->restore_cpu_addr = (unsigned long)__hibernate_cpu_resume; + + return 0; +} +EXPORT_SYMBOL_GPL(arch_hibernation_header_save); + +/* + * Retrieve the helper parameters from the hibernation image header. + */ +int arch_hibernation_header_restore(void *addr) +{ + struct arch_hibernate_hdr_invariants invariants; + struct arch_hibernate_hdr *hdr = addr; + int ret = 0; + + arch_hdr_invariants(&invariants); + + if (memcmp(&hdr->invariants, &invariants, sizeof(invariants))) { + pr_crit("Hibernate image not generated by this kernel!\n"); + return -EINVAL; + } + + sleep_cpu = riscv_hartid_to_cpuid(hdr->hartid); + if (sleep_cpu < 0) { + pr_crit("Hibernated on a CPU not known to this kernel!\n"); + sleep_cpu = -EINVAL; + return -EINVAL; + } + +#ifdef CONFIG_SMP + ret = bringup_hibernate_cpu(sleep_cpu); + if (ret) { + sleep_cpu = -EINVAL; + return ret; + } +#endif + resume_hdr = *hdr; + + return ret; +} +EXPORT_SYMBOL_GPL(arch_hibernation_header_restore); + +int swsusp_arch_suspend(void) +{ + int ret = 0; + + if (__cpu_suspend_enter(hibernate_cpu_context)) { + sleep_cpu = smp_processor_id(); + suspend_save_csrs(hibernate_cpu_context); + ret = swsusp_save(); + } else { + suspend_restore_csrs(hibernate_cpu_context); + flush_tlb_all(); + flush_icache_all(); + + /* + * Tell the hibernation core that we've just restored the memory. + */ + in_suspend = 0; + sleep_cpu = -EINVAL; + } + + return ret; +} + +static int temp_pgtable_map_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start, + unsigned long end, pgprot_t prot) +{ + pte_t *src_ptep; + pte_t *dst_ptep; + + if (pmd_none(READ_ONCE(*dst_pmdp))) { + dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC); + if (!dst_ptep) + return -ENOMEM; + + pmd_populate_kernel(NULL, dst_pmdp, dst_ptep); + } + + dst_ptep = pte_offset_kernel(dst_pmdp, start); + src_ptep = pte_offset_kernel(src_pmdp, start); + + do { + pte_t pte = READ_ONCE(*src_ptep); + + if (pte_present(pte)) + set_pte(dst_ptep, __pte(pte_val(pte) | pgprot_val(prot))); + } while (dst_ptep++, src_ptep++, start += PAGE_SIZE, start < end); + + return 0; +} + +static int temp_pgtable_map_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start, + unsigned long end, pgprot_t prot) +{ + unsigned long next; + unsigned long ret; + pmd_t *src_pmdp; + pmd_t *dst_pmdp; + + if (pud_none(READ_ONCE(*dst_pudp))) { + dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pmdp) + return -ENOMEM; + + pud_populate(NULL, dst_pudp, dst_pmdp); + } + + dst_pmdp = pmd_offset(dst_pudp, start); + src_pmdp = pmd_offset(src_pudp, start); + + do { + pmd_t pmd = READ_ONCE(*src_pmdp); + + next = pmd_addr_end(start, end); + + if (pmd_none(pmd)) + continue; + + if (pmd_leaf(pmd)) { + set_pmd(dst_pmdp, __pmd(pmd_val(pmd) | pgprot_val(prot))); + } else { + ret = temp_pgtable_map_pte(dst_pmdp, src_pmdp, start, next, prot); + if (ret) + return -ENOMEM; + } + } while (dst_pmdp++, src_pmdp++, start = next, start != end); + + return 0; +} + +static int temp_pgtable_map_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start, + unsigned long end, pgprot_t prot) +{ + unsigned long next; + unsigned long ret; + pud_t *dst_pudp; + pud_t *src_pudp; + + if (p4d_none(READ_ONCE(*dst_p4dp))) { + dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pudp) + return -ENOMEM; + + p4d_populate(NULL, dst_p4dp, dst_pudp); + } + + dst_pudp = pud_offset(dst_p4dp, start); + src_pudp = pud_offset(src_p4dp, start); + + do { + pud_t pud = READ_ONCE(*src_pudp); + + next = pud_addr_end(start, end); + + if (pud_none(pud)) + continue; + + if (pud_leaf(pud)) { + set_pud(dst_pudp, __pud(pud_val(pud) | pgprot_val(prot))); + } else { + ret = temp_pgtable_map_pmd(dst_pudp, src_pudp, start, next, prot); + if (ret) + return -ENOMEM; + } + } while (dst_pudp++, src_pudp++, start = next, start != end); + + return 0; +} + +static int temp_pgtable_map_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start, + unsigned long end, pgprot_t prot) +{ + unsigned long next; + unsigned long ret; + p4d_t *dst_p4dp; + p4d_t *src_p4dp; + + if (pgd_none(READ_ONCE(*dst_pgdp))) { + dst_p4dp = (p4d_t *)get_safe_page(GFP_ATOMIC); + if (!dst_p4dp) + return -ENOMEM; + + pgd_populate(NULL, dst_pgdp, dst_p4dp); + } + + dst_p4dp = p4d_offset(dst_pgdp, start); + src_p4dp = p4d_offset(src_pgdp, start); + + do { + p4d_t p4d = READ_ONCE(*src_p4dp); + + next = p4d_addr_end(start, end); + + if (p4d_none(p4d)) + continue; + + if (p4d_leaf(p4d)) { + set_p4d(dst_p4dp, __p4d(p4d_val(p4d) | pgprot_val(prot))); + } else { + ret = temp_pgtable_map_pud(dst_p4dp, src_p4dp, start, next, prot); + if (ret) + return -ENOMEM; + } + } while (dst_p4dp++, src_p4dp++, start = next, start != end); + + return 0; +} + +static int temp_pgtable_mapping(pgd_t *pgdp, unsigned long start, unsigned long end, pgprot_t prot) +{ + pgd_t *dst_pgdp = pgd_offset_pgd(pgdp, start); + pgd_t *src_pgdp = pgd_offset_k(start); + unsigned long next; + unsigned long ret; + + do { + pgd_t pgd = READ_ONCE(*src_pgdp); + + next = pgd_addr_end(start, end); + + if (pgd_none(pgd)) + continue; + + if (pgd_leaf(pgd)) { + set_pgd(dst_pgdp, __pgd(pgd_val(pgd) | pgprot_val(prot))); + } else { + ret = temp_pgtable_map_p4d(dst_pgdp, src_pgdp, start, next, prot); + if (ret) + return -ENOMEM; + } + } while (dst_pgdp++, src_pgdp++, start = next, start != end); + + return 0; +} + +static unsigned long relocate_restore_code(void) +{ + void *page = (void *)get_safe_page(GFP_ATOMIC); + + if (!page) + return -ENOMEM; + + copy_page(page, hibernate_core_restore_code); + + /* Make the page containing the relocated code executable. */ + set_memory_x((unsigned long)page, 1); + + return (unsigned long)page; +} + +int swsusp_arch_resume(void) +{ + unsigned long end = (unsigned long)pfn_to_virt(max_low_pfn); + unsigned long start = PAGE_OFFSET; + int ret; + + /* + * Memory allocated by get_safe_page() will be dealt with by the hibernation core, + * we don't need to free it here. + */ + resume_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC); + if (!resume_pg_dir) + return -ENOMEM; + + /* + * Create a temporary page table and map the whole linear region as executable and + * writable. + */ + ret = temp_pgtable_mapping(resume_pg_dir, start, end, __pgprot(_PAGE_WRITE | _PAGE_EXEC)); + if (ret) + return ret; + + /* Move the restore code to a new page so that it doesn't get overwritten by itself. */ + relocated_restore_code = relocate_restore_code(); + if (relocated_restore_code == -ENOMEM) + return -ENOMEM; + + /* + * Map the __hibernate_cpu_resume() address to the temporary page table so that the + * restore code can jumps to it after finished restore the image. The next execution + * code doesn't find itself in a different address space after switching over to the + * original page table used by the hibernated image. + * The __hibernate_cpu_resume() mapping is unnecessary for RV32 since the kernel and + * linear addresses are identical, but different for RV64. To ensure consistency, we + * map it for both RV32 and RV64 kernels. + * Additionally, we should ensure that the page is writable before restoring the image. + */ + start = (unsigned long)resume_hdr.restore_cpu_addr; + end = start + PAGE_SIZE; + + ret = temp_pgtable_mapping(resume_pg_dir, start, end, __pgprot(_PAGE_WRITE)); + if (ret) + return ret; + + hibernate_restore_image(resume_hdr.saved_satp, (PFN_DOWN(__pa(resume_pg_dir)) | satp_mode), + resume_hdr.restore_cpu_addr); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP_SMP +int hibernate_resume_nonboot_cpu_disable(void) +{ + if (sleep_cpu < 0) { + pr_err("Failing to resume from hibernate on an unknown CPU\n"); + return -ENODEV; + } + + return freeze_secondary_cpus(sleep_cpu); +} +#endif + +static int __init riscv_hibernate_init(void) +{ + hibernate_cpu_context = kzalloc(sizeof(*hibernate_cpu_context), GFP_KERNEL); + + if (WARN_ON(!hibernate_cpu_context)) + return -ENOMEM; + + return 0; +} + +early_initcall(riscv_hibernate_init); diff --git a/arch/riscv/kernel/image-vars.h b/arch/riscv/kernel/image-vars.h index 7e2962ef73f9..15616155008c 100644 --- a/arch/riscv/kernel/image-vars.h +++ b/arch/riscv/kernel/image-vars.h @@ -23,8 +23,6 @@ * linked at. The routines below are all implemented in assembler in a * position independent manner */ -__efistub_strcmp = strcmp; - __efistub__start = _start; __efistub__start_kernel = _start_kernel; __efistub__end = _end; diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index 7207fa08d78f..eb9a68a539e6 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -7,8 +7,26 @@ #include <linux/interrupt.h> #include <linux/irqchip.h> +#include <linux/irqdomain.h> +#include <linux/module.h> #include <linux/seq_file.h> -#include <asm/smp.h> +#include <asm/sbi.h> + +static struct fwnode_handle *(*__get_intc_node)(void); + +void riscv_set_intc_hwnode_fn(struct fwnode_handle *(*fn)(void)) +{ + __get_intc_node = fn; +} + +struct fwnode_handle *riscv_get_intc_hwnode(void) +{ + if (__get_intc_node) + return __get_intc_node(); + + return NULL; +} +EXPORT_SYMBOL_GPL(riscv_get_intc_hwnode); int arch_show_interrupts(struct seq_file *p, int prec) { @@ -21,4 +39,5 @@ void __init init_IRQ(void) irqchip_init(); if (!handle_arch_irq) panic("No interrupt controller found."); + sbi_ipi_init(); } diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 125de818d1ba..669b8697aa38 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -66,66 +66,17 @@ REG_S x3, PT_GP(sp) REG_S x4, PT_TP(sp) REG_S x5, PT_T0(sp) - REG_S x6, PT_T1(sp) - REG_S x7, PT_T2(sp) - REG_S x8, PT_S0(sp) - REG_S x9, PT_S1(sp) - REG_S x10, PT_A0(sp) - REG_S x11, PT_A1(sp) - REG_S x12, PT_A2(sp) - REG_S x13, PT_A3(sp) - REG_S x14, PT_A4(sp) - REG_S x15, PT_A5(sp) - REG_S x16, PT_A6(sp) - REG_S x17, PT_A7(sp) - REG_S x18, PT_S2(sp) - REG_S x19, PT_S3(sp) - REG_S x20, PT_S4(sp) - REG_S x21, PT_S5(sp) - REG_S x22, PT_S6(sp) - REG_S x23, PT_S7(sp) - REG_S x24, PT_S8(sp) - REG_S x25, PT_S9(sp) - REG_S x26, PT_S10(sp) - REG_S x27, PT_S11(sp) - REG_S x28, PT_T3(sp) - REG_S x29, PT_T4(sp) - REG_S x30, PT_T5(sp) - REG_S x31, PT_T6(sp) + save_from_x6_to_x31 .endm .macro RESTORE_ALL - REG_L t0, PT_EPC(sp) REG_L x1, PT_RA(sp) REG_L x2, PT_SP(sp) REG_L x3, PT_GP(sp) REG_L x4, PT_TP(sp) - REG_L x6, PT_T1(sp) - REG_L x7, PT_T2(sp) - REG_L x8, PT_S0(sp) - REG_L x9, PT_S1(sp) - REG_L x10, PT_A0(sp) - REG_L x11, PT_A1(sp) - REG_L x12, PT_A2(sp) - REG_L x13, PT_A3(sp) - REG_L x14, PT_A4(sp) - REG_L x15, PT_A5(sp) - REG_L x16, PT_A6(sp) - REG_L x17, PT_A7(sp) - REG_L x18, PT_S2(sp) - REG_L x19, PT_S3(sp) - REG_L x20, PT_S4(sp) - REG_L x21, PT_S5(sp) - REG_L x22, PT_S6(sp) - REG_L x23, PT_S7(sp) - REG_L x24, PT_S8(sp) - REG_L x25, PT_S9(sp) - REG_L x26, PT_S10(sp) - REG_L x27, PT_S11(sp) - REG_L x28, PT_T3(sp) - REG_L x29, PT_T4(sp) - REG_L x30, PT_T5(sp) - REG_L x31, PT_T6(sp) + /* Restore t0 with PT_EPC */ + REG_L x5, PT_EPC(sp) + restore_from_x6_to_x31 addi sp, sp, PT_SIZE_ON_STACK .endm diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 8086d1a281cd..575e71d6c8ae 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -11,6 +11,7 @@ #include <asm/kprobes.h> #include <asm/cacheflush.h> #include <asm/fixmap.h> +#include <asm/ftrace.h> #include <asm/patch.h> struct patch_insn { @@ -20,6 +21,8 @@ struct patch_insn { atomic_t cpu_count; }; +int riscv_patch_in_stop_machine = false; + #ifdef CONFIG_MMU /* * The fix_to_virt(, idx) needs a const value (not a dynamic variable of @@ -60,8 +63,15 @@ static int patch_insn_write(void *addr, const void *insn, size_t len) * Before reaching here, it was expected to lock the text_mutex * already, so we don't need to give another lock here and could * ensure that it was safe between each cores. + * + * We're currently using stop_machine() for ftrace & kprobes, and while + * that ensures text_mutex is held before installing the mappings it + * does not ensure text_mutex is held by the calling thread. That's + * safe but triggers a lockdep failure, so just elide it for that + * specific case. */ - lockdep_assert_held(&text_mutex); + if (!riscv_patch_in_stop_machine) + lockdep_assert_held(&text_mutex); if (across_pages) patch_map(addr + len, FIX_TEXT_POKE1); @@ -125,6 +135,7 @@ NOKPROBE_SYMBOL(patch_text_cb); int patch_text(void *addr, u32 *insns, int ninsns) { + int ret; struct patch_insn patch = { .addr = addr, .insns = insns, @@ -132,7 +143,18 @@ int patch_text(void *addr, u32 *insns, int ninsns) .cpu_count = ATOMIC_INIT(0), }; - return stop_machine_cpuslocked(patch_text_cb, - &patch, cpu_online_mask); + /* + * kprobes takes text_mutex, before calling patch_text(), but as we call + * calls stop_machine(), the lockdep assertion in patch_insn_write() + * gets confused by the context in which the lock is taken. + * Instead, ensure the lock is held before calling stop_machine(), and + * set riscv_patch_in_stop_machine to skip the check in + * patch_insn_write(). + */ + lockdep_assert_held(&text_mutex); + riscv_patch_in_stop_machine = true; + ret = stop_machine_cpuslocked(patch_text_cb, &patch, cpu_online_mask); + riscv_patch_in_stop_machine = false; + return ret; } NOKPROBE_SYMBOL(patch_text); diff --git a/arch/riscv/kernel/pi/Makefile b/arch/riscv/kernel/pi/Makefile new file mode 100644 index 000000000000..7b593d44c712 --- /dev/null +++ b/arch/riscv/kernel/pi/Makefile @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: GPL-2.0 +# This file was copied from arm64/kernel/pi/Makefile. + +KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \ + -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \ + $(call cc-option,-mbranch-protection=none) \ + -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \ + -D__DISABLE_EXPORTS -ffreestanding \ + -fno-asynchronous-unwind-tables -fno-unwind-tables \ + $(call cc-option,-fno-addrsig) + +KBUILD_CFLAGS += -mcmodel=medany + +CFLAGS_cmdline_early.o += -D__NO_FORTIFY +CFLAGS_lib-fdt_ro.o += -D__NO_FORTIFY + +GCOV_PROFILE := n +KASAN_SANITIZE := n +KCSAN_SANITIZE := n +UBSAN_SANITIZE := n +KCOV_INSTRUMENT := n + +$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \ + --remove-section=.note.gnu.property \ + --prefix-alloc-sections=.init.pi +$(obj)/%.pi.o: $(obj)/%.o FORCE + $(call if_changed,objcopy) + +$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE + $(call if_changed_rule,cc_o_c) + +$(obj)/string.o: $(srctree)/lib/string.c FORCE + $(call if_changed_rule,cc_o_c) + +$(obj)/ctype.o: $(srctree)/lib/ctype.c FORCE + $(call if_changed_rule,cc_o_c) + +obj-y := cmdline_early.pi.o string.pi.o ctype.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o +extra-y := $(patsubst %.pi.o,%.o,$(obj-y)) diff --git a/arch/riscv/kernel/pi/cmdline_early.c b/arch/riscv/kernel/pi/cmdline_early.c new file mode 100644 index 000000000000..05652d13c746 --- /dev/null +++ b/arch/riscv/kernel/pi/cmdline_early.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/types.h> +#include <linux/init.h> +#include <linux/libfdt.h> +#include <linux/string.h> +#include <asm/pgtable.h> +#include <asm/setup.h> + +static char early_cmdline[COMMAND_LINE_SIZE]; + +/* + * Declare the functions that are exported (but prefixed) here so that LLVM + * does not complain it lacks the 'static' keyword (which, if added, makes + * LLVM complain because the function is actually unused in this file). + */ +u64 set_satp_mode_from_cmdline(uintptr_t dtb_pa); + +static char *get_early_cmdline(uintptr_t dtb_pa) +{ + const char *fdt_cmdline = NULL; + unsigned int fdt_cmdline_size = 0; + int chosen_node; + + if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) { + chosen_node = fdt_path_offset((void *)dtb_pa, "/chosen"); + if (chosen_node >= 0) { + fdt_cmdline = fdt_getprop((void *)dtb_pa, chosen_node, + "bootargs", NULL); + if (fdt_cmdline) { + fdt_cmdline_size = strlen(fdt_cmdline); + strscpy(early_cmdline, fdt_cmdline, + COMMAND_LINE_SIZE); + } + } + } + + if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || + IS_ENABLED(CONFIG_CMDLINE_FORCE) || + fdt_cmdline_size == 0 /* CONFIG_CMDLINE_FALLBACK */) { + strncat(early_cmdline, CONFIG_CMDLINE, + COMMAND_LINE_SIZE - fdt_cmdline_size); + } + + return early_cmdline; +} + +static u64 match_noXlvl(char *cmdline) +{ + if (strstr(cmdline, "no4lvl")) + return SATP_MODE_48; + else if (strstr(cmdline, "no5lvl")) + return SATP_MODE_57; + + return 0; +} + +u64 set_satp_mode_from_cmdline(uintptr_t dtb_pa) +{ + char *cmdline = get_early_cmdline(dtb_pa); + + return match_noXlvl(cmdline); +} diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 774ffde386ab..e2a060066730 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -34,7 +34,6 @@ EXPORT_SYMBOL(__stack_chk_guard); #endif extern asmlinkage void ret_from_fork(void); -extern asmlinkage void ret_from_kernel_thread(void); void arch_cpu_idle(void) { @@ -173,7 +172,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) /* Supervisor/Machine, irqs on: */ childregs->status = SR_PP | SR_PIE; - p->thread.ra = (unsigned long)ret_from_kernel_thread; p->thread.s[0] = (unsigned long)args->fn; p->thread.s[1] = (unsigned long)args->fn_arg; } else { @@ -183,8 +181,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) if (clone_flags & CLONE_SETTLS) childregs->tp = tls; childregs->a0 = 0; /* Return value of fork() */ - p->thread.ra = (unsigned long)ret_from_fork; + p->thread.s[0] = 0; } + p->thread.ra = (unsigned long)ret_from_fork; p->thread.sp = (unsigned long)childregs; /* kernel sp */ return 0; } diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 2ae8280ae475..23c48b14a0e7 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -19,9 +19,6 @@ #include <linux/sched.h> #include <linux/sched/task_stack.h> -#define CREATE_TRACE_POINTS -#include <trace/events/syscalls.h> - enum riscv_regset { REGSET_X, #ifdef CONFIG_FPU @@ -212,7 +209,6 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) void ptrace_disable(struct task_struct *child) { - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); } long arch_ptrace(struct task_struct *child, long request, @@ -229,46 +225,6 @@ long arch_ptrace(struct task_struct *child, long request, return ret; } -/* - * Allows PTRACE_SYSCALL to work. These are called from entry.S in - * {handle,ret_from}_syscall. - */ -__visible int do_syscall_trace_enter(struct pt_regs *regs) -{ - if (test_thread_flag(TIF_SYSCALL_TRACE)) - if (ptrace_report_syscall_entry(regs)) - return -1; - - /* - * Do the secure computing after ptrace; failures should be fast. - * If this fails we might have return value in a0 from seccomp - * (via SECCOMP_RET_ERRNO/TRACE). - */ - if (secure_computing() == -1) - return -1; - -#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS - if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) - trace_sys_enter(regs, syscall_get_nr(current, regs)); -#endif - - audit_syscall_entry(regs->a7, regs->a0, regs->a1, regs->a2, regs->a3); - return 0; -} - -__visible void do_syscall_trace_exit(struct pt_regs *regs) -{ - audit_syscall_exit(regs); - - if (test_thread_flag(TIF_SYSCALL_TRACE)) - ptrace_report_syscall_exit(regs, 0); - -#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS - if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) - trace_sys_exit(regs, regs_return_value(regs)); -#endif -} - #ifdef CONFIG_COMPAT static int compat_riscv_gpr_get(struct task_struct *target, const struct user_regset *regset, diff --git a/arch/riscv/kernel/sbi-ipi.c b/arch/riscv/kernel/sbi-ipi.c new file mode 100644 index 000000000000..a4559695ce62 --- /dev/null +++ b/arch/riscv/kernel/sbi-ipi.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Multiplex several IPIs over a single HW IPI. + * + * Copyright (c) 2022 Ventana Micro Systems Inc. + */ + +#define pr_fmt(fmt) "riscv: " fmt +#include <linux/cpu.h> +#include <linux/init.h> +#include <linux/irq.h> +#include <linux/irqchip/chained_irq.h> +#include <linux/irqdomain.h> +#include <asm/sbi.h> + +static int sbi_ipi_virq; + +static void sbi_ipi_handle(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + + chained_irq_enter(chip, desc); + + csr_clear(CSR_IP, IE_SIE); + ipi_mux_process(); + + chained_irq_exit(chip, desc); +} + +static int sbi_ipi_starting_cpu(unsigned int cpu) +{ + enable_percpu_irq(sbi_ipi_virq, irq_get_trigger_type(sbi_ipi_virq)); + return 0; +} + +void __init sbi_ipi_init(void) +{ + int virq; + struct irq_domain *domain; + + if (riscv_ipi_have_virq_range()) + return; + + domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(), + DOMAIN_BUS_ANY); + if (!domain) { + pr_err("unable to find INTC IRQ domain\n"); + return; + } + + sbi_ipi_virq = irq_create_mapping(domain, RV_IRQ_SOFT); + if (!sbi_ipi_virq) { + pr_err("unable to create INTC IRQ mapping\n"); + return; + } + + virq = ipi_mux_create(BITS_PER_BYTE, sbi_send_ipi); + if (virq <= 0) { + pr_err("unable to create muxed IPIs\n"); + irq_dispose_mapping(sbi_ipi_virq); + return; + } + + irq_set_chained_handler(sbi_ipi_virq, sbi_ipi_handle); + + /* + * Don't disable IPI when CPU goes offline because + * the masking/unmasking of virtual IPIs is done + * via generic IPI-Mux + */ + cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "irqchip/sbi-ipi:starting", + sbi_ipi_starting_cpu, NULL); + + riscv_ipi_set_virq_range(virq, BITS_PER_BYTE, false); + pr_info("providing IPIs using SBI IPI extension\n"); +} diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index 5c87db8fdff2..c672c8ba9a2a 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -17,7 +17,7 @@ unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT; EXPORT_SYMBOL(sbi_spec_version); static void (*__sbi_set_timer)(uint64_t stime) __ro_after_init; -static int (*__sbi_send_ipi)(const struct cpumask *cpu_mask) __ro_after_init; +static void (*__sbi_send_ipi)(unsigned int cpu) __ro_after_init; static int (*__sbi_rfence)(int fid, const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long arg4, unsigned long arg5) __ro_after_init; @@ -131,17 +131,6 @@ void sbi_shutdown(void) EXPORT_SYMBOL(sbi_shutdown); /** - * sbi_clear_ipi() - Clear any pending IPIs for the calling hart. - * - * Return: None - */ -void sbi_clear_ipi(void) -{ - sbi_ecall(SBI_EXT_0_1_CLEAR_IPI, 0, 0, 0, 0, 0, 0, 0); -} -EXPORT_SYMBOL(sbi_clear_ipi); - -/** * __sbi_set_timer_v01() - Program the timer for next timer event. * @stime_value: The value after which next timer event should fire. * @@ -157,17 +146,12 @@ static void __sbi_set_timer_v01(uint64_t stime_value) #endif } -static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask) +static void __sbi_send_ipi_v01(unsigned int cpu) { - unsigned long hart_mask; - - if (!cpu_mask || cpumask_empty(cpu_mask)) - cpu_mask = cpu_online_mask; - hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask); - + unsigned long hart_mask = + __sbi_v01_cpumask_to_hartmask(cpumask_of(cpu)); sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)(&hart_mask), 0, 0, 0, 0, 0); - return 0; } static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask, @@ -216,12 +200,10 @@ static void __sbi_set_timer_v01(uint64_t stime_value) sbi_major_version(), sbi_minor_version()); } -static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask) +static void __sbi_send_ipi_v01(unsigned int cpu) { pr_warn("IPI extension is not available in SBI v%lu.%lu\n", sbi_major_version(), sbi_minor_version()); - - return 0; } static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask, @@ -248,55 +230,18 @@ static void __sbi_set_timer_v02(uint64_t stime_value) #endif } -static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask) +static void __sbi_send_ipi_v02(unsigned int cpu) { - unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0; - struct sbiret ret = {0}; int result; + struct sbiret ret = {0}; - if (!cpu_mask || cpumask_empty(cpu_mask)) - cpu_mask = cpu_online_mask; - - for_each_cpu(cpuid, cpu_mask) { - hartid = cpuid_to_hartid_map(cpuid); - if (hmask) { - if (hartid + BITS_PER_LONG <= htop || - hbase + BITS_PER_LONG <= hartid) { - ret = sbi_ecall(SBI_EXT_IPI, - SBI_EXT_IPI_SEND_IPI, hmask, - hbase, 0, 0, 0, 0); - if (ret.error) - goto ecall_failed; - hmask = 0; - } else if (hartid < hbase) { - /* shift the mask to fit lower hartid */ - hmask <<= hbase - hartid; - hbase = hartid; - } - } - if (!hmask) { - hbase = hartid; - htop = hartid; - } else if (hartid > htop) { - htop = hartid; - } - hmask |= BIT(hartid - hbase); - } - - if (hmask) { - ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI, - hmask, hbase, 0, 0, 0, 0); - if (ret.error) - goto ecall_failed; + ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI, + 1UL, cpuid_to_hartid_map(cpu), 0, 0, 0, 0); + if (ret.error) { + result = sbi_err_map_linux_errno(ret.error); + pr_err("%s: hbase = [%lu] failed (error [%d])\n", + __func__, cpuid_to_hartid_map(cpu), result); } - - return 0; - -ecall_failed: - result = sbi_err_map_linux_errno(ret.error); - pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n", - __func__, hbase, hmask, result); - return result; } static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask, @@ -410,13 +355,11 @@ void sbi_set_timer(uint64_t stime_value) /** * sbi_send_ipi() - Send an IPI to any hart. - * @cpu_mask: A cpu mask containing all the target harts. - * - * Return: 0 on success, appropriate linux error code otherwise. + * @cpu: Logical id of the target CPU. */ -int sbi_send_ipi(const struct cpumask *cpu_mask) +void sbi_send_ipi(unsigned int cpu) { - return __sbi_send_ipi(cpu_mask); + __sbi_send_ipi(cpu); } EXPORT_SYMBOL(sbi_send_ipi); @@ -581,19 +524,18 @@ static void sbi_srst_power_off(void) * sbi_probe_extension() - Check if an SBI extension ID is supported or not. * @extid: The extension ID to be probed. * - * Return: Extension specific nonzero value f yes, -ENOTSUPP otherwise. + * Return: 1 or an extension specific nonzero value if yes, 0 otherwise. */ -int sbi_probe_extension(int extid) +long sbi_probe_extension(int extid) { struct sbiret ret; ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_PROBE_EXT, extid, 0, 0, 0, 0, 0); if (!ret.error) - if (ret.value) - return ret.value; + return ret.value; - return -ENOTSUPP; + return 0; } EXPORT_SYMBOL(sbi_probe_extension); @@ -641,15 +583,6 @@ long sbi_get_mimpid(void) } EXPORT_SYMBOL_GPL(sbi_get_mimpid); -static void sbi_send_cpumask_ipi(const struct cpumask *target) -{ - sbi_send_ipi(target); -} - -static const struct riscv_ipi_ops sbi_ipi_ops = { - .ipi_inject = sbi_send_cpumask_ipi -}; - void __init sbi_init(void) { int ret; @@ -665,26 +598,26 @@ void __init sbi_init(void) if (!sbi_spec_is_0_1()) { pr_info("SBI implementation ID=0x%lx Version=0x%lx\n", sbi_get_firmware_id(), sbi_get_firmware_version()); - if (sbi_probe_extension(SBI_EXT_TIME) > 0) { + if (sbi_probe_extension(SBI_EXT_TIME)) { __sbi_set_timer = __sbi_set_timer_v02; pr_info("SBI TIME extension detected\n"); } else { __sbi_set_timer = __sbi_set_timer_v01; } - if (sbi_probe_extension(SBI_EXT_IPI) > 0) { + if (sbi_probe_extension(SBI_EXT_IPI)) { __sbi_send_ipi = __sbi_send_ipi_v02; pr_info("SBI IPI extension detected\n"); } else { __sbi_send_ipi = __sbi_send_ipi_v01; } - if (sbi_probe_extension(SBI_EXT_RFENCE) > 0) { + if (sbi_probe_extension(SBI_EXT_RFENCE)) { __sbi_rfence = __sbi_rfence_v02; pr_info("SBI RFENCE extension detected\n"); } else { __sbi_rfence = __sbi_rfence_v01; } if ((sbi_spec_version >= sbi_mk_version(0, 3)) && - (sbi_probe_extension(SBI_EXT_SRST) > 0)) { + sbi_probe_extension(SBI_EXT_SRST)) { pr_info("SBI SRST extension detected\n"); pm_power_off = sbi_srst_power_off; sbi_srst_reboot_nb.notifier_call = sbi_srst_reboot; @@ -696,6 +629,4 @@ void __init sbi_init(void) __sbi_send_ipi = __sbi_send_ipi_v01; __sbi_rfence = __sbi_rfence_v01; } - - riscv_set_ipi_ops(&sbi_ipi_ops); } diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index dcfa4b6fa4b1..36b026057503 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -278,12 +278,8 @@ void __init setup_arch(char **cmdline_p) #if IS_ENABLED(CONFIG_BUILTIN_DTB) unflatten_and_copy_device_tree(); #else - if (early_init_dt_verify(__va(XIP_FIXUP(dtb_early_pa)))) - unflatten_device_tree(); - else - pr_err("No DTB found in kernel mappings\n"); + unflatten_device_tree(); #endif - early_init_fdt_scan_reserved_mem(); misc_mem_init(); init_resources(); @@ -297,7 +293,7 @@ void __init setup_arch(char **cmdline_p) setup_smp(); #endif - riscv_init_cbom_blocksize(); + riscv_init_cbo_blocksizes(); riscv_fill_hwcap(); apply_boot_alternatives(); if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) && diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index bfb2afa4135f..9aff9d720590 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -12,6 +12,7 @@ #include <linux/syscalls.h> #include <linux/resume_user_mode.h> #include <linux/linkage.h> +#include <linux/entry-common.h> #include <asm/ucontext.h> #include <asm/vdso.h> @@ -19,6 +20,7 @@ #include <asm/signal32.h> #include <asm/switch_to.h> #include <asm/csr.h> +#include <asm/cacheflush.h> extern u32 __user_rt_sigreturn[2]; @@ -181,6 +183,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, { struct rt_sigframe __user *frame; long err = 0; + unsigned long __maybe_unused addr; frame = get_sigframe(ksig, regs, sizeof(*frame)); if (!access_ok(frame, sizeof(*frame))) @@ -209,7 +212,12 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, if (copy_to_user(&frame->sigreturn_code, __user_rt_sigreturn, sizeof(frame->sigreturn_code))) return -EFAULT; - regs->ra = (unsigned long)&frame->sigreturn_code; + + addr = (unsigned long)&frame->sigreturn_code; + /* Make sure the two instructions are pushed to icache. */ + flush_icache_range(addr, addr + sizeof(frame->sigreturn_code)); + + regs->ra = addr; #endif /* CONFIG_MMU */ /* @@ -274,7 +282,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) signal_setup_done(ret, ksig, 0); } -static void do_signal(struct pt_regs *regs) +void arch_do_signal_or_restart(struct pt_regs *regs) { struct ksignal ksig; @@ -311,29 +319,3 @@ static void do_signal(struct pt_regs *regs) */ restore_saved_sigmask(); } - -/* - * Handle any pending work on the resume-to-userspace path, as indicated by - * _TIF_WORK_MASK. Entered from assembly with IRQs off. - */ -asmlinkage __visible void do_work_pending(struct pt_regs *regs, - unsigned long thread_info_flags) -{ - do { - if (thread_info_flags & _TIF_NEED_RESCHED) { - schedule(); - } else { - local_irq_enable(); - if (thread_info_flags & _TIF_UPROBE) - uprobe_notify_resume(regs); - /* Handle pending signal delivery */ - if (thread_info_flags & (_TIF_SIGPENDING | - _TIF_NOTIFY_SIGNAL)) - do_signal(regs); - if (thread_info_flags & _TIF_NOTIFY_RESUME) - resume_user_mode_work(regs); - } - local_irq_disable(); - thread_info_flags = read_thread_flags(); - } while (thread_info_flags & _TIF_WORK_MASK); -} diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 8c3b59f1f9b8..23e533766a49 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -13,14 +13,15 @@ #include <linux/interrupt.h> #include <linux/module.h> #include <linux/kexec.h> +#include <linux/percpu.h> #include <linux/profile.h> #include <linux/smp.h> #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/delay.h> +#include <linux/irq.h> #include <linux/irq_work.h> -#include <asm/sbi.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> #include <asm/cpu_ops.h> @@ -44,11 +45,10 @@ void __init smp_setup_processor_id(void) cpuid_to_hartid_map(0) = boot_cpu_hartid; } -/* A collection of single bit ipi messages. */ -static struct { - unsigned long stats[IPI_MAX] ____cacheline_aligned; - unsigned long bits ____cacheline_aligned; -} ipi_data[NR_CPUS] __cacheline_aligned; +static DEFINE_PER_CPU_READ_MOSTLY(int, ipi_dummy_dev); +static int ipi_virq_base __ro_after_init; +static int nr_ipi __ro_after_init = IPI_MAX; +static struct irq_desc *ipi_desc[IPI_MAX] __read_mostly; int riscv_hartid_to_cpuid(unsigned long hartid) { @@ -100,48 +100,14 @@ static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) } #endif -static const struct riscv_ipi_ops *ipi_ops __ro_after_init; - -void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops) -{ - ipi_ops = ops; -} -EXPORT_SYMBOL_GPL(riscv_set_ipi_ops); - -void riscv_clear_ipi(void) -{ - if (ipi_ops && ipi_ops->ipi_clear) - ipi_ops->ipi_clear(); - - csr_clear(CSR_IP, IE_SIE); -} -EXPORT_SYMBOL_GPL(riscv_clear_ipi); - static void send_ipi_mask(const struct cpumask *mask, enum ipi_message_type op) { - int cpu; - - smp_mb__before_atomic(); - for_each_cpu(cpu, mask) - set_bit(op, &ipi_data[cpu].bits); - smp_mb__after_atomic(); - - if (ipi_ops && ipi_ops->ipi_inject) - ipi_ops->ipi_inject(mask); - else - pr_warn("SMP: IPI inject method not available\n"); + __ipi_send_mask(ipi_desc[op], mask); } static void send_ipi_single(int cpu, enum ipi_message_type op) { - smp_mb__before_atomic(); - set_bit(op, &ipi_data[cpu].bits); - smp_mb__after_atomic(); - - if (ipi_ops && ipi_ops->ipi_inject) - ipi_ops->ipi_inject(cpumask_of(cpu)); - else - pr_warn("SMP: IPI inject method not available\n"); + __ipi_send_mask(ipi_desc[op], cpumask_of(cpu)); } #ifdef CONFIG_IRQ_WORK @@ -151,59 +117,98 @@ void arch_irq_work_raise(void) } #endif -void handle_IPI(struct pt_regs *regs) +static irqreturn_t handle_IPI(int irq, void *data) { - unsigned int cpu = smp_processor_id(); - unsigned long *pending_ipis = &ipi_data[cpu].bits; - unsigned long *stats = ipi_data[cpu].stats; + int ipi = irq - ipi_virq_base; + + switch (ipi) { + case IPI_RESCHEDULE: + scheduler_ipi(); + break; + case IPI_CALL_FUNC: + generic_smp_call_function_interrupt(); + break; + case IPI_CPU_STOP: + ipi_stop(); + break; + case IPI_CPU_CRASH_STOP: + ipi_cpu_crash_stop(smp_processor_id(), get_irq_regs()); + break; + case IPI_IRQ_WORK: + irq_work_run(); + break; +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST + case IPI_TIMER: + tick_receive_broadcast(); + break; +#endif + default: + pr_warn("CPU%d: unhandled IPI%d\n", smp_processor_id(), ipi); + break; + } - riscv_clear_ipi(); + return IRQ_HANDLED; +} - while (true) { - unsigned long ops; +void riscv_ipi_enable(void) +{ + int i; - /* Order bit clearing and data access. */ - mb(); + if (WARN_ON_ONCE(!ipi_virq_base)) + return; - ops = xchg(pending_ipis, 0); - if (ops == 0) - return; + for (i = 0; i < nr_ipi; i++) + enable_percpu_irq(ipi_virq_base + i, 0); +} - if (ops & (1 << IPI_RESCHEDULE)) { - stats[IPI_RESCHEDULE]++; - scheduler_ipi(); - } +void riscv_ipi_disable(void) +{ + int i; - if (ops & (1 << IPI_CALL_FUNC)) { - stats[IPI_CALL_FUNC]++; - generic_smp_call_function_interrupt(); - } + if (WARN_ON_ONCE(!ipi_virq_base)) + return; - if (ops & (1 << IPI_CPU_STOP)) { - stats[IPI_CPU_STOP]++; - ipi_stop(); - } + for (i = 0; i < nr_ipi; i++) + disable_percpu_irq(ipi_virq_base + i); +} - if (ops & (1 << IPI_CPU_CRASH_STOP)) { - ipi_cpu_crash_stop(cpu, get_irq_regs()); - } +bool riscv_ipi_have_virq_range(void) +{ + return (ipi_virq_base) ? true : false; +} - if (ops & (1 << IPI_IRQ_WORK)) { - stats[IPI_IRQ_WORK]++; - irq_work_run(); - } +DEFINE_STATIC_KEY_FALSE(riscv_ipi_for_rfence); +EXPORT_SYMBOL_GPL(riscv_ipi_for_rfence); -#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST - if (ops & (1 << IPI_TIMER)) { - stats[IPI_TIMER]++; - tick_receive_broadcast(); - } -#endif - BUG_ON((ops >> IPI_MAX) != 0); +void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence) +{ + int i, err; + + if (WARN_ON(ipi_virq_base)) + return; + + WARN_ON(nr < IPI_MAX); + nr_ipi = min(nr, IPI_MAX); + ipi_virq_base = virq; - /* Order data access and bit testing. */ - mb(); + /* Request IPIs */ + for (i = 0; i < nr_ipi; i++) { + err = request_percpu_irq(ipi_virq_base + i, handle_IPI, + "IPI", &ipi_dummy_dev); + WARN_ON(err); + + ipi_desc[i] = irq_to_desc(ipi_virq_base + i); + irq_set_status_flags(ipi_virq_base + i, IRQ_HIDDEN); } + + /* Enabled IPIs for boot CPU immediately */ + riscv_ipi_enable(); + + /* Update RFENCE static key */ + if (use_for_rfence) + static_branch_enable(&riscv_ipi_for_rfence); + else + static_branch_disable(&riscv_ipi_for_rfence); } static const char * const ipi_names[] = { @@ -223,7 +228,7 @@ void show_ipi_stats(struct seq_file *p, int prec) seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : ""); for_each_online_cpu(cpu) - seq_printf(p, "%10lu ", ipi_data[cpu].stats[i]); + seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu)); seq_printf(p, " %s\n", ipi_names[i]); } } @@ -328,8 +333,8 @@ bool smp_crash_stop_failed(void) } #endif -void smp_send_reschedule(int cpu) +void arch_smp_send_reschedule(int cpu) { send_ipi_single(cpu, IPI_RESCHEDULE); } -EXPORT_SYMBOL_GPL(smp_send_reschedule); +EXPORT_SYMBOL_GPL(arch_smp_send_reschedule); diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index ddb2afba6d25..445a4efee267 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -30,7 +30,6 @@ #include <asm/numa.h> #include <asm/tlbflush.h> #include <asm/sections.h> -#include <asm/sbi.h> #include <asm/smp.h> #include "head.h" @@ -158,16 +157,17 @@ asmlinkage __visible void smp_callin(void) struct mm_struct *mm = &init_mm; unsigned int curr_cpuid = smp_processor_id(); - riscv_clear_ipi(); - /* All kernel threads share the same mm context. */ mmgrab(mm); current->active_mm = mm; + riscv_ipi_enable(); + store_cpu_topology(curr_cpuid); notify_cpu_starting(curr_cpuid); numa_add_cpu(curr_cpuid); set_cpu_online(curr_cpuid, 1); + probe_vendor_features(curr_cpuid); /* * Remote TLB flushes are ignored while the CPU is offline, so emit diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index f9a5a7c90ff0..64a9c093aef9 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -101,7 +101,7 @@ void notrace walk_stackframe(struct task_struct *task, while (!kstack_end(ksp)) { if (__kernel_text_address(pc) && unlikely(!fn(arg, pc))) break; - pc = (*ksp++) - 0x4; + pc = READ_ONCE_NOCHECK(*ksp++) - 0x4; } } diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c index 9ba24fb8cc93..3c89b8ec69c4 100644 --- a/arch/riscv/kernel/suspend.c +++ b/arch/riscv/kernel/suspend.c @@ -8,7 +8,7 @@ #include <asm/csr.h> #include <asm/suspend.h> -static void suspend_save_csrs(struct suspend_context *context) +void suspend_save_csrs(struct suspend_context *context) { context->scratch = csr_read(CSR_SCRATCH); context->tvec = csr_read(CSR_TVEC); @@ -29,7 +29,7 @@ static void suspend_save_csrs(struct suspend_context *context) #endif } -static void suspend_restore_csrs(struct suspend_context *context) +void suspend_restore_csrs(struct suspend_context *context) { csr_write(CSR_SCRATCH, context->scratch); csr_write(CSR_TVEC, context->tvec); diff --git a/arch/riscv/kernel/suspend_entry.S b/arch/riscv/kernel/suspend_entry.S index aafcca58c19d..12b52afe09a4 100644 --- a/arch/riscv/kernel/suspend_entry.S +++ b/arch/riscv/kernel/suspend_entry.S @@ -7,6 +7,7 @@ #include <linux/linkage.h> #include <asm/asm.h> #include <asm/asm-offsets.h> +#include <asm/assembler.h> #include <asm/csr.h> #include <asm/xip_fixup.h> @@ -83,39 +84,10 @@ ENTRY(__cpu_resume_enter) add a0, a1, zero /* Restore CSRs */ - REG_L t0, (SUSPEND_CONTEXT_REGS + PT_EPC)(a0) - csrw CSR_EPC, t0 - REG_L t0, (SUSPEND_CONTEXT_REGS + PT_STATUS)(a0) - csrw CSR_STATUS, t0 - REG_L t0, (SUSPEND_CONTEXT_REGS + PT_BADADDR)(a0) - csrw CSR_TVAL, t0 - REG_L t0, (SUSPEND_CONTEXT_REGS + PT_CAUSE)(a0) - csrw CSR_CAUSE, t0 + suspend_restore_csrs /* Restore registers (except A0 and T0-T6) */ - REG_L ra, (SUSPEND_CONTEXT_REGS + PT_RA)(a0) - REG_L sp, (SUSPEND_CONTEXT_REGS + PT_SP)(a0) - REG_L gp, (SUSPEND_CONTEXT_REGS + PT_GP)(a0) - REG_L tp, (SUSPEND_CONTEXT_REGS + PT_TP)(a0) - REG_L s0, (SUSPEND_CONTEXT_REGS + PT_S0)(a0) - REG_L s1, (SUSPEND_CONTEXT_REGS + PT_S1)(a0) - REG_L a1, (SUSPEND_CONTEXT_REGS + PT_A1)(a0) - REG_L a2, (SUSPEND_CONTEXT_REGS + PT_A2)(a0) - REG_L a3, (SUSPEND_CONTEXT_REGS + PT_A3)(a0) - REG_L a4, (SUSPEND_CONTEXT_REGS + PT_A4)(a0) - REG_L a5, (SUSPEND_CONTEXT_REGS + PT_A5)(a0) - REG_L a6, (SUSPEND_CONTEXT_REGS + PT_A6)(a0) - REG_L a7, (SUSPEND_CONTEXT_REGS + PT_A7)(a0) - REG_L s2, (SUSPEND_CONTEXT_REGS + PT_S2)(a0) - REG_L s3, (SUSPEND_CONTEXT_REGS + PT_S3)(a0) - REG_L s4, (SUSPEND_CONTEXT_REGS + PT_S4)(a0) - REG_L s5, (SUSPEND_CONTEXT_REGS + PT_S5)(a0) - REG_L s6, (SUSPEND_CONTEXT_REGS + PT_S6)(a0) - REG_L s7, (SUSPEND_CONTEXT_REGS + PT_S7)(a0) - REG_L s8, (SUSPEND_CONTEXT_REGS + PT_S8)(a0) - REG_L s9, (SUSPEND_CONTEXT_REGS + PT_S9)(a0) - REG_L s10, (SUSPEND_CONTEXT_REGS + PT_S10)(a0) - REG_L s11, (SUSPEND_CONTEXT_REGS + PT_S11)(a0) + suspend_restore_regs /* Return zero value */ add a0, zero, zero diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 5d3f2fbeb33c..5db29683ebee 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -6,9 +6,15 @@ */ #include <linux/syscalls.h> -#include <asm/unistd.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> +#include <asm/hwprobe.h> +#include <asm/sbi.h> +#include <asm/switch_to.h> +#include <asm/uaccess.h> +#include <asm/unistd.h> #include <asm-generic/mman-common.h> +#include <vdso/vsyscall.h> static long riscv_sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -69,3 +75,225 @@ SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end, return 0; } + +/* + * The hwprobe interface, for allowing userspace to probe to see which features + * are supported by the hardware. See Documentation/riscv/hwprobe.rst for more + * details. + */ +static void hwprobe_arch_id(struct riscv_hwprobe *pair, + const struct cpumask *cpus) +{ + u64 id = -1ULL; + bool first = true; + int cpu; + + for_each_cpu(cpu, cpus) { + u64 cpu_id; + + switch (pair->key) { + case RISCV_HWPROBE_KEY_MVENDORID: + cpu_id = riscv_cached_mvendorid(cpu); + break; + case RISCV_HWPROBE_KEY_MIMPID: + cpu_id = riscv_cached_mimpid(cpu); + break; + case RISCV_HWPROBE_KEY_MARCHID: + cpu_id = riscv_cached_marchid(cpu); + break; + } + + if (first) { + id = cpu_id; + first = false; + } + + /* + * If there's a mismatch for the given set, return -1 in the + * value. + */ + if (id != cpu_id) { + id = -1ULL; + break; + } + } + + pair->value = id; +} + +static u64 hwprobe_misaligned(const struct cpumask *cpus) +{ + int cpu; + u64 perf = -1ULL; + + for_each_cpu(cpu, cpus) { + int this_perf = per_cpu(misaligned_access_speed, cpu); + + if (perf == -1ULL) + perf = this_perf; + + if (perf != this_perf) { + perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + break; + } + } + + if (perf == -1ULL) + return RISCV_HWPROBE_MISALIGNED_UNKNOWN; + + return perf; +} + +static void hwprobe_one_pair(struct riscv_hwprobe *pair, + const struct cpumask *cpus) +{ + switch (pair->key) { + case RISCV_HWPROBE_KEY_MVENDORID: + case RISCV_HWPROBE_KEY_MARCHID: + case RISCV_HWPROBE_KEY_MIMPID: + hwprobe_arch_id(pair, cpus); + break; + /* + * The kernel already assumes that the base single-letter ISA + * extensions are supported on all harts, and only supports the + * IMA base, so just cheat a bit here and tell that to + * userspace. + */ + case RISCV_HWPROBE_KEY_BASE_BEHAVIOR: + pair->value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA; + break; + + case RISCV_HWPROBE_KEY_IMA_EXT_0: + pair->value = 0; + if (has_fpu()) + pair->value |= RISCV_HWPROBE_IMA_FD; + + if (riscv_isa_extension_available(NULL, c)) + pair->value |= RISCV_HWPROBE_IMA_C; + + break; + + case RISCV_HWPROBE_KEY_CPUPERF_0: + pair->value = hwprobe_misaligned(cpus); + break; + + /* + * For forward compatibility, unknown keys don't fail the whole + * call, but get their element key set to -1 and value set to 0 + * indicating they're unrecognized. + */ + default: + pair->key = -1; + pair->value = 0; + break; + } +} + +static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, + size_t pair_count, size_t cpu_count, + unsigned long __user *cpus_user, + unsigned int flags) +{ + size_t out; + int ret; + cpumask_t cpus; + + /* Check the reserved flags. */ + if (flags != 0) + return -EINVAL; + + /* + * The interface supports taking in a CPU mask, and returns values that + * are consistent across that mask. Allow userspace to specify NULL and + * 0 as a shortcut to all online CPUs. + */ + cpumask_clear(&cpus); + if (!cpu_count && !cpus_user) { + cpumask_copy(&cpus, cpu_online_mask); + } else { + if (cpu_count > cpumask_size()) + cpu_count = cpumask_size(); + + ret = copy_from_user(&cpus, cpus_user, cpu_count); + if (ret) + return -EFAULT; + + /* + * Userspace must provide at least one online CPU, without that + * there's no way to define what is supported. + */ + cpumask_and(&cpus, &cpus, cpu_online_mask); + if (cpumask_empty(&cpus)) + return -EINVAL; + } + + for (out = 0; out < pair_count; out++, pairs++) { + struct riscv_hwprobe pair; + + if (get_user(pair.key, &pairs->key)) + return -EFAULT; + + pair.value = 0; + hwprobe_one_pair(&pair, &cpus); + ret = put_user(pair.key, &pairs->key); + if (ret == 0) + ret = put_user(pair.value, &pairs->value); + + if (ret) + return -EFAULT; + } + + return 0; +} + +#ifdef CONFIG_MMU + +static int __init init_hwprobe_vdso_data(void) +{ + struct vdso_data *vd = __arch_get_k_vdso_data(); + struct arch_vdso_data *avd = &vd->arch_data; + u64 id_bitsmash = 0; + struct riscv_hwprobe pair; + int key; + + /* + * Initialize vDSO data with the answers for the "all CPUs" case, to + * save a syscall in the common case. + */ + for (key = 0; key <= RISCV_HWPROBE_MAX_KEY; key++) { + pair.key = key; + hwprobe_one_pair(&pair, cpu_online_mask); + + WARN_ON_ONCE(pair.key < 0); + + avd->all_cpu_hwprobe_values[key] = pair.value; + /* + * Smash together the vendor, arch, and impl IDs to see if + * they're all 0 or any negative. + */ + if (key <= RISCV_HWPROBE_KEY_MIMPID) + id_bitsmash |= pair.value; + } + + /* + * If the arch, vendor, and implementation ID are all the same across + * all harts, then assume all CPUs are the same, and allow the vDSO to + * answer queries for arbitrary masks. However if all values are 0 (not + * populated) or any value returns -1 (varies across CPUs), then the + * vDSO should defer to the kernel for exotic cpu masks. + */ + avd->homogeneous_cpus = id_bitsmash != 0 && id_bitsmash != -1; + return 0; +} + +arch_initcall_sync(init_hwprobe_vdso_data); + +#endif /* CONFIG_MMU */ + +SYSCALL_DEFINE5(riscv_hwprobe, struct riscv_hwprobe __user *, pairs, + size_t, pair_count, size_t, cpu_count, unsigned long __user *, + cpus, unsigned int, flags) +{ + return do_riscv_hwprobe(pairs, pair_count, cpu_count, + cpus, flags); +} diff --git a/arch/riscv/kernel/trace_irq.c b/arch/riscv/kernel/trace_irq.c deleted file mode 100644 index 095ac976d7da..000000000000 --- a/arch/riscv/kernel/trace_irq.c +++ /dev/null @@ -1,27 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com> - */ - -#include <linux/irqflags.h> -#include <linux/kprobes.h> -#include "trace_irq.h" - -/* - * trace_hardirqs_on/off require the caller to setup frame pointer properly. - * Otherwise, CALLER_ADDR1 might trigger an pagging exception in kernel. - * Here we add one extra level so they can be safely called by low - * level entry code which $fp is used for other purpose. - */ - -void __trace_hardirqs_on(void) -{ - trace_hardirqs_on(); -} -NOKPROBE_SYMBOL(__trace_hardirqs_on); - -void __trace_hardirqs_off(void) -{ - trace_hardirqs_off(); -} -NOKPROBE_SYMBOL(__trace_hardirqs_off); diff --git a/arch/riscv/kernel/trace_irq.h b/arch/riscv/kernel/trace_irq.h deleted file mode 100644 index 99fe67377e5e..000000000000 --- a/arch/riscv/kernel/trace_irq.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com> - */ -#ifndef __TRACE_IRQ_H -#define __TRACE_IRQ_H - -void __trace_hardirqs_on(void); -void __trace_hardirqs_off(void); - -#endif /* __TRACE_IRQ_H */ diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index f6fda94e8e59..8c258b78c925 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -17,12 +17,14 @@ #include <linux/module.h> #include <linux/irq.h> #include <linux/kexec.h> +#include <linux/entry-common.h> #include <asm/asm-prototypes.h> #include <asm/bug.h> #include <asm/csr.h> #include <asm/processor.h> #include <asm/ptrace.h> +#include <asm/syscall.h> #include <asm/thread_info.h> int show_unhandled_signals = 1; @@ -119,14 +121,22 @@ static void do_trap_error(struct pt_regs *regs, int signo, int code, } #if defined(CONFIG_XIP_KERNEL) && defined(CONFIG_RISCV_ALTERNATIVE) -#define __trap_section __section(".xip.traps") +#define __trap_section __noinstr_section(".xip.traps") #else -#define __trap_section +#define __trap_section noinstr #endif -#define DO_ERROR_INFO(name, signo, code, str) \ -asmlinkage __visible __trap_section void name(struct pt_regs *regs) \ -{ \ - do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \ +#define DO_ERROR_INFO(name, signo, code, str) \ +asmlinkage __visible __trap_section void name(struct pt_regs *regs) \ +{ \ + if (user_mode(regs)) { \ + irqentry_enter_from_user_mode(regs); \ + do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \ + irqentry_exit_to_user_mode(regs); \ + } else { \ + irqentry_state_t state = irqentry_nmi_enter(regs); \ + do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \ + irqentry_nmi_exit(regs, state); \ + } \ } DO_ERROR_INFO(do_trap_unknown, @@ -148,26 +158,50 @@ DO_ERROR_INFO(do_trap_store_misaligned, int handle_misaligned_load(struct pt_regs *regs); int handle_misaligned_store(struct pt_regs *regs); -asmlinkage void __trap_section do_trap_load_misaligned(struct pt_regs *regs) +asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs) { - if (!handle_misaligned_load(regs)) - return; - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - load address misaligned"); + if (user_mode(regs)) { + irqentry_enter_from_user_mode(regs); + + if (handle_misaligned_load(regs)) + do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, + "Oops - load address misaligned"); + + irqentry_exit_to_user_mode(regs); + } else { + irqentry_state_t state = irqentry_nmi_enter(regs); + + if (handle_misaligned_load(regs)) + do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, + "Oops - load address misaligned"); + + irqentry_nmi_exit(regs, state); + } } -asmlinkage void __trap_section do_trap_store_misaligned(struct pt_regs *regs) +asmlinkage __visible __trap_section void do_trap_store_misaligned(struct pt_regs *regs) { - if (!handle_misaligned_store(regs)) - return; - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - store (or AMO) address misaligned"); + if (user_mode(regs)) { + irqentry_enter_from_user_mode(regs); + + if (handle_misaligned_store(regs)) + do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, + "Oops - store (or AMO) address misaligned"); + + irqentry_exit_to_user_mode(regs); + } else { + irqentry_state_t state = irqentry_nmi_enter(regs); + + if (handle_misaligned_store(regs)) + do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, + "Oops - store (or AMO) address misaligned"); + + irqentry_nmi_exit(regs, state); + } } #endif DO_ERROR_INFO(do_trap_store_fault, SIGSEGV, SEGV_ACCERR, "store (or AMO) access fault"); -DO_ERROR_INFO(do_trap_ecall_u, - SIGILL, ILL_ILLTRP, "environment call from U-mode"); DO_ERROR_INFO(do_trap_ecall_s, SIGILL, ILL_ILLTRP, "environment call from S-mode"); DO_ERROR_INFO(do_trap_ecall_m, @@ -183,7 +217,7 @@ static inline unsigned long get_break_insn_length(unsigned long pc) return GET_INSN_LENGTH(insn); } -asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) +void handle_break(struct pt_regs *regs) { #ifdef CONFIG_KPROBES if (kprobe_single_step_handler(regs)) @@ -213,7 +247,77 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) else die(regs, "Kernel BUG"); } -NOKPROBE_SYMBOL(do_trap_break); + +asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) +{ + if (user_mode(regs)) { + irqentry_enter_from_user_mode(regs); + + handle_break(regs); + + irqentry_exit_to_user_mode(regs); + } else { + irqentry_state_t state = irqentry_nmi_enter(regs); + + handle_break(regs); + + irqentry_nmi_exit(regs, state); + } +} + +asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs) +{ + if (user_mode(regs)) { + ulong syscall = regs->a7; + + regs->epc += 4; + regs->orig_a0 = regs->a0; + + syscall = syscall_enter_from_user_mode(regs, syscall); + + if (syscall < NR_syscalls) + syscall_handler(regs, syscall); + else + regs->a0 = -ENOSYS; + + syscall_exit_to_user_mode(regs); + } else { + irqentry_state_t state = irqentry_nmi_enter(regs); + + do_trap_error(regs, SIGILL, ILL_ILLTRP, regs->epc, + "Oops - environment call from U-mode"); + + irqentry_nmi_exit(regs, state); + } + +} + +#ifdef CONFIG_MMU +asmlinkage __visible noinstr void do_page_fault(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + + handle_page_fault(regs); + + local_irq_disable(); + + irqentry_exit(regs, state); +} +#endif + +asmlinkage __visible noinstr void do_irq(struct pt_regs *regs) +{ + struct pt_regs *old_regs; + irqentry_state_t state = irqentry_enter(regs); + + irq_enter_rcu(); + old_regs = set_irq_regs(regs); + handle_arch_irq(regs); + set_irq_regs(old_regs); + irq_exit_rcu(); + + irqentry_exit(regs, state); +} #ifdef CONFIG_GENERIC_BUG int is_valid_bugaddr(unsigned long pc) diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index cc2d1e8c8736..9a68e7eaae4d 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -14,13 +14,7 @@ #include <asm/page.h> #include <asm/vdso.h> #include <linux/time_namespace.h> - -#ifdef CONFIG_GENERIC_TIME_VSYSCALL #include <vdso/datapage.h> -#else -struct vdso_data { -}; -#endif enum vvar_pages { VVAR_DATA_PAGE_OFFSET, diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 06e6b27f3bcc..6b1dba11bf6d 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -1,9 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only # Copied from arch/tile/kernel/vdso/Makefile -# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before -# the inclusion of generic Makefile. -ARCH_REL_TYPE_ABS := R_RISCV_32|R_RISCV_64|R_RISCV_JUMP_SLOT +# Include the generic Makefile to check the built vdso. include $(srctree)/lib/vdso/Makefile # Symbols present in the vdso vdso-syms = rt_sigreturn @@ -12,6 +10,8 @@ vdso-syms += vgettimeofday endif vdso-syms += getcpu vdso-syms += flush_icache +vdso-syms += hwprobe +vdso-syms += sys_hwprobe # Files to link into the vdso obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o @@ -23,6 +23,8 @@ ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) endif +CFLAGS_hwprobe.o += -fPIC + # Build rules targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c new file mode 100644 index 000000000000..d40bec6ac078 --- /dev/null +++ b/arch/riscv/kernel/vdso/hwprobe.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2023 Rivos, Inc + */ + +#include <linux/types.h> +#include <vdso/datapage.h> +#include <vdso/helpers.h> + +extern int riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, + size_t cpu_count, unsigned long *cpus, + unsigned int flags); + +/* Add a prototype to avoid -Wmissing-prototypes warning. */ +int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, + size_t cpu_count, unsigned long *cpus, + unsigned int flags); + +int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, + size_t cpu_count, unsigned long *cpus, + unsigned int flags) +{ + const struct vdso_data *vd = __arch_get_vdso_data(); + const struct arch_vdso_data *avd = &vd->arch_data; + bool all_cpus = !cpu_count && !cpus; + struct riscv_hwprobe *p = pairs; + struct riscv_hwprobe *end = pairs + pair_count; + + /* + * Defer to the syscall for exotic requests. The vdso has answers + * stashed away only for the "all cpus" case. If all CPUs are + * homogeneous, then this function can handle requests for arbitrary + * masks. + */ + if ((flags != 0) || (!all_cpus && !avd->homogeneous_cpus)) + return riscv_hwprobe(pairs, pair_count, cpu_count, cpus, flags); + + /* This is something we can handle, fill out the pairs. */ + while (p < end) { + if (p->key <= RISCV_HWPROBE_MAX_KEY) { + p->value = avd->all_cpu_hwprobe_values[p->key]; + + } else { + p->key = -1; + p->value = 0; + } + + p++; + } + + return 0; +} diff --git a/arch/riscv/kernel/vdso/sys_hwprobe.S b/arch/riscv/kernel/vdso/sys_hwprobe.S new file mode 100644 index 000000000000..4e704146c77a --- /dev/null +++ b/arch/riscv/kernel/vdso/sys_hwprobe.S @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2023 Rivos, Inc */ + +#include <linux/linkage.h> +#include <asm/unistd.h> + +.text +ENTRY(riscv_hwprobe) + .cfi_startproc + li a7, __NR_riscv_hwprobe + ecall + ret + + .cfi_endproc +ENDPROC(riscv_hwprobe) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 4a0606633290..82ce64900f3d 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -82,6 +82,9 @@ VERSION #endif __vdso_getcpu; __vdso_flush_icache; +#ifndef COMPAT_VDSO + __vdso_riscv_hwprobe; +#endif local: *; }; } diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 53a8ad65b255..e5f9f4677bbf 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -27,9 +27,6 @@ ENTRY(_start) jiffies = jiffies_64; -PECOFF_SECTION_ALIGNMENT = 0x1000; -PECOFF_FILE_ALIGNMENT = 0x200; - SECTIONS { /* Beginning of code and text segment */ @@ -86,6 +83,11 @@ SECTIONS /* Start of init data section */ __init_data_begin = .; INIT_DATA_SECTION(16) + + .init.pi : { + *(.init.pi*) + } + .init.bss : { *(.init.bss) /* from the EFI stub */ } @@ -99,8 +101,10 @@ SECTIONS *(.rel.dyn*) } - .rela.dyn : { - *(.rela*) + .rela.dyn : ALIGN(8) { + __rela_dyn_start = .; + *(.rela .rela*) + __rela_dyn_end = .; } __init_data_end = .; @@ -129,9 +133,22 @@ SECTIONS *(.sdata*) } + .got : { *(.got*) } + +#ifdef CONFIG_RELOCATABLE + .data.rel : { *(.data.rel*) } + .plt : { *(.plt) } + .dynamic : { *(.dynamic) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .hash : { *(.hash) } + .gnu.hash : { *(.gnu.hash) } +#endif + #ifdef CONFIG_EFI .pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); } __pecoff_data_raw_size = ABSOLUTE(. - __pecoff_text_end); + __pecoff_data_raw_end = ABSOLUTE(.); #endif /* End of data section */ @@ -142,6 +159,7 @@ SECTIONS #ifdef CONFIG_EFI . = ALIGN(PECOFF_SECTION_ALIGNMENT); __pecoff_data_virt_size = ABSOLUTE(. - __pecoff_text_end); + __pecoff_data_virt_end = ABSOLUTE(.); #endif _end = .; diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig index d5a658a047a7..28891e583259 100644 --- a/arch/riscv/kvm/Kconfig +++ b/arch/riscv/kvm/Kconfig @@ -20,15 +20,14 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)" depends on RISCV_SBI && MMU + select HAVE_KVM_EVENTFD + select HAVE_KVM_VCPU_ASYNC_IOCTL + select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_GENERIC_HARDWARE_ENABLING - select MMU_NOTIFIER - select PREEMPT_NOTIFIERS select KVM_MMIO - select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_XFER_TO_GUEST_WORK - select HAVE_KVM_VCPU_ASYNC_IOCTL - select HAVE_KVM_EVENTFD - select SRCU + select MMU_NOTIFIER + select PREEMPT_NOTIFIERS help Support hosting virtualized guest machines. diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index 278e97c06e0a..8031b8912a0d 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -26,3 +26,4 @@ kvm-y += vcpu_sbi_replace.o kvm-y += vcpu_sbi_hsm.o kvm-y += vcpu_timer.o kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o +kvm-y += aia.o diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c new file mode 100644 index 000000000000..4f1286fc7f17 --- /dev/null +++ b/arch/riscv/kvm/aia.c @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + * Copyright (C) 2022 Ventana Micro Systems Inc. + * + * Authors: + * Anup Patel <apatel@ventanamicro.com> + */ + +#include <linux/kernel.h> +#include <linux/kvm_host.h> +#include <asm/hwcap.h> + +DEFINE_STATIC_KEY_FALSE(kvm_riscv_aia_available); + +static void aia_set_hvictl(bool ext_irq_pending) +{ + unsigned long hvictl; + + /* + * HVICTL.IID == 9 and HVICTL.IPRIO == 0 represents + * no interrupt in HVICTL. + */ + + hvictl = (IRQ_S_EXT << HVICTL_IID_SHIFT) & HVICTL_IID; + hvictl |= ext_irq_pending; + csr_write(CSR_HVICTL, hvictl); +} + +#ifdef CONFIG_32BIT +void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; + unsigned long mask, val; + + if (!kvm_riscv_aia_available()) + return; + + if (READ_ONCE(vcpu->arch.irqs_pending_mask[1])) { + mask = xchg_acquire(&vcpu->arch.irqs_pending_mask[1], 0); + val = READ_ONCE(vcpu->arch.irqs_pending[1]) & mask; + + csr->hviph &= ~mask; + csr->hviph |= val; + } +} + +void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; + + if (kvm_riscv_aia_available()) + csr->vsieh = csr_read(CSR_VSIEH); +} +#endif + +bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) +{ + unsigned long seip; + + if (!kvm_riscv_aia_available()) + return false; + +#ifdef CONFIG_32BIT + if (READ_ONCE(vcpu->arch.irqs_pending[1]) & + (vcpu->arch.aia_context.guest_csr.vsieh & upper_32_bits(mask))) + return true; +#endif + + seip = vcpu->arch.guest_csr.vsie; + seip &= (unsigned long)mask; + seip &= BIT(IRQ_S_EXT); + + if (!kvm_riscv_aia_initialized(vcpu->kvm) || !seip) + return false; + + return false; +} + +void kvm_riscv_vcpu_aia_update_hvip(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + + if (!kvm_riscv_aia_available()) + return; + +#ifdef CONFIG_32BIT + csr_write(CSR_HVIPH, vcpu->arch.aia_context.guest_csr.hviph); +#endif + aia_set_hvictl(!!(csr->hvip & BIT(IRQ_VS_EXT))); +} + +void kvm_riscv_vcpu_aia_load(struct kvm_vcpu *vcpu, int cpu) +{ + struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; + + if (!kvm_riscv_aia_available()) + return; + + csr_write(CSR_VSISELECT, csr->vsiselect); + csr_write(CSR_HVIPRIO1, csr->hviprio1); + csr_write(CSR_HVIPRIO2, csr->hviprio2); +#ifdef CONFIG_32BIT + csr_write(CSR_VSIEH, csr->vsieh); + csr_write(CSR_HVIPH, csr->hviph); + csr_write(CSR_HVIPRIO1H, csr->hviprio1h); + csr_write(CSR_HVIPRIO2H, csr->hviprio2h); +#endif +} + +void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; + + if (!kvm_riscv_aia_available()) + return; + + csr->vsiselect = csr_read(CSR_VSISELECT); + csr->hviprio1 = csr_read(CSR_HVIPRIO1); + csr->hviprio2 = csr_read(CSR_HVIPRIO2); +#ifdef CONFIG_32BIT + csr->vsieh = csr_read(CSR_VSIEH); + csr->hviph = csr_read(CSR_HVIPH); + csr->hviprio1h = csr_read(CSR_HVIPRIO1H); + csr->hviprio2h = csr_read(CSR_HVIPRIO2H); +#endif +} + +int kvm_riscv_vcpu_aia_get_csr(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long *out_val) +{ + struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; + + if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long)) + return -EINVAL; + + *out_val = 0; + if (kvm_riscv_aia_available()) + *out_val = ((unsigned long *)csr)[reg_num]; + + return 0; +} + +int kvm_riscv_vcpu_aia_set_csr(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long val) +{ + struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; + + if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long)) + return -EINVAL; + + if (kvm_riscv_aia_available()) { + ((unsigned long *)csr)[reg_num] = val; + +#ifdef CONFIG_32BIT + if (reg_num == KVM_REG_RISCV_CSR_AIA_REG(siph)) + WRITE_ONCE(vcpu->arch.irqs_pending_mask[1], 0); +#endif + } + + return 0; +} + +int kvm_riscv_vcpu_aia_rmw_topei(struct kvm_vcpu *vcpu, + unsigned int csr_num, + unsigned long *val, + unsigned long new_val, + unsigned long wr_mask) +{ + /* If AIA not available then redirect trap */ + if (!kvm_riscv_aia_available()) + return KVM_INSN_ILLEGAL_TRAP; + + /* If AIA not initialized then forward to user space */ + if (!kvm_riscv_aia_initialized(vcpu->kvm)) + return KVM_INSN_EXIT_TO_USER_SPACE; + + return kvm_riscv_vcpu_aia_imsic_rmw(vcpu, KVM_RISCV_AIA_IMSIC_TOPEI, + val, new_val, wr_mask); +} + +/* + * External IRQ priority always read-only zero. This means default + * priority order is always preferred for external IRQs unless + * HVICTL.IID == 9 and HVICTL.IPRIO != 0 + */ +static int aia_irq2bitpos[] = { +0, 8, -1, -1, 16, 24, -1, -1, /* 0 - 7 */ +32, -1, -1, -1, -1, 40, 48, 56, /* 8 - 15 */ +64, 72, 80, 88, 96, 104, 112, 120, /* 16 - 23 */ +-1, -1, -1, -1, -1, -1, -1, -1, /* 24 - 31 */ +-1, -1, -1, -1, -1, -1, -1, -1, /* 32 - 39 */ +-1, -1, -1, -1, -1, -1, -1, -1, /* 40 - 47 */ +-1, -1, -1, -1, -1, -1, -1, -1, /* 48 - 55 */ +-1, -1, -1, -1, -1, -1, -1, -1, /* 56 - 63 */ +}; + +static u8 aia_get_iprio8(struct kvm_vcpu *vcpu, unsigned int irq) +{ + unsigned long hviprio; + int bitpos = aia_irq2bitpos[irq]; + + if (bitpos < 0) + return 0; + + switch (bitpos / BITS_PER_LONG) { + case 0: + hviprio = csr_read(CSR_HVIPRIO1); + break; + case 1: +#ifndef CONFIG_32BIT + hviprio = csr_read(CSR_HVIPRIO2); + break; +#else + hviprio = csr_read(CSR_HVIPRIO1H); + break; + case 2: + hviprio = csr_read(CSR_HVIPRIO2); + break; + case 3: + hviprio = csr_read(CSR_HVIPRIO2H); + break; +#endif + default: + return 0; + } + + return (hviprio >> (bitpos % BITS_PER_LONG)) & TOPI_IPRIO_MASK; +} + +static void aia_set_iprio8(struct kvm_vcpu *vcpu, unsigned int irq, u8 prio) +{ + unsigned long hviprio; + int bitpos = aia_irq2bitpos[irq]; + + if (bitpos < 0) + return; + + switch (bitpos / BITS_PER_LONG) { + case 0: + hviprio = csr_read(CSR_HVIPRIO1); + break; + case 1: +#ifndef CONFIG_32BIT + hviprio = csr_read(CSR_HVIPRIO2); + break; +#else + hviprio = csr_read(CSR_HVIPRIO1H); + break; + case 2: + hviprio = csr_read(CSR_HVIPRIO2); + break; + case 3: + hviprio = csr_read(CSR_HVIPRIO2H); + break; +#endif + default: + return; + } + + hviprio &= ~(TOPI_IPRIO_MASK << (bitpos % BITS_PER_LONG)); + hviprio |= (unsigned long)prio << (bitpos % BITS_PER_LONG); + + switch (bitpos / BITS_PER_LONG) { + case 0: + csr_write(CSR_HVIPRIO1, hviprio); + break; + case 1: +#ifndef CONFIG_32BIT + csr_write(CSR_HVIPRIO2, hviprio); + break; +#else + csr_write(CSR_HVIPRIO1H, hviprio); + break; + case 2: + csr_write(CSR_HVIPRIO2, hviprio); + break; + case 3: + csr_write(CSR_HVIPRIO2H, hviprio); + break; +#endif + default: + return; + } +} + +static int aia_rmw_iprio(struct kvm_vcpu *vcpu, unsigned int isel, + unsigned long *val, unsigned long new_val, + unsigned long wr_mask) +{ + int i, first_irq, nirqs; + unsigned long old_val; + u8 prio; + +#ifndef CONFIG_32BIT + if (isel & 0x1) + return KVM_INSN_ILLEGAL_TRAP; +#endif + + nirqs = 4 * (BITS_PER_LONG / 32); + first_irq = (isel - ISELECT_IPRIO0) * 4; + + old_val = 0; + for (i = 0; i < nirqs; i++) { + prio = aia_get_iprio8(vcpu, first_irq + i); + old_val |= (unsigned long)prio << (TOPI_IPRIO_BITS * i); + } + + if (val) + *val = old_val; + + if (wr_mask) { + new_val = (old_val & ~wr_mask) | (new_val & wr_mask); + for (i = 0; i < nirqs; i++) { + prio = (new_val >> (TOPI_IPRIO_BITS * i)) & + TOPI_IPRIO_MASK; + aia_set_iprio8(vcpu, first_irq + i, prio); + } + } + + return KVM_INSN_CONTINUE_NEXT_SEPC; +} + +#define IMSIC_FIRST 0x70 +#define IMSIC_LAST 0xff +int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num, + unsigned long *val, unsigned long new_val, + unsigned long wr_mask) +{ + unsigned int isel; + + /* If AIA not available then redirect trap */ + if (!kvm_riscv_aia_available()) + return KVM_INSN_ILLEGAL_TRAP; + + /* First try to emulate in kernel space */ + isel = csr_read(CSR_VSISELECT) & ISELECT_MASK; + if (isel >= ISELECT_IPRIO0 && isel <= ISELECT_IPRIO15) + return aia_rmw_iprio(vcpu, isel, val, new_val, wr_mask); + else if (isel >= IMSIC_FIRST && isel <= IMSIC_LAST && + kvm_riscv_aia_initialized(vcpu->kvm)) + return kvm_riscv_vcpu_aia_imsic_rmw(vcpu, isel, val, new_val, + wr_mask); + + /* We can't handle it here so redirect to user space */ + return KVM_INSN_EXIT_TO_USER_SPACE; +} + +void kvm_riscv_aia_enable(void) +{ + if (!kvm_riscv_aia_available()) + return; + + aia_set_hvictl(false); + csr_write(CSR_HVIPRIO1, 0x0); + csr_write(CSR_HVIPRIO2, 0x0); +#ifdef CONFIG_32BIT + csr_write(CSR_HVIPH, 0x0); + csr_write(CSR_HIDELEGH, 0x0); + csr_write(CSR_HVIPRIO1H, 0x0); + csr_write(CSR_HVIPRIO2H, 0x0); +#endif +} + +void kvm_riscv_aia_disable(void) +{ + if (!kvm_riscv_aia_available()) + return; + + aia_set_hvictl(false); +} + +int kvm_riscv_aia_init(void) +{ + if (!riscv_isa_extension_available(NULL, SxAIA)) + return -ENODEV; + + /* Enable KVM AIA support */ + static_branch_enable(&kvm_riscv_aia_available); + + return 0; +} + +void kvm_riscv_aia_exit(void) +{ +} diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 41ad7639a17b..a7112d583637 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -44,11 +44,15 @@ int kvm_arch_hardware_enable(void) csr_write(CSR_HVIP, 0); + kvm_riscv_aia_enable(); + return 0; } void kvm_arch_hardware_disable(void) { + kvm_riscv_aia_disable(); + /* * After clearing the hideleg CSR, the host kernel will receive * spurious interrupts if hvip CSR has pending interrupts and the @@ -63,6 +67,7 @@ void kvm_arch_hardware_disable(void) static int __init riscv_kvm_init(void) { + int rc; const char *str; if (!riscv_isa_extension_available(NULL, h)) { @@ -75,7 +80,7 @@ static int __init riscv_kvm_init(void) return -ENODEV; } - if (sbi_probe_extension(SBI_EXT_RFENCE) <= 0) { + if (!sbi_probe_extension(SBI_EXT_RFENCE)) { kvm_info("require SBI RFENCE extension\n"); return -ENODEV; } @@ -84,6 +89,10 @@ static int __init riscv_kvm_init(void) kvm_riscv_gstage_vmid_detect(); + rc = kvm_riscv_aia_init(); + if (rc && rc != -ENODEV) + return rc; + kvm_info("hypervisor extension available\n"); switch (kvm_riscv_gstage_mode()) { @@ -106,12 +115,23 @@ static int __init riscv_kvm_init(void) kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits()); - return kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE); + if (kvm_riscv_aia_available()) + kvm_info("AIA available\n"); + + rc = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE); + if (rc) { + kvm_riscv_aia_exit(); + return rc; + } + + return 0; } module_init(riscv_kvm_init); static void __exit riscv_kvm_exit(void) { + kvm_riscv_aia_exit(); + kvm_exit(); } module_exit(riscv_kvm_exit); diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 78211aed36fa..f2eb47925806 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -628,6 +628,13 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, !(memslot->flags & KVM_MEM_READONLY)) ? true : false; unsigned long vma_pagesize, mmu_seq; + /* We need minimum second+third level pages */ + ret = kvm_mmu_topup_memory_cache(pcache, gstage_pgd_levels); + if (ret) { + kvm_err("Failed to topup G-stage cache\n"); + return ret; + } + mmap_read_lock(current->mm); vma = vma_lookup(current->mm, hva); @@ -648,6 +655,15 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE) gfn = (gpa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT; + /* + * Read mmu_invalidate_seq so that KVM can detect if the results of + * vma_lookup() or gfn_to_pfn_prot() become stale priort to acquiring + * kvm->mmu_lock. + * + * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs + * with the smp_wmb() in kvm_mmu_invalidate_end(). + */ + mmu_seq = kvm->mmu_invalidate_seq; mmap_read_unlock(current->mm); if (vma_pagesize != PUD_SIZE && @@ -657,15 +673,6 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, return -EFAULT; } - /* We need minimum second+third level pages */ - ret = kvm_mmu_topup_memory_cache(pcache, gstage_pgd_levels); - if (ret) { - kvm_err("Failed to topup G-stage cache\n"); - return ret; - } - - mmu_seq = kvm->mmu_invalidate_seq; - hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writable); if (hfn == KVM_PFN_ERR_HWPOISON) { send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva, @@ -748,8 +755,7 @@ void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu) unsigned long hgatp = gstage_mode; struct kvm_arch *k = &vcpu->kvm->arch; - hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & - HGATP_VMID_MASK; + hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID; hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN; csr_write(CSR_HGATP, hgatp); diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 7d010b0be54e..8bd9f2a8a0b9 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -58,11 +58,14 @@ static const unsigned long kvm_isa_ext_arr[] = { [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i, [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m, + KVM_ISA_EXT_ARR(SSAIA), KVM_ISA_EXT_ARR(SSTC), KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVPBMT), + KVM_ISA_EXT_ARR(ZBB), KVM_ISA_EXT_ARR(ZIHINTPAUSE), KVM_ISA_EXT_ARR(ZICBOM), + KVM_ISA_EXT_ARR(ZICBOZ), }; static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext) @@ -96,9 +99,11 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_C: case KVM_RISCV_ISA_EXT_I: case KVM_RISCV_ISA_EXT_M: + case KVM_RISCV_ISA_EXT_SSAIA: case KVM_RISCV_ISA_EXT_SSTC: case KVM_RISCV_ISA_EXT_SVINVAL: case KVM_RISCV_ISA_EXT_ZIHINTPAUSE: + case KVM_RISCV_ISA_EXT_ZBB: return false; default: break; @@ -135,8 +140,10 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) kvm_riscv_vcpu_timer_reset(vcpu); - WRITE_ONCE(vcpu->arch.irqs_pending, 0); - WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0); + kvm_riscv_vcpu_aia_reset(vcpu); + + bitmap_zero(vcpu->arch.irqs_pending, KVM_RISCV_VCPU_NR_IRQS); + bitmap_zero(vcpu->arch.irqs_pending_mask, KVM_RISCV_VCPU_NR_IRQS); kvm_riscv_vcpu_pmu_reset(vcpu); @@ -157,6 +164,7 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) { + int rc; struct kvm_cpu_context *cntx; struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr; unsigned long host_isa, i; @@ -199,6 +207,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) /* setup performance monitoring */ kvm_riscv_vcpu_pmu_init(vcpu); + /* Setup VCPU AIA */ + rc = kvm_riscv_vcpu_aia_init(vcpu); + if (rc) + return rc; + /* Reset VCPU */ kvm_riscv_reset_vcpu(vcpu); @@ -218,6 +231,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { + /* Cleanup VCPU AIA context */ + kvm_riscv_vcpu_aia_deinit(vcpu); + /* Cleanup VCPU timer */ kvm_riscv_vcpu_timer_deinit(vcpu); @@ -283,6 +299,11 @@ static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu, return -EINVAL; reg_val = riscv_cbom_block_size; break; + case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): + if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ)) + return -EINVAL; + reg_val = riscv_cboz_block_size; + break; case KVM_REG_RISCV_CONFIG_REG(mvendorid): reg_val = vcpu->arch.mvendorid; break; @@ -354,6 +375,8 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu, break; case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): return -EOPNOTSUPP; + case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): + return -EOPNOTSUPP; case KVM_REG_RISCV_CONFIG_REG(mvendorid): if (!vcpu->arch.ran_atleast_once) vcpu->arch.mvendorid = reg_val; @@ -447,27 +470,76 @@ static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu, return 0; } +static int kvm_riscv_vcpu_general_get_csr(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long *out_val) +{ + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + + if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long)) + return -EINVAL; + + if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) { + kvm_riscv_vcpu_flush_interrupts(vcpu); + *out_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK; + *out_val |= csr->hvip & ~IRQ_LOCAL_MASK; + } else + *out_val = ((unsigned long *)csr)[reg_num]; + + return 0; +} + +static inline int kvm_riscv_vcpu_general_set_csr(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long reg_val) +{ + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + + if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long)) + return -EINVAL; + + if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) { + reg_val &= VSIP_VALID_MASK; + reg_val <<= VSIP_TO_HVIP_SHIFT; + } + + ((unsigned long *)csr)[reg_num] = reg_val; + + if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) + WRITE_ONCE(vcpu->arch.irqs_pending_mask[0], 0); + + return 0; +} + static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { - struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + int rc; unsigned long __user *uaddr = (unsigned long __user *)(unsigned long)reg->addr; unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_RISCV_CSR); - unsigned long reg_val; + unsigned long reg_val, reg_subtype; if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) return -EINVAL; - if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long)) - return -EINVAL; - if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) { - kvm_riscv_vcpu_flush_interrupts(vcpu); - reg_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK; - } else - reg_val = ((unsigned long *)csr)[reg_num]; + reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; + reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; + switch (reg_subtype) { + case KVM_REG_RISCV_CSR_GENERAL: + rc = kvm_riscv_vcpu_general_get_csr(vcpu, reg_num, ®_val); + break; + case KVM_REG_RISCV_CSR_AIA: + rc = kvm_riscv_vcpu_aia_get_csr(vcpu, reg_num, ®_val); + break; + default: + rc = -EINVAL; + break; + } + if (rc) + return rc; if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id))) return -EFAULT; @@ -478,31 +550,35 @@ static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu, static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { - struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + int rc; unsigned long __user *uaddr = (unsigned long __user *)(unsigned long)reg->addr; unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_RISCV_CSR); - unsigned long reg_val; + unsigned long reg_val, reg_subtype; if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) return -EINVAL; - if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long)) - return -EINVAL; if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id))) return -EFAULT; - if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) { - reg_val &= VSIP_VALID_MASK; - reg_val <<= VSIP_TO_HVIP_SHIFT; + reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; + reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; + switch (reg_subtype) { + case KVM_REG_RISCV_CSR_GENERAL: + rc = kvm_riscv_vcpu_general_set_csr(vcpu, reg_num, reg_val); + break; + case KVM_REG_RISCV_CSR_AIA: + rc = kvm_riscv_vcpu_aia_set_csr(vcpu, reg_num, reg_val); + break; + default: + rc = -EINVAL; + break; } - - ((unsigned long *)csr)[reg_num] = reg_val; - - if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) - WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0); + if (rc) + return rc; return 0; } @@ -601,6 +677,8 @@ static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu, KVM_REG_RISCV_FP_D); case KVM_REG_RISCV_ISA_EXT: return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg); + case KVM_REG_RISCV_SBI_EXT: + return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg); default: break; } @@ -628,6 +706,8 @@ static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu, KVM_REG_RISCV_FP_D); case KVM_REG_RISCV_ISA_EXT: return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg); + case KVM_REG_RISCV_SBI_EXT: + return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg); default: break; } @@ -728,13 +808,16 @@ void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu) struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; unsigned long mask, val; - if (READ_ONCE(vcpu->arch.irqs_pending_mask)) { - mask = xchg_acquire(&vcpu->arch.irqs_pending_mask, 0); - val = READ_ONCE(vcpu->arch.irqs_pending) & mask; + if (READ_ONCE(vcpu->arch.irqs_pending_mask[0])) { + mask = xchg_acquire(&vcpu->arch.irqs_pending_mask[0], 0); + val = READ_ONCE(vcpu->arch.irqs_pending[0]) & mask; csr->hvip &= ~mask; csr->hvip |= val; } + + /* Flush AIA high interrupts */ + kvm_riscv_vcpu_aia_flush_interrupts(vcpu); } void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu) @@ -751,29 +834,38 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu) if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) { if (hvip & (1UL << IRQ_VS_SOFT)) { if (!test_and_set_bit(IRQ_VS_SOFT, - &v->irqs_pending_mask)) - set_bit(IRQ_VS_SOFT, &v->irqs_pending); + v->irqs_pending_mask)) + set_bit(IRQ_VS_SOFT, v->irqs_pending); } else { if (!test_and_set_bit(IRQ_VS_SOFT, - &v->irqs_pending_mask)) - clear_bit(IRQ_VS_SOFT, &v->irqs_pending); + v->irqs_pending_mask)) + clear_bit(IRQ_VS_SOFT, v->irqs_pending); } } + /* Sync-up AIA high interrupts */ + kvm_riscv_vcpu_aia_sync_interrupts(vcpu); + /* Sync-up timer CSRs */ kvm_riscv_vcpu_timer_sync(vcpu); } int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) { - if (irq != IRQ_VS_SOFT && + /* + * We only allow VS-mode software, timer, and external + * interrupts when irq is one of the local interrupts + * defined by RISC-V privilege specification. + */ + if (irq < IRQ_LOCAL_MAX && + irq != IRQ_VS_SOFT && irq != IRQ_VS_TIMER && irq != IRQ_VS_EXT) return -EINVAL; - set_bit(irq, &vcpu->arch.irqs_pending); + set_bit(irq, vcpu->arch.irqs_pending); smp_mb__before_atomic(); - set_bit(irq, &vcpu->arch.irqs_pending_mask); + set_bit(irq, vcpu->arch.irqs_pending_mask); kvm_vcpu_kick(vcpu); @@ -782,24 +874,37 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) { - if (irq != IRQ_VS_SOFT && + /* + * We only allow VS-mode software, timer, and external + * interrupts when irq is one of the local interrupts + * defined by RISC-V privilege specification. + */ + if (irq < IRQ_LOCAL_MAX && + irq != IRQ_VS_SOFT && irq != IRQ_VS_TIMER && irq != IRQ_VS_EXT) return -EINVAL; - clear_bit(irq, &vcpu->arch.irqs_pending); + clear_bit(irq, vcpu->arch.irqs_pending); smp_mb__before_atomic(); - set_bit(irq, &vcpu->arch.irqs_pending_mask); + set_bit(irq, vcpu->arch.irqs_pending_mask); return 0; } -bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask) +bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) { - unsigned long ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK) - << VSIP_TO_HVIP_SHIFT) & mask; + unsigned long ie; - return (READ_ONCE(vcpu->arch.irqs_pending) & ie) ? true : false; + ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK) + << VSIP_TO_HVIP_SHIFT) & (unsigned long)mask; + ie |= vcpu->arch.guest_csr.vsie & ~IRQ_LOCAL_MASK & + (unsigned long)mask; + if (READ_ONCE(vcpu->arch.irqs_pending[0]) & ie) + return true; + + /* Check AIA high interrupts */ + return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask); } void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) @@ -865,6 +970,9 @@ static void kvm_riscv_vcpu_update_config(const unsigned long *isa) if (riscv_isa_extension_available(isa, ZICBOM)) henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE); + if (riscv_isa_extension_available(isa, ZICBOZ)) + henvcfg |= ENVCFG_CBZE; + csr_write(CSR_HENVCFG, henvcfg); #ifdef CONFIG_32BIT csr_write(CSR_HENVCFGH, henvcfg >> 32); @@ -895,6 +1003,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context, vcpu->arch.isa); + kvm_riscv_vcpu_aia_load(vcpu, cpu); + vcpu->cpu = cpu; } @@ -904,6 +1014,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->cpu = -1; + kvm_riscv_vcpu_aia_put(vcpu); + kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context, vcpu->arch.isa); kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context); @@ -971,6 +1083,7 @@ static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu) struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; csr_write(CSR_HVIP, csr->hvip); + kvm_riscv_vcpu_aia_update_hvip(vcpu); } /* @@ -1043,6 +1156,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_riscv_check_vcpu_requests(vcpu); + preempt_disable(); + + /* Update AIA HW state before entering guest */ + ret = kvm_riscv_vcpu_aia_update(vcpu); + if (ret <= 0) { + preempt_enable(); + continue; + } + local_irq_disable(); /* @@ -1071,6 +1193,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) xfer_to_guest_mode_work_pending()) { vcpu->mode = OUTSIDE_GUEST_MODE; local_irq_enable(); + preempt_enable(); kvm_vcpu_srcu_read_lock(vcpu); continue; } @@ -1104,8 +1227,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) /* Syncup interrupts state with HW */ kvm_riscv_vcpu_sync_interrupts(vcpu); - preempt_disable(); - /* * We must ensure that any pending interrupts are taken before * we exit guest timing so that timer ticks are accounted as diff --git a/arch/riscv/kvm/vcpu_insn.c b/arch/riscv/kvm/vcpu_insn.c index f689337b78ff..7a6abed41bc1 100644 --- a/arch/riscv/kvm/vcpu_insn.c +++ b/arch/riscv/kvm/vcpu_insn.c @@ -214,6 +214,7 @@ struct csr_func { }; static const struct csr_func csr_funcs[] = { + KVM_RISCV_VCPU_AIA_CSR_FUNCS KVM_RISCV_VCPU_HPMCOUNTER_CSR_FUNCS }; diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index 15fde15f9fb8..e52fde504433 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -30,17 +30,52 @@ static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu = { }; #endif -static const struct kvm_vcpu_sbi_extension *sbi_ext[] = { - &vcpu_sbi_ext_v01, - &vcpu_sbi_ext_base, - &vcpu_sbi_ext_time, - &vcpu_sbi_ext_ipi, - &vcpu_sbi_ext_rfence, - &vcpu_sbi_ext_srst, - &vcpu_sbi_ext_hsm, - &vcpu_sbi_ext_pmu, - &vcpu_sbi_ext_experimental, - &vcpu_sbi_ext_vendor, +struct kvm_riscv_sbi_extension_entry { + enum KVM_RISCV_SBI_EXT_ID dis_idx; + const struct kvm_vcpu_sbi_extension *ext_ptr; +}; + +static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = { + { + .dis_idx = KVM_RISCV_SBI_EXT_V01, + .ext_ptr = &vcpu_sbi_ext_v01, + }, + { + .dis_idx = KVM_RISCV_SBI_EXT_MAX, /* Can't be disabled */ + .ext_ptr = &vcpu_sbi_ext_base, + }, + { + .dis_idx = KVM_RISCV_SBI_EXT_TIME, + .ext_ptr = &vcpu_sbi_ext_time, + }, + { + .dis_idx = KVM_RISCV_SBI_EXT_IPI, + .ext_ptr = &vcpu_sbi_ext_ipi, + }, + { + .dis_idx = KVM_RISCV_SBI_EXT_RFENCE, + .ext_ptr = &vcpu_sbi_ext_rfence, + }, + { + .dis_idx = KVM_RISCV_SBI_EXT_SRST, + .ext_ptr = &vcpu_sbi_ext_srst, + }, + { + .dis_idx = KVM_RISCV_SBI_EXT_HSM, + .ext_ptr = &vcpu_sbi_ext_hsm, + }, + { + .dis_idx = KVM_RISCV_SBI_EXT_PMU, + .ext_ptr = &vcpu_sbi_ext_pmu, + }, + { + .dis_idx = KVM_RISCV_SBI_EXT_EXPERIMENTAL, + .ext_ptr = &vcpu_sbi_ext_experimental, + }, + { + .dis_idx = KVM_RISCV_SBI_EXT_VENDOR, + .ext_ptr = &vcpu_sbi_ext_vendor, + }, }; void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run) @@ -99,14 +134,192 @@ int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run) return 0; } -const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid) +static int riscv_vcpu_set_sbi_ext_single(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long reg_val) +{ + unsigned long i; + const struct kvm_riscv_sbi_extension_entry *sext = NULL; + struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; + + if (reg_num >= KVM_RISCV_SBI_EXT_MAX || + (reg_val != 1 && reg_val != 0)) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { + if (sbi_ext[i].dis_idx == reg_num) { + sext = &sbi_ext[i]; + break; + } + } + if (!sext) + return -ENOENT; + + scontext->extension_disabled[sext->dis_idx] = !reg_val; + + return 0; +} + +static int riscv_vcpu_get_sbi_ext_single(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long *reg_val) +{ + unsigned long i; + const struct kvm_riscv_sbi_extension_entry *sext = NULL; + struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; + + if (reg_num >= KVM_RISCV_SBI_EXT_MAX) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { + if (sbi_ext[i].dis_idx == reg_num) { + sext = &sbi_ext[i]; + break; + } + } + if (!sext) + return -ENOENT; + + *reg_val = !scontext->extension_disabled[sext->dis_idx]; + + return 0; +} + +static int riscv_vcpu_set_sbi_ext_multi(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long reg_val, bool enable) +{ + unsigned long i, ext_id; + + if (reg_num > KVM_REG_RISCV_SBI_MULTI_REG_LAST) + return -EINVAL; + + for_each_set_bit(i, ®_val, BITS_PER_LONG) { + ext_id = i + reg_num * BITS_PER_LONG; + if (ext_id >= KVM_RISCV_SBI_EXT_MAX) + break; + + riscv_vcpu_set_sbi_ext_single(vcpu, ext_id, enable); + } + + return 0; +} + +static int riscv_vcpu_get_sbi_ext_multi(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long *reg_val) +{ + unsigned long i, ext_id, ext_val; + + if (reg_num > KVM_REG_RISCV_SBI_MULTI_REG_LAST) + return -EINVAL; + + for (i = 0; i < BITS_PER_LONG; i++) { + ext_id = i + reg_num * BITS_PER_LONG; + if (ext_id >= KVM_RISCV_SBI_EXT_MAX) + break; + + ext_val = 0; + riscv_vcpu_get_sbi_ext_single(vcpu, ext_id, &ext_val); + if (ext_val) + *reg_val |= KVM_REG_RISCV_SBI_MULTI_MASK(ext_id); + } + + return 0; +} + +int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_SBI_EXT); + unsigned long reg_val, reg_subtype; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + + if (vcpu->arch.ran_atleast_once) + return -EBUSY; + + reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; + reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; + + if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + switch (reg_subtype) { + case KVM_REG_RISCV_SBI_SINGLE: + return riscv_vcpu_set_sbi_ext_single(vcpu, reg_num, reg_val); + case KVM_REG_RISCV_SBI_MULTI_EN: + return riscv_vcpu_set_sbi_ext_multi(vcpu, reg_num, reg_val, true); + case KVM_REG_RISCV_SBI_MULTI_DIS: + return riscv_vcpu_set_sbi_ext_multi(vcpu, reg_num, reg_val, false); + default: + return -EINVAL; + } + + return 0; +} + +int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + int rc; + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_SBI_EXT); + unsigned long reg_val, reg_subtype; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + + reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; + reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; + + reg_val = 0; + switch (reg_subtype) { + case KVM_REG_RISCV_SBI_SINGLE: + rc = riscv_vcpu_get_sbi_ext_single(vcpu, reg_num, ®_val); + break; + case KVM_REG_RISCV_SBI_MULTI_EN: + case KVM_REG_RISCV_SBI_MULTI_DIS: + rc = riscv_vcpu_get_sbi_ext_multi(vcpu, reg_num, ®_val); + if (!rc && reg_subtype == KVM_REG_RISCV_SBI_MULTI_DIS) + reg_val = ~reg_val; + break; + default: + rc = -EINVAL; + } + if (rc) + return rc; + + if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} + +const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext( + struct kvm_vcpu *vcpu, unsigned long extid) { - int i = 0; + int i; + const struct kvm_riscv_sbi_extension_entry *sext; + struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { - if (sbi_ext[i]->extid_start <= extid && - sbi_ext[i]->extid_end >= extid) - return sbi_ext[i]; + sext = &sbi_ext[i]; + if (sext->ext_ptr->extid_start <= extid && + sext->ext_ptr->extid_end >= extid) { + if (sext->dis_idx < KVM_RISCV_SBI_EXT_MAX && + scontext->extension_disabled[sext->dis_idx]) + return NULL; + return sbi_ext[i].ext_ptr; + } } return NULL; @@ -126,7 +339,7 @@ int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run) }; bool ext_is_v01 = false; - sbi_ext = kvm_vcpu_sbi_find_ext(cp->a7); + sbi_ext = kvm_vcpu_sbi_find_ext(vcpu, cp->a7); if (sbi_ext && sbi_ext->handler) { #ifdef CONFIG_RISCV_SBI_V01 if (cp->a7 >= SBI_EXT_0_1_SET_TIMER && diff --git a/arch/riscv/kvm/vcpu_sbi_base.c b/arch/riscv/kvm/vcpu_sbi_base.c index 9945aff34c14..5bc570b984f4 100644 --- a/arch/riscv/kvm/vcpu_sbi_base.c +++ b/arch/riscv/kvm/vcpu_sbi_base.c @@ -44,7 +44,7 @@ static int kvm_sbi_ext_base_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, kvm_riscv_vcpu_sbi_forward(vcpu, run); retdata->uexit = true; } else { - sbi_ext = kvm_vcpu_sbi_find_ext(cp->a0); + sbi_ext = kvm_vcpu_sbi_find_ext(vcpu, cp->a0); *out_val = sbi_ext && sbi_ext->probe ? sbi_ext->probe(vcpu) : !!sbi_ext; } diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c index ad34519c8a13..3ac2ff6a65da 100644 --- a/arch/riscv/kvm/vcpu_timer.c +++ b/arch/riscv/kvm/vcpu_timer.c @@ -147,10 +147,8 @@ static void kvm_riscv_vcpu_timer_blocking(struct kvm_vcpu *vcpu) return; delta_ns = kvm_riscv_delta_cycles2ns(t->next_cycles, gt, t); - if (delta_ns) { - hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL); - t->next_set = true; - } + hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL); + t->next_set = true; } static void kvm_riscv_vcpu_timer_unblocking(struct kvm_vcpu *vcpu) diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index 65a964d7e70d..6ef15f78e80f 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -41,6 +41,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return r; } + kvm_riscv_aia_init_vm(kvm); + kvm_riscv_guest_timer_init(kvm); return 0; @@ -49,6 +51,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_destroy_vcpus(kvm); + + kvm_riscv_aia_destroy_vm(kvm); } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) @@ -87,8 +91,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) return r; } -long kvm_arch_vm_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { return -EINVAL; } diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c index 5246da1c9167..ddc98714ce8e 100644 --- a/arch/riscv/kvm/vmid.c +++ b/arch/riscv/kvm/vmid.c @@ -26,9 +26,9 @@ void __init kvm_riscv_gstage_vmid_detect(void) /* Figure-out number of VMID bits in HW */ old = csr_read(CSR_HGATP); - csr_write(CSR_HGATP, old | HGATP_VMID_MASK); + csr_write(CSR_HGATP, old | HGATP_VMID); vmid_bits = csr_read(CSR_HGATP); - vmid_bits = (vmid_bits & HGATP_VMID_MASK) >> HGATP_VMID_SHIFT; + vmid_bits = (vmid_bits & HGATP_VMID) >> HGATP_VMID_SHIFT; vmid_bits = fls_long(vmid_bits); csr_write(CSR_HGATP, old); diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 6c74b0bedd60..26cb2502ecf8 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -8,5 +8,6 @@ lib-y += strlen.o lib-y += strncmp.o lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_64BIT) += tishift.o +lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/riscv/lib/clear_page.S b/arch/riscv/lib/clear_page.S new file mode 100644 index 000000000000..d7a256eb53f4 --- /dev/null +++ b/arch/riscv/lib/clear_page.S @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023 Ventana Micro Systems Inc. + */ + +#include <linux/linkage.h> +#include <asm/asm.h> +#include <asm/alternative-macros.h> +#include <asm-generic/export.h> +#include <asm/hwcap.h> +#include <asm/insn-def.h> +#include <asm/page.h> + +#define CBOZ_ALT(order, old, new) \ + ALTERNATIVE(old, new, 0, \ + ((order) << 16) | RISCV_ISA_EXT_ZICBOZ, \ + CONFIG_RISCV_ISA_ZICBOZ) + +/* void clear_page(void *page) */ +SYM_FUNC_START(clear_page) + li a2, PAGE_SIZE + + /* + * If Zicboz isn't present, or somehow has a block + * size larger than 4K, then fallback to memset. + */ + CBOZ_ALT(12, "j .Lno_zicboz", "nop") + + lw a1, riscv_cboz_block_size + add a2, a0, a2 +.Lzero_loop: + CBO_zero(a0) + add a0, a0, a1 + CBOZ_ALT(11, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") + CBO_zero(a0) + add a0, a0, a1 + CBOZ_ALT(10, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") + CBO_zero(a0) + add a0, a0, a1 + CBO_zero(a0) + add a0, a0, a1 + CBOZ_ALT(9, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") + CBO_zero(a0) + add a0, a0, a1 + CBO_zero(a0) + add a0, a0, a1 + CBO_zero(a0) + add a0, a0, a1 + CBO_zero(a0) + add a0, a0, a1 + CBOZ_ALT(8, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") + CBO_zero(a0) + add a0, a0, a1 + CBO_zero(a0) + add a0, a0, a1 + CBO_zero(a0) + add a0, a0, a1 + CBO_zero(a0) + add a0, a0, a1 + CBO_zero(a0) + add a0, a0, a1 + CBO_zero(a0) + add a0, a0, a1 + CBO_zero(a0) + add a0, a0, a1 + CBO_zero(a0) + add a0, a0, a1 + bltu a0, a2, .Lzero_loop + ret +.Lno_zicboz: + li a1, 0 + tail __memset +SYM_FUNC_END(clear_page) +EXPORT_SYMBOL(clear_page) diff --git a/arch/riscv/lib/memcpy.S b/arch/riscv/lib/memcpy.S index 51ab716253fa..1a40d01a9543 100644 --- a/arch/riscv/lib/memcpy.S +++ b/arch/riscv/lib/memcpy.S @@ -106,3 +106,5 @@ WEAK(memcpy) 6: ret END(__memcpy) +SYM_FUNC_ALIAS(__pi_memcpy, __memcpy) +SYM_FUNC_ALIAS(__pi___memcpy, __memcpy) diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S index e0609e1f0864..838ff2022fe3 100644 --- a/arch/riscv/lib/memmove.S +++ b/arch/riscv/lib/memmove.S @@ -314,3 +314,5 @@ return_from_memmove: SYM_FUNC_END(memmove) SYM_FUNC_END(__memmove) +SYM_FUNC_ALIAS(__pi_memmove, __memmove) +SYM_FUNC_ALIAS(__pi___memmove, __memmove) diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S index c42a8412547f..687b2bea5c43 100644 --- a/arch/riscv/lib/strcmp.S +++ b/arch/riscv/lib/strcmp.S @@ -2,9 +2,8 @@ #include <linux/linkage.h> #include <asm/asm.h> -#include <asm-generic/export.h> #include <asm/alternative-macros.h> -#include <asm/errata_list.h> +#include <asm/hwcap.h> /* int strcmp(const char *cs, const char *ct) */ SYM_FUNC_START(strcmp) diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S index 15bb8f3aa959..8ae3064e45ff 100644 --- a/arch/riscv/lib/strlen.S +++ b/arch/riscv/lib/strlen.S @@ -2,9 +2,8 @@ #include <linux/linkage.h> #include <asm/asm.h> -#include <asm-generic/export.h> #include <asm/alternative-macros.h> -#include <asm/errata_list.h> +#include <asm/hwcap.h> /* int strlen(const char *s) */ SYM_FUNC_START(strlen) @@ -131,3 +130,4 @@ strlen_zbb: .option pop #endif SYM_FUNC_END(strlen) +SYM_FUNC_ALIAS(__pi_strlen, strlen) diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S index 7ac2f667285a..aba5b3148621 100644 --- a/arch/riscv/lib/strncmp.S +++ b/arch/riscv/lib/strncmp.S @@ -2,9 +2,8 @@ #include <linux/linkage.h> #include <asm/asm.h> -#include <asm-generic/export.h> #include <asm/alternative-macros.h> -#include <asm/errata_list.h> +#include <asm/hwcap.h> /* int strncmp(const char *cs, const char *ct, size_t count) */ SYM_FUNC_START(strncmp) diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 2ac177c05352..b85e9e82f082 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -1,6 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS_init.o := -mcmodel=medany +ifdef CONFIG_RELOCATABLE +CFLAGS_init.o += -fno-pie +endif + ifdef CONFIG_FTRACE CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE) diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index fcd6145fbead..fca532ddf3ec 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -19,7 +19,7 @@ void flush_icache_all(void) { local_flush_icache_all(); - if (IS_ENABLED(CONFIG_RISCV_SBI)) + if (IS_ENABLED(CONFIG_RISCV_SBI) && !riscv_use_ipi_for_rfence()) sbi_remote_fence_i(NULL); else on_each_cpu(ipi_remote_fence_i, NULL, 1); @@ -67,7 +67,8 @@ void flush_icache_mm(struct mm_struct *mm, bool local) * with flush_icache_deferred(). */ smp_mb(); - } else if (IS_ENABLED(CONFIG_RISCV_SBI)) { + } else if (IS_ENABLED(CONFIG_RISCV_SBI) && + !riscv_use_ipi_for_rfence()) { sbi_remote_fence_i(&others); } else { on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1); @@ -100,36 +101,48 @@ void flush_icache_pte(pte_t pte) unsigned int riscv_cbom_block_size; EXPORT_SYMBOL_GPL(riscv_cbom_block_size); -void riscv_init_cbom_blocksize(void) +unsigned int riscv_cboz_block_size; +EXPORT_SYMBOL_GPL(riscv_cboz_block_size); + +static void cbo_get_block_size(struct device_node *node, + const char *name, u32 *block_size, + unsigned long *first_hartid) { + unsigned long hartid; + u32 val; + + if (riscv_of_processor_hartid(node, &hartid)) + return; + + if (of_property_read_u32(node, name, &val)) + return; + + if (!*block_size) { + *block_size = val; + *first_hartid = hartid; + } else if (*block_size != val) { + pr_warn("%s mismatched between harts %lu and %lu\n", + name, *first_hartid, hartid); + } +} + +void riscv_init_cbo_blocksizes(void) +{ + unsigned long cbom_hartid, cboz_hartid; + u32 cbom_block_size = 0, cboz_block_size = 0; struct device_node *node; - unsigned long cbom_hartid; - u32 val, probed_block_size; - int ret; - probed_block_size = 0; for_each_of_cpu_node(node) { - unsigned long hartid; - - ret = riscv_of_processor_hartid(node, &hartid); - if (ret) - continue; - - /* set block-size for cbom extension if available */ - ret = of_property_read_u32(node, "riscv,cbom-block-size", &val); - if (ret) - continue; - - if (!probed_block_size) { - probed_block_size = val; - cbom_hartid = hartid; - } else { - if (probed_block_size != val) - pr_warn("cbom-block-size mismatched between harts %lu and %lu\n", - cbom_hartid, hartid); - } + /* set block-size for cbom and/or cboz extension if available */ + cbo_get_block_size(node, "riscv,cbom-block-size", + &cbom_block_size, &cbom_hartid); + cbo_get_block_size(node, "riscv,cboz-block-size", + &cboz_block_size, &cboz_hartid); } - if (probed_block_size) - riscv_cbom_block_size = probed_block_size; + if (cbom_block_size) + riscv_cbom_block_size = cbom_block_size; + + if (cboz_block_size) + riscv_cboz_block_size = cboz_block_size; } diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 80ce9caba8d2..12e22e7330e7 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -22,7 +22,7 @@ DEFINE_STATIC_KEY_FALSE(use_asid_allocator); static unsigned long asid_bits; static unsigned long num_asids; -static unsigned long asid_mask; +unsigned long asid_mask; static atomic_long_t current_version; @@ -196,16 +196,6 @@ switch_mm_fast: if (need_flush_tlb) local_flush_tlb_all(); -#ifdef CONFIG_SMP - else { - cpumask_t *mask = &mm->context.tlb_stale_mask; - - if (cpumask_test_cpu(cpu, mask)) { - cpumask_clear_cpu(cpu, mask); - local_flush_tlb_all_asid(cntx & asid_mask); - } - } -#endif } static void set_mm_noasid(struct mm_struct *mm) @@ -215,12 +205,24 @@ static void set_mm_noasid(struct mm_struct *mm) local_flush_tlb_all(); } -static inline void set_mm(struct mm_struct *mm, unsigned int cpu) +static inline void set_mm(struct mm_struct *prev, + struct mm_struct *next, unsigned int cpu) { - if (static_branch_unlikely(&use_asid_allocator)) - set_mm_asid(mm, cpu); - else - set_mm_noasid(mm); + /* + * The mm_cpumask indicates which harts' TLBs contain the virtual + * address mapping of the mm. Compared to noasid, using asid + * can't guarantee that stale TLB entries are invalidated because + * the asid mechanism wouldn't flush TLB for every switch_mm for + * performance. So when using asid, keep all CPUs footmarks in + * cpumask() until mm reset. + */ + cpumask_set_cpu(cpu, mm_cpumask(next)); + if (static_branch_unlikely(&use_asid_allocator)) { + set_mm_asid(next, cpu); + } else { + cpumask_clear_cpu(cpu, mm_cpumask(prev)); + set_mm_noasid(next); + } } static int __init asids_init(void) @@ -274,7 +276,8 @@ static int __init asids_init(void) } early_initcall(asids_init); #else -static inline void set_mm(struct mm_struct *mm, unsigned int cpu) +static inline void set_mm(struct mm_struct *prev, + struct mm_struct *next, unsigned int cpu) { /* Nothing to do here when there is no MMU */ } @@ -327,10 +330,7 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ cpu = smp_processor_id(); - cpumask_clear_cpu(cpu, mm_cpumask(prev)); - cpumask_set_cpu(cpu, mm_cpumask(next)); - - set_mm(next, cpu); + set_mm(prev, next, cpu); flush_icache_deferred(next, cpu); } diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 460f785f6e09..8685f85a7474 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -15,6 +15,7 @@ #include <linux/uaccess.h> #include <linux/kprobes.h> #include <linux/kfence.h> +#include <linux/entry-common.h> #include <asm/ptrace.h> #include <asm/tlbflush.h> @@ -143,6 +144,8 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a no_context(regs, addr); return; } + if (pud_leaf(*pud_k)) + goto flush_tlb; /* * Since the vmalloc area is global, it is unnecessary @@ -153,6 +156,8 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a no_context(regs, addr); return; } + if (pmd_leaf(*pmd_k)) + goto flush_tlb; /* * Make sure the actual PTE exists as well to @@ -172,6 +177,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a * ordering constraint, not a cache flush; it is * necessary even after writing invalid entries. */ +flush_tlb: local_flush_tlb_page(addr); } @@ -204,7 +210,7 @@ static inline bool access_error(unsigned long cause, struct vm_area_struct *vma) * This routine handles page faults. It determines the address and the * problem, and then passes it off to one of the appropriate routines. */ -asmlinkage void do_page_fault(struct pt_regs *regs) +void handle_page_fault(struct pt_regs *regs) { struct task_struct *tsk; struct vm_area_struct *vma; @@ -251,7 +257,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs) } #endif /* Enable interrupts if they were enabled in the parent context. */ - if (likely(regs->status & SR_PIE)) + if (!regs_irqs_disabled(regs)) local_irq_enable(); /* @@ -356,4 +362,3 @@ good_area: } return; } -NOKPROBE_SYMBOL(do_page_fault); diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 932dadfdca54..a163a3e0f0d4 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -2,6 +2,305 @@ #include <linux/hugetlb.h> #include <linux/err.h> +#ifdef CONFIG_RISCV_ISA_SVNAPOT +pte_t *huge_pte_alloc(struct mm_struct *mm, + struct vm_area_struct *vma, + unsigned long addr, + unsigned long sz) +{ + unsigned long order; + pte_t *pte = NULL; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return NULL; + + pud = pud_alloc(mm, p4d, addr); + if (!pud) + return NULL; + + if (sz == PUD_SIZE) { + pte = (pte_t *)pud; + goto out; + } + + if (sz == PMD_SIZE) { + if (want_pmd_share(vma, addr) && pud_none(*pud)) + pte = huge_pmd_share(mm, vma, addr, pud); + else + pte = (pte_t *)pmd_alloc(mm, pud, addr); + goto out; + } + + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return NULL; + + for_each_napot_order(order) { + if (napot_cont_size(order) == sz) { + pte = pte_alloc_map(mm, pmd, addr & napot_cont_mask(order)); + break; + } + } + +out: + WARN_ON_ONCE(pte && pte_present(*pte) && !pte_huge(*pte)); + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, + unsigned long addr, + unsigned long sz) +{ + unsigned long order; + pte_t *pte = NULL; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, addr); + if (!pgd_present(*pgd)) + return NULL; + + p4d = p4d_offset(pgd, addr); + if (!p4d_present(*p4d)) + return NULL; + + pud = pud_offset(p4d, addr); + if (sz == PUD_SIZE) + /* must be pud huge, non-present or none */ + return (pte_t *)pud; + + if (!pud_present(*pud)) + return NULL; + + pmd = pmd_offset(pud, addr); + if (sz == PMD_SIZE) + /* must be pmd huge, non-present or none */ + return (pte_t *)pmd; + + if (!pmd_present(*pmd)) + return NULL; + + for_each_napot_order(order) { + if (napot_cont_size(order) == sz) { + pte = pte_offset_kernel(pmd, addr & napot_cont_mask(order)); + break; + } + } + return pte; +} + +static pte_t get_clear_contig(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + unsigned long pte_num) +{ + pte_t orig_pte = ptep_get(ptep); + unsigned long i; + + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) { + pte_t pte = ptep_get_and_clear(mm, addr, ptep); + + if (pte_dirty(pte)) + orig_pte = pte_mkdirty(orig_pte); + + if (pte_young(pte)) + orig_pte = pte_mkyoung(orig_pte); + } + + return orig_pte; +} + +static pte_t get_clear_contig_flush(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + unsigned long pte_num) +{ + pte_t orig_pte = get_clear_contig(mm, addr, ptep, pte_num); + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); + bool valid = !pte_none(orig_pte); + + if (valid) + flush_tlb_range(&vma, addr, addr + (PAGE_SIZE * pte_num)); + + return orig_pte; +} + +pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) +{ + unsigned long order; + + for_each_napot_order(order) { + if (shift == napot_cont_shift(order)) { + entry = pte_mknapot(entry, order); + break; + } + } + if (order == NAPOT_ORDER_MAX) + entry = pte_mkhuge(entry); + + return entry; +} + +void set_huge_pte_at(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + pte_t pte) +{ + int i, pte_num; + + if (!pte_napot(pte)) { + set_pte_at(mm, addr, ptep, pte); + return; + } + + pte_num = napot_pte_num(napot_cont_order(pte)); + for (i = 0; i < pte_num; i++, ptep++, addr += PAGE_SIZE) + set_pte_at(mm, addr, ptep, pte); +} + +int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, + pte_t *ptep, + pte_t pte, + int dirty) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long order; + pte_t orig_pte; + int i, pte_num; + + if (!pte_napot(pte)) + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); + + order = napot_cont_order(pte); + pte_num = napot_pte_num(order); + ptep = huge_pte_offset(mm, addr, napot_cont_size(order)); + orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num); + + if (pte_dirty(orig_pte)) + pte = pte_mkdirty(pte); + + if (pte_young(orig_pte)) + pte = pte_mkyoung(pte); + + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) + set_pte_at(mm, addr, ptep, pte); + + return true; +} + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep) +{ + pte_t orig_pte = ptep_get(ptep); + int pte_num; + + if (!pte_napot(orig_pte)) + return ptep_get_and_clear(mm, addr, ptep); + + pte_num = napot_pte_num(napot_cont_order(orig_pte)); + + return get_clear_contig(mm, addr, ptep, pte_num); +} + +void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep) +{ + pte_t pte = ptep_get(ptep); + unsigned long order; + int i, pte_num; + + if (!pte_napot(pte)) { + ptep_set_wrprotect(mm, addr, ptep); + return; + } + + order = napot_cont_order(pte); + pte_num = napot_pte_num(order); + ptep = huge_pte_offset(mm, addr, napot_cont_size(order)); + + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) + ptep_set_wrprotect(mm, addr, ptep); +} + +pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, + pte_t *ptep) +{ + pte_t pte = ptep_get(ptep); + int pte_num; + + if (!pte_napot(pte)) + return ptep_clear_flush(vma, addr, ptep); + + pte_num = napot_pte_num(napot_cont_order(pte)); + + return get_clear_contig_flush(vma->vm_mm, addr, ptep, pte_num); +} + +void huge_pte_clear(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + unsigned long sz) +{ + pte_t pte = READ_ONCE(*ptep); + int i, pte_num; + + if (!pte_napot(pte)) { + pte_clear(mm, addr, ptep); + return; + } + + pte_num = napot_pte_num(napot_cont_order(pte)); + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) + pte_clear(mm, addr, ptep); +} + +static __init bool is_napot_size(unsigned long size) +{ + unsigned long order; + + if (!has_svnapot()) + return false; + + for_each_napot_order(order) { + if (size == napot_cont_size(order)) + return true; + } + return false; +} + +static __init int napot_hugetlbpages_init(void) +{ + if (has_svnapot()) { + unsigned long order; + + for_each_napot_order(order) + hugetlb_add_hstate(order); + } + return 0; +} +arch_initcall(napot_hugetlbpages_init); + +#else + +static __init bool is_napot_size(unsigned long size) +{ + return false; +} + +#endif /*CONFIG_RISCV_ISA_SVNAPOT*/ + int pud_huge(pud_t pud) { return pud_leaf(pud); @@ -18,6 +317,8 @@ bool __init arch_hugetlb_valid_size(unsigned long size) return true; else if (IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE) return true; + else if (is_napot_size(size)) + return true; else return false; } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 478d6763a01a..747e5b1ef02d 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -20,6 +20,9 @@ #include <linux/dma-map-ops.h> #include <linux/crash_dump.h> #include <linux/hugetlb.h> +#ifdef CONFIG_RELOCATABLE +#include <linux/elf.h> +#endif #include <asm/fixmap.h> #include <asm/tlbflush.h> @@ -57,7 +60,6 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] EXPORT_SYMBOL(empty_zero_page); extern char _start[]; -#define DTB_EARLY_BASE_VA PGDIR_SIZE void *_dtb_early_va __initdata; uintptr_t _dtb_early_pa __initdata; @@ -146,7 +148,7 @@ static void __init print_vm_layout(void) print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END); #endif - print_ml("kernel", (unsigned long)KERNEL_LINK_ADDR, + print_ml("kernel", (unsigned long)kernel_map.virt_addr, (unsigned long)ADDRESS_SPACE_END); } } @@ -213,6 +215,14 @@ static void __init setup_bootmem(void) phys_ram_end = memblock_end_of_DRAM(); if (!IS_ENABLED(CONFIG_XIP_KERNEL)) phys_ram_base = memblock_start_of_DRAM(); + + /* + * In 64-bit, any use of __va/__pa before this point is wrong as we + * did not know the start of DRAM before. + */ + if (IS_ENABLED(CONFIG_64BIT)) + kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base; + /* * memblock allocator is not aware of the fact that last 4K bytes of * the addressable memory can not be mapped because of IS_ERR_VALUE @@ -236,31 +246,22 @@ static void __init setup_bootmem(void) set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); reserve_initrd_mem(); + + /* + * No allocation should be done before reserving the memory as defined + * in the device tree, otherwise the allocation could end up in a + * reserved region. + */ + early_init_fdt_scan_reserved_mem(); + /* * If DTB is built in, no need to reserve its memblock. * Otherwise, do reserve it but avoid using * early_init_fdt_reserve_self() since __pa() does * not work for DTB pointers that are fixmap addresses */ - if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) { - /* - * In case the DTB is not located in a memory region we won't - * be able to locate it later on via the linear mapping and - * get a segfault when accessing it via __va(dtb_early_pa). - * To avoid this situation copy DTB to a memory region. - * Note that memblock_phys_alloc will also reserve DTB region. - */ - if (!memblock_is_memory(dtb_early_pa)) { - size_t fdt_size = fdt_totalsize(dtb_early_va); - phys_addr_t new_dtb_early_pa = memblock_phys_alloc(fdt_size, PAGE_SIZE); - void *new_dtb_early_va = early_memremap(new_dtb_early_pa, fdt_size); - - memcpy(new_dtb_early_va, dtb_early_va, fdt_size); - early_memunmap(new_dtb_early_va, fdt_size); - _dtb_early_pa = new_dtb_early_pa; - } else - memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); - } + if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) + memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); dma_contiguous_reserve(dma32_phys_limit); if (IS_ENABLED(CONFIG_64BIT)) @@ -271,21 +272,14 @@ static void __init setup_bootmem(void) #ifdef CONFIG_MMU struct pt_alloc_ops pt_ops __initdata; -unsigned long riscv_pfn_base __ro_after_init; -EXPORT_SYMBOL(riscv_pfn_base); - pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); -static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); -static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); -static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); #ifdef CONFIG_XIP_KERNEL #define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops)) -#define riscv_pfn_base (*(unsigned long *)XIP_FIXUP(&riscv_pfn_base)) #define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir)) #define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte)) #define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir)) @@ -626,9 +620,6 @@ static void __init create_p4d_mapping(p4d_t *p4dp, #define trampoline_pgd_next (pgtable_l5_enabled ? \ (uintptr_t)trampoline_p4d : (pgtable_l4_enabled ? \ (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)) -#define early_dtb_pgd_next (pgtable_l5_enabled ? \ - (uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ? \ - (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)) #else #define pgd_next_t pte_t #define alloc_pgd_next(__va) pt_ops.alloc_pte(__va) @@ -636,7 +627,6 @@ static void __init create_p4d_mapping(p4d_t *p4dp, #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ create_pte_mapping(__nextp, __va, __pa, __sz, __prot) #define fixmap_pgd_next ((uintptr_t)fixmap_pte) -#define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd) #define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0) #define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0) #define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0) @@ -671,9 +661,16 @@ void __init create_pgd_mapping(pgd_t *pgdp, static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) { - /* Upgrade to PMD_SIZE mappings whenever possible */ - base &= PMD_SIZE - 1; - if (!base && size >= PMD_SIZE) + if (!(base & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE) + return PGDIR_SIZE; + + if (!(base & (P4D_SIZE - 1)) && size >= P4D_SIZE) + return P4D_SIZE; + + if (!(base & (PUD_SIZE - 1)) && size >= PUD_SIZE) + return PUD_SIZE; + + if (!(base & (PMD_SIZE - 1)) && size >= PMD_SIZE) return PMD_SIZE; return PAGE_SIZE; @@ -732,6 +729,8 @@ static __init pgprot_t pgprot_from_va(uintptr_t va) #endif /* CONFIG_STRICT_KERNEL_RWX */ #if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL) +u64 __pi_set_satp_mode_from_cmdline(uintptr_t dtb_pa); + static void __init disable_pgtable_l5(void) { pgtable_l5_enabled = false; @@ -746,17 +745,39 @@ static void __init disable_pgtable_l4(void) satp_mode = SATP_MODE_39; } +static int __init print_no4lvl(char *p) +{ + pr_info("Disabled 4-level and 5-level paging"); + return 0; +} +early_param("no4lvl", print_no4lvl); + +static int __init print_no5lvl(char *p) +{ + pr_info("Disabled 5-level paging"); + return 0; +} +early_param("no5lvl", print_no5lvl); + /* * There is a simple way to determine if 4-level is supported by the * underlying hardware: establish 1:1 mapping in 4-level page table mode * then read SATP to see if the configuration was taken into account * meaning sv48 is supported. */ -static __init void set_satp_mode(void) +static __init void set_satp_mode(uintptr_t dtb_pa) { u64 identity_satp, hw_satp; uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK; - bool check_l4 = false; + u64 satp_mode_cmdline = __pi_set_satp_mode_from_cmdline(dtb_pa); + + if (satp_mode_cmdline == SATP_MODE_57) { + disable_pgtable_l5(); + } else if (satp_mode_cmdline == SATP_MODE_48) { + disable_pgtable_l5(); + disable_pgtable_l4(); + return; + } create_p4d_mapping(early_p4d, set_satp_mode_pmd, (uintptr_t)early_pud, @@ -775,7 +796,8 @@ static __init void set_satp_mode(void) retry: create_pgd_mapping(early_pg_dir, set_satp_mode_pmd, - check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d, + pgtable_l5_enabled ? + (uintptr_t)early_p4d : (uintptr_t)early_pud, PGDIR_SIZE, PAGE_TABLE); identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode; @@ -786,9 +808,8 @@ retry: local_flush_tlb_all(); if (hw_satp != identity_satp) { - if (!check_l4) { + if (pgtable_l5_enabled) { disable_pgtable_l5(); - check_l4 = true; memset(early_pg_dir, 0, PAGE_SIZE); goto retry; } @@ -820,6 +841,44 @@ retry: #error "setup_vm() is called from head.S before relocate so it should not use absolute addressing." #endif +#ifdef CONFIG_RELOCATABLE +extern unsigned long __rela_dyn_start, __rela_dyn_end; + +static void __init relocate_kernel(void) +{ + Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start; + /* + * This holds the offset between the linked virtual address and the + * relocated virtual address. + */ + uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR; + /* + * This holds the offset between kernel linked virtual address and + * physical address. + */ + uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr; + + for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) { + Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset); + Elf64_Addr relocated_addr = rela->r_addend; + + if (rela->r_info != R_RISCV_RELATIVE) + continue; + + /* + * Make sure to not relocate vdso symbols like rt_sigreturn + * which are linked from the address 0 in vmlinux since + * vdso symbol addresses are actually used as an offset from + * mm->context.vdso in VDSO_OFFSET macro. + */ + if (relocated_addr >= KERNEL_LINK_ADDR) + relocated_addr += reloc_offset; + + *(Elf64_Addr *)addr = relocated_addr; + } +} +#endif /* CONFIG_RELOCATABLE */ + #ifdef CONFIG_XIP_KERNEL static void __init create_kernel_page_table(pgd_t *pgdir, __always_unused bool early) @@ -860,32 +919,27 @@ static void __init create_kernel_page_table(pgd_t *pgdir, bool early) * this means 2 PMD entries whereas for 32-bit kernel, this is only 1 PGDIR * entry. */ -static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa) +static void __init create_fdt_early_page_table(uintptr_t fix_fdt_va, + uintptr_t dtb_pa) { -#ifndef CONFIG_BUILTIN_DTB uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1); - create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, - IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa, - PGDIR_SIZE, - IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL); - - if (pgtable_l5_enabled) - create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA, - (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE); - - if (pgtable_l4_enabled) - create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA, - (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE); +#ifndef CONFIG_BUILTIN_DTB + /* Make sure the fdt fixmap address is always aligned on PMD size */ + BUILD_BUG_ON(FIX_FDT % (PMD_SIZE / PAGE_SIZE)); - if (IS_ENABLED(CONFIG_64BIT)) { - create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, + /* In 32-bit only, the fdt lies in its own PGD */ + if (!IS_ENABLED(CONFIG_64BIT)) { + create_pgd_mapping(early_pg_dir, fix_fdt_va, + pa, MAX_FDT_SIZE, PAGE_KERNEL); + } else { + create_pmd_mapping(fixmap_pmd, fix_fdt_va, pa, PMD_SIZE, PAGE_KERNEL); - create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE, + create_pmd_mapping(fixmap_pmd, fix_fdt_va + PMD_SIZE, pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL); } - dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1)); + dtb_early_va = (void *)fix_fdt_va + (dtb_pa & (PMD_SIZE - 1)); #else /* * For 64-bit kernel, __va can't be used since it would return a linear @@ -979,14 +1033,25 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) #endif #if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL) - set_satp_mode(); + set_satp_mode(dtb_pa); #endif - kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr; + /* + * In 64-bit, we defer the setup of va_pa_offset to setup_bootmem, + * where we have the system memory layout: this allows us to align + * the physical and virtual mappings and then make use of PUD/P4D/PGD + * for the linear mapping. This is only possible because the kernel + * mapping lies outside the linear mapping. + * In 32-bit however, as the kernel resides in the linear mapping, + * setup_vm_final can not change the mapping established here, + * otherwise the same kernel addresses would get mapped to different + * physical addresses (if the start of dram is different from the + * kernel physical address start). + */ + kernel_map.va_pa_offset = IS_ENABLED(CONFIG_64BIT) ? + 0UL : PAGE_OFFSET - kernel_map.phys_addr; kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr; - riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr); - /* * The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit * kernel, whereas for 64-bit kernel, the end of the virtual address @@ -1007,6 +1072,17 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K); #endif +#ifdef CONFIG_RELOCATABLE + /* + * Early page table uses only one PUD, which makes it possible + * to map PUD_SIZE aligned on PUD_SIZE: if the relocation offset + * makes the kernel cross over a PUD_SIZE boundary, raise a bug + * since a part of the kernel would not get mapped. + */ + BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size); + relocate_kernel(); +#endif + apply_early_boot_alternatives(); pt_ops_set_early(); @@ -1055,7 +1131,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) create_kernel_page_table(early_pg_dir, true); /* Setup early mapping for FDT early scan */ - create_fdt_early_page_table(early_pg_dir, dtb_pa); + create_fdt_early_page_table(__fix_to_virt(FIX_FDT), dtb_pa); /* * Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap @@ -1090,16 +1166,36 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) pt_ops_set_fixmap(); } -static void __init setup_vm_final(void) +static void __init create_linear_mapping_range(phys_addr_t start, + phys_addr_t end) { + phys_addr_t pa; uintptr_t va, map_size; - phys_addr_t pa, start, end; + + for (pa = start; pa < end; pa += map_size) { + va = (uintptr_t)__va(pa); + map_size = best_map_size(pa, end - pa); + + create_pgd_mapping(swapper_pg_dir, va, pa, map_size, + pgprot_from_va(va)); + } +} + +static void __init create_linear_mapping_page_table(void) +{ + phys_addr_t start, end; u64 i; - /* Setup swapper PGD for fixmap */ - create_pgd_mapping(swapper_pg_dir, FIXADDR_START, - __pa_symbol(fixmap_pgd_next), - PGDIR_SIZE, PAGE_TABLE); +#ifdef CONFIG_STRICT_KERNEL_RWX + phys_addr_t ktext_start = __pa_symbol(_start); + phys_addr_t ktext_size = __init_data_begin - _start; + phys_addr_t krodata_start = __pa_symbol(__start_rodata); + phys_addr_t krodata_size = _data - __start_rodata; + + /* Isolate kernel text and rodata so they don't get mapped with a PUD */ + memblock_mark_nomap(ktext_start, ktext_size); + memblock_mark_nomap(krodata_start, krodata_size); +#endif /* Map all memory banks in the linear mapping */ for_each_mem_range(i, &start, &end) { @@ -1111,15 +1207,39 @@ static void __init setup_vm_final(void) if (end >= __pa(PAGE_OFFSET) + memory_limit) end = __pa(PAGE_OFFSET) + memory_limit; - for (pa = start; pa < end; pa += map_size) { - va = (uintptr_t)__va(pa); - map_size = best_map_size(pa, end - pa); - - create_pgd_mapping(swapper_pg_dir, va, pa, map_size, - pgprot_from_va(va)); - } + create_linear_mapping_range(start, end); } +#ifdef CONFIG_STRICT_KERNEL_RWX + create_linear_mapping_range(ktext_start, ktext_start + ktext_size); + create_linear_mapping_range(krodata_start, + krodata_start + krodata_size); + + memblock_clear_nomap(ktext_start, ktext_size); + memblock_clear_nomap(krodata_start, krodata_size); +#endif +} + +static void __init setup_vm_final(void) +{ + /* Setup swapper PGD for fixmap */ +#if !defined(CONFIG_64BIT) + /* + * In 32-bit, the device tree lies in a pgd entry, so it must be copied + * directly in swapper_pg_dir in addition to the pgd entry that points + * to fixmap_pte. + */ + unsigned long idx = pgd_index(__fix_to_virt(FIX_FDT)); + + set_pgd(&swapper_pg_dir[idx], early_pg_dir[idx]); +#endif + create_pgd_mapping(swapper_pg_dir, FIXADDR_START, + __pa_symbol(fixmap_pgd_next), + PGDIR_SIZE, PAGE_TABLE); + + /* Map the linear mapping */ + create_linear_mapping_page_table(); + /* Map the kernel */ if (IS_ENABLED(CONFIG_64BIT)) create_kernel_page_table(swapper_pg_dir, false); diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index e1226709490f..8fc0efcf905c 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -18,58 +18,48 @@ * For sv39, the region is aligned on PGDIR_SIZE so we only need to populate * the page global directory with kasan_early_shadow_pmd. * - * For sv48 and sv57, the region is not aligned on PGDIR_SIZE so the mapping - * must be divided as follows: - * - the first PGD entry, although incomplete, is populated with - * kasan_early_shadow_pud/p4d - * - the PGD entries in the middle are populated with kasan_early_shadow_pud/p4d - * - the last PGD entry is shared with the kernel mapping so populated at the - * lower levels pud/p4d - * - * In addition, when shallow populating a kasan region (for example vmalloc), - * this region may also not be aligned on PGDIR size, so we must go down to the - * pud level too. + * For sv48 and sv57, the region start is aligned on PGDIR_SIZE whereas the end + * region is not and then we have to go down to the PUD level. */ extern pgd_t early_pg_dir[PTRS_PER_PGD]; +pgd_t tmp_pg_dir[PTRS_PER_PGD] __page_aligned_bss; +p4d_t tmp_p4d[PTRS_PER_P4D] __page_aligned_bss; +pud_t tmp_pud[PTRS_PER_PUD] __page_aligned_bss; static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end) { phys_addr_t phys_addr; - pte_t *ptep, *base_pte; + pte_t *ptep, *p; - if (pmd_none(*pmd)) - base_pte = memblock_alloc(PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE); - else - base_pte = (pte_t *)pmd_page_vaddr(*pmd); + if (pmd_none(*pmd)) { + p = memblock_alloc(PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE); + set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(p)), PAGE_TABLE)); + } - ptep = base_pte + pte_index(vaddr); + ptep = pte_offset_kernel(pmd, vaddr); do { if (pte_none(*ptep)) { phys_addr = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); set_pte(ptep, pfn_pte(PFN_DOWN(phys_addr), PAGE_KERNEL)); + memset(__va(phys_addr), KASAN_SHADOW_INIT, PAGE_SIZE); } } while (ptep++, vaddr += PAGE_SIZE, vaddr != end); - - set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(base_pte)), PAGE_TABLE)); } static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned long end) { phys_addr_t phys_addr; - pmd_t *pmdp, *base_pmd; + pmd_t *pmdp, *p; unsigned long next; if (pud_none(*pud)) { - base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE); - } else { - base_pmd = (pmd_t *)pud_pgtable(*pud); - if (base_pmd == lm_alias(kasan_early_shadow_pmd)) - base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE); + p = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE); + set_pud(pud, pfn_pud(PFN_DOWN(__pa(p)), PAGE_TABLE)); } - pmdp = base_pmd + pmd_index(vaddr); + pmdp = pmd_offset(pud, vaddr); do { next = pmd_addr_end(vaddr, end); @@ -78,157 +68,77 @@ static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned phys_addr = memblock_phys_alloc(PMD_SIZE, PMD_SIZE); if (phys_addr) { set_pmd(pmdp, pfn_pmd(PFN_DOWN(phys_addr), PAGE_KERNEL)); + memset(__va(phys_addr), KASAN_SHADOW_INIT, PMD_SIZE); continue; } } kasan_populate_pte(pmdp, vaddr, next); } while (pmdp++, vaddr = next, vaddr != end); - - /* - * Wait for the whole PGD to be populated before setting the PGD in - * the page table, otherwise, if we did set the PGD before populating - * it entirely, memblock could allocate a page at a physical address - * where KASAN is not populated yet and then we'd get a page fault. - */ - set_pud(pud, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE)); } -static void __init kasan_populate_pud(pgd_t *pgd, - unsigned long vaddr, unsigned long end, - bool early) +static void __init kasan_populate_pud(p4d_t *p4d, + unsigned long vaddr, unsigned long end) { phys_addr_t phys_addr; - pud_t *pudp, *base_pud; + pud_t *pudp, *p; unsigned long next; - if (early) { - /* - * We can't use pgd_page_vaddr here as it would return a linear - * mapping address but it is not mapped yet, but when populating - * early_pg_dir, we need the physical address and when populating - * swapper_pg_dir, we need the kernel virtual address so use - * pt_ops facility. - */ - base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd))); - } else if (pgd_none(*pgd)) { - base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE); - memcpy(base_pud, (void *)kasan_early_shadow_pud, - sizeof(pud_t) * PTRS_PER_PUD); - } else { - base_pud = (pud_t *)pgd_page_vaddr(*pgd); - if (base_pud == lm_alias(kasan_early_shadow_pud)) { - base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE); - memcpy(base_pud, (void *)kasan_early_shadow_pud, - sizeof(pud_t) * PTRS_PER_PUD); - } + if (p4d_none(*p4d)) { + p = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE); + set_p4d(p4d, pfn_p4d(PFN_DOWN(__pa(p)), PAGE_TABLE)); } - pudp = base_pud + pud_index(vaddr); + pudp = pud_offset(p4d, vaddr); do { next = pud_addr_end(vaddr, end); if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) { - if (early) { - phys_addr = __pa(((uintptr_t)kasan_early_shadow_pmd)); - set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_TABLE)); + phys_addr = memblock_phys_alloc(PUD_SIZE, PUD_SIZE); + if (phys_addr) { + set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_KERNEL)); + memset(__va(phys_addr), KASAN_SHADOW_INIT, PUD_SIZE); continue; - } else { - phys_addr = memblock_phys_alloc(PUD_SIZE, PUD_SIZE); - if (phys_addr) { - set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_KERNEL)); - continue; - } } } kasan_populate_pmd(pudp, vaddr, next); } while (pudp++, vaddr = next, vaddr != end); - - /* - * Wait for the whole PGD to be populated before setting the PGD in - * the page table, otherwise, if we did set the PGD before populating - * it entirely, memblock could allocate a page at a physical address - * where KASAN is not populated yet and then we'd get a page fault. - */ - if (!early) - set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pud)), PAGE_TABLE)); } static void __init kasan_populate_p4d(pgd_t *pgd, - unsigned long vaddr, unsigned long end, - bool early) + unsigned long vaddr, unsigned long end) { phys_addr_t phys_addr; - p4d_t *p4dp, *base_p4d; + p4d_t *p4dp, *p; unsigned long next; - if (early) { - /* - * We can't use pgd_page_vaddr here as it would return a linear - * mapping address but it is not mapped yet, but when populating - * early_pg_dir, we need the physical address and when populating - * swapper_pg_dir, we need the kernel virtual address so use - * pt_ops facility. - */ - base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgd))); - } else { - base_p4d = (p4d_t *)pgd_page_vaddr(*pgd); - if (base_p4d == lm_alias(kasan_early_shadow_p4d)) { - base_p4d = memblock_alloc(PTRS_PER_PUD * sizeof(p4d_t), PAGE_SIZE); - memcpy(base_p4d, (void *)kasan_early_shadow_p4d, - sizeof(p4d_t) * PTRS_PER_P4D); - } + if (pgd_none(*pgd)) { + p = memblock_alloc(PTRS_PER_P4D * sizeof(p4d_t), PAGE_SIZE); + set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE)); } - p4dp = base_p4d + p4d_index(vaddr); + p4dp = p4d_offset(pgd, vaddr); do { next = p4d_addr_end(vaddr, end); if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE) { - if (early) { - phys_addr = __pa(((uintptr_t)kasan_early_shadow_pud)); - set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_TABLE)); + phys_addr = memblock_phys_alloc(P4D_SIZE, P4D_SIZE); + if (phys_addr) { + set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_KERNEL)); + memset(__va(phys_addr), KASAN_SHADOW_INIT, P4D_SIZE); continue; - } else { - phys_addr = memblock_phys_alloc(P4D_SIZE, P4D_SIZE); - if (phys_addr) { - set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_KERNEL)); - continue; - } } } - kasan_populate_pud((pgd_t *)p4dp, vaddr, next, early); + kasan_populate_pud(p4dp, vaddr, next); } while (p4dp++, vaddr = next, vaddr != end); - - /* - * Wait for the whole P4D to be populated before setting the P4D in - * the page table, otherwise, if we did set the P4D before populating - * it entirely, memblock could allocate a page at a physical address - * where KASAN is not populated yet and then we'd get a page fault. - */ - if (!early) - set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_p4d)), PAGE_TABLE)); } -#define kasan_early_shadow_pgd_next (pgtable_l5_enabled ? \ - (uintptr_t)kasan_early_shadow_p4d : \ - (pgtable_l4_enabled ? \ - (uintptr_t)kasan_early_shadow_pud : \ - (uintptr_t)kasan_early_shadow_pmd)) -#define kasan_populate_pgd_next(pgdp, vaddr, next, early) \ - (pgtable_l5_enabled ? \ - kasan_populate_p4d(pgdp, vaddr, next, early) : \ - (pgtable_l4_enabled ? \ - kasan_populate_pud(pgdp, vaddr, next, early) : \ - kasan_populate_pmd((pud_t *)pgdp, vaddr, next))) - static void __init kasan_populate_pgd(pgd_t *pgdp, - unsigned long vaddr, unsigned long end, - bool early) + unsigned long vaddr, unsigned long end) { phys_addr_t phys_addr; unsigned long next; @@ -236,29 +146,174 @@ static void __init kasan_populate_pgd(pgd_t *pgdp, do { next = pgd_addr_end(vaddr, end); - if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) { - if (early) { - phys_addr = __pa((uintptr_t)kasan_early_shadow_pgd_next); - set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_TABLE)); + if (pgd_none(*pgdp) && IS_ALIGNED(vaddr, PGDIR_SIZE) && + (next - vaddr) >= PGDIR_SIZE) { + phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE); + if (phys_addr) { + set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL)); + memset(__va(phys_addr), KASAN_SHADOW_INIT, PGDIR_SIZE); continue; - } else if (pgd_page_vaddr(*pgdp) == - (unsigned long)lm_alias(kasan_early_shadow_pgd_next)) { - /* - * pgdp can't be none since kasan_early_init - * initialized all KASAN shadow region with - * kasan_early_shadow_pud: if this is still the - * case, that means we can try to allocate a - * hugepage as a replacement. - */ - phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE); - if (phys_addr) { - set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL)); - continue; - } } } - kasan_populate_pgd_next(pgdp, vaddr, next, early); + kasan_populate_p4d(pgdp, vaddr, next); + } while (pgdp++, vaddr = next, vaddr != end); +} + +static void __init kasan_early_clear_pud(p4d_t *p4dp, + unsigned long vaddr, unsigned long end) +{ + pud_t *pudp, *base_pud; + unsigned long next; + + if (!pgtable_l4_enabled) { + pudp = (pud_t *)p4dp; + } else { + base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(*p4dp))); + pudp = base_pud + pud_index(vaddr); + } + + do { + next = pud_addr_end(vaddr, end); + + if (IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) { + pud_clear(pudp); + continue; + } + + BUG(); + } while (pudp++, vaddr = next, vaddr != end); +} + +static void __init kasan_early_clear_p4d(pgd_t *pgdp, + unsigned long vaddr, unsigned long end) +{ + p4d_t *p4dp, *base_p4d; + unsigned long next; + + if (!pgtable_l5_enabled) { + p4dp = (p4d_t *)pgdp; + } else { + base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgdp))); + p4dp = base_p4d + p4d_index(vaddr); + } + + do { + next = p4d_addr_end(vaddr, end); + + if (pgtable_l4_enabled && IS_ALIGNED(vaddr, P4D_SIZE) && + (next - vaddr) >= P4D_SIZE) { + p4d_clear(p4dp); + continue; + } + + kasan_early_clear_pud(p4dp, vaddr, next); + } while (p4dp++, vaddr = next, vaddr != end); +} + +static void __init kasan_early_clear_pgd(pgd_t *pgdp, + unsigned long vaddr, unsigned long end) +{ + unsigned long next; + + do { + next = pgd_addr_end(vaddr, end); + + if (pgtable_l5_enabled && IS_ALIGNED(vaddr, PGDIR_SIZE) && + (next - vaddr) >= PGDIR_SIZE) { + pgd_clear(pgdp); + continue; + } + + kasan_early_clear_p4d(pgdp, vaddr, next); + } while (pgdp++, vaddr = next, vaddr != end); +} + +static void __init kasan_early_populate_pud(p4d_t *p4dp, + unsigned long vaddr, + unsigned long end) +{ + pud_t *pudp, *base_pud; + phys_addr_t phys_addr; + unsigned long next; + + if (!pgtable_l4_enabled) { + pudp = (pud_t *)p4dp; + } else { + base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(*p4dp))); + pudp = base_pud + pud_index(vaddr); + } + + do { + next = pud_addr_end(vaddr, end); + + if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) && + (next - vaddr) >= PUD_SIZE) { + phys_addr = __pa((uintptr_t)kasan_early_shadow_pmd); + set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_TABLE)); + continue; + } + + BUG(); + } while (pudp++, vaddr = next, vaddr != end); +} + +static void __init kasan_early_populate_p4d(pgd_t *pgdp, + unsigned long vaddr, + unsigned long end) +{ + p4d_t *p4dp, *base_p4d; + phys_addr_t phys_addr; + unsigned long next; + + /* + * We can't use pgd_page_vaddr here as it would return a linear + * mapping address but it is not mapped yet, but when populating + * early_pg_dir, we need the physical address and when populating + * swapper_pg_dir, we need the kernel virtual address so use + * pt_ops facility. + * Note that this test is then completely equivalent to + * p4dp = p4d_offset(pgdp, vaddr) + */ + if (!pgtable_l5_enabled) { + p4dp = (p4d_t *)pgdp; + } else { + base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgdp))); + p4dp = base_p4d + p4d_index(vaddr); + } + + do { + next = p4d_addr_end(vaddr, end); + + if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) && + (next - vaddr) >= P4D_SIZE) { + phys_addr = __pa((uintptr_t)kasan_early_shadow_pud); + set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_TABLE)); + continue; + } + + kasan_early_populate_pud(p4dp, vaddr, next); + } while (p4dp++, vaddr = next, vaddr != end); +} + +static void __init kasan_early_populate_pgd(pgd_t *pgdp, + unsigned long vaddr, + unsigned long end) +{ + phys_addr_t phys_addr; + unsigned long next; + + do { + next = pgd_addr_end(vaddr, end); + + if (pgd_none(*pgdp) && IS_ALIGNED(vaddr, PGDIR_SIZE) && + (next - vaddr) >= PGDIR_SIZE) { + phys_addr = __pa((uintptr_t)kasan_early_shadow_p4d); + set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_TABLE)); + continue; + } + + kasan_early_populate_p4d(pgdp, vaddr, next); } while (pgdp++, vaddr = next, vaddr != end); } @@ -295,16 +350,16 @@ asmlinkage void __init kasan_early_init(void) PAGE_TABLE)); } - kasan_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START), - KASAN_SHADOW_START, KASAN_SHADOW_END, true); + kasan_early_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START), + KASAN_SHADOW_START, KASAN_SHADOW_END); local_flush_tlb_all(); } void __init kasan_swapper_init(void) { - kasan_populate_pgd(pgd_offset_k(KASAN_SHADOW_START), - KASAN_SHADOW_START, KASAN_SHADOW_END, true); + kasan_early_populate_pgd(pgd_offset_k(KASAN_SHADOW_START), + KASAN_SHADOW_START, KASAN_SHADOW_END); local_flush_tlb_all(); } @@ -314,118 +369,65 @@ static void __init kasan_populate(void *start, void *end) unsigned long vaddr = (unsigned long)start & PAGE_MASK; unsigned long vend = PAGE_ALIGN((unsigned long)end); - kasan_populate_pgd(pgd_offset_k(vaddr), vaddr, vend, false); - - local_flush_tlb_all(); - memset(start, KASAN_SHADOW_INIT, end - start); + kasan_populate_pgd(pgd_offset_k(vaddr), vaddr, vend); } -static void __init kasan_shallow_populate_pmd(pgd_t *pgdp, +static void __init kasan_shallow_populate_pud(p4d_t *p4d, unsigned long vaddr, unsigned long end) { unsigned long next; - pmd_t *pmdp, *base_pmd; - bool is_kasan_pte; - - base_pmd = (pmd_t *)pgd_page_vaddr(*pgdp); - pmdp = base_pmd + pmd_index(vaddr); - - do { - next = pmd_addr_end(vaddr, end); - is_kasan_pte = (pmd_pgtable(*pmdp) == lm_alias(kasan_early_shadow_pte)); - - if (is_kasan_pte) - pmd_clear(pmdp); - } while (pmdp++, vaddr = next, vaddr != end); -} - -static void __init kasan_shallow_populate_pud(pgd_t *pgdp, - unsigned long vaddr, unsigned long end) -{ - unsigned long next; - pud_t *pudp, *base_pud; - pmd_t *base_pmd; - bool is_kasan_pmd; - - base_pud = (pud_t *)pgd_page_vaddr(*pgdp); - pudp = base_pud + pud_index(vaddr); + void *p; + pud_t *pud_k = pud_offset(p4d, vaddr); do { next = pud_addr_end(vaddr, end); - is_kasan_pmd = (pud_pgtable(*pudp) == lm_alias(kasan_early_shadow_pmd)); - - if (!is_kasan_pmd) - continue; - - base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE)); - if (IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) + if (pud_none(*pud_k)) { + p = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + set_pud(pud_k, pfn_pud(PFN_DOWN(__pa(p)), PAGE_TABLE)); continue; + } - memcpy(base_pmd, (void *)kasan_early_shadow_pmd, PAGE_SIZE); - kasan_shallow_populate_pmd((pgd_t *)pudp, vaddr, next); - } while (pudp++, vaddr = next, vaddr != end); + BUG(); + } while (pud_k++, vaddr = next, vaddr != end); } -static void __init kasan_shallow_populate_p4d(pgd_t *pgdp, +static void __init kasan_shallow_populate_p4d(pgd_t *pgd, unsigned long vaddr, unsigned long end) { unsigned long next; - p4d_t *p4dp, *base_p4d; - pud_t *base_pud; - bool is_kasan_pud; - - base_p4d = (p4d_t *)pgd_page_vaddr(*pgdp); - p4dp = base_p4d + p4d_index(vaddr); + void *p; + p4d_t *p4d_k = p4d_offset(pgd, vaddr); do { next = p4d_addr_end(vaddr, end); - is_kasan_pud = (p4d_pgtable(*p4dp) == lm_alias(kasan_early_shadow_pud)); - if (!is_kasan_pud) - continue; - - base_pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - set_p4d(p4dp, pfn_p4d(PFN_DOWN(__pa(base_pud)), PAGE_TABLE)); - - if (IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE) + if (p4d_none(*p4d_k)) { + p = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + set_p4d(p4d_k, pfn_p4d(PFN_DOWN(__pa(p)), PAGE_TABLE)); continue; + } - memcpy(base_pud, (void *)kasan_early_shadow_pud, PAGE_SIZE); - kasan_shallow_populate_pud((pgd_t *)p4dp, vaddr, next); - } while (p4dp++, vaddr = next, vaddr != end); + kasan_shallow_populate_pud(p4d_k, vaddr, end); + } while (p4d_k++, vaddr = next, vaddr != end); } -#define kasan_shallow_populate_pgd_next(pgdp, vaddr, next) \ - (pgtable_l5_enabled ? \ - kasan_shallow_populate_p4d(pgdp, vaddr, next) : \ - (pgtable_l4_enabled ? \ - kasan_shallow_populate_pud(pgdp, vaddr, next) : \ - kasan_shallow_populate_pmd(pgdp, vaddr, next))) - static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end) { unsigned long next; void *p; pgd_t *pgd_k = pgd_offset_k(vaddr); - bool is_kasan_pgd_next; do { next = pgd_addr_end(vaddr, end); - is_kasan_pgd_next = (pgd_page_vaddr(*pgd_k) == - (unsigned long)lm_alias(kasan_early_shadow_pgd_next)); - if (is_kasan_pgd_next) { + if (pgd_none(*pgd_k)) { p = memblock_alloc(PAGE_SIZE, PAGE_SIZE); set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE)); - } - - if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) continue; + } - memcpy(p, (void *)kasan_early_shadow_pgd_next, PAGE_SIZE); - kasan_shallow_populate_pgd_next(pgd_k, vaddr, next); + kasan_shallow_populate_p4d(pgd_k, vaddr, next); } while (pgd_k++, vaddr = next, vaddr != end); } @@ -435,7 +437,37 @@ static void __init kasan_shallow_populate(void *start, void *end) unsigned long vend = PAGE_ALIGN((unsigned long)end); kasan_shallow_populate_pgd(vaddr, vend); - local_flush_tlb_all(); +} + +static void create_tmp_mapping(void) +{ + void *ptr; + p4d_t *base_p4d; + + /* + * We need to clean the early mapping: this is hard to achieve "in-place", + * so install a temporary mapping like arm64 and x86 do. + */ + memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(pgd_t) * PTRS_PER_PGD); + + /* Copy the last p4d since it is shared with the kernel mapping. */ + if (pgtable_l5_enabled) { + ptr = (p4d_t *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END)); + memcpy(tmp_p4d, ptr, sizeof(p4d_t) * PTRS_PER_P4D); + set_pgd(&tmp_pg_dir[pgd_index(KASAN_SHADOW_END)], + pfn_pgd(PFN_DOWN(__pa(tmp_p4d)), PAGE_TABLE)); + base_p4d = tmp_p4d; + } else { + base_p4d = (p4d_t *)tmp_pg_dir; + } + + /* Copy the last pud since it is shared with the kernel mapping. */ + if (pgtable_l4_enabled) { + ptr = (pud_t *)p4d_page_vaddr(*(base_p4d + p4d_index(KASAN_SHADOW_END))); + memcpy(tmp_pud, ptr, sizeof(pud_t) * PTRS_PER_PUD); + set_p4d(&base_p4d[p4d_index(KASAN_SHADOW_END)], + pfn_p4d(PFN_DOWN(__pa(tmp_pud)), PAGE_TABLE)); + } } void __init kasan_init(void) @@ -443,10 +475,27 @@ void __init kasan_init(void) phys_addr_t p_start, p_end; u64 i; - if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) + create_tmp_mapping(); + csr_write(CSR_SATP, PFN_DOWN(__pa(tmp_pg_dir)) | satp_mode); + + kasan_early_clear_pgd(pgd_offset_k(KASAN_SHADOW_START), + KASAN_SHADOW_START, KASAN_SHADOW_END); + + kasan_populate_early_shadow((void *)kasan_mem_to_shadow((void *)FIXADDR_START), + (void *)kasan_mem_to_shadow((void *)VMALLOC_START)); + + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { kasan_shallow_populate( (void *)kasan_mem_to_shadow((void *)VMALLOC_START), (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); + /* Shallow populate modules and BPF which are vmalloc-allocated */ + kasan_shallow_populate( + (void *)kasan_mem_to_shadow((void *)MODULES_VADDR), + (void *)kasan_mem_to_shadow((void *)MODULES_END)); + } else { + kasan_populate_early_shadow((void *)kasan_mem_to_shadow((void *)VMALLOC_START), + (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); + } /* Populate the linear mapping */ for_each_mem_range(i, &p_start, &p_end) { @@ -459,8 +508,8 @@ void __init kasan_init(void) kasan_populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end)); } - /* Populate kernel, BPF, modules mapping */ - kasan_populate(kasan_mem_to_shadow((const void *)MODULES_VADDR), + /* Populate kernel */ + kasan_populate(kasan_mem_to_shadow((const void *)MODULES_END), kasan_mem_to_shadow((const void *)MODULES_VADDR + SZ_2G)); for (i = 0; i < PTRS_PER_PTE; i++) @@ -471,4 +520,7 @@ void __init kasan_init(void) memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE); init_task.kasan_depth = 0; + + csr_write(CSR_SATP, PFN_DOWN(__pa(swapper_pg_dir)) | satp_mode); + local_flush_tlb_all(); } diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c index 86c56616e5de..ea3d61de065b 100644 --- a/arch/riscv/mm/pageattr.c +++ b/arch/riscv/mm/pageattr.c @@ -217,18 +217,26 @@ bool kernel_page_present(struct page *page) pgd = pgd_offset_k(addr); if (!pgd_present(*pgd)) return false; + if (pgd_leaf(*pgd)) + return true; p4d = p4d_offset(pgd, addr); if (!p4d_present(*p4d)) return false; + if (p4d_leaf(*p4d)) + return true; pud = pud_offset(p4d, addr); if (!pud_present(*pud)) return false; + if (pud_leaf(*pud)) + return true; pmd = pmd_offset(pud, addr); if (!pmd_present(*pmd)) return false; + if (pmd_leaf(*pmd)) + return true; pte = pte_offset_kernel(pmd, addr); return pte_present(*pte); diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c index 9b18bda74154..18706f457da7 100644 --- a/arch/riscv/mm/physaddr.c +++ b/arch/riscv/mm/physaddr.c @@ -33,3 +33,19 @@ phys_addr_t __phys_addr_symbol(unsigned long x) return __va_to_pa_nodebug(x); } EXPORT_SYMBOL(__phys_addr_symbol); + +phys_addr_t linear_mapping_va_to_pa(unsigned long x) +{ + BUG_ON(!kernel_map.va_pa_offset); + + return ((unsigned long)(x) - kernel_map.va_pa_offset); +} +EXPORT_SYMBOL(linear_mapping_va_to_pa); + +void *linear_mapping_pa_to_va(unsigned long x) +{ + BUG_ON(!kernel_map.va_pa_offset); + + return ((void *)((unsigned long)(x) + kernel_map.va_pa_offset)); +} +EXPORT_SYMBOL(linear_mapping_pa_to_va); diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 830e7de65e3a..20a9f991a6d7 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -59,10 +59,6 @@ struct ptd_mm_info { }; enum address_markers_idx { -#ifdef CONFIG_KASAN - KASAN_SHADOW_START_NR, - KASAN_SHADOW_END_NR, -#endif FIXMAP_START_NR, FIXMAP_END_NR, PCI_IO_START_NR, @@ -74,6 +70,10 @@ enum address_markers_idx { VMALLOC_START_NR, VMALLOC_END_NR, PAGE_OFFSET_NR, +#ifdef CONFIG_KASAN + KASAN_SHADOW_START_NR, + KASAN_SHADOW_END_NR, +#endif #ifdef CONFIG_64BIT MODULES_MAPPING_NR, KERNEL_MAPPING_NR, @@ -82,10 +82,6 @@ enum address_markers_idx { }; static struct addr_marker address_markers[] = { -#ifdef CONFIG_KASAN - {0, "Kasan shadow start"}, - {0, "Kasan shadow end"}, -#endif {0, "Fixmap start"}, {0, "Fixmap end"}, {0, "PCI I/O start"}, @@ -97,6 +93,10 @@ static struct addr_marker address_markers[] = { {0, "vmalloc() area"}, {0, "vmalloc() end"}, {0, "Linear mapping"}, +#ifdef CONFIG_KASAN + {0, "Kasan shadow start"}, + {0, "Kasan shadow end"}, +#endif #ifdef CONFIG_64BIT {0, "Modules/BPF mapping"}, {0, "Kernel mapping"}, @@ -362,10 +362,6 @@ static int __init ptdump_init(void) { unsigned int i, j; -#ifdef CONFIG_KASAN - address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START; - address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END; -#endif address_markers[FIXMAP_START_NR].start_address = FIXADDR_START; address_markers[FIXMAP_END_NR].start_address = FIXADDR_TOP; address_markers[PCI_IO_START_NR].start_address = PCI_IO_START; @@ -377,6 +373,10 @@ static int __init ptdump_init(void) address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; address_markers[PAGE_OFFSET_NR].start_address = PAGE_OFFSET; +#ifdef CONFIG_KASAN + address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START; + address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END; +#endif #ifdef CONFIG_64BIT address_markers[MODULES_MAPPING_NR].start_address = MODULES_VADDR; address_markers[KERNEL_MAPPING_NR].start_address = kernel_map.virt_addr; diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index ce7dfc81bb3f..77be59aadc73 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -5,17 +5,80 @@ #include <linux/sched.h> #include <asm/sbi.h> #include <asm/mmu_context.h> -#include <asm/tlbflush.h> + +static inline void local_flush_tlb_all_asid(unsigned long asid) +{ + __asm__ __volatile__ ("sfence.vma x0, %0" + : + : "r" (asid) + : "memory"); +} + +static inline void local_flush_tlb_page_asid(unsigned long addr, + unsigned long asid) +{ + __asm__ __volatile__ ("sfence.vma %0, %1" + : + : "r" (addr), "r" (asid) + : "memory"); +} + +static inline void local_flush_tlb_range(unsigned long start, + unsigned long size, unsigned long stride) +{ + if (size <= stride) + local_flush_tlb_page(start); + else + local_flush_tlb_all(); +} + +static inline void local_flush_tlb_range_asid(unsigned long start, + unsigned long size, unsigned long stride, unsigned long asid) +{ + if (size <= stride) + local_flush_tlb_page_asid(start, asid); + else + local_flush_tlb_all_asid(asid); +} + +static void __ipi_flush_tlb_all(void *info) +{ + local_flush_tlb_all(); +} void flush_tlb_all(void) { - sbi_remote_sfence_vma(NULL, 0, -1); + if (riscv_use_ipi_for_rfence()) + on_each_cpu(__ipi_flush_tlb_all, NULL, 1); + else + sbi_remote_sfence_vma(NULL, 0, -1); +} + +struct flush_tlb_range_data { + unsigned long asid; + unsigned long start; + unsigned long size; + unsigned long stride; +}; + +static void __ipi_flush_tlb_range_asid(void *info) +{ + struct flush_tlb_range_data *d = info; + + local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid); +} + +static void __ipi_flush_tlb_range(void *info) +{ + struct flush_tlb_range_data *d = info; + + local_flush_tlb_range(d->start, d->size, d->stride); } -static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, - unsigned long size, unsigned long stride) +static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, + unsigned long size, unsigned long stride) { - struct cpumask *pmask = &mm->context.tlb_stale_mask; + struct flush_tlb_range_data ftd; struct cpumask *cmask = mm_cpumask(mm); unsigned int cpuid; bool broadcast; @@ -27,31 +90,37 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, /* check if the tlbflush needs to be sent to other CPUs */ broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; if (static_branch_unlikely(&use_asid_allocator)) { - unsigned long asid = atomic_long_read(&mm->context.id); - - /* - * TLB will be immediately flushed on harts concurrently - * executing this MM context. TLB flush on other harts - * is deferred until this MM context migrates there. - */ - cpumask_setall(pmask); - cpumask_clear_cpu(cpuid, pmask); - cpumask_andnot(pmask, pmask, cmask); + unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask; if (broadcast) { - sbi_remote_sfence_vma_asid(cmask, start, size, asid); - } else if (size <= stride) { - local_flush_tlb_page_asid(start, asid); + if (riscv_use_ipi_for_rfence()) { + ftd.asid = asid; + ftd.start = start; + ftd.size = size; + ftd.stride = stride; + on_each_cpu_mask(cmask, + __ipi_flush_tlb_range_asid, + &ftd, 1); + } else + sbi_remote_sfence_vma_asid(cmask, + start, size, asid); } else { - local_flush_tlb_all_asid(asid); + local_flush_tlb_range_asid(start, size, stride, asid); } } else { if (broadcast) { - sbi_remote_sfence_vma(cmask, start, size); - } else if (size <= stride) { - local_flush_tlb_page(start); + if (riscv_use_ipi_for_rfence()) { + ftd.asid = 0; + ftd.start = start; + ftd.size = size; + ftd.stride = stride; + on_each_cpu_mask(cmask, + __ipi_flush_tlb_range, + &ftd, 1); + } else + sbi_remote_sfence_vma(cmask, start, size); } else { - local_flush_tlb_all(); + local_flush_tlb_range(start, size, stride); } } @@ -60,23 +129,23 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, void flush_tlb_mm(struct mm_struct *mm) { - __sbi_tlb_flush_range(mm, 0, -1, PAGE_SIZE); + __flush_tlb_range(mm, 0, -1, PAGE_SIZE); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - __sbi_tlb_flush_range(vma->vm_mm, addr, PAGE_SIZE, PAGE_SIZE); + __flush_tlb_range(vma->vm_mm, addr, PAGE_SIZE, PAGE_SIZE); } void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __sbi_tlb_flush_range(vma->vm_mm, start, end - start, PAGE_SIZE); + __flush_tlb_range(vma->vm_mm, start, end - start, PAGE_SIZE); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __sbi_tlb_flush_range(vma->vm_mm, start, end - start, PMD_SIZE); + __flush_tlb_range(vma->vm_mm, start, end - start, PMD_SIZE); } #endif diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index f5a668736c79..c648864c8cd1 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -10,6 +10,7 @@ #include <linux/filter.h> #include <linux/memory.h> #include <linux/stop_machine.h> +#include <asm/patch.h> #include "bpf_jit.h" #define RV_REG_TCC RV_REG_A6 @@ -1751,3 +1752,8 @@ void bpf_jit_build_epilogue(struct rv_jit_context *ctx) { __build_epilogue(false, ctx); } + +bool bpf_jit_supports_kfunc_call(void) +{ + return true; +} diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile index d16bf715a586..5730797a6b40 100644 --- a/arch/riscv/purgatory/Makefile +++ b/arch/riscv/purgatory/Makefile @@ -84,12 +84,7 @@ CFLAGS_string.o += $(PURGATORY_CFLAGS) CFLAGS_REMOVE_ctype.o += $(PURGATORY_CFLAGS_REMOVE) CFLAGS_ctype.o += $(PURGATORY_CFLAGS) -AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2 -AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2 -AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2 -AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2 -AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2 -AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2 +asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x)) $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) diff --git a/arch/riscv/tools/relocs_check.sh b/arch/riscv/tools/relocs_check.sh new file mode 100755 index 000000000000..baeb2e7b2290 --- /dev/null +++ b/arch/riscv/tools/relocs_check.sh @@ -0,0 +1,26 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-or-later +# Based on powerpc relocs_check.sh + +# This script checks the relocations of a vmlinux for "suspicious" +# relocations. + +if [ $# -lt 3 ]; then + echo "$0 [path to objdump] [path to nm] [path to vmlinux]" 1>&2 + exit 1 +fi + +bad_relocs=$( +${srctree}/scripts/relocs_check.sh "$@" | + # These relocations are okay + # R_RISCV_RELATIVE + grep -F -w -v 'R_RISCV_RELATIVE' +) + +if [ -z "$bad_relocs" ]; then + exit 0 +fi + +num_bad=$(echo "$bad_relocs" | wc -l) +echo "WARNING: $num_bad bad relocations" +echo "$bad_relocs" |