summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/boot/cmdline.c4
-rw-r--r--arch/x86/boot/compressed/kaslr.c240
-rw-r--r--arch/x86/boot/compressed/misc.c7
-rw-r--r--arch/x86/boot/compressed/misc.h4
-rw-r--r--arch/x86/boot/string.h5
-rw-r--r--arch/x86/entry/calling.h10
-rw-r--r--arch/x86/entry/common.c29
-rw-r--r--arch/x86/entry/thunk_32.S5
-rw-r--r--arch/x86/events/intel/core.c6
-rw-r--r--arch/x86/events/intel/lbr.c2
-rw-r--r--arch/x86/events/intel/uncore_snb.c52
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/efi.h10
-rw-r--r--arch/x86/include/asm/entry-common.h12
-rw-r--r--arch/x86/include/asm/fpu/internal.h7
-rw-r--r--arch/x86/include/asm/mmu.h1
-rw-r--r--arch/x86/include/asm/ptrace.h2
-rw-r--r--arch/x86/include/asm/special_insns.h6
-rw-r--r--arch/x86/include/asm/svm.h106
-rw-r--r--arch/x86/include/asm/sync_core.h34
-rw-r--r--arch/x86/kernel/alternative.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c4
-rw-r--r--arch/x86/kernel/apic/probe_32.c2
-rw-r--r--arch/x86/kernel/apic/vector.c16
-rw-r--r--arch/x86/kernel/cpu/cacheinfo.c2
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c2
-rw-r--r--arch/x86/kernel/cpu/mce/intel.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/cyrix.c2
-rw-r--r--arch/x86/kernel/hw_breakpoint.c2
-rw-r--r--arch/x86/kernel/kgdb.c4
-rw-r--r--arch/x86/kernel/mpparse.c4
-rw-r--r--arch/x86/kernel/process.c4
-rw-r--r--arch/x86/kernel/ptrace.c2
-rw-r--r--arch/x86/kernel/reboot.c2
-rw-r--r--arch/x86/kernel/signal.c2
-rw-r--r--arch/x86/kernel/smpboot.c26
-rw-r--r--arch/x86/kernel/traps.c65
-rw-r--r--arch/x86/kernel/umip.c40
-rw-r--r--arch/x86/kernel/uprobes.c4
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/emulate.c2
-rw-r--r--arch/x86/kvm/hyperv.c2
-rw-r--r--arch/x86/kvm/irq_comm.c2
-rw-r--r--arch/x86/kvm/lapic.c6
-rw-r--r--arch/x86/kvm/mmu/mmu.c2
-rw-r--r--arch/x86/kvm/svm/nested.c47
-rw-r--r--arch/x86/kvm/svm/svm.c4
-rw-r--r--arch/x86/kvm/vmx/vmx.c12
-rw-r--r--arch/x86/kvm/x86.c11
-rw-r--r--arch/x86/lib/Makefile2
-rw-r--r--arch/x86/lib/cmdline.c8
-rw-r--r--arch/x86/lib/insn-eval.c6
-rw-r--r--arch/x86/math-emu/errors.c2
-rw-r--r--arch/x86/math-emu/fpu_trig.c2
-rw-r--r--arch/x86/mm/fault.c78
-rw-r--r--arch/x86/mm/ioremap.c2
-rw-r--r--arch/x86/mm/numa_emulation.c2
-rw-r--r--arch/x86/mm/tlb.c13
-rw-r--r--arch/x86/pci/xen.c1
-rw-r--r--arch/x86/platform/efi/efi.c69
-rw-r--r--arch/x86/platform/efi/efi_32.c44
-rw-r--r--arch/x86/platform/efi/efi_64.c2
62 files changed, 623 insertions, 428 deletions
diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c
index 4ff01176c1cc..21d56ae83cdf 100644
--- a/arch/x86/boot/cmdline.c
+++ b/arch/x86/boot/cmdline.c
@@ -54,7 +54,7 @@ int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *b
/* else */
state = st_wordcmp;
opptr = option;
- /* fall through */
+ fallthrough;
case st_wordcmp:
if (c == '=' && !*opptr) {
@@ -129,7 +129,7 @@ int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option)
state = st_wordcmp;
opptr = option;
wstart = pos;
- /* fall through */
+ fallthrough;
case st_wordcmp:
if (!*opptr)
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 0048269180d5..877970d76249 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -36,6 +36,10 @@
#define STATIC
#include <linux/decompress/mm.h>
+#define _SETUP
+#include <asm/setup.h> /* For COMMAND_LINE_SIZE */
+#undef _SETUP
+
#ifdef CONFIG_X86_5LEVEL
unsigned int __pgtable_l5_enabled;
unsigned int pgdir_shift __ro_after_init = 39;
@@ -87,8 +91,11 @@ static unsigned long get_boot_seed(void)
static bool memmap_too_large;
-/* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */
-static unsigned long long mem_limit = ULLONG_MAX;
+/*
+ * Store memory limit: MAXMEM on 64-bit and KERNEL_IMAGE_SIZE on 32-bit.
+ * It may be reduced by "mem=nn[KMG]" or "memmap=nn[KMG]" command line options.
+ */
+static u64 mem_limit;
/* Number of immovable memory regions */
static int num_immovable_mem;
@@ -131,8 +138,7 @@ enum parse_mode {
};
static int
-parse_memmap(char *p, unsigned long long *start, unsigned long long *size,
- enum parse_mode mode)
+parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode)
{
char *oldp;
@@ -162,7 +168,7 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size,
*/
*size = 0;
} else {
- unsigned long long flags;
+ u64 flags;
/*
* efi_fake_mem=nn@ss:attr the attr specifies
@@ -178,7 +184,7 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size,
}
*size = 0;
}
- /* Fall through */
+ fallthrough;
default:
/*
* If w/o offset, only size specified, memmap=nn[KMG] has the
@@ -201,7 +207,7 @@ static void mem_avoid_memmap(enum parse_mode mode, char *str)
while (str && (i < MAX_MEMMAP_REGIONS)) {
int rc;
- unsigned long long start, size;
+ u64 start, size;
char *k = strchr(str, ',');
if (k)
@@ -214,7 +220,7 @@ static void mem_avoid_memmap(enum parse_mode mode, char *str)
if (start == 0) {
/* Store the specified memory limit if size > 0 */
- if (size > 0)
+ if (size > 0 && size < mem_limit)
mem_limit = size;
continue;
@@ -261,15 +267,15 @@ static void parse_gb_huge_pages(char *param, char *val)
static void handle_mem_options(void)
{
char *args = (char *)get_cmd_line_ptr();
- size_t len = strlen((char *)args);
+ size_t len;
char *tmp_cmdline;
char *param, *val;
u64 mem_size;
- if (!strstr(args, "memmap=") && !strstr(args, "mem=") &&
- !strstr(args, "hugepages"))
+ if (!args)
return;
+ len = strnlen(args, COMMAND_LINE_SIZE-1);
tmp_cmdline = malloc(len + 1);
if (!tmp_cmdline)
error("Failed to allocate space for tmp_cmdline");
@@ -284,14 +290,12 @@ static void handle_mem_options(void)
while (*args) {
args = next_arg(args, &param, &val);
/* Stop at -- */
- if (!val && strcmp(param, "--") == 0) {
- warn("Only '--' specified in cmdline");
- goto out;
- }
+ if (!val && strcmp(param, "--") == 0)
+ break;
if (!strcmp(param, "memmap")) {
mem_avoid_memmap(PARSE_MEMMAP, val);
- } else if (strstr(param, "hugepages")) {
+ } else if (IS_ENABLED(CONFIG_X86_64) && strstr(param, "hugepages")) {
parse_gb_huge_pages(param, val);
} else if (!strcmp(param, "mem")) {
char *p = val;
@@ -300,21 +304,23 @@ static void handle_mem_options(void)
continue;
mem_size = memparse(p, &p);
if (mem_size == 0)
- goto out;
+ break;
- mem_limit = mem_size;
+ if (mem_size < mem_limit)
+ mem_limit = mem_size;
} else if (!strcmp(param, "efi_fake_mem")) {
mem_avoid_memmap(PARSE_EFI, val);
}
}
-out:
free(tmp_cmdline);
return;
}
/*
- * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
+ * In theory, KASLR can put the kernel anywhere in the range of [16M, MAXMEM)
+ * on 64-bit, and [16M, KERNEL_IMAGE_SIZE) on 32-bit.
+ *
* The mem_avoid array is used to store the ranges that need to be avoided
* when KASLR searches for an appropriate random address. We must avoid any
* regions that are unsafe to overlap with during decompression, and other
@@ -392,8 +398,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
{
unsigned long init_size = boot_params->hdr.init_size;
u64 initrd_start, initrd_size;
- u64 cmd_line, cmd_line_size;
- char *ptr;
+ unsigned long cmd_line, cmd_line_size;
/*
* Avoid the region that is unsafe to overlap during
@@ -414,16 +419,15 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
/* No need to set mapping for initrd, it will be handled in VO. */
/* Avoid kernel command line. */
- cmd_line = (u64)boot_params->ext_cmd_line_ptr << 32;
- cmd_line |= boot_params->hdr.cmd_line_ptr;
+ cmd_line = get_cmd_line_ptr();
/* Calculate size of cmd_line. */
- ptr = (char *)(unsigned long)cmd_line;
- for (cmd_line_size = 0; ptr[cmd_line_size++];)
- ;
- mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
- mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
- add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start,
- mem_avoid[MEM_AVOID_CMDLINE].size);
+ if (cmd_line) {
+ cmd_line_size = strnlen((char *)cmd_line, COMMAND_LINE_SIZE-1) + 1;
+ mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
+ mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
+ add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start,
+ mem_avoid[MEM_AVOID_CMDLINE].size);
+ }
/* Avoid boot parameters. */
mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params;
@@ -454,7 +458,7 @@ static bool mem_avoid_overlap(struct mem_vector *img,
{
int i;
struct setup_data *ptr;
- unsigned long earliest = img->start + img->size;
+ u64 earliest = img->start + img->size;
bool is_overlapping = false;
for (i = 0; i < MEM_AVOID_MAX; i++) {
@@ -499,18 +503,16 @@ static bool mem_avoid_overlap(struct mem_vector *img,
}
struct slot_area {
- unsigned long addr;
- int num;
+ u64 addr;
+ unsigned long num;
};
#define MAX_SLOT_AREA 100
static struct slot_area slot_areas[MAX_SLOT_AREA];
-
+static unsigned int slot_area_index;
static unsigned long slot_max;
-static unsigned long slot_area_index;
-
static void store_slot_info(struct mem_vector *region, unsigned long image_size)
{
struct slot_area slot_area;
@@ -519,13 +521,10 @@ static void store_slot_info(struct mem_vector *region, unsigned long image_size)
return;
slot_area.addr = region->start;
- slot_area.num = (region->size - image_size) /
- CONFIG_PHYSICAL_ALIGN + 1;
+ slot_area.num = 1 + (region->size - image_size) / CONFIG_PHYSICAL_ALIGN;
- if (slot_area.num > 0) {
- slot_areas[slot_area_index++] = slot_area;
- slot_max += slot_area.num;
- }
+ slot_areas[slot_area_index++] = slot_area;
+ slot_max += slot_area.num;
}
/*
@@ -535,57 +534,53 @@ static void store_slot_info(struct mem_vector *region, unsigned long image_size)
static void
process_gb_huge_pages(struct mem_vector *region, unsigned long image_size)
{
- unsigned long addr, size = 0;
+ u64 pud_start, pud_end;
+ unsigned long gb_huge_pages;
struct mem_vector tmp;
- int i = 0;
- if (!max_gb_huge_pages) {
+ if (!IS_ENABLED(CONFIG_X86_64) || !max_gb_huge_pages) {
store_slot_info(region, image_size);
return;
}
- addr = ALIGN(region->start, PUD_SIZE);
- /* Did we raise the address above the passed in memory entry? */
- if (addr < region->start + region->size)
- size = region->size - (addr - region->start);
-
- /* Check how many 1GB huge pages can be filtered out: */
- while (size > PUD_SIZE && max_gb_huge_pages) {
- size -= PUD_SIZE;
- max_gb_huge_pages--;
- i++;
- }
+ /* Are there any 1GB pages in the region? */
+ pud_start = ALIGN(region->start, PUD_SIZE);
+ pud_end = ALIGN_DOWN(region->start + region->size, PUD_SIZE);
/* No good 1GB huge pages found: */
- if (!i) {
+ if (pud_start >= pud_end) {
store_slot_info(region, image_size);
return;
}
- /*
- * Skip those 'i'*1GB good huge pages, and continue checking and
- * processing the remaining head or tail part of the passed region
- * if available.
- */
-
- if (addr >= region->start + image_size) {
+ /* Check if the head part of the region is usable. */
+ if (pud_start >= region->start + image_size) {
tmp.start = region->start;
- tmp.size = addr - region->start;
+ tmp.size = pud_start - region->start;
store_slot_info(&tmp, image_size);
}
- size = region->size - (addr - region->start) - i * PUD_SIZE;
- if (size >= image_size) {
- tmp.start = addr + i * PUD_SIZE;
- tmp.size = size;
+ /* Skip the good 1GB pages. */
+ gb_huge_pages = (pud_end - pud_start) >> PUD_SHIFT;
+ if (gb_huge_pages > max_gb_huge_pages) {
+ pud_end = pud_start + (max_gb_huge_pages << PUD_SHIFT);
+ max_gb_huge_pages = 0;
+ } else {
+ max_gb_huge_pages -= gb_huge_pages;
+ }
+
+ /* Check if the tail part of the region is usable. */
+ if (region->start + region->size >= pud_end + image_size) {
+ tmp.start = pud_end;
+ tmp.size = region->start + region->size - pud_end;
store_slot_info(&tmp, image_size);
}
}
-static unsigned long slots_fetch_random(void)
+static u64 slots_fetch_random(void)
{
unsigned long slot;
- int i;
+ unsigned int i;
/* Handle case of no slots stored. */
if (slot_max == 0)
@@ -598,7 +593,7 @@ static unsigned long slots_fetch_random(void)
slot -= slot_areas[i].num;
continue;
}
- return slot_areas[i].addr + slot * CONFIG_PHYSICAL_ALIGN;
+ return slot_areas[i].addr + ((u64)slot * CONFIG_PHYSICAL_ALIGN);
}
if (i == slot_area_index)
@@ -611,49 +606,23 @@ static void __process_mem_region(struct mem_vector *entry,
unsigned long image_size)
{
struct mem_vector region, overlap;
- unsigned long start_orig, end;
- struct mem_vector cur_entry;
-
- /* On 32-bit, ignore entries entirely above our maximum. */
- if (IS_ENABLED(CONFIG_X86_32) && entry->start >= KERNEL_IMAGE_SIZE)
- return;
+ u64 region_end;
- /* Ignore entries entirely below our minimum. */
- if (entry->start + entry->size < minimum)
- return;
-
- /* Ignore entries above memory limit */
- end = min(entry->size + entry->start, mem_limit);
- if (entry->start >= end)
- return;
- cur_entry.start = entry->start;
- cur_entry.size = end - entry->start;
-
- region.start = cur_entry.start;
- region.size = cur_entry.size;
+ /* Enforce minimum and memory limit. */
+ region.start = max_t(u64, entry->start, minimum);
+ region_end = min(entry->start + entry->size, mem_limit);
/* Give up if slot area array is full. */
while (slot_area_index < MAX_SLOT_AREA) {
- start_orig = region.start;
-
- /* Potentially raise address to minimum location. */
- if (region.start < minimum)
- region.start = minimum;
-
/* Potentially raise address to meet alignment needs. */
region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
/* Did we raise the address above the passed in memory entry? */
- if (region.start > cur_entry.start + cur_entry.size)
+ if (region.start > region_end)
return;
/* Reduce size by any delta from the original address. */
- region.size -= region.start - start_orig;
-
- /* On 32-bit, reduce region size to fit within max size. */
- if (IS_ENABLED(CONFIG_X86_32) &&
- region.start + region.size > KERNEL_IMAGE_SIZE)
- region.size = KERNEL_IMAGE_SIZE - region.start;
+ region.size = region_end - region.start;
/* Return if region can't contain decompressed kernel */
if (region.size < image_size)
@@ -666,27 +635,19 @@ static void __process_mem_region(struct mem_vector *entry,
}
/* Store beginning of region if holds at least image_size. */
- if (overlap.start > region.start + image_size) {
- struct mem_vector beginning;
-
- beginning.start = region.start;
- beginning.size = overlap.start - region.start;
- process_gb_huge_pages(&beginning, image_size);
+ if (overlap.start >= region.start + image_size) {
+ region.size = overlap.start - region.start;
+ process_gb_huge_pages(&region, image_size);
}
- /* Return if overlap extends to or past end of region. */
- if (overlap.start + overlap.size >= region.start + region.size)
- return;
-
/* Clip off the overlapping region and start over. */
- region.size -= overlap.start - region.start + overlap.size;
region.start = overlap.start + overlap.size;
}
}
static bool process_mem_region(struct mem_vector *region,
- unsigned long long minimum,
- unsigned long long image_size)
+ unsigned long minimum,
+ unsigned long image_size)
{
int i;
/*
@@ -709,7 +670,7 @@ static bool process_mem_region(struct mem_vector *region,
* immovable memory and @region.
*/
for (i = 0; i < num_immovable_mem; i++) {
- unsigned long long start, end, entry_end, region_end;
+ u64 start, end, entry_end, region_end;
struct mem_vector entry;
if (!mem_overlaps(region, &immovable_mem[i]))
@@ -736,8 +697,8 @@ static bool process_mem_region(struct mem_vector *region,
#ifdef CONFIG_EFI
/*
- * Returns true if mirror region found (and must have been processed
- * for slots adding)
+ * Returns true if we processed the EFI memmap, which we prefer over the E820
+ * table if it is available.
*/
static bool
process_efi_entries(unsigned long minimum, unsigned long image_size)
@@ -839,20 +800,30 @@ static void process_e820_entries(unsigned long minimum,
static unsigned long find_random_phys_addr(unsigned long minimum,
unsigned long image_size)
{
+ u64 phys_addr;
+
+ /* Bail out early if it's impossible to succeed. */
+ if (minimum + image_size > mem_limit)
+ return 0;
+
/* Check if we had too many memmaps. */
if (memmap_too_large) {
debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n");
return 0;
}
- /* Make sure minimum is aligned. */
- minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
+ if (!process_efi_entries(minimum, image_size))
+ process_e820_entries(minimum, image_size);
- if (process_efi_entries(minimum, image_size))
- return slots_fetch_random();
+ phys_addr = slots_fetch_random();
- process_e820_entries(minimum, image_size);
- return slots_fetch_random();
+ /* Perform a final check to make sure the address is in range. */
+ if (phys_addr < minimum || phys_addr + image_size > mem_limit) {
+ warn("Invalid physical address chosen!\n");
+ return 0;
+ }
+
+ return (unsigned long)phys_addr;
}
static unsigned long find_random_virt_addr(unsigned long minimum,
@@ -860,18 +831,12 @@ static unsigned long find_random_virt_addr(unsigned long minimum,
{
unsigned long slots, random_addr;
- /* Make sure minimum is aligned. */
- minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
- /* Align image_size for easy slot calculations. */
- image_size = ALIGN(image_size, CONFIG_PHYSICAL_ALIGN);
-
/*
* There are how many CONFIG_PHYSICAL_ALIGN-sized slots
* that can hold image_size within the range of minimum to
* KERNEL_IMAGE_SIZE?
*/
- slots = (KERNEL_IMAGE_SIZE - minimum - image_size) /
- CONFIG_PHYSICAL_ALIGN + 1;
+ slots = 1 + (KERNEL_IMAGE_SIZE - minimum - image_size) / CONFIG_PHYSICAL_ALIGN;
random_addr = kaslr_get_random_long("Virtual") % slots;
@@ -908,6 +873,11 @@ void choose_random_location(unsigned long input,
/* Prepare to add new identity pagetables on demand. */
initialize_identity_maps();
+ if (IS_ENABLED(CONFIG_X86_32))
+ mem_limit = KERNEL_IMAGE_SIZE;
+ else
+ mem_limit = MAXMEM;
+
/* Record the various known unsafe memory ranges. */
mem_avoid_init(input, input_size, *output);
@@ -917,6 +887,8 @@ void choose_random_location(unsigned long input,
* location:
*/
min_addr = min(*output, 512UL << 20);
+ /* Make sure minimum is aligned. */
+ min_addr = ALIGN(min_addr, CONFIG_PHYSICAL_ALIGN);
/* Walk available memory entries to find a random address. */
random_addr = find_random_phys_addr(min_addr, output_size);
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 39e592d0e0b4..e478e40fbe5a 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -30,12 +30,9 @@
#define STATIC static
/*
- * Use normal definitions of mem*() from string.c. There are already
- * included header files which expect a definition of memset() and by
- * the time we define memset macro, it is too late.
+ * Provide definitions of memzero and memmove as some of the decompressors will
+ * try to define their own functions if these are not defined as macros.
*/
-#undef memcpy
-#undef memset
#define memzero(s, n) memset((s), 0, (n))
#define memmove memmove
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 726e264410ff..3efce27ba35c 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -70,8 +70,8 @@ int cmdline_find_option(const char *option, char *buffer, int bufsize);
int cmdline_find_option_bool(const char *option);
struct mem_vector {
- unsigned long long start;
- unsigned long long size;
+ u64 start;
+ u64 size;
};
#if CONFIG_RANDOMIZE_BASE
diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h
index 995f7b7ad512..a232da487cd2 100644
--- a/arch/x86/boot/string.h
+++ b/arch/x86/boot/string.h
@@ -11,10 +11,7 @@ void *memcpy(void *dst, const void *src, size_t len);
void *memset(void *dst, int c, size_t len);
int memcmp(const void *s1, const void *s2, size_t len);
-/*
- * Access builtin version by default. If one needs to use optimized version,
- * do "undef memcpy" in .c file and link against right string.c
- */
+/* Access builtin version by default. */
#define memcpy(d,s,l) __builtin_memcpy(d,s,l)
#define memset(d,c,l) __builtin_memset(d,c,l)
#define memcmp __builtin_memcmp
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 98e4d8886f11..ae9b0d4615b3 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -374,12 +374,14 @@ For 32-bit we have the following conventions - kernel is built with
* Fetch the per-CPU GSBASE value for this processor and put it in @reg.
* We normally use %gs for accessing per-CPU data, but we are setting up
* %gs here and obviously can not use %gs itself to access per-CPU data.
+ *
+ * Do not use RDPID, because KVM loads guest's TSC_AUX on vm-entry and
+ * may not restore the host's value until the CPU returns to userspace.
+ * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives
+ * while running KVM's run loop.
*/
.macro GET_PERCPU_BASE reg:req
- ALTERNATIVE \
- "LOAD_CPU_AND_NODE_SEG_LIMIT \reg", \
- "RDPID \reg", \
- X86_FEATURE_RDPID
+ LOAD_CPU_AND_NODE_SEG_LIMIT \reg
andq $VDSO_CPUNODE_MASK, \reg
movq __per_cpu_offset(, \reg, 8), \reg
.endm
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 48512c7944e7..2f84c7ca74ea 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -60,16 +60,10 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
{
- unsigned int nr = (unsigned int)regs->orig_ax;
-
if (IS_ENABLED(CONFIG_IA32_EMULATION))
current_thread_info()->status |= TS_COMPAT;
- /*
- * Subtlety here: if ptrace pokes something larger than 2^32-1 into
- * orig_ax, the unsigned int return value truncates it. This may
- * or may not be necessary, but it matches the old asm behavior.
- */
- return (unsigned int)syscall_enter_from_user_mode(regs, nr);
+
+ return (unsigned int)regs->orig_ax;
}
/*
@@ -91,15 +85,29 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
{
unsigned int nr = syscall_32_enter(regs);
+ /*
+ * Subtlety here: if ptrace pokes something larger than 2^32-1 into
+ * orig_ax, the unsigned int return value truncates it. This may
+ * or may not be necessary, but it matches the old asm behavior.
+ */
+ nr = (unsigned int)syscall_enter_from_user_mode(regs, nr);
+
do_syscall_32_irqs_on(regs, nr);
syscall_exit_to_user_mode(regs);
}
static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
{
- unsigned int nr = syscall_32_enter(regs);
+ unsigned int nr = syscall_32_enter(regs);
int res;
+ /*
+ * This cannot use syscall_enter_from_user_mode() as it has to
+ * fetch EBP before invoking any of the syscall entry work
+ * functions.
+ */
+ syscall_enter_from_user_mode_prepare(regs);
+
instrumentation_begin();
/* Fetch EBP from where the vDSO stashed it. */
if (IS_ENABLED(CONFIG_X86_64)) {
@@ -122,6 +130,9 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
return false;
}
+ /* The case truncates any ptrace induced syscall nr > 2^32 -1 */
+ nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr);
+
/* Now this is just like a normal syscall. */
do_syscall_32_irqs_on(regs, nr);
syscall_exit_to_user_mode(regs);
diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S
index 3a07ce3ec70b..f1f96d4d8cd6 100644
--- a/arch/x86/entry/thunk_32.S
+++ b/arch/x86/entry/thunk_32.S
@@ -29,11 +29,6 @@ SYM_CODE_START_NOALIGN(\name)
SYM_CODE_END(\name)
.endm
-#ifdef CONFIG_TRACE_IRQFLAGS
- THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller,1
- THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1
-#endif
-
#ifdef CONFIG_PREEMPTION
THUNK preempt_schedule_thunk, preempt_schedule
THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 50963472ee85..31e6887d24f1 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4682,7 +4682,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_CORE2_MEROM:
x86_add_quirk(intel_clovertown_quirk);
- /* fall through */
+ fallthrough;
case INTEL_FAM6_CORE2_MEROM_L:
case INTEL_FAM6_CORE2_PENRYN:
@@ -5062,7 +5062,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_SKYLAKE_X:
pmem = true;
- /* fall through */
+ fallthrough;
case INTEL_FAM6_SKYLAKE_L:
case INTEL_FAM6_SKYLAKE:
case INTEL_FAM6_KABYLAKE_L:
@@ -5114,7 +5114,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_ICELAKE_X:
case INTEL_FAM6_ICELAKE_D:
pmem = true;
- /* fall through */
+ fallthrough;
case INTEL_FAM6_ICELAKE_L:
case INTEL_FAM6_ICELAKE:
case INTEL_FAM6_TIGERLAKE_L:
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 63f58bdf556c..8961653c5dd2 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1268,7 +1268,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
ret = X86_BR_ZERO_CALL;
break;
}
- /* fall through */
+ fallthrough;
case 0x9a: /* call far absolute */
ret = X86_BR_CALL;
break;
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index cb94ba86efd2..6a4ca27b2c9e 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -390,6 +390,18 @@ static struct uncore_event_desc snb_uncore_imc_events[] = {
INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(gt_requests, "event=0x03"),
+ INTEL_UNCORE_EVENT_DESC(gt_requests.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(gt_requests.unit, "MiB"),
+
+ INTEL_UNCORE_EVENT_DESC(ia_requests, "event=0x04"),
+ INTEL_UNCORE_EVENT_DESC(ia_requests.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(ia_requests.unit, "MiB"),
+
+ INTEL_UNCORE_EVENT_DESC(io_requests, "event=0x05"),
+ INTEL_UNCORE_EVENT_DESC(io_requests.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(io_requests.unit, "MiB"),
+
{ /* end: all zeroes */ },
};
@@ -405,13 +417,35 @@ static struct uncore_event_desc snb_uncore_imc_events[] = {
#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054
#define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE
+/* BW break down- legacy counters */
+#define SNB_UNCORE_PCI_IMC_GT_REQUESTS 0x3
+#define SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE 0x5040
+#define SNB_UNCORE_PCI_IMC_IA_REQUESTS 0x4
+#define SNB_UNCORE_PCI_IMC_IA_REQUESTS_BASE 0x5044
+#define SNB_UNCORE_PCI_IMC_IO_REQUESTS 0x5
+#define SNB_UNCORE_PCI_IMC_IO_REQUESTS_BASE 0x5048
+
enum perf_snb_uncore_imc_freerunning_types {
- SNB_PCI_UNCORE_IMC_DATA = 0,
+ SNB_PCI_UNCORE_IMC_DATA_READS = 0,
+ SNB_PCI_UNCORE_IMC_DATA_WRITES,
+ SNB_PCI_UNCORE_IMC_GT_REQUESTS,
+ SNB_PCI_UNCORE_IMC_IA_REQUESTS,
+ SNB_PCI_UNCORE_IMC_IO_REQUESTS,
+
SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX,
};
static struct freerunning_counters snb_uncore_imc_freerunning[] = {
- [SNB_PCI_UNCORE_IMC_DATA] = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE, 0x4, 0x0, 2, 32 },
+ [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
+ 0x0, 0x0, 1, 32 },
+ [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE,
+ 0x0, 0x0, 1, 32 },
+ [SNB_PCI_UNCORE_IMC_GT_REQUESTS] = { SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE,
+ 0x0, 0x0, 1, 32 },
+ [SNB_PCI_UNCORE_IMC_IA_REQUESTS] = { SNB_UNCORE_PCI_IMC_IA_REQUESTS_BASE,
+ 0x0, 0x0, 1, 32 },
+ [SNB_PCI_UNCORE_IMC_IO_REQUESTS] = { SNB_UNCORE_PCI_IMC_IO_REQUESTS_BASE,
+ 0x0, 0x0, 1, 32 },
};
static struct attribute *snb_uncore_imc_formats_attr[] = {
@@ -525,6 +559,18 @@ static int snb_uncore_imc_event_init(struct perf_event *event)
base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE;
idx = UNCORE_PMC_IDX_FREERUNNING;
break;
+ case SNB_UNCORE_PCI_IMC_GT_REQUESTS:
+ base = SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE;
+ idx = UNCORE_PMC_IDX_FREERUNNING;
+ break;
+ case SNB_UNCORE_PCI_IMC_IA_REQUESTS:
+ base = SNB_UNCORE_PCI_IMC_IA_REQUESTS_BASE;
+ idx = UNCORE_PMC_IDX_FREERUNNING;
+ break;
+ case SNB_UNCORE_PCI_IMC_IO_REQUESTS:
+ base = SNB_UNCORE_PCI_IMC_IO_REQUESTS_BASE;
+ idx = UNCORE_PMC_IDX_FREERUNNING;
+ break;
default:
return -EINVAL;
}
@@ -598,7 +644,7 @@ static struct intel_uncore_ops snb_uncore_imc_ops = {
static struct intel_uncore_type snb_uncore_imc = {
.name = "imc",
- .num_counters = 2,
+ .num_counters = 5,
.num_boxes = 1,
.num_freerunning_types = SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX,
.mmio_map_size = SNB_UNCORE_PCI_IMC_MAP_SIZE,
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 2901d5df4366..83fc9d38eb1f 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -368,6 +368,7 @@
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */
+#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index b9c2667ac46c..bc9758ef292e 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -81,11 +81,8 @@ extern unsigned long efi_fw_vendor, efi_config_table;
kernel_fpu_end(); \
})
-
#define arch_efi_call_virt(p, f, args...) p->f(args)
-#define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size)
-
#else /* !CONFIG_X86_32 */
#define EFI_LOADER_SIGNATURE "EL64"
@@ -125,9 +122,6 @@ struct efi_scratch {
kernel_fpu_end(); \
})
-extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
- u32 type, u64 attribute);
-
#ifdef CONFIG_KASAN
/*
* CONFIG_KASAN may redefine memset to __memset. __memset function is present
@@ -143,17 +137,13 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
#endif /* CONFIG_X86_32 */
extern struct efi_scratch efi_scratch;
-extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int __init efi_memblock_x86_reserve_range(void);
extern void __init efi_print_memmap(void);
-extern void __init efi_memory_uc(u64 addr, unsigned long size);
extern void __init efi_map_region(efi_memory_desc_t *md);
extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
extern void efi_sync_low_kernel_mappings(void);
extern int __init efi_alloc_page_tables(void);
extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
-extern void __init old_map_region(efi_memory_desc_t *md);
-extern void __init runtime_code_page_mkexec(void);
extern void __init efi_runtime_update_mappings(void);
extern void __init efi_dump_pagetable(void);
extern void __init efi_apply_memmap_quirks(void);
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index a8f9315b9eae..6fe54b2813c1 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -18,8 +18,16 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs)
* state, not the interrupt state as imagined by Xen.
*/
unsigned long flags = native_save_fl();
- WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF |
- X86_EFLAGS_NT));
+ unsigned long mask = X86_EFLAGS_DF | X86_EFLAGS_NT;
+
+ /*
+ * For !SMAP hardware we patch out CLAC on entry.
+ */
+ if (boot_cpu_has(X86_FEATURE_SMAP) ||
+ (IS_ENABLED(CONFIG_64_BIT) && boot_cpu_has(X86_FEATURE_XENPV)))
+ mask |= X86_EFLAGS_AC;
+
+ WARN_ON_ONCE(flags & mask);
/* We think we came from user mode. Make sure pt_regs agrees. */
WARN_ON_ONCE(!user_mode(regs));
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 0a460f2a3f90..21a8b5259477 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -602,9 +602,7 @@ static inline u64 xgetbv(u32 index)
{
u32 eax, edx;
- asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
- : "=a" (eax), "=d" (edx)
- : "c" (index));
+ asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index));
return eax + ((u64)edx << 32);
}
@@ -613,8 +611,7 @@ static inline void xsetbv(u32 index, u64 value)
u32 eax = value;
u32 edx = value >> 32;
- asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
- : : "a" (eax), "d" (edx), "c" (index));
+ asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
}
#endif /* _ASM_X86_FPU_INTERNAL_H */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 0a301ad0b02f..9257667d13c5 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -59,5 +59,6 @@ typedef struct {
}
void leave_mm(int cpu);
+#define leave_mm leave_mm
#endif /* _ASM_X86_MMU_H */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 40aa69d04862..d8324a236696 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -327,8 +327,8 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
static const unsigned int argument_offs[] = {
#ifdef __i386__
offsetof(struct pt_regs, ax),
- offsetof(struct pt_regs, cx),
offsetof(struct pt_regs, dx),
+ offsetof(struct pt_regs, cx),
#define NR_REG_ARGUMENTS 3
#else
offsetof(struct pt_regs, di),
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 59a3e13204c3..5999b0b3dd4a 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -234,6 +234,12 @@ static inline void clwb(volatile void *__p)
#define nop() asm volatile ("nop")
+static inline void serialize(void)
+{
+ /* Instruction opcode for SERIALIZE; supported in binutils >= 2.35. */
+ asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory");
+}
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_SPECIAL_INSNS_H */
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 8a1f5382a4ea..cf13f9e78585 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -150,14 +150,14 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define SVM_NESTED_CTL_NP_ENABLE BIT(0)
#define SVM_NESTED_CTL_SEV_ENABLE BIT(1)
-struct __attribute__ ((__packed__)) vmcb_seg {
+struct vmcb_seg {
u16 selector;
u16 attrib;
u32 limit;
u64 base;
-};
+} __packed;
-struct __attribute__ ((__packed__)) vmcb_save_area {
+struct vmcb_save_area {
struct vmcb_seg es;
struct vmcb_seg cs;
struct vmcb_seg ss;
@@ -200,20 +200,67 @@ struct __attribute__ ((__packed__)) vmcb_save_area {
u64 br_to;
u64 last_excp_from;
u64 last_excp_to;
-};
+ /*
+ * The following part of the save area is valid only for
+ * SEV-ES guests when referenced through the GHCB.
+ */
+ u8 reserved_7[104];
+ u64 reserved_8; /* rax already available at 0x01f8 */
+ u64 rcx;
+ u64 rdx;
+ u64 rbx;
+ u64 reserved_9; /* rsp already available at 0x01d8 */
+ u64 rbp;
+ u64 rsi;
+ u64 rdi;
+ u64 r8;
+ u64 r9;
+ u64 r10;
+ u64 r11;
+ u64 r12;
+ u64 r13;
+ u64 r14;
+ u64 r15;
+ u8 reserved_10[16];
+ u64 sw_exit_code;
+ u64 sw_exit_info_1;
+ u64 sw_exit_info_2;
+ u64 sw_scratch;
+ u8 reserved_11[56];
+ u64 xcr0;
+ u8 valid_bitmap[16];
+ u64 x87_state_gpa;
+} __packed;
+
+struct ghcb {
+ struct vmcb_save_area save;
+ u8 reserved_save[2048 - sizeof(struct vmcb_save_area)];
+
+ u8 shared_buffer[2032];
+
+ u8 reserved_1[10];
+ u16 protocol_version; /* negotiated SEV-ES/GHCB protocol version */
+ u32 ghcb_usage;
+} __packed;
+
+
+#define EXPECTED_VMCB_SAVE_AREA_SIZE 1032
+#define EXPECTED_VMCB_CONTROL_AREA_SIZE 256
+#define EXPECTED_GHCB_SIZE PAGE_SIZE
static inline void __unused_size_checks(void)
{
- BUILD_BUG_ON(sizeof(struct vmcb_save_area) != 0x298);
- BUILD_BUG_ON(sizeof(struct vmcb_control_area) != 256);
+ BUILD_BUG_ON(sizeof(struct vmcb_save_area) != EXPECTED_VMCB_SAVE_AREA_SIZE);
+ BUILD_BUG_ON(sizeof(struct vmcb_control_area) != EXPECTED_VMCB_CONTROL_AREA_SIZE);
+ BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE);
}
-struct __attribute__ ((__packed__)) vmcb {
+struct vmcb {
struct vmcb_control_area control;
u8 reserved_control[1024 - sizeof(struct vmcb_control_area)];
struct vmcb_save_area save;
-};
+} __packed;
#define SVM_CPUID_FUNC 0x8000000a
@@ -298,4 +345,47 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
+/* GHCB Accessor functions */
+
+#define GHCB_BITMAP_IDX(field) \
+ (offsetof(struct vmcb_save_area, field) / sizeof(u64))
+
+#define DEFINE_GHCB_ACCESSORS(field) \
+ static inline bool ghcb_##field##_is_valid(const struct ghcb *ghcb) \
+ { \
+ return test_bit(GHCB_BITMAP_IDX(field), \
+ (unsigned long *)&ghcb->save.valid_bitmap); \
+ } \
+ \
+ static inline void ghcb_set_##field(struct ghcb *ghcb, u64 value) \
+ { \
+ __set_bit(GHCB_BITMAP_IDX(field), \
+ (unsigned long *)&ghcb->save.valid_bitmap); \
+ ghcb->save.field = value; \
+ }
+
+DEFINE_GHCB_ACCESSORS(cpl)
+DEFINE_GHCB_ACCESSORS(rip)
+DEFINE_GHCB_ACCESSORS(rsp)
+DEFINE_GHCB_ACCESSORS(rax)
+DEFINE_GHCB_ACCESSORS(rcx)
+DEFINE_GHCB_ACCESSORS(rdx)
+DEFINE_GHCB_ACCESSORS(rbx)
+DEFINE_GHCB_ACCESSORS(rbp)
+DEFINE_GHCB_ACCESSORS(rsi)
+DEFINE_GHCB_ACCESSORS(rdi)
+DEFINE_GHCB_ACCESSORS(r8)
+DEFINE_GHCB_ACCESSORS(r9)
+DEFINE_GHCB_ACCESSORS(r10)
+DEFINE_GHCB_ACCESSORS(r11)
+DEFINE_GHCB_ACCESSORS(r12)
+DEFINE_GHCB_ACCESSORS(r13)
+DEFINE_GHCB_ACCESSORS(r14)
+DEFINE_GHCB_ACCESSORS(r15)
+DEFINE_GHCB_ACCESSORS(sw_exit_code)
+DEFINE_GHCB_ACCESSORS(sw_exit_info_1)
+DEFINE_GHCB_ACCESSORS(sw_exit_info_2)
+DEFINE_GHCB_ACCESSORS(sw_scratch)
+DEFINE_GHCB_ACCESSORS(xcr0)
+
#endif
diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h
index fdb5b356e59b..0fd4a9dfb29c 100644
--- a/arch/x86/include/asm/sync_core.h
+++ b/arch/x86/include/asm/sync_core.h
@@ -5,6 +5,7 @@
#include <linux/preempt.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
+#include <asm/special_insns.h>
#ifdef CONFIG_X86_32
static inline void iret_to_self(void)
@@ -46,22 +47,34 @@ static inline void iret_to_self(void)
*
* b) Text was modified on a different CPU, may subsequently be
* executed on this CPU, and you want to make sure the new version
- * gets executed. This generally means you're calling this in a IPI.
+ * gets executed. This generally means you're calling this in an IPI.
*
* If you're calling this for a different reason, you're probably doing
* it wrong.
+ *
+ * Like all of Linux's memory ordering operations, this is a
+ * compiler barrier as well.
*/
static inline void sync_core(void)
{
/*
- * There are quite a few ways to do this. IRET-to-self is nice
- * because it works on every CPU, at any CPL (so it's compatible
- * with paravirtualization), and it never exits to a hypervisor.
- * The only down sides are that it's a bit slow (it seems to be
- * a bit more than 2x slower than the fastest options) and that
- * it unmasks NMIs. The "push %cs" is needed because, in
- * paravirtual environments, __KERNEL_CS may not be a valid CS
- * value when we do IRET directly.
+ * The SERIALIZE instruction is the most straightforward way to
+ * do this, but it is not universally available.
+ */
+ if (static_cpu_has(X86_FEATURE_SERIALIZE)) {
+ serialize();
+ return;
+ }
+
+ /*
+ * For all other processors, there are quite a few ways to do this.
+ * IRET-to-self is nice because it works on every CPU, at any CPL
+ * (so it's compatible with paravirtualization), and it never exits
+ * to a hypervisor. The only downsides are that it's a bit slow
+ * (it seems to be a bit more than 2x slower than the fastest
+ * options) and that it unmasks NMIs. The "push %cs" is needed,
+ * because in paravirtual environments __KERNEL_CS may not be a
+ * valid CS value when we do IRET directly.
*
* In case NMI unmasking or performance ever becomes a problem,
* the next best option appears to be MOV-to-CR2 and an
@@ -71,9 +84,6 @@ static inline void sync_core(void)
* CPUID is the conventional way, but it's nasty: it doesn't
* exist on some 486-like CPUs, and it usually exits to a
* hypervisor.
- *
- * Like all of Linux's memory ordering operations, this is a
- * compiler barrier as well.
*/
iret_to_self();
}
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index c3daf0aaa0ee..cdaab30880b9 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -239,7 +239,7 @@ void __init arch_init_ideal_nops(void)
return;
}
- /* fall through */
+ fallthrough;
default:
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 21325a4a78b9..779a89e31c4c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -800,7 +800,7 @@ static int irq_polarity(int idx)
return IOAPIC_POL_HIGH;
case MP_IRQPOL_RESERVED:
pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n");
- /* fall through */
+ fallthrough;
case MP_IRQPOL_ACTIVE_LOW:
default: /* Pointless default required due to do gcc stupidity */
return IOAPIC_POL_LOW;
@@ -848,7 +848,7 @@ static int irq_trigger(int idx)
return IOAPIC_EDGE;
case MP_IRQTRIG_RESERVED:
pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n");
- /* fall through */
+ fallthrough;
case MP_IRQTRIG_LEVEL:
default: /* Pointless default required due to do gcc stupidity */
return IOAPIC_LEVEL;
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 7bda71def557..99ee61c9ba54 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -149,7 +149,7 @@ void __init default_setup_apic_routing(void)
break;
}
/* P4 and above */
- /* fall through */
+ fallthrough;
case X86_VENDOR_HYGON:
case X86_VENDOR_AMD:
def_to_bigsmp = 1;
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index dae32d948bf2..f8a56b5dc29f 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -161,6 +161,7 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
apicd->move_in_progress = true;
apicd->prev_vector = apicd->vector;
apicd->prev_cpu = apicd->cpu;
+ WARN_ON_ONCE(apicd->cpu == newcpu);
} else {
irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector,
managed);
@@ -910,7 +911,7 @@ void send_cleanup_vector(struct irq_cfg *cfg)
__send_cleanup_vector(apicd);
}
-static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
+void irq_complete_move(struct irq_cfg *cfg)
{
struct apic_chip_data *apicd;
@@ -918,15 +919,16 @@ static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
if (likely(!apicd->move_in_progress))
return;
- if (vector == apicd->vector && apicd->cpu == smp_processor_id())
+ /*
+ * If the interrupt arrived on the new target CPU, cleanup the
+ * vector on the old target CPU. A vector check is not required
+ * because an interrupt can never move from one vector to another
+ * on the same CPU.
+ */
+ if (apicd->cpu == smp_processor_id())
__send_cleanup_vector(apicd);
}
-void irq_complete_move(struct irq_cfg *cfg)
-{
- __irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
-}
-
/*
* Called from fixup_irqs() with @desc->lock held and interrupts disabled.
*/
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index c7503be92f35..57074cf3ad7c 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -248,7 +248,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
switch (leaf) {
case 1:
l1 = &l1i;
- /* fall through */
+ fallthrough;
case 0:
if (!l1->val)
return;
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 7843ab3fde09..3a44346f2276 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -199,7 +199,7 @@ static int raise_local(void)
* calling irq_enter, but the necessary
* machinery isn't exported currently.
*/
- /*FALL THROUGH*/
+ fallthrough;
case MCJ_CTX_PROCESS:
raise_exception(m, NULL);
break;
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index d8f9230d2034..abe9fe0fb851 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -193,7 +193,7 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval)
if (!atomic_sub_return(1, &cmci_storm_on_cpus))
pr_notice("CMCI storm subsided: switching to interrupt mode\n");
- /* FALLTHROUGH */
+ fallthrough;
case CMCI_STORM_SUBSIDED:
/*
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 72182809b333..ca670919b561 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -98,7 +98,7 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
case 7:
if (size < 0x40)
break;
- /* Else, fall through */
+ fallthrough;
case 6:
case 5:
case 4:
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 8cdf29ffd95f..b98ff620ba77 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -349,7 +349,7 @@ static int arch_build_bp_info(struct perf_event *bp,
hw->len = X86_BREAKPOINT_LEN_X;
return 0;
}
- /* fall through */
+ fallthrough;
default:
return -EINVAL;
}
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 68acd30c6b87..c2f02f308ecf 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -450,7 +450,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
ptr = &remcomInBuffer[1];
if (kgdb_hex2long(&ptr, &addr))
linux_regs->ip = addr;
- /* fall through */
+ fallthrough;
case 'D':
case 'k':
/* clear the trace bit */
@@ -539,7 +539,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
* a system call which should be ignored
*/
return NOTIFY_DONE;
- /* fall through */
+ fallthrough;
default:
if (user_mode(regs))
return NOTIFY_DONE;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 411af4aa7b51..baa21090c9be 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -312,7 +312,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
case 2:
if (i == 0 || i == 13)
continue; /* IRQ0 & IRQ13 not connected */
- /* fall through */
+ fallthrough;
default:
if (i == 2)
continue; /* IRQ2 is never connected */
@@ -356,7 +356,7 @@ static void __init construct_ioapic_table(int mpc_default_type)
default:
pr_err("???\nUnknown standard configuration %d\n",
mpc_default_type);
- /* fall through */
+ fallthrough;
case 1:
case 5:
memcpy(bus.bustype, "ISA ", 6);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 994d8393f2f7..13ce616cc7af 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -684,9 +684,7 @@ void arch_cpu_idle(void)
*/
void __cpuidle default_idle(void)
{
- trace_cpu_idle_rcuidle(1, smp_processor_id());
safe_halt();
- trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
#if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
EXPORT_SYMBOL(default_idle);
@@ -792,7 +790,6 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
static __cpuidle void mwait_idle(void)
{
if (!current_set_polling_and_test()) {
- trace_cpu_idle_rcuidle(1, smp_processor_id());
if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
mb(); /* quirk */
clflush((void *)&current_thread_info()->flags);
@@ -804,7 +801,6 @@ static __cpuidle void mwait_idle(void)
__sti_mwait(0, 0);
else
local_irq_enable();
- trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
} else {
local_irq_enable();
}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 5679aa3fdcb8..e7537c5440bb 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -204,7 +204,7 @@ static int set_segment_reg(struct task_struct *task,
case offsetof(struct user_regs_struct, ss):
if (unlikely(value == 0))
return -EIO;
- /* Else, fall through */
+ fallthrough;
default:
*pt_regs_access(task_pt_regs(task), offset) = value;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 0ec7ced727fe..a515e2d230b7 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -654,7 +654,7 @@ static void native_machine_emergency_restart(void)
case BOOT_CF9_FORCE:
port_cf9_safe = true;
- /* Fall through */
+ fallthrough;
case BOOT_CF9_SAFE:
if (port_cf9_safe) {
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index d5fa494c2304..be0d7d4152ec 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -726,7 +726,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
regs->ax = -EINTR;
break;
}
- /* fallthrough */
+ fallthrough;
case -ERESTARTNOINTR:
regs->ax = regs->orig_ax;
regs->ip -= 2;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 27aa04a95702..f5ef689dd62a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1594,14 +1594,28 @@ int native_cpu_disable(void)
if (ret)
return ret;
- /*
- * Disable the local APIC. Otherwise IPI broadcasts will reach
- * it. It still responds normally to INIT, NMI, SMI, and SIPI
- * messages.
- */
- apic_soft_disable();
cpu_disable_common();
+ /*
+ * Disable the local APIC. Otherwise IPI broadcasts will reach
+ * it. It still responds normally to INIT, NMI, SMI, and SIPI
+ * messages.
+ *
+ * Disabling the APIC must happen after cpu_disable_common()
+ * which invokes fixup_irqs().
+ *
+ * Disabling the APIC preserves already set bits in IRR, but
+ * an interrupt arriving after disabling the local APIC does not
+ * set the corresponding IRR bit.
+ *
+ * fixup_irqs() scans IRR for set bits so it can raise a not
+ * yet handled interrupt on the new destination CPU via an IPI
+ * but obviously it can't do so for IRR bits which are not set.
+ * IOW, interrupts arriving after disabling the local APIC will
+ * be lost.
+ */
+ apic_soft_disable();
+
return 0;
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 1f66d2d1e998..81a2fb711091 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -729,20 +729,9 @@ static bool is_sysenter_singlestep(struct pt_regs *regs)
#endif
}
-static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7)
+static __always_inline unsigned long debug_read_clear_dr6(void)
{
- /*
- * Disable breakpoints during exception handling; recursive exceptions
- * are exceedingly 'fun'.
- *
- * Since this function is NOKPROBE, and that also applies to
- * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
- * HW_BREAKPOINT_W on our stack)
- *
- * Entry text is excluded for HW_BP_X and cpu_entry_area, which
- * includes the entry stack is excluded for everything.
- */
- *dr7 = local_db_save();
+ unsigned long dr6;
/*
* The Intel SDM says:
@@ -755,15 +744,12 @@ static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7)
*
* Keep it simple: clear DR6 immediately.
*/
- get_debugreg(*dr6, 6);
+ get_debugreg(dr6, 6);
set_debugreg(0, 6);
/* Filter out all the reserved bits which are preset to 1 */
- *dr6 &= ~DR6_RESERVED;
-}
+ dr6 &= ~DR6_RESERVED;
-static __always_inline void debug_exit(unsigned long dr7)
-{
- local_db_restore(dr7);
+ return dr6;
}
/*
@@ -863,6 +849,18 @@ out:
static __always_inline void exc_debug_kernel(struct pt_regs *regs,
unsigned long dr6)
{
+ /*
+ * Disable breakpoints during exception handling; recursive exceptions
+ * are exceedingly 'fun'.
+ *
+ * Since this function is NOKPROBE, and that also applies to
+ * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
+ * HW_BREAKPOINT_W on our stack)
+ *
+ * Entry text is excluded for HW_BP_X and cpu_entry_area, which
+ * includes the entry stack is excluded for everything.
+ */
+ unsigned long dr7 = local_db_save();
bool irq_state = idtentry_enter_nmi(regs);
instrumentation_begin();
@@ -883,6 +881,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
instrumentation_end();
idtentry_exit_nmi(regs, irq_state);
+
+ local_db_restore(dr7);
}
static __always_inline void exc_debug_user(struct pt_regs *regs,
@@ -894,6 +894,15 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
*/
WARN_ON_ONCE(!user_mode(regs));
+ /*
+ * NB: We can't easily clear DR7 here because
+ * idtentry_exit_to_usermode() can invoke ptrace, schedule, access
+ * user memory, etc. This means that a recursive #DB is possible. If
+ * this happens, that #DB will hit exc_debug_kernel() and clear DR7.
+ * Since we're not on the IST stack right now, everything will be
+ * fine.
+ */
+
irqentry_enter_from_user_mode(regs);
instrumentation_begin();
@@ -907,36 +916,24 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
/* IST stack entry */
DEFINE_IDTENTRY_DEBUG(exc_debug)
{
- unsigned long dr6, dr7;
-
- debug_enter(&dr6, &dr7);
- exc_debug_kernel(regs, dr6);
- debug_exit(dr7);
+ exc_debug_kernel(regs, debug_read_clear_dr6());
}
/* User entry, runs on regular task stack */
DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
{
- unsigned long dr6, dr7;
-
- debug_enter(&dr6, &dr7);
- exc_debug_user(regs, dr6);
- debug_exit(dr7);
+ exc_debug_user(regs, debug_read_clear_dr6());
}
#else
/* 32 bit does not have separate entry points. */
DEFINE_IDTENTRY_RAW(exc_debug)
{
- unsigned long dr6, dr7;
-
- debug_enter(&dr6, &dr7);
+ unsigned long dr6 = debug_read_clear_dr6();
if (user_mode(regs))
exc_debug_user(regs, dr6);
else
exc_debug_kernel(regs, dr6);
-
- debug_exit(dr7);
}
#endif
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index 8d5cbe1bbb3b..2c304fd0bb1a 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -45,11 +45,12 @@
* value that, lies close to the top of the kernel memory. The limit for the GDT
* and the IDT are set to zero.
*
- * Given that SLDT and STR are not commonly used in programs that run on WineHQ
- * or DOSEMU2, they are not emulated.
- *
- * The instruction smsw is emulated to return the value that the register CR0
+ * The instruction SMSW is emulated to return the value that the register CR0
* has at boot time as set in the head_32.
+ * SLDT and STR are emulated to return the values that the kernel programmatically
+ * assigns:
+ * - SLDT returns (GDT_ENTRY_LDT * 8) if an LDT has been set, 0 if not.
+ * - STR returns (GDT_ENTRY_TSS * 8).
*
* Emulation is provided for both 32-bit and 64-bit processes.
*
@@ -244,16 +245,34 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst,
*data_size += UMIP_GDT_IDT_LIMIT_SIZE;
memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE);
- } else if (umip_inst == UMIP_INST_SMSW) {
- unsigned long dummy_value = CR0_STATE;
+ } else if (umip_inst == UMIP_INST_SMSW || umip_inst == UMIP_INST_SLDT ||
+ umip_inst == UMIP_INST_STR) {
+ unsigned long dummy_value;
+
+ if (umip_inst == UMIP_INST_SMSW) {
+ dummy_value = CR0_STATE;
+ } else if (umip_inst == UMIP_INST_STR) {
+ dummy_value = GDT_ENTRY_TSS * 8;
+ } else if (umip_inst == UMIP_INST_SLDT) {
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+ down_read(&current->mm->context.ldt_usr_sem);
+ if (current->mm->context.ldt)
+ dummy_value = GDT_ENTRY_LDT * 8;
+ else
+ dummy_value = 0;
+ up_read(&current->mm->context.ldt_usr_sem);
+#else
+ dummy_value = 0;
+#endif
+ }
/*
- * Even though the CR0 register has 4 bytes, the number
+ * For these 3 instructions, the number
* of bytes to be copied in the result buffer is determined
* by whether the operand is a register or a memory location.
* If operand is a register, return as many bytes as the operand
* size. If operand is memory, return only the two least
- * siginificant bytes of CR0.
+ * siginificant bytes.
*/
if (X86_MODRM_MOD(insn->modrm.value) == 3)
*data_size = insn->opnd_bytes;
@@ -261,7 +280,6 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst,
*data_size = 2;
memcpy(data, &dummy_value, *data_size);
- /* STR and SLDT are not emulated */
} else {
return -EINVAL;
}
@@ -383,10 +401,6 @@ bool fixup_umip_exception(struct pt_regs *regs)
umip_pr_warn(regs, "%s instruction cannot be used by applications.\n",
umip_insns[umip_inst]);
- /* Do not emulate (spoof) SLDT or STR. */
- if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT)
- return false;
-
umip_pr_warn(regs, "For now, expensive software emulation returns the result.\n");
if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size,
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 15e5aad8ac2c..3fdaa042823d 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -735,7 +735,7 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
* OPCODE1() of the "short" jmp which checks the same condition.
*/
opc1 = OPCODE2(insn) - 0x10;
- /* fall through */
+ fallthrough;
default:
if (!is_cond_jmp_opcode(opc1))
return -ENOSYS;
@@ -892,7 +892,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
fix_ip_or_call = 0;
break;
}
- /* fall through */
+ fallthrough;
default:
riprel_analyze(auprobe, &insn);
}
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 3fd6eec202d7..7456f9ad424b 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -371,7 +371,7 @@ void kvm_set_cpu_caps(void)
F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
- F(SERIALIZE)
+ F(SERIALIZE) | F(TSXLDTRK)
);
/* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 1d450d7710d6..2f6510de6b0c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3028,7 +3028,7 @@ static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
case 0xa4: /* movsb */
case 0xa5: /* movsd/w */
*reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
- /* fall through */
+ fallthrough;
case 0xaa: /* stosb */
case 0xab: /* stosd/w */
*reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 814d3aee5cef..1d330564eed8 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1779,7 +1779,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
if (ret != HV_STATUS_INVALID_PORT_ID)
break;
- /* fall through - maybe userspace knows this conn_id. */
+ fallthrough; /* maybe userspace knows this conn_id */
case HVCALL_POST_MESSAGE:
/* don't bother userspace if it has no way to handle it */
if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index c47d2acec529..4aa1c2e00e2a 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -285,7 +285,7 @@ int kvm_set_routing_entry(struct kvm *kvm,
switch (ue->u.irqchip.irqchip) {
case KVM_IRQCHIP_PIC_SLAVE:
e->irqchip.pin += PIC_NUM_PINS / 2;
- /* fall through */
+ fallthrough;
case KVM_IRQCHIP_PIC_MASTER:
if (ue->u.irqchip.pin >= PIC_NUM_PINS / 2)
return -EINVAL;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 5ccbee7165a2..35cca2e0c802 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1053,7 +1053,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
switch (delivery_mode) {
case APIC_DM_LOWEST:
vcpu->arch.apic_arb_prio++;
- /* fall through */
+ fallthrough;
case APIC_DM_FIXED:
if (unlikely(trig_mode && !level))
break;
@@ -1341,7 +1341,7 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
break;
case APIC_TASKPRI:
report_tpr_access(apic, false);
- /* fall thru */
+ fallthrough;
default:
val = kvm_lapic_get_reg(apic, offset);
break;
@@ -2027,7 +2027,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_LVT0:
apic_manage_nmi_watchdog(apic, val);
- /* fall through */
+ fallthrough;
case APIC_LVTTHMR:
case APIC_LVTPC:
case APIC_LVT1:
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 76c5826e29a2..71aa3da2a0b7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4422,7 +4422,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
rsvd_bits(maxphyaddr, 51);
rsvd_check->rsvd_bits_mask[1][4] =
rsvd_check->rsvd_bits_mask[0][4];
- /* fall through */
+ fallthrough;
case PT64_ROOT_4LEVEL:
rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index e90bc436f584..598a769f1961 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1062,10 +1062,14 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
struct vmcb *hsave = svm->nested.hsave;
struct vmcb __user *user_vmcb = (struct vmcb __user *)
&user_kvm_nested_state->data.svm[0];
- struct vmcb_control_area ctl;
- struct vmcb_save_area save;
+ struct vmcb_control_area *ctl;
+ struct vmcb_save_area *save;
+ int ret;
u32 cr0;
+ BUILD_BUG_ON(sizeof(struct vmcb_control_area) + sizeof(struct vmcb_save_area) >
+ KVM_STATE_NESTED_SVM_VMCB_SIZE);
+
if (kvm_state->format != KVM_STATE_NESTED_FORMAT_SVM)
return -EINVAL;
@@ -1097,13 +1101,22 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
return -EINVAL;
if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE)
return -EINVAL;
- if (copy_from_user(&ctl, &user_vmcb->control, sizeof(ctl)))
- return -EFAULT;
- if (copy_from_user(&save, &user_vmcb->save, sizeof(save)))
- return -EFAULT;
- if (!nested_vmcb_check_controls(&ctl))
- return -EINVAL;
+ ret = -ENOMEM;
+ ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
+ save = kzalloc(sizeof(*save), GFP_KERNEL);
+ if (!ctl || !save)
+ goto out_free;
+
+ ret = -EFAULT;
+ if (copy_from_user(ctl, &user_vmcb->control, sizeof(*ctl)))
+ goto out_free;
+ if (copy_from_user(save, &user_vmcb->save, sizeof(*save)))
+ goto out_free;
+
+ ret = -EINVAL;
+ if (!nested_vmcb_check_controls(ctl))
+ goto out_free;
/*
* Processor state contains L2 state. Check that it is
@@ -1111,15 +1124,15 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
*/
cr0 = kvm_read_cr0(vcpu);
if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))
- return -EINVAL;
+ goto out_free;
/*
* Validate host state saved from before VMRUN (see
* nested_svm_check_permissions).
* TODO: validate reserved bits for all saved state.
*/
- if (!(save.cr0 & X86_CR0_PG))
- return -EINVAL;
+ if (!(save->cr0 & X86_CR0_PG))
+ goto out_free;
/*
* All checks done, we can enter guest mode. L1 control fields
@@ -1128,10 +1141,10 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
* contains saved L1 state.
*/
copy_vmcb_control_area(&hsave->control, &svm->vmcb->control);
- hsave->save = save;
+ hsave->save = *save;
svm->nested.vmcb = kvm_state->hdr.svm.vmcb_pa;
- load_nested_vmcb_control(svm, &ctl);
+ load_nested_vmcb_control(svm, ctl);
nested_prepare_vmcb_control(svm);
if (!nested_svm_vmrun_msrpm(svm))
@@ -1139,7 +1152,13 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
out_set_gif:
svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
- return 0;
+
+ ret = 0;
+out_free:
+ kfree(save);
+ kfree(ctl);
+
+ return ret;
}
struct kvm_x86_nested_ops svm_nested_ops = {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 5764b87379cf..c91acabf18d0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2668,7 +2668,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_IA32_APICBASE:
if (kvm_vcpu_apicv_active(vcpu))
avic_update_vapic_bar(to_svm(vcpu), data);
- /* Fall through */
+ fallthrough;
default:
return kvm_set_msr_common(vcpu, msr);
}
@@ -4170,6 +4170,8 @@ static struct kvm_x86_init_ops svm_init_ops __initdata = {
static int __init svm_init(void)
{
+ __unused_size_checks();
+
return kvm_init(&svm_init_ops, sizeof(struct vcpu_svm),
__alignof__(struct vcpu_svm), THIS_MODULE);
}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 75cd720c9e8c..8646a797b7a8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4654,7 +4654,7 @@ static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
return false;
- /* fall through */
+ fallthrough;
case DB_VECTOR:
return !(vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP));
@@ -4827,7 +4827,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
}
kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
- /* fall through */
+ fallthrough;
case BP_VECTOR:
/*
* Update instruction length as we may reinject #BP from
@@ -5257,7 +5257,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
error_code =
vmcs_read32(IDT_VECTORING_ERROR_CODE);
}
- /* fall through */
+ fallthrough;
case INTR_TYPE_SOFT_EXCEPTION:
kvm_clear_exception_queue(vcpu);
break;
@@ -5610,7 +5610,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
* keeping track of global entries in shadow page tables.
*/
- /* fall-through */
+ fallthrough;
case INVPCID_TYPE_ALL_INCL_GLOBAL:
kvm_mmu_unload(vcpu);
return kvm_skip_emulated_instruction(vcpu);
@@ -6579,7 +6579,7 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
break;
case INTR_TYPE_SOFT_EXCEPTION:
vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
- /* fall through */
+ fallthrough;
case INTR_TYPE_HARD_EXCEPTION:
if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
u32 err = vmcs_read32(error_code_field);
@@ -6589,7 +6589,7 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
break;
case INTR_TYPE_SOFT_INTR:
vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
- /* fall through */
+ fallthrough;
case INTR_TYPE_EXT_INTR:
kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
break;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e3de0fe5af37..1994602a0851 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1116,14 +1116,12 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
vcpu->arch.eff_db[dr] = val;
break;
case 4:
- /* fall through */
case 6:
if (!kvm_dr6_valid(val))
return -1; /* #GP */
vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
break;
case 5:
- /* fall through */
default: /* 7 */
if (!kvm_dr7_valid(val))
return -1; /* #GP */
@@ -1154,12 +1152,10 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
*val = vcpu->arch.db[array_index_nospec(dr, size)];
break;
case 4:
- /* fall through */
case 6:
*val = vcpu->arch.dr6;
break;
case 5:
- /* fall through */
default: /* 7 */
*val = vcpu->arch.dr7;
break;
@@ -3051,7 +3047,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
- pr = true; /* fall through */
+ pr = true;
+ fallthrough;
case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
if (kvm_pmu_is_valid_msr(vcpu, msr))
@@ -4362,7 +4359,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
case KVM_CAP_HYPERV_SYNIC2:
if (cap->args[0])
return -EINVAL;
- /* fall through */
+ fallthrough;
case KVM_CAP_HYPERV_SYNIC:
if (!irqchip_in_kernel(vcpu->kvm))
@@ -8675,7 +8672,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
vcpu->arch.pv.pv_unhalted = false;
vcpu->arch.mp_state =
KVM_MP_STATE_RUNNABLE;
- /* fall through */
+ fallthrough;
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.apf.halted = false;
break;
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index d46fff11f06f..aa067859a70b 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -24,7 +24,7 @@ ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_cmdline.o = -pg
endif
-CFLAGS_cmdline.o := -fno-stack-protector
+CFLAGS_cmdline.o := -fno-stack-protector -fno-jump-tables
endif
inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c
index 4f1719e22d3c..b6da09339308 100644
--- a/arch/x86/lib/cmdline.c
+++ b/arch/x86/lib/cmdline.c
@@ -58,7 +58,7 @@ __cmdline_find_option_bool(const char *cmdline, int max_cmdline_size,
state = st_wordcmp;
opptr = option;
wstart = pos;
- /* fall through */
+ fallthrough;
case st_wordcmp:
if (!*opptr) {
@@ -89,7 +89,7 @@ __cmdline_find_option_bool(const char *cmdline, int max_cmdline_size,
break;
}
state = st_wordskip;
- /* fall through */
+ fallthrough;
case st_wordskip:
if (!c)
@@ -151,7 +151,7 @@ __cmdline_find_option(const char *cmdline, int max_cmdline_size,
state = st_wordcmp;
opptr = option;
- /* fall through */
+ fallthrough;
case st_wordcmp:
if ((c == '=') && !*opptr) {
@@ -172,7 +172,7 @@ __cmdline_find_option(const char *cmdline, int max_cmdline_size,
break;
}
state = st_wordskip;
- /* fall through */
+ fallthrough;
case st_wordskip:
if (myisspace(c))
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 31600d851fd8..5e69603ff63f 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -179,7 +179,7 @@ static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off)
if (insn->addr_bytes == 2)
return -EINVAL;
- /* fall through */
+ fallthrough;
case -EDOM:
case offsetof(struct pt_regs, bx):
@@ -362,7 +362,6 @@ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx)
case INAT_SEG_REG_GS:
return vm86regs->gs;
case INAT_SEG_REG_IGNORE:
- /* fall through */
default:
return -EINVAL;
}
@@ -386,7 +385,6 @@ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx)
*/
return get_user_gs(regs);
case INAT_SEG_REG_IGNORE:
- /* fall through */
default:
return -EINVAL;
}
@@ -786,7 +784,7 @@ int insn_get_code_seg_params(struct pt_regs *regs)
*/
return INSN_CODE_SEG_PARAMS(4, 8);
case 3: /* Invalid setting. CS.L=1, CS.D=1 */
- /* fall through */
+ fallthrough;
default:
return -EINVAL;
}
diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c
index 73dc66d887f3..ec071cbb0804 100644
--- a/arch/x86/math-emu/errors.c
+++ b/arch/x86/math-emu/errors.c
@@ -186,7 +186,7 @@ void FPU_printall(void)
case TAG_Special:
/* Update tagi for the printk below */
tagi = FPU_Special(r);
- /* fall through */
+ fallthrough;
case TAG_Valid:
printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6d ", i,
getsign(r) ? '-' : '+',
diff --git a/arch/x86/math-emu/fpu_trig.c b/arch/x86/math-emu/fpu_trig.c
index 127ea54122d7..4a9887851ad8 100644
--- a/arch/x86/math-emu/fpu_trig.c
+++ b/arch/x86/math-emu/fpu_trig.c
@@ -1352,7 +1352,7 @@ static void fyl2xp1(FPU_REG *st0_ptr, u_char st0_tag)
case TW_Denormal:
if (denormal_operand() < 0)
return;
- /* fall through */
+ fallthrough;
case TAG_Zero:
case TAG_Valid:
setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr));
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 35f1498e9832..6e3e8a124903 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -190,6 +190,53 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
return pmd_k;
}
+/*
+ * Handle a fault on the vmalloc or module mapping area
+ *
+ * This is needed because there is a race condition between the time
+ * when the vmalloc mapping code updates the PMD to the point in time
+ * where it synchronizes this update with the other page-tables in the
+ * system.
+ *
+ * In this race window another thread/CPU can map an area on the same
+ * PMD, finds it already present and does not synchronize it with the
+ * rest of the system yet. As a result v[mz]alloc might return areas
+ * which are not mapped in every page-table in the system, causing an
+ * unhandled page-fault when they are accessed.
+ */
+static noinline int vmalloc_fault(unsigned long address)
+{
+ unsigned long pgd_paddr;
+ pmd_t *pmd_k;
+ pte_t *pte_k;
+
+ /* Make sure we are in vmalloc area: */
+ if (!(address >= VMALLOC_START && address < VMALLOC_END))
+ return -1;
+
+ /*
+ * Synchronize this task's top level page-table
+ * with the 'reference' page table.
+ *
+ * Do _not_ use "current" here. We might be inside
+ * an interrupt in the middle of a task switch..
+ */
+ pgd_paddr = read_cr3_pa();
+ pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
+ if (!pmd_k)
+ return -1;
+
+ if (pmd_large(*pmd_k))
+ return 0;
+
+ pte_k = pte_offset_kernel(pmd_k, address);
+ if (!pte_present(*pte_k))
+ return -1;
+
+ return 0;
+}
+NOKPROBE_SYMBOL(vmalloc_fault);
+
void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
{
unsigned long addr;
@@ -1110,6 +1157,37 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
*/
WARN_ON_ONCE(hw_error_code & X86_PF_PK);
+#ifdef CONFIG_X86_32
+ /*
+ * We can fault-in kernel-space virtual memory on-demand. The
+ * 'reference' page table is init_mm.pgd.
+ *
+ * NOTE! We MUST NOT take any locks for this case. We may
+ * be in an interrupt or a critical region, and should
+ * only copy the information from the master page table,
+ * nothing more.
+ *
+ * Before doing this on-demand faulting, ensure that the
+ * fault is not any of the following:
+ * 1. A fault on a PTE with a reserved bit set.
+ * 2. A fault caused by a user-mode access. (Do not demand-
+ * fault kernel memory due to user-mode accesses).
+ * 3. A fault caused by a page-level protection violation.
+ * (A demand fault would be on a non-present page which
+ * would have X86_PF_PROT==0).
+ *
+ * This is only needed to close a race condition on x86-32 in
+ * the vmalloc mapping/unmapping code. See the comment above
+ * vmalloc_fault() for details. On x86-64 the race does not
+ * exist as the vmalloc mappings don't need to be synchronized
+ * there.
+ */
+ if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
+ if (vmalloc_fault(address) >= 0)
+ return;
+ }
+#endif
+
/* Was the fault spurious, caused by lazy TLB invalidation? */
if (spurious_kernel_fault(hw_error_code, address))
return;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 84d85dbd1dad..9e5ccc56f8e0 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -574,7 +574,7 @@ static bool memremap_should_map_decrypted(resource_size_t phys_addr,
/* For SEV, these areas are encrypted */
if (sev_active())
break;
- /* Fallthrough */
+ fallthrough;
case E820_TYPE_PRAM:
return true;
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index c5174b4e318b..683cd12f4793 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -321,7 +321,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
u64 addr, u64 max_addr, u64 size)
{
return split_nodes_size_interleave_uniform(ei, pi, addr, max_addr, size,
- 0, NULL, NUMA_NO_NODE);
+ 0, NULL, 0);
}
static int __init setup_emu2phys_nid(int *dfl_phys_nid)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 1a3569b43aa5..0951b47e64c1 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -555,21 +555,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
load_new_mm_cr3(next->pgd, new_asid, true);
- /*
- * NB: This gets called via leave_mm() in the idle path
- * where RCU functions differently. Tracing normally
- * uses RCU, so we need to use the _rcuidle variant.
- *
- * (There is no good reason for this. The idle code should
- * be rearranged to call this before rcu_idle_enter().)
- */
- trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
load_new_mm_cr3(next->pgd, new_asid, false);
- /* See above wrt _rcuidle. */
- trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
}
/* Make sure we write CR3 before loaded_mm. */
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 9f9aad42ccff..89395a5049bb 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -26,6 +26,7 @@
#include <asm/xen/pci.h>
#include <asm/xen/cpuid.h>
#include <asm/apic.h>
+#include <asm/acpi.h>
#include <asm/i8259.h>
static int xen_pcifront_enable_irq(struct pci_dev *dev)
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index f6ea8f1a9d57..d37ebe6e70d7 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -49,7 +49,6 @@
#include <asm/efi.h>
#include <asm/e820/api.h>
#include <asm/time.h>
-#include <asm/set_memory.h>
#include <asm/tlbflush.h>
#include <asm/x86_init.h>
#include <asm/uv/uv.h>
@@ -496,74 +495,6 @@ void __init efi_init(void)
efi_print_memmap();
}
-#if defined(CONFIG_X86_32)
-
-void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
-{
- u64 addr, npages;
-
- addr = md->virt_addr;
- npages = md->num_pages;
-
- memrange_efi_to_native(&addr, &npages);
-
- if (executable)
- set_memory_x(addr, npages);
- else
- set_memory_nx(addr, npages);
-}
-
-void __init runtime_code_page_mkexec(void)
-{
- efi_memory_desc_t *md;
-
- /* Make EFI runtime service code area executable */
- for_each_efi_memory_desc(md) {
- if (md->type != EFI_RUNTIME_SERVICES_CODE)
- continue;
-
- efi_set_executable(md, true);
- }
-}
-
-void __init efi_memory_uc(u64 addr, unsigned long size)
-{
- unsigned long page_shift = 1UL << EFI_PAGE_SHIFT;
- u64 npages;
-
- npages = round_up(size, page_shift) / page_shift;
- memrange_efi_to_native(&addr, &npages);
- set_memory_uc(addr, npages);
-}
-
-void __init old_map_region(efi_memory_desc_t *md)
-{
- u64 start_pfn, end_pfn, end;
- unsigned long size;
- void *va;
-
- start_pfn = PFN_DOWN(md->phys_addr);
- size = md->num_pages << PAGE_SHIFT;
- end = md->phys_addr + size;
- end_pfn = PFN_UP(end);
-
- if (pfn_range_is_mapped(start_pfn, end_pfn)) {
- va = __va(md->phys_addr);
-
- if (!(md->attribute & EFI_MEMORY_WB))
- efi_memory_uc((u64)(unsigned long)va, size);
- } else
- va = efi_ioremap(md->phys_addr, size,
- md->type, md->attribute);
-
- md->virt_addr = (u64) (unsigned long) va;
- if (!va)
- pr_err("ioremap of 0x%llX failed!\n",
- (unsigned long long)md->phys_addr);
-}
-
-#endif
-
/* Merge contiguous regions of the same type and attribute */
static void __init efi_merge_regions(void)
{
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 826ead67753d..e06a199423c0 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -29,9 +29,35 @@
#include <asm/io.h>
#include <asm/desc.h>
#include <asm/page.h>
+#include <asm/set_memory.h>
#include <asm/tlbflush.h>
#include <asm/efi.h>
+void __init efi_map_region(efi_memory_desc_t *md)
+{
+ u64 start_pfn, end_pfn, end;
+ unsigned long size;
+ void *va;
+
+ start_pfn = PFN_DOWN(md->phys_addr);
+ size = md->num_pages << PAGE_SHIFT;
+ end = md->phys_addr + size;
+ end_pfn = PFN_UP(end);
+
+ if (pfn_range_is_mapped(start_pfn, end_pfn)) {
+ va = __va(md->phys_addr);
+
+ if (!(md->attribute & EFI_MEMORY_WB))
+ set_memory_uc((unsigned long)va, md->num_pages);
+ } else {
+ va = ioremap_cache(md->phys_addr, size);
+ }
+
+ md->virt_addr = (unsigned long)va;
+ if (!va)
+ pr_err("ioremap of 0x%llX failed!\n", md->phys_addr);
+}
+
/*
* To make EFI call EFI runtime service in physical addressing mode we need
* prolog/epilog before/after the invocation to claim the EFI runtime service
@@ -58,11 +84,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
return 0;
}
-void __init efi_map_region(efi_memory_desc_t *md)
-{
- old_map_region(md);
-}
-
void __init efi_map_region_fixed(efi_memory_desc_t *md) {}
void __init parse_efi_setup(u64 phys_addr, u32 data_len) {}
@@ -107,6 +128,15 @@ efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size,
void __init efi_runtime_update_mappings(void)
{
- if (__supported_pte_mask & _PAGE_NX)
- runtime_code_page_mkexec();
+ if (__supported_pte_mask & _PAGE_NX) {
+ efi_memory_desc_t *md;
+
+ /* Make EFI runtime service code area executable */
+ for_each_efi_memory_desc(md) {
+ if (md->type != EFI_RUNTIME_SERVICES_CODE)
+ continue;
+
+ set_memory_x(md->virt_addr, md->num_pages);
+ }
+ }
}
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 413583f904a6..6af4da1149ba 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -259,6 +259,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
npages = (__end_rodata - __start_rodata) >> PAGE_SHIFT;
rodata = __pa(__start_rodata);
pfn = rodata >> PAGE_SHIFT;
+
+ pf = _PAGE_NX | _PAGE_ENC;
if (kernel_map_pages_in_pgd(pgd, pfn, rodata, npages, pf)) {
pr_err("Failed to map kernel rodata 1:1\n");
return 1;