summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/cgroup/memcg_shrinker.py71
-rw-r--r--tools/include/linux/gfp.h3
-rw-r--r--tools/include/linux/slab.h4
-rw-r--r--tools/perf/builtin-kmem.c1
-rw-r--r--tools/testing/radix-tree/.gitignore2
-rw-r--r--tools/testing/radix-tree/Makefile9
-rw-r--r--tools/testing/radix-tree/generated/autoconf.h1
-rw-r--r--tools/testing/radix-tree/linux.c160
-rw-r--r--tools/testing/radix-tree/linux/kernel.h1
-rw-r--r--tools/testing/radix-tree/linux/lockdep.h2
-rw-r--r--tools/testing/radix-tree/linux/maple_tree.h7
-rw-r--r--tools/testing/radix-tree/maple.c59
-rw-r--r--tools/testing/radix-tree/trace/events/maple_tree.h5
-rw-r--r--tools/testing/selftests/vm/Makefile1
-rw-r--r--tools/testing/selftests/vm/hmm-tests.c306
-rw-r--r--tools/testing/selftests/vm/mremap_test.c49
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests.sh20
-rwxr-xr-xtools/testing/selftests/vm/test_hmm.sh24
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c70
-rw-r--r--tools/testing/selftests/vm/va_128TBswitch.sh39
20 files changed, 786 insertions, 48 deletions
diff --git a/tools/cgroup/memcg_shrinker.py b/tools/cgroup/memcg_shrinker.py
new file mode 100644
index 000000000000..706ab27666a4
--- /dev/null
+++ b/tools/cgroup/memcg_shrinker.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2022 Roman Gushchin <roman.gushchin@linux.dev>
+# Copyright (C) 2022 Meta
+
+import os
+import argparse
+import sys
+
+
+def scan_cgroups(cgroup_root):
+    """Map the inode number of every directory below cgroup_root to its path.
+
+    The tree is walked recursively; cgroup_root itself is not included.
+    Returns a dict of {inode number: directory path}.
+    """
+    inode_to_path = {}
+
+    for parent, children, _files in os.walk(cgroup_root):
+        child_paths = (os.path.join(parent, child) for child in children)
+        inode_to_path.update((os.stat(p).st_ino, p) for p in child_paths)
+
+    # {memcg ino: path}
+    return inode_to_path
+
+
+def scan_shrinkers(shrinker_debugfs):
+    """Collect object counts from every shrinker's "count" file.
+
+    Each non-empty line of a count file is parsed as whitespace-separated
+    fields: the first is the memcg inode number, the second an object count.
+    Any further fields on the line are ignored.
+    NOTE(review): extra fields look like per-NUMA-node counts, in which case
+    only node 0 is reported here -- confirm against the shrinker debugfs docs.
+
+    Returns a list of (count, shrinker name, memcg ino) tuples.
+    """
+    shrinkers = []
+
+    for root, subdirs, _ in os.walk(shrinker_debugfs):
+        for shrinker in subdirs:
+            count_path = os.path.join(root, shrinker, "count")
+            try:
+                f = open(count_path)
+            except FileNotFoundError:
+                # os.walk() also descends into nested directories, and a
+                # shrinker can be unregistered between the walk and the
+                # open(); neither should abort the whole scan.
+                continue
+            with f:
+                for line in f:
+                    # split() without arguments tolerates repeated blanks
+                    # and the trailing newline.
+                    items = line.split()
+                    if len(items) < 2:
+                        continue
+                    ino = int(items[0])
+                    # (count, shrinker, memcg ino)
+                    shrinkers.append((int(items[1]), shrinker, ino))
+    return shrinkers
+
+
+def main():
+    """Print shrinkers with a non-zero object count, largest first.
+
+    Output columns are: count, shrinker name, cgroup path. The optional
+    -n/--lines argument caps the number of rows printed.
+    """
+    parser = argparse.ArgumentParser(description='Display biggest shrinkers')
+    parser.add_argument('-n', '--lines', type=int, help='Number of lines to print')
+    args = parser.parse_args()
+
+    cgroups = scan_cgroups("/sys/fs/cgroup/")
+    shrinkers = scan_shrinkers("/sys/kernel/debug/shrinker/")
+    shrinkers.sort(key=lambda entry: entry[0], reverse=True)
+
+    printed = 0
+    for count, name, ino in shrinkers:
+        # The list is sorted in descending order, so nothing after the
+        # first zero count can be larger than zero.
+        if count == 0:
+            break
+
+        if ino in (0, 1):
+            cg = "/"
+        elif ino in cgroups:
+            cg = cgroups[ino]
+        else:
+            cg = "unknown (%d)" % ino
+
+        print("%-8s %-20s %s" % (count, name, cg))
+
+        printed += 1
+        if args.lines and printed >= args.lines:
+            break
+
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/include/linux/gfp.h b/tools/include/linux/gfp.h
index b238dbc9eb85..56eec4445bc9 100644
--- a/tools/include/linux/gfp.h
+++ b/tools/include/linux/gfp.h
@@ -12,7 +12,6 @@
#define __GFP_FS 0x80u
#define __GFP_NOWARN 0x200u
#define __GFP_ZERO 0x8000u
-#define __GFP_ATOMIC 0x80000u
#define __GFP_ACCOUNT 0x100000u
#define __GFP_DIRECT_RECLAIM 0x400000u
#define __GFP_KSWAPD_RECLAIM 0x2000000u
@@ -20,7 +19,7 @@
#define __GFP_RECLAIM (__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM)
#define GFP_ZONEMASK 0x0fu
-#define GFP_ATOMIC (__GFP_HIGH | __GFP_ATOMIC | __GFP_KSWAPD_RECLAIM)
+#define GFP_ATOMIC (__GFP_HIGH | __GFP_KSWAPD_RECLAIM)
#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM)
diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h
index 0616409513eb..311759ea25e9 100644
--- a/tools/include/linux/slab.h
+++ b/tools/include/linux/slab.h
@@ -41,4 +41,8 @@ struct kmem_cache *kmem_cache_create(const char *name, unsigned int size,
unsigned int align, unsigned int flags,
void (*ctor)(void *));
+void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list);
+int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
+ void **list);
+
#endif /* _TOOLS_SLAB_H */
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index ebfab2ca1702..4a06d83f2ac5 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -640,7 +640,6 @@ static const struct {
{ "__GFP_HIGHMEM", "HM" },
{ "GFP_DMA32", "D32" },
{ "__GFP_HIGH", "H" },
- { "__GFP_ATOMIC", "_A" },
{ "__GFP_IO", "I" },
{ "__GFP_FS", "F" },
{ "__GFP_NOWARN", "NWR" },
diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore
index d971516401e6..c901d96dd013 100644
--- a/tools/testing/radix-tree/.gitignore
+++ b/tools/testing/radix-tree/.gitignore
@@ -6,3 +6,5 @@ main
multiorder
radix-tree.c
xarray
+maple
+ma_xa_benchmark
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index c4ea4fbb0bfc..89d613e0505b 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -4,9 +4,9 @@ CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \
-fsanitize=undefined
LDFLAGS += -fsanitize=address -fsanitize=undefined
LDLIBS+= -lpthread -lurcu
-TARGETS = main idr-test multiorder xarray
+TARGETS = main idr-test multiorder xarray maple
CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o \
- slab.o
+ slab.o maple.o
OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
regression4.o tag_check.o multiorder.o idr-test.o iteration_check.o \
iteration_check_2.o benchmark.o
@@ -29,6 +29,8 @@ idr-test: idr-test.o $(CORE_OFILES)
xarray: $(CORE_OFILES)
+maple: $(CORE_OFILES)
+
multiorder: multiorder.o $(CORE_OFILES)
clean:
@@ -40,6 +42,7 @@ $(OFILES): Makefile *.h */*.h generated/map-shift.h \
../../include/linux/*.h \
../../include/asm/*.h \
../../../include/linux/xarray.h \
+ ../../../include/linux/maple_tree.h \
../../../include/linux/radix-tree.h \
../../../include/linux/idr.h
@@ -51,6 +54,8 @@ idr.c: ../../../lib/idr.c
xarray.o: ../../../lib/xarray.c ../../../lib/test_xarray.c
+maple.o: ../../../lib/maple_tree.c ../../../lib/test_maple_tree.c
+
generated/map-shift.h:
@if ! grep -qws $(SHIFT) generated/map-shift.h; then \
echo "#define XA_CHUNK_SHIFT $(SHIFT)" > \
diff --git a/tools/testing/radix-tree/generated/autoconf.h b/tools/testing/radix-tree/generated/autoconf.h
index 2218b3cc184e..e7da80350236 100644
--- a/tools/testing/radix-tree/generated/autoconf.h
+++ b/tools/testing/radix-tree/generated/autoconf.h
@@ -1 +1,2 @@
#define CONFIG_XARRAY_MULTI 1
+#define CONFIG_64BIT 1
diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
index d5c1bcba86fe..2048d12c31df 100644
--- a/tools/testing/radix-tree/linux.c
+++ b/tools/testing/radix-tree/linux.c
@@ -23,15 +23,47 @@ struct kmem_cache {
int nr_objs;
void *objs;
void (*ctor)(void *);
+ unsigned int non_kernel;
+ unsigned long nr_allocated;
+ unsigned long nr_tallocated;
};
+/*
+ * Set the cache's non_kernel budget. The allocators in this file consume the
+ * budget to let allocations without __GFP_DIRECT_RECLAIM proceed; when it is
+ * zero, such allocations fail.
+ */
+void kmem_cache_set_non_kernel(struct kmem_cache *cachep, unsigned int val)
+{
+ cachep->non_kernel = val;
+}
+
+/* Return the object size multiplied by the number of objects currently allocated. */
+unsigned long kmem_cache_get_alloc(struct kmem_cache *cachep)
+{
+ return cachep->size * cachep->nr_allocated;
+}
+
+/* Return the cache's nr_allocated counter (incremented on alloc, decremented on free). */
+unsigned long kmem_cache_nr_allocated(struct kmem_cache *cachep)
+{
+ return cachep->nr_allocated;
+}
+
+/*
+ * Return the cache's nr_tallocated counter: it is incremented on every
+ * allocation and only cleared by kmem_cache_zero_nr_tallocated().
+ */
+unsigned long kmem_cache_nr_tallocated(struct kmem_cache *cachep)
+{
+ return cachep->nr_tallocated;
+}
+
+/* Reset the cache's nr_tallocated counter to zero. */
+void kmem_cache_zero_nr_tallocated(struct kmem_cache *cachep)
+{
+ cachep->nr_tallocated = 0;
+}
+
void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
int gfp)
{
void *p;
- if (!(gfp & __GFP_DIRECT_RECLAIM))
- return NULL;
+ if (!(gfp & __GFP_DIRECT_RECLAIM)) {
+ if (!cachep->non_kernel)
+ return NULL;
+
+ cachep->non_kernel--;
+ }
pthread_mutex_lock(&cachep->lock);
if (cachep->nr_objs) {
@@ -53,19 +85,21 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
memset(p, 0, cachep->size);
}
+ uatomic_inc(&cachep->nr_allocated);
uatomic_inc(&nr_allocated);
+ uatomic_inc(&cachep->nr_tallocated);
if (kmalloc_verbose)
printf("Allocating %p from slab\n", p);
return p;
}
-void kmem_cache_free(struct kmem_cache *cachep, void *objp)
+void kmem_cache_free_locked(struct kmem_cache *cachep, void *objp)
{
assert(objp);
uatomic_dec(&nr_allocated);
+ uatomic_dec(&cachep->nr_allocated);
if (kmalloc_verbose)
printf("Freeing %p to slab\n", objp);
- pthread_mutex_lock(&cachep->lock);
if (cachep->nr_objs > 10 || cachep->align) {
memset(objp, POISON_FREE, cachep->size);
free(objp);
@@ -75,9 +109,80 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
node->parent = cachep->objs;
cachep->objs = node;
}
+}
+
+/*
+ * Free one object back to the cache. Takes cachep->lock around
+ * kmem_cache_free_locked(), which expects the lock to be held by the caller.
+ */
+void kmem_cache_free(struct kmem_cache *cachep, void *objp)
+{
+ pthread_mutex_lock(&cachep->lock);
+ kmem_cache_free_locked(cachep, objp);
pthread_mutex_unlock(&cachep->lock);
}
+/*
+ * Free @size objects from @list back to @cachep under a single acquisition
+ * of the cache lock.
+ */
+void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list)
+{
+	size_t i;
+
+	/* Nothing to free; also avoids printing "size - 1" wrapped around. */
+	if (!size)
+		return;
+
+	if (kmalloc_verbose)
+		pr_debug("Bulk free %p[0-%zu]\n", list, size - 1);
+
+	pthread_mutex_lock(&cachep->lock);
+	for (i = 0; i < size; i++)
+		kmem_cache_free_locked(cachep, list[i]);
+	pthread_mutex_unlock(&cachep->lock);
+}
+
+/*
+ * Allocate @size objects from @cachep into @p.
+ *
+ * Allocations without __GFP_DIRECT_RECLAIM only proceed while the cache's
+ * non_kernel budget covers the whole request. Objects are taken from the
+ * cache's free list when it holds at least @size of them; otherwise all of
+ * them are freshly allocated.
+ *
+ * Returns @size on success and 0 on failure. On failure nothing is left
+ * allocated in @p.
+ */
+int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
+			  void **p)
+{
+	size_t i;
+
+	if (kmalloc_verbose)
+		pr_debug("Bulk alloc %zu\n", size);
+
+	if (!(gfp & __GFP_DIRECT_RECLAIM)) {
+		if (cachep->non_kernel < size)
+			return 0;
+
+		cachep->non_kernel -= size;
+	}
+
+	pthread_mutex_lock(&cachep->lock);
+	if (cachep->nr_objs >= size) {
+		struct radix_tree_node *node;
+
+		/* Pop @size objects off the free list, linked via ->parent. */
+		for (i = 0; i < size; i++) {
+			node = cachep->objs;
+			cachep->nr_objs--;
+			cachep->objs = node->parent;
+			p[i] = node;
+			node->parent = NULL;
+		}
+		pthread_mutex_unlock(&cachep->lock);
+	} else {
+		pthread_mutex_unlock(&cachep->lock);
+		for (i = 0; i < size; i++) {
+			/* Each slot holds ONE object of cachep->size bytes. */
+			if (cachep->align) {
+				/* posix_memalign() returns non-zero on error. */
+				if (posix_memalign(&p[i], cachep->align,
+						   cachep->size))
+					break;
+			} else {
+				p[i] = malloc(cachep->size);
+				if (!p[i])
+					break;
+			}
+			if (cachep->ctor)
+				cachep->ctor(p[i]);
+			else if (gfp & __GFP_ZERO)
+				memset(p[i], 0, cachep->size);
+		}
+
+		if (i < size) {
+			/*
+			 * Out of memory part-way through. These objects were
+			 * never accounted, so release them directly.
+			 */
+			while (i--)
+				free(p[i]);
+			return 0;
+		}
+	}
+
+	for (i = 0; i < size; i++) {
+		uatomic_inc(&nr_allocated);
+		uatomic_inc(&cachep->nr_allocated);
+		uatomic_inc(&cachep->nr_tallocated);
+		if (kmalloc_verbose)
+			printf("Allocating %p from slab\n", p[i]);
+	}
+
+	return size;
+}
+
struct kmem_cache *
kmem_cache_create(const char *name, unsigned int size, unsigned int align,
unsigned int flags, void (*ctor)(void *))
@@ -88,7 +193,54 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align,
ret->size = size;
ret->align = align;
ret->nr_objs = 0;
+ ret->nr_allocated = 0;
+ ret->nr_tallocated = 0;
ret->objs = NULL;
ret->ctor = ctor;
+ ret->non_kernel = 0;
return ret;
}
+
+/*
+ * Test the test infrastructure for kmem_cache_alloc/free and bulk counterparts.
+ */
+void test_kmem_cache_bulk(void)
+{
+	int i, ret;
+	void *list[12];
+	static struct kmem_cache *test_cache, *test_cache2;
+
+	/*
+	 * Testing the bulk allocators without aligned kmem_cache to force the
+	 * bulk alloc/free to reuse
+	 */
+	test_cache = kmem_cache_create("test_cache", 256, 0, SLAB_PANIC, NULL);
+
+	for (i = 0; i < 5; i++)
+		list[i] = kmem_cache_alloc(test_cache, __GFP_DIRECT_RECLAIM);
+
+	for (i = 0; i < 5; i++)
+		kmem_cache_free(test_cache, list[i]);
+	assert(test_cache->nr_objs == 5);
+
+	/* A short bulk allocation must fail here, not crash in the free. */
+	ret = kmem_cache_alloc_bulk(test_cache, __GFP_DIRECT_RECLAIM, 5, list);
+	assert(ret == 5);
+	kmem_cache_free_bulk(test_cache, 5, list);
+
+	for (i = 0; i < 12; i++)
+		list[i] = kmem_cache_alloc(test_cache, __GFP_DIRECT_RECLAIM);
+
+	for (i = 0; i < 12; i++)
+		kmem_cache_free(test_cache, list[i]);
+
+	/* The last free will not be kept around */
+	assert(test_cache->nr_objs == 11);
+
+	/* Aligned caches will immediately free */
+	test_cache2 = kmem_cache_create("test_cache2", 128, 128, SLAB_PANIC, NULL);
+
+	ret = kmem_cache_alloc_bulk(test_cache2, __GFP_DIRECT_RECLAIM, 10, list);
+	assert(ret == 10);
+	kmem_cache_free_bulk(test_cache2, 10, list);
+	assert(!test_cache2->nr_objs);
+}
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index 39867fd80c8f..c5c9d05f29da 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -14,6 +14,7 @@
#include "../../../include/linux/kconfig.h"
#define printk printf
+#define pr_err printk
#define pr_info printk
#define pr_debug printk
#define pr_cont printk
diff --git a/tools/testing/radix-tree/linux/lockdep.h b/tools/testing/radix-tree/linux/lockdep.h
index 016cff473cfc..62473ab57f99 100644
--- a/tools/testing/radix-tree/linux/lockdep.h
+++ b/tools/testing/radix-tree/linux/lockdep.h
@@ -11,4 +11,6 @@ static inline void lockdep_set_class(spinlock_t *lock,
struct lock_class_key *key)
{
}
+
+extern int lockdep_is_held(const void *);
#endif /* _LINUX_LOCKDEP_H */
diff --git a/tools/testing/radix-tree/linux/maple_tree.h b/tools/testing/radix-tree/linux/maple_tree.h
new file mode 100644
index 000000000000..7d8d1f445b89
--- /dev/null
+++ b/tools/testing/radix-tree/linux/maple_tree.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#define atomic_t int32_t
+#include "../../../../include/linux/maple_tree.h"
+#define atomic_inc(x) uatomic_inc(x)
+#define atomic_read(x) uatomic_read(x)
+#define atomic_set(x, y) do {} while (0)
+#define U8_MAX UCHAR_MAX
diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
new file mode 100644
index 000000000000..35082671928a
--- /dev/null
+++ b/tools/testing/radix-tree/maple.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * maple.c: Userspace shim for maple tree test-suite
+ * Copyright (c) 2018 Liam R. Howlett <Liam.Howlett@Oracle.com>
+ */
+
+#define CONFIG_DEBUG_MAPLE_TREE
+#define CONFIG_MAPLE_SEARCH
+#include "test.h"
+
+#define module_init(x)
+#define module_exit(x)
+#define MODULE_AUTHOR(x)
+#define MODULE_LICENSE(x)
+#define dump_stack() assert(0)
+
+#include "../../../lib/maple_tree.c"
+#undef CONFIG_DEBUG_MAPLE_TREE
+#include "../../../lib/test_maple_tree.c"
+
+/*
+ * Exercise mt_dump() on three hand-built trees: an empty tree, a tree whose
+ * root is a single value entry, and a tree whose root is one manually
+ * populated node.
+ */
+void farmer_tests(void)
+{
+ struct maple_node *node;
+ DEFINE_MTREE(tree);
+
+ mt_dump(&tree);
+
+ tree.ma_root = xa_mk_value(0);
+ mt_dump(&tree);
+
+ node = mt_alloc_one(GFP_KERNEL);
+ /* NOTE(review): presumably parent = tree pointer with bit 0 set marks the root node -- confirm in lib/maple_tree.c */
+ node->parent = (void *)((unsigned long)(&tree) | 1);
+ node->slot[0] = xa_mk_value(0);
+ node->slot[1] = xa_mk_value(1);
+ node->mr64.pivot[0] = 0;
+ node->mr64.pivot[1] = 1;
+ node->mr64.pivot[2] = 0;
+ tree.ma_root = mt_mk_node(node, maple_leaf_64);
+ mt_dump(&tree);
+
+ ma_free_rcu(node);
+}
+
+/*
+ * Run the local farmer_tests() followed by maple_tree_seed() and
+ * maple_tree_harvest(), which are not defined in this file (presumably they
+ * come from the included lib/test_maple_tree.c -- confirm).
+ */
+void maple_tree_tests(void)
+{
+ farmer_tests();
+ maple_tree_seed();
+ maple_tree_harvest();
+}
+
+/*
+ * Weak entry point for the standalone maple test binary: initialise the
+ * maple tree code, run the tests, wait for pending RCU callbacks and report
+ * a non-zero nr_allocated. Note the exit status is 0 even when nr_allocated
+ * is non-zero.
+ */
+int __weak main(void)
+{
+ maple_tree_init();
+ maple_tree_tests();
+ rcu_barrier();
+ if (nr_allocated)
+ printf("nr_allocated = %d\n", nr_allocated);
+ return 0;
+}
diff --git a/tools/testing/radix-tree/trace/events/maple_tree.h b/tools/testing/radix-tree/trace/events/maple_tree.h
new file mode 100644
index 000000000000..97d0e1ddcf08
--- /dev/null
+++ b/tools/testing/radix-tree/trace/events/maple_tree.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#define trace_ma_op(a, b) do {} while (0)
+#define trace_ma_read(a, b) do {} while (0)
+#define trace_ma_write(a, b, c, d) do {} while (0)
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 44f25acfbeca..6a34209379a4 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -93,6 +93,7 @@ TEST_PROGS := run_vmtests.sh
TEST_FILES := test_vmalloc.sh
TEST_FILES += test_hmm.sh
+TEST_FILES += va_128TBswitch.sh
KSFT_KHDR_INSTALL := 1
include ../lib.mk
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 203323967b50..2da9d5baf339 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -36,6 +36,7 @@
* in the usual include/uapi/... directory.
*/
#include "../../../../lib/test_hmm_uapi.h"
+#include "../../../../mm/gup_test.h"
struct hmm_buffer {
void *ptr;
@@ -46,12 +47,21 @@ struct hmm_buffer {
uint64_t faults;
};
+/*
+ * Device numbers handed to hmm_open() by the fixture variants. Everything
+ * from HMM_COHERENCE_DEVICE_ONE upwards is treated as a coherent device by
+ * hmm_is_coherent_type(), so the order of these enumerators matters.
+ */
+enum {
+ HMM_PRIVATE_DEVICE_ONE,
+ HMM_PRIVATE_DEVICE_TWO,
+ HMM_COHERENCE_DEVICE_ONE,
+ HMM_COHERENCE_DEVICE_TWO,
+};
+
#define TWOMEG (1 << 21)
#define HMM_BUFFER_SIZE (1024 << 12)
#define HMM_PATH_MAX 64
#define NTIMES 10
#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
+/* Just the flags we need, copied from mm.h: */
+#define FOLL_WRITE 0x01 /* check pte is writable */
FIXTURE(hmm)
{
@@ -60,6 +70,21 @@ FIXTURE(hmm)
unsigned int page_shift;
};
+/* Each hmm test runs once per variant; device_number is what FIXTURE_SETUP(hmm) passes to hmm_open(). */
+FIXTURE_VARIANT(hmm)
+{
+ int device_number;
+};
+
+/* Variant using the first device-private test device. */
+FIXTURE_VARIANT_ADD(hmm, hmm_device_private)
+{
+ .device_number = HMM_PRIVATE_DEVICE_ONE,
+};
+
+/* Variant using the first device-coherent test device. */
+FIXTURE_VARIANT_ADD(hmm, hmm_device_coherent)
+{
+ .device_number = HMM_COHERENCE_DEVICE_ONE,
+};
+
FIXTURE(hmm2)
{
int fd0;
@@ -68,6 +93,24 @@ FIXTURE(hmm2)
unsigned int page_shift;
};
+/* Each hmm2 test runs once per variant; the two numbers are what FIXTURE_SETUP(hmm2) opens as fd0 and fd1. */
+FIXTURE_VARIANT(hmm2)
+{
+ int device_number0;
+ int device_number1;
+};
+
+/* Variant using both device-private test devices. */
+FIXTURE_VARIANT_ADD(hmm2, hmm2_device_private)
+{
+ .device_number0 = HMM_PRIVATE_DEVICE_ONE,
+ .device_number1 = HMM_PRIVATE_DEVICE_TWO,
+};
+
+/* Variant using both device-coherent test devices. */
+FIXTURE_VARIANT_ADD(hmm2, hmm2_device_coherent)
+{
+ .device_number0 = HMM_COHERENCE_DEVICE_ONE,
+ .device_number1 = HMM_COHERENCE_DEVICE_TWO,
+};
+
static int hmm_open(int unit)
{
char pathname[HMM_PATH_MAX];
@@ -81,12 +124,19 @@ static int hmm_open(int unit)
return fd;
}
+/* Tell whether @dev_num names one of the coherent test devices. */
+static bool hmm_is_coherent_type(int dev_num)
+{
+	/* Private device numbers all sort below the first coherent one. */
+	if (dev_num < HMM_COHERENCE_DEVICE_ONE)
+		return false;
+
+	return true;
+}
+
FIXTURE_SETUP(hmm)
{
self->page_size = sysconf(_SC_PAGE_SIZE);
self->page_shift = ffs(self->page_size) - 1;
- self->fd = hmm_open(0);
+ self->fd = hmm_open(variant->device_number);
+ if (self->fd < 0 && hmm_is_coherent_type(variant->device_number))
+ SKIP(exit(0), "DEVICE_COHERENT not available");
ASSERT_GE(self->fd, 0);
}
@@ -95,9 +145,11 @@ FIXTURE_SETUP(hmm2)
self->page_size = sysconf(_SC_PAGE_SIZE);
self->page_shift = ffs(self->page_size) - 1;
- self->fd0 = hmm_open(0);
+ self->fd0 = hmm_open(variant->device_number0);
+ if (self->fd0 < 0 && hmm_is_coherent_type(variant->device_number0))
+ SKIP(exit(0), "DEVICE_COHERENT not available");
ASSERT_GE(self->fd0, 0);
- self->fd1 = hmm_open(1);
+ self->fd1 = hmm_open(variant->device_number1);
ASSERT_GE(self->fd1, 0);
}
@@ -211,6 +263,20 @@ static void hmm_nanosleep(unsigned int n)
nanosleep(&t, NULL);
}
+/*
+ * Issue HMM_DMIRROR_MIGRATE_TO_DEV for @npages pages of @buffer on device
+ * @fd. Returns whatever hmm_dmirror_cmd() returns.
+ */
+static int hmm_migrate_sys_to_dev(int fd,
+ struct hmm_buffer *buffer,
+ unsigned long npages)
+{
+ return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_DEV, buffer, npages);
+}
+
+/*
+ * Issue HMM_DMIRROR_MIGRATE_TO_SYS for @npages pages of @buffer on device
+ * @fd. Returns whatever hmm_dmirror_cmd() returns.
+ */
+static int hmm_migrate_dev_to_sys(int fd,
+ struct hmm_buffer *buffer,
+ unsigned long npages)
+{
+ return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_SYS, buffer, npages);
+}
+
/*
* Simple NULL test of device open/close.
*/
@@ -875,7 +941,7 @@ TEST_F(hmm, migrate)
ptr[i] = i;
/* Migrate memory to device. */
- ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
@@ -923,7 +989,7 @@ TEST_F(hmm, migrate_fault)
ptr[i] = i;
/* Migrate memory to device. */
- ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
@@ -936,7 +1002,7 @@ TEST_F(hmm, migrate_fault)
ASSERT_EQ(ptr[i], i);
/* Migrate memory to the device again. */
- ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
@@ -976,7 +1042,7 @@ TEST_F(hmm, migrate_shared)
ASSERT_NE(buffer->ptr, MAP_FAILED);
/* Migrate memory to device. */
- ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, -ENOENT);
hmm_buffer_free(buffer);
@@ -1015,7 +1081,7 @@ TEST_F(hmm2, migrate_mixed)
p = buffer->ptr;
/* Migrating a protected area should be an error. */
- ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages);
ASSERT_EQ(ret, -EINVAL);
/* Punch a hole after the first page address. */
@@ -1023,7 +1089,7 @@ TEST_F(hmm2, migrate_mixed)
ASSERT_EQ(ret, 0);
/* We expect an error if the vma doesn't cover the range. */
- ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 3);
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 3);
ASSERT_EQ(ret, -EINVAL);
/* Page 2 will be a read-only zero page. */
@@ -1055,13 +1121,13 @@ TEST_F(hmm2, migrate_mixed)
/* Now try to migrate pages 2-5 to device 1. */
buffer->ptr = p + 2 * self->page_size;
- ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 4);
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 4);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, 4);
/* Page 5 won't be migrated to device 0 because it's on device 1. */
buffer->ptr = p + 5 * self->page_size;
- ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1);
ASSERT_EQ(ret, -ENOENT);
buffer->ptr = p;
@@ -1070,8 +1136,12 @@ TEST_F(hmm2, migrate_mixed)
}
/*
- * Migrate anonymous memory to device private memory and fault it back to system
- * memory multiple times.
+ * Migrate anonymous memory to device memory and back to system memory
+ * multiple times. In case of private zone configuration, this is done
+ * through fault pages accessed by CPU. In case of coherent zone configuration,
+ * the pages from the device should be explicitly migrated back to system memory.
+ * The reason is Coherent device zone has coherent access by CPU, therefore
+ * it will not generate any page fault.
*/
TEST_F(hmm, migrate_multiple)
{
@@ -1107,8 +1177,7 @@ TEST_F(hmm, migrate_multiple)
ptr[i] = i;
/* Migrate memory to device. */
- ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer,
- npages);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
@@ -1116,7 +1185,13 @@ TEST_F(hmm, migrate_multiple)
for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
- /* Fault pages back to system memory and check them. */
+ /* Migrate back to system memory and check them. */
+ if (hmm_is_coherent_type(variant->device_number)) {
+ ret = hmm_migrate_dev_to_sys(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ }
+
for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
@@ -1354,13 +1429,13 @@ TEST_F(hmm2, snapshot)
/* Page 5 will be migrated to device 0. */
buffer->ptr = p + 5 * self->page_size;
- ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, 1);
/* Page 6 will be migrated to device 1. */
buffer->ptr = p + 6 * self->page_size;
- ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 1);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, 1);
@@ -1377,9 +1452,16 @@ TEST_F(hmm2, snapshot)
ASSERT_EQ(m[2], HMM_DMIRROR_PROT_ZERO | HMM_DMIRROR_PROT_READ);
ASSERT_EQ(m[3], HMM_DMIRROR_PROT_READ);
ASSERT_EQ(m[4], HMM_DMIRROR_PROT_WRITE);
- ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL |
- HMM_DMIRROR_PROT_WRITE);
- ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE);
+ if (!hmm_is_coherent_type(variant->device_number0)) {
+ ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL |
+ HMM_DMIRROR_PROT_WRITE);
+ ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE);
+ } else {
+ ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL |
+ HMM_DMIRROR_PROT_WRITE);
+ ASSERT_EQ(m[6], HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE |
+ HMM_DMIRROR_PROT_WRITE);
+ }
hmm_buffer_free(buffer);
}
@@ -1685,4 +1767,186 @@ TEST_F(hmm, exclusive_cow)
hmm_buffer_free(buffer);
}
+/*
+ * Run one gup_test ioctl (@cmd) on @gup_fd against the range starting at
+ * @addr, @size bytes long, @npages pages per call, with FOLL_WRITE set.
+ *
+ * Returns 0 on success, or the errno value set by the failing ioctl().
+ */
+static int gup_test_exec(int gup_fd, unsigned long addr,
+			 int cmd, int npages, int size)
+{
+	struct gup_test gup = {
+		.nr_pages_per_call	= npages,
+		.addr			= addr,
+		.gup_flags		= FOLL_WRITE,
+		.size			= size,
+	};
+	int err;
+
+	if (ioctl(gup_fd, cmd, &gup)) {
+		/* perror() may itself change errno, so latch it first. */
+		err = errno;
+		perror("ioctl on error\n");
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Test get user device pages through gup_test. Setting PIN_LONGTERM flag.
+ * This should trigger a migration back to system memory for both, private
+ * and coherent type pages.
+ * This test makes use of the gup_test module. Make sure CONFIG_GUP_TEST is
+ * enabled in your kernel configuration before you run it.
+ */
+TEST_F(hmm, hmm_gup_test)
+{
+ struct hmm_buffer *buffer;
+ int gup_fd;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ unsigned char *m;
+
+ /* The test is skipped, not failed, when the gup_test debugfs file cannot be opened. */
+ gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+ if (gup_fd == -1)
+ SKIP(return, "Skipping test, could not find gup_test driver");
+
+ /* One page for each of the three gup_test commands issued below. */
+ npages = 3;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Page 0: GUP_BASIC_TEST, page 1: GUP_FAST_BENCHMARK, page 2: PIN_LONGTERM_BENCHMARK. */
+ ASSERT_EQ(gup_test_exec(gup_fd,
+ (unsigned long)buffer->ptr,
+ GUP_BASIC_TEST, 1, self->page_size), 0);
+ ASSERT_EQ(gup_test_exec(gup_fd,
+ (unsigned long)buffer->ptr + 1 * self->page_size,
+ GUP_FAST_BENCHMARK, 1, self->page_size), 0);
+ ASSERT_EQ(gup_test_exec(gup_fd,
+ (unsigned long)buffer->ptr + 2 * self->page_size,
+ PIN_LONGTERM_BENCHMARK, 1, self->page_size), 0);
+
+ /* Take snapshot to CPU pagetables */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ m = buffer->mirror;
+ /*
+ * Expected state per page: on a coherent device pages 0 and 1 are still
+ * reported as device-coherent, on a private device they are reported as
+ * plain writable pages. Page 2 (long-term pin) is expected to be a plain
+ * writable page in both cases.
+ */
+ if (hmm_is_coherent_type(variant->device_number)) {
+ ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[0]);
+ ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[1]);
+ } else {
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[0]);
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[1]);
+ }
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[2]);
+ /*
+ * Check again the content on the pages. Make sure there's no
+ * corrupted data.
+ */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ close(gup_fd);
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Test copy-on-write in device pages.
+ * In case of writing to COW private page(s), a page fault will migrate pages
+ * back to system memory first. Then, these pages will be duplicated. In case
+ * of COW device coherent type, pages are duplicated directly from device
+ * memory.
+ */
+TEST_F(hmm, hmm_cow_in_device)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	unsigned long i;
+	int *ptr;
+	int ret;
+	unsigned char *m;
+	pid_t pid;
+	int status;
+
+	npages = 4;
+	size = npages << self->page_shift;
+
+	buffer = malloc(sizeof(*buffer));
+	ASSERT_NE(buffer, NULL);
+
+	buffer->fd = -1;
+	buffer->size = size;
+	buffer->mirror = malloc(size);
+	ASSERT_NE(buffer->mirror, NULL);
+
+	buffer->ptr = mmap(NULL, size,
+			   PROT_READ | PROT_WRITE,
+			   MAP_PRIVATE | MAP_ANONYMOUS,
+			   buffer->fd, 0);
+	ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+	/* Initialize buffer in system memory. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ptr[i] = i;
+
+	/* Migrate memory to device. */
+	ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, npages);
+
+	/* fork() makes the migrated pages copy-on-write in both processes. */
+	pid = fork();
+	ASSERT_NE(pid, -1);
+	if (!pid) {
+		/*
+		 * Child process waits for SIGTERM from the parent. Sleep in
+		 * pause() instead of spinning so the child does not burn a
+		 * CPU while the parent runs.
+		 */
+		while (1)
+			pause();
+		perror("Should not reach this\n");
+		exit(0);
+	}
+	/*
+	 * Parent process writes to the COW page(s) and gets a new copy in
+	 * system memory. In case of device private pages, this write causes
+	 * a migration to system memory first.
+	 */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ptr[i] = i;
+
+	/* Terminate child and wait */
+	EXPECT_EQ(0, kill(pid, SIGTERM));
+	EXPECT_EQ(pid, waitpid(pid, &status, 0));
+	EXPECT_NE(0, WIFSIGNALED(status));
+	EXPECT_EQ(SIGTERM, WTERMSIG(status));
+
+	/* Take snapshot to CPU pagetables */
+	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, npages);
+	m = buffer->mirror;
+	/* Every page the parent wrote must now be a plain writable page. */
+	for (i = 0; i < npages; i++)
+		ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[i]);
+
+	hmm_buffer_free(buffer);
+}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
index db0270127aeb..9496346973d4 100644
--- a/tools/testing/selftests/vm/mremap_test.c
+++ b/tools/testing/selftests/vm/mremap_test.c
@@ -119,6 +119,50 @@ static unsigned long long get_mmap_min_addr(void)
}
/*
+ * This test validates that merge is called when expanding a mapping.
+ * Mapping containing three pages is created, middle page is unmapped
+ * and then the mapping containing the first page is expanded so that
+ * it fills the created hole. The two parts should merge creating
+ * single mapping with three pages.
+ */
+static void mremap_expand_merge(unsigned long page_size)
+{
+	char *test_name = "mremap expand merge";
+	FILE *fp;
+	char *line = NULL;
+	size_t len = 0;
+	bool success = false;
+	char *start, *remap;
+
+	start = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE,
+		     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (start == MAP_FAILED)
+		goto out;
+
+	/* Punch a hole in the middle, then grow the first page into it. */
+	munmap(start + page_size, page_size);
+	remap = mremap(start, page_size, 2 * page_size, 0);
+	if (remap == MAP_FAILED) {
+		/* Only the first and the last page are still mapped. */
+		munmap(start, page_size);
+		munmap(start + 2 * page_size, page_size);
+		goto out;
+	}
+
+	fp = fopen("/proc/self/maps", "r");
+	if (fp == NULL)
+		goto unmap;
+
+	/* Look for one maps entry spanning exactly the three pages. */
+	while (getline(&line, &len, fp) != -1) {
+		char *first = strtok(line, "- ");
+		char *second = strtok(NULL, "- ");
+		void *first_val, *second_val;
+
+		/* Skip any line that does not have two address fields. */
+		if (!first || !second)
+			continue;
+
+		/* Addresses are unsigned; strtol() would clamp high ones. */
+		first_val = (void *)strtoul(first, NULL, 16);
+		second_val = (void *)strtoul(second, NULL, 16);
+
+		if (first_val == start && second_val == start + 3 * page_size) {
+			success = true;
+			break;
+		}
+	}
+	/* getline() allocates the line buffer; release it. */
+	free(line);
+	fclose(fp);
+
+unmap:
+	munmap(start, 3 * page_size);
+out:
+	/* Exactly one result is reported on every path. */
+	if (success)
+		ksft_test_result_pass("%s\n", test_name);
+	else
+		ksft_test_result_fail("%s\n", test_name);
+}
+
+/*
* Returns the start address of the mapping on success, else returns
* NULL on failure.
*/
@@ -336,6 +380,7 @@ int main(int argc, char **argv)
int i, run_perf_tests;
unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
unsigned int pattern_seed;
+ int num_expand_tests = 1;
struct test test_cases[MAX_TEST];
struct test perf_test_cases[MAX_PERF_TEST];
int page_size;
@@ -407,12 +452,14 @@ int main(int argc, char **argv)
(threshold_mb * _1MB >= _1GB);
ksft_set_plan(ARRAY_SIZE(test_cases) + (run_perf_tests ?
- ARRAY_SIZE(perf_test_cases) : 0));
+ ARRAY_SIZE(perf_test_cases) : 0) + num_expand_tests);
for (i = 0; i < ARRAY_SIZE(test_cases); i++)
run_mremap_test_case(test_cases[i], &failures, threshold_mb,
pattern_seed);
+ mremap_expand_merge(page_size);
+
if (run_perf_tests) {
ksft_print_msg("\n%s\n",
"mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:");
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index 41fce8bea929..249295a10f56 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -121,10 +121,17 @@ run_test ./gup_test -a
run_test ./gup_test -ct -F 0x1 0 19 0x1000
run_test ./userfaultfd anon 20 16
-# Test requires source and destination huge pages. Size of source
-# (half_ufd_size_MB) is passed as argument to test.
+run_test ./userfaultfd anon:dev 20 16
+# Hugetlb tests require source and destination huge pages. Pass in half the
+# size ($half_ufd_size_MB), which is used for *each*.
run_test ./userfaultfd hugetlb "$half_ufd_size_MB" 32
+run_test ./userfaultfd hugetlb:dev "$half_ufd_size_MB" 32
+run_test ./userfaultfd hugetlb_shared "$half_ufd_size_MB" 32 "$mnt"/uffd-test
+rm -f "$mnt"/uffd-test
+run_test ./userfaultfd hugetlb_shared:dev "$half_ufd_size_MB" 32 "$mnt"/uffd-test
+rm -f "$mnt"/uffd-test
run_test ./userfaultfd shmem 20 16
+run_test ./userfaultfd shmem:dev 20 16
#cleanup
umount "$mnt"
@@ -151,7 +158,7 @@ if [ $VADDR64 -ne 0 ]; then
run_test ./virtual_address_range
# virtual address 128TB switch test
- run_test ./va_128TBswitch
+ run_test ./va_128TBswitch.sh
fi # VADDR64
# vmalloc stability smoke test
@@ -179,4 +186,11 @@ run_test ./ksm_tests -N -m 1
# KSM test with 2 NUMA nodes and merge_across_nodes = 0
run_test ./ksm_tests -N -m 0
+# protection_keys tests
+if [ $VADDR64 -eq 0 ]; then
+ run_test ./protection_keys_32
+else
+ run_test ./protection_keys_64
+fi
+
exit $exitcode
diff --git a/tools/testing/selftests/vm/test_hmm.sh b/tools/testing/selftests/vm/test_hmm.sh
index 0647b525a625..539c9371e592 100755
--- a/tools/testing/selftests/vm/test_hmm.sh
+++ b/tools/testing/selftests/vm/test_hmm.sh
@@ -40,11 +40,26 @@ check_test_requirements()
+# Load the $DRIVER module and create its character device nodes.
+#
+# With no arguments the module is loaded without parameters and
+# /dev/hmm_dmirror0 and /dev/hmm_dmirror1 are created.  With two arguments
+# they are passed as spm_addr_dev0 and spm_addr_dev1, and
+# /dev/hmm_dmirror2 and /dev/hmm_dmirror3 are created as well.  Any other
+# argument count prints a hint and calls usage.
load_driver()
{
- modprobe $DRIVER > /dev/null 2>&1
+	if [ $# -eq 0 ]; then
+		modprobe $DRIVER > /dev/null 2>&1
+	else
+		if [ $# -eq 2 ]; then
+			# The trailing backslash keeps the redirection attached
+			# to modprobe; without it the redirection is a separate
+			# command that always succeeds and hides modprobe's
+			# exit status from the $? check below.
+			modprobe $DRIVER spm_addr_dev0=$1 spm_addr_dev1=$2 \
+				> /dev/null 2>&1
+		else
+			echo "Missing module parameters. Make sure pass"\
+			"spm_addr_dev0 and spm_addr_dev1"
+			usage
+		fi
+	fi
if [ $? == 0 ]; then
major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices)
mknod /dev/hmm_dmirror0 c $major 0
mknod /dev/hmm_dmirror1 c $major 1
+	if [ $# -eq 2 ]; then
+		mknod /dev/hmm_dmirror2 c $major 2
+		mknod /dev/hmm_dmirror3 c $major 3
+	fi
fi
}
@@ -58,7 +73,7 @@ run_smoke()
{
echo "Running smoke test. Note, this test provides basic coverage."
- load_driver
+ load_driver $1 $2
$(dirname "${BASH_SOURCE[0]}")/hmm-tests
unload_driver
}
@@ -75,6 +90,9 @@ usage()
echo "# Smoke testing"
echo "./${TEST_NAME}.sh smoke"
echo
+ echo "# Smoke testing with SPM enabled"
+ echo "./${TEST_NAME}.sh smoke <spm_addr_dev0> <spm_addr_dev1>"
+ echo
exit 0
}
@@ -84,7 +102,7 @@ function run_test()
usage
else
if [ "$1" = "smoke" ]; then
- run_smoke
+ run_smoke $2 $3
else
usage
fi
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 4bc24581760d..28b881523d15 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -77,6 +77,9 @@ static int bounces;
#define TEST_SHMEM 3
static int test_type;
+/* test using /dev/userfaultfd, instead of userfaultfd(2) */
+static bool test_dev_userfaultfd;
+
/* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */
#define ALARM_INTERVAL_SECS 10
static volatile bool test_uffdio_copy_eexist = true;
@@ -125,6 +128,8 @@ struct uffd_stats {
const char *examples =
"# Run anonymous memory test on 100MiB region with 99999 bounces:\n"
"./userfaultfd anon 100 99999\n\n"
+ "# Run the same anonymous memory test, but using /dev/userfaultfd:\n"
+ "./userfaultfd anon:dev 100 99999\n\n"
"# Run share memory test on 1GiB region with 99 bounces:\n"
"./userfaultfd shmem 1000 99\n\n"
"# Run hugetlb memory test on 256MiB region with 50 bounces:\n"
@@ -141,6 +146,13 @@ static void usage(void)
"[hugetlbfs_file]\n\n");
fprintf(stderr, "Supported <test type>: anon, hugetlb, "
"hugetlb_shared, shmem\n\n");
+ fprintf(stderr, "'Test mods' can be joined to the test type string with a ':'. "
+ "Supported mods:\n");
+ fprintf(stderr, "\tdev - Use /dev/userfaultfd instead of userfaultfd(2)\n");
+ fprintf(stderr, "\nExample test mod usage:\n");
+ fprintf(stderr, "# Run anonymous memory test with /dev/userfaultfd:\n");
+ fprintf(stderr, "./userfaultfd anon:dev 100 99999\n\n");
+
fprintf(stderr, "Examples:\n\n");
fprintf(stderr, "%s", examples);
exit(1);
@@ -154,12 +166,14 @@ static void usage(void)
ret, __LINE__); \
} while (0)
-#define err(fmt, ...) \
+/* Report an error through _err(), then exit with the given exit code. */
+#define errexit(exitcode, fmt, ...) \
do { \
_err(fmt, ##__VA_ARGS__); \
- exit(1); \
+ exit(exitcode); \
} while (0)
+/* errexit() with an exit code of 1. */
+#define err(fmt, ...) errexit(1, fmt, ##__VA_ARGS__)
+
static void uffd_stats_reset(struct uffd_stats *uffd_stats,
unsigned long n_cpus)
{
@@ -383,13 +397,31 @@ static void assert_expected_ioctls_present(uint64_t mode, uint64_t ioctls)
}
}
+/*
+ * Create a userfaultfd through /dev/userfaultfd and store it in uffd
+ * (declared outside this function).  uffd is left at -1 when the device
+ * node cannot be opened; otherwise it holds the ioctl's return value.
+ */
+static void __userfaultfd_open_dev(void)
+{
+	const int dev_fd = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
+
+	if (dev_fd < 0) {
+		uffd = -1;
+		return;
+	}
+
+	/* The device fd is only needed long enough to issue the ioctl. */
+	uffd = ioctl(dev_fd, USERFAULTFD_IOC_NEW,
+		     O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
+	close(dev_fd);
+}
+
static void userfaultfd_open(uint64_t *features)
{
struct uffdio_api uffdio_api;
- uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
+ if (test_dev_userfaultfd)
+ __userfaultfd_open_dev();
+ else
+ uffd = syscall(__NR_userfaultfd,
+ O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
if (uffd < 0)
- err("userfaultfd syscall not available in this kernel");
+ errexit(KSFT_SKIP, "creating userfaultfd failed");
uffd_flags = fcntl(uffd, F_GETFD, NULL);
uffdio_api.api = UFFD_API;
@@ -931,7 +963,7 @@ static int faulting_process(int signal_test)
unsigned long split_nr_pages;
unsigned long lastnr;
struct sigaction act;
- unsigned long signalled = 0;
+ volatile unsigned long signalled = 0;
split_nr_pages = (nr_pages + 1) / 2;
@@ -946,7 +978,7 @@ static int faulting_process(int signal_test)
}
for (nr = 0; nr < split_nr_pages; nr++) {
- int steps = 1;
+ volatile int steps = 1;
unsigned long offset = nr * page_size;
if (signal_test) {
@@ -1584,8 +1616,6 @@ unsigned long default_huge_page_size(void)
static void set_test_type(const char *type)
{
- uint64_t features = UFFD_API_FEATURES;
-
if (!strcmp(type, "anon")) {
test_type = TEST_ANON;
uffd_test_ops = &anon_uffd_test_ops;
@@ -1603,9 +1633,27 @@ static void set_test_type(const char *type)
test_type = TEST_SHMEM;
uffd_test_ops = &shmem_uffd_test_ops;
test_uffdio_minor = true;
- } else {
- err("Unknown test type: %s", type);
}
+}
+
+static void parse_test_type_arg(const char *raw_type)
+{
+ char *buf = strdup(raw_type);
+ uint64_t features = UFFD_API_FEATURES;
+
+ while (buf) {
+ const char *token = strsep(&buf, ":");
+
+ if (!test_type)
+ set_test_type(token);
+ else if (!strcmp(token, "dev"))
+ test_dev_userfaultfd = true;
+ else
+ err("unrecognized test mod '%s'", token);
+ }
+
+ if (!test_type)
+ err("failed to parse test type argument: '%s'", raw_type);
if (test_type == TEST_HUGETLB)
page_size = default_huge_page_size();
@@ -1653,7 +1701,7 @@ int main(int argc, char **argv)
err("failed to arm SIGALRM");
alarm(ALARM_INTERVAL_SECS);
- set_test_type(argv[1]);
+ parse_test_type_arg(argv[1]);
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size /
diff --git a/tools/testing/selftests/vm/va_128TBswitch.sh b/tools/testing/selftests/vm/va_128TBswitch.sh
new file mode 100644
index 000000000000..767a6465b5d2
--- /dev/null
+++ b/tools/testing/selftests/vm/va_128TBswitch.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2022 Adam Sindelar (Meta) <adam@wowsignal.io>
+#
+# This is a test for mmap behavior with 5-level paging. This script wraps the
+# real test to check that the kernel is configured to support at least 5
+# pagetable levels.
+
+# 1 means the test failed
+exitcode=1
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Print the message given in $1 and exit with $exitcode.
+die()
+{
+ echo "$1"
+ exit $exitcode
+}
+
+# Exit with the kselftest SKIP code unless the running kernel's config
+# reports at least 5 page table levels.  Calls die if no kernel config can
+# be found in /proc/config.gz or /boot/config-$(uname -r).
+check_test_requirements()
+{
+	local config="/proc/config.gz"
+	[[ -f "${config}" ]] || config="/boot/config-$(uname -r)"
+	[[ -f "${config}" ]] || die "Cannot find kernel config in /proc or /boot"
+
+	# gzip -dcfq automatically handles both compressed and plaintext input.
+	# See man 1 gzip under '-f'.
+	#
+	# Match only the CONFIG_PGTABLE_LEVELS assignment itself (first hit), so
+	# a line that merely mentions the symbol cannot turn the value into a
+	# multi-line string that breaks the numeric comparison below.  The
+	# declaration is kept separate from the assignment so that 'local' does
+	# not mask the pipeline's exit status.
+	local pg_table_levels
+	pg_table_levels=$(gzip -dcfq "${config}" |
+		grep -m1 '^CONFIG_PGTABLE_LEVELS=' | cut -d'=' -f 2)
+
+	if [[ "${pg_table_levels}" -lt 5 ]]; then
+		echo "$0: PG_TABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
+		exit $ksft_skip
+	fi
+}
+
+# Skip unless the kernel meets the requirements, then run the real test.
+check_test_requirements
+./va_128TBswitch