summaryrefslogtreecommitdiff
path: root/kernel/bpf/hashtab.c
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@fb.com>2016-02-01 22:39:55 -0800
committerDavid S. Miller <davem@davemloft.net>2016-02-06 03:34:36 -0500
commit15a07b33814d14ca817887dbea8530728dc0fbe4 (patch)
treecf33026c34f2fbbe72e6ba3dd8079e3b0afa00ad /kernel/bpf/hashtab.c
parenta10423b87a7eae75da79ce80a8d9475047a674ee (diff)
downloadlinux-15a07b33814d14ca817887dbea8530728dc0fbe4.tar.gz
bpf: add lookup/update support for per-cpu hash and array maps
The functions bpf_map_lookup_elem(map, key, value) and bpf_map_update_elem(map, key, value, flags) need to get/set values from all-cpus for per-cpu hash and array maps, so that user space can aggregate/update them as necessary. Example of single counter aggregation in user space: unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); long values[nr_cpus]; long value = 0; bpf_lookup_elem(fd, key, values); for (i = 0; i < nr_cpus; i++) value += values[i]; The user space must provide round_up(value_size, 8) * nr_cpus array to get/set values, since kernel will use 'long' copy of per-cpu values to try to copy good counters atomically. It's a best-effort, since bpf programs and user space are racing to access the same memory. Signed-off-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/bpf/hashtab.c')
-rw-r--r--kernel/bpf/hashtab.c83
1 files changed, 74 insertions, 9 deletions
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 2be5f6e8bb04..fd5db8fe9360 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -290,7 +290,7 @@ static void free_htab_elem(struct htab_elem *l, bool percpu, u32 key_size)
static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
void *value, u32 key_size, u32 hash,
- bool percpu)
+ bool percpu, bool onallcpus)
{
u32 size = htab->map.value_size;
struct htab_elem *l_new;
@@ -312,8 +312,18 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
return NULL;
}
- /* copy true value_size bytes */
- memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
+ if (!onallcpus) {
+ /* copy true value_size bytes */
+ memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
+ } else {
+ int off = 0, cpu;
+
+ for_each_possible_cpu(cpu) {
+ bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
+ value + off, size);
+ off += size;
+ }
+ }
htab_elem_set_ptr(l_new, key_size, pptr);
} else {
memcpy(l_new->key + round_up(key_size, 8), value, size);
@@ -368,7 +378,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
/* allocate new element outside of the lock, since
* we're most likley going to insert it
*/
- l_new = alloc_htab_elem(htab, key, value, key_size, hash, false);
+ l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
if (!l_new)
return -ENOMEM;
@@ -402,8 +412,9 @@ err:
return ret;
}
-static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
- void *value, u64 map_flags)
+static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags,
+ bool onallcpus)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new = NULL, *l_old;
@@ -436,12 +447,25 @@ static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
goto err;
if (l_old) {
+ void __percpu *pptr = htab_elem_get_ptr(l_old, key_size);
+ u32 size = htab->map.value_size;
+
/* per-cpu hash map can update value in-place */
- memcpy(this_cpu_ptr(htab_elem_get_ptr(l_old, key_size)),
- value, htab->map.value_size);
+ if (!onallcpus) {
+ memcpy(this_cpu_ptr(pptr), value, size);
+ } else {
+ int off = 0, cpu;
+
+ size = round_up(size, 8);
+ for_each_possible_cpu(cpu) {
+ bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
+ value + off, size);
+ off += size;
+ }
+ }
} else {
l_new = alloc_htab_elem(htab, key, value, key_size,
- hash, true);
+ hash, true, onallcpus);
if (!l_new) {
ret = -ENOMEM;
goto err;
@@ -455,6 +479,12 @@ err:
return ret;
}
+static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags)
+{
+ return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
+}
+
/* Called from syscall or from eBPF program */
static int htab_map_delete_elem(struct bpf_map *map, void *key)
{
@@ -557,6 +587,41 @@ static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
return NULL;
}
+int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
+{
+ struct htab_elem *l;
+ void __percpu *pptr;
+ int ret = -ENOENT;
+ int cpu, off = 0;
+ u32 size;
+
+ /* per_cpu areas are zero-filled and bpf programs can only
+ * access 'value_size' of them, so copying rounded areas
+ * will not leak any kernel data
+ */
+ size = round_up(map->value_size, 8);
+ rcu_read_lock();
+ l = __htab_map_lookup_elem(map, key);
+ if (!l)
+ goto out;
+ pptr = htab_elem_get_ptr(l, map->key_size);
+ for_each_possible_cpu(cpu) {
+ bpf_long_memcpy(value + off,
+ per_cpu_ptr(pptr, cpu), size);
+ off += size;
+ }
+ ret = 0;
+out:
+ rcu_read_unlock();
+ return ret;
+}
+
+int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
+ u64 map_flags)
+{
+ return __htab_percpu_map_update_elem(map, key, value, map_flags, true);
+}
+
static const struct bpf_map_ops htab_percpu_ops = {
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,