diff options
Diffstat (limited to 'kernel/bpf/bpf_local_storage.c')
-rw-r--r-- | kernel/bpf/bpf_local_storage.c | 371 |
1 files changed, 296 insertions, 75 deletions
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 35f4138a54dc..47d9948d768f 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -51,11 +51,21 @@ owner_storage(struct bpf_local_storage_map *smap, void *owner) return map->ops->map_owner_storage_ptr(owner); } +static bool selem_linked_to_storage_lockless(const struct bpf_local_storage_elem *selem) +{ + return !hlist_unhashed_lockless(&selem->snode); +} + static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem) { return !hlist_unhashed(&selem->snode); } +static bool selem_linked_to_map_lockless(const struct bpf_local_storage_elem *selem) +{ + return !hlist_unhashed_lockless(&selem->map_node); +} + static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem) { return !hlist_unhashed(&selem->map_node); @@ -70,11 +80,28 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, if (charge_mem && mem_charge(smap, owner, smap->elem_size)) return NULL; - selem = bpf_map_kzalloc(&smap->map, smap->elem_size, - gfp_flags | __GFP_NOWARN); + if (smap->bpf_ma) { + migrate_disable(); + selem = bpf_mem_cache_alloc_flags(&smap->selem_ma, gfp_flags); + migrate_enable(); + if (selem) + /* Keep the original bpf_map_kzalloc behavior + * before started using the bpf_mem_cache_alloc. + * + * No need to use zero_map_value. The bpf_selem_free() + * only does bpf_mem_cache_free when there is + * no other bpf prog is using the selem. + */ + memset(SDATA(selem)->data, 0, smap->map.value_size); + } else { + selem = bpf_map_kzalloc(&smap->map, smap->elem_size, + gfp_flags | __GFP_NOWARN); + } + if (selem) { if (value) copy_map_value(&smap->map, SDATA(selem)->data, value); + /* No need to call check_and_init_map_value as memory is zero init */ return selem; } @@ -84,7 +111,8 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, return NULL; } -void bpf_local_storage_free_rcu(struct rcu_head *rcu) +/* rcu tasks trace callback for bpf_ma == false */ +static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) { struct bpf_local_storage *local_storage; @@ -98,7 +126,66 @@ void bpf_local_storage_free_rcu(struct rcu_head *rcu) kfree_rcu(local_storage, rcu); } -static void bpf_selem_free_rcu(struct rcu_head *rcu) +static void bpf_local_storage_free_rcu(struct rcu_head *rcu) +{ + struct bpf_local_storage *local_storage; + + local_storage = container_of(rcu, struct bpf_local_storage, rcu); + bpf_mem_cache_raw_free(local_storage); +} + +static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) +{ + if (rcu_trace_implies_rcu_gp()) + bpf_local_storage_free_rcu(rcu); + else + call_rcu(rcu, bpf_local_storage_free_rcu); +} + +/* Handle bpf_ma == false */ +static void __bpf_local_storage_free(struct bpf_local_storage *local_storage, + bool vanilla_rcu) +{ + if (vanilla_rcu) + kfree_rcu(local_storage, rcu); + else + call_rcu_tasks_trace(&local_storage->rcu, + __bpf_local_storage_free_trace_rcu); +} + +static void bpf_local_storage_free(struct bpf_local_storage *local_storage, + struct bpf_local_storage_map *smap, + bool bpf_ma, bool reuse_now) +{ + if (!local_storage) + return; + + if (!bpf_ma) { + __bpf_local_storage_free(local_storage, reuse_now); + return; + } + + if (!reuse_now) { + call_rcu_tasks_trace(&local_storage->rcu, + bpf_local_storage_free_trace_rcu); + return; + } + + if (smap) { + migrate_disable(); + bpf_mem_cache_free(&smap->storage_ma, local_storage); + migrate_enable(); + } else { + /* smap could be NULL if the selem that triggered + * this 'local_storage' creation had been long gone. + * In this case, directly do call_rcu(). + */ + call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu); + } +} + +/* rcu tasks trace callback for bpf_ma == false */ +static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu) { struct bpf_local_storage_elem *selem; @@ -109,13 +196,63 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu) kfree_rcu(selem, rcu); } +/* Handle bpf_ma == false */ +static void __bpf_selem_free(struct bpf_local_storage_elem *selem, + bool vanilla_rcu) +{ + if (vanilla_rcu) + kfree_rcu(selem, rcu); + else + call_rcu_tasks_trace(&selem->rcu, __bpf_selem_free_trace_rcu); +} + +static void bpf_selem_free_rcu(struct rcu_head *rcu) +{ + struct bpf_local_storage_elem *selem; + + selem = container_of(rcu, struct bpf_local_storage_elem, rcu); + bpf_mem_cache_raw_free(selem); +} + +static void bpf_selem_free_trace_rcu(struct rcu_head *rcu) +{ + if (rcu_trace_implies_rcu_gp()) + bpf_selem_free_rcu(rcu); + else + call_rcu(rcu, bpf_selem_free_rcu); +} + +void bpf_selem_free(struct bpf_local_storage_elem *selem, + struct bpf_local_storage_map *smap, + bool reuse_now) +{ + bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); + + if (!smap->bpf_ma) { + __bpf_selem_free(selem, reuse_now); + return; + } + + if (!reuse_now) { + call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu); + } else { + /* Instead of using the vanilla call_rcu(), + * bpf_mem_cache_free will be able to reuse selem + * immediately. + */ + migrate_disable(); + bpf_mem_cache_free(&smap->selem_ma, selem); + migrate_enable(); + } +} + /* local_storage->lock must be held and selem->local_storage == local_storage. * The caller must ensure selem->smap is still valid to be * dereferenced for its smap->elem_size and smap->cache_idx. */ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem, - bool uncharge_mem, bool use_trace_rcu) + bool uncharge_mem, bool reuse_now) { struct bpf_local_storage_map *smap; bool free_local_storage; @@ -159,40 +296,75 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor SDATA(selem)) RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); - if (use_trace_rcu) - call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu); - else - kfree_rcu(selem, rcu); + bpf_selem_free(selem, smap, reuse_now); + + if (rcu_access_pointer(local_storage->smap) == smap) + RCU_INIT_POINTER(local_storage->smap, NULL); return free_local_storage; } -static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, - bool use_trace_rcu) +static bool check_storage_bpf_ma(struct bpf_local_storage *local_storage, + struct bpf_local_storage_map *storage_smap, + struct bpf_local_storage_elem *selem) +{ + + struct bpf_local_storage_map *selem_smap; + + /* local_storage->smap may be NULL. If it is, get the bpf_ma + * from any selem in the local_storage->list. The bpf_ma of all + * local_storage and selem should have the same value + * for the same map type. + * + * If the local_storage->list is already empty, the caller will not + * care about the bpf_ma value also because the caller is not + * responsibile to free the local_storage. + */ + + if (storage_smap) + return storage_smap->bpf_ma; + + if (!selem) { + struct hlist_node *n; + + n = rcu_dereference_check(hlist_first_rcu(&local_storage->list), + bpf_rcu_lock_held()); + if (!n) + return false; + + selem = hlist_entry(n, struct bpf_local_storage_elem, snode); + } + selem_smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); + + return selem_smap->bpf_ma; +} + +static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, + bool reuse_now) { + struct bpf_local_storage_map *storage_smap; struct bpf_local_storage *local_storage; - bool free_local_storage = false; + bool bpf_ma, free_local_storage = false; unsigned long flags; - if (unlikely(!selem_linked_to_storage(selem))) + if (unlikely(!selem_linked_to_storage_lockless(selem))) /* selem has already been unlinked from sk */ return; local_storage = rcu_dereference_check(selem->local_storage, bpf_rcu_lock_held()); + storage_smap = rcu_dereference_check(local_storage->smap, + bpf_rcu_lock_held()); + bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, selem); + raw_spin_lock_irqsave(&local_storage->lock, flags); if (likely(selem_linked_to_storage(selem))) free_local_storage = bpf_selem_unlink_storage_nolock( - local_storage, selem, true, use_trace_rcu); + local_storage, selem, true, reuse_now); raw_spin_unlock_irqrestore(&local_storage->lock, flags); - if (free_local_storage) { - if (use_trace_rcu) - call_rcu_tasks_trace(&local_storage->rcu, - bpf_local_storage_free_rcu); - else - kfree_rcu(local_storage, rcu); - } + if (free_local_storage) + bpf_local_storage_free(local_storage, storage_smap, bpf_ma, reuse_now); } void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, @@ -202,13 +374,13 @@ void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, hlist_add_head_rcu(&selem->snode, &local_storage->list); } -void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) +static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) { struct bpf_local_storage_map *smap; struct bpf_local_storage_map_bucket *b; unsigned long flags; - if (unlikely(!selem_linked_to_map(selem))) + if (unlikely(!selem_linked_to_map_lockless(selem))) /* selem has already be unlinked from smap */ return; @@ -232,14 +404,14 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap, raw_spin_unlock_irqrestore(&b->lock, flags); } -void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu) +void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now) { /* Always unlink from map before unlinking from local_storage * because selem will be freed after successfully unlinked from * the local_storage. */ bpf_selem_unlink_map(selem); - __bpf_selem_unlink_storage(selem, use_trace_rcu); + bpf_selem_unlink_storage(selem, reuse_now); } /* If cacheit_lockit is false, this lookup function is lockless */ @@ -312,13 +484,21 @@ int bpf_local_storage_alloc(void *owner, if (err) return err; - storage = bpf_map_kzalloc(&smap->map, sizeof(*storage), - gfp_flags | __GFP_NOWARN); + if (smap->bpf_ma) { + migrate_disable(); + storage = bpf_mem_cache_alloc_flags(&smap->storage_ma, gfp_flags); + migrate_enable(); + } else { + storage = bpf_map_kzalloc(&smap->map, sizeof(*storage), + gfp_flags | __GFP_NOWARN); + } + if (!storage) { err = -ENOMEM; goto uncharge; } + RCU_INIT_POINTER(storage->smap, smap); INIT_HLIST_HEAD(&storage->list); raw_spin_lock_init(&storage->lock); storage->owner = owner; @@ -358,7 +538,7 @@ int bpf_local_storage_alloc(void *owner, return 0; uncharge: - kfree(storage); + bpf_local_storage_free(storage, smap, smap->bpf_ma, true); mem_uncharge(smap, owner, sizeof(*storage)); return err; } @@ -402,7 +582,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags); if (err) { - kfree(selem); + bpf_selem_free(selem, smap, true); mem_uncharge(smap, owner, smap->elem_size); return ERR_PTR(err); } @@ -420,7 +600,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, err = check_flags(old_sdata, map_flags); if (err) return ERR_PTR(err); - if (old_sdata && selem_linked_to_storage(SELEM(old_sdata))) { + if (old_sdata && selem_linked_to_storage_lockless(SELEM(old_sdata))) { copy_map_value_locked(&smap->map, old_sdata->data, value, false); return old_sdata; @@ -485,7 +665,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, if (old_sdata) { bpf_selem_unlink_map(SELEM(old_sdata)); bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata), - false, true); + false, false); } unlock: @@ -496,7 +676,7 @@ unlock_err: raw_spin_unlock_irqrestore(&local_storage->lock, flags); if (selem) { mem_uncharge(smap, owner, smap->elem_size); - kfree(selem); + bpf_selem_free(selem, smap, true); } return ERR_PTR(err); } @@ -552,40 +732,6 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr) return 0; } -static struct bpf_local_storage_map *__bpf_local_storage_map_alloc(union bpf_attr *attr) -{ - struct bpf_local_storage_map *smap; - unsigned int i; - u32 nbuckets; - - smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE); - if (!smap) - return ERR_PTR(-ENOMEM); - bpf_map_init_from_attr(&smap->map, attr); - - nbuckets = roundup_pow_of_two(num_possible_cpus()); - /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */ - nbuckets = max_t(u32, 2, nbuckets); - smap->bucket_log = ilog2(nbuckets); - - smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets), - nbuckets, GFP_USER | __GFP_NOWARN); - if (!smap->buckets) { - bpf_map_area_free(smap); - return ERR_PTR(-ENOMEM); - } - - for (i = 0; i < nbuckets; i++) { - INIT_HLIST_HEAD(&smap->buckets[i].list); - raw_spin_lock_init(&smap->buckets[i].lock); - } - - smap->elem_size = offsetof(struct bpf_local_storage_elem, - sdata.data[attr->value_size]); - - return smap; -} - int bpf_local_storage_map_check_btf(const struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, @@ -603,11 +749,16 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map, return 0; } -bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage) +void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) { + struct bpf_local_storage_map *storage_smap; struct bpf_local_storage_elem *selem; - bool free_storage = false; + bool bpf_ma, free_storage = false; struct hlist_node *n; + unsigned long flags; + + storage_smap = rcu_dereference_check(local_storage->smap, bpf_rcu_lock_held()); + bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, NULL); /* Neither the bpf_prog nor the bpf_map's syscall * could be modifying the local_storage->list now. @@ -618,6 +769,7 @@ bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage) * when unlinking elem from the local_storage->list and * the map's bucket->list. */ + raw_spin_lock_irqsave(&local_storage->lock, flags); hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) { /* Always unlink from map before unlinking from * local_storage. @@ -630,24 +782,89 @@ bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage) * of the loop will set the free_cgroup_storage to true. */ free_storage = bpf_selem_unlink_storage_nolock( - local_storage, selem, false, false); + local_storage, selem, false, true); } + raw_spin_unlock_irqrestore(&local_storage->lock, flags); - return free_storage; + if (free_storage) + bpf_local_storage_free(local_storage, storage_smap, bpf_ma, true); } +u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map) +{ + struct bpf_local_storage_map *smap = (struct bpf_local_storage_map *)map; + u64 usage = sizeof(*smap); + + /* The dynamically callocated selems are not counted currently. */ + usage += sizeof(*smap->buckets) * (1ULL << smap->bucket_log); + return usage; +} + +/* When bpf_ma == true, the bpf_mem_alloc is used to allocate and free memory. + * A deadlock free allocator is useful for storage that the bpf prog can easily + * get a hold of the owner PTR_TO_BTF_ID in any context. eg. bpf_get_current_task_btf. + * The task and cgroup storage fall into this case. The bpf_mem_alloc reuses + * memory immediately. To be reuse-immediate safe, the owner destruction + * code path needs to go through a rcu grace period before calling + * bpf_local_storage_destroy(). + * + * When bpf_ma == false, the kmalloc and kfree are used. + */ struct bpf_map * bpf_local_storage_map_alloc(union bpf_attr *attr, - struct bpf_local_storage_cache *cache) + struct bpf_local_storage_cache *cache, + bool bpf_ma) { struct bpf_local_storage_map *smap; + unsigned int i; + u32 nbuckets; + int err; + + smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE); + if (!smap) + return ERR_PTR(-ENOMEM); + bpf_map_init_from_attr(&smap->map, attr); + + nbuckets = roundup_pow_of_two(num_possible_cpus()); + /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */ + nbuckets = max_t(u32, 2, nbuckets); + smap->bucket_log = ilog2(nbuckets); + + smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets), + nbuckets, GFP_USER | __GFP_NOWARN); + if (!smap->buckets) { + err = -ENOMEM; + goto free_smap; + } + + for (i = 0; i < nbuckets; i++) { + INIT_HLIST_HEAD(&smap->buckets[i].list); + raw_spin_lock_init(&smap->buckets[i].lock); + } + + smap->elem_size = offsetof(struct bpf_local_storage_elem, + sdata.data[attr->value_size]); - smap = __bpf_local_storage_map_alloc(attr); - if (IS_ERR(smap)) - return ERR_CAST(smap); + smap->bpf_ma = bpf_ma; + if (bpf_ma) { + err = bpf_mem_alloc_init(&smap->selem_ma, smap->elem_size, false); + if (err) + goto free_smap; + + err = bpf_mem_alloc_init(&smap->storage_ma, sizeof(struct bpf_local_storage), false); + if (err) { + bpf_mem_alloc_destroy(&smap->selem_ma); + goto free_smap; + } + } smap->cache_idx = bpf_local_storage_cache_idx_get(cache); return &smap->map; + +free_smap: + kvfree(smap->buckets); + bpf_map_area_free(smap); + return ERR_PTR(err); } void bpf_local_storage_map_free(struct bpf_map *map, @@ -689,7 +906,7 @@ void bpf_local_storage_map_free(struct bpf_map *map, migrate_disable(); this_cpu_inc(*busy_counter); } - bpf_selem_unlink(selem, false); + bpf_selem_unlink(selem, true); if (busy_counter) { this_cpu_dec(*busy_counter); migrate_enable(); @@ -713,6 +930,10 @@ void bpf_local_storage_map_free(struct bpf_map *map, */ synchronize_rcu(); + if (smap->bpf_ma) { + bpf_mem_alloc_destroy(&smap->selem_ma); + bpf_mem_alloc_destroy(&smap->storage_ma); + } kvfree(smap->buckets); bpf_map_area_free(smap); } |