Mirror of https://github.com/torvalds/linux.git (synced 2026-03-08 01:04:41 +01:00)
bpf-fixes
-----BEGIN PGP SIGNATURE-----
iQIzBAABCAAdFiEE+soXsSLHKoYyzcli6rmadz2vbToFAmmjmDIACgkQ6rmadz2v
bTq3gg//QQLOT/FxP2/dDurliDTXvQRr1tUxmIw6s3P6hnz9j/LLEVKpLRVkqd8t
XEwbubPd1TXDRsJ4f26Ew01YUtf9xi6ZQoMe/BL1okxi0ZwQGGRVMkiKOQgRT+rj
qYSN5JMfPzA2AuM6FjBF/hhw24yVRdgKRYBam6D7XLfFf3s8TOhHHjJ925PqEo0t
uJOy4ddDYB9BcGmfoeyiFgUtpPqcYrKIUCLBdwFvT2fnPJvrFFoCF3t7NS9UJu/O
wd6ZPuGWSOl9A7vSheldP6cJUDX8L/5WEGO4/LjN7plkySF0HNv8uq/b1T3kKqoY
Y3unXerLGJUAA9D5wpYAekx9YmvRTPQ/o39oTbquEB4SSJVU/SPUpvFw7m2Moq10
51yuyXLcPlI3xtk0Bd8c/CESSmkRenjWzsuZQhDGhsR0I9mIaALrhf9LaatHtXI5
f5ct73e+beK7Fc0Ze+b0JxDeFvzA3CKfAF0/fvGt0r9VZjBaMD+a3NnscBlyKztW
UCXazcfndMhNfUUWanktbT5YhYPmY7hzVQEOl7HAMGn4yG6XbXXmzzY6BqEXIucM
etueW2msZJHGBHQGe2RK3lxtmiB7/FglJHd86xebkIU2gCzqt8fGUha8AIuJ4rLS
7wxC33DycCofRGWdseVu7PsTasdhSGsHKbXz2fOFOFESOczYRw8=
=fj3P
-----END PGP SIGNATURE-----

Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Pull bpf fixes from Alexei Starovoitov:

 - Fix alignment of arm64 JIT buffer to prevent atomic tearing (Fuad Tabba)

 - Fix invariant violation for single-value tnums in the verifier (Harishankar Vishwanathan, Paul Chaignon)

 - Fix a bunch of issues found by ASAN in selftests/bpf (Ihor Solodrai)

 - Fix race in devmap and cpumap on PREEMPT_RT (Jiayuan Chen)

 - Fix show_fdinfo of kprobe_multi when cookies are not present (Jiri Olsa)

 - Fix race in freeing special fields in BPF maps to prevent memory leaks (Kumar Kartikeya Dwivedi)

 - Fix OOB read in dmabuf_collector (T.J. Mercier)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf: (36 commits)
  selftests/bpf: Avoid simplification of crafted bounds test
  selftests/bpf: Test refinement of single-value tnum
  bpf: Improve bounds when tnum has a single possible value
  bpf: Introduce tnum_step to step through tnum's members
  bpf: Fix race in devmap on PREEMPT_RT
  bpf: Fix race in cpumap on PREEMPT_RT
  selftests/bpf: Add tests for special fields races
  bpf: Retire rcu_trace_implies_rcu_gp() from local storage
  bpf: Delay freeing fields in local storage
  bpf: Lose const-ness of map in map_check_btf()
  bpf: Register dtor for freeing special fields
  selftests/bpf: Fix OOB read in dmabuf_collector
  selftests/bpf: Fix a memory leak in xdp_flowtable test
  bpf: Fix stack-out-of-bounds write in devmap
  bpf: Fix kprobe_multi cookies access in show_fdinfo callback
  bpf, arm64: Force 8-byte alignment for JIT buffer to prevent atomic tearing
  selftests/bpf: Don't override SIGSEGV handler with ASAN
  selftests/bpf: Check BPFTOOL env var in detect_bpftool_path()
  selftests/bpf: Fix out-of-bounds array access bugs reported by ASAN
  selftests/bpf: Fix array bounds warning in jit_disasm_helpers
  ...
Commit eb71ab2bf7
73 changed files with 1181 additions and 237 deletions
@@ -303,7 +303,7 @@ static long arena_map_update_elem(struct bpf_map *map, void *key,
         return -EOPNOTSUPP;
 }

-static int arena_map_check_btf(const struct bpf_map *map, const struct btf *btf,
+static int arena_map_check_btf(struct bpf_map *map, const struct btf *btf,
                                const struct btf_type *key_type, const struct btf_type *value_type)
 {
         return 0;
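The next several hunks are the mechanical part of "bpf: Lose const-ness of map in map_check_btf()": every .map_check_btf implementation drops the const qualifier from its struct bpf_map argument so that the hashtab callback further down can register a destructor on the map's allocator while BTF is being checked.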
@@ -548,7 +548,7 @@ static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
         rcu_read_unlock();
 }

-static int array_map_check_btf(const struct bpf_map *map,
+static int array_map_check_btf(struct bpf_map *map,
                                const struct btf *btf,
                                const struct btf_type *key_type,
                                const struct btf_type *value_type)
@@ -180,7 +180,7 @@ static long bloom_map_update_elem(struct bpf_map *map, void *key,
         return -EINVAL;
 }

-static int bloom_map_check_btf(const struct bpf_map *map,
+static int bloom_map_check_btf(struct bpf_map *map,
                                const struct btf *btf,
                                const struct btf_type *key_type,
                                const struct btf_type *value_type)
@@ -98,7 +98,7 @@ static long insn_array_delete_elem(struct bpf_map *map, void *key)
         return -EINVAL;
 }

-static int insn_array_check_btf(const struct bpf_map *map,
+static int insn_array_check_btf(struct bpf_map *map,
                                 const struct btf *btf,
                                 const struct btf_type *key_type,
                                 const struct btf_type *value_type)
@@ -107,14 +107,12 @@ static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
 {
         struct bpf_local_storage *local_storage;

-        /* If RCU Tasks Trace grace period implies RCU grace period, do
-         * kfree(), else do kfree_rcu().
+        /*
+         * RCU Tasks Trace grace period implies RCU grace period, do
+         * kfree() directly.
          */
         local_storage = container_of(rcu, struct bpf_local_storage, rcu);
-        if (rcu_trace_implies_rcu_gp())
-                kfree(local_storage);
-        else
-                kfree_rcu(local_storage, rcu);
+        kfree(local_storage);
 }

 /* Handle use_kmalloc_nolock == false */
@@ -138,10 +136,11 @@ static void bpf_local_storage_free_rcu(struct rcu_head *rcu)

 static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
 {
-        if (rcu_trace_implies_rcu_gp())
-                bpf_local_storage_free_rcu(rcu);
-        else
-                call_rcu(rcu, bpf_local_storage_free_rcu);
+        /*
+         * RCU Tasks Trace grace period implies RCU grace period, do
+         * kfree() directly.
+         */
+        bpf_local_storage_free_rcu(rcu);
 }

 static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
@@ -164,16 +163,29 @@ static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
                                      bpf_local_storage_free_trace_rcu);
 }

+/* rcu callback for use_kmalloc_nolock == false */
+static void __bpf_selem_free_rcu(struct rcu_head *rcu)
+{
+        struct bpf_local_storage_elem *selem;
+        struct bpf_local_storage_map *smap;
+
+        selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
+        /* bpf_selem_unlink_nofail may have already cleared smap and freed fields. */
+        smap = rcu_dereference_check(SDATA(selem)->smap, 1);
+
+        if (smap)
+                bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+        kfree(selem);
+}
+
+/* rcu tasks trace callback for use_kmalloc_nolock == false */
 static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu)
 {
-        struct bpf_local_storage_elem *selem;
-
-        selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
-        if (rcu_trace_implies_rcu_gp())
-                kfree(selem);
-        else
-                kfree_rcu(selem, rcu);
+        /*
+         * RCU Tasks Trace grace period implies RCU grace period, do
+         * kfree() directly.
+         */
+        __bpf_selem_free_rcu(rcu);
 }

 /* Handle use_kmalloc_nolock == false */
@@ -181,7 +193,7 @@ static void __bpf_selem_free(struct bpf_local_storage_elem *selem,
                              bool vanilla_rcu)
 {
         if (vanilla_rcu)
-                kfree_rcu(selem, rcu);
+                call_rcu(&selem->rcu, __bpf_selem_free_rcu);
         else
                 call_rcu_tasks_trace(&selem->rcu, __bpf_selem_free_trace_rcu);
 }
@@ -195,37 +207,29 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
         /* The bpf_local_storage_map_free will wait for rcu_barrier */
         smap = rcu_dereference_check(SDATA(selem)->smap, 1);

-        if (smap) {
-                migrate_disable();
+        if (smap)
                 bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
-                migrate_enable();
-        }
         kfree_nolock(selem);
 }

 static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
 {
-        if (rcu_trace_implies_rcu_gp())
-                bpf_selem_free_rcu(rcu);
-        else
-                call_rcu(rcu, bpf_selem_free_rcu);
+        /*
+         * RCU Tasks Trace grace period implies RCU grace period, do
+         * kfree() directly.
+         */
+        bpf_selem_free_rcu(rcu);
 }

 void bpf_selem_free(struct bpf_local_storage_elem *selem,
                     bool reuse_now)
 {
         struct bpf_local_storage_map *smap;

         smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());

         if (!selem->use_kmalloc_nolock) {
                 /*
                  * No uptr will be unpin even when reuse_now == false since uptr
                  * is only supported in task local storage, where
                  * smap->use_kmalloc_nolock == true.
                  */
-                if (smap)
-                        bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
                 __bpf_selem_free(selem, reuse_now);
                 return;
         }
@@ -797,7 +801,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
         return 0;
 }

-int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+int bpf_local_storage_map_check_btf(struct bpf_map *map,
                                     const struct btf *btf,
                                     const struct btf_type *key_type,
                                     const struct btf_type *value_type)
@@ -958,10 +962,9 @@ restart:
          */
         synchronize_rcu();

-        if (smap->use_kmalloc_nolock) {
-                rcu_barrier_tasks_trace();
-                rcu_barrier();
-        }
+        /* smap remains in use regardless of kmalloc_nolock, so wait unconditionally. */
+        rcu_barrier_tasks_trace();
+        rcu_barrier();
         kvfree(smap->buckets);
         bpf_map_area_free(smap);
 }
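The local-storage hunks above combine three commits from the shortlog: rcu_trace_implies_rcu_gp() is retired because, per the new comments, an RCU Tasks Trace grace period already implies a regular RCU grace period, so the conditional kfree()/kfree_rcu() branches collapse into a direct free; bpf_obj_free_fields() moves out of the synchronous free path into the RCU callbacks, so a map's special fields are only released once readers can no longer reach the element; and map teardown now waits for both grace periods unconditionally. Together these close the race that could leak special fields.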
@@ -29,6 +29,7 @@
 #include <linux/sched.h>
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
+#include <linux/local_lock.h>
 #include <linux/completion.h>
 #include <trace/events/xdp.h>
 #include <linux/btf_ids.h>
@@ -52,6 +53,7 @@ struct xdp_bulk_queue {
         struct list_head flush_node;
         struct bpf_cpu_map_entry *obj;
         unsigned int count;
+        local_lock_t bq_lock;
 };

 /* Struct for every remote "destination" CPU in map */
@@ -451,6 +453,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
         for_each_possible_cpu(i) {
                 bq = per_cpu_ptr(rcpu->bulkq, i);
                 bq->obj = rcpu;
+                local_lock_init(&bq->bq_lock);
         }

         /* Alloc queue */
@@ -722,6 +725,8 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq)
         struct ptr_ring *q;
         int i;

+        lockdep_assert_held(&bq->bq_lock);
+
         if (unlikely(!bq->count))
                 return;

@@ -749,11 +754,15 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq)
 }

 /* Runs under RCU-read-side, plus in softirq under NAPI protection.
- * Thus, safe percpu variable access.
+ * Thus, safe percpu variable access. PREEMPT_RT relies on
+ * local_lock_nested_bh() to serialise access to the per-CPU bq.
  */
 static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
 {
-        struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
+        struct xdp_bulk_queue *bq;
+
+        local_lock_nested_bh(&rcpu->bulkq->bq_lock);
+        bq = this_cpu_ptr(rcpu->bulkq);

         if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
                 bq_flush_to_queue(bq);
@@ -774,6 +783,8 @@ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)

                 list_add(&bq->flush_node, flush_list);
         }
+
+        local_unlock_nested_bh(&rcpu->bulkq->bq_lock);
 }

 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
@@ -810,7 +821,9 @@ void __cpu_map_flush(struct list_head *flush_list)
         struct xdp_bulk_queue *bq, *tmp;

         list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
+                local_lock_nested_bh(&bq->obj->bulkq->bq_lock);
                 bq_flush_to_queue(bq);
+                local_unlock_nested_bh(&bq->obj->bulkq->bq_lock);

                 /* If already running, costs spin_lock_irqsave + smb_mb */
                 wake_up_process(bq->obj->kthread);
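On PREEMPT_RT, softirq processing runs in preemptible thread context, so local_bh_disable() no longer provides the implicit exclusion that made unlocked this_cpu_ptr() access to the bulk queue safe. The new per-queue local_lock_t restores that serialisation on RT while compiling down to (essentially) the old behaviour plus lockdep annotations on non-RT kernels; this is the race addressed by "bpf: Fix race in cpumap on PREEMPT_RT". The devmap hunks below apply the same pattern to struct xdp_dev_bulk_queue.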
@@ -45,6 +45,7 @@
  * types of devmap; only the lookup and insertion is different.
  */
 #include <linux/bpf.h>
+#include <linux/local_lock.h>
 #include <net/xdp.h>
 #include <linux/filter.h>
 #include <trace/events/xdp.h>
@@ -60,6 +61,7 @@ struct xdp_dev_bulk_queue {
         struct net_device *dev_rx;
         struct bpf_prog *xdp_prog;
         unsigned int count;
+        local_lock_t bq_lock;
 };

 struct bpf_dtab_netdev {
@@ -381,6 +383,8 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
         int to_send = cnt;
         int i;

+        lockdep_assert_held(&bq->bq_lock);
+
         if (unlikely(!cnt))
                 return;

@@ -425,10 +429,12 @@ void __dev_flush(struct list_head *flush_list)
         struct xdp_dev_bulk_queue *bq, *tmp;

         list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
+                local_lock_nested_bh(&bq->dev->xdp_bulkq->bq_lock);
                 bq_xmit_all(bq, XDP_XMIT_FLUSH);
                 bq->dev_rx = NULL;
                 bq->xdp_prog = NULL;
                 __list_del_clearprev(&bq->flush_node);
+                local_unlock_nested_bh(&bq->dev->xdp_bulkq->bq_lock);
         }
 }

@@ -451,12 +457,16 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)

 /* Runs in NAPI, i.e., softirq under local_bh_disable(). Thus, safe percpu
  * variable access, and map elements stick around. See comment above
- * xdp_do_flush() in filter.c.
+ * xdp_do_flush() in filter.c. PREEMPT_RT relies on local_lock_nested_bh()
+ * to serialise access to the per-CPU bq.
  */
 static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
                        struct net_device *dev_rx, struct bpf_prog *xdp_prog)
 {
-        struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
+        struct xdp_dev_bulk_queue *bq;
+
+        local_lock_nested_bh(&dev->xdp_bulkq->bq_lock);
+        bq = this_cpu_ptr(dev->xdp_bulkq);

         if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
                 bq_xmit_all(bq, 0);
@@ -477,6 +487,8 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
         }

         bq->q[bq->count++] = xdpf;
+
+        local_unlock_nested_bh(&dev->xdp_bulkq->bq_lock);
 }

 static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
@@ -588,18 +600,22 @@ static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifin
 }

 /* Get ifindex of each upper device. 'indexes' must be able to hold at
- * least MAX_NEST_DEV elements.
- * Returns the number of ifindexes added.
+ * least 'max' elements.
+ * Returns the number of ifindexes added, or -EOVERFLOW if there are too
+ * many upper devices.
  */
-static int get_upper_ifindexes(struct net_device *dev, int *indexes)
+static int get_upper_ifindexes(struct net_device *dev, int *indexes, int max)
 {
         struct net_device *upper;
         struct list_head *iter;
         int n = 0;

         netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+                if (n >= max)
+                        return -EOVERFLOW;
                 indexes[n++] = upper->ifindex;
         }

         return n;
 }

@@ -615,7 +631,11 @@ int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
         int err;

         if (exclude_ingress) {
-                num_excluded = get_upper_ifindexes(dev_rx, excluded_devices);
+                num_excluded = get_upper_ifindexes(dev_rx, excluded_devices,
+                                                   ARRAY_SIZE(excluded_devices) - 1);
+                if (num_excluded < 0)
+                        return num_excluded;
+
                 excluded_devices[num_excluded++] = dev_rx->ifindex;
         }

@@ -733,7 +753,11 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
         int err;

         if (exclude_ingress) {
-                num_excluded = get_upper_ifindexes(dev, excluded_devices);
+                num_excluded = get_upper_ifindexes(dev, excluded_devices,
+                                                   ARRAY_SIZE(excluded_devices) - 1);
+                if (num_excluded < 0)
+                        return num_excluded;
+
                 excluded_devices[num_excluded++] = dev->ifindex;
         }

@@ -1115,8 +1139,13 @@ static int dev_map_notification(struct notifier_block *notifier,
                 if (!netdev->xdp_bulkq)
                         return NOTIFY_BAD;

-                for_each_possible_cpu(cpu)
-                        per_cpu_ptr(netdev->xdp_bulkq, cpu)->dev = netdev;
+                for_each_possible_cpu(cpu) {
+                        struct xdp_dev_bulk_queue *bq;
+
+                        bq = per_cpu_ptr(netdev->xdp_bulkq, cpu);
+                        bq->dev = netdev;
+                        local_lock_init(&bq->bq_lock);
+                }
                 break;
         case NETDEV_UNREGISTER:
                 /* This rcu_read_lock/unlock pair is needed because
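The get_upper_ifindexes() change corresponds to "bpf: Fix stack-out-of-bounds write in devmap": the walk over upper devices now respects a caller-supplied bound (the callers pass ARRAY_SIZE(excluded_devices) - 1 so one slot stays free for the ingress ifindex) and fails with -EOVERFLOW instead of writing past the on-stack array. A minimal user-space sketch of the same guard, with hypothetical names rather than kernel code:

#include <assert.h>
#include <errno.h>
#include <stddef.h>

/* Toy stand-in for get_upper_ifindexes(): copy at most 'max' entries from
 * 'src' into 'dst', or fail with -EOVERFLOW instead of overrunning dst. */
static int collect_bounded(int *dst, size_t max, const int *src, size_t nsrc)
{
        size_t n = 0;

        for (size_t i = 0; i < nsrc; i++) {
                if (n >= max)
                        return -EOVERFLOW;
                dst[n++] = src[i];
        }
        return (int)n;
}

int main(void)
{
        int upper[] = { 3, 4, 5, 6 };
        int out[8];

        /* Fits: all four ifindexes are copied. */
        assert(collect_bounded(out, 8, upper, 4) == 4);
        /* Would overflow a 3-entry buffer: the caller gets an error,
         * not an out-of-bounds stack write. */
        assert(collect_bounded(out, 3, upper, 4) == -EOVERFLOW);
        return 0;
}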
@@ -125,6 +125,11 @@ struct htab_elem {
         char key[] __aligned(8);
 };

+struct htab_btf_record {
+        struct btf_record *record;
+        u32 key_size;
+};
+
 static inline bool htab_is_prealloc(const struct bpf_htab *htab)
 {
         return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
@@ -457,6 +462,83 @@ static int htab_map_alloc_check(union bpf_attr *attr)
         return 0;
 }

+static void htab_mem_dtor(void *obj, void *ctx)
+{
+        struct htab_btf_record *hrec = ctx;
+        struct htab_elem *elem = obj;
+        void *map_value;
+
+        if (IS_ERR_OR_NULL(hrec->record))
+                return;
+
+        map_value = htab_elem_value(elem, hrec->key_size);
+        bpf_obj_free_fields(hrec->record, map_value);
+}
+
+static void htab_pcpu_mem_dtor(void *obj, void *ctx)
+{
+        void __percpu *pptr = *(void __percpu **)obj;
+        struct htab_btf_record *hrec = ctx;
+        int cpu;
+
+        if (IS_ERR_OR_NULL(hrec->record))
+                return;
+
+        for_each_possible_cpu(cpu)
+                bpf_obj_free_fields(hrec->record, per_cpu_ptr(pptr, cpu));
+}
+
+static void htab_dtor_ctx_free(void *ctx)
+{
+        struct htab_btf_record *hrec = ctx;
+
+        btf_record_free(hrec->record);
+        kfree(ctx);
+}
+
+static int htab_set_dtor(struct bpf_htab *htab, void (*dtor)(void *, void *))
+{
+        u32 key_size = htab->map.key_size;
+        struct bpf_mem_alloc *ma;
+        struct htab_btf_record *hrec;
+        int err;
+
+        /* No need for dtors. */
+        if (IS_ERR_OR_NULL(htab->map.record))
+                return 0;
+
+        hrec = kzalloc(sizeof(*hrec), GFP_KERNEL);
+        if (!hrec)
+                return -ENOMEM;
+        hrec->key_size = key_size;
+        hrec->record = btf_record_dup(htab->map.record);
+        if (IS_ERR(hrec->record)) {
+                err = PTR_ERR(hrec->record);
+                kfree(hrec);
+                return err;
+        }
+        ma = htab_is_percpu(htab) ? &htab->pcpu_ma : &htab->ma;
+        bpf_mem_alloc_set_dtor(ma, dtor, htab_dtor_ctx_free, hrec);
+        return 0;
+}
+
+static int htab_map_check_btf(struct bpf_map *map, const struct btf *btf,
+                              const struct btf_type *key_type, const struct btf_type *value_type)
+{
+        struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+
+        if (htab_is_prealloc(htab))
+                return 0;
+        /*
+         * We must set the dtor using this callback, as map's BTF record is not
+         * populated in htab_map_alloc(), so it will always appear as NULL.
+         */
+        if (htab_is_percpu(htab))
+                return htab_set_dtor(htab, htab_pcpu_mem_dtor);
+        else
+                return htab_set_dtor(htab, htab_mem_dtor);
+}
+
 static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 {
         bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
@@ -2281,6 +2363,7 @@ const struct bpf_map_ops htab_map_ops = {
         .map_seq_show_elem = htab_map_seq_show_elem,
         .map_set_for_each_callback_args = map_set_for_each_callback_args,
         .map_for_each_callback = bpf_for_each_hash_elem,
+        .map_check_btf = htab_map_check_btf,
         .map_mem_usage = htab_map_mem_usage,
         BATCH_OPS(htab),
         .map_btf_id = &htab_map_btf_ids[0],
@@ -2303,6 +2386,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
         .map_seq_show_elem = htab_map_seq_show_elem,
         .map_set_for_each_callback_args = map_set_for_each_callback_args,
         .map_for_each_callback = bpf_for_each_hash_elem,
+        .map_check_btf = htab_map_check_btf,
         .map_mem_usage = htab_map_mem_usage,
         BATCH_OPS(htab_lru),
         .map_btf_id = &htab_map_btf_ids[0],
@@ -2482,6 +2566,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
         .map_seq_show_elem = htab_percpu_map_seq_show_elem,
         .map_set_for_each_callback_args = map_set_for_each_callback_args,
         .map_for_each_callback = bpf_for_each_hash_elem,
+        .map_check_btf = htab_map_check_btf,
         .map_mem_usage = htab_map_mem_usage,
         BATCH_OPS(htab_percpu),
         .map_btf_id = &htab_map_btf_ids[0],
@@ -2502,6 +2587,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
         .map_seq_show_elem = htab_percpu_map_seq_show_elem,
         .map_set_for_each_callback_args = map_set_for_each_callback_args,
         .map_for_each_callback = bpf_for_each_hash_elem,
+        .map_check_btf = htab_map_check_btf,
         .map_mem_usage = htab_map_mem_usage,
         BATCH_OPS(htab_lru_percpu),
         .map_btf_id = &htab_map_btf_ids[0],
@@ -364,7 +364,7 @@ static long cgroup_storage_delete_elem(struct bpf_map *map, void *key)
         return -EINVAL;
 }

-static int cgroup_storage_check_btf(const struct bpf_map *map,
+static int cgroup_storage_check_btf(struct bpf_map *map,
                                     const struct btf *btf,
                                     const struct btf_type *key_type,
                                     const struct btf_type *value_type)
@@ -751,7 +751,7 @@ free_stack:
         return err;
 }

-static int trie_check_btf(const struct bpf_map *map,
+static int trie_check_btf(struct bpf_map *map,
                           const struct btf *btf,
                           const struct btf_type *key_type,
                           const struct btf_type *value_type)
@@ -102,6 +102,8 @@ struct bpf_mem_cache {
         int percpu_size;
         bool draining;
         struct bpf_mem_cache *tgt;
+        void (*dtor)(void *obj, void *ctx);
+        void *dtor_ctx;

         /* list of objects to be freed after RCU GP */
         struct llist_head free_by_rcu;
@@ -260,12 +262,14 @@ static void free_one(void *obj, bool percpu)
                 kfree(obj);
 }

-static int free_all(struct llist_node *llnode, bool percpu)
+static int free_all(struct bpf_mem_cache *c, struct llist_node *llnode, bool percpu)
 {
         struct llist_node *pos, *t;
         int cnt = 0;

         llist_for_each_safe(pos, t, llnode) {
+                if (c->dtor)
+                        c->dtor((void *)pos + LLIST_NODE_SZ, c->dtor_ctx);
                 free_one(pos, percpu);
                 cnt++;
         }
@@ -276,7 +280,7 @@ static void __free_rcu(struct rcu_head *head)
 {
         struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu_ttrace);

-        free_all(llist_del_all(&c->waiting_for_gp_ttrace), !!c->percpu_size);
+        free_all(c, llist_del_all(&c->waiting_for_gp_ttrace), !!c->percpu_size);
         atomic_set(&c->call_rcu_ttrace_in_progress, 0);
 }

@@ -308,7 +312,7 @@ static void do_call_rcu_ttrace(struct bpf_mem_cache *c)
         if (atomic_xchg(&c->call_rcu_ttrace_in_progress, 1)) {
                 if (unlikely(READ_ONCE(c->draining))) {
                         llnode = llist_del_all(&c->free_by_rcu_ttrace);
-                        free_all(llnode, !!c->percpu_size);
+                        free_all(c, llnode, !!c->percpu_size);
                 }
                 return;
         }
@@ -417,7 +421,7 @@ static void check_free_by_rcu(struct bpf_mem_cache *c)
         dec_active(c, &flags);

         if (unlikely(READ_ONCE(c->draining))) {
-                free_all(llist_del_all(&c->waiting_for_gp), !!c->percpu_size);
+                free_all(c, llist_del_all(&c->waiting_for_gp), !!c->percpu_size);
                 atomic_set(&c->call_rcu_in_progress, 0);
         } else {
                 call_rcu_hurry(&c->rcu, __free_by_rcu);
@@ -635,13 +639,13 @@ static void drain_mem_cache(struct bpf_mem_cache *c)
          * Except for waiting_for_gp_ttrace list, there are no concurrent operations
          * on these lists, so it is safe to use __llist_del_all().
          */
-        free_all(llist_del_all(&c->free_by_rcu_ttrace), percpu);
-        free_all(llist_del_all(&c->waiting_for_gp_ttrace), percpu);
-        free_all(__llist_del_all(&c->free_llist), percpu);
-        free_all(__llist_del_all(&c->free_llist_extra), percpu);
-        free_all(__llist_del_all(&c->free_by_rcu), percpu);
-        free_all(__llist_del_all(&c->free_llist_extra_rcu), percpu);
-        free_all(llist_del_all(&c->waiting_for_gp), percpu);
+        free_all(c, llist_del_all(&c->free_by_rcu_ttrace), percpu);
+        free_all(c, llist_del_all(&c->waiting_for_gp_ttrace), percpu);
+        free_all(c, __llist_del_all(&c->free_llist), percpu);
+        free_all(c, __llist_del_all(&c->free_llist_extra), percpu);
+        free_all(c, __llist_del_all(&c->free_by_rcu), percpu);
+        free_all(c, __llist_del_all(&c->free_llist_extra_rcu), percpu);
+        free_all(c, llist_del_all(&c->waiting_for_gp), percpu);
 }

 static void check_mem_cache(struct bpf_mem_cache *c)
@@ -680,6 +684,9 @@ static void check_leaked_objs(struct bpf_mem_alloc *ma)

 static void free_mem_alloc_no_barrier(struct bpf_mem_alloc *ma)
 {
+        /* We can free dtor ctx only once all callbacks are done using it. */
+        if (ma->dtor_ctx_free)
+                ma->dtor_ctx_free(ma->dtor_ctx);
         check_leaked_objs(ma);
         free_percpu(ma->cache);
         free_percpu(ma->caches);
@@ -1014,3 +1021,32 @@ int bpf_mem_alloc_check_size(bool percpu, size_t size)

         return 0;
 }
+
+void bpf_mem_alloc_set_dtor(struct bpf_mem_alloc *ma, void (*dtor)(void *obj, void *ctx),
+                            void (*dtor_ctx_free)(void *ctx), void *ctx)
+{
+        struct bpf_mem_caches *cc;
+        struct bpf_mem_cache *c;
+        int cpu, i;
+
+        ma->dtor_ctx_free = dtor_ctx_free;
+        ma->dtor_ctx = ctx;
+
+        if (ma->cache) {
+                for_each_possible_cpu(cpu) {
+                        c = per_cpu_ptr(ma->cache, cpu);
+                        c->dtor = dtor;
+                        c->dtor_ctx = ctx;
+                }
+        }
+        if (ma->caches) {
+                for_each_possible_cpu(cpu) {
+                        cc = per_cpu_ptr(ma->caches, cpu);
+                        for (i = 0; i < NUM_CACHES; i++) {
+                                c = &cc->cache[i];
+                                c->dtor = dtor;
+                                c->dtor_ctx = ctx;
+                        }
+                }
+        }
+}
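bpf_mem_alloc_set_dtor() wires the same destructor and shared context into every per-CPU cache, free_all() then runs the destructor once per object before releasing it, and free_mem_alloc_no_barrier() releases the context only after all callbacks are done. The ownership contract, reduced to a user-space toy (hypothetical names, not the kernel API; the real callers are htab_set_dtor() and htab_dtor_ctx_free() above):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Toy allocator mirroring the dtor/ctx contract: the destructor runs once
 * per object as it is freed, and the shared context is released exactly
 * once, after no callback can still use it. */
struct toy_alloc {
        void (*dtor)(void *obj, void *ctx);
        void (*ctx_free)(void *ctx);
        void *ctx;
};

static void toy_free(struct toy_alloc *a, void *obj)
{
        if (a->dtor)
                a->dtor(obj, a->ctx);   /* per-object cleanup, as in free_all() */
        free(obj);
}

static void toy_destroy(struct toy_alloc *a)
{
        if (a->ctx_free)
                a->ctx_free(a->ctx);    /* as in free_mem_alloc_no_barrier() */
}

static void obj_dtor(void *obj, void *ctx)
{
        printf("dtor for %p with ctx \"%s\"\n", obj, (const char *)ctx);
}

int main(void)
{
        struct toy_alloc a = { .dtor = obj_dtor, .ctx_free = free,
                               .ctx = strdup("btf record") };
        void *obj = malloc(16);

        toy_free(&a, obj);
        toy_destroy(&a);
        return 0;
}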
@@ -1234,7 +1234,7 @@ int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
 }
 EXPORT_SYMBOL_GPL(bpf_obj_name_cpy);

-int map_check_no_btf(const struct bpf_map *map,
+int map_check_no_btf(struct bpf_map *map,
                      const struct btf *btf,
                      const struct btf_type *key_type,
                      const struct btf_type *value_type)
@@ -269,3 +269,59 @@ struct tnum tnum_bswap64(struct tnum a)
 {
         return TNUM(swab64(a.value), swab64(a.mask));
 }
+
+/* Given tnum t, and a number z such that tmin <= z < tmax, where tmin
+ * is the smallest member of the t (= t.value) and tmax is the largest
+ * member of t (= t.value | t.mask), returns the smallest member of t
+ * larger than z.
+ *
+ * For example,
+ *   t =      x11100x0
+ *   z =      11110001 (241)
+ *   result = 11110010 (242)
+ *
+ * Note: if this function is called with z >= tmax, it just returns
+ * early with tmax; if this function is called with z < tmin, the
+ * algorithm already returns tmin.
+ */
+u64 tnum_step(struct tnum t, u64 z)
+{
+        u64 tmax, j, p, q, r, s, v, u, w, res;
+        u8 k;
+
+        tmax = t.value | t.mask;
+
+        /* if z >= largest member of t, return largest member of t */
+        if (z >= tmax)
+                return tmax;
+
+        /* if z < smallest member of t, return smallest member of t */
+        if (z < t.value)
+                return t.value;
+
+        /* keep t's known bits, and match all unknown bits to z */
+        j = t.value | (z & t.mask);
+
+        if (j > z) {
+                p = ~z & t.value & ~t.mask;
+                k = fls64(p); /* k is the most-significant 0-to-1 flip */
+                q = U64_MAX << k;
+                r = q & z; /* positions > k matched to z */
+                s = ~q & t.value; /* positions <= k matched to t.value */
+                v = r | s;
+                res = v;
+        } else {
+                p = z & ~t.value & ~t.mask;
+                k = fls64(p); /* k is the most-significant 1-to-0 flip */
+                q = U64_MAX << k;
+                r = q & t.mask & z; /* unknown positions > k, matched to z */
+                s = q & ~t.mask; /* known positions > k, set to 1 */
+                v = r | s;
+                /* add 1 to unknown positions > k to make value greater than z */
+                u = v + (1ULL << k);
+                /* extract bits in unknown positions > k from u, rest from t.value */
+                w = (u & t.mask) | t.value;
+                res = w;
+        }
+        return res;
+}
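Since tnum_step() is new in this series, here is a self-contained user-space transcription that checks the worked example from its comment block (t = x11100x0, z = 241 -> 242). fls64() is replaced by a stand-in assuming the kernel semantics (1-based index of the most significant set bit, 0 for 0); treat this as a sketch for experimentation, not the kernel build:

#include <assert.h>
#include <stdint.h>

struct tnum { uint64_t value; uint64_t mask; };

/* Stand-in for the kernel's fls64(): 1-based MSB index, 0 when x == 0. */
static int fls64_us(uint64_t x)
{
        return x ? 64 - __builtin_clzll(x) : 0;
}

/* Condensed transcription of the tnum_step() added above. */
static uint64_t tnum_step(struct tnum t, uint64_t z)
{
        uint64_t tmax = t.value | t.mask, j, p, q, r, s, v, u;
        int k;

        if (z >= tmax)
                return tmax;
        if (z < t.value)
                return t.value;

        j = t.value | (z & t.mask);
        if (j > z) {
                p = ~z & t.value & ~t.mask;
                k = fls64_us(p);
                q = UINT64_MAX << k;
                return (q & z) | (~q & t.value);
        }
        p = z & ~t.value & ~t.mask;
        k = fls64_us(p);
        q = UINT64_MAX << k;
        r = q & t.mask & z;
        s = q & ~t.mask;
        v = r | s;
        u = v + (1ULL << k);
        return (u & t.mask) | t.value;
}

int main(void)
{
        /* t = x11100x0: value = 0b01110000, mask = 0b10000010 */
        struct tnum t = { .value = 0x70, .mask = 0x82 };

        assert(tnum_step(t, 241) == 242);       /* the example from the comment */
        assert(tnum_step(t, 0) == 0x70);        /* below tmin -> tmin */
        assert(tnum_step(t, 255) == 0xf2);      /* at or above tmax -> tmax */
        return 0;
}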
@@ -2379,6 +2379,9 @@ static void __update_reg32_bounds(struct bpf_reg_state *reg)

 static void __update_reg64_bounds(struct bpf_reg_state *reg)
 {
+        u64 tnum_next, tmax;
+        bool umin_in_tnum;
+
         /* min signed is max(sign bit) | min(other bits) */
         reg->smin_value = max_t(s64, reg->smin_value,
                                 reg->var_off.value | (reg->var_off.mask & S64_MIN));
@@ -2388,6 +2391,33 @@ static void __update_reg64_bounds(struct bpf_reg_state *reg)
         reg->umin_value = max(reg->umin_value, reg->var_off.value);
         reg->umax_value = min(reg->umax_value,
                               reg->var_off.value | reg->var_off.mask);
+
+        /* Check if u64 and tnum overlap in a single value */
+        tnum_next = tnum_step(reg->var_off, reg->umin_value);
+        umin_in_tnum = (reg->umin_value & ~reg->var_off.mask) == reg->var_off.value;
+        tmax = reg->var_off.value | reg->var_off.mask;
+        if (umin_in_tnum && tnum_next > reg->umax_value) {
+                /* The u64 range and the tnum only overlap in umin.
+                 * u64:  ---[xxxxxx]-----
+                 * tnum: --xx----------x-
+                 */
+                ___mark_reg_known(reg, reg->umin_value);
+        } else if (!umin_in_tnum && tnum_next == tmax) {
+                /* The u64 range and the tnum only overlap in the maximum value
+                 * represented by the tnum, called tmax.
+                 * u64:  ---[xxxxxx]-----
+                 * tnum: xx-----x--------
+                 */
+                ___mark_reg_known(reg, tmax);
+        } else if (!umin_in_tnum && tnum_next <= reg->umax_value &&
+                   tnum_step(reg->var_off, tnum_next) > reg->umax_value) {
+                /* The u64 range and the tnum only overlap in between umin
+                 * (excluded) and umax.
+                 * u64:  ---[xxxxxx]-----
+                 * tnum: xx----x-------x-
+                 */
+                ___mark_reg_known(reg, tnum_next);
+        }
 }

 static void __update_reg_bounds(struct bpf_reg_state *reg)
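A concrete, made-up instance of the first case above: with var_off = (value 0x00, mask 0x11) the tnum's members are {0x00, 0x01, 0x10, 0x11}; if the unsigned bounds are [0x01, 0x0f], umin is a member and the next member, 0x10, already exceeds umax, so the register can only hold 0x01 and ___mark_reg_known() collapses it. A tiny check of that arithmetic:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint64_t value = 0x00, mask = 0x11;     /* members: 0x00, 0x01, 0x10, 0x11 */
        uint64_t umin = 0x01, umax = 0x0f;
        uint64_t next_member = 0x10;            /* what tnum_step(var_off, umin) yields here */

        /* umin itself satisfies the tnum... */
        assert((umin & ~mask) == value);
        /* ...and no other member fits in [umin, umax], so the only possible
         * value is umin: the verifier marks the register as known. */
        assert(next_member > umax);
        return 0;
}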
@@ -2454,8 +2454,10 @@ static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link,
                                          struct seq_file *seq)
 {
         struct bpf_kprobe_multi_link *kmulti_link;
+        bool has_cookies;

         kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
+        has_cookies = !!kmulti_link->cookies;

         seq_printf(seq,
                    "kprobe_cnt:\t%u\n"
@@ -2467,7 +2469,7 @@ static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link,
         for (int i = 0; i < kmulti_link->cnt; i++) {
                 seq_printf(seq,
                            "%llu\t %pS\n",
-                           kmulti_link->cookies[i],
+                           has_cookies ? kmulti_link->cookies[i] : 0,
                            (void *)kmulti_link->addrs[i]);
         }
 }