bpf-fixes

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEE+soXsSLHKoYyzcli6rmadz2vbToFAmmjmDIACgkQ6rmadz2v
 bTq3gg//QQLOT/FxP2/dDurliDTXvQRr1tUxmIw6s3P6hnz9j/LLEVKpLRVkqd8t
 XEwbubPd1TXDRsJ4f26Ew01YUtf9xi6ZQoMe/BL1okxi0ZwQGGRVMkiKOQgRT+rj
 qYSN5JMfPzA2AuM6FjBF/hhw24yVRdgKRYBam6D7XLfFf3s8TOhHHjJ925PqEo0t
 uJOy4ddDYB9BcGmfoeyiFgUtpPqcYrKIUCLBdwFvT2fnPJvrFFoCF3t7NS9UJu/O
 wd6ZPuGWSOl9A7vSheldP6cJUDX8L/5WEGO4/LjN7plkySF0HNv8uq/b1T3kKqoY
 Y3unXerLGJUAA9D5wpYAekx9YmvRTPQ/o39oTbquEB4SSJVU/SPUpvFw7m2Moq10
 51yuyXLcPlI3xtk0Bd8c/CESSmkRenjWzsuZQhDGhsR0I9mIaALrhf9LaatHtXI5
 f5ct73e+beK7Fc0Ze+b0JxDeFvzA3CKfAF0/fvGt0r9VZjBaMD+a3NnscBlyKztW
 UCXazcfndMhNfUUWanktbT5YhYPmY7hzVQEOl7HAMGn4yG6XbXXmzzY6BqEXIucM
 etueW2msZJHGBHQGe2RK3lxtmiB7/FglJHd86xebkIU2gCzqt8fGUha8AIuJ4rLS
 7wxC33DycCofRGWdseVu7PsTasdhSGsHKbXz2fOFOFESOczYRw8=
 =fj3P
 -----END PGP SIGNATURE-----

Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Pull bpf fixes from Alexei Starovoitov:

 - Fix alignment of arm64 JIT buffer to prevent atomic tearing (Fuad
   Tabba)

 - Fix invariant violation for single value tnums in the verifier
   (Harishankar Vishwanathan, Paul Chaignon)

 - Fix a bunch of issues found by ASAN in selftests/bpf (Ihor Solodrai)

 - Fix race in devmap and cpumap on PREEMPT_RT (Jiayuan Chen)

 - Fix show_fdinfo of kprobe_multi when cookies are not present (Jiri
   Olsa)

 - Fix race in freeing special fields in BPF maps to prevent memory
   leaks (Kumar Kartikeya Dwivedi)

 - Fix OOB read in dmabuf_collector (T.J. Mercier)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf: (36 commits)
  selftests/bpf: Avoid simplification of crafted bounds test
  selftests/bpf: Test refinement of single-value tnum
  bpf: Improve bounds when tnum has a single possible value
  bpf: Introduce tnum_step to step through tnum's members
  bpf: Fix race in devmap on PREEMPT_RT
  bpf: Fix race in cpumap on PREEMPT_RT
  selftests/bpf: Add tests for special fields races
  bpf: Retire rcu_trace_implies_rcu_gp() from local storage
  bpf: Delay freeing fields in local storage
  bpf: Lose const-ness of map in map_check_btf()
  bpf: Register dtor for freeing special fields
  selftests/bpf: Fix OOB read in dmabuf_collector
  selftests/bpf: Fix a memory leak in xdp_flowtable test
  bpf: Fix stack-out-of-bounds write in devmap
  bpf: Fix kprobe_multi cookies access in show_fdinfo callback
  bpf, arm64: Force 8-byte alignment for JIT buffer to prevent atomic tearing
  selftests/bpf: Don't override SIGSEGV handler with ASAN
  selftests/bpf: Check BPFTOOL env var in detect_bpftool_path()
  selftests/bpf: Fix out-of-bounds array access bugs reported by ASAN
  selftests/bpf: Fix array bounds warning in jit_disasm_helpers
  ...
commit eb71ab2bf7
Linus Torvalds, 2026-02-28 19:54:28 -08:00

 73 files changed, 1181 insertions(+), 237 deletions(-)

View file

@@ -303,7 +303,7 @@ static long arena_map_update_elem(struct bpf_map *map, void *key,
 	return -EOPNOTSUPP;
 }
 
-static int arena_map_check_btf(const struct bpf_map *map, const struct btf *btf,
+static int arena_map_check_btf(struct bpf_map *map, const struct btf *btf,
 			       const struct btf_type *key_type, const struct btf_type *value_type)
 {
 	return 0;

View file

@@ -548,7 +548,7 @@ static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
 	rcu_read_unlock();
 }
 
-static int array_map_check_btf(const struct bpf_map *map,
+static int array_map_check_btf(struct bpf_map *map,
 			       const struct btf *btf,
 			       const struct btf_type *key_type,
 			       const struct btf_type *value_type)

View file

@@ -180,7 +180,7 @@ static long bloom_map_update_elem(struct bpf_map *map, void *key,
 	return -EINVAL;
 }
 
-static int bloom_map_check_btf(const struct bpf_map *map,
+static int bloom_map_check_btf(struct bpf_map *map,
 			       const struct btf *btf,
 			       const struct btf_type *key_type,
 			       const struct btf_type *value_type)

View file

@@ -98,7 +98,7 @@ static long insn_array_delete_elem(struct bpf_map *map, void *key)
 	return -EINVAL;
 }
 
-static int insn_array_check_btf(const struct bpf_map *map,
+static int insn_array_check_btf(struct bpf_map *map,
 				const struct btf *btf,
 				const struct btf_type *key_type,
 				const struct btf_type *value_type)

View file

@@ -107,14 +107,12 @@ static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
 {
 	struct bpf_local_storage *local_storage;
 
-	/* If RCU Tasks Trace grace period implies RCU grace period, do
-	 * kfree(), else do kfree_rcu().
+	/*
+	 * RCU Tasks Trace grace period implies RCU grace period, do
+	 * kfree() directly.
 	 */
 	local_storage = container_of(rcu, struct bpf_local_storage, rcu);
-	if (rcu_trace_implies_rcu_gp())
-		kfree(local_storage);
-	else
-		kfree_rcu(local_storage, rcu);
+	kfree(local_storage);
 }
 
 /* Handle use_kmalloc_nolock == false */
@@ -138,10 +136,11 @@ static void bpf_local_storage_free_rcu(struct rcu_head *rcu)
 
 static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
 {
-	if (rcu_trace_implies_rcu_gp())
-		bpf_local_storage_free_rcu(rcu);
-	else
-		call_rcu(rcu, bpf_local_storage_free_rcu);
+	/*
+	 * RCU Tasks Trace grace period implies RCU grace period, do
+	 * kfree() directly.
+	 */
+	bpf_local_storage_free_rcu(rcu);
 }
 
 static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
@@ -164,16 +163,29 @@ bpf_local_storage_free_trace_rcu);
 			   bpf_local_storage_free_trace_rcu);
 }
 
+/* rcu callback for use_kmalloc_nolock == false */
+static void __bpf_selem_free_rcu(struct rcu_head *rcu)
+{
+	struct bpf_local_storage_elem *selem;
+	struct bpf_local_storage_map *smap;
+
+	selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
+	/* bpf_selem_unlink_nofail may have already cleared smap and freed fields. */
+	smap = rcu_dereference_check(SDATA(selem)->smap, 1);
+	if (smap)
+		bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+	kfree(selem);
+}
+
 /* rcu tasks trace callback for use_kmalloc_nolock == false */
 static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu)
 {
-	struct bpf_local_storage_elem *selem;
-
-	selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
-	if (rcu_trace_implies_rcu_gp())
-		kfree(selem);
-	else
-		kfree_rcu(selem, rcu);
+	/*
+	 * RCU Tasks Trace grace period implies RCU grace period, do
+	 * kfree() directly.
+	 */
+	__bpf_selem_free_rcu(rcu);
 }
 
 /* Handle use_kmalloc_nolock == false */
@@ -181,7 +193,7 @@ static void __bpf_selem_free(struct bpf_local_storage_elem *selem,
 			     bool vanilla_rcu)
 {
 	if (vanilla_rcu)
-		kfree_rcu(selem, rcu);
+		call_rcu(&selem->rcu, __bpf_selem_free_rcu);
 	else
 		call_rcu_tasks_trace(&selem->rcu, __bpf_selem_free_trace_rcu);
 }
@@ -195,37 +207,29 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
 
 	/* The bpf_local_storage_map_free will wait for rcu_barrier */
 	smap = rcu_dereference_check(SDATA(selem)->smap, 1);
-	if (smap) {
-		migrate_disable();
+	if (smap)
 		bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
-		migrate_enable();
-	}
 	kfree_nolock(selem);
 }
 
 static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
 {
-	if (rcu_trace_implies_rcu_gp())
-		bpf_selem_free_rcu(rcu);
-	else
-		call_rcu(rcu, bpf_selem_free_rcu);
+	/*
+	 * RCU Tasks Trace grace period implies RCU grace period, do
+	 * kfree() directly.
+	 */
+	bpf_selem_free_rcu(rcu);
 }
 
 void bpf_selem_free(struct bpf_local_storage_elem *selem,
 		    bool reuse_now)
 {
-	struct bpf_local_storage_map *smap;
-
-	smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
-
 	if (!selem->use_kmalloc_nolock) {
 		/*
 		 * No uptr will be unpin even when reuse_now == false since uptr
 		 * is only supported in task local storage, where
 		 * smap->use_kmalloc_nolock == true.
 		 */
-		if (smap)
-			bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
 		__bpf_selem_free(selem, reuse_now);
 		return;
 	}
@@ -797,7 +801,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
 	return 0;
 }
 
-int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+int bpf_local_storage_map_check_btf(struct bpf_map *map,
 				    const struct btf *btf,
 				    const struct btf_type *key_type,
 				    const struct btf_type *value_type)
@@ -958,10 +962,9 @@ restart:
 	 */
 	synchronize_rcu();
 
-	if (smap->use_kmalloc_nolock) {
-		rcu_barrier_tasks_trace();
-		rcu_barrier();
-	}
+	/* smap remains in use regardless of kmalloc_nolock, so wait unconditionally. */
+	rcu_barrier_tasks_trace();
+	rcu_barrier();
 	kvfree(smap->buckets);
 	bpf_map_area_free(smap);
 }

View file

@@ -29,6 +29,7 @@
 #include <linux/sched.h>
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
+#include <linux/local_lock.h>
 #include <linux/completion.h>
 #include <trace/events/xdp.h>
 #include <linux/btf_ids.h>
@@ -52,6 +53,7 @@ struct xdp_bulk_queue {
 	struct list_head flush_node;
 	struct bpf_cpu_map_entry *obj;
 	unsigned int count;
+	local_lock_t bq_lock;
 };
 
 /* Struct for every remote "destination" CPU in map */
@@ -451,6 +453,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
 	for_each_possible_cpu(i) {
 		bq = per_cpu_ptr(rcpu->bulkq, i);
 		bq->obj = rcpu;
+		local_lock_init(&bq->bq_lock);
 	}
 
 	/* Alloc queue */
@@ -722,6 +725,8 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq)
 	struct ptr_ring *q;
 	int i;
 
+	lockdep_assert_held(&bq->bq_lock);
+
 	if (unlikely(!bq->count))
 		return;
@@ -749,11 +754,15 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq)
 }
 
 /* Runs under RCU-read-side, plus in softirq under NAPI protection.
- * Thus, safe percpu variable access.
+ * Thus, safe percpu variable access. PREEMPT_RT relies on
+ * local_lock_nested_bh() to serialise access to the per-CPU bq.
  */
 static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
 {
-	struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
+	struct xdp_bulk_queue *bq;
+
+	local_lock_nested_bh(&rcpu->bulkq->bq_lock);
+	bq = this_cpu_ptr(rcpu->bulkq);
 
 	if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
 		bq_flush_to_queue(bq);
@@ -774,6 +783,8 @@ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
 
 		list_add(&bq->flush_node, flush_list);
 	}
+
+	local_unlock_nested_bh(&rcpu->bulkq->bq_lock);
 }
 
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
@@ -810,7 +821,9 @@ void __cpu_map_flush(struct list_head *flush_list)
 	struct xdp_bulk_queue *bq, *tmp;
 
 	list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
+		local_lock_nested_bh(&bq->obj->bulkq->bq_lock);
 		bq_flush_to_queue(bq);
+		local_unlock_nested_bh(&bq->obj->bulkq->bq_lock);
 
 		/* If already running, costs spin_lock_irqsave + smb_mb */
 		wake_up_process(bq->obj->kthread);
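The cpumap hunks above (and the devmap hunks below) follow one pattern: embed a local_lock_t in the per-CPU bulk queue and take it with local_lock_nested_bh() around every this_cpu_ptr() access. A minimal sketch of that pattern follows; the names pcpu_bulkq and bulkq_enqueue are made up for illustration and are not part of the patch.

/* Illustrative sketch only; not part of the patch. */
#include <linux/local_lock.h>
#include <linux/percpu.h>

#define BULKQ_SIZE 8

struct pcpu_bulkq {
	local_lock_t lock;
	unsigned int count;
	void *q[BULKQ_SIZE];
};

static DEFINE_PER_CPU(struct pcpu_bulkq, pcpu_bulkq) = {
	.lock = INIT_LOCAL_LOCK(lock),
};

static void bulkq_enqueue(void *item)
{
	struct pcpu_bulkq *bq;

	/* A lockdep-only annotation on !PREEMPT_RT; a real per-CPU lock on
	 * PREEMPT_RT, where BH-disabled sections can be preempted, so
	 * this_cpu_ptr() alone no longer guarantees exclusive access.
	 */
	local_lock_nested_bh(&pcpu_bulkq.lock);
	bq = this_cpu_ptr(&pcpu_bulkq);
	if (bq->count < BULKQ_SIZE)
		bq->q[bq->count++] = item;
	local_unlock_nested_bh(&pcpu_bulkq.lock);
}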

View file

@@ -45,6 +45,7 @@
  * types of devmap; only the lookup and insertion is different.
  */
 #include <linux/bpf.h>
+#include <linux/local_lock.h>
 #include <net/xdp.h>
 #include <linux/filter.h>
 #include <trace/events/xdp.h>
@@ -60,6 +61,7 @@ struct xdp_dev_bulk_queue {
 	struct net_device *dev_rx;
 	struct bpf_prog *xdp_prog;
 	unsigned int count;
+	local_lock_t bq_lock;
 };
 
 struct bpf_dtab_netdev {
@@ -381,6 +383,8 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
 	int to_send = cnt;
 	int i;
 
+	lockdep_assert_held(&bq->bq_lock);
+
 	if (unlikely(!cnt))
 		return;
@@ -425,10 +429,12 @@ void __dev_flush(struct list_head *flush_list)
 	struct xdp_dev_bulk_queue *bq, *tmp;
 
 	list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
+		local_lock_nested_bh(&bq->dev->xdp_bulkq->bq_lock);
 		bq_xmit_all(bq, XDP_XMIT_FLUSH);
 		bq->dev_rx = NULL;
 		bq->xdp_prog = NULL;
 		__list_del_clearprev(&bq->flush_node);
+		local_unlock_nested_bh(&bq->dev->xdp_bulkq->bq_lock);
 	}
 }
@@ -451,12 +457,16 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 
 /* Runs in NAPI, i.e., softirq under local_bh_disable(). Thus, safe percpu
  * variable access, and map elements stick around. See comment above
- * xdp_do_flush() in filter.c.
+ * xdp_do_flush() in filter.c. PREEMPT_RT relies on local_lock_nested_bh()
+ * to serialise access to the per-CPU bq.
  */
 static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 		       struct net_device *dev_rx, struct bpf_prog *xdp_prog)
 {
-	struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
+	struct xdp_dev_bulk_queue *bq;
+
+	local_lock_nested_bh(&dev->xdp_bulkq->bq_lock);
+	bq = this_cpu_ptr(dev->xdp_bulkq);
 
 	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
 		bq_xmit_all(bq, 0);
@@ -477,6 +487,8 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 	}
 
 	bq->q[bq->count++] = xdpf;
+
+	local_unlock_nested_bh(&dev->xdp_bulkq->bq_lock);
 }
 
 static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
@@ -588,18 +600,22 @@ static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifin
 }
 
 /* Get ifindex of each upper device. 'indexes' must be able to hold at
- * least MAX_NEST_DEV elements.
- * Returns the number of ifindexes added.
+ * least 'max' elements.
+ * Returns the number of ifindexes added, or -EOVERFLOW if there are too
+ * many upper devices.
  */
-static int get_upper_ifindexes(struct net_device *dev, int *indexes)
+static int get_upper_ifindexes(struct net_device *dev, int *indexes, int max)
 {
 	struct net_device *upper;
 	struct list_head *iter;
 	int n = 0;
 
 	netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+		if (n >= max)
+			return -EOVERFLOW;
+
 		indexes[n++] = upper->ifindex;
 	}
 
 	return n;
 }
@@ -615,7 +631,11 @@ int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
 	int err;
 
 	if (exclude_ingress) {
-		num_excluded = get_upper_ifindexes(dev_rx, excluded_devices);
+		num_excluded = get_upper_ifindexes(dev_rx, excluded_devices,
+						   ARRAY_SIZE(excluded_devices) - 1);
+		if (num_excluded < 0)
+			return num_excluded;
+
 		excluded_devices[num_excluded++] = dev_rx->ifindex;
 	}
@@ -733,7 +753,11 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
 	int err;
 
 	if (exclude_ingress) {
-		num_excluded = get_upper_ifindexes(dev, excluded_devices);
+		num_excluded = get_upper_ifindexes(dev, excluded_devices,
+						   ARRAY_SIZE(excluded_devices) - 1);
+		if (num_excluded < 0)
+			return num_excluded;
+
 		excluded_devices[num_excluded++] = dev->ifindex;
 	}
@@ -1115,8 +1139,13 @@ static int dev_map_notification(struct notifier_block *notifier,
 		if (!netdev->xdp_bulkq)
 			return NOTIFY_BAD;
 
-		for_each_possible_cpu(cpu)
-			per_cpu_ptr(netdev->xdp_bulkq, cpu)->dev = netdev;
+		for_each_possible_cpu(cpu) {
+			struct xdp_dev_bulk_queue *bq;
+
+			bq = per_cpu_ptr(netdev->xdp_bulkq, cpu);
+			bq->dev = netdev;
+			local_lock_init(&bq->bq_lock);
+		}
 		break;
 	case NETDEV_UNREGISTER:
 		/* This rcu_read_lock/unlock pair is needed because

View file

@@ -125,6 +125,11 @@ struct htab_elem {
 	char key[] __aligned(8);
 };
 
+struct htab_btf_record {
+	struct btf_record *record;
+	u32 key_size;
+};
+
 static inline bool htab_is_prealloc(const struct bpf_htab *htab)
 {
 	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
@@ -457,6 +462,83 @@ static int htab_map_alloc_check(union bpf_attr *attr)
 	return 0;
 }
 
+static void htab_mem_dtor(void *obj, void *ctx)
+{
+	struct htab_btf_record *hrec = ctx;
+	struct htab_elem *elem = obj;
+	void *map_value;
+
+	if (IS_ERR_OR_NULL(hrec->record))
+		return;
+
+	map_value = htab_elem_value(elem, hrec->key_size);
+	bpf_obj_free_fields(hrec->record, map_value);
+}
+
+static void htab_pcpu_mem_dtor(void *obj, void *ctx)
+{
+	void __percpu *pptr = *(void __percpu **)obj;
+	struct htab_btf_record *hrec = ctx;
+	int cpu;
+
+	if (IS_ERR_OR_NULL(hrec->record))
+		return;
+
+	for_each_possible_cpu(cpu)
+		bpf_obj_free_fields(hrec->record, per_cpu_ptr(pptr, cpu));
+}
+
+static void htab_dtor_ctx_free(void *ctx)
+{
+	struct htab_btf_record *hrec = ctx;
+
+	btf_record_free(hrec->record);
+	kfree(ctx);
+}
+
+static int htab_set_dtor(struct bpf_htab *htab, void (*dtor)(void *, void *))
+{
+	u32 key_size = htab->map.key_size;
+	struct bpf_mem_alloc *ma;
+	struct htab_btf_record *hrec;
+	int err;
+
+	/* No need for dtors. */
+	if (IS_ERR_OR_NULL(htab->map.record))
+		return 0;
+
+	hrec = kzalloc(sizeof(*hrec), GFP_KERNEL);
+	if (!hrec)
+		return -ENOMEM;
+
+	hrec->key_size = key_size;
+	hrec->record = btf_record_dup(htab->map.record);
+	if (IS_ERR(hrec->record)) {
+		err = PTR_ERR(hrec->record);
+		kfree(hrec);
+		return err;
+	}
+
+	ma = htab_is_percpu(htab) ? &htab->pcpu_ma : &htab->ma;
+	bpf_mem_alloc_set_dtor(ma, dtor, htab_dtor_ctx_free, hrec);
+	return 0;
+}
+
+static int htab_map_check_btf(struct bpf_map *map, const struct btf *btf,
+			      const struct btf_type *key_type, const struct btf_type *value_type)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+
+	if (htab_is_prealloc(htab))
+		return 0;
+
+	/*
+	 * We must set the dtor using this callback, as map's BTF record is not
+	 * populated in htab_map_alloc(), so it will always appear as NULL.
+	 */
+	if (htab_is_percpu(htab))
+		return htab_set_dtor(htab, htab_pcpu_mem_dtor);
+	else
+		return htab_set_dtor(htab, htab_mem_dtor);
+}
+
 static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 {
 	bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
@@ -2281,6 +2363,7 @@ const struct bpf_map_ops htab_map_ops = {
 	.map_seq_show_elem = htab_map_seq_show_elem,
 	.map_set_for_each_callback_args = map_set_for_each_callback_args,
 	.map_for_each_callback = bpf_for_each_hash_elem,
+	.map_check_btf = htab_map_check_btf,
 	.map_mem_usage = htab_map_mem_usage,
 	BATCH_OPS(htab),
 	.map_btf_id = &htab_map_btf_ids[0],
@@ -2303,6 +2386,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
 	.map_seq_show_elem = htab_map_seq_show_elem,
 	.map_set_for_each_callback_args = map_set_for_each_callback_args,
 	.map_for_each_callback = bpf_for_each_hash_elem,
+	.map_check_btf = htab_map_check_btf,
 	.map_mem_usage = htab_map_mem_usage,
 	BATCH_OPS(htab_lru),
 	.map_btf_id = &htab_map_btf_ids[0],
@@ -2482,6 +2566,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
 	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
 	.map_set_for_each_callback_args = map_set_for_each_callback_args,
 	.map_for_each_callback = bpf_for_each_hash_elem,
+	.map_check_btf = htab_map_check_btf,
 	.map_mem_usage = htab_map_mem_usage,
 	BATCH_OPS(htab_percpu),
 	.map_btf_id = &htab_map_btf_ids[0],
@@ -2502,6 +2587,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
 	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
 	.map_set_for_each_callback_args = map_set_for_each_callback_args,
 	.map_for_each_callback = bpf_for_each_hash_elem,
+	.map_check_btf = htab_map_check_btf,
 	.map_mem_usage = htab_map_mem_usage,
 	BATCH_OPS(htab_lru_percpu),
 	.map_btf_id = &htab_map_btf_ids[0],

View file

@@ -364,7 +364,7 @@ static long cgroup_storage_delete_elem(struct bpf_map *map, void *key)
 	return -EINVAL;
 }
 
-static int cgroup_storage_check_btf(const struct bpf_map *map,
+static int cgroup_storage_check_btf(struct bpf_map *map,
 				    const struct btf *btf,
 				    const struct btf_type *key_type,
 				    const struct btf_type *value_type)

View file

@@ -751,7 +751,7 @@ free_stack:
 	return err;
 }
 
-static int trie_check_btf(const struct bpf_map *map,
+static int trie_check_btf(struct bpf_map *map,
 			  const struct btf *btf,
 			  const struct btf_type *key_type,
 			  const struct btf_type *value_type)

View file

@@ -102,6 +102,8 @@ struct bpf_mem_cache {
 	int percpu_size;
 	bool draining;
 	struct bpf_mem_cache *tgt;
+	void (*dtor)(void *obj, void *ctx);
+	void *dtor_ctx;
 
 	/* list of objects to be freed after RCU GP */
 	struct llist_head free_by_rcu;
@@ -260,12 +262,14 @@ static void free_one(void *obj, bool percpu)
 	kfree(obj);
 }
 
-static int free_all(struct llist_node *llnode, bool percpu)
+static int free_all(struct bpf_mem_cache *c, struct llist_node *llnode, bool percpu)
 {
 	struct llist_node *pos, *t;
 	int cnt = 0;
 
 	llist_for_each_safe(pos, t, llnode) {
+		if (c->dtor)
+			c->dtor((void *)pos + LLIST_NODE_SZ, c->dtor_ctx);
 		free_one(pos, percpu);
 		cnt++;
 	}
@@ -276,7 +280,7 @@ static void __free_rcu(struct rcu_head *head)
 {
 	struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu_ttrace);
 
-	free_all(llist_del_all(&c->waiting_for_gp_ttrace), !!c->percpu_size);
+	free_all(c, llist_del_all(&c->waiting_for_gp_ttrace), !!c->percpu_size);
 	atomic_set(&c->call_rcu_ttrace_in_progress, 0);
 }
@@ -308,7 +312,7 @@ static void do_call_rcu_ttrace(struct bpf_mem_cache *c)
 	if (atomic_xchg(&c->call_rcu_ttrace_in_progress, 1)) {
 		if (unlikely(READ_ONCE(c->draining))) {
 			llnode = llist_del_all(&c->free_by_rcu_ttrace);
-			free_all(llnode, !!c->percpu_size);
+			free_all(c, llnode, !!c->percpu_size);
 		}
 		return;
 	}
@@ -417,7 +421,7 @@ static void check_free_by_rcu(struct bpf_mem_cache *c)
 	dec_active(c, &flags);
 
 	if (unlikely(READ_ONCE(c->draining))) {
-		free_all(llist_del_all(&c->waiting_for_gp), !!c->percpu_size);
+		free_all(c, llist_del_all(&c->waiting_for_gp), !!c->percpu_size);
 		atomic_set(&c->call_rcu_in_progress, 0);
 	} else {
 		call_rcu_hurry(&c->rcu, __free_by_rcu);
@@ -635,13 +639,13 @@ static void drain_mem_cache(struct bpf_mem_cache *c)
 	 * Except for waiting_for_gp_ttrace list, there are no concurrent operations
 	 * on these lists, so it is safe to use __llist_del_all().
 	 */
-	free_all(llist_del_all(&c->free_by_rcu_ttrace), percpu);
-	free_all(llist_del_all(&c->waiting_for_gp_ttrace), percpu);
-	free_all(__llist_del_all(&c->free_llist), percpu);
-	free_all(__llist_del_all(&c->free_llist_extra), percpu);
-	free_all(__llist_del_all(&c->free_by_rcu), percpu);
-	free_all(__llist_del_all(&c->free_llist_extra_rcu), percpu);
-	free_all(llist_del_all(&c->waiting_for_gp), percpu);
+	free_all(c, llist_del_all(&c->free_by_rcu_ttrace), percpu);
+	free_all(c, llist_del_all(&c->waiting_for_gp_ttrace), percpu);
+	free_all(c, __llist_del_all(&c->free_llist), percpu);
+	free_all(c, __llist_del_all(&c->free_llist_extra), percpu);
+	free_all(c, __llist_del_all(&c->free_by_rcu), percpu);
+	free_all(c, __llist_del_all(&c->free_llist_extra_rcu), percpu);
+	free_all(c, llist_del_all(&c->waiting_for_gp), percpu);
 }
 
 static void check_mem_cache(struct bpf_mem_cache *c)
@@ -680,6 +684,9 @@ static void check_leaked_objs(struct bpf_mem_alloc *ma)
 
 static void free_mem_alloc_no_barrier(struct bpf_mem_alloc *ma)
 {
+	/* We can free dtor ctx only once all callbacks are done using it. */
+	if (ma->dtor_ctx_free)
+		ma->dtor_ctx_free(ma->dtor_ctx);
 	check_leaked_objs(ma);
 	free_percpu(ma->cache);
 	free_percpu(ma->caches);
@@ -1014,3 +1021,32 @@ int bpf_mem_alloc_check_size(bool percpu, size_t size)
 
 	return 0;
 }
+
+void bpf_mem_alloc_set_dtor(struct bpf_mem_alloc *ma, void (*dtor)(void *obj, void *ctx),
+			    void (*dtor_ctx_free)(void *ctx), void *ctx)
+{
+	struct bpf_mem_caches *cc;
+	struct bpf_mem_cache *c;
+	int cpu, i;
+
+	ma->dtor_ctx_free = dtor_ctx_free;
+	ma->dtor_ctx = ctx;
+
+	if (ma->cache) {
+		for_each_possible_cpu(cpu) {
+			c = per_cpu_ptr(ma->cache, cpu);
+			c->dtor = dtor;
+			c->dtor_ctx = ctx;
+		}
+	}
+	if (ma->caches) {
+		for_each_possible_cpu(cpu) {
+			cc = per_cpu_ptr(ma->caches, cpu);
+			for (i = 0; i < NUM_CACHES; i++) {
+				c = &cc->cache[i];
+				c->dtor = dtor;
+				c->dtor_ctx = ctx;
+			}
+		}
+	}
+}

View file

@@ -1234,7 +1234,7 @@ int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
 }
 EXPORT_SYMBOL_GPL(bpf_obj_name_cpy);
 
-int map_check_no_btf(const struct bpf_map *map,
+int map_check_no_btf(struct bpf_map *map,
 		     const struct btf *btf,
 		     const struct btf_type *key_type,
 		     const struct btf_type *value_type)

View file

@@ -269,3 +269,59 @@ struct tnum tnum_bswap64(struct tnum a)
 {
 	return TNUM(swab64(a.value), swab64(a.mask));
 }
+
+/* Given tnum t, and a number z such that tmin <= z < tmax, where tmin
+ * is the smallest member of the t (= t.value) and tmax is the largest
+ * member of t (= t.value | t.mask), returns the smallest member of t
+ * larger than z.
+ *
+ * For example,
+ * t = x11100x0
+ * z = 11110001 (241)
+ * result = 11110010 (242)
+ *
+ * Note: if this function is called with z >= tmax, it just returns
+ * early with tmax; if this function is called with z < tmin, the
+ * algorithm already returns tmin.
+ */
+u64 tnum_step(struct tnum t, u64 z)
+{
+	u64 tmax, j, p, q, r, s, v, u, w, res;
+	u8 k;
+
+	tmax = t.value | t.mask;
+	/* if z >= largest member of t, return largest member of t */
+	if (z >= tmax)
+		return tmax;
+	/* if z < smallest member of t, return smallest member of t */
+	if (z < t.value)
+		return t.value;
+	/* keep t's known bits, and match all unknown bits to z */
+	j = t.value | (z & t.mask);
+	if (j > z) {
+		p = ~z & t.value & ~t.mask;
+		k = fls64(p); /* k is the most-significant 0-to-1 flip */
+		q = U64_MAX << k;
+		r = q & z; /* positions > k matched to z */
+		s = ~q & t.value; /* positions <= k matched to t.value */
+		v = r | s;
+		res = v;
+	} else {
+		p = z & ~t.value & ~t.mask;
+		k = fls64(p); /* k is the most-significant 1-to-0 flip */
+		q = U64_MAX << k;
+		r = q & t.mask & z; /* unknown positions > k, matched to z */
+		s = q & ~t.mask; /* known positions > k, set to 1 */
+		v = r | s;
+		/* add 1 to unknown positions > k to make value greater than z */
+		u = v + (1ULL << k);
+		/* extract bits in unknown positions > k from u, rest from t.value */
+		w = (u & t.mask) | t.value;
+		res = w;
+	}
+
+	return res;
+}
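As a sanity check on the example in the comment above, a tiny standalone userspace program (not part of the patch) can brute-force the same answer; a value v is a member of tnum t exactly when the bits outside t.mask equal t.value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* t = x11100x0: value = 0x70 (known-one bits), mask = 0x82 (unknown bits) */
	uint64_t value = 0x70, mask = 0x82;
	uint64_t z = 241;	/* 0b11110001 */
	uint64_t v;

	/* walk upward until we hit a member of t; fine for this small example */
	for (v = z + 1; (v & ~mask) != value; v++)
		;
	printf("smallest member of t greater than %llu is %llu\n",
	       (unsigned long long)z, (unsigned long long)v);	/* prints 242 */
	return 0;
}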

View file

@@ -2379,6 +2379,9 @@ static void __update_reg32_bounds(struct bpf_reg_state *reg)
 
 static void __update_reg64_bounds(struct bpf_reg_state *reg)
 {
+	u64 tnum_next, tmax;
+	bool umin_in_tnum;
+
 	/* min signed is max(sign bit) | min(other bits) */
 	reg->smin_value = max_t(s64, reg->smin_value,
 				reg->var_off.value | (reg->var_off.mask & S64_MIN));
@@ -2388,6 +2391,33 @@ static void __update_reg64_bounds(struct bpf_reg_state *reg)
 	reg->umin_value = max(reg->umin_value, reg->var_off.value);
 	reg->umax_value = min(reg->umax_value,
 			      reg->var_off.value | reg->var_off.mask);
+
+	/* Check if u64 and tnum overlap in a single value */
+	tnum_next = tnum_step(reg->var_off, reg->umin_value);
+	umin_in_tnum = (reg->umin_value & ~reg->var_off.mask) == reg->var_off.value;
+	tmax = reg->var_off.value | reg->var_off.mask;
+	if (umin_in_tnum && tnum_next > reg->umax_value) {
+		/* The u64 range and the tnum only overlap in umin.
+		 * u64:  ---[xxxxxx]-----
+		 * tnum: --xx----------x-
+		 */
+		___mark_reg_known(reg, reg->umin_value);
+	} else if (!umin_in_tnum && tnum_next == tmax) {
+		/* The u64 range and the tnum only overlap in the maximum value
+		 * represented by the tnum, called tmax.
+		 * u64:  ---[xxxxxx]-----
+		 * tnum: xx-----x--------
+		 */
+		___mark_reg_known(reg, tmax);
+	} else if (!umin_in_tnum && tnum_next <= reg->umax_value &&
+		   tnum_step(reg->var_off, tnum_next) > reg->umax_value) {
+		/* The u64 range and the tnum only overlap in between umin
+		 * (excluded) and umax.
+		 * u64:  ---[xxxxxx]-----
+		 * tnum: xx----x-------x-
+		 */
+		___mark_reg_known(reg, tnum_next);
+	}
 }
 
 static void __update_reg_bounds(struct bpf_reg_state *reg)
View file

@@ -2454,8 +2454,10 @@ static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link,
 					      struct seq_file *seq)
 {
 	struct bpf_kprobe_multi_link *kmulti_link;
+	bool has_cookies;
 
 	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
+	has_cookies = !!kmulti_link->cookies;
 
 	seq_printf(seq,
 		   "kprobe_cnt:\t%u\n"
@@ -2467,7 +2469,7 @@ static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link,
 	for (int i = 0; i < kmulti_link->cnt; i++) {
 		seq_printf(seq,
 			   "%llu\t %pS\n",
-			   kmulti_link->cookies[i],
+			   has_cookies ? kmulti_link->cookies[i] : 0,
 			   (void *)kmulti_link->addrs[i]);
 	}
 }