slab updates for 7.0 part2


Merge tag 'slab-for-7.0-part2' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab

Pull more slab updates from Vlastimil Babka:

 - Two stable fixes for kmalloc_nolock() usage from NMI context (Harry
   Yoo)

 - Allow kmalloc_nolock() allocations to be freed with kfree(), and
   thus also kfree_rcu(), and simplify slabobj_ext handling: we no
   longer need to track how an object was allocated in order to use the
   matching freeing function (Harry Yoo). A usage sketch follows below.
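
   A minimal usage sketch of what this enables, assuming the current
   kmalloc_nolock(size, gfp_flags, node) signature; struct foo and both
   helpers are hypothetical:

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct foo {
		int val;
		struct rcu_head rcu;	/* needed by kfree_rcu() */
	};

	/* May be called from NMI context; kmalloc_nolock() never spins. */
	static struct foo *foo_alloc_nmi(void)
	{
		return kmalloc_nolock(sizeof(struct foo), __GFP_ZERO,
				      NUMA_NO_NODE);
	}

	static void foo_release(struct foo *f)
	{
		/*
		 * A matching kfree_nolock() is no longer required; plain
		 * kfree() or kfree_rcu() now works however the object
		 * was allocated.
		 */
		kfree_rcu(f, rcu);
	}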

* tag 'slab-for-7.0-part2' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  mm/slab: drop the OBJEXTS_NOSPIN_ALLOC flag from enum objext_flags
  mm/slab: allow freeing kmalloc_nolock()'d objects using kfree[_rcu]()
  mm/slab: use prandom if !allow_spin
  mm/slab: do not access current->mems_allowed_seq if !allow_spin
commit 9702969978
Author: Linus Torvalds
Date:   2026-02-16 13:41:38 -08:00

4 changed files with 76 additions and 33 deletions

@@ -359,8 +359,7 @@ enum objext_flags {
	 * MEMCG_DATA_OBJEXTS.
	 */
	OBJEXTS_ALLOC_FAIL = __OBJEXTS_ALLOC_FAIL,
-	/* slabobj_ext vector allocated with kmalloc_nolock() */
-	OBJEXTS_NOSPIN_ALLOC = __FIRST_OBJEXT_FLAG,
+	__OBJEXTS_FLAG_UNUSED = __FIRST_OBJEXT_FLAG,
	/* the next bit after the last actual flag */
	__NR_OBJEXTS_FLAGS = (__FIRST_OBJEXT_FLAG << 1),
 };

@@ -1074,8 +1074,8 @@ static inline void rcu_read_unlock_migrate(void)
  * either fall back to use of call_rcu() or rearrange the structure to
  * position the rcu_head structure into the first 4096 bytes.
  *
- * The object to be freed can be allocated either by kmalloc() or
- * kmem_cache_alloc().
+ * The object to be freed can be allocated either by kmalloc(),
+ * kmalloc_nolock(), or kmem_cache_alloc().
  *
  * Note that the allowable offset might decrease in the future.
  *

@@ -837,13 +837,12 @@ static void delete_object_full(unsigned long ptr, unsigned int objflags)
	struct kmemleak_object *object;

	object = find_and_remove_object(ptr, 0, objflags);
-	if (!object) {
-#ifdef DEBUG
-		kmemleak_warn("Freeing unknown object at 0x%08lx\n",
-			      ptr);
-#endif
+	if (!object)
+		/*
+		 * kmalloc_nolock() -> kfree() calls kmemleak_free()
+		 * without kmemleak_alloc().
+		 */
		return;
-	}

	__delete_object(object);
 }

@@ -926,13 +925,12 @@ static void paint_ptr(unsigned long ptr, int color, unsigned int objflags)
	struct kmemleak_object *object;

	object = __find_and_get_object(ptr, 0, objflags);
-	if (!object) {
-		kmemleak_warn("Trying to color unknown object at 0x%08lx as %s\n",
-			      ptr,
-			      (color == KMEMLEAK_GREY) ? "Grey" :
-			      (color == KMEMLEAK_BLACK) ? "Black" : "Unknown");
+	if (!object)
+		/*
+		 * kmalloc_nolock() -> kfree_rcu() calls kmemleak_ignore()
+		 * without kmemleak_alloc().
+		 */
		return;
-	}
	paint_it(object, color);
	put_object(object);
 }

@@ -43,6 +43,7 @@
 #include <linux/prefetch.h>
 #include <linux/memcontrol.h>
 #include <linux/random.h>
+#include <linux/prandom.h>
 #include <kunit/test.h>
 #include <kunit/test-bug.h>
 #include <linux/sort.h>
@@ -2189,8 +2190,6 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
		   virt_to_slab(vec)->slab_cache == s);

	new_exts = (unsigned long)vec;
-	if (unlikely(!allow_spin))
-		new_exts |= OBJEXTS_NOSPIN_ALLOC;
 #ifdef CONFIG_MEMCG
	new_exts |= MEMCG_DATA_OBJEXTS;
 #endif
@@ -2228,7 +2227,7 @@ retry:
	return 0;
 }

-static inline void free_slab_obj_exts(struct slab *slab)
+static inline void free_slab_obj_exts(struct slab *slab, bool allow_spin)
 {
	struct slabobj_ext *obj_exts;

@@ -2256,10 +2255,10 @@ static inline void free_slab_obj_exts(struct slab *slab)
	 * the extension for obj_exts is expected to be NULL.
	 */
	mark_objexts_empty(obj_exts);
-	if (unlikely(READ_ONCE(slab->obj_exts) & OBJEXTS_NOSPIN_ALLOC))
-		kfree_nolock(obj_exts);
-	else
+	if (allow_spin)
		kfree(obj_exts);
+	else
+		kfree_nolock(obj_exts);
	slab->obj_exts = 0;
 }

@@ -2323,7 +2322,7 @@ static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
	return 0;
 }

-static inline void free_slab_obj_exts(struct slab *slab)
+static inline void free_slab_obj_exts(struct slab *slab, bool allow_spin)
 {
 }

@@ -2584,6 +2583,24 @@ struct rcu_delayed_free {
  * Returns true if freeing of the object can proceed, false if its reuse
  * was delayed by CONFIG_SLUB_RCU_DEBUG or KASAN quarantine, or it was returned
  * to KFENCE.
+ *
+ * For objects allocated via kmalloc_nolock(), only a subset of alloc hooks
+ * are invoked, so some free hooks must handle asymmetric hook calls.
+ *
+ * Alloc hooks called for kmalloc_nolock():
+ * - kmsan_slab_alloc()
+ * - kasan_slab_alloc()
+ * - memcg_slab_post_alloc_hook()
+ * - alloc_tagging_slab_alloc_hook()
+ *
+ * Free hooks that must handle missing corresponding alloc hooks:
+ * - kmemleak_free_recursive()
+ * - kfence_free()
+ *
+ * Free hooks that have no alloc hook counterpart, and thus safe to call:
+ * - debug_check_no_locks_freed()
+ * - debug_check_no_obj_freed()
+ * - __kcsan_check_access()
  */
 static __always_inline
 bool slab_free_hook(struct kmem_cache *s, void *x, bool init,
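
A hedged illustration of the asymmetry documented in the comment above
(hypothetical caller; the hook flow is simplified):

	#include <linux/slab.h>

	static void nolock_hook_asymmetry_demo(void)
	{
		/*
		 * Alloc side: kmemleak_alloc() and the KFENCE alloc path
		 * are skipped, so neither subsystem has a record of @p.
		 */
		void *p = kmalloc_nolock(32, 0, NUMA_NO_NODE);

		if (!p)
			return;
		/*
		 * Free side: slab_free_hook() still runs
		 * kmemleak_free_recursive() and kfence_free(); both must
		 * treat an unknown object as a non-error (see the
		 * kmemleak hunks above).
		 */
		kfree(p);
	}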
@@ -3311,8 +3328,11 @@ static void *next_freelist_entry(struct kmem_cache *s,
	return (char *)start + idx;
 }

+static DEFINE_PER_CPU(struct rnd_state, slab_rnd_state);
+
 /* Shuffle the single linked freelist based on a random pre-computed sequence */
-static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
+static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab,
+			     bool allow_spin)
 {
	void *start;
	void *cur;
@@ -3323,7 +3343,19 @@ static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
		return false;

	freelist_count = oo_objects(s->oo);
-	pos = get_random_u32_below(freelist_count);
+	if (allow_spin) {
+		pos = get_random_u32_below(freelist_count);
+	} else {
+		struct rnd_state *state;
+
+		/*
+		 * An interrupt or NMI handler might interrupt and change
+		 * the state in the middle, but that's safe.
+		 */
+		state = &get_cpu_var(slab_rnd_state);
+		pos = prandom_u32_state(state) % freelist_count;
+		put_cpu_var(slab_rnd_state);
+	}

	page_limit = slab->objects * s->size;
	start = fixup_red_left(s, slab_address(slab));
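
The !allow_spin branch above avoids get_random_u32_below(), which may
take locks and is therefore unsafe in NMI context. A standalone sketch
of the same per-CPU prandom pattern (demo_rnd and nmi_safe_rand_below()
are hypothetical; the slight modulo bias is acceptable for freelist
shuffling):

	#include <linux/percpu.h>
	#include <linux/prandom.h>

	/* Seed once at boot, e.g. with prandom_init_once(&demo_rnd). */
	static DEFINE_PER_CPU(struct rnd_state, demo_rnd);

	static u32 nmi_safe_rand_below(u32 bound)
	{
		/* get_cpu_var() only disables preemption; no locks taken */
		struct rnd_state *state = &get_cpu_var(demo_rnd);
		u32 r = prandom_u32_state(state) % bound;

		put_cpu_var(demo_rnd);
		return r;
	}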
@@ -3350,7 +3382,8 @@ static inline int init_cache_random_seq(struct kmem_cache *s)
	return 0;
 }
 static inline void init_freelist_randomization(void) { }
-static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
+static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab,
+				    bool allow_spin)
 {
	return false;
 }
@@ -3369,14 +3402,14 @@ static __always_inline void account_slab(struct slab *slab, int order,
 }

 static __always_inline void unaccount_slab(struct slab *slab, int order,
-					   struct kmem_cache *s)
+					   struct kmem_cache *s, bool allow_spin)
 {
	/*
	 * The slab object extensions should now be freed regardless of
	 * whether mem_alloc_profiling_enabled() or not because profiling
	 * might have been disabled after slab->obj_exts got allocated.
	 */
-	free_slab_obj_exts(slab);
+	free_slab_obj_exts(slab, allow_spin);

	mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
			    -(PAGE_SIZE << order));
@@ -3441,7 +3474,7 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
		alloc_slab_obj_exts_early(s, slab);
	account_slab(slab, oo_order(oo), s, flags);

-	shuffle = shuffle_freelist(s, slab);
+	shuffle = shuffle_freelist(s, slab, allow_spin);

	if (!shuffle) {
		start = fixup_red_left(s, start);
@@ -3480,7 +3513,7 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab, bool allow_spin
	page->mapping = NULL;
	__ClearPageSlab(page);
	mm_account_reclaimed_pages(pages);
-	unaccount_slab(slab, order, s);
+	unaccount_slab(slab, order, s, allow_spin);
	if (allow_spin)
		free_frozen_pages(page, order);
	else
@@ -3791,6 +3824,7 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context *
	struct zone *zone;
	enum zone_type highest_zoneidx = gfp_zone(pc->flags);
	unsigned int cpuset_mems_cookie;
+	bool allow_spin = gfpflags_allow_spinning(pc->flags);

	/*
	 * The defrag ratio allows a configuration of the tradeoffs between
@@ -3815,7 +3849,15 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context *
		return NULL;

	do {
-		cpuset_mems_cookie = read_mems_allowed_begin();
+		/*
+		 * read_mems_allowed_begin() accesses current->mems_allowed_seq,
+		 * a seqcount_spinlock_t that is not NMI-safe. Do not access
+		 * current->mems_allowed_seq and avoid retry when GFP flags
+		 * indicate spinning is not allowed.
+		 */
+		if (allow_spin)
+			cpuset_mems_cookie = read_mems_allowed_begin();
+
		zonelist = node_zonelist(mempolicy_slab_node(), pc->flags);
		for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
			struct kmem_cache_node *n;
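
For reference, the unconditional form of the cpuset retry loop that the
change above makes conditional (generic pattern; the zonelist walk is
elided):

	#include <linux/cpuset.h>

	static void cpuset_retry_pattern(void)
	{
		unsigned int cookie;

		do {
			/* samples current->mems_allowed_seq (a seqcount) */
			cookie = read_mems_allowed_begin();
			/* ... walk the zonelists; may race with a cpuset
			 * update that changes mems_allowed ... */
		} while (read_mems_allowed_retry(cookie));
	}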
@@ -3839,7 +3881,7 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context *
				}
			}
		}
-	} while (read_mems_allowed_retry(cpuset_mems_cookie));
+	} while (allow_spin && read_mems_allowed_retry(cpuset_mems_cookie));
 #endif	/* CONFIG_NUMA */
	return NULL;
 }
@@ -6372,7 +6414,7 @@ void kvfree_rcu_cb(struct rcu_head *head)

 /**
  * kfree - free previously allocated memory
- * @object: pointer returned by kmalloc() or kmem_cache_alloc()
+ * @object: pointer returned by kmalloc(), kmalloc_nolock(), or kmem_cache_alloc()
  *
  * If @object is NULL, no operation is performed.
  */
@@ -6391,6 +6433,7 @@ void kfree(const void *object)
	page = virt_to_page(object);
	slab = page_slab(page);
	if (!slab) {
+		/* kmalloc_nolock() doesn't support large kmalloc */
		free_large_kmalloc(page, (void *)object);
		return;
	}
@@ -8337,6 +8380,9 @@ void __init kmem_cache_init_late(void)
	flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM | WQ_PERCPU,
				  0);
	WARN_ON(!flushwq);
+#ifdef CONFIG_SLAB_FREELIST_RANDOM
+	prandom_init_once(&slab_rnd_state);
+#endif
 }

 int do_kmem_cache_create(struct kmem_cache *s, const char *name,