diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1baee139999f..52bfe4157623 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -359,8 +359,7 @@ enum objext_flags {
          * MEMCG_DATA_OBJEXTS.
          */
         OBJEXTS_ALLOC_FAIL      = __OBJEXTS_ALLOC_FAIL,
-        /* slabobj_ext vector allocated with kmalloc_nolock() */
-        OBJEXTS_NOSPIN_ALLOC    = __FIRST_OBJEXT_FLAG,
+        __OBJEXTS_FLAG_UNUSED   = __FIRST_OBJEXT_FLAG,
         /* the next bit after the last actual flag */
         __NR_OBJEXTS_FLAGS      = (__FIRST_OBJEXT_FLAG << 1),
 };
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 7729fef249e1..04f3f86a4145 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -1074,8 +1074,8 @@ static inline void rcu_read_unlock_migrate(void)
  * either fall back to use of call_rcu() or rearrange the structure to
  * position the rcu_head structure into the first 4096 bytes.
  *
- * The object to be freed can be allocated either by kmalloc() or
- * kmem_cache_alloc().
+ * The object to be freed can be allocated either by kmalloc(),
+ * kmalloc_nolock(), or kmem_cache_alloc().
  *
  * Note that the allowable offset might decrease in the future.
  *
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index fe33f2edfe07..d79acf5c5100 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -837,13 +837,12 @@ static void delete_object_full(unsigned long ptr, unsigned int objflags)
         struct kmemleak_object *object;

         object = find_and_remove_object(ptr, 0, objflags);
-        if (!object) {
-#ifdef DEBUG
-                kmemleak_warn("Freeing unknown object at 0x%08lx\n",
-                              ptr);
-#endif
+        if (!object)
+                /*
+                 * kmalloc_nolock() -> kfree() calls kmemleak_free()
+                 * without kmemleak_alloc().
+                 */
                 return;
-        }

         __delete_object(object);
 }
@@ -926,13 +925,12 @@ static void paint_ptr(unsigned long ptr, int color, unsigned int objflags)
         struct kmemleak_object *object;

         object = __find_and_get_object(ptr, 0, objflags);
-        if (!object) {
-                kmemleak_warn("Trying to color unknown object at 0x%08lx as %s\n",
-                              ptr,
-                              (color == KMEMLEAK_GREY) ? "Grey" :
-                              (color == KMEMLEAK_BLACK) ? "Black" : "Unknown");
+        if (!object)
+                /*
+                 * kmalloc_nolock() -> kfree_rcu() calls kmemleak_ignore()
+                 * without kmemleak_alloc().
+                 */
                 return;
-        }
         paint_it(object, color);
         put_object(object);
 }
diff --git a/mm/slub.c b/mm/slub.c
index 42df791279d9..865bc050f654 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -43,6 +43,7 @@
 #include
 #include
 #include
+#include <linux/prandom.h>
 #include
 #include
 #include
@@ -2189,8 +2190,6 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
                         virt_to_slab(vec)->slab_cache == s);

         new_exts = (unsigned long)vec;
-        if (unlikely(!allow_spin))
-                new_exts |= OBJEXTS_NOSPIN_ALLOC;
 #ifdef CONFIG_MEMCG
         new_exts |= MEMCG_DATA_OBJEXTS;
 #endif
@@ -2228,7 +2227,7 @@ retry:
         return 0;
 }

-static inline void free_slab_obj_exts(struct slab *slab)
+static inline void free_slab_obj_exts(struct slab *slab, bool allow_spin)
 {
         struct slabobj_ext *obj_exts;

@@ -2256,10 +2255,10 @@ static inline void free_slab_obj_exts(struct slab *slab)
          * the extension for obj_exts is expected to be NULL.
          */
         mark_objexts_empty(obj_exts);
-        if (unlikely(READ_ONCE(slab->obj_exts) & OBJEXTS_NOSPIN_ALLOC))
-                kfree_nolock(obj_exts);
-        else
+        if (allow_spin)
                 kfree(obj_exts);
+        else
+                kfree_nolock(obj_exts);
         slab->obj_exts = 0;
 }

@@ -2323,7 +2322,7 @@ static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
         return 0;
 }

-static inline void free_slab_obj_exts(struct slab *slab)
+static inline void free_slab_obj_exts(struct slab *slab, bool allow_spin)
 {
 }

@@ -2584,6 +2583,24 @@ struct rcu_delayed_free {
  * Returns true if freeing of the object can proceed, false if its reuse
  * was delayed by CONFIG_SLUB_RCU_DEBUG or KASAN quarantine, or it was returned
  * to KFENCE.
+ *
+ * For objects allocated via kmalloc_nolock(), only a subset of alloc hooks
+ * are invoked, so some free hooks must handle asymmetric hook calls.
+ *
+ * Alloc hooks called for kmalloc_nolock():
+ * - kmsan_slab_alloc()
+ * - kasan_slab_alloc()
+ * - memcg_slab_post_alloc_hook()
+ * - alloc_tagging_slab_alloc_hook()
+ *
+ * Free hooks that must handle missing corresponding alloc hooks:
+ * - kmemleak_free_recursive()
+ * - kfence_free()
+ *
+ * Free hooks that have no alloc hook counterpart and are thus safe to call:
+ * - debug_check_no_locks_freed()
+ * - debug_check_no_obj_freed()
+ * - __kcsan_check_access()
  */
 static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x, bool init,
@@ -3311,8 +3328,11 @@ static void *next_freelist_entry(struct kmem_cache *s,
         return (char *)start + idx;
 }

+static DEFINE_PER_CPU(struct rnd_state, slab_rnd_state);
+
 /* Shuffle the single linked freelist based on a random pre-computed sequence */
-static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
+static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab,
+                             bool allow_spin)
 {
         void *start;
         void *cur;
@@ -3323,7 +3343,19 @@ static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
                 return false;

         freelist_count = oo_objects(s->oo);
-        pos = get_random_u32_below(freelist_count);
+        if (allow_spin) {
+                pos = get_random_u32_below(freelist_count);
+        } else {
+                struct rnd_state *state;
+
+                /*
+                 * An interrupt or NMI handler might interrupt and change
+                 * the state in the middle, but that's safe.
+                 */
+                state = &get_cpu_var(slab_rnd_state);
+                pos = prandom_u32_state(state) % freelist_count;
+                put_cpu_var(slab_rnd_state);
+        }

         page_limit = slab->objects * s->size;
         start = fixup_red_left(s, slab_address(slab));
@@ -3350,7 +3382,8 @@ static inline int init_cache_random_seq(struct kmem_cache *s)
         return 0;
 }
 static inline void init_freelist_randomization(void) { }
-static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
+static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab,
+                                    bool allow_spin)
 {
         return false;
 }
@@ -3369,14 +3402,14 @@ static __always_inline void account_slab(struct slab *slab, int order,
 }

 static __always_inline void unaccount_slab(struct slab *slab, int order,
-                                           struct kmem_cache *s)
+                                           struct kmem_cache *s, bool allow_spin)
 {
         /*
          * The slab object extensions should now be freed regardless of
          * whether mem_alloc_profiling_enabled() or not because profiling
          * might have been disabled after slab->obj_exts got allocated.
          */
-        free_slab_obj_exts(slab);
+        free_slab_obj_exts(slab, allow_spin);

         mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
                             -(PAGE_SIZE << order));
@@ -3441,7 +3474,7 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
         alloc_slab_obj_exts_early(s, slab);
         account_slab(slab, oo_order(oo), s, flags);

-        shuffle = shuffle_freelist(s, slab);
+        shuffle = shuffle_freelist(s, slab, allow_spin);

         if (!shuffle) {
                 start = fixup_red_left(s, start);
@@ -3480,7 +3513,7 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab, bool allow_spin
         page->mapping = NULL;
         __ClearPageSlab(page);
         mm_account_reclaimed_pages(pages);
-        unaccount_slab(slab, order, s);
+        unaccount_slab(slab, order, s, allow_spin);
         if (allow_spin)
                 free_frozen_pages(page, order);
         else
@@ -3791,6 +3824,7 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context *
         struct zone *zone;
         enum zone_type highest_zoneidx = gfp_zone(pc->flags);
         unsigned int cpuset_mems_cookie;
+        bool allow_spin = gfpflags_allow_spinning(pc->flags);

         /*
          * The defrag ratio allows a configuration of the tradeoffs between
@@ -3815,7 +3849,15 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context *
                 return NULL;

         do {
-                cpuset_mems_cookie = read_mems_allowed_begin();
+                /*
+                 * read_mems_allowed_begin() accesses current->mems_allowed_seq,
+                 * a seqcount_spinlock_t that is not NMI-safe. Do not access
+                 * current->mems_allowed_seq and avoid retry when GFP flags
+                 * indicate spinning is not allowed.
+                 */
+                if (allow_spin)
+                        cpuset_mems_cookie = read_mems_allowed_begin();
+
                 zonelist = node_zonelist(mempolicy_slab_node(), pc->flags);
                 for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
                         struct kmem_cache_node *n;
@@ -3839,7 +3881,7 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context *
                                 }
                         }
                 }
-        } while (read_mems_allowed_retry(cpuset_mems_cookie));
+        } while (allow_spin && read_mems_allowed_retry(cpuset_mems_cookie));
 #endif  /* CONFIG_NUMA */
         return NULL;
 }
@@ -6372,7 +6414,7 @@ void kvfree_rcu_cb(struct rcu_head *head)

 /**
  * kfree - free previously allocated memory
- * @object: pointer returned by kmalloc() or kmem_cache_alloc()
+ * @object: pointer returned by kmalloc(), kmalloc_nolock(), or kmem_cache_alloc()
  *
  * If @object is NULL, no operation is performed.
  */
@@ -6391,6 +6433,7 @@ void kfree(const void *object)
         page = virt_to_page(object);
         slab = page_slab(page);
         if (!slab) {
+                /* kmalloc_nolock() doesn't support large kmalloc */
                 free_large_kmalloc(page, (void *)object);
                 return;
         }
@@ -8337,6 +8380,9 @@ void __init kmem_cache_init_late(void)
         flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
         WARN_ON(!flushwq);
+#ifdef CONFIG_SLAB_FREELIST_RANDOM
+        prandom_init_once(&slab_rnd_state);
+#endif
 }

 int do_kmem_cache_create(struct kmem_cache *s, const char *name,
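
For context, here is a minimal usage sketch of the allocation pattern these slab-internal changes are meant to support: allocating and freeing from a context that must not spin on slab locks (for example an NMI or tracing handler). It assumes the kmalloc_nolock()/kfree_nolock() prototypes introduced by the kmalloc_nolock series; those prototypes and the helper names below are not part of this diff.

/*
 * Illustration only, not part of this patch. Assumes
 * kmalloc_nolock(size, gfp_flags, node) and kfree_nolock(ptr) as provided
 * by the kmalloc_nolock series.
 */
#include <linux/slab.h>
#include <linux/numa.h>

static void *grab_scratch(void)
{
        /* May return NULL where a regular kmalloc() would block or spin. */
        return kmalloc_nolock(64, __GFP_ZERO, NUMA_NO_NODE);
}

static void release_scratch(void *buf)
{
        /* The free path likewise avoids locks that could spin in this context. */
        kfree_nolock(buf);
}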