slab: remove SLUB_CPU_PARTIAL

We have removed the partial slab usage from allocation paths. Now remove
the whole config option and associated code.

Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Reviewed-by: Hao Li <hao.li@linux.dev>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
This commit is contained in:
Vlastimil Babka 2026-01-23 07:52:50 +01:00
parent 17c38c8829
commit e323b52cf0
3 changed files with 18 additions and 341 deletions

View file

@ -247,17 +247,6 @@ config SLUB_STATS
out which slabs are relevant to a particular load.
Try running: slabinfo -DA
config SLUB_CPU_PARTIAL
default y
depends on SMP && !SLUB_TINY
bool "Enable per cpu partial caches"
help
Per cpu partial caches accelerate object allocation and freeing
that is local to a processor, at the price of more indeterminism
in the latency of the free. On overflow these caches will be cleared,
which requires taking locks that may cause latency spikes.
Typically one would choose no for a realtime system.
config RANDOM_KMALLOC_CACHES
default n
depends on !SLUB_TINY

View file

@ -77,12 +77,6 @@ struct slab {
struct llist_node llnode;
void *flush_freelist;
};
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct {
struct slab *next;
int slabs; /* Nr of slabs left */
};
#endif
};
/* Double-word boundary */
struct freelist_counters;
@ -188,23 +182,6 @@ static inline size_t slab_size(const struct slab *slab)
return PAGE_SIZE << slab_order(slab);
}
/*
 * Accessors for the ->partial member of the per-cpu structure passed as @c.
 *
 * With CONFIG_SLUB_CPU_PARTIAL:
 *   slub_percpu_partial(c)           - the ->partial member itself (usable as
 *                                      an lvalue, see the setter below)
 *   slub_set_percpu_partial(c, p)    - stores p->next into ->partial
 *   slub_percpu_partial_read_once(c) - READ_ONCE() of ->partial
 *
 * Without it the two readers evaluate to NULL and the setter expands to
 * nothing, so callers need no #ifdef of their own.
 */
#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_percpu_partial(c) ((c)->partial)
#define slub_set_percpu_partial(c, p) \
({ \
slub_percpu_partial(c) = (p)->next; \
})
#define slub_percpu_partial_read_once(c) READ_ONCE(slub_percpu_partial(c))
#else
#define slub_percpu_partial(c) NULL
#define slub_set_percpu_partial(c, p)
#define slub_percpu_partial_read_once(c) NULL
#endif // CONFIG_SLUB_CPU_PARTIAL
/*
* Word size structure that can be atomically updated or read and that
* contains both the order and the number of objects that a slab of the
@ -228,12 +205,6 @@ struct kmem_cache {
unsigned int object_size; /* Object size without metadata */
struct reciprocal_value reciprocal_size;
unsigned int offset; /* Free pointer offset */
#ifdef CONFIG_SLUB_CPU_PARTIAL
/* Number of per cpu partial objects to keep around */
unsigned int cpu_partial;
/* Number of per cpu partial slabs to keep around */
unsigned int cpu_partial_slabs;
#endif
unsigned int sheaf_capacity;
struct kmem_cache_order_objects oo;

319
mm/slub.c
View file

@ -268,15 +268,6 @@ void *fixup_red_left(struct kmem_cache *s, void *p)
return p;
}
/*
 * Tell whether cache @s uses per-cpu partial lists.
 *
 * Always false when CONFIG_SLUB_CPU_PARTIAL is not set; otherwise true
 * exactly when kmem_cache_debug(@s) is false.
 */
static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
{
	bool has_partial = false;

#ifdef CONFIG_SLUB_CPU_PARTIAL
	has_partial = !kmem_cache_debug(s);
#endif
	return has_partial;
}
/*
* Issues still to be resolved:
*
@ -431,9 +422,6 @@ struct freelist_tid {
struct kmem_cache_cpu {
struct freelist_tid;
struct slab *slab; /* The slab from which we are allocating */
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct slab *partial; /* Partially allocated slabs */
#endif
local_trylock_t lock; /* Protects the fields above */
#ifdef CONFIG_SLUB_STATS
unsigned int stat[NR_SLUB_STAT_ITEMS];
@ -666,29 +654,6 @@ static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
return x.x & OO_MASK;
}
#ifdef CONFIG_SLUB_CPU_PARTIAL
/*
 * Record the per-cpu partial limit of @s, given as a number of objects.
 *
 * The object count is stored unchanged in ->cpu_partial. It is also turned
 * into a slab count in ->cpu_partial_slabs, which is what bounds the growth
 * of the per-cpu partial list. For simplicity the conversion assumes that
 * the slabs are half-full, hence the factor of two.
 */
static void slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
{
	s->cpu_partial = nr_objects;
	s->cpu_partial_slabs = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo));
}
#elif defined(SLAB_SUPPORTS_SYSFS)
/* No per-cpu partial lists are configured: there is nothing to record. */
static inline void slub_set_cpu_partial(struct kmem_cache *s,
					unsigned int nr_objects)
{
}
#endif /* CONFIG_SLUB_CPU_PARTIAL */
/*
* If network-based swap is enabled, slub must keep track of whether memory
* were allocated from pfmemalloc reserves.
@ -3476,12 +3441,6 @@ static void *alloc_single_from_new_slab(struct kmem_cache *s, struct slab *slab,
return object;
}
#ifdef CONFIG_SLUB_CPU_PARTIAL
/* Forward declaration; the definition follows later in this file. */
static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain);
#else
/* Per-cpu partial lists are compiled out: calls become no-ops. */
static inline void
put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain)
{
}
#endif
static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags);
static bool get_partial_node_bulk(struct kmem_cache *s,
@ -3894,131 +3853,6 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
#define local_unlock_cpu_slab(s, flags) \
local_unlock_irqrestore(&(s)->cpu_slab->lock, flags)
#ifdef CONFIG_SLUB_CPU_PARTIAL
/*
 * Dispose of a chain of slabs linked through slab->next, starting at
 * @partial_slab.
 *
 * Each slab is either added to the tail of its node's partial list or, when
 * it has no objects in use and the node already holds at least
 * s->min_partial partial slabs, queued on a local chain and discarded once
 * the node lock has been released.
 */
static void __put_partials(struct kmem_cache *s, struct slab *partial_slab)
{
struct kmem_cache_node *n = NULL, *n2 = NULL;
struct slab *slab, *slab_to_discard = NULL;
unsigned long flags = 0;
while (partial_slab) {
/* Unlink the head first: ->next is reused below for the discard chain. */
slab = partial_slab;
partial_slab = slab->next;
n2 = get_node(s, slab_nid(slab));
/*
 * Only switch locks when this slab belongs to a different node than the
 * previous one; consecutive slabs of one node share a single lock hold.
 */
if (n != n2) {
if (n)
spin_unlock_irqrestore(&n->list_lock, flags);
n = n2;
spin_lock_irqsave(&n->list_lock, flags);
}
if (unlikely(!slab->inuse && n->nr_partial >= s->min_partial)) {
/* Empty slab and the node has enough partial slabs: defer the discard. */
slab->next = slab_to_discard;
slab_to_discard = slab;
} else {
add_partial(n, slab, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
}
/* Drop the lock of the last node visited, if any slab was processed. */
if (n)
spin_unlock_irqrestore(&n->list_lock, flags);
/* Discard the queued slabs with no list_lock held. */
while (slab_to_discard) {
slab = slab_to_discard;
slab_to_discard = slab_to_discard->next;
stat(s, DEACTIVATE_EMPTY);
discard_slab(s, slab);
stat(s, FREE_SLAB);
}
}
/*
 * Detach the partial slab list of the current cpu and move all of its
 * slabs to the node partial lists.
 *
 * Only the detach itself runs under the cpu_slab lock; the detached chain
 * is handed to __put_partials() after the lock has been released.
 */
static void put_partials(struct kmem_cache *s)
{
	unsigned long flags;
	struct slab *detached;

	local_lock_irqsave(&s->cpu_slab->lock, flags);
	detached = this_cpu_read(s->cpu_slab->partial);
	this_cpu_write(s->cpu_slab->partial, NULL);
	local_unlock_irqrestore(&s->cpu_slab->lock, flags);

	if (!detached)
		return;

	__put_partials(s, detached);
}
/*
 * Same as put_partials(), but for the explicitly given per-cpu structure @c.
 * The list is detached without taking the cpu_slab lock.
 */
static void put_partials_cpu(struct kmem_cache *s,
			     struct kmem_cache_cpu *c)
{
	struct slab *detached = slub_percpu_partial(c);

	c->partial = NULL;

	if (!detached)
		return;

	__put_partials(s, detached);
}
/*
 * Push @slab onto the head of the current cpu's partial list.
 *
 * The head slab carries the length of the list in ->slabs. When @drain is
 * set and that length has reached s->cpu_partial_slabs, the existing list is
 * detached and moved to the node partial lists, and @slab starts a fresh
 * list of length one.
 */
static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain)
{
	struct slab *head;
	struct slab *overflow = NULL;
	unsigned long flags;
	int count = 0;

	local_lock_cpu_slab(s, flags);

	head = this_cpu_read(s->cpu_slab->partial);
	if (head) {
		if (drain && head->slabs >= s->cpu_partial_slabs) {
			/*
			 * The list is full. Take it off the cpu here and
			 * postpone the move to the node partial lists until
			 * after the critical section.
			 */
			overflow = head;
			head = NULL;
		} else {
			count = head->slabs;
		}
	}

	slab->slabs = count + 1;
	slab->next = head;
	this_cpu_write(s->cpu_slab->partial, slab);

	local_unlock_cpu_slab(s, flags);

	if (!overflow)
		return;

	__put_partials(s, overflow);
	stat(s, CPU_PARTIAL_DRAIN);
}
#else /* CONFIG_SLUB_CPU_PARTIAL */
/* Without per-cpu partial lists there is nothing to move: both are no-ops. */
static inline void put_partials(struct kmem_cache *s)
{
}

static inline void
put_partials_cpu(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
}
#endif /* CONFIG_SLUB_CPU_PARTIAL */
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
unsigned long flags;
@ -4056,8 +3890,6 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
deactivate_slab(s, slab, freelist);
stat(s, CPUSLAB_FLUSH);
}
put_partials_cpu(s, c);
}
static inline void flush_this_cpu_slab(struct kmem_cache *s)
@ -4066,15 +3898,13 @@ static inline void flush_this_cpu_slab(struct kmem_cache *s)
if (c->slab)
flush_slab(s, c);
put_partials(s);
}
/*
 * Report whether the per-cpu structure of cache @s on @cpu has a slab
 * attached.
 *
 * NOTE(review): two return statements follow each other below, so the second
 * one is unreachable as written. This looks like the old and the new line of
 * a diff shown together without +/- markers - confirm which one is intended
 * (the second no longer consults slub_percpu_partial()).
 */
static bool has_cpu_slab(int cpu, struct kmem_cache *s)
{
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
return c->slab || slub_percpu_partial(c);
return c->slab;
}
static bool has_pcs_used(int cpu, struct kmem_cache *s)
@ -5652,13 +5482,6 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
return;
}
/*
* It is enough to test IS_ENABLED(CONFIG_SLUB_CPU_PARTIAL) below
* instead of kmem_cache_has_cpu_partial(s), because kmem_cache_debug(s)
* is the only other reason it can be false, and it is already handled
* above.
*/
do {
if (unlikely(n)) {
spin_unlock_irqrestore(&n->list_lock, flags);
@ -5683,26 +5506,19 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
* Unless it's frozen.
*/
if ((!new.inuse || was_full) && !was_frozen) {
n = get_node(s, slab_nid(slab));
/*
* If slab becomes non-full and we have cpu partial
* lists, we put it there unconditionally to avoid
* taking the list_lock. Otherwise we need it.
* Speculatively acquire the list_lock.
* If the cmpxchg does not succeed then we may
* drop the list_lock without any processing.
*
* Otherwise the list_lock will synchronize with
* other processors updating the list of slabs.
*/
if (!(IS_ENABLED(CONFIG_SLUB_CPU_PARTIAL) && was_full)) {
spin_lock_irqsave(&n->list_lock, flags);
n = get_node(s, slab_nid(slab));
/*
* Speculatively acquire the list_lock.
* If the cmpxchg does not succeed then we may
* drop the list_lock without any processing.
*
* Otherwise the list_lock will synchronize with
* other processors updating the list of slabs.
*/
spin_lock_irqsave(&n->list_lock, flags);
on_node_partial = slab_test_node_partial(slab);
}
on_node_partial = slab_test_node_partial(slab);
}
} while (!slab_update_freelist(s, slab, &old, &new, "__slab_free"));
@ -5715,13 +5531,6 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
* activity can be necessary.
*/
stat(s, FREE_FROZEN);
} else if (IS_ENABLED(CONFIG_SLUB_CPU_PARTIAL) && was_full) {
/*
* If we started with a full slab then put it onto the
* per cpu partial list.
*/
put_cpu_partial(s, slab, 1);
stat(s, CPU_PARTIAL_FREE);
}
/*
@ -5750,10 +5559,9 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
/*
* Objects left in the slab. If it was not on the partial list before
* then add it. This can only happen when cache has no per cpu partial
* list otherwise we would have put it there.
* then add it.
*/
if (!IS_ENABLED(CONFIG_SLUB_CPU_PARTIAL) && unlikely(was_full)) {
if (unlikely(was_full)) {
add_partial(n, slab, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
@ -6422,8 +6230,8 @@ redo:
if (unlikely(!allow_spin)) {
/*
* __slab_free() can locklessly cmpxchg16 into a slab,
* but then it might need to take spin_lock or local_lock
* in put_cpu_partial() for further processing.
* but then it might need to take spin_lock
* for further processing.
* Avoid the complexity and simply add to a deferred list.
*/
defer_free(s, head);
@ -7747,39 +7555,6 @@ static int init_kmem_cache_nodes(struct kmem_cache *s)
return 1;
}
/*
 * Pick the default per-cpu partial limit for cache @s from its object size.
 * Does nothing unless CONFIG_SLUB_CPU_PARTIAL is set.
 */
static void set_cpu_partial(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_CPU_PARTIAL
	/*
	 * cpu_partial determines the maximum number of objects kept in the
	 * per cpu partial lists of a processor.
	 *
	 * Those lists mainly hold slabs that just had one object freed. When
	 * they are used for allocation they can be filled up again with
	 * minimal effort, and such a slab never reaches the per node partial
	 * lists, so no locking is required.
	 *
	 * For backwards compatibility the limit is expressed as a number of
	 * objects, even though what is limited now is the maximum number of
	 * pages, see slub_set_cpu_partial().
	 */
	unsigned int limit = 120;

	if (!kmem_cache_has_cpu_partial(s))
		limit = 0;
	else if (s->size >= PAGE_SIZE)
		limit = 6;
	else if (s->size >= 1024)
		limit = 24;
	else if (s->size >= 256)
		limit = 52;

	slub_set_cpu_partial(s, limit);
#endif
}
static unsigned int calculate_sheaf_capacity(struct kmem_cache *s,
struct kmem_cache_args *args)
@ -8640,8 +8415,6 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
s->min_partial = min_t(unsigned long, MAX_PARTIAL, ilog2(s->size) / 2);
s->min_partial = max_t(unsigned long, MIN_PARTIAL, s->min_partial);
set_cpu_partial(s);
s->cpu_sheaves = alloc_percpu(struct slub_percpu_sheaves);
if (!s->cpu_sheaves) {
err = -ENOMEM;
@ -9005,20 +8778,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
total += x;
nodes[node] += x;
#ifdef CONFIG_SLUB_CPU_PARTIAL
slab = slub_percpu_partial_read_once(c);
if (slab) {
node = slab_nid(slab);
if (flags & SO_TOTAL)
WARN_ON_ONCE(1);
else if (flags & SO_OBJECTS)
WARN_ON_ONCE(1);
else
x = data_race(slab->slabs);
total += x;
nodes[node] += x;
}
#endif
}
}
@ -9153,12 +8912,7 @@ SLAB_ATTR(min_partial);
/*
 * sysfs show handler for the "cpu_partial" attribute of cache @s; writes one
 * line into @buf via sysfs_emit() and returns its result.
 *
 * NOTE(review): two return statements follow each other below, so the second
 * one (a constant "0") is unreachable as written. This looks like the old and
 * the new lines of a diff shown together without +/- markers - confirm which
 * variant is intended before relying on this listing.
 */
static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
{
unsigned int nr_partial = 0;
#ifdef CONFIG_SLUB_CPU_PARTIAL
nr_partial = s->cpu_partial;
#endif
return sysfs_emit(buf, "%u\n", nr_partial);
return sysfs_emit(buf, "0\n");
}
static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
@ -9170,11 +8924,9 @@ static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
err = kstrtouint(buf, 10, &objects);
if (err)
return err;
if (objects && !kmem_cache_has_cpu_partial(s))
if (objects)
return -EINVAL;
slub_set_cpu_partial(s, objects);
flush_all(s);
return length;
}
SLAB_ATTR(cpu_partial);
@ -9213,42 +8965,7 @@ SLAB_ATTR_RO(objects_partial);
/*
 * sysfs show handler for the read-only "slabs_cpu_partial" attribute of
 * cache @s.
 *
 * As written, the output is "<objects>(<slabs>)" summed over all online
 * cpus, followed by one " C<cpu>=<objects>(<slabs>)" entry per cpu that has
 * a partial list, and a trailing newline. Slab counts are read from the
 * ->slabs member of each cpu's list head.
 *
 * NOTE(review): the function ends with two consecutive return statements, so
 * the final constant "0(0)" one is unreachable as written. This looks like
 * the old and the new lines of a diff shown together without +/- markers -
 * confirm which variant is intended before relying on this listing.
 */
static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
{
int objects = 0;
int slabs = 0;
int cpu __maybe_unused;
int len = 0;
#ifdef CONFIG_SLUB_CPU_PARTIAL
/* First pass: total number of slabs across all online cpus. */
for_each_online_cpu(cpu) {
struct slab *slab;
slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
if (slab)
slabs += data_race(slab->slabs);
}
#endif
/* Approximate half-full slabs, see slub_set_cpu_partial() */
objects = (slabs * oo_objects(s->oo)) / 2;
len += sysfs_emit_at(buf, len, "%d(%d)", objects, slabs);
#ifdef CONFIG_SLUB_CPU_PARTIAL
/* Second pass: per-cpu breakdown, using the same half-full estimate. */
for_each_online_cpu(cpu) {
struct slab *slab;
slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
if (slab) {
slabs = data_race(slab->slabs);
objects = (slabs * oo_objects(s->oo)) / 2;
len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
cpu, objects, slabs);
}
}
#endif
len += sysfs_emit_at(buf, len, "\n");
return len;
return sysfs_emit(buf, "0(0)\n");
}
SLAB_ATTR_RO(slabs_cpu_partial);