From c56f97c5c71f17d781461d44acb777cd21521b81 Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Thu, 19 Jun 2025 14:26:23 -0400 Subject: [PATCH 1/9] bitmap: generalize node_random() Generalize node_random() and make it available to general bitmaps and cpumasks users. Notice, find_first_bit() is generally faster than find_nth_bit(), and we employ it when there's a single set bit in the bitmap. See commit 3e061d924fe9c7b4 ("lib/nodemask: optimize node_random for nodemask with single NUMA node"). CC: Andrew Morton Signed-off-by: "Yury Norov [NVIDIA]" --- include/linux/find.h | 2 ++ include/linux/nodemask.h | 16 ++-------------- lib/find_bit.c | 24 ++++++++++++++++++++++++ 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/include/linux/find.h b/include/linux/find.h index 5a2c267ea7f9..98c61838002c 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -44,6 +44,8 @@ unsigned long _find_next_bit_le(const unsigned long *addr, unsigned long size, unsigned long offset); #endif +unsigned long find_random_bit(const unsigned long *addr, unsigned long size); + #ifndef find_next_bit /** * find_next_bit - find the next set bit in a memory region diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index f08ae71585fa..7ad1f5c7407e 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -492,21 +492,9 @@ static __always_inline int num_node_state(enum node_states state) static __always_inline int node_random(const nodemask_t *maskp) { #if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1) - int w, bit; + int node = find_random_bit(maskp->bits, MAX_NUMNODES); - w = nodes_weight(*maskp); - switch (w) { - case 0: - bit = NUMA_NO_NODE; - break; - case 1: - bit = first_node(*maskp); - break; - default: - bit = find_nth_bit(maskp->bits, MAX_NUMNODES, get_random_u32_below(w)); - break; - } - return bit; + return node < MAX_NUMNODES ? node : NUMA_NO_NODE; #else return 0; #endif diff --git a/lib/find_bit.c b/lib/find_bit.c index 06b6342aa3ae..d4b5a29e3e72 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -18,6 +18,7 @@ #include #include #include +#include /* * Common helper for find_bit() function family @@ -291,3 +292,26 @@ EXPORT_SYMBOL(_find_next_bit_le); #endif #endif /* __BIG_ENDIAN */ + +/** + * find_random_bit - find a set bit at random position + * @addr: The address to base the search on + * @size: The bitmap size in bits + * + * Returns: a position of a random set bit; >= @size otherwise + */ +unsigned long find_random_bit(const unsigned long *addr, unsigned long size) +{ + int w = bitmap_weight(addr, size); + + switch (w) { + case 0: + return size; + case 1: + /* Performance trick for single-bit bitmaps */ + return find_first_bit(addr, size); + default: + return find_nth_bit(addr, size, get_random_u32_below(w)); + } +} +EXPORT_SYMBOL(find_random_bit); From 012b1043420c0bc62e52902499de40b66f37fd6a Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Thu, 19 Jun 2025 14:26:24 -0400 Subject: [PATCH 2/9] cpumask: introduce cpumask_random() Propagate find_random_bit() to cpumask API. CC: Andrew Morton Signed-off-by: "Yury Norov [NVIDIA]" --- include/linux/cpumask.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 7ae80a7ca81e..39b71b662da3 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -354,6 +354,18 @@ unsigned int cpumask_next_wrap(int n, const struct cpumask *src) return find_next_bit_wrap(cpumask_bits(src), small_cpumask_bits, n + 1); } +/** + * cpumask_random - get random cpu in *src. + * @src: cpumask pointer + * + * Return: random set bit, or >= nr_cpu_ids if @src is empty. + */ +static __always_inline +unsigned int cpumask_random(const struct cpumask *src) +{ + return find_random_bit(cpumask_bits(src), nr_cpu_ids); +} + /** * for_each_cpu - iterate over every cpu in a mask * @cpu: the (optionally unsigned) integer iterator From 8557c8628cf3cf8ebd3b32601ccdde550bbf6c54 Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Thu, 19 Jun 2025 14:26:25 -0400 Subject: [PATCH 3/9] clocksource: Improve randomness in clocksource_verify_choose_cpus() The current algorithm of picking a random CPU works OK for dense online cpumask, but if cpumask is non-dense, the distribution of picked CPUs is skewed. For example, on 8-CPU board with CPUs 4-7 offlined, the probability of selecting CPU 0 is 5/8. Accordingly, cpus 1, 2 and 3 are chosen with probability 1/8 each. The proper algorithm should pick each online CPU with probability 1/4. Switch it to cpumask_random(), which has better statistical characteristics. CC: Andrew Morton Acked-by: John Stultz Reviewed-by: Thomas Gleixner Signed-off-by: "Yury Norov [NVIDIA]" --- kernel/time/clocksource.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 6a8bc7da9062..4b005b2f3ef5 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -342,10 +342,7 @@ static void clocksource_verify_choose_cpus(void) * CPUs that are currently online. */ for (i = 1; i < n; i++) { - cpu = get_random_u32_below(nr_cpu_ids); - cpu = cpumask_next(cpu - 1, cpu_online_mask); - if (cpu >= nr_cpu_ids) - cpu = cpumask_first(cpu_online_mask); + cpu = cpumask_random(cpu_online_mask); if (!WARN_ON_ONCE(cpu >= nr_cpu_ids)) cpumask_set_cpu(cpu, &cpus_chosen); } From f49a4af3fabce56aaef125f0d959b81afe194afb Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Wed, 4 Jun 2025 20:12:52 -0400 Subject: [PATCH 4/9] watchdog: fix opencoded cpumask_next_wrap() in watchdog_next_cpu() The dedicated helper is more verbose and efficient comparing to cpumask_next() followed by cpumask_first(). Signed-off-by: "Yury Norov [NVIDIA]" Reviewed-by: Douglas Anderson --- kernel/watchdog_buddy.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/watchdog_buddy.c b/kernel/watchdog_buddy.c index 34dbfe091f4b..ee754d767c21 100644 --- a/kernel/watchdog_buddy.c +++ b/kernel/watchdog_buddy.c @@ -12,10 +12,7 @@ static unsigned int watchdog_next_cpu(unsigned int cpu) { unsigned int next_cpu; - next_cpu = cpumask_next(cpu, &watchdog_cpus); - if (next_cpu >= nr_cpu_ids) - next_cpu = cpumask_first(&watchdog_cpus); - + next_cpu = cpumask_next_wrap(cpu, &watchdog_cpus); if (next_cpu == cpu) return nr_cpu_ids; From b0c85e99458af829c32c225b43f638443bff14e5 Mon Sep 17 00:00:00 2001 From: Shaopeng Tan Date: Mon, 23 Jun 2025 16:46:45 +0900 Subject: [PATCH 5/9] cpumask: Remove unnecessary cpumask_nth_andnot() Commit 94f753143028("x86/resctrl: Optimize cpumask_any_housekeeping()") switched the only user of cpumask_nth_andnot() to other cpumask functions, but left the function cpumask_nth_andnot() unused. This makes function find_nth_andnot_bit() unused as well. Delete them. Signed-off-by: Shaopeng Tan Signed-off-by: Yury Norov [NVIDIA] --- include/linux/cpumask.h | 16 ---------------- include/linux/find.h | 27 --------------------------- 2 files changed, 43 deletions(-) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 39b71b662da3..4bda089fbe5d 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -558,22 +558,6 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, small_cpumask_bits, cpumask_check(cpu)); } -/** - * cpumask_nth_andnot - get the Nth cpu set in 1st cpumask, and clear in 2nd. - * @srcp1: the cpumask pointer - * @srcp2: the cpumask pointer - * @cpu: the Nth cpu to find, starting from 0 - * - * Return: >= nr_cpu_ids if such cpu doesn't exist. - */ -static __always_inline -unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, - const struct cpumask *srcp2) -{ - return find_nth_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), - small_cpumask_bits, cpumask_check(cpu)); -} - /** * cpumask_nth_and_andnot - get the Nth cpu set in 1st and 2nd cpumask, and clear in 3rd. * @srcp1: the cpumask pointer diff --git a/include/linux/find.h b/include/linux/find.h index 98c61838002c..9d720ad92bc1 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -269,33 +269,6 @@ unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long * return __find_nth_and_bit(addr1, addr2, size, n); } -/** - * find_nth_andnot_bit - find N'th set bit in 2 memory regions, - * flipping bits in 2nd region - * @addr1: The 1st address to start the search at - * @addr2: The 2nd address to start the search at - * @size: The maximum number of bits to search - * @n: The number of set bit, which position is needed, counting from 0 - * - * Returns the bit number of the N'th set bit. - * If no such, returns @size. - */ -static __always_inline -unsigned long find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, - unsigned long size, unsigned long n) -{ - if (n >= size) - return size; - - if (small_const_nbits(size)) { - unsigned long val = *addr1 & (~*addr2) & GENMASK(size - 1, 0); - - return val ? fns(val, n) : size; - } - - return __find_nth_andnot_bit(addr1, addr2, size, n); -} - /** * find_nth_and_andnot_bit - find N'th set bit in 2 memory regions, * excluding those set in 3rd region From 6d4471252ccc1722d25200fa9b6021ab4e1d6fde Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Mon, 9 Jun 2025 11:45:45 +0900 Subject: [PATCH 6/9] bits: split the definition of the asm and non-asm GENMASK*() In an upcoming change, the non-asm GENMASK*() will all be unified to depend on GENMASK_TYPE() which indirectly depend on sizeof(), something not available in asm. Instead of adding further complexity to GENMASK_TYPE() to make it work for both asm and non asm, just split the definition of the two variants. Signed-off-by: Vincent Mailhol Reviewed-by: Lucas De Marchi Signed-off-by: Yury Norov (NVIDIA) --- include/linux/bits.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/include/linux/bits.h b/include/linux/bits.h index 7ad056219115..13dbc8adc70e 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -35,6 +35,11 @@ #define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h))) +#define GENMASK(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) +#define GENMASK_ULL(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) + /* * Generate a mask for the specified type @t. Additional checks are made to * guarantee the value returned fits in that type, relying on @@ -79,15 +84,11 @@ * BUILD_BUG_ON_ZERO is not available in h files included from asm files, * disable the input check if that is the case. */ -#define GENMASK_INPUT_CHECK(h, l) 0 +#define GENMASK(h, l) __GENMASK(h, l) +#define GENMASK_ULL(h, l) __GENMASK_ULL(h, l) #endif /* !defined(__ASSEMBLY__) */ -#define GENMASK(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) -#define GENMASK_ULL(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) - #if !defined(__ASSEMBLY__) /* * Missing asm support From 104ea1c84b91c9f452e497ba51602b903711cdd5 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Mon, 9 Jun 2025 11:45:46 +0900 Subject: [PATCH 7/9] bits: unify the non-asm GENMASK*() The newly introduced GENMASK_TYPE() macro can also be used to generate the pre-existing non-asm GENMASK*() variants. Apply GENMASK_TYPE() to GENMASK(), GENMASK_ULL() and GENMASK_U128(). Signed-off-by: Vincent Mailhol Signed-off-by: Yury Norov (NVIDIA) --- include/linux/bits.h | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/include/linux/bits.h b/include/linux/bits.h index 13dbc8adc70e..a40cc861b3a7 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -2,10 +2,8 @@ #ifndef __LINUX_BITS_H #define __LINUX_BITS_H -#include #include #include -#include #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) @@ -35,11 +33,6 @@ #define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h))) -#define GENMASK(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) -#define GENMASK_ULL(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) - /* * Generate a mask for the specified type @t. Additional checks are made to * guarantee the value returned fits in that type, relying on @@ -55,10 +48,14 @@ (type_max(t) << (l) & \ type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h))))) +#define GENMASK(h, l) GENMASK_TYPE(unsigned long, h, l) +#define GENMASK_ULL(h, l) GENMASK_TYPE(unsigned long long, h, l) + #define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l) #define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l) #define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l) #define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l) +#define GENMASK_U128(h, l) GENMASK_TYPE(u128, h, l) /* * Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The @@ -89,19 +86,4 @@ #endif /* !defined(__ASSEMBLY__) */ -#if !defined(__ASSEMBLY__) -/* - * Missing asm support - * - * __GENMASK_U128() depends on _BIT128() which would not work - * in the asm code, as it shifts an 'unsigned __int128' data - * type instead of direct representation of 128 bit constants - * such as long and unsigned long. The fundamental problem is - * that a 128 bit constant will get silently truncated by the - * gcc compiler. - */ -#define GENMASK_U128(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l)) -#endif - #endif /* __LINUX_BITS_H */ From dcb23e1878013dd04122122ed8eba35f354a091b Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Mon, 9 Jun 2025 11:45:47 +0900 Subject: [PATCH 8/9] test_bits: add tests for __GENMASK() and __GENMASK_ULL() The definitions of GENMASK() and GENMASK_ULL() do not depend any more on __GENMASK() and __GENMASK_ULL(). Duplicate the existing unit tests so that __GENMASK{,ULL}() are still covered. Because __GENMASK() and __GENMASK_ULL() do use GENMASK_INPUT_CHECK(), drop the TEST_GENMASK_FAILURES negative tests. It would be good to have a small assembly test case for GENMASK*() in case somebody decides to unify both in the future. However, I lack expertise in assembly to do so. Instead add a FIXME message to highlight the absence of the asm unit test. Signed-off-by: Vincent Mailhol Reviewed-by: Lucas De Marchi Signed-off-by: Yury Norov (NVIDIA) --- lib/tests/test_bits.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/lib/tests/test_bits.c b/lib/tests/test_bits.c index 47325b41515f..ab88e50d2edf 100644 --- a/lib/tests/test_bits.c +++ b/lib/tests/test_bits.c @@ -26,6 +26,23 @@ static_assert(assert_type(u16, GENMASK_U16(15, 0)) == U16_MAX); static_assert(assert_type(u32, GENMASK_U32(31, 0)) == U32_MAX); static_assert(assert_type(u64, GENMASK_U64(63, 0)) == U64_MAX); +/* FIXME: add a test case written in asm for GENMASK() and GENMASK_ULL() */ + +static void __genmask_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, 1ul, __GENMASK(0, 0)); + KUNIT_EXPECT_EQ(test, 3ul, __GENMASK(1, 0)); + KUNIT_EXPECT_EQ(test, 6ul, __GENMASK(2, 1)); + KUNIT_EXPECT_EQ(test, 0xFFFFFFFFul, __GENMASK(31, 0)); +} + +static void __genmask_ull_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, 1ull, __GENMASK_ULL(0, 0)); + KUNIT_EXPECT_EQ(test, 3ull, __GENMASK_ULL(1, 0)); + KUNIT_EXPECT_EQ(test, 0x000000ffffe00000ull, __GENMASK_ULL(39, 21)); + KUNIT_EXPECT_EQ(test, 0xffffffffffffffffull, __GENMASK_ULL(63, 0)); +} static void genmask_test(struct kunit *test) { @@ -123,6 +140,8 @@ static void genmask_input_check_test(struct kunit *test) static struct kunit_case bits_test_cases[] = { + KUNIT_CASE(__genmask_test), + KUNIT_CASE(__genmask_ull_test), KUNIT_CASE(genmask_test), KUNIT_CASE(genmask_ull_test), KUNIT_CASE(genmask_u128_test), From e2b02d382ae0cb90697e8529dfd3f93bf8c6905c Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Wed, 9 Jul 2025 10:38:08 +0100 Subject: [PATCH 9/9] bitfield: Ensure the return values of helper functions are checked As type##_replace_bits() has no side effects it is only useful if its return value is checked. Add __must_check to enforce this usage. To have the bits replaced in-place typep##_replace_bits() can be used instead. Although, type_##_get_bits() and type_##_encode_bits() are harder to misuse they are still only useful if the return value is checked. For consistency, also add __must_check to these. Signed-off-by: Ben Horgan Signed-off-by: Yury Norov (NVIDIA) --- include/linux/bitfield.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 6d9a53db54b6..5355f8f806a9 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -189,14 +189,14 @@ static __always_inline u64 field_mask(u64 field) } #define field_max(field) ((typeof(field))field_mask(field)) #define ____MAKE_OP(type,base,to,from) \ -static __always_inline __##type type##_encode_bits(base v, base field) \ +static __always_inline __##type __must_check type##_encode_bits(base v, base field) \ { \ if (__builtin_constant_p(v) && (v & ~field_mask(field))) \ __field_overflow(); \ return to((v & field_mask(field)) * field_multiplier(field)); \ } \ -static __always_inline __##type type##_replace_bits(__##type old, \ - base val, base field) \ +static __always_inline __##type __must_check type##_replace_bits(__##type old, \ + base val, base field) \ { \ return (old & ~to(field)) | type##_encode_bits(val, field); \ } \ @@ -205,7 +205,7 @@ static __always_inline void type##p_replace_bits(__##type *p, \ { \ *p = (*p & ~to(field)) | type##_encode_bits(val, field); \ } \ -static __always_inline base type##_get_bits(__##type v, base field) \ +static __always_inline base __must_check type##_get_bits(__##type v, base field) \ { \ return (from(v) & field)/field_multiplier(field); \ }