From c119e6685311cef0e4a4e0b7752293bea056bac7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 10 Dec 2025 08:22:37 +0000 Subject: [PATCH 01/11] genirq: Remove IRQ timing tracking infrastructure The IRQ timing tracking infrastructure was merged in 2019, but was never plumbed in, is not selectable, and is therefore never used. As Daniel agrees that there is little hope for this infrastructure to be completed in the near term, drop it altogether. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Reviewed-by: Jinjie Ruan Link: https://lore.kernel.org/r/87zf7vex6h.wl-maz@kernel.org Link: https://patch.msgid.link/20251210082242.360936-2-maz@kernel.org --- include/linux/interrupt.h | 6 - kernel/irq/Kconfig | 3 - kernel/irq/Makefile | 4 - kernel/irq/handle.c | 2 - kernel/irq/internals.h | 110 ----- kernel/irq/manage.c | 3 - kernel/irq/timings.c | 959 -------------------------------------- lib/Kconfig.debug | 8 - 8 files changed, 1095 deletions(-) delete mode 100644 kernel/irq/timings.c diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 266f2b39213a..44e335b17ed6 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -871,12 +871,6 @@ static inline void init_irq_proc(void) } #endif -#ifdef CONFIG_IRQ_TIMINGS -void irq_timings_enable(void); -void irq_timings_disable(void); -u64 irq_timings_next_event(u64 now); -#endif - struct seq_file; int show_interrupts(struct seq_file *p, void *v); int arch_show_interrupts(struct seq_file *p, int prec); diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 1b4254d19a73..05cba4e16dad 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -92,9 +92,6 @@ config GENERIC_MSI_IRQ config IRQ_MSI_IOMMU bool -config IRQ_TIMINGS - bool - config GENERIC_IRQ_MATRIX_ALLOCATOR bool diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 6ab3a4055667..86a2e5ae08f9 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -1,10 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-y := 
irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o kexec.o -obj-$(CONFIG_IRQ_TIMINGS) += timings.o -ifeq ($(CONFIG_TEST_IRQ_TIMINGS),y) - CFLAGS_timings.o += -DDEBUG -endif obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 786f5570a640..b7d52821837b 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -188,8 +188,6 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) unsigned int irq = desc->irq_data.irq; struct irqaction *action; - record_irq_time(desc); - for_each_action_of_desc(desc, action) { irqreturn_t res; diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 0164ca48da59..202c50f0fcb2 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -288,116 +288,6 @@ static inline void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) { } #endif -#ifdef CONFIG_IRQ_TIMINGS - -#define IRQ_TIMINGS_SHIFT 5 -#define IRQ_TIMINGS_SIZE (1 << IRQ_TIMINGS_SHIFT) -#define IRQ_TIMINGS_MASK (IRQ_TIMINGS_SIZE - 1) - -/** - * struct irq_timings - irq timings storing structure - * @values: a circular buffer of u64 encoded values - * @count: the number of elements in the array - */ -struct irq_timings { - u64 values[IRQ_TIMINGS_SIZE]; - int count; -}; - -DECLARE_PER_CPU(struct irq_timings, irq_timings); - -extern void irq_timings_free(int irq); -extern int irq_timings_alloc(int irq); - -static inline void irq_remove_timings(struct irq_desc *desc) -{ - desc->istate &= ~IRQS_TIMINGS; - - irq_timings_free(irq_desc_get_irq(desc)); -} - -static inline void irq_setup_timings(struct irq_desc *desc, struct irqaction *act) -{ - int irq = irq_desc_get_irq(desc); - int ret; - - /* - * We don't need the measurement because the idle code already - * knows the next expiry event. 
- */ - if (act->flags & __IRQF_TIMER) - return; - - /* - * In case the timing allocation fails, we just want to warn, - * not fail, so letting the system boot anyway. - */ - ret = irq_timings_alloc(irq); - if (ret) { - pr_warn("Failed to allocate irq timing stats for irq%d (%d)", - irq, ret); - return; - } - - desc->istate |= IRQS_TIMINGS; -} - -extern void irq_timings_enable(void); -extern void irq_timings_disable(void); - -DECLARE_STATIC_KEY_FALSE(irq_timing_enabled); - -/* - * The interrupt number and the timestamp are encoded into a single - * u64 variable to optimize the size. - * 48 bit time stamp and 16 bit IRQ number is way sufficient. - * Who cares an IRQ after 78 hours of idle time? - */ -static inline u64 irq_timing_encode(u64 timestamp, int irq) -{ - return (timestamp << 16) | irq; -} - -static inline int irq_timing_decode(u64 value, u64 *timestamp) -{ - *timestamp = value >> 16; - return value & U16_MAX; -} - -static __always_inline void irq_timings_push(u64 ts, int irq) -{ - struct irq_timings *timings = this_cpu_ptr(&irq_timings); - - timings->values[timings->count & IRQ_TIMINGS_MASK] = - irq_timing_encode(ts, irq); - - timings->count++; -} - -/* - * The function record_irq_time is only called in one place in the - * interrupts handler. We want this function always inline so the code - * inside is embedded in the function and the static key branching - * code can act at the higher level. Without the explicit - * __always_inline we can end up with a function call and a small - * overhead in the hotpath for nothing. 
- */ -static __always_inline void record_irq_time(struct irq_desc *desc) -{ - if (!static_branch_likely(&irq_timing_enabled)) - return; - - if (desc->istate & IRQS_TIMINGS) - irq_timings_push(local_clock(), irq_desc_get_irq(desc)); -} -#else -static inline void irq_remove_timings(struct irq_desc *desc) {} -static inline void irq_setup_timings(struct irq_desc *desc, - struct irqaction *act) {}; -static inline void record_irq_time(struct irq_desc *desc) {} -#endif /* CONFIG_IRQ_TIMINGS */ - - #ifdef CONFIG_GENERIC_IRQ_CHIP void irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, int num_ct, unsigned int irq_base, diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 8b1b4c8a4f54..7b25ffc5c43a 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1778,8 +1778,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) chip_bus_sync_unlock(desc); mutex_unlock(&desc->request_mutex); - irq_setup_timings(desc, new); - wake_up_and_wait_for_irq_thread_ready(desc, new); wake_up_and_wait_for_irq_thread_ready(desc, new->secondary); @@ -1950,7 +1948,6 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) irq_release_resources(desc); chip_bus_sync_unlock(desc); - irq_remove_timings(desc); } mutex_unlock(&desc->request_mutex); diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c deleted file mode 100644 index 4b7315e99bd6..000000000000 --- a/kernel/irq/timings.c +++ /dev/null @@ -1,959 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (C) 2016, Linaro Ltd - Daniel Lezcano -#define pr_fmt(fmt) "irq_timings: " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "internals.h" - -DEFINE_STATIC_KEY_FALSE(irq_timing_enabled); - -DEFINE_PER_CPU(struct irq_timings, irq_timings); - -static DEFINE_IDR(irqt_stats); - -void irq_timings_enable(void) -{ - static_branch_enable(&irq_timing_enabled); -} - -void 
irq_timings_disable(void) -{ - static_branch_disable(&irq_timing_enabled); -} - -/* - * The main goal of this algorithm is to predict the next interrupt - * occurrence on the current CPU. - * - * Currently, the interrupt timings are stored in a circular array - * buffer every time there is an interrupt, as a tuple: the interrupt - * number and the associated timestamp when the event occurred . - * - * For every interrupt occurring in a short period of time, we can - * measure the elapsed time between the occurrences for the same - * interrupt and we end up with a suite of intervals. The experience - * showed the interrupts are often coming following a periodic - * pattern. - * - * The objective of the algorithm is to find out this periodic pattern - * in a fastest way and use its period to predict the next irq event. - * - * When the next interrupt event is requested, we are in the situation - * where the interrupts are disabled and the circular buffer - * containing the timings is filled with the events which happened - * after the previous next-interrupt-event request. - * - * At this point, we read the circular buffer and we fill the irq - * related statistics structure. After this step, the circular array - * containing the timings is empty because all the values are - * dispatched in their corresponding buffers. - * - * Now for each interrupt, we can predict the next event by using the - * suffix array, log interval and exponential moving average - * - * 1. Suffix array - * - * Suffix array is an array of all the suffixes of a string. It is - * widely used as a data structure for compression, text search, ... - * For instance for the word 'banana', the suffixes will be: 'banana' - * 'anana' 'nana' 'ana' 'na' 'a' - * - * Usually, the suffix array is sorted but for our purpose it is - * not necessary and won't provide any improvement in the context of - * the solved problem where we clearly define the boundaries of the - * search by a max period and min period. 
- * - * The suffix array will build a suite of intervals of different - * length and will look for the repetition of each suite. If the suite - * is repeating then we have the period because it is the length of - * the suite whatever its position in the buffer. - * - * 2. Log interval - * - * We saw the irq timings allow to compute the interval of the - * occurrences for a specific interrupt. We can reasonably assume the - * longer is the interval, the higher is the error for the next event - * and we can consider storing those interval values into an array - * where each slot in the array correspond to an interval at the power - * of 2 of the index. For example, index 12 will contain values - * between 2^11 and 2^12. - * - * At the end we have an array of values where at each index defines a - * [2^index - 1, 2 ^ index] interval values allowing to store a large - * number of values inside a small array. - * - * For example, if we have the value 1123, then we store it at - * ilog2(1123) = 10 index value. - * - * Storing those value at the specific index is done by computing an - * exponential moving average for this specific slot. For instance, - * for values 1800, 1123, 1453, ... fall under the same slot (10) and - * the exponential moving average is computed every time a new value - * is stored at this slot. - * - * 3. Exponential Moving Average - * - * The EMA is largely used to track a signal for stocks or as a low - * pass filter. The magic of the formula, is it is very simple and the - * reactivity of the average can be tuned with the factors called - * alpha. - * - * The higher the alphas are, the faster the average respond to the - * signal change. In our case, if a slot in the array is a big - * interval, we can have numbers with a big difference between - * them. The impact of those differences in the average computation - * can be tuned by changing the alpha value. 
- * - * - * -- The algorithm -- - * - * We saw the different processing above, now let's see how they are - * used together. - * - * For each interrupt: - * For each interval: - * Compute the index = ilog2(interval) - * Compute a new_ema(buffer[index], interval) - * Store the index in a circular buffer - * - * Compute the suffix array of the indexes - * - * For each suffix: - * If the suffix is reverse-found 3 times - * Return suffix - * - * Return Not found - * - * However we can not have endless suffix array to be build, it won't - * make sense and it will add an extra overhead, so we can restrict - * this to a maximum suffix length of 5 and a minimum suffix length of - * 2. The experience showed 5 is the majority of the maximum pattern - * period found for different devices. - * - * The result is a pattern finding less than 1us for an interrupt. - * - * Example based on real values: - * - * Example 1 : MMC write/read interrupt interval: - * - * 223947, 1240, 1384, 1386, 1386, - * 217416, 1236, 1384, 1386, 1387, - * 214719, 1241, 1386, 1387, 1384, - * 213696, 1234, 1384, 1386, 1388, - * 219904, 1240, 1385, 1389, 1385, - * 212240, 1240, 1386, 1386, 1386, - * 214415, 1236, 1384, 1386, 1387, - * 214276, 1234, 1384, 1388, ? - * - * For each element, apply ilog2(value) - * - * 15, 8, 8, 8, 8, - * 15, 8, 8, 8, 8, - * 15, 8, 8, 8, 8, - * 15, 8, 8, 8, 8, - * 15, 8, 8, 8, 8, - * 15, 8, 8, 8, 8, - * 15, 8, 8, 8, 8, - * 15, 8, 8, 8, ? - * - * Max period of 5, we take the last (max_period * 3) 15 elements as - * we can be confident if the pattern repeats itself three times it is - * a repeating pattern. - * - * 8, - * 15, 8, 8, 8, 8, - * 15, 8, 8, 8, 8, - * 15, 8, 8, 8, ? - * - * Suffixes are: - * - * 1) 8, 15, 8, 8, 8 <- max period - * 2) 8, 15, 8, 8 - * 3) 8, 15, 8 - * 4) 8, 15 <- min period - * - * From there we search the repeating pattern for each suffix. 
- * - * buffer: 8, 15, 8, 8, 8, 8, 15, 8, 8, 8, 8, 15, 8, 8, 8 - * | | | | | | | | | | | | | | | - * 8, 15, 8, 8, 8 | | | | | | | | | | - * 8, 15, 8, 8, 8 | | | | | - * 8, 15, 8, 8, 8 - * - * When moving the suffix, we found exactly 3 matches. - * - * The first suffix with period 5 is repeating. - * - * The next event is (3 * max_period) % suffix_period - * - * In this example, the result 0, so the next event is suffix[0] => 8 - * - * However, 8 is the index in the array of exponential moving average - * which was calculated on the fly when storing the values, so the - * interval is ema[8] = 1366 - * - * - * Example 2: - * - * 4, 3, 5, 100, - * 3, 3, 5, 117, - * 4, 4, 5, 112, - * 4, 3, 4, 110, - * 3, 5, 3, 117, - * 4, 4, 5, 112, - * 4, 3, 4, 110, - * 3, 4, 5, 112, - * 4, 3, 4, 110 - * - * ilog2 - * - * 0, 0, 0, 4, - * 0, 0, 0, 4, - * 0, 0, 0, 4, - * 0, 0, 0, 4, - * 0, 0, 0, 4, - * 0, 0, 0, 4, - * 0, 0, 0, 4, - * 0, 0, 0, 4, - * 0, 0, 0, 4 - * - * Max period 5: - * 0, 0, 4, - * 0, 0, 0, 4, - * 0, 0, 0, 4, - * 0, 0, 0, 4 - * - * Suffixes: - * - * 1) 0, 0, 4, 0, 0 - * 2) 0, 0, 4, 0 - * 3) 0, 0, 4 - * 4) 0, 0 - * - * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4 - * | | | | | | X - * 0, 0, 4, 0, 0, | X - * 0, 0 - * - * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4 - * | | | | | | | | | | | | | | | - * 0, 0, 4, 0, | | | | | | | | | | | - * 0, 0, 4, 0, | | | | | | | - * 0, 0, 4, 0, | | | - * 0 0 4 - * - * Pattern is found 3 times, the remaining is 1 which results from - * (max_period * 3) % suffix_period. This value is the index in the - * suffix arrays. The suffix array for a period 4 has the value 4 - * at index 1. 
- */ -#define EMA_ALPHA_VAL 64 -#define EMA_ALPHA_SHIFT 7 - -#define PREDICTION_PERIOD_MIN 3 -#define PREDICTION_PERIOD_MAX 5 -#define PREDICTION_FACTOR 4 -#define PREDICTION_MAX 10 /* 2 ^ PREDICTION_MAX useconds */ -#define PREDICTION_BUFFER_SIZE 16 /* slots for EMAs, hardly more than 16 */ - -/* - * Number of elements in the circular buffer: If it happens it was - * flushed before, then the number of elements could be smaller than - * IRQ_TIMINGS_SIZE, so the count is used, otherwise the array size is - * used as we wrapped. The index begins from zero when we did not - * wrap. That could be done in a nicer way with the proper circular - * array structure type but with the cost of extra computation in the - * interrupt handler hot path. We choose efficiency. - */ -#define for_each_irqts(i, irqts) \ - for (i = irqts->count < IRQ_TIMINGS_SIZE ? \ - 0 : irqts->count & IRQ_TIMINGS_MASK, \ - irqts->count = min(IRQ_TIMINGS_SIZE, \ - irqts->count); \ - irqts->count > 0; irqts->count--, \ - i = (i + 1) & IRQ_TIMINGS_MASK) - -struct irqt_stat { - u64 last_ts; - u64 ema_time[PREDICTION_BUFFER_SIZE]; - int timings[IRQ_TIMINGS_SIZE]; - int circ_timings[IRQ_TIMINGS_SIZE]; - int count; -}; - -/* - * Exponential moving average computation - */ -static u64 irq_timings_ema_new(u64 value, u64 ema_old) -{ - s64 diff; - - if (unlikely(!ema_old)) - return value; - - diff = (value - ema_old) * EMA_ALPHA_VAL; - /* - * We can use a s64 type variable to be added with the u64 - * ema_old variable as this one will never have its topmost - * bit set, it will be always smaller than 2^63 nanosec - * interrupt interval (292 years). - */ - return ema_old + (diff >> EMA_ALPHA_SHIFT); -} - -static int irq_timings_next_event_index(int *buffer, size_t len, int period_max) -{ - int period; - - /* - * Move the beginning pointer to the end minus the max period x 3. 
- * We are at the point we can begin searching the pattern - */ - buffer = &buffer[len - (period_max * 3)]; - - /* Adjust the length to the maximum allowed period x 3 */ - len = period_max * 3; - - /* - * The buffer contains the suite of intervals, in a ilog2 - * basis, we are looking for a repetition. We point the - * beginning of the search three times the length of the - * period beginning at the end of the buffer. We do that for - * each suffix. - */ - for (period = period_max; period >= PREDICTION_PERIOD_MIN; period--) { - - /* - * The first comparison always succeed because the - * suffix is deduced from the first n-period bytes of - * the buffer and we compare the initial suffix with - * itself, so we can skip the first iteration. - */ - int idx = period; - size_t size = period; - - /* - * We look if the suite with period 'i' repeat - * itself. If it is truncated at the end, as it - * repeats we can use the period to find out the next - * element with the modulo. - */ - while (!memcmp(buffer, &buffer[idx], size * sizeof(int))) { - - /* - * Move the index in a period basis - */ - idx += size; - - /* - * If this condition is reached, all previous - * memcmp were successful, so the period is - * found. - */ - if (idx == len) - return buffer[len % period]; - - /* - * If the remaining elements to compare are - * smaller than the period, readjust the size - * of the comparison for the last iteration. - */ - if (len - idx < period) - size = len - idx; - } - } - - return -1; -} - -static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now) -{ - int index, i, period_max, count, start, min = INT_MAX; - - if ((now - irqs->last_ts) >= NSEC_PER_SEC) { - irqs->count = irqs->last_ts = 0; - return U64_MAX; - } - - /* - * As we want to find three times the repetition, we need a - * number of intervals greater or equal to three times the - * maximum period, otherwise we truncate the max period. - */ - period_max = irqs->count > (3 * PREDICTION_PERIOD_MAX) ? 
- PREDICTION_PERIOD_MAX : irqs->count / 3; - - /* - * If we don't have enough irq timings for this prediction, - * just bail out. - */ - if (period_max <= PREDICTION_PERIOD_MIN) - return U64_MAX; - - /* - * 'count' will depends if the circular buffer wrapped or not - */ - count = irqs->count < IRQ_TIMINGS_SIZE ? - irqs->count : IRQ_TIMINGS_SIZE; - - start = irqs->count < IRQ_TIMINGS_SIZE ? - 0 : (irqs->count & IRQ_TIMINGS_MASK); - - /* - * Copy the content of the circular buffer into another buffer - * in order to linearize the buffer instead of dealing with - * wrapping indexes and shifted array which will be prone to - * error and extremely difficult to debug. - */ - for (i = 0; i < count; i++) { - int index = (start + i) & IRQ_TIMINGS_MASK; - - irqs->timings[i] = irqs->circ_timings[index]; - min = min_t(int, irqs->timings[i], min); - } - - index = irq_timings_next_event_index(irqs->timings, count, period_max); - if (index < 0) - return irqs->last_ts + irqs->ema_time[min]; - - return irqs->last_ts + irqs->ema_time[index]; -} - -static __always_inline int irq_timings_interval_index(u64 interval) -{ - /* - * The PREDICTION_FACTOR increase the interval size for the - * array of exponential average. - */ - u64 interval_us = (interval >> 10) / PREDICTION_FACTOR; - - return likely(interval_us) ? ilog2(interval_us) : 0; -} - -static __always_inline void __irq_timings_store(int irq, struct irqt_stat *irqs, - u64 interval) -{ - int index; - - /* - * Get the index in the ema table for this interrupt. - */ - index = irq_timings_interval_index(interval); - - if (index > PREDICTION_BUFFER_SIZE - 1) { - irqs->count = 0; - return; - } - - /* - * Store the index as an element of the pattern in another - * circular array. 
- */ - irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index; - - irqs->ema_time[index] = irq_timings_ema_new(interval, - irqs->ema_time[index]); - - irqs->count++; -} - -static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) -{ - u64 old_ts = irqs->last_ts; - u64 interval; - - /* - * The timestamps are absolute time values, we need to compute - * the timing interval between two interrupts. - */ - irqs->last_ts = ts; - - /* - * The interval type is u64 in order to deal with the same - * type in our computation, that prevent mindfuck issues with - * overflow, sign and division. - */ - interval = ts - old_ts; - - /* - * The interrupt triggered more than one second apart, that - * ends the sequence as predictable for our purpose. In this - * case, assume we have the beginning of a sequence and the - * timestamp is the first value. As it is impossible to - * predict anything at this point, return. - * - * Note the first timestamp of the sequence will always fall - * in this test because the old_ts is zero. That is what we - * want as we need another timestamp to compute an interval. - */ - if (interval >= NSEC_PER_SEC) { - irqs->count = 0; - return; - } - - __irq_timings_store(irq, irqs, interval); -} - -/** - * irq_timings_next_event - Return when the next event is supposed to arrive - * @now: current time - * - * During the last busy cycle, the number of interrupts is incremented - * and stored in the irq_timings structure. This information is - * necessary to: - * - * - know if the index in the table wrapped up: - * - * If more than the array size interrupts happened during the - * last busy/idle cycle, the index wrapped up and we have to - * begin with the next element in the array which is the last one - * in the sequence, otherwise it is at the index 0. - * - * - have an indication of the interrupts activity on this CPU - * (eg. 
irq/sec) - * - * The values are 'consumed' after inserting in the statistical model, - * thus the count is reinitialized. - * - * The array of values **must** be browsed in the time direction, the - * timestamp must increase between an element and the next one. - * - * Returns a nanosec time based estimation of the earliest interrupt, - * U64_MAX otherwise. - */ -u64 irq_timings_next_event(u64 now) -{ - struct irq_timings *irqts = this_cpu_ptr(&irq_timings); - struct irqt_stat *irqs; - struct irqt_stat __percpu *s; - u64 ts, next_evt = U64_MAX; - int i, irq = 0; - - /* - * This function must be called with the local irq disabled in - * order to prevent the timings circular buffer to be updated - * while we are reading it. - */ - lockdep_assert_irqs_disabled(); - - if (!irqts->count) - return next_evt; - - /* - * Number of elements in the circular buffer: If it happens it - * was flushed before, then the number of elements could be - * smaller than IRQ_TIMINGS_SIZE, so the count is used, - * otherwise the array size is used as we wrapped. The index - * begins from zero when we did not wrap. That could be done - * in a nicer way with the proper circular array structure - * type but with the cost of extra computation in the - * interrupt handler hot path. We choose efficiency. - * - * Inject measured irq/timestamp to the pattern prediction - * model while decrementing the counter because we consume the - * data from our circular buffer. - */ - for_each_irqts(i, irqts) { - irq = irq_timing_decode(irqts->values[i], &ts); - s = idr_find(&irqt_stats, irq); - if (s) - irq_timings_store(irq, this_cpu_ptr(s), ts); - } - - /* - * Look in the list of interrupts' statistics, the earliest - * next event. 
- */ - idr_for_each_entry(&irqt_stats, s, i) { - - irqs = this_cpu_ptr(s); - - ts = __irq_timings_next_event(irqs, i, now); - if (ts <= now) - return now; - - if (ts < next_evt) - next_evt = ts; - } - - return next_evt; -} - -void irq_timings_free(int irq) -{ - struct irqt_stat __percpu *s; - - s = idr_find(&irqt_stats, irq); - if (s) { - free_percpu(s); - idr_remove(&irqt_stats, irq); - } -} - -int irq_timings_alloc(int irq) -{ - struct irqt_stat __percpu *s; - int id; - - /* - * Some platforms can have the same private interrupt per cpu, - * so this function may be called several times with the - * same interrupt number. Just bail out in case the per cpu - * stat structure is already allocated. - */ - s = idr_find(&irqt_stats, irq); - if (s) - return 0; - - s = alloc_percpu(*s); - if (!s) - return -ENOMEM; - - idr_preload(GFP_KERNEL); - id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT); - idr_preload_end(); - - if (id < 0) { - free_percpu(s); - return id; - } - - return 0; -} - -#ifdef CONFIG_TEST_IRQ_TIMINGS -struct timings_intervals { - u64 *intervals; - size_t count; -}; - -/* - * Intervals are given in nanosecond base - */ -static u64 intervals0[] __initdata = { - 10000, 50000, 200000, 500000, - 10000, 50000, 200000, 500000, - 10000, 50000, 200000, 500000, - 10000, 50000, 200000, 500000, - 10000, 50000, 200000, 500000, - 10000, 50000, 200000, 500000, - 10000, 50000, 200000, 500000, - 10000, 50000, 200000, 500000, - 10000, 50000, 200000, -}; - -static u64 intervals1[] __initdata = { - 223947000, 1240000, 1384000, 1386000, 1386000, - 217416000, 1236000, 1384000, 1386000, 1387000, - 214719000, 1241000, 1386000, 1387000, 1384000, - 213696000, 1234000, 1384000, 1386000, 1388000, - 219904000, 1240000, 1385000, 1389000, 1385000, - 212240000, 1240000, 1386000, 1386000, 1386000, - 214415000, 1236000, 1384000, 1386000, 1387000, - 214276000, 1234000, -}; - -static u64 intervals2[] __initdata = { - 4000, 3000, 5000, 100000, - 3000, 3000, 5000, 117000, - 4000, 
4000, 5000, 112000, - 4000, 3000, 4000, 110000, - 3000, 5000, 3000, 117000, - 4000, 4000, 5000, 112000, - 4000, 3000, 4000, 110000, - 3000, 4000, 5000, 112000, - 4000, -}; - -static u64 intervals3[] __initdata = { - 1385000, 212240000, 1240000, - 1386000, 214415000, 1236000, - 1384000, 214276000, 1234000, - 1386000, 214415000, 1236000, - 1385000, 212240000, 1240000, - 1386000, 214415000, 1236000, - 1384000, 214276000, 1234000, - 1386000, 214415000, 1236000, - 1385000, 212240000, 1240000, -}; - -static u64 intervals4[] __initdata = { - 10000, 50000, 10000, 50000, - 10000, 50000, 10000, 50000, - 10000, 50000, 10000, 50000, - 10000, 50000, 10000, 50000, - 10000, 50000, 10000, 50000, - 10000, 50000, 10000, 50000, - 10000, 50000, 10000, 50000, - 10000, 50000, 10000, 50000, - 10000, -}; - -static struct timings_intervals tis[] __initdata = { - { intervals0, ARRAY_SIZE(intervals0) }, - { intervals1, ARRAY_SIZE(intervals1) }, - { intervals2, ARRAY_SIZE(intervals2) }, - { intervals3, ARRAY_SIZE(intervals3) }, - { intervals4, ARRAY_SIZE(intervals4) }, -}; - -static int __init irq_timings_test_next_index(struct timings_intervals *ti) -{ - int _buffer[IRQ_TIMINGS_SIZE]; - int buffer[IRQ_TIMINGS_SIZE]; - int index, start, i, count, period_max; - - count = ti->count - 1; - - period_max = count > (3 * PREDICTION_PERIOD_MAX) ? - PREDICTION_PERIOD_MAX : count / 3; - - /* - * Inject all values except the last one which will be used - * to compare with the next index result. - */ - pr_debug("index suite: "); - - for (i = 0; i < count; i++) { - index = irq_timings_interval_index(ti->intervals[i]); - _buffer[i & IRQ_TIMINGS_MASK] = index; - pr_cont("%d ", index); - } - - start = count < IRQ_TIMINGS_SIZE ? 
0 : - count & IRQ_TIMINGS_MASK; - - count = min_t(int, count, IRQ_TIMINGS_SIZE); - - for (i = 0; i < count; i++) { - int index = (start + i) & IRQ_TIMINGS_MASK; - buffer[i] = _buffer[index]; - } - - index = irq_timings_next_event_index(buffer, count, period_max); - i = irq_timings_interval_index(ti->intervals[ti->count - 1]); - - if (index != i) { - pr_err("Expected (%d) and computed (%d) next indexes differ\n", - i, index); - return -EINVAL; - } - - return 0; -} - -static int __init irq_timings_next_index_selftest(void) -{ - int i, ret; - - for (i = 0; i < ARRAY_SIZE(tis); i++) { - - pr_info("---> Injecting intervals number #%d (count=%zd)\n", - i, tis[i].count); - - ret = irq_timings_test_next_index(&tis[i]); - if (ret) - break; - } - - return ret; -} - -static int __init irq_timings_test_irqs(struct timings_intervals *ti) -{ - struct irqt_stat __percpu *s; - struct irqt_stat *irqs; - int i, index, ret, irq = 0xACE5; - - ret = irq_timings_alloc(irq); - if (ret) { - pr_err("Failed to allocate irq timings\n"); - return ret; - } - - s = idr_find(&irqt_stats, irq); - if (!s) { - ret = -EIDRM; - goto out; - } - - irqs = this_cpu_ptr(s); - - for (i = 0; i < ti->count; i++) { - - index = irq_timings_interval_index(ti->intervals[i]); - pr_debug("%d: interval=%llu ema_index=%d\n", - i, ti->intervals[i], index); - - __irq_timings_store(irq, irqs, ti->intervals[i]); - if (irqs->circ_timings[i & IRQ_TIMINGS_MASK] != index) { - ret = -EBADSLT; - pr_err("Failed to store in the circular buffer\n"); - goto out; - } - } - - if (irqs->count != ti->count) { - ret = -ERANGE; - pr_err("Count differs\n"); - goto out; - } - - ret = 0; -out: - irq_timings_free(irq); - - return ret; -} - -static int __init irq_timings_irqs_selftest(void) -{ - int i, ret; - - for (i = 0; i < ARRAY_SIZE(tis); i++) { - pr_info("---> Injecting intervals number #%d (count=%zd)\n", - i, tis[i].count); - ret = irq_timings_test_irqs(&tis[i]); - if (ret) - break; - } - - return ret; -} - -static int __init 
irq_timings_test_irqts(struct irq_timings *irqts, - unsigned count) -{ - int start = count >= IRQ_TIMINGS_SIZE ? count - IRQ_TIMINGS_SIZE : 0; - int i, irq, oirq = 0xBEEF; - u64 ots = 0xDEAD, ts; - - /* - * Fill the circular buffer by using the dedicated function. - */ - for (i = 0; i < count; i++) { - pr_debug("%d: index=%d, ts=%llX irq=%X\n", - i, i & IRQ_TIMINGS_MASK, ots + i, oirq + i); - - irq_timings_push(ots + i, oirq + i); - } - - /* - * Compute the first elements values after the index wrapped - * up or not. - */ - ots += start; - oirq += start; - - /* - * Test the circular buffer count is correct. - */ - pr_debug("---> Checking timings array count (%d) is right\n", count); - if (WARN_ON(irqts->count != count)) - return -EINVAL; - - /* - * Test the macro allowing to browse all the irqts. - */ - pr_debug("---> Checking the for_each_irqts() macro\n"); - for_each_irqts(i, irqts) { - - irq = irq_timing_decode(irqts->values[i], &ts); - - pr_debug("index=%d, ts=%llX / %llX, irq=%X / %X\n", - i, ts, ots, irq, oirq); - - if (WARN_ON(ts != ots || irq != oirq)) - return -EINVAL; - - ots++; oirq++; - } - - /* - * The circular buffer should have be flushed when browsed - * with for_each_irqts - */ - pr_debug("---> Checking timings array is empty after browsing it\n"); - if (WARN_ON(irqts->count)) - return -EINVAL; - - return 0; -} - -static int __init irq_timings_irqts_selftest(void) -{ - struct irq_timings *irqts = this_cpu_ptr(&irq_timings); - int i, ret; - - /* - * Test the circular buffer with different number of - * elements. The purpose is to test at the limits (empty, half - * full, full, wrapped with the cursor at the boundaries, - * wrapped several times, etc ... 
- */ - int count[] = { 0, - IRQ_TIMINGS_SIZE >> 1, - IRQ_TIMINGS_SIZE, - IRQ_TIMINGS_SIZE + (IRQ_TIMINGS_SIZE >> 1), - 2 * IRQ_TIMINGS_SIZE, - (2 * IRQ_TIMINGS_SIZE) + 3, - }; - - for (i = 0; i < ARRAY_SIZE(count); i++) { - - pr_info("---> Checking the timings with %d/%d values\n", - count[i], IRQ_TIMINGS_SIZE); - - ret = irq_timings_test_irqts(irqts, count[i]); - if (ret) - break; - } - - return ret; -} - -static int __init irq_timings_selftest(void) -{ - int ret; - - pr_info("------------------- selftest start -----------------\n"); - - /* - * At this point, we don't except any subsystem to use the irq - * timings but us, so it should not be enabled. - */ - if (static_branch_unlikely(&irq_timing_enabled)) { - pr_warn("irq timings already initialized, skipping selftest\n"); - return 0; - } - - ret = irq_timings_irqts_selftest(); - if (ret) - goto out; - - ret = irq_timings_irqs_selftest(); - if (ret) - goto out; - - ret = irq_timings_next_index_selftest(); -out: - pr_info("---------- selftest end with %s -----------\n", - ret ? "failure" : "success"); - - return ret; -} -early_initcall(irq_timings_selftest); -#endif diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ba36939fda79..78854756d416 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2551,14 +2551,6 @@ config TEST_PARMAN If unsure, say N. -config TEST_IRQ_TIMINGS - bool "IRQ timings selftest" - depends on IRQ_TIMINGS - help - Enable this option to test the irq timings code on boot. - - If unsure, say N. - config TEST_LKM tristate "Test module loading with 'hello world' module" depends on m From e9b624ea31cc957b3a7798f89c20a80a8cbb0b73 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 10 Dec 2025 08:22:38 +0000 Subject: [PATCH 02/11] genirq: Remove __request_percpu_irq() helper With the IRQ timing stuff being gone, there is no need to specify a flag when requesting a percpu interrupt. Not only IRQF_TIMER was the only flag (set of flags actually) allowed, but nobody ever passed it. 
Get rid of __request_percpu_irq(), which was only getting 0 as flags, and promote request_percpu_irq_affinity() as its replacement. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Reviewed-by: Jinjie Ruan Link: https://patch.msgid.link/20251210082242.360936-3-maz@kernel.org --- include/linux/interrupt.h | 18 ++++-------------- kernel/irq/manage.c | 15 +++++---------- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 44e335b17ed6..00c01b0a43be 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -181,9 +181,8 @@ request_any_context_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *name, void *dev_id); extern int __must_check -__request_percpu_irq(unsigned int irq, irq_handler_t handler, - unsigned long flags, const char *devname, - const cpumask_t *affinity, void __percpu *percpu_dev_id); +request_percpu_irq_affinity(unsigned int irq, irq_handler_t handler, const char *devname, + const cpumask_t *affinity, void __percpu *percpu_dev_id); extern int __must_check request_nmi(unsigned int irq, irq_handler_t handler, unsigned long flags, @@ -193,17 +192,8 @@ static inline int __must_check request_percpu_irq(unsigned int irq, irq_handler_t handler, const char *devname, void __percpu *percpu_dev_id) { - return __request_percpu_irq(irq, handler, 0, - devname, NULL, percpu_dev_id); -} - -static inline int __must_check -request_percpu_irq_affinity(unsigned int irq, irq_handler_t handler, - const char *devname, const cpumask_t *affinity, - void __percpu *percpu_dev_id) -{ - return __request_percpu_irq(irq, handler, 0, - devname, affinity, percpu_dev_id); + return request_percpu_irq_affinity(irq, handler, devname, + NULL, percpu_dev_id); } extern int __must_check diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 7b25ffc5c43a..4d0b32642716 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -2510,10 +2510,9 @@ struct irqaction 
*create_percpu_irqaction(irq_handler_t handler, unsigned long f } /** - * __request_percpu_irq - allocate a percpu interrupt line + * request_percpu_irq_affinity - allocate a percpu interrupt line * @irq: Interrupt line to allocate * @handler: Function to be called when the IRQ occurs. - * @flags: Interrupt type flags (IRQF_TIMER only) * @devname: An ascii name for the claiming device * @affinity: A cpumask describing the target CPUs for this interrupt * @dev_id: A percpu cookie passed back to the handler function @@ -2526,9 +2525,8 @@ struct irqaction *create_percpu_irqaction(irq_handler_t handler, unsigned long f * the handler gets called with the interrupted CPU's instance of * that variable. */ -int __request_percpu_irq(unsigned int irq, irq_handler_t handler, - unsigned long flags, const char *devname, - const cpumask_t *affinity, void __percpu *dev_id) +int request_percpu_irq_affinity(unsigned int irq, irq_handler_t handler, const char *devname, + const cpumask_t *affinity, void __percpu *dev_id) { struct irqaction *action; struct irq_desc *desc; @@ -2542,10 +2540,7 @@ int __request_percpu_irq(unsigned int irq, irq_handler_t handler, !irq_settings_is_per_cpu_devid(desc)) return -EINVAL; - if (flags && flags != IRQF_TIMER) - return -EINVAL; - - action = create_percpu_irqaction(handler, flags, devname, affinity, dev_id); + action = create_percpu_irqaction(handler, 0, devname, affinity, dev_id); if (!action) return -ENOMEM; @@ -2564,7 +2559,7 @@ int __request_percpu_irq(unsigned int irq, irq_handler_t handler, return retval; } -EXPORT_SYMBOL_GPL(__request_percpu_irq); +EXPORT_SYMBOL_GPL(request_percpu_irq_affinity); /** * request_percpu_nmi - allocate a percpu interrupt line for NMI delivery From a1eaca410a3cf533f8005d2959a7a8d9d8979f3e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 10 Dec 2025 08:22:39 +0000 Subject: [PATCH 03/11] MIPS: Move IP30 timer to request_percpu_irq() Teach the SGI IP30 timer about request_percpu_irq(), which ultimately will 
allow for the removal of the antiquated setup_percpu_irq() API. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251210082242.360936-4-maz@kernel.org --- arch/mips/include/asm/cevt-r4k.h | 1 - arch/mips/kernel/cevt-r4k.c | 11 ----------- arch/mips/sgi-ip30/ip30-timer.c | 5 ++--- 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/arch/mips/include/asm/cevt-r4k.h b/arch/mips/include/asm/cevt-r4k.h index 2e13a038d260..5229eb34f28a 100644 --- a/arch/mips/include/asm/cevt-r4k.h +++ b/arch/mips/include/asm/cevt-r4k.h @@ -23,7 +23,6 @@ void mips_event_handler(struct clock_event_device *dev); int c0_compare_int_usable(void); irqreturn_t c0_compare_interrupt(int, void *); -extern struct irqaction c0_compare_irqaction; extern int cp0_timer_irq_installed; #endif /* __ASM_CEVT_R4K_H */ diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c index 5f6e9e2ebbdb..f58325f9bd2b 100644 --- a/arch/mips/kernel/cevt-r4k.c +++ b/arch/mips/kernel/cevt-r4k.c @@ -159,17 +159,6 @@ irqreturn_t c0_compare_interrupt(int irq, void *dev_id) return IRQ_NONE; } -struct irqaction c0_compare_irqaction = { - .handler = c0_compare_interrupt, - /* - * IRQF_SHARED: The timer interrupt may be shared with other interrupts - * such as perf counter and FDC interrupts. 
- */ - .flags = IRQF_PERCPU | IRQF_TIMER | IRQF_SHARED, - .name = "timer", -}; - - void mips_event_handler(struct clock_event_device *dev) { } diff --git a/arch/mips/sgi-ip30/ip30-timer.c b/arch/mips/sgi-ip30/ip30-timer.c index 7652f72f0daf..294e1f7e6d8a 100644 --- a/arch/mips/sgi-ip30/ip30-timer.c +++ b/arch/mips/sgi-ip30/ip30-timer.c @@ -52,11 +52,10 @@ void __init plat_time_init(void) int irq = get_c0_compare_int(); cp0_timer_irq_installed = 1; - c0_compare_irqaction.percpu_dev_id = &mips_clockevent_device; - c0_compare_irqaction.flags &= ~IRQF_SHARED; irq_set_handler(irq, handle_percpu_devid_irq); irq_set_percpu_devid(irq); - setup_percpu_irq(irq, &c0_compare_irqaction); + WARN_ON(request_percpu_irq(irq, c0_compare_interrupt, + "timer", &mips_clockevent_device)); enable_percpu_irq(irq, IRQ_TYPE_NONE); ip30_heart_clocksource_init(); From 7f92b583382a1eb4aaafed90d181464969e41656 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 10 Dec 2025 08:22:40 +0000 Subject: [PATCH 04/11] MIPS: Move IP27 timer to request_percpu_irq() Teach the SGI IP27 timer about request_percpu_irq(), which ultimately will allow for the removal of the antiquated setup_percpu_irq() API. 
Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251210082242.360936-5-maz@kernel.org --- arch/mips/sgi-ip27/ip27-timer.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c index 444b5e0e935f..5f4da05cb2c9 100644 --- a/arch/mips/sgi-ip27/ip27-timer.c +++ b/arch/mips/sgi-ip27/ip27-timer.c @@ -58,13 +58,6 @@ static irqreturn_t hub_rt_counter_handler(int irq, void *dev_id) return IRQ_HANDLED; } -struct irqaction hub_rt_irqaction = { - .handler = hub_rt_counter_handler, - .percpu_dev_id = &hub_rt_clockevent, - .flags = IRQF_PERCPU | IRQF_TIMER, - .name = "hub-rt", -}; - /* * This is a hack; we really need to figure these values out dynamically * @@ -103,7 +96,8 @@ static void __init hub_rt_clock_event_global_init(void) { irq_set_handler(IP27_RT_TIMER_IRQ, handle_percpu_devid_irq); irq_set_percpu_devid(IP27_RT_TIMER_IRQ); - setup_percpu_irq(IP27_RT_TIMER_IRQ, &hub_rt_irqaction); + WARN_ON(request_percpu_irq(IP27_RT_TIMER_IRQ, hub_rt_counter_handler, + "hub-rt", &hub_rt_clockevent)); } static u64 hub_rt_read(struct clocksource *cs) From bd04dae0791a8d44adc304d9787916fd4c539bb4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 10 Dec 2025 08:22:41 +0000 Subject: [PATCH 05/11] clocksource/drivers/mips-gic-timer: Move GIC timer to request_percpu_irq() Teach the MIPS GIC timer about request_percpu_irq(), which ultimately will allow for the removal of the antiquated setup_percpu_irq() API. 
Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251210082242.360936-6-maz@kernel.org --- drivers/clocksource/mips-gic-timer.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/clocksource/mips-gic-timer.c b/drivers/clocksource/mips-gic-timer.c index abb685a080a5..1501c7db9a8e 100644 --- a/drivers/clocksource/mips-gic-timer.c +++ b/drivers/clocksource/mips-gic-timer.c @@ -77,13 +77,6 @@ static irqreturn_t gic_compare_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static struct irqaction gic_compare_irqaction = { - .handler = gic_compare_interrupt, - .percpu_dev_id = &gic_clockevent_device, - .flags = IRQF_PERCPU | IRQF_TIMER, - .name = "timer", -}; - static void gic_clockevent_cpu_init(unsigned int cpu, struct clock_event_device *cd) { @@ -152,7 +145,8 @@ static int gic_clockevent_init(void) if (!gic_frequency) return -ENXIO; - ret = setup_percpu_irq(gic_timer_irq, &gic_compare_irqaction); + ret = request_percpu_irq(gic_timer_irq, gic_compare_interrupt, + "timer", &gic_clockevent_device); if (ret < 0) { pr_err("IRQ %d setup failed (%d)\n", gic_timer_irq, ret); return ret; From dbcc728e185f8c27fcafa1408ff63fe38c7dc72d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 10 Dec 2025 08:22:42 +0000 Subject: [PATCH 06/11] genirq: Remove setup_percpu_irq() setup_percpu_irq() was always a bad kludge, and should have never been there in the first place. Now that the last users are gone, remove it for good. 
Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251210082242.360936-7-maz@kernel.org --- include/linux/irq.h | 3 --- kernel/irq/manage.c | 30 ------------------------------ 2 files changed, 33 deletions(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index 4a9f1d7b08c3..67ea759749be 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -595,9 +595,6 @@ enum { #define IRQ_DEFAULT_INIT_FLAGS ARCH_IRQ_INIT_FLAGS -struct irqaction; -extern int setup_percpu_irq(unsigned int irq, struct irqaction *new); - #ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE extern void irq_cpu_online(void); extern void irq_cpu_offline(void); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 4d0b32642716..bc2d36b6b13b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -2448,36 +2448,6 @@ void free_percpu_nmi(unsigned int irq, void __percpu *dev_id) kfree(__free_percpu_irq(irq, dev_id)); } -/** - * setup_percpu_irq - setup a per-cpu interrupt - * @irq: Interrupt line to setup - * @act: irqaction for the interrupt - * - * Used to statically setup per-cpu interrupts in the early boot process. 
- */ -int setup_percpu_irq(unsigned int irq, struct irqaction *act) -{ - struct irq_desc *desc = irq_to_desc(irq); - int retval; - - if (!desc || !irq_settings_is_per_cpu_devid(desc)) - return -EINVAL; - - retval = irq_chip_pm_get(&desc->irq_data); - if (retval < 0) - return retval; - - if (!act->affinity) - act->affinity = cpu_online_mask; - - retval = __setup_irq(irq, desc, act); - - if (retval) - irq_chip_pm_put(&desc->irq_data); - - return retval; -} - static struct irqaction *create_percpu_irqaction(irq_handler_t handler, unsigned long flags, const char *devname, const cpumask_t *affinity, From 90876d9b37a0db170d5998c6c903eab2d56fd7cb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 17 Dec 2025 13:46:32 +0100 Subject: [PATCH 07/11] irqdomain: Fix up const problem in irq_domain_set_name() In irq_domain_set_name() a const pointer is passed in, and then the const is "lost" when container_of() is called. Fix this up by properly preserving the const pointer attribute when container_of() is used to enforce the fact that this pointer should not have anything at it changed. 
Signed-off-by: Greg Kroah-Hartman Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/2025121731-facing-unhitched-63ae@gregkh --- kernel/irq/irqdomain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 2652c4cfd877..094e8916bb66 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -187,7 +187,7 @@ static int irq_domain_set_name(struct irq_domain *domain, const struct irq_domai const struct fwnode_handle *fwnode = info->fwnode; if (is_fwnode_irqchip(fwnode)) { - struct irqchip_fwid *fwid = container_of(fwnode, struct irqchip_fwid, fwnode); + const struct irqchip_fwid *fwid = container_of(fwnode, struct irqchip_fwid, fwnode); /* * The name_suffix is only intended to be used to avoid a name From aef30c8d569c0f31715447525640044c74feb26f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 12 Jan 2026 14:40:13 +0100 Subject: [PATCH 08/11] genirq: Warn about using IRQF_ONESHOT without a threaded handler IRQF_ONESHOT disables the interrupt source until after the threaded handler completed its work. This is needed to allow the threaded handler to run - otherwise the CPU will get back to the interrupt handler because the interrupt source remains active and the threaded handler will not be able to do its work. Specifying IRQF_ONESHOT without a threaded handler does not make sense. It could be a leftover if the handler _was_ threaded and changed back to primary and the flag was not removed. This can be problematic in the `threadirqs' case because the handler is exempt from forced-threading. This in turn can become a problem on a PREEMPT_RT system if the handler attempts to acquire sleeping locks. Warn about missing threaded handlers with the IRQF_ONESHOT flag. 
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Reviewed-by: Laurent Pinchart Link: https://patch.msgid.link/20260112134013.eQWyReHR@linutronix.de --- kernel/irq/manage.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index bc2d36b6b13b..dde1aa62ffe8 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1473,6 +1473,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (!(new->flags & IRQF_TRIGGER_MASK)) new->flags |= irqd_get_trigger_type(&desc->irq_data); + /* + * IRQF_ONESHOT means the interrupt source in the IRQ chip will be + * masked until the threaded handler is done. If there is no thread + * handler then it makes no sense to have IRQF_ONESHOT. + */ + WARN_ON_ONCE(new->flags & IRQF_ONESHOT && !new->thread_fn); + /* * Check whether the interrupt nests into another interrupt * thread. From fb11a2493e685d0b733c2346f5b26f2e372584fb Mon Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Mon, 12 Jan 2026 08:32:33 +0000 Subject: [PATCH 09/11] genirq: Move clear of kstat_irqs to free_desc() desc_set_defaults() has a loop to clear the per-cpu counters kstat_irqs. This is only needed in free_desc(), which is used with non-sparse IRQs so that the interrupt descriptor can be recycled. For newly allocated descriptors, the memory comes from alloc_percpu() and is already zeroed out. Move the loop to free_desc() to avoid wasting time unnecessarily. 
Signed-off-by: Luigi Rizzo Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20260112083234.2665832-1-lrizzo@google.com --- kernel/irq/irqdesc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index f8e4e13dbe33..c3bc00e08c58 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -115,8 +115,6 @@ static inline void free_masks(struct irq_desc *desc) { } static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, const struct cpumask *affinity, struct module *owner) { - int cpu; - desc->irq_common_data.handler_data = NULL; desc->irq_common_data.msi_desc = NULL; @@ -134,8 +132,6 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, desc->tot_count = 0; desc->name = NULL; desc->owner = owner; - for_each_possible_cpu(cpu) - *per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { }; desc_smp_init(desc, node, affinity); } @@ -621,9 +617,14 @@ EXPORT_SYMBOL(irq_to_desc); static void free_desc(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); + int cpu; scoped_guard(raw_spinlock_irqsave, &desc->lock) desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL); + + for_each_possible_cpu(cpu) + *per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { }; + delete_irq_desc(irq); } From dd9f6d30c64001ca4dde973ac04d8d155e856743 Mon Sep 17 00:00:00 2001 From: Imran Khan Date: Tue, 13 Jan 2026 22:37:27 +0800 Subject: [PATCH 10/11] genirq/cpuhotplug: Notify about affinity changes breaking the affinity mask During CPU offlining the interrupts affined to that CPU are moved to other online CPUs, which might break the original affinity mask if the outgoing CPU was the last online CPU in that mask. This change is not propagated to irq_desc::affinity_notify(), which leaves users of the affinity notifier mechanism with stale information. 
Avoid this by scheduling affinity change notification work for interrupts that were affined to the CPU being offlined, if the new target CPU is not part of the original affinity mask. Since irq_set_affinity_locked() uses the same logic to schedule affinity change notification work, split out this logic into a dedicated function and use that at both places. [ tglx: Removed the EXPORT(), removed the !SMP stub, moved the prototype, added a lockdep assert instead of a comment, fixed up coding style and name space. Polished and clarified the change log ] Signed-off-by: Imran Khan Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20260113143727.1041265-1-imran.f.khan@oracle.com --- kernel/irq/cpuhotplug.c | 6 ++++-- kernel/irq/internals.h | 2 +- kernel/irq/manage.c | 26 ++++++++++++++++++-------- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index 755346ea9819..cd5689e383b0 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -177,9 +177,11 @@ void irq_migrate_all_off_this_cpu(void) bool affinity_broken; desc = irq_to_desc(irq); - scoped_guard(raw_spinlock, &desc->lock) + scoped_guard(raw_spinlock, &desc->lock) { affinity_broken = migrate_one_irq(desc); - + if (affinity_broken && desc->affinity_notify) + irq_affinity_schedule_notify_work(desc); + } if (affinity_broken) { pr_debug_ratelimited("IRQ %u: no longer affine to CPU%u\n", irq, smp_processor_id()); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 202c50f0fcb2..9412e57056f5 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -135,6 +135,7 @@ extern bool irq_can_set_affinity_usr(unsigned int irq); extern int irq_do_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force); +extern void irq_affinity_schedule_notify_work(struct irq_desc *desc); #ifdef CONFIG_SMP extern int irq_setup_affinity(struct irq_desc *desc); @@ -142,7 +143,6 @@ extern int irq_setup_affinity(struct 
irq_desc *desc); static inline int irq_setup_affinity(struct irq_desc *desc) { return 0; } #endif - #define for_each_action_of_desc(desc, act) \ for (act = desc->action; act; act = act->next) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index dde1aa62ffe8..9927e0893be6 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -347,6 +347,21 @@ static bool irq_set_affinity_deactivated(struct irq_data *data, return true; } +/** + * irq_affinity_schedule_notify_work - Schedule work to notify about affinity change + * @desc: Interrupt descriptor whose affinity changed + */ +void irq_affinity_schedule_notify_work(struct irq_desc *desc) +{ + lockdep_assert_held(&desc->lock); + + kref_get(&desc->affinity_notify->kref); + if (!schedule_work(&desc->affinity_notify->work)) { + /* Work was already scheduled, drop our extra ref */ + kref_put(&desc->affinity_notify->kref, desc->affinity_notify->release); + } +} + int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, bool force) { @@ -367,14 +382,9 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, irq_copy_pending(desc, mask); } - if (desc->affinity_notify) { - kref_get(&desc->affinity_notify->kref); - if (!schedule_work(&desc->affinity_notify->work)) { - /* Work was already scheduled, drop our extra ref */ - kref_put(&desc->affinity_notify->kref, - desc->affinity_notify->release); - } - } + if (desc->affinity_notify) + irq_affinity_schedule_notify_work(desc); + irqd_set(data, IRQD_AFFINITY_SET); return ret; From 2dfc417414c6eea4e167b2f46283cded846c531a Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 27 Jan 2026 23:49:49 +0100 Subject: [PATCH 11/11] genirq/proc: Replace snprintf with strscpy in register_handler_proc Replace snprintf("%s", ...) with the faster and more direct strscpy(). 
Signed-off-by: Thorsten Blum Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20260127224949.441391-2-thorsten.blum@linux.dev --- kernel/irq/proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 77258eafbf63..b0999a4f1f68 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "internals.h" @@ -317,7 +318,7 @@ void register_handler_proc(unsigned int irq, struct irqaction *action) if (!desc->dir || action->dir || !action->name || !name_unique(irq, action)) return; - snprintf(name, MAX_NAMELEN, "%s", action->name); + strscpy(name, action->name); /* create /proc/irq/1234/handler/ */ action->dir = proc_mkdir(name, desc->dir);