From 2f6d2c8d9ac05a7a1c02333f6ad30868246880d8 Mon Sep 17 00:00:00 2001 From: Ziyang Huang Date: Tue, 2 Dec 2025 23:05:56 +0800 Subject: [PATCH 001/195] Revert "mtd: spinand: esmt: fix id code for F50D1G41LB" This reverts commit dd26402642a0899fde59ea6b0852fad3d799b4cc. The issue George met is due to the limit of QPIC, not the issue of the flash chip. QPIC only supports 4 bytes ID. So the fifth byte is always 0. If we use spi-gpio, the fifth byte can be read correctly. Signed-off-by: Ziyang Huang Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/esmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/spi/esmt.c b/drivers/mtd/nand/spi/esmt.c index e60e4ac1fd6f..3e86f346f751 100644 --- a/drivers/mtd/nand/spi/esmt.c +++ b/drivers/mtd/nand/spi/esmt.c @@ -215,7 +215,7 @@ static const struct spinand_info esmt_c8_spinand_table[] = { SPINAND_FACT_OTP_INFO(2, 0, &f50l1g41lb_fact_otp_ops)), SPINAND_INFO("F50D1G41LB", SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0x11, 0x7f, - 0x7f), + 0x7f, 0x7f), NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1), NAND_ECCREQ(1, 512), SPINAND_INFO_OP_VARIANTS(&read_cache_variants, From 600559b9817f6eaa927035eebb12534fadb35ee8 Mon Sep 17 00:00:00 2001 From: Atharv Dubey Date: Mon, 1 Dec 2025 22:26:01 +0530 Subject: [PATCH 002/195] rust: rbtree: fix minor typo in comment Fix a typo in a comment to improve clarity and readability. Suggested-by: Miguel Ojeda Link: https://github.com/Rust-for-Linux/linux/issues/1206 Signed-off-by: Atharv Dubey Reviewed-by: Alice Ryhl Link: https://patch.msgid.link/20251201165601.31484-1-atharvd440@gmail.com [ Removed one of the cases that is gone now. Reworded accordingly (and to avoid mentioning 'documentation', since it is just a comment). - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/rbtree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/rbtree.rs b/rust/kernel/rbtree.rs index 4729eb56827a..6c3dbb2e6f0d 100644 --- a/rust/kernel/rbtree.rs +++ b/rust/kernel/rbtree.rs @@ -1130,7 +1130,7 @@ pub struct IterMut<'a, K, V> { } // SAFETY: The [`IterMut`] has exclusive access to both `K` and `V`, so it is sufficient to require them to be `Send`. -// The iterator only gives out immutable references to the keys, but since the iterator has excusive access to those same +// The iterator only gives out immutable references to the keys, but since the iterator has exclusive access to those same // keys, `Send` is sufficient. `Sync` would be okay, but it is more restrictive to the user. unsafe impl<'a, K: Send, V: Send> Send for IterMut<'a, K, V> {} From 45f6aed8a835ee2bdd0a5d5ee626a91fe285014f Mon Sep 17 00:00:00 2001 From: Hang Shu Date: Fri, 7 Nov 2025 09:39:17 +0000 Subject: [PATCH 003/195] rust: rbtree: fix documentation typo in CursorMut peek_next method The peek_next method's doc comment incorrectly stated it accesses the "previous" node when it actually accesses the next node. Fix the documentation to accurately reflect the method's behavior. Fixes: 98c14e40e07a ("rust: rbtree: add cursor") Reviewed-by: Alice Ryhl Signed-off-by: Hang Shu Reported-by: Miguel Ojeda Closes: https://github.com/Rust-for-Linux/linux/issues/1205 Cc: stable@vger.kernel.org Reviewed-by: Gary Guo Link: https://patch.msgid.link/20251107093921.3379954-1-m18080292938@163.com [ Reworded slightly. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/rbtree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/rbtree.rs b/rust/kernel/rbtree.rs index 6c3dbb2e6f0d..312cecab72e7 100644 --- a/rust/kernel/rbtree.rs +++ b/rust/kernel/rbtree.rs @@ -985,7 +985,7 @@ impl<'a, K, V> CursorMut<'a, K, V> { self.peek(Direction::Prev) } - /// Access the previous node without moving the cursor. + /// Access the next node without moving the cursor. pub fn peek_next(&self) -> Option<(&K, &V)> { self.peek(Direction::Next) } From 1e4e2a847f3c0bb3f34f3b20229be5c0214b80fa Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Fri, 2 Jan 2026 09:48:21 +0100 Subject: [PATCH 004/195] rust: fmt: Fix grammar in Adapter description Add a missing `and` in the description of the `Adapter`. Fixes: c5cf01ba8dfe ("rust: support formatting of foreign types") Signed-off-by: Dirk Behme Acked-by: Tamir Duberstein Link: https://patch.msgid.link/20260102084821.1077864-1-dirk.behme@de.bosch.com [ Reworded for typo. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/fmt.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/fmt.rs b/rust/kernel/fmt.rs index 84d634201d90..1e8725eb44ed 100644 --- a/rust/kernel/fmt.rs +++ b/rust/kernel/fmt.rs @@ -6,7 +6,7 @@ pub use core::fmt::{Arguments, Debug, Error, Formatter, Result, Write}; -/// Internal adapter used to route allow implementations of formatting traits for foreign types. +/// Internal adapter used to route and allow implementations of formatting traits for foreign types. /// /// It is inserted automatically by the [`fmt!`] macro and is not meant to be used directly. /// From f6b8d4b7e54ffa1492db476c299c7058603108cb Mon Sep 17 00:00:00 2001 From: Nakamura Shuta Date: Thu, 4 Dec 2025 11:43:36 +0900 Subject: [PATCH 005/195] rust: num: fix typos in Bounded documentation Fix several typos and grammatical errors in the Bounded type documentation: - "less significant bits" -> "least significant bits" - "with in" -> "within" - "withheld" -> "upheld" - "// This" -> "// This" (double space) - "doesn't fits" -> "doesn't fit" (2 occurrences) Reported-by: Miguel Ojeda Closes: https://github.com/Rust-for-Linux/linux/issues/1210 Signed-off-by: Nakamura Shuta Acked-by: Alexandre Courbot Acked-by: Yury Norov (NVIDIA) Fixes: 01e345e82ec3 ("rust: num: add Bounded integer wrapping type") Link: https://patch.msgid.link/20251204024336.246587-1-nakamura.shuta@gmail.com [ Removed Link tag due to duplicated URL. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/num/bounded.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rust/kernel/num/bounded.rs b/rust/kernel/num/bounded.rs index f870080af8ac..c3ee79ba9746 100644 --- a/rust/kernel/num/bounded.rs +++ b/rust/kernel/num/bounded.rs @@ -40,11 +40,11 @@ fn fits_within(value: T, num_bits: u32) -> bool { fits_within!(value, T, num_bits) } -/// An integer value that requires only the `N` less significant bits of the wrapped type to be +/// An integer value that requires only the `N` least significant bits of the wrapped type to be /// encoded. /// /// This limits the number of usable bits in the wrapped integer type, and thus the stored value to -/// a narrower range, which provides guarantees that can be useful when working with in e.g. +/// a narrower range, which provides guarantees that can be useful when working within e.g. /// bitfields. /// /// # Invariants @@ -56,7 +56,7 @@ fn fits_within(value: T, num_bits: u32) -> bool { /// # Examples /// /// The preferred way to create values is through constants and the [`Bounded::new`] family of -/// constructors, as they trigger a build error if the type invariants cannot be withheld. +/// constructors, as they trigger a build error if the type invariants cannot be upheld. /// /// ``` /// use kernel::num::Bounded; @@ -82,7 +82,7 @@ fn fits_within(value: T, num_bits: u32) -> bool { /// ``` /// use kernel::num::Bounded; /// -/// // This succeeds because `15` can be represented with 4 unsigned bits. +/// // This succeeds because `15` can be represented with 4 unsigned bits. /// assert!(Bounded::::try_new(15).is_some()); /// /// // This fails because `16` cannot be represented with 4 unsigned bits. @@ -221,7 +221,7 @@ fn fits_within(value: T, num_bits: u32) -> bool { /// let v: Option> = 128u32.try_into_bounded(); /// assert_eq!(v.as_deref().copied(), Some(128)); /// -/// // Fails because `128` doesn't fits into 6 bits. +/// // Fails because `128` doesn't fit into 6 bits. /// let v: Option> = 128u32.try_into_bounded(); /// assert_eq!(v, None); /// ``` @@ -501,7 +501,7 @@ where /// let v: Option> = 128u32.try_into_bounded(); /// assert_eq!(v.as_deref().copied(), Some(128)); /// -/// // Fails because `128` doesn't fits into 6 bits. +/// // Fails because `128` doesn't fit into 6 bits. /// let v: Option> = 128u32.try_into_bounded(); /// assert_eq!(v, None); /// ``` From 946c5efe6a059f7d3303442644ee38384453ff68 Mon Sep 17 00:00:00 2001 From: Gary Guo Date: Thu, 11 Dec 2025 18:22:07 +0000 Subject: [PATCH 006/195] rust: fix off-by-one line number in rustdoc tests When the `#![allow]` line was added, the doctest line number anchor isn't updated which causes the line number printed in kunit test to be off-by-one. Fixes: ab844cf32058 ("rust: allow `unreachable_pub` for doctests") Signed-off-by: Gary Guo Reviewed-by: David Gow Link: https://patch.msgid.link/20251211182208.2791025-1-gary@kernel.org Signed-off-by: Miguel Ojeda --- scripts/rustdoc_test_gen.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/rustdoc_test_gen.rs b/scripts/rustdoc_test_gen.rs index be0561049660..6fd9f5c84e2e 100644 --- a/scripts/rustdoc_test_gen.rs +++ b/scripts/rustdoc_test_gen.rs @@ -206,7 +206,7 @@ pub extern "C" fn {kunit_name}(__kunit_test: *mut ::kernel::bindings::kunit) {{ /// The anchor where the test code body starts. #[allow(unused)] - static __DOCTEST_ANCHOR: i32 = ::core::line!() as i32 + {body_offset} + 1; + static __DOCTEST_ANCHOR: i32 = ::core::line!() as i32 + {body_offset} + 2; {{ #![allow(unreachable_pub, clippy::disallowed_names)] {body} From f1db6538794f5af081940850a7976319d376110a Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Sun, 4 Jan 2026 08:00:27 -0500 Subject: [PATCH 007/195] rust: fmt: fix formatting expressions Allow usage like `pr_info!("one + 1 = {}", one + 1)` to compile by ensuring that a reference is taken to the entire expression. [ The errors we would get otherwise look like: error[E0277]: `kernel::fmt::Adapter` doesn't implement `core::fmt::Display` --> ../samples/rust/rust_minimal.rs:34:9 | 34 | pr_info!("one + 1 = {}", one + 1); | ^^^^^^^^^^^^^^^^^^^^--^^^^^^^^^^^ | | | | | required by this formatting parameter | `kernel::fmt::Adapter` cannot be formatted with the default formatter | = help: the trait `core::fmt::Display` is not implemented for `kernel::fmt::Adapter` = note: in format strings you may be able to use `{:?}` (or {:#?} for pretty-print) instead = help: the trait `core::fmt::Display` is implemented for `kernel::fmt::Adapter<&T>` = note: this error originates in the macro `$crate::print_macro` which comes from the expansion of the macro `pr_info` (in Nightly builds, run with -Z macro-backtrace for more info) - Miguel ] Fixes: c5cf01ba8dfe ("rust: support formatting of foreign types") Reported-by: Janne Grunau Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/288089-General/topic/Custom.20formatting/near/566219493 Signed-off-by: Tamir Duberstein Reviewed-by: Alice Ryhl Tested-by: Janne Grunau Reviewed-by: Janne Grunau Link: https://patch.msgid.link/20260104-fmt-paren-v1-1-6b84bc0da78f@gmail.com [ Added Signed-off-by back. Reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/macros/fmt.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/macros/fmt.rs b/rust/macros/fmt.rs index 2f4b9f6e2211..8354abd54502 100644 --- a/rust/macros/fmt.rs +++ b/rust/macros/fmt.rs @@ -67,7 +67,7 @@ pub(crate) fn fmt(input: TokenStream) -> TokenStream { } (None, acc) })(); - args.extend(quote_spanned!(first_span => #lhs #adapter(&#rhs))); + args.extend(quote_spanned!(first_span => #lhs #adapter(&(#rhs)))); } }; From c18f35e4904920db4c51620ba634e4d175b24741 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 23 Dec 2025 20:35:38 +0900 Subject: [PATCH 008/195] objtool/rust: add one more `noreturn` Rust function Fix the following warning: rust/kernel.o: warning: objtool: _RNvXNtNtCs1ewLyjEZ7Le_6kernel3str9parse_intaNtNtB2_7private12FromStrRadix14from_str_radix() falls through to next function _RNvXNtNtCs1ewLyjEZ7Le_6kernel3str9parse_intaNtNtB2_7private12FromStrRadix16from_u64_negated() The commit 51d9ee90ea90 ("rust: str: add radix prefixed integer parsing functions") introduces u64::from_str_radix(), whose implementation contains a panic path for out-of-range radix values. The panic helper is core::num::from_ascii_radix_panic(). Note that radix is derived from strip_radix() here and is always within the valid range, so kernel never panics. Fixes: 51d9ee90ea90 ("rust: str: add radix prefixed integer parsing functions") Signed-off-by: FUJITA Tomonori Reviewed-by: Alice Ryhl Tested-by: Alice Ryhl Link: https://patch.msgid.link/20251223113538.1016078-1-fujita.tomonori@gmail.com [ Reworded typo. - Miguel ] Signed-off-by: Miguel Ojeda --- tools/objtool/check.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 3f7999317f4d..719ec727efd4 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -197,7 +197,8 @@ static bool is_rust_noreturn(const struct symbol *func) * as well as changes to the source code itself between versions (since * these come from the Rust standard library). */ - return str_ends_with(func->name, "_4core5sliceSp15copy_from_slice17len_mismatch_fail") || + return str_ends_with(func->name, "_4core3num22from_ascii_radix_panic") || + str_ends_with(func->name, "_4core5sliceSp15copy_from_slice17len_mismatch_fail") || str_ends_with(func->name, "_4core6option13expect_failed") || str_ends_with(func->name, "_4core6option13unwrap_failed") || str_ends_with(func->name, "_4core6result13unwrap_failed") || From 609db7e73b3ecc6a0b44dc6486e88e4bce6fd8c0 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Wed, 17 Dec 2025 17:40:50 -0500 Subject: [PATCH 009/195] rust: kbuild: Add -fdiagnostics-show-context to bindgen_skip_c_flags This got added with: 7454048db27d ("kbuild: Enable GCC diagnostic context for value-tracking warnings") but clang does not have this option, so avoid passing it to bindgen. [ Details about what the option does are in the commit above. Nathan also expands on this: Right, this does look correct, as this option is specific to GCC for the purpose of exposing more information from GCC internals to the user for understanding diagnostics better. I checked that in Compiler Explorer GCC 15.2 doesn't have it, but GCC trunk indeed has. - Miguel ] Fixes: 7454048db27d ("kbuild: Enable GCC diagnostic context for value-tracking warnings") Signed-off-by: Siddhesh Poyarekar Link: https://patch.msgid.link/20251217224050.1186896-1-siddhesh@gotplt.org [ Removed Cc: stable. Added title prefix. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/Makefile b/rust/Makefile index 5d357dce1704..4dcc2eff51cb 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -383,6 +383,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ -fno-inline-functions-called-once -fsanitize=bounds-strict \ -fstrict-flex-arrays=% -fmin-function-alignment=% \ -fzero-init-padding-bits=% -mno-fdpic \ + -fdiagnostics-show-context -fdiagnostics-show-context=% \ --param=% --param asan-% -fno-isolate-erroneous-paths-dereference # Derived from `scripts/Makefile.clang`. From 3a1ec424dd9c9491138a5ebadb24ce9f33e6a822 Mon Sep 17 00:00:00 2001 From: Hsiu Che Yu Date: Thu, 4 Dec 2025 11:38:48 +0800 Subject: [PATCH 010/195] rust: num: bounded: mark __new as unsafe The `Bounded::__new()` constructor relies on the caller to ensure the value can be represented within N bits. Failing to uphold this requirement breaks the type invariant. Mark it as unsafe and document this requirement in a Safety section to make the contract explicit. Update all call sites to use unsafe blocks and change their comments from `INVARIANT:` to `SAFETY:`, as they are now justifying unsafe operations rather than establishing type invariants. Fixes: 01e345e82ec3a ("rust: num: add Bounded integer wrapping type") Link: https://lore.kernel.org/all/aS1qC_ol2XEpZ44b@google.com/ Reported-by: Miguel Ojeda Closes: https://github.com/Rust-for-Linux/linux/issues/1211 Signed-off-by: Hsiu Che Yu Acked-by: Alexandre Courbot Link: https://patch.msgid.link/20251204033849.23480-1-yu.whisper.personal@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/num/bounded.rs | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/rust/kernel/num/bounded.rs b/rust/kernel/num/bounded.rs index c3ee79ba9746..c9a44e12c19b 100644 --- a/rust/kernel/num/bounded.rs +++ b/rust/kernel/num/bounded.rs @@ -259,9 +259,9 @@ macro_rules! impl_const_new { assert!(fits_within!(VALUE, $type, N)); } - // INVARIANT: `fits_within` confirmed that `VALUE` can be represented within + // SAFETY: `fits_within` confirmed that `VALUE` can be represented within // `N` bits. - Self::__new(VALUE) + unsafe { Self::__new(VALUE) } } } )* @@ -284,7 +284,11 @@ where /// /// The caller remains responsible for checking, either statically or dynamically, that `value` /// can be represented as a `T` using at most `N` bits. - const fn __new(value: T) -> Self { + /// + /// # Safety + /// + /// The caller must ensure that `value` can be represented within `N` bits. + const unsafe fn __new(value: T) -> Self { // Enforce the type invariants. const { // `N` cannot be zero. @@ -328,8 +332,8 @@ where /// ``` pub fn try_new(value: T) -> Option { fits_within(value, N).then(|| { - // INVARIANT: `fits_within` confirmed that `value` can be represented within `N` bits. - Self::__new(value) + // SAFETY: `fits_within` confirmed that `value` can be represented within `N` bits. + unsafe { Self::__new(value) } }) } @@ -370,8 +374,8 @@ where "Requested value larger than maximal representable value." ); - // INVARIANT: `fits_within` confirmed that `expr` can be represented within `N` bits. - Self::__new(expr) + // SAFETY: `fits_within` confirmed that `expr` can be represented within `N` bits. + unsafe { Self::__new(expr) } } /// Returns the wrapped value as the backing type. @@ -410,9 +414,9 @@ where ); } - // INVARIANT: The value did fit within `N` bits, so it will all the more fit within + // SAFETY: The value did fit within `N` bits, so it will all the more fit within // the larger `M` bits. - Bounded::__new(self.0) + unsafe { Bounded::__new(self.0) } } /// Attempts to shrink the number of bits usable for `self`. @@ -466,9 +470,9 @@ where // `U` and `T` have the same sign, hence this conversion cannot fail. let value = unsafe { U::try_from(self.get()).unwrap_unchecked() }; - // INVARIANT: Although the backing type has changed, the value is still represented within + // SAFETY: Although the backing type has changed, the value is still represented within // `N` bits, and with the same signedness. - Bounded::__new(value) + unsafe { Bounded::__new(value) } } } @@ -944,9 +948,9 @@ macro_rules! impl_from_primitive { Self: AtLeastXBits<{ <$type as Integer>::BITS as usize }>, { fn from(value: $type) -> Self { - // INVARIANT: The trait bound on `Self` guarantees that `N` bits is + // SAFETY: The trait bound on `Self` guarantees that `N` bits is // enough to hold any value of the source type. - Self::__new(T::from(value)) + unsafe { Self::__new(T::from(value)) } } } )* @@ -1051,8 +1055,8 @@ where T: Integer + From, { fn from(value: bool) -> Self { - // INVARIANT: A boolean can be represented using a single bit, and thus fits within any + // SAFETY: A boolean can be represented using a single bit, and thus fits within any // integer type for any `N` > 0. - Self::__new(T::from(value)) + unsafe { Self::__new(T::from(value)) } } } From 81a304f5b39c9a0a26c1b42997e60a5c9be05ec8 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 14 Jan 2026 18:16:38 +0000 Subject: [PATCH 011/195] rust: macros: ignore example with module parameters `ModuleParamAccess` uses `SetOnce`, which depends on the helper functions so the `macros` crate example under `rusttest` fails to build: ---- rust/macros/lib.rs - module (line 62) stdout ---- error: linking with `cc` failed: exit status: 1 | = note: "cc" "-m64" ... = note: some arguments are omitted. use `--verbose` to show all linker arguments = note: rust-lld: error: undefined symbol: rust_helper_atomic_try_cmpxchg_relaxed >>> referenced by kernel.ecd446ce39a5fcbb-cgu.3 >>> kernel.kernel.ecd446ce39a5fcbb-cgu.3.rcgu.o:(kernel::sync::set_once::SetOnce$LT$T$GT$::populate::h8b02644e30bd70bc) in archive ./rust/test/libkernel.rlib rust-lld: error: undefined symbol: rust_helper_atomic_set_release >>> referenced by kernel.ecd446ce39a5fcbb-cgu.3 >>> kernel.kernel.ecd446ce39a5fcbb-cgu.3.rcgu.o:(kernel::sync::set_once::SetOnce$LT$T$GT$::populate::h8b02644e30bd70bc) in archive ./rust/test/libkernel.rlib collect2: error: ld returned 1 exit status Thus ignore that example to fix the error. [ Only the first one is needed (the other example does not use parameters), so we can keep it enabled. Thus I removed that second deletion (and reworded a bit). We may want to do something better here later on; on the other hand, we should get KUnit tests for `macros` too eventually, so we may end up removing or repurposing that target anyway, so it is not a big deal. - Miguel ] Reported-by: Miguel Ojeda Closes: https://lore.kernel.org/rust-for-linux/CANiq72mEYacdZmHKvpbahJzO_X_qqYyiSiSTYaWEQZAfp6sbxg@mail.gmail.com/ Signed-off-by: FUJITA Tomonori Fixes: 0b24f9740f26 ("rust: module: update the module macro with module parameter support") Link: https://patch.msgid.link/20251210.082603.290476643413141778.fujita.tomonori@gmail.com Signed-off-by: Miguel Ojeda --- rust/macros/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs index b38002151871..33f66e86418a 100644 --- a/rust/macros/lib.rs +++ b/rust/macros/lib.rs @@ -59,7 +59,7 @@ use proc_macro::TokenStream; /// /// # Examples /// -/// ``` +/// ```ignore /// use kernel::prelude::*; /// /// module!{ From 5157c328edb35bac05ce77da473c3209d20e0bbb Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 23 Jul 2025 11:39:40 -0400 Subject: [PATCH 012/195] scripts: generate_rust_analyzer: Add compiler_builtins -> core dep Add a dependency edge from `compiler_builtins` to `core` to `scripts/generate_rust_analyzer.py` to match `rust/Makefile`. This has been incorrect since commit 8c4555ccc55c ("scripts: add `generate_rust_analyzer.py`") Signed-off-by: Tamir Duberstein Reviewed-by: Jesung Yang Acked-by: Benno Lossin Fixes: 8c4555ccc55c ("scripts: add `generate_rust_analyzer.py`") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250723-rust-analyzer-pin-init-v1-1-3c6956173c78@kernel.org Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index 147d0cc94068..9e92590af377 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -83,7 +83,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edit append_crate( "compiler_builtins", srctree / "rust" / "compiler_builtins.rs", - [], + ["core"], ) append_crate( From 98dcca855343512a99432224447f07c5988753ad Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 23 Jul 2025 11:39:41 -0400 Subject: [PATCH 013/195] scripts: generate_rust_analyzer: Add pin_init -> compiler_builtins dep Add a dependency edge from `pin_init` to `compiler_builtins` to `scripts/generate_rust_analyzer.py` to match `rust/Makefile`. This has been incorrect since commit d7659acca7a3 ("rust: add pin-init crate build infrastructure"). Signed-off-by: Tamir Duberstein Reviewed-by: Jesung Yang Acked-by: Benno Lossin Fixes: d7659acca7a3 ("rust: add pin-init crate build infrastructure") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250723-rust-analyzer-pin-init-v1-2-3c6956173c78@kernel.org Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index 9e92590af377..5ebabbb0f1e8 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -131,7 +131,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edit append_crate( "pin_init", srctree / "rust" / "pin-init" / "src" / "lib.rs", - ["core", "pin_init_internal", "macros"], + ["core", "compiler_builtins", "pin_init_internal", "macros"], cfg=["kernel"], ) From 74e15ac34b098934895fd27655d098971d2b43d9 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 23 Jul 2025 11:39:42 -0400 Subject: [PATCH 014/195] scripts: generate_rust_analyzer: Add pin_init_internal deps Commit d7659acca7a3 ("rust: add pin-init crate build infrastructure") did not add dependencies to `pin_init_internal`, resulting in broken navigation. Thus add them now. [ Tamir elaborates: "before this series, go-to-symbol from pin_init_internal to e.g. proc_macro::TokenStream doesn't work." - Miguel ] Signed-off-by: Tamir Duberstein Reviewed-by: Jesung Yang Acked-by: Benno Lossin Fixes: d7659acca7a3 ("rust: add pin-init crate build infrastructure") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250723-rust-analyzer-pin-init-v1-3-3c6956173c78@kernel.org Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index 5ebabbb0f1e8..6061bd6e2ebd 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -123,7 +123,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edit append_crate( "pin_init_internal", srctree / "rust" / "pin-init" / "internal" / "src" / "lib.rs", - [], + ["std", "proc_macro"], cfg=["kernel"], is_proc_macro=True, ) From cfbe371194d1f342bdd88f87a9b36407d1ec0f52 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 12 Jan 2026 15:28:05 -0800 Subject: [PATCH 015/195] KVM: SVM: Check vCPU ID against max x2AVIC ID if and only if x2AVIC is enabled When allocating the AVIC backing page, only check one of the max AVIC vs. x2AVIC ID based on whether or not x2AVIC is enabled. Doing so fixes a bug where KVM incorrectly inhibits AVIC if x2AVIC is _disabled_ and any vCPU with a non-zero APIC ID is created, as x2avic_max_physical_id is left '0' when x2AVIC is disabled. Fixes: 940fc47cfb0d ("KVM: SVM: Add AVIC support for 4k vCPUs in x2AVIC mode") Cc: stable@vger.kernel.org Cc: Naveen N Rao (AMD) Cc: Suravee Suthikulpanit Reviewed-by: Naveen N Rao (AMD) Link: https://patch.msgid.link/20260112232805.1512361-1-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/avic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 6b77b2033208..0f6c8596719b 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -376,6 +376,7 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb) static int avic_init_backing_page(struct kvm_vcpu *vcpu) { + u32 max_id = x2avic_enabled ? x2avic_max_physical_id : AVIC_MAX_PHYSICAL_ID; struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); struct vcpu_svm *svm = to_svm(vcpu); u32 id = vcpu->vcpu_id; @@ -388,8 +389,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) * avic_vcpu_load() expects to be called if and only if the vCPU has * fully initialized AVIC. */ - if ((!x2avic_enabled && id > AVIC_MAX_PHYSICAL_ID) || - (id > x2avic_max_physical_id)) { + if (id > max_id) { kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG); vcpu->arch.apic->apicv_active = false; return 0; From b4d37cdb77a0015f51fee083598fa227cc07aaf1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jan 2026 09:46:05 -0800 Subject: [PATCH 016/195] KVM: Don't clobber irqfd routing type when deassigning irqfd When deassigning a KVM_IRQFD, don't clobber the irqfd's copy of the IRQ's routing entry as doing so breaks kvm_arch_irq_bypass_del_producer() on x86 and arm64, which explicitly look for KVM_IRQ_ROUTING_MSI. Instead, to handle a concurrent routing update, verify that the irqfd is still active before consuming the routing information. As evidenced by the x86 and arm64 bugs, and another bug in kvm_arch_update_irqfd_routing() (see below), clobbering the entry type without notifying arch code is surprising and error prone. As a bonus, checking that the irqfd is active provides a convenient location for documenting _why_ KVM must not consume the routing entry for an irqfd that is in the process of being deassigned: once the irqfd is deleted from the list (which happens *before* the eventfd is detached), it will no longer receive updates via kvm_irq_routing_update(), and so KVM could deliver an event using stale routing information (relative to KVM_SET_GSI_ROUTING returning to userspace). As an even better bonus, explicitly checking for the irqfd being active fixes a similar bug to the one the clobbering is trying to prevent: if an irqfd is deactivated, and then its routing is changed, kvm_irq_routing_update() won't invoke kvm_arch_update_irqfd_routing() (because the irqfd isn't in the list). And so if the irqfd is in bypass mode, IRQs will continue to be posted using the old routing information. As for kvm_arch_irq_bypass_del_producer(), clobbering the routing type results in KVM incorrectly keeping the IRQ in bypass mode, which is especially problematic on AMD as KVM tracks IRQs that are being posted to a vCPU in a list whose lifetime is tied to the irqfd. Without the help of KASAN to detect use-after-free, the most common sympton on AMD is a NULL pointer deref in amd_iommu_update_ga() due to the memory for irqfd structure being re-allocated and zeroed, resulting in irqfd->irq_bypass_data being NULL when read by avic_update_iommu_vcpu_affinity(): BUG: kernel NULL pointer dereference, address: 0000000000000018 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 40cf2b9067 P4D 40cf2b9067 PUD 408362a067 PMD 0 Oops: Oops: 0000 [#1] SMP CPU: 6 UID: 0 PID: 40383 Comm: vfio_irq_test Tainted: G U W O 6.19.0-smp--5dddc257e6b2-irqfd #31 NONE Tainted: [U]=USER, [W]=WARN, [O]=OOT_MODULE Hardware name: Google, Inc. Arcadia_IT_80/Arcadia_IT_80, BIOS 34.78.2-0 09/05/2025 RIP: 0010:amd_iommu_update_ga+0x19/0xe0 Call Trace: avic_update_iommu_vcpu_affinity+0x3d/0x90 [kvm_amd] __avic_vcpu_load+0xf4/0x130 [kvm_amd] kvm_arch_vcpu_load+0x89/0x210 [kvm] vcpu_load+0x30/0x40 [kvm] kvm_arch_vcpu_ioctl_run+0x45/0x620 [kvm] kvm_vcpu_ioctl+0x571/0x6a0 [kvm] __se_sys_ioctl+0x6d/0xb0 do_syscall_64+0x6f/0x9d0 entry_SYSCALL_64_after_hwframe+0x4b/0x53 RIP: 0033:0x46893b ---[ end trace 0000000000000000 ]--- If AVIC is inhibited when the irfd is deassigned, the bug will manifest as list corruption, e.g. on the next irqfd assignment. list_add corruption. next->prev should be prev (ffff8d474d5cd588), but was 0000000000000000. (next=ffff8d8658f86530). ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:31! Oops: invalid opcode: 0000 [#1] SMP CPU: 128 UID: 0 PID: 80818 Comm: vfio_irq_test Tainted: G U W O 6.19.0-smp--f19dc4d680ba-irqfd #28 NONE Tainted: [U]=USER, [W]=WARN, [O]=OOT_MODULE Hardware name: Google, Inc. Arcadia_IT_80/Arcadia_IT_80, BIOS 34.78.2-0 09/05/2025 RIP: 0010:__list_add_valid_or_report+0x97/0xc0 Call Trace: avic_pi_update_irte+0x28e/0x2b0 [kvm_amd] kvm_pi_update_irte+0xbf/0x190 [kvm] kvm_arch_irq_bypass_add_producer+0x72/0x90 [kvm] irq_bypass_register_consumer+0xcd/0x170 [irqbypass] kvm_irqfd+0x4c6/0x540 [kvm] kvm_vm_ioctl+0x118/0x5d0 [kvm] __se_sys_ioctl+0x6d/0xb0 do_syscall_64+0x6f/0x9d0 entry_SYSCALL_64_after_hwframe+0x4b/0x53 ---[ end trace 0000000000000000 ]--- On Intel and arm64, the bug is less noisy, as the end result is that the device keeps posting IRQs to the vCPU even after it's been deassigned. Note, the worst of the breakage can be traced back to commit cb210737675e ("KVM: Pass new routing entries and irqfd when updating IRTEs"), as before that commit KVM would pull the routing information from the per-VM routing table. But as above, similar bugs have existed since support for IRQ bypass was added. E.g. if a routing change finished before irq_shutdown() invoked kvm_arch_irq_bypass_del_producer(), VMX and SVM would see stale routing information and potentially leave the irqfd in bypass mode. Alternatively, x86 could be fixed by explicitly checking irq_bypass_vcpu instead of irq_entry.type in kvm_arch_irq_bypass_del_producer(), and arm64 could be modified to utilize irq_bypass_vcpu in a similar manner. But (a) that wouldn't fix the routing updates bug, and (b) fixing core code doesn't preclude x86 (or arm64) from adding such code as a sanity check (spoiler alert). Fixes: f70c20aaf141 ("KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'") Fixes: cb210737675e ("KVM: Pass new routing entries and irqfd when updating IRTEs") Fixes: a0d7e2fc61ab ("KVM: arm64: vgic-v4: Only attempt vLPI mapping for actual MSIs") Cc: stable@vger.kernel.org Cc: Marc Zyngier Cc: Oliver Upton Link: https://patch.msgid.link/20260113174606.104978-2-seanjc@google.com Signed-off-by: Sean Christopherson --- virt/kvm/eventfd.c | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 0e8b5277be3b..a369b20d47f0 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -157,21 +157,28 @@ irqfd_shutdown(struct work_struct *work) } -/* assumes kvm->irqfds.lock is held */ -static bool -irqfd_is_active(struct kvm_kernel_irqfd *irqfd) +static bool irqfd_is_active(struct kvm_kernel_irqfd *irqfd) { + /* + * Assert that either irqfds.lock or SRCU is held, as irqfds.lock must + * be held to prevent false positives (on the irqfd being active), and + * while false negatives are impossible as irqfds are never added back + * to the list once they're deactivated, the caller must at least hold + * SRCU to guard against routing changes if the irqfd is deactivated. + */ + lockdep_assert_once(lockdep_is_held(&irqfd->kvm->irqfds.lock) || + srcu_read_lock_held(&irqfd->kvm->irq_srcu)); + return list_empty(&irqfd->list) ? false : true; } /* * Mark the irqfd as inactive and schedule it for removal - * - * assumes kvm->irqfds.lock is held */ -static void -irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) +static void irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) { + lockdep_assert_held(&irqfd->kvm->irqfds.lock); + BUG_ON(!irqfd_is_active(irqfd)); list_del_init(&irqfd->list); @@ -217,8 +224,15 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) seq = read_seqcount_begin(&irqfd->irq_entry_sc); irq = irqfd->irq_entry; } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); - /* An event has been signaled, inject an interrupt */ - if (kvm_arch_set_irq_inatomic(&irq, kvm, + + /* + * An event has been signaled, inject an interrupt unless the + * irqfd is being deassigned (isn't active), in which case the + * routing information may be stale (once the irqfd is removed + * from the list, it will stop receiving routing updates). + */ + if (unlikely(!irqfd_is_active(irqfd)) || + kvm_arch_set_irq_inatomic(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false) == -EWOULDBLOCK) schedule_work(&irqfd->inject); @@ -585,18 +599,8 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) spin_lock_irq(&kvm->irqfds.lock); list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { - if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) { - /* - * This clearing of irq_entry.type is needed for when - * another thread calls kvm_irq_routing_update before - * we flush workqueue below (we synchronize with - * kvm_irq_routing_update using irqfds.lock). - */ - write_seqcount_begin(&irqfd->irq_entry_sc); - irqfd->irq_entry.type = 0; - write_seqcount_end(&irqfd->irq_entry_sc); + if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) irqfd_deactivate(irqfd); - } } spin_unlock_irq(&kvm->irqfds.lock); From ef3719e33e6649164382c629d58704b828f56079 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jan 2026 09:46:06 -0800 Subject: [PATCH 017/195] KVM: x86: Assert that non-MSI doesn't have bypass vCPU when deleting producer When disconnecting a non-MSI irqfd from an IRQ bypass producer, WARN if the irqfd is configured for IRQ bypass and set its IRTE back to remapped mode to harden against kernel/KVM bugs (keeping the irqfd in bypass mode is often fatal to the host). Deactivating an irqfd (removing it from the list of irqfds), updating irqfd routes, and the code in question are all mutually exclusive (all run under irqfds.lock). If an irqfd is configured for bypass, and the irqfd is deassigned at the same time IRQ routing is updated (to change the routing to non-MSI), then either kvm_arch_update_irqfd_routing() should process the irqfd routing change and put the IRTE into remapped mode (routing update "wins"), or kvm_arch_irq_bypass_del_producer() should see the MSI routing info (deactivation "wins"). Link: https://patch.msgid.link/20260113174606.104978-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/irq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 7cc8950005b6..4c7688670c2d 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -514,7 +514,8 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, */ spin_lock_irq(&kvm->irqfds.lock); - if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) { + if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI || + WARN_ON_ONCE(irqfd->irq_bypass_vcpu)) { ret = kvm_pi_update_irte(irqfd, NULL); if (ret) pr_info("irq bypass consumer (eventfd %p) unregistration fails: %d\n", From af20ae33e7dd949f2e770198e74ac8f058cb299d Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Thu, 15 Jan 2026 19:38:32 +0100 Subject: [PATCH 018/195] rust: kbuild: give `--config-path` to `rustfmt` in `.rsi` target `rustfmt` is configured via the `.rustfmt.toml` file in the source tree, and we apply `rustfmt` to the macro expanded sources generated by the `.rsi` target. However, under an `O=` pointing to an external folder (i.e. not just a subdir), `rustfmt` will not find the file when checking the parent folders. Since the edition is configured in this file, this can lead to errors when it encounters newer syntax, e.g. error: expected one of `!`, `.`, `::`, `;`, `?`, `where`, `{`, or an operator, found `"rust_minimal"` --> samples/rust/rust_minimal.rsi:29:49 | 28 | impl ::kernel::ModuleMetadata for RustMinimal { | - while parsing this item list starting here 29 | const NAME: &'static ::kernel::str::CStr = c"rust_minimal"; | ^^^^^^^^^^^^^^ expected one of 8 possible tokens 30 | } | - the item list ends here | = note: you may be trying to write a c-string literal = note: c-string literals require Rust 2021 or later = help: pass `--edition 2024` to `rustc` = note: for more on editions, read https://doc.rust-lang.org/edition-guide A workaround is to use `RUSTFMT=n`, which is documented in the `Makefile` help for cases where macro expanded source may happen to break `rustfmt` for other reasons, but this is not one of those cases. One solution would be to pass `--edition`, but we want `rustfmt` to use the entire configuration, even if currently we essentially use the default configuration. Thus explicitly give the path to the config file to `rustfmt` instead. Reported-by: Alice Ryhl Fixes: 2f7ab1267dc9 ("Kbuild: add Rust support") Cc: stable@vger.kernel.org Reviewed-by: Nathan Chancellor Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260115183832.46595-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- scripts/Makefile.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 5037f4715d74..0c838c467c76 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -356,7 +356,7 @@ $(obj)/%.o: $(obj)/%.rs FORCE quiet_cmd_rustc_rsi_rs = $(RUSTC_OR_CLIPPY_QUIET) $(quiet_modtag) $@ cmd_rustc_rsi_rs = \ $(rust_common_cmd) -Zunpretty=expanded $< >$@; \ - command -v $(RUSTFMT) >/dev/null && $(RUSTFMT) $@ + command -v $(RUSTFMT) >/dev/null && $(RUSTFMT) --config-path $(srctree)/.rustfmt.toml $@ $(obj)/%.rsi: $(obj)/%.rs FORCE +$(call if_changed_dep,rustc_rsi_rs) From 1b83ef9f7ad4635c913b80ef5e718f95f48e85af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=96zkan?= Date: Wed, 24 Dec 2025 16:53:43 +0300 Subject: [PATCH 019/195] scripts: generate_rust_analyzer: remove sysroot assertion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With nixpkgs's rustc, rust-src component is not bundled with the compiler by default and is instead provided from a separate store path, so this assumption does not hold. The assertion assumes these paths are in the same location which causes `make LLVM=1 rust-analyzer` to fail on NixOS. Link: https://rust-for-linux.zulipchat.com/#narrow/stream/x/topic/x/near/565284250 Signed-off-by: Onur Özkan Reviewed-by: Gary Guo Fixes: fe992163575b ("rust: Support latest version of `rust-analyzer`") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20251224135343.32476-1-work@onurozkan.dev [ Reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index 6061bd6e2ebd..14c74dfee703 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -213,9 +213,6 @@ def main(): level=logging.INFO if args.verbose else logging.WARNING ) - # Making sure that the `sysroot` and `sysroot_src` belong to the same toolchain. - assert args.sysroot in args.sysroot_src.parents - rust_project = { "crates": generate_crates(args.srctree, args.objtree, args.sysroot_src, args.exttree, args.cfgs, args.core_edition), "sysroot": str(args.sysroot), From 87417cc95b0f1096cc27011ec750d9f988a63e83 Mon Sep 17 00:00:00 2001 From: Jesung Yang Date: Tue, 25 Nov 2025 09:32:59 +0000 Subject: [PATCH 020/195] scripts: generate_rust_analyzer: syn: treat `std` as a dependency Fix the `generate_rust_analyzer.py` script to ensure that the `rust-project.json` it produces includes `std` in the `deps` field for the `syn` crate. `syn` directly references items from `std`, so rust-analyzer should treat it as a dependency to provide correct IDE support. For example, `syn::Punctuated` contains fields of type `Vec<..>` and `Option<..>`, both of which come from the standard library prelude. With `std` listed in the `deps` field, rust-analyzer can infer the types of these fields instead of showing `{unknown}`. Verified the explicit uses of `std` using: grep -rn 'std::' rust/syn/ Fixes: 737401751ace ("rust: syn: enable support in kbuild") Signed-off-by: Jesung Yang Reviewed-by: Tamir Duberstein Tested-by: Tamir Duberstein Link: https://patch.msgid.link/6dbdf6e1c1639ae381ca9ab7041f84728ffa2267.1764062688.git.y.j3ms.n@gmail.com [ Reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index 14c74dfee703..aec6e9f449ab 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -103,7 +103,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edit append_crate( "syn", srctree / "rust" / "syn" / "lib.rs", - ["proc_macro", "proc_macro2", "quote"], + ["std", "proc_macro", "proc_macro2", "quote"], cfg=crates_cfgs["syn"], ) From 3a50257e560043bf8968f807af65f32c98d7dbf9 Mon Sep 17 00:00:00 2001 From: Jesung Yang Date: Tue, 25 Nov 2025 09:33:00 +0000 Subject: [PATCH 021/195] scripts: generate_rust_analyzer: quote: treat `core` and `std` as dependencies Fix the `generate_rust_analyzer.py` script to ensure that the `rust-project.json` it produces includes `core` and `std` in the `deps` field for the `quote` crate. `quote` directly references items from both `core` and `std`, so rust-analyzer should treat them as dependencies to provide correct IDE support. For example, the `::quote::ToTokens` trait is implemented for `std::ffi::CString`. With `std` listed in the `deps` field, rust-analyzer can show the expected autocomplete for the `::quote::ToTokens` methods on `std::ffi::CString`. Verified the explicit uses of `core` and `std` using: grep -rnE 'core::|std::' rust/quote/ Fixes: 88de91cc1ce7 ("rust: quote: enable support in kbuild") Signed-off-by: Jesung Yang Reviewed-by: Tamir Duberstein Link: https://patch.msgid.link/cef76fc1105481d219953c8552eb5eb07dac707a.1764062688.git.y.j3ms.n@gmail.com [ Reworded title. - Miguel ] Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index aec6e9f449ab..3f5d2d4ce5de 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -96,7 +96,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edit append_crate( "quote", srctree / "rust" / "quote" / "lib.rs", - ["alloc", "proc_macro", "proc_macro2"], + ["core", "alloc", "std", "proc_macro", "proc_macro2"], cfg=crates_cfgs["quote"], ) From bc83834c157676e7cca62b876b5e3da1bee06285 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Thu, 15 Jan 2026 11:45:59 -0500 Subject: [PATCH 022/195] scripts: generate_rust_analyzer: compile quote with correct edition Our copy of the quote crate uses edition 2018, thus generate the correct rust-analyzer configuration for it. Fixes: 88de91cc1ce7 ("rust: quote: enable support in kbuild") Signed-off-by: Tamir Duberstein Reviewed-by: Jesung Yang Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260115-rust-analyzer-quote-edition-v1-1-d492f880dde4@gmail.com Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index 3f5d2d4ce5de..20e6adaefb5a 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -98,6 +98,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edit srctree / "rust" / "quote" / "lib.rs", ["core", "alloc", "std", "proc_macro", "proc_macro2"], cfg=crates_cfgs["quote"], + edition="2018", ) append_crate( From ac3c50b9a24e9ebeb585679078d6c47922034bb6 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 16 Jan 2026 15:46:04 -0500 Subject: [PATCH 023/195] scripts: generate_rust_analyzer: compile sysroot with correct edition Use `core_edition` for all sysroot crates rather than just core as all were updated to edition 2024 in Rust 1.87. Fixes: f4daa80d6be7 ("rust: compile libcore with edition 2024 for 1.87+") Signed-off-by: Tamir Duberstein Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260116-rust-analyzer-sysroot-v2-1-094aedc33208@kernel.org [ Added `>`s to make the quote a single block. - Miguel ] Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index 20e6adaefb5a..3b645da90092 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -61,7 +61,6 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edit display_name, deps, cfg=[], - edition="2021", ): append_crate( display_name, @@ -69,13 +68,37 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edit deps, cfg, is_workspace_member=False, - edition=edition, + # Miguel Ojeda writes: + # + # > ... in principle even the sysroot crates may have different + # > editions. + # > + # > For instance, in the move to 2024, it seems all happened at once + # > in 1.87.0 in these upstream commits: + # > + # > 0e071c2c6a58 ("Migrate core to Rust 2024") + # > f505d4e8e380 ("Migrate alloc to Rust 2024") + # > 0b2489c226c3 ("Migrate proc_macro to Rust 2024") + # > 993359e70112 ("Migrate std to Rust 2024") + # > + # > But in the previous move to 2021, `std` moved in 1.59.0, while + # > the others in 1.60.0: + # > + # > b656384d8398 ("Update stdlib to the 2021 edition") + # > 06a1c14d52a8 ("Switch all libraries to the 2021 edition") + # + # Link: https://lore.kernel.org/all/CANiq72kd9bHdKaAm=8xCUhSHMy2csyVed69bOc4dXyFAW4sfuw@mail.gmail.com/ + # + # At the time of writing all rust versions we support build the + # sysroot crates with the same edition. We may need to relax this + # assumption if future edition moves span multiple rust versions. + edition=core_edition, ) # NB: sysroot crates reexport items from one another so setting up our transitive dependencies # here is important for ensuring that rust-analyzer can resolve symbols. The sources of truth # for this dependency graph are `(sysroot_src / crate / "Cargo.toml" for crate in crates)`. - append_sysroot_crate("core", [], cfg=crates_cfgs.get("core", []), edition=core_edition) + append_sysroot_crate("core", [], cfg=crates_cfgs.get("core", [])) append_sysroot_crate("alloc", ["core"]) append_sysroot_crate("std", ["alloc", "core"]) append_sysroot_crate("proc_macro", ["core", "std"]) From 09c3c9112d71c44146419c87c55c710e68335741 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 8 Dec 2025 11:47:02 +0900 Subject: [PATCH 024/195] rust: bits: always inline functions using build_assert with arguments `build_assert` relies on the compiler to optimize out its error path. Functions using it with its arguments must thus always be inlined, otherwise the error path of `build_assert` might not be optimized out, triggering a build error. Cc: stable@vger.kernel.org Fixes: cc84ef3b88f4 ("rust: bits: add support for bits/genmask macros") Reviewed-by: Daniel Almeida Signed-off-by: Alexandre Courbot Link: https://patch.msgid.link/20251208-io-build-assert-v3-4-98aded02c1ea@nvidia.com Signed-off-by: Miguel Ojeda --- rust/kernel/bits.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/rust/kernel/bits.rs b/rust/kernel/bits.rs index 553d50265883..2daead125626 100644 --- a/rust/kernel/bits.rs +++ b/rust/kernel/bits.rs @@ -27,7 +27,8 @@ macro_rules! impl_bit_fn { /// /// This version is the default and should be used if `n` is known at /// compile time. - #[inline] + // Always inline to optimize out error path of `build_assert`. + #[inline(always)] pub const fn [](n: u32) -> $ty { build_assert!(n < <$ty>::BITS); (1 as $ty) << n @@ -75,7 +76,8 @@ macro_rules! impl_genmask_fn { /// This version is the default and should be used if the range is known /// at compile time. $(#[$genmask_ex])* - #[inline] + // Always inline to optimize out error path of `build_assert`. + #[inline(always)] pub const fn [](range: RangeInclusive) -> $ty { let start = *range.start(); let end = *range.end(); From d6ff6e870077ae0f01a6f860ca1e4a5a825dc032 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 8 Dec 2025 11:47:03 +0900 Subject: [PATCH 025/195] rust: sync: refcount: always inline functions using build_assert with arguments `build_assert` relies on the compiler to optimize out its error path. Functions using it with its arguments must thus always be inlined, otherwise the error path of `build_assert` might not be optimized out, triggering a build error. Cc: stable@vger.kernel.org Fixes: bb38f35b35f9 ("rust: implement `kernel::sync::Refcount`") Reviewed-by: Daniel Almeida Signed-off-by: Alexandre Courbot Acked-by: Boqun Feng Link: https://patch.msgid.link/20251208-io-build-assert-v3-5-98aded02c1ea@nvidia.com Signed-off-by: Miguel Ojeda --- rust/kernel/sync/refcount.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rust/kernel/sync/refcount.rs b/rust/kernel/sync/refcount.rs index 19236a5bccde..6c7ae8b05a0b 100644 --- a/rust/kernel/sync/refcount.rs +++ b/rust/kernel/sync/refcount.rs @@ -23,7 +23,8 @@ impl Refcount { /// Construct a new [`Refcount`] from an initial value. /// /// The initial value should be non-saturated. - #[inline] + // Always inline to optimize out error path of `build_assert`. + #[inline(always)] pub fn new(value: i32) -> Self { build_assert!(value >= 0, "initial value saturated"); // SAFETY: There are no safety requirements for this FFI call. From 2af6ad09fc7dfe9b3610100983cccf16998bf34d Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 8 Dec 2025 11:47:05 +0900 Subject: [PATCH 026/195] rust: num: bounded: add missing comment for always inlined function This code is always inlined to avoid a build error if the error path of `build_assert` cannot be optimized out. Add a comment justifying the `#[inline(always)]` property to avoid it being taken away by mistake. Reviewed-by: Daniel Almeida Signed-off-by: Alexandre Courbot Link: https://patch.msgid.link/20251208-io-build-assert-v3-7-98aded02c1ea@nvidia.com Signed-off-by: Miguel Ojeda --- rust/kernel/num/bounded.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/kernel/num/bounded.rs b/rust/kernel/num/bounded.rs index c9a44e12c19b..5ef8361cf5d5 100644 --- a/rust/kernel/num/bounded.rs +++ b/rust/kernel/num/bounded.rs @@ -367,6 +367,7 @@ where /// assert_eq!(Bounded::::from_expr(1).get(), 1); /// assert_eq!(Bounded::::from_expr(0xff).get(), 0xff); /// ``` + // Always inline to optimize out error path of `build_assert`. #[inline(always)] pub fn from_expr(expr: T) -> Self { crate::build_assert!( From 4b22ec1685ce1fc0d862dcda3225d852fb107995 Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Sat, 17 Jan 2026 16:00:45 +0000 Subject: [PATCH 027/195] efivarfs: fix error propagation in efivar_entry_get() efivar_entry_get() always returns success even if the underlying __efivar_entry_get() fails, masking errors. This may result in uninitialized heap memory being copied to userspace in the efivarfs_file_read() path. Fix it by returning the error from __efivar_entry_get(). Fixes: 2d82e6227ea1 ("efi: vars: Move efivar caching layer into efivarfs") Cc: # v6.1+ Signed-off-by: Kohei Enju Signed-off-by: Ard Biesheuvel --- fs/efivarfs/vars.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/efivarfs/vars.c b/fs/efivarfs/vars.c index 6edc10958ecf..70e13db260db 100644 --- a/fs/efivarfs/vars.c +++ b/fs/efivarfs/vars.c @@ -552,7 +552,7 @@ int efivar_entry_get(struct efivar_entry *entry, u32 *attributes, err = __efivar_entry_get(entry, attributes, size, data); efivar_unlock(); - return 0; + return err; } /** From 6dd0fdc908c02318c28ec2c0979661846ee0a9f7 Mon Sep 17 00:00:00 2001 From: Ricardo Rivera-Matos Date: Thu, 15 Jan 2026 19:25:10 +0000 Subject: [PATCH 028/195] ASoC: cs35l45: Corrects ASP_TX5 DAPM widget channel ASP_TX5 was incorrectly mapped to a channel value of 3 corrects, the channel value of 4. Reviewed-by: Charles Keepax Signed-off-by: Ricardo Rivera-Matos Link: https://patch.msgid.link/20260115192523.1335742-2-rriveram@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l45.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs35l45.c b/sound/soc/codecs/cs35l45.c index e33f11435980..7aa558d6362f 100644 --- a/sound/soc/codecs/cs35l45.c +++ b/sound/soc/codecs/cs35l45.c @@ -453,7 +453,7 @@ static const struct snd_soc_dapm_widget cs35l45_dapm_widgets[] = { SND_SOC_DAPM_AIF_OUT("ASP_TX2", NULL, 1, CS35L45_ASP_ENABLES1, CS35L45_ASP_TX2_EN_SHIFT, 0), SND_SOC_DAPM_AIF_OUT("ASP_TX3", NULL, 2, CS35L45_ASP_ENABLES1, CS35L45_ASP_TX3_EN_SHIFT, 0), SND_SOC_DAPM_AIF_OUT("ASP_TX4", NULL, 3, CS35L45_ASP_ENABLES1, CS35L45_ASP_TX4_EN_SHIFT, 0), - SND_SOC_DAPM_AIF_OUT("ASP_TX5", NULL, 3, CS35L45_ASP_ENABLES1, CS35L45_ASP_TX5_EN_SHIFT, 0), + SND_SOC_DAPM_AIF_OUT("ASP_TX5", NULL, 4, CS35L45_ASP_ENABLES1, CS35L45_ASP_TX5_EN_SHIFT, 0), SND_SOC_DAPM_MUX("ASP_TX1 Source", SND_SOC_NOPM, 0, 0, &cs35l45_asp_muxes[0]), SND_SOC_DAPM_MUX("ASP_TX2 Source", SND_SOC_NOPM, 0, 0, &cs35l45_asp_muxes[1]), From 2e48020fd7ced9e9953c55b57a5cb608e64deee0 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 15 Jan 2026 14:14:18 +0800 Subject: [PATCH 029/195] ASoC: dt-bindings: fsl,sai: Add support for i.MX952 platform Add a new compatible string 'fsl,imx952-sai' for i.MX952 platform, which is fallback compatible with 'fsl,imx95-sai'. Signed-off-by: Shengjiu Wang Acked-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20260115061418.4131432-1-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/fsl,sai.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/sound/fsl,sai.yaml b/Documentation/devicetree/bindings/sound/fsl,sai.yaml index 0d733e5b08a4..d838ee0b61cb 100644 --- a/Documentation/devicetree/bindings/sound/fsl,sai.yaml +++ b/Documentation/devicetree/bindings/sound/fsl,sai.yaml @@ -44,6 +44,7 @@ properties: - items: - enum: - fsl,imx94-sai + - fsl,imx952-sai - const: fsl,imx95-sai reg: From 9210f5ff6318163835d9e42ee68006be4da0f531 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sun, 18 Jan 2026 17:50:30 -0300 Subject: [PATCH 030/195] ASoC: fsl: imx-card: Do not force slot width to sample width imx-card currently sets the slot width to the physical sample width for I2S links. This breaks controllers that use fixed-width slots (e.g. 32-bit FIFO words), causing the unused bits in the slot to contain undefined data when playing 16-bit streams. Do not override the slot width in the machine driver and let the CPU DAI select an appropriate default instead. This matches the behavior of simple-audio-card and avoids embedding controller-specific policy in the machine driver. On an i.MX8MP-based board using SAI as the I2S master with 32-bit slots, playing 16-bit audio resulted in spurious frequencies and an incorrect SAI data waveform, as the slot width was forced to 16 bits. After this change, audio artifacts are eliminated and the 16-bit samples correctly occupy the first half of the 32-bit slot, with the remaining bits padded with zeroes. Cc: stable@vger.kernel.org Fixes: aa736700f42f ("ASoC: imx-card: Add imx-card machine driver") Signed-off-by: Fabio Estevam Acked-by: Shengjiu Wang Link: https://patch.msgid.link/20260118205030.1532696-1-festevam@gmail.com Signed-off-by: Mark Brown --- sound/soc/fsl/imx-card.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/fsl/imx-card.c b/sound/soc/fsl/imx-card.c index 28699d7b75ca..05b4e971a366 100644 --- a/sound/soc/fsl/imx-card.c +++ b/sound/soc/fsl/imx-card.c @@ -346,7 +346,6 @@ static int imx_aif_hw_params(struct snd_pcm_substream *substream, SND_SOC_DAIFMT_PDM; } else { slots = 2; - slot_width = params_physical_width(params); fmt = (rtd->dai_link->dai_fmt & ~SND_SOC_DAIFMT_FORMAT_MASK) | SND_SOC_DAIFMT_I2S; } From 018b211b1d321a52ed8d8de74ce83ce52a2e1224 Mon Sep 17 00:00:00 2001 From: Anatolii Shirykalov Date: Mon, 19 Jan 2026 15:56:18 +0100 Subject: [PATCH 031/195] ASoC: amd: yc: Add ASUS ExpertBook PM1503CDA to quirks list Add ASUS ExpertBook PM1503CDA to the DMI quirks table to enable internal DMIC support via the ACP6x machine driver. Signed-off-by: Anatolii Shirykalov Link: https://patch.msgid.link/20260119145618.3171435-1-pipocavsobake@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 0294177acc66..f801ce4741ff 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -542,6 +542,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "15NBC1011"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ASUS EXPERTBOOK PM1503CDA"), + } + }, { .driver_data = &acp6x_card, .matches = { From 0fcee2cfc4b2e16e62ff8e0cc2cd8dd24efad65e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 21 Jan 2026 17:38:54 +0800 Subject: [PATCH 032/195] nvmet: fix race in nvmet_bio_done() leading to NULL pointer dereference There is a race condition in nvmet_bio_done() that can cause a NULL pointer dereference in blk_cgroup_bio_start(): 1. nvmet_bio_done() is called when a bio completes 2. nvmet_req_complete() is called, which invokes req->ops->queue_response(req) 3. The queue_response callback can re-queue and re-submit the same request 4. The re-submission reuses the same inline_bio from nvmet_req 5. Meanwhile, nvmet_req_bio_put() (called after nvmet_req_complete) invokes bio_uninit() for inline_bio, which sets bio->bi_blkg to NULL 6. The re-submitted bio enters submit_bio_noacct_nocheck() 7. blk_cgroup_bio_start() dereferences bio->bi_blkg, causing a crash: BUG: kernel NULL pointer dereference, address: 0000000000000028 #PF: supervisor read access in kernel mode RIP: 0010:blk_cgroup_bio_start+0x10/0xd0 Call Trace: submit_bio_noacct_nocheck+0x44/0x250 nvmet_bdev_execute_rw+0x254/0x370 [nvmet] process_one_work+0x193/0x3c0 worker_thread+0x281/0x3a0 Fix this by reordering nvmet_bio_done() to call nvmet_req_bio_put() BEFORE nvmet_req_complete(). This ensures the bio is cleaned up before the request can be re-submitted, preventing the race condition. Fixes: 190f4c2c863a ("nvmet: fix memory leak of bio integrity") Cc: Dmitry Bogdanov Cc: stable@vger.kernel.org Cc: Guangwu Zhang Link: http://www.mail-archive.com/debian-kernel@lists.debian.org/msg146238.html Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Keith Busch --- drivers/nvme/target/io-cmd-bdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 8d246b8ca604..0103815542d4 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -180,9 +180,10 @@ u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts) static void nvmet_bio_done(struct bio *bio) { struct nvmet_req *req = bio->bi_private; + blk_status_t blk_status = bio->bi_status; - nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status)); nvmet_req_bio_put(req, bio); + nvmet_req_complete(req, blk_to_nvme_status(req, blk_status)); } #ifdef CONFIG_BLK_DEV_INTEGRITY From 731bb3118f859d2a68444a9ae580681522d32bc0 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 22 Jan 2026 16:35:56 -0800 Subject: [PATCH 033/195] Revert "PCI/TSM: Report active IDE streams" The proposed ABI failed to account for multiple host bridges with the same stream name. The fix needs to namespace streams or otherwise link back to the host bridge, but a change like that is too big for a fix. Given this ABI never saw a released kernel, delete it for now and bring it back later with this issue addressed. Reported-by: Xu Yilun Reported-by: Yi Lai Closes: http://lore.kernel.org/20251223085601.2607455-1-yilun.xu@linux.intel.com Link: http://patch.msgid.link/6972c872acbb9_1d3310035@dwillia2-mobl4.notmuch Signed-off-by: Dan Williams --- Documentation/ABI/testing/sysfs-class-tsm | 10 -------- drivers/pci/ide.c | 4 ---- drivers/virt/coco/tsm-core.c | 28 ----------------------- include/linux/pci-ide.h | 2 -- include/linux/tsm.h | 3 --- 5 files changed, 47 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-tsm b/Documentation/ABI/testing/sysfs-class-tsm index 6fc1a5ac6da1..2949468deaf7 100644 --- a/Documentation/ABI/testing/sysfs-class-tsm +++ b/Documentation/ABI/testing/sysfs-class-tsm @@ -7,13 +7,3 @@ Description: signals when the PCI layer is able to support establishment of link encryption and other device-security features coordinated through a platform tsm. - -What: /sys/class/tsm/tsmN/streamH.R.E -Contact: linux-pci@vger.kernel.org -Description: - (RO) When a host bridge has established a secure connection via - the platform TSM, symlink appears. The primary function of this - is have a system global review of TSM resource consumption - across host bridges. The link points to the endpoint PCI device - and matches the same link published by the host bridge. See - Documentation/ABI/testing/sysfs-devices-pci-host-bridge. diff --git a/drivers/pci/ide.c b/drivers/pci/ide.c index f0ef474e1a0d..280941b05969 100644 --- a/drivers/pci/ide.c +++ b/drivers/pci/ide.c @@ -11,7 +11,6 @@ #include #include #include -#include #include "pci.h" @@ -373,9 +372,6 @@ void pci_ide_stream_release(struct pci_ide *ide) if (ide->partner[PCI_IDE_EP].enable) pci_ide_stream_disable(pdev, ide); - if (ide->tsm_dev) - tsm_ide_stream_unregister(ide); - if (ide->partner[PCI_IDE_RP].setup) pci_ide_stream_teardown(rp, ide); diff --git a/drivers/virt/coco/tsm-core.c b/drivers/virt/coco/tsm-core.c index f027876a2f19..0e705f3067a1 100644 --- a/drivers/virt/coco/tsm-core.c +++ b/drivers/virt/coco/tsm-core.c @@ -4,13 +4,11 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include #include #include #include #include #include -#include static struct class *tsm_class; static DECLARE_RWSEM(tsm_rwsem); @@ -108,32 +106,6 @@ void tsm_unregister(struct tsm_dev *tsm_dev) } EXPORT_SYMBOL_GPL(tsm_unregister); -/* must be invoked between tsm_register / tsm_unregister */ -int tsm_ide_stream_register(struct pci_ide *ide) -{ - struct pci_dev *pdev = ide->pdev; - struct pci_tsm *tsm = pdev->tsm; - struct tsm_dev *tsm_dev = tsm->tsm_dev; - int rc; - - rc = sysfs_create_link(&tsm_dev->dev.kobj, &pdev->dev.kobj, ide->name); - if (rc) - return rc; - - ide->tsm_dev = tsm_dev; - return 0; -} -EXPORT_SYMBOL_GPL(tsm_ide_stream_register); - -void tsm_ide_stream_unregister(struct pci_ide *ide) -{ - struct tsm_dev *tsm_dev = ide->tsm_dev; - - ide->tsm_dev = NULL; - sysfs_remove_link(&tsm_dev->dev.kobj, ide->name); -} -EXPORT_SYMBOL_GPL(tsm_ide_stream_unregister); - static void tsm_release(struct device *dev) { struct tsm_dev *tsm_dev = container_of(dev, typeof(*tsm_dev), dev); diff --git a/include/linux/pci-ide.h b/include/linux/pci-ide.h index 37a1ad9501b0..5d4d56ed088d 100644 --- a/include/linux/pci-ide.h +++ b/include/linux/pci-ide.h @@ -82,7 +82,6 @@ struct pci_ide_regs { * @host_bridge_stream: allocated from host bridge @ide_stream_ida pool * @stream_id: unique Stream ID (within Partner Port pairing) * @name: name of the established Selective IDE Stream in sysfs - * @tsm_dev: For TSM established IDE, the TSM device context * * Negative @stream_id values indicate "uninitialized" on the * expectation that with TSM established IDE the TSM owns the stream_id @@ -94,7 +93,6 @@ struct pci_ide { u8 host_bridge_stream; int stream_id; const char *name; - struct tsm_dev *tsm_dev; }; /* diff --git a/include/linux/tsm.h b/include/linux/tsm.h index a3b7ab668eff..22e05b2aac69 100644 --- a/include/linux/tsm.h +++ b/include/linux/tsm.h @@ -123,7 +123,4 @@ int tsm_report_unregister(const struct tsm_report_ops *ops); struct tsm_dev *tsm_register(struct device *parent, struct pci_tsm_ops *ops); void tsm_unregister(struct tsm_dev *tsm_dev); struct tsm_dev *find_tsm_dev(int id); -struct pci_ide; -int tsm_ide_stream_register(struct pci_ide *ide); -void tsm_ide_stream_unregister(struct pci_ide *ide); #endif /* __TSM_H */ From 8370af2019dee9ca004ca7c5e36b1f629ecb1e39 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Wed, 14 Jan 2026 19:14:55 +0800 Subject: [PATCH 034/195] PCI/IDE: Fix off by one error calculating VF RID range The VF ID range of an SR-IOV device is [0, num_VFs - 1]. pci_ide_stream_alloc() mistakenly uses num_VFs to represent the last ID. Fix that off by one error to stay in bounds of the range. Fixes: 1e4d2ff3ae45 ("PCI/IDE: Add IDE establishment helpers") Signed-off-by: Li Ming Reviewed-by: Xu Yilun Link: https://patch.msgid.link/20260114111455.550984-1-ming.li@zohomail.com Signed-off-by: Dan Williams --- drivers/pci/ide.c | 4 ++-- include/linux/pci-ide.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pci/ide.c b/drivers/pci/ide.c index 280941b05969..fcceb518c64e 100644 --- a/drivers/pci/ide.c +++ b/drivers/pci/ide.c @@ -282,8 +282,8 @@ struct pci_ide *pci_ide_stream_alloc(struct pci_dev *pdev) /* for SR-IOV case, cover all VFs */ num_vf = pci_num_vf(pdev); if (num_vf) - rid_end = PCI_DEVID(pci_iov_virtfn_bus(pdev, num_vf), - pci_iov_virtfn_devfn(pdev, num_vf)); + rid_end = PCI_DEVID(pci_iov_virtfn_bus(pdev, num_vf - 1), + pci_iov_virtfn_devfn(pdev, num_vf - 1)); else rid_end = pci_dev_id(pdev); diff --git a/include/linux/pci-ide.h b/include/linux/pci-ide.h index 5d4d56ed088d..ae07d9f699c0 100644 --- a/include/linux/pci-ide.h +++ b/include/linux/pci-ide.h @@ -26,7 +26,7 @@ enum pci_ide_partner_select { /** * struct pci_ide_partner - Per port pair Selective IDE Stream settings * @rid_start: Partner Port Requester ID range start - * @rid_end: Partner Port Requester ID range end + * @rid_end: Partner Port Requester ID range end (inclusive) * @stream_index: Selective IDE Stream Register Block selection * @mem_assoc: PCI bus memory address association for targeting peer partner * @pref_assoc: PCI bus prefetchable memory address association for From 0b50f116af5e29724b756a5ee6ae268deaae2d29 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Sun, 11 Jan 2026 15:38:23 +0800 Subject: [PATCH 035/195] PCI/IDE: Fix reading a wrong reg for unused sel stream initialization During pci_ide_init(), it will write PCI_ID_RESERVED_STREAM_ID into all unused selective IDE stream blocks. In a selective IDE stream block, IDE stream ID field is in selective IDE stream control register instead of selective IDE stream capability register. Fixes: 079115370d00 ("PCI/IDE: Initialize an ID for all IDE streams") Signed-off-by: Li Ming Acked-by: Bjorn Helgaas Reviewed-by: Xu Yilun Link: https://patch.msgid.link/20260111073823.486665-1-ming.li@zohomail.com Signed-off-by: Dan Williams --- drivers/pci/ide.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/ide.c b/drivers/pci/ide.c index fcceb518c64e..23f554490539 100644 --- a/drivers/pci/ide.c +++ b/drivers/pci/ide.c @@ -167,7 +167,7 @@ void pci_ide_init(struct pci_dev *pdev) for (u16 i = 0; i < nr_streams; i++) { int pos = __sel_ide_offset(ide_cap, nr_link_ide, i, nr_ide_mem); - pci_read_config_dword(pdev, pos + PCI_IDE_SEL_CAP, &val); + pci_read_config_dword(pdev, pos + PCI_IDE_SEL_CTL, &val); if (val & PCI_IDE_SEL_CTL_EN) continue; val &= ~PCI_IDE_SEL_CTL_ID; From 4c4e62321a41747390bc167f9c581408f6e0d322 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Thu, 22 Jan 2026 06:41:35 +0100 Subject: [PATCH 036/195] rust: proc-macro2: rebuild if the version text changes The Rust compiler cannot use dependencies built by other versions, e.g.: error[E0514]: found crate `proc_macro2` compiled by an incompatible version of rustc --> rust/quote/ext.rs:5:5 | 5 | use proc_macro2::{TokenStream, TokenTree}; | ^^^^^^^^^^^ | = note: the following crate versions were found: crate `proc_macro2` compiled by rustc 1.92.0 (ded5c06cf 2025-12-08): ./rust/libproc_macro2.rlib = help: please recompile that crate using this compiler (rustc 1.93.0 (254b59607 2026-01-19)) (consider running `cargo clean` first) Thus trigger a rebuild if the version text changes like we do in other top-level cases (e.g. see commit aeb0e24abbeb ("kbuild: rust: replace proc macros dependency on `core.o` with the version text")). The build errors for now are hard to trigger, since we do not yet use the new crates we just introduced (the use cases are coming in the next merge window), but they can still be seen if e.g. one manually removes one of the targets, so fix it already. Fixes: 158a3b72118a ("rust: proc-macro2: enable support in kbuild") Reviewed-by: Alice Ryhl Link: https://patch.msgid.link/20260122054135.138445-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/proc-macro2/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rust/proc-macro2/lib.rs b/rust/proc-macro2/lib.rs index 7b78d065d51c..5d408943fa0d 100644 --- a/rust/proc-macro2/lib.rs +++ b/rust/proc-macro2/lib.rs @@ -1,5 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT +// When fixdep scans this, it will find this string `CONFIG_RUSTC_VERSION_TEXT` +// and thus add a dependency on `include/config/RUSTC_VERSION_TEXT`, which is +// touched by Kconfig when the version string from the compiler changes. + //! [![github]](https://github.com/dtolnay/proc-macro2) [![crates-io]](https://crates.io/crates/proc-macro2) [![docs-rs]](crate) //! //! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github From 630fbc6e870eb06c5126cc97a3abecbe012272c8 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 23 Jan 2026 15:21:36 +0800 Subject: [PATCH 037/195] ALSA: hda/realtek - fixed speaker no sound If it play a 5s above silence media stream, it will cause silence detection trigger. Speaker will make no sound when you use another app to play a stream. Add this patch will solve this issue. GPIO2: Mute Hotkey GPIO3: Mic Mute LED Enable this will turn on hotkey and LED support. Signed-off-by: Kailang Yang Link: https://lore.kernel.org/f4929e137a7949238cc043d861a4d9f8@realtek.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index cafa48b5aceb..770b21ad55c4 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -3383,11 +3383,22 @@ static void alc287_alc1318_playback_pcm_hook(struct hda_pcm_stream *hinfo, struct snd_pcm_substream *substream, int action) { + static const struct coef_fw dis_coefs[] = { + WRITE_COEF(0x24, 0x0013), WRITE_COEF(0x25, 0x0000), WRITE_COEF(0x26, 0xC203), + WRITE_COEF(0x28, 0x0004), WRITE_COEF(0x29, 0xb023), + }; /* Disable AMP silence detection */ + static const struct coef_fw en_coefs[] = { + WRITE_COEF(0x24, 0x0013), WRITE_COEF(0x25, 0x0000), WRITE_COEF(0x26, 0xC203), + WRITE_COEF(0x28, 0x0084), WRITE_COEF(0x29, 0xb023), + }; /* Enable AMP silence detection */ + switch (action) { case HDA_GEN_PCM_ACT_OPEN: + alc_process_coef_fw(codec, dis_coefs); alc_write_coefex_idx(codec, 0x5a, 0x00, 0x954f); /* write gpio3 to high */ break; case HDA_GEN_PCM_ACT_CLOSE: + alc_process_coef_fw(codec, en_coefs); alc_write_coefex_idx(codec, 0x5a, 0x00, 0x554f); /* write gpio3 as default value */ break; } From e396a74222654486d6ab45dca5d0c54c408b8b91 Mon Sep 17 00:00:00 2001 From: Zhiquan Li Date: Thu, 22 Jan 2026 13:35:50 +0800 Subject: [PATCH 038/195] KVM: selftests: Add -U_FORTIFY_SOURCE to avoid some unpredictable test failures Some distributions (such as Ubuntu) configure GCC so that _FORTIFY_SOURCE is automatically enabled at -O1 or above. This results in some fortified version of definitions of standard library functions are included. While linker resolves the symbols, the fortified versions might override the definitions in lib/string_override.c and reference to those PLT entries in GLIBC. This is not a problem for the code in host, but it is a disaster for the guest code. E.g., if build and run x86/nested_emulation_test on Ubuntu 24.04 will encounter a L1 #PF due to memset() reference to __memset_chk@plt. The option -fno-builtin-memset is not helpful here, because those fortified versions are not built-in but some definitions which are included by header, they are for different intentions. In order to eliminate the unpredictable behaviors may vary depending on the linker and platform, add the "-U_FORTIFY_SOURCE" into CFLAGS to prevent from introducing the fortified definitions. Signed-off-by: Zhiquan Li Link: https://patch.msgid.link/20260122053551.548229-1-zhiquan_li@163.com Fixes: 6b6f71484bf4 ("KVM: selftests: Implement memcmp(), memcpy(), and memset() for guest use") Cc: stable@vger.kernel.org [sean: tag for stable] Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/Makefile.kvm | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index ba5c2b643efa..d45bf4ccb3bf 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -251,6 +251,7 @@ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ + -U_FORTIFY_SOURCE \ -fno-builtin-memcmp -fno-builtin-memcpy \ -fno-builtin-memset -fno-builtin-strnlen \ -fno-stack-protector -fno-PIE -fno-strict-aliasing \ From 894148a25aebeaeb7ec397fdf1a1f1958d55496d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 23 Jan 2026 13:09:00 -0800 Subject: [PATCH 039/195] coco/tsm: Remove unused variable tsm_rwsem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This variable is and was never used, remove it. Fixes: 603c646f0010 ("coco/tsm: Introduce a core device for TEE Security Managers") Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20260120-coco-tsm_rwsem-v1-1-125059fe2f69@linutronix.de Signed-off-by: Dan Williams --- drivers/virt/coco/tsm-core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/virt/coco/tsm-core.c b/drivers/virt/coco/tsm-core.c index 0e705f3067a1..8712df8596a1 100644 --- a/drivers/virt/coco/tsm-core.c +++ b/drivers/virt/coco/tsm-core.c @@ -4,14 +4,12 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include #include #include #include #include static struct class *tsm_class; -static DECLARE_RWSEM(tsm_rwsem); static DEFINE_IDA(tsm_ida); static int match_id(struct device *dev, const void *data) From a44bfed9df8a514962e2cb076d9c0b594caeff36 Mon Sep 17 00:00:00 2001 From: Chen Miao Date: Fri, 31 Oct 2025 02:32:39 +0000 Subject: [PATCH 040/195] kbuild: rust: clean libpin_init_internal in mrproper When I enabled Rust compilation, I wanted to clean up its output, so I used make mrproper. However, I was still able to find that libpin_init_internal.so in the rust directory was not deleted, while all other corresponding outputs were cleared. Thus add it to the `MRPROPER_FILES` list. Reviewed-by: Dongliang Mu Signed-off-by: Chen Miao Fixes: d7659acca7a3 ("rust: add pin-init crate build infrastructure") Cc: stable@vger.kernel.org Acked-by: Nicolas Schier Acked-by: Benno Lossin Link: https://patch.msgid.link/71ff222b8731e63e06059c5d8566434e508baf2b.1761876365.git.chenmiao@openatom.club [ Fixed tags and Git author as discussed. Reworded slightly. - Miguel ] Signed-off-by: Miguel Ojeda --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 665b79aa21b8..768e5aff1780 100644 --- a/Makefile +++ b/Makefile @@ -1624,7 +1624,8 @@ MRPROPER_FILES += include/config include/generated \ certs/x509.genkey \ vmlinux-gdb.py \ rpmbuild \ - rust/libmacros.so rust/libmacros.dylib + rust/libmacros.so rust/libmacros.dylib \ + rust/libpin_init_internal.so rust/libpin_init_internal.dylib # clean - Delete most, but leave enough to build external modules # From dedb897f11c5d7e32c0e0a0eff7cec23a8047167 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sun, 21 Dec 2025 17:45:52 +0100 Subject: [PATCH 041/195] drm/msm/a6xx: fix bogus hwcg register updates The hw clock gating register sequence consists of register value pairs that are written to the GPU during initialisation. The a690 hwcg sequence has two GMU registers in it that used to amount to random writes in the GPU mapping, but since commit 188db3d7fe66 ("drm/msm/a6xx: Rebase GMU register offsets") they trigger a fault as the updated offsets now lie outside the mapping. This in turn breaks boot of machines like the Lenovo ThinkPad X13s. Note that the updates of these GMU registers is already taken care of properly since commit 40c297eb245b ("drm/msm/a6xx: Set GMU CGC properties on a6xx too"), but for some reason these two entries were left in the table. Fixes: 5e7665b5e484 ("drm/msm/adreno: Add Adreno A690 support") Cc: stable@vger.kernel.org # 6.5 Cc: Bjorn Andersson Cc: Konrad Dybcio Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Reviewed-by: Akhil P Oommen Fixes: 188db3d7fe66 ("drm/msm/a6xx: Rebase GMU register offsets") Patchwork: https://patchwork.freedesktop.org/patch/695778/ Message-ID: <20251221164552.19990-1-johan@kernel.org> Signed-off-by: Rob Clark (cherry picked from commit dcbd2f8280eea2c965453ed8c3c69d6f121e950b) --- drivers/gpu/drm/msm/adreno/a6xx_catalog.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c index ac9a95aab2fb..4c042133261c 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c @@ -501,8 +501,6 @@ static const struct adreno_reglist a690_hwcg[] = { {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}, - {REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, 0x10111}, - {REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL, 0x5555}, {} }; From 56bd3c0f749f45793d1eae1d0ddde4255c749bf6 Mon Sep 17 00:00:00 2001 From: Thomas Fourier Date: Mon, 12 Jan 2026 14:43:24 +0100 Subject: [PATCH 042/195] scsi: qla2xxx: edif: Fix dma_free_coherent() size Earlier in the function, the ha->flt buffer is allocated with size sizeof(struct qla_flt_header) + FLT_REGIONS_SIZE but freed in the error path with size SFP_DEV_SIZE. Fixes: 84318a9f01ce ("scsi: qla2xxx: edif: Add send, receive, and accept for auth_els") Cc: stable@vger.kernel.org Signed-off-by: Thomas Fourier Link: https://patch.msgid.link/20260112134326.55466-2-fourier.thomas@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 16a44c0917e1..e939bc88e151 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -4489,7 +4489,7 @@ fail_lsrjt: fail_elsrej: dma_pool_destroy(ha->purex_dma_pool); fail_flt: - dma_free_coherent(&ha->pdev->dev, SFP_DEV_SIZE, + dma_free_coherent(&ha->pdev->dev, sizeof(struct qla_flt_header) + FLT_REGIONS_SIZE, ha->flt, ha->flt_dma); fail_flt_buffer: From 4747bafaa50115d9667ece446b1d2d4aba83dc7f Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Sat, 13 Dec 2025 16:36:43 +0800 Subject: [PATCH 043/195] scsi: be2iscsi: Fix a memory leak in beiscsi_boot_get_sinfo() If nonemb_cmd->va fails to be allocated, free the allocation previously made by alloc_mcc_wrb(). Fixes: 50a4b824be9e ("scsi: be2iscsi: Fix to make boot discovery non-blocking") Cc: stable@vger.kernel.org Signed-off-by: Haoxiang Li Link: https://patch.msgid.link/20251213083643.301240-1-lihaoxiang@isrc.iscas.ac.cn Signed-off-by: Martin K. Petersen --- drivers/scsi/be2iscsi/be_mgmt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/be2iscsi/be_mgmt.c b/drivers/scsi/be2iscsi/be_mgmt.c index 4e899ec1477d..b1cba986f0fb 100644 --- a/drivers/scsi/be2iscsi/be_mgmt.c +++ b/drivers/scsi/be2iscsi/be_mgmt.c @@ -1025,6 +1025,7 @@ unsigned int beiscsi_boot_get_sinfo(struct beiscsi_hba *phba) &nonemb_cmd->dma, GFP_KERNEL); if (!nonemb_cmd->va) { + free_mcc_wrb(ctrl, tag); mutex_unlock(&ctrl->mbox_lock); return 0; } From b2d6b1d443009ed4da2d69f5423ab38e5780505a Mon Sep 17 00:00:00 2001 From: Kery Qi Date: Wed, 21 Jan 2026 19:45:15 +0800 Subject: [PATCH 044/195] scsi: firewire: sbp-target: Fix overflow in sbp_make_tpg() The code in sbp_make_tpg() limits "tpgt" to UINT_MAX but the data type of "tpg->tport_tpgt" is u16. This causes a type truncation issue. When a user creates a TPG via configfs mkdir, for example: mkdir /sys/kernel/config/target/sbp//tpgt_70000 The value 70000 passes the "tpgt > UINT_MAX" check since 70000 is far less than 4294967295. However, when assigned to the u16 field tpg->tport_tpgt, the value is silently truncated to 4464 (70000 & 0xFFFF). This causes the value the user specified to differ from what is actually stored, leading to confusion and potential unexpected behavior. Fix this by changing the type of "tpgt" to u16 and using kstrtou16() which will properly reject values outside the u16 range. Fixes: a511ce339780 ("sbp-target: Initial merge of firewire/ieee-1394 target mode support") Signed-off-by: Kery Qi Link: https://patch.msgid.link/20260121114515.1829-2-qikeyu2017@gmail.com Signed-off-by: Martin K. Petersen --- drivers/target/sbp/sbp_target.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 9f167ff8da7b..09120a538a40 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -1960,12 +1960,12 @@ static struct se_portal_group *sbp_make_tpg(struct se_wwn *wwn, container_of(wwn, struct sbp_tport, tport_wwn); struct sbp_tpg *tpg; - unsigned long tpgt; + u16 tpgt; int ret; if (strstr(name, "tpgt_") != name) return ERR_PTR(-EINVAL); - if (kstrtoul(name + 5, 10, &tpgt) || tpgt > UINT_MAX) + if (kstrtou16(name + 5, 10, &tpgt)) return ERR_PTR(-EINVAL); if (tport->tpg) { From 0444568edbf87c1da76b61c798ce0f1c1e478467 Mon Sep 17 00:00:00 2001 From: Ajay Neeli Date: Wed, 24 Dec 2025 11:09:50 +0530 Subject: [PATCH 045/195] scsi: ufs: amd-versal2: Fix PHY initialization in HCE enable notify Move the PHY initialization from PRE_CHANGE to POST_CHANGE in the ufs_versal2_hce_enable_notify() callback. This ensures that the PHY is initialized after the host controller enable sequence is complete, rather than before it starts. The PHY initialization requires the UFS host controller to be in a stable enabled state to properly configure the MPHY registers. Moving this to POST_CHANGE aligns with the expected initialization order and prevents potential timing issues during controller startup. Fixes: 769b8b2ffded ("scsi: ufs: amd-versal2: Add UFS support for AMD Versal Gen 2 SoC") Signed-off-by: Ajay Neeli Link: https://patch.msgid.link/20251224053950.54213-1-ajay.neeli@amd.com Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-amd-versal2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-amd-versal2.c b/drivers/ufs/host/ufs-amd-versal2.c index 40543db621a1..6c454ae8a9c8 100644 --- a/drivers/ufs/host/ufs-amd-versal2.c +++ b/drivers/ufs/host/ufs-amd-versal2.c @@ -367,7 +367,7 @@ static int ufs_versal2_hce_enable_notify(struct ufs_hba *hba, { int ret = 0; - if (status == PRE_CHANGE) { + if (status == POST_CHANGE) { ret = ufs_versal2_phy_init(hba); if (ret) dev_err(hba->dev, "Phy init failed (%d)\n", ret); From 1aaedafb21f38cb872d44f7608b4828a1e14e795 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Fri, 23 Jan 2026 23:12:24 +0100 Subject: [PATCH 046/195] ALSA: hda/realtek: Really fix headset mic for TongFang X6AR55xU. Add a PCI quirk to enable microphone detection on the headphone jack of TongFang X6AR55xU devices. The former quirk entry did not acomplish this and is removed. Fixes: b48fe9af1e60 ("ALSA: hda/realtek: Fix headset mic for TongFang X6AR55xU") Signed-off-by: Tim Guttzeit Signed-off-by: Werner Sembach Link: https://patch.msgid.link/20260123221233.28273-1-wse@tuxedocomputers.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 770b21ad55c4..4b19d26d9822 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7347,6 +7347,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d05, 0x1409, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1d05, 0x300f, "TongFang X6AR5xxY", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1d05, 0x3019, "TongFang X6FR5xxY", ALC2XX_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1d05, 0x3031, "TongFang X6AR55xU", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1d17, 0x3288, "Haier Boyue G42", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), @@ -7816,10 +7817,6 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60140}, {0x19, 0x04a11030}, {0x21, 0x04211020}), - SND_HDA_PIN_QUIRK(0x10ec0274, 0x1d05, "TongFang", ALC274_FIXUP_HP_HEADSET_MIC, - {0x17, 0x90170110}, - {0x19, 0x03a11030}, - {0x21, 0x03211020}), SND_HDA_PIN_QUIRK(0x10ec0282, 0x1025, "Acer", ALC282_FIXUP_ACER_DISABLE_LINEOUT, ALC282_STANDARD_PINS, {0x12, 0x90a609c0}, From bd36f6e2abf7f85644f7ea8deb1de4040b03bbc1 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 24 Jan 2026 00:34:32 +0100 Subject: [PATCH 047/195] rust: sync: atomic: Provide stub for `rusttest` 32-bit hosts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For arm32, on a x86_64 builder, running the `rusttest` target yields: error[E0080]: evaluation of constant value failed --> rust/kernel/static_assert.rs:37:23 | 37 | const _: () = ::core::assert!($condition $(,$arg)?); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ the evaluated program panicked at 'assertion failed: size_of::() == size_of::()', rust/kernel/sync/atomic/predefine.rs:68:1 | ::: rust/kernel/sync/atomic/predefine.rs:68:1 | 68 | static_assert!(size_of::() == size_of::()); | -------------------------------------------------------------------- in this macro invocation | = note: this error originates in the macro `::core::assert` which comes from the expansion of the macro `static_assert` (in Nightly builds, run with -Z macro-backtrace for more info) The reason is that `rusttest` runs on the host, so for e.g. a x86_64 builder `isize` is 64 bits but it is not a `CONFIG_64BIT` build. Fix it by providing a stub for `rusttest` as usual. Fixes: 84c6d36bcaf9 ("rust: sync: atomic: Add Atomic<{usize,isize}>") Cc: stable@vger.kernel.org Reviewed-by: Onur Özkan Acked-by: Boqun Feng Link: https://patch.msgid.link/20260123233432.22703-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/sync/atomic/predefine.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/rust/kernel/sync/atomic/predefine.rs b/rust/kernel/sync/atomic/predefine.rs index 45a17985cda4..0fca1ba3c2db 100644 --- a/rust/kernel/sync/atomic/predefine.rs +++ b/rust/kernel/sync/atomic/predefine.rs @@ -35,12 +35,23 @@ unsafe impl super::AtomicAdd for i64 { // as `isize` and `usize`, and `isize` and `usize` are always bi-directional transmutable to // `isize_atomic_repr`, which also always implements `AtomicImpl`. #[allow(non_camel_case_types)] +#[cfg(not(testlib))] #[cfg(not(CONFIG_64BIT))] type isize_atomic_repr = i32; #[allow(non_camel_case_types)] +#[cfg(not(testlib))] #[cfg(CONFIG_64BIT)] type isize_atomic_repr = i64; +#[allow(non_camel_case_types)] +#[cfg(testlib)] +#[cfg(target_pointer_width = "32")] +type isize_atomic_repr = i32; +#[allow(non_camel_case_types)] +#[cfg(testlib)] +#[cfg(target_pointer_width = "64")] +type isize_atomic_repr = i64; + // Ensure size and alignment requirements are checked. static_assert!(size_of::() == size_of::()); static_assert!(align_of::() == align_of::()); From b0581f6ab952ffd135ca4402d2ee3da641538d6b Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 24 Jan 2026 17:09:48 +0100 Subject: [PATCH 048/195] drm/tyr: depend on `COMMON_CLK` to fix build error Tyr needs `CONFIG_COMMON_CLK` to build: error[E0432]: unresolved import `kernel::clk::Clk` --> drivers/gpu/drm/tyr/driver.rs:3:5 | 3 | use kernel::clk::Clk; | ^^^^^^^^^^^^^^^^ no `Clk` in `clk` error[E0432]: unresolved import `kernel::clk::OptionalClk` --> drivers/gpu/drm/tyr/driver.rs:4:5 | 4 | use kernel::clk::OptionalClk; | ^^^^^^^^^^^^^^^^^^^^^^^^ no `OptionalClk` in `clk` Thus add the dependency to fix it. Fixes: cf4fd52e3236 ("rust: drm: Introduce the Tyr driver for Arm Mali GPUs") Cc: stable@vger.kernel.org Acked-by: Alice Ryhl Link: https://patch.msgid.link/20260124160948.67508-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- drivers/gpu/drm/tyr/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/tyr/Kconfig b/drivers/gpu/drm/tyr/Kconfig index 4b55308fd2eb..e933e6478027 100644 --- a/drivers/gpu/drm/tyr/Kconfig +++ b/drivers/gpu/drm/tyr/Kconfig @@ -6,6 +6,7 @@ config DRM_TYR depends on RUST depends on ARM || ARM64 || COMPILE_TEST depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE + depends on COMMON_CLK default n help Rust DRM driver for ARM Mali CSF-based GPUs. From e440bc5c190cd0e5f148b2892aeb1f4bbbf54507 Mon Sep 17 00:00:00 2001 From: SeungJong Ha Date: Fri, 23 Jan 2026 13:18:44 +0000 Subject: [PATCH 049/195] scripts: generate_rust_analyzer: fix resolution of #[pin_data] macros Currently, rust-analyzer fails to properly resolve structs annotated with `#[pin_data]`. This prevents IDE features like "Go to Definition" from working correctly for those structs. Add the missing configuration to `generate_rust_analyzer.py` to ensure the `pin-init` crate macros are handled correctly. Signed-off-by: SeungJong Ha Fixes: d7659acca7a3 ("rust: add pin-init crate build infrastructure") Cc: stable@vger.kernel.org Tested-by: Tamir Duberstein Acked-by: Tamir Duberstein Acked-by: Gary Guo Reviewed-by: Jesung Yang Link: https://patch.msgid.link/20260123-fix-pin-init-crate-dependecies-v2-1-bb1c2500e54c@gmail.com Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index 3b645da90092..766c2d91cd81 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -214,7 +214,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edit append_crate( name, path, - ["core", "kernel"], + ["core", "kernel", "pin_init"], cfg=cfg, ) From 473dd9ecb3fd139ffebd6f7a9f73fb73d85fe2aa Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 21 Jan 2026 22:20:47 +0800 Subject: [PATCH 050/195] MAINTAINERS: Replace Shawn with Frank as i.MX platform maintainer Shawn is no longer interested in maintaining i.MX platform, and would like to step down. On the other hand, Frank seems to be the best successor for this role. - He has been one of the most outstanding contributors to i.MX platform in the recent years. - He has been actively working as a co-maintainer reviewing i.MX patches and keep the platform support in good shape. - He works for NXP and could be the bridge and coordinator between NXP internal developers and community contributors. Acked-by: Peng Fan Acked-by: Daniel Baluta Reviewed-by: Fabio Estevam Acked-by: Dong Aisheng Reviewed-by: Frank Li Signed-off-by: Shawn Guo --- MAINTAINERS | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 5b11839cba9d..a294b4e952a9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2745,14 +2745,14 @@ F: arch/arm/include/asm/hardware/dec21285.h F: arch/arm/mach-footbridge/ ARM/FREESCALE IMX / MXC ARM ARCHITECTURE -M: Shawn Guo +M: Frank Li M: Sascha Hauer R: Pengutronix Kernel Team R: Fabio Estevam L: imx@lists.linux.dev L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/frank.li/linux.git F: Documentation/devicetree/bindings/firmware/fsl* F: Documentation/devicetree/bindings/firmware/nxp* F: arch/arm/boot/dts/nxp/imx/ @@ -2767,22 +2767,22 @@ N: mxs N: \bmxc[^\d] ARM/FREESCALE LAYERSCAPE ARM ARCHITECTURE -M: Shawn Guo +M: Frank Li L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/frank.li/linux.git F: arch/arm/boot/dts/nxp/ls/ F: arch/arm64/boot/dts/freescale/fsl-* F: arch/arm64/boot/dts/freescale/qoriq-* ARM/FREESCALE VYBRID ARM ARCHITECTURE -M: Shawn Guo +M: Frank Li M: Sascha Hauer R: Pengutronix Kernel Team R: Stefan Agner L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/frank.li/linux.git F: arch/arm/boot/dts/nxp/vf/ F: arch/arm/mach-imx/*vf610* @@ -20562,7 +20562,7 @@ F: drivers/pinctrl/pinctrl-amd.c PIN CONTROLLER - FREESCALE M: Dong Aisheng M: Fabio Estevam -M: Shawn Guo +M: Frank Li M: Jacky Bai R: Pengutronix Kernel Team R: NXP S32 Linux Team From 5016cae970d7d59d62aa4f6f11455a9e9630dd1c Mon Sep 17 00:00:00 2001 From: Shivam Kalra Date: Fri, 23 Jan 2026 18:51:13 +0530 Subject: [PATCH 051/195] rust: num: bounded: clean __new documentation and comments Following commit 3a1ec424dd9c ("rust: num: bounded: mark __new as unsafe"), remove the redundant paragraph in the documentation of __new now that the Safety section explicitly covers the requirement. Additionally, add an INVARIANT comment inside the function body where the Bounded instance is actually constructed to document that the type invariant is upheld. Suggested-by: Miguel Ojeda Link: https://lore.kernel.org/rust-for-linux/CANiq72mUCUh72BWP4eD1PTDpwdb1ML+Xgfom-Ys6thJooqQPwQ@mail.gmail.com/ Signed-off-by: Shivam Kalra Acked-by: Alexandre Courbot Link: https://patch.msgid.link/20260123132132.53854-1-shivamklr@cock.li [ Reworded slightly. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/num/bounded.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/rust/kernel/num/bounded.rs b/rust/kernel/num/bounded.rs index 5ef8361cf5d5..fa81acbdc8c2 100644 --- a/rust/kernel/num/bounded.rs +++ b/rust/kernel/num/bounded.rs @@ -282,9 +282,6 @@ where /// All instances of [`Bounded`] must be created through this method as it enforces most of the /// type invariants. /// - /// The caller remains responsible for checking, either statically or dynamically, that `value` - /// can be represented as a `T` using at most `N` bits. - /// /// # Safety /// /// The caller must ensure that `value` can be represented within `N` bits. @@ -297,6 +294,7 @@ where assert!(N <= T::BITS); } + // INVARIANT: The caller ensures `value` fits within `N` bits. Self(value) } From ba89709a3610ba27a2eef2e127f3f4fc5b64d5f7 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Sun, 25 Jan 2026 21:08:53 -0700 Subject: [PATCH 052/195] riscv: signal: fix some warnings reported by sparse Clean up a few warnings reported by sparse in arch/riscv/kernel/signal.c. These come from code that was added recently; they were missed when I initially reviewed the patch. Fixes: 818d78ba1b3f ("riscv: signal: abstract header saving for setup_sigcontext") Cc: Andy Chiu Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202601171848.ydLTJYrz-lkp@intel.com/ [pjw@kernel.org: updated to apply] Signed-off-by: Paul Walmsley --- arch/riscv/kernel/signal.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 5a956108b1ea..dbb067e345f0 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -145,14 +145,14 @@ struct arch_ext_priv { long (*save)(struct pt_regs *regs, void __user *sc_vec); }; -struct arch_ext_priv arch_ext_list[] = { +static struct arch_ext_priv arch_ext_list[] = { { .magic = RISCV_V_MAGIC, .save = &save_v_state, }, }; -const size_t nr_arch_exts = ARRAY_SIZE(arch_ext_list); +static const size_t nr_arch_exts = ARRAY_SIZE(arch_ext_list); static long restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) @@ -297,7 +297,7 @@ static long setup_sigcontext(struct rt_sigframe __user *frame, } else { err |= __put_user(arch_ext->magic, &sc_ext_ptr->magic); err |= __put_user(ext_size, &sc_ext_ptr->size); - sc_ext_ptr = (void *)sc_ext_ptr + ext_size; + sc_ext_ptr = (void __user *)sc_ext_ptr + ext_size; } } /* Write zero to fp-reserved space and check it on restore_sigcontext */ From 494d4a051c3bfc79b847a46bdc52a2473d27b3b0 Mon Sep 17 00:00:00 2001 From: Austin Kim Date: Sun, 25 Jan 2026 21:08:59 -0700 Subject: [PATCH 053/195] riscv: fix minor typo in syscall.h comment Some developers may be confused because RISC-V does not have a register named r0. Also, orig_r0 is not available in pt_regs structure, which is specific to riscv. So we had better fix this minor typo. Signed-off-by: Austin Kim Link: https://patch.msgid.link/aW3Z4zTBvGJpk7a7@adminpc-PowerEdge-R7525 Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/syscall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index 34313387f977..8067e666a4ca 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -20,7 +20,7 @@ extern void * const sys_call_table[]; extern void * const compat_sys_call_table[]; /* - * Only the low 32 bits of orig_r0 are meaningful, so we return int. + * Only the low 32 bits of orig_a0 are meaningful, so we return int. * This importantly ignores the high bits on 64-bit, so comparisons * sign-extend the low 32 bits. */ From 28a12ef366ecb118db19b92120a07b0491c1958e Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Sun, 25 Jan 2026 21:09:04 -0700 Subject: [PATCH 054/195] errata/sifive: remove unreliable warn_miss_errata When both the SiFive and MIPS errata are enabled then sifive_errata_patch_func emits a wrong and misleading warning claiming that the SiFive errata haven't been applied. This happens because sifive_errata_patch_func is being called twice, once for the kernel image and once for the vdso image. The vdso image has alternative entries for the MIPS errata, but none for the SiFive errata. Signed-off-by: Andreas Schwab Link: https://patch.msgid.link/mvmv7i8q8gg.fsf@suse.de Signed-off-by: Paul Walmsley --- arch/riscv/errata/sifive/errata.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c index 38aac2c47845..d0c61f86cba3 100644 --- a/arch/riscv/errata/sifive/errata.c +++ b/arch/riscv/errata/sifive/errata.c @@ -75,26 +75,12 @@ static u32 __init_or_module sifive_errata_probe(unsigned long archid, return cpu_req_errata; } -static void __init_or_module warn_miss_errata(u32 miss_errata) -{ - int i; - - pr_warn("----------------------------------------------------------------\n"); - pr_warn("WARNING: Missing the following errata may cause potential issues\n"); - for (i = 0; i < ERRATA_SIFIVE_NUMBER; i++) - if (miss_errata & 0x1 << i) - pr_warn("\tSiFive Errata[%d]:%s\n", i, errata_list[i].name); - pr_warn("Please enable the corresponding Kconfig to apply them\n"); - pr_warn("----------------------------------------------------------------\n"); -} - void sifive_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, unsigned long archid, unsigned long impid, unsigned int stage) { struct alt_entry *alt; u32 cpu_req_errata; - u32 cpu_apply_errata = 0; u32 tmp; BUILD_BUG_ON(ERRATA_SIFIVE_NUMBER >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE); @@ -118,10 +104,6 @@ void sifive_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, patch_text_nosync(ALT_OLD_PTR(alt), ALT_ALT_PTR(alt), alt->alt_len); mutex_unlock(&text_mutex); - cpu_apply_errata |= tmp; } } - if (stage != RISCV_ALTERNATIVES_MODULE && - cpu_apply_errata != cpu_req_errata) - warn_miss_errata(cpu_req_errata - cpu_apply_errata); } From 891b77d459d0ce993c68365d899134bc9fd47ac0 Mon Sep 17 00:00:00 2001 From: Zhang Heng Date: Mon, 26 Jan 2026 15:35:07 +0800 Subject: [PATCH 055/195] ALSA: hda/realtek: fix right sounds and mute/micmute LEDs for HP machine The HP EliteBook 630 G11 (103c:8c8f) is using ALC236 codec which used 0x02 to control mute LED and 0x01 to control micmute LED. Therefore, add a quirk to make it works. Link: https://bugzilla.kernel.org/show_bug.cgi?id=220828 Cc: Signed-off-by: Zhang Heng Link: https://patch.msgid.link/20260126073508.3897461-1-zhangheng@kylinos.cn Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 4b19d26d9822..a9ee084677ec 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6750,6 +6750,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c8c, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c8d, "HP ProBook 440 G11", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c8e, "HP ProBook 460 G11", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8c8f, "HP EliteBook 630 G11", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c90, "HP EliteBook 640", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c91, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), From 9e18920e783d0bcd4c127a7adc66565243ab9655 Mon Sep 17 00:00:00 2001 From: Zhang Heng Date: Mon, 26 Jan 2026 15:35:08 +0800 Subject: [PATCH 056/195] ALSA: hda/realtek: Add quirk for Inspur S14-G1 Inspur S14-G1 is equipped with ALC256. Enable "power saving mode" and Enable "headset jack mode". Signed-off-by: Zhang Heng Link: https://patch.msgid.link/20260126073508.3897461-2-zhangheng@kylinos.cn Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index a9ee084677ec..0a0496deb9c8 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7359,6 +7359,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1ee7, 0x2078, "HONOR BRB-X M1010", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1f66, 0x0105, "Ayaneo Portable Game Player", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x2014, 0x800a, "Positivo ARN50", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x2039, 0x0001, "Inspur S14-G1", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x2782, 0x0214, "VAIO VJFE-CL", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x2782, 0x0228, "Infinix ZERO BOOK 13", ALC269VB_FIXUP_INFINIX_ZERO_BOOK_13), SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO), From 403a0591be681eebc0c4825f8b42afe7fd13ee7f Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Fri, 23 Jan 2026 19:38:09 +0800 Subject: [PATCH 057/195] ASoC: soc-acpi-intel-ptl-match: fix name_prefix of rt1320-2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rt1320_2_group2_adr works with rt1320_1_group2_adr and the name_prefix should be rt1320-2. Fixes: ffe450cb6bce ("ASoC: Intel: soc-acpi-intel-ptl-match: add rt713_vb_l3_rt1320_l12 support") Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Link: https://patch.msgid.link/20260123113809.2238766-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-ptl-match.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c index 060955825fe0..e297c8ecedb7 100644 --- a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c @@ -442,7 +442,7 @@ static const struct snd_soc_acpi_adr_device rt1320_2_group2_adr[] = { .adr = 0x000230025D132001ull, .num_endpoints = 1, .endpoints = &spk_r_endpoint, - .name_prefix = "rt1320-1" + .name_prefix = "rt1320-2" } }; From 12f15d52d38ac53f7c70ea3d4b3d76afed04e064 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 23 Jan 2026 14:15:40 +0000 Subject: [PATCH 058/195] drm: Do not allow userspace to trigger kernel warnings in drm_gem_change_handle_ioctl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since GEM bo handles are u32 in the uapi and the internal implementation uses idr_alloc() which uses int ranges, passing a new handle larger than INT_MAX trivially triggers a kernel warning: idr_alloc(): ... if (WARN_ON_ONCE(start < 0)) return -EINVAL; ... Fix it by rejecting new handles above INT_MAX and at the same time make the end limit calculation more obvious by moving into int domain. Signed-off-by: Tvrtko Ursulin Reported-by: Zhi Wang Fixes: 53096728b891 ("drm: Add DRM prime interface to reassign GEM handle") Cc: David Francis Cc: Felix Kuehling Cc: Christian König Cc: # v6.18+ Tested-by: Harshit Mogalapalli Reviewed-by: Christian König Signed-off-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20260123141540.76540-1-tvrtko.ursulin@igalia.com --- drivers/gpu/drm/drm_gem.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index e4df43427394..25f68fed9b48 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -960,16 +960,21 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, { struct drm_gem_change_handle *args = data; struct drm_gem_object *obj; - int ret; + int handle, ret; if (!drm_core_check_feature(dev, DRIVER_GEM)) return -EOPNOTSUPP; + /* idr_alloc() limitation. */ + if (args->new_handle > INT_MAX) + return -EINVAL; + handle = args->new_handle; + obj = drm_gem_object_lookup(file_priv, args->handle); if (!obj) return -ENOENT; - if (args->handle == args->new_handle) { + if (args->handle == handle) { ret = 0; goto out; } @@ -977,18 +982,19 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, mutex_lock(&file_priv->prime.lock); spin_lock(&file_priv->table_lock); - ret = idr_alloc(&file_priv->object_idr, obj, - args->new_handle, args->new_handle + 1, GFP_NOWAIT); + ret = idr_alloc(&file_priv->object_idr, obj, handle, handle + 1, + GFP_NOWAIT); spin_unlock(&file_priv->table_lock); if (ret < 0) goto out_unlock; if (obj->dma_buf) { - ret = drm_prime_add_buf_handle(&file_priv->prime, obj->dma_buf, args->new_handle); + ret = drm_prime_add_buf_handle(&file_priv->prime, obj->dma_buf, + handle); if (ret < 0) { spin_lock(&file_priv->table_lock); - idr_remove(&file_priv->object_idr, args->new_handle); + idr_remove(&file_priv->object_idr, handle); spin_unlock(&file_priv->table_lock); goto out_unlock; } From c73a8917b31e8ddbd53cc248e17410cec27f8f58 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 22 Jan 2026 21:40:54 +0000 Subject: [PATCH 059/195] drm/xe: Skip address copy for sync-only execs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For parallel exec queues, xe_exec_ioctl() copied the batch buffer address array from userspace without checking num_batch_buffer. If user creates a sync-only exec that doesn't use the address field, the exec will fail with -EFAULT. Add num_batch_buffer check to skip the copy, and the exec could be executed successfully. Here is the sync-only exec: struct drm_xe_exec exec = { .extensions = 0, .exec_queue_id = qid, .num_syncs = 1, .syncs = (uintptr_t)&sync, .address = 0, /* ignored for sync-only */ .num_batch_buffer = 0, /* sync-only */ }; Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260122214053.3189366-2-shuicheng.lin@intel.com (cherry picked from commit 4761791c1e736273d612ff564f318bfbbb04fa4e) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_exec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index fd9480031750..8e3614b24010 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -190,9 +190,9 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto err_syncs; } - if (xe_exec_queue_is_parallel(q)) { - err = copy_from_user(addresses, addresses_user, sizeof(u64) * - q->width); + if (args->num_batch_buffer && xe_exec_queue_is_parallel(q)) { + err = copy_from_user(addresses, addresses_user, + sizeof(u64) * q->width); if (err) { err = -EFAULT; goto err_syncs; From 051be49133971076717846e2a04c746ab3476282 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 16 Jan 2026 09:50:40 +0000 Subject: [PATCH 060/195] drm/xe/xelp: Fix Wa_18022495364 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It looks I mistyped CS_DEBUG_MODE2 as CS_DEBUG_MODE1 when adding the workaround. Fix it. Signed-off-by: Tvrtko Ursulin Fixes: ca33cd271ef9 ("drm/xe/xelp: Add Wa_18022495364") Cc: Matt Roper Cc: "Thomas Hellström" Cc: Rodrigo Vivi Cc: # v6.18+ Reviewed-by: Matt Roper Signed-off-by: Thomas Hellström Link: https://patch.msgid.link/20260116095040.49335-1-tvrtko.ursulin@igalia.com (cherry picked from commit 7fe6cae2f7fad2b5166b0fc096618629f9e2ebcb) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 281286f2b5f9..b8c1dd953665 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1185,7 +1185,7 @@ static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc, return -ENOSPC; *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); - *cmd++ = CS_DEBUG_MODE1(0).addr; + *cmd++ = CS_DEBUG_MODE2(0).addr; *cmd++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE); return cmd - batch; From ca8dcfedac480e424b8860e3d1394afdcdc550fe Mon Sep 17 00:00:00 2001 From: Nitin Gote Date: Tue, 20 Jan 2026 11:17:25 +0530 Subject: [PATCH 061/195] drm/xe: derive mem copy capability from graphics version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop .has_mem_copy_instr from the platform descriptors and set it in xe_info_init() after handle_gmdid() populates graphics_verx100. Centralizing the GRAPHICS_VER(xe) >= 20 check keeps MEM_COPY enabled on Xe2+ and removes redundant per-platform plumbing. Bspec: 57561 Fixes: 1e12dbae9d72 ("drm/xe/migrate: support MEM_COPY instruction") Cc: Matt Roper Reviewed-by: Matthew Auld Suggested-by: Matthew Auld Signed-off-by: Nitin Gote Link: https://patch.msgid.link/20260120054724.1982608-2-nitin.r.gote@intel.com Signed-off-by: Tejas Upadhyay (cherry picked from commit 6ef02656c3222b1e12032a40d644ed56806b14fc) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_pci.c | 6 +----- drivers/gpu/drm/xe/xe_pci_types.h | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 9c9ea10d994c..2aa883f5ef79 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -342,7 +342,6 @@ static const struct xe_device_desc lnl_desc = { .has_display = true, .has_flat_ccs = 1, .has_pxp = true, - .has_mem_copy_instr = true, .max_gt_per_tile = 2, .needs_scratch = true, .va_bits = 48, @@ -363,7 +362,6 @@ static const struct xe_device_desc bmg_desc = { .has_heci_cscfi = 1, .has_late_bind = true, .has_sriov = true, - .has_mem_copy_instr = true, .max_gt_per_tile = 2, .needs_scratch = true, .subplatforms = (const struct xe_subplatform_desc[]) { @@ -380,7 +378,6 @@ static const struct xe_device_desc ptl_desc = { .has_display = true, .has_flat_ccs = 1, .has_sriov = true, - .has_mem_copy_instr = true, .max_gt_per_tile = 2, .needs_scratch = true, .needs_shared_vf_gt_wq = true, @@ -393,7 +390,6 @@ static const struct xe_device_desc nvls_desc = { .dma_mask_size = 46, .has_display = true, .has_flat_ccs = 1, - .has_mem_copy_instr = true, .max_gt_per_tile = 2, .require_force_probe = true, .va_bits = 48, @@ -675,7 +671,6 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.has_pxp = desc->has_pxp; xe->info.has_sriov = xe_configfs_primary_gt_allowed(to_pci_dev(xe->drm.dev)) && desc->has_sriov; - xe->info.has_mem_copy_instr = desc->has_mem_copy_instr; xe->info.skip_guc_pc = desc->skip_guc_pc; xe->info.skip_mtcfg = desc->skip_mtcfg; xe->info.skip_pcode = desc->skip_pcode; @@ -864,6 +859,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.has_range_tlb_inval = graphics_desc->has_range_tlb_inval; xe->info.has_usm = graphics_desc->has_usm; xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp; + xe->info.has_mem_copy_instr = GRAPHICS_VER(xe) >= 20; xe_info_probe_tile_count(xe); diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index 9892c063a9c5..a4451bdc79fb 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -46,7 +46,6 @@ struct xe_device_desc { u8 has_late_bind:1; u8 has_llc:1; u8 has_mbx_power_limits:1; - u8 has_mem_copy_instr:1; u8 has_pxp:1; u8 has_sriov:1; u8 needs_scratch:1; From 43b0b7eff4b3fb684f257d5a24376782e9663465 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 20 Jan 2026 16:43:44 +0100 Subject: [PATCH 062/195] platform/x86: panasonic-laptop: Fix sysfs group leak in error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The acpi_pcc_hotkey_add() error path leaks sysfs group pcc_attr_group if platform_device_register_simple() fails for the "panasonic" platform device. Address this by making it call sysfs_remove_group() in that case for the group in question. Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/3398370.44csPzL39Z@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/panasonic-laptop.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index 255317e6fec8..937f1a5b78ed 100644 --- a/drivers/platform/x86/panasonic-laptop.c +++ b/drivers/platform/x86/panasonic-laptop.c @@ -1089,7 +1089,7 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device) PLATFORM_DEVID_NONE, NULL, 0); if (IS_ERR(pcc->platform)) { result = PTR_ERR(pcc->platform); - goto out_backlight; + goto out_sysfs; } result = device_create_file(&pcc->platform->dev, &dev_attr_cdpower); @@ -1105,6 +1105,8 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device) out_platform: platform_device_unregister(pcc->platform); +out_sysfs: + sysfs_remove_group(&device->dev.kobj, &pcc_attr_group); out_backlight: backlight_device_unregister(pcc->backlight); out_input: From 128497456756e1b952bd5a912cd073836465109d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 26 Jan 2026 16:38:45 +0200 Subject: [PATCH 063/195] platform/x86: toshiba_haps: Fix memory leaks in add/remove routines toshiba_haps_add() leaks the haps object allocated by it if it returns an error after allocating that object successfully. toshiba_haps_remove() does not free the object pointed to by toshiba_haps before clearing that pointer, so it becomes unreachable allocated memory. Address these memory leaks by using devm_kzalloc() for allocating the memory in question. Fixes: 23d0ba0c908a ("platform/x86: Toshiba HDD Active Protection Sensor") Signed-off-by: Rafael J. Wysocki --- drivers/platform/x86/toshiba_haps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/toshiba_haps.c b/drivers/platform/x86/toshiba_haps.c index 03dfddeee0c0..e9324bf16aea 100644 --- a/drivers/platform/x86/toshiba_haps.c +++ b/drivers/platform/x86/toshiba_haps.c @@ -183,7 +183,7 @@ static int toshiba_haps_add(struct acpi_device *acpi_dev) pr_info("Toshiba HDD Active Protection Sensor device\n"); - haps = kzalloc(sizeof(struct toshiba_haps_dev), GFP_KERNEL); + haps = devm_kzalloc(&acpi_dev->dev, sizeof(*haps), GFP_KERNEL); if (!haps) return -ENOMEM; From 98bdc485b281da7e20e67b13a8cc834956d68e9c Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Wed, 21 Jan 2026 18:21:08 -0800 Subject: [PATCH 064/195] platform/x86/intel/vsec: Add Nova Lake PUNIT support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add PCI ID for Nova Lake, supporting PUNIT telemetry. Signed-off-by: David E. Box Link: https://patch.msgid.link/20260122022110.3231344-1-david.e.box@linux.intel.com Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/vsec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/intel/vsec.c b/drivers/platform/x86/intel/vsec.c index ecfc7703f201..012d87878afd 100644 --- a/drivers/platform/x86/intel/vsec.c +++ b/drivers/platform/x86/intel/vsec.c @@ -766,6 +766,7 @@ static const struct intel_vsec_platform_info lnl_info = { #define PCI_DEVICE_ID_INTEL_VSEC_LNL_M 0x647d #define PCI_DEVICE_ID_INTEL_VSEC_PTL 0xb07d #define PCI_DEVICE_ID_INTEL_VSEC_WCL 0xfd7d +#define PCI_DEVICE_ID_INTEL_VSEC_NVL 0xd70d static const struct pci_device_id intel_vsec_pci_ids[] = { { PCI_DEVICE_DATA(INTEL, VSEC_ADL, &tgl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_DG1, &dg1_info) }, @@ -778,6 +779,7 @@ static const struct pci_device_id intel_vsec_pci_ids[] = { { PCI_DEVICE_DATA(INTEL, VSEC_LNL_M, &lnl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_PTL, &mtl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_WCL, &mtl_info) }, + { PCI_DEVICE_DATA(INTEL, VSEC_NVL, &mtl_info) }, { } }; MODULE_DEVICE_TABLE(pci, intel_vsec_pci_ids); From 25e9e322d2ab5c03602eff4fbf4f7c40019d8de2 Mon Sep 17 00:00:00 2001 From: Kaushlendra Kumar Date: Wed, 24 Dec 2025 08:50:53 +0530 Subject: [PATCH 065/195] platform/x86: intel_telemetry: Fix swapped arrays in PSS output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LTR blocking statistics and wakeup event counters are incorrectly cross-referenced during debugfs output rendering. The code populates pss_ltr_blkd[] with LTR blocking data and pss_s0ix_wakeup[] with wakeup data, but the display loops reference the wrong arrays. This causes the "LTR Blocking Status" section to print wakeup events and the "Wakes Status" section to print LTR blockers, misleading power management analysis and S0ix residency debugging. Fix by aligning array usage with the intended output section labels. Fixes: 87bee290998d ("platform:x86: Add Intel Telemetry Debugfs interfaces") Cc: stable@vger.kernel.org Signed-off-by: Kaushlendra Kumar Link: https://patch.msgid.link/20251224032053.3915900-1-kaushlendra.kumar@intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/telemetry/debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/telemetry/debugfs.c b/drivers/platform/x86/intel/telemetry/debugfs.c index 70e5736c44c7..189c61ff7ff0 100644 --- a/drivers/platform/x86/intel/telemetry/debugfs.c +++ b/drivers/platform/x86/intel/telemetry/debugfs.c @@ -449,7 +449,7 @@ static int telem_pss_states_show(struct seq_file *s, void *unused) for (index = 0; index < debugfs_conf->pss_ltr_evts; index++) { seq_printf(s, "%-32s\t%u\n", debugfs_conf->pss_ltr_data[index].name, - pss_s0ix_wakeup[index]); + pss_ltr_blkd[index]); } seq_puts(s, "\n--------------------------------------\n"); @@ -459,7 +459,7 @@ static int telem_pss_states_show(struct seq_file *s, void *unused) for (index = 0; index < debugfs_conf->pss_wakeup_evts; index++) { seq_printf(s, "%-32s\t%u\n", debugfs_conf->pss_wakeup[index].name, - pss_ltr_blkd[index]); + pss_s0ix_wakeup[index]); } return 0; From 39e9c376ac42705af4ed4ae39eec028e8bced9b4 Mon Sep 17 00:00:00 2001 From: Kaushlendra Kumar Date: Wed, 24 Dec 2025 11:41:44 +0530 Subject: [PATCH 066/195] platform/x86: intel_telemetry: Fix PSS event register mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PSS telemetry info parsing incorrectly applies TELEM_INFO_SRAMEVTS_MASK when extracting event register count from firmware response. This reads bits 15-8 instead of the correct bits 7-0, causing misdetection of hardware capabilities. The IOSS path correctly uses TELEM_INFO_NENABLES_MASK for register count. Apply the same mask to PSS parsing for consistency. Fixes: 9d16b482b059 ("platform:x86: Add Intel telemetry platform driver") Signed-off-by: Kaushlendra Kumar Link: https://patch.msgid.link/20251224061144.3925519-1-kaushlendra.kumar@intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/telemetry/pltdrv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/telemetry/pltdrv.c b/drivers/platform/x86/intel/telemetry/pltdrv.c index f23c170a55dc..d9aa349f81e4 100644 --- a/drivers/platform/x86/intel/telemetry/pltdrv.c +++ b/drivers/platform/x86/intel/telemetry/pltdrv.c @@ -610,7 +610,7 @@ static int telemetry_setup(struct platform_device *pdev) /* Get telemetry Info */ events = (read_buf & TELEM_INFO_SRAMEVTS_MASK) >> TELEM_INFO_SRAMEVTS_SHIFT; - event_regs = read_buf & TELEM_INFO_SRAMEVTS_MASK; + event_regs = read_buf & TELEM_INFO_NENABLES_MASK; if ((events < TELEM_MAX_EVENTS_SRAM) || (event_regs < TELEM_MAX_EVENTS_SRAM)) { dev_err(&pdev->dev, "PSS:Insufficient Space for SRAM Trace\n"); From 2b4e00d8e70ca8736fda82447be6a4e323c6d1f5 Mon Sep 17 00:00:00 2001 From: gongqi <550230171hxy@gmail.com> Date: Thu, 22 Jan 2026 23:55:00 +0800 Subject: [PATCH 067/195] platform/x86/amd/pmc: Add quirk for MECHREVO Wujie 15X Pro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MECHREVO Wujie 15X Pro suffers from spurious IRQ issues related to the AMD PMC. Add it to the quirk list to use the spurious_8042 fix. Signed-off-by: gongqi <550230171hxy@gmail.com> Link: https://patch.msgid.link/20260122155501.376199-4-550230171hxy@gmail.com Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmc/pmc-quirks.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c index 404e62ad293a..ed285afaf9b0 100644 --- a/drivers/platform/x86/amd/pmc/pmc-quirks.c +++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c @@ -302,6 +302,13 @@ static const struct dmi_system_id fwbug_list[] = { DMI_MATCH(DMI_BOARD_NAME, "XxKK4NAx_XxSP4NAx"), } }, + { + .ident = "MECHREVO Wujie 15X Pro", + .driver_data = &quirk_spurious_8042, + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "WUJIE Series-X5SP4NAG"), + } + }, {} }; From 662c9cb86fc322038647d8808e751f5c6c0cc13f Mon Sep 17 00:00:00 2001 From: Jonas Ringeis Date: Fri, 23 Jan 2026 23:54:23 +0100 Subject: [PATCH 068/195] platform/x86: lg-laptop: Recognize 2022-2025 models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lg-laptop driver uses the DMI to identify the product year. Currently, the driver recognizes all models released after 2022 incorrectly as 2022. Update logic to handle model identifiers for years 2022-2025. Link: https://en.wikipedia.org/w/index.php?title=LG_Gram&oldid=1327931565#Comparison_of_Gram_models Signed-off-by: Jonas Ringeis Link: https://patch.msgid.link/20260123225503.493467-1-private@glitchdev.me Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/lg-laptop.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c index f92e89c75db9..61ef7a218a80 100644 --- a/drivers/platform/x86/lg-laptop.c +++ b/drivers/platform/x86/lg-laptop.c @@ -838,8 +838,17 @@ static int acpi_add(struct acpi_device *device) case 'P': year = 2021; break; - default: + case 'Q': year = 2022; + break; + case 'R': + year = 2023; + break; + case 'S': + year = 2024; + break; + default: + year = 2025; } break; default: From 9502b7df5a3c7e174f74f20324ac1fe781fc5c2d Mon Sep 17 00:00:00 2001 From: Zhang Heng Date: Mon, 26 Jan 2026 09:49:52 +0800 Subject: [PATCH 069/195] ASoC: amd: yc: Add DMI quirk for Acer TravelMate P216-41-TCO Add a DMI quirk for the Acer TravelMate P216-41-TCO fixing the issue where the internal microphone was not detected. Link: https://bugzilla.kernel.org/show_bug.cgi?id=220983 Cc: stable@vger.kernel.org Signed-off-by: Zhang Heng Link: https://patch.msgid.link/20260126014952.3674450-1-zhangheng@kylinos.cn Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index f801ce4741ff..c18da0915baa 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -682,6 +682,14 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "GOH-X"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "RB"), + DMI_MATCH(DMI_BOARD_NAME, "XyloD5_RBU"), + } + }, + {} }; From 8a1968bd997f45a9b11aefeabdd1232e1b6c7184 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Tue, 20 Jan 2026 00:11:21 +0800 Subject: [PATCH 070/195] mm/shmem, swap: fix race of truncate and swap entry split The helper for shmem swap freeing is not handling the order of swap entries correctly. It uses xa_cmpxchg_irq to erase the swap entry, but it gets the entry order before that using xa_get_order without lock protection, and it may get an outdated order value if the entry is split or changed in other ways after the xa_get_order and before the xa_cmpxchg_irq. And besides, the order could grow and be larger than expected, and cause truncation to erase data beyond the end border. For example, if the target entry and following entries are swapped in or freed, then a large folio was added in place and swapped out, using the same entry, the xa_cmpxchg_irq will still succeed, it's very unlikely to happen though. To fix that, open code the Xarray cmpxchg and put the order retrieval and value checking in the same critical section. Also, ensure the order won't exceed the end border, skip it if the entry goes across the border. Skipping large swap entries crosses the end border is safe here. Shmem truncate iterates the range twice, in the first iteration, find_lock_entries already filtered such entries, and shmem will swapin the entries that cross the end border and partially truncate the folio (split the folio or at least zero part of it). So in the second loop here, if we see a swap entry that crosses the end order, it must at least have its content erased already. I observed random swapoff hangs and kernel panics when stress testing ZSWAP with shmem. After applying this patch, all problems are gone. Link: https://lkml.kernel.org/r/20260120-shmem-swap-fix-v3-1-3d33ebfbc057@tencent.com Fixes: 809bc86517cc ("mm: shmem: support large folio swap out") Signed-off-by: Kairui Song Reviewed-by: Nhat Pham Acked-by: Chris Li Cc: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Hugh Dickins Cc: Kemeng Shi Cc: Signed-off-by: Andrew Morton --- mm/shmem.c | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index ec6c01378e9d..6c3485d24d66 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -962,17 +962,29 @@ static void shmem_delete_from_page_cache(struct folio *folio, void *radswap) * being freed). */ static long shmem_free_swap(struct address_space *mapping, - pgoff_t index, void *radswap) + pgoff_t index, pgoff_t end, void *radswap) { - int order = xa_get_order(&mapping->i_pages, index); - void *old; + XA_STATE(xas, &mapping->i_pages, index); + unsigned int nr_pages = 0; + pgoff_t base; + void *entry; - old = xa_cmpxchg_irq(&mapping->i_pages, index, radswap, NULL, 0); - if (old != radswap) - return 0; - free_swap_and_cache_nr(radix_to_swp_entry(radswap), 1 << order); + xas_lock_irq(&xas); + entry = xas_load(&xas); + if (entry == radswap) { + nr_pages = 1 << xas_get_order(&xas); + base = round_down(xas.xa_index, nr_pages); + if (base < index || base + nr_pages - 1 > end) + nr_pages = 0; + else + xas_store(&xas, NULL); + } + xas_unlock_irq(&xas); - return 1 << order; + if (nr_pages) + free_swap_and_cache_nr(radix_to_swp_entry(radswap), nr_pages); + + return nr_pages; } /* @@ -1124,8 +1136,8 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, uoff_t lend, if (xa_is_value(folio)) { if (unfalloc) continue; - nr_swaps_freed += shmem_free_swap(mapping, - indices[i], folio); + nr_swaps_freed += shmem_free_swap(mapping, indices[i], + end - 1, folio); continue; } @@ -1191,12 +1203,23 @@ whole_folios: folio = fbatch.folios[i]; if (xa_is_value(folio)) { + int order; long swaps_freed; if (unfalloc) continue; - swaps_freed = shmem_free_swap(mapping, indices[i], folio); + swaps_freed = shmem_free_swap(mapping, indices[i], + end - 1, folio); if (!swaps_freed) { + /* + * If found a large swap entry cross the end border, + * skip it as the truncate_inode_partial_folio above + * should have at least zerod its content once. + */ + order = shmem_confirm_swap(mapping, indices[i], + radix_to_swp_entry(folio)); + if (order > 0 && indices[i] + (1 << order) > end) + continue; /* Swap was replaced by page: retry */ index = indices[i]; break; From 9b47d4eea3f7c1f620e95bda1d6221660bde7d7b Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 13 Jan 2026 20:15:15 +0100 Subject: [PATCH 071/195] mm/kasan: fix KASAN poisoning in vrealloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A KASAN warning can be triggered when vrealloc() changes the requested size to a value that is not aligned to KASAN_GRANULE_SIZE. ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1 at mm/kasan/shadow.c:174 kasan_unpoison+0x40/0x48 ... pc : kasan_unpoison+0x40/0x48 lr : __kasan_unpoison_vmalloc+0x40/0x68 Call trace: kasan_unpoison+0x40/0x48 (P) vrealloc_node_align_noprof+0x200/0x320 bpf_patch_insn_data+0x90/0x2f0 convert_ctx_accesses+0x8c0/0x1158 bpf_check+0x1488/0x1900 bpf_prog_load+0xd20/0x1258 __sys_bpf+0x96c/0xdf0 __arm64_sys_bpf+0x50/0xa0 invoke_syscall+0x90/0x160 Introduce a dedicated kasan_vrealloc() helper that centralizes KASAN handling for vmalloc reallocations. The helper accounts for KASAN granule alignment when growing or shrinking an allocation and ensures that partial granules are handled correctly. Use this helper from vrealloc_node_align_noprof() to fix poisoning logic. [ryabinin.a.a@gmail.com: move kasan_enabled() check, fix build] Link: https://lkml.kernel.org/r/20260119144509.32767-1-ryabinin.a.a@gmail.com Link: https://lkml.kernel.org/r/20260113191516.31015-1-ryabinin.a.a@gmail.com Fixes: d699440f58ce ("mm: fix vrealloc()'s KASAN poisoning logic") Signed-off-by: Andrey Ryabinin Reported-by: Maciej Żenczykowski Reported-by: Closes: https://lkml.kernel.org/r/CANP3RGeuRW53vukDy7WDO3FiVgu34-xVJYkfpm08oLO3odYFrA@mail.gmail.com Reviewed-by: Andrey Konovalov Tested-by: Maciej Wieczor-Retman Cc: Alexander Potapenko Cc: Dmitriy Vyukov Cc: Dmitry Vyukov Cc: Uladzislau Rezki Cc: Vincenzo Frascino Cc: Signed-off-by: Andrew Morton --- include/linux/kasan.h | 14 ++++++++++++++ mm/kasan/common.c | 21 +++++++++++++++++++++ mm/vmalloc.c | 7 ++----- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 9c6ac4b62eb9..338a1921a50a 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -641,6 +641,17 @@ kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms, __kasan_unpoison_vmap_areas(vms, nr_vms, flags); } +void __kasan_vrealloc(const void *start, unsigned long old_size, + unsigned long new_size); + +static __always_inline void kasan_vrealloc(const void *start, + unsigned long old_size, + unsigned long new_size) +{ + if (kasan_enabled()) + __kasan_vrealloc(start, old_size, new_size); +} + #else /* CONFIG_KASAN_VMALLOC */ static inline void kasan_populate_early_vm_area_shadow(void *start, @@ -670,6 +681,9 @@ kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms, kasan_vmalloc_flags_t flags) { } +static inline void kasan_vrealloc(const void *start, unsigned long old_size, + unsigned long new_size) { } + #endif /* CONFIG_KASAN_VMALLOC */ #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ diff --git a/mm/kasan/common.c b/mm/kasan/common.c index ed489a14dddf..b7d05c2a6d93 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -606,4 +606,25 @@ void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms, __kasan_unpoison_vmalloc(addr, size, flags | KASAN_VMALLOC_KEEP_TAG); } } + +void __kasan_vrealloc(const void *addr, unsigned long old_size, + unsigned long new_size) +{ + if (new_size < old_size) { + kasan_poison_last_granule(addr, new_size); + + new_size = round_up(new_size, KASAN_GRANULE_SIZE); + old_size = round_up(old_size, KASAN_GRANULE_SIZE); + if (new_size < old_size) + __kasan_poison_vmalloc(addr + new_size, + old_size - new_size); + } else if (new_size > old_size) { + old_size = round_down(old_size, KASAN_GRANULE_SIZE); + __kasan_unpoison_vmalloc(addr + old_size, + new_size - old_size, + KASAN_VMALLOC_PROT_NORMAL | + KASAN_VMALLOC_VM_ALLOC | + KASAN_VMALLOC_KEEP_TAG); + } +} #endif diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 628f96e83b11..e286c2d2068c 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -4322,7 +4322,7 @@ void *vrealloc_node_align_noprof(const void *p, size_t size, unsigned long align if (want_init_on_free() || want_init_on_alloc(flags)) memset((void *)p + size, 0, old_size - size); vm->requested_size = size; - kasan_poison_vmalloc(p + size, old_size - size); + kasan_vrealloc(p, old_size, size); return (void *)p; } @@ -4330,16 +4330,13 @@ void *vrealloc_node_align_noprof(const void *p, size_t size, unsigned long align * We already have the bytes available in the allocation; use them. */ if (size <= alloced_size) { - kasan_unpoison_vmalloc(p + old_size, size - old_size, - KASAN_VMALLOC_PROT_NORMAL | - KASAN_VMALLOC_VM_ALLOC | - KASAN_VMALLOC_KEEP_TAG); /* * No need to zero memory here, as unused memory will have * already been zeroed at initial allocation time or during * realloc shrink time. */ vm->requested_size = size; + kasan_vrealloc(p, old_size, size); return (void *)p; } From a0f3c0845a4ff68d403c568266d17e9cc553e561 Mon Sep 17 00:00:00 2001 From: "robin.kuo" Date: Fri, 16 Jan 2026 14:25:00 +0800 Subject: [PATCH 072/195] mm, swap: restore swap_space attr aviod kernel panic commit 8b47299a411a ("mm, swap: mark swap address space ro and add context debug check") made the swap address space read-only. It may lead to kernel panic if arch_prepare_to_swap returns a failure under heavy memory pressure as follows, el1_abort+0x40/0x64 el1h_64_sync_handler+0x48/0xcc el1h_64_sync+0x84/0x88 errseq_set+0x4c/0xb8 (P) __filemap_set_wb_err+0x20/0xd0 shrink_folio_list+0xc20/0x11cc evict_folios+0x1520/0x1be4 try_to_shrink_lruvec+0x27c/0x3dc shrink_one+0x9c/0x228 shrink_node+0xb3c/0xeac do_try_to_free_pages+0x170/0x4f0 try_to_free_pages+0x334/0x534 __alloc_pages_direct_reclaim+0x90/0x158 __alloc_pages_slowpath+0x334/0x588 __alloc_frozen_pages_noprof+0x224/0x2fc __folio_alloc_noprof+0x14/0x64 vma_alloc_zeroed_movable_folio+0x34/0x44 do_pte_missing+0xad4/0x1040 handle_mm_fault+0x4a4/0x790 do_page_fault+0x288/0x5f8 do_translation_fault+0x38/0x54 do_mem_abort+0x54/0xa8 Restore swap address space as not ro to avoid the panic. Link: https://lkml.kernel.org/r/20260116062535.306453-2-robin.kuo@mediatek.com Fixes: 8b47299a411a ("mm, swap: mark swap address space ro and add context debug check") Signed-off-by: robin.kuo Reviewed-by: Andrew Morton Cc: andrew.yang Cc: AngeloGiaocchino Del Regno Cc: Baoquan He Cc: Barry Song Cc: Chinwen Chang Cc: Chris Li Cc: Kairui Song Cc: Kairui Song Cc: Kemeng Shi Cc: Mathias Brugger Cc: Nhat Pham Cc: Qun-wei Lin Cc: Signed-off-by: Andrew Morton --- mm/swap.h | 2 +- mm/swap_state.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/swap.h b/mm/swap.h index d034c13d8dd2..1bd466da3039 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -198,7 +198,7 @@ int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug); void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug); /* linux/mm/swap_state.c */ -extern struct address_space swap_space __ro_after_init; +extern struct address_space swap_space __read_mostly; static inline struct address_space *swap_address_space(swp_entry_t entry) { return &swap_space; diff --git a/mm/swap_state.c b/mm/swap_state.c index 5f97c6ae70a2..44d228982521 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -37,8 +37,7 @@ static const struct address_space_operations swap_aops = { #endif }; -/* Set swap_space as read only as swap cache is handled by swap table */ -struct address_space swap_space __ro_after_init = { +struct address_space swap_space __read_mostly = { .a_ops = &swap_aops, }; From a148a2040191b12b45b82cb29c281cb3036baf90 Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Tue, 20 Jan 2026 16:22:33 -0700 Subject: [PATCH 073/195] mm/memory-failure: fix missing ->mf_stats count in hugetlb poison When a newly poisoned subpage ends up in an already poisoned hugetlb folio, 'num_poisoned_pages' is incremented, but the per node ->mf_stats is not. Fix the inconsistency by designating action_result() to update them both. While at it, define __get_huge_page_for_hwpoison() return values in terms of symbol names for better readibility. Also rename folio_set_hugetlb_hwpoison() to hugetlb_update_hwpoison() since the function does more than the conventional bit setting and the fact three possible return values are expected. Link: https://lkml.kernel.org/r/20260120232234.3462258-1-jane.chu@oracle.com Fixes: 18f41fa616ee ("mm: memory-failure: bump memory failure stats to pglist_data") Signed-off-by: Jane Chu Acked-by: Miaohe Lin Cc: Chris Mason Cc: David Hildenbrand Cc: David Rientjes Cc: Jiaqi Yan Cc: Liam R. Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: Muchun Song Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Suren Baghdasaryan Cc: William Roche Cc: Signed-off-by: Andrew Morton --- mm/memory-failure.c | 93 +++++++++++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 37 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index c80c2907da33..473204359e1f 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1883,12 +1883,22 @@ static unsigned long __folio_free_raw_hwp(struct folio *folio, bool move_flag) return count; } -static int folio_set_hugetlb_hwpoison(struct folio *folio, struct page *page) +#define MF_HUGETLB_FREED 0 /* freed hugepage */ +#define MF_HUGETLB_IN_USED 1 /* in-use hugepage */ +#define MF_HUGETLB_NON_HUGEPAGE 2 /* not a hugepage */ +#define MF_HUGETLB_FOLIO_PRE_POISONED 3 /* folio already poisoned */ +#define MF_HUGETLB_PAGE_PRE_POISONED 4 /* exact page already poisoned */ +#define MF_HUGETLB_RETRY 5 /* hugepage is busy, retry */ +/* + * Set hugetlb folio as hwpoisoned, update folio private raw hwpoison list + * to keep track of the poisoned pages. + */ +static int hugetlb_update_hwpoison(struct folio *folio, struct page *page) { struct llist_head *head; struct raw_hwp_page *raw_hwp; struct raw_hwp_page *p; - int ret = folio_test_set_hwpoison(folio) ? -EHWPOISON : 0; + int ret = folio_test_set_hwpoison(folio) ? MF_HUGETLB_FOLIO_PRE_POISONED : 0; /* * Once the hwpoison hugepage has lost reliable raw error info, @@ -1896,20 +1906,17 @@ static int folio_set_hugetlb_hwpoison(struct folio *folio, struct page *page) * so skip to add additional raw error info. */ if (folio_test_hugetlb_raw_hwp_unreliable(folio)) - return -EHWPOISON; + return MF_HUGETLB_FOLIO_PRE_POISONED; head = raw_hwp_list_head(folio); llist_for_each_entry(p, head->first, node) { if (p->page == page) - return -EHWPOISON; + return MF_HUGETLB_PAGE_PRE_POISONED; } raw_hwp = kmalloc(sizeof(struct raw_hwp_page), GFP_ATOMIC); if (raw_hwp) { raw_hwp->page = page; llist_add(&raw_hwp->node, head); - /* the first error event will be counted in action_result(). */ - if (ret) - num_poisoned_pages_inc(page_to_pfn(page)); } else { /* * Failed to save raw error info. We no longer trace all @@ -1957,42 +1964,39 @@ void folio_clear_hugetlb_hwpoison(struct folio *folio) /* * Called from hugetlb code with hugetlb_lock held. - * - * Return values: - * 0 - free hugepage - * 1 - in-use hugepage - * 2 - not a hugepage - * -EBUSY - the hugepage is busy (try to retry) - * -EHWPOISON - the hugepage is already hwpoisoned */ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared) { struct page *page = pfn_to_page(pfn); struct folio *folio = page_folio(page); - int ret = 2; /* fallback to normal page handling */ bool count_increased = false; + int ret, rc; - if (!folio_test_hugetlb(folio)) + if (!folio_test_hugetlb(folio)) { + ret = MF_HUGETLB_NON_HUGEPAGE; goto out; - - if (flags & MF_COUNT_INCREASED) { - ret = 1; + } else if (flags & MF_COUNT_INCREASED) { + ret = MF_HUGETLB_IN_USED; count_increased = true; } else if (folio_test_hugetlb_freed(folio)) { - ret = 0; + ret = MF_HUGETLB_FREED; } else if (folio_test_hugetlb_migratable(folio)) { - ret = folio_try_get(folio); - if (ret) + if (folio_try_get(folio)) { + ret = MF_HUGETLB_IN_USED; count_increased = true; + } else { + ret = MF_HUGETLB_FREED; + } } else { - ret = -EBUSY; + ret = MF_HUGETLB_RETRY; if (!(flags & MF_NO_RETRY)) goto out; } - if (folio_set_hugetlb_hwpoison(folio, page)) { - ret = -EHWPOISON; + rc = hugetlb_update_hwpoison(folio, page); + if (rc >= MF_HUGETLB_FOLIO_PRE_POISONED) { + ret = rc; goto out; } @@ -2017,10 +2021,16 @@ out: * with basic operations like hugepage allocation/free/demotion. * So some of prechecks for hwpoison (pinning, and testing/setting * PageHWPoison) should be done in single hugetlb_lock range. + * Returns: + * 0 - not hugetlb, or recovered + * -EBUSY - not recovered + * -EOPNOTSUPP - hwpoison_filter'ed + * -EHWPOISON - folio or exact page already poisoned + * -EFAULT - kill_accessing_process finds current->mm null */ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb) { - int res; + int res, rv; struct page *p = pfn_to_page(pfn); struct folio *folio; unsigned long page_flags; @@ -2029,22 +2039,31 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb *hugetlb = 1; retry: res = get_huge_page_for_hwpoison(pfn, flags, &migratable_cleared); - if (res == 2) { /* fallback to normal page handling */ + switch (res) { + case MF_HUGETLB_NON_HUGEPAGE: /* fallback to normal page handling */ *hugetlb = 0; return 0; - } else if (res == -EHWPOISON) { - if (flags & MF_ACTION_REQUIRED) { - folio = page_folio(p); - res = kill_accessing_process(current, folio_pfn(folio), flags); - } - action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED); - return res; - } else if (res == -EBUSY) { + case MF_HUGETLB_RETRY: if (!(flags & MF_NO_RETRY)) { flags |= MF_NO_RETRY; goto retry; } return action_result(pfn, MF_MSG_GET_HWPOISON, MF_IGNORED); + case MF_HUGETLB_FOLIO_PRE_POISONED: + case MF_HUGETLB_PAGE_PRE_POISONED: + rv = -EHWPOISON; + if (flags & MF_ACTION_REQUIRED) { + folio = page_folio(p); + rv = kill_accessing_process(current, folio_pfn(folio), flags); + } + if (res == MF_HUGETLB_PAGE_PRE_POISONED) + action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED); + else + action_result(pfn, MF_MSG_HUGE, MF_FAILED); + return rv; + default: + WARN_ON((res != MF_HUGETLB_FREED) && (res != MF_HUGETLB_IN_USED)); + break; } folio = page_folio(p); @@ -2055,7 +2074,7 @@ retry: if (migratable_cleared) folio_set_hugetlb_migratable(folio); folio_unlock(folio); - if (res == 1) + if (res == MF_HUGETLB_IN_USED) folio_put(folio); return -EOPNOTSUPP; } @@ -2064,7 +2083,7 @@ retry: * Handling free hugepage. The possible race with hugepage allocation * or demotion can be prevented by PageHWPoison flag. */ - if (res == 0) { + if (res == MF_HUGETLB_FREED) { folio_unlock(folio); if (__page_handle_poison(p) > 0) { page_ref_inc(p); From 057a6f2632c956483e2b2628477f0fcd1cd8a844 Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Tue, 20 Jan 2026 16:22:34 -0700 Subject: [PATCH 074/195] mm/memory-failure: teach kill_accessing_process to accept hugetlb tail page pfn When a hugetlb folio is being poisoned again, try_memory_failure_hugetlb() passed head pfn to kill_accessing_process(), that is not right. The precise pfn of the poisoned page should be used in order to determine the precise vaddr as the SIGBUS payload. This issue has already been taken care of in the normal path, that is, hwpoison_user_mappings(), see [1][2]. Further more, for [3] to work correctly in the hugetlb repoisoning case, it's essential to inform VM the precise poisoned page, not the head page. [1] https://lkml.kernel.org/r/20231218135837.3310403-1-willy@infradead.org [2] https://lkml.kernel.org/r/20250224211445.2663312-1-jane.chu@oracle.com [3] https://lore.kernel.org/lkml/20251116013223.1557158-1-jiaqiyan@google.com/ Link: https://lkml.kernel.org/r/20260120232234.3462258-2-jane.chu@oracle.com Signed-off-by: Jane Chu Reviewed-by: Liam R. Howlett Acked-by: Miaohe Lin Cc: Chris Mason Cc: David Hildenbrand Cc: David Rientjes Cc: Jiaqi Yan Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: Muchun Song Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Suren Baghdasaryan Cc: William Roche Cc: Signed-off-by: Andrew Morton --- mm/memory-failure.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 473204359e1f..cf0d526e6d41 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -692,6 +692,8 @@ static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift, unsigned long poisoned_pfn, struct to_kill *tk) { unsigned long pfn = 0; + unsigned long hwpoison_vaddr; + unsigned long mask; if (pte_present(pte)) { pfn = pte_pfn(pte); @@ -702,10 +704,12 @@ static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift, pfn = softleaf_to_pfn(entry); } - if (!pfn || pfn != poisoned_pfn) + mask = ~((1UL << (shift - PAGE_SHIFT)) - 1); + if (!pfn || pfn != (poisoned_pfn & mask)) return 0; - set_to_kill(tk, addr, shift); + hwpoison_vaddr = addr + ((poisoned_pfn - pfn) << PAGE_SHIFT); + set_to_kill(tk, hwpoison_vaddr, shift); return 1; } @@ -2052,10 +2056,8 @@ retry: case MF_HUGETLB_FOLIO_PRE_POISONED: case MF_HUGETLB_PAGE_PRE_POISONED: rv = -EHWPOISON; - if (flags & MF_ACTION_REQUIRED) { - folio = page_folio(p); - rv = kill_accessing_process(current, folio_pfn(folio), flags); - } + if (flags & MF_ACTION_REQUIRED) + rv = kill_accessing_process(current, pfn, flags); if (res == MF_HUGETLB_PAGE_PRE_POISONED) action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED); else From d54887e8e144514768df056878d27346f5b71093 Mon Sep 17 00:00:00 2001 From: Viacheslav Bocharov Date: Tue, 20 Jan 2026 11:21:59 +0300 Subject: [PATCH 075/195] mailmap: add entry for Viacheslav Bocharov Map my address to new personal address Old domain lexina.in will no longer be accessible due to registration expiration. Link: https://lkml.kernel.org/r/20260120082212.364268-1-adeep@lexina.in Signed-off-by: Viacheslav Bocharov Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 4a8a160f28ed..a64f269dd7cb 100644 --- a/.mailmap +++ b/.mailmap @@ -850,6 +850,7 @@ Valentin Schneider Veera Sundaram Sankaran Veerabhadrarao Badiganti Venkateswara Naralasetty +Viacheslav Bocharov Vikash Garodia Vikash Garodia Vincent Mailhol From dd9e2f5b38f1fdd49b1ab6d3a85f81c14369eacc Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Jan 2026 12:27:30 +0100 Subject: [PATCH 076/195] flex_proportions: make fprop_new_period() hardirq safe Bernd has reported a lockdep splat from flexible proportions code that is essentially complaining about the following race: run_timer_softirq - we are in softirq context call_timer_fn writeout_period fprop_new_period write_seqcount_begin(&p->sequence); ... blk_mq_end_request() blk_update_request() ext4_end_bio() folio_end_writeback() __wb_writeout_add() __fprop_add_percpu_max() if (unlikely(max_frac < FPROP_FRAC_BASE)) { fprop_fraction_percpu() seq = read_seqcount_begin(&p->sequence); - sees odd sequence so loops indefinitely Note that a deadlock like this is only possible if the bdi has configured maximum fraction of writeout throughput which is very rare in general but frequent for example for FUSE bdis. To fix this problem we have to make sure write section of the sequence counter is irqsafe. Link: https://lkml.kernel.org/r/20260121112729.24463-2-jack@suse.cz Fixes: a91befde3503 ("lib/flex_proportions.c: remove local_irq_ops in fprop_new_period()") Signed-off-by: Jan Kara Reported-by: Bernd Schubert Link: https://lore.kernel.org/all/9b845a47-9aee-43dd-99bc-1a82bea00442@bsbernd.com/ Reviewed-by: Matthew Wilcox (Oracle) Cc: Joanne Koong Cc: Miklos Szeredi Cc: Signed-off-by: Andrew Morton --- lib/flex_proportions.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index 84ecccddc771..012d5614efb9 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -64,13 +64,14 @@ void fprop_global_destroy(struct fprop_global *p) bool fprop_new_period(struct fprop_global *p, int periods) { s64 events = percpu_counter_sum(&p->events); + unsigned long flags; /* * Don't do anything if there are no events. */ if (events <= 1) return false; - preempt_disable_nested(); + local_irq_save(flags); write_seqcount_begin(&p->sequence); if (periods < 64) events -= events >> periods; @@ -78,7 +79,7 @@ bool fprop_new_period(struct fprop_global *p, int periods) percpu_counter_add(&p->events, -events); p->period += periods; write_seqcount_end(&p->sequence); - preempt_enable_nested(); + local_irq_restore(flags); return true; } From 71e2b5eadbad43d33f0e2cf6d767395273ba5eaa Mon Sep 17 00:00:00 2001 From: "Pratyush Yadav (Google)" Date: Thu, 22 Jan 2026 16:18:39 +0100 Subject: [PATCH 077/195] memfd: export alloc_file() Patch series "mm: memfd_luo hotfixes". This series contains a couple of fixes for memfd preservation using LUO. This patch (of 3): The Live Update Orchestrator's (LUO) memfd preservation works by preserving all the folios of a memfd, re-creating an empty memfd on the next boot, and then inserting back the preserved folios. Currently it creates the file by directly calling shmem_file_setup(). This leaves out other work done by alloc_file() like setting up the file mode, flags, or calling the security hooks. Export alloc_file() to let memfd_luo use it. Rename it to memfd_alloc_file() since it is no longer private and thus needs a subsystem prefix. Link: https://lkml.kernel.org/r/20260122151842.4069702-1-pratyush@kernel.org Link: https://lkml.kernel.org/r/20260122151842.4069702-2-pratyush@kernel.org Signed-off-by: Pratyush Yadav (Google) Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Pasha Tatashin Cc: Baolin Wang Cc: Hugh Dickins Signed-off-by: Andrew Morton --- include/linux/memfd.h | 6 ++++++ mm/memfd.c | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/linux/memfd.h b/include/linux/memfd.h index cc74de3dbcfe..c328a7b356d0 100644 --- a/include/linux/memfd.h +++ b/include/linux/memfd.h @@ -17,6 +17,7 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx); * to by vm_flags_ptr. */ int memfd_check_seals_mmap(struct file *file, vm_flags_t *vm_flags_ptr); +struct file *memfd_alloc_file(const char *name, unsigned int flags); #else static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned int a) { @@ -31,6 +32,11 @@ static inline int memfd_check_seals_mmap(struct file *file, { return 0; } + +static inline struct file *memfd_alloc_file(const char *name, unsigned int flags) +{ + return ERR_PTR(-EINVAL); +} #endif #endif /* __LINUX_MEMFD_H */ diff --git a/mm/memfd.c b/mm/memfd.c index ab5312aff14b..f032c6052926 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -456,7 +456,7 @@ err_name: return ERR_PTR(error); } -static struct file *alloc_file(const char *name, unsigned int flags) +struct file *memfd_alloc_file(const char *name, unsigned int flags) { unsigned int *file_seals; struct file *file; @@ -520,5 +520,5 @@ SYSCALL_DEFINE2(memfd_create, return PTR_ERR(name); fd_flags = (flags & MFD_CLOEXEC) ? O_CLOEXEC : 0; - return FD_ADD(fd_flags, alloc_file(name, flags)); + return FD_ADD(fd_flags, memfd_alloc_file(name, flags)); } From 02e117b8ca58928f193e5b4df48d6763232f5e91 Mon Sep 17 00:00:00 2001 From: "Pratyush Yadav (Google)" Date: Thu, 22 Jan 2026 16:18:40 +0100 Subject: [PATCH 078/195] mm: memfd_luo: use memfd_alloc_file() instead of shmem_file_setup() When restoring a memfd, the file is created using shmem_file_setup(). While memfd creation also calls this function to get the file, it also does other things: 1. The O_LARGEFILE flag is set on the file. If this is not done, writes on the memfd exceeding 2 GiB fail. 2. FMODE_LSEEK, FMODE_PREAD, and FMODE_PWRITE are set on the file. This makes sure the file is seekable and can be used with pread() and pwrite(). 3. Initializes the security field for the inode and makes sure that inode creation is permitted by the security module. Currently, none of those things are done. This means writes above 2 GiB fail, pread(), and pwrite() fail, and so on. lseek() happens to work because file_init_path() sets it because shmem defines fop->llseek. Fix this by using memfd_alloc_file() to get the file to make sure the initialization sequence for normal and preserved memfd is the same. Link: https://lkml.kernel.org/r/20260122151842.4069702-3-pratyush@kernel.org Fixes: b3749f174d68 ("mm: memfd_luo: allow preserving memfd") Signed-off-by: Pratyush Yadav (Google) Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Pasha Tatashin Cc: Baolin Wang Cc: Hugh Dickins Signed-off-by: Andrew Morton --- mm/memfd_luo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c index 4f6ba63b4310..01a72e4d3ef6 100644 --- a/mm/memfd_luo.c +++ b/mm/memfd_luo.c @@ -78,6 +78,7 @@ #include #include #include +#include #include "internal.h" static int memfd_luo_preserve_folios(struct file *file, @@ -443,8 +444,7 @@ static int memfd_luo_retrieve(struct liveupdate_file_op_args *args) if (!ser) return -EINVAL; - file = shmem_file_setup("", 0, VM_NORESERVE); - + file = memfd_alloc_file("", 0); if (IS_ERR(file)) { pr_err("failed to setup file: %pe\n", file); return PTR_ERR(file); From c657c5dc1360fa1ed1b090aedb5883d9cf9f0a0f Mon Sep 17 00:00:00 2001 From: "Pratyush Yadav (Google)" Date: Thu, 22 Jan 2026 16:18:41 +0100 Subject: [PATCH 079/195] mm: memfd_luo: restore and free memfd_luo_ser on failure memfd_luo_ser has the serialization metadata. It is of no use once restoration fails. Free it on failure. Link: https://lkml.kernel.org/r/20260122151842.4069702-4-pratyush@kernel.org Fixes: b3749f174d68 ("mm: memfd_luo: allow preserving memfd") Signed-off-by: Pratyush Yadav (Google) Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Pasha Tatashin Cc: Baolin Wang Cc: Hugh Dickins Signed-off-by: Andrew Morton --- mm/memfd_luo.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c index 01a72e4d3ef6..a34fccc23b6a 100644 --- a/mm/memfd_luo.c +++ b/mm/memfd_luo.c @@ -447,7 +447,8 @@ static int memfd_luo_retrieve(struct liveupdate_file_op_args *args) file = memfd_alloc_file("", 0); if (IS_ERR(file)) { pr_err("failed to setup file: %pe\n", file); - return PTR_ERR(file); + err = PTR_ERR(file); + goto free_ser; } vfs_setpos(file, ser->pos, MAX_LFS_FILESIZE); @@ -473,7 +474,8 @@ static int memfd_luo_retrieve(struct liveupdate_file_op_args *args) put_file: fput(file); - +free_ser: + kho_restore_free(ser); return err; } From e86436ad0ad2a9aaf88802d69b68f02cbd1f04a9 Mon Sep 17 00:00:00 2001 From: Ran Xiaokai Date: Thu, 22 Jan 2026 13:27:40 +0000 Subject: [PATCH 080/195] kho: init alloc tags when restoring pages from reserved memory Memblock pages (including reserved memory) should have their allocation tags initialized to CODETAG_EMPTY via clear_page_tag_ref() before being released to the page allocator. When kho restores pages through kho_restore_page(), missing this call causes mismatched allocation/deallocation tracking and below warning message: alloc_tag was not set WARNING: include/linux/alloc_tag.h:164 at ___free_pages+0xb8/0x260, CPU#1: swapper/0/1 RIP: 0010:___free_pages+0xb8/0x260 kho_restore_vmalloc+0x187/0x2e0 kho_test_init+0x3c4/0xa30 do_one_initcall+0x62/0x2b0 kernel_init_freeable+0x25b/0x480 kernel_init+0x1a/0x1c0 ret_from_fork+0x2d1/0x360 Add missing clear_page_tag_ref() annotation in kho_restore_page() to fix this. Link: https://lkml.kernel.org/r/20260122132740.176468-1-ranxiaokai627@163.com Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation") Signed-off-by: Ran Xiaokai Reviewed-by: Pratyush Yadav Reviewed-by: Pasha Tatashin Reviewed-by: Mike Rapoport (Microsoft) Cc: Alexander Graf Cc: Kent Overstreet Cc: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- kernel/liveupdate/kexec_handover.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index d4482b6e3cae..96767b106cac 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -255,6 +255,14 @@ static struct page *kho_restore_page(phys_addr_t phys, bool is_folio) if (is_folio && info.order) prep_compound_page(page, info.order); + /* Always mark headpage's codetag as empty to avoid accounting mismatch */ + clear_page_tag_ref(page); + if (!is_folio) { + /* Also do that for the non-compound tail pages */ + for (unsigned int i = 1; i < nr_pages; i++) + clear_page_tag_ref(page + i); + } + adjust_managed_page_count(page, nr_pages); return page; } From 412a32f0e53f4a50062f6f4bc18f8910aa551734 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 21 Jan 2026 12:36:17 -0800 Subject: [PATCH 081/195] kho: kho_preserve_vmalloc(): don't return 0 when ENOMEM kho_preserve_vmalloc() should return -ENOMEM when new_vmalloc_chunk() fails. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202601211636.IRaejjdw-lkp@intel.com/ Reviewed-by: Pasha Tatashin Reviewed-by: Pratyush Yadav Reviewed-by: Mike Rapoport (Microsoft) Cc: Alexander Graf Cc: Jason Gunthorpe Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- kernel/liveupdate/kexec_handover.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index 96767b106cac..90d411a59f76 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -1014,8 +1014,10 @@ int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation) chunk->phys[idx++] = phys; if (idx == ARRAY_SIZE(chunk->phys)) { chunk = new_vmalloc_chunk(chunk); - if (!chunk) + if (!chunk) { + err = -ENOMEM; goto err_free; + } idx = 0; } } From 870ff19251bf3910dda7a7245da826924045fedd Mon Sep 17 00:00:00 2001 From: Pimyn Girgis Date: Tue, 20 Jan 2026 17:15:10 +0100 Subject: [PATCH 082/195] mm/kfence: randomize the freelist on initialization Randomize the KFENCE freelist during pool initialization to make allocation patterns less predictable. This is achieved by shuffling the order in which metadata objects are added to the freelist using get_random_u32_below(). Additionally, ensure the error path correctly calculates the address range to be reset if initialization fails, as the address increment logic has been moved to a separate loop. Link: https://lkml.kernel.org/r/20260120161510.3289089-1-pimyn@google.com Fixes: 0ce20dd84089 ("mm: add Kernel Electric-Fence infrastructure") Signed-off-by: Pimyn Girgis Reviewed-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Marco Elver Cc: Ernesto Martnez Garca Cc: Greg KH Cc: Kees Cook Cc: Signed-off-by: Andrew Morton --- mm/kfence/core.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index da0f5b6f5744..4f79ec720752 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -596,7 +596,7 @@ static void rcu_guarded_free(struct rcu_head *h) static unsigned long kfence_init_pool(void) { unsigned long addr, start_pfn; - int i; + int i, rand; if (!arch_kfence_init_pool()) return (unsigned long)__kfence_pool; @@ -647,13 +647,27 @@ static unsigned long kfence_init_pool(void) INIT_LIST_HEAD(&meta->list); raw_spin_lock_init(&meta->lock); meta->state = KFENCE_OBJECT_UNUSED; - meta->addr = addr; /* Initialize for validation in metadata_to_pageaddr(). */ - list_add_tail(&meta->list, &kfence_freelist); + /* Use addr to randomize the freelist. */ + meta->addr = i; /* Protect the right redzone. */ - if (unlikely(!kfence_protect(addr + PAGE_SIZE))) + if (unlikely(!kfence_protect(addr + 2 * i * PAGE_SIZE + PAGE_SIZE))) goto reset_slab; + } + for (i = CONFIG_KFENCE_NUM_OBJECTS; i > 0; i--) { + rand = get_random_u32_below(i); + swap(kfence_metadata_init[i - 1].addr, kfence_metadata_init[rand].addr); + } + + for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) { + struct kfence_metadata *meta_1 = &kfence_metadata_init[i]; + struct kfence_metadata *meta_2 = &kfence_metadata_init[meta_1->addr]; + + list_add_tail(&meta_2->list, &kfence_freelist); + } + for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) { + kfence_metadata_init[i].addr = addr; addr += 2 * PAGE_SIZE; } @@ -666,6 +680,7 @@ static unsigned long kfence_init_pool(void) return 0; reset_slab: + addr += 2 * i * PAGE_SIZE; for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) { struct page *page; From cbbbf7795fc35a740e1d09f3a55aba543b72a8d3 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 22 Jan 2026 13:43:43 -0500 Subject: [PATCH 083/195] mm/mm_init: don't cond_resched() in deferred_init_memmap_chunk() if called from deferred_grow_zone() Commit 3acb913c9d5b ("mm/mm_init: use deferred_init_memmap_chunk() in deferred_grow_zone()") made deferred_grow_zone() call deferred_init_memmap_chunk() within a pgdat_resize_lock() critical section with irqs disabled. It did check for irqs_disabled() in deferred_init_memmap_chunk() to avoid calling cond_resched(). For a PREEMPT_RT kernel build, however, spin_lock_irqsave() does not disable interrupt but rcu_read_lock() is called. This leads to the following bug report. BUG: sleeping function called from invalid context at mm/mm_init.c:2091 in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 1, name: swapper/0 preempt_count: 0, expected: 0 RCU nest depth: 1, expected: 0 3 locks held by swapper/0/1: #0: ffff80008471b7a0 (sched_domains_mutex){+.+.}-{4:4}, at: sched_domains_mutex_lock+0x28/0x40 #1: ffff003bdfffef48 (&pgdat->node_size_lock){+.+.}-{3:3}, at: deferred_grow_zone+0x140/0x278 #2: ffff800084acf600 (rcu_read_lock){....}-{1:3}, at: rt_spin_lock+0x1b4/0x408 CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Tainted: G W 6.19.0-rc6-test #1 PREEMPT_{RT,(full) } Tainted: [W]=WARN Call trace: show_stack+0x20/0x38 (C) dump_stack_lvl+0xdc/0xf8 dump_stack+0x1c/0x28 __might_resched+0x384/0x530 deferred_init_memmap_chunk+0x560/0x688 deferred_grow_zone+0x190/0x278 _deferred_grow_zone+0x18/0x30 get_page_from_freelist+0x780/0xf78 __alloc_frozen_pages_noprof+0x1dc/0x348 alloc_slab_page+0x30/0x110 allocate_slab+0x98/0x2a0 new_slab+0x4c/0x80 ___slab_alloc+0x5a4/0x770 __slab_alloc.constprop.0+0x88/0x1e0 __kmalloc_node_noprof+0x2c0/0x598 __sdt_alloc+0x3b8/0x728 build_sched_domains+0xe0/0x1260 sched_init_domains+0x14c/0x1c8 sched_init_smp+0x9c/0x1d0 kernel_init_freeable+0x218/0x358 kernel_init+0x28/0x208 ret_from_fork+0x10/0x20 Fix it adding a new argument to deferred_init_memmap_chunk() to explicitly tell it if cond_resched() is allowed or not instead of relying on some current state information which may vary depending on the exact kernel configuration options that are enabled. Link: https://lkml.kernel.org/r/20260122184343.546627-1-longman@redhat.com Fixes: 3acb913c9d5b ("mm/mm_init: use deferred_init_memmap_chunk() in deferred_grow_zone()") Signed-off-by: Waiman Long Suggested-by: Sebastian Andrzej Siewior Reviewed-by: Sebastian Andrzej Siewior Reviewed-by: Mike Rapoport (Microsoft) Cc: David Hildenbrand Cc: "Paul E . McKenney" Cc: Steven Rostedt Cc: Wei Yang Cc: Signed-off-by: Andrew Morton --- mm/mm_init.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/mm_init.c b/mm/mm_init.c index fc2a6f1e518f..2a809cd8e7fa 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2059,7 +2059,7 @@ static unsigned long __init deferred_init_pages(struct zone *zone, */ static unsigned long __init deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn, - struct zone *zone) + struct zone *zone, bool can_resched) { int nid = zone_to_nid(zone); unsigned long nr_pages = 0; @@ -2085,10 +2085,10 @@ deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn, spfn = chunk_end; - if (irqs_disabled()) - touch_nmi_watchdog(); - else + if (can_resched) cond_resched(); + else + touch_nmi_watchdog(); } } @@ -2101,7 +2101,7 @@ deferred_init_memmap_job(unsigned long start_pfn, unsigned long end_pfn, { struct zone *zone = arg; - deferred_init_memmap_chunk(start_pfn, end_pfn, zone); + deferred_init_memmap_chunk(start_pfn, end_pfn, zone, true); } static unsigned int __init @@ -2216,7 +2216,7 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order) for (spfn = first_deferred_pfn, epfn = SECTION_ALIGN_UP(spfn + 1); nr_pages < nr_pages_needed && spfn < zone_end_pfn(zone); spfn = epfn, epfn += PAGES_PER_SECTION) { - nr_pages += deferred_init_memmap_chunk(spfn, epfn, zone); + nr_pages += deferred_init_memmap_chunk(spfn, epfn, zone, false); } /* From 12b2285bf3d14372238d36215b73af02ac3bdfc1 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 16 Jan 2026 12:10:16 +0100 Subject: [PATCH 084/195] mm/zone_device: reinitialize large zone device private folios MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reinitialize metadata for large zone device private folios in zone_device_page_init prior to creating a higher-order zone device private folio. This step is necessary when the folio's order changes dynamically between zone_device_page_init calls to avoid building a corrupt folio. As part of the metadata reinitialization, the dev_pagemap must be passed in from the caller because the pgmap stored in the folio page may have been overwritten with a compound head. Without this fix, individual pages could have invalid pgmap fields and flags (with PG_locked being notably problematic) due to prior different order allocations, which can, and will, result in kernel crashes. Link: https://lkml.kernel.org/r/20260116111325.1736137-2-francois.dugast@intel.com Fixes: d245f9b4ab80 ("mm/zone_device: support large zone device private folios") Signed-off-by: Matthew Brost Signed-off-by: Francois Dugast Acked-by: Felix Kuehling Reviewed-by: Balbir Singh Acked-by: Vlastimil Babka Cc: Zi Yan Cc: Alistair Popple Cc: Madhavan Srinivasan Cc: Nicholas Piggin Cc: Michael Ellerman Cc: "Christophe Leroy (CS GROUP)" Cc: Alex Deucher Cc: "Christian König" Cc: David Airlie Cc: Simona Vetter Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: Lyude Paul Cc: Danilo Krummrich Cc: David Hildenbrand Cc: Oscar Salvador Cc: Andrew Morton Cc: Jason Gunthorpe Cc: Leon Romanovsky Cc: Lorenzo Stoakes Cc: Liam R. Howlett Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Michal Hocko Signed-off-by: Andrew Morton --- arch/powerpc/kvm/book3s_hv_uvmem.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 2 +- drivers/gpu/drm/drm_pagemap.c | 2 +- drivers/gpu/drm/nouveau/nouveau_dmem.c | 2 +- include/linux/memremap.h | 9 ++++-- lib/test_hmm.c | 4 ++- mm/memremap.c | 35 +++++++++++++++++++++++- 7 files changed, 47 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index e5000bef90f2..7cf9310de0ec 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -723,7 +723,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm) dpage = pfn_to_page(uvmem_pfn); dpage->zone_device_data = pvt; - zone_device_page_init(dpage, 0); + zone_device_page_init(dpage, &kvmppc_uvmem_pgmap, 0); return dpage; out_clear: spin_lock(&kvmppc_uvmem_bitmap_lock); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index af53e796ea1b..6ada7b4af7c6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -217,7 +217,7 @@ svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn) page = pfn_to_page(pfn); svm_range_bo_ref(prange->svm_bo); page->zone_device_data = prange->svm_bo; - zone_device_page_init(page, 0); + zone_device_page_init(page, page_pgmap(page), 0); } static void diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c index 06c1bd8fc4d1..704f2f945019 100644 --- a/drivers/gpu/drm/drm_pagemap.c +++ b/drivers/gpu/drm/drm_pagemap.c @@ -197,7 +197,7 @@ static void drm_pagemap_get_devmem_page(struct page *page, struct drm_pagemap_zdd *zdd) { page->zone_device_data = drm_pagemap_zdd_get(zdd); - zone_device_page_init(page, 0); + zone_device_page_init(page, page_pgmap(page), 0); } /** diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 58071652679d..3d8031296eed 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -425,7 +425,7 @@ nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm, bool is_large) order = ilog2(DMEM_CHUNK_NPAGES); } - zone_device_folio_init(folio, order); + zone_device_folio_init(folio, page_pgmap(folio_page(folio, 0)), order); return page; } diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 713ec0435b48..e3c2ccf872a8 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -224,7 +224,8 @@ static inline bool is_fsdax_page(const struct page *page) } #ifdef CONFIG_ZONE_DEVICE -void zone_device_page_init(struct page *page, unsigned int order); +void zone_device_page_init(struct page *page, struct dev_pagemap *pgmap, + unsigned int order); void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); @@ -234,9 +235,11 @@ bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn); unsigned long memremap_compat_align(void); -static inline void zone_device_folio_init(struct folio *folio, unsigned int order) +static inline void zone_device_folio_init(struct folio *folio, + struct dev_pagemap *pgmap, + unsigned int order) { - zone_device_page_init(&folio->page, order); + zone_device_page_init(&folio->page, pgmap, order); if (order) folio_set_large_rmappable(folio); } diff --git a/lib/test_hmm.c b/lib/test_hmm.c index 8af169d3873a..455a6862ae50 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -662,7 +662,9 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror *dmirror, goto error; } - zone_device_folio_init(page_folio(dpage), order); + zone_device_folio_init(page_folio(dpage), + page_pgmap(folio_page(page_folio(dpage), 0)), + order); dpage->zone_device_data = rpage; return dpage; diff --git a/mm/memremap.c b/mm/memremap.c index 63c6ab4fdf08..ac7be07e3361 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -477,10 +477,43 @@ void free_zone_device_folio(struct folio *folio) } } -void zone_device_page_init(struct page *page, unsigned int order) +void zone_device_page_init(struct page *page, struct dev_pagemap *pgmap, + unsigned int order) { + struct page *new_page = page; + unsigned int i; + VM_WARN_ON_ONCE(order > MAX_ORDER_NR_PAGES); + for (i = 0; i < (1UL << order); ++i, ++new_page) { + struct folio *new_folio = (struct folio *)new_page; + + /* + * new_page could have been part of previous higher order folio + * which encodes the order, in page + 1, in the flags bits. We + * blindly clear bits which could have set my order field here, + * including page head. + */ + new_page->flags.f &= ~0xffUL; /* Clear possible order, page head */ + +#ifdef NR_PAGES_IN_LARGE_FOLIO + /* + * This pointer math looks odd, but new_page could have been + * part of a previous higher order folio, which sets _nr_pages + * in page + 1 (new_page). Therefore, we use pointer casting to + * correctly locate the _nr_pages bits within new_page which + * could have modified by previous higher order folio. + */ + ((struct folio *)(new_page - 1))->_nr_pages = 0; +#endif + + new_folio->mapping = NULL; + new_folio->pgmap = pgmap; /* Also clear compound head */ + new_folio->share = 0; /* fsdax only, unused for device private */ + VM_WARN_ON_FOLIO(folio_ref_count(new_folio), new_folio); + VM_WARN_ON_FOLIO(!folio_is_zone_device(new_folio), new_folio); + } + /* * Drivers shouldn't be allocating pages after calling * memunmap_pages(). From bd58782995a2e6a07fd07255f3cc319f40b131c9 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 22 Jan 2026 02:39:36 -0800 Subject: [PATCH 085/195] vmcoreinfo: make hwerr_data visible for debugging If the kernel is compiled with LTO, hwerr_data symbol might be lost, and vmcoreinfo doesn't have it dumped. This is currently seen in some production kernels with LTO enabled. Remove the static qualifier from hwerr_data so that the information is still preserved when the kernel is built with LTO. Making hwerr_data a global symbol ensures its debug info survives the LTO link process and appears in kallsyms. Also document it, so it doesn't get removed in the future as suggested by akpm. Link: https://lkml.kernel.org/r/20260122-fix_vmcoreinfo-v2-1-2d6311f9e36c@debian.org Fixes: 3fa805c37dd4 ("vmcoreinfo: track and log recoverable hardware errors") Signed-off-by: Breno Leitao Acked-by: Baoquan He Cc: Dave Young Cc: "Luck, Tony" Cc: Omar Sandoval Cc: Shuai Xue Cc: Vivek Goyal Cc: Zhiquan Li Signed-off-by: Andrew Morton --- kernel/vmcore_info.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c index fe9bf8db1922..e2784038bbed 100644 --- a/kernel/vmcore_info.c +++ b/kernel/vmcore_info.c @@ -36,7 +36,11 @@ struct hwerr_info { time64_t timestamp; }; -static struct hwerr_info hwerr_data[HWERR_RECOV_MAX]; +/* + * The hwerr_data[] array is declared with global scope so that it remains + * accessible to vmcoreinfo even when Link Time Optimization (LTO) is enabled. + */ +struct hwerr_info hwerr_data[HWERR_RECOV_MAX]; Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len) From 7e3debb4c72fe840d60014192cf93950871fb3be Mon Sep 17 00:00:00 2001 From: Pei Xiao Date: Thu, 22 Jan 2026 09:44:48 +0800 Subject: [PATCH 086/195] cpufreq: qcom-nvmem: add sentinel to qcom_cpufreq_ipq806x_match_list The of_device_id table is expected to be NULL-terminated. Without the sentinel, the traversal of the array can lead to out-of-bound access, causing undefined behavior. This adds the missing sentinel to the qcom_cpufreq_ipq806x_match_list array. Fixes: 58f5d39d5ed8 ("cpufreq: qcom-nvmem: add compatible fallback for ipq806x for no SMEM") Signed-off-by: Pei Xiao Reviewed-by: Konrad Dybcio Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-nvmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index 81e16b5a0245..b8081acba928 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -263,6 +263,7 @@ static const struct of_device_id qcom_cpufreq_ipq806x_match_list[] __maybe_unuse { .compatible = "qcom,ipq8066", .data = (const void *)QCOM_ID_IPQ8066 }, { .compatible = "qcom,ipq8068", .data = (const void *)QCOM_ID_IPQ8068 }, { .compatible = "qcom,ipq8069", .data = (const void *)QCOM_ID_IPQ8069 }, + { /* sentinel */ } }; static int qcom_cpufreq_ipq8064_name_version(struct device *cpu_dev, From 7ca497be00163610afb663867db24ac408752f13 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 26 Jan 2026 12:12:26 +0000 Subject: [PATCH 087/195] gpio: rockchip: Stop calling pinctrl for set_direction Marking the whole controller as sleeping due to the pinctrl calls in the .direction_{input,output} callbacks has the unfortunate side effect that legitimate invocations of .get and .set, which cannot themselves sleep, in atomic context now spew WARN()s from gpiolib. However, as Heiko points out, the driver doing this is a bit silly to begin with, as the pinctrl .gpio_set_direction hook doesn't even care about the direction, the hook is only used to claim the mux. And sure enough, the .gpio_request_enable hook exists to serve this very purpose, so switch to that and remove the problematic business entirely. Cc: stable@vger.kernel.org Fixes: 20cf2aed89ac ("gpio: rockchip: mark the GPIO controller as sleeping") Suggested-by: Heiko Stuebner Signed-off-by: Robin Murphy Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/bddc0469f25843ca5ae0cf578ab3671435ae98a7.1769429546.git.robin.murphy@arm.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-rockchip.c | 8 -------- drivers/pinctrl/pinctrl-rockchip.c | 9 ++++----- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c index bae2061f15fc..0fff4a699f12 100644 --- a/drivers/gpio/gpio-rockchip.c +++ b/drivers/gpio/gpio-rockchip.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -164,12 +163,6 @@ static int rockchip_gpio_set_direction(struct gpio_chip *chip, unsigned long flags; u32 data = input ? 0 : 1; - - if (input) - pinctrl_gpio_direction_input(chip, offset); - else - pinctrl_gpio_direction_output(chip, offset); - raw_spin_lock_irqsave(&bank->slock, flags); rockchip_gpio_writel_bit(bank, offset, data, bank->gpio_regs->port_ddr); raw_spin_unlock_irqrestore(&bank->slock, flags); @@ -593,7 +586,6 @@ static int rockchip_gpiolib_register(struct rockchip_pin_bank *bank) gc->ngpio = bank->nr_pins; gc->label = bank->name; gc->parent = bank->dev; - gc->can_sleep = true; ret = gpiochip_add_data(gc, bank); if (ret) { diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index e44ef262beec..2fc67aeafdb3 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -3545,10 +3545,9 @@ static int rockchip_pmx_set(struct pinctrl_dev *pctldev, unsigned selector, return 0; } -static int rockchip_pmx_gpio_set_direction(struct pinctrl_dev *pctldev, - struct pinctrl_gpio_range *range, - unsigned offset, - bool input) +static int rockchip_pmx_gpio_request_enable(struct pinctrl_dev *pctldev, + struct pinctrl_gpio_range *range, + unsigned int offset) { struct rockchip_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); struct rockchip_pin_bank *bank; @@ -3562,7 +3561,7 @@ static const struct pinmux_ops rockchip_pmx_ops = { .get_function_name = rockchip_pmx_get_func_name, .get_function_groups = rockchip_pmx_get_groups, .set_mux = rockchip_pmx_set, - .gpio_set_direction = rockchip_pmx_gpio_set_direction, + .gpio_request_enable = rockchip_pmx_gpio_request_enable, }; /* From 213c4e51267fd825cd21a08a055450cac7e0b7fb Mon Sep 17 00:00:00 2001 From: Tagir Garaev Date: Wed, 21 Jan 2026 18:24:35 +0300 Subject: [PATCH 088/195] ASoC: Intel: sof_es8336: fix headphone GPIO logic inversion The headphone GPIO should be set to the inverse of speaker_en. When speakers are enabled, headphones should be disabled and vice versa. Currently both GPIOs are set to the same value (speaker_en), causing audio to play through both speakers and headphones simultaneously when headphones are plugged in. Tested on Huawei Matebook (BOD-WXX9) with ES8336 codec. Fixes: 6e1ff1459e00 ("ASoC: Intel: sof_es8336: support a separate gpio to control headphone") Signed-off-by: Tagir Garaev Link: https://patch.msgid.link/20260121152435.101698-1-tgaraev653@gmail.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_es8336.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/sof_es8336.c b/sound/soc/intel/boards/sof_es8336.c index 774fff58d51b..fce50fd9f093 100644 --- a/sound/soc/intel/boards/sof_es8336.c +++ b/sound/soc/intel/boards/sof_es8336.c @@ -120,7 +120,7 @@ static void pcm_pop_work_events(struct work_struct *work) gpiod_set_value_cansleep(priv->gpio_speakers, priv->speaker_en); if (quirk & SOF_ES8336_HEADPHONE_GPIO) - gpiod_set_value_cansleep(priv->gpio_headphone, priv->speaker_en); + gpiod_set_value_cansleep(priv->gpio_headphone, !priv->speaker_en); } From 05cd654829dd2717e86a5a3f9ff301447fc28c93 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Thu, 22 Jan 2026 15:40:33 +0200 Subject: [PATCH 089/195] irqchip/ls-extirq: Convert to a platform driver to make it work again Starting with the blamed commit, the ls-extirq driver stopped working. This is because ls-extirq, being one of the interrupt-map property abusers, does not pass the DT checks added by the referenced commit, making it unable to determine its interrupt parent: irq-ls-extirq: Cannot find parent domain OF: of_irq_init: Failed to init /soc/syscon@1f70000/interrupt-controller@14 ((____ptrval____)), parent 0000000000000000 Instead of reverting the referenced commit, convert the ls-extirq to a platform driver to avoid the irqchip_init() -> of_irq_init() code path completely. As part of the conversion, use the managed resources APIs and dev_err_probe() so that there is no need for a .remove() callback or for complicated error handling. Fixes: 1b1f04d8271e ("of/irq: Ignore interrupt parent for nodes without interrupts") Co-developed-by: Alexander Stein Signed-off-by: Alexander Stein Signed-off-by: Ioana Ciornei Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20260122134034.3274053-2-ioana.ciornei@nxp.com --- drivers/irqchip/irq-ls-extirq.c | 75 ++++++++++++++++----------------- 1 file changed, 36 insertions(+), 39 deletions(-) diff --git a/drivers/irqchip/irq-ls-extirq.c b/drivers/irqchip/irq-ls-extirq.c index 50a7b38381b9..96f9c20621cf 100644 --- a/drivers/irqchip/irq-ls-extirq.c +++ b/drivers/irqchip/irq-ls-extirq.c @@ -168,40 +168,34 @@ ls_extirq_parse_map(struct ls_extirq_data *priv, struct device_node *node) return 0; } -static int __init -ls_extirq_of_init(struct device_node *node, struct device_node *parent) +static int ls_extirq_probe(struct platform_device *pdev) { struct irq_domain *domain, *parent_domain; + struct device_node *node, *parent; + struct device *dev = &pdev->dev; struct ls_extirq_data *priv; int ret; + node = dev->of_node; + parent = of_irq_find_parent(node); + if (!parent) + return dev_err_probe(dev, -ENODEV, "Failed to get IRQ parent node\n"); + parent_domain = irq_find_host(parent); - if (!parent_domain) { - pr_err("Cannot find parent domain\n"); - ret = -ENODEV; - goto err_irq_find_host; - } + if (!parent_domain) + return dev_err_probe(dev, -EPROBE_DEFER, "Cannot find parent domain\n"); - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) { - ret = -ENOMEM; - goto err_alloc_priv; - } + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return dev_err_probe(dev, -ENOMEM, "Failed to allocate memory\n"); - /* - * All extirq OF nodes are under a scfg/syscon node with - * the 'ranges' property - */ - priv->intpcr = of_iomap(node, 0); - if (!priv->intpcr) { - pr_err("Cannot ioremap OF node %pOF\n", node); - ret = -ENOMEM; - goto err_iomap; - } + priv->intpcr = devm_of_iomap(dev, node, 0, NULL); + if (!priv->intpcr) + return dev_err_probe(dev, -ENOMEM, "Cannot ioremap OF node %pOF\n", node); ret = ls_extirq_parse_map(priv, node); if (ret) - goto err_parse_map; + return dev_err_probe(dev, ret, "Failed to parse IRQ map\n"); priv->big_endian = of_device_is_big_endian(node->parent); priv->is_ls1021a_or_ls1043a = of_device_is_compatible(node, "fsl,ls1021a-extirq") || @@ -210,23 +204,26 @@ ls_extirq_of_init(struct device_node *node, struct device_node *parent) domain = irq_domain_create_hierarchy(parent_domain, 0, priv->nirq, of_fwnode_handle(node), &extirq_domain_ops, priv); - if (!domain) { - ret = -ENOMEM; - goto err_add_hierarchy; - } + if (!domain) + return dev_err_probe(dev, -ENOMEM, "Failed to add IRQ domain\n"); return 0; - -err_add_hierarchy: -err_parse_map: - iounmap(priv->intpcr); -err_iomap: - kfree(priv); -err_alloc_priv: -err_irq_find_host: - return ret; } -IRQCHIP_DECLARE(ls1021a_extirq, "fsl,ls1021a-extirq", ls_extirq_of_init); -IRQCHIP_DECLARE(ls1043a_extirq, "fsl,ls1043a-extirq", ls_extirq_of_init); -IRQCHIP_DECLARE(ls1088a_extirq, "fsl,ls1088a-extirq", ls_extirq_of_init); +static const struct of_device_id ls_extirq_dt_ids[] = { + { .compatible = "fsl,ls1021a-extirq" }, + { .compatible = "fsl,ls1043a-extirq" }, + { .compatible = "fsl,ls1088a-extirq" }, + {} +}; +MODULE_DEVICE_TABLE(of, ls_extirq_dt_ids); + +static struct platform_driver ls_extirq_driver = { + .probe = ls_extirq_probe, + .driver = { + .name = "ls-extirq", + .of_match_table = ls_extirq_dt_ids, + } +}; + +builtin_platform_driver(ls_extirq_driver); From ba5c657141ea29261e893c46faff29a101f4496a Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Thu, 22 Jan 2026 15:40:34 +0200 Subject: [PATCH 090/195] bus: simple-pm-bus: Probe the Layerscape SCFG node Make the simple-pm-bus driver probe the Layerscape SCFG dt nodes and populate platform_device structures from its child dt nodes. This is now needed because its child interrupt-controller - ls-extirq - is being handled as a platform_device instead of being initialized through the IRQCHIP_DECLARE infrastructure which impeded its parent IRQ retrieval through the blamed commit. Note that this does not set ONLY_BUS because that enables the of_platform_populate() call. The extra power management operations which are enabled by that are not required but harmless. Fixes: 1b1f04d8271e ("of/irq: Ignore interrupt parent for nodes without interrupts") Signed-off-by: Ioana Ciornei Signed-off-by: Thomas Gleixner Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20260122134034.3274053-3-ioana.ciornei@nxp.com --- drivers/bus/simple-pm-bus.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/bus/simple-pm-bus.c b/drivers/bus/simple-pm-bus.c index d8e029e7e53f..3f00d953fb9a 100644 --- a/drivers/bus/simple-pm-bus.c +++ b/drivers/bus/simple-pm-bus.c @@ -142,6 +142,12 @@ static const struct of_device_id simple_pm_bus_of_match[] = { { .compatible = "simple-mfd", .data = ONLY_BUS }, { .compatible = "isa", .data = ONLY_BUS }, { .compatible = "arm,amba-bus", .data = ONLY_BUS }, + { .compatible = "fsl,ls1021a-scfg", }, + { .compatible = "fsl,ls1043a-scfg", }, + { .compatible = "fsl,ls1046a-scfg", }, + { .compatible = "fsl,ls1088a-isc", }, + { .compatible = "fsl,ls2080a-isc", }, + { .compatible = "fsl,lx2160a-isc", }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, simple_pm_bus_of_match); From fd4eeb30b9e30ca1118a618be0755287bcbb2da9 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 26 Jan 2026 04:57:35 +0000 Subject: [PATCH 091/195] objtool: Print bfd_vma as unsigned long long on ia32-x86_64 cross build When objtool is cross-compiled in ia32 container for x86_64 target it fails with the following errors: > disas.c: In function 'disas_print_addr_sym': > disas.c:173:38: error: format '%lx' expects argument of type 'long unsigned int', but argument 3 has type 'bfd_vma' {aka 'long long unsigned int'} [-Werror=format=] > 173 | DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, symstr); > | ^~~~~~~~~~~~ ~~~~ > | | > | bfd_vma {aka long long unsigned int} Provide a correct printf-fmt depending on sizeof(bfd_vma). Fixes: 5d859dff266f ("objtool: Print symbol during disassembly") Signed-off-by: Dmitry Safonov Reviewed-by: Alexandre Chartre Link: https://patch.msgid.link/20260126-objtool-ia32-v1-1-bb6feaf17566@arista.com Signed-off-by: Josh Poimboeuf --- tools/objtool/disas.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tools/objtool/disas.c b/tools/objtool/disas.c index 2b5059f55e40..26f08d41f2b1 100644 --- a/tools/objtool/disas.c +++ b/tools/objtool/disas.c @@ -108,6 +108,8 @@ static int sprint_name(char *str, const char *name, unsigned long offset) #define DINFO_FPRINTF(dinfo, ...) \ ((*(dinfo)->fprintf_func)((dinfo)->stream, __VA_ARGS__)) +#define bfd_vma_fmt \ + __builtin_choose_expr(sizeof(bfd_vma) == sizeof(unsigned long), "%#lx <%s>", "%#llx <%s>") static int disas_result_fprintf(struct disas_context *dctx, const char *fmt, va_list ap) @@ -170,10 +172,10 @@ static void disas_print_addr_sym(struct section *sec, struct symbol *sym, if (sym) { sprint_name(symstr, sym->name, addr - sym->offset); - DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, symstr); + DINFO_FPRINTF(dinfo, bfd_vma_fmt, addr, symstr); } else { str = offstr(sec, addr); - DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, str); + DINFO_FPRINTF(dinfo, bfd_vma_fmt, addr, str); free(str); } } @@ -252,7 +254,7 @@ static void disas_print_addr_reloc(bfd_vma addr, struct disassemble_info *dinfo) * example: "lea 0x0(%rip),%rdi". The kernel can reference * the next IP with _THIS_IP_ macro. */ - DINFO_FPRINTF(dinfo, "0x%lx <_THIS_IP_>", addr); + DINFO_FPRINTF(dinfo, bfd_vma_fmt, addr, "_THIS_IP_"); return; } @@ -264,11 +266,11 @@ static void disas_print_addr_reloc(bfd_vma addr, struct disassemble_info *dinfo) */ if (reloc->sym->type == STT_SECTION) { str = offstr(reloc->sym->sec, reloc->sym->offset + offset); - DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, str); + DINFO_FPRINTF(dinfo, bfd_vma_fmt, addr, str); free(str); } else { sprint_name(symstr, reloc->sym->name, offset); - DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, symstr); + DINFO_FPRINTF(dinfo, bfd_vma_fmt, addr, symstr); } } @@ -311,7 +313,7 @@ static void disas_print_address(bfd_vma addr, struct disassemble_info *dinfo) */ sym = insn_call_dest(insn); if (sym && (sym->offset == addr || (sym->offset == 0 && is_reloc))) { - DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, sym->name); + DINFO_FPRINTF(dinfo, bfd_vma_fmt, addr, sym->name); return; } From d107b3265aa5e61a1e326b2815a767526ddb12ac Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 26 Jan 2026 16:13:48 +0100 Subject: [PATCH 092/195] objtool: Replace custom macros in elf.c with shared ones The source file tools/objtool/elf.c defines the macros ALIGN_UP(), ALIGN_UP_POW2() and MAX(). These macros unnecessarily duplicate functionality already available under tools/include/, specifically ALIGN(), roundup_pow_of_two() and max(). More importantly, the definition of ALIGN_UP_POW2() is incorrect when the input is 1, as it results in a call to __builtin_clz(0), which produces an undefined result. This issue impacts the function elf_alloc_reloc(). When adding the first relocation to a section, the function allocates an undefined number of relocations. Replace the custom macros with the shared functionality to resolve these issues. Fixes: 2c05ca026218 ("objtool: Add elf_create_reloc() and elf_init_reloc()") Signed-off-by: Petr Pavlu Link: https://patch.msgid.link/20260126151356.3924887-1-petr.pavlu@suse.com Signed-off-by: Josh Poimboeuf --- tools/objtool/elf.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 6a8ed9c62323..2c02c7b49265 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -18,15 +18,14 @@ #include #include #include +#include +#include #include +#include #include #include #include -#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) -#define ALIGN_UP_POW2(x) (1U << ((8 * sizeof(x)) - __builtin_clz((x) - 1U))) -#define MAX(a, b) ((a) > (b) ? (a) : (b)) - static inline u32 str_hash(const char *str) { return jhash(str, strlen(str), 0); @@ -1336,7 +1335,7 @@ unsigned int elf_add_string(struct elf *elf, struct section *strtab, const char return -1; } - offset = ALIGN_UP(strtab->sh.sh_size, strtab->sh.sh_addralign); + offset = ALIGN(strtab->sh.sh_size, strtab->sh.sh_addralign); if (!elf_add_data(elf, strtab, str, strlen(str) + 1)) return -1; @@ -1378,7 +1377,7 @@ void *elf_add_data(struct elf *elf, struct section *sec, const void *data, size_ sec->data->d_size = size; sec->data->d_align = 1; - offset = ALIGN_UP(sec->sh.sh_size, sec->sh.sh_addralign); + offset = ALIGN(sec->sh.sh_size, sec->sh.sh_addralign); sec->sh.sh_size = offset + size; mark_sec_changed(elf, sec, true); @@ -1502,7 +1501,7 @@ static int elf_alloc_reloc(struct elf *elf, struct section *rsec) rsec->data->d_size = nr_relocs_new * elf_rela_size(elf); rsec->sh.sh_size = rsec->data->d_size; - nr_alloc = MAX(64, ALIGN_UP_POW2(nr_relocs_new)); + nr_alloc = max(64UL, roundup_pow_of_two(nr_relocs_new)); if (nr_alloc <= rsec->nr_alloc_relocs) return 0; From f2dba60339a6299e181671e95293efe312237e2d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sun, 25 Jan 2026 21:56:39 -0800 Subject: [PATCH 093/195] objtool/klp: Fix bug table handling for __WARN_printf() Running objtool klp-diff on a changed function which uses WARN() can fail with: vmlinux.o: error: objtool: md_run+0x866: failed to convert reloc sym '__bug_table' to its proper format The problem is that since commit 5b472b6e5bd9 ("x86_64/bug: Implement __WARN_printf()"), each __WARN_printf() call site now directly references its bug table entry. klp-diff errors out when it can't convert such section-based references to object symbols (because bug table entries don't have symbols). Luckily, klp-diff already has code to create symbols for bug table entries. Move that code earlier, before function diffing. Fixes: dd590d4d57eb ("objtool/klp: Introduce klp diff subcommand for diffing object files") Fixes: 5b472b6e5bd9 ("x86_64/bug: Implement __WARN_printf()") Reported-by: Song Liu Tested-by: Song Liu Link: https://patch.msgid.link/a8e0a714b9da962858842b9aecd63b4900927c88.1769406850.git.jpoimboe@kernel.org Signed-off-by: Josh Poimboeuf --- tools/objtool/klp-diff.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tools/objtool/klp-diff.c b/tools/objtool/klp-diff.c index 4d1f9e9977eb..d94531e3f64e 100644 --- a/tools/objtool/klp-diff.c +++ b/tools/objtool/klp-diff.c @@ -1425,9 +1425,6 @@ static int clone_special_sections(struct elfs *e) { struct section *patched_sec; - if (create_fake_symbols(e->patched)) - return -1; - for_each_sec(e->patched, patched_sec) { if (is_special_section(patched_sec)) { if (clone_special_section(e, patched_sec)) @@ -1704,6 +1701,17 @@ int cmd_klp_diff(int argc, const char **argv) if (!e.out) return -1; + /* + * Special section fake symbols are needed so that individual special + * section entries can be extracted by clone_special_sections(). + * + * Note the fake symbols are also needed by clone_included_functions() + * because __WARN_printf() call sites add references to bug table + * entries in the calling functions. + */ + if (create_fake_symbols(e.patched)) + return -1; + if (clone_included_functions(&e)) return -1; From 78c268f3781e4b9706103def0cc011505e0c4332 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 26 Jan 2026 15:56:44 -0800 Subject: [PATCH 094/195] livepatch/klp-build: Fix klp-build vs CONFIG_MODULE_SRCVERSION_ALL When building a patch to a single-file kernel module with CONFIG_MODULE_SRCVERSION_ALL enabled, the klp-build module link fails in modpost: Diffing objects drivers/md/raid0.o: changed function: raid0_run Building patch module: livepatch-0001-patch-raid0_run.ko drivers/md/raid0.c: No such file or directory ... The problem here is that klp-build copied drivers/md/.raid0.o.cmd to the module build directory, but it didn't also copy over the input source file listed in the .cmd file: source_drivers/md/raid0.o := drivers/md/raid0.c So modpost dies due to the missing .c file which is needed for calculating checksums for CONFIG_MODULE_SRCVERSION_ALL. Instead of copying the original .cmd file, just create an empty one. Modpost only requires that it exists. The original object's build dependencies are irrelevant for the frankenobjects used by klp-build. Fixes: 24ebfcd65a87 ("livepatch/klp-build: Introduce klp-build script for generating livepatch modules") Reported-by: Song Liu Tested-by: Song Liu Link: https://patch.msgid.link/c41b6629e02775e4c1015259aa36065b3fe2f0f3.1769471792.git.jpoimboe@kernel.org Signed-off-by: Josh Poimboeuf --- scripts/livepatch/klp-build | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/livepatch/klp-build b/scripts/livepatch/klp-build index 882272120c9e..a73515a82272 100755 --- a/scripts/livepatch/klp-build +++ b/scripts/livepatch/klp-build @@ -555,13 +555,11 @@ copy_orig_objects() { local file_dir="$(dirname "$file")" local orig_file="$ORIG_DIR/$rel_file" local orig_dir="$(dirname "$orig_file")" - local cmd_file="$file_dir/.$(basename "$file").cmd" [[ ! -f "$file" ]] && die "missing $(basename "$file") for $_file" mkdir -p "$orig_dir" cp -f "$file" "$orig_dir" - [[ -e "$cmd_file" ]] && cp -f "$cmd_file" "$orig_dir" done xtrace_restore @@ -740,15 +738,17 @@ build_patch_module() { local orig_dir="$(dirname "$orig_file")" local kmod_file="$KMOD_DIR/$rel_file" local kmod_dir="$(dirname "$kmod_file")" - local cmd_file="$orig_dir/.$(basename "$file").cmd" + local cmd_file="$kmod_dir/.$(basename "$file").cmd" mkdir -p "$kmod_dir" cp -f "$file" "$kmod_dir" - [[ -e "$cmd_file" ]] && cp -f "$cmd_file" "$kmod_dir" # Tell kbuild this is a prebuilt object cp -f "$file" "${kmod_file}_shipped" + # Make modpost happy + touch "$cmd_file" + echo -n " $rel_file" >> "$makefile" done From 96313fcc1f062ba239f4832c9eff685da6c51c99 Mon Sep 17 00:00:00 2001 From: Xuewen Yan Date: Mon, 26 Jan 2026 17:42:09 +0800 Subject: [PATCH 095/195] gpio: sprd: Change sprd_gpio lock to raw_spin_lock There was a lockdep warning in sprd_gpio: [ 6.258269][T329@C6] [ BUG: Invalid wait context ] [ 6.258270][T329@C6] 6.18.0-android17-0-g30527ad7aaae-ab00009-4k #1 Tainted: G W OE [ 6.258272][T329@C6] ----------------------------- [ 6.258273][T329@C6] modprobe/329 is trying to lock: [ 6.258275][T329@C6] ffffff8081c91690 (&sprd_gpio->lock){....}-{3:3}, at: sprd_gpio_irq_unmask+0x4c/0xa4 [gpio_sprd] [ 6.258282][T329@C6] other info that might help us debug this: [ 6.258283][T329@C6] context-{5:5} [ 6.258285][T329@C6] 3 locks held by modprobe/329: [ 6.258286][T329@C6] #0: ffffff808baca108 (&dev->mutex){....}-{4:4}, at: __driver_attach+0xc4/0x204 [ 6.258295][T329@C6] #1: ffffff80965e7240 (request_class#4){+.+.}-{4:4}, at: __setup_irq+0x1cc/0x82c [ 6.258304][T329@C6] #2: ffffff80965e70c8 (lock_class#4){....}-{2:2}, at: __setup_irq+0x21c/0x82c [ 6.258313][T329@C6] stack backtrace: [ 6.258314][T329@C6] CPU: 6 UID: 0 PID: 329 Comm: modprobe Tainted: G W OE 6.18.0-android17-0-g30527ad7aaae-ab00009-4k #1 PREEMPT 3ad5b0f45741a16e5838da790706e16ceb6717df [ 6.258316][T329@C6] Tainted: [W]=WARN, [O]=OOT_MODULE, [E]=UNSIGNED_MODULE [ 6.258317][T329@C6] Hardware name: Unisoc UMS9632-base Board (DT) [ 6.258318][T329@C6] Call trace: [ 6.258318][T329@C6] show_stack+0x20/0x30 (C) [ 6.258321][T329@C6] __dump_stack+0x28/0x3c [ 6.258324][T329@C6] dump_stack_lvl+0xac/0xf0 [ 6.258326][T329@C6] dump_stack+0x18/0x3c [ 6.258329][T329@C6] __lock_acquire+0x824/0x2c28 [ 6.258331][T329@C6] lock_acquire+0x148/0x2cc [ 6.258333][T329@C6] _raw_spin_lock_irqsave+0x6c/0xb4 [ 6.258334][T329@C6] sprd_gpio_irq_unmask+0x4c/0xa4 [gpio_sprd 814535e93c6d8e0853c45c02eab0fa88a9da6487] [ 6.258337][T329@C6] irq_startup+0x238/0x350 [ 6.258340][T329@C6] __setup_irq+0x504/0x82c [ 6.258342][T329@C6] request_threaded_irq+0x118/0x184 [ 6.258344][T329@C6] devm_request_threaded_irq+0x94/0x120 [ 6.258347][T329@C6] sc8546_init_irq+0x114/0x170 [sc8546_charger 223586ccafc27439f7db4f95b0c8e6e882349a99] [ 6.258352][T329@C6] sc8546_charger_probe+0x53c/0x5a0 [sc8546_charger 223586ccafc27439f7db4f95b0c8e6e882349a99] [ 6.258358][T329@C6] i2c_device_probe+0x2c8/0x350 [ 6.258361][T329@C6] really_probe+0x1a8/0x46c [ 6.258363][T329@C6] __driver_probe_device+0xa4/0x10c [ 6.258366][T329@C6] driver_probe_device+0x44/0x1b4 [ 6.258369][T329@C6] __driver_attach+0xd0/0x204 [ 6.258371][T329@C6] bus_for_each_dev+0x10c/0x168 [ 6.258373][T329@C6] driver_attach+0x2c/0x3c [ 6.258376][T329@C6] bus_add_driver+0x154/0x29c [ 6.258378][T329@C6] driver_register+0x70/0x10c [ 6.258381][T329@C6] i2c_register_driver+0x48/0xc8 [ 6.258384][T329@C6] init_module+0x28/0xfd8 [sc8546_charger 223586ccafc27439f7db4f95b0c8e6e882349a99] [ 6.258389][T329@C6] do_one_initcall+0x128/0x42c [ 6.258392][T329@C6] do_init_module+0x60/0x254 [ 6.258395][T329@C6] load_module+0x1054/0x1220 [ 6.258397][T329@C6] __arm64_sys_finit_module+0x240/0x35c [ 6.258400][T329@C6] invoke_syscall+0x60/0xec [ 6.258402][T329@C6] el0_svc_common+0xb0/0xe4 [ 6.258405][T329@C6] do_el0_svc+0x24/0x30 [ 6.258407][T329@C6] el0_svc+0x54/0x1c4 [ 6.258409][T329@C6] el0t_64_sync_handler+0x68/0xdc [ 6.258411][T329@C6] el0t_64_sync+0x1c4/0x1c8 This is because the spin_lock would change to rt_mutex in PREEMPT_RT, however the sprd_gpio->lock would use in hard-irq, this is unsafe. So change the spin_lock_t to raw_spin_lock_t to use the spinlock in hard-irq. Signed-off-by: Xuewen Yan Reviewed-by: Baolin Wang Reviewed-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20260126094209.9855-1-xuewen.yan@unisoc.com [Bartosz: tweaked the commit message] Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-sprd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-sprd.c b/drivers/gpio/gpio-sprd.c index 413bcd0a4240..2cc8abe705cd 100644 --- a/drivers/gpio/gpio-sprd.c +++ b/drivers/gpio/gpio-sprd.c @@ -35,7 +35,7 @@ struct sprd_gpio { struct gpio_chip chip; void __iomem *base; - spinlock_t lock; + raw_spinlock_t lock; int irq; }; @@ -54,7 +54,7 @@ static void sprd_gpio_update(struct gpio_chip *chip, unsigned int offset, unsigned long flags; u32 tmp; - spin_lock_irqsave(&sprd_gpio->lock, flags); + raw_spin_lock_irqsave(&sprd_gpio->lock, flags); tmp = readl_relaxed(base + reg); if (val) @@ -63,7 +63,7 @@ static void sprd_gpio_update(struct gpio_chip *chip, unsigned int offset, tmp &= ~BIT(SPRD_GPIO_BIT(offset)); writel_relaxed(tmp, base + reg); - spin_unlock_irqrestore(&sprd_gpio->lock, flags); + raw_spin_unlock_irqrestore(&sprd_gpio->lock, flags); } static int sprd_gpio_read(struct gpio_chip *chip, unsigned int offset, u16 reg) @@ -236,7 +236,7 @@ static int sprd_gpio_probe(struct platform_device *pdev) if (IS_ERR(sprd_gpio->base)) return PTR_ERR(sprd_gpio->base); - spin_lock_init(&sprd_gpio->lock); + raw_spin_lock_init(&sprd_gpio->lock); sprd_gpio->chip.label = dev_name(&pdev->dev); sprd_gpio->chip.ngpio = SPRD_GPIO_NR; From c0ae43d303e45764918fa8c1dc13d6a5db59c479 Mon Sep 17 00:00:00 2001 From: Denis Sergeev Date: Mon, 26 Jan 2026 06:59:14 +0300 Subject: [PATCH 096/195] gpiolib: acpi: use BIT_ULL() for u64 mask in address space handler The BIT() macro uses unsigned long, which is 32 bits on 32-bit architectures. When iterating over GPIO pins with index >= 32, the expression (*value & BIT(i)) causes undefined behavior due to shifting by a value >= type width. Since 'value' is a pointer to u64, use BIT_ULL() to ensure correct 64-bit mask on all architectures. Found by Linux Verification Center (linuxtesting.org) with Svace. Fixes: 2c4d00cb8fc5 ("gpiolib: acpi: Use BIT() macro to increase readability") Signed-off-by: Denis Sergeev Reviewed-by: Mika Westerberg Link: https://lore.kernel.org/r/20260126035914.16586-1-denserg.edu@gmail.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-acpi-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-acpi-core.c b/drivers/gpio/gpiolib-acpi-core.c index 83dd227dbbec..d42f769eeb11 100644 --- a/drivers/gpio/gpiolib-acpi-core.c +++ b/drivers/gpio/gpiolib-acpi-core.c @@ -1159,7 +1159,7 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address, mutex_unlock(&achip->conn_lock); if (function == ACPI_WRITE) - gpiod_set_raw_value_cansleep(desc, !!(*value & BIT(i))); + gpiod_set_raw_value_cansleep(desc, !!(*value & BIT_ULL(i))); else *value |= (u64)gpiod_get_raw_value_cansleep(desc) << i; } From 53ad4a948a4586359b841d607c08fb16c5503230 Mon Sep 17 00:00:00 2001 From: Yuhao Huang Date: Mon, 26 Jan 2026 12:03:48 +0800 Subject: [PATCH 097/195] gpio: virtuser: fix UAF in configfs release path The gpio-virtuser configfs release path uses guard(mutex) to protect the device structure. However, the device is freed before the guard cleanup runs, causing mutex_unlock() to operate on freed memory. Specifically, gpio_virtuser_device_config_group_release() destroys the mutex and frees the device while still inside the guard(mutex) scope. When the function returns, the guard cleanup invokes mutex_unlock(&dev->lock), resulting in a slab use-after-free. Limit the mutex lifetime by using a scoped_guard() only around the activation check, so that the lock is released before mutex_destroy() and kfree() are called. Fixes: 91581c4b3f29 ("gpio: virtuser: new virtual testing driver for the GPIO API") Signed-off-by: Yuhao Huang Link: https://lore.kernel.org/r/20260126040348.11167-1-yuhaohuang@YuhaodeMacBook-Pro.local Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-virtuser.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-virtuser.c b/drivers/gpio/gpio-virtuser.c index 37f2ce20f1ae..098e67d70ffa 100644 --- a/drivers/gpio/gpio-virtuser.c +++ b/drivers/gpio/gpio-virtuser.c @@ -1682,10 +1682,10 @@ static void gpio_virtuser_device_config_group_release(struct config_item *item) { struct gpio_virtuser_device *dev = to_gpio_virtuser_device(item); - guard(mutex)(&dev->lock); - - if (gpio_virtuser_device_is_live(dev)) - gpio_virtuser_device_deactivate(dev); + scoped_guard(mutex, &dev->lock) { + if (gpio_virtuser_device_is_live(dev)) + gpio_virtuser_device_deactivate(dev); + } mutex_destroy(&dev->lock); ida_free(&gpio_virtuser_ida, dev->id); From d02f20a4de0c498fbba2b0e3c1496e72c630a91e Mon Sep 17 00:00:00 2001 From: Martin Larsson Date: Wed, 21 Jan 2026 12:57:22 +0000 Subject: [PATCH 098/195] gpio: pca953x: mask interrupts in irq shutdown In the existing implementation irq_shutdown does not mask the interrupts in hardware. This can cause spurious interrupts from the IO expander. Add masking to irq_shutdown to prevent spurious interrupts. Cc: stable@vger.kernel.org Signed-off-by: Martin Larsson Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20260121125631.2758346-1-martin.larsson@actia.se Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 8727ae54bc57..f93a3dbb2daa 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -914,6 +914,8 @@ static void pca953x_irq_shutdown(struct irq_data *d) clear_bit(hwirq, chip->irq_trig_fall); clear_bit(hwirq, chip->irq_trig_level_low); clear_bit(hwirq, chip->irq_trig_level_high); + + pca953x_irq_mask(d); } static void pca953x_irq_print_chip(struct irq_data *data, struct seq_file *p) From c764b7af15289051718b4859a67f9a3bc69d3fb2 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 21 Jan 2026 11:04:06 +0800 Subject: [PATCH 099/195] drm/amd/pm: fix smu v13 soft clock frequency setting issue v1: resolve the issue where some freq frequencies cannot be set correctly due to insufficient floating-point precision. v2: patch this convert on 'max' value only. Signed-off-by: Yang Wang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 6194f60c707e3878e120adeb36997075664d8429) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 1 + drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 4263798d716b..8e592a477c33 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -56,6 +56,7 @@ #define SMUQ10_TO_UINT(x) ((x) >> 10) #define SMUQ10_FRAC(x) ((x) & 0x3ff) #define SMUQ10_ROUND(x) ((SMUQ10_TO_UINT(x)) + ((SMUQ10_FRAC(x)) >= 0x200)) +#define SMU_V13_SOFT_FREQ_ROUND(x) ((x) + 1) extern const int pmfw_decoded_link_speed[5]; extern const int pmfw_decoded_link_width[7]; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index a89075e25717..2efd914d81e5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1555,6 +1555,7 @@ int smu_v13_0_set_soft_freq_limited_range(struct smu_context *smu, return clk_id; if (max > 0) { + max = SMU_V13_SOFT_FREQ_ROUND(max); if (automatic) param = (uint32_t)((clk_id << 16) | 0xffff); else From 239d0ccf567c3b09aed58eb88cd3376af37aaf14 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 21 Jan 2026 11:06:29 +0800 Subject: [PATCH 100/195] drm/amd/pm: fix smu v14 soft clock frequency setting issue v1: resolve the issue where some freq frequencies cannot be set correctly due to insufficient floating-point precision. v2: patch this convert on 'max' value only. Signed-off-by: Yang Wang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 53868dd8774344051999c880115740da92f97feb) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h | 1 + drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h index 29a4583db873..0b1e6f25e611 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h @@ -57,6 +57,7 @@ extern const int decoded_link_width[8]; #define DECODE_GEN_SPEED(gen_speed_idx) (decoded_link_speed[gen_speed_idx]) #define DECODE_LANE_WIDTH(lane_width_idx) (decoded_link_width[lane_width_idx]) +#define SMU_V14_SOFT_FREQ_ROUND(x) ((x) + 1) struct smu_14_0_max_sustainable_clocks { uint32_t display_clock; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index f2a16dfee599..06a81533759c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -1178,6 +1178,7 @@ int smu_v14_0_set_soft_freq_limited_range(struct smu_context *smu, return clk_id; if (max > 0) { + max = SMU_V14_SOFT_FREQ_ROUND(max); if (automatic) param = (uint32_t)((clk_id << 16) | 0xffff); else From 8b1ecc9377bc641533cd9e76dfa3aee3cd04a007 Mon Sep 17 00:00:00 2001 From: Jon Doron Date: Sat, 20 Dec 2025 15:04:40 +0200 Subject: [PATCH 101/195] drm/amdgpu: fix NULL pointer dereference in amdgpu_gmc_filter_faults_remove MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On APUs such as Raven and Renoir (GC 9.1.0, 9.2.2, 9.3.0), the ih1 and ih2 interrupt ring buffers are not initialized. This is by design, as these secondary IH rings are only available on discrete GPUs. See vega10_ih_sw_init() which explicitly skips ih1/ih2 initialization when AMD_IS_APU is set. However, amdgpu_gmc_filter_faults_remove() unconditionally uses ih1 to get the timestamp of the last interrupt entry. When retry faults are enabled on APUs (noretry=0), this function is called from the SVM page fault recovery path, resulting in a NULL pointer dereference when amdgpu_ih_decode_iv_ts_helper() attempts to access ih->ring[]. The crash manifests as: BUG: kernel NULL pointer dereference, address: 0000000000000004 RIP: 0010:amdgpu_ih_decode_iv_ts_helper+0x22/0x40 [amdgpu] Call Trace: amdgpu_gmc_filter_faults_remove+0x60/0x130 [amdgpu] svm_range_restore_pages+0xae5/0x11c0 [amdgpu] amdgpu_vm_handle_fault+0xc8/0x340 [amdgpu] gmc_v9_0_process_interrupt+0x191/0x220 [amdgpu] amdgpu_irq_dispatch+0xed/0x2c0 [amdgpu] amdgpu_ih_process+0x84/0x100 [amdgpu] This issue was exposed by commit 1446226d32a4 ("drm/amdgpu: Remove GC HW IP 9.3.0 from noretry=1") which changed the default for Renoir APU from noretry=1 to noretry=0, enabling retry fault handling and thus exercising the buggy code path. Fix this by adding a check for ih1.ring_size before attempting to use it. Also restore the soft_ih support from commit dd299441654f ("drm/amdgpu: Rework retry fault removal"). This is needed if the hardware doesn't support secondary HW IH rings. v2: additional updates (Alex) Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3814 Fixes: dd299441654f ("drm/amdgpu: Rework retry fault removal") Reviewed-by: Timur Kristóf Reviewed-by: Philip Yang Signed-off-by: Jon Doron Signed-off-by: Alex Deucher (cherry picked from commit 6ce8d536c80aa1f059e82184f0d1994436b1d526) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 7e623f91f2d7..d9c7ad297293 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -498,8 +498,13 @@ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, if (adev->irq.retry_cam_enabled) return; + else if (adev->irq.ih1.ring_size) + ih = &adev->irq.ih1; + else if (adev->irq.ih_soft.enabled) + ih = &adev->irq.ih_soft; + else + return; - ih = &adev->irq.ih1; /* Get the WPTR of the last entry in IH ring */ last_wptr = amdgpu_ih_get_wptr(adev, ih); /* Order wptr with ring data. */ From ee8d07cd5730038e33bf5e551448190bbd480eb8 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 27 Jan 2026 11:07:07 +0800 Subject: [PATCH 102/195] drm/amd/pm: fix race in power state check before mutex lock The power state check in amdgpu_dpm_set_powergating_by_smu() is done before acquiring the pm mutex, leading to a race condition where: 1. Thread A checks state and thinks no change is needed 2. Thread B acquires mutex and modifies the state 3. Thread A returns without updating state, causing inconsistency Fix this by moving the mutex lock before the power state check, ensuring atomicity of the state check and modification. Fixes: 6ee27ee27ba8 ("drm/amd/pm: avoid duplicate powergate/ungate setting") Signed-off-by: Yang Wang Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher (cherry picked from commit 7a3fbdfd19ec5992c0fc2d0bd83888644f5f2f38) --- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 79b174e5326d..302af1fb6901 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -80,15 +80,15 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, enum ip_power_state pwr_state = gate ? POWER_STATE_OFF : POWER_STATE_ON; bool is_vcn = block_type == AMD_IP_BLOCK_TYPE_VCN; + mutex_lock(&adev->pm.mutex); + if (atomic_read(&adev->pm.pwr_state[block_type]) == pwr_state && (!is_vcn || adev->vcn.num_vcn_inst == 1)) { dev_dbg(adev->dev, "IP block%d already in the target %s state!", block_type, gate ? "gate" : "ungate"); - return 0; + goto out_unlock; } - mutex_lock(&adev->pm.mutex); - switch (block_type) { case AMD_IP_BLOCK_TYPE_UVD: case AMD_IP_BLOCK_TYPE_VCE: @@ -115,6 +115,7 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, if (!ret) atomic_set(&adev->pm.pwr_state[block_type], pwr_state); +out_unlock: mutex_unlock(&adev->pm.mutex); return ret; From c87f15efeb2efc8049a4f021e7328f3a4737f749 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 Jan 2026 21:13:54 -0700 Subject: [PATCH 103/195] Revert "rnbd-clt: fix refcount underflow in device unmap path" This reverts commit ec19ed2b3e2af8ec5380400cdee9cb6560144506. This commit relies on changes queued for 7.0, and isn't safe in its current form for the 6.19 release. Revert it for now for 6.19. Link: https://lore.kernel.org/linux-block/aXhLQmRudk7cSAnT@shinmob/ Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 8194a970f002..d1c354636315 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1662,6 +1662,7 @@ static void destroy_sysfs(struct rnbd_clt_dev *dev, /* To avoid deadlock firstly remove itself */ sysfs_remove_file_self(&dev->kobj, sysfs_self); kobject_del(&dev->kobj); + kobject_put(&dev->kobj); } } From 730e5ebff40c852e3ea57b71bf02a4b89c69435f Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 27 Jan 2026 21:17:12 +0100 Subject: [PATCH 104/195] gpio: omap: do not register driver in probe() Commit 11a78b794496 ("ARM: OMAP: MPUIO wake updates") registers the omap_mpuio_driver from omap_mpuio_init(), which is called from omap_gpio_probe(). However, it neither makes sense to register drivers from probe() callbacks of other drivers, nor does the driver core allow registering drivers with a device lock already being held. The latter was revealed by commit dc23806a7c47 ("driver core: enforce device_lock for driver_match_device()") leading to a potential deadlock condition described in [1]. Additionally, the omap_mpuio_driver is never unregistered from the driver core, even if the module is unloaded. Hence, register the omap_mpuio_driver from the module initcall and unregister it in module_exit(). Link: https://lore.kernel.org/lkml/DFU7CEPUSG9A.1KKGVW4HIPMSH@kernel.org/ [1] Fixes: dc23806a7c47 ("driver core: enforce device_lock for driver_match_device()") Fixes: 11a78b794496 ("ARM: OMAP: MPUIO wake updates") Reviewed-by: Greg Kroah-Hartman Signed-off-by: Danilo Krummrich Reviewed-by: Rafael J. Wysocki (Intel) Link: https://patch.msgid.link/20260127201725.35883-1-dakr@kernel.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-omap.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index e136e81794df..e39723b5901b 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -799,10 +799,13 @@ static struct platform_device omap_mpuio_device = { static inline void omap_mpuio_init(struct gpio_bank *bank) { - platform_set_drvdata(&omap_mpuio_device, bank); + static bool registered; - if (platform_driver_register(&omap_mpuio_driver) == 0) - (void) platform_device_register(&omap_mpuio_device); + platform_set_drvdata(&omap_mpuio_device, bank); + if (!registered) { + (void)platform_device_register(&omap_mpuio_device); + registered = true; + } } /*---------------------------------------------------------------------*/ @@ -1575,13 +1578,24 @@ static struct platform_driver omap_gpio_driver = { */ static int __init omap_gpio_drv_reg(void) { - return platform_driver_register(&omap_gpio_driver); + int ret; + + ret = platform_driver_register(&omap_mpuio_driver); + if (ret) + return ret; + + ret = platform_driver_register(&omap_gpio_driver); + if (ret) + platform_driver_unregister(&omap_mpuio_driver); + + return ret; } postcore_initcall(omap_gpio_drv_reg); static void __exit omap_gpio_exit(void) { platform_driver_unregister(&omap_gpio_driver); + platform_driver_unregister(&omap_mpuio_driver); } module_exit(omap_gpio_exit); From b2cf569ed81e7574d4287eaf3b2c38690a934d34 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Tue, 27 Jan 2026 13:46:54 -0800 Subject: [PATCH 105/195] gpio: brcmstb: correct hwirq to bank map The brcmstb_gpio_hwirq_to_bank() function was designed to accommodate the downward numbering of dynamic GPIOs by traversing the bank list in the reverse order. However, the dynamic numbering has changed to increment upward which can produce an incorrect mapping. The function is modified to no longer assume an ordering of the list to accommodate either option. Fixes: 7b61212f2a07 ("gpiolib: Get rid of ARCH_NR_GPIOS") Signed-off-by: Doug Berger Signed-off-by: Florian Fainelli Reviewed-by: Linus Walleij Link: https://patch.msgid.link/20260127214656.447333-2-florian.fainelli@broadcom.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-brcmstb.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c index af9287ff5dc4..2352d099709c 100644 --- a/drivers/gpio/gpio-brcmstb.c +++ b/drivers/gpio/gpio-brcmstb.c @@ -301,12 +301,10 @@ static struct brcmstb_gpio_bank *brcmstb_gpio_hwirq_to_bank( struct brcmstb_gpio_priv *priv, irq_hw_number_t hwirq) { struct brcmstb_gpio_bank *bank; - int i = 0; - /* banks are in descending order */ - list_for_each_entry_reverse(bank, &priv->bank_list, node) { - i += bank->chip.gc.ngpio; - if (hwirq < i) + list_for_each_entry(bank, &priv->bank_list, node) { + if (hwirq >= bank->chip.gc.offset && + hwirq < (bank->chip.gc.offset + bank->chip.gc.ngpio)) return bank; } return NULL; From e535c23513c63f02f67e3e09e0787907029efeaf Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 30 Oct 2025 17:34:56 +0100 Subject: [PATCH 106/195] drm/imx/tve: fix probe device leak Make sure to drop the reference taken to the DDC device during probe on probe failure (e.g. probe deferral) and on driver unbind. Fixes: fcbc51e54d2a ("staging: drm/imx: Add support for Television Encoder (TVEv2)") Cc: stable@vger.kernel.org # 3.10 Cc: Philipp Zabel Reviewed-by: Frank Li Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20251030163456.15807-1-johan@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/imx/ipuv3/imx-tve.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/imx/ipuv3/imx-tve.c b/drivers/gpu/drm/imx/ipuv3/imx-tve.c index c5c6e070cc06..e861b8b9d8fa 100644 --- a/drivers/gpu/drm/imx/ipuv3/imx-tve.c +++ b/drivers/gpu/drm/imx/ipuv3/imx-tve.c @@ -528,6 +528,13 @@ static const struct component_ops imx_tve_ops = { .bind = imx_tve_bind, }; +static void imx_tve_put_device(void *_dev) +{ + struct device *dev = _dev; + + put_device(dev); +} + static int imx_tve_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -549,6 +556,12 @@ static int imx_tve_probe(struct platform_device *pdev) if (ddc_node) { tve->ddc = of_find_i2c_adapter_by_node(ddc_node); of_node_put(ddc_node); + if (tve->ddc) { + ret = devm_add_action_or_reset(dev, imx_tve_put_device, + &tve->ddc->dev); + if (ret) + return ret; + } } tve->mode = of_get_tve_mode(np); From 62089b804895e845f82e132ea9d46a1fc53ed5a7 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 21 Jan 2026 15:29:15 -0700 Subject: [PATCH 107/195] kbuild: rpm-pkg: Generate debuginfo package manually Commit a7c699d090a1 ("kbuild: rpm-pkg: build a debuginfo RPM") adjusted the __spec_install_post macro to include __os_install_post, which runs brp-strip. This ends up stripping module signatures, breaking loading modules with lockdown enabled. Undo most of the changes of the aforementioned debuginfo patch and mirror commit 16c36f8864e3 ("kbuild: deb-pkg: use build ID instead of debug link for dbg package") in kernel.spec to generate a functionally equivalent debuginfo package while avoiding touching the modules after they have already been signed during modules_install. Fixes: a7c699d090a1 ("kbuild: rpm-pkg: build a debuginfo RPM") Reported-by: Holger Kiehl Closes: https://lore.kernel.org/68c375f6-e07e-fec-434d-6a45a4f1390@praktifix.dwd.de/ Tested-by: Holger Kiehl Signed-off-by: Nathan Chancellor Link: https://patch.msgid.link/20260121-fix-module-signing-binrpm-pkg-v1-1-8fc5832b6cbc@kernel.org Signed-off-by: Nicolas Schier --- scripts/package/kernel.spec | 65 +++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 35 deletions(-) diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec index 98f206cb7c60..0f1c8de1bd95 100644 --- a/scripts/package/kernel.spec +++ b/scripts/package/kernel.spec @@ -2,6 +2,8 @@ %{!?_arch: %define _arch dummy} %{!?make: %define make make} %define makeflags %{?_smp_mflags} ARCH=%{ARCH} +%define __spec_install_post /usr/lib/rpm/brp-compress || : +%define debug_package %{nil} Name: kernel Summary: The Linux Kernel @@ -46,34 +48,12 @@ against the %{version} kernel package. %endif %if %{with_debuginfo} -# list of debuginfo-related options taken from distribution kernel.spec -# files -%undefine _include_minidebuginfo -%undefine _find_debuginfo_dwz_opts -%undefine _unique_build_ids -%undefine _unique_debug_names -%undefine _unique_debug_srcs -%undefine _debugsource_packages -%undefine _debuginfo_subpackages -%global _find_debuginfo_opts -r -%global _missing_build_ids_terminate_build 1 -%global _no_recompute_build_ids 1 -%{debug_package} +%package debuginfo +Summary: Debug information package for the Linux kernel +%description debuginfo +This package provides debug information for the kernel image and modules from the +%{version} package. %endif -# some (but not all) versions of rpmbuild emit %%debug_package with -# %%install. since we've already emitted it manually, that would cause -# a package redefinition error. ensure that doesn't happen -%define debug_package %{nil} - -# later, we make all modules executable so that find-debuginfo.sh strips -# them up. but they don't actually need to be executable, so remove the -# executable bit, taking care to do it _after_ find-debuginfo.sh has run -%define __spec_install_post \ - %{?__debug_package:%{__debug_install_post}} \ - %{__arch_install_post} \ - %{__os_install_post} \ - find %{buildroot}/lib/modules/%{KERNELRELEASE} -name "*.ko" -type f \\\ - | xargs --no-run-if-empty chmod u-x %prep %setup -q -n linux @@ -87,7 +67,7 @@ patch -p1 < %{SOURCE2} mkdir -p %{buildroot}/lib/modules/%{KERNELRELEASE} cp $(%{make} %{makeflags} -s image_name) %{buildroot}/lib/modules/%{KERNELRELEASE}/vmlinuz # DEPMOD=true makes depmod no-op. We do not package depmod-generated files. -%{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} DEPMOD=true modules_install +%{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} INSTALL_MOD_STRIP=1 DEPMOD=true modules_install %{make} %{makeflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install cp System.map %{buildroot}/lib/modules/%{KERNELRELEASE} cp .config %{buildroot}/lib/modules/%{KERNELRELEASE}/config @@ -118,22 +98,31 @@ ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEA echo "%exclude /lib/modules/%{KERNELRELEASE}/build" } > %{buildroot}/kernel.list -# make modules executable so that find-debuginfo.sh strips them. this -# will be undone later in %%__spec_install_post -find %{buildroot}/lib/modules/%{KERNELRELEASE} -name "*.ko" -type f \ - | xargs --no-run-if-empty chmod u+x - %if %{with_debuginfo} # copying vmlinux directly to the debug directory means it will not get # stripped (but its source paths will still be collected + fixed up) mkdir -p %{buildroot}/usr/lib/debug/lib/modules/%{KERNELRELEASE} cp vmlinux %{buildroot}/usr/lib/debug/lib/modules/%{KERNELRELEASE} + +echo /usr/lib/debug/lib/modules/%{KERNELRELEASE}/vmlinux > %{buildroot}/debuginfo.list + +while read -r mod; do + mod="${mod%.o}.ko" + dbg="%{buildroot}/usr/lib/debug/lib/modules/%{KERNELRELEASE}/kernel/${mod}" + buildid=$("${READELF}" -n "${mod}" | sed -n 's@^.*Build ID: \(..\)\(.*\)@\1/\2@p') + link="%{buildroot}/usr/lib/debug/.build-id/${buildid}.debug" + + mkdir -p "${dbg%/*}" "${link%/*}" + "${OBJCOPY}" --only-keep-debug "${mod}" "${dbg}" + ln -sf --relative "${dbg}" "${link}" + + echo "${dbg#%{buildroot}}" >> %{buildroot}/debuginfo.list + echo "${link#%{buildroot}}" >> %{buildroot}/debuginfo.list +done < modules.order %endif %clean rm -rf %{buildroot} -rm -f debugfiles.list debuglinks.list debugsourcefiles.list debugsources.list \ - elfbins.list %post if [ -x /usr/bin/kernel-install ]; then @@ -172,3 +161,9 @@ fi /usr/src/kernels/%{KERNELRELEASE} /lib/modules/%{KERNELRELEASE}/build %endif + +%if %{with_debuginfo} +%files -f %{buildroot}/debuginfo.list debuginfo +%defattr (-, root, root) +%exclude /debuginfo.list +%endif From 6d60354ea2f90352b22039ed8371c4f4321df90e Mon Sep 17 00:00:00 2001 From: Ethan Zuo Date: Wed, 28 Jan 2026 14:37:51 +0800 Subject: [PATCH 108/195] kbuild: Fix permissions of modules.builtin.modinfo Currently, modules.builtin.modinfo is created with executable permissions (0755). This is because after commit 39cfd5b12160 ("kbuild: extract modules.builtin.modinfo from vmlinux.unstripped"), modules.builtin.modinfo is extracted from vmlinux.unstripped using objcopy. When extracting sections, objcopy inherits attributes from the source ELF file. Since modules.builtin.modinfo is a data file and not an executable, it should have regular file permissions (0644). The executable bit can trigger warnings in Debian's Lintian tool. Explicitly remove the executable bit after generation. Fixes: 39cfd5b12160 ("kbuild: extract modules.builtin.modinfo from vmlinux.unstripped") Signed-off-by: Ethan Zuo Link: https://patch.msgid.link/SY0P300MB0609F6916B24ADF65502940B9C91A@SY0P300MB0609.AUSP300.PROD.OUTLOOK.COM Signed-off-by: Nicolas Schier --- scripts/Makefile.vmlinux | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index cd788cac9d91..276c3134a563 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -113,7 +113,8 @@ vmlinux: vmlinux.unstripped FORCE # what kmod expects to parse. quiet_cmd_modules_builtin_modinfo = GEN $@ cmd_modules_builtin_modinfo = $(cmd_objcopy); \ - sed -i 's/\x00\+$$/\x00/g' $@ + sed -i 's/\x00\+$$/\x00/g' $@; \ + chmod -x $@ OBJCOPYFLAGS_modules.builtin.modinfo := -j .modinfo -O binary From fe747d7112283f47169e9c16e751179a9b38611e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 26 Jan 2026 21:02:40 +0100 Subject: [PATCH 109/195] platform/x86: classmate-laptop: Add missing NULL pointer checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a few places in the Classmate laptop driver, code using the accel object may run before that object's address is stored in the driver data of the input device using it. For example, cmpc_accel_sensitivity_store_v4() is the "show" method of cmpc_accel_sensitivity_attr_v4 which is added in cmpc_accel_add_v4(), before calling dev_set_drvdata() for inputdev->dev. If the sysfs attribute is accessed prematurely, the dev_get_drvdata(&inputdev->dev) call in in cmpc_accel_sensitivity_store_v4() returns NULL which leads to a NULL pointer dereference going forward. Moreover, sysfs attributes using the input device are added before initializing that device by cmpc_add_acpi_notify_device() and if one of them is accessed before running that function, a NULL pointer dereference will occur. For example, cmpc_accel_sensitivity_attr_v4 is added before calling cmpc_add_acpi_notify_device() and if it is read prematurely, the dev_get_drvdata(&acpi->dev) call in cmpc_accel_sensitivity_show_v4() returns NULL which leads to a NULL pointer dereference going forward. Fix this by adding NULL pointer checks in all of the relevant places. Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/12825381.O9o76ZdvQC@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/classmate-laptop.c | 32 +++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/platform/x86/classmate-laptop.c b/drivers/platform/x86/classmate-laptop.c index 6b1b8e444e24..74d3eb83f56a 100644 --- a/drivers/platform/x86/classmate-laptop.c +++ b/drivers/platform/x86/classmate-laptop.c @@ -207,7 +207,12 @@ static ssize_t cmpc_accel_sensitivity_show_v4(struct device *dev, acpi = to_acpi_device(dev); inputdev = dev_get_drvdata(&acpi->dev); + if (!inputdev) + return -ENXIO; + accel = dev_get_drvdata(&inputdev->dev); + if (!accel) + return -ENXIO; return sysfs_emit(buf, "%d\n", accel->sensitivity); } @@ -224,7 +229,12 @@ static ssize_t cmpc_accel_sensitivity_store_v4(struct device *dev, acpi = to_acpi_device(dev); inputdev = dev_get_drvdata(&acpi->dev); + if (!inputdev) + return -ENXIO; + accel = dev_get_drvdata(&inputdev->dev); + if (!accel) + return -ENXIO; r = kstrtoul(buf, 0, &sensitivity); if (r) @@ -256,7 +266,12 @@ static ssize_t cmpc_accel_g_select_show_v4(struct device *dev, acpi = to_acpi_device(dev); inputdev = dev_get_drvdata(&acpi->dev); + if (!inputdev) + return -ENXIO; + accel = dev_get_drvdata(&inputdev->dev); + if (!accel) + return -ENXIO; return sysfs_emit(buf, "%d\n", accel->g_select); } @@ -273,7 +288,12 @@ static ssize_t cmpc_accel_g_select_store_v4(struct device *dev, acpi = to_acpi_device(dev); inputdev = dev_get_drvdata(&acpi->dev); + if (!inputdev) + return -ENXIO; + accel = dev_get_drvdata(&inputdev->dev); + if (!accel) + return -ENXIO; r = kstrtoul(buf, 0, &g_select); if (r) @@ -302,6 +322,8 @@ static int cmpc_accel_open_v4(struct input_dev *input) acpi = to_acpi_device(input->dev.parent); accel = dev_get_drvdata(&input->dev); + if (!accel) + return -ENXIO; cmpc_accel_set_sensitivity_v4(acpi->handle, accel->sensitivity); cmpc_accel_set_g_select_v4(acpi->handle, accel->g_select); @@ -549,7 +571,12 @@ static ssize_t cmpc_accel_sensitivity_show(struct device *dev, acpi = to_acpi_device(dev); inputdev = dev_get_drvdata(&acpi->dev); + if (!inputdev) + return -ENXIO; + accel = dev_get_drvdata(&inputdev->dev); + if (!accel) + return -ENXIO; return sysfs_emit(buf, "%d\n", accel->sensitivity); } @@ -566,7 +593,12 @@ static ssize_t cmpc_accel_sensitivity_store(struct device *dev, acpi = to_acpi_device(dev); inputdev = dev_get_drvdata(&acpi->dev); + if (!inputdev) + return -ENXIO; + accel = dev_get_drvdata(&inputdev->dev); + if (!accel) + return -ENXIO; r = kstrtoul(buf, 0, &sensitivity); if (r) From 5815d9303c67cef5f47cd01e73b671e6b9c40ef3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 24 Jan 2026 17:00:21 -0400 Subject: [PATCH 110/195] iommupt: Only cache flush memory changed by unmap The cache flush was happening on every level across the whole range of iteration, even if no leafs or tables were cleared. Instead flush only the sub range that was actually written. Overflushing isn't a correctness problem but it does impact the performance of unmap. After this series the performance compared to the original VT-d implementation with cache flushing turned on is: map_pages pgsz ,avg new,old ns, min new,old ns , min % (+ve is better) 2^12, 253,266 , 213,227 , 6.06 2^21, 246,244 , 221,219 , 0.00 2^30, 231,240 , 209,217 , 3.03 256*2^12, 2604,2668 , 2415,2540 , 4.04 256*2^21, 2495,2824 , 2390,2734 , 12.12 256*2^30, 2542,2845 , 2380,2718 , 12.12 unmap_pages pgsz ,avg new,old ns, min new,old ns , min % (+ve is better) 2^12, 259,292 , 222,251 , 11.11 2^21, 255,259 , 227,236 , 3.03 2^30, 238,254 , 217,230 , 5.05 256*2^12, 2751,2620 , 2417,2437 , 0.00 256*2^21, 2461,2526 , 2377,2423 , 1.01 256*2^30, 2498,2543 , 2370,2404 , 1.01 Fixes: efa03dab7ce4 ("iommupt: Flush the CPU cache after any writes to the page table") Reported-by: Francois Dugast Closes: https://lore.kernel.org/all/20260121130233.257428-1-francois.dugast@intel.com/ Signed-off-by: Jason Gunthorpe Reviewed-by: Lu Baolu Tested-by: Francois Dugast Reviewed-by: Kevin Tian Signed-off-by: Joerg Roedel --- drivers/iommu/generic_pt/iommu_pt.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h index 52ef028ed2db..d575f3ba9d34 100644 --- a/drivers/iommu/generic_pt/iommu_pt.h +++ b/drivers/iommu/generic_pt/iommu_pt.h @@ -931,6 +931,8 @@ static __maybe_unused int __unmap_range(struct pt_range *range, void *arg, struct pt_table_p *table) { struct pt_state pts = pt_init(range, level, table); + unsigned int flush_start_index = UINT_MAX; + unsigned int flush_end_index = UINT_MAX; struct pt_unmap_args *unmap = arg; unsigned int num_oas = 0; unsigned int start_index; @@ -986,6 +988,9 @@ static __maybe_unused int __unmap_range(struct pt_range *range, void *arg, iommu_pages_list_add(&unmap->free_list, pts.table_lower); pt_clear_entries(&pts, ilog2(1)); + if (pts.index < flush_start_index) + flush_start_index = pts.index; + flush_end_index = pts.index + 1; } pts.index++; } else { @@ -999,7 +1004,10 @@ start_oa: num_contig_lg2 = pt_entry_num_contig_lg2(&pts); pt_clear_entries(&pts, num_contig_lg2); num_oas += log2_to_int(num_contig_lg2); + if (pts.index < flush_start_index) + flush_start_index = pts.index; pts.index += log2_to_int(num_contig_lg2); + flush_end_index = pts.index; } if (pts.index >= pts.end_index) break; @@ -1007,7 +1015,8 @@ start_oa: } while (true); unmap->unmapped += log2_mul(num_oas, pt_table_item_lg2sz(&pts)); - flush_writes_range(&pts, start_index, pts.index); + if (flush_start_index != flush_end_index) + flush_writes_range(&pts, flush_start_index, flush_end_index); return ret; } From e64d1cb21a1c6ecd51bc1c94c83f6fc656f7c94d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 28 Jan 2026 10:58:54 +0100 Subject: [PATCH 111/195] gpiolib: acpi: Fix potential out-of-boundary left shift GPIO Address Space handler gets a pointer to the in or out value. This value is supposed to be at least 64-bit, but it's not limited to be exactly 64-bit. When ACPI tables are being parsed, for the bigger Connection():s ACPICA creates a Buffer instead of regular Integer object. The Buffer exists as long as Namespace holds the certain Connection(). Hence we can access the necessary bits without worrying. On the other hand, the left shift, used in the code, is limited by 31 (on 32-bit platforms) and otherwise considered to be Undefined Behaviour. Also the code uses only the first 64-bit word for the value, and anything bigger than 63 will be also subject to UB. Fix all this by modifying the code to correctly set or clear the respective bit in the bitmap constructed of 64-bit words. Fixes: 59084c564c41 ("gpiolib: acpi: use BIT_ULL() for u64 mask in address space handler") Fixes: 2c4d00cb8fc5 ("gpiolib: acpi: Use BIT() macro to increase readability") Cc: stable@vger.kernel.org Reviewed-by: Mika Westerberg Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20260128095918.4157491-1-andriy.shevchenko@linux.intel.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-acpi-core.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi-core.c b/drivers/gpio/gpiolib-acpi-core.c index d42f769eeb11..9627b3a9c7f3 100644 --- a/drivers/gpio/gpiolib-acpi-core.c +++ b/drivers/gpio/gpiolib-acpi-core.c @@ -1104,6 +1104,7 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address, unsigned int pin = agpio->pin_table[i]; struct acpi_gpio_connection *conn; struct gpio_desc *desc; + u16 word, shift; bool found; mutex_lock(&achip->conn_lock); @@ -1158,10 +1159,22 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address, mutex_unlock(&achip->conn_lock); - if (function == ACPI_WRITE) - gpiod_set_raw_value_cansleep(desc, !!(*value & BIT_ULL(i))); - else - *value |= (u64)gpiod_get_raw_value_cansleep(desc) << i; + /* + * For the cases when OperationRegion() consists of more than + * 64 bits calculate the word and bit shift to use that one to + * access the value. + */ + word = i / 64; + shift = i % 64; + + if (function == ACPI_WRITE) { + gpiod_set_raw_value_cansleep(desc, value[word] & BIT_ULL(shift)); + } else { + if (gpiod_get_raw_value_cansleep(desc)) + value[word] |= BIT_ULL(shift); + else + value[word] &= ~BIT_ULL(shift); + } } out: From a54afbc8a2138f8c2490510cf26cde188d480c43 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 27 Jan 2026 20:59:06 +0100 Subject: [PATCH 112/195] nvme-pci: DMA unmap the correct regions in nvme_free_sgls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call to nvme_free_sgls() in nvme_unmap_data() has the sg_list and sge parameters swapped. This wasn't noticed by the compiler because both share the same type. On a Xen PV hardware domain, and possibly any other architectures that takes that path, this leads to corruption of the NVMe contents. Fixes: f0887e2a52d4 ("nvme-pci: create common sgl unmapping helper") Reviewed-by: Christoph Hellwig Signed-off-by: Roger Pau Monné Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 58f3097888a7..c2bee32332fe 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -806,8 +806,8 @@ static void nvme_unmap_data(struct request *req) if (!blk_rq_dma_unmap(req, dma_dev, &iod->dma_state, iod->total_len, map)) { if (nvme_pci_cmd_use_sgl(&iod->cmd)) - nvme_free_sgls(req, iod->descriptors[0], - &iod->cmd.common.dptr.sgl, attrs); + nvme_free_sgls(req, &iod->cmd.common.dptr.sgl, + iod->descriptors[0], attrs); else nvme_free_prps(req, attrs); } From f2090ebdb59d0546cbd7b55d9dd63a77133efc03 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Sat, 22 Nov 2025 19:49:56 +0100 Subject: [PATCH 113/195] soc: qcom: smem: fix qcom_smem_is_available and check if __smem is valid Commit 7a94d5f31b54 ("soc: qcom: smem: better track SMEM uninitialized state") changed the usage of __smem and init now as an error pointer instead of NULL. qcom_smem_is_available() wasn't updated to reflect this change and also .qcom_smem_remove doesn't reset it on module exit. Update both entry to reflect new handling of __smem. Fixes: 7a94d5f31b54 ("soc: qcom: smem: better track SMEM uninitialized state") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/aSAnR3ECa04CoPqp@stanley.mountain/ Signed-off-by: Christian Marangi Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20251122185002.26524-1-ansuelsmth@gmail.com Signed-off-by: Bjorn Andersson --- drivers/soc/qcom/smem.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/soc/qcom/smem.c b/drivers/soc/qcom/smem.c index fef840b54574..c18a0c946f76 100644 --- a/drivers/soc/qcom/smem.c +++ b/drivers/soc/qcom/smem.c @@ -396,7 +396,7 @@ EXPORT_SYMBOL_GPL(qcom_smem_bust_hwspin_lock_by_host); */ bool qcom_smem_is_available(void) { - return !!__smem; + return !IS_ERR(__smem); } EXPORT_SYMBOL_GPL(qcom_smem_is_available); @@ -1247,7 +1247,8 @@ static void qcom_smem_remove(struct platform_device *pdev) { platform_device_unregister(__smem->socinfo); - __smem = NULL; + /* Set to -EPROBE_DEFER to signal unprobed state */ + __smem = ERR_PTR(-EPROBE_DEFER); } static const struct of_device_id qcom_smem_of_match[] = { From 2724138b2f7f6299812b3404e23b124304834759 Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Sat, 24 Jan 2026 18:52:14 +0530 Subject: [PATCH 114/195] iommufd: Initialize batch->kind in batch_clear() KMSAN reported an uninitialized value when batch_add_pfn_num() reads batch->kind. This occurs because batch_clear() does not initialize the kind field. When batch_add_pfn_num() checks "if (batch->kind != kind)", it reads this uninitialized value, triggering KMSAN warnings. However the algorithm is fine with any value in kind at this point as the batch is always empty and it always corrects kind if wrong. Initialize batch->kind to zero in batch_clear() to silence the KMSAN warning. Link: https://patch.msgid.link/r/20260124132214.624041-1-kartikey406@gmail.com Reported-by: syzbot+df28076a30d726933015@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=df28076a30d726933015 Fixes: f394576eb11db ("iommufd: PFN handling for iopt_pages") Tested-by: syzbot+df28076a30d726933015@syzkaller.appspotmail.com Signed-off-by: Deepanshu Kartikey Reviewed-by: Kevin Tian Tested-by: syzbot+a0c841e02f328005bbcc@syzkaller.appspotmail.com Reported-by: syzbot+a0c841e02f328005bbcc@syzkaller.appspotmail.com Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/pages.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index dbe51ecb9a20..f606148920fa 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -289,6 +289,7 @@ static void batch_clear(struct pfn_batch *batch) batch->end = 0; batch->pfns[0] = 0; batch->npfns[0] = 0; + batch->kind = 0; } /* From acecfee88564d8d6c11bc23c9b7fff89cd010314 Mon Sep 17 00:00:00 2001 From: Ivan Lipski Date: Fri, 16 Jan 2026 10:03:54 -0500 Subject: [PATCH 115/195] drm/amd/display: Clear HDMI HPD pending work only if it is enabled [Why&How] On amdgpu_dm_connector_destroy(), the driver attempts to cancel pending HDMI HPD work without checking if the HDMI HPD is enabled. Added a check that it is enabled before clearing it. Fixes: 6a681cd90345 ("drm/amd/display: Add an hdmi_hpd_debounce_delay_ms module") Signed-off-by: Ivan Lipski Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Alex Deucher (cherry picked from commit 17b2c526fd8026d8e0f4c0e7f94fc517e3901589) --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1ea5a250440f..a8a59126b2d2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7754,10 +7754,12 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector) drm_dp_mst_topology_mgr_destroy(&aconnector->mst_mgr); /* Cancel and flush any pending HDMI HPD debounce work */ - cancel_delayed_work_sync(&aconnector->hdmi_hpd_debounce_work); - if (aconnector->hdmi_prev_sink) { - dc_sink_release(aconnector->hdmi_prev_sink); - aconnector->hdmi_prev_sink = NULL; + if (aconnector->hdmi_hpd_debounce_delay_ms) { + cancel_delayed_work_sync(&aconnector->hdmi_hpd_debounce_work); + if (aconnector->hdmi_prev_sink) { + dc_sink_release(aconnector->hdmi_prev_sink); + aconnector->hdmi_prev_sink = NULL; + } } if (aconnector->bl_idx != -1) { From e7fbff9e7622a00c2b53cb14df481916f0019742 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 16 Jan 2026 17:33:05 -0500 Subject: [PATCH 116/195] drm/amdgpu/soc21: fix xclk for APUs The reference clock is supposed to be 100Mhz, but it appears to actually be slightly lower (99.81Mhz). Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/14451 Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher (cherry picked from commit 637fee3954d4bd509ea9d95ad1780fc174489860) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/soc21.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index ad36c96478a8..25536d89635d 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -225,7 +225,13 @@ static u32 soc21_get_config_memsize(struct amdgpu_device *adev) static u32 soc21_get_xclk(struct amdgpu_device *adev) { - return adev->clock.spll.reference_freq; + u32 reference_clock = adev->clock.spll.reference_freq; + + /* reference clock is actually 99.81 Mhz rather than 100 Mhz */ + if ((adev->flags & AMD_IS_APU) && reference_clock == 10000) + return 9981; + + return reference_clock; } From b1defcdc4457649db236415ee618a7151e28788c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 26 Jan 2026 23:44:45 -0500 Subject: [PATCH 117/195] drm/amdgpu: Fix cond_exec handling in amdgpu_ib_schedule() The EXEC_COUNT field must be > 0. In the gfx shadow handling we always emit a cond_exec packet after the gfx_shadow packet, but the EXEC_COUNT never gets patched. This leads to a hang when we try and reset queues on gfx11 APUs. Fixes: c68cbbfd54c6 ("drm/amdgpu: cleanup conditional execution") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4789 Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher (cherry picked from commit ba205ac3d6e83f56c4f824f23f1b4522cb844ff3) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 72ec455fa932..44f230d67da2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -235,7 +235,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, amdgpu_ring_ib_begin(ring); - if (ring->funcs->emit_gfx_shadow) + if (ring->funcs->emit_gfx_shadow && adev->gfx.cp_gfx_shadow) amdgpu_ring_emit_gfx_shadow(ring, shadow_va, csa_va, gds_va, init_shadow, vmid); @@ -291,7 +291,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, fence_flags | AMDGPU_FENCE_FLAG_64BIT); } - if (ring->funcs->emit_gfx_shadow && ring->funcs->init_cond_exec) { + if (ring->funcs->emit_gfx_shadow && ring->funcs->init_cond_exec && + adev->gfx.cp_gfx_shadow) { amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0); amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr); } From 20e01bba2ae4898ce65cdcacd1bd6bec5111abd9 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 28 Jan 2026 07:34:13 +0900 Subject: [PATCH 118/195] firewire: core: fix race condition against transaction list The list of transaction is enumerated without acquiring card lock when processing AR response event. This causes a race condition bug when processing AT request completion event concurrently. This commit fixes the bug by put timer start for split transaction expiration into the scope of lock. The value of jiffies in card structure is referred before acquiring the lock. Cc: stable@vger.kernel.org # v6.18 Fixes: b5725cfa4120 ("firewire: core: use spin lock specific to timer for split transaction") Reported-by: Andreas Persson Closes: https://github.com/alsa-project/snd-firewire-ctl-services/issues/209 Tested-by: Andreas Persson Link: https://lore.kernel.org/r/20260127223413.22265-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 7fea11a5e359..22ae387ae03c 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -173,20 +173,14 @@ static void split_transaction_timeout_callback(struct timer_list *timer) } } -static void start_split_transaction_timeout(struct fw_transaction *t, - struct fw_card *card) +// card->transactions.lock should be acquired in advance for the linked list. +static void start_split_transaction_timeout(struct fw_transaction *t, unsigned int delta) { - unsigned long delta; - if (list_empty(&t->link) || WARN_ON(t->is_split_transaction)) return; t->is_split_transaction = true; - // NOTE: This can be without irqsave when we can guarantee that __fw_send_request() for - // local destination never runs in any type of IRQ context. - scoped_guard(spinlock_irqsave, &card->split_timeout.lock) - delta = card->split_timeout.jiffies; mod_timer(&t->split_timeout_timer, jiffies + delta); } @@ -207,13 +201,20 @@ static void transmit_complete_callback(struct fw_packet *packet, break; case ACK_PENDING: { + unsigned int delta; + // NOTE: This can be without irqsave when we can guarantee that __fw_send_request() for // local destination never runs in any type of IRQ context. scoped_guard(spinlock_irqsave, &card->split_timeout.lock) { t->split_timeout_cycle = compute_split_timeout_timestamp(card, packet->timestamp) & 0xffff; + delta = card->split_timeout.jiffies; } - start_split_transaction_timeout(t, card); + + // NOTE: This can be without irqsave when we can guarantee that __fw_send_request() for + // local destination never runs in any type of IRQ context. + scoped_guard(spinlock_irqsave, &card->transactions.lock) + start_split_transaction_timeout(t, delta); break; } case ACK_BUSY_X: From 0fd17e5983337231dc655e9ca0095d2ca3f47405 Mon Sep 17 00:00:00 2001 From: Oreoluwa Babatunde Date: Mon, 26 Jan 2026 18:13:27 +0100 Subject: [PATCH 119/195] of: reserved_mem: Allow reserved_mem framework detect "cma=" kernel param When initializing the default cma region, the "cma=" kernel parameter takes priority over a DT defined linux,cma-default region. Hence, give the reserved_mem framework the ability to detect this so that the DT defined cma region can skip initialization accordingly. Signed-off-by: Oreoluwa Babatunde Tested-by: Joy Zou Acked-by: Rob Herring (Arm) Fixes: 8a6e02d0c00e ("of: reserved_mem: Restructure how the reserved memory regions are processed") Fixes: 2c223f7239f3 ("of: reserved_mem: Restructure call site for dma_contiguous_early_fixup()") Link: https://lore.kernel.org/r/20251210002027.1171519-1-oreoluwa.babatunde@oss.qualcomm.com [mszyprow: rebased onto v6.19-rc1, added fixes tags, added a stub for cma_skip_dt_default_reserved_mem() if no CONFIG_DMA_CMA is set] Signed-off-by: Marek Szyprowski --- drivers/of/of_reserved_mem.c | 19 +++++++++++++++++-- include/linux/cma.h | 9 +++++++++ kernel/dma/contiguous.c | 16 ++++++++++------ 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 5619ec917858..a2a13617c6f4 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -157,13 +157,19 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, phys_addr_t base, size; int i, len; const __be32 *prop; - bool nomap; + bool nomap, default_cma; prop = of_flat_dt_get_addr_size_prop(node, "reg", &len); if (!prop) return -ENOENT; nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL; + default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL); + + if (default_cma && cma_skip_dt_default_reserved_mem()) { + pr_err("Skipping dt linux,cma-default for \"cma=\" kernel param.\n"); + return -EINVAL; + } for (i = 0; i < len; i++) { u64 b, s; @@ -248,10 +254,13 @@ void __init fdt_scan_reserved_mem_reg_nodes(void) fdt_for_each_subnode(child, fdt, node) { const char *uname; + bool default_cma = of_get_flat_dt_prop(child, "linux,cma-default", NULL); u64 b, s; if (!of_fdt_device_is_available(fdt, child)) continue; + if (default_cma && cma_skip_dt_default_reserved_mem()) + continue; if (!of_flat_dt_get_addr_size(child, "reg", &b, &s)) continue; @@ -389,7 +398,7 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam phys_addr_t base = 0, align = 0, size; int i, len; const __be32 *prop; - bool nomap; + bool nomap, default_cma; int ret; prop = of_get_flat_dt_prop(node, "size", &len); @@ -413,6 +422,12 @@ static int __init __reserved_mem_alloc_size(unsigned long node, const char *unam } nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL; + default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL); + + if (default_cma && cma_skip_dt_default_reserved_mem()) { + pr_err("Skipping dt linux,cma-default for \"cma=\" kernel param.\n"); + return -EINVAL; + } /* Need adjust the alignment to satisfy the CMA requirement */ if (IS_ENABLED(CONFIG_CMA) diff --git a/include/linux/cma.h b/include/linux/cma.h index 62d9c1cf6326..2e6931735880 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -57,6 +57,15 @@ extern bool cma_intersects(struct cma *cma, unsigned long start, unsigned long e extern void cma_reserve_pages_on_error(struct cma *cma); +#ifdef CONFIG_DMA_CMA +extern bool cma_skip_dt_default_reserved_mem(void); +#else +static inline bool cma_skip_dt_default_reserved_mem(void) +{ + return false; +} +#endif + #ifdef CONFIG_CMA struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp); bool cma_free_folio(struct cma *cma, const struct folio *folio); diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index d8fd6f779f79..0e266979728b 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -91,6 +91,16 @@ static int __init early_cma(char *p) } early_param("cma", early_cma); +/* + * cma_skip_dt_default_reserved_mem - This is called from the + * reserved_mem framework to detect if the default cma region is being + * set by the "cma=" kernel parameter. + */ +bool __init cma_skip_dt_default_reserved_mem(void) +{ + return size_cmdline != -1; +} + #ifdef CONFIG_DMA_NUMA_CMA static struct cma *dma_contiguous_numa_area[MAX_NUMNODES]; @@ -470,12 +480,6 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem) struct cma *cma; int err; - if (size_cmdline != -1 && default_cma) { - pr_info("Reserved memory: bypass %s node, using cmdline CMA params instead\n", - rmem->name); - return -EBUSY; - } - if (!of_get_flat_dt_prop(node, "reusable", NULL) || of_get_flat_dt_prop(node, "no-map", NULL)) return -EINVAL; From 6ea84d7a92cb0b30aaf7d2066a69e28e27932332 Mon Sep 17 00:00:00 2001 From: Shida Zhang Date: Tue, 27 Jan 2026 16:21:11 +0800 Subject: [PATCH 120/195] bcache: remove dead code in detached_dev_do_request bio_alloc_clone() with GFP_NOIO and a mempool will not return NULL. Remove the unnecessary NULL check. Suggested-by: Christoph Hellwig Signed-off-by: Shida Zhang Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/md/bcache/request.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index a02aecac05cd..c2f38907a2a3 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1113,11 +1113,6 @@ static void detached_dev_do_request(struct bcache_device *d, clone_bio = bio_alloc_clone(dc->bdev, orig_bio, GFP_NOIO, &d->bio_detached); - if (!clone_bio) { - orig_bio->bi_status = BLK_STS_RESOURCE; - bio_endio(orig_bio); - return; - } ddip = container_of(clone_bio, struct detached_dev_io_private, bio); /* Count on the bcache device */ From 4da7c5c3ec34d839bba6e035c3d05c447a2f9d4f Mon Sep 17 00:00:00 2001 From: Shida Zhang Date: Tue, 27 Jan 2026 16:21:12 +0800 Subject: [PATCH 121/195] bcache: fix I/O accounting leak in detached_dev_do_request When a bcache device is detached, discard requests are completed immediately. However, the I/O accounting started in cached_dev_make_request() is not ended, leading to 100% disk utilization reports in iostat. Add the missing bio_end_io_acct() call. Fixes: cafe56359144 ("bcache: A block layer cache") Signed-off-by: Shida Zhang Acked-by: Coly Li Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/md/bcache/request.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index c2f38907a2a3..3fa3b13a410f 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1107,6 +1107,7 @@ static void detached_dev_do_request(struct bcache_device *d, if (bio_op(orig_bio) == REQ_OP_DISCARD && !bdev_max_discard_sectors(dc->bdev)) { + bio_end_io_acct(orig_bio, start_time); bio_endio(orig_bio); return; } From 0ea05c4f7527a98f5946f96c829733788934311d Mon Sep 17 00:00:00 2001 From: Han Gao Date: Wed, 28 Jan 2026 03:07:11 +0800 Subject: [PATCH 122/195] riscv: compat: fix COMPAT_UTS_MACHINE definition The COMPAT_UTS_MACHINE for riscv was incorrectly defined as "riscv". Change it to "riscv32" to reflect the correct 32-bit compat name. Fixes: 06d0e3723647 ("riscv: compat: Add basic compat data type implementation") Cc: stable@vger.kernel.org Signed-off-by: Han Gao Reviewed-by: Guo Ren (Alibaba Damo Academy) Link: https://patch.msgid.link/20260127190711.2264664-1-gaohan@iscas.ac.cn Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/compat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/compat.h b/arch/riscv/include/asm/compat.h index 6081327e55f5..28e115eed218 100644 --- a/arch/riscv/include/asm/compat.h +++ b/arch/riscv/include/asm/compat.h @@ -2,7 +2,7 @@ #ifndef __ASM_COMPAT_H #define __ASM_COMPAT_H -#define COMPAT_UTS_MACHINE "riscv\0\0" +#define COMPAT_UTS_MACHINE "riscv32\0\0" /* * Architecture specific compatibility types From c7e45aa0a91f27a63fc9adc44385cfbcdb48eec1 Mon Sep 17 00:00:00 2001 From: Yushan Wang Date: Wed, 28 Jan 2026 19:56:31 +0000 Subject: [PATCH 123/195] MAINTAINERS: Add myself as maintainer of hisi_soc_hha Add myself as maintainer of HiSilicon SoC HHA driver as I will be responsible for this driver. Signed-off-by: Yushan Wang Acked-by: Jonathan Cameron Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20260128-audition-professed-31d1b66d5b4e@spud Signed-off-by: Arnd Bergmann --- MAINTAINERS | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 151a0baca2a9..62fe38a771a5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11376,6 +11376,11 @@ F: Documentation/ABI/testing/sysfs-devices-platform-kunpeng_hccs F: drivers/soc/hisilicon/kunpeng_hccs.c F: drivers/soc/hisilicon/kunpeng_hccs.h +HISILICON SOC HHA DRIVER +M: Yushan Wang +S: Maintained +F: drivers/cache/hisi_soc_hha.c + HISILICON LPC BUS DRIVER M: Jay Fang S: Maintained From 59e82a4237cf39be697f25f2da26f4bee3007826 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Sun, 25 Jan 2026 20:15:45 +0000 Subject: [PATCH 124/195] MAINTAINERS: Change Sudeep Holla's email address Switch to my kernel.org email address going forward. Signed-off-by: Sudeep Holla Link: https://lore.kernel.org/r/20260125201545.3461463-1-sudeep.holla@kernel.org Signed-off-by: Arnd Bergmann --- .mailmap | 3 ++- MAINTAINERS | 22 +++++++++++----------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/.mailmap b/.mailmap index 428d721ffbb1..99c7ac8454b0 100644 --- a/.mailmap +++ b/.mailmap @@ -786,7 +786,8 @@ Subash Abhinov Kasiviswanathan Subhash Jadavani Sudarshan Rajagopalan -Sudeep Holla Sudeep KarkadaNagesha +Sudeep Holla Sudeep KarkadaNagesha +Sudeep Holla Sumit Garg Sumit Semwal Surabhi Vishnoi diff --git a/MAINTAINERS b/MAINTAINERS index 62fe38a771a5..3344b5c14016 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -335,7 +335,7 @@ F: tools/power/acpi/ ACPI FOR ARM64 (ACPI/arm64) M: Lorenzo Pieralisi M: Hanjun Guo -M: Sudeep Holla +M: Sudeep Holla L: linux-acpi@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@ -351,7 +351,7 @@ F: drivers/acpi/riscv/ F: include/linux/acpi_rimt.h ACPI PCC(Platform Communication Channel) MAILBOX DRIVER -M: Sudeep Holla +M: Sudeep Holla L: linux-acpi@vger.kernel.org S: Supported F: drivers/mailbox/pcc.c @@ -3681,7 +3681,7 @@ N: uniphier ARM/VERSATILE EXPRESS PLATFORM M: Liviu Dudau -M: Sudeep Holla +M: Sudeep Holla M: Lorenzo Pieralisi L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@ -6513,7 +6513,7 @@ F: drivers/i2c/busses/i2c-cp2615.c CPU FREQUENCY DRIVERS - VEXPRESS SPC ARM BIG LITTLE M: Viresh Kumar -M: Sudeep Holla +M: Sudeep Holla L: linux-pm@vger.kernel.org S: Maintained W: http://www.arm.com/products/processors/technologies/biglittleprocessing.php @@ -6609,7 +6609,7 @@ F: include/linux/platform_data/cpuidle-exynos.h CPUIDLE DRIVER - ARM PSCI M: Lorenzo Pieralisi -M: Sudeep Holla +M: Sudeep Holla M: Ulf Hansson L: linux-pm@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@ -9816,7 +9816,7 @@ F: include/uapi/linux/firewire*.h F: tools/firewire/ FIRMWARE FRAMEWORK FOR ARMV8-A -M: Sudeep Holla +M: Sudeep Holla L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: drivers/firmware/arm_ffa/ @@ -10514,7 +10514,7 @@ S: Maintained F: scripts/gendwarfksyms/ GENERIC ARCHITECTURE TOPOLOGY -M: Sudeep Holla +M: Sudeep Holla L: linux-kernel@vger.kernel.org S: Maintained F: drivers/base/arch_topology.c @@ -15106,7 +15106,7 @@ F: drivers/mailbox/arm_mhuv2.c F: include/linux/mailbox/arm_mhuv2_message.h MAILBOX ARM MHUv3 -M: Sudeep Holla +M: Sudeep Holla M: Cristian Marussi L: linux-kernel@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@ -23636,7 +23636,7 @@ F: include/uapi/linux/sed* SECURE MONITOR CALL(SMC) CALLING CONVENTION (SMCCC) M: Mark Rutland M: Lorenzo Pieralisi -M: Sudeep Holla +M: Sudeep Holla L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: drivers/firmware/smccc/ @@ -25400,7 +25400,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd.git F: drivers/mfd/syscon.c SYSTEM CONTROL & POWER/MANAGEMENT INTERFACE (SCPI/SCMI) Message Protocol drivers -M: Sudeep Holla +M: Sudeep Holla R: Cristian Marussi L: arm-scmi@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@ -26552,7 +26552,7 @@ F: samples/tsm-mr/ TRUSTED SERVICES TEE DRIVER M: Balint Dobszay -M: Sudeep Holla +M: Sudeep Holla L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: trusted-services@lists.trustedfirmware.org S: Maintained From 56c430c7f06d838fe3b2077dbbc4cc0bf992312b Mon Sep 17 00:00:00 2001 From: Sai Sree Kartheek Adivi Date: Wed, 28 Jan 2026 19:05:54 +0530 Subject: [PATCH 125/195] dma/pool: distinguish between missing and exhausted atomic pools Currently, dma_alloc_from_pool() unconditionally warns and dumps a stack trace when an allocation fails, with the message "Failed to get suitable pool". This conflates two distinct failure modes: 1. Configuration error: No atomic pool is available for the requested DMA mask (a fundamental system setup issue) 2. Resource Exhaustion: A suitable pool exists but is currently full (a recoverable runtime state) This lack of distinction prevents drivers from using __GFP_NOWARN to suppress error messages during temporary pressure spikes, such as when awaiting synchronous reclaim of descriptors. Refactor the error handling to distinguish these cases: - If no suitable pool is found, keep the unconditional WARN regarding the missing pool. - If a pool was found but is exhausted, respect __GFP_NOWARN and update the warning message to explicitly state "DMA pool exhausted". Fixes: 9420139f516d ("dma-pool: fix coherent pool allocations for IOMMU mappings") Signed-off-by: Sai Sree Kartheek Adivi Reviewed-by: Robin Murphy Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20260128133554.3056582-1-s-adivi@ti.com --- kernel/dma/pool.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index c5da29ad010c..2b2fbb709242 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -277,15 +277,20 @@ struct page *dma_alloc_from_pool(struct device *dev, size_t size, { struct gen_pool *pool = NULL; struct page *page; + bool pool_found = false; while ((pool = dma_guess_pool(pool, gfp))) { + pool_found = true; page = __dma_alloc_from_pool(dev, size, pool, cpu_addr, phys_addr_ok); if (page) return page; } - WARN(1, "Failed to get suitable pool for %s\n", dev_name(dev)); + if (pool_found) + WARN(!(gfp & __GFP_NOWARN), "DMA pool exhausted for %s\n", dev_name(dev)); + else + WARN(1, "Failed to get suitable pool for %s\n", dev_name(dev)); return NULL; } From 94e5baff3ee3cd1a5246c85b0744092eab1c5d42 Mon Sep 17 00:00:00 2001 From: Maciej Strozek Date: Wed, 28 Jan 2026 09:24:05 +0000 Subject: [PATCH 126/195] ASoC: sof_sdw: Add a quirk for Lenovo laptop using sidecar amps with cs42l43 Add a quirk for a Lenovo laptop (SSID: 0x17aa3821) to allow using sidecar CS35L57 amps with CS42L43 codec. Signed-off-by: Maciej Strozek Reviewed-by: Cezary Rojewski Link: https://patch.msgid.link/20260128092410.1540583-1-mstrozek@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 8721a098d53f..50b838be24e9 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -838,6 +838,7 @@ static const struct snd_pci_quirk sof_sdw_ssid_quirk_table[] = { SND_PCI_QUIRK(0x17aa, 0x2347, "Lenovo P16", SOC_SDW_CODEC_MIC), SND_PCI_QUIRK(0x17aa, 0x2348, "Lenovo P16", SOC_SDW_CODEC_MIC), SND_PCI_QUIRK(0x17aa, 0x2349, "Lenovo P1", SOC_SDW_CODEC_MIC), + SND_PCI_QUIRK(0x17aa, 0x3821, "Lenovo 0x3821", SOC_SDW_SIDECAR_AMPS), {} }; From 6222883af286e2feb3c9ff2bf9fd8fdf4220c55a Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 28 Jan 2026 13:04:45 -0600 Subject: [PATCH 127/195] platform/x86: hp-bioscfg: Skip empty attribute names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid registering kobjects with empty names when a BIOS attribute name decodes to an empty string. Fixes: a34fc329b1895 ("platform/x86: hp-bioscfg: bioscfg") Reported-by: Alain Cousinie Closes: https://lore.kernel.org/platform-driver-x86/22ed5f78-c8bf-4ab4-8c38-420cc0201e7e@laposte.net/ Signed-off-by: Mario Limonciello Link: https://patch.msgid.link/20260128190501.2170068-1-mario.limonciello@amd.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/hp/hp-bioscfg/bioscfg.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c index dbe096eefa75..51e8977d3eb4 100644 --- a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c +++ b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c @@ -696,6 +696,11 @@ static int hp_init_bios_package_attribute(enum hp_wmi_data_type attr_type, return ret; } + if (!str_value || !str_value[0]) { + pr_debug("Ignoring attribute with empty name\n"); + goto pack_attr_exit; + } + /* All duplicate attributes found are ignored */ duplicate = kset_find_obj(temp_kset, str_value); if (duplicate) { From 008bec8ffe6e7746588d1e12c5b3865fa478fc91 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Tue, 27 Jan 2026 15:45:40 -0800 Subject: [PATCH 128/195] platform/x86/intel/tpmi/plr: Make the file domain/status writeable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The file sys/kernel/debug/tpmi-/plr/domain/status has store and show callbacks. Make it writeable. Fixes: 811f67c51636d ("platform/x86/intel/tpmi: Add new auxiliary driver for performance limits") Signed-off-by: Ricardo Neri Link: https://patch.msgid.link/20260127-plr-debugfs-write-v1-1-1fffbc370b1e@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/plr_tpmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/plr_tpmi.c b/drivers/platform/x86/intel/plr_tpmi.c index 58132da47745..05727169f49c 100644 --- a/drivers/platform/x86/intel/plr_tpmi.c +++ b/drivers/platform/x86/intel/plr_tpmi.c @@ -316,7 +316,7 @@ static int intel_plr_probe(struct auxiliary_device *auxdev, const struct auxilia snprintf(name, sizeof(name), "domain%d", i); dentry = debugfs_create_dir(name, plr->dbgfs_dir); - debugfs_create_file("status", 0444, dentry, &plr->die_info[i], + debugfs_create_file("status", 0644, dentry, &plr->die_info[i], &plr_status_fops); } From c1ed856c09d0d730c2f63bbb757cb6011db148f9 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Wed, 21 Jan 2026 17:37:51 +0000 Subject: [PATCH 129/195] drm/xe/configfs: Fix is_bound() pci_dev lifetime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move pci_dev_put() after pci_dbg() to avoid using pdev after dropping its reference. Fixes: 2674f1ef29f46 ("drm/xe/configfs: Block runtime attribute changes") Signed-off-by: Shuicheng Lin Reviewed-by: Ashutosh Dixit Signed-off-by: Ashutosh Dixit Link: https://patch.msgid.link/20260121173750.3090907-2-shuicheng.lin@intel.com (cherry picked from commit 63b33604365bdca43dee41bab809da2230491036) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_configfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index 9f6251b1008b..82edd0466005 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -347,11 +347,10 @@ static bool is_bound(struct xe_config_group_device *dev) return false; ret = pci_get_drvdata(pdev); - pci_dev_put(pdev); - if (ret) pci_dbg(pdev, "Already bound to driver\n"); + pci_dev_put(pdev); return ret; } From cc4f433b14e05eaa4a98fd677b836e9229422387 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 28 Jan 2026 20:51:08 -0500 Subject: [PATCH 130/195] drm/amdgpu/gfx10: fix wptr reset in KGQ init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit wptr is a 64 bit value and we need to update the full value, not just 32 bits. Align with what we already do for KCQs. Reviewed-by: Timur Kristóf Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher (cherry picked from commit e80b1d1aa1073230b6c25a1a72e88f37e425ccda) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d75b9940f248..fc65fb36e115 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6879,7 +6879,7 @@ static int gfx_v10_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; - *ring->wptr_cpu_addr = 0; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); amdgpu_ring_clear_ring(ring); } From b1f810471c6a6bd349f7f9f2f2fed96082056d46 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 28 Jan 2026 18:09:03 -0500 Subject: [PATCH 131/195] drm/amdgpu/gfx11: fix wptr reset in KGQ init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit wptr is a 64 bit value and we need to update the full value, not just 32 bits. Align with what we already do for KCQs. Reviewed-by: Timur Kristóf Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher (cherry picked from commit 1f16866bdb1daed7a80ca79ae2837a9832a74fbc) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 8a2ee2de390f..3b160a67e57a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4201,7 +4201,7 @@ static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; - *ring->wptr_cpu_addr = 0; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); amdgpu_ring_clear_ring(ring); } From 9077d32a4b570fa20500aa26e149981c366c965d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 28 Jan 2026 18:13:16 -0500 Subject: [PATCH 132/195] drm/amdgpu/gfx12: fix wptr reset in KGQ init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit wptr is a 64 bit value and we need to update the full value, not just 32 bits. Align with what we already do for KCQs. Reviewed-by: Timur Kristóf Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher (cherry picked from commit a2918f958d3f677ea93c0ac257cb6ba69b7abb7c) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index b786967022d2..fbc100778f97 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -3079,7 +3079,7 @@ static int gfx_v12_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; - *ring->wptr_cpu_addr = 0; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); amdgpu_ring_clear_ring(ring); } From 3eb46fbb601f9a0b4df8eba79252a0a85e983044 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 28 Jan 2026 22:55:46 -0500 Subject: [PATCH 133/195] drm/amdgpu/gfx11: adjust KGQ reset sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kernel gfx queues do not need to be reinitialized or remapped after a reset. This fixes queue reset failures on APUs. v2: preserve init and remap for MMIO case. Fixes: b3e9bfd86658 ("drm/amdgpu/gfx11: add ring reset callbacks") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4789 Reviewed-by: Timur Kristóf Signed-off-by: Alex Deucher (cherry picked from commit b340ff216fdabfe71ba0cdd47e9835a141d08e10) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 3b160a67e57a..e642236ea2c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6823,11 +6823,12 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; + bool use_mmio = false; int r; amdgpu_ring_reset_helper_begin(ring, timedout_fence); - r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, use_mmio); if (r) { dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); @@ -6836,16 +6837,18 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, return r; } - r = gfx_v11_0_kgq_init_queue(ring, true); - if (r) { - dev_err(adev->dev, "failed to init kgq\n"); - return r; - } + if (use_mmio) { + r = gfx_v11_0_kgq_init_queue(ring, true); + if (r) { + dev_err(adev->dev, "failed to init kgq\n"); + return r; + } - r = amdgpu_mes_map_legacy_queue(adev, ring); - if (r) { - dev_err(adev->dev, "failed to remap kgq\n"); - return r; + r = amdgpu_mes_map_legacy_queue(adev, ring); + if (r) { + dev_err(adev->dev, "failed to remap kgq\n"); + return r; + } } return amdgpu_ring_reset_helper_end(ring, timedout_fence); From dfd64f6e8cd7b59238cdaf8af7a55711f13a89db Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 28 Jan 2026 23:05:50 -0500 Subject: [PATCH 134/195] drm/amdgpu/gfx12: adjust KGQ reset sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kernel gfx queues do not need to be reinitialized or remapped after a reset. Align with gfx11. v2: preserve init and remap for MMIO case. Reviewed-by: Timur Kristóf Signed-off-by: Alex Deucher (cherry picked from commit 0a6d6ed694d72b66b0ed7a483d5effa01acd3951) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index fbc100778f97..4aab89a9ab40 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -5297,11 +5297,12 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; + bool use_mmio = false; int r; amdgpu_ring_reset_helper_begin(ring, timedout_fence); - r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, use_mmio); if (r) { dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); r = gfx_v12_reset_gfx_pipe(ring); @@ -5309,16 +5310,18 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, return r; } - r = gfx_v12_0_kgq_init_queue(ring, true); - if (r) { - dev_err(adev->dev, "failed to init kgq\n"); - return r; - } + if (use_mmio) { + r = gfx_v12_0_kgq_init_queue(ring, true); + if (r) { + dev_err(adev->dev, "failed to init kgq\n"); + return r; + } - r = amdgpu_mes_map_legacy_queue(adev, ring); - if (r) { - dev_err(adev->dev, "failed to remap kgq\n"); - return r; + r = amdgpu_mes_map_legacy_queue(adev, ring); + if (r) { + dev_err(adev->dev, "failed to remap kgq\n"); + return r; + } } return amdgpu_ring_reset_helper_end(ring, timedout_fence); From bdde21d3e77da55121885fd2ef42bc6a15ac2f0c Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 29 Jan 2026 13:31:56 -0500 Subject: [PATCH 135/195] lsm: preserve /proc/sys/vm/mmap_min_addr when !CONFIG_SECURITY While reworking the LSM initialization code the /proc/sys/vm/mmap_min_addr handler was inadvertently caught up in the change and the procfs entry wasn't setup when CONFIG_SECURITY was not selected at kernel build time. This patch restores the previous behavior and ensures that the procfs entry is setup regardless of the CONFIG_SECURITY state. Future work will improve upon this, likely by moving the procfs handler into the mm subsystem, but this patch should resolve the immediate regression. Fixes: 4ab5efcc2829 ("lsm: consolidate all of the LSM framework initcalls") Reported-by: Lorenzo Stoakes Reviewed-by: Lorenzo Stoakes Tested-by: Lorenzo Stoakes Reviewed-by: Kees Cook Signed-off-by: Paul Moore --- security/lsm.h | 9 --------- security/lsm_init.c | 7 +------ security/min_addr.c | 5 ++--- 3 files changed, 3 insertions(+), 18 deletions(-) diff --git a/security/lsm.h b/security/lsm.h index 81aadbc61685..db77cc83e158 100644 --- a/security/lsm.h +++ b/security/lsm.h @@ -37,15 +37,6 @@ int lsm_task_alloc(struct task_struct *task); /* LSM framework initializers */ -#ifdef CONFIG_MMU -int min_addr_init(void); -#else -static inline int min_addr_init(void) -{ - return 0; -} -#endif /* CONFIG_MMU */ - #ifdef CONFIG_SECURITYFS int securityfs_init(void); #else diff --git a/security/lsm_init.c b/security/lsm_init.c index 05bd52e6b1f2..573e2a7250c4 100644 --- a/security/lsm_init.c +++ b/security/lsm_init.c @@ -489,12 +489,7 @@ int __init security_init(void) */ static int __init security_initcall_pure(void) { - int rc_adr, rc_lsm; - - rc_adr = min_addr_init(); - rc_lsm = lsm_initcall(pure); - - return (rc_adr ? rc_adr : rc_lsm); + return lsm_initcall(pure); } pure_initcall(security_initcall_pure); diff --git a/security/min_addr.c b/security/min_addr.c index 0fde5ec9abc8..56e4f9d25929 100644 --- a/security/min_addr.c +++ b/security/min_addr.c @@ -5,8 +5,6 @@ #include #include -#include "lsm.h" - /* amount of vm to protect from userspace access by both DAC and the LSM*/ unsigned long mmap_min_addr; /* amount of vm to protect from userspace using CAP_SYS_RAWIO (DAC) */ @@ -54,10 +52,11 @@ static const struct ctl_table min_addr_sysctl_table[] = { }, }; -int __init min_addr_init(void) +static int __init mmap_min_addr_init(void) { register_sysctl_init("vm", min_addr_sysctl_table); update_mmap_min_addr(); return 0; } +pure_initcall(mmap_min_addr_init); From 2da8fbb8f1c17129a08c1e0e42c71eabdca76062 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Tue, 20 Jan 2026 18:32:41 +0000 Subject: [PATCH 136/195] drm/xe/nvm: Manage nvm aux cleanup with devres MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move nvm teardown to a devm-managed action registered from xe_nvm_init(). This ensures the auxiliary NVM device is deleted on probe failure and device detach without requiring explicit calls from remove paths. As part of this, drop xe_nvm_fini() from xe_device_remove() and from the survivability sysfs teardown, and remove the public xe_nvm_fini() API from the header. This is to fix below warn message when there is probe failure after xe_nvm_init(), then xe_device_probe() is called again: " [ 207.318152] sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:01.0/0000:03:00.0/xe.nvm.768' [ 207.318157] CPU: 5 UID: 0 PID: 10261 Comm: modprobe Tainted: G B W 6.19.0-rc2-lgci-xe-kernel+ #223 PREEMPT(voluntary) [ 207.318160] Tainted: [B]=BAD_PAGE, [W]=WARN [ 207.318161] Hardware name: ASUS System Product Name/PRIME Z790-P WIFI, BIOS 0812 02/24/2023 [ 207.318163] Call Trace: [ 207.318163] [ 207.318165] dump_stack_lvl+0xa0/0xc0 [ 207.318170] dump_stack+0x10/0x20 [ 207.318171] sysfs_warn_dup+0xd5/0x110 [ 207.318175] sysfs_create_dir_ns+0x1f6/0x280 [ 207.318177] ? __pfx_sysfs_create_dir_ns+0x10/0x10 [ 207.318179] ? lock_acquire+0x1a4/0x2e0 [ 207.318182] ? __kasan_check_read+0x11/0x20 [ 207.318185] ? do_raw_spin_unlock+0x5c/0x240 [ 207.318187] kobject_add_internal+0x28d/0x8e0 [ 207.318189] kobject_add+0x11f/0x1f0 [ 207.318191] ? __pfx_kobject_add+0x10/0x10 [ 207.318193] ? lockdep_init_map_type+0x4b/0x230 [ 207.318195] ? get_device_parent.isra.0+0x43/0x4c0 [ 207.318197] ? kobject_get+0x55/0xf0 [ 207.318199] device_add+0x2d7/0x1500 [ 207.318201] ? __pfx_device_add+0x10/0x10 [ 207.318203] ? lockdep_init_map_type+0x4b/0x230 [ 207.318205] __auxiliary_device_add+0x99/0x140 [ 207.318208] xe_nvm_init+0x7a2/0xef0 [xe] [ 207.318333] ? xe_devcoredump_init+0x80/0x110 [xe] [ 207.318452] ? __devm_add_action+0x82/0xc0 [ 207.318454] ? fs_reclaim_release+0xc0/0x110 [ 207.318457] xe_device_probe+0x17dd/0x2c40 [xe] [ 207.318574] ? __pfx___drm_dev_dbg+0x10/0x10 [ 207.318576] ? add_dr+0x180/0x220 [ 207.318579] ? __pfx___drmm_mutex_release+0x10/0x10 [ 207.318582] ? __pfx_xe_device_probe+0x10/0x10 [xe] [ 207.318697] ? xe_pm_init_early+0x33a/0x410 [xe] [ 207.318850] xe_pci_probe+0x936/0x1250 [xe] [ 207.318999] ? lock_acquire+0x1a4/0x2e0 [ 207.319003] ? __pfx_xe_pci_probe+0x10/0x10 [xe] [ 207.319151] local_pci_probe+0xe6/0x1a0 [ 207.319154] pci_device_probe+0x523/0x840 [ 207.319157] ? __pfx_pci_device_probe+0x10/0x10 [ 207.319159] ? sysfs_do_create_link_sd.isra.0+0x8c/0x110 [ 207.319162] ? sysfs_create_link+0x48/0xc0 ... " Fixes: c28bfb107dac ("drm/xe/nvm: add on-die non-volatile memory device") Reviewed-by: Alexander Usyskin Reviewed-by: Brian Nguyen Cc: Rodrigo Vivi Cc: Riana Tauro Signed-off-by: Shuicheng Lin Signed-off-by: Ashutosh Dixit Link: https://patch.msgid.link/20260120183239.2966782-6-shuicheng.lin@intel.com (cherry picked from commit 11035eab1b7d88daa7904440046e64d3810b1ca1) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_device.c | 2 -- drivers/gpu/drm/xe/xe_nvm.c | 43 +++++++++++++++++----------------- drivers/gpu/drm/xe/xe_nvm.h | 2 -- 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index cf29e259861f..9a6d49fcd8e4 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -984,8 +984,6 @@ void xe_device_remove(struct xe_device *xe) { xe_display_unregister(xe); - xe_nvm_fini(xe); - drm_dev_unplug(&xe->drm); xe_bo_pci_dev_remove_all(xe); diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c index 33f4ac82fc80..1fff24dbc7cd 100644 --- a/drivers/gpu/drm/xe/xe_nvm.c +++ b/drivers/gpu/drm/xe/xe_nvm.c @@ -83,6 +83,27 @@ static bool xe_nvm_writable_override(struct xe_device *xe) return writable_override; } +static void xe_nvm_fini(void *arg) +{ + struct xe_device *xe = arg; + struct intel_dg_nvm_dev *nvm = xe->nvm; + + if (!xe->info.has_gsc_nvm) + return; + + /* No access to internal NVM from VFs */ + if (IS_SRIOV_VF(xe)) + return; + + /* Nvm pointer should not be NULL here */ + if (WARN_ON(!nvm)) + return; + + auxiliary_device_delete(&nvm->aux_dev); + auxiliary_device_uninit(&nvm->aux_dev); + xe->nvm = NULL; +} + int xe_nvm_init(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -141,30 +162,10 @@ int xe_nvm_init(struct xe_device *xe) auxiliary_device_uninit(aux_dev); goto err; } - return 0; + return devm_add_action_or_reset(xe->drm.dev, xe_nvm_fini, xe); err: kfree(nvm); xe->nvm = NULL; return ret; } - -void xe_nvm_fini(struct xe_device *xe) -{ - struct intel_dg_nvm_dev *nvm = xe->nvm; - - if (!xe->info.has_gsc_nvm) - return; - - /* No access to internal NVM from VFs */ - if (IS_SRIOV_VF(xe)) - return; - - /* Nvm pointer should not be NULL here */ - if (WARN_ON(!nvm)) - return; - - auxiliary_device_delete(&nvm->aux_dev); - auxiliary_device_uninit(&nvm->aux_dev); - xe->nvm = NULL; -} diff --git a/drivers/gpu/drm/xe/xe_nvm.h b/drivers/gpu/drm/xe/xe_nvm.h index 7f3d5f57bed0..fd3467ad35a4 100644 --- a/drivers/gpu/drm/xe/xe_nvm.h +++ b/drivers/gpu/drm/xe/xe_nvm.h @@ -10,6 +10,4 @@ struct xe_device; int xe_nvm_init(struct xe_device *xe); -void xe_nvm_fini(struct xe_device *xe); - #endif From 8a44241b0b83a6047c5448da1fff03fcc29496b5 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Tue, 20 Jan 2026 18:32:42 +0000 Subject: [PATCH 137/195] drm/xe/nvm: Fix double-free on aux add failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a successful auxiliary_device_init(), aux_dev->dev.release (xe_nvm_release_dev()) is responsible for the kfree(nvm). When there is failure with auxiliary_device_add(), driver will call auxiliary_device_uninit(), which call put_device(). So that the .release callback will be triggered to free the memory associated with the auxiliary_device. Move the kfree(nvm) into the auxiliary_device_init() failure path and remove the err goto path to fix below error. " [ 13.232905] ================================================================== [ 13.232911] BUG: KASAN: double-free in xe_nvm_init+0x751/0xf10 [xe] [ 13.233112] Free of addr ffff888120635000 by task systemd-udevd/273 [ 13.233120] CPU: 8 UID: 0 PID: 273 Comm: systemd-udevd Not tainted 6.19.0-rc2-lgci-xe-kernel+ #225 PREEMPT(voluntary) ... [ 13.233125] Call Trace: [ 13.233126] [ 13.233127] dump_stack_lvl+0x7f/0xc0 [ 13.233132] print_report+0xce/0x610 [ 13.233136] ? kasan_complete_mode_report_info+0x5d/0x1e0 [ 13.233139] ? xe_nvm_init+0x751/0xf10 [xe] ... " v2: drop err goto path. (Alexander) Fixes: 7926ba2143d8 ("drm/xe: defer free of NVM auxiliary container to device release callback") Reviewed-by: Nitin Gote Reviewed-by: Brian Nguyen Cc: Alexander Usyskin Cc: Rodrigo Vivi Suggested-by: Brian Nguyen Signed-off-by: Shuicheng Lin Signed-off-by: Ashutosh Dixit Link: https://patch.msgid.link/20260120183239.2966782-7-shuicheng.lin@intel.com (cherry picked from commit a3187c0c2bbd947ffff97f90d077ac88f9c2a215) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_nvm.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c index 1fff24dbc7cd..6da42b2b5e46 100644 --- a/drivers/gpu/drm/xe/xe_nvm.c +++ b/drivers/gpu/drm/xe/xe_nvm.c @@ -153,19 +153,17 @@ int xe_nvm_init(struct xe_device *xe) ret = auxiliary_device_init(aux_dev); if (ret) { drm_err(&xe->drm, "xe-nvm aux init failed %d\n", ret); - goto err; + kfree(nvm); + xe->nvm = NULL; + return ret; } ret = auxiliary_device_add(aux_dev); if (ret) { drm_err(&xe->drm, "xe-nvm aux add failed %d\n", ret); auxiliary_device_uninit(aux_dev); - goto err; + xe->nvm = NULL; + return ret; } return devm_add_action_or_reset(xe->drm.dev, xe_nvm_fini, xe); - -err: - kfree(nvm); - xe->nvm = NULL; - return ret; } From 37d312bf957b95346fae2b3f82ce043474ea66c9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 28 Jan 2026 13:04:35 -0800 Subject: [PATCH 138/195] MAINTAINERS: add an entry for PSP We are missing a MAINTAINERS entry for PSP, create one. Acked-by: Willem de Bruijn Link: https://patch.msgid.link/20260128210435.161061-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0efa8cc6775b..12d0858e02c5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20973,6 +20973,18 @@ F: Documentation/devicetree/bindings/net/pse-pd/ F: drivers/net/pse-pd/ F: net/ethtool/pse-pd.c +PSP SECURITY PROTOCOL +M: Daniel Zahka +M: Jakub Kicinski +M: Willem de Bruijn +F: Documentation/netlink/specs/psp.yaml +F: Documentation/networking/psp.rst +F: include/net/psp/ +F: include/net/psp.h +F: include/uapi/linux/psp.h +F: net/psp/ +K: struct\ psp(_assoc|_dev|hdr)\b + PSTORE FILESYSTEM M: Kees Cook R: Tony Luck From 13e00fdc9236bd4d0bff4109d2983171fbcb74c4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Jan 2026 14:15:38 +0000 Subject: [PATCH 139/195] net: add skb_header_pointer_careful() helper This variant of skb_header_pointer() should be used in contexts where @offset argument is user-controlled and could be negative. Negative offsets are supported, as long as the zone starts between skb->head and skb->data. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260128141539.3404400-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 86737076101d..112e48970338 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4301,6 +4301,18 @@ skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) skb_headlen(skb), buffer); } +/* Variant of skb_header_pointer() where @offset is user-controlled + * and potentially negative. + */ +static inline void * __must_check +skb_header_pointer_careful(const struct sk_buff *skb, int offset, + int len, void *buffer) +{ + if (unlikely(offset < 0 && -offset > skb_headroom(skb))) + return NULL; + return skb_header_pointer(skb, offset, len, buffer); +} + static inline void * __must_check skb_pointer_if_linear(const struct sk_buff *skb, int offset, int len) { From cabd1a976375780dabab888784e356f574bbaed8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Jan 2026 14:15:39 +0000 Subject: [PATCH 140/195] net/sched: cls_u32: use skb_header_pointer_careful() skb_header_pointer() does not fully validate negative @offset values. Use skb_header_pointer_careful() instead. GangMin Kim provided a report and a repro fooling u32_classify(): BUG: KASAN: slab-out-of-bounds in u32_classify+0x1180/0x11b0 net/sched/cls_u32.c:221 Fixes: fbc2e7d9cf49 ("cls_u32: use skb_header_pointer() to dereference data safely") Reported-by: GangMin Kim Closes: https://lore.kernel.org/netdev/CANn89iJkyUZ=mAzLzC4GdcAgLuPnUoivdLaOs6B9rq5_erj76w@mail.gmail.com/T/ Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260128141539.3404400-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/cls_u32.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 2a1c00048fd6..58e849c0acf4 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -161,10 +161,8 @@ next_knode: int toff = off + key->off + (off2 & key->offmask); __be32 *data, hdata; - if (skb_headroom(skb) + toff > INT_MAX) - goto out; - - data = skb_header_pointer(skb, toff, 4, &hdata); + data = skb_header_pointer_careful(skb, toff, 4, + &hdata); if (!data) goto out; if ((*data ^ key->val) & key->mask) { @@ -214,8 +212,9 @@ check_terminal: if (ht->divisor) { __be32 *data, hdata; - data = skb_header_pointer(skb, off + n->sel.hoff, 4, - &hdata); + data = skb_header_pointer_careful(skb, + off + n->sel.hoff, + 4, &hdata); if (!data) goto out; sel = ht->divisor & u32_hash_fold(*data, &n->sel, @@ -229,7 +228,7 @@ check_terminal: if (n->sel.flags & TC_U32_VAROFFSET) { __be16 *data, hdata; - data = skb_header_pointer(skb, + data = skb_header_pointer_careful(skb, off + n->sel.offoff, 2, &hdata); if (!data) From ed48a84a72fefb20a82dd90a7caa7807e90c6f66 Mon Sep 17 00:00:00 2001 From: Junrui Luo Date: Wed, 28 Jan 2026 16:07:34 +0800 Subject: [PATCH 141/195] dpaa2-switch: prevent ZERO_SIZE_PTR dereference when num_ifs is zero The driver allocates arrays for ports, FDBs, and filter blocks using kcalloc() with ethsw->sw_attr.num_ifs as the element count. When the device reports zero interfaces (either due to hardware configuration or firmware issues), kcalloc(0, ...) returns ZERO_SIZE_PTR (0x10) instead of NULL. Later in dpaa2_switch_probe(), the NAPI initialization unconditionally accesses ethsw->ports[0]->netdev, which attempts to dereference ZERO_SIZE_PTR (address 0x10), resulting in a kernel panic. Add a check to ensure num_ifs is greater than zero after retrieving device attributes. This prevents the zero-sized allocations and subsequent invalid pointer dereference. Reported-by: Yuhao Jiang Reported-by: Junrui Luo Fixes: 0b1b71370458 ("staging: dpaa2-switch: handle Rx path on control interface") Signed-off-by: Junrui Luo Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/SYBPR01MB7881BEABA8DA896947962470AF91A@SYBPR01MB7881.ausprd01.prod.outlook.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index b1e1ad9e4b48..0ff234f6a3ed 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -3024,6 +3024,12 @@ static int dpaa2_switch_init(struct fsl_mc_device *sw_dev) goto err_close; } + if (!ethsw->sw_attr.num_ifs) { + dev_err(dev, "DPSW device has no interfaces\n"); + err = -ENODEV; + goto err_close; + } + err = dpsw_get_api_version(ethsw->mc_io, 0, ðsw->major, ðsw->minor); From 926ede0c85e1e57c97d64d9612455267d597bb2c Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Wed, 28 Jan 2026 15:44:38 +0000 Subject: [PATCH 142/195] net: liquidio: Initialize netdev pointer before queue setup In setup_nic_devices(), the netdev is allocated using alloc_etherdev_mq(). However, the pointer to this structure is stored in oct->props[i].netdev only after the calls to netif_set_real_num_rx_queues() and netif_set_real_num_tx_queues(). If either of these functions fails, setup_nic_devices() returns an error without freeing the allocated netdev. Since oct->props[i].netdev is still NULL at this point, the cleanup function liquidio_destroy_nic_device() will fail to find and free the netdev, resulting in a memory leak. Fix this by initializing oct->props[i].netdev before calling the queue setup functions. This ensures that the netdev is properly accessible for cleanup in case of errors. Compile tested only. Issue found using a prototype static analysis tool and code review. Fixes: c33c997346c3 ("liquidio: enhanced ethtool --set-channels feature") Signed-off-by: Zilin Guan Reviewed-by: Kory Maincent Link: https://patch.msgid.link/20260128154440.278369-2-zilin@seu.edu.cn Signed-off-by: Jakub Kicinski --- .../net/ethernet/cavium/liquidio/lio_main.c | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 0732440eeacd..1f10b1b22a1e 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -3505,6 +3505,23 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) */ netdev->netdev_ops = &lionetdevops; + lio = GET_LIO(netdev); + + memset(lio, 0, sizeof(struct lio)); + + lio->ifidx = ifidx_or_pfnum; + + props = &octeon_dev->props[i]; + props->gmxport = resp->cfg_info.linfo.gmxport; + props->netdev = netdev; + + /* Point to the properties for octeon device to which this + * interface belongs. + */ + lio->oct_dev = octeon_dev; + lio->octprops = props; + lio->netdev = netdev; + retval = netif_set_real_num_rx_queues(netdev, num_oqueues); if (retval) { dev_err(&octeon_dev->pci_dev->dev, @@ -3521,16 +3538,6 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) goto setup_nic_dev_free; } - lio = GET_LIO(netdev); - - memset(lio, 0, sizeof(struct lio)); - - lio->ifidx = ifidx_or_pfnum; - - props = &octeon_dev->props[i]; - props->gmxport = resp->cfg_info.linfo.gmxport; - props->netdev = netdev; - lio->linfo.num_rxpciq = num_oqueues; lio->linfo.num_txpciq = num_iqueues; for (j = 0; j < num_oqueues; j++) { @@ -3596,13 +3603,6 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) netdev->min_mtu = LIO_MIN_MTU_SIZE; netdev->max_mtu = LIO_MAX_MTU_SIZE; - /* Point to the properties for octeon device to which this - * interface belongs. - */ - lio->oct_dev = octeon_dev; - lio->octprops = props; - lio->netdev = netdev; - dev_dbg(&octeon_dev->pci_dev->dev, "if%d gmx: %d hw_addr: 0x%llx\n", i, lio->linfo.gmxport, CVM_CAST64(lio->linfo.hw_addr)); From 8558aef4e8a1a83049ab906d21d391093cfa7e7f Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Wed, 28 Jan 2026 15:44:39 +0000 Subject: [PATCH 143/195] net: liquidio: Fix off-by-one error in PF setup_nic_devices() cleanup In setup_nic_devices(), the initialization loop jumps to the label setup_nic_dev_free on failure. The current cleanup loop while(i--) skip the failing index i, causing a memory leak. Fix this by changing the loop to iterate from the current index i down to 0. Also, decrement i in the devlink_alloc failure path to point to the last successfully allocated index. Compile tested only. Issue found using code review. Fixes: f21fb3ed364b ("Add support of Cavium Liquidio ethernet adapters") Suggested-by: Simon Horman Signed-off-by: Zilin Guan Reviewed-by: Kory Maincent Link: https://patch.msgid.link/20260128154440.278369-3-zilin@seu.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cavium/liquidio/lio_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 1f10b1b22a1e..c1a3df225254 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -3750,6 +3750,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) if (!devlink) { device_unlock(&octeon_dev->pci_dev->dev); dev_err(&octeon_dev->pci_dev->dev, "devlink alloc failed\n"); + i--; goto setup_nic_dev_free; } @@ -3765,11 +3766,11 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) setup_nic_dev_free: - while (i--) { + do { dev_err(&octeon_dev->pci_dev->dev, "NIC ifidx:%d Setup failed\n", i); liquidio_destroy_nic_device(octeon_dev, i); - } + } while (i--); setup_nic_dev_done: From 6cbba46934aefdfb5d171e0a95aec06c24f7ca30 Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Wed, 28 Jan 2026 15:44:40 +0000 Subject: [PATCH 144/195] net: liquidio: Fix off-by-one error in VF setup_nic_devices() cleanup In setup_nic_devices(), the initialization loop jumps to the label setup_nic_dev_free on failure. The current cleanup loop while(i--) skip the failing index i, causing a memory leak. Fix this by changing the loop to iterate from the current index i down to 0. Compile tested only. Issue found using code review. Fixes: 846b46873eeb ("liquidio CN23XX: VF offload features") Suggested-by: Simon Horman Signed-off-by: Zilin Guan Reviewed-by: Kory Maincent Link: https://patch.msgid.link/20260128154440.278369-4-zilin@seu.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index e02942dbbcce..43c595f3b84e 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -2212,11 +2212,11 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) setup_nic_dev_free: - while (i--) { + do { dev_err(&octeon_dev->pci_dev->dev, "NIC ifidx:%d Setup failed\n", i); liquidio_destroy_nic_device(octeon_dev, i); - } + } while (i--); setup_nic_dev_done: From 31a7a0bbeb006bac2d9c81a2874825025214b6d8 Mon Sep 17 00:00:00 2001 From: Junrui Luo Date: Thu, 29 Jan 2026 00:55:13 +0800 Subject: [PATCH 145/195] dpaa2-switch: add bounds check for if_id in IRQ handler The IRQ handler extracts if_id from the upper 16 bits of the hardware status register and uses it to index into ethsw->ports[] without validation. Since if_id can be any 16-bit value (0-65535) but the ports array is only allocated with sw_attr.num_ifs elements, this can lead to an out-of-bounds read potentially. Add a bounds check before accessing the array, consistent with the existing validation in dpaa2_switch_rx(). Reported-by: Yuhao Jiang Reported-by: Junrui Luo Fixes: 24ab724f8a46 ("dpaa2-switch: use the port index in the IRQ handler") Signed-off-by: Junrui Luo Link: https://patch.msgid.link/SYBPR01MB7881D420AB43FF1A227B84AFAF91A@SYBPR01MB7881.ausprd01.prod.outlook.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index 0ff234f6a3ed..66240c340492 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -1531,6 +1531,10 @@ static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg) } if_id = (status & 0xFFFF0000) >> 16; + if (if_id >= ethsw->sw_attr.num_ifs) { + dev_err(dev, "Invalid if_id %d in IRQ status\n", if_id); + goto out; + } port_priv = ethsw->ports[if_id]; if (status & DPSW_IRQ_EVENT_LINK_CHANGED) From 99854c167cfc113ad863832b1601c4ca1a639cfe Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Thu, 27 Nov 2025 10:25:58 +0100 Subject: [PATCH 146/195] ice: fix missing TX timestamps interrupts on E825 devices Modify PTP (Precision Time Protocol) configuration on link down flow. Previously, PHY_REG_TX_OFFSET_READY register was cleared in such case. This register is used to determine if the timestamp is valid or not on the hardware side. However, there is a possibility that there is still the packet in the HW queue which originally was supposed to be timestamped but the link is already down and given register is cleared. This potentially might lead to the situation in which that 'delayed' packet's timestamp is treated as invalid one when the link is up again. This in turn leads to the situation in which the driver is not able to effectively clean timestamp memory and interrupt configuration. From the hardware perspective, that 'old' interrupt was not handled properly and even if new timestamp packets are processed, no new interrupts is generated. As a result, providing timestamps to the user applications (like ptp4l) is not possible. The solution for this problem is implemented at the driver level rather than the firmware, and maintains the tx_ready bit high, even during link down events. This avoids entering a potential inconsistent state between the driver and the timestamp hardware. Testing hints: - run PTP traffic at higher rate (like 16 PTP messages per second) - observe ptp4l behaviour at the client side in the following conditions: a) trigger link toggle events. It needs to be physiscal link down/up events b) link speed change In all above cases, PTP processing at ptp4l application should resume always. In failure case, the following permanent error message in ptp4l log was observed: controller-0 ptp4l: err [6175.116] ptp4l-legacy timed out while polling for tx timestamp Fixes: 7cab44f1c35f ("ice: Introduce ETH56G PHY model for E825C products") Reviewed-by: Aleksandr Loktionov Signed-off-by: Grzegorz Nitka Tested-by: Sunitha Mekala (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 4c8d20f2d2c0..0b7c2a13ab04 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1347,9 +1347,12 @@ void ice_ptp_link_change(struct ice_pf *pf, bool linkup) /* Do not reconfigure E810 or E830 PHY */ return; case ICE_MAC_GENERIC: - case ICE_MAC_GENERIC_3K_E825: ice_ptp_port_phy_restart(ptp_port); return; + case ICE_MAC_GENERIC_3K_E825: + if (linkup) + ice_ptp_port_phy_restart(ptp_port); + return; default: dev_warn(ice_pf_to_dev(pf), "%s: Unknown PHY type\n", __func__); } From 88b68f35eb43ad5ac77ac1107059040b04e6f477 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 21 Jan 2026 10:44:19 -0800 Subject: [PATCH 147/195] ice: PTP: fix missing timestamps on E825 hardware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The E825 hardware currently has each PF handle the PFINT_TSYN_TX cause of the miscellaneous OICR interrupt vector. The actual interrupt cause underlying this is shared by all ports on the same quad: ┌─────────────────────────────────┐ │ │ │ ┌────┐ ┌────┐ ┌────┐ ┌────┐ │ │ │PF 0│ │PF 1│ │PF 2│ │PF 3│ │ │ └────┘ └────┘ └────┘ └────┘ │ │ │ └────────────────▲────────────────┘ │ │ ┌────────────────┼────────────────┐ │ PHY QUAD │ └───▲────────▲────────▲────────▲──┘ │ │ │ │ ┌───┼──┐ ┌───┴──┐ ┌───┼──┐ ┌───┼──┐ │Port 0│ │Port 1│ │Port 2│ │Port 3│ └──────┘ └──────┘ └──────┘ └──────┘ If multiple PFs issue Tx timestamp requests near simultaneously, it is possible that the correct PF will not be interrupted and will miss its timestamp. Understanding why is somewhat complex. Consider the following sequence of events: CPU 0: Send Tx packet on PF 0 ... PF 0 enqueues packet with Tx request CPU 1, PF1: ... Send Tx packet on PF1 ... PF 1 enqueues packet with Tx request HW: PHY Port 0 sends packet PHY raises Tx timestamp event interrupt MAC raises each PF interrupt CPU 0, PF0: CPU 1, PF1: ice_misc_intr() checks for Tx timestamps ice_misc_intr() checks for Tx timestamp Sees packet ready bit set Sees nothing available ... Exits ... ... HW: PHY port 1 sends packet PHY interrupt ignored because not all packet timestamps read yet. ... Read timestamp, report to stack Because the interrupt event is shared for all ports on the same quad, the PHY will not raise a new interrupt for any PF until all timestamps are read. In the example above, the second timestamp comes in for port 1 before the timestamp from port 0 is read. At this point, there is no longer an interrupt thread running that will read the timestamps, because each PF has checked and found that there was no work to do. Applications such as ptp4l will timeout after waiting a few milliseconds. Eventually, the watchdog service task will re-check for all quads and notice that there are outstanding timestamps, and issue a software interrupt to recover. However, by this point it is far too late, and applications have already failed. All of this occurs because of the underlying hardware behavior. The PHY cannot raise a new interrupt signal until all outstanding timestamps have been read. As a first step to fix this, switch the E825C hardware to the ICE_PTP_TX_INTERRUPT_ALL mode. In this mode, only the clock owner PF will respond to the PFINT_TSYN_TX cause. Other PFs disable this cause and will not wake. In this mode, the clock owner will iterate over all ports and handle timestamps for each connected port. This matches the E822 behavior, and is a necessary but insufficient step to resolve the missing timestamps. Even with use of the ICE_PTP_TX_INTERRUPT_ALL mode, we still sometimes miss a timestamp event. The ice_ptp_tx_tstamp_owner() does re-check the ready bitmap, but does so before re-enabling the OICR interrupt vector. It also only checks the ready bitmap, but not the software Tx timestamp tracker. To avoid risk of losing a timestamp, refactor the logic to check both the software Tx timestamp tracker bitmap *and* the hardware ready bitmap. Additionally, do this outside of ice_ptp_process_ts() after we have already re-enabled the OICR interrupt. Remove the checks from the ice_ptp_tx_tstamp(), ice_ptp_tx_tstamp_owner(), and the ice_ptp_process_ts() functions. This results in ice_ptp_tx_tstamp() being nothing more than a wrapper around ice_ptp_process_tx_tstamp() so we can remove it. Add the ice_ptp_tx_tstamps_pending() function which returns a boolean indicating if there are any pending Tx timestamps. First, check the software timestamp tracker bitmap. In ICE_PTP_TX_INTERRUPT_ALL mode, check *all* ports software trackers. If a tracker has outstanding timestamp requests, return true. Additionally, check the PHY ready bitmap to confirm if the PHY indicates any outstanding timestamps. In the ice_misc_thread_fn(), call ice_ptp_tx_tstamps_pending() just before returning from the IRQ thread handler. If it returns true, write to PFINT_OICR to trigger a PFINT_OICR_TSYN_TX_M software interrupt. This will force the handler to interrupt again and complete the work even if the PHY hardware did not interrupt for any reason. This results in the following new flow for handling Tx timestamps: 1) send Tx packet 2) PHY captures timestamp 3) PHY triggers MAC interrupt 4) clock owner executes ice_misc_intr() with PFINT_OICR_TSYN_TX flag set 5) ice_ptp_ts_irq() returns IRQ_WAKE_THREAD 7) The interrupt thread wakes up and kernel calls ice_misc_intr_thread_fn() 8) ice_ptp_process_ts() is called to handle any outstanding timestamps 9) ice_irq_dynamic_ena() is called to re-enable the OICR hardware interrupt cause 10) ice_ptp_tx_tstamps_pending() is called to check if we missed any more outstanding timestamps, checking both software and hardware indicators. With this change, it should no longer be possible for new timestamps to come in such a way that we lose an interrupt. If a timestamp comes in before the ice_ptp_tx_tstamps_pending() call, it will be noticed by at least one of the software bitmap check or the hardware bitmap check. If the timestamp comes in *after* this check, it should cause a timestamp interrupt as we have already read all timestamps from the PHY and the OICR vector has been re-enabled. Fixes: 7cab44f1c35f ("ice: Introduce ETH56G PHY model for E825C products") Signed-off-by: Jacob Keller Reviewed-by: Aleksandr Loktionov Reviewed-by: Przemyslaw Korba Tested-by: Vitaly Grinberg Tested-by: Sunitha Mekala (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 20 +-- drivers/net/ethernet/intel/ice/ice_ptp.c | 148 ++++++++++++---------- drivers/net/ethernet/intel/ice/ice_ptp.h | 13 +- 3 files changed, 103 insertions(+), 78 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 71c6d53b461e..25cbbe67d992 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3314,18 +3314,20 @@ static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data) if (ice_is_reset_in_progress(pf->state)) goto skip_irq; - if (test_and_clear_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread)) { - /* Process outstanding Tx timestamps. If there is more work, - * re-arm the interrupt to trigger again. - */ - if (ice_ptp_process_ts(pf) == ICE_TX_TSTAMP_WORK_PENDING) { - wr32(hw, PFINT_OICR, PFINT_OICR_TSYN_TX_M); - ice_flush(hw); - } - } + if (test_and_clear_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread)) + ice_ptp_process_ts(pf); skip_irq: ice_irq_dynamic_ena(hw, NULL, NULL); + ice_flush(hw); + + if (ice_ptp_tx_tstamps_pending(pf)) { + /* If any new Tx timestamps happened while in interrupt, + * re-arm the interrupt to trigger it again. + */ + wr32(hw, PFINT_OICR, PFINT_OICR_TSYN_TX_M); + ice_flush(hw); + } return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 0b7c2a13ab04..b5cef6396319 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -573,6 +573,9 @@ static void ice_ptp_process_tx_tstamp(struct ice_ptp_tx *tx) pf = ptp_port_to_pf(ptp_port); hw = &pf->hw; + if (!tx->init) + return; + /* Read the Tx ready status first */ if (tx->has_ready_bitmap) { err = ice_get_phy_tx_tstamp_ready(hw, tx->block, &tstamp_ready); @@ -674,14 +677,9 @@ skip_ts_read: pf->ptp.tx_hwtstamp_good += tstamp_good; } -/** - * ice_ptp_tx_tstamp_owner - Process Tx timestamps for all ports on the device - * @pf: Board private structure - */ -static enum ice_tx_tstamp_work ice_ptp_tx_tstamp_owner(struct ice_pf *pf) +static void ice_ptp_tx_tstamp_owner(struct ice_pf *pf) { struct ice_ptp_port *port; - unsigned int i; mutex_lock(&pf->adapter->ports.lock); list_for_each_entry(port, &pf->adapter->ports.ports, list_node) { @@ -693,49 +691,6 @@ static enum ice_tx_tstamp_work ice_ptp_tx_tstamp_owner(struct ice_pf *pf) ice_ptp_process_tx_tstamp(tx); } mutex_unlock(&pf->adapter->ports.lock); - - for (i = 0; i < ICE_GET_QUAD_NUM(pf->hw.ptp.num_lports); i++) { - u64 tstamp_ready; - int err; - - /* Read the Tx ready status first */ - err = ice_get_phy_tx_tstamp_ready(&pf->hw, i, &tstamp_ready); - if (err) - break; - else if (tstamp_ready) - return ICE_TX_TSTAMP_WORK_PENDING; - } - - return ICE_TX_TSTAMP_WORK_DONE; -} - -/** - * ice_ptp_tx_tstamp - Process Tx timestamps for this function. - * @tx: Tx tracking structure to initialize - * - * Returns: ICE_TX_TSTAMP_WORK_PENDING if there are any outstanding incomplete - * Tx timestamps, or ICE_TX_TSTAMP_WORK_DONE otherwise. - */ -static enum ice_tx_tstamp_work ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) -{ - bool more_timestamps; - unsigned long flags; - - if (!tx->init) - return ICE_TX_TSTAMP_WORK_DONE; - - /* Process the Tx timestamp tracker */ - ice_ptp_process_tx_tstamp(tx); - - /* Check if there are outstanding Tx timestamps */ - spin_lock_irqsave(&tx->lock, flags); - more_timestamps = tx->init && !bitmap_empty(tx->in_use, tx->len); - spin_unlock_irqrestore(&tx->lock, flags); - - if (more_timestamps) - return ICE_TX_TSTAMP_WORK_PENDING; - - return ICE_TX_TSTAMP_WORK_DONE; } /** @@ -2666,32 +2621,94 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) return idx + tx->offset; } -/** - * ice_ptp_process_ts - Process the PTP Tx timestamps - * @pf: Board private structure - * - * Returns: ICE_TX_TSTAMP_WORK_PENDING if there are any outstanding Tx - * timestamps that need processing, and ICE_TX_TSTAMP_WORK_DONE otherwise. - */ -enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf) +void ice_ptp_process_ts(struct ice_pf *pf) { switch (pf->ptp.tx_interrupt_mode) { case ICE_PTP_TX_INTERRUPT_NONE: /* This device has the clock owner handle timestamps for it */ - return ICE_TX_TSTAMP_WORK_DONE; + return; case ICE_PTP_TX_INTERRUPT_SELF: /* This device handles its own timestamps */ - return ice_ptp_tx_tstamp(&pf->ptp.port.tx); + ice_ptp_process_tx_tstamp(&pf->ptp.port.tx); + return; case ICE_PTP_TX_INTERRUPT_ALL: /* This device handles timestamps for all ports */ - return ice_ptp_tx_tstamp_owner(pf); + ice_ptp_tx_tstamp_owner(pf); + return; default: WARN_ONCE(1, "Unexpected Tx timestamp interrupt mode %u\n", pf->ptp.tx_interrupt_mode); - return ICE_TX_TSTAMP_WORK_DONE; + return; } } +static bool ice_port_has_timestamps(struct ice_ptp_tx *tx) +{ + bool more_timestamps; + + scoped_guard(spinlock_irqsave, &tx->lock) { + if (!tx->init) + return false; + + more_timestamps = !bitmap_empty(tx->in_use, tx->len); + } + + return more_timestamps; +} + +static bool ice_any_port_has_timestamps(struct ice_pf *pf) +{ + struct ice_ptp_port *port; + + scoped_guard(mutex, &pf->adapter->ports.lock) { + list_for_each_entry(port, &pf->adapter->ports.ports, + list_node) { + struct ice_ptp_tx *tx = &port->tx; + + if (ice_port_has_timestamps(tx)) + return true; + } + } + + return false; +} + +bool ice_ptp_tx_tstamps_pending(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + unsigned int i; + + /* Check software indicator */ + switch (pf->ptp.tx_interrupt_mode) { + case ICE_PTP_TX_INTERRUPT_NONE: + return false; + case ICE_PTP_TX_INTERRUPT_SELF: + if (ice_port_has_timestamps(&pf->ptp.port.tx)) + return true; + break; + case ICE_PTP_TX_INTERRUPT_ALL: + if (ice_any_port_has_timestamps(pf)) + return true; + break; + default: + WARN_ONCE(1, "Unexpected Tx timestamp interrupt mode %u\n", + pf->ptp.tx_interrupt_mode); + break; + } + + /* Check hardware indicator */ + for (i = 0; i < ICE_GET_QUAD_NUM(hw->ptp.num_lports); i++) { + u64 tstamp_ready = 0; + int err; + + err = ice_get_phy_tx_tstamp_ready(&pf->hw, i, &tstamp_ready); + if (err || tstamp_ready) + return true; + } + + return false; +} + /** * ice_ptp_ts_irq - Process the PTP Tx timestamps in IRQ context * @pf: Board private structure @@ -2741,7 +2758,9 @@ irqreturn_t ice_ptp_ts_irq(struct ice_pf *pf) return IRQ_WAKE_THREAD; case ICE_MAC_E830: /* E830 can read timestamps in the top half using rd32() */ - if (ice_ptp_process_ts(pf) == ICE_TX_TSTAMP_WORK_PENDING) { + ice_ptp_process_ts(pf); + + if (ice_ptp_tx_tstamps_pending(pf)) { /* Process outstanding Tx timestamps. If there * is more work, re-arm the interrupt to trigger again. */ @@ -3194,8 +3213,9 @@ static void ice_ptp_init_tx_interrupt_mode(struct ice_pf *pf) { switch (pf->hw.mac_type) { case ICE_MAC_GENERIC: - /* E822 based PHY has the clock owner process the interrupt - * for all ports. + case ICE_MAC_GENERIC_3K_E825: + /* E82x hardware has the clock owner process timestamps for + * all ports. */ if (ice_pf_src_tmr_owned(pf)) pf->ptp.tx_interrupt_mode = ICE_PTP_TX_INTERRUPT_ALL; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 27016aac4f1e..8489bd842710 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -304,8 +304,9 @@ void ice_ptp_extts_event(struct ice_pf *pf); s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb); void ice_ptp_req_tx_single_tstamp(struct ice_ptp_tx *tx, u8 idx); void ice_ptp_complete_tx_single_tstamp(struct ice_ptp_tx *tx); -enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf); +void ice_ptp_process_ts(struct ice_pf *pf); irqreturn_t ice_ptp_ts_irq(struct ice_pf *pf); +bool ice_ptp_tx_tstamps_pending(struct ice_pf *pf); u64 ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts); @@ -345,16 +346,18 @@ static inline void ice_ptp_req_tx_single_tstamp(struct ice_ptp_tx *tx, u8 idx) static inline void ice_ptp_complete_tx_single_tstamp(struct ice_ptp_tx *tx) { } -static inline bool ice_ptp_process_ts(struct ice_pf *pf) -{ - return true; -} +static inline void ice_ptp_process_ts(struct ice_pf *pf) { } static inline irqreturn_t ice_ptp_ts_irq(struct ice_pf *pf) { return IRQ_HANDLED; } +static inline bool ice_ptp_tx_tstamps_pending(struct ice_pf *pf) +{ + return false; +} + static inline u64 ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts) { From fc6f36eaaedcf4b81af6fe1a568f018ffd530660 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Wed, 21 Jan 2026 15:51:06 +0800 Subject: [PATCH 148/195] ice: Fix PTP NULL pointer dereference during VSI rebuild Fix race condition where PTP periodic work runs while VSI is being rebuilt, accessing NULL vsi->rx_rings. The sequence was: 1. ice_ptp_prepare_for_reset() cancels PTP work 2. ice_ptp_rebuild() immediately queues PTP work 3. VSI rebuild happens AFTER ice_ptp_rebuild() 4. PTP work runs and accesses NULL vsi->rx_rings Fix: Keep PTP work cancelled during rebuild, only queue it after VSI rebuild completes in ice_rebuild(). Added ice_ptp_queue_work() helper function to encapsulate the logic for queuing PTP work, ensuring it's only queued when PTP is supported and the state is ICE_PTP_READY. Error log: [ 121.392544] ice 0000:60:00.1: PTP reset successful [ 121.392692] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 121.392712] #PF: supervisor read access in kernel mode [ 121.392720] #PF: error_code(0x0000) - not-present page [ 121.392727] PGD 0 [ 121.392734] Oops: Oops: 0000 [#1] SMP NOPTI [ 121.392746] CPU: 8 UID: 0 PID: 1005 Comm: ice-ptp-0000:60 Tainted: G S 6.19.0-rc6+ #4 PREEMPT(voluntary) [ 121.392761] Tainted: [S]=CPU_OUT_OF_SPEC [ 121.392773] RIP: 0010:ice_ptp_update_cached_phctime+0xbf/0x150 [ice] [ 121.393042] Call Trace: [ 121.393047] [ 121.393055] ice_ptp_periodic_work+0x69/0x180 [ice] [ 121.393202] kthread_worker_fn+0xa2/0x260 [ 121.393216] ? __pfx_ice_ptp_periodic_work+0x10/0x10 [ice] [ 121.393359] ? __pfx_kthread_worker_fn+0x10/0x10 [ 121.393371] kthread+0x10d/0x230 [ 121.393382] ? __pfx_kthread+0x10/0x10 [ 121.393393] ret_from_fork+0x273/0x2b0 [ 121.393407] ? __pfx_kthread+0x10/0x10 [ 121.393417] ret_from_fork_asm+0x1a/0x30 [ 121.393432] Fixes: 803bef817807d ("ice: factor out ice_ptp_rebuild_owner()") Signed-off-by: Aaron Ma Tested-by: Sunitha Mekala (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 3 +++ drivers/net/ethernet/intel/ice/ice_ptp.c | 26 ++++++++++++++++++----- drivers/net/ethernet/intel/ice/ice_ptp.h | 5 +++++ 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 25cbbe67d992..c1033070d0c7 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7809,6 +7809,9 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) /* Restore timestamp mode settings after VSI rebuild */ ice_ptp_restore_timestamp_mode(pf); + + /* Start PTP periodic work after VSI is fully rebuilt */ + ice_ptp_queue_work(pf); return; err_vsi_rebuild: diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index b5cef6396319..272683001476 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -2839,6 +2839,20 @@ static void ice_ptp_periodic_work(struct kthread_work *work) msecs_to_jiffies(err ? 10 : 500)); } +/** + * ice_ptp_queue_work - Queue PTP periodic work for a PF + * @pf: Board private structure + * + * Helper function to queue PTP periodic work after VSI rebuild completes. + * This ensures that PTP work only runs when VSI structures are ready. + */ +void ice_ptp_queue_work(struct ice_pf *pf) +{ + if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags) && + pf->ptp.state == ICE_PTP_READY) + kthread_queue_delayed_work(pf->ptp.kworker, &pf->ptp.work, 0); +} + /** * ice_ptp_prepare_rebuild_sec - Prepare second NAC for PTP reset or rebuild * @pf: Board private structure @@ -2857,10 +2871,15 @@ static void ice_ptp_prepare_rebuild_sec(struct ice_pf *pf, bool rebuild, struct ice_pf *peer_pf = ptp_port_to_pf(port); if (!ice_is_primary(&peer_pf->hw)) { - if (rebuild) + if (rebuild) { + /* TODO: When implementing rebuild=true: + * 1. Ensure secondary PFs' VSIs are rebuilt + * 2. Call ice_ptp_queue_work(peer_pf) after VSI rebuild + */ ice_ptp_rebuild(peer_pf, reset_type); - else + } else { ice_ptp_prepare_for_reset(peer_pf, reset_type); + } } } } @@ -3006,9 +3025,6 @@ void ice_ptp_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) ptp->state = ICE_PTP_READY; - /* Start periodic work going */ - kthread_queue_delayed_work(ptp->kworker, &ptp->work, 0); - dev_info(ice_pf_to_dev(pf), "PTP reset successful\n"); return; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 8489bd842710..8c44bd758a4f 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -318,6 +318,7 @@ void ice_ptp_prepare_for_reset(struct ice_pf *pf, void ice_ptp_init(struct ice_pf *pf); void ice_ptp_release(struct ice_pf *pf); void ice_ptp_link_change(struct ice_pf *pf, bool linkup); +void ice_ptp_queue_work(struct ice_pf *pf); #else /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ static inline int ice_ptp_hwtstamp_get(struct net_device *netdev, @@ -386,6 +387,10 @@ static inline void ice_ptp_link_change(struct ice_pf *pf, bool linkup) { } +static inline void ice_ptp_queue_work(struct ice_pf *pf) +{ +} + static inline int ice_ptp_clock_index(struct ice_pf *pf) { return -1; From 234e615bfece9e3e91c50fe49ab9e68ee37c791a Mon Sep 17 00:00:00 2001 From: Mohammad Heib Date: Sun, 28 Dec 2025 21:40:21 +0200 Subject: [PATCH 149/195] ice: drop udp_tunnel_get_rx_info() call from ndo_open() The ice driver calls udp_tunnel_get_rx_info() during ice_open_internal(). This is redundant because UDP tunnel RX offload state is preserved across device down/up cycles. The udp_tunnel core handles synchronization automatically when required. Furthermore, recent changes in the udp_tunnel infrastructure require querying RX info while holding the udp_tunnel lock. Calling it directly from the ndo_open path violates this requirement, triggering the following lockdep warning: Call Trace: ice_open_internal+0x253/0x350 [ice] __udp_tunnel_nic_assert_locked+0x86/0xb0 [udp_tunnel] __dev_open+0x2f5/0x880 __dev_change_flags+0x44c/0x660 netif_change_flags+0x80/0x160 devinet_ioctl+0xd21/0x15f0 inet_ioctl+0x311/0x350 sock_ioctl+0x114/0x220 __x64_sys_ioctl+0x131/0x1a0 ... Remove the redundant and unsafe call to udp_tunnel_get_rx_info() from ice_open_internal() to resolve the locking violation Fixes: 1ead7501094c ("udp_tunnel: remove rtnl_lock dependency") Signed-off-by: Mohammad Heib Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c1033070d0c7..d04605d3e61a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -9662,9 +9662,6 @@ int ice_open_internal(struct net_device *netdev) netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n", vsi->vsi_num, vsi->vsw->sw_id); - /* Update existing tunnels information */ - udp_tunnel_get_rx_info(netdev); - return err; } From 40857194956dcaf3d2b66d6bd113d844c93bef54 Mon Sep 17 00:00:00 2001 From: Mohammad Heib Date: Sun, 28 Dec 2025 21:40:20 +0200 Subject: [PATCH 150/195] i40e: drop udp_tunnel_get_rx_info() call from i40e_open() The i40e driver calls udp_tunnel_get_rx_info() during i40e_open(). This is redundant because UDP tunnel RX offload state is preserved across device down/up cycles. The udp_tunnel core handles synchronization automatically when required. Furthermore, recent changes in the udp_tunnel infrastructure require querying RX info while holding the udp_tunnel lock. Calling it directly from the ndo_open path violates this requirement, triggering the following lockdep warning: Call Trace: ? __udp_tunnel_nic_assert_locked+0x39/0x40 [udp_tunnel] i40e_open+0x135/0x14f [i40e] __dev_open+0x121/0x2e0 __dev_change_flags+0x227/0x270 dev_change_flags+0x3d/0xb0 devinet_ioctl+0x56f/0x860 sock_do_ioctl+0x7b/0x130 __x64_sys_ioctl+0x91/0xd0 do_syscall_64+0x90/0x170 ... Remove the redundant and unsafe call to udp_tunnel_get_rx_info() from i40e_open() resolve the locking violation. Fixes: 1ead7501094c ("udp_tunnel: remove rtnl_lock dependency") Signed-off-by: Mohammad Heib Reviewed-by: Aleksandr Loktionov Reviewed-by: Paul Menzel Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 0b1cc0481027..d3bc3207054f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -9030,7 +9030,6 @@ int i40e_open(struct net_device *netdev) TCP_FLAG_FIN | TCP_FLAG_CWR) >> 16); wr32(&pf->hw, I40E_GLLAN_TSOMSK_L, be32_to_cpu(TCP_FLAG_CWR) >> 16); - udp_tunnel_get_rx_info(netdev); return 0; } From f8ade833b733ae0b72e87ac6d2202a1afbe3eb4a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Jan 2026 17:43:08 -0800 Subject: [PATCH 151/195] KVM: x86: Explicitly configure supported XSS from {svm,vmx}_set_cpu_caps() Explicitly configure KVM's supported XSS as part of each vendor's setup flow to fix a bug where clearing SHSTK and IBT in kvm_cpu_caps, e.g. due to lack of CET XFEATURE support, makes kvm-intel.ko unloadable when nested VMX is enabled, i.e. when nested=1. The late clearing results in nested_vmx_setup_{entry,exit}_ctls() clearing VM_{ENTRY,EXIT}_LOAD_CET_STATE when nested_vmx_setup_ctls_msrs() runs during the CPU compatibility checks, ultimately leading to a mismatched VMCS config due to the reference config having the CET bits set, but every CPU's "local" config having the bits cleared. Note, kvm_caps.supported_{xcr0,xss} are unconditionally initialized by kvm_x86_vendor_init(), before calling into vendor code, and not referenced between ops->hardware_setup() and their current/old location. Fixes: 69cc3e886582 ("KVM: x86: Add XSS support for CET_KERNEL and CET_USER") Cc: stable@vger.kernel.org Cc: Mathias Krause Cc: John Allen Cc: Rick Edgecombe Cc: Chao Gao Cc: Binbin Wu Cc: Xiaoyao Li Reviewed-by: Xiaoyao Li Reviewed-by: Binbin Wu Link: https://patch.msgid.link/20260128014310.3255561-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 2 ++ arch/x86/kvm/vmx/vmx.c | 2 ++ arch/x86/kvm/x86.c | 30 +++++++++++++++++------------- arch/x86/kvm/x86.h | 2 ++ 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 24d59ccfa40d..4394be40fe78 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -5284,6 +5284,8 @@ static __init void svm_set_cpu_caps(void) */ kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT); kvm_cpu_cap_clear(X86_FEATURE_MSR_IMM); + + kvm_setup_xss_caps(); } static __init int svm_hardware_setup(void) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6b96f7aea20b..8c94241fbcca 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -8051,6 +8051,8 @@ static __init void vmx_set_cpu_caps(void) kvm_cpu_cap_clear(X86_FEATURE_SHSTK); kvm_cpu_cap_clear(X86_FEATURE_IBT); } + + kvm_setup_xss_caps(); } static bool vmx_is_io_intercepted(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 63afdb6bb078..72d37c8930ad 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9953,6 +9953,23 @@ static struct notifier_block pvclock_gtod_notifier = { }; #endif +void kvm_setup_xss_caps(void) +{ + if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) + kvm_caps.supported_xss = 0; + + if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) && + !kvm_cpu_cap_has(X86_FEATURE_IBT)) + kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL; + + if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) { + kvm_cpu_cap_clear(X86_FEATURE_SHSTK); + kvm_cpu_cap_clear(X86_FEATURE_IBT); + kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL; + } +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_setup_xss_caps); + static inline void kvm_ops_update(struct kvm_x86_init_ops *ops) { memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops)); @@ -10125,19 +10142,6 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) if (!tdp_enabled) kvm_caps.supported_quirks &= ~KVM_X86_QUIRK_IGNORE_GUEST_PAT; - if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) - kvm_caps.supported_xss = 0; - - if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) && - !kvm_cpu_cap_has(X86_FEATURE_IBT)) - kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL; - - if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) { - kvm_cpu_cap_clear(X86_FEATURE_SHSTK); - kvm_cpu_cap_clear(X86_FEATURE_IBT); - kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL; - } - if (kvm_caps.has_tsc_control) { /* * Make sure the user can only configure tsc_khz values that diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index fdab0ad49098..00de24f55b1f 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -471,6 +471,8 @@ extern struct kvm_host_values kvm_host; extern bool enable_pmu; +void kvm_setup_xss_caps(void); + /* * Get a filtered version of KVM's supported XCR0 that strips out dynamic * features for which the current process doesn't (yet) have permission to use. From 115135422562e2f791e98a6f55ec57b2da3b3a95 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Jan 2026 13:41:00 +0100 Subject: [PATCH 152/195] sched/deadline: Fix 'stuck' dl_server Andrea reported the dl_server getting stuck for him. He tracked it down to a state where dl_server_start() saw dl_defer_running==1, but the dl_server's job is no longer valid at the time of dl_server_start(). In the state diagram this corresponds to [4] D->A (or dl_server_stop() due to no more runnable tasks) followed by [1], which in case of a lapsed deadline must then be A->B. Now our A has dl_defer_running==1, while B demands dl_defer_running==0, therefore it must get cleared when the CBS wakeup rules demand a replenish. Fixes: a110a81c52a9 ("sched/deadline: Deferrable dl server") Reported-by: Andrea Righi arighi@nvidia.com Signed-off-by: Peter Zijlstra (Intel) Acked-by: Juri Lelli Tested-by: Andrea Righi arighi@nvidia.com Link: https://lkml.kernel.org/r/20260123161645.2181752-1-arighi@nvidia.com Link: https://patch.msgid.link/20260130124100.GC1079264@noisy.programming.kicks-ass.net --- kernel/sched/deadline.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index c509f2e7d69d..7bcde7114f1b 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1034,6 +1034,12 @@ static void update_dl_entity(struct sched_dl_entity *dl_se) return; } + /* + * When [4] D->A is followed by [1] A->B, dl_defer_running + * needs to be cleared, otherwise it will fail to properly + * start the zero-laxity timer. + */ + dl_se->dl_defer_running = 0; replenish_dl_new_period(dl_se, rq); } else if (dl_server(dl_se) && dl_se->dl_defer) { /* @@ -1655,6 +1661,12 @@ void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec) * dl_server_active = 1; * enqueue_dl_entity() * update_dl_entity(WAKEUP) + * if (dl_time_before() || dl_entity_overflow()) + * dl_defer_running = 0; + * replenish_dl_new_period(); + * // fwd period + * dl_throttled = 1; + * dl_defer_armed = 1; * if (!dl_defer_running) * dl_defer_armed = 1; * dl_throttled = 1; From 76ed27608f7dd235b727ebbb12163438c2fbb617 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 29 Jan 2026 10:28:21 -0500 Subject: [PATCH 153/195] perf: sched: Fix perf crash with new is_user_task() helper In order to do a user space stacktrace the current task needs to be a user task that has executed in user space. It use to be possible to test if a task is a user task or not by simply checking the task_struct mm field. If it was non NULL, it was a user task and if not it was a kernel task. But things have changed over time, and some kernel tasks now have their own mm field. An idea was made to instead test PF_KTHREAD and two functions were used to wrap this check in case it became more complex to test if a task was a user task or not[1]. But this was rejected and the C code simply checked the PF_KTHREAD directly. It was later found that not all kernel threads set PF_KTHREAD. The io-uring helpers instead set PF_USER_WORKER and this needed to be added as well. But checking the flags is still not enough. There's a very small window when a task exits that it frees its mm field and it is set back to NULL. If perf were to trigger at this moment, the flags test would say its a user space task but when perf would read the mm field it would crash with at NULL pointer dereference. Now there are flags that can be used to test if a task is exiting, but they are set in areas that perf may still want to profile the user space task (to see where it exited). The only real test is to check both the flags and the mm field. Instead of making this modification in every location, create a new is_user_task() helper function that does all the tests needed to know if it is safe to read the user space memory or not. [1] https://lore.kernel.org/all/20250425204120.639530125@goodmis.org/ Fixes: 90942f9fac05 ("perf: Use current->flags & PF_KTHREAD|PF_USER_WORKER instead of current->mm == NULL") Closes: https://lore.kernel.org/all/0d877e6f-41a7-4724-875d-0b0a27b8a545@roeck-us.net/ Reported-by: Guenter Roeck Signed-off-by: Steven Rostedt (Google) Signed-off-by: Peter Zijlstra (Intel) Tested-by: Guenter Roeck Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260129102821.46484722@gandalf.local.home --- include/linux/sched.h | 5 +++++ kernel/events/callchain.c | 2 +- kernel/events/core.c | 6 +++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index da0133524d08..5f00b5ed0f3b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1776,6 +1776,11 @@ static __always_inline bool is_percpu_thread(void) (current->nr_cpus_allowed == 1); } +static __always_inline bool is_user_task(struct task_struct *task) +{ + return task->mm && !(task->flags & (PF_KTHREAD | PF_USER_WORKER)); +} + /* Per-process atomic flags. */ #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 1f6589578703..9d24b6e0c91f 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -246,7 +246,7 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, if (user && !crosstask) { if (!user_mode(regs)) { - if (current->flags & (PF_KTHREAD | PF_USER_WORKER)) + if (!is_user_task(current)) goto exit_put; regs = task_pt_regs(current); } diff --git a/kernel/events/core.c b/kernel/events/core.c index a0fa488bce84..8cca80094624 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7460,7 +7460,7 @@ static void perf_sample_regs_user(struct perf_regs *regs_user, if (user_mode(regs)) { regs_user->abi = perf_reg_abi(current); regs_user->regs = regs; - } else if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER))) { + } else if (is_user_task(current)) { perf_get_regs_user(regs_user, regs); } else { regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; @@ -8100,7 +8100,7 @@ static u64 perf_virt_to_phys(u64 virt) * Try IRQ-safe get_user_page_fast_only first. * If failed, leave phys_addr as 0. */ - if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER))) { + if (is_user_task(current)) { struct page *p; pagefault_disable(); @@ -8215,7 +8215,7 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) { bool kernel = !event->attr.exclude_callchain_kernel; bool user = !event->attr.exclude_callchain_user && - !(current->flags & (PF_KTHREAD | PF_USER_WORKER)); + is_user_task(current); /* Disallow cross-task user callchains. */ bool crosstask = event->ctx->task && event->ctx->task != current; bool defer_user = IS_ENABLED(CONFIG_UNWIND_USER) && user && From 403dd7da22461b0c7fd18d5cd4373b9a1ec8b5f7 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 23 Jan 2026 16:30:56 +1100 Subject: [PATCH 154/195] crypto/ccp: Use PCI bridge defaults for IDE The current number of streams in AMD TSM is 1 which is too little, the core uses 255. Also, even if the module parameter is increased, calling pci_ide_set_nr_streams() second time triggers WARN_ON. Simplify the code by sticking to the PCI core defaults. Fixes: 4be423572da1 ("crypto/ccp: Implement SEV-TIO PCIe IDE (phase1)") Signed-off-by: Alexey Kardashevskiy Acked-by: Tom Lendacky Link: https://patch.msgid.link/20260123053057.1350569-2-aik@amd.com Signed-off-by: Dan Williams --- drivers/crypto/ccp/sev-dev-tsm.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/crypto/ccp/sev-dev-tsm.c b/drivers/crypto/ccp/sev-dev-tsm.c index ea29cd5d0ff9..7407b77c2ef2 100644 --- a/drivers/crypto/ccp/sev-dev-tsm.c +++ b/drivers/crypto/ccp/sev-dev-tsm.c @@ -19,12 +19,6 @@ MODULE_IMPORT_NS("PCI_IDE"); -#define TIO_DEFAULT_NR_IDE_STREAMS 1 - -static uint nr_ide_streams = TIO_DEFAULT_NR_IDE_STREAMS; -module_param_named(ide_nr, nr_ide_streams, uint, 0644); -MODULE_PARM_DESC(ide_nr, "Set the maximum number of IDE streams per PHB"); - #define dev_to_sp(dev) ((struct sp_device *)dev_get_drvdata(dev)) #define dev_to_psp(dev) ((struct psp_device *)(dev_to_sp(dev)->psp_data)) #define dev_to_sev(dev) ((struct sev_device *)(dev_to_psp(dev)->sev_data)) @@ -193,7 +187,6 @@ static void streams_teardown(struct pci_ide **ide) static int stream_alloc(struct pci_dev *pdev, struct pci_ide **ide, unsigned int tc) { - struct pci_dev *rp = pcie_find_root_port(pdev); struct pci_ide *ide1; if (ide[tc]) { @@ -201,11 +194,6 @@ static int stream_alloc(struct pci_dev *pdev, struct pci_ide **ide, return -EBUSY; } - /* FIXME: find a better way */ - if (nr_ide_streams != TIO_DEFAULT_NR_IDE_STREAMS) - pci_notice(pdev, "Enable non-default %d streams", nr_ide_streams); - pci_ide_set_nr_streams(to_pci_host_bridge(rp->bus->bridge), nr_ide_streams); - ide1 = pci_ide_stream_alloc(pdev); if (!ide1) return -EFAULT; From c2012263047689e495e81c96d7d5b0586299578d Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 23 Jan 2026 16:30:57 +1100 Subject: [PATCH 155/195] crypto/ccp: Allow multiple streams on the same root bridge With SEV-TIO the low-level TSM driver is responsible for allocating a Stream ID. The Stream ID needs to be unique within each IDE partner port. Fix the Stream ID selection to reuse the host bridge stream resource id which is a pool of 256 ids per host bridge on AMD platforms. Otherwise, only one device per-host bridge can establish Selective Stream IDE. Fixes: 4be423572da1 ("crypto/ccp: Implement SEV-TIO PCIe IDE (phase1)") Signed-off-by: Alexey Kardashevskiy Acked-by: Tom Lendacky Link: https://patch.msgid.link/20260123053057.1350569-3-aik@amd.com [djbw: clarify end user impact in changelog] Signed-off-by: Dan Williams --- drivers/crypto/ccp/sev-dev-tsm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/crypto/ccp/sev-dev-tsm.c b/drivers/crypto/ccp/sev-dev-tsm.c index 7407b77c2ef2..40d02adaf3f6 100644 --- a/drivers/crypto/ccp/sev-dev-tsm.c +++ b/drivers/crypto/ccp/sev-dev-tsm.c @@ -198,8 +198,7 @@ static int stream_alloc(struct pci_dev *pdev, struct pci_ide **ide, if (!ide1) return -EFAULT; - /* Blindly assign streamid=0 to TC=0, and so on */ - ide1->stream_id = tc; + ide1->stream_id = ide1->host_bridge_stream; ide[tc] = ide1; From adcbadfd8e05d3558c9cfaa783f17c645181165f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Thu, 29 Jan 2026 09:22:27 +0100 Subject: [PATCH 156/195] net: sfp: Fix quirk for Ubiquiti U-Fiber Instant SFP module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit fd580c9830316eda ("net: sfp: augment SFP parsing with phy_interface_t bitmap") did not add augumentation for the interface bitmap in the quirk for Ubiquiti U-Fiber Instant. The subsequent commit f81fa96d8a6c7a77 ("net: phylink: use phy_interface_t bitmaps for optical modules") then changed phylink code for selection of SFP interface: instead of using link mode bitmap, the interface bitmap is used, and the fastest interface mode supported by both SFP module and MAC is chosen. Since the interface bitmap contains also modes faster than 1000base-x, this caused a regression wherein this module stopped working out-of-the-box. Fix this. Fixes: fd580c9830316eda ("net: sfp: augment SFP parsing with phy_interface_t bitmap") Signed-off-by: Marek Behún Reviewed-by: Maxime Chevallier Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260129082227.17443-1-kabel@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 47f095bd91ce..3e023723887c 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -479,6 +479,8 @@ static void sfp_quirk_ubnt_uf_instant(const struct sfp_eeprom_id *id, linkmode_zero(caps->link_modes); linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT, caps->link_modes); + phy_interface_zero(caps->interfaces); + __set_bit(PHY_INTERFACE_MODE_1000BASEX, caps->interfaces); } #define SFP_QUIRK(_v, _p, _s, _f) \ From f8db6475a83649689c087a8f52486fcc53e627e9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Jan 2026 20:43:59 +0000 Subject: [PATCH 157/195] macvlan: fix error recovery in macvlan_common_newlink() valis provided a nice repro to crash the kernel: ip link add p1 type veth peer p2 ip link set address 00:00:00:00:00:20 dev p1 ip link set up dev p1 ip link set up dev p2 ip link add mv0 link p2 type macvlan mode source ip link add invalid% link p2 type macvlan mode source macaddr add 00:00:00:00:00:20 ping -c1 -I p1 1.2.3.4 He also gave a very detailed analysis: The issue is triggered when a new macvlan link is created with MACVLAN_MODE_SOURCE mode and MACVLAN_MACADDR_ADD (or MACVLAN_MACADDR_SET) parameter, lower device already has a macvlan port and register_netdevice() called from macvlan_common_newlink() fails (e.g. because of the invalid link name). In this case macvlan_hash_add_source is called from macvlan_change_sources() / macvlan_common_newlink(): This adds a reference to vlan to the port's vlan_source_hash using macvlan_source_entry. vlan is a pointer to the priv data of the link that is being created. When register_netdevice() fails, the error is returned from macvlan_newlink() to rtnl_newlink_create(): if (ops->newlink) err = ops->newlink(dev, ¶ms, extack); else err = register_netdevice(dev); if (err < 0) { free_netdev(dev); goto out; } and free_netdev() is called, causing a kvfree() on the struct net_device that is still referenced in the source entry attached to the lower device's macvlan port. Now all packets sent on the macvlan port with a matching source mac address will trigger a use-after-free in macvlan_forward_source(). With all that, my fix is to make sure we call macvlan_flush_sources() regardless of @create value whenever "goto destroy_macvlan_port;" path is taken. Many thanks to valis for following up on this issue. Fixes: aa5fd0fb7748 ("driver: macvlan: Destroy new macvlan port if macvlan_common_newlink failed.") Signed-off-by: Eric Dumazet Reported-by: valis Reported-by: syzbot+7182fbe91e58602ec1fe@syzkaller.appspotmail.com Closes: https: //lore.kernel.org/netdev/695fb1e8.050a0220.1c677c.039f.GAE@google.com/T/#u Cc: Boudewijn van der Heide Link: https://patch.msgid.link/20260129204359.632556-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/macvlan.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index b4df7e184791..c509228be84d 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1567,9 +1567,10 @@ destroy_macvlan_port: /* the macvlan port may be freed by macvlan_uninit when fail to register. * so we destroy the macvlan port only when it's valid. */ - if (create && macvlan_port_get_rtnl(lowerdev)) { + if (macvlan_port_get_rtnl(lowerdev)) { macvlan_flush_sources(port, vlan); - macvlan_port_destroy(port->dev); + if (create) + macvlan_port_destroy(port->dev); } return err; } From 6d06bc83a5ae8777a5f7a81c32dd75b8d9b2fe04 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 29 Jan 2026 12:10:30 +0900 Subject: [PATCH 158/195] net: usb: r8152: fix resume reset deadlock rtl8152 can trigger device reset during reset which potentially can result in a deadlock: **** DPM device timeout after 10 seconds; 15 seconds until panic **** Call Trace: schedule+0x483/0x1370 schedule_preempt_disabled+0x15/0x30 __mutex_lock_common+0x1fd/0x470 __rtl8152_set_mac_address+0x80/0x1f0 dev_set_mac_address+0x7f/0x150 rtl8152_post_reset+0x72/0x150 usb_reset_device+0x1d0/0x220 rtl8152_resume+0x99/0xc0 usb_resume_interface+0x3e/0xc0 usb_resume_both+0x104/0x150 usb_resume+0x22/0x110 The problem is that rtl8152 resume calls reset under tp->control mutex while reset basically re-enters rtl8152 and attempts to acquire the same tp->control lock once again. Reset INACCESSIBLE device outside of tp->control mutex scope to avoid recursive mutex_lock() deadlock. Fixes: 4933b066fefb ("r8152: If inaccessible at resume time, issue a reset") Reviewed-by: Douglas Anderson Signed-off-by: Sergey Senozhatsky Link: https://patch.msgid.link/20260129031106.3805887-1-senozhatsky@chromium.org Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index fa5192583860..2f3baa5f6e9c 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -8535,19 +8535,6 @@ static int rtl8152_system_resume(struct r8152 *tp) usb_submit_urb(tp->intr_urb, GFP_NOIO); } - /* If the device is RTL8152_INACCESSIBLE here then we should do a - * reset. This is important because the usb_lock_device_for_reset() - * that happens as a result of usb_queue_reset_device() will silently - * fail if the device was suspended or if too much time passed. - * - * NOTE: The device is locked here so we can directly do the reset. - * We don't need usb_lock_device_for_reset() because that's just a - * wrapper over device_lock() and device_resume() (which calls us) - * does that for us. - */ - if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) - usb_reset_device(tp->udev); - return 0; } @@ -8658,19 +8645,33 @@ static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message) static int rtl8152_resume(struct usb_interface *intf) { struct r8152 *tp = usb_get_intfdata(intf); + bool runtime_resume = test_bit(SELECTIVE_SUSPEND, &tp->flags); int ret; mutex_lock(&tp->control); rtl_reset_ocp_base(tp); - if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) + if (runtime_resume) ret = rtl8152_runtime_resume(tp); else ret = rtl8152_system_resume(tp); mutex_unlock(&tp->control); + /* If the device is RTL8152_INACCESSIBLE here then we should do a + * reset. This is important because the usb_lock_device_for_reset() + * that happens as a result of usb_queue_reset_device() will silently + * fail if the device was suspended or if too much time passed. + * + * NOTE: The device is locked here so we can directly do the reset. + * We don't need usb_lock_device_for_reset() because that's just a + * wrapper over device_lock() and device_resume() (which calls us) + * does that for us. + */ + if (!runtime_resume && test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + usb_reset_device(tp->udev); + return ret; } From 80f1a2c2332fee0edccd006fe87fc8a6db94bab3 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 29 Jan 2026 14:43:41 -0800 Subject: [PATCH 159/195] iommu/tegra241-cmdqv: Reset VCMDQ in tegra241_vcmdq_hw_init_user() The Enable bits in CMDQV/VINTF/VCMDQ_CONFIG registers do not actually reset the HW registers. So, the driver explicitly clears all the registers when a VINTF or VCMDQ is being initialized calling its hw_deinit() function. However, a userspace VCMDQ is not properly reset, unlike an in-kernel VCMDQ getting reset in tegra241_vcmdq_hw_init(). Meanwhile, tegra241_vintf_hw_init() calling tegra241_vintf_hw_deinit() will not deinit any VCMDQ, since there is no userspace VCMDQ mapped to the VINTF at that stage. Then, this may result in dirty VCMDQ registers, which can fail the VM. Like tegra241_vcmdq_hw_init(), reset a VCMDQ in tegra241_vcmdq_hw_init() to fix this bug. This is required by a host kernel. Fixes: 6717f26ab1e7 ("iommu/tegra241-cmdqv: Add user-space use support") Cc: stable@vger.kernel.org Reported-by: Bao Nguyen Signed-off-by: Nicolin Chen Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index 378104cd395e..04cc7a9036e4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -1078,6 +1078,9 @@ static int tegra241_vcmdq_hw_init_user(struct tegra241_vcmdq *vcmdq) { char header[64]; + /* Reset VCMDQ */ + tegra241_vcmdq_hw_deinit(vcmdq); + /* Configure the vcmdq only; User space does the enabling */ writeq_relaxed(vcmdq->cmdq.q.q_base, REG_VCMDQ_PAGE1(vcmdq, BASE)); From 8e24994872361212531a952c93adb01c485148f1 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 30 Jan 2026 14:37:47 -0700 Subject: [PATCH 160/195] kbuild: Do not run kernel-doc when building external modules After commit 778b8ebe5192 ("docs: Move the python libraries to tools/lib/python"), building an external module with any value of W= against the output of install-extmod-build fails with: $ make -C /usr/lib/modules/6.19.0-rc7-00108-g4d310797262f/build M=$PWD W=1 make: Entering directory '/usr/lib/modules/6.19.0-rc7-00108-g4d310797262f/build' make[1]: Entering directory '...' CC [M] ... Traceback (most recent call last): File "/usr/lib/modules/6.19.0-rc7-00108-g4d310797262f/build/scripts/kernel-doc.py", line 339, in main() ~~~~^^ File "/usr/lib/modules/6.19.0-rc7-00108-g4d310797262f/build/scripts/kernel-doc.py", line 295, in main from kdoc.kdoc_files import KernelFiles # pylint: disable=C0415 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ModuleNotFoundError: No module named 'kdoc' scripts/lib was included in the build directory from find_in_scripts but after the move to tools/lib/python, it is no longer included, breaking kernel-doc.py. Commit eba6ffd126cd ("docs: kdoc: move kernel-doc to tools/docs") breaks this even further by moving kernel-doc outside of scripts as well, so it cannot be found when called by cmd_checkdoc. $ make -C /usr/lib/modules/6.19.0-rc7-next-20260130/build M=$PWD W=1 make: Entering directory '/usr/lib/modules/6.19.0-rc7-next-20260130/build' make[1]: Entering directory '...' CC [M] ... python3: can't open file '/usr/lib/modules/6.19.0-rc7-next-20260130/build/tools/docs/kernel-doc': [Errno 2] No such file or directory While kernel-doc could be useful for external modules, it is more useful for in-tree documentation that will be build and included in htmldocs. Rather than including it in install-extmod-build, just skip running kernel-doc for the external module build. Cc: stable@vger.kernel.org Fixes: 778b8ebe5192 ("docs: Move the python libraries to tools/lib/python") Reported-by: Rong Zhang Closes: https://lore.kernel.org/20260129175321.415295-1-i@rong.moe/ Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Nathan Chancellor Reviewed-by: Randy Dunlap Link: https://patch.msgid.link/20260130-kbuild-skip-kernel-doc-extmod-v1-1-58443d60131a@kernel.org Reviewed-by: Nicolas Schier Signed-off-by: Nicolas Schier --- scripts/Makefile.build | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 5037f4715d74..f01d7957edf7 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -166,11 +166,13 @@ else ifeq ($(KBUILD_CHECKSRC),2) cmd_force_checksrc = $(CHECK) $(CHECKFLAGS) $(c_flags) $< endif +ifeq ($(KBUILD_EXTMOD),) ifneq ($(KBUILD_EXTRA_WARN),) cmd_checkdoc = PYTHONDONTWRITEBYTECODE=1 $(PYTHON3) $(KERNELDOC) -none $(KDOCFLAGS) \ $(if $(findstring 2, $(KBUILD_EXTRA_WARN)), -Wall) \ $< endif +endif # Compile C sources (.c) # --------------------------------------------------------------------------- From fdf3f6800be36377e045e2448087f12132b88d2f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 29 Jan 2026 19:38:27 -0800 Subject: [PATCH 161/195] net: don't touch dev->stats in BPF redirect paths Gal reports that BPF redirect increments dev->stats.tx_errors on failure. This is not correct, most modern drivers completely ignore dev->stats so these drops will be invisible to the user. Core code should use the dedicated core stats which are folded into device stats in dev_get_stats(). Note that we're switching from tx_errors to tx_dropped. Core only has tx_dropped, hence presumably users already expect that counter to increment for "stack" Tx issues. Reported-by: Gal Pressman Link: https://lore.kernel.org/c5df3b60-246a-4030-9c9a-0a35cd1ca924@nvidia.com Fixes: b4ab31414970 ("bpf: Add redirect_neigh helper as redirect drop-in") Acked-by: Martin KaFai Lau Acked-by: Daniel Borkmann Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260130033827.698841-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/filter.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index bcd73d9bd764..029e560e32ce 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2289,12 +2289,12 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev, err = bpf_out_neigh_v6(net, skb, dev, nh); if (unlikely(net_xmit_eval(err))) - DEV_STATS_INC(dev, tx_errors); + dev_core_stats_tx_dropped_inc(dev); else ret = NET_XMIT_SUCCESS; goto out_xmit; out_drop: - DEV_STATS_INC(dev, tx_errors); + dev_core_stats_tx_dropped_inc(dev); kfree_skb(skb); out_xmit: return ret; @@ -2396,12 +2396,12 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev, err = bpf_out_neigh_v4(net, skb, dev, nh); if (unlikely(net_xmit_eval(err))) - DEV_STATS_INC(dev, tx_errors); + dev_core_stats_tx_dropped_inc(dev); else ret = NET_XMIT_SUCCESS; goto out_xmit; out_drop: - DEV_STATS_INC(dev, tx_errors); + dev_core_stats_tx_dropped_inc(dev); kfree_skb(skb); out_xmit: return ret; From 18f7fcd5e69a04df57b563360b88be72471d6b62 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 1 Feb 2026 14:01:13 -0800 Subject: [PATCH 162/195] Linux 6.19-rc8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 13033e89b051..bde507d5c03d 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 19 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Baby Opossum Posse # *DOCUMENTATION* From 43151f812886be1855d2cba059f9c93e4729460b Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Mon, 2 Feb 2026 12:27:16 +0000 Subject: [PATCH 163/195] cgroup/dmem: fix NULL pointer dereference when setting max An issue was triggered: BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] SMP NOPTI CPU: 15 UID: 0 PID: 658 Comm: bash Tainted: 6.19.0-rc6-next-2026012 Tainted: [O]=OOT_MODULE Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), RIP: 0010:strcmp+0x10/0x30 RSP: 0018:ffffc900017f7dc0 EFLAGS: 00000246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff888107cd4358 RDX: 0000000019f73907 RSI: ffffffff82cc381a RDI: 0000000000000000 RBP: ffff8881016bef0d R08: 000000006c0e7145 R09: 0000000056c0e714 R10: 0000000000000001 R11: ffff888107cd4358 R12: 0007ffffffffffff R13: ffff888101399200 R14: ffff888100fcb360 R15: 0007ffffffffffff CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000105c79000 CR4: 00000000000006f0 Call Trace: dmemcg_limit_write.constprop.0+0x16d/0x390 ? __pfx_set_resource_max+0x10/0x10 kernfs_fop_write_iter+0x14e/0x200 vfs_write+0x367/0x510 ksys_write+0x66/0xe0 do_syscall_64+0x6b/0x390 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f42697e1887 It was trriggered setting max without limitation, the command is like: "echo test/region0 > dmem.max". To fix this issue, add check whether options is valid after parsing the region_name. Fixes: b168ed458dde ("kernel/cgroup: Add "dmem" memory accounting cgroup") Cc: stable@vger.kernel.org # v6.14+ Signed-off-by: Chen Ridong Signed-off-by: Tejun Heo --- kernel/cgroup/dmem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/cgroup/dmem.c b/kernel/cgroup/dmem.c index e12b946278b6..1f0d6caaf2fb 100644 --- a/kernel/cgroup/dmem.c +++ b/kernel/cgroup/dmem.c @@ -700,6 +700,9 @@ static ssize_t dmemcg_limit_write(struct kernfs_open_file *of, if (!region_name[0]) continue; + if (!options || !*options) + return -EINVAL; + rcu_read_lock(); region = dmemcg_get_region_by_name(region_name); rcu_read_unlock(); From 592a68212c5664bcaa88f24ed80bf791282790fe Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Mon, 2 Feb 2026 12:27:17 +0000 Subject: [PATCH 164/195] cgroup/dmem: avoid rcu warning when unregister region A warnning was detected: WARNING: suspicious RCU usage 6.19.0-rc7-next-20260129+ #1101 Tainted: G O kernel/cgroup/dmem.c:456 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by insmod/532: #0: ffffffff85e78b38 (dmemcg_lock){+.+.}-dmem_cgroup_unregister_region+ stack backtrace: CPU: 2 UID: 0 PID: 532 Comm: insmod Tainted: 6.19.0-rc7-next- Tainted: [O]=OOT_MODULE Call Trace: dump_stack_lvl+0xb0/0xd0 lockdep_rcu_suspicious+0x151/0x1c0 dmem_cgroup_unregister_region+0x1e2/0x380 ? __pfx_dmem_test_init+0x10/0x10 [dmem_uaf] dmem_test_init+0x65/0xff0 [dmem_uaf] do_one_initcall+0xbb/0x3a0 The macro list_for_each_rcu() must be used within an RCU read-side critical section (between rcu_read_lock() and rcu_read_unlock()). Using it outside that context, as seen in dmem_cgroup_unregister_region(), triggers the lockdep warning because the RCU protection is not guaranteed. Replace list_for_each_rcu() with list_for_each_entry_safe(), which is appropriate for traversal under spinlock protection where nodes may be deleted. Fixes: b168ed458dde ("kernel/cgroup: Add "dmem" memory accounting cgroup") Cc: stable@vger.kernel.org # v6.14+ Signed-off-by: Chen Ridong Signed-off-by: Tejun Heo --- kernel/cgroup/dmem.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/kernel/cgroup/dmem.c b/kernel/cgroup/dmem.c index 1f0d6caaf2fb..787b334e0f5d 100644 --- a/kernel/cgroup/dmem.c +++ b/kernel/cgroup/dmem.c @@ -423,7 +423,7 @@ static void dmemcg_free_region(struct kref *ref) */ void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region) { - struct list_head *entry; + struct dmem_cgroup_pool_state *pool, *next; if (!region) return; @@ -433,10 +433,7 @@ void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region) /* Remove from global region list */ list_del_rcu(®ion->region_node); - list_for_each_rcu(entry, ®ion->pools) { - struct dmem_cgroup_pool_state *pool = - container_of(entry, typeof(*pool), region_node); - + list_for_each_entry_safe(pool, next, ®ion->pools, region_node) { list_del_rcu(&pool->css_node); } From 99a2ef500906138ba58093b9893972a5c303c734 Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Mon, 2 Feb 2026 12:27:18 +0000 Subject: [PATCH 165/195] cgroup/dmem: avoid pool UAF An UAF issue was observed: BUG: KASAN: slab-use-after-free in page_counter_uncharge+0x65/0x150 Write of size 8 at addr ffff888106715440 by task insmod/527 CPU: 4 UID: 0 PID: 527 Comm: insmod 6.19.0-rc7-next-20260129+ #11 Tainted: [O]=OOT_MODULE Call Trace: dump_stack_lvl+0x82/0xd0 kasan_report+0xca/0x100 kasan_check_range+0x39/0x1c0 page_counter_uncharge+0x65/0x150 dmem_cgroup_uncharge+0x1f/0x260 Allocated by task 527: Freed by task 0: The buggy address belongs to the object at ffff888106715400 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 64 bytes inside of freed 512-byte region [ffff888106715400, ffff888106715600) The buggy address belongs to the physical page: Memory state around the buggy address: ffff888106715300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff888106715380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff888106715400: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888106715480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888106715500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb The issue occurs because a pool can still be held by a caller after its associated memory region is unregistered. The current implementation frees the pool even if users still hold references to it (e.g., before uncharge operations complete). This patch adds a reference counter to each pool, ensuring that a pool is only freed when its reference count drops to zero. Fixes: b168ed458dde ("kernel/cgroup: Add "dmem" memory accounting cgroup") Cc: stable@vger.kernel.org # v6.14+ Signed-off-by: Chen Ridong Signed-off-by: Tejun Heo --- kernel/cgroup/dmem.c | 60 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/dmem.c b/kernel/cgroup/dmem.c index 787b334e0f5d..1ea6afffa985 100644 --- a/kernel/cgroup/dmem.c +++ b/kernel/cgroup/dmem.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -71,7 +72,9 @@ struct dmem_cgroup_pool_state { struct rcu_head rcu; struct page_counter cnt; + struct dmem_cgroup_pool_state *parent; + refcount_t ref; bool inited; }; @@ -88,6 +91,9 @@ struct dmem_cgroup_pool_state { static DEFINE_SPINLOCK(dmemcg_lock); static LIST_HEAD(dmem_cgroup_regions); +static void dmemcg_free_region(struct kref *ref); +static void dmemcg_pool_free_rcu(struct rcu_head *rcu); + static inline struct dmemcg_state * css_to_dmemcs(struct cgroup_subsys_state *css) { @@ -104,10 +110,38 @@ static struct dmemcg_state *parent_dmemcs(struct dmemcg_state *cg) return cg->css.parent ? css_to_dmemcs(cg->css.parent) : NULL; } +static void dmemcg_pool_get(struct dmem_cgroup_pool_state *pool) +{ + refcount_inc(&pool->ref); +} + +static bool dmemcg_pool_tryget(struct dmem_cgroup_pool_state *pool) +{ + return refcount_inc_not_zero(&pool->ref); +} + +static void dmemcg_pool_put(struct dmem_cgroup_pool_state *pool) +{ + if (!refcount_dec_and_test(&pool->ref)) + return; + + call_rcu(&pool->rcu, dmemcg_pool_free_rcu); +} + +static void dmemcg_pool_free_rcu(struct rcu_head *rcu) +{ + struct dmem_cgroup_pool_state *pool = container_of(rcu, typeof(*pool), rcu); + + if (pool->parent) + dmemcg_pool_put(pool->parent); + kref_put(&pool->region->ref, dmemcg_free_region); + kfree(pool); +} + static void free_cg_pool(struct dmem_cgroup_pool_state *pool) { list_del(&pool->region_node); - kfree(pool); + dmemcg_pool_put(pool); } static void @@ -342,6 +376,12 @@ alloc_pool_single(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region page_counter_init(&pool->cnt, ppool ? &ppool->cnt : NULL, true); reset_all_resource_limits(pool); + refcount_set(&pool->ref, 1); + kref_get(®ion->ref); + if (ppool && !pool->parent) { + pool->parent = ppool; + dmemcg_pool_get(ppool); + } list_add_tail_rcu(&pool->css_node, &dmemcs->pools); list_add_tail(&pool->region_node, ®ion->pools); @@ -389,6 +429,10 @@ get_cg_pool_locked(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *regio /* Fix up parent links, mark as inited. */ pool->cnt.parent = &ppool->cnt; + if (ppool && !pool->parent) { + pool->parent = ppool; + dmemcg_pool_get(ppool); + } pool->inited = true; pool = ppool; @@ -435,6 +479,8 @@ void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region) list_for_each_entry_safe(pool, next, ®ion->pools, region_node) { list_del_rcu(&pool->css_node); + list_del(&pool->region_node); + dmemcg_pool_put(pool); } /* @@ -515,8 +561,10 @@ static struct dmem_cgroup_region *dmemcg_get_region_by_name(const char *name) */ void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state *pool) { - if (pool) + if (pool) { css_put(&pool->cs->css); + dmemcg_pool_put(pool); + } } EXPORT_SYMBOL_GPL(dmem_cgroup_pool_state_put); @@ -530,6 +578,8 @@ get_cg_pool_unlocked(struct dmemcg_state *cg, struct dmem_cgroup_region *region) pool = find_cg_pool_locked(cg, region); if (pool && !READ_ONCE(pool->inited)) pool = NULL; + if (pool && !dmemcg_pool_tryget(pool)) + pool = NULL; rcu_read_unlock(); while (!pool) { @@ -538,6 +588,8 @@ get_cg_pool_unlocked(struct dmemcg_state *cg, struct dmem_cgroup_region *region) pool = get_cg_pool_locked(cg, region, &allocpool); else pool = ERR_PTR(-ENODEV); + if (!IS_ERR(pool)) + dmemcg_pool_get(pool); spin_unlock(&dmemcg_lock); if (pool == ERR_PTR(-ENOMEM)) { @@ -573,6 +625,7 @@ void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size) page_counter_uncharge(&pool->cnt, size); css_put(&pool->cs->css); + dmemcg_pool_put(pool); } EXPORT_SYMBOL_GPL(dmem_cgroup_uncharge); @@ -624,7 +677,9 @@ int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size, if (ret_limit_pool) { *ret_limit_pool = container_of(fail, struct dmem_cgroup_pool_state, cnt); css_get(&(*ret_limit_pool)->cs->css); + dmemcg_pool_get(*ret_limit_pool); } + dmemcg_pool_put(pool); ret = -EAGAIN; goto err; } @@ -719,6 +774,7 @@ static ssize_t dmemcg_limit_write(struct kernfs_open_file *of, /* And commit */ apply(pool, new_limit); + dmemcg_pool_put(pool); out_put: kref_put(®ion->ref, dmemcg_free_region); From e3a43633023e3cacaca60d4b8972d084a2b06236 Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Mon, 2 Feb 2026 08:24:07 +0000 Subject: [PATCH 166/195] smb/client: fix memory leak in smb2_open_file() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reproducer: 1. server: directories are exported read-only 2. client: mount -t cifs //${server_ip}/export /mnt 3. client: dd if=/dev/zero of=/mnt/file bs=512 count=1000 oflag=direct 4. client: umount /mnt 5. client: sleep 1 6. client: modprobe -r cifs The error message is as follows: ============================================================================= BUG cifs_small_rq (Not tainted): Objects remaining on __kmem_cache_shutdown() ----------------------------------------------------------------------------- Object 0x00000000d47521be @offset=14336 ... WARNING: mm/slub.c:1251 at __kmem_cache_shutdown+0x34e/0x440, CPU#0: modprobe/1577 ... Call Trace: kmem_cache_destroy+0x94/0x190 cifs_destroy_request_bufs+0x3e/0x50 [cifs] cleanup_module+0x4e/0x540 [cifs] __se_sys_delete_module+0x278/0x400 __x64_sys_delete_module+0x5f/0x70 x64_sys_call+0x2299/0x2ff0 do_syscall_64+0x89/0x350 entry_SYSCALL_64_after_hwframe+0x76/0x7e ... kmem_cache_destroy cifs_small_rq: Slab cache still has objects when called from cifs_destroy_request_bufs+0x3e/0x50 [cifs] WARNING: mm/slab_common.c:532 at kmem_cache_destroy+0x16b/0x190, CPU#0: modprobe/1577 Link: https://lore.kernel.org/linux-cifs/9751f02d-d1df-4265-a7d6-b19761b21834@linux.dev/T/#mf14808c144448b715f711ce5f0477a071f08eaf6 Fixes: e255612b5ed9 ("cifs: Add fallback for SMB2 CREATE without FILE_READ_ATTRIBUTES") Reported-by: Paulo Alcantara Reviewed-by: Paulo Alcantara (Red Hat) Signed-off-by: ChenXiaoSong Reviewed-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/smb2file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/smb2file.c b/fs/smb/client/smb2file.c index 7f11ae6bb785..2dd08388ea87 100644 --- a/fs/smb/client/smb2file.c +++ b/fs/smb/client/smb2file.c @@ -178,6 +178,7 @@ int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, rc = SMB2_open(xid, oparms, smb2_path, &smb2_oplock, smb2_data, NULL, &err_iov, &err_buftype); if (rc == -EACCES && retry_without_read_attributes) { + free_rsp_buf(err_buftype, err_iov.iov_base); oparms->desired_access &= ~FILE_READ_ATTRIBUTES; rc = SMB2_open(xid, oparms, smb2_path, &smb2_oplock, smb2_data, NULL, &err_iov, &err_buftype); From 67b3da8d3051fba7e1523b3afce4f71c658f15f8 Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Mon, 2 Feb 2026 09:49:06 +0000 Subject: [PATCH 167/195] smb/client: fix memory leak in SendReceive() Reproducer: 1. server: supports SMB1, directories are exported read-only 2. client: mount -t cifs -o vers=1.0 //${server_ip}/export /mnt 3. client: dd if=/dev/zero of=/mnt/file bs=512 count=1000 oflag=direct 4. client: umount /mnt 5. client: sleep 1 6. client: modprobe -r cifs The error message is as follows: ============================================================================= BUG cifs_small_rq (Not tainted): Objects remaining on __kmem_cache_shutdown() ----------------------------------------------------------------------------- Object 0x00000000d34491e6 @offset=896 Object 0x00000000bde9fab3 @offset=4480 Object 0x00000000104a1f70 @offset=6272 Object 0x0000000092a51bb5 @offset=7616 Object 0x000000006714a7db @offset=13440 ... WARNING: mm/slub.c:1251 at __kmem_cache_shutdown+0x379/0x3f0, CPU#7: modprobe/712 ... Call Trace: kmem_cache_destroy+0x69/0x160 cifs_destroy_request_bufs+0x39/0x40 [cifs] cleanup_module+0x43/0xfc0 [cifs] __se_sys_delete_module+0x1d5/0x300 __x64_sys_delete_module+0x1a/0x30 x64_sys_call+0x2299/0x2ff0 do_syscall_64+0x6e/0x270 entry_SYSCALL_64_after_hwframe+0x76/0x7e ... kmem_cache_destroy cifs_small_rq: Slab cache still has objects when called from cifs_destroy_request_bufs+0x39/0x40 [cifs] WARNING: mm/slab_common.c:532 at kmem_cache_destroy+0x142/0x160, CPU#7: modprobe/712 Link: https://lore.kernel.org/linux-cifs/9751f02d-d1df-4265-a7d6-b19761b21834@linux.dev/T/#mf14808c144448b715f711ce5f0477a071f08eaf6 Fixes: 6be09580df5c ("cifs: Make smb1's SendReceive() wrap cifs_send_recv()") Reported-by: Paulo Alcantara Reviewed-by: Paulo Alcantara (Red Hat) Reviewed-by: David Howells Signed-off-by: ChenXiaoSong Signed-off-by: Steve French --- fs/smb/client/cifstransport.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/cifstransport.c b/fs/smb/client/cifstransport.c index 28d1cee90625..98287132626e 100644 --- a/fs/smb/client/cifstransport.c +++ b/fs/smb/client/cifstransport.c @@ -251,13 +251,15 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, rc = cifs_send_recv(xid, ses, ses->server, &rqst, &resp_buf_type, flags, &resp_iov); if (rc < 0) - return rc; + goto out; if (out_buf) { *pbytes_returned = resp_iov.iov_len; if (resp_iov.iov_len) memcpy(out_buf, resp_iov.iov_base, resp_iov.iov_len); } + +out: free_rsp_buf(resp_buf_type, resp_iov.iov_base); return rc; } From 83b67cc9be9223183caf91826d9c194d7fb128fa Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Sun, 1 Feb 2026 21:59:10 +0800 Subject: [PATCH 168/195] linkwatch: use __dev_put() in callers to prevent UAF After linkwatch_do_dev() calls __dev_put() to release the linkwatch reference, the device refcount may drop to 1. At this point, netdev_run_todo() can proceed (since linkwatch_sync_dev() sees an empty list and returns without blocking), wait for the refcount to become 1 via netdev_wait_allrefs_any(), and then free the device via kobject_put(). This creates a use-after-free when __linkwatch_run_queue() tries to call netdev_unlock_ops() on the already-freed device. Note that adding netdev_lock_ops()/netdev_unlock_ops() pair in netdev_run_todo() before kobject_put() would not work, because netdev_lock_ops() is conditional - it only locks when netdev_need_ops_lock() returns true. If the device doesn't require ops_lock, linkwatch won't hold any lock, and netdev_run_todo() acquiring the lock won't provide synchronization. Fix this by moving __dev_put() from linkwatch_do_dev() to its callers. The device reference logically pairs with de-listing the device, so it's reasonable for the caller that did the de-listing to release it. This allows placing __dev_put() after all device accesses are complete, preventing UAF. The bug can be reproduced by adding mdelay(2000) after linkwatch_do_dev() in __linkwatch_run_queue(), then running: ip tuntap add mode tun name tun_test ip link set tun_test up ip link set tun_test carrier off ip link set tun_test carrier on sleep 0.5 ip tuntap del mode tun name tun_test KASAN report: ================================================================== BUG: KASAN: use-after-free in netdev_need_ops_lock include/net/netdev_lock.h:33 [inline] BUG: KASAN: use-after-free in netdev_unlock_ops include/net/netdev_lock.h:47 [inline] BUG: KASAN: use-after-free in __linkwatch_run_queue+0x865/0x8a0 net/core/link_watch.c:245 Read of size 8 at addr ffff88804de5c008 by task kworker/u32:10/8123 CPU: 0 UID: 0 PID: 8123 Comm: kworker/u32:10 Not tainted syzkaller #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 Workqueue: events_unbound linkwatch_event Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x100/0x190 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0x156/0x4c9 mm/kasan/report.c:482 kasan_report+0xdf/0x1a0 mm/kasan/report.c:595 netdev_need_ops_lock include/net/netdev_lock.h:33 [inline] netdev_unlock_ops include/net/netdev_lock.h:47 [inline] __linkwatch_run_queue+0x865/0x8a0 net/core/link_watch.c:245 linkwatch_event+0x8f/0xc0 net/core/link_watch.c:304 process_one_work+0x9c2/0x1840 kernel/workqueue.c:3257 process_scheduled_works kernel/workqueue.c:3340 [inline] worker_thread+0x5da/0xe40 kernel/workqueue.c:3421 kthread+0x3b3/0x730 kernel/kthread.c:463 ret_from_fork+0x754/0xaf0 arch/x86/kernel/process.c:158 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246 ================================================================== Fixes: 04efcee6ef8d ("net: hold instance lock during NETDEV_CHANGE") Reported-by: syzbot+1ec2f6a450f0b54af8c8@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/6824d064.a70a0220.3e9d8.001a.GAE@google.com/T/ Signed-off-by: Jiayuan Chen Signed-off-by: Jiayuan Chen Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260201135915.393451-1-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- net/core/link_watch.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 212cde35affa..25c455c10a01 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -185,10 +185,6 @@ static void linkwatch_do_dev(struct net_device *dev) netif_state_change(dev); } - /* Note: our callers are responsible for calling netdev_tracker_free(). - * This is the reason we use __dev_put() instead of dev_put(). - */ - __dev_put(dev); } static void __linkwatch_run_queue(int urgent_only) @@ -243,6 +239,11 @@ static void __linkwatch_run_queue(int urgent_only) netdev_lock_ops(dev); linkwatch_do_dev(dev); netdev_unlock_ops(dev); + /* Use __dev_put() because netdev_tracker_free() was already + * called above. Must be after netdev_unlock_ops() to prevent + * netdev_run_todo() from freeing the device while still in use. + */ + __dev_put(dev); do_dev--; spin_lock_irq(&lweventlist_lock); } @@ -278,8 +279,13 @@ void __linkwatch_sync_dev(struct net_device *dev) { netdev_ops_assert_locked(dev); - if (linkwatch_clean_dev(dev)) + if (linkwatch_clean_dev(dev)) { linkwatch_do_dev(dev); + /* Use __dev_put() because netdev_tracker_free() was already + * called inside linkwatch_clean_dev(). + */ + __dev_put(dev); + } } void linkwatch_sync_dev(struct net_device *dev) @@ -288,6 +294,10 @@ void linkwatch_sync_dev(struct net_device *dev) netdev_lock_ops(dev); linkwatch_do_dev(dev); netdev_unlock_ops(dev); + /* Use __dev_put() because netdev_tracker_free() was already + * called inside linkwatch_clean_dev(). + */ + __dev_put(dev); } } From 1c172febdf065375359b2b95156e476bfee30b60 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 30 Jan 2026 11:03:11 -0800 Subject: [PATCH 169/195] net: rss: fix reporting RXH_XFRM_NO_CHANGE as input_xfrm for contexts Initializing input_xfrm to RXH_XFRM_NO_CHANGE in RSS contexts is problematic. I think I did this to make it clear that the context does not have its own settings applied. But unlike ETH_RSS_HASH_NO_CHANGE which is zero, RXH_XFRM_NO_CHANGE is 0xff. We need to be careful when reading the value back, and remember to treat 0xff as 0. Remove the initialization and switch to storing 0. This lets us also remove the workaround in ethnl_rss_set(). Get side does not need any adjustments and context get no longer reports: RSS input transformation: symmetric-xor: on symmetric-or-xor: on Unknown bits in RSS input transformation: 0xfc for NICs which don't support input_xfrm. Remove the init of hfunc to ETH_RSS_HASH_NO_CHANGE while at it. As already mentioned this is a noop since ETH_RSS_HASH_NO_CHANGE is 0 and struct is zalloc'd. But as this fix exemplifies storing NO_CHANGE as state is fragile. This issue is implicitly caught by running our selftests because YNL in selftests errors out on unknown bits. Fixes: d3e2c7bab124 ("ethtool: rss: support setting input-xfrm via Netlink") Link: https://patch.msgid.link/20260130190311.811129-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/common.c | 3 --- net/ethtool/rss.c | 9 ++------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 369c05cf8163..d47a279eb8b9 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -862,9 +862,6 @@ ethtool_rxfh_ctx_alloc(const struct ethtool_ops *ops, ctx->key_off = key_off; ctx->priv_size = ops->rxfh_priv_size; - ctx->hfunc = ETH_RSS_HASH_NO_CHANGE; - ctx->input_xfrm = RXH_XFRM_NO_CHANGE; - return ctx; } diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 4dced53be4b3..da5934cceb07 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -824,8 +824,8 @@ rss_set_ctx_update(struct ethtool_rxfh_context *ctx, struct nlattr **tb, static int ethnl_rss_set(struct ethnl_req_info *req_info, struct genl_info *info) { - bool indir_reset = false, indir_mod, xfrm_sym = false; struct rss_req_info *request = RSS_REQINFO(req_info); + bool indir_reset = false, indir_mod, xfrm_sym; struct ethtool_rxfh_context *ctx = NULL; struct net_device *dev = req_info->dev; bool mod = false, fields_mod = false; @@ -860,12 +860,7 @@ ethnl_rss_set(struct ethnl_req_info *req_info, struct genl_info *info) rxfh.input_xfrm = data.input_xfrm; ethnl_update_u8(&rxfh.input_xfrm, tb[ETHTOOL_A_RSS_INPUT_XFRM], &mod); - /* For drivers which don't support input_xfrm it will be set to 0xff - * in the RSS context info. In all other case input_xfrm != 0 means - * symmetric hashing is requested. - */ - if (!request->rss_context || ops->rxfh_per_ctx_key) - xfrm_sym = rxfh.input_xfrm || data.input_xfrm; + xfrm_sym = rxfh.input_xfrm || data.input_xfrm; if (rxfh.input_xfrm == data.input_xfrm) rxfh.input_xfrm = RXH_XFRM_NO_CHANGE; From dbbec8c5a79f4c7aa8d07da8c0b5a34d76c50699 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 30 Jan 2026 20:04:57 +0000 Subject: [PATCH 170/195] net: stmmac: fix stm32 (and potentially others) resume regression Marek reported that suspending stm32 causes the following errors when the interface is administratively down: $ echo devices > /sys/power/pm_test $ echo mem > /sys/power/state ... ck_ker_eth2stp already disabled ... ck_ker_eth2stp already unprepared ... On suspend, stm32 starts the eth2stp clock in its suspend method, and stops it in the resume method. This is because the blamed commit omits the call to the platform glue ->suspend() method, but does make the call to the platform glue ->resume() method. This problem affects all other converted drivers as well - e.g. looking at the PCIe drivers, pci_save_state() will not be called, but pci_restore_state() will be. Similar issues affect all other drivers. Fix this by always calling the ->suspend() method, even when the network interface is down. This fixes all the conversions to the platform glue ->suspend() and ->resume() methods. Link: https://lore.kernel.org/r/20260114081809.12758-1-marex@nabladev.com Fixes: 07bbbfe7addf ("net: stmmac: add suspend()/resume() platform ops") Reported-by: Marek Vasut Tested-by: Marek Vasut Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vlujh-00000007Hkw-2p6r@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3f42843cd9ed..a379221b96a3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -8042,7 +8042,7 @@ int stmmac_suspend(struct device *dev) u32 chan; if (!ndev || !netif_running(ndev)) - return 0; + goto suspend_bsp; mutex_lock(&priv->lock); @@ -8082,6 +8082,7 @@ int stmmac_suspend(struct device *dev) if (stmmac_fpe_supported(priv)) ethtool_mmsv_stop(&priv->fpe_cfg.mmsv); +suspend_bsp: if (priv->plat->suspend) return priv->plat->suspend(dev, priv->plat->bsp_priv); From 74d9391e8849e70ded5309222d09b0ed0edbd039 Mon Sep 17 00:00:00 2001 From: Daniel Hodges Date: Sat, 31 Jan 2026 10:01:14 -0800 Subject: [PATCH 171/195] tipc: use kfree_sensitive() for session key material The rx->skey field contains a struct tipc_aead_key with GCM-AES encryption keys used for TIPC cluster communication. Using plain kfree() leaves this sensitive key material in freed memory pages where it could potentially be recovered. Switch to kfree_sensitive() to ensure the key material is zeroed before the memory is freed. Fixes: 1ef6f7c9390f ("tipc: add automatic session key exchange") Signed-off-by: Daniel Hodges Link: https://patch.msgid.link/20260131180114.2121438-1-hodgesd@meta.com Signed-off-by: Jakub Kicinski --- net/tipc/crypto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index 751904f10aab..970db62bd029 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -1219,7 +1219,7 @@ void tipc_crypto_key_flush(struct tipc_crypto *c) rx = c; tx = tipc_net(rx->net)->crypto_tx; if (cancel_delayed_work(&rx->work)) { - kfree(rx->skey); + kfree_sensitive(rx->skey); rx->skey = NULL; atomic_xchg(&rx->key_distr, 0); tipc_node_put(rx->node); @@ -2394,7 +2394,7 @@ static void tipc_crypto_work_rx(struct work_struct *work) break; default: synchronize_rcu(); - kfree(rx->skey); + kfree_sensitive(rx->skey); rx->skey = NULL; break; } From a69c17230cab07bd156f894fdc82bd78b43ea72f Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Fri, 30 Jan 2026 16:10:32 +0200 Subject: [PATCH 172/195] net: enetc: Remove SI/BDR cacheability AXI settings for ENETC v4 For ENETC v4 these settings are controlled by the global ENETC message and buffer cache attribute registers (EnBCAR and EnMCAR), from the IERB register block. The hardcoded cacheability settings were inherited from LS1028A, and should be removed from the ENETC v4 driver as they conflict with the global IERB settings. Fixes: 99100d0d9922 ("net: enetc: add preliminary support for i.MX95 ENETC PF") Signed-off-by: Claudiu Manoil Reviewed-by: Wei Fang Link: https://patch.msgid.link/20260130141035.272471-2-claudiu.manoil@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 53b26cece16a..e380a4f39855 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -2512,10 +2512,13 @@ int enetc_configure_si(struct enetc_ndev_priv *priv) struct enetc_hw *hw = &si->hw; int err; - /* set SI cache attributes */ - enetc_wr(hw, ENETC_SICAR0, - ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT); - enetc_wr(hw, ENETC_SICAR1, ENETC_SICAR_MSI); + if (is_enetc_rev1(si)) { + /* set SI cache attributes */ + enetc_wr(hw, ENETC_SICAR0, + ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT); + enetc_wr(hw, ENETC_SICAR1, ENETC_SICAR_MSI); + } + /* enable SI */ enetc_wr(hw, ENETC_SIMR, ENETC_SIMR_EN); From 9ae13b2e64fcd2ca00a76b7d60fc4641a6b9209d Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Fri, 30 Jan 2026 16:10:33 +0200 Subject: [PATCH 173/195] net: enetc: Remove CBDR cacheability AXI settings for ENETC v4 For ENETC v4 these settings are controlled by the global ENETC command cache attribute registers (EnCAR), from the IERB register block. The hardcoded CDBR cacheability settings were inherited from LS1028A, and should be removed from the ENETC v4 driver as they conflict with the global IERB settings. Fixes: e3f4a0a8ddb4 ("net: enetc: add command BD ring support for i.MX95 ENETC") Signed-off-by: Claudiu Manoil Reviewed-by: Wei Fang Link: https://patch.msgid.link/20260130141035.272471-3-claudiu.manoil@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_cbdr.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c index 3d5f31879d5c..a635bfdc30af 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c @@ -74,10 +74,6 @@ int enetc4_setup_cbdr(struct enetc_si *si) if (!user->ring) return -ENOMEM; - /* set CBDR cache attributes */ - enetc_wr(hw, ENETC_SICAR2, - ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT); - regs.pir = hw->reg + ENETC_SICBDRPIR; regs.cir = hw->reg + ENETC_SICBDRCIR; regs.mr = hw->reg + ENETC_SICBDRMR; From 21d0fc95b5920ae8e69a2c0394bef82b8392bcc9 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Fri, 30 Jan 2026 16:10:34 +0200 Subject: [PATCH 174/195] net: enetc: Convert 16-bit register writes to 32-bit for ENETC v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For ENETC v4, which is integrated into more complex SoCs (compared to v1), 16‑bit register writes are blocked in the SoC interconnect on some chips. To be fair, it is not recommended to access 32‑bit registers of this IP using lower‑width accessors (i.e. 16‑bit), and the only exception to this rule was introduced by me in the initial ENETC v1 driver for the PMAR1 register, which holds the lower 16 bits of the primary MAC address of an SI. Meanwhile, this exception has been replicated for v4 as well. Since LS1028 (the only SoC with ENETC v1) is not affected by this issue, the current patch fixes the 16‑bit writes to PMAR1 starting with ENETC v4. Fixes: 99100d0d9922 ("net: enetc: add preliminary support for i.MX95 ENETC PF") Signed-off-by: Claudiu Manoil Reviewed-by: Wei Fang Link: https://patch.msgid.link/20260130141035.272471-4-claudiu.manoil@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc4_pf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c index 498346dd996a..c0859d200a2c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c @@ -59,10 +59,10 @@ static void enetc4_pf_set_si_primary_mac(struct enetc_hw *hw, int si, if (si != 0) { __raw_writel(upper, hw->port + ENETC4_PSIPMAR0(si)); - __raw_writew(lower, hw->port + ENETC4_PSIPMAR1(si)); + __raw_writel(lower, hw->port + ENETC4_PSIPMAR1(si)); } else { __raw_writel(upper, hw->port + ENETC4_PMAR0); - __raw_writew(lower, hw->port + ENETC4_PMAR1); + __raw_writel(lower, hw->port + ENETC4_PMAR1); } } From c28d765ec5da160d3a48d0928528084cef97bf19 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Fri, 30 Jan 2026 16:10:35 +0200 Subject: [PATCH 175/195] net: enetc: Convert 16-bit register reads to 32-bit for ENETC v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is not recommended to access the 32‑bit registers of this hardware IP using lower‑width accessors (i.e. 16‑bit), and the only exception to this rule was introduced in the initial ENETC v1 driver for the PMAR1 register, which holds the lower 16 bits of the primary MAC address of an SI. Meanwhile, this exception has been replicated in the v4 driver code as well. Since LS1028 (the only SoC with ENETC v1) is not affected by this issue, the current patch converts the 16‑bit reads from PMAR1 starting with ENETC v4. Fixes: 99100d0d9922 ("net: enetc: add preliminary support for i.MX95 ENETC PF") Signed-off-by: Claudiu Manoil Reviewed-by: Wei Fang Link: https://patch.msgid.link/20260130141035.272471-5-claudiu.manoil@nxp.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/freescale/enetc/enetc4_pf.c | 2 +- drivers/net/ethernet/freescale/enetc/enetc_hw.h | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c index c0859d200a2c..5850540634b0 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c @@ -73,7 +73,7 @@ static void enetc4_pf_get_si_primary_mac(struct enetc_hw *hw, int si, u16 lower; upper = __raw_readl(hw->port + ENETC4_PSIPMAR0(si)); - lower = __raw_readw(hw->port + ENETC4_PSIPMAR1(si)); + lower = __raw_readl(hw->port + ENETC4_PSIPMAR1(si)); put_unaligned_le32(upper, addr); put_unaligned_le16(lower, addr + 4); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 7b882b8921fe..662e4fbafb74 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -708,13 +708,24 @@ struct enetc_cmd_rfse { #define ENETC_RFSE_EN BIT(15) #define ENETC_RFSE_MODE_BD 2 +static inline void enetc_get_primary_mac_addr(struct enetc_hw *hw, u8 *addr) +{ + u32 upper; + u16 lower; + + upper = __raw_readl(hw->reg + ENETC_SIPMAR0); + lower = __raw_readl(hw->reg + ENETC_SIPMAR1); + + put_unaligned_le32(upper, addr); + put_unaligned_le16(lower, addr + 4); +} + static inline void enetc_load_primary_mac_addr(struct enetc_hw *hw, struct net_device *ndev) { - u8 addr[ETH_ALEN] __aligned(4); + u8 addr[ETH_ALEN]; - *(u32 *)addr = __raw_readl(hw->reg + ENETC_SIPMAR0); - *(u16 *)(addr + 4) = __raw_readw(hw->reg + ENETC_SIPMAR1); + enetc_get_primary_mac_addr(hw, addr); eth_hw_addr_set(ndev, addr); } From 16459fe7e0ca6520a6e8f603de4ccd52b90fd765 Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Mon, 26 Jan 2026 21:10:46 +0000 Subject: [PATCH 176/195] x86/kfence: fix booting on 32bit non-PAE systems The original patch inverted the PTE unconditionally to avoid L1TF-vulnerable PTEs, but Linux doesn't make this adjustment in 2-level paging. Adjust the logic to use the flip_protnone_guard() helper, which is a nop on 2-level paging but inverts the address bits in all other paging modes. This doesn't matter for the Xen aspect of the original change. Linux no longer supports running 32bit PV under Xen, and Xen doesn't support running any 32bit PV guests without using PAE paging. Link: https://lkml.kernel.org/r/20260126211046.2096622-1-andrew.cooper3@citrix.com Fixes: b505f1944535 ("x86/kfence: avoid writing L1TF-vulnerable PTEs") Reported-by: Ryusuke Konishi Closes: https://lore.kernel.org/lkml/CAKFNMokwjw68ubYQM9WkzOuH51wLznHpEOMSqtMoV1Rn9JV_gw@mail.gmail.com/ Signed-off-by: Andrew Cooper Tested-by: Ryusuke Konishi Tested-by: Borislav Petkov (AMD) Cc: Alexander Potapenko Cc: Marco Elver Cc: Dmitry Vyukov Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Jann Horn Cc: Signed-off-by: Andrew Morton --- arch/x86/include/asm/kfence.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/kfence.h b/arch/x86/include/asm/kfence.h index acf9ffa1a171..dfd5c74ba41a 100644 --- a/arch/x86/include/asm/kfence.h +++ b/arch/x86/include/asm/kfence.h @@ -42,7 +42,7 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) { unsigned int level; pte_t *pte = lookup_address(addr, &level); - pteval_t val; + pteval_t val, new; if (WARN_ON(!pte || level != PG_LEVEL_4K)) return false; @@ -57,11 +57,12 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) return true; /* - * Otherwise, invert the entire PTE. This avoids writing out an + * Otherwise, flip the Present bit, taking care to avoid writing an * L1TF-vulnerable PTE (not present, without the high address bits * set). */ - set_pte(pte, __pte(~val)); + new = val ^ _PAGE_PRESENT; + set_pte(pte, __pte(flip_protnone_guard(val, new, PTE_PFN_MASK))); /* * If the page was protected (non-present) and we're making it From 011d4e52a76cf4131ae612869b9874c09eef7657 Mon Sep 17 00:00:00 2001 From: "Pratyush Yadav (Google)" Date: Tue, 27 Jan 2026 00:02:52 +0100 Subject: [PATCH 177/195] liveupdate: luo_file: do not clear serialized_data on unfreeze Patch series "liveupdate: fixes in error handling". This series contains some fixes in LUO's error handling paths. The first patch deals with failed freeze() attempts. The cleanup path calls unfreeze, and that clears some data needed by later unpreserve calls. The second patch is a bit more involved. It deals with failed retrieve() attempts. To do so properly, it reworks some of the error handling logic in luo_file core. Both these fixes are "theoretical" -- in the sense that I have not been able to reproduce either of them in normal operation. The only supported file type right now is memfd, and there is nothing userspace can do right now to make it fail its retrieve or freeze. I need to make the retrieve or freeze fail by artificially injecting errors. The injected errors trigger a use-after-free and a double-free. That said, once more complex file handlers are added or memfd preservation is used in ways not currently expected or covered by the tests, we will be able to see them on real systems. This patch (of 2): The unfreeze operation is supposed to undo the effects of the freeze operation. serialized_data is not set by freeze, but by preserve. Consequently, the unpreserve operation needs to access serialized_data to undo the effects of the preserve operation. This includes freeing the serialized data structures for example. If a freeze callback fails, unfreeze is called for all frozen files. This would clear serialized_data for them. Since live update has failed, it can be expected that userspace aborts, releasing all sessions. When the sessions are released, unpreserve will be called for all files. The unfrozen files will see 0 in their serialized_data. This is not expected by file handlers, and they might either fail, leaking data and state, or might even crash or cause invalid memory access. Do not clear serialized_data on unfreeze so it gets passed on to unpreserve. There is no need to clear it on unpreserve since luo_file will be freed immediately after. Link: https://lkml.kernel.org/r/20260126230302.2936817-1-pratyush@kernel.org Link: https://lkml.kernel.org/r/20260126230302.2936817-2-pratyush@kernel.org Fixes: 7c722a7f44e0 ("liveupdate: luo_file: implement file systems callbacks") Signed-off-by: Pratyush Yadav (Google) Reviewed-by: Mike Rapoport (Microsoft) Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- kernel/liveupdate/luo_file.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/liveupdate/luo_file.c b/kernel/liveupdate/luo_file.c index a32a777f6df8..9f7283379ebc 100644 --- a/kernel/liveupdate/luo_file.c +++ b/kernel/liveupdate/luo_file.c @@ -402,8 +402,6 @@ static void luo_file_unfreeze_one(struct luo_file_set *file_set, luo_file->fh->ops->unfreeze(&args); } - - luo_file->serialized_data = 0; } static void __luo_file_unfreeze(struct luo_file_set *file_set, From f1675db3c7f1838dbbec4f1768d680e0acbc6721 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Wed, 28 Jan 2026 18:39:15 +0100 Subject: [PATCH 178/195] mailmap: update Alexander Mikhalitsyn's emails Link: https://lkml.kernel.org/r/20260128173915.162309-1-alexander@mihalicyn.com Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index a64f269dd7cb..f0aa92beab95 100644 --- a/.mailmap +++ b/.mailmap @@ -33,6 +33,7 @@ Alexander Lobakin Alexander Lobakin Alexander Mikhalitsyn Alexander Mikhalitsyn +Alexander Mikhalitsyn Alexander Sverdlin Alexander Sverdlin Alexander Sverdlin From 2030dddf95451b4e7a389f052091e7c4b7b274c6 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Thu, 29 Jan 2026 00:19:23 +0800 Subject: [PATCH 179/195] mm, shmem: prevent infinite loop on truncate race When truncating a large swap entry, shmem_free_swap() returns 0 when the entry's index doesn't match the given index due to lookup alignment. The failure fallback path checks if the entry crosses the end border and aborts when it happens, so truncate won't erase an unexpected entry or range. But one scenario was ignored. When `index` points to the middle of a large swap entry, and the large swap entry doesn't go across the end border, find_get_entries() will return that large swap entry as the first item in the batch with `indices[0]` equal to `index`. The entry's base index will be smaller than `indices[0]`, so shmem_free_swap() will fail and return 0 due to the "base < index" check. The code will then call shmem_confirm_swap(), get the order, check if it crosses the END boundary (which it doesn't), and retry with the same index. The next iteration will find the same entry again at the same index with same indices, leading to an infinite loop. Fix this by retrying with a round-down index, and abort if the index is smaller than the truncate range. Link: https://lkml.kernel.org/r/aXo6ltB5iqAKJzY8@KASONG-MC4 Fixes: 809bc86517cc ("mm: shmem: support large folio swap out") Fixes: 8a1968bd997f ("mm/shmem, swap: fix race of truncate and swap entry split") Signed-off-by: Kairui Song Reported-by: Chris Mason Closes: https://lore.kernel.org/linux-mm/20260128130336.727049-1-clm@meta.com/ Reviewed-by: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: Hugh Dickins Cc: Kemeng Shi Cc: Nhat Pham Cc: Signed-off-by: Andrew Morton --- mm/shmem.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 6c3485d24d66..79af5f9f8b90 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1211,17 +1211,22 @@ whole_folios: swaps_freed = shmem_free_swap(mapping, indices[i], end - 1, folio); if (!swaps_freed) { - /* - * If found a large swap entry cross the end border, - * skip it as the truncate_inode_partial_folio above - * should have at least zerod its content once. - */ + pgoff_t base = indices[i]; + order = shmem_confirm_swap(mapping, indices[i], radix_to_swp_entry(folio)); - if (order > 0 && indices[i] + (1 << order) > end) - continue; - /* Swap was replaced by page: retry */ - index = indices[i]; + /* + * If found a large swap entry cross the end or start + * border, skip it as the truncate_inode_partial_folio + * above should have at least zerod its content once. + */ + if (order > 0) { + base = round_down(base, 1 << order); + if (base < start || base + (1 << order) > end) + continue; + } + /* Swap was replaced by page or extended, retry */ + index = base; break; } nr_swaps_freed += swaps_freed; From 1a47837bfafed7e9ef93f5dfdea6d70869b0c3ab Mon Sep 17 00:00:00 2001 From: Li Chen Date: Fri, 30 Jan 2026 19:20:33 +0800 Subject: [PATCH 180/195] Documentation: document liveupdate cmdline parameter liveupdate is used to enable Live Update Orchestrator (LUO) early during boot. Add it to kernel-parameters.txt so users can discover and use it. Link: https://lkml.kernel.org/r/20260130112036.359806-1-me@linux.beauty Signed-off-by: Li Chen Acked-by: Mike Rapoport (Microsoft) Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Frank van der Linden Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Kees Cook Cc: Li RongQing Cc: Pawan Gupta Cc: Randy Dunlap Cc: Pratyush Yadav Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- Documentation/admin-guide/kernel-parameters.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 1058f2a6d6a8..aa0031108bc1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3472,6 +3472,11 @@ Kernel parameters If there are multiple matching configurations changing the same attribute, the last one is used. + liveupdate= [KNL,EARLY] + Format: + Enable Live Update Orchestrator (LUO). + Default: off. + load_ramdisk= [RAM] [Deprecated] lockd.nlm_grace_period=P [NFS] Assign grace period. From 3125fc17016945b11e9725c6aff30ff3326fd58f Mon Sep 17 00:00:00 2001 From: Tomas Hlavacek Date: Fri, 30 Jan 2026 11:23:01 +0100 Subject: [PATCH 181/195] net: spacemit: k1-emac: fix jumbo frame support The driver never programs the MAC frame size and jabber registers, causing the hardware to reject frames larger than the default 1518 bytes even when larger DMA buffers are allocated. Program MAC_MAXIMUM_FRAME_SIZE, MAC_TRANSMIT_JABBER_SIZE, and MAC_RECEIVE_JABBER_SIZE based on the configured MTU. Also fix the maximum buffer size from 4096 to 4095, since the descriptor buffer size field is only 12 bits. Account for double VLAN tags in frame size calculations. Fixes: bfec6d7f2001 ("net: spacemit: Add K1 Ethernet MAC") Cc: stable@vger.kernel.org Signed-off-by: Tomas Hlavacek Link: https://patch.msgid.link/20260130102301.477514-1-tmshlvck@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/spacemit/k1_emac.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/spacemit/k1_emac.c b/drivers/net/ethernet/spacemit/k1_emac.c index 88e9424d2d51..b49c4708bf9e 100644 --- a/drivers/net/ethernet/spacemit/k1_emac.c +++ b/drivers/net/ethernet/spacemit/k1_emac.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -38,7 +39,7 @@ #define EMAC_DEFAULT_BUFSIZE 1536 #define EMAC_RX_BUF_2K 2048 -#define EMAC_RX_BUF_4K 4096 +#define EMAC_RX_BUF_MAX FIELD_MAX(RX_DESC_1_BUFFER_SIZE_1_MASK) /* Tuning parameters from SpacemiT */ #define EMAC_TX_FRAMES 64 @@ -202,8 +203,7 @@ static void emac_init_hw(struct emac_priv *priv) { /* Destination address for 802.3x Ethernet flow control */ u8 fc_dest_addr[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x01 }; - - u32 rxirq = 0, dma = 0; + u32 rxirq = 0, dma = 0, frame_sz; regmap_set_bits(priv->regmap_apmu, priv->regmap_apmu_offset + APMU_EMAC_CTRL_REG, @@ -228,6 +228,15 @@ static void emac_init_hw(struct emac_priv *priv) DEFAULT_TX_THRESHOLD); emac_wr(priv, MAC_RECEIVE_PACKET_START_THRESHOLD, DEFAULT_RX_THRESHOLD); + /* Set maximum frame size and jabber size based on configured MTU, + * accounting for Ethernet header, double VLAN tags, and FCS. + */ + frame_sz = priv->ndev->mtu + ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN; + + emac_wr(priv, MAC_MAXIMUM_FRAME_SIZE, frame_sz); + emac_wr(priv, MAC_TRANSMIT_JABBER_SIZE, frame_sz); + emac_wr(priv, MAC_RECEIVE_JABBER_SIZE, frame_sz); + /* Configure flow control (enabled in emac_adjust_link() later) */ emac_set_mac_addr_reg(priv, fc_dest_addr, MAC_FC_SOURCE_ADDRESS_HIGH); emac_wr(priv, MAC_FC_PAUSE_HIGH_THRESHOLD, DEFAULT_FC_FIFO_HIGH); @@ -924,14 +933,14 @@ static int emac_change_mtu(struct net_device *ndev, int mtu) return -EBUSY; } - frame_len = mtu + ETH_HLEN + ETH_FCS_LEN; + frame_len = mtu + ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN; if (frame_len <= EMAC_DEFAULT_BUFSIZE) priv->dma_buf_sz = EMAC_DEFAULT_BUFSIZE; else if (frame_len <= EMAC_RX_BUF_2K) priv->dma_buf_sz = EMAC_RX_BUF_2K; else - priv->dma_buf_sz = EMAC_RX_BUF_4K; + priv->dma_buf_sz = EMAC_RX_BUF_MAX; ndev->mtu = mtu; @@ -2025,7 +2034,7 @@ static int emac_probe(struct platform_device *pdev) ndev->hw_features = NETIF_F_SG; ndev->features |= ndev->hw_features; - ndev->max_mtu = EMAC_RX_BUF_4K - (ETH_HLEN + ETH_FCS_LEN); + ndev->max_mtu = EMAC_RX_BUF_MAX - (ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN); ndev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; priv = netdev_priv(ndev); From 29fb415a6a72c9207d118dd0a7a37184a14a3680 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 30 Jan 2026 17:06:45 +0000 Subject: [PATCH 182/195] btrfs: raid56: fix memory leak of btrfs_raid_bio::stripe_uptodate_bitmap We allocate the bitmap but we never free it in free_raid_bio_pointers(). Fix this by adding a bitmap_free() call against the stripe_uptodate_bitmap of a raid bio. Fixes: 1810350b04ef ("btrfs: raid56: move sector_ptr::uptodate into a dedicated bitmap") Reported-by: Christoph Hellwig Link: https://lore.kernel.org/linux-btrfs/20260126045315.GA31641@lst.de/ Reviewed-by: Qu Wenruo Tested-by: Christoph Hellwig Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index f38d8305e46d..baadaaa189c0 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -150,6 +150,7 @@ static void scrub_rbio_work_locked(struct work_struct *work); static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio) { bitmap_free(rbio->error_bitmap); + bitmap_free(rbio->stripe_uptodate_bitmap); kfree(rbio->stripe_pages); kfree(rbio->bio_paddrs); kfree(rbio->stripe_paddrs); From 5ff641011ab7fb63ea101251087745d9826e8ef5 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 29 Jan 2026 21:27:09 +0200 Subject: [PATCH 183/195] wifi: iwlwifi: mld: cancel mlo_scan_start_wk mlo_scan_start_wk is not canceled on disconnection. In fact, it is not canceled anywhere except in the restart cleanup, where we don't really have to. This can cause an init-after-queue issue: if, for example, the work was queued and then drv_change_interface got executed. This can also cause use-after-free: if the work is executed after the vif is freed. Fixes: 9748ad82a9d9 ("wifi: iwlwifi: defer MLO scan after link activation") Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260129212650.a36482a60719.I5bf64a108ca39dacb5ca0dcd8b7258a3ce8db74c@changeid --- drivers/net/wireless/intel/iwlwifi/mld/iface.c | 2 -- drivers/net/wireless/intel/iwlwifi/mld/mac80211.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/iface.c b/drivers/net/wireless/intel/iwlwifi/mld/iface.c index a5ececfc13e4..f15d1f5d1bf5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/iface.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/iface.c @@ -55,8 +55,6 @@ void iwl_mld_cleanup_vif(void *data, u8 *mac, struct ieee80211_vif *vif) ieee80211_iter_keys(mld->hw, vif, iwl_mld_cleanup_keys_iter, NULL); - wiphy_delayed_work_cancel(mld->wiphy, &mld_vif->mlo_scan_start_wk); - CLEANUP_STRUCT(mld_vif); } diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index 55b484c16280..cd0dce8de856 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -1759,6 +1759,8 @@ static int iwl_mld_move_sta_state_down(struct iwl_mld *mld, wiphy_work_cancel(mld->wiphy, &mld_vif->emlsr.unblock_tpt_wk); wiphy_delayed_work_cancel(mld->wiphy, &mld_vif->emlsr.check_tpt_wk); + wiphy_delayed_work_cancel(mld->wiphy, + &mld_vif->mlo_scan_start_wk); iwl_mld_reset_cca_40mhz_workaround(mld, vif); iwl_mld_smps_workaround(mld, vif, true); From fb7f54aa2a99b07945911152c5d3d4a6eb39f797 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 29 Jan 2026 21:27:10 +0200 Subject: [PATCH 184/195] wifi: iwlwifi: mvm: pause TCM on fast resume Not pausing it means that we can have the TCM work queued into a non-freezable workqueue, which, in resume, is re-activated before the driver's resume is called. The TCM work might send commands to the FW before we resumed the device, leading to an assert. Closes: https://lore.kernel.org/linux-wireless/aTDoDiD55qlUZ0pn@debian.local/ Tested-by: Chris Bainbridge Fixes: e8bb19c1d590 ("wifi: iwlwifi: support fast resume") Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260129212650.05621f3faedb.I44df9cf9183b5143df8078131e0d87c0fd7e1763@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 07f1a84c274e..af1a45845999 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2012-2014, 2018-2025 Intel Corporation + * Copyright (C) 2012-2014, 2018-2026 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -3239,6 +3239,8 @@ void iwl_mvm_fast_suspend(struct iwl_mvm *mvm) IWL_DEBUG_WOWLAN(mvm, "Starting fast suspend flow\n"); + iwl_mvm_pause_tcm(mvm, true); + mvm->fast_resume = true; set_bit(IWL_MVM_STATUS_IN_D3, &mvm->status); @@ -3295,6 +3297,8 @@ int iwl_mvm_fast_resume(struct iwl_mvm *mvm) mvm->trans->state = IWL_TRANS_NO_FW; } + iwl_mvm_resume_tcm(mvm); + out: clear_bit(IWL_MVM_STATUS_IN_D3, &mvm->status); mvm->fast_resume = false; From 78211543d2e44f84093049b4ef5f5bfa535f4645 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Mon, 2 Feb 2026 12:02:28 +0800 Subject: [PATCH 185/195] net: ethernet: adi: adin1110: Check return value of devm_gpiod_get_optional() in adin1110_check_spi() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The devm_gpiod_get_optional() function may return an ERR_PTR in case of genuine GPIO acquisition errors, not just NULL which indicates the legitimate absence of an optional GPIO. Add an IS_ERR() check after the call in adin1110_check_spi(). On error, return the error code to ensure proper failure handling rather than proceeding with invalid pointers. Fixes: 36934cac7aaf ("net: ethernet: adi: adin1110: add reset GPIO") Signed-off-by: Chen Ni Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20260202040228.4129097-1-nichen@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/adi/adin1110.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/adi/adin1110.c b/drivers/net/ethernet/adi/adin1110.c index 30f9d271e595..71a2397edf2b 100644 --- a/drivers/net/ethernet/adi/adin1110.c +++ b/drivers/net/ethernet/adi/adin1110.c @@ -1089,6 +1089,9 @@ static int adin1110_check_spi(struct adin1110_priv *priv) reset_gpio = devm_gpiod_get_optional(&priv->spidev->dev, "reset", GPIOD_OUT_LOW); + if (IS_ERR(reset_gpio)) + return dev_err_probe(&priv->spidev->dev, PTR_ERR(reset_gpio), + "failed to get reset gpio\n"); if (reset_gpio) { /* MISO pin is used for internal configuration, can't have * anyone else disturbing the SDO line. From f613e8b4afea0cd17c7168e8b00e25bc8d33175d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Feb 2026 20:52:17 +0000 Subject: [PATCH 186/195] net: add proper RCU protection to /proc/net/ptype Yin Fengwei reported an RCU stall in ptype_seq_show() and provided a patch. Real issue is that ptype_seq_next() and ptype_seq_show() violate RCU rules. ptype_seq_show() runs under rcu_read_lock(), and reads pt->dev to get device name without any barrier. At the same time, concurrent writers can remove a packet_type structure (which is correctly freed after an RCU grace period) and clear pt->dev without an RCU grace period. Define ptype_iter_state to carry a dev pointer along seq_net_private: struct ptype_iter_state { struct seq_net_private p; struct net_device *dev; // added in this patch }; We need to record the device pointer in ptype_get_idx() and ptype_seq_next() so that ptype_seq_show() is safe against concurrent pt->dev changes. We also need to add full RCU protection in ptype_seq_next(). (Missing READ_ONCE() when reading list.next values) Many thanks to Dong Chenchen for providing a repro. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Fixes: 1d10f8a1f40b ("net-procfs: show net devices bound packet types") Fixes: c353e8983e0d ("net: introduce per netns packet chains") Reported-by: Yin Fengwei Reported-by: Dong Chenchen Closes: https://lore.kernel.org/netdev/CANn89iKRRKPnWjJmb-_3a=sq+9h6DvTQM4DBZHT5ZRGPMzQaiA@mail.gmail.com/T/#m7b80b9fc9b9267f90e0b7aad557595f686f9c50d Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Tested-by: Yin Fengwei Link: https://patch.msgid.link/20260202205217.2881198-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/net-procfs.c | 50 +++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 70e0e9a3b650..7dbfa6109f0b 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -170,8 +170,14 @@ static const struct seq_operations softnet_seq_ops = { .show = softnet_seq_show, }; +struct ptype_iter_state { + struct seq_net_private p; + struct net_device *dev; +}; + static void *ptype_get_idx(struct seq_file *seq, loff_t pos) { + struct ptype_iter_state *iter = seq->private; struct list_head *ptype_list = NULL; struct packet_type *pt = NULL; struct net_device *dev; @@ -181,12 +187,16 @@ static void *ptype_get_idx(struct seq_file *seq, loff_t pos) for_each_netdev_rcu(seq_file_net(seq), dev) { ptype_list = &dev->ptype_all; list_for_each_entry_rcu(pt, ptype_list, list) { - if (i == pos) + if (i == pos) { + iter->dev = dev; return pt; + } ++i; } } + iter->dev = NULL; + list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_all, list) { if (i == pos) return pt; @@ -218,6 +228,7 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct ptype_iter_state *iter = seq->private; struct net *net = seq_file_net(seq); struct net_device *dev; struct packet_type *pt; @@ -229,19 +240,21 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) return ptype_get_idx(seq, 0); pt = v; - nxt = pt->list.next; - if (pt->dev) { - if (nxt != &pt->dev->ptype_all) + nxt = READ_ONCE(pt->list.next); + dev = iter->dev; + if (dev) { + if (nxt != &dev->ptype_all) goto found; - dev = pt->dev; for_each_netdev_continue_rcu(seq_file_net(seq), dev) { - if (!list_empty(&dev->ptype_all)) { - nxt = dev->ptype_all.next; + nxt = READ_ONCE(dev->ptype_all.next); + if (nxt != &dev->ptype_all) { + iter->dev = dev; goto found; } } - nxt = net->ptype_all.next; + iter->dev = NULL; + nxt = READ_ONCE(net->ptype_all.next); goto net_ptype_all; } @@ -252,20 +265,20 @@ net_ptype_all: if (nxt == &net->ptype_all) { /* continue with ->ptype_specific if it's not empty */ - nxt = net->ptype_specific.next; + nxt = READ_ONCE(net->ptype_specific.next); if (nxt != &net->ptype_specific) goto found; } hash = 0; - nxt = ptype_base[0].next; + nxt = READ_ONCE(ptype_base[0].next); } else hash = ntohs(pt->type) & PTYPE_HASH_MASK; while (nxt == &ptype_base[hash]) { if (++hash >= PTYPE_HASH_SIZE) return NULL; - nxt = ptype_base[hash].next; + nxt = READ_ONCE(ptype_base[hash].next); } found: return list_entry(nxt, struct packet_type, list); @@ -279,19 +292,24 @@ static void ptype_seq_stop(struct seq_file *seq, void *v) static int ptype_seq_show(struct seq_file *seq, void *v) { + struct ptype_iter_state *iter = seq->private; struct packet_type *pt = v; + struct net_device *dev; - if (v == SEQ_START_TOKEN) + if (v == SEQ_START_TOKEN) { seq_puts(seq, "Type Device Function\n"); - else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) && - (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) { + return 0; + } + dev = iter->dev; + if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) && + (!dev || net_eq(dev_net(dev), seq_file_net(seq)))) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else seq_printf(seq, "%04x", ntohs(pt->type)); seq_printf(seq, " %-8s %ps\n", - pt->dev ? pt->dev->name : "", pt->func); + dev ? dev->name : "", pt->func); } return 0; @@ -315,7 +333,7 @@ static int __net_init dev_proc_net_init(struct net *net) &softnet_seq_ops)) goto out_dev; if (!proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops, - sizeof(struct seq_net_private))) + sizeof(struct ptype_iter_state))) goto out_softnet; if (wext_proc_init(net)) From 5c2c3c38be396257a6a2e55bd601a12bb9781507 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 2 Feb 2026 12:43:14 +0100 Subject: [PATCH 187/195] net: gro: fix outer network offset The udp GRO complete stage assumes that all the packets inserted the RX have the `encapsulation` flag zeroed. Such assumption is not true, as a few H/W NICs can set such flag when H/W offloading the checksum for an UDP encapsulated traffic, the tun driver can inject GSO packets with UDP encapsulation and the problematic layout can also be created via a veth based setup. Due to the above, in the problematic scenarios, udp4_gro_complete() uses the wrong network offset (inner instead of outer) to compute the outer UDP header pseudo checksum, leading to csum validation errors later on in packet processing. Address the issue always clearing the encapsulation flag at GRO completion time. Such flag will be set again as needed for encapsulated packets by udp_gro_complete(). Fixes: 5ef31ea5d053 ("net: gro: fix udp bad offset in socket lookup by adding {inner_}network_offset to napi_gro_cb") Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/562638dbebb3b15424220e26a180274b387e2a88.1770032084.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- net/core/gro.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/gro.c b/net/core/gro.c index 76f9c3712422..482fa7d7f598 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -265,6 +265,8 @@ static void gro_complete(struct gro_node *gro, struct sk_buff *skb) goto out; } + /* NICs can feed encapsulated packets into GRO */ + skb->encapsulation = 0; rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { if (ptype->type != type || !ptype->callbacks.gro_complete) From bee60ce21b751275b3a7766f614373ef02dde512 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 2 Feb 2026 12:43:15 +0100 Subject: [PATCH 188/195] selftest: net: add a test-case for encap segmentation after GRO We had a few patches in this area and no explicit coverage so far. The test case covers the scenario addressed by the previous fix; reusing the existing udpgro_fwd.sh script to leverage part of the of the virtual network setup, even if such script is possibly not a perfect fit. Note that the mentioned script already contains several shellcheck violation; this patch does not fix the existing code, just avoids adding more issues in the new one. Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/768ca132af81e83856e34d3105b86c37e566a7ad.1770032084.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgro_fwd.sh | 64 +++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index a39fdc4aa2ff..9b722c1e4b0f 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -162,6 +162,39 @@ run_test() { echo " ok" } +run_test_csum() { + local -r msg="$1" + local -r dst="$2" + local csum_error_filter=UdpInCsumErrors + local csum_errors + + printf "%-40s" "$msg" + + is_ipv6 "$dst" && csum_error_filter=Udp6InCsumErrors + + ip netns exec "$NS_DST" iperf3 -s -1 >/dev/null & + wait_local_port_listen "$NS_DST" 5201 tcp + local spid="$!" + ip netns exec "$NS_SRC" iperf3 -c "$dst" -t 2 >/dev/null + local retc="$?" + wait "$spid" + local rets="$?" + if [ "$rets" -ne 0 ] || [ "$retc" -ne 0 ]; then + echo " fail client exit code $retc, server $rets" + ret=1 + return + fi + + csum_errors=$(ip netns exec "$NS_DST" nstat -as "$csum_error_filter" | + grep "$csum_error_filter" | awk '{print $2}') + if [ -n "$csum_errors" ] && [ "$csum_errors" -gt 0 ]; then + echo " fail - csum error on receive $csum_errors, expected 0" + ret=1 + return + fi + echo " ok" +} + run_bench() { local -r msg=$1 local -r dst=$2 @@ -260,6 +293,37 @@ for family in 4 6; do ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST cleanup + + # force segmentation and re-aggregation + create_vxlan_pair + ip netns exec "$NS_DST" ethtool -K veth"$DST" generic-receive-offload on + ip netns exec "$NS_SRC" ethtool -K veth"$SRC" tso off + ip -n "$NS_SRC" link set dev veth"$SRC" mtu 1430 + + # forward to a 2nd veth pair + ip -n "$NS_DST" link add br0 type bridge + ip -n "$NS_DST" link set dev veth"$DST" master br0 + + # segment the aggregated TSO packet, without csum offload + ip -n "$NS_DST" link add veth_segment type veth peer veth_rx + for FEATURE in tso tx-udp-segmentation tx-checksumming; do + ip netns exec "$NS_DST" ethtool -K veth_segment "$FEATURE" off + done + ip -n "$NS_DST" link set dev veth_segment master br0 up + ip -n "$NS_DST" link set dev br0 up + ip -n "$NS_DST" link set dev veth_rx up + + # move the lower layer IP in the last added veth + for ADDR in "$BM_NET_V4$DST/24" "$BM_NET_V6$DST/64"; do + # the dad argument will let iproute emit a unharmful warning + # with ipv4 addresses + ip -n "$NS_DST" addr del dev veth"$DST" "$ADDR" + ip -n "$NS_DST" addr add dev veth_rx "$ADDR" \ + nodad 2>/dev/null + done + + run_test_csum "GSO after GRO" "$OL_NET$DST" + cleanup done exit $ret From 7b9ebcce0296e104a0d82a6b09d68564806158ff Mon Sep 17 00:00:00 2001 From: Debarghya Kundu Date: Mon, 2 Feb 2026 19:39:24 +0000 Subject: [PATCH 189/195] gve: Fix stats report corruption on queue count change The driver and the NIC share a region in memory for stats reporting. The NIC calculates its offset into this region based on the total size of the stats region and the size of the NIC's stats. When the number of queues is changed, the driver's stats region is resized. If the queue count is increased, the NIC can write past the end of the allocated stats region, causing memory corruption. If the queue count is decreased, there is a gap between the driver and NIC stats, leading to incorrect stats reporting. This change fixes the issue by allocating stats region with maximum size, and the offset calculation for NIC stats is changed to match with the calculation of the NIC. Cc: stable@vger.kernel.org Fixes: 24aeb56f2d38 ("gve: Add Gvnic stats AQ command and ethtool show/set-priv-flags.") Signed-off-by: Debarghya Kundu Reviewed-by: Joshua Washington Signed-off-by: Harshitha Ramamurthy Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20260202193925.3106272-2-hramamurthy@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_ethtool.c | 54 ++++++++++++------- drivers/net/ethernet/google/gve/gve_main.c | 4 +- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 311b106160b2..137dd7285bda 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -156,7 +156,8 @@ gve_get_ethtool_stats(struct net_device *netdev, u64 rx_buf_alloc_fail, rx_desc_err_dropped_pkt, rx_hsplit_unsplit_pkt, rx_pkts, rx_hsplit_pkt, rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes, tx_dropped; - int stats_idx, base_stats_idx, max_stats_idx; + int rx_base_stats_idx, max_rx_stats_idx, max_tx_stats_idx; + int stats_idx, stats_region_len, nic_stats_len; struct stats *report_stats; int *rx_qid_to_stats_idx; int *tx_qid_to_stats_idx; @@ -265,20 +266,38 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = priv->stats_report_trigger_cnt; i = GVE_MAIN_STATS_LEN; - /* For rx cross-reporting stats, start from nic rx stats in report */ - base_stats_idx = GVE_TX_STATS_REPORT_NUM * num_tx_queues + - GVE_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues; - /* The boundary between driver stats and NIC stats shifts if there are - * stopped queues. - */ - base_stats_idx += NIC_RX_STATS_REPORT_NUM * num_stopped_rxqs + - NIC_TX_STATS_REPORT_NUM * num_stopped_txqs; - max_stats_idx = NIC_RX_STATS_REPORT_NUM * - (priv->rx_cfg.num_queues - num_stopped_rxqs) + - base_stats_idx; + rx_base_stats_idx = 0; + max_rx_stats_idx = 0; + max_tx_stats_idx = 0; + stats_region_len = priv->stats_report_len - + sizeof(struct gve_stats_report); + nic_stats_len = (NIC_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues + + NIC_TX_STATS_REPORT_NUM * num_tx_queues) * sizeof(struct stats); + if (unlikely((stats_region_len - + nic_stats_len) % sizeof(struct stats))) { + net_err_ratelimited("Starting index of NIC stats should be multiple of stats size"); + } else { + /* For rx cross-reporting stats, + * start from nic rx stats in report + */ + rx_base_stats_idx = (stats_region_len - nic_stats_len) / + sizeof(struct stats); + /* The boundary between driver stats and NIC stats + * shifts if there are stopped queues + */ + rx_base_stats_idx += NIC_RX_STATS_REPORT_NUM * + num_stopped_rxqs + NIC_TX_STATS_REPORT_NUM * + num_stopped_txqs; + max_rx_stats_idx = NIC_RX_STATS_REPORT_NUM * + (priv->rx_cfg.num_queues - num_stopped_rxqs) + + rx_base_stats_idx; + max_tx_stats_idx = NIC_TX_STATS_REPORT_NUM * + (num_tx_queues - num_stopped_txqs) + + max_rx_stats_idx; + } /* Preprocess the stats report for rx, map queue id to start index */ skip_nic_stats = false; - for (stats_idx = base_stats_idx; stats_idx < max_stats_idx; + for (stats_idx = rx_base_stats_idx; stats_idx < max_rx_stats_idx; stats_idx += NIC_RX_STATS_REPORT_NUM) { u32 stat_name = be32_to_cpu(report_stats[stats_idx].stat_name); u32 queue_id = be32_to_cpu(report_stats[stats_idx].queue_id); @@ -354,14 +373,9 @@ gve_get_ethtool_stats(struct net_device *netdev, i += priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS; } - /* For tx cross-reporting stats, start from nic tx stats in report */ - base_stats_idx = max_stats_idx; - max_stats_idx = NIC_TX_STATS_REPORT_NUM * - (num_tx_queues - num_stopped_txqs) + - max_stats_idx; - /* Preprocess the stats report for tx, map queue id to start index */ skip_nic_stats = false; - for (stats_idx = base_stats_idx; stats_idx < max_stats_idx; + /* NIC TX stats start right after NIC RX stats */ + for (stats_idx = max_rx_stats_idx; stats_idx < max_tx_stats_idx; stats_idx += NIC_TX_STATS_REPORT_NUM) { u32 stat_name = be32_to_cpu(report_stats[stats_idx].stat_name); u32 queue_id = be32_to_cpu(report_stats[stats_idx].queue_id); diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 52c5e4942cd4..dbc84de39b70 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -283,9 +283,9 @@ static int gve_alloc_stats_report(struct gve_priv *priv) int tx_stats_num, rx_stats_num; tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) * - gve_num_tx_queues(priv); + priv->tx_cfg.max_queues; rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * - priv->rx_cfg.num_queues; + priv->rx_cfg.max_queues; priv->stats_report_len = struct_size(priv->stats_report, stats, size_add(tx_stats_num, rx_stats_num)); priv->stats_report = From c7db85d579a1dccb624235534508c75fbf2dfe46 Mon Sep 17 00:00:00 2001 From: Max Yuan Date: Mon, 2 Feb 2026 19:39:25 +0000 Subject: [PATCH 190/195] gve: Correct ethtool rx_dropped calculation The gve driver's "rx_dropped" statistic, exposed via `ethtool -S`, incorrectly includes `rx_buf_alloc_fail` counts. These failures represent an inability to allocate receive buffers, not true packet drops where a received packet is discarded. This misrepresentation can lead to inaccurate diagnostics. This patch rectifies the ethtool "rx_dropped" calculation. It removes `rx_buf_alloc_fail` from the total and adds `xdp_tx_errors` and `xdp_redirect_errors`, which represent legitimate packet drops within the XDP path. Cc: stable@vger.kernel.org Fixes: 433e274b8f7b ("gve: Add stats for gve.") Signed-off-by: Max Yuan Reviewed-by: Jordan Rhee Reviewed-by: Joshua Washington Reviewed-by: Matt Olson Signed-off-by: Harshitha Ramamurthy Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20260202193925.3106272-3-hramamurthy@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_ethtool.c | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 137dd7285bda..66ddc4413f8d 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -152,10 +152,11 @@ gve_get_ethtool_stats(struct net_device *netdev, u64 tmp_rx_pkts, tmp_rx_hsplit_pkt, tmp_rx_bytes, tmp_rx_hsplit_bytes, tmp_rx_skb_alloc_fail, tmp_rx_buf_alloc_fail, tmp_rx_desc_err_dropped_pkt, tmp_rx_hsplit_unsplit_pkt, - tmp_tx_pkts, tmp_tx_bytes; + tmp_tx_pkts, tmp_tx_bytes, + tmp_xdp_tx_errors, tmp_xdp_redirect_errors; u64 rx_buf_alloc_fail, rx_desc_err_dropped_pkt, rx_hsplit_unsplit_pkt, rx_pkts, rx_hsplit_pkt, rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes, - tx_dropped; + tx_dropped, xdp_tx_errors, xdp_redirect_errors; int rx_base_stats_idx, max_rx_stats_idx, max_tx_stats_idx; int stats_idx, stats_region_len, nic_stats_len; struct stats *report_stats; @@ -199,6 +200,7 @@ gve_get_ethtool_stats(struct net_device *netdev, for (rx_pkts = 0, rx_bytes = 0, rx_hsplit_pkt = 0, rx_skb_alloc_fail = 0, rx_buf_alloc_fail = 0, rx_desc_err_dropped_pkt = 0, rx_hsplit_unsplit_pkt = 0, + xdp_tx_errors = 0, xdp_redirect_errors = 0, ring = 0; ring < priv->rx_cfg.num_queues; ring++) { if (priv->rx) { @@ -216,6 +218,9 @@ gve_get_ethtool_stats(struct net_device *netdev, rx->rx_desc_err_dropped_pkt; tmp_rx_hsplit_unsplit_pkt = rx->rx_hsplit_unsplit_pkt; + tmp_xdp_tx_errors = rx->xdp_tx_errors; + tmp_xdp_redirect_errors = + rx->xdp_redirect_errors; } while (u64_stats_fetch_retry(&priv->rx[ring].statss, start)); rx_pkts += tmp_rx_pkts; @@ -225,6 +230,8 @@ gve_get_ethtool_stats(struct net_device *netdev, rx_buf_alloc_fail += tmp_rx_buf_alloc_fail; rx_desc_err_dropped_pkt += tmp_rx_desc_err_dropped_pkt; rx_hsplit_unsplit_pkt += tmp_rx_hsplit_unsplit_pkt; + xdp_tx_errors += tmp_xdp_tx_errors; + xdp_redirect_errors += tmp_xdp_redirect_errors; } } for (tx_pkts = 0, tx_bytes = 0, tx_dropped = 0, ring = 0; @@ -250,8 +257,8 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = rx_bytes; data[i++] = tx_bytes; /* total rx dropped packets */ - data[i++] = rx_skb_alloc_fail + rx_buf_alloc_fail + - rx_desc_err_dropped_pkt; + data[i++] = rx_skb_alloc_fail + rx_desc_err_dropped_pkt + + xdp_tx_errors + xdp_redirect_errors; data[i++] = tx_dropped; data[i++] = priv->tx_timeo_cnt; data[i++] = rx_skb_alloc_fail; @@ -330,6 +337,9 @@ gve_get_ethtool_stats(struct net_device *netdev, tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; tmp_rx_desc_err_dropped_pkt = rx->rx_desc_err_dropped_pkt; + tmp_xdp_tx_errors = rx->xdp_tx_errors; + tmp_xdp_redirect_errors = + rx->xdp_redirect_errors; } while (u64_stats_fetch_retry(&priv->rx[ring].statss, start)); data[i++] = tmp_rx_bytes; @@ -340,8 +350,9 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = rx->rx_frag_alloc_cnt; /* rx dropped packets */ data[i++] = tmp_rx_skb_alloc_fail + - tmp_rx_buf_alloc_fail + - tmp_rx_desc_err_dropped_pkt; + tmp_rx_desc_err_dropped_pkt + + tmp_xdp_tx_errors + + tmp_xdp_redirect_errors; data[i++] = rx->rx_copybreak_pkt; data[i++] = rx->rx_copied_pkt; /* stats from NIC */ From c0b5dc73a38f954e780f93a549b8fe225235c07a Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 3 Feb 2026 10:18:30 +0800 Subject: [PATCH 191/195] net: cpsw_new: Execute ndo_set_rx_mode callback in a work queue Commit 1767bb2d47b7 ("ipv6: mcast: Don't hold RTNL for IPV6_ADD_MEMBERSHIP and MCAST_JOIN_GROUP.") removed the RTNL lock for IPV6_ADD_MEMBERSHIP and MCAST_JOIN_GROUP operations. However, this change triggered the following call trace on my BeagleBone Black board: WARNING: net/8021q/vlan_core.c:236 at vlan_for_each+0x120/0x124, CPU#0: rpcbind/496 RTNL: assertion failed at net/8021q/vlan_core.c (236) Modules linked in: CPU: 0 UID: 997 PID: 496 Comm: rpcbind Not tainted 6.19.0-rc6-next-20260122-yocto-standard+ #8 PREEMPT Hardware name: Generic AM33XX (Flattened Device Tree) Call trace: unwind_backtrace from show_stack+0x28/0x2c show_stack from dump_stack_lvl+0x30/0x38 dump_stack_lvl from __warn+0xb8/0x11c __warn from warn_slowpath_fmt+0x130/0x194 warn_slowpath_fmt from vlan_for_each+0x120/0x124 vlan_for_each from cpsw_add_mc_addr+0x54/0xd8 cpsw_add_mc_addr from __hw_addr_ref_sync_dev+0xc4/0xec __hw_addr_ref_sync_dev from __dev_mc_add+0x78/0x88 __dev_mc_add from igmp6_group_added+0x84/0xec igmp6_group_added from __ipv6_dev_mc_inc+0x1fc/0x2f0 __ipv6_dev_mc_inc from __ipv6_sock_mc_join+0x124/0x1b4 __ipv6_sock_mc_join from do_ipv6_setsockopt+0x84c/0x1168 do_ipv6_setsockopt from ipv6_setsockopt+0x88/0xc8 ipv6_setsockopt from do_sock_setsockopt+0xe8/0x19c do_sock_setsockopt from __sys_setsockopt+0x84/0xac __sys_setsockopt from ret_fast_syscall+0x0/0x5 This trace occurs because vlan_for_each() is called within cpsw_ndo_set_rx_mode(), which expects the RTNL lock to be held. Since modifying vlan_for_each() to operate without the RTNL lock is not straightforward, and because ndo_set_rx_mode() is invoked both with and without the RTNL lock across different code paths, simply adding rtnl_lock() in cpsw_ndo_set_rx_mode() is not a viable solution. To resolve this issue, we opt to execute the actual processing within a work queue, following the approach used by the icssg-prueth driver. Fixes: 1767bb2d47b7 ("ipv6: mcast: Don't hold RTNL for IPV6_ADD_MEMBERSHIP and MCAST_JOIN_GROUP.") Signed-off-by: Kevin Hao Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260203-bbb-v5-1-ea0ea217a85c@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/cpsw_new.c | 34 ++++++++++++++++++++++++----- drivers/net/ethernet/ti/cpsw_priv.h | 1 + 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index ab88d4c02cbd..21af0a10626a 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -248,16 +248,22 @@ static int cpsw_purge_all_mc(struct net_device *ndev, const u8 *addr, int num) return 0; } -static void cpsw_ndo_set_rx_mode(struct net_device *ndev) +static void cpsw_ndo_set_rx_mode_work(struct work_struct *work) { - struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_priv *priv = container_of(work, struct cpsw_priv, rx_mode_work); struct cpsw_common *cpsw = priv->cpsw; + struct net_device *ndev = priv->ndev; + rtnl_lock(); + if (!netif_running(ndev)) + goto unlock_rtnl; + + netif_addr_lock_bh(ndev); if (ndev->flags & IFF_PROMISC) { /* Enable promiscuous mode */ cpsw_set_promiscious(ndev, true); cpsw_ale_set_allmulti(cpsw->ale, IFF_ALLMULTI, priv->emac_port); - return; + goto unlock_addr; } /* Disable promiscuous mode */ @@ -270,6 +276,18 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev) /* add/remove mcast address either for real netdev or for vlan */ __hw_addr_ref_sync_dev(&ndev->mc, ndev, cpsw_add_mc_addr, cpsw_del_mc_addr); + +unlock_addr: + netif_addr_unlock_bh(ndev); +unlock_rtnl: + rtnl_unlock(); +} + +static void cpsw_ndo_set_rx_mode(struct net_device *ndev) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + + schedule_work(&priv->rx_mode_work); } static unsigned int cpsw_rxbuf_total_len(unsigned int len) @@ -1398,6 +1416,7 @@ static int cpsw_create_ports(struct cpsw_common *cpsw) priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG); priv->emac_port = i + 1; priv->tx_packet_min = CPSW_MIN_PACKET_SIZE; + INIT_WORK(&priv->rx_mode_work, cpsw_ndo_set_rx_mode_work); if (is_valid_ether_addr(slave_data->mac_addr)) { ether_addr_copy(priv->mac_addr, slave_data->mac_addr); @@ -1447,13 +1466,18 @@ static int cpsw_create_ports(struct cpsw_common *cpsw) static void cpsw_unregister_ports(struct cpsw_common *cpsw) { + struct net_device *ndev; + struct cpsw_priv *priv; int i = 0; for (i = 0; i < cpsw->data.slaves; i++) { - if (!cpsw->slaves[i].ndev) + ndev = cpsw->slaves[i].ndev; + if (!ndev) continue; - unregister_netdev(cpsw->slaves[i].ndev); + priv = netdev_priv(ndev); + unregister_netdev(ndev); + disable_work_sync(&priv->rx_mode_work); } } diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index 91add8925e23..acb6181c5c9e 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -391,6 +391,7 @@ struct cpsw_priv { u32 tx_packet_min; struct cpsw_ale_ratelimit ale_bc_ratelimit; struct cpsw_ale_ratelimit ale_mc_ratelimit; + struct work_struct rx_mode_work; }; #define ndev_to_cpsw(ndev) (((struct cpsw_priv *)netdev_priv(ndev))->cpsw) From 0b8c878d117319f2be34c8391a77e0f4d5c94d79 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 3 Feb 2026 10:18:31 +0800 Subject: [PATCH 192/195] net: cpsw: Execute ndo_set_rx_mode callback in a work queue Commit 1767bb2d47b7 ("ipv6: mcast: Don't hold RTNL for IPV6_ADD_MEMBERSHIP and MCAST_JOIN_GROUP.") removed the RTNL lock for IPV6_ADD_MEMBERSHIP and MCAST_JOIN_GROUP operations. However, this change triggered the following call trace on my BeagleBone Black board: WARNING: net/8021q/vlan_core.c:236 at vlan_for_each+0x120/0x124, CPU#0: rpcbind/481 RTNL: assertion failed at net/8021q/vlan_core.c (236) Modules linked in: CPU: 0 UID: 997 PID: 481 Comm: rpcbind Not tainted 6.19.0-rc7-next-20260130-yocto-standard+ #35 PREEMPT Hardware name: Generic AM33XX (Flattened Device Tree) Call trace: unwind_backtrace from show_stack+0x28/0x2c show_stack from dump_stack_lvl+0x30/0x38 dump_stack_lvl from __warn+0xb8/0x11c __warn from warn_slowpath_fmt+0x130/0x194 warn_slowpath_fmt from vlan_for_each+0x120/0x124 vlan_for_each from cpsw_add_mc_addr+0x54/0x98 cpsw_add_mc_addr from __hw_addr_ref_sync_dev+0xc4/0xec __hw_addr_ref_sync_dev from __dev_mc_add+0x78/0x88 __dev_mc_add from igmp6_group_added+0x84/0xec igmp6_group_added from __ipv6_dev_mc_inc+0x1fc/0x2f0 __ipv6_dev_mc_inc from __ipv6_sock_mc_join+0x124/0x1b4 __ipv6_sock_mc_join from do_ipv6_setsockopt+0x84c/0x1168 do_ipv6_setsockopt from ipv6_setsockopt+0x88/0xc8 ipv6_setsockopt from do_sock_setsockopt+0xe8/0x19c do_sock_setsockopt from __sys_setsockopt+0x84/0xac __sys_setsockopt from ret_fast_syscall+0x0/0x54 This trace occurs because vlan_for_each() is called within cpsw_ndo_set_rx_mode(), which expects the RTNL lock to be held. Since modifying vlan_for_each() to operate without the RTNL lock is not straightforward, and because ndo_set_rx_mode() is invoked both with and without the RTNL lock across different code paths, simply adding rtnl_lock() in cpsw_ndo_set_rx_mode() is not a viable solution. To resolve this issue, we opt to execute the actual processing within a work queue, following the approach used by the icssg-prueth driver. Please note: To reproduce this issue, I manually reverted the changes to am335x-bone-common.dtsi from commit c477358e66a3 ("ARM: dts: am335x-bone: switch to new cpsw switch drv") in order to revert to the legacy cpsw driver. Fixes: 1767bb2d47b7 ("ipv6: mcast: Don't hold RTNL for IPV6_ADD_MEMBERSHIP and MCAST_JOIN_GROUP.") Signed-off-by: Kevin Hao Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260203-bbb-v5-2-ea0ea217a85c@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/cpsw.c | 41 +++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 54c24cd3d3be..b0e18bdc2c85 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -305,12 +305,19 @@ static int cpsw_purge_all_mc(struct net_device *ndev, const u8 *addr, int num) return 0; } -static void cpsw_ndo_set_rx_mode(struct net_device *ndev) +static void cpsw_ndo_set_rx_mode_work(struct work_struct *work) { - struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_priv *priv = container_of(work, struct cpsw_priv, rx_mode_work); struct cpsw_common *cpsw = priv->cpsw; + struct net_device *ndev = priv->ndev; int slave_port = -1; + rtnl_lock(); + if (!netif_running(ndev)) + goto unlock_rtnl; + + netif_addr_lock_bh(ndev); + if (cpsw->data.dual_emac) slave_port = priv->emac_port + 1; @@ -318,7 +325,7 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev) /* Enable promiscuous mode */ cpsw_set_promiscious(ndev, true); cpsw_ale_set_allmulti(cpsw->ale, IFF_ALLMULTI, slave_port); - return; + goto unlock_addr; } else { /* Disable promiscuous mode */ cpsw_set_promiscious(ndev, false); @@ -331,6 +338,18 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev) /* add/remove mcast address either for real netdev or for vlan */ __hw_addr_ref_sync_dev(&ndev->mc, ndev, cpsw_add_mc_addr, cpsw_del_mc_addr); + +unlock_addr: + netif_addr_unlock_bh(ndev); +unlock_rtnl: + rtnl_unlock(); +} + +static void cpsw_ndo_set_rx_mode(struct net_device *ndev) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + + schedule_work(&priv->rx_mode_work); } static unsigned int cpsw_rxbuf_total_len(unsigned int len) @@ -1472,6 +1491,7 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv) priv_sl2->ndev = ndev; priv_sl2->dev = &ndev->dev; priv_sl2->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG); + INIT_WORK(&priv_sl2->rx_mode_work, cpsw_ndo_set_rx_mode_work); if (is_valid_ether_addr(data->slave_data[1].mac_addr)) { memcpy(priv_sl2->mac_addr, data->slave_data[1].mac_addr, @@ -1653,6 +1673,7 @@ static int cpsw_probe(struct platform_device *pdev) priv->dev = dev; priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG); priv->emac_port = 0; + INIT_WORK(&priv->rx_mode_work, cpsw_ndo_set_rx_mode_work); if (is_valid_ether_addr(data->slave_data[0].mac_addr)) { memcpy(priv->mac_addr, data->slave_data[0].mac_addr, ETH_ALEN); @@ -1758,6 +1779,8 @@ clean_runtime_disable_ret: static void cpsw_remove(struct platform_device *pdev) { struct cpsw_common *cpsw = platform_get_drvdata(pdev); + struct net_device *ndev; + struct cpsw_priv *priv; int i, ret; ret = pm_runtime_resume_and_get(&pdev->dev); @@ -1770,9 +1793,15 @@ static void cpsw_remove(struct platform_device *pdev) return; } - for (i = 0; i < cpsw->data.slaves; i++) - if (cpsw->slaves[i].ndev) - unregister_netdev(cpsw->slaves[i].ndev); + for (i = 0; i < cpsw->data.slaves; i++) { + ndev = cpsw->slaves[i].ndev; + if (!ndev) + continue; + + priv = netdev_priv(ndev); + unregister_netdev(ndev); + disable_work_sync(&priv->rx_mode_work); + } cpts_release(cpsw->cpts); cpdma_ctlr_destroy(cpsw->dma); From 0eca95cba2b7bf7b7b4f2fa90734a85fcaa72782 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 4 Feb 2026 10:07:55 -1000 Subject: [PATCH 193/195] sched_ext: Short-circuit sched_class operations on dead tasks 7900aa699c34 ("sched_ext: Fix cgroup exit ordering by moving sched_ext_free() to finish_task_switch()") moved sched_ext_free() to finish_task_switch() and renamed it to sched_ext_dead() to fix cgroup exit ordering issues. However, this created a race window where certain sched_class ops may be invoked on dead tasks leading to failures - e.g. sched_setscheduler() may try to switch a task which finished sched_ext_dead() back into SCX triggering invalid SCX task state transitions. Add task_dead_and_done() which tests whether a task is TASK_DEAD and has completed its final context switch, and use it to short-circuit sched_class operations which may be called on dead tasks. Fixes: 7900aa699c34 ("sched_ext: Fix cgroup exit ordering by moving sched_ext_free() to finish_task_switch()") Reported-by: Andrea Righi Link: http://lkml.kernel.org/r/20260202151341.796959-1-arighi@nvidia.com Reviewed-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 8f6d8d7f895c..1a5ead4a476e 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -194,6 +194,7 @@ MODULE_PARM_DESC(bypass_lb_intv_us, "bypass load balance interval in microsecond #include static void process_ddsp_deferred_locals(struct rq *rq); +static bool task_dead_and_done(struct task_struct *p); static u32 reenq_local(struct rq *rq); static void scx_kick_cpu(struct scx_sched *sch, s32 cpu, u64 flags); static bool scx_vexit(struct scx_sched *sch, enum scx_exit_kind kind, @@ -2618,6 +2619,9 @@ static void set_cpus_allowed_scx(struct task_struct *p, set_cpus_allowed_common(p, ac); + if (task_dead_and_done(p)) + return; + /* * The effective cpumask is stored in @p->cpus_ptr which may temporarily * differ from the configured one in @p->cpus_mask. Always tell the bpf @@ -3033,10 +3037,45 @@ void scx_cancel_fork(struct task_struct *p) percpu_up_read(&scx_fork_rwsem); } +/** + * task_dead_and_done - Is a task dead and done running? + * @p: target task + * + * Once sched_ext_dead() removes the dead task from scx_tasks and exits it, the + * task no longer exists from SCX's POV. However, certain sched_class ops may be + * invoked on these dead tasks leading to failures - e.g. sched_setscheduler() + * may try to switch a task which finished sched_ext_dead() back into SCX + * triggering invalid SCX task state transitions and worse. + * + * Once a task has finished the final switch, sched_ext_dead() is the only thing + * that needs to happen on the task. Use this test to short-circuit sched_class + * operations which may be called on dead tasks. + */ +static bool task_dead_and_done(struct task_struct *p) +{ + struct rq *rq = task_rq(p); + + lockdep_assert_rq_held(rq); + + /* + * In do_task_dead(), a dying task sets %TASK_DEAD with preemption + * disabled and __schedule(). If @p has %TASK_DEAD set and off CPU, @p + * won't ever run again. + */ + return unlikely(READ_ONCE(p->__state) == TASK_DEAD) && + !task_on_cpu(rq, p); +} + void sched_ext_dead(struct task_struct *p) { unsigned long flags; + /* + * By the time control reaches here, @p has %TASK_DEAD set, switched out + * for the last time and then dropped the rq lock - task_dead_and_done() + * should be returning %true nullifying the straggling sched_class ops. + * Remove from scx_tasks and exit @p. + */ raw_spin_lock_irqsave(&scx_tasks_lock, flags); list_del_init(&p->scx.tasks_node); raw_spin_unlock_irqrestore(&scx_tasks_lock, flags); @@ -3062,6 +3101,9 @@ static void reweight_task_scx(struct rq *rq, struct task_struct *p, lockdep_assert_rq_held(task_rq(p)); + if (task_dead_and_done(p)) + return; + p->scx.weight = sched_weight_to_cgroup(scale_load_down(lw->weight)); if (SCX_HAS_OP(sch, set_weight)) SCX_CALL_OP_TASK(sch, SCX_KF_REST, set_weight, rq, @@ -3076,6 +3118,9 @@ static void switching_to_scx(struct rq *rq, struct task_struct *p) { struct scx_sched *sch = scx_root; + if (task_dead_and_done(p)) + return; + scx_enable_task(p); /* @@ -3089,6 +3134,9 @@ static void switching_to_scx(struct rq *rq, struct task_struct *p) static void switched_from_scx(struct rq *rq, struct task_struct *p) { + if (task_dead_and_done(p)) + return; + scx_disable_task(p); } From f41c5d151078c5348271ffaf8e7410d96f2d82f8 Mon Sep 17 00:00:00 2001 From: Andrew Fasano Date: Wed, 4 Feb 2026 17:46:58 +0100 Subject: [PATCH 194/195] netfilter: nf_tables: fix inverted genmask check in nft_map_catchall_activate() nft_map_catchall_activate() has an inverted element activity check compared to its non-catchall counterpart nft_mapelem_activate() and compared to what is logically required. nft_map_catchall_activate() is called from the abort path to re-activate catchall map elements that were deactivated during a failed transaction. It should skip elements that are already active (they don't need re-activation) and process elements that are inactive (they need to be restored). Instead, the current code does the opposite: it skips inactive elements and processes active ones. Compare the non-catchall activate callback, which is correct: nft_mapelem_activate(): if (nft_set_elem_active(ext, iter->genmask)) return 0; /* skip active, process inactive */ With the buggy catchall version: nft_map_catchall_activate(): if (!nft_set_elem_active(ext, genmask)) continue; /* skip inactive, process active */ The consequence is that when a DELSET operation is aborted, nft_setelem_data_activate() is never called for the catchall element. For NFT_GOTO verdict elements, this means nft_data_hold() is never called to restore the chain->use reference count. Each abort cycle permanently decrements chain->use. Once chain->use reaches zero, DELCHAIN succeeds and frees the chain while catchall verdict elements still reference it, resulting in a use-after-free. This is exploitable for local privilege escalation from an unprivileged user via user namespaces + nftables on distributions that enable CONFIG_USER_NS and CONFIG_NF_TABLES. Fix by removing the negation so the check matches nft_mapelem_activate(): skip active elements, process inactive ones. Fixes: 628bd3e49cba ("netfilter: nf_tables: drop map element references from preparation phase") Signed-off-by: Andrew Fasano Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 729a92781a1a..be92750e2af3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5914,7 +5914,7 @@ static void nft_map_catchall_activate(const struct nft_ctx *ctx, list_for_each_entry(catchall, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); - if (!nft_set_elem_active(ext, genmask)) + if (nft_set_elem_active(ext, genmask)) continue; nft_clear(ctx->net, ext); From bbf4a17ad9ffc4e3d7ec13d73ecd59dea149ed25 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Wed, 4 Feb 2026 18:58:37 +0900 Subject: [PATCH 195/195] ipv6: Fix ECMP sibling count mismatch when clearing RTF_ADDRCONF syzbot reported a kernel BUG in fib6_add_rt2node() when adding an IPv6 route. [0] Commit f72514b3c569 ("ipv6: clear RA flags when adding a static route") introduced logic to clear RTF_ADDRCONF from existing routes when a static route with the same nexthop is added. However, this causes a problem when the existing route has a gateway. When RTF_ADDRCONF is cleared from a route that has a gateway, that route becomes eligible for ECMP, i.e. rt6_qualify_for_ecmp() returns true. The issue is that this route was never added to the fib6_siblings list. This leads to a mismatch between the following counts: - The sibling count computed by iterating fib6_next chain, which includes the newly ECMP-eligible route - The actual siblings in fib6_siblings list, which does not include that route When a subsequent ECMP route is added, fib6_add_rt2node() hits BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings) because the counts don't match. Fix this by only clearing RTF_ADDRCONF when the existing route does not have a gateway. Routes without a gateway cannot qualify for ECMP anyway (rt6_qualify_for_ecmp() requires fib_nh_gw_family), so clearing RTF_ADDRCONF on them is safe and matches the original intent of the commit. [0]: kernel BUG at net/ipv6/ip6_fib.c:1217! Oops: invalid opcode: 0000 [#1] SMP KASAN PTI CPU: 0 UID: 0 PID: 6010 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/25/2025 RIP: 0010:fib6_add_rt2node+0x3433/0x3470 net/ipv6/ip6_fib.c:1217 [...] Call Trace: fib6_add+0x8da/0x18a0 net/ipv6/ip6_fib.c:1532 __ip6_ins_rt net/ipv6/route.c:1351 [inline] ip6_route_add+0xde/0x1b0 net/ipv6/route.c:3946 ipv6_route_ioctl+0x35c/0x480 net/ipv6/route.c:4571 inet6_ioctl+0x219/0x280 net/ipv6/af_inet6.c:577 sock_do_ioctl+0xdc/0x300 net/socket.c:1245 sock_ioctl+0x576/0x790 net/socket.c:1366 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:597 [inline] __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:583 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: f72514b3c569 ("ipv6: clear RA flags when adding a static route") Reported-by: syzbot+cb809def1baaac68ab92@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=cb809def1baaac68ab92 Tested-by: syzbot+cb809def1baaac68ab92@syzkaller.appspotmail.com Signed-off-by: Shigeru Yoshida Reviewed-by: Fernando Fernandez Mancera Link: https://patch.msgid.link/20260204095837.1285552-1-syoshida@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_fib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 2111af022d94..c6439e30e892 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1138,7 +1138,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, fib6_set_expires(iter, rt->expires); fib6_add_gc_list(iter); } - if (!(rt->fib6_flags & (RTF_ADDRCONF | RTF_PREFIX_RT))) { + if (!(rt->fib6_flags & (RTF_ADDRCONF | RTF_PREFIX_RT)) && + !iter->fib6_nh->fib_nh_gw_family) { iter->fib6_flags &= ~RTF_ADDRCONF; iter->fib6_flags &= ~RTF_PREFIX_RT; }