diff --git a/.editorconfig b/.editorconfig index 29a30ccfc07b..69718ac91747 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,8 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only -root = true - -[{*.{awk,c,dts,dtsi,dtso,h,mk,s,S},Kconfig,Makefile,Makefile.*}] +[{*.{awk,c,dts,dtsi,dtso,h,mk,rst,s,S},Kconfig,Makefile,Makefile.*}] charset = utf-8 end_of_line = lf insert_final_newline = true diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst index 86d7902a657f..e209c46241b0 100644 --- a/Documentation/accounting/delay-accounting.rst +++ b/Documentation/accounting/delay-accounting.rst @@ -107,22 +107,22 @@ Get sum and peak of delays, since system boot, for all pids with tgid 242:: TGID 242 - CPU count real total virtual total delay total delay average delay max delay min - 39 156000000 156576579 2111069 0.054ms 0.212296ms 0.031307ms - IO count delay total delay average delay max delay min - 0 0 0.000ms 0.000000ms 0.000000ms - SWAP count delay total delay average delay max delay min - 0 0 0.000ms 0.000000ms 0.000000ms - RECLAIM count delay total delay average delay max delay min - 0 0 0.000ms 0.000000ms 0.000000ms - THRASHING count delay total delay average delay max delay min - 0 0 0.000ms 0.000000ms 0.000000ms - COMPACT count delay total delay average delay max delay min - 0 0 0.000ms 0.000000ms 0.000000ms - WPCOPY count delay total delay average delay max delay min - 156 11215873 0.072ms 0.207403ms 0.033913ms - IRQ count delay total delay average delay max delay min - 0 0 0.000ms 0.000000ms 0.000000ms + CPU count real total virtual total delay total delay average delay max delay min delay max timestamp + 46 188000000 192348334 4098012 0.089ms 0.429260ms 0.051205ms 2026-01-15T15:06:58 + IO count delay total delay average delay max delay min delay max timestamp + 0 0 0.000ms 0.000000ms 0.000000ms N/A + SWAP count delay total delay average delay max delay min delay max timestamp + 0 0 0.000ms 0.000000ms 0.000000ms 
N/A + RECLAIM count delay total delay average delay max delay min delay max timestamp + 0 0 0.000ms 0.000000ms 0.000000ms N/A + THRASHING count delay total delay average delay max delay min delay max timestamp + 0 0 0.000ms 0.000000ms 0.000000ms N/A + COMPACT count delay total delay average delay max delay min delay max timestamp + 0 0 0.000ms 0.000000ms 0.000000ms N/A + WPCOPY count delay total delay average delay max delay min delay max timestamp + 182 19413338 0.107ms 0.547353ms 0.022462ms 2026-01-15T15:05:24 + IRQ count delay total delay average delay max delay min delay max timestamp + 0 0 0.000ms 0.000000ms 0.000000ms N/A Get IO accounting for pid 1, it works only with -p:: diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ef1ba922dc60..51b66c168b1d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4815,6 +4815,21 @@ Kernel parameters panic_on_warn=1 panic() instead of WARN(). Useful to cause kdump on a WARN(). + panic_force_cpu= + [KNL,SMP] Force panic handling to execute on a specific CPU. + Format: + Some platforms require panic handling to occur on a + specific CPU for the crash kernel to function correctly. + This can be due to firmware limitations, interrupt routing + constraints, or platform-specific requirements where only + a particular CPU can safely enter the crash kernel. + When set, panic() will redirect execution to the specified + CPU before proceeding with the normal panic and kexec flow. + If the target CPU is offline or unavailable, panic proceeds + on the current CPU. + This option should only be used for systems with the above + constraints as it might cause the panic operation to be less reliable. + panic_print= Bitmask for printing system info when panic happens. 
User can chose combination of the following bits: bit 0: print all tasks info @@ -6989,12 +7004,12 @@ Kernel parameters softlockup_panic= [KNL] Should the soft-lockup detector generate panics. - Format: 0 | 1 + Format: <int> - A value of 1 instructs the soft-lockup detector - to panic the machine when a soft-lockup occurs. It is - also controlled by the kernel.softlockup_panic sysctl - and CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC, which is the + A non-zero value N instructs the soft-lockup detector + to panic the machine when a soft-lockup duration exceeds + N thresholds. It is also controlled by the kernel.softlockup_panic + sysctl and CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC, which is the respective build-time switch to that functionality. softlockup_all_cpu_backtrace= diff --git a/Documentation/core-api/kho/abi.rst b/Documentation/core-api/kho/abi.rst new file mode 100644 index 000000000000..2e63be3486cf --- /dev/null +++ b/Documentation/core-api/kho/abi.rst @@ -0,0 +1,28 @@ +.. SPDX-License-Identifier: GPL-2.0-or-later + +================== +Kexec Handover ABI +================== + +Core Kexec Handover ABI +======================== + +.. kernel-doc:: include/linux/kho/abi/kexec_handover.h + :doc: Kexec Handover ABI + +vmalloc preservation ABI +======================== + +.. kernel-doc:: include/linux/kho/abi/kexec_handover.h + :doc: Kexec Handover ABI for vmalloc Preservation + +memblock preservation ABI +========================= + +..
kernel-doc:: include/linux/kho/abi/memblock.h + :doc: memblock kexec handover ABI + +See Also +======== + +- :doc:`/admin-guide/mm/kho` diff --git a/Documentation/core-api/kho/bindings/kho.yaml b/Documentation/core-api/kho/bindings/kho.yaml deleted file mode 100644 index 11e8ab7b219d..000000000000 --- a/Documentation/core-api/kho/bindings/kho.yaml +++ /dev/null @@ -1,43 +0,0 @@ -# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -%YAML 1.2 ---- -title: Kexec HandOver (KHO) root tree - -maintainers: - - Mike Rapoport - - Changyuan Lyu - -description: | - System memory preserved by KHO across kexec. - -properties: - compatible: - enum: - - kho-v1 - - preserved-memory-map: - description: | - physical address (u64) of an in-memory structure describing all preserved - folios and memory ranges. - -patternProperties: - "$[0-9a-f_]+^": - $ref: sub-fdt.yaml# - description: physical address of a KHO user's own FDT. - -required: - - compatible - - preserved-memory-map - -additionalProperties: false - -examples: - - | - kho { - compatible = "kho-v1"; - preserved-memory-map = <0xf0be16 0x1000000>; - - memblock { - fdt = <0x80cc16 0x1000000>; - }; - }; diff --git a/Documentation/core-api/kho/bindings/memblock/memblock.yaml b/Documentation/core-api/kho/bindings/memblock/memblock.yaml deleted file mode 100644 index d388c28eb91d..000000000000 --- a/Documentation/core-api/kho/bindings/memblock/memblock.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -%YAML 1.2 ---- -title: Memblock reserved memory - -maintainers: - - Mike Rapoport - -description: | - Memblock can serialize its current memory reservations created with - reserve_mem command line option across kexec through KHO. - The post-KHO kernel can then consume these reservations and they are - guaranteed to have the same physical address. 
- -properties: - compatible: - enum: - - reserve-mem-v1 - -patternProperties: - "$[0-9a-f_]+^": - $ref: reserve-mem.yaml# - description: reserved memory regions - -required: - - compatible - -additionalProperties: false - -examples: - - | - memblock { - compatible = "memblock-v1"; - n1 { - compatible = "reserve-mem-v1"; - start = <0xc06b 0x4000000>; - size = <0x04 0x00>; - }; - }; diff --git a/Documentation/core-api/kho/bindings/memblock/reserve-mem.yaml b/Documentation/core-api/kho/bindings/memblock/reserve-mem.yaml deleted file mode 100644 index 10282d3d1bcd..000000000000 --- a/Documentation/core-api/kho/bindings/memblock/reserve-mem.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -%YAML 1.2 ---- -title: Memblock reserved memory regions - -maintainers: - - Mike Rapoport - -description: | - Memblock can serialize its current memory reservations created with - reserve_mem command line option across kexec through KHO. - This object describes each such region. - -properties: - compatible: - enum: - - reserve-mem-v1 - - start: - description: | - physical address (u64) of the reserved memory region. - - size: - description: | - size (u64) of the reserved memory region. - -required: - - compatible - - start - - size - -additionalProperties: false - -examples: - - | - n1 { - compatible = "reserve-mem-v1"; - start = <0xc06b 0x4000000>; - size = <0x04 0x00>; - }; diff --git a/Documentation/core-api/kho/bindings/sub-fdt.yaml b/Documentation/core-api/kho/bindings/sub-fdt.yaml deleted file mode 100644 index b9a3d2d24850..000000000000 --- a/Documentation/core-api/kho/bindings/sub-fdt.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -%YAML 1.2 ---- -title: KHO users' FDT address - -maintainers: - - Mike Rapoport - - Changyuan Lyu - -description: | - Physical address of an FDT blob registered by a KHO user. - -properties: - fdt: - description: | - physical address (u64) of an FDT blob. 
- -required: - - fdt - -additionalProperties: false - -examples: - - | - memblock { - fdt = <0x80cc16 0x1000000>; - }; diff --git a/Documentation/core-api/kho/concepts.rst b/Documentation/core-api/kho/concepts.rst deleted file mode 100644 index d626d1dbd678..000000000000 --- a/Documentation/core-api/kho/concepts.rst +++ /dev/null @@ -1,74 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0-or-later -.. _kho-concepts: - -======================= -Kexec Handover Concepts -======================= - -Kexec HandOver (KHO) is a mechanism that allows Linux to preserve memory -regions, which could contain serialized system states, across kexec. - -It introduces multiple concepts: - -KHO FDT -======= - -Every KHO kexec carries a KHO specific flattened device tree (FDT) blob -that describes preserved memory regions. These regions contain either -serialized subsystem states, or in-memory data that shall not be touched -across kexec. After KHO, subsystems can retrieve and restore preserved -memory regions from KHO FDT. - -KHO only uses the FDT container format and libfdt library, but does not -adhere to the same property semantics that normal device trees do: Properties -are passed in native endianness and standardized properties like ``regs`` and -``ranges`` do not exist, hence there are no ``#...-cells`` properties. - -KHO is still under development. The FDT schema is unstable and would change -in the future. - -Scratch Regions -=============== - -To boot into kexec, we need to have a physically contiguous memory range that -contains no handed over memory. Kexec then places the target kernel and initrd -into that region. The new kernel exclusively uses this region for memory -allocations before during boot up to the initialization of the page allocator. - -We guarantee that we always have such regions through the scratch regions: On -first boot KHO allocates several physically contiguous memory regions. 
Since -after kexec these regions will be used by early memory allocations, there is a -scratch region per NUMA node plus a scratch region to satisfy allocations -requests that do not require particular NUMA node assignment. -By default, size of the scratch region is calculated based on amount of memory -allocated during boot. The ``kho_scratch`` kernel command line option may be -used to explicitly define size of the scratch regions. -The scratch regions are declared as CMA when page allocator is initialized so -that their memory can be used during system lifetime. CMA gives us the -guarantee that no handover pages land in that region, because handover pages -must be at a static physical memory location and CMA enforces that only -movable pages can be located inside. - -After KHO kexec, we ignore the ``kho_scratch`` kernel command line option and -instead reuse the exact same region that was originally allocated. This allows -us to recursively execute any amount of KHO kexecs. Because we used this region -for boot memory allocations and as target memory for kexec blobs, some parts -of that memory region may be reserved. These reservations are irrelevant for -the next KHO, because kexec can overwrite even the original kernel. - -.. _kho-finalization-phase: - -KHO finalization phase -====================== - -To enable user space based kexec file loader, the kernel needs to be able to -provide the FDT that describes the current kernel's state before -performing the actual kexec. The process of generating that FDT is -called serialization. When the FDT is generated, some properties -of the system may become immutable because they are already written down -in the FDT. That state is called the KHO finalization phase. - -Public API -========== -.. 
kernel-doc:: kernel/liveupdate/kexec_handover.c - :export: diff --git a/Documentation/core-api/kho/fdt.rst b/Documentation/core-api/kho/fdt.rst deleted file mode 100644 index 62505285d60d..000000000000 --- a/Documentation/core-api/kho/fdt.rst +++ /dev/null @@ -1,80 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0-or-later - -======= -KHO FDT -======= - -KHO uses the flattened device tree (FDT) container format and libfdt -library to create and parse the data that is passed between the -kernels. The properties in KHO FDT are stored in native format. -It includes the physical address of an in-memory structure describing -all preserved memory regions, as well as physical addresses of KHO users' -own FDTs. Interpreting those sub FDTs is the responsibility of KHO users. - -KHO nodes and properties -======================== - -Property ``preserved-memory-map`` ---------------------------------- - -KHO saves a special property named ``preserved-memory-map`` under the root node. -This node contains the physical address of an in-memory structure for KHO to -preserve memory regions across kexec. - -Property ``compatible`` ------------------------ - -The ``compatible`` property determines compatibility between the kernel -that created the KHO FDT and the kernel that attempts to load it. -If the kernel that loads the KHO FDT is not compatible with it, the entire -KHO process will be bypassed. - -Property ``fdt`` ----------------- - -Generally, a KHO user serialize its state into its own FDT and instructs -KHO to preserve the underlying memory, such that after kexec, the new kernel -can recover its state from the preserved FDT. - -A KHO user thus can create a node in KHO root tree and save the physical address -of its own FDT in that node's property ``fdt`` . 
- -Examples -======== - -The following example demonstrates KHO FDT that preserves two memory -regions created with ``reserve_mem`` kernel command line parameter:: - - /dts-v1/; - - / { - compatible = "kho-v1"; - - preserved-memory-map = <0x40be16 0x1000000>; - - memblock { - fdt = <0x1517 0x1000000>; - }; - }; - -where the ``memblock`` node contains an FDT that is requested by the -subsystem memblock for preservation. The FDT contains the following -serialized data:: - - /dts-v1/; - - / { - compatible = "memblock-v1"; - - n1 { - compatible = "reserve-mem-v1"; - start = <0xc06b 0x4000000>; - size = <0x04 0x00>; - }; - - n2 { - compatible = "reserve-mem-v1"; - start = <0xc067 0x4000000>; - size = <0x04 0x00>; - }; - }; diff --git a/Documentation/core-api/kho/index.rst b/Documentation/core-api/kho/index.rst index 51ea41c6a20d..dcc6a36cc134 100644 --- a/Documentation/core-api/kho/index.rst +++ b/Documentation/core-api/kho/index.rst @@ -1,11 +1,89 @@ .. SPDX-License-Identifier: GPL-2.0-or-later +.. _kho-concepts: + ======================== Kexec Handover Subsystem ======================== +Overview +======== + +Kexec HandOver (KHO) is a mechanism that allows Linux to preserve memory +regions, which could contain serialized system states, across kexec. + +KHO uses :ref:`flattened device tree (FDT) <kho_fdt>` to pass information about +the preserved state from pre-kexec kernel to post-kexec kernel and :ref:`scratch +memory regions <kho_scratch>` to ensure integrity of the preserved memory. + +.. _kho_fdt: + +KHO FDT +======= +Every KHO kexec carries a KHO specific flattened device tree (FDT) blob that +describes the preserved state. The FDT includes properties describing preserved +memory regions and nodes that hold subsystem specific state. + +The preserved memory regions contain either serialized subsystem states, or +in-memory data that shall not be touched across kexec. After KHO, subsystems +can retrieve and restore the preserved state from KHO FDT.
+ +Subsystems participating in KHO can define their own format for state +serialization and preservation. + +KHO FDT and structures defined by the subsystems form an ABI between pre-kexec +and post-kexec kernels. This ABI is defined by header files in the +``include/linux/kho/abi`` directory. + .. toctree:: :maxdepth: 1 - concepts - fdt + abi.rst + +.. _kho_scratch: + +Scratch Regions +=============== + +To boot into kexec, we need to have a physically contiguous memory range that +contains no handed over memory. Kexec then places the target kernel and initrd +into that region. The new kernel exclusively uses this region for memory +allocations during boot, up to the initialization of the page allocator. + +We guarantee that we always have such regions through the scratch regions: On +first boot KHO allocates several physically contiguous memory regions. Since +after kexec these regions will be used by early memory allocations, there is a +scratch region per NUMA node plus a scratch region to satisfy allocation +requests that do not require particular NUMA node assignment. +By default, size of the scratch region is calculated based on amount of memory +allocated during boot. The ``kho_scratch`` kernel command line option may be +used to explicitly define size of the scratch regions. +The scratch regions are declared as CMA when page allocator is initialized so +that their memory can be used during system lifetime. CMA gives us the +guarantee that no handover pages land in that region, because handover pages +must be at a static physical memory location and CMA enforces that only +movable pages can be located inside. + +After KHO kexec, we ignore the ``kho_scratch`` kernel command line option and +instead reuse the exact same region that was originally allocated. This allows +us to recursively execute any number of KHO kexecs.
Because we used this region +for boot memory allocations and as target memory for kexec blobs, some parts +of that memory region may be reserved. These reservations are irrelevant for +the next KHO, because kexec can overwrite even the original kernel. + +.. _kho-finalization-phase: + +KHO finalization phase +====================== + +To enable user space based kexec file loader, the kernel needs to be able to +provide the FDT that describes the current kernel's state before +performing the actual kexec. The process of generating that FDT is +called serialization. When the FDT is generated, some properties +of the system may become immutable because they are already written down +in the FDT. That state is called the KHO finalization phase. + +See Also +======== + +- :doc:`/admin-guide/mm/kho` diff --git a/Documentation/core-api/list.rst b/Documentation/core-api/list.rst index 86873ce9adbf..241464ca0549 100644 --- a/Documentation/core-api/list.rst +++ b/Documentation/core-api/list.rst @@ -774,3 +774,12 @@ Full List API .. kernel-doc:: include/linux/list.h :internal: + +Private List API +================ + +.. kernel-doc:: include/linux/list_private.h + :doc: Private List Primitives + +.. kernel-doc:: include/linux/list_private.h + :internal: diff --git a/Documentation/core-api/liveupdate.rst b/Documentation/core-api/liveupdate.rst index 7960eb15a81f..5a292d0f3706 100644 --- a/Documentation/core-api/liveupdate.rst +++ b/Documentation/core-api/liveupdate.rst @@ -18,6 +18,11 @@ LUO Preserving File Descriptors .. kernel-doc:: kernel/liveupdate/luo_file.c :doc: LUO File Descriptors +LUO File Lifecycle Bound Global Data +==================================== +.. kernel-doc:: kernel/liveupdate/luo_flb.c + :doc: LUO File Lifecycle Bound Global Data + Live Update Orchestrator ABI ============================ .. kernel-doc:: include/linux/kho/abi/luo.h @@ -40,6 +45,9 @@ Public API .. kernel-doc:: kernel/liveupdate/luo_core.c :export: +.. 
kernel-doc:: kernel/liveupdate/luo_flb.c + :export: + .. kernel-doc:: kernel/liveupdate/luo_file.c :export: @@ -48,6 +56,9 @@ Internal API .. kernel-doc:: kernel/liveupdate/luo_core.c :internal: +.. kernel-doc:: kernel/liveupdate/luo_flb.c + :internal: + .. kernel-doc:: kernel/liveupdate/luo_session.c :internal: @@ -58,4 +69,4 @@ See Also ======== - :doc:`Live Update uAPI ` -- :doc:`/core-api/kho/concepts` +- :doc:`/core-api/kho/index` diff --git a/Documentation/dev-tools/checkpatch.rst b/Documentation/dev-tools/checkpatch.rst index ca475805df4c..dccede68698c 100644 --- a/Documentation/dev-tools/checkpatch.rst +++ b/Documentation/dev-tools/checkpatch.rst @@ -601,6 +601,11 @@ Commit message See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html#describe-your-changes + **BAD_COMMIT_SEPARATOR** + The commit separator is a single line with 3 dashes. + The regex match is '^---$' + Lines that start with 3 dashes and have more content on the same line + may confuse tools that apply patches. Comparison style ---------------- diff --git a/Documentation/filesystems/sysfs.rst b/Documentation/filesystems/sysfs.rst index 2703c04af7d0..ffcef4d6bc8d 100644 --- a/Documentation/filesystems/sysfs.rst +++ b/Documentation/filesystems/sysfs.rst @@ -120,7 +120,7 @@ is equivalent to doing:: .store = store_foo, }; -Note as stated in include/linux/kernel.h "OTHER_WRITABLE? Generally +Note as stated in include/linux/sysfs.h "OTHER_WRITABLE? Generally considered a bad idea." so trying to set a sysfs file writable for everyone will fail reverting to RO mode for "Others". 
diff --git a/Documentation/mm/memfd_preservation.rst b/Documentation/mm/memfd_preservation.rst index 66e0fb6d5ef0..a8a5b476afd3 100644 --- a/Documentation/mm/memfd_preservation.rst +++ b/Documentation/mm/memfd_preservation.rst @@ -20,4 +20,4 @@ See Also ======== - :doc:`/core-api/liveupdate` -- :doc:`/core-api/kho/concepts` +- :doc:`/core-api/kho/index` diff --git a/MAINTAINERS b/MAINTAINERS index 3f3e868b7d74..16710c66b775 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14076,6 +14076,7 @@ F: Documentation/admin-guide/mm/kho.rst F: Documentation/core-api/kho/* F: include/linux/kexec_handover.h F: include/linux/kho/ +F: include/linux/kho/abi/ F: kernel/liveupdate/kexec_handover* F: lib/test_kho.c F: tools/testing/selftests/kho/ @@ -14768,6 +14769,7 @@ F: include/linux/liveupdate.h F: include/linux/liveupdate/ F: include/uapi/linux/liveupdate.h F: kernel/liveupdate/ +F: lib/tests/liveupdate.c F: mm/memfd_luo.c F: tools/testing/selftests/liveupdate/ @@ -16520,7 +16522,7 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git for-next T: git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git fixes F: Documentation/core-api/boot-time-mm.rst -F: Documentation/core-api/kho/bindings/memblock/* +F: include/linux/kho/abi/memblock.h F: include/linux/memblock.h F: mm/bootmem_info.c F: mm/memblock.c @@ -17591,7 +17593,7 @@ S: Maintained F: Documentation/core-api/min_heap.rst F: include/linux/min_heap.h F: lib/min_heap.c -F: lib/test_min_heap.c +F: lib/tests/min_heap_kunit.c MIPI CCS, SMIA AND SMIA++ IMAGE SENSOR DRIVER M: Sakari Ailus @@ -27499,7 +27501,7 @@ R: Andy Shevchenko L: linux-kernel@vger.kernel.org S: Maintained F: include/linux/uuid.h -F: lib/test_uuid.c +F: lib/tests/uuid_kunit.c F: lib/uuid.c UV SYSFS DRIVER diff --git a/arch/arm/configs/aspeed_g5_defconfig b/arch/arm/configs/aspeed_g5_defconfig index 2e6ea13c1e9b..ec558e57d081 100644 --- a/arch/arm/configs/aspeed_g5_defconfig +++ b/arch/arm/configs/aspeed_g5_defconfig 
@@ -306,7 +306,7 @@ CONFIG_SCHED_STACK_END_CHECK=y CONFIG_PANIC_ON_OOPS=y CONFIG_PANIC_TIMEOUT=-1 CONFIG_SOFTLOCKUP_DETECTOR=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1 CONFIG_BOOTPARAM_HUNG_TASK_PANIC=1 CONFIG_WQ_WATCHDOG=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/arm/configs/pxa3xx_defconfig b/arch/arm/configs/pxa3xx_defconfig index 07d422f0ff34..fb272e3a2337 100644 --- a/arch/arm/configs/pxa3xx_defconfig +++ b/arch/arm/configs/pxa3xx_defconfig @@ -100,7 +100,7 @@ CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_SHIRQ=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1 # CONFIG_SCHED_DEBUG is not set CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_SPINLOCK_SLEEP=y diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index d6f278b04acf..7a530ea4f5ae 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -2997,7 +2997,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t, u64 plt_target = 0ULL; bool poking_bpf_entry; - if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf)) + if (!bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf)) /* Only poking bpf text is supported. Since kernel function * entry is set up by ftrace, we reply on ftrace to poke kernel * functions. diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index d1d5a65308b9..3b63bc5b99d9 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -1319,7 +1319,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t, /* Only poking bpf text is supported. Since kernel function entry * is set up by ftrace, we rely on ftrace to poke kernel functions. 
*/ - if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf)) + if (!bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf)) return -ENOTSUPP; image = ip - offset; diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index bffcc417f44c..31d16cba9879 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -599,7 +599,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m @@ -608,7 +607,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m @@ -617,7 +615,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 3f894c20b132..c0c419ec9a9e 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -556,7 +556,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m @@ -565,7 +564,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m @@ -574,7 +572,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 5c5603ca16aa..2b7547ecc4c4 100644 --- 
a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -576,7 +576,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m @@ -585,7 +584,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m @@ -594,7 +592,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 37c747ee395e..0b63787cff0d 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -548,7 +548,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m @@ -557,7 +556,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m @@ -566,7 +564,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 1a376c2b8c45..308836b60bba 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -558,7 +558,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m @@ -567,7 +566,6 @@ 
CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m @@ -576,7 +574,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 2b26450692a5..97e108c0d24f 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -575,7 +575,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m @@ -584,7 +583,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m @@ -593,7 +591,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 012e0e1f506f..7e9f83af9af4 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -662,7 +662,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m @@ -671,7 +670,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m @@ -680,7 +678,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m 
CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 37634b35bfbd..2fe33271d249 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -548,7 +548,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m @@ -557,7 +556,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m @@ -566,7 +564,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index a0d2e0070afa..4308daaa7f74 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -549,7 +549,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m @@ -558,7 +557,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m @@ -567,7 +565,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 62cc3964fc34..36eb29ec54ee 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -565,7 +565,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m 
CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m @@ -574,7 +573,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m @@ -583,7 +581,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 13107aa4a1b4..524a89fa6953 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -546,7 +546,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m @@ -554,7 +553,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m CONFIG_REED_SOLOMON_TEST=m @@ -563,7 +561,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index eaab0ba08989..f4fbc65c52d9 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -546,7 +546,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC_BENCHMARK=y CONFIG_XZ_DEC_TEST=m -CONFIG_GLOB_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_TEST_LOCKUP=m @@ -555,7 +554,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_TEST_DHRY=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_TEST_MULDIV64=m 
CONFIG_REED_SOLOMON_TEST=m @@ -564,7 +562,6 @@ CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 7622aad0f0b3..f9b228e33f3b 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c index b7f6f782d9a1..8ecb56be81ac 100644 --- a/arch/mips/rb532/devices.c +++ b/arch/mips/rb532/devices.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/openrisc/configs/or1klitex_defconfig b/arch/openrisc/configs/or1klitex_defconfig index fb1eb9a68bd6..984b0e3b2768 100644 --- a/arch/openrisc/configs/or1klitex_defconfig +++ b/arch/openrisc/configs/or1klitex_defconfig @@ -52,5 +52,5 @@ CONFIG_LSM="lockdown,yama,loadpin,safesetid,integrity,bpf" CONFIG_PRINTK_TIME=y CONFIG_PANIC_ON_OOPS=y CONFIG_SOFTLOCKUP_DETECTOR=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1 CONFIG_BUG_ON_DATA_CORRUPTION=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index f0d9b55cb26b..2b0720f2753b 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -425,7 +425,6 @@ CONFIG_BOOTX_TEXT=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m CONFIG_LKDTM=m -CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_DIV64=m CONFIG_BACKTRACE_SELF_TEST=m CONFIG_TEST_REF_TRACKER=m @@ -442,7 +441,6 @@ CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m -CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index f3d17a02659f..86c74146824a 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ 
b/arch/powerpc/configs/skiroot_defconfig @@ -288,7 +288,7 @@ CONFIG_SCHED_STACK_END_CHECK=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_PANIC_ON_OOPS=y CONFIG_SOFTLOCKUP_DETECTOR=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1 CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y CONFIG_WQ_WATCHDOG=y diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index ca00c4824e31..b23dddfce26d 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -6,6 +6,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 987cd9fb0f37..b75dd53584d5 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -1194,7 +1194,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t, bpf_func = (unsigned long)ip; /* We currently only support poking bpf programs */ - if (!__bpf_address_lookup(bpf_func, &size, &offset, name)) { + if (!bpf_address_lookup(bpf_func, &size, &offset, name)) { pr_err("%s (0x%lx): kernel/modules are not supported\n", __func__, bpf_func); return -EOPNOTSUPP; } diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 35413999d258..98fd0a2f51c6 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -921,7 +921,7 @@ CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FAULT_INJECTION_CONFIGFS=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LKDTM=m -CONFIG_TEST_MIN_HEAP=y +CONFIG_MIN_HEAP_KUNIT_TEST=m CONFIG_KPROBES_SANITY_TEST=m CONFIG_RBTREE_TEST=y CONFIG_INTERVAL_TREE_TEST=m diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 3affba95845b..cc187afa07b3 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/alternative.c 
b/arch/s390/kernel/alternative.c index 90c0e6408992..02d04ae621ba 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -4,6 +4,7 @@ #define pr_fmt(fmt) "alt: " fmt #endif +#include #include #include #include diff --git a/arch/s390/kernel/stackprotector.c b/arch/s390/kernel/stackprotector.c index d4e40483f008..8bd3ecf9200a 100644 --- a/arch/s390/kernel/stackprotector.c +++ b/arch/s390/kernel/stackprotector.c @@ -5,6 +5,7 @@ #endif #include +#include #include #include #include diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 25d9258fa592..28cfe1c700f0 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 9bcae0c599af..eebcc9db1a1b 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -437,9 +437,15 @@ int __init ima_free_kexec_buffer(void) int __init ima_get_kexec_buffer(void **addr, size_t *size) { + int ret; + if (!ima_kexec_buffer_size) return -ENOENT; + ret = ima_validate_range(ima_kexec_buffer_phys, ima_kexec_buffer_size); + if (ret) + return ret; + *addr = __va(ima_kexec_buffer_phys); *size = ima_kexec_buffer_size; diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index f0a63b2f85cc..832579143891 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -13,6 +13,7 @@ #define pr_fmt(fmt) "%s: " fmt, __func__ +#include #include #include #include diff --git a/certs/blacklist.c b/certs/blacklist.c index 675dd7a8f07a..11fc858b2921 100644 --- a/certs/blacklist.c +++ b/certs/blacklist.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c index 2326743310b1..1f1d92547dfc 100644 --- 
a/crypto/asymmetric_keys/asymmetric_type.c +++ b/crypto/asymmetric_keys/asymmetric_type.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index 27b4fea37845..b695e59fd9e4 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/crypto/krb5/selftest.c b/crypto/krb5/selftest.c index 4519c572d37e..67c4accd8cbd 100644 --- a/crypto/krb5/selftest.c +++ b/crypto/krb5/selftest.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include diff --git a/drivers/android/binder.c b/drivers/android/binder.c index b356c9b88254..bd780d88b468 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -6028,7 +6028,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) { struct binder_proc *proc = filp->private_data; - if (proc->tsk != current->group_leader) + if (!same_thread_group(proc->tsk, current)) return -EINVAL; binder_debug(BINDER_DEBUG_OPEN_CLOSE, @@ -6059,7 +6059,7 @@ static int binder_open(struct inode *nodp, struct file *filp) bool existing_pid = false; binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__, - current->group_leader->pid, current->pid); + current->tgid, current->pid); proc = kzalloc(sizeof(*proc), GFP_KERNEL); if (proc == NULL) @@ -6068,8 +6068,8 @@ static int binder_open(struct inode *nodp, struct file *filp) dbitmap_init(&proc->dmap); spin_lock_init(&proc->inner_lock); spin_lock_init(&proc->outer_lock); - get_task_struct(current->group_leader); - proc->tsk = current->group_leader; + proc->tsk = get_task_struct(current->group_leader); + proc->pid = current->tgid; proc->cred = get_cred(filp->f_cred); INIT_LIST_HEAD(&proc->todo); init_waitqueue_head(&proc->freeze_wait); @@ -6088,7 +6088,6 @@ static int 
binder_open(struct inode *nodp, struct file *filp) binder_alloc_init(&proc->alloc); binder_stats_created(BINDER_STAT_PROC); - proc->pid = current->group_leader->pid; INIT_LIST_HEAD(&proc->delivered_death); INIT_LIST_HEAD(&proc->delivered_freeze); INIT_LIST_HEAD(&proc->waiting_threads); diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 979c96b74cad..145ed5f14cdb 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -1233,7 +1233,7 @@ static struct shrinker *binder_shrinker; VISIBLE_IF_KUNIT void __binder_alloc_init(struct binder_alloc *alloc, struct list_lru *freelist) { - alloc->pid = current->group_leader->pid; + alloc->pid = current->tgid; alloc->mm = current->mm; mmgrab(alloc->mm); mutex_init(&alloc->mutex); diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 45952cfea06b..bc8dbba77b87 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/auxdisplay/hd44780_common.c b/drivers/auxdisplay/hd44780_common.c index 1792fe2a4460..b71db39f9249 100644 --- a/drivers/auxdisplay/hd44780_common.c +++ b/drivers/auxdisplay/hd44780_common.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include #include diff --git a/drivers/auxdisplay/lcd2s.c b/drivers/auxdisplay/lcd2s.c index 045dbef49dee..defb0573e43c 100644 --- a/drivers/auxdisplay/lcd2s.c +++ b/drivers/auxdisplay/lcd2s.c @@ -11,6 +11,7 @@ * Author: Lars Pöschel * All rights reserved. 
*/ +#include #include #include #include diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index c28786e0fe1c..92e446a64371 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4802,8 +4802,6 @@ static void floppy_release_allocated_regions(int fdc, const struct io_region *p) } } -#define ARRAY_END(X) (&((X)[ARRAY_SIZE(X)])) - static int floppy_request_regions(int fdc) { const struct io_region *p; diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c index 7ce61d629a87..5a53bfab470a 100644 --- a/drivers/bus/moxtet.c +++ b/drivers/bus/moxtet.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 02c07fef41ba..87d68ddf270a 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -20,6 +20,7 @@ #include #include +#include #include #include #include diff --git a/drivers/comedi/drivers/jr3_pci.c b/drivers/comedi/drivers/jr3_pci.c index 61792d940a3d..51287cbc3e48 100644 --- a/drivers/comedi/drivers/jr3_pci.c +++ b/drivers/comedi/drivers/jr3_pci.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/firmware/broadcom/bcm47xx_sprom.c b/drivers/firmware/broadcom/bcm47xx_sprom.c index fdcd3a07abcd..bca03fd85808 100644 --- a/drivers/firmware/broadcom/bcm47xx_sprom.c +++ b/drivers/firmware/broadcom/bcm47xx_sprom.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpio/gpio-macsmc.c b/drivers/gpio/gpio-macsmc.c index 30ef258e7655..b0952d066a9d 100644 --- a/drivers/gpio/gpio-macsmc.c +++ b/drivers/gpio/gpio-macsmc.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 00ea69baa126..4ce93536eeda 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1423,7 
+1423,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, goto create_evict_fence_fail; } - info->pid = get_task_pid(current->group_leader, PIDTYPE_PID); + info->pid = get_task_pid(current, PIDTYPE_TGID); INIT_DELAYED_WORK(&info->restore_userptr_work, amdgpu_amdkfd_restore_userptr_worker); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6a2ea200d90c..e549accf96ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2571,10 +2571,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) vm->task_info->task.pid = current->pid; get_task_comm(vm->task_info->task.comm, current); - if (current->group_leader->mm != current->mm) - return; - - vm->task_info->tgid = current->group_leader->pid; + vm->task_info->tgid = current->tgid; get_task_comm(vm->task_info->process_name, current->group_leader); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index ba25d83c23e7..219d08f092db 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -928,12 +928,6 @@ struct kfd_process *kfd_create_process(struct task_struct *thread) if (!(thread->mm && mmget_not_zero(thread->mm))) return ERR_PTR(-EINVAL); - /* Only the pthreads threading model is supported. */ - if (thread->group_leader->mm != thread->mm) { - mmput(thread->mm); - return ERR_PTR(-EINVAL); - } - /* If the process just called exec(3), it is possible that the * cleanup of the kfd_process (following the release of the mm * of the old process image) is still in the cleanup work queue. 
diff --git a/drivers/gpu/drm/ci/arm.config b/drivers/gpu/drm/ci/arm.config index 411e814819a8..d7c51670da2f 100644 --- a/drivers/gpu/drm/ci/arm.config +++ b/drivers/gpu/drm/ci/arm.config @@ -52,7 +52,7 @@ CONFIG_TMPFS=y CONFIG_PROVE_LOCKING=n CONFIG_DEBUG_LOCKDEP=n CONFIG_SOFTLOCKUP_DETECTOR=n -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=n +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=0 CONFIG_FW_LOADER_COMPRESS=y diff --git a/drivers/gpu/drm/ci/arm64.config b/drivers/gpu/drm/ci/arm64.config index fddfbd4d2493..ea0e30737c4d 100644 --- a/drivers/gpu/drm/ci/arm64.config +++ b/drivers/gpu/drm/ci/arm64.config @@ -161,7 +161,7 @@ CONFIG_TMPFS=y CONFIG_PROVE_LOCKING=n CONFIG_DEBUG_LOCKDEP=n CONFIG_SOFTLOCKUP_DETECTOR=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1 CONFIG_DETECT_HUNG_TASK=y diff --git a/drivers/gpu/drm/ci/x86_64.config b/drivers/gpu/drm/ci/x86_64.config index 8eaba388b141..7ac98a78691e 100644 --- a/drivers/gpu/drm/ci/x86_64.config +++ b/drivers/gpu/drm/ci/x86_64.config @@ -47,7 +47,7 @@ CONFIG_TMPFS=y CONFIG_PROVE_LOCKING=n CONFIG_DEBUG_LOCKDEP=n CONFIG_SOFTLOCKUP_DETECTOR=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1 CONFIG_DETECT_HUNG_TASK=y diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c index 87ceb0f374b6..600333ae6c8c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c @@ -3,6 +3,7 @@ * Copyright © 2020 Intel Corporation */ +#include "i915_selftest.h" #include "intel_engine_pm.h" #include "selftests/igt_flush_test.h" diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index bdf3e22c0a34..72922028f4ba 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -26,6 +26,8 @@ #include +#define STACK_MAGIC 0xdeadbeef + struct pci_dev; struct drm_i915_private; diff --git 
a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 44985b515212..47ac8386aabc 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -35,7 +35,7 @@ void panfrost_gem_init(struct panfrost_device *pfdev) static void panfrost_gem_debugfs_bo_add(struct panfrost_device *pfdev, struct panfrost_gem_object *bo) { - bo->debugfs.creator.tgid = current->group_leader->pid; + bo->debugfs.creator.tgid = current->tgid; get_task_comm(bo->debugfs.creator.process_name, current->group_leader); mutex_lock(&pfdev->debugfs.gems_lock); diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index b61908fd508a..2c215efb5320 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -45,7 +45,7 @@ static void panthor_gem_debugfs_bo_add(struct panthor_gem_object *bo) struct panthor_device *ptdev = container_of(bo->base.base.dev, struct panthor_device, base); - bo->debugfs.creator.tgid = current->group_leader->pid; + bo->debugfs.creator.tgid = current->tgid; get_task_comm(bo->debugfs.creator.process_name, current->group_leader); mutex_lock(&ptdev->gems.lock); diff --git a/drivers/hid/hid-picolcd_debugfs.c b/drivers/hid/hid-picolcd_debugfs.c index d01176da8896..085847a92e07 100644 --- a/drivers/hid/hid-picolcd_debugfs.c +++ b/drivers/hid/hid-picolcd_debugfs.c @@ -11,6 +11,7 @@ #include #include +#include #include #include diff --git a/drivers/hwmon/pmbus/q54sj108a2.c b/drivers/hwmon/pmbus/q54sj108a2.c index 4d7086d83aa3..fc030ca34480 100644 --- a/drivers/hwmon/pmbus/q54sj108a2.c +++ b/drivers/hwmon/pmbus/q54sj108a2.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c index 55e7af3a5f98..9b5d34a110ba 100644 --- a/drivers/hwmon/pmbus/ucd9000.c +++ b/drivers/hwmon/pmbus/ucd9000.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include 
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 572a91a62a7b..32267258a19c 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -149,7 +149,7 @@ struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_device *device, umem->owning_mm = current->mm; umem_odp->page_shift = PAGE_SHIFT; - umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); + umem_odp->tgid = get_task_pid(current, PIDTYPE_TGID); ib_init_umem_implicit_odp(umem_odp); return umem_odp; } @@ -258,7 +258,7 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device, umem_odp->page_shift = HPAGE_SHIFT; #endif - umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); + umem_odp->tgid = get_task_pid(current, PIDTYPE_TGID); ret = ib_init_umem_odp(umem_odp, ops); if (ret) goto err_put_pid; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 23ed2fc688f0..2012ba22a7af 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -33,6 +33,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 71269446353d..e314e6a84d96 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -33,6 +33,7 @@ */ #include +#include #include #include #include diff --git a/drivers/input/touchscreen/iqs5xx.c b/drivers/input/touchscreen/iqs5xx.c index 4ebd7565ae6e..c63819abaf9b 100644 --- a/drivers/input/touchscreen/iqs5xx.c +++ b/drivers/input/touchscreen/iqs5xx.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 0e479de75ad0..cd279a4f335d 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff 
--git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index a9c0157bf42f..681b00958d42 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 631ccc6a2bb7..8089cb74b75d 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -17,6 +17,7 @@ #include "dm-verity-fec.h" #include "dm-verity-verify-sig.h" #include "dm-audit.h" +#include #include #include #include diff --git a/drivers/media/cec/usb/extron-da-hd-4k-plus/extron-da-hd-4k-plus.c b/drivers/media/cec/usb/extron-da-hd-4k-plus/extron-da-hd-4k-plus.c index e2eff17952ab..bf92576bb2fc 100644 --- a/drivers/media/cec/usb/extron-da-hd-4k-plus/extron-da-hd-4k-plus.c +++ b/drivers/media/cec/usb/extron-da-hd-4k-plus/extron-da-hd-4k-plus.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/cec/usb/rainshadow/rainshadow-cec.c b/drivers/media/cec/usb/rainshadow/rainshadow-cec.c index 08f58456d682..6c0cee4b066f 100644 --- a/drivers/media/cec/usb/rainshadow/rainshadow-cec.c +++ b/drivers/media/cec/usb/rainshadow/rainshadow-cec.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/i2c/ccs/ccs-reg-access.c b/drivers/media/i2c/ccs/ccs-reg-access.c index fd36889ccc1d..a0181a5d2f34 100644 --- a/drivers/media/i2c/ccs/ccs-reg-access.c +++ b/drivers/media/i2c/ccs/ccs-reg-access.c @@ -12,6 +12,7 @@ #include #include +#include #include #include "ccs.h" diff --git a/drivers/media/usb/pvrusb2/pvrusb2-debugifc.c b/drivers/media/usb/pvrusb2/pvrusb2-debugifc.c index 81d711269ab5..9f936085acbb 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-debugifc.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-debugifc.c @@ -4,6 +4,7 @@ * Copyright (C) 2005 Mike Isely */ +#include #include #include "pvrusb2-debugifc.h" #include 
"pvrusb2-hdw.h" diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index 0cf31164b470..3b7a041ea351 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/misc/pch_phub.c b/drivers/misc/pch_phub.c index 7bee179841bc..0d63e834dbe7 100644 --- a/drivers/misc/pch_phub.c +++ b/drivers/misc/pch_phub.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index f1c6e9d8f616..dcee384c2f06 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include diff --git a/drivers/net/can/can327.c b/drivers/net/can/can327.c index b66fc16aedd2..90f5e35f3c8f 100644 --- a/drivers/net/can/can327.c +++ b/drivers/net/can/can327.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/can/slcan/slcan-core.c b/drivers/net/can/slcan/slcan-core.c index cd789e178d34..7439849d5c84 100644 --- a/drivers/net/can/slcan/slcan-core.c +++ b/drivers/net/can/slcan/slcan-core.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/ethernet/chelsio/cxgb3/common.h b/drivers/net/ethernet/chelsio/cxgb3/common.h index ecd025dda8d6..14000977730c 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/common.h +++ b/drivers/net/ethernet/chelsio/cxgb3/common.h @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c index 030a5776c937..8803fa071c50 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c @@ -2,6 +2,7 @@ // Copyright (c) 2021, NVIDIA 
CORPORATION & AFFILIATES. All rights reserved. #include +#include #include #include #include diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index 60223f03482d..491e9ce3d9b0 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index fe58024b5901..00909372ea61 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 0f44ce5ccc0a..d144787b2947 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index dfd571b22107..f7b32446d3b8 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index adfc83b7ca6a..8d36162f36df 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 41b95b04143d..8e44feaf7ff1 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include diff --git a/drivers/net/wireless/ath/ath6kl/debug.c b/drivers/net/wireless/ath/ath6kl/debug.c index b837d31416df..84403aab21c0 100644 --- a/drivers/net/wireless/ath/ath6kl/debug.c +++ b/drivers/net/wireless/ath/ath6kl/debug.c @@ -19,6 +19,7 @@ #include #include +#include #include #include diff --git 
a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c index 3b0e8c43ba4a..3c4bee85b825 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c @@ -7,6 +7,7 @@ #include "api/commands.h" #include "debugfs.h" #include "dbg.h" +#include #include #define FWRT_DEBUGFS_OPEN_WRAPPER(name, buflen, argtype) \ diff --git a/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c index 5c2a2033b3fd..b05b58eb1281 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c @@ -24,6 +24,7 @@ #include "fw/api/rfi.h" #include "fw/dhc-utils.h" #include +#include #define MLD_DEBUGFS_READ_FILE_OPS(name, bufsz) \ _MLD_DEBUGFS_READ_FILE_OPS(name, bufsz, struct iwl_mld) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index 683c0ba5fb39..e6b9896dc4ac 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -6,6 +6,7 @@ */ #include #include +#include #include #include #include diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index db5f9804b529..46a9dfa58a53 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #ifdef CONFIG_THERMAL diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h index e16865dd8e52..c93fd245c90f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h @@ -5,6 +5,7 @@ #define __MT7615_H #include +#include #include #include #include diff --git a/drivers/net/wireless/realtek/rtw89/debug.c b/drivers/net/wireless/realtek/rtw89/debug.c index 
d46691fa09bc..06a9504d2bad 100644 --- a/drivers/net/wireless/realtek/rtw89/debug.c +++ b/drivers/net/wireless/realtek/rtw89/debug.c @@ -2,6 +2,7 @@ /* Copyright(c) 2019-2020 Realtek Corporation */ +#include #include #include "coex.h" diff --git a/drivers/net/wireless/silabs/wfx/fwio.c b/drivers/net/wireless/silabs/wfx/fwio.c index 52c7f560b062..edd5ac30ed19 100644 --- a/drivers/net/wireless/silabs/wfx/fwio.c +++ b/drivers/net/wireless/silabs/wfx/fwio.c @@ -6,6 +6,7 @@ * Copyright (c) 2010, ST-Ericsson */ #include +#include #include #include #include diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index e44ef69dffc2..127dae51fec1 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -4,6 +4,7 @@ * Copyright (c) 2015-2016 HGST, a Western Digital Company. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index cc88e5a28c8a..eab3e4fc0f74 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -4,6 +4,7 @@ * Copyright (c) 2015-2016 HGST, a Western Digital Company. 
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include diff --git a/drivers/nvmem/brcm_nvram.c b/drivers/nvmem/brcm_nvram.c index b4cf245fb246..2dce6a7b8039 100644 --- a/drivers/nvmem/brcm_nvram.c +++ b/drivers/nvmem/brcm_nvram.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/drivers/nvmem/layouts/u-boot-env.c b/drivers/nvmem/layouts/u-boot-env.c index ab32bf1291af..f27f387bb52a 100644 --- a/drivers/nvmem/layouts/u-boot-env.c +++ b/drivers/nvmem/layouts/u-boot-env.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c index 1ee2d31816ae..c4cf3552c018 100644 --- a/drivers/of/kexec.c +++ b/drivers/of/kexec.c @@ -128,7 +128,6 @@ int __init ima_get_kexec_buffer(void **addr, size_t *size) { int ret, len; unsigned long tmp_addr; - unsigned long start_pfn, end_pfn; size_t tmp_size; const void *prop; @@ -144,17 +143,9 @@ int __init ima_get_kexec_buffer(void **addr, size_t *size) if (!tmp_size) return -ENOENT; - /* - * Calculate the PFNs for the buffer and ensure - * they are with in addressable memory. 
- */ - start_pfn = PHYS_PFN(tmp_addr); - end_pfn = PHYS_PFN(tmp_addr + tmp_size - 1); - if (!page_is_ram(start_pfn) || !page_is_ram(end_pfn)) { - pr_warn("IMA buffer at 0x%lx, size = 0x%zx beyond memory\n", - tmp_addr, tmp_size); - return -EINVAL; - } + ret = ima_validate_range(tmp_addr, tmp_size); + if (ret) + return ret; *addr = __va(tmp_addr); *size = tmp_size; diff --git a/drivers/platform/x86/intel/wmi/thunderbolt.c b/drivers/platform/x86/intel/wmi/thunderbolt.c index 08df560a2c7a..15e5763a20dd 100644 --- a/drivers/platform/x86/intel/wmi/thunderbolt.c +++ b/drivers/platform/x86/intel/wmi/thunderbolt.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/pnp/support.c b/drivers/pnp/support.c index a6073db10ec6..f6c866851769 100644 --- a/drivers/pnp/support.c +++ b/drivers/pnp/support.c @@ -9,6 +9,7 @@ #include #include +#include #include #include "base.h" diff --git a/drivers/ptp/ptp_pch.c b/drivers/ptp/ptp_pch.c index b8a9a54a176c..f854da2fd812 100644 --- a/drivers/ptp/ptp_pch.c +++ b/drivers/ptp/ptp_pch.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index c12941f71e2c..dcd6619a4b02 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -854,7 +854,8 @@ static struct rio_net *rio_scan_alloc_net(struct rio_mport *mport, if (idtab == NULL) { pr_err("RIO: failed to allocate destID table\n"); - rio_free_net(net); + kfree(net); + mport->net = NULL; net = NULL; } else { net->enum_data = idtab; diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c index 738d5e2d5304..020d210bde9f 100644 --- a/drivers/s390/cio/blacklist.c +++ b/drivers/s390/cio/blacklist.c @@ -10,6 +10,7 @@ #define pr_fmt(fmt) "cio: " fmt +#include #include #include #include diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 19cd27e9a3f3..d652df96a507 100644 --- 
a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index 6ba7fbddd3f7..e9a984903bff 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 4904b831c0a7..1653cc668dcf 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/drivers/scsi/aacraid/rx.c b/drivers/scsi/aacraid/rx.c index e06ff83b69ce..ba9f3256c258 100644 --- a/drivers/scsi/aacraid/rx.c +++ b/drivers/scsi/aacraid/rx.c @@ -17,6 +17,7 @@ */ #include +#include #include #include #include diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c index 3393a288fd23..40af961382dc 100644 --- a/drivers/scsi/ips.c +++ b/drivers/scsi/ips.c @@ -167,6 +167,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index ffa5b49aaf08..da02457f0b09 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "sas_internal.h" diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 2fff68935338..9f16164faa1e 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 987befb02408..6bd68f493f20 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -12,6 +12,7 @@ #include #include #include +#include 
#include #include #include diff --git a/drivers/staging/rtl8723bs/core/rtw_ieee80211.c b/drivers/staging/rtl8723bs/core/rtw_ieee80211.c index 8fdeeda88a6d..e89b24fa5e05 100644 --- a/drivers/staging/rtl8723bs/core/rtw_ieee80211.c +++ b/drivers/staging/rtl8723bs/core/rtw_ieee80211.c @@ -6,6 +6,7 @@ ******************************************************************************/ #include +#include #include #include diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index c8a248bd11be..2c4d583fe3e6 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index ec7bc6e30228..87c5d26a5089 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -16,6 +16,7 @@ * on the formats implemented in this file. */ +#include #include #include #include diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index fe2b888bcb43..6360b66c7445 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -7,6 +7,7 @@ * Nicholas A. 
Bellinger */ +#include #include #include #include diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index f686d95d3273..a29b20b5f78e 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index b3948aad0b95..e2732c575bad 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index e987d260e346..84de274d24ca 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -79,6 +79,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 604043a7533d..31950fc51a4c 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/atm/speedtch.c b/drivers/usb/atm/speedtch.c index 773ac2725532..e6b610a87482 100644 --- a/drivers/usb/atm/speedtch.c +++ b/drivers/usb/atm/speedtch.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index cd0f7b4bd82a..78a2585f33ec 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index f58590bf5e02..c47965d850d4 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c index 
a4a2d3dcb0d6..5a87516ddb31 100644 --- a/drivers/usb/gadget/function/uvc_configfs.c +++ b/drivers/usb/gadget/function/uvc_configfs.c @@ -12,6 +12,7 @@ #include "uvc_configfs.h" +#include #include #include #include diff --git a/drivers/usb/typec/ucsi/debugfs.c b/drivers/usb/typec/ucsi/debugfs.c index f3684ab787fe..d1f5832165c3 100644 --- a/drivers/usb/typec/ucsi/debugfs.c +++ b/drivers/usb/typec/ucsi/debugfs.c @@ -8,6 +8,7 @@ * Gopal Saranya */ #include +#include #include #include #include diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index d83a0051c737..199799b319c2 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index ae30e394d176..2a848c35c14d 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -12,6 +12,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index 77fbd196008f..4f9dc276da6f 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -6,6 +6,7 @@ * * Common directory handling for ADFS */ +#include #include #include "adfs.h" diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 8cb1a94339b8..2b772613a74c 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index f58b12be8267..509293745ce9 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/efivarfs/vars.c b/fs/efivarfs/vars.c index 70e13db260db..6833c3d24b54 100644 --- a/fs/efivarfs/vars.c +++ b/fs/efivarfs/vars.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/fat/cache.c 
b/fs/fat/cache.c index 630f3056658e..1b87354e24ba 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -54,7 +54,7 @@ void fat_cache_destroy(void) kmem_cache_destroy(fat_cache_cachep); } -static inline struct fat_cache *fat_cache_alloc(struct inode *inode) +static inline struct fat_cache *fat_cache_alloc(void) { return kmem_cache_alloc(fat_cache_cachep, GFP_NOFS); } @@ -144,7 +144,7 @@ static void fat_cache_add(struct inode *inode, struct fat_cache_id *new) MSDOS_I(inode)->nr_caches++; spin_unlock(&MSDOS_I(inode)->cache_lru_lock); - tmp = fat_cache_alloc(inode); + tmp = fat_cache_alloc(); if (!tmp) { spin_lock(&MSDOS_I(inode)->cache_lru_lock); MSDOS_I(inode)->nr_caches--; diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 07d95f1442c8..4b8b25f688e4 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include "fat.h" diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index ba0152ed0810..048c103b506a 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -325,7 +325,12 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) err = fat_remove_entries(dir, &sinfo); /* and releases bh */ if (err) goto out; - drop_nlink(dir); + if (dir->i_nlink >= 3) + drop_nlink(dir); + else { + fat_fs_error(sb, "parent dir link count too low (%u)", + dir->i_nlink); + } clear_nlink(inode); fat_detach(inode); diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index e46f34cade1a..2acfe3123a72 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include "fat.h" @@ -803,7 +804,12 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) err = fat_remove_entries(dir, &sinfo); /* and releases bh */ if (err) goto out; - drop_nlink(dir); + if (dir->i_nlink >= 3) + drop_nlink(dir); + else { + fat_fs_error(sb, "parent dir link count too low (%u)", + dir->i_nlink); + } clear_nlink(inode); fat_truncate_time(inode, NULL, 
FAT_UPDATE_ATIME | FAT_UPDATE_CMTIME); diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 53b8419ee15f..7828ad0b6f5a 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 441dfbfe2d2b..1e6b2dd47ba7 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index a4559c9f64e6..f18349689458 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 58bf58b68955..b7db177d17d6 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -1812,14 +1812,15 @@ static int __ocfs2_find_path(struct ocfs2_caching_info *ci, ret = -EROFS; goto out; } - if (le16_to_cpu(el->l_next_free_rec) == 0) { + if (!el->l_next_free_rec || !el->l_count) { ocfs2_error(ocfs2_metadata_cache_get_super(ci), - "Owner %llu has empty extent list at depth %u\n", + "Owner %llu has empty extent list at depth %u\n" + "(next free=%u count=%u)\n", (unsigned long long)ocfs2_metadata_cache_owner(ci), - le16_to_cpu(el->l_tree_depth)); + le16_to_cpu(el->l_tree_depth), + le16_to_cpu(el->l_next_free_rec), le16_to_cpu(el->l_count)); ret = -EROFS; goto out; - } for(i = 0; i < le16_to_cpu(el->l_next_free_rec) - 1; i++) { diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 724350925aff..8e9cbc334cf4 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1942,7 +1942,7 @@ static struct configfs_attribute *o2hb_region_attrs[] = { NULL, }; -static struct configfs_item_operations o2hb_region_item_ops = { +static const struct configfs_item_operations o2hb_region_item_ops = { .release = o2hb_region_release, }; @@ -2193,7 +2193,7 @@ static struct 
configfs_attribute *o2hb_heartbeat_group_attrs[] = { NULL, }; -static struct configfs_group_operations o2hb_heartbeat_group_group_ops = { +static const struct configfs_group_operations o2hb_heartbeat_group_group_ops = { .make_item = o2hb_heartbeat_group_make_item, .drop_item = o2hb_heartbeat_group_drop_item, }; diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 6bc4e064ace4..c5e83c774d73 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -396,7 +396,7 @@ static struct configfs_attribute *o2nm_node_attrs[] = { NULL, }; -static struct configfs_item_operations o2nm_node_item_ops = { +static const struct configfs_item_operations o2nm_node_item_ops = { .release = o2nm_node_release, }; @@ -638,7 +638,7 @@ static void o2nm_node_group_drop_item(struct config_group *group, config_item_put(item); } -static struct configfs_group_operations o2nm_node_group_group_ops = { +static const struct configfs_group_operations o2nm_node_group_group_ops = { .make_item = o2nm_node_group_make_item, .drop_item = o2nm_node_group_drop_item, }; @@ -657,7 +657,7 @@ static void o2nm_cluster_release(struct config_item *item) kfree(cluster); } -static struct configfs_item_operations o2nm_cluster_item_ops = { +static const struct configfs_item_operations o2nm_cluster_item_ops = { .release = o2nm_cluster_release, }; @@ -741,7 +741,7 @@ static void o2nm_cluster_group_drop_item(struct config_group *group, struct conf config_item_put(item); } -static struct configfs_group_operations o2nm_cluster_group_group_ops = { +static const struct configfs_group_operations o2nm_cluster_group_group_ops = { .make_group = o2nm_cluster_group_make_group, .drop_item = o2nm_cluster_group_drop_item, }; diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 2347a50f079b..cf3ca2f597c2 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -1105,7 +1105,7 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, 
mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node, qr->qr_domain); - /* buffer used in dlm_mast_regions() */ + /* buffer used in dlm_match_regions() */ local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL); if (!local) return -ENOMEM; diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index b95724b767e1..9c2665dd24e2 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -74,8 +74,9 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, * nice */ status = -ESTALE; - } else + } else if (status != -ESTALE) { mlog(ML_ERROR, "test inode bit failed %d\n", status); + } goto unlock_nfs_sync; } @@ -162,8 +163,9 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) if (status < 0) { if (status == -EINVAL) { status = -ESTALE; - } else + } else if (status != -ESTALE) { mlog(ML_ERROR, "test inode bit failed %d\n", status); + } parent = ERR_PTR(status); goto bail_unlock; } diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index b5fcc2725a29..03a51662ea8e 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1494,12 +1494,25 @@ int ocfs2_validate_inode_block(struct super_block *sb, goto bail; } - if ((le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) && - le32_to_cpu(di->i_clusters)) { - rc = ocfs2_error(sb, "Invalid dinode %llu: %u clusters\n", - (unsigned long long)bh->b_blocknr, - le32_to_cpu(di->i_clusters)); - goto bail; + if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) { + struct ocfs2_inline_data *data = &di->id2.i_data; + + if (le32_to_cpu(di->i_clusters)) { + rc = ocfs2_error(sb, + "Invalid dinode %llu: %u clusters\n", + (unsigned long long)bh->b_blocknr, + le32_to_cpu(di->i_clusters)); + goto bail; + } + + if (le64_to_cpu(di->i_size) > le16_to_cpu(data->id_count)) { + rc = ocfs2_error(sb, + "Invalid dinode #%llu: inline data i_size %llu exceeds id_count %u\n", + (unsigned long long)bh->b_blocknr, + (unsigned long long)le64_to_cpu(di->i_size), + le16_to_cpu(data->id_count)); + goto bail; + } } if 
(le32_to_cpu(di->i_flags) & OCFS2_CHAIN_FL) { @@ -1529,6 +1542,13 @@ int ocfs2_validate_inode_block(struct super_block *sb, } } + if ((le16_to_cpu(di->i_dyn_features) & OCFS2_HAS_REFCOUNT_FL) && + !di->i_refcount_loc) { + rc = ocfs2_error(sb, "Inode #%llu has refcount flag but no i_refcount_loc\n", + (unsigned long long)bh->b_blocknr); + goto bail; + } + rc = 0; bail: diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index d1aa04a5af1b..56be21c695d6 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -905,13 +905,11 @@ bail: static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc) { struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); - int i; alloc->id1.bitmap1.i_total = 0; alloc->id1.bitmap1.i_used = 0; la->la_bm_off = 0; - for(i = 0; i < le16_to_cpu(la->la_size); i++) - la->la_bitmap[i] = 0; + memset(la->la_bitmap, 0, le16_to_cpu(la->la_size)); } #if 0 diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 99637e34d9da..e3cdf8788484 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -662,6 +662,12 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, goto out_commit; } + gd = (struct ocfs2_group_desc *)gd_bh->b_data; + if (le16_to_cpu(gd->bg_free_bits_count) < len) { + ret = -ENOSPC; + goto out_commit; + } + /* * probe the victim cluster group to find a proper * region to fit wanted movement, it even will perform @@ -682,7 +688,6 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, goto out_commit; } - gd = (struct ocfs2_group_desc *)gd_bh->b_data; ret = ocfs2_alloc_dinode_update_counts(gb_inode, handle, gb_bh, len, le16_to_cpu(gd->bg_chain)); if (ret) { diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index f7763da5c4a2..c501eb3cdcda 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -641,7 +641,7 @@ struct ocfs2_local_alloc __le16 la_size; /* Size of included bitmap, in bytes */ __le16 la_reserved1; __le64 la_reserved2; -/*10*/ __u8 
la_bitmap[]; +/*10*/ __u8 la_bitmap[] __counted_by_le(la_size); }; /* @@ -654,7 +654,7 @@ struct ocfs2_inline_data * for data, starting at id_data */ __le16 id_reserved0; __le32 id_reserved1; - __u8 id_data[]; /* Start of user data */ + __u8 id_data[] __counted_by_le(id_count); /* Start of user data */ }; /* diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index e544c704b583..ea4a68abc25b 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -44,6 +44,9 @@ struct ocfs2_slot_info { static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, unsigned int node_num); +static int ocfs2_validate_slot_map_block(struct super_block *sb, + struct buffer_head *bh); + static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si, int slot_num) { @@ -132,7 +135,8 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb) * this is not true, the read of -1 (UINT64_MAX) will fail. */ ret = ocfs2_read_blocks(INODE_CACHE(si->si_inode), -1, si->si_blocks, - si->si_bh, OCFS2_BH_IGNORE_CACHE, NULL); + si->si_bh, OCFS2_BH_IGNORE_CACHE, + ocfs2_validate_slot_map_block); if (ret == 0) { spin_lock(&osb->osb_lock); ocfs2_update_slot_info(si); @@ -332,6 +336,24 @@ int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num) return ocfs2_update_disk_slot(osb, osb->slot_info, slot_num); } +static int ocfs2_validate_slot_map_block(struct super_block *sb, + struct buffer_head *bh) +{ + int rc; + + BUG_ON(!buffer_uptodate(bh)); + + if (bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO) { + rc = ocfs2_error(sb, + "Invalid Slot Map Buffer Head " + "Block Number : %llu, Should be >= %d", + (unsigned long long)bh->b_blocknr, + OCFS2_SUPER_BLOCK_BLKNO); + return rc; + } + return 0; +} + static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, struct ocfs2_slot_info *si) { @@ -383,7 +405,8 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, bh = NULL; /* Acquire a fresh bh */ status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno, - 1, &bh, OCFS2_BH_IGNORE_CACHE, NULL); + 1, &bh, 
OCFS2_BH_IGNORE_CACHE, + ocfs2_validate_slot_map_block); if (status < 0) { mlog_errno(status); goto bail; diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 8e6e5235b30c..79d1325b2111 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -295,6 +295,74 @@ static int ocfs2_validate_group_descriptor(struct super_block *sb, return ocfs2_validate_gd_self(sb, bh, 0); } +/* + * The hint group descriptor (gd) may already have been released + * in _ocfs2_free_suballoc_bits(). We first check the gd signature, + * then perform the standard ocfs2_read_group_descriptor() jobs. + * + * If the gd signature is invalid, we return 'rc=0' and set + * '*released=1'. The caller is expected to handle this specific case. + * Otherwise, we return the actual error code. + * + * We treat gd signature corruption case as a release case. The + * caller ocfs2_claim_suballoc_bits() will use ocfs2_search_chain() + * to search each gd block. The code will eventually find this + * corrupted gd block - Late, but not missed. + * + * Note: + * The caller is responsible for initializing the '*released' status. + */ +static int ocfs2_read_hint_group_descriptor(struct inode *inode, + struct ocfs2_dinode *di, u64 gd_blkno, + struct buffer_head **bh, int *released) +{ + int rc; + struct buffer_head *tmp = *bh; + struct ocfs2_group_desc *gd; + + rc = ocfs2_read_block(INODE_CACHE(inode), gd_blkno, &tmp, NULL); + if (rc) + goto out; + + gd = (struct ocfs2_group_desc *) tmp->b_data; + if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { + /* + * Invalid gd cache was set in ocfs2_read_block(), + * which will affect block_group allocation. 
+ * Path: + * ocfs2_reserve_suballoc_bits + * ocfs2_block_group_alloc + * ocfs2_block_group_alloc_contig + * ocfs2_set_new_buffer_uptodate + */ + ocfs2_remove_from_cache(INODE_CACHE(inode), tmp); + *released = 1; /* we return 'rc=0' for this case */ + goto free_bh; + } + + /* below jobs same with ocfs2_read_group_descriptor() */ + if (!buffer_jbd(tmp)) { + rc = ocfs2_validate_group_descriptor(inode->i_sb, tmp); + if (rc) + goto free_bh; + } + + rc = ocfs2_validate_gd_parent(inode->i_sb, di, tmp, 0); + if (rc) + goto free_bh; + + /* If ocfs2_read_block() got us a new bh, pass it up. */ + if (!*bh) + *bh = tmp; + + return rc; + +free_bh: + brelse(tmp); +out: + return rc; +} + int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di, u64 gd_blkno, struct buffer_head **bh) { @@ -1725,7 +1793,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, u32 bits_wanted, u32 min_bits, struct ocfs2_suballoc_result *res, - u16 *bits_left) + u16 *bits_left, int *released) { int ret; struct buffer_head *group_bh = NULL; @@ -1733,9 +1801,11 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data; struct inode *alloc_inode = ac->ac_inode; - ret = ocfs2_read_group_descriptor(alloc_inode, di, - res->sr_bg_blkno, &group_bh); - if (ret < 0) { + ret = ocfs2_read_hint_group_descriptor(alloc_inode, di, + res->sr_bg_blkno, &group_bh, released); + if (*released) { + return 0; + } else if (ret < 0) { mlog_errno(ret); return ret; } @@ -1950,6 +2020,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, struct ocfs2_suballoc_result *res) { int status; + int released = 0; u16 victim, i; u16 bits_left = 0; u64 hint = ac->ac_last_group; @@ -1976,6 +2047,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, goto bail; } + /* the hint bg may already be released, we quiet search this group. 
*/ res->sr_bg_blkno = hint; if (res->sr_bg_blkno) { /* Attempt to short-circuit the usual search mechanism @@ -1983,7 +2055,12 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, * allocation group. This helps us maintain some * contiguousness across allocations. */ status = ocfs2_search_one_group(ac, handle, bits_wanted, - min_bits, res, &bits_left); + min_bits, res, &bits_left, + &released); + if (released) { + res->sr_bg_blkno = 0; + goto chain_search; + } if (!status) goto set_hint; if (status < 0 && status != -ENOSPC) { @@ -1991,7 +2068,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, goto bail; } } - +chain_search: cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; if (!le16_to_cpu(cl->cl_next_free_rec) || le16_to_cpu(cl->cl_next_free_rec) > le16_to_cpu(cl->cl_count)) { @@ -2113,6 +2190,12 @@ bail: return status; } +/* + * after ocfs2 has the ability to release block group unused space, + * the ->ip_last_used_group may be invalid. so this function returns + * ac->ac_last_group need to verify. + * refer the 'hint' in ocfs2_claim_suballoc_bits() for more details. + */ static void ocfs2_init_inode_ac_group(struct inode *dir, struct buffer_head *parent_di_bh, struct ocfs2_alloc_context *ac) @@ -2551,6 +2634,198 @@ bail: return status; } +/* + * Reclaim the suballocator managed space to main bitmap. + * This function first works on the suballocator to perform the + * cleanup rec/alloc_inode job, then switches to the main bitmap + * to reclaim released space. + * + * handle: The transaction handle + * alloc_inode: The suballoc inode + * alloc_bh: The buffer_head of suballoc inode + * group_bh: The group descriptor buffer_head of suballocator managed. + * Caller should release the input group_bh. 
+ */ +static int _ocfs2_reclaim_suballoc_to_main(handle_t *handle, + struct inode *alloc_inode, + struct buffer_head *alloc_bh, + struct buffer_head *group_bh) +{ + int idx, status = 0; + int i, next_free_rec, len = 0; + __le16 old_bg_contig_free_bits = 0; + u16 start_bit; + u32 tmp_used; + u64 bg_blkno, start_blk; + unsigned int count; + struct ocfs2_chain_rec *rec; + struct buffer_head *main_bm_bh = NULL; + struct inode *main_bm_inode = NULL; + struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb); + struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; + struct ocfs2_chain_list *cl = &fe->id2.i_chain; + struct ocfs2_group_desc *group = (struct ocfs2_group_desc *) group_bh->b_data; + + idx = le16_to_cpu(group->bg_chain); + rec = &(cl->cl_recs[idx]); + + status = ocfs2_extend_trans(handle, + ocfs2_calc_group_alloc_credits(osb->sb, + le16_to_cpu(cl->cl_cpg))); + if (status) { + mlog_errno(status); + goto bail; + } + status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), + alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + /* + * Only clear the suballocator rec item in-place. + * + * If idx is not the last, we don't compress (remove the empty item) + * the cl_recs[]. If not, we need to do lots jobs. + * + * Compress cl_recs[] code example: + * if (idx != cl->cl_next_free_rec - 1) + * memmove(&cl->cl_recs[idx], &cl->cl_recs[idx + 1], + * sizeof(struct ocfs2_chain_rec) * + * (cl->cl_next_free_rec - idx - 1)); + * for(i = idx; i < cl->cl_next_free_rec-1; i++) { + * group->bg_chain = "later group->bg_chain"; + * group->bg_blkno = xxx; + * ... ... 
+ * } + */ + + tmp_used = le32_to_cpu(fe->id1.bitmap1.i_total); + fe->id1.bitmap1.i_total = cpu_to_le32(tmp_used - le32_to_cpu(rec->c_total)); + + /* Substraction 1 for the block group itself */ + tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); + fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - 1); + + tmp_used = le32_to_cpu(fe->i_clusters); + fe->i_clusters = cpu_to_le32(tmp_used - le16_to_cpu(cl->cl_cpg)); + + spin_lock(&OCFS2_I(alloc_inode)->ip_lock); + OCFS2_I(alloc_inode)->ip_clusters -= le32_to_cpu(fe->i_clusters); + fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb, + le32_to_cpu(fe->i_clusters))); + spin_unlock(&OCFS2_I(alloc_inode)->ip_lock); + i_size_write(alloc_inode, le64_to_cpu(fe->i_size)); + alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode); + + ocfs2_journal_dirty(handle, alloc_bh); + ocfs2_update_inode_fsync_trans(handle, alloc_inode, 0); + + start_blk = le64_to_cpu(rec->c_blkno); + count = le32_to_cpu(rec->c_total) / le16_to_cpu(cl->cl_bpc); + + /* + * If the rec is the last one, let's compress the chain list by + * removing the empty cl_recs[] at the end. 
+ */ + next_free_rec = le16_to_cpu(cl->cl_next_free_rec); + if (idx == (next_free_rec - 1)) { + len++; /* the last item should be counted first */ + for (i = (next_free_rec - 2); i > 0; i--) { + if (cl->cl_recs[i].c_free == cl->cl_recs[i].c_total) + len++; + else + break; + } + } + le16_add_cpu(&cl->cl_next_free_rec, -len); + + rec->c_free = 0; + rec->c_total = 0; + rec->c_blkno = 0; + ocfs2_remove_from_cache(INODE_CACHE(alloc_inode), group_bh); + memset(group, 0, sizeof(struct ocfs2_group_desc)); + + /* prepare job for reclaim clusters */ + main_bm_inode = ocfs2_get_system_file_inode(osb, + GLOBAL_BITMAP_SYSTEM_INODE, + OCFS2_INVALID_SLOT); + if (!main_bm_inode) + goto bail; /* ignore the error in reclaim path */ + + inode_lock(main_bm_inode); + + status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); + if (status < 0) + goto free_bm_inode; /* ignore the error in reclaim path */ + + ocfs2_block_to_cluster_group(main_bm_inode, start_blk, &bg_blkno, + &start_bit); + fe = (struct ocfs2_dinode *) main_bm_bh->b_data; + cl = &fe->id2.i_chain; + /* reuse group_bh, caller will release the input group_bh */ + group_bh = NULL; + + /* reclaim clusters to global_bitmap */ + status = ocfs2_read_group_descriptor(main_bm_inode, fe, bg_blkno, + &group_bh); + if (status < 0) { + mlog_errno(status); + goto free_bm_bh; + } + group = (struct ocfs2_group_desc *) group_bh->b_data; + + if ((count + start_bit) > le16_to_cpu(group->bg_bits)) { + ocfs2_error(alloc_inode->i_sb, + "reclaim length (%d) beyands block group length (%d)", + count + start_bit, le16_to_cpu(group->bg_bits)); + goto free_group_bh; + } + + old_bg_contig_free_bits = group->bg_contig_free_bits; + status = ocfs2_block_group_clear_bits(handle, main_bm_inode, + group, group_bh, + start_bit, count, 0, + _ocfs2_clear_bit); + if (status < 0) { + mlog_errno(status); + goto free_group_bh; + } + + status = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode), + main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) 
{ + mlog_errno(status); + ocfs2_block_group_set_bits(handle, main_bm_inode, group, group_bh, + start_bit, count, + le16_to_cpu(old_bg_contig_free_bits), 1); + goto free_group_bh; + } + + idx = le16_to_cpu(group->bg_chain); + rec = &(cl->cl_recs[idx]); + + le32_add_cpu(&rec->c_free, count); + tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); + fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count); + ocfs2_journal_dirty(handle, main_bm_bh); + +free_group_bh: + brelse(group_bh); + +free_bm_bh: + ocfs2_inode_unlock(main_bm_inode, 1); + brelse(main_bm_bh); + +free_bm_inode: + inode_unlock(main_bm_inode); + iput(main_bm_inode); + +bail: + return status; +} + /* * expects the suballoc inode to already be locked. */ @@ -2563,12 +2838,13 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle, void (*undo_fn)(unsigned int bit, unsigned long *bitmap)) { - int status = 0; + int idx, status = 0; u32 tmp_used; struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; struct ocfs2_chain_list *cl = &fe->id2.i_chain; struct buffer_head *group_bh = NULL; struct ocfs2_group_desc *group; + struct ocfs2_chain_rec *rec; __le16 old_bg_contig_free_bits = 0; /* The alloc_bh comes from ocfs2_free_dinode() or @@ -2614,12 +2890,26 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle, goto bail; } - le32_add_cpu(&cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free, - count); + idx = le16_to_cpu(group->bg_chain); + rec = &(cl->cl_recs[idx]); + + le32_add_cpu(&rec->c_free, count); tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count); ocfs2_journal_dirty(handle, alloc_bh); + /* + * Reclaim suballocator free space. 
+ * Bypass: global_bitmap, non empty rec, first rec in cl_recs[] + */ + if (ocfs2_is_cluster_bitmap(alloc_inode) || + (le32_to_cpu(rec->c_free) != (le32_to_cpu(rec->c_total) - 1)) || + (le16_to_cpu(cl->cl_next_free_rec) == 1)) { + goto bail; + } + + _ocfs2_reclaim_suballoc_to_main(handle, alloc_inode, alloc_bh, group_bh); + bail: brelse(group_bh); return status; @@ -2873,7 +3163,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, struct ocfs2_group_desc *group; struct buffer_head *group_bh = NULL; u64 bg_blkno; - int status; + int status, quiet = 0, released = 0; trace_ocfs2_test_suballoc_bit((unsigned long long)blkno, (unsigned int)bit); @@ -2889,9 +3179,13 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, bg_blkno = group_blkno ? group_blkno : ocfs2_which_suballoc_group(blkno, bit); - status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno, - &group_bh); - if (status < 0) { + status = ocfs2_read_hint_group_descriptor(suballoc, alloc_di, bg_blkno, + &group_bh, &released); + if (released) { + quiet = 1; + status = -ESTALE; + goto bail; + } else if (status < 0) { mlog(ML_ERROR, "read group %llu failed %d\n", (unsigned long long)bg_blkno, status); goto bail; @@ -2903,7 +3197,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, bail: brelse(group_bh); - if (status) + if (status && !quiet) mlog_errno(status); return status; } @@ -2923,7 +3217,7 @@ bail: */ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) { - int status; + int status, quiet = 0; u64 group_blkno = 0; u16 suballoc_bit = 0, suballoc_slot = 0; struct inode *inode_alloc_inode; @@ -2965,8 +3259,12 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh, group_blkno, blkno, suballoc_bit, res); - if (status < 0) - mlog(ML_ERROR, "test suballoc bit failed %d\n", status); + if (status < 0) { + if (status == -ESTALE) + quiet = 1; + else + mlog(ML_ERROR, "test 
suballoc bit failed %d\n", status); + } ocfs2_inode_unlock(inode_alloc_inode, 0); inode_unlock(inode_alloc_inode); @@ -2974,7 +3272,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) iput(inode_alloc_inode); brelse(alloc_bh); bail: - if (status) + if (status && !quiet) mlog_errno(status); return status; } diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 1b21fbc16d73..e434a62dd69f 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1971,8 +1971,7 @@ static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) ocfs2_xa_wipe_namevalue(loc); loc->xl_entry = NULL; - le16_add_cpu(&xh->xh_count, -1); - count = le16_to_cpu(xh->xh_count); + count = le16_to_cpu(xh->xh_count) - 1; /* * Only zero out the entry if there are more remaining. This is @@ -1987,6 +1986,8 @@ static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc) memset(&xh->xh_entries[count], 0, sizeof(struct ocfs2_xattr_entry)); } + + xh->xh_count = cpu_to_le16(count); } /* @@ -6394,6 +6395,10 @@ static int ocfs2_reflink_xattr_header(handle_t *handle, (void *)last - (void *)xe); memset(last, 0, sizeof(struct ocfs2_xattr_entry)); + last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)] - 1; + } else { + memset(xe, 0, sizeof(struct ocfs2_xattr_entry)); + last = NULL; } /* diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index e9a69c95be91..cda26bdef3b9 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/array.c b/fs/proc/array.c index 42932f88141a..f447e734612a 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -55,6 +55,7 @@ #include #include +#include #include #include #include @@ -528,7 +529,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, } sid = task_session_nr_ns(task, ns); - ppid = task_tgid_nr_ns(task->real_parent, ns); + ppid = task_ppid_nr_ns(task, ns); pgid = task_pgrp_nr_ns(task, ns); unlock_task_sighand(task, &flags); 
diff --git a/fs/seq_file.c b/fs/seq_file.c index 8bbb1ad46335..8894cbde8d3a 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 32c7f3d27f74..87580ff827ee 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -16,6 +16,7 @@ #include "udfdecl.h" +#include #include #include /* for memset */ #include diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h index 100d24b02e52..f22ccfc0df98 100644 --- a/include/asm-generic/atomic64.h +++ b/include/asm-generic/atomic64.h @@ -10,7 +10,7 @@ #include typedef struct { - s64 counter; + s64 __aligned(sizeof(s64)) counter; } atomic64_t; #define ATOMIC64_INIT(i) { (i) } diff --git a/include/asm-generic/rqspinlock.h b/include/asm-generic/rqspinlock.h index 5c5cf2f7fc39..151d267a496b 100644 --- a/include/asm-generic/rqspinlock.h +++ b/include/asm-generic/rqspinlock.h @@ -28,7 +28,7 @@ struct rqspinlock { */ struct bpf_res_spin_lock { u32 val; -}; +} __aligned(__alignof__(struct rqspinlock)); struct qspinlock; #ifdef CONFIG_QUEUED_SPINLOCKS diff --git a/include/linux/array_size.h b/include/linux/array_size.h index 06d7d83196ca..0c4fec98822e 100644 --- a/include/linux/array_size.h +++ b/include/linux/array_size.h @@ -10,4 +10,10 @@ */ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) +/** + * ARRAY_END - get a pointer to one past the last element in array @arr + * @arr: array + */ +#define ARRAY_END(arr) (&(arr)[ARRAY_SIZE(arr)]) + #endif /* _LINUX_ARRAY_SIZE_H */ diff --git a/include/linux/capability.h b/include/linux/capability.h index 1fb08922552c..37db92b3d6f8 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -203,6 +203,12 @@ static inline bool checkpoint_restore_ns_capable(struct user_namespace *ns) ns_capable(ns, CAP_SYS_ADMIN); } +static inline bool checkpoint_restore_ns_capable_noaudit(struct user_namespace *ns) +{ + 
return ns_capable_noaudit(ns, CAP_CHECKPOINT_RESTORE) || + ns_capable_noaudit(ns, CAP_SYS_ADMIN); +} + /* audit system wants to get cap info from files as well */ int get_vfs_caps_from_disk(struct mnt_idmap *idmap, const struct dentry *dentry, diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 7edf1a07b535..e1123dd28486 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -153,4 +153,4 @@ * Bindgen uses LLVM even if our C compiler is GCC, so we cannot * rely on the auto-detected CONFIG_CC_HAS_TYPEOF_UNQUAL. */ -#define CC_HAS_TYPEOF_UNQUAL (__clang_major__ >= 19) +#define CC_HAS_TYPEOF_UNQUAL (__clang_major__ > 19 || (__clang_major__ == 19 && __clang_minor__ > 0)) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 3c936b129860..b1b141394d13 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -289,6 +289,22 @@ struct ftrace_likely_data { # define __no_kasan_or_inline __always_inline #endif +#ifdef CONFIG_KCSAN +/* + * Type qualifier to mark variables where all data-racy accesses should be + * ignored by KCSAN. Note, the implementation simply marks these variables as + * volatile, since KCSAN will treat such accesses as "marked". + * + * Defined here because defining __data_racy as volatile for KCSAN objects only + * causes problems in BPF Type Format (BTF) generation since struct members + * of core kernel data structs will be volatile in some objects and not in + * others. Instead define it globally for KCSAN kernels. + */ +# define __data_racy volatile +#else +# define __data_racy +#endif + #ifdef __SANITIZE_THREAD__ /* * Clang still emits instrumentation for __tsan_func_{entry,exit}() and builtin @@ -300,16 +316,9 @@ struct ftrace_likely_data { * disable all instrumentation. See Kconfig.kcsan where this is mandatory. 
*/ # define __no_kcsan __no_sanitize_thread __disable_sanitizer_instrumentation -/* - * Type qualifier to mark variables where all data-racy accesses should be - * ignored by KCSAN. Note, the implementation simply marks these variables as - * volatile, since KCSAN will treat such accesses as "marked". - */ -# define __data_racy volatile # define __no_sanitize_or_inline __no_kcsan notrace __maybe_unused #else # define __no_kcsan -# define __data_racy #endif #ifdef __SANITIZE_MEMORY__ diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 800dcc360db2..ecb06f16d22c 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -69,6 +69,14 @@ struct task_delay_info { u32 compact_count; /* total count of memory compact */ u32 wpcopy_count; /* total count of write-protect copy */ u32 irq_count; /* total count of IRQ/SOFTIRQ */ + + struct timespec64 blkio_delay_max_ts; + struct timespec64 swapin_delay_max_ts; + struct timespec64 freepages_delay_max_ts; + struct timespec64 thrashing_delay_max_ts; + struct timespec64 compact_delay_max_ts; + struct timespec64 wpcopy_delay_max_ts; + struct timespec64 irq_delay_max_ts; }; #endif diff --git a/include/linux/filter.h b/include/linux/filter.h index 4e1cb4f91f49..44d7ae95ddbc 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1376,24 +1376,13 @@ static inline bool bpf_jit_kallsyms_enabled(void) return false; } -int __bpf_address_lookup(unsigned long addr, unsigned long *size, - unsigned long *off, char *sym); +int bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char *sym); bool is_bpf_text_address(unsigned long addr); int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym); struct bpf_prog *bpf_prog_ksym_find(unsigned long addr); -static inline int -bpf_address_lookup(unsigned long addr, unsigned long *size, - unsigned long *off, char **modname, char *sym) -{ - int ret = __bpf_address_lookup(addr, size, off, sym); - - if 
(ret && modname) - *modname = NULL; - return ret; -} - void bpf_prog_kallsyms_add(struct bpf_prog *fp); void bpf_prog_kallsyms_del(struct bpf_prog *fp); @@ -1432,8 +1421,8 @@ static inline bool bpf_jit_kallsyms_enabled(void) } static inline int -__bpf_address_lookup(unsigned long addr, unsigned long *size, - unsigned long *off, char *sym) +bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char *sym) { return 0; } @@ -1454,13 +1443,6 @@ static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) return NULL; } -static inline int -bpf_address_lookup(unsigned long addr, unsigned long *size, - unsigned long *off, char **modname, char *sym) -{ - return 0; -} - static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp) { } diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 705db0a6d995..1a4d36fc9085 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -88,11 +88,13 @@ struct ftrace_func_entry; defined(CONFIG_DYNAMIC_FTRACE) int ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, - unsigned long *off, char **modname, char *sym); + unsigned long *off, char **modname, + const unsigned char **modbuildid, char *sym); #else static inline int ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, - unsigned long *off, char **modname, char *sym) + unsigned long *off, char **modname, + const unsigned char **modbuildid, char *sym) { return 0; } diff --git a/include/linux/ima.h b/include/linux/ima.h index 8e29cb4e6a01..abf8923f8fc5 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -69,6 +69,7 @@ static inline int ima_measure_critical_data(const char *event_label, #ifdef CONFIG_HAVE_IMA_KEXEC int __init ima_free_kexec_buffer(void); int __init ima_get_kexec_buffer(void **addr, size_t *size); +int ima_validate_range(phys_addr_t phys, size_t size); #endif #ifdef CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT diff --git a/include/linux/instrumented.h b/include/linux/instrumented.h 
index 711a1f0d1a73..a1b4cf81adc2 100644 --- a/include/linux/instrumented.h +++ b/include/linux/instrumented.h @@ -7,6 +7,7 @@ #ifndef _LINUX_INSTRUMENTED_H #define _LINUX_INSTRUMENTED_H +#include #include #include #include @@ -55,6 +56,19 @@ static __always_inline void instrument_read_write(const volatile void *v, size_t kcsan_check_read_write(v, size); } +static __always_inline void instrument_atomic_check_alignment(const volatile void *v, size_t size) +{ +#ifndef __DISABLE_EXPORTS + if (IS_ENABLED(CONFIG_DEBUG_ATOMIC)) { + unsigned int mask = size - 1; + + if (IS_ENABLED(CONFIG_DEBUG_ATOMIC_LARGEST_ALIGN)) + mask &= sizeof(struct { long x; } __aligned_largest) - 1; + WARN_ON_ONCE((unsigned long)v & mask); + } +#endif +} + /** * instrument_atomic_read - instrument atomic read access * @v: address of access @@ -67,6 +81,7 @@ static __always_inline void instrument_atomic_read(const volatile void *v, size_ { kasan_check_read(v, size); kcsan_check_atomic_read(v, size); + instrument_atomic_check_alignment(v, size); } /** @@ -81,6 +96,7 @@ static __always_inline void instrument_atomic_write(const volatile void *v, size { kasan_check_write(v, size); kcsan_check_atomic_write(v, size); + instrument_atomic_check_alignment(v, size); } /** @@ -95,6 +111,7 @@ static __always_inline void instrument_atomic_read_write(const volatile void *v, { kasan_check_write(v, size); kcsan_check_atomic_read_write(v, size); + instrument_atomic_check_alignment(v, size); } /** diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 60ca6a49839c..3e0f4c990297 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -10,6 +10,7 @@ #define _LINUX_IOPORT_H #ifndef __ASSEMBLY__ +#include #include #include #include @@ -165,8 +166,12 @@ enum { #define DEFINE_RES_NAMED(_start, _size, _name, _flags) \ DEFINE_RES_NAMED_DESC(_start, _size, _name, _flags, IORES_DESC_NONE) -#define DEFINE_RES(_start, _size, _flags) \ +#define __DEFINE_RES0() \ + DEFINE_RES_NAMED(0, 0, NULL, 
IORESOURCE_UNSET) +#define __DEFINE_RES3(_start, _size, _flags) \ DEFINE_RES_NAMED(_start, _size, NULL, _flags) +#define DEFINE_RES(...) \ + CONCATENATE(__DEFINE_RES, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__) #define DEFINE_RES_IO_NAMED(_start, _size, _name) \ DEFINE_RES_NAMED((_start), (_size), (_name), IORESOURCE_IO) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5b46924fdff5..e5570a16cbb1 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -32,7 +31,7 @@ #include #include #include -#include +#include #include #include @@ -40,8 +39,6 @@ #include -#define STACK_MAGIC 0xdeadbeef - struct completion; struct user; @@ -192,215 +189,9 @@ enum system_states { }; extern enum system_states system_state; -/* - * General tracing related utility functions - trace_printk(), - * tracing_on/tracing_off and tracing_start()/tracing_stop - * - * Use tracing_on/tracing_off when you want to quickly turn on or off - * tracing. It simply enables or disables the recording of the trace events. - * This also corresponds to the user space /sys/kernel/tracing/tracing_on - * file, which gives a means for the kernel and userspace to interact. - * Place a tracing_off() in the kernel where you want tracing to end. - * From user space, examine the trace, and then echo 1 > tracing_on - * to continue tracing. - * - * tracing_stop/tracing_start has slightly more overhead. It is used - * by things like suspend to ram where disabling the recording of the - * trace is not enough, but tracing must actually stop because things - * like calling smp_processor_id() may crash the system. - * - * Most likely, you want to use tracing_on/tracing_off. 
- */ - -enum ftrace_dump_mode { - DUMP_NONE, - DUMP_ALL, - DUMP_ORIG, - DUMP_PARAM, -}; - -#ifdef CONFIG_TRACING -void tracing_on(void); -void tracing_off(void); -int tracing_is_on(void); -void tracing_snapshot(void); -void tracing_snapshot_alloc(void); - -extern void tracing_start(void); -extern void tracing_stop(void); - -static inline __printf(1, 2) -void ____trace_printk_check_format(const char *fmt, ...) -{ -} -#define __trace_printk_check_format(fmt, args...) \ -do { \ - if (0) \ - ____trace_printk_check_format(fmt, ##args); \ -} while (0) - -/** - * trace_printk - printf formatting in the ftrace buffer - * @fmt: the printf format for printing - * - * Note: __trace_printk is an internal function for trace_printk() and - * the @ip is passed in via the trace_printk() macro. - * - * This function allows a kernel developer to debug fast path sections - * that printk is not appropriate for. By scattering in various - * printk like tracing in the code, a developer can quickly see - * where problems are occurring. - * - * This is intended as a debugging tool for the developer only. - * Please refrain from leaving trace_printks scattered around in - * your code. (Extra memory is used for special buffers that are - * allocated when trace_printk() is used.) - * - * A little optimization trick is done here. If there's only one - * argument, there's no need to scan the string for printf formats. - * The trace_puts() will suffice. But how can we take advantage of - * using trace_puts() when trace_printk() has only one argument? - * By stringifying the args and checking the size we can tell - * whether or not there are args. __stringify((__VA_ARGS__)) will - * turn into "()\0" with a size of 3 when there are no args, anything - * else will be bigger. All we need to do is define a string to this, - * and then take its size and compare to 3. If it's bigger, use - * do_trace_printk() otherwise, optimize it to trace_puts(). Then just - * let gcc optimize the rest. 
- */ - -#define trace_printk(fmt, ...) \ -do { \ - char _______STR[] = __stringify((__VA_ARGS__)); \ - if (sizeof(_______STR) > 3) \ - do_trace_printk(fmt, ##__VA_ARGS__); \ - else \ - trace_puts(fmt); \ -} while (0) - -#define do_trace_printk(fmt, args...) \ -do { \ - static const char *trace_printk_fmt __used \ - __section("__trace_printk_fmt") = \ - __builtin_constant_p(fmt) ? fmt : NULL; \ - \ - __trace_printk_check_format(fmt, ##args); \ - \ - if (__builtin_constant_p(fmt)) \ - __trace_bprintk(_THIS_IP_, trace_printk_fmt, ##args); \ - else \ - __trace_printk(_THIS_IP_, fmt, ##args); \ -} while (0) - -extern __printf(2, 3) -int __trace_bprintk(unsigned long ip, const char *fmt, ...); - -extern __printf(2, 3) -int __trace_printk(unsigned long ip, const char *fmt, ...); - -/** - * trace_puts - write a string into the ftrace buffer - * @str: the string to record - * - * Note: __trace_bputs is an internal function for trace_puts and - * the @ip is passed in via the trace_puts macro. - * - * This is similar to trace_printk() but is made for those really fast - * paths that a developer wants the least amount of "Heisenbug" effects, - * where the processing of the print format is still too much. - * - * This function allows a kernel developer to debug fast path sections - * that printk is not appropriate for. By scattering in various - * printk like tracing in the code, a developer can quickly see - * where problems are occurring. - * - * This is intended as a debugging tool for the developer only. - * Please refrain from leaving trace_puts scattered around in - * your code. (Extra memory is used for special buffers that are - * allocated when trace_puts() is used.) - * - * Returns: 0 if nothing was written, positive # if string was. - * (1 when __trace_bputs is used, strlen(str) when __trace_puts is used) - */ - -#define trace_puts(str) ({ \ - static const char *trace_printk_fmt __used \ - __section("__trace_printk_fmt") = \ - __builtin_constant_p(str) ? 
str : NULL; \ - \ - if (__builtin_constant_p(str)) \ - __trace_bputs(_THIS_IP_, trace_printk_fmt); \ - else \ - __trace_puts(_THIS_IP_, str, strlen(str)); \ -}) -extern int __trace_bputs(unsigned long ip, const char *str); -extern int __trace_puts(unsigned long ip, const char *str, int size); - -extern void trace_dump_stack(int skip); - -/* - * The double __builtin_constant_p is because gcc will give us an error - * if we try to allocate the static variable to fmt if it is not a - * constant. Even with the outer if statement. - */ -#define ftrace_vprintk(fmt, vargs) \ -do { \ - if (__builtin_constant_p(fmt)) { \ - static const char *trace_printk_fmt __used \ - __section("__trace_printk_fmt") = \ - __builtin_constant_p(fmt) ? fmt : NULL; \ - \ - __ftrace_vbprintk(_THIS_IP_, trace_printk_fmt, vargs); \ - } else \ - __ftrace_vprintk(_THIS_IP_, fmt, vargs); \ -} while (0) - -extern __printf(2, 0) int -__ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap); - -extern __printf(2, 0) int -__ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); - -extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode); -#else -static inline void tracing_start(void) { } -static inline void tracing_stop(void) { } -static inline void trace_dump_stack(int skip) { } - -static inline void tracing_on(void) { } -static inline void tracing_off(void) { } -static inline int tracing_is_on(void) { return 0; } -static inline void tracing_snapshot(void) { } -static inline void tracing_snapshot_alloc(void) { } - -static inline __printf(1, 2) -int trace_printk(const char *fmt, ...) 
-{ - return 0; -} -static __printf(1, 0) inline int -ftrace_vprintk(const char *fmt, va_list ap) -{ - return 0; -} -static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } -#endif /* CONFIG_TRACING */ - /* Rebuild everything on CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_DYNAMIC_FTRACE # define REBUILD_DUE_TO_DYNAMIC_FTRACE #endif -/* Permissions on a sysfs file: you didn't miss the 0 prefix did you? */ -#define VERIFY_OCTAL_PERMISSIONS(perms) \ - (BUILD_BUG_ON_ZERO((perms) < 0) + \ - BUILD_BUG_ON_ZERO((perms) > 0777) + \ - /* USER_READABLE >= GROUP_READABLE >= OTHER_READABLE */ \ - BUILD_BUG_ON_ZERO((((perms) >> 6) & 4) < (((perms) >> 3) & 4)) + \ - BUILD_BUG_ON_ZERO((((perms) >> 3) & 4) < ((perms) & 4)) + \ - /* USER_WRITABLE >= GROUP_WRITABLE */ \ - BUILD_BUG_ON_ZERO((((perms) >> 6) & 2) < (((perms) >> 3) & 2)) + \ - /* OTHER_WRITABLE? Generally considered a bad idea. */ \ - BUILD_BUG_ON_ZERO((perms) & 2) + \ - (perms)) #endif diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h index 5f7b9de97e8d..ac4129d1d741 100644 --- a/include/linux/kexec_handover.h +++ b/include/linux/kexec_handover.h @@ -11,49 +11,26 @@ struct kho_scratch { phys_addr_t size; }; +struct kho_vmalloc; + struct folio; struct page; -#define DECLARE_KHOSER_PTR(name, type) \ - union { \ - phys_addr_t phys; \ - type ptr; \ - } name -#define KHOSER_STORE_PTR(dest, val) \ - ({ \ - typeof(val) v = val; \ - typecheck(typeof((dest).ptr), v); \ - (dest).phys = virt_to_phys(v); \ - }) -#define KHOSER_LOAD_PTR(src) \ - ({ \ - typeof(src) s = src; \ - (typeof((s).ptr))((s).phys ? 
phys_to_virt((s).phys) : NULL); \ - }) - -struct kho_vmalloc_chunk; -struct kho_vmalloc { - DECLARE_KHOSER_PTR(first, struct kho_vmalloc_chunk *); - unsigned int total_pages; - unsigned short flags; - unsigned short order; -}; - #ifdef CONFIG_KEXEC_HANDOVER bool kho_is_enabled(void); bool is_kho_boot(void); int kho_preserve_folio(struct folio *folio); void kho_unpreserve_folio(struct folio *folio); -int kho_preserve_pages(struct page *page, unsigned int nr_pages); -void kho_unpreserve_pages(struct page *page, unsigned int nr_pages); +int kho_preserve_pages(struct page *page, unsigned long nr_pages); +void kho_unpreserve_pages(struct page *page, unsigned long nr_pages); int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation); void kho_unpreserve_vmalloc(struct kho_vmalloc *preservation); void *kho_alloc_preserve(size_t size); void kho_unpreserve_free(void *mem); void kho_restore_free(void *mem); struct folio *kho_restore_folio(phys_addr_t phys); -struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages); +struct page *kho_restore_pages(phys_addr_t phys, unsigned long nr_pages); void *kho_restore_vmalloc(const struct kho_vmalloc *preservation); int kho_add_subtree(const char *name, void *fdt); void kho_remove_subtree(void *fdt); diff --git a/include/linux/kho/abi/kexec_handover.h b/include/linux/kho/abi/kexec_handover.h new file mode 100644 index 000000000000..2201a0d2c159 --- /dev/null +++ b/include/linux/kho/abi/kexec_handover.h @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * Copyright (C) 2023 Alexander Graf + * Copyright (C) 2025 Microsoft Corporation, Mike Rapoport + * Copyright (C) 2025 Google LLC, Changyuan Lyu + * Copyright (C) 2025 Google LLC, Jason Miu + */ + +#ifndef _LINUX_KHO_ABI_KEXEC_HANDOVER_H +#define _LINUX_KHO_ABI_KEXEC_HANDOVER_H + +#include + +/** + * DOC: Kexec Handover ABI + * + * Kexec Handover uses the ABI defined below for passing preserved data from + * one kernel to the next. 
+ * The ABI uses Flattened Device Tree (FDT) format. The first kernel creates an + * FDT which is then passed to the next kernel during a kexec handover. + * + * This interface is a contract. Any modification to the FDT structure, node + * properties, compatible string, or the layout of the data structures + * referenced here constitutes a breaking change. Such changes require + * incrementing the version number in KHO_FDT_COMPATIBLE to prevent a new kernel + * from misinterpreting data from an older kernel. Changes are allowed provided + * the compatibility version is incremented. However, backward/forward + * compatibility is only guaranteed for kernels supporting the same ABI version. + * + * FDT Structure Overview: + * The FDT serves as a central registry for physical + * addresses of preserved data structures and sub-FDTs. The first kernel + * populates this FDT with references to memory regions and other FDTs that + * need to persist across the kexec transition. The subsequent kernel then + * parses this FDT to locate and restore the preserved data.:: + * + * / { + * compatible = "kho-v1"; + * + * preserved-memory-map = <0x...>; + * + * { + * fdt = <0x...>; + * }; + * + * { + * fdt = <0x...>; + * }; + * ... ... + * { + * fdt = <0x...>; + * }; + * }; + * + * Root KHO Node (/): + * - compatible: "kho-v1" + * + * Identifies the overall KHO ABI version. + * + * - preserved-memory-map: u64 + * + * Physical memory address pointing to the root of the + * preserved memory map data structure. + * + * Subnodes (): + * Subnodes can also be added to the root node to + * describe other preserved data blobs. The + * is provided by the subsystem that uses KHO for preserving its + * data. + * + * - fdt: u64 + * + * Physical address pointing to a subnode FDT blob that is also + * being preserved. + */ + +/* The compatible string for the KHO FDT root node. */ +#define KHO_FDT_COMPATIBLE "kho-v1" + +/* The FDT property for the preserved memory map.
*/ +#define KHO_FDT_MEMORY_MAP_PROP_NAME "preserved-memory-map" + +/* The FDT property for sub-FDTs. */ +#define KHO_FDT_SUB_TREE_PROP_NAME "fdt" + +/** + * DOC: Kexec Handover ABI for vmalloc Preservation + * + * The Kexec Handover ABI for preserving vmalloc'ed memory is defined by + * a set of structures and helper macros. The layout of these structures is a + * stable contract between kernels and is versioned by the KHO_FDT_COMPATIBLE + * string. + * + * The preservation is managed through a main descriptor &struct kho_vmalloc, + * which points to a linked list of &struct kho_vmalloc_chunk structures. These + * chunks contain the physical addresses of the preserved pages, allowing the + * next kernel to reconstruct the vmalloc area with the same content and layout. + * Helper macros are also defined for storing and loading pointers within + * these structures. + */ + +/* Helper macro to define a union for a serializable pointer. */ +#define DECLARE_KHOSER_PTR(name, type) \ + union { \ + u64 phys; \ + type ptr; \ + } name + +/* Stores the physical address of a serializable pointer. */ +#define KHOSER_STORE_PTR(dest, val) \ + ({ \ + typeof(val) v = val; \ + typecheck(typeof((dest).ptr), v); \ + (dest).phys = virt_to_phys(v); \ + }) + +/* Loads the stored physical address back to a pointer. */ +#define KHOSER_LOAD_PTR(src) \ + ({ \ + typeof(src) s = src; \ + (typeof((s).ptr))((s).phys ? phys_to_virt((s).phys) : NULL); \ + }) + +/* + * This header is embedded at the beginning of each `kho_vmalloc_chunk` + * and contains a pointer to the next chunk in the linked list, + * stored as a physical address for handover. + */ +struct kho_vmalloc_hdr { + DECLARE_KHOSER_PTR(next, struct kho_vmalloc_chunk *); +}; + +#define KHO_VMALLOC_SIZE \ + ((PAGE_SIZE - sizeof(struct kho_vmalloc_hdr)) / \ + sizeof(u64)) + +/* + * Each chunk is a single page and is part of a linked list that describes + * a preserved vmalloc area. 
It contains the header with the link to the next + * chunk and a zero terminated array of physical addresses of the pages that + * make up the preserved vmalloc area. + */ +struct kho_vmalloc_chunk { + struct kho_vmalloc_hdr hdr; + u64 phys[KHO_VMALLOC_SIZE]; +}; + +static_assert(sizeof(struct kho_vmalloc_chunk) == PAGE_SIZE); + +/* + * Describes a preserved vmalloc memory area, including the + * total number of pages, allocation flags, page order, and a pointer to the + * first chunk of physical page addresses. + */ +struct kho_vmalloc { + DECLARE_KHOSER_PTR(first, struct kho_vmalloc_chunk *); + unsigned int total_pages; + unsigned short flags; + unsigned short order; +}; + +#endif /* _LINUX_KHO_ABI_KEXEC_HANDOVER_H */ diff --git a/include/linux/kho/abi/luo.h b/include/linux/kho/abi/luo.h index bb099c92e469..46750a0ddf88 100644 --- a/include/linux/kho/abi/luo.h +++ b/include/linux/kho/abi/luo.h @@ -8,10 +8,10 @@ /** * DOC: Live Update Orchestrator ABI * - * This header defines the stable Application Binary Interface used by the - * Live Update Orchestrator to pass state from a pre-update kernel to a - * post-update kernel. The ABI is built upon the Kexec HandOver framework - * and uses a Flattened Device Tree to describe the preserved data. + * Live Update Orchestrator uses the stable Application Binary Interface + * defined below to pass state from a pre-update kernel to a post-update + * kernel. The ABI is built upon the Kexec HandOver framework and uses a + * Flattened Device Tree to describe the preserved data. * * This interface is a contract. 
Any modification to the FDT structure, node * properties, compatible strings, or the layout of the `__packed` serialization @@ -37,6 +37,11 @@ * compatible = "luo-session-v1"; * luo-session-header = ; * }; + * + * luo-flb { + * compatible = "luo-flb-v1"; + * luo-flb-header = ; + * }; * }; * * Main LUO Node (/): @@ -56,6 +61,17 @@ * is the header for a contiguous block of memory containing an array of * `struct luo_session_ser`, one for each preserved session. * + * File-Lifecycle-Bound Node (luo-flb): + * This node describes all preserved global objects whose lifecycle is bound + * to that of the preserved files (e.g., shared IOMMU state). + * + * - compatible: "luo-flb-v1" + * Identifies the FLB ABI version. + * - luo-flb-header: u64 + * The physical address of a `struct luo_flb_header_ser`. This structure is + * the header for a contiguous block of memory containing an array of + * `struct luo_flb_ser`, one for each preserved global object. + * * Serialization Structures: * The FDT properties point to memory regions containing arrays of simple, * `__packed` structures. These structures contain the actual preserved state. @@ -74,6 +90,16 @@ * Metadata for a single preserved file. Contains the `compatible` string to * find the correct handler in the new kernel, a user-provided `token` for * identification, and an opaque `data` handle for the handler to use. + * + * - struct luo_flb_header_ser: + * Header for the FLB array. Contains the total page count of the + * preserved memory block and the number of `struct luo_flb_ser` entries + * that follow. + * + * - struct luo_flb_ser: + * Metadata for a single preserved global object. Contains its `name` + * (compatible string), an opaque `data` handle, and the `count` + * number of files depending on it. 
*/ #ifndef _LINUX_KHO_ABI_LUO_H @@ -163,4 +189,59 @@ struct luo_session_ser { struct luo_file_set_ser file_set_ser; } __packed; +/* The max size is set so it can be reliably used during serialization */ +#define LIVEUPDATE_FLB_COMPAT_LENGTH 48 + +#define LUO_FDT_FLB_NODE_NAME "luo-flb" +#define LUO_FDT_FLB_COMPATIBLE "luo-flb-v1" +#define LUO_FDT_FLB_HEADER "luo-flb-header" + +/** + * struct luo_flb_header_ser - Header for the serialized FLB data block. + * @pgcnt: The total number of pages occupied by the entire preserved memory + * region, including this header and the subsequent array of + * &struct luo_flb_ser entries. + * @count: The number of &struct luo_flb_ser entries that follow this header + * in the memory block. + * + * This structure is located at the physical address specified by the + * `LUO_FDT_FLB_HEADER` FDT property. It provides the new kernel with the + * necessary information to find and iterate over the array of preserved + * File-Lifecycle-Bound objects and to manage the underlying memory. + * + * If this structure is modified, LUO_FDT_FLB_COMPATIBLE must be updated. + */ +struct luo_flb_header_ser { + u64 pgcnt; + u64 count; +} __packed; + +/** + * struct luo_flb_ser - Represents the serialized state of a single FLB object. + * @name: The unique compatibility string of the FLB object, used to find the + * corresponding &struct liveupdate_flb handler in the new kernel. + * @data: The opaque u64 handle returned by the FLB's .preserve() operation + * in the old kernel. This handle encapsulates the entire state needed + * for restoration. + * @count: The reference count at the time of serialization; i.e., the number + * of preserved files that depended on this FLB. This is used by the + * new kernel to correctly manage the FLB's lifecycle. + * + * An array of these structures is created in a preserved memory region and + * passed to the new kernel. Each entry allows the LUO core to restore one + * global, shared object.
+ * + * If this structure is modified, LUO_FDT_FLB_COMPATIBLE must be updated. + */ +struct luo_flb_ser { + char name[LIVEUPDATE_FLB_COMPAT_LENGTH]; + u64 data; + u64 count; +} __packed; + +/* Kernel Live Update Test ABI */ +#ifdef CONFIG_LIVEUPDATE_TEST +#define LIVEUPDATE_TEST_FLB_COMPATIBLE(i) "liveupdate-test-flb-v" #i +#endif + #endif /* _LINUX_KHO_ABI_LUO_H */ diff --git a/include/linux/kho/abi/memblock.h b/include/linux/kho/abi/memblock.h new file mode 100644 index 000000000000..27b042f470e1 --- /dev/null +++ b/include/linux/kho/abi/memblock.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_KHO_ABI_MEMBLOCK_H +#define _LINUX_KHO_ABI_MEMBLOCK_H + +/** + * DOC: memblock kexec handover ABI + * + * Memblock can serialize its current memory reservations created with + * reserve_mem command line option across kexec through KHO. + * The post-KHO kernel can then consume these reservations and they are + * guaranteed to have the same physical address. + * + * The state is serialized using Flattened Device Tree (FDT) format. Any + * modification to the FDT structure, node properties, or the compatible + * strings constitutes a breaking change. Such changes require incrementing the + * version number in the relevant `_COMPATIBLE` string to prevent a new kernel + * from misinterpreting data from an old kernel. + * + * Changes are allowed provided the compatibility version is incremented. + * However, backward/forward compatibility is only guaranteed for kernels + * supporting the same ABI version. + * + * FDT Structure Overview: + * The entire memblock state is encapsulated within a single KHO entry named + * "memblock". + * This entry contains an FDT with the following layout: + * + * .. 
code-block:: none + * + * / { + * compatible = "memblock-v1"; + * + * n1 { + * compatible = "reserve-mem-v1"; + * start = <0xc06b 0x4000000>; + * size = <0x04 0x00>; + * }; + * }; + * + * Main memblock node (/): + * + * - compatible: "memblock-v1" + * + * Identifies the overall memblock ABI version. + * + * reserved_mem node: + * These nodes describe all reserve_mem regions. The node name is the name + * defined by the user for a reserve_mem region. + * + * - compatible: "reserve-mem-v1" + * + * Identifies the ABI version of reserve_mem descriptions. + * + * - start: u64 + * + * Physical address of the reserved memory region. + * + * - size: u64 + * + * Size in bytes of the reserved memory region. + */ + +/* Top level memblock FDT node name. */ +#define MEMBLOCK_KHO_FDT "memblock" + +/* The compatible string for the memblock FDT root node. */ +#define MEMBLOCK_KHO_NODE_COMPATIBLE "memblock-v1" + +/* The compatible string for the reserve_mem FDT nodes. */ +#define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1" + +#endif /* _LINUX_KHO_ABI_MEMBLOCK_H */ diff --git a/include/linux/kho/abi/memfd.h b/include/linux/kho/abi/memfd.h index da7d063474a1..68cb6303b846 100644 --- a/include/linux/kho/abi/memfd.h +++ b/include/linux/kho/abi/memfd.h @@ -12,13 +12,13 @@ #define _LINUX_KHO_ABI_MEMFD_H #include -#include +#include /** * DOC: memfd Live Update ABI * - * This header defines the ABI for preserving the state of a memfd across a - * kexec reboot using the LUO. + * memfd uses the ABI defined below for preserving its state across a kexec + * reboot using the LUO. * * The state is serialized into a packed structure `struct memfd_luo_ser` * which is handed over to the next kernel via the KHO mechanism. diff --git a/include/linux/list_private.h b/include/linux/list_private.h new file mode 100644 index 000000000000..19b01d16beda --- /dev/null +++ b/include/linux/list_private.h @@ -0,0 +1,256 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (c) 2025, Google LLC.
+ * Pasha Tatashin + */ +#ifndef _LINUX_LIST_PRIVATE_H +#define _LINUX_LIST_PRIVATE_H + +/** + * DOC: Private List Primitives + * + * Provides a set of list primitives identical in function to those in + * ``<linux/list.h>``, but designed for cases where the embedded + * ``&struct list_head`` is a private member. + */ + +#include +#include + +#define __list_private_offset(type, member) \ + ((size_t)(&ACCESS_PRIVATE(((type *)0), member))) + +/** + * list_private_entry - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the identifier passed to ACCESS_PRIVATE. + */ +#define list_private_entry(ptr, type, member) ({ \ + const struct list_head *__mptr = (ptr); \ + (type *)((char *)__mptr - __list_private_offset(type, member)); \ +}) + +/** + * list_private_first_entry - get the first element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the identifier passed to ACCESS_PRIVATE. + */ +#define list_private_first_entry(ptr, type, member) \ + list_private_entry((ptr)->next, type, member) + +/** + * list_private_last_entry - get the last element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the identifier passed to ACCESS_PRIVATE. + */ +#define list_private_last_entry(ptr, type, member) \ + list_private_entry((ptr)->prev, type, member) + +/** + * list_private_next_entry - get the next element in list + * @pos: the type * to cursor + * @member: the name of the list_head within the struct. + */ +#define list_private_next_entry(pos, member) \ + list_private_entry(ACCESS_PRIVATE(pos, member).next, typeof(*(pos)), member) + +/** + * list_private_next_entry_circular - get the next element in list + * @pos: the type * to cursor. + * @head: the list head to take the element from. + * @member: the name of the list_head within the struct. 
+ * + * Wraparound if pos is the last element (return the first element). + * Note, that list is expected to be not empty. + */ +#define list_private_next_entry_circular(pos, head, member) \ + (list_is_last(&ACCESS_PRIVATE(pos, member), head) ? \ + list_private_first_entry(head, typeof(*(pos)), member) : \ + list_private_next_entry(pos, member)) + +/** + * list_private_prev_entry - get the prev element in list + * @pos: the type * to cursor + * @member: the name of the list_head within the struct. + */ +#define list_private_prev_entry(pos, member) \ + list_private_entry(ACCESS_PRIVATE(pos, member).prev, typeof(*(pos)), member) + +/** + * list_private_prev_entry_circular - get the prev element in list + * @pos: the type * to cursor. + * @head: the list head to take the element from. + * @member: the name of the list_head within the struct. + * + * Wraparound if pos is the first element (return the last element). + * Note, that list is expected to be not empty. + */ +#define list_private_prev_entry_circular(pos, head, member) \ + (list_is_first(&ACCESS_PRIVATE(pos, member), head) ? \ + list_private_last_entry(head, typeof(*(pos)), member) : \ + list_private_prev_entry(pos, member)) + +/** + * list_private_entry_is_head - test if the entry points to the head of the list + * @pos: the type * to cursor + * @head: the head for your list. + * @member: the name of the list_head within the struct. + */ +#define list_private_entry_is_head(pos, head, member) \ + list_is_head(&ACCESS_PRIVATE(pos, member), (head)) + +/** + * list_private_for_each_entry - iterate over list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. 
+ */ +#define list_private_for_each_entry(pos, head, member) \ + for (pos = list_private_first_entry(head, typeof(*pos), member); \ + !list_private_entry_is_head(pos, head, member); \ + pos = list_private_next_entry(pos, member)) + +/** + * list_private_for_each_entry_reverse - iterate backwards over list of given type. + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + */ +#define list_private_for_each_entry_reverse(pos, head, member) \ + for (pos = list_private_last_entry(head, typeof(*pos), member); \ + !list_private_entry_is_head(pos, head, member); \ + pos = list_private_prev_entry(pos, member)) + +/** + * list_private_for_each_entry_continue - continue iteration over list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Continue to iterate over list of given type, continuing after + * the current position. + */ +#define list_private_for_each_entry_continue(pos, head, member) \ + for (pos = list_private_next_entry(pos, member); \ + !list_private_entry_is_head(pos, head, member); \ + pos = list_private_next_entry(pos, member)) + +/** + * list_private_for_each_entry_continue_reverse - iterate backwards from the given point + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Start to iterate over list of given type backwards, continuing after + * the current position. + */ +#define list_private_for_each_entry_continue_reverse(pos, head, member) \ + for (pos = list_private_prev_entry(pos, member); \ + !list_private_entry_is_head(pos, head, member); \ + pos = list_private_prev_entry(pos, member)) + +/** + * list_private_for_each_entry_from - iterate over list of given type from the current point + * @pos: the type * to use as a loop cursor. 
+ * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Iterate over list of given type, continuing from current position. + */ +#define list_private_for_each_entry_from(pos, head, member) \ + for (; !list_private_entry_is_head(pos, head, member); \ + pos = list_private_next_entry(pos, member)) + +/** + * list_private_for_each_entry_from_reverse - iterate backwards over list of given type + * from the current point + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Iterate backwards over list of given type, continuing from current position. + */ +#define list_private_for_each_entry_from_reverse(pos, head, member) \ + for (; !list_private_entry_is_head(pos, head, member); \ + pos = list_private_prev_entry(pos, member)) + +/** + * list_private_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_head within the struct. + */ +#define list_private_for_each_entry_safe(pos, n, head, member) \ + for (pos = list_private_first_entry(head, typeof(*pos), member), \ + n = list_private_next_entry(pos, member); \ + !list_private_entry_is_head(pos, head, member); \ + pos = n, n = list_private_next_entry(n, member)) + +/** + * list_private_for_each_entry_safe_continue - continue list iteration safe against removal + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Iterate over list of given type, continuing after current point, + * safe against removal of list entry. 
+ */ +#define list_private_for_each_entry_safe_continue(pos, n, head, member) \ + for (pos = list_private_next_entry(pos, member), \ + n = list_private_next_entry(pos, member); \ + !list_private_entry_is_head(pos, head, member); \ + pos = n, n = list_private_next_entry(n, member)) + +/** + * list_private_for_each_entry_safe_from - iterate over list from current point safe against removal + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Iterate over list of given type from current point, safe against + * removal of list entry. + */ +#define list_private_for_each_entry_safe_from(pos, n, head, member) \ + for (n = list_private_next_entry(pos, member); \ + !list_private_entry_is_head(pos, head, member); \ + pos = n, n = list_private_next_entry(n, member)) + +/** + * list_private_for_each_entry_safe_reverse - iterate backwards over list safe against removal + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Iterate backwards over list of given type, safe against removal + * of list entry. + */ +#define list_private_for_each_entry_safe_reverse(pos, n, head, member) \ + for (pos = list_private_last_entry(head, typeof(*pos), member), \ + n = list_private_prev_entry(pos, member); \ + !list_private_entry_is_head(pos, head, member); \ + pos = n, n = list_private_prev_entry(n, member)) + +/** + * list_private_safe_reset_next - reset a stale list_for_each_entry_safe loop + * @pos: the loop cursor used in the list_for_each_entry_safe loop + * @n: temporary storage used in list_for_each_entry_safe + * @member: the name of the list_head within the struct. + * + * list_safe_reset_next is not safe to use in general if the list may be + * modified concurrently (eg. 
the lock is dropped in the loop body). An + * exception to this is if the cursor element (pos) is pinned in the list, + * and list_safe_reset_next is called after re-taking the lock and before + * completing the current iteration of the loop body. + */ +#define list_private_safe_reset_next(pos, n, member) \ + n = list_private_next_entry(pos, member) + +#endif /* _LINUX_LIST_PRIVATE_H */ diff --git a/include/linux/liveupdate.h b/include/linux/liveupdate.h index a7f6ee5b6771..fe82a6c3005f 100644 --- a/include/linux/liveupdate.h +++ b/include/linux/liveupdate.h @@ -11,10 +11,13 @@ #include #include #include +#include #include #include struct liveupdate_file_handler; +struct liveupdate_flb; +struct liveupdate_session; struct file; /** @@ -99,6 +102,118 @@ struct liveupdate_file_handler { * registered file handlers. */ struct list_head __private list; + /* A list of FLB dependencies. */ + struct list_head __private flb_list; +}; + +/** + * struct liveupdate_flb_op_args - Arguments for FLB operation callbacks. + * @flb: The global FLB instance for which this call is performed. + * @data: For .preserve(): [OUT] The callback sets this field. + * For .unpreserve(): [IN] The handle from .preserve(). + * For .retrieve(): [IN] The handle from .preserve(). + * @obj: For .preserve(): [OUT] Sets this to the live object. + * For .retrieve(): [OUT] Sets this to the live object. + * For .finish(): [IN] The live object from .retrieve(). + * + * This structure bundles all parameters for the FLB operation callbacks. + */ +struct liveupdate_flb_op_args { + struct liveupdate_flb *flb; + u64 data; + void *obj; +}; + +/** + * struct liveupdate_flb_ops - Callbacks for global File-Lifecycle-Bound data. + * @preserve: Called when the first file using this FLB is preserved. + * The callback must save its state and return a single, + * self-contained u64 handle by setting the 'argp->data' + * field and 'argp->obj'. 
+ * @unpreserve: Called when the last file using this FLB is unpreserved + * (aborted before reboot). Receives the handle via + * 'argp->data' and live object via 'argp->obj'. + * @retrieve: Called on-demand in the new kernel, the first time a + * component requests access to the shared object. It receives + * the preserved handle via 'argp->data' and must reconstruct + * the live object, returning it by setting the 'argp->obj' + * field. + * @finish: Called in the new kernel when the last file using this FLB + * is finished. Receives the live object via 'argp->obj' for + * cleanup. + * @owner: Module reference + * + * Operations that manage global shared data with file bound lifecycle, + * triggered by the first file that uses it and concluded by the last file that + * uses it, across all sessions. + */ +struct liveupdate_flb_ops { + int (*preserve)(struct liveupdate_flb_op_args *argp); + void (*unpreserve)(struct liveupdate_flb_op_args *argp); + int (*retrieve)(struct liveupdate_flb_op_args *argp); + void (*finish)(struct liveupdate_flb_op_args *argp); + struct module *owner; +}; + +/* + * struct luo_flb_private_state - Private FLB state structures. + * @count: The number of preserved files currently depending on this FLB. + * This is used to trigger the preserve/unpreserve/finish ops on the + * first/last file. + * @data: The opaque u64 handle returned by .preserve() or passed to + * .retrieve(). + * @obj: The live kernel object returned by .preserve() or .retrieve(). + * @lock: A mutex that protects all fields within this structure, providing + * the synchronization service for the FLB's ops. + * @finished: True once the FLB's finish() callback has run. + * @retrieved: True once the FLB's retrieve() callback has run. + */ +struct luo_flb_private_state { + long count; + u64 data; + void *obj; + struct mutex lock; + bool finished; + bool retrieved; +}; + +/* + * struct luo_flb_private - Keep separate incoming and outgoing states. 
+ * @list: A global list of registered FLBs. + * @outgoing: The runtime state for the pre-reboot + * (preserve/unpreserve) lifecycle. + * @incoming: The runtime state for the post-reboot (retrieve/finish) + * lifecycle. + * @users: With how many File-Handlers this FLB is registered. + * @initialized: true when private fields have been initialized. + */ +struct luo_flb_private { + struct list_head list; + struct luo_flb_private_state outgoing; + struct luo_flb_private_state incoming; + int users; + bool initialized; +}; + +/** + * struct liveupdate_flb - A global definition for a shared data object. + * @ops: Callback functions + * @compatible: The compatibility string (e.g., "iommu-core-v1") + * that uniquely identifies the FLB type this handler + * supports. This is matched against the compatible string + * associated with individual &struct liveupdate_flb + * instances. + * + * This struct is the "template" that a driver registers to define a shared, + * file-lifecycle-bound object. The actual runtime state (the live object, + * refcount, etc.) is managed privately by the LUO core. 
+ */ +struct liveupdate_flb { + const struct liveupdate_flb_ops *ops; + const char compatible[LIVEUPDATE_FLB_COMPAT_LENGTH]; + + /* private: */ + struct luo_flb_private __private private; }; #ifdef CONFIG_LIVEUPDATE @@ -112,6 +227,14 @@ int liveupdate_reboot(void); int liveupdate_register_file_handler(struct liveupdate_file_handler *fh); int liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh); +int liveupdate_register_flb(struct liveupdate_file_handler *fh, + struct liveupdate_flb *flb); +int liveupdate_unregister_flb(struct liveupdate_file_handler *fh, + struct liveupdate_flb *flb); + +int liveupdate_flb_get_incoming(struct liveupdate_flb *flb, void **objp); +int liveupdate_flb_get_outgoing(struct liveupdate_flb *flb, void **objp); + #else /* CONFIG_LIVEUPDATE */ static inline bool liveupdate_enabled(void) @@ -134,5 +257,29 @@ static inline int liveupdate_unregister_file_handler(struct liveupdate_file_hand return -EOPNOTSUPP; } +static inline int liveupdate_register_flb(struct liveupdate_file_handler *fh, + struct liveupdate_flb *flb) +{ + return -EOPNOTSUPP; +} + +static inline int liveupdate_unregister_flb(struct liveupdate_file_handler *fh, + struct liveupdate_flb *flb) +{ + return -EOPNOTSUPP; +} + +static inline int liveupdate_flb_get_incoming(struct liveupdate_flb *flb, + void **objp) +{ + return -EOPNOTSUPP; +} + +static inline int liveupdate_flb_get_outgoing(struct liveupdate_flb *flb, + void **objp) +{ + return -EOPNOTSUPP; +} + #endif /* CONFIG_LIVEUPDATE */ #endif /* _LINUX_LIVEUPDATE_H */ diff --git a/include/linux/log2.h b/include/linux/log2.h index 2eac3fc9303d..e17ceb32e0c9 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -44,7 +44,7 @@ int __ilog2_u64(u64 n) static __always_inline __attribute__((const)) bool is_power_of_2(unsigned long n) { - return (n != 0 && ((n & (n - 1)) == 0)); + return n - 1 < (n ^ (n - 1)); } /** diff --git a/include/linux/module.h b/include/linux/module.h index 20ddfd97630d..14f391b186c6 
100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -742,6 +742,15 @@ static inline void __module_get(struct module *module) __mod ? __mod->name : "kernel"; \ }) +static inline const unsigned char *module_buildid(struct module *mod) +{ +#ifdef CONFIG_STACKTRACE_BUILD_ID + return mod->build_id; +#else + return NULL; +#endif +} + /* Dereference module function descriptor */ void *dereference_module_function_descriptor(struct module *mod, void *ptr); diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index c03db3c2fd40..7d22d4c4ea2e 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -2,9 +2,14 @@ #ifndef _LINUX_MODULE_PARAMS_H #define _LINUX_MODULE_PARAMS_H /* (C) Copyright 2001, 2002 Rusty Russell IBM Corporation */ + +#include +#include +#include #include #include -#include +#include +#include /* * The maximum module name length, including the NUL byte. diff --git a/include/linux/panic.h b/include/linux/panic.h index a00bc0937698..f1dd417e54b2 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -41,6 +41,14 @@ void abort(void); * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec(). */ extern atomic_t panic_cpu; + +/* + * panic_redirect_cpu is used when panic is redirected to a specific CPU via + * the panic_force_cpu= boot parameter. It holds the CPU number that originally + * triggered the panic before redirection. A value of PANIC_CPU_INVALID means + * no redirection has occurred. 
+ */ +extern atomic_t panic_redirect_cpu; #define PANIC_CPU_INVALID -1 bool panic_try_start(void); diff --git a/include/linux/sched.h b/include/linux/sched.h index 873e400aafce..074ad4ef3d81 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -49,6 +49,7 @@ #include #include #include +#include #ifndef COMPILE_OFFSETS #include #endif @@ -86,6 +87,7 @@ struct signal_struct; struct task_delay_info; struct task_group; struct task_struct; +struct timespec64; struct user_event_mm; #include @@ -435,6 +437,9 @@ struct sched_info { /* When were we last queued to run? */ unsigned long long last_queued; + /* Timestamp of max time spent waiting on a runqueue: */ + struct timespec64 max_run_delay_ts; + #endif /* CONFIG_SCHED_INFO */ }; diff --git a/include/linux/smp.h b/include/linux/smp.h index 91d0ecf3b8d3..1ebd88026119 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -62,6 +62,7 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd); void __noreturn panic_smp_self_stop(void); void __noreturn nmi_panic_self_stop(struct pt_regs *regs); void crash_smp_send_stop(void); +int panic_smp_redirect_cpu(int target_cpu, void *msg); /* * Call a function on all processors diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index c33a96b7391a..99b775f3ff46 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -808,4 +808,17 @@ static inline void sysfs_put(struct kernfs_node *kn) kernfs_put(kn); } +/* Permissions on a sysfs file: you didn't miss the 0 prefix did you? */ +#define VERIFY_OCTAL_PERMISSIONS(perms) \ + (BUILD_BUG_ON_ZERO((perms) < 0) + \ + BUILD_BUG_ON_ZERO((perms) > 0777) + \ + /* USER_READABLE >= GROUP_READABLE >= OTHER_READABLE */ \ + BUILD_BUG_ON_ZERO((((perms) >> 6) & 4) < (((perms) >> 3) & 4)) + \ + BUILD_BUG_ON_ZERO((((perms) >> 3) & 4) < ((perms) & 4)) + \ + /* USER_WRITABLE >= GROUP_WRITABLE */ \ + BUILD_BUG_ON_ZERO((((perms) >> 6) & 2) < (((perms) >> 3) & 2)) + \ + /* OTHER_WRITABLE? 
Generally considered a bad idea. */ \ + BUILD_BUG_ON_ZERO((perms) & 2) + \ + (perms)) + #endif /* _SYSFS_H_ */ diff --git a/include/linux/trace_printk.h b/include/linux/trace_printk.h new file mode 100644 index 000000000000..bb5874097f24 --- /dev/null +++ b/include/linux/trace_printk.h @@ -0,0 +1,204 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_TRACE_PRINTK_H +#define _LINUX_TRACE_PRINTK_H + +#include +#include +#include +#include + +/* + * General tracing related utility functions - trace_printk(), + * tracing_on/tracing_off and tracing_start()/tracing_stop + * + * Use tracing_on/tracing_off when you want to quickly turn on or off + * tracing. It simply enables or disables the recording of the trace events. + * This also corresponds to the user space /sys/kernel/tracing/tracing_on + * file, which gives a means for the kernel and userspace to interact. + * Place a tracing_off() in the kernel where you want tracing to end. + * From user space, examine the trace, and then echo 1 > tracing_on + * to continue tracing. + * + * tracing_stop/tracing_start has slightly more overhead. It is used + * by things like suspend to ram where disabling the recording of the + * trace is not enough, but tracing must actually stop because things + * like calling smp_processor_id() may crash the system. + * + * Most likely, you want to use tracing_on/tracing_off. + */ + +enum ftrace_dump_mode { + DUMP_NONE, + DUMP_ALL, + DUMP_ORIG, + DUMP_PARAM, +}; + +#ifdef CONFIG_TRACING +void tracing_on(void); +void tracing_off(void); +int tracing_is_on(void); +void tracing_snapshot(void); +void tracing_snapshot_alloc(void); + +extern void tracing_start(void); +extern void tracing_stop(void); + +static inline __printf(1, 2) +void ____trace_printk_check_format(const char *fmt, ...) +{ +} +#define __trace_printk_check_format(fmt, args...) 
\ +do { \ + if (0) \ + ____trace_printk_check_format(fmt, ##args); \ +} while (0) + +/** + * trace_printk - printf formatting in the ftrace buffer + * @fmt: the printf format for printing + * + * Note: __trace_printk is an internal function for trace_printk() and + * the @ip is passed in via the trace_printk() macro. + * + * This function allows a kernel developer to debug fast path sections + * that printk is not appropriate for. By scattering in various + * printk like tracing in the code, a developer can quickly see + * where problems are occurring. + * + * This is intended as a debugging tool for the developer only. + * Please refrain from leaving trace_printks scattered around in + * your code. (Extra memory is used for special buffers that are + * allocated when trace_printk() is used.) + * + * A little optimization trick is done here. If there's only one + * argument, there's no need to scan the string for printf formats. + * The trace_puts() will suffice. But how can we take advantage of + * using trace_puts() when trace_printk() has only one argument? + * By stringifying the args and checking the size we can tell + * whether or not there are args. __stringify((__VA_ARGS__)) will + * turn into "()\0" with a size of 3 when there are no args, anything + * else will be bigger. All we need to do is define a string to this, + * and then take its size and compare to 3. If it's bigger, use + * do_trace_printk() otherwise, optimize it to trace_puts(). Then just + * let gcc optimize the rest. + */ + +#define trace_printk(fmt, ...) \ +do { \ + char _______STR[] = __stringify((__VA_ARGS__)); \ + if (sizeof(_______STR) > 3) \ + do_trace_printk(fmt, ##__VA_ARGS__); \ + else \ + trace_puts(fmt); \ +} while (0) + +#define do_trace_printk(fmt, args...) \ +do { \ + static const char *trace_printk_fmt __used \ + __section("__trace_printk_fmt") = \ + __builtin_constant_p(fmt) ? 
fmt : NULL; \ + \ + __trace_printk_check_format(fmt, ##args); \ + \ + if (__builtin_constant_p(fmt)) \ + __trace_bprintk(_THIS_IP_, trace_printk_fmt, ##args); \ + else \ + __trace_printk(_THIS_IP_, fmt, ##args); \ +} while (0) + +extern __printf(2, 3) +int __trace_bprintk(unsigned long ip, const char *fmt, ...); + +extern __printf(2, 3) +int __trace_printk(unsigned long ip, const char *fmt, ...); + +/** + * trace_puts - write a string into the ftrace buffer + * @str: the string to record + * + * Note: __trace_bputs is an internal function for trace_puts and + * the @ip is passed in via the trace_puts macro. + * + * This is similar to trace_printk() but is made for those really fast + * paths that a developer wants the least amount of "Heisenbug" effects, + * where the processing of the print format is still too much. + * + * This function allows a kernel developer to debug fast path sections + * that printk is not appropriate for. By scattering in various + * printk like tracing in the code, a developer can quickly see + * where problems are occurring. + * + * This is intended as a debugging tool for the developer only. + * Please refrain from leaving trace_puts scattered around in + * your code. (Extra memory is used for special buffers that are + * allocated when trace_puts() is used.) + * + * Returns: 0 if nothing was written, positive # if string was. + * (1 when __trace_bputs is used, strlen(str) when __trace_puts is used) + */ + +#define trace_puts(str) ({ \ + static const char *trace_printk_fmt __used \ + __section("__trace_printk_fmt") = \ + __builtin_constant_p(str) ? 
str : NULL; \ + \ + if (__builtin_constant_p(str)) \ + __trace_bputs(_THIS_IP_, trace_printk_fmt); \ + else \ + __trace_puts(_THIS_IP_, str); \ +}) +extern int __trace_bputs(unsigned long ip, const char *str); +extern int __trace_puts(unsigned long ip, const char *str); + +extern void trace_dump_stack(int skip); + +/* + * The double __builtin_constant_p is because gcc will give us an error + * if we try to allocate the static variable to fmt if it is not a + * constant. Even with the outer if statement. + */ +#define ftrace_vprintk(fmt, vargs) \ +do { \ + if (__builtin_constant_p(fmt)) { \ + static const char *trace_printk_fmt __used \ + __section("__trace_printk_fmt") = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ + __ftrace_vbprintk(_THIS_IP_, trace_printk_fmt, vargs); \ + } else \ + __ftrace_vprintk(_THIS_IP_, fmt, vargs); \ +} while (0) + +extern __printf(2, 0) int +__ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap); + +extern __printf(2, 0) int +__ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); + +extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode); +#else +static inline void tracing_start(void) { } +static inline void tracing_stop(void) { } +static inline void trace_dump_stack(int skip) { } + +static inline void tracing_on(void) { } +static inline void tracing_off(void) { } +static inline int tracing_is_on(void) { return 0; } +static inline void tracing_snapshot(void) { } +static inline void tracing_snapshot_alloc(void) { } + +static inline __printf(1, 2) +int trace_printk(const char *fmt, ...) 
+{ + return 0; +} +static __printf(1, 0) inline int +ftrace_vprintk(const char *fmt, va_list ap) +{ + return 0; +} +static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } +#endif /* CONFIG_TRACING */ + +#endif diff --git a/include/linux/types.h b/include/linux/types.h index d673747eda8a..7e71d260763c 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -2,7 +2,6 @@ #ifndef _LINUX_TYPES_H #define _LINUX_TYPES_H -#define __EXPORTED_HEADERS__ #include #ifndef __ASSEMBLY__ @@ -185,7 +184,7 @@ typedef phys_addr_t resource_size_t; typedef unsigned long irq_hw_number_t; typedef struct { - int counter; + int __aligned(sizeof(int)) counter; } atomic_t; #define ATOMIC_INIT(i) { (i) } diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index c47d4b9b88b3..85b1fff02fde 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -17,6 +17,7 @@ #ifndef __LINUX_WW_MUTEX_H #define __LINUX_WW_MUTEX_H +#include #include #include diff --git a/include/uapi/linux/shm.h b/include/uapi/linux/shm.h index 8d1f17a4e08e..7269f9f402e3 100644 --- a/include/uapi/linux/shm.h +++ b/include/uapi/linux/shm.h @@ -5,9 +5,6 @@ #include #include #include -#ifndef __KERNEL__ -#include -#endif /* * SHMMNI, SHMMAX and SHMALL are default upper limits which can be diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index 5929030d4e8b..3ae25f3ce067 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -18,6 +18,7 @@ #define _LINUX_TASKSTATS_H #include +#include /* Format for per-task data returned to userland when * - a task exits @@ -34,7 +35,7 @@ */ -#define TASKSTATS_VERSION 16 +#define TASKSTATS_VERSION 17 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -230,6 +231,16 @@ struct taskstats { __u64 irq_delay_max; __u64 irq_delay_min; + + /*v17: delay max timestamp record*/ + struct __kernel_timespec cpu_delay_max_ts; + struct __kernel_timespec blkio_delay_max_ts; + 
struct __kernel_timespec swapin_delay_max_ts; + struct __kernel_timespec freepages_delay_max_ts; + struct __kernel_timespec thrashing_delay_max_ts; + struct __kernel_timespec compact_delay_max_ts; + struct __kernel_timespec wpcopy_delay_max_ts; + struct __kernel_timespec irq_delay_max_ts; }; diff --git a/init/main.c b/init/main.c index 445b5643ecec..1cb395dd94e4 100644 --- a/init/main.c +++ b/init/main.c @@ -104,6 +104,7 @@ #include #include #include +#include #include #include @@ -162,6 +163,7 @@ static size_t initargs_offs; static char *execute_command; static char *ramdisk_execute_command = "/init"; +static bool __initdata ramdisk_execute_command_set; /* * Used to generate warnings if static_key manipulation functions are used @@ -269,7 +271,7 @@ static void * __init get_boot_config_from_initrd(size_t *_size) { u32 size, csum; char *data; - u32 *hdr; + u8 *hdr; int i; if (!initrd_end) @@ -288,9 +290,9 @@ static void * __init get_boot_config_from_initrd(size_t *_size) return NULL; found: - hdr = (u32 *)(data - 8); - size = le32_to_cpu(hdr[0]); - csum = le32_to_cpu(hdr[1]); + hdr = (u8 *)(data - 8); + size = get_unaligned_le32(hdr); + csum = get_unaligned_le32(hdr + 4); data = ((void *)hdr) - size; if ((unsigned long)data < initrd_start) { @@ -623,6 +625,7 @@ static int __init rdinit_setup(char *str) unsigned int i; ramdisk_execute_command = str; + ramdisk_execute_command_set = true; /* See "auto" comment in init_setup */ for (i = 1; i < MAX_INIT_ARGS; i++) argv_init[i] = NULL; @@ -1700,8 +1703,9 @@ static noinline void __init kernel_init_freeable(void) int ramdisk_command_access; ramdisk_command_access = init_eaccess(ramdisk_execute_command); if (ramdisk_command_access != 0) { - pr_warn("check access for rdinit=%s failed: %i, ignoring\n", - ramdisk_execute_command, ramdisk_command_access); + if (ramdisk_execute_command_set) + pr_warn("check access for rdinit=%s failed: %i, ignoring\n", + ramdisk_execute_command, ramdisk_command_access); ramdisk_execute_command = 
NULL; prepare_namespace(); } diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 15b17e86e198..9b087ebeb643 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -214,7 +214,7 @@ static int ipc_permissions(struct ctl_table_header *head, const struct ctl_table if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) || (table->data == &ns->ids[IPC_MSG_IDS].next_id) || (table->data == &ns->ids[IPC_SHM_IDS].next_id)) && - checkpoint_restore_ns_capable(ns->user_ns)) + checkpoint_restore_ns_capable_noaudit(ns->user_ns)) mode = 0666; else #endif diff --git a/kernel/audit.c b/kernel/audit.c index 39c4f26c484d..592d927e70f9 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -32,6 +32,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index dc906dfdff94..5ab6bace7d0d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -716,8 +717,8 @@ static struct bpf_ksym *bpf_ksym_find(unsigned long addr) return n ? 
container_of(n, struct bpf_ksym, tnode) : NULL; } -int __bpf_address_lookup(unsigned long addr, unsigned long *size, - unsigned long *off, char *sym) +int bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char *sym) { struct bpf_ksym *ksym; int ret = 0; diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c index 2fdfa828e3d3..e4e338cdb437 100644 --- a/kernel/bpf/rqspinlock.c +++ b/kernel/bpf/rqspinlock.c @@ -695,7 +695,6 @@ __bpf_kfunc int bpf_res_spin_lock(struct bpf_res_spin_lock *lock) int ret; BUILD_BUG_ON(sizeof(rqspinlock_t) != sizeof(struct bpf_res_spin_lock)); - BUILD_BUG_ON(__alignof__(rqspinlock_t) != __alignof__(struct bpf_res_spin_lock)); preempt_disable(); ret = res_spin_lock((rqspinlock_t *)lock); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 683c332dbafb..dd89bf809772 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config index 9f6ab7dabf67..774702591d26 100644 --- a/kernel/configs/debug.config +++ b/kernel/configs/debug.config @@ -84,7 +84,7 @@ CONFIG_SLUB_DEBUG_ON=y # Debug Oops, Lockups and Hangs # CONFIG_BOOTPARAM_HUNG_TASK_PANIC=0 -# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=0 CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DETECT_HUNG_TASK=y CONFIG_PANIC_ON_OOPS=y diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 99dac1aa972a..3952b3e102e0 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -44,9 +44,15 @@ note_buf_t __percpu *crash_notes; int kimage_crash_copy_vmcoreinfo(struct kimage *image) { - struct page *vmcoreinfo_page; + struct page *vmcoreinfo_base; + struct page *vmcoreinfo_pages[DIV_ROUND_UP(VMCOREINFO_BYTES, PAGE_SIZE)]; + unsigned int order, nr_pages; + int i; void *safecopy; + nr_pages = DIV_ROUND_UP(VMCOREINFO_BYTES, PAGE_SIZE); + order = get_order(VMCOREINFO_BYTES); + if 
(!IS_ENABLED(CONFIG_CRASH_DUMP)) return 0; if (image->type != KEXEC_TYPE_CRASH) @@ -61,12 +67,15 @@ int kimage_crash_copy_vmcoreinfo(struct kimage *image) * happens to generate vmcoreinfo note, hereby we rely on * vmap for this purpose. */ - vmcoreinfo_page = kimage_alloc_control_pages(image, 0); - if (!vmcoreinfo_page) { + vmcoreinfo_base = kimage_alloc_control_pages(image, order); + if (!vmcoreinfo_base) { pr_warn("Could not allocate vmcoreinfo buffer\n"); return -ENOMEM; } - safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL); + for (i = 0; i < nr_pages; i++) + vmcoreinfo_pages[i] = vmcoreinfo_base + i; + + safecopy = vmap(vmcoreinfo_pages, nr_pages, VM_MAP, PAGE_KERNEL); if (!safecopy) { pr_warn("Could not vmap vmcoreinfo buffer\n"); return -ENOMEM; diff --git a/kernel/crash_dump_dm_crypt.c b/kernel/crash_dump_dm_crypt.c index 401423ba477d..37129243054d 100644 --- a/kernel/crash_dump_dm_crypt.c +++ b/kernel/crash_dump_dm_crypt.c @@ -143,6 +143,7 @@ static int read_key_from_user_keying(struct dm_crypt_key *dm_key) { const struct user_key_payload *ukp; struct key *key; + int ret = 0; kexec_dprintk("Requesting logon key %s", dm_key->key_desc); key = request_key(&key_type_logon, dm_key->key_desc, NULL); @@ -152,20 +153,28 @@ static int read_key_from_user_keying(struct dm_crypt_key *dm_key) return PTR_ERR(key); } + down_read(&key->sem); ukp = user_key_payload_locked(key); - if (!ukp) - return -EKEYREVOKED; + if (!ukp) { + ret = -EKEYREVOKED; + goto out; + } if (ukp->datalen > KEY_SIZE_MAX) { pr_err("Key size %u exceeds maximum (%u)\n", ukp->datalen, KEY_SIZE_MAX); - return -EINVAL; + ret = -EINVAL; + goto out; } memcpy(dm_key->data, ukp->data, ukp->datalen); dm_key->key_size = ukp->datalen; kexec_dprintk("Get dm crypt key (size=%u) %s: %8ph\n", dm_key->key_size, dm_key->key_desc, dm_key->data); - return 0; + +out: + up_read(&key->sem); + key_put(key); + return ret; } struct config_key { @@ -223,7 +232,7 @@ static void config_key_release(struct config_item 
*item) key_count--; } -static struct configfs_item_operations config_key_item_ops = { +static const struct configfs_item_operations config_key_item_ops = { .release = config_key_release, }; @@ -298,7 +307,7 @@ static struct configfs_attribute *config_keys_attrs[] = { * Note that, since no extra work is required on ->drop_item(), * no ->drop_item() is provided. */ -static struct configfs_group_operations config_keys_group_ops = { +static const struct configfs_group_operations config_keys_group_ops = { .make_item = config_keys_make_item, }; diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index 22fe969c5d2e..f586afd76c80 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 30e7912ebb0d..2e55c493c98b 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -18,6 +18,8 @@ do { \ d->type##_delay_max = tsk->delays->type##_delay_max; \ d->type##_delay_min = tsk->delays->type##_delay_min; \ + d->type##_delay_max_ts.tv_sec = tsk->delays->type##_delay_max_ts.tv_sec; \ + d->type##_delay_max_ts.tv_nsec = tsk->delays->type##_delay_max_ts.tv_nsec; \ tmp = d->type##_delay_total + tsk->delays->type##_delay; \ d->type##_delay_total = (tmp < d->type##_delay_total) ? 
0 : tmp; \ d->type##_count += tsk->delays->type##_count; \ @@ -104,7 +106,8 @@ void __delayacct_tsk_init(struct task_struct *tsk) * Finish delay accounting for a statistic using its timestamps (@start), * accumulator (@total) and @count */ -static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, u64 *max, u64 *min) +static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, + u64 *max, u64 *min, struct timespec64 *ts) { s64 ns = local_clock() - *start; unsigned long flags; @@ -113,8 +116,10 @@ static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *cou raw_spin_lock_irqsave(lock, flags); *total += ns; (*count)++; - if (ns > *max) + if (ns > *max) { *max = ns; + ktime_get_real_ts64(ts); + } if (*min == 0 || ns < *min) *min = ns; raw_spin_unlock_irqrestore(lock, flags); @@ -137,7 +142,8 @@ void __delayacct_blkio_end(struct task_struct *p) &p->delays->blkio_delay, &p->delays->blkio_count, &p->delays->blkio_delay_max, - &p->delays->blkio_delay_min); + &p->delays->blkio_delay_min, + &p->delays->blkio_delay_max_ts); } int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) @@ -170,6 +176,8 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) d->cpu_delay_max = tsk->sched_info.max_run_delay; d->cpu_delay_min = tsk->sched_info.min_run_delay; + d->cpu_delay_max_ts.tv_sec = tsk->sched_info.max_run_delay_ts.tv_sec; + d->cpu_delay_max_ts.tv_nsec = tsk->sched_info.max_run_delay_ts.tv_nsec; tmp = (s64)d->cpu_delay_total + t2; d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 
0 : tmp; tmp = (s64)d->cpu_run_virtual_total + t3; @@ -217,7 +225,8 @@ void __delayacct_freepages_end(void) &current->delays->freepages_delay, &current->delays->freepages_count, &current->delays->freepages_delay_max, - &current->delays->freepages_delay_min); + &current->delays->freepages_delay_min, + &current->delays->freepages_delay_max_ts); } void __delayacct_thrashing_start(bool *in_thrashing) @@ -241,7 +250,8 @@ void __delayacct_thrashing_end(bool *in_thrashing) &current->delays->thrashing_delay, &current->delays->thrashing_count, &current->delays->thrashing_delay_max, - &current->delays->thrashing_delay_min); + &current->delays->thrashing_delay_min, + &current->delays->thrashing_delay_max_ts); } void __delayacct_swapin_start(void) @@ -256,7 +266,8 @@ void __delayacct_swapin_end(void) &current->delays->swapin_delay, &current->delays->swapin_count, &current->delays->swapin_delay_max, - &current->delays->swapin_delay_min); + &current->delays->swapin_delay_min, + &current->delays->swapin_delay_max_ts); } void __delayacct_compact_start(void) @@ -271,7 +282,8 @@ void __delayacct_compact_end(void) &current->delays->compact_delay, &current->delays->compact_count, &current->delays->compact_delay_max, - &current->delays->compact_delay_min); + &current->delays->compact_delay_min, + &current->delays->compact_delay_max_ts); } void __delayacct_wpcopy_start(void) @@ -286,7 +298,8 @@ void __delayacct_wpcopy_end(void) &current->delays->wpcopy_delay, &current->delays->wpcopy_count, &current->delays->wpcopy_delay_max, - &current->delays->wpcopy_delay_min); + &current->delays->wpcopy_delay_min, + &current->delays->wpcopy_delay_max_ts); } void __delayacct_irq(struct task_struct *task, u32 delta) @@ -296,8 +309,10 @@ void __delayacct_irq(struct task_struct *task, u32 delta) raw_spin_lock_irqsave(&task->delays->lock, flags); task->delays->irq_delay += delta; task->delays->irq_count++; - if (delta > task->delays->irq_delay_max) + if (delta > task->delays->irq_delay_max) { task->delays->irq_delay_max = delta; + ktime_get_real_ts64(&task->delays->irq_delay_max_ts); + } if (delta && (!task->delays->irq_delay_min || delta < task->delays->irq_delay_min))
task->delays->irq_delay_min = delta; raw_spin_unlock_irqrestore(&task->delays->lock, flags); diff --git a/kernel/fork.c b/kernel/fork.c index 9c5effbdbdc1..e832da9d15a4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1357,7 +1357,7 @@ struct file *get_task_exe_file(struct task_struct *task) * @task: The task. * * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning - * this kernel workthread has transiently adopted a user mm with use_mm, + * this kernel workthread has transiently adopted a user mm with kthread_use_mm, * to do its AIO) is not set and if so returns a reference to it, after * bumping up the use count. User must release the mm via mmput() * after use. Typically used by /proc and ptrace. @@ -2069,7 +2069,7 @@ __latent_entropy struct task_struct *copy_process( p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL; /* - * Clear TID on mm_release()? + * TID is cleared in mm_release() when the task exits */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL; diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 6125724aadb1..aec2f06858af 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -347,7 +347,7 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, return 1; } return !!module_address_lookup(addr, symbolsize, offset, NULL, NULL, namebuf) || - !!__bpf_address_lookup(addr, symbolsize, offset, namebuf); + !!bpf_address_lookup(addr, symbolsize, offset, namebuf); } static int kallsyms_lookup_buildid(unsigned long addr, @@ -357,8 +357,21 @@ static int kallsyms_lookup_buildid(unsigned long addr, { int ret; - namebuf[KSYM_NAME_LEN - 1] = 0; + /* + * kallsyms_lookup() returns pointer to namebuf on success and + * NULL on error. But some callers ignore the return value. + * Instead they expect @namebuf to be filled with either a valid + * or an empty string. + */ namebuf[0] = 0; + /* + * Initialize the module-related return values.
They are not set + * when the symbol is in vmlinux or it is a bpf address. + */ + if (modname) + *modname = NULL; + if (modbuildid) + *modbuildid = NULL; if (is_ksym_addr(addr)) { unsigned long pos; @@ -367,10 +380,6 @@ static int kallsyms_lookup_buildid(unsigned long addr, /* Grab name */ kallsyms_expand_symbol(get_symbol_offset(pos), namebuf, KSYM_NAME_LEN); - if (modname) - *modname = NULL; - if (modbuildid) - *modbuildid = NULL; return strlen(namebuf); } @@ -379,12 +388,11 @@ static int kallsyms_lookup_buildid(unsigned long addr, ret = module_address_lookup(addr, symbolsize, offset, modname, modbuildid, namebuf); if (!ret) - ret = bpf_address_lookup(addr, symbolsize, - offset, modname, namebuf); + ret = bpf_address_lookup(addr, symbolsize, offset, namebuf); if (!ret) - ret = ftrace_mod_address_lookup(addr, symbolsize, - offset, modname, namebuf); + ret = ftrace_mod_address_lookup(addr, symbolsize, offset, + modname, modbuildid, namebuf); return ret; } @@ -428,6 +436,37 @@ int lookup_symbol_name(unsigned long addr, char *symname) return lookup_module_symbol_name(addr, symname); } +#ifdef CONFIG_STACKTRACE_BUILD_ID + +static int append_buildid(char *buffer, const char *modname, + const unsigned char *buildid) +{ + if (!modname) + return 0; + + if (!buildid) { + pr_warn_once("Undefined buildid for the module %s\n", modname); + return 0; + } + + /* build ID should match length of sprintf */ +#ifdef CONFIG_MODULES + static_assert(sizeof(typeof_member(struct module, build_id)) == 20); +#endif + + return sprintf(buffer, " %20phN", buildid); +} + +#else /* CONFIG_STACKTRACE_BUILD_ID */ + +static int append_buildid(char *buffer, const char *modname, + const unsigned char *buildid) +{ + return 0; +} + +#endif /* CONFIG_STACKTRACE_BUILD_ID */ + /* Look up a kernel symbol and return it in a text buffer. 
*/ static int __sprint_symbol(char *buffer, unsigned long address, int symbol_offset, int add_offset, int add_buildid) @@ -437,6 +476,9 @@ static int __sprint_symbol(char *buffer, unsigned long address, unsigned long offset, size; int len; + /* Prevent module removal until modname and modbuildid are printed */ + guard(rcu)(); + address += symbol_offset; len = kallsyms_lookup_buildid(address, &size, &offset, &modname, &buildid, buffer); @@ -450,15 +492,8 @@ static int __sprint_symbol(char *buffer, unsigned long address, if (modname) { len += sprintf(buffer + len, " [%s", modname); -#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) - if (add_buildid && buildid) { - /* build ID should match length of sprintf */ -#if IS_ENABLED(CONFIG_MODULES) - static_assert(sizeof(typeof_member(struct module, build_id)) == 20); -#endif - len += sprintf(buffer + len, " %20phN", buildid); - } -#endif + if (add_buildid) + len += append_buildid(buffer + len, modname, buildid); len += sprintf(buffer + len, "]"); } diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c index 219d22857c98..8ef8167be745 100644 --- a/kernel/kcsan/kcsan_test.c +++ b/kernel/kcsan/kcsan_test.c @@ -176,7 +176,7 @@ static bool __report_matches(const struct expect_report *r) /* Title */ cur = expect[0]; - end = &expect[0][sizeof(expect[0]) - 1]; + end = ARRAY_END(expect[0]); cur += scnprintf(cur, end - cur, "BUG: KCSAN: %s in ", is_assert ? 
"assert: race" : "data-race"); if (r->access[1].fn) { @@ -200,7 +200,7 @@ static bool __report_matches(const struct expect_report *r) /* Access 1 */ cur = expect[1]; - end = &expect[1][sizeof(expect[1]) - 1]; + end = ARRAY_END(expect[1]); if (!r->access[1].fn) cur += scnprintf(cur, end - cur, "race at unknown origin, with "); diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index eb62a9794242..2bfbb2d144e6 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -882,6 +882,60 @@ out_free_sha_regions: } #ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY +/* + * kexec_purgatory_find_symbol - find a symbol in the purgatory + * @pi: Purgatory to search in. + * @name: Name of the symbol. + * + * Return: pointer to symbol in read-only symtab on success, NULL on error. + */ +static const Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi, + const char *name) +{ + const Elf_Shdr *sechdrs; + const Elf_Ehdr *ehdr; + const Elf_Sym *syms; + const char *strtab; + int i, k; + + if (!pi->ehdr) + return NULL; + + ehdr = pi->ehdr; + sechdrs = (void *)ehdr + ehdr->e_shoff; + + for (i = 0; i < ehdr->e_shnum; i++) { + if (sechdrs[i].sh_type != SHT_SYMTAB) + continue; + + if (sechdrs[i].sh_link >= ehdr->e_shnum) + /* Invalid strtab section number */ + continue; + strtab = (void *)ehdr + sechdrs[sechdrs[i].sh_link].sh_offset; + syms = (void *)ehdr + sechdrs[i].sh_offset; + + /* Go through symbols for a match */ + for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) { + if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL) + continue; + + if (strcmp(strtab + syms[k].st_name, name) != 0) + continue; + + if (syms[k].st_shndx == SHN_UNDEF || + syms[k].st_shndx >= ehdr->e_shnum) { + pr_debug("Symbol: %s has bad section index %d.\n", + name, syms[k].st_shndx); + return NULL; + } + + /* Found the symbol we are looking for */ + return &syms[k]; + } + } + + return NULL; +} /* * kexec_purgatory_setup_kbuf - prepare buffer to load purgatory. * @pi: Purgatory to be loaded. 
@@ -960,6 +1014,10 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi, unsigned long offset; size_t sechdrs_size; Elf_Shdr *sechdrs; + const Elf_Sym *entry_sym; + u16 entry_shndx = 0; + unsigned long entry_off = 0; + bool start_fixed = false; int i; /* @@ -977,6 +1035,12 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi, bss_addr = kbuf->mem + kbuf->bufsz; kbuf->image->start = pi->ehdr->e_entry; + entry_sym = kexec_purgatory_find_symbol(pi, "purgatory_start"); + if (entry_sym) { + entry_shndx = entry_sym->st_shndx; + entry_off = entry_sym->st_value; + } + for (i = 0; i < pi->ehdr->e_shnum; i++) { unsigned long align; void *src, *dst; @@ -994,6 +1058,13 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi, offset = ALIGN(offset, align); + if (!start_fixed && entry_sym && i == entry_shndx && + (sechdrs[i].sh_flags & SHF_EXECINSTR) && + entry_off < sechdrs[i].sh_size) { + kbuf->image->start = kbuf->mem + offset + entry_off; + start_fixed = true; + } + /* * Check if the segment contains the entry point, if so, * calculate the value of image->start based on it. @@ -1004,13 +1075,14 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi, * is not set to the initial value, and warn the user so they * have a chance to fix their purgatory's linker script. */ - if (sechdrs[i].sh_flags & SHF_EXECINSTR && + if (!start_fixed && sechdrs[i].sh_flags & SHF_EXECINSTR && pi->ehdr->e_entry >= sechdrs[i].sh_addr && pi->ehdr->e_entry < (sechdrs[i].sh_addr + sechdrs[i].sh_size) && - !WARN_ON(kbuf->image->start != pi->ehdr->e_entry)) { + kbuf->image->start == pi->ehdr->e_entry) { kbuf->image->start -= sechdrs[i].sh_addr; kbuf->image->start += kbuf->mem + offset; + start_fixed = true; } src = (void *)pi->ehdr + sechdrs[i].sh_offset; @@ -1128,61 +1200,6 @@ out_free_kbuf: return ret; } -/* - * kexec_purgatory_find_symbol - find a symbol in the purgatory - * @pi: Purgatory to search in. - * @name: Name of the symbol. 
- * - * Return: pointer to symbol in read-only symtab on success, NULL on error. - */ -static const Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi, - const char *name) -{ - const Elf_Shdr *sechdrs; - const Elf_Ehdr *ehdr; - const Elf_Sym *syms; - const char *strtab; - int i, k; - - if (!pi->ehdr) - return NULL; - - ehdr = pi->ehdr; - sechdrs = (void *)ehdr + ehdr->e_shoff; - - for (i = 0; i < ehdr->e_shnum; i++) { - if (sechdrs[i].sh_type != SHT_SYMTAB) - continue; - - if (sechdrs[i].sh_link >= ehdr->e_shnum) - /* Invalid strtab section number */ - continue; - strtab = (void *)ehdr + sechdrs[sechdrs[i].sh_link].sh_offset; - syms = (void *)ehdr + sechdrs[i].sh_offset; - - /* Go through symbols for a match */ - for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) { - if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL) - continue; - - if (strcmp(strtab + syms[k].st_name, name) != 0) - continue; - - if (syms[k].st_shndx == SHN_UNDEF || - syms[k].st_shndx >= ehdr->e_shnum) { - pr_debug("Symbol: %s has bad section index %d.\n", - name, syms[k].st_shndx); - return NULL; - } - - /* Found the symbol we are looking for */ - return &syms[k]; - } - } - - return NULL; -} - void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name) { struct purgatory_info *pi = &image->purgatory_info; diff --git a/kernel/liveupdate/Kconfig b/kernel/liveupdate/Kconfig index d2aeaf13c3ac..1a8513f16ef7 100644 --- a/kernel/liveupdate/Kconfig +++ b/kernel/liveupdate/Kconfig @@ -54,7 +54,6 @@ config KEXEC_HANDOVER_ENABLE_DEFAULT config LIVEUPDATE bool "Live Update Orchestrator" depends on KEXEC_HANDOVER - depends on SHMEM help Enable the Live Update Orchestrator. Live Update is a mechanism, typically based on kexec, that allows the kernel to be updated @@ -73,4 +72,20 @@ config LIVEUPDATE If unsure, say N. 
+config LIVEUPDATE_MEMFD + bool "Live update support for memfd" + depends on LIVEUPDATE + depends on MEMFD_CREATE + depends on SHMEM + default LIVEUPDATE + help + Enable live update support for memfd regions. This allows preserving + memfd-backed memory across kernel live updates. + + This can be used to back VM memory with memfds, allowing the guest + memory to persist, or for other user workloads needing to preserve + pages. + + If unsure, say N. + endmenu diff --git a/kernel/liveupdate/Makefile b/kernel/liveupdate/Makefile index 7cad2eece32d..d2f779cbe279 100644 --- a/kernel/liveupdate/Makefile +++ b/kernel/liveupdate/Makefile @@ -3,6 +3,7 @@ luo-y := \ luo_core.o \ luo_file.o \ + luo_flb.o \ luo_session.o obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index 90d411a59f76..fb3a7b67676e 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -24,7 +25,6 @@ #include -#include "kexec_handover_internal.h" /* * KHO is tightly coupled with mm init and needs access to some of mm * internal APIs. @@ -33,10 +33,7 @@ #include "../kexec_internal.h" #include "kexec_handover_internal.h" -#define KHO_FDT_COMPATIBLE "kho-v1" -#define PROP_PRESERVED_MEMORY_MAP "preserved-memory-map" -#define PROP_SUB_FDT "fdt" - +/* The magic token for preserved pages */ #define KHO_PAGE_MAGIC 0x4b484f50U /* ASCII for 'KHOP' */ /* @@ -219,10 +216,32 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn, return 0; } +/* For physically contiguous 0-order pages. */ +static void kho_init_pages(struct page *page, unsigned long nr_pages) +{ + for (unsigned long i = 0; i < nr_pages; i++) + set_page_count(page + i, 1); +} + +static void kho_init_folio(struct page *page, unsigned int order) +{ + unsigned long nr_pages = (1 << order); + + /* Head page gets refcount of 1. 
*/ + set_page_count(page, 1); + + /* For higher order folios, tail pages get a page count of zero. */ + for (unsigned long i = 1; i < nr_pages; i++) + set_page_count(page + i, 0); + + if (order > 0) + prep_compound_page(page, order); +} + static struct page *kho_restore_page(phys_addr_t phys, bool is_folio) { struct page *page = pfn_to_online_page(PHYS_PFN(phys)); - unsigned int nr_pages, ref_cnt; + unsigned long nr_pages; union kho_page_info info; if (!page) @@ -240,20 +259,11 @@ static struct page *kho_restore_page(phys_addr_t phys, bool is_folio) /* Clear private to make sure later restores on this page error out. */ page->private = 0; - /* Head page gets refcount of 1. */ - set_page_count(page, 1); - /* - * For higher order folios, tail pages get a page count of zero. - * For physically contiguous order-0 pages every pages gets a page - * count of 1 - */ - ref_cnt = is_folio ? 0 : 1; - for (unsigned int i = 1; i < nr_pages; i++) - set_page_count(page + i, ref_cnt); - - if (is_folio && info.order) - prep_compound_page(page, info.order); + if (is_folio) + kho_init_folio(page, info.order); + else + kho_init_pages(page, nr_pages); /* Always mark headpage's codetag as empty to avoid accounting mismatch */ clear_page_tag_ref(page); @@ -289,9 +299,9 @@ EXPORT_SYMBOL_GPL(kho_restore_folio); * Restore a contiguous list of order 0 pages that was preserved with * kho_preserve_pages(). * - * Return: 0 on success, error code on failure + * Return: the first page on success, NULL on failure. 
*/ -struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages) +struct page *kho_restore_pages(phys_addr_t phys, unsigned long nr_pages) { const unsigned long start_pfn = PHYS_PFN(phys); const unsigned long end_pfn = start_pfn + nr_pages; @@ -386,7 +396,7 @@ static void kho_update_memory_map(struct khoser_mem_chunk *first_chunk) void *ptr; u64 phys; - ptr = fdt_getprop_w(kho_out.fdt, 0, PROP_PRESERVED_MEMORY_MAP, NULL); + ptr = fdt_getprop_w(kho_out.fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, NULL); /* Check and discard previous memory map */ phys = get_unaligned((u64 *)ptr); @@ -474,7 +484,7 @@ static phys_addr_t __init kho_get_mem_map_phys(const void *fdt) const void *mem_ptr; int len; - mem_ptr = fdt_getprop(fdt, 0, PROP_PRESERVED_MEMORY_MAP, &len); + mem_ptr = fdt_getprop(fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, &len); if (!mem_ptr || len != sizeof(u64)) { pr_err("failed to get preserved memory bitmaps\n"); return 0; @@ -645,11 +655,13 @@ static void __init kho_reserve_scratch(void) scratch_size_update(); /* FIXME: deal with node hot-plug/remove */ - kho_scratch_cnt = num_online_nodes() + 2; + kho_scratch_cnt = nodes_weight(node_states[N_MEMORY]) + 2; size = kho_scratch_cnt * sizeof(*kho_scratch); kho_scratch = memblock_alloc(size, PAGE_SIZE); - if (!kho_scratch) + if (!kho_scratch) { + pr_err("Failed to reserve scratch array\n"); goto err_disable_kho; + } /* * reserve scratch area in low memory for lowmem allocations in the @@ -658,8 +670,10 @@ static void __init kho_reserve_scratch(void) size = scratch_size_lowmem; addr = memblock_phys_alloc_range(size, CMA_MIN_ALIGNMENT_BYTES, 0, ARCH_LOW_ADDRESS_LIMIT); - if (!addr) + if (!addr) { + pr_err("Failed to reserve lowmem scratch buffer\n"); goto err_free_scratch_desc; + } kho_scratch[i].addr = addr; kho_scratch[i].size = size; @@ -668,20 +682,28 @@ static void __init kho_reserve_scratch(void) /* reserve large contiguous area for allocations without nid */ size = scratch_size_global; addr = 
memblock_phys_alloc(size, CMA_MIN_ALIGNMENT_BYTES); - if (!addr) + if (!addr) { + pr_err("Failed to reserve global scratch buffer\n"); goto err_free_scratch_areas; + } kho_scratch[i].addr = addr; kho_scratch[i].size = size; i++; - for_each_online_node(nid) { + /* + * Loop over nodes that have both memory and are online. Skip + * memoryless nodes, as we can not allocate scratch areas there. + */ + for_each_node_state(nid, N_MEMORY) { size = scratch_size_node(nid); addr = memblock_alloc_range_nid(size, CMA_MIN_ALIGNMENT_BYTES, 0, MEMBLOCK_ALLOC_ACCESSIBLE, nid, true); - if (!addr) + if (!addr) { + pr_err("Failed to reserve nid %d scratch buffer\n", nid); goto err_free_scratch_areas; + } kho_scratch[i].addr = addr; kho_scratch[i].size = size; @@ -735,7 +757,8 @@ int kho_add_subtree(const char *name, void *fdt) goto out_pack; } - err = fdt_setprop(root_fdt, off, PROP_SUB_FDT, &phys, sizeof(phys)); + err = fdt_setprop(root_fdt, off, KHO_FDT_SUB_TREE_PROP_NAME, + &phys, sizeof(phys)); if (err < 0) goto out_pack; @@ -766,7 +789,7 @@ void kho_remove_subtree(void *fdt) const u64 *val; int len; - val = fdt_getprop(root_fdt, off, PROP_SUB_FDT, &len); + val = fdt_getprop(root_fdt, off, KHO_FDT_SUB_TREE_PROP_NAME, &len); if (!val || len != sizeof(phys_addr_t)) continue; @@ -831,7 +854,7 @@ EXPORT_SYMBOL_GPL(kho_unpreserve_folio); * * Return: 0 on success, error code on failure */ -int kho_preserve_pages(struct page *page, unsigned int nr_pages) +int kho_preserve_pages(struct page *page, unsigned long nr_pages) { struct kho_mem_track *track = &kho_out.track; const unsigned long start_pfn = page_to_pfn(page); @@ -875,7 +898,7 @@ EXPORT_SYMBOL_GPL(kho_preserve_pages); * kho_preserve_pages() call. Unpreserving arbitrary sub-ranges of larger * preserved blocks is not supported. 
*/ -void kho_unpreserve_pages(struct page *page, unsigned int nr_pages) +void kho_unpreserve_pages(struct page *page, unsigned long nr_pages) { struct kho_mem_track *track = &kho_out.track; const unsigned long start_pfn = page_to_pfn(page); @@ -885,21 +908,6 @@ void kho_unpreserve_pages(struct page *page, unsigned int nr_pages) } EXPORT_SYMBOL_GPL(kho_unpreserve_pages); -struct kho_vmalloc_hdr { - DECLARE_KHOSER_PTR(next, struct kho_vmalloc_chunk *); -}; - -#define KHO_VMALLOC_SIZE \ - ((PAGE_SIZE - sizeof(struct kho_vmalloc_hdr)) / \ - sizeof(phys_addr_t)) - -struct kho_vmalloc_chunk { - struct kho_vmalloc_hdr hdr; - phys_addr_t phys[KHO_VMALLOC_SIZE]; -}; - -static_assert(sizeof(struct kho_vmalloc_chunk) == PAGE_SIZE); - /* vmalloc flags KHO supports */ #define KHO_VMALLOC_SUPPORTED_FLAGS (VM_ALLOC | VM_ALLOW_HUGE_VMAP) @@ -1315,7 +1323,7 @@ int kho_retrieve_subtree(const char *name, phys_addr_t *phys) if (offset < 0) return -ENOENT; - val = fdt_getprop(fdt, offset, PROP_SUB_FDT, &len); + val = fdt_getprop(fdt, offset, KHO_FDT_SUB_TREE_PROP_NAME, &len); if (!val || len != sizeof(*val)) return -EINVAL; @@ -1335,7 +1343,7 @@ static __init int kho_out_fdt_setup(void) err |= fdt_finish_reservemap(root); err |= fdt_begin_node(root, ""); err |= fdt_property_string(root, "compatible", KHO_FDT_COMPATIBLE); - err |= fdt_property(root, PROP_PRESERVED_MEMORY_MAP, &empty_mem_map, + err |= fdt_property(root, KHO_FDT_MEMORY_MAP_PROP_NAME, &empty_mem_map, sizeof(empty_mem_map)); err |= fdt_end_node(root); err |= fdt_finish(root); @@ -1451,46 +1459,40 @@ void __init kho_memory_init(void) void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len, phys_addr_t scratch_phys, u64 scratch_len) { + unsigned int scratch_cnt = scratch_len / sizeof(*kho_scratch); struct kho_scratch *scratch = NULL; phys_addr_t mem_map_phys; void *fdt = NULL; - int err = 0; - unsigned int scratch_cnt = scratch_len / sizeof(*kho_scratch); + int err; /* Validate the input FDT */ fdt = 
early_memremap(fdt_phys, fdt_len); if (!fdt) { pr_warn("setup: failed to memremap FDT (0x%llx)\n", fdt_phys); - err = -EFAULT; - goto out; + goto err_report; } err = fdt_check_header(fdt); if (err) { pr_warn("setup: handover FDT (0x%llx) is invalid: %d\n", fdt_phys, err); - err = -EINVAL; - goto out; + goto err_unmap_fdt; } err = fdt_node_check_compatible(fdt, 0, KHO_FDT_COMPATIBLE); if (err) { pr_warn("setup: handover FDT (0x%llx) is incompatible with '%s': %d\n", fdt_phys, KHO_FDT_COMPATIBLE, err); - err = -EINVAL; - goto out; + goto err_unmap_fdt; } mem_map_phys = kho_get_mem_map_phys(fdt); - if (!mem_map_phys) { - err = -ENOENT; - goto out; - } + if (!mem_map_phys) + goto err_unmap_fdt; scratch = early_memremap(scratch_phys, scratch_len); if (!scratch) { pr_warn("setup: failed to memremap scratch (phys=0x%llx, len=%lld)\n", scratch_phys, scratch_len); - err = -EFAULT; - goto out; + goto err_unmap_fdt; } /* @@ -1507,7 +1509,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len, if (WARN_ON(err)) { pr_warn("failed to mark the scratch region 0x%pa+0x%pa: %pe", &area->addr, &size, ERR_PTR(err)); - goto out; + goto err_unmap_scratch; } pr_debug("Marked 0x%pa+0x%pa as scratch", &area->addr, &size); } @@ -1529,13 +1531,14 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len, kho_scratch_cnt = scratch_cnt; pr_info("found kexec handover data.\n"); -out: - if (fdt) - early_memunmap(fdt, fdt_len); - if (scratch) - early_memunmap(scratch, scratch_len); - if (err) - pr_warn("disabling KHO revival: %d\n", err); + return; + +err_unmap_scratch: + early_memunmap(scratch, scratch_len); +err_unmap_fdt: + early_memunmap(fdt, fdt_len); +err_report: + pr_warn("disabling KHO revival\n"); } /* Helper functions for kexec_file_load */ diff --git a/kernel/liveupdate/luo_core.c b/kernel/liveupdate/luo_core.c index 944663d99dd9..dda7bb57d421 100644 --- a/kernel/liveupdate/luo_core.c +++ b/kernel/liveupdate/luo_core.c @@ -35,8 +35,7 @@ * iommu, interrupts, vfio, 
participating filesystems, and memory management. * * LUO uses Kexec Handover to transfer memory state from the current kernel to - * the next kernel. For more details see - * Documentation/core-api/kho/concepts.rst. + * the next kernel. For more details see Documentation/core-api/kho/index.rst. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -128,7 +127,9 @@ static int __init luo_early_startup(void) if (err) return err; - return 0; + err = luo_flb_setup_incoming(luo_global.fdt_in); + + return err; } static int __init liveupdate_early_init(void) @@ -165,6 +166,7 @@ static int __init luo_fdt_setup(void) err |= fdt_property_string(fdt_out, "compatible", LUO_FDT_COMPATIBLE); err |= fdt_property(fdt_out, LUO_FDT_LIVEUPDATE_NUM, &ln, sizeof(ln)); err |= luo_session_setup_outgoing(fdt_out); + err |= luo_flb_setup_outgoing(fdt_out); err |= fdt_end_node(fdt_out); err |= fdt_finish(fdt_out); if (err) @@ -226,6 +228,8 @@ int liveupdate_reboot(void) if (err) return err; + luo_flb_serialize(); + err = kho_finalize(); if (err) { pr_err("kho_finalize failed %d\n", err); diff --git a/kernel/liveupdate/luo_file.c b/kernel/liveupdate/luo_file.c index 9f7283379ebc..4c7df52a6507 100644 --- a/kernel/liveupdate/luo_file.c +++ b/kernel/liveupdate/luo_file.c @@ -104,6 +104,7 @@ #include #include #include +#include #include #include #include @@ -273,7 +274,7 @@ int luo_preserve_file(struct luo_file_set *file_set, u64 token, int fd) goto err_fput; err = -ENOENT; - luo_list_for_each_private(fh, &luo_file_handler_list, list) { + list_private_for_each_entry(fh, &luo_file_handler_list, list) { if (fh->ops->can_preserve(fh, file)) { err = 0; break; @@ -284,10 +285,14 @@ int luo_preserve_file(struct luo_file_set *file_set, u64 token, int fd) if (err) goto err_free_files_mem; + err = luo_flb_file_preserve(fh); + if (err) + goto err_free_files_mem; + luo_file = kzalloc(sizeof(*luo_file), GFP_KERNEL); if (!luo_file) { err = -ENOMEM; - goto err_free_files_mem; + goto err_flb_unpreserve; } 
luo_file->file = file; @@ -311,6 +316,8 @@ int luo_preserve_file(struct luo_file_set *file_set, u64 token, int fd) err_kfree: kfree(luo_file); +err_flb_unpreserve: + luo_flb_file_unpreserve(fh); err_free_files_mem: luo_free_files_mem(file_set); err_fput: @@ -352,6 +359,7 @@ void luo_file_unpreserve_files(struct luo_file_set *file_set) args.serialized_data = luo_file->serialized_data; args.private_data = luo_file->private_data; luo_file->fh->ops->unpreserve(&args); + luo_flb_file_unpreserve(luo_file->fh); list_del(&luo_file->list); file_set->count--; @@ -627,6 +635,7 @@ static void luo_file_finish_one(struct luo_file_set *file_set, args.retrieved = luo_file->retrieved; luo_file->fh->ops->finish(&args); + luo_flb_file_finish(luo_file->fh); } /** @@ -758,7 +767,7 @@ int luo_file_deserialize(struct luo_file_set *file_set, bool handler_found = false; struct luo_file *luo_file; - luo_list_for_each_private(fh, &luo_file_handler_list, list) { + list_private_for_each_entry(fh, &luo_file_handler_list, list) { if (!strcmp(fh->compatible, file_ser[i].compatible)) { handler_found = true; break; @@ -833,7 +842,7 @@ int liveupdate_register_file_handler(struct liveupdate_file_handler *fh) return -EBUSY; /* Check for duplicate compatible strings */ - luo_list_for_each_private(fh_iter, &luo_file_handler_list, list) { + list_private_for_each_entry(fh_iter, &luo_file_handler_list, list) { if (!strcmp(fh_iter->compatible, fh->compatible)) { pr_err("File handler registration failed: Compatible string '%s' already registered.\n", fh->compatible); @@ -848,10 +857,13 @@ int liveupdate_register_file_handler(struct liveupdate_file_handler *fh) goto err_resume; } + INIT_LIST_HEAD(&ACCESS_PRIVATE(fh, flb_list)); INIT_LIST_HEAD(&ACCESS_PRIVATE(fh, list)); list_add_tail(&ACCESS_PRIVATE(fh, list), &luo_file_handler_list); luo_session_resume(); + liveupdate_test_register(fh); + return 0; err_resume: @@ -868,23 +880,38 @@ err_resume: * * It ensures safe removal by checking that: * No live update 
session is currently in progress. + * No FLB registered with this file handler. * * If the unregistration fails, the internal test state is reverted. * * Return: 0 Success. -EOPNOTSUPP when live update is not enabled. -EBUSY A live - * update is in progress, can't quiesce live update. + * update is in progress, can't quiesce live update or FLB is registered with + * this file handler. */ int liveupdate_unregister_file_handler(struct liveupdate_file_handler *fh) { + int err = -EBUSY; + if (!liveupdate_enabled()) return -EOPNOTSUPP; + liveupdate_test_unregister(fh); + if (!luo_session_quiesce()) - return -EBUSY; + goto err_register; + + if (!list_empty(&ACCESS_PRIVATE(fh, flb_list))) + goto err_resume; list_del(&ACCESS_PRIVATE(fh, list)); module_put(fh->ops->owner); luo_session_resume(); return 0; + +err_resume: + luo_session_resume(); +err_register: + liveupdate_test_register(fh); + return err; } diff --git a/kernel/liveupdate/luo_flb.c b/kernel/liveupdate/luo_flb.c new file mode 100644 index 000000000000..4c437de5c0b0 --- /dev/null +++ b/kernel/liveupdate/luo_flb.c @@ -0,0 +1,654 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (c) 2025, Google LLC. + * Pasha Tatashin + */ + +/** + * DOC: LUO File Lifecycle Bound Global Data + * + * File-Lifecycle-Bound (FLB) objects provide a mechanism for managing global + * state that is shared across multiple live-updatable files. The lifecycle of + * this shared state is tied to the preservation of the files that depend on it. + * + * An FLB represents a global resource, such as the IOMMU core state, that is + * required by multiple file descriptors (e.g., all VFIO fds). + * + * The preservation of the FLB's state is triggered when the *first* file + * depending on it is preserved. The cleanup of this state (unpreserve or + * finish) is triggered when the *last* file depending on it is unpreserved or + * finished.
+ * + * Handler Dependency: A file handler declares its dependency on one or more + * FLBs by registering them via liveupdate_register_flb(). + * + * Callback Model: Each FLB is defined by a set of operations + * (&struct liveupdate_flb_ops) that LUO invokes at key points: + * + * - .preserve(): Called for the first file. Saves global state. + * - .unpreserve(): Called for the last file (if aborted pre-reboot). + * - .retrieve(): Called on-demand in the new kernel to restore the state. + * - .finish(): Called for the last file in the new kernel for cleanup. + * + * This reference-counted approach ensures that shared state is saved exactly + * once and restored exactly once, regardless of how many files depend on it, + * and that its lifecycle is correctly managed across the kexec transition. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "luo_internal.h" + +#define LUO_FLB_PGCNT 1ul +#define LUO_FLB_MAX (((LUO_FLB_PGCNT << PAGE_SHIFT) - \ + sizeof(struct luo_flb_header_ser)) / sizeof(struct luo_flb_ser)) + +struct luo_flb_header { + struct luo_flb_header_ser *header_ser; + struct luo_flb_ser *ser; + bool active; +}; + +struct luo_flb_global { + struct luo_flb_header incoming; + struct luo_flb_header outgoing; + struct list_head list; + long count; +}; + +static struct luo_flb_global luo_flb_global = { + .list = LIST_HEAD_INIT(luo_flb_global.list), +}; + +/* + * struct luo_flb_link - Links an FLB definition to a file handler's internal + * list of dependencies. + * @flb: A pointer to the registered &struct liveupdate_flb definition. + * @list: The list_head for linking. + */ +struct luo_flb_link { + struct liveupdate_flb *flb; + struct list_head list; +}; + +/* luo_flb_get_private - Access private field, and if needed initialize it. 
*/ +static struct luo_flb_private *luo_flb_get_private(struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = &ACCESS_PRIVATE(flb, private); + + if (!private->initialized) { + mutex_init(&private->incoming.lock); + mutex_init(&private->outgoing.lock); + INIT_LIST_HEAD(&private->list); + private->users = 0; + private->initialized = true; + } + + return private; +} + +static int luo_flb_file_preserve_one(struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + + scoped_guard(mutex, &private->outgoing.lock) { + if (!private->outgoing.count) { + struct liveupdate_flb_op_args args = {0}; + int err; + + args.flb = flb; + err = flb->ops->preserve(&args); + if (err) + return err; + private->outgoing.data = args.data; + private->outgoing.obj = args.obj; + } + private->outgoing.count++; + } + + return 0; +} + +static void luo_flb_file_unpreserve_one(struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + + scoped_guard(mutex, &private->outgoing.lock) { + private->outgoing.count--; + if (!private->outgoing.count) { + struct liveupdate_flb_op_args args = {0}; + + args.flb = flb; + args.data = private->outgoing.data; + args.obj = private->outgoing.obj; + + if (flb->ops->unpreserve) + flb->ops->unpreserve(&args); + + private->outgoing.data = 0; + private->outgoing.obj = NULL; + } + } +} + +static int luo_flb_retrieve_one(struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + struct luo_flb_header *fh = &luo_flb_global.incoming; + struct liveupdate_flb_op_args args = {0}; + bool found = false; + int err; + + guard(mutex)(&private->incoming.lock); + + if (private->incoming.finished) + return -ENODATA; + + if (private->incoming.retrieved) + return 0; + + if (!fh->active) + return -ENODATA; + + for (int i = 0; i < fh->header_ser->count; i++) { + if (!strcmp(fh->ser[i].name, flb->compatible)) { + private->incoming.data = fh->ser[i].data; + 
private->incoming.count = fh->ser[i].count; + found = true; + break; + } + } + + if (!found) + return -ENOENT; + + args.flb = flb; + args.data = private->incoming.data; + + err = flb->ops->retrieve(&args); + if (err) + return err; + + private->incoming.obj = args.obj; + private->incoming.retrieved = true; + + return 0; +} + +static void luo_flb_file_finish_one(struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + u64 count; + + scoped_guard(mutex, &private->incoming.lock) + count = --private->incoming.count; + + if (!count) { + struct liveupdate_flb_op_args args = {0}; + + if (!private->incoming.retrieved) { + int err = luo_flb_retrieve_one(flb); + + if (WARN_ON(err)) + return; + } + + scoped_guard(mutex, &private->incoming.lock) { + args.flb = flb; + args.obj = private->incoming.obj; + flb->ops->finish(&args); + + private->incoming.data = 0; + private->incoming.obj = NULL; + private->incoming.finished = true; + } + } +} + +/** + * luo_flb_file_preserve - Notifies FLBs that a file is about to be preserved. + * @fh: The file handler for the preserved file. + * + * This function iterates through all FLBs associated with the given file + * handler. It increments the reference count for each FLB. If the count becomes + * 1, it triggers the FLB's .preserve() callback to save the global state. + * + * This operation is atomic. If any FLB's .preserve() op fails, it will roll + * back by calling .unpreserve() on any FLBs that were successfully preserved + * during this call. + * + * Context: Called from luo_preserve_file() + * Return: 0 on success, or a negative errno on failure. 
+ */ +int luo_flb_file_preserve(struct liveupdate_file_handler *fh) +{ + struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); + struct luo_flb_link *iter; + int err = 0; + + list_for_each_entry(iter, flb_list, list) { + err = luo_flb_file_preserve_one(iter->flb); + if (err) + goto exit_err; + } + + return 0; + +exit_err: + list_for_each_entry_continue_reverse(iter, flb_list, list) + luo_flb_file_unpreserve_one(iter->flb); + + return err; +} + +/** + * luo_flb_file_unpreserve - Notifies FLBs that a dependent file was unpreserved. + * @fh: The file handler for the unpreserved file. + * + * This function iterates through all FLBs associated with the given file + * handler, in reverse order of registration. It decrements the reference count + * for each FLB. If the count becomes 0, it triggers the FLB's .unpreserve() + * callback to clean up the global state. + * + * Context: Called when a preserved file is being cleaned up before reboot + * (e.g., from luo_file_unpreserve_files()). + */ +void luo_flb_file_unpreserve(struct liveupdate_file_handler *fh) +{ + struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); + struct luo_flb_link *iter; + + list_for_each_entry_reverse(iter, flb_list, list) + luo_flb_file_unpreserve_one(iter->flb); +} + +/** + * luo_flb_file_finish - Notifies FLBs that a dependent file has been finished. + * @fh: The file handler for the finished file. + * + * This function iterates through all FLBs associated with the given file + * handler, in reverse order of registration. It decrements the incoming + * reference count for each FLB. If the count becomes 0, it triggers the FLB's + * .finish() callback for final cleanup in the new kernel. + * + * Context: Called from luo_file_finish() for each file being finished. 
+ */ +void luo_flb_file_finish(struct liveupdate_file_handler *fh) +{ + struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); + struct luo_flb_link *iter; + + list_for_each_entry_reverse(iter, flb_list, list) + luo_flb_file_finish_one(iter->flb); +} + +/** + * liveupdate_register_flb - Associate an FLB with a file handler and register it globally. + * @fh: The file handler that will now depend on the FLB. + * @flb: The File-Lifecycle-Bound object to associate. + * + * Establishes a dependency, informing the LUO core that whenever a file of + * type @fh is preserved, the state of @flb must also be managed. + * + * On the first registration of a given @flb object, it is added to a global + * registry. This function checks for duplicate registrations, both for a + * specific handler and globally, and ensures the total number of unique + * FLBs does not exceed the system limit. + * + * Context: Typically called from a subsystem's module init function after + * both the handler and the FLB have been defined and initialized. + * Return: 0 on success. Returns a negative errno on failure: + * -EINVAL if arguments are NULL or not initialized. + * -ENOMEM on memory allocation failure. + * -EEXIST if this FLB is already registered with this handler. + * -ENOSPC if the maximum number of global FLBs has been reached. + * -EOPNOTSUPP if live update is disabled or not configured. 
+ */ +int liveupdate_register_flb(struct liveupdate_file_handler *fh, + struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); + struct luo_flb_link *link __free(kfree) = NULL; + struct liveupdate_flb *gflb; + struct luo_flb_link *iter; + int err; + + if (!liveupdate_enabled()) + return -EOPNOTSUPP; + + if (WARN_ON(!flb->ops->preserve || !flb->ops->unpreserve || + !flb->ops->retrieve || !flb->ops->finish)) { + return -EINVAL; + } + + /* + * File handler must already be registered, as it initializes the + * flb_list + */ + if (WARN_ON(list_empty(&ACCESS_PRIVATE(fh, list)))) + return -EINVAL; + + link = kzalloc(sizeof(*link), GFP_KERNEL); + if (!link) + return -ENOMEM; + + /* + * Ensure the system is quiescent (no active sessions). + * This acts as a global lock for registration: no other thread can + * be in this section, and no sessions can be creating/using FDs. + */ + if (!luo_session_quiesce()) + return -EBUSY; + + /* Check that this FLB is not already linked to this file handler */ + err = -EEXIST; + list_for_each_entry(iter, flb_list, list) { + if (iter->flb == flb) + goto err_resume; + } + + /* + * If this FLB is not linked to global list it's the first time the FLB + * is registered + */ + if (!private->users) { + if (WARN_ON(!list_empty(&private->list))) { + err = -EINVAL; + goto err_resume; + } + + if (luo_flb_global.count == LUO_FLB_MAX) { + err = -ENOSPC; + goto err_resume; + } + + /* Check that compatible string is unique in global list */ + list_private_for_each_entry(gflb, &luo_flb_global.list, private.list) { + if (!strcmp(gflb->compatible, flb->compatible)) + goto err_resume; + } + + if (!try_module_get(flb->ops->owner)) { + err = -EAGAIN; + goto err_resume; + } + + list_add_tail(&private->list, &luo_flb_global.list); + luo_flb_global.count++; + } + + /* Finally, link the FLB to the file handler */ + private->users++; + link->flb = flb; + 
list_add_tail(&no_free_ptr(link)->list, flb_list); + luo_session_resume(); + + return 0; + +err_resume: + luo_session_resume(); + return err; +} + +/** + * liveupdate_unregister_flb - Remove an FLB dependency from a file handler. + * @fh: The file handler that is currently depending on the FLB. + * @flb: The File-Lifecycle-Bound object to remove. + * + * Removes the association between the specified file handler and the FLB + * previously established by liveupdate_register_flb(). + * + * This function manages the global lifecycle of the FLB. It decrements the + * FLB's usage count. If this was the last file handler referencing this FLB, + * the FLB is removed from the global registry and the reference to its + * owner module (acquired during registration) is released. + * + * Context: This function ensures the session is quiesced (no active FDs + * being created) during the update. It is typically called from a + * subsystem's module exit function. + * Return: 0 on success. + * -EOPNOTSUPP if live update is disabled. + * -EBUSY if the live update session is active and cannot be quiesced. + * -ENOENT if the FLB was not found in the file handler's list. + */ +int liveupdate_unregister_flb(struct liveupdate_file_handler *fh, + struct liveupdate_flb *flb) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + struct list_head *flb_list = &ACCESS_PRIVATE(fh, flb_list); + struct luo_flb_link *iter; + int err = -ENOENT; + + if (!liveupdate_enabled()) + return -EOPNOTSUPP; + + /* + * Ensure the system is quiescent (no active sessions). + * This acts as a global lock for unregistration. 
+ */ + if (!luo_session_quiesce()) + return -EBUSY; + + /* Find and remove the link from the file handler's list */ + list_for_each_entry(iter, flb_list, list) { + if (iter->flb == flb) { + list_del(&iter->list); + kfree(iter); + err = 0; + break; + } + } + + if (err) + goto err_resume; + + private->users--; + /* + * If this is the last file-handler with which we are registered, remove + * from the global list, and release module reference. + */ + if (!private->users) { + list_del_init(&private->list); + luo_flb_global.count--; + module_put(flb->ops->owner); + } + + luo_session_resume(); + + return 0; + +err_resume: + luo_session_resume(); + return err; +} + +/** + * liveupdate_flb_get_incoming - Retrieve the incoming FLB object. + * @flb: The FLB definition. + * @objp: Output parameter; will be populated with the live shared object. + * + * Returns a pointer to its shared live object for the incoming (post-reboot) + * path. + * + * If this is the first time the object is requested in the new kernel, this + * function will trigger the FLB's .retrieve() callback to reconstruct the + * object from its preserved state. Subsequent calls will return the same + * cached object. + * + * Return: 0 on success, or a negative errno on failure. -ENODATA means no + * incoming FLB data, -ENOENT means specific flb not found in the incoming + * data, and -EOPNOTSUPP when live update is disabled or not configured. + */ +int liveupdate_flb_get_incoming(struct liveupdate_flb *flb, void **objp) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + + if (!liveupdate_enabled()) + return -EOPNOTSUPP; + + if (!private->incoming.obj) { + int err = luo_flb_retrieve_one(flb); + + if (err) + return err; + } + + guard(mutex)(&private->incoming.lock); + *objp = private->incoming.obj; + + return 0; +} + +/** + * liveupdate_flb_get_outgoing - Retrieve the outgoing FLB object. + * @flb: The FLB definition. + * @objp: Output parameter; will be populated with the live shared object.
+ * + * Returns a pointer to its shared live object for the outgoing (pre-reboot) + * path. + * + * This function assumes the object has already been created by the FLB's + * .preserve() callback, which is triggered when the first dependent file + * is preserved. + * + * Return: 0 on success, or a negative errno on failure. + */ +int liveupdate_flb_get_outgoing(struct liveupdate_flb *flb, void **objp) +{ + struct luo_flb_private *private = luo_flb_get_private(flb); + + if (!liveupdate_enabled()) + return -EOPNOTSUPP; + + guard(mutex)(&private->outgoing.lock); + *objp = private->outgoing.obj; + + return 0; +} + +int __init luo_flb_setup_outgoing(void *fdt_out) +{ + struct luo_flb_header_ser *header_ser; + u64 header_ser_pa; + int err; + + header_ser = kho_alloc_preserve(LUO_FLB_PGCNT << PAGE_SHIFT); + if (IS_ERR(header_ser)) + return PTR_ERR(header_ser); + + header_ser_pa = virt_to_phys(header_ser); + + err = fdt_begin_node(fdt_out, LUO_FDT_FLB_NODE_NAME); + err |= fdt_property_string(fdt_out, "compatible", + LUO_FDT_FLB_COMPATIBLE); + err |= fdt_property(fdt_out, LUO_FDT_FLB_HEADER, &header_ser_pa, + sizeof(header_ser_pa)); + err |= fdt_end_node(fdt_out); + + if (err) + goto err_unpreserve; + + header_ser->pgcnt = LUO_FLB_PGCNT; + luo_flb_global.outgoing.header_ser = header_ser; + luo_flb_global.outgoing.ser = (void *)(header_ser + 1); + luo_flb_global.outgoing.active = true; + + return 0; + +err_unpreserve: + kho_unpreserve_free(header_ser); + + return err; +} + +int __init luo_flb_setup_incoming(void *fdt_in) +{ + struct luo_flb_header_ser *header_ser; + int err, header_size, offset; + const void *ptr; + u64 header_ser_pa; + + offset = fdt_subnode_offset(fdt_in, 0, LUO_FDT_FLB_NODE_NAME); + if (offset < 0) { + pr_err("Unable to get FLB node [%s]\n", LUO_FDT_FLB_NODE_NAME); + + return -ENOENT; + } + + err = fdt_node_check_compatible(fdt_in, offset, + LUO_FDT_FLB_COMPATIBLE); + if (err) { + pr_err("FLB node is incompatible with '%s' [%d]\n", + 
LUO_FDT_FLB_COMPATIBLE, err); + + return -EINVAL; + } + + header_size = 0; + ptr = fdt_getprop(fdt_in, offset, LUO_FDT_FLB_HEADER, &header_size); + if (!ptr || header_size != sizeof(u64)) { + pr_err("Unable to get FLB header property '%s' [%d]\n", + LUO_FDT_FLB_HEADER, header_size); + + return -EINVAL; + } + + header_ser_pa = get_unaligned((u64 *)ptr); + header_ser = phys_to_virt(header_ser_pa); + + luo_flb_global.incoming.header_ser = header_ser; + luo_flb_global.incoming.ser = (void *)(header_ser + 1); + luo_flb_global.incoming.active = true; + + return 0; +} + +/** + * luo_flb_serialize - Serializes all active FLB objects for KHO. + * + * This function is called from the reboot path. It iterates through all + * registered File-Lifecycle-Bound (FLB) objects. For each FLB that has been + * preserved (i.e., its reference count is greater than zero), it writes its + * metadata into the memory region designated for Kexec Handover. + * + * The serialized data includes the FLB's compatibility string, its opaque + * data handle, and the final reference count. This allows the new kernel to + * find the appropriate handler and reconstruct the FLB's state. + * + * Context: Called from liveupdate_reboot() just before kho_finalize(). 
+ */ +void luo_flb_serialize(void) +{ + struct luo_flb_header *fh = &luo_flb_global.outgoing; + struct liveupdate_flb *gflb; + int i = 0; + + list_private_for_each_entry(gflb, &luo_flb_global.list, private.list) { + struct luo_flb_private *private = luo_flb_get_private(gflb); + + if (private->outgoing.count > 0) { + strscpy(fh->ser[i].name, gflb->compatible, + sizeof(fh->ser[i].name)); + fh->ser[i].data = private->outgoing.data; + fh->ser[i].count = private->outgoing.count; + i++; + } + } + + fh->header_ser->count = i; +} diff --git a/kernel/liveupdate/luo_internal.h b/kernel/liveupdate/luo_internal.h index c8973b543d1d..8083d8739b09 100644 --- a/kernel/liveupdate/luo_internal.h +++ b/kernel/liveupdate/luo_internal.h @@ -40,13 +40,6 @@ static inline int luo_ucmd_respond(struct luo_ucmd *ucmd, */ #define luo_restore_fail(__fmt, ...) panic(__fmt, ##__VA_ARGS__) -/* Mimics list_for_each_entry() but for private list head entries */ -#define luo_list_for_each_private(pos, head, member) \ - for (struct list_head *__iter = (head)->next; \ - __iter != (head) && \ - ({ pos = container_of(__iter, typeof(*(pos)), member); 1; }); \ - __iter = __iter->next) - /** * struct luo_file_set - A set of files that belong to the same sessions. 
* @files_list: An ordered list of files associated with this session, it is @@ -107,4 +100,19 @@ int luo_file_deserialize(struct luo_file_set *file_set, void luo_file_set_init(struct luo_file_set *file_set); void luo_file_set_destroy(struct luo_file_set *file_set); +int luo_flb_file_preserve(struct liveupdate_file_handler *fh); +void luo_flb_file_unpreserve(struct liveupdate_file_handler *fh); +void luo_flb_file_finish(struct liveupdate_file_handler *fh); +int __init luo_flb_setup_outgoing(void *fdt); +int __init luo_flb_setup_incoming(void *fdt); +void luo_flb_serialize(void); + +#ifdef CONFIG_LIVEUPDATE_TEST +void liveupdate_test_register(struct liveupdate_file_handler *fh); +void liveupdate_test_unregister(struct liveupdate_file_handler *fh); +#else +static inline void liveupdate_test_register(struct liveupdate_file_handler *fh) { } +static inline void liveupdate_test_unregister(struct liveupdate_file_handler *fh) { } +#endif + #endif /* _LINUX_LUO_INTERNAL_H */ diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index 00a60796327c..0fc11e45df9b 100644 --- a/kernel/module/kallsyms.c +++ b/kernel/module/kallsyms.c @@ -334,13 +334,8 @@ int module_address_lookup(unsigned long addr, if (mod) { if (modname) *modname = mod->name; - if (modbuildid) { -#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) - *modbuildid = mod->build_id; -#else - *modbuildid = NULL; -#endif - } + if (modbuildid) + *modbuildid = module_buildid(mod); sym = find_kallsyms_symbol(mod, addr, size, offset); diff --git a/kernel/panic.c b/kernel/panic.c index 0c20fcaae98a..c78600212b6c 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -42,6 +42,7 @@ #define PANIC_TIMER_STEP 100 #define PANIC_BLINK_SPD 18 +#define PANIC_MSG_BUFSZ 1024 #ifdef CONFIG_SMP /* @@ -74,6 +75,8 @@ EXPORT_SYMBOL_GPL(panic_timeout); unsigned long panic_print; +static int panic_force_cpu = -1; + ATOMIC_NOTIFIER_HEAD(panic_notifier_list); EXPORT_SYMBOL(panic_notifier_list); @@ -300,6 +303,150 @@ void __weak 
crash_smp_send_stop(void) } atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID); +atomic_t panic_redirect_cpu = ATOMIC_INIT(PANIC_CPU_INVALID); + +#if defined(CONFIG_SMP) && defined(CONFIG_CRASH_DUMP) +static char *panic_force_buf; + +static int __init panic_force_cpu_setup(char *str) +{ + int cpu; + + if (!str) + return -EINVAL; + + if (kstrtoint(str, 0, &cpu) || cpu < 0 || cpu >= nr_cpu_ids) { + pr_warn("panic_force_cpu: invalid value '%s'\n", str); + return -EINVAL; + } + + panic_force_cpu = cpu; + return 0; +} +early_param("panic_force_cpu", panic_force_cpu_setup); + +static int __init panic_force_cpu_late_init(void) +{ + if (panic_force_cpu < 0) + return 0; + + panic_force_buf = kmalloc(PANIC_MSG_BUFSZ, GFP_KERNEL); + + return 0; +} +late_initcall(panic_force_cpu_late_init); + +static void do_panic_on_target_cpu(void *info) +{ + panic("%s", (char *)info); +} + +/** + * panic_smp_redirect_cpu - Redirect panic to target CPU + * @target_cpu: CPU that should handle the panic + * @msg: formatted panic message + * + * Default implementation uses IPI. Architectures with NMI support + * can override this for more reliable delivery. + * + * Return: 0 on success, negative errno on failure + */ +int __weak panic_smp_redirect_cpu(int target_cpu, void *msg) +{ + static call_single_data_t panic_csd; + + panic_csd.func = do_panic_on_target_cpu; + panic_csd.info = msg; + + return smp_call_function_single_async(target_cpu, &panic_csd); +} + +/** + * panic_try_force_cpu - Redirect panic to a specific CPU for crash kernel + * @fmt: panic message format string + * @args: arguments for format string + * + * Some platforms require panic handling to occur on a specific CPU + * for the crash kernel to function correctly. This function redirects + * panic handling to the CPU specified via the panic_force_cpu= boot parameter. + * + * Returns false if panic should proceed on current CPU. + * Returns true if panic was redirected. 
+ */ +__printf(1, 0) +static bool panic_try_force_cpu(const char *fmt, va_list args) +{ + int this_cpu = raw_smp_processor_id(); + int old_cpu = PANIC_CPU_INVALID; + const char *msg; + + /* Feature not enabled via boot parameter */ + if (panic_force_cpu < 0) + return false; + + /* Already on target CPU - proceed normally */ + if (this_cpu == panic_force_cpu) + return false; + + /* Target CPU is offline, can't redirect */ + if (!cpu_online(panic_force_cpu)) { + pr_warn("panic: target CPU %d is offline, continuing on CPU %d\n", + panic_force_cpu, this_cpu); + return false; + } + + /* Another panic already in progress */ + if (panic_in_progress()) + return false; + + /* + * Only one CPU can do the redirect. Use atomic cmpxchg to ensure + * we don't race with another CPU also trying to redirect. + */ + if (!atomic_try_cmpxchg(&panic_redirect_cpu, &old_cpu, this_cpu)) + return false; + + /* + * Use dynamically allocated buffer if available, otherwise + * fall back to static message for early boot panics or allocation failure. 
+ */ + if (panic_force_buf) { + vsnprintf(panic_force_buf, PANIC_MSG_BUFSZ, fmt, args); + msg = panic_force_buf; + } else { + msg = "Redirected panic (buffer unavailable)"; + } + + console_verbose(); + bust_spinlocks(1); + + pr_emerg("panic: Redirecting from CPU %d to CPU %d for crash kernel.\n", + this_cpu, panic_force_cpu); + + /* Dump original CPU before redirecting */ + if (!test_taint(TAINT_DIE) && + oops_in_progress <= 1 && + IS_ENABLED(CONFIG_DEBUG_BUGVERBOSE)) { + dump_stack(); + } + + if (panic_smp_redirect_cpu(panic_force_cpu, (void *)msg) != 0) { + atomic_set(&panic_redirect_cpu, PANIC_CPU_INVALID); + pr_warn("panic: failed to redirect to CPU %d, continuing on CPU %d\n", + panic_force_cpu, this_cpu); + return false; + } + + /* IPI/NMI sent, this CPU should stop */ + return true; +} +#else +__printf(1, 0) +static inline bool panic_try_force_cpu(const char *fmt, va_list args) +{ + return false; +} +#endif /* CONFIG_SMP && CONFIG_CRASH_DUMP */ bool panic_try_start(void) { @@ -428,7 +575,7 @@ static void panic_other_cpus_shutdown(bool crash_kexec) */ void vpanic(const char *fmt, va_list args) { - static char buf[1024]; + static char buf[PANIC_MSG_BUFSZ]; long i, i_next = 0, len; int state = 0; bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers; @@ -452,6 +599,15 @@ void vpanic(const char *fmt, va_list args) local_irq_disable(); preempt_disable_notrace(); + /* Redirect panic to target CPU if configured via panic_force_cpu=. */ + if (panic_try_force_cpu(fmt, args)) { + /* + * Mark ourselves offline so panic_other_cpus_shutdown() won't wait + * for us on architectures that check num_online_cpus(). + */ + set_cpu_online(smp_processor_id(), false); + panic_smp_self_stop(); + } /* * It's possible to come here directly from a panic-assertion and * not have preempt disabled. 
Some functions called from here want @@ -484,7 +640,11 @@ void vpanic(const char *fmt, va_list args) /* * Avoid nested stack-dumping if a panic occurs during oops processing */ - if (test_taint(TAINT_DIE) || oops_in_progress > 1) { + if (atomic_read(&panic_redirect_cpu) != PANIC_CPU_INVALID && + panic_force_cpu == raw_smp_processor_id()) { + pr_emerg("panic: Redirected from CPU %d, skipping stack dump.\n", + atomic_read(&panic_redirect_cpu)); + } else if (test_taint(TAINT_DIE) || oops_in_progress > 1) { panic_this_cpu_backtrace_printed = true; } else if (IS_ENABLED(CONFIG_DEBUG_BUGVERBOSE)) { dump_stack(); diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index c903f1a42891..a612cf253c87 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -253,8 +253,10 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t) delta = rq_clock(rq) - t->sched_info.last_queued; t->sched_info.last_queued = 0; t->sched_info.run_delay += delta; - if (delta > t->sched_info.max_run_delay) + if (delta > t->sched_info.max_run_delay) { t->sched_info.max_run_delay = delta; + ktime_get_real_ts64(&t->sched_info.max_run_delay_ts); + } if (delta && (!t->sched_info.min_run_delay || delta < t->sched_info.min_run_delay)) t->sched_info.min_run_delay = delta; rq_sched_info_dequeue(rq, delta); @@ -278,8 +280,10 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t) t->sched_info.run_delay += delta; t->sched_info.last_arrival = now; t->sched_info.pcount++; - if (delta > t->sched_info.max_run_delay) + if (delta > t->sched_info.max_run_delay) { t->sched_info.max_run_delay = delta; + ktime_get_real_ts64(&t->sched_info.max_run_delay_ts); + } if (delta && (!t->sched_info.min_run_delay || delta < t->sched_info.min_run_delay)) t->sched_info.min_run_delay = delta; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f9b10c633bdd..8fb38722fd5c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -8112,7 +8112,8 @@ 
ftrace_func_address_lookup(struct ftrace_mod_map *mod_map, int ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, - unsigned long *off, char **modname, char *sym) + unsigned long *off, char **modname, + const unsigned char **modbuildid, char *sym) { struct ftrace_mod_map *mod_map; int ret = 0; @@ -8124,6 +8125,8 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, if (ret) { if (modname) *modname = mod_map->mod->name; + if (modbuildid) + *modbuildid = module_buildid(mod_map->mod); break; } } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8bd4ec08fb36..b1cb30a7b83d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1178,11 +1178,10 @@ EXPORT_SYMBOL_GPL(__trace_array_puts); * __trace_puts - write a constant string into the trace buffer. * @ip: The address of the caller * @str: The constant string to write - * @size: The size of the string. */ -int __trace_puts(unsigned long ip, const char *str, int size) +int __trace_puts(unsigned long ip, const char *str) { - return __trace_array_puts(printk_trace, ip, str, size); + return __trace_array_puts(printk_trace, ip, str, strlen(str)); } EXPORT_SYMBOL_GPL(__trace_puts); @@ -1201,7 +1200,7 @@ int __trace_bputs(unsigned long ip, const char *str) int size = sizeof(struct bputs_entry); if (!printk_binsafe(tr)) - return __trace_puts(ip, str, strlen(str)); + return __trace_puts(ip, str); if (!(tr->trace_flags & TRACE_ITER(PRINTK))) return 0; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c11edec5d8f5..8428c437cb9d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -2119,7 +2119,7 @@ extern void tracing_log_err(struct trace_array *tr, * about performance). The internal_trace_puts() is for such * a purpose. 
*/ -#define internal_trace_puts(str) __trace_puts(_THIS_IP_, str, strlen(str)) +#define internal_trace_puts(str) __trace_puts(_THIS_IP_, str) #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 6ea2f6363b90..5c153106e642 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -125,7 +125,7 @@ static void __acct_update_integrals(struct task_struct *tsk, { u64 time, delta; - if (!likely(tsk->mm)) + if (unlikely(!tsk->mm || (tsk->flags & PF_KTHREAD))) return; time = stime + utime; diff --git a/kernel/ucount.c b/kernel/ucount.c index 586af49fc03e..fc4a8f2d3096 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -47,7 +47,7 @@ static int set_permissions(struct ctl_table_header *head, int mode; /* Allow users with CAP_SYS_RESOURCE unrestrained access */ - if (ns_capable(user_ns, CAP_SYS_RESOURCE)) + if (ns_capable_noaudit(user_ns, CAP_SYS_RESOURCE)) mode = (table->mode & S_IRWXU) >> 6; else /* Allow all others at most read-only access */ diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c index 46fc1050f1bb..8d82913223a1 100644 --- a/kernel/vmcore_info.c +++ b/kernel/vmcore_info.c @@ -141,7 +141,9 @@ EXPORT_SYMBOL_GPL(hwerr_log_error_type); static int __init crash_save_vmcoreinfo_init(void) { - vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL); + int order; + order = get_order(VMCOREINFO_BYTES); + vmcoreinfo_data = (unsigned char *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); if (!vmcoreinfo_data) { pr_warn("Memory allocation for vmcoreinfo_data failed\n"); return -ENOMEM; @@ -150,7 +152,7 @@ static int __init crash_save_vmcoreinfo_init(void) vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE, GFP_KERNEL | __GFP_ZERO); if (!vmcoreinfo_note) { - free_page((unsigned long)vmcoreinfo_data); + free_pages((unsigned long)vmcoreinfo_data, order); vmcoreinfo_data = NULL; pr_warn("Memory allocation for vmcoreinfo_note failed\n"); return -ENOMEM; diff --git 
a/kernel/watchdog.c b/kernel/watchdog.c index 366122f4a0f8..7d675781bc91 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -363,7 +363,7 @@ static struct cpumask watchdog_allowed_mask __read_mostly; /* Global variables, exported for sysctl */ unsigned int __read_mostly softlockup_panic = - IS_ENABLED(CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC); + CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC; static bool softlockup_initialized __read_mostly; static u64 __read_mostly sample_period; @@ -550,7 +550,7 @@ static bool need_counting_irqs(void) u8 util; int tail = __this_cpu_read(cpustat_tail); - tail = (tail + NUM_HARDIRQ_REPORT - 1) % NUM_HARDIRQ_REPORT; + tail = (tail + NUM_SAMPLE_PERIODS - 1) % NUM_SAMPLE_PERIODS; util = __this_cpu_read(cpustat_util[tail][STATS_HARDIRQ]); return util > HARDIRQ_PERCENT_THRESH; } @@ -774,8 +774,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { unsigned long touch_ts, period_ts, now; struct pt_regs *regs = get_irq_regs(); - int duration; int softlockup_all_cpu_backtrace; + int duration, thresh_count; unsigned long flags; if (!watchdog_enabled) @@ -879,7 +879,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); sys_info(softlockup_si_mask & ~SYS_INFO_ALL_BT); - if (softlockup_panic) + thresh_count = duration / get_softlockup_thresh(); + + if (softlockup_panic && thresh_count >= softlockup_panic) panic("softlockup: hung tasks"); } @@ -1228,7 +1230,7 @@ static const struct ctl_table watchdog_sysctls[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "softlockup_sys_info", diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c index d3ca70e3c256..cf05775a96d3 100644 --- a/kernel/watchdog_perf.c +++ b/kernel/watchdog_perf.c @@ -118,18 +118,11 @@ static void watchdog_overflow_callback(struct perf_event *event, watchdog_hardlockup_check(smp_processor_id(), 
regs); } -static int hardlockup_detector_event_create(void) +static struct perf_event *hardlockup_detector_event_create(unsigned int cpu) { - unsigned int cpu; struct perf_event_attr *wd_attr; struct perf_event *evt; - /* - * Preemption is not disabled because memory will be allocated. - * Ensure CPU-locality by calling this in per-CPU kthread. - */ - WARN_ON(!is_percpu_thread()); - cpu = raw_smp_processor_id(); wd_attr = &wd_hw_attr; wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); @@ -143,14 +136,7 @@ static int hardlockup_detector_event_create(void) watchdog_overflow_callback, NULL); } - if (IS_ERR(evt)) { - pr_debug("Perf event create on CPU %d failed with %ld\n", cpu, - PTR_ERR(evt)); - return PTR_ERR(evt); - } - WARN_ONCE(this_cpu_read(watchdog_ev), "unexpected watchdog_ev leak"); - this_cpu_write(watchdog_ev, evt); - return 0; + return evt; } /** @@ -159,17 +145,26 @@ static int hardlockup_detector_event_create(void) */ void watchdog_hardlockup_enable(unsigned int cpu) { + struct perf_event *evt; + WARN_ON_ONCE(cpu != smp_processor_id()); - if (hardlockup_detector_event_create()) + evt = hardlockup_detector_event_create(cpu); + if (IS_ERR(evt)) { + pr_debug("Perf event create on CPU %d failed with %ld\n", cpu, + PTR_ERR(evt)); return; + } /* use original value for check */ if (!atomic_fetch_inc(&watchdog_cpus)) pr_info("Enabled. 
Permanently consumes one hw-PMU counter.\n"); + WARN_ONCE(this_cpu_read(watchdog_ev), "unexpected watchdog_ev leak"); + this_cpu_write(watchdog_ev, evt); + watchdog_init_timestamp(); - perf_event_enable(this_cpu_read(watchdog_ev)); + perf_event_enable(evt); } /** @@ -263,19 +258,30 @@ bool __weak __init arch_perf_nmi_is_available(void) */ int __init watchdog_hardlockup_probe(void) { + struct perf_event *evt; + unsigned int cpu; int ret; if (!arch_perf_nmi_is_available()) return -ENODEV; - ret = hardlockup_detector_event_create(); + if (!hw_nmi_get_sample_period(watchdog_thresh)) + return -EINVAL; - if (ret) { + /* + * Test hardware PMU availability by creating a temporary perf event. + * The event is released immediately. + */ + cpu = raw_smp_processor_id(); + evt = hardlockup_detector_event_create(cpu); + if (IS_ERR(evt)) { pr_info("Perf NMI watchdog permanently disabled\n"); + ret = PTR_ERR(evt); } else { - perf_event_release_kernel(this_cpu_read(watchdog_ev)); - this_cpu_write(watchdog_ev, NULL); + perf_event_release_kernel(evt); + ret = 0; } + return ret; } diff --git a/lib/Kconfig b/lib/Kconfig index 2923924bea78..0f2fb9610647 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -430,19 +430,6 @@ config GLOB are compiling an out-of tree driver which tells you that it depends on this. -config GLOB_SELFTEST - tristate "glob self-test on init" - depends on GLOB - help - This option enables a simple self-test of the glob_match - function on startup. It is primarily useful for people - working on the code to ensure they haven't introduced any - regressions. - - It only adds a little bit of code and slows kernel boot (or - module load) by a small amount, so you're welcome to play with - it, but you probably don't need it. 
- # # Netlink attribute parsing support is select'ed if needed # diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 64ef7e62eb8a..4e2dfbbd3d78 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1147,13 +1147,14 @@ config SOFTLOCKUP_DETECTOR_INTR_STORM the CPU stats and the interrupt counts during the "soft lockups". config BOOTPARAM_SOFTLOCKUP_PANIC - bool "Panic (Reboot) On Soft Lockups" + int "Panic (Reboot) On Soft Lockups" depends on SOFTLOCKUP_DETECTOR + default 0 help - Say Y here to enable the kernel to panic on "soft lockups", - which are bugs that cause the kernel to loop in kernel - mode for more than 20 seconds (configurable using the watchdog_thresh - sysctl), without giving other tasks a chance to run. + Set to a non-zero value N to enable the kernel to panic on "soft + lockups", which are bugs that cause the kernel to loop in kernel + mode for more than (N * 20 seconds) (configurable using the + watchdog_thresh sysctl), without giving other tasks a chance to run. The panic can be used in combination with panic_timeout, to cause the system to reboot automatically after a @@ -1161,7 +1162,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC high-availability systems that have uptime guarantees and where a lockup must be resolved ASAP. - Say N if unsure. + Say 0 if unsure. config HAVE_HARDLOCKUP_DETECTOR_BUDDY bool @@ -1310,7 +1311,7 @@ config BOOTPARAM_HUNG_TASK_PANIC high-availability systems that have uptime guarantees and where a hung tasks must be resolved ASAP. - Say N if unsure. + Say 0 if unsure. config DETECT_HUNG_TASK_BLOCKER bool "Dump Hung Tasks Blocker" @@ -1419,6 +1420,24 @@ config DEBUG_PREEMPT depending on workload as it triggers debugging routines for each this_cpu operation. It should only be used for debugging purposes. +config DEBUG_ATOMIC + bool "Debug atomic variables" + depends on DEBUG_KERNEL + help + If you say Y here then the kernel will add a runtime alignment check + to atomic accesses. 
Useful for architectures that do not have trap on + mis-aligned access. + + This option has potentially significant overhead. + +config DEBUG_ATOMIC_LARGEST_ALIGN + bool "Check alignment only up to __aligned_largest" + depends on DEBUG_ATOMIC + help + If you say Y here then the check for natural alignment of + atomic accesses will be constrained to the compiler's largest + alignment for scalar types. + menu "Lock Debugging (spinlocks, mutexes, etc...)" config LOCK_DEBUGGING_SUPPORT @@ -2337,16 +2356,6 @@ config TEST_LIST_SORT If unsure, say N. -config TEST_MIN_HEAP - tristate "Min heap test" - depends on DEBUG_KERNEL || m - help - Enable this to turn on min heap function tests. This test is - executed only once during system boot (so affects only boot time), - or at module load time. - - If unsure, say N. - config TEST_SORT tristate "Array-based sort test" if !KUNIT_ALL_TESTS depends on KUNIT @@ -2559,9 +2568,6 @@ config TEST_BITMAP If unsure, say N. -config TEST_UUID - tristate "Test functions located in the uuid module at runtime" - config TEST_XARRAY tristate "Test the XArray code at runtime" @@ -2845,6 +2851,20 @@ config LIST_KUNIT_TEST If unsure, say N. +config LIST_PRIVATE_KUNIT_TEST + tristate "KUnit Test for Kernel Private Linked-list structures" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + This builds the KUnit test for the private linked-list primitives + defined in include/linux/list_private.h. + + These primitives allow manipulation of list_head members that are + marked as private and require special accessors (ACCESS_PRIVATE) + to strip qualifiers or handle encapsulation. + + If unsure, say N. + config HASHTABLE_KUNIT_TEST tristate "KUnit Test for Kernel Hashtable structures" if !KUNIT_ALL_TESTS depends on KUNIT @@ -2884,6 +2904,29 @@ config CONTEXT_ANALYSIS_TEST If unsure, say N. 
+config LIVEUPDATE_TEST + bool "Live Update Kernel Test" + default n + depends on LIVEUPDATE + help + Enable a built-in kernel test module for the Live Update + Orchestrator. + + This module validates the File-Lifecycle-Bound subsystem by + registering a set of mock FLB objects with any real file handlers + that support live update (such as the memfd handler). + + When live update operations are performed, this test module will + output messages to the kernel log (dmesg), confirming that its + registration and various callback functions (preserve, retrieve, + finish, etc.) are being invoked correctly. + + This is a debugging and regression testing tool for developers + working on the Live Update subsystem. It should not be enabled in + production kernels. + + If unsure, say N + config CMDLINE_KUNIT_TEST tristate "KUnit test for cmdline API" if !KUNIT_ALL_TESTS depends on KUNIT @@ -2959,6 +3002,17 @@ config MEMCPY_KUNIT_TEST If unsure, say N. +config MIN_HEAP_KUNIT_TEST + tristate "Min heap test" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + This option enables the KUnit test suite for the min heap library + which provides functions for creating and managing min heaps. + The test suite checks the functionality of the min heap library. + + If unsure, say N + config IS_SIGNED_TYPE_KUNIT_TEST tristate "Test is_signed_type() macro" if !KUNIT_ALL_TESTS depends on KUNIT @@ -3364,6 +3418,17 @@ config RATELIMIT_KUNIT_TEST If unsure, say N. +config UUID_KUNIT_TEST + tristate "KUnit test for UUID" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + This option enables the KUnit test suite for the uuid library, + which provides functions for generating and parsing UUID and GUID. + The test suite checks parsing of UUID and GUID strings. + + If unsure, say N. 
+ config INT_POW_KUNIT_TEST tristate "Integer exponentiation (int_pow) test" if !KUNIT_ALL_TESTS depends on KUNIT @@ -3433,6 +3498,19 @@ config PRIME_NUMBERS_KUNIT_TEST If unsure, say N +config GLOB_KUNIT_TEST + tristate "Glob matching test" if !KUNIT_ALL_TESTS + depends on GLOB + depends on KUNIT + default KUNIT_ALL_TESTS + help + Enable this option to test the glob functions at runtime. + + This test suite verifies the correctness of glob_match() across various + scenarios, including edge cases. + + If unsure, say N + endif # RUNTIME_TESTING_MENU config ARCH_USE_MEMTEST diff --git a/lib/Makefile b/lib/Makefile index 22d8742bba57..1b9ee167517f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -77,7 +77,6 @@ obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o CFLAGS_test_ubsan.o += $(call cc-disable-warning, unused-but-set-variable) UBSAN_SANITIZE_test_ubsan.o := y obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o -obj-$(CONFIG_TEST_MIN_HEAP) += test_min_heap.o obj-$(CONFIG_TEST_LKM) += test_module.o obj-$(CONFIG_TEST_VMALLOC) += test_vmalloc.o obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o @@ -91,7 +90,6 @@ ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_KASAN),yy) GCOV_PROFILE_test_bitmap.o := n endif -obj-$(CONFIG_TEST_UUID) += test_uuid.o obj-$(CONFIG_TEST_XARRAY) += test_xarray.o obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o @@ -228,7 +226,6 @@ obj-$(CONFIG_CLOSURES) += closure.o obj-$(CONFIG_DQL) += dynamic_queue_limits.o obj-$(CONFIG_GLOB) += glob.o -obj-$(CONFIG_GLOB_SELFTEST) += globtest.o obj-$(CONFIG_DIMLIB) += dim/ obj-$(CONFIG_SIGNATURE) += digsig.o diff --git a/lib/build_OID_registry b/lib/build_OID_registry index 8267e8d71338..30493ac190c0 100755 --- a/lib/build_OID_registry +++ b/lib/build_OID_registry @@ -60,10 +60,12 @@ for (my $i = 0; $i <= $#names; $i++) { # Determine the encoded length of this OID my $size = $#components; for (my $loop = 2; $loop <= $#components; $loop++) { - my $c = $components[$loop]; + 
$ENV{'BC_LINE_LENGTH'} = "0"; + my $c = `echo "ibase=10; obase=2; $components[$loop]" | bc`; + chomp($c); # We will base128 encode the number - my $tmp = ($c == 0) ? 0 : int(log($c)/log(2)); + my $tmp = length($c) - 1; $tmp = int($tmp / 7); $size += $tmp; } @@ -100,16 +102,24 @@ for (my $i = 0; $i <= $#names; $i++) { push @octets, $components[0] * 40 + $components[1]; for (my $loop = 2; $loop <= $#components; $loop++) { - my $c = $components[$loop]; + # get the base 2 representation of the component + $ENV{'BC_LINE_LENGTH'} = "0"; + my $c = `echo "ibase=10; obase=2; $components[$loop]" | bc`; + chomp($c); - # Base128 encode the number - my $tmp = ($c == 0) ? 0 : int(log($c)/log(2)); + my $tmp = length($c) - 1; $tmp = int($tmp / 7); - for (; $tmp > 0; $tmp--) { - push @octets, (($c >> $tmp * 7) & 0x7f) | 0x80; + # zero pad upto length multiple of 7 + $c = substr("0000000", 0, ($tmp + 1) * 7 - length($c)).$c; + + # Base128 encode the number + for (my $j = 0; $j < $tmp; $j++) { + my $b = oct("0b".substr($c, $j * 7, 7)); + + push @octets, $b | 0x80; } - push @octets, $c & 0x7f; + push @octets, oct("0b".substr($c, $tmp * 7, 7)); } push @encoded_oids, \@octets; diff --git a/lib/globtest.c b/lib/globtest.c deleted file mode 100644 index d8e97d43b905..000000000000 --- a/lib/globtest.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Extracted fronm glob.c - */ - -#include -#include -#include -#include - -/* Boot with "glob.verbose=1" to show successful tests, too */ -static bool verbose = false; -module_param(verbose, bool, 0); - -struct glob_test { - char const *pat, *str; - bool expected; -}; - -static bool __pure __init test(char const *pat, char const *str, bool expected) -{ - bool match = glob_match(pat, str); - bool success = match == expected; - - /* Can't get string literals into a particular section, so... */ - static char const msg_error[] __initconst = - KERN_ERR "glob: \"%s\" vs. 
\"%s\": %s *** ERROR ***\n"; - static char const msg_ok[] __initconst = - KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n"; - static char const mismatch[] __initconst = "mismatch"; - char const *message; - - if (!success) - message = msg_error; - else if (verbose) - message = msg_ok; - else - return success; - - printk(message, pat, str, mismatch + 3*match); - return success; -} - -/* - * The tests are all jammed together in one array to make it simpler - * to place that array in the .init.rodata section. The obvious - * "array of structures containing char *" has no way to force the - * pointed-to strings to be in a particular section. - * - * Anyway, a test consists of: - * 1. Expected glob_match result: '1' or '0'. - * 2. Pattern to match: null-terminated string - * 3. String to match against: null-terminated string - * - * The list of tests is terminated with a final '\0' instead of - * a glob_match result character. - */ -static char const glob_tests[] __initconst = - /* Some basic tests */ - "1" "a\0" "a\0" - "0" "a\0" "b\0" - "0" "a\0" "aa\0" - "0" "a\0" "\0" - "1" "\0" "\0" - "0" "\0" "a\0" - /* Simple character class tests */ - "1" "[a]\0" "a\0" - "0" "[a]\0" "b\0" - "0" "[!a]\0" "a\0" - "1" "[!a]\0" "b\0" - "1" "[ab]\0" "a\0" - "1" "[ab]\0" "b\0" - "0" "[ab]\0" "c\0" - "1" "[!ab]\0" "c\0" - "1" "[a-c]\0" "b\0" - "0" "[a-c]\0" "d\0" - /* Corner cases in character class parsing */ - "1" "[a-c-e-g]\0" "-\0" - "0" "[a-c-e-g]\0" "d\0" - "1" "[a-c-e-g]\0" "f\0" - "1" "[]a-ceg-ik[]\0" "a\0" - "1" "[]a-ceg-ik[]\0" "]\0" - "1" "[]a-ceg-ik[]\0" "[\0" - "1" "[]a-ceg-ik[]\0" "h\0" - "0" "[]a-ceg-ik[]\0" "f\0" - "0" "[!]a-ceg-ik[]\0" "h\0" - "0" "[!]a-ceg-ik[]\0" "]\0" - "1" "[!]a-ceg-ik[]\0" "f\0" - /* Simple wild cards */ - "1" "?\0" "a\0" - "0" "?\0" "aa\0" - "0" "??\0" "a\0" - "1" "?x?\0" "axb\0" - "0" "?x?\0" "abx\0" - "0" "?x?\0" "xab\0" - /* Asterisk wild cards (backtracking) */ - "0" "*??\0" "a\0" - "1" "*??\0" "ab\0" - "1" "*??\0" "abc\0" - "1" "*??\0" "abcd\0" 
- "0" "??*\0" "a\0" - "1" "??*\0" "ab\0" - "1" "??*\0" "abc\0" - "1" "??*\0" "abcd\0" - "0" "?*?\0" "a\0" - "1" "?*?\0" "ab\0" - "1" "?*?\0" "abc\0" - "1" "?*?\0" "abcd\0" - "1" "*b\0" "b\0" - "1" "*b\0" "ab\0" - "0" "*b\0" "ba\0" - "1" "*b\0" "bb\0" - "1" "*b\0" "abb\0" - "1" "*b\0" "bab\0" - "1" "*bc\0" "abbc\0" - "1" "*bc\0" "bc\0" - "1" "*bc\0" "bbc\0" - "1" "*bc\0" "bcbc\0" - /* Multiple asterisks (complex backtracking) */ - "1" "*ac*\0" "abacadaeafag\0" - "1" "*ac*ae*ag*\0" "abacadaeafag\0" - "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0" - "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0" - "1" "*abcd*\0" "abcabcabcabcdefg\0" - "1" "*ab*cd*\0" "abcabcabcabcdefg\0" - "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0" - "0" "*abcd*\0" "abcabcabcabcefg\0" - "0" "*ab*cd*\0" "abcabcabcabcefg\0"; - -static int __init glob_init(void) -{ - unsigned successes = 0; - unsigned n = 0; - char const *p = glob_tests; - static char const message[] __initconst = - KERN_INFO "glob: %u self-tests passed, %u failed\n"; - - /* - * Tests are jammed together in a string. The first byte is '1' - * or '0' to indicate the expected outcome, or '\0' to indicate the - * end of the tests. Then come two null-terminated strings: the - * pattern and the string to match it against. - */ - while (*p) { - bool expected = *p++ & 1; - char const *pat = p; - - p += strlen(p) + 1; - successes += test(pat, p, expected); - p += strlen(p) + 1; - n++; - } - - n -= successes; - printk(message, successes, n); - - /* What's the errno for "kernel bug detected"? Guess... */ - return n ? 
-ECANCELED : 0; -} - -/* We need a dummy exit function to allow unload */ -static void __exit glob_fini(void) { } - -module_init(glob_init); -module_exit(glob_fini); - -MODULE_DESCRIPTION("glob(7) matching tests"); -MODULE_LICENSE("Dual MIT/GPL"); diff --git a/lib/group_cpus.c b/lib/group_cpus.c index 6d08ac05f371..a93df70919df 100644 --- a/lib/group_cpus.c +++ b/lib/group_cpus.c @@ -114,48 +114,15 @@ static int ncpus_cmp_func(const void *l, const void *r) return ln->ncpus - rn->ncpus; } -/* - * Allocate group number for each node, so that for each node: - * - * 1) the allocated number is >= 1 - * - * 2) the allocated number is <= active CPU number of this node - * - * The actual allocated total groups may be less than @numgrps when - * active total CPU number is less than @numgrps. - * - * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]' - * for each node. - */ -static void alloc_nodes_groups(unsigned int numgrps, - cpumask_var_t *node_to_cpumask, - const struct cpumask *cpu_mask, - const nodemask_t nodemsk, - struct cpumask *nmsk, - struct node_groups *node_groups) +static void alloc_groups_to_nodes(unsigned int numgrps, + unsigned int numcpus, + struct node_groups *node_groups, + unsigned int num_nodes) { - unsigned n, remaining_ncpus = 0; + unsigned int n, remaining_ncpus = numcpus; + unsigned int ngroups, ncpus; - for (n = 0; n < nr_node_ids; n++) { - node_groups[n].id = n; - node_groups[n].ncpus = UINT_MAX; - } - - for_each_node_mask(n, nodemsk) { - unsigned ncpus; - - cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); - ncpus = cpumask_weight(nmsk); - - if (!ncpus) - continue; - remaining_ncpus += ncpus; - node_groups[n].ncpus = ncpus; - } - - numgrps = min_t(unsigned, remaining_ncpus, numgrps); - - sort(node_groups, nr_node_ids, sizeof(node_groups[0]), + sort(node_groups, num_nodes, sizeof(node_groups[0]), ncpus_cmp_func, NULL); /* @@ -226,9 +193,8 @@ static void alloc_nodes_groups(unsigned int numgrps, * finally for each node X: grps(X) <= 
ncpu(X). * */ - for (n = 0; n < nr_node_ids; n++) { - unsigned ngroups, ncpus; + for (n = 0; n < num_nodes; n++) { if (node_groups[n].ncpus == UINT_MAX) continue; @@ -246,12 +212,201 @@ static void alloc_nodes_groups(unsigned int numgrps, } } +/* + * Allocate group number for each node, so that for each node: + * + * 1) the allocated number is >= 1 + * + * 2) the allocated number is <= active CPU number of this node + * + * The actual allocated total groups may be less than @numgrps when + * active total CPU number is less than @numgrps. + * + * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]' + * for each node. + */ +static void alloc_nodes_groups(unsigned int numgrps, + cpumask_var_t *node_to_cpumask, + const struct cpumask *cpu_mask, + const nodemask_t nodemsk, + struct cpumask *nmsk, + struct node_groups *node_groups) +{ + unsigned int n, numcpus = 0; + + for (n = 0; n < nr_node_ids; n++) { + node_groups[n].id = n; + node_groups[n].ncpus = UINT_MAX; + } + + for_each_node_mask(n, nodemsk) { + unsigned int ncpus; + + cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); + ncpus = cpumask_weight(nmsk); + + if (!ncpus) + continue; + numcpus += ncpus; + node_groups[n].ncpus = ncpus; + } + + numgrps = min_t(unsigned int, numcpus, numgrps); + alloc_groups_to_nodes(numgrps, numcpus, node_groups, nr_node_ids); +} + +static void assign_cpus_to_groups(unsigned int ncpus, + struct cpumask *nmsk, + struct node_groups *nv, + struct cpumask *masks, + unsigned int *curgrp, + unsigned int last_grp) +{ + unsigned int v, cpus_per_grp, extra_grps; + /* Account for rounding errors */ + extra_grps = ncpus - nv->ngroups * (ncpus / nv->ngroups); + + /* Spread allocated groups on CPUs of the current node */ + for (v = 0; v < nv->ngroups; v++, *curgrp += 1) { + cpus_per_grp = ncpus / nv->ngroups; + + /* Account for extra groups to compensate rounding errors */ + if (extra_grps) { + cpus_per_grp++; + --extra_grps; + } + + /* + * wrapping has to be considered given 'startgrp' + 
* may start anywhere + */ + if (*curgrp >= last_grp) + *curgrp = 0; + grp_spread_init_one(&masks[*curgrp], nmsk, cpus_per_grp); + } +} + +static int alloc_cluster_groups(unsigned int ncpus, + unsigned int ngroups, + struct cpumask *node_cpumask, + cpumask_var_t msk, + const struct cpumask ***clusters_ptr, + struct node_groups **cluster_groups_ptr) +{ + unsigned int ncluster = 0; + unsigned int cpu, nc, n; + const struct cpumask *cluster_mask; + const struct cpumask **clusters; + struct node_groups *cluster_groups; + + cpumask_copy(msk, node_cpumask); + + /* Probe how many clusters in this node. */ + while (1) { + cpu = cpumask_first(msk); + if (cpu >= nr_cpu_ids) + break; + + cluster_mask = topology_cluster_cpumask(cpu); + if (!cpumask_weight(cluster_mask)) + goto no_cluster; + /* Clean out CPUs on the same cluster. */ + cpumask_andnot(msk, msk, cluster_mask); + ncluster++; + } + + /* If ngroups < ncluster, cross cluster is inevitable, skip. */ + if (ncluster == 0 || ncluster > ngroups) + goto no_cluster; + + /* Allocate memory based on cluster number. */ + clusters = kcalloc(ncluster, sizeof(struct cpumask *), GFP_KERNEL); + if (!clusters) + goto no_cluster; + cluster_groups = kcalloc(ncluster, sizeof(struct node_groups), GFP_KERNEL); + if (!cluster_groups) + goto fail_cluster_groups; + + /* Filling cluster info for later process. */ + cpumask_copy(msk, node_cpumask); + for (n = 0; n < ncluster; n++) { + cpu = cpumask_first(msk); + cluster_mask = topology_cluster_cpumask(cpu); + nc = cpumask_weight_and(cluster_mask, node_cpumask); + clusters[n] = cluster_mask; + cluster_groups[n].id = n; + cluster_groups[n].ncpus = nc; + cpumask_andnot(msk, msk, cluster_mask); + } + + alloc_groups_to_nodes(ngroups, ncpus, cluster_groups, ncluster); + + *clusters_ptr = clusters; + *cluster_groups_ptr = cluster_groups; + return ncluster; + + fail_cluster_groups: + kfree(clusters); + no_cluster: + return 0; +} + +/* + * Try group CPUs evenly for cluster locality within a NUMA node. 
+ * + * Return: true if success, false otherwise. + */ +static bool __try_group_cluster_cpus(unsigned int ncpus, + unsigned int ngroups, + struct cpumask *node_cpumask, + struct cpumask *masks, + unsigned int *curgrp, + unsigned int last_grp) +{ + struct node_groups *cluster_groups; + const struct cpumask **clusters; + unsigned int ncluster; + bool ret = false; + cpumask_var_t nmsk; + unsigned int i, nc; + + if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) + goto fail_nmsk_alloc; + + ncluster = alloc_cluster_groups(ncpus, ngroups, node_cpumask, nmsk, + &clusters, &cluster_groups); + + if (ncluster == 0) + goto fail_no_clusters; + + for (i = 0; i < ncluster; i++) { + struct node_groups *nv = &cluster_groups[i]; + + /* Get the cpus on this cluster. */ + cpumask_and(nmsk, node_cpumask, clusters[nv->id]); + nc = cpumask_weight(nmsk); + if (!nc) + continue; + WARN_ON_ONCE(nv->ngroups > nc); + + assign_cpus_to_groups(nc, nmsk, nv, masks, curgrp, last_grp); + } + + ret = true; + kfree(cluster_groups); + kfree(clusters); + fail_no_clusters: + free_cpumask_var(nmsk); + fail_nmsk_alloc: + return ret; +} + static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps, cpumask_var_t *node_to_cpumask, const struct cpumask *cpu_mask, struct cpumask *nmsk, struct cpumask *masks) { - unsigned int i, n, nodes, cpus_per_grp, extra_grps, done = 0; + unsigned int i, n, nodes, done = 0; unsigned int last_grp = numgrps; unsigned int curgrp = startgrp; nodemask_t nodemsk = NODE_MASK_NONE; @@ -287,7 +442,7 @@ static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps, alloc_nodes_groups(numgrps, node_to_cpumask, cpu_mask, nodemsk, nmsk, node_groups); for (i = 0; i < nr_node_ids; i++) { - unsigned int ncpus, v; + unsigned int ncpus; struct node_groups *nv = &node_groups[i]; if (nv->ngroups == UINT_MAX) @@ -301,28 +456,14 @@ static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps, WARN_ON_ONCE(nv->ngroups > ncpus); - /* Account for rounding errors 
*/ - extra_grps = ncpus - nv->ngroups * (ncpus / nv->ngroups); - - /* Spread allocated groups on CPUs of the current node */ - for (v = 0; v < nv->ngroups; v++, curgrp++) { - cpus_per_grp = ncpus / nv->ngroups; - - /* Account for extra groups to compensate rounding errors */ - if (extra_grps) { - cpus_per_grp++; - --extra_grps; - } - - /* - * wrapping has to be considered given 'startgrp' - * may start anywhere - */ - if (curgrp >= last_grp) - curgrp = 0; - grp_spread_init_one(&masks[curgrp], nmsk, - cpus_per_grp); + if (__try_group_cluster_cpus(ncpus, nv->ngroups, nmsk, + masks, &curgrp, last_grp)) { + done += nv->ngroups; + continue; } + + assign_cpus_to_groups(ncpus, nmsk, nv, masks, &curgrp, + last_grp); done += nv->ngroups; } kfree(node_groups); diff --git a/lib/hexdump.c b/lib/hexdump.c index c3db7c3a7643..2e5cd8c24769 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/kfifo.c b/lib/kfifo.c index 525e66f8294c..2633f9cc336c 100644 --- a/lib/kfifo.c +++ b/lib/kfifo.c @@ -41,7 +41,7 @@ int __kfifo_alloc_node(struct __kfifo *fifo, unsigned int size, return -EINVAL; } - fifo->data = kmalloc_array_node(esize, size, gfp_mask, node); + fifo->data = kmalloc_array_node(size, esize, gfp_mask, node); if (!fifo->data) { fifo->mask = 0; diff --git a/lib/kstrtox.c b/lib/kstrtox.c index bdde40cd69d7..97be2a39f537 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -340,8 +340,8 @@ EXPORT_SYMBOL(kstrtos8); * @s: input string * @res: result * - * This routine returns 0 iff the first character is one of 'YyTt1NnFf0', or - * [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL. Value + * This routine returns 0 iff the first character is one of 'EeYyTt1DdNnFf0', + * or [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL. Value * pointed to by res is updated upon finding a match. 
*/ noinline diff --git a/lib/once.c b/lib/once.c index 2c306f0e891e..8557eb489f34 100644 --- a/lib/once.c +++ b/lib/once.c @@ -93,6 +93,6 @@ void __do_once_sleepable_done(bool *done, struct static_key_true *once_key, { *done = true; mutex_unlock(&once_mutex); - once_disable_jump(once_key, mod); + static_branch_disable(once_key); } EXPORT_SYMBOL(__do_once_sleepable_done); diff --git a/lib/string_helpers.c b/lib/string_helpers.c index ffb8ead6d4cd..8cb6f66c9c2b 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/test_kho.c b/lib/test_kho.c index 47de56280795..a20fafaf9846 100644 --- a/lib/test_kho.c +++ b/lib/test_kho.c @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -339,11 +340,15 @@ module_init(kho_test_init); static void kho_test_cleanup(void) { + /* unpreserve and free the data stored in folios */ + kho_test_unpreserve_data(&kho_test_state); for (int i = 0; i < kho_test_state.nr_folios; i++) folio_put(kho_test_state.folios[i]); kvfree(kho_test_state.folios); - vfree(kho_test_state.folios_info); + + /* Unpreserve and release the FDT folio */ + kho_unpreserve_folio(kho_test_state.fdt); folio_put(kho_test_state.fdt); } diff --git a/lib/test_uuid.c b/lib/test_uuid.c deleted file mode 100644 index 0124fad5d72c..000000000000 --- a/lib/test_uuid.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Test cases for lib/uuid.c module. 
- */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include - -struct test_uuid_data { - const char *uuid; - guid_t le; - uuid_t be; -}; - -static const struct test_uuid_data test_uuid_test_data[] = { - { - .uuid = "c33f4995-3701-450e-9fbf-206a2e98e576", - .le = GUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76), - .be = UUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76), - }, - { - .uuid = "64b4371c-77c1-48f9-8221-29f054fc023b", - .le = GUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b), - .be = UUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b), - }, - { - .uuid = "0cb4ddff-a545-4401-9d06-688af53e7f84", - .le = GUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84), - .be = UUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84), - }, -}; - -static const char * const test_uuid_wrong_data[] = { - "c33f4995-3701-450e-9fbf206a2e98e576 ", /* no hyphen(s) */ - "64b4371c-77c1-48f9-8221-29f054XX023b", /* invalid character(s) */ - "0cb4ddff-a545-4401-9d06-688af53e", /* not enough data */ -}; - -static unsigned total_tests __initdata; -static unsigned failed_tests __initdata; - -static void __init test_uuid_failed(const char *prefix, bool wrong, bool be, - const char *data, const char *actual) -{ - pr_err("%s test #%u %s %s data: '%s'\n", - prefix, - total_tests, - wrong ? "passed on wrong" : "failed on", - be ? 
"BE" : "LE", - data); - if (actual && *actual) - pr_err("%s test #%u actual data: '%s'\n", - prefix, - total_tests, - actual); - failed_tests++; -} - -static void __init test_uuid_test(const struct test_uuid_data *data) -{ - guid_t le; - uuid_t be; - char buf[48]; - - /* LE */ - total_tests++; - if (guid_parse(data->uuid, &le)) - test_uuid_failed("conversion", false, false, data->uuid, NULL); - - total_tests++; - if (!guid_equal(&data->le, &le)) { - sprintf(buf, "%pUl", &le); - test_uuid_failed("cmp", false, false, data->uuid, buf); - } - - /* BE */ - total_tests++; - if (uuid_parse(data->uuid, &be)) - test_uuid_failed("conversion", false, true, data->uuid, NULL); - - total_tests++; - if (!uuid_equal(&data->be, &be)) { - sprintf(buf, "%pUb", &be); - test_uuid_failed("cmp", false, true, data->uuid, buf); - } -} - -static void __init test_uuid_wrong(const char *data) -{ - guid_t le; - uuid_t be; - - /* LE */ - total_tests++; - if (!guid_parse(data, &le)) - test_uuid_failed("negative", true, false, data, NULL); - - /* BE */ - total_tests++; - if (!uuid_parse(data, &be)) - test_uuid_failed("negative", true, true, data, NULL); -} - -static int __init test_uuid_init(void) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(test_uuid_test_data); i++) - test_uuid_test(&test_uuid_test_data[i]); - - for (i = 0; i < ARRAY_SIZE(test_uuid_wrong_data); i++) - test_uuid_wrong(test_uuid_wrong_data[i]); - - if (failed_tests == 0) - pr_info("all %u tests passed\n", total_tests); - else - pr_err("failed %u out of %u tests\n", failed_tests, total_tests); - - return failed_tests ? 
-EINVAL : 0; -} -module_init(test_uuid_init); - -static void __exit test_uuid_exit(void) -{ - /* do nothing */ -} -module_exit(test_uuid_exit); - -MODULE_AUTHOR("Andy Shevchenko "); -MODULE_DESCRIPTION("Test cases for lib/uuid.c module"); -MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/tests/Makefile b/lib/tests/Makefile index 0f24048f3684..05f74edbc62b 100644 --- a/lib/tests/Makefile +++ b/lib/tests/Makefile @@ -20,20 +20,24 @@ CFLAGS_fortify_kunit.o += $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o CFLAGS_test_fprobe.o += $(CC_FLAGS_FTRACE) obj-$(CONFIG_FPROBE_SANITY_TEST) += test_fprobe.o +obj-$(CONFIG_GLOB_KUNIT_TEST) += glob_kunit.o obj-$(CONFIG_HASHTABLE_KUNIT_TEST) += hashtable_test.o obj-$(CONFIG_HASH_KUNIT_TEST) += test_hash.o obj-$(CONFIG_TEST_IOV_ITER) += kunit_iov_iter.o obj-$(CONFIG_IS_SIGNED_TYPE_KUNIT_TEST) += is_signed_type_kunit.o obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o +obj-$(CONFIG_LIST_PRIVATE_KUNIT_TEST) += list-private-test.o obj-$(CONFIG_KFIFO_KUNIT_TEST) += kfifo_kunit.o obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o +obj-$(CONFIG_LIVEUPDATE_TEST) += liveupdate.o CFLAGS_longest_symbol_kunit.o += $(call cc-disable-warning, missing-prototypes) obj-$(CONFIG_LONGEST_SYM_KUNIT_TEST) += longest_symbol_kunit.o obj-$(CONFIG_MEMCPY_KUNIT_TEST) += memcpy_kunit.o +obj-$(CONFIG_MIN_HEAP_KUNIT_TEST) += min_heap_kunit.o CFLAGS_overflow_kunit.o = $(call cc-disable-warning, tautological-constant-out-of-range-compare) obj-$(CONFIG_OVERFLOW_KUNIT_TEST) += overflow_kunit.o obj-$(CONFIG_PRINTF_KUNIT_TEST) += printf_kunit.o @@ -50,5 +54,6 @@ obj-$(CONFIG_STRING_HELPERS_KUNIT_TEST) += string_helpers_kunit.o obj-$(CONFIG_USERCOPY_KUNIT_TEST) += usercopy_kunit.o obj-$(CONFIG_UTIL_MACROS_KUNIT) += util_macros_kunit.o obj-$(CONFIG_RATELIMIT_KUNIT_TEST) += test_ratelimit.o +obj-$(CONFIG_UUID_KUNIT_TEST) += uuid_kunit.o 
obj-$(CONFIG_TEST_RUNTIME_MODULE) += module/ diff --git a/lib/tests/glob_kunit.c b/lib/tests/glob_kunit.c new file mode 100644 index 000000000000..362b1eda8e5b --- /dev/null +++ b/lib/tests/glob_kunit.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: MIT OR GPL-2.0 +/* + * Test cases for glob functions. + */ + +#include +#include +#include + +/** + * struct glob_test_case - Test case for glob matching. + * @pat: Pattern to match. + * @str: String to match against. + * @expected: Expected glob_match result, true if matched. + */ +struct glob_test_case { + const char *pat; + const char *str; + bool expected; +}; + +static const struct glob_test_case glob_test_cases[] = { + /* Some basic tests */ + { .pat = "a", .str = "a", .expected = true }, + { .pat = "a", .str = "b", .expected = false }, + { .pat = "a", .str = "aa", .expected = false }, + { .pat = "a", .str = "", .expected = false }, + { .pat = "", .str = "", .expected = true }, + { .pat = "", .str = "a", .expected = false }, + /* Simple character class tests */ + { .pat = "[a]", .str = "a", .expected = true }, + { .pat = "[a]", .str = "b", .expected = false }, + { .pat = "[!a]", .str = "a", .expected = false }, + { .pat = "[!a]", .str = "b", .expected = true }, + { .pat = "[ab]", .str = "a", .expected = true }, + { .pat = "[ab]", .str = "b", .expected = true }, + { .pat = "[ab]", .str = "c", .expected = false }, + { .pat = "[!ab]", .str = "c", .expected = true }, + { .pat = "[a-c]", .str = "b", .expected = true }, + { .pat = "[a-c]", .str = "d", .expected = false }, + /* Corner cases in character class parsing */ + { .pat = "[a-c-e-g]", .str = "-", .expected = true }, + { .pat = "[a-c-e-g]", .str = "d", .expected = false }, + { .pat = "[a-c-e-g]", .str = "f", .expected = true }, + { .pat = "[]a-ceg-ik[]", .str = "a", .expected = true }, + { .pat = "[]a-ceg-ik[]", .str = "]", .expected = true }, + { .pat = "[]a-ceg-ik[]", .str = "[", .expected = true }, + { .pat = "[]a-ceg-ik[]", .str = "h", .expected = true }, + { 
.pat = "[]a-ceg-ik[]", .str = "f", .expected = false }, + { .pat = "[!]a-ceg-ik[]", .str = "h", .expected = false }, + { .pat = "[!]a-ceg-ik[]", .str = "]", .expected = false }, + { .pat = "[!]a-ceg-ik[]", .str = "f", .expected = true }, + /* Simple wild cards */ + { .pat = "?", .str = "a", .expected = true }, + { .pat = "?", .str = "aa", .expected = false }, + { .pat = "??", .str = "a", .expected = false }, + { .pat = "?x?", .str = "axb", .expected = true }, + { .pat = "?x?", .str = "abx", .expected = false }, + { .pat = "?x?", .str = "xab", .expected = false }, + /* Asterisk wild cards (backtracking) */ + { .pat = "*??", .str = "a", .expected = false }, + { .pat = "*??", .str = "ab", .expected = true }, + { .pat = "*??", .str = "abc", .expected = true }, + { .pat = "*??", .str = "abcd", .expected = true }, + { .pat = "??*", .str = "a", .expected = false }, + { .pat = "??*", .str = "ab", .expected = true }, + { .pat = "??*", .str = "abc", .expected = true }, + { .pat = "??*", .str = "abcd", .expected = true }, + { .pat = "?*?", .str = "a", .expected = false }, + { .pat = "?*?", .str = "ab", .expected = true }, + { .pat = "?*?", .str = "abc", .expected = true }, + { .pat = "?*?", .str = "abcd", .expected = true }, + { .pat = "*b", .str = "b", .expected = true }, + { .pat = "*b", .str = "ab", .expected = true }, + { .pat = "*b", .str = "ba", .expected = false }, + { .pat = "*b", .str = "bb", .expected = true }, + { .pat = "*b", .str = "abb", .expected = true }, + { .pat = "*b", .str = "bab", .expected = true }, + { .pat = "*bc", .str = "abbc", .expected = true }, + { .pat = "*bc", .str = "bc", .expected = true }, + { .pat = "*bc", .str = "bbc", .expected = true }, + { .pat = "*bc", .str = "bcbc", .expected = true }, + /* Multiple asterisks (complex backtracking) */ + { .pat = "*ac*", .str = "abacadaeafag", .expected = true }, + { .pat = "*ac*ae*ag*", .str = "abacadaeafag", .expected = true }, + { .pat = "*a*b*[bc]*[ef]*g*", .str = "abacadaeafag", .expected = true }, 
+ { .pat = "*a*b*[ef]*[cd]*g*", .str = "abacadaeafag", .expected = false }, + { .pat = "*abcd*", .str = "abcabcabcabcdefg", .expected = true }, + { .pat = "*ab*cd*", .str = "abcabcabcabcdefg", .expected = true }, + { .pat = "*abcd*abcdef*", .str = "abcabcdabcdeabcdefg", .expected = true }, + { .pat = "*abcd*", .str = "abcabcabcabcefg", .expected = false }, + { .pat = "*ab*cd*", .str = "abcabcabcabcefg", .expected = false }, +}; + +static void glob_case_to_desc(const struct glob_test_case *t, char *desc) +{ + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "pat:\"%s\" str:\"%s\"", t->pat, t->str); +} + +KUNIT_ARRAY_PARAM(glob, glob_test_cases, glob_case_to_desc); + +static void glob_test_match(struct kunit *test) +{ + const struct glob_test_case *params = test->param_value; + + KUNIT_EXPECT_EQ_MSG(test, + glob_match(params->pat, params->str), + params->expected, + "Pattern: \"%s\", String: \"%s\", Expected: %d", + params->pat, params->str, params->expected); +} + +static struct kunit_case glob_kunit_test_cases[] = { + KUNIT_CASE_PARAM(glob_test_match, glob_gen_params), + {} +}; + +static struct kunit_suite glob_test_suite = { + .name = "glob", + .test_cases = glob_kunit_test_cases, +}; + +kunit_test_suite(glob_test_suite); +MODULE_DESCRIPTION("Test cases for glob functions"); +MODULE_LICENSE("Dual MIT/GPL"); diff --git a/lib/tests/list-private-test.c b/lib/tests/list-private-test.c new file mode 100644 index 000000000000..3bd62939ae67 --- /dev/null +++ b/lib/tests/list-private-test.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KUnit compilation/smoke test for Private list primitives. + * + * Copyright (c) 2025, Google LLC. + * Pasha Tatashin + */ +#include +#include + +/* + * This forces compiler to warn if you access it directly, because list + * primitives expect (struct list_head *), not (volatile struct list_head *). + */ +#undef __private +#define __private volatile + +/* Redefine ACCESS_PRIVATE for this test. 
*/ +#undef ACCESS_PRIVATE +#define ACCESS_PRIVATE(p, member) \ + (*((struct list_head *)((unsigned long)&((p)->member)))) + +struct list_test_struct { + int data; + struct list_head __private list; +}; + +static void list_private_compile_test(struct kunit *test) +{ + struct list_test_struct entry; + struct list_test_struct *pos, *n; + LIST_HEAD(head); + + INIT_LIST_HEAD(&ACCESS_PRIVATE(&entry, list)); + list_add(&ACCESS_PRIVATE(&entry, list), &head); + pos = &entry; + + pos = list_private_entry(&ACCESS_PRIVATE(&entry, list), struct list_test_struct, list); + pos = list_private_first_entry(&head, struct list_test_struct, list); + pos = list_private_last_entry(&head, struct list_test_struct, list); + pos = list_private_next_entry(pos, list); + pos = list_private_prev_entry(pos, list); + pos = list_private_next_entry_circular(pos, &head, list); + pos = list_private_prev_entry_circular(pos, &head, list); + + if (list_private_entry_is_head(pos, &head, list)) + return; + + list_private_for_each_entry(pos, &head, list) { } + list_private_for_each_entry_reverse(pos, &head, list) { } + list_private_for_each_entry_continue(pos, &head, list) { } + list_private_for_each_entry_continue_reverse(pos, &head, list) { } + list_private_for_each_entry_from(pos, &head, list) { } + list_private_for_each_entry_from_reverse(pos, &head, list) { } + + list_private_for_each_entry_safe(pos, n, &head, list) + list_private_safe_reset_next(pos, n, list); + list_private_for_each_entry_safe_continue(pos, n, &head, list) { } + list_private_for_each_entry_safe_from(pos, n, &head, list) { } + list_private_for_each_entry_safe_reverse(pos, n, &head, list) { } +} + +static struct kunit_case list_private_test_cases[] = { + KUNIT_CASE(list_private_compile_test), + {}, +}; + +static struct kunit_suite list_private_test_module = { + .name = "list-private-kunit-test", + .test_cases = list_private_test_cases, +}; + +kunit_test_suite(list_private_test_module); + +MODULE_DESCRIPTION("KUnit compilation test for 
private list primitives"); +MODULE_LICENSE("GPL"); diff --git a/lib/tests/liveupdate.c b/lib/tests/liveupdate.c new file mode 100644 index 000000000000..496d6ef91a30 --- /dev/null +++ b/lib/tests/liveupdate.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (c) 2025, Google LLC. + * Pasha Tatashin + */ + +#define pr_fmt(fmt) KBUILD_MODNAME " test: " fmt + +#include +#include +#include +#include +#include +#include "../../kernel/liveupdate/luo_internal.h" + +static const struct liveupdate_flb_ops test_flb_ops; +#define DEFINE_TEST_FLB(i) { \ + .ops = &test_flb_ops, \ + .compatible = LIVEUPDATE_TEST_FLB_COMPATIBLE(i), \ +} + +/* Number of Test FLBs to register with every file handler */ +#define TEST_NFLBS 3 +static struct liveupdate_flb test_flbs[TEST_NFLBS] = { + DEFINE_TEST_FLB(0), + DEFINE_TEST_FLB(1), + DEFINE_TEST_FLB(2), +}; + +#define TEST_FLB_MAGIC_BASE 0xFEEDF00DCAFEBEE0ULL + +static int test_flb_preserve(struct liveupdate_flb_op_args *argp) +{ + ptrdiff_t index = argp->flb - test_flbs; + + pr_info("%s: preserve was triggered\n", argp->flb->compatible); + argp->data = TEST_FLB_MAGIC_BASE + index; + + return 0; +} + +static void test_flb_unpreserve(struct liveupdate_flb_op_args *argp) +{ + pr_info("%s: unpreserve was triggered\n", argp->flb->compatible); +} + +static int test_flb_retrieve(struct liveupdate_flb_op_args *argp) +{ + ptrdiff_t index = argp->flb - test_flbs; + u64 expected_data = TEST_FLB_MAGIC_BASE + index; + + if (argp->data == expected_data) { + pr_info("%s: found flb data from the previous boot\n", + argp->flb->compatible); + argp->obj = (void *)argp->data; + } else { + pr_err("%s: ERROR - incorrect data handle: %llx, expected %llx\n", + argp->flb->compatible, argp->data, expected_data); + return -EINVAL; + } + + return 0; +} + +static void test_flb_finish(struct liveupdate_flb_op_args *argp) +{ + ptrdiff_t index = argp->flb - test_flbs; + void *expected_obj = (void *)(TEST_FLB_MAGIC_BASE + index); + + if (argp->obj 
== expected_obj) { + pr_info("%s: finish was triggered\n", argp->flb->compatible); + } else { + pr_err("%s: ERROR - finish called with invalid object\n", + argp->flb->compatible); + } +} + +static const struct liveupdate_flb_ops test_flb_ops = { + .preserve = test_flb_preserve, + .unpreserve = test_flb_unpreserve, + .retrieve = test_flb_retrieve, + .finish = test_flb_finish, + .owner = THIS_MODULE, +}; + +static void liveupdate_test_init(void) +{ + static DEFINE_MUTEX(init_lock); + static bool initialized; + int i; + + guard(mutex)(&init_lock); + + if (initialized) + return; + + for (i = 0; i < TEST_NFLBS; i++) { + struct liveupdate_flb *flb = &test_flbs[i]; + void *obj; + int err; + + err = liveupdate_flb_get_incoming(flb, &obj); + if (err && err != -ENODATA && err != -ENOENT) { + pr_err("liveupdate_flb_get_incoming for %s failed: %pe\n", + flb->compatible, ERR_PTR(err)); + } + } + initialized = true; +} + +void liveupdate_test_register(struct liveupdate_file_handler *fh) +{ + int err, i; + + liveupdate_test_init(); + + for (i = 0; i < TEST_NFLBS; i++) { + struct liveupdate_flb *flb = &test_flbs[i]; + + err = liveupdate_register_flb(fh, flb); + if (err) { + pr_err("Failed to register %s %pe\n", + flb->compatible, ERR_PTR(err)); + } + } + + err = liveupdate_register_flb(fh, &test_flbs[0]); + if (!err || err != -EEXIST) { + pr_err("Failed: %s should be already registered, but got err: %pe\n", + test_flbs[0].compatible, ERR_PTR(err)); + } + + pr_info("Registered %d FLBs with file handler: [%s]\n", + TEST_NFLBS, fh->compatible); +} + +void liveupdate_test_unregister(struct liveupdate_file_handler *fh) +{ + int err, i; + + for (i = 0; i < TEST_NFLBS; i++) { + struct liveupdate_flb *flb = &test_flbs[i]; + + err = liveupdate_unregister_flb(fh, flb); + if (err) { + pr_err("Failed to unregister %s %pe\n", + flb->compatible, ERR_PTR(err)); + } + } + + pr_info("Unregistered %d FLBs from file handler: [%s]\n", + TEST_NFLBS, fh->compatible); +} + +MODULE_LICENSE("GPL"); 
+MODULE_AUTHOR("Pasha Tatashin "); +MODULE_DESCRIPTION("In-kernel test for LUO mechanism"); diff --git a/lib/test_min_heap.c b/lib/tests/min_heap_kunit.c similarity index 58% rename from lib/test_min_heap.c rename to lib/tests/min_heap_kunit.c index a9c4a74d3898..9c1122661698 100644 --- a/lib/test_min_heap.c +++ b/lib/tests/min_heap_kunit.c @@ -1,60 +1,66 @@ // SPDX-License-Identifier: GPL-2.0-only -#define pr_fmt(fmt) "min_heap_test: " fmt - /* * Test cases for the min max heap. */ -#include +#include #include #include -#include #include +struct min_heap_test_case { + const char *str; + bool min_heap; +}; + +static struct min_heap_test_case min_heap_cases[] = { + { + .str = "min", + .min_heap = true, + }, + { + .str = "max", + .min_heap = false, + }, +}; + +KUNIT_ARRAY_PARAM_DESC(min_heap, min_heap_cases, str); + DEFINE_MIN_HEAP(int, min_heap_test); -static __init bool less_than(const void *lhs, const void *rhs, void __always_unused *args) +static bool less_than(const void *lhs, const void *rhs, void __always_unused *args) { return *(int *)lhs < *(int *)rhs; } -static __init bool greater_than(const void *lhs, const void *rhs, void __always_unused *args) +static bool greater_than(const void *lhs, const void *rhs, void __always_unused *args) { return *(int *)lhs > *(int *)rhs; } -static __init int pop_verify_heap(bool min_heap, - struct min_heap_test *heap, - const struct min_heap_callbacks *funcs) +static void pop_verify_heap(struct kunit *test, + bool min_heap, + struct min_heap_test *heap, + const struct min_heap_callbacks *funcs) { int *values = heap->data; - int err = 0; int last; last = values[0]; min_heap_pop_inline(heap, funcs, NULL); while (heap->nr > 0) { - if (min_heap) { - if (last > values[0]) { - pr_err("error: expected %d <= %d\n", last, - values[0]); - err++; - } - } else { - if (last < values[0]) { - pr_err("error: expected %d >= %d\n", last, - values[0]); - err++; - } - } + if (min_heap) + KUNIT_EXPECT_LE(test, last, values[0]); + else + 
KUNIT_EXPECT_GE(test, last, values[0]); last = values[0]; min_heap_pop_inline(heap, funcs, NULL); } - return err; } -static __init int test_heapify_all(bool min_heap) +static void test_heapify_all(struct kunit *test) { + const struct min_heap_test_case *params = test->param_value; int values[] = { 3, 1, 2, 4, 0x8000000, 0x7FFFFFF, 0, -3, -1, -2, -4, 0x8000000, 0x7FFFFFF }; struct min_heap_test heap = { @@ -63,15 +69,14 @@ static __init int test_heapify_all(bool min_heap) .size = ARRAY_SIZE(values), }; struct min_heap_callbacks funcs = { - .less = min_heap ? less_than : greater_than, + .less = params->min_heap ? less_than : greater_than, .swp = NULL, }; - int i, err; + int i; /* Test with known set of values. */ min_heapify_all_inline(&heap, &funcs, NULL); - err = pop_verify_heap(min_heap, &heap, &funcs); - + pop_verify_heap(test, params->min_heap, &heap, &funcs); /* Test with randomly generated values. */ heap.nr = ARRAY_SIZE(values); @@ -79,13 +84,12 @@ static __init int test_heapify_all(bool min_heap) values[i] = get_random_u32(); min_heapify_all_inline(&heap, &funcs, NULL); - err += pop_verify_heap(min_heap, &heap, &funcs); - - return err; + pop_verify_heap(test, params->min_heap, &heap, &funcs); } -static __init int test_heap_push(bool min_heap) +static void test_heap_push(struct kunit *test) { + const struct min_heap_test_case *params = test->param_value; const int data[] = { 3, 1, 2, 4, 0x80000000, 0x7FFFFFFF, 0, -3, -1, -2, -4, 0x80000000, 0x7FFFFFFF }; int values[ARRAY_SIZE(data)]; @@ -95,29 +99,28 @@ static __init int test_heap_push(bool min_heap) .size = ARRAY_SIZE(values), }; struct min_heap_callbacks funcs = { - .less = min_heap ? less_than : greater_than, + .less = params->min_heap ? less_than : greater_than, .swp = NULL, }; - int i, temp, err; + int i, temp; /* Test with known set of values copied from data. 
*/ for (i = 0; i < ARRAY_SIZE(data); i++) min_heap_push_inline(&heap, &data[i], &funcs, NULL); - err = pop_verify_heap(min_heap, &heap, &funcs); + pop_verify_heap(test, params->min_heap, &heap, &funcs); /* Test with randomly generated values. */ while (heap.nr < heap.size) { temp = get_random_u32(); min_heap_push_inline(&heap, &temp, &funcs, NULL); } - err += pop_verify_heap(min_heap, &heap, &funcs); - - return err; + pop_verify_heap(test, params->min_heap, &heap, &funcs); } -static __init int test_heap_pop_push(bool min_heap) +static void test_heap_pop_push(struct kunit *test) { + const struct min_heap_test_case *params = test->param_value; const int data[] = { 3, 1, 2, 4, 0x80000000, 0x7FFFFFFF, 0, -3, -1, -2, -4, 0x80000000, 0x7FFFFFFF }; int values[ARRAY_SIZE(data)]; @@ -127,13 +130,13 @@ static __init int test_heap_pop_push(bool min_heap) .size = ARRAY_SIZE(values), }; struct min_heap_callbacks funcs = { - .less = min_heap ? less_than : greater_than, + .less = params->min_heap ? less_than : greater_than, .swp = NULL, }; - int i, temp, err; + int i, temp; /* Fill values with data to pop and replace. */ - temp = min_heap ? 0x80000000 : 0x7FFFFFFF; + temp = params->min_heap ? 
0x80000000 : 0x7FFFFFFF; for (i = 0; i < ARRAY_SIZE(data); i++) min_heap_push_inline(&heap, &temp, &funcs, NULL); @@ -141,7 +144,7 @@ static __init int test_heap_pop_push(bool min_heap) for (i = 0; i < ARRAY_SIZE(data); i++) min_heap_pop_push_inline(&heap, &data[i], &funcs, NULL); - err = pop_verify_heap(min_heap, &heap, &funcs); + pop_verify_heap(test, params->min_heap, &heap, &funcs); heap.nr = 0; for (i = 0; i < ARRAY_SIZE(data); i++) @@ -152,13 +155,12 @@ static __init int test_heap_pop_push(bool min_heap) temp = get_random_u32(); min_heap_pop_push_inline(&heap, &temp, &funcs, NULL); } - err += pop_verify_heap(min_heap, &heap, &funcs); - - return err; + pop_verify_heap(test, params->min_heap, &heap, &funcs); } -static __init int test_heap_del(bool min_heap) +static void test_heap_del(struct kunit *test) { + const struct min_heap_test_case *params = test->param_value; int values[] = { 3, 1, 2, 4, 0x8000000, 0x7FFFFFF, 0, -3, -1, -2, -4, 0x8000000, 0x7FFFFFF }; struct min_heap_test heap; @@ -166,17 +168,16 @@ static __init int test_heap_del(bool min_heap) min_heap_init_inline(&heap, values, ARRAY_SIZE(values)); heap.nr = ARRAY_SIZE(values); struct min_heap_callbacks funcs = { - .less = min_heap ? less_than : greater_than, + .less = params->min_heap ? less_than : greater_than, .swp = NULL, }; - int i, err; + int i; /* Test with known set of values. */ min_heapify_all_inline(&heap, &funcs, NULL); for (i = 0; i < ARRAY_SIZE(values) / 2; i++) min_heap_del_inline(&heap, get_random_u32() % heap.nr, &funcs, NULL); - err = pop_verify_heap(min_heap, &heap, &funcs); - + pop_verify_heap(test, params->min_heap, &heap, &funcs); /* Test with randomly generated values. 
*/ heap.nr = ARRAY_SIZE(values); @@ -186,37 +187,23 @@ static __init int test_heap_del(bool min_heap) for (i = 0; i < ARRAY_SIZE(values) / 2; i++) min_heap_del_inline(&heap, get_random_u32() % heap.nr, &funcs, NULL); - err += pop_verify_heap(min_heap, &heap, &funcs); - - return err; + pop_verify_heap(test, params->min_heap, &heap, &funcs); } -static int __init test_min_heap_init(void) -{ - int err = 0; +static struct kunit_case min_heap_test_cases[] = { + KUNIT_CASE_PARAM(test_heapify_all, min_heap_gen_params), + KUNIT_CASE_PARAM(test_heap_push, min_heap_gen_params), + KUNIT_CASE_PARAM(test_heap_pop_push, min_heap_gen_params), + KUNIT_CASE_PARAM(test_heap_del, min_heap_gen_params), + {}, +}; - err += test_heapify_all(true); - err += test_heapify_all(false); - err += test_heap_push(true); - err += test_heap_push(false); - err += test_heap_pop_push(true); - err += test_heap_pop_push(false); - err += test_heap_del(true); - err += test_heap_del(false); - if (err) { - pr_err("test failed with %d errors\n", err); - return -EINVAL; - } - pr_info("test passed\n"); - return 0; -} -module_init(test_min_heap_init); +static struct kunit_suite min_heap_test_suite = { + .name = "min_heap", + .test_cases = min_heap_test_cases, +}; -static void __exit test_min_heap_exit(void) -{ - /* do nothing */ -} -module_exit(test_min_heap_exit); +kunit_test_suite(min_heap_test_suite); MODULE_DESCRIPTION("Test cases for the min max heap"); MODULE_LICENSE("GPL"); diff --git a/lib/tests/uuid_kunit.c b/lib/tests/uuid_kunit.c new file mode 100644 index 000000000000..de71b2649dac --- /dev/null +++ b/lib/tests/uuid_kunit.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* + * Test cases for lib/uuid.c module. 
+ */ + +#include +#include + +struct test_uuid_data { + const char *uuid; + guid_t le; + uuid_t be; +}; + +static const struct test_uuid_data test_uuid_test_data[] = { + { + .uuid = "c33f4995-3701-450e-9fbf-206a2e98e576", + .le = GUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76), + .be = UUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76), + }, + { + .uuid = "64b4371c-77c1-48f9-8221-29f054fc023b", + .le = GUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b), + .be = UUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b), + }, + { + .uuid = "0cb4ddff-a545-4401-9d06-688af53e7f84", + .le = GUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84), + .be = UUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84), + }, +}; + +static const char * const test_uuid_wrong_data[] = { + "c33f4995-3701-450e-9fbf206a2e98e576 ", /* no hyphen(s) */ + "64b4371c-77c1-48f9-8221-29f054XX023b", /* invalid character(s) */ + "0cb4ddff-a545-4401-9d06-688af53e", /* not enough data */ +}; + +static void uuid_test_guid_valid(struct kunit *test) +{ + unsigned int i; + const struct test_uuid_data *data; + guid_t le; + + for (i = 0; i < ARRAY_SIZE(test_uuid_test_data); i++) { + data = &test_uuid_test_data[i]; + KUNIT_EXPECT_EQ(test, guid_parse(data->uuid, &le), 0); + KUNIT_EXPECT_TRUE(test, guid_equal(&data->le, &le)); + } +} + +static void uuid_test_uuid_valid(struct kunit *test) +{ + unsigned int i; + const struct test_uuid_data *data; + uuid_t be; + + for (i = 0; i < ARRAY_SIZE(test_uuid_test_data); i++) { + data = &test_uuid_test_data[i]; + KUNIT_EXPECT_EQ(test, uuid_parse(data->uuid, &be), 0); + KUNIT_EXPECT_TRUE(test, uuid_equal(&data->be, &be)); + } +} + +static void uuid_test_guid_invalid(struct kunit *test) +{ + unsigned int i; + const char *uuid; + guid_t le; + + for (i = 0; i < 
ARRAY_SIZE(test_uuid_wrong_data); i++) { + uuid = test_uuid_wrong_data[i]; + KUNIT_EXPECT_EQ(test, guid_parse(uuid, &le), -EINVAL); + } +} + +static void uuid_test_uuid_invalid(struct kunit *test) +{ + unsigned int i; + const char *uuid; + uuid_t be; + + for (i = 0; i < ARRAY_SIZE(test_uuid_wrong_data); i++) { + uuid = test_uuid_wrong_data[i]; + KUNIT_EXPECT_EQ(test, uuid_parse(uuid, &be), -EINVAL); + } +} + +static struct kunit_case uuid_test_cases[] = { + KUNIT_CASE(uuid_test_guid_valid), + KUNIT_CASE(uuid_test_uuid_valid), + KUNIT_CASE(uuid_test_guid_invalid), + KUNIT_CASE(uuid_test_uuid_invalid), + {}, +}; + +static struct kunit_suite uuid_test_suite = { + .name = "uuid", + .test_cases = uuid_test_cases, +}; + +kunit_test_suite(uuid_test_suite); + +MODULE_AUTHOR("Andy Shevchenko "); +MODULE_DESCRIPTION("Test cases for lib/uuid.c module"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/uuid.c b/lib/uuid.c index e309b4c5be3d..e8543c668dc7 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/lib/vsprintf.c b/lib/vsprintf.c index a3790c43a0ab..800b8ac49f53 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/Makefile b/mm/Makefile index fd30164933a5..8ad2ab08244e 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -100,7 +100,7 @@ obj-$(CONFIG_NUMA) += memory-tiers.o obj-$(CONFIG_DEVICE_MIGRATION) += migrate_device.o obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o obj-$(CONFIG_PAGE_COUNTER) += page_counter.o -obj-$(CONFIG_LIVEUPDATE) += memfd_luo.o +obj-$(CONFIG_LIVEUPDATE_MEMFD) += memfd_luo.o obj-$(CONFIG_MEMCG_V1) += memcontrol-v1.o obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o ifdef CONFIG_SWAP diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c index 00034e37bc9f..5725a367246d 100644 --- a/mm/kfence/kfence_test.c +++ b/mm/kfence/kfence_test.c @@ -110,7 +110,7 @@ 
static bool report_matches(const struct expect_report *r) /* Title */ cur = expect[0]; - end = &expect[0][sizeof(expect[0]) - 1]; + end = ARRAY_END(expect[0]); switch (r->type) { case KFENCE_ERROR_OOB: cur += scnprintf(cur, end - cur, "BUG: KFENCE: out-of-bounds %s", @@ -140,7 +140,7 @@ static bool report_matches(const struct expect_report *r) /* Access information */ cur = expect[1]; - end = &expect[1][sizeof(expect[1]) - 1]; + end = ARRAY_END(expect[1]); switch (r->type) { case KFENCE_ERROR_OOB: diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 1ac56ceb29b6..fe33f2edfe07 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -510,7 +510,7 @@ static void mem_pool_free(struct kmemleak_object *object) { unsigned long flags; - if (object < mem_pool || object >= mem_pool + ARRAY_SIZE(mem_pool)) { + if (object < mem_pool || object >= ARRAY_END(mem_pool)) { kmem_cache_free(object_cache, object); return; } diff --git a/mm/kmsan/kmsan_test.c b/mm/kmsan/kmsan_test.c index 81e642db6e23..7a7fbaff7350 100644 --- a/mm/kmsan/kmsan_test.c +++ b/mm/kmsan/kmsan_test.c @@ -105,7 +105,7 @@ static bool report_matches(const struct expect_report *r) /* Title */ cur = expected_header; - end = &expected_header[sizeof(expected_header) - 1]; + end = ARRAY_END(expected_header); cur += scnprintf(cur, end - cur, "BUG: KMSAN: %s", r->error_type); diff --git a/mm/memblock.c b/mm/memblock.c index e76255e4ff36..d9ede2cfa98f 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -21,6 +21,7 @@ #ifdef CONFIG_KEXEC_HANDOVER #include #include +#include #endif /* CONFIG_KEXEC_HANDOVER */ #include @@ -2442,9 +2443,6 @@ int reserve_mem_release_by_name(const char *name) } #ifdef CONFIG_KEXEC_HANDOVER -#define MEMBLOCK_KHO_FDT "memblock" -#define MEMBLOCK_KHO_NODE_COMPATIBLE "memblock-v1" -#define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1" static int __init reserved_mem_preserve(void) { diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c index 0e3d972fad33..c6078cd7f7e5 100644 --- a/mm/memcontrol-v1.c +++ 
b/mm/memcontrol-v1.c @@ -1816,7 +1816,7 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v) mem_cgroup_flush_stats(memcg); - for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { + for (stat = stats; stat < ARRAY_END(stats); stat++) { seq_printf(m, "%s=%lu", stat->name, mem_cgroup_nr_lru_pages(memcg, stat->lru_mask, false)); @@ -1827,7 +1827,7 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v) seq_putc(m, '\n'); } - for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { + for (stat = stats; stat < ARRAY_END(stats); stat++) { seq_printf(m, "hierarchical_%s=%lu", stat->name, mem_cgroup_nr_lru_pages(memcg, stat->lru_mask, diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 8888300b65c1..d2993f187f57 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index dff66d8fb325..db9a5354f9de 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -93,7 +93,7 @@ static void update_classid_task(struct task_struct *p, u32 classid) /* Only update the leader task, when many threads in this task, * so it can avoid the useless traversal. 
*/ - if (p != p->group_leader) + if (!thread_group_leader(p)) return; do { diff --git a/net/core/pktgen.c b/net/core/pktgen.c index d41b03fd1f63..8e185b318288 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -126,6 +126,7 @@ #include #include #include +#include #include #include #include diff --git a/net/core/utils.c b/net/core/utils.c index 5e63b0ea21f3..dd86913988f4 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c8c3e1713c0e..51d70180e1cc 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -79,6 +79,7 @@ #include #include #include +#include #include #include #include diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 30a5a978a678..f3c6a41e4911 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 131090f31e6a..d808c0b63f30 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/net/tipc/core.h b/net/tipc/core.h index 7f3fe3401c45..9ce5f9ff6cc0 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs index 49fad6de0674..cc907fb531bc 100644 --- a/rust/kernel/task.rs +++ b/rust/kernel/task.rs @@ -204,18 +204,6 @@ impl Task { self.0.get() } - /// Returns the group leader of the given task. - pub fn group_leader(&self) -> &Task { - // SAFETY: The group leader of a task never changes after initialization, so reading this - // field is not a data race. 
- let ptr = unsafe { *ptr::addr_of!((*self.as_ptr()).group_leader) }; - - // SAFETY: The lifetime of the returned task reference is tied to the lifetime of `self`, - // and given that a task has a reference to its group leader, we know it must be valid for - // the lifetime of the returned task reference. - unsafe { &*ptr.cast() } - } - /// Returns the PID of the given task. pub fn pid(&self) -> Pid { // SAFETY: The pid of a task never changes after initialization, so reading this field is @@ -345,6 +333,18 @@ impl CurrentTask { // `release_task()` call. Some(unsafe { PidNamespace::from_ptr(active_ns) }) } + + /// Returns the group leader of the current task. + pub fn group_leader(&self) -> &Task { + // SAFETY: The group leader of a task never changes while the task is running, and `self` + // is the current task, which is guaranteed running. + let ptr = unsafe { (*self.as_ptr()).group_leader }; + + // SAFETY: `current->group_leader` stays valid for at least the duration in which `current` + // is running, and the signature of this function ensures that the returned `&Task` can + // only be used while `current` is still valid, thus still running. + unsafe { &*ptr.cast() } + } } // SAFETY: The type invariants guarantee that `Task` is always refcounted. 
diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter index 888ce286a351..db5dd18dc2d5 100755 --- a/scripts/bloat-o-meter +++ b/scripts/bloat-o-meter @@ -42,6 +42,7 @@ def getsizes(file, format): if name.startswith("__se_sys"): continue if name.startswith("__se_compat_sys"): continue if name.startswith("__addressable_"): continue + if name.startswith("__noinstr_text_start"): continue if name == "linux_banner": continue if name == "vermagic": continue # statics and some other optimizations adds random .NUMBER diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index c1140371ea5b..15f0556eeafd 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3033,6 +3033,16 @@ sub process { } } +# Check for invalid patch separator + if ($in_commit_log && + $line =~ /^---.+/) { + if (ERROR("BAD_COMMIT_SEPARATOR", + "Invalid commit separator - some tools may have problems applying this\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/-/=/g; + } + } + # Check for patch separator if ($line =~ /^---$/) { $has_patch_separator = 1; diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index 5a8cef45bacf..c0ca4eedb0fe 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -13,6 +13,7 @@ #define pr_fmt(fmt) "EVM: "fmt #include +#include #include #include #include diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index c35ea613c9f8..c6d1c7be8a3e 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c index 5beb69edd12f..36a34c54de58 100644 --- a/security/integrity/ima/ima_kexec.c +++ b/security/integrity/ima/ima_kexec.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include "ima.h" @@ -294,3 +296,36 @@ void __init 
ima_load_kexec_buffer(void) pr_debug("Error restoring the measurement list: %d\n", rc); } } + +/* + * ima_validate_range - verify a physical buffer lies in addressable RAM + * @phys: physical start address of the buffer from previous kernel + * @size: size of the buffer + * + * On success return 0. On failure returns -EINVAL so callers can skip + * restoring. + */ +int ima_validate_range(phys_addr_t phys, size_t size) +{ + unsigned long start_pfn, end_pfn; + phys_addr_t end_phys; + + if (check_add_overflow(phys, (phys_addr_t)size - 1, &end_phys)) + return -EINVAL; + + start_pfn = PHYS_PFN(phys); + end_pfn = PHYS_PFN(end_phys); + +#ifdef CONFIG_X86 + if (!pfn_range_is_mapped(start_pfn, end_pfn)) +#else + if (!page_is_ram(start_pfn) || !page_is_ram(end_pfn)) +#endif + { + pr_warn("IMA: previous kernel measurement buffer %pa (size 0x%zx) lies outside available memory\n", + &phys, size); + return -EINVAL; + } + + return 0; +} diff --git a/security/ipe/digest.c b/security/ipe/digest.c index 493716370570..5006366837ba 100644 --- a/security/ipe/digest.c +++ b/security/ipe/digest.c @@ -3,6 +3,7 @@ * Copyright (C) 2020-2024 Microsoft Corporation. All rights reserved. 
*/ +#include #include "digest.h" /** diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 596e7a30bd3c..56b531587a1e 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/security/keys/trusted-keys/trusted_core.c b/security/keys/trusted-keys/trusted_core.c index 2d328de170e8..9046123d94de 100644 --- a/security/keys/trusted-keys/trusted_core.c +++ b/security/keys/trusted-keys/trusted_core.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/security/keys/trusted-keys/trusted_tpm1.c b/security/keys/trusted-keys/trusted_tpm1.c index 636acb66a4f6..c865c97aa1b4 100644 --- a/security/keys/trusted-keys/trusted_tpm1.c +++ b/security/keys/trusted-keys/trusted_tpm1.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c index 273ffbd6defe..019840006096 100644 --- a/security/loadpin/loadpin.c +++ b/security/loadpin/loadpin.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 896acad1f5f7..4d58c7ad1a23 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c index e983cd657e28..f91fe64bf4f9 100644 --- a/sound/pci/riptide/riptide.c +++ b/sound/pci/riptide/riptide.c @@ -75,6 +75,7 @@ */ #include +#include #include #include #include diff --git a/sound/usb/6fire/firmware.c b/sound/usb/6fire/firmware.c index c51abc54d2f8..cc8caec946cc 100644 --- a/sound/usb/6fire/firmware.c +++ b/sound/usb/6fire/firmware.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "firmware.h" diff 
--git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 21cb3c3d1331..64796c0223be 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -194,6 +195,37 @@ static int get_family_id(int sd) #define average_ms(t, c) (t / 1000000ULL / (c ? c : 1)) #define delay_ms(t) (t / 1000000ULL) +/* + * Format timespec64 to human readable string (YYYY-MM-DD HH:MM:SS) + * Returns formatted string or "N/A" if timestamp is zero + */ +static const char *format_timespec64(struct timespec64 *ts) +{ + static char buffer[32]; + struct tm tm_info; + time_t time_sec; + + /* Check if timestamp is zero (not set) */ + if (ts->tv_sec == 0 && ts->tv_nsec == 0) + return "N/A"; + + time_sec = (time_t)ts->tv_sec; + + /* Use thread-safe localtime_r */ + if (localtime_r(&time_sec, &tm_info) == NULL) + return "N/A"; + + snprintf(buffer, sizeof(buffer), "%04d-%02d-%02dT%02d:%02d:%02d", + tm_info.tm_year + 1900, + tm_info.tm_mon + 1, + tm_info.tm_mday, + tm_info.tm_hour, + tm_info.tm_min, + tm_info.tm_sec); + + return buffer; +} + /* * Version compatibility note: * Field availability depends on taskstats version (t->version), @@ -205,13 +237,28 @@ static int get_family_id(int sd) * version >= 13 - supports WPCOPY statistics * version >= 14 - supports IRQ statistics * version >= 16 - supports *_max and *_min delay statistics + * version >= 17 - supports delay max timestamp statistics * * Always verify version before accessing version-dependent fields * to maintain backward compatibility. 
*/ #define PRINT_CPU_DELAY(version, t) \ do { \ - if (version >= 16) { \ + if (version >= 17) { \ + printf("%-10s%15s%15s%15s%15s%15s%15s%15s%25s\n", \ + "CPU", "count", "real total", "virtual total", \ + "delay total", "delay average", "delay max", \ + "delay min", "delay max timestamp"); \ + printf(" %15llu%15llu%15llu%15llu%15.3fms%13.6fms%13.6fms%23s\n", \ + (unsigned long long)(t)->cpu_count, \ + (unsigned long long)(t)->cpu_run_real_total, \ + (unsigned long long)(t)->cpu_run_virtual_total, \ + (unsigned long long)(t)->cpu_delay_total, \ + average_ms((double)(t)->cpu_delay_total, (t)->cpu_count), \ + delay_ms((double)(t)->cpu_delay_max), \ + delay_ms((double)(t)->cpu_delay_min), \ + format_timespec64(&(t)->cpu_delay_max_ts)); \ + } else if (version >= 16) { \ printf("%-10s%15s%15s%15s%15s%15s%15s%15s\n", \ "CPU", "count", "real total", "virtual total", \ "delay total", "delay average", "delay max", "delay min"); \ @@ -257,44 +304,115 @@ static int get_family_id(int sd) } \ } while (0) +#define PRINT_FILED_DELAY_WITH_TS(name, version, t, count, total, max, min, max_ts) \ + do { \ + if (version >= 17) { \ + printf("%-10s%15s%15s%15s%15s%15s%25s\n", \ + name, "count", "delay total", "delay average", \ + "delay max", "delay min", "delay max timestamp"); \ + printf(" %15llu%15llu%15.3fms%13.6fms%13.6fms%23s\n", \ + (unsigned long long)(t)->count, \ + (unsigned long long)(t)->total, \ + average_ms((double)(t)->total, (t)->count), \ + delay_ms((double)(t)->max), \ + delay_ms((double)(t)->min), \ + format_timespec64(&(t)->max_ts)); \ + } else if (version >= 16) { \ + printf("%-10s%15s%15s%15s%15s%15s\n", \ + name, "count", "delay total", "delay average", \ + "delay max", "delay min"); \ + printf(" %15llu%15llu%15.3fms%13.6fms%13.6fms\n", \ + (unsigned long long)(t)->count, \ + (unsigned long long)(t)->total, \ + average_ms((double)(t)->total, (t)->count), \ + delay_ms((double)(t)->max), \ + delay_ms((double)(t)->min)); \ + } else { \ + printf("%-10s%15s%15s%15s\n", \ 
+ name, "count", "delay total", "delay average"); \ + printf(" %15llu%15llu%15.3fms\n", \ + (unsigned long long)(t)->count, \ + (unsigned long long)(t)->total, \ + average_ms((double)(t)->total, (t)->count)); \ + } \ + } while (0) + static void print_delayacct(struct taskstats *t) { printf("\n\n"); PRINT_CPU_DELAY(t->version, t); - PRINT_FILED_DELAY("IO", t->version, t, - blkio_count, blkio_delay_total, - blkio_delay_max, blkio_delay_min); + /* Use new macro with timestamp support for version >= 17 */ + if (t->version >= 17) { + PRINT_FILED_DELAY_WITH_TS("IO", t->version, t, + blkio_count, blkio_delay_total, + blkio_delay_max, blkio_delay_min, blkio_delay_max_ts); - PRINT_FILED_DELAY("SWAP", t->version, t, - swapin_count, swapin_delay_total, - swapin_delay_max, swapin_delay_min); + PRINT_FILED_DELAY_WITH_TS("SWAP", t->version, t, + swapin_count, swapin_delay_total, + swapin_delay_max, swapin_delay_min, swapin_delay_max_ts); - PRINT_FILED_DELAY("RECLAIM", t->version, t, - freepages_count, freepages_delay_total, - freepages_delay_max, freepages_delay_min); + PRINT_FILED_DELAY_WITH_TS("RECLAIM", t->version, t, + freepages_count, freepages_delay_total, + freepages_delay_max, freepages_delay_min, freepages_delay_max_ts); - PRINT_FILED_DELAY("THRASHING", t->version, t, - thrashing_count, thrashing_delay_total, - thrashing_delay_max, thrashing_delay_min); + PRINT_FILED_DELAY_WITH_TS("THRASHING", t->version, t, + thrashing_count, thrashing_delay_total, + thrashing_delay_max, thrashing_delay_min, thrashing_delay_max_ts); - if (t->version >= 11) { - PRINT_FILED_DELAY("COMPACT", t->version, t, - compact_count, compact_delay_total, - compact_delay_max, compact_delay_min); - } + if (t->version >= 11) { + PRINT_FILED_DELAY_WITH_TS("COMPACT", t->version, t, + compact_count, compact_delay_total, + compact_delay_max, compact_delay_min, compact_delay_max_ts); + } - if (t->version >= 13) { - PRINT_FILED_DELAY("WPCOPY", t->version, t, - wpcopy_count, wpcopy_delay_total, - 
wpcopy_delay_max, wpcopy_delay_min); - } + if (t->version >= 13) { + PRINT_FILED_DELAY_WITH_TS("WPCOPY", t->version, t, + wpcopy_count, wpcopy_delay_total, + wpcopy_delay_max, wpcopy_delay_min, wpcopy_delay_max_ts); + } - if (t->version >= 14) { - PRINT_FILED_DELAY("IRQ", t->version, t, - irq_count, irq_delay_total, - irq_delay_max, irq_delay_min); + if (t->version >= 14) { + PRINT_FILED_DELAY_WITH_TS("IRQ", t->version, t, + irq_count, irq_delay_total, + irq_delay_max, irq_delay_min, irq_delay_max_ts); + } + } else { + /* Use original macro for older versions */ + PRINT_FILED_DELAY("IO", t->version, t, + blkio_count, blkio_delay_total, + blkio_delay_max, blkio_delay_min); + + PRINT_FILED_DELAY("SWAP", t->version, t, + swapin_count, swapin_delay_total, + swapin_delay_max, swapin_delay_min); + + PRINT_FILED_DELAY("RECLAIM", t->version, t, + freepages_count, freepages_delay_total, + freepages_delay_max, freepages_delay_min); + + PRINT_FILED_DELAY("THRASHING", t->version, t, + thrashing_count, thrashing_delay_total, + thrashing_delay_max, thrashing_delay_min); + + if (t->version >= 11) { + PRINT_FILED_DELAY("COMPACT", t->version, t, + compact_count, compact_delay_total, + compact_delay_max, compact_delay_min); + } + + if (t->version >= 13) { + PRINT_FILED_DELAY("WPCOPY", t->version, t, + wpcopy_count, wpcopy_delay_total, + wpcopy_delay_max, wpcopy_delay_min); + } + + if (t->version >= 14) { + PRINT_FILED_DELAY("IRQ", t->version, t, + irq_count, irq_delay_total, + irq_delay_max, irq_delay_min); + } } } diff --git a/tools/debugging/kernel-chktaint b/tools/debugging/kernel-chktaint index e7da0909d097..e1571c04afb5 100755 --- a/tools/debugging/kernel-chktaint +++ b/tools/debugging/kernel-chktaint @@ -211,9 +211,25 @@ else addout "J" echo " * fwctl's mutating debug interface was used (#19)" fi +echo "Raw taint value as int/string: $taint/'$out'" +# report on any tainted loadable modules +[ "$1" = "" ] && [ -r /sys/module/ ] && \ + cnt=`grep [A-Z] /sys/module/*/taint | wc 
-l` || cnt=0 + +if [ $cnt -ne 0 ]; then + echo + echo "Tainted modules:" + for dir in `ls /sys/module` ; do + if [ -r /sys/module/$dir/taint ]; then + modtnt=`cat /sys/module/$dir/taint` + [ "$modtnt" = "" ] || echo " * $dir ($modtnt)" + fi + done +fi + +echo echo "For a more detailed explanation of the various taint flags see" echo " Documentation/admin-guide/tainted-kernels.rst in the Linux kernel sources" echo " or https://kernel.org/doc/html/latest/admin-guide/tainted-kernels.html" -echo "Raw taint value as int/string: $taint/'$out'" #EOF# diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 558839e3c185..24855381290d 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -1,6 +1,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1 CONFIG_BPF=y CONFIG_BPF_EVENTS=y CONFIG_BPF_JIT=y diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index 0504c11c2de6..bb89d2dfaa2a 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -80,7 +80,7 @@ CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_WQ_WATCHDOG=y CONFIG_DETECT_HUNG_TASK=y CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=1 CONFIG_BOOTPARAM_HUNG_TASK_PANIC=1 CONFIG_PANIC_TIMEOUT=-1 CONFIG_STACKTRACE=y