btrfs: switch to library APIs for checksums

Make btrfs use the library APIs instead of crypto_shash, for all
checksum computations.  This has many benefits:

- Allows future checksum types, e.g. XXH3 or CRC64, to be more easily
  supported.  Only a library API will be needed, not crypto_shash too.

- Eliminates the overhead of the generic crypto layer, including an
  indirect call for every function call and other API overhead.  A
  microbenchmark of btrfs_check_read_bio() with crc32c checksums shows a
  speedup from 658 cycles to 608 cycles per 4096-byte block.

- Decreases the stack usage of btrfs by reducing the size of checksum
  contexts from 384 bytes to 240 bytes, and by eliminating the need for
  some functions to declare a checksum context at all.

- Increases reliability.  The library functions always succeed and
  return void.  In contrast, crypto_shash can fail and return errors.
  Also, the library functions are guaranteed to be available when btrfs
  is loaded; there's no longer any need to use module softdeps to try to
  work around the crypto modules sometimes not being loaded.

- Fixes a bug where blake2b checksums didn't work on kernels booted with
  fips=1.  Since btrfs checksums are for integrity only, it's fine for
  them to use non-FIPS-approved algorithms.

Note that with having to handle 4 algorithms instead of just 1-2, this
commit does result in a slightly positive diffstat.  That being said,
this wouldn't have been the case if btrfs had actually checked for
errors from crypto_shash, which technically it should have been doing.

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Neal Gompa <neal@gompa.dev>
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Eric Biggers 2025-12-04 23:04:54 -08:00 committed by David Sterba
parent b39b26e017
commit fe11ac191c
10 changed files with 138 additions and 104 deletions

View file

@ -4,11 +4,8 @@ config BTRFS_FS
tristate "Btrfs filesystem support"
select BLK_CGROUP_PUNT_BIO
select CRC32
select CRYPTO
select CRYPTO_CRC32C
select CRYPTO_XXHASH
select CRYPTO_SHA256
select CRYPTO_BLAKE2B
select CRYPTO_LIB_BLAKE2B
select CRYPTO_LIB_SHA256
select ZLIB_INFLATE
select ZLIB_DEFLATE
select LZO_COMPRESS
@ -18,6 +15,7 @@ config BTRFS_FS
select FS_IOMAP
select RAID6_PQ
select XOR_BLOCKS
select XXHASH
depends on PAGE_SIZE_LESS_THAN_256KB
help

View file

@ -21,7 +21,6 @@
#include <linux/sched/mm.h>
#include <linux/log2.h>
#include <linux/shrinker.h>
#include <crypto/hash.h>
#include "misc.h"
#include "ctree.h"
#include "fs.h"

View file

@ -18,7 +18,6 @@
#include <linux/crc32c.h>
#include <linux/sched/mm.h>
#include <linux/unaligned.h>
#include <crypto/hash.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@ -62,12 +61,6 @@
static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info);
static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info);
static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
{
if (fs_info->csum_shash)
crypto_free_shash(fs_info->csum_shash);
}
/*
* Compute the csum of a btree block and store the result to provided buffer.
*/
@ -76,12 +69,11 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result)
struct btrfs_fs_info *fs_info = buf->fs_info;
int num_pages;
u32 first_page_part;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct btrfs_csum_ctx csum;
char *kaddr;
int i;
shash->tfm = fs_info->csum_shash;
crypto_shash_init(shash);
btrfs_csum_init(&csum, fs_info->csum_type);
if (buf->addr) {
/* Pages are contiguous, handle them as a big one. */
@ -94,21 +86,21 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result)
num_pages = num_extent_pages(buf);
}
crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
first_page_part - BTRFS_CSUM_SIZE);
btrfs_csum_update(&csum, kaddr + BTRFS_CSUM_SIZE,
first_page_part - BTRFS_CSUM_SIZE);
/*
* Multiple single-page folios case would reach here.
*
* nodesize <= PAGE_SIZE and large folio all handled by above
* crypto_shash_update() already.
* btrfs_csum_update() already.
*/
for (i = 1; i < num_pages && INLINE_EXTENT_BUFFER_PAGES > 1; i++) {
kaddr = folio_address(buf->folios[i]);
crypto_shash_update(shash, kaddr, PAGE_SIZE);
btrfs_csum_update(&csum, kaddr, PAGE_SIZE);
}
memset(result, 0, BTRFS_CSUM_SIZE);
crypto_shash_final(shash, result);
btrfs_csum_final(&csum, result);
}
/*
@ -160,18 +152,15 @@ static bool btrfs_supported_super_csum(u16 csum_type)
int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
const struct btrfs_super_block *disk_sb)
{
char result[BTRFS_CSUM_SIZE];
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
shash->tfm = fs_info->csum_shash;
u8 result[BTRFS_CSUM_SIZE];
/*
* The super_block structure does not span the whole
* BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is
* filled with zeros and is included in the checksum.
*/
crypto_shash_digest(shash, (const u8 *)disk_sb + BTRFS_CSUM_SIZE,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, result);
btrfs_csum(fs_info->csum_type, (const u8 *)disk_sb + BTRFS_CSUM_SIZE,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, result);
if (memcmp(disk_sb->csum, result, fs_info->csum_size))
return 1;
@ -1229,7 +1218,6 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
ASSERT(percpu_counter_sum_positive(em_counter) == 0);
percpu_counter_destroy(em_counter);
percpu_counter_destroy(&fs_info->dev_replace.bio_counter);
btrfs_free_csum_hash(fs_info);
btrfs_free_stripe_hash_table(fs_info);
btrfs_free_ref_cache(fs_info);
kfree(fs_info->balance_ctl);
@ -1983,21 +1971,8 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
return 0;
}
static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
static void btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
{
struct crypto_shash *csum_shash;
const char *csum_driver = btrfs_super_csum_driver(csum_type);
csum_shash = crypto_alloc_shash(csum_driver, 0, 0);
if (IS_ERR(csum_shash)) {
btrfs_err(fs_info, "error allocating %s hash for checksum",
csum_driver);
return PTR_ERR(csum_shash);
}
fs_info->csum_shash = csum_shash;
/* Check if the checksum implementation is a fast accelerated one. */
switch (csum_type) {
case BTRFS_CSUM_TYPE_CRC32:
@ -2011,10 +1986,8 @@ static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
break;
}
btrfs_info(fs_info, "using %s (%s) checksum algorithm",
btrfs_super_csum_name(csum_type),
crypto_shash_driver_name(csum_shash));
return 0;
btrfs_info(fs_info, "using %s checksum algorithm",
btrfs_super_csum_name(csum_type));
}
static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
@ -3302,12 +3275,9 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
}
fs_info->csum_size = btrfs_super_csum_size(disk_super);
fs_info->csum_type = csum_type;
ret = btrfs_init_csum_hash(fs_info, csum_type);
if (ret) {
btrfs_release_disk_super(disk_super);
goto fail_alloc;
}
btrfs_init_csum_hash(fs_info, csum_type);
/*
* We want to check superblock checksum, the type is stored inside.
@ -3709,7 +3679,6 @@ static int write_dev_supers(struct btrfs_device *device,
{
struct btrfs_fs_info *fs_info = device->fs_info;
struct address_space *mapping = device->bdev->bd_mapping;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
int i;
int ret;
u64 bytenr, bytenr_orig;
@ -3719,8 +3688,6 @@ static int write_dev_supers(struct btrfs_device *device,
if (max_mirrors == 0)
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
shash->tfm = fs_info->csum_shash;
for (i = 0; i < max_mirrors; i++) {
struct folio *folio;
struct bio *bio;
@ -3744,9 +3711,8 @@ static int write_dev_supers(struct btrfs_device *device,
btrfs_set_super_bytenr(sb, bytenr_orig);
crypto_shash_digest(shash, (const char *)sb + BTRFS_CSUM_SIZE,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
sb->csum);
btrfs_csum(fs_info->csum_type, (const u8 *)sb + BTRFS_CSUM_SIZE,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, sb->csum);
folio = __filemap_get_folio(mapping, bytenr >> PAGE_SHIFT,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT,

View file

@ -8,7 +8,6 @@
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/sched/mm.h>
#include <crypto/hash.h>
#include "messages.h"
#include "ctree.h"
#include "disk-io.h"
@ -769,7 +768,6 @@ static void csum_one_bio(struct btrfs_bio *bbio, struct bvec_iter *src)
{
struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct bio *bio = &bbio->bio;
struct btrfs_ordered_sum *sums = bbio->sums;
struct bvec_iter iter = *src;
@ -781,8 +779,6 @@ static void csum_one_bio(struct btrfs_bio *bbio, struct bvec_iter *src)
u32 offset = 0;
int index = 0;
shash->tfm = fs_info->csum_shash;
btrfs_bio_for_each_block(paddr, bio, &iter, step) {
paddrs[(offset / step) % nr_steps] = paddr;
offset += step;

View file

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/crc32.h>
#include "messages.h"
#include "fs.h"
#include "accessors.h"
@ -8,13 +9,11 @@
static const struct btrfs_csums {
u16 size;
const char name[10];
const char driver[12];
} btrfs_csums[] = {
[BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
[BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
[BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
[BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
.driver = "blake2b-256" },
[BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b" },
};
/* This exists for btrfs-progs usages. */
@ -37,23 +36,96 @@ const char *btrfs_super_csum_name(u16 csum_type)
return btrfs_csums[csum_type].name;
}
/*
* Return driver name if defined, otherwise the name that's also a valid driver
* name.
*/
const char *btrfs_super_csum_driver(u16 csum_type)
{
/* csum type is validated at mount time */
return btrfs_csums[csum_type].driver[0] ?
btrfs_csums[csum_type].driver :
btrfs_csums[csum_type].name;
}
/* Return the number of entries in the btrfs_csums[] table. */
size_t __attribute_const__ btrfs_get_num_csums(void)
{
return ARRAY_SIZE(btrfs_csums);
}
/*
 * Compute the checksum of @len bytes starting at @data in a single call.
 *
 * @csum_type selects the algorithm (one of BTRFS_CSUM_TYPE_*).  The digest is
 * stored at @out: crc32c as a 4-byte little-endian value, xxhash64 as an
 * 8-byte little-endian value, sha256 and blake2b as 32 bytes each.
 */
void btrfs_csum(u16 csum_type, const u8 *data, size_t len, u8 *out)
{
	switch (csum_type) {
	case BTRFS_CSUM_TYPE_CRC32: {
		/* Seeded with all ones; the result is inverted before storing. */
		const u32 crc = crc32c(~0, data, len);

		put_unaligned_le32(~crc, out);
		return;
	}
	case BTRFS_CSUM_TYPE_XXHASH: {
		/* xxhash64 with a seed of zero. */
		const u64 hash = xxh64(data, len, 0);

		put_unaligned_le64(hash, out);
		return;
	}
	case BTRFS_CSUM_TYPE_SHA256:
		sha256(data, len, out);
		return;
	case BTRFS_CSUM_TYPE_BLAKE2:
		/* No key is passed; the requested digest length is 32 bytes. */
		blake2b(NULL, 0, data, len, out, 32);
		return;
	}

	/* Checksum type is validated at mount time. */
	BUG();
}
/*
 * Start an incremental checksum computation.
 *
 * Records @csum_type in @ctx and resets the per-algorithm state that matches
 * it.  Feed data with btrfs_csum_update() and obtain the digest with
 * btrfs_csum_final().
 */
void btrfs_csum_init(struct btrfs_csum_ctx *ctx, u16 csum_type)
{
	/* Remembered so that update/final can dispatch on the same type. */
	ctx->csum_type = csum_type;

	switch (csum_type) {
	case BTRFS_CSUM_TYPE_CRC32:
		/* Same all-ones seed as the one-shot btrfs_csum() path. */
		ctx->crc32 = ~0;
		return;
	case BTRFS_CSUM_TYPE_XXHASH:
		xxh64_reset(&ctx->xxh64, 0);
		return;
	case BTRFS_CSUM_TYPE_SHA256:
		sha256_init(&ctx->sha256);
		return;
	case BTRFS_CSUM_TYPE_BLAKE2:
		blake2b_init(&ctx->blake2b, 32);
		return;
	}

	/* Checksum type is validated at mount time. */
	BUG();
}
/*
 * Feed @len bytes at @data into an incremental checksum computation.
 *
 * @ctx must have been set up by btrfs_csum_init(); the algorithm is taken
 * from ctx->csum_type.
 */
void btrfs_csum_update(struct btrfs_csum_ctx *ctx, const u8 *data, size_t len)
{
	switch (ctx->csum_type) {
	case BTRFS_CSUM_TYPE_CRC32: {
		/* Continue the running crc32c from the previous value. */
		const u32 prev = ctx->crc32;

		ctx->crc32 = crc32c(prev, data, len);
		return;
	}
	case BTRFS_CSUM_TYPE_XXHASH:
		xxh64_update(&ctx->xxh64, data, len);
		return;
	case BTRFS_CSUM_TYPE_SHA256:
		sha256_update(&ctx->sha256, data, len);
		return;
	case BTRFS_CSUM_TYPE_BLAKE2:
		blake2b_update(&ctx->blake2b, data, len);
		return;
	}

	/* Checksum type is validated at mount time. */
	BUG();
}
/*
 * Finish an incremental checksum computation and store the digest at @out.
 *
 * The output format matches btrfs_csum(): crc32c is inverted and written as
 * a 4-byte little-endian value, xxhash64 is written as an 8-byte
 * little-endian value, and sha256 / blake2b write their digest directly.
 */
void btrfs_csum_final(struct btrfs_csum_ctx *ctx, u8 *out)
{
	switch (ctx->csum_type) {
	case BTRFS_CSUM_TYPE_CRC32:
		put_unaligned_le32(~ctx->crc32, out);
		return;
	case BTRFS_CSUM_TYPE_XXHASH: {
		const u64 hash = xxh64_digest(&ctx->xxh64);

		put_unaligned_le64(hash, out);
		return;
	}
	case BTRFS_CSUM_TYPE_SHA256:
		sha256_final(&ctx->sha256, out);
		return;
	case BTRFS_CSUM_TYPE_BLAKE2:
		blake2b_final(&ctx->blake2b, out);
		return;
	}

	/* Checksum type is validated at mount time. */
	BUG();
}
/*
* We support the following block sizes for all systems:
*

View file

@ -3,6 +3,8 @@
#ifndef BTRFS_FS_H
#define BTRFS_FS_H
#include <crypto/blake2b.h>
#include <crypto/sha2.h>
#include <linux/blkdev.h>
#include <linux/sizes.h>
#include <linux/time64.h>
@ -24,6 +26,7 @@
#include <linux/wait_bit.h>
#include <linux/sched.h>
#include <linux/rbtree.h>
#include <linux/xxhash.h>
#include <uapi/linux/btrfs.h>
#include <uapi/linux/btrfs_tree.h>
#include "extent-io-tree.h"
@ -35,7 +38,6 @@ struct inode;
struct super_block;
struct kobject;
struct reloc_control;
struct crypto_shash;
struct ulist;
struct btrfs_device;
struct btrfs_block_group;
@ -850,9 +852,10 @@ struct btrfs_fs_info {
u32 sectorsize_bits;
u32 block_min_order;
u32 block_max_order;
u32 stripesize;
u32 csum_size;
u32 csums_per_leaf;
u32 stripesize;
u32 csum_type;
/*
* Maximum size of an extent. BTRFS_MAX_EXTENT_SIZE on regular
@ -864,8 +867,6 @@ struct btrfs_fs_info {
spinlock_t swapfile_pins_lock;
struct rb_root swapfile_pins;
struct crypto_shash *csum_shash;
/* Type of exclusive operation running, protected by super_lock */
enum btrfs_exclusive_operation exclusive_operation;
@ -1057,8 +1058,20 @@ int btrfs_check_ioctl_vol_args_path(const struct btrfs_ioctl_vol_args *vol_args)
u16 btrfs_csum_type_size(u16 type);
int btrfs_super_csum_size(const struct btrfs_super_block *s);
const char *btrfs_super_csum_name(u16 csum_type);
const char *btrfs_super_csum_driver(u16 csum_type);
size_t __attribute_const__ btrfs_get_num_csums(void);
/*
 * State for an incremental checksum computation, used with btrfs_csum_init(),
 * btrfs_csum_update() and btrfs_csum_final().  csum_type records which
 * algorithm is in use and therefore which member of the union holds the
 * running state.
 */
struct btrfs_csum_ctx {
/* Checksum algorithm, one of BTRFS_CSUM_TYPE_*. */
u16 csum_type;
union {
/* Running crc32c value, used for BTRFS_CSUM_TYPE_CRC32. */
u32 crc32;
/* xxhash64 state, used for BTRFS_CSUM_TYPE_XXHASH. */
struct xxh64_state xxh64;
/* SHA-256 state, used for BTRFS_CSUM_TYPE_SHA256. */
struct sha256_ctx sha256;
/* BLAKE2b state, used for BTRFS_CSUM_TYPE_BLAKE2. */
struct blake2b_ctx blake2b;
};
};
void btrfs_csum(u16 csum_type, const u8 *data, size_t len, u8 *out);
void btrfs_csum_init(struct btrfs_csum_ctx *ctx, u16 csum_type);
void btrfs_csum_update(struct btrfs_csum_ctx *ctx, const u8 *data, size_t len);
void btrfs_csum_final(struct btrfs_csum_ctx *ctx, u8 *out);
static inline bool btrfs_is_empty_uuid(const u8 *uuid)
{

View file

@ -3,7 +3,6 @@
* Copyright (C) 2007 Oracle. All rights reserved.
*/
#include <crypto/hash.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/blk-cgroup.h>
@ -3417,20 +3416,19 @@ void btrfs_calculate_block_csum_pages(struct btrfs_fs_info *fs_info,
const u32 blocksize = fs_info->sectorsize;
const u32 step = min(blocksize, PAGE_SIZE);
const u32 nr_steps = blocksize / step;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct btrfs_csum_ctx csum;
shash->tfm = fs_info->csum_shash;
crypto_shash_init(shash);
btrfs_csum_init(&csum, fs_info->csum_type);
for (int i = 0; i < nr_steps; i++) {
const phys_addr_t paddr = paddrs[i];
void *kaddr;
ASSERT(offset_in_page(paddr) + step <= PAGE_SIZE);
kaddr = kmap_local_page(phys_to_page(paddr)) + offset_in_page(paddr);
crypto_shash_update(shash, kaddr, step);
btrfs_csum_update(&csum, kaddr, step);
kunmap_local(kaddr);
}
crypto_shash_final(shash, dest);
btrfs_csum_final(&csum, dest);
}
/*

View file

@ -6,7 +6,6 @@
#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
#include <crypto/hash.h>
#include "ctree.h"
#include "discard.h"
#include "volumes.h"
@ -718,7 +717,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
const u64 logical = stripe->logical + (sector_nr << fs_info->sectorsize_bits);
void *first_kaddr = scrub_stripe_get_kaddr(stripe, sector_nr);
struct btrfs_header *header = first_kaddr;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct btrfs_csum_ctx csum;
u8 on_disk_csum[BTRFS_CSUM_SIZE];
u8 calculated_csum[BTRFS_CSUM_SIZE];
@ -760,17 +759,16 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
}
/* Now check tree block csum. */
shash->tfm = fs_info->csum_shash;
crypto_shash_init(shash);
crypto_shash_update(shash, first_kaddr + BTRFS_CSUM_SIZE,
fs_info->sectorsize - BTRFS_CSUM_SIZE);
btrfs_csum_init(&csum, fs_info->csum_type);
btrfs_csum_update(&csum, first_kaddr + BTRFS_CSUM_SIZE,
fs_info->sectorsize - BTRFS_CSUM_SIZE);
for (int i = sector_nr + 1; i < sector_nr + sectors_per_tree; i++) {
crypto_shash_update(shash, scrub_stripe_get_kaddr(stripe, i),
fs_info->sectorsize);
btrfs_csum_update(&csum, scrub_stripe_get_kaddr(stripe, i),
fs_info->sectorsize);
}
crypto_shash_final(shash, calculated_csum);
btrfs_csum_final(&csum, calculated_csum);
if (memcmp(calculated_csum, on_disk_csum, fs_info->csum_size) != 0) {
scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);

View file

@ -2700,7 +2700,3 @@ module_exit(exit_btrfs_fs)
MODULE_DESCRIPTION("B-Tree File System (BTRFS)");
MODULE_LICENSE("GPL");
MODULE_SOFTDEP("pre: crc32c");
MODULE_SOFTDEP("pre: xxhash64");
MODULE_SOFTDEP("pre: sha256");
MODULE_SOFTDEP("pre: blake2b-256");

View file

@ -11,7 +11,6 @@
#include <linux/bug.h>
#include <linux/list.h>
#include <linux/string_choices.h>
#include <crypto/hash.h>
#include "messages.h"
#include "ctree.h"
#include "discard.h"
@ -1253,10 +1252,9 @@ static ssize_t btrfs_checksum_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
const char *csum_name = btrfs_super_csum_name(csum_type);
return sysfs_emit(buf, "%s (%s)\n",
btrfs_super_csum_name(csum_type),
crypto_shash_driver_name(fs_info->csum_shash));
return sysfs_emit(buf, "%s (%s-lib)\n", csum_name, csum_name);
}
BTRFS_ATTR(, checksum, btrfs_checksum_show);