Mirror of https://github.com/torvalds/linux.git, synced 2026-03-08 01:04:41 +01:00.
Make btrfs use the library APIs instead of crypto_shash, for all checksum computations. This has many benefits: - Allows future checksum types, e.g. XXH3 or CRC64, to be more easily supported. Only a library API will be needed, not crypto_shash too. - Eliminates the overhead of the generic crypto layer, including an indirect call for every function call and other API overhead. A microbenchmark of btrfs_check_read_bio() with crc32c checksums shows a speedup from 658 cycles to 608 cycles per 4096-byte block. - Decreases the stack usage of btrfs by reducing the size of checksum contexts from 384 bytes to 240 bytes, and by eliminating the need for some functions to declare a checksum context at all. - Increases reliability. The library functions always succeed and return void. In contrast, crypto_shash can fail and return errors. Also, the library functions are guaranteed to be available when btrfs is loaded; there's no longer any need to use module softdeps to try to work around the crypto modules sometimes not being loaded. - Fixes a bug where blake2b checksums didn't work on kernels booted with fips=1. Since btrfs checksums are for integrity only, it's fine for them to use non-FIPS-approved algorithms. Note that with having to handle 4 algorithms instead of just 1-2, this commit does result in a slightly positive diffstat. That being said, this wouldn't have been the case if btrfs had actually checked for errors from crypto_shash, which technically it should have been doing. Reviewed-by: Ard Biesheuvel <ardb@kernel.org> Reviewed-by: Neal Gompa <neal@gompa.dev> Signed-off-by: Eric Biggers <ebiggers@kernel.org> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
347 lines · 9.3 KiB · C
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
#include <linux/crc32.h>
|
|
#include "messages.h"
|
|
#include "fs.h"
|
|
#include "accessors.h"
|
|
#include "volumes.h"
|
|
|
|
/*
 * Table of the supported checksum algorithms, indexed by the on-disk
 * BTRFS_CSUM_TYPE_* value from the super block.
 */
static const struct btrfs_csums {
	/* Digest size in bytes as stored in checksum items and the super block. */
	u16 size;
	/* Human-readable algorithm name, e.g. for mount messages and sysfs. */
	const char name[10];
} btrfs_csums[] = {
	[BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
	[BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
	[BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
	[BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b" },
};
|
|
|
|
/* This exists for btrfs-progs usages. */
u16 btrfs_csum_type_size(u16 type)
{
	/*
	 * @type must be a valid BTRFS_CSUM_TYPE_* value; it is not
	 * range-checked here (callers validate it, e.g. at mount time).
	 */
	return btrfs_csums[type].size;
}
|
|
|
|
int btrfs_super_csum_size(const struct btrfs_super_block *s)
|
|
{
|
|
u16 t = btrfs_super_csum_type(s);
|
|
|
|
/* csum type is validated at mount time. */
|
|
return btrfs_csum_type_size(t);
|
|
}
|
|
|
|
/* Return the human-readable name of checksum type @csum_type. */
const char *btrfs_super_csum_name(u16 csum_type)
{
	/* csum type is validated at mount time. */
	return btrfs_csums[csum_type].name;
}
|
|
|
|
/* Return the number of entries in the checksum algorithm table. */
size_t __attribute_const__ btrfs_get_num_csums(void)
{
	return ARRAY_SIZE(btrfs_csums);
}
|
|
|
|
/*
 * Compute the checksum of a single contiguous buffer in one shot.
 *
 * @csum_type: one of the BTRFS_CSUM_TYPE_* values, validated at mount time
 * @data:      buffer to checksum
 * @len:       number of bytes in @data
 * @out:       output for the digest; must be large enough for @csum_type
 */
void btrfs_csum(u16 csum_type, const u8 *data, size_t len, u8 *out)
{
	switch (csum_type) {
	case BTRFS_CSUM_TYPE_CRC32: {
		const u32 crc = ~crc32c(~0, data, len);

		put_unaligned_le32(crc, out);
		break;
	}
	case BTRFS_CSUM_TYPE_XXHASH: {
		const u64 hash = xxh64(data, len, 0);

		put_unaligned_le64(hash, out);
		break;
	}
	case BTRFS_CSUM_TYPE_SHA256:
		sha256(data, len, out);
		break;
	case BTRFS_CSUM_TYPE_BLAKE2:
		blake2b(NULL, 0, data, len, out, 32);
		break;
	default:
		/* Checksum type is validated at mount time. */
		BUG();
	}
}
|
|
|
|
/*
 * Begin an incremental checksum computation of type @csum_type in @ctx.
 * Feed data with btrfs_csum_update() and finish with btrfs_csum_final().
 */
void btrfs_csum_init(struct btrfs_csum_ctx *ctx, u16 csum_type)
{
	ctx->csum_type = csum_type;

	switch (csum_type) {
	case BTRFS_CSUM_TYPE_CRC32:
		/* crc32c convention: start from all-ones, invert when finalizing. */
		ctx->crc32 = ~0;
		break;
	case BTRFS_CSUM_TYPE_XXHASH:
		xxh64_reset(&ctx->xxh64, 0);
		break;
	case BTRFS_CSUM_TYPE_SHA256:
		sha256_init(&ctx->sha256);
		break;
	case BTRFS_CSUM_TYPE_BLAKE2:
		blake2b_init(&ctx->blake2b, 32);
		break;
	default:
		/* Checksum type is validated at mount time. */
		BUG();
	}
}
|
|
|
|
/*
 * Feed @len bytes at @data into the incremental checksum in @ctx.
 * @ctx must have been set up with btrfs_csum_init().
 */
void btrfs_csum_update(struct btrfs_csum_ctx *ctx, const u8 *data, size_t len)
{
	const u16 csum_type = ctx->csum_type;

	switch (csum_type) {
	case BTRFS_CSUM_TYPE_CRC32:
		ctx->crc32 = crc32c(ctx->crc32, data, len);
		break;
	case BTRFS_CSUM_TYPE_XXHASH:
		xxh64_update(&ctx->xxh64, data, len);
		break;
	case BTRFS_CSUM_TYPE_SHA256:
		sha256_update(&ctx->sha256, data, len);
		break;
	case BTRFS_CSUM_TYPE_BLAKE2:
		blake2b_update(&ctx->blake2b, data, len);
		break;
	default:
		/* Checksum type is validated at mount time. */
		BUG();
	}
}
|
|
|
|
/*
 * Finish the incremental checksum in @ctx and store the digest in @out.
 * @out must be large enough for the digest of ctx->csum_type.
 */
void btrfs_csum_final(struct btrfs_csum_ctx *ctx, u8 *out)
{
	const u16 csum_type = ctx->csum_type;

	switch (csum_type) {
	case BTRFS_CSUM_TYPE_CRC32:
		/* Final inversion pairs with the ~0 seed from btrfs_csum_init(). */
		put_unaligned_le32(~ctx->crc32, out);
		break;
	case BTRFS_CSUM_TYPE_XXHASH:
		put_unaligned_le64(xxh64_digest(&ctx->xxh64), out);
		break;
	case BTRFS_CSUM_TYPE_SHA256:
		sha256_final(&ctx->sha256, out);
		break;
	case BTRFS_CSUM_TYPE_BLAKE2:
		blake2b_final(&ctx->blake2b, out);
		break;
	default:
		/* Checksum type is validated at mount time. */
		BUG();
	}
}
|
|
|
|
/*
|
|
* We support the following block sizes for all systems:
|
|
*
|
|
* - 4K
|
|
* This is the most common block size. For PAGE SIZE > 4K cases the subpage
|
|
* mode is used.
|
|
*
|
|
* - PAGE_SIZE
|
|
* The straightforward block size to support.
|
|
*
|
|
* And extra support for the following block sizes based on the kernel config:
|
|
*
|
|
* - MIN_BLOCKSIZE
|
|
* This is either 4K (regular builds) or 2K (debug builds)
|
|
* This allows testing subpage routines on x86_64.
|
|
*/
|
|
bool __attribute_const__ btrfs_supported_blocksize(u32 blocksize)
|
|
{
|
|
/* @blocksize should be validated first. */
|
|
ASSERT(is_power_of_2(blocksize) && blocksize >= BTRFS_MIN_BLOCKSIZE &&
|
|
blocksize <= BTRFS_MAX_BLOCKSIZE);
|
|
|
|
if (blocksize == PAGE_SIZE || blocksize == SZ_4K || blocksize == BTRFS_MIN_BLOCKSIZE)
|
|
return true;
|
|
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
|
/*
|
|
* For bs > ps support it's done by specifying a minimal folio order
|
|
* for filemap, thus implying large data folios.
|
|
* For HIGHMEM systems, we can not always access the content of a (large)
|
|
* folio in one go, but go through them page by page.
|
|
*
|
|
* A lot of features don't implement a proper PAGE sized loop for large
|
|
* folios, this includes:
|
|
*
|
|
* - compression
|
|
* - verity
|
|
* - encoded write
|
|
*
|
|
* Considering HIGHMEM is such a pain to deal with and it's going
|
|
* to be deprecated eventually, just reject HIGHMEM && bs > ps cases.
|
|
*/
|
|
if (IS_ENABLED(CONFIG_HIGHMEM) && blocksize > PAGE_SIZE)
|
|
return false;
|
|
return true;
|
|
#endif
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Start exclusive operation @type, return true on success.
|
|
*/
|
|
bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
|
|
enum btrfs_exclusive_operation type)
|
|
{
|
|
bool ret = false;
|
|
|
|
spin_lock(&fs_info->super_lock);
|
|
if (fs_info->exclusive_operation == BTRFS_EXCLOP_NONE) {
|
|
fs_info->exclusive_operation = type;
|
|
ret = true;
|
|
}
|
|
spin_unlock(&fs_info->super_lock);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Conditionally allow to enter the exclusive operation in case it's compatible
|
|
* with the running one. This must be paired with btrfs_exclop_start_unlock()
|
|
* and btrfs_exclop_finish().
|
|
*
|
|
* Compatibility:
|
|
* - the same type is already running
|
|
* - when trying to add a device and balance has been paused
|
|
* - not BTRFS_EXCLOP_NONE - this is intentionally incompatible and the caller
|
|
* must check the condition first that would allow none -> @type
|
|
*/
|
|
bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
|
|
enum btrfs_exclusive_operation type)
|
|
{
|
|
spin_lock(&fs_info->super_lock);
|
|
if (fs_info->exclusive_operation == type ||
|
|
(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED &&
|
|
type == BTRFS_EXCLOP_DEV_ADD))
|
|
return true;
|
|
|
|
spin_unlock(&fs_info->super_lock);
|
|
return false;
|
|
}
|
|
|
|
/* Drop super_lock taken by a successful btrfs_exclop_start_try_lock(). */
void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info)
{
	spin_unlock(&fs_info->super_lock);
}
|
|
|
|
/*
 * Finish the running exclusive operation: reset the state to NONE under
 * super_lock and notify sysfs watchers of the "exclusive_operation" file.
 */
void btrfs_exclop_finish(struct btrfs_fs_info *fs_info)
{
	spin_lock(&fs_info->super_lock);
	WRITE_ONCE(fs_info->exclusive_operation, BTRFS_EXCLOP_NONE);
	spin_unlock(&fs_info->super_lock);
	sysfs_notify(&fs_info->fs_devices->fsid_kobj, NULL, "exclusive_operation");
}
|
|
|
|
/*
 * Transition the exclusive-operation state between balance and paused
 * balance. @op must be BTRFS_EXCLOP_BALANCE or BTRFS_EXCLOP_BALANCE_PAUSED;
 * anything else only logs a warning.
 */
void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
			  enum btrfs_exclusive_operation op)
{
	switch (op) {
	case BTRFS_EXCLOP_BALANCE_PAUSED:
		spin_lock(&fs_info->super_lock);
		/* Pausing is valid from these states only (incl. an idempotent re-pause). */
		ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE ||
		       fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD ||
		       fs_info->exclusive_operation == BTRFS_EXCLOP_NONE ||
		       fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
		fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
		spin_unlock(&fs_info->super_lock);
		break;
	case BTRFS_EXCLOP_BALANCE:
		spin_lock(&fs_info->super_lock);
		/* Resuming balance is only valid from the paused state. */
		ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
		fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE;
		spin_unlock(&fs_info->super_lock);
		break;
	default:
		btrfs_warn(fs_info,
			   "invalid exclop balance operation %d requested", op);
	}
}
|
|
|
|
/*
 * Set incompat feature @flag (named @name for logging) in the in-memory
 * super block copy. Uses an unlocked pre-check and re-checks under
 * super_lock so the flag is set and logged exactly once.
 */
void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag,
			     const char *name)
{
	struct btrfs_super_block *disk_super = fs_info->super_copy;
	u64 features = btrfs_super_incompat_flags(disk_super);

	/* Fast path: already set, nothing to do. */
	if (features & flag)
		return;

	spin_lock(&fs_info->super_lock);
	features = btrfs_super_incompat_flags(disk_super);
	if (!(features & flag)) {
		features |= flag;
		btrfs_set_super_incompat_flags(disk_super, features);
		btrfs_info(fs_info,
			   "setting incompat feature flag for %s (0x%llx)",
			   name, flag);
	}
	spin_unlock(&fs_info->super_lock);
	set_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags);
}
|
|
|
|
/*
 * Clear incompat feature @flag (named @name for logging) from the in-memory
 * super block copy. Mirrors __btrfs_set_fs_incompat(): unlocked pre-check,
 * then re-check under super_lock.
 */
void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag,
			       const char *name)
{
	struct btrfs_super_block *disk_super = fs_info->super_copy;
	u64 features = btrfs_super_incompat_flags(disk_super);

	/* Fast path: already clear, nothing to do. */
	if (!(features & flag))
		return;

	spin_lock(&fs_info->super_lock);
	features = btrfs_super_incompat_flags(disk_super);
	if (features & flag) {
		features &= ~flag;
		btrfs_set_super_incompat_flags(disk_super, features);
		btrfs_info(fs_info,
			   "clearing incompat feature flag for %s (0x%llx)",
			   name, flag);
	}
	spin_unlock(&fs_info->super_lock);
	set_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags);
}
|
|
|
|
/*
 * Set compat-ro feature @flag (named @name for logging) in the in-memory
 * super block copy. Unlocked pre-check, re-checked under super_lock so the
 * flag is set and logged exactly once.
 */
void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag,
			      const char *name)
{
	struct btrfs_super_block *disk_super = fs_info->super_copy;
	u64 features = btrfs_super_compat_ro_flags(disk_super);

	/* Fast path: already set, nothing to do. */
	if (features & flag)
		return;

	spin_lock(&fs_info->super_lock);
	features = btrfs_super_compat_ro_flags(disk_super);
	if (!(features & flag)) {
		features |= flag;
		btrfs_set_super_compat_ro_flags(disk_super, features);
		btrfs_info(fs_info,
			   "setting compat-ro feature flag for %s (0x%llx)",
			   name, flag);
	}
	spin_unlock(&fs_info->super_lock);
	set_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags);
}
|
|
|
|
/*
 * Clear compat-ro feature @flag (named @name for logging) from the in-memory
 * super block copy. Mirrors __btrfs_set_fs_compat_ro(): unlocked pre-check,
 * then re-check under super_lock.
 */
void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag,
				const char *name)
{
	struct btrfs_super_block *disk_super = fs_info->super_copy;
	u64 features = btrfs_super_compat_ro_flags(disk_super);

	/* Fast path: already clear, nothing to do. */
	if (!(features & flag))
		return;

	spin_lock(&fs_info->super_lock);
	features = btrfs_super_compat_ro_flags(disk_super);
	if (features & flag) {
		features &= ~flag;
		btrfs_set_super_compat_ro_flags(disk_super, features);
		btrfs_info(fs_info,
			   "clearing compat-ro feature flag for %s (0x%llx)",
			   name, flag);
	}
	spin_unlock(&fs_info->super_lock);
	set_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags);
}
|