mirror of
https://github.com/torvalds/linux.git
synced 2026-03-07 23:04:33 +01:00
Changes since last update:
- Support inode page cache sharing among filesystems
- Formally separate optional encoded (aka compressed) inode layouts
(and the implementations) from the EROFS core on-disk aligned plain
format for future zero-trust security usage
- Improve performance by caching the fact that an inode does not have
a POSIX ACL
- Improve LZ4 decompression error reporting
- Enable LZMA by default and promote DEFLATE and Zstandard algorithms
out of EXPERIMENTAL status
- Switch to inode_set_cached_link() to cache symlink lengths
- random bugfixes and minor cleanups
-----BEGIN PGP SIGNATURE-----
iQJFBAABCgAvFiEEQ0A6bDUS9Y+83NPFUXZn5Zlu5qoFAmmJWA8RHHhpYW5nQGtl
cm5lbC5vcmcACgkQUXZn5Zlu5qpKRhAAmmkeLT5vwxpdk9l5uAzz9rvpJgZzorl2
grD6jn0whzSi3BY7MiSDwcY2wl5xPuZjHRnqrcwQzsxua/Y6YJe9mIZTKhviYzuD
6A90OxO4cIseXlGL+AK+OgiFSUBvC+0AttE9napOxQmkTrBkYPDYX2IoMOxr+1DA
vtsPAWmmYOeyjV+2nYT3qVYKk5LaHu+wjXsH6U7RDi1Cut3xu3FIRqtWKatdfhWs
0NSRVc9IcWyBvMRPjGwlEhGY+XW+tXa62NWNTDDTyXCMVVx4TKXMueJkHvo+ysYg
i7uypDAI+JfnasrlsEuRjjvvqg+bKm+6wd1y9FIU8AefPf2kp1P5QmqmhhPv0PyI
WMm6ZwQX4DTZPo6P4goxw4/SvxY8UMPHYb8/APCI7NfzG8DHCXH/OxW5yamCxL/a
6ZREjpkBtMH4lT9adCNsuKK5HQepsECCXr1BWHQDWarFFoRn0mGYIxZiHspMY2wQ
SaqSkMre59S/ZstYjtYhjwyQPscxq3mejh9Cj7R37U0nhziY54EfwytvlFrTyDZ5
gg9g+/pzEdgfjJ/sVHYMo8lHhglgzFa9hTD41qeu7AeuRmJq4GAlMhnN2bmbuoDs
mgBQam4+m74UyF1yk1L9ks8Ucepkgb/rdLr7u90nCg8PfhtQjyK46BnaCXwmktCz
0d7u6QZXNZ8=
=REdF
-----END PGP SIGNATURE-----
Merge tag 'erofs-for-7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs updates from Gao Xiang:
"In this cycle, inode page cache sharing among filesystems on the same
machine is now supported, which is particularly useful for
high-density hosts running tens of thousands of containers.
In addition, we fully isolate the EROFS core on-disk format from other
optional encoded layouts since the core on-disk part is designed to be
simple, effective, and secure. Users can use the core format to build
unique golden immutable images and import their filesystem trees
directly from raw block devices via DMA, page-mapped DAX devices,
and/or file-backed mounts without having to worry about unnecessary
intrinsic consistency issues found in other generic filesystems by
design. However, the full vision is still a work in progress, and more
time will be needed to achieve the final goals.
There are other improvements and bug fixes as usual, as listed below:
- Support inode page cache sharing among filesystems
- Formally separate optional encoded (aka compressed) inode layouts
(and the implementations) from the EROFS core on-disk aligned plain
format for future zero-trust security usage
- Improve performance by caching the fact that an inode does not have
a POSIX ACL
- Improve LZ4 decompression error reporting
- Enable LZMA by default and promote DEFLATE and Zstandard algorithms
out of EXPERIMENTAL status
- Switch to inode_set_cached_link() to cache symlink lengths
- random bugfixes and minor cleanups"
* tag 'erofs-for-7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs: (31 commits)
erofs: fix UAF issue for file-backed mounts w/ directio option
erofs: update compression algorithm status
erofs: fix inline data read failure for ztailpacking pclusters
erofs: avoid some unnecessary #ifdefs
erofs: handle end of filesystem properly for file-backed mounts
erofs: separate plain and compressed filesystems formally
erofs: use inode_set_cached_link()
erofs: mark inodes without acls in erofs_read_inode()
erofs: implement .fadvise for page cache share
erofs: support compressed inodes for page cache share
erofs: support unencoded inodes for page cache share
erofs: pass inode to trace_erofs_read_folio
erofs: introduce the page cache share feature
erofs: using domain_id in the safer way
erofs: add erofs_inode_set_aops helper to set the aops
erofs: support user-defined fingerprint name
erofs: decouple `struct erofs_anon_fs_type`
fs: Export alloc_empty_backing_file
erofs: tidy up erofs_init_inode_xattrs()
erofs: add missing documentation about `directio` mount option
...
This commit is contained in:
commit
3893854000
21 changed files with 803 additions and 412 deletions
|
|
@ -3,19 +3,23 @@ Date: November 2021
|
|||
Contact: "Huang Jianan" <huangjianan@oppo.com>
|
||||
Description: Shows all enabled kernel features.
|
||||
Supported features:
|
||||
zero_padding, compr_cfgs, big_pcluster, chunked_file,
|
||||
device_table, compr_head2, sb_chksum, ztailpacking,
|
||||
dedupe, fragments, 48bit, metabox.
|
||||
compr_cfgs, big_pcluster, chunked_file, device_table,
|
||||
compr_head2, sb_chksum, ztailpacking, dedupe, fragments,
|
||||
48bit, metabox.
|
||||
|
||||
What: /sys/fs/erofs/<disk>/sync_decompress
|
||||
Date: November 2021
|
||||
Contact: "Huang Jianan" <huangjianan@oppo.com>
|
||||
Description: Control strategy of sync decompression:
|
||||
Description: Control strategy of synchronous decompression. Synchronous
|
||||
decompression tries to decompress in the reader thread for
|
||||
synchronous reads and small asynchronous reads (<= 12 KiB):
|
||||
|
||||
- 0 (default, auto): enable for readpage, and enable for
|
||||
readahead on atomic contexts only.
|
||||
- 1 (force on): enable for readpage and readahead.
|
||||
- 2 (force off): disable for all situations.
|
||||
- 0 (auto, default): apply to synchronous reads only, but will
|
||||
switch to 1 (force on) if any decompression
|
||||
request is detected in atomic contexts;
|
||||
- 1 (force on): apply to synchronous reads and small
|
||||
asynchronous reads;
|
||||
- 2 (force off): disable synchronous decompression completely.
|
||||
|
||||
What: /sys/fs/erofs/<disk>/drop_caches
|
||||
Date: November 2024
|
||||
|
|
|
|||
|
|
@ -63,9 +63,9 @@ Here are the main features of EROFS:
|
|||
- Support POSIX.1e ACLs by using extended attributes;
|
||||
|
||||
- Support transparent data compression as an option:
|
||||
LZ4, MicroLZMA and DEFLATE algorithms can be used on a per-file basis; In
|
||||
addition, inplace decompression is also supported to avoid bounce compressed
|
||||
buffers and unnecessary page cache thrashing.
|
||||
LZ4, MicroLZMA, DEFLATE and Zstandard algorithms can be used on a per-file
|
||||
basis; In addition, inplace decompression is also supported to avoid bounce
|
||||
compressed buffers and unnecessary page cache thrashing.
|
||||
|
||||
- Support chunk-based data deduplication and rolling-hash compressed data
|
||||
deduplication;
|
||||
|
|
@ -125,10 +125,18 @@ dax={always,never} Use direct access (no page cache). See
|
|||
Documentation/filesystems/dax.rst.
|
||||
dax A legacy option which is an alias for ``dax=always``.
|
||||
device=%s Specify a path to an extra device to be used together.
|
||||
directio (For file-backed mounts) Use direct I/O to access backing
|
||||
files, and asynchronous I/O will be enabled if supported.
|
||||
fsid=%s Specify a filesystem image ID for Fscache back-end.
|
||||
domain_id=%s Specify a domain ID in fscache mode so that different images
|
||||
with the same blobs under a given domain ID can share storage.
|
||||
domain_id=%s Specify a trusted domain ID for fscache mode so that
|
||||
different images with the same blobs, identified by blob IDs,
|
||||
can share storage within the same trusted domain.
|
||||
Also used for different filesystems with inode page sharing
|
||||
enabled to share page cache within the trusted domain.
|
||||
fsoffset=%llu Specify block-aligned filesystem offset for the primary device.
|
||||
inode_share Enable inode page sharing for this filesystem. Inodes with
|
||||
identical content within the same domain ID can share the
|
||||
page cache.
|
||||
=================== =========================================================
|
||||
|
||||
Sysfs Entries
|
||||
|
|
|
|||
|
|
@ -112,13 +112,14 @@ config EROFS_FS_ZIP
|
|||
config EROFS_FS_ZIP_LZMA
|
||||
bool "EROFS LZMA compressed data support"
|
||||
depends on EROFS_FS_ZIP
|
||||
default y
|
||||
help
|
||||
Saying Y here includes support for reading EROFS file systems
|
||||
containing LZMA compressed data, specifically called microLZMA. It
|
||||
gives better compression ratios than the default LZ4 format, at the
|
||||
expense of more CPU overhead.
|
||||
|
||||
If unsure, say N.
|
||||
Say N if you want to disable LZMA compression support.
|
||||
|
||||
config EROFS_FS_ZIP_DEFLATE
|
||||
bool "EROFS DEFLATE compressed data support"
|
||||
|
|
@ -129,9 +130,6 @@ config EROFS_FS_ZIP_DEFLATE
|
|||
ratios than the default LZ4 format, while it costs more CPU
|
||||
overhead.
|
||||
|
||||
DEFLATE support is an experimental feature for now and so most
|
||||
file systems will be readable without selecting this option.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config EROFS_FS_ZIP_ZSTD
|
||||
|
|
@ -141,10 +139,7 @@ config EROFS_FS_ZIP_ZSTD
|
|||
Saying Y here includes support for reading EROFS file systems
|
||||
containing Zstandard compressed data. It gives better compression
|
||||
ratios than the default LZ4 format, while it costs more CPU
|
||||
overhead.
|
||||
|
||||
Zstandard support is an experimental feature for now and so most
|
||||
file systems will be readable without selecting this option.
|
||||
overhead and memory footprint.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
|
|
@ -194,3 +189,12 @@ config EROFS_FS_PCPU_KTHREAD_HIPRI
|
|||
at higher priority.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config EROFS_FS_PAGE_CACHE_SHARE
|
||||
bool "EROFS page cache share support (experimental)"
|
||||
depends on EROFS_FS && EROFS_FS_XATTR && !EROFS_FS_ONDEMAND
|
||||
help
|
||||
This enables page cache sharing among inodes with identical
|
||||
content fingerprints on the same machine.
|
||||
|
||||
If unsure, say N.
|
||||
|
|
|
|||
|
|
@ -10,3 +10,4 @@ erofs-$(CONFIG_EROFS_FS_ZIP_ZSTD) += decompressor_zstd.o
|
|||
erofs-$(CONFIG_EROFS_FS_ZIP_ACCEL) += decompressor_crypto.o
|
||||
erofs-$(CONFIG_EROFS_FS_BACKED_BY_FILE) += fileio.o
|
||||
erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
|
||||
erofs-$(CONFIG_EROFS_FS_PAGE_CACHE_SHARE) += ishare.o
|
||||
|
|
|
|||
|
|
@ -270,6 +270,7 @@ void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty)
|
|||
struct erofs_iomap_iter_ctx {
|
||||
struct page *page;
|
||||
void *base;
|
||||
struct inode *realinode;
|
||||
};
|
||||
|
||||
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
||||
|
|
@ -277,14 +278,15 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
|||
{
|
||||
struct iomap_iter *iter = container_of(iomap, struct iomap_iter, iomap);
|
||||
struct erofs_iomap_iter_ctx *ctx = iter->private;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct inode *realinode = ctx ? ctx->realinode : inode;
|
||||
struct super_block *sb = realinode->i_sb;
|
||||
struct erofs_map_blocks map;
|
||||
struct erofs_map_dev mdev;
|
||||
int ret;
|
||||
|
||||
map.m_la = offset;
|
||||
map.m_llen = length;
|
||||
ret = erofs_map_blocks(inode, &map);
|
||||
ret = erofs_map_blocks(realinode, &map);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
|
|
@ -297,7 +299,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
|||
return 0;
|
||||
}
|
||||
|
||||
if (!(map.m_flags & EROFS_MAP_META) || !erofs_inode_in_metabox(inode)) {
|
||||
if (!(map.m_flags & EROFS_MAP_META) || !erofs_inode_in_metabox(realinode)) {
|
||||
mdev = (struct erofs_map_dev) {
|
||||
.m_deviceid = map.m_deviceid,
|
||||
.m_pa = map.m_pa,
|
||||
|
|
@ -323,7 +325,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
|||
void *ptr;
|
||||
|
||||
ptr = erofs_read_metabuf(&buf, sb, map.m_pa,
|
||||
erofs_inode_in_metabox(inode));
|
||||
erofs_inode_in_metabox(realinode));
|
||||
if (IS_ERR(ptr))
|
||||
return PTR_ERR(ptr);
|
||||
iomap->inline_data = ptr;
|
||||
|
|
@ -364,12 +366,10 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
|||
u64 start, u64 len)
|
||||
{
|
||||
if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) {
|
||||
#ifdef CONFIG_EROFS_FS_ZIP
|
||||
if (!IS_ENABLED(CONFIG_EROFS_FS_ZIP))
|
||||
return -EOPNOTSUPP;
|
||||
return iomap_fiemap(inode, fieinfo, start, len,
|
||||
&z_erofs_iomap_report_ops);
|
||||
#else
|
||||
return -EOPNOTSUPP;
|
||||
#endif
|
||||
}
|
||||
return iomap_fiemap(inode, fieinfo, start, len, &erofs_iomap_ops);
|
||||
}
|
||||
|
|
@ -384,11 +384,15 @@ static int erofs_read_folio(struct file *file, struct folio *folio)
|
|||
.ops = &iomap_bio_read_ops,
|
||||
.cur_folio = folio,
|
||||
};
|
||||
struct erofs_iomap_iter_ctx iter_ctx = {};
|
||||
|
||||
trace_erofs_read_folio(folio, true);
|
||||
bool need_iput;
|
||||
struct erofs_iomap_iter_ctx iter_ctx = {
|
||||
.realinode = erofs_real_inode(folio_inode(folio), &need_iput),
|
||||
};
|
||||
|
||||
trace_erofs_read_folio(iter_ctx.realinode, folio, true);
|
||||
iomap_read_folio(&erofs_iomap_ops, &read_ctx, &iter_ctx);
|
||||
if (need_iput)
|
||||
iput(iter_ctx.realinode);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -398,12 +402,16 @@ static void erofs_readahead(struct readahead_control *rac)
|
|||
.ops = &iomap_bio_read_ops,
|
||||
.rac = rac,
|
||||
};
|
||||
struct erofs_iomap_iter_ctx iter_ctx = {};
|
||||
|
||||
trace_erofs_readahead(rac->mapping->host, readahead_index(rac),
|
||||
readahead_count(rac), true);
|
||||
bool need_iput;
|
||||
struct erofs_iomap_iter_ctx iter_ctx = {
|
||||
.realinode = erofs_real_inode(rac->mapping->host, &need_iput),
|
||||
};
|
||||
|
||||
trace_erofs_readahead(iter_ctx.realinode, readahead_index(rac),
|
||||
readahead_count(rac), true);
|
||||
iomap_readahead(&erofs_iomap_ops, &read_ctx, &iter_ctx);
|
||||
if (need_iput)
|
||||
iput(iter_ctx.realinode);
|
||||
}
|
||||
|
||||
static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
|
||||
|
|
@ -419,12 +427,13 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
|||
if (!iov_iter_count(to))
|
||||
return 0;
|
||||
|
||||
#ifdef CONFIG_FS_DAX
|
||||
if (IS_DAX(inode))
|
||||
if (IS_ENABLED(CONFIG_FS_DAX) && IS_DAX(inode))
|
||||
return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
|
||||
#endif
|
||||
|
||||
if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev) {
|
||||
struct erofs_iomap_iter_ctx iter_ctx = {};
|
||||
struct erofs_iomap_iter_ctx iter_ctx = {
|
||||
.realinode = inode,
|
||||
};
|
||||
|
||||
return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
|
||||
NULL, 0, &iter_ctx, 0);
|
||||
|
|
@ -480,12 +489,11 @@ static loff_t erofs_file_llseek(struct file *file, loff_t offset, int whence)
|
|||
struct inode *inode = file->f_mapping->host;
|
||||
const struct iomap_ops *ops = &erofs_iomap_ops;
|
||||
|
||||
if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout))
|
||||
#ifdef CONFIG_EROFS_FS_ZIP
|
||||
if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) {
|
||||
if (!IS_ENABLED(CONFIG_EROFS_FS_ZIP))
|
||||
return generic_file_llseek(file, offset, whence);
|
||||
ops = &z_erofs_iomap_report_ops;
|
||||
#else
|
||||
return generic_file_llseek(file, offset, whence);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (whence == SEEK_HOLE)
|
||||
offset = iomap_seek_hole(inode, offset, ops);
|
||||
|
|
|
|||
|
|
@ -34,7 +34,10 @@ static int z_erofs_load_lz4_config(struct super_block *sb,
|
|||
}
|
||||
} else {
|
||||
distance = le16_to_cpu(dsb->u1.lz4_max_distance);
|
||||
if (!distance && !erofs_sb_has_lz4_0padding(sbi))
|
||||
return 0;
|
||||
sbi->lz4.max_pclusterblks = 1;
|
||||
sbi->available_compr_algs = 1 << Z_EROFS_COMPRESSION_LZ4;
|
||||
}
|
||||
|
||||
sbi->lz4.max_distance_pages = distance ?
|
||||
|
|
@ -195,55 +198,47 @@ const char *z_erofs_fixup_insize(struct z_erofs_decompress_req *rq,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq, u8 *dst)
|
||||
static const char *__z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
|
||||
u8 *dst)
|
||||
{
|
||||
bool support_0padding = false, may_inplace = false;
|
||||
bool may_inplace = false;
|
||||
unsigned int inputmargin;
|
||||
u8 *out, *headpage, *src;
|
||||
const char *reason;
|
||||
int ret, maptype;
|
||||
|
||||
DBG_BUGON(*rq->in == NULL);
|
||||
headpage = kmap_local_page(*rq->in);
|
||||
|
||||
/* LZ4 decompression inplace is only safe if zero_padding is enabled */
|
||||
if (erofs_sb_has_zero_padding(EROFS_SB(rq->sb))) {
|
||||
support_0padding = true;
|
||||
reason = z_erofs_fixup_insize(rq, headpage + rq->pageofs_in,
|
||||
min_t(unsigned int, rq->inputsize,
|
||||
rq->sb->s_blocksize - rq->pageofs_in));
|
||||
if (reason) {
|
||||
kunmap_local(headpage);
|
||||
return IS_ERR(reason) ? PTR_ERR(reason) : -EFSCORRUPTED;
|
||||
}
|
||||
may_inplace = !((rq->pageofs_in + rq->inputsize) &
|
||||
(rq->sb->s_blocksize - 1));
|
||||
reason = z_erofs_fixup_insize(rq, headpage + rq->pageofs_in,
|
||||
min_t(unsigned int, rq->inputsize,
|
||||
rq->sb->s_blocksize - rq->pageofs_in));
|
||||
if (reason) {
|
||||
kunmap_local(headpage);
|
||||
return reason;
|
||||
}
|
||||
may_inplace = !((rq->pageofs_in + rq->inputsize) &
|
||||
(rq->sb->s_blocksize - 1));
|
||||
|
||||
inputmargin = rq->pageofs_in;
|
||||
src = z_erofs_lz4_handle_overlap(rq, headpage, dst, &inputmargin,
|
||||
&maptype, may_inplace);
|
||||
if (IS_ERR(src))
|
||||
return PTR_ERR(src);
|
||||
return ERR_CAST(src);
|
||||
|
||||
out = dst + rq->pageofs_out;
|
||||
/* legacy format could compress extra data in a pcluster. */
|
||||
if (rq->partial_decoding || !support_0padding)
|
||||
if (rq->partial_decoding)
|
||||
ret = LZ4_decompress_safe_partial(src + inputmargin, out,
|
||||
rq->inputsize, rq->outputsize, rq->outputsize);
|
||||
else
|
||||
ret = LZ4_decompress_safe(src + inputmargin, out,
|
||||
rq->inputsize, rq->outputsize);
|
||||
if (ret == rq->outputsize)
|
||||
reason = NULL;
|
||||
else if (ret < 0)
|
||||
reason = "corrupted compressed data";
|
||||
else
|
||||
reason = "unexpected end of stream";
|
||||
|
||||
if (ret != rq->outputsize) {
|
||||
if (ret >= 0)
|
||||
memset(out + ret, 0, rq->outputsize - ret);
|
||||
ret = -EFSCORRUPTED;
|
||||
} else {
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
if (maptype == 0) {
|
||||
if (!maptype) {
|
||||
kunmap_local(headpage);
|
||||
} else if (maptype == 1) {
|
||||
vm_unmap_ram(src, rq->inpages);
|
||||
|
|
@ -251,15 +246,16 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq, u8 *dst
|
|||
z_erofs_put_gbuf(src);
|
||||
} else if (maptype != 3) {
|
||||
DBG_BUGON(1);
|
||||
return -EFAULT;
|
||||
return ERR_PTR(-EFAULT);
|
||||
}
|
||||
return ret;
|
||||
return reason;
|
||||
}
|
||||
|
||||
static const char *z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
|
||||
struct page **pagepool)
|
||||
{
|
||||
unsigned int dst_maptype;
|
||||
const char *reason;
|
||||
void *dst;
|
||||
int ret;
|
||||
|
||||
|
|
@ -283,12 +279,12 @@ static const char *z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
|
|||
dst_maptype = 2;
|
||||
}
|
||||
}
|
||||
ret = z_erofs_lz4_decompress_mem(rq, dst);
|
||||
reason = __z_erofs_lz4_decompress(rq, dst);
|
||||
if (!dst_maptype)
|
||||
kunmap_local(dst);
|
||||
else if (dst_maptype == 2)
|
||||
vm_unmap_ram(dst, rq->outpages);
|
||||
return ERR_PTR(ret);
|
||||
return reason;
|
||||
}
|
||||
|
||||
static const char *z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
|
||||
|
|
@ -452,42 +448,37 @@ int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb)
|
|||
{
|
||||
struct erofs_sb_info *sbi = EROFS_SB(sb);
|
||||
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
|
||||
unsigned int algs, alg;
|
||||
unsigned long algs, alg;
|
||||
erofs_off_t offset;
|
||||
int size, ret = 0;
|
||||
|
||||
if (!erofs_sb_has_compr_cfgs(sbi)) {
|
||||
sbi->available_compr_algs = 1 << Z_EROFS_COMPRESSION_LZ4;
|
||||
if (!erofs_sb_has_compr_cfgs(sbi))
|
||||
return z_erofs_load_lz4_config(sb, dsb, NULL, 0);
|
||||
}
|
||||
|
||||
sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs);
|
||||
if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) {
|
||||
erofs_err(sb, "unidentified algorithms %x, please upgrade kernel",
|
||||
sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS);
|
||||
algs = le16_to_cpu(dsb->u1.available_compr_algs);
|
||||
sbi->available_compr_algs = algs;
|
||||
if (algs & ~Z_EROFS_ALL_COMPR_ALGS) {
|
||||
erofs_err(sb, "unidentified algorithms %lx, please upgrade kernel",
|
||||
algs & ~Z_EROFS_ALL_COMPR_ALGS);
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
(void)erofs_init_metabuf(&buf, sb, false);
|
||||
offset = EROFS_SUPER_OFFSET + sbi->sb_size;
|
||||
alg = 0;
|
||||
for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) {
|
||||
for_each_set_bit(alg, &algs, Z_EROFS_COMPRESSION_MAX) {
|
||||
const struct z_erofs_decompressor *dec = z_erofs_decomp[alg];
|
||||
void *data;
|
||||
|
||||
if (!(algs & 1))
|
||||
continue;
|
||||
|
||||
data = erofs_read_metadata(sb, &buf, &offset, &size);
|
||||
if (IS_ERR(data)) {
|
||||
ret = PTR_ERR(data);
|
||||
break;
|
||||
}
|
||||
|
||||
if (alg < Z_EROFS_COMPRESSION_MAX && dec && dec->config) {
|
||||
if (dec && dec->config) {
|
||||
ret = dec->config(sb, dsb, data, size);
|
||||
} else {
|
||||
erofs_err(sb, "algorithm %d isn't enabled on this kernel",
|
||||
erofs_err(sb, "algorithm %ld isn't enabled on this kernel",
|
||||
alg);
|
||||
ret = -EOPNOTSUPP;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ struct z_erofs_crypto_engine {
|
|||
struct crypto_acomp *tfm;
|
||||
};
|
||||
|
||||
struct z_erofs_crypto_engine *z_erofs_crypto[Z_EROFS_COMPRESSION_MAX] = {
|
||||
static struct z_erofs_crypto_engine *z_erofs_crypto[Z_EROFS_COMPRESSION_MAX] = {
|
||||
[Z_EROFS_COMPRESSION_LZ4] = (struct z_erofs_crypto_engine[]) {
|
||||
{},
|
||||
},
|
||||
|
|
|
|||
|
|
@ -89,7 +89,6 @@ static int z_erofs_load_deflate_config(struct super_block *sb,
|
|||
inited = true;
|
||||
}
|
||||
mutex_unlock(&deflate_resize_mutex);
|
||||
erofs_info(sb, "EXPERIMENTAL DEFLATE feature in use. Use at your own risk!");
|
||||
return 0;
|
||||
failed:
|
||||
mutex_unlock(&deflate_resize_mutex);
|
||||
|
|
|
|||
|
|
@ -17,13 +17,13 @@
|
|||
#define EROFS_FEATURE_COMPAT_XATTR_FILTER 0x00000004
|
||||
#define EROFS_FEATURE_COMPAT_SHARED_EA_IN_METABOX 0x00000008
|
||||
#define EROFS_FEATURE_COMPAT_PLAIN_XATTR_PFX 0x00000010
|
||||
|
||||
#define EROFS_FEATURE_COMPAT_ISHARE_XATTRS 0x00000020
|
||||
|
||||
/*
|
||||
* Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should
|
||||
* be incompatible with this kernel version.
|
||||
*/
|
||||
#define EROFS_FEATURE_INCOMPAT_ZERO_PADDING 0x00000001
|
||||
#define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING 0x00000001
|
||||
#define EROFS_FEATURE_INCOMPAT_COMPR_CFGS 0x00000002
|
||||
#define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER 0x00000002
|
||||
#define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004
|
||||
|
|
@ -83,7 +83,8 @@ struct erofs_super_block {
|
|||
__le32 xattr_prefix_start; /* start of long xattr prefixes */
|
||||
__le64 packed_nid; /* nid of the special packed inode */
|
||||
__u8 xattr_filter_reserved; /* reserved for xattr name filter */
|
||||
__u8 reserved[3];
|
||||
__u8 ishare_xattr_prefix_id;
|
||||
__u8 reserved[2];
|
||||
__le32 build_time; /* seconds added to epoch for mkfs time */
|
||||
__le64 rootnid_8b; /* (48BIT on) nid of root directory */
|
||||
__le64 reserved2;
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ struct erofs_fileio_rq {
|
|||
struct bio bio;
|
||||
struct kiocb iocb;
|
||||
struct super_block *sb;
|
||||
refcount_t ref;
|
||||
};
|
||||
|
||||
struct erofs_fileio {
|
||||
|
|
@ -24,31 +25,28 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret)
|
|||
container_of(iocb, struct erofs_fileio_rq, iocb);
|
||||
struct folio_iter fi;
|
||||
|
||||
if (ret > 0) {
|
||||
if (ret != rq->bio.bi_iter.bi_size) {
|
||||
bio_advance(&rq->bio, ret);
|
||||
zero_fill_bio(&rq->bio);
|
||||
}
|
||||
ret = 0;
|
||||
if (ret >= 0 && ret != rq->bio.bi_iter.bi_size) {
|
||||
bio_advance(&rq->bio, ret);
|
||||
zero_fill_bio(&rq->bio);
|
||||
}
|
||||
if (rq->bio.bi_end_io) {
|
||||
if (ret < 0 && !rq->bio.bi_status)
|
||||
rq->bio.bi_status = errno_to_blk_status(ret);
|
||||
} else {
|
||||
if (!rq->bio.bi_end_io) {
|
||||
bio_for_each_folio_all(fi, &rq->bio) {
|
||||
DBG_BUGON(folio_test_uptodate(fi.folio));
|
||||
erofs_onlinefolio_end(fi.folio, ret, false);
|
||||
erofs_onlinefolio_end(fi.folio, ret < 0, false);
|
||||
}
|
||||
} else if (ret < 0 && !rq->bio.bi_status) {
|
||||
rq->bio.bi_status = errno_to_blk_status(ret);
|
||||
}
|
||||
bio_endio(&rq->bio);
|
||||
bio_uninit(&rq->bio);
|
||||
kfree(rq);
|
||||
if (refcount_dec_and_test(&rq->ref))
|
||||
kfree(rq);
|
||||
}
|
||||
|
||||
static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
|
||||
{
|
||||
struct iov_iter iter;
|
||||
int ret;
|
||||
ssize_t ret;
|
||||
|
||||
if (!rq)
|
||||
return;
|
||||
|
|
@ -64,6 +62,8 @@ static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
|
|||
ret = vfs_iocb_iter_read(rq->iocb.ki_filp, &rq->iocb, &iter);
|
||||
if (ret != -EIOCBQUEUED)
|
||||
erofs_fileio_ki_complete(&rq->iocb, ret);
|
||||
if (refcount_dec_and_test(&rq->ref))
|
||||
kfree(rq);
|
||||
}
|
||||
|
||||
static struct erofs_fileio_rq *erofs_fileio_rq_alloc(struct erofs_map_dev *mdev)
|
||||
|
|
@ -74,6 +74,7 @@ static struct erofs_fileio_rq *erofs_fileio_rq_alloc(struct erofs_map_dev *mdev)
|
|||
bio_init(&rq->bio, NULL, rq->bvecs, ARRAY_SIZE(rq->bvecs), REQ_OP_READ);
|
||||
rq->iocb.ki_filp = mdev->m_dif->file;
|
||||
rq->sb = mdev->m_sb;
|
||||
refcount_set(&rq->ref, 2);
|
||||
return rq;
|
||||
}
|
||||
|
||||
|
|
@ -88,9 +89,9 @@ void erofs_fileio_submit_bio(struct bio *bio)
|
|||
bio));
|
||||
}
|
||||
|
||||
static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio)
|
||||
static int erofs_fileio_scan_folio(struct erofs_fileio *io,
|
||||
struct inode *inode, struct folio *folio)
|
||||
{
|
||||
struct inode *inode = folio_inode(folio);
|
||||
struct erofs_map_blocks *map = &io->map;
|
||||
unsigned int cur = 0, end = folio_size(folio), len, attached = 0;
|
||||
loff_t pos = folio_pos(folio), ofs;
|
||||
|
|
@ -158,31 +159,38 @@ io_retry:
|
|||
|
||||
static int erofs_fileio_read_folio(struct file *file, struct folio *folio)
|
||||
{
|
||||
bool need_iput;
|
||||
struct inode *realinode = erofs_real_inode(folio_inode(folio), &need_iput);
|
||||
struct erofs_fileio io = {};
|
||||
int err;
|
||||
|
||||
trace_erofs_read_folio(folio, true);
|
||||
err = erofs_fileio_scan_folio(&io, folio);
|
||||
trace_erofs_read_folio(realinode, folio, true);
|
||||
err = erofs_fileio_scan_folio(&io, realinode, folio);
|
||||
erofs_fileio_rq_submit(io.rq);
|
||||
if (need_iput)
|
||||
iput(realinode);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void erofs_fileio_readahead(struct readahead_control *rac)
|
||||
{
|
||||
struct inode *inode = rac->mapping->host;
|
||||
bool need_iput;
|
||||
struct inode *realinode = erofs_real_inode(rac->mapping->host, &need_iput);
|
||||
struct erofs_fileio io = {};
|
||||
struct folio *folio;
|
||||
int err;
|
||||
|
||||
trace_erofs_readahead(inode, readahead_index(rac),
|
||||
trace_erofs_readahead(realinode, readahead_index(rac),
|
||||
readahead_count(rac), true);
|
||||
while ((folio = readahead_folio(rac))) {
|
||||
err = erofs_fileio_scan_folio(&io, folio);
|
||||
err = erofs_fileio_scan_folio(&io, realinode, folio);
|
||||
if (err && err != -EINTR)
|
||||
erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu",
|
||||
folio->index, EROFS_I(inode)->nid);
|
||||
erofs_err(realinode->i_sb, "readahead error at folio %lu @ nid %llu",
|
||||
folio->index, EROFS_I(realinode)->nid);
|
||||
}
|
||||
erofs_fileio_rq_submit(io.rq);
|
||||
if (need_iput)
|
||||
iput(realinode);
|
||||
}
|
||||
|
||||
const struct address_space_operations erofs_fileio_aops = {
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
* Copyright (C) 2022, Alibaba Cloud
|
||||
* Copyright (C) 2022, Bytedance Inc. All rights reserved.
|
||||
*/
|
||||
#include <linux/pseudo_fs.h>
|
||||
#include <linux/fscache.h>
|
||||
#include "internal.h"
|
||||
|
||||
|
|
@ -13,18 +12,6 @@ static LIST_HEAD(erofs_domain_list);
|
|||
static LIST_HEAD(erofs_domain_cookies_list);
|
||||
static struct vfsmount *erofs_pseudo_mnt;
|
||||
|
||||
static int erofs_anon_init_fs_context(struct fs_context *fc)
|
||||
{
|
||||
return init_pseudo(fc, EROFS_SUPER_MAGIC) ? 0 : -ENOMEM;
|
||||
}
|
||||
|
||||
static struct file_system_type erofs_anon_fs_type = {
|
||||
.owner = THIS_MODULE,
|
||||
.name = "pseudo_erofs",
|
||||
.init_fs_context = erofs_anon_init_fs_context,
|
||||
.kill_sb = kill_anon_super,
|
||||
};
|
||||
|
||||
struct erofs_fscache_io {
|
||||
struct netfs_cache_resources cres;
|
||||
struct iov_iter iter;
|
||||
|
|
@ -392,7 +379,7 @@ static void erofs_fscache_domain_put(struct erofs_domain *domain)
|
|||
}
|
||||
fscache_relinquish_volume(domain->volume, NULL, false);
|
||||
mutex_unlock(&erofs_domain_list_lock);
|
||||
kfree(domain->domain_id);
|
||||
kfree_sensitive(domain->domain_id);
|
||||
kfree(domain);
|
||||
return;
|
||||
}
|
||||
|
|
@ -459,7 +446,7 @@ static int erofs_fscache_init_domain(struct super_block *sb)
|
|||
sbi->domain = domain;
|
||||
return 0;
|
||||
out:
|
||||
kfree(domain->domain_id);
|
||||
kfree_sensitive(domain->domain_id);
|
||||
kfree(domain);
|
||||
return err;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,21 +8,29 @@
|
|||
#include <linux/compat.h>
|
||||
#include <trace/events/erofs.h>
|
||||
|
||||
static int erofs_fill_symlink(struct inode *inode, void *kaddr,
|
||||
unsigned int m_pofs)
|
||||
static int erofs_fill_symlink(struct inode *inode, void *bptr, unsigned int ofs)
|
||||
{
|
||||
struct erofs_inode *vi = EROFS_I(inode);
|
||||
loff_t off;
|
||||
char *link;
|
||||
loff_t end;
|
||||
|
||||
m_pofs += vi->xattr_isize;
|
||||
/* check if it cannot be handled with fast symlink scheme */
|
||||
if (vi->datalayout != EROFS_INODE_FLAT_INLINE ||
|
||||
check_add_overflow(m_pofs, inode->i_size, &off) ||
|
||||
off > i_blocksize(inode))
|
||||
return 0;
|
||||
|
||||
inode->i_link = kmemdup_nul(kaddr + m_pofs, inode->i_size, GFP_KERNEL);
|
||||
return inode->i_link ? 0 : -ENOMEM;
|
||||
ofs += vi->xattr_isize;
|
||||
/* check whether the symlink data is small enough to be inlined */
|
||||
if (vi->datalayout == EROFS_INODE_FLAT_INLINE &&
|
||||
!check_add_overflow(ofs, inode->i_size, &end) &&
|
||||
end <= i_blocksize(inode)) {
|
||||
link = kmemdup_nul(bptr + ofs, inode->i_size, GFP_KERNEL);
|
||||
if (!link)
|
||||
return -ENOMEM;
|
||||
if (unlikely(!inode->i_size || strlen(link) != inode->i_size)) {
|
||||
erofs_err(inode->i_sb, "invalid fast symlink size %llu @ nid %llu",
|
||||
inode->i_size | 0ULL, vi->nid);
|
||||
kfree(link);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
inode_set_cached_link(inode, link, inode->i_size);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int erofs_read_inode(struct inode *inode)
|
||||
|
|
@ -137,6 +145,11 @@ static int erofs_read_inode(struct inode *inode)
|
|||
err = -EFSCORRUPTED;
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_EROFS_FS_POSIX_ACL) &&
|
||||
erofs_inode_has_noacl(inode, ptr, ofs))
|
||||
cache_no_acl(inode);
|
||||
|
||||
switch (inode->i_mode & S_IFMT) {
|
||||
case S_IFDIR:
|
||||
vi->dot_omitted = (ifmt >> EROFS_I_DOT_OMITTED_BIT) & 1;
|
||||
|
|
@ -170,11 +183,17 @@ static int erofs_read_inode(struct inode *inode)
|
|||
goto err_out;
|
||||
}
|
||||
|
||||
if (erofs_inode_is_data_compressed(vi->datalayout))
|
||||
inode->i_blocks = le32_to_cpu(copied.i_u.blocks_lo) <<
|
||||
(sb->s_blocksize_bits - 9);
|
||||
else
|
||||
if (!erofs_inode_is_data_compressed(vi->datalayout)) {
|
||||
inode->i_blocks = round_up(inode->i_size, sb->s_blocksize) >> 9;
|
||||
} else if (!IS_ENABLED(CONFIG_EROFS_FS_ZIP) || !sbi->available_compr_algs) {
|
||||
erofs_err(sb, "compressed inode (nid %llu) is invalid in a plain filesystem",
|
||||
vi->nid);
|
||||
err = -EFSCORRUPTED;
|
||||
goto err_out;
|
||||
} else {
|
||||
inode->i_blocks = le32_to_cpu(copied.i_u.blocks_lo) <<
|
||||
(sb->s_blocksize_bits - 9);
|
||||
}
|
||||
|
||||
if (vi->datalayout == EROFS_INODE_CHUNK_BASED) {
|
||||
/* fill chunked inode summary info */
|
||||
|
|
@ -203,7 +222,6 @@ err_out:
|
|||
|
||||
static int erofs_fill_inode(struct inode *inode)
|
||||
{
|
||||
struct erofs_inode *vi = EROFS_I(inode);
|
||||
int err;
|
||||
|
||||
trace_erofs_fill_inode(inode);
|
||||
|
|
@ -214,7 +232,8 @@ static int erofs_fill_inode(struct inode *inode)
|
|||
switch (inode->i_mode & S_IFMT) {
|
||||
case S_IFREG:
|
||||
inode->i_op = &erofs_generic_iops;
|
||||
inode->i_fop = &erofs_file_fops;
|
||||
inode->i_fop = erofs_ishare_fill_inode(inode) ?
|
||||
&erofs_ishare_fops : &erofs_file_fops;
|
||||
break;
|
||||
case S_IFDIR:
|
||||
inode->i_op = &erofs_dir_iops;
|
||||
|
|
@ -235,28 +254,7 @@ static int erofs_fill_inode(struct inode *inode)
|
|||
}
|
||||
|
||||
mapping_set_large_folios(inode->i_mapping);
|
||||
if (erofs_inode_is_data_compressed(vi->datalayout)) {
|
||||
#ifdef CONFIG_EROFS_FS_ZIP
|
||||
DO_ONCE_LITE_IF(inode->i_blkbits != PAGE_SHIFT,
|
||||
erofs_info, inode->i_sb,
|
||||
"EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!");
|
||||
inode->i_mapping->a_ops = &z_erofs_aops;
|
||||
#else
|
||||
err = -EOPNOTSUPP;
|
||||
#endif
|
||||
} else {
|
||||
inode->i_mapping->a_ops = &erofs_aops;
|
||||
#ifdef CONFIG_EROFS_FS_ONDEMAND
|
||||
if (erofs_is_fscache_mode(inode->i_sb))
|
||||
inode->i_mapping->a_ops = &erofs_fscache_access_aops;
|
||||
#endif
|
||||
#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
|
||||
if (erofs_is_fileio_mode(EROFS_SB(inode->i_sb)))
|
||||
inode->i_mapping->a_ops = &erofs_fileio_aops;
|
||||
#endif
|
||||
}
|
||||
|
||||
return err;
|
||||
return erofs_inode_set_aops(inode, inode, false);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -59,10 +59,6 @@ enum {
|
|||
struct erofs_mount_opts {
|
||||
/* current strategy of how to use managed cache */
|
||||
unsigned char cache_strategy;
|
||||
/* strategy of sync decompression (0 - auto, 1 - force on, 2 - force off) */
|
||||
unsigned int sync_decompress;
|
||||
/* threshold for decompression synchronously */
|
||||
unsigned int max_sync_decompress_pages;
|
||||
unsigned int mount_opt;
|
||||
};
|
||||
|
||||
|
|
@ -116,8 +112,8 @@ struct erofs_sb_info {
|
|||
/* managed XArray arranged in physical block number */
|
||||
struct xarray managed_pslots;
|
||||
|
||||
unsigned int sync_decompress; /* strategy for sync decompression */
|
||||
unsigned int shrinker_run_no;
|
||||
u16 available_compr_algs;
|
||||
|
||||
/* pseudo inode to manage cached pages */
|
||||
struct inode *managed_cache;
|
||||
|
|
@ -134,6 +130,7 @@ struct erofs_sb_info {
|
|||
u32 xattr_blkaddr;
|
||||
u32 xattr_prefix_start;
|
||||
u8 xattr_prefix_count;
|
||||
u8 ishare_xattr_prefix_id;
|
||||
struct erofs_xattr_prefix_item *xattr_prefixes;
|
||||
unsigned int xattr_filter_reserved;
|
||||
#endif
|
||||
|
|
@ -156,6 +153,7 @@ struct erofs_sb_info {
|
|||
char *volume_name;
|
||||
u32 feature_compat;
|
||||
u32 feature_incompat;
|
||||
u16 available_compr_algs;
|
||||
|
||||
/* sysfs support */
|
||||
struct kobject s_kobj; /* /sys/fs/erofs/<devname> */
|
||||
|
|
@ -178,6 +176,7 @@ struct erofs_sb_info {
|
|||
#define EROFS_MOUNT_DAX_ALWAYS 0x00000040
|
||||
#define EROFS_MOUNT_DAX_NEVER 0x00000080
|
||||
#define EROFS_MOUNT_DIRECT_IO 0x00000100
|
||||
#define EROFS_MOUNT_INODE_SHARE 0x00000200
|
||||
|
||||
#define clear_opt(opt, option) ((opt)->mount_opt &= ~EROFS_MOUNT_##option)
|
||||
#define set_opt(opt, option) ((opt)->mount_opt |= EROFS_MOUNT_##option)
|
||||
|
|
@ -188,6 +187,8 @@ static inline bool erofs_is_fileio_mode(struct erofs_sb_info *sbi)
|
|||
return IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && sbi->dif0.file;
|
||||
}
|
||||
|
||||
extern struct file_system_type erofs_anon_fs_type;
|
||||
|
||||
static inline bool erofs_is_fscache_mode(struct super_block *sb)
|
||||
{
|
||||
return IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) &&
|
||||
|
|
@ -220,7 +221,7 @@ static inline bool erofs_sb_has_##name(struct erofs_sb_info *sbi) \
|
|||
return sbi->feature_##compat & EROFS_FEATURE_##feature; \
|
||||
}
|
||||
|
||||
EROFS_FEATURE_FUNCS(zero_padding, incompat, INCOMPAT_ZERO_PADDING)
|
||||
EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_LZ4_0PADDING)
|
||||
EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS)
|
||||
EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER)
|
||||
EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE)
|
||||
|
|
@ -236,6 +237,7 @@ EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
|
|||
EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER)
|
||||
EROFS_FEATURE_FUNCS(shared_ea_in_metabox, compat, COMPAT_SHARED_EA_IN_METABOX)
|
||||
EROFS_FEATURE_FUNCS(plain_xattr_pfx, compat, COMPAT_PLAIN_XATTR_PFX)
|
||||
EROFS_FEATURE_FUNCS(ishare_xattrs, compat, COMPAT_ISHARE_XATTRS)
|
||||
|
||||
static inline u64 erofs_nid_to_ino64(struct erofs_sb_info *sbi, erofs_nid_t nid)
|
||||
{
|
||||
|
|
@ -265,6 +267,11 @@ static inline u64 erofs_nid_to_ino64(struct erofs_sb_info *sbi, erofs_nid_t nid)
|
|||
/* default readahead size of directories */
|
||||
#define EROFS_DIR_RA_BYTES 16384
|
||||
|
||||
struct erofs_inode_fingerprint {
|
||||
u8 *opaque;
|
||||
int size;
|
||||
};
|
||||
|
||||
struct erofs_inode {
|
||||
erofs_nid_t nid;
|
||||
|
||||
|
|
@ -300,6 +307,18 @@ struct erofs_inode {
|
|||
};
|
||||
#endif /* CONFIG_EROFS_FS_ZIP */
|
||||
};
|
||||
#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
|
||||
struct list_head ishare_list;
|
||||
union {
|
||||
/* for each anon shared inode */
|
||||
struct {
|
||||
struct erofs_inode_fingerprint fingerprint;
|
||||
spinlock_t ishare_lock;
|
||||
};
|
||||
/* for each real inode */
|
||||
struct inode *sharedinode;
|
||||
};
|
||||
#endif
|
||||
/* the corresponding vfs inode */
|
||||
struct inode vfs_inode;
|
||||
};
|
||||
|
|
@ -406,6 +425,7 @@ extern const struct inode_operations erofs_dir_iops;
|
|||
|
||||
extern const struct file_operations erofs_file_fops;
|
||||
extern const struct file_operations erofs_dir_fops;
|
||||
extern const struct file_operations erofs_ishare_fops;
|
||||
|
||||
extern const struct iomap_ops z_erofs_iomap_report_ops;
|
||||
|
||||
|
|
@ -451,6 +471,28 @@ static inline void *erofs_vm_map_ram(struct page **pages, unsigned int count)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static inline int erofs_inode_set_aops(struct inode *inode,
|
||||
struct inode *realinode, bool no_fscache)
|
||||
{
|
||||
if (erofs_inode_is_data_compressed(EROFS_I(realinode)->datalayout)) {
|
||||
if (!IS_ENABLED(CONFIG_EROFS_FS_ZIP))
|
||||
return -EOPNOTSUPP;
|
||||
DO_ONCE_LITE_IF(realinode->i_blkbits != PAGE_SHIFT,
|
||||
erofs_info, realinode->i_sb,
|
||||
"EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!");
|
||||
inode->i_mapping->a_ops = &z_erofs_aops;
|
||||
return 0;
|
||||
}
|
||||
inode->i_mapping->a_ops = &erofs_aops;
|
||||
if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && !no_fscache &&
|
||||
erofs_is_fscache_mode(realinode->i_sb))
|
||||
inode->i_mapping->a_ops = &erofs_fscache_access_aops;
|
||||
if (IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) &&
|
||||
erofs_is_fileio_mode(EROFS_SB(realinode->i_sb)))
|
||||
inode->i_mapping->a_ops = &erofs_fileio_aops;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int erofs_register_sysfs(struct super_block *sb);
|
||||
void erofs_unregister_sysfs(struct super_block *sb);
|
||||
int __init erofs_init_sysfs(void);
|
||||
|
|
@ -488,7 +530,6 @@ void z_erofs_put_gbuf(void *ptr);
|
|||
int z_erofs_gbuf_growsize(unsigned int nrpages);
|
||||
int __init z_erofs_gbuf_init(void);
|
||||
void z_erofs_gbuf_exit(void);
|
||||
int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb);
|
||||
#else
|
||||
static inline void erofs_shrinker_register(struct super_block *sb) {}
|
||||
static inline void erofs_shrinker_unregister(struct super_block *sb) {}
|
||||
|
|
@ -498,6 +539,7 @@ static inline int z_erofs_init_subsystem(void) { return 0; }
|
|||
static inline void z_erofs_exit_subsystem(void) {}
|
||||
static inline int z_erofs_init_super(struct super_block *sb) { return 0; }
|
||||
#endif /* !CONFIG_EROFS_FS_ZIP */
|
||||
int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb);
|
||||
|
||||
#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
|
||||
struct bio *erofs_fileio_bio_alloc(struct erofs_map_dev *mdev);
|
||||
|
|
@ -537,6 +579,24 @@ static inline struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev) {
|
|||
static inline void erofs_fscache_submit_bio(struct bio *bio) {}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
|
||||
int __init erofs_init_ishare(void);
|
||||
void erofs_exit_ishare(void);
|
||||
bool erofs_ishare_fill_inode(struct inode *inode);
|
||||
void erofs_ishare_free_inode(struct inode *inode);
|
||||
struct inode *erofs_real_inode(struct inode *inode, bool *need_iput);
|
||||
#else
|
||||
static inline int erofs_init_ishare(void) { return 0; }
|
||||
static inline void erofs_exit_ishare(void) {}
|
||||
static inline bool erofs_ishare_fill_inode(struct inode *inode) { return false; }
|
||||
static inline void erofs_ishare_free_inode(struct inode *inode) {}
|
||||
static inline struct inode *erofs_real_inode(struct inode *inode, bool *need_iput)
|
||||
{
|
||||
*need_iput = false;
|
||||
return inode;
|
||||
}
|
||||
#endif
|
||||
|
||||
long erofs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
|
||||
long erofs_compat_ioctl(struct file *filp, unsigned int cmd,
|
||||
unsigned long arg);
|
||||
|
|
|
|||
206
fs/erofs/ishare.c
Normal file
206
fs/erofs/ishare.c
Normal file
|
|
@ -0,0 +1,206 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* Copyright (C) 2024, Alibaba Cloud
|
||||
*/
|
||||
#include <linux/xxhash.h>
|
||||
#include <linux/mount.h>
|
||||
#include "internal.h"
|
||||
#include "xattr.h"
|
||||
|
||||
#include "../internal.h"
|
||||
|
||||
static struct vfsmount *erofs_ishare_mnt;
|
||||
|
||||
static inline bool erofs_is_ishare_inode(struct inode *inode)
|
||||
{
|
||||
/* assumed FS_ONDEMAND is excluded with FS_PAGE_CACHE_SHARE feature */
|
||||
return inode->i_sb->s_type == &erofs_anon_fs_type;
|
||||
}
|
||||
|
||||
static int erofs_ishare_iget5_eq(struct inode *inode, void *data)
|
||||
{
|
||||
struct erofs_inode_fingerprint *fp1 = &EROFS_I(inode)->fingerprint;
|
||||
struct erofs_inode_fingerprint *fp2 = data;
|
||||
|
||||
return fp1->size == fp2->size &&
|
||||
!memcmp(fp1->opaque, fp2->opaque, fp2->size);
|
||||
}
|
||||
|
||||
static int erofs_ishare_iget5_set(struct inode *inode, void *data)
|
||||
{
|
||||
struct erofs_inode *vi = EROFS_I(inode);
|
||||
|
||||
vi->fingerprint = *(struct erofs_inode_fingerprint *)data;
|
||||
INIT_LIST_HEAD(&vi->ishare_list);
|
||||
spin_lock_init(&vi->ishare_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool erofs_ishare_fill_inode(struct inode *inode)
|
||||
{
|
||||
struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
|
||||
struct erofs_inode *vi = EROFS_I(inode);
|
||||
struct erofs_inode_fingerprint fp;
|
||||
struct inode *sharedinode;
|
||||
unsigned long hash;
|
||||
|
||||
if (erofs_xattr_fill_inode_fingerprint(&fp, inode, sbi->domain_id))
|
||||
return false;
|
||||
hash = xxh32(fp.opaque, fp.size, 0);
|
||||
sharedinode = iget5_locked(erofs_ishare_mnt->mnt_sb, hash,
|
||||
erofs_ishare_iget5_eq, erofs_ishare_iget5_set,
|
||||
&fp);
|
||||
if (!sharedinode) {
|
||||
kfree(fp.opaque);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (inode_state_read_once(sharedinode) & I_NEW) {
|
||||
if (erofs_inode_set_aops(sharedinode, inode, true)) {
|
||||
iget_failed(sharedinode);
|
||||
kfree(fp.opaque);
|
||||
return false;
|
||||
}
|
||||
sharedinode->i_size = vi->vfs_inode.i_size;
|
||||
unlock_new_inode(sharedinode);
|
||||
} else {
|
||||
kfree(fp.opaque);
|
||||
if (sharedinode->i_size != vi->vfs_inode.i_size) {
|
||||
_erofs_printk(inode->i_sb, KERN_WARNING
|
||||
"size(%lld:%lld) not matches for the same fingerprint\n",
|
||||
vi->vfs_inode.i_size, sharedinode->i_size);
|
||||
iput(sharedinode);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
vi->sharedinode = sharedinode;
|
||||
INIT_LIST_HEAD(&vi->ishare_list);
|
||||
spin_lock(&EROFS_I(sharedinode)->ishare_lock);
|
||||
list_add(&vi->ishare_list, &EROFS_I(sharedinode)->ishare_list);
|
||||
spin_unlock(&EROFS_I(sharedinode)->ishare_lock);
|
||||
return true;
|
||||
}
|
||||
|
||||
void erofs_ishare_free_inode(struct inode *inode)
|
||||
{
|
||||
struct erofs_inode *vi = EROFS_I(inode);
|
||||
struct inode *sharedinode = vi->sharedinode;
|
||||
|
||||
if (!sharedinode)
|
||||
return;
|
||||
spin_lock(&EROFS_I(sharedinode)->ishare_lock);
|
||||
list_del(&vi->ishare_list);
|
||||
spin_unlock(&EROFS_I(sharedinode)->ishare_lock);
|
||||
iput(sharedinode);
|
||||
vi->sharedinode = NULL;
|
||||
}
|
||||
|
||||
static int erofs_ishare_file_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct inode *sharedinode = EROFS_I(inode)->sharedinode;
|
||||
struct file *realfile;
|
||||
|
||||
if (file->f_flags & O_DIRECT)
|
||||
return -EINVAL;
|
||||
realfile = alloc_empty_backing_file(O_RDONLY|O_NOATIME, current_cred());
|
||||
if (IS_ERR(realfile))
|
||||
return PTR_ERR(realfile);
|
||||
ihold(sharedinode);
|
||||
realfile->f_op = &erofs_file_fops;
|
||||
realfile->f_inode = sharedinode;
|
||||
realfile->f_mapping = sharedinode->i_mapping;
|
||||
path_get(&file->f_path);
|
||||
backing_file_set_user_path(realfile, &file->f_path);
|
||||
|
||||
file_ra_state_init(&realfile->f_ra, file->f_mapping);
|
||||
realfile->private_data = EROFS_I(inode);
|
||||
file->private_data = realfile;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int erofs_ishare_file_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct file *realfile = file->private_data;
|
||||
|
||||
iput(realfile->f_inode);
|
||||
fput(realfile);
|
||||
file->private_data = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t erofs_ishare_file_read_iter(struct kiocb *iocb,
|
||||
struct iov_iter *to)
|
||||
{
|
||||
struct file *realfile = iocb->ki_filp->private_data;
|
||||
struct kiocb dedup_iocb;
|
||||
ssize_t nread;
|
||||
|
||||
if (!iov_iter_count(to))
|
||||
return 0;
|
||||
kiocb_clone(&dedup_iocb, iocb, realfile);
|
||||
nread = filemap_read(&dedup_iocb, to, 0);
|
||||
iocb->ki_pos = dedup_iocb.ki_pos;
|
||||
return nread;
|
||||
}
|
||||
|
||||
static int erofs_ishare_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
{
|
||||
struct file *realfile = file->private_data;
|
||||
|
||||
vma_set_file(vma, realfile);
|
||||
return generic_file_readonly_mmap(file, vma);
|
||||
}
|
||||
|
||||
static int erofs_ishare_fadvise(struct file *file, loff_t offset,
|
||||
loff_t len, int advice)
|
||||
{
|
||||
return vfs_fadvise(file->private_data, offset, len, advice);
|
||||
}
|
||||
|
||||
const struct file_operations erofs_ishare_fops = {
|
||||
.open = erofs_ishare_file_open,
|
||||
.llseek = generic_file_llseek,
|
||||
.read_iter = erofs_ishare_file_read_iter,
|
||||
.mmap = erofs_ishare_mmap,
|
||||
.release = erofs_ishare_file_release,
|
||||
.get_unmapped_area = thp_get_unmapped_area,
|
||||
.splice_read = filemap_splice_read,
|
||||
.fadvise = erofs_ishare_fadvise,
|
||||
};
|
||||
|
||||
struct inode *erofs_real_inode(struct inode *inode, bool *need_iput)
|
||||
{
|
||||
struct erofs_inode *vi, *vi_share;
|
||||
struct inode *realinode;
|
||||
|
||||
*need_iput = false;
|
||||
if (!erofs_is_ishare_inode(inode))
|
||||
return inode;
|
||||
|
||||
vi_share = EROFS_I(inode);
|
||||
spin_lock(&vi_share->ishare_lock);
|
||||
/* fetch any one as real inode */
|
||||
DBG_BUGON(list_empty(&vi_share->ishare_list));
|
||||
list_for_each_entry(vi, &vi_share->ishare_list, ishare_list) {
|
||||
realinode = igrab(&vi->vfs_inode);
|
||||
if (realinode) {
|
||||
*need_iput = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock(&vi_share->ishare_lock);
|
||||
|
||||
DBG_BUGON(!realinode);
|
||||
return realinode;
|
||||
}
|
||||
|
||||
int __init erofs_init_ishare(void)
|
||||
{
|
||||
erofs_ishare_mnt = kern_mount(&erofs_anon_fs_type);
|
||||
return PTR_ERR_OR_ZERO(erofs_ishare_mnt);
|
||||
}
|
||||
|
||||
void erofs_exit_ishare(void)
|
||||
{
|
||||
kern_unmount(erofs_ishare_mnt);
|
||||
}
|
||||
142
fs/erofs/super.c
142
fs/erofs/super.c
|
|
@ -11,6 +11,7 @@
|
|||
#include <linux/fs_parser.h>
|
||||
#include <linux/exportfs.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/pseudo_fs.h>
|
||||
#include "xattr.h"
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
|
|
@ -121,18 +122,6 @@ void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf,
|
|||
return buffer;
|
||||
}
|
||||
|
||||
#ifndef CONFIG_EROFS_FS_ZIP
|
||||
static int z_erofs_parse_cfgs(struct super_block *sb,
|
||||
struct erofs_super_block *dsb)
|
||||
{
|
||||
if (!dsb->u1.available_compr_algs)
|
||||
return 0;
|
||||
|
||||
erofs_err(sb, "compression disabled, unable to mount compressed EROFS");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
|
||||
struct erofs_device_info *dif, erofs_off_t *pos)
|
||||
{
|
||||
|
|
@ -319,6 +308,15 @@ static int erofs_read_superblock(struct super_block *sb)
|
|||
sbi->xattr_prefix_start = le32_to_cpu(dsb->xattr_prefix_start);
|
||||
sbi->xattr_prefix_count = dsb->xattr_prefix_count;
|
||||
sbi->xattr_filter_reserved = dsb->xattr_filter_reserved;
|
||||
if (erofs_sb_has_ishare_xattrs(sbi)) {
|
||||
if (dsb->ishare_xattr_prefix_id >= sbi->xattr_prefix_count) {
|
||||
erofs_err(sb, "invalid ishare xattr prefix id %u",
|
||||
dsb->ishare_xattr_prefix_id);
|
||||
ret = -EFSCORRUPTED;
|
||||
goto out;
|
||||
}
|
||||
sbi->ishare_xattr_prefix_id = dsb->ishare_xattr_prefix_id;
|
||||
}
|
||||
#endif
|
||||
sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
|
||||
if (erofs_sb_has_48bit(sbi) && dsb->rootnid_8b) {
|
||||
|
|
@ -330,12 +328,13 @@ static int erofs_read_superblock(struct super_block *sb)
|
|||
}
|
||||
sbi->packed_nid = le64_to_cpu(dsb->packed_nid);
|
||||
if (erofs_sb_has_metabox(sbi)) {
|
||||
ret = -EFSCORRUPTED;
|
||||
if (sbi->sb_size <= offsetof(struct erofs_super_block,
|
||||
metabox_nid))
|
||||
return -EFSCORRUPTED;
|
||||
goto out;
|
||||
sbi->metabox_nid = le64_to_cpu(dsb->metabox_nid);
|
||||
if (sbi->metabox_nid & BIT_ULL(EROFS_DIRENT_NID_METABOX_BIT))
|
||||
return -EFSCORRUPTED; /* self-loop detection */
|
||||
goto out; /* self-loop detection */
|
||||
}
|
||||
sbi->inos = le64_to_cpu(dsb->inos);
|
||||
|
||||
|
|
@ -346,14 +345,22 @@ static int erofs_read_superblock(struct super_block *sb)
|
|||
if (dsb->volume_name[0]) {
|
||||
sbi->volume_name = kstrndup(dsb->volume_name,
|
||||
sizeof(dsb->volume_name), GFP_KERNEL);
|
||||
if (!sbi->volume_name)
|
||||
return -ENOMEM;
|
||||
if (!sbi->volume_name) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* parse on-disk compression configurations */
|
||||
ret = z_erofs_parse_cfgs(sb, dsb);
|
||||
if (ret < 0)
|
||||
if (IS_ENABLED(CONFIG_EROFS_FS_ZIP)) {
|
||||
ret = z_erofs_parse_cfgs(sb, dsb);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
} else if (dsb->u1.available_compr_algs ||
|
||||
erofs_sb_has_lz4_0padding(sbi)) {
|
||||
erofs_err(sb, "compression disabled, unable to mount compressed EROFS");
|
||||
ret = -EOPNOTSUPP;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = erofs_scan_devices(sb, dsb);
|
||||
|
||||
|
|
@ -372,20 +379,18 @@ static void erofs_default_options(struct erofs_sb_info *sbi)
|
|||
{
|
||||
#ifdef CONFIG_EROFS_FS_ZIP
|
||||
sbi->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND;
|
||||
sbi->opt.max_sync_decompress_pages = 3;
|
||||
sbi->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO;
|
||||
#endif
|
||||
#ifdef CONFIG_EROFS_FS_XATTR
|
||||
set_opt(&sbi->opt, XATTR_USER);
|
||||
#endif
|
||||
#ifdef CONFIG_EROFS_FS_POSIX_ACL
|
||||
set_opt(&sbi->opt, POSIX_ACL);
|
||||
sbi->sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO;
|
||||
#endif
|
||||
if (IS_ENABLED(CONFIG_EROFS_FS_XATTR))
|
||||
set_opt(&sbi->opt, XATTR_USER);
|
||||
if (IS_ENABLED(CONFIG_EROFS_FS_POSIX_ACL))
|
||||
set_opt(&sbi->opt, POSIX_ACL);
|
||||
}
|
||||
|
||||
enum {
|
||||
Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum,
|
||||
Opt_device, Opt_fsid, Opt_domain_id, Opt_directio, Opt_fsoffset,
|
||||
Opt_inode_share,
|
||||
};
|
||||
|
||||
static const struct constant_table erofs_param_cache_strategy[] = {
|
||||
|
|
@ -413,6 +418,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
|
|||
fsparam_string("domain_id", Opt_domain_id),
|
||||
fsparam_flag_no("directio", Opt_directio),
|
||||
fsparam_u64("fsoffset", Opt_fsoffset),
|
||||
fsparam_flag("inode_share", Opt_inode_share),
|
||||
{}
|
||||
};
|
||||
|
||||
|
|
@ -514,11 +520,11 @@ static int erofs_fc_parse_param(struct fs_context *fc,
|
|||
if (!sbi->fsid)
|
||||
return -ENOMEM;
|
||||
break;
|
||||
#endif
|
||||
#if defined(CONFIG_EROFS_FS_ONDEMAND) || defined(CONFIG_EROFS_FS_PAGE_CACHE_SHARE)
|
||||
case Opt_domain_id:
|
||||
kfree(sbi->domain_id);
|
||||
sbi->domain_id = kstrdup(param->string, GFP_KERNEL);
|
||||
if (!sbi->domain_id)
|
||||
return -ENOMEM;
|
||||
kfree_sensitive(sbi->domain_id);
|
||||
sbi->domain_id = no_free_ptr(param->string);
|
||||
break;
|
||||
#else
|
||||
case Opt_fsid:
|
||||
|
|
@ -539,6 +545,13 @@ static int erofs_fc_parse_param(struct fs_context *fc,
|
|||
case Opt_fsoffset:
|
||||
sbi->dif0.fsoff = result.uint_64;
|
||||
break;
|
||||
case Opt_inode_share:
|
||||
#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
|
||||
set_opt(&sbi->opt, INODE_SHARE);
|
||||
#else
|
||||
errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -614,7 +627,7 @@ static void erofs_set_sysfs_name(struct super_block *sb)
|
|||
{
|
||||
struct erofs_sb_info *sbi = EROFS_SB(sb);
|
||||
|
||||
if (sbi->domain_id)
|
||||
if (sbi->domain_id && sbi->fsid)
|
||||
super_set_sysfs_name_generic(sb, "%s,%s", sbi->domain_id,
|
||||
sbi->fsid);
|
||||
else if (sbi->fsid)
|
||||
|
|
@ -637,6 +650,15 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
|
|||
sb->s_maxbytes = MAX_LFS_FILESIZE;
|
||||
sb->s_op = &erofs_sops;
|
||||
|
||||
if (!sbi->domain_id && test_opt(&sbi->opt, INODE_SHARE)) {
|
||||
errorfc(fc, "domain_id is needed when inode_ishare is on");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (test_opt(&sbi->opt, DAX_ALWAYS) && test_opt(&sbi->opt, INODE_SHARE)) {
|
||||
errorfc(fc, "FSDAX is not allowed when inode_ishare is on");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
sbi->blkszbits = PAGE_SHIFT;
|
||||
if (!sb->s_bdev) {
|
||||
/*
|
||||
|
|
@ -714,6 +736,12 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
|
|||
erofs_info(sb, "unsupported blocksize for DAX");
|
||||
clear_opt(&sbi->opt, DAX_ALWAYS);
|
||||
}
|
||||
if (test_opt(&sbi->opt, INODE_SHARE) && !erofs_sb_has_ishare_xattrs(sbi)) {
|
||||
erofs_info(sb, "on-disk ishare xattrs not found. Turning off inode_share.");
|
||||
clear_opt(&sbi->opt, INODE_SHARE);
|
||||
}
|
||||
if (test_opt(&sbi->opt, INODE_SHARE))
|
||||
erofs_info(sb, "EXPERIMENTAL EROFS page cache share support in use. Use at your own risk!");
|
||||
|
||||
sb->s_time_gran = 1;
|
||||
sb->s_xattr = erofs_xattr_handlers;
|
||||
|
|
@ -849,7 +877,7 @@ static void erofs_sb_free(struct erofs_sb_info *sbi)
|
|||
{
|
||||
erofs_free_dev_context(sbi->devs);
|
||||
kfree(sbi->fsid);
|
||||
kfree(sbi->domain_id);
|
||||
kfree_sensitive(sbi->domain_id);
|
||||
if (sbi->dif0.file)
|
||||
fput(sbi->dif0.file);
|
||||
kfree(sbi->volume_name);
|
||||
|
|
@ -943,6 +971,41 @@ static struct file_system_type erofs_fs_type = {
|
|||
};
|
||||
MODULE_ALIAS_FS("erofs");
|
||||
|
||||
#if defined(CONFIG_EROFS_FS_ONDEMAND) || defined(CONFIG_EROFS_FS_PAGE_CACHE_SHARE)
|
||||
static void erofs_free_anon_inode(struct inode *inode)
|
||||
{
|
||||
struct erofs_inode *vi = EROFS_I(inode);
|
||||
|
||||
#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
|
||||
kfree(vi->fingerprint.opaque);
|
||||
#endif
|
||||
kmem_cache_free(erofs_inode_cachep, vi);
|
||||
}
|
||||
|
||||
static const struct super_operations erofs_anon_sops = {
|
||||
.alloc_inode = erofs_alloc_inode,
|
||||
.drop_inode = inode_just_drop,
|
||||
.free_inode = erofs_free_anon_inode,
|
||||
};
|
||||
|
||||
static int erofs_anon_init_fs_context(struct fs_context *fc)
|
||||
{
|
||||
struct pseudo_fs_context *ctx;
|
||||
|
||||
ctx = init_pseudo(fc, EROFS_SUPER_MAGIC);
|
||||
if (!ctx)
|
||||
return -ENOMEM;
|
||||
ctx->ops = &erofs_anon_sops;
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct file_system_type erofs_anon_fs_type = {
|
||||
.name = "pseudo_erofs",
|
||||
.init_fs_context = erofs_anon_init_fs_context,
|
||||
.kill_sb = kill_anon_super,
|
||||
};
|
||||
#endif
|
||||
|
||||
static int __init erofs_module_init(void)
|
||||
{
|
||||
int err;
|
||||
|
|
@ -968,6 +1031,10 @@ static int __init erofs_module_init(void)
|
|||
if (err)
|
||||
goto sysfs_err;
|
||||
|
||||
err = erofs_init_ishare();
|
||||
if (err)
|
||||
goto ishare_err;
|
||||
|
||||
err = register_filesystem(&erofs_fs_type);
|
||||
if (err)
|
||||
goto fs_err;
|
||||
|
|
@ -975,6 +1042,8 @@ static int __init erofs_module_init(void)
|
|||
return 0;
|
||||
|
||||
fs_err:
|
||||
erofs_exit_ishare();
|
||||
ishare_err:
|
||||
erofs_exit_sysfs();
|
||||
sysfs_err:
|
||||
z_erofs_exit_subsystem();
|
||||
|
|
@ -992,6 +1061,7 @@ static void __exit erofs_module_exit(void)
|
|||
/* Ensure all RCU free inodes / pclusters are safe to be destroyed. */
|
||||
rcu_barrier();
|
||||
|
||||
erofs_exit_ishare();
|
||||
erofs_exit_sysfs();
|
||||
z_erofs_exit_subsystem();
|
||||
erofs_exit_shrinker();
|
||||
|
|
@ -1046,16 +1116,16 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
|
|||
#endif
|
||||
if (sbi->dif0.fsoff)
|
||||
seq_printf(seq, ",fsoffset=%llu", sbi->dif0.fsoff);
|
||||
if (test_opt(opt, INODE_SHARE))
|
||||
seq_puts(seq, ",inode_share");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void erofs_evict_inode(struct inode *inode)
|
||||
{
|
||||
#ifdef CONFIG_FS_DAX
|
||||
if (IS_DAX(inode))
|
||||
dax_break_layout_final(inode);
|
||||
#endif
|
||||
|
||||
erofs_ishare_free_inode(inode);
|
||||
truncate_inode_pages_final(&inode->i_data);
|
||||
clear_inode(inode);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ static struct erofs_attr erofs_attr_##_name = { \
|
|||
#define ATTR_LIST(name) (&erofs_attr_##name.attr)
|
||||
|
||||
#ifdef CONFIG_EROFS_FS_ZIP
|
||||
EROFS_ATTR_RW_UI(sync_decompress, erofs_mount_opts);
|
||||
EROFS_ATTR_RW_UI(sync_decompress, erofs_sb_info);
|
||||
EROFS_ATTR_FUNC(drop_caches, 0200);
|
||||
#endif
|
||||
#ifdef CONFIG_EROFS_FS_ZIP_ACCEL
|
||||
|
|
@ -86,7 +86,6 @@ static struct attribute *erofs_attrs[] = {
|
|||
ATTRIBUTE_GROUPS(erofs);
|
||||
|
||||
/* Features this copy of erofs supports */
|
||||
EROFS_ATTR_FEATURE(zero_padding);
|
||||
EROFS_ATTR_FEATURE(compr_cfgs);
|
||||
EROFS_ATTR_FEATURE(big_pcluster);
|
||||
EROFS_ATTR_FEATURE(chunked_file);
|
||||
|
|
@ -100,7 +99,6 @@ EROFS_ATTR_FEATURE(48bit);
|
|||
EROFS_ATTR_FEATURE(metabox);
|
||||
|
||||
static struct attribute *erofs_feat_attrs[] = {
|
||||
ATTR_LIST(zero_padding),
|
||||
ATTR_LIST(compr_cfgs),
|
||||
ATTR_LIST(big_pcluster),
|
||||
ATTR_LIST(chunked_file),
|
||||
|
|
@ -170,11 +168,10 @@ static ssize_t erofs_attr_store(struct kobject *kobj, struct attribute *attr,
|
|||
return ret;
|
||||
if (t != (unsigned int)t)
|
||||
return -ERANGE;
|
||||
#ifdef CONFIG_EROFS_FS_ZIP
|
||||
if (!strcmp(a->attr.name, "sync_decompress") &&
|
||||
if (IS_ENABLED(CONFIG_EROFS_FS_ZIP) &&
|
||||
!strcmp(a->attr.name, "sync_decompress") &&
|
||||
(t > EROFS_SYNC_DECOMPRESS_FORCE_OFF))
|
||||
return -EINVAL;
|
||||
#endif
|
||||
*(unsigned int *)ptr = t;
|
||||
return len;
|
||||
case attr_pointer_bool:
|
||||
|
|
|
|||
266
fs/erofs/xattr.c
266
fs/erofs/xattr.c
|
|
@ -25,15 +25,22 @@ struct erofs_xattr_iter {
|
|||
struct dentry *dentry;
|
||||
};
|
||||
|
||||
static const char *erofs_xattr_prefix(unsigned int idx, struct dentry *dentry);
|
||||
|
||||
static int erofs_init_inode_xattrs(struct inode *inode)
|
||||
{
|
||||
struct erofs_inode *const vi = EROFS_I(inode);
|
||||
struct erofs_xattr_iter it;
|
||||
unsigned int i;
|
||||
struct erofs_xattr_ibody_header *ih;
|
||||
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
|
||||
struct erofs_inode *vi = EROFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
const struct erofs_xattr_ibody_header *ih;
|
||||
__le32 *xattr_id;
|
||||
erofs_off_t pos;
|
||||
unsigned int i;
|
||||
int ret = 0;
|
||||
|
||||
if (!vi->xattr_isize)
|
||||
return -ENODATA;
|
||||
|
||||
/* the most case is that xattrs of this inode are initialized. */
|
||||
if (test_bit(EROFS_I_EA_INITED_BIT, &vi->flags)) {
|
||||
/*
|
||||
|
|
@ -43,7 +50,6 @@ static int erofs_init_inode_xattrs(struct inode *inode)
|
|||
smp_mb();
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_XATTR_BIT, TASK_KILLABLE))
|
||||
return -ERESTARTSYS;
|
||||
|
||||
|
|
@ -60,139 +66,69 @@ static int erofs_init_inode_xattrs(struct inode *inode)
|
|||
* undefined right now (maybe use later with some new sb feature).
|
||||
*/
|
||||
if (vi->xattr_isize == sizeof(struct erofs_xattr_ibody_header)) {
|
||||
erofs_err(sb,
|
||||
"xattr_isize %d of nid %llu is not supported yet",
|
||||
erofs_err(sb, "xattr_isize %d of nid %llu is not supported yet",
|
||||
vi->xattr_isize, vi->nid);
|
||||
ret = -EOPNOTSUPP;
|
||||
goto out_unlock;
|
||||
} else if (vi->xattr_isize < sizeof(struct erofs_xattr_ibody_header)) {
|
||||
if (vi->xattr_isize) {
|
||||
erofs_err(sb, "bogus xattr ibody @ nid %llu", vi->nid);
|
||||
DBG_BUGON(1);
|
||||
ret = -EFSCORRUPTED;
|
||||
goto out_unlock; /* xattr ondisk layout error */
|
||||
}
|
||||
ret = -ENODATA;
|
||||
erofs_err(sb, "bogus xattr ibody @ nid %llu", vi->nid);
|
||||
DBG_BUGON(1);
|
||||
ret = -EFSCORRUPTED;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
it.buf = __EROFS_BUF_INITIALIZER;
|
||||
ret = erofs_init_metabuf(&it.buf, sb, erofs_inode_in_metabox(inode));
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
it.pos = erofs_iloc(inode) + vi->inode_isize;
|
||||
|
||||
/* read in shared xattr array (non-atomic, see kmalloc below) */
|
||||
it.kaddr = erofs_bread(&it.buf, it.pos, true);
|
||||
if (IS_ERR(it.kaddr)) {
|
||||
ret = PTR_ERR(it.kaddr);
|
||||
pos = erofs_iloc(inode) + vi->inode_isize;
|
||||
ih = erofs_read_metabuf(&buf, sb, pos, erofs_inode_in_metabox(inode));
|
||||
if (IS_ERR(ih)) {
|
||||
ret = PTR_ERR(ih);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
ih = it.kaddr;
|
||||
vi->xattr_name_filter = le32_to_cpu(ih->h_name_filter);
|
||||
vi->xattr_shared_count = ih->h_shared_count;
|
||||
vi->xattr_shared_xattrs = kmalloc_array(vi->xattr_shared_count,
|
||||
sizeof(uint), GFP_KERNEL);
|
||||
if (!vi->xattr_shared_xattrs) {
|
||||
erofs_put_metabuf(&it.buf);
|
||||
erofs_put_metabuf(&buf);
|
||||
ret = -ENOMEM;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/* let's skip ibody header */
|
||||
it.pos += sizeof(struct erofs_xattr_ibody_header);
|
||||
|
||||
/* skip the ibody header and read the shared xattr array */
|
||||
pos += sizeof(struct erofs_xattr_ibody_header);
|
||||
for (i = 0; i < vi->xattr_shared_count; ++i) {
|
||||
it.kaddr = erofs_bread(&it.buf, it.pos, true);
|
||||
if (IS_ERR(it.kaddr)) {
|
||||
xattr_id = erofs_bread(&buf, pos + i * sizeof(__le32), true);
|
||||
if (IS_ERR(xattr_id)) {
|
||||
kfree(vi->xattr_shared_xattrs);
|
||||
vi->xattr_shared_xattrs = NULL;
|
||||
ret = PTR_ERR(it.kaddr);
|
||||
ret = PTR_ERR(xattr_id);
|
||||
goto out_unlock;
|
||||
}
|
||||
vi->xattr_shared_xattrs[i] = le32_to_cpu(*(__le32 *)it.kaddr);
|
||||
it.pos += sizeof(__le32);
|
||||
vi->xattr_shared_xattrs[i] = le32_to_cpu(*xattr_id);
|
||||
}
|
||||
erofs_put_metabuf(&it.buf);
|
||||
erofs_put_metabuf(&buf);
|
||||
|
||||
/* paired with smp_mb() at the beginning of the function. */
|
||||
smp_mb();
|
||||
set_bit(EROFS_I_EA_INITED_BIT, &vi->flags);
|
||||
|
||||
out_unlock:
|
||||
clear_and_wake_up_bit(EROFS_I_BL_XATTR_BIT, &vi->flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool erofs_xattr_user_list(struct dentry *dentry)
|
||||
{
|
||||
return test_opt(&EROFS_SB(dentry->d_sb)->opt, XATTR_USER);
|
||||
}
|
||||
|
||||
static bool erofs_xattr_trusted_list(struct dentry *dentry)
|
||||
{
|
||||
return capable(CAP_SYS_ADMIN);
|
||||
}
|
||||
|
||||
static int erofs_xattr_generic_get(const struct xattr_handler *handler,
|
||||
struct dentry *unused, struct inode *inode,
|
||||
const char *name, void *buffer, size_t size)
|
||||
{
|
||||
if (handler->flags == EROFS_XATTR_INDEX_USER &&
|
||||
!test_opt(&EROFS_I_SB(inode)->opt, XATTR_USER))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
return erofs_getxattr(inode, handler->flags, name, buffer, size);
|
||||
}
|
||||
|
||||
const struct xattr_handler erofs_xattr_user_handler = {
|
||||
.prefix = XATTR_USER_PREFIX,
|
||||
.flags = EROFS_XATTR_INDEX_USER,
|
||||
.list = erofs_xattr_user_list,
|
||||
.get = erofs_xattr_generic_get,
|
||||
};
|
||||
|
||||
const struct xattr_handler erofs_xattr_trusted_handler = {
|
||||
.prefix = XATTR_TRUSTED_PREFIX,
|
||||
.flags = EROFS_XATTR_INDEX_TRUSTED,
|
||||
.list = erofs_xattr_trusted_list,
|
||||
.get = erofs_xattr_generic_get,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_EROFS_FS_SECURITY
|
||||
const struct xattr_handler __maybe_unused erofs_xattr_security_handler = {
|
||||
.prefix = XATTR_SECURITY_PREFIX,
|
||||
.flags = EROFS_XATTR_INDEX_SECURITY,
|
||||
.get = erofs_xattr_generic_get,
|
||||
};
|
||||
#endif
|
||||
|
||||
const struct xattr_handler * const erofs_xattr_handlers[] = {
|
||||
&erofs_xattr_user_handler,
|
||||
&erofs_xattr_trusted_handler,
|
||||
#ifdef CONFIG_EROFS_FS_SECURITY
|
||||
&erofs_xattr_security_handler,
|
||||
#endif
|
||||
NULL,
|
||||
};
|
||||
|
||||
static int erofs_xattr_copy_to_buffer(struct erofs_xattr_iter *it,
|
||||
unsigned int len)
|
||||
{
|
||||
unsigned int slice, processed;
|
||||
struct super_block *sb = it->sb;
|
||||
void *src;
|
||||
|
||||
for (processed = 0; processed < len; processed += slice) {
|
||||
it->kaddr = erofs_bread(&it->buf, it->pos, true);
|
||||
if (IS_ERR(it->kaddr))
|
||||
return PTR_ERR(it->kaddr);
|
||||
|
||||
src = it->kaddr;
|
||||
slice = min_t(unsigned int, sb->s_blocksize -
|
||||
erofs_blkoff(sb, it->pos), len - processed);
|
||||
memcpy(it->buffer + it->buffer_ofs, src, slice);
|
||||
memcpy(it->buffer + it->buffer_ofs, it->kaddr, slice);
|
||||
it->buffer_ofs += slice;
|
||||
it->pos += slice;
|
||||
}
|
||||
|
|
@ -391,8 +327,8 @@ static int erofs_xattr_iter_shared(struct erofs_xattr_iter *it,
|
|||
return i ? ret : -ENODATA;
|
||||
}
|
||||
|
||||
int erofs_getxattr(struct inode *inode, int index, const char *name,
|
||||
void *buffer, size_t buffer_size)
|
||||
static int erofs_getxattr(struct inode *inode, int index, const char *name,
|
||||
void *buffer, size_t buffer_size)
|
||||
{
|
||||
int ret;
|
||||
unsigned int hashbit;
|
||||
|
|
@ -462,6 +398,81 @@ ssize_t erofs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
|
|||
return ret ? ret : it.buffer_ofs;
|
||||
}
|
||||
|
||||
static bool erofs_xattr_user_list(struct dentry *dentry)
|
||||
{
|
||||
return test_opt(&EROFS_SB(dentry->d_sb)->opt, XATTR_USER);
|
||||
}
|
||||
|
||||
static bool erofs_xattr_trusted_list(struct dentry *dentry)
|
||||
{
|
||||
return capable(CAP_SYS_ADMIN);
|
||||
}
|
||||
|
||||
static int erofs_xattr_generic_get(const struct xattr_handler *handler,
|
||||
struct dentry *unused, struct inode *inode,
|
||||
const char *name, void *buffer, size_t size)
|
||||
{
|
||||
if (handler->flags == EROFS_XATTR_INDEX_USER &&
|
||||
!test_opt(&EROFS_I_SB(inode)->opt, XATTR_USER))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
return erofs_getxattr(inode, handler->flags, name, buffer, size);
|
||||
}
|
||||
|
||||
static const struct xattr_handler erofs_xattr_user_handler = {
|
||||
.prefix = XATTR_USER_PREFIX,
|
||||
.flags = EROFS_XATTR_INDEX_USER,
|
||||
.list = erofs_xattr_user_list,
|
||||
.get = erofs_xattr_generic_get,
|
||||
};
|
||||
|
||||
static const struct xattr_handler erofs_xattr_trusted_handler = {
|
||||
.prefix = XATTR_TRUSTED_PREFIX,
|
||||
.flags = EROFS_XATTR_INDEX_TRUSTED,
|
||||
.list = erofs_xattr_trusted_list,
|
||||
.get = erofs_xattr_generic_get,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_EROFS_FS_SECURITY
|
||||
static const struct xattr_handler erofs_xattr_security_handler = {
|
||||
.prefix = XATTR_SECURITY_PREFIX,
|
||||
.flags = EROFS_XATTR_INDEX_SECURITY,
|
||||
.get = erofs_xattr_generic_get,
|
||||
};
|
||||
#endif
|
||||
|
||||
const struct xattr_handler * const erofs_xattr_handlers[] = {
|
||||
&erofs_xattr_user_handler,
|
||||
&erofs_xattr_trusted_handler,
|
||||
#ifdef CONFIG_EROFS_FS_SECURITY
|
||||
&erofs_xattr_security_handler,
|
||||
#endif
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const char *erofs_xattr_prefix(unsigned int idx, struct dentry *dentry)
|
||||
{
|
||||
static const struct xattr_handler * const xattr_handler_map[] = {
|
||||
[EROFS_XATTR_INDEX_USER] = &erofs_xattr_user_handler,
|
||||
#ifdef CONFIG_EROFS_FS_POSIX_ACL
|
||||
[EROFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &nop_posix_acl_access,
|
||||
[EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &nop_posix_acl_default,
|
||||
#endif
|
||||
[EROFS_XATTR_INDEX_TRUSTED] = &erofs_xattr_trusted_handler,
|
||||
#ifdef CONFIG_EROFS_FS_SECURITY
|
||||
[EROFS_XATTR_INDEX_SECURITY] = &erofs_xattr_security_handler,
|
||||
#endif
|
||||
};
|
||||
const struct xattr_handler *handler = NULL;
|
||||
|
||||
if (idx && idx < ARRAY_SIZE(xattr_handler_map)) {
|
||||
handler = xattr_handler_map[idx];
|
||||
if (xattr_handler_can_list(handler, dentry))
|
||||
return xattr_prefix(handler);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void erofs_xattr_prefixes_cleanup(struct super_block *sb)
|
||||
{
|
||||
struct erofs_sb_info *sbi = EROFS_SB(sb);
|
||||
|
|
@ -519,6 +530,19 @@ int erofs_xattr_prefixes_init(struct super_block *sb)
|
|||
}
|
||||
|
||||
erofs_put_metabuf(&buf);
|
||||
if (!ret && erofs_sb_has_ishare_xattrs(sbi)) {
|
||||
struct erofs_xattr_prefix_item *pf = pfs + sbi->ishare_xattr_prefix_id;
|
||||
struct erofs_xattr_long_prefix *newpfx;
|
||||
|
||||
newpfx = krealloc(pf->prefix,
|
||||
sizeof(*newpfx) + pf->infix_len + 1, GFP_KERNEL);
|
||||
if (newpfx) {
|
||||
newpfx->infix[pf->infix_len] = '\0';
|
||||
pf->prefix = newpfx;
|
||||
} else {
|
||||
ret = -ENOMEM;
|
||||
}
|
||||
}
|
||||
sbi->xattr_prefixes = pfs;
|
||||
if (ret)
|
||||
erofs_xattr_prefixes_cleanup(sb);
|
||||
|
|
@ -563,4 +587,58 @@ struct posix_acl *erofs_get_acl(struct inode *inode, int type, bool rcu)
|
|||
kfree(value);
|
||||
return acl;
|
||||
}
|
||||
|
||||
bool erofs_inode_has_noacl(struct inode *inode, void *kaddr, unsigned int ofs)
|
||||
{
|
||||
static const unsigned int bitmask =
|
||||
BIT(21) | /* system.posix_acl_default */
|
||||
BIT(30); /* system.posix_acl_access */
|
||||
struct erofs_sb_info *sbi = EROFS_I_SB(inode);
|
||||
const struct erofs_xattr_ibody_header *ih = kaddr + ofs;
|
||||
|
||||
if (EROFS_I(inode)->xattr_isize < sizeof(*ih))
|
||||
return true;
|
||||
|
||||
if (erofs_sb_has_xattr_filter(sbi) && !sbi->xattr_filter_reserved &&
|
||||
!check_add_overflow(ofs, sizeof(*ih), &ofs) &&
|
||||
ofs <= i_blocksize(inode)) {
|
||||
if ((le32_to_cpu(ih->h_name_filter) & bitmask) == bitmask)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
|
||||
int erofs_xattr_fill_inode_fingerprint(struct erofs_inode_fingerprint *fp,
|
||||
struct inode *inode, const char *domain_id)
|
||||
{
|
||||
struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
|
||||
struct erofs_xattr_prefix_item *prefix;
|
||||
const char *infix;
|
||||
int valuelen, base_index;
|
||||
|
||||
if (!test_opt(&sbi->opt, INODE_SHARE))
|
||||
return -EOPNOTSUPP;
|
||||
if (!sbi->xattr_prefixes)
|
||||
return -EINVAL;
|
||||
prefix = sbi->xattr_prefixes + sbi->ishare_xattr_prefix_id;
|
||||
infix = prefix->prefix->infix;
|
||||
base_index = prefix->prefix->base_index;
|
||||
valuelen = erofs_getxattr(inode, base_index, infix, NULL, 0);
|
||||
if (valuelen <= 0 || valuelen > (1 << sbi->blkszbits))
|
||||
return -EFSCORRUPTED;
|
||||
fp->size = valuelen + (domain_id ? strlen(domain_id) : 0);
|
||||
fp->opaque = kmalloc(fp->size, GFP_KERNEL);
|
||||
if (!fp->opaque)
|
||||
return -ENOMEM;
|
||||
if (valuelen != erofs_getxattr(inode, base_index, infix,
|
||||
fp->opaque, valuelen)) {
|
||||
kfree(fp->opaque);
|
||||
fp->opaque = NULL;
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
memcpy(fp->opaque + valuelen, domain_id, fp->size - valuelen);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -11,51 +11,14 @@
|
|||
#include <linux/xattr.h>
|
||||
|
||||
#ifdef CONFIG_EROFS_FS_XATTR
|
||||
extern const struct xattr_handler erofs_xattr_user_handler;
|
||||
extern const struct xattr_handler erofs_xattr_trusted_handler;
|
||||
extern const struct xattr_handler erofs_xattr_security_handler;
|
||||
|
||||
static inline const char *erofs_xattr_prefix(unsigned int idx,
|
||||
struct dentry *dentry)
|
||||
{
|
||||
const struct xattr_handler *handler = NULL;
|
||||
|
||||
static const struct xattr_handler * const xattr_handler_map[] = {
|
||||
[EROFS_XATTR_INDEX_USER] = &erofs_xattr_user_handler,
|
||||
#ifdef CONFIG_EROFS_FS_POSIX_ACL
|
||||
[EROFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &nop_posix_acl_access,
|
||||
[EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &nop_posix_acl_default,
|
||||
#endif
|
||||
[EROFS_XATTR_INDEX_TRUSTED] = &erofs_xattr_trusted_handler,
|
||||
#ifdef CONFIG_EROFS_FS_SECURITY
|
||||
[EROFS_XATTR_INDEX_SECURITY] = &erofs_xattr_security_handler,
|
||||
#endif
|
||||
};
|
||||
|
||||
if (idx && idx < ARRAY_SIZE(xattr_handler_map))
|
||||
handler = xattr_handler_map[idx];
|
||||
|
||||
if (!xattr_handler_can_list(handler, dentry))
|
||||
return NULL;
|
||||
|
||||
return xattr_prefix(handler);
|
||||
}
|
||||
|
||||
extern const struct xattr_handler * const erofs_xattr_handlers[];
|
||||
|
||||
int erofs_xattr_prefixes_init(struct super_block *sb);
|
||||
void erofs_xattr_prefixes_cleanup(struct super_block *sb);
|
||||
int erofs_getxattr(struct inode *, int, const char *, void *, size_t);
|
||||
ssize_t erofs_listxattr(struct dentry *, char *, size_t);
|
||||
#else
|
||||
static inline int erofs_xattr_prefixes_init(struct super_block *sb) { return 0; }
|
||||
static inline void erofs_xattr_prefixes_cleanup(struct super_block *sb) {}
|
||||
static inline int erofs_getxattr(struct inode *inode, int index,
|
||||
const char *name, void *buffer,
|
||||
size_t buffer_size)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
#define erofs_listxattr (NULL)
|
||||
#define erofs_xattr_handlers (NULL)
|
||||
|
|
@ -67,4 +30,7 @@ struct posix_acl *erofs_get_acl(struct inode *inode, int type, bool rcu);
|
|||
#define erofs_get_acl (NULL)
|
||||
#endif
|
||||
|
||||
int erofs_xattr_fill_inode_fingerprint(struct erofs_inode_fingerprint *fp,
|
||||
struct inode *inode, const char *domain_id);
|
||||
bool erofs_inode_has_noacl(struct inode *inode, void *kaddr, unsigned int ofs);
|
||||
#endif
|
||||
|
|
|
|||
110
fs/erofs/zdata.c
110
fs/erofs/zdata.c
|
|
@ -9,6 +9,7 @@
|
|||
#include <linux/cpuhotplug.h>
|
||||
#include <trace/events/erofs.h>
|
||||
|
||||
#define Z_EROFS_MAX_SYNC_DECOMPRESS_BYTES 12288
|
||||
#define Z_EROFS_PCLUSTER_MAX_PAGES (Z_EROFS_PCLUSTER_MAX_SIZE / PAGE_SIZE)
|
||||
#define Z_EROFS_INLINE_BVECS 2
|
||||
|
||||
|
|
@ -493,7 +494,7 @@ enum z_erofs_pclustermode {
|
|||
};
|
||||
|
||||
struct z_erofs_frontend {
|
||||
struct inode *const inode;
|
||||
struct inode *inode, *sharedinode;
|
||||
struct erofs_map_blocks map;
|
||||
struct z_erofs_bvec_iter biter;
|
||||
|
||||
|
|
@ -508,8 +509,8 @@ struct z_erofs_frontend {
|
|||
unsigned int icur;
|
||||
};
|
||||
|
||||
#define Z_EROFS_DEFINE_FRONTEND(fe, i, ho) struct z_erofs_frontend fe = { \
|
||||
.inode = i, .head = Z_EROFS_PCLUSTER_TAIL, \
|
||||
#define Z_EROFS_DEFINE_FRONTEND(fe, i, si, ho) struct z_erofs_frontend fe = { \
|
||||
.inode = i, .sharedinode = si, .head = Z_EROFS_PCLUSTER_TAIL, \
|
||||
.mode = Z_EROFS_PCLUSTER_FOLLOWED, .headoffset = ho }
|
||||
|
||||
static bool z_erofs_should_alloc_cache(struct z_erofs_frontend *fe)
|
||||
|
|
@ -805,14 +806,26 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
|
|||
struct erofs_map_blocks *map = &fe->map;
|
||||
struct super_block *sb = fe->inode->i_sb;
|
||||
struct z_erofs_pcluster *pcl = NULL;
|
||||
void *ptr;
|
||||
void *ptr = NULL;
|
||||
int ret;
|
||||
|
||||
DBG_BUGON(fe->pcl);
|
||||
/* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous pcluster */
|
||||
DBG_BUGON(!fe->head);
|
||||
|
||||
if (!(map->m_flags & EROFS_MAP_META)) {
|
||||
if (map->m_flags & EROFS_MAP_META) {
|
||||
ret = erofs_init_metabuf(&map->buf, sb,
|
||||
erofs_inode_in_metabox(fe->inode));
|
||||
if (ret)
|
||||
return ret;
|
||||
ptr = erofs_bread(&map->buf, map->m_pa, false);
|
||||
if (IS_ERR(ptr)) {
|
||||
erofs_err(sb, "failed to read inline data %pe @ pa %llu of nid %llu",
|
||||
ptr, map->m_pa, EROFS_I(fe->inode)->nid);
|
||||
return PTR_ERR(ptr);
|
||||
}
|
||||
ptr = map->buf.page;
|
||||
} else {
|
||||
while (1) {
|
||||
rcu_read_lock();
|
||||
pcl = xa_load(&EROFS_SB(sb)->managed_pslots, map->m_pa);
|
||||
|
|
@ -852,18 +865,8 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
|
|||
/* bind cache first when cached decompression is preferred */
|
||||
z_erofs_bind_cache(fe);
|
||||
} else {
|
||||
ret = erofs_init_metabuf(&map->buf, sb,
|
||||
erofs_inode_in_metabox(fe->inode));
|
||||
if (ret)
|
||||
return ret;
|
||||
ptr = erofs_bread(&map->buf, map->m_pa, false);
|
||||
if (IS_ERR(ptr)) {
|
||||
ret = PTR_ERR(ptr);
|
||||
erofs_err(sb, "failed to get inline folio %d", ret);
|
||||
return ret;
|
||||
}
|
||||
folio_get(page_folio(map->buf.page));
|
||||
WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, map->buf.page);
|
||||
folio_get(page_folio((struct page *)ptr));
|
||||
WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, ptr);
|
||||
fe->pcl->pageofs_in = map->m_pa & ~PAGE_MASK;
|
||||
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
|
||||
}
|
||||
|
|
@ -1095,21 +1098,6 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f,
|
|||
return err;
|
||||
}
|
||||
|
||||
static bool z_erofs_is_sync_decompress(struct erofs_sb_info *sbi,
|
||||
unsigned int readahead_pages)
|
||||
{
|
||||
/* auto: enable for read_folio, disable for readahead */
|
||||
if ((sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO) &&
|
||||
!readahead_pages)
|
||||
return true;
|
||||
|
||||
if ((sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_FORCE_ON) &&
|
||||
(readahead_pages <= sbi->opt.max_sync_decompress_pages))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool z_erofs_page_is_invalidated(struct page *page)
|
||||
{
|
||||
return !page_folio(page)->mapping && !z_erofs_is_shortlived_page(page);
|
||||
|
|
@ -1324,9 +1312,10 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, bool eio)
|
|||
GFP_NOWAIT | __GFP_NORETRY
|
||||
}, be->pagepool);
|
||||
if (IS_ERR(reason)) {
|
||||
erofs_err(be->sb, "failed to decompress (%s) %ld @ pa %llu size %u => %u",
|
||||
alg->name, PTR_ERR(reason), pcl->pos,
|
||||
pcl->pclustersize, pcl->length);
|
||||
if (pcl->besteffort || reason != ERR_PTR(-ENOMEM))
|
||||
erofs_err(be->sb, "failed to decompress (%s) %pe @ pa %llu size %u => %u",
|
||||
alg->name, reason, pcl->pos,
|
||||
pcl->pclustersize, pcl->length);
|
||||
err = PTR_ERR(reason);
|
||||
} else if (unlikely(reason)) {
|
||||
erofs_err(be->sb, "failed to decompress (%s) %s @ pa %llu size %u => %u",
|
||||
|
|
@ -1483,9 +1472,9 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
|
|||
#else
|
||||
queue_work(z_erofs_workqueue, &io->u.work);
|
||||
#endif
|
||||
/* enable sync decompression for readahead */
|
||||
if (sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO)
|
||||
sbi->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON;
|
||||
/* See `sync_decompress` in sysfs-fs-erofs for more details */
|
||||
if (sbi->sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO)
|
||||
sbi->sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON;
|
||||
return;
|
||||
}
|
||||
z_erofs_decompressqueue_work(&io->u.work);
|
||||
|
|
@ -1802,16 +1791,21 @@ drain_io:
|
|||
z_erofs_decompress_kickoff(q[JQ_SUBMIT], nr_bios);
|
||||
}
|
||||
|
||||
static int z_erofs_runqueue(struct z_erofs_frontend *f, unsigned int rapages)
|
||||
static int z_erofs_runqueue(struct z_erofs_frontend *f, unsigned int rabytes)
|
||||
{
|
||||
struct z_erofs_decompressqueue io[NR_JOBQUEUES];
|
||||
struct erofs_sb_info *sbi = EROFS_I_SB(f->inode);
|
||||
bool force_fg = z_erofs_is_sync_decompress(sbi, rapages);
|
||||
int syncmode = sbi->sync_decompress;
|
||||
bool force_fg;
|
||||
int err;
|
||||
|
||||
force_fg = (syncmode == EROFS_SYNC_DECOMPRESS_AUTO && !rabytes) ||
|
||||
(syncmode == EROFS_SYNC_DECOMPRESS_FORCE_ON &&
|
||||
(rabytes <= Z_EROFS_MAX_SYNC_DECOMPRESS_BYTES));
|
||||
|
||||
if (f->head == Z_EROFS_PCLUSTER_TAIL)
|
||||
return 0;
|
||||
z_erofs_submit_queue(f, io, &force_fg, !!rapages);
|
||||
z_erofs_submit_queue(f, io, &force_fg, !!rabytes);
|
||||
|
||||
/* handle bypass queue (no i/o pclusters) immediately */
|
||||
err = z_erofs_decompress_queue(&io[JQ_BYPASS], &f->pagepool);
|
||||
|
|
@ -1866,7 +1860,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_frontend *f,
|
|||
pgoff_t index = cur >> PAGE_SHIFT;
|
||||
struct folio *folio;
|
||||
|
||||
folio = erofs_grab_folio_nowait(inode->i_mapping, index);
|
||||
folio = erofs_grab_folio_nowait(f->sharedinode->i_mapping, index);
|
||||
if (!IS_ERR_OR_NULL(folio)) {
|
||||
if (folio_test_uptodate(folio))
|
||||
folio_unlock(folio);
|
||||
|
|
@ -1883,11 +1877,13 @@ static void z_erofs_pcluster_readmore(struct z_erofs_frontend *f,
|
|||
|
||||
static int z_erofs_read_folio(struct file *file, struct folio *folio)
|
||||
{
|
||||
struct inode *const inode = folio->mapping->host;
|
||||
Z_EROFS_DEFINE_FRONTEND(f, inode, folio_pos(folio));
|
||||
struct inode *sharedinode = folio->mapping->host;
|
||||
bool need_iput;
|
||||
struct inode *realinode = erofs_real_inode(sharedinode, &need_iput);
|
||||
Z_EROFS_DEFINE_FRONTEND(f, realinode, sharedinode, folio_pos(folio));
|
||||
int err;
|
||||
|
||||
trace_erofs_read_folio(folio, false);
|
||||
trace_erofs_read_folio(realinode, folio, false);
|
||||
z_erofs_pcluster_readmore(&f, NULL, true);
|
||||
err = z_erofs_scan_folio(&f, folio, false);
|
||||
z_erofs_pcluster_readmore(&f, NULL, false);
|
||||
|
|
@ -1896,23 +1892,28 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
|
|||
/* if some pclusters are ready, need submit them anyway */
|
||||
err = z_erofs_runqueue(&f, 0) ?: err;
|
||||
if (err && err != -EINTR)
|
||||
erofs_err(inode->i_sb, "read error %d @ %lu of nid %llu",
|
||||
err, folio->index, EROFS_I(inode)->nid);
|
||||
erofs_err(realinode->i_sb, "read error %d @ %lu of nid %llu",
|
||||
err, folio->index, EROFS_I(realinode)->nid);
|
||||
|
||||
erofs_put_metabuf(&f.map.buf);
|
||||
erofs_release_pages(&f.pagepool);
|
||||
|
||||
if (need_iput)
|
||||
iput(realinode);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void z_erofs_readahead(struct readahead_control *rac)
|
||||
{
|
||||
struct inode *const inode = rac->mapping->host;
|
||||
Z_EROFS_DEFINE_FRONTEND(f, inode, readahead_pos(rac));
|
||||
struct inode *sharedinode = rac->mapping->host;
|
||||
bool need_iput;
|
||||
struct inode *realinode = erofs_real_inode(sharedinode, &need_iput);
|
||||
Z_EROFS_DEFINE_FRONTEND(f, realinode, sharedinode, readahead_pos(rac));
|
||||
unsigned int nrpages = readahead_count(rac);
|
||||
struct folio *head = NULL, *folio;
|
||||
int err;
|
||||
|
||||
trace_erofs_readahead(inode, readahead_index(rac), nrpages, false);
|
||||
trace_erofs_readahead(realinode, readahead_index(rac), nrpages, false);
|
||||
z_erofs_pcluster_readmore(&f, rac, true);
|
||||
while ((folio = readahead_folio(rac))) {
|
||||
folio->private = head;
|
||||
|
|
@ -1926,15 +1927,18 @@ static void z_erofs_readahead(struct readahead_control *rac)
|
|||
|
||||
err = z_erofs_scan_folio(&f, folio, true);
|
||||
if (err && err != -EINTR)
|
||||
erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu",
|
||||
folio->index, EROFS_I(inode)->nid);
|
||||
erofs_err(realinode->i_sb, "readahead error at folio %lu @ nid %llu",
|
||||
folio->index, EROFS_I(realinode)->nid);
|
||||
}
|
||||
z_erofs_pcluster_readmore(&f, rac, false);
|
||||
z_erofs_pcluster_end(&f);
|
||||
|
||||
(void)z_erofs_runqueue(&f, nrpages);
|
||||
(void)z_erofs_runqueue(&f, nrpages << PAGE_SHIFT);
|
||||
erofs_put_metabuf(&f.map.buf);
|
||||
erofs_release_pages(&f.pagepool);
|
||||
|
||||
if (need_iput)
|
||||
iput(realinode);
|
||||
}
|
||||
|
||||
const struct address_space_operations z_erofs_aops = {
|
||||
|
|
|
|||
|
|
@ -308,6 +308,7 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
|
|||
ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
|
||||
return &ff->file;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(alloc_empty_backing_file);
|
||||
|
||||
/**
|
||||
* file_init_path - initialize a 'struct file' based on path
|
||||
|
|
|
|||
|
|
@ -82,9 +82,9 @@ TRACE_EVENT(erofs_fill_inode,
|
|||
|
||||
TRACE_EVENT(erofs_read_folio,
|
||||
|
||||
TP_PROTO(struct folio *folio, bool raw),
|
||||
TP_PROTO(struct inode *inode, struct folio *folio, bool raw),
|
||||
|
||||
TP_ARGS(folio, raw),
|
||||
TP_ARGS(inode, folio, raw),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(dev_t, dev )
|
||||
|
|
@ -96,9 +96,9 @@ TRACE_EVENT(erofs_read_folio,
|
|||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->dev = folio->mapping->host->i_sb->s_dev;
|
||||
__entry->nid = EROFS_I(folio->mapping->host)->nid;
|
||||
__entry->dir = S_ISDIR(folio->mapping->host->i_mode);
|
||||
__entry->dev = inode->i_sb->s_dev;
|
||||
__entry->nid = EROFS_I(inode)->nid;
|
||||
__entry->dir = S_ISDIR(inode->i_mode);
|
||||
__entry->index = folio->index;
|
||||
__entry->uptodate = folio_test_uptodate(folio);
|
||||
__entry->raw = raw;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue