From 1732053c8a6b360e2d5afb1b34fe9779398b072c Mon Sep 17 00:00:00 2001 From: Jaehun Gou Date: Tue, 2 Dec 2025 19:59:59 +0900 Subject: [PATCH 01/26] fs: ntfs3: check return value of indx_find to avoid infinite loop We found an infinite loop bug in the ntfs3 file system that can lead to a Denial-of-Service (DoS) condition. A malformed dentry in the ntfs3 filesystem can cause the kernel to hang during the lookup operations. By setting the HAS_SUB_NODE flag in an INDEX_ENTRY within a directory's INDEX_ALLOCATION block and manipulating the VCN pointer, an attacker can cause the indx_find() function to repeatedly read the same block, allocating 4 KB of memory each time. The kernel lacks VCN loop detection and depth limits, causing memory exhaustion and an OOM crash. This patch adds a return value check for fnd_push() to prevent a memory exhaustion vulnerability caused by infinite loops. When the index exceeds the size of the fnd->nodes array, fnd_push() returns -EINVAL. The indx_find() function checks this return value and stops processing, preventing further memory allocation. 
Co-developed-by: Seunghun Han Signed-off-by: Seunghun Han Co-developed-by: Jihoon Kwon Signed-off-by: Jihoon Kwon Signed-off-by: Jaehun Gou Signed-off-by: Konstantin Komarov --- fs/ntfs3/index.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index 7157cfd70fdc..75b94beac161 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -1190,7 +1190,12 @@ int indx_find(struct ntfs_index *indx, struct ntfs_inode *ni, return -EINVAL; } - fnd_push(fnd, node, e); + err = fnd_push(fnd, node, e); + + if (err) { + put_indx_node(node); + return err; + } } *entry = e; From f223ebffa185cc8da934333c5a31ff2d4f992dc9 Mon Sep 17 00:00:00 2001 From: Bartlomiej Kubik Date: Wed, 26 Nov 2025 23:02:51 +0100 Subject: [PATCH 02/26] fs/ntfs3: Initialize new folios before use KMSAN reports an uninitialized value in longest_match_std(), invoked from ntfs_compress_write(). When new folios are allocated without being marked uptodate and ni_read_frame() is skipped because the caller expects the frame to be completely overwritten, some reserved folios may remain only partially filled, leaving the rest of the memory uninitialized. 
Fixes: 584f60ba22f7 ("ntfs3: Convert ntfs_get_frame_pages() to use a folio") Tested-by: syzbot+08d8956768c96a2c52cf@syzkaller.appspotmail.com Reported-by: syzbot+08d8956768c96a2c52cf@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=08d8956768c96a2c52cf Signed-off-by: Bartlomiej Kubik Signed-off-by: Konstantin Komarov --- fs/ntfs3/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 2e7b2e566ebe..732260087066 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -995,7 +995,7 @@ static int ntfs_get_frame_pages(struct address_space *mapping, pgoff_t index, folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, - gfp_mask); + gfp_mask | __GFP_ZERO); if (IS_ERR(folio)) { while (npages--) { folio = page_folio(pages[npages]); From fac760f52467435bca12a796277bca2aba7ca416 Mon Sep 17 00:00:00 2001 From: Lalit Shankar Chowdhury Date: Fri, 28 Nov 2025 10:12:56 +0000 Subject: [PATCH 03/26] fs/ntfs3: Use wait_on_buffer() directly wait_on_buffer() checks buffer_locked() internally before calling __wait_on_buffer(). 
Signed-off-by: Lalit Shankar Chowdhury Signed-off-by: Konstantin Komarov --- fs/ntfs3/fsntfs.c | 16 +++++----------- fs/ntfs3/index.c | 4 +--- fs/ntfs3/super.c | 4 +--- 3 files changed, 7 insertions(+), 17 deletions(-) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 5f138f715835..a3cb39ff470f 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -875,9 +875,7 @@ void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) return; } - if (buffer_locked(bh2)) - __wait_on_buffer(bh2); - + wait_on_buffer(bh2); lock_buffer(bh2); memcpy(bh2->b_data, bh1->b_data, blocksize); set_buffer_uptodate(bh2); @@ -1069,9 +1067,7 @@ int ntfs_sb_write(struct super_block *sb, u64 lbo, size_t bytes, return -ENOMEM; } - if (buffer_locked(bh)) - __wait_on_buffer(bh); - + wait_on_buffer(bh); lock_buffer(bh); if (buf) { memcpy(bh->b_data + off, buf, op); @@ -1347,8 +1343,8 @@ int ntfs_get_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, err = -ENOMEM; goto out; } - if (buffer_locked(bh)) - __wait_on_buffer(bh); + + wait_on_buffer(bh); lock_buffer(bh); if (!buffer_uptodate(bh)) @@ -1427,9 +1423,7 @@ int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr, if (op > bytes) op = bytes; - if (buffer_locked(bh)) - __wait_on_buffer(bh); - + wait_on_buffer(bh); lock_buffer(bh); bh_data = bh->b_data + off; diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index 75b94beac161..7c7bae84ec9a 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -252,9 +252,7 @@ static int bmp_buf_get(struct ntfs_index *indx, struct ntfs_inode *ni, bbuf->bh = bh; - if (buffer_locked(bh)) - __wait_on_buffer(bh); - + wait_on_buffer(bh); lock_buffer(bh); sb = sbi->sb; diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 8b0cf0ed4f72..1acc685625f7 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -1652,9 +1652,7 @@ load_root: */ struct buffer_head *bh0 = sb_getblk(sb, 0); if (bh0) { - if (buffer_locked(bh0)) - __wait_on_buffer(bh0); - + wait_on_buffer(bh0); 
lock_buffer(bh0); memcpy(bh0->b_data, boot2, sizeof(*boot2)); set_buffer_uptodate(bh0); From 4b90f16e4bb5607fb35e7802eb67874038da4640 Mon Sep 17 00:00:00 2001 From: Jaehun Gou Date: Tue, 2 Dec 2025 20:01:09 +0900 Subject: [PATCH 04/26] fs: ntfs3: fix infinite loop in attr_load_runs_range on inconsistent metadata We found an infinite loop bug in the ntfs3 file system that can lead to a Denial-of-Service (DoS) condition. A malformed NTFS image can cause an infinite loop when an attribute header indicates an empty run list, while directory entries reference it as containing actual data. In NTFS, setting evcn=-1 with svcn=0 is a valid way to represent an empty run list, and run_unpack() correctly handles this by checking if evcn + 1 equals svcn and returning early without parsing any run data. However, this creates a problem when there is a metadata inconsistency, where the attribute header claims to be empty (evcn=-1) but the caller expects to read actual data. When run_unpack() immediately returns success upon seeing this condition, it leaves the runs_tree uninitialized with run->runs set to NULL. The calling function attr_load_runs_range() assumes that a successful return means that the runs were loaded and sets clen to 0, expecting the next run_lookup_entry() call to succeed. Because runs_tree remains uninitialized, run_lookup_entry() continues to fail, and the loop increments vcn by zero (vcn += 0), leading to an infinite loop. This patch adds a retry counter to detect when run_lookup_entry() fails consecutively after attr_load_runs_vcn(). If the run is still not found on the second attempt, it indicates corrupted metadata and returns -EINVAL, preventing the Denial-of-Service (DoS) vulnerability. 
Co-developed-by: Seunghun Han Signed-off-by: Seunghun Han Co-developed-by: Jihoon Kwon Signed-off-by: Jihoon Kwon Signed-off-by: Jaehun Gou Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrib.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 980ae9157248..c45880ab2391 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -1354,19 +1354,28 @@ int attr_load_runs_range(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST vcn; CLST vcn_last = (to - 1) >> cluster_bits; CLST lcn, clen; - int err; + int err = 0; + int retry = 0; for (vcn = from >> cluster_bits; vcn <= vcn_last; vcn += clen) { if (!run_lookup_entry(run, vcn, &lcn, &clen, NULL)) { + if (retry != 0) { /* Next run_lookup_entry(vcn) also failed. */ + err = -EINVAL; + break; + } err = attr_load_runs_vcn(ni, type, name, name_len, run, vcn); if (err) - return err; + break; + clen = 0; /* Next run_lookup_entry(vcn) must be success. */ + retry++; } + else + retry = 0; } - return 0; + return err; } #ifdef CONFIG_NTFS3_LZX_XPRESS From 06909b2549d631a47fcda249d34be26f7ca1711d Mon Sep 17 00:00:00 2001 From: Jaehun Gou Date: Tue, 2 Dec 2025 20:01:46 +0900 Subject: [PATCH 05/26] fs: ntfs3: fix infinite loop triggered by zero-sized ATTR_LIST We found an infinite loop bug in the ntfs3 file system that can lead to a Denial-of-Service (DoS) condition. A malformed NTFS image can cause an infinite loop when an ATTR_LIST attribute indicates a zero data size while the driver allocates memory for it. When ntfs_load_attr_list() processes a resident ATTR_LIST with data_size set to zero, it still allocates memory because of al_aligned(0). This creates an inconsistent state where ni->attr_list.size is zero, but ni->attr_list.le is non-null. This causes ni_enum_attr_ex to incorrectly assume that no attribute list exists and enumerates only the primary MFT record. 
When it finds ATTR_LIST, the code reloads it and restarts the enumeration, repeating indefinitely. The mount operation never completes, hanging the kernel thread. This patch adds validation to ensure that data_size is non-zero before memory allocation. When a zero-sized ATTR_LIST is detected, the function returns -EINVAL, preventing a DoS vulnerability. Co-developed-by: Seunghun Han Signed-off-by: Seunghun Han Co-developed-by: Jihoon Kwon Signed-off-by: Jihoon Kwon Signed-off-by: Jaehun Gou Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrlist.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c index a4d74bed74fa..098bd7e8c3d6 100644 --- a/fs/ntfs3/attrlist.c +++ b/fs/ntfs3/attrlist.c @@ -52,6 +52,11 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr) if (!attr->non_res) { lsize = le32_to_cpu(attr->res.data_size); + if (!lsize) { + err = -EINVAL; + goto out; + } + /* attr is resident: lsize < record_size (1K or 4K) */ le = kvmalloc(al_aligned(lsize), GFP_KERNEL); if (!le) { @@ -66,6 +71,10 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr) u16 run_off = le16_to_cpu(attr->nres.run_off); lsize = le64_to_cpu(attr->nres.data_size); + if (!lsize) { + err = -EINVAL; + goto out; + } run_init(&ni->attr_list.run); From 4248f563f0b76f3fb74b2a28ee068bf66fcbbedf Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 8 Dec 2025 22:57:46 +0300 Subject: [PATCH 06/26] fs/ntfs3: rename ni_readpage_cmpr into ni_read_folio_cmpr The old "readpage" naming is still used in ni_readpage_cmpr(), even though the vfs has transitioned to the folio-based read_folio() API. This patch performs a straightforward renaming of the helper: ni_readpage_cmpr() -> ni_read_folio_cmpr(). 
Signed-off-by: Konstantin Komarov --- fs/ntfs3/frecord.c | 8 ++++---- fs/ntfs3/inode.c | 2 +- fs/ntfs3/ntfs_fs.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 641ddaf8d4a0..7e3d61de2f8f 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -2046,18 +2046,18 @@ static struct page *ntfs_lock_new_page(struct address_space *mapping, } /* - * ni_readpage_cmpr + * ni_read_folio_cmpr * * When decompressing, we typically obtain more than one page per reference. * We inject the additional pages into the page cache. */ -int ni_readpage_cmpr(struct ntfs_inode *ni, struct folio *folio) +int ni_read_folio_cmpr(struct ntfs_inode *ni, struct folio *folio) { int err; struct ntfs_sb_info *sbi = ni->mi.sbi; struct address_space *mapping = folio->mapping; - pgoff_t index = folio->index; - u64 frame_vbo, vbo = (u64)index << PAGE_SHIFT; + pgoff_t index; + u64 frame_vbo, vbo = folio_pos(folio); struct page **pages = NULL; /* Array of at most 16 pages. stack? 
*/ u8 frame_bits; CLST frame; diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 0a9ac5efeb67..1319b99dfeb4 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -736,7 +736,7 @@ static int ntfs_read_folio(struct file *file, struct folio *folio) if (is_compressed(ni)) { ni_lock(ni); - err = ni_readpage_cmpr(ni, folio); + err = ni_read_folio_cmpr(ni, folio); ni_unlock(ni); return err; } diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index a4559c9f64e6..7b619bb151ce 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -568,7 +568,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint); #define _ni_write_inode(i, w) ni_write_inode(i, w, __func__) int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, __u64 vbo, __u64 len); -int ni_readpage_cmpr(struct ntfs_inode *ni, struct folio *folio); +int ni_read_folio_cmpr(struct ntfs_inode *ni, struct folio *folio); int ni_decompress_file(struct ntfs_inode *ni); int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, u32 pages_per_frame, int copy); From 989e29450efaf4983c66b7a628f2ffc03b6d02e8 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 8 Dec 2025 23:27:20 +0300 Subject: [PATCH 07/26] fs/ntfs3: improve readahead for bitmap initialization and large directory scans Previously, sequential read operations relied solely on single-page reads, causing the block layer to perform many synchronous I/O requests, especially for large volumes or large directories. This patch introduces explicit readahead via page_cache_sync_readahead() and file_ra_state to reduce I/O latency and improve sequential throughput. 
Signed-off-by: Konstantin Komarov --- fs/ntfs3/bitmap.c | 17 +++++++++++++++++ fs/ntfs3/dir.c | 4 ++-- fs/ntfs3/fslog.c | 6 ++++-- fs/ntfs3/fsntfs.c | 29 +++++++++++++++++++++-------- fs/ntfs3/index.c | 13 +++++++------ fs/ntfs3/ntfs_fs.h | 35 ++++++++++++++++++++++++++++------- 6 files changed, 79 insertions(+), 25 deletions(-) diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c index 65d05e6a0566..db7d0ecfb469 100644 --- a/fs/ntfs3/bitmap.c +++ b/fs/ntfs3/bitmap.c @@ -508,6 +508,8 @@ static int wnd_rescan(struct wnd_bitmap *wnd) size_t wpos, wbit, iw, vbo; struct buffer_head *bh = NULL; CLST lcn, clen; + struct file_ra_state *ra; + struct address_space *mapping = sb->s_bdev->bd_mapping; wnd->uptodated = 0; wnd->extent_max = 0; @@ -516,6 +518,13 @@ static int wnd_rescan(struct wnd_bitmap *wnd) vbo = 0; + /* Allocate in memory instead of stack. Not critical if failed. */ + ra = kzalloc(sizeof(*ra), GFP_NOFS); + if (ra) { + file_ra_state_init(ra, mapping); + ra->ra_pages = (wnd->nbits / 8 + PAGE_SIZE - 1) >> PAGE_SHIFT; + } + for (iw = 0; iw < wnd->nwnd; iw++) { if (iw + 1 == wnd->nwnd) wbits = wnd->bits_last; @@ -552,6 +561,13 @@ static int wnd_rescan(struct wnd_bitmap *wnd) len = ((u64)clen << cluster_bits) - off; } + if (ra) { + pgoff_t idx = lbo >> PAGE_SHIFT; + if (!ra_has_index(ra, idx)) + page_cache_sync_readahead(mapping, ra, NULL, + idx, 1); + } + bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits); if (!bh) { err = -EIO; @@ -638,6 +654,7 @@ next_wnd: } out: + kfree(ra); return err; } diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index b98e95d6b4d9..1dbb661ffe0f 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -487,8 +487,8 @@ static int ntfs_readdir(struct file *file, struct dir_context *ctx) goto out; } - err = indx_read(&ni->dir, ni, bit << ni->dir.idx2vbn_bits, - &node); + err = indx_read_ra(&ni->dir, ni, bit << ni->dir.idx2vbn_bits, + &node, &file->f_ra); if (err) goto out; diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 38934e6978ec..ee24ef0dd725 
100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -1074,6 +1074,8 @@ struct ntfs_log { u32 client_undo_commit; struct restart_info rst_info, rst_info2; + + struct file_ra_state read_ahead; }; static inline u32 lsn_to_vbo(struct ntfs_log *log, const u64 lsn) @@ -1164,8 +1166,8 @@ static int read_log_page(struct ntfs_log *log, u32 vbo, page_buf = page_off ? log->one_page_buf : *buffer; - err = ntfs_read_run_nb(ni->mi.sbi, &ni->file.run, page_vbo, page_buf, - log->page_size, NULL); + err = ntfs_read_run_nb_ra(ni->mi.sbi, &ni->file.run, page_vbo, page_buf, + log->page_size, NULL, &log->read_ahead); if (err) goto out; diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index a3cb39ff470f..ff0b2595f32a 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -1164,11 +1164,13 @@ struct buffer_head *ntfs_bread_run(struct ntfs_sb_info *sbi, return ntfs_bread(sb, lbo >> sb->s_blocksize_bits); } -int ntfs_read_run_nb(struct ntfs_sb_info *sbi, const struct runs_tree *run, - u64 vbo, void *buf, u32 bytes, struct ntfs_buffers *nb) +int ntfs_read_run_nb_ra(struct ntfs_sb_info *sbi, const struct runs_tree *run, + u64 vbo, void *buf, u32 bytes, struct ntfs_buffers *nb, + struct file_ra_state *ra) { int err; struct super_block *sb = sbi->sb; + struct address_space *mapping = sb->s_bdev->bd_mapping; u32 blocksize = sb->s_blocksize; u8 cluster_bits = sbi->cluster_bits; u32 off = vbo & sbi->cluster_mask; @@ -1208,10 +1210,22 @@ int ntfs_read_run_nb(struct ntfs_sb_info *sbi, const struct runs_tree *run, nb->bytes = bytes; } + if (ra && !ra->ra_pages) + file_ra_state_init(ra, mapping); + for (;;) { u32 len32 = len >= bytes ? 
bytes : len; sector_t block = lbo >> sb->s_blocksize_bits; + if (ra) { + pgoff_t index = lbo >> PAGE_SHIFT; + if (!ra_has_index(ra, index)) { + page_cache_sync_readahead(mapping, ra, NULL, + index, 1); + ra->prev_pos = (loff_t)index << PAGE_SHIFT; + } + } + do { u32 op = blocksize - off; @@ -1282,11 +1296,11 @@ out: * * Return: < 0 if error, 0 if ok, -E_NTFS_FIXUP if need to update fixups. */ -int ntfs_read_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, - struct NTFS_RECORD_HEADER *rhdr, u32 bytes, - struct ntfs_buffers *nb) +int ntfs_read_bh_ra(struct ntfs_sb_info *sbi, const struct runs_tree *run, + u64 vbo, struct NTFS_RECORD_HEADER *rhdr, u32 bytes, + struct ntfs_buffers *nb, struct file_ra_state *ra) { - int err = ntfs_read_run_nb(sbi, run, vbo, rhdr, bytes, nb); + int err = ntfs_read_run_nb_ra(sbi, run, vbo, rhdr, bytes, nb, ra); if (err) return err; @@ -1347,8 +1361,7 @@ int ntfs_get_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, wait_on_buffer(bh); lock_buffer(bh); - if (!buffer_uptodate(bh)) - { + if (!buffer_uptodate(bh)) { memset(bh->b_data, 0, blocksize); set_buffer_uptodate(bh); } diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index 7c7bae84ec9a..f0cfa000ffbb 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -1026,17 +1026,18 @@ static int indx_write(struct ntfs_index *indx, struct ntfs_inode *ni, } /* - * indx_read + * indx_read_ra * * If ntfs_readdir calls this function * inode is shared locked and no ni_lock. * Use rw_semaphore for read/write access to alloc_run. 
*/ -int indx_read(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn, - struct indx_node **node) +int indx_read_ra(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn, + struct indx_node **node, struct file_ra_state *ra) { int err; struct INDEX_BUFFER *ib; + struct ntfs_sb_info *sbi = ni->mi.sbi; struct runs_tree *run = &indx->alloc_run; struct rw_semaphore *lock = &indx->run_lock; u64 vbo = (u64)vbn << indx->vbn2vbo_bits; @@ -1062,7 +1063,7 @@ int indx_read(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn, } down_read(lock); - err = ntfs_read_bh(ni->mi.sbi, run, vbo, &ib->rhdr, bytes, &in->nb); + err = ntfs_read_bh_ra(sbi, run, vbo, &ib->rhdr, bytes, &in->nb, ra); up_read(lock); if (!err) goto ok; @@ -1082,7 +1083,7 @@ int indx_read(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn, goto out; down_read(lock); - err = ntfs_read_bh(ni->mi.sbi, run, vbo, &ib->rhdr, bytes, &in->nb); + err = ntfs_read_bh_ra(sbi, run, vbo, &ib->rhdr, bytes, &in->nb, ra); up_read(lock); if (err == -E_NTFS_FIXUP) goto ok; @@ -1098,7 +1099,7 @@ ok: } if (err == -E_NTFS_FIXUP) { - ntfs_write_bh(ni->mi.sbi, &ib->rhdr, &in->nb, 0); + ntfs_write_bh(sbi, &ib->rhdr, &in->nb, 0); err = 0; } diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 7b619bb151ce..18b14f7db4ad 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -625,11 +625,27 @@ int ntfs_sb_write_run(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, const void *buf, size_t bytes, int sync); struct buffer_head *ntfs_bread_run(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo); -int ntfs_read_run_nb(struct ntfs_sb_info *sbi, const struct runs_tree *run, - u64 vbo, void *buf, u32 bytes, struct ntfs_buffers *nb); -int ntfs_read_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, - struct NTFS_RECORD_HEADER *rhdr, u32 bytes, - struct ntfs_buffers *nb); +int ntfs_read_run_nb_ra(struct ntfs_sb_info *sbi, const struct runs_tree *run, + u64 vbo, void *buf, u32 
bytes, struct ntfs_buffers *nb, + struct file_ra_state *ra); +static inline int ntfs_read_run_nb(struct ntfs_sb_info *sbi, + const struct runs_tree *run, u64 vbo, + void *buf, u32 bytes, + struct ntfs_buffers *nb) +{ + return ntfs_read_run_nb_ra(sbi, run, vbo, buf, bytes, nb, NULL); +} +int ntfs_read_bh_ra(struct ntfs_sb_info *sbi, const struct runs_tree *run, + u64 vbo, struct NTFS_RECORD_HEADER *rhdr, u32 bytes, + struct ntfs_buffers *nb, struct file_ra_state *ra); +static inline int ntfs_read_bh(struct ntfs_sb_info *sbi, + const struct runs_tree *run, u64 vbo, + struct NTFS_RECORD_HEADER *rhdr, u32 bytes, + struct ntfs_buffers *nb) +{ + return ntfs_read_bh_ra(sbi, run, vbo, rhdr, bytes, nb, NULL); +} + int ntfs_get_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, u32 bytes, struct ntfs_buffers *nb); int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr, @@ -695,8 +711,13 @@ int indx_init(struct ntfs_index *indx, struct ntfs_sb_info *sbi, const struct ATTRIB *attr, enum index_mutex_classed type); struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni, struct ATTRIB **attr, struct mft_inode **mi); -int indx_read(struct ntfs_index *idx, struct ntfs_inode *ni, CLST vbn, - struct indx_node **node); +int indx_read_ra(struct ntfs_index *idx, struct ntfs_inode *ni, CLST vbn, + struct indx_node **node, struct file_ra_state *ra); +static inline int indx_read(struct ntfs_index *idx, struct ntfs_inode *ni, + CLST vbn, struct indx_node **node) +{ + return indx_read_ra(idx, ni, vbn, node, NULL); +} int indx_find(struct ntfs_index *indx, struct ntfs_inode *dir, const struct INDEX_ROOT *root, const void *Key, size_t KeyLen, const void *param, int *diff, struct NTFS_DE **entry, From dffc7f2f177b7f1ca52067dc23d0304d7a25d45c Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Tue, 9 Dec 2025 12:08:32 +0300 Subject: [PATCH 08/26] fs/ntfs3: allow readdir() to finish after directory mutations without rewinddir() 
This patch introduces a per-directory version counter that increments on each directory modification (indx_insert_entry() / indx_delete_entry()). ntfs_readdir() uses this version to detect whether the directory has changed since enumeration began. If readdir() reaches end-of-directory but the version has changed, the walk restarts from the beginning of the index tree instead of returning prematurely. This provides rmdir-like behavior for tools that remove entries as they enumerate them. Prior to this change, bonnie++ directory operations could fail due to premature termination of readdir() during concurrent index updates. With this patch applied, bonnie++ completes successfully with no errors. Signed-off-by: Konstantin Komarov --- fs/ntfs3/dir.c | 102 ++++++++++++++++++++++++++++++++------------- fs/ntfs3/index.c | 2 + fs/ntfs3/ntfs_fs.h | 1 + 3 files changed, 76 insertions(+), 29 deletions(-) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index 1dbb661ffe0f..24cb64d5521a 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -392,33 +392,77 @@ static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, * ntfs_readdir - file_operations::iterate_shared * * Use non sorted enumeration. - * We have an example of broken volume where sorted enumeration - * counts each name twice. + * Sorted enumeration may result infinite loop if names tree contains loop. */ static int ntfs_readdir(struct file *file, struct dir_context *ctx) { const struct INDEX_ROOT *root; - u64 vbo; size_t bit; - loff_t eod; int err = 0; struct inode *dir = file_inode(file); struct ntfs_inode *ni = ntfs_i(dir); struct super_block *sb = dir->i_sb; struct ntfs_sb_info *sbi = sb->s_fs_info; loff_t i_size = i_size_read(dir); - u32 pos = ctx->pos; + u64 pos = ctx->pos; u8 *name = NULL; struct indx_node *node = NULL; u8 index_bits = ni->dir.index_bits; + size_t max_bit = i_size >> ni->dir.index_bits; + loff_t eod = i_size + sbi->record_size; /* Name is a buffer of PATH_MAX length. 
*/ static_assert(NTFS_NAME_LEN * 4 < PATH_MAX); - eod = i_size + sbi->record_size; + if (!pos) { + /* + * ni->dir.version increments each directory change. + * Save the initial value of ni->dir.version. + */ + file->private_data = (void *)ni->dir.version; + } - if (pos >= eod) - return 0; + if (pos >= eod) { + if (file->private_data == (void *)ni->dir.version) { + /* No changes since first readdir. */ + return 0; + } + + /* + * Handle directories that changed after the initial readdir(). + * + * Some user space code implements recursive removal like this instead + * of calling rmdir(2) directly: + * + * fd = opendir(path); + * while ((dent = readdir(fd))) + * unlinkat(dirfd(fd), dent->d_name, 0); + * closedir(fd); + * + * POSIX leaves unspecified what readdir() should return once the + * directory has been modified after opendir()/rewinddir(), so this + * pattern is not guaranteed to work on all filesystems or platforms. + * + * In ntfs3 the internal name tree may be reshaped while entries are + * being removed, so there is no stable anchor for continuing a + * single-pass walk based on the original readdir() order. + * + * In practice some widely used tools (for example certain rm(1) + * implementations) have used this readdir()/unlink() loop, and some + * filesystems behave in a way that effectively makes it work in the + * common case. + * + * The code below follows that practice and tries to provide + * "rmdir-like" behaviour for such callers on ntfs3, even though the + * situation is not strictly defined by the APIs. 
+ * + * Apple documents the same readdir()/unlink() issue and a workaround + * for HFS file systems in: + * https://web.archive.org/web/20220122122948/https:/support.apple.com/kb/TA21420?locale=en_US + */ + ctx->pos = pos = 3; + file->private_data = (void *)ni->dir.version; + } if (!dir_emit_dots(file, ctx)) return 0; @@ -454,35 +498,31 @@ static int ntfs_readdir(struct file *file, struct dir_context *ctx) if (pos >= sbi->record_size) { bit = (pos - sbi->record_size) >> index_bits; } else { + /* + * Add each name from root in 'ctx'. + */ err = ntfs_read_hdr(sbi, ni, &root->ihdr, 0, pos, name, ctx); if (err) goto out; bit = 0; } - if (!i_size) { - ctx->pos = eod; - goto out; - } - - for (;;) { - vbo = (u64)bit << index_bits; - if (vbo >= i_size) { - ctx->pos = eod; - goto out; - } - + /* + * Enumerate indexes until the end of dir. + */ + for (; bit < max_bit; bit += 1) { + /* Get the next used index. */ err = indx_used_bit(&ni->dir, ni, &bit); if (err) goto out; if (bit == MINUS_ONE_T) { - ctx->pos = eod; - goto out; + /* no more used indexes. end of dir. */ + break; } - vbo = (u64)bit << index_bits; - if (vbo >= i_size) { + if (bit >= max_bit) { + /* Corrupted directory. */ err = -EINVAL; goto out; } @@ -492,20 +532,24 @@ static int ntfs_readdir(struct file *file, struct dir_context *ctx) if (err) goto out; + /* + * Add each name from index in 'ctx'. + */ err = ntfs_read_hdr(sbi, ni, &node->index->ihdr, - vbo + sbi->record_size, pos, name, ctx); + ((u64)bit << index_bits) + sbi->record_size, + pos, name, ctx); if (err) goto out; - - bit += 1; } out: - __putname(name); put_indx_node(node); - if (err == 1) { + if (!err) { + /* End of directory. */ + ctx->pos = eod; + } else if (err == 1) { /* 'ctx' is full. 
*/ err = 0; } else if (err == -ENOENT) { diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index f0cfa000ffbb..d08bee3c20fa 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -2002,6 +2002,7 @@ int indx_insert_entry(struct ntfs_index *indx, struct ntfs_inode *ni, fnd->level - 1, fnd); } + indx->version += 1; out: fnd_put(fnd_a); out1: @@ -2649,6 +2650,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni, mi->dirty = true; } + indx->version += 1; out: fnd_put(fnd2); out1: diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 18b14f7db4ad..cee7b73b9670 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -191,6 +191,7 @@ struct ntfs_index { struct runs_tree alloc_run; /* read/write access to 'bitmap_run'/'alloc_run' while ntfs_readdir */ struct rw_semaphore run_lock; + size_t version; /* increment each change */ /*TODO: Remove 'cmp'. */ NTFS_CMP_FUNC cmp; From f7edab0cee03a1cbe0e55a7bcab8d2d8b6b74278 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Tue, 9 Dec 2025 15:21:41 +0800 Subject: [PATCH 09/26] fs/ntfs3: fix ntfs_mount_options leak in ntfs_fill_super() In ntfs_fill_super(), the fc->fs_private pointer is set to NULL without first freeing the memory it points to. This causes the subsequent call to ntfs_fs_free() to skip freeing the ntfs_mount_options structure. This results in a kmemleak report: unreferenced object 0xff1100015378b800 (size 32): comm "mount", pid 582, jiffies 4294890685 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 ed ff ed ff 00 04 00 00 ................ 
backtrace (crc ed541d8c): __kmalloc_cache_noprof+0x424/0x5a0 __ntfs_init_fs_context+0x47/0x590 alloc_fs_context+0x5d8/0x960 __x64_sys_fsopen+0xb1/0x190 do_syscall_64+0x50/0x1f0 entry_SYSCALL_64_after_hwframe+0x76/0x7e This issue can be reproduced using the following commands: fallocate -l 100M test.file mount test.file /tmp/test Since sbi->options is duplicated from fc->fs_private and does not directly use the memory allocated for fs_private, it is unnecessary to set fc->fs_private to NULL. Additionally, this patch simplifies the code by utilizing the helper function put_mount_options() instead of open-coding the cleanup logic. Reported-by: syzbot+23aee7afc440fe803545@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=23aee7afc440fe803545 Fixes: aee4d5a521e9 ("ntfs3: fix double free of sbi->options->nls and clarify ownership of fc->fs_private") Signed-off-by: Baokun Li Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 1acc685625f7..df65877f172c 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -705,9 +705,7 @@ static void ntfs_put_super(struct super_block *sb) ntfs_set_state(sbi, NTFS_DIRTY_CLEAR); if (sbi->options) { - unload_nls(sbi->options->nls); - kfree(sbi->options->nls_name); - kfree(sbi->options); + put_mount_options(sbi->options); sbi->options = NULL; } @@ -1253,7 +1251,6 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) } } sbi->options = options; - fc->fs_private = NULL; sb->s_flags |= SB_NODIRATIME; sb->s_magic = 0x7366746e; // "ntfs" sb->s_op = &ntfs_sops; @@ -1677,9 +1674,7 @@ put_inode_out: out: /* sbi->options == options */ if (options) { - unload_nls(options->nls); - kfree(options->nls_name); - kfree(options); + put_mount_options(sbi->options); sbi->options = NULL; } From dcd9d6a47199565d83d61a11bbf91fa2ade4d676 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 
12 Dec 2025 14:12:18 +0300 Subject: [PATCH 10/26] fs/ntfs3: fsync files by syncing parent inodes Some xfstests expect fsync() on a file or directory to also persist directory metadata up the parent chain. Using generic_file_fsync() is not sufficient for ntfs, because parent directories are not explicitly written out. Signed-off-by: Konstantin Komarov --- fs/ntfs3/dir.c | 2 +- fs/ntfs3/file.c | 30 ++++++++++++++++++++++++--- fs/ntfs3/frecord.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++ fs/ntfs3/ntfs_fs.h | 2 ++ 4 files changed, 81 insertions(+), 4 deletions(-) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index 24cb64d5521a..001773b4514b 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -668,7 +668,7 @@ const struct file_operations ntfs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = ntfs_readdir, - .fsync = generic_file_fsync, + .fsync = ntfs_file_fsync, .open = ntfs_file_open, .unlocked_ioctl = ntfs_ioctl, #ifdef CONFIG_COMPAT diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 732260087066..b48cdd77efae 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -1443,13 +1443,37 @@ static ssize_t ntfs_file_splice_write(struct pipe_inode_info *pipe, /* * ntfs_file_fsync - file_operations::fsync */ -static int ntfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) +int ntfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file_inode(file); - if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + struct super_block *sb = inode->i_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + int err, ret; + + if (unlikely(ntfs3_forced_shutdown(sb))) return -EIO; - return generic_file_fsync(file, start, end, datasync); + ret = file_write_and_wait_range(file, start, end); + if (ret) + return ret; + + ret = write_inode_now(inode, !datasync); + + if (!ret) { + ret = ni_write_parents(ntfs_i(inode), !datasync); + } + + if (!ret) { + ntfs_set_state(sbi, NTFS_DIRTY_CLEAR); + 
ntfs_update_mftmirr(sbi, false); + } + + err = sync_blockdev(sb->s_bdev); + if (unlikely(err && !ret)) + ret = err; + if (!ret) + blkdev_issue_flush(sb->s_bdev); + return ret; } // clang-format off diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 7e3d61de2f8f..a123e3f0acde 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -3001,6 +3001,57 @@ bool ni_is_dirty(struct inode *inode) return false; } +/* + * ni_write_parents + * + * Helper function for ntfs_file_fsync. + */ +int ni_write_parents(struct ntfs_inode *ni, int sync) +{ + int err = 0; + struct ATTRIB *attr = NULL; + struct ATTR_LIST_ENTRY *le = NULL; + struct ntfs_sb_info *sbi = ni->mi.sbi; + struct super_block *sb = sbi->sb; + + while ((attr = ni_find_attr(ni, attr, &le, ATTR_NAME, NULL, 0, NULL, + NULL))) { + struct inode *dir; + struct ATTR_FILE_NAME *fname; + + fname = resident_data_ex(attr, SIZEOF_ATTRIBUTE_FILENAME); + if (!fname) + continue; + + /* Check simple case when parent inode equals current inode. 
*/ + if (ino_get(&fname->home) == ni->vfs_inode.i_ino) { + if (MFT_REC_ROOT != ni->vfs_inode.i_ino) { + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + err = -EINVAL; + } + continue; + } + + dir = ntfs_iget5(sb, &fname->home, NULL); + if (IS_ERR(dir)) { + ntfs_inode_warn( + &ni->vfs_inode, + "failed to open parent directory r=%lx to write", + (long)ino_get(&fname->home)); + continue; + } + + if (!is_bad_inode(dir)) { + int err2 = write_inode_now(dir, sync); + if (!err) + err = err2; + } + iput(dir); + } + + return err; +} + /* * ni_update_parent * diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index cee7b73b9670..482722438bd9 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -512,6 +512,7 @@ int ntfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, int ntfs_file_open(struct inode *inode, struct file *file); int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); +int ntfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg); long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg); extern const struct inode_operations ntfs_special_inode_operations; @@ -590,6 +591,7 @@ int ni_rename(struct ntfs_inode *dir_ni, struct ntfs_inode *new_dir_ni, struct NTFS_DE *new_de); bool ni_is_dirty(struct inode *inode); +int ni_write_parents(struct ntfs_inode *ni, int sync); /* Globals from fslog.c */ bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes); From 3a6aba7f3cf2b46816e08548c254d98de9c74eba Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 12 Dec 2025 14:27:48 +0300 Subject: [PATCH 11/26] fs/ntfs3: drop preallocated clusters for sparse and compressed files Do not keep preallocated clusters for sparse and compressed files. Preserving preallocation in these cases causes fsx failures when running with sparse files and preallocation enabled.
Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrib.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index c45880ab2391..0cd15a0983fe 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -448,8 +448,10 @@ again: is_ext = is_attr_ext(attr_b); align = sbi->cluster_size; - if (is_ext) + if (is_ext) { align <<= attr_b->nres.c_unit; + keep_prealloc = false; + } old_valid = le64_to_cpu(attr_b->nres.valid_size); old_size = le64_to_cpu(attr_b->nres.data_size); From 576248a34b927e93b2fd3fff7df735ba73ad7d01 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 12 Dec 2025 14:33:19 +0300 Subject: [PATCH 12/26] fs/ntfs3: handle attr_set_size() errors when truncating files If attr_set_size() fails while truncating down, the error is silently ignored and the inode may be left in an inconsistent state. Signed-off-by: Konstantin Komarov --- fs/ntfs3/file.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index b48cdd77efae..a88045ab549f 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -505,8 +505,8 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) { struct super_block *sb = inode->i_sb; struct ntfs_inode *ni = ntfs_i(inode); - int err, dirty = 0; u64 new_valid; + int err; if (!S_ISREG(inode->i_mode)) return 0; @@ -522,7 +522,6 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) } new_valid = ntfs_up_block(sb, min_t(u64, ni->i_valid, new_size)); - truncate_setsize(inode, new_size); ni_lock(ni); @@ -536,20 +535,19 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) ni->i_valid = new_valid; ni_unlock(ni); + if (unlikely(err)) + return err; ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE; inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); if (!IS_DIRSYNC(inode)) { - dirty = 1; + mark_inode_dirty(inode); } else { err = ntfs_sync_inode(inode); if (err) return err; } - if (dirty) - mark_inode_dirty(inode); 
- return 0; } From 356fa248168be90109b66f32a61b8eaedc98424a Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 12 Dec 2025 14:38:10 +0300 Subject: [PATCH 13/26] fs/ntfs3: zero-fill folios beyond i_valid in ntfs_read_folio() Handle ntfs_read_folio() early when the folio offset is beyond i_valid by zero-filling the folio and marking it uptodate. This avoids needless I/O and locking, and improves read performance. Signed-off-by: Konstantin Komarov --- fs/ntfs3/inode.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 1319b99dfeb4..ace9873adaae 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -723,6 +723,19 @@ static int ntfs_read_folio(struct file *file, struct folio *folio) struct address_space *mapping = folio->mapping; struct inode *inode = mapping->host; struct ntfs_inode *ni = ntfs_i(inode); + loff_t vbo = folio_pos(folio); + + if (unlikely(is_bad_ni(ni))) { + folio_unlock(folio); + return -EIO; + } + + if (ni->i_valid <= vbo) { + folio_zero_range(folio, 0, folio_size(folio)); + folio_mark_uptodate(folio); + folio_unlock(folio); + return 0; + } if (is_resident(ni)) { ni_lock(ni); From c61326967728392931f8a2240cb2cf4c81b523c1 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 26 Dec 2025 15:59:47 +0300 Subject: [PATCH 14/26] fs/ntfs3: implement llseek SEEK_DATA/SEEK_HOLE by scanning data runs The generic llseek implementation does not understand ntfs data runs, sparse regions, or compression semantics, and therefore cannot correctly locate data or holes in files. Add a filesystem-specific llseek handler that scans attribute data runs to find the next data or hole starting at the given offset. Handle resident attributes, sparse runs, compressed holes, and the implicit hole at end-of-file.
Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrib.c | 4 +-- fs/ntfs3/file.c | 27 +++++++++++++++- fs/ntfs3/frecord.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++ fs/ntfs3/ntfs.h | 1 + fs/ntfs3/ntfs_fs.h | 8 +++++ 5 files changed, 113 insertions(+), 3 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 0cd15a0983fe..3e188d6c229f 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -940,7 +940,7 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, if (!attr_b->non_res) { *lcn = RESIDENT_LCN; - *len = 1; + *len = le32_to_cpu(attr_b->res.data_size); goto out; } @@ -950,7 +950,7 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, err = -EINVAL; } else { *len = 1; - *lcn = SPARSE_LCN; + *lcn = EOF_LCN; } goto out; } diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index a88045ab549f..c89b1e7e734c 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -1474,6 +1474,31 @@ int ntfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) return ret; } +/* + * ntfs_llseek - file_operations::llseek + */ +static loff_t ntfs_llseek(struct file *file, loff_t offset, int whence) +{ + struct inode *inode = file->f_mapping->host; + struct ntfs_inode *ni = ntfs_i(inode); + loff_t maxbytes = ntfs_get_maxbytes(ni); + loff_t ret; + + if (whence == SEEK_DATA || whence == SEEK_HOLE) { + inode_lock_shared(inode); + /* Scan fragments for hole or data. 
*/ + ret = ni_seek_data_or_hole(ni, offset, whence == SEEK_DATA); + inode_unlock_shared(inode); + + if (ret >= 0) + ret = vfs_setpos(file, ret, maxbytes); + } else { + ret = generic_file_llseek_size(file, offset, whence, maxbytes, + i_size_read(inode)); + } + return ret; +} + // clang-format off const struct inode_operations ntfs_file_inode_operations = { .getattr = ntfs_getattr, @@ -1485,7 +1510,7 @@ const struct inode_operations ntfs_file_inode_operations = { }; const struct file_operations ntfs_file_operations = { - .llseek = generic_file_llseek, + .llseek = ntfs_llseek, .read_iter = ntfs_file_read_iter, .write_iter = ntfs_file_write_iter, .unlocked_ioctl = ntfs_ioctl, diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index a123e3f0acde..03dcb66b5f6c 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -3001,6 +3001,82 @@ bool ni_is_dirty(struct inode *inode) return false; } +/* + * ni_seek_data_or_hole + * + * Helper function for ntfs_llseek( SEEK_DATA/SEEK_HOLE ) + */ +loff_t ni_seek_data_or_hole(struct ntfs_inode *ni, loff_t offset, bool data) +{ + int err; + u8 cluster_bits = ni->mi.sbi->cluster_bits; + CLST vcn, lcn, clen; + loff_t vbo; + + /* Enumerate all fragments. */ + for (vcn = offset >> cluster_bits;; vcn += clen) { + err = attr_data_get_block(ni, vcn, 1, &lcn, &clen, NULL, false); + if (err) { + return err; + } + + if (lcn == RESIDENT_LCN) { + /* clen - resident size in bytes. clen == ni->vfs_inode.i_size */ + if (offset >= clen) { + /* check eof. */ + return -ENXIO; + } + + if (data) { + return offset; + } + + return clen; + } + + if (lcn == EOF_LCN) { + if (data) { + return -ENXIO; + } + + /* implicit hole at the end of file. */ + return ni->vfs_inode.i_size; + } + + if (data) { + /* + * Adjust the file offset to the next location in the file greater than + * or equal to offset containing data. If offset points to data, then + * the file offset is set to offset. 
+ */ + if (lcn != SPARSE_LCN) { + vbo = (u64)vcn << cluster_bits; + return max(vbo, offset); + } + } else { + /* + * Adjust the file offset to the next hole in the file greater than or + * equal to offset. If offset points into the middle of a hole, then the + * file offset is set to offset. If there is no hole past offset, then the + * file offset is adjusted to the end of the file + * (i.e., there is an implicit hole at the end of any file). + */ + if (lcn == SPARSE_LCN && + /* native compression hole begins at aligned vcn. */ + (!(ni->std_fa & FILE_ATTRIBUTE_COMPRESSED) || + !(vcn & (NTFS_LZNT_CLUSTERS - 1)))) { + vbo = (u64)vcn << cluster_bits; + return max(vbo, offset); + } + } + + if (!clen) { + /* Corrupted file. */ + return -EINVAL; + } + } +} + /* * ni_write_parents * diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 552b97905813..ae0a6ba102c0 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -81,6 +81,7 @@ typedef u32 CLST; #define SPARSE_LCN ((CLST)-1) #define RESIDENT_LCN ((CLST)-2) #define COMPRESSED_LCN ((CLST)-3) +#define EOF_LCN ((CLST)-4) enum RECORD_NUM { MFT_REC_MFT = 0, diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 482722438bd9..32823e1428a7 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -591,6 +591,7 @@ int ni_rename(struct ntfs_inode *dir_ni, struct ntfs_inode *new_dir_ni, struct NTFS_DE *new_de); bool ni_is_dirty(struct inode *inode); +loff_t ni_seek_data_or_hole(struct ntfs_inode *ni, loff_t offset, bool data); int ni_write_parents(struct ntfs_inode *ni, int sync); /* Globals from fslog.c */ @@ -1107,6 +1108,13 @@ static inline int is_resident(struct ntfs_inode *ni) return ni->ni_flags & NI_FLAG_RESIDENT; } +static inline loff_t ntfs_get_maxbytes(struct ntfs_inode *ni) +{ + struct ntfs_sb_info *sbi = ni->mi.sbi; + return is_sparsed(ni) || is_compressed(ni) ? 
sbi->maxbytes_sparse : + sbi->maxbytes; +} + static inline void le16_sub_cpu(__le16 *var, u16 val) { *var = cpu_to_le16(le16_to_cpu(*var) - val); From e37a75bb866c29da954b51d0dd7670406246d9ee Mon Sep 17 00:00:00 2001 From: Szymon Wilczek Date: Mon, 22 Dec 2025 16:10:10 +0100 Subject: [PATCH 15/26] fs/ntfs3: fix deadlock in ni_read_folio_cmpr Syzbot reported a task hung in ni_readpage_cmpr (now ni_read_folio_cmpr). This is caused by a lock inversion deadlock involving the inode mutex (ni_lock) and page locks. Scenario: 1. Task A enters ntfs_read_folio() for page X. It acquires ni_lock. 2. Task A calls ni_read_folio_cmpr(), which attempts to lock all pages in the compressed frame (including page Y). 3. Concurrently, Task B (e.g., via readahead) has locked page Y and calls ntfs_read_folio(). 4. Task B waits for ni_lock (held by A). 5. Task A waits for page Y lock (held by B). -> DEADLOCK. The fix is to restructure locking: do not take ni_lock in ntfs_read_folio(). Instead, acquire ni_lock inside ni_read_folio_cmpr() ONLY AFTER all required page locks for the frame have been successfully acquired. This restores the correct lock ordering (Page Lock -> ni_lock) consistent with VFS. 
Reported-by: syzbot+5af33dd272b913b65880@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=5af33dd272b913b65880 Fixes: f35590ee26f5 ("fs/ntfs3: remove ntfs_bio_pages and use page cache for compressed I/O") Signed-off-by: Szymon Wilczek [almaz.alexandrovich@paragon-software.com: ni_readpage_cmpr was renamed to ni_read_folio_cmpr] Signed-off-by: Konstantin Komarov --- fs/ntfs3/frecord.c | 2 ++ fs/ntfs3/inode.c | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 03dcb66b5f6c..3025a404e695 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -2107,7 +2107,9 @@ int ni_read_folio_cmpr(struct ntfs_inode *ni, struct folio *folio) pages[i] = pg; } + ni_lock(ni); err = ni_read_frame(ni, frame_vbo, pages, pages_per_frame, 0); + ni_unlock(ni); out1: for (i = 0; i < pages_per_frame; i++) { diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index ace9873adaae..4b50fdb4ff47 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -748,9 +748,8 @@ static int ntfs_read_folio(struct file *file, struct folio *folio) } if (is_compressed(ni)) { - ni_lock(ni); + /* ni_lock is taken inside ni_read_folio_cmpr after page locks */ err = ni_read_folio_cmpr(ni, folio); - ni_unlock(ni); return err; } From 099ef9ab9203dff327f2d61e44773f9acbc01f13 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 26 Dec 2025 16:45:19 +0300 Subject: [PATCH 16/26] fs/ntfs3: implement iomap-based file operations This patch modifies the ntfs3 driver by replacing the buffer_head-based operations with the iomap ones. Implementation details: - Implements core iomap operations (ntfs_iomap_begin/end) for block mapping: Proper handling of resident attributes via IOMAP_INLINE. Support for sparse files through IOMAP_HOLE semantics. Correct unwritten extent handling for zeroing operations. - Replaces custom implementations with standardized iomap helpers: Converts buffered reads to use iomap_read_folio and iomap_readahead. 
Implements iomap_file_buffered_write for write operations. Uses iomap_dio_rw for direct I/O paths. Migrates zero range operations to iomap_zero_range. - Preserves special handling paths for compressed files - Implements proper EOF/valid data size management during writes Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrib.c | 78 ++--- fs/ntfs3/file.c | 368 +++++++++++----------- fs/ntfs3/frecord.c | 183 +---------- fs/ntfs3/fslog.c | 2 +- fs/ntfs3/fsntfs.c | 10 +- fs/ntfs3/inode.c | 755 ++++++++++++++++++++++----------------------- fs/ntfs3/ntfs_fs.h | 16 +- fs/ntfs3/super.c | 11 +- 8 files changed, 617 insertions(+), 806 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 3e188d6c229f..aa745fb226f5 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -166,6 +166,12 @@ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, continue; } + if (err == -ENOSPC && new_len && vcn - vcn0) { + /* Keep already allocated clusters. */ + *alen = vcn - vcn0; + return 0; + } + if (err) goto out; @@ -886,7 +892,7 @@ bad_inode: * - new allocated clusters are zeroed via blkdev_issue_zeroout. */ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, - CLST *len, bool *new, bool zero) + CLST *len, bool *new, bool zero, void **res) { int err = 0; struct runs_tree *run = &ni->file.run; @@ -903,6 +909,8 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, if (new) *new = false; + if (res) + *res = NULL; /* Try to find in cache. 
*/ down_read(&ni->file.run_lock); @@ -939,8 +947,15 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, } if (!attr_b->non_res) { + u32 data_size = le32_to_cpu(attr_b->res.data_size); *lcn = RESIDENT_LCN; - *len = le32_to_cpu(attr_b->res.data_size); + *len = data_size; + if (res && data_size) { + *res = kmemdup(resident_data(attr_b), data_size, + GFP_KERNEL); + if (!*res) + err = -ENOMEM; + } goto out; } @@ -1028,7 +1043,8 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, to_alloc = ((vcn0 + clen + clst_per_frame - 1) & cmask) - vcn; if (fr < clst_per_frame) fr = clst_per_frame; - zero = true; + if (vcn != vcn0) + zero = true; /* Check if 'vcn' and 'vcn0' in different attribute segments. */ if (vcn < svcn || evcn1 <= vcn) { @@ -1244,33 +1260,6 @@ undo1: goto out; } -int attr_data_read_resident(struct ntfs_inode *ni, struct folio *folio) -{ - u64 vbo; - struct ATTRIB *attr; - u32 data_size; - size_t len; - - attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, NULL); - if (!attr) - return -EINVAL; - - if (attr->non_res) - return E_NTFS_NONRESIDENT; - - vbo = folio->index << PAGE_SHIFT; - data_size = le32_to_cpu(attr->res.data_size); - if (vbo > data_size) - len = 0; - else - len = min(data_size - vbo, folio_size(folio)); - - folio_fill_tail(folio, 0, resident_data(attr) + vbo, len); - folio_mark_uptodate(folio); - - return 0; -} - int attr_data_write_resident(struct ntfs_inode *ni, struct folio *folio) { u64 vbo; @@ -1287,7 +1276,7 @@ int attr_data_write_resident(struct ntfs_inode *ni, struct folio *folio) return E_NTFS_NONRESIDENT; } - vbo = folio->index << PAGE_SHIFT; + vbo = folio_pos(folio); data_size = le32_to_cpu(attr->res.data_size); if (vbo < data_size) { char *data = resident_data(attr); @@ -1360,21 +1349,20 @@ int attr_load_runs_range(struct ntfs_inode *ni, enum ATTR_TYPE type, int retry = 0; for (vcn = from >> cluster_bits; vcn <= vcn_last; vcn += clen) { - if (!run_lookup_entry(run, 
vcn, &lcn, &clen, NULL)) { - if (retry != 0) { /* Next run_lookup_entry(vcn) also failed. */ - err = -EINVAL; - break; - } - err = attr_load_runs_vcn(ni, type, name, name_len, run, - vcn); - if (err) - break; - - clen = 0; /* Next run_lookup_entry(vcn) must be success. */ - retry++; - } - else + if (run_lookup_entry(run, vcn, &lcn, &clen, NULL)) { retry = 0; + continue; + } + if (retry) { + err = -EINVAL; + break; + } + err = attr_load_runs_vcn(ni, type, name, name_len, run, vcn); + if (err) + break; + + clen = 0; /* Next run_lookup_entry(vcn) must be success. */ + retry++; } return err; diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index c89b1e7e734c..58fa4da114bb 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "debug.h" #include "ntfs.h" @@ -189,9 +190,6 @@ static int ntfs_extend_initialized_size(struct file *file, const loff_t new_valid) { struct inode *inode = &ni->vfs_inode; - struct address_space *mapping = inode->i_mapping; - struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info; - loff_t pos = valid; int err; if (valid >= new_valid) @@ -204,140 +202,41 @@ static int ntfs_extend_initialized_size(struct file *file, WARN_ON(is_compressed(ni)); - for (;;) { - u32 zerofrom, len; - struct folio *folio; - u8 bits; - CLST vcn, lcn, clen; - - if (is_sparsed(ni)) { - bits = sbi->cluster_bits; - vcn = pos >> bits; - - err = attr_data_get_block(ni, vcn, 1, &lcn, &clen, NULL, - false); - if (err) - goto out; - - if (lcn == SPARSE_LCN) { - pos = ((loff_t)clen + vcn) << bits; - ni->i_valid = pos; - goto next; - } - } - - zerofrom = pos & (PAGE_SIZE - 1); - len = PAGE_SIZE - zerofrom; - - if (pos + len > new_valid) - len = new_valid - pos; - - err = ntfs_write_begin(NULL, mapping, pos, len, &folio, NULL); - if (err) - goto out; - - folio_zero_range(folio, zerofrom, folio_size(folio) - zerofrom); - - err = ntfs_write_end(NULL, mapping, pos, len, len, folio, NULL); - if (err < 0) - goto out; - pos += len; - 
-next: - if (pos >= new_valid) - break; - - balance_dirty_pages_ratelimited(mapping); - cond_resched(); + err = iomap_zero_range(inode, valid, new_valid - valid, NULL, + &ntfs_iomap_ops, &ntfs_iomap_folio_ops, NULL); + if (err) { + ni->i_valid = valid; + ntfs_inode_warn(inode, + "failed to extend initialized size to %llx.", + new_valid); + return err; } return 0; - -out: - ni->i_valid = valid; - ntfs_inode_warn(inode, "failed to extend initialized size to %llx.", - new_valid); - return err; } -/* - * ntfs_zero_range - Helper function for punch_hole. - * - * It zeroes a range [vbo, vbo_to). - */ -static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to) +static void ntfs_filemap_close(struct vm_area_struct *vma) { - int err = 0; - struct address_space *mapping = inode->i_mapping; - u32 blocksize = i_blocksize(inode); - pgoff_t idx = vbo >> PAGE_SHIFT; - u32 from = vbo & (PAGE_SIZE - 1); - pgoff_t idx_end = (vbo_to + PAGE_SIZE - 1) >> PAGE_SHIFT; - loff_t page_off; - struct buffer_head *head, *bh; - u32 bh_next, bh_off, to; - sector_t iblock; - struct folio *folio; - bool dirty = false; + struct inode *inode = file_inode(vma->vm_file); + struct ntfs_inode *ni = ntfs_i(inode); + u64 from = (u64)vma->vm_pgoff << PAGE_SHIFT; + u64 to = min_t(u64, i_size_read(inode), + from + vma->vm_end - vma->vm_start); - for (; idx < idx_end; idx += 1, from = 0) { - page_off = (loff_t)idx << PAGE_SHIFT; - to = (page_off + PAGE_SIZE) > vbo_to ? 
(vbo_to - page_off) : - PAGE_SIZE; - iblock = page_off >> inode->i_blkbits; - - folio = __filemap_get_folio( - mapping, idx, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, - mapping_gfp_constraint(mapping, ~__GFP_FS)); - if (IS_ERR(folio)) - return PTR_ERR(folio); - - head = folio_buffers(folio); - if (!head) - head = create_empty_buffers(folio, blocksize, 0); - - bh = head; - bh_off = 0; - do { - bh_next = bh_off + blocksize; - - if (bh_next <= from || bh_off >= to) - continue; - - if (!buffer_mapped(bh)) { - ntfs_get_block(inode, iblock, bh, 0); - /* Unmapped? It's a hole - nothing to do. */ - if (!buffer_mapped(bh)) - continue; - } - - /* Ok, it's mapped. Make sure it's up-to-date. */ - if (folio_test_uptodate(folio)) - set_buffer_uptodate(bh); - else if (bh_read(bh, 0) < 0) { - err = -EIO; - folio_unlock(folio); - folio_put(folio); - goto out; - } - - mark_buffer_dirty(bh); - } while (bh_off = bh_next, iblock += 1, - head != (bh = bh->b_this_page)); - - folio_zero_segment(folio, from, to); - dirty = true; - - folio_unlock(folio); - folio_put(folio); - cond_resched(); - } -out: - if (dirty) + if (ni->i_valid < to) { + ni->i_valid = to; mark_inode_dirty(inode); - return err; + } } +/* Copy of generic_file_vm_ops. 
*/ +static const struct vm_operations_struct ntfs_file_vm_ops = { + .close = ntfs_filemap_close, + .fault = filemap_fault, + .map_pages = filemap_map_pages, + .page_mkwrite = filemap_page_mkwrite, +}; + /* * ntfs_file_mmap_prepare - file_operations::mmap_prepare */ @@ -346,7 +245,6 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc) struct file *file = desc->file; struct inode *inode = file_inode(file); struct ntfs_inode *ni = ntfs_i(inode); - u64 from = ((u64)desc->pgoff << PAGE_SHIFT); bool rw = desc->vm_flags & VM_WRITE; int err; @@ -378,7 +276,8 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc) } if (rw) { - u64 to = min_t(loff_t, i_size_read(inode), + u64 from = (u64)desc->pgoff << PAGE_SHIFT; + u64 to = min_t(u64, i_size_read(inode), from + vma_desc_size(desc)); if (is_sparsed(ni)) { @@ -391,7 +290,8 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc) for (; vcn < end; vcn += len) { err = attr_data_get_block(ni, vcn, 1, &lcn, - &len, &new, true); + &len, &new, true, + NULL); if (err) goto out; } @@ -411,6 +311,8 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc) } err = generic_file_mmap_prepare(desc); + if (!err && rw) + desc->vm_ops = &ntfs_file_vm_ops; out: return err; } @@ -465,7 +367,7 @@ static int ntfs_extend(struct inode *inode, loff_t pos, size_t count, */ for (; vcn < cend_v; vcn += clen) { err = attr_data_get_block(ni, vcn, cend_v - vcn, &lcn, - &clen, &new, true); + &clen, &new, true, NULL); if (err) goto out; } @@ -474,7 +376,7 @@ static int ntfs_extend(struct inode *inode, loff_t pos, size_t count, */ for (; vcn < cend; vcn += clen) { err = attr_data_get_block(ni, vcn, cend - vcn, &lcn, - &clen, &new, false); + &clen, &new, false, NULL); if (err) goto out; } @@ -503,25 +405,10 @@ out: static int ntfs_truncate(struct inode *inode, loff_t new_size) { - struct super_block *sb = inode->i_sb; - struct ntfs_inode *ni = ntfs_i(inode); - u64 new_valid; int err; + struct ntfs_inode *ni = ntfs_i(inode); + 
u64 new_valid = min_t(u64, ni->i_valid, new_size); - if (!S_ISREG(inode->i_mode)) - return 0; - - if (is_compressed(ni)) { - if (ni->i_valid > new_size) - ni->i_valid = new_size; - } else { - err = block_truncate_page(inode->i_mapping, new_size, - ntfs_get_block); - if (err) - return err; - } - - new_valid = ntfs_up_block(sb, min_t(u64, ni->i_valid, new_size)); truncate_setsize(inode, new_size); ni_lock(ni); @@ -531,11 +418,11 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) &new_valid, ni->mi.sbi->options->prealloc, NULL); up_write(&ni->file.run_lock); - if (new_valid < ni->i_valid) - ni->i_valid = new_valid; + ni->i_valid = new_valid; ni_unlock(ni); - if (unlikely(err)) + + if (err) return err; ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE; @@ -646,13 +533,17 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) tmp = min(vbo_a, end); if (tmp > vbo) { - err = ntfs_zero_range(inode, vbo, tmp); + err = iomap_zero_range(inode, vbo, tmp - vbo, NULL, + &ntfs_iomap_ops, + &ntfs_iomap_folio_ops, NULL); if (err) goto out; } if (vbo < end_a && end_a < end) { - err = ntfs_zero_range(inode, end_a, end); + err = iomap_zero_range(inode, end_a, end - end_a, NULL, + &ntfs_iomap_ops, + &ntfs_iomap_folio_ops, NULL); if (err) goto out; } @@ -762,7 +653,7 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) for (; vcn < cend_v; vcn += clen) { err = attr_data_get_block(ni, vcn, cend_v - vcn, &lcn, &clen, &new, - true); + true, NULL); if (err) goto out; } @@ -772,7 +663,7 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) for (; vcn < cend; vcn += clen) { err = attr_data_get_block(ni, vcn, cend - vcn, &lcn, &clen, &new, - false); + false, NULL); if (err) goto out; } @@ -787,6 +678,7 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) ni_unlock(ni); if (err) goto out; + i_size_write(inode, i_size); } else if (new_size > i_size) { i_size_write(inode, 
new_size); } @@ -923,12 +815,16 @@ static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct ntfs_inode *ni = ntfs_i(inode); + size_t bytes = iov_iter_count(iter); ssize_t err; err = check_read_restriction(inode); if (err) return err; + if (!bytes) + return 0; /* skip atime */ + if (is_compressed(ni)) { if (iocb->ki_flags & IOCB_DIRECT) { ntfs_inode_warn( @@ -940,13 +836,58 @@ static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) } /* Check minimum alignment for dio. */ + if ((iocb->ki_flags & IOCB_DIRECT) && + (is_resident(ni) || ((iocb->ki_pos | iov_iter_alignment(iter)) & + ni->mi.sbi->bdev_blocksize_mask))) { + /* Fallback to buffered I/O */ + iocb->ki_flags &= ~IOCB_DIRECT; + } + if (iocb->ki_flags & IOCB_DIRECT) { - struct super_block *sb = inode->i_sb; - struct ntfs_sb_info *sbi = sb->s_fs_info; - if ((iocb->ki_pos | iov_iter_alignment(iter)) & - sbi->bdev_blocksize_mask) { - iocb->ki_flags &= ~IOCB_DIRECT; + loff_t valid, i_size; + loff_t vbo = iocb->ki_pos; + loff_t end = vbo + bytes; + unsigned int dio_flags = IOMAP_DIO_PARTIAL; + + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock_shared(inode)) + return -EAGAIN; + } else { + inode_lock_shared(inode); } + + valid = ni->i_valid; + i_size = inode->i_size; + + if (vbo < valid) { + if (valid < end) { + /* read cross 'valid' size. */ + dio_flags |= IOMAP_DIO_FORCE_WAIT; + } + + err = iomap_dio_rw(iocb, iter, &ntfs_iomap_ops, NULL, + dio_flags, NULL, 0); + + if (err > 0) { + end = vbo + err; + if (valid < end) { + size_t to_zero = end - valid; + /* Fix iter. 
*/ + iov_iter_revert(iter, to_zero); + iov_iter_zero(to_zero, iter); + } + } + } else if (vbo < i_size) { + if (end > i_size) + bytes = i_size - vbo; + iov_iter_zero(bytes, iter); + iocb->ki_pos += bytes; + err = bytes; + } + + inode_unlock_shared(inode); + file_accessed(iocb->ki_filp); + return err; } return generic_file_read_iter(iocb, iter); @@ -1070,7 +1011,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) off = valid & (frame_size - 1); err = attr_data_get_block(ni, frame << NTFS_LZNT_CUNIT, 1, &lcn, - &clen, NULL, false); + &clen, NULL, false, NULL); if (err) goto out; @@ -1273,8 +1214,9 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct ntfs_inode *ni = ntfs_i(inode); - ssize_t ret; - int err; + struct super_block *sb = inode->i_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + ssize_t ret, err; if (!inode_trylock(inode)) { if (iocb->ki_flags & IOCB_NOWAIT) @@ -1312,15 +1254,73 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ret) goto out; - ret = is_compressed(ni) ? ntfs_compress_write(iocb, from) : - __generic_file_write_iter(iocb, from); + if (is_compressed(ni)) { + ret = ntfs_compress_write(iocb, from); + goto out; + } + + /* Check minimum alignment for dio. 
*/ + if ((iocb->ki_flags & IOCB_DIRECT) && + (is_resident(ni) || ((iocb->ki_pos | iov_iter_alignment(from)) & + sbi->bdev_blocksize_mask))) { + /* Fallback to buffered I/O */ + iocb->ki_flags &= ~IOCB_DIRECT; + } + + if (!(iocb->ki_flags & IOCB_DIRECT)) { + ret = iomap_file_buffered_write(iocb, from, &ntfs_iomap_ops, + &ntfs_iomap_folio_ops, NULL); + inode_unlock(inode); + + if (likely(ret > 0)) + ret = generic_write_sync(iocb, ret); + + return ret; + } + + ret = iomap_dio_rw(iocb, from, &ntfs_iomap_ops, NULL, IOMAP_DIO_PARTIAL, + NULL, 0); + + if (ret == -ENOTBLK) { + /* Returns -ENOTBLK in case of a page invalidation failure for writes.*/ + /* The callers needs to fall back to buffered I/O in this case. */ + ret = 0; + } + + if (ret >= 0 && iov_iter_count(from)) { + loff_t offset = iocb->ki_pos, endbyte; + + iocb->ki_flags &= ~IOCB_DIRECT; + err = iomap_file_buffered_write(iocb, from, &ntfs_iomap_ops, + &ntfs_iomap_folio_ops, NULL); + if (err < 0) { + ret = err; + goto out; + } + + /* + * We need to ensure that the pages within the page cache for + * the range covered by this I/O are written to disk and + * invalidated. This is in attempt to preserve the expected + * direct I/O semantics in the case we fallback to buffered I/O + * to complete off the I/O request. 
+ */ + ret += err; + endbyte = offset + err - 1; + err = filemap_write_and_wait_range(inode->i_mapping, offset, + endbyte); + if (err) { + ret = err; + goto out; + } + + invalidate_mapping_pages(inode->i_mapping, offset >> PAGE_SHIFT, + endbyte >> PAGE_SHIFT); + } out: inode_unlock(inode); - if (ret > 0) - ret = generic_write_sync(iocb, ret); - return ret; } @@ -1359,6 +1359,8 @@ int ntfs_file_open(struct inode *inode, struct file *file) #endif } + file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT; + return generic_file_open(inode, file); } @@ -1408,16 +1410,30 @@ int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (unlikely(is_bad_ni(ni))) return -EINVAL; - err = fiemap_prep(inode, fieinfo, start, &len, ~FIEMAP_FLAG_XATTR); - if (err) - return err; + if (is_compressed(ni)) { + /* Unfortunately cp -r incorrectly treats compressed clusters. */ + ntfs_inode_warn(inode, + "fiemap is not supported for compressed file"); + return -EOPNOTSUPP; + } - ni_lock(ni); + if (S_ISDIR(inode->i_mode)) { + /* TODO: add support for dirs (ATTR_ALLOC). 
*/ + ntfs_inode_warn(inode, + "fiemap is not supported for directories"); + return -EOPNOTSUPP; + } - err = ni_fiemap(ni, fieinfo, start, len); + if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { + ntfs_inode_warn(inode, "fiemap(xattr) is not supported"); + return -EOPNOTSUPP; + } - ni_unlock(ni); + inode_lock_shared(inode); + err = iomap_fiemap(inode, fieinfo, start, len, &ntfs_iomap_ops); + + inode_unlock_shared(inode); return err; } @@ -1463,7 +1479,7 @@ int ntfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) if (!ret) { ntfs_set_state(sbi, NTFS_DIRTY_CLEAR); - ntfs_update_mftmirr(sbi, false); + ntfs_update_mftmirr(sbi); } err = sync_blockdev(sb->s_bdev); diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 3025a404e695..0dc28815331e 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -1850,183 +1850,11 @@ enum REPARSE_SIGN ni_parse_reparse(struct ntfs_inode *ni, struct ATTRIB *attr, return REPARSE_LINK; } -/* - * ni_fiemap - Helper for file_fiemap(). - * - * Assumed ni_lock. - * TODO: Less aggressive locks. - */ -int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, - __u64 vbo, __u64 len) -{ - int err = 0; - struct ntfs_sb_info *sbi = ni->mi.sbi; - u8 cluster_bits = sbi->cluster_bits; - struct runs_tree run; - struct ATTRIB *attr; - CLST vcn = vbo >> cluster_bits; - CLST lcn, clen; - u64 valid = ni->i_valid; - u64 lbo, bytes; - u64 end, alloc_size; - size_t idx = -1; - u32 flags; - bool ok; - - run_init(&run); - if (S_ISDIR(ni->vfs_inode.i_mode)) { - attr = ni_find_attr(ni, NULL, NULL, ATTR_ALLOC, I30_NAME, - ARRAY_SIZE(I30_NAME), NULL, NULL); - } else { - attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, - NULL); - if (!attr) { - err = -EINVAL; - goto out; - } - if (is_attr_compressed(attr)) { - /* Unfortunately cp -r incorrectly treats compressed clusters. 
*/ - err = -EOPNOTSUPP; - ntfs_inode_warn( - &ni->vfs_inode, - "fiemap is not supported for compressed file (cp -r)"); - goto out; - } - } - - if (!attr || !attr->non_res) { - err = fiemap_fill_next_extent( - fieinfo, 0, 0, - attr ? le32_to_cpu(attr->res.data_size) : 0, - FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_LAST | - FIEMAP_EXTENT_MERGED); - goto out; - } - - end = vbo + len; - alloc_size = le64_to_cpu(attr->nres.alloc_size); - if (end > alloc_size) - end = alloc_size; - - while (vbo < end) { - if (idx == -1) { - ok = run_lookup_entry(&run, vcn, &lcn, &clen, &idx); - } else { - CLST vcn_next = vcn; - - ok = run_get_entry(&run, ++idx, &vcn, &lcn, &clen) && - vcn == vcn_next; - if (!ok) - vcn = vcn_next; - } - - if (!ok) { - err = attr_load_runs_vcn(ni, attr->type, - attr_name(attr), - attr->name_len, &run, vcn); - - if (err) - break; - - ok = run_lookup_entry(&run, vcn, &lcn, &clen, &idx); - - if (!ok) { - err = -EINVAL; - break; - } - } - - if (!clen) { - err = -EINVAL; // ? - break; - } - - if (lcn == SPARSE_LCN) { - vcn += clen; - vbo = (u64)vcn << cluster_bits; - continue; - } - - flags = FIEMAP_EXTENT_MERGED; - if (S_ISDIR(ni->vfs_inode.i_mode)) { - ; - } else if (is_attr_compressed(attr)) { - CLST clst_data; - - err = attr_is_frame_compressed(ni, attr, - vcn >> attr->nres.c_unit, - &clst_data, &run); - if (err) - break; - if (clst_data < NTFS_LZNT_CLUSTERS) - flags |= FIEMAP_EXTENT_ENCODED; - } else if (is_attr_encrypted(attr)) { - flags |= FIEMAP_EXTENT_DATA_ENCRYPTED; - } - - vbo = (u64)vcn << cluster_bits; - bytes = (u64)clen << cluster_bits; - lbo = (u64)lcn << cluster_bits; - - vcn += clen; - - if (vbo + bytes >= end) - bytes = end - vbo; - - if (vbo + bytes <= valid) { - ; - } else if (vbo >= valid) { - flags |= FIEMAP_EXTENT_UNWRITTEN; - } else { - /* vbo < valid && valid < vbo + bytes */ - u64 dlen = valid - vbo; - - if (vbo + dlen >= end) - flags |= FIEMAP_EXTENT_LAST; - - err = fiemap_fill_next_extent(fieinfo, vbo, lbo, dlen, - flags); - - if 
(err < 0) - break; - if (err == 1) { - err = 0; - break; - } - - vbo = valid; - bytes -= dlen; - if (!bytes) - continue; - - lbo += dlen; - flags |= FIEMAP_EXTENT_UNWRITTEN; - } - - if (vbo + bytes >= end) - flags |= FIEMAP_EXTENT_LAST; - - err = fiemap_fill_next_extent(fieinfo, vbo, lbo, bytes, flags); - if (err < 0) - break; - if (err == 1) { - err = 0; - break; - } - - vbo += bytes; - } - -out: - run_close(&run); - return err; -} - static struct page *ntfs_lock_new_page(struct address_space *mapping, - pgoff_t index, gfp_t gfp) + pgoff_t index, gfp_t gfp) { - struct folio *folio = __filemap_get_folio(mapping, index, - FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp); + struct folio *folio = __filemap_get_folio( + mapping, index, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp); struct page *page; if (IS_ERR(folio)) @@ -2186,7 +2014,7 @@ int ni_decompress_file(struct ntfs_inode *ni) for (vcn = vbo >> sbi->cluster_bits; vcn < end; vcn += clen) { err = attr_data_get_block(ni, vcn, cend - vcn, &lcn, - &clen, &new, false); + &clen, &new, false, NULL); if (err) goto out; } @@ -3017,7 +2845,8 @@ loff_t ni_seek_data_or_hole(struct ntfs_inode *ni, loff_t offset, bool data) /* Enumerate all fragments. */ for (vcn = offset >> cluster_bits;; vcn += clen) { - err = attr_data_get_block(ni, vcn, 1, &lcn, &clen, NULL, false); + err = attr_data_get_block(ni, vcn, 1, &lcn, &clen, NULL, false, + NULL); if (err) { return err; } diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index ee24ef0dd725..464d661d9694 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -5130,7 +5130,7 @@ commit_undo: undo_action_done: - ntfs_update_mftmirr(sbi, 0); + ntfs_update_mftmirr(sbi); sbi->flags &= ~NTFS_FLAGS_NEED_REPLAY; diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index ff0b2595f32a..e9c39c62aea4 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -843,9 +843,8 @@ int ntfs_refresh_zone(struct ntfs_sb_info *sbi) /* * ntfs_update_mftmirr - Update $MFTMirr data. 
*/ -void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) +void ntfs_update_mftmirr(struct ntfs_sb_info *sbi) { - int err; struct super_block *sb = sbi->sb; u32 blocksize, bytes; sector_t block1, block2; @@ -884,12 +883,7 @@ void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) put_bh(bh1); bh1 = NULL; - - err = wait ? sync_dirty_buffer(bh2) : 0; - put_bh(bh2); - if (err) - return; } sbi->flags &= ~NTFS_FLAGS_MFTMIRR; @@ -1357,9 +1351,7 @@ int ntfs_get_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, err = -ENOMEM; goto out; } - wait_on_buffer(bh); - lock_buffer(bh); if (!buffer_uptodate(bh)) { memset(bh->b_data, 0, blocksize); diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 4b50fdb4ff47..b969ad7c3258 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "debug.h" #include "ntfs.h" @@ -166,9 +167,7 @@ next_attr: std5 = Add2Ptr(attr, roff); -#ifdef STATX_BTIME nt2kernel(std5->cr_time, &ni->i_crtime); -#endif nt2kernel(std5->a_time, &ts); inode_set_atime_to_ts(inode, ts); nt2kernel(std5->c_time, &ts); @@ -555,168 +554,97 @@ struct inode *ntfs_iget5(struct super_block *sb, const struct MFT_REF *ref, return inode; } -enum get_block_ctx { - GET_BLOCK_GENERAL = 0, - GET_BLOCK_WRITE_BEGIN = 1, - GET_BLOCK_DIRECT_IO_R = 2, - GET_BLOCK_DIRECT_IO_W = 3, - GET_BLOCK_BMAP = 4, -}; - -static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo, - struct buffer_head *bh, int create, - enum get_block_ctx ctx) -{ - struct super_block *sb = inode->i_sb; - struct ntfs_sb_info *sbi = sb->s_fs_info; - struct ntfs_inode *ni = ntfs_i(inode); - struct folio *folio = bh->b_folio; - u8 cluster_bits = sbi->cluster_bits; - u32 block_size = sb->s_blocksize; - u64 bytes, lbo, valid; - u32 off; - int err; - CLST vcn, lcn, len; - bool new; - - /* Clear previous state. 
*/ - clear_buffer_new(bh); - clear_buffer_uptodate(bh); - - if (is_resident(ni)) { - bh->b_blocknr = RESIDENT_LCN; - bh->b_size = block_size; - if (!folio) { - /* direct io (read) or bmap call */ - err = 0; - } else { - ni_lock(ni); - err = attr_data_read_resident(ni, folio); - ni_unlock(ni); - - if (!err) - set_buffer_uptodate(bh); - } - return err; - } - - vcn = vbo >> cluster_bits; - off = vbo & sbi->cluster_mask; - new = false; - - err = attr_data_get_block(ni, vcn, 1, &lcn, &len, create ? &new : NULL, - create && sbi->cluster_size > PAGE_SIZE); - if (err) - goto out; - - if (!len) - return 0; - - bytes = ((u64)len << cluster_bits) - off; - - if (lcn >= sbi->used.bitmap.nbits) { - /* This case includes resident/compressed/sparse. */ - if (!create) { - if (bh->b_size > bytes) - bh->b_size = bytes; - return 0; - } - WARN_ON(1); - } - - if (new) - set_buffer_new(bh); - - lbo = ((u64)lcn << cluster_bits) + off; - - set_buffer_mapped(bh); - bh->b_bdev = sb->s_bdev; - bh->b_blocknr = lbo >> sb->s_blocksize_bits; - - valid = ni->i_valid; - - if (ctx == GET_BLOCK_DIRECT_IO_W) { - /* ntfs_direct_IO will update ni->i_valid. */ - if (vbo >= valid) - set_buffer_new(bh); - } else if (create) { - /* Normal write. */ - if (bytes > bh->b_size) - bytes = bh->b_size; - - if (vbo >= valid) - set_buffer_new(bh); - - if (vbo + bytes > valid) { - ni->i_valid = vbo + bytes; - mark_inode_dirty(inode); - } - } else if (vbo >= valid) { - /* Read out of valid data. */ - clear_buffer_mapped(bh); - } else if (vbo + bytes <= valid) { - /* Normal read. */ - } else if (vbo + block_size <= valid) { - /* Normal short read. 
*/ - bytes = block_size; - } else { - /* - * Read across valid size: vbo < valid && valid < vbo + block_size - */ - bytes = block_size; - - if (folio) { - u32 voff = valid - vbo; - - bh->b_size = block_size; - off = vbo & (PAGE_SIZE - 1); - folio_set_bh(bh, folio, off); - - if (bh_read(bh, 0) < 0) { - err = -EIO; - goto out; - } - folio_zero_segment(folio, off + voff, off + block_size); - } - } - - if (bh->b_size > bytes) - bh->b_size = bytes; - -#ifndef __LP64__ - if (ctx == GET_BLOCK_DIRECT_IO_W || ctx == GET_BLOCK_DIRECT_IO_R) { - static_assert(sizeof(size_t) < sizeof(loff_t)); - if (bytes > 0x40000000u) - bh->b_size = 0x40000000u; - } -#endif - - return 0; - -out: - return err; -} - -int ntfs_get_block(struct inode *inode, sector_t vbn, - struct buffer_head *bh_result, int create) -{ - return ntfs_get_block_vbo(inode, (u64)vbn << inode->i_blkbits, - bh_result, create, GET_BLOCK_GENERAL); -} - -static int ntfs_get_block_bmap(struct inode *inode, sector_t vsn, - struct buffer_head *bh_result, int create) -{ - return ntfs_get_block_vbo(inode, - (u64)vsn << inode->i_sb->s_blocksize_bits, - bh_result, create, GET_BLOCK_BMAP); -} - static sector_t ntfs_bmap(struct address_space *mapping, sector_t block) { - return generic_block_bmap(mapping, block, ntfs_get_block_bmap); + return iomap_bmap(mapping, block, &ntfs_iomap_ops); } +static void ntfs_iomap_read_end_io(struct bio *bio) +{ + int error = blk_status_to_errno(bio->bi_status); + struct folio_iter fi; + + bio_for_each_folio_all(fi, bio) { + struct folio *folio = fi.folio; + struct inode *inode = folio->mapping->host; + struct ntfs_inode *ni = ntfs_i(inode); + u64 valid = ni->i_valid; + u32 f_size = folio_size(folio); + loff_t f_pos = folio_pos(folio); + + + if (valid < f_pos + f_size) { + u32 z_from = valid <= f_pos ? + 0 : + offset_in_folio(folio, valid); + /* The only thing ntfs_iomap_read_end_io used for. 
*/ + folio_zero_segment(folio, z_from, f_size); + } + + iomap_finish_folio_read(folio, fi.offset, fi.length, error); + } + bio_put(bio); +} + +/* + * Copied from iomap/bio.c. + */ +static int ntfs_iomap_bio_read_folio_range(const struct iomap_iter *iter, + struct iomap_read_folio_ctx *ctx, + size_t plen) +{ + struct folio *folio = ctx->cur_folio; + const struct iomap *iomap = &iter->iomap; + loff_t pos = iter->pos; + size_t poff = offset_in_folio(folio, pos); + loff_t length = iomap_length(iter); + sector_t sector; + struct bio *bio = ctx->read_ctx; + + sector = iomap_sector(iomap, pos); + if (!bio || bio_end_sector(bio) != sector || + !bio_add_folio(bio, folio, plen, poff)) { + gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL); + gfp_t orig_gfp = gfp; + unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE); + + if (bio) + submit_bio(bio); + + if (ctx->rac) /* same as readahead_gfp_mask */ + gfp |= __GFP_NORETRY | __GFP_NOWARN; + bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs), REQ_OP_READ, + gfp); + /* + * If the bio_alloc fails, try it again for a single page to + * avoid having to deal with partial page reads. This emulates + * what do_mpage_read_folio does. 
+ */ + if (!bio) + bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ, orig_gfp); + if (ctx->rac) + bio->bi_opf |= REQ_RAHEAD; + bio->bi_iter.bi_sector = sector; + bio->bi_end_io = ntfs_iomap_read_end_io; + bio_add_folio_nofail(bio, folio, plen, poff); + ctx->read_ctx = bio; + } + return 0; +} + +static void ntfs_iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx) +{ + struct bio *bio = ctx->read_ctx; + + if (bio) + submit_bio(bio); +} + +static const struct iomap_read_ops ntfs_iomap_bio_read_ops = { + .read_folio_range = ntfs_iomap_bio_read_folio_range, + .submit_read = ntfs_iomap_bio_submit_read, +}; + static int ntfs_read_folio(struct file *file, struct folio *folio) { int err; @@ -724,6 +652,10 @@ static int ntfs_read_folio(struct file *file, struct folio *folio) struct inode *inode = mapping->host; struct ntfs_inode *ni = ntfs_i(inode); loff_t vbo = folio_pos(folio); + struct iomap_read_folio_ctx ctx = { + .cur_folio = folio, + .ops = &ntfs_iomap_bio_read_ops, + }; if (unlikely(is_bad_ni(ni))) { folio_unlock(folio); @@ -737,24 +669,14 @@ static int ntfs_read_folio(struct file *file, struct folio *folio) return 0; } - if (is_resident(ni)) { - ni_lock(ni); - err = attr_data_read_resident(ni, folio); - ni_unlock(ni); - if (err != E_NTFS_NONRESIDENT) { - folio_unlock(folio); - return err; - } - } - if (is_compressed(ni)) { /* ni_lock is taken inside ni_read_folio_cmpr after page locks */ err = ni_read_folio_cmpr(ni, folio); return err; } - /* Normal + sparse files. 
*/ - return mpage_read_folio(folio, ntfs_get_block); + iomap_read_folio(&ntfs_iomap_ops, &ctx); + return 0; } static void ntfs_readahead(struct readahead_control *rac) @@ -762,8 +684,10 @@ static void ntfs_readahead(struct readahead_control *rac) struct address_space *mapping = rac->mapping; struct inode *inode = mapping->host; struct ntfs_inode *ni = ntfs_i(inode); - u64 valid; - loff_t pos; + struct iomap_read_folio_ctx ctx = { + .ops = &ntfs_iomap_bio_read_ops, + .rac = rac, + }; if (is_resident(ni)) { /* No readahead for resident. */ @@ -775,80 +699,7 @@ static void ntfs_readahead(struct readahead_control *rac) return; } - valid = ni->i_valid; - pos = readahead_pos(rac); - - if (valid < i_size_read(inode) && pos <= valid && - valid < pos + readahead_length(rac)) { - /* Range cross 'valid'. Read it page by page. */ - return; - } - - mpage_readahead(rac, ntfs_get_block); -} - -static int ntfs_get_block_direct_IO_R(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - return ntfs_get_block_vbo(inode, (u64)iblock << inode->i_blkbits, - bh_result, create, GET_BLOCK_DIRECT_IO_R); -} - -static int ntfs_get_block_direct_IO_W(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - return ntfs_get_block_vbo(inode, (u64)iblock << inode->i_blkbits, - bh_result, create, GET_BLOCK_DIRECT_IO_W); -} - -static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) -{ - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - struct ntfs_inode *ni = ntfs_i(inode); - loff_t vbo = iocb->ki_pos; - loff_t end; - int wr = iov_iter_rw(iter) & WRITE; - size_t iter_count = iov_iter_count(iter); - loff_t valid; - ssize_t ret; - - if (is_resident(ni)) { - /* Switch to buffered write. */ - ret = 0; - goto out; - } - if (is_compressed(ni)) { - ret = 0; - goto out; - } - - ret = blockdev_direct_IO(iocb, inode, iter, - wr ? 
ntfs_get_block_direct_IO_W : - ntfs_get_block_direct_IO_R); - - if (ret > 0) - end = vbo + ret; - else if (wr && ret == -EIOCBQUEUED) - end = vbo + iter_count; - else - goto out; - - valid = ni->i_valid; - if (wr) { - if (end > valid && !S_ISBLK(inode->i_mode)) { - ni->i_valid = end; - mark_inode_dirty(inode); - } - } else if (vbo < valid && valid < end) { - /* Fix page. */ - iov_iter_revert(iter, end - valid); - iov_iter_zero(end - valid, iter); - } - -out: - return ret; + iomap_readahead(&ntfs_iomap_ops, &ctx); } int ntfs_set_size(struct inode *inode, u64 new_size) @@ -861,12 +712,10 @@ int ntfs_set_size(struct inode *inode, u64 new_size) /* Check for maximum file size. */ if (is_sparsed(ni) || is_compressed(ni)) { if (new_size > sbi->maxbytes_sparse) { - err = -EFBIG; - goto out; + return -EFBIG; } } else if (new_size > sbi->maxbytes) { - err = -EFBIG; - goto out; + return -EFBIG; } ni_lock(ni); @@ -875,15 +724,256 @@ int ntfs_set_size(struct inode *inode, u64 new_size) err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, &ni->i_valid, true, NULL); + if (!err) { + i_size_write(inode, new_size); + mark_inode_dirty(inode); + } + up_write(&ni->file.run_lock); ni_unlock(ni); - mark_inode_dirty(inode); - -out: return err; } +/* + * Function to get mapping vbo -> lbo. 
+ * used with: + * - iomap_zero_range + * - iomap_truncate_page + * - iomap_dio_rw + * - iomap_file_buffered_write + * - iomap_bmap + * - iomap_fiemap + * - iomap_bio_read_folio + * - iomap_bio_readahead + */ +static int ntfs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, + unsigned int flags, struct iomap *iomap, + struct iomap *srcmap) +{ + struct ntfs_inode *ni = ntfs_i(inode); + struct ntfs_sb_info *sbi = ni->mi.sbi; + u8 cluster_bits = sbi->cluster_bits; + CLST vcn = offset >> cluster_bits; + u32 off = offset & sbi->cluster_mask; + bool rw = flags & IOMAP_WRITE; + loff_t endbyte = offset + length; + void *res = NULL; + int err; + CLST lcn, clen, clen_max; + bool new_clst = false; + if (unlikely(ntfs3_forced_shutdown(sbi->sb))) + return -EIO; + + if ((flags & IOMAP_REPORT) && offset > ntfs_get_maxbytes(ni)) { + /* called from fiemap/bmap. */ + return -EINVAL; + } + + clen_max = rw ? (bytes_to_cluster(sbi, endbyte) - vcn) : 1; + + err = attr_data_get_block( + ni, vcn, clen_max, &lcn, &clen, rw ? &new_clst : NULL, + flags == IOMAP_WRITE && (off || (endbyte & sbi->cluster_mask)), + &res); + + if (err) { + return err; + } + + if (lcn == EOF_LCN) { + /* request out of file. */ + if (flags & IOMAP_REPORT) { + /* special code for report. */ + return -ENOENT; + } + + if (rw) { + /* should never be here. */ + return -EINVAL; + } + lcn = SPARSE_LCN; + } + + if (lcn == RESIDENT_LCN) { + if (offset >= clen) { + kfree(res); + if (flags & IOMAP_REPORT) { + /* special code for report. */ + return -ENOENT; + } + return -EFAULT; + } + + iomap->private = iomap->inline_data = res; + iomap->type = IOMAP_INLINE; + iomap->offset = 0; + iomap->length = clen; /* resident size in bytes. */ + iomap->flags = 0; + return 0; + } + + if (!clen) { + /* broken file? */ + return -EINVAL; + } + + if (lcn == COMPRESSED_LCN) { + /* should never be here. */ + return -EOPNOTSUPP; + } + + iomap->flags = new_clst ? 
IOMAP_F_NEW : 0; + iomap->bdev = inode->i_sb->s_bdev; + + /* Translate clusters into bytes. */ + iomap->offset = offset; + iomap->addr = ((loff_t)lcn << cluster_bits) + off; + iomap->length = ((loff_t)clen << cluster_bits) - off; + if (length && iomap->length > length) + iomap->length = length; + else + endbyte = offset + iomap->length; + + if (lcn == SPARSE_LCN) { + iomap->addr = IOMAP_NULL_ADDR; + iomap->type = IOMAP_HOLE; + } else if (endbyte <= ni->i_valid) { + iomap->type = IOMAP_MAPPED; + } else if (offset < ni->i_valid) { + iomap->type = IOMAP_MAPPED; + if (flags & IOMAP_REPORT) + iomap->length = ni->i_valid - offset; + } else if (rw || (flags & IOMAP_ZERO)) { + iomap->type = IOMAP_MAPPED; + } else { + iomap->type = IOMAP_UNWRITTEN; + } + + if ((flags & IOMAP_ZERO) && iomap->type == IOMAP_MAPPED) { + /* Avoid too large requests. */ + u32 tail; + u32 off_a = iomap->addr & (PAGE_SIZE - 1); + if (off_a) + tail = PAGE_SIZE - off_a; + else + tail = PAGE_SIZE; + + if (iomap->length > tail) + iomap->length = tail; + } + + return 0; +} + +static int ntfs_iomap_end(struct inode *inode, loff_t pos, loff_t length, + ssize_t written, unsigned int flags, + struct iomap *iomap) +{ + int err = 0; + struct ntfs_inode *ni = ntfs_i(inode); + loff_t endbyte = pos + written; + + if ((flags & IOMAP_WRITE) || (flags & IOMAP_ZERO)) { + if (iomap->type == IOMAP_INLINE) { + u32 data_size; + struct ATTRIB *attr; + struct mft_inode *mi; + + attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, + NULL, &mi); + if (!attr || attr->non_res) { + err = -EINVAL; + goto out; + } + + data_size = le32_to_cpu(attr->res.data_size); + if (!(pos < data_size && endbyte <= data_size)) { + err = -EINVAL; + goto out; + } + + /* Update resident data. 
*/ + memcpy(resident_data(attr) + pos, + iomap_inline_data(iomap, pos), written); + mi->dirty = true; + ni->i_valid = data_size; + } else if (ni->i_valid < endbyte) { + ni->i_valid = endbyte; + mark_inode_dirty(inode); + } + } + + if ((flags & IOMAP_ZERO) && iomap->type == IOMAP_MAPPED) { + balance_dirty_pages_ratelimited(inode->i_mapping); + cond_resched(); + } + +out: + if (iomap->type == IOMAP_INLINE) { + kfree(iomap->private); + iomap->private = NULL; + } + + return err; +} + +/* + * write_begin + put_folio + write_end. + * iomap_zero_range + * iomap_truncate_page + * iomap_file_buffered_write + */ +static void ntfs_iomap_put_folio(struct inode *inode, loff_t pos, + unsigned int len, struct folio *folio) +{ + struct ntfs_inode *ni = ntfs_i(inode); + loff_t end = pos + len; + u32 f_size = folio_size(folio); + loff_t f_pos = folio_pos(folio); + loff_t f_end = f_pos + f_size; + + if (ni->i_valid < end && end < f_end) { + /* zero range [end - f_end). */ + /* The only thing ntfs_iomap_put_folio used for. */ + folio_zero_segment(folio, offset_in_folio(folio, end), f_size); + } + folio_unlock(folio); + folio_put(folio); +} + +static ssize_t ntfs_writeback_range(struct iomap_writepage_ctx *wpc, + struct folio *folio, u64 offset, + unsigned int len, u64 end_pos) +{ + struct iomap *iomap = &wpc->iomap; + struct inode *inode = wpc->inode; + + /* Check iomap position. 
*/ + if (!(iomap->offset <= offset && + offset < iomap->offset + iomap->length)) { + int err; + struct ntfs_sb_info *sbi = ntfs_sb(inode->i_sb); + loff_t i_size_up = ntfs_up_cluster(sbi, inode->i_size); + loff_t len_max = i_size_up - offset; + + err = ntfs_iomap_begin(inode, offset, len_max, IOMAP_WRITE, + iomap, NULL); + if (err) { + ntfs_set_state(sbi, NTFS_DIRTY_DIRTY); + return err; + } + } + + return iomap_add_to_ioend(wpc, folio, offset, end_pos, len); +} + + +const struct iomap_writeback_ops ntfs_writeback_ops = { + .writeback_range = ntfs_writeback_range, + .writeback_submit = iomap_ioend_writeback_submit, +}; + static int ntfs_resident_writepage(struct folio *folio, struct writeback_control *wbc) { @@ -911,40 +1001,15 @@ static int ntfs_resident_writepage(struct folio *folio, static int ntfs_writepages(struct address_space *mapping, struct writeback_control *wbc) -{ - struct inode *inode = mapping->host; - - /* Avoid any operation if inode is bad. */ - if (unlikely(is_bad_ni(ntfs_i(inode)))) - return -EINVAL; - - if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) - return -EIO; - - if (is_resident(ntfs_i(inode))) { - struct folio *folio = NULL; - int error; - - while ((folio = writeback_iter(mapping, wbc, folio, &error))) - error = ntfs_resident_writepage(folio, wbc); - return error; - } - return mpage_writepages(mapping, wbc, ntfs_get_block); -} - -static int ntfs_get_block_write_begin(struct inode *inode, sector_t vbn, - struct buffer_head *bh_result, int create) -{ - return ntfs_get_block_vbo(inode, (u64)vbn << inode->i_blkbits, - bh_result, create, GET_BLOCK_WRITE_BEGIN); -} - -int ntfs_write_begin(const struct kiocb *iocb, struct address_space *mapping, - loff_t pos, u32 len, struct folio **foliop, void **fsdata) { int err; struct inode *inode = mapping->host; struct ntfs_inode *ni = ntfs_i(inode); + struct iomap_writepage_ctx wpc = { + .inode = mapping->host, + .wbc = wbc, + .ops = &ntfs_writeback_ops, + }; /* Avoid any operation if inode is bad. 
*/ if (unlikely(is_bad_ni(ni))) @@ -954,100 +1019,15 @@ int ntfs_write_begin(const struct kiocb *iocb, struct address_space *mapping, return -EIO; if (is_resident(ni)) { - struct folio *folio = __filemap_get_folio( - mapping, pos >> PAGE_SHIFT, FGP_WRITEBEGIN, - mapping_gfp_mask(mapping)); + struct folio *folio; - if (IS_ERR(folio)) { - err = PTR_ERR(folio); - goto out; - } + while ((folio = writeback_iter(mapping, wbc, folio, &err))) + err = ntfs_resident_writepage(folio, wbc); - ni_lock(ni); - err = attr_data_read_resident(ni, folio); - ni_unlock(ni); - - if (!err) { - *foliop = folio; - goto out; - } - folio_unlock(folio); - folio_put(folio); - - if (err != E_NTFS_NONRESIDENT) - goto out; + return err; } - err = block_write_begin(mapping, pos, len, foliop, - ntfs_get_block_write_begin); - -out: - return err; -} - -/* - * ntfs_write_end - Address_space_operations::write_end. - */ -int ntfs_write_end(const struct kiocb *iocb, struct address_space *mapping, - loff_t pos, u32 len, u32 copied, struct folio *folio, - void *fsdata) -{ - struct inode *inode = mapping->host; - struct ntfs_inode *ni = ntfs_i(inode); - u64 valid = ni->i_valid; - bool dirty = false; - int err; - - if (is_resident(ni)) { - ni_lock(ni); - err = attr_data_write_resident(ni, folio); - ni_unlock(ni); - if (!err) { - struct buffer_head *head = folio_buffers(folio); - dirty = true; - /* Clear any buffers in folio. 
*/ - if (head) { - struct buffer_head *bh = head; - - do { - clear_buffer_dirty(bh); - clear_buffer_mapped(bh); - set_buffer_uptodate(bh); - } while (head != (bh = bh->b_this_page)); - } - folio_mark_uptodate(folio); - err = copied; - } - folio_unlock(folio); - folio_put(folio); - } else { - err = generic_write_end(iocb, mapping, pos, len, copied, folio, - fsdata); - } - - if (err >= 0) { - if (!(ni->std_fa & FILE_ATTRIBUTE_ARCHIVE)) { - inode_set_mtime_to_ts(inode, - inode_set_ctime_current(inode)); - ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE; - dirty = true; - } - - if (valid != ni->i_valid) { - /* ni->i_valid is changed in ntfs_get_block_vbo. */ - dirty = true; - } - - if (pos + err > inode->i_size) { - i_size_write(inode, pos + err); - dirty = true; - } - - if (dirty) - mark_inode_dirty(inode); - } - - return err; + return iomap_writepages(&wpc); } int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc) @@ -1062,6 +1042,7 @@ int ntfs_sync_inode(struct inode *inode) /* * Helper function to read file. 
+ * Used to read $AttrDef and $UpCase */ int inode_read_data(struct inode *inode, void *data, size_t bytes) { @@ -2107,18 +2088,26 @@ const struct address_space_operations ntfs_aops = { .read_folio = ntfs_read_folio, .readahead = ntfs_readahead, .writepages = ntfs_writepages, - .write_begin = ntfs_write_begin, - .write_end = ntfs_write_end, - .direct_IO = ntfs_direct_IO, .bmap = ntfs_bmap, - .dirty_folio = block_dirty_folio, - .migrate_folio = buffer_migrate_folio, - .invalidate_folio = block_invalidate_folio, + .dirty_folio = iomap_dirty_folio, + .migrate_folio = filemap_migrate_folio, + .release_folio = iomap_release_folio, + .invalidate_folio = iomap_invalidate_folio, }; const struct address_space_operations ntfs_aops_cmpr = { .read_folio = ntfs_read_folio, - .dirty_folio = block_dirty_folio, - .direct_IO = ntfs_direct_IO, + .dirty_folio = iomap_dirty_folio, + .release_folio = iomap_release_folio, + .invalidate_folio = iomap_invalidate_folio, +}; + +const struct iomap_ops ntfs_iomap_ops = { + .iomap_begin = ntfs_iomap_begin, + .iomap_end = ntfs_iomap_end, +}; + +const struct iomap_write_ops ntfs_iomap_folio_ops = { + .put_folio = ntfs_iomap_put_folio, }; // clang-format on diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 32823e1428a7..b7017dd4d7cd 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -442,8 +442,7 @@ int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type, u64 new_size, const u64 *new_valid, bool keep_prealloc, struct ATTRIB **ret); int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, - CLST *len, bool *new, bool zero); -int attr_data_read_resident(struct ntfs_inode *ni, struct folio *folio); + CLST *len, bool *new, bool zero, void **res); int attr_data_write_resident(struct ntfs_inode *ni, struct folio *folio); int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name, u8 name_len, struct runs_tree *run, @@ -568,8 +567,6 @@ enum REPARSE_SIGN ni_parse_reparse(struct 
ntfs_inode *ni, struct ATTRIB *attr, struct REPARSE_DATA_BUFFER *buffer); int ni_write_inode(struct inode *inode, int sync, const char *hint); #define _ni_write_inode(i, w) ni_write_inode(i, w, __func__) -int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, - __u64 vbo, __u64 len); int ni_read_folio_cmpr(struct ntfs_inode *ni, struct folio *folio); int ni_decompress_file(struct ntfs_inode *ni); int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, @@ -614,7 +611,7 @@ int ntfs_look_free_mft(struct ntfs_sb_info *sbi, CLST *rno, bool mft, void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno, bool is_mft); int ntfs_clear_mft_tail(struct ntfs_sb_info *sbi, size_t from, size_t to); int ntfs_refresh_zone(struct ntfs_sb_info *sbi); -void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait); +void ntfs_update_mftmirr(struct ntfs_sb_info *sbi); void ntfs_bad_inode(struct inode *inode, const char *hint); #define _ntfs_bad_inode(i) ntfs_bad_inode(i, __func__) enum NTFS_DIRTY_FLAGS { @@ -745,13 +742,6 @@ int indx_update_dup(struct ntfs_inode *ni, struct ntfs_sb_info *sbi, struct inode *ntfs_iget5(struct super_block *sb, const struct MFT_REF *ref, const struct cpu_str *name); int ntfs_set_size(struct inode *inode, u64 new_size); -int ntfs_get_block(struct inode *inode, sector_t vbn, - struct buffer_head *bh_result, int create); -int ntfs_write_begin(const struct kiocb *iocb, struct address_space *mapping, - loff_t pos, u32 len, struct folio **foliop, void **fsdata); -int ntfs_write_end(const struct kiocb *iocb, struct address_space *mapping, - loff_t pos, u32 len, u32 copied, struct folio *folio, - void *fsdata); int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc); int ntfs_sync_inode(struct inode *inode); int inode_read_data(struct inode *inode, void *data, size_t bytes); @@ -762,6 +752,8 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, int ntfs_link_inode(struct inode *inode, struct 
dentry *dentry); int ntfs_unlink_inode(struct inode *dir, const struct dentry *dentry); void ntfs_evict_inode(struct inode *inode); +extern const struct iomap_ops ntfs_iomap_ops; +extern const struct iomap_write_ops ntfs_iomap_folio_ops; extern const struct inode_operations ntfs_link_inode_operations; extern const struct address_space_operations ntfs_aops; extern const struct address_space_operations ntfs_aops_cmpr; diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index df65877f172c..5c104991b067 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -58,9 +58,9 @@ #include #include #include -#include #include #include +#include #include #include #include @@ -674,7 +674,7 @@ static noinline void ntfs3_put_sbi(struct ntfs_sb_info *sbi) sbi->volume.ni = NULL; } - ntfs_update_mftmirr(sbi, 0); + ntfs_update_mftmirr(sbi); indx_clear(&sbi->security.index_sii); indx_clear(&sbi->security.index_sdh); @@ -821,7 +821,12 @@ static int ntfs_sync_fs(struct super_block *sb, int wait) if (!err) ntfs_set_state(sbi, NTFS_DIRTY_CLEAR); - ntfs_update_mftmirr(sbi, wait); + ntfs_update_mftmirr(sbi); + + if (wait) { + sync_blockdev(sb->s_bdev); + blkdev_issue_flush(sb->s_bdev); + } return err; } From 08ce2fee1b869ecbfbd94e0eb2630e52203a2e03 Mon Sep 17 00:00:00 2001 From: Szymon Wilczek Date: Sat, 27 Dec 2025 15:43:07 +0100 Subject: [PATCH 17/26] ntfs3: fix circular locking dependency in run_unpack_ex Syzbot reported a circular locking dependency between wnd->rw_lock (sbi->used.bitmap) and ni->file.run_lock. The deadlock scenario: 1. ntfs_extend_mft() takes ni->file.run_lock then wnd->rw_lock. 2. run_unpack_ex() takes wnd->rw_lock then tries to acquire ni->file.run_lock inside ntfs_refresh_zone(). This creates an AB-BA deadlock. Fix this by using down_read_trylock() instead of down_read() when acquiring run_lock in run_unpack_ex(). If the lock is contended, skip ntfs_refresh_zone() - the MFT zone will be refreshed on the next MFT operation. 
This breaks the circular dependency since we never block waiting for run_lock while holding wnd->rw_lock. Reported-by: syzbot+d27edf9f96ae85939222@syzkaller.appspotmail.com Tested-by: syzbot+d27edf9f96ae85939222@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d27edf9f96ae85939222 Signed-off-by: Szymon Wilczek Signed-off-by: Konstantin Komarov --- fs/ntfs3/run.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c index 395b20492525..dc59cad4fa37 100644 --- a/fs/ntfs3/run.c +++ b/fs/ntfs3/run.c @@ -1131,11 +1131,14 @@ int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, struct rw_semaphore *lock = is_mounted(sbi) ? &sbi->mft.ni->file.run_lock : NULL; - if (lock) - down_read(lock); - ntfs_refresh_zone(sbi); - if (lock) - up_read(lock); + if (lock) { + if (down_read_trylock(lock)) { + ntfs_refresh_zone(sbi); + up_read(lock); + } + } else { + ntfs_refresh_zone(sbi); + } } up_write(&wnd->rw_lock); if (err) From 1dad2fff0261568e7fa6a1760619d88d0ef0aff3 Mon Sep 17 00:00:00 2001 From: sunliming Date: Wed, 7 Jan 2026 15:37:09 +0800 Subject: [PATCH 18/26] fs/ntfs3: make ntfs_writeback_ops static Fix below sparse warnings: fs/ntfs3/inode.c:972:34: sparse: sparse: symbol 'ntfs_writeback_ops' was not declared. Should it be static? 
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202601061424.nbKLNwC5-lkp@intel.com/ Signed-off-by: sunliming Signed-off-by: Konstantin Komarov --- fs/ntfs3/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index b969ad7c3258..636aa77e20e4 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -969,7 +969,7 @@ static ssize_t ntfs_writeback_range(struct iomap_writepage_ctx *wpc, } -const struct iomap_writeback_ops ntfs_writeback_ops = { +static const struct iomap_writeback_ops ntfs_writeback_ops = { .writeback_range = ntfs_writeback_range, .writeback_submit = iomap_ioend_writeback_submit, }; From 27b75ca4e51e3e4554dc85dbf1a0246c66106fd3 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Sun, 28 Dec 2025 11:53:25 +0800 Subject: [PATCH 19/26] fs/ntfs3: prevent infinite loops caused by the next valid being the same When processing the range [valid : pos), if the next 'valid' value cannot be retrieved correctly, for example, if the retrieved value is always the same as the current one, the loop never makes progress. This can trigger an infinite loop, similar to the hung task problem reported by syzbot [1]. Adding a check for the 'valid' value within the loop body, and terminating the loop and returning -EINVAL if the new value is the same as the current value, prevents this. [1] INFO: task syz.4.21:6056 blocked for more than 143 seconds.
Call Trace: rwbase_write_lock+0x14f/0x750 kernel/locking/rwbase_rt.c:244 inode_lock include/linux/fs.h:1027 [inline] ntfs_file_write_iter+0xe6/0x870 fs/ntfs3/file.c:1284 Fixes: 4342306f0f0d ("fs/ntfs3: Add file operations and implementation") Reported-by: syzbot+bcf9e1868c1a0c7e04f1@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=bcf9e1868c1a0c7e04f1 Signed-off-by: Edward Adam Davis Signed-off-by: Konstantin Komarov --- fs/ntfs3/file.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 58fa4da114bb..1be77f865d78 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -1016,8 +1016,12 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) goto out; if (lcn == SPARSE_LCN) { - ni->i_valid = valid = - frame_vbo + ((u64)clen << sbi->cluster_bits); + valid = frame_vbo + ((u64)clen << sbi->cluster_bits); + if (ni->i_valid == valid) { + err = -EINVAL; + goto out; + } + ni->i_valid = valid; continue; } From 6b3c83df9a0a61eb7a11beb1cef7ae5c2eb3efb6 Mon Sep 17 00:00:00 2001 From: Baolin Liu Date: Tue, 6 Jan 2026 14:34:25 +0800 Subject: [PATCH 20/26] ntfs3: Refactor duplicate kmemdup pattern in do_action() Extract the repeated pattern of duplicating attribute and updating OpenAttr into a helper function to reduce code duplication and improve maintainability. 
Signed-off-by: Baolin Liu Signed-off-by: Konstantin Komarov --- fs/ntfs3/fslog.c | 54 ++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 464d661d9694..4ea94d53a819 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -3030,6 +3030,26 @@ static struct ATTRIB *attr_create_nonres_log(struct ntfs_sb_info *sbi, return attr; } +/* + * update_oa_attr - Synchronize OpenAttr's attribute pointer with modified attribute + * @oa2: OpenAttr structure in memory that needs to be updated + * @attr: Modified attribute from MFT record to duplicate + * + * Returns true on success, false on allocation failure. + */ +static bool update_oa_attr(struct OpenAttr *oa2, struct ATTRIB *attr) +{ + void *p2; + + p2 = kmemdup(attr, le32_to_cpu(attr->size), GFP_NOFS); + if (p2) { + kfree(oa2->attr); + oa2->attr = p2; + return true; + } + return false; +} + /* * do_action - Common routine for the Redo and Undo Passes. * @rlsn: If it is NULL then undo. 
@@ -3253,15 +3273,8 @@ skip_load_parent: le16_add_cpu(&rec->hard_links, 1); oa2 = find_loaded_attr(log, attr, rno_base); - if (oa2) { - void *p2 = kmemdup(attr, le32_to_cpu(attr->size), - GFP_NOFS); - if (p2) { - // run_close(oa2->run1); - kfree(oa2->attr); - oa2->attr = p2; - } - } + if (oa2) + update_oa_attr(oa2, attr); mi->dirty = true; break; @@ -3320,16 +3333,8 @@ move_data: memmove(Add2Ptr(attr, aoff), data, dlen); oa2 = find_loaded_attr(log, attr, rno_base); - if (oa2) { - void *p2 = kmemdup(attr, le32_to_cpu(attr->size), - GFP_NOFS); - if (p2) { - // run_close(&oa2->run0); - oa2->run1 = &oa2->run0; - kfree(oa2->attr); - oa2->attr = p2; - } - } + if (oa2 && update_oa_attr(oa2, attr)) + oa2->run1 = &oa2->run0; mi->dirty = true; break; @@ -3379,14 +3384,9 @@ move_data: attr->nres.total_size = new_sz->total_size; oa2 = find_loaded_attr(log, attr, rno_base); - if (oa2) { - void *p2 = kmemdup(attr, le32_to_cpu(attr->size), - GFP_NOFS); - if (p2) { - kfree(oa2->attr); - oa2->attr = p2; - } - } + if (oa2) + update_oa_attr(oa2, attr); + mi->dirty = true; break; From ca1ceddfaa99e91da8b7ac24785b27d400b88309 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 5 Jan 2026 12:17:10 -0700 Subject: [PATCH 21/26] ntfs3: Restore NULL folio initialization in ntfs_writepages() Clang warns (or errors with CONFIG_WERROR=y): fs/ntfs3/inode.c:1021:6: error: variable 'folio' is used uninitialized whenever 'if' condition is true [-Werror,-Wsometimes-uninitialized] 1021 | if (is_resident(ni)) { | ^~~~~~~~~~~~~~~ fs/ntfs3/inode.c:1024:48: note: uninitialized use occurs here 1024 | while ((folio = writeback_iter(mapping, wbc, folio, &err))) | ^~~~~ folio should be initialized to NULL for the first iteration of writeback_iter() to start the loop properly. Restore the NULL initialization of folio that was lost in the recent iomap conversion to clear up the warning. 
Fixes: 099ef9a ("fs/ntfs3: implement iomap-based file operations") Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/oe-kbuild-all/202601010644.FIhOXy6Y-lkp@intel.com/ Closes: https://lore.kernel.org/r/202601010513.axd56bks-lkp@intel.com/ Signed-off-by: Nathan Chancellor [almaz.alexandrovich@paragon-software.com: added a few more tags] Signed-off-by: Konstantin Komarov --- fs/ntfs3/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 636aa77e20e4..2147fce8e0b2 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -1019,7 +1019,7 @@ static int ntfs_writepages(struct address_space *mapping, return -EIO; if (is_resident(ni)) { - struct folio *folio; + struct folio *folio = NULL; while ((folio = writeback_iter(mapping, wbc, folio, &err))) err = ntfs_resident_writepage(folio, wbc); From b2bc7c44ed1779fc9eaab9a186db0f0d01439622 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Sat, 17 Jan 2026 16:50:24 +0000 Subject: [PATCH 22/26] fs/ntfs3: Fix slab-out-of-bounds read in DeleteIndexEntryRoot In the 'DeleteIndexEntryRoot' case of the 'do_action' function, the entry size ('esize') is retrieved from the log record without adequate bounds checking. Specifically, the code calculates the end of the entry ('e2') using: e2 = Add2Ptr(e1, esize); It then calculates the size for memmove using 'PtrOffset(e2, ...)', which subtracts the end pointer from the buffer limit. If 'esize' is maliciously large, 'e2' points past the end of the used part of the buffer. This results in a negative offset which, when cast to size_t for memmove, is interpreted as a massive unsigned integer, leading to a heap buffer overflow. This commit adds a check to ensure that the entry size ('esize') strictly fits within the remaining used space of the index header before performing memory operations.
Fixes: b46acd6a6a62 ("fs/ntfs3: Add NTFS journal") Signed-off-by: Jiasheng Jiang Signed-off-by: Konstantin Komarov --- fs/ntfs3/fslog.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 4ea94d53a819..10863c83c315 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -3431,6 +3431,9 @@ move_data: e1 = Add2Ptr(attr, le16_to_cpu(lrh->attr_off)); esize = le16_to_cpu(e1->size); + if (PtrOffset(e1, Add2Ptr(hdr, used)) < esize) + goto dirty_vol; + e2 = Add2Ptr(e1, esize); memmove(e1, e2, PtrOffset(e2, Add2Ptr(hdr, used))); From 3c6248937fb9fe5cfb29aa8813e18c50095e4db7 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jan 2026 16:35:35 +0100 Subject: [PATCH 23/26] fs/ntfs3: allow explicit boolean acl/prealloc mount options This patch improves mount option parsing by allowing explicit boolean values for acl and prealloc. Previously those options were exposed only as presence/absence flags. Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 5c104991b067..4f423d3a248c 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -264,8 +264,10 @@ enum Opt { Opt_windows_names, Opt_showmeta, Opt_acl, + Opt_acl_bool, Opt_iocharset, Opt_prealloc, + Opt_prealloc_bool, Opt_nocase, Opt_err, }; @@ -285,9 +287,11 @@ static const struct fs_parameter_spec ntfs_fs_parameters[] = { fsparam_flag("hide_dot_files", Opt_hide_dot_files), fsparam_flag("windows_names", Opt_windows_names), fsparam_flag("showmeta", Opt_showmeta), - fsparam_flag_no("acl", Opt_acl), + fsparam_flag("acl", Opt_acl), + fsparam_bool("acl", Opt_acl_bool), fsparam_string("iocharset", Opt_iocharset), - fsparam_flag_no("prealloc", Opt_prealloc), + fsparam_flag("prealloc", Opt_prealloc), + fsparam_bool("prealloc", Opt_prealloc_bool), fsparam_flag("nocase", Opt_nocase), {} }; @@ -379,15 +383,16 @@ static int ntfs_fs_parse_param(struct 
fs_context *fc, case Opt_showmeta: opts->showmeta = 1; break; - case Opt_acl: - if (!result.negated) + case Opt_acl_bool: + if (result.boolean) { + case Opt_acl: #ifdef CONFIG_NTFS3_FS_POSIX_ACL fc->sb_flags |= SB_POSIXACL; #else return invalf( fc, "ntfs3: Support for ACL not compiled in!"); #endif - else + } else fc->sb_flags &= ~SB_POSIXACL; break; case Opt_iocharset: @@ -396,7 +401,10 @@ static int ntfs_fs_parse_param(struct fs_context *fc, param->string = NULL; break; case Opt_prealloc: - opts->prealloc = !result.negated; + opts->prealloc = 1; + break; + case Opt_prealloc_bool: + opts->prealloc = result.boolean; break; case Opt_nocase: opts->nocase = 1; From c1f221c1be6f641506d647297062ce5d21d03867 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 9 Feb 2026 10:51:38 +0100 Subject: [PATCH 24/26] fs/ntfs3: add fall-through between switch labels Add fall-through to fix the warning in ntfs_fs_parse_param(). Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202602041402.uojBz5QY-lkp@intel.com/ Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 4f423d3a248c..a3c07f2b604f 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -385,6 +385,7 @@ static int ntfs_fs_parse_param(struct fs_context *fc, break; case Opt_acl_bool: if (result.boolean) { + fallthrough; case Opt_acl: #ifdef CONFIG_NTFS3_FS_POSIX_ACL fc->sb_flags |= SB_POSIXACL; From c5226b96c08a010ebef5fdf4c90572bcd89e4299 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 9 Feb 2026 16:07:32 +0100 Subject: [PATCH 25/26] fs/ntfs3: avoid calling run_get_entry() when run == NULL in ntfs_read_run_nb_ra() When ntfs_read_run_nb_ra() is invoked with run == NULL the code later assumes run is valid and may call run_get_entry(NULL, ...), and also uses clen/idx without initializing them. Smatch reported uninitialized variable warnings and this can lead to undefined behaviour. 
Fix this by returning -EINVAL when run is NULL, before trying to get the next fragment to read. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202512230646.v5hrYXL0-lkp@intel.com/ Signed-off-by: Konstantin Komarov --- fs/ntfs3/fsntfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index e9c39c62aea4..2ef500f1a9fa 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -1256,6 +1256,12 @@ int ntfs_read_run_nb_ra(struct ntfs_sb_info *sbi, const struct runs_tree *run, } while (len32); + if (!run) { + err = -EINVAL; + goto out; + } + + /* Get next fragment to read. */ vcn_next = vcn + clen; if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) || vcn != vcn_next) { From 10d7c95af043b45a85dc738c3271bf760ff3577e Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 16 Feb 2026 17:10:26 +0100 Subject: [PATCH 26/26] fs/ntfs3: add delayed-allocation (delalloc) support This patch implements delayed allocation (delalloc) in the ntfs3 driver. It introduces an in-memory delayed-runlist (run_da) and the helpers to track, reserve and later convert those delayed reservations into real clusters at writeback time. The change keeps on-disk formats untouched and focuses on pagecache integration, correctness and safe interaction with fallocate, truncate, and dio/iomap paths. Key points: - add run_da (delay-allocated run tree) and bookkeeping for delayed clusters. - mark ranges as delalloc (DELALLOC_LCN) instead of immediately allocating. Actual allocation is performed later (writeback / attr_set_size_ex / explicit flush paths). - direct i/o / iomap paths updated to avoid dio collisions with delalloc: dio falls back or forces allocation of delayed blocks before proceeding. - punch/collapse/truncate/fallocate check and cancel delay-alloc reservations. Sparse/compressed files handled specially. - free-space checks updated (ntfs_check_free_space) to account for reserved delalloc clusters and MFT record budgeting.
- delayed allocations are committed on last writer (file release) and on explicit allocation flush paths. Tested-by: syzbot@syzkaller.appspotmail.com Reported-by: syzbot+2bd8e813c7f767aa9bb1@syzkaller.appspotmail.com Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrib.c | 333 ++++++++++++++++++++++++++++++++------------ fs/ntfs3/attrlist.c | 8 +- fs/ntfs3/file.c | 314 ++++++++++++++++++++++------------------- fs/ntfs3/frecord.c | 72 +++++++++- fs/ntfs3/fsntfs.c | 53 +++++-- fs/ntfs3/index.c | 23 ++- fs/ntfs3/inode.c | 161 ++++++++++++++------- fs/ntfs3/ntfs.h | 3 + fs/ntfs3/ntfs_fs.h | 91 ++++++++++-- fs/ntfs3/run.c | 150 ++++++++++++++++++-- fs/ntfs3/super.c | 28 +++- fs/ntfs3/xattr.c | 2 +- 12 files changed, 890 insertions(+), 348 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index aa745fb226f5..6cb9bc5d605c 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -91,7 +91,8 @@ static int attr_load_runs(struct ATTRIB *attr, struct ntfs_inode *ni, * run_deallocate_ex - Deallocate clusters. */ static int run_deallocate_ex(struct ntfs_sb_info *sbi, struct runs_tree *run, - CLST vcn, CLST len, CLST *done, bool trim) + CLST vcn, CLST len, CLST *done, bool trim, + struct runs_tree *run_da) { int err = 0; CLST vcn_next, vcn0 = vcn, lcn, clen, dn = 0; @@ -120,6 +121,16 @@ failed: if (sbi) { /* mark bitmap range [lcn + clen) as free and trim clusters. */ mark_as_free_ex(sbi, lcn, clen, trim); + + if (run_da) { + CLST da_len; + if (!run_remove_range(run_da, vcn, clen, + &da_len)) { + err = -ENOMEM; + goto failed; + } + ntfs_sub_da(sbi, da_len); + } } dn += clen; } @@ -147,9 +158,10 @@ out: * attr_allocate_clusters - Find free space, mark it as used and store in @run. 
*/ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, - CLST vcn, CLST lcn, CLST len, CLST *pre_alloc, - enum ALLOCATE_OPT opt, CLST *alen, const size_t fr, - CLST *new_lcn, CLST *new_len) + struct runs_tree *run_da, CLST vcn, CLST lcn, + CLST len, CLST *pre_alloc, enum ALLOCATE_OPT opt, + CLST *alen, const size_t fr, CLST *new_lcn, + CLST *new_len) { int err; CLST flen, vcn0 = vcn, pre = pre_alloc ? *pre_alloc : 0; @@ -185,12 +197,21 @@ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, /* Add new fragment into run storage. */ if (!run_add_entry(run, vcn, lcn, flen, opt & ALLOCATE_MFT)) { +undo_alloc: /* Undo last 'ntfs_look_for_free_space' */ mark_as_free_ex(sbi, lcn, len, false); err = -ENOMEM; goto out; } + if (run_da) { + CLST da_len; + if (!run_remove_range(run_da, vcn, flen, &da_len)) { + goto undo_alloc; + } + ntfs_sub_da(sbi, da_len); + } + if (opt & ALLOCATE_ZERO) { u8 shift = sbi->cluster_bits - SECTOR_SHIFT; @@ -205,7 +226,7 @@ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, vcn += flen; if (flen >= len || (opt & ALLOCATE_MFT) || - (fr && run->count - cnt >= fr)) { + (opt & ALLOCATE_ONE_FR) || (fr && run->count - cnt >= fr)) { *alen = vcn - vcn0; return 0; } @@ -216,7 +237,8 @@ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, out: /* Undo 'ntfs_look_for_free_space' */ if (vcn - vcn0) { - run_deallocate_ex(sbi, run, vcn0, vcn - vcn0, NULL, false); + run_deallocate_ex(sbi, run, vcn0, vcn - vcn0, NULL, false, + run_da); run_truncate(run, vcn0); } @@ -281,7 +303,7 @@ int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr, } else { const char *data = resident_data(attr); - err = attr_allocate_clusters(sbi, run, 0, 0, len, NULL, + err = attr_allocate_clusters(sbi, run, NULL, 0, 0, len, NULL, ALLOCATE_DEF, &alen, 0, NULL, NULL); if (err) @@ -397,7 +419,7 @@ static int attr_set_size_res(struct ntfs_inode *ni, struct ATTRIB *attr, } /* - * 
attr_set_size - Change the size of attribute. + * attr_set_size_ex - Change the size of attribute. * * Extend: * - Sparse/compressed: No allocated clusters. @@ -405,24 +427,28 @@ static int attr_set_size_res(struct ntfs_inode *ni, struct ATTRIB *attr, * Shrink: * - No deallocate if @keep_prealloc is set. */ -int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type, - const __le16 *name, u8 name_len, struct runs_tree *run, - u64 new_size, const u64 *new_valid, bool keep_prealloc, - struct ATTRIB **ret) +int attr_set_size_ex(struct ntfs_inode *ni, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, struct runs_tree *run, + u64 new_size, const u64 *new_valid, bool keep_prealloc, + struct ATTRIB **ret, bool no_da) { int err = 0; struct ntfs_sb_info *sbi = ni->mi.sbi; u8 cluster_bits = sbi->cluster_bits; bool is_mft = ni->mi.rno == MFT_REC_MFT && type == ATTR_DATA && !name_len; - u64 old_valid, old_size, old_alloc, new_alloc, new_alloc_tmp; + u64 old_valid, old_size, old_alloc, new_alloc_tmp; + u64 new_alloc = 0; struct ATTRIB *attr = NULL, *attr_b; struct ATTR_LIST_ENTRY *le, *le_b; struct mft_inode *mi, *mi_b; CLST alen, vcn, lcn, new_alen, old_alen, svcn, evcn; CLST next_svcn, pre_alloc = -1, done = 0; - bool is_ext, is_bad = false; + bool is_ext = false, is_bad = false; bool dirty = false; + struct runs_tree *run_da = run == &ni->file.run ? &ni->file.run_da : + NULL; + bool da = !is_mft && sbi->options->delalloc && run_da && !no_da; u32 align; struct MFT_REC *rec; @@ -457,6 +483,7 @@ again: if (is_ext) { align <<= attr_b->nres.c_unit; keep_prealloc = false; + da = false; } old_valid = le64_to_cpu(attr_b->nres.valid_size); @@ -475,6 +502,37 @@ again_1: goto ok; } + if (da && + (vcn = old_alen + run_len(&ni->file.run_da), new_alen > vcn)) { + /* Resize up normal file. Delay new clusters allocation. 
*/ + alen = new_alen - vcn; + + if (ntfs_check_free_space(sbi, alen, 0, true)) { + if (!run_add_entry(&ni->file.run_da, vcn, SPARSE_LCN, + alen, false)) { + err = -ENOMEM; + goto out; + } + + ntfs_add_da(sbi, alen); + goto ok1; + } + } + + if (!keep_prealloc && run_da && run_da->count && + (vcn = run_get_max_vcn(run_da), new_alen < vcn)) { + /* Shrink delayed clusters. */ + + /* Try to remove fragment from delay allocated run. */ + if (!run_remove_range(run_da, new_alen, vcn - new_alen, + &alen)) { + err = -ENOMEM; + goto out; + } + + ntfs_sub_da(sbi, alen); + } + vcn = old_alen - 1; svcn = le64_to_cpu(attr_b->nres.svcn); @@ -580,7 +638,8 @@ add_alloc_in_same_attr_seg: } else { /* ~3 bytes per fragment. */ err = attr_allocate_clusters( - sbi, run, vcn, lcn, to_allocate, &pre_alloc, + sbi, run, run_da, vcn, lcn, to_allocate, + &pre_alloc, is_mft ? ALLOCATE_MFT : ALLOCATE_DEF, &alen, is_mft ? 0 : (sbi->record_size - @@ -759,14 +818,14 @@ pack_runs: mi_b->dirty = dirty = true; err = run_deallocate_ex(sbi, run, vcn, evcn - vcn + 1, &dlen, - true); + true, run_da); if (err) goto out; if (is_ext) { /* dlen - really deallocated clusters. */ le64_sub_cpu(&attr_b->nres.total_size, - ((u64)dlen << cluster_bits)); + (u64)dlen << cluster_bits); } run_truncate(run, vcn); @@ -821,14 +880,14 @@ ok1: if (((type == ATTR_DATA && !name_len) || (type == ATTR_ALLOC && name == I30_NAME))) { /* Update inode_set_bytes. */ - if (attr_b->non_res) { - new_alloc = le64_to_cpu(attr_b->nres.alloc_size); - if (inode_get_bytes(&ni->vfs_inode) != new_alloc) { - inode_set_bytes(&ni->vfs_inode, new_alloc); - dirty = true; - } + if (attr_b->non_res && + inode_get_bytes(&ni->vfs_inode) != new_alloc) { + inode_set_bytes(&ni->vfs_inode, new_alloc); + dirty = true; } + i_size_write(&ni->vfs_inode, new_size); + /* Don't forget to update duplicate information in parent. 
*/ if (dirty) { ni->ni_flags |= NI_FLAG_UPDATE_PARENT; @@ -869,7 +928,7 @@ restore_run: is_bad = true; undo_1: - run_deallocate_ex(sbi, run, vcn, alen, NULL, false); + run_deallocate_ex(sbi, run, vcn, alen, NULL, false, run_da); run_truncate(run, vcn); out: @@ -892,20 +951,9 @@ bad_inode: * - new allocated clusters are zeroed via blkdev_issue_zeroout. */ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, - CLST *len, bool *new, bool zero, void **res) + CLST *len, bool *new, bool zero, void **res, bool no_da) { - int err = 0; - struct runs_tree *run = &ni->file.run; - struct ntfs_sb_info *sbi; - u8 cluster_bits; - struct ATTRIB *attr, *attr_b; - struct ATTR_LIST_ENTRY *le, *le_b; - struct mft_inode *mi, *mi_b; - CLST hint, svcn, to_alloc, evcn1, next_svcn, asize, end, vcn0, alen; - CLST alloc, evcn; - unsigned fr; - u64 total_size, total_size0; - int step = 0; + int err; if (new) *new = false; @@ -914,23 +962,63 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, /* Try to find in cache. */ down_read(&ni->file.run_lock); - if (!run_lookup_entry(run, vcn, lcn, len, NULL)) + if (!no_da && run_lookup_entry(&ni->file.run_da, vcn, lcn, len, NULL)) { + /* The requested vcn is delay allocated. */ + *lcn = DELALLOC_LCN; + } else if (run_lookup_entry(&ni->file.run, vcn, lcn, len, NULL)) { + /* The requested vcn is known in current run. */ + } else { *len = 0; + } up_read(&ni->file.run_lock); if (*len && (*lcn != SPARSE_LCN || !new)) return 0; /* Fast normal way without allocation. */ /* No cluster in cache or we need to allocate cluster in hole. */ - sbi = ni->mi.sbi; - cluster_bits = sbi->cluster_bits; - ni_lock(ni); down_write(&ni->file.run_lock); - /* Repeat the code above (under write lock). 
*/ - if (!run_lookup_entry(run, vcn, lcn, len, NULL)) + err = attr_data_get_block_locked(ni, vcn, clen, lcn, len, new, zero, + res, no_da); + + up_write(&ni->file.run_lock); + ni_unlock(ni); + + return err; +} + +/* + * attr_data_get_block_locked - Helper for attr_data_get_block. + */ +int attr_data_get_block_locked(struct ntfs_inode *ni, CLST vcn, CLST clen, + CLST *lcn, CLST *len, bool *new, bool zero, + void **res, bool no_da) +{ + int err = 0; + struct ntfs_sb_info *sbi = ni->mi.sbi; + struct runs_tree *run = &ni->file.run; + struct runs_tree *run_da = &ni->file.run_da; + bool da = sbi->options->delalloc && !no_da; + u8 cluster_bits; + struct ATTRIB *attr, *attr_b; + struct ATTR_LIST_ENTRY *le, *le_b; + struct mft_inode *mi, *mi_b; + CLST hint, svcn, to_alloc, evcn1, next_svcn, asize, end, vcn0; + CLST alloc, evcn; + unsigned fr; + u64 total_size, total_size0; + int step; + +again: + if (da && run_lookup_entry(run_da, vcn, lcn, len, NULL)) { + /* The requested vcn is delay allocated. */ + *lcn = DELALLOC_LCN; + } else if (run_lookup_entry(run, vcn, lcn, len, NULL)) { + /* The requested vcn is known in current run. */ + } else { *len = 0; + } if (*len) { if (*lcn != SPARSE_LCN || !new) @@ -939,6 +1027,9 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, clen = *len; } + cluster_bits = sbi->cluster_bits; + step = 0; + le_b = NULL; attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); if (!attr_b) { @@ -1061,11 +1152,38 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, if (err) goto out; } + da = false; /* no delalloc for compressed file. */ } if (vcn + to_alloc > asize) to_alloc = asize - vcn; + if (da) { + CLST rlen1, rlen2; + if (!ntfs_check_free_space(sbi, to_alloc, 0, true)) { + err = ni_allocate_da_blocks_locked(ni); + if (err) + goto out; + /* Layout of records may be changed. Start again without 'da'. 
*/ + da = false; + goto again; + } + + /* run_add_entry consolidates existed ranges. */ + rlen1 = run_len(run_da); + if (!run_add_entry(run_da, vcn, SPARSE_LCN, to_alloc, false)) { + err = -ENOMEM; + goto out; + } + rlen2 = run_len(run_da); + + /* new added delay clusters = rlen2 - rlen1. */ + ntfs_add_da(sbi, rlen2 - rlen1); + *len = to_alloc; + *lcn = DELALLOC_LCN; + goto ok; + } + /* Get the last LCN to allocate from. */ hint = 0; @@ -1080,18 +1198,19 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, } /* Allocate and zeroout new clusters. */ - err = attr_allocate_clusters(sbi, run, vcn, hint + 1, to_alloc, NULL, - zero ? ALLOCATE_ZERO : ALLOCATE_DEF, &alen, - fr, lcn, len); + err = attr_allocate_clusters(sbi, run, run_da, vcn, hint + 1, to_alloc, + NULL, + zero ? ALLOCATE_ZERO : ALLOCATE_ONE_FR, + len, fr, lcn, len); if (err) goto out; *new = true; step = 1; - end = vcn + alen; + end = vcn + *len; /* Save 'total_size0' to restore if error. */ total_size0 = le64_to_cpu(attr_b->nres.total_size); - total_size = total_size0 + ((u64)alen << cluster_bits); + total_size = total_size0 + ((u64)*len << cluster_bits); if (vcn != vcn0) { if (!run_lookup_entry(run, vcn0, lcn, len, NULL)) { @@ -1157,7 +1276,7 @@ repack: * in 'ni_insert_nonresident'. * Return in advance -ENOSPC here if there are no free cluster and no free MFT. */ - if (!ntfs_check_for_free_space(sbi, 1, 1)) { + if (!ntfs_check_free_space(sbi, 1, 1, false)) { /* Undo step 1. */ err = -ENOSPC; goto undo1; @@ -1242,8 +1361,6 @@ out: /* Too complex to restore. 
*/ _ntfs_bad_inode(&ni->vfs_inode); } - up_write(&ni->file.run_lock); - ni_unlock(ni); return err; @@ -1252,8 +1369,8 @@ undo1: attr_b->nres.total_size = cpu_to_le64(total_size0); inode_set_bytes(&ni->vfs_inode, total_size0); - if (run_deallocate_ex(sbi, run, vcn, alen, NULL, false) || - !run_add_entry(run, vcn, SPARSE_LCN, alen, false) || + if (run_deallocate_ex(sbi, run, vcn, *len, NULL, false, run_da) || + !run_add_entry(run, vcn, SPARSE_LCN, *len, false) || mi_pack_runs(mi, attr, run, max(end, evcn1) - svcn)) { _ntfs_bad_inode(&ni->vfs_inode); } @@ -1688,7 +1805,7 @@ int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size, if (len < clst_data) { err = run_deallocate_ex(sbi, run, vcn + len, clst_data - len, - NULL, true); + NULL, true, NULL); if (err) goto out; @@ -1708,7 +1825,7 @@ int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size, hint = -1; } - err = attr_allocate_clusters(sbi, run, vcn + clst_data, + err = attr_allocate_clusters(sbi, run, NULL, vcn + clst_data, hint + 1, len - clst_data, NULL, ALLOCATE_DEF, &alen, 0, NULL, NULL); @@ -1863,6 +1980,7 @@ int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) CLST vcn, end; u64 valid_size, data_size, alloc_size, total_size; u32 mask; + u64 i_size; __le16 a_flags; if (!bytes) @@ -1878,52 +1996,79 @@ int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) return 0; } - data_size = le64_to_cpu(attr_b->nres.data_size); - alloc_size = le64_to_cpu(attr_b->nres.alloc_size); - a_flags = attr_b->flags; - - if (is_attr_ext(attr_b)) { - total_size = le64_to_cpu(attr_b->nres.total_size); - mask = (sbi->cluster_size << attr_b->nres.c_unit) - 1; - } else { - total_size = alloc_size; - mask = sbi->cluster_mask; - } - - if ((vbo & mask) || (bytes & mask)) { + mask = is_attr_ext(attr_b) ? + ((sbi->cluster_size << attr_b->nres.c_unit) - 1) : + sbi->cluster_mask; + if ((vbo | bytes) & mask) { /* Allow to collapse only cluster aligned ranges. 
*/ return -EINVAL; } - if (vbo > data_size) + /* i_size - size of file with delay allocated clusters. */ + i_size = ni->vfs_inode.i_size; + + if (vbo > i_size) return -EINVAL; down_write(&ni->file.run_lock); - if (vbo + bytes >= data_size) { - u64 new_valid = min(ni->i_valid, vbo); + if (vbo + bytes >= i_size) { + valid_size = min(ni->i_valid, vbo); /* Simple truncate file at 'vbo'. */ truncate_setsize(&ni->vfs_inode, vbo); err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, vbo, - &new_valid, true, NULL); + &valid_size, true); - if (!err && new_valid < ni->i_valid) - ni->i_valid = new_valid; + if (!err && valid_size < ni->i_valid) + ni->i_valid = valid_size; goto out; } - /* - * Enumerate all attribute segments and collapse. - */ - alen = alloc_size >> sbi->cluster_bits; vcn = vbo >> sbi->cluster_bits; len = bytes >> sbi->cluster_bits; end = vcn + len; dealloc = 0; done = 0; + /* + * Check delayed clusters. + */ + if (ni->file.run_da.count) { + struct runs_tree *run_da = &ni->file.run_da; + if (run_is_mapped_full(run_da, vcn, end - 1)) { + /* + * The requested range is full in delayed clusters. + */ + err = attr_set_size_ex(ni, ATTR_DATA, NULL, 0, run, + i_size - bytes, NULL, false, + NULL, true); + goto out; + } + + /* Collapse request crosses real and delayed clusters. */ + err = ni_allocate_da_blocks_locked(ni); + if (err) + goto out; + + /* Layout of records maybe changed. */ + le_b = NULL; + attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, + &mi_b); + if (!attr_b || !attr_b->non_res) { + err = -ENOENT; + goto out; + } + } + + data_size = le64_to_cpu(attr_b->nres.data_size); + alloc_size = le64_to_cpu(attr_b->nres.alloc_size); + total_size = is_attr_ext(attr_b) ? 
+ le64_to_cpu(attr_b->nres.total_size) : + alloc_size; + alen = alloc_size >> sbi->cluster_bits; + a_flags = attr_b->flags; svcn = le64_to_cpu(attr_b->nres.svcn); evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1; @@ -1946,6 +2091,9 @@ int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) goto out; } + /* + * Enumerate all attribute segments and collapse. + */ for (;;) { CLST vcn1, eat, next_svcn; @@ -1973,13 +2121,13 @@ check_seg: vcn1 = vcn + done; /* original vcn in attr/run. */ eat = min(end, evcn1) - vcn1; - err = run_deallocate_ex(sbi, run, vcn1, eat, &dealloc, true); + err = run_deallocate_ex(sbi, run, vcn1, eat, &dealloc, true, + NULL); if (err) goto out; if (svcn + eat < evcn1) { /* Collapse a part of this attribute segment. */ - if (!run_collapse_range(run, vcn1, eat, done)) { err = -ENOMEM; goto out; @@ -2160,9 +2308,9 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size) bytes = alloc_size; bytes -= vbo; - if ((vbo & mask) || (bytes & mask)) { + if ((vbo | bytes) & mask) { /* We have to zero a range(s). */ - if (frame_size == NULL) { + if (!frame_size) { /* Caller insists range is aligned. */ return -EINVAL; } @@ -2221,7 +2369,8 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size) * Calculate how many clusters there are. * Don't do any destructive actions. */ - err = run_deallocate_ex(NULL, run, vcn1, zero, &hole2, false); + err = run_deallocate_ex(NULL, run, vcn1, zero, &hole2, false, + NULL); if (err) goto done; @@ -2259,7 +2408,8 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size) } /* Real deallocate. Should not fail. */ - run_deallocate_ex(sbi, &run2, vcn1, zero, &hole, true); + run_deallocate_ex(sbi, &run2, vcn1, zero, &hole, true, + &ni->file.run_da); next_attr: /* Free all allocated memory. 
*/ @@ -2371,7 +2521,7 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) return -EINVAL; } - if ((vbo & mask) || (bytes & mask)) { + if ((vbo | bytes) & mask) { /* Allow to insert only frame aligned ranges. */ return -EINVAL; } @@ -2390,7 +2540,7 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) if (!attr_b->non_res) { err = attr_set_size(ni, ATTR_DATA, NULL, 0, run, - data_size + bytes, NULL, false, NULL); + data_size + bytes, NULL, false); le_b = NULL; attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, @@ -2413,7 +2563,7 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) goto done; } - /* Resident files becomes nonresident. */ + /* Resident file becomes nonresident. */ data_size = le64_to_cpu(attr_b->nres.data_size); alloc_size = le64_to_cpu(attr_b->nres.alloc_size); } @@ -2450,10 +2600,13 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) if (err) goto out; - if (!run_insert_range(run, vcn, len)) { - err = -ENOMEM; + err = run_insert_range(run, vcn, len); + if (err) + goto out; + + err = run_insert_range_da(&ni->file.run_da, vcn, len); + if (err) goto out; - } /* Try to pack in current record as much as possible. */ err = mi_pack_runs(mi, attr, run, evcn1 + len - svcn); diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c index 098bd7e8c3d6..270a29323530 100644 --- a/fs/ntfs3/attrlist.c +++ b/fs/ntfs3/attrlist.c @@ -345,8 +345,8 @@ int al_add_le(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name, le->id = id; memcpy(le->name, name, sizeof(short) * name_len); - err = attr_set_size(ni, ATTR_LIST, NULL, 0, &al->run, new_size, - &new_size, true, &attr); + err = attr_set_size_ex(ni, ATTR_LIST, NULL, 0, &al->run, new_size, + &new_size, true, &attr, false); if (err) { /* Undo memmove above. */ memmove(le, Add2Ptr(le, sz), old_size - off); @@ -404,8 +404,8 @@ int al_update(struct ntfs_inode *ni, int sync) * Attribute list increased on demand in al_add_le. 
* Attribute list decreased here. */ - err = attr_set_size(ni, ATTR_LIST, NULL, 0, &al->run, al->size, NULL, - false, &attr); + err = attr_set_size_ex(ni, ATTR_LIST, NULL, 0, &al->run, al->size, NULL, + false, &attr, false); if (err) goto out; diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 1be77f865d78..79e4c7a78c26 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -26,6 +26,38 @@ */ #define NTFS3_IOC_SHUTDOWN _IOR('X', 125, __u32) +/* + * Helper for ntfs_should_use_dio. + */ +static u32 ntfs_dio_alignment(struct inode *inode) +{ + struct ntfs_inode *ni = ntfs_i(inode); + + if (is_resident(ni)) { + /* Check delalloc. */ + if (!ni->file.run_da.count) + return 0; + } + + /* In most cases this is bdev_logical_block_size(bdev). */ + return ni->mi.sbi->bdev_blocksize; +} + +/* + * Returns %true if the given DIO request should be attempted with DIO, or + * %false if it should fall back to buffered I/O. + */ +static bool ntfs_should_use_dio(struct kiocb *iocb, struct iov_iter *iter) +{ + struct inode *inode = file_inode(iocb->ki_filp); + u32 dio_align = ntfs_dio_alignment(inode); + + if (!dio_align) + return false; + + return IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), dio_align); +} + static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg) { struct fstrim_range __user *user_range; @@ -186,10 +218,10 @@ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path, static int ntfs_extend_initialized_size(struct file *file, struct ntfs_inode *ni, - const loff_t valid, const loff_t new_valid) { struct inode *inode = &ni->vfs_inode; + const loff_t valid = ni->i_valid; int err; if (valid >= new_valid) @@ -200,8 +232,6 @@ static int ntfs_extend_initialized_size(struct file *file, return 0; } - WARN_ON(is_compressed(ni)); - err = iomap_zero_range(inode, valid, new_valid - valid, NULL, &ntfs_iomap_ops, &ntfs_iomap_folio_ops, NULL); if (err) { @@ -291,7 +321,7 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc) for (; vcn < end; 
vcn += len) { err = attr_data_get_block(ni, vcn, 1, &lcn, &len, &new, true, - NULL); + NULL, false); if (err) goto out; } @@ -302,8 +332,7 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc) err = -EAGAIN; goto out; } - err = ntfs_extend_initialized_size(file, ni, - ni->i_valid, to); + err = ntfs_extend_initialized_size(file, ni, to); inode_unlock(inode); if (err) goto out; @@ -333,55 +362,23 @@ static int ntfs_extend(struct inode *inode, loff_t pos, size_t count, ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_DIRTY); if (end > inode->i_size) { + /* + * Normal files: increase file size, allocate space. + * Sparse/Compressed: increase file size. No space allocated. + */ err = ntfs_set_size(inode, end); if (err) goto out; } if (extend_init && !is_compressed(ni)) { - err = ntfs_extend_initialized_size(file, ni, ni->i_valid, pos); + err = ntfs_extend_initialized_size(file, ni, pos); if (err) goto out; } else { err = 0; } - if (file && is_sparsed(ni)) { - /* - * This code optimizes large writes to sparse file. - * TODO: merge this fragment with fallocate fragment. - */ - struct ntfs_sb_info *sbi = ni->mi.sbi; - CLST vcn = pos >> sbi->cluster_bits; - CLST cend = bytes_to_cluster(sbi, end); - CLST cend_v = bytes_to_cluster(sbi, ni->i_valid); - CLST lcn, clen; - bool new; - - if (cend_v > cend) - cend_v = cend; - - /* - * Allocate and zero new clusters. - * Zeroing these clusters may be too long. - */ - for (; vcn < cend_v; vcn += clen) { - err = attr_data_get_block(ni, vcn, cend_v - vcn, &lcn, - &clen, &new, true, NULL); - if (err) - goto out; - } - /* - * Allocate but not zero new clusters. 
- */ - for (; vcn < cend; vcn += clen) { - err = attr_data_get_block(ni, vcn, cend - vcn, &lcn, - &clen, &new, false, NULL); - if (err) - goto out; - } - } - inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); mark_inode_dirty(inode); @@ -414,8 +411,9 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) ni_lock(ni); down_write(&ni->file.run_lock); - err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, - &new_valid, ni->mi.sbi->options->prealloc, NULL); + err = attr_set_size_ex(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, + &new_valid, ni->mi.sbi->options->prealloc, NULL, + false); up_write(&ni->file.run_lock); ni->i_valid = new_valid; @@ -507,7 +505,7 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) if (mode & FALLOC_FL_PUNCH_HOLE) { u32 frame_size; - loff_t mask, vbo_a, end_a, tmp; + loff_t mask, vbo_a, end_a, tmp, from; err = filemap_write_and_wait_range(mapping, vbo_down, LLONG_MAX); @@ -527,28 +525,24 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) /* Process not aligned punch. */ err = 0; + if (end > i_size) + end = i_size; mask = frame_size - 1; vbo_a = (vbo + mask) & ~mask; end_a = end & ~mask; tmp = min(vbo_a, end); - if (tmp > vbo) { - err = iomap_zero_range(inode, vbo, tmp - vbo, NULL, + from = min_t(loff_t, ni->i_valid, vbo); + /* Zero head of punch. */ + if (tmp > from) { + err = iomap_zero_range(inode, from, tmp - from, NULL, &ntfs_iomap_ops, &ntfs_iomap_folio_ops, NULL); if (err) goto out; } - if (vbo < end_a && end_a < end) { - err = iomap_zero_range(inode, end_a, end - end_a, NULL, - &ntfs_iomap_ops, - &ntfs_iomap_folio_ops, NULL); - if (err) - goto out; - } - - /* Aligned punch_hole */ + /* Aligned punch_hole. Deallocate clusters. 
*/ if (end_a > vbo_a) { ni_lock(ni); err = attr_punch_hole(ni, vbo_a, end_a - vbo_a, NULL); @@ -556,6 +550,15 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) if (err) goto out; } + + /* Zero tail of punch. */ + if (vbo < end_a && end_a < end) { + err = iomap_zero_range(inode, end_a, end - end_a, NULL, + &ntfs_iomap_ops, + &ntfs_iomap_folio_ops, NULL); + if (err) + goto out; + } } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { /* * Write tail of the last page before removed range since @@ -653,17 +656,26 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) for (; vcn < cend_v; vcn += clen) { err = attr_data_get_block(ni, vcn, cend_v - vcn, &lcn, &clen, &new, - true, NULL); + true, NULL, false); if (err) goto out; } + + /* + * Moving up 'valid size'. + */ + err = ntfs_extend_initialized_size( + file, ni, (u64)cend_v << cluster_bits); + if (err) + goto out; + /* * Allocate but not zero new clusters. */ for (; vcn < cend; vcn += clen) { err = attr_data_get_block(ni, vcn, cend - vcn, &lcn, &clen, &new, - false, NULL); + false, NULL, false); if (err) goto out; } @@ -674,7 +686,7 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) /* True - Keep preallocated. */ err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, i_size, &ni->i_valid, - true, NULL); + true); ni_unlock(ni); if (err) goto out; @@ -816,6 +828,8 @@ static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) struct inode *inode = file_inode(file); struct ntfs_inode *ni = ntfs_i(inode); size_t bytes = iov_iter_count(iter); + loff_t valid, i_size, vbo, end; + unsigned int dio_flags; ssize_t err; err = check_read_restriction(inode); @@ -835,62 +849,63 @@ static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) file->f_ra.ra_pages = 0; } - /* Check minimum alignment for dio. 
*/ - if ((iocb->ki_flags & IOCB_DIRECT) && - (is_resident(ni) || ((iocb->ki_pos | iov_iter_alignment(iter)) & - ni->mi.sbi->bdev_blocksize_mask))) { - /* Fallback to buffered I/O */ + /* Fallback to buffered I/O if the inode does not support direct I/O. */ + if (!(iocb->ki_flags & IOCB_DIRECT) || + !ntfs_should_use_dio(iocb, iter)) { iocb->ki_flags &= ~IOCB_DIRECT; + return generic_file_read_iter(iocb, iter); } - if (iocb->ki_flags & IOCB_DIRECT) { - loff_t valid, i_size; - loff_t vbo = iocb->ki_pos; - loff_t end = vbo + bytes; - unsigned int dio_flags = IOMAP_DIO_PARTIAL; - - if (iocb->ki_flags & IOCB_NOWAIT) { - if (!inode_trylock_shared(inode)) - return -EAGAIN; - } else { - inode_lock_shared(inode); - } - - valid = ni->i_valid; - i_size = inode->i_size; - - if (vbo < valid) { - if (valid < end) { - /* read cross 'valid' size. */ - dio_flags |= IOMAP_DIO_FORCE_WAIT; - } - - err = iomap_dio_rw(iocb, iter, &ntfs_iomap_ops, NULL, - dio_flags, NULL, 0); - - if (err > 0) { - end = vbo + err; - if (valid < end) { - size_t to_zero = end - valid; - /* Fix iter. */ - iov_iter_revert(iter, to_zero); - iov_iter_zero(to_zero, iter); - } - } - } else if (vbo < i_size) { - if (end > i_size) - bytes = i_size - vbo; - iov_iter_zero(bytes, iter); - iocb->ki_pos += bytes; - err = bytes; - } - - inode_unlock_shared(inode); - file_accessed(iocb->ki_filp); - return err; + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock_shared(inode)) + return -EAGAIN; + } else { + inode_lock_shared(inode); } - return generic_file_read_iter(iocb, iter); + vbo = iocb->ki_pos; + end = vbo + bytes; + dio_flags = 0; + valid = ni->i_valid; + i_size = inode->i_size; + + if (vbo < valid) { + if (valid < end) { + /* read cross 'valid' size. */ + dio_flags |= IOMAP_DIO_FORCE_WAIT; + } + + if (ni->file.run_da.count) { + /* Direct I/O is not compatible with delalloc. 
*/ + err = ni_allocate_da_blocks(ni); + if (err) + goto out; + } + + err = iomap_dio_rw(iocb, iter, &ntfs_iomap_ops, NULL, dio_flags, + NULL, 0); + + if (err <= 0) + goto out; + end = vbo + err; + if (valid < end) { + size_t to_zero = end - valid; + /* Fix iter. */ + iov_iter_revert(iter, to_zero); + iov_iter_zero(to_zero, iter); + } + } else if (vbo < i_size) { + if (end > i_size) + bytes = i_size - vbo; + iov_iter_zero(bytes, iter); + iocb->ki_pos += bytes; + err = bytes; + } + +out: + inode_unlock_shared(inode); + file_accessed(iocb->ki_filp); + return err; } /* @@ -1011,17 +1026,13 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) off = valid & (frame_size - 1); err = attr_data_get_block(ni, frame << NTFS_LZNT_CUNIT, 1, &lcn, - &clen, NULL, false, NULL); + &clen, NULL, false, NULL, false); if (err) goto out; if (lcn == SPARSE_LCN) { - valid = frame_vbo + ((u64)clen << sbi->cluster_bits); - if (ni->i_valid == valid) { - err = -EINVAL; - goto out; - } - ni->i_valid = valid; + ni->i_valid = valid = + frame_vbo + ((u64)clen << sbi->cluster_bits); continue; } @@ -1207,6 +1218,9 @@ static int check_write_restriction(struct inode *inode) return -EOPNOTSUPP; } + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + return 0; } @@ -1218,8 +1232,6 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct ntfs_inode *ni = ntfs_i(inode); - struct super_block *sb = inode->i_sb; - struct ntfs_sb_info *sbi = sb->s_fs_info; ssize_t ret, err; if (!inode_trylock(inode)) { @@ -1263,15 +1275,11 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; } - /* Check minimum alignment for dio. 
*/ - if ((iocb->ki_flags & IOCB_DIRECT) && - (is_resident(ni) || ((iocb->ki_pos | iov_iter_alignment(from)) & - sbi->bdev_blocksize_mask))) { - /* Fallback to buffered I/O */ + /* Fallback to buffered I/O if the inode does not support direct I/O. */ + if (!(iocb->ki_flags & IOCB_DIRECT) || + !ntfs_should_use_dio(iocb, from)) { iocb->ki_flags &= ~IOCB_DIRECT; - } - if (!(iocb->ki_flags & IOCB_DIRECT)) { ret = iomap_file_buffered_write(iocb, from, &ntfs_iomap_ops, &ntfs_iomap_folio_ops, NULL); inode_unlock(inode); @@ -1282,8 +1290,14 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) return ret; } - ret = iomap_dio_rw(iocb, from, &ntfs_iomap_ops, NULL, IOMAP_DIO_PARTIAL, - NULL, 0); + if (ni->file.run_da.count) { + /* Direct I/O is not compatible with delalloc. */ + ret = ni_allocate_da_blocks(ni); + if (ret) + goto out; + } + + ret = iomap_dio_rw(iocb, from, &ntfs_iomap_ops, NULL, 0, NULL, 0); if (ret == -ENOTBLK) { /* Returns -ENOTBLK in case of a page invalidation failure for writes.*/ @@ -1370,34 +1384,42 @@ int ntfs_file_open(struct inode *inode, struct file *file) /* * ntfs_file_release - file_operations::release + * + * Called when an inode is released. Note that this is different + * from ntfs_file_open: open gets called at every open, but release + * gets called only when /all/ the files are closed. */ static int ntfs_file_release(struct inode *inode, struct file *file) { - struct ntfs_inode *ni = ntfs_i(inode); - struct ntfs_sb_info *sbi = ni->mi.sbi; - int err = 0; + int err; + struct ntfs_inode *ni; - /* If we are last writer on the inode, drop the block reservation. */ - if (sbi->options->prealloc && - ((file->f_mode & FMODE_WRITE) && - atomic_read(&inode->i_writecount) == 1) - /* - * The only file when inode->i_fop = &ntfs_file_operations and - * init_rwsem(&ni->file.run_lock) is not called explicitly is MFT. - * - * Add additional check here. 
- */ - && inode->i_ino != MFT_REC_MFT) { + if (!(file->f_mode & FMODE_WRITE) || + atomic_read(&inode->i_writecount) != 1 || + inode->i_ino == MFT_REC_MFT) { + return 0; + } + + /* Close the last writer on the inode. */ + ni = ntfs_i(inode); + + /* Allocate delayed blocks (clusters). */ + err = ni_allocate_da_blocks(ni); + if (err) + goto out; + + if (ni->mi.sbi->options->prealloc) { ni_lock(ni); down_write(&ni->file.run_lock); + /* Deallocate preallocated. */ err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, - i_size_read(inode), &ni->i_valid, false, - NULL); + inode->i_size, &ni->i_valid, false); up_write(&ni->file.run_lock); ni_unlock(ni); } +out: return err; } @@ -1506,7 +1528,7 @@ static loff_t ntfs_llseek(struct file *file, loff_t offset, int whence) if (whence == SEEK_DATA || whence == SEEK_HOLE) { inode_lock_shared(inode); - /* Scan fragments for hole or data. */ + /* Scan file for hole or data. */ ret = ni_seek_data_or_hole(ni, offset, whence == SEEK_DATA); inode_unlock_shared(inode); diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 0dc28815331e..bd0fa481e4b3 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -123,6 +123,8 @@ void ni_clear(struct ntfs_inode *ni) indx_clear(&ni->dir); else { run_close(&ni->file.run); + ntfs_sub_da(ni->mi.sbi, run_len(&ni->file.run_da)); + run_close(&ni->file.run_da); #ifdef CONFIG_NTFS3_LZX_XPRESS if (ni->file.offs_folio) { /* On-demand allocated page for offsets. 
*/ @@ -2014,7 +2016,8 @@ int ni_decompress_file(struct ntfs_inode *ni) for (vcn = vbo >> sbi->cluster_bits; vcn < end; vcn += clen) { err = attr_data_get_block(ni, vcn, cend - vcn, &lcn, - &clen, &new, false, NULL); + &clen, &new, false, NULL, + false); if (err) goto out; } @@ -2235,7 +2238,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, struct runs_tree *run = &ni->file.run; u64 valid_size = ni->i_valid; u64 vbo_disk; - size_t unc_size; + size_t unc_size = 0; u32 frame_size, i, ondisk_size; struct page *pg; struct ATTRIB *attr; @@ -2846,7 +2849,7 @@ loff_t ni_seek_data_or_hole(struct ntfs_inode *ni, loff_t offset, bool data) /* Enumerate all fragments. */ for (vcn = offset >> cluster_bits;; vcn += clen) { err = attr_data_get_block(ni, vcn, 1, &lcn, &clen, NULL, false, - NULL); + NULL, false); if (err) { return err; } @@ -2886,9 +2889,9 @@ loff_t ni_seek_data_or_hole(struct ntfs_inode *ni, loff_t offset, bool data) } } else { /* - * Adjust the file offset to the next hole in the file greater than or + * Adjust the file offset to the next hole in the file greater than or * equal to offset. If offset points into the middle of a hole, then the - * file offset is set to offset. If there is no hole past offset, then the + * file offset is set to offset. If there is no hole past offset, then the * file offset is adjusted to the end of the file * (i.e., there is an implicit hole at the end of any file). */ @@ -3235,3 +3238,62 @@ out: return 0; } + +/* + * Force to allocate all delay allocated clusters. + */ +int ni_allocate_da_blocks(struct ntfs_inode *ni) +{ + int err; + + ni_lock(ni); + down_write(&ni->file.run_lock); + + err = ni_allocate_da_blocks_locked(ni); + + up_write(&ni->file.run_lock); + ni_unlock(ni); + + return err; +} + +/* + * Force to allocate all delay allocated clusters. 
+ */ +int ni_allocate_da_blocks_locked(struct ntfs_inode *ni) +{ + int err; + + if (!ni->file.run_da.count) + return 0; + + if (is_sparsed(ni)) { + CLST vcn, lcn, clen, alen; + bool new; + + /* + * Sparse file allocates clusters in 'attr_data_get_block_locked' + */ + while (run_get_entry(&ni->file.run_da, 0, &vcn, &lcn, &clen)) { + /* TODO: zero=true? */ + err = attr_data_get_block_locked(ni, vcn, clen, &lcn, + &alen, &new, true, + NULL, true); + if (err) + break; + if (!new) { + err = -EINVAL; + break; + } + } + } else { + /* + * Normal file allocates clusters in 'attr_set_size' + */ + err = attr_set_size_ex(ni, ATTR_DATA, NULL, 0, &ni->file.run, + ni->vfs_inode.i_size, &ni->i_valid, + false, NULL, true); + } + + return err; +} diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 2ef500f1a9fa..5f44e91d7997 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -445,36 +445,59 @@ up_write: } /* - * ntfs_check_for_free_space + * ntfs_check_free_space * * Check if it is possible to allocate 'clen' clusters and 'mlen' Mft records */ -bool ntfs_check_for_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen) +bool ntfs_check_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen, + bool da) { size_t free, zlen, avail; struct wnd_bitmap *wnd; + CLST da_clusters = ntfs_get_da(sbi); wnd = &sbi->used.bitmap; down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); free = wnd_zeroes(wnd); + + if (free >= da_clusters) { + free -= da_clusters; + } else { + free = 0; + } + zlen = min_t(size_t, NTFS_MIN_MFT_ZONE, wnd_zone_len(wnd)); up_read(&wnd->rw_lock); - if (free < zlen + clen) + if (free < zlen + clen) { return false; + } avail = free - (zlen + clen); - wnd = &sbi->mft.bitmap; - down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT); - free = wnd_zeroes(wnd); - zlen = wnd_zone_len(wnd); - up_read(&wnd->rw_lock); + /* + * When delalloc is active then keep in mind some reserved space. + * The worst case: 1 mft record per each ~500 clusters. 
+ */ + if (da) { + /* 1 mft record per each 1024 clusters. */ + mlen += da_clusters >> 10; + } - if (free >= zlen + mlen) - return true; + if (mlen || !avail) { + wnd = &sbi->mft.bitmap; + down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT); + free = wnd_zeroes(wnd); + zlen = wnd_zone_len(wnd); + up_read(&wnd->rw_lock); - return avail >= bytes_to_cluster(sbi, mlen << sbi->record_bits); + if (free < zlen + mlen && + avail < bytes_to_cluster(sbi, mlen << sbi->record_bits)) { + return false; + } + } + + return true; } /* @@ -509,8 +532,8 @@ static int ntfs_extend_mft(struct ntfs_sb_info *sbi) /* Step 1: Resize $MFT::DATA. */ down_write(&ni->file.run_lock); - err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, - new_mft_bytes, NULL, false, &attr); + err = attr_set_size_ex(ni, ATTR_DATA, NULL, 0, &ni->file.run, + new_mft_bytes, NULL, false, &attr, false); if (err) { up_write(&ni->file.run_lock); @@ -525,7 +548,7 @@ static int ntfs_extend_mft(struct ntfs_sb_info *sbi) new_bitmap_bytes = ntfs3_bitmap_size(new_mft_total); err = attr_set_size(ni, ATTR_BITMAP, NULL, 0, &sbi->mft.bitmap.run, - new_bitmap_bytes, &new_bitmap_bytes, true, NULL); + new_bitmap_bytes, &new_bitmap_bytes, true); /* Refresh MFT Zone if necessary. 
*/ down_write_nested(&sbi->used.bitmap.rw_lock, BITMAP_MUTEX_CLUSTERS); @@ -2191,7 +2214,7 @@ int ntfs_insert_security(struct ntfs_sb_info *sbi, if (new_sds_size > ni->vfs_inode.i_size) { err = attr_set_size(ni, ATTR_DATA, SDS_NAME, ARRAY_SIZE(SDS_NAME), &ni->file.run, - new_sds_size, &new_sds_size, false, NULL); + new_sds_size, &new_sds_size, false); if (err) goto out; } diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index d08bee3c20fa..2416c61050f1 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -1446,8 +1446,8 @@ static int indx_create_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, run_init(&run); - err = attr_allocate_clusters(sbi, &run, 0, 0, len, NULL, ALLOCATE_DEF, - &alen, 0, NULL, NULL); + err = attr_allocate_clusters(sbi, &run, NULL, 0, 0, len, NULL, + ALLOCATE_DEF, &alen, 0, NULL, NULL); if (err) goto out; @@ -1531,8 +1531,7 @@ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, /* Increase bitmap. */ err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, &indx->bitmap_run, - ntfs3_bitmap_size(bit + 1), NULL, true, - NULL); + ntfs3_bitmap_size(bit + 1), NULL, true); if (err) goto out1; } @@ -1553,8 +1552,7 @@ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, /* Increase allocation. */ err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, - &indx->alloc_run, data_size, &data_size, true, - NULL); + &indx->alloc_run, data_size, &data_size, true); if (err) { if (bmp) goto out2; @@ -1572,7 +1570,7 @@ out: out2: /* Ops. No space? 
*/ attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, - &indx->bitmap_run, bmp_size, &bmp_size_v, false, NULL); + &indx->bitmap_run, bmp_size, &bmp_size_v, false); out1: return err; @@ -2106,7 +2104,7 @@ static int indx_shrink(struct ntfs_index *indx, struct ntfs_inode *ni, new_data = (u64)bit << indx->index_bits; err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, - &indx->alloc_run, new_data, &new_data, false, NULL); + &indx->alloc_run, new_data, &new_data, false); if (err) return err; @@ -2118,7 +2116,7 @@ static int indx_shrink(struct ntfs_index *indx, struct ntfs_inode *ni, return 0; err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, - &indx->bitmap_run, bpb, &bpb, false, NULL); + &indx->bitmap_run, bpb, &bpb, false); return err; } @@ -2333,6 +2331,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni, hdr = &root->ihdr; e = fnd->root_de; n = NULL; + ib = NULL; } e_size = le16_to_cpu(e->size); @@ -2355,7 +2354,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni, * Check to see if removing that entry made * the leaf empty. 
*/ - if (ib_is_leaf(ib) && ib_is_empty(ib)) { + if (ib && ib_is_leaf(ib) && ib_is_empty(ib)) { fnd_pop(fnd); fnd_push(fnd2, n, e); } @@ -2603,7 +2602,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni, in = &s_index_names[indx->type]; err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, - &indx->alloc_run, 0, NULL, false, NULL); + &indx->alloc_run, 0, NULL, false); if (in->name == I30_NAME) i_size_write(&ni->vfs_inode, 0); @@ -2612,7 +2611,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni, run_close(&indx->alloc_run); err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, - &indx->bitmap_run, 0, NULL, false, NULL); + &indx->bitmap_run, 0, NULL, false); err = ni_remove_attr(ni, ATTR_BITMAP, in->name, in->name_len, false, NULL); run_close(&indx->bitmap_run); diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 2147fce8e0b2..aca774f1aed1 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -40,7 +40,7 @@ static struct inode *ntfs_read_mft(struct inode *inode, u32 rp_fa = 0, asize, t32; u16 roff, rsize, names = 0, links = 0; const struct ATTR_FILE_NAME *fname = NULL; - const struct INDEX_ROOT *root; + const struct INDEX_ROOT *root = NULL; struct REPARSE_DATA_BUFFER rp; // 0x18 bytes u64 t64; struct MFT_REC *rec; @@ -556,6 +556,25 @@ struct inode *ntfs_iget5(struct super_block *sb, const struct MFT_REF *ref, static sector_t ntfs_bmap(struct address_space *mapping, sector_t block) { + struct inode *inode = mapping->host; + struct ntfs_inode *ni = ntfs_i(inode); + + /* + * We can get here for an inline file via the FIBMAP ioctl + */ + if (is_resident(ni)) + return 0; + + if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && + !run_is_empty(&ni->file.run_da)) { + /* + * With delalloc data we want to sync the file so + * that we can make sure we allocate blocks for file and data + * is in place for the user to see it + */ + ni_allocate_da_blocks(ni); + } + return iomap_bmap(mapping, block, &ntfs_iomap_ops); } @@ 
-722,7 +741,7 @@ int ntfs_set_size(struct inode *inode, u64 new_size) down_write(&ni->file.run_lock); err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, - &ni->i_valid, true, NULL); + &ni->i_valid, true); if (!err) { i_size_write(inode, new_size); @@ -735,6 +754,10 @@ int ntfs_set_size(struct inode *inode, u64 new_size) return err; } +/* + * Special value to detect ntfs_writeback_range call + */ +#define WB_NO_DA (struct iomap *)1 /* * Function to get mapping vbo -> lbo. * used with: @@ -760,22 +783,40 @@ static int ntfs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, loff_t endbyte = offset + length; void *res = NULL; int err; - CLST lcn, clen, clen_max; + CLST lcn, clen, clen_max = 1; bool new_clst = false; + bool no_da; + bool zero = false; if (unlikely(ntfs3_forced_shutdown(sbi->sb))) return -EIO; - if ((flags & IOMAP_REPORT) && offset > ntfs_get_maxbytes(ni)) { - /* called from fiemap/bmap. */ - return -EINVAL; + if (flags & IOMAP_REPORT) { + if (offset > ntfs_get_maxbytes(ni)) { + /* called from fiemap/bmap. */ + return -EINVAL; + } + + if (offset >= inode->i_size) { + /* special code for report. */ + return -ENOENT; + } } - clen_max = rw ? (bytes_to_cluster(sbi, endbyte) - vcn) : 1; + if (IOMAP_ZERO == flags && (endbyte & sbi->cluster_mask)) { + rw = true; + } else if (rw) { + clen_max = bytes_to_cluster(sbi, endbyte) - vcn; + } - err = attr_data_get_block( - ni, vcn, clen_max, &lcn, &clen, rw ? &new_clst : NULL, - flags == IOMAP_WRITE && (off || (endbyte & sbi->cluster_mask)), - &res); + /* + * Force to allocate clusters if directIO(write) or writeback_range. + * NOTE: attr_data_get_block allocates clusters only for sparse file. + * Normal file allocates clusters in attr_set_size. + */ + no_da = flags == (IOMAP_DIRECT | IOMAP_WRITE) || srcmap == WB_NO_DA; + + err = attr_data_get_block(ni, vcn, clen_max, &lcn, &clen, + rw ? 
&new_clst : NULL, zero, &res, no_da); if (err) { return err; @@ -795,6 +836,8 @@ static int ntfs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, lcn = SPARSE_LCN; } + iomap->flags = new_clst ? IOMAP_F_NEW : 0; + if (lcn == RESIDENT_LCN) { if (offset >= clen) { kfree(res); @@ -809,7 +852,6 @@ static int ntfs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, iomap->type = IOMAP_INLINE; iomap->offset = 0; iomap->length = clen; /* resident size in bytes. */ - iomap->flags = 0; return 0; } @@ -818,42 +860,52 @@ static int ntfs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, return -EINVAL; } + iomap->bdev = inode->i_sb->s_bdev; + iomap->offset = offset; + iomap->length = ((loff_t)clen << cluster_bits) - off; + if (lcn == COMPRESSED_LCN) { /* should never be here. */ return -EOPNOTSUPP; } - iomap->flags = new_clst ? IOMAP_F_NEW : 0; - iomap->bdev = inode->i_sb->s_bdev; - - /* Translate clusters into bytes. */ - iomap->offset = offset; - iomap->addr = ((loff_t)lcn << cluster_bits) + off; - iomap->length = ((loff_t)clen << cluster_bits) - off; - if (length && iomap->length > length) - iomap->length = length; - else - endbyte = offset + iomap->length; - - if (lcn == SPARSE_LCN) { + if (lcn == DELALLOC_LCN) { + iomap->type = IOMAP_DELALLOC; iomap->addr = IOMAP_NULL_ADDR; - iomap->type = IOMAP_HOLE; - } else if (endbyte <= ni->i_valid) { - iomap->type = IOMAP_MAPPED; - } else if (offset < ni->i_valid) { - iomap->type = IOMAP_MAPPED; - if (flags & IOMAP_REPORT) - iomap->length = ni->i_valid - offset; - } else if (rw || (flags & IOMAP_ZERO)) { - iomap->type = IOMAP_MAPPED; } else { - iomap->type = IOMAP_UNWRITTEN; + + /* Translate clusters into bytes. 
*/ + iomap->addr = ((loff_t)lcn << cluster_bits) + off; + if (length && iomap->length > length) + iomap->length = length; + else + endbyte = offset + iomap->length; + + if (lcn == SPARSE_LCN) { + iomap->addr = IOMAP_NULL_ADDR; + iomap->type = IOMAP_HOLE; + // if (IOMAP_ZERO == flags && !off) { + // iomap->length = (endbyte - offset) & + // sbi->cluster_mask_inv; + // } + } else if (endbyte <= ni->i_valid) { + iomap->type = IOMAP_MAPPED; + } else if (offset < ni->i_valid) { + iomap->type = IOMAP_MAPPED; + if (flags & IOMAP_REPORT) + iomap->length = ni->i_valid - offset; + } else if (rw || (flags & IOMAP_ZERO)) { + iomap->type = IOMAP_MAPPED; + } else { + iomap->type = IOMAP_UNWRITTEN; + } } - if ((flags & IOMAP_ZERO) && iomap->type == IOMAP_MAPPED) { + if ((flags & IOMAP_ZERO) && + (iomap->type == IOMAP_MAPPED || iomap->type == IOMAP_DELALLOC)) { /* Avoid too large requests. */ u32 tail; - u32 off_a = iomap->addr & (PAGE_SIZE - 1); + u32 off_a = offset & (PAGE_SIZE - 1); if (off_a) tail = PAGE_SIZE - off_a; else @@ -904,7 +956,9 @@ static int ntfs_iomap_end(struct inode *inode, loff_t pos, loff_t length, } } - if ((flags & IOMAP_ZERO) && iomap->type == IOMAP_MAPPED) { + if ((flags & IOMAP_ZERO) && + (iomap->type == IOMAP_MAPPED || iomap->type == IOMAP_DELALLOC)) { + /* Pair for code in ntfs_iomap_begin. */ balance_dirty_pages_ratelimited(inode->i_mapping); cond_resched(); } @@ -933,7 +987,7 @@ static void ntfs_iomap_put_folio(struct inode *inode, loff_t pos, loff_t f_pos = folio_pos(folio); loff_t f_end = f_pos + f_size; - if (ni->i_valid < end && end < f_end) { + if (ni->i_valid <= end && end < f_end) { /* zero range [end - f_end). */ /* The only thing ntfs_iomap_put_folio used for. 
*/ folio_zero_segment(folio, offset_in_folio(folio, end), f_size); @@ -942,23 +996,31 @@ static void ntfs_iomap_put_folio(struct inode *inode, loff_t pos, folio_put(folio); } +/* + * iomap_writeback_ops::writeback_range + */ static ssize_t ntfs_writeback_range(struct iomap_writepage_ctx *wpc, struct folio *folio, u64 offset, unsigned int len, u64 end_pos) { struct iomap *iomap = &wpc->iomap; - struct inode *inode = wpc->inode; - /* Check iomap position. */ - if (!(iomap->offset <= offset && - offset < iomap->offset + iomap->length)) { + if (iomap->offset + iomap->length <= offset || offset < iomap->offset) { int err; + struct inode *inode = wpc->inode; + struct ntfs_inode *ni = ntfs_i(inode); struct ntfs_sb_info *sbi = ntfs_sb(inode->i_sb); loff_t i_size_up = ntfs_up_cluster(sbi, inode->i_size); loff_t len_max = i_size_up - offset; - err = ntfs_iomap_begin(inode, offset, len_max, IOMAP_WRITE, - iomap, NULL); + err = ni->file.run_da.count ? ni_allocate_da_blocks(ni) : 0; + + if (!err) { + /* Use local special value 'WB_NO_DA' to disable delalloc. */ + err = ntfs_iomap_begin(inode, offset, len_max, + IOMAP_WRITE, iomap, WB_NO_DA); + } + if (err) { ntfs_set_state(sbi, NTFS_DIRTY_DIRTY); return err; @@ -1532,9 +1594,10 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, attr->nres.alloc_size = cpu_to_le64(ntfs_up_cluster(sbi, nsize)); - err = attr_allocate_clusters(sbi, &ni->file.run, 0, 0, - clst, NULL, ALLOCATE_DEF, - &alen, 0, NULL, NULL); + err = attr_allocate_clusters(sbi, &ni->file.run, NULL, + 0, 0, clst, NULL, + ALLOCATE_DEF, &alen, 0, + NULL, NULL); if (err) goto out5; @@ -1675,7 +1738,7 @@ out6: /* Delete ATTR_EA, if non-resident. 
*/ struct runs_tree run; run_init(&run); - attr_set_size(ni, ATTR_EA, NULL, 0, &run, 0, NULL, false, NULL); + attr_set_size(ni, ATTR_EA, NULL, 0, &run, 0, NULL, false); run_close(&run); } diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index ae0a6ba102c0..892f13e65d42 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -77,11 +77,14 @@ static_assert(sizeof(size_t) == 8); typedef u32 CLST; #endif +/* On-disk sparse cluster is marked as -1. */ #define SPARSE_LCN64 ((u64)-1) #define SPARSE_LCN ((CLST)-1) +/* Below are virtual (not on-disk) values. */ #define RESIDENT_LCN ((CLST)-2) #define COMPRESSED_LCN ((CLST)-3) #define EOF_LCN ((CLST)-4) +#define DELALLOC_LCN ((CLST)-5) enum RECORD_NUM { MFT_REC_MFT = 0, diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index b7017dd4d7cd..a705923de75e 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -108,6 +108,7 @@ struct ntfs_mount_options { unsigned force : 1; /* RW mount dirty volume. */ unsigned prealloc : 1; /* Preallocate space when file is growing. */ unsigned nocase : 1; /* case insensitive. */ + unsigned delalloc : 1; /* delay allocation. */ }; /* Special value to unpack and deallocate. */ @@ -132,7 +133,8 @@ struct ntfs_buffers { enum ALLOCATE_OPT { ALLOCATE_DEF = 0, // Allocate all clusters. ALLOCATE_MFT = 1, // Allocate for MFT. - ALLOCATE_ZERO = 2, // Zeroout new allocated clusters + ALLOCATE_ZERO = 2, // Zeroout new allocated clusters. + ALLOCATE_ONE_FR = 4, // Allocate one fragment only. 
}; enum bitmap_mutex_classes { @@ -213,7 +215,7 @@ struct ntfs_sb_info { u32 discard_granularity; u64 discard_granularity_mask_inv; // ~(discard_granularity_mask_inv-1) - u32 bdev_blocksize_mask; // bdev_logical_block_size(bdev) - 1; + u32 bdev_blocksize; // bdev_logical_block_size(bdev) u32 cluster_size; // bytes per cluster u32 cluster_mask; // == cluster_size - 1 @@ -272,6 +274,12 @@ struct ntfs_sb_info { struct { struct wnd_bitmap bitmap; // $Bitmap::Data CLST next_free_lcn; + /* Total sum of delay allocated clusters in all files. */ +#ifdef CONFIG_NTFS3_64BIT_CLUSTER + atomic64_t da; +#else + atomic_t da; +#endif } used; struct { @@ -379,7 +387,7 @@ struct ntfs_inode { */ u8 mi_loaded; - /* + /* * Use this field to avoid any write(s). * If inode is bad during initialization - use make_bad_inode * If inode is bad during operations - use this field @@ -390,7 +398,14 @@ struct ntfs_inode { struct ntfs_index dir; struct { struct rw_semaphore run_lock; + /* Unpacked runs from just one record. */ struct runs_tree run; + /* + * Pairs [vcn, len] for all delay allocated clusters. + * Normal file always contains delayed clusters in one fragment. + * TODO: use 2 CLST per pair instead of 3. 
+ */ + struct runs_tree run_da; #ifdef CONFIG_NTFS3_LZX_XPRESS struct folio *offs_folio; #endif @@ -430,19 +445,32 @@ enum REPARSE_SIGN { /* Functions from attrib.c */ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, - CLST vcn, CLST lcn, CLST len, CLST *pre_alloc, - enum ALLOCATE_OPT opt, CLST *alen, const size_t fr, - CLST *new_lcn, CLST *new_len); + struct runs_tree *run_da, CLST vcn, CLST lcn, + CLST len, CLST *pre_alloc, enum ALLOCATE_OPT opt, + CLST *alen, const size_t fr, CLST *new_lcn, + CLST *new_len); int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr, struct ATTR_LIST_ENTRY *le, struct mft_inode *mi, u64 new_size, struct runs_tree *run, struct ATTRIB **ins_attr, struct page *page); -int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type, - const __le16 *name, u8 name_len, struct runs_tree *run, - u64 new_size, const u64 *new_valid, bool keep_prealloc, - struct ATTRIB **ret); +int attr_set_size_ex(struct ntfs_inode *ni, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, struct runs_tree *run, + u64 new_size, const u64 *new_valid, bool keep_prealloc, + struct ATTRIB **ret, bool no_da); +static inline int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, + struct runs_tree *run, u64 new_size, + const u64 *new_valid, bool keep_prealloc) +{ + return attr_set_size_ex(ni, type, name, name_len, run, new_size, + new_valid, keep_prealloc, NULL, false); +} int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, - CLST *len, bool *new, bool zero, void **res); + CLST *len, bool *new, bool zero, void **res, + bool no_da); +int attr_data_get_block_locked(struct ntfs_inode *ni, CLST vcn, CLST clen, + CLST *lcn, CLST *len, bool *new, bool zero, + void **res, bool no_da); int attr_data_write_resident(struct ntfs_inode *ni, struct folio *folio); int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name, u8 name_len, struct 
runs_tree *run, @@ -590,6 +618,8 @@ int ni_rename(struct ntfs_inode *dir_ni, struct ntfs_inode *new_dir_ni, bool ni_is_dirty(struct inode *inode); loff_t ni_seek_data_or_hole(struct ntfs_inode *ni, loff_t offset, bool data); int ni_write_parents(struct ntfs_inode *ni, int sync); +int ni_allocate_da_blocks(struct ntfs_inode *ni); +int ni_allocate_da_blocks_locked(struct ntfs_inode *ni); /* Globals from fslog.c */ bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes); @@ -605,7 +635,8 @@ int ntfs_loadlog_and_replay(struct ntfs_inode *ni, struct ntfs_sb_info *sbi); int ntfs_look_for_free_space(struct ntfs_sb_info *sbi, CLST lcn, CLST len, CLST *new_lcn, CLST *new_len, enum ALLOCATE_OPT opt); -bool ntfs_check_for_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen); +bool ntfs_check_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen, + bool da); int ntfs_look_free_mft(struct ntfs_sb_info *sbi, CLST *rno, bool mft, struct ntfs_inode *ni, struct mft_inode **mi); void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno, bool is_mft); @@ -831,7 +862,8 @@ void run_truncate_around(struct runs_tree *run, CLST vcn); bool run_add_entry(struct runs_tree *run, CLST vcn, CLST lcn, CLST len, bool is_mft); bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len, CLST sub); -bool run_insert_range(struct runs_tree *run, CLST vcn, CLST len); +int run_insert_range(struct runs_tree *run, CLST vcn, CLST len); +int run_insert_range_da(struct runs_tree *run, CLST vcn, CLST len); bool run_get_entry(const struct runs_tree *run, size_t index, CLST *vcn, CLST *lcn, CLST *len); bool run_is_mapped_full(const struct runs_tree *run, CLST svcn, CLST evcn); @@ -851,6 +883,9 @@ int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, #endif int run_get_highest_vcn(CLST vcn, const u8 *run_buf, u64 *highest_vcn); int run_clone(const struct runs_tree *run, struct runs_tree *new_run); +bool run_remove_range(struct runs_tree *run, CLST vcn, CLST 
len, CLST *done); +CLST run_len(const struct runs_tree *run); +CLST run_get_max_vcn(const struct runs_tree *run); /* Globals from super.c */ void *ntfs_set_shared(void *ptr, u32 bytes); @@ -1027,6 +1062,36 @@ static inline int ntfs3_forced_shutdown(struct super_block *sb) return test_bit(NTFS_FLAGS_SHUTDOWN_BIT, &ntfs_sb(sb)->flags); } +/* Returns total sum of delay allocated clusters in all files. */ +static inline CLST ntfs_get_da(struct ntfs_sb_info *sbi) +{ +#ifdef CONFIG_NTFS3_64BIT_CLUSTER + return atomic64_read(&sbi->used.da); +#else + return atomic_read(&sbi->used.da); +#endif +} + +/* Update total count of delay allocated clusters. */ +static inline void ntfs_add_da(struct ntfs_sb_info *sbi, CLST da) +{ +#ifdef CONFIG_NTFS3_64BIT_CLUSTER + atomic64_add(da, &sbi->used.da); +#else + atomic_add(da, &sbi->used.da); +#endif +} + +/* Update total count of delay allocated clusters. */ +static inline void ntfs_sub_da(struct ntfs_sb_info *sbi, CLST da) +{ +#ifdef CONFIG_NTFS3_64BIT_CLUSTER + atomic64_sub(da, &sbi->used.da); +#else + atomic_sub(da, &sbi->used.da); +#endif +} + /* * ntfs_up_cluster - Align up on cluster boundary. */ diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c index dc59cad4fa37..c0324cdc174d 100644 --- a/fs/ntfs3/run.c +++ b/fs/ntfs3/run.c @@ -454,7 +454,7 @@ requires_new_range: /* * If existing range fits then were done. - * Otherwise extend found one and fall back to range jocode. + * Otherwise extend found one and fall back to range join code. */ if (r->vcn + r->len < vcn + len) r->len += len - ((r->vcn + r->len) - vcn); @@ -482,7 +482,8 @@ requires_new_range: return true; } -/* run_collapse_range +/* + * run_collapse_range * * Helper for attr_collapse_range(), * which is helper for fallocate(collapse_range). @@ -493,8 +494,9 @@ bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len, CLST sub) struct ntfs_run *r, *e, *eat_start, *eat_end; CLST end; - if (WARN_ON(!run_lookup(run, vcn, &index))) - return true; /* Should never be here. 
*/ + if (!run_lookup(run, vcn, &index) && index >= run->count) { + return true; + } e = run->runs + run->count; r = run->runs + index; @@ -560,13 +562,13 @@ bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len, CLST sub) * Helper for attr_insert_range(), * which is helper for fallocate(insert_range). */ -bool run_insert_range(struct runs_tree *run, CLST vcn, CLST len) +int run_insert_range(struct runs_tree *run, CLST vcn, CLST len) { size_t index; struct ntfs_run *r, *e; if (WARN_ON(!run_lookup(run, vcn, &index))) - return false; /* Should never be here. */ + return -EINVAL; /* Should never be here. */ e = run->runs + run->count; r = run->runs + index; @@ -588,13 +590,49 @@ bool run_insert_range(struct runs_tree *run, CLST vcn, CLST len) r->len = len1; if (!run_add_entry(run, vcn + len, lcn2, len2, false)) - return false; + return -ENOMEM; } if (!run_add_entry(run, vcn, SPARSE_LCN, len, false)) - return false; + return -ENOMEM; - return true; + return 0; +} + +/* run_insert_range_da + * + * Helper for attr_insert_range(), + * which is helper for fallocate(insert_range). + */ +int run_insert_range_da(struct runs_tree *run, CLST vcn, CLST len) +{ + struct ntfs_run *r, *r0 = NULL, *e = run->runs + run->count; + ; + + for (r = run->runs; r < e; r++) { + CLST end = r->vcn + r->len; + + if (vcn >= end) + continue; + + if (!r0 && r->vcn < vcn) { + r0 = r; + } else { + r->vcn += len; + } + } + + if (r0) { + /* split fragment. 
*/ + CLST len1 = vcn - r0->vcn; + CLST len2 = r0->len - len1; + + r0->len = len1; + if (!run_add_entry(run, vcn + len, SPARSE_LCN, len2, false)) + return -ENOMEM; + } + + return 0; } /* @@ -1209,3 +1247,97 @@ int run_clone(const struct runs_tree *run, struct runs_tree *new_run) new_run->count = run->count; return 0; } + +/* + * run_remove_range + * + */ +bool run_remove_range(struct runs_tree *run, CLST vcn, CLST len, CLST *done) +{ + size_t index, eat; + struct ntfs_run *r, *e, *eat_start, *eat_end; + CLST end, d; + + *done = 0; + + /* Fast check. */ + if (!run->count) + return true; + + if (!run_lookup(run, vcn, &index) && index >= run->count) { + /* No entries in this run. */ + return true; + } + + + e = run->runs + run->count; + r = run->runs + index; + end = vcn + len; + + if (vcn > r->vcn) { + CLST r_end = r->vcn + r->len; + d = vcn - r->vcn; + + if (r_end > end) { + /* Remove a middle part, split. */ + *done += len; + r->len = d; + return run_add_entry(run, end, r->lcn, r_end - end, + false); + } + /* Remove tail of run. */ + *done += r->len - d; + r->len = d; + r += 1; + } + + eat_start = r; + eat_end = r; + + for (; r < e; r++) { + if (r->vcn >= end) + continue; + + if (r->vcn + r->len <= end) { + /* Eat this run. 
*/ + *done += r->len; + eat_end = r + 1; + continue; + } + + d = end - r->vcn; + *done += d; + if (r->lcn != SPARSE_LCN) + r->lcn += d; + r->len -= d; + r->vcn = end; + } + + eat = eat_end - eat_start; + memmove(eat_start, eat_end, (e - eat_end) * sizeof(*r)); + run->count -= eat; + + return true; +} + +CLST run_len(const struct runs_tree *run) +{ + const struct ntfs_run *r, *e; + CLST len = 0; + + for (r = run->runs, e = r + run->count; r < e; r++) { + len += r->len; + } + + return len; +} + +CLST run_get_max_vcn(const struct runs_tree *run) +{ + const struct ntfs_run *r; + if (!run->count) + return 0; + + r = run->runs + run->count - 1; + return r->vcn + r->len; +} diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index a3c07f2b604f..27411203082a 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -269,6 +269,8 @@ enum Opt { Opt_prealloc, Opt_prealloc_bool, Opt_nocase, + Opt_delalloc, + Opt_delalloc_bool, Opt_err, }; @@ -293,6 +295,8 @@ static const struct fs_parameter_spec ntfs_fs_parameters[] = { fsparam_flag("prealloc", Opt_prealloc), fsparam_bool("prealloc", Opt_prealloc_bool), fsparam_flag("nocase", Opt_nocase), + fsparam_flag("delalloc", Opt_delalloc), + fsparam_bool("delalloc", Opt_delalloc_bool), {} }; // clang-format on @@ -410,6 +414,12 @@ static int ntfs_fs_parse_param(struct fs_context *fc, case Opt_nocase: opts->nocase = 1; break; + case Opt_delalloc: + opts->delalloc = 1; + break; + case Opt_delalloc_bool: + opts->delalloc = result.boolean; + break; default: /* Should not be here unless we forget add case. 
*/ return -EINVAL; @@ -726,14 +736,22 @@ static int ntfs_statfs(struct dentry *dentry, struct kstatfs *buf) struct super_block *sb = dentry->d_sb; struct ntfs_sb_info *sbi = sb->s_fs_info; struct wnd_bitmap *wnd = &sbi->used.bitmap; + CLST da_clusters = ntfs_get_da(sbi); buf->f_type = sb->s_magic; - buf->f_bsize = sbi->cluster_size; + buf->f_bsize = buf->f_frsize = sbi->cluster_size; buf->f_blocks = wnd->nbits; - buf->f_bfree = buf->f_bavail = wnd_zeroes(wnd); + buf->f_bfree = wnd_zeroes(wnd); + if (buf->f_bfree > da_clusters) { + buf->f_bfree -= da_clusters; + } else { + buf->f_bfree = 0; + } + buf->f_bavail = buf->f_bfree; + buf->f_fsid.val[0] = sbi->volume.ser_num; - buf->f_fsid.val[1] = (sbi->volume.ser_num >> 32); + buf->f_fsid.val[1] = sbi->volume.ser_num >> 32; buf->f_namelen = NTFS_NAME_LEN; return 0; @@ -778,6 +796,8 @@ static int ntfs_show_options(struct seq_file *m, struct dentry *root) seq_puts(m, ",prealloc"); if (opts->nocase) seq_puts(m, ",nocase"); + if (opts->delalloc) + seq_puts(m, ",delalloc"); return 0; } @@ -1088,7 +1108,7 @@ read_boot: dev_size += sector_size - 1; } - sbi->bdev_blocksize_mask = max(boot_sector_size, sector_size) - 1; + sbi->bdev_blocksize = max(boot_sector_size, sector_size); sbi->mft.lbo = mlcn << cluster_bits; sbi->mft.lbo2 = mlcn2 << cluster_bits; diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index c93df55e98d0..2302539852ef 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -460,7 +460,7 @@ update_ea: new_sz = size; err = attr_set_size(ni, ATTR_EA, NULL, 0, &ea_run, new_sz, &new_sz, - false, NULL); + false); if (err) goto out;