A fix for a race condition around r_parent tracking that took a long

time to track down from Alex and some fixes for potential crashes on
 accessing invalid memory from Max and myself.  All marked for stable.
 -----BEGIN PGP SIGNATURE-----
 
 iQFHBAABCAAxFiEEydHwtzie9C7TfviiSn/eOAIR84sFAmjFk5wTHGlkcnlvbW92
 QGdtYWlsLmNvbQAKCRBKf944AhHzi/b1B/9Wm5Sb1Gja4BK04xrtQ76VWAquiPp+
 CSB685OnFPr3437sIe06ZDZjVnqqLyYVFBmUlVOlkSfYaR4uHtznpvdvhVlerN+y
 UaP/qHp5vacEf3n+MWCUXnDTqau65J+wDri64HNOWyl3UvZ/aaxz9UdZROqNQUMj
 H1OdYgGr1n4DLT/iBntr8t9GAuZ5/bjwXWJGZ7xkaO4hjth6fAzw45BpX4Iuoox9
 3ZPBJjz5p1SiXtREDL1eiyipE5o7Fg+R0QIIH6tQaH8KORyZYZDjdj2jyPg2+Wq2
 N1vFP6IxRZyZ3hapK3ZCQjnAM26lZJ+aaWvFjT45VdLz88HJxvDDV/1k
 =c7ou
 -----END PGP SIGNATURE-----

Merge tag 'ceph-for-6.17-rc6' of https://github.com/ceph/ceph-client

Pull ceph fixes from Ilya Dryomov:
 "A fix for a race condition around r_parent tracking that took a long
  time to track down from Alex and some fixes for potential crashes on
  accessing invalid memory from Max and myself.

  All marked for stable"

* tag 'ceph-for-6.17-rc6' of https://github.com/ceph/ceph-client:
  libceph: fix invalid accesses to ceph_connection_v1_info
  ceph: fix crash after fscrypt_encrypt_pagecache_blocks() error
  ceph: always call ceph_shift_unused_folios_left()
  ceph: fix race condition where r_parent becomes stale before sending message
  ceph: fix race condition validating r_parent before applying state
This commit is contained in:
Linus Torvalds 2025-09-13 10:45:11 -07:00
commit 5cd64d4f92
8 changed files with 223 additions and 126 deletions

View file

@ -1264,7 +1264,9 @@ static inline int move_dirty_folio_in_page_array(struct address_space *mapping,
0,
gfp_flags);
if (IS_ERR(pages[index])) {
if (PTR_ERR(pages[index]) == -EINVAL) {
int err = PTR_ERR(pages[index]);
if (err == -EINVAL) {
pr_err_client(cl, "inode->i_blkbits=%hhu\n",
inode->i_blkbits);
}
@ -1273,7 +1275,7 @@ static inline int move_dirty_folio_in_page_array(struct address_space *mapping,
BUG_ON(ceph_wbc->locked_pages == 0);
pages[index] = NULL;
return PTR_ERR(pages[index]);
return err;
}
} else {
pages[index] = &folio->page;
@ -1687,6 +1689,7 @@ get_more_pages:
process_folio_batch:
rc = ceph_process_folio_batch(mapping, wbc, &ceph_wbc);
ceph_shift_unused_folios_left(&ceph_wbc.fbatch);
if (rc)
goto release_folios;
@ -1695,8 +1698,6 @@ process_folio_batch:
goto release_folios;
if (ceph_wbc.processed_in_fbatch) {
ceph_shift_unused_folios_left(&ceph_wbc.fbatch);
if (folio_batch_count(&ceph_wbc.fbatch) == 0 &&
ceph_wbc.locked_pages < ceph_wbc.max_pages) {
doutc(cl, "reached end fbatch, trying for more\n");

View file

@ -55,8 +55,6 @@ static int mdsc_show(struct seq_file *s, void *p)
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
struct rb_node *rp;
int pathlen = 0;
u64 pathbase;
char *path;
mutex_lock(&mdsc->mutex);
@ -81,8 +79,8 @@ static int mdsc_show(struct seq_file *s, void *p)
if (req->r_inode) {
seq_printf(s, " #%llx", ceph_ino(req->r_inode));
} else if (req->r_dentry) {
path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen,
&pathbase, 0);
struct ceph_path_info path_info;
path = ceph_mdsc_build_path(mdsc, req->r_dentry, &path_info, 0);
if (IS_ERR(path))
path = NULL;
spin_lock(&req->r_dentry->d_lock);
@ -91,7 +89,7 @@ static int mdsc_show(struct seq_file *s, void *p)
req->r_dentry,
path ? path : "");
spin_unlock(&req->r_dentry->d_lock);
ceph_mdsc_free_path(path, pathlen);
ceph_mdsc_free_path_info(&path_info);
} else if (req->r_path1) {
seq_printf(s, " #%llx/%s", req->r_ino1.ino,
req->r_path1);
@ -100,8 +98,8 @@ static int mdsc_show(struct seq_file *s, void *p)
}
if (req->r_old_dentry) {
path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &pathlen,
&pathbase, 0);
struct ceph_path_info path_info;
path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &path_info, 0);
if (IS_ERR(path))
path = NULL;
spin_lock(&req->r_old_dentry->d_lock);
@ -111,7 +109,7 @@ static int mdsc_show(struct seq_file *s, void *p)
req->r_old_dentry,
path ? path : "");
spin_unlock(&req->r_old_dentry->d_lock);
ceph_mdsc_free_path(path, pathlen);
ceph_mdsc_free_path_info(&path_info);
} else if (req->r_path2 && req->r_op != CEPH_MDS_OP_SYMLINK) {
if (req->r_ino2.ino)
seq_printf(s, " #%llx/%s", req->r_ino2.ino,

View file

@ -1271,10 +1271,8 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
/* If op failed, mark everyone involved for errors */
if (result) {
int pathlen = 0;
u64 base = 0;
char *path = ceph_mdsc_build_path(mdsc, dentry, &pathlen,
&base, 0);
struct ceph_path_info path_info = {0};
char *path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0);
/* mark error on parent + clear complete */
mapping_set_error(req->r_parent->i_mapping, result);
@ -1288,8 +1286,8 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
mapping_set_error(req->r_old_inode->i_mapping, result);
pr_warn_client(cl, "failure path=(%llx)%s result=%d!\n",
base, IS_ERR(path) ? "<<bad>>" : path, result);
ceph_mdsc_free_path(path, pathlen);
path_info.vino.ino, IS_ERR(path) ? "<<bad>>" : path, result);
ceph_mdsc_free_path_info(&path_info);
}
out:
iput(req->r_old_inode);
@ -1347,8 +1345,6 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
int err = -EROFS;
int op;
char *path;
int pathlen;
u64 pathbase;
if (ceph_snap(dir) == CEPH_SNAPDIR) {
/* rmdir .snap/foo is RMSNAP */
@ -1367,14 +1363,15 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
if (!dn) {
try_async = false;
} else {
path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0);
struct ceph_path_info path_info;
path = ceph_mdsc_build_path(mdsc, dn, &path_info, 0);
if (IS_ERR(path)) {
try_async = false;
err = 0;
} else {
err = ceph_mds_check_access(mdsc, path, MAY_WRITE);
}
ceph_mdsc_free_path(path, pathlen);
ceph_mdsc_free_path_info(&path_info);
dput(dn);
/* For none EACCES cases will let the MDS do the mds auth check */

View file

@ -368,8 +368,6 @@ int ceph_open(struct inode *inode, struct file *file)
int flags, fmode, wanted;
struct dentry *dentry;
char *path;
int pathlen;
u64 pathbase;
bool do_sync = false;
int mask = MAY_READ;
@ -399,14 +397,15 @@ int ceph_open(struct inode *inode, struct file *file)
if (!dentry) {
do_sync = true;
} else {
path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0);
struct ceph_path_info path_info;
path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0);
if (IS_ERR(path)) {
do_sync = true;
err = 0;
} else {
err = ceph_mds_check_access(mdsc, path, mask);
}
ceph_mdsc_free_path(path, pathlen);
ceph_mdsc_free_path_info(&path_info);
dput(dentry);
/* For none EACCES cases will let the MDS do the mds auth check */
@ -614,15 +613,13 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc,
mapping_set_error(req->r_parent->i_mapping, result);
if (result) {
int pathlen = 0;
u64 base = 0;
char *path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen,
&base, 0);
struct ceph_path_info path_info = {0};
char *path = ceph_mdsc_build_path(mdsc, req->r_dentry, &path_info, 0);
pr_warn_client(cl,
"async create failure path=(%llx)%s result=%d!\n",
base, IS_ERR(path) ? "<<bad>>" : path, result);
ceph_mdsc_free_path(path, pathlen);
path_info.vino.ino, IS_ERR(path) ? "<<bad>>" : path, result);
ceph_mdsc_free_path_info(&path_info);
ceph_dir_clear_complete(req->r_parent);
if (!d_unhashed(dentry))
@ -791,8 +788,6 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
int mask;
int err;
char *path;
int pathlen;
u64 pathbase;
doutc(cl, "%p %llx.%llx dentry %p '%pd' %s flags %d mode 0%o\n",
dir, ceph_vinop(dir), dentry, dentry,
@ -814,7 +809,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
if (!dn) {
try_async = false;
} else {
path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0);
struct ceph_path_info path_info;
path = ceph_mdsc_build_path(mdsc, dn, &path_info, 0);
if (IS_ERR(path)) {
try_async = false;
err = 0;
@ -826,7 +822,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
mask |= MAY_WRITE;
err = ceph_mds_check_access(mdsc, path, mask);
}
ceph_mdsc_free_path(path, pathlen);
ceph_mdsc_free_path_info(&path_info);
dput(dn);
/* For none EACCES cases will let the MDS do the mds auth check */

View file

@ -55,6 +55,52 @@ static int ceph_set_ino_cb(struct inode *inode, void *data)
return 0;
}
/*
* Check if the parent inode matches the vino from directory reply info
*/
static inline bool ceph_vino_matches_parent(struct inode *parent,
struct ceph_vino vino)
{
return ceph_ino(parent) == vino.ino && ceph_snap(parent) == vino.snap;
}
/*
* Validate that the directory inode referenced by @req->r_parent matches the
* inode number and snapshot id contained in the reply's directory record. If
* they do not match which can theoretically happen if the parent dentry was
* moved between the time the request was issued and the reply arrived fall
* back to looking up the correct inode in the inode cache.
*
* A reference is *always* returned. Callers that receive a different inode
* than the original @parent are responsible for dropping the extra reference
* once the reply has been processed.
*/
static struct inode *ceph_get_reply_dir(struct super_block *sb,
struct inode *parent,
struct ceph_mds_reply_info_parsed *rinfo)
{
struct ceph_vino vino;
if (unlikely(!rinfo->diri.in))
return parent; /* nothing to compare against */
/* If we didn't have a cached parent inode to begin with, just bail out. */
if (!parent)
return NULL;
vino.ino = le64_to_cpu(rinfo->diri.in->ino);
vino.snap = le64_to_cpu(rinfo->diri.in->snapid);
if (likely(ceph_vino_matches_parent(parent, vino)))
return parent; /* matches use the original reference */
/* Mismatch this should be rare. Emit a WARN and obtain the correct inode. */
WARN_ONCE(1, "ceph: reply dir mismatch (parent valid %llx.%llx reply %llx.%llx)\n",
ceph_ino(parent), ceph_snap(parent), vino.ino, vino.snap);
return ceph_get_inode(sb, vino, NULL);
}
/**
* ceph_new_inode - allocate a new inode in advance of an expected create
* @dir: parent directory for new inode
@ -1523,6 +1569,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
struct ceph_vino tvino, dvino;
struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
struct ceph_client *cl = fsc->client;
struct inode *parent_dir = NULL;
int err = 0;
doutc(cl, "%p is_dentry %d is_target %d\n", req,
@ -1536,10 +1583,17 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
}
if (rinfo->head->is_dentry) {
struct inode *dir = req->r_parent;
if (dir) {
err = ceph_fill_inode(dir, NULL, &rinfo->diri,
/*
* r_parent may be stale, in cases when R_PARENT_LOCKED is not set,
* so we need to get the correct inode
*/
parent_dir = ceph_get_reply_dir(sb, req->r_parent, rinfo);
if (unlikely(IS_ERR(parent_dir))) {
err = PTR_ERR(parent_dir);
goto done;
}
if (parent_dir) {
err = ceph_fill_inode(parent_dir, NULL, &rinfo->diri,
rinfo->dirfrag, session, -1,
&req->r_caps_reservation);
if (err < 0)
@ -1548,14 +1602,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
WARN_ON_ONCE(1);
}
if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
if (parent_dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
bool is_nokey = false;
struct qstr dname;
struct dentry *dn, *parent;
struct fscrypt_str oname = FSTR_INIT(NULL, 0);
struct ceph_fname fname = { .dir = dir,
struct ceph_fname fname = { .dir = parent_dir,
.name = rinfo->dname,
.ctext = rinfo->altname,
.name_len = rinfo->dname_len,
@ -1564,10 +1618,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
BUG_ON(!rinfo->head->is_target);
BUG_ON(req->r_dentry);
parent = d_find_any_alias(dir);
parent = d_find_any_alias(parent_dir);
BUG_ON(!parent);
err = ceph_fname_alloc_buffer(dir, &oname);
err = ceph_fname_alloc_buffer(parent_dir, &oname);
if (err < 0) {
dput(parent);
goto done;
@ -1576,7 +1630,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
err = ceph_fname_to_usr(&fname, NULL, &oname, &is_nokey);
if (err < 0) {
dput(parent);
ceph_fname_free_buffer(dir, &oname);
ceph_fname_free_buffer(parent_dir, &oname);
goto done;
}
dname.name = oname.name;
@ -1595,7 +1649,7 @@ retry_lookup:
dname.len, dname.name, dn);
if (!dn) {
dput(parent);
ceph_fname_free_buffer(dir, &oname);
ceph_fname_free_buffer(parent_dir, &oname);
err = -ENOMEM;
goto done;
}
@ -1610,12 +1664,12 @@ retry_lookup:
ceph_snap(d_inode(dn)) != tvino.snap)) {
doutc(cl, " dn %p points to wrong inode %p\n",
dn, d_inode(dn));
ceph_dir_clear_ordered(dir);
ceph_dir_clear_ordered(parent_dir);
d_delete(dn);
dput(dn);
goto retry_lookup;
}
ceph_fname_free_buffer(dir, &oname);
ceph_fname_free_buffer(parent_dir, &oname);
req->r_dentry = dn;
dput(parent);
@ -1794,6 +1848,9 @@ retry_lookup:
&dvino, ptvino);
}
done:
/* Drop extra ref from ceph_get_reply_dir() if it returned a new inode */
if (unlikely(!IS_ERR_OR_NULL(parent_dir) && parent_dir != req->r_parent))
iput(parent_dir);
doutc(cl, "done err=%d\n", err);
return err;
}
@ -2487,22 +2544,21 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode,
int truncate_retry = 20; /* The RMW will take around 50ms */
struct dentry *dentry;
char *path;
int pathlen;
u64 pathbase;
bool do_sync = false;
dentry = d_find_alias(inode);
if (!dentry) {
do_sync = true;
} else {
path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0);
struct ceph_path_info path_info;
path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0);
if (IS_ERR(path)) {
do_sync = true;
err = 0;
} else {
err = ceph_mds_check_access(mdsc, path, MAY_WRITE);
}
ceph_mdsc_free_path(path, pathlen);
ceph_mdsc_free_path_info(&path_info);
dput(dentry);
/* For none EACCES cases will let the MDS do the mds auth check */

View file

@ -2681,8 +2681,7 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen)
* ceph_mdsc_build_path - build a path string to a given dentry
* @mdsc: mds client
* @dentry: dentry to which path should be built
* @plen: returned length of string
* @pbase: returned base inode number
* @path_info: output path, length, base ino+snap, and freepath ownership flag
* @for_wire: is this path going to be sent to the MDS?
*
* Build a string that represents the path to the dentry. This is mostly called
@ -2700,7 +2699,7 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen)
* foo/.snap/bar -> foo//bar
*/
char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry,
int *plen, u64 *pbase, int for_wire)
struct ceph_path_info *path_info, int for_wire)
{
struct ceph_client *cl = mdsc->fsc->client;
struct dentry *cur;
@ -2810,16 +2809,28 @@ retry:
return ERR_PTR(-ENAMETOOLONG);
}
*pbase = base;
*plen = PATH_MAX - 1 - pos;
/* Initialize the output structure */
memset(path_info, 0, sizeof(*path_info));
path_info->vino.ino = base;
path_info->pathlen = PATH_MAX - 1 - pos;
path_info->path = path + pos;
path_info->freepath = true;
/* Set snap from dentry if available */
if (d_inode(dentry))
path_info->vino.snap = ceph_snap(d_inode(dentry));
else
path_info->vino.snap = CEPH_NOSNAP;
doutc(cl, "on %p %d built %llx '%.*s'\n", dentry, d_count(dentry),
base, *plen, path + pos);
base, PATH_MAX - 1 - pos, path + pos);
return path + pos;
}
static int build_dentry_path(struct ceph_mds_client *mdsc, struct dentry *dentry,
struct inode *dir, const char **ppath, int *ppathlen,
u64 *pino, bool *pfreepath, bool parent_locked)
struct inode *dir, struct ceph_path_info *path_info,
bool parent_locked)
{
char *path;
@ -2828,41 +2839,47 @@ static int build_dentry_path(struct ceph_mds_client *mdsc, struct dentry *dentry
dir = d_inode_rcu(dentry->d_parent);
if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP &&
!IS_ENCRYPTED(dir)) {
*pino = ceph_ino(dir);
path_info->vino.ino = ceph_ino(dir);
path_info->vino.snap = ceph_snap(dir);
rcu_read_unlock();
*ppath = dentry->d_name.name;
*ppathlen = dentry->d_name.len;
path_info->path = dentry->d_name.name;
path_info->pathlen = dentry->d_name.len;
path_info->freepath = false;
return 0;
}
rcu_read_unlock();
path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1);
path = ceph_mdsc_build_path(mdsc, dentry, path_info, 1);
if (IS_ERR(path))
return PTR_ERR(path);
*ppath = path;
*pfreepath = true;
/*
* ceph_mdsc_build_path already fills path_info, including snap handling.
*/
return 0;
}
static int build_inode_path(struct inode *inode,
const char **ppath, int *ppathlen, u64 *pino,
bool *pfreepath)
static int build_inode_path(struct inode *inode, struct ceph_path_info *path_info)
{
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
struct dentry *dentry;
char *path;
if (ceph_snap(inode) == CEPH_NOSNAP) {
*pino = ceph_ino(inode);
*ppathlen = 0;
path_info->vino.ino = ceph_ino(inode);
path_info->vino.snap = ceph_snap(inode);
path_info->pathlen = 0;
path_info->freepath = false;
return 0;
}
dentry = d_find_alias(inode);
path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1);
path = ceph_mdsc_build_path(mdsc, dentry, path_info, 1);
dput(dentry);
if (IS_ERR(path))
return PTR_ERR(path);
*ppath = path;
*pfreepath = true;
/*
* ceph_mdsc_build_path already fills path_info, including snap from dentry.
* Override with inode's snap since that's what this function is for.
*/
path_info->vino.snap = ceph_snap(inode);
return 0;
}
@ -2872,26 +2889,32 @@ static int build_inode_path(struct inode *inode,
*/
static int set_request_path_attr(struct ceph_mds_client *mdsc, struct inode *rinode,
struct dentry *rdentry, struct inode *rdiri,
const char *rpath, u64 rino, const char **ppath,
int *pathlen, u64 *ino, bool *freepath,
const char *rpath, u64 rino,
struct ceph_path_info *path_info,
bool parent_locked)
{
struct ceph_client *cl = mdsc->fsc->client;
int r = 0;
/* Initialize the output structure */
memset(path_info, 0, sizeof(*path_info));
if (rinode) {
r = build_inode_path(rinode, ppath, pathlen, ino, freepath);
r = build_inode_path(rinode, path_info);
doutc(cl, " inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
ceph_snap(rinode));
} else if (rdentry) {
r = build_dentry_path(mdsc, rdentry, rdiri, ppath, pathlen, ino,
freepath, parent_locked);
doutc(cl, " dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, *ppath);
r = build_dentry_path(mdsc, rdentry, rdiri, path_info, parent_locked);
doutc(cl, " dentry %p %llx/%.*s\n", rdentry, path_info->vino.ino,
path_info->pathlen, path_info->path);
} else if (rpath || rino) {
*ino = rino;
*ppath = rpath;
*pathlen = rpath ? strlen(rpath) : 0;
doutc(cl, " path %.*s\n", *pathlen, rpath);
path_info->vino.ino = rino;
path_info->vino.snap = CEPH_NOSNAP;
path_info->path = rpath;
path_info->pathlen = rpath ? strlen(rpath) : 0;
path_info->freepath = false;
doutc(cl, " path %.*s\n", path_info->pathlen, rpath);
}
return r;
@ -2968,11 +2991,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
struct ceph_client *cl = mdsc->fsc->client;
struct ceph_msg *msg;
struct ceph_mds_request_head_legacy *lhead;
const char *path1 = NULL;
const char *path2 = NULL;
u64 ino1 = 0, ino2 = 0;
int pathlen1 = 0, pathlen2 = 0;
bool freepath1 = false, freepath2 = false;
struct ceph_path_info path_info1 = {0};
struct ceph_path_info path_info2 = {0};
struct dentry *old_dentry = NULL;
int len;
u16 releases;
@ -2982,25 +3002,49 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
u16 request_head_version = mds_supported_head_version(session);
kuid_t caller_fsuid = req->r_cred->fsuid;
kgid_t caller_fsgid = req->r_cred->fsgid;
bool parent_locked = test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
ret = set_request_path_attr(mdsc, req->r_inode, req->r_dentry,
req->r_parent, req->r_path1, req->r_ino1.ino,
&path1, &pathlen1, &ino1, &freepath1,
test_bit(CEPH_MDS_R_PARENT_LOCKED,
&req->r_req_flags));
req->r_parent, req->r_path1, req->r_ino1.ino,
&path_info1, parent_locked);
if (ret < 0) {
msg = ERR_PTR(ret);
goto out;
}
/*
* When the parent directory's i_rwsem is *not* locked, req->r_parent may
* have become stale (e.g. after a concurrent rename) between the time the
* dentry was looked up and now. If we detect that the stored r_parent
* does not match the inode number we just encoded for the request, switch
* to the correct inode so that the MDS receives a valid parent reference.
*/
if (!parent_locked && req->r_parent && path_info1.vino.ino &&
ceph_ino(req->r_parent) != path_info1.vino.ino) {
struct inode *old_parent = req->r_parent;
struct inode *correct_dir = ceph_get_inode(mdsc->fsc->sb, path_info1.vino, NULL);
if (!IS_ERR(correct_dir)) {
WARN_ONCE(1, "ceph: r_parent mismatch (had %llx wanted %llx) - updating\n",
ceph_ino(old_parent), path_info1.vino.ino);
/*
* Transfer CEPH_CAP_PIN from the old parent to the new one.
* The pin was taken earlier in ceph_mdsc_submit_request().
*/
ceph_put_cap_refs(ceph_inode(old_parent), CEPH_CAP_PIN);
iput(old_parent);
req->r_parent = correct_dir;
ceph_get_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
}
}
/* If r_old_dentry is set, then assume that its parent is locked */
if (req->r_old_dentry &&
!(req->r_old_dentry->d_flags & DCACHE_DISCONNECTED))
old_dentry = req->r_old_dentry;
ret = set_request_path_attr(mdsc, NULL, old_dentry,
req->r_old_dentry_dir,
req->r_path2, req->r_ino2.ino,
&path2, &pathlen2, &ino2, &freepath2, true);
req->r_old_dentry_dir,
req->r_path2, req->r_ino2.ino,
&path_info2, true);
if (ret < 0) {
msg = ERR_PTR(ret);
goto out_free1;
@ -3031,7 +3075,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
/* filepaths */
len += 2 * (1 + sizeof(u32) + sizeof(u64));
len += pathlen1 + pathlen2;
len += path_info1.pathlen + path_info2.pathlen;
/* cap releases */
len += sizeof(struct ceph_mds_request_release) *
@ -3039,9 +3083,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
!!req->r_old_inode_drop + !!req->r_old_dentry_drop);
if (req->r_dentry_drop)
len += pathlen1;
len += path_info1.pathlen;
if (req->r_old_dentry_drop)
len += pathlen2;
len += path_info2.pathlen;
/* MClientRequest tail */
@ -3154,8 +3198,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
lhead->ino = cpu_to_le64(req->r_deleg_ino);
lhead->args = req->r_args;
ceph_encode_filepath(&p, end, ino1, path1);
ceph_encode_filepath(&p, end, ino2, path2);
ceph_encode_filepath(&p, end, path_info1.vino.ino, path_info1.path);
ceph_encode_filepath(&p, end, path_info2.vino.ino, path_info2.path);
/* make note of release offset, in case we need to replay */
req->r_request_release_offset = p - msg->front.iov_base;
@ -3218,11 +3262,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
msg->hdr.data_off = cpu_to_le16(0);
out_free2:
if (freepath2)
ceph_mdsc_free_path((char *)path2, pathlen2);
ceph_mdsc_free_path_info(&path_info2);
out_free1:
if (freepath1)
ceph_mdsc_free_path((char *)path1, pathlen1);
ceph_mdsc_free_path_info(&path_info1);
out:
return msg;
out_err:
@ -4579,24 +4621,20 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg)
struct ceph_pagelist *pagelist = recon_state->pagelist;
struct dentry *dentry;
struct ceph_cap *cap;
char *path;
int pathlen = 0, err;
u64 pathbase;
struct ceph_path_info path_info = {0};
int err;
u64 snap_follows;
dentry = d_find_primary(inode);
if (dentry) {
/* set pathbase to parent dir when msg_version >= 2 */
path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase,
char *path = ceph_mdsc_build_path(mdsc, dentry, &path_info,
recon_state->msg_version >= 2);
dput(dentry);
if (IS_ERR(path)) {
err = PTR_ERR(path);
goto out_err;
}
} else {
path = NULL;
pathbase = 0;
}
spin_lock(&ci->i_ceph_lock);
@ -4629,7 +4667,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg)
rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
rec.v2.issued = cpu_to_le32(cap->issued);
rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
rec.v2.pathbase = cpu_to_le64(pathbase);
rec.v2.pathbase = cpu_to_le64(path_info.vino.ino);
rec.v2.flock_len = (__force __le32)
((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
} else {
@ -4644,7 +4682,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg)
ts = inode_get_atime(inode);
ceph_encode_timespec64(&rec.v1.atime, &ts);
rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
rec.v1.pathbase = cpu_to_le64(pathbase);
rec.v1.pathbase = cpu_to_le64(path_info.vino.ino);
}
if (list_empty(&ci->i_cap_snaps)) {
@ -4706,7 +4744,7 @@ encode_again:
sizeof(struct ceph_filelock);
rec.v2.flock_len = cpu_to_le32(struct_len);
struct_len += sizeof(u32) + pathlen + sizeof(rec.v2);
struct_len += sizeof(u32) + path_info.pathlen + sizeof(rec.v2);
if (struct_v >= 2)
struct_len += sizeof(u64); /* snap_follows */
@ -4730,7 +4768,7 @@ encode_again:
ceph_pagelist_encode_8(pagelist, 1);
ceph_pagelist_encode_32(pagelist, struct_len);
}
ceph_pagelist_encode_string(pagelist, path, pathlen);
ceph_pagelist_encode_string(pagelist, (char *)path_info.path, path_info.pathlen);
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2));
ceph_locks_to_pagelist(flocks, pagelist,
num_fcntl_locks, num_flock_locks);
@ -4741,17 +4779,17 @@ out_freeflocks:
} else {
err = ceph_pagelist_reserve(pagelist,
sizeof(u64) + sizeof(u32) +
pathlen + sizeof(rec.v1));
path_info.pathlen + sizeof(rec.v1));
if (err)
goto out_err;
ceph_pagelist_encode_64(pagelist, ceph_ino(inode));
ceph_pagelist_encode_string(pagelist, path, pathlen);
ceph_pagelist_encode_string(pagelist, (char *)path_info.path, path_info.pathlen);
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
}
out_err:
ceph_mdsc_free_path(path, pathlen);
ceph_mdsc_free_path_info(&path_info);
if (!err)
recon_state->nr_caps++;
return err;

View file

@ -617,14 +617,24 @@ extern int ceph_mds_check_access(struct ceph_mds_client *mdsc, char *tpath,
extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
static inline void ceph_mdsc_free_path(char *path, int len)
/*
* Structure to group path-related output parameters for build_*_path functions
*/
struct ceph_path_info {
const char *path;
int pathlen;
struct ceph_vino vino;
bool freepath;
};
static inline void ceph_mdsc_free_path_info(const struct ceph_path_info *path_info)
{
if (!IS_ERR_OR_NULL(path))
__putname(path - (PATH_MAX - 1 - len));
if (path_info && path_info->freepath && !IS_ERR_OR_NULL(path_info->path))
__putname((char *)path_info->path - (PATH_MAX - 1 - path_info->pathlen));
}
extern char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc,
struct dentry *dentry, int *plen, u64 *base,
struct dentry *dentry, struct ceph_path_info *path_info,
int for_wire);
extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry);

View file

@ -1524,7 +1524,7 @@ static void con_fault_finish(struct ceph_connection *con)
* in case we faulted due to authentication, invalidate our
* current tickets so that we can get new ones.
*/
if (con->v1.auth_retry) {
if (!ceph_msgr2(from_msgr(con->msgr)) && con->v1.auth_retry) {
dout("auth_retry %d, invalidating\n", con->v1.auth_retry);
if (con->ops->invalidate_authorizer)
con->ops->invalidate_authorizer(con);
@ -1714,9 +1714,10 @@ static void clear_standby(struct ceph_connection *con)
{
/* come back from STANDBY? */
if (con->state == CEPH_CON_S_STANDBY) {
dout("clear_standby %p and ++connect_seq\n", con);
dout("clear_standby %p\n", con);
con->state = CEPH_CON_S_PREOPEN;
con->v1.connect_seq++;
if (!ceph_msgr2(from_msgr(con->msgr)))
con->v1.connect_seq++;
WARN_ON(ceph_con_flag_test(con, CEPH_CON_F_WRITE_PENDING));
WARN_ON(ceph_con_flag_test(con, CEPH_CON_F_KEEPALIVE_PENDING));
}