Skip to content

Commit

Permalink
Merge tag 'ceph-for-5.16-rc1' of git://github.com/ceph/ceph-client
Browse files Browse the repository at this point in the history
Pull ceph updates from Ilya Dryomov:
 "One notable change here is that async creates and unlinks introduced
  in 5.7 are now enabled by default. This should greatly speed up things
  like rm, tar and rsync. To opt out, the wsync mount option can be used.

  Other than that we have a pile of bug fixes all across the filesystem
  from Jeff, Xiubo and Kotresh and a metrics infrastructure rework from
  Luis"

* tag 'ceph-for-5.16-rc1' of git://github.com/ceph/ceph-client:
  ceph: add a new metric to keep track of remote object copies
  libceph, ceph: move ceph_osdc_copy_from() into cephfs code
  ceph: clean-up metrics data structures to reduce code duplication
  ceph: split 'metric' debugfs file into several files
  ceph: return the real size read when it hits EOF
  ceph: properly handle statfs on multifs setups
  ceph: shut down mount on bad mdsmap or fsmap decode
  ceph: fix mdsmap decode when there are MDS's beyond max_mds
  ceph: ignore the truncate when size won't change with Fx caps issued
  ceph: don't rely on error_string to validate blocklisted session.
  ceph: just use ci->i_version for fscache aux info
  ceph: shut down access to inode when async create fails
  ceph: refactor remove_session_caps_cb
  ceph: fix auth cap handling logic in remove_session_caps_cb
  ceph: drop private list from remove_session_caps_cb
  ceph: don't use -ESTALE as special return code in try_get_cap_refs
  ceph: print inode numbers instead of pointer values
  ceph: enable async dirops by default
  libceph: drop ->monmap and err initialization
  ceph: convert to noop_direct_IO
  • Loading branch information
torvalds committed Nov 13, 2021
2 parents a27c085 + c02cb7b commit 0ecca62
Show file tree
Hide file tree
Showing 18 changed files with 544 additions and 479 deletions.
29 changes: 12 additions & 17 deletions fs/ceph/addr.c
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,7 @@ static int ceph_writepages_start(struct address_space *mapping,
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));

if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
if (ceph_inode_is_shutdown(inode)) {
if (ci->i_wrbuffer_ref > 0) {
pr_warn_ratelimited(
"writepage_start %p %lld forced umount\n",
Expand Down Expand Up @@ -1146,12 +1146,12 @@ static struct ceph_snap_context *
ceph_find_incompatible(struct page *page)
{
struct inode *inode = page->mapping->host;
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);

if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
dout(" page %p forced umount\n", page);
return ERR_PTR(-EIO);
if (ceph_inode_is_shutdown(inode)) {
dout(" page %p %llx:%llx is shutdown\n", page,
ceph_vinop(inode));
return ERR_PTR(-ESTALE);
}

for (;;) {
Expand Down Expand Up @@ -1312,17 +1312,6 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
return copied;
}

/*
* we set .direct_IO to indicate direct io is supported, but since we
* intercept O_DIRECT reads and writes early, this function should
* never get called.
*/
static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
WARN_ON(1);
return -EINVAL;
}

const struct address_space_operations ceph_aops = {
.readpage = ceph_readpage,
.readahead = ceph_readahead,
Expand All @@ -1333,7 +1322,7 @@ const struct address_space_operations ceph_aops = {
.set_page_dirty = ceph_set_page_dirty,
.invalidatepage = ceph_invalidatepage,
.releasepage = ceph_releasepage,
.direct_IO = ceph_direct_io,
.direct_IO = noop_direct_IO,
};

static void ceph_block_sigs(sigset_t *oldset)
Expand Down Expand Up @@ -1362,6 +1351,9 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
sigset_t oldset;
vm_fault_t ret = VM_FAULT_SIGBUS;

if (ceph_inode_is_shutdown(inode))
return ret;

ceph_block_sigs(&oldset);

dout("filemap_fault %p %llx.%llx %llu trying to get caps\n",
Expand Down Expand Up @@ -1453,6 +1445,9 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
sigset_t oldset;
vm_fault_t ret = VM_FAULT_SIGBUS;

if (ceph_inode_is_shutdown(inode))
return ret;

prealloc_cf = ceph_alloc_cap_flush();
if (!prealloc_cf)
return VM_FAULT_OOM;
Expand Down
23 changes: 3 additions & 20 deletions fs/ceph/cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,6 @@
#include "super.h"
#include "cache.h"

struct ceph_aux_inode {
u64 version;
u64 mtime_sec;
u64 mtime_nsec;
};

struct fscache_netfs ceph_cache_netfs = {
.name = "ceph",
.version = 0,
Expand Down Expand Up @@ -109,20 +103,14 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux(
void *cookie_netfs_data, const void *data, uint16_t dlen,
loff_t object_size)
{
struct ceph_aux_inode aux;
struct ceph_inode_info* ci = cookie_netfs_data;
struct inode* inode = &ci->vfs_inode;

if (dlen != sizeof(aux) ||
if (dlen != sizeof(ci->i_version) ||
i_size_read(inode) != object_size)
return FSCACHE_CHECKAUX_OBSOLETE;

memset(&aux, 0, sizeof(aux));
aux.version = ci->i_version;
aux.mtime_sec = inode->i_mtime.tv_sec;
aux.mtime_nsec = inode->i_mtime.tv_nsec;

if (memcmp(data, &aux, sizeof(aux)) != 0)
if (*(u64 *)data != ci->i_version)
return FSCACHE_CHECKAUX_OBSOLETE;

dout("ceph inode 0x%p cached okay\n", ci);
Expand All @@ -139,7 +127,6 @@ void ceph_fscache_register_inode_cookie(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_aux_inode aux;

/* No caching for filesystem */
if (!fsc->fscache)
Expand All @@ -151,14 +138,10 @@ void ceph_fscache_register_inode_cookie(struct inode *inode)

inode_lock_nested(inode, I_MUTEX_CHILD);
if (!ci->fscache) {
memset(&aux, 0, sizeof(aux));
aux.version = ci->i_version;
aux.mtime_sec = inode->i_mtime.tv_sec;
aux.mtime_nsec = inode->i_mtime.tv_nsec;
ci->fscache = fscache_acquire_cookie(fsc->fscache,
&ceph_fscache_inode_object_def,
&ci->i_vino, sizeof(ci->i_vino),
&aux, sizeof(aux),
&ci->i_version, sizeof(ci->i_version),
ci, i_size_read(inode), false);
}
inode_unlock(inode);
Expand Down
151 changes: 134 additions & 17 deletions fs/ceph/caps.c
Original file line number Diff line number Diff line change
Expand Up @@ -1188,11 +1188,11 @@ void ceph_remove_cap(struct ceph_cap *cap, bool queue_release)

lockdep_assert_held(&ci->i_ceph_lock);

fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
fsc = ceph_inode_to_client(&ci->vfs_inode);
WARN_ON_ONCE(ci->i_auth_cap == cap &&
!list_empty(&ci->i_dirty_item) &&
!fsc->blocklisted &&
READ_ONCE(fsc->mount_state) != CEPH_MOUNT_SHUTDOWN);
!ceph_inode_is_shutdown(&ci->vfs_inode));

__ceph_remove_cap(cap, queue_release);
}
Expand Down Expand Up @@ -1968,8 +1968,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
}
}

dout("check_caps %p file_want %s used %s dirty %s flushing %s"
" issued %s revoking %s retain %s %s%s\n", inode,
dout("check_caps %llx.%llx file_want %s used %s dirty %s flushing %s"
" issued %s revoking %s retain %s %s%s\n", ceph_vinop(inode),
ceph_cap_string(file_wanted),
ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
ceph_cap_string(ci->i_flushing_caps),
Expand All @@ -1990,7 +1990,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
(revoking & (CEPH_CAP_FILE_CACHE|
CEPH_CAP_FILE_LAZYIO)) && /* or revoking cache */
!tried_invalidate) {
dout("check_caps trying to invalidate on %p\n", inode);
dout("check_caps trying to invalidate on %llx.%llx\n",
ceph_vinop(inode));
if (try_nonblocking_invalidate(inode) < 0) {
dout("check_caps queuing invalidate\n");
queue_invalidate = true;
Expand Down Expand Up @@ -2629,9 +2630,9 @@ void ceph_take_cap_refs(struct ceph_inode_info *ci, int got,
*
* Returns 0 if caps were not able to be acquired (yet), 1 if succeed,
* or a negative error code. There are 3 special error codes:
* -EAGAIN: need to sleep but non-blocking is specified
* -EFBIG: ask caller to call check_max_size() and try again.
* -ESTALE: ask caller to call ceph_renew_caps() and try again.
* -EAGAIN: need to sleep but non-blocking is specified
* -EFBIG: ask caller to call check_max_size() and try again.
* -EUCLEAN: ask caller to call ceph_renew_caps() and try again.
*/
enum {
/* first 8 bits are reserved for CEPH_FILE_MODE_FOO */
Expand Down Expand Up @@ -2679,7 +2680,7 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
inode, endoff, ci->i_max_size);
if (endoff > ci->i_requested_max_size)
ret = ci->i_auth_cap ? -EFBIG : -ESTALE;
ret = ci->i_auth_cap ? -EFBIG : -EUCLEAN;
goto out_unlock;
}
/*
Expand Down Expand Up @@ -2749,17 +2750,17 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
goto out_unlock;
}

if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
dout("get_cap_refs %p forced umount\n", inode);
ret = -EIO;
if (ceph_inode_is_shutdown(inode)) {
dout("get_cap_refs %p inode is shutdown\n", inode);
ret = -ESTALE;
goto out_unlock;
}
mds_wanted = __ceph_caps_mds_wanted(ci, false);
if (need & ~mds_wanted) {
dout("get_cap_refs %p need %s > mds_wanted %s\n",
inode, ceph_cap_string(need),
ceph_cap_string(mds_wanted));
ret = -ESTALE;
ret = -EUCLEAN;
goto out_unlock;
}

Expand Down Expand Up @@ -2843,7 +2844,7 @@ int ceph_try_get_caps(struct inode *inode, int need, int want,

ret = try_get_cap_refs(inode, need, want, 0, flags, got);
/* three special error codes */
if (ret == -EAGAIN || ret == -EFBIG || ret == -ESTALE)
if (ret == -EAGAIN || ret == -EFBIG || ret == -EUCLEAN)
ret = 0;
return ret;
}
Expand Down Expand Up @@ -2926,7 +2927,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got
}

if (ret < 0) {
if (ret == -EFBIG || ret == -ESTALE) {
if (ret == -EFBIG || ret == -EUCLEAN) {
int ret2 = ceph_wait_on_async_create(inode);
if (ret2 < 0)
return ret2;
Expand All @@ -2935,7 +2936,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got
check_max_size(inode, endoff);
continue;
}
if (ret == -ESTALE) {
if (ret == -EUCLEAN) {
/* session was killed, try renew caps */
ret = ceph_renew_caps(inode, flags);
if (ret == 0)
Expand Down Expand Up @@ -4315,7 +4316,7 @@ static void flush_dirty_session_caps(struct ceph_mds_session *s)
i_dirty_item);
inode = &ci->vfs_inode;
ihold(inode);
dout("flush_dirty_caps %p\n", inode);
dout("flush_dirty_caps %llx.%llx\n", ceph_vinop(inode));
spin_unlock(&mdsc->cap_dirty_lock);
ceph_check_caps(ci, CHECK_CAPS_FLUSH, NULL);
iput(inode);
Expand Down Expand Up @@ -4560,3 +4561,119 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
spin_unlock(&dentry->d_lock);
return ret;
}

/*
 * Drain the inode's i_cap_snaps list.
 *
 * Each cap snap is unlinked with __ceph_remove_capsnap(), after which the
 * reference on its snap context and the reference on the cap snap itself
 * are dropped.  Once the list is empty, waiters on ci->i_cap_wq and on
 * mdsc->cap_flushing_wq are woken.
 *
 * The caller must hold ci->i_ceph_lock.
 *
 * Returns the number of cap snaps that were removed.
 */
static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	int nr_removed = 0;

	lockdep_assert_held(&ci->i_ceph_lock);

	dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);

	/* Always take the head of the list until nothing is left. */
	for (; !list_empty(&ci->i_cap_snaps); nr_removed++) {
		struct ceph_cap_snap *snap =
			list_first_entry(&ci->i_cap_snaps,
					 struct ceph_cap_snap, ci_item);

		__ceph_remove_capsnap(inode, snap, NULL, NULL);
		ceph_put_snap_context(snap->context);
		ceph_put_cap_snap(snap);
	}

	wake_up_all(&ci->i_cap_wq);
	wake_up_all(&mdsc->cap_flushing_wq);

	return nr_removed;
}

/*
 * Remove @cap from @inode and, if it was the inode's auth cap, discard the
 * state that depended on it: queued cap flushes, dirty and flushing cap
 * state, the preallocated cap flush and any remaining cap snaps.
 *
 * The caller must hold ci->i_ceph_lock.
 *
 * @invalidate is only ever set to true here (when the auth cap is removed
 * from a shut-down inode that still has pages in its mapping); it is never
 * cleared, so the caller has to initialize it.
 *
 * Returns the number of cap snaps removed by remove_capsnaps(), plus one if
 * dirty or flushing cap state was dropped.
 * NOTE(review): the name "iputs" suggests the caller is expected to iput()
 * the inode that many times -- confirm against the caller.
 */
int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate)
{
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
bool is_auth;
bool dirty_dropped = false;
int iputs = 0;

lockdep_assert_held(&ci->i_ceph_lock);

dout("removing cap %p, ci is %p, inode is %p\n",
cap, ci, &ci->vfs_inode);

/* sample the auth-cap status before the cap is removed */
is_auth = (cap == ci->i_auth_cap);
__ceph_remove_cap(cap, false);
if (is_auth) {
struct ceph_cap_flush *cf;

/*
 * On a shut-down inode, ask the caller to invalidate cached pages
 * and record -EIO on the mapping if buffered writes are outstanding.
 */
if (ceph_inode_is_shutdown(inode)) {
if (inode->i_data.nrpages > 0)
*invalidate = true;
if (ci->i_wrbuffer_ref > 0)
mapping_set_error(&inode->i_data, -EIO);
}

spin_lock(&mdsc->cap_dirty_lock);

/* trash all of the cap flushes for this inode */
while (!list_empty(&ci->i_cap_flush_list)) {
cf = list_first_entry(&ci->i_cap_flush_list,
struct ceph_cap_flush, i_list);
list_del_init(&cf->g_list);
list_del_init(&cf->i_list);
/*
 * Entries flagged is_capsnap are only unlinked, not freed.
 * NOTE(review): presumably they are owned by their cap snap --
 * confirm.
 */
if (!cf->is_capsnap)
ceph_free_cap_flush(cf);
}

/* forget dirty caps that were never flushed */
if (!list_empty(&ci->i_dirty_item)) {
pr_warn_ratelimited(
" dropping dirty %s state for %p %lld\n",
ceph_cap_string(ci->i_dirty_caps),
inode, ceph_ino(inode));
ci->i_dirty_caps = 0;
list_del_init(&ci->i_dirty_item);
dirty_dropped = true;
}
/* forget caps whose flush was still in progress */
if (!list_empty(&ci->i_flushing_item)) {
pr_warn_ratelimited(
" dropping dirty+flushing %s state for %p %lld\n",
ceph_cap_string(ci->i_flushing_caps),
inode, ceph_ino(inode));
ci->i_flushing_caps = 0;
list_del_init(&ci->i_flushing_item);
mdsc->num_cap_flushing--;
dirty_dropped = true;
}
spin_unlock(&mdsc->cap_dirty_lock);

if (dirty_dropped) {
/* dirty state was lost: record -EIO on the mapping */
mapping_set_error(inode->i_mapping, -EIO);

/*
 * Drop the head snap context once no write buffers, write
 * references, dirty caps or flushing caps remain.
 */
if (ci->i_wrbuffer_ref_head == 0 &&
ci->i_wr_ref == 0 &&
ci->i_dirty_caps == 0 &&
ci->i_flushing_caps == 0) {
ceph_put_snap_context(ci->i_head_snapc);
ci->i_head_snapc = NULL;
}
}

if (atomic_read(&ci->i_filelock_ref) > 0) {
/* make further file lock syscall return -EIO */
ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
pr_warn_ratelimited(" dropping file locks for %p %lld\n",
inode, ceph_ino(inode));
}

/* release the preallocated cap flush when no dirty caps remain */
if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) {
cf = ci->i_prealloc_cap_flush;
ci->i_prealloc_cap_flush = NULL;
if (!cf->is_capsnap)
ceph_free_cap_flush(cf);
}

/* the count of removed cap snaps seeds the return value */
if (!list_empty(&ci->i_cap_snaps))
iputs = remove_capsnaps(mdsc, inode);
}
/* dirty_dropped can only be true when the auth cap was removed above */
if (dirty_dropped)
++iputs;
return iputs;
}
Loading

0 comments on commit 0ecca62

Please sign in to comment.