| From 20ca7cb3065fbb8193b5c05856f64fa83683ef91 Mon Sep 17 00:00:00 2001 |
| From: Ilya Dryomov <idryomov@gmail.com> |
| Date: Mon, 9 Mar 2020 12:03:14 +0100 |
| Subject: [PATCH] ceph: check POOL_FLAG_FULL/NEARFULL in addition to |
| OSDMAP_FULL/NEARFULL |
| |
| commit 7614209736fbc4927584d4387faade4f31444fce upstream. |
| |
| CEPH_OSDMAP_FULL/NEARFULL haven't been set since mimic, so we need to |
| consult per-pool flags as well.  Unfortunately the backwards compatibility |
| here is lacking: |
| |
| - the change that deprecated OSDMAP_FULL/NEARFULL went into mimic, but |
| was guarded by require_osd_release >= RELEASE_LUMINOUS |
| - it was subsequently backported to luminous in v12.2.2, but that makes |
| no difference to clients that only check OSDMAP_FULL/NEARFULL because |
| require_osd_release is not client-facing -- it is for OSDs |
| |
| Since all kernels are affected, the best we can do here is just start |
| checking both map flags and pool flags and send that to stable. |
| |
| These checks are best effort, so take osdc->lock and look up pool flags |
| just once. Remove the FIXME, since filesystem quotas are checked above |
| and RADOS quotas are reflected in POOL_FLAG_FULL: when the pool reaches |
| its quota, both POOL_FLAG_FULL and POOL_FLAG_FULL_QUOTA are set. |
| |
| Cc: stable@vger.kernel.org |
| Reported-by: Yanhu Cao <gmayyyha@gmail.com> |
| Signed-off-by: Ilya Dryomov <idryomov@gmail.com> |
| Reviewed-by: Jeff Layton <jlayton@kernel.org> |
| Acked-by: Sage Weil <sage@redhat.com> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/fs/ceph/file.c b/fs/ceph/file.c |
| index 8eb5f973744f..2595052d5eef 100644 |
| --- a/fs/ceph/file.c |
| +++ b/fs/ceph/file.c |
| @@ -1384,9 +1384,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) |
| struct inode *inode = file_inode(file); |
| struct ceph_inode_info *ci = ceph_inode(inode); |
| struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
| + struct ceph_osd_client *osdc = &fsc->client->osdc; |
| struct ceph_cap_flush *prealloc_cf; |
| ssize_t count, written = 0; |
| int err, want, got; |
| + u32 map_flags; |
| + u64 pool_flags; |
| loff_t pos; |
| loff_t limit = max(i_size_read(inode), fsc->max_file_size); |
| |
| @@ -1441,8 +1444,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) |
| goto out; |
| } |
| |
| - /* FIXME: not complete since it doesn't account for being at quota */ |
| - if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL)) { |
| + down_read(&osdc->lock); |
| + map_flags = osdc->osdmap->flags; |
| + pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id); |
| + up_read(&osdc->lock); |
| + if ((map_flags & CEPH_OSDMAP_FULL) || |
| + (pool_flags & CEPH_POOL_FLAG_FULL)) { |
| err = -ENOSPC; |
| goto out; |
| } |
| @@ -1532,7 +1539,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) |
| } |
| |
| if (written >= 0) { |
| - if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_NEARFULL)) |
| + if ((map_flags & CEPH_OSDMAP_NEARFULL) || |
| + (pool_flags & CEPH_POOL_FLAG_NEARFULL)) |
| iocb->ki_flags |= IOCB_DSYNC; |
| written = generic_write_sync(iocb, written); |
| } |
| diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h |
| index e081b56f1c1d..5e601975745f 100644 |
| --- a/include/linux/ceph/osdmap.h |
| +++ b/include/linux/ceph/osdmap.h |
| @@ -37,6 +37,9 @@ int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs); |
| #define CEPH_POOL_FLAG_HASHPSPOOL (1ULL << 0) /* hash pg seed and pool id |
| together */ |
| #define CEPH_POOL_FLAG_FULL (1ULL << 1) /* pool is full */ |
| +#define CEPH_POOL_FLAG_FULL_QUOTA (1ULL << 10) /* pool ran out of quota, |
| + will set FULL too */ |
| +#define CEPH_POOL_FLAG_NEARFULL (1ULL << 11) /* pool is nearfull */ |
| |
| struct ceph_pg_pool_info { |
| struct rb_node node; |
| @@ -304,5 +307,6 @@ extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map, |
| |
| extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id); |
| extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); |
| +u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id); |
| |
| #endif |
| diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h |
| index 3eb0e55665b4..c004bced9b91 100644 |
| --- a/include/linux/ceph/rados.h |
| +++ b/include/linux/ceph/rados.h |
| @@ -143,8 +143,10 @@ extern const char *ceph_osd_state_name(int s); |
| /* |
| * osd map flag bits |
| */ |
| -#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */ |
| -#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */ |
| +#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC), |
| + not set since ~luminous */ |
| +#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC), |
| + not set since ~luminous */ |
| #define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ |
| #define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ |
| #define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ |
| diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c |
| index 48a31dc9161c..f6a366eb8aa5 100644 |
| --- a/net/ceph/osdmap.c |
| +++ b/net/ceph/osdmap.c |
| @@ -710,6 +710,15 @@ int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name) |
| } |
| EXPORT_SYMBOL(ceph_pg_poolid_by_name); |
| |
| +u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id) |
| +{ |
| + struct ceph_pg_pool_info *pi; |
| + |
| + pi = __lookup_pg_pool(&map->pg_pools, id); |
| + return pi ? pi->flags : 0; |
| +} |
| +EXPORT_SYMBOL(ceph_pg_pool_flags); |
| + |
| static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) |
| { |
| rb_erase(&pi->node, root); |
| -- |
| 2.7.4 |
| |