| From 05a630d76bd3f39baf0eecfa305bed2820796dee Mon Sep 17 00:00:00 2001 |
| From: "Darrick J. Wong" <darrick.wong@oracle.com> |
| Date: Thu, 2 Feb 2017 15:14:01 -0800 |
| Subject: [PATCH] xfs: allow unwritten extents in the CoW fork |
| |
| commit 05a630d76bd3f39baf0eecfa305bed2820796dee upstream. |
| |
| In the data fork, we only allow extents to perform the following state |
| transitions: |
| |
| delay -> real <-> unwritten |
| |
| There's no way to move directly from a delalloc reservation to an |
| /unwritten/ allocated extent. However, for the CoW fork we want to be |
| able to do the following to each extent: |
| |
| delalloc -> unwritten -> written -> remapped to data fork |
| |
| This will help us to avoid a race in the speculative CoW preallocation |
| code between a first thread that is allocating a CoW extent and a second |
| thread that is remapping part of a file after a write. In order to do |
| this, however, we need two things: first, we have to be able to |
| transition from delalloc to unwritten, and second, the function that |
| converts between real and unwritten has to be made aware of the CoW |
| fork. Do both of those things. |
| |
| Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> |
| Reviewed-by: Christoph Hellwig <hch@lst.de> |
| |
| diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c |
| index 2e91eb66d32f..dcffbb09444e 100644 |
| --- a/fs/xfs/libxfs/xfs_bmap.c |
| +++ b/fs/xfs/libxfs/xfs_bmap.c |
| @@ -1850,6 +1850,7 @@ xfs_bmap_add_extent_delay_real( |
| */ |
| trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); |
| xfs_bmbt_set_startblock(ep, new->br_startblock); |
| + xfs_bmbt_set_state(ep, new->br_state); |
| trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); |
| |
| (*nextents)++; |
| @@ -2188,6 +2189,7 @@ STATIC int /* error */ |
| xfs_bmap_add_extent_unwritten_real( |
| struct xfs_trans *tp, |
| xfs_inode_t *ip, /* incore inode pointer */ |
| + int whichfork, |
| xfs_extnum_t *idx, /* extent number to update/insert */ |
| xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ |
| xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
| @@ -2207,12 +2209,14 @@ xfs_bmap_add_extent_unwritten_real( |
| /* left is 0, right is 1, prev is 2 */ |
| int rval=0; /* return value (logging flags) */ |
| int state = 0;/* state bits, accessed thru macros */ |
| - struct xfs_mount *mp = tp->t_mountp; |
| + struct xfs_mount *mp = ip->i_mount; |
| |
| *logflagsp = 0; |
| |
| cur = *curp; |
| - ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); |
| + ifp = XFS_IFORK_PTR(ip, whichfork); |
| + if (whichfork == XFS_COW_FORK) |
| + state |= BMAP_COWFORK; |
| |
| ASSERT(*idx >= 0); |
| ASSERT(*idx <= xfs_iext_count(ifp)); |
| @@ -2271,7 +2275,7 @@ xfs_bmap_add_extent_unwritten_real( |
| * Don't set contiguous if the combined extent would be too large. |
| * Also check for all-three-contiguous being too large. |
| */ |
| - if (*idx < xfs_iext_count(&ip->i_df) - 1) { |
| + if (*idx < xfs_iext_count(ifp) - 1) { |
| state |= BMAP_RIGHT_VALID; |
| xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); |
| if (isnullstartblock(RIGHT.br_startblock)) |
| @@ -2311,7 +2315,8 @@ xfs_bmap_add_extent_unwritten_real( |
| trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
| |
| xfs_iext_remove(ip, *idx + 1, 2, state); |
| - ip->i_d.di_nextents -= 2; |
| + XFS_IFORK_NEXT_SET(ip, whichfork, |
| + XFS_IFORK_NEXTENTS(ip, whichfork) - 2); |
| if (cur == NULL) |
| rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
| else { |
| @@ -2354,7 +2359,8 @@ xfs_bmap_add_extent_unwritten_real( |
| trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
| |
| xfs_iext_remove(ip, *idx + 1, 1, state); |
| - ip->i_d.di_nextents--; |
| + XFS_IFORK_NEXT_SET(ip, whichfork, |
| + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); |
| if (cur == NULL) |
| rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
| else { |
| @@ -2389,7 +2395,8 @@ xfs_bmap_add_extent_unwritten_real( |
| xfs_bmbt_set_state(ep, newext); |
| trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
| xfs_iext_remove(ip, *idx + 1, 1, state); |
| - ip->i_d.di_nextents--; |
| + XFS_IFORK_NEXT_SET(ip, whichfork, |
| + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); |
| if (cur == NULL) |
| rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
| else { |
| @@ -2501,7 +2508,8 @@ xfs_bmap_add_extent_unwritten_real( |
| trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
| |
| xfs_iext_insert(ip, *idx, 1, new, state); |
| - ip->i_d.di_nextents++; |
| + XFS_IFORK_NEXT_SET(ip, whichfork, |
| + XFS_IFORK_NEXTENTS(ip, whichfork) + 1); |
| if (cur == NULL) |
| rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
| else { |
| @@ -2579,7 +2587,8 @@ xfs_bmap_add_extent_unwritten_real( |
| ++*idx; |
| xfs_iext_insert(ip, *idx, 1, new, state); |
| |
| - ip->i_d.di_nextents++; |
| + XFS_IFORK_NEXT_SET(ip, whichfork, |
| + XFS_IFORK_NEXTENTS(ip, whichfork) + 1); |
| if (cur == NULL) |
| rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
| else { |
| @@ -2627,7 +2636,8 @@ xfs_bmap_add_extent_unwritten_real( |
| ++*idx; |
| xfs_iext_insert(ip, *idx, 2, &r[0], state); |
| |
| - ip->i_d.di_nextents += 2; |
| + XFS_IFORK_NEXT_SET(ip, whichfork, |
| + XFS_IFORK_NEXTENTS(ip, whichfork) + 2); |
| if (cur == NULL) |
| rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
| else { |
| @@ -2681,17 +2691,17 @@ xfs_bmap_add_extent_unwritten_real( |
| } |
| |
| /* update reverse mappings */ |
| - error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new); |
| + error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new); |
| if (error) |
| goto done; |
| |
| /* convert to a btree if necessary */ |
| - if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) { |
| + if (xfs_bmap_needs_btree(ip, whichfork)) { |
| int tmp_logflags; /* partial log flag return val */ |
| |
| ASSERT(cur == NULL); |
| error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur, |
| - 0, &tmp_logflags, XFS_DATA_FORK); |
| + 0, &tmp_logflags, whichfork); |
| *logflagsp |= tmp_logflags; |
| if (error) |
| goto done; |
| @@ -2703,7 +2713,7 @@ xfs_bmap_add_extent_unwritten_real( |
| *curp = cur; |
| } |
| |
| - xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK); |
| + xfs_bmap_check_leaf_extents(*curp, ip, whichfork); |
| done: |
| *logflagsp |= rval; |
| return error; |
| @@ -4354,10 +4364,16 @@ xfs_bmapi_allocate( |
| bma->got.br_state = XFS_EXT_NORM; |
| |
| /* |
| - * A wasdelay extent has been initialized, so shouldn't be flagged |
| - * as unwritten. |
| + * In the data fork, a wasdelay extent has been initialized, so |
| + * shouldn't be flagged as unwritten. |
| + * |
| + * For the cow fork, however, we convert delalloc reservations |
| + * (extents allocated for speculative preallocation) to |
| + * allocated unwritten extents, and only convert the unwritten |
| + * extents to real extents when we're about to write the data. |
| */ |
| - if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) && |
| + if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) && |
| + (bma->flags & XFS_BMAPI_PREALLOC) && |
| xfs_sb_version_hasextflgbit(&mp->m_sb)) |
| bma->got.br_state = XFS_EXT_UNWRITTEN; |
| |
| @@ -4408,8 +4424,6 @@ xfs_bmapi_convert_unwritten( |
| (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) |
| return 0; |
| |
| - ASSERT(whichfork != XFS_COW_FORK); |
| - |
| /* |
| * Modify (by adding) the state flag, if writing. |
| */ |
| @@ -4434,8 +4448,8 @@ xfs_bmapi_convert_unwritten( |
| return error; |
| } |
| |
| - error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, |
| - &bma->cur, mval, bma->firstblock, bma->dfops, |
| + error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork, |
| + &bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops, |
| &tmp_logflags); |
| /* |
| * Log the inode core unconditionally in the unwritten extent conversion |
| @@ -4444,8 +4458,12 @@ xfs_bmapi_convert_unwritten( |
| * in the transaction for the sake of fsync(), even if nothing has |
| * changed, because fsync() will not force the log for this transaction |
| * unless it sees the inode pinned. |
| + * |
| + * Note: If we're only converting cow fork extents, there aren't |
| + * any on-disk updates to make, so we don't need to log anything. |
| */ |
| - bma->logflags |= tmp_logflags | XFS_ILOG_CORE; |
| + if (whichfork != XFS_COW_FORK) |
| + bma->logflags |= tmp_logflags | XFS_ILOG_CORE; |
| if (error) |
| return error; |
| |
| @@ -4519,15 +4537,15 @@ xfs_bmapi_write( |
| ASSERT(*nmap >= 1); |
| ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); |
| ASSERT(!(flags & XFS_BMAPI_IGSTATE)); |
| - ASSERT(tp != NULL); |
| + ASSERT(tp != NULL || |
| + (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) == |
| + (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)); |
| ASSERT(len > 0); |
| ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); |
| ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
| ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK); |
| ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP)); |
| ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP)); |
| - ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK); |
| - ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK); |
| |
| /* zeroing is for currently only for data extents, not metadata */ |
| ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) != |
| @@ -5542,8 +5560,8 @@ __xfs_bunmapi( |
| } |
| del.br_state = XFS_EXT_UNWRITTEN; |
| error = xfs_bmap_add_extent_unwritten_real(tp, ip, |
| - &lastx, &cur, &del, firstblock, dfops, |
| - &logflags); |
| + whichfork, &lastx, &cur, &del, |
| + firstblock, dfops, &logflags); |
| if (error) |
| goto error0; |
| goto nodelete; |
| @@ -5596,8 +5614,9 @@ __xfs_bunmapi( |
| prev.br_state = XFS_EXT_UNWRITTEN; |
| lastx--; |
| error = xfs_bmap_add_extent_unwritten_real(tp, |
| - ip, &lastx, &cur, &prev, |
| - firstblock, dfops, &logflags); |
| + ip, whichfork, &lastx, &cur, |
| + &prev, firstblock, dfops, |
| + &logflags); |
| if (error) |
| goto error0; |
| goto nodelete; |
| @@ -5605,8 +5624,9 @@ __xfs_bunmapi( |
| ASSERT(del.br_state == XFS_EXT_NORM); |
| del.br_state = XFS_EXT_UNWRITTEN; |
| error = xfs_bmap_add_extent_unwritten_real(tp, |
| - ip, &lastx, &cur, &del, |
| - firstblock, dfops, &logflags); |
| + ip, whichfork, &lastx, &cur, |
| + &del, firstblock, dfops, |
| + &logflags); |
| if (error) |
| goto error0; |
| goto nodelete; |
| -- |
| 2.12.0 |
| |