// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */

#include "libxfs_priv.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode_buf.h"
#include "xfs_inode_fork.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_rtbitmap.h"

struct kmem_cache *xfs_buf_item_cache;
struct kmem_cache *xfs_ili_cache;	/* inode log item cache */

/*
 * The following functions are from fs/xfs/xfs_trans_buf.c
 */

/*
 * Check to see if a buffer matching the given parameters is already
 * a part of the given transaction.
 */
struct xfs_buf *
xfs_trans_buf_item_match(
	struct xfs_trans	*tp,
	struct xfs_buftarg	*btp,
	struct xfs_buf_map	*map,
	int			nmaps)
{
	struct xfs_log_item	*lip;
	struct xfs_buf_log_item	*blip;
	int			len = 0;
	int			i;

	for (i = 0; i < nmaps; i++)
		len += map[i].bm_len;

	list_for_each_entry(lip, &tp->t_items, li_trans) {
		blip = (struct xfs_buf_log_item *)lip;
		if (blip->bli_item.li_type == XFS_LI_BUF &&
		    blip->bli_buf->b_target == btp &&
		    xfs_buf_daddr(blip->bli_buf) == map[0].bm_bn &&
		    blip->bli_buf->b_length == len) {
			ASSERT(blip->bli_buf->b_map_count == nmaps);
			return blip->bli_buf;
		}
	}

	return NULL;
}
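
/*
 * Example usage (an illustrative sketch, not code from this file): a
 * transaction-aware lookup path can check for an already-joined buffer
 * before doing a fresh read. The helper name example_trans_get_buf() and
 * the xfs_buf_get_map() fallback are assumptions for illustration only.
 *
 *	static struct xfs_buf *
 *	example_trans_get_buf(
 *		struct xfs_trans	*tp,
 *		struct xfs_buftarg	*btp,
 *		struct xfs_buf_map	*map,
 *		int			nmaps)
 *	{
 *		struct xfs_buf		*bp;
 *
 *		bp = xfs_trans_buf_item_match(tp, btp, map, nmaps);
 *		if (bp != NULL)
 *			return bp;
 *		if (xfs_buf_get_map(btp, map, nmaps, 0, &bp))
 *			return NULL;
 *		return bp;
 *	}
 */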
/*
 * The following are from fs/xfs/xfs_buf_item.c
 */

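/*
 * Left empty on purpose, presumably because this userspace port only uses
 * the buf log item for dirty tracking within a transaction; none of the
 * kernel's pin/unpin/push callbacks apply here. (This rationale is
 * inferred from the surrounding code, not stated upstream.)
 */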
static const struct xfs_item_ops xfs_buf_item_ops = {
};

/*
 * Allocate a new buf log item to go with the given buffer.
 * Set the buffer's b_log_item field to point to the new
 * buf log item. If there are other items attached to the
 * buffer, put the buf log item at the front.
 */
void
xfs_buf_item_init(
	struct xfs_buf		*bp,
	struct xfs_mount	*mp)
{
	struct xfs_log_item	*lip;
	struct xfs_buf_log_item	*bip;

#ifdef LI_DEBUG
	fprintf(stderr, "buf_item_init for buffer %p\n", bp);
#endif

	/*
	 * Check to see if there is already a buf log item for
	 * this buffer. If there is, it is guaranteed to be
	 * the first. If we do already have one, there is
	 * nothing to do here so return.
	 */
	if (bp->b_log_item != NULL) {
		lip = bp->b_log_item;
		if (lip->li_type == XFS_LI_BUF) {
#ifdef LI_DEBUG
			fprintf(stderr,
				"reused buf item %p for pre-logged buffer %p\n",
				lip, bp);
#endif
			return;
		}
	}

	bip = kmem_cache_zalloc(xfs_buf_item_cache, 0);
#ifdef LI_DEBUG
	fprintf(stderr, "adding buf item %p for not-logged buffer %p\n",
		bip, bp);
#endif
	xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
	bip->bli_buf = bp;
	bip->__bli_format.blf_type = XFS_LI_BUF;
	bip->__bli_format.blf_blkno = (int64_t)xfs_buf_daddr(bp);
	bip->__bli_format.blf_len = (unsigned short)bp->b_length;
	bp->b_log_item = bip;
}
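
/*
 * Illustrative call sequence (a hedged sketch; the surrounding logic is
 * assumed, not taken from this file): transaction buffer code attaches a
 * log item as soon as a buffer joins a transaction, and a repeated call
 * on an already-initialized buffer is a no-op via the early-return path
 * above.
 *
 *	xfs_buf_item_init(bp, tp->t_mountp);
 *	bip = bp->b_log_item;
 *	xfs_trans_add_item(tp, &bip->bli_item);
 */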


/*
 * Mark bytes first through last inclusive as dirty in the buf item.
 * Unlike the kernel implementation, this userspace version does not
 * maintain a per-range dirty bitmap; it simply marks the whole item
 * dirty.
 */
void
xfs_buf_item_log(
	struct xfs_buf_log_item	*bip,
	uint			first,
	uint			last)
{
	/*
	 * Mark the item as having some dirty data for
	 * quick reference in xfs_buf_item_dirty.
	 */
	bip->bli_flags |= XFS_BLI_DIRTY;
}
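
/*
 * Example (hedged; the byte offsets are illustrative): callers log the
 * range of a held buffer they have modified, e.g. after updating a single
 * on-disk superblock field:
 *
 *	offset = offsetof(struct xfs_dsb, sb_icount);
 *	xfs_buf_item_log(bp->b_log_item, offset,
 *			 offset + sizeof(uint64_t) - 1);
 *
 * Since only XFS_BLI_DIRTY is set here, first/last merely document intent
 * in this port.
 */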

static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_inode_log_item, ili_item);
}

static uint64_t
xfs_inode_item_sort(
	struct xfs_log_item	*lip)
{
	return INODE_ITEM(lip)->ili_inode->i_ino;
}
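
/*
 * How the sort key is consumed (a sketch based on the kernel's precommit
 * machinery; the function name below is the kernel's, and its presence in
 * this port is an assumption): before the ->iop_precommit() callbacks run,
 * the transaction's dirty items are sorted by their ->iop_sort() keys, so
 * inode cluster buffers are always locked in ascending inode-number order:
 *
 *	list_sort(NULL, &tp->t_items, xfs_trans_precommit_sort);
 */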

/*
 * Prior to finally logging the inode, we have to ensure that all the
 * per-modification inode state changes are applied. This includes VFS inode
 * state updates, format conversions, verifier state synchronisation and
 * ensuring the inode buffer remains in memory whilst the inode is dirty.
 *
 * We have to be careful when we grab the inode cluster buffer due to lock
 * ordering constraints. The unlinked inode modifications (xfs_iunlink_item)
 * require AGI -> inode cluster buffer lock order. The inode cluster buffer is
 * not locked until ->precommit, so it happens after everything else has been
 * modified.
 *
 * Further, we have AGI -> AGF lock ordering, and with O_TMPFILE handling we
 * have AGI -> AGF -> iunlink item -> inode cluster buffer lock order. Hence we
 * cannot safely lock the inode cluster buffer in xfs_trans_log_inode() because
 * it can be called on an inode (e.g. via bumplink/droplink) before we take the
 * AGF lock modifying directory blocks.
 *
 * Rather than force a complete rework of all the transactions to call
 * xfs_trans_log_inode() once and once only at the end of every transaction, we
 * move the pinning of the inode cluster buffer to a ->precommit operation. This
 * matches how the xfs_iunlink_item locks the inode cluster buffer, and it
 * ensures that the inode cluster buffer locking is always done last in a
 * transaction, i.e. we ensure the lock order is always AGI -> AGF -> inode
 * cluster buffer.
 *
 * If we return the inode number as the precommit sort key, we also guarantee
 * that the inode cluster buffer locking order is the same for all the inodes
 * and unlink items in the transaction.
 */
static int
xfs_inode_item_precommit(
	struct xfs_trans	*tp,
	struct xfs_log_item	*lip)
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;
	struct inode		*inode = VFS_I(ip);
	unsigned int		flags = iip->ili_dirty_flags;

	/*
	 * Don't bother with i_lock for the I_DIRTY_TIME check here, as races
	 * don't matter - we either will need an extra transaction in 24 hours
	 * to log the timestamps, or will clear already cleared fields in the
	 * worst case.
	 */
	if (inode->i_state & I_DIRTY_TIME) {
		spin_lock(&inode->i_lock);
		inode->i_state &= ~I_DIRTY_TIME;
		spin_unlock(&inode->i_lock);
	}

	/*
	 * If we're updating the inode core or the timestamps and it's possible
	 * to upgrade this inode to bigtime format, do so now.
	 */
	if ((flags & (XFS_ILOG_CORE | XFS_ILOG_TIMESTAMP)) &&
	    xfs_has_bigtime(ip->i_mount) &&
	    !xfs_inode_has_bigtime(ip)) {
		ip->i_diflags2 |= XFS_DIFLAG2_BIGTIME;
		flags |= XFS_ILOG_CORE;
	}

	/*
	 * Inode verifiers do not check that the extent size hint is an integer
	 * multiple of the rt extent size on a directory with both rtinherit
	 * and extszinherit flags set. If we're logging a directory that is
	 * misconfigured in this way, clear the hint.
	 */
	if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
	    (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
	    xfs_extlen_to_rtxmod(ip->i_mount, ip->i_extsize) > 0) {
		ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
				   XFS_DIFLAG_EXTSZINHERIT);
		ip->i_extsize = 0;
		flags |= XFS_ILOG_CORE;
	}

	/*
	 * Record the specific change for fdatasync optimisation. This allows
	 * fdatasync to skip log forces for inodes that are only timestamp
	 * dirty. Once we've processed the XFS_ILOG_IVERSION flag, convert it
	 * to XFS_ILOG_CORE so that the actual on-disk dirty tracking
	 * (ili_fields) correctly tracks that the version has changed.
	 */
	spin_lock(&iip->ili_lock);
	iip->ili_fsync_fields |= (flags & ~XFS_ILOG_IVERSION);
	if (flags & XFS_ILOG_IVERSION)
		flags = ((flags & ~XFS_ILOG_IVERSION) | XFS_ILOG_CORE);

	if (!iip->ili_item.li_buf) {
		struct xfs_buf	*bp;
		int		error;

		/*
		 * We hold the ILOCK here, so this inode is not going to be
		 * flushed while we are here. Further, because there is no
		 * buffer attached to the item, we know that there is no IO in
		 * progress, so nothing will clear the ili_fields while we read
		 * in the buffer. Hence we can safely drop the spin lock and
		 * read the buffer knowing that the state will not change from
		 * here.
		 */
		spin_unlock(&iip->ili_lock);
		error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &bp);
		if (error)
			return error;

		/*
		 * We need an explicit buffer reference for the log item but
		 * don't want the buffer to remain attached to the transaction.
		 * Hold the buffer but release the transaction reference once
		 * we've attached the inode log item to the buffer log item
		 * list.
		 */
		xfs_buf_hold(bp);
		spin_lock(&iip->ili_lock);
		iip->ili_item.li_buf = bp;
		bp->b_flags |= _XBF_INODES;
		list_add_tail(&iip->ili_item.li_bio_list, &bp->b_li_list);
		xfs_trans_brelse(tp, bp);
	}

	/*
	 * Always OR in the bits from the ili_last_fields field. This is to
	 * coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines
	 * in the eventual clearing of the ili_fields bits. See the big comment
	 * in xfs_iflush() for an explanation of this coordination mechanism.
	 */
	iip->ili_fields |= (flags | iip->ili_last_fields);
	spin_unlock(&iip->ili_lock);

	/*
	 * We are done with the log item transaction dirty state, so clear it
	 * so that it doesn't pollute future transactions.
	 */
	iip->ili_dirty_flags = 0;
	return 0;
}
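
/*
 * Lifetime note with an illustrative release path (hedged; the actual
 * teardown lives elsewhere, e.g. the kernel's flush-completion and
 * flush-abort paths, and is paraphrased here): the xfs_buf_hold()
 * reference taken in ->precommit survives xfs_trans_brelse() and is only
 * dropped once the item is detached from the buffer:
 *
 *	bp = iip->ili_item.li_buf;
 *	iip->ili_item.li_buf = NULL;
 *	list_del_init(&iip->ili_item.li_bio_list);
 *	xfs_buf_rele(bp);
 */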

static const struct xfs_item_ops xfs_inode_item_ops = {
	.iop_sort	= xfs_inode_item_sort,
	.iop_precommit	= xfs_inode_item_precommit,
};

/*
 * Initialize the inode log item for a newly allocated (in-core) inode.
 */
void
xfs_inode_item_init(
	struct xfs_inode	*ip,
	struct xfs_mount	*mp)
{
	struct xfs_inode_log_item *iip;

	ASSERT(ip->i_itemp == NULL);
	iip = ip->i_itemp = kmem_cache_zalloc(xfs_ili_cache, 0);
#ifdef LI_DEBUG
	fprintf(stderr, "inode_item_init for inode %llu, iip=%p\n",
		ip->i_ino, iip);
#endif

	spin_lock_init(&iip->ili_lock);

	xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,
			  &xfs_inode_item_ops);
	iip->ili_inode = ip;
}
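
/*
 * Example (hedged; the caller shown is assumed, modeled on how libxfs
 * joins inodes to transactions): the item is created once per in-core
 * inode, before the inode is first joined to a transaction.
 *
 *	if (ip->i_itemp == NULL)
 *		xfs_inode_item_init(ip, mp);
 *	xfs_trans_ijoin(tp, ip, 0);
 */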