// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
#include "libxfs_priv.h"
#include "libxfs.h"
#include "libxfs_io.h"
#include "init.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode_buf.h"
#include "xfs_inode_fork.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_bit.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2_priv.h"
/*
* Initialise a newly allocated inode and return the in-core inode to the
* caller locked exclusively.
*/
int
libxfs_icreate(
struct xfs_trans *tp,
xfs_ino_t ino,
const struct xfs_icreate_args *args,
struct xfs_inode **ipp)
{
struct xfs_mount *mp = tp->t_mountp;
struct xfs_inode *ip = NULL;
int error;
error = libxfs_iget(mp, tp, ino, XFS_IGET_CREATE, &ip);
if (error)
return error;
ASSERT(ip != NULL);
xfs_trans_ijoin(tp, ip, 0);
xfs_inode_init(tp, args, ip);
*ipp = ip;
return 0;
}
/* Set up inode attributes for newly created internal files. */
void
libxfs_icreate_args_rootfile(
struct xfs_icreate_args *args,
struct xfs_mount *mp,
umode_t mode,
bool init_xattrs)
{
args->idmap = NULL;
args->uid = make_kuid(0);
args->gid = make_kgid(0);
args->prid = 0;
args->mode = mode;
args->flags = XFS_ICREATE_ARGS_FORCE_UID |
XFS_ICREATE_ARGS_FORCE_GID |
XFS_ICREATE_ARGS_FORCE_MODE;
if (init_xattrs)
args->flags |= XFS_ICREATE_ARGS_INIT_XATTRS;
}
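#if 0	/* illustrative sketch -- not built */
/*
 * A minimal example of how the two helpers above might be combined to
 * create a root-owned internal file.  The xfs_dialloc() calling
 * convention (taking the icreate args) is assumed from current kernel
 * sources, and error handling is abbreviated; this is a sketch, not
 * upstream code.
 */
static int
example_create_rootfile(
	struct xfs_trans	**tpp,
	struct xfs_mount	*mp,
	umode_t			mode,
	struct xfs_inode	**ipp)
{
	struct xfs_icreate_args	args;
	xfs_ino_t		ino;
	int			error;

	libxfs_icreate_args_rootfile(&args, mp, mode, false);

	/* Allocate an on-disk inode, then set up the incore inode. */
	error = libxfs_dialloc(tpp, &args, &ino);
	if (error)
		return error;
	return libxfs_icreate(*tpp, ino, &args, ipp);
}
#endif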
/*
 * Writes a modified inode's changes out to the inode's on-disk home.
* Originally based on xfs_iflush_int() from xfs_inode.c in the kernel.
*/
int
libxfs_iflush_int(
struct xfs_inode *ip,
struct xfs_buf *bp)
{
struct xfs_inode_log_item *iip;
struct xfs_dinode *dip;
struct xfs_mount *mp;
	ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE ||
		ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
iip = ip->i_itemp;
mp = ip->i_mount;
/* set *dip = inode's place in the buffer */
dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
if (XFS_ISREG(ip)) {
ASSERT( (ip->i_df.if_format == XFS_DINODE_FMT_EXTENTS) ||
(ip->i_df.if_format == XFS_DINODE_FMT_BTREE) );
} else if (XFS_ISDIR(ip)) {
ASSERT( (ip->i_df.if_format == XFS_DINODE_FMT_EXTENTS) ||
(ip->i_df.if_format == XFS_DINODE_FMT_BTREE) ||
(ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) );
}
	ASSERT(ip->i_df.if_nextents + ip->i_af.if_nextents <= ip->i_nblocks);
ASSERT(ip->i_forkoff <= mp->m_sb.sb_inodesize);
/* bump the change count on v3 inodes */
if (xfs_has_v3inodes(mp))
VFS_I(ip)->i_version++;
/*
* If there are inline format data / attr forks attached to this inode,
* make sure they are not corrupt.
*/
if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL &&
xfs_ifork_verify_local_data(ip))
return -EFSCORRUPTED;
if (xfs_inode_has_attr_fork(ip) &&
ip->i_af.if_format == XFS_DINODE_FMT_LOCAL &&
xfs_ifork_verify_local_attr(ip))
return -EFSCORRUPTED;
/*
* Copy the dirty parts of the inode into the on-disk
* inode. We always copy out the core of the inode,
* because if the inode is dirty at all the core must
* be.
*/
xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn);
xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
if (xfs_inode_has_attr_fork(ip))
xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
/* generate the checksum. */
xfs_dinode_calc_crc(mp, dip);
return 0;
}
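#if 0	/* illustrative sketch -- not built */
/*
 * A minimal example of driving libxfs_iflush_int() by hand: map the
 * inode to its cluster buffer, flush the incore state into it, and
 * write the buffer back.  It assumes the inode already carries a log
 * item (ip->i_itemp), since libxfs_iflush_int() dereferences it; this
 * is a sketch, not upstream code.
 */
static int
example_flush_inode(
	struct xfs_inode	*ip)
{
	struct xfs_buf		*bp;
	int			error;

	error = xfs_imap_to_bp(ip->i_mount, NULL, &ip->i_imap, &bp);
	if (error)
		return error;
	error = libxfs_iflush_int(ip, bp);
	if (!error)
		error = libxfs_bwrite(bp);
	libxfs_buf_relse(bp);
	return error;
}
#endif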
/*
* Inode cache stubs.
*/
struct kmem_cache *xfs_inode_cache;
extern struct kmem_cache *xfs_ili_cache;
int
libxfs_iget(
struct xfs_mount *mp,
struct xfs_trans *tp,
xfs_ino_t ino,
uint flags,
struct xfs_inode **ipp)
{
struct xfs_inode *ip;
struct xfs_perag *pag;
int error = 0;
/* reject inode numbers outside existing AGs */
if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
return -EINVAL;
ip = kmem_cache_zalloc(xfs_inode_cache, 0);
if (!ip)
return -ENOMEM;
VFS_I(ip)->i_count = 1;
ip->i_ino = ino;
ip->i_mount = mp;
ip->i_diflags2 = mp->m_ino_geo.new_diflags2;
ip->i_af.if_format = XFS_DINODE_FMT_EXTENTS;
ip->i_next_unlinked = NULLAGINO;
ip->i_prev_unlinked = NULLAGINO;
spin_lock_init(&VFS_I(ip)->i_lock);
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
error = xfs_imap(pag, tp, ip->i_ino, &ip->i_imap, 0);
xfs_perag_put(pag);
if (error)
goto out_destroy;
/*
* For version 5 superblocks, if we are initialising a new inode and we
* are not utilising the XFS_MOUNT_IKEEP inode cluster mode, we can
* simply build the new inode core with a random generation number.
*
* For version 4 (and older) superblocks, log recovery is dependent on
* the di_flushiter field being initialised from the current on-disk
* value and hence we must also read the inode off disk even when
 * initialising new inodes.
*/
if (xfs_has_v3inodes(mp) &&
(flags & XFS_IGET_CREATE) && !xfs_has_ikeep(mp)) {
VFS_I(ip)->i_generation = get_random_u32();
} else {
struct xfs_buf *bp;
error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp);
if (error)
goto out_destroy;
error = xfs_inode_from_disk(ip,
xfs_buf_offset(bp, ip->i_imap.im_boffset));
if (!error)
xfs_buf_set_ref(bp, XFS_INO_REF);
xfs_trans_brelse(tp, bp);
if (error)
goto out_destroy;
}
*ipp = ip;
return 0;
out_destroy:
kmem_cache_free(xfs_inode_cache, ip);
*ipp = NULL;
return error;
}
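#if 0	/* illustrative sketch -- not built */
/*
 * A minimal example of the stub cache's reference lifecycle: every
 * successful libxfs_iget() must be paired with a libxfs_irele(), which
 * frees the incore inode once the last reference is dropped (see the
 * stubs below).  This is a sketch, not upstream code.
 */
static int
example_read_inode_size(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	xfs_fsize_t		*size)
{
	struct xfs_inode	*ip;
	int			error;

	error = libxfs_iget(mp, NULL, ino, 0, &ip);
	if (error)
		return error;
	*size = ip->i_disk_size;
	libxfs_irele(ip);
	return 0;
}
#endif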
/*
* Get a metadata inode. The ftype must match exactly. Caller must supply
* a transaction (even if empty) to avoid livelocking if the inobt has a cycle.
*/
int
libxfs_imeta_iget(
struct xfs_trans *tp,
xfs_ino_t ino,
unsigned char ftype,
struct xfs_inode **ipp)
{
struct xfs_mount *mp = tp->t_mountp;
struct xfs_inode *ip;
int error;
error = libxfs_iget(mp, tp, ino, XFS_IGET_UNTRUSTED, &ip);
if (error)
return error;
if ((xfs_has_metadir(mp) && !xfs_is_metadir_inode(ip)) ||
ftype == XFS_DIR3_FT_UNKNOWN ||
xfs_mode_to_ftype(VFS_I(ip)->i_mode) != ftype) {
libxfs_irele(ip);
return -EFSCORRUPTED;
}
*ipp = ip;
return 0;
}
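#if 0	/* illustrative sketch -- not built */
/*
 * A minimal example of the rule in the comment above: supply a
 * transaction even for a plain lookup.  It assumes the
 * libxfs_trans_alloc_empty() helper is available in this build; this
 * is a sketch, not upstream code.
 */
static int
example_get_metadir_inode(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	struct xfs_inode	**ipp)
{
	struct xfs_trans	*tp;
	int			error;

	error = libxfs_trans_alloc_empty(mp, &tp);
	if (error)
		return error;
	error = libxfs_imeta_iget(tp, ino, XFS_DIR3_FT_DIR, ipp);
	libxfs_trans_cancel(tp);
	return error;
}
#endif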
static void
libxfs_idestroy(
struct xfs_inode *ip)
{
switch (VFS_I(ip)->i_mode & S_IFMT) {
case S_IFREG:
case S_IFDIR:
case S_IFLNK:
libxfs_idestroy_fork(&ip->i_df);
break;
}
libxfs_ifork_zap_attr(ip);
if (ip->i_cowfp) {
libxfs_idestroy_fork(ip->i_cowfp);
kmem_cache_free(xfs_ifork_cache, ip->i_cowfp);
}
}
void
libxfs_irele(
struct xfs_inode *ip)
{
VFS_I(ip)->i_count--;
if (VFS_I(ip)->i_count == 0) {
ASSERT(ip->i_itemp == NULL);
libxfs_idestroy(ip);
kmem_cache_free(xfs_inode_cache, ip);
}
}
void
libxfs_imeta_irele(
struct xfs_inode *ip)
{
ASSERT(!xfs_has_metadir(ip->i_mount) || xfs_is_metadir_inode(ip));
libxfs_irele(ip);
}
static inline void inode_fsgid_set(struct inode *inode,
struct mnt_idmap *idmap)
{
inode->i_gid = make_kgid(0);
}
void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode,
const struct inode *dir, umode_t mode)
{
inode_fsuid_set(inode, idmap);
if (dir && dir->i_mode & S_ISGID) {
inode->i_gid = dir->i_gid;
/* Directories are special, and always inherit S_ISGID */
if (S_ISDIR(mode))
mode |= S_ISGID;
} else
inode_fsgid_set(inode, idmap);
inode->i_mode = mode;
}
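#if 0	/* illustrative sketch -- not built */
/*
 * A small example of the S_ISGID rule above, with hypothetical values:
 * a directory created inside a setgid parent inherits the parent's gid
 * and keeps S_ISGID, while the other mode bits come from the caller.
 */
static void
example_setgid_inheritance(
	struct inode		*dir,	/* say, mode 02775, gid 100 */
	struct inode		*child)
{
	inode_init_owner(NULL, child, dir, S_IFDIR | 0755);
	/* child now has dir's gid, and S_ISGID is set in child->i_mode */
}
#endif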
/*
 * This call is used to indicate that the buffer is going to
 * be staled and was an inode buffer. This means it gets
 * special processing during unpin - where any inodes
 * associated with the buffer should be removed from the AIL.
 * There is also special processing during recovery: any
 * replay of the inodes in the buffer needs to be prevented,
 * as the buffer may have been reused.
 */
static void
xfs_trans_stale_inode_buf(
xfs_trans_t *tp,
struct xfs_buf *bp)
{
	struct xfs_buf_log_item	*bip = bp->b_log_item;

	ASSERT(bp->b_transp == tp);
	ASSERT(bip != NULL);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);
bp->b_flags |= _XBF_INODES;
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
}
/*
* A big issue when freeing the inode cluster is that we _cannot_ skip any
* inodes that are in memory - they all must be marked stale and attached to
* the cluster buffer.
*/
int
libxfs_ifree_cluster(
struct xfs_trans *tp,
struct xfs_perag *pag,
struct xfs_inode *free_ip,
struct xfs_icluster *xic)
{
struct xfs_mount *mp = free_ip->i_mount;
struct xfs_ino_geometry *igeo = M_IGEO(mp);
struct xfs_buf *bp;
xfs_daddr_t blkno;
xfs_ino_t inum = xic->first_ino;
int nbufs;
int j;
int ioffset;
int error;
nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster;
for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) {
/*
* The allocation bitmap tells us which inodes of the chunk were
* physically allocated. Skip the cluster if an inode falls into
* a sparse region.
*/
ioffset = inum - xic->first_ino;
if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
ASSERT(ioffset % igeo->inodes_per_cluster == 0);
continue;
}
blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
XFS_INO_TO_AGBNO(mp, inum));
/*
* We obtain and lock the backing buffer first in the process
* here to ensure dirty inodes attached to the buffer remain in
* the flushing state while we mark them stale.
*
* If we scan the in-memory inodes first, then buffer IO can
* complete before we get a lock on it, and hence we may fail
* to mark all the active inodes on the buffer stale.
*/
error = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
mp->m_bsize * igeo->blocks_per_cluster,
XBF_UNMAPPED, &bp);
if (error)
return error;
/*
* This buffer may not have been correctly initialised as we
* didn't read it from disk. That's not important because we are
 * only using it to mark the buffer as stale in the log, and to
* attach stale cached inodes on it. That means it will never be
* dispatched for IO. If it is, we want to know about it, and we
 * want it to fail. We can achieve this by adding a write
* verifier to the buffer.
*/
bp->b_ops = &xfs_inode_buf_ops;
xfs_trans_stale_inode_buf(tp, bp);
xfs_trans_binval(tp, bp);
}
return 0;
}
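#if 0	/* illustrative sketch -- not built */
/*
 * A minimal example of where libxfs_ifree_cluster() fits: if freeing an
 * inode releases the last allocated inode in its chunk, xfs_difree()
 * reports the dead cluster through struct xfs_icluster and the buffers
 * are staled here.  Transaction and perag setup are assumed done by the
 * caller; this is a sketch, not upstream code.
 */
static int
example_free_inode(
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	struct xfs_inode	*ip)
{
	struct xfs_icluster	xic = { 0 };
	int			error;

	error = libxfs_difree(tp, pag, ip->i_ino, &xic);
	if (error)
		return error;
	if (xic.deleted)
		error = libxfs_ifree_cluster(tp, pag, ip, &xic);
	return error;
}
#endif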