| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * Copyright (c) 2000-2005 Silicon Graphics, Inc. |
| * All Rights Reserved. |
| */ |
| |
| #include "libxfs_priv.h" |
| #include "libxfs.h" |
| #include "libxfs_io.h" |
| #include "init.h" |
| #include "xfs_fs.h" |
| #include "xfs_shared.h" |
| #include "xfs_format.h" |
| #include "xfs_log_format.h" |
| #include "xfs_trans_resv.h" |
| #include "xfs_mount.h" |
| #include "xfs_defer.h" |
| #include "xfs_inode_buf.h" |
| #include "xfs_inode_fork.h" |
| #include "xfs_inode.h" |
| #include "xfs_trans.h" |
| #include "xfs_bmap.h" |
| #include "xfs_bmap_btree.h" |
| #include "xfs_trans_space.h" |
| #include "xfs_ialloc.h" |
| #include "xfs_alloc.h" |
| #include "xfs_bit.h" |
| #include "xfs_da_format.h" |
| #include "xfs_da_btree.h" |
| #include "xfs_dir2_priv.h" |
| |
| /* |
| * Initialise a newly allocated inode and return the in-core inode to the |
| * caller locked exclusively. |
| */ |
| int |
| libxfs_icreate( |
| struct xfs_trans *tp, |
| xfs_ino_t ino, |
| const struct xfs_icreate_args *args, |
| struct xfs_inode **ipp) |
| { |
| struct xfs_mount *mp = tp->t_mountp; |
| struct xfs_inode *ip = NULL; |
| int error; |
| |
| error = libxfs_iget(mp, tp, ino, XFS_IGET_CREATE, &ip); |
| if (error) |
| return error; |
| |
| ASSERT(ip != NULL); |
| xfs_trans_ijoin(tp, ip, 0); |
| xfs_inode_init(tp, args, ip); |
| |
| *ipp = ip; |
| return 0; |
| } |
| |
| /* Set up inode attributes for newly created internal files. */ |
| void |
| libxfs_icreate_args_rootfile( |
| struct xfs_icreate_args *args, |
| struct xfs_mount *mp, |
| umode_t mode, |
| bool init_xattrs) |
| { |
| args->idmap = NULL; |
| args->uid = make_kuid(0); |
| args->gid = make_kgid(0); |
| args->prid = 0; |
| args->mode = mode; |
| args->flags = XFS_ICREATE_ARGS_FORCE_UID | |
| XFS_ICREATE_ARGS_FORCE_GID | |
| XFS_ICREATE_ARGS_FORCE_MODE; |
| if (init_xattrs) |
| args->flags |= XFS_ICREATE_ARGS_INIT_XATTRS; |
| } |
| |
| /* |
 * Write a modified inode's changes out to the inode's on-disk home.
| * Originally based on xfs_iflush_int() from xfs_inode.c in the kernel. |
| */ |
| int |
| libxfs_iflush_int( |
| struct xfs_inode *ip, |
| struct xfs_buf *bp) |
| { |
| struct xfs_inode_log_item *iip; |
| struct xfs_dinode *dip; |
| struct xfs_mount *mp; |
| |
	ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE ||
	       ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
| |
| iip = ip->i_itemp; |
| mp = ip->i_mount; |
| |
| /* set *dip = inode's place in the buffer */ |
| dip = xfs_buf_offset(bp, ip->i_imap.im_boffset); |
| |
	if (XFS_ISREG(ip)) {
		ASSERT(ip->i_df.if_format == XFS_DINODE_FMT_EXTENTS ||
		       ip->i_df.if_format == XFS_DINODE_FMT_BTREE);
	} else if (XFS_ISDIR(ip)) {
		ASSERT(ip->i_df.if_format == XFS_DINODE_FMT_EXTENTS ||
		       ip->i_df.if_format == XFS_DINODE_FMT_BTREE ||
		       ip->i_df.if_format == XFS_DINODE_FMT_LOCAL);
	}
	ASSERT(ip->i_df.if_nextents + ip->i_af.if_nextents <= ip->i_nblocks);
| ASSERT(ip->i_forkoff <= mp->m_sb.sb_inodesize); |
| |
| /* bump the change count on v3 inodes */ |
| if (xfs_has_v3inodes(mp)) |
| VFS_I(ip)->i_version++; |
| |
| /* |
| * If there are inline format data / attr forks attached to this inode, |
| * make sure they are not corrupt. |
| */ |
| if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL && |
| xfs_ifork_verify_local_data(ip)) |
| return -EFSCORRUPTED; |
| if (xfs_inode_has_attr_fork(ip) && |
| ip->i_af.if_format == XFS_DINODE_FMT_LOCAL && |
| xfs_ifork_verify_local_attr(ip)) |
| return -EFSCORRUPTED; |
| |
| /* |
| * Copy the dirty parts of the inode into the on-disk |
| * inode. We always copy out the core of the inode, |
| * because if the inode is dirty at all the core must |
| * be. |
| */ |
| xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn); |
| |
| xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); |
| if (xfs_inode_has_attr_fork(ip)) |
| xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); |
| |
| /* generate the checksum. */ |
| xfs_dinode_calc_crc(mp, dip); |
| |
| return 0; |
| } |
| |
| /* |
| * Inode cache stubs. |
| */ |
| |
| struct kmem_cache *xfs_inode_cache; |
| extern struct kmem_cache *xfs_ili_cache; |
| |
| int |
| libxfs_iget( |
| struct xfs_mount *mp, |
| struct xfs_trans *tp, |
| xfs_ino_t ino, |
| uint flags, |
| struct xfs_inode **ipp) |
| { |
| struct xfs_inode *ip; |
| struct xfs_perag *pag; |
| int error = 0; |
| |
| /* reject inode numbers outside existing AGs */ |
| if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) |
| return -EINVAL; |
| |
| ip = kmem_cache_zalloc(xfs_inode_cache, 0); |
| if (!ip) |
| return -ENOMEM; |
| |
| VFS_I(ip)->i_count = 1; |
| ip->i_ino = ino; |
| ip->i_mount = mp; |
| ip->i_diflags2 = mp->m_ino_geo.new_diflags2; |
| ip->i_af.if_format = XFS_DINODE_FMT_EXTENTS; |
| ip->i_next_unlinked = NULLAGINO; |
| ip->i_prev_unlinked = NULLAGINO; |
| spin_lock_init(&VFS_I(ip)->i_lock); |
| |
| pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); |
| error = xfs_imap(pag, tp, ip->i_ino, &ip->i_imap, 0); |
| xfs_perag_put(pag); |
| |
| if (error) |
| goto out_destroy; |
| |
| /* |
| * For version 5 superblocks, if we are initialising a new inode and we |
| * are not utilising the XFS_MOUNT_IKEEP inode cluster mode, we can |
| * simply build the new inode core with a random generation number. |
| * |
 * For version 4 (and older) superblocks, log recovery is dependent on
 * the di_flushiter field being initialised from the current on-disk
 * value and hence we must also read the inode off disk even when
 * initialising new inodes.
| */ |
| if (xfs_has_v3inodes(mp) && |
| (flags & XFS_IGET_CREATE) && !xfs_has_ikeep(mp)) { |
| VFS_I(ip)->i_generation = get_random_u32(); |
| } else { |
| struct xfs_buf *bp; |
| |
| error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp); |
| if (error) |
| goto out_destroy; |
| |
| error = xfs_inode_from_disk(ip, |
| xfs_buf_offset(bp, ip->i_imap.im_boffset)); |
| if (!error) |
| xfs_buf_set_ref(bp, XFS_INO_REF); |
| xfs_trans_brelse(tp, bp); |
| |
| if (error) |
| goto out_destroy; |
| } |
| |
| *ipp = ip; |
| return 0; |
| |
| out_destroy: |
| kmem_cache_free(xfs_inode_cache, ip); |
| *ipp = NULL; |
| return error; |
| } |
| |
| /* |
| * Get a metadata inode. The ftype must match exactly. Caller must supply |
| * a transaction (even if empty) to avoid livelocking if the inobt has a cycle. |
| */ |
| int |
| libxfs_imeta_iget( |
| struct xfs_trans *tp, |
| xfs_ino_t ino, |
| unsigned char ftype, |
| struct xfs_inode **ipp) |
| { |
| struct xfs_mount *mp = tp->t_mountp; |
| struct xfs_inode *ip; |
| int error; |
| |
| error = libxfs_iget(mp, tp, ino, XFS_IGET_UNTRUSTED, &ip); |
| if (error) |
| return error; |
| |
| if ((xfs_has_metadir(mp) && !xfs_is_metadir_inode(ip)) || |
| ftype == XFS_DIR3_FT_UNKNOWN || |
| xfs_mode_to_ftype(VFS_I(ip)->i_mode) != ftype) { |
| libxfs_irele(ip); |
| return -EFSCORRUPTED; |
| } |
| |
| *ipp = ip; |
| return 0; |
| } |
| |
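/* Tear down the data, attr and CoW forks before freeing the in-core inode. */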
| static void |
| libxfs_idestroy( |
| struct xfs_inode *ip) |
| { |
| switch (VFS_I(ip)->i_mode & S_IFMT) { |
| case S_IFREG: |
| case S_IFDIR: |
| case S_IFLNK: |
| libxfs_idestroy_fork(&ip->i_df); |
| break; |
| } |
| |
| libxfs_ifork_zap_attr(ip); |
| |
| if (ip->i_cowfp) { |
| libxfs_idestroy_fork(ip->i_cowfp); |
| kmem_cache_free(xfs_ifork_cache, ip->i_cowfp); |
| } |
| } |
| |
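/* Drop a reference; free the in-core inode once the last reference is gone. */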
| void |
| libxfs_irele( |
| struct xfs_inode *ip) |
| { |
| VFS_I(ip)->i_count--; |
| |
| if (VFS_I(ip)->i_count == 0) { |
| ASSERT(ip->i_itemp == NULL); |
| libxfs_idestroy(ip); |
| kmem_cache_free(xfs_inode_cache, ip); |
| } |
| } |
| |
| void |
| libxfs_imeta_irele( |
| struct xfs_inode *ip) |
| { |
| ASSERT(!xfs_has_metadir(ip->i_mount) || xfs_is_metadir_inode(ip)); |
| |
| libxfs_irele(ip); |
| } |
| |
| static inline void inode_fsgid_set(struct inode *inode, |
| struct mnt_idmap *idmap) |
| { |
| inode->i_gid = make_kgid(0); |
| } |
| |
| void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode, |
| const struct inode *dir, umode_t mode) |
| { |
| inode_fsuid_set(inode, idmap); |
| if (dir && dir->i_mode & S_ISGID) { |
| inode->i_gid = dir->i_gid; |
| |
| /* Directories are special, and always inherit S_ISGID */ |
| if (S_ISDIR(mode)) |
| mode |= S_ISGID; |
| } else |
| inode_fsgid_set(inode, idmap); |
| inode->i_mode = mode; |
| } |
| |
| /* |
| * This call is used to indicate that the buffer is going to |
| * be staled and was an inode buffer. This means it gets |
| * special processing during unpin - where any inodes |
| * associated with the buffer should be removed from ail. |
| * There is also special processing during recovery, |
| * any replay of the inodes in the buffer needs to be |
| * prevented as the buffer may have been reused. |
| */ |
static void
xfs_trans_stale_inode_buf(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bip = bp->b_log_item;

	ASSERT(bp->b_transp == tp);
	ASSERT(bip != NULL);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);

	bp->b_flags |= _XBF_INODES;
	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
}
| |
| /* |
| * A big issue when freeing the inode cluster is that we _cannot_ skip any |
| * inodes that are in memory - they all must be marked stale and attached to |
| * the cluster buffer. |
| */ |
| int |
| libxfs_ifree_cluster( |
| struct xfs_trans *tp, |
| struct xfs_perag *pag, |
| struct xfs_inode *free_ip, |
| struct xfs_icluster *xic) |
| { |
| struct xfs_mount *mp = free_ip->i_mount; |
| struct xfs_ino_geometry *igeo = M_IGEO(mp); |
| struct xfs_buf *bp; |
| xfs_daddr_t blkno; |
| xfs_ino_t inum = xic->first_ino; |
| int nbufs; |
| int j; |
| int ioffset; |
| int error; |
| |
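	/*
	 * An inode chunk covers ialloc_blks filesystem blocks and each
	 * cluster buffer covers blocks_per_cluster of them, so the chunk
	 * spans nbufs cluster buffers.
	 */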
| nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster; |
| |
| for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) { |
| /* |
| * The allocation bitmap tells us which inodes of the chunk were |
| * physically allocated. Skip the cluster if an inode falls into |
| * a sparse region. |
| */ |
| ioffset = inum - xic->first_ino; |
| if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) { |
| ASSERT(ioffset % igeo->inodes_per_cluster == 0); |
| continue; |
| } |
| |
| blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), |
| XFS_INO_TO_AGBNO(mp, inum)); |
| |
| /* |
| * We obtain and lock the backing buffer first in the process |
| * here to ensure dirty inodes attached to the buffer remain in |
| * the flushing state while we mark them stale. |
| * |
| * If we scan the in-memory inodes first, then buffer IO can |
| * complete before we get a lock on it, and hence we may fail |
| * to mark all the active inodes on the buffer stale. |
| */ |
| error = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, |
| mp->m_bsize * igeo->blocks_per_cluster, |
| XBF_UNMAPPED, &bp); |
| if (error) |
| return error; |
| |
| /* |
| * This buffer may not have been correctly initialised as we |
| * didn't read it from disk. That's not important because we are |
| * only using to mark the buffer as stale in the log, and to |
| * attach stale cached inodes on it. That means it will never be |
| * dispatched for IO. If it is, we want to know about it, and we |
| * want it to fail. We can acheive this by adding a write |
| * verifier to the buffer. |
| */ |
| bp->b_ops = &xfs_inode_buf_ops; |
| |
| xfs_trans_stale_inode_buf(tp, bp); |
| xfs_trans_binval(tp, bp); |
| } |
| return 0; |
| } |