// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */

#include "libxfs_priv.h"
#include "libxfs.h"
#include "libxfs_io.h"
#include "init.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode_buf.h"
#include "xfs_inode_fork.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_bit.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2_priv.h"

/*
 * Calculate the worst-case log unit reservation for a given superblock
 * configuration.  Copied and munged from the kernel code, and assumes
 * worst-case header usage (maximum log buffer sizes).
 */
int
xfs_log_calc_unit_res(
        struct xfs_mount *mp,
        int unit_bytes)
{
        int iclog_space;
        int iclog_header_size;
        int iclog_size;
        uint num_headers;

        if (xfs_has_logv2(mp)) {
                iclog_size = XLOG_MAX_RECORD_BSIZE;
                iclog_header_size = BBTOB(iclog_size / XLOG_HEADER_CYCLE_SIZE);
        } else {
                iclog_size = XLOG_BIG_RECORD_BSIZE;
                iclog_header_size = BBSIZE;
        }

        /*
         * Permanent reservations have up to 'cnt' - 1 active log operations
         * in the log.  A unit in this case is the amount of space for one
         * of these log operations.  Normal reservations have a cnt of 1
         * and their unit amount is the total amount of space required.
         *
         * The following lines of code account for non-transaction data
         * which occupy space in the on-disk log.
         *
         * Normal form of a transaction is:
         * <oph><trans-hdr><start-oph><reg1-oph><reg1><reg2-oph>...<commit-oph>
         * and then there are LR hdrs, split-recs and roundoff at end of syncs.
         *
         * We need to account for all the lead-up and trailer data around the
         * transaction data, and then for the worst case in terms of extra
         * space used.  The worst case happens when:
         * - the placement of the transaction is such that the roundoff is at
         *   its maximum, and
         * - the transaction data is synced before the commit record is
         *   synced, i.e. <transaction-data><roundoff> | <commit-rec><roundoff>
         *   so the commit record lands in its own Log Record.  This can
         *   happen because the commit record is passed to xlog_write() as its
         *   own region.
         * In the worst case, roundoff can therefore happen for the commit
         * record as well.  The commit record is smaller than the padding in
         * this scenario, so it is not added separately.
         */
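        /*
         * Worked example (illustrative numbers only, not taken from any
         * particular filesystem): with a 256KiB log stripe unit, the
         * roundoff reservation added at the end of this function is
         * 2 * 256KiB = 512KiB; without a stripe unit it is only
         * 2 * BBSIZE = 1KiB.
         */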

        /* for trans header */
        unit_bytes += sizeof(xlog_op_header_t);
        unit_bytes += sizeof(xfs_trans_header_t);

        /* for start-rec */
        unit_bytes += sizeof(xlog_op_header_t);

        /*
         * for LR headers - the space for data in an iclog is the size minus
         * the space used for the headers.  If we use the iclog size, then we
         * undercalculate the number of headers required.
         *
         * Furthermore - the addition of op headers for split-recs might
         * increase the space required enough to require more log and op
         * headers, so take that into account too.
         *
         * IMPORTANT: This reservation makes the assumption that if this
         * transaction is the first in an iclog and hence has the LR headers
         * accounted to it, then the remaining space in the iclog is
         * exclusively for this transaction.  i.e. if the transaction is
         * larger than the iclog, it will be the only thing in that iclog.
         * Fundamentally, this means we must pass the entire log vector to
         * xlog_write to guarantee this.
         */
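        /*
         * Illustrative example (the 600KiB figure is invented): with a v2
         * log, iclog_space below is 256KiB - 4KiB = 252KiB.  For a
         * unit_bytes of 600KiB, howmany() yields 3 LR headers; the three
         * split-rec op headers added just below are then re-checked by the
         * loop in case they push the total across another iclog_space
         * boundary, which would require a fourth header.
         */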
        iclog_space = iclog_size - iclog_header_size;
        num_headers = howmany(unit_bytes, iclog_space);

        /* for split-recs - ophdrs added when data split over LRs */
        unit_bytes += sizeof(xlog_op_header_t) * num_headers;

        /* add extra header reservations if we overrun */
        while (!num_headers ||
               howmany(unit_bytes, iclog_space) > num_headers) {
                unit_bytes += sizeof(xlog_op_header_t);
                num_headers++;
        }
        unit_bytes += iclog_header_size * num_headers;

        /* for commit-rec LR header - note: padding will subsume the ophdr */
        unit_bytes += iclog_header_size;

        /* for roundoff padding for transaction data and one for commit record */
        if (xfs_has_logv2(mp) && mp->m_sb.sb_logsunit > 1) {
                /* log su roundoff */
                unit_bytes += 2 * mp->m_sb.sb_logsunit;
        } else {
                /* BB roundoff */
                unit_bytes += 2 * BBSIZE;
        }

        return unit_bytes;
}
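
/*
 * Hypothetical usage sketch, not a call site in this file: log sizing code
 * can convert a raw transaction reservation into its worst-case on-disk
 * footprint, e.g.
 *
 *        tres.tr_logres = xfs_log_calc_unit_res(mp, tres.tr_logres);
 *
 * and then combine that unit reservation with tres.tr_logcount when deciding
 * how small a log can safely be.
 */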

struct timespec64
current_time(struct inode *inode)
{
        struct timespec64 tv;
        struct timeval stv;

        gettimeofday(&stv, (struct timezone *)0);
        tv.tv_sec = stv.tv_sec;
        tv.tv_nsec = stv.tv_usec * 1000;

        return tv;
}

int
libxfs_mod_incore_sb(
        struct xfs_mount *mp,
        int field,
        int64_t delta,
        int rsvd)
{
        long long lcounter;     /* long counter for 64 bit fields */

        switch (field) {
        case XFS_TRANS_SB_FDBLOCKS:
                lcounter = (long long)mp->m_sb.sb_fdblocks;
                lcounter += delta;
                if (lcounter < 0)
                        return -ENOSPC;
                mp->m_sb.sb_fdblocks = lcounter;
                return 0;
        default:
                ASSERT(0);
                return -EINVAL;
        }
}
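
/*
 * Illustrative only: a caller that reserves nblocks of free space from the
 * in-core superblock, and returns them on failure, might do
 *
 *        error = libxfs_mod_incore_sb(mp, XFS_TRANS_SB_FDBLOCKS,
 *                        -(int64_t)nblocks, 0);
 *        ...
 *        libxfs_mod_incore_sb(mp, XFS_TRANS_SB_FDBLOCKS, (int64_t)nblocks, 0);
 *
 * Only the free data block counter is handled here; any other field trips
 * the ASSERT above.
 */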

/*
 * This routine allocates disk space for the given file.
 * Originally derived from xfs_alloc_file_space().
 */
int
libxfs_alloc_file_space(
        struct xfs_inode *ip,
        xfs_off_t offset,
        xfs_off_t len,
        int alloc_type)
{
        struct xfs_bmbt_irec imaps[1];
        struct xfs_bmbt_irec *imapp;
        struct xfs_mount *mp;
        struct xfs_trans *tp;
        xfs_off_t count;
        xfs_filblks_t datablocks;
        xfs_filblks_t allocated_fsb;
        xfs_filblks_t allocatesize_fsb;
        int reccount;
        uint resblks;
        xfs_fileoff_t startoffset_fsb;
        int error;

        if (len <= 0)
                return -EINVAL;

        count = len;
        error = 0;
        imapp = &imaps[0];
        reccount = 1;
        mp = ip->i_mount;
        startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
        allocatesize_fsb = XFS_B_TO_FSB(mp, count);

        /* allocate file space until done or until there is an error */
        while (allocatesize_fsb && !error) {
                datablocks = allocatesize_fsb;

                resblks = (uint)XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
                error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
                                0, 0, &tp);
                /*
                 * Check for running out of space
                 */
                if (error) {
                        ASSERT(error == -ENOSPC);
                        break;
                }
                xfs_trans_ijoin(tp, ip, 0);

                error = xfs_bmapi_write(tp, ip, startoffset_fsb,
                                allocatesize_fsb, alloc_type, resblks,
                                imapp, &reccount);

                if (error)
                        goto error0;

                /*
                 * Complete the transaction
                 */
                error = xfs_trans_commit(tp);
                if (error)
                        break;

                allocated_fsb = imapp->br_blockcount;
                if (reccount == 0)
                        return -ENOSPC;

                startoffset_fsb += allocated_fsb;
                allocatesize_fsb -= allocated_fsb;
        }
        return error;

error0:        /* Cancel bmap, cancel trans */
        xfs_trans_cancel(tp);
        return error;
}
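
/*
 * Hypothetical usage sketch (not a call site in this file): preallocating
 * the first megabyte of a file might look like
 *
 *        error = libxfs_alloc_file_space(ip, 0, 1 << 20, XFS_BMAPI_PREALLOC);
 *
 * The offset and length are in bytes and are converted to filesystem blocks
 * internally; alloc_type is passed straight through to xfs_bmapi_write() as
 * its flags argument.
 */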

void
cmn_err(int level, char *fmt, ...)
{
        va_list ap;

        va_start(ap, fmt);
        vfprintf(stderr, fmt, ap);
        fputs("\n", stderr);
        va_end(ap);
}

/*
 * Warnings specifically for verifier errors.  Differentiate CRC vs. invalid
 * values, and omit the stack trace unless the error level is tuned high.
 */
void
xfs_verifier_error(
        struct xfs_buf *bp,
        int error,
        xfs_failaddr_t failaddr)
{
        xfs_buf_ioerror(bp, error);

        xfs_alert(NULL, "Metadata %s detected at %p, %s block 0x%llx/0x%x",
                  bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
                  failaddr ? failaddr : __return_address,
                  bp->b_ops->name, xfs_buf_daddr(bp), BBTOB(bp->b_length));
}

/*
 * Warnings for inode corruption problems.  Don't bother with the stack
 * trace unless the error level is turned up high.
 */
void
xfs_inode_verifier_error(
        struct xfs_inode *ip,
        int error,
        const char *name,
        void *buf,
        size_t bufsz,
        xfs_failaddr_t failaddr)
{
        xfs_alert(NULL, "Metadata %s detected at %p, inode 0x%llx %s",
                  error == -EFSBADCRC ? "CRC error" : "corruption",
                  failaddr ? failaddr : __return_address,
                  ip->i_ino, name);
}

/*
 * Complain about the kinds of metadata corruption that we can't detect from a
 * verifier, such as incorrect inter-block relationship data.  Does not set
 * bp->b_error.
 */
void
xfs_buf_corruption_error(
        struct xfs_buf *bp,
        xfs_failaddr_t fa)
{
        xfs_alert(NULL, "Metadata corruption detected at %p, %s block 0x%llx",
                  fa, bp->b_ops->name, xfs_buf_daddr(bp));
}

/*
 * This is called from I/O verifiers on v5 superblock filesystems.  In the
 * kernel, it validates the metadata LSN parameter against the current LSN of
 * the active log.  We don't have an active log in userspace, so this kind of
 * validation is not required.  Therefore, this function always returns true
 * in userspace.
 *
 * xfs_repair piggybacks off this mechanism to help track the largest metadata
 * LSN in use on a filesystem.  Keep a record of the largest LSN seen such
 * that repair can validate it against the state of the log.
 */
xfs_lsn_t libxfs_max_lsn = 0;
static pthread_mutex_t libxfs_max_lsn_lock = PTHREAD_MUTEX_INITIALIZER;

bool
xfs_log_check_lsn(
        struct xfs_mount *mp,
        xfs_lsn_t lsn)
{
        int cycle = CYCLE_LSN(lsn);
        int block = BLOCK_LSN(lsn);
        int max_cycle;
        int max_block;

        if (lsn == NULLCOMMITLSN)
                return true;

        pthread_mutex_lock(&libxfs_max_lsn_lock);

        max_cycle = CYCLE_LSN(libxfs_max_lsn);
        max_block = BLOCK_LSN(libxfs_max_lsn);

        if ((cycle > max_cycle) ||
            (cycle == max_cycle && block > max_block))
                libxfs_max_lsn = lsn;

        pthread_mutex_unlock(&libxfs_max_lsn_lock);

        return true;
}
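
/*
 * Sketch of how a consumer such as xfs_repair might use the value recorded
 * above (hypothetical code, not part of this file): once all verifiers have
 * run, compare the largest observed metadata LSN against the head of the
 * on-disk log, e.g.
 *
 *        if (CYCLE_LSN(libxfs_max_lsn) > log_head_cycle)
 *                treat the log as inconsistent;
 *
 * where log_head_cycle is whatever the caller read from the log itself.
 */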

void
xfs_log_item_init(
        struct xfs_mount *mp,
        struct xfs_log_item *item,
        int type)
{
        item->li_mountp = mp;
        item->li_type = type;

        INIT_LIST_HEAD(&item->li_trans);
        INIT_LIST_HEAD(&item->li_bio_list);
}

static struct xfs_buftarg *
xfs_find_bdev_for_inode(
        struct xfs_inode *ip)
{
        struct xfs_mount *mp = ip->i_mount;

        if (XFS_IS_REALTIME_INODE(ip))
                return mp->m_rtdev_targp;
        return mp->m_ddev_targp;
}

static xfs_daddr_t
xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
{
        if (XFS_IS_REALTIME_INODE(ip))
                return XFS_FSB_TO_BB(ip->i_mount, fsb);
        return XFS_FSB_TO_DADDR(ip->i_mount, fsb);
}

int
libxfs_zero_extent(
        struct xfs_inode *ip,
        xfs_fsblock_t start_fsb,
        xfs_off_t count_fsb)
{
        xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb);
        ssize_t size = XFS_FSB_TO_BB(ip->i_mount, count_fsb);

        return libxfs_device_zero(xfs_find_bdev_for_inode(ip), sector, size);
}

unsigned int
hweight8(unsigned int w)
{
        unsigned int res = w - ((w >> 1) & 0x55);
        res = (res & 0x33) + ((res >> 2) & 0x33);
        return (res + (res >> 4)) & 0x0F;
}

unsigned int
hweight32(unsigned int w)
{
        unsigned int res = w - ((w >> 1) & 0x55555555);
        res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
        res = (res + (res >> 4)) & 0x0F0F0F0F;
        res = res + (res >> 8);
        return (res + (res >> 16)) & 0x000000FF;
}

unsigned int
hweight64(__u64 w)
{
        return hweight32((unsigned int)w) +
               hweight32((unsigned int)(w >> 32));
}
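
/*
 * Worked example of the population-count folds above (value chosen purely
 * for illustration): for hweight32(0xF0F0F0F0), the 2-bit pair sums are
 * 0xA0A0A0A0, the nibble sums 0x40404040, the per-byte counts 0x04040404,
 * and the final byte folds return 16.  Note that hweight8() assumes its
 * argument fits in the low 8 bits.
 */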

/* xfs_health.c */

/* Mark per-fs metadata as healed. */
void
xfs_fs_mark_healthy(
        struct xfs_mount *mp,
        unsigned int mask)
{
        ASSERT(!(mask & ~XFS_SICK_FS_PRIMARY));
        trace_xfs_fs_mark_healthy(mp, mask);

        spin_lock(&mp->m_sb_lock);
        mp->m_fs_sick &= ~mask;
        mp->m_fs_checked |= mask;
        spin_unlock(&mp->m_sb_lock);
}

void xfs_ag_geom_health(struct xfs_perag *pag, struct xfs_ag_geometry *ageo) { }
void xfs_agno_mark_sick(struct xfs_mount *mp, xfs_agnumber_t agno,
                unsigned int mask) { }
void xfs_ag_mark_sick(struct xfs_perag *pag, unsigned int mask) { }
void xfs_bmap_mark_sick(struct xfs_inode *ip, int whichfork) { }
void xfs_btree_mark_sick(struct xfs_btree_cur *cur) { }
void xfs_dirattr_mark_sick(struct xfs_inode *ip, int whichfork) { }
void xfs_da_mark_sick(struct xfs_da_args *args) { }
void xfs_inode_mark_sick(struct xfs_inode *ip, unsigned int mask) { }
void xfs_rt_mark_sick(struct xfs_mount *mp, unsigned int mask) { }

/* Create a metadata directory for the last component of the path. */
STATIC int
libxfs_imeta_mkdir(
        struct xfs_mount *mp,
        const struct xfs_imeta_path *path)
{
        struct xfs_imeta_end ic;
        struct xfs_inode *ip = NULL;
        struct xfs_trans *tp = NULL;
        uint resblks;
        int error;

        /* Allocate a transaction to create the last directory. */
        resblks = libxfs_imeta_create_space_res(mp);
        error = libxfs_trans_alloc(mp, &M_RES(mp)->tr_imeta_create, resblks,
                        0, 0, &tp);
        if (error)
                return error;

        /* Create the subdirectory. */
        error = libxfs_imeta_create(&tp, path, S_IFDIR, 0, &ip, &ic);
        if (error) {
                libxfs_trans_cancel(tp);
                libxfs_imeta_end_update(mp, &ic, error);
                goto out_irele;
        }

        error = libxfs_trans_commit(tp);
        libxfs_imeta_end_update(mp, &ic, error);

out_irele:
        if (ip)
                libxfs_irele(ip);
        return error;
}

/*
 * Make sure that every metadata directory path component exists and is a
 * directory.
 */
int
libxfs_imeta_ensure_dirpath(
        struct xfs_mount *mp,
        const struct xfs_imeta_path *path)
{
        struct xfs_imeta_path temp_path = {
                .im_path = path->im_path,
                .im_depth = 1,
                .im_ftype = XFS_DIR3_FT_DIR,
        };
        unsigned int i;
        int error = 0;

        if (!xfs_has_metadir(mp))
                return 0;

        for (i = 0; i < path->im_depth - 1; i++, temp_path.im_depth++) {
                error = libxfs_imeta_mkdir(mp, &temp_path);
                if (error && error != -EEXIST)
                        break;
        }

        return error == -EEXIST ? 0 : error;
}
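
/*
 * Worked example (path names invented for illustration): for a metadata path
 * of depth 3, say "a/b/c", the loop above calls libxfs_imeta_mkdir() with
 * im_depth of 1 and then 2, creating "a" and "a/b" if they do not already
 * exist; -EEXIST from components that are already present is ignored.  The
 * final component "c" is left for the caller to create.
 */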

/*
 * Write a buffer to a file on the data device.  We assume there are no holes
 * and no unwritten extents.
 */
int
libxfs_file_write(
        struct xfs_trans *tp,
        struct xfs_inode *ip,
        void *buf,
        size_t len,
        bool logit)
{
        struct xfs_bmbt_irec map;
        struct xfs_mount *mp = tp->t_mountp;
        struct xfs_buf *bp;
        xfs_daddr_t d;
        xfs_fileoff_t bno = 0;
        xfs_fileoff_t end_bno = XFS_B_TO_FSB(mp, len);
        size_t count;
        size_t bcount;
        int nmap;
        int error = 0;

        /* Write one block at a time. */
        while (bno < end_bno) {
                nmap = 1;
                error = libxfs_bmapi_read(ip, bno, end_bno, &map, &nmap, 0);
                if (error)
                        return error;
                if (nmap != 1)
                        return -ENOSPC;

                if (map.br_startblock == HOLESTARTBLOCK ||
                    map.br_state == XFS_EXT_UNWRITTEN)
                        return -EINVAL;

                d = XFS_FSB_TO_DADDR(mp, map.br_startblock);
                error = libxfs_trans_get_buf(logit ? tp : NULL, mp->m_dev, d,
                                map.br_blockcount << mp->m_blkbb_log, 0, &bp);
                if (error)
                        break;

                /* Copy the payload and zero any tail of the buffer. */
                count = min(len, XFS_FSB_TO_B(mp, map.br_blockcount));
                memmove(bp->b_addr, buf, count);
                bcount = BBTOB(bp->b_length);
                if (count < bcount)
                        memset((char *)bp->b_addr + count, 0, bcount - count);

                if (logit) {
                        libxfs_trans_log_buf(tp, bp, 0, bcount - 1);
                } else {
                        libxfs_buf_mark_dirty(bp);
                        libxfs_buf_relse(bp);
                }

                buf = (char *)buf + count;
                len -= count;
                bno += map.br_blockcount;
        }

        return error;
}
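
/*
 * Hypothetical usage sketch (illustrative names, not call sites in this
 * file): populating a freshly created metadata file from an in-memory buffer
 * inside a dirty transaction might look like
 *
 *        error = libxfs_file_write(tp, ip, data, datalen, true);
 *
 * With logit set, each block is logged through the transaction; with it
 * clear, the buffers are simply marked dirty and released, leaving the
 * caller to flush them through the buffer cache.
 */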