| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * Copyright (c) 2000-2006 Silicon Graphics, Inc. |
| * All Rights Reserved. |
| */ |
| |
| #include "libxfs_priv.h" |
| #include "init.h" |
| #include "xfs_fs.h" |
| #include "xfs_shared.h" |
| #include "xfs_format.h" |
| #include "xfs_log_format.h" |
| #include "xfs_trans_resv.h" |
| #include "xfs_mount.h" |
| #include "xfs_inode_buf.h" |
| #include "xfs_inode_fork.h" |
| #include "xfs_inode.h" |
| #include "xfs_trans.h" |
| #include "libfrog/platform.h" |
| #include "libxfs/xfile.h" |
| #include "libxfs/buf_mem.h" |
| #include "libxfs.h" |
| |
| static void libxfs_brelse(struct cache_node *node); |
| |
| /* |
| * Important design/architecture note: |
| * |
| * The userspace code that uses the buffer cache is much less constrained than |
| * the kernel code. The userspace code is pretty nasty in places, especially |
| * when it comes to buffer error handling. Very little of the userspace code |
| * outside libxfs clears bp->b_error - very little code even checks it - so the |
| * libxfs code is tripping on stale errors left by the userspace code. |
| * |
| * We can't clear errors or zero buffer contents in libxfs_buf_get_* like we do |
| * in the kernel, because those functions are used by the libxfs_readbuf_* |
| * functions and hence need to leave the buffers unchanged on cache hits. This |
| * is actually the only way to gather a write error from a libxfs_writebuf() |
| * call - you need to get the buffer again so you can check the bp->b_error field - |
| * assuming that the buffer is still in the cache when you check, that is. |
| * |
| * This is very different from the kernel code, which does not release buffers |
| * on write and so can wait on the IO and check for errors directly. The kernel |
| * buffer cache also guarantees a buffer of a known initial state from |
| * xfs_buf_get() even on a cache hit. |
| * |
| * IOWs, userspace is behaving quite differently to the kernel and as a result |
| * it leaks errors from reads, invalidations and writes through |
| * libxfs_buf_get/libxfs_buf_read. |
| * |
| * The result of this is that until the userspace code outside libxfs is cleaned |
| * up, functions that release buffers from userspace control (i.e. |
| * libxfs_writebuf/libxfs_buf_relse) need to zero bp->b_error to prevent |
| * propagation of stale errors into future buffer operations. |
| */ |
| |
| #define BDSTRAT_SIZE (256 * 1024) |
| |
| #define IO_BCOMPARE_CHECK |
| |
| /* XXX: (dgc) Propagate errors, only exit if fail-on-error flag set */ |
| int |
| libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len) |
| { |
| int fd = btp->bt_bdev_fd; |
| xfs_off_t start_offset, end_offset, offset; |
| ssize_t zsize, bytes; |
| size_t len_bytes; |
| char *z; |
| int error; |
| |
| if (xfs_buftarg_is_mem(btp)) |
| return -EOPNOTSUPP; |
| |
| start_offset = LIBXFS_BBTOOFF64(start); |
| |
| /* try to use special zeroing methods, fall back to writes if needed */ |
| len_bytes = LIBXFS_BBTOOFF64(len); |
| error = fallocate(fd, FALLOC_FL_ZERO_RANGE, start_offset, len_bytes); |
| if (!error) { |
| xfs_buftarg_trip_write(btp); |
| return 0; |
| } |
| |
| zsize = min(BDSTRAT_SIZE, BBTOB(len)); |
| if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) { |
| fprintf(stderr, |
| _("%s: %s can't memalign %d bytes: %s\n"), |
| progname, __FUNCTION__, (int)zsize, strerror(errno)); |
| exit(1); |
| } |
| memset(z, 0, zsize); |
| |
| if ((lseek(fd, start_offset, SEEK_SET)) < 0) { |
| fprintf(stderr, _("%s: %s seek to offset %llu failed: %s\n"), |
| progname, __FUNCTION__, |
| (unsigned long long)start_offset, strerror(errno)); |
| exit(1); |
| } |
| |
| end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset; |
| for (offset = 0; offset < end_offset; ) { |
| bytes = min((ssize_t)(end_offset - offset), zsize); |
| if ((bytes = write(fd, z, bytes)) < 0) { |
| fprintf(stderr, _("%s: %s write failed: %s\n"), |
| progname, __FUNCTION__, strerror(errno)); |
| exit(1); |
| } else if (bytes == 0) { |
| fprintf(stderr, _("%s: %s not progressing?\n"), |
| progname, __FUNCTION__); |
| exit(1); |
| } |
| xfs_buftarg_trip_write(btp); |
| offset += bytes; |
| } |
| free(z); |
| return 0; |
| } |
| |
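| /* |
| * Format an unmount record into a single basic block: an op header with a |
| * dummy transaction id followed by the XLOG_UNMOUNT_TYPE magic payload. |
| */ |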
| static void unmount_record(void *p) |
| { |
| xlog_op_header_t *op = (xlog_op_header_t *)p; |
| /* the data section must be 32 bit size aligned */ |
| struct { |
| uint16_t magic; |
| uint16_t pad1; |
| uint32_t pad2; /* may as well make it 64 bits */ |
| } magic = { XLOG_UNMOUNT_TYPE, 0, 0 }; |
| |
| memset(p, 0, BBSIZE); |
| /* dummy tid to mark this as written from userspace */ |
| op->oh_tid = cpu_to_be32(0xb0c0d0d0); |
| op->oh_len = cpu_to_be32(sizeof(magic)); |
| op->oh_clientid = XFS_LOG; |
| op->oh_flags = XLOG_UNMOUNT_TRANS; |
| op->oh_res2 = 0; |
| |
| /* and the data for this op */ |
| memcpy((char *)p + sizeof(xlog_op_header_t), &magic, sizeof(magic)); |
| } |
| |
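| /* |
| * Block-advance callback handed to libxfs_log_header(): step @offset bytes |
| * through the log buffer, aborting if that would run past the end of the |
| * buffer passed in @private (which may be NULL for a plain memory region). |
| */ |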
| static char *next( |
| char *ptr, |
| int offset, |
| void *private) |
| { |
| struct xfs_buf *buf = (struct xfs_buf *)private; |
| |
| if (buf && |
| (BBTOB(buf->b_length) < (int)(ptr - (char *)buf->b_addr) + offset)) |
| abort(); |
| |
| return ptr + offset; |
| } |
| |
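| /* Read the primary superblock of @mp into a cached buffer. */ |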
| struct xfs_buf * |
| libxfs_getsb( |
| struct xfs_mount *mp) |
| { |
| struct xfs_buf *bp; |
| |
| libxfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, XFS_FSS_TO_BB(mp, 1), |
| 0, &bp, &xfs_sb_buf_ops); |
| return bp; |
| } |
| |
| struct kmem_cache *xfs_buf_cache; |
| |
| static struct cache_mru xfs_buf_freelist = |
| {{&xfs_buf_freelist.cm_list, &xfs_buf_freelist.cm_list}, |
| 0, PTHREAD_MUTEX_INITIALIZER }; |
| |
| /* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ |
| #define GOLDEN_RATIO_PRIME 0x9e37fffffffc0001UL |
| #define CACHE_LINE_SIZE 64 |
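| /* Hash a buffer cache key (its disk address) into the cache hash table. */ |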
| unsigned int |
| libxfs_bhash(cache_key_t key, unsigned int hashsize, unsigned int hashshift) |
| { |
| uint64_t hashval = ((struct xfs_bufkey *)key)->blkno; |
| uint64_t tmp; |
| |
| tmp = hashval ^ (GOLDEN_RATIO_PRIME + hashval) / CACHE_LINE_SIZE; |
| tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> hashshift); |
| return tmp % hashsize; |
| } |
| |
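| /* |
| * Compare a cache node against a lookup key. A buffer at the right disk |
| * address but with the wrong length is reported as CACHE_PURGE so the cache |
| * drops it and a correctly sized buffer can be allocated in its place. |
| */ |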
| int |
| libxfs_bcompare( |
| struct cache_node *node, |
| cache_key_t key) |
| { |
| struct xfs_buf *bp = container_of(node, struct xfs_buf, |
| b_node); |
| struct xfs_bufkey *bkey = (struct xfs_bufkey *)key; |
| struct cache *bcache = bkey->buftarg->bcache; |
| |
| if (bp->b_cache_key == bkey->blkno) { |
| if (bp->b_length == bkey->bblen) |
| return CACHE_HIT; |
| #ifdef IO_BCOMPARE_CHECK |
| if (!(bcache->c_flags & CACHE_MISCOMPARE_PURGE)) { |
| fprintf(stderr, |
| "%lx: Badness in key lookup (length)\n" |
| "bp=(bno 0x%llx, len %u bytes) key=(bno 0x%llx, len %u bytes)\n", |
| pthread_self(), |
| (unsigned long long)xfs_buf_daddr(bp), |
| BBTOB(bp->b_length), |
| (unsigned long long)bkey->blkno, |
| BBTOB(bkey->bblen)); |
| } |
| #endif |
| return CACHE_PURGE; |
| } |
| return CACHE_MISS; |
| } |
| |
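| /* |
| * Initialise the common fields of a buffer for the given target and block |
| * number, zeroing its contents and allocating device-aligned data memory |
| * if the buffer does not already have any. |
| */ |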
| static void |
| __initbuf(struct xfs_buf *bp, struct xfs_buftarg *btp, xfs_daddr_t bno, |
| unsigned int bytes) |
| { |
| ASSERT(!xfs_buftarg_is_mem(btp)); |
| |
| bp->b_flags = 0; |
| bp->b_cache_key = bno; |
| bp->b_length = BTOBB(bytes); |
| bp->b_target = btp; |
| bp->b_mount = btp->bt_mount; |
| bp->b_error = 0; |
| if (!bp->b_addr) |
| bp->b_addr = memalign(libxfs_device_alignment(), bytes); |
| if (!bp->b_addr) { |
| fprintf(stderr, |
| _("%s: %s can't memalign %u bytes: %s\n"), |
| progname, __FUNCTION__, bytes, |
| strerror(errno)); |
| exit(1); |
| } |
| memset(bp->b_addr, 0, bytes); |
| pthread_mutex_init(&bp->b_lock, NULL); |
| bp->b_holder = 0; |
| bp->b_recur = 0; |
| bp->b_ops = NULL; |
| INIT_LIST_HEAD(&bp->b_li_list); |
| |
| if (!bp->b_maps) |
| bp->b_maps = &bp->__b_map; |
| |
| if (bp->b_maps == &bp->__b_map) { |
| bp->b_nmaps = 1; |
| bp->b_maps[0].bm_bn = bno; |
| bp->b_maps[0].bm_len = bp->b_length; |
| } |
| } |
| |
| static void |
| libxfs_initbuf(struct xfs_buf *bp, struct xfs_buftarg *btp, xfs_daddr_t bno, |
| unsigned int bytes) |
| { |
| __initbuf(bp, btp, bno, bytes); |
| } |
| |
| static void |
| libxfs_initbuf_map(struct xfs_buf *bp, struct xfs_buftarg *btp, |
| struct xfs_buf_map *map, int nmaps) |
| { |
| unsigned int bytes = 0; |
| int i; |
| |
| bytes = sizeof(struct xfs_buf_map) * nmaps; |
| bp->b_maps = malloc(bytes); |
| if (!bp->b_maps) { |
| fprintf(stderr, |
| _("%s: %s can't malloc %u bytes: %s\n"), |
| progname, __FUNCTION__, bytes, |
| strerror(errno)); |
| exit(1); |
| } |
| bp->b_nmaps = nmaps; |
| |
| bytes = 0; |
| for (i = 0; i < nmaps; i++) { |
| bp->b_maps[i].bm_bn = map[i].bm_bn; |
| bp->b_maps[i].bm_len = map[i].bm_len; |
| bytes += BBTOB(map[i].bm_len); |
| } |
| |
| __initbuf(bp, btp, map[0].bm_bn, bytes); |
| bp->b_flags |= LIBXFS_B_DISCONTIG; |
| } |
| |
| static struct xfs_buf * |
| __libxfs_getbufr(int blen) |
| { |
| struct xfs_buf *bp; |
| |
| /* |
| * First look for a free-list buffer whose length matches and can be |
| * reused as-is. If there isn't one, recycle the first buffer on the |
| * free list instead: free its data and maps and clear b_addr so that |
| * libxfs_initbuf will allocate fresh memory for it. |
| */ |
| pthread_mutex_lock(&xfs_buf_freelist.cm_mutex); |
| if (!list_empty(&xfs_buf_freelist.cm_list)) { |
| list_for_each_entry(bp, &xfs_buf_freelist.cm_list, b_node.cn_mru) { |
| if (bp->b_length == BTOBB(blen)) { |
| list_del_init(&bp->b_node.cn_mru); |
| break; |
| } |
| } |
| if (&bp->b_node.cn_mru == &xfs_buf_freelist.cm_list) { |
| bp = list_entry(xfs_buf_freelist.cm_list.next, |
| struct xfs_buf, b_node.cn_mru); |
| list_del_init(&bp->b_node.cn_mru); |
| free(bp->b_addr); |
| bp->b_addr = NULL; |
| if (bp->b_maps != &bp->__b_map) |
| free(bp->b_maps); |
| bp->b_maps = NULL; |
| } |
| } else |
| bp = kmem_cache_zalloc(xfs_buf_cache, 0); |
| pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex); |
| bp->b_ops = NULL; |
| if (bp->b_flags & LIBXFS_B_DIRTY) |
| fprintf(stderr, "found dirty buffer (bulk) on free list!\n"); |
| |
| return bp; |
| } |
| |
| static struct xfs_buf * |
| libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen) |
| { |
| struct xfs_buf *bp; |
| int blen = BBTOB(bblen); |
| |
| bp = __libxfs_getbufr(blen); |
| if (bp) |
| libxfs_initbuf(bp, btp, blkno, blen); |
| return bp; |
| } |
| |
| static struct xfs_buf * |
| libxfs_getbufr_map(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen, |
| struct xfs_buf_map *map, int nmaps) |
| { |
| struct xfs_buf *bp; |
| int blen = BBTOB(bblen); |
| |
| if (!map || !nmaps) { |
| fprintf(stderr, |
| _("%s: %s invalid map %p or nmaps %d\n"), |
| progname, __FUNCTION__, map, nmaps); |
| exit(1); |
| } |
| |
| if (blkno != map[0].bm_bn) { |
| fprintf(stderr, |
| _("%s: %s map blkno 0x%llx doesn't match key 0x%llx\n"), |
| progname, __FUNCTION__, (long long)map[0].bm_bn, |
| (long long)blkno); |
| exit(1); |
| } |
| |
| bp = __libxfs_getbufr(blen); |
| if (bp) |
| libxfs_initbuf_map(bp, btp, map, nmaps); |
| return bp; |
| } |
| |
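| /* Buffer locks are no-ops unless use_xfs_buf_lock has been enabled. */ |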
| void |
| xfs_buf_lock( |
| struct xfs_buf *bp) |
| { |
| if (use_xfs_buf_lock) |
| pthread_mutex_lock(&bp->b_lock); |
| } |
| |
| void |
| xfs_buf_unlock( |
| struct xfs_buf *bp) |
| { |
| if (use_xfs_buf_lock) |
| pthread_mutex_unlock(&bp->b_lock); |
| } |
| |
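| /* |
| * Look up a buffer in the target's cache, allocating one on a miss. If |
| * buffer locking is enabled, take the buffer lock too, handling trylock |
| * requests and recursive locking by the owning thread. |
| */ |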
| static int |
| __cache_lookup( |
| struct xfs_bufkey *key, |
| unsigned int flags, |
| struct xfs_buf **bpp) |
| { |
| struct cache_node *cn = NULL; |
| struct cache *bcache = key->buftarg->bcache; |
| struct xfs_buf *bp; |
| |
| *bpp = NULL; |
| |
| cache_node_get(bcache, key, &cn); |
| if (!cn) |
| return -ENOMEM; |
| bp = container_of(cn, struct xfs_buf, b_node); |
| |
| if (use_xfs_buf_lock) { |
| int ret; |
| |
| ret = pthread_mutex_trylock(&bp->b_lock); |
| if (ret) { |
| ASSERT(ret == EAGAIN); |
| if (flags & LIBXFS_GETBUF_TRYLOCK) { |
| cache_node_put(bcache, cn); |
| return -EAGAIN; |
| } |
| |
| if (pthread_equal(bp->b_holder, pthread_self())) { |
| fprintf(stderr, |
| _("Warning: recursive buffer locking at block %" PRIu64 " detected\n"), |
| key->blkno); |
| bp->b_recur++; |
| *bpp = bp; |
| return 0; |
| } else { |
| pthread_mutex_lock(&bp->b_lock); |
| } |
| } |
| |
| bp->b_holder = pthread_self(); |
| } |
| |
| cache_node_set_priority(bcache, cn, |
| cache_node_get_priority(cn) - CACHE_PREFETCH_PRIORITY); |
| *bpp = bp; |
| return 0; |
| } |
| |
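| /* |
| * Get a buffer covering a single contiguous range of the device. Buffers |
| * on the data device also carry a reference to the perag they belong to. |
| */ |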
| static int |
| libxfs_getbuf_flags( |
| struct xfs_buftarg *btp, |
| xfs_daddr_t blkno, |
| int len, |
| unsigned int flags, |
| struct xfs_buf **bpp) |
| { |
| struct xfs_bufkey key = {NULL}; |
| int ret; |
| |
| key.buftarg = btp; |
| key.blkno = blkno; |
| key.bblen = len; |
| |
| ret = __cache_lookup(&key, flags, bpp); |
| if (ret) |
| return ret; |
| |
| if (btp == btp->bt_mount->m_ddev_targp) { |
| (*bpp)->b_pag = xfs_perag_get(btp->bt_mount, |
| xfs_daddr_to_agno(btp->bt_mount, blkno)); |
| } |
| |
| return 0; |
| } |
| |
| /* |
| * Clean the buffer flags for libxfs_getbuf*(), which wants to return |
| * an unused buffer with clean state. This prevents CRC errors on a |
| * re-read of a corrupt block that was prefetched and freed. This |
| * can happen with a massively corrupt directory that is discarded, |
| * but whose blocks are then recycled into expanding lost+found. |
| * |
| * Note however that if the buffer's dirty (prefetch calls getbuf) |
| * we'll leave the state alone because we don't want to discard blocks |
| * that have been fixed. |
| */ |
| static void |
| reset_buf_state( |
| struct xfs_buf *bp) |
| { |
| if (bp && !(bp->b_flags & LIBXFS_B_DIRTY)) |
| bp->b_flags &= ~(LIBXFS_B_UNCHECKED | LIBXFS_B_STALE | |
| LIBXFS_B_UPTODATE); |
| } |
| |
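| /* |
| * Look up a buffer by its map(s): single-map lookups use the plain block |
| * number path, multi-map lookups key on the first block and the summed |
| * length of all the maps. |
| */ |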
| static int |
| __libxfs_buf_get_map( |
| struct xfs_buftarg *btp, |
| struct xfs_buf_map *map, |
| int nmaps, |
| int flags, |
| struct xfs_buf **bpp) |
| { |
| struct xfs_bufkey key = {NULL}; |
| int i; |
| |
| if (nmaps == 1) |
| return libxfs_getbuf_flags(btp, map[0].bm_bn, map[0].bm_len, |
| flags, bpp); |
| |
| key.buftarg = btp; |
| key.blkno = map[0].bm_bn; |
| for (i = 0; i < nmaps; i++) { |
| key.bblen += map[i].bm_len; |
| } |
| key.map = map; |
| key.nmaps = nmaps; |
| |
| return __cache_lookup(&key, flags, bpp); |
| } |
| |
| int |
| libxfs_buf_get_map( |
| struct xfs_buftarg *btp, |
| struct xfs_buf_map *map, |
| int nmaps, |
| int flags, |
| struct xfs_buf **bpp) |
| { |
| int error; |
| |
| error = __libxfs_buf_get_map(btp, map, nmaps, flags, bpp); |
| if (error) |
| return error; |
| |
| reset_buf_state(*bpp); |
| return 0; |
| } |
| |
| void |
| libxfs_buf_relse( |
| struct xfs_buf *bp) |
| { |
| /* |
| * ensure that any errors on this use of the buffer don't carry |
| * over to the next user. |
| */ |
| bp->b_error = 0; |
| if (use_xfs_buf_lock) { |
| if (bp->b_recur) { |
| bp->b_recur--; |
| } else { |
| bp->b_holder = 0; |
| pthread_mutex_unlock(&bp->b_lock); |
| } |
| } |
| |
| if (!list_empty(&bp->b_node.cn_hash)) |
| cache_node_put(bp->b_target->bcache, &bp->b_node); |
| else if (--bp->b_node.cn_count == 0) { |
| if (bp->b_flags & LIBXFS_B_DIRTY) |
| libxfs_bwrite(bp); |
| libxfs_brelse(&bp->b_node); |
| } |
| } |
| |
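| /* |
| * Cache miss callback: allocate and initialise a new buffer for the lookup |
| * key, handling both contiguous and discontiguous (multi-map) buffers. |
| */ |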
| static struct cache_node * |
| libxfs_balloc( |
| cache_key_t key) |
| { |
| struct xfs_bufkey *bufkey = (struct xfs_bufkey *)key; |
| struct xfs_buf *bp; |
| |
| if (bufkey->map) |
| bp = libxfs_getbufr_map(bufkey->buftarg, bufkey->blkno, |
| bufkey->bblen, bufkey->map, bufkey->nmaps); |
| else |
| bp = libxfs_getbufr(bufkey->buftarg, bufkey->blkno, |
| bufkey->bblen); |
| return &bp->b_node; |
| } |
| |
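| /* pread() wrapper that reports errors and turns short reads into -EIO. */ |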
| static int |
| __read_buf(int fd, void *buf, int len, off_t offset, int flags) |
| { |
| int sts; |
| |
| sts = pread(fd, buf, len, offset); |
| if (sts < 0) { |
| int error = errno; |
| fprintf(stderr, _("%s: read failed: %s\n"), |
| progname, strerror(error)); |
| return -error; |
| } else if (sts != len) { |
| fprintf(stderr, _("%s: error - read only %d of %d bytes\n"), |
| progname, sts, len); |
| return -EIO; |
| } |
| return 0; |
| } |
| |
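| /* |
| * Read @len basic blocks from @blkno into an already-initialised buffer. |
| * The buffer is only marked uptodate if it still maps the range we read. |
| */ |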
| int |
| libxfs_readbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, struct xfs_buf *bp, |
| int len, int flags) |
| { |
| int fd = btp->bt_bdev_fd; |
| int bytes = BBTOB(len); |
| int error; |
| |
| ASSERT(len <= bp->b_length); |
| |
| if (xfs_buftarg_is_mem(btp)) |
| return 0; |
| |
| error = __read_buf(fd, bp->b_addr, bytes, LIBXFS_BBTOOFF64(blkno), flags); |
| if (!error && |
| bp->b_target == btp && |
| bp->b_cache_key == blkno && |
| bp->b_length == len) |
| bp->b_flags |= LIBXFS_B_UPTODATE; |
| bp->b_error = error; |
| return error; |
| } |
| |
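| /* |
| * Run the read verifier against a buffer and clear its unchecked state. |
| * If no verifier is supplied, just return the current buffer error. |
| */ |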
| int |
| libxfs_readbuf_verify( |
| struct xfs_buf *bp, |
| const struct xfs_buf_ops *ops) |
| { |
| if (!ops) |
| return bp->b_error; |
| |
| bp->b_ops = ops; |
| bp->b_ops->verify_read(bp); |
| bp->b_flags &= ~LIBXFS_B_UNCHECKED; |
| return bp->b_error; |
| } |
| |
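| /* Read a discontiguous buffer from disk, one map at a time. */ |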
| int |
| libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags) |
| { |
| int fd = btp->bt_bdev_fd; |
| int error = 0; |
| void *buf; |
| int i; |
| |
| if (xfs_buftarg_is_mem(btp)) |
| return 0; |
| |
| buf = bp->b_addr; |
| for (i = 0; i < bp->b_nmaps; i++) { |
| off_t offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn); |
| int len = BBTOB(bp->b_maps[i].bm_len); |
| |
| error = __read_buf(fd, buf, len, offset, flags); |
| if (error) { |
| bp->b_error = error; |
| break; |
| } |
| buf += len; |
| } |
| |
| if (!error) |
| bp->b_flags |= LIBXFS_B_UPTODATE; |
| return error; |
| } |
| |
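| /* |
| * Read a (possibly discontiguous) buffer through the buffer cache and run |
| * the read verifier on it if it has not already been verified. |
| */ |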
| int |
| libxfs_buf_read_map( |
| struct xfs_buftarg *btp, |
| struct xfs_buf_map *map, |
| int nmaps, |
| int flags, |
| struct xfs_buf **bpp, |
| const struct xfs_buf_ops *ops) |
| { |
| struct xfs_buf *bp; |
| bool salvage = flags & LIBXFS_READBUF_SALVAGE; |
| int error = 0; |
| |
| *bpp = NULL; |
| if (nmaps == 1) |
| error = libxfs_getbuf_flags(btp, map[0].bm_bn, map[0].bm_len, |
| 0, &bp); |
| else |
| error = __libxfs_buf_get_map(btp, map, nmaps, 0, &bp); |
| if (error) |
| return error; |
| |
| /* |
| * If the buffer was prefetched, it is likely that it was not validated. |
| * Hence if we are supplied an ops function and the buffer is marked as |
| * unchecked, we need to validate it now. |
| * |
| * We do this verification even if the buffer is dirty - the |
| * verification is almost certainly going to fail the CRC check in this |
| * case as a dirty buffer has not had the CRC recalculated. However, we |
| * should not be dirtying unchecked buffers and therefore failing it |
| * here because it's dirty and unchecked indicates we've screwed up |
| * somewhere else. |
| * |
| * Note that if the caller passes in LIBXFS_READBUF_SALVAGE, that means |
| * they want the buffer even if it fails verification. |
| */ |
| bp->b_error = 0; |
| if (bp->b_flags & (LIBXFS_B_UPTODATE | LIBXFS_B_DIRTY)) { |
| if (bp->b_flags & LIBXFS_B_UNCHECKED) |
| error = libxfs_readbuf_verify(bp, ops); |
| if (error && !salvage) |
| goto err; |
| goto ok; |
| } |
| |
| /* |
| * Set the ops on a cache miss (i.e. first physical read) as the |
| * verifier may change the ops to match the type of buffer it contains. |
| * A cache hit might reset the verifier to the original type if we set |
| * it again, but it won't get called again and set to match the buffer |
| * contents. *cough* xfs_da_node_buf_ops *cough*. |
| */ |
| if (nmaps == 1) |
| error = libxfs_readbufr(btp, map[0].bm_bn, bp, map[0].bm_len, |
| flags); |
| else |
| error = libxfs_readbufr_map(btp, bp, flags); |
| if (error) |
| goto err; |
| |
| error = libxfs_readbuf_verify(bp, ops); |
| if (error && !salvage) |
| goto err; |
| |
| ok: |
| *bpp = bp; |
| return 0; |
| err: |
| libxfs_buf_relse(bp); |
| return error; |
| } |
| |
| /* Allocate a raw uncached buffer. */ |
| static inline struct xfs_buf * |
| libxfs_getbufr_uncached( |
| struct xfs_buftarg *targ, |
| xfs_daddr_t daddr, |
| size_t bblen) |
| { |
| struct xfs_buf *bp; |
| |
| bp = libxfs_getbufr(targ, daddr, bblen); |
| if (!bp) |
| return NULL; |
| |
| INIT_LIST_HEAD(&bp->b_node.cn_hash); |
| bp->b_node.cn_count = 1; |
| return bp; |
| } |
| |
| /* |
| * Allocate an uncached buffer that points nowhere. The refcount will be 1, |
| * and the cache node hash list will be empty to indicate that it's uncached. |
| */ |
| int |
| libxfs_buf_get_uncached( |
| struct xfs_buftarg *targ, |
| size_t bblen, |
| int flags, |
| struct xfs_buf **bpp) |
| { |
| *bpp = libxfs_getbufr_uncached(targ, XFS_BUF_DADDR_NULL, bblen); |
| return *bpp != NULL ? 0 : -ENOMEM; |
| } |
| |
| /* |
| * Allocate and read an uncached buffer. The refcount will be 1, and the cache |
| * node hash list will be empty to indicate that it's uncached. |
| */ |
| int |
| libxfs_buf_read_uncached( |
| struct xfs_buftarg *targ, |
| xfs_daddr_t daddr, |
| size_t bblen, |
| int flags, |
| struct xfs_buf **bpp, |
| const struct xfs_buf_ops *ops) |
| { |
| struct xfs_buf *bp; |
| int error; |
| |
| *bpp = NULL; |
| bp = libxfs_getbufr_uncached(targ, daddr, bblen); |
| if (!bp) |
| return -ENOMEM; |
| |
| error = libxfs_readbufr(targ, daddr, bp, bblen, flags); |
| if (error) |
| goto err; |
| |
| error = libxfs_readbuf_verify(bp, ops); |
| if (error) |
| goto err; |
| |
| *bpp = bp; |
| return 0; |
| err: |
| libxfs_buf_relse(bp); |
| return error; |
| } |
| |
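| /* pwrite() wrapper that reports errors and turns short writes into -EIO. */ |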
| static int |
| __write_buf(int fd, void *buf, int len, off_t offset, int flags) |
| { |
| int sts; |
| |
| sts = pwrite(fd, buf, len, offset); |
| if (sts < 0) { |
| int error = errno; |
| fprintf(stderr, _("%s: pwrite failed: %s\n"), |
| progname, strerror(error)); |
| return -error; |
| } else if (sts != len) { |
| fprintf(stderr, _("%s: error - pwrite only %d of %d bytes\n"), |
| progname, sts, len); |
| return -EIO; |
| } |
| return 0; |
| } |
| |
| int |
| libxfs_bwrite( |
| struct xfs_buf *bp) |
| { |
| int fd = bp->b_target->bt_bdev_fd; |
| |
| /* |
| * we never write buffers that are marked stale. This indicates they |
| * contain data that has been invalidated, and even if the buffer is |
| * dirty it must *never* be written. Verifiers are wonderful for finding |
| * bugs like this. Make sure the error is obvious as to the cause. |
| */ |
| if (bp->b_flags & LIBXFS_B_STALE) { |
| bp->b_error = -ESTALE; |
| return bp->b_error; |
| } |
| |
| /* Trigger the writeback hook if there is one. */ |
| if (bp->b_mount->m_buf_writeback_fn) |
| bp->b_mount->m_buf_writeback_fn(bp); |
| |
| /* |
| * clear any pre-existing error status on the buffer. This can occur if |
| * the buffer is corrupt on disk and the repair process doesn't clear |
| * the error before fixing and writing it back. |
| */ |
| bp->b_error = 0; |
| if (bp->b_ops) { |
| bp->b_ops->verify_write(bp); |
| if (bp->b_error) { |
| fprintf(stderr, |
| _("%s: write verifier failed on %s bno 0x%llx/0x%x\n"), |
| __func__, bp->b_ops->name, |
| (unsigned long long)xfs_buf_daddr(bp), |
| bp->b_length); |
| return bp->b_error; |
| } |
| } |
| |
| if (xfs_buftarg_is_mem(bp->b_target)) { |
| bp->b_error = 0; |
| } else if (!(bp->b_flags & LIBXFS_B_DISCONTIG)) { |
| bp->b_error = __write_buf(fd, bp->b_addr, BBTOB(bp->b_length), |
| LIBXFS_BBTOOFF64(xfs_buf_daddr(bp)), |
| bp->b_flags); |
| } else { |
| int i; |
| void *buf = bp->b_addr; |
| |
| for (i = 0; i < bp->b_nmaps; i++) { |
| off_t offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn); |
| int len = BBTOB(bp->b_maps[i].bm_len); |
| |
| bp->b_error = __write_buf(fd, buf, len, offset, |
| bp->b_flags); |
| if (bp->b_error) |
| break; |
| buf += len; |
| } |
| } |
| |
| if (bp->b_error) { |
| fprintf(stderr, |
| _("%s: write failed on %s bno 0x%llx/0x%x, err=%d\n"), |
| __func__, bp->b_ops ? bp->b_ops->name : "(unknown)", |
| (unsigned long long)xfs_buf_daddr(bp), |
| bp->b_length, -bp->b_error); |
| } else { |
| bp->b_flags |= LIBXFS_B_UPTODATE; |
| bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_UNCHECKED); |
| xfs_buftarg_trip_write(bp->b_target); |
| } |
| return bp->b_error; |
| } |
| |
| /* |
| * Mark a buffer dirty. The dirty data will be written out when the cache |
| * is flushed (or at release time if the buffer is uncached). |
| */ |
| void |
| libxfs_buf_mark_dirty( |
| struct xfs_buf *bp) |
| { |
| /* |
| * Clear any error hanging over from reading the buffer. This prevents |
| * subsequent reads after this write from seeing stale errors. |
| */ |
| bp->b_error = 0; |
| bp->b_flags &= ~LIBXFS_B_STALE; |
| bp->b_flags |= LIBXFS_B_DIRTY; |
| } |
| |
| /* Prepare a buffer to be sent to the MRU list. */ |
| static inline void |
| libxfs_buf_prepare_mru( |
| struct xfs_buf *bp) |
| { |
| if (bp->b_pag) |
| xfs_perag_put(bp->b_pag); |
| bp->b_pag = NULL; |
| |
| ASSERT(!xfs_buftarg_is_mem(bp->b_target)); |
| |
| if (!(bp->b_flags & LIBXFS_B_DIRTY)) |
| return; |
| |
| /* Complain about (and remember) dropping dirty buffers. */ |
| fprintf(stderr, _("%s: Releasing dirty buffer to free list!\n"), |
| progname); |
| |
| if (bp->b_error == -EFSCORRUPTED) |
| bp->b_target->flags |= XFS_BUFTARG_CORRUPT_WRITE; |
| bp->b_target->flags |= XFS_BUFTARG_LOST_WRITE; |
| } |
| |
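| /* |
| * Cache release callback: park an unreferenced buffer on the global free |
| * list so its memory can be reused by later allocations. |
| */ |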
| static void |
| libxfs_brelse( |
| struct cache_node *node) |
| { |
| struct xfs_buf *bp = container_of(node, struct xfs_buf, |
| b_node); |
| |
| if (!bp) |
| return; |
| libxfs_buf_prepare_mru(bp); |
| |
| pthread_mutex_lock(&xfs_buf_freelist.cm_mutex); |
| list_add(&bp->b_node.cn_mru, &xfs_buf_freelist.cm_list); |
| pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex); |
| } |
| |
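| /* |
| * Bulk version of libxfs_brelse(): move an entire list of buffers onto the |
| * free list with a single lock round trip. Returns the number released. |
| */ |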
| static unsigned int |
| libxfs_bulkrelse( |
| struct cache *cache, |
| struct list_head *list) |
| { |
| struct xfs_buf *bp; |
| int count = 0; |
| |
| if (list_empty(list)) |
| return 0; |
| |
| list_for_each_entry(bp, list, b_node.cn_mru) { |
| libxfs_buf_prepare_mru(bp); |
| count++; |
| } |
| |
| pthread_mutex_lock(&xfs_buf_freelist.cm_mutex); |
| list_splice(list, &xfs_buf_freelist.cm_list); |
| pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex); |
| |
| return count; |
| } |
| |
| /* |
| * Free everything from the xfs_buf_freelist MRU, used at final teardown |
| */ |
| void |
| libxfs_bcache_free(void) |
| { |
| struct list_head *cm_list; |
| struct xfs_buf *bp, *next; |
| |
| cm_list = &xfs_buf_freelist.cm_list; |
| list_for_each_entry_safe(bp, next, cm_list, b_node.cn_mru) { |
| free(bp->b_addr); |
| if (bp->b_maps != &bp->__b_map) |
| free(bp->b_maps); |
| kmem_cache_free(xfs_buf_cache, bp); |
| } |
| } |
| |
| /* |
| * When a buffer is marked dirty, the error is cleared. Hence if we are trying |
| * to flush a buffer prior to cache reclaim that has an error on it, it means |
| * we've already tried to flush it and it failed. Prevent repeated corruption |
| * errors from being reported by skipping such buffers - when the corruption is |
| * fixed the buffer will be marked dirty again and we can write it again. |
| */ |
| static int |
| libxfs_bflush( |
| struct cache_node *node) |
| { |
| struct xfs_buf *bp = container_of(node, struct xfs_buf, |
| b_node); |
| |
| if (!bp->b_error && bp->b_flags & LIBXFS_B_DIRTY) |
| return libxfs_bwrite(bp); |
| return bp->b_error; |
| } |
| |
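| /* Discard all cached buffers on all devices belonging to this mount. */ |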
| void |
| libxfs_bcache_purge(struct xfs_mount *mp) |
| { |
| if (!mp) |
| return; |
| cache_purge(mp->m_ddev_targp->bcache); |
| cache_purge(mp->m_logdev_targp->bcache); |
| cache_purge(mp->m_rtdev_targp->bcache); |
| } |
| |
| void |
| libxfs_bcache_flush(struct xfs_mount *mp) |
| { |
| if (!mp) |
| return; |
| cache_flush(mp->m_ddev_targp->bcache); |
| cache_flush(mp->m_logdev_targp->bcache); |
| cache_flush(mp->m_rtdev_targp->bcache); |
| } |
| |
| int |
| libxfs_bcache_overflowed(struct xfs_mount *mp) |
| { |
| return cache_overflowed(mp->m_ddev_targp->bcache) || |
| cache_overflowed(mp->m_logdev_targp->bcache) || |
| cache_overflowed(mp->m_rtdev_targp->bcache); |
| } |
| |
| struct cache_operations libxfs_bcache_operations = { |
| .hash = libxfs_bhash, |
| .alloc = libxfs_balloc, |
| .flush = libxfs_bflush, |
| .relse = libxfs_brelse, |
| .compare = libxfs_bcompare, |
| .bulkrelse = libxfs_bulkrelse |
| }; |
| |
| /* |
| * Verify an on-disk magic value against the magic value specified in the |
| * verifier structure. The verifier magic is in disk byte order so the caller is |
| * expected to pass the value directly from disk. |
| */ |
| bool |
| xfs_verify_magic( |
| struct xfs_buf *bp, |
| __be32 dmagic) |
| { |
| struct xfs_mount *mp = bp->b_mount; |
| int idx; |
| |
| idx = xfs_has_crc(mp); |
| if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx]))) |
| return false; |
| return dmagic == bp->b_ops->magic[idx]; |
| } |
| |
| /* |
| * Verify an on-disk magic value against the magic value specified in the |
| * verifier structure. The verifier magic is in disk byte order so the caller is |
| * expected to pass the value directly from disk. |
| */ |
| bool |
| xfs_verify_magic16( |
| struct xfs_buf *bp, |
| __be16 dmagic) |
| { |
| struct xfs_mount *mp = bp->b_mount; |
| int idx; |
| |
| idx = xfs_has_crc(mp); |
| if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx]))) |
| return false; |
| return dmagic == bp->b_ops->magic16[idx]; |
| } |
| |
| /* |
| * Inode cache stubs. |
| */ |
| |
| struct kmem_cache *xfs_inode_cache; |
| extern struct kmem_cache *xfs_ili_cache; |
| |
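| /* |
| * There is no inode cache in userspace: every "get" allocates a fresh |
| * inode, maps it and reads it in from its cluster buffer on disk. |
| */ |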
| int |
| libxfs_iget( |
| struct xfs_mount *mp, |
| struct xfs_trans *tp, |
| xfs_ino_t ino, |
| uint lock_flags, |
| struct xfs_inode **ipp) |
| { |
| struct xfs_inode *ip; |
| struct xfs_buf *bp; |
| struct xfs_perag *pag; |
| int error = 0; |
| |
| /* reject inode numbers outside existing AGs */ |
| if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) |
| return -EINVAL; |
| |
| ip = kmem_cache_zalloc(xfs_inode_cache, 0); |
| if (!ip) |
| return -ENOMEM; |
| |
| VFS_I(ip)->i_count = 1; |
| ip->i_ino = ino; |
| ip->i_mount = mp; |
| ip->i_af.if_format = XFS_DINODE_FMT_EXTENTS; |
| spin_lock_init(&VFS_I(ip)->i_lock); |
| |
| pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); |
| error = xfs_imap(pag, tp, ip->i_ino, &ip->i_imap, 0); |
| xfs_perag_put(pag); |
| |
| if (error) |
| goto out_destroy; |
| |
| error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp); |
| if (error) |
| goto out_destroy; |
| |
| error = xfs_inode_from_disk(ip, |
| xfs_buf_offset(bp, ip->i_imap.im_boffset)); |
| if (!error) |
| xfs_buf_set_ref(bp, XFS_INO_REF); |
| xfs_trans_brelse(tp, bp); |
| |
| if (error) |
| goto out_destroy; |
| |
| *ipp = ip; |
| return 0; |
| |
| out_destroy: |
| kmem_cache_free(xfs_inode_cache, ip); |
| *ipp = NULL; |
| return error; |
| } |
| |
| static void |
| libxfs_idestroy(xfs_inode_t *ip) |
| { |
| switch (VFS_I(ip)->i_mode & S_IFMT) { |
| case S_IFREG: |
| case S_IFDIR: |
| case S_IFLNK: |
| libxfs_idestroy_fork(&ip->i_df); |
| break; |
| } |
| |
| libxfs_ifork_zap_attr(ip); |
| |
| if (ip->i_cowfp) { |
| libxfs_idestroy_fork(ip->i_cowfp); |
| kmem_cache_free(xfs_ifork_cache, ip->i_cowfp); |
| } |
| } |
| |
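| /* Drop a reference; tear down and free the inode when the last one goes. */ |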
| void |
| libxfs_irele( |
| struct xfs_inode *ip) |
| { |
| VFS_I(ip)->i_count--; |
| |
| if (VFS_I(ip)->i_count == 0) { |
| ASSERT(ip->i_itemp == NULL); |
| libxfs_idestroy(ip); |
| kmem_cache_free(xfs_inode_cache, ip); |
| } |
| } |
| |
| /* |
| * Flush everything dirty in the kernel and disk write caches to stable media. |
| * Returns 0 for success or a negative error code. |
| */ |
| int |
| libxfs_blkdev_issue_flush( |
| struct xfs_buftarg *btp) |
| { |
| int ret; |
| |
| if (btp->bt_bdev == 0) |
| return 0; |
| |
| ret = platform_flush_device(btp->bt_bdev_fd, btp->bt_bdev); |
| return ret ? -errno : 0; |
| } |
| |
| /* |
| * Write out a buffer list synchronously. |
| * |
| * This will take the @buffer_list, write all buffers out and wait for I/O |
| * completion on all of the buffers. @buffer_list is consumed by the function, |
| * so callers must have some other way of tracking buffers if they require such |
| * functionality. |
| */ |
| int |
| xfs_buf_delwri_submit( |
| struct list_head *buffer_list) |
| { |
| struct xfs_buf *bp, *n; |
| int error = 0, error2; |
| |
| list_for_each_entry_safe(bp, n, buffer_list, b_list) { |
| list_del_init(&bp->b_list); |
| error2 = libxfs_bwrite(bp); |
| if (!error) |
| error = error2; |
| libxfs_buf_relse(bp); |
| } |
| |
| return error; |
| } |
| |
| /* |
| * Cancel a delayed write list. |
| * |
| * Remove each buffer from the list, clear the delwri queue flag and drop the |
| * associated buffer reference. |
| */ |
| void |
| xfs_buf_delwri_cancel( |
| struct list_head *list) |
| { |
| struct xfs_buf *bp; |
| |
| while (!list_empty(list)) { |
| bp = list_first_entry(list, struct xfs_buf, b_list); |
| |
| list_del_init(&bp->b_list); |
| libxfs_buf_relse(bp); |
| } |
| } |
| |
| /* |
| * Format the log. The caller provides either a buftarg which is used to access |
| * the log via buffers or a direct pointer to a buffer that encapsulates the |
| * entire log. |
| */ |
| int |
| libxfs_log_clear( |
| struct xfs_buftarg *btp, |
| char *dptr, |
| xfs_daddr_t start, |
| uint length, /* basic blocks */ |
| uuid_t *fs_uuid, |
| int version, |
| int sunit, /* bytes */ |
| int fmt, |
| int cycle, |
| bool max) |
| { |
| struct xfs_buf *bp = NULL; |
| int len; |
| xfs_lsn_t lsn; |
| xfs_lsn_t tail_lsn; |
| xfs_daddr_t blk; |
| xfs_daddr_t end_blk; |
| char *ptr; |
| |
| if (((btp && dptr) || (!btp && !dptr)) || |
| (btp && !btp->bt_bdev) || !fs_uuid) |
| return -EINVAL; |
| |
| /* first zero the log */ |
| if (btp) |
| libxfs_device_zero(btp, start, length); |
| else |
| memset(dptr, 0, BBTOB(length)); |
| |
| /* |
| * Initialize the log record length and LSNs. XLOG_INIT_CYCLE is a |
| * special reset case where we only write a single record where the lsn |
| * and tail_lsn match. Otherwise, the record lsn starts at block 0 of |
| * the specified cycle and points tail_lsn at the last record of the |
| * previous cycle. |
| */ |
| len = ((version == 2) && sunit) ? BTOBB(sunit) : 2; |
| len = max(len, 2); |
| lsn = xlog_assign_lsn(cycle, 0); |
| if (cycle == XLOG_INIT_CYCLE) |
| tail_lsn = lsn; |
| else |
| tail_lsn = xlog_assign_lsn(cycle - 1, length - len); |
| |
| /* write out the first log record */ |
| ptr = dptr; |
| if (btp) { |
| bp = libxfs_getbufr_uncached(btp, start, len); |
| ptr = bp->b_addr; |
| } |
| libxfs_log_header(ptr, fs_uuid, version, sunit, fmt, lsn, tail_lsn, |
| next, bp); |
| if (bp) { |
| libxfs_buf_mark_dirty(bp); |
| libxfs_buf_relse(bp); |
| } |
| |
| /* |
| * There's nothing else to do if this is a log reset. The kernel detects |
| * the rest of the log is zeroed and starts at cycle 1. |
| */ |
| if (cycle == XLOG_INIT_CYCLE) |
| return 0; |
| |
| /* |
| * Bump the record size for a full log format if the caller allows it. |
| * This is primarily for performance reasons and most callers don't care |
| * about record size since the log is clean after we're done. |
| */ |
| if (max) |
| len = BTOBB(BDSTRAT_SIZE); |
| |
| /* |
| * Otherwise, fill everything beyond the initial record with records of |
| * the previous cycle so the kernel head/tail detection works correctly. |
| * |
| * We don't particularly care about the record size or content here. |
| * It's only important that the headers are in place such that the |
| * kernel finds 1.) a clean log and 2.) the correct current cycle value. |
| * Therefore, bump up the record size to the max to use larger I/Os and |
| * improve performance. |
| */ |
| cycle--; |
| blk = start + len; |
| if (dptr) |
| dptr += BBTOB(len); |
| end_blk = start + length; |
| |
| len = min(end_blk - blk, len); |
| while (blk < end_blk) { |
| lsn = xlog_assign_lsn(cycle, blk - start); |
| tail_lsn = xlog_assign_lsn(cycle, blk - start - len); |
| |
| ptr = dptr; |
| if (btp) { |
| bp = libxfs_getbufr_uncached(btp, blk, len); |
| ptr = bp->b_addr; |
| } |
| /* |
| * Note: pass the full buffer length as the sunit to initialize |
| * the entire buffer. |
| */ |
| libxfs_log_header(ptr, fs_uuid, version, BBTOB(len), fmt, lsn, |
| tail_lsn, next, bp); |
| if (bp) { |
| libxfs_buf_mark_dirty(bp); |
| libxfs_buf_relse(bp); |
| } |
| |
| blk += len; |
| if (dptr) |
| dptr += BBTOB(len); |
| len = min(end_blk - blk, len); |
| } |
| |
| return 0; |
| } |
| |
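| /* |
| * Format a single log record at @caddr: the record header, any extended |
| * headers required by the stripe unit, the unmount record, and cycle- |
| * stamped padding out to the record length. Returns the bytes formatted. |
| */ |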
| int |
| libxfs_log_header( |
| char *caddr, |
| uuid_t *fs_uuid, |
| int version, |
| int sunit, |
| int fmt, |
| xfs_lsn_t lsn, |
| xfs_lsn_t tail_lsn, |
| libxfs_get_block_t *nextfunc, |
| void *private) |
| { |
| xlog_rec_header_t *head = (xlog_rec_header_t *)caddr; |
| char *p = caddr; |
| __be32 cycle_lsn; |
| int i, len; |
| int hdrs = 1; |
| |
| if (lsn == NULLCOMMITLSN) |
| lsn = xlog_assign_lsn(XLOG_INIT_CYCLE, 0); |
| if (tail_lsn == NULLCOMMITLSN) |
| tail_lsn = lsn; |
| |
| len = ((version == 2) && sunit) ? BTOBB(sunit) : 1; |
| |
| memset(p, 0, BBSIZE); |
| head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); |
| head->h_cycle = cpu_to_be32(CYCLE_LSN(lsn)); |
| head->h_version = cpu_to_be32(version); |
| head->h_crc = cpu_to_le32(0); |
| head->h_prev_block = cpu_to_be32(-1); |
| head->h_num_logops = cpu_to_be32(1); |
| head->h_fmt = cpu_to_be32(fmt); |
| head->h_size = cpu_to_be32(max(sunit, XLOG_BIG_RECORD_BSIZE)); |
| |
| head->h_lsn = cpu_to_be64(lsn); |
| head->h_tail_lsn = cpu_to_be64(tail_lsn); |
| |
| memcpy(&head->h_fs_uuid, fs_uuid, sizeof(uuid_t)); |
| |
| /* |
| * The kernel expects to see either a log record header magic value or |
| * the LSN cycle at the top of every log block. The first word of each |
| * non-header block is copied to the record headers and replaced with |
| * the cycle value (see xlog_[un]pack_data() and xlog_get_cycle() for |
| * details). |
| * |
| * Even though we only ever write an unmount record (one block), we |
| * support writing log records up to the max log buffer size of 256k to |
| * improve log format performance. This means a record can require up |
| * to 8 headers (1 rec. header + 7 ext. headers) for the packed cycle |
| * data (each header supports 32k of data). |
| */ |
| cycle_lsn = CYCLE_LSN_DISK(head->h_lsn); |
| if (version == 2 && sunit > XLOG_HEADER_CYCLE_SIZE) { |
| hdrs = sunit / XLOG_HEADER_CYCLE_SIZE; |
| if (sunit % XLOG_HEADER_CYCLE_SIZE) |
| hdrs++; |
| } |
| |
| /* |
| * A fixed number of extended headers is expected based on h_size. If |
| * required, format those now so the unmount record is located |
| * correctly. |
| * |
| * Since we only write an unmount record, we only need one h_cycle_data |
| * entry for the unmount record block. The subsequent record data |
| * blocks are zeroed, which means we can stamp them directly with the |
| * cycle and zero the rest of the cycle data in the extended headers. |
| */ |
| if (hdrs > 1) { |
| for (i = 1; i < hdrs; i++) { |
| p = nextfunc(p, BBSIZE, private); |
| memset(p, 0, BBSIZE); |
| /* xlog_rec_ext_header.xh_cycle */ |
| *(__be32 *)p = cycle_lsn; |
| } |
| } |
| |
| /* |
| * The total length is the max of the stripe unit or 2 basic block |
| * minimum (1 hdr blk + 1 data blk). The record length is the total |
| * minus however many header blocks are required. |
| */ |
| head->h_len = cpu_to_be32(max(BBTOB(2), sunit) - hdrs * BBSIZE); |
| |
| /* |
| * Write out the unmount record, pack the first word into the record |
| * header and stamp the block with the cycle. |
| */ |
| p = nextfunc(p, BBSIZE, private); |
| unmount_record(p); |
| |
| head->h_cycle_data[0] = *(__be32 *)p; |
| *(__be32 *)p = cycle_lsn; |
| |
| /* |
| * Finally, zero all remaining blocks in the record and stamp each with |
| * the cycle. We don't need to pack any of these blocks because the |
| * cycle data in the headers has already been zeroed. |
| */ |
| len = max(len, hdrs + 1); |
| for (i = hdrs + 1; i < len; i++) { |
| p = nextfunc(p, BBSIZE, private); |
| memset(p, 0, BBSIZE); |
| *(__be32 *)p = cycle_lsn; |
| } |
| |
| return BBTOB(len); |
| } |
| |
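| /* Set the cache node priority of a buffer. */ |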
| void |
| libxfs_buf_set_priority( |
| struct xfs_buf *bp, |
| int priority) |
| { |
| cache_node_set_priority(bp->b_target->bcache, &bp->b_node, priority); |
| } |
| |
| int |
| libxfs_buf_priority( |
| struct xfs_buf *bp) |
| { |
| return cache_node_get_priority(&bp->b_node); |
| } |
| |
| /* |
| * Log a message about and stale a buffer that a caller has decided is corrupt. |
| * |
| * This function should be called for the kinds of metadata corruption that |
| * cannot be detected by a verifier, such as incorrect inter-block relationship |
| * data. Do /not/ call this function from a verifier function. |
| * |
| * The buffer must be XBF_DONE prior to the call. Afterwards, the buffer will |
| * be marked stale, but b_error will not be set. The caller is responsible for |
| * releasing the buffer or fixing it. |
| */ |
| void |
| __xfs_buf_mark_corrupt( |
| struct xfs_buf *bp, |
| xfs_failaddr_t fa) |
| { |
| ASSERT(bp->b_flags & XBF_DONE); |
| |
| xfs_buf_corruption_error(bp, fa); |
| xfs_buf_stale(bp); |
| } |