| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * Copyright (c) 2000-2006 Silicon Graphics, Inc. |
| * All Rights Reserved. |
| */ |
| |
| |
| #include "libxfs_priv.h" |
| #include "init.h" |
| #include "xfs_fs.h" |
| #include "xfs_shared.h" |
| #include "xfs_format.h" |
| #include "xfs_log_format.h" |
| #include "xfs_trans_resv.h" |
| #include "xfs_mount.h" |
| #include "xfs_inode_buf.h" |
| #include "xfs_inode_fork.h" |
| #include "xfs_inode.h" |
| #include "xfs_trans.h" |
| |
| #include "libxfs.h" /* for LIBXFS_EXIT_ON_FAILURE */ |
| |
| /* |
| * Important design/architecture note: |
| * |
| * The userspace code that uses the buffer cache is much less constrained than |
| * the kernel code. The userspace code is pretty nasty in places, especially |
| * when it comes to buffer error handling. Very little of the userspace code |
| * outside libxfs clears bp->b_error - very little code even checks it - so the |
| * libxfs code is tripping on stale errors left by the userspace code. |
| * |
| * We can't clear errors or zero buffer contents in libxfs_getbuf-* like we do |
| * in the kernel, because those functions are used by the libxfs_readbuf_* |
| * functions and hence need to leave the buffers unchanged on cache hits. This |
| * is actually the only way to gather a write error from a libxfs_writebuf() |
| * call - you need to get the buffer again so you can check bp->b_error field - |
| * assuming that the buffer is still in the cache when you check, that is. |
| * |
| * This is very different to the kernel code which does not release buffers on a |
| * write so we can wait on IO and check errors. The kernel buffer cache also |
| * guarantees a buffer of a known initial state from xfs_buf_get() even on a |
| * cache hit. |
| * |
| * IOWs, userspace is behaving quite differently to the kernel and as a result |
| * it leaks errors from reads, invalidations and writes through |
| * libxfs_getbuf/libxfs_readbuf. |
| * |
| * The result of this is that until the userspace code outside libxfs is cleaned |
| * up, functions that release buffers from userspace control (i.e. |
| * libxfs_writebuf/libxfs_putbuf) need to zero bp->b_error to prevent |
| * propagation of stale errors into future buffer operations. |
| */ |
| |
| #define BDSTRAT_SIZE (256 * 1024) |
| |
| #define IO_BCOMPARE_CHECK |
| |
| /* XXX: (dgc) Propagate errors, only exit if fail-on-error flag set */ |
| int |
| libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len) |
| { |
| xfs_off_t start_offset, end_offset, offset; |
| ssize_t zsize, bytes; |
| char *z; |
| int fd; |
| |
| zsize = min(BDSTRAT_SIZE, BBTOB(len)); |
| if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) { |
| fprintf(stderr, |
| _("%s: %s can't memalign %d bytes: %s\n"), |
| progname, __FUNCTION__, (int)zsize, strerror(errno)); |
| exit(1); |
| } |
| memset(z, 0, zsize); |
| |
| fd = libxfs_device_to_fd(btp->dev); |
| start_offset = LIBXFS_BBTOOFF64(start); |
| |
| if ((lseek(fd, start_offset, SEEK_SET)) < 0) { |
| fprintf(stderr, _("%s: %s seek to offset %llu failed: %s\n"), |
| progname, __FUNCTION__, |
| (unsigned long long)start_offset, strerror(errno)); |
| exit(1); |
| } |
| |
| end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset; |
| for (offset = 0; offset < end_offset; ) { |
| bytes = min((ssize_t)(end_offset - offset), zsize); |
| if ((bytes = write(fd, z, bytes)) < 0) { |
| fprintf(stderr, _("%s: %s write failed: %s\n"), |
| progname, __FUNCTION__, strerror(errno)); |
| exit(1); |
| } else if (bytes == 0) { |
| fprintf(stderr, _("%s: %s not progressing?\n"), |
| progname, __FUNCTION__); |
| exit(1); |
| } |
| offset += bytes; |
| } |
| free(z); |
| return 0; |
| } |
| |
| static void unmount_record(void *p) |
| { |
| xlog_op_header_t *op = (xlog_op_header_t *)p; |
| /* the data section must be 32 bit size aligned */ |
| struct { |
| uint16_t magic; |
| uint16_t pad1; |
| uint32_t pad2; /* may as well make it 64 bits */ |
| } magic = { XLOG_UNMOUNT_TYPE, 0, 0 }; |
| |
| memset(p, 0, BBSIZE); |
| /* dummy tid to mark this as written from userspace */ |
| op->oh_tid = cpu_to_be32(0xb0c0d0d0); |
| op->oh_len = cpu_to_be32(sizeof(magic)); |
| op->oh_clientid = XFS_LOG; |
| op->oh_flags = XLOG_UNMOUNT_TRANS; |
| op->oh_res2 = 0; |
| |
| /* and the data for this op */ |
| memcpy((char *)p + sizeof(xlog_op_header_t), &magic, sizeof(magic)); |
| } |
| |
| static char *next( |
| char *ptr, |
| int offset, |
| void *private) |
| { |
| struct xfs_buf *buf = (struct xfs_buf *)private; |
| |
| if (buf && |
| (buf->b_bcount < (int)(ptr - (char *)buf->b_addr) + offset)) |
| abort(); |
| |
| return ptr + offset; |
| } |
| |
| /* |
| * Format the log. The caller provides either a buftarg which is used to access |
| * the log via buffers or a direct pointer to a buffer that encapsulates the |
| * entire log. |
| */ |
| int |
| libxfs_log_clear( |
| struct xfs_buftarg *btp, |
| char *dptr, |
| xfs_daddr_t start, |
| uint length, /* basic blocks */ |
| uuid_t *fs_uuid, |
| int version, |
| int sunit, /* bytes */ |
| int fmt, |
| int cycle, |
| bool max) |
| { |
| struct xfs_buf *bp = NULL; |
| int len; |
| xfs_lsn_t lsn; |
| xfs_lsn_t tail_lsn; |
| xfs_daddr_t blk; |
| xfs_daddr_t end_blk; |
| char *ptr; |
| |
| if (((btp && dptr) || (!btp && !dptr)) || |
| (btp && !btp->dev) || !fs_uuid) |
| return -EINVAL; |
| |
| /* first zero the log */ |
| if (btp) |
| libxfs_device_zero(btp, start, length); |
| else |
| memset(dptr, 0, BBTOB(length)); |
| |
| /* |
| * Initialize the log record length and LSNs. XLOG_INIT_CYCLE is a |
| * special reset case where we only write a single record where the lsn |
| * and tail_lsn match. Otherwise, the record lsn starts at block 0 of |
| * the specified cycle and points tail_lsn at the last record of the |
| * previous cycle. |
| */ |
| len = ((version == 2) && sunit) ? BTOBB(sunit) : 2; |
| len = max(len, 2); |
| lsn = xlog_assign_lsn(cycle, 0); |
| if (cycle == XLOG_INIT_CYCLE) |
| tail_lsn = lsn; |
| else |
| tail_lsn = xlog_assign_lsn(cycle - 1, length - len); |
| |
| /* write out the first log record */ |
| ptr = dptr; |
| if (btp) { |
| bp = libxfs_getbufr(btp, start, len); |
| ptr = bp->b_addr; |
| } |
| libxfs_log_header(ptr, fs_uuid, version, sunit, fmt, lsn, tail_lsn, |
| next, bp); |
| if (bp) { |
| bp->b_flags |= LIBXFS_B_DIRTY; |
| libxfs_putbufr(bp); |
| } |
| |
| /* |
| * There's nothing else to do if this is a log reset. The kernel detects |
| * the rest of the log is zeroed and starts at cycle 1. |
| */ |
| if (cycle == XLOG_INIT_CYCLE) |
| return 0; |
| |
| /* |
| * Bump the record size for a full log format if the caller allows it. |
| * This is primarily for performance reasons and most callers don't care |
| * about record size since the log is clean after we're done. |
| */ |
| if (max) |
| len = BTOBB(BDSTRAT_SIZE); |
| |
| /* |
| * Otherwise, fill everything beyond the initial record with records of |
| * the previous cycle so the kernel head/tail detection works correctly. |
| * |
| * We don't particularly care about the record size or content here. |
| * It's only important that the headers are in place such that the |
| * kernel finds 1.) a clean log and 2.) the correct current cycle value. |
| * Therefore, bump up the record size to the max to use larger I/Os and |
| * improve performance. |
| */ |
| cycle--; |
| blk = start + len; |
| if (dptr) |
| dptr += BBTOB(len); |
| end_blk = start + length; |
| |
| len = min(end_blk - blk, len); |
| while (blk < end_blk) { |
| lsn = xlog_assign_lsn(cycle, blk - start); |
| tail_lsn = xlog_assign_lsn(cycle, blk - start - len); |
| |
| ptr = dptr; |
| if (btp) { |
| bp = libxfs_getbufr(btp, blk, len); |
| ptr = bp->b_addr; |
| } |
| /* |
| * Note: pass the full buffer length as the sunit to initialize |
| * the entire buffer. |
| */ |
| libxfs_log_header(ptr, fs_uuid, version, BBTOB(len), fmt, lsn, |
| tail_lsn, next, bp); |
| if (bp) { |
| bp->b_flags |= LIBXFS_B_DIRTY; |
| libxfs_putbufr(bp); |
| } |
| |
| blk += len; |
| if (dptr) |
| dptr += BBTOB(len); |
| len = min(end_blk - blk, len); |
| } |
| |
| return 0; |
| } |
| |
| int |
| libxfs_log_header( |
| char *caddr, |
| uuid_t *fs_uuid, |
| int version, |
| int sunit, |
| int fmt, |
| xfs_lsn_t lsn, |
| xfs_lsn_t tail_lsn, |
| libxfs_get_block_t *nextfunc, |
| void *private) |
| { |
| xlog_rec_header_t *head = (xlog_rec_header_t *)caddr; |
| char *p = caddr; |
| __be32 cycle_lsn; |
| int i, len; |
| int hdrs = 1; |
| |
| if (lsn == NULLCOMMITLSN) |
| lsn = xlog_assign_lsn(XLOG_INIT_CYCLE, 0); |
| if (tail_lsn == NULLCOMMITLSN) |
| tail_lsn = lsn; |
| |
| len = ((version == 2) && sunit) ? BTOBB(sunit) : 1; |
| |
| memset(p, 0, BBSIZE); |
| head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); |
| head->h_cycle = cpu_to_be32(CYCLE_LSN(lsn)); |
| head->h_version = cpu_to_be32(version); |
| head->h_crc = cpu_to_le32(0); |
| head->h_prev_block = cpu_to_be32(-1); |
| head->h_num_logops = cpu_to_be32(1); |
| head->h_fmt = cpu_to_be32(fmt); |
| head->h_size = cpu_to_be32(max(sunit, XLOG_BIG_RECORD_BSIZE)); |
| |
| head->h_lsn = cpu_to_be64(lsn); |
| head->h_tail_lsn = cpu_to_be64(tail_lsn); |
| |
| memcpy(&head->h_fs_uuid, fs_uuid, sizeof(uuid_t)); |
| |
| /* |
| * The kernel expects to see either a log record header magic value or |
| * the LSN cycle at the top of every log block. The first word of each |
| * non-header block is copied to the record headers and replaced with |
| * the cycle value (see xlog_[un]pack_data() and xlog_get_cycle() for |
| * details). |
| * |
| * Even though we only ever write an unmount record (one block), we |
| * support writing log records up to the max log buffer size of 256k to |
| * improve log format performance. This means a record can require up |
| * to 8 headers (1 rec. header + 7 ext. headers) for the packed cycle |
| * data (each header supports 32k of data). |
| */ |
| cycle_lsn = CYCLE_LSN_DISK(head->h_lsn); |
| if (version == 2 && sunit > XLOG_HEADER_CYCLE_SIZE) { |
| hdrs = sunit / XLOG_HEADER_CYCLE_SIZE; |
| if (sunit % XLOG_HEADER_CYCLE_SIZE) |
| hdrs++; |
| } |
| |
| /* |
| * A fixed number of extended headers is expected based on h_size. If |
| * required, format those now so the unmount record is located |
| * correctly. |
| * |
| * Since we only write an unmount record, we only need one h_cycle_data |
| * entry for the unmount record block. The subsequent record data |
| * blocks are zeroed, which means we can stamp them directly with the |
| * cycle and zero the rest of the cycle data in the extended headers. |
| */ |
| if (hdrs > 1) { |
| for (i = 1; i < hdrs; i++) { |
| p = nextfunc(p, BBSIZE, private); |
| memset(p, 0, BBSIZE); |
| /* xlog_rec_ext_header.xh_cycle */ |
| *(__be32 *)p = cycle_lsn; |
| } |
| } |
| |
| /* |
| * The total length is the max of the stripe unit or 2 basic block |
| * minimum (1 hdr blk + 1 data blk). The record length is the total |
| * minus however many header blocks are required. |
| */ |
| head->h_len = cpu_to_be32(max(BBTOB(2), sunit) - hdrs * BBSIZE); |
| |
| /* |
| * Write out the unmount record, pack the first word into the record |
| * header and stamp the block with the cycle. |
| */ |
| p = nextfunc(p, BBSIZE, private); |
| unmount_record(p); |
| |
| head->h_cycle_data[0] = *(__be32 *)p; |
| *(__be32 *)p = cycle_lsn; |
| |
| /* |
| * Finally, zero all remaining blocks in the record and stamp each with |
| * the cycle. We don't need to pack any of these blocks because the |
| * cycle data in the headers has already been zeroed. |
| */ |
| len = max(len, hdrs + 1); |
| for (i = hdrs + 1; i < len; i++) { |
| p = nextfunc(p, BBSIZE, private); |
| memset(p, 0, BBSIZE); |
| *(__be32 *)p = cycle_lsn; |
| } |
| |
| return BBTOB(len); |
| } |
| |
| /* |
| * Simple I/O (buffer cache) interface |
| */ |
| |
| |
| #ifdef XFS_BUF_TRACING |
| |
| #undef libxfs_readbuf |
| #undef libxfs_readbuf_map |
| #undef libxfs_writebuf |
| #undef libxfs_getbuf |
| #undef libxfs_getbuf_map |
| #undef libxfs_getbuf_flags |
| #undef libxfs_putbuf |
| |
| xfs_buf_t *libxfs_readbuf(struct xfs_buftarg *, xfs_daddr_t, int, int, |
| const struct xfs_buf_ops *); |
| xfs_buf_t *libxfs_readbuf_map(struct xfs_buftarg *, struct xfs_buf_map *, |
| int, int, const struct xfs_buf_ops *); |
| int libxfs_writebuf(xfs_buf_t *, int); |
| xfs_buf_t *libxfs_getbuf(struct xfs_buftarg *, xfs_daddr_t, int); |
| xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *, struct xfs_buf_map *, |
| int, int); |
| xfs_buf_t *libxfs_getbuf_flags(struct xfs_buftarg *, xfs_daddr_t, int, |
| unsigned int); |
| void libxfs_putbuf (xfs_buf_t *); |
| |
| #define __add_trace(bp, func, file, line) \ |
| do { \ |
| if (bp) { \ |
| (bp)->b_func = (func); \ |
| (bp)->b_file = (file); \ |
| (bp)->b_line = (line); \ |
| } \ |
| } while (0) |
| |
| xfs_buf_t * |
| libxfs_trace_readbuf(const char *func, const char *file, int line, |
| struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags, |
| const struct xfs_buf_ops *ops) |
| { |
| xfs_buf_t *bp = libxfs_readbuf(btp, blkno, len, flags, ops); |
| __add_trace(bp, func, file, line); |
| return bp; |
| } |
| |
| xfs_buf_t * |
| libxfs_trace_readbuf_map(const char *func, const char *file, int line, |
| struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, int flags, |
| const struct xfs_buf_ops *ops) |
| { |
| xfs_buf_t *bp = libxfs_readbuf_map(btp, map, nmaps, flags, ops); |
| __add_trace(bp, func, file, line); |
| return bp; |
| } |
| |
| int |
| libxfs_trace_writebuf(const char *func, const char *file, int line, xfs_buf_t *bp, int flags) |
| { |
| __add_trace(bp, func, file, line); |
| return libxfs_writebuf(bp, flags); |
| } |
| |
| xfs_buf_t * |
| libxfs_trace_getbuf(const char *func, const char *file, int line, |
| struct xfs_buftarg *btp, xfs_daddr_t blkno, int len) |
| { |
| xfs_buf_t *bp = libxfs_getbuf(btp, blkno, len); |
| __add_trace(bp, func, file, line); |
| return bp; |
| } |
| |
| xfs_buf_t * |
| libxfs_trace_getbuf_map(const char *func, const char *file, int line, |
| struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, |
| int flags) |
| { |
| xfs_buf_t *bp = libxfs_getbuf_map(btp, map, nmaps, flags); |
| __add_trace(bp, func, file, line); |
| return bp; |
| } |
| |
| xfs_buf_t * |
| libxfs_trace_getbuf_flags(const char *func, const char *file, int line, |
| struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, unsigned int flags) |
| { |
| xfs_buf_t *bp = libxfs_getbuf_flags(btp, blkno, len, flags); |
| __add_trace(bp, func, file, line); |
| return bp; |
| } |
| |
| void |
| libxfs_trace_putbuf(const char *func, const char *file, int line, xfs_buf_t *bp) |
| { |
| __add_trace(bp, func, file, line); |
| libxfs_putbuf(bp); |
| } |
| |
| |
| #endif |
| |
| |
| xfs_buf_t * |
| libxfs_getsb(xfs_mount_t *mp, int flags) |
| { |
| return libxfs_readbuf(mp->m_ddev_targp, XFS_SB_DADDR, |
| XFS_FSS_TO_BB(mp, 1), flags, &xfs_sb_buf_ops); |
| } |
| |
| kmem_zone_t *xfs_buf_zone; |
| |
| static struct cache_mru xfs_buf_freelist = |
| {{&xfs_buf_freelist.cm_list, &xfs_buf_freelist.cm_list}, |
| 0, PTHREAD_MUTEX_INITIALIZER }; |
| |
| /* |
| * The bufkey is used to pass the new buffer information to the cache object |
| * allocation routine. Because discontiguous buffers need to pass different |
| * information, we need fields to pass that information. However, because the |
| * blkno and bblen is needed for the initial cache entry lookup (i.e. for |
| * bcompare) the fact that the map/nmaps is non-null to switch to discontiguous |
| * buffer initialisation instead of a contiguous buffer. |
| */ |
| struct xfs_bufkey { |
| struct xfs_buftarg *buftarg; |
| xfs_daddr_t blkno; |
| unsigned int bblen; |
| struct xfs_buf_map *map; |
| int nmaps; |
| }; |
| |
| /* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ |
| #define GOLDEN_RATIO_PRIME 0x9e37fffffffc0001UL |
| #define CACHE_LINE_SIZE 64 |
| static unsigned int |
| libxfs_bhash(cache_key_t key, unsigned int hashsize, unsigned int hashshift) |
| { |
| uint64_t hashval = ((struct xfs_bufkey *)key)->blkno; |
| uint64_t tmp; |
| |
| tmp = hashval ^ (GOLDEN_RATIO_PRIME + hashval) / CACHE_LINE_SIZE; |
| tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> hashshift); |
| return tmp % hashsize; |
| } |
| |
| static int |
| libxfs_bcompare(struct cache_node *node, cache_key_t key) |
| { |
| struct xfs_buf *bp = (struct xfs_buf *)node; |
| struct xfs_bufkey *bkey = (struct xfs_bufkey *)key; |
| |
| if (bp->b_target->dev == bkey->buftarg->dev && |
| bp->b_bn == bkey->blkno) { |
| if (bp->b_bcount == BBTOB(bkey->bblen)) |
| return CACHE_HIT; |
| #ifdef IO_BCOMPARE_CHECK |
| if (!(libxfs_bcache->c_flags & CACHE_MISCOMPARE_PURGE)) { |
| fprintf(stderr, |
| "%lx: Badness in key lookup (length)\n" |
| "bp=(bno 0x%llx, len %u bytes) key=(bno 0x%llx, len %u bytes)\n", |
| pthread_self(), |
| (unsigned long long)bp->b_bn, (int)bp->b_bcount, |
| (unsigned long long)bkey->blkno, |
| BBTOB(bkey->bblen)); |
| } |
| #endif |
| return CACHE_PURGE; |
| } |
| return CACHE_MISS; |
| } |
| |
| static void |
| __initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno, |
| unsigned int bytes) |
| { |
| bp->b_flags = 0; |
| bp->b_bn = bno; |
| bp->b_bcount = bytes; |
| bp->b_length = BTOBB(bytes); |
| bp->b_target = btp; |
| bp->b_error = 0; |
| if (!bp->b_addr) |
| bp->b_addr = memalign(libxfs_device_alignment(), bytes); |
| if (!bp->b_addr) { |
| fprintf(stderr, |
| _("%s: %s can't memalign %u bytes: %s\n"), |
| progname, __FUNCTION__, bytes, |
| strerror(errno)); |
| exit(1); |
| } |
| memset(bp->b_addr, 0, bytes); |
| #ifdef XFS_BUF_TRACING |
| list_head_init(&bp->b_lock_list); |
| #endif |
| pthread_mutex_init(&bp->b_lock, NULL); |
| bp->b_holder = 0; |
| bp->b_recur = 0; |
| bp->b_ops = NULL; |
| |
| if (!bp->b_maps) { |
| bp->b_nmaps = 1; |
| bp->b_maps = &bp->__b_map; |
| bp->b_maps[0].bm_bn = bp->b_bn; |
| bp->b_maps[0].bm_len = bp->b_length; |
| } |
| } |
| |
| static void |
| libxfs_initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno, |
| unsigned int bytes) |
| { |
| __initbuf(bp, btp, bno, bytes); |
| } |
| |
| static void |
| libxfs_initbuf_map(xfs_buf_t *bp, struct xfs_buftarg *btp, |
| struct xfs_buf_map *map, int nmaps) |
| { |
| unsigned int bytes = 0; |
| int i; |
| |
| bytes = sizeof(struct xfs_buf_map) * nmaps; |
| bp->b_maps = malloc(bytes); |
| if (!bp->b_maps) { |
| fprintf(stderr, |
| _("%s: %s can't malloc %u bytes: %s\n"), |
| progname, __FUNCTION__, bytes, |
| strerror(errno)); |
| exit(1); |
| } |
| bp->b_nmaps = nmaps; |
| |
| bytes = 0; |
| for ( i = 0; i < nmaps; i++) { |
| bp->b_maps[i].bm_bn = map[i].bm_bn; |
| bp->b_maps[i].bm_len = map[i].bm_len; |
| bytes += BBTOB(map[i].bm_len); |
| } |
| |
| __initbuf(bp, btp, map[0].bm_bn, bytes); |
| bp->b_flags |= LIBXFS_B_DISCONTIG; |
| } |
| |
| static xfs_buf_t * |
| __libxfs_getbufr(int blen) |
| { |
| xfs_buf_t *bp; |
| |
| /* |
| * first look for a buffer that can be used as-is, |
| * if one cannot be found, see if there is a buffer, |
| * and if so, free its buffer and set b_addr to NULL |
| * before calling libxfs_initbuf. |
| */ |
| pthread_mutex_lock(&xfs_buf_freelist.cm_mutex); |
| if (!list_empty(&xfs_buf_freelist.cm_list)) { |
| list_for_each_entry(bp, &xfs_buf_freelist.cm_list, b_node.cn_mru) { |
| if (bp->b_bcount == blen) { |
| list_del_init(&bp->b_node.cn_mru); |
| break; |
| } |
| } |
| if (&bp->b_node.cn_mru == &xfs_buf_freelist.cm_list) { |
| bp = list_entry(xfs_buf_freelist.cm_list.next, |
| xfs_buf_t, b_node.cn_mru); |
| list_del_init(&bp->b_node.cn_mru); |
| free(bp->b_addr); |
| bp->b_addr = NULL; |
| if (bp->b_maps != &bp->__b_map) |
| free(bp->b_maps); |
| bp->b_maps = NULL; |
| } |
| } else |
| bp = kmem_zone_zalloc(xfs_buf_zone, 0); |
| pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex); |
| bp->b_ops = NULL; |
| if (bp->b_flags & LIBXFS_B_DIRTY) |
| fprintf(stderr, "found dirty buffer (bulk) on free list!"); |
| |
| return bp; |
| } |
| |
| xfs_buf_t * |
| libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen) |
| { |
| xfs_buf_t *bp; |
| int blen = BBTOB(bblen); |
| |
| bp =__libxfs_getbufr(blen); |
| if (bp) |
| libxfs_initbuf(bp, btp, blkno, blen); |
| #ifdef IO_DEBUG |
| printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n", |
| pthread_self(), __FUNCTION__, blen, |
| (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp); |
| #endif |
| |
| return bp; |
| } |
| |
| static xfs_buf_t * |
| libxfs_getbufr_map(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen, |
| struct xfs_buf_map *map, int nmaps) |
| { |
| xfs_buf_t *bp; |
| int blen = BBTOB(bblen); |
| |
| if (!map || !nmaps) { |
| fprintf(stderr, |
| _("%s: %s invalid map %p or nmaps %d\n"), |
| progname, __FUNCTION__, map, nmaps); |
| exit(1); |
| } |
| |
| if (blkno != map[0].bm_bn) { |
| fprintf(stderr, |
| _("%s: %s map blkno 0x%llx doesn't match key 0x%llx\n"), |
| progname, __FUNCTION__, (long long)map[0].bm_bn, |
| (long long)blkno); |
| exit(1); |
| } |
| |
| bp =__libxfs_getbufr(blen); |
| if (bp) |
| libxfs_initbuf_map(bp, btp, map, nmaps); |
| #ifdef IO_DEBUG |
| printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n", |
| pthread_self(), __FUNCTION__, blen, |
| (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp); |
| #endif |
| |
| return bp; |
| } |
| |
| #ifdef XFS_BUF_TRACING |
| struct list_head lock_buf_list = {&lock_buf_list, &lock_buf_list}; |
| int lock_buf_count = 0; |
| #endif |
| |
| static struct xfs_buf * |
| __cache_lookup(struct xfs_bufkey *key, unsigned int flags) |
| { |
| struct xfs_buf *bp; |
| |
| cache_node_get(libxfs_bcache, key, (struct cache_node **)&bp); |
| if (!bp) |
| return NULL; |
| |
| if (use_xfs_buf_lock) { |
| int ret; |
| |
| ret = pthread_mutex_trylock(&bp->b_lock); |
| if (ret) { |
| ASSERT(ret == EAGAIN); |
| if (flags & LIBXFS_GETBUF_TRYLOCK) |
| goto out_put; |
| |
| if (pthread_equal(bp->b_holder, pthread_self())) { |
| fprintf(stderr, |
| _("Warning: recursive buffer locking at block %" PRIu64 " detected\n"), |
| key->blkno); |
| bp->b_recur++; |
| return bp; |
| } else { |
| pthread_mutex_lock(&bp->b_lock); |
| } |
| } |
| |
| bp->b_holder = pthread_self(); |
| } |
| |
| cache_node_set_priority(libxfs_bcache, (struct cache_node *)bp, |
| cache_node_get_priority((struct cache_node *)bp) - |
| CACHE_PREFETCH_PRIORITY); |
| #ifdef XFS_BUF_TRACING |
| pthread_mutex_lock(&libxfs_bcache->c_mutex); |
| lock_buf_count++; |
| list_add(&bp->b_lock_list, &lock_buf_list); |
| pthread_mutex_unlock(&libxfs_bcache->c_mutex); |
| #endif |
| #ifdef IO_DEBUG |
| printf("%lx %s: hit buffer %p for bno = 0x%llx/0x%llx\n", |
| pthread_self(), __FUNCTION__, |
| bp, bp->b_bn, (long long)LIBXFS_BBTOOFF64(key->blkno)); |
| #endif |
| |
| return bp; |
| out_put: |
| cache_node_put(libxfs_bcache, (struct cache_node *)bp); |
| return NULL; |
| } |
| |
| struct xfs_buf * |
| libxfs_getbuf_flags(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, |
| unsigned int flags) |
| { |
| struct xfs_bufkey key = {NULL}; |
| |
| key.buftarg = btp; |
| key.blkno = blkno; |
| key.bblen = len; |
| |
| return __cache_lookup(&key, flags); |
| } |
| |
| /* |
| * Clean the buffer flags for libxfs_getbuf*(), which wants to return |
| * an unused buffer with clean state. This prevents CRC errors on a |
| * re-read of a corrupt block that was prefetched and freed. This |
| * can happen with a massively corrupt directory that is discarded, |
| * but whose blocks are then recycled into expanding lost+found. |
| * |
| * Note however that if the buffer's dirty (prefetch calls getbuf) |
| * we'll leave the state alone because we don't want to discard blocks |
| * that have been fixed. |
| */ |
| static void |
| reset_buf_state( |
| struct xfs_buf *bp) |
| { |
| if (bp && !(bp->b_flags & LIBXFS_B_DIRTY)) |
| bp->b_flags &= ~(LIBXFS_B_UNCHECKED | LIBXFS_B_STALE | |
| LIBXFS_B_UPTODATE); |
| } |
| |
| struct xfs_buf * |
| libxfs_getbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len) |
| { |
| struct xfs_buf *bp; |
| |
| bp = libxfs_getbuf_flags(btp, blkno, len, 0); |
| reset_buf_state(bp); |
| return bp; |
| } |
| |
| static struct xfs_buf * |
| __libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, |
| int nmaps, int flags) |
| { |
| struct xfs_bufkey key = {NULL}; |
| int i; |
| |
| if (nmaps == 1) |
| return libxfs_getbuf_flags(btp, map[0].bm_bn, map[0].bm_len, |
| flags); |
| |
| key.buftarg = btp; |
| key.blkno = map[0].bm_bn; |
| for (i = 0; i < nmaps; i++) { |
| key.bblen += map[i].bm_len; |
| } |
| key.map = map; |
| key.nmaps = nmaps; |
| |
| return __cache_lookup(&key, flags); |
| } |
| |
/*
 * Get the buffer described by @map/@nmaps with its state flags reset
 * (unless it is dirty).
 */
struct xfs_buf *
libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map,
		int nmaps, int flags)
{
	struct xfs_buf	*bp = __libxfs_getbuf_map(btp, map, nmaps, flags);

	reset_buf_state(bp);
	return bp;
}
| |
| void |
| libxfs_putbuf(xfs_buf_t *bp) |
| { |
| /* |
| * ensure that any errors on this use of the buffer don't carry |
| * over to the next user. |
| */ |
| bp->b_error = 0; |
| |
| #ifdef XFS_BUF_TRACING |
| pthread_mutex_lock(&libxfs_bcache->c_mutex); |
| lock_buf_count--; |
| ASSERT(lock_buf_count >= 0); |
| list_del_init(&bp->b_lock_list); |
| pthread_mutex_unlock(&libxfs_bcache->c_mutex); |
| #endif |
| if (use_xfs_buf_lock) { |
| if (bp->b_recur) { |
| bp->b_recur--; |
| } else { |
| bp->b_holder = 0; |
| pthread_mutex_unlock(&bp->b_lock); |
| } |
| } |
| |
| cache_node_put(libxfs_bcache, (struct cache_node *)bp); |
| } |
| |
| void |
| libxfs_purgebuf(xfs_buf_t *bp) |
| { |
| struct xfs_bufkey key = {NULL}; |
| |
| key.buftarg = bp->b_target; |
| key.blkno = bp->b_bn; |
| key.bblen = bp->b_length; |
| |
| cache_node_purge(libxfs_bcache, &key, (struct cache_node *)bp); |
| } |
| |
| static struct cache_node * |
| libxfs_balloc(cache_key_t key) |
| { |
| struct xfs_bufkey *bufkey = (struct xfs_bufkey *)key; |
| |
| if (bufkey->map) |
| return (struct cache_node *) |
| libxfs_getbufr_map(bufkey->buftarg, |
| bufkey->blkno, bufkey->bblen, |
| bufkey->map, bufkey->nmaps); |
| return (struct cache_node *)libxfs_getbufr(bufkey->buftarg, |
| bufkey->blkno, bufkey->bblen); |
| } |
| |
| |
| static int |
| __read_buf(int fd, void *buf, int len, off64_t offset, int flags) |
| { |
| int sts; |
| |
| sts = pread(fd, buf, len, offset); |
| if (sts < 0) { |
| int error = errno; |
| fprintf(stderr, _("%s: read failed: %s\n"), |
| progname, strerror(error)); |
| if (flags & LIBXFS_EXIT_ON_FAILURE) |
| exit(1); |
| return -error; |
| } else if (sts != len) { |
| fprintf(stderr, _("%s: error - read only %d of %d bytes\n"), |
| progname, sts, len); |
| if (flags & LIBXFS_EXIT_ON_FAILURE) |
| exit(1); |
| return -EIO; |
| } |
| return 0; |
| } |
| |
| int |
| libxfs_readbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, xfs_buf_t *bp, |
| int len, int flags) |
| { |
| int fd = libxfs_device_to_fd(btp->dev); |
| int bytes = BBTOB(len); |
| int error; |
| |
| ASSERT(BBTOB(len) <= bp->b_bcount); |
| |
| error = __read_buf(fd, bp->b_addr, bytes, LIBXFS_BBTOOFF64(blkno), flags); |
| if (!error && |
| bp->b_target->dev == btp->dev && |
| bp->b_bn == blkno && |
| bp->b_bcount == bytes) |
| bp->b_flags |= LIBXFS_B_UPTODATE; |
| #ifdef IO_DEBUG |
| printf("%lx: %s: read %u bytes, error %d, blkno=0x%llx(0x%llx), %p\n", |
| pthread_self(), __FUNCTION__, bytes, error, |
| (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp); |
| #endif |
| return error; |
| } |
| |
| void |
| libxfs_readbuf_verify(struct xfs_buf *bp, const struct xfs_buf_ops *ops) |
| { |
| if (!ops) |
| return; |
| bp->b_ops = ops; |
| bp->b_ops->verify_read(bp); |
| bp->b_flags &= ~LIBXFS_B_UNCHECKED; |
| } |
| |
| |
| xfs_buf_t * |
| libxfs_readbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags, |
| const struct xfs_buf_ops *ops) |
| { |
| xfs_buf_t *bp; |
| int error; |
| |
| bp = libxfs_getbuf_flags(btp, blkno, len, 0); |
| if (!bp) |
| return NULL; |
| |
| /* |
| * if the buffer was prefetched, it is likely that it was not validated. |
| * Hence if we are supplied an ops function and the buffer is marked as |
| * unchecked, we need to validate it now. |
| * |
| * We do this verification even if the buffer is dirty - the |
| * verification is almost certainly going to fail the CRC check in this |
| * case as a dirty buffer has not had the CRC recalculated. However, we |
| * should not be dirtying unchecked buffers and therefore failing it |
| * here because it's dirty and unchecked indicates we've screwed up |
| * somewhere else. |
| */ |
| bp->b_error = 0; |
| if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) { |
| if (bp->b_flags & LIBXFS_B_UNCHECKED) |
| libxfs_readbuf_verify(bp, ops); |
| return bp; |
| } |
| |
| /* |
| * Set the ops on a cache miss (i.e. first physical read) as the |
| * verifier may change the ops to match the type of buffer it contains. |
| * A cache hit might reset the verifier to the original type if we set |
| * it again, but it won't get called again and set to match the buffer |
| * contents. *cough* xfs_da_node_buf_ops *cough*. |
| */ |
| error = libxfs_readbufr(btp, blkno, bp, len, flags); |
| if (error) |
| bp->b_error = error; |
| else |
| libxfs_readbuf_verify(bp, ops); |
| return bp; |
| } |
| |
| int |
| libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags) |
| { |
| int fd; |
| int error = 0; |
| void *buf; |
| int i; |
| |
| fd = libxfs_device_to_fd(btp->dev); |
| buf = bp->b_addr; |
| for (i = 0; i < bp->b_nmaps; i++) { |
| off64_t offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn); |
| int len = BBTOB(bp->b_maps[i].bm_len); |
| |
| error = __read_buf(fd, buf, len, offset, flags); |
| if (error) { |
| bp->b_error = error; |
| break; |
| } |
| buf += len; |
| } |
| |
| if (!error) |
| bp->b_flags |= LIBXFS_B_UPTODATE; |
| #ifdef IO_DEBUG |
| printf("%lx: %s: read %lu bytes, error %d, blkno=%llu(%llu), %p\n", |
| pthread_self(), __FUNCTION__, buf - (char *)bp->b_addr, error, |
| (long long)LIBXFS_BBTOOFF64(bp->b_bn), (long long)bp->b_bn, bp); |
| #endif |
| return error; |
| } |
| |
| struct xfs_buf * |
| libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, |
| int flags, const struct xfs_buf_ops *ops) |
| { |
| struct xfs_buf *bp; |
| int error = 0; |
| |
| if (nmaps == 1) |
| return libxfs_readbuf(btp, map[0].bm_bn, map[0].bm_len, |
| flags, ops); |
| |
| bp = __libxfs_getbuf_map(btp, map, nmaps, 0); |
| if (!bp) |
| return NULL; |
| |
| bp->b_error = 0; |
| if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) { |
| if (bp->b_flags & LIBXFS_B_UNCHECKED) |
| libxfs_readbuf_verify(bp, ops); |
| return bp; |
| } |
| error = libxfs_readbufr_map(btp, bp, flags); |
| if (!error) |
| libxfs_readbuf_verify(bp, ops); |
| |
| #ifdef IO_DEBUGX |
| printf("%lx: %s: read %lu bytes, error %d, blkno=%llu(%llu), %p\n", |
| pthread_self(), __FUNCTION__, buf - (char *)bp->b_addr, error, |
| (long long)LIBXFS_BBTOOFF64(bp->b_bn), (long long)bp->b_bn, bp); |
| #endif |
| return bp; |
| } |
| |
| static int |
| __write_buf(int fd, void *buf, int len, off64_t offset, int flags) |
| { |
| int sts; |
| |
| sts = pwrite(fd, buf, len, offset); |
| if (sts < 0) { |
| int error = errno; |
| fprintf(stderr, _("%s: pwrite failed: %s\n"), |
| progname, strerror(error)); |
| if (flags & LIBXFS_B_EXIT) |
| exit(1); |
| return -error; |
| } else if (sts != len) { |
| fprintf(stderr, _("%s: error - pwrite only %d of %d bytes\n"), |
| progname, sts, len); |
| if (flags & LIBXFS_B_EXIT) |
| exit(1); |
| return -EIO; |
| } |
| return 0; |
| } |
| |
| int |
| libxfs_writebufr(xfs_buf_t *bp) |
| { |
| int fd = libxfs_device_to_fd(bp->b_target->dev); |
| |
| /* |
| * we never write buffers that are marked stale. This indicates they |
| * contain data that has been invalidated, and even if the buffer is |
| * dirty it must *never* be written. Verifiers are wonderful for finding |
| * bugs like this. Make sure the error is obvious as to the cause. |
| */ |
| if (bp->b_flags & LIBXFS_B_STALE) { |
| bp->b_error = -ESTALE; |
| return bp->b_error; |
| } |
| |
| /* |
| * clear any pre-existing error status on the buffer. This can occur if |
| * the buffer is corrupt on disk and the repair process doesn't clear |
| * the error before fixing and writing it back. |
| */ |
| bp->b_error = 0; |
| if (bp->b_ops) { |
| bp->b_ops->verify_write(bp); |
| if (bp->b_error) { |
| fprintf(stderr, |
| _("%s: write verifer failed on %s bno 0x%llx/0x%x\n"), |
| __func__, bp->b_ops->name, |
| (long long)bp->b_bn, bp->b_bcount); |
| return bp->b_error; |
| } |
| } |
| |
| if (!(bp->b_flags & LIBXFS_B_DISCONTIG)) { |
| bp->b_error = __write_buf(fd, bp->b_addr, bp->b_bcount, |
| LIBXFS_BBTOOFF64(bp->b_bn), bp->b_flags); |
| } else { |
| int i; |
| void *buf = bp->b_addr; |
| |
| for (i = 0; i < bp->b_nmaps; i++) { |
| off64_t offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn); |
| int len = BBTOB(bp->b_maps[i].bm_len); |
| |
| bp->b_error = __write_buf(fd, buf, len, offset, |
| bp->b_flags); |
| if (bp->b_error) |
| break; |
| buf += len; |
| } |
| } |
| |
| #ifdef IO_DEBUG |
| printf("%lx: %s: wrote %u bytes, blkno=%llu(%llu), %p, error %d\n", |
| pthread_self(), __FUNCTION__, bp->b_bcount, |
| (long long)LIBXFS_BBTOOFF64(bp->b_bn), |
| (long long)bp->b_bn, bp, bp->b_error); |
| #endif |
| if (!bp->b_error) { |
| bp->b_flags |= LIBXFS_B_UPTODATE; |
| bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_EXIT | |
| LIBXFS_B_UNCHECKED); |
| } |
| return bp->b_error; |
| } |
| |
| int |
| libxfs_writebuf_int(xfs_buf_t *bp, int flags) |
| { |
| /* |
| * Clear any error hanging over from reading the buffer. This prevents |
| * subsequent reads after this write from seeing stale errors. |
| */ |
| bp->b_error = 0; |
| bp->b_flags &= ~LIBXFS_B_STALE; |
| bp->b_flags |= (LIBXFS_B_DIRTY | flags); |
| return 0; |
| } |
| |
| int |
| libxfs_writebuf(xfs_buf_t *bp, int flags) |
| { |
| #ifdef IO_DEBUG |
| printf("%lx: %s: dirty blkno=%llu(%llu)\n", |
| pthread_self(), __FUNCTION__, |
| (long long)LIBXFS_BBTOOFF64(bp->b_bn), |
| (long long)bp->b_bn); |
| #endif |
| /* |
| * Clear any error hanging over from reading the buffer. This prevents |
| * subsequent reads after this write from seeing stale errors. |
| */ |
| bp->b_error = 0; |
| bp->b_flags &= ~LIBXFS_B_STALE; |
| bp->b_flags |= (LIBXFS_B_DIRTY | flags); |
| libxfs_putbuf(bp); |
| return 0; |
| } |
| |
| void |
| libxfs_iomove(xfs_buf_t *bp, uint boff, int len, void *data, int flags) |
| { |
| #ifdef IO_DEBUG |
| if (boff + len > bp->b_bcount) { |
| printf("Badness, iomove out of range!\n" |
| "bp=(bno 0x%llx, bytes %u) range=(boff %u, bytes %u)\n", |
| (long long)bp->b_bn, bp->b_bcount, boff, len); |
| abort(); |
| } |
| #endif |
| switch (flags) { |
| case LIBXFS_BZERO: |
| memset(bp->b_addr + boff, 0, len); |
| break; |
| case LIBXFS_BREAD: |
| memcpy(data, bp->b_addr + boff, len); |
| break; |
| case LIBXFS_BWRITE: |
| memcpy(bp->b_addr + boff, data, len); |
| break; |
| } |
| } |
| |
| static void |
| libxfs_brelse( |
| struct cache_node *node) |
| { |
| struct xfs_buf *bp = (struct xfs_buf *)node; |
| |
| if (!bp) |
| return; |
| if (bp->b_flags & LIBXFS_B_DIRTY) |
| fprintf(stderr, |
| "releasing dirty buffer to free list!"); |
| |
| pthread_mutex_lock(&xfs_buf_freelist.cm_mutex); |
| list_add(&bp->b_node.cn_mru, &xfs_buf_freelist.cm_list); |
| pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex); |
| } |
| |
| static unsigned int |
| libxfs_bulkrelse( |
| struct cache *cache, |
| struct list_head *list) |
| { |
| xfs_buf_t *bp; |
| int count = 0; |
| |
| if (list_empty(list)) |
| return 0 ; |
| |
| list_for_each_entry(bp, list, b_node.cn_mru) { |
| if (bp->b_flags & LIBXFS_B_DIRTY) |
| fprintf(stderr, |
| "releasing dirty buffer (bulk) to free list!"); |
| count++; |
| } |
| |
| pthread_mutex_lock(&xfs_buf_freelist.cm_mutex); |
| list_splice(list, &xfs_buf_freelist.cm_list); |
| pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex); |
| |
| return count; |
| } |
| |
| /* |
| * Free everything from the xfs_buf_freelist MRU, used at final teardown |
| */ |
| void |
| libxfs_bcache_free(void) |
| { |
| struct list_head *cm_list; |
| xfs_buf_t *bp, *next; |
| |
| cm_list = &xfs_buf_freelist.cm_list; |
| list_for_each_entry_safe(bp, next, cm_list, b_node.cn_mru) { |
| free(bp->b_addr); |
| if (bp->b_maps != &bp->__b_map) |
| free(bp->b_maps); |
| kmem_zone_free(xfs_buf_zone, bp); |
| } |
| } |
| |
| /* |
| * When a buffer is marked dirty, the error is cleared. Hence if we are trying |
| * to flush a buffer prior to cache reclaim that has an error on it it means |
| * we've already tried to flush it and it failed. Prevent repeated corruption |
| * errors from being reported by skipping such buffers - when the corruption is |
| * fixed the buffer will be marked dirty again and we can write it again. |
| */ |
| static int |
| libxfs_bflush( |
| struct cache_node *node) |
| { |
| struct xfs_buf *bp = (struct xfs_buf *)node; |
| |
| if (!bp->b_error && bp->b_flags & LIBXFS_B_DIRTY) |
| return libxfs_writebufr(bp); |
| return bp->b_error; |
| } |
| |
| void |
| libxfs_putbufr(xfs_buf_t *bp) |
| { |
| if (bp->b_flags & LIBXFS_B_DIRTY) |
| libxfs_writebufr(bp); |
| libxfs_brelse((struct cache_node *)bp); |
| } |
| |
| |
| void |
| libxfs_bcache_purge(void) |
| { |
| cache_purge(libxfs_bcache); |
| } |
| |
| void |
| libxfs_bcache_flush(void) |
| { |
| cache_flush(libxfs_bcache); |
| } |
| |
| int |
| libxfs_bcache_overflowed(void) |
| { |
| return cache_overflowed(libxfs_bcache); |
| } |
| |
| struct cache_operations libxfs_bcache_operations = { |
| .hash = libxfs_bhash, |
| .alloc = libxfs_balloc, |
| .flush = libxfs_bflush, |
| .relse = libxfs_brelse, |
| .compare = libxfs_bcompare, |
| .bulkrelse = libxfs_bulkrelse |
| }; |
| |
| |
| /* |
| * Inode cache stubs. |
| */ |
| |
| kmem_zone_t *xfs_inode_zone; |
| extern kmem_zone_t *xfs_ili_zone; |
| |
| /* |
| * If there are inline format data / attr forks attached to this inode, |
| * make sure they're not corrupt. |
| */ |
| bool |
| libxfs_inode_verify_forks( |
| struct xfs_inode *ip, |
| struct xfs_ifork_ops *ops) |
| { |
| struct xfs_ifork *ifp; |
| xfs_failaddr_t fa; |
| |
| if (!ops) |
| return true; |
| |
| fa = xfs_ifork_verify_data(ip, ops); |
| if (fa) { |
| ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); |
| xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork", |
| ifp->if_u1.if_data, ifp->if_bytes, fa); |
| return false; |
| } |
| |
| fa = xfs_ifork_verify_attr(ip, ops); |
| if (fa) { |
| ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK); |
| xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork", |
| ifp ? ifp->if_u1.if_data : NULL, |
| ifp ? ifp->if_bytes : 0, fa); |
| return false; |
| } |
| return true; |
| } |
| |
| int |
| libxfs_iget( |
| struct xfs_mount *mp, |
| struct xfs_trans *tp, |
| xfs_ino_t ino, |
| uint lock_flags, |
| struct xfs_inode **ipp, |
| struct xfs_ifork_ops *ifork_ops) |
| { |
| struct xfs_inode *ip; |
| int error = 0; |
| |
| ip = kmem_zone_zalloc(xfs_inode_zone, 0); |
| if (!ip) |
| return -ENOMEM; |
| |
| ip->i_ino = ino; |
| ip->i_mount = mp; |
| error = xfs_iread(mp, tp, ip, 0); |
| if (error) { |
| kmem_zone_free(xfs_inode_zone, ip); |
| *ipp = NULL; |
| return error; |
| } |
| |
| if (!libxfs_inode_verify_forks(ip, ifork_ops)) { |
| libxfs_irele(ip); |
| return -EFSCORRUPTED; |
| } |
| |
| /* |
| * set up the inode ops structure that the libxfs code relies on |
| */ |
| if (XFS_ISDIR(ip)) |
| ip->d_ops = mp->m_dir_inode_ops; |
| else |
| ip->d_ops = mp->m_nondir_inode_ops; |
| |
| *ipp = ip; |
| return 0; |
| } |
| |
| static void |
| libxfs_idestroy(xfs_inode_t *ip) |
| { |
| switch (VFS_I(ip)->i_mode & S_IFMT) { |
| case S_IFREG: |
| case S_IFDIR: |
| case S_IFLNK: |
| libxfs_idestroy_fork(ip, XFS_DATA_FORK); |
| break; |
| } |
| if (ip->i_afp) |
| libxfs_idestroy_fork(ip, XFS_ATTR_FORK); |
| if (ip->i_cowfp) |
| xfs_idestroy_fork(ip, XFS_COW_FORK); |
| } |
| |
| void |
| libxfs_irele( |
| struct xfs_inode *ip) |
| { |
| if (ip->i_itemp) |
| kmem_zone_free(xfs_ili_zone, ip->i_itemp); |
| ip->i_itemp = NULL; |
| libxfs_idestroy(ip); |
| kmem_zone_free(xfs_inode_zone, ip); |
| } |