| #ifndef TUX3_H |
| #define TUX3_H |
| |
| #ifdef __KERNEL__ |
| #include <linux/version.h> |
| #include <linux/kernel.h> |
| #include <linux/sched.h> |
| #include <linux/time.h> |
| #include <linux/fs.h> |
| #include <linux/buffer_head.h> |
| #include <linux/bio.h> |
| #include <linux/blkdev.h> /* for struct blk_plug */ |
| #include <linux/mutex.h> |
| #include <linux/magic.h> |
| #include <linux/slab.h> |
| #include <linux/xattr.h> |
| #include <linux/list_sort.h> |
| |
| #include "trace.h" |
| #include "buffer.h" |
| #endif /* !__KERNEL__ */ |
| |
| #include "link.h" |
| |
| #define fieldtype(compound, field) typeof(((compound *)NULL)->field) |
| #define vecset(d, v, n) memset((d), (v), (n) * sizeof(*(d))) |
| #define veccopy(d, s, n) memcpy((d), (s), (n) * sizeof(*(d))) |
| #define vecmove(d, s, n) memmove((d), (s), (n) * sizeof(*(d))) |
| |
| typedef u64 inum_t; |
| typedef u64 tuxkey_t; |
| |
| static inline void *encode16(void *at, unsigned val) |
| { |
| *(__be16 *)at = cpu_to_be16(val); |
| return at + sizeof(u16); |
| } |
| |
| static inline void *encode32(void *at, unsigned val) |
| { |
| *(__be32 *)at = cpu_to_be32(val); |
| return at + sizeof(u32); |
| } |
| |
| static inline void *encode64(void *at, u64 val) |
| { |
| *(__be64 *)at = cpu_to_be64(val); |
| return at + sizeof(u64); |
| } |
| |
| static inline void *encode48(void *at, u64 val) |
| { |
| at = encode16(at, val >> 32); |
| return encode32(at, val); |
| } |
| |
| static inline void *decode16(void *at, unsigned *val) |
| { |
| *val = be16_to_cpup(at); |
| return at + sizeof(u16); |
| } |
| |
| static inline void *decode32(void *at, unsigned *val) |
| { |
| *val = be32_to_cpup(at); |
| return at + sizeof(u32); |
| } |
| |
| static inline void *decode64(void *at, u64 *val) |
| { |
| *val = be64_to_cpup(at); |
| return at + sizeof(u64); |
| } |
| |
| static inline void *decode48(void *at, u64 *val) |
| { |
| unsigned part1, part2; |
| at = decode16(at, &part1); |
| at = decode32(at, &part2); |
| *val = (u64)part1 << 32 | part2; |
| return at; |
| } |
| |
| /* Tux3 disk format */ |
| |
| /* |
| * TUX3_LABEL includes the date of the last incompatible disk format change |
| * NOTE: Always update this history for each incompatible change! |
| * |
| * Disk Format Revision History |
| * |
| * 2008-08-06: Beginning of time |
| * 2008-09-06: Actual checking starts |
| * 2008-12-12: Atom dictionary size in disksuper instead of atable->i_size |
| * 2009-02-28: Attributes renumbered, rdev added |
| * 2009-03-10: Alignment fix of disksuper |
| * 2012-02-16: Update for atomic commit |
| * 2012-07-02: Use timestamp 32.32 fixed point. Increase log_balloc size. |
| * 2012-12-20: Add ->usedinodes |
| * 2014-03-27: Change internal inum numbers. Change btree->root.depth. |
| * 2014-05-06: Change timestamp format to nanosecond. |
| */ |
| #define TUX3_MAGIC { 't', 'u', 'x', '3', 0x20, 0x14, 0x05, 0x06 } |
| #define TUX3_MAGIC_STR \ |
| ((typeof(((struct disksuper *)0)->magic))TUX3_MAGIC) |
| |
| #define TUX3_MAGIC_LOG 0x10ad |
| #define TUX3_MAGIC_BNODE 0xb4de |
| #define TUX3_MAGIC_DLEAF 0xbeaf |
| #define TUX3_MAGIC_ILEAF 0x90de |
| #define TUX3_MAGIC_OLEAF 0x6eaf |
| |
| /* Number of available inum ("0" - "((1 << 48) - 1)") */ |
| #define MAX_INODES_BITS 48 |
| #define MAX_INODES ((u64)1 << MAX_INODES_BITS) |
| /* Maximum number of block address ("0" - "((1 << 48) - 1)") */ |
| #define MAX_BLOCKS_BITS 48 |
| #define MAX_BLOCKS ((block_t)1 << MAX_BLOCKS_BITS) |
| |
| #define SB_LOC (1 << 12) |
| #define SB_LEN (1 << 12) /* this is maximum blocksize */ |
| |
| #define MAX_TUXKEY (((tuxkey_t)1 << 48) - 1) |
| #define TUXKEY_LIMIT (MAX_TUXKEY + 1) |
| |
| /* Special inode numbers */ |
| #define TUX_INVALID_INO 0 /* FIXME: just for debugging */ |
| #define TUX_BITMAP_INO 1 |
| #define TUX_COUNTMAP_INO 2 /* Block group free count map */ |
| #define TUX_VTABLE_INO 3 |
| #define TUX_ATABLE_INO 4 |
| #define TUX_VOLMAP_INO 61 /* This doesn't have entry in ileaf */ |
| #define TUX_LOGMAP_INO 62 /* This is volmap for log blocks */ |
| #define TUX_ROOTDIR_INO 63 |
| #define TUX_NORMAL_INO 64 /* until this ino, reserved ino */ |
| |
| struct disksuper { |
| /* Update magic on any incompatible format change */ |
| char magic[8]; /* Contains TUX3_LABEL magic string */ |
| __be64 birthdate; /* Volume creation date */ |
| __be64 flags; /* Need to assign some flags */ |
| __be16 blockbits; /* Shift to get volume block size */ |
| __be16 unused[3]; /* Padding for alignment */ |
| __be64 volblocks; /* Volume size */ |
| |
| /* The rest should be moved to a "metablock" that is updated frequently */ |
| __be64 iroot; /* Root of the itree */ |
| __be64 oroot; /* Root of the otree */ |
| __be64 usedinodes; /* Number of using inode numbers. (Instead of |
| * free inode numbers). With this, we can |
| * change the maximum inodes without changing |
| * usedinodes on disk. |
| */ |
| __be64 nextblock; /* Get rid of this when we have a real allocation policy */ |
| __be64 atomdictsize; /* |
| * Size of the atom dictionary instead of i_size |
| * FIXME: we are better to remove this somehow |
| */ |
| __be32 freeatom; /* Beginning of persistent free atom list in atable */ |
| __be32 atomgen; /* Next atom number if there are no free atoms */ |
| __be64 logchain; /* Most recent delta commit block pointer */ |
| __be32 logcount; /* Count of log blocks in the current log chain */ |
| } __packed; |
| |
| enum { MAX_DIRECT_COUNT = SHRT_MAX }; |
| |
| /* FIXME: maybe better to remove struct root to reduce holes in structure? */ |
| struct root { |
| unsigned short direct; /* block/depth is an extent instead of btree */ |
| union { |
| short count; /* Number of blocks in direct extent */ |
| short depth; /* Btree levels include leaf level */ |
| }; |
| block_t block; /* Disk location of btree root */ |
| }; |
| |
| struct btree { |
| struct rw_semaphore lock; |
| struct sb *sb; /* Convenience to reduce parameter list size */ |
| struct btree_ops *ops; /* Generic btree low level operations */ |
| struct root root; /* Cached description of btree root */ |
| u16 entries_per_leaf; /* Used in btree leaf splitting */ |
| }; |
| |
| /* Define layout of btree root on disk, endian conversion is elsewhere. */ |
| |
| static inline u64 pack_root(struct root *root) |
| { |
| return (u64)root->direct << 63 | (u64)root->depth << 48 | root->block; |
| } |
| |
| static inline struct root unpack_root(u64 v) |
| { |
| return (struct root){ |
| .direct = v >> 63, |
| .depth = (v >> 48) & 0x7fff, |
| .block = v & (-1ULL >> 16), |
| }; |
| } |
| |
| /* Path cursor for btree traversal */ |
| |
| struct cursor { |
| struct btree *btree; |
| #define CURSOR_DEBUG |
| #ifdef CURSOR_DEBUG |
| #define FREE_BUFFER ((void *)0xdbc06505) |
| #define FREE_NEXT ((void *)0xdbc06507) |
| int maxlevel; |
| #endif |
| int level; |
| struct path_level { |
| struct buffer_head *buffer; |
| struct index_entry *next; |
| } path[]; |
| }; |
| |
| struct stash { struct flink_head head; u64 *pos, *top; }; |
| |
| /* Flush synchronously */ |
| #define TUX3_FLUSHER_SYNC 1 |
| /* Flush asynchronously by own timing */ |
| #define TUX3_FLUSHER_ASYNC_OWN 2 |
| /* Flush asynchronously by kernel normal timing (by hackish way) */ |
| #define TUX3_FLUSHER_ASYNC_HACK 3 |
| |
| /* Refcount for delta */ |
| struct delta_ref { |
| atomic_t refcount; |
| unsigned delta; |
| #ifdef UNIFY_DEBUG |
| int unify_flag; /* FIXME: is there better way? */ |
| #endif |
| }; |
| |
| /* Per-delta data structure for sb */ |
| struct sb_delta_dirty { |
| struct list_head dirty_inodes; /* dirty inodes list */ |
| }; |
| |
| /* Pin a block in cache and keep a pointer to it */ |
| struct countmap_pin { |
| struct buffer_head *buffer; |
| }; |
| |
| struct tux3_idefer_map; |
| /* Tux3-specific sb is a handle for the entire volume state */ |
| struct sb { |
| union { |
| struct disksuper super; |
| char thisbig[SB_LEN]; |
| }; |
| |
| #if TUX3_FLUSHER == TUX3_FLUSHER_SYNC |
| struct rw_semaphore delta_lock; /* delta transition exclusive */ |
| #endif |
| struct delta_ref __rcu *current_delta; /* current delta */ |
| struct delta_ref delta_refs[TUX3_MAX_DELTA]; |
| unsigned next_delta; /* delta commit cycle */ |
| unsigned unify; /* log unify cycle */ |
| |
| #define TUX3_COMMIT_RUNNING_BIT 0 |
| #define TUX3_COMMIT_PENDING_BIT 1 |
| unsigned long backend_state; /* delta state */ |
| #ifdef UNIFY_DEBUG |
| struct delta_ref *pending_delta; /* pending delta for commit */ |
| #endif |
| unsigned staging_delta; /* staging delta */ |
| unsigned committed_delta; /* committed delta */ |
| wait_queue_head_t delta_event_wq; /* wait queue for delta event */ |
| #if TUX3_FLUSHER == TUX3_FLUSHER_ASYNC_OWN |
| struct task_struct *flush_task; /* work to flush delta */ |
| #endif |
| #if TUX3_FLUSHER == TUX3_FLUSHER_ASYNC_HACK |
| struct backing_dev_info bdi; |
| #endif |
| |
| struct btree itree; /* Inode btree */ |
| struct btree otree; /* Orphan btree */ |
| struct inode *volmap; /* Volume metadata cache (like blockdev). */ |
| struct inode *bitmap; /* allocation bitmap special file */ |
| struct inode *countmap; /* block group free count map */ |
| struct inode *rootdir; /* root directory special file */ |
| struct inode *vtable; /* version table special file */ |
| struct inode *atable; /* xattr atom special file */ |
| |
| unsigned blocksize, blockbits, blockmask, groupbits; |
| u64 freeinodes; /* Number of free inode numbers. This is |
| * including the deferred allocated inodes */ |
| block_t volblocks, volmask, freeblocks, nextblock; |
| inum_t nextinum; /* FIXME: temporary hack to avoid to find |
| * same area in itree for free inum. */ |
| unsigned entries_per_node; /* must be per-btree type, get rid of this */ |
| unsigned version; /* Currently mounted volume version view */ |
| |
| unsigned atomref_base; /* Index of atom refcount base */ |
| unsigned unatom_base; /* Index of unatom base */ |
| loff_t atomdictsize; /* Atom dictionary size */ |
| unsigned freeatom; /* Start of free atom list in atom table */ |
| unsigned atomgen; /* Next atom number to allocate if no free atoms */ |
| |
| /* |
| * For backend only |
| */ |
| struct inode *logmap; /* Log block cache */ |
| unsigned lognext; /* Index of next log block in log map */ |
| struct buffer_head *logbuf; /* Cached log block */ |
| unsigned char *logpos, *logtop; /* Where to emit next log entry */ |
| |
| struct list_head orphan_add; /* defered orphan inode add list */ |
| struct list_head orphan_del; /* defered orphan inode del list */ |
| |
| struct stash defree; /* defer extent frees until after delta */ |
| struct stash deunify; /* defer extent frees until after unify */ |
| |
| struct list_head unify_buffers; /* dirty metadata flushed at unify */ |
| |
| struct iowait *iowait; /* helper for waiting I/O */ |
| |
| /* |
| * For frontend and backend |
| */ |
| spinlock_t countmap_lock; |
| struct countmap_pin countmap_pin; |
| struct tux3_idefer_map *idefer_map; |
| struct list_head alloc_inodes; /* deferred inum allocation inodes */ |
| |
| spinlock_t forked_buffers_lock; |
| struct link forked_buffers; /* forked buffers list */ |
| |
| spinlock_t dirty_inodes_lock; /* lock of dirty_inodes for frontend */ |
| /* Per-delta dirty data for sb */ |
| struct sb_delta_dirty s_ddc[TUX3_MAX_DELTA]; |
| #ifdef __KERNEL__ |
| struct super_block *vfs_sb; /* Generic kernel superblock */ |
| #else |
| struct dev *dev; /* userspace block device */ |
| loff_t s_maxbytes; /* maximum file size */ |
| #endif |
| }; |
| |
| /* Block segment (physical block extent) info */ |
| #define BLOCK_SEG_HOLE (1 << 0) |
| #define BLOCK_SEG_NEW (1 << 1) |
| |
| struct block_segment { |
| block_t block; /* Start of physical address */ |
| unsigned count; /* Number of blocks */ |
| unsigned state; /* State of this segment */ |
| }; |
| |
| /* |
| * Balloc flags |
| */ |
| /* Allow fewer allocation than requested */ |
| #define BALLOC_PARTIAL (1 << 0) |
| |
| /* logging */ |
| |
| struct logblock { |
| __be16 magic; /* Magic number */ |
| __be16 bytes; /* Total data bytes on this block */ |
| u32 unused; /* padding */ |
| __be64 logchain; /* Block number to previous logblock */ |
| unsigned char data[]; /* Log data */ |
| }; |
| |
| enum { |
| LOG_BALLOC = 0x33, /* Log of block allocation */ |
| LOG_BFREE, /* Log of freeing block after delta */ |
| LOG_BFREE_ON_UNIFY, /* Log of freeing block after unify */ |
| LOG_BFREE_RELOG, /* LOG_BFREE, but re-log of free after unify */ |
| LOG_LEAF_REDIRECT, /* Log of leaf redirect */ |
| LOG_LEAF_FREE, /* Log of freeing leaf */ |
| LOG_BNODE_REDIRECT, /* Log of bnode redirect */ |
| LOG_BNODE_ROOT, /* Log of new bnode root allocation */ |
| LOG_BNODE_SPLIT, /* Log of spliting bnode to new bnode */ |
| LOG_BNODE_ADD, /* Log of adding bnode index */ |
| LOG_BNODE_UPDATE, /* Log of bnode index ->block update */ |
| LOG_BNODE_MERGE, /* Log of merging 2 bnodes */ |
| LOG_BNODE_DEL, /* Log of deleting bnode index */ |
| LOG_BNODE_ADJUST, /* Log of bnode index ->key adjust */ |
| LOG_BNODE_FREE, /* Log of freeing bnode */ |
| LOG_ORPHAN_ADD, /* Log of adding orphan inode */ |
| LOG_ORPHAN_DEL, /* Log of deleting orphan inode */ |
| LOG_FREEBLOCKS, /* Log of freeblocks in bitmap on unify */ |
| LOG_UNIFY, /* Log of marking unify */ |
| LOG_DELTA, /* just for debugging */ |
| LOG_TYPES |
| }; |
| |
| /* For debugging, MAX_ATTRS is smaller than 31, so present never be -1 */ |
| #define TUX3_INVALID_PRESENT (-1U) |
| |
| /* Inode attributes data */ |
| struct tux3_iattr_data { |
| unsigned present; |
| |
| umode_t i_mode; |
| uid_t i_uid; |
| gid_t i_gid; |
| unsigned int i_nlink; |
| dev_t i_rdev; |
| loff_t i_size; |
| // struct timespec i_atime; |
| struct timespec i_mtime; |
| struct timespec i_ctime; |
| u64 i_version; |
| }; |
| |
| /* Per-delta data structure for inode */ |
| struct inode_delta_dirty { |
| struct list_head dirty_buffers; /* list for dirty buffers */ |
| struct list_head dirty_holes; /* list for hole extents */ |
| struct list_head dirty_list; /* link for dirty inode list */ |
| |
| /* Forked data storage */ |
| /* FIXME: we don't need this for frontend delta. We would want |
| * to allocate dynamically */ |
| struct tux3_iattr_data idata; |
| }; |
| |
| struct xcache; |
| struct tux3_inode { |
| struct btree btree; |
| inum_t inum; /* Inode number */ |
| struct xcache *xcache; /* Extended attribute cache */ |
| struct list_head alloc_list; /* link for deferred inum allocation */ |
| struct list_head orphan_list; /* link for orphan inode list */ |
| |
| /* FIXME: we can use RCU for hole_extents? */ |
| spinlock_t hole_extents_lock; /* lock for hole_extents */ |
| struct list_head hole_extents; /* hole extents list */ |
| |
| struct rw_semaphore truncate_lock; /* lock for truncate and mmap */ |
| spinlock_t lock; /* lock for inode metadata */ |
| /* Per-delta dirty data for inode */ |
| unsigned flags; /* flags for inode state */ |
| unsigned present; /* Attributes decoded from or |
| * to be encoded to itree */ |
| struct inode_delta_dirty i_ddc[TUX3_MAX_DELTA]; |
| #ifdef __KERNEL__ |
| int (*io)(int rw, struct bufvec *bufvec); |
| #endif |
| /* Generic inode */ |
| struct inode vfs_inode; |
| }; |
| |
| static inline struct tux3_inode *tux_inode(struct inode *inode) |
| { |
| return container_of(inode, struct tux3_inode, vfs_inode); |
| } |
| |
| static inline struct inode *btree_inode(struct btree *btree) |
| { |
| return &container_of(btree, struct tux3_inode, btree)->vfs_inode; |
| } |
| #ifdef __KERNEL__ |
| static inline struct sb *tux_sb(struct super_block *sb) |
| { |
| return sb->s_fs_info; |
| } |
| |
| static inline struct super_block *vfs_sb(struct sb *sb) |
| { |
| return sb->vfs_sb; |
| } |
| |
| typedef struct address_space map_t; |
| |
| static inline map_t *mapping(struct inode *inode) |
| { |
| return inode->i_mapping; |
| } |
| |
| static inline struct block_device *sb_dev(struct sb *sb) |
| { |
| return sb->vfs_sb->s_bdev; |
| } |
| #else /* !__KERNEL__ */ |
| static inline struct sb *tux_sb(struct sb *sb) |
| { |
| return sb; |
| } |
| |
| static inline struct sb *vfs_sb(struct sb *sb) |
| { |
| return sb; |
| } |
| |
| static inline map_t *mapping(struct inode *inode) |
| { |
| return inode->map; |
| } |
| |
| static inline struct dev *sb_dev(struct sb *sb) |
| { |
| return sb->dev; |
| } |
| #endif /* !__KERNEL__ */ |
| |
| /* Get delta from free running counter */ |
| static inline unsigned tux3_delta(unsigned delta) |
| { |
| return delta & (TUX3_MAX_DELTA - 1); |
| } |
| |
| /* Get per-delta dirty data control for sb */ |
| static inline struct sb_delta_dirty *tux3_sb_ddc(struct sb *sb, unsigned delta) |
| { |
| return &sb->s_ddc[tux3_delta(delta)]; |
| } |
| |
| /* Get per-delta dirty data control for inode */ |
| static inline struct inode_delta_dirty *tux3_inode_ddc(struct inode *inode, |
| unsigned delta) |
| { |
| return &tux_inode(inode)->i_ddc[tux3_delta(delta)]; |
| } |
| |
| static inline struct tux3_inode *i_ddc_to_inode(struct inode_delta_dirty *i_ddc, |
| unsigned delta) |
| { |
| return container_of(i_ddc, struct tux3_inode, i_ddc[tux3_delta(delta)]); |
| } |
| |
| /* Get per-delta dirty buffers list from inode */ |
| static inline struct list_head *tux3_dirty_buffers(struct inode *inode, |
| unsigned delta) |
| { |
| return &tux3_inode_ddc(inode, delta)->dirty_buffers; |
| } |
| |
| struct tux_iattr { |
| kuid_t uid; |
| kgid_t gid; |
| umode_t mode; |
| }; |
| |
| static inline struct btree *itree_btree(struct sb *sb) |
| { |
| return &sb->itree; |
| } |
| |
| static inline struct btree *otree_btree(struct sb *sb) |
| { |
| return &sb->otree; |
| } |
| |
| #define TUX_LINK_MAX (1 << 15) /* arbitrary limit, increase it */ |
| |
| #define TUX_NAME_LEN 255 |
| |
| /* Directory entry */ |
| struct tux3_dirent { |
| __be64 inum; |
| __be16 rec_len; |
| u8 name_len, type; |
| char name[]; |
| /* |
| * On 64bit arch sizeof(struct tux3_dirent) == 16. We should use |
| * offsetof(struct tux3_dirent, name) instead. |
| */ |
| /* u32 __pad; */ |
| }; |
| |
| struct btree_key_range { |
| tuxkey_t start; |
| unsigned len; |
| }; |
| |
| enum btree_result { |
| BTREE_DO_RETRY = 0, |
| BTREE_DO_DIRTY, |
| BTREE_DO_SPLIT, |
| }; |
| |
| struct btree_ops { |
| void (*btree_init)(struct btree *btree); |
| int (*leaf_init)(struct btree *btree, void *leaf); |
| tuxkey_t (*leaf_split)(struct btree *btree, tuxkey_t hint, void *from, void *into); |
| /* return value: 1 - modified, 0 - not modified, < 0 - error */ |
| int (*leaf_chop)(struct btree *btree, tuxkey_t start, u64 len, void *leaf); |
| /* return value: 1 - merged, 0 - couldn't merge */ |
| int (*leaf_merge)(struct btree *btree, void *into, void *from); |
| /* return value: < 0 - error, 0 >= - btree_result */ |
| int (*leaf_pre_write)(struct btree *btree, tuxkey_t key_bottom, tuxkey_t key_limit, void *leaf, struct btree_key_range *key); |
| /* return value: < 0 - error, 0 >= - btree_result */ |
| int (*leaf_write)(struct btree *btree, tuxkey_t key_bottom, tuxkey_t key_limit, void *leaf, struct btree_key_range *key, tuxkey_t *split_hint); |
| int (*leaf_read)(struct btree *btree, tuxkey_t key_bottom, tuxkey_t key_limit, void *leaf, struct btree_key_range *key); |
| |
| void *private_ops; |
| |
| /* |
| * for debugging |
| */ |
| int (*leaf_sniff)(struct btree *btree, void *leaf); |
| /* return value: 1 - can free, 0 - can't free */ |
| int (*leaf_can_free)(struct btree *btree, void *leaf); |
| }; |
| |
| /* Information for replay */ |
| struct replay { |
| struct sb *sb; |
| |
| /* For orphan.c */ |
| struct list_head log_orphan_add; /* To remember LOG_ORPHAN_ADD */ |
| struct list_head orphan_in_otree; /* Orphan inodes in sb->otree */ |
| |
| /* For replay.c */ |
| void *unify_pos; /* position of unify log in a log block */ |
| block_t unify_index; /* index of a log block including unify log */ |
| block_t blocknrs[]; /* block address of log blocks */ |
| }; |
| |
| /* Does this root has direct extent? */ |
| static inline int has_direct_extent(struct btree *btree) |
| { |
| return btree->root.direct; |
| } |
| |
| extern struct root no_root; |
| /* Does this root has bnode/leaf? */ |
| static inline int has_root(struct btree *btree) |
| { |
| return !has_direct_extent(btree) && btree->root.depth > 0; |
| } |
| |
| /* Doesn't have both btree or direct extent? */ |
| static inline int has_no_root(struct btree *btree) |
| { |
| return btree->root.depth == 0; |
| } |
| |
| /* Redirect ptr which is pointing data of src from src to dst */ |
| static inline void *ptr_redirect(void *ptr, void *src, void *dst) |
| { |
| if (ptr) { |
| assert(ptr >= src); |
| return dst + (ptr - src); |
| } |
| return NULL; |
| } |
| |
| #ifdef __KERNEL__ |
| static inline struct timespec gettime(void) |
| { |
| return current_kernel_time(); |
| } |
| |
| static inline struct inode *buffer_inode(struct buffer_head *buffer) |
| { |
| return buffer->b_page->mapping->host; |
| } |
| |
| /* Get logical index of buffer */ |
| static inline block_t bufindex(struct buffer_head *buffer) |
| { |
| struct page *page = buffer->b_page; |
| /* FIXME: maybe we want to remove buffer->b_size */ |
| #if BITS_PER_LONG == 64 |
| return (page_offset(page) + bh_offset(buffer)) / buffer->b_size; |
| #else |
| const int blockbits = ffs(buffer->b_size) - 1; |
| return (page_offset(page) + bh_offset(buffer)) >> blockbits; |
| #endif |
| } |
| |
| /* dir.c */ |
| extern const struct file_operations tux_dir_fops; |
| extern const struct inode_operations tux_dir_iops; |
| |
| /* filemap.c */ |
| int tux3_get_block(struct inode *inode, sector_t iblock, |
| struct buffer_head *bh_result, int create); |
| struct buffer_head *__get_buffer(struct page *page, int offset); |
| void tux3_try_cancel_dirty_page(struct page *page); |
| void __tux3_set_page_dirty_account(struct page *page, |
| struct address_space *mapping, int warn); |
| int tux3_file_mmap(struct file *file, struct vm_area_struct *vma); |
| extern const struct address_space_operations tux_file_aops; |
| extern const struct address_space_operations tux_symlink_aops; |
| extern const struct address_space_operations tux_blk_aops; |
| extern const struct address_space_operations tux_vol_aops; |
| |
| /* inode.c */ |
| int tux3_sync_file(struct file *file, loff_t start, loff_t end, int datasync); |
| int tux3_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); |
| int tux3_setattr(struct dentry *dentry, struct iattr *iattr); |
| |
| /* symlink.c */ |
| extern const struct inode_operations tux_symlink_iops; |
| |
| /* utility.c */ |
| int vecio(int rw, struct block_device *dev, loff_t offset, unsigned vecs, |
| struct bio_vec *vec, bio_end_io_t endio, void *info); |
| int syncio(int rw, struct block_device *dev, loff_t offset, unsigned vecs, |
| struct bio_vec *vec); |
| int devio(int rw, struct block_device *dev, loff_t offset, void *data, |
| unsigned len); |
| int blockio(int rw, struct sb *sb, struct buffer_head *buffer, block_t block); |
| int blockio_vec(int rw, struct bufvec *bufvec, block_t block, unsigned count); |
| |
| #define tux3_msg(sb, fmt, ...) \ |
| __tux3_msg(sb, KERN_INFO, "", fmt, ##__VA_ARGS__) |
| #define __tux3_err(sb, func, line, fmt, ...) \ |
| __tux3_msg(sb, KERN_ERR, " error", \ |
| "%s:%d: " fmt, func, line, ##__VA_ARGS__) |
| #define tux3_err(sb, fmt, ...) \ |
| __tux3_err(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) |
| #define tux3_warn(sb, fmt, ...) \ |
| __tux3_msg(sb, KERN_WARNING, " warning", fmt, ##__VA_ARGS__) |
| |
| /* temporary hack for buffer */ |
| struct buffer_head *peekblk(struct address_space *mapping, block_t iblock); |
| struct buffer_head *blockread(struct address_space *mapping, block_t iblock); |
| struct buffer_head *blockget(struct address_space *mapping, block_t iblock); |
| #endif /* !__KERNEL__ */ |
| |
| /* balloc.c */ |
| void countmap_put(struct countmap_pin *countmap_pin); |
| int countmap_used(struct sb *sb, block_t group); |
| void bitmap_dump(struct inode *inode, block_t start, block_t count); |
| int balloc_find_range(struct sb *sb, |
| struct block_segment *seg, int maxsegs, int *segs, |
| block_t start, block_t range, unsigned *blocks); |
| int balloc_find(struct sb *sb, |
| struct block_segment *seg, int maxsegs, int *segs, |
| unsigned *blocks); |
| int balloc_use(struct sb *sb, struct block_segment *seg, int segs); |
| int balloc_segs(struct sb *sb, |
| struct block_segment *seg, int maxsegs, int *segs, |
| unsigned *blocks); |
| block_t balloc_one(struct sb *sb); |
| int bfree_segs(struct sb *sb, struct block_segment *seg, int segs); |
| int bfree(struct sb *sb, block_t start, unsigned blocks); |
| int replay_update_bitmap(struct replay *rp, block_t start, unsigned blocks, int set); |
| |
| /* btree.c */ |
| unsigned calc_entries_per_node(unsigned blocksize); |
| struct buffer_head *cursor_leafbuf(struct cursor *cursor); |
| void release_cursor(struct cursor *cursor); |
| struct cursor *alloc_cursor(struct btree *btree, int); |
| void free_cursor(struct cursor *cursor); |
| |
| void init_btree(struct btree *btree, struct sb *sb, struct root root, struct btree_ops *ops); |
| int btree_alloc_empty(struct btree *btree); |
| int btree_free_empty(struct btree *btree); |
| struct buffer_head *new_leaf(struct btree *btree); |
| tuxkey_t cursor_next_key(struct cursor *cursor); |
| tuxkey_t cursor_this_key(struct cursor *cursor); |
| int btree_probe(struct cursor *cursor, tuxkey_t key); |
| typedef int (*btree_traverse_func_t)(struct btree *btree, tuxkey_t key_bottom, |
| tuxkey_t key_limit, void *leaf, |
| tuxkey_t key, u64 len, void *data); |
| int btree_traverse(struct cursor *cursor, tuxkey_t key, u64 len, |
| btree_traverse_func_t func, void *data); |
| int btree_chop(struct btree *btree, tuxkey_t start, u64 len); |
| int btree_insert_leaf(struct cursor *cursor, tuxkey_t key, struct buffer_head *leafbuf); |
| void *btree_expand(struct cursor *cursor, tuxkey_t key, unsigned newsize); |
| int noop_pre_write(struct btree *btree, tuxkey_t key_bottom, tuxkey_t key_limit, |
| void *leaf, struct btree_key_range *key); |
| int btree_write(struct cursor *cursor, struct btree_key_range *key); |
| int btree_read(struct cursor *cursor, struct btree_key_range *key); |
| int cursor_redirect(struct cursor *cursor); |
| int replay_bnode_redirect(struct replay *rp, block_t oldblock, block_t newblock); |
| int replay_bnode_root(struct replay *rp, block_t root, unsigned count, |
| block_t left, block_t right, tuxkey_t rkey); |
| int replay_bnode_split(struct replay *rp, block_t src, unsigned pos, block_t dst); |
| int replay_bnode_add(struct replay *rp, block_t parent, block_t child, tuxkey_t key); |
| int replay_bnode_update(struct replay *rp, block_t parent, block_t child, tuxkey_t key); |
| int replay_bnode_merge(struct replay *rp, block_t src, block_t dst); |
| int replay_bnode_del(struct replay *rp, block_t bnode, tuxkey_t key, unsigned count); |
| int replay_bnode_adjust(struct replay *rp, block_t bnode, tuxkey_t from, tuxkey_t to); |
| |
| /* commit.c */ |
| int setup_sb(struct sb *sb, struct disksuper *super); |
| int load_sb(struct sb *sb); |
| int save_sb(struct sb *sb); |
| void tux3_start_backend(struct sb *sb); |
| void tux3_end_backend(void); |
| int tux3_under_backend(struct sb *sb); |
| int force_unify(struct sb *sb); |
| int force_delta(struct sb *sb); |
| unsigned tux3_get_current_delta(void); |
| unsigned tux3_inode_delta(struct inode *inode); |
| void change_begin_atomic(struct sb *sb); |
| void change_end_atomic(struct sb *sb); |
| void change_begin_atomic_nested(struct sb *sb, void **ptr); |
| void change_end_atomic_nested(struct sb *sb, void *ptr); |
| void change_begin(struct sb *sb); |
| int change_end(struct sb *sb); |
| void change_begin_if_needed(struct sb *sb, int need_sep); |
| void change_end_if_needed(struct sb *sb); |
| |
| /* commit_flusher.c */ |
| #include "commit_flusher.h" |
| |
| /* dir.c */ |
| void tux_set_entry(struct buffer_head *buffer, struct tux3_dirent *entry, |
| inum_t inum, umode_t mode); |
| void tux_update_dirent(struct inode *dir, struct buffer_head *buffer, |
| struct tux3_dirent *entry, struct inode *new_inode); |
| loff_t tux_alloc_entry(struct inode *dir, const char *name, unsigned len, |
| loff_t *size, struct buffer_head **hold); |
| int tux_create_dirent(struct inode *dir, const struct qstr *qstr, |
| struct inode *inode); |
| struct tux3_dirent *tux_find_entry(struct inode *dir, const char *name, |
| unsigned len, struct buffer_head **result, |
| loff_t size); |
| struct tux3_dirent *tux_find_dirent(struct inode *dir, const struct qstr *qstr, |
| struct buffer_head **result); |
| int tux_delete_entry(struct inode *dir, struct buffer_head *buffer, |
| struct tux3_dirent *entry); |
| int tux_delete_dirent(struct inode *dir, struct buffer_head *buffer, |
| struct tux3_dirent *entry); |
| int tux_readdir(struct file *file, struct dir_context *ctx); |
| int tux_dir_is_empty(struct inode *dir); |
| |
| /* dleaf.c */ |
| extern struct btree_ops dtree_ops; |
| |
| /* filemap.c */ |
| int dtree_chop(struct btree *btree, tuxkey_t start, u64 len); |
| int tux3_filemap_overwrite_io(int rw, struct bufvec *bufvec); |
| int tux3_filemap_redirect_io(int rw, struct bufvec *bufvec); |
| int tux3_truncate_partial_block(struct inode *inode, loff_t newsize); |
| void tux3_truncate_pagecache(struct inode *inode, loff_t newsize); |
| |
| /* iattr.c */ |
| void dump_attrs(struct inode *inode); |
| void *encode_kind(void *attrs, unsigned kind, unsigned version); |
| void *decode_kind(void *attrs, unsigned *kind, unsigned *version); |
| extern struct ileaf_attr_ops iattr_ops; |
| |
| /* ileaf.c */ |
| struct ileaf; |
| void *ileaf_lookup(struct btree *btree, inum_t inum, struct ileaf *leaf, unsigned *result); |
| int ileaf_find_free(struct btree *btree, tuxkey_t key_bottom, |
| tuxkey_t key_limit, void *leaf, |
| tuxkey_t key, u64 len, void *data); |
| struct ileaf_enumrate_cb { |
| int (*callback)(struct btree *btree, inum_t inum, void *attrs, |
| unsigned size, void *data); |
| void *data; |
| }; |
| int ileaf_enumerate(struct btree *btree, tuxkey_t key_bottom, |
| tuxkey_t key_limit, void *leaf, |
| tuxkey_t key, u64 len, void *data); |
| extern struct btree_ops itree_ops; |
| extern struct btree_ops otree_ops; |
| |
| /* inode.c */ |
| void tux3_inode_copy_attrs(struct inode *inode, unsigned delta); |
| struct inode *tux_new_volmap(struct sb *sb); |
| struct inode *tux_new_logmap(struct sb *sb); |
| struct inode *tux_new_inode(struct inode *dir, struct tux_iattr *iattr, |
| dev_t rdev); |
| struct tux3_idefer_map *tux3_alloc_idefer_map(void); |
| void tux3_free_idefer_map(struct tux3_idefer_map *map); |
| int __init tux3_init_idefer_cache(void); |
| void tux3_destroy_idefer_cache(void); |
| void del_defer_alloc_inum(struct inode *inode); |
| void cancel_defer_alloc_inum(struct inode *inode); |
| int tux_assign_inum(struct inode *inode, inum_t goal); |
| struct inode *tux_create_specific_inode(struct inode *dir, inum_t inum, |
| struct tux_iattr *iattr, dev_t rdev); |
| struct inode *tux3_iget(struct sb *sb, inum_t inum); |
| struct inode *tux3_ilookup_nowait(struct sb *sb, inum_t inum); |
| struct inode *tux3_ilookup(struct sb *sb, inum_t inum); |
| int tux3_save_inode(struct inode *inode, struct tux3_iattr_data *idata, |
| unsigned delta); |
| int tux3_purge_inode(struct inode *inode, struct tux3_iattr_data *idata, |
| unsigned delta); |
| int tux3_drop_inode(struct inode *inode); |
| void tux3_evict_inode(struct inode *inode); |
| void iget_if_dirty(struct inode *inode); |
| |
| /* log.c */ |
| extern unsigned log_size[]; |
| void log_next(struct sb *sb); |
| void log_drop(struct sb *sb); |
| void log_finish(struct sb *sb); |
| void log_finish_cycle(struct sb *sb, int discard); |
| int tux3_logmap_io(int rw, struct bufvec *bufvec); |
| void log_balloc(struct sb *sb, block_t block, unsigned count); |
| void log_bfree(struct sb *sb, block_t block, unsigned count); |
| void log_bfree_on_unify(struct sb *sb, block_t block, unsigned count); |
| void log_bfree_relog(struct sb *sb, block_t block, unsigned count); |
| void log_leaf_redirect(struct sb *sb, block_t oldblock, block_t newblock); |
| void log_leaf_free(struct sb *sb, block_t leaf); |
| void log_bnode_redirect(struct sb *sb, block_t oldblock, block_t newblock); |
| void log_bnode_root(struct sb *sb, block_t root, unsigned count, |
| block_t left, block_t right, tuxkey_t rkey); |
| void log_bnode_split(struct sb *sb, block_t src, unsigned pos, block_t dst); |
| void log_bnode_add(struct sb *sb, block_t parent, block_t child, tuxkey_t key); |
| void log_bnode_update(struct sb *sb, block_t parent, block_t child, |
| tuxkey_t key); |
| void log_bnode_merge(struct sb *sb, block_t src, block_t dst); |
| void log_bnode_del(struct sb *sb, block_t node, tuxkey_t key, unsigned count); |
| void log_bnode_adjust(struct sb *sb, block_t node, tuxkey_t from, tuxkey_t to); |
| void log_bnode_free(struct sb *sb, block_t bnode); |
| void log_orphan_add(struct sb *sb, unsigned version, tuxkey_t inum); |
| void log_orphan_del(struct sb *sb, unsigned version, tuxkey_t inum); |
| void log_freeblocks(struct sb *sb, block_t freeblocks); |
| void log_delta(struct sb *sb); |
| void log_unify(struct sb *sb); |
| |
| typedef int (*unstash_t)(struct sb *sb, u64 val); |
| void stash_init(struct stash *stash); |
| int stash_value(struct stash *stash, u64 value); |
| int unstash(struct sb *sb, struct stash *defree, unstash_t actor); |
| int stash_walk(struct sb *sb, struct stash *stash, unstash_t actor); |
| int defer_bfree(struct sb *sb, struct stash *defree, |
| block_t block, unsigned count); |
| void destroy_defer_bfree(struct stash *defree); |
| |
| /* orphan.c */ |
| void clean_orphan_list(struct list_head *head); |
| extern struct ileaf_attr_ops oattr_ops; |
| int tux3_unify_orphan_add(struct sb *sb, struct list_head *orphan_add); |
| int tux3_unify_orphan_del(struct sb *sb, struct list_head *orphan_del); |
| int tux3_make_orphan_add(struct inode *inode); |
| int tux3_make_orphan_del(struct inode *inode); |
| int replay_orphan_add(struct replay *rp, unsigned version, inum_t inum); |
| int replay_orphan_del(struct replay *rp, unsigned version, inum_t inum); |
| void replay_iput_orphan_inodes(struct sb *sb, |
| struct list_head *orphan_in_otree, |
| int destroy); |
| int replay_load_orphan_inodes(struct replay *rp); |
| |
| /* super.c */ |
| struct replay *tux3_init_fs(struct sb *sbi); |
| |
| /* policy.c */ |
| inum_t policy_inum(struct inode *dir, loff_t where, struct inode *inode); |
| void policy_inode_init(inum_t *previous); |
| void policy_inode(struct inode *inode, inum_t *previous); |
| void policy_extents(struct bufvec *bufvec); |
| |
| /* replay.c */ |
| struct replay *replay_stage1(struct sb *sb); |
| int replay_stage2(struct replay *rp); |
| int replay_stage3(struct replay *rp, int apply); |
| |
| /* utility.c */ |
| void __printf(4, 5) |
| __tux3_msg(struct sb *sb, const char *level, const char *prefix, |
| const char *fmt, ...); |
| void __printf(1, 2) |
| __tux3_dbg(const char *fmt, ...); |
| #define tux3_dbg(fmt , ...) \ |
| __tux3_dbg("%s:%d: " fmt "\n", __func__, __LINE__, ##__VA_ARGS__) |
| void __printf(4, 5) |
| __tux3_fs_error(struct sb *sb, const char *func, unsigned int line, |
| const char *fmt, ...); |
| #define tux3_fs_error(sb, fmt, ...) \ |
| __tux3_fs_error(sb, __func__, __LINE__, fmt , ##__VA_ARGS__) |
| |
| void hexdump(void *data, unsigned size); |
| void set_bits(u8 *bitmap, unsigned start, unsigned count); |
| void clear_bits(u8 *bitmap, unsigned start, unsigned count); |
| int all_set(u8 *bitmap, unsigned start, unsigned count); |
| int all_clear(u8 *bitmap, unsigned start, unsigned count); |
| int bytebits(u8 c); |
| |
| /* writeback.c */ |
| void tux3_set_inode_no_flush(struct inode *inode); |
| void tux3_set_inode_always_dirty(struct inode *inode); |
| void tux3_mark_btree_dirty(struct btree *btree); |
| void __tux3_mark_inode_dirty(struct inode *inode, int flags); |
| static inline void tux3_mark_inode_dirty(struct inode *inode) |
| { |
| __tux3_mark_inode_dirty(inode, I_DIRTY); |
| } |
| static inline void tux3_mark_inode_dirty_sync(struct inode *inode) |
| { |
| __tux3_mark_inode_dirty(inode, I_DIRTY_SYNC); |
| } |
| |
| void tux3_dirty_inode(struct inode *inode, int flags); |
| void tux3_mark_inode_to_delete(struct inode *inode); |
| void tux3_iattrdirty(struct inode *inode); |
| void tux3_xattrdirty(struct inode *inode); |
| void tux3_xattr_read_and_clear(struct inode *inode); |
| void tux3_clear_dirty_inode(struct inode *inode); |
| void __tux3_mark_buffer_dirty(struct buffer_head *buffer, unsigned delta); |
| void tux3_mark_buffer_dirty(struct buffer_head *buffer); |
| void tux3_mark_buffer_unify(struct buffer_head *buffer); |
| void tux3_mark_inode_orphan(struct tux3_inode *tuxnode); |
| int tux3_inode_is_orphan(struct tux3_inode *tuxnode); |
| int tux3_flush_inode_internal(struct inode *inode, unsigned delta, int req_flag); |
| int tux3_flush_inode(struct inode *inode, unsigned delta, int req_flag); |
| int tux3_flush_inodes(struct sb *sb, unsigned delta); |
| int tux3_has_dirty_inodes(struct sb *sb, unsigned delta); |
| void tux3_clear_dirty_inodes(struct sb *sb, unsigned delta); |
| void tux3_check_destroy_inode_flags(struct inode *inode); |
| |
| /* xattr.c */ |
| #ifndef ENOATTR |
| #define ENOATTR ENODATA |
| #endif |
| |
| void atable_init_base(struct sb *sb); |
| int xcache_dump(struct inode *inode); |
| void free_xcache(struct inode *inode); |
| int new_xcache(struct inode *inode, unsigned size); |
| int xcache_remove_all(struct inode *inode); |
| int get_xattr(struct inode *inode, const char *name, unsigned len, |
| void *data, unsigned size); |
| int set_xattr(struct inode *inode, const char *name, unsigned len, |
| const void *data, unsigned size, unsigned flags); |
| int del_xattr(struct inode *inode, const char *name, unsigned len); |
| int list_xattr(struct inode *inode, char *text, size_t size); |
| unsigned encode_xsize(struct inode *inode); |
| void *encode_xattrs(struct inode *inode, void *attrs, unsigned size); |
| unsigned decode_xsize(struct inode *inode, void *attrs, unsigned size); |
| void *decode_xattr(struct inode *inode, void *attrs); |
| |
| static inline struct buffer_head *vol_find_get_block(struct sb *sb, block_t block) |
| { |
| return peekblk(mapping(sb->volmap), block); |
| } |
| |
| static inline struct buffer_head *vol_getblk(struct sb *sb, block_t block) |
| { |
| return blockget(mapping(sb->volmap), block); |
| } |
| |
| static inline struct buffer_head *vol_bread(struct sb *sb, block_t block) |
| { |
| return blockread(mapping(sb->volmap), block); |
| } |
| |
| #include "dirty-buffer.h" /* remove this after atomic commit */ |
| #endif /* !TUX3_H */ |