#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/version.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/extent_map.h>
static struct kmem_cache *extent_map_cache;
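/* called once at module init time to set up the slab cache used for extent_maps */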
int __init extent_map_init(void)
{
extent_map_cache = KMEM_CACHE(extent_map, SLAB_MEM_SPREAD);
if (!extent_map_cache)
return -ENOMEM;
return 0;
}
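/* called at module unload time to release the extent_map slab cache */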
void __exit extent_map_exit(void)
{
if (extent_map_cache)
kmem_cache_destroy(extent_map_cache);
}
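/*
 * initialize an empty extent_map tree. The spinlock protects both the
 * rb-tree and the cached 'last' pointer.
 */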
void extent_map_tree_init(struct extent_map_tree *tree)
{
tree->map.rb_node = NULL;
tree->last = NULL;
spin_lock_init(&tree->lock);
}
EXPORT_SYMBOL(extent_map_tree_init);
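/*
 * allocate a new extent_map with a reference count of one and cleared
 * flags; everything else is left for the caller to fill in
 */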
struct extent_map *alloc_extent_map(gfp_t mask)
{
struct extent_map *em;
	em = kmem_cache_alloc(extent_map_cache, mask);
	if (!em)
		return NULL;
atomic_set(&em->refs, 1);
em->flags = 0;
return em;
}
EXPORT_SYMBOL(alloc_extent_map);
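/*
 * drop one reference on an extent_map and free it once the count reaches
 * zero. NULL pointers are ignored.
 */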
void free_extent_map(struct extent_map *em)
{
if (!em)
return;
if (atomic_dec_and_test(&em->refs))
kmem_cache_free(extent_map_cache, em);
}
EXPORT_SYMBOL(free_extent_map);
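/*
 * insert a node into the rb-tree, keyed by start offset. If an existing
 * extent already covers 'offset' the insert is refused and the
 * conflicting node is returned instead; NULL means success.
 */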
static struct rb_node *tree_insert(struct rb_root *root, loff_t offset,
struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct extent_map *entry;

	while (*p) {
parent = *p;
entry = rb_entry(parent, struct extent_map, rb_node);
if (offset < entry->start)
p = &(*p)->rb_left;
else if (offset >= entry->start + entry->len)
p = &(*p)->rb_right;
else
return parent;
}
rb_link_node(node, parent, p);
rb_insert_color(node, root);
return NULL;
}
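/*
 * find the extent that contains 'offset'. On an exact hit the matching
 * node is returned. On a miss NULL is returned and, if prev_ret is
 * non-NULL, *prev_ret is set to the first extent past 'offset' (or NULL
 * if the tree ends before it).
 */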
static struct rb_node *__tree_search(struct rb_root *root, loff_t offset,
struct rb_node **prev_ret)
{
	struct rb_node *n = root->rb_node;
	struct rb_node *prev = NULL;
	struct extent_map *entry;
	struct extent_map *prev_entry = NULL;

	while (n) {
entry = rb_entry(n, struct extent_map, rb_node);
prev = n;
prev_entry = entry;
if (offset < entry->start)
n = n->rb_left;
else if (offset >= entry->start + entry->len)
n = n->rb_right;
else
return n;
}
if (!prev_ret)
return NULL;
	while (prev && offset >= prev_entry->start + prev_entry->len) {
		prev = rb_next(prev);
		if (!prev)
			break;
		prev_entry = rb_entry(prev, struct extent_map, rb_node);
	}
*prev_ret = prev;
return NULL;
}
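/*
 * like __tree_search, but on a miss the next extent past 'offset' (if
 * any) is returned instead of NULL
 */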
static inline struct rb_node *tree_search(struct rb_root *root, loff_t offset)
{
struct rb_node *prev;
struct rb_node *ret;
ret = __tree_search(root, offset, &prev);
if (!ret)
return prev;
return ret;
}
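/*
 * remove the extent that covers 'offset' from the tree. Returns -ENOENT
 * if no extent covers that offset.
 */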
static int tree_delete(struct rb_root *root, loff_t offset)
{
struct rb_node *node;
struct extent_map *entry;
node = __tree_search(root, offset, NULL);
if (!node)
return -ENOENT;
entry = rb_entry(node, struct extent_map, rb_node);
rb_erase(node, root);
return 0;
}
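/*
 * two extents can merge when they are adjacent in the file, have the same
 * flags, and are either the same special type (hole, inline, delalloc) or
 * physically contiguous on disk
 */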
static int mergable_maps(struct extent_map *prev, struct extent_map *next)
{
if (extent_map_end(prev) == next->start &&
prev->flags == next->flags &&
((next->block_start == EXTENT_MAP_HOLE &&
prev->block_start == EXTENT_MAP_HOLE) ||
(next->block_start == EXTENT_MAP_INLINE &&
prev->block_start == EXTENT_MAP_INLINE) ||
(next->block_start == EXTENT_MAP_DELALLOC &&
prev->block_start == EXTENT_MAP_DELALLOC) ||
(next->block_start < EXTENT_MAP_LAST_BYTE - 1 &&
next->block_start == extent_map_block_end(prev)))) {
return 1;
}
return 0;
}
/*
* add_extent_mapping tries a simple forward/backward merge with existing
* mappings. The extent_map struct passed in will be inserted into
* the tree directly (no copies made, just a reference taken).
*/
int add_extent_mapping(struct extent_map_tree *tree,
struct extent_map *em)
{
int ret = 0;
struct extent_map *merge = NULL;
struct rb_node *rb;
unsigned long flags;
spin_lock_irqsave(&tree->lock, flags);
rb = tree_insert(&tree->map, em->start, &em->rb_node);
if (rb) {
ret = -EEXIST;
goto out;
}
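	/* the tree now holds its own reference on em */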
atomic_inc(&em->refs);
if (em->start != 0) {
rb = rb_prev(&em->rb_node);
if (rb)
merge = rb_entry(rb, struct extent_map, rb_node);
if (rb && mergable_maps(merge, em)) {
em->start = merge->start;
em->len += merge->len;
em->block_start = merge->block_start;
rb_erase(&merge->rb_node, &tree->map);
free_extent_map(merge);
}
}
rb = rb_next(&em->rb_node);
if (rb)
merge = rb_entry(rb, struct extent_map, rb_node);
if (rb && mergable_maps(em, merge)) {
em->len += merge->len;
rb_erase(&merge->rb_node, &tree->map);
free_extent_map(merge);
}
tree->last = em;
out:
spin_unlock_irqrestore(&tree->lock, flags);
return ret;
}
EXPORT_SYMBOL(add_extent_mapping);
/*
 * lookup_extent_mapping returns the first extent_map struct in the
 * tree that intersects the [start, start + len) range. There may
 * be additional objects in the tree that intersect, so check the object
 * returned carefully to make sure you don't need additional lookups.
 */
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
loff_t start, u64 len)
{
struct extent_map *em;
struct extent_map *last;
struct rb_node *rb_node;
unsigned long flags;
spin_lock_irqsave(&tree->lock, flags);
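	/* fast path: the last extent returned often covers the next lookup */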
last = tree->last;
if (last && start >= last->start &&
start + len <= extent_map_end(last)) {
em = last;
atomic_inc(&em->refs);
goto out;
}
rb_node = tree_search(&tree->map, start);
if (!rb_node) {
em = NULL;
goto out;
}
if (IS_ERR(rb_node)) {
em = ERR_PTR(PTR_ERR(rb_node));
goto out;
}
em = rb_entry(rb_node, struct extent_map, rb_node);
if (extent_map_end(em) <= start || em->start >= start + len) {
em = NULL;
goto out;
}
atomic_inc(&em->refs);
tree->last = em;
out:
spin_unlock_irqrestore(&tree->lock, flags);
return em;
}
EXPORT_SYMBOL(lookup_extent_mapping);
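/*
 * Typical read-side usage (illustrative sketch only, not a helper defined
 * here): look the range up, use the mapping, then drop the reference:
 *
 *	em = lookup_extent_mapping(tree, pos, length);
 *	if (em) {
 *		... read from em->bdev at em->block_start ...
 *		free_extent_map(em);
 *	}
 */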
/*
* removes an extent_map struct from the tree. No reference counts are
* dropped, and no checks are done to see if the range is in use
*/
int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
{
int ret;
unsigned long flags;
spin_lock_irqsave(&tree->lock, flags);
ret = tree_delete(&tree->map, em->start);
tree->last = NULL;
spin_unlock_irqrestore(&tree->lock, flags);
return ret;
}
EXPORT_SYMBOL(remove_extent_mapping);
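/*
 * __map_extent looks up the cached mapping for [start, start + len) and,
 * if nothing useful is cached, calls the filesystem's get_block callback
 * to build a new extent_map and insert it into the tree. Returns NULL
 * when the caller is atomic and the range is not already cached, an
 * ERR_PTR on failure, or a referenced extent_map on success.
 */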
static struct extent_map *__map_extent(struct extent_map_tree *tree,
struct address_space *mapping,
loff_t start, u64 len, int create,
gfp_t gfp_mask, get_block_t get_block)
{
struct inode *inode = mapping->host;
struct extent_map *em;
struct buffer_head result;
sector_t start_block;
u64 cur_len;
int ret;
again:
em = lookup_extent_mapping(tree, start, len);
if (em) {
/*
* we may have found an extent that starts after the
* requested range. Double check and alter the length
* appropriately
*/
if (em->start > start) {
len = em->start - start;
} else if (!create || em->block_start != EXTENT_MAP_HOLE) {
return em;
}
free_extent_map(em);
}
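	/*
	 * callers that pass GFP_ATOMIC cannot sleep in the allocation or in
	 * get_block below, so stop at a cache miss
	 */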
if (gfp_mask & GFP_ATOMIC)
return NULL;
em = alloc_extent_map(GFP_NOFS);
if (!em)
return ERR_PTR(-ENOMEM);
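	/* result.b_size is a size_t, so clamp len to what fits in it */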
len = min_t(u64, len, (size_t)-1);
result.b_state = 0;
result.b_size = len;
start_block = start >> inode->i_blkbits;
	if (len < inode->i_sb->s_blocksize)
		printk(KERN_WARNING "extent_map: mapping length %llu is smaller than the block size\n",
		       (unsigned long long)len);
/*
* FIXME if there are errors later on, we end up exposing stale
* data on disk while filling holes.
*/
ret = get_block(inode, start_block,
&result, create);
if (ret < 0) {
free_extent_map(em);
return ERR_PTR(ret);
}
cur_len = result.b_size;
em->start = start;
em->len = cur_len;
em->bdev = result.b_bdev;
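	/*
	 * get_block allocated a new block: any cached mapping for this range
	 * is now stale, so drop it and flag the hole as filled
	 */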
if (create && buffer_new(&result)) {
remove_extent_mappings(tree, em->start, em->len);
em->flags = (1 << EXTENT_MAP_HOLE_FILLED);
}
if (buffer_mapped(&result))
em->block_start = (loff_t)result.b_blocknr << inode->i_blkbits;
else {
em->block_start = EXTENT_MAP_HOLE;
if (create) {
free_extent_map(em);
return ERR_PTR(-EIO);
}
}
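	/* if another task cached a mapping for this range first, drop ours and retry */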
ret = add_extent_mapping(tree, em);
if (ret == -EEXIST) {
free_extent_map(em);
goto again;
}
return em;
}
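/*
 * map_extent_get_block is the main lookup/mapping entry point. For reads
 * whose extent does not already extend past the requested range it keeps
 * mapping forward (bounded at roughly 512KB of readahead, stopping at
 * holes and other special extents) so that add_extent_mapping can merge
 * the pieces into one larger cached extent, and then makes sure the
 * extent returned covers the range that was originally asked for.
 */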
struct extent_map *map_extent_get_block(struct extent_map_tree *tree,
struct address_space *mapping,
loff_t start, u64 len, int create,
gfp_t gfp_mask, get_block_t get_block)
{
struct extent_map *em;
loff_t last;
u64 map_ahead_len = 0;
em = __map_extent(tree, mapping, start, len, create,
gfp_mask, get_block);
	/*
	 * if the mapping failed, this is a write, or the extent already
	 * extends past the requested range, return it as is
	 */
if (IS_ERR(em) || !em || create || start + len < extent_map_end(em)) {
return em;
}
/*
* otherwise, try to walk forward a bit and see if we can build
* something bigger.
*/
do {
last = extent_map_end(em);
free_extent_map(em);
em = __map_extent(tree, mapping, last, len, create,
gfp_mask, get_block);
if (IS_ERR(em) || !em)
break;
map_ahead_len += extent_map_end(em) - last;
	} while (em->start <= start && start + len <= extent_map_end(em) &&
em->block_start < EXTENT_MAP_LAST_BYTE &&
map_ahead_len < (512 * 1024));
/* make sure we return the extent for this range */
if (!em || IS_ERR(em) || em->start > start ||
start + len > extent_map_end(em)) {
		if (em && !IS_ERR(em))
			free_extent_map(em);
em = __map_extent(tree, mapping, start, len, create,
gfp_mask, get_block);
}
return em;
}
EXPORT_SYMBOL(map_extent_get_block);
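/*
 * remove_extent_mappings drops every cached extent that intersects
 * [start, start + len). Both the lookup reference and the tree's
 * reference are released, so extents not held elsewhere are freed here.
 */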
int remove_extent_mappings(struct extent_map_tree *tree, loff_t start, u64 len)
{
struct extent_map *em;
	while ((em = lookup_extent_mapping(tree, start, len))) {
remove_extent_mapping(tree, em);
/* once for us */
free_extent_map(em);
/* once for the tree */
free_extent_map(em);
}
return 0;
}
EXPORT_SYMBOL(remove_extent_mappings);