blob: a760c3a70e42b1ecf1078ec7c0015133d2a127af [file] [log] [blame]
/*
* Blockconsole - write kernel console to a block device
*
* Copyright (C) 2012 Joern Engel <joern@logfs.org>
*
* For usage and disk format, please see
* Documentation/block/blockconsole.txt
*
* Blockconsole allocates a 1MB buffer at init time. printk() calls
* bcon_write(), which simply writes to the buffer and wakes up a
* writeback thread. bcon_writeback() will then write the console out
* to the blockdevice, in chunks of sector_size.
*
* All allocations for blockconsole happen at init time. All means
* 1MB worth of buffer, a struct bio for every 512B sector, plus
* another page and struct bio to zero the current 1MB of the device
* before writing to it.
*
* The block layer and device drivers may require further memory
* allocations, make sleeping calls and require all sorts of
* infrastructure. In such cases, writeback may be delayed or fail
* altogether. But since writeback is decoupled from printk(), the
* worst consequence should be loss of debug information - which would
* be lost without blockconsole as well.
*
* On top of the writeback thread, bcon_write() also schedules a 1s
* timer. When the timer expires, the current partial sector will be
* padded and written out as well. Blockconsole does no overwrites,
* so a spurious line of up to 510 spaces and a newline is the result.
*
* There is a panic handler that tries to push out the last dying
* breath. Sometimes that works, sometimes it causes a secondary
* oops. I have never seen it do harm and when it does work it
* provides useful crash information.
*
* In cases where more printk data comes in the front door than the
* backing device can handle, there will be data loss. Again,
* blockconsole is best-effort. Given the 1MB buffer it takes an
* extremely slow device or a deliberate attempt to overflow the buffer,
* so this is almost never a problem in practice.
*
* In case the device is already filled with data (particularly after
* blockconsole wraps around), each 1MB tile is written with zeroes
* once before the regular sector writes happen. No one should
* interpret stale data from before the wrap-around as current - or
* have to guess where the current data ends and the stale data
* begins.
*
* Detection of console devices currently works by having the partition
* scanning code call bcon_add() once for every partition and once for
* every device. If we find a valid header, the device is automatically
* used.
*
* Removal of console devices is also automatic - sooner or later a
* removed device will cause write errors and any device that
* consistently returns write errors will get removed. The main goal
* here was to be resilient against flaky hardware; using the same code
* to handle device removal is a bonus and ensures test coverage.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/bio.h>
#include <linux/blockconsole.h>
#include <linux/console.h>
#include <linux/ctype.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/kref.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/workqueue.h>
/* Header magic; bcon_init_first_page() copies it to the start of the header sector */
#define BLOCKCONSOLE_MAGIC "\nLinux blockconsole version 1.1\n"
/*
 * Byte offsets of the header fields that follow the magic. Each field is
 * written as 8 hex digits followed by a newline (9 bytes).
 */
#define BCON_UUID_OFS (32)
#define BCON_ROUND_OFS (41)
#define BCON_TILE_OFS (50)
/* Header length up to and including the round field (magic + uuid + round) */
#define BCON_HEADERSIZE (50)
#define BCON_LONG_HEADERSIZE (59) /* with tile index */
#define MAX_EXTENTS (16) /* 256 bytes for extent_map on 64bit */
/* Size of the in-memory buffer, which is also the size of one on-device tile */
#define CACHE_SIZE (1024 * 1024)
/* Number of pages in the buffer */
#define PAGE_COUNT (CACHE_SIZE >> PAGE_SHIFT)
/* Sectors are 512 bytes (1 << 9) */
#define SECTOR_SHIFT (9)
/* Number of sectors in the buffer */
#define SECTOR_COUNT (CACHE_SIZE >> SECTOR_SHIFT)
/* Masks that reduce a page index, sector index or byte position to the buffer */
#define CACHE_PAGE_MASK (PAGE_COUNT - 1)
#define CACHE_SECTOR_MASK (SECTOR_COUNT - 1)
#define CACHE_MASK (CACHE_SIZE - 1)
#define SECTOR_SIZE (1u << SECTOR_SHIFT)
/* Clears the offset-within-sector bits; ~SECTOR_MASK selects them */
#define SECTOR_MASK (~(SECTOR_SIZE-1))
/* Selects the index of a sector within its page */
#define PG_SECTOR_MASK ((PAGE_SIZE >> SECTOR_SHIFT) - 1)
/*
 * One preallocated write request: a struct bio embedded together with the
 * single bio_vec it points at.
 */
struct bcon_bio {
struct bio bio;
struct bio_vec bvec;
/*
 * Address of the buffer sector this bio writes. bcon_init_bios() sets it
 * for bio_array entries; bcon_init_zero_bio() does not set it.
 */
void *sector;
/* Set to 1 right before submit_bio(); checked before the bio is reused */
int in_flight;
};
/*
 * One contiguous run on the block device. As consumed by get_sector(),
 * both fields are in units of 512-byte sectors.
 */
struct bcon_extent {
/* First device sector of the run */
sector_t ofs;
/* Number of sectors in the run */
sector_t len;
};
/* State of one blockconsole instance (one log device or log file) */
struct blockconsole {
/* Name used in log messages and in the writeback thread name */
char devname[32];
/* Log position up to which bcon_write()/bcon_pad() have filled the buffer */
atomic64_t console_bytes;
/* Held by bcon_end_io() around error accounting and sector recycling */
spinlock_t end_io_lock;
/* Runs bcon_pad(); bcon_write() re-arms it to jiffies + HZ */
struct timer_list pad_timer;
/* Write errors counted by bcon_end_io(); reset on success below the limit */
int error_count;
struct kref kref;
/* Log position up to which sectors have been submitted for writing */
u64 write_bytes;
/* Usable log size; positions wrap to 0 when they reach this value */
u64 max_bytes;
/* Incremented whenever write_bytes wraps; stored in the header */
u32 round;
/* Parsed from the on-device header in bcon_find_end_of_log() */
u32 uuid;
/* One bio per 512-byte sector of the buffer */
struct bcon_bio bio_array[SECTOR_COUNT];
/* Order-8 page allocation backing the buffer */
struct page *pages;
/* One bio per page of a tile, all pointing at zero_page, used for erasing */
struct bcon_bio zero_bios[PAGE_COUNT];
struct page *zero_page;
struct block_device *bdev;
struct console console;
/* Runs bcon_unregister(); scheduled from bcon_end_io() on too many errors */
struct work_struct unregister_work;
/* Runs __bcon_release(); scheduled when the last reference is dropped */
struct work_struct release_work;
struct task_struct *writeback_thread;
struct notifier_block panic_block;
/* Number of valid entries in extent_map */
int no_extents;
struct bcon_extent extent_map[MAX_EXTENTS];
};
/*
 * Translate a byte position in the log into a device sector by walking
 * the extent map. This is what allows logging to a file: the logical
 * sector is looked up in the file's extents on the backing device.
 * A position that lies beyond all extents triggers BUG().
 */
static sector_t get_sector(struct blockconsole *bc, u64 fpos)
{
	const struct bcon_extent *ext = bc->extent_map;
	const struct bcon_extent *end = ext + bc->no_extents;
	sector_t remaining = fpos >> SECTOR_SHIFT;

	while (ext < end) {
		if (remaining < ext->len)
			return ext->ofs + remaining;
		/* Not in this extent; make the index relative to the next */
		remaining -= ext->len;
		ext++;
	}
	BUG();
}
/* Take an additional reference on @bc. */
static void bcon_get(struct blockconsole *bc)
{
	struct kref *ref = &bc->kref;

	kref_get(ref);
}
static void __bcon_release(struct work_struct *work)
{
struct blockconsole *bc = container_of(work, struct blockconsole,
release_work);
__free_pages(bc->zero_page, 0);
__free_pages(bc->pages, 8);
invalidate_mapping_pages(bc->bdev->bd_inode->i_mapping, 0, -1);
blkdev_put(bc->bdev, FMODE_READ|FMODE_WRITE);
kfree(bc);
}
/*
 * kref release callback. The last reference may be dropped from atomic
 * context, so the actual teardown is deferred to release_work.
 */
static void bcon_release(struct kref *kref)
{
	struct blockconsole *bc;

	bc = container_of(kref, struct blockconsole, kref);
	schedule_work(&bc->release_work);
}
/* Drop one reference on @bc; bcon_release() runs when it was the last. */
static void bcon_put(struct blockconsole *bc)
{
	struct kref *ref = &bc->kref;

	kref_put(ref, bcon_release);
}
/* Byte offset of @console_bytes within its 512-byte sector. */
static unsigned int __bcon_console_ofs(u64 console_bytes)
{
	u64 in_sector = console_bytes & ~SECTOR_MASK;

	return in_sector;
}
static unsigned int bcon_console_ofs(struct blockconsole *bc)
{
return __bcon_console_ofs(atomic64_read(&bc->console_bytes));
}
/* Index into the buffer's sector array that @console_bytes falls into. */
static unsigned int __bcon_console_sector(u64 console_bytes)
{
	u64 sector_nr = console_bytes >> SECTOR_SHIFT;

	return sector_nr & CACHE_SECTOR_MASK;
}
static unsigned int bcon_console_sector(struct blockconsole *bc)
{
return __bcon_console_sector(atomic64_read(&bc->console_bytes));
}
static unsigned int bcon_write_sector(struct blockconsole *bc)
{
return (bc->write_bytes >> SECTOR_SHIFT) & CACHE_SECTOR_MASK;
}
/*
 * Fill a 512-byte sector with padding: 511 spaces terminated by a
 * single newline.
 */
static void clear_sector(void *sector)
{
	char *bytes = sector;

	memset(bytes, ' ', 511);
	bytes[511] = '\n';
}
/*
 * Write the header into the first sector of the buffer: the magic string
 * followed by uuid, round and tile index, each as 8 hex digits and a
 * newline. The rest of the sector is padding from clear_sector().
 */
static void bcon_init_first_page(struct blockconsole *bc)
{
	char *hdr = page_address(bc->pages);
	/* We overflow after 4TB - fine */
	u32 tile = atomic64_read(&bc->console_bytes) >> 20;

	clear_sector(hdr);
	memcpy(hdr, BLOCKCONSOLE_MAGIC, strlen(BLOCKCONSOLE_MAGIC));

	/* sprintf() terminates with NUL; turn each one into the newline */
	sprintf(hdr + BCON_UUID_OFS, "%08x", bc->uuid);
	hdr[BCON_UUID_OFS + 8] = '\n';

	sprintf(hdr + BCON_ROUND_OFS, "%08x", bc->round);
	hdr[BCON_ROUND_OFS + 8] = '\n';

	sprintf(hdr + BCON_TILE_OFS, "%08x", tile);
	hdr[BCON_TILE_OFS + 8] = '\n';
}
/*
 * Advance the console position by @bytes without taking a lock.
 *
 * The new position is computed from a snapshot and installed with
 * atomic64_cmpxchg(); if another context changed console_bytes in the
 * meantime, the whole computation is retried.
 */
static void bcon_advance_console_bytes(struct blockconsole *bc, int bytes)
{
u64 old, new;
do {
old = atomic64_read(&bc->console_bytes);
new = old + bytes;
/* Wrap around at the end of the usable area */
if (new >= bc->max_bytes)
new = 0;
/*
 * Landing exactly on a 1MB boundary starts a new tile: rewrite the
 * header in the first buffer sector and skip past it.
 */
if ((new & CACHE_MASK) == 0) {
bcon_init_first_page(bc);
new += BCON_LONG_HEADERSIZE;
}
} while (atomic64_cmpxchg(&bc->console_bytes, old, new) != old);
}
/* bio end_io callback for sync_read(): signal the waiting completion. */
static void request_complete(struct bio *bio, int err)
{
	struct completion *done = bio->bi_private;

	complete(done);
}
/*
 * Synchronously read the sector at log position @ofs into the first
 * 512 bytes of the buffer (bc->pages, offset 0), using an on-stack bio.
 *
 * Returns 0 on success or -EIO if the bio did not complete up to date.
 */
static int sync_read(struct blockconsole *bc, u64 ofs)
{
	struct completion done;
	struct bio_vec vec;
	struct bio bio;

	vec.bv_page = bc->pages;
	vec.bv_offset = 0;
	vec.bv_len = SECTOR_SIZE;

	init_completion(&done);

	bio_init(&bio);
	bio.bi_bdev = bc->bdev;
	bio.bi_sector = get_sector(bc, ofs);
	bio.bi_io_vec = &vec;
	bio.bi_vcnt = 1;
	bio.bi_idx = 0;
	bio.bi_size = SECTOR_SIZE;
	bio.bi_end_io = request_complete;
	bio.bi_private = &done;

	submit_bio(READ, &bio);
	wait_for_completion(&done);

	if (!test_bit(BIO_UPTODATE, &bio.bi_flags))
		return -EIO;
	return 0;
}
static void bcon_erase_segment(struct blockconsole *bc)
{
int i;
for (i = 0; i < PAGE_COUNT; i++) {
struct bcon_bio *bcon_bio = bc->zero_bios + i;
struct bio *bio = &bcon_bio->bio;
/*
* If the last erase hasn't finished yet, just skip it. The log
* will look messy, but that's all.
*/
rmb();
if (bcon_bio->in_flight)
continue;
bio_init(bio);
bio->bi_io_vec = &bcon_bio->bvec;
bio->bi_vcnt = 1;
bio->bi_size = PAGE_SIZE;
bio->bi_bdev = bc->bdev;
bio->bi_private = bc;
bio->bi_idx = 0;
bio->bi_sector = get_sector(bc, bc->write_bytes + i * PAGE_SIZE);
bcon_bio->in_flight = 1;
wmb();
/* We want the erase to go to the device first somehow */
submit_bio(WRITE | REQ_SOFTBARRIER, bio);
}
}
/*
 * Advance the writeback position by @bytes. On reaching the end of the
 * usable area, wrap to 0, start a new round and regenerate the header.
 */
static void bcon_advance_write_bytes(struct blockconsole *bc, int bytes)
{
	bc->write_bytes += bytes;
	if (bc->write_bytes < bc->max_bytes)
		return;

	/* Wrapped around: the header must carry the new round number */
	bc->write_bytes = 0;
	bc->round++;
	bcon_init_first_page(bc);
}
/*
 * Return true if @data starts with exactly eight hexadecimal digits
 * that are immediately followed by a newline.
 */
static bool is_four_byte_hex(const void *data)
{
	const char *digits = data;
	int i;

	for (i = 0; i < 8; i++) {
		if (!isxdigit(digits[i]))
			return false;
	}
	/* A ninth hex digit, or anything else but '\n', disqualifies */
	return digits[8] == '\n';
}
/*
 * Check whether @data holds a blockconsole header: the magic string
 * followed by well-formed uuid, round and tile fields.
 *
 * Returns 11 if the header is present and 0 otherwise.
 */
static int bcon_magic_present(const void *data)
{
	const char *hdr = data;

	if (memcmp(hdr, BLOCKCONSOLE_MAGIC, strlen(BLOCKCONSOLE_MAGIC)) != 0)
		return 0;
	if (is_four_byte_hex(hdr + BCON_UUID_OFS) &&
	    is_four_byte_hex(hdr + BCON_ROUND_OFS) &&
	    is_four_byte_hex(hdr + BCON_TILE_OFS))
		return 11;
	return 0;
}
/*
 * Validate the header on the device and locate the tile where logging
 * should continue.
 *
 * sync_read() always reads into the first sector of the buffer, which is
 * bio_array[0].sector (sec0). The header of tile 0 is copied to sec1 and
 * then compared against the headers of other tiles in a binary search
 * over 1MB-aligned positions.
 *
 * Returns 0 on success, -EINVAL if no valid header is found, or the
 * error from sync_read().
 */
static int bcon_find_end_of_log(struct blockconsole *bc)
{
u64 start = 0, end = bc->max_bytes, middle;
void *sec0 = bc->bio_array[0].sector;
void *sec1 = bc->bio_array[1].sector;
int err, version;
err = sync_read(bc, 0);
if (err)
return err;
/* Second sanity check, out of sheer paranoia */
version = bcon_magic_present(sec0);
if (!version)
return -EINVAL;
/* Pick up uuid and round from the existing header */
bc->uuid = simple_strtoull(sec0 + BCON_UUID_OFS, NULL, 16);
bc->round = simple_strtoull(sec0 + BCON_ROUND_OFS, NULL, 16);
/* Keep the reference header; sec0 is overwritten by every later read */
memcpy(sec1, sec0, BCON_HEADERSIZE);
for (;;) {
/* Probe the tile boundary closest to the middle of [start, end) */
middle = (start + end) / 2;
middle &= ~CACHE_MASK;
if (middle == start)
break;
err = sync_read(bc, middle);
if (err)
return err;
if (memcmp(sec1, sec0, BCON_HEADERSIZE)) {
/* If the two differ, we haven't written that far yet */
end = middle;
} else {
start = middle;
}
}
/* Continue logging at the first tile not yet written in this round */
bc->write_bytes = end;
atomic64_set(&bc->console_bytes, end);
bcon_advance_console_bytes(bc, 0); /* To skip the header */
bcon_advance_write_bytes(bc, 0); /* To wrap around, if necessary */
bcon_erase_segment(bc);
return 0;
}
/*
 * Work handler that takes a console out of service: removes the panic
 * notifier and the console, stops the pad timer and the writeback
 * thread, then drops a reference.
 */
static void bcon_unregister(struct work_struct *work)
{
struct blockconsole *bc = container_of(work, struct blockconsole,
unregister_work);
atomic_notifier_chain_unregister(&panic_notifier_list, &bc->panic_block);
unregister_console(&bc->console);
del_timer_sync(&bc->pad_timer);
kthread_stop(bc->writeback_thread);
/* No new io will be scheduled anymore now */
bcon_put(bc);
}
/* Number of write errors tolerated before the console is unregistered */
#define BCON_MAX_ERRORS 10
/*
 * Completion handler for the sector writes submitted by
 * bcon_writesector(). Accounts errors, re-pads the sector so it can be
 * reused, clears in_flight and drops the reference taken at submit time.
 */
static void bcon_end_io(struct bio *bio, int err)
{
struct bcon_bio *bcon_bio = container_of(bio, struct bcon_bio, bio);
struct blockconsole *bc = bio->bi_private;
unsigned long flags;
/*
 * We want to assume the device broken and free this console if
 * we accumulate too many errors. But if errors are transient,
 * we also want to forget about them once writes succeed again.
 * Oh, and we only want to reset the counter if it hasn't reached
 * the limit yet, so we don't bcon_put() twice from here.
 */
spin_lock_irqsave(&bc->end_io_lock, flags);
if (err) {
/* Post-increment: the work is scheduled once, when the old count equals the limit */
if (bc->error_count++ == BCON_MAX_ERRORS) {
pr_info("no longer logging to %s\n", bc->devname);
schedule_work(&bc->unregister_work);
}
} else {
if (bc->error_count && bc->error_count < BCON_MAX_ERRORS)
bc->error_count = 0;
}
/*
 * Add padding (a bunch of spaces and a newline) early so bcon_pad
 * only has to advance a pointer.
 */
clear_sector(bcon_bio->sector);
bcon_bio->in_flight = 0;
spin_unlock_irqrestore(&bc->end_io_lock, flags);
bcon_put(bc);
}
/*
 * Submit buffer sector @index for writing at the current writeback
 * position. If that sector's bio is still in flight, nothing is done.
 * A reference on @bc is taken for the duration of the I/O and dropped
 * again in bcon_end_io().
 */
static void bcon_writesector(struct blockconsole *bc, int index)
{
	struct bcon_bio *slot = &bc->bio_array[index];
	struct bio *bio = &slot->bio;

	rmb();
	if (slot->in_flight)
		return;

	bcon_get(bc);
	bio_init(bio);
	bio->bi_bdev = bc->bdev;
	bio->bi_sector = get_sector(bc, bc->write_bytes);
	bio->bi_io_vec = &slot->bvec;
	bio->bi_vcnt = 1;
	bio->bi_idx = 0;
	bio->bi_size = SECTOR_SIZE;
	bio->bi_end_io = bcon_end_io;
	bio->bi_private = bc;

	/* Publish the busy flag before the bio can complete */
	slot->in_flight = 1;
	wmb();
	submit_bio(WRITE, bio);
}
/**
 * bcon_writeback - body of the writeback thread
 * @_bc: the struct blockconsole to service
 *
 * Switches the calling task to SCHED_FIFO at the highest realtime
 * priority, then repeatedly sleeps until woken. After each wakeup every
 * sector between the writeback position and the console position is
 * submitted; whenever the writeback position enters a new tile, that
 * tile is erased first. Returns 0 once kthread_should_stop() is true.
 *
 * The panic notifier also calls this function directly.
 */
static int bcon_writeback(void *_bc)
{
	struct blockconsole *bc = _bc;
	struct sched_param sp;

	sp.sched_priority = MAX_RT_PRIO - 1; /* Highest realtime prio */
	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);

	do {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
		if (kthread_should_stop())
			return 0;

		while (bcon_write_sector(bc) != bcon_console_sector(bc)) {
			unsigned int idx = bcon_write_sector(bc);

			bcon_writesector(bc, idx);
			bcon_advance_write_bytes(bc, SECTOR_SIZE);
			if (bcon_write_sector(bc) == 0)
				bcon_erase_segment(bc);
		}
	} while (1);
}
/*
 * Pad timer callback; @data is the struct blockconsole pointer. If the
 * current sector is partially filled, skip the console position to the
 * next sector boundary and wake the writeback thread so the partial
 * sector gets written out.
 */
static void bcon_pad(unsigned long data)
{
	struct blockconsole *bc = (void *)data;
	unsigned int used = bcon_console_ofs(bc);

	/* Already on a sector boundary - nothing to flush */
	if (used == 0)
		return;

	/*
	 * This deliberately races against bcon_write. When the race is
	 * lost, the padding is no longer a run of spaces ending in a
	 * newline: the newline may be missing or sit in the middle.
	 * Such corruption is easy for a human reader to spot and ignore.
	 */
	bcon_advance_console_bytes(bc, SECTOR_SIZE - used);
	wake_up_process(bc->writeback_thread);
}
/*
 * Console write callback. Copies @msg into the buffer sector by sector,
 * advancing the console position after each piece. If the target sector
 * is still being written out, the rest of the message is dropped.
 * Afterwards the writeback thread is woken and the pad timer is
 * re-armed to fire one second from now.
 */
static void bcon_write(struct console *console, const char *msg,
		unsigned int len)
{
	struct blockconsole *bc;

	bc = container_of(console, struct blockconsole, console);

	while (len) {
		u64 pos = atomic64_read(&bc->console_bytes);
		unsigned int ofs = __bcon_console_ofs(pos);
		int idx = __bcon_console_sector(pos);
		unsigned int chunk;

		rmb();
		if (bc->bio_array[idx].in_flight)
			break;

		/* Never copy across a sector boundary in one go */
		chunk = min(len, SECTOR_SIZE - ofs);
		memcpy(bc->bio_array[idx].sector + ofs, msg, chunk);
		msg += chunk;
		len -= chunk;
		bcon_advance_console_bytes(bc, chunk);
	}

	wake_up_process(bc->writeback_thread);
	mod_timer(&bc->pad_timer, jiffies + HZ);
}
/**
 * bcon_init_bios - set up the per-sector bio slots
 * @bc: the blockconsole being created
 *
 * Points every bio_array entry at its 512-byte slice of the buffer
 * pages, fills that slice with padding and marks the slot idle.
 */
static void bcon_init_bios(struct blockconsole *bc)
{
	int i;

	for (i = 0; i < SECTOR_COUNT; i++) {
		struct bcon_bio *slot = &bc->bio_array[i];
		struct page *page = bc->pages + (i >> (PAGE_SHIFT - SECTOR_SHIFT));
		/* Byte offset of this sector inside its page */
		unsigned long ofs = SECTOR_SIZE * (i & PG_SECTOR_MASK);

		slot->in_flight = 0;
		slot->sector = page_address(page) + ofs;
		clear_sector(slot->sector);

		slot->bvec.bv_page = page;
		slot->bvec.bv_len = SECTOR_SIZE;
		slot->bvec.bv_offset = ofs;
	}
}
static void bcon_init_zero_bio(struct blockconsole *bc)
{
int i;
memset(page_address(bc->zero_page), 0, PAGE_SIZE);
for (i = 0; i < PAGE_COUNT; i++) {
struct bcon_bio *bcon_bio = bc->zero_bios + i;
struct bio_vec *bvec = &bcon_bio->bvec;
bcon_bio->in_flight = 0;
bvec->bv_page = bc->zero_page;
bvec->bv_len = PAGE_SIZE;
bvec->bv_offset = 0;
}
}
/**
 * blockconsole_panic - panic notifier
 * @this: the notifier block embedded in the struct blockconsole
 * @event: unused
 * @ptr: unused
 *
 * Attempts to get the final messages onto the device: pads a partially
 * filled sector and then runs bcon_writeback() directly. This fails
 * fairly regularly; blockconsole is best-effort.
 *
 * Always returns NOTIFY_DONE.
 */
static int blockconsole_panic(struct notifier_block *this, unsigned long event,
		void *ptr)
{
	struct blockconsole *bc;
	unsigned int used;

	bc = container_of(this, struct blockconsole, panic_block);
	used = bcon_console_ofs(bc);

	/* Close the partial sector so that writeback picks it up */
	if (used != 0)
		bcon_advance_console_bytes(bc, SECTOR_SIZE - used);
	bcon_writeback(bc);
	return NOTIFY_DONE;
}
/*
 * create_extent_map - map a regular log file onto its backing device
 * @bc: blockconsole whose extent_map, no_extents and max_bytes are filled
 * @inode: inode of the log file
 *
 * Probes the file block by block with bmap() and merges physically
 * contiguous blocks into extents. At most MAX_EXTENTS extents are used;
 * if the file is more fragmented than that, only the part covered by
 * the first MAX_EXTENTS extents becomes usable.
 *
 * Returns 0 on success. Returns -EINVAL if the file has a hole, if an
 * extent is not a multiple of the page size, or if less than 4MB would
 * be usable.
 */
static int create_extent_map(struct blockconsole *bc, struct inode *inode)
{
	u64 max_size = i_size_read(inode) & ~CACHE_MASK;
	sector_t last_block = max_size >> inode->i_blkbits;
	sector_t probe_block = 0;
	sector_t ofs;
	sector_t no_secs = 0;
	struct bcon_extent *extent = bc->extent_map;
	unsigned long sec_per_block = 1 << (inode->i_blkbits - SECTOR_SHIFT);

	bc->no_extents = 1;
	for (; probe_block < last_block;
			probe_block++, no_secs += sec_per_block) {
		ofs = bmap(inode, probe_block);
		if (!ofs)
			return -EINVAL;
		if (!extent->ofs) {
			/* First iteration */
			goto new_extent;
		}
		if (ofs * sec_per_block == extent->ofs + extent->len) {
			/* Part of current extent */
			extent->len += sec_per_block;
			continue;
		}
		/* Extents currently have to be page-aligned for erase */
		if (extent->len & (~PAGE_MASK >> SECTOR_SHIFT))
			return -EINVAL;
		/*
		 * Out of slots: stop before stepping past the array, so
		 * that no_extents never exceeds MAX_EXTENTS and the check
		 * after the loop looks at a valid entry.
		 */
		if (bc->no_extents == MAX_EXTENTS)
			break;
		/* New extent */
		extent++;
		bc->no_extents++;
new_extent:
		extent->ofs = ofs * sec_per_block;
		extent->len = sec_per_block;
	}
	if (bc->extent_map[bc->no_extents - 1].len & (~PAGE_MASK >> SECTOR_SHIFT))
		return -EINVAL;
	bc->max_bytes = (no_secs << SECTOR_SHIFT) & ~CACHE_MASK;
	/* If the file is too small or too fragmented, just give up */
	if (bc->max_bytes < 4 << 20)
		return -EINVAL;
	return 0;
}
static int claim_logfile(struct blockconsole *bc, struct inode *inode)
{
int err;
if (S_ISBLK(inode->i_mode)) {
bc->bdev = bdgrab(I_BDEV(inode));
/* FIXME: blkdev_put */
err = blkdev_get(bc->bdev, FMODE_READ | FMODE_WRITE, bcon_add);
if (err)
return -EINVAL;
bc->extent_map[0].ofs = 0;
bc->extent_map[0].len = bc->bdev->bd_inode->i_size & ~CACHE_MASK;
bc->max_bytes = i_size_read(inode) & ~CACHE_MASK;
} else if (S_ISREG(inode->i_mode)) {
bc->bdev = inode->i_sb->s_bdev;
mutex_lock(&inode->i_mutex);
return create_extent_map(bc, inode);
} else
return -EINVAL;
return 0;
}
/*
 * Undo claim_logfile(): put the block device for a device node, or
 * clear S_SWAPFILE and release i_mutex for any other inode.
 */
static void unclaim_logfile(struct blockconsole *bc, struct inode *inode)
{
	if (S_ISBLK(inode->i_mode)) {
		blkdev_put(bc->bdev, FMODE_READ | FMODE_WRITE);
		return;
	}
	inode->i_flags &= ~S_SWAPFILE;
	mutex_unlock(&inode->i_mutex);
}
/*
 * __bcon_create - common second half of console creation
 * @bc: blockconsole with bdev, extent map, max_bytes, devname and the
 *      console callbacks already filled in by the caller
 *
 * Allocates the 1MB buffer (order-8 pages) and the zero page, prepares
 * all bios, locates the end of the existing log, starts the writeback
 * thread and finally registers the console and the panic notifier.
 *
 * Returns 0 on success. On failure the pages allocated here are freed
 * and a negative errno is returned; @bc itself and the block device
 * remain the caller's responsibility.
 */
static int __bcon_create(struct blockconsole *bc)
{
	int err = -ENOMEM;

	bc->pages = alloc_pages(GFP_KERNEL, 8);
	if (!bc->pages)
		goto out;
	bc->zero_page = alloc_pages(GFP_KERNEL, 0);
	if (!bc->zero_page)
		goto out1;
	bcon_init_bios(bc);
	bcon_init_zero_bio(bc);
	setup_timer(&bc->pad_timer, bcon_pad, (unsigned long)bc);
	err = bcon_find_end_of_log(bc);
	if (err)
		goto out2;
	kref_init(&bc->kref); /* This reference gets freed on errors */
	bc->writeback_thread = kthread_run(bcon_writeback, bc, "bcon_%s",
			bc->devname);
	if (IS_ERR(bc->writeback_thread)) {
		err = PTR_ERR(bc->writeback_thread);
		goto out2;
	}
	INIT_WORK(&bc->unregister_work, bcon_unregister);
	INIT_WORK(&bc->release_work, __bcon_release);
	register_console(&bc->console);
	bc->panic_block.notifier_call = blockconsole_panic;
	bc->panic_block.priority = INT_MAX;
	atomic_notifier_chain_register(&panic_notifier_list, &bc->panic_block);
	/*
	 * atomic64_read() returns long on some architectures; cast so the
	 * argument always matches %llx.
	 */
	pr_info("now logging to %s at %llx\n", bc->devname,
		(unsigned long long)atomic64_read(&bc->console_bytes) >> 20);
	return 0;
out2:
	__free_pages(bc->zero_page, 0);
out1:
	__free_pages(bc->pages, 8);
out:
	return err;
}
/*
 * bcon_add_file - module parameter handler: log to the named path
 * @name: path of a block device node or a regular file
 * @kp: unused
 *
 * Allocates a blockconsole, opens @name read/write, claims it with
 * claim_logfile() and hands over to __bcon_create().
 *
 * Returns 0 on success or a negative errno; on failure everything
 * acquired here is released again.
 */
static int bcon_add_file(const char *name, struct kernel_param *kp)
{
	struct blockconsole *bc;
	struct file *file = NULL;
	struct inode *inode;
	int err;

	bc = kzalloc(sizeof(*bc), GFP_KERNEL);
	if (!bc)
		return -ENOMEM;

	spin_lock_init(&bc->end_io_lock);
	strcpy(bc->console.name, "bcon");
	bc->console.write = bcon_write;
	bc->console.flags = CON_PRINTBUFFER | CON_ENABLED | CON_ALLDATA;
	memset(bc->devname, ' ', sizeof(bc->devname));
	strlcpy(bc->devname, name, sizeof(bc->devname));

	file = filp_open(name, O_RDWR|O_LARGEFILE, 0);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto free_bc;
	}
	inode = file->f_mapping->host;

	err = claim_logfile(bc, inode);
	if (err)
		goto close_file;

	err = __bcon_create(bc);
	if (!err)
		return 0;

	unclaim_logfile(bc, inode);
close_file:
	filp_close(file, NULL);
free_bc:
	kfree(bc);
	return err;
}
/*
 * bcon_create - try to set up a blockconsole on a device or partition
 * @devt: device number to open
 *
 * Opens the device read/write, describes it as a single extent covering
 * all complete 1MB tiles and hands over to __bcon_create(), which fails
 * with -EINVAL when the device carries no blockconsole header.
 *
 * On failure everything acquired here is released again, including the
 * block device reference. That matters because the failure path is the
 * normal case for devices that are not console devices.
 *
 * Returns 0 on success or a negative errno.
 */
static int bcon_create(dev_t devt)
{
	const fmode_t mode = FMODE_READ | FMODE_WRITE;
	struct blockconsole *bc;
	int err;

	bc = kzalloc(sizeof(*bc), GFP_KERNEL);
	if (!bc)
		return -ENOMEM;
	spin_lock_init(&bc->end_io_lock);
	strcpy(bc->console.name, "bcon");
	bc->console.flags = CON_PRINTBUFFER | CON_ENABLED | CON_ALLDATA;
	bc->console.write = bcon_write;
	bc->bdev = blkdev_get_by_dev(devt, mode, NULL);
	if (IS_ERR(bc->bdev)) {
		err = PTR_ERR(bc->bdev);
		goto out;
	}
	memset(bc->devname, ' ', sizeof(bc->devname));
	strlcpy(bc->devname, dev_name(part_to_dev(bc->bdev->bd_part)),
			sizeof(bc->devname));
	bc->max_bytes = bc->bdev->bd_inode->i_size & ~CACHE_MASK;
	bc->no_extents = 1;
	bc->extent_map[0].ofs = 0;
	bc->extent_map[0].len = bc->max_bytes >> SECTOR_SHIFT;
	err = __bcon_create(bc);
	if (err)
		goto out_put;
	return 0;
out_put:
	/* Drop the reference taken by blkdev_get_by_dev() above */
	blkdev_put(bc->bdev, mode);
out:
	kfree(bc);
	return err;
}
/* A device queued by bcon_add() to be probed later from a work queue */
struct bcon_candidate {
/* Work item that runs bcon_do_add() */
struct work_struct work;
/* Device number handed to bcon_create() */
dev_t devt;
};
/*
 * Work handler behind bcon_add(). bcon_create() must not be called
 * directly from the partitioning code: __blkdev_get would take
 * bdev->bd_mutex, which that code already holds, and deadlock. Running
 * it from a work queue avoids that. The candidate is freed here.
 */
static void bcon_do_add(struct work_struct *work)
{
	struct bcon_candidate *cand;

	cand = container_of(work, struct bcon_candidate, work);
	bcon_create(cand->devt);
	kfree(cand);
}
/**
 * bcon_add - queue a device as a blockconsole candidate
 * @devt: device number of the device or partition
 *
 * Allocates a candidate record and defers the actual probing to
 * bcon_do_add() on a work queue. If the allocation fails the device is
 * silently skipped.
 */
void bcon_add(dev_t devt)
{
	struct bcon_candidate *cand;

	/* sizeof(*cand): room for the whole struct, not just a pointer */
	cand = kmalloc(sizeof(*cand), GFP_KERNEL);
	if (!cand)
		return;
	cand->devt = devt;
	INIT_WORK(&cand->work, bcon_do_add);
	schedule_work(&cand->work);
}
/*
 * Module parameter "device": bcon_add_file() is registered as the set
 * handler, no get handler is provided, and the permission bits are 0200.
 */
module_param_call(device, bcon_add_file, NULL, NULL, 0200);