/* blob: 8ff276a78f7aeed8ee0edab1687bd5af17ee2d6a [file] [log] [blame] */
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/cdrom.h>
#include <linux/hdreg.h>
#include <scsi/scsi.h>
#include <scsi/scsi_ioctl.h>
#include "blktap.h"
/* Block major number used for tap disks; assigned by blktap_device_init(). */
int blktap_device_major;
/* Map a pointer to the embedded 'device' member back to its struct blktap. */
#define dev_to_blktap(_dev) container_of(_dev, struct blktap, device)
/*
 * Block device open hook.
 *
 * Succeeds (returns 0) as long as the gendisk still carries a tap device
 * in ->private_data; returns -ENXIO otherwise.  @mode is not used.
 */
static int
blktap_device_open(struct block_device *bdev, fmode_t mode)
{
	struct blktap_device *tapdev = bdev->bd_disk->private_data;

	/* NB. we might have bounced a bd trylock by tapdisk. when
	 * failing for reasons not !tapdev, make sure to kick tapdisk
	 * out of destroy wait state again. */
	return tapdev ? 0 : -ENXIO;
}
/*
 * Block device release hook.
 *
 * Looks up the whole-disk block device and, when it has no openers left,
 * sets BLKTAP_DEVICE_CLOSED in tap->dev_inuse and kicks the ring user.
 * Always returns 0.  @mode is not used.
 */
static int
blktap_device_release(struct gendisk *disk, fmode_t mode)
{
	struct blktap_device *tapdev = disk->private_data;
	struct blktap *tap = dev_to_blktap(tapdev);
	struct block_device *bdev;
	int openers = 0;

	bdev = bdget_disk(disk, 0);
	if (bdev) {
		/* Sample the count before dropping our reference. */
		openers = bdev->bd_openers;
		bdput(bdev);
	}
	/*
	 * NOTE(review): a failed lookup is treated like "no openers" so
	 * that a waiter still gets kicked; blktap_device_destroy()
	 * re-checks bd_openers itself before tearing anything down.
	 */

	if (!openers) {
		set_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse);
		blktap_ring_kick_user(tap);
	}

	return 0;
}
/*
 * Report a synthetic disk geometry.
 *
 * There is no real geometry, so fixed head (0xff) and sector (0x3f)
 * counts are used and the cylinder count is derived from the disk
 * capacity.  If the stored geometry ends up covering fewer sectors than
 * the capacity, the cylinder count is pinned to 0xffff.  Always returns 0.
 */
static int
blktap_device_getgeo(struct block_device *bd, struct hd_geometry *hg)
{
	const sector_t total = get_capacity(bd->bd_disk);
	sector_t cyls = total;

	hg->heads = 0xff;
	hg->sectors = 0x3f;

	/* sector_div() divides 'cyls' in place. */
	sector_div(cyls, hg->heads * hg->sectors);
	hg->cylinders = cyls;

	if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < total)
		hg->cylinders = 0xffff;

	return 0;
}
/*
 * Block device ioctl hook.
 *
 * CDROMMULTISESSION:    zero-fills a struct cdrom_multisession at the
 *                       user address @argument, byte by byte.
 * SCSI_IOCTL_GET_IDLUN: stores 0 in dev_id and host_unique_id of the
 *                       struct scsi_idlun at @argument.
 *
 * Returns 0 on success, -EFAULT if the user buffer cannot be written,
 * and -EINVAL for every other command.  @bd and @mode are not used.
 */
static int
blktap_device_ioctl(struct block_device *bd, fmode_t mode,
		    unsigned command, unsigned long argument)
{
	struct scsi_idlun __user *idlun;
	int i;

	if (command == CDROMMULTISESSION) {
		BTDBG("FIXME: support multisession CDs later\n");
		for (i = 0; i < sizeof(struct cdrom_multisession); i++)
			if (put_user(0, (char __user *)(argument + i)))
				return -EFAULT;
		return 0;
	}

	if (command == SCSI_IOCTL_GET_IDLUN) {
		if (!access_ok(VERIFY_WRITE, argument,
			       sizeof(struct scsi_idlun)))
			return -EFAULT;

		/* return 0 for now. */
		idlun = (struct scsi_idlun __user *)argument;
		__put_user(0, &idlun->dev_id);
		__put_user(0, &idlun->host_unique_id);
		return 0;
	}

	return -EINVAL; /* same return as native Linux */
}
/*
 * Block device method table for tap disks; installed as gd->fops in
 * blktap_device_create().
 */
static struct block_device_operations blktap_device_file_operations = {
.owner = THIS_MODULE,
.open = blktap_device_open,
.release = blktap_device_release,
.ioctl = blktap_device_ioctl,
.getgeo = blktap_device_getgeo
};
/*
 * NB. naming convention for the request helpers: the __blktap_* variants
 * are for callers holding the queue lock, the blktap_* variants for
 * callers that do not.
 */

/* Peek at the next request on @q without starting it; NULL if none. */
static inline struct request *
__blktap_next_queued_rq(struct request_queue *q)
{
	struct request *head = blk_peek_request(q);

	return head;
}
/* Start @rq via blk_start_request(). */
static inline void
__blktap_dequeue_rq(struct request *rq)
{
	struct request *started = rq;

	blk_start_request(started);
}
/*
 * Start a still-queued request and immediately complete all of it.
 * NB. err == 0 indicates success, failures < 0
 */
static inline void
__blktap_end_queued_rq(struct request *rq, int err)
{
	unsigned int nbytes;

	blk_start_request(rq);

	nbytes = blk_rq_bytes(rq);
	__blk_end_request(rq, err, nbytes);
}
/* Complete every byte of @rq with status @err (0 on success, < 0 on failure). */
static inline void
__blktap_end_rq(struct request *rq, int err)
{
	unsigned int nbytes = blk_rq_bytes(rq);

	__blk_end_request(rq, err, nbytes);
}
static inline void
blktap_end_rq(struct request *rq, int err)
{
struct request_queue *q = rq->q;
spin_lock_irq(q->queue_lock);
__blktap_end_rq(rq, err);
spin_unlock_irq(q->queue_lock);
}
void
blktap_device_end_request(struct blktap *tap,
struct blktap_request *request,
int error)
{
struct blktap_device *tapdev = &tap->device;
struct request *rq = request->rq;
blktap_ring_unmap_request(tap, request);
blktap_ring_free_request(tap, request);
dev_dbg(disk_to_dev(tapdev->gd),
"end_request: op=%d error=%d bytes=%d\n",
rq_data_dir(rq), error, blk_rq_bytes(rq));
blktap_end_rq(rq, error);
}
/*
 * Turn block request @rq into a tap ring request and submit it.
 *
 * Returns 0 once the request has been submitted to the ring.
 * Returns -EBUSY (and bumps stats.st_oo_req) when no ring request could be
 * allocated (-ENOSPC/-ENOMEM) or when page allocation for the request
 * fails.  Any other failure is logged (rate limited) and its error code
 * returned.  On every error path the ring request, if one was obtained,
 * is released again.
 */
int
blktap_device_make_request(struct blktap *tap, struct request *rq)
{
	struct blktap_device *tapdev = &tap->device;
	struct blktap_request *request;
	int is_write, nr_segs;
	int err;

	request = blktap_ring_make_request(tap);
	if (IS_ERR(request)) {
		err = PTR_ERR(request);
		request = NULL;

		if (err == -ENOSPC || err == -ENOMEM)
			goto out_busy;

		goto out_warn;
	}

	is_write = rq_data_dir(rq) == WRITE;
	nr_segs = blk_rq_map_sg(rq->q, rq, request->sg_table);

	dev_dbg(disk_to_dev(tapdev->gd),
		"make_request: op=%c bytes=%d nsegs=%d\n",
		is_write ? 'w' : 'r', blk_rq_bytes(rq), nr_segs);

	request->rq = rq;
	request->operation = is_write ? BLKIF_OP_WRITE : BLKIF_OP_READ;

	err = blktap_request_get_pages(tap, request, nr_segs);
	if (err)
		goto out_busy;

	err = blktap_ring_map_request(tap, request);
	if (err)
		goto out_warn;

	blktap_ring_submit_request(tap, request);
	return 0;

out_warn:
	if (printk_ratelimit())
		dev_warn(disk_to_dev(tapdev->gd),
			 "make request: %d, failing\n", err);
	goto out_free;

out_busy:
	/* Out of resources: report "busy" whatever the underlying error. */
	tap->stats.st_oo_req++;
	err = -EBUSY;

out_free:
	if (request)
		blktap_ring_free_request(tap, request);

	return err;
}
/*
* called from tapdisk context
*/
void
blktap_device_run_queue(struct blktap *tap)
{
struct blktap_device *tapdev = &tap->device;
struct request_queue *q;
struct request *rq;
int err;
if (!tapdev->gd)
return;
q = tapdev->gd->queue;
spin_lock_irq(&tapdev->lock);
queue_flag_clear(QUEUE_FLAG_STOPPED, q);
do {
rq = __blktap_next_queued_rq(q);
if (!rq)
break;
if (!blk_fs_request(rq)) {
__blktap_end_queued_rq(rq, -EOPNOTSUPP);
continue;
}
spin_unlock_irq(&tapdev->lock);
err = blktap_device_make_request(tap, rq);
spin_lock_irq(&tapdev->lock);
if (err == -EBUSY) {
blk_stop_queue(q);
break;
}
__blktap_dequeue_rq(rq);
if (unlikely(err))
__blktap_end_rq(rq, err);
} while (1);
spin_unlock_irq(&tapdev->lock);
}
/*
 * Request function installed on the queue by blktap_device_create().
 * It only kicks the ring user; the queue itself is drained by
 * blktap_device_run_queue().
 */
static void
blktap_device_do_request(struct request_queue *rq)
{
	struct blktap_device *tapdev = rq->queuedata;

	blktap_ring_kick_user(dev_to_blktap(tapdev));
}
/*
 * Apply @params (capacity, sector size) and the fixed queue limits to the
 * tap's disk and request queue.  Runs with tapdev->lock held.
 */
static void
blktap_device_configure(struct blktap *tap,
			struct blktap_params *params)
{
	struct blktap_device *tapdev = &tap->device;
	struct request_queue *queue = tapdev->gd->queue;

	spin_lock_irq(&tapdev->lock);

	set_capacity(tapdev->gd, params->capacity);

	/* Hard sector size and max sectors impersonate the equiv. hardware. */
	blk_queue_logical_block_size(queue, params->sector_size);
	blk_queue_max_sectors(queue, 512);

	/* Each segment in a request is up to an aligned page in size. */
	blk_queue_segment_boundary(queue, PAGE_SIZE - 1);
	blk_queue_max_segment_size(queue, PAGE_SIZE);

	/* Ensure a merged request will fit in a single I/O ring slot. */
	blk_queue_max_phys_segments(queue, BLKIF_MAX_SEGMENTS_PER_REQUEST);
	blk_queue_max_hw_segments(queue, BLKIF_MAX_SEGMENTS_PER_REQUEST);

	/* Make sure buffer addresses are sector-aligned. */
	blk_queue_dma_alignment(queue, 511);

	/* We are reordering, but cacheless. */
	blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN, NULL);

	spin_unlock_irq(&tapdev->lock);
}
static int
blktap_device_validate_params(struct blktap *tap,
struct blktap_params *params)
{
struct device *dev = tap->ring.dev;
int sector_order, name_sz;
sector_order = ffs(params->sector_size) - 1;
if (sector_order < 9 ||
sector_order > 12 ||
params->sector_size != 1U<<sector_order)
goto fail;
if (!params->capacity ||
(params->capacity > ULLONG_MAX >> sector_order))
goto fail;
name_sz = min(sizeof(params->name), sizeof(tap->name));
if (strnlen(params->name, name_sz) >= name_sz)
goto fail;
return 0;
fail:
params->name[name_sz-1] = 0;
dev_err(dev, "capacity: %llu, sector-size: %lu, name: %s\n",
params->capacity, params->sector_size, params->name);
return -EINVAL;
}
int
blktap_device_destroy(struct blktap *tap)
{
struct blktap_device *tapdev = &tap->device;
struct block_device *bdev;
struct gendisk *gd;
int err;
gd = tapdev->gd;
if (!gd)
return 0;
bdev = bdget_disk(gd, 0);
err = !mutex_trylock(&bdev->bd_mutex);
if (err) {
/* NB. avoid a deadlock. the last opener syncs the
* bdev holding bd_mutex. */
err = -EBUSY;
goto out_nolock;
}
if (bdev->bd_openers) {
err = -EBUSY;
goto out;
}
del_gendisk(gd);
gd->private_data = NULL;
blk_cleanup_queue(gd->queue);
put_disk(gd);
tapdev->gd = NULL;
clear_bit(BLKTAP_DEVICE, &tap->dev_inuse);
err = 0;
out:
mutex_unlock(&bdev->bd_mutex);
out_nolock:
bdput(bdev);
return err;
}
static void
blktap_device_fail_queue(struct blktap *tap)
{
struct blktap_device *tapdev = &tap->device;
struct request_queue *q = tapdev->gd->queue;
spin_lock_irq(&tapdev->lock);
queue_flag_clear(QUEUE_FLAG_STOPPED, q);
do {
struct request *rq = __blktap_next_queued_rq(q);
if (!rq)
break;
__blktap_end_queued_rq(rq, -EIO);
} while (1);
spin_unlock_irq(&tapdev->lock);
}
/*
 * Attempt to destroy the device; if that is not possible, fail whatever
 * is queued.  Returns the result of blktap_device_destroy().
 */
static int
blktap_device_try_destroy(struct blktap *tap)
{
	const int rc = blktap_device_destroy(tap);

	if (rc != 0)
		blktap_device_fail_queue(tap);

	return rc;
}
/*
 * Sleep on the ring's poll_wait queue until a destroy attempt succeeds.
 * The attempt is the wait condition itself, so it is retried each time
 * the condition is evaluated.
 */
void
blktap_device_destroy_sync(struct blktap *tap)
{
	wait_event(tap->ring.poll_wait,
		   blktap_device_try_destroy(tap) == 0);
}
int
blktap_device_create(struct blktap *tap, struct blktap_params *params)
{
int minor, err;
struct gendisk *gd;
struct request_queue *rq;
struct blktap_device *tapdev;
gd = NULL;
rq = NULL;
tapdev = &tap->device;
minor = tap->minor;
if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
return -EEXIST;
if (blktap_device_validate_params(tap, params))
return -EINVAL;
gd = alloc_disk(1);
if (!gd) {
err = -ENOMEM;
goto fail;
}
if (minor < 26) {
sprintf(gd->disk_name, "td%c", 'a' + minor % 26);
} else if (minor < (26 + 1) * 26) {
sprintf(gd->disk_name, "td%c%c",
'a' + minor / 26 - 1,'a' + minor % 26);
} else {
const unsigned int m1 = (minor / 26 - 1) / 26 - 1;
const unsigned int m2 = (minor / 26 - 1) % 26;
const unsigned int m3 = minor % 26;
sprintf(gd->disk_name, "td%c%c%c",
'a' + m1, 'a' + m2, 'a' + m3);
}
gd->major = blktap_device_major;
gd->first_minor = minor;
gd->fops = &blktap_device_file_operations;
gd->private_data = tapdev;
spin_lock_init(&tapdev->lock);
rq = blk_init_queue(blktap_device_do_request, &tapdev->lock);
if (!rq) {
err = -ENOMEM;
goto fail;
}
elevator_init(rq, "noop");
gd->queue = rq;
rq->queuedata = tapdev;
tapdev->gd = gd;
blktap_device_configure(tap, params);
add_disk(gd);
if (params->name[0])
strncpy(tap->name, params->name, sizeof(tap->name)-1);
set_bit(BLKTAP_DEVICE, &tap->dev_inuse);
dev_info(disk_to_dev(gd), "sector-size: %u capacity: %llu\n",
queue_logical_block_size(rq),
(unsigned long long)get_capacity(gd));
return 0;
fail:
if (gd)
del_gendisk(gd);
if (rq)
blk_cleanup_queue(rq);
return err;
}
/*
 * Format a human-readable summary of the tap disk (capacity, sector
 * size, queue state, opener count) into @buf, which holds @size bytes.
 *
 * Returns the number of characters written, not counting the trailing
 * NUL; 0 when the tap has no disk.  The result never exceeds @size:
 * scnprintf() reports what was actually stored, so the write cursor
 * cannot run past the end of the buffer when output is truncated.
 */
size_t
blktap_device_debug(struct blktap *tap, char *buf, size_t size)
{
	struct gendisk *disk = tap->device.gd;
	struct request_queue *q;
	struct block_device *bdev;
	char *s = buf, *end = buf + size;

	if (!disk)
		return 0;

	q = disk->queue;

	s += scnprintf(s, end - s,
		       "disk capacity:%llu sector size:%u\n",
		       (unsigned long long)get_capacity(disk),
		       queue_logical_block_size(q));

	s += scnprintf(s, end - s,
		       "queue flags:%#lx plugged:%d stopped:%d empty:%d\n",
		       q->queue_flags,
		       blk_queue_plugged(q), blk_queue_stopped(q),
		       elv_queue_empty(q));

	bdev = bdget_disk(disk, 0);
	if (bdev) {
		s += scnprintf(s, end - s,
			       "bdev openers:%d closed:%d\n",
			       bdev->bd_openers,
			       test_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse));
		bdput(bdev);
	}

	return s - buf;
}
/*
 * Register the "tapdev" block driver with a dynamically allocated major
 * and remember that major in blktap_device_major.
 *
 * Returns 0 on success, or the negative error code reported by
 * register_blkdev() on failure.
 */
int __init
blktap_device_init(void)
{
	int major;

	/* Dynamically allocate a major for this device */
	major = register_blkdev(0, "tapdev");
	if (major < 0) {
		BTERR("Couldn't register blktap device\n");
		return major;
	}

	blktap_device_major = major;
	BTINFO("blktap device major %d\n", major);

	return 0;
}
/*
 * Unregister the "tapdev" block driver, provided a major was recorded
 * in blktap_device_major.
 */
void
blktap_device_exit(void)
{
	if (!blktap_device_major)
		return;

	unregister_blkdev(blktap_device_major, "tapdev");
}