blob: ee20d54ab97eb1b6763674e35d6a82b53e7457a0 [file] [log] [blame]
/*
* dm-device.c: create or remove device via device mapper API.
*
* Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <linux/kdev_t.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <syslog.h>
#include <fcntl.h>
#include <errno.h>
#include <libdevmapper.h>
#include "device-discovery.h"
#define DM_DEV_NAME_LEN 256
#ifndef DM_MAX_TYPE_NAME
#define DM_MAX_TYPE_NAME 16
#endif
#define DM_PARAMS_LEN 512 /* XXX: is this enough for target? */
#define TYPE_HAS_DEV(type) ((type == BLOCK_VOLUME_SIMPLE) || \
(type == BLOCK_VOLUME_PSEUDO))
struct bl_dm_table {
uint64_t offset;
uint64_t size;
char target_type[DM_MAX_TYPE_NAME];
char params[DM_PARAMS_LEN];
struct bl_dm_table *next;
};
struct bl_dm_tree {
uint64_t dev;
struct dm_tree *tree;
struct bl_dm_tree *next;
};
static const char dm_name[] = "pnfs_vol_%u";
static unsigned int dev_count;
static inline struct bl_dm_table *bl_dm_table_alloc(void)
{
return (struct bl_dm_table *)calloc(1, sizeof(struct bl_dm_table));
}
static void bl_dm_table_free(struct bl_dm_table *bl_table_head)
{
struct bl_dm_table *p;
while (bl_table_head) {
p = bl_table_head->next;
free(bl_table_head);
bl_table_head = p;
}
}
static void add_to_bl_dm_table(struct bl_dm_table **bl_table_head,
struct bl_dm_table *table)
{
struct bl_dm_table *p;
if (!*bl_table_head) {
*bl_table_head = table;
return;
}
p = *bl_table_head;
while (p->next)
p = p->next;
p->next = table;
}
struct bl_dm_tree *bl_tree_head;
static struct bl_dm_tree *find_bl_dm_tree(uint64_t dev)
{
struct bl_dm_tree *p;
for (p = bl_tree_head; p; p = p->next) {
if (p->dev == dev)
break;
}
return p;
}
static void del_from_bl_dm_tree(uint64_t dev)
{
struct bl_dm_tree *p, *pre = bl_tree_head;
for (p = pre; p; p = p->next) {
if (p->dev == dev) {
pre->next = p->next;
if (p == bl_tree_head)
bl_tree_head = bl_tree_head->next;
free(p);
break;
}
pre = p;
}
}
static void add_to_bl_dm_tree(struct bl_dm_tree *tree)
{
struct bl_dm_tree *p;
if (!bl_tree_head) {
bl_tree_head = tree;
return;
}
p = bl_tree_head;
while (p->next)
p = p->next;
p->next = tree;
return;
}
/*
* Create device via device mapper
* return 0 when creation failed
* return dev no for created device
*/
static uint64_t
dm_device_create_mapped(const char *dev_name, struct bl_dm_table *p)
{
struct dm_task *dmt;
struct dm_info dminfo;
int ret = 0;
dmt = dm_task_create(DM_DEVICE_CREATE);
if (!dmt) {
BL_LOG_ERR("Create dm_task for %s failed\n", dev_name);
return 0;
}
ret = dm_task_set_name(dmt, dev_name);
if (!ret)
goto err_out;
while (p) {
ret =
dm_task_add_target(dmt, p->offset, p->size, p->target_type,
p->params);
if (!ret)
goto err_out;
p = p->next;
}
ret = dm_task_run(dmt) && dm_task_get_info(dmt, &dminfo)
&& dminfo.exists;
if (!ret)
goto err_out;
dm_task_update_nodes();
err_out:
dm_task_destroy(dmt);
if (!ret) {
BL_LOG_ERR("Create device %s failed\n", dev_name);
return 0;
}
return MKDEV(dminfo.major, dminfo.minor);
}
static int dm_device_remove_byname(const char *dev_name)
{
struct dm_task *dmt;
int ret = 0;
BL_LOG_INFO("%s: %s\n", __func__, dev_name);
dmt = dm_task_create(DM_DEVICE_REMOVE);
if (!dmt)
return 0;
ret = dm_task_set_name(dmt, dev_name) && dm_task_run(dmt);
dm_task_update_nodes();
dm_task_destroy(dmt);
return ret;
}
static int dm_device_remove(uint64_t dev)
{
struct dm_task *dmt;
struct dm_names *dmnames;
char *name = NULL;
int ret = 0;
/* Look for dev_name via dev, if dev_name could be transferred here,
we could jump to DM_DEVICE_REMOVE directly */
dmt = dm_task_create(DM_DEVICE_LIST);
if (!dmt) {
BL_LOG_ERR("dm_task creation failed\n");
goto out;
}
ret = dm_task_run(dmt);
if (!ret) {
BL_LOG_ERR("dm_task_run failed\n");
goto out;
}
dmnames = dm_task_get_names(dmt);
if (!dmnames || !dmnames->dev) {
BL_LOG_ERR("dm_task_get_names failed\n");
goto out;
}
while (dmnames) {
if (dmnames->dev == dev) {
name = strdup(dmnames->name);
break;
}
dmnames = (void *)dmnames + dmnames->next;
}
if (!name) {
BL_LOG_ERR("Could not find device\n");
goto out;
}
dm_task_update_nodes();
out:
if (dmt)
dm_task_destroy(dmt);
/* Start to remove device */
if (name) {
ret = dm_device_remove_byname(name);
free(name);
}
return ret;
}
static void dm_devicelist_remove(unsigned int start, unsigned int end)
{
char dev_name[DM_DEV_NAME_LEN];
unsigned int count;
if (start >= dev_count || end <= 1 || start >= end - 1)
return;
for (count = end - 1; count > start; count--) {
snprintf(dev_name, sizeof dev_name, dm_name, count - 1);
dm_device_remove_byname(dev_name);
}
return;
}
static void bl_dm_remove_tree(uint64_t dev)
{
struct bl_dm_tree *p;
p = find_bl_dm_tree(dev);
if (!p)
return;
dm_tree_free(p->tree);
del_from_bl_dm_tree(dev);
}
static int bl_dm_create_tree(uint64_t dev)
{
struct dm_tree *tree;
struct bl_dm_tree *bl_tree;
bl_tree = find_bl_dm_tree(dev);
if (bl_tree)
return 1;
tree = dm_tree_create();
if (!tree)
return 0;
if (!dm_tree_add_dev(tree, MAJOR(dev), MINOR(dev))) {
dm_tree_free(tree);
return 0;
}
bl_tree = malloc(sizeof(struct bl_dm_tree));
if (!bl_tree) {
dm_tree_free(tree);
return 0;
}
bl_tree->dev = dev;
bl_tree->tree = tree;
bl_tree->next = NULL;
add_to_bl_dm_tree(bl_tree);
return 1;
}
int dm_device_remove_all(uint64_t *dev)
{
struct bl_dm_tree *p;
struct dm_tree_node *node;
const char *uuid;
int ret = 0;
uint32_t major, minor;
uint64_t bl_dev;
memcpy(&major, dev, sizeof(uint32_t));
memcpy(&minor, (void *)dev + sizeof(uint32_t), sizeof(uint32_t));
bl_dev = MKDEV(major, minor);
p = find_bl_dm_tree(bl_dev);
if (!p)
return ret;
node = dm_tree_find_node(p->tree, MAJOR(bl_dev), MINOR(bl_dev));
if (!node)
return ret;
uuid = dm_tree_node_get_uuid(node);
if (!uuid)
return ret;
dm_device_remove(bl_dev);
ret = dm_tree_deactivate_children(node, uuid, strlen(uuid));
dm_task_update_nodes();
bl_dm_remove_tree(bl_dev);
return ret;
}
static int dm_device_exists(char *dev_name)
{
char fullname[DM_DEV_NAME_LEN];
snprintf(fullname, sizeof fullname, "/dev/mapper/%s", dev_name);
return (access(fullname, F_OK) >= 0);
}
/* TODO: check the value for DM_DEV_NAME_LEN, DM_TYPE_LEN, DM_PARAMS_LEN */
uint64_t dm_device_create(struct bl_volume *vols, int num_vols)
{
uint64_t size, stripe_unit, dev = 0;
unsigned int count = dev_count;
int volnum, i, pos;
struct bl_volume *node;
char *tmp;
struct bl_dm_table *table = NULL;
struct bl_dm_table *bl_table_head = NULL;
unsigned int len;
char *dev_name = NULL;
/* Create pseudo device here */
for (volnum = 0; volnum < num_vols; volnum++) {
node = &vols[volnum];
switch (node->bv_type) {
case BLOCK_VOLUME_SIMPLE:
/* Do not need to create device here */
dev = node->param.bv_dev;
goto continued;
case BLOCK_VOLUME_SLICE:
table = bl_dm_table_alloc();
if (!table)
goto out;
table->offset = 0;
table->size = node->bv_size;
strcpy(table->target_type, "linear");
if (!TYPE_HAS_DEV(node->bv_vols[0]->bv_type)) {
free(table);
goto out;
}
dev = node->bv_vols[0]->param.bv_dev;
tmp = table->params;
BL_LOG_INFO("%s: major %llu minor %llu", __func__,
(long long unsigned)MAJOR(dev),
(long long unsigned)MINOR(dev));
if (!dm_format_dev(tmp, DM_PARAMS_LEN,
MAJOR(dev), MINOR(dev))) {
free(table);
goto out;
}
tmp += strlen(tmp);
sprintf(tmp, " %llu",
(long long unsigned)node->param.bv_offset);
add_to_bl_dm_table(&bl_table_head, table);
break;
case BLOCK_VOLUME_STRIPE:
table = bl_dm_table_alloc();
if (!table)
goto out;
table->offset = 0;
/* Truncate size to a stripe unit boundary */
stripe_unit = node->param.bv_stripe_unit;
table->size =
node->bv_size - (node->bv_size % stripe_unit);
strcpy(table->target_type, "striped");
sprintf(table->params, "%d %llu %n", node->bv_vol_n,
(long long unsigned) stripe_unit, &pos);
/* Copy subdev major:minor to params */
tmp = table->params + pos;
len = DM_PARAMS_LEN - pos;
for (i = 0; i < node->bv_vol_n; i++) {
if (!TYPE_HAS_DEV(node->bv_vols[i]->bv_type)) {
free(table);
goto out;
}
dev = node->bv_vols[i]->param.bv_dev;
if (!dm_format_dev(tmp, len, MAJOR(dev),
MINOR(dev))) {
free(table);
goto out;
}
pos = strlen(tmp);
tmp += pos;
len -= pos;
sprintf(tmp, " %d ", 0);
tmp += 3;
len -= 3;
}
add_to_bl_dm_table(&bl_table_head, table);
break;
case BLOCK_VOLUME_CONCAT:
size = 0;
for (i = 0; i < node->bv_vol_n; i++) {
table = bl_dm_table_alloc();
if (!table)
goto out;
table->offset = size;
table->size = node->bv_vols[i]->bv_size;
if (!TYPE_HAS_DEV(node->bv_vols[i]->bv_type)) {
free(table);
goto out;
}
strcpy(table->target_type, "linear");
tmp = table->params;
dev = node->bv_vols[i]->param.bv_dev;
BL_LOG_INFO("%s: major %lu minor %lu", __func__,
(long unsigned int)MAJOR(dev),
(long unsigned int)MINOR(dev));
if (!dm_format_dev(tmp, DM_PARAMS_LEN,
MAJOR(dev), MINOR(dev))) {
free(table);
goto out;
}
tmp += strlen(tmp);
sprintf(tmp, " %d", 0);
size += table->size;
add_to_bl_dm_table(&bl_table_head, table);
}
break;
default:
/* Delete previous temporary devices */
dm_devicelist_remove(count, dev_count);
goto out;
} /* end of swtich */
/* Create dev_name here. Name of device is pnfs_vol_XXX */
if (dev_name)
free(dev_name);
dev_name = (char *)calloc(DM_DEV_NAME_LEN, sizeof(char));
if (!dev_name) {
BL_LOG_ERR("%s: Out of memory\n", __func__);
goto out;
}
do {
snprintf(dev_name, DM_DEV_NAME_LEN, dm_name,
dev_count++);
} while (dm_device_exists(dev_name));
dev = dm_device_create_mapped(dev_name, bl_table_head);
BL_LOG_INFO("%s: %d %s %d:%d\n", __func__, volnum, dev_name,
(int) MAJOR(dev), (int) MINOR(dev));
if (!dev) {
/* Delete previous temporary devices */
dm_devicelist_remove(count, dev_count);
goto out;
}
node->param.bv_dev = dev;
/* TODO: extend use with PSEUDO later */
node->bv_type = BLOCK_VOLUME_PSEUDO;
continued:
if (bl_table_head)
bl_dm_table_free(bl_table_head);
bl_table_head = NULL;
}
out:
if (bl_table_head) {
bl_dm_table_free(bl_table_head);
bl_table_head = NULL;
}
if (dev)
bl_dm_create_tree(dev);
if (dev_name)
free(dev_name);
return dev;
}