| /* |
| * device-process.c: detailed processing of device information sent |
| * from kernel. |
| * |
| * Copyright (c) 2006 The Regents of the University of Michigan. |
| * All rights reserved. |
| * |
| * Andy Adamson <andros@citi.umich.edu> |
| * Fred Isaman <iisaman@umich.edu> |
| * |
| * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> |
| * |
| * Used codes in linux/fs/nfs/blocklayout/blocklayoutdev.c. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include <sys/types.h> |
| #include <sys/stat.h> |
| #include <sys/user.h> |
| #include <arpa/inet.h> |
| #include <linux/kdev_t.h> |
| |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <unistd.h> |
| #include <syslog.h> |
| #include <fcntl.h> |
| #include <errno.h> |
| |
| #include "device-discovery.h" |
| |
| uint32_t *blk_overflow(uint32_t * p, uint32_t * end, size_t nbytes) |
| { |
| uint32_t *q = p + ((nbytes + 3) >> 2); |
| |
| if (q > end || q < p) |
| return NULL; |
| return p; |
| } |
| |
| static int decode_blk_signature(uint32_t **pp, uint32_t * end, |
| struct bl_sig *sig) |
| { |
| int i; |
| uint32_t siglen, *p = *pp; |
| |
| BLK_READBUF(p, end, 4); |
| READ32(sig->si_num_comps); |
| if (sig->si_num_comps == 0) { |
| BL_LOG_ERR("0 components in sig\n"); |
| goto out_err; |
| } |
| if (sig->si_num_comps >= BLOCK_MAX_SIG_COMP) { |
| BL_LOG_ERR("number of sig comps %i >= BLOCK_MAX_SIG_COMP\n", |
| sig->si_num_comps); |
| goto out_err; |
| } |
| for (i = 0; i < sig->si_num_comps; i++) { |
| struct bl_sig_comp *comp = &sig->si_comps[i]; |
| |
| BLK_READBUF(p, end, 12); |
| READ64(comp->bs_offset); |
| READ32(siglen); |
| comp->bs_length = siglen; |
| BLK_READBUF(p, end, siglen); |
| /* Note we rely here on fact that sig is used immediately |
| * for mapping, then thrown away. |
| */ |
| comp->bs_string = (char *)p; |
| p += ((siglen + 3) >> 2); |
| } |
| *pp = p; |
| return 0; |
| out_err: |
| return -EIO; |
| } |
| |
| /* |
| * Read signature from device and compare to sig_comp |
| * return: 0=match, 1=no match, -1=error |
| */ |
| static int |
| read_cmp_blk_sig(struct bl_disk *disk, int fd, struct bl_sig_comp *comp) |
| { |
| const char *dev_name = disk->valid_path->full_path; |
| int ret = -1; |
| ssize_t siglen = comp->bs_length; |
| int64_t bs_offset = comp->bs_offset; |
| char *sig = NULL; |
| |
| sig = (char *)malloc(siglen); |
| if (!sig) { |
| BL_LOG_ERR("%s: Out of memory\n", __func__); |
| goto out; |
| } |
| |
| if (bs_offset < 0) |
| bs_offset += (((int64_t) disk->size) << 9); |
| if (lseek64(fd, bs_offset, SEEK_SET) == -1) { |
| BL_LOG_ERR("File %s lseek error\n", dev_name); |
| goto out; |
| } |
| |
| if (read(fd, sig, siglen) != siglen) { |
| BL_LOG_ERR("File %s read error\n", dev_name); |
| goto out; |
| } |
| |
| ret = memcmp(sig, comp->bs_string, siglen); |
| |
| out: |
| if (sig) |
| free(sig); |
| return ret; |
| } |
| |
| /* |
| * All signatures in sig must be found on disk for verification. |
| * Returns True if sig matches, False otherwise. |
| */ |
| static int verify_sig(struct bl_disk *disk, struct bl_sig *sig) |
| { |
| const char *dev_name = disk->valid_path->full_path; |
| int fd, i, rv; |
| |
| fd = open(dev_name, O_RDONLY | O_LARGEFILE); |
| if (fd < 0) { |
| BL_LOG_ERR("%s: %s could not be opened for read\n", __func__, |
| dev_name); |
| return 0; |
| } |
| |
| rv = 1; |
| |
| for (i = 0; i < sig->si_num_comps; i++) { |
| if (read_cmp_blk_sig(disk, fd, &sig->si_comps[i])) { |
| rv = 0; |
| break; |
| } |
| } |
| |
| if (fd >= 0) |
| close(fd); |
| return rv; |
| } |
| |
| /* |
| * map_sig_to_device() |
| * Given a signature, walk the list of visible disks searching for |
| * a match. Returns True if mapping was done, False otherwise. |
| * |
| * While we're at it, fill in the vol->bv_size. |
| */ |
| static int map_sig_to_device(struct bl_sig *sig, struct bl_volume *vol) |
| { |
| int mapped = 0; |
| struct bl_disk *disk; |
| |
| /* scan disk list to find out match device */ |
| for (disk = visible_disk_list; disk; disk = disk->next) { |
| /* FIXME: should we use better algorithm for disk scan? */ |
| mapped = verify_sig(disk, sig); |
| if (mapped) { |
| BL_LOG_INFO("%s: using device %s\n", |
| __func__, disk->valid_path->full_path); |
| vol->param.bv_dev = disk->dev; |
| vol->bv_size = disk->size; |
| break; |
| } |
| } |
| return mapped; |
| } |
| |
| /* We are given an array of XDR encoded array indices, each of which should |
| * refer to a previously decoded device. Translate into a list of pointers |
| * to the appropriate pnfs_blk_volume's. |
| */ |
| static int set_vol_array(uint32_t **pp, uint32_t *end, |
| struct bl_volume *vols, int working) |
| { |
| int i, index; |
| uint32_t *p = *pp; |
| struct bl_volume **array = vols[working].bv_vols; |
| |
| for (i = 0; i < vols[working].bv_vol_n; i++) { |
| BLK_READBUF(p, end, 4); |
| READ32(index); |
| if ((index < 0) || (index >= working)) { |
| BL_LOG_ERR("set_vol_array: Id %i out of range\n", |
| index); |
| goto out_err; |
| } |
| array[i] = &vols[index]; |
| } |
| *pp = p; |
| return 0; |
| out_err: |
| return -EIO; |
| } |
| |
| static uint64_t sum_subvolume_sizes(struct bl_volume *vol) |
| { |
| int i; |
| uint64_t sum = 0; |
| |
| for (i = 0; i < vol->bv_vol_n; i++) |
| sum += vol->bv_vols[i]->bv_size; |
| return sum; |
| } |
| |
| static int |
| decode_blk_volume(uint32_t **pp, uint32_t *end, struct bl_volume *vols, int voln, |
| int *array_cnt) |
| { |
| int status = 0, j; |
| struct bl_sig sig; |
| uint32_t *p = *pp; |
| struct bl_volume *vol = &vols[voln]; |
| uint64_t tmp; |
| |
| BLK_READBUF(p, end, 4); |
| READ32(vol->bv_type); |
| |
| switch (vol->bv_type) { |
| case BLOCK_VOLUME_SIMPLE: |
| *array_cnt = 0; |
| status = decode_blk_signature(&p, end, &sig); |
| if (status) |
| return status; |
| status = map_sig_to_device(&sig, vol); |
| if (!status) { |
| BL_LOG_ERR("Could not find disk for device\n"); |
| return -ENXIO; |
| } |
| BL_LOG_INFO("%s: simple %d\n", __func__, voln); |
| status = 0; |
| break; |
| case BLOCK_VOLUME_SLICE: |
| BLK_READBUF(p, end, 16); |
| READ_SECTOR(vol->param.bv_offset); |
| READ_SECTOR(vol->bv_size); |
| *array_cnt = vol->bv_vol_n = 1; |
| BL_LOG_INFO("%s: slice %d\n", __func__, voln); |
| status = set_vol_array(&p, end, vols, voln); |
| break; |
| case BLOCK_VOLUME_STRIPE: |
| BLK_READBUF(p, end, 8); |
| READ_SECTOR(vol->param.bv_stripe_unit); |
| off_t stripe_unit = vol->param.bv_stripe_unit; |
| /* Check limitations imposed by device-mapper */ |
| if ((stripe_unit & (stripe_unit - 1)) != 0 |
| || stripe_unit < (off_t) (sysconf(_SC_PAGE_SIZE) >> 9)) |
| return -EIO; |
| BLK_READBUF(p, end, 4); |
| READ32(vol->bv_vol_n); |
| if (!vol->bv_vol_n) |
| return -EIO; |
| *array_cnt = vol->bv_vol_n; |
| BL_LOG_INFO("%s: stripe %d nvols=%d unit=%ld\n", __func__, voln, |
| vol->bv_vol_n, (long)stripe_unit); |
| status = set_vol_array(&p, end, vols, voln); |
| if (status) |
| return status; |
| for (j = 1; j < vol->bv_vol_n; j++) { |
| if (vol->bv_vols[j]->bv_size != |
| vol->bv_vols[0]->bv_size) { |
| BL_LOG_ERR("varying subvol size\n"); |
| return -EIO; |
| } |
| } |
| vol->bv_size = vol->bv_vols[0]->bv_size * vol->bv_vol_n; |
| break; |
| case BLOCK_VOLUME_CONCAT: |
| BLK_READBUF(p, end, 4); |
| READ32(vol->bv_vol_n); |
| if (!vol->bv_vol_n) |
| return -EIO; |
| *array_cnt = vol->bv_vol_n; |
| BL_LOG_INFO("%s: concat %d %d\n", __func__, voln, |
| vol->bv_vol_n); |
| status = set_vol_array(&p, end, vols, voln); |
| if (status) |
| return status; |
| vol->bv_size = sum_subvolume_sizes(vol); |
| break; |
| default: |
| BL_LOG_ERR("Unknown volume type %i\n", vol->bv_type); |
| out_err: |
| return -EIO; |
| } |
| *pp = p; |
| return status; |
| } |
| |
| uint64_t process_deviceinfo(const char *dev_addr_buf, |
| unsigned int dev_addr_len, |
| uint32_t *major, uint32_t *minor) |
| { |
| int num_vols, i, status, count; |
| uint32_t *p, *end; |
| struct bl_volume *vols = NULL, **arrays = NULL, **arrays_ptr = NULL; |
| uint64_t dev = 0; |
| |
| p = (uint32_t *) dev_addr_buf; |
| end = (uint32_t *) ((char *)p + dev_addr_len); |
| |
| /* Decode block volume */ |
| BLK_READBUF(p, end, 4); |
| READ32(num_vols); |
| BL_LOG_INFO("%s: %d vols\n", __func__, num_vols); |
| if (num_vols <= 0) |
| goto out_err; |
| |
| vols = (struct bl_volume *)malloc(num_vols * sizeof(struct bl_volume)); |
| if (!vols) { |
| BL_LOG_ERR("%s: Out of memory\n", __func__); |
| goto out_err; |
| } |
| |
| /* Each volume in vols array needs its own array. Save time by |
| * allocating them all in one large hunk. Because each volume |
| * array can only reference previous volumes, and because once |
| * a concat or stripe references a volume, it may never be |
| * referenced again, the volume arrays are guaranteed to fit |
| * in the suprisingly small space allocated. |
| */ |
| arrays_ptr = arrays = |
| (struct bl_volume **)malloc(num_vols * 2 * |
| sizeof(struct bl_volume *)); |
| if (!arrays) { |
| BL_LOG_ERR("%s: Out of memory\n", __func__); |
| goto out_err; |
| } |
| |
| for (i = 0; i < num_vols; i++) { |
| vols[i].bv_vols = arrays_ptr; |
| status = decode_blk_volume(&p, end, vols, i, &count); |
| if (status) |
| goto out_err; |
| arrays_ptr += count; |
| } |
| |
| if (p != end) { |
| BL_LOG_ERR("p is not equal to end!\n"); |
| goto out_err; |
| } |
| |
| dev = dm_device_create(vols, num_vols); |
| if (dev) { |
| *major = MAJOR(dev); |
| *minor = MINOR(dev); |
| } |
| |
| out_err: |
| if (vols) |
| free(vols); |
| if (arrays) |
| free(arrays); |
| return dev; |
| } |