| /* |
| * mdadm - manage Linux "md" devices aka RAID arrays. |
| * |
| * Copyright (C) 2006 Neil Brown <neilb@suse.de> |
| * |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License as published by |
| * the Free Software Foundation; either version 2 of the License, or |
| * (at your option) any later version. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, write to the Free Software |
| * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| * |
| * Author: Neil Brown |
| * Email: <neilb@suse.de> |
| */ |
| |
| #include "mdadm.h" |
| |
/* To restripe, we read from old geometry to a buffer, and
 * write from buffer to new geometry.
 * When reading we don't worry about parity.  When writing we do.
 *
 */
| |
| static int geo_map(int block, unsigned long long stripe, int raid_disks, int level, int layout) |
| { |
| /* On the given stripe, find which disk in the array will have |
| * block numbered 'block'. |
| * '-1' means the parity block. |
| * '-2' means the Q syndrome. |
| */ |
| int pd; |
| |
| switch(level*100 + layout) { |
| case 000: |
| case 400: |
| /* raid 4 isn't messed around by parity blocks */ |
| if (block == -1) |
| return raid_disks-1; /* parity block */ |
| return block; |
| case 500 + ALGORITHM_LEFT_ASYMMETRIC: |
| pd = (raid_disks-1) - stripe % raid_disks; |
| if (block == -1) return pd; |
| if (block >= pd) |
| block++; |
| return block; |
| |
| case 500 + ALGORITHM_RIGHT_ASYMMETRIC: |
| pd = stripe % raid_disks; |
| if (block == -1) return pd; |
| if (block >= pd) |
| block++; |
| return block; |
| |
| case 500 + ALGORITHM_LEFT_SYMMETRIC: |
| pd = (raid_disks - 1) - stripe % raid_disks; |
| if (block == -1) return pd; |
| return (pd + 1 + block) % raid_disks; |
| |
| case 500 + ALGORITHM_RIGHT_SYMMETRIC: |
| pd = stripe % raid_disks; |
| if (block == -1) return pd; |
| return (pd + 1 + block) % raid_disks; |
| |
| case 600 + ALGORITHM_LEFT_ASYMMETRIC: |
| pd = raid_disks - 1 - (stripe % raid_disks); |
| if (block == -1) return pd; |
| if (block == -2) return (pd+1) % raid_disks; |
| if (pd == raid_disks - 1) |
| return block+1; |
| if (block >= pd) |
| return block+2; |
| return block; |
| |
| case 600 + ALGORITHM_RIGHT_ASYMMETRIC: |
| pd = stripe % raid_disks; |
| if (block == -1) return pd; |
| if (block == -2) return (pd+1) % raid_disks; |
| if (pd == raid_disks - 1) |
| return block+1; |
| if (block >= pd) |
| return block+2; |
| return block; |
| |
| case 600 + ALGORITHM_LEFT_SYMMETRIC: |
| pd = raid_disks - 1 - (stripe % raid_disks); |
| if (block == -1) return pd; |
| if (block == -2) return (pd+1) % raid_disks; |
| return (pd + 2 + block) % raid_disks; |
| |
| case 600 + ALGORITHM_RIGHT_SYMMETRIC: |
| pd = stripe % raid_disks; |
| if (block == -1) return pd; |
| if (block == -2) return (pd+1) % raid_disks; |
| return (pd + 2 + block) % raid_disks; |
| } |
| return -1; |
| } |
| |
| |
static void xor_blocks(char *target, char **sources, int disks, int size)
{
	/* Fill 'target' with the byte-wise XOR of the first 'size' bytes
	 * of each of the 'disks' buffers in 'sources'.
	 * With no sources the target is zero-filled.
	 * Simple rather than fast: one byte at a time.
	 */
	int pos;

	for (pos = 0; pos < size; pos++) {
		char acc = 0;
		int remaining = disks;

		/* XOR is commutative, so walking the sources backwards
		 * gives the same byte.
		 */
		while (remaining-- > 0)
			acc ^= sources[remaining][pos];
		target[pos] = acc;
	}
}
| |
static void qsyndrome(char *p, char *q, char **sources, int disks, int size)
{
	/* Compute both RAID6 syndromes over 'disks' data buffers of
	 * 'size' bytes each:
	 *   p[] receives the plain XOR of all the data blocks,
	 *   q[] receives the sum of sources[z] * 2^z, where
	 *       multiplication by 2 is "shift left one bit and, if a
	 *       bit fell off the top, XOR with 0x1d".
	 *
	 * All arithmetic is done in 'unsigned char': plain 'char' may be
	 * signed, and left-shifting a negative value is undefined.
	 *
	 * With no data blocks both p[] and q[] are zero-filled.
	 */
	int d, z;

	if (disks <= 0) {
		for (d = 0; d < size; d++)
			p[d] = q[d] = 0;
		return;
	}

	for (d = 0; d < size; d++) {
		/* Horner's rule: start from the highest numbered block */
		unsigned char wp = (unsigned char)sources[disks-1][d];
		unsigned char wq = wp;

		for (z = disks-2; z >= 0; z--) {
			unsigned char wd = (unsigned char)sources[z][d];
			unsigned char reduce = (wq & 0x80) ? 0x1d : 0x00;

			wp ^= wd;
			/* wq = wq * 2 + wd */
			wq = (unsigned char)((wq << 1) ^ reduce);
			wq ^= wd;
		}
		p[d] = (char)wp;
		q[d] = (char)wq;
	}
}
| |
/* Save data:
 * Copy 'length' bytes of array data, beginning at array address 'start',
 * from the member devices to every backup target.
 *
 * We are given:
 *  source, offsets: 'fds' of the active disks, and a per-device base
 *      offset that is added to every seek on that device.
 *      For now we require all devices to be present.
 *  A geometry: raid_disks, chunk_size, level, layout
 *  nwrites, dest: A list of 'fds' for mirrored targets.  They are already
 *      seeked to the right (Write) location
 *  A start and length
 *
 * Parity is ignored: only data blocks are read.
 *
 * Returns 0 on success, or -1 if the geometry is unusable (non-positive
 * chunk size or data-disk count, level/layout not known to geo_map) or
 * if any seek, read or write fails or is short.
 */

int save_stripes(int *source, unsigned long long *offsets,
		 int raid_disks, int chunk_size, int level, int layout,
		 int nwrites, int *dest,
		 unsigned long long start, unsigned long long length)
{
	char buf[8192];
	int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);
	int cpos;	/* where in chunk we are up to */

	/* Everything below divides by these two. */
	if (chunk_size <= 0 || data_disks <= 0)
		return -1;

	cpos = start % chunk_size;

	while (length > 0) {
		unsigned long long chunk = start / chunk_size;
		unsigned long long stripe = chunk / data_disks;
		unsigned long long offset = stripe * chunk_size + cpos;
		int len = chunk_size - cpos;
		int disk;
		int i;

		/* len bytes to be moved from one device: never cross a
		 * chunk boundary, overflow buf, or pass the end of the
		 * request.
		 */
		if (len > (int)sizeof(buf))
			len = sizeof(buf);
		if ((unsigned long long)len > length)
			len = length;

		disk = geo_map(chunk % data_disks, stripe,
			       raid_disks, level, layout);
		/* geo_map returns -1 for a level/layout it does not know;
		 * don't index the fd arrays with that.
		 */
		if (disk < 0 || disk >= raid_disks)
			return -1;

		if (lseek64(source[disk], offsets[disk]+offset, SEEK_SET) < 0)
			return -1;
		if (read(source[disk], buf, len) != len)
			return -1;
		for (i = 0; i < nwrites; i++)
			if (write(dest[i], buf, len) != len)
				return -1;

		length -= len;
		start += len;
		cpos += len;
		if (cpos >= chunk_size)
			cpos -= chunk_size;
	}
	return 0;
}
| |
/* Restore data:
 * We are given:
 *  dest, offsets: A list of 'fds' of the active disks, and a per-device
 *      base offset added to every seek on that device.
 *      Some fds may be '-1' for not-available; those are skipped.
 *  A geometry: raid_disks, chunk_size, level, layout
 *  source, read_offset: An 'fd' to read from, and the position in it at
 *      which the saved data starts.
 *  A start and length.
 *    The length must be a multiple of the stripe size.
 *
 * We build a full stripe in memory (data read from 'source', parity and
 * Q computed for levels 4, 5 and 6) and then write it out.
 * We assume that there are enough working devices.
 *
 * Returns:
 *    0  success
 *   -1  unusable geometry, or a seek/read/write failed or was short
 *   -2  out of memory
 *   -3  'length' is not a whole number of stripes
 * All working memory is released on every path.
 */
int restore_stripes(int *dest, unsigned long long *offsets,
		    int raid_disks, int chunk_size, int level, int layout,
		    int source, unsigned long long read_offset,
		    unsigned long long start, unsigned long long length)
{
	char *stripe_buf = NULL;
	char **stripes = NULL;
	char **blocks = NULL;
	int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);
	int rv = -1;
	int i;

	/* These are used as divisors and allocation sizes below. */
	if (raid_disks <= 0 || chunk_size <= 0 || data_disks <= 0)
		return -1;

	stripe_buf = malloc((size_t)raid_disks * chunk_size);
	stripes = malloc(raid_disks * sizeof(char*));
	blocks = malloc(raid_disks * sizeof(char*));
	if (stripe_buf == NULL || stripes == NULL || blocks == NULL) {
		rv = -2;
		goto out;
	}

	/* stripes[i] is the chunk that will be written to device i */
	for (i = 0; i < raid_disks; i++)
		stripes[i] = stripe_buf + (size_t)i * chunk_size;

	while (length > 0) {
		unsigned long long stripe = start / chunk_size / data_disks;
		unsigned long long offset = stripe * chunk_size;
		int len = data_disks * chunk_size;
		int disk, qdisk;

		if (length < (unsigned long long)len) {
			rv = -3;
			goto out;
		}

		/* Read each data chunk straight into the slot of the
		 * device it belongs on; blocks[] remembers them in
		 * logical order for the parity calculation.
		 */
		for (i = 0; i < data_disks; i++) {
			disk = geo_map(i, stripe, raid_disks, level, layout);
			/* -1 here means geo_map does not know this
			 * level/layout.
			 */
			if (disk < 0 || disk >= raid_disks)
				goto out;
			blocks[i] = stripes[disk];
			if (lseek64(source, read_offset, SEEK_SET) != read_offset)
				goto out;
			if (read(source, stripes[disk], chunk_size) != chunk_size)
				goto out;
			read_offset += chunk_size;
		}

		/* We have the data, now do the parity */
		switch (level) {
		case 4:
		case 5:
			disk = geo_map(-1, stripe, raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks)
				goto out;
			xor_blocks(stripes[disk], blocks, data_disks, chunk_size);
			break;
		case 6:
			disk = geo_map(-1, stripe, raid_disks, level, layout);
			qdisk = geo_map(-2, stripe, raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks ||
			    qdisk < 0 || qdisk >= raid_disks)
				goto out;
			qsyndrome(stripes[disk], stripes[qdisk], blocks,
				  data_disks, chunk_size);
			break;
		}

		for (i = 0; i < raid_disks; i++) {
			if (dest[i] < 0)
				continue;	/* device not available */
			if (lseek64(dest[i], offsets[i]+offset, SEEK_SET) < 0)
				goto out;
			if (write(dest[i], stripes[i], chunk_size) != chunk_size)
				goto out;
		}

		length -= len;
		start += len;
	}
	rv = 0;

out:
	free(stripe_buf);
	free(stripes);
	free(blocks);
	return rv;
}
| |
| #ifdef MAIN |
| |
int test_stripes(int *source, unsigned long long *offsets,
		 int raid_disks, int chunk_size, int level, int layout,
		 unsigned long long start, unsigned long long length)
{
	/* read the data and p (and q) blocks, and check we got them right.
	 *
	 * 'start' and 'length' are used as positions on each member
	 * device (offsets[i]+start is the seek target), stepped one chunk
	 * at a time.  For every stripe the data-block mapping is printed;
	 * for level 6 the stored P and Q are compared with freshly
	 * computed ones and any mismatch is reported on stdout.
	 * Other levels are only mapped, not verified.
	 *
	 * Returns 0 once all stripes were examined (mismatches are
	 * reported, not returned), -1 on unusable geometry or I/O failure,
	 * -2 if memory could not be allocated.
	 */
	char *stripe_buf;
	char **stripes;
	char **blocks;
	char *p;
	char *q;
	int data_disks = raid_disks - (level == 5 ? 1: 2);
	int rv = -1;
	int i;

	if (raid_disks <= 0 || chunk_size <= 0 || data_disks <= 0)
		return -1;

	stripe_buf = malloc((size_t)raid_disks * chunk_size);
	stripes = malloc(raid_disks * sizeof(char*));
	blocks = malloc(raid_disks * sizeof(char*));
	p = malloc(chunk_size);
	q = malloc(chunk_size);
	if (stripe_buf == NULL || stripes == NULL || blocks == NULL ||
	    p == NULL || q == NULL) {
		rv = -2;
		goto out;
	}

	for (i = 0; i < raid_disks; i++)
		stripes[i] = stripe_buf + (size_t)i * chunk_size;

	while (length > 0) {
		unsigned long long stripe = start / chunk_size;
		int disk;

		for (i = 0; i < raid_disks; i++) {
			if (lseek64(source[i], offsets[i]+start, SEEK_SET) < 0)
				goto out;
			if (read(source[i], stripes[i], chunk_size) != chunk_size)
				goto out;
		}
		for (i = 0; i < data_disks; i++) {
			disk = geo_map(i, stripe, raid_disks, level, layout);
			/* unknown level/layout: nothing sensible to check */
			if (disk < 0 || disk >= raid_disks)
				goto out;
			blocks[i] = stripes[disk];
			printf("%d->%d\n", i, disk);
		}
		switch (level) {
		case 6:
			qsyndrome(p, q, blocks, data_disks, chunk_size);
			disk = geo_map(-1, stripe, raid_disks, level, layout);
			if (memcmp(p, stripes[disk], chunk_size) != 0) {
				printf("P(%d) wrong at %llu\n", disk, stripe);
			}
			disk = geo_map(-2, stripe, raid_disks, level, layout);
			if (memcmp(q, stripes[disk], chunk_size) != 0) {
				printf("Q(%d) wrong at %llu\n", disk, stripe);
			}
			break;
		}

		/* 'length' is unsigned: stop rather than wrap around when
		 * it is not a whole number of chunks.
		 */
		if (length <= (unsigned long long)chunk_size)
			break;
		length -= chunk_size;
		start += chunk_size;
	}
	rv = 0;

out:
	free(stripe_buf);
	free(stripes);
	free(blocks);
	free(p);
	free(q);
	return rv;
}
| |
unsigned long long getnum(char *str, char **err)
{
	/* Parse 'str' as a base-10 number.
	 * The whole string must be consumed and at least one character
	 * must have been converted; otherwise *err is pointed at the
	 * offending string and 0 is returned.  *err is left untouched on
	 * success, so one 'err' can collect failures across many calls.
	 */
	char *end = NULL;
	unsigned long long value = strtoull(str, &end, 10);
	int fully_parsed = (end != str) && (*end == '\0');

	if (fully_parsed)
		return value;

	*err = str;
	return 0;
}
| |
int main(int argc, char *argv[])
{
	/* save/restore/test file raid_disks chunk_size level layout start length devices...
	 *
	 * Stand-alone driver for the routines above:
	 *   save     copy array data from the devices into 'file'
	 *   restore  write the contents of 'file' back across the devices
	 *   test     read the devices and check the stored parity
	 *
	 * Exit status: 0 success, 1 usage error or the operation failed,
	 * 2 bad argument, 3 a file/device could not be opened or memory
	 * could not be allocated.
	 */
	int save;
	int *fds;
	char *file;
	int storefd;
	unsigned long long *offsets;
	int raid_disks, chunk_size, level, layout;
	unsigned long long start, length;
	int i;
	int rv;
	const char *what;

	char *err = NULL;
	if (argc < 10) {
		fprintf(stderr, "Usage: test_stripe save/restore/test file raid_disks"
			" chunk_size level layout start length devices...\n");
		exit(1);
	}
	if (strcmp(argv[1], "save")==0)
		save = 1;
	else if (strcmp(argv[1], "restore") == 0)
		save = 0;
	else if (strcmp(argv[1], "test") == 0)
		save = 2;
	else {
		fprintf(stderr, "test_stripe: must give 'save', 'restore' or 'test'.\n");
		exit(2);
	}

	file = argv[2];
	raid_disks = getnum(argv[3], &err);
	chunk_size = getnum(argv[4], &err);
	level = getnum(argv[5], &err);
	layout = getnum(argv[6], &err);
	start = getnum(argv[7], &err);
	length = getnum(argv[8], &err);
	if (err) {
		fprintf(stderr, "test_stripe: Bad number: %s\n", err);
		exit(2);
	}
	/* Both are used as divisors and array sizes further down. */
	if (raid_disks <= 0 || chunk_size <= 0) {
		fprintf(stderr, "test_stripe: raid_disks and chunk_size must be positive\n");
		exit(2);
	}
	if (argc != raid_disks + 9) {
		fprintf(stderr, "test_stripe: wrong number of devices: want %d found %d\n",
			raid_disks, argc-9);
		exit(2);
	}
	fds = malloc(raid_disks * sizeof(*fds));
	/* every device's data is taken to start at offset 0 */
	offsets = calloc(raid_disks, sizeof(*offsets));
	if (fds == NULL || offsets == NULL) {
		fprintf(stderr, "test_stripe: out of memory\n");
		exit(3);
	}

	storefd = open(file, O_RDWR);
	if (storefd < 0) {
		perror(file);
		fprintf(stderr, "test_stripe: could not open %s.\n", file);
		exit(3);
	}
	for (i=0; i<raid_disks; i++) {
		fds[i] = open(argv[9+i], O_RDWR);
		if (fds[i] < 0) {
			perror(argv[9+i]);
			fprintf(stderr,"test_stripe: cannot open %s.\n", argv[9+i]);
			exit(3);
		}
	}

	if (save == 1) {
		what = "save_stripes";
		rv = save_stripes(fds, offsets,
				  raid_disks, chunk_size, level, layout,
				  1, &storefd,
				  start, length);
	} else if (save == 2) {
		what = "test_stripes";
		rv = test_stripes(fds, offsets,
				  raid_disks, chunk_size, level, layout,
				  start, length);
	} else {
		what = "restore_stripes";
		rv = restore_stripes(fds, offsets,
				     raid_disks, chunk_size, level, layout,
				     storefd, 0ULL,
				     start, length);
	}
	if (rv != 0) {
		fprintf(stderr, "test_stripe: %s returned %d\n", what, rv);
		exit(1);
	}
	exit(0);
}
| |
| #endif /* MAIN */ |