| /* |
| * Copyright (c) 2000-2001 Silicon Graphics, Inc. |
| * All Rights Reserved. |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU General Public License as |
| * published by the Free Software Foundation. |
| * |
| * This program is distributed in the hope that it would be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, write the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
| */ |
| |
| #include <xfs/xfs.h> |
| #include <xfs/jdm.h> |
| |
| #include <unistd.h> |
| #include <stdlib.h> |
| #include <sys/mman.h> |
| #include <errno.h> |
| #include <memory.h> |
| #include <limits.h> |
| #include <assert.h> |
| |
| #include "config.h" |
| |
| #include "types.h" |
| #include "mlog.h" |
| #include "win.h" |
| #include "node.h" |
| #include "mmap.h" |
| |
| extern size_t pgsz; |
| extern size_t pgmask; |
| |
| /* node handle limits |
| */ |
| #ifdef NODECHK |
| |
| /* macros for manipulating node handles when handle consistency |
| * checking is enabled. the upper bits of a handle will be loaded |
| * with the node gen count, described below. this should not be |
| * used for production code, it cuts into the number of dirents |
| * that xfsrestore can handle. |
| */ |
| #define HDLGENCNT 4 |
| #define HDLGENSHIFT (NBBY * sizeof (nh_t) - HDLGENCNT) |
| #define HDLGENLOMASK ((1 << HDLGENCNT) - 1) |
| #define HDLGENMASK (HDLGENLOMASK << HDLGENSHIFT) |
| #define HDLMASK ((1 << HDLGENSHIFT) - 1) |
| #define HDLGETGEN(h) ((u_char_t) \ |
| (((int)h >> HDLGENSHIFT) \ |
| & \ |
| HDLGENLOMASK)) |
| #define HDLGETNHDL(h) ((nh_t)((int)h & HDLMASK)) |
| #define HDLMKHDL(g, n) ((nh_t)((((int)g << HDLGENSHIFT)\ |
| & \ |
| HDLGENMASK) \ |
| | \ |
| ((int)n & HDLMASK))) |
| #define NH_MAX (HDLMASK) |
| |
| /* the housekeeping byte of each node will hold two check fields: |
| * a gen count, initialized to 0 and incremented each time a node |
| * is allocated, to catch re-use of stale handles; and unique pattern, to |
| * differentiate a valid node from random memory. two unique patterns will |
| * be used; one when the node is on the free list, another when it is |
| * allocated. this allows me to detect use of handles to nodes which have |
| * be freed. |
| */ |
| #define HKPGENCNT HDLGENCNT |
| #define HKPGENSHIFT (NBBY - HKPGENCNT) |
| #define HKPGENLOMASK ((1 << HKPGENCNT) - 1) |
| #define HKPGENMASK (HKPGENLOMASK << HKPGENSHIFT) |
| #define HKPUNQCNT (NBBY - HKPGENCNT) |
| #define HKPUNQMASK ((1 << HKPUNQCNT) - 1) |
| #define HKPGETGEN(b) ((u_char_t) \ |
| (((int)b >> HKPGENSHIFT) \ |
| & \ |
| HKPGENLOMASK)) |
| #define HKPGETUNQ(b) ((u_char_t)((int)b & HKPUNQMASK)) |
| #define HKPMKHKP(g, u) ((u_char_t) \ |
| ((((int)g << HKPGENSHIFT) \ |
| & \ |
| HKPGENMASK) \ |
| | \ |
| ((int)u & HKPUNQMASK))) |
| |
| /* simple patterns for detecting a node |
| */ |
| #define NODEUNQFREE 0x9 |
| #define NODEUNQALCD 0x6 |
| |
| #else /* NODECHK */ |
| |
| #define NH_MAX (NH_NULL - 1) |
| |
| #endif /* NODECHK */ |
| |
| /* window constraints |
| */ |
| #define NODESPERSEG_MIN 1048576 |
| #define WINMAP_MIN 4 |
| |
| /* reserve the firstpage for a header to save persistent context |
| */ |
| #define NODE_HDRSZ pgsz |
| |
| typedef int relnix_t; |
| |
| struct node_hdr { |
| size_t nh_nodesz; |
| /* internal node size - user may see something smaller |
| */ |
| ix_t nh_nodehkix; |
| /* index of byte in each node the user has reserved |
| * for use by me |
| */ |
| nh_t nh_nodesperseg; |
| /* an integral number of internal nodes must fit into a |
| * segment |
| */ |
| size64_t nh_segsz; |
| /* the backing store is partitoned into segment, which |
| * can be mapped into VM windows by the win abstraction |
| */ |
| size_t nh_winmapmax; |
| /* maximum number of windows which can be mapped |
| */ |
| size_t nh_nodealignsz; |
| /* user's constraint on node alignment |
| */ |
| nh_t nh_freenh; |
| /* handle of first node of singly-linked list of free nodes |
| */ |
| off64_t nh_firstsegoff; |
| /* offset into backing store of the first segment |
| */ |
| nh_t nh_virgnh; |
| /* handle of next virgin node |
| */ |
| int nh_segixshift; |
| /* bitshift used to extract the segment index from an nh_t |
| */ |
| relnix_t nh_relnixmask; |
| /* bitmask used to extract the node index from an nh_t |
| * (relative to the start of a segment) |
| */ |
| }; |
| |
| typedef struct node_hdr node_hdr_t; |
| |
| static node_hdr_t *node_hdrp; |
| static int node_fd; |
| |
| static inline segix_t |
| nh2segix(nh_t nh) |
| { |
| return nh >> node_hdrp->nh_segixshift; |
| } |
| |
| static inline relnix_t |
| nh2relnix(nh_t nh) |
| { |
| return nh & node_hdrp->nh_relnixmask; |
| } |
| |
| static inline void |
| node_map_internal(nh_t nh, void **pp) |
| { |
| win_map(nh2segix(nh), pp); |
| if (*pp != NULL) { |
| relnix_t relnix = nh2relnix(nh); |
| *pp = (void *)((char *)(*pp) + |
| ((off64_t)relnix * |
| node_hdrp->nh_nodesz)); |
| } |
| } |
| |
| /* ARGSUSED */ |
| static inline void |
| node_unmap_internal(nh_t nh, void **pp, bool_t freepr) |
| { |
| #ifdef NODECHK |
| register u_char_t hkp; |
| register u_char_t hdlgen; |
| register u_char_t nodegen; |
| register u_char_t nodeunq; |
| #endif /* NODECHK */ |
| |
| assert(pp); |
| assert(*pp); |
| assert(nh != NH_NULL); |
| |
| /* convert the handle into an index |
| */ |
| #ifdef NODECHK |
| hdlgen = HDLGETGEN(nh); |
| nh = HDLGETNHDL(nh); |
| #endif /* NODECHK */ |
| |
| assert(nh <= NH_MAX); |
| |
| #ifdef NODECHK |
| hkp = *(*(u_char_t **)pp + node_hdrp->nh_nodehkix); |
| nodegen = HKPGETGEN(hkp); |
| assert(nodegen == hdlgen); |
| nodeunq = HKPGETUNQ(hkp); |
| if (!freepr) { |
| assert(nodeunq != NODEUNQFREE); |
| assert(nodeunq == NODEUNQALCD); |
| } else { |
| assert(nodeunq != NODEUNQALCD); |
| assert(nodeunq == NODEUNQFREE); |
| } |
| #endif /* NODECHK */ |
| |
| /* unmap the window containing the node |
| */ |
| win_unmap(nh2segix(nh), pp); /* zeros *pp */ |
| } |
| |
| /* ARGSUSED */ |
| bool_t |
| node_init(int fd, |
| off64_t off, |
| size_t usrnodesz, |
| ix_t nodehkix, |
| size_t nodealignsz, |
| size64_t vmsz, |
| size64_t dirs_nondirs_cnt) |
| { |
| size_t nodesz; |
| size64_t segsz; |
| nh_t nodesperseg; |
| size_t max_segments; |
| size_t winmapmax; |
| size_t segcount; |
| int segixshift; |
| |
| /* sanity checks |
| */ |
| assert(sizeof(node_hdr_t) <= NODE_HDRSZ); |
| assert(sizeof(nh_t) < sizeof(off64_t)); |
| assert(sizeof(nh_t) <= sizeof(segix_t)); |
| assert(sizeof(nh_t) <= sizeof(relnix_t)); |
| assert(nodehkix < usrnodesz); |
| assert(usrnodesz >= sizeof(char *) + 1); |
| /* so node is at least big enough to hold |
| * the free list linkage and the housekeeping byte |
| */ |
| assert(nodehkix > sizeof(char *)); |
| /* since beginning of each node is used to |
| * link it in the free list. |
| */ |
| |
| /* adjust the user's node size to meet user's alignment constraint |
| */ |
| nodesz = (usrnodesz + nodealignsz - 1) & ~(nodealignsz - 1); |
| |
| /* Calculate the node table params based on the number of inodes in the |
| * dump, since that's all we know. Ideally we'd base this on the number |
| * of dirents in the dump instead as there's a node per dirent. |
| * |
| * Due to virtual memory constraints and the fact that we don't know |
| * the final node table size, we can't guarantee that the entire node |
| * table can be mapped at once. Instead segments will be mapped using a |
| * window abstraction. Some operations require WINMAP_MIN nodes to be |
| * referenced at a time, therefore we must ensure that this many |
| * segments fit in virtual memory. |
| * |
| * nodesperseg must be a power of two. Earlier versions did not enforce |
| * this, and profiling showed that nearly 50% of cpu time during the |
| * node table construction was consumed doing division and modulo to |
| * convert an nh_t to a segment index and node offset. By making |
| * nodesperseg a power of two we can use shift and bitwise-and instead. |
| * |
| * Each segment must hold an integral number of nodes and be an integral |
| * number of pages. #1 ensures this except when nodesperseg is small, so |
| * the smallest allowed segsz is pgsz * nodesz (i.e., nodesperseg == |
| * pgsz). However this is of no consequence as we enforce a larger |
| * minimum nodesperseg (NODESPERSEG_MIN) anyway in order to place a |
| * reasonable cap on the max number of segments. |
| */ |
| |
| assert(NODESPERSEG_MIN >= pgsz); |
| |
| if (vmsz < WINMAP_MIN * NODESPERSEG_MIN * nodesz) { |
| mlog(MLOG_NORMAL | MLOG_ERROR, _( |
| "not enough virtual memory for node abstraction: " |
| "remaining-vsmz=%llu need=%llu\n"), |
| vmsz, WINMAP_MIN * NODESPERSEG_MIN * nodesz); |
| return BOOL_FALSE; |
| } |
| |
| /* This is checked as nodes are allocated as well (remember that |
| * dirs_nondirs_cnt may be less than the number of nodes/dirents). |
| * Checking this here prevents potential overflow in the logic below. |
| */ |
| if (dirs_nondirs_cnt > NH_MAX) { |
| mlog(MLOG_NORMAL | MLOG_ERROR, _( |
| "dump contains %llu inodes, restore can only handle %u\n"), |
| dirs_nondirs_cnt, NH_MAX); |
| return BOOL_FALSE; |
| } |
| |
| for (winmapmax = 0, segcount = 1; winmapmax < WINMAP_MIN; segcount <<= 1) { |
| |
| nodesperseg = max(dirs_nondirs_cnt / segcount, NODESPERSEG_MIN); |
| |
| /* nodesperseg must be a power of 2 */ |
| for (segixshift = 0; |
| (1ULL << segixshift) < nodesperseg; |
| segixshift++); |
| |
| /* rounding up to a power of 2 may have caused overflow */ |
| if ((1ULL << segixshift) > NH_MAX) |
| segixshift--; |
| |
| nodesperseg = 1UL << segixshift; |
| |
| max_segments = 1UL << (NBBY * sizeof(nh_t) - segixshift); |
| |
| segsz = nodesperseg * nodesz; |
| |
| /* max number of segments that will fit in virtual memory, |
| * capped at the max possible number of segments |
| */ |
| winmapmax = min(vmsz / segsz, max_segments); |
| } |
| |
| /* map the abstraction header |
| */ |
| assert((NODE_HDRSZ & pgmask) == 0); |
| assert(!(NODE_HDRSZ % pgsz)); |
| assert(off <= OFF64MAX); |
| assert(!(off % (off64_t)pgsz)); |
| node_hdrp = (node_hdr_t *)mmap_autogrow( |
| NODE_HDRSZ, |
| fd, |
| off); |
| if (node_hdrp == (node_hdr_t *)-1) { |
| mlog(MLOG_NORMAL | MLOG_ERROR, _( |
| "unable to map node hdr of size %d: %s\n"), |
| NODE_HDRSZ, |
| strerror(errno)); |
| return BOOL_FALSE; |
| } |
| |
| /* initialize and save persistent context. |
| */ |
| node_hdrp->nh_nodesz = nodesz; |
| node_hdrp->nh_nodehkix = nodehkix; |
| node_hdrp->nh_segsz = segsz; |
| node_hdrp->nh_winmapmax = winmapmax; |
| node_hdrp->nh_nodesperseg = nodesperseg; |
| node_hdrp->nh_nodealignsz = nodealignsz; |
| node_hdrp->nh_freenh = NH_NULL; |
| node_hdrp->nh_firstsegoff = off + (off64_t)NODE_HDRSZ; |
| node_hdrp->nh_virgnh = 0; |
| node_hdrp->nh_segixshift = segixshift; |
| node_hdrp->nh_relnixmask = nodesperseg - 1; |
| |
| /* save transient context |
| */ |
| node_fd = fd; |
| |
| /* initialize the window abstraction |
| */ |
| win_init(fd, |
| node_hdrp->nh_firstsegoff, |
| segsz, |
| winmapmax); |
| |
| /* announce the results |
| */ |
| mlog(MLOG_DEBUG | MLOG_TREE, |
| "node_init:" |
| " vmsz = %llu (0x%llx)" |
| " segsz = %llu (0x%llx)" |
| " nodesperseg = %u (0x%x)" |
| " winmapmax = %lu (0x%lx)" |
| "\n", |
| vmsz, vmsz, |
| segsz, segsz, |
| nodesperseg, nodesperseg, |
| winmapmax, winmapmax); |
| |
| return BOOL_TRUE; |
| } |
| |
| bool_t |
| node_sync(int fd, off64_t off) |
| { |
| /* sanity checks |
| */ |
| assert(sizeof(node_hdr_t) <= NODE_HDRSZ); |
| |
| /* map the abstraction header |
| */ |
| assert((NODE_HDRSZ & pgmask) == 0); |
| assert(off <= (off64_t)OFF64MAX); |
| assert(!(off % (off64_t)pgsz)); |
| node_hdrp = (node_hdr_t *)mmap_autogrow( |
| NODE_HDRSZ, |
| fd, |
| off); |
| if (node_hdrp == (node_hdr_t *)-1) { |
| mlog(MLOG_NORMAL | MLOG_ERROR, _( |
| "unable to map node hdr of size %d: %s\n"), |
| NODE_HDRSZ, |
| strerror(errno)); |
| return BOOL_FALSE; |
| } |
| |
| /* save transient context |
| */ |
| node_fd = fd; |
| |
| /* initialize the window abstraction |
| */ |
| win_init(fd, |
| node_hdrp->nh_firstsegoff, |
| node_hdrp->nh_segsz, |
| node_hdrp->nh_winmapmax); |
| |
| return BOOL_TRUE; |
| } |
| |
| nh_t |
| node_alloc(void) |
| { |
| u_char_t *p; |
| nh_t nh; |
| #ifdef NODECHK |
| u_char_t *hkpp; |
| u_char_t gen = 0; |
| u_char_t unq; |
| #endif /* NODECHK */ |
| |
| /* if there's a node available on the free list, use it. |
| * otherwise get the next one from the current virgin segment, |
| * or allocate a new virgin segment if the current one is depleted. |
| */ |
| if (node_hdrp->nh_freenh != NH_NULL) { |
| nh_t *linkagep; |
| |
| nh = node_hdrp->nh_freenh; |
| |
| node_map_internal(nh, (void **)&p); |
| if (p == NULL) |
| return NH_NULL; |
| #ifdef NODECHK |
| hkpp = p + node_hdrp->nh_nodehkix; |
| gen = (u_char_t)(HKPGETGEN(*p) + (u_char_t)1); |
| unq = HKPGETUNQ(*hkpp); |
| assert(unq != NODEUNQALCD); |
| assert(unq == NODEUNQFREE); |
| #endif /* NODECHK */ |
| |
| /* adjust the free list */ |
| linkagep = (nh_t *)p; |
| node_hdrp->nh_freenh = *linkagep; |
| |
| node_unmap_internal(nh, (void **)&p, BOOL_TRUE); |
| |
| } else { |
| if (nh2relnix(node_hdrp->nh_virgnh) == 0) { |
| /* need to start a new virgin segment */ |
| int rval; |
| off64_t new_seg_off = |
| node_hdrp->nh_firstsegoff + |
| (off64_t)nh2segix(node_hdrp->nh_virgnh) * |
| (off64_t)node_hdrp->nh_segsz; |
| |
| assert(new_seg_off |
| <= |
| OFF64MAX - (off64_t)node_hdrp->nh_segsz); |
| mlog(MLOG_DEBUG, |
| "pre-growing new node array segment at %lld " |
| "size %lld\n", |
| new_seg_off, |
| (off64_t)node_hdrp->nh_segsz); |
| rval = ftruncate64(node_fd, |
| new_seg_off |
| + |
| (off64_t)node_hdrp->nh_segsz); |
| if (rval) { |
| mlog(MLOG_NORMAL | MLOG_WARNING | MLOG_TREE, _( |
| "unable to autogrow node segment %u: " |
| "%s (%d)\n"), |
| nh2segix(node_hdrp->nh_virgnh), |
| strerror(errno), |
| errno); |
| } |
| } |
| |
| nh = node_hdrp->nh_virgnh++; |
| } |
| |
| /* build a handle for node |
| */ |
| if (nh > NH_MAX) { |
| mlog(MLOG_NORMAL | MLOG_ERROR, _( |
| "dump contains too many dirents, " |
| "restore can only handle %llu\n"), |
| NH_MAX); |
| return NH_NULL; |
| } |
| #ifdef NODECHK |
| node_map_internal(nh, (void **)&p); |
| if (p == NULL) |
| abort(); |
| hkpp = p + (int)node_hdrp->nh_nodehkix; |
| nh = HDLMKHDL(gen, nh); |
| *hkpp = HKPMKHKP(gen, NODEUNQALCD); |
| node_unmap_internal(nh, (void **)&p, BOOL_FALSE); |
| #endif /* NODECHK */ |
| |
| return nh; |
| } |
| |
| void * |
| node_map(nh_t nh) |
| { |
| u_char_t *p; |
| #ifdef NODECHK |
| register u_char_t hkp; |
| register u_char_t hdlgen; |
| register u_char_t nodegen; |
| register u_char_t nodeunq; |
| #endif /* NODECHK */ |
| |
| assert(nh != NH_NULL); |
| |
| /* convert the handle into an index |
| */ |
| #ifdef NODECHK |
| hdlgen = HDLGETGEN(nh); |
| nh = HDLGETNHDL(nh); |
| #endif /* NODECHK */ |
| |
| assert(nh <= NH_MAX); |
| |
| /* map in |
| */ |
| p = 0; /* keep lint happy */ |
| node_map_internal(nh, (void **)&p); |
| if (p == NULL) |
| return NULL; |
| |
| #ifdef NODECHK |
| hkp = *(p + node_hdrp->nh_nodehkix); |
| nodegen = HKPGETGEN(hkp); |
| nodeunq = HKPGETUNQ(hkp); |
| assert(nodegen == hdlgen); |
| assert(nodeunq != NODEUNQFREE); |
| assert(nodeunq == NODEUNQALCD); |
| #endif /* NODECHK */ |
| |
| return (void *)p; |
| } |
| |
| void |
| node_unmap(nh_t nh, void **pp) |
| { |
| node_unmap_internal(nh, pp, BOOL_FALSE); |
| } |
| |
| void |
| node_free(nh_t *nhp) |
| { |
| nh_t nh; |
| u_char_t *p; |
| register nh_t *linkagep; |
| #ifdef NODECHK |
| register u_char_t *hkpp; |
| register u_char_t hdlgen; |
| register u_char_t nodegen; |
| register u_char_t nodeunq; |
| #endif /* NODECHK */ |
| |
| assert(nhp); |
| nh = *nhp; |
| assert(nh != NH_NULL); |
| |
| /* convert the handle into an index |
| */ |
| #ifdef NODECHK |
| hdlgen = HDLGETGEN(nh); |
| nh = HDLGETNHDL(nh); |
| #endif /* NODECHK */ |
| |
| assert(nh <= NH_MAX); |
| |
| /* map in |
| */ |
| p = (u_char_t *)node_map(nh); |
| if (p == NULL){ |
| *nhp = NH_NULL; |
| return; |
| } |
| |
| #ifdef NODECHK |
| /* fix up unique field |
| */ |
| hkpp = p + node_hdrp->nh_nodehkix; |
| nodegen = HKPGETGEN(*hkpp); |
| nodeunq = HKPGETUNQ(*hkpp); |
| assert(nodegen == hdlgen); |
| assert(nodeunq != NODEUNQFREE); |
| assert(nodeunq == NODEUNQALCD); |
| *hkpp = HKPMKHKP(nodegen, NODEUNQFREE); |
| #endif /* NODECHK */ |
| |
| /* put node on free list |
| */ |
| linkagep = (nh_t *)p; |
| *linkagep = node_hdrp->nh_freenh; |
| node_hdrp->nh_freenh = nh; |
| |
| /* map out |
| */ |
| node_unmap_internal(nh, (void **)&p, BOOL_TRUE); |
| |
| /* invalidate caller's handle |
| */ |
| *nhp = NH_NULL; |
| } |