/*
* Copyright (c) 2000-2001 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <xfs/xfs.h>
#include <xfs/jdm.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <errno.h>
#include <memory.h>
#include <limits.h>
#include <assert.h>
#include "config.h"
#include "types.h"
#include "mlog.h"
#include "win.h"
#include "node.h"
#include "mmap.h"
extern size_t pgsz;
extern size_t pgmask;
/* node handle limits
*/
#ifdef NODECHK
/* macros for manipulating node handles when handle consistency
* checking is enabled. the upper bits of a handle will be loaded
* with the node gen count, described below. this should not be
* used for production code, since it cuts into the number of
* dirents that xfsrestore can handle.
*/
#define HDLGENCNT 4
#define HDLGENSHIFT (NBBY * sizeof (nh_t) - HDLGENCNT)
#define HDLGENLOMASK ((1 << HDLGENCNT) - 1)
#define HDLGENMASK (HDLGENLOMASK << HDLGENSHIFT)
#define HDLMASK ((1 << HDLGENSHIFT) - 1)
#define HDLGETGEN(h) ((u_char_t) \
(((int)h >> HDLGENSHIFT) \
& \
HDLGENLOMASK))
#define HDLGETNHDL(h) ((nh_t)((int)h & HDLMASK))
#define HDLMKHDL(g, n) ((nh_t)((((int)g << HDLGENSHIFT)\
& \
HDLGENMASK) \
| \
((int)n & HDLMASK)))
#define NH_MAX (HDLMASK)
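/* a worked example of the handle layout (assuming nh_t is a 32-bit
* type): HDLGENSHIFT is 8 * 4 - 4 == 28, so a handle packs a 4-bit
* gen count above a 28-bit node index. HDLMKHDL(0x5, 0x123) yields
* 0x50000123, from which HDLGETGEN() recovers 0x5 and HDLGETNHDL()
* recovers 0x123, and NH_MAX is 0x0fffffff.
*/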
/* the housekeeping byte of each node will hold two check fields:
* a gen count, initialized to 0 and incremented each time a node
* is allocated, to catch re-use of stale handles; and a unique
* pattern, to differentiate a valid node from random memory. two
* unique patterns will be used: one when the node is on the free
* list, another when it is allocated. this allows me to detect use
* of handles to nodes which have been freed.
*/
#define HKPGENCNT HDLGENCNT
#define HKPGENSHIFT (NBBY - HKPGENCNT)
#define HKPGENLOMASK ((1 << HKPGENCNT) - 1)
#define HKPGENMASK (HKPGENLOMASK << HKPGENSHIFT)
#define HKPUNQCNT (NBBY - HKPGENCNT)
#define HKPUNQMASK ((1 << HKPUNQCNT) - 1)
#define HKPGETGEN(b) ((u_char_t) \
(((int)b >> HKPGENSHIFT) \
& \
HKPGENLOMASK))
#define HKPGETUNQ(b) ((u_char_t)((int)b & HKPUNQMASK))
#define HKPMKHKP(g, u) ((u_char_t) \
((((int)g << HKPGENSHIFT) \
& \
HKPGENMASK) \
| \
((int)u & HKPUNQMASK)))
/* simple patterns for detecting a node
*/
#define NODEUNQFREE 0x9
#define NODEUNQALCD 0x6
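/* a worked example of the housekeeping byte: HKPGENSHIFT is
* 8 - 4 == 4, so the gen count occupies the high nibble and the
* unique pattern the low nibble. HKPMKHKP(0x3, NODEUNQALCD) yields
* 0x36; HKPGETGEN() recovers 0x3 and HKPGETUNQ() recovers
* NODEUNQALCD (0x6).
*/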
#else /* NODECHK */
#define NH_MAX (NH_NULL - 1)
#endif /* NODECHK */
/* window constraints
*/
#define NODESPERSEG_MIN 1048576
#define WINMAP_MIN 4
/* reserve the first page for a header to save persistent context
*/
#define NODE_HDRSZ pgsz
typedef int relnix_t;
struct node_hdr {
size_t nh_nodesz;
/* internal node size - user may see something smaller
*/
ix_t nh_nodehkix;
/* index of byte in each node the user has reserved
* for use by me
*/
nh_t nh_nodesperseg;
/* an integral number of internal nodes must fit into a
* segment
*/
size64_t nh_segsz;
/* the backing store is partitioned into segments, which
* can be mapped into VM windows by the win abstraction
*/
size_t nh_winmapmax;
/* maximum number of windows which can be mapped
*/
size_t nh_nodealignsz;
/* user's constraint on node alignment
*/
nh_t nh_freenh;
/* handle of first node of singly-linked list of free nodes
*/
off64_t nh_firstsegoff;
/* offset into backing store of the first segment
*/
nh_t nh_virgnh;
/* handle of next virgin node
*/
int nh_segixshift;
/* bitshift used to extract the segment index from an nh_t
*/
relnix_t nh_relnixmask;
/* bitmask used to extract the node index from an nh_t
* (relative to the start of a segment)
*/
};
typedef struct node_hdr node_hdr_t;
static node_hdr_t *node_hdrp;
static int node_fd;
static inline segix_t
nh2segix(nh_t nh)
{
return nh >> node_hdrp->nh_segixshift;
}
static inline relnix_t
nh2relnix(nh_t nh)
{
return nh & node_hdrp->nh_relnixmask;
}
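/* a worked example of the handle decomposition: with
* nh_segixshift == 20 and nh_relnixmask == 0xfffff (i.e.,
* nodesperseg == NODESPERSEG_MIN), the handle 0x00300007 refers to
* node 7 within segment 3.
*/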
static inline void
node_map_internal(nh_t nh, void **pp)
{
win_map(nh2segix(nh), pp);
if (*pp != NULL) {
relnix_t relnix = nh2relnix(nh);
*pp = (void *)((char *)(*pp) +
((off64_t)relnix *
node_hdrp->nh_nodesz));
}
}
/* ARGSUSED */
static inline void
node_unmap_internal(nh_t nh, void **pp, bool_t freepr)
{
#ifdef NODECHK
register u_char_t hkp;
register u_char_t hdlgen;
register u_char_t nodegen;
register u_char_t nodeunq;
#endif /* NODECHK */
assert(pp);
assert(*pp);
assert(nh != NH_NULL);
/* convert the handle into an index
*/
#ifdef NODECHK
hdlgen = HDLGETGEN(nh);
nh = HDLGETNHDL(nh);
#endif /* NODECHK */
assert(nh <= NH_MAX);
#ifdef NODECHK
hkp = *(*(u_char_t **)pp + node_hdrp->nh_nodehkix);
nodegen = HKPGETGEN(hkp);
assert(nodegen == hdlgen);
nodeunq = HKPGETUNQ(hkp);
if (!freepr) {
assert(nodeunq != NODEUNQFREE);
assert(nodeunq == NODEUNQALCD);
} else {
assert(nodeunq != NODEUNQALCD);
assert(nodeunq == NODEUNQFREE);
}
#endif /* NODECHK */
/* unmap the window containing the node
*/
win_unmap(nh2segix(nh), pp); /* zeros *pp */
}
/* ARGSUSED */
bool_t
node_init(int fd,
off64_t off,
size_t usrnodesz,
ix_t nodehkix,
size_t nodealignsz,
size64_t vmsz,
size64_t dirs_nondirs_cnt)
{
size_t nodesz;
size64_t segsz;
nh_t nodesperseg;
size_t max_segments;
size_t winmapmax;
size_t segcount;
int segixshift;
/* sanity checks
*/
assert(sizeof(node_hdr_t) <= NODE_HDRSZ);
assert(sizeof(nh_t) < sizeof(off64_t));
assert(sizeof(nh_t) <= sizeof(segix_t));
assert(sizeof(nh_t) <= sizeof(relnix_t));
assert(nodehkix < usrnodesz);
assert(usrnodesz >= sizeof(char *) + 1);
/* so a node is at least big enough to hold
* the free list linkage and the housekeeping byte
*/
assert(nodehkix > sizeof(char *));
/* since beginning of each node is used to
* link it in the free list.
*/
/* adjust the user's node size to meet user's alignment constraint
*/
nodesz = (usrnodesz + nodealignsz - 1) & ~(nodealignsz - 1);
/* Calculate the node table params based on the number of inodes in the
* dump, since that's all we know. Ideally we'd base this on the number
* of dirents in the dump instead as there's a node per dirent.
*
* Due to virtual memory constraints and the fact that we don't know
* the final node table size, we can't guarantee that the entire node
* table can be mapped at once. Instead segments will be mapped using a
* window abstraction. Some operations require WINMAP_MIN nodes to be
* referenced at a time, therefore we must ensure that this many
* segments fit in virtual memory.
*
* nodesperseg must be a power of two. Earlier versions did not enforce
* this, and profiling showed that nearly 50% of cpu time during the
* node table construction was consumed doing division and modulo to
* convert an nh_t to a segment index and node offset. By making
* nodesperseg a power of two we can use shift and bitwise-and instead.
*
* Each segment must hold an integral number of nodes and be an integral
* number of pages. The power-of-two requirement above ensures this
* except when nodesperseg is small, so the smallest allowed segsz is
* pgsz * nodesz (i.e., nodesperseg == pgsz). However this is of no
* consequence as we enforce a larger minimum nodesperseg
* (NODESPERSEG_MIN) anyway in order to place a reasonable cap on the
* max number of segments.
*/
assert(NODESPERSEG_MIN >= pgsz);
if (vmsz < WINMAP_MIN * NODESPERSEG_MIN * nodesz) {
mlog(MLOG_NORMAL | MLOG_ERROR, _(
"not enough virtual memory for node abstraction: "
"remaining-vmsz=%llu need=%llu\n"),
vmsz, (size64_t)WINMAP_MIN * NODESPERSEG_MIN * nodesz);
return BOOL_FALSE;
}
/* This is checked as nodes are allocated as well (remember that
* dirs_nondirs_cnt may be less than the number of nodes/dirents).
* Checking this here prevents potential overflow in the logic below.
*/
if (dirs_nondirs_cnt > NH_MAX) {
mlog(MLOG_NORMAL | MLOG_ERROR, _(
"dump contains %llu inodes, restore can only handle %u\n"),
dirs_nondirs_cnt, NH_MAX);
return BOOL_FALSE;
}
for (winmapmax = 0, segcount = 1; winmapmax < WINMAP_MIN; segcount <<= 1) {
nodesperseg = max(dirs_nondirs_cnt / segcount, NODESPERSEG_MIN);
/* nodesperseg must be a power of 2 */
for (segixshift = 0;
(1ULL << segixshift) < nodesperseg;
segixshift++);
/* rounding up to a power of 2 may have caused overflow */
if ((1ULL << segixshift) > NH_MAX)
segixshift--;
nodesperseg = 1UL << segixshift;
max_segments = 1UL << (NBBY * sizeof(nh_t) - segixshift);
segsz = nodesperseg * nodesz;
/* max number of segments that will fit in virtual memory,
* capped at the max possible number of segments
*/
winmapmax = min(vmsz / segsz, max_segments);
}
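/* a worked example of the sizing loop (hypothetical numbers):
* given dirs_nondirs_cnt == 5000000, nodesz == 256 and
* vmsz == 1GiB, the first pass rounds nodesperseg up to 2^23,
* giving segsz == 2GiB and winmapmax == 0. segcount then doubles
* each pass until nodesperseg bottoms out at NODESPERSEG_MIN
* (2^20), giving segsz == 256MiB and winmapmax == 4 == WINMAP_MIN,
* which terminates the loop.
*/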
/* map the abstraction header
*/
assert((NODE_HDRSZ & pgmask) == 0);
assert(!(NODE_HDRSZ % pgsz));
assert(off <= OFF64MAX);
assert(!(off % (off64_t)pgsz));
node_hdrp = (node_hdr_t *)mmap_autogrow(
NODE_HDRSZ,
fd,
off);
if (node_hdrp == (node_hdr_t *)-1) {
mlog(MLOG_NORMAL | MLOG_ERROR, _(
"unable to map node hdr of size %d: %s\n"),
(int)NODE_HDRSZ,
strerror(errno));
return BOOL_FALSE;
}
/* initialize and save persistent context.
*/
node_hdrp->nh_nodesz = nodesz;
node_hdrp->nh_nodehkix = nodehkix;
node_hdrp->nh_segsz = segsz;
node_hdrp->nh_winmapmax = winmapmax;
node_hdrp->nh_nodesperseg = nodesperseg;
node_hdrp->nh_nodealignsz = nodealignsz;
node_hdrp->nh_freenh = NH_NULL;
node_hdrp->nh_firstsegoff = off + (off64_t)NODE_HDRSZ;
node_hdrp->nh_virgnh = 0;
node_hdrp->nh_segixshift = segixshift;
node_hdrp->nh_relnixmask = nodesperseg - 1;
/* save transient context
*/
node_fd = fd;
/* initialize the window abstraction
*/
win_init(fd,
node_hdrp->nh_firstsegoff,
segsz,
winmapmax);
/* announce the results
*/
mlog(MLOG_DEBUG | MLOG_TREE,
"node_init:"
" vmsz = %llu (0x%llx)"
" segsz = %llu (0x%llx)"
" nodesperseg = %u (0x%x)"
" winmapmax = %lu (0x%lx)"
"\n",
vmsz, vmsz,
segsz, segsz,
nodesperseg, nodesperseg,
winmapmax, winmapmax);
return BOOL_TRUE;
}
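/* a sketch of the resulting backing store layout (offsets relative
* to the off argument of node_init()):
*
*   [ node_hdr_t, padded to NODE_HDRSZ (== pgsz) ]
*   [ segment 0: nh_nodesperseg nodes of nh_nodesz bytes each ]
*   [ segment 1: ditto ]   <- grown on demand by node_alloc()
*   ...
*
* each segment is nh_segsz bytes and is mapped and unmapped as a
* unit by the win abstraction.
*/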
bool_t
node_sync(int fd, off64_t off)
{
/* sanity checks
*/
assert(sizeof(node_hdr_t) <= NODE_HDRSZ);
/* map the abstraction header
*/
assert((NODE_HDRSZ & pgmask) == 0);
assert(off <= (off64_t)OFF64MAX);
assert(!(off % (off64_t)pgsz));
node_hdrp = (node_hdr_t *)mmap_autogrow(
NODE_HDRSZ,
fd,
off);
if (node_hdrp == (node_hdr_t *)-1) {
mlog(MLOG_NORMAL | MLOG_ERROR, _(
"unable to map node hdr of size %d: %s\n"),
(int)NODE_HDRSZ,
strerror(errno));
return BOOL_FALSE;
}
/* save transient context
*/
node_fd = fd;
/* initialize the window abstraction
*/
win_init(fd,
node_hdrp->nh_firstsegoff,
node_hdrp->nh_segsz,
node_hdrp->nh_winmapmax);
return BOOL_TRUE;
}
nh_t
node_alloc(void)
{
u_char_t *p;
nh_t nh;
#ifdef NODECHK
u_char_t *hkpp;
u_char_t gen = 0;
u_char_t unq;
#endif /* NODECHK */
/* if there's a node available on the free list, use it.
* otherwise get the next one from the current virgin segment,
* or allocate a new virgin segment if the current one is depleted.
*/
if (node_hdrp->nh_freenh != NH_NULL) {
nh_t *linkagep;
nh = node_hdrp->nh_freenh;
node_map_internal(nh, (void **)&p);
if (p == NULL)
return NH_NULL;
#ifdef NODECHK
hkpp = p + node_hdrp->nh_nodehkix;
/* bump the gen count, reading it from the housekeeping byte
* (the byte at *p is part of the free list linkage)
*/
gen = (u_char_t)(HKPGETGEN(*hkpp) + (u_char_t)1);
unq = HKPGETUNQ(*hkpp);
assert(unq != NODEUNQALCD);
assert(unq == NODEUNQFREE);
#endif /* NODECHK */
/* adjust the free list */
linkagep = (nh_t *)p;
node_hdrp->nh_freenh = *linkagep;
node_unmap_internal(nh, (void **)&p, BOOL_TRUE);
} else {
if (nh2relnix(node_hdrp->nh_virgnh) == 0) {
/* need to start a new virgin segment */
int rval;
off64_t new_seg_off =
node_hdrp->nh_firstsegoff +
(off64_t)nh2segix(node_hdrp->nh_virgnh) *
(off64_t)node_hdrp->nh_segsz;
assert(new_seg_off
<=
OFF64MAX - (off64_t)node_hdrp->nh_segsz);
mlog(MLOG_DEBUG,
"pre-growing new node array segment at %lld "
"size %lld\n",
new_seg_off,
(off64_t)node_hdrp->nh_segsz);
rval = ftruncate64(node_fd,
new_seg_off
+
(off64_t)node_hdrp->nh_segsz);
if (rval) {
mlog(MLOG_NORMAL | MLOG_WARNING | MLOG_TREE, _(
"unable to autogrow node segment %u: "
"%s (%d)\n"),
nh2segix(node_hdrp->nh_virgnh),
strerror(errno),
errno);
}
}
nh = node_hdrp->nh_virgnh++;
}
/* build a handle for the node
*/
if (nh > NH_MAX) {
mlog(MLOG_NORMAL | MLOG_ERROR, _(
"dump contains too many dirents, "
"restore can only handle %u\n"),
NH_MAX);
return NH_NULL;
}
#ifdef NODECHK
node_map_internal(nh, (void **)&p);
if (p == NULL)
abort();
hkpp = p + (int)node_hdrp->nh_nodehkix;
nh = HDLMKHDL(gen, nh);
*hkpp = HKPMKHKP(gen, NODEUNQALCD);
node_unmap_internal(nh, (void **)&p, BOOL_FALSE);
#endif /* NODECHK */
return nh;
}
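/* a minimal caller-side usage sketch (mynode_t is a hypothetical
* payload type satisfying the size and alignment constraints given
* to node_init()):
*
*   nh_t nh = node_alloc();
*   if (nh != NH_NULL) {
*           mynode_t *np = (mynode_t *)node_map(nh);
*           if (np) {
*                   ... fill in *np ...
*                   node_unmap(nh, (void **)&np);
*           }
*           node_free(&nh);   ... nh comes back as NH_NULL ...
*   }
*/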
void *
node_map(nh_t nh)
{
u_char_t *p;
#ifdef NODECHK
register u_char_t hkp;
register u_char_t hdlgen;
register u_char_t nodegen;
register u_char_t nodeunq;
#endif /* NODECHK */
assert(nh != NH_NULL);
/* convert the handle into an index
*/
#ifdef NODECHK
hdlgen = HDLGETGEN(nh);
nh = HDLGETNHDL(nh);
#endif /* NODECHK */
assert(nh <= NH_MAX);
/* map in
*/
p = 0; /* keep lint happy */
node_map_internal(nh, (void **)&p);
if (p == NULL)
return NULL;
#ifdef NODECHK
hkp = *(p + node_hdrp->nh_nodehkix);
nodegen = HKPGETGEN(hkp);
nodeunq = HKPGETUNQ(hkp);
assert(nodegen == hdlgen);
assert(nodeunq != NODEUNQFREE);
assert(nodeunq == NODEUNQALCD);
#endif /* NODECHK */
return (void *)p;
}
void
node_unmap(nh_t nh, void **pp)
{
node_unmap_internal(nh, pp, BOOL_FALSE);
}
void
node_free(nh_t *nhp)
{
nh_t nh;
u_char_t *p;
register nh_t *linkagep;
#ifdef NODECHK
register u_char_t *hkpp;
register u_char_t hdlgen;
register u_char_t nodegen;
register u_char_t nodeunq;
#endif /* NODECHK */
assert(nhp);
nh = *nhp;
assert(nh != NH_NULL);
/* convert the handle into an index
*/
#ifdef NODECHK
hdlgen = HDLGETGEN(nh);
nh = HDLGETNHDL(nh);
#endif /* NODECHK */
assert(nh <= NH_MAX);
/* map in
*/
p = (u_char_t *)node_map(nh);
if (p == NULL) {
*nhp = NH_NULL;
return;
}
#ifdef NODECHK
/* fix up unique field
*/
hkpp = p + node_hdrp->nh_nodehkix;
nodegen = HKPGETGEN(*hkpp);
nodeunq = HKPGETUNQ(*hkpp);
assert(nodegen == hdlgen);
assert(nodeunq != NODEUNQFREE);
assert(nodeunq == NODEUNQALCD);
*hkpp = HKPMKHKP(nodegen, NODEUNQFREE);
#endif /* NODECHK */
/* put node on free list
*/
linkagep = (nh_t *)p;
*linkagep = node_hdrp->nh_freenh;
node_hdrp->nh_freenh = nh;
/* map out
*/
node_unmap_internal(nh, (void **)&p, BOOL_TRUE);
/* invalidate caller's handle
*/
*nhp = NH_NULL;
}