blob: b5df88ac1f31d2df4e75d208abe9cd02833ffe01 [file] [log] [blame]
/*
* Copyright (c) 2000-2001 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <xfs/xfs.h>
#include <xfs/jdm.h>
#include <sys/mman.h>
#include <errno.h>
#include <memory.h>
#include <limits.h>
#include "types.h"
#include "mlog.h"
#include "win.h"
#include "node.h"
#include "mmap.h"
#define max( a, b ) ( ( ( a ) > ( b ) ) ? ( a ) : ( b ) )
#define min( a, b ) ( ( ( a ) < ( b ) ) ? ( a ) : ( b ) )
extern size_t pgsz;
extern size_t pgmask;
/* node handle limits
*/
#ifdef NODECHK
/* macros for manipulating node handles when handle consistency
* checking is enabled. the upper bits of a handle will be loaded
* with the node gen count, described below. this should not be
* used for production code, it cuts into the number of dirents
* that xfsrestore can handle.
*/
#define HDLGENCNT 4
#define HDLGENSHIFT ( NBBY * sizeof ( nh_t ) - HDLGENCNT )
#define HDLGENLOMASK ( ( 1 << HDLGENCNT ) - 1 )
#define HDLGENMASK ( HDLGENLOMASK << HDLGENSHIFT )
#define HDLMASK ( ( 1 << HDLGENSHIFT ) - 1 )
#define HDLGETGEN( h ) ( ( u_char_t ) \
( ( ( int )h >> HDLGENSHIFT ) \
& \
HDLGENLOMASK ))
#define HDLGETNHDL( h ) ( ( nh_t )( ( int )h & HDLMASK ))
#define HDLMKHDL( g, n ) ( ( nh_t )( ( ( ( int )g << HDLGENSHIFT )\
& \
HDLGENMASK ) \
| \
( ( int )n & HDLMASK )))
#define NH_MAX ( HDLMASK )
/* the housekeeping byte of each node will hold two check fields:
* a gen count, initialized to 0 and incremented each time a node
* is allocated, to catch re-use of stale handles; and unique pattern, to
* differentiate a valid node from random memory. two unique patterns will
* be used; one when the node is on the free list, another when it is
* allocated. this allows me to detect use of handles to nodes which have
* be freed.
*/
#define HKPGENCNT HDLGENCNT
#define HKPGENSHIFT ( NBBY - HKPGENCNT )
#define HKPGENLOMASK ( ( 1 << HKPGENCNT ) - 1 )
#define HKPGENMASK ( HKPGENLOMASK << HKPGENSHIFT )
#define HKPUNQCNT ( NBBY - HKPGENCNT )
#define HKPUNQMASK ( ( 1 << HKPUNQCNT ) - 1 )
#define HKPGETGEN( b ) ( ( u_char_t ) \
( ( ( int )b >> HKPGENSHIFT ) \
& \
HKPGENLOMASK ))
#define HKPGETUNQ( b ) ( ( u_char_t )( ( int )b & HKPUNQMASK ))
#define HKPMKHKP( g, u ) ( ( u_char_t ) \
( ( ( ( int )g << HKPGENSHIFT ) \
& \
HKPGENMASK ) \
| \
( ( int )u & HKPUNQMASK )))
/* simple patterns for detecting a node
*/
#define NODEUNQFREE 0x9
#define NODEUNQALCD 0x6
#else /* NODECHK */
#define NH_MAX ( NH_NULL - 1 )
#endif /* NODECHK */
/* window constraints
*/
#define NODESPERSEG_MIN 1048576
#define WINMAP_MIN 4
/* reserve the firstpage for a header to save persistent context
*/
#define NODE_HDRSZ pgsz
typedef intgen_t relnix_t;
struct node_hdr {
size_t nh_nodesz;
/* internal node size - user may see something smaller
*/
ix_t nh_nodehkix;
/* index of byte in each node the user has reserved
* for use by me
*/
nh_t nh_nodesperseg;
/* an integral number of internal nodes must fit into a
* segment
*/
size64_t nh_segsz;
/* the backing store is partitoned into segment, which
* can be mapped into VM windows by the win abstraction
*/
size_t nh_winmapmax;
/* maximum number of windows which can be mapped
*/
size_t nh_nodealignsz;
/* user's constraint on node alignment
*/
nh_t nh_freenh;
/* handle of first node of singly-linked list of free nodes
*/
off64_t nh_firstsegoff;
/* offset into backing store of the first segment
*/
nh_t nh_virgnh;
/* handle of next virgin node
*/
intgen_t nh_segixshift;
/* bitshift used to extract the segment index from an nh_t
*/
relnix_t nh_relnixmask;
/* bitmask used to extract the node index from an nh_t
* (relative to the start of a segment)
*/
};
typedef struct node_hdr node_hdr_t;
static node_hdr_t *node_hdrp;
static intgen_t node_fd;
static inline segix_t
nh2segix( nh_t nh )
{
return nh >> node_hdrp->nh_segixshift;
}
static inline relnix_t
nh2relnix( nh_t nh )
{
return nh & node_hdrp->nh_relnixmask;
}
static inline void
node_map_internal( nh_t nh, void **pp )
{
win_map( nh2segix( nh ), pp );
if ( *pp != NULL ) {
relnix_t relnix = nh2relnix( nh );
*pp = ( void * )( ( char * )( *pp ) +
( ( off64_t )relnix *
node_hdrp->nh_nodesz ) );
}
}
/* ARGSUSED */
static inline void
node_unmap_internal( nh_t nh, void **pp, bool_t freepr )
{
#ifdef NODECHK
register u_char_t hkp;
register u_char_t hdlgen;
register u_char_t nodegen;
register u_char_t nodeunq;
#endif /* NODECHK */
ASSERT( pp );
ASSERT( *pp );
ASSERT( nh != NH_NULL );
/* convert the handle into an index
*/
#ifdef NODECHK
hdlgen = HDLGETGEN( nh );
nh = HDLGETNHDL( nh );
#endif /* NODECHK */
ASSERT( nh <= NH_MAX );
#ifdef NODECHK
hkp = *( *( u_char_t ** )pp + node_hdrp->nh_nodehkix );
nodegen = HKPGETGEN( hkp );
ASSERT( nodegen == hdlgen );
nodeunq = HKPGETUNQ( hkp );
if ( ! freepr ) {
ASSERT( nodeunq != NODEUNQFREE );
ASSERT( nodeunq == NODEUNQALCD );
} else {
ASSERT( nodeunq != NODEUNQALCD );
ASSERT( nodeunq == NODEUNQFREE );
}
#endif /* NODECHK */
/* unmap the window containing the node
*/
win_unmap( nh2segix( nh ), pp ); /* zeros *pp */
}
/* ARGSUSED */
bool_t
node_init( intgen_t fd,
off64_t off,
size_t usrnodesz,
ix_t nodehkix,
size_t nodealignsz,
size64_t vmsz,
size64_t dirs_nondirs_cnt )
{
size_t nodesz;
size64_t segsz;
nh_t nodesperseg;
size_t max_segments;
size_t winmapmax;
size_t segcount;
intgen_t segixshift;
/* sanity checks
*/
ASSERT( sizeof( node_hdr_t ) <= NODE_HDRSZ );
ASSERT( sizeof( nh_t ) < sizeof( off64_t ));
ASSERT( sizeof( nh_t ) <= sizeof( segix_t ));
ASSERT( sizeof( nh_t ) <= sizeof( relnix_t ));
ASSERT( nodehkix < usrnodesz );
ASSERT( usrnodesz >= sizeof( char * ) + 1 );
/* so node is at least big enough to hold
* the free list linkage and the housekeeping byte
*/
ASSERT( nodehkix > sizeof( char * ));
/* since beginning of each node is used to
* link it in the free list.
*/
/* adjust the user's node size to meet user's alignment constraint
*/
nodesz = ( usrnodesz + nodealignsz - 1 ) & ~( nodealignsz - 1 );
/* Calculate the node table params based on the number of inodes in the
* dump, since that's all we know. Ideally we'd base this on the number
* of dirents in the dump instead as there's a node per dirent.
*
* Due to virtual memory constraints and the fact that we don't know
* the final node table size, we can't guarantee that the entire node
* table can be mapped at once. Instead segments will be mapped using a
* window abstraction. Some operations require WINMAP_MIN nodes to be
* referenced at a time, therefore we must ensure that this many
* segments fit in virtual memory.
*
* nodesperseg must be a power of two. Earlier versions did not enforce
* this, and profiling showed that nearly 50% of cpu time during the
* node table construction was consumed doing division and modulo to
* convert an nh_t to a segment index and node offset. By making
* nodesperseg a power of two we can use shift and bitwise-and instead.
*
* Each segment must hold an integral number of nodes and be an integral
* number of pages. #1 ensures this except when nodesperseg is small, so
* the smallest allowed segsz is pgsz * nodesz (i.e., nodesperseg ==
* pgsz). However this is of no consequence as we enforce a larger
* minimum nodesperseg (NODESPERSEG_MIN) anyway in order to place a
* reasonable cap on the max number of segments.
*/
ASSERT( NODESPERSEG_MIN >= pgsz );
if ( vmsz < WINMAP_MIN * NODESPERSEG_MIN * nodesz ) {
mlog( MLOG_NORMAL | MLOG_ERROR, _(
"not enough virtual memory for node abstraction: "
"remaining-vsmz=%llu need=%llu\n"),
vmsz, WINMAP_MIN * NODESPERSEG_MIN * nodesz );
return BOOL_FALSE;
}
/* This is checked as nodes are allocated as well (remember that
* dirs_nondirs_cnt may be less than the number of nodes/dirents).
* Checking this here prevents potential overflow in the logic below.
*/
if ( dirs_nondirs_cnt > NH_MAX ) {
mlog( MLOG_NORMAL | MLOG_ERROR, _(
"dump contains %llu inodes, restore can only handle %u\n"),
dirs_nondirs_cnt, NH_MAX );
return BOOL_FALSE;
}
for ( winmapmax = 0, segcount = 1; winmapmax < WINMAP_MIN; segcount <<= 1 ) {
nodesperseg = max( dirs_nondirs_cnt / segcount, NODESPERSEG_MIN );
/* nodesperseg must be a power of 2 */
for ( segixshift = 0;
( 1ULL << segixshift ) < nodesperseg;
segixshift++ );
/* rounding up to a power of 2 may have caused overflow */
if ( ( 1ULL << segixshift ) > NH_MAX )
segixshift--;
nodesperseg = 1UL << segixshift;
max_segments = 1UL << ( NBBY * sizeof(nh_t) - segixshift );
segsz = nodesperseg * nodesz;
/* max number of segments that will fit in virtual memory,
* capped at the max possible number of segments
*/
winmapmax = min( vmsz / segsz, max_segments );
}
/* map the abstraction header
*/
ASSERT( ( NODE_HDRSZ & pgmask ) == 0 );
ASSERT( ! ( NODE_HDRSZ % pgsz ));
ASSERT( off <= OFF64MAX );
ASSERT( ! ( off % ( off64_t )pgsz ));
node_hdrp = ( node_hdr_t * )mmap_autogrow(
NODE_HDRSZ,
fd,
off );
if ( node_hdrp == (node_hdr_t *)-1 ) {
mlog( MLOG_NORMAL | MLOG_ERROR, _(
"unable to map node hdr of size %d: %s\n"),
NODE_HDRSZ,
strerror( errno ));
return BOOL_FALSE;
}
/* initialize and save persistent context.
*/
node_hdrp->nh_nodesz = nodesz;
node_hdrp->nh_nodehkix = nodehkix;
node_hdrp->nh_segsz = segsz;
node_hdrp->nh_winmapmax = winmapmax;
node_hdrp->nh_nodesperseg = nodesperseg;
node_hdrp->nh_nodealignsz = nodealignsz;
node_hdrp->nh_freenh = NH_NULL;
node_hdrp->nh_firstsegoff = off + ( off64_t )NODE_HDRSZ;
node_hdrp->nh_virgnh = 0;
node_hdrp->nh_segixshift = segixshift;
node_hdrp->nh_relnixmask = nodesperseg - 1;
/* save transient context
*/
node_fd = fd;
/* initialize the window abstraction
*/
win_init( fd,
node_hdrp->nh_firstsegoff,
segsz,
winmapmax );
/* announce the results
*/
mlog( MLOG_DEBUG | MLOG_TREE,
"node_init:"
" vmsz = %llu (0x%llx)"
" segsz = %llu (0x%llx)"
" nodesperseg = %u (0x%x)"
" winmapmax = %lu (0x%lx)"
"\n",
vmsz, vmsz,
segsz, segsz,
nodesperseg, nodesperseg,
winmapmax, winmapmax );
return BOOL_TRUE;
}
bool_t
node_sync( intgen_t fd, off64_t off )
{
/* sanity checks
*/
ASSERT( sizeof( node_hdr_t ) <= NODE_HDRSZ );
/* map the abstraction header
*/
ASSERT( ( NODE_HDRSZ & pgmask ) == 0 );
ASSERT( off <= ( off64_t )OFF64MAX );
ASSERT( ! ( off % ( off64_t )pgsz ));
node_hdrp = ( node_hdr_t * )mmap_autogrow(
NODE_HDRSZ,
fd,
off );
if ( node_hdrp == (node_hdr_t *)-1 ) {
mlog( MLOG_NORMAL | MLOG_ERROR, _(
"unable to map node hdr of size %d: %s\n"),
NODE_HDRSZ,
strerror( errno ));
return BOOL_FALSE;
}
/* save transient context
*/
node_fd = fd;
/* initialize the window abstraction
*/
win_init( fd,
node_hdrp->nh_firstsegoff,
node_hdrp->nh_segsz,
node_hdrp->nh_winmapmax );
return BOOL_TRUE;
}
nh_t
node_alloc( void )
{
u_char_t *p;
nh_t nh;
#ifdef NODECHK
u_char_t *hkpp;
u_char_t gen = 0;
u_char_t unq;
#endif /* NODECHK */
/* if there's a node available on the free list, use it.
* otherwise get the next one from the current virgin segment,
* or allocate a new virgin segment if the current one is depleted.
*/
if ( node_hdrp->nh_freenh != NH_NULL ) {
nh_t *linkagep;
nh = node_hdrp->nh_freenh;
node_map_internal( nh, ( void ** )&p );
if (p == NULL)
return NH_NULL;
#ifdef NODECHK
hkpp = p + node_hdrp->nh_nodehkix;
gen = ( u_char_t )( HKPGETGEN( *p ) + ( u_char_t )1 );
unq = HKPGETUNQ( *hkpp );
ASSERT( unq != NODEUNQALCD );
ASSERT( unq == NODEUNQFREE );
#endif /* NODECHK */
/* adjust the free list */
linkagep = ( nh_t * )p;
node_hdrp->nh_freenh = *linkagep;
node_unmap_internal( nh, ( void ** )&p, BOOL_TRUE );
} else {
if ( nh2relnix( node_hdrp->nh_virgnh ) == 0 ) {
/* need to start a new virgin segment */
intgen_t rval;
off64_t new_seg_off =
node_hdrp->nh_firstsegoff +
( off64_t )nh2segix( node_hdrp->nh_virgnh ) *
( off64_t )node_hdrp->nh_segsz;
ASSERT( new_seg_off
<=
OFF64MAX - ( off64_t )node_hdrp->nh_segsz );
mlog( MLOG_DEBUG,
"pre-growing new node array segment at %lld "
"size %lld\n",
new_seg_off,
( off64_t )node_hdrp->nh_segsz );
rval = ftruncate64( node_fd,
new_seg_off
+
( off64_t )node_hdrp->nh_segsz );
if ( rval ) {
mlog( MLOG_NORMAL | MLOG_WARNING | MLOG_TREE, _(
"unable to autogrow node segment %u: "
"%s (%d)\n"),
nh2segix( node_hdrp->nh_virgnh ),
strerror( errno ),
errno );
}
}
nh = node_hdrp->nh_virgnh++;
}
/* build a handle for node
*/
if ( nh > NH_MAX ) {
mlog( MLOG_NORMAL | MLOG_ERROR, _(
"dump contains too many dirents, "
"restore can only handle %llu\n"),
NH_MAX );
return NH_NULL;
}
#ifdef NODECHK
node_map_internal( nh , ( void ** )&p );
if (p == NULL)
abort();
hkpp = p + ( int )node_hdrp->nh_nodehkix;
nh = HDLMKHDL( gen, nh );
*hkpp = HKPMKHKP( gen, NODEUNQALCD );
node_unmap_internal( nh, ( void ** )&p, BOOL_FALSE );
#endif /* NODECHK */
return nh;
}
void *
node_map( nh_t nh )
{
u_char_t *p;
#ifdef NODECHK
register u_char_t hkp;
register u_char_t hdlgen;
register u_char_t nodegen;
register u_char_t nodeunq;
#endif /* NODECHK */
ASSERT( nh != NH_NULL );
/* convert the handle into an index
*/
#ifdef NODECHK
hdlgen = HDLGETGEN( nh );
nh = HDLGETNHDL( nh );
#endif /* NODECHK */
ASSERT( nh <= NH_MAX );
/* map in
*/
p = 0; /* keep lint happy */
node_map_internal( nh, ( void ** )&p );
if (p == NULL)
return NULL;
#ifdef NODECHK
hkp = *( p + node_hdrp->nh_nodehkix );
nodegen = HKPGETGEN( hkp );
nodeunq = HKPGETUNQ( hkp );
ASSERT( nodegen == hdlgen );
ASSERT( nodeunq != NODEUNQFREE );
ASSERT( nodeunq == NODEUNQALCD );
#endif /* NODECHK */
return ( void * )p;
}
void
node_unmap( nh_t nh, void **pp )
{
node_unmap_internal( nh, pp, BOOL_FALSE );
}
void
node_free( nh_t *nhp )
{
nh_t nh;
u_char_t *p;
register nh_t *linkagep;
#ifdef NODECHK
register u_char_t *hkpp;
register u_char_t hdlgen;
register u_char_t nodegen;
register u_char_t nodeunq;
#endif /* NODECHK */
ASSERT( nhp );
nh = *nhp;
ASSERT( nh != NH_NULL );
/* convert the handle into an index
*/
#ifdef NODECHK
hdlgen = HDLGETGEN( nh );
nh = HDLGETNHDL( nh );
#endif /* NODECHK */
ASSERT( nh <= NH_MAX );
/* map in
*/
p = ( u_char_t * )node_map( nh );
if (p == NULL){
*nhp = NH_NULL;
return;
}
#ifdef NODECHK
/* fix up unique field
*/
hkpp = p + node_hdrp->nh_nodehkix;
nodegen = HKPGETGEN( *hkpp );
nodeunq = HKPGETUNQ( *hkpp );
ASSERT( nodegen == hdlgen );
ASSERT( nodeunq != NODEUNQFREE );
ASSERT( nodeunq == NODEUNQALCD );
*hkpp = HKPMKHKP( nodegen, NODEUNQFREE );
#endif /* NODECHK */
/* put node on free list
*/
linkagep = ( nh_t * )p;
*linkagep = node_hdrp->nh_freenh;
node_hdrp->nh_freenh = nh;
/* map out
*/
node_unmap_internal( nh, ( void ** )&p, BOOL_TRUE );
/* invalidate caller's handle
*/
*nhp = NH_NULL;
}