blob: 0f3b57342965c0b06ae997276454bcbbb8a4aaca [file] [log] [blame]
/*
* Copyright (c) 2000-2001 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <xfs/xfs.h>
#include <xfs/jdm.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <time.h>
#include <stdlib.h>
#include <memory.h>
#include <errno.h>
#include "types.h"
#include "mlog.h"
#include "qlock.h"
#include "mmap.h"
#include "win.h"
/* defined outside this file. presumably the system page size and the
 * matching low-bit mask (TODO confirm against the definition); in this
 * file they are only used to check that the backing store offset and
 * the segment size are page aligned.
 */
extern size_t pgsz;
extern size_t pgmask;
/* number of entries to add to the segmap if
 * it needs to be resized. also used as the initial
 * length of the segmap in win_init( ).
 */
#define SEGMAP_INCR 16
/*
 * critical region
 *
 * Take / release the window lock, unless locking has been switched off
 * with win_locks_off( ). Each macro is wrapped in do { } while ( 0 ) so
 * that it expands to exactly one statement: a bare "if" expansion would
 * silently capture an "else" that follows the macro invocation.
 * Invoke as a statement with a trailing semicolon.
 */
#define CRITICAL_BEGIN() \
	do { \
		if ( ! locksoffpr ) \
			qlock_lock( tranp->t_qlockh ); \
	} while ( 0 )
#define CRITICAL_END() \
	do { \
		if ( ! locksoffpr ) \
			qlock_unlock( tranp->t_qlockh ); \
	} while ( 0 )
/* window descriptor: describes one mapped segment of the backing store
 */
typedef struct win {
	/* index of segment mapped by this window */
	segix_t w_segix;

	/* window virtual address */
	void *w_p;

	/* reference count; the window sits on the LRU list only
	 * while this is zero
	 */
	size_t w_refcnt;

	/* LRU list forward linkage */
	struct win *w_nextp;

	/* LRU list backward linkage */
	struct win *w_prevp;
} win_t;
/* forward declarations
*/
/* grows tranp->t_segmap so that segix becomes a valid index;
 * defined at the end of this file.
 */
static void win_segmap_resize( segix_t segix );
/* transient state: everything the window abstraction needs to remember
 * between calls. a single instance is allocated by win_init( ).
 */
typedef struct tran {
	/* file descriptor of backing store to be windowed */
	intgen_t t_fd;

	/* offset of first seg within backing store (for mmap( )) */
	off64_t t_firstoff;

	/* backing store segment / window size */
	size64_t t_segsz;

	/* maximum number of windows to allocate */
	size_t t_winmax;

	/* number of windows allocated */
	size_t t_wincnt;

	/* number of window mmap calls made */
	size_t t_winmmaps;

	/* LRU head (re-use from this end) */
	win_t *t_lruheadp;

	/* LRU tail (put here when no refs) */
	win_t *t_lrutailp;

	/* mapping from segment index to window. an entry
	 * points to a win_t struct if segment is currently
	 * mapped, otherwise the entry is NULL.
	 */
	win_t **t_segmap;

	/* number of segments currently represented in
	 * t_segmap array.
	 */
	size_t t_segmaplen;

	/* for establishing critical regions */
	qlockh_t t_qlockh;
} tran_t;
/* the single instance of the transient state. zero until win_init( )
 * allocates it.
 */
static tran_t *tranp = 0;
/* when BOOL_TRUE, CRITICAL_BEGIN( ) / CRITICAL_END( ) skip the qlock
 * calls. toggled by win_locks_off( ) / win_locks_on( ).
 */
static bool_t locksoffpr = BOOL_FALSE;
/*
 * win_locks_off - make the critical region macros no-ops.
 *
 * The flag is written without any synchronization, so this must only be
 * called from a region where a single thread can execute it.
 */
void
win_locks_off( void )
{
	locksoffpr = BOOL_TRUE;
}
/*
 * win_locks_on - re-enable locking in the critical region macros.
 *
 * The flag is written without any synchronization, so this must only be
 * called from a region where a single thread can execute it.
 */
void
win_locks_on( void )
{
	locksoffpr = BOOL_FALSE;
}
/*
 * win_getnum_mmaps - report how many window mmap calls have been made
 * so far (the t_winmmaps counter bumped by win_map( )).
 *
 * Reads the counter without entering the critical region.
 */
size_t
win_getnum_mmaps( void )
{
	size_t mmapcnt = tranp->t_winmmaps;

	return mmapcnt;
}
/*
 * win_init - allocate and set up the window abstraction's state.
 *
 * fd:       file descriptor of the backing store to be windowed
 * firstoff: offset of the first segment within the backing store
 * segsz:    size of each segment / window
 * winmax:   maximum number of windows to allocate
 *
 * Must be called once, before any other win_* function that touches
 * the transient state.
 */
void
win_init( intgen_t fd,
	  off64_t firstoff,
	  size64_t segsz,
	  size_t winmax )
{
	win_t **segmap;

	/* the offset and the segment size must both have no bits
	 * in common with pgmask
	 */
	ASSERT( ( firstoff & ( off64_t )pgmask ) == 0 );
	ASSERT( ( segsz & pgmask ) == 0 );

	/* the transient state may only be set up once
	 */
	ASSERT( tranp == 0 );
	tranp = ( tran_t * )calloc( 1, sizeof( *tranp ));
	ASSERT( tranp );

	/* record the caller's parameters; every other field starts
	 * out zeroed by calloc
	 */
	tranp->t_fd = fd;
	tranp->t_firstoff = firstoff;
	tranp->t_segsz = segsz;
	tranp->t_winmax = winmax;

	/* start with an all-NULL segment map of SEGMAP_INCR entries
	 */
	segmap = ( win_t ** )calloc( SEGMAP_INCR, sizeof( *segmap ));
	ASSERT( segmap );
	tranp->t_segmap = segmap;
	tranp->t_segmaplen = SEGMAP_INCR;

	/* initialize critical region enforcer
	 */
	tranp->t_qlockh = qlock_alloc( QLOCK_ORD_WIN );
}
/*
 * win_map - map a segment of the backing store and hand back its address.
 *
 * segix: index of the segment to map.
 * pp:    out parameter. receives the window's virtual address, or NULL
 *        if no window could be obtained (every window is referenced and
 *        the window limit has been reached, or the mmap failed and could
 *        not be retried).
 *
 * Every successful call takes one reference on the window; the caller
 * drops it with win_unmap( ). A window whose reference count is zero
 * stays mapped on the LRU list until it is recycled for another segment.
 */
void
win_map( segix_t segix, void **pp )
{
	off64_t segoff;
	win_t *winp;

	CRITICAL_BEGIN();

#ifdef TREE_DEBUG
	mlog(MLOG_DEBUG | MLOG_TREE | MLOG_NOLOCK,
	     "win_map(segix=%u,addr=%p)\n", segix, pp);
#endif
	/* resize the array if necessary */
	if ( segix >= tranp->t_segmaplen ) {
		win_segmap_resize( segix );
	}

	/* see if segment already mapped. if ref cnt zero,
	 * remove from LRU list.
	 */
	winp = tranp->t_segmap[segix];
	if ( winp ) {
#ifdef TREE_DEBUG
		mlog(MLOG_DEBUG | MLOG_TREE | MLOG_NOLOCK,
		     "win_map(): requested segment already mapped\n");
#endif
		if ( winp->w_refcnt == 0 ) {
			ASSERT( tranp->t_lruheadp );
			ASSERT( tranp->t_lrutailp );
			if ( tranp->t_lruheadp == winp ) {
				if ( tranp->t_lrutailp == winp ) {
					/* only entry on the list */
					tranp->t_lruheadp = 0;
					tranp->t_lrutailp = 0;
				} else {
					/* at the head, with a successor */
					tranp->t_lruheadp = winp->w_nextp;
					tranp->t_lruheadp->w_prevp = 0;
				}
			} else {
				if ( tranp->t_lrutailp == winp ) {
					/* at the tail, with a predecessor */
					tranp->t_lrutailp = winp->w_prevp;
					tranp->t_lrutailp->w_nextp = 0;
				} else {
					/* somewhere in the middle */
					winp->w_prevp->w_nextp = winp->w_nextp;
					winp->w_nextp->w_prevp = winp->w_prevp;
				}
			}
			winp->w_prevp = 0;
			winp->w_nextp = 0;
		} else {
			/* referenced windows are never on the LRU list */
			ASSERT( ! winp->w_prevp );
			ASSERT( ! winp->w_nextp );
		}
		winp->w_refcnt++;
		*pp = winp->w_p;
		CRITICAL_END();
		return;
	}

	/* Allocate a new descriptor if we haven't yet hit the maximum,
	 * otherwise reuse any descriptor on the LRU list.
	 */
	if ( tranp->t_wincnt < tranp->t_winmax ) {
#ifdef TREE_DEBUG
		mlog(MLOG_DEBUG | MLOG_TREE | MLOG_NOLOCK,
		     "win_map(): create a new window\n");
#endif
		winp = ( win_t * )calloc( 1, sizeof( win_t ));
		ASSERT( winp );
		tranp->t_wincnt++;
	} else if ( tranp->t_lruheadp ) {
		/* REFERENCED */
		intgen_t rval;
#ifdef TREE_DEBUG
		mlog(MLOG_DEBUG | MLOG_TREE | MLOG_NOLOCK,
		     "win_map(): get head from lru freelist & unmap\n");
#endif
		ASSERT( tranp->t_lrutailp );

		/* pop the head of the LRU list */
		winp = tranp->t_lruheadp;
		tranp->t_lruheadp = winp->w_nextp;
		if ( tranp->t_lruheadp ) {
			tranp->t_lruheadp->w_prevp = 0;
		} else {
			tranp->t_lrutailp = 0;
		}

		/* forget the old segment, drop its mapping and wipe the
		 * descriptor so it can be reused below
		 */
		tranp->t_segmap[winp->w_segix] = NULL;
		rval = munmap( winp->w_p, tranp->t_segsz );
		ASSERT( ! rval );
		memset( ( void * )winp, 0, sizeof( win_t ));
	} else {
		/* at the limit and every window is still referenced */
		ASSERT( tranp->t_wincnt == tranp->t_winmax );
		*pp = NULL;
		CRITICAL_END();
		mlog( MLOG_NORMAL | MLOG_WARNING, _(
		      "all map windows in use. Check virtual memory limits\n"));
		return;
	}

	/* calculate offset of segment
	 */
	segoff = segix * ( off64_t )tranp->t_segsz;

	/* map the window
	 */
	ASSERT( tranp->t_segsz >= 1 );
	ASSERT( tranp->t_firstoff
		<=
		OFF64MAX - segoff - ( off64_t )tranp->t_segsz + 1ll );
	ASSERT( ! ( tranp->t_segsz % pgsz ));
	ASSERT( ! ( ( tranp->t_firstoff + segoff ) % ( off64_t )pgsz ));
#ifdef TREE_DEBUG
	mlog(MLOG_DEBUG | MLOG_TREE | MLOG_NOLOCK,
	     "win_map(): mmap segment at %lld, size = %llu\n",
	     ( off64_t )( tranp->t_firstoff + segoff ), tranp->t_segsz);
#endif
	tranp->t_winmmaps++;
	winp->w_p = mmap_autogrow(
			    tranp->t_segsz,
			    tranp->t_fd,
			    ( off64_t )( tranp->t_firstoff + segoff ));
	if ( winp->w_p == (void *)-1 ) {
		/* save errno before any other call can overwrite it */
		int error = errno;
		bool_t retrypr;

		/* both values are 64 bits wide: convert them explicitly and
		 * use matching ll conversions, otherwise the variadic
		 * arguments (including the %s) are misread.
		 */
		mlog( MLOG_NORMAL | MLOG_ERROR, _(
		      "win_map(): unable to map a node segment of size %llu at %lld: %s\n"),
		      ( unsigned long long )tranp->t_segsz,
		      ( long long )( tranp->t_firstoff + segoff ),
		      strerror( error ));

		/* the descriptor is discarded, and the window limit is
		 * lowered with it (presumably so that later calls reuse
		 * LRU windows instead of creating more)
		 */
		tranp->t_wincnt--;
		tranp->t_winmax--;

		/* decide about the retry while the LRU list is still
		 * protected by the lock
		 */
		if ( error == ENOMEM && tranp->t_lruheadp ) {
			retrypr = BOOL_TRUE;
		} else {
			retrypr = BOOL_FALSE;
		}
		CRITICAL_END();
		free(winp);

		if ( retrypr ) {
			mlog( MLOG_NORMAL | MLOG_ERROR,
			      _("win_map(): try to select a different win_t\n"));
			win_map(segix, pp);
			return;
		}
		*pp = NULL;
		return;
	}

	/* publish the new mapping with a single reference held
	 */
	winp->w_segix = segix;
	ASSERT( winp->w_refcnt == 0 );
	winp->w_refcnt++;
	tranp->t_segmap[winp->w_segix] = winp;
	*pp = winp->w_p;
	CRITICAL_END();
}
/*
 * win_unmap - drop one reference on the window mapping segment segix.
 *
 * segix: index of a segment that is currently mapped.
 * pp:    address of the caller's pointer into that window; the pointer
 *        is zeroed on return.
 *
 * The mapping itself is kept. Once the last reference is gone the window
 * is appended to the tail of the LRU list, from where win_map( ) can
 * either revive it or recycle it for another segment.
 */
void
win_unmap( segix_t segix, void **pp )
{
	win_t *winp;

	CRITICAL_BEGIN();

	/* the segment must currently have a window
	 */
	ASSERT( segix < tranp->t_segmaplen );
	winp = tranp->t_segmap[segix];
	ASSERT( winp );

	/* the caller's pointer must lie inside that window
	 */
	ASSERT( pp );
	ASSERT( *pp );
	ASSERT( *pp >= winp->w_p );
	ASSERT( *pp < ( void * )( ( char * )( winp->w_p ) + tranp->t_segsz ));

	/* release the reference. a window that was referenced
	 * cannot have been on the LRU list.
	 */
	ASSERT( winp->w_refcnt > 0 );
	winp->w_refcnt--;
	ASSERT( ! winp->w_prevp );
	ASSERT( ! winp->w_nextp );

	/* last reference gone: append to the LRU tail
	 */
	if ( winp->w_refcnt == 0 ) {
		win_t *oldtailp = tranp->t_lrutailp;

		if ( oldtailp == 0 ) {
			/* list was empty: the window becomes the head too */
			ASSERT( ! tranp->t_lruheadp );
			ASSERT( ! winp->w_prevp );
			tranp->t_lruheadp = winp;
		} else {
			ASSERT( tranp->t_lruheadp );
			winp->w_prevp = oldtailp;
			oldtailp->w_nextp = winp;
		}
		tranp->t_lrutailp = winp;
		ASSERT( ! winp->w_nextp );
	}

	/* zero the caller's pointer
	 */
	*pp = 0;

	CRITICAL_END();
}
/*
 * win_segmap_resize - grow the segment map so that segix is a valid index.
 *
 * segix: segment index that must fit into the map afterwards.
 *
 * The new length is segix + SEGMAP_INCR, and the entries past the old
 * length are cleared. Called from win_map( ) inside its critical region.
 *
 * The current map and its length are only replaced once the reallocation
 * has succeeded. The caller indexes the map with segix as soon as this
 * returns and has no way to be told about a failure, so running out of
 * memory here is fatal: it is reported and the process aborts instead of
 * going on with a NULL or too-short map.
 */
static void
win_segmap_resize(segix_t segix)
{
	size_t oldlen = tranp->t_segmaplen;
	size_t newlen = segix + SEGMAP_INCR;
	win_t **newmap;

	/* keep the result in a temporary: on failure realloc leaves the
	 * old block allocated and returns NULL
	 */
	newmap = (win_t **)
		realloc( tranp->t_segmap, newlen * sizeof(win_t *) );
	if ( ! newmap ) {
		mlog( MLOG_NORMAL | MLOG_ERROR, _(
		      "win_segmap_resize(): unable to allocate a segment map of %llu entries\n"),
		      ( unsigned long long )newlen );
		abort();
	}

	/* clear the new portion of the array */
	memset( newmap + oldlen, 0, ( newlen - oldlen ) * sizeof(win_t *) );

	tranp->t_segmap = newmap;
	tranp->t_segmaplen = newlen;
}