blob: 155419ccf1453607c6c74d52fe444b4811c94486 [file] [log] [blame]
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Author: Peter J. Braam <braam@clusterfs.com>
* Copyright (C) 1998 Stelias Computing Inc
* Copyright (C) 1999 Red Hat Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* This file implements basic routines supporting the semantics
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/locks.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
/*
 * Resolve a pathname into *nd.
 *
 * Returns 0 when path_init() returns zero; otherwise returns whatever
 * path_walk() returns.
 */
int presto_walk(const char *name, struct nameidata *nd)
{
        /* Symlinks are deliberately not followed, so that symlink
           operations work correctly.  The vfs should always hand us
           resolved dentries, so LOOKUP_FOLLOW should not be needed.  At
           the reintegrating end, lento again should be working with the
           resolved pathname and not the symlink. SHP
           XXX: This code implies that direct symlinks do not work. SHP
        */
        unsigned int lookup_flags = LOOKUP_POSITIVE;
        int rc = 0;

        ENTRY;
        if (!path_init(name, lookup_flags, nd))
                return rc;

        rc = path_walk(name, nd);
        return rc;
}
/* find the presto minor device for this inode */
int presto_i2m(struct inode *inode)
{
struct presto_cache *cache;
ENTRY;
cache = presto_get_cache(inode);
CDEBUG(D_PSDEV, "\n");
if ( !cache ) {
CERROR("PRESTO: BAD: cannot find cache for dev %d, ino %ld\n",
inode->i_dev, inode->i_ino);
EXIT;
return -1;
}
EXIT;
return cache->cache_psdev->uc_minor;
}
inline int presto_f2m(struct presto_file_set *fset)
{
return fset->fset_cache->cache_psdev->uc_minor;
}
/* Minor device number of the psdev attached to this cache. */
inline int presto_c2m(struct presto_cache *cache)
{
        int minor = cache->cache_psdev->uc_minor;

        return minor;
}
/* XXX check this out */
struct presto_file_set *presto_path2fileset(const char *name)
{
struct nameidata nd;
struct presto_file_set *fileset;
int error;
ENTRY;
error = presto_walk(name, &nd);
if (!error) {
#if 0
error = do_revalidate(nd.dentry);
#endif
if (!error)
fileset = presto_fset(nd.dentry);
path_release(&nd);
EXIT;
} else
fileset = ERR_PTR(error);
EXIT;
return fileset;
}
/* check a flag on this dentry or fset root.  Semantics:
   - if the channel for this inode has uc_no_filter set: return ~0
   - PRESTO_ATTR, PRESTO_DATA return 1 if FSET_INSYNC is set on the fileset
   - otherwise: test whether the flag is set in the dentry's dd_flags
*/
int presto_chk(struct dentry *dentry, int flag)
{
        int minor;
        struct presto_file_set *fset = presto_fset(dentry);

        ENTRY;
        minor = presto_i2m(dentry->d_inode);
        /* presto_i2m() returns -1 when the inode has no cache; never use
         * that as an index into izo_channels[]. */
        if ( minor >= 0 && izo_channels[minor].uc_no_filter ) {
                EXIT;
                return ~0;
        }

        /* if the fileset is in sync DATA and ATTR are OK */
        if ( fset &&
             (flag == PRESTO_ATTR || flag == PRESTO_DATA) &&
             (fset->fset_flags & FSET_INSYNC) ) {
                CDEBUG(D_INODE, "fset in sync (ino %ld)!\n",
                       fset->fset_dentry->d_inode->i_ino);
                EXIT;
                return 1;
        }

        EXIT;
        return (presto_d2d(dentry)->dd_flags & flag);
}
/* set a bit in the dentry flags */
void presto_set(struct dentry *dentry, int flag)
{
ENTRY;
if ( dentry->d_inode ) {
CDEBUG(D_INODE, "SET ino %ld, flag %x\n",
dentry->d_inode->i_ino, flag);
}
if ( presto_d2d(dentry) == NULL) {
CERROR("dentry without d_fsdata in presto_set: %p: %*s", dentry,
dentry->d_name.len, dentry->d_name.name);
BUG();
}
presto_d2d(dentry)->dd_flags |= flag;
EXIT;
}
/* given a path: complete the closes on the fset */
int lento_complete_closes(char *path)
{
struct nameidata nd;
struct dentry *dentry;
int error;
struct presto_file_set *fset;
ENTRY;
error = presto_walk(path, &nd);
if (error) {
EXIT;
return error;
}
dentry = nd.dentry;
error = -ENXIO;
if ( !presto_ispresto(dentry->d_inode) ) {
EXIT;
goto out_complete;
}
fset = presto_fset(dentry);
error = -EINVAL;
if ( !fset ) {
CERROR("No fileset!\n");
EXIT;
goto out_complete;
}
/* transactions and locking are internal to this function */
error = presto_complete_lml(fset);
EXIT;
out_complete:
path_release(&nd);
return error;
}
#if 0
/* NOTE: this whole function is compiled out by the surrounding #if 0. */
/* given a path: write a close record and cancel an LML record, finally
call truncate LML. Lento is doing this so it goes in with uid/gid's
root.

Which steps run is selected by info->flags:
- LENTO_FL_CANCEL_LML:   presto_clear_lml_close() at lml_offset, and
presto_truncate_lml() after the commit
- LENTO_FL_WRITE_KML:    presto_journal_close() with the inode's current
version
- LENTO_FL_WRITE_EXPECT: presto_write_last_rcvd()

Returns the presto_walk() error, -ENXIO for a non-presto inode, -EINVAL
when there is no fileset, -ENOMEM when the transaction cannot be started,
or the result of the last step that ran.
*/
int lento_cancel_lml(char *path,
__u64 lml_offset,
__u64 remote_ino,
__u32 remote_generation,
__u32 remote_version,
struct lento_vfs_context *info)
{
struct nameidata nd;
struct rec_info rec;
struct dentry *dentry;
int error;
struct presto_file_set *fset;
void *handle;
struct presto_version new_ver;
ENTRY;
error = presto_walk(path, &nd);
if (error) {
EXIT;
return error;
}
dentry = nd.dentry;
error = -ENXIO;
if ( !presto_ispresto(dentry->d_inode) ) {
EXIT;
goto out_cancel_lml;
}
fset = presto_fset(dentry);
error=-EINVAL;
if (fset==NULL) {
CERROR("No fileset!\n");
EXIT;
goto out_cancel_lml;
}
/* this only requires a transaction below which is automatic */
handle = presto_trans_start(fset, dentry->d_inode, PRESTO_OP_RELEASE);
if ( IS_ERR(handle) ) {
/* the error encoded in handle is discarded; -ENOMEM is reported */
error = -ENOMEM;
EXIT;
goto out_cancel_lml;
}
/* every failure below commits the transaction before bailing out */
if (info->flags & LENTO_FL_CANCEL_LML) {
error = presto_clear_lml_close(fset, lml_offset);
if ( error ) {
presto_trans_commit(fset, handle);
EXIT;
goto out_cancel_lml;
}
}
if (info->flags & LENTO_FL_WRITE_KML) {
/* stack-allocated struct file; only private_data and f_dentry are
 * initialised before it is passed to presto_journal_close() */
struct file file;
file.private_data = NULL;
file.f_dentry = dentry;
presto_getversion(&new_ver, dentry->d_inode);
error = presto_journal_close(&rec, fset, &file, dentry,
&new_ver);
if ( error ) {
EXIT;
presto_trans_commit(fset, handle);
goto out_cancel_lml;
}
}
if (info->flags & LENTO_FL_WRITE_EXPECT) {
error = presto_write_last_rcvd(&rec, fset, info);
/* only negative values are treated as failure here */
if ( error < 0 ) {
EXIT;
presto_trans_commit(fset, handle);
goto out_cancel_lml;
}
}
presto_trans_commit(fset, handle);
if (info->flags & LENTO_FL_CANCEL_LML) {
presto_truncate_lml(fset);
}
out_cancel_lml:
/* NOTE(review): the error paths above already call EXIT before jumping
 * here, so EXIT is traced twice on those paths */
EXIT;
path_release(&nd);
return error;
}
#endif
/*
 * Given a dentry, operate on the flags in its dentry data.  Used by
 * downcalls.
 *
 * dd_flags is first ANDed with and_flag, then ORed with or_flag.  When res
 * is non-NULL the resulting flags are stored through it.
 * Returns 0, or -EINVAL if the dentry has no presto dentry data.
 */
int izo_mark_dentry(struct dentry *dentry, int and_flag, int or_flag,
                    int *res)
{
        if (!presto_d2d(dentry)) {
                CERROR("InterMezzo: no ddata for inode %ld in %s\n",
                       dentry->d_inode->i_ino, __FUNCTION__);
                return -EINVAL;
        }

        CDEBUG(D_INODE, "inode: %ld, and flag %x, or flag %x, dd_flags %x\n",
               dentry->d_inode->i_ino, and_flag, or_flag,
               presto_d2d(dentry)->dd_flags);

        presto_d2d(dentry)->dd_flags &= and_flag;
        presto_d2d(dentry)->dd_flags |= or_flag;

        if (res != NULL)
                *res = presto_d2d(dentry)->dd_flags;

        return 0;
}
/*
 * Given a dentry, operate on the flags of the cache its inode belongs to.
 * Used by mark_ioctl.
 *
 * cache_flags is first ANDed with and_flag, then ORed with or_flag.  When
 * res is non-NULL the resulting flags are stored through it (cast to int).
 * Returns 0, -EINVAL if the dentry has no presto dentry data, or -EBADF if
 * no cache is found for the inode.
 */
int izo_mark_cache(struct dentry *dentry, int and_flag, int or_flag,
                   int *res)
{
        struct presto_cache *cache;

        if (!presto_d2d(dentry)) {
                CERROR("InterMezzo: no ddata for inode %ld in %s\n",
                       dentry->d_inode->i_ino, __FUNCTION__);
                return -EINVAL;
        }

        CDEBUG(D_INODE, "inode: %ld, and flag %x, or flag %x, dd_flags %x\n",
               dentry->d_inode->i_ino, and_flag, or_flag,
               presto_d2d(dentry)->dd_flags);

        cache = presto_get_cache(dentry->d_inode);
        if (cache == NULL) {
                CERROR("PRESTO: BAD: cannot find cache in izo_mark_cache\n");
                return -EBADF;
        }

        cache->cache_flags &= and_flag;
        cache->cache_flags |= or_flag;

        if (res != NULL)
                *res = (int)cache->cache_flags;

        return 0;
}
int presto_set_max_kml_size(const char *path, unsigned long max_size)
{
struct presto_file_set *fset;
ENTRY;
fset = presto_path2fileset(path);
if (IS_ERR(fset)) {
EXIT;
return PTR_ERR(fset);
}
fset->kml_truncate_size = max_size;
CDEBUG(D_CACHE, "KML truncate size set to %lu bytes for fset %s.\n",
max_size, path);
EXIT;
return 0;
}
int izo_mark_fset(struct dentry *dentry, int and_flag, int or_flag,
int * res)
{
struct presto_file_set *fset;
fset = presto_fset(dentry);
if ( !fset ) {
CERROR("PRESTO: BAD: cannot find cache in izo_mark_cache\n");
make_bad_inode(dentry->d_inode);
return -EBADF;
}
fset->fset_flags &= and_flag;
fset->fset_flags |= or_flag;
if (res)
*res = (int)fset->fset_flags;
return 0;
}
/*
 * Talk to Lento about the permit.
 *
 * Returns:
 *   -EINVAL    no presto minor could be found for the dentry's inode
 *   -ENOTCONN  the dentry has no fileset, or Lento is not up and the
 *              fileset does not have FSET_STEAL_PERMIT set
 *   0          Lento is not up but FSET_STEAL_PERMIT is set
 *   -ENOMEM    the path buffer could not be allocated
 *   otherwise  the result of izo_upc_permit()
 */
static int presto_permit_upcall(struct dentry *dentry)
{
        int rc;
        char *path, *buffer;
        int pathlen;
        int minor;
        struct presto_file_set *fset;

        ENTRY;

        if ( (minor = presto_i2m(dentry->d_inode)) < 0) {
                EXIT;
                return -EINVAL;
        }

        fset = presto_fset(dentry);
        if (!fset) {
                EXIT;
                return -ENOTCONN;
        }

        if ( !presto_lento_up(minor) ) {
                if ( fset->fset_flags & FSET_STEAL_PERMIT ) {
                        EXIT;
                        return 0;
                } else {
                        EXIT;
                        return -ENOTCONN;
                }
        }

        /* one page of scratch space in which presto_path() builds the
         * path of the dentry relative to the fileset root */
        PRESTO_ALLOC(buffer, PAGE_SIZE);
        if ( !buffer ) {
                CERROR("PRESTO: out of memory!\n");
                EXIT;
                return -ENOMEM;
        }
        path = presto_path(dentry, fset->fset_dentry, buffer, PAGE_SIZE);
        pathlen = MYPATHLEN(buffer, path);

        rc = izo_upc_permit(minor, dentry, pathlen, path, fset->fset_name);

        PRESTO_FREE(buffer, PAGE_SIZE);
        EXIT;
        return rc;
}
/* get a write permit for the fileset of this inode
* - if this returns a negative value there was an error
* - if 0 is returned the permit was already in the kernel -- or --
* Lento gave us the permit without reintegration
* - lento returns the number of records it reintegrated
*
* Note that if this fileset has branches, a permit will -never- to a normal
* process for writing in the data area (ie, outside of .intermezzo)
*/
int presto_get_permit(struct inode * inode)
{
struct dentry *de;
struct presto_file_set *fset;
int minor = presto_i2m(inode);
int rc = 0;
ENTRY;
if (minor < 0) {
EXIT;
return -1;
}
if ( ISLENTO(minor) ) {
EXIT;
return 0;
}
if (list_empty(&inode->i_dentry)) {
CERROR("No alias for inode %d\n", (int) inode->i_ino);
EXIT;
return -EINVAL;
}
de = list_entry(inode->i_dentry.next, struct dentry, d_alias);
if (presto_chk(de, PRESTO_DONT_JOURNAL)) {
EXIT;
return 0;
}
fset = presto_fset(de);
if ( !fset ) {
CERROR("Presto: no fileset in presto_get_permit!\n");
EXIT;
return -EINVAL;
}
if (fset->fset_flags & FSET_HAS_BRANCHES) {
EXIT;
return -EROFS;
}
spin_lock(&fset->fset_permit_lock);
if (fset->fset_flags & FSET_HASPERMIT) {
fset->fset_permit_count++;
CDEBUG(D_INODE, "permit count now %d, inode %lx\n",
fset->fset_permit_count, inode->i_ino);
spin_unlock(&fset->fset_permit_lock);
EXIT;
return 0;
}
/* Allow reintegration to proceed without locks -SHP */
fset->fset_permit_upcall_count++;
if (fset->fset_permit_upcall_count == 1) {
spin_unlock(&fset->fset_permit_lock);
rc = presto_permit_upcall(fset->fset_dentry);
spin_lock(&fset->fset_permit_lock);
fset->fset_permit_upcall_count--;
if (rc == 0) {
izo_mark_fset(fset->fset_dentry, ~0, FSET_HASPERMIT,
NULL);
fset->fset_permit_count++;
} else if (rc == ENOTCONN) {
CERROR("InterMezzo: disconnected operation. stealing permit.\n");
izo_mark_fset(fset->fset_dentry, ~0, FSET_HASPERMIT,
NULL);
fset->fset_permit_count++;
/* set a disconnected flag here to stop upcalls */
rc = 0;
} else {
CERROR("InterMezzo: presto_permit_upcall failed: %d\n", rc);
rc = -EROFS;
/* go to sleep here and try again? */
}
wake_up_interruptible(&fset->fset_permit_queue);
} else {
/* Someone is already doing an upcall; go to sleep. */
DECLARE_WAITQUEUE(wait, current);
spin_unlock(&fset->fset_permit_lock);
add_wait_queue(&fset->fset_permit_queue, &wait);
while (1) {
set_current_state(TASK_INTERRUPTIBLE);
spin_lock(&fset->fset_permit_lock);
if (fset->fset_permit_upcall_count == 0)
break;
spin_unlock(&fset->fset_permit_lock);
if (signal_pending(current)) {
remove_wait_queue(&fset->fset_permit_queue,
&wait);
return -ERESTARTSYS;
}
schedule();
}
remove_wait_queue(&fset->fset_permit_queue, &wait);
/* We've been woken up: do we have the permit? */
if (fset->fset_flags & FSET_HASPERMIT)
/* FIXME: Is this the right thing? */
rc = -EAGAIN;
}
CDEBUG(D_INODE, "permit count now %d, ino %ld (likely 1), "
"rc %d\n", fset->fset_permit_count, inode->i_ino, rc);
spin_unlock(&fset->fset_permit_lock);
EXIT;
return rc;
}
int presto_put_permit(struct inode * inode)
{
struct dentry *de;
struct presto_file_set *fset;
int minor = presto_i2m(inode);
ENTRY;
if (minor < 0) {
EXIT;
return -1;
}
if ( ISLENTO(minor) ) {
EXIT;
return 0;
}
if (list_empty(&inode->i_dentry)) {
CERROR("No alias for inode %d\n", (int) inode->i_ino);
EXIT;
return -1;
}
de = list_entry(inode->i_dentry.next, struct dentry, d_alias);
fset = presto_fset(de);
if ( !fset ) {
CERROR("InterMezzo: no fileset in %s!\n", __FUNCTION__);
EXIT;
return -1;
}
if (presto_chk(de, PRESTO_DONT_JOURNAL)) {
EXIT;
return 0;
}
spin_lock(&fset->fset_permit_lock);
if (fset->fset_flags & FSET_HASPERMIT) {
if (fset->fset_permit_count > 0)
fset->fset_permit_count--;
else
CERROR("Put permit while permit count is 0, "
"inode %ld!\n", inode->i_ino);
} else {
fset->fset_permit_count = 0;
CERROR("InterMezzo: put permit while no permit, inode %ld, "
"flags %x!\n", inode->i_ino, fset->fset_flags);
}
CDEBUG(D_INODE, "permit count now %d, inode %ld\n",
fset->fset_permit_count, inode->i_ino);
if (fset->fset_flags & FSET_PERMIT_WAITING &&
fset->fset_permit_count == 0) {
CDEBUG(D_INODE, "permit count now 0, ino %ld, wake sleepers\n",
inode->i_ino);
wake_up_interruptible(&fset->fset_permit_queue);
}
spin_unlock(&fset->fset_permit_lock);
EXIT;
return 0;
}
/*
 * Fill *presto_version from the inode: its mtime, ctime and size, each
 * cast to __u64.
 */
void presto_getversion(struct presto_version * presto_version,
                       struct inode * inode)
{
        struct presto_version *pv = presto_version;

        pv->pv_mtime = (__u64)inode->i_mtime;
        pv->pv_ctime = (__u64)inode->i_ctime;
        pv->pv_size  = (__u64)inode->i_size;
}
/* If uuid is non-null, it is the uuid of the peer that's making the revocation
 * request.  If it is null, this request was made locally, without external
 * pressure to give up the permit.  This most often occurs when a client
 * starts up.
 *
 * Waits (interruptibly) until the fileset's permit count is zero, then, if
 * uuid is non-null, notifies Lento through izo_upc_revoke_permit(), and
 * finally clears FSET_PERMIT_WAITING and FSET_HASPERMIT.
 *
 * Returns 0 on success, -ENODEV when no minor or fileset can be found,
 * -EINVAL when another task is already waiting on this permit,
 * -ERESTARTSYS when interrupted by a signal, or the negative result of the
 * revoke upcall.  FSET_PERMIT_WAITING is cleared on every exit path after
 * it has been set, so a failed attempt does not block later requests.
 *
 * FIXME: this function needs to be refactored slightly once we start handling
 * multiple clients.
 */
int izo_revoke_permit(struct dentry *dentry, __u8 uuid[16])
{
        struct presto_file_set *fset;
        DECLARE_WAITQUEUE(wait, current);
        int minor, rc;

        ENTRY;

        minor = presto_i2m(dentry->d_inode);
        if (minor < 0) {
                EXIT;
                return -ENODEV;
        }

        fset = presto_fset(dentry);
        if (fset == NULL) {
                EXIT;
                return -ENODEV;
        }

        spin_lock(&fset->fset_permit_lock);
        if (fset->fset_flags & FSET_PERMIT_WAITING) {
                CERROR("InterMezzo: Two processes are waiting on the same permit--this not yet supported!  Aborting this particular permit request...\n");
                EXIT;
                spin_unlock(&fset->fset_permit_lock);
                return -EINVAL;
        }

        if (fset->fset_permit_count == 0)
                goto got_permit;

        /* Something is still using this permit.  Mark that we're waiting for it
         * and go to sleep. */
        rc = izo_mark_fset(dentry, ~0, FSET_PERMIT_WAITING, NULL);
        spin_unlock(&fset->fset_permit_lock);
        if (rc < 0) {
                EXIT;
                return rc;
        }

        add_wait_queue(&fset->fset_permit_queue, &wait);
        while (1) {
                set_current_state(TASK_INTERRUPTIBLE);

                spin_lock(&fset->fset_permit_lock);
                if (fset->fset_permit_count == 0)
                        break;
                spin_unlock(&fset->fset_permit_lock);

                if (signal_pending(current)) {
                        /* FIXME: there must be a better thing to return... */
                        /* do not leave this function still marked as
                         * sleeping */
                        set_current_state(TASK_RUNNING);
                        remove_wait_queue(&fset->fset_permit_queue, &wait);

                        /* we are no longer waiting: drop the flag so later
                         * revoke requests are not refused with -EINVAL */
                        spin_lock(&fset->fset_permit_lock);
                        fset->fset_flags &= ~FSET_PERMIT_WAITING;
                        spin_unlock(&fset->fset_permit_lock);

                        EXIT;
                        return -ERESTARTSYS;
                }

                /* FIXME: maybe there should be a timeout here. */

                schedule();
        }

        /* the loop is left with fset_permit_lock held */
        set_current_state(TASK_RUNNING);
        remove_wait_queue(&fset->fset_permit_queue, &wait);
 got_permit:
        /* By this point fset->fset_permit_count is zero and we're holding the
         * lock. */
        CDEBUG(D_CACHE, "InterMezzo: releasing permit inode %ld\n",
               dentry->d_inode->i_ino);

        if (uuid != NULL) {
                /* NOTE(review): the upcall is issued with fset_permit_lock
                 * held; if izo_upc_revoke_permit() can sleep this needs
                 * rework -- confirm. */
                rc = izo_upc_revoke_permit(minor, fset->fset_name, uuid);
                if (rc < 0) {
                        /* the permit is kept, but nobody is waiting on it
                         * any more */
                        fset->fset_flags &= ~FSET_PERMIT_WAITING;
                        spin_unlock(&fset->fset_permit_lock);
                        EXIT;
                        return rc;
                }
        }

        izo_mark_fset(fset->fset_dentry, ~FSET_PERMIT_WAITING, 0, NULL);
        izo_mark_fset(fset->fset_dentry, ~FSET_HASPERMIT, 0, NULL);
        spin_unlock(&fset->fset_permit_lock);
        EXIT;
        return 0;
}
/*
 * Report whether this fileset is read-only for the caller.
 *
 * The Lento variants of the read-only flags are tested when ISLENTO() is
 * true for the cache's minor, the client variants otherwise.  Returns 1 if
 * the flag is set on either the fileset or its cache, else 0.
 */
inline int presto_is_read_only(struct presto_file_set * fset)
{
        struct presto_cache *cache = fset->fset_cache;
        int minor = cache->cache_psdev->uc_minor;
        int ro_mask;

        /* fileset-level flag first */
        if (ISLENTO(minor))
                ro_mask = FSET_LENTO_RO;
        else
                ro_mask = FSET_CLIENT_RO;
        if (fset->fset_flags & ro_mask)
                return 1;

        /* then the cache-level flag */
        if (ISLENTO(minor))
                ro_mask = CACHE_LENTO_RO;
        else
                ro_mask = CACHE_CLIENT_RO;
        if (cache->cache_flags & ro_mask)
                return 1;

        return 0;
}