blob: 0f826926c29648b65807fb958158da592336b98b [file] [log] [blame]
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2000 Stelias Computing, Inc.
* Copyright (C) 2000 Red Hat, Inc.
* Copyright (C) 2000 TurboLinux, Inc.
* Copyright (C) 2000 Los Alamos National Laboratory.
* Copyright (C) 2000, 2001 Tacit Networks, Inc.
* Copyright (C) 2000 Peter J. Braam
* Copyright (C) 2001 Mountain View Data, Inc.
* Copyright (C) 2001 Cluster File Systems, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* This file manages file I/O
*
*/
#include <stdarg.h>
#include <asm/bitops.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/ext2_fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/locks.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/smp_lock.h>
#define __NO_VERSION__
#include <linux/module.h>
#include <linux/intermezzo_fs.h>
#include <linux/intermezzo_psdev.h>
#include <linux/fsfilter.h>
/*
 * presto_permission() is not defined in this file; it is only referenced
 * here, as the .permission method of presto_file_iops below.
 * (Historical note kept from the original comment: "these are initialized
 * in super.c" -- TODO confirm where the definition actually lives.)
 */
extern int presto_permission(struct inode *inode, int mask);
/*
 * Issue an "open" upcall (izo_upc_open) for dentry @de on channel @minor.
 *
 * The path of @de relative to its fileset root is built in a temporary
 * PAGE_SIZE buffer.  If the dentry has no remote inode number yet,
 * presto_get_fileid() is tried first; its return value is only used for
 * logging.  When a remote inode number is known afterwards, it and the
 * remote generation are passed along in the lento_vfs_context; otherwise
 * the context is left zeroed and an error is logged.
 *
 * Returns -ENOMEM if the path buffer cannot be allocated, otherwise the
 * return value of izo_upc_open().
 */
static int presto_open_upcall(int minor, struct dentry *de)
{
        struct presto_dentry_data *dd = presto_d2d(de);
        struct presto_file_set *fset;
        struct lento_vfs_context info;
        char *path, *buffer;
        int pathlen;
        int fileid_rc = 0;
        int upcall_rc;

        PRESTO_ALLOC(buffer, PAGE_SIZE);
        if (buffer == NULL) {
                CERROR("PRESTO: out of memory!\n");
                return -ENOMEM;
        }

        /* Build the fileset-relative path inside the scratch page. */
        fset = presto_fset(de);
        path = presto_path(de, fset->fset_dentry, buffer, PAGE_SIZE);
        pathlen = MYPATHLEN(buffer, path);

        CDEBUG(D_FILE, "de %p, dd %p\n", de, dd);

        /* No remote id cached yet: try to obtain one before the upcall. */
        if (dd->remote_ino == 0)
                fileid_rc = presto_get_fileid(minor, fset, de);

        memset(&info, 0, sizeof(info));
        if (dd->remote_ino > 0) {
                info.remote_ino = dd->remote_ino;
                info.remote_generation = dd->remote_generation;
        } else {
                /* info stays zeroed; only the path identifies the file */
                CERROR("get_fileid failed %d, ino: %Lx, fetching by name\n",
                       fileid_rc, dd->remote_ino);
        }

        upcall_rc = izo_upc_open(minor, pathlen, path, fset->fset_name, &info);

        PRESTO_FREE(buffer, PAGE_SIZE);
        return upcall_rc;
}
static inline int open_check_dod(struct file *file,
struct presto_file_set *fset)
{
int gen, is_iopen = 0, minor;
struct presto_cache *cache = fset->fset_cache;
ino_t inum;
minor = presto_c2m(cache);
if ( ISLENTO(minor) ) {
CDEBUG(D_CACHE, "is lento, not doing DOD.\n");
return 0;
}
/* Files are only ever opened by inode during backfetches, when by
* definition we have the authoritative copy of the data. No DOD. */
is_iopen = izo_dentry_is_ilookup(file->f_dentry, &inum, &gen);
if (is_iopen) {
CDEBUG(D_CACHE, "doing iopen, not doing DOD.\n");
return 0;
}
if (!(fset->fset_flags & FSET_DATA_ON_DEMAND)) {
CDEBUG(D_CACHE, "fileset not on demand.\n");
return 0;
}
if (file->f_flags & O_TRUNC) {
CDEBUG(D_CACHE, "fileset dod: O_TRUNC.\n");
return 0;
}
if (presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL)) {
CDEBUG(D_CACHE, "file under .intermezzo, not doing DOD\n");
return 0;
}
if (presto_chk(file->f_dentry, PRESTO_DATA)) {
CDEBUG(D_CACHE, "PRESTO_DATA is set, not doing DOD.\n");
return 0;
}
if (cache->cache_filter->o_trops->tr_all_data(file->f_dentry->d_inode)) {
CDEBUG(D_CACHE, "file not sparse, not doing DOD.\n");
return 0;
}
return 1;
}
static int presto_file_open(struct inode *inode, struct file *file)
{
int rc = 0;
struct file_operations *fops;
struct presto_cache *cache;
struct presto_file_set *fset;
struct presto_file_data *fdata;
int writable = (file->f_flags & (O_RDWR | O_WRONLY));
int minor, i;
ENTRY;
if (presto_prep(file->f_dentry, &cache, &fset) < 0) {
EXIT;
return -EBADF;
}
minor = presto_c2m(cache);
CDEBUG(D_CACHE, "DATA_OK: %d, ino: %ld, islento: %d\n",
presto_chk(file->f_dentry, PRESTO_DATA), inode->i_ino,
ISLENTO(minor));
if ( !ISLENTO(minor) && (file->f_flags & O_RDWR ||
file->f_flags & O_WRONLY)) {
CDEBUG(D_CACHE, "calling presto_get_permit\n");
if ( presto_get_permit(inode) < 0 ) {
EXIT;
return -EROFS;
}
presto_put_permit(inode);
}
if (open_check_dod(file, fset)) {
CDEBUG(D_CACHE, "presto_open_upcall\n");
CDEBUG(D_CACHE, "dentry: %p setting DATA, ATTR\n", file->f_dentry);
presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA);
rc = presto_open_upcall(minor, file->f_dentry);
if (rc) {
EXIT;
CERROR("%s: returning error %d\n", __FUNCTION__, rc);
return rc;
}
}
/* file was truncated upon open: do not refetch */
if (file->f_flags & O_TRUNC) {
CDEBUG(D_CACHE, "setting DATA, ATTR\n");
presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA);
}
fops = filter_c2cffops(cache->cache_filter);
if ( fops->open ) {
CDEBUG(D_CACHE, "calling fs open\n");
rc = fops->open(inode, file);
if (rc) {
EXIT;
return rc;
}
}
if (writable) {
PRESTO_ALLOC(fdata, sizeof(*fdata));
if (!fdata) {
EXIT;
return -ENOMEM;
}
/* LOCK: XXX check that the kernel lock protects this alloc */
fdata->fd_do_lml = 0;
fdata->fd_bytes_written = 0;
fdata->fd_fsuid = current->fsuid;
fdata->fd_fsgid = current->fsgid;
fdata->fd_mode = file->f_dentry->d_inode->i_mode;
fdata->fd_uid = file->f_dentry->d_inode->i_uid;
fdata->fd_gid = file->f_dentry->d_inode->i_gid;
fdata->fd_ngroups = current->ngroups;
for (i=0 ; i < current->ngroups ; i++)
fdata->fd_groups[i] = current->groups[i];
if (!ISLENTO(minor))
fdata->fd_info.flags = LENTO_FL_KML;
else {
/* this is for the case of DOD,
reint_close will adjust flags if needed */
fdata->fd_info.flags = 0;
}
presto_getversion(&fdata->fd_version, inode);
file->private_data = fdata;
} else {
file->private_data = NULL;
}
EXIT;
return 0;
}
/*
 * Overwrite the lento_vfs_context stored in @file's presto_file_data with
 * a copy of @info.
 *
 * Returns 0 on success, or -EINVAL when @file carries no presto_file_data
 * (file->private_data is NULL).
 */
int presto_adjust_lml(struct file *file, struct lento_vfs_context *info)
{
        struct presto_file_data *fdata = file->private_data;
        int rc = -EINVAL;

        if (fdata != NULL) {
                memcpy(&fdata->fd_info, info, sizeof(*info));
                rc = 0;
        }

        EXIT;
        return rc;
}
/*
 * ->release method for InterMezzo regular files.
 *
 * Calls the underlying filesystem's ->release (if any).  If the file was
 * written through presto_file_write() (fd_do_lml set), a permit is taken
 * and presto_do_close() journals the close; its result replaces the result
 * of the lower ->release.
 *
 * The presto_file_data attached at open time is freed on every path out of
 * this function, including the error paths: ->release is the last chance
 * to do so, because the struct file is not handed back to us afterwards.
 *
 * Returns 0 on success or a negative errno (-EROFS when no permit could be
 * obtained).
 */
static int presto_file_release(struct inode *inode, struct file *file)
{
        int rc;
        struct file_operations *fops;
        struct presto_cache *cache;
        struct presto_file_set *fset;
        struct presto_file_data *fdata =
                (struct presto_file_data *)file->private_data;

        ENTRY;

        rc = presto_prep(file->f_dentry, &cache, &fset);
        if (rc)
                goto out_free;

        fops = filter_c2cffops(cache->cache_filter);
        if (fops && fops->release)
                rc = fops->release(inode, file);

        CDEBUG(D_CACHE, "islento = %d (minor %d), rc %d, data %p\n",
               ISLENTO(cache->cache_psdev->uc_minor),
               cache->cache_psdev->uc_minor, rc, fdata);

        /* this file was modified: ignore close errors, write KML */
        if (fdata && fdata->fd_do_lml) {
                /* XXX: remove when lento gets file granularity cd */
                if (presto_get_permit(inode) < 0) {
                        rc = -EROFS;
                        goto out_free;
                }

                fdata->fd_info.updated_time = file->f_dentry->d_inode->i_mtime;
                rc = presto_do_close(fset, file);
                presto_put_permit(inode);
        }

out_free:
        /* Free unconditionally; a failed close must not leak fdata. */
        if (fdata) {
                PRESTO_FREE(fdata, sizeof(*fdata));
                file->private_data = NULL;
        }

        EXIT;
        return rc;
}
/*
 * Account @res freshly written bytes against @file and, once the running
 * total reaches the fileset's fset_file_maxio, journal an intermediate
 * close record and reset the counter.
 *
 * Nothing is done unless the fileset has FSET_JCLOSE_ON_WRITE set and the
 * caller is not Lento.  The caller must make sure file->private_data is
 * non-NULL; it is dereferenced here without a check.
 *
 * Background (from the original author): we do a journal close after a
 * fixed or a specified amount of KBytes, currently a global parameter set
 * with sysctl.  If files are open for a long time, this gives added
 * protection.  (XXX todo: per cache, add ioctl, handle journaling in a
 * thread, add more options etc.)
 */
static void presto_apply_write_policy(struct file *file,
                                      struct presto_file_set *fset, loff_t res)
{
        struct presto_file_data *fdata =
                (struct presto_file_data *)file->private_data;
        struct presto_cache *cache = fset->fset_cache;
        struct presto_version new_file_ver;
        struct rec_info rec;
        int error;

        if (!(fset->fset_flags & FSET_JCLOSE_ON_WRITE))
                return;
        if (ISLENTO(cache->cache_psdev->uc_minor))
                return;

        fdata->fd_bytes_written += res;
        if (fdata->fd_bytes_written < fset->fset_file_maxio)
                return;

        /* Threshold reached: journal a close with the current version. */
        presto_getversion(&new_file_ver, file->f_dentry->d_inode);

        /* This is really heavy weight and should be fixed ASAP.  At most
           we should be recording the number of bytes written and not
           locking the kernel, wait for permits, etc, on the write path.
           SHP
        */
        lock_kernel();
        if (presto_get_permit(file->f_dentry->d_inode) < 0) {
                EXIT;
                /* we must be disconnected, not to worry */
                unlock_kernel();
                return;
        }

        error = presto_journal_close(&rec, fset, file, file->f_dentry,
                                     &fdata->fd_version, &new_file_ver);

        presto_put_permit(file->f_dentry->d_inode);
        unlock_kernel();

        if (error) {
                CERROR("presto_close: cannot journal close\n");
                /* XXX these errors are really bad */
                /* panic(); */
                return;
        }

        /* Start counting afresh towards the next intermediate close. */
        fdata->fd_bytes_written = 0;
}
/*
 * ->write method for InterMezzo regular files.
 *
 * Steps, in order:
 *   1. presto_prep() resolves the cache and fileset.
 *   2. Cache space is reserved for the data plus journal overhead; failure
 *      gives -ENOSPC.
 *   3. On the first non-empty write through this struct file (fd_do_lml
 *      still 0) to a dentry that is not PRESTO_DONT_JOURNAL, an LML close
 *      record is written inside its own transaction and its offset is
 *      remembered in fd_lml_offset.
 *   4. The underlying filesystem's ->write does the actual I/O.
 *   5. presto_apply_write_policy() accounts the bytes written.
 *   6. The reserved space is released again.
 *
 * If the LML record cannot be written (no transaction, or
 * presto_write_lml_close() fails), the write is refused with an error and
 * fd_do_lml is cleared again.  Leaving it set would make
 * presto_file_release() journal a close for an LML record that was never
 * written, and would stop later writes from retrying.
 *
 * Returns the lower filesystem's ->write result, or a negative errno.
 */
static ssize_t presto_file_write(struct file *file, const char *buf,
                                 size_t size, loff_t *off)
{
        struct rec_info rec;
        int error;
        struct presto_cache *cache;
        struct presto_file_set *fset;
        struct file_operations *fops;
        ssize_t res;
        int do_lml_here;
        void *handle = NULL;
        unsigned long blocks;
        struct presto_file_data *fdata;
        loff_t res_size;

        error = presto_prep(file->f_dentry, &cache, &fset);
        if (error) {
                EXIT;
                return error;
        }

        blocks = (size >> file->f_dentry->d_inode->i_sb->s_blocksize_bits) + 1;
        /* XXX 3 is for ext2 indirect blocks ... */
        res_size = 2 * PRESTO_REQHIGH + ((blocks + 3)
                << file->f_dentry->d_inode->i_sb->s_blocksize_bits);
        error = presto_reserve_space(fset->fset_cache, res_size);
        /* size is a size_t: print it as unsigned long, not as int */
        CDEBUG(D_INODE, "Reserved %Ld for %lu\n", res_size,
               (unsigned long)size);
        if (error) {
                EXIT;
                return -ENOSPC;
        }

        CDEBUG(D_INODE, "islento %d, minor: %d\n",
               ISLENTO(cache->cache_psdev->uc_minor),
               cache->cache_psdev->uc_minor);

        /*
         * XXX this lock should become a per inode lock when
         * Vinny's changes are in; we could just use i_sem.
         *
         * NOTE(review): fd_do_lml is tested and set under a *read* lock, so
         * two concurrent writers can both take this path -- confirm whether
         * a write lock is wanted here.
         */
        read_lock(&fset->fset_lml.fd_lock);
        fdata = (struct presto_file_data *)file->private_data;
        /* Without per-file data there is nothing to record an LML for. */
        do_lml_here = fdata && size && (fdata->fd_do_lml == 0) &&
                !presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL);
        if (do_lml_here)
                fdata->fd_do_lml = 1;
        read_unlock(&fset->fset_lml.fd_lock);

        /* XXX
           There might be a bug here.  We need to make
           absolutely sure that the ext3_file_write commits
           after our transaction that writes the LML record.
           Nesting the file write helps if new blocks are allocated.
        */
        res = 0;
        if (do_lml_here) {
                struct presto_version file_version;
                int lml_rc;

                /* handle different space reqs from file system below! */
                handle = presto_trans_start(fset, file->f_dentry->d_inode,
                                            KML_OPCODE_WRITE);
                if (IS_ERR(handle)) {
                        CERROR("presto_write: no space for transaction\n");
                        res = -ENOSPC;
                        goto exit_undo_lml;
                }

                presto_getversion(&file_version, file->f_dentry->d_inode);
                lml_rc = presto_write_lml_close(&rec, fset, file,
                                                fdata->fd_info.remote_ino,
                                                fdata->fd_info.remote_generation,
                                                &fdata->fd_info.remote_version,
                                                &file_version);
                if (lml_rc) {
                        /*
                         * Fail the write instead of crashing the machine,
                         * and close the transaction started above so the
                         * handle is not left open.
                         */
                        CERROR("intermezzo: failed to write LML (rc %d)\n",
                               lml_rc);
                        presto_trans_commit(fset, handle);
                        res = lml_rc;
                        EXIT;
                        goto exit_undo_lml;
                }
                /* rec is only meaningful once the record was written */
                fdata->fd_lml_offset = rec.offset;
                presto_trans_commit(fset, handle);
        }

        fops = filter_c2cffops(cache->cache_filter);
        res = fops->write(file, buf, size, off);
        if (res != size) {
                CDEBUG(D_FILE,
                       "file write returns short write: size %lu, res %ld\n",
                       (unsigned long)size, (long)res);
        }

        if ((res > 0) && fdata)
                presto_apply_write_policy(file, fset, res);

        goto exit_write;

exit_undo_lml:
        /* No LML record exists: let a later write try again. */
        read_lock(&fset->fset_lml.fd_lock);
        fdata->fd_do_lml = 0;
        read_unlock(&fset->fset_lml.fd_lock);

exit_write:
        presto_release_space(fset->fset_cache, res_size);
        return res;
}
/*
 * File operations installed for InterMezzo regular files.  Only the
 * methods listed here are set; every other member is left zero.
 */
struct file_operations presto_file_fops = {
        .open    = presto_file_open,     /* permit check, DOD upcall, fdata setup */
        .release = presto_file_release,  /* lower release, journal the close */
        .write   = presto_file_write,    /* LML record, then lower write */
        .ioctl   = presto_ioctl,         /* defined outside this file */
};
/*
 * Inode operations installed for InterMezzo regular files.  All three
 * handlers are defined outside this file; set_ext_attr is only present
 * when the kernel is built with CONFIG_FS_EXT_ATTR.
 */
struct inode_operations presto_file_iops = {
        .setattr      = presto_setattr,
        .permission   = presto_permission,
#ifdef CONFIG_FS_EXT_ATTR
        .set_ext_attr = presto_set_ext_attr,
#endif
};
/*
 * izo_purge_file - currently a stub that always returns 0.
 *
 * The real implementation below is compiled out with "#if 0" and still
 * carries several FIXMEs (locking, open-file check, LML record handling).
 * As written, the disabled code would:
 *   - build "<cache_mtpt>/<file>" and look it up with izo_lookup_file(),
 *   - give up with -EBUSY if the inode's atime is within the last 5 seconds,
 *   - start a KML_OPCODE_TRUNC transaction and call izo_do_truncate() twice,
 *     first with (0, oldsize) and then with (oldsize, 0),
 *   - commit the transaction and free the path buffer on the way out.
 * Presumably the intent is to drop the file's cached data -- TODO confirm
 * before re-enabling.
 *
 * NOTE(review): the disabled code returns -1 rather than an errno when the
 * path buffer cannot be allocated.
 */
/* FIXME: I bet we want to add a lock here and in presto_file_open. */
int izo_purge_file(struct presto_file_set *fset, char *file)
{
#if 0
void *handle = NULL;
char *path = NULL;
struct nameidata nd;
struct dentry *dentry;
int rc = 0, len;
loff_t oldsize;
/* FIXME: not mtpt it's gone */
len = strlen(fset->fset_cache->cache_mtpt) + strlen(file) + 1;
PRESTO_ALLOC(path, len + 1);
if (path == NULL)
return -1;
sprintf(path, "%s/%s", fset->fset_cache->cache_mtpt, file);
rc = izo_lookup_file(fset, path, &nd);
if (rc)
goto error;
dentry = nd.dentry;
/* FIXME: take a lock here */
if (dentry->d_inode->i_atime > CURRENT_TIME - 5) {
/* We lost the race; this file was accessed while we were doing
* ioctls and lookups and whatnot. */
rc = -EBUSY;
goto error_unlock;
}
/* FIXME: Check if this file is open. */
handle = presto_trans_start(fset, dentry->d_inode, KML_OPCODE_TRUNC);
if (IS_ERR(handle)) {
rc = -ENOMEM;
goto error_unlock;
}
/* FIXME: Write LML record */
oldsize = dentry->d_inode->i_size;
rc = izo_do_truncate(fset, dentry, 0, oldsize);
if (rc != 0)
goto error_clear;
rc = izo_do_truncate(fset, dentry, oldsize, 0);
if (rc != 0)
goto error_clear;
error_clear:
/* FIXME: clear LML record */
error_unlock:
/* FIXME: release the lock here */
error:
if (handle != NULL && !IS_ERR(handle))
presto_trans_commit(fset, handle);
if (path != NULL)
PRESTO_FREE(path, len + 1);
return rc;
#else
/* Purging is not implemented: report success without doing anything. */
return 0;
#endif
}