blob: 909f447128d2a9205827fd516baa4f6cedf965a8 [file] [log] [blame]
/*
* linux/fs/nfs/read.c
*
* Block I/O for NFS
*
* Partial copy of Linus' read cache modifications to fs/nfs/file.c
* modified for async RPC by okir@monad.swb.de
*
* We do an ugly hack here in order to return proper error codes to the
* user program when a read request failed: since generic_file_read
* only checks the return value of inode->i_op->readpage() which is always 0
* for async RPC, we set the error bit of the page to 1 when an error occurs,
* and make nfs_readpage transmit requests synchronously when encountering this.
* This is only a small problem, though, since we now retry all operations
* within the RPC code when root squashing is suspected.
*/
#include <linux/config.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/nfs_flushd.h>
#include <linux/smp_lock.h>
#include <asm/system.h>
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
/*
 * Per-call state for one asynchronous READ RPC.  Allocated from
 * nfs_rdata_cachep for every coalesced batch of page read requests and
 * freed from the task's tk_release callback (nfs_readdata_release).
 */
struct nfs_read_data {
	struct rpc_task		task;	/* embedded RPC task driving the call */
	struct inode		*inode;	/* inode being read */
	struct rpc_cred		*cred;	/* credential the call is issued under */
	struct nfs_readargs	args;	/* XDR argument struct */
	struct nfs_readres	res;	/* ... and result struct */
	struct nfs_fattr	fattr;	/* fattr storage */
	struct list_head	pages;	/* Coalesced read requests */
};
/*
* Local function declarations
*/
static void nfs_readpage_result(struct rpc_task *task);
/* Hack for future NFS swap support */
#ifndef IS_SWAPFILE
# define IS_SWAPFILE(inode) (0)
#endif
static kmem_cache_t *nfs_rdata_cachep;
static __inline__ struct nfs_read_data *nfs_readdata_alloc(void)
{
struct nfs_read_data *p;
p = kmem_cache_alloc(nfs_rdata_cachep, SLAB_NOFS);
if (p) {
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->pages);
}
return p;
}
/*
 * Return an nfs_read_data to the slab cache.
 */
static __inline__ void nfs_readdata_free(struct nfs_read_data *p)
{
	kmem_cache_free(nfs_rdata_cachep, p);
}
/*
 * RPC task release callback: free the nfs_read_data hung off the task.
 */
static void nfs_readdata_release(struct rpc_task *task)
{
	nfs_readdata_free((struct nfs_read_data *) task->tk_calldata);
}
/*
* Read a page synchronously.
*/
/*
 * Read one page synchronously, issuing as many READ calls of at most
 * 'rsize' bytes as needed to fill it.  Returns 0 and marks the page
 * uptodate on success; on error the page is left !uptodate and the
 * negative error code is returned.  The page is unlocked either way.
 */
static int
nfs_readpage_sync(struct file *file, struct inode *inode, struct page *page)
{
	struct rpc_cred	*cred = NULL;
	struct nfs_fattr fattr;
	loff_t		offset = page_offset(page);
	char		*buffer;
	int		rsize = NFS_SERVER(inode)->rsize;
	int		result;
	int		count = PAGE_CACHE_SIZE;
	int		flags = IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0;
	int		eof;

	dprintk("NFS: nfs_readpage_sync(%p)\n", page);

	if (file)
		cred = nfs_file_cred(file);

	/*
	 * This works now because the socket layer never tries to DMA
	 * into this buffer directly.
	 */
	buffer = kmap(page);
	do {
		/* Never ask for more than the bytes still missing. */
		if (count < rsize)
			rsize = count;

		dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Ld, %d, %p)\n",
			NFS_SERVER(inode)->hostname,
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			(long long)offset, rsize, buffer);

		lock_kernel();
		result = NFS_PROTO(inode)->read(inode, cred, &fattr, flags,
						offset, rsize, buffer, &eof);
		/* Fold the attributes returned with the READ reply back
		 * into the inode while still under the BKL. */
		nfs_refresh_inode(inode, &fattr);
		unlock_kernel();

		/*
		 * Even if we had a partial success we can't mark the page
		 * cache valid.
		 */
		if (result < 0) {
			if (result == -EISDIR)
				result = -EINVAL;
			goto io_error;
		}
		count -= result;
		offset += result;
		buffer += result;

		/* NOTE(review): 'eof' is filled in by the protocol read but
		 * never examined; a short read is taken as end-of-file. */
		if (result < rsize)	/* NFSv2ism */
			break;
	} while (count);

	/* Zero-fill whatever part of the page the server did not supply. */
	memset(buffer, 0, count);
	flush_dcache_page(page);
	SetPageUptodate(page);
	if (PageError(page))
		ClearPageError(page);
	result = 0;

io_error:
	kunmap(page);
	UnlockPage(page);
	return result;
}
/*
* Add a request to the inode's asynchronous read list.
*/
static inline void
nfs_mark_request_read(struct nfs_page *req)
{
	struct inode	*inode = req->wb_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	/* nfs_wreq_lock protects the per-inode read list, its counter,
	 * and the per-server read LRU as a unit. */
	spin_lock(&nfs_wreq_lock);
	nfs_list_add_request(req, &nfsi->read);
	nfsi->nread++;
	__nfs_add_lru(&NFS_SERVER(inode)->lru_read, req);
	spin_unlock(&nfs_wreq_lock);
}
/*
 * Queue an asynchronous full-page read request for this page and kick
 * off the I/O once enough requests have accumulated.  Returns 0 on
 * success or the PTR_ERR of a failed request allocation.
 */
static int
nfs_readpage_async(struct file *file, struct inode *inode, struct page *page)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_page	*new;

	new = nfs_create_request(nfs_file_cred(file), inode, page, 0, PAGE_CACHE_SIZE);
	if (IS_ERR(new))
		return PTR_ERR(new);
	nfs_mark_request_read(new);

	/* Flush the batch when it reaches the server's preferred size.
	 * NOTE(review): the second test compares the page index against
	 * the file's page *count*, which is one past the last in-file
	 * index -- confirm the intended end-of-file trigger. */
	if (nfsi->nread >= NFS_SERVER(inode)->rpages ||
	    page_index(page) == (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)
		nfs_pagein_inode(inode, 0, 0);
	return 0;
}
/*
 * Set up the NFS read request struct.
 *
 * Moves every nfs_page on @head onto @data->pages, builds one iovec
 * entry per page (each page is kmapped here and kunmapped again in
 * nfs_readpage_result), and fills in the RPC argument and result
 * blocks.  The requests on @head must be contiguous and belong to the
 * same file, so the first request determines fh and starting offset.
 */
static void
nfs_read_rpcsetup(struct list_head *head, struct nfs_read_data *data)
{
	struct nfs_page	*req;
	struct iovec	*iov;
	unsigned int	count;

	iov = data->args.iov;
	count = 0;
	while (!list_empty(head)) {
		/* Fix: the original declared a second 'req' here, shadowing
		 * the function-scope variable; one variable suffices. */
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &data->pages);
		iov->iov_base = kmap(req->wb_page) + req->wb_offset;
		iov->iov_len  = req->wb_bytes;
		count += req->wb_bytes;
		iov++;
		data->args.nriov++;
	}

	req = nfs_list_entry(data->pages.next);
	data->inode	  = req->wb_inode;
	data->cred	  = req->wb_cred;
	data->args.fh	  = NFS_FH(req->wb_inode);
	data->args.offset = page_offset(req->wb_page) + req->wb_offset;
	data->args.count  = count;
	data->res.fattr	  = &data->fattr;
	data->res.count	  = count;
	data->res.eof	  = 0;
}
/*
 * Error path for a batch of async read requests: mark each page in
 * error, unlock it, and drop the request.
 */
static void
nfs_async_read_error(struct list_head *head)
{
	struct nfs_page	*req;
	struct page	*page;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		page = req->wb_page;
		nfs_list_remove_request(req);
		/* PG_error makes nfs_readpage() retry synchronously. */
		SetPageError(page);
		UnlockPage(page);
		nfs_clear_request(req);
		nfs_release_request(req);
		/* NOTE(review): unlock runs after release; presumably an
		 * outstanding reference keeps req alive here -- verify
		 * against nfs_page refcounting. */
		nfs_unlock_request(req);
	}
}
/*
 * Fire off a single asynchronous READ covering all the coalesced
 * requests on 'head'.  On allocation failure every request on the list
 * is failed.  Returns 0 on success, -ENOMEM otherwise.
 */
static int
nfs_pagein_one(struct list_head *head, struct inode *inode)
{
	struct rpc_task		*task;
	struct rpc_clnt		*clnt = NFS_CLIENT(inode);
	struct nfs_read_data	*data;
	struct rpc_message	msg;
	int			flags;
	sigset_t		oldset;

	data = nfs_readdata_alloc();
	if (!data)
		goto out_bad;
	task = &data->task;

	/* N.B. Do we need to test? Never called for swapfile inode */
	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);

	/* Move requests onto data->pages and build args/iovecs. */
	nfs_read_rpcsetup(head, data);

	/* Finalize the task. */
	rpc_init_task(task, clnt, nfs_readpage_result, flags);
	task->tk_calldata = data;
	/* Release requests */
	task->tk_release = nfs_readdata_release;

#ifdef CONFIG_NFS_V3
	msg.rpc_proc = (NFS_PROTO(inode)->version == 3) ? NFS3PROC_READ : NFSPROC_READ;
#else
	msg.rpc_proc = NFSPROC_READ;
#endif
	msg.rpc_argp = &data->args;
	msg.rpc_resp = &data->res;
	msg.rpc_cred = data->cred;

	/* Start the async call */
	dprintk("NFS: %4d initiated read call (req %s/%Ld count %d nriov %d.\n",
		task->tk_pid,
		inode->i_sb->s_id,
		(long long)NFS_FILEID(inode),
		data->args.count, data->args.nriov);

	/* Mask signals the RPC client does not want delivered while the
	 * call is being launched. */
	rpc_clnt_sigmask(clnt, &oldset);
	rpc_call_setup(task, &msg, 0);
	lock_kernel();
	rpc_execute(task);
	unlock_kernel();
	rpc_clnt_sigunmask(clnt, &oldset);
	return 0;

out_bad:
	nfs_async_read_error(head);
	return -ENOMEM;
}
/*
 * nfs_pagein_list - coalesce and transmit a list of read requests
 * @head: list of nfs_page read requests
 * @rpages: maximum number of pages per READ call
 *
 * Repeatedly peels batches of contiguous same-file requests off 'head'
 * and transmits each with nfs_pagein_one().  Returns the number of
 * pages started, or a negative error after failing the leftovers.
 */
int
nfs_pagein_list(struct list_head *head, int rpages)
{
	LIST_HEAD(one_request);
	struct nfs_page		*req;
	int			error = 0;
	unsigned int		pages = 0;

	while (!list_empty(head)) {
		pages += nfs_coalesce_requests(head, &one_request, rpages);
		req = nfs_list_entry(one_request.next);
		error = nfs_pagein_one(&one_request, req->wb_inode);
		if (error < 0)
			break;
	}
	if (error >= 0)
		return pages;

	/* Fail whatever we did not manage to send. */
	nfs_async_read_error(head);
	return error;
}
/**
 * nfs_scan_lru_read_timeout - Scan LRU list for timed out read requests
 * @server: NFS superblock data
 * @dst: destination list
 *
 * Moves a maximum of 'rpages' timed out requests from the NFS read LRU list.
 * The elements are checked to ensure that they form a contiguous set
 * of pages, and that they originated from the same file.
 */
int
nfs_scan_lru_read_timeout(struct nfs_server *server, struct list_head *dst)
{
	int moved;

	moved = nfs_scan_lru_timeout(&server->lru_read, dst, server->rpages);
	if (moved != 0) {
		struct nfs_inode *nfsi;

		/* All moved requests belong to a single inode; debit its
		 * queued-read counter accordingly. */
		nfsi = NFS_I(nfs_list_entry(dst->next)->wb_inode);
		nfsi->nread -= moved;
	}
	return moved;
}
/**
 * nfs_scan_lru_read - Scan LRU list for read requests
 * @server: NFS superblock data
 * @dst: destination list
 *
 * Moves a maximum of 'rpages' requests from the NFS read LRU list.
 * The elements are checked to ensure that they form a contiguous set
 * of pages, and that they originated from the same file.
 */
int
nfs_scan_lru_read(struct nfs_server *server, struct list_head *dst)
{
	int moved;

	moved = nfs_scan_lru(&server->lru_read, dst, server->rpages);
	if (moved != 0) {
		struct nfs_inode *nfsi;

		/* The batch is from one inode; debit its read counter. */
		nfsi = NFS_I(nfs_list_entry(dst->next)->wb_inode);
		nfsi->nread -= moved;
	}
	return moved;
}
/*
 * nfs_scan_read - Scan an inode for read requests
 * @inode: NFS inode to scan
 * @dst: destination list
 * @idx_start: lower bound of page->index to scan
 * @npages: idx_start + npages sets the upper bound to scan
 *
 * Moves requests from the inode's read list.
 * The requests are *not* checked to ensure that they form a contiguous set.
 */
static int
nfs_scan_read(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int moved = nfs_scan_list(&nfsi->read, dst, NULL, idx_start, npages);

	nfsi->nread -= moved;
	/* Sanity check: the counter and the list must agree on emptiness. */
	if ((nfsi->nread == 0) != list_empty(&nfsi->read))
		printk(KERN_ERR "NFS: desynchronized value of nfs_i.nread.\n");
	return moved;
}
/*
 * Collect the inode's queued read requests in the given page range and
 * send them to the server.  Returns the number of pages scanned, or a
 * negative error from the transmit path.
 */
int nfs_pagein_inode(struct inode *inode, unsigned long idx_start,
		unsigned int npages)
{
	LIST_HEAD(head);
	int error = 0;
	int res;

	spin_lock(&nfs_wreq_lock);
	res = nfs_scan_read(inode, &head, idx_start, npages);
	spin_unlock(&nfs_wreq_lock);
	if (res != 0)
		error = nfs_pagein_list(&head, NFS_SERVER(inode)->rpages);
	return (error < 0) ? error : res;
}
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
*/
static void
nfs_readpage_result(struct rpc_task *task)
{
	struct nfs_read_data	*data = (struct nfs_read_data *) task->tk_calldata;
	struct inode		*inode = data->inode;
	unsigned int		count = data->res.count;

	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
		task->tk_pid, task->tk_status);

	nfs_refresh_inode(inode, &data->fattr);

	/* Distribute the 'count' bytes the server returned over the
	 * coalesced pages in order. */
	while (!list_empty(&data->pages)) {
		struct nfs_page *req = nfs_list_entry(data->pages.next);
		struct page *page = req->wb_page;
		nfs_list_remove_request(req);

		if (task->tk_status >= 0) {
			if (count < PAGE_CACHE_SIZE) {
				/* Short read: zero the tail of this page;
				 * any later pages are zeroed entirely. */
				char *p = kmap(page);
				memset(p + count, 0, PAGE_CACHE_SIZE - count);
				kunmap(page);
				count = 0;
			} else
				count -= PAGE_CACHE_SIZE;
			SetPageUptodate(page);
		} else
			/* PG_error triggers a synchronous retry later. */
			SetPageError(page);
		flush_dcache_page(page);
		/* Balances the kmap() taken in nfs_read_rpcsetup(). */
		kunmap(page);
		UnlockPage(page);

		dprintk("NFS: read (%s/%Ld %d@%Ld)\n",
			req->wb_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_inode),
			req->wb_bytes,
			(long long)(page_offset(page) + req->wb_offset));
		nfs_clear_request(req);
		nfs_release_request(req);
		nfs_unlock_request(req);
	}
}
/*
* Read a page over NFS.
* We read the page synchronously in the following cases:
* - The NFS rsize is smaller than PAGE_CACHE_SIZE. We could kludge our way
* around this by creating several consecutive read requests, but
* that's hardly worth it.
* - The error flag is set for this page. This happens only when a
* previous async read operation failed.
*/
int
nfs_readpage(struct file *file, struct page *page)
{
	struct inode	*inode = page->mapping->host;
	int		error;

	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
		page, PAGE_CACHE_SIZE, page->index);

	/*
	 * Try to flush any pending writes to the file..
	 *
	 * NOTE! Because we own the page lock, there cannot
	 * be any new pending writes generated at this point
	 * for this page (other pages can be written to).
	 */
	error = nfs_wb_page(inode, page);
	if (error)
		goto out_error;

	/* Go async only when one READ can cover the whole page and the
	 * page has not already failed a previous async attempt. */
	if (!PageError(page) && NFS_SERVER(inode)->rsize >= PAGE_CACHE_SIZE) {
		error = nfs_readpage_async(file, inode, page);
		goto out;
	}

	error = nfs_readpage_sync(file, inode, page);
	if (error < 0 && IS_SWAPFILE(inode))
		printk("Aiee.. nfs swap-in of page failed!\n");
out:
	return error;
out_error:
	/* nfs_readpage_{sync,async} unlock the page themselves; only the
	 * writeback-failure path must unlock here. */
	UnlockPage(page);
	goto out;
}
/*
 * Create the slab cache backing nfs_read_data allocations.
 * Returns 0 on success, -ENOMEM if the cache cannot be created.
 */
int nfs_init_readpagecache(void)
{
	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
					     sizeof(struct nfs_read_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL, NULL);
	return (nfs_rdata_cachep == NULL) ? -ENOMEM : 0;
}
/*
 * Tear down the read-data slab cache; warn if any objects leaked.
 */
void nfs_destroy_readpagecache(void)
{
	if (kmem_cache_destroy(nfs_rdata_cachep) != 0)
		printk(KERN_INFO "nfs_read_data: not all structures were freed\n");
}