| From f9153e342e7b04acc8c76ff7d086124d9909aef3 Mon Sep 17 00:00:00 2001 |
| From: Mike Marshall <hubcap@omnibond.com> |
| Date: Wed, 8 Apr 2020 08:52:40 -0400 |
| Subject: [PATCH] orangefs: get rid of knob code... |
| |
| commit ec95f1dedc9c64ac5a8b0bdb7c276936c70fdedd upstream. |
| |
| Christoph Hellwig sent in a reversion of "orangefs: remember count |
| when reading." because: |
| |
| ->read_iter calls can race with each other and one or |
| more ->flush calls. Remove the the scheme to store the read |
| count in the file private data as is is completely racy and |
| can cause use after free or double free conditions |
| |
| Christoph's reversion caused Orangefs not to work or to compile. I |
| added a patch that fixed that, but intel's kbuild test robot pointed |
| out that sending Christoph's patch followed by my patch upstream, it |
| would break bisection because of the failure to compile. So I have |
| combined the reversion plus my patch... here's the commit message |
| that was in my patch: |
| |
| Logically, optimal Orangefs "pages" are 4 megabytes. Reading |
| large Orangefs files 4096 bytes at a time is like trying to |
| kick a dead whale down the beach. Before Christoph's "Revert |
| orangefs: remember count when reading." I tried to give users |
| a knob whereby they could, for example, use "count" in |
| read(2) or bs with dd(1) to get whatever they considered an |
| appropriate amount of bytes at a time from Orangefs and fill |
| as many page cache pages as they could at once. |
| |
| Without the racy code that Christoph reverted Orangefs won't |
| even compile, much less work. So this replaces the logic that |
| used the private file data that Christoph reverted with |
| a static number of bytes to read from Orangefs. |
| |
| I ran tests like the following to determine what a |
| reasonable static number of bytes might be: |
| |
| dd if=/pvfsmnt/asdf of=/dev/null count=128 bs=4194304 |
| dd if=/pvfsmnt/asdf of=/dev/null count=256 bs=2097152 |
| dd if=/pvfsmnt/asdf of=/dev/null count=512 bs=1048576 |
| . |
| . |
| . |
| dd if=/pvfsmnt/asdf of=/dev/null count=4194304 bs=128 |
| |
| Reads seem faster using the static number, so my "knob code" |
| wasn't just racy, it wasn't even a good idea... |
| |
| Signed-off-by: Mike Marshall <hubcap@omnibond.com> |
| Reported-by: kbuild test robot <lkp@intel.com> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c |
| index a35c17017210..59ec1cb0e158 100644 |
| --- a/fs/orangefs/file.c |
| +++ b/fs/orangefs/file.c |
| @@ -313,23 +313,8 @@ static ssize_t orangefs_file_read_iter(struct kiocb *iocb, |
| struct iov_iter *iter) |
| { |
| int ret; |
| - struct orangefs_read_options *ro; |
| - |
| orangefs_stats.reads++; |
| |
| - /* |
| - * Remember how they set "count" in read(2) or pread(2) or whatever - |
| - * users can use count as a knob to control orangefs io size and later |
| - * we can try to help them fill as many pages as possible in readpage. |
| - */ |
| - if (!iocb->ki_filp->private_data) { |
| - iocb->ki_filp->private_data = kmalloc(sizeof *ro, GFP_KERNEL); |
| - if (!iocb->ki_filp->private_data) |
| - return(ENOMEM); |
| - ro = iocb->ki_filp->private_data; |
| - ro->blksiz = iter->count; |
| - } |
| - |
| down_read(&file_inode(iocb->ki_filp)->i_rwsem); |
| ret = orangefs_revalidate_mapping(file_inode(iocb->ki_filp)); |
| if (ret) |
| @@ -598,12 +583,6 @@ static int orangefs_lock(struct file *filp, int cmd, struct file_lock *fl) |
| return rc; |
| } |
| |
| -static int orangefs_file_open(struct inode * inode, struct file *file) |
| -{ |
| - file->private_data = NULL; |
| - return generic_file_open(inode, file); |
| -} |
| - |
| static int orangefs_flush(struct file *file, fl_owner_t id) |
| { |
| /* |
| @@ -617,9 +596,6 @@ static int orangefs_flush(struct file *file, fl_owner_t id) |
| struct inode *inode = file->f_mapping->host; |
| int r; |
| |
| - kfree(file->private_data); |
| - file->private_data = NULL; |
| - |
| if (inode->i_state & I_DIRTY_TIME) { |
| spin_lock(&inode->i_lock); |
| inode->i_state &= ~I_DIRTY_TIME; |
| @@ -642,7 +618,7 @@ const struct file_operations orangefs_file_operations = { |
| .lock = orangefs_lock, |
| .unlocked_ioctl = orangefs_ioctl, |
| .mmap = orangefs_file_mmap, |
| - .open = orangefs_file_open, |
| + .open = generic_file_open, |
| .flush = orangefs_flush, |
| .release = orangefs_file_release, |
| .fsync = orangefs_fsync, |
| diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c |
| index 0c337d8bdaab..c9b3e5ec8221 100644 |
| --- a/fs/orangefs/inode.c |
| +++ b/fs/orangefs/inode.c |
| @@ -259,46 +259,19 @@ static int orangefs_readpage(struct file *file, struct page *page) |
| pgoff_t index; /* which page */ |
| struct page *next_page; |
| char *kaddr; |
| - struct orangefs_read_options *ro = file->private_data; |
| loff_t read_size; |
| - loff_t roundedup; |
| int buffer_index = -1; /* orangefs shared memory slot */ |
| int slot_index; /* index into slot */ |
| int remaining; |
| |
| /* |
| - * If they set some miniscule size for "count" in read(2) |
| - * (for example) then let's try to read a page, or the whole file |
| - * if it is smaller than a page. Once "count" goes over a page |
| - * then lets round up to the highest page size multiple that is |
| - * less than or equal to "count" and do that much orangefs IO and |
| - * try to fill as many pages as we can from it. |
| - * |
| - * "count" should be represented in ro->blksiz. |
| - * |
| - * inode->i_size = file size. |
| + * Get up to this many bytes from Orangefs at a time and try |
| + * to fill them into the page cache at once. Tests with dd made |
| + * this seem like a reasonable static number, if there was |
| + * interest perhaps this number could be made setable through |
| + * sysfs... |
| */ |
| - if (ro) { |
| - if (ro->blksiz < PAGE_SIZE) { |
| - if (inode->i_size < PAGE_SIZE) |
| - read_size = inode->i_size; |
| - else |
| - read_size = PAGE_SIZE; |
| - } else { |
| - roundedup = ((PAGE_SIZE - 1) & ro->blksiz) ? |
| - ((ro->blksiz + PAGE_SIZE) & ~(PAGE_SIZE -1)) : |
| - ro->blksiz; |
| - if (roundedup > inode->i_size) |
| - read_size = inode->i_size; |
| - else |
| - read_size = roundedup; |
| - |
| - } |
| - } else { |
| - read_size = PAGE_SIZE; |
| - } |
| - if (!read_size) |
| - read_size = PAGE_SIZE; |
| + read_size = 524288; |
| |
| if (PageDirty(page)) |
| orangefs_launder_page(page); |
| diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h |
| index 572dd29fbd54..884cdf134f3e 100644 |
| --- a/fs/orangefs/orangefs-kernel.h |
| +++ b/fs/orangefs/orangefs-kernel.h |
| @@ -239,10 +239,6 @@ struct orangefs_write_range { |
| kgid_t gid; |
| }; |
| |
| -struct orangefs_read_options { |
| - ssize_t blksiz; |
| -}; |
| - |
| extern struct orangefs_stats orangefs_stats; |
| |
| /* |
| -- |
| 2.27.0 |
| |