// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <linux/falloc.h>
#include <asm/mman.h>
#include "euler.h"
#include "dax.h"
#include "dep.h"

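/*
 * Preallocate blocks for the byte range [offset, offset + len).
 * Only FALLOC_FL_ZERO_RANGE and FALLOC_FL_KEEP_SIZE are handled here;
 * insert-range, punch-hole and collapse-range are rejected with
 * -EOPNOTSUPP.
 */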
static long eufs_fallocate(struct file *file, int mode, loff_t offset,
			   loff_t len)
{
	struct inode *inode = file_inode(file);
	struct eufs_inode_info *vi = EUFS_I(inode);
	loff_t end = offset + len;
	unsigned long start_page = offset >> PAGE_SHIFT,
		      end_page = DIV_ROUND_UP(end, PAGE_SIZE);
	bool zero = mode & FALLOC_FL_ZERO_RANGE;
	bool keep = mode & FALLOC_FL_KEEP_SIZE;
	long r = 0;

	if (mode & (FALLOC_FL_INSERT_RANGE | FALLOC_FL_PUNCH_HOLE |
		    FALLOC_FL_COLLAPSE_RANGE))
		return -EOPNOTSUPP;

	/* Check the new size against rlimit/s_maxbytes (fixes xfstests 228) */
	r = inode_newsize_ok(inode, len + offset);
	if (r)
		return r;

	inode_lock(inode);

	down_write(&vi->mmap_rwsem);

	r = eufs_alloc_blocks_btree(
		inode, start_page, end_page - start_page,
		(offset < inode->i_size) ?
			/* Overwriting existing data: zero all new blocks */
			EUFS_ALLOC_BLOCKS_ZERO_ALL :
			/*
			 * Starting past EOF but inside the page that holds
			 * EOF: only the edge block needs zeroing; otherwise
			 * no zeroing is needed at all.
			 */
			(((offset >> PAGE_SHIFT) <=
			  (inode->i_size >> PAGE_SHIFT) &&
			  (offset > inode->i_size)) ?
				 EUFS_ALLOC_BLOCKS_ZERO_EDGE :
				 EUFS_ALLOC_BLOCKS_ZERO_NONE));
	if (IS_ERR_VALUE(r))
		goto out;

	eufs_dbg(
		"fallocate (f=%px, fsize=%llx, offset=%llx, len=%llx, zero=%d, keep=%d)\n",
		file, inode->i_size, offset, len, zero, keep);
	if (offset + len >= inode->i_size) {
		if (!zero) {
			/* zero inode->i_size ~> offset + len */
			__eufs_file_write(file->f_mapping, NULL,
					  offset + len - inode->i_size,
					  inode->i_size, NULL, true, keep);

			/* zero the part of the last block beyond the new EOF */
			eufs_inode_zero_range(inode, offset + len,
					      PAGE_ALIGN(offset + len));
		} else if (offset >= inode->i_size) {
			eufs_dbg("zero(f=%px): %llx to %llx\n", file,
				 inode->i_size, offset);
			/* zero inode->i_size ~> offset */
			__eufs_file_write(file->f_mapping, NULL,
					  offset - inode->i_size,
					  inode->i_size, NULL, true, keep);
		}

		if (!keep)
			inode->i_size = offset + len;
	}

	if (zero) {
		/* zero offset ~> offset + len */
		__eufs_file_write(file->f_mapping, NULL, len, offset, NULL,
				  true, keep);

		/* zero the part of the last block beyond the new EOF */
		if (offset + len >= inode->i_size)
			eufs_inode_zero_range(inode, offset + len,
					      PAGE_ALIGN(offset + len));
	}

	request_persistence(inode);

out:
	up_write(&vi->mmap_rwsem);
	inode_unlock(inode);

	return r;
}

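/*
 * Make sure directory @dir has been persisted at least up to dependency
 * sequence number @dep_seq; skip the fsync when it already has been.
 */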
static void eufs_dir_fsync_until_seq(struct inode *dir, u32 dep_seq)
{
	struct eufs_inode_info *vinode = EUFS_I(dir);

	if (eufs_dep_seq_after_eq(vinode->i_persisted_dep_seq, dep_seq))
		return;

	inode_lock(dir);
	eufs_dir_fsync_oneshot(dir);
	inode_unlock(dir);
}

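/*
 * Persist every dentry that refers to @inode by fsyncing the directory
 * that owns each dependency.  The owner lock is dropped around the
 * directory fsync, which may sleep.
 */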
static void eufs_persist_dentries(struct inode *inode)
{
	struct eufs_inode_info *vi = EUFS_I(inode);
	struct list_head *head = &vi->i_owner_list;

	/* unlocked fast path; rechecked under i_owner_lock below */
	if (list_empty(head))
		return;

	spin_lock(&vi->i_owner_lock);
	while (!list_empty(head)) {
		struct dep_node *dep;
		struct inode *dir;
		u32 seq;

		dep = list_first_entry(head, struct dep_node, owner_node);
		dir = dep->dir;
		seq = dep->seq;

		/* let the dep node be freed by directory persistence */
		list_del_init(&dep->owner_node);
		spin_unlock(&vi->i_owner_lock);

		eufs_dir_fsync_until_seq(dir, seq);

		spin_lock(&vi->i_owner_lock);
	}
	spin_unlock(&vi->i_owner_lock);
}

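/*
 * Persist the dentries of every ancestor directory, walking from
 * @child's parent up to the filesystem root.
 */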
static void eufs_persist_parent_dentry_till_root(struct dentry *child)
{
	struct dentry *cur = child;

	while (!IS_ROOT(cur)) {
		struct dentry *parent;

		parent = cur->d_parent;
		eufs_persist_dentries(parent->d_inode);
		cur = parent;
	}
}

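/*
 * As above, but for an inode that may have several hard links: pin up
 * to i_nlink aliases of the inode and persist the ancestor chain of
 * each one.
 */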
static int eufs_persist_parent_dentries_till_root(struct file *filp)
{
	struct inode *inode = file_inode(filp);
	unsigned int nlink = inode->i_nlink;
	struct dentry *alias;
	struct dentry **aliases;
	unsigned int cnt;
	unsigned int idx;

	/* unlinked: no parent dentry left to persist */
	if (nlink == 0)
		return 0;

	/* single link: only the file's own dentry chain matters */
	if (nlink == 1) {
		eufs_persist_parent_dentry_till_root(filp->f_path.dentry);
		return 0;
	}

	aliases = kmalloc_array(nlink, sizeof(*aliases), GFP_KERNEL);
	if (aliases == NULL)
		return -ENOMEM;

	/* pin up to @nlink aliases so they can be walked without i_lock */
	cnt = 0;
	spin_lock(&inode->i_lock);
	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
		dget(alias);
		aliases[cnt++] = alias;

		if (cnt >= nlink)
			break;
	}
	spin_unlock(&inode->i_lock);

	for (idx = 0; idx < cnt; idx++) {
		eufs_persist_parent_dentry_till_root(aliases[idx]);
		dput(aliases[idx]);
	}

	kfree(aliases);

	return 0;
}

/*
 * Possible crash-time inconsistencies, given the persistence order below:
 *
 * non-dir:
 *  persist order: data -> inode & name -> dentries... -> parent dentries...
 *  (persisting a dentry covers: parent data -> parent inode)
 *  (1) the link count can end up too high: new A/1, link B/2 (A/1), fsync A/1
 *  (2) the link count can end up too low: new A/1, link B/2 (A/1), fsync A/1,
 *      rm A/1, fsync A/1
 *
 * dir:
 *  persist order: children inodes -> data -> inode & name
 *  (1) link count & size can end up too low: new A/1..3, fsync A
 *  (2) link count & size can end up too high: new A/1..3, fsync A, rm A/1,
 *      fsync A
 */
/* This function is called by both msync() and fsync(). */
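/*
 * Persistence order: flush data written through mmap (regular files
 * only), then the inode itself, then the dentries referring to it, and
 * finally the ancestor dentries up to the root.
 */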
int eufs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	/* Sync the byte range from start to end (inclusive) */
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	struct eufs_inode_info *vi = EUFS_I(inode);
	umode_t mode;
	int err = 0;
	bool inode_is_dirty;

	mode = inode->i_mode;
	/* persist file data written through mmap */
	if (S_ISREG(mode)) {
		err = filemap_write_and_wait_range(inode->i_mapping, start,
						   end);
		if (err)
			return err;
	}

	/* persist the inode's data and the inode itself first */
	inode_is_dirty = false;
	inode_lock(inode);
	if (vi->i_is_dirty) {
		inode_is_dirty = true;
	} else {
		/*
		 * Modifications of mtime/ctime/atime are not tracked by
		 * the persister yet, so check for them here.
		 */
		spin_lock(&inode->i_lock);
		if (inode->i_state & I_DIRTY_SYNC) {
			inode->i_state &= ~I_DIRTY_SYNC;
			inode_is_dirty = true;
		}
		spin_unlock(&inode->i_lock);
	}
	if (inode_is_dirty)
		fsync_oneshot(inode);
	inode_unlock(inode);

	/*
	 * Persist the dentries related to the inode.  A non-directory may
	 * have multiple dentries referring to it (hard links).
	 */
	eufs_persist_dentries(inode);

	/*
	 * Persist the parent dentries, recursing upward until the root
	 * dentry is reached.  A non-directory may have multiple parent
	 * dentries due to hard links.
	 */
	if (!S_ISDIR(mode))
		err = eufs_persist_parent_dentries_till_root(file);
	else
		eufs_persist_parent_dentry_till_root(file->f_path.dentry);

	return err;
}

const struct file_operations eufs_file_operations = {
	.llseek = eufs_file_llseek,
	.read = eufs_file_read,
	.write = eufs_file_write,
	.mmap = eufs_dax_file_mmap,
	.open = generic_file_open,
	.fsync = eufs_fsync,
	.flush = NULL,
	.fallocate = eufs_fallocate,
};

const struct inode_operations eufs_file_inode_operations = {
	.setattr = eufs_notify_change,
	.getattr = eufs_file_getattr,
};