Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "15 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm, docs: update memory.stat description with workingset* entries
  mm: vmscan: scan until it finds eligible pages
  mm, thp: copying user pages must schedule on collapse
  dax: fix PMD data corruption when fault races with write
  dax: fix data corruption when fault races with write
  ext4: return to starting transaction in ext4_dax_huge_fault()
  mm: fix data corruption due to stale mmap reads
  dax: prevent invalidation of mapped DAX entries
  Tigran has moved
  mm, vmalloc: fix vmalloc users tracking properly
  mm/khugepaged: add missed tracepoint for collapse_huge_page_swapin
  gcov: support GCC 7.1
  mm, vmstat: Remove spurious WARN() during zoneinfo print
  time: delete current_fs_time()
  hwpoison, memcg: forcibly uncharge LRU pages
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index d5a22ba..0ff8fe7 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -98,7 +98,7 @@
 
 		if (bytes < 8) {
 			if (!IS_ALIGNED(dest, 4) || (bytes != 4))
-				arch_wb_cache_pmem(addr, 1);
+				arch_wb_cache_pmem(addr, bytes);
 		} else {
 			if (!IS_ALIGNED(dest, 8)) {
 				dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
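
The pmem.h one-liner matters because arch_wb_cache_pmem() rounds its range
to cache-line boundaries before issuing write-backs, so the length argument
decides how many lines get flushed; passing 1 can under-flush a 4-byte store
that straddles a line. A minimal sketch of that rounding, assuming a 64-byte
line (the real code reads boot_cpu_data.x86_clflush_size) and the x86 clwb()
wrapper:

    /*
     * Sketch only, not the kernel's exact implementation: round the
     * start down to a line boundary and write back every line the
     * [addr, addr + size) range touches.
     */
    static void wb_cache_range(void *addr, size_t size)
    {
            unsigned long clsize = 64;      /* assumed line size */
            unsigned long mask = clsize - 1;
            char *p = (char *)((unsigned long)addr & ~mask);
            char *end = (char *)addr + size;

            for (; p < end; p += clsize)
                    clwb(p);                /* x86 CLWB wrapper */
    }
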
diff --git a/block/Kconfig b/block/Kconfig
index a8ad7e7..89cd28f 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -6,7 +6,6 @@
        default y
        select SBITMAP
        select SRCU
-       select DAX
        help
 	 Provide block layer support for the kernel.
 
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index b7053ea..b79aa8f 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -19,7 +19,7 @@
 
 config DEV_DAX_PMEM
 	tristate "PMEM DAX: direct access to persistent memory"
-	depends on LIBNVDIMM && NVDIMM_DAX
+	depends on LIBNVDIMM && NVDIMM_DAX && DEV_DAX
 	default DEV_DAX
 	help
 	  Support raw access to persistent memory.  Note that this
@@ -28,9 +28,4 @@
 
 	  Say Y if unsure
 
-config NR_DEV_DAX
-	int "Maximum number of Device-DAX instances"
-	default 32768
-	range 256 2147483647
-
 endif
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 465dcd7..ebf43f5 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -14,16 +14,13 @@
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/magic.h>
+#include <linux/genhd.h>
 #include <linux/cdev.h>
 #include <linux/hash.h>
 #include <linux/slab.h>
 #include <linux/dax.h>
 #include <linux/fs.h>
 
-static int nr_dax = CONFIG_NR_DEV_DAX;
-module_param(nr_dax, int, S_IRUGO);
-MODULE_PARM_DESC(nr_dax, "max number of dax device instances");
-
 static dev_t dax_devt;
 DEFINE_STATIC_SRCU(dax_srcu);
 static struct vfsmount *dax_mnt;
@@ -47,6 +44,75 @@
 }
 EXPORT_SYMBOL_GPL(dax_read_unlock);
 
+int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
+		pgoff_t *pgoff)
+{
+	phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512;
+
+	if (pgoff)
+		*pgoff = PHYS_PFN(phys_off);
+	if (phys_off % PAGE_SIZE || size % PAGE_SIZE)
+		return -EINVAL;
+	return 0;
+}
+EXPORT_SYMBOL(bdev_dax_pgoff);
+
+/**
+ * __bdev_dax_supported() - Check if the device supports dax for a filesystem
+ * @sb: The superblock of the device
+ * @blocksize: The block size of the device
+ *
+ * This is a library function for filesystems to check if the block device
+ * can be mounted with the dax option.
+ *
+ * Return: negative errno if unsupported, 0 if supported.
+ */
+int __bdev_dax_supported(struct super_block *sb, int blocksize)
+{
+	struct block_device *bdev = sb->s_bdev;
+	struct dax_device *dax_dev;
+	pgoff_t pgoff;
+	int err, id;
+	void *kaddr;
+	pfn_t pfn;
+	long len;
+
+	if (blocksize != PAGE_SIZE) {
+		pr_err("VFS (%s): error: unsupported blocksize for dax\n",
+				sb->s_id);
+		return -EINVAL;
+	}
+
+	err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
+	if (err) {
+		pr_err("VFS (%s): error: unaligned partition for dax\n",
+				sb->s_id);
+		return err;
+	}
+
+	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+	if (!dax_dev) {
+		pr_err("VFS (%s): error: device does not support dax\n",
+				sb->s_id);
+		return -EOPNOTSUPP;
+	}
+
+	id = dax_read_lock();
+	len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
+	dax_read_unlock(id);
+
+	put_dax(dax_dev);
+
+	if (len < 1) {
+		pr_err("VFS (%s): error: dax access failed (%ld)",
+				sb->s_id, len);
+		return len < 0 ? len : -EIO;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__bdev_dax_supported);
+
 /**
  * struct dax_device - anchor object for dax services
  * @inode: core vfs
@@ -261,7 +327,7 @@
 	if (__host && !host)
 		return NULL;
 
-	minor = ida_simple_get(&dax_minor_ida, 0, nr_dax, GFP_KERNEL);
+	minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL);
 	if (minor < 0)
 		goto err_minor;
 
@@ -405,8 +471,7 @@
 	if (rc)
 		return rc;
 
-	nr_dax = max(nr_dax, 256);
-	rc = alloc_chrdev_region(&dax_devt, 0, nr_dax, "dax");
+	rc = alloc_chrdev_region(&dax_devt, 0, MINORMASK+1, "dax");
 	if (rc)
 		__dax_fs_exit();
 	return rc;
@@ -414,7 +479,7 @@
 
 static void __exit dax_fs_exit(void)
 {
-	unregister_chrdev_region(dax_devt, nr_dax);
+	unregister_chrdev_region(dax_devt, MINORMASK+1);
 	ida_destroy(&dax_minor_ida);
 	__dax_fs_exit();
 }
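
With __bdev_dax_supported() now living in drivers/dax, a filesystem gates
its "dax" mount option without reaching into fs/block_dev.c. A hypothetical
mount-time check (the function name and fallback message are illustrative,
not taken from any filesystem):

    static int example_fs_check_dax(struct super_block *sb, int blocksize)
    {
            int err = bdev_dax_supported(sb, blocksize);

            if (err)
                    pr_warn("%s: dax unsupported, using the page cache\n",
                                    sb->s_id);
            return err;
    }
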
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 9faaa96..822198a 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -218,7 +218,8 @@
 }
 
 static int nsblk_rw_bytes(struct nd_namespace_common *ndns,
-		resource_size_t offset, void *iobuf, size_t n, int rw)
+		resource_size_t offset, void *iobuf, size_t n, int rw,
+		unsigned long flags)
 {
 	struct nd_namespace_blk *nsblk = to_nd_namespace_blk(&ndns->dev);
 	struct nd_blk_region *ndbr = to_ndbr(nsblk);
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 368795a..983718b 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -32,45 +32,53 @@
 };
 
 static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
-		void *buf, size_t n)
+		void *buf, size_t n, unsigned long flags)
 {
 	struct nd_btt *nd_btt = arena->nd_btt;
 	struct nd_namespace_common *ndns = nd_btt->ndns;
 
 	/* arena offsets are 4K from the base of the device */
 	offset += SZ_4K;
-	return nvdimm_read_bytes(ndns, offset, buf, n);
+	return nvdimm_read_bytes(ndns, offset, buf, n, flags);
 }
 
 static int arena_write_bytes(struct arena_info *arena, resource_size_t offset,
-		void *buf, size_t n)
+		void *buf, size_t n, unsigned long flags)
 {
 	struct nd_btt *nd_btt = arena->nd_btt;
 	struct nd_namespace_common *ndns = nd_btt->ndns;
 
 	/* arena offsets are 4K from the base of the device */
 	offset += SZ_4K;
-	return nvdimm_write_bytes(ndns, offset, buf, n);
+	return nvdimm_write_bytes(ndns, offset, buf, n, flags);
 }
 
 static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
 {
 	int ret;
 
+	/*
+	 * infooff and info2off should always be at least 512B aligned.
+	 * Error clearing in rw_bytes relies on this, so verify that it
+	 * holds here.
+	 */
+	WARN_ON_ONCE(!IS_ALIGNED(arena->infooff, 512));
+	WARN_ON_ONCE(!IS_ALIGNED(arena->info2off, 512));
+
 	ret = arena_write_bytes(arena, arena->info2off, super,
-			sizeof(struct btt_sb));
+			sizeof(struct btt_sb), 0);
 	if (ret)
 		return ret;
 
 	return arena_write_bytes(arena, arena->infooff, super,
-			sizeof(struct btt_sb));
+			sizeof(struct btt_sb), 0);
 }
 
 static int btt_info_read(struct arena_info *arena, struct btt_sb *super)
 {
 	WARN_ON(!super);
 	return arena_read_bytes(arena, arena->infooff, super,
-			sizeof(struct btt_sb));
+			sizeof(struct btt_sb), 0);
 }
 
 /*
@@ -79,16 +87,17 @@
  *   mapping is in little-endian
  *   mapping contains 'E' and 'Z' flags as desired
  */
-static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping)
+static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping,
+		unsigned long flags)
 {
 	u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
 
 	WARN_ON(lba >= arena->external_nlba);
-	return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE);
+	return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE, flags);
 }
 
 static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping,
-			u32 z_flag, u32 e_flag)
+			u32 z_flag, u32 e_flag, unsigned long rwb_flags)
 {
 	u32 ze;
 	__le32 mapping_le;
@@ -127,11 +136,11 @@
 	}
 
 	mapping_le = cpu_to_le32(mapping);
-	return __btt_map_write(arena, lba, mapping_le);
+	return __btt_map_write(arena, lba, mapping_le, rwb_flags);
 }
 
 static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
-			int *trim, int *error)
+			int *trim, int *error, unsigned long rwb_flags)
 {
 	int ret;
 	__le32 in;
@@ -140,7 +149,7 @@
 
 	WARN_ON(lba >= arena->external_nlba);
 
-	ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE);
+	ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE, rwb_flags);
 	if (ret)
 		return ret;
 
@@ -189,7 +198,7 @@
 	WARN_ON(!ent);
 	return arena_read_bytes(arena,
 			arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
-			2 * LOG_ENT_SIZE);
+			2 * LOG_ENT_SIZE, 0);
 }
 
 static struct dentry *debugfs_root;
@@ -335,7 +344,7 @@
  * btt_flog_write is the wrapper for updating the freelist elements
  */
 static int __btt_log_write(struct arena_info *arena, u32 lane,
-			u32 sub, struct log_entry *ent)
+			u32 sub, struct log_entry *ent, unsigned long flags)
 {
 	int ret;
 	/*
@@ -350,13 +359,13 @@
 	void *src = ent;
 
 	/* split the 16B write into atomic, durable halves */
-	ret = arena_write_bytes(arena, ns_off, src, log_half);
+	ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
 	if (ret)
 		return ret;
 
 	ns_off += log_half;
 	src += log_half;
-	return arena_write_bytes(arena, ns_off, src, log_half);
+	return arena_write_bytes(arena, ns_off, src, log_half, flags);
 }
 
 static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
@@ -364,7 +373,7 @@
 {
 	int ret;
 
-	ret = __btt_log_write(arena, lane, sub, ent);
+	ret = __btt_log_write(arena, lane, sub, ent, NVDIMM_IO_ATOMIC);
 	if (ret)
 		return ret;
 
@@ -393,11 +402,19 @@
 	if (!zerobuf)
 		return -ENOMEM;
 
+	/*
+	 * mapoff should always be at least 512B aligned. Error clearing
+	 * in rw_bytes relies on this alignment, so verify that it holds
+	 * here.
+	 */
+	WARN_ON_ONCE(!IS_ALIGNED(arena->mapoff, 512));
+
 	while (mapsize) {
 		size_t size = min(mapsize, chunk_size);
 
+		WARN_ON_ONCE(size < 512);
 		ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
-				size);
+				size, 0);
 		if (ret)
 			goto free;
 
@@ -417,26 +434,50 @@
  */
 static int btt_log_init(struct arena_info *arena)
 {
+	size_t logsize = arena->info2off - arena->logoff;
+	size_t chunk_size = SZ_4K, offset = 0;
+	struct log_entry log;
+	void *zerobuf;
 	int ret;
 	u32 i;
-	struct log_entry log, zerolog;
 
-	memset(&zerolog, 0, sizeof(zerolog));
+	zerobuf = kzalloc(chunk_size, GFP_KERNEL);
+	if (!zerobuf)
+		return -ENOMEM;
+	/*
+	 * logoff should always be at least 512B aligned. Error clearing
+	 * in rw_bytes relies on this alignment, so verify that it holds
+	 * here.
+	 */
+	WARN_ON_ONCE(!IS_ALIGNED(arena->logoff, 512));
+
+	while (logsize) {
+		size_t size = min(logsize, chunk_size);
+
+		WARN_ON_ONCE(size < 512);
+		ret = arena_write_bytes(arena, arena->logoff + offset, zerobuf,
+				size, 0);
+		if (ret)
+			goto free;
+
+		offset += size;
+		logsize -= size;
+		cond_resched();
+	}
 
 	for (i = 0; i < arena->nfree; i++) {
 		log.lba = cpu_to_le32(i);
 		log.old_map = cpu_to_le32(arena->external_nlba + i);
 		log.new_map = cpu_to_le32(arena->external_nlba + i);
 		log.seq = cpu_to_le32(LOG_SEQ_INIT);
-		ret = __btt_log_write(arena, i, 0, &log);
+		ret = __btt_log_write(arena, i, 0, &log, 0);
 		if (ret)
-			return ret;
-		ret = __btt_log_write(arena, i, 1, &zerolog);
-		if (ret)
-			return ret;
+			goto free;
 	}
 
-	return 0;
+ free:
+	kfree(zerobuf);
+	return ret;
 }
 
 static int btt_freelist_init(struct arena_info *arena)
@@ -470,7 +511,7 @@
 
 		/* Check if map recovery is needed */
 		ret = btt_map_read(arena, le32_to_cpu(log_new.lba), &map_entry,
-				NULL, NULL);
+				NULL, NULL, 0);
 		if (ret)
 			return ret;
 		if ((le32_to_cpu(log_new.new_map) != map_entry) &&
@@ -480,7 +521,7 @@
 			 * to complete the map write. So fix up the map.
 			 */
 			ret = btt_map_write(arena, le32_to_cpu(log_new.lba),
-					le32_to_cpu(log_new.new_map), 0, 0);
+					le32_to_cpu(log_new.new_map), 0, 0, 0);
 			if (ret)
 				return ret;
 		}
@@ -875,7 +916,7 @@
 	u64 nsoff = to_namespace_offset(arena, lba);
 	void *mem = kmap_atomic(page);
 
-	ret = arena_read_bytes(arena, nsoff, mem + off, len);
+	ret = arena_read_bytes(arena, nsoff, mem + off, len, NVDIMM_IO_ATOMIC);
 	kunmap_atomic(mem);
 
 	return ret;
@@ -888,7 +929,7 @@
 	u64 nsoff = to_namespace_offset(arena, lba);
 	void *mem = kmap_atomic(page);
 
-	ret = arena_write_bytes(arena, nsoff, mem + off, len);
+	ret = arena_write_bytes(arena, nsoff, mem + off, len, NVDIMM_IO_ATOMIC);
 	kunmap_atomic(mem);
 
 	return ret;
@@ -931,10 +972,12 @@
 		mem = kmap_atomic(bv.bv_page);
 		if (rw)
 			ret = arena_write_bytes(arena, meta_nsoff,
-					mem + bv.bv_offset, cur_len);
+					mem + bv.bv_offset, cur_len,
+					NVDIMM_IO_ATOMIC);
 		else
 			ret = arena_read_bytes(arena, meta_nsoff,
-					mem + bv.bv_offset, cur_len);
+					mem + bv.bv_offset, cur_len,
+					NVDIMM_IO_ATOMIC);
 
 		kunmap_atomic(mem);
 		if (ret)
@@ -976,7 +1019,8 @@
 
 		cur_len = min(btt->sector_size, len);
 
-		ret = btt_map_read(arena, premap, &postmap, &t_flag, &e_flag);
+		ret = btt_map_read(arena, premap, &postmap, &t_flag, &e_flag,
+				NVDIMM_IO_ATOMIC);
 		if (ret)
 			goto out_lane;
 
@@ -1006,7 +1050,7 @@
 			barrier();
 
 			ret = btt_map_read(arena, premap, &new_map, &t_flag,
-						&e_flag);
+						&e_flag, NVDIMM_IO_ATOMIC);
 			if (ret)
 				goto out_rtt;
 
@@ -1093,7 +1137,8 @@
 		}
 
 		lock_map(arena, premap);
-		ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL);
+		ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL,
+				NVDIMM_IO_ATOMIC);
 		if (ret)
 			goto out_map;
 		if (old_postmap >= arena->internal_nlba) {
@@ -1110,7 +1155,7 @@
 		if (ret)
 			goto out_map;
 
-		ret = btt_map_write(arena, premap, new_postmap, 0, 0);
+		ret = btt_map_write(arena, premap, new_postmap, 0, 0, 0);
 		if (ret)
 			goto out_map;
 
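
The alignment WARN_ON_ONCE()s and the NVDIMM_IO_ATOMIC plumbing above encode
a single constraint: rw_bytes may only clear media errors for writes that
are 512-byte aligned, a 512-byte multiple, and issued from a context that
can sleep. An illustrative predicate restating that constraint (not a
function added by this patch):

    static bool write_may_clear_errors(resource_size_t offset, size_t n,
                    unsigned long flags)
    {
            return IS_ALIGNED(offset, 512) && IS_ALIGNED(n, 512) &&
                    !(flags & NVDIMM_IO_ATOMIC);
    }
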
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 4b76af2..ae00dc0 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -273,7 +273,7 @@
 	if (!btt_sb || !ndns || !nd_btt)
 		return -ENODEV;
 
-	if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb)))
+	if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb), 0))
 		return -ENXIO;
 
 	if (nvdimm_namespace_capacity(ndns) < SZ_16M)
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 93d128d..7ceb5fa 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -228,7 +228,8 @@
 EXPORT_SYMBOL(nd_sb_checksum);
 
 static int nsio_rw_bytes(struct nd_namespace_common *ndns,
-		resource_size_t offset, void *buf, size_t size, int rw)
+		resource_size_t offset, void *buf, size_t size, int rw,
+		unsigned long flags)
 {
 	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
 	unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
@@ -259,7 +260,8 @@
 		 * work around this collision.
 		 */
 		if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)
-				&& (!ndns->claim || !is_nd_btt(ndns->claim))) {
+				&& !(flags & NVDIMM_IO_ATOMIC)
+				&& !ndns->claim) {
 			long cleared;
 
 			cleared = nvdimm_clear_poison(&ndns->dev,
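
Every rw_bytes backend now receives the flags word and is expected to treat
NVDIMM_IO_ATOMIC the way nsio_rw_bytes() does. A toy backend sketch under
stated assumptions (get_backing_va() and try_clear_media_errors() are
hypothetical stand-ins, not real helpers):

    static int toy_rw_bytes(struct nd_namespace_common *ndns,
                    resource_size_t offset, void *buf, size_t size, int rw,
                    unsigned long flags)
    {
            void *base = get_backing_va(ndns);      /* hypothetical */

            if (rw == READ) {
                    memcpy(buf, base + offset, size);
                    return 0;
            }
            /* attempt (sleeping) error clearing only when allowed */
            if (!(flags & NVDIMM_IO_ATOMIC))
                    try_clear_media_errors(ndns, offset, size); /* hypothetical */
            memcpy(base + offset, buf, size);
            return 0;
    }
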
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 77d0321..03852d7 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -31,6 +31,7 @@
 	ND_MAX_LANES = 256,
 	SECTOR_SHIFT = 9,
 	INT_LBASIZE_ALIGNMENT = 64,
+	NVDIMM_IO_ATOMIC = 1,
 };
 
 struct nd_poison {
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 335c817..a6c4036 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -357,7 +357,7 @@
 	if (!is_nd_pmem(nd_pfn->dev.parent))
 		return -ENODEV;
 
-	if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb)))
+	if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0))
 		return -ENXIO;
 
 	if (memcmp(pfn_sb->signature, sig, PFN_SIG_LEN) != 0)
@@ -662,7 +662,7 @@
 	checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
 	pfn_sb->checksum = cpu_to_le64(checksum);
 
-	return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb));
+	return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0);
 }
 
 /*
diff --git a/fs/Kconfig b/fs/Kconfig
index 83eab52..b0e42b6 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -39,6 +39,7 @@
 	depends on MMU
 	depends on !(ARM || MIPS || SPARC)
 	select FS_IOMAP
+	select DAX
 	help
 	  Direct Access (DAX) can be used on memory-backed block devices.
 	  If the block device supports DAX and the filesystem supports DAX,
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 2a305c1..519599d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -717,72 +717,6 @@
 }
 EXPORT_SYMBOL_GPL(bdev_write_page);
 
-int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
-		pgoff_t *pgoff)
-{
-	phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512;
-
-	if (pgoff)
-		*pgoff = PHYS_PFN(phys_off);
-	if (phys_off % PAGE_SIZE || size % PAGE_SIZE)
-		return -EINVAL;
-	return 0;
-}
-EXPORT_SYMBOL(bdev_dax_pgoff);
-
-/**
- * bdev_dax_supported() - Check if the device supports dax for filesystem
- * @sb: The superblock of the device
- * @blocksize: The block size of the device
- *
- * This is a library function for filesystems to check if the block device
- * can be mounted with dax option.
- *
- * Return: negative errno if unsupported, 0 if supported.
- */
-int bdev_dax_supported(struct super_block *sb, int blocksize)
-{
-	struct block_device *bdev = sb->s_bdev;
-	struct dax_device *dax_dev;
-	pgoff_t pgoff;
-	int err, id;
-	void *kaddr;
-	pfn_t pfn;
-	long len;
-
-	if (blocksize != PAGE_SIZE) {
-		vfs_msg(sb, KERN_ERR, "error: unsupported blocksize for dax");
-		return -EINVAL;
-	}
-
-	err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
-	if (err) {
-		vfs_msg(sb, KERN_ERR, "error: unaligned partition for dax");
-		return err;
-	}
-
-	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
-	if (!dax_dev) {
-		vfs_msg(sb, KERN_ERR, "error: device does not support dax");
-		return -EOPNOTSUPP;
-	}
-
-	id = dax_read_lock();
-	len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
-	dax_read_unlock(id);
-
-	put_dax(dax_dev);
-
-	if (len < 1) {
-		vfs_msg(sb, KERN_ERR,
-				"error: dax access failed (%ld)", len);
-		return len < 0 ? len : -EIO;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bdev_dax_supported);
-
 /*
  * pseudo-fs
  */
diff --git a/fs/dax.c b/fs/dax.c
index 93ae872..c22eaf1 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -964,12 +964,12 @@
 		void *kaddr;
 		pfn_t pfn;
 
-		rc = bdev_dax_pgoff(bdev, sector, size, &pgoff);
+		rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
 		if (rc)
 			return rc;
 
 		id = dax_read_lock();
-		rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr,
+		rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr,
 				&pfn);
 		if (rc < 0) {
 			dax_read_unlock(id);
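
This hunk fixes sub-page zeroing: bdev_dax_pgoff() rejects sizes that are
not a multiple of PAGE_SIZE, and PHYS_PFN() of a sub-page size is zero
pages, so the lookup must use PAGE_SIZE and the direct-access call must ask
for exactly one page. An illustrative helper showing the corrected pattern
(it omits the cache write-back the real path performs after the memset):

    static int zero_partial_page(struct dax_device *dax_dev,
                    struct block_device *bdev, sector_t sector,
                    unsigned int offset, unsigned int len)
    {
            pgoff_t pgoff;
            void *kaddr;
            pfn_t pfn;
            long rc;
            int id;

            rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
            if (rc)
                    return rc;
            id = dax_read_lock();
            rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
            if (rc < 1) {
                    dax_read_unlock(id);
                    return rc < 0 ? rc : -EIO;
            }
            memset(kaddr + offset, 0, len);
            dax_read_unlock(id);
            return 0;
    }
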
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 8ac673c..9c2028b 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -32,6 +32,7 @@
 #include <linux/log2.h>
 #include <linux/quotaops.h>
 #include <linux/uaccess.h>
+#include <linux/dax.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c90edf09..0b177da 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -37,6 +37,7 @@
 #include <linux/ctype.h>
 #include <linux/log2.h>
 #include <linux/crc16.h>
+#include <linux/dax.h>
 #include <linux/cleancache.h>
 #include <linux/uaccess.h>
 
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 47d239d..455a575 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -52,6 +52,7 @@
 #include "xfs_reflink.h"
 
 #include <linux/namei.h>
+#include <linux/dax.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/mount.h>
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b5d1e27..ab92c4e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1947,8 +1947,6 @@
 extern int bdev_read_page(struct block_device *, sector_t, struct page *);
 extern int bdev_write_page(struct block_device *, sector_t, struct page *,
 						struct writeback_control *);
-extern int bdev_dax_supported(struct super_block *, int);
-int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
 #else /* CONFIG_BLOCK */
 
 struct block_device;
diff --git a/include/linux/dax.h b/include/linux/dax.h
index d1236d1..00ebac8 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -18,12 +18,38 @@
 			void **, pfn_t *);
 };
 
+int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
+#if IS_ENABLED(CONFIG_FS_DAX)
+int __bdev_dax_supported(struct super_block *sb, int blocksize);
+static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
+{
+	return __bdev_dax_supported(sb, blocksize);
+}
+#else
+static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_DAX)
+struct dax_device *dax_get_by_host(const char *host);
+void put_dax(struct dax_device *dax_dev);
+#else
+static inline struct dax_device *dax_get_by_host(const char *host)
+{
+	return NULL;
+}
+
+static inline void put_dax(struct dax_device *dax_dev)
+{
+}
+#endif
+
 int dax_read_lock(void);
 void dax_read_unlock(int id);
-struct dax_device *dax_get_by_host(const char *host);
 struct dax_device *alloc_dax(void *private, const char *host,
 		const struct dax_operations *ops);
-void put_dax(struct dax_device *dax_dev);
 bool dax_alive(struct dax_device *dax_dev);
 void kill_dax(struct dax_device *dax_dev);
 void *dax_get_private(struct dax_device *dax_dev);
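
The reshuffled declarations follow the usual kernel stub pattern: callers
compile unchanged whether or not CONFIG_DAX is set, because the disabled
variants inline to NULL/no-op. An illustrative caller (the helper name is
made up):

    static bool disk_has_dax(struct gendisk *disk)
    {
            struct dax_device *dax_dev = dax_get_by_host(disk->disk_name);

            if (!dax_dev)
                    return false;
            put_dax(dax_dev);
            return true;
    }
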
diff --git a/include/linux/nd.h b/include/linux/nd.h
index fa66aee..194b8e0 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -48,7 +48,7 @@
 	struct device dev;
 	struct device *claim;
 	int (*rw_bytes)(struct nd_namespace_common *, resource_size_t offset,
-			void *buf, size_t size, int rw);
+			void *buf, size_t size, int rw, unsigned long flags);
 };
 
 static inline struct nd_namespace_common *to_ndns(struct device *dev)
@@ -134,9 +134,10 @@
  * @buf is up-to-date upon return from this routine.
  */
 static inline int nvdimm_read_bytes(struct nd_namespace_common *ndns,
-		resource_size_t offset, void *buf, size_t size)
+		resource_size_t offset, void *buf, size_t size,
+		unsigned long flags)
 {
-	return ndns->rw_bytes(ndns, offset, buf, size, READ);
+	return ndns->rw_bytes(ndns, offset, buf, size, READ, flags);
 }
 
 /**
@@ -152,9 +153,10 @@
  * to media is handled internal to the @ndns driver, if at all.
  */
 static inline int nvdimm_write_bytes(struct nd_namespace_common *ndns,
-		resource_size_t offset, void *buf, size_t size)
+		resource_size_t offset, void *buf, size_t size,
+		unsigned long flags)
 {
-	return ndns->rw_bytes(ndns, offset, buf, size, WRITE);
+	return ndns->rw_bytes(ndns, offset, buf, size, WRITE, flags);
 }
 
 #define MODULE_ALIAS_ND_DEVICE(type) \