From e94f5a2285fc94202a9efb2c687481f29b64132c Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Fri, 14 Aug 2015 16:15:31 -0400 Subject: [PATCH 1/6] dax: fix O_DIRECT I/O to the last block of a blockdev commit bbab37ddc20b (block: Add support for DAX reads/writes to block devices) caused a regression in mkfs.xfs. That utility sets the block size of the device to the logical block size using the BLKBSZSET ioctl, and then issues a single sector read from the last sector of the device. This results in the dax_io code trying to do a page-sized read from 512 bytes from the end of the device. The result is -ERANGE being returned to userspace. The fix is to align the block to the page size before calling get_block. Thanks to willy for simplifying my original patch. Cc: Signed-off-by: Jeff Moyer Tested-by: Linda Knippers Signed-off-by: Dan Williams --- fs/dax.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/dax.c b/fs/dax.c index 93bf2f990ace..7ae6df7ea1d2 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -119,7 +119,8 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, size_t len; if (pos == max) { unsigned blkbits = inode->i_blkbits; - sector_t block = pos >> blkbits; + long page = pos >> PAGE_SHIFT; + sector_t block = page << (PAGE_SHIFT - blkbits); unsigned first = pos - (block << blkbits); long size; From f0b2e563bc419df7c1b3d2f494574c25125f6aed Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Fri, 14 Aug 2015 16:15:32 -0400 Subject: [PATCH 2/6] blockdev: don't set S_DAX for misaligned partitions The dax code doesn't currently support misaligned partitions, so disable O_DIRECT via dax until such time as that support materializes. 
Cc: Suggested-by: Boaz Harrosh Signed-off-by: Jeff Moyer Signed-off-by: Dan Williams --- fs/block_dev.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/block_dev.c b/fs/block_dev.c index 22ea424ee741..073bb57adab1 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1242,6 +1242,13 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) goto out_clear; } bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); + /* + * If the partition is not aligned on a page + * boundary, we can't do dax I/O to it. + */ + if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) || + (bdev->bd_part->nr_sects % (PAGE_SIZE / 512))) + bdev->bd_inode->i_flags &= ~S_DAX; } } else { if (bdev->bd_contains == bdev) { From 4be9c1fc3df9c3b03c9bde8aec5e44fc73996a3f Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 16 Sep 2015 21:24:47 +0800 Subject: [PATCH 3/6] libnvdimm: btt_devs: Fix locking in namespace_store Always take device_lock() before nvdimm_bus_lock() to prevent deadlock. Cc: Signed-off-by: Axel Lin Signed-off-by: Dan Williams --- drivers/nvdimm/btt_devs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 59ad54a63d9f..cb477518dd0e 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -128,13 +128,13 @@ static ssize_t namespace_store(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - nvdimm_bus_lock(dev); device_lock(dev); + nvdimm_bus_lock(dev); rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len); dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, rc, buf, buf[len - 1] == '\n' ? 
"" : "\n"); - device_unlock(dev); nvdimm_bus_unlock(dev); + device_unlock(dev); return rc; } From 4ca8b57a0af145f4e791f21dbca6ad789da9ee8b Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 16 Sep 2015 21:25:38 +0800 Subject: [PATCH 4/6] libnvdimm: pfn_devs: Fix locking in namespace_store Always take device_lock() before nvdimm_bus_lock() to prevent deadlock. Signed-off-by: Axel Lin Signed-off-by: Dan Williams --- drivers/nvdimm/pfn_devs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 3fd7d0d81a47..71805a1aa0f3 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -148,13 +148,13 @@ static ssize_t namespace_store(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn(dev); ssize_t rc; - nvdimm_bus_lock(dev); device_lock(dev); + nvdimm_bus_lock(dev); rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len); dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, rc, buf, buf[len - 1] == '\n' ? "" : "\n"); - device_unlock(dev); nvdimm_bus_unlock(dev); + device_unlock(dev); return rc; } From ba8fe0f85e15d047686caf8a42463b592c63c98c Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 16 Sep 2015 14:52:21 -0600 Subject: [PATCH 5/6] pmem: add proper fencing to pmem_rw_page() pmem_rw_page() needs to call wmb_pmem() on writes to make sure that the newly written data is durable. This flow was added to pmem_rw_bytes() and pmem_make_request() with this commit: commit 61031952f4c8 ("arch, x86: pmem api for ensuring durability of persistent memory updates") ...the pmem_rw_page() path was missed. 
Cc: Signed-off-by: Ross Zwisler Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index b9525385c0dc..0ba6a978f227 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -92,6 +92,8 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, struct pmem_device *pmem = bdev->bd_disk->private_data; pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector); + if (rw & WRITE) + wmb_pmem(); page_endio(page, rw & WRITE, 0); return 0; From ae4f976968896f8f41b3a7aa21be6146492211e5 Mon Sep 17 00:00:00 2001 From: Tyler Baker Date: Sat, 19 Sep 2015 03:58:10 -0400 Subject: [PATCH 6/6] mm: fix type cast in __pfn_to_phys() The various definitions of __pfn_to_phys() have been consolidated to use a generic macro in include/asm-generic/memory_model.h. This hit mainline in the form of 012dcef3f058 "mm: move __phys_to_pfn and __pfn_to_phys to asm/generic/memory_model.h". When the generic macro was implemented, the type cast to phys_addr_t was dropped, which caused boot regressions on ARM platforms with more than 4GB of memory and LPAE enabled. It was suggested to use PFN_PHYS() defined in include/linux/pfn.h as it provides the correct logic and avoids further duplication. 
Reported-by: kernelci.org bot Suggested-by: Dan Williams Signed-off-by: Tyler Baker Signed-off-by: Dan Williams --- include/asm-generic/memory_model.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index f20f407ce45d..4b4b056a6eb0 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -73,7 +73,7 @@ * Convert a physical address to a Page Frame Number and back */ #define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT)) -#define __pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT) +#define __pfn_to_phys(pfn) PFN_PHYS(pfn) #define page_to_pfn __page_to_pfn #define pfn_to_page __pfn_to_page