[PATCH v2 00/25] replace ioremap_{cache|wt} with memremap
by Dan Williams
Changes since v1 [1]:
1/ Drop the attempt at unifying ioremap() prototypes, just focus on
converting ioremap_cache and ioremap_wt over to memremap (Christoph)
2/ Drop the unrelated cleanups to use %pa in __ioremap_caller (Thomas)
3/ Add support for memremap() attempts on "System RAM" to simply return
the kernel virtual address for that range. ARM depends on this
functionality in ioremap_cache() and ACPI was open coding a similar
solution. (Mark)
4/ Split the conversions of ioremap_{cache|wt} into separate patches per
driver / arch.
5/ Fix bisection breakage and other reports from 0day-kbuild
---
While developing the pmem driver we noticed that the __iomem annotation
on the return value from ioremap_cache() was being mishandled by several
callers. We also observed that all of the call sites expected to be
able to treat the return value from ioremap_cache() as a normal
(non-__iomem) pointer to memory.
This patchset takes the opportunity to clean up the above confusion as
well as a few issues with the ioremap_{cache|wt} interface, including:
1/ Eliminating the possibility of function prototypes differing between
architectures by defining a central memremap() prototype that takes
flags to determine the mapping type.
2/ Returning NULL rather than falling back silently to a different
mapping-type. This allows drivers to be stricter about the
mapping-type fallbacks that are permissible.
[1]: http://marc.info/?l=linux-arm-kernel&m=143735199029255&w=2
---
Dan Williams (22):
mm: enhance region_is_ram() to distinguish 'unknown' vs 'mixed'
arch, drivers: don't include <asm/io.h> directly, use <linux/io.h> instead
cleanup IORESOURCE_CACHEABLE vs ioremap()
intel_iommu: fix leaked ioremap mapping
arch: introduce memremap()
arm: switch from ioremap_cache to memremap
x86: switch from ioremap_cache to memremap
gma500: switch from acpi_os_ioremap to ioremap
i915: switch from acpi_os_ioremap to ioremap
acpi: switch from ioremap_cache to memremap
toshiba laptop: replace ioremap_cache with ioremap
memconsole: fix __iomem mishandling, switch to memremap
visorbus: switch from ioremap_cache to memremap
intel-iommu: switch from ioremap_cache to memremap
libnvdimm, pmem: switch from ioremap_cache to memremap
pxa2xx-flash: switch from ioremap_cache to memremap
sfi: switch from ioremap_cache to memremap
fbdev: switch from ioremap_wt to memremap
pmem: switch from ioremap_wt to memremap
arch: remove ioremap_cache, replace with arch_memremap
arch: remove ioremap_wt, replace with arch_memremap
pmem: convert to generic memremap
Toshi Kani (3):
mm, x86: Fix warning in ioremap RAM check
mm, x86: Remove region_is_ram() call from ioremap
mm: Fix bugs in region_is_ram()
arch/arc/include/asm/io.h | 1
arch/arm/Kconfig | 1
arch/arm/include/asm/io.h | 13 +++-
arch/arm/include/asm/xen/page.h | 4 +
arch/arm/mach-clps711x/board-cdb89712.c | 2 -
arch/arm/mach-shmobile/pm-rcar.c | 2 -
arch/arm/mm/ioremap.c | 12 +++-
arch/arm/mm/nommu.c | 11 ++-
arch/arm64/Kconfig | 1
arch/arm64/include/asm/acpi.h | 10 +--
arch/arm64/include/asm/dmi.h | 8 +--
arch/arm64/include/asm/io.h | 8 ++-
arch/arm64/kernel/efi.c | 9 ++-
arch/arm64/kernel/smp_spin_table.c | 19 +++---
arch/arm64/mm/ioremap.c | 20 ++----
arch/avr32/include/asm/io.h | 1
arch/frv/Kconfig | 1
arch/frv/include/asm/io.h | 17 ++---
arch/frv/mm/kmap.c | 6 ++
arch/ia64/Kconfig | 1
arch/ia64/include/asm/io.h | 11 +++
arch/ia64/kernel/cyclone.c | 2 -
arch/m32r/include/asm/io.h | 1
arch/m68k/Kconfig | 1
arch/m68k/include/asm/io_mm.h | 14 +---
arch/m68k/include/asm/io_no.h | 12 ++--
arch/m68k/include/asm/raw_io.h | 4 +
arch/m68k/mm/kmap.c | 17 +++++
arch/m68k/mm/sun3kmap.c | 6 ++
arch/metag/include/asm/io.h | 3 -
arch/microblaze/include/asm/io.h | 1
arch/mn10300/include/asm/io.h | 1
arch/nios2/include/asm/io.h | 1
arch/powerpc/kernel/pci_of_scan.c | 2 -
arch/s390/include/asm/io.h | 1
arch/sh/Kconfig | 1
arch/sh/include/asm/io.h | 20 ++++--
arch/sh/mm/ioremap.c | 10 +++
arch/sparc/include/asm/io_32.h | 1
arch/sparc/include/asm/io_64.h | 1
arch/sparc/kernel/pci.c | 3 -
arch/tile/include/asm/io.h | 1
arch/x86/Kconfig | 1
arch/x86/include/asm/efi.h | 3 +
arch/x86/include/asm/io.h | 17 +++--
arch/x86/kernel/crash_dump_64.c | 6 +-
arch/x86/kernel/kdebugfs.c | 8 +--
arch/x86/kernel/ksysfs.c | 28 ++++-----
arch/x86/mm/ioremap.c | 76 ++++++++++--------------
arch/xtensa/Kconfig | 1
arch/xtensa/include/asm/io.h | 9 ++-
drivers/acpi/apei/einj.c | 9 ++-
drivers/acpi/apei/erst.c | 6 +-
drivers/acpi/nvs.c | 6 +-
drivers/acpi/osl.c | 70 ++++++----------------
drivers/char/toshiba.c | 2 -
drivers/firmware/google/memconsole.c | 7 +-
drivers/gpu/drm/gma500/opregion.c | 2 -
drivers/gpu/drm/i915/intel_opregion.c | 2 -
drivers/iommu/intel-iommu.c | 10 ++-
drivers/iommu/intel_irq_remapping.c | 4 +
drivers/isdn/icn/icn.h | 2 -
drivers/mtd/devices/slram.c | 2 -
drivers/mtd/maps/pxa2xx-flash.c | 4 +
drivers/mtd/nand/diskonchip.c | 2 -
drivers/mtd/onenand/generic.c | 2 -
drivers/nvdimm/Kconfig | 2 -
drivers/pci/probe.c | 3 -
drivers/pnp/manager.c | 2 -
drivers/scsi/aic94xx/aic94xx_init.c | 7 --
drivers/scsi/arcmsr/arcmsr_hba.c | 5 --
drivers/scsi/mvsas/mv_init.c | 15 +----
drivers/scsi/sun3x_esp.c | 2 -
drivers/sfi/sfi_core.c | 4 +
drivers/staging/comedi/drivers/ii_pci20kc.c | 1
drivers/staging/unisys/visorbus/visorchannel.c | 16 +++--
drivers/staging/unisys/visorbus/visorchipset.c | 17 +++--
drivers/tty/serial/8250/8250_core.c | 2 -
drivers/video/fbdev/Kconfig | 2 -
drivers/video/fbdev/amifb.c | 5 +-
drivers/video/fbdev/atafb.c | 5 +-
drivers/video/fbdev/hpfb.c | 6 +-
drivers/video/fbdev/ocfb.c | 1
drivers/video/fbdev/s1d13xxxfb.c | 3 -
drivers/video/fbdev/stifb.c | 1
include/acpi/acpi_io.h | 6 +-
include/asm-generic/io.h | 8 ---
include/asm-generic/iomap.h | 4 -
include/linux/io-mapping.h | 2 -
include/linux/io.h | 9 +++
include/linux/mtd/map.h | 2 -
include/linux/pmem.h | 26 +++++---
include/video/vga.h | 2 -
kernel/Makefile | 2 +
kernel/memremap.c | 74 +++++++++++++++++++++++
kernel/resource.c | 43 +++++++-------
lib/Kconfig | 5 +-
lib/devres.c | 13 +---
lib/pci_iomap.c | 7 +-
tools/testing/nvdimm/Kbuild | 4 +
tools/testing/nvdimm/test/iomap.c | 34 ++++++++---
101 files changed, 482 insertions(+), 398 deletions(-)
create mode 100644 kernel/memremap.c
4 years, 10 months
[PATCH 00/15] get_user_pages() for dax mappings
by Dan Williams
To date, we have implemented two I/O usage models for persistent memory,
PMEM (a persistent "ram disk") and DAX (mmap persistent memory into
userspace). This series adds a third, DAX-GUP, that allows DAX mappings
to be the target of direct-i/o. It allows userspace to coordinate
DMA/RDMA from/to persistent memory.
The implementation leverages the ZONE_DEVICE mm-zone that went into
4.3-rc1 to flag pages that are owned and dynamically mapped by a device
driver. The pmem driver, after mapping a persistent memory range into
the system memmap via devm_memremap_pages(), arranges for DAX to
distinguish pfn-only versus page-backed pmem-pfns via flags in the new
__pfn_t type. The DAX code, upon seeing a PFN_DEV+PFN_MAP flagged pfn,
flags the resulting pte(s) inserted into the process page tables with a
new _PAGE_DEVMAP flag. Later, when get_user_pages() is walking ptes it
keys off _PAGE_DEVMAP to pin the device hosting the page range active.
Finally, get_page() and put_page() are modified to take references
against the device driver established page mapping.
Next step, more testing specifically DAX-get_user_pages() vs truncate.
Patches 1 - 3 are general compilation fixups from 0day-kbuild reports
while developing this series.
Patches 4 - 7 are minor cleanups and reworks of the devm_memremap_* api.
Patches 8 - 10 add a reference counter for pinning the pmem driver
active while it is in use. It turns out, prior to these changes, you
can reliably crash the kernel on shutdown if the pmem device is unbound
while hosting a mounted filesystem.
Patches 11 - 15 use __pfn_t and the _PAGE_DEVMAP flag to implement the
dax-gup path.
This series is built on 4.3-rc2 plus the __dax_pmd_fault fix from Ross:
https://patchwork.kernel.org/patch/7244961/
---
Dan Williams (15):
avr32: convert to asm-generic/memory_model.h
hugetlb: fix compile error on tile
frv: fix compiler warning from definition of __pmd()
x86, mm: quiet arch_add_memory()
pmem: kill memremap_pmem()
devm_memunmap: use devres_release()
devm_memremap: convert to return ERR_PTR
block, dax, pmem: reference counting infrastructure
block, pmem: fix null pointer de-reference on shutdown, check for queue death
block, dax: fix lifetime of in-kernel dax mappings
mm, dax, pmem: introduce __pfn_t
mm, dax, gpu: convert vm_insert_mixed to __pfn_t, introduce _PAGE_DEVMAP
mm, dax: convert vmf_insert_pfn_pmd() to __pfn_t
mm, dax, pmem: introduce {get|put}_dev_pagemap() for dax-gup
mm, x86: get_user_pages() for dax mappings
arch/alpha/include/asm/pgtable.h | 1
arch/avr32/include/asm/page.h | 8 +
arch/frv/include/asm/page.h | 2
arch/ia64/include/asm/pgtable.h | 1
arch/m68k/include/asm/page_no.h | 1
arch/parisc/include/asm/pgtable.h | 1
arch/powerpc/include/asm/pgtable.h | 1
arch/powerpc/sysdev/axonram.c | 10 +
arch/sparc/include/asm/pgtable_64.h | 2
arch/tile/include/asm/pgtable.h | 1
arch/um/include/asm/pgtable-3level.h | 1
arch/x86/include/asm/pgtable.h | 24 ++++
arch/x86/include/asm/pgtable_types.h | 7 +
arch/x86/mm/gup.c | 56 ++++++++
arch/x86/mm/init.c | 4 -
arch/x86/mm/init_64.c | 4 -
arch/x86/mm/pat.c | 4 -
block/blk-core.c | 86 ++++++++++++-
block/blk-mq-sysfs.c | 2
block/blk-mq.c | 48 ++-----
block/blk-sysfs.c | 9 +
block/blk.h | 3
drivers/block/brd.c | 6 -
drivers/gpu/drm/exynos/exynos_drm_gem.c | 3
drivers/gpu/drm/gma500/framebuffer.c | 2
drivers/gpu/drm/msm/msm_gem.c | 3
drivers/gpu/drm/omapdrm/omap_gem.c | 6 +
drivers/gpu/drm/ttm/ttm_bo_vm.c | 3
drivers/nvdimm/pmem.c | 57 +++++---
drivers/s390/block/dcssblk.c | 12 +-
fs/block_dev.c | 2
fs/dax.c | 140 +++++++++++++-------
include/asm-generic/pgtable.h | 6 +
include/linux/blkdev.h | 24 +++-
include/linux/huge_mm.h | 2
include/linux/hugetlb.h | 1
include/linux/io.h | 17 --
include/linux/mm.h | 212 +++++++++++++++++++++++++++++--
include/linux/mm_types.h | 6 +
include/linux/pfn.h | 9 +
include/linux/pmem.h | 26 ----
kernel/memremap.c | 78 +++++++++++
mm/gup.c | 11 +-
mm/huge_memory.c | 10 +
mm/hugetlb.c | 18 ++-
mm/memory.c | 17 +-
mm/swap.c | 15 ++
47 files changed, 729 insertions(+), 233 deletions(-)
5 years, 4 months
[PATCH 0/2] block drivers + dax vs driver unbind
by Dan Williams
Auditing pmem driver teardown operations, while developing
get_user_pages() support for dax [1], revealed that we can trivially
crash the kernel by triggering new i/o requests after unbinding the pmem
driver. In fact, any bio-based driver is susceptible to this crash
because the queue draining done at shutdown uses in-flight 'struct
request' objects to pin the queue active.
Solve the problem generically for all drivers and export the new
blk_queue_enter() and blk_queue_exit() helpers for dax to indicate when
the "request queue" is busy (i.e. we are actively using an address
returned by ->direct_access()).
[1]: https://lists.01.org/pipermail/linux-nvdimm/2015-September/002199.html
---
Dan Williams (2):
block: generic request_queue reference counting
block, dax: fix lifetime of in-kernel dax mappings
block/blk-core.c | 71 +++++++++++++++++++++++---
block/blk-mq-sysfs.c | 6 --
block/blk-mq.c | 80 +++++++++---------------------
block/blk-sysfs.c | 3 -
block/blk.h | 12 ++++
fs/dax.c | 130 +++++++++++++++++++++++++++++++-----------------
include/linux/blk-mq.h | 1
include/linux/blkdev.h | 4 +
8 files changed, 185 insertions(+), 122 deletions(-)
5 years, 4 months
[PATCH 0/3] numa allocations for devm and pmem
by Dan Williams
I noticed that pmem should be using blk_alloc_queue_node() and then I
wondered about the devm allocations... So, here is a quick conversion
of devm to use dev_to_node() for node local allocations by default and
as a result pmem to allocate all its driver infrastructure near to the
device.
---
Dan Williams (3):
devm: make allocations numa aware by default
devm_memremap_pages: use numa_mem_id
pmem, memremap: convert to numa aware allocations
drivers/base/devres.c | 19 ++++++++++---------
drivers/nvdimm/pmem.c | 5 +++--
include/linux/device.h | 16 ++++++++++++----
kernel/memremap.c | 9 +++++----
4 files changed, 30 insertions(+), 19 deletions(-)
5 years, 4 months
[PATCH] ext2, ext4: warn when mounting with dax enabled
by Dan Williams
Similar to XFS, warn when mounting DAX while it is still considered under
development. Also, aspects of the DAX implementation, for example
synchronization against multiple faults and faults causing block
allocation, depend on the correct implementation in the filesystem. The
maturity of a given DAX implementation is filesystem specific.
Cc: Jan Kara <jack(a)suse.com>
Cc: "Theodore Ts'o" <tytso(a)mit.edu>
Cc: Andreas Dilger <adilger.kernel(a)dilger.ca>
Cc: Matthew Wilcox <willy(a)linux.intel.com>
Cc: linux-ext4(a)vger.kernel.org
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Reported-by: Dave Chinner <david(a)fromorbit.com>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
---
Given the pending investigations [1], and rather than marking FS_DAX
broken, warn about DAX usage until we've landed fixes for the issues
that Dave has identified.
[1]: https://lists.01.org/pipermail/linux-nvdimm/2015-September/002266.html
fs/ext2/super.c | 2 ++
fs/ext4/super.c | 6 +++++-
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 900e19cf9ef6..2597b0663bf2 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -566,6 +566,8 @@ static int parse_options(char *options, struct super_block *sb)
/* Fall through */
case Opt_dax:
#ifdef CONFIG_FS_DAX
+ ext2_msg(sb, KERN_WARNING,
+ "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
set_opt(sbi->s_mount_opt, DAX);
#else
ext2_msg(sb, KERN_INFO, "dax option not supported");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a63c7b0a10cf..4db6eb0b6979 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1656,8 +1656,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
}
sbi->s_jquota_fmt = m->mount_opt;
#endif
-#ifndef CONFIG_FS_DAX
} else if (token == Opt_dax) {
+#ifdef CONFIG_FS_DAX
+ ext4_msg(sb, KERN_WARNING,
+ "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
+ sbi->s_mount_opt |= m->mount_opt;
+#else
ext4_msg(sb, KERN_INFO, "dax option not supported");
return -1;
#endif
5 years, 4 months
[PATCH] dax: fix deadlock in __dax_fault
by Ross Zwisler
Fix the deadlock exposed by xfstests generic/075. Here is the sequence
that was causing us to deadlock:
1) enter __dax_fault()
2) page = find_get_page() gives us a page, so skip
i_mmap_lock_write(mapping)
3) if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page)
passes, enter this block
4) if (vmf->flags & FAULT_FLAG_WRITE) fails, so do the else case and
i_mmap_unlock_write(mapping);
return dax_load_hole(mapping, page, vmf);
This causes us to up_write() a semaphore that we weren't holding.
The up_write() on a semaphore we didn't down_write() happens twice in
a row, and then the next time we try and i_mmap_lock_write(), we hang.
Signed-off-by: Ross Zwisler <ross.zwisler(a)linux.intel.com>
Reported-by: Dave Chinner <david(a)fromorbit.com>
---
fs/dax.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/dax.c b/fs/dax.c
index 7ae6df7..df1b0ac 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -405,7 +405,8 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
if (error)
goto unlock;
} else {
- i_mmap_unlock_write(mapping);
+ if (!page)
+ i_mmap_unlock_write(mapping);
return dax_load_hole(mapping, page, vmf);
}
}
--
2.1.0
5 years, 4 months
[PATCH v2 0/9] initial struct page support for pmem
by Dan Williams
Changes since v1 [1]:
1/ Several simplifications from Christoph including dropping the __pfn_t
dependency, and merging ZONE_DEVICE into the base arch_add_memory()
implementation.
2/ Drop the deeper changes to the memory hotplug code that enabled
allocating the backing 'struct page' array from pmem (struct
vmem_altmap). This functionality is still needed when large capacity
PMEM devices arrive. However, for now we can take this simple step to
enable struct page mapping in RAM and enable it by default for small
capacity CONFIG_X86_PMEM_LEGACY devices.
3/ A rework of the PMEM api to allow usage of the non-temporal
memcpy_to_pmem() implementation even on platforms without pcommit
instruction support.
[1]: https://lists.01.org/pipermail/linux-nvdimm/2015-August/001809.html
---
When we last left this debate [2] it was becoming clear that the
'page-less' approach left too many I/O scenarios off the table. The
page-less enabling is still useful for avoiding the overhead of struct
page where it is not needed, but in the end, page-backed persistent
memory seems to be a requirement. We confirmed as much at the recently
concluded Persistent Memory Microconference at Linux Plumbers.
Whereas the initial RFC of this functionality enabled userspace to pick
whether struct page is allocated from RAM or PMEM, this new version
only enables RAM-backed for now. This is suitable for existing NVDIMM
devices and a starting point to incrementally build "allocate struct
page from PMEM" support.
[2]: https://lists.01.org/pipermail/linux-nvdimm/2015-May/000748.html
---
Christoph Hellwig (2):
mm: move __phys_to_pfn and __pfn_to_phys to asm/generic/memory_model.h
add devm_memremap_pages
Dan Williams (7):
dax: drop size parameter to ->direct_access()
mm: ZONE_DEVICE for "device memory"
x86, pmem: push fallback handling to arch code
libnvdimm, pfn: 'struct page' provider infrastructure
libnvdimm, pmem: 'struct page' for pmem
libnvdimm, pmem: direct map legacy pmem by default
devm_memremap_pages: protect against pmem device unbind
arch/arm/include/asm/memory.h | 6 -
arch/arm64/include/asm/memory.h | 6 -
arch/ia64/mm/init.c | 4
arch/powerpc/mm/mem.c | 4
arch/powerpc/sysdev/axonram.c | 2
arch/s390/mm/init.c | 2
arch/sh/mm/init.c | 5 -
arch/tile/mm/init.c | 2
arch/unicore32/include/asm/memory.h | 6 -
arch/x86/include/asm/io.h | 2
arch/x86/include/asm/pmem.h | 41 ++++
arch/x86/mm/init_32.c | 4
arch/x86/mm/init_64.c | 4
drivers/acpi/nfit.c | 2
drivers/block/brd.c | 6 -
drivers/nvdimm/Kconfig | 23 ++
drivers/nvdimm/Makefile | 2
drivers/nvdimm/btt.c | 6 -
drivers/nvdimm/btt_devs.c | 172 +-----------------
drivers/nvdimm/claim.c | 201 +++++++++++++++++++++
drivers/nvdimm/e820.c | 1
drivers/nvdimm/namespace_devs.c | 62 +++++-
drivers/nvdimm/nd-core.h | 9 +
drivers/nvdimm/nd.h | 59 ++++++
drivers/nvdimm/pfn.h | 35 ++++
drivers/nvdimm/pfn_devs.c | 337 +++++++++++++++++++++++++++++++++++
drivers/nvdimm/pmem.c | 220 +++++++++++++++++++++--
drivers/nvdimm/region.c | 2
drivers/nvdimm/region_devs.c | 20 ++
drivers/s390/block/dcssblk.c | 4
fs/block_dev.c | 2
include/asm-generic/memory_model.h | 6 +
include/asm-generic/pmem.h | 72 +++++++
include/linux/blkdev.h | 2
include/linux/io.h | 57 ++++++
include/linux/libnvdimm.h | 4
include/linux/memory_hotplug.h | 5 -
include/linux/mmzone.h | 23 ++
include/linux/pmem.h | 73 +-------
kernel/memremap.c | 136 ++++++++++++++
mm/Kconfig | 17 ++
mm/memory_hotplug.c | 14 +
mm/page_alloc.c | 3
tools/testing/nvdimm/Kbuild | 3
tools/testing/nvdimm/test/iomap.c | 13 +
45 files changed, 1369 insertions(+), 310 deletions(-)
create mode 100644 drivers/nvdimm/claim.c
create mode 100644 drivers/nvdimm/pfn.h
create mode 100644 drivers/nvdimm/pfn_devs.c
create mode 100644 include/asm-generic/pmem.h
5 years, 5 months
[PATCH v2] dax: fix NULL pointer in __dax_pmd_fault()
by Ross Zwisler
The following commit:
commit 46c043ede471 ("mm: take i_mmap_lock in unmap_mapping_range() for
DAX")
moved some code in __dax_pmd_fault() that was responsible for zeroing
newly allocated PMD pages. The new location didn't properly set up
'kaddr', though, so when run this code resulted in a NULL pointer BUG.
Fix this by getting the correct 'kaddr' via bdev_direct_access().
Signed-off-by: Ross Zwisler <ross.zwisler(a)linux.intel.com>
Reported-by: Dan Williams <dan.j.williams(a)intel.com>
---
fs/dax.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/fs/dax.c b/fs/dax.c
index 7ae6df7..bcfb14b 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -569,8 +569,20 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE)
goto fallback;
+ sector = bh.b_blocknr << (blkbits - 9);
+
if (buffer_unwritten(&bh) || buffer_new(&bh)) {
int i;
+
+ length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn,
+ bh.b_size);
+ if (length < 0) {
+ result = VM_FAULT_SIGBUS;
+ goto out;
+ }
+ if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
+ goto fallback;
+
for (i = 0; i < PTRS_PER_PMD; i++)
clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
wmb_pmem();
@@ -623,7 +635,6 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
result = VM_FAULT_NOPAGE;
spin_unlock(ptl);
} else {
- sector = bh.b_blocknr << (blkbits - 9);
length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn,
bh.b_size);
if (length < 0) {
--
2.1.0
5 years, 5 months
[PATCH 1/2] ndctl: add TEST_SKIP constant
by Ross Zwisler
To help with readability, since "77" isn't very informative.
Signed-off-by: Ross Zwisler <ross.zwisler(a)linux.intel.com>
---
builtin-test.c | 6 +++---
lib/blk_namespaces.c | 2 +-
lib/pmem_namespaces.c | 2 +-
lib/test-core.c | 4 ++--
lib/test-dpa-alloc.c | 4 ++--
lib/test-libndctl.c | 4 ++--
lib/test-parent-uuid.c | 4 ++--
test.h | 4 ++++
8 files changed, 17 insertions(+), 13 deletions(-)
diff --git a/builtin-test.c b/builtin-test.c
index 9c3b7a8..854984b 100644
--- a/builtin-test.c
+++ b/builtin-test.c
@@ -6,7 +6,7 @@
static char *result(int rc)
{
- if (rc == 77)
+ if (rc == TEST_SKIP)
return "SKIP";
else if (rc)
return "FAIL";
@@ -46,12 +46,12 @@ int cmd_test(int argc, const char **argv)
rc = test_libndctl(loglevel, test);
fprintf(stderr, "test-libndctl: %s\n", result(rc));
- if (rc && rc != 77)
+ if (rc && rc != TEST_SKIP)
return rc;
rc = test_dpa_alloc(loglevel, test);
fprintf(stderr, "test-dpa-alloc: %s\n", result(rc));
- if (rc && rc != 77)
+ if (rc && rc != TEST_SKIP)
return rc;
rc = test_parent_uuid(loglevel, test);
diff --git a/lib/blk_namespaces.c b/lib/blk_namespaces.c
index 968af27..9a07000 100644
--- a/lib/blk_namespaces.c
+++ b/lib/blk_namespaces.c
@@ -227,7 +227,7 @@ int test_blk_namespaces(int log_level)
bus = ndctl_bus_get_by_provider(ctx, provider);
if (!bus) {
fprintf(stderr, "%s: failed to find NFIT-provider\n", comm);
- rc = 77;
+ rc = TEST_SKIP;
goto err_nobus;
} else
fprintf(stderr, "%s: found provider: %s\n", comm,
diff --git a/lib/pmem_namespaces.c b/lib/pmem_namespaces.c
index 17c3019..2fd6e50 100644
--- a/lib/pmem_namespaces.c
+++ b/lib/pmem_namespaces.c
@@ -187,7 +187,7 @@ int test_pmem_namespaces(int log_level)
bus = ndctl_bus_get_by_provider(ctx, provider);
if (!bus) {
fprintf(stderr, "%s: failed to find NFIT-provider\n", comm);
- rc = 77;
+ rc = TEST_SKIP;
goto err;
} else
fprintf(stderr, "%s: found provider: %s\n", comm,
diff --git a/lib/test-core.c b/lib/test-core.c
index 93d02d8..ddf059b 100644
--- a/lib/test-core.c
+++ b/lib/test-core.c
@@ -32,10 +32,10 @@ int ndctl_test_result(struct ndctl_test *test, int rc)
fprintf(stderr, "attempted: %d skipped: %d\n",
ndctl_test_get_attempted(test),
ndctl_test_get_skipped(test));
- if (rc && rc != 77)
+ if (rc && rc != TEST_SKIP)
return rc;
if (ndctl_test_get_skipped(test) >= ndctl_test_get_attempted(test))
- return 77;
+ return TEST_SKIP;
/* return success if no failures and at least one test not skipped */
return 0;
}
diff --git a/lib/test-dpa-alloc.c b/lib/test-dpa-alloc.c
index 6fa88cc..a2ecf94 100644
--- a/lib/test-dpa-alloc.c
+++ b/lib/test-dpa-alloc.c
@@ -298,7 +298,7 @@ int test_dpa_alloc(int loglevel, struct ndctl_test *test)
int err, result = EXIT_FAILURE;
if (!ndctl_test_attempt(test, KERNEL_VERSION(4, 2, 0)))
- return 77;
+ return TEST_SKIP;
err = ndctl_new(&ctx);
if (err < 0)
@@ -317,7 +317,7 @@ int test_dpa_alloc(int loglevel, struct ndctl_test *test)
err = kmod_module_probe_insert_module(mod, KMOD_PROBE_APPLY_BLACKLIST,
NULL, NULL, NULL, NULL);
if (err < 0) {
- result = 77;
+ result = TEST_SKIP;
ndctl_test_skip(test);
fprintf(stderr, "%s unavailable skipping tests\n",
NFIT_TEST_MODULE);
diff --git a/lib/test-libndctl.c b/lib/test-libndctl.c
index 616b0be..88e63e0 100644
--- a/lib/test-libndctl.c
+++ b/lib/test-libndctl.c
@@ -1561,7 +1561,7 @@ int test_libndctl(int loglevel, struct ndctl_test *test)
int err, result = EXIT_FAILURE;
if (!ndctl_test_attempt(test, KERNEL_VERSION(4, 2, 0)))
- return 77;
+ return TEST_SKIP;
err = ndctl_new(&ctx);
if (err < 0)
@@ -1581,7 +1581,7 @@ int test_libndctl(int loglevel, struct ndctl_test *test)
err = kmod_module_probe_insert_module(mod, KMOD_PROBE_APPLY_BLACKLIST,
NULL, NULL, NULL, NULL);
if (err < 0) {
- result = 77;
+ result = TEST_SKIP;
ndctl_test_skip(test);
fprintf(stderr, "%s unavailable skipping tests\n",
NFIT_TEST_MODULE);
diff --git a/lib/test-parent-uuid.c b/lib/test-parent-uuid.c
index 042ea6b..4fd7e1f 100644
--- a/lib/test-parent-uuid.c
+++ b/lib/test-parent-uuid.c
@@ -228,7 +228,7 @@ int test_parent_uuid(int loglevel, struct ndctl_test *test)
int err, result = EXIT_FAILURE;
if (!ndctl_test_attempt(test, KERNEL_VERSION(4, 3, 0)))
- return 77;
+ return TEST_SKIP;
err = ndctl_new(&ctx);
if (err < 0)
@@ -247,7 +247,7 @@ int test_parent_uuid(int loglevel, struct ndctl_test *test)
err = kmod_module_probe_insert_module(mod, KMOD_PROBE_APPLY_BLACKLIST,
NULL, NULL, NULL, NULL);
if (err < 0) {
- result = 77;
+ result = TEST_SKIP;
ndctl_test_skip(test);
fprintf(stderr, "%s unavailable skipping tests\n",
NFIT_TEST_MODULE);
diff --git a/test.h b/test.h
index d58dc88..33598bb 100644
--- a/test.h
+++ b/test.h
@@ -1,5 +1,8 @@
#ifndef __TEST_H__
#define __TEST_H__
+
+#define TEST_SKIP 77
+
struct ndctl_test;
struct ndctl_test;
struct ndctl_test *ndctl_test_new(unsigned int kver);
@@ -19,4 +22,5 @@ int test_libndctl(int loglevel, struct ndctl_test *test);
int test_blk_namespaces(int loglevel);
int test_pmem_namespaces(int loglevel);
int test_pcommit(void);
+
#endif /* __TEST_H__ */
--
2.1.0
5 years, 5 months
[PATCH] dax: fix NULL pointer in __dax_pmd_fault()
by Ross Zwisler
The following commit:
commit 46c043ede471 ("mm: take i_mmap_lock in unmap_mapping_range() for
DAX")
moved some code in __dax_pmd_fault() that was responsible for zeroing
newly allocated PMD pages. The new location didn't properly set up
'kaddr', though, so when run this code resulted in a NULL pointer BUG.
Fix this by getting the correct 'kaddr' via bdev_direct_access(), and
only make the second call to bdev_direct_access() if we don't already
have a PFN from the first call.
Signed-off-by: Ross Zwisler <ross.zwisler(a)linux.intel.com>
Reported-by: Dan Williams <dan.j.williams(a)intel.com>
---
fs/dax.c | 31 ++++++++++++++++++++++---------
1 file changed, 22 insertions(+), 9 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 7ae6df7..08ac2bd 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -532,7 +532,7 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
void __pmem *kaddr;
pgoff_t size, pgoff;
sector_t block, sector;
- unsigned long pfn;
+ unsigned long pfn = 0;
int result = 0;
/* Fall back to PTEs if we're going to COW */
@@ -569,8 +569,20 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE)
goto fallback;
+ sector = bh.b_blocknr << (blkbits - 9);
+
if (buffer_unwritten(&bh) || buffer_new(&bh)) {
int i;
+
+ length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn,
+ bh.b_size);
+ if (length < 0) {
+ result = VM_FAULT_SIGBUS;
+ goto out;
+ }
+ if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
+ goto fallback;
+
for (i = 0; i < PTRS_PER_PMD; i++)
clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
wmb_pmem();
@@ -623,15 +635,16 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
result = VM_FAULT_NOPAGE;
spin_unlock(ptl);
} else {
- sector = bh.b_blocknr << (blkbits - 9);
- length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn,
- bh.b_size);
- if (length < 0) {
- result = VM_FAULT_SIGBUS;
- goto out;
+ if (pfn == 0) {
+ length = bdev_direct_access(bh.b_bdev, sector, &kaddr,
+ &pfn, bh.b_size);
+ if (length < 0) {
+ result = VM_FAULT_SIGBUS;
+ goto out;
+ }
+ if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
+ goto fallback;
}
- if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
- goto fallback;
result |= vmf_insert_pfn_pmd(vma, address, pmd, pfn, write);
}
--
2.1.0
5 years, 5 months