[PATCH v3 1/2] xfs: fix incorrect argument count check
by Ross Zwisler
t_mmap_dio.c actually requires 4 arguments, not 3 as the current check
enforces:
# ./src/t_mmap_dio
usage: t_mmap_dio <src file> <dest file> <size> <msg>
# ./src/t_mmap_dio one two three
open src(No such file or directory) len 0 (null)
Signed-off-by: Ross Zwisler <ross.zwisler(a)linux.intel.com>
Fixes: 456581661b4d ("xfs: test per-inode DAX flag by IO")
---
src/t_mmap_dio.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/t_mmap_dio.c b/src/t_mmap_dio.c
index 69b9ca8..6c8ca1a 100644
--- a/src/t_mmap_dio.c
+++ b/src/t_mmap_dio.c
@@ -39,7 +39,7 @@ int main(int argc, char **argv)
char *dfile;
unsigned long len, opt;
- if (argc < 4)
+ if (argc < 5)
usage(basename(argv[0]));
while ((opt = getopt(argc, argv, "b")) != -1)
--
2.9.3
5 years
[PATCH v2 1/2] xfs: fix incorrect argument count check
by Ross Zwisler
t_mmap_dio.c actually requires 4 arguments, not 3 as the current check
enforces:
# ./src/t_mmap_dio
usage: t_mmap_dio <src file> <dest file> <size> <msg>
# ./src/t_mmap_dio one two three
open src(No such file or directory) len 0 (null)
Signed-off-by: Ross Zwisler <ross.zwisler(a)linux.intel.com>
Fixes: 456581661b4d ("xfs: test per-inode DAX flag by IO")
---
src/t_mmap_dio.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/t_mmap_dio.c b/src/t_mmap_dio.c
index 69b9ca8..6c8ca1a 100644
--- a/src/t_mmap_dio.c
+++ b/src/t_mmap_dio.c
@@ -39,7 +39,7 @@ int main(int argc, char **argv)
char *dfile;
unsigned long len, opt;
- if (argc < 4)
+ if (argc < 5)
usage(basename(argv[0]));
while ((opt = getopt(argc, argv, "b")) != -1)
--
2.9.3
5 years
[PATCH] pmem: fix a NULL pointer BUG in nd_pmem_notify
by Toshi Kani
The following BUG was observed when nd_pmem_notify() was called
for a BTT device. The use of a pmem_device pointer is not valid
with BTT.
BUG: unable to handle kernel NULL pointer dereference at 0000000000000030
IP: nd_pmem_notify+0x30/0xf0 [nd_pmem]
Call Trace:
nd_device_notify+0x40/0x50
child_notify+0x10/0x20
device_for_each_child+0x50/0x90
nd_region_notify+0x20/0x30
nd_device_notify+0x40/0x50
nvdimm_region_notify+0x27/0x30
acpi_nfit_scrub+0x341/0x590 [nfit]
process_one_work+0x197/0x450
worker_thread+0x4e/0x4a0
kthread+0x109/0x140
Fix nd_pmem_notify() by setting nd_region and badblocks pointers
properly for BTT.
Signed-off-by: Toshi Kani <toshi.kani(a)hpe.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Vishal Verma <vishal.l.verma(a)intel.com>
---
drivers/nvdimm/pmem.c | 37 +++++++++++++++++++++++++------------
1 file changed, 25 insertions(+), 12 deletions(-)
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 5b536be..0fc1826 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -388,12 +388,12 @@ static void nd_pmem_shutdown(struct device *dev)
static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
{
- struct pmem_device *pmem = dev_get_drvdata(dev);
- struct nd_region *nd_region = to_region(pmem);
+ struct nd_region *nd_region;
resource_size_t offset = 0, end_trunc = 0;
struct nd_namespace_common *ndns;
struct nd_namespace_io *nsio;
struct resource res;
+ struct badblocks *bb;
if (event != NVDIMM_REVALIDATE_POISON)
return;
@@ -402,20 +402,33 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
struct nd_btt *nd_btt = to_nd_btt(dev);
ndns = nd_btt->ndns;
- } else if (is_nd_pfn(dev)) {
- struct nd_pfn *nd_pfn = to_nd_pfn(dev);
- struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
+ nd_region = to_nd_region(ndns->dev.parent);
+ nsio = to_nd_namespace_io(&ndns->dev);
+ bb = &nsio->bb;
+ } else {
+ struct pmem_device *pmem = dev_get_drvdata(dev);
- ndns = nd_pfn->ndns;
- offset = pmem->data_offset + __le32_to_cpu(pfn_sb->start_pad);
- end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
- } else
- ndns = to_ndns(dev);
+ nd_region = to_region(pmem);
+ bb = &pmem->bb;
+
+ if (is_nd_pfn(dev)) {
+ struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+ struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
+
+ ndns = nd_pfn->ndns;
+ offset = pmem->data_offset +
+ __le32_to_cpu(pfn_sb->start_pad);
+ end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
+ } else {
+ ndns = to_ndns(dev);
+ }
+
+ nsio = to_nd_namespace_io(&ndns->dev);
+ }
- nsio = to_nd_namespace_io(&ndns->dev);
res.start = nsio->res.start + offset;
res.end = nsio->res.end - end_trunc;
- nvdimm_badblocks_populate(nd_region, &pmem->bb, &res);
+ nvdimm_badblocks_populate(nd_region, bb, &res);
}
MODULE_ALIAS("pmem");
5 years
[PATCH v3] axon_ram: add dax_operations support
by Dan Williams
Setup a dax_device to have the same lifetime as the axon_ram block
device and add a ->direct_access() method that is equivalent to
axon_ram_direct_access(). Once fs/dax.c has been converted to use
dax_operations the old axon_ram_direct_access() will be removed.
Reported-by: Gerald Schaefer <gerald.schaefer(a)de.ibm.com>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
---
Changes since v2:
* fix return code in the alloc_dax() failure case (Gerald)
arch/powerpc/platforms/Kconfig | 1 +
arch/powerpc/sysdev/axonram.c | 48 +++++++++++++++++++++++++++++++++++-----
2 files changed, 43 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 7e3a2ebba29b..33244e3d9375 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -284,6 +284,7 @@ config CPM2
config AXON_RAM
tristate "Axon DDR2 memory device driver"
depends on PPC_IBM_CELL_BLADE && BLOCK
+ select DAX
default m
help
It registers one block device per Axon's DDR2 memory bank found
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index f523ac883150..171ba86a3494 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -25,6 +25,7 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
+#include <linux/dax.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/fs.h>
@@ -62,6 +63,7 @@ static int azfs_major, azfs_minor;
struct axon_ram_bank {
struct platform_device *device;
struct gendisk *disk;
+ struct dax_device *dax_dev;
unsigned int irq_id;
unsigned long ph_addr;
unsigned long io_addr;
@@ -137,25 +139,47 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
return BLK_QC_T_NONE;
}
+static long
+__axon_ram_direct_access(struct axon_ram_bank *bank, pgoff_t pgoff, long nr_pages,
+ void **kaddr, pfn_t *pfn)
+{
+ resource_size_t offset = pgoff * PAGE_SIZE;
+
+ *kaddr = (void *) bank->io_addr + offset;
+ *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
+ return (bank->size - offset) / PAGE_SIZE;
+}
+
/**
* axon_ram_direct_access - direct_access() method for block device
* @device, @sector, @data: see block_device_operations method
*/
static long
-axon_ram_direct_access(struct block_device *device, sector_t sector,
+axon_ram_blk_direct_access(struct block_device *device, sector_t sector,
void **kaddr, pfn_t *pfn, long size)
{
struct axon_ram_bank *bank = device->bd_disk->private_data;
- loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
- *kaddr = (void *) bank->io_addr + offset;
- *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
- return bank->size - offset;
+ return __axon_ram_direct_access(bank, (sector * 512) / PAGE_SIZE,
+ size / PAGE_SIZE, kaddr, pfn) * PAGE_SIZE;
}
static const struct block_device_operations axon_ram_devops = {
.owner = THIS_MODULE,
- .direct_access = axon_ram_direct_access
+ .direct_access = axon_ram_blk_direct_access
+};
+
+static long
+axon_ram_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
+ void **kaddr, pfn_t *pfn)
+{
+ struct axon_ram_bank *bank = dax_get_private(dax_dev);
+
+ return __axon_ram_direct_access(bank, pgoff, nr_pages, kaddr, pfn);
+}
+
+static const struct dax_operations axon_ram_dax_ops = {
+ .direct_access = axon_ram_dax_direct_access,
};
/**
@@ -219,6 +243,7 @@ static int axon_ram_probe(struct platform_device *device)
goto failed;
}
+
bank->disk->major = azfs_major;
bank->disk->first_minor = azfs_minor;
bank->disk->fops = &axon_ram_devops;
@@ -227,6 +252,13 @@ static int axon_ram_probe(struct platform_device *device)
sprintf(bank->disk->disk_name, "%s%d",
AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
+ bank->dax_dev = alloc_dax(bank, bank->disk->disk_name,
+ &axon_ram_dax_ops);
+ if (!bank->dax_dev) {
+ rc = -ENOMEM;
+ goto failed;
+ }
+
bank->disk->queue = blk_alloc_queue(GFP_KERNEL);
if (bank->disk->queue == NULL) {
dev_err(&device->dev, "Cannot register disk queue\n");
@@ -278,6 +310,8 @@ static int axon_ram_probe(struct platform_device *device)
del_gendisk(bank->disk);
put_disk(bank->disk);
}
+ kill_dax(bank->dax_dev);
+ put_dax(bank->dax_dev);
device->dev.platform_data = NULL;
if (bank->io_addr != 0)
iounmap((void __iomem *) bank->io_addr);
@@ -300,6 +334,8 @@ axon_ram_remove(struct platform_device *device)
device_remove_file(&device->dev, &dev_attr_ecc);
free_irq(bank->irq_id, device);
+ kill_dax(bank->dax_dev);
+ put_dax(bank->dax_dev);
del_gendisk(bank->disk);
put_disk(bank->disk);
iounmap((void __iomem *) bank->io_addr);
5 years
[PATCH] libnvdimm: fix phys_addr for nvdimm_clear_poison
by Toshi Kani
nvdimm_clear_poison() expects a physical address, not an offset.
Fix nsio_rw_bytes() to call nvdimm_clear_poison() with a physical
address.
Signed-off-by: Toshi Kani <toshi.kani(a)hpe.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Dave Jiang <dave.jiang(a)intel.com>
Cc: Vishal Verma <vishal.l.verma(a)intel.com>
---
drivers/nvdimm/claim.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index ca6d572..0b31073 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -254,7 +254,8 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
&& (!ndns->claim || !is_nd_btt(ndns->claim))) {
long cleared;
- cleared = nvdimm_clear_poison(&ndns->dev, offset, size);
+ cleared = nvdimm_clear_poison(&ndns->dev,
+ nsio->res.start + offset, size);
if (cleared < size)
rc = -EIO;
if (cleared > 0 && cleared / 512) {
5 years
[RFC 0/8] Copy Offload with Peer-to-Peer PCI Memory
by Logan Gunthorpe
Hello,
As discussed at LSF/MM we'd like to present our work to enable
copy offload support in NVMe fabrics RDMA targets. We'd appreciate
some review and feedback from the community on our direction.
This series is not intended to go upstream at this point.
The concept here is to use memory that's exposed on a PCI BAR as
data buffers in the NVME target code such that data can be transferred
from an RDMA NIC to the special memory and then directly to an NVMe
device avoiding system memory entirely. The upside of this is better
QoS for applications running on the CPU utilizing memory and lower
PCI bandwidth required to the CPU (such that systems could be designed
with fewer lanes connected to the CPU). However, presently, the trade-off
is currently a reduction in overall throughput. (Largely due to hardware
issues that would certainly improve in the future).
Due to these trade-offs we've designed the system to only enable using
the PCI memory in cases where the NIC, NVMe devices and memory are all
behind the same PCI switch. This will mean many setups that could likely
work well will not be supported so that we can be more confident it
will work and not place any responsibility on the user to understand
their topology. (We've chosen to go this route based on feedback we
received at LSF).
In order to enable this functionality we introduce a new p2pmem device
which can be instantiated by PCI drivers. The device will register some
PCI memory as ZONE_DEVICE and provide an genalloc based allocator for
users of these devices to get buffers. We give an example of enabling
p2p memory with the cxgb4 driver, however currently these devices have
some hardware issues that prevent their use so we will likely be
dropping this patch in the future. Ideally, we'd want to enable this
functionality with NVME CMB buffers, however we don't have any hardware
with this feature at this time.
In nvmet-rdma, we attempt to get an appropriate p2pmem device at
queue creation time and if a suitable one is found we will use it for
all the (non-inlined) memory in the queue. An 'allow_p2pmem' configfs
attribute is also created which is required to be set before any p2pmem
is attempted.
This patchset also includes a more controversial patch which provides an
interface for userspace to obtain p2pmem buffers through an mmap call on
a cdev. This enables userspace to fairly easily use p2pmem with RDMA and
O_DIRECT interfaces. However, the user would be entirely responsible for
knowing what they're doing and inspecting sysfs to understand the pci
topology and only using it in sane situations.
Thanks,
Logan
Logan Gunthorpe (6):
Introduce Peer-to-Peer memory (p2pmem) device
nvmet: Use p2pmem in nvme target
scatterlist: Modify SG copy functions to support io memory.
nvmet: Be careful about using iomem accesses when dealing with p2pmem
p2pmem: Support device removal
p2pmem: Added char device user interface
Steve Wise (2):
cxgb4: setup pcie memory window 4 and create p2pmem region
p2pmem: Add debugfs "stats" file
drivers/memory/Kconfig | 5 +
drivers/memory/Makefile | 2 +
drivers/memory/p2pmem.c | 697 ++++++++++++++++++++++++
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 3 +
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 97 +++-
drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 5 +
drivers/nvme/target/configfs.c | 31 ++
drivers/nvme/target/core.c | 18 +-
drivers/nvme/target/fabrics-cmd.c | 28 +-
drivers/nvme/target/nvmet.h | 2 +
drivers/nvme/target/rdma.c | 183 +++++--
drivers/scsi/scsi_debug.c | 7 +-
include/linux/p2pmem.h | 120 ++++
include/linux/scatterlist.h | 7 +-
lib/scatterlist.c | 64 ++-
15 files changed, 1189 insertions(+), 80 deletions(-)
create mode 100644 drivers/memory/p2pmem.c
create mode 100644 include/linux/p2pmem.h
--
2.1.4
5 years
[RFC] nvdimm: Uninitialized variable used in DSM calls
by Jerry Hoemann
nd_cmd_out_size is called by __nd_ioctl to size the buffer passed to
acpi_nfit_ctl. If the DSM function being called has a variable
sized output, nd_cmd_out_size will look at the return field to
determine buffer size. However, the DSM call hasn't been made yet,
so output size is core residue.
Have nd_cmd_out_size be bimodal with version "early" to be called
before the DSM call is made. For variable sized output fields
have it return ND_IOCTL_MAX_BUFLEN. __nd_ioctl sees new return
values and adjust buffer size accordingly.
The downside to this approach are:
1) Requires user buffer input size to be ND_IOCTL_MAX_BUFLEN (4 MB) for
calls to DSM with variable return.
2) The needless copyin of 4MB
An alternative approach (not yet prototyped) would move the call
to nd_cmd_out_size until after the return from acpi_nfit_ctl. Here,
the call has been made and return size would be known.
The size of the buffer allocated in would always be ND_IOCTL_MAX_BUFLEN.
Signed-off-by: Jerry Hoemann <jerry.hoemann(a)hpe.com>
---
drivers/nvdimm/bus.c | 30 ++++++++++++++++++++++++------
1 file changed, 24 insertions(+), 6 deletions(-)
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 23d4a17..50f8cc6 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -713,9 +713,9 @@ u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
}
EXPORT_SYMBOL_GPL(nd_cmd_in_size);
-u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
+u32 nd_cmd_out_size_early(struct nvdimm *nvdimm, int cmd,
const struct nd_cmd_desc *desc, int idx, const u32 *in_field,
- const u32 *out_field, unsigned long remainder)
+ const u32 *out_field, unsigned long remainder, int early)
{
if (idx >= desc->out_num)
return UINT_MAX;
@@ -725,9 +725,13 @@ u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && idx == 1)
return in_field[1];
- else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2)
+ else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2) {
+ if (early)
+ return ND_IOCTL_MAX_BUFLEN;
return out_field[1];
- else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 2) {
+ } else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 2) {
+ if (early)
+ return ND_IOCTL_MAX_BUFLEN;
/*
* Per table 9-276 ARS Data in ACPI 6.1, out_field[1] is
* "Size of Output Buffer in bytes, including this
@@ -753,6 +757,13 @@ u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
return UINT_MAX;
}
+
+u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
+ const struct nd_cmd_desc *desc, int idx, const u32 *in_field,
+ const u32 *out_field, unsigned long remainder)
+{
+ return nd_cmd_out_size_early(nvdimm, cmd, desc, idx, in_field, out_field, remainder, 0);
+}
EXPORT_SYMBOL_GPL(nd_cmd_out_size);
void wait_nvdimm_bus_probe_idle(struct device *dev)
@@ -890,10 +901,17 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
/* process an output envelope */
for (i = 0; i < desc->out_num; i++) {
- u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i,
- (u32 *) in_env, (u32 *) out_env, 0);
+ u32 out_size = nd_cmd_out_size_early(nvdimm, cmd, desc, i,
+ (u32 *) in_env, (u32 *) out_env, 0, 1);
u32 copy;
+ if (out_size == ND_IOCTL_MAX_BUFLEN) {
+ /* variable sized output */
+ out_len = out_size;
+ out_len -= in_len;
+ break;
+ }
+
if (out_size == UINT_MAX) {
dev_dbg(dev, "%s:%s unknown output size cmd: %s field: %d\n",
__func__, dimm_name, cmd_name, i);
--
1.8.3.1
5 years
[4.4-stable PATCH 0/2] libnvdimm: stable fixes for 4.4
by Dan Williams
Hi -stable team,
Here is a backport for commit 11e63f6d920d "x86, pmem: fix broken
__copy_user_nocache cache-bypass assumptions", and another block layer
fix that allows the libnvdimm unit tests to run.
I have copied Jens and Jan on the block-layer fix in case they have any
concerns.
---
Dan Williams (2):
x86, pmem: fix broken __copy_user_nocache cache-bypass assumptions
block: fix del_gendisk() vs blkdev_ioctl crash
arch/x86/include/asm/pmem.h | 45 +++++++++++++++++++++++++++++++------------
block/genhd.c | 1 -
2 files changed, 32 insertions(+), 14 deletions(-)
5 years