[ndctl PATCH] test: Add device-dax MADV_HWPOISON test
by Dan Williams
Reuse test_dax_poison() for triggering memory_failure() on device-dax
huge/gigantic page mappings.
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
---
test.h | 4 ++++
test/dax-pmd.c | 34 +++++++++++++++++++++++++---------
test/device-dax.c | 28 +++++++++++++++++++---------
3 files changed, 48 insertions(+), 18 deletions(-)
diff --git a/test.h b/test.h
index 5f2d6293c104..ce873f51f7aa 100644
--- a/test.h
+++ b/test.h
@@ -12,6 +12,8 @@
*/
#ifndef __TEST_H__
#define __TEST_H__
+#include <stdbool.h>
+
struct ndctl_test;
struct ndctl_ctx;
struct ndctl_test *ndctl_test_new(unsigned int kver);
@@ -36,6 +38,8 @@ struct ndctl_ctx;
int test_parent_uuid(int loglevel, struct ndctl_test *test, struct ndctl_ctx *ctx);
int test_multi_pmem(int loglevel, struct ndctl_test *test, struct ndctl_ctx *ctx);
int test_dax_directio(int dax_fd, unsigned long align, void *dax_addr, off_t offset);
+int test_dax_poison(int dax_fd, unsigned long align, void *dax_addr,
+ off_t offset, bool fsdax);
int test_dpa_alloc(int loglevel, struct ndctl_test *test, struct ndctl_ctx *ctx);
int test_dsm_fail(int loglevel, struct ndctl_test *test, struct ndctl_ctx *ctx);
int test_libndctl(int loglevel, struct ndctl_test *test, struct ndctl_ctx *ctx);
diff --git a/test/dax-pmd.c b/test/dax-pmd.c
index abff4f9fd199..65110b7c6a4c 100644
--- a/test/dax-pmd.c
+++ b/test/dax-pmd.c
@@ -27,6 +27,7 @@
#include <test.h>
#include <util/size.h>
#include <linux/fiemap.h>
+#include <stdbool.h>
#define NUM_EXTENTS 5
#define fail() fprintf(stderr, "%s: failed at: %d (%s)\n", \
@@ -217,8 +218,8 @@ static void sigbus_hdl(int sig, siginfo_t *si, void *ptr)
siglongjmp(sj_env, 1);
}
-static int test_dax_poison(int dax_fd, unsigned long align, void *dax_addr,
- off_t offset)
+int test_dax_poison(int dax_fd, unsigned long align, void *dax_addr,
+ off_t offset, bool fsdax)
{
unsigned char *addr = MAP_FAILED;
struct sigaction act;
@@ -226,6 +227,13 @@ static int test_dax_poison(int dax_fd, unsigned long align, void *dax_addr,
void *buf;
int rc;
+ /*
+ * MADV_HWPOISON must be page aligned, and this routine assumes
+ * align is >= 8K
+ */
+ if (align < SZ_2M)
+ return 0;
+
if (posix_memalign(&buf, 4096, 4096) != 0)
return -ENOMEM;
@@ -240,13 +248,15 @@ static int test_dax_poison(int dax_fd, unsigned long align, void *dax_addr,
}
/* dirty the block on disk to bypass the default zero page */
- rc = pwrite(dax_fd, buf, 4096, offset + align / 2);
- if (rc < 4096) {
- fail();
- rc = -ENXIO;
- goto out;
+ if (fsdax) {
+ rc = pwrite(dax_fd, buf, 4096, offset + align / 2);
+ if (rc < 4096) {
+ fail();
+ rc = -ENXIO;
+ goto out;
+ }
+ fsync(dax_fd);
}
- fsync(dax_fd);
addr = mmap(dax_addr, 2*align, PROT_READ|PROT_WRITE,
MAP_SHARED_VALIDATE|MAP_POPULATE|MAP_SYNC, dax_fd, offset);
@@ -281,6 +291,11 @@ clear_error:
goto out;
}
+ if (!fsdax) {
+ rc = 0;
+ goto out;
+ }
+
rc = fallocate(dax_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE,
offset + align / 2, 4096);
if (rc) {
@@ -312,6 +327,7 @@ out:
static int test_pmd(int fd)
{
unsigned long long m_align, p_align, pmd_off;
+ static const bool fsdax = true;
struct fiemap_extent *ext;
void *base, *pmd_addr;
struct fiemap *map;
@@ -371,7 +387,7 @@ static int test_pmd(int fd)
if (rc)
goto err_directio;
- rc = test_dax_poison(fd, HPAGE_SIZE, pmd_addr, pmd_off);
+ rc = test_dax_poison(fd, HPAGE_SIZE, pmd_addr, pmd_off, fsdax);
err_directio:
err_extent:
diff --git a/test/device-dax.c b/test/device-dax.c
index 0a42a327c96d..712c247adfb2 100644
--- a/test/device-dax.c
+++ b/test/device-dax.c
@@ -151,15 +151,6 @@ static int __test_device_dax(unsigned long align, int loglevel,
struct daxctl_region *dax_region;
char *buf, path[100], data[VERIFY_BUF_SIZE];
- memset (&act, 0, sizeof(act));
- act.sa_sigaction = sigbus;
- act.sa_flags = SA_SIGINFO;
-
- if (sigaction(SIGBUS, &act, 0)) {
- perror("sigaction");
- return 1;
- }
-
ndctl_set_log_priority(ctx, loglevel);
ndns = ndctl_get_test_dev(ctx);
@@ -276,6 +267,7 @@ static int __test_device_dax(unsigned long align, int loglevel,
* otherwise not supported.
*/
if (ndctl_test_attempt(test, KERNEL_VERSION(4, 9, 0))) {
+ static const bool devdax = false;
int fd2;
rc = test_dax_directio(fd, align, NULL, 0);
@@ -285,6 +277,15 @@ static int __test_device_dax(unsigned long align, int loglevel,
goto out;
}
+ fprintf(stderr, "%s: test dax poison\n",
+ ndctl_namespace_get_devname(ndns));
+ rc = test_dax_poison(fd, align, NULL, 0, devdax);
+ if (rc) {
+ fprintf(stderr, "%s: failed dax poison\n",
+ ndctl_namespace_get_devname(ndns));
+ goto out;
+ }
+
fd2 = open("/proc/self/smaps", O_RDONLY);
if (fd2 < 0) {
fprintf(stderr, "%s: failed smaps open\n",
@@ -312,6 +313,15 @@ static int __test_device_dax(unsigned long align, int loglevel,
goto out;
}
+ memset(&act, 0, sizeof(act));
+ act.sa_sigaction = sigbus;
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGBUS, &act, 0)) {
+ perror("sigaction");
+ rc = EXIT_FAILURE;
+ goto out;
+ }
+
/* test fault after device-dax instance disabled */
if (sigsetjmp(sj_env, 1)) {
/* got sigbus, success */
2 years, 8 months
[ndctl PATCH v4] ndctl: add an api for getting the ars_status overflow flag
by Vishal Verma
The ARS status command defines a 'flags' field that wasn't being exposed
via an API yet. Since there is only one flag defined in ACPI 6.2, add a
helper for retrieving it (overflow flag).
Reported-by: Jacek Zloch <jacek.zloch(a)intel.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Reviewed-by: Dan Williams <dan.j.williams(a)intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma(a)intel.com>
---
ndctl/lib/ars.c | 11 +++++++++++
ndctl/lib/libndctl.sym | 1 +
ndctl/lib/private.h | 3 +++
ndctl/libndctl.h | 1 +
4 files changed, 16 insertions(+)
v4: move the flag mask to private.h (Dan)
diff --git a/ndctl/lib/ars.c b/ndctl/lib/ars.c
index e04b51e..c78e3bf 100644
--- a/ndctl/lib/ars.c
+++ b/ndctl/lib/ars.c
@@ -269,6 +269,17 @@ NDCTL_EXPORT unsigned long long ndctl_cmd_ars_get_record_len(
return ars_stat->ars_status->records[rec_index].length;
}
+NDCTL_EXPORT int ndctl_cmd_ars_stat_get_flag_overflow(
+ struct ndctl_cmd *ars_stat)
+{
+ struct ndctl_ctx *ctx = ndctl_bus_get_ctx(cmd_to_bus(ars_stat));
+
+ if (!validate_ars_stat(ctx, ars_stat))
+ return -EINVAL;
+
+ return !!(ars_stat->ars_status->flags & ND_ARS_STAT_FLAG_OVERFLOW);
+}
+
NDCTL_EXPORT struct ndctl_cmd *ndctl_bus_cmd_new_clear_error(
unsigned long long address, unsigned long long len,
struct ndctl_cmd *ars_cap)
diff --git a/ndctl/lib/libndctl.sym b/ndctl/lib/libndctl.sym
index c1228e5..e939993 100644
--- a/ndctl/lib/libndctl.sym
+++ b/ndctl/lib/libndctl.sym
@@ -365,4 +365,5 @@ global:
ndctl_cmd_ars_cap_get_clear_unit;
ndctl_namespace_inject_error2;
ndctl_namespace_uninject_error2;
+ ndctl_cmd_ars_stat_get_flag_overflow;
} LIBNDCTL_15;
diff --git a/ndctl/lib/private.h b/ndctl/lib/private.h
index 73bbeed..b756b74 100644
--- a/ndctl/lib/private.h
+++ b/ndctl/lib/private.h
@@ -278,6 +278,9 @@ struct ndctl_bb {
struct list_node list;
};
+/* ars_status flags */
+#define ND_ARS_STAT_FLAG_OVERFLOW (1 << 0)
+
struct ndctl_dimm_ops {
const char *(*cmd_desc)(int);
struct ndctl_cmd *(*new_smart)(struct ndctl_dimm *);
diff --git a/ndctl/libndctl.h b/ndctl/libndctl.h
index be997ac..9270bae 100644
--- a/ndctl/libndctl.h
+++ b/ndctl/libndctl.h
@@ -210,6 +210,7 @@ struct ndctl_cmd *ndctl_bus_cmd_new_clear_error(unsigned long long address,
unsigned long long ndctl_cmd_clear_error_get_cleared(
struct ndctl_cmd *clear_err);
unsigned int ndctl_cmd_ars_cap_get_clear_unit(struct ndctl_cmd *ars_cap);
+int ndctl_cmd_ars_stat_get_flag_overflow(struct ndctl_cmd *ars_stat);
/*
* Note: ndctl_cmd_smart_get_temperature is an alias for
--
2.17.0
2 years, 8 months
[ndctl PATCH v3] ndctl: add an api for getting the ars_status overflow flag
by Vishal Verma
The ARS status command defines a 'flags' field that wasn't being exposed
via an API yet. Since there is only one flag defined in ACPI 6.2, add a
helper for retrieving it (overflow flag).
Reported-by: Jacek Zloch <jacek.zloch(a)intel.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma(a)intel.com>
---
ndctl/lib/ars.c | 11 +++++++++++
ndctl/lib/libndctl.sym | 1 +
ndctl/libndctl.h | 4 ++++
3 files changed, 16 insertions(+)
v3: ensure we can only return 0, 1, or -error from this interface. (Dan)
diff --git a/ndctl/lib/ars.c b/ndctl/lib/ars.c
index e04b51e..c78e3bf 100644
--- a/ndctl/lib/ars.c
+++ b/ndctl/lib/ars.c
@@ -269,6 +269,17 @@ NDCTL_EXPORT unsigned long long ndctl_cmd_ars_get_record_len(
return ars_stat->ars_status->records[rec_index].length;
}
+NDCTL_EXPORT int ndctl_cmd_ars_stat_get_flag_overflow(
+ struct ndctl_cmd *ars_stat)
+{
+ struct ndctl_ctx *ctx = ndctl_bus_get_ctx(cmd_to_bus(ars_stat));
+
+ if (!validate_ars_stat(ctx, ars_stat))
+ return -EINVAL;
+
+ return !!(ars_stat->ars_status->flags & ND_ARS_STAT_FLAG_OVERFLOW);
+}
+
NDCTL_EXPORT struct ndctl_cmd *ndctl_bus_cmd_new_clear_error(
unsigned long long address, unsigned long long len,
struct ndctl_cmd *ars_cap)
diff --git a/ndctl/lib/libndctl.sym b/ndctl/lib/libndctl.sym
index c1228e5..e939993 100644
--- a/ndctl/lib/libndctl.sym
+++ b/ndctl/lib/libndctl.sym
@@ -365,4 +365,5 @@ global:
ndctl_cmd_ars_cap_get_clear_unit;
ndctl_namespace_inject_error2;
ndctl_namespace_uninject_error2;
+ ndctl_cmd_ars_stat_get_flag_overflow;
} LIBNDCTL_15;
diff --git a/ndctl/libndctl.h b/ndctl/libndctl.h
index be997ac..3d141a6 100644
--- a/ndctl/libndctl.h
+++ b/ndctl/libndctl.h
@@ -187,6 +187,9 @@ int ndctl_dimm_is_enabled(struct ndctl_dimm *dimm);
int ndctl_dimm_disable(struct ndctl_dimm *dimm);
int ndctl_dimm_enable(struct ndctl_dimm *dimm);
+/* ars_status flags */
+#define ND_ARS_STAT_FLAG_OVERFLOW (1 << 0)
+
struct ndctl_cmd;
struct ndctl_cmd *ndctl_bus_cmd_new_ars_cap(struct ndctl_bus *bus,
unsigned long long address, unsigned long long len);
@@ -210,6 +213,7 @@ struct ndctl_cmd *ndctl_bus_cmd_new_clear_error(unsigned long long address,
unsigned long long ndctl_cmd_clear_error_get_cleared(
struct ndctl_cmd *clear_err);
unsigned int ndctl_cmd_ars_cap_get_clear_unit(struct ndctl_cmd *ars_cap);
+int ndctl_cmd_ars_stat_get_flag_overflow(struct ndctl_cmd *ars_stat);
/*
* Note: ndctl_cmd_smart_get_temperature is an alias for
--
2.17.0
2 years, 8 months
[ndctl PATCH v2] ndctl: add an api for getting the ars_status overflow flag
by Vishal Verma
The ARS status command defines a 'flags' field that wasn't being exposed
via an API yet. Since there is only one flag defined in ACPI 6.2, add a
helper for retrieving it (overflow flag).
Reported-by: Jacek Zloch <jacek.zloch(a)intel.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma(a)intel.com>
---
ndctl/lib/ars.c | 11 +++++++++++
ndctl/lib/libndctl.sym | 1 +
ndctl/libndctl.h | 4 ++++
3 files changed, 16 insertions(+)
v2: instead of exposing the binary representation of flags, provide a
helper for each flag. ACPI currently defines a single 'overflow' flag.
(Dan)
diff --git a/ndctl/lib/ars.c b/ndctl/lib/ars.c
index e04b51e..b765c88 100644
--- a/ndctl/lib/ars.c
+++ b/ndctl/lib/ars.c
@@ -269,6 +269,17 @@ NDCTL_EXPORT unsigned long long ndctl_cmd_ars_get_record_len(
return ars_stat->ars_status->records[rec_index].length;
}
+NDCTL_EXPORT int ndctl_cmd_ars_stat_get_flag_overflow(
+ struct ndctl_cmd *ars_stat)
+{
+ struct ndctl_ctx *ctx = ndctl_bus_get_ctx(cmd_to_bus(ars_stat));
+
+ if (!validate_ars_stat(ctx, ars_stat))
+ return -EINVAL;
+
+ return (ars_stat->ars_status->flags & ND_ARS_STAT_FLAG_OVERFLOW);
+}
+
NDCTL_EXPORT struct ndctl_cmd *ndctl_bus_cmd_new_clear_error(
unsigned long long address, unsigned long long len,
struct ndctl_cmd *ars_cap)
diff --git a/ndctl/lib/libndctl.sym b/ndctl/lib/libndctl.sym
index c1228e5..e939993 100644
--- a/ndctl/lib/libndctl.sym
+++ b/ndctl/lib/libndctl.sym
@@ -365,4 +365,5 @@ global:
ndctl_cmd_ars_cap_get_clear_unit;
ndctl_namespace_inject_error2;
ndctl_namespace_uninject_error2;
+ ndctl_cmd_ars_stat_get_flag_overflow;
} LIBNDCTL_15;
diff --git a/ndctl/libndctl.h b/ndctl/libndctl.h
index be997ac..3d141a6 100644
--- a/ndctl/libndctl.h
+++ b/ndctl/libndctl.h
@@ -187,6 +187,9 @@ int ndctl_dimm_is_enabled(struct ndctl_dimm *dimm);
int ndctl_dimm_disable(struct ndctl_dimm *dimm);
int ndctl_dimm_enable(struct ndctl_dimm *dimm);
+/* ars_status flags */
+#define ND_ARS_STAT_FLAG_OVERFLOW (1 << 0)
+
struct ndctl_cmd;
struct ndctl_cmd *ndctl_bus_cmd_new_ars_cap(struct ndctl_bus *bus,
unsigned long long address, unsigned long long len);
@@ -210,6 +213,7 @@ struct ndctl_cmd *ndctl_bus_cmd_new_clear_error(unsigned long long address,
unsigned long long ndctl_cmd_clear_error_get_cleared(
struct ndctl_cmd *clear_err);
unsigned int ndctl_cmd_ars_cap_get_clear_unit(struct ndctl_cmd *ars_cap);
+int ndctl_cmd_ars_stat_get_flag_overflow(struct ndctl_cmd *ars_stat);
/*
* Note: ndctl_cmd_smart_get_temperature is an alias for
--
2.17.0
2 years, 8 months
[PATCH v3 1/4] pmem, libnvdimm: complete REQ_FLUSH => REQ_PREFLUSH
by Ross Zwisler
Complete the move from REQ_FLUSH to REQ_PREFLUSH that apparently started
way back in v4.8.
Signed-off-by: Ross Zwisler <ross.zwisler(a)linux.intel.com>
---
drivers/nvdimm/pmem.c | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 9d714926ecf5..252adfab1e47 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -164,11 +164,6 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
return rc;
}
-/* account for REQ_FLUSH rename, replace with REQ_PREFLUSH after v4.8-rc1 */
-#ifndef REQ_FLUSH
-#define REQ_FLUSH REQ_PREFLUSH
-#endif
-
static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
{
blk_status_t rc = 0;
@@ -179,7 +174,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
struct pmem_device *pmem = q->queuedata;
struct nd_region *nd_region = to_region(pmem);
- if (bio->bi_opf & REQ_FLUSH)
+ if (bio->bi_opf & REQ_PREFLUSH)
nvdimm_flush(nd_region);
do_acct = nd_iostat_start(bio, &start);
--
2.14.4
2 years, 8 months
[PATCH v2 1/3] libnvdimm: unconditionally deep flush on *sync
by Ross Zwisler
Prior to this commit we would only do a "deep flush" in response to an
msync/fsync/sync call if the nvdimm_has_cache() returned true at the time
we were setting up the request queue. This happens due to the write cache
value passed in to blk_queue_write_cache(). We do have a "write_cache"
sysfs entry for namespaces, i.e.:
/sys/bus/nd/devices/pfn0.1/block/pmem0/dax/write_cache
which can be used to control whether or not the kernel thinks a given
namespace has a write cache, but this didn't modify the deep flush behavior
that we set up when the driver was initialized. Instead, it only modified
whether or not DAX would flush CPU caches in response to *sync calls.
Simplify this by making the *sync "deep flush" always happen, regardless of
the write cache setting of a namespace. The DAX CPU cache flushing will be
controlled by a combination of the write_cache setting as well as whether
the platform supports flush-on-fail CPU caches.
Signed-off-by: Ross Zwisler <ross.zwisler(a)linux.intel.com>
Suggested-by: Dan Williams <dan.j.williams(a)intel.com>
---
drivers/nvdimm/pmem.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 9d714926ecf5..a152dd9e4134 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -299,7 +299,7 @@ static int pmem_attach_disk(struct device *dev,
{
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
struct nd_region *nd_region = to_nd_region(dev->parent);
- int nid = dev_to_node(dev), fua, wbc;
+ int nid = dev_to_node(dev), fua;
struct resource *res = &nsio->res;
struct resource bb_res;
struct nd_pfn *nd_pfn = NULL;
@@ -335,7 +335,6 @@ static int pmem_attach_disk(struct device *dev,
dev_warn(dev, "unable to guarantee persistence of writes\n");
fua = 0;
}
- wbc = nvdimm_has_cache(nd_region);
if (!devm_request_mem_region(dev, res->start, resource_size(res),
dev_name(&ndns->dev))) {
@@ -382,7 +381,7 @@ static int pmem_attach_disk(struct device *dev,
return PTR_ERR(addr);
pmem->virt_addr = addr;
- blk_queue_write_cache(q, wbc, fua);
+ blk_queue_write_cache(q, true, fua);
blk_queue_make_request(q, pmem_make_request);
blk_queue_physical_block_size(q, PAGE_SIZE);
blk_queue_logical_block_size(q, pmem_sector_size(ndns));
@@ -413,7 +412,7 @@ static int pmem_attach_disk(struct device *dev,
put_disk(disk);
return -ENOMEM;
}
- dax_write_cache(dax_dev, wbc);
+ dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
pmem->dax_dev = dax_dev;
gendev = disk_to_dev(disk);
--
2.14.4
2 years, 8 months
[ndctl PATCH] ndctl: add an api for getting ars_status flags
by Vishal Verma
The ARS status command defines a 'flags' field that wasn't being exposed
via an API yet. Add a new ndctl_cmd_ars_get_flags() helper to retrieve
this field.
Reported-by: Jacek Zloch <jacek.zloch(a)intel.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma(a)intel.com>
---
ndctl/lib/ars.c | 10 ++++++++++
ndctl/lib/libndctl.sym | 1 +
ndctl/libndctl.h | 1 +
3 files changed, 12 insertions(+)
diff --git a/ndctl/lib/ars.c b/ndctl/lib/ars.c
index e04b51e..b0a7ae6 100644
--- a/ndctl/lib/ars.c
+++ b/ndctl/lib/ars.c
@@ -269,6 +269,16 @@ NDCTL_EXPORT unsigned long long ndctl_cmd_ars_get_record_len(
return ars_stat->ars_status->records[rec_index].length;
}
+NDCTL_EXPORT unsigned int ndctl_cmd_ars_get_flags(struct ndctl_cmd *ars_stat)
+{
+ struct ndctl_ctx *ctx = ndctl_bus_get_ctx(cmd_to_bus(ars_stat));
+
+ if (!validate_ars_stat(ctx, ars_stat))
+ return 0;
+
+ return ars_stat->ars_status->flags;
+}
+
NDCTL_EXPORT struct ndctl_cmd *ndctl_bus_cmd_new_clear_error(
unsigned long long address, unsigned long long len,
struct ndctl_cmd *ars_cap)
diff --git a/ndctl/lib/libndctl.sym b/ndctl/lib/libndctl.sym
index c1228e5..9940c7a 100644
--- a/ndctl/lib/libndctl.sym
+++ b/ndctl/lib/libndctl.sym
@@ -365,4 +365,5 @@ global:
ndctl_cmd_ars_cap_get_clear_unit;
ndctl_namespace_inject_error2;
ndctl_namespace_uninject_error2;
+ ndctl_cmd_ars_get_flags;
} LIBNDCTL_15;
diff --git a/ndctl/libndctl.h b/ndctl/libndctl.h
index be997ac..99218e6 100644
--- a/ndctl/libndctl.h
+++ b/ndctl/libndctl.h
@@ -210,6 +210,7 @@ struct ndctl_cmd *ndctl_bus_cmd_new_clear_error(unsigned long long address,
unsigned long long ndctl_cmd_clear_error_get_cleared(
struct ndctl_cmd *clear_err);
unsigned int ndctl_cmd_ars_cap_get_clear_unit(struct ndctl_cmd *ars_cap);
+unsigned int ndctl_cmd_ars_get_flags(struct ndctl_cmd *ars_stat);
/*
* Note: ndctl_cmd_smart_get_temperature is an alias for
--
2.17.0
2 years, 8 months
Re: [Qemu-devel] Questions about vNVDIMM on qemu/KVM
by Pankaj Gupta
Hi,
>
> > I'm investigating status of vNVDIMM on qemu/KVM,
> > and I have some questions about it. I'm glad if anyone answer them.
> >
> > In my understanding, qemu/KVM has a feature to show NFIT for guest,
> > and it will be still updated about platform capability with this patch set.
> > https://lists.gnu.org/archive/html/qemu-devel/2018-05/msg04756.html
> >
> > And libvirt also supports this feature with <memory model='nvdimm'>
> > https://libvirt.org/formatdomain.html#elementsMemory
> >
> >
> > However, virtio-pmem is developing now, and it is better
> > for archtectures to detect regions of NVDIMM without ACPI (like s390x)
> > In addition, It is also necessary to flush guest contents on vNVDIMM
> > who has a backend-file.
> >
> >
> > Q1) Does ACPI.NFIT bus of qemu/kvm remain with virtio-pmem?
No.
> > How do each roles become it if both NFIT and virtio-pmem will be
> > available?
There are two main use cases:
1] DAX memory region pass-through to guest:
-------------------------------------------
As this region is present in actual physical NVDIMM device and exposed to guest,
ACPI/NFIT way is used. If all the persistent memory is used by only this way we
don't need 'virtio-pmem'.
2] Emulated DAX memory region in host passed to guest:
--------------------------------------------------------
If this type of region is exposed to guest, it will be preferable to use
'virtio-pmem'.
This is regular host memory which is mmaped in guest address space for emulating
persistent memory. Guest writes are present in host page cache and not assured to be
written on backing disk without an explicit flush/sync call.
'virtio-pmem' will solve the problem of flushing guest writes present in host page cache.
With filesystems at host which use journal-ling like (ext4, xfs), they automatically call
'fsync' at regular intervals. but still there is not 100% assurance of all write persistence until
an explicit flush is done from guest. So, we need an additional fsync to flush guest writes to
backing disk. We are using this approach to avoid using guest page cache and keep page cache management
of all the guests at host side.
If both ACPI NFIT and virtio-pmem are present, both will have their corresponding memory regions and
defined by memory type "Persistent shared Memory" in case of virtio-pmem and "Persistent Memory" for
ACPI NVDIMM. This is to differentiate both the memory types.
> > If my understanding is correct, both NFIT and virtio-pmem is used to
> > detect vNVDIMM regions, but only one seems to be necessary....
> >
> > Otherwize, is the NFIT bus just for keeping compatibility,
> > and virtio-pmem is promising way?
> >
> >
> > Q2) What bus is(will be?) created for virtio-pmem?
> > I could confirm the bus of NFIT is created with <memory
> > model='nvdimm'>,
For virtio-pmem also its nvdimm bus.
> > and I heard other bus will be created for virtio-pmem, but I could not
> > find what bus is created concretely.
> > ---
> > # ndctl list -B
> > {
> > "provider":"ACPI.NFIT",
> > "dev":"ndbus0"
> > }
> > ---
> >
> > I think it affects what operations user will be able to, and what
> > notification is necessary for vNVDIMM.
> > ACPI defines some operations like namespace controll, and notification
> > for NVDIMM health status or others.
> > (I suppose that other status notification might be necessary for
> > vNVDIMM,
> > but I'm not sure yet...)
For virtio-pmem, we are not providing advance features like namespace and various
other features which ACPI/NVDIMM hardware provides. This is just to keep paravirt
device simple.
Moreover I have not yet looked at ndctl side of things. I am not 100% sure how
ndctl will handle 'virtio-pmem'.
Adding 'Dan' in loop, he can add his thoughts.
Thanks,
Pankaj
> >
> > If my understanding is wrong, please correct me.
> >
> > Thanks,
> > ---
> > Yasunori Goto
> >
> >
> >
>
2 years, 8 months
[ndctl PATCH v3] ndctl: refactor validation of the ars_status command
by Vishal Verma
The APIs that iterate over the information contained in an ars_atatus
command require a prior, successfully completed ars_status command
struct. We were neglecting to verify that the firmware status too
indicates a success. We were also incorrectly requiring that
ars_status->status be zero, where as a positive status only indicates an
underrun. The underrun is fine as the firmware is not expected to
completely fill the max_ars_out sized buffer.
Refactor this checking to mimic validate_ars_cap() which checks the
firmware status, and fixes the check for the cmd status. Use this for
ndctl_cmd_ars_in_progress as well which had the same (incorrect)
cmd->status check.
Reported-by: Tomasz Rochumski <tomasz.rochumski(a)intel.com>
Tested-by: Jacek Zloch <jacek.zloch(a)intel.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma(a)intel.com>
---
ndctl/lib/ars.c | 69 ++++++++++++++++++++++++++++++-------------------
1 file changed, 43 insertions(+), 26 deletions(-)
v3: do cmd validation before dereferencing any of the fields in the
command.
diff --git a/ndctl/lib/ars.c b/ndctl/lib/ars.c
index 1ff6cf7..1e0cfdc 100644
--- a/ndctl/lib/ars.c
+++ b/ndctl/lib/ars.c
@@ -195,24 +195,44 @@ NDCTL_EXPORT unsigned int ndctl_cmd_ars_cap_get_clear_unit(
return 0;
}
+static bool __validate_ars_stat(struct ndctl_cmd *ars_stat)
+{
+ /*
+ * A positive status indicates an underrun, but that is fine since
+ * the firmware is not expected to completely fill the max_ars_out
+ * sized buffer.
+ */
+ if (ars_stat->type != ND_CMD_ARS_STATUS || ars_stat->status < 0)
+ return false;
+ if ((ndctl_cmd_get_firmware_status(ars_stat) & ARS_STATUS_MASK) != 0)
+ return false;
+ return true;
+}
+
+#define validate_ars_stat(ctx, ars_stat) \
+({ \
+ bool __valid = __validate_ars_stat(ars_stat); \
+ if (!__valid) \
+ dbg(ctx, "expected sucessfully completed ars_stat command\n"); \
+ __valid; \
+})
+
NDCTL_EXPORT int ndctl_cmd_ars_in_progress(struct ndctl_cmd *cmd)
{
struct ndctl_ctx *ctx = ndctl_bus_get_ctx(cmd_to_bus(cmd));
- if (cmd->type == ND_CMD_ARS_STATUS && cmd->status == 0) {
- if (cmd->ars_status->status == 1 << 16) {
- /*
- * If in-progress, invalidate the ndctl_cmd, so
- * that if we're called again without a fresh
- * ars_status command, we fail.
- */
- cmd->status = 1;
- return 1;
- }
+ if (!validate_ars_stat(ctx, cmd))
return 0;
- }
- dbg(ctx, "invalid ars_status\n");
+ if (ndctl_cmd_get_firmware_status(cmd) == 1 << 16) {
+ /*
+ * If in-progress, invalidate the ndctl_cmd, so
+ * that if we're called again without a fresh
+ * ars_status command, we fail.
+ */
+ cmd->status = 1;
+ return 1;
+ }
return 0;
}
@@ -220,11 +240,10 @@ NDCTL_EXPORT unsigned int ndctl_cmd_ars_num_records(struct ndctl_cmd *ars_stat)
{
struct ndctl_ctx *ctx = ndctl_bus_get_ctx(cmd_to_bus(ars_stat));
- if (ars_stat->type == ND_CMD_ARS_STATUS && ars_stat->status == 0)
- return ars_stat->ars_status->num_records;
+ if (!validate_ars_stat(ctx, ars_stat))
+ return 0;
- dbg(ctx, "invalid ars_status\n");
- return 0;
+ return ars_stat->ars_status->num_records;
}
NDCTL_EXPORT unsigned long long ndctl_cmd_ars_get_record_addr(
@@ -232,16 +251,15 @@ NDCTL_EXPORT unsigned long long ndctl_cmd_ars_get_record_addr(
{
struct ndctl_ctx *ctx = ndctl_bus_get_ctx(cmd_to_bus(ars_stat));
+ if (!validate_ars_stat(ctx, ars_stat))
+ return 0;
+
if (rec_index >= ars_stat->ars_status->num_records) {
dbg(ctx, "invalid record index\n");
return 0;
}
- if (ars_stat->type == ND_CMD_ARS_STATUS && ars_stat->status == 0)
- return ars_stat->ars_status->records[rec_index].err_address;
-
- dbg(ctx, "invalid ars_status\n");
- return 0;
+ return ars_stat->ars_status->records[rec_index].err_address;
}
NDCTL_EXPORT unsigned long long ndctl_cmd_ars_get_record_len(
@@ -249,16 +267,15 @@ NDCTL_EXPORT unsigned long long ndctl_cmd_ars_get_record_len(
{
struct ndctl_ctx *ctx = ndctl_bus_get_ctx(cmd_to_bus(ars_stat));
+ if (!validate_ars_stat(ctx, ars_stat))
+ return 0;
+
if (rec_index >= ars_stat->ars_status->num_records) {
dbg(ctx, "invalid record index\n");
return 0;
}
- if (ars_stat->type == ND_CMD_ARS_STATUS && ars_stat->status == 0)
- return ars_stat->ars_status->records[rec_index].length;
-
- dbg(ctx, "invalid ars_status\n");
- return 0;
+ return ars_stat->ars_status->records[rec_index].length;
}
NDCTL_EXPORT struct ndctl_cmd *ndctl_bus_cmd_new_clear_error(
--
2.17.0
2 years, 8 months
[PATCH 1/2] libnvdimm: use dax_write_cache* helpers
by Ross Zwisler
Use dax_write_cache() and dax_write_cache_enabled() instead of open coding
the bit operations.
Signed-off-by: Ross Zwisler <ross.zwisler(a)linux.intel.com>
---
drivers/dax/super.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 2b2332b605e4..c2c46f96b18c 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -182,8 +182,7 @@ static ssize_t write_cache_show(struct device *dev,
if (!dax_dev)
return -ENXIO;
- rc = sprintf(buf, "%d\n", !!test_bit(DAXDEV_WRITE_CACHE,
- &dax_dev->flags));
+ rc = sprintf(buf, "%d\n", !!dax_write_cache_enabled(dax_dev));
put_dax(dax_dev);
return rc;
}
@@ -201,10 +200,8 @@ static ssize_t write_cache_store(struct device *dev,
if (rc)
len = rc;
- else if (write_cache)
- set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
else
- clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
+ dax_write_cache(dax_dev, write_cache);
put_dax(dax_dev);
return len;
@@ -286,7 +283,7 @@ EXPORT_SYMBOL_GPL(dax_copy_from_iter);
void arch_wb_cache_pmem(void *addr, size_t size);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
{
- if (unlikely(!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags)))
+ if (unlikely(!dax_write_cache_enabled(dax_dev)))
return;
arch_wb_cache_pmem(addr, size);
--
2.14.4
2 years, 8 months