[ndctl PATCH] test, btt-check: bump supported version to 4.13
by Dan Williams
Given that the kernel needs to carry the BTT error clearing workaround
for another cycle [1], bump the default expectations of this test to
4.13. It is worth noting that the nfit_test bus provider does not suffer
from the same sleeping while atomic issue in its emulation of the "clear
error DSM", so this kernel band-aid can be reverted when testing through
that path.
[1]: https://patchwork.kernel.org/patch/9706741/
Cc: Vishal Verma <vishal.l.verma(a)intel.com>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
---
test/btt-check.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/test/btt-check.sh b/test/btt-check.sh
index 2dde2461ec81..4c328fecbe88 100755
--- a/test/btt-check.sh
+++ b/test/btt-check.sh
@@ -43,7 +43,7 @@ check_min_kver()
[[ "$ver" == "$(echo -e "$ver\n$KVER" | sort -V | head -1)" ]]
}
-check_min_kver "4.12" || { echo "kernel $KVER may not support badblocks clearing on pmem via btt"; exit $rc; }
+check_min_kver "4.13" || { echo "kernel $KVER may not support badblocks clearing on pmem via btt"; exit $rc; }
create()
{
3 years, 8 months
[PATCH v2] ndctl: add clear error support for ndctl
by Dave Jiang
Add ndctl support for clearing bad blocks on a device. The initial
implementation only supports device-dax devices. The command takes a
device path, the offset of the starting bad block, and the number of
bad blocks to clear.
Signed-off-by: Dave Jiang <dave.jiang(a)intel.com>
---
v2: Addressed comments from Vishal
- added bounds checking for the badblocks region.
- updated verbiage to use badblocks instead of poison.
- set default len to 1.
- fixed error out for stat
- fixed error out that was copy/paste error
- remove duplicate check_min_kver() in shell script
- fixed logic of checking empty badblocks
Documentation/Makefile.am | 1 +
Documentation/ndctl-clear-error.txt | 37 ++++++
builtin.h | 1 +
ndctl/Makefile.am | 1 +
ndctl/clear-error.c | 233 ++++++++++++++++++++++++++++++++++++
ndctl/lib/libndctl.c | 73 +++++++++++
ndctl/lib/libndctl.sym | 2 +
ndctl/libndctl.h.in | 10 ++
ndctl/ndctl.c | 4 +-
test/Makefile.am | 1 +
test/ndctl-clear-error-dax.sh | 68 +++++++++++
11 files changed, 430 insertions(+), 1 deletion(-)
create mode 100755 test/ndctl-clear-error-dax.sh
diff --git a/Documentation/Makefile.am b/Documentation/Makefile.am
index d72085d..7bf1caa 100644
--- a/Documentation/Makefile.am
+++ b/Documentation/Makefile.am
@@ -14,6 +14,7 @@ man1_MANS = \
ndctl-create-namespace.1 \
ndctl-destroy-namespace.1 \
ndctl-check-namespace.1 \
+ ndctl-clear-error.1 \
ndctl-list.1 \
daxctl-list.1
diff --git a/Documentation/ndctl-clear-error.txt b/Documentation/ndctl-clear-error.txt
new file mode 100644
index 0000000..ccff6ca
--- /dev/null
+++ b/Documentation/ndctl-clear-error.txt
@@ -0,0 +1,37 @@
+ndctl-clear-error(1)
+====================
+
+NAME
+----
+ndctl-clear-error - clear badblocks for a device
+
+SYNOPSIS
+--------
+[verse]
+'ndctl clear-error' [<options>]
+
+EXAMPLES
+--------
+
+Clear poison (bad blocks) for the provided device
+[verse]
+ndctl clear-error -f /dev/dax0.0 -s 0 -l 8
+
+Clear poison (bad blocks) at block offset 0 for 8 blocks on device /dev/dax0.0
+
+OPTIONS
+-------
+-f::
+--file::
+ The device/file to be cleared of poison (bad blocks).
+
+-s::
+--start::
+ The offset where the poison (bad block) starts for this device.
+ Typically this is acquired from the sysfs badblocks file.
+
+-l::
+--len::
+ The number of badblocks to clear, in 512-byte increments.
+
+
diff --git a/builtin.h b/builtin.h
index a8bc848..f522d00 100644
--- a/builtin.h
+++ b/builtin.h
@@ -30,4 +30,5 @@ int cmd_test(int argc, const char **argv, void *ctx);
#ifdef ENABLE_DESTRUCTIVE
int cmd_bat(int argc, const char **argv, void *ctx);
#endif
+int cmd_clear_error(int argc, const char **argv, void *ctx);
#endif /* _NDCTL_BUILTIN_H_ */
diff --git a/ndctl/Makefile.am b/ndctl/Makefile.am
index d346c04..8123169 100644
--- a/ndctl/Makefile.am
+++ b/ndctl/Makefile.am
@@ -11,6 +11,7 @@ ndctl_SOURCES = ndctl.c \
../util/log.c \
list.c \
test.c \
+ clear-error.c \
../util/json.c
if ENABLE_SMART
diff --git a/ndctl/clear-error.c b/ndctl/clear-error.c
new file mode 100644
index 0000000..33d930a
--- /dev/null
+++ b/ndctl/clear-error.c
@@ -0,0 +1,233 @@
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <string.h>
+#include <limits.h>
+#include <ccan/short_types/short_types.h>
+#include <ccan/array_size/array_size.h>
+#include <util/filter.h>
+#include <util/parse-options.h>
+#include <util/log.h>
+#include <ndctl/libndctl.h>
+#include <ndctl.h>
+
+struct clear_err {
+ const char *dev_name;
+ u64 bb_start;
+ unsigned int bb_len;
+ struct ndctl_cmd *ars_cap;
+ struct ndctl_cmd *clear_err;
+ struct ndctl_bus *bus;
+ struct ndctl_region *region;
+ struct ndctl_dax *dax;
+ struct ndctl_ctx *ctx;
+} clear_err;
+
+static int send_clear_error(struct ndctl_bus *bus, u64 start, u64 size)
+{
+ u64 cleared;
+ int rc;
+
+ clear_err.clear_err = ndctl_bus_cmd_new_clear_error(
+ start, size, clear_err.ars_cap);
+ if (!clear_err.clear_err) {
+ fprintf(stderr, "%s: bus: %s failed to create cmd\n",
+ __func__, ndctl_bus_get_provider(bus));
+ return -ENXIO;
+ }
+
+ rc = ndctl_cmd_submit(clear_err.clear_err);
+ if (rc) {
+ fprintf(stderr, "%s: bus: %s failed to submit cmd: %d\n",
+ __func__, ndctl_bus_get_provider(bus), rc);
+ ndctl_cmd_unref(clear_err.clear_err);
+ return rc;
+ }
+
+ cleared = ndctl_cmd_clear_error_get_cleared(clear_err.clear_err);
+ if (cleared != size) {
+ fprintf(stderr, "%s: bus: %s expected to clear: %ld actual: %ld\n",
+ __func__, ndctl_bus_get_provider(bus),
+ size, cleared);
+ return -ENXIO;
+ }
+
+ return 0;
+}
+
+static int get_ars_cap(struct ndctl_bus *bus, u64 start, u64 size)
+{
+ int rc;
+
+ clear_err.ars_cap = ndctl_bus_cmd_new_ars_cap(bus, start, size);
+ if (!clear_err.ars_cap) {
+ fprintf(stderr, "%s: bus: %s failed to create cmd\n",
+ __func__, ndctl_bus_get_provider(bus));
+ return -ENOTTY;
+ }
+
+ rc = ndctl_cmd_submit(clear_err.ars_cap);
+ if (rc) {
+ fprintf(stderr, "%s: bus: %s failed to submit cmd: %d\n",
+ __func__, ndctl_bus_get_provider(bus), rc);
+ ndctl_cmd_unref(clear_err.ars_cap);
+ return rc;
+ }
+
+ if (ndctl_cmd_ars_cap_get_size(clear_err.ars_cap) <
+ sizeof(struct nd_cmd_ars_status)){
+ fprintf(stderr, "%s: bus: %s expected size >= %zd got: %d\n",
+ __func__, ndctl_bus_get_provider(bus),
+ sizeof(struct nd_cmd_ars_status),
+ ndctl_cmd_ars_cap_get_size(clear_err.ars_cap));
+ ndctl_cmd_unref(clear_err.ars_cap);
+ return -ENXIO;
+ }
+
+ return 0;
+}
+
+static int match_dev(struct clear_err *ce, char *dev_name)
+{
+ ndctl_bus_foreach(ce->ctx, ce->bus) {
+ ndctl_region_foreach(ce->bus, ce->region) {
+ ndctl_dax_foreach(ce->region, ce->dax) {
+ if (strncmp(basename(dev_name),
+ ndctl_dax_get_devname(ce->dax), 256)
+ == 0) {
+ return 0;
+ }
+ }
+ }
+ }
+
+ return -ENODEV;
+}
+
+static int check_user_input_range(struct ndctl_region *region,
+ unsigned long long start, unsigned int len)
+{
+ struct badblock *bb;
+ int fit = 0;
+
+ ndctl_region_badblock_foreach(region, bb) {
+ if (start >= bb->offset &&
+ start + len <= bb->offset + bb->len) {
+ fit = 1;
+ break;
+ }
+ }
+
+ return fit;
+}
+
+static int clear_error(struct clear_err *ce)
+{
+ struct stat stats;
+ int rc;
+ char dev_name[256];
+ uint64_t base;
+ unsigned long long start;
+ unsigned int len;
+
+ strncpy(dev_name, ce->dev_name, 256);
+
+ rc = stat(dev_name, &stats);
+ if (rc < 0) {
+ perror("stat failed");
+ fprintf(stderr, "Unable to stat %s\n", dev_name);
+ return -1;
+ }
+
+ if (!S_ISCHR(stats.st_mode)) {
+ fprintf(stderr, "%s not DAX device\n", dev_name);
+ return -1;
+ }
+
+ rc = ndctl_new(&ce->ctx);
+ if (rc)
+ return rc;
+
+ if ((rc = match_dev(ce, dev_name)) < 0)
+ goto cleanup;
+
+ base = ndctl_region_get_resource(ce->region);
+ if (base == ULLONG_MAX) {
+ rc = -ERANGE;
+ goto cleanup;
+ }
+
+ if (check_user_input_range(ce->region, clear_err.bb_start,
+ clear_err.bb_len) == 0) {
+ rc = -EINVAL;
+ goto cleanup;
+ }
+
+ start = base + clear_err.bb_start * 512;
+ len = clear_err.bb_len * 512;
+
+ rc = get_ars_cap(ce->bus, start, len);
+ if (rc) {
+ fprintf(stderr, "get_ars_cap failed\n");
+ goto cleanup;
+ }
+
+ rc = send_clear_error(ce->bus, start, len);
+ if (rc) {
+ fprintf(stderr, "send_clear_error failed\n");
+ goto cleanup;
+ }
+
+ rc = 0;
+
+cleanup:
+ ndctl_unref(ce->ctx);
+ return rc;
+}
+
+int cmd_clear_error(int argc, const char **argv, void *ctx)
+{
+ int i, rc;
+ const char * const u[] = {
+ "ndctl clear-error [<options>]",
+ NULL
+ };
+ const struct option options[] = {
+ OPT_STRING('f', "file", &clear_err.dev_name, "device-name",
+ "device/file name to be operated on"),
+ OPT_U64('s', "start", &clear_err.bb_start,
+ "badblock start"),
+ OPT_UINTEGER('l', "len", &clear_err.bb_len, "badblock length"),
+ OPT_END(),
+ };
+
+ argc = parse_options(argc, argv, options, u, 0);
+
+ for (i = 0; i < argc; i++)
+ error("unknown parameter \"%s\"\n", argv[i]);
+
+ if (argc)
+ usage_with_options(u, options);
+
+ if (!clear_err.dev_name) {
+ error("missing device/file name passed in\n");
+ usage_with_options(u, options);
+ return -EINVAL;
+ }
+
+ if (clear_err.bb_len == 0)
+ clear_err.bb_len = 1;
+
+ rc = clear_error(&clear_err);
+ if (rc)
+ return rc;
+
+ return 0;
+}
diff --git a/ndctl/lib/libndctl.c b/ndctl/lib/libndctl.c
index ac1fc63..7399729 100644
--- a/ndctl/lib/libndctl.c
+++ b/ndctl/lib/libndctl.c
@@ -229,6 +229,8 @@ struct ndctl_region {
int state;
unsigned long long cookie;
} iset;
+ FILE *badblocks;
+ struct badblock bb;
};
/**
@@ -1867,6 +1869,77 @@ NDCTL_EXPORT struct ndctl_dimm *ndctl_region_get_next_dimm(struct ndctl_region *
return NULL;
}
+static int regions_badblocks_init(struct ndctl_region *region)
+{
+ struct ndctl_ctx *ctx = ndctl_region_get_ctx(region);
+ struct ndctl_bus *bus = ndctl_region_get_bus(region);
+ char *bb_path;
+ int rc = 0;
+
+ /* if the file is already opened */
+ if (region->badblocks) {
+ fclose(region->badblocks);
+ region->badblocks = NULL;
+ }
+
+ if (asprintf(&bb_path, "/sys/devices/platform/%s/%s/%s/badblocks",
+ ndctl_bus_get_provider(bus),
+ ndctl_bus_get_devname(bus),
+ ndctl_region_get_devname(region)) < 0) {
+ rc = -errno;
+ err(ctx, "region badblocks path allocation failure\n");
+ return rc;
+ }
+
+ region->badblocks = fopen(bb_path, "r");
+ if (!region->badblocks) {
+ rc = -errno;
+ err(ctx, "region badblocks fopen failed\n");
+ return -rc;
+ }
+
+ free(bb_path);
+ return rc;
+}
+
+NDCTL_EXPORT struct badblock *ndctl_region_get_next_badblock(struct ndctl_region *region)
+{
+ int rc;
+ char *buf = NULL;
+ size_t rlen = 0;
+
+ if (!region->badblocks)
+ return NULL;
+
+ rc = getline(&buf, &rlen, region->badblocks);
+ if (rc == -1)
+ return NULL;
+
+ rc = sscanf(buf, "%llu %u", &region->bb.offset, &region->bb.len);
+ free(buf);
+ if (rc == EOF) {
+ /* end of the road, clean up */
+ fclose(region->badblocks);
+ region->badblocks = NULL;
+ region->bb.offset = 0;
+ region->bb.len = 0;
+ return NULL;
+ }
+
+ return &region->bb;
+}
+
+NDCTL_EXPORT struct badblock *ndctl_region_get_first_badblock(struct ndctl_region *region)
+{
+ int rc;
+
+ rc = regions_badblocks_init(region);
+ if (rc < 0)
+ return NULL;
+
+ return ndctl_region_get_next_badblock(region);
+}
+
static struct nd_cmd_vendor_tail *to_vendor_tail(struct ndctl_cmd *cmd)
{
struct nd_cmd_vendor_tail *tail = (struct nd_cmd_vendor_tail *)
diff --git a/ndctl/lib/libndctl.sym b/ndctl/lib/libndctl.sym
index b5a085c..a1b5baf 100644
--- a/ndctl/lib/libndctl.sym
+++ b/ndctl/lib/libndctl.sym
@@ -116,6 +116,8 @@ global:
ndctl_dimm_get_available_labels;
ndctl_region_get_first;
ndctl_region_get_next;
+ ndctl_region_get_first_badblock;
+ ndctl_region_get_next_badblock;
ndctl_region_get_id;
ndctl_region_get_devname;
ndctl_region_get_interleave_ways;
diff --git a/ndctl/libndctl.h.in b/ndctl/libndctl.h.in
index 6ee8a35..2c45d2d 100644
--- a/ndctl/libndctl.h.in
+++ b/ndctl/libndctl.h.in
@@ -372,6 +372,10 @@ int ndctl_cmd_get_status(struct ndctl_cmd *cmd);
unsigned int ndctl_cmd_get_firmware_status(struct ndctl_cmd *cmd);
int ndctl_cmd_submit(struct ndctl_cmd *cmd);
+struct badblock {
+ unsigned long long offset;
+ unsigned int len;
+};
struct ndctl_region;
struct ndctl_region *ndctl_region_get_first(struct ndctl_bus *bus);
struct ndctl_region *ndctl_region_get_next(struct ndctl_region *region);
@@ -379,6 +383,12 @@ struct ndctl_region *ndctl_region_get_next(struct ndctl_region *region);
for (region = ndctl_region_get_first(bus); \
region != NULL; \
region = ndctl_region_get_next(region))
+struct badblock *ndctl_region_get_first_badblock(struct ndctl_region *region);
+struct badblock *ndctl_region_get_next_badblock(struct ndctl_region *region);
+#define ndctl_region_badblock_foreach(region, badblock) \
+ for (badblock = ndctl_region_get_first_badblock(region); \
+ badblock != NULL; \
+ badblock = ndctl_region_get_next_badblock(region))
unsigned int ndctl_region_get_id(struct ndctl_region *region);
const char *ndctl_region_get_devname(struct ndctl_region *region);
unsigned int ndctl_region_get_interleave_ways(struct ndctl_region *region);
diff --git a/ndctl/ndctl.c b/ndctl/ndctl.c
index 4b08c9b..8aff623 100644
--- a/ndctl/ndctl.c
+++ b/ndctl/ndctl.c
@@ -29,7 +29,8 @@ static int cmd_help(int argc, const char **argv, void *ctx)
{
const char * const builtin_help_subcommands[] = {
"enable-region", "disable-region", "zero-labels",
- "enable-namespace", "disable-namespace", NULL };
+ "enable-namespace", "disable-namespace",
+ "clear-error", NULL };
struct option builtin_help_options[] = {
OPT_END(),
};
@@ -67,6 +68,7 @@ static struct cmd_struct commands[] = {
{ "write-labels", cmd_write_labels },
{ "init-labels", cmd_init_labels },
{ "check-labels", cmd_check_labels },
+ { "clear-error", cmd_clear_error },
{ "list", cmd_list },
{ "help", cmd_help },
#ifdef ENABLE_TEST
diff --git a/test/Makefile.am b/test/Makefile.am
index 9353a34..3cd159e 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -10,6 +10,7 @@ TESTS =\
clear.sh \
dax-errors.sh \
daxdev-errors.sh \
+ ndctl-clear-error-dax.sh \
btt-check.sh \
label-compat.sh \
blk-exhaust.sh
diff --git a/test/ndctl-clear-error-dax.sh b/test/ndctl-clear-error-dax.sh
new file mode 100755
index 0000000..646b601
--- /dev/null
+++ b/test/ndctl-clear-error-dax.sh
@@ -0,0 +1,68 @@
+#!/bin/bash -x
+DEV=""
+NDCTL="../ndctl/ndctl"
+BUS="-b nfit_test.0"
+BUS1="-b nfit_test.1"
+json2var="s/[{}\",]//g; s/:/=/g"
+rc=77
+
+check_min_kver()
+{
+ local ver="$1"
+ : "${KVER:=$(uname -r)}"
+
+ [ -n "$ver" ] || return 1
+ [[ "$ver" == "$(echo -e "$ver\n$KVER" | sort -V | head -1)" ]]
+}
+
+check_min_kver "4.12" || { echo "kernel $KVER lacks dax dev error handling"; exit $rc; }
+
+set -e
+
+err() {
+ echo "test/clear: failed at line $1"
+ exit $rc
+}
+
+set -e
+trap 'err $LINENO' ERR
+
+# setup (reset nfit_test dimms)
+modprobe nfit_test
+$NDCTL disable-region $BUS all
+$NDCTL zero-labels $BUS all
+$NDCTL enable-region $BUS all
+
+rc=1
+
+query=". | sort_by(.available_size) | reverse | .[0].dev"
+region=$($NDCTL list $BUS -t pmem -Ri | jq -r "$query")
+
+# create dax
+chardev="x"
+json=$($NDCTL create-namespace $BUS -r $region -t pmem -m dax -a 4096)
+chardev=$(echo $json | jq -r ". | select(.mode == \"dax\") | .daxregion.devices[0].chardev")
+[ $chardev = "x" ] && echo "fail: $LINENO" && exit 1
+
+json1=$($NDCTL list $BUS)
+eval $(echo $json1 | sed -e "$json2var")
+
+read sector len < /sys/bus/platform/devices/nfit_test.0/$dev/$region/badblocks
+echo "sector: $sector len: $len"
+
+# clearing using ndctl
+$NDCTL clear-error -f /dev/$chardev -s $sector -l $len
+
+# check badblocks, should be empty
+if read sector len < /sys/bus/platform/devices/nfit_test.0/$dev/$region/badblocks; then
+ [ -n "$sector" ] && echo "fail: $LINENO" && exit 1
+else
+ echo "badblocks empty, expected"
+fi
+
+
+$NDCTL disable-region $BUS all
+$NDCTL disable-region $BUS1 all
+modprobe -r nfit_test
+
+exit 0
3 years, 8 months
[PATCH] libnvdimm: restore "libnvdimm: band aid btt vs clear poison locking"
by Dan Williams
This continues the 4.11 status quo of disabling error clearing from
the BTT I/O path. Toshi found that even though we have eliminated all
the libnvdimm sources of sleeping-while-atomic triggers, we still have
sleeping operations that will occur in the path to send the ACPI DSM to
the DIMM to clear the error:
BUG: sleeping function called from invalid context at mm/slab.h:432
in_atomic(): 1, irqs_disabled(): 0, pid: 13353, name: dd
Call Trace:
dump_stack+0x86/0xc3
___might_sleep+0x17d/0x250
__might_sleep+0x4a/0x80
__kmalloc+0x1c0/0x2e0
acpi_os_allocate_zeroed+0x2d/0x2f
acpi_evaluate_object+0x59/0x3b1
acpi_evaluate_dsm+0xbd/0x10c
acpi_nfit_ctl+0x1ef/0x7c0 [nfit]
? nsio_rw_bytes+0x152/0x280
nvdimm_clear_poison+0x77/0x140
nsio_rw_bytes+0x18f/0x280
btt_write_pg+0x1d4/0x3d0 [nd_btt]
btt_make_request+0x119/0x2d0 [nd_btt]
A solution for tracking and handling media errors natively in the BTT is
needed.
Cc: Jeff Moyer <jmoyer(a)redhat.com>
Cc: Dave Jiang <dave.jiang(a)intel.com>
Cc: Vishal Verma <vishal.l.verma(a)intel.com>
Reported-by: Toshi Kani <toshi.kani(a)hpe.com>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
---
drivers/nvdimm/claim.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 35b210dc1e56..6945e35058bf 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -250,7 +250,16 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
}
if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {
- if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)) {
+ /*
+ * FIXME: nsio_rw_bytes() may be called from atomic
+ * context in the btt case and the ACPI DSM path for
+ * clearing the error takes sleeping locks and allocates
+ * memory. An explicit error clearing path, and support
+ * for tracking badblocks in BTT metadata is needed to
+ * work around this collision.
+ */
+ if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)
+ && (!ndns->claim || !is_nd_btt(ndns->claim))) {
long cleared;
cleared = nvdimm_clear_poison(&ndns->dev,
3 years, 8 months
[PATCH 1/2] libnvdimm: fix clear length of nvdimm_forget_poison()
by Toshi Kani
ND_CMD_CLEAR_ERROR command returns 'clear_err.cleared', the length
of error actually cleared, which may be smaller than its requested
'len'.
Change nvdimm_clear_poison() to call nvdimm_forget_poison() with
'clear_err.cleared' when this value is valid.
Signed-off-by: Toshi Kani <toshi.kani(a)hpe.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Dave Jiang <dave.jiang(a)intel.com>
Cc: Vishal Verma <vishal.l.verma(a)intel.com>
---
Based on 'libnvdimm-for-next'.
---
drivers/nvdimm/bus.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index d214ac44..43ddfd4 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -219,7 +219,9 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
if (cmd_rc < 0)
return cmd_rc;
- nvdimm_forget_poison(nvdimm_bus, phys, len);
+ if (clear_err.cleared > 0)
+ nvdimm_forget_poison(nvdimm_bus, phys, clear_err.cleared);
+
return clear_err.cleared;
}
EXPORT_SYMBOL_GPL(nvdimm_clear_poison);
3 years, 8 months
Delivery Problem, Express FedEx Package #165533287
by FedEx Express Support
Hello,
We'd like to notify you that today was the first attempt to deliver your
parcel #708065843.
Important details can be found in the confirmation document attached below.
Thank you for your consideration.
Contessa Guimaraes - FedEx International Agent
3 years, 8 months
KASLR causes intermittent boot failures on some systems
by Jeff Moyer
Hi,
commit 021182e52fe01 ("x86/mm: Enable KASLR for physical mapping memory
regions") causes some of my systems with persistent memory (whether real
or emulated) to fail to boot with a couple of different crash
signatures. The first signature is an NMI watchdog lockup of all but 1
cpu, which causes much difficulty in extracting useful information from
the console. The second variant is an invalid paging request, listed
below.
On some systems, I haven't hit this problem at all. Other systems
experience a failed boot maybe 20-30% of the time. To reproduce it,
configure some emulated pmem on your system. You can find directions
for that here: https://nvdimm.wiki.kernel.org/
Install ndctl (https://github.com/pmem/ndctl).
Configure the namespace:
# ndctl create-namespace -f -e namespace0.0 -m memory
Then just reboot several times (5 should be enough), and hopefully
you'll hit the issue.
I've attached both my .config and the dmesg output from a successful
boot at the end of this mail.
Cheers,
Jeff
[ 9.874109] pmem0: detected capacity change from 0 to 206158430208
[ 9.881652] BUG: unable to handle kernel paging request at ffff9406bfff0000
[ 9.889431] IP: memcpy_erms+0x6/0x10
[ 9.893422] PGD 0
[ 9.893423]
[ 9.897316] Oops: 0000 [#1] SMP
[ 9.900820] Modules linked in: isci mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt igb fb_sys_fops ahci libsas ttm ptp libahci crc32c_intel scsi_transport_sas nd_pmem pps_core nd_btt drm dca libata i2c_algo_bit i2c_core dm_mirror dm_region_hash dm_log dm_mod
[ 9.927322] CPU: 11 PID: 441 Comm: systemd-udevd Not tainted 4.11.0-rc5+ #1
[ 9.935092] Hardware name: Intel Corporation LH Pass/SVRBD-ROW_P, BIOS SE5C600.86B.02.01.SP06.050920141054 05/09/2014
[ 9.946934] task: ffff92dedae12b80 task.stack: ffffbaeb0783c000
[ 9.953539] RIP: 0010:memcpy_erms+0x6/0x10
[ 9.958108] RSP: 0018:ffffbaeb0783f9b8 EFLAGS: 00010286
[ 9.963939] RAX: ffff92e6dafef000 RBX: 0000000000000000 RCX: 0000000000001000
[ 9.971904] RDX: 0000000000001000 RSI: ffff9406bfff0000 RDI: ffff92e6dafef000
[ 9.979869] RBP: ffffbaeb0783fa38 R08: 0000000000000000 R09: 0000000017ffff80
[ 9.987831] R10: 0000000000000000 R11: ffff9406bfff0000 R12: ffff92d83bfaea98
[ 9.995794] R13: 0000002fffff0000 R14: 0000000000001000 R15: ffff92e6dafef000
[ 10.003759] FS: 00007fd4c2e618c0(0000) GS:ffff92e6de4c0000(0000) knlGS:0000000000000000
[ 10.012779] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 10.019192] CR2: ffff9406bfff0000 CR3: 000000081a05c000 CR4: 00000000001406e0
[ 10.027158] Call Trace:
[ 10.029891] ? pmem_do_bvec+0x93/0x290 [nd_pmem]
[ 10.035046] ? radix_tree_node_alloc.constprop.20+0x85/0xc0
[ 10.041263] ? radix_tree_node_alloc.constprop.20+0x85/0xc0
[ 10.047481] pmem_rw_page+0x3a/0x60 [nd_pmem]
[ 10.052343] bdev_read_page+0x81/0xb0
[ 10.056431] do_mpage_readpage+0x56f/0x770
[ 10.060991] ? I_BDEV+0x20/0x20
[ 10.064500] ? lru_cache_add+0xe/0x10
[ 10.068584] mpage_readpages+0x148/0x1e0
[ 10.072958] ? I_BDEV+0x20/0x20
[ 10.076462] ? I_BDEV+0x20/0x20
[ 10.079969] ? alloc_pages_current+0x88/0x120
[ 10.084830] blkdev_readpages+0x1d/0x20
[ 10.089111] __do_page_cache_readahead+0x1ce/0x2c0
[ 10.094456] force_page_cache_readahead+0xa2/0x100
[ 10.099800] page_cache_sync_readahead+0x3f/0x50
[ 10.104956] generic_file_read_iter+0x60d/0x8c0
[ 10.110014] ? cp_new_stat+0x14f/0x180
[ 10.114187] blkdev_read_iter+0x37/0x40
[ 10.118469] __vfs_read+0xe0/0x150
[ 10.122253] vfs_read+0x8c/0x130
[ 10.125856] SyS_read+0x55/0xc0
[ 10.129354] entry_SYSCALL_64_fastpath+0x1a/0xa9
[ 10.134508] RIP: 0033:0x7fd4c1d9d480
[ 10.138487] RSP: 002b:00007fffa1f96e08 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
[ 10.146934] RAX: ffffffffffffffda RBX: 00007fffa1f968f0 RCX: 00007fd4c1d9d480
[ 10.154896] RDX: 0000000000000040 RSI: 0000559de3d6d978 RDI: 0000000000000008
[ 10.162859] RBP: 0000000000010300 R08: 0000000000000020 R09: 0000000000000068
[ 10.170820] R10: 00007fffa1f96b90 R11: 0000000000000246 R12: 0000000000000000
[ 10.178783] R13: 00007fffa1f97980 R14: 0000000000000000 R15: 0000000000000000
[ 10.186748] Code: ff 90 90 90 90 eb 1e 0f 1f 00 48 89 f8 48 89 d1 48 c1 e9 03 83 e2 07 f3 48 a5 89 d1 f3 a4 c3 66 0f 1f 44 00 00 48 89 f8 48 89 d1 <f3> a4 c3 0f 1f 80 00 00 00 00 48 89 f8 48 83 fa 20 72 7e 40 38
[ 10.207813] RIP: memcpy_erms+0x6/0x10 RSP: ffffbaeb0783f9b8
[ 10.214022] CR2: ffff9406bfff0000
[ 10.217774] ---[ end trace 2ea6d4ce29040562 ]---
[ 10.265522] Kernel panic - not syncing: Fatal exception
[ 10.271381] Kernel Offset: 0x2a000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[ 10.309968] ---[ end Kernel panic - not syncing: Fatal exception
[ 10.316682] ------------[ cut here ]------------
3 years, 8 months
Your FedEx Parcel #111786243, Delivery Unsuccessful
by FedEx Delivery Support
Hello,
Our FedEx Delivery Representative was at your place today to deliver your
parcel #281300331.
Please, see the notice for the details.
We are glad to assist you.
Odetta Feser - FedEx Expedited Manager
3 years, 8 months