[ndctl PATCH] ndctl/bus: Add poll interval to wait-scrub
by Dan Williams
The kernel ARS (Address Range Scrub) state machine implements an exponential
backoff so as not to spam the platform ARS interface, which can be a
high-overhead mechanism. Recent kernel changes allow root to bypass / reset
the polling interval. Add an option to 'ndctl wait-scrub' to attempt to poll
at a user-specified frequency (when that user is root).
As part of the implementation of the 'wait-scrub' enhancement take the
opportunity to refactor the exported
ndctl_bus_wait_for_scrub_completion() helper function into the more
capable ndctl_bus_poll_scrub_completion().
Reported-by: Erwin Tsaur <erwin.tsaur@oracle.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
ndctl/bus.c | 25 +++++++++++---
ndctl/lib/libndctl.c | 88 ++++++++++++++++++++++++++++++++++++++----------
ndctl/lib/libndctl.sym | 6 +++
ndctl/libndctl.h | 2 +
4 files changed, 97 insertions(+), 24 deletions(-)
diff --git a/ndctl/bus.c b/ndctl/bus.c
index ce7f76add777..86bbd5178df9 100644
--- a/ndctl/bus.c
+++ b/ndctl/bus.c
@@ -16,10 +16,24 @@
static struct {
bool verbose;
+ unsigned int poll_interval;
} param;
-static const struct option bus_options[] = {
- OPT_BOOLEAN('v',"verbose", &param.verbose, "turn on debug"),
+
+#define BASE_OPTIONS() \
+ OPT_BOOLEAN('v',"verbose", &param.verbose, "turn on debug")
+
+#define WAIT_OPTIONS() \
+ OPT_UINTEGER('p', "poll", &param.poll_interval, "poll interval (seconds)")
+
+static const struct option start_options[] = {
+ BASE_OPTIONS(),
+ OPT_END(),
+};
+
+static const struct option wait_options[] = {
+ BASE_OPTIONS(),
+ WAIT_OPTIONS(),
OPT_END(),
};
@@ -27,7 +41,8 @@ static int scrub_action(struct ndctl_bus *bus, enum device_action action)
{
switch (action) {
case ACTION_WAIT:
- return ndctl_bus_wait_for_scrub_completion(bus);
+ return ndctl_bus_poll_scrub_completion(bus,
+ param.poll_interval, 0);
case ACTION_START:
return ndctl_bus_start_scrub(bus);
default:
@@ -100,7 +115,7 @@ static int bus_action(int argc, const char **argv, const char *usage,
int cmd_start_scrub(int argc, const char **argv, struct ndctl_ctx *ctx)
{
char *usage = "ndctl start-scrub [<bus-id> <bus-id2> ... <bus-idN>] [<options>]";
- int start = bus_action(argc, argv, usage, bus_options,
+ int start = bus_action(argc, argv, usage, start_options,
ACTION_START, ctx);
if (start <= 0) {
@@ -115,7 +130,7 @@ int cmd_start_scrub(int argc, const char **argv, struct ndctl_ctx *ctx)
int cmd_wait_scrub(int argc, const char **argv, struct ndctl_ctx *ctx)
{
char *usage = "ndctl wait-scrub [<bus-id> <bus-id2> ... <bus-idN>] [<options>]";
- int wait = bus_action(argc, argv, usage, bus_options,
+ int wait = bus_action(argc, argv, usage, wait_options,
ACTION_WAIT, ctx);
if (wait <= 0) {
diff --git a/ndctl/lib/libndctl.c b/ndctl/lib/libndctl.c
index c9e2875d6011..fd36aa0662f4 100644
--- a/ndctl/lib/libndctl.c
+++ b/ndctl/lib/libndctl.c
@@ -1273,22 +1273,33 @@ NDCTL_EXPORT unsigned int ndctl_bus_get_scrub_count(struct ndctl_bus *bus)
}
/**
- * ndctl_bus_wait_for_scrub - wait for a scrub to complete
+ * ndctl_bus_poll_scrub_completion - wait for a scrub to complete
* @bus: bus for which to check whether a scrub is in progress
+ * @poll_interval: number of seconds to wait between status re-reads
+ * @timeout: total number of seconds to wait
*
- * Upon return this bus has completed any in-progress scrubs. This is
- * different from ndctl_cmd_ars_in_progress in that the latter checks
- * the output of an ars_status command to see if the in-progress flag
- * is set, i.e. provides the firmware's view of whether a scrub is in
- * progress. ndctl_bus_wait_for_scrub instead checks the kernel's view
- * of whether a scrub is in progress by looking at the 'scrub' file in
- * sysfs.
+ * Upon return, if @timeout is 0, this bus has completed any in-progress
+ * scrubs; otherwise the call returns -ETIMEDOUT once @timeout seconds
+ * have expired. This
+ * is different from ndctl_cmd_ars_in_progress in that the latter checks
+ * the output of an ars_status command to see if the in-progress flag is
+ * set, i.e. provides the firmware's view of whether a scrub is in
+ * progress. ndctl_bus_wait_for_scrub_completion() instead checks the
+ * kernel's view of whether a scrub is in progress by looking at the
+ * 'scrub' file in sysfs.
+ *
+ * The @poll_interval option changes the frequency at which the kernel
+ * status is polled, but a supporting kernel is required for that
+ * interval to be reflected in the kernel's own polling of the ARS
+ * interface. Kernels with poll-interval support limit that polling to
+ * root (CAP_SYS_RAWIO) processes.
*/
-NDCTL_EXPORT int ndctl_bus_wait_for_scrub_completion(struct ndctl_bus *bus)
+NDCTL_EXPORT int ndctl_bus_poll_scrub_completion(struct ndctl_bus *bus,
+ unsigned int poll_interval, unsigned int timeout)
{
struct ndctl_ctx *ctx = ndctl_bus_get_ctx(bus);
+ const char *provider = ndctl_bus_get_provider(bus);
+ char buf[SYSFS_ATTR_SIZE] = { 0 };
unsigned int scrub_count;
- char buf[SYSFS_ATTR_SIZE];
struct pollfd fds;
char in_progress;
int fd = 0, rc;
@@ -1314,32 +1325,71 @@ NDCTL_EXPORT int ndctl_bus_wait_for_scrub_completion(struct ndctl_bus *bus)
rc = 0;
break;
} else if (rc == 2 && in_progress == '+') {
+ long tmo;
+
+ if (!timeout)
+ tmo = poll_interval;
+ else if (!poll_interval)
+ tmo = timeout;
+ else
+ tmo = min(poll_interval, timeout);
+
+ tmo *= 1000;
+ if (tmo == 0)
+ tmo = -1;
+
/* scrub in progress, wait */
- rc = poll(&fds, 1, -1);
- if (rc < 0) {
+ rc = poll(&fds, 1, tmo);
+ dbg(ctx, "%s: poll wake: rc: %d status: \'%s\'\n",
+ provider, rc, buf);
+ if (rc > 0)
+ fds.revents = 0;
+ if (pread(fd, buf, 1, 0) == -1) {
rc = -errno;
- dbg(ctx, "poll error: %s\n", strerror(errno));
break;
}
- dbg(ctx, "poll wake: revents: %d\n", fds.revents);
- if (pread(fd, buf, 1, 0) == -1) {
+
+ if (rc < 0) {
rc = -errno;
+ dbg(ctx, "%s: poll error: %s\n", provider,
+ strerror(errno));
break;
+ } else if (rc == 0) {
+ dbg(ctx, "%s: poll timeout: interval: %d timeout: %d\n",
+ provider, poll_interval, timeout);
+ if (!timeout)
+ continue;
+
+ if (!poll_interval || poll_interval > timeout) {
+ rc = -ETIMEDOUT;
+ break;
+ }
+
+ if (timeout > poll_interval)
+ timeout -= poll_interval;
+ else if (timeout == poll_interval) {
+ timeout = 1;
+ poll_interval = 0;
+ }
}
- fds.revents = 0;
}
}
if (rc == 0)
- dbg(ctx, "bus%d: scrub complete\n", ndctl_bus_get_id(bus));
+ dbg(ctx, "%s: scrub complete, status: \'%s\'\n", provider, buf);
else
- dbg(ctx, "bus%d: error waiting for scrub completion: %s\n",
- ndctl_bus_get_id(bus), strerror(-rc));
+ dbg(ctx, "%s: error waiting for scrub completion: %s\n",
+ provider, strerror(-rc));
if (fd)
close (fd);
return rc;
}
+NDCTL_EXPORT int ndctl_bus_wait_for_scrub_completion(struct ndctl_bus *bus)
+{
+ return ndctl_bus_poll_scrub_completion(bus, 0, 0);
+}
+
static int ndctl_bind(struct ndctl_ctx *ctx, struct kmod_module *module,
const char *devname);
static int ndctl_unbind(struct ndctl_ctx *ctx, const char *devpath);
diff --git a/ndctl/lib/libndctl.sym b/ndctl/lib/libndctl.sym
index cb9f769fbbca..297f03d7ae39 100644
--- a/ndctl/lib/libndctl.sym
+++ b/ndctl/lib/libndctl.sym
@@ -404,3 +404,9 @@ global:
ndctl_dimm_update_master_passphrase;
ndctl_dimm_master_secure_erase;
} LIBNDCTL_18;
+
+
+LIBNDCTL_20 {
+global:
+ ndctl_bus_poll_scrub_completion;
+} LIBNDCTL_19;
diff --git a/ndctl/libndctl.h b/ndctl/libndctl.h
index 0debdb61b0ac..e378802ee4c1 100644
--- a/ndctl/libndctl.h
+++ b/ndctl/libndctl.h
@@ -133,6 +133,8 @@ enum ndctl_persistence_domain ndctl_bus_get_persistence_domain(
struct ndctl_bus *bus);
int ndctl_bus_wait_probe(struct ndctl_bus *bus);
int ndctl_bus_wait_for_scrub_completion(struct ndctl_bus *bus);
+int ndctl_bus_poll_scrub_completion(struct ndctl_bus *bus,
+ unsigned int poll_interval, unsigned int timeout);
unsigned int ndctl_bus_get_scrub_count(struct ndctl_bus *bus);
int ndctl_bus_get_scrub_state(struct ndctl_bus *bus);
int ndctl_bus_start_scrub(struct ndctl_bus *bus);
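For context, a minimal consumer of the new interface might look like the
sketch below. It is illustrative only: the 5-second interval and
300-second timeout are arbitrary values, while ndctl_new(),
ndctl_bus_foreach(), ndctl_unref(), and ndctl_bus_get_provider() are the
existing libndctl entry points.

	#include <stdio.h>
	#include <string.h>
	#include <ndctl/libndctl.h>

	int main(void)
	{
		struct ndctl_ctx *ctx;
		struct ndctl_bus *bus;

		if (ndctl_new(&ctx) < 0)
			return 1;
		ndctl_bus_foreach(ctx, bus) {
			/* re-read the kernel scrub status every 5s, give up after 300s */
			int rc = ndctl_bus_poll_scrub_completion(bus, 5, 300);

			if (rc)
				fprintf(stderr, "%s: %s\n",
						ndctl_bus_get_provider(bus), strerror(-rc));
		}
		ndctl_unref(ctx);
		return 0;
	}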
[ndctl PATCH 1/2] autoconf: Check for MAP_SHARED_VALIDATE
by Robert Elliott
Some ndctl tests use two mmap() flags, MAP_SHARED_VALIDATE and MAP_SYNC,
that were added in Linux kernel 4.15 and are intended to be defined for
applications by sys/mman.h. However, they are not defined unless the
distro provides glibc-2.28.
In addition to the existing check for MAP_SYNC, check that
MAP_SHARED_VALIDATE is provided.
Fixes: 94679e6b78aa ("ndctl, test: check availability of MAP_SYNC for poison test")
Signed-off-by: Robert Elliott <elliott@hpe.com>
---
configure.ac | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/configure.ac b/configure.ac
index d27a2b1..efbdcde 100644
--- a/configure.ac
+++ b/configure.ac
@@ -103,12 +103,13 @@ AS_IF([test "x$enable_test" = "xyes"],
AM_CONDITIONAL([ENABLE_TEST], [test "x$enable_test" = "xyes"])
AC_CHECK_DECLS([BUS_MCEERR_AR], [enable_bus_mc_err=yes], [], [[#include <signal.h>]])
+AC_CHECK_DECLS([MAP_SHARED_VALIDATE], [enable_map_shared_validate=yes], [], [[#include <sys/mman.h>]])
AC_CHECK_DECLS([MAP_SYNC], [enable_map_sync=yes], [], [[#include <sys/mman.h>]])
-AS_IF([test "x$enable_bus_mc_err" = "xyes" -a "x$enable_map_sync" = "xyes"],
+AS_IF([test "x$enable_bus_mc_err" = "xyes" -a "x$enable_map_sync" = "xyes" -a "x$enable_map_shared_validate" = "xyes"],
[AC_DEFINE([ENABLE_POISON], [1], [ndctl test poison support])])
AM_CONDITIONAL([ENABLE_POISON],
- [test "x$enable_bus_mc_err" = "xyes" -a "x$enable_map_sync" = "xyes"])
+ [test "x$enable_bus_mc_err" = "xyes" -a "x$enable_map_sync" = "xyes" -a "x$enable_map_shared_validate" = "xyes"])
PKG_CHECK_MODULES([KMOD], [libkmod])
PKG_CHECK_MODULES([UDEV], [libudev])
--
2.20.1
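For reference, the ENABLE_POISON define produced above is what gates the
poison tests; below is a hedged sketch of the kind of mapping those flags
enable. map_sync_file() is a hypothetical helper, not the actual test
code, and the _GNU_SOURCE define reflects that some libcs gate these
flags behind feature macros.

	#define _GNU_SOURCE
	#include <stddef.h>
	#include <sys/mman.h>

	#ifdef ENABLE_POISON
	/* MAP_SYNC is only valid in combination with MAP_SHARED_VALIDATE */
	static void *map_sync_file(int fd, size_t len)
	{
		return mmap(NULL, len, PROT_READ | PROT_WRITE,
				MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0);
	}
	#endif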
[PATCH 0/6] nfit/ars: Improve polling and short-ARS execution
by Dan Williams
Here is a small pile of updates to better coordinate the Linux ARS state
machine with platform-BIOS implementations. Specifically, take advantage
of opportunities to run short-ARS whenever the ARS interface is found to
be idle at init, always run short-ARS even if no_init_ars is specified,
allow root to reset the exponential backoff polling interval for ARS
completion, and protect the kernel against the consumption of stale ARS
results.
---
Dan Williams (6):
nfit/ars: Attempt a short-ARS whenever the ARS state is idle at boot
nfit/ars: Attempt short-ARS even in the no_init_ars case
nfit/ars: Allow root to busy-poll the ARS state machine
nfit/ars: Remove ars_start_flags
nfit/ars: Introduce scrub_flags
nfit/ars: Avoid stale ARS results
drivers/acpi/nfit/core.c | 67 ++++++++++++++++++++++++++++++++--------------
drivers/acpi/nfit/nfit.h | 10 +++++--
2 files changed, 53 insertions(+), 24 deletions(-)
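The busy-poll patch in this pile exists because of that backoff behavior;
as a rough illustration of the pattern (ars_in_progress() is a
hypothetical status check and the cap is arbitrary, this is not the nfit
code):

	#include <unistd.h>

	extern int ars_in_progress(void);	/* hypothetical status check */

	static void wait_ars(void)
	{
		unsigned int tmo = 1;

		while (ars_in_progress()) {
			sleep(tmo);
			/* double the wait each pass; cap chosen arbitrarily */
			if (tmo < 30 * 60)
				tmo *= 2;
		}
	}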
[PATCH 0/5] [v4] Allow persistent memory to be used like normal RAM
by Dave Hansen
v3 spurred a bunch of really good discussion. Thanks to everybody
that made comments and suggestions!
I would still love some Acks on this from the folks on cc, even if it
is on just the patch touching your area.
Note: these are based on commit d2f33c19644 in:
git://git.kernel.org/pub/scm/linux/kernel/git/djbw/nvdimm.git libnvdimm-pending
Changes since v3:
* Move HMM-related resource warning instead of removing it
* Use __request_resource() directly instead of devm.
* Create a separate DAX_PMEM Kconfig option, complete with help text
* Update patch descriptions and cover letter to give a better
overview of use-cases and hardware where this might be useful.
Changes since v2:
* Updates to dev_dax_kmem_probe() in patch 5:
* Reject probes for devices with bad NUMA nodes. Keeps slow
memory from being added to node 0.
* Use raw request_mem_region()
* Add comments about permanent reservation
* use dev_*() instead of printk's
* Add references to nvdimm documentation in descriptions
* Remove unneeded GPL export
* Add Kconfig prompt and help text
Changes since v1:
* Now based on git://git.kernel.org/pub/scm/linux/kernel/git/djbw/nvdimm.git
* Use binding/unbinding from "dax bus" code
* Move over to a "dax bus" driver from being an nvdimm driver
--
Persistent memory is cool. But, currently, you have to rewrite
your applications to use it. Wouldn't it be cool if you could
just have it show up in your system like normal RAM and get to
it like a slow blob of memory? Well... have I got the patch
series for you!
== Background / Use Cases ==
Persistent Memory (aka Non-Volatile DIMMs / NVDIMMs) themselves
are described in detail in Documentation/nvdimm/nvdimm.txt.
However, this documentation focuses on actually using them as
storage. This set is focused on using NVDIMMs as DRAM replacement.
This is intended for Intel-style NVDIMMs (aka Intel Optane DC
persistent memory). These DIMMs are physically persistent,
more akin to flash than traditional RAM. They are also expected to
be more cost-effective than using RAM, which is why folks want this
set in the first place.
This set is not intended for RAM-based NVDIMMs. Those are not
cost-effective vs. plain RAM, and using them here would simply
be a waste.
But, why would you bother with this approach? Intel itself [1]
has announced a hardware feature that does something very similar:
"Memory Mode", which turns DRAM into a cache in front of persistent
memory, with the combination then used as normal "RAM".
Here are a few reasons:
1. The capacity of memory mode is the size of your persistent
memory that you dedicate. DRAM capacity is "lost" because it
is used for cache. With this, you get PMEM+DRAM capacity for
memory.
2. DRAM acts as a cache with memory mode, and caches can lead to
unpredictable latencies. Since memory mode is all-or-nothing
(either all your DRAM is used as cache or none is), your entire
memory space is exposed to these unpredictable latencies. This
solution lets you guarantee DRAM latencies if you need them.
3. The new "tier" of memory is exposed to software. That means
that you can build tiered applications or infrastructure. A
cloud provider could sell cheaper VMs that use more PMEM and
more expensive ones that use DRAM. That's impossible with
memory mode.
Don't take this as criticism of memory mode. Memory mode is
awesome, and doesn't strictly require *any* software changes (we
have software changes proposed for optimizing it though). It has
tons of other advantages over *this* approach. Basically, we
believe that the approach in these patches is complementary to
memory mode and that both can live side-by-side in harmony.
== Patch Set Overview ==
This series adds a new "driver" to which pmem devices can be
attached. Once attached, the memory "owned" by the device is
hot-added to the kernel and managed like any other memory. On
systems with an HMAT (a new ACPI table), each socket (roughly)
will have a separate NUMA node for its persistent memory so
this newly-added memory can be selected by its unique NUMA
node.
== Testing Overview ==
Here's how I set up a system to test this thing:
1. Boot qemu with lots of memory: "-m 4096", for instance
2. Reserve 512MB of physical memory. Reserving a spot at 2GB
physical seems to work: memmap=512M!0x0000000080000000
This will end up looking like a pmem device at boot.
3. When booted, convert fsdax device to "device dax":
ndctl create-namespace -fe namespace0.0 -m dax
4. See patch 4 for instructions on binding the kmem driver
to a device (a sketch of the sysfs sequence follows these steps).
5. Now, online the new memory sections. Perhaps:
grep ^MemTotal /proc/meminfo
for f in `grep -vl online /sys/devices/system/memory/*/state`; do
echo $f: `cat $f`
echo online_movable > $f
grep ^MemTotal /proc/meminfo
done
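As promised in step 4, here is a hedged sketch of the bind sequence. The
sysfs paths, the device_dax/kmem driver names, and the dax0.0 device name
are assumptions based on the dax-bus rework; consult patch 4 for the
authoritative instructions.

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	/* write a device name to a sysfs attribute, 0 on success */
	static int sysfs_write(const char *path, const char *val)
	{
		int fd = open(path, O_WRONLY);
		ssize_t n;

		if (fd < 0)
			return -1;
		n = write(fd, val, strlen(val));
		close(fd);
		return n < 0 ? -1 : 0;
	}

	/* release dax0.0 from device_dax, then hand it to kmem (paths assumed) */
	int bind_kmem(void)
	{
		if (sysfs_write("/sys/bus/dax/drivers/device_dax/unbind", "dax0.0"))
			return -1;
		return sysfs_write("/sys/bus/dax/drivers/kmem/new_id", "dax0.0");
	}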
1. https://itpeernetwork.intel.com/intel-optane-dc-persistent-memory-operati...
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Ross Zwisler <zwisler@kernel.org>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: linux-nvdimm@lists.01.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: Huang Ying <ying.huang@intel.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Yaowei Bai <baiyaowei@cmss.chinamobile.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Jerome Glisse <jglisse@redhat.com>
[PATCH v2] nfit: add Hyper-V NVDIMM DSM command set to white list
by Dexuan Cui
Add the Hyper-V _DSM command set to the white list of NVDIMM command
sets.
This command set is documented at http://www.uefi.org/RFIC_LIST
(see "Virtual NVDIMM 0x1901").
Thanks to Dan Williams <dan.j.williams@intel.com> for writing the
comment change.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
Changes in v2:
Updated the comment and changelog (Thanks, Dan!)
Rebased to the tag libnvdimm-fixes-5.0-rc4 of the nvdimm tree.
drivers/acpi/nfit/core.c | 17 ++++++++++++++---
drivers/acpi/nfit/nfit.h | 6 +++++-
include/uapi/linux/ndctl.h | 1 +
3 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index e18ade5d74e9..a9270c99be72 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1861,9 +1861,17 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
dev_set_drvdata(&adev_dimm->dev, nfit_mem);
/*
- * Until standardization materializes we need to consider 4
- * different command sets. Note, that checking for function0 (bit0)
- * tells us if any commands are reachable through this GUID.
+ * There are 4 "legacy" NVDIMM command sets
+ * (NVDIMM_FAMILY_{INTEL,MSFT,HPE1,HPE2}) that were created before
+ * an EFI working group was established to constrain this
+ * proliferation. The nfit driver probes for the supported command
+ * set by GUID. Note, if you're a platform developer looking to add
+ * a new command set to this probe, consider using an existing set,
+ * or otherwise seek approval to publish the command set at
+ * http://www.uefi.org/RFIC_LIST.
+ *
+ * Note that checking for function0 (bit0) tells us if any commands
+ * are reachable through this GUID.
*/
for (i = 0; i <= NVDIMM_FAMILY_MAX; i++)
if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
@@ -1886,6 +1894,8 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
dsm_mask &= ~(1 << 8);
} else if (nfit_mem->family == NVDIMM_FAMILY_MSFT) {
dsm_mask = 0xffffffff;
+ } else if (nfit_mem->family == NVDIMM_FAMILY_HYPERV) {
+ dsm_mask = 0x1f;
} else {
dev_dbg(dev, "unknown dimm command family\n");
nfit_mem->family = -1;
@@ -3729,6 +3739,7 @@ static __init int nfit_init(void)
guid_parse(UUID_NFIT_DIMM_N_HPE1, &nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
guid_parse(UUID_NFIT_DIMM_N_HPE2, &nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
guid_parse(UUID_NFIT_DIMM_N_MSFT, &nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
+ guid_parse(UUID_NFIT_DIMM_N_HYPERV, &nfit_uuid[NFIT_DEV_DIMM_N_HYPERV]);
nfit_wq = create_singlethread_workqueue("nfit");
if (!nfit_wq)
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 33691aecfcee..4de167b4f76f 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -34,11 +34,14 @@
/* https://msdn.microsoft.com/library/windows/hardware/mt604741 */
#define UUID_NFIT_DIMM_N_MSFT "1ee68b36-d4bd-4a1a-9a16-4f8e53d46e05"
+/* http://www.uefi.org/RFIC_LIST (see "Virtual NVDIMM 0x1901") */
+#define UUID_NFIT_DIMM_N_HYPERV "5746c5f2-a9a2-4264-ad0e-e4ddc9e09e80"
+
#define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
| ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
| ACPI_NFIT_MEM_NOT_ARMED | ACPI_NFIT_MEM_MAP_FAILED)
-#define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_MSFT
+#define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_HYPERV
#define NVDIMM_STANDARD_CMDMASK \
(1 << ND_CMD_SMART | 1 << ND_CMD_SMART_THRESHOLD | 1 << ND_CMD_DIMM_FLAGS \
@@ -94,6 +97,7 @@ enum nfit_uuids {
NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1,
NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2,
NFIT_DEV_DIMM_N_MSFT = NVDIMM_FAMILY_MSFT,
+ NFIT_DEV_DIMM_N_HYPERV = NVDIMM_FAMILY_HYPERV,
NFIT_SPA_VOLATILE,
NFIT_SPA_PM,
NFIT_SPA_DCR,
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index f57c9e434d2d..de5d90212409 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -243,6 +243,7 @@ struct nd_cmd_pkg {
#define NVDIMM_FAMILY_HPE1 1
#define NVDIMM_FAMILY_HPE2 2
#define NVDIMM_FAMILY_MSFT 3
+#define NVDIMM_FAMILY_HYPERV 4
#define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\
struct nd_cmd_pkg)
--
2.19.1
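A note on the dsm_mask value above: it is a bitmap of implemented _DSM
function indices, so 0x1f advertises functions 0 through 4. A small
sketch of how such a mask decodes:

	#include <stdio.h>

	int main(void)
	{
		unsigned long dsm_mask = 0x1f;	/* the Hyper-V mask from the patch */
		int i;

		/* bit N set => _DSM function index N is advertised */
		for (i = 0; i < 32; i++)
			if (dsm_mask & (1UL << i))
				printf("function %d supported\n", i);
		return 0;
	}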
[PATCH v2] device-dax: Auto-bind device after successful new_id
by Dan Williams
The typical 'new_id' attribute behavior is to immediately attach a
device to its driver after a new device-id is added. Implement this
behavior for the dax bus.
Reported-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Reported-by: Brice Goglin <Brice.Goglin@inria.fr>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
Changes since v1:
* Fix the remove_id path since do_id_store() is shared with the new_id
path (Brice)
Brice, this works for me. I'll push it out on libnvdimm-pending, or in
the meantime you can apply this patch after reverting commit
a9f1ffdb6a20 on the current state of the branch.
drivers/dax/bus.c | 24 ++++++++++++++++++------
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index c620ad52d7e5..a410154d75fb 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -57,8 +57,13 @@ static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev)
return match;
}
+enum id_action {
+ ID_REMOVE,
+ ID_ADD,
+};
+
static ssize_t do_id_store(struct device_driver *drv, const char *buf,
- size_t count, bool add)
+ size_t count, enum id_action action)
{
struct dax_device_driver *dax_drv = to_dax_drv(drv);
unsigned int region_id, id;
@@ -77,7 +82,7 @@ static ssize_t do_id_store(struct device_driver *drv, const char *buf,
mutex_lock(&dax_bus_lock);
dax_id = __dax_match_id(dax_drv, buf);
if (!dax_id) {
- if (add) {
+ if (action == ID_ADD) {
dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL);
if (dax_id) {
strncpy(dax_id->dev_name, buf, DAX_NAME_LEN);
@@ -86,26 +91,33 @@ static ssize_t do_id_store(struct device_driver *drv, const char *buf,
rc = -ENOMEM;
} else
/* nothing to remove */;
- } else if (!add) {
+ } else if (action == ID_REMOVE) {
list_del(&dax_id->list);
kfree(dax_id);
} else
/* dax_id already added */;
mutex_unlock(&dax_bus_lock);
- return rc;
+
+ if (rc < 0)
+ return rc;
+ if (action == ID_ADD)
+ rc = driver_attach(drv);
+ if (rc)
+ return rc;
+ return count;
}
static ssize_t new_id_store(struct device_driver *drv, const char *buf,
size_t count)
{
- return do_id_store(drv, buf, count, true);
+ return do_id_store(drv, buf, count, ID_ADD);
}
static DRIVER_ATTR_WO(new_id);
static ssize_t remove_id_store(struct device_driver *drv, const char *buf,
size_t count)
{
- return do_id_store(drv, buf, count, false);
+ return do_id_store(drv, buf, count, ID_REMOVE);
}
static DRIVER_ATTR_WO(remove_id);
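A side effect of the fix is that do_id_store() now returns @count on
success instead of the internal rc, matching the sysfs convention that a
store callback reports how many bytes of the write it consumed. A
generic illustration of that convention follows; do_work() is
hypothetical and the fragment assumes the usual kernel driver-attribute
context:

	static ssize_t example_store(struct device_driver *drv, const char *buf,
			size_t count)
	{
		int rc = do_work(buf);	/* hypothetical worker */

		if (rc < 0)
			return rc;	/* a negative errno fails the write */
		return count;		/* claiming the full buffer ends the write */
	}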
[PATCH AUTOSEL 4.19 44/83] acpi/nfit: Fix race accessing memdev in nfit_get_smbios_id()
by Sasha Levin
From: Tony Luck <tony.luck@intel.com>
[ Upstream commit 0919871ac37fdcf46c7657da0f1742efe096b399 ]
Possible race accessing memdev structures after dropping the
mutex. Dan Williams says this could race against another thread
that is doing:
# echo "ACPI0012:00" > /sys/bus/acpi/drivers/nfit/unbind
Reported-by: Jane Chu <jane.chu@oracle.com>
Fixes: 23222f8f8dce ("acpi, nfit: Add function to look up nvdimm...")
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/acpi/nfit/core.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index ea59c01ce8db..f530d3541242 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -719,6 +719,7 @@ int nfit_get_smbios_id(u32 device_handle, u16 *flags)
struct acpi_nfit_memory_map *memdev;
struct acpi_nfit_desc *acpi_desc;
struct nfit_mem *nfit_mem;
+ u16 physical_id;
mutex_lock(&acpi_desc_lock);
list_for_each_entry(acpi_desc, &acpi_descs, list) {
@@ -726,10 +727,11 @@ int nfit_get_smbios_id(u32 device_handle, u16 *flags)
list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
memdev = __to_nfit_memdev(nfit_mem);
if (memdev->device_handle == device_handle) {
+ *flags = memdev->flags;
+ physical_id = memdev->physical_id;
mutex_unlock(&acpi_desc->init_mutex);
mutex_unlock(&acpi_desc_lock);
- *flags = memdev->flags;
- return memdev->physical_id;
+ return physical_id;
}
}
mutex_unlock(&acpi_desc->init_mutex);
--
2.19.1
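The shape of this fix is a general one: snapshot every field you need
from the protected structure before dropping its locks, and never
dereference the structure afterwards. In miniature, with hypothetical
pthread-based types standing in for the nfit structures:

	#include <pthread.h>

	struct shared {
		pthread_mutex_t lock;
		unsigned short flags;
		unsigned short physical_id;
	};

	/* copy fields out under the lock; s may be freed once we unlock */
	static int get_physical_id(struct shared *s, unsigned short *flags)
	{
		unsigned short physical_id;

		pthread_mutex_lock(&s->lock);
		*flags = s->flags;
		physical_id = s->physical_id;
		pthread_mutex_unlock(&s->lock);
		return physical_id;
	}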
[PATCH AUTOSEL 4.20 052/105] acpi/nfit: Fix race accessing memdev in nfit_get_smbios_id()
by Sasha Levin
From: Tony Luck <tony.luck@intel.com>
[ Upstream commit 0919871ac37fdcf46c7657da0f1742efe096b399 ]
Possible race accessing memdev structures after dropping the
mutex. Dan Williams says this could race against another thread
that is doing:
# echo "ACPI0012:00" > /sys/bus/acpi/drivers/nfit/unbind
Reported-by: Jane Chu <jane.chu@oracle.com>
Fixes: 23222f8f8dce ("acpi, nfit: Add function to look up nvdimm...")
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/acpi/nfit/core.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 8535e7999769..2a2d7ec77252 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -724,6 +724,7 @@ int nfit_get_smbios_id(u32 device_handle, u16 *flags)
struct acpi_nfit_memory_map *memdev;
struct acpi_nfit_desc *acpi_desc;
struct nfit_mem *nfit_mem;
+ u16 physical_id;
mutex_lock(&acpi_desc_lock);
list_for_each_entry(acpi_desc, &acpi_descs, list) {
@@ -731,10 +732,11 @@ int nfit_get_smbios_id(u32 device_handle, u16 *flags)
list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
memdev = __to_nfit_memdev(nfit_mem);
if (memdev->device_handle == device_handle) {
+ *flags = memdev->flags;
+ physical_id = memdev->physical_id;
mutex_unlock(&acpi_desc->init_mutex);
mutex_unlock(&acpi_desc_lock);
- *flags = memdev->flags;
- return memdev->physical_id;
+ return physical_id;
}
}
mutex_unlock(&acpi_desc->init_mutex);
--
2.19.1