[PATCH] nfit: fix format interface code byte order per ACPI6.1
by Dan Williams
ACPI6.1 clarifies that DCR fields are stored as an array of bytes,
update the format interface code constants to match.
Cc: Toshi Kani <toshi.kani(a)hpe.com>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
---
drivers/acpi/nfit.h | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h
index c75576b2d50e..92ea5d06c7fe 100644
--- a/drivers/acpi/nfit.h
+++ b/drivers/acpi/nfit.h
@@ -41,11 +41,13 @@ enum nfit_uuids {
NFIT_UUID_MAX,
};
-enum nfit_fic {
- NFIT_FIC_BYTE = 0x101, /* byte-addressable energy backed */
- NFIT_FIC_BLK = 0x201, /* block-addressable non-energy backed */
- NFIT_FIC_BYTEN = 0x301, /* byte-addressable non-energy backed */
-};
+/*
+ * Region format interface codes are stored as an array of bytes in the
+ * NFIT DIMM Control Region structure
+ */
+#define NFIT_FIC_BYTE cpu_to_be16(0x101) /* byte-addressable energy backed */
+#define NFIT_FIC_BLK cpu_to_be16(0x201) /* block-addressable non-energy backed */
+#define NFIT_FIC_BYTEN cpu_to_be16(0x301) /* byte-addressable non-energy backed */
enum {
NFIT_BLK_READ_FLUSH = 1,
6 years, 1 month
[PATCH v2] test: Add a unit test for dax error handling
by Vishal Verma
When we have a namespace with media errors, DAX should fail when trying
to map the bad blocks for direct access, but a regular write() to the
same sector should go through the driver and clear the error.
This test checks for all of the above happening - failure for a read()
on a file with a bad block, failure on an mmap-read for the same, and
finally a successful write that clears the bad block.
It also tests that a hole punch to a badblock (if the hole-punch is
sector aligned and sized) clears the error.
Signed-off-by: Vishal Verma <vishal.l.verma(a)intel.com>
---
v2: Also test that punching a hole clears poison.
Makefile.am | 5 +-
test/dax-errors.c | 139 +++++++++++++++++++++++++++++++++++++++++++++++++++++
test/dax-errors.sh | 126 ++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 268 insertions(+), 2 deletions(-)
create mode 100644 test/dax-errors.c
create mode 100755 test/dax-errors.sh
diff --git a/Makefile.am b/Makefile.am
index 3f7dca3..27b06a6 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -145,8 +145,8 @@ EXTRA_DIST += lib/libndctl.pc.in
CLEANFILES += lib/libndctl.pc
TESTS = test/libndctl test/dpa-alloc test/parent-uuid test/create.sh \
- test/clear.sh
-check_PROGRAMS = test/libndctl test/dpa-alloc test/parent-uuid
+ test/clear.sh test/dax-errors.sh
+check_PROGRAMS = test/libndctl test/dpa-alloc test/parent-uuid test/dax-errors
if ENABLE_DESTRUCTIVE
TESTS += test/blk-ns test/pmem-ns test/pcommit
@@ -179,3 +179,4 @@ test_dax_dev_LDADD = lib/libndctl.la
test_dax_pmd_SOURCES = test/dax-pmd.c
test_mmap_SOURCES = test/mmap.c
+test_dax_errors_SOURCES = test/dax-errors.c
diff --git a/test/dax-errors.c b/test/dax-errors.c
new file mode 100644
index 0000000..4e9bb04
--- /dev/null
+++ b/test/dax-errors.c
@@ -0,0 +1,139 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <stdlib.h>
+#include <linux/fs.h>
+#include <linux/fiemap.h>
+#include <setjmp.h>
+
+#define fail() fprintf(stderr, "%s: failed at: %d\n", __func__, __LINE__)
+
+static sigjmp_buf sj_env;
+static int sig_count;
+
+/*
+ * SIGBUS handler: logs the signal, increments sig_count and then jumps
+ * back to the sigsetjmp() point saved in sj_env, making that call return 1.
+ * The sig, siginfo and ptr arguments are not used.
+ */
+static void sigbus_hdl(int sig, siginfo_t *siginfo, void *ptr)
+{
+ fprintf(stderr, "** Received a SIGBUS **\n");
+ sig_count++;
+ siglongjmp(sj_env, 1);
+}
+
+/*
+ * Map the first 4096 bytes of @fd and copy them out through the mapping.
+ * The caller is expected to have installed sigbus_hdl() for SIGBUS; a
+ * fault during the copy comes back here through sj_env.
+ *
+ * Returns:
+ *   0          the copy completed without a SIGBUS
+ *   sig_count  the copy faulted (1 for the first SIGBUS seen)
+ *   -ENXIO     @fd is invalid or mmap() failed
+ *   -ENOMEM    the bounce buffer could not be allocated
+ *
+ * The mapping and the bounce buffer are released on every path.
+ */
+static int test_dax_read_err(int fd)
+{
+	void *base, *buf;
+	int rc = 0;
+
+	if (fd < 0) {
+		fail();
+		return -ENXIO;
+	}
+
+	if (posix_memalign(&buf, 4096, 4096) != 0)
+		return -ENOMEM;
+
+	base = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+	if (base == MAP_FAILED) {
+		perror("mmap");
+		rc = -ENXIO;
+		goto out_free;
+	}
+
+	/* sigbus_hdl() resumes execution here with a non-zero return */
+	if (sigsetjmp(sj_env, 1)) {
+		if (sig_count == 1)
+			fprintf(stderr, "Failed to read from mapped file\n");
+		rc = sig_count;
+		goto out_unmap;
+	}
+
+	/* read a page through DAX (should fail due to a bad block) */
+	memcpy(buf, base, 4096);
+
+out_unmap:
+	/* an munmap() failure is logged but does not change the verdict */
+	if (munmap(base, 4096) < 0)
+		fail();
+out_free:
+	free(buf);
+	return rc;
+}
+
+/*
+ * Write one 4096-byte page of zeroes to @fd at its current offset using
+ * write(), which is expected to clear the pmem errors/bad blocks there.
+ *
+ * Returns 0 on success, the (positive) errno of a failed write(),
+ * -ENXIO for an invalid @fd, or -ENOMEM if no buffer could be allocated.
+ */
+static int test_dax_write_clear(int fd)
+{
+	void *zero_page;
+	int rc;
+
+	if (fd < 0) {
+		fail();
+		return -ENXIO;
+	}
+
+	if (posix_memalign(&zero_page, 4096, 4096))
+		return -ENOMEM;
+	memset(zero_page, 0, 4096);
+
+	printf("Attempting to write\n");
+	/* capture errno right away, before free() gets a chance to touch it */
+	rc = (write(fd, zero_page, 4096) < 0) ? errno : 0;
+
+	free(zero_page);
+	return rc;
+}
+
+/*
+ * usage: dax-errors <file>
+ *
+ * Installs the SIGBUS handler, opens <file> with O_RDWR | O_DIRECT and
+ * runs two steps against it:
+ *   1. an mmap-read of the first page, which must raise exactly one SIGBUS
+ *   2. a regular write() of the same page, which must succeed
+ *
+ * Returns 0 when both steps behave as expected, non-zero otherwise.
+ */
+int main(int argc, char *argv[])
+{
+	int fd, rc;
+	struct sigaction act;
+
+	/* argv[1] (the file to test) is mandatory */
+	if (argc < 2)
+		return -EINVAL;
+
+	memset(&act, 0, sizeof(act));
+	act.sa_sigaction = sigbus_hdl;
+	act.sa_flags = SA_SIGINFO;
+
+	if (sigaction(SIGBUS, &act, NULL)) {
+		fail();
+		return 1;
+	}
+
+	fd = open(argv[1], O_RDWR | O_DIRECT);
+	if (fd < 0) {
+		perror("open");
+		return -ENXIO;
+	}
+
+	/* Start the test. First, we do an mmap-read, and expect it to fail */
+	rc = test_dax_read_err(fd);
+	if (rc == 0) {
+		fprintf(stderr, "Expected read to fail, but it succeeded\n");
+		rc = -ENXIO;
+		goto out;
+	}
+	if (rc < 0) {
+		/* setup error (mmap/alloc), not the SIGBUS we are looking for */
+		fprintf(stderr, "Unable to set up the mmap-read test\n");
+		goto out;
+	}
+	if (rc > 1) {
+		fprintf(stderr, "Received a second SIGBUS, exiting.\n");
+		rc = -ENXIO;
+		goto out;
+	}
+	printf(" mmap-read failed as expected\n");
+
+	/* Next, do a regular (O_DIRECT) write() */
+	rc = test_dax_write_clear(fd);
+	if (rc)
+		perror("write");
+
+out:
+	close(fd);
+	return rc;
+}
diff --git a/test/dax-errors.sh b/test/dax-errors.sh
new file mode 100755
index 0000000..37d847b
--- /dev/null
+++ b/test/dax-errors.sh
@@ -0,0 +1,126 @@
+#!/bin/bash -x
+
+DEV=""
+NDCTL="./ndctl"
+BUS="-b nfit_test.0"
+BUS1="-b nfit_test.1"
+MNT=test_dax_mnt
+FILE=image
+json2var="s/[{}\",]//g; s/:/=/g"
+rc=77
+
+# err <line>: report the failing line, remove the test files, unmount the
+# namespace if one was created and exit. The exit status is 1 when a block
+# device exists, otherwise 77.
+err() {
+	echo "test/dax-errors: failed at line $1"
+	rm -f $FILE
+	rm -f $MNT/$FILE
+	if [ -z "$blockdev" ]; then
+		rc=77
+	else
+		rc=1
+		umount /dev/$blockdev
+	fi
+	rmdir $MNT
+	exit $rc
+}
+
+set -e
+mkdir -p $MNT
+trap 'err $LINENO' ERR
+
+# setup (reset nfit_test dimms)
+modprobe nfit_test
+$NDCTL disable-region $BUS all
+$NDCTL zero-labels $BUS all
+$NDCTL enable-region $BUS all
+
+rc=1
+
+# create pmem
+dev="x"
+json=$($NDCTL create-namespace $BUS -t pmem -m raw)
+eval $(echo $json | sed -e "$json2var")
+# Operands are quoted so that an empty or unset value fails the comparison;
+# unquoted, '[' would hit a syntax error and the '&&' list would skip the
+# "fail" branch, letting the test continue.
+[ "$dev" = "x" ] && echo "fail: $LINENO" && exit 1
+[ "$mode" != "raw" ] && echo "fail: $LINENO" && exit 1
+
+# check for expected errors in the middle of the namespace
+read sector len < /sys/block/$blockdev/badblocks
+[ $((sector * 2)) -ne $((size /512)) ] && echo "fail: $LINENO" && exit 1
+if dd if=/dev/$blockdev of=/dev/null iflag=direct bs=512 skip=$sector count=$len; then
+ echo "fail: $LINENO" && exit 1
+fi
+
+# check that writing clears the errors
+if ! dd of=/dev/$blockdev if=/dev/zero oflag=direct bs=512 seek=$sector count=$len; then
+ echo "fail: $LINENO" && exit 1
+fi
+
+if read sector len < /sys/block/$blockdev/badblocks; then
+ # fail if reading badblocks returns data
+ echo "fail: $LINENO" && exit 1
+fi
+
+#mkfs.xfs /dev/$blockdev -b size=4096 -f
+mkfs.ext4 /dev/$blockdev -b 4096
+mount /dev/$blockdev $MNT -o dax
+
+# prepare an image file with random data
+dd if=/dev/urandom of=$FILE bs=4096 count=4
+test -s $FILE
+
+# copy it to the dax file system
+cp $FILE $MNT/$FILE
+
+# Get the start sector for the file
+start_sect=$(filefrag -v -b512 $MNT/$FILE | grep -E "^[ ]+[0-9]+.*" | head -1 | awk '{ print $4 }' | cut -d. -f1)
+test -n "$start_sect"
+echo "start sector of the file is $start_sect"
+
+# inject badblocks for one page at the start of the file
+echo $start_sect 8 > /sys/block/$blockdev/badblocks
+
+# make sure reading the first block of the file fails as expected
+: The following 'dd' is expected to hit an I/O Error
+dd if=$MNT/$FILE of=/dev/null iflag=direct bs=4096 count=1 && err $LINENO || true
+
+# run the dax-errors test
+test -x test/dax-errors
+test/dax-errors $MNT/$FILE
+
+if read sector len < /sys/block/$blockdev/badblocks; then
+ # fail if reading badblocks returns data
+ echo "fail: $LINENO" && exit 1
+fi
+
+# test that a hole punch to a dax file also clears errors
+dd if=/dev/urandom of=$MNT/$FILE oflag=direct bs=4096 count=4
+start_sect=$(filefrag -v -b512 $MNT/$FILE | grep -E "^[ ]+[0-9]+.*" | head -1 | awk '{ print $4 }' | cut -d. -f1)
+test -n "$start_sect"
+echo "holepunch test: start sector: $start_sect"
+
+# inject a badblock at the second sector of the first page
+echo $((start_sect + 1)) 1 > /sys/block/$blockdev/badblocks
+
+# verify badblock by reading
+: The following 'dd' is expected to hit an I/O Error
+dd if=$MNT/$FILE of=/dev/null iflag=direct bs=4096 count=1 && err $LINENO || true
+
+# hole punch the second sector, and verify it clears the
+# badblock (and doesn't fail)
+if ! fallocate -p -o 0 -l 1024 $MNT/$FILE; then
+ echo "fail: $LINENO" && exit 1
+fi
+[ -n "$(cat /sys/block/$blockdev/badblocks)" ] && echo "error: $LINENO" && exit 1
+
+# cleanup
+rm -f $FILE
+rm -f $MNT/$FILE
+if [ -n "$blockdev" ]; then
+ umount /dev/$blockdev
+fi
+rmdir $MNT
+
+$NDCTL disable-region $BUS all
+$NDCTL disable-region $BUS1 all
+modprobe -r nfit_test
+
+exit 0
--
2.5.5
6 years, 1 month
[PATCH v10 0/7] nvdimm: Add an IOCTL pass thru for DSM calls
by Jerry Hoemann
The NVDIMM code in the kernel supports an IOCTL interface to user
space based upon the Intel Example DSM:
http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf
This interface cannot be used by other NVDIMM DSMs that support
incompatible functions.
An alternative DSM specification for Type N DSM being developed
by Hewlett Packard Enterprise can be found at:
https://github.com/HewlettPackard/hpe-nvm/tree/master/Documentation
To accommodate multiple and conflicting DSM specifications, this patch
set adds a generic "pass-thru" IOCTL interface which is not tied to
a particular DSM.
A new _IOC_NR ND_CMD_CALL == "30" is added for the pass thru call.
The new data structure nd_cmd_pkg serves as a wrapper for the
pass-thru calls. This wrapper supplies the data that the kernel
needs to make the _DSM call.
Unlike the definitions of the _DSM functions themselves, the nd_cmd_pkg
provides the calling information (input/output sizes) in a uniform
manner, making the kernel marshaling of the arguments straightforward.
This shifts the marshaling burden from the kernel to the user
space application while still permitting the kernel to internally
call _DSM functions.
The kernel functions __nd_ioctl and acpi_nfit_ctl were modified
to accommodate ND_CMD_CALL.
Changes in version 10:
---------------------
Code review comments:
0. "static const" on nfit_cmd_family_tbl
1. change name ND_MAX_CMD to ND_MAX_DSM_FUN_IDX
2. use nfit_cmd_family_tbl locally
3. add comment on passthru return semantics for size mis-match.
4. Misc white space changes.
5. Change name call_dsm back to pkg
6. set bit ND_CMD_CALL to acpi_desc->bus_dsm_force_en in nfit_test0_setup
0005-nvdimm-Extend-nfit_cmd_family_tbl-to-allow-masking-d:
---------------------------------------------------------
A new patch was added specifically dealing with the nfit_cmd_family_tbl.
A mask to be bit-anded with the firmware dsm_mask was added.
A mask to be bit-ored with the cmd_mask was added.
The table is searched once and pointer to the entry was returned.
This allows for the minor modification required for each nvdimm type.
I left this as a separate patch so change was more obvious for
reviewers, but it can be squashed later.
0006-tools-testing-nvdimm-dimm_dsm_force_en:
--------------------------------------------
A new patch was added to change name of dimm_dsm_force_en and
to set its value in nfit_test0_setup().
0007-nvmdimm-ND_CMD_CALL:
-------------------------
An RFC patch was added to change value of ND_CMD_CALL to 30 to avoid/delay
collision with commands which are directly mapped to the Intel DSM.
The maintainers tree has ND_CMD_CALL value set to 10. I'm okay with either
value as long as it doesn't change once accepted.
Changes in version 9:
---------------------
0. Based on https://git.kernel.org/cgit/linux/kernel/git/djbw/nvdimm.git/log/?h=for-4...
1. Fixed a broken printk statement in error path.
Addressed the following code review requests:
1. Separated determination of uuid and dsm_mask into separate functions.
2. Reverted to the repeatedly calling acpi_check_dsm for each bit in
dsm_mask instead of just calling firmware once. Function is
called 22 times per nvdimm.
4. Removed bit mask from nfit_cmd_family_tbl.
5. Added separate cmd_mask to struct nfit_mem.
6. Changed *dsm_mask to cmd_mask in struct nvdimm.
7. changed __nd_ioctl to filter based upon cmd_mask not dsm_mask.
Changes in version 8:
---------------------
1. augmented family_to_uuid() to return uuid. This to address bug
in prior version where acpi_nfit_ctl wasn't updating uuid
with value associated with command family.
2. patch 0006 changes name of nvdimm_bus_descriptor.dsm_mask to .cmd_mask
3. patch 0008 adds field cmd_ioctl if kernel supports full ioctl
as with Intel example dsm.
4. patch 0009 make determination if kernel supports the full
cmd_ioctl for that dsm. Updates the commands_show function
to invert the sense of display of commands. All dsm support
pass-thru, only Intel example support the full ioctl interface.
5. patch 0010 adds explicit ioctl interface to return command mask.
This was done in part to avoid "unknown" command in sysfs.
Changes in version 7:
--------------------
0. change name ND_CMD_CALL_DSM to ND_CMD_CALL
- part of abstracting out DSM missed in version 6.
1. change name in struct nd_call_dsm
a) "ncp_" -> "nd_"
b) ncp_pot_size -> nd_fw_size
c) ncp_type -> nd_family
o) cascade name changes to other patches
2. Expanded comment around data structure nd_cmd_pkg
3. At Dan's request, hard coding "root" UUID.
a) retract extension of dsm_uuid to nvdimm_bus_descriptor.
b) reverted nfit.c/acpi_nfit_init_dsms() with the exception of
allowing function 0 in mask.
4. At Dan's request, removed "rev" from nd_cmd_pkg. Hard-coding
use of rev "1" in acpi_nfit_ctl.
Changes in version 6:
---------------------
Built against
git://git.kernel.org/pub/scm/linux/kernel/git/djbw/nvdimm.git
libnvdimm-pending
0. Patches "Clean-up access mode check" and "Fix security issue with DSM IOCTL"
already in above libnvdimm-pending. So omitted here.
1. Incorporated changes from Dan's RFC patch set
https://lists.01.org/pipermail/linux-nvdimm/2016-January/004049.html
2. Dan asked me to abstract out the DSM aspects from the ndm_cmd_dsmcall_pkg.
This became nd_cmd_pkg. UUIDs are no longer passed in from
user applications.
3. To accommodate multiple UUIDS, added table cmd_type_tbl which is used
to determine UUID for the acpi object by calling function 0 for
each UUID in table until success.
This table also provides a MASK field that the kernel can use
to exclude functions being called.
This table can be thought of as a list of "acceptable" DSMs.
4. The cmd_type_tbl is also used by acpi_nfit_ctl to map the
external handle of calls to internal handle, UUID.
Note, code only validates that the requested type of call is one in
cmd_type_tbl, but it might not necessarily be the same found during
acpi_nfit_add_dimm. The ACPI SPEC appears to allow and firmware
does implement multiple UUID per object.
In the case where type is in table, but the UUID isn't supported
by the underlying firmware, firmware shall return an error when
called.
This allows for use of a secondary DSM on an object. This could
be considered a feature or a defect. This can be tightened
up if needed.
Changes in version 5:
---------------------
0. Fixed submit comment for drivers/acpi/utils.c.
Changes in version 4:
---------------------
0. Added patch to correct parameter type passed to acpi_evaluate_dsm
ACPI defines arguments rev and fun as 64 bit quantities and the ioctl
exports to user face rev and func. We want those to match the ACPI spec.
Also modified acpi_evaluate_dsm_typed and acpi_check dsm which had
similar issue.
1. nd_cmd_dsmcall_pkg rearrange a reserve and rounded up total size
to 16 byte boundary.
2. Created stand alone patch for the pre-existing security issue related
to "read only" IOCTL calls.
3. Added patch for increasing envelope size of IOCTL. Needed to
be able to read in the wrapper to know remaining size to copy in.
Note: in_env, out_env are statics sized based upon this change.
4. Moved copyin code to table driven nd_cmd_desc
Note, the last 40 lines or so of acpi_nfit_ctl will not return _DSM
data unless the size allocated in user space buffer equals
out_obj->buffer.length.
The semantic we want in the pass thru case is to return as much
of the _DSM data as the user space buffer would accommodate.
Hence, in acpi_nfit_ctl I have retained the line:
memcpy(pkg->dsm_buf + pkg->h.dsm_in,
out_obj->buffer.pointer,
min(pkg->h.dsm_size, pkg->h.dsm_out));
and the early return from the function.
Changes in version 3:
---------------------
1. Changed name ND_CMD_PASSTHRU to ND_CMD_CALL_DSM.
2. Value of ND_CMD_CALL_DSM is 10, not 100.
3. Changed name of nd_passthru_pkg to nd_cmd_dsmcall_pkg.
4. Removed separate functions for handling ND_CMD_CALL_DSM.
Moved functionality to __nd_ioctl and acpi_nfit_ctl proper.
The resultant code looks very different from prior versions.
5. BUGFIX: __nd_ioctl: Change the if read_only switch to use
_IOC_NR cmd (not ioctl_cmd) for better protection.
Do we want to make a stand alone patch for this issue?
Changes in version 2:
---------------------
1. Cleanup access mode check in nd_ioctl and nvdimm_ioctl.
2. Change name of ndn_pkg to nd_passthru_pkg
3. Adjust sizes in nd_passthru_pkg. DSM integers are 64 bit.
4. No new ioctl type, instead tunnel into the existing number space.
5. Push down one function level where determine ioctl cmd type.
6. re-work diagnostic print/dump message in pass-thru functions.
Jerry Hoemann (7):
nvdimm: Add IOCTL pass thru functions
libnvdimm: nvdimm_bus_descriptor field name change
tools/testing/nvdimm: 'call_dsm' support
nvdimm: Add concept of cmd mask
nvdimm: Extend nfit_cmd_family_tbl to allow masking dsm_mask
tools/testing/nvdimm: dimm_dsm_force_en
nvmdimm: ND_CMD_CALL
drivers/acpi/nfit.c | 127 ++++++++++++++++++++++++++++++++++-----
drivers/acpi/nfit.h | 3 +-
drivers/nvdimm/bus.c | 47 +++++++++++++--
drivers/nvdimm/core.c | 2 +-
drivers/nvdimm/dimm_devs.c | 12 ++--
drivers/nvdimm/nd-core.h | 2 +-
include/linux/libnvdimm.h | 4 +-
include/uapi/linux/ndctl.h | 2 +-
tools/testing/nvdimm/test/nfit.c | 22 +++++--
9 files changed, 184 insertions(+), 37 deletions(-)
--
1.7.11.3
6 years, 1 month
[ndctl PATCH 0/2] ACPI 6.1 DIMM id support
by Dan Williams
Following Toshi's patches to add a sysfs attribute for DIMM ids, plumb
this new attribute into libndctl and the 'ndctl list' utility.
---
Dan Williams (2):
ndctl: add ndctl_dimm_get_unique_id() api
ndctl, list: add 'id' to the dimm listing
lib/libndctl.c | 38 +++++++++++++++++++++++++++++++-------
lib/libndctl.sym | 1 +
lib/ndctl/libndctl.h.in | 1 +
util/json.c | 8 ++++++++
4 files changed, 41 insertions(+), 7 deletions(-)
6 years, 2 months
[ndctl PATCH 0/7] miscellaneous fixes
by Dan Williams
1/ Resend "ndctl, create-namespace: report failures due to namespace
being mounted" with the strerror() fixup recommended by Linda.
2/ Resend "ndctl, create-namespace: check for ZONE_DEVICE=n kernels"
with a fixup for ndns being NULL in validate_namespace_options().
3/ A configure.ac fix to stop depending on documentation utilities in
the --disable-docs case.
4/ A collection of static analysis fixes.
---
Dan Williams (7):
ndctl, create-namespace: report failures due to namespace being mounted
ndctl, create-namespace: check for ZONE_DEVICE=n kernels
ndctl: fix some warnings
ndctl: drop asciidocs and xmlto requirement for --disable-docs
ndctl: fix to_dsm_index() static analysis warning
ndctl: fix uninitialized variable in add_dimm()
ndctl: fix unchecked return value
builtin-read-labels.c | 3 +--
builtin-xaction-namespace.c | 41 +++++++++++++++++++++++++++++++----------
configure.ac | 23 ++++++++++++-----------
lib/libndctl-private.h | 15 +++++++++++++++
lib/libndctl.c | 12 ++++--------
test/core.c | 3 ++-
6 files changed, 65 insertions(+), 32 deletions(-)
6 years, 2 months
[PATCH v4 0/2] Align mmap address for DAX pmd mappings
by Toshi Kani
When CONFIG_FS_DAX_PMD is set, DAX supports mmap() using pmd page
size. This feature relies on both mmap virtual address and FS
block (i.e. physical address) to be aligned by the pmd page size.
Users can use mkfs options to specify FS to align block allocations.
However, aligning mmap address requires code changes to existing
applications for providing a pmd-aligned address to mmap().
For instance, fio with "ioengine=mmap" performs I/Os with mmap() [1].
It calls mmap() with a NULL address, which needs to be changed to
provide a pmd-aligned address for testing with DAX pmd mappings.
Changing all applications that call mmap() with NULL is undesirable.
This patch-set extends filesystems to align an mmap address for
a DAX file so that unmodified applications can use DAX pmd mappings.
[1]: https://github.com/axboe/fio/blob/master/engines/mmap.c
v4:
- Use loff_t for offset and cast before shift (Jan Kara)
- Remove redundant parentheses (Jan Kara)
- Allow integration with huge page cache support (Matthew Wilcox)
- Prepare for PUD mapping support (Mike Kravetz, Matthew Wilcox)
v3:
- Check overflow condition to offset + length. (Matthew Wilcox)
- Remove indent by using gotos. (Matthew Wilcox)
- Define dax_get_unmapped_area to NULL when CONFIG_FS_DAX is unset.
(Matthew Wilcox)
- Squash all filesystem patches together. (Matthew Wilcox)
v2:
- Change filesystems to provide their get_unmapped_area().
(Matthew Wilcox)
- Add more description about the benefit. (Matthew Wilcox)
---
Toshi Kani (2):
1/2 thp, dax: add thp_get_unmapped_area for pmd mappings
2/2 ext2/4, xfs, blk: call thp_get_unmapped_area() for pmd mappings
---
fs/block_dev.c | 1 +
fs/ext2/file.c | 1 +
fs/ext4/file.c | 1 +
fs/xfs/xfs_file.c | 1 +
include/linux/huge_mm.h | 7 +++++++
mm/huge_memory.c | 43 +++++++++++++++++++++++++++++++++++++++++++
6 files changed, 54 insertions(+)
6 years, 2 months
[PATCH v3 0/7] dax: handling media errors
by Vishal Verma
Until now, dax has been disabled if media errors were found on
any device. This series attempts to address that.
The first three patches from Dan re-enable dax even when media
errors are present.
The fourth patch from Matthew removes the
zeroout path from dax entirely, making zeroout operations always
go through the driver (The motivation is that if a backing device
has media errors, and we create a sparse file on it, we don't
want the initial zeroing to happen via dax, we want to give the
block driver a chance to clear the errors).
The fifth patch changes the behaviour of dax_do_io by adding a
wrapper around it that is passed all the arguments also needed by
__blockdev_do_direct_IO. If (the new) __dax_do_io fails with -EIO
due to a bad block, we simply retry with the direct_IO path which
forces the IO to go through the block driver, and can attempt to
clear the error.
Patch 6 reduces our calls to clear_pmem from dax in the
truncate/hole-punch cases. We check if the range being truncated
is sector aligned/sized, and if so, send blkdev_issue_zeroout
instead of clear_pmem so that errors can be handled better by
the driver.
Patch 7 fixes a redundant comment in DAX and is mostly unrelated
to the rest of this series.
This series also depends on/is based on Jan Kara's DAX Locking
fixes series [1].
[1]: http://www.spinics.net/lists/linux-mm/msg105819.html
v3:
- Wrapper-ize the direct_IO fallback again and make an exception
for -EIOCBQUEUED (Jeff, Dan)
- Reduce clear_pmem usage in DAX to the minimum
Dan Williams (3):
block, dax: pass blk_dax_ctl through to drivers
dax: fallback from pmd to pte on error
dax: enable dax in the presence of known media errors (badblocks)
Matthew Wilcox (1):
dax: use sb_issue_zerout instead of calling dax_clear_sectors
Vishal Verma (3):
dax: handle media errors in dax_do_io
dax: for truncate/hole-punch, do zeroing through the driver if
possible
dax: fix a comment in dax_zero_page_range and dax_truncate_page
arch/powerpc/sysdev/axonram.c | 10 +++---
block/ioctl.c | 9 -----
drivers/block/brd.c | 9 ++---
drivers/nvdimm/pmem.c | 17 +++++++---
drivers/s390/block/dcssblk.c | 12 +++----
fs/block_dev.c | 7 ++--
fs/dax.c | 78 +++++++++++++++----------------------------
fs/ext2/inode.c | 12 +++----
fs/ext4/inode.c | 5 +--
fs/xfs/xfs_aops.c | 8 ++---
fs/xfs/xfs_bmap_util.c | 15 +++------
include/linux/blkdev.h | 3 +-
include/linux/dax.h | 31 ++++++++++++++++-
13 files changed, 108 insertions(+), 108 deletions(-)
--
2.5.5
6 years, 2 months
[RFC v9 0/5] nvdimm: Add an IOCTL pass thru for DSM calls
by Jerry Hoemann
The NVDIMM code in the kernel supports an IOCTL interface to user
space based upon the Intel Example DSM:
http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf
This interface cannot be used by other NVDIMM DSMs that support
incompatible functions.
An alternative DSM specification for Type N DSM being developed
by Hewlett Packard Enterprise can be found at:
https://github.com/HewlettPackard/hpe-nvm/tree/master/Documentation
To accommodate multiple and conflicting DSM specifications, this patch
set adds a generic "pass-thru" IOCTL interface which is not tied to
a particular DSM.
A new _IOC_NR ND_CMD_CALL == "10" is added for the pass thru call.
The new data structure nd_cmd_pkg serves as a wrapper for the
pass-thru calls. This wrapper supplies the data that the kernel
needs to make the _DSM call.
Unlike the definitions of the _DSM functions themselves, the nd_cmd_pkg
provides the calling information (input/output sizes) in a uniform
manner, making the kernel marshaling of the arguments straightforward.
This shifts the marshaling burden from the kernel to the user
space application while still permitting the kernel to internally
call _DSM functions.
The kernel functions __nd_ioctl and acpi_nfit_ctl were modified
to accommodate ND_CMD_CALL.
Changes in version 9:
---------------------
0. Based on https://git.kernel.org/cgit/linux/kernel/git/djbw/nvdimm.git/log/?h=for-4...
1. Fixed a broken printk statement in error path.
Addressed the following code review requests:
1. Separated determination of uuid and dsm_mask into separate functions.
2. Reverted to the repeatedly calling acpi_check_dsm for each bit in
dsm_mask instead of just calling firmware once. Function is
called 22 times per nvdimm.
4. Removed bit mask from nfit_cmd_family_tbl.
5. Added separate cmd_mask to struct nfit_mem.
6. Changed *dsm_mask to cmd_mask in struct nvdimm.
7. changed __nd_ioctl to filter based upon cmd_mask not dsm_mask.
Changes in version 8:
---------------------
1. augmented family_to_uuid() to return uuid. This to address bug
in prior version where acpi_nfit_ctl wasn't updating uuid
with value associated with command family.
2. patch 0006 changes name of nvdimm_bus_descriptor.dsm_mask to .cmd_mask
3. patch 0008 adds field cmd_ioctl if kernel supports full ioctl
as with Intel example dsm.
4. patch 0009 make determination if kernel supports the full
cmd_ioctl for that dsm. Updates the commands_show function
to invert the sense of display of commands. All dsm support
pass-thru, only Intel example support the full ioctl interface.
5. patch 0010 adds explicit ioctl interface to return command mask.
This was done in part to avoid "unknown" command in sysfs.
Changes in version 7:
--------------------
0. change name ND_CMD_CALL_DSM to ND_CMD_CALL
- part of abstracting out DSM missed in version 6.
1. change name in struct nd_call_dsm
a) "ncp_" -> "nd_"
b) ncp_pot_size -> nd_fw_size
c) ncp_type -> nd_family
o) cascade name changes to other patches
2. Expanded comment around data structure nd_cmd_pkg
3. At Dan's request, hard coding "root" UUID.
a) retract extension of dsm_uuid to nvdimm_bus_descriptor.
b) reverted nfit.c/acpi_nfit_init_dsms() with the exception of
allowing function 0 in mask.
4. At Dan's request, removed "rev" from nd_cmd_pkg. Hard-coding
use of rev "1" in acpi_nfit_ctl.
Changes in version 6:
---------------------
Built against
git://git.kernel.org/pub/scm/linux/kernel/git/djbw/nvdimm.git
libnvdimm-pending
0. Patches "Clean-up access mode check" and "Fix security issue with DSM IOCTL"
already in above libnvdimm-pending. So omitted here.
1. Incorporated changes from Dan's RFC patch set
https://lists.01.org/pipermail/linux-nvdimm/2016-January/004049.html
2. Dan asked me to abstract out the DSM aspects from the ndm_cmd_dsmcall_pkg.
This became nd_cmd_pkg. UUIDs are no longer passed in from
user applications.
3. To accommodate multiple UUIDS, added table cmd_type_tbl which is used
to determine UUID for the acpi object by calling function 0 for
each UUID in table until success.
This table also provides a MASK field that the kernel can use
to exclude functions being called.
This table can be thought of as a list of "acceptable" DSMs.
4. The cmd_type_tbl is also used by acpi_nfit_ctl to map the
external handle of calls to internal handle, UUID.
Note, code only validates that the requested type of call is one in
cmd_type_tbl, but it might not necessarily be the same found during
acpi_nfit_add_dimm. The ACPI SPEC appears to allow and firmware
does implement multiple UUID per object.
In the case where type is in table, but the UUID isn't supported
by the underlying firmware, firmware shall return an error when
called.
This allows for use of a secondary DSM on an object. This could
be considered a feature or a defect. This can be tightened
up if needed.
Changes in version 5:
---------------------
0. Fixed submit comment for drivers/acpi/utils.c.
Changes in version 4:
---------------------
0. Added patch to correct parameter type passed to acpi_evaluate_dsm
ACPI defines arguments rev and fun as 64 bit quantities and the ioctl
exports to user face rev and func. We want those to match the ACPI spec.
Also modified acpi_evaluate_dsm_typed and acpi_check dsm which had
similar issue.
1. nd_cmd_dsmcall_pkg rearrange a reserve and rounded up total size
to 16 byte boundary.
2. Created stand alone patch for the pre-existing security issue related
to "read only" IOCTL calls.
3. Added patch for increasing envelope size of IOCTL. Needed to
be able to read in the wrapper to know remaining size to copy in.
Note: in_env, out_env are statics sized based upon this change.
4. Moved copyin code to table driven nd_cmd_desc
Note, the last 40 lines or so of acpi_nfit_ctl will not return _DSM
data unless the size allocated in user space buffer equals
out_obj->buffer.length.
The semantic we want in the pass thru case is to return as much
of the _DSM data as the user space buffer would accommodate.
Hence, in acpi_nfit_ctl I have retained the line:
memcpy(pkg->dsm_buf + pkg->h.dsm_in,
out_obj->buffer.pointer,
min(pkg->h.dsm_size, pkg->h.dsm_out));
and the early return from the function.
Changes in version 3:
---------------------
1. Changed name ND_CMD_PASSTHRU to ND_CMD_CALL_DSM.
2. Value of ND_CMD_CALL_DSM is 10, not 100.
3. Changed name of nd_passthru_pkg to nd_cmd_dsmcall_pkg.
4. Removed separate functions for handling ND_CMD_CALL_DSM.
Moved functionality to __nd_ioctl and acpi_nfit_ctl proper.
The resultant code looks very different from prior versions.
5. BUGFIX: __nd_ioctl: Change the if read_only switch to use
_IOC_NR cmd (not ioctl_cmd) for better protection.
Do we want to make a stand alone patch for this issue?
Changes in version 2:
---------------------
1. Cleanup access mode check in nd_ioctl and nvdimm_ioctl.
2. Change name of ndn_pkg to nd_passthru_pkg
3. Adjust sizes in nd_passthru_pkg. DSM integers are 64 bit.
4. No new ioctl type, instead tunnel into the existing number space.
5. Push down one function level where determine ioctl cmd type.
6. re-work diagnostic print/dump message in pass-thru functions.
Jerry Hoemann (5):
nvdimm: Add IOCTL pass thru functions
libnvdimm: nvdimm_bus_descriptor field name change
Subject: [PATCH v8 07/10] tools/testing/nvdimm: 'call_dsm' support
nvdimm: Add concept of cmd mask
nvdimm: Add ioctl to return command mask.
drivers/acpi/nfit.c | 134 ++++++++++++++++++++++++++++++++++-----
drivers/acpi/nfit.h | 1 +
drivers/nvdimm/bus.c | 57 +++++++++++++++--
drivers/nvdimm/core.c | 2 +-
drivers/nvdimm/dimm_devs.c | 12 ++--
drivers/nvdimm/nd-core.h | 2 +-
include/linux/libnvdimm.h | 4 +-
include/uapi/linux/ndctl.h | 9 +++
tools/testing/nvdimm/test/nfit.c | 15 ++++-
9 files changed, 202 insertions(+), 34 deletions(-)
--
1.7.11.3
6 years, 2 months
[ndctl PATCH 0/8] create-namespace fixes, and a misc build fix
by Dan Williams
A collection of fixes for create-namespace operations mainly around
properly generating default parameters.
The destroy-namespace implementation was sent out previously, however
the zero_info_block() enhancement deserved to be its own patch ("ndctl,
create-namespace: make zero_info_block() more robust").
---
Dan Williams (7):
ndctl, create-namespace: make zero_info_block() more robust
ndctl: new 'destroy-namespace' utility
ndctl, create-namespace: report failures due to namespace being mounted
ndctl, xaction-namespace: make 'verbose' option available in all sub-commands
ndctl, create-namespace: fix memmap location reconfiguration
ndctl, create-namespace: fix sector mode default for pmem namespaces
ndctl, create-namespace: check for ZONE_DEVICE=n kernels
Marcin Ślusarz (1):
ndctl: fix build on systems where /bin/sh is not a link to bash
Documentation/Makefile.am | 1
Documentation/ndctl-destroy-namespace.txt | 32 ++++++++
Documentation/xable-namespace-options.txt | 4 +
builtin-xaction-namespace.c | 121 +++++++++++++++++++++++------
builtin.h | 1
git-version | 2
ndctl.c | 3 -
7 files changed, 137 insertions(+), 27 deletions(-)
create mode 100644 Documentation/ndctl-destroy-namespace.txt
6 years, 2 months
[PATCH v3 0/2] Align mmap address for DAX pmd mappings
by Toshi Kani
When CONFIG_FS_DAX_PMD is set, DAX supports mmap() using pmd page
size. This feature relies on both mmap virtual address and FS
block (i.e. physical address) to be aligned by the pmd page size.
Users can use mkfs options to specify FS to align block allocations.
However, aligning mmap address requires code changes to existing
applications for providing a pmd-aligned address to mmap().
For instance, fio with "ioengine=mmap" performs I/Os with mmap() [1].
It calls mmap() with a NULL address, which needs to be changed to
provide a pmd-aligned address for testing with DAX pmd mappings.
Changing all applications that call mmap() with NULL is undesirable.
This patch-set extends filesystems to align an mmap address for
a DAX file so that unmodified applications can use DAX pmd mappings.
[1]: https://github.com/axboe/fio/blob/master/engines/mmap.c
v3:
- Check overflow condition to offset + length. (Matthew Wilcox)
- Remove indent by using gotos. (Matthew Wilcox)
- Define dax_get_unmapped_area to NULL when CONFIG_FS_DAX is unset.
(Matthew Wilcox)
- Squash all filesystem patches together. (Matthew Wilcox)
v2:
- Change filesystems to provide their get_unmapped_area().
(Matthew Wilcox)
- Add more description about the benefit. (Matthew Wilcox)
---
Toshi Kani (2):
1/2 dax: add dax_get_unmapped_area for pmd mappings
2/2 ext2/4, xfs, blk: call dax_get_unmapped_area() for DAX pmd mappings
---
fs/block_dev.c | 1 +
fs/dax.c | 43 +++++++++++++++++++++++++++++++++++++++++++
fs/ext2/file.c | 1 +
fs/ext4/file.c | 1 +
fs/xfs/xfs_file.c | 1 +
include/linux/dax.h | 3 +++
6 files changed, 50 insertions(+)
6 years, 2 months