[PATCH] dm: Call proper helper to determine dax support
by Jan Kara
DM was calling generic_fsdax_supported() to determine whether a device
referenced in the DM table supports DAX. However this is a helper for "leaf" device drivers so that
they don't have to duplicate common generic checks. High level code
should call the dax_supported() helper, which calls into the appropriate
helper for the particular device. This problem manifested itself as
kernel messages:
dm-3: error: dax access failed (-95)
when lvm2-testsuite was run in cases where a DM device was stacked on top of
another DM device.
Fixes: 7bf7eac8d648 ("dax: Arrange for dax_supported check to span multiple devices")
Tested-by: Adrian Huang <ahuang12(a)lenovo.com>
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
drivers/dax/super.c | 4 ++++
drivers/md/dm-table.c | 3 +--
include/linux/dax.h | 11 +++++++++--
3 files changed, 14 insertions(+), 4 deletions(-)
This patch should go in together with Adrian's
https://lore.kernel.org/linux-nvdimm/20200916133923.31-1-adrianhuang0701@...
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index e5767c83ea23..b6284c5cae0a 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -325,11 +325,15 @@ EXPORT_SYMBOL_GPL(dax_direct_access);
bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
int blocksize, sector_t start, sector_t len)
{
+ if (!dax_dev)
+ return false;
+
if (!dax_alive(dax_dev))
return false;
return dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, start, len);
}
+EXPORT_SYMBOL_GPL(dax_supported);
size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i)
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 5edc3079e7c1..bed1ff0744ec 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -862,8 +862,7 @@ int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
{
int blocksize = *(int *) data;
- return generic_fsdax_supported(dev->dax_dev, dev->bdev, blocksize,
- start, len);
+ return dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
}
/* Check devices support synchronous DAX */
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 6904d4e0b2e0..9f916326814a 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -130,6 +130,8 @@ static inline bool generic_fsdax_supported(struct dax_device *dax_dev,
return __generic_fsdax_supported(dax_dev, bdev, blocksize, start,
sectors);
}
+bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
+ int blocksize, sector_t start, sector_t len);
static inline void fs_put_dax(struct dax_device *dax_dev)
{
@@ -157,6 +159,13 @@ static inline bool generic_fsdax_supported(struct dax_device *dax_dev,
return false;
}
+static inline bool dax_supported(struct dax_device *dax_dev,
+ struct block_device *bdev, int blocksize, sector_t start,
+ sector_t len)
+{
+ return false;
+}
+
static inline void fs_put_dax(struct dax_device *dax_dev)
{
}
@@ -195,8 +204,6 @@ bool dax_alive(struct dax_device *dax_dev);
void *dax_get_private(struct dax_device *dax_dev);
long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
void **kaddr, pfn_t *pfn);
-bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
- int blocksize, sector_t start, sector_t len);
size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i);
size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
--
2.16.4
1 year, 9 months
[linux-nvdimm:libnvdimm-fixes 1/2] drivers/md/dm-table.c:866:
undefined reference to `dax_read_lock'
by kernel test robot
tree: https://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git libnvdimm-fixes
head: 3305ce9e90c8f1fefdf75acfbb814574c12bfcc5
commit: ec5f196ad972cd740bcbe6ef9c89a8a92d54ba44 [1/2] dm: Call proper helper to determine dax support
config: openrisc-randconfig-r022-20200918 (attached as .config)
compiler: or1k-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout ec5f196ad972cd740bcbe6ef9c89a8a92d54ba44
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=openrisc
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp(a)intel.com>
All errors (new ones prefixed by >>):
or1k-linux-ld: drivers/md/dm-table.o: in function `device_supports_dax':
>> drivers/md/dm-table.c:866: undefined reference to `dax_read_lock'
drivers/md/dm-table.c:866:(.text+0x84c): relocation truncated to fit: R_OR1K_INSN_REL_26 against undefined symbol `dax_read_lock'
>> or1k-linux-ld: drivers/md/dm-table.c:868: undefined reference to `dax_read_unlock'
drivers/md/dm-table.c:868:(.text+0x854): relocation truncated to fit: R_OR1K_INSN_REL_26 against undefined symbol `dax_read_unlock'
# https://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git/commit/...
git remote add linux-nvdimm https://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git
git fetch --no-tags linux-nvdimm libnvdimm-fixes
git checkout ec5f196ad972cd740bcbe6ef9c89a8a92d54ba44
vim +866 drivers/md/dm-table.c
858
859 /* validate the dax capability of the target device span */
860 int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
861 sector_t start, sector_t len, void *data)
862 {
863 int blocksize = *(int *) data, id;
864 bool rc;
865
> 866 id = dax_read_lock();
867 rc = dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
> 868 dax_read_unlock(id);
869
870 return rc;
871 }
872
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
1 year, 9 months
[PATCH v3 1/1] dax: Fix stack overflow when mounting fsdax pmem device
by Adrian Huang
From: Adrian Huang <ahuang12(a)lenovo.com>
When mounting fsdax pmem device, commit 6180bb446ab6 ("dax: fix
detection of dax support for non-persistent memory block devices")
introduces the stack overflow [1][2]. Here is the call path for
mounting ext4 file system:
ext4_fill_super
bdev_dax_supported
__bdev_dax_supported
dax_supported
generic_fsdax_supported
__generic_fsdax_supported
bdev_dax_supported
This call path leads to infinite recursion, so we cannot
call bdev_dax_supported() in __generic_fsdax_supported(). The sanity
checking of the variable 'dax_dev' is moved prior to the two
bdev_dax_pgoff() checks [3][4].
[1] https://lore.kernel.org/linux-nvdimm/1420999447.1004543.1600055488770.Jav...
[2] https://lore.kernel.org/linux-nvdimm/alpine.LRH.2.02.2009141131220.30651@...
[3] https://lore.kernel.org/linux-nvdimm/CA+RJvhxBHriCuJhm-D8NvJRe3h2MLM+ZMFg...
[4] https://lore.kernel.org/linux-nvdimm/20200903160608.GU878166@iweiny-DESK2...
Fixes: 6180bb446ab6 ("dax: fix detection of dax support for non-persistent memory block devices")
Reported-by: Yi Zhang <yi.zhang(a)redhat.com>
Reported-by: Mikulas Patocka <mpatocka(a)redhat.com>
Signed-off-by: Adrian Huang <ahuang12(a)lenovo.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Cc: Coly Li <colyli(a)suse.de>
Cc: Ira Weiny <ira.weiny(a)intel.com>
Cc: John Pittman <jpittman(a)redhat.com>
---
Changelog:
v3:
1. Add Reviewed-by from Jan
2. Add Reported-by
3. Replace lists.01.org with lore.kernel
v2:
Remove the checking for the returned value '-EOPNOTSUPP' of
dax_direct_access(). Jan has prepared a patch to address the
issue in dm.
---
drivers/dax/super.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index e5767c83ea23..11d0541e6f8f 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -85,6 +85,12 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
return false;
}
+ if (!dax_dev) {
+ pr_debug("%s: error: dax unsupported by block device\n",
+ bdevname(bdev, buf));
+ return false;
+ }
+
err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
if (err) {
pr_info("%s: error: unaligned partition for dax\n",
@@ -100,12 +106,6 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
return false;
}
- if (!dax_dev || !bdev_dax_supported(bdev, blocksize)) {
- pr_debug("%s: error: dax unsupported by block device\n",
- bdevname(bdev, buf));
- return false;
- }
-
id = dax_read_lock();
len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
--
2.17.1
1 year, 9 months
regression on 5.9.0-rc5: mount fsdax w/o dax lead kernel panic
by Yi Zhang
Hi
Could you help check this regression, let me know if you need more info/testing, thanks.
Reproducer:
# ndctl list
[
{
"dev":"namespace1.0",
"mode":"fsdax",
"map":"dev",
"size":16909336576,
"uuid":"c4ebad91-3d29-4fb6-be2c-9ee8e12c8b44",
"sector_size":512,
"align":2097152,
"blockdev":"pmem1"
}
]
# ndctl create-namespace -f -e namespace1.0 -m fsdax
{
"dev":"namespace1.0",
"mode":"fsdax",
"map":"dev",
"size":"15.75 GiB (16.91 GB)",
"uuid":"069a6b5c-917e-4c6c-a277-01fd8574ccb3",
"sector_size":512,
"align":2097152,
"blockdev":"pmem1"
}
# mkfs.ext4 /dev/pmem1
# mount /dev/pmem1 /mnt ---> panic here
kernel log:
[ 1363.513242] BUG: stack guard page was hit at 00000000118cab51 (stack is 00000000548b8b77..000000005363ed26)
[ 1363.513242] kernel stack overflow (double-fault): 0000 [#1] SMP NOPTI
[ 1363.513243] CPU: 22 PID: 14682 Comm: mount Tainted: G S I 5.9.0-rc5 #1
[ 1363.513243] Hardware name: Dell Inc. PowerEdge R640/06NR82, BIOS 2.8.1 06/26/2020
[ 1363.513243] RIP: 0010:igrab+0x14/0x50
[ 1363.513244] Code: 62 07 8c 00 85 c0 74 c6 83 f8 01 75 cd eb cd 0f 1f 80 00 00 00 00 0f 1f 44 00 00 55 48 8d af 88 00 00 00 53 48 89 fb 48 89 ef <e8> 27 65 64 00 f6 83 98 00 00 00 30 75 17 f0 ff 83 58 01 00 00 48
[ 1363.513244] RSP: 0018:ffffa700c0e0c000 EFLAGS: 00010246
[ 1363.513245] RAX: 0000000000000000 RBX: ffff95b69e415a10 RCX: 0000000000000001
[ 1363.513245] RDX: 0000000000000000 RSI: ffff95a946a406b8 RDI: ffff95b69e415a98
[ 1363.513245] RBP: ffff95b69e415a98 R08: 0000000500000000 R09: 8080808080808080
[ 1363.513245] R10: 0000000000000000 R11: fefefefefefefeff R12: 0000000000000001
[ 1363.513246] R13: ffff95b69e415a00 R14: 0000000000000000 R15: ffff95b79f4d3800
[ 1363.513246] FS: 00007fbe9b823080(0000) GS:ffff95b87ff80000(0000) knlGS:0000000000000000
[ 1363.513246] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1363.513246] CR2: ffffa700c0e0bff8 CR3: 0000000ffec46002 CR4: 00000000007706e0
[ 1363.513247] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1363.513247] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 1363.513247] PKRU: 55555554
[ 1363.513247] Call Trace:
[ 1363.513247] dax_get_by_host+0x7c/0xd0
[ 1363.513247] __bdev_dax_supported+0x72/0x170
[ 1363.513248] __generic_fsdax_supported+0x8c/0x500
[ 1363.513248] __bdev_dax_supported+0xd0/0x170
[ 1363.513248] __generic_fsdax_supported+0x8c/0x500
[ 1363.513248] __bdev_dax_supported+0xd0/0x170
[ 1363.513248] __generic_fsdax_supported+0x8c/0x500
[ 1363.513249] __bdev_dax_supported+0xd0/0x170
[ 1363.513249] __generic_fsdax_supported+0x8c/0x500
[ 1363.513249] __bdev_dax_supported+0xd0/0x170
[ 1363.513249] __generic_fsdax_supported+0x8c/0x500
[ 1363.513249] __bdev_dax_supported+0xd0/0x170
[ 1363.513250] __generic_fsdax_supported+0x8c/0x500
[ 1363.513250] __bdev_dax_supported+0xd0/0x170
[ 1363.513250] __generic_fsdax_supported+0x8c/0x500
[ 1363.513250] __bdev_dax_supported+0xd0/0x170
[ 1363.513250] __generic_fsdax_supported+0x8c/0x500
[ 1363.513250] __bdev_dax_supported+0xd0/0x170
[ 1363.513251] __generic_fsdax_supported+0x8c/0x500
[ 1363.513251] __bdev_dax_supported+0xd0/0x170
[ 1363.513251] __generic_fsdax_supported+0x8c/0x500
[ 1363.513251] __bdev_dax_supported+0xd0/0x170
[ 1363.513251] __generic_fsdax_supported+0x8c/0x500
[ 1363.513252] __bdev_dax_supported+0xd0/0x170
[ 1363.513252] __generic_fsdax_supported+0x8c/0x500
[ 1363.513252] __bdev_dax_supported+0xd0/0x170
[ 1363.513252] __generic_fsdax_supported+0x8c/0x500
[ 1363.513252] __bdev_dax_supported+0xd0/0x170
[ 1363.513252] __generic_fsdax_supported+0x8c/0x500
[ 1363.513253] __bdev_dax_supported+0xd0/0x170
[ 1363.513253] __generic_fsdax_supported+0x8c/0x500
[ 1363.513253] __bdev_dax_supported+0xd0/0x170
[ 1363.513253] __generic_fsdax_supported+0x8c/0x500
[ 1363.513253] __bdev_dax_supported+0xd0/0x170
[ 1363.513254] __generic_fsdax_supported+0x8c/0x500
[ 1363.513254] __bdev_dax_supported+0xd0/0x170
[ 1363.513254] __generic_fsdax_supported+0x8c/0x500
[ 1363.513254] __bdev_dax_supported+0xd0/0x170
[ 1363.513254] __generic_fsdax_supported+0x8c/0x500
[ 1363.513254] __bdev_dax_supported+0xd0/0x170
[ 1363.513255] __generic_fsdax_supported+0x8c/0x500
[ 1363.513255] __bdev_dax_supported+0xd0/0x170
[ 1363.513255] __generic_fsdax_supported+0x8c/0x500
[ 1363.513255] __bdev_dax_supported+0xd0/0x170
[ 1363.513255] __generic_fsdax_supported+0x8c/0x500
[ 1363.513255] __bdev_dax_supported+0xd0/0x170
[ 1363.513256] __generic_fsdax_supported+0x8c/0x500
[ 1363.513256] __bdev_dax_supported+0xd0/0x170
[ 1363.513256] __generic_fsdax_supported+0x8c/0x500
[ 1363.513256] __bdev_dax_supported+0xd0/0x170
[ 1363.513256] __generic_fsdax_supported+0x8c/0x500
[ 1363.513257] __bdev_dax_supported+0xd0/0x170
[ 1363.513257] __generic_fsdax_supported+0x8c/0x500
[ 1363.513257] __bdev_dax_supported+0xd0/0x170
[ 1363.513257] __generic_fsdax_supported+0x8c/0x500
[ 1363.513257] __bdev_dax_supported+0xd0/0x170
[ 1363.513257] __generic_fsdax_supported+0x8c/0x500
[ 1363.513258] __bdev_dax_supported+0xd0/0x170
[ 1363.513258] __generic_fsdax_supported+0x8c/0x500
[ 1363.513258] __bdev_dax_supported+0xd0/0x170
[ 1363.513258] __generic_fsdax_supported+0x8c/0x500
[ 1363.513258] __bdev_dax_supported+0xd0/0x170
[ 1363.513259] __generic_fsdax_supported+0x8c/0x500
[ 1363.513259] __bdev_dax_supported+0xd0/0x170
[ 1363.513259] __generic_fsdax_supported+0x8c/0x500
[ 1363.513259] __bdev_dax_supported+0xd0/0x170
[ 1363.513259] __generic_fsdax_supported+0x8c/0x500
[ 1363.513259] __bdev_dax_supported+0xd0/0x170
[ 1363.513260] __generic_fsdax_supported+0x8c/0x500
[ 1363.513260] __bdev_dax_supported+0xd0/0x170
[ 1363.513260] __generic_fsdax_supported+0x8c/0x500
[ 1363.513260] __bdev_dax_supported+0xd0/0x170
[ 1363.513260] __generic_fsdax_supported+0x8c/0x500
[ 1363.513261] __bdev_dax_supported+0xd0/0x170
[ 1363.513261] __generic_fsdax_supported+0x8c/0x500
[ 1363.513261] __bdev_dax_supported+0xd0/0x170
[ 1363.513261] __generic_fsdax_supported+0x8c/0x500
[ 1363.513261] __bdev_dax_supported+0xd0/0x170
[ 1363.513261] __generic_fsdax_supported+0x8c/0x500
[ 1363.513262] __bdev_dax_supported+0xd0/0x170
[ 1363.513262] __generic_fsdax_supported+0x8c/0x500
[ 1363.513262] __bdev_dax_supported+0xd0/0x170
[ 1363.513262] __generic_fsdax_supported+0x8c/0x500
[ 1363.513262] __bdev_dax_supported+0xd0/0x170
[ 1363.513263] __generic_fsdax_supported+0x8c/0x500
[ 1363.513263] __bdev_dax_supported+0xd0/0x170
[ 1363.513263] __generic_fsdax_supported+0x8c/0x500
[ 1363.513263] __bdev_dax_supported+0xd0/0x170
[ 1363.513263] __generic_fsdax_supported+0x8c/0x500
[ 1363.513263] __bdev_dax_supported+0xd0/0x170
[ 1363.513264] __generic_fsdax_supported+0x8c/0x500
[ 1363.513264] __bdev_dax_supported+0xd0/0x170
[ 1363.513264] __generic_fsdax_supported+0x8c/0x500
[ 1363.513264] __bdev_dax_supported+0xd0/0x170
[ 1363.513264] __generic_fsdax_supported+0x8c/0x500
[ 1363.513264] __bdev_dax_supported+0xd0/0x170
[ 1363.513265] __generic_fsdax_supported+0x8c/0x500
[ 1363.513265] __bdev_dax_supported+0xd0/0x170
[ 1363.513265] __generic_fsdax_supported+0x8c/0x500
[ 1363.513265] __bdev_dax_supported+0xd0/0x170
[ 1363.513265] __generic_fsdax_supported+0x8c/0x500
[ 1363.513266] __bdev_dax_supported+0xd0/0x170
[ 1363.513266] __generic_fsdax_supported+0x8c/0x500
[ 1363.513266] __bdev_dax_supported+0xd0/0x170
[ 1363.513266] __generic_fsdax_supported+0x8c/0x500
[ 1363.513266] __bdev_dax_supported+0xd0/0x170
[ 1363.513266] __generic_fsdax_supported+0x8c/0x500
[ 1363.513267] __bdev_dax_supported+0xd0/0x170
[ 1363.513267] __generic_fsdax_supported+0x8c/0x500
[ 1363.513267] __bdev_dax_supported+0xd0/0x170
[ 1363.513267] __generic_fsdax_supported+0x8c/0x500
[ 1363.513267] __bdev_dax_supported+0xd0/0x170
[ 1363.513268] __generic_fsdax_supported+0x8c/0x500
[ 1363.513268] __bdev_dax_supported+0xd0/0x170
[ 1363.513268] __generic_fsdax_supported+0x8c/0x500
[ 1363.513268] __bdev_dax_supported+0xd0/0x170
[ 1363.513268] __generic_fsdax_supported+0x8c/0x500
[ 1363.513268] __bdev_dax_supported+0xd0/0x170
[ 1363.513269] __generic_fsdax_supported+0x8c/0x500
[ 1363.513269] __bdev_dax_supported+0xd0/0x170
[ 1363.513269] __generic_fsdax_supported+0x8c/0x500
[ 1363.513269] __bdev_dax_supported+0xd0/0x170
[ 1363.513269] __generic_fsdax_supported+0x8c/0x500
[ 1363.513270] __bdev_dax_supported+0xd0/0x170
[ 1363.513270] __generic_fsdax_supported+0x8c/0x500
[ 1363.513270] __bdev_dax_supported+0xd0/0x170
[ 1363.513270] __generic_fsdax_supported+0x8c/0x500
[ 1363.513270] __bdev_dax_supported+0xd0/0x170
[ 1363.513270] __generic_fsdax_supported+0x8c/0x500
[ 1363.513271] __bdev_dax_supported+0xd0/0x170
[ 1363.513271] __generic_fsdax_supported+0x8c/0x500
[ 1363.513271] __bdev_dax_supported+0xd0/0x170
[ 1363.513271] __generic_fsdax_supported+0x8c/0x500
[ 1363.513271] ? drain_obj_stock.isra.71+0x60/0x80
[ 1363.513272] ? prep_new_page+0xb1/0xe0
[ 1363.513272] __bdev_dax_supported+0xd0/0x170
[ 1363.513272] ? get_partial_node.isra.87.part.88+0x14c/0x260
[ 1363.513272] __generic_fsdax_supported+0x8c/0x500
[ 1363.513272] ? __mod_memcg_lruvec_state+0x21/0x100
[ 1363.513273] __bdev_dax_supported+0xd0/0x170
[ 1363.513273] __generic_fsdax_supported+0x8c/0x500
[ 1363.513273] __bdev_dax_supported+0xd0/0x170
[ 1363.513273] __generic_fsdax_supported+0x8c/0x500
[ 1363.513273] ? _cond_resched+0x15/0x30
[ 1363.513273] ? __kmalloc_node+0x4df/0x510
[ 1363.513274] ? crypto_create_tfm_node+0x3a/0xf0
[ 1363.513274] __bdev_dax_supported+0xd0/0x170
[ 1363.513274] ext4_fill_super+0x719/0x31c0 [ext4]
[ 1363.513274] ? bdev_name.isra.9+0x63/0xd0
[ 1363.513274] ? vsnprintf+0x37c/0x520
[ 1363.513275] ? ext4_calculate_overhead+0x490/0x490 [ext4]
[ 1363.513275] ? mount_bdev+0x185/0x1b0
[ 1363.513275] mount_bdev+0x185/0x1b0
[ 1363.513275] legacy_get_tree+0x27/0x40
[ 1363.513275] vfs_get_tree+0x25/0xb0
[ 1363.513275] path_mount+0x676/0x980
[ 1363.513276] do_mount+0x75/0x90
[ 1363.513276] __x64_sys_mount+0xc4/0xe0
[ 1363.513276] do_syscall_64+0x33/0x40
[ 1363.513276] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 1363.513276] RIP: 0033:0x7fbe9a85da8e
[ 1363.513277] Code: 48 8b 0d fd f3 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ca f3 2b 00 f7 d8 64 89 01 48
[ 1363.513277] RSP: 002b:00007fffc4274d88 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5
[ 1363.513278] RAX: ffffffffffffffda RBX: 00005557f8a14460 RCX: 00007fbe9a85da8e
[ 1363.513278] RDX: 00005557f8a14640 RSI: 00005557f8a14680 RDI: 00005557f8a14660
[ 1363.513278] RBP: 00007fbe9b609184 R08: 0000000000000000 R09: 0000000000000003
[ 1363.513278] R10: 00000000c0ed0000 R11: 0000000000000246 R12: 0000000000000000
[ 1363.513279] R13: 00000000c0ed0000 R14: 00005557f8a14660 R15: 00005557f8a14640
[ 1363.513279] Modules linked in: ext4 mbcache jbd2 rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache rfkill sunrpc vfat fat dm_multipath intel_rapl_msr intel_rapl_common isst_if_common skx_edac x86_pkg_temp_thermal intel_powerclamp ipmi_ssif coretemp kvm_intel mgag200 i2c_algo_bit kvm drm_kms_helper iTCO_wdt iTCO_vendor_support syscopyarea sysfillrect dcdbas sysimgblt fb_sys_fops irqbypass crct10dif_pclmul drm crc32_pclmul ghash_clmulni_intel acpi_ipmi rapl ipmi_si intel_cstate mei_me dell_smbios i2c_i801 ipmi_devintf dax_pmem_compat intel_uncore mei wmi_bmof dell_wmi_descripto
[ 1363.513285] Lost 29 message(s)!
[ 1364.493871] ---[ end trace d756ed97f26ed4e7 ]---
[ 1364.493872] RIP: 0010:igrab+0x14/0x50
[ 1364.493872] Code: 62 07 8c 00 85 c0 74 c6 83 f8 01 75 cd eb cd 0f 1f 80 00 00 00 00 0f 1f 44 00 00 55 48 8d af 88 00 00 00 53 48 89 fb 48 89 ef <e8> 27 65 64 00 f6 83 98 00 00 00 30 75 17 f0 ff 83 58 01 00 00 48
[ 1364.493873] RSP: 0018:ffffa700c0e0c000 EFLAGS: 00010246
[ 1364.493873] RAX: 0000000000000000 RBX: ffff95b69e415a10 RCX: 0000000000000001
[ 1364.493873] RDX: 0000000000000000 RSI: ffff95a946a406b8 RDI: ffff95b69e415a98
[ 1364.493874] RBP: ffff95b69e415a98 R08: 0000000500000000 R09: 8080808080808080
[ 1364.493874] R10: 0000000000000000 R11: fefefefefefefeff R12: 0000000000000001
[ 1364.493874] R13: ffff95b69e415a00 R14: 0000000000000000 R15: ffff95b79f4d3800
[ 1364.493874] FS: 00007fbe9b823080(0000) GS:ffff95b87ff80000(0000) knlGS:0000000000000000
[ 1364.493875] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1364.493875] CR2: ffffa700c0e0bff8 CR3: 0000000ffec46002 CR4: 00000000007706e0
[ 1364.493875] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1364.493875] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 1364.493875] PKRU: 55555554
[ 1364.493876] Kernel panic - not syncing: Fatal exception in interrupt
[ 1364.979497] Kernel Offset: 0x12600000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
Best Regards,
Yi Zhang
1 year, 9 months
[PATCH] device-dax: make dev_dax_kmem_probe() static
by Jason Yan
This eliminates the following sparse warning:
drivers/dax/kmem.c:38:5: warning: symbol 'dev_dax_kmem_probe' was not
declared. Should it be static?
Reported-by: Hulk Robot <hulkci(a)huawei.com>
Signed-off-by: Jason Yan <yanaijie(a)huawei.com>
---
drivers/dax/kmem.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
index 7dcb2902e9b1..e79afbadd4e0 100644
--- a/drivers/dax/kmem.c
+++ b/drivers/dax/kmem.c
@@ -35,7 +35,7 @@ static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r)
return 0;
}
-int dev_dax_kmem_probe(struct dev_dax *dev_dax)
+static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
{
int numa_node = dev_dax->target_node;
struct device *dev = &dev_dax->dev;
--
2.25.4
1 year, 9 months
[PATCH v2] powerpc/papr_scm: Fix warning triggered by perf_stats_show()
by Vaibhav Jain
A warning is reported by the kernel in case perf_stats_show() returns
an error code. The warning is of the form below:
papr_scm ibm,persistent-memory:ibm,pmemory@44100001:
Failed to query performance stats, Err:-10
dev_attr_show: perf_stats_show+0x0/0x1c0 [papr_scm] returned bad count
fill_read_buffer: dev_attr_show+0x0/0xb0 returned bad count
On investigation, it looks like the compiler is silently truncating the
return value of drc_pmem_query_stats() from 'long' to 'int', since the
variable 'rc' used to store the return code is an 'int'. This
truncated value is then returned as a 'ssize_t' from
perf_stats_show() to dev_attr_show(), which treats it as a large
unsigned number and triggers this warning.
To fix this, we change the type of variable 'rc' from 'int' to
'ssize_t', which prevents the compiler from truncating the return value
of drc_pmem_query_stats() and ensures the correct signed value is
returned from perf_stats_show().
Fixes: 2d02bf835e573 ('powerpc/papr_scm: Fetch nvdimm performance
stats from PHYP')
Signed-off-by: Vaibhav Jain <vaibhav(a)linux.ibm.com>
---
Changelog:
v2: Added an explicit cast to the expression calling 'seq_buf_used()'
and triggering this issue. [ Ira ]
---
arch/powerpc/platforms/pseries/papr_scm.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index a88a707a608aa..5493bc847bd08 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -785,7 +785,8 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
static ssize_t perf_stats_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- int index, rc;
+ int index;
+ ssize_t rc;
struct seq_buf s;
struct papr_scm_perf_stat *stat;
struct papr_scm_perf_stats *stats;
@@ -820,7 +821,7 @@ static ssize_t perf_stats_show(struct device *dev,
free_stats:
kfree(stats);
- return rc ? rc : seq_buf_used(&s);
+ return rc ? rc : (ssize_t)seq_buf_used(&s);
}
DEVICE_ATTR_ADMIN_RO(perf_stats);
--
2.26.2
1 year, 9 months
[PATCH v2 1/1] dax: Fix stack overflow when mounting fsdax pmem device
by Adrian Huang
From: Adrian Huang <ahuang12(a)lenovo.com>
When mounting fsdax pmem device, commit 6180bb446ab6 ("dax: fix
detection of dax support for non-persistent memory block devices")
introduces the stack overflow [1][2]. Here is the call path for
mounting ext4 file system:
ext4_fill_super
bdev_dax_supported
__bdev_dax_supported
dax_supported
generic_fsdax_supported
__generic_fsdax_supported
bdev_dax_supported
This call path leads to infinite recursion, so we cannot
call bdev_dax_supported() in __generic_fsdax_supported(). The sanity
checking of the variable 'dax_dev' is moved prior to the two
bdev_dax_pgoff() checks [3][4].
[1] https://lists.01.org/hyperkitty/list/linux-nvdimm@lists.01.org/thread/BUL...
[2] https://lists.01.org/hyperkitty/list/linux-nvdimm@lists.01.org/thread/OOZ...
[3] https://lists.01.org/hyperkitty/list/linux-nvdimm@lists.01.org/message/SM...
[4] https://lists.01.org/hyperkitty/list/linux-nvdimm@lists.01.org/message/7E...
Fixes: 6180bb446ab6 ("dax: fix detection of dax support for non-persistent memory block devices")
Cc: Coly Li <colyli(a)suse.de>
Cc: Jan Kara <jack(a)suse.cz>
Cc: Ira Weiny <ira.weiny(a)intel.com>
Cc: John Pittman <jpittman(a)redhat.com>
Cc: Mikulas Patocka <mpatocka(a)redhat.com>
Cc: Alasdair Kergon <agk(a)redhat.com>
Cc: Mike Snitzer <snitzer(a)redhat.com>
Signed-off-by: Adrian Huang <ahuang12(a)lenovo.com>
---
v1->v2
* Remove the checking for the returned value '-EOPNOTSUPP' of
dax_direct_access(). Jan has prepared a patch to address the
issue in dm.
---
drivers/dax/super.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index e5767c83ea23..11d0541e6f8f 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -85,6 +85,12 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
return false;
}
+ if (!dax_dev) {
+ pr_debug("%s: error: dax unsupported by block device\n",
+ bdevname(bdev, buf));
+ return false;
+ }
+
err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
if (err) {
pr_info("%s: error: unaligned partition for dax\n",
@@ -100,12 +106,6 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
return false;
}
- if (!dax_dev || !bdev_dax_supported(bdev, blocksize)) {
- pr_debug("%s: error: dax unsupported by block device\n",
- bdevname(bdev, buf));
- return false;
- }
-
id = dax_read_lock();
len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
--
2.17.1
1 year, 9 months
[PATCH v5 0/5] mm: introduce memfd_secret system call to create "secret" memory areas
by Mike Rapoport
From: Mike Rapoport <rppt(a)linux.ibm.com>
Hi,
This is an implementation of "secret" mappings backed by a file descriptor.
I've dropped the boot time reservation patch for now as it is not strictly
required for the basic usage and can be easily added later either with or
without CMA.
v5 changes:
* rebase on v5.9-rc5
* drop boot time memory reservation patch
v4 changes:
* rebase on v5.9-rc1
* Do not redefine PMD_PAGE_ORDER in fs/dax.c, thanks Kirill
* Make secret mappings exclusive by default and only require flags to
memfd_secret() system call for uncached mappings, thanks again Kirill :)
v3 changes:
* Squash kernel-parameters.txt update into the commit that added the
command line option.
* Make uncached mode explicitly selectable by architectures. For now enable
it only on x86.
v2 changes:
* Follow Michael's suggestion and name the new system call 'memfd_secret'
* Add kernel-parameters documentation about the boot option
* Fix i386-tinyconfig regression reported by the kbuild bot.
CONFIG_SECRETMEM now depends on !EMBEDDED to disable it on small systems
from one side and still make it available unconditionally on
architectures that support SET_DIRECT_MAP.
The file descriptor backing secret memory mappings is created using a
dedicated memfd_secret system call. The desired protection mode for the
memory is configured using flags parameter of the system call. The mmap()
of the file descriptor created with memfd_secret() will create a "secret"
memory mapping. The pages in that mapping will be marked as not present in
the direct map and will have desired protection bits set in the user page
table. For instance, current implementation allows uncached mappings.
Although normally Linux userspace mappings are protected from other users,
such secret mappings are useful for environments where a hostile tenant is
trying to trick the kernel into giving them access to other tenants
mappings.
Additionally, the secret mappings may be used as a means to protect guest
memory in a virtual machine host.
For demonstration of secret memory usage we've created a userspace library
[1] that does two things: first, it acts as a preloader for openssl that
redirects all OPENSSL_malloc calls to secret memory, so any secret
keys are automatically protected this way; second, it
exposes the API to users who need it. We anticipate that a lot of the
use cases would be like the openssl one: many toolkits that deal with
secret keys already have special handling for the memory to try to give
them greater protection, so this would simply be pluggable into the
toolkits without any need for user application modification.
I've hesitated whether to continue to use new flags to memfd_create() or to
add a new system call and I've decided to use a new system call after I've
started to look into man pages update. There would have been two completely
independent descriptions and I think it would have been very confusing.
Hiding secret memory mappings behind an anonymous file allows (ab)use of
the page cache for tracking pages allocated for the "secret" mappings as
well as using address_space_operations for e.g. page migration callbacks.
The anonymous file may be also used implicitly, like hugetlb files, to
implement mmap(MAP_SECRET) and use the secret memory areas with "native" mm
ABIs in the future.
As the fragmentation of the direct map was one of the major concerns raised
during the previous postings, I've added an amortizing cache of PMD-size
pages to each file descriptor that is used as an allocation pool for the
secret memory areas.
v4: https://lore.kernel.org/lkml/20200818141554.13945-1-rppt@kernel.org
v3: https://lore.kernel.org/lkml/20200804095035.18778-1-rppt@kernel.org
v2: https://lore.kernel.org/lkml/20200727162935.31714-1-rppt@kernel.org
v1: https://lore.kernel.org/lkml/20200720092435.17469-1-rppt@kernel.org/
rfc-v2: https://lore.kernel.org/lkml/20200706172051.19465-1-rppt@kernel.org/
rfc-v1: https://lore.kernel.org/lkml/20200130162340.GA14232@rapoport-lnx/
Mike Rapoport (5):
mm: add definition of PMD_PAGE_ORDER
mmap: make mlock_future_check() global
mm: introduce memfd_secret system call to create "secret" memory areas
arch, mm: wire up memfd_secret system call were relevant
mm: secretmem: use PMD-size pages to amortize direct map fragmentation
arch/Kconfig | 7 +
arch/arm64/include/asm/unistd.h | 2 +-
arch/arm64/include/asm/unistd32.h | 2 +
arch/arm64/include/uapi/asm/unistd.h | 1 +
arch/riscv/include/asm/unistd.h | 1 +
arch/x86/Kconfig | 1 +
arch/x86/entry/syscalls/syscall_32.tbl | 1 +
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
fs/dax.c | 11 +-
include/linux/pgtable.h | 3 +
include/linux/syscalls.h | 1 +
include/uapi/asm-generic/unistd.h | 7 +-
include/uapi/linux/magic.h | 1 +
include/uapi/linux/secretmem.h | 8 +
kernel/sys_ni.c | 2 +
mm/Kconfig | 4 +
mm/Makefile | 1 +
mm/internal.h | 3 +
mm/mmap.c | 5 +-
mm/secretmem.c | 333 +++++++++++++++++++++++++
20 files changed, 383 insertions(+), 12 deletions(-)
create mode 100644 include/uapi/linux/secretmem.h
create mode 100644 mm/secretmem.c
--
2.28.0
1 year, 9 months
[PATCH 1/1] dax: Fix stack overflow when mounting fsdax pmem device
by Adrian Huang
From: Adrian Huang <ahuang12(a)lenovo.com>
When mounting fsdax pmem device, commit 6180bb446ab6 ("dax: fix
detection of dax support for non-persistent memory block devices")
introduces a stack overflow [1][2]. Here is the call path for
mounting ext4 file system:
ext4_fill_super
bdev_dax_supported
__bdev_dax_supported
dax_supported
generic_fsdax_supported
__generic_fsdax_supported
bdev_dax_supported
This call path leads to an infinite calling loop, so we cannot
call bdev_dax_supported() in __generic_fsdax_supported(). The sanity
checking of the variable 'dax_dev' is moved prior to the two
bdev_dax_pgoff() checks [3][4].
To fix the issue triggered by lvm2-testsuite (the issue that the
above-mentioned commit wants to fix), this patch does not print the
"error: dax access failed" message if the physical disk does not
support DAX (dax_dev is NULL). The detail info is described as follows:
1. The dax_dev of the dm devices (dm-0, dm-1..) is always allocated
in alloc_dev() [drivers/md/dm.c].
2. When calling __generic_fsdax_supported() with dm-0 device, the
call path is shown as follows (the physical disks of dm-0 do
not support DAX):
dax_direct_access (valid dax_dev with dm-0)
dax_dev->ops->direct_access
dm_dax_direct_access
ti->type->direct_access
linear_dax_direct_access (assume the target is linear)
dax_direct_access (dax_dev is NULL with ram0, or sdaX)
3. The call 'dax_direct_access()' in __generic_fsdax_supported() gets
the returned value '-EOPNOTSUPP'.
4. However, the message 'dm-3: error: dax access failed (-5)' is still
printed for the dm target 'error' since io_err_dax_direct_access()
always returns the status '-EIO'. Cc'ing device mapper maintainers to
see if they have concerns.
[1] https://lists.01.org/hyperkitty/list/linux-nvdimm@lists.01.org/thread/BUL...
[2] https://lists.01.org/hyperkitty/list/linux-nvdimm@lists.01.org/thread/OOZ...
[3] https://lists.01.org/hyperkitty/list/linux-nvdimm@lists.01.org/message/SM...
[4] https://lists.01.org/hyperkitty/list/linux-nvdimm@lists.01.org/message/7E...
Fixes: 6180bb446ab6 ("dax: fix detection of dax support for non-persistent memory block devices")
Cc: Coly Li <colyli(a)suse.de>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Vishal Verma <vishal.l.verma(a)intel.com>
Cc: Dave Jiang <dave.jiang(a)intel.com>
Cc: Ira Weiny <ira.weiny(a)intel.com>
Cc: John Pittman <jpittman(a)redhat.com>
Cc: Mikulas Patocka <mpatocka(a)redhat.com>
Cc: Alasdair Kergon <agk(a)redhat.com>
Cc: Mike Snitzer <snitzer(a)redhat.com>
Signed-off-by: Adrian Huang <ahuang12(a)lenovo.com>
---
drivers/dax/super.c | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index e5767c83ea23..fb151417ec10 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -85,6 +85,12 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
return false;
}
+ if (!dax_dev) {
+ pr_debug("%s: error: dax unsupported by block device\n",
+ bdevname(bdev, buf));
+ return false;
+ }
+
err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
if (err) {
pr_info("%s: error: unaligned partition for dax\n",
@@ -100,19 +106,22 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
return false;
}
- if (!dax_dev || !bdev_dax_supported(bdev, blocksize)) {
- pr_debug("%s: error: dax unsupported by block device\n",
- bdevname(bdev, buf));
- return false;
- }
-
id = dax_read_lock();
len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
if (len < 1 || len2 < 1) {
- pr_info("%s: error: dax access failed (%ld)\n",
+ /*
+ * Only print the real error message: do not need to print
+ * the message for the underlying raw disk (physical disk)
+ * that does not support DAX (dax_dev = NULL). This case
+ * is observed when physical disks are configured by
+ * lvm2 (device mapper).
+ */
+ if (len != -EOPNOTSUPP && len2 != -EOPNOTSUPP) {
+ pr_info("%s: error: dax access failed (%ld)\n",
bdevname(bdev, buf), len < 1 ? len : len2);
+ }
dax_read_unlock(id);
return false;
}
--
2.17.1
1 year, 9 months