[PATCH v6] libnvdimm: fix clear poison locking with spinlock and GFP_NOWAIT allocation
by Dave Jiang
The following warning results from holding a lane spinlock,
preempt_disable(), or the btt map spinlock and then trying to take the
reconfig_mutex to walk the poison list and potentially add new entries.
BUG: sleeping function called from invalid context at kernel/locking/mutex.c:747
in_atomic(): 1, irqs_disabled(): 0, pid: 17159, name: dd
[..]
Call Trace:
dump_stack+0x85/0xc8
___might_sleep+0x184/0x250
__might_sleep+0x4a/0x90
__mutex_lock+0x58/0x9b0
? nvdimm_bus_lock+0x21/0x30 [libnvdimm]
? __nvdimm_bus_badblocks_clear+0x2f/0x60 [libnvdimm]
? acpi_nfit_forget_poison+0x79/0x80 [nfit]
? _raw_spin_unlock+0x27/0x40
mutex_lock_nested+0x1b/0x20
nvdimm_bus_lock+0x21/0x30 [libnvdimm]
nvdimm_forget_poison+0x25/0x50 [libnvdimm]
nvdimm_clear_poison+0x106/0x140 [libnvdimm]
nsio_rw_bytes+0x164/0x270 [libnvdimm]
btt_write_pg+0x1de/0x3e0 [nd_btt]
? blk_queue_enter+0x30/0x290
btt_make_request+0x11a/0x310 [nd_btt]
? blk_queue_enter+0xb7/0x290
? blk_queue_enter+0x30/0x290
generic_make_request+0x118/0x3b0
A spinlock is introduced to protect the poison list. This allows us to avoid
having to acquire the reconfig_mutex for touching the poison list. The
add_poison() function has been broken out into two helper functions. One to
allocate the poison entry and the other to append the entry. This allows us
to unlock the poison_lock in the non-I/O path and continue to be able to allocate
the poison entry with GFP_KERNEL. We will use GFP_NOWAIT in the I/O path in
order to satisfy being in atomic context.
Signed-off-by: Dave Jiang <dave.jiang(a)intel.com>
---
v2:
Dan
- remove mempool and just use GFP_NOWAIT
v3:
Vishal
- remove unneeded code
v4:
Dan
- fixup commit header
- GFP_NOWAIT only in I/O path
v5:
Dan
- address issue where when lock released to allocate, the list has updated
v6:
Dan
- open code poison entry allocation
- remove NULL check for kfree()
drivers/nvdimm/bus.c | 7 +++---
drivers/nvdimm/core.c | 56 +++++++++++++++++++++++++++------------------
drivers/nvdimm/nd-core.h | 1 +
include/linux/libnvdimm.h | 2 --
4 files changed, 38 insertions(+), 28 deletions(-)
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 5ad2e59..d214ac44 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -296,6 +296,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
init_waitqueue_head(&nvdimm_bus->probe_wait);
nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
mutex_init(&nvdimm_bus->reconfig_mutex);
+ spin_lock_init(&nvdimm_bus->poison_lock);
if (nvdimm_bus->id < 0) {
kfree(nvdimm_bus);
return NULL;
@@ -364,9 +365,9 @@ static int nd_bus_remove(struct device *dev)
nd_synchronize();
device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
- nvdimm_bus_lock(&nvdimm_bus->dev);
+ spin_lock(&nvdimm_bus->poison_lock);
free_poison_list(&nvdimm_bus->poison_list);
- nvdimm_bus_unlock(&nvdimm_bus->dev);
+ spin_unlock(&nvdimm_bus->poison_lock);
nvdimm_bus_destroy_ndctl(nvdimm_bus);
@@ -990,7 +991,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
if (clear_err->cleared) {
/* clearing the poison list we keep track of */
- __nvdimm_forget_poison(nvdimm_bus, clear_err->address,
+ nvdimm_forget_poison(nvdimm_bus, clear_err->address,
clear_err->cleared);
/* now sync the badblocks lists */
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 40a3da0..2dee908 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -518,6 +518,15 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region,
}
EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);
+static void append_poison_entry(struct nvdimm_bus *nvdimm_bus,
+ struct nd_poison *pl, u64 addr, u64 length)
+{
+ lockdep_assert_held(&nvdimm_bus->poison_lock);
+ pl->start = addr;
+ pl->length = length;
+ list_add_tail(&pl->list, &nvdimm_bus->poison_list);
+}
+
static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length,
gfp_t flags)
{
@@ -527,19 +536,24 @@ static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length,
if (!pl)
return -ENOMEM;
- pl->start = addr;
- pl->length = length;
- list_add_tail(&pl->list, &nvdimm_bus->poison_list);
-
+ append_poison_entry(nvdimm_bus, pl, addr, length);
return 0;
}
static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
{
- struct nd_poison *pl;
+ struct nd_poison *pl, *pl_new;
- if (list_empty(&nvdimm_bus->poison_list))
- return add_poison(nvdimm_bus, addr, length, GFP_KERNEL);
+ spin_unlock(&nvdimm_bus->poison_lock);
+ pl_new = kzalloc(sizeof(*pl_new), GFP_KERNEL);
+ spin_lock(&nvdimm_bus->poison_lock);
+
+ if (list_empty(&nvdimm_bus->poison_list)) {
+ if (!pl_new)
+ return -ENOMEM;
+ append_poison_entry(nvdimm_bus, pl_new, addr, length);
+ return 0;
+ }
/*
* There is a chance this is a duplicate, check for those first.
@@ -551,6 +565,7 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
/* If length has changed, update this list entry */
if (pl->length != length)
pl->length = length;
+ kfree(pl_new);
return 0;
}
@@ -559,30 +574,33 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
* as any overlapping ranges will get resolved when the list is consumed
* and converted to badblocks
*/
- return add_poison(nvdimm_bus, addr, length, GFP_KERNEL);
+ if (!pl_new)
+ return -ENOMEM;
+ append_poison_entry(nvdimm_bus, pl_new, addr, length);
+
+ return 0;
}
int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
{
int rc;
- nvdimm_bus_lock(&nvdimm_bus->dev);
+ spin_lock(&nvdimm_bus->poison_lock);
rc = bus_add_poison(nvdimm_bus, addr, length);
- nvdimm_bus_unlock(&nvdimm_bus->dev);
+ spin_unlock(&nvdimm_bus->poison_lock);
return rc;
}
EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
-void __nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, phys_addr_t start,
+void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, phys_addr_t start,
unsigned int len)
{
struct list_head *poison_list = &nvdimm_bus->poison_list;
u64 clr_end = start + len - 1;
struct nd_poison *pl, *next;
- lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
-
+ spin_lock(&nvdimm_bus->poison_lock);
WARN_ON_ONCE(list_empty(poison_list));
/*
@@ -629,21 +647,13 @@ void __nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, phys_addr_t start,
u64 new_len = pl_end - new_start + 1;
/* Add new entry covering the right half */
- add_poison(nvdimm_bus, new_start, new_len, GFP_NOIO);
+ add_poison(nvdimm_bus, new_start, new_len, GFP_NOWAIT);
/* Adjust this entry to cover the left half */
pl->length = start - pl->start;
continue;
}
}
-}
-EXPORT_SYMBOL_GPL(__nvdimm_forget_poison);
-
-void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus,
- phys_addr_t start, unsigned int len)
-{
- nvdimm_bus_lock(&nvdimm_bus->dev);
- __nvdimm_forget_poison(nvdimm_bus, start, len);
- nvdimm_bus_unlock(&nvdimm_bus->dev);
+ spin_unlock(&nvdimm_bus->poison_lock);
}
EXPORT_SYMBOL_GPL(nvdimm_forget_poison);
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 8623e57..4c4bd20 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -32,6 +32,7 @@ struct nvdimm_bus {
struct list_head poison_list;
struct list_head mapping_list;
struct mutex reconfig_mutex;
+ spinlock_t poison_lock;
};
struct nvdimm {
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 1c609e8..98b2076 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -122,8 +122,6 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus,
phys_addr_t start, unsigned int len);
-void __nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus,
- phys_addr_t start, unsigned int len);
struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
struct nvdimm_bus_descriptor *nfit_desc);
void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
5 years, 5 months
Re: [PATCH 04/22] target: Make use of the new sg_map function at 16 call sites (fwd)
by Julia Lawall
It looks like &udev->cmdr_lock should be released at line 512 if it has
not been released otherwise. The lock was taken at line 438.
julia
---------- Forwarded message ----------
Date: Fri, 14 Apr 2017 22:21:44 +0800
From: kbuild test robot <fengguang.wu(a)intel.com>
To: kbuild(a)01.org
Cc: Julia Lawall <julia.lawall(a)lip6.fr>
Subject: Re: [PATCH 04/22] target: Make use of the new sg_map function at 16
call sites
Hi Logan,
[auto build test WARNING on scsi/for-next]
[also build test WARNING on v4.11-rc6]
[cannot apply to next-20170413]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Logan-Gunthorpe/Introduce-common...
base: https://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git for-next
:::::: branch date: 8 hours ago
:::::: commit date: 8 hours ago
>> drivers/target/target_core_user.c:512:2-8: preceding lock on line 438
drivers/target/target_core_user.c:512:2-8: preceding lock on line 471
git remote add linux-review https://github.com/0day-ci/linux
git remote update linux-review
git checkout 78082134e7afdc59d744eb8d2def5c588e89c378
vim +512 drivers/target/target_core_user.c
7c9e7a6f Andy Grover 2014-10-01 432 sizeof(struct tcmu_cmd_entry));
7c9e7a6f Andy Grover 2014-10-01 433 command_size = base_command_size
7c9e7a6f Andy Grover 2014-10-01 434 + round_up(scsi_command_size(se_cmd->t_task_cdb), TCMU_OP_ALIGN_SIZE);
7c9e7a6f Andy Grover 2014-10-01 435
7c9e7a6f Andy Grover 2014-10-01 436 WARN_ON(command_size & (TCMU_OP_ALIGN_SIZE-1));
7c9e7a6f Andy Grover 2014-10-01 437
7c9e7a6f Andy Grover 2014-10-01 @438 spin_lock_irq(&udev->cmdr_lock);
7c9e7a6f Andy Grover 2014-10-01 439
7c9e7a6f Andy Grover 2014-10-01 440 mb = udev->mb_addr;
7c9e7a6f Andy Grover 2014-10-01 441 cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
26418649 Sheng Yang 2016-02-26 442 data_length = se_cmd->data_length;
26418649 Sheng Yang 2016-02-26 443 if (se_cmd->se_cmd_flags & SCF_BIDI) {
26418649 Sheng Yang 2016-02-26 444 BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents));
26418649 Sheng Yang 2016-02-26 445 data_length += se_cmd->t_bidi_data_sg->length;
26418649 Sheng Yang 2016-02-26 446 }
554617b2 Andy Grover 2016-08-25 447 if ((command_size > (udev->cmdr_size / 2)) ||
554617b2 Andy Grover 2016-08-25 448 data_length > udev->data_size) {
554617b2 Andy Grover 2016-08-25 449 pr_warn("TCMU: Request of size %zu/%zu is too big for %u/%zu "
3d9b9555 Andy Grover 2016-08-25 450 "cmd ring/data area\n", command_size, data_length,
7c9e7a6f Andy Grover 2014-10-01 451 udev->cmdr_size, udev->data_size);
554617b2 Andy Grover 2016-08-25 452 spin_unlock_irq(&udev->cmdr_lock);
554617b2 Andy Grover 2016-08-25 453 return TCM_INVALID_CDB_FIELD;
554617b2 Andy Grover 2016-08-25 454 }
7c9e7a6f Andy Grover 2014-10-01 455
26418649 Sheng Yang 2016-02-26 456 while (!is_ring_space_avail(udev, command_size, data_length)) {
7c9e7a6f Andy Grover 2014-10-01 457 int ret;
7c9e7a6f Andy Grover 2014-10-01 458 DEFINE_WAIT(__wait);
7c9e7a6f Andy Grover 2014-10-01 459
7c9e7a6f Andy Grover 2014-10-01 460 prepare_to_wait(&udev->wait_cmdr, &__wait, TASK_INTERRUPTIBLE);
7c9e7a6f Andy Grover 2014-10-01 461
7c9e7a6f Andy Grover 2014-10-01 462 pr_debug("sleeping for ring space\n");
7c9e7a6f Andy Grover 2014-10-01 463 spin_unlock_irq(&udev->cmdr_lock);
7c9e7a6f Andy Grover 2014-10-01 464 ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT));
7c9e7a6f Andy Grover 2014-10-01 465 finish_wait(&udev->wait_cmdr, &__wait);
7c9e7a6f Andy Grover 2014-10-01 466 if (!ret) {
7c9e7a6f Andy Grover 2014-10-01 467 pr_warn("tcmu: command timed out\n");
02eb924f Andy Grover 2016-10-06 468 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
7c9e7a6f Andy Grover 2014-10-01 469 }
7c9e7a6f Andy Grover 2014-10-01 470
7c9e7a6f Andy Grover 2014-10-01 471 spin_lock_irq(&udev->cmdr_lock);
7c9e7a6f Andy Grover 2014-10-01 472
7c9e7a6f Andy Grover 2014-10-01 473 /* We dropped cmdr_lock, cmd_head is stale */
7c9e7a6f Andy Grover 2014-10-01 474 cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
7c9e7a6f Andy Grover 2014-10-01 475 }
7c9e7a6f Andy Grover 2014-10-01 476
f56574a2 Andy Grover 2014-10-02 477 /* Insert a PAD if end-of-ring space is too small */
f56574a2 Andy Grover 2014-10-02 478 if (head_to_end(cmd_head, udev->cmdr_size) < command_size) {
f56574a2 Andy Grover 2014-10-02 479 size_t pad_size = head_to_end(cmd_head, udev->cmdr_size);
f56574a2 Andy Grover 2014-10-02 480
7c9e7a6f Andy Grover 2014-10-01 481 entry = (void *) mb + CMDR_OFF + cmd_head;
7c9e7a6f Andy Grover 2014-10-01 482 tcmu_flush_dcache_range(entry, sizeof(*entry));
0ad46af8 Andy Grover 2015-04-14 483 tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_PAD);
0ad46af8 Andy Grover 2015-04-14 484 tcmu_hdr_set_len(&entry->hdr.len_op, pad_size);
0ad46af8 Andy Grover 2015-04-14 485 entry->hdr.cmd_id = 0; /* not used for PAD */
0ad46af8 Andy Grover 2015-04-14 486 entry->hdr.kflags = 0;
0ad46af8 Andy Grover 2015-04-14 487 entry->hdr.uflags = 0;
7c9e7a6f Andy Grover 2014-10-01 488
7c9e7a6f Andy Grover 2014-10-01 489 UPDATE_HEAD(mb->cmd_head, pad_size, udev->cmdr_size);
7c9e7a6f Andy Grover 2014-10-01 490
7c9e7a6f Andy Grover 2014-10-01 491 cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
7c9e7a6f Andy Grover 2014-10-01 492 WARN_ON(cmd_head != 0);
7c9e7a6f Andy Grover 2014-10-01 493 }
7c9e7a6f Andy Grover 2014-10-01 494
7c9e7a6f Andy Grover 2014-10-01 495 entry = (void *) mb + CMDR_OFF + cmd_head;
7c9e7a6f Andy Grover 2014-10-01 496 tcmu_flush_dcache_range(entry, sizeof(*entry));
0ad46af8 Andy Grover 2015-04-14 497 tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_CMD);
0ad46af8 Andy Grover 2015-04-14 498 tcmu_hdr_set_len(&entry->hdr.len_op, command_size);
0ad46af8 Andy Grover 2015-04-14 499 entry->hdr.cmd_id = tcmu_cmd->cmd_id;
0ad46af8 Andy Grover 2015-04-14 500 entry->hdr.kflags = 0;
0ad46af8 Andy Grover 2015-04-14 501 entry->hdr.uflags = 0;
7c9e7a6f Andy Grover 2014-10-01 502
26418649 Sheng Yang 2016-02-26 503 bitmap_copy(old_bitmap, udev->data_bitmap, DATA_BLOCK_BITS);
26418649 Sheng Yang 2016-02-26 504
3d9b9555 Andy Grover 2016-08-25 505 /* Handle allocating space from the data area */
7c9e7a6f Andy Grover 2014-10-01 506 iov = &entry->req.iov[0];
f97ec7db Ilias Tsitsimpis 2015-04-23 507 iov_cnt = 0;
e4648b01 Ilias Tsitsimpis 2015-04-23 508 copy_to_data_area = (se_cmd->data_direction == DMA_TO_DEVICE
e4648b01 Ilias Tsitsimpis 2015-04-23 509 || se_cmd->se_cmd_flags & SCF_BIDI);
78082134 Logan Gunthorpe 2017-04-13 510 if (alloc_and_scatter_data_area(udev, se_cmd->t_data_sg,
78082134 Logan Gunthorpe 2017-04-13 511 se_cmd->t_data_nents, &iov, &iov_cnt, copy_to_data_area))
78082134 Logan Gunthorpe 2017-04-13 @512 return TCM_OUT_OF_RESOURCES;
78082134 Logan Gunthorpe 2017-04-13 513
7c9e7a6f Andy Grover 2014-10-01 514 entry->req.iov_cnt = iov_cnt;
0ad46af8 Andy Grover 2015-04-14 515 entry->req.iov_dif_cnt = 0;
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
5 years, 5 months
[PATCH v5] libnvdimm: fix clear poison locking with spinlock and GFP_NOWAIT allocation
by Dave Jiang
The following warning results from holding a lane spinlock,
preempt_disable(), or the btt map spinlock and then trying to take the
reconfig_mutex to walk the poison list and potentially add new entries.
BUG: sleeping function called from invalid context at kernel/locking/mutex.c:747
in_atomic(): 1, irqs_disabled(): 0, pid: 17159, name: dd
[..]
Call Trace:
dump_stack+0x85/0xc8
___might_sleep+0x184/0x250
__might_sleep+0x4a/0x90
__mutex_lock+0x58/0x9b0
? nvdimm_bus_lock+0x21/0x30 [libnvdimm]
? __nvdimm_bus_badblocks_clear+0x2f/0x60 [libnvdimm]
? acpi_nfit_forget_poison+0x79/0x80 [nfit]
? _raw_spin_unlock+0x27/0x40
mutex_lock_nested+0x1b/0x20
nvdimm_bus_lock+0x21/0x30 [libnvdimm]
nvdimm_forget_poison+0x25/0x50 [libnvdimm]
nvdimm_clear_poison+0x106/0x140 [libnvdimm]
nsio_rw_bytes+0x164/0x270 [libnvdimm]
btt_write_pg+0x1de/0x3e0 [nd_btt]
? blk_queue_enter+0x30/0x290
btt_make_request+0x11a/0x310 [nd_btt]
? blk_queue_enter+0xb7/0x290
? blk_queue_enter+0x30/0x290
generic_make_request+0x118/0x3b0
A spinlock is introduced to protect the poison list. This allows us to avoid
having to acquire the reconfig_mutex for touching the poison list. The
add_poison() function has been broken out into two helper functions. One to
allocate the poison entry and the other to append the entry. This allows us
to unlock the poison_lock in the non-I/O path and continue to be able to allocate
the poison entry with GFP_KERNEL. We will use GFP_NOWAIT in the I/O path in
order to satisfy being in atomic context.
Signed-off-by: Dave Jiang <dave.jiang(a)intel.com>
---
v2:
Dan
- remove mempool and just use GFP_NOWAIT
v3:
Vishal
- remove unneeded code
v4:
Dan
- fixup commit header
- GFP_NOWAIT only in I/O path
v5:
Dan
- address issue where when lock released to allocate, the list has updated
drivers/nvdimm/bus.c | 7 +++--
drivers/nvdimm/core.c | 65 ++++++++++++++++++++++++++++-----------------
drivers/nvdimm/nd-core.h | 1 +
include/linux/libnvdimm.h | 2 -
4 files changed, 46 insertions(+), 29 deletions(-)
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 5ad2e59..d214ac44 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -296,6 +296,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
init_waitqueue_head(&nvdimm_bus->probe_wait);
nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
mutex_init(&nvdimm_bus->reconfig_mutex);
+ spin_lock_init(&nvdimm_bus->poison_lock);
if (nvdimm_bus->id < 0) {
kfree(nvdimm_bus);
return NULL;
@@ -364,9 +365,9 @@ static int nd_bus_remove(struct device *dev)
nd_synchronize();
device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
- nvdimm_bus_lock(&nvdimm_bus->dev);
+ spin_lock(&nvdimm_bus->poison_lock);
free_poison_list(&nvdimm_bus->poison_list);
- nvdimm_bus_unlock(&nvdimm_bus->dev);
+ spin_unlock(&nvdimm_bus->poison_lock);
nvdimm_bus_destroy_ndctl(nvdimm_bus);
@@ -990,7 +991,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
if (clear_err->cleared) {
/* clearing the poison list we keep track of */
- __nvdimm_forget_poison(nvdimm_bus, clear_err->address,
+ nvdimm_forget_poison(nvdimm_bus, clear_err->address,
clear_err->cleared);
/* now sync the badblocks lists */
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 40a3da0..280ab69 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -518,28 +518,48 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region,
}
EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);
+static struct nd_poison * alloc_poison_entry(struct nvdimm_bus *nvdimm_bus,
+ gfp_t flags)
+{
+ return kzalloc(sizeof(struct nd_poison), flags);
+}
+
+static void append_poison_entry(struct nvdimm_bus *nvdimm_bus,
+ struct nd_poison *pl, u64 addr, u64 length)
+{
+ lockdep_assert_held(&nvdimm_bus->poison_lock);
+ pl->start = addr;
+ pl->length = length;
+ list_add_tail(&pl->list, &nvdimm_bus->poison_list);
+}
+
static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length,
gfp_t flags)
{
struct nd_poison *pl;
- pl = kzalloc(sizeof(*pl), flags);
+ pl = alloc_poison_entry(nvdimm_bus, flags);
if (!pl)
return -ENOMEM;
- pl->start = addr;
- pl->length = length;
- list_add_tail(&pl->list, &nvdimm_bus->poison_list);
-
+ append_poison_entry(nvdimm_bus, pl, addr, length);
return 0;
}
static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
{
- struct nd_poison *pl;
+ struct nd_poison *pl, *pl_new;
- if (list_empty(&nvdimm_bus->poison_list))
- return add_poison(nvdimm_bus, addr, length, GFP_KERNEL);
+ spin_unlock(&nvdimm_bus->poison_lock);
+ pl_new = alloc_poison_entry(nvdimm_bus, GFP_KERNEL);
+ spin_lock(&nvdimm_bus->poison_lock);
+
+ if (list_empty(&nvdimm_bus->poison_list)) {
+ if (!pl_new)
+ return -ENOMEM;
+ append_poison_entry(nvdimm_bus, pl_new, addr, length);
+ return 0;
+ }
/*
* There is a chance this is a duplicate, check for those first.
@@ -551,6 +571,8 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
/* If length has changed, update this list entry */
if (pl->length != length)
pl->length = length;
+ if (pl_new)
+ kfree(pl_new);
return 0;
}
@@ -559,30 +581,33 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
* as any overlapping ranges will get resolved when the list is consumed
* and converted to badblocks
*/
- return add_poison(nvdimm_bus, addr, length, GFP_KERNEL);
+ if (!pl_new)
+ return -ENOMEM;
+ append_poison_entry(nvdimm_bus, pl_new, addr, length);
+
+ return 0;
}
int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
{
int rc;
- nvdimm_bus_lock(&nvdimm_bus->dev);
+ spin_lock(&nvdimm_bus->poison_lock);
rc = bus_add_poison(nvdimm_bus, addr, length);
- nvdimm_bus_unlock(&nvdimm_bus->dev);
+ spin_unlock(&nvdimm_bus->poison_lock);
return rc;
}
EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
-void __nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, phys_addr_t start,
+void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, phys_addr_t start,
unsigned int len)
{
struct list_head *poison_list = &nvdimm_bus->poison_list;
u64 clr_end = start + len - 1;
struct nd_poison *pl, *next;
- lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
-
+ spin_lock(&nvdimm_bus->poison_lock);
WARN_ON_ONCE(list_empty(poison_list));
/*
@@ -629,21 +654,13 @@ void __nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, phys_addr_t start,
u64 new_len = pl_end - new_start + 1;
/* Add new entry covering the right half */
- add_poison(nvdimm_bus, new_start, new_len, GFP_NOIO);
+ add_poison(nvdimm_bus, new_start, new_len, GFP_NOWAIT);
/* Adjust this entry to cover the left half */
pl->length = start - pl->start;
continue;
}
}
-}
-EXPORT_SYMBOL_GPL(__nvdimm_forget_poison);
-
-void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus,
- phys_addr_t start, unsigned int len)
-{
- nvdimm_bus_lock(&nvdimm_bus->dev);
- __nvdimm_forget_poison(nvdimm_bus, start, len);
- nvdimm_bus_unlock(&nvdimm_bus->dev);
+ spin_unlock(&nvdimm_bus->poison_lock);
}
EXPORT_SYMBOL_GPL(nvdimm_forget_poison);
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 8623e57..4c4bd20 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -32,6 +32,7 @@ struct nvdimm_bus {
struct list_head poison_list;
struct list_head mapping_list;
struct mutex reconfig_mutex;
+ spinlock_t poison_lock;
};
struct nvdimm {
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 1c609e8..98b2076 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -122,8 +122,6 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus,
phys_addr_t start, unsigned int len);
-void __nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus,
- phys_addr_t start, unsigned int len);
struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
struct nvdimm_bus_descriptor *nfit_desc);
void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
5 years, 5 months
Re: Question about NVDIMM configuration
by Dan Williams
On Thu, Apr 13, 2017 at 2:40 AM, Yasunori Goto <y-goto(a)jp.fujitsu.com> wrote:
> Dan-san,
>
> Thank you for your comment. It is greatly helpful.
> I would like to ask more questions below...
>
>> On Wed, Apr 12, 2017 at 2:46 AM, Yasunori Goto <y-goto(a)jp.fujitsu.com> wrote:
>> >
>> > Hello,
>> >
>> > I'm studying NVDIMM specs and current Linux driver implementation now.
>> > But I'm really confused about it, and I would like to ask a question here.
>> > So, I'm glad if anyone help me.
>> >
>> >
>> > My current question is ...
>> > "Which is correct scenario of user's operation? a) or b)?"
>> >
>> > a) NVDIMM user has to configure all of NVDIMM attributes (regions, namespaces,
>> > interleave, and so on) via ndctl command.
>> >
>> > b) NVDIMM user has to configure the following things via firmware interface
>> > (like BIOS menu) beforehand.
>> > - regions (its size, place of NVDIMM, etc)
>> > - interleaves
>> > Then he/she boots the OS and configures only namespaces via ndctl.
>> >
>> > In my understanding, the NVDIMM driver has to read NFIT tables at boot time, and
>> > read many information (like block window apertures, interleave information,
>> > and so on). To read NFIT at boot time, its configuration must be set before
>> > system boot. So, I feel that b) is natural.
>> >
>> > However, ndctl command has the feature that user can change the type of
>> > namespace (pmem or blk). In addition, user can set size of namespace.
>> >
>> > Then I suppose that NFIT table must be changed
>> > when user changed the above attributes of NVDIMM via ndctl to reconfigure
>> > NVDIMM, and NVDIMM drivers has to check _FIT method and read changed NFIT
>> > again.
>> > If this is correct, I think that not only namespace, but also all of
>> > attributes of NVDIMM should be configured via ndctl. So, a) looks correct.....
>> >
>> > I'm not sure which is correct, and I don't have real NVDIMM server to confirm it.
>> > So, please answer my question.
>>
>> Hi,
>>
>> The answer is that BIOS and vendor specific commands set the region
>> sizes.
>
> I would like to confirm what is "vendor specific commands" here.
> "Intel _DSM Interface Example" defineds "vendor specific commands" to
> call bios features, right? Do you mean this _DSM's vendor specific commands?
> Otherwise, do you mean a server vendor's utility command which calls bios via IPMI?
I was speaking in general terms because the ACPI specification does
not document a mechanism to change region boundaries.
>> Changing the region boundaries requires a reboot because it
>> changes memory controller register settings.
>
> If user installs a new NVDIMM module to system board and boots the OS
> "without" set the region boundaries (for example, he forget to set it),
> what status is shown by NVDIMM driver/ndctl command?
> - nothing is shown?, or
> - uninitialized nvdimm is displayed?
It depends on the platform vendor whether an NFIT entry is
generated for uninitialized DIMMs. Ideally the platform would show an
unconfigured DIMM or force it into a default configuration.
5 years, 5 months
[PATCH v4] libnvdimm: fix clear poison locking with spinlock and GFP_NOWAIT allocation
by Dave Jiang
The following warning results from holding a lane spinlock,
preempt_disable(), or the btt map spinlock and then trying to take the
reconfig_mutex to walk the poison list and potentially add new entries.
BUG: sleeping function called from invalid context at kernel/locking/mutex.c:747
in_atomic(): 1, irqs_disabled(): 0, pid: 17159, name: dd
[..]
Call Trace:
dump_stack+0x85/0xc8
___might_sleep+0x184/0x250
__might_sleep+0x4a/0x90
__mutex_lock+0x58/0x9b0
? nvdimm_bus_lock+0x21/0x30 [libnvdimm]
? __nvdimm_bus_badblocks_clear+0x2f/0x60 [libnvdimm]
? acpi_nfit_forget_poison+0x79/0x80 [nfit]
? _raw_spin_unlock+0x27/0x40
mutex_lock_nested+0x1b/0x20
nvdimm_bus_lock+0x21/0x30 [libnvdimm]
nvdimm_forget_poison+0x25/0x50 [libnvdimm]
nvdimm_clear_poison+0x106/0x140 [libnvdimm]
nsio_rw_bytes+0x164/0x270 [libnvdimm]
btt_write_pg+0x1de/0x3e0 [nd_btt]
? blk_queue_enter+0x30/0x290
btt_make_request+0x11a/0x310 [nd_btt]
? blk_queue_enter+0xb7/0x290
? blk_queue_enter+0x30/0x290
generic_make_request+0x118/0x3b0
A spinlock is introduced to protect the poison list. This allows us to avoid
having to acquire the reconfig_mutex for touching the poison list. The
add_poison() function has been broken out into two helper functions. One to
allocate the poison entry and the other to append the entry. This allows us
to unlock the poison_lock in the non-I/O path and continue to be able to allocate
the poison entry with GFP_KERNEL. We will use GFP_NOWAIT in the I/O path in
order to satisfy being in atomic context.
Signed-off-by: Dave Jiang <dave.jiang(a)intel.com>
---
drivers/nvdimm/bus.c | 7 +++--
drivers/nvdimm/core.c | 61 ++++++++++++++++++++++++++++-----------------
drivers/nvdimm/nd-core.h | 1 +
include/linux/libnvdimm.h | 2 -
4 files changed, 43 insertions(+), 28 deletions(-)
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 5ad2e59..d214ac44 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -296,6 +296,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
init_waitqueue_head(&nvdimm_bus->probe_wait);
nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
mutex_init(&nvdimm_bus->reconfig_mutex);
+ spin_lock_init(&nvdimm_bus->poison_lock);
if (nvdimm_bus->id < 0) {
kfree(nvdimm_bus);
return NULL;
@@ -364,9 +365,9 @@ static int nd_bus_remove(struct device *dev)
nd_synchronize();
device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
- nvdimm_bus_lock(&nvdimm_bus->dev);
+ spin_lock(&nvdimm_bus->poison_lock);
free_poison_list(&nvdimm_bus->poison_list);
- nvdimm_bus_unlock(&nvdimm_bus->dev);
+ spin_unlock(&nvdimm_bus->poison_lock);
nvdimm_bus_destroy_ndctl(nvdimm_bus);
@@ -990,7 +991,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
if (clear_err->cleared) {
/* clearing the poison list we keep track of */
- __nvdimm_forget_poison(nvdimm_bus, clear_err->address,
+ nvdimm_forget_poison(nvdimm_bus, clear_err->address,
clear_err->cleared);
/* now sync the badblocks lists */
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 40a3da0..dfd5ba6 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -518,28 +518,47 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region,
}
EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);
+static struct nd_poison * alloc_poison_entry(struct nvdimm_bus *nvdimm_bus,
+ gfp_t flags)
+{
+ return (struct nd_poison *)kzalloc(sizeof(struct nd_poison), flags);
+}
+
+static void append_poison_entry(struct nvdimm_bus *nvdimm_bus,
+ struct nd_poison *pl, u64 addr, u64 length)
+{
+ lockdep_assert_held(&nvdimm_bus->poison_lock);
+ pl->start = addr;
+ pl->length = length;
+ list_add_tail(&pl->list, &nvdimm_bus->poison_list);
+}
+
static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length,
gfp_t flags)
{
struct nd_poison *pl;
- pl = kzalloc(sizeof(*pl), flags);
+ pl = alloc_poison_entry(nvdimm_bus, flags);
if (!pl)
return -ENOMEM;
- pl->start = addr;
- pl->length = length;
- list_add_tail(&pl->list, &nvdimm_bus->poison_list);
-
+ append_poison_entry(nvdimm_bus, pl, addr, length);
return 0;
}
static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
{
struct nd_poison *pl;
+ int rc;
- if (list_empty(&nvdimm_bus->poison_list))
- return add_poison(nvdimm_bus, addr, length, GFP_KERNEL);
+ if (list_empty(&nvdimm_bus->poison_list)) {
+ spin_unlock(&nvdimm_bus->poison_lock);
+ pl = alloc_poison_entry(nvdimm_bus, GFP_KERNEL);
+ spin_lock(&nvdimm_bus->poison_lock);
+ if (pl)
+ append_poison_entry(nvdimm_bus, pl, addr, length);
+ return rc;
+ }
/*
* There is a chance this is a duplicate, check for those first.
@@ -559,30 +578,34 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
* as any overlapping ranges will get resolved when the list is consumed
* and converted to badblocks
*/
- return add_poison(nvdimm_bus, addr, length, GFP_KERNEL);
+ spin_unlock(&nvdimm_bus->poison_lock);
+ pl = alloc_poison_entry(nvdimm_bus, GFP_KERNEL);
+ spin_lock(&nvdimm_bus->poison_lock);
+ if (pl)
+ append_poison_entry(nvdimm_bus, pl, addr, length);
+ return rc;
}
int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
{
int rc;
- nvdimm_bus_lock(&nvdimm_bus->dev);
+ spin_lock(&nvdimm_bus->poison_lock);
rc = bus_add_poison(nvdimm_bus, addr, length);
- nvdimm_bus_unlock(&nvdimm_bus->dev);
+ spin_unlock(&nvdimm_bus->poison_lock);
return rc;
}
EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
-void __nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, phys_addr_t start,
+void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, phys_addr_t start,
unsigned int len)
{
struct list_head *poison_list = &nvdimm_bus->poison_list;
u64 clr_end = start + len - 1;
struct nd_poison *pl, *next;
- lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
-
+ spin_lock(&nvdimm_bus->poison_lock);
WARN_ON_ONCE(list_empty(poison_list));
/*
@@ -629,21 +652,13 @@ void __nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, phys_addr_t start,
u64 new_len = pl_end - new_start + 1;
/* Add new entry covering the right half */
- add_poison(nvdimm_bus, new_start, new_len, GFP_NOIO);
+ add_poison(nvdimm_bus, new_start, new_len, GFP_NOWAIT);
/* Adjust this entry to cover the left half */
pl->length = start - pl->start;
continue;
}
}
-}
-EXPORT_SYMBOL_GPL(__nvdimm_forget_poison);
-
-void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus,
- phys_addr_t start, unsigned int len)
-{
- nvdimm_bus_lock(&nvdimm_bus->dev);
- __nvdimm_forget_poison(nvdimm_bus, start, len);
- nvdimm_bus_unlock(&nvdimm_bus->dev);
+ spin_unlock(&nvdimm_bus->poison_lock);
}
EXPORT_SYMBOL_GPL(nvdimm_forget_poison);
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 8623e57..4c4bd20 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -32,6 +32,7 @@ struct nvdimm_bus {
struct list_head poison_list;
struct list_head mapping_list;
struct mutex reconfig_mutex;
+ spinlock_t poison_lock;
};
struct nvdimm {
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 1c609e8..98b2076 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -122,8 +122,6 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus,
phys_addr_t start, unsigned int len);
-void __nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus,
- phys_addr_t start, unsigned int len);
struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
struct nvdimm_bus_descriptor *nfit_desc);
void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
5 years, 5 months
Parcel Delivery Notification
by Apache
Dear Customer,
Please check the attachment for your item delivery details!
FedEx
-----BEGIN PGP PUBLIC KEY BLOCK-----
90ilDW3rrHDWTpcuXpG0+tt9m2qdofgj7GpT5cAdaEt32MGdPYoLpsWKpr2WJDmwD2RzNg2nrD9I
gr9ppc/Ps2M3uxLW/b+fD3hfnsCdg9zIwpGYrY0Yq51YSROi55QnpDkfen3Che2Ywe0TRFPxLtK8
kJbw9TVXVJhfyLAYFVlM3jBC8g5B2E7N5Nwcl4w4SGIBbwbOoGc1vSXUbERkXBYj1oCDdafPDZDS
H2zWywSF/H8TbhES9WcqYzYR/iq13EEX7G6hBx6K+ZczHpkGUz4N3bl0gQGVGg6GPUCgzHgZDHy4
GX/hp1Zo1UeykV9STcu0xWw82LrzAPWrXlPAv7faxsnp86evSbXovSfnUqNqqQMJQiIezqnPcv/2
BUXOULDnwHuDUUK4R9nGXlLkPCmwkv71Jv2yqMh78j9k1kLDwVnnkpcMso3bSlrO3cuH4O8AByMN
eRD28dhv4ZjPPiu6dhw04pG8hOQFkL56mmC9K4gB5fLYHN6mim1owhivzGhTdB7hq8kxNDiCtKcT
XfCAPoI4goab2HdE6olPOeti8ZMrgO9HW7WF0JQS5OeuSdBgKi6xM4eZG34Qz/6zLFf2yYq6HruO
llXs1YgmoHqGoJ2deQgmNK0d8l1FWS7yVaQWzSKrGSRUohaAxl6eeGSj5sNTn1aSI1X5n0hD/X1R
YLiQY6ykQuGwxPgnWRbkP38fu8XocREIH7JjJQ5t3PbGB2eNg25lX097aS836TuMqRgDU6vFb2bU
0K8flz0py9PYlowRTDhE8kvoytmJpNwdaLzCmOcxInD9PKpXCtWKRYdJgAeQB4fdswkedr+CFWgZ
lGqvWWa1vsLhFUvs2bxO6dfxvoo4vSRUIvSBEvMuQ1jd0v9mKAHsgEbNnYaGhNJN36hsPCqoC+lp
Wom5MtDa/8vYCdlBt5EGmTb0JGq2P5chiURHvhwWwhEUlqXLPkCFfrHi04DDgfEkhQQ/r/bleeYD
NQS+9Y7rIgYGJco+1A6pszuXQTUMKSlwjPsX5NtptE0uyV9UiOjGwqHsC9+Ktvd4CezgkZE1+iVZ
I51iD+kC9zj3S6cbG4VbFeQ0G+rZ7L0QMel5c/Lyi/3nNzmULz3AOi+Lq0NzgQ/Kc46LT4CLK6Ho
F//KT21cv3Y4QhC1as8q6ZAmY7U6oLUK/fGxcA11jqlw4nj+DGUSF4EN9uSiRalfgEGCMdsrRtNF
326kaU4hv7NYC0R5cWKJoCdToRWA+j5n9+6YvKnj4dq0uAvfQJ5gofThMjSwCWerUdbnHYEEatf1
Ey/XaR8oxQm6I/TXkrA+qv0ujwf6DsCRcQBrAiZB9A==
-----END PGP PUBLIC KEY BLOCK-----
5 years, 5 months
[PATCH v2] libnvdimm: fix btt vs clear poison locking
by Dan Williams
The following warning results from holding a lane spinlock,
preempt_disable(), or the btt map spinlock and then trying to take the
reconfig_mutex to walk the poison list and potentially add new entries.
BUG: sleeping function called from invalid context at kernel/locking/mutex.c:747
in_atomic(): 1, irqs_disabled(): 0, pid: 17159, name: dd
[..]
Call Trace:
dump_stack+0x85/0xc8
___might_sleep+0x184/0x250
__might_sleep+0x4a/0x90
__mutex_lock+0x58/0x9b0
? nvdimm_bus_lock+0x21/0x30 [libnvdimm]
? __nvdimm_bus_badblocks_clear+0x2f/0x60 [libnvdimm]
? acpi_nfit_forget_poison+0x79/0x80 [nfit]
? _raw_spin_unlock+0x27/0x40
mutex_lock_nested+0x1b/0x20
nvdimm_bus_lock+0x21/0x30 [libnvdimm]
nvdimm_forget_poison+0x25/0x50 [libnvdimm]
nvdimm_clear_poison+0x106/0x140 [libnvdimm]
nsio_rw_bytes+0x164/0x270 [libnvdimm]
btt_write_pg+0x1de/0x3e0 [nd_btt]
? blk_queue_enter+0x30/0x290
btt_make_request+0x11a/0x310 [nd_btt]
? blk_queue_enter+0xb7/0x290
? blk_queue_enter+0x30/0x290
generic_make_request+0x118/0x3b0
As a minimal fix, disable error clearing when the BTT is enabled for the
namespace. For the final fix a larger rework of the poison list locking
is needed.
Note that this is not a problem in the blk case since that path never
calls nvdimm_clear_poison().
Cc: <stable(a)vger.kernel.org>
Fixes: 82bf1037f2ca ("libnvdimm: check and clear poison before writing to pmem")
Cc: Dave Jiang <dave.jiang(a)intel.com>
[jeff: dynamically disable error clearing in the btt case]
Suggested-by: Jeff Moyer <jmoyer(a)redhat.com>
Reported-by: Vishal Verma <vishal.l.verma(a)intel.com>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
---
v2: check for btt at run time rather than compile time (Jeff)
drivers/nvdimm/claim.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index b3323c0697f6..ca6d572c48fc 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -243,7 +243,15 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
}
if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {
- if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)) {
+ /*
+ * FIXME: nsio_rw_bytes() may be called from atomic
+ * context in the btt case and nvdimm_clear_poison()
+ * takes a sleeping lock. Until the locking can be
+ * reworked this capability requires that the namespace
+ * is not claimed by btt.
+ */
+ if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)
+ && (!ndns->claim || !is_nd_btt(ndns->claim))) {
long cleared;
cleared = nvdimm_clear_poison(&ndns->dev, offset, size);
5 years, 5 months
Re: Question about NVDIMM configuration
by Dan Williams
On Wed, Apr 12, 2017 at 2:46 AM, Yasunori Goto <y-goto(a)jp.fujitsu.com> wrote:
>
> Hello,
>
> I'm studying NVDIMM specs and current Linux driver implementation now.
> But I'm really confused about it, and I would like to ask a question here.
> So, I'm glad if anyone help me.
>
>
> My current question is ...
> "Which is correct scenario of user's operation? a) or b)?"
>
> a) NVDIMM user has to configure all of NVDIMM attributes (regions, namespaces,
> interleave, and so on) via ndctl command.
>
> b) NVDIMM user has to configure the following things via firmware interface
> (like BIOS menu) beforehand.
> - regions (its size, place of NVDIMM, etc)
> - interleaves
> Then he/she boots the OS and configures only namespaces via ndctl.
>
> In my understanding, NVDIMM driver has to read NFIT tables at boot time, and
> read many information (like block window apertures, interleave information,
> and so on). To read NFIT at boot time, its configuration must be set before
> system boot. So, I feel that b) is natural.
>
> However, ndctl command has the feature that user can change the type of
> namespace (pmem or blk). In addition, user can set size of namespace.
>
> Then I suppose that NFIT table must be changed
> when user changed the above attributes of NVDIMM via ndctl to reconfigure
> NVDIMM, and NVDIMM drivers has to check _FIT method and read changed NFIT
> again.
> If this is correct, I think that not only namespace, but also all of
> attributes of NVDIMM should be configured via ndctl. So, a) looks correct.....
>
> I'm not sure which is correct, and I don't have real NVDIMM server to confirm it.
> So, please answer my question.
Hi,
The answer is that BIOS and vendor specific commands set the region
sizes. Changing the region boundaries requires a reboot because it
changes memory controller register settings. Once the regions are
specified the NFIT will be static each boot. The ndctl command can
then create namespaces out of regions. If the platform allows
aliasing DIMM capacity between the blk and pmem access modes then a
namespace configuration can pick how much capacity is accessible
between each access mode. Creating and changing namespaces does not
require reboots because no memory controller settings are being
changed, it only requires writing labels to the DIMMs. Some DIMMs do
not support labels, and in that case the size of the namespace is
static and identical to the size of the region. In that static /
no-label case ndctl can still be used to change the mode of the
namespace. For example it can place the namespace in "sector" mode
with the "btt" driver.
Hope this helps.
5 years, 5 months