[PATCH AUTOSEL 4.19 25/36] libnvdimm: Fix compilation warnings with W=1
by Sasha Levin
From: Qian Cai <cai(a)lca.pw>
[ Upstream commit c01dafad77fea8d64c4fdca0a6031c980842ad65 ]
Several places (dimm_devs.c, core.c, etc.) include label.h, but only
label.c uses NSINDEX_SIGNATURE, so move its definition to label.c
instead.
In file included from drivers/nvdimm/dimm_devs.c:23:
drivers/nvdimm/label.h:41:19: warning: 'NSINDEX_SIGNATURE' defined but
not used [-Wunused-const-variable=]
Also, some places abuse "/**", which is reserved for kernel-doc.
drivers/nvdimm/bus.c:648: warning: cannot understand function prototype:
'struct attribute_group nd_device_attribute_group = '
drivers/nvdimm/bus.c:677: warning: cannot understand function prototype:
'struct attribute_group nd_numa_attribute_group = '
Those are just member assignments for "struct attribute_group"
instances, and they can't be expressed in kernel-doc.
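For illustration (not part of the patch), the "/**" opener is reserved
for structured kernel-doc comments of roughly this shape; anything
else should open with a plain "/*":

/**
 * nd_example_show() - hypothetical sysfs attribute accessor
 * @dev: device instance
 * @attr: attribute being read
 * @buf: output buffer
 *
 * Return: number of bytes written to @buf.
 */

Plain "/*" comments, as this patch switches to, are ignored by
scripts/kernel-doc, which silences the warnings above.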
Reviewed-by: Vishal Verma <vishal.l.verma(a)intel.com>
Signed-off-by: Qian Cai <cai(a)lca.pw>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/nvdimm/bus.c | 4 ++--
drivers/nvdimm/label.c | 2 ++
drivers/nvdimm/label.h | 2 --
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 9148015ed803..a3132a9eb91c 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -612,7 +612,7 @@ static struct attribute *nd_device_attributes[] = {
NULL,
};
-/**
+/*
* nd_device_attribute_group - generic attributes for all devices on an nd bus
*/
struct attribute_group nd_device_attribute_group = {
@@ -641,7 +641,7 @@ static umode_t nd_numa_attr_visible(struct kobject *kobj, struct attribute *a,
return a->mode;
}
-/**
+/*
* nd_numa_attribute_group - NUMA attributes for all devices on an nd bus
*/
struct attribute_group nd_numa_attribute_group = {
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index 452ad379ed70..9f1b7e3153f9 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -25,6 +25,8 @@ static guid_t nvdimm_btt2_guid;
static guid_t nvdimm_pfn_guid;
static guid_t nvdimm_dax_guid;
+static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0";
+
static u32 best_seq(u32 a, u32 b)
{
a &= NSINDEX_SEQ_MASK;
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
index 18bbe183b3a9..52f9fcada00a 100644
--- a/drivers/nvdimm/label.h
+++ b/drivers/nvdimm/label.h
@@ -38,8 +38,6 @@ enum {
ND_NSINDEX_INIT = 0x1,
};
-static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0";
-
/**
* struct nd_namespace_index - label set superblock
* @sig: NAMESPACE_INDEX\0
--
2.20.1
[PATCH AUTOSEL 5.1 39/60] libnvdimm: Fix compilation warnings with W=1
by Sasha Levin
From: Qian Cai <cai(a)lca.pw>
[ Upstream commit c01dafad77fea8d64c4fdca0a6031c980842ad65 ]
Several places (dimm_devs.c, core.c, etc.) include label.h, but only
label.c uses NSINDEX_SIGNATURE, so move its definition to label.c
instead.
In file included from drivers/nvdimm/dimm_devs.c:23:
drivers/nvdimm/label.h:41:19: warning: 'NSINDEX_SIGNATURE' defined but
not used [-Wunused-const-variable=]
Also, some places abuse "/**", which is reserved for kernel-doc.
drivers/nvdimm/bus.c:648: warning: cannot understand function prototype:
'struct attribute_group nd_device_attribute_group = '
drivers/nvdimm/bus.c:677: warning: cannot understand function prototype:
'struct attribute_group nd_numa_attribute_group = '
Those are just member assignments for "struct attribute_group"
instances, and they can't be expressed in kernel-doc.
Reviewed-by: Vishal Verma <vishal.l.verma(a)intel.com>
Signed-off-by: Qian Cai <cai(a)lca.pw>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/nvdimm/bus.c | 4 ++--
drivers/nvdimm/label.c | 2 ++
drivers/nvdimm/label.h | 2 --
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 7bbff0af29b2..99d5892ea98a 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -642,7 +642,7 @@ static struct attribute *nd_device_attributes[] = {
NULL,
};
-/**
+/*
* nd_device_attribute_group - generic attributes for all devices on an nd bus
*/
struct attribute_group nd_device_attribute_group = {
@@ -671,7 +671,7 @@ static umode_t nd_numa_attr_visible(struct kobject *kobj, struct attribute *a,
return a->mode;
}
-/**
+/*
* nd_numa_attribute_group - NUMA attributes for all devices on an nd bus
*/
struct attribute_group nd_numa_attribute_group = {
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index 2030805aa216..edf278067e72 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -25,6 +25,8 @@ static guid_t nvdimm_btt2_guid;
static guid_t nvdimm_pfn_guid;
static guid_t nvdimm_dax_guid;
+static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0";
+
static u32 best_seq(u32 a, u32 b)
{
a &= NSINDEX_SEQ_MASK;
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
index e9a2ad3c2150..4bb7add39580 100644
--- a/drivers/nvdimm/label.h
+++ b/drivers/nvdimm/label.h
@@ -38,8 +38,6 @@ enum {
ND_NSINDEX_INIT = 0x1,
};
-static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0";
-
/**
* struct nd_namespace_index - label set superblock
* @sig: NAMESPACE_INDEX\0
--
2.20.1
[PATCH v3 00/18] kunit: introduce KUnit, the Linux kernel unit testing framework
by Brendan Higgins
## TLDR
I mostly wanted to incorporate feedback I got over the last week and a
half.
Biggest things to look out for:
- KUnit core now outputs results in TAP14.
- Heavily reworked tools/testing/kunit/kunit.py
- Changed how parsing works.
- Added testing.
- Greg, Logan, you might want to re-review this.
- Added documentation on how to use KUnit on non-UML kernels. You can
see the docs rendered here[1].
There is still some discussion going on in the [PATCH v2 00/17] thread,
but I wanted to get some of these updates out before they got too stale
(and too difficult for me to keep track of). I hope no one minds.
## Background
This patch set proposes KUnit, a lightweight unit testing and mocking
framework for the Linux kernel.
Unlike Autotest and kselftest, KUnit is a true unit testing framework;
it does not require installing the kernel on a test machine or in a VM
(however, KUnit still allows you to run tests on test machines or in VMs
if you want) and does not require tests to be written in userspace
running on a host kernel. Additionally, KUnit is fast: From invocation
to completion KUnit can run several dozen tests in under a second.
Currently, the entire test suite for KUnit itself runs in under a second
from the initial invocation (build time excluded).
KUnit is heavily inspired by JUnit, Python's unittest.mock, and
Googletest/Googlemock for C++. KUnit provides facilities for defining
unit test cases, grouping related test cases into test suites, providing
common infrastructure for running tests, mocking, spying, and much more.
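As a taste, a minimal KUnit test looks roughly like the sketch below
(written against the API in this series; macro names may differ
slightly between revisions, and the "example" suite is made up):

#include <kunit/test.h>

static void example_add_test(struct kunit *test)
{
	/* EXPECT macros record a failure but let the test continue. */
	KUNIT_EXPECT_EQ(test, 3, 1 + 2);
}

static struct kunit_case example_test_cases[] = {
	KUNIT_CASE(example_add_test),
	{}
};

static struct kunit_suite example_test_suite = {
	.name = "example",
	.test_cases = example_test_cases,
};
kunit_test_suite(example_test_suite);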
## What's so special about unit testing?
A unit test is supposed to test a single unit of code in isolation,
hence the name. There should be no dependencies outside the control of
the test; this means no external dependencies, which makes tests orders
of magnitude faster. Likewise, since there are no external dependencies,
there are no hoops to jump through to run the tests. Additionally, this
makes unit tests deterministic: a failing unit test always indicates a
problem. Finally, because unit tests necessarily have finer granularity,
they are able to test all code paths easily, solving the classic problem
of difficulty in exercising error handling code.
## Is KUnit trying to replace other testing frameworks for the kernel?
No. Most existing tests for the Linux kernel are end-to-end tests, which
have their place. A well tested system has lots of unit tests, a
reasonable number of integration tests, and some end-to-end tests. KUnit
is just trying to address the unit test space which is currently not
being addressed.
## More information on KUnit
There is a bunch of documentation near the end of this patch set that
describes how to use KUnit and best practices for writing unit tests.
For convenience I am hosting the compiled docs here[2].
Additionally for convenience, I have applied these patches to a
branch[3].
The repo may be cloned with:
git clone https://kunit.googlesource.com/linux
This patchset is on the kunit/rfc/v5.1/v3 branch.
## Changes Since Last Version
- Converted KUnit core to print test results in TAP14 format as
suggested by Greg and Frank; sample output follows this list.
- Heavily reworked tools/testing/kunit/kunit.py
- Changed how parsing works.
- Added testing.
- Added documentation on how to use KUnit on non-UML kernels. You can
see the docs rendered here[1].
- Added a new set of EXPECTs and ASSERTs for pointer comparison.
- Removed more function indirection as suggested by Logan.
- Added a new patch that adds `kunit_try_catch_throw` to objtool's
noreturn list.
- Fixed a number of minorish issues pointed out by Shuah, Masahiro, and
kbuild bot.
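For reference, the TAP-style output from the KUnit core looks roughly
like this (an illustrative transcript for the made-up "example" suite
above, not captured from a real run):

TAP version 14
1..1
	# Subtest: example
	1..1
	ok 1 - example_add_test
ok 1 - example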
[1] https://google.github.io/kunit-docs/third_party/kernel/docs/usage.html#ku...
[2] https://google.github.io/kunit-docs/third_party/kernel/docs/
[3] https://kunit.googlesource.com/linux/+/kunit/rfc/v5.1/v3
--
2.21.0.1020.gf2820cf01a-goog
[PATCH v8 00/12] mm: Sub-section memory hotplug support
by Dan Williams
Changes since v7 [1]:
- Make subsection helpers pfn based rather than physical-address based
(Oscar and Pavel)
- Make subsection bitmap definition scalable for different section and
sub-section sizes across architectures. As a result:
unsigned long map_active
...is converted to:
DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION)
...and the helpers are renamed with a 'subsection' prefix. (Pavel)
- New in this version is a touch of arch/powerpc/include/asm/sparsemem.h
in "[PATCH v8 01/12] mm/sparsemem: Introduce struct mem_section_usage"
to define ARCH_SUBSECTION_SHIFT.
- Drop "mm/sparsemem: Introduce common definitions for the size and mask
of a section" in favor of Robin's "mm/memremap: Rename and consolidate
SECTION_SIZE" (Pavel)
- Collect some more Reviewed-by tags. Patches that still lack review
tags: 1, 3, 9 - 12
[1]: https://lore.kernel.org/lkml/155677652226.2336373.8700273400832001094.stg...
---
[merge logistics]
Hi Andrew,
These are too late for v5.2, I'm posting this v8 during the merge window
to maintain the review momentum.
---
[cover letter]
The memory hotplug section is an arbitrary / convenient unit for memory
hotplug. 'Section-size' units have bled into the user interface
('memblock' sysfs) and can not be changed without breaking existing
userspace. The section-size constraint, while mostly benign for typical
memory hotplug, has and continues to wreak havoc with 'device-memory'
use cases, persistent memory (pmem) in particular. Recall that pmem uses
devm_memremap_pages(), and subsequently arch_add_memory(), to allocate a
'struct page' memmap for pmem. However, it does not use the 'bottom
half' of memory hotplug, i.e. never marks pmem pages online and never
exposes the userspace memblock interface for pmem. This leaves an
opening to redress the section-size constraint.
To date, the libnvdimm subsystem has attempted to inject padding to
satisfy the internal constraints of arch_add_memory(). Beyond
complicating the code, leading to bugs [2], wasting memory, and limiting
configuration flexibility, the padding hack is broken when the platform
changes the physical memory alignment of pmem from one boot to the
next. Device failure (intermittent or permanent) and physical
reconfiguration are events that can cause the platform firmware to
change the physical placement of pmem on a subsequent boot, and device
failure is an everyday event in a data-center.
It turns out that sections are only a hard requirement of the
user-facing interface for memory hotplug, and with a bit more
infrastructure, sub-section arch_add_memory() support can be added for
kernel-internal usages like devm_memremap_pages(). Here is an analysis
of the design assumptions in the current code and how they are
addressed in the new implementation:
Current design assumptions:
- Sections that describe boot memory (early sections) are never
unplugged / removed.
- pfn_valid(), in the CONFIG_SPARSEMEM_VMEMMAP=y case, devolves to a
valid_section() check
- __add_pages() and helper routines assume all operations occur in
PAGES_PER_SECTION units.
- The memblock sysfs interface only comprehends full sections
New design assumptions:
- Sections are instrumented with a sub-section bitmask to track (on x86)
individual 2MB sub-divisions of a 128MB section; a sketch of this
tracking structure follows the list.
- Partially populated early sections can be extended with additional
sub-sections, and those sub-sections can be removed with
arch_remove_memory(). With this in place we no longer lose usable memory
capacity to padding.
- pfn_valid() is updated to look deeper than valid_section() to also check the
active-sub-section mask. This indication is in the same cacheline as
the valid_section() so the performance impact is expected to be
negligible. So far the lkp robot has not reported any regressions.
- Outside of the core vmemmap population routines which are replaced,
other helper routines like shrink_{zone,pgdat}_span() are updated to
handle the smaller granularity. Core memory hotplug routines that deal
with online memory are not touched.
- The existing memblock sysfs user api guarantees / assumptions are
not touched since this capability is limited to !online
!memblock-sysfs-accessible sections.
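To make the tracking concrete, here is a rough sketch of the structure
this series introduces (names follow the changelog above; the constants
are the x86_64 values, giving 64 sub-sections per section, and the
exact layout is illustrative):

/* Illustrative sketch, not a verbatim excerpt from the patches. */
#define SECTION_SIZE_BITS	27	/* 128MB sections on x86_64 */
#define SUBSECTION_SHIFT	21	/* 2MB sub-sections */
#define SUBSECTIONS_PER_SECTION	\
	(1UL << (SECTION_SIZE_BITS - SUBSECTION_SHIFT))

struct mem_section_usage {
	/* one bit per populated 2MB sub-section */
	DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
	/* ...existing per-section usage state lives here too... */
};

/*
 * pfn_valid() then tests the pfn's bit in subsection_map rather than
 * stopping at valid_section().
 */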
Meanwhile the issue reports continue to roll in from users who do not
understand when and how the 128MB constraint will bite them. The current
implementation relied on being able to support at least one misaligned
namespace, but that immediately falls over on any moderately complex
namespace creation attempt. Beyond the initial problem of 'System RAM'
colliding with pmem, and the unsolvable problem of physical alignment
changes, Linux is now being exposed to platforms that collide pmem
ranges with other pmem ranges by default [3]. In short,
devm_memremap_pages() has pushed the venerable section-size constraint
past the breaking point, and the simplicity of section-aligned
arch_add_memory() is no longer tenable.
These patches are exposed to the kbuild robot on my libnvdimm-pending
branch [4], and a preview of the unit test for this functionality is
available on the 'subsection-pending' branch of ndctl [5].
[2]: https://lore.kernel.org/r/155000671719.348031.2347363160141119237.stgit@d...
[3]: https://github.com/pmem/ndctl/issues/76
[4]: https://git.kernel.org/pub/scm/linux/kernel/git/djbw/nvdimm.git/log/?h=li...
[5]: https://github.com/pmem/ndctl/commit/7c59b4867e1c
---
Dan Williams (11):
mm/sparsemem: Introduce struct mem_section_usage
mm/sparsemem: Add helpers track active portions of a section at boot
mm/hotplug: Prepare shrink_{zone,pgdat}_span for sub-section removal
mm/sparsemem: Convert kmalloc_section_memmap() to populate_section_memmap()
mm/hotplug: Kill is_dev_zone() usage in __remove_pages()
mm: Kill is_dev_zone() helper
mm/sparsemem: Prepare for sub-section ranges
mm/sparsemem: Support sub-section hotplug
mm/devm_memremap_pages: Enable sub-section remap
libnvdimm/pfn: Fix fsdax-mode namespace info-block zero-fields
libnvdimm/pfn: Stop padding pmem namespaces to section alignment
Robin Murphy (1):
mm/memremap: Rename and consolidate SECTION_SIZE
arch/powerpc/include/asm/sparsemem.h | 3
arch/x86/mm/init_64.c | 4
drivers/nvdimm/dax_devs.c | 2
drivers/nvdimm/pfn.h | 15 -
drivers/nvdimm/pfn_devs.c | 95 +++------
include/linux/memory_hotplug.h | 7 -
include/linux/mm.h | 4
include/linux/mmzone.h | 93 +++++++--
kernel/memremap.c | 63 ++----
mm/hmm.c | 2
mm/memory_hotplug.c | 172 +++++++++-------
mm/page_alloc.c | 8 -
mm/sparse-vmemmap.c | 21 +-
mm/sparse.c | 369 +++++++++++++++++++++++-----------
14 files changed, 511 insertions(+), 347 deletions(-)
[PATCH v7 00/12] mm: Sub-section memory hotplug support
by Dan Williams
Changes since v6 [1]:
- Rebase on next-20190501, no related conflicts or updates
- Fix boot crash due to inaccurate setup of the initial section
->map_active bitmask caused by multiple activations of the same
section. (Jane, Jeff)
- Fix pmem startup crash when devm_memremap_pages() needs to instantiate
a new section. (Jeff)
- Drop mhp_restrictions for the __remove_pages() path in favor of
find_memory_block() to detect cases where section-aligned remove is
required (David)
- Add "[PATCH v7 06/12] mm/hotplug: Kill is_dev_zone() usage in
__remove_pages()"
- Cleanup shrink_{zone,pgdat}_span to remove no longer necessary @ms
section variables. (Oscar)
- Add subsection_check() to the __add_pages() path to prevent
inadvertent sub-section misuse.
[1]: https://lore.kernel.org/lkml/155552633539.2015392.2477781120122237934.stg...
---
[merge logistics]
Hi Andrew,
I believe this is ready for another spin in -mm now that the boot
regression has been squashed. In a chat with Michal last night at LSF/MM
I submitted to his assertion that the boot regression validates the
general concern that there were/are subtle dependencies on sections
beyond what I found to date by code inspection. Of course I want to
relieve the pain that the section constraint inflicts on libnvdimm and
devm_memremap_pages() as soon as possible (i.e. v5.2), but deferment to
v5.3 to give Michal time to do an in-depth look is also acceptable.
---
[cover letter]
The memory hotplug section is an arbitrary / convenient unit for memory
hotplug. 'Section-size' units have bled into the user interface
('memblock' sysfs) and can not be changed without breaking existing
userspace. The section-size constraint, while mostly benign for typical
memory hotplug, has wreaked and continues to wreak havoc with 'device-memory'
use cases, persistent memory (pmem) in particular. Recall that pmem uses
devm_memremap_pages(), and subsequently arch_add_memory(), to allocate a
'struct page' memmap for pmem. However, it does not use the 'bottom
half' of memory hotplug, i.e. never marks pmem pages online and never
exposes the userspace memblock interface for pmem. This leaves an
opening to redress the section-size constraint.
To date, the libnvdimm subsystem has attempted to inject padding to
satisfy the internal constraints of arch_add_memory(). Beyond
complicating the code, leading to bugs [2], wasting memory, and limiting
configuration flexibility, the padding hack is broken when the platform
changes the physical memory alignment of pmem from one boot to the
next. Device failure (intermittent or permanent) and physical
reconfiguration are events that can cause the platform firmware to
change the physical placement of pmem on a subsequent boot, and device
failure is an everyday event in a data-center.
It turns out that sections are only a hard requirement of the
user-facing interface for memory hotplug, and with a bit more
infrastructure, sub-section arch_add_memory() support can be added for
kernel-internal usages like devm_memremap_pages(). Here is an analysis
of the design assumptions in the current code and how they are
addressed in the new implementation:
Current design assumptions:
- Sections that describe boot memory (early sections) are never
unplugged / removed.
- pfn_valid(), in the CONFIG_SPARSEMEM_VMEMMAP=y case, devolves to a
valid_section() check
- __add_pages() and helper routines assume all operations occur in
PAGES_PER_SECTION units.
- The memblock sysfs interface only comprehends full sections
New design assumptions:
- Sections are instrumented with a sub-section bitmask to track (on x86)
individual 2MB sub-divisions of a 128MB section.
- Partially populated early sections can be extended with additional
sub-sections, and those sub-sections can be removed with
arch_remove_memory(). With this in place we no longer lose usable memory
capacity to padding.
- pfn_valid() is updated to look deeper than valid_section() to also check the
active-sub-section mask. This indication is in the same cacheline as
the valid_section() so the performance impact is expected to be
negligible. So far the lkp robot has not reported any regressions.
- Outside of the core vmemmap population routines which are replaced,
other helper routines like shrink_{zone,pgdat}_span() are updated to
handle the smaller granularity. Core memory hotplug routines that deal
with online memory are not touched.
- The existing memblock sysfs user api guarantees / assumptions are
not touched since this capability is limited to !online
!memblock-sysfs-accessible sections.
Meanwhile the issue reports continue to roll in from users who do not
understand when and how the 128MB constraint will bite them. The current
implementation relied on being able to support at least one misaligned
namespace, but that immediately falls over on any moderately complex
namespace creation attempt. Beyond the initial problem of 'System RAM'
colliding with pmem, and the unsolvable problem of physical alignment
changes, Linux is now being exposed to platforms that collide pmem
ranges with other pmem ranges by default [3]. In short,
devm_memremap_pages() has pushed the venerable section-size constraint
past the breaking point, and the simplicity of section-aligned
arch_add_memory() is no longer tenable.
These patches are exposed to the kbuild robot on my libnvdimm-pending
branch [4], and a preview of the unit test for this functionality is
available on the 'subsection-pending' branch of ndctl [5].
[2]: https://lore.kernel.org/r/155000671719.348031.2347363160141119237.stgit@d...
[3]: https://github.com/pmem/ndctl/issues/76
[4]: https://git.kernel.org/pub/scm/linux/kernel/git/djbw/nvdimm.git/log/?h=li...
[5]: https://github.com/pmem/ndctl/commit/7c59b4867e1c
---
Dan Williams (12):
mm/sparsemem: Introduce struct mem_section_usage
mm/sparsemem: Introduce common definitions for the size and mask of a section
mm/sparsemem: Add helpers track active portions of a section at boot
mm/hotplug: Prepare shrink_{zone,pgdat}_span for sub-section removal
mm/sparsemem: Convert kmalloc_section_memmap() to populate_section_memmap()
mm/hotplug: Kill is_dev_zone() usage in __remove_pages()
mm: Kill is_dev_zone() helper
mm/sparsemem: Prepare for sub-section ranges
mm/sparsemem: Support sub-section hotplug
mm/devm_memremap_pages: Enable sub-section remap
libnvdimm/pfn: Fix fsdax-mode namespace info-block zero-fields
libnvdimm/pfn: Stop padding pmem namespaces to section alignment
arch/x86/mm/init_64.c | 4
drivers/nvdimm/dax_devs.c | 2
drivers/nvdimm/pfn.h | 12 -
drivers/nvdimm/pfn_devs.c | 93 +++-------
include/linux/memory_hotplug.h | 7 -
include/linux/mm.h | 4
include/linux/mmzone.h | 72 ++++++--
kernel/memremap.c | 63 +++----
mm/hmm.c | 2
mm/memory_hotplug.c | 172 ++++++++++---------
mm/page_alloc.c | 8 +
mm/sparse-vmemmap.c | 21 ++
mm/sparse.c | 370 ++++++++++++++++++++++++++++------------
13 files changed, 490 insertions(+), 340 deletions(-)
[PATCH AUTOSEL 4.19 013/141] mm: page_mkclean vs MADV_DONTNEED race
by Sasha Levin
From: "Aneesh Kumar K.V" <aneesh.kumar(a)linux.ibm.com>
[ Upstream commit 024eee0e83f0df52317be607ca521e0fc572aa07 ]
MADV_DONTNEED is handled with mmap_sem taken in read mode. We call
page_mkclean without holding mmap_sem.
MADV_DONTNEED implies that pages in the region are unmapped and subsequent
access to the pages in that range is handled as a new page fault. This
implies that if we don't have parallel access to the region when
MADV_DONTNEED is run, we expect those ranges to be unallocated.
w.r.t page_mkclean() we need to make sure that we don't break the
MADV_DONTNEED semantics. MADV_DONTNEED checks for pmd_none without holding
pmd_lock. This implies we skip the pmd if we temporarily mark the pmd none.
Avoid doing that while marking the page clean.
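To make the interleaving concrete (an illustrative sketch, not part of
the patch):

/*
 * page_mkclean_one()                   madvise(MADV_DONTNEED)
 * ------------------                   ----------------------
 * entry = pmdp_huge_clear_flush();
 *   // *pmd is transiently pmd_none()
 *                                      if (pmd_none(*pmd))  // no pmd_lock
 *                                              continue;    // pmd skipped!
 * set_pmd_at(mm, addr, pmd, entry);
 *   // mapping reappears after MADV_DONTNEED has already returned
 *
 * pmdp_invalidate() marks the entry invalid without making it
 * pmd_none(), so the unlocked pmd_none() check cannot misfire.
 */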
Keep the sequence the same for dax too, even though we don't support
MADV_DONTNEED for dax mappings.
The bug was noticed by code review; I didn't observe any failures in
test runs. This is similar to
commit 58ceeb6bec86d9140f9d91d71a710e963523d063
Author: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Date: Thu Apr 13 14:56:26 2017 -0700
thp: fix MADV_DONTNEED vs. MADV_FREE race
commit ced108037c2aa542b3ed8b7afd1576064ad1362a
Author: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Date: Thu Apr 13 14:56:20 2017 -0700
thp: fix MADV_DONTNEED vs. numa balancing race
Link: http://lkml.kernel.org/r/20190321040610.14226-1-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
Reviewed-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc:"Kirill A . Shutemov" <kirill(a)shutemov.name>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/dax.c | 2 +-
mm/rmap.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 004c8ac1117c4..75a289c31c7e5 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -908,7 +908,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
goto unlock_pmd;
flush_cache_page(vma, address, pfn);
- pmd = pmdp_huge_clear_flush(vma, address, pmdp);
+ pmd = pmdp_invalidate(vma, address, pmdp);
pmd = pmd_wrprotect(pmd);
pmd = pmd_mkclean(pmd);
set_pmd_at(vma->vm_mm, address, pmdp, pmd);
diff --git a/mm/rmap.c b/mm/rmap.c
index 85b7f94233526..f048c2651954b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -926,7 +926,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
continue;
flush_cache_page(vma, address, page_to_pfn(page));
- entry = pmdp_huge_clear_flush(vma, address, pmd);
+ entry = pmdp_invalidate(vma, address, pmd);
entry = pmd_wrprotect(entry);
entry = pmd_mkclean(entry);
set_pmd_at(vma->vm_mm, address, pmd, entry);
--
2.20.1
[PATCH AUTOSEL 5.0 018/173] mm: page_mkclean vs MADV_DONTNEED race
by Sasha Levin
From: "Aneesh Kumar K.V" <aneesh.kumar(a)linux.ibm.com>
[ Upstream commit 024eee0e83f0df52317be607ca521e0fc572aa07 ]
MADV_DONTNEED is handled with mmap_sem taken in read mode. We call
page_mkclean without holding mmap_sem.
MADV_DONTNEED implies that pages in the region are unmapped and subsequent
access to the pages in that range is handled as a new page fault. This
implies that if we don't have parallel access to the region when
MADV_DONTNEED is run, we expect those ranges to be unallocated.
w.r.t page_mkclean() we need to make sure that we don't break the
MADV_DONTNEED semantics. MADV_DONTNEED checks for pmd_none without holding
pmd_lock. This implies we skip the pmd if we temporarily mark the pmd none.
Avoid doing that while marking the page clean.
Keep the sequence the same for dax too, even though we don't support
MADV_DONTNEED for dax mappings.
The bug was noticed by code review; I didn't observe any failures in
test runs. This is similar to
commit 58ceeb6bec86d9140f9d91d71a710e963523d063
Author: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Date: Thu Apr 13 14:56:26 2017 -0700
thp: fix MADV_DONTNEED vs. MADV_FREE race
commit ced108037c2aa542b3ed8b7afd1576064ad1362a
Author: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Date: Thu Apr 13 14:56:20 2017 -0700
thp: fix MADV_DONTNEED vs. numa balancing race
Link: http://lkml.kernel.org/r/20190321040610.14226-1-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
Reviewed-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc:"Kirill A . Shutemov" <kirill(a)shutemov.name>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/dax.c | 2 +-
mm/rmap.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 8eb3e8c2b4bdc..163ebd6cc0d1c 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -814,7 +814,7 @@ static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
goto unlock_pmd;
flush_cache_page(vma, address, pfn);
- pmd = pmdp_huge_clear_flush(vma, address, pmdp);
+ pmd = pmdp_invalidate(vma, address, pmdp);
pmd = pmd_wrprotect(pmd);
pmd = pmd_mkclean(pmd);
set_pmd_at(vma->vm_mm, address, pmdp, pmd);
diff --git a/mm/rmap.c b/mm/rmap.c
index 0454ecc29537a..e0710b258c417 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -928,7 +928,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
continue;
flush_cache_page(vma, address, page_to_pfn(page));
- entry = pmdp_huge_clear_flush(vma, address, pmd);
+ entry = pmdp_invalidate(vma, address, pmd);
entry = pmd_wrprotect(entry);
entry = pmd_mkclean(entry);
set_pmd_at(vma->vm_mm, address, pmd, entry);
--
2.20.1
[PATCH AUTOSEL 5.1 020/186] mm: page_mkclean vs MADV_DONTNEED race
by Sasha Levin
From: "Aneesh Kumar K.V" <aneesh.kumar(a)linux.ibm.com>
[ Upstream commit 024eee0e83f0df52317be607ca521e0fc572aa07 ]
MADV_DONTNEED is handled with mmap_sem taken in read mode. We call
page_mkclean without holding mmap_sem.
MADV_DONTNEED implies that pages in the region are unmapped and subsequent
access to the pages in that range is handled as a new page fault. This
implies that if we don't have parallel access to the region when
MADV_DONTNEED is run, we expect those ranges to be unallocated.
w.r.t page_mkclean() we need to make sure that we don't break the
MADV_DONTNEED semantics. MADV_DONTNEED checks for pmd_none without holding
pmd_lock. This implies we skip the pmd if we temporarily mark the pmd none.
Avoid doing that while marking the page clean.
Keep the sequence the same for dax too, even though we don't support
MADV_DONTNEED for dax mappings.
The bug was noticed by code review; I didn't observe any failures in
test runs. This is similar to
commit 58ceeb6bec86d9140f9d91d71a710e963523d063
Author: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Date: Thu Apr 13 14:56:26 2017 -0700
thp: fix MADV_DONTNEED vs. MADV_FREE race
commit ced108037c2aa542b3ed8b7afd1576064ad1362a
Author: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Date: Thu Apr 13 14:56:20 2017 -0700
thp: fix MADV_DONTNEED vs. numa balancing race
Link: http://lkml.kernel.org/r/20190321040610.14226-1-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
Reviewed-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc:"Kirill A . Shutemov" <kirill(a)shutemov.name>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/dax.c | 2 +-
mm/rmap.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 83009875308c5..f74386293632d 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -814,7 +814,7 @@ static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
goto unlock_pmd;
flush_cache_page(vma, address, pfn);
- pmd = pmdp_huge_clear_flush(vma, address, pmdp);
+ pmd = pmdp_invalidate(vma, address, pmdp);
pmd = pmd_wrprotect(pmd);
pmd = pmd_mkclean(pmd);
set_pmd_at(vma->vm_mm, address, pmdp, pmd);
diff --git a/mm/rmap.c b/mm/rmap.c
index b30c7c71d1d92..76c8dfd3ae1cd 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -928,7 +928,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
continue;
flush_cache_page(vma, address, page_to_pfn(page));
- entry = pmdp_huge_clear_flush(vma, address, pmd);
+ entry = pmdp_invalidate(vma, address, pmd);
entry = pmd_wrprotect(entry);
entry = pmd_mkclean(entry);
set_pmd_at(vma->vm_mm, address, pmd, entry);
--
2.20.1