[PATCH ndctl v2] infoblock: Set the default alignment to the platform alignment
by Santosh Sivaraj
The default alignment for write-infoblock command is set to 2M. Change
that to use the platform's supported alignment or PAGE_SIZE. The first
supported alignment is taken as the default.
Signed-off-by: Santosh Sivaraj <santosh(a)fossix.org>
---
ndctl/namespace.c | 69 ++++++++++++++++++++++++++++++++++++++---------
1 file changed, 56 insertions(+), 13 deletions(-)
v2: Get the 'write-infoblock all' path right [Ira]
diff --git a/ndctl/namespace.c b/ndctl/namespace.c
index 0550580..8aa5a42 100644
--- a/ndctl/namespace.c
+++ b/ndctl/namespace.c
@@ -175,7 +175,7 @@ OPT_STRING('m', "mode", ¶m.mode, "operation-mode", \
OPT_STRING('s', "size", ¶m.size, "size", \
"override the image size to instantiate the infoblock"), \
OPT_STRING('a', "align", ¶m.align, "align", \
- "specify the expected physical alignment (default: 2M)"), \
+ "specify the expected physical alignment"), \
OPT_STRING('u', "uuid", ¶m.uuid, "uuid", \
"specify the uuid for the infoblock (default: autogenerate)"), \
OPT_STRING('M', "map", ¶m.map, "memmap-location", \
@@ -325,23 +325,15 @@ static int set_defaults(enum device_action action)
sysconf(_SC_PAGE_SIZE));
rc = -EINVAL;
}
- } else if (action == ACTION_WRITE_INFOBLOCK)
- param.align = "2M";
+ }
if (param.size) {
unsigned long long size = parse_size64(param.size);
- unsigned long long align = parse_size64(param.align);
if (size == ULLONG_MAX) {
error("failed to parse namespace size '%s'\n",
param.size);
rc = -EINVAL;
- } else if (action == ACTION_WRITE_INFOBLOCK
- && align < ULLONG_MAX
- && !IS_ALIGNED(size, align)) {
- error("--size=%s not aligned to %s\n", param.size,
- param.align);
- rc = -EINVAL;
}
}
@@ -1982,6 +1974,23 @@ out:
return rc;
}
+static unsigned long ndctl_get_default_alignment(struct ndctl_namespace *ndns)
+{
+ unsigned long long align = 0;
+ struct ndctl_dax *dax = ndctl_namespace_get_dax(ndns);
+ struct ndctl_pfn *pfn = ndctl_namespace_get_pfn(ndns);
+
+ if (ndctl_namespace_get_mode(ndns) == NDCTL_NS_MODE_FSDAX && pfn)
+ align = ndctl_pfn_get_supported_alignment(pfn, 1);
+ else if (ndctl_namespace_get_mode(ndns) == NDCTL_NS_MODE_DEVDAX && dax)
+ align = ndctl_dax_get_supported_alignment(dax, 1);
+
+ if (!align)
+ align = sysconf(_SC_PAGE_SIZE);
+
+ return align;
+}
+
static int namespace_rw_infoblock(struct ndctl_namespace *ndns,
struct read_infoblock_ctx *ri_ctx, int write)
{
@@ -2013,9 +2022,40 @@ static int namespace_rw_infoblock(struct ndctl_namespace *ndns,
}
sprintf(path, "/dev/%s", ndctl_namespace_get_block_device(ndns));
- if (write)
- rc = file_write_infoblock(path);
- else
+ if (write) {
+ unsigned long long align;
+ bool align_provided = true;
+
+ if (!param.align) {
+ align = ndctl_get_default_alignment(ndns);
+
+ if (asprintf((char **)¶m.align, "%llu", align) < 0) {
+ rc = -EINVAL;
+ goto out;
+ }
+ align_provided = false;
+ }
+
+ if (param.size) {
+ unsigned long long size = parse_size64(param.size);
+ align = parse_size64(param.align);
+
+ if (align < ULLONG_MAX && !IS_ALIGNED(size, align)) {
+ error("--size=%s not aligned to %s\n", param.size,
+ param.align);
+
+ rc = -EINVAL;
+ }
+ }
+
+ if (!rc)
+ rc = file_write_infoblock(path);
+
+ if (!align_provided) {
+ free((char *)param.align);
+ param.align = NULL;
+ }
+ } else
rc = file_read_infoblock(path, ndns, ri_ctx);
param.parent_uuid = save;
out:
@@ -2060,6 +2100,9 @@ static int do_xaction_namespace(const char *namespace,
}
if (action == ACTION_WRITE_INFOBLOCK && !namespace) {
+ if (!param.align)
+ param.align = "2M";
+
rc = file_write_infoblock(param.outfile);
if (rc >= 0)
(*processed)++;
--
2.26.2
2 years, 1 month
[ndctl PATCH] Documentation: write-infoblock namespace as mutually exclusive
by Harish
The write-infoblock command allows user to write to only one of
namespace, stdout or to a file. Document that explicitly and also
change the usage string to indicate mutual exclusion.
Signed-off-by: Harish <harish(a)linux.ibm.com>
---
Documentation/ndctl/ndctl-write-infoblock.txt | 14 ++++++++------
ndctl/namespace.c | 4 +++-
2 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/Documentation/ndctl/ndctl-write-infoblock.txt b/Documentation/ndctl/ndctl-write-infoblock.txt
index 8197559..c29327d 100644
--- a/Documentation/ndctl/ndctl-write-infoblock.txt
+++ b/Documentation/ndctl/ndctl-write-infoblock.txt
@@ -61,19 +61,21 @@ read 1 infoblock
OPTIONS
-------
-<namespace(s)>::
- One or more 'namespaceX.Y' device names. The keyword 'all' can be specified to
- operate on every namespace in the system, optionally filtered by bus id (see
- --bus= option), or region id (see --region= option).
+<namespace>::
+ Write the infoblock to 'namespaceX.Y' device name. The keyword 'all' can be
+ specified to operate on every namespace in the system, optionally filtered
+ by bus id (see --bus= option), or region id (see --region= option).
+ (mutually exclusive with --stdout and --output)
-c::
--stdout::
- Write the infoblock to stdout
+ Write the infoblock to stdout (mutually
+ exclusive with <namespace> and --output)
-o::
--output=::
Write the infoblock to the given file (mutually
- exclusive with --stdout).
+ exclusive with <namespace> and --stdout).
-m::
--mode=::
diff --git a/ndctl/namespace.c b/ndctl/namespace.c
index 0550580..cf6c4ea 100644
--- a/ndctl/namespace.c
+++ b/ndctl/namespace.c
@@ -2346,7 +2346,9 @@ int cmd_read_infoblock(int argc, const char **argv, struct ndctl_ctx *ctx)
int cmd_write_infoblock(int argc, const char **argv, struct ndctl_ctx *ctx)
{
- char *xable_usage = "ndctl write-infoblock <namespace> [<options>]";
+ char *xable_usage = "ndctl write-infoblock [<namespace> | -o <file> "
+ "| --stdout] [<options>]";
+
const char *namespace = parse_namespace_options(argc, argv,
ACTION_WRITE_INFOBLOCK, write_infoblock_options,
xable_usage);
--
2.26.2
2 years, 1 month
Re: [PATCH v2 1/3] arm64/numa: export memory_add_physaddr_to_nid as EXPORT_SYMBOL_GPL
by Dan Williams
On Tue, Jul 7, 2020 at 9:08 PM Justin He <Justin.He(a)arm.com> wrote:
[..]
> > Especially for architectures that use memblock info for numa info
> > (which seems to be everyone except x86) why not implement a generic
> > memory_add_physaddr_to_nid() that does:
> >
> > int memory_add_physaddr_to_nid(u64 addr)
> > {
> > unsigned long start_pfn, end_pfn, pfn = PHYS_PFN(addr);
> > int nid;
> >
> > for_each_online_node(nid) {
> > get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
> > if (pfn >= start_pfn && pfn <= end_pfn)
> > return nid;
> > }
> > return NUMA_NO_NODE;
> > }
>
> Thanks for your suggestion,
> Could I wrap the codes and let memory_add_physaddr_to_nid simply invoke
> phys_to_target_node()?
I think it needs to be the reverse. phys_to_target_node() should call
memory_add_physaddr_to_nid() by default, but fall back to searching
reserved memory address ranges in memblock. See phys_to_target_node()
in arch/x86/mm/numa.c. That one uses numa_meminfo instead of memblock,
but the principle is the same i.e. that a target node may not be
represented in memblock.memory, but memblock.reserved. I'm working on
a patch to provide a function similar to get_pfn_range_for_nid() that
operates on reserved memory.
2 years, 1 month
Re: [PATCH v2 1/3] arm64/numa: export memory_add_physaddr_to_nid as
EXPORT_SYMBOL_GPL
by David Hildenbrand
On 08.07.20 08:56, Justin He wrote:
> Hi Dan
>
>> -----Original Message-----
>> From: Dan Williams <dan.j.williams(a)intel.com>
>> Sent: Wednesday, July 8, 2020 1:48 PM
>> To: Mike Rapoport <rppt(a)linux.ibm.com>
>> Cc: Justin He <Justin.He(a)arm.com>; Michal Hocko <mhocko(a)kernel.org>; David
>> Hildenbrand <david(a)redhat.com>; Catalin Marinas <Catalin.Marinas(a)arm.com>;
>> Will Deacon <will(a)kernel.org>; Vishal Verma <vishal.l.verma(a)intel.com>;
>> Dave Jiang <dave.jiang(a)intel.com>; Andrew Morton <akpm@linux-
>> foundation.org>; Baoquan He <bhe(a)redhat.com>; Chuhong Yuan
>> <hslester96(a)gmail.com>; linux-arm-kernel(a)lists.infradead.org; linux-
>> kernel(a)vger.kernel.org; linux-mm(a)kvack.org; linux-nvdimm(a)lists.01.org;
>> Kaly Xin <Kaly.Xin(a)arm.com>
>> Subject: Re: [PATCH v2 1/3] arm64/numa: export memory_add_physaddr_to_nid
>> as EXPORT_SYMBOL_GPL
>>
>> On Tue, Jul 7, 2020 at 10:33 PM Mike Rapoport <rppt(a)linux.ibm.com> wrote:
>>>
>>> On Tue, Jul 07, 2020 at 08:56:36PM -0700, Dan Williams wrote:
>>>> On Tue, Jul 7, 2020 at 7:20 PM Justin He <Justin.He(a)arm.com> wrote:
>>>>>
>>>>> Hi Michal and David
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Michal Hocko <mhocko(a)kernel.org>
>>>>>> Sent: Tuesday, July 7, 2020 7:55 PM
>>>>>> To: Justin He <Justin.He(a)arm.com>
>>>>>> Cc: Catalin Marinas <Catalin.Marinas(a)arm.com>; Will Deacon
>>>>>> <will(a)kernel.org>; Dan Williams <dan.j.williams(a)intel.com>; Vishal
>> Verma
>>>>>> <vishal.l.verma(a)intel.com>; Dave Jiang <dave.jiang(a)intel.com>;
>> Andrew
>>>>>> Morton <akpm(a)linux-foundation.org>; Mike Rapoport
>> <rppt(a)linux.ibm.com>;
>>>>>> Baoquan He <bhe(a)redhat.com>; Chuhong Yuan <hslester96(a)gmail.com>;
>> linux-
>>>>>> arm-kernel(a)lists.infradead.org; linux-kernel(a)vger.kernel.org;
>> linux-
>>>>>> mm(a)kvack.org; linux-nvdimm(a)lists.01.org; Kaly Xin
>> <Kaly.Xin(a)arm.com>
>>>>>> Subject: Re: [PATCH v2 1/3] arm64/numa: export
>> memory_add_physaddr_to_nid
>>>>>> as EXPORT_SYMBOL_GPL
>>>>>>
>>>>>> On Tue 07-07-20 13:59:15, Jia He wrote:
>>>>>>> This exports memory_add_physaddr_to_nid() for module driver to
>> use.
>>>>>>>
>>>>>>> memory_add_physaddr_to_nid() is a fallback option to get the nid
>> in case
>>>>>>> NUMA_NO_NID is detected.
>>>>>>>
>>>>>>> Suggested-by: David Hildenbrand <david(a)redhat.com>
>>>>>>> Signed-off-by: Jia He <justin.he(a)arm.com>
>>>>>>> ---
>>>>>>> arch/arm64/mm/numa.c | 5 +++--
>>>>>>> 1 file changed, 3 insertions(+), 2 deletions(-)
>>>>>>>
>>>>>>> diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
>>>>>>> index aafcee3e3f7e..7eeb31740248 100644
>>>>>>> --- a/arch/arm64/mm/numa.c
>>>>>>> +++ b/arch/arm64/mm/numa.c
>>>>>>> @@ -464,10 +464,11 @@ void __init arm64_numa_init(void)
>>>>>>>
>>>>>>> /*
>>>>>>> * We hope that we will be hotplugging memory on nodes we
>> already know
>>>>>> about,
>>>>>>> - * such that acpi_get_node() succeeds and we never fall back to
>> this...
>>>>>>> + * such that acpi_get_node() succeeds. But when SRAT is not
>> present,
>>>>>> the node
>>>>>>> + * id may be probed as NUMA_NO_NODE by acpi, Here provide a
>> fallback
>>>>>> option.
>>>>>>> */
>>>>>>> int memory_add_physaddr_to_nid(u64 addr)
>>>>>>> {
>>>>>>> - pr_warn("Unknown node for memory at 0x%llx, assuming node
>> 0\n",
>>>>>> addr);
>>>>>>> return 0;
>>>>>>> }
>>>>>>> +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
>>>>>>
>>>>>> Does it make sense to export a noop function? Wouldn't make more
>> sense
>>>>>> to simply make it static inline somewhere in a header? I haven't
>> checked
>>>>>> whether there is an easy way to do that sanely bu this just hit my
>> eyes.
>>>>>
>>>>> Okay, I can make a change in memory_hotplug.h, sth like:
>>>>> --- a/include/linux/memory_hotplug.h
>>>>> +++ b/include/linux/memory_hotplug.h
>>>>> @@ -149,13 +149,13 @@ int add_pages(int nid, unsigned long start_pfn,
>> unsigned long nr_pages,
>>>>> struct mhp_params *params);
>>>>> #endif /* ARCH_HAS_ADD_PAGES */
>>>>>
>>>>> -#ifdef CONFIG_NUMA
>>>>> -extern int memory_add_physaddr_to_nid(u64 start);
>>>>> -#else
>>>>> +#if !defined(CONFIG_NUMA) || !defined(memory_add_physaddr_to_nid)
>>>>> static inline int memory_add_physaddr_to_nid(u64 start)
>>>>> {
>>>>> return 0;
>>>>> }
>>>>> +#else
>>>>> +extern int memory_add_physaddr_to_nid(u64 start);
>>>>> #endif
>>>>>
>>>>> And then check the memory_add_physaddr_to_nid() helper on all arches,
>>>>> if it is noop(return 0), I can simply remove it.
>>>>> if it is not noop, after the helper,
>>>>> #define memory_add_physaddr_to_nid
>>>>>
>>>>> What do you think of this proposal?
>>>>
>>>> Especially for architectures that use memblock info for numa info
>>>> (which seems to be everyone except x86) why not implement a generic
>>>> memory_add_physaddr_to_nid() that does:
>>>
>>> That would be only arm64.
>>>
>>
>> Darn, I saw ARCH_KEEP_MEMBLOCK and had delusions of grandeur that it
>> could solve my numa api woes. At least for x86 the problem is already
>> solved with reserved numa_meminfo, but now I'm trying to write generic
>> drivers that use those apis and finding these gaps on other archs.
>
> Even on arm64, there is a dependency issue in dax_pmem kmem case.
> If dax pmem uses memory_add_physaddr_to_nid() to decide which node that
> memblock should add into, get_pfn_range_for_nid() might not have
> the correct memblock info at that time. That is, get_pfn_range_for_nid()
> can't get the correct memblock info before add_memory()
>
> So IMO, memory_add_physaddr_to_nid() still have to implement as noop on
> arm64 (return 0) together with sh,s390x? Powerpc, x86,ia64 can use their
> own implementation. And phys_to_target_node() can use your suggested(
> for_each_online_node() ...)
>
> What do you think of it? Thanks
You are trying to fix the "we only have one dummy node" AFAIU, so what
you propose here is certainly good enough for now.
--
Thanks,
David / dhildenb
2 years, 1 month
Re: [PATCH v2 1/3] arm64/numa: export memory_add_physaddr_to_nid as EXPORT_SYMBOL_GPL
by Dan Williams
On Tue, Jul 7, 2020 at 7:20 PM Justin He <Justin.He(a)arm.com> wrote:
>
> Hi Michal and David
>
> > -----Original Message-----
> > From: Michal Hocko <mhocko(a)kernel.org>
> > Sent: Tuesday, July 7, 2020 7:55 PM
> > To: Justin He <Justin.He(a)arm.com>
> > Cc: Catalin Marinas <Catalin.Marinas(a)arm.com>; Will Deacon
> > <will(a)kernel.org>; Dan Williams <dan.j.williams(a)intel.com>; Vishal Verma
> > <vishal.l.verma(a)intel.com>; Dave Jiang <dave.jiang(a)intel.com>; Andrew
> > Morton <akpm(a)linux-foundation.org>; Mike Rapoport <rppt(a)linux.ibm.com>;
> > Baoquan He <bhe(a)redhat.com>; Chuhong Yuan <hslester96(a)gmail.com>; linux-
> > arm-kernel(a)lists.infradead.org; linux-kernel(a)vger.kernel.org; linux-
> > mm(a)kvack.org; linux-nvdimm(a)lists.01.org; Kaly Xin <Kaly.Xin(a)arm.com>
> > Subject: Re: [PATCH v2 1/3] arm64/numa: export memory_add_physaddr_to_nid
> > as EXPORT_SYMBOL_GPL
> >
> > On Tue 07-07-20 13:59:15, Jia He wrote:
> > > This exports memory_add_physaddr_to_nid() for module driver to use.
> > >
> > > memory_add_physaddr_to_nid() is a fallback option to get the nid in case
> > > NUMA_NO_NID is detected.
> > >
> > > Suggested-by: David Hildenbrand <david(a)redhat.com>
> > > Signed-off-by: Jia He <justin.he(a)arm.com>
> > > ---
> > > arch/arm64/mm/numa.c | 5 +++--
> > > 1 file changed, 3 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
> > > index aafcee3e3f7e..7eeb31740248 100644
> > > --- a/arch/arm64/mm/numa.c
> > > +++ b/arch/arm64/mm/numa.c
> > > @@ -464,10 +464,11 @@ void __init arm64_numa_init(void)
> > >
> > > /*
> > > * We hope that we will be hotplugging memory on nodes we already know
> > about,
> > > - * such that acpi_get_node() succeeds and we never fall back to this...
> > > + * such that acpi_get_node() succeeds. But when SRAT is not present,
> > the node
> > > + * id may be probed as NUMA_NO_NODE by acpi, Here provide a fallback
> > option.
> > > */
> > > int memory_add_physaddr_to_nid(u64 addr)
> > > {
> > > - pr_warn("Unknown node for memory at 0x%llx, assuming node 0\n",
> > addr);
> > > return 0;
> > > }
> > > +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
> >
> > Does it make sense to export a noop function? Wouldn't make more sense
> > to simply make it static inline somewhere in a header? I haven't checked
> > whether there is an easy way to do that sanely bu this just hit my eyes.
>
> Okay, I can make a change in memory_hotplug.h, sth like:
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -149,13 +149,13 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
> struct mhp_params *params);
> #endif /* ARCH_HAS_ADD_PAGES */
>
> -#ifdef CONFIG_NUMA
> -extern int memory_add_physaddr_to_nid(u64 start);
> -#else
> +#if !defined(CONFIG_NUMA) || !defined(memory_add_physaddr_to_nid)
> static inline int memory_add_physaddr_to_nid(u64 start)
> {
> return 0;
> }
> +#else
> +extern int memory_add_physaddr_to_nid(u64 start);
> #endif
>
> And then check the memory_add_physaddr_to_nid() helper on all arches,
> if it is noop(return 0), I can simply remove it.
> if it is not noop, after the helper,
> #define memory_add_physaddr_to_nid
>
> What do you think of this proposal?
Especially for architectures that use memblock info for numa info
(which seems to be everyone except x86) why not implement a generic
memory_add_physaddr_to_nid() that does:
int memory_add_physaddr_to_nid(u64 addr)
{
unsigned long start_pfn, end_pfn, pfn = PHYS_PFN(addr);
int nid;
for_each_online_node(nid) {
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
if (pfn >= start_pfn && pfn <= end_pfn)
return nid;
}
return NUMA_NO_NODE;
}
2 years, 1 month
[PATCH ndctl] infoblock: Set the default alignment to the platform alignment
by Santosh Sivaraj
The default alignment for write-infoblock command is set to 2M. Change
that to use the platform's supported alignment or PAGE_SIZE. The first
supported alignment is taken as the default.
Signed-off-by: Santosh Sivaraj <santosh(a)fossix.org>
---
ndctl/namespace.c | 56 ++++++++++++++++++++++++++++++++++++++---------
1 file changed, 46 insertions(+), 10 deletions(-)
diff --git a/ndctl/namespace.c b/ndctl/namespace.c
index 0550580..4f056b7 100644
--- a/ndctl/namespace.c
+++ b/ndctl/namespace.c
@@ -175,7 +175,7 @@ OPT_STRING('m', "mode", ¶m.mode, "operation-mode", \
OPT_STRING('s', "size", ¶m.size, "size", \
"override the image size to instantiate the infoblock"), \
OPT_STRING('a', "align", ¶m.align, "align", \
- "specify the expected physical alignment (default: 2M)"), \
+ "specify the expected physical alignment"), \
OPT_STRING('u', "uuid", ¶m.uuid, "uuid", \
"specify the uuid for the infoblock (default: autogenerate)"), \
OPT_STRING('M', "map", ¶m.map, "memmap-location", \
@@ -325,23 +325,15 @@ static int set_defaults(enum device_action action)
sysconf(_SC_PAGE_SIZE));
rc = -EINVAL;
}
- } else if (action == ACTION_WRITE_INFOBLOCK)
- param.align = "2M";
+ }
if (param.size) {
unsigned long long size = parse_size64(param.size);
- unsigned long long align = parse_size64(param.align);
if (size == ULLONG_MAX) {
error("failed to parse namespace size '%s'\n",
param.size);
rc = -EINVAL;
- } else if (action == ACTION_WRITE_INFOBLOCK
- && align < ULLONG_MAX
- && !IS_ALIGNED(size, align)) {
- error("--size=%s not aligned to %s\n", param.size,
- param.align);
- rc = -EINVAL;
}
}
@@ -1982,6 +1974,23 @@ out:
return rc;
}
+static unsigned long ndctl_get_default_alignment(struct ndctl_namespace *ndns)
+{
+ unsigned long long align = 0;
+ struct ndctl_dax *dax = ndctl_namespace_get_dax(ndns);
+ struct ndctl_pfn *pfn = ndctl_namespace_get_pfn(ndns);
+
+ if (ndctl_namespace_get_mode(ndns) == NDCTL_NS_MODE_FSDAX && pfn)
+ align = ndctl_pfn_get_supported_alignment(pfn, 1);
+ else if (ndctl_namespace_get_mode(ndns) == NDCTL_NS_MODE_DEVDAX && dax)
+ align = ndctl_dax_get_supported_alignment(dax, 1);
+
+ if (!align)
+ align = sysconf(_SC_PAGE_SIZE);
+
+ return align;
+}
+
static int namespace_rw_infoblock(struct ndctl_namespace *ndns,
struct read_infoblock_ctx *ri_ctx, int write)
{
@@ -1992,12 +2001,36 @@ static int namespace_rw_infoblock(struct ndctl_namespace *ndns,
const char *save;
const char *cmd = write ? "write-infoblock" : "read-infoblock";
const char *devname = ndctl_namespace_get_devname(ndns);
+ unsigned long long align;
if (ndctl_namespace_is_active(ndns)) {
pr_verbose("%s: %s enabled, must be disabled\n", cmd, devname);
return -EBUSY;
}
+ if (write) {
+ if (!param.align) {
+ align = ndctl_get_default_alignment(ndns);
+
+ if (asprintf((char **)¶m.align, "%llu", align) < 0) {
+ rc = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (param.size) {
+ unsigned long long size = parse_size64(param.size);
+ align = parse_size64(param.align);
+
+ if (align < ULLONG_MAX && !IS_ALIGNED(size, align)) {
+ error("--size=%s not aligned to %s\n", param.size,
+ param.align);
+ rc = -EINVAL;
+ goto out;
+ }
+ }
+ }
+
ndctl_namespace_set_raw_mode(ndns, 1);
rc = ndctl_namespace_enable(ndns);
if (rc < 0) {
@@ -2060,6 +2093,9 @@ static int do_xaction_namespace(const char *namespace,
}
if (action == ACTION_WRITE_INFOBLOCK && !namespace) {
+ if (!param.align)
+ param.align = "2M";
+
rc = file_write_infoblock(param.outfile);
if (rc >= 0)
(*processed)++;
--
2.26.2
2 years, 1 month
[ndctl PATCH] infoblock: Make output mutually exclusive
by Harish
Patch fixes checking output filter option (-o <file> or -c) of
write-infoblock command to be mutually exclusive.
Signed-off-by: Harish <harish(a)linux.ibm.com>
---
ndctl/namespace.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ndctl/namespace.c b/ndctl/namespace.c
index 0550580..d3ade25 100644
--- a/ndctl/namespace.c
+++ b/ndctl/namespace.c
@@ -440,7 +440,7 @@ static const char *parse_namespace_options(int argc, const char **argv,
rc = -EINVAL;
}
- if (action == ACTION_WRITE_INFOBLOCK && (param.outfile || param.std_out)
+ if (action == ACTION_WRITE_INFOBLOCK && (param.outfile && param.std_out)
&& argc) {
error("specify only one of a namespace filter, --output, or --stdout\n");
rc = -EINVAL;
--
2.26.2
2 years, 1 month
[PATCH v2] dax: print error message by pr_info() in __generic_fsdax_supported()
by Coly Li
In struct dax_operations, the callback routine dax_supported() returns
a bool type result. For false return value, the caller has no idea
whether the device does not support dax at all, or it is just some mis-
configuration issue.
An example is formatting an Ext4 file system on pmem device on top of
a NVDIMM namespace by,
# mkfs.ext4 /dev/pmem0
If the fs block size does not match kernel space memory page size (which
is possible on non-x86 platform), mount this Ext4 file system will fail,
# mount -o dax /dev/pmem0 /mnt
mount: /mnt: wrong fs type, bad option, bad superblock on /dev/pmem0,
missing codepage or helper program, or other error.
And from the dmesg output there is only the following information,
[ 307.853148] EXT4-fs (pmem0): DAX unsupported by block device.
The above information is quite confusing. Because definiately the pmem0
device supports dax operation, and the super block is consistent as how
it was created by mkfs.ext4.
Indeed the failure is from __generic_fsdax_supported() by the following
code piece,
if (blocksize != PAGE_SIZE) {
pr_debug("%s: error: unsupported blocksize for dax\n",
bdevname(bdev, buf));
return false;
}
It is because the Ext4 block size is 4KB and kernel page size is 8KB or
16KB.
It is not simple to make dax_supported() from struct dax_operations
or __generic_fsdax_supported() to return exact failure type right now.
So the simplest fix is to use pr_info() to print all the error messages
inside __generic_fsdax_supported(). Then users may find informative clue
from the kernel message at least.
Message printed by pr_debug() is very easy to be ignored by users. This
patch prints error message by pr_info() in __generic_fsdax_supported(),
when then mount fails, following lines can be found from dmesg output,
[ 2705.500885] pmem0: error: unsupported blocksize for dax
[ 2705.500888] EXT4-fs (pmem0): DAX unsupported by block device.
Now the users may have idea the mount failure is from pmem driver for
unsupported block size.
Reported-by: Michal Suchanek <msuchanek(a)suse.com>
Suggested-by: Jan Kara <jack(a)suse.com>
Signed-off-by: Coly Li <colyli(a)suse.de>
Reviewed-by: Jan Kara <jack(a)suse.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Anthony Iliopoulos <ailiopoulos(a)suse.com>
---
Changelog:
v2: Add reviewed-by from Jan Kara
v1: initial version.
drivers/dax/super.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 8e32345be0f7..de0d02ec0347 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -80,14 +80,14 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
int err, id;
if (blocksize != PAGE_SIZE) {
- pr_debug("%s: error: unsupported blocksize for dax\n",
+ pr_info("%s: error: unsupported blocksize for dax\n",
bdevname(bdev, buf));
return false;
}
err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
if (err) {
- pr_debug("%s: error: unaligned partition for dax\n",
+ pr_info("%s: error: unaligned partition for dax\n",
bdevname(bdev, buf));
return false;
}
@@ -95,7 +95,7 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512;
err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
if (err) {
- pr_debug("%s: error: unaligned partition for dax\n",
+ pr_info("%s: error: unaligned partition for dax\n",
bdevname(bdev, buf));
return false;
}
@@ -106,7 +106,7 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
dax_read_unlock(id);
if (len < 1 || len2 < 1) {
- pr_debug("%s: error: dax access failed (%ld)\n",
+ pr_info("%s: error: dax access failed (%ld)\n",
bdevname(bdev, buf), len < 1 ? len : len2);
return false;
}
@@ -139,7 +139,7 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
}
if (!dax_enabled) {
- pr_debug("%s: error: dax support not enabled\n",
+ pr_info("%s: error: dax support not enabled\n",
bdevname(bdev, buf));
return false;
}
--
2.26.2
2 years, 1 month
[PATCH] libnvdimm/security: Fix key lookup permissions
by Dan Williams
As of commit 8c0637e950d6 ("keys: Make the KEY_NEED_* perms an enum rather
than a mask") lookup_user_key() needs an explicit declaration of what it
wants to do with the key. Add KEY_NEED_SEARCH to fix a warning with the
below signature, and fixes the inability to retrieve a key.
WARNING: CPU: 15 PID: 6276 at security/keys/permission.c:35 key_task_permission+0xd3/0x140
[..]
RIP: 0010:key_task_permission+0xd3/0x140
[..]
Call Trace:
lookup_user_key+0xeb/0x6b0
? vsscanf+0x3df/0x840
? key_validate+0x50/0x50
? key_default_cmp+0x20/0x20
nvdimm_get_user_key_payload.part.0+0x21/0x110 [libnvdimm]
nvdimm_security_store+0x67d/0xb20 [libnvdimm]
security_store+0x67/0x1a0 [libnvdimm]
kernfs_fop_write+0xcf/0x1c0
vfs_write+0xde/0x1d0
ksys_write+0x68/0xe0
do_syscall_64+0x5c/0xa0
entry_SYSCALL_64_after_hwframe+0x49/0xb3
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Vishal Verma <vishal.l.verma(a)intel.com>
Cc: Dave Jiang <dave.jiang(a)intel.com>
Cc: Ira Weiny <ira.weiny(a)intel.com>
Suggested-by: David Howells <dhowells(a)redhat.com>
Fixes: 8c0637e950d6 ("keys: Make the KEY_NEED_* perms an enum rather than a mask")
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
---
drivers/nvdimm/security.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c
index 89b85970912d..4cef69bd3c1b 100644
--- a/drivers/nvdimm/security.c
+++ b/drivers/nvdimm/security.c
@@ -95,7 +95,7 @@ static struct key *nvdimm_lookup_user_key(struct nvdimm *nvdimm,
struct encrypted_key_payload *epayload;
struct device *dev = &nvdimm->dev;
- keyref = lookup_user_key(id, 0, 0);
+ keyref = lookup_user_key(id, 0, KEY_NEED_SEARCH);
if (IS_ERR(keyref))
return NULL;
2 years, 1 month
[PATCH] dax: print error message by pr_info() in __generic_fsdax_supported()
by Coly Li
In struct dax_operations, the callback routine dax_supported() returns
a bool type result. For false return value, the caller has no idea
whether the device does not support dax at all, or it is just some mis-
configuration issue.
An example is formatting an Ext4 file system on pmem device on top of
a NVDIMM namespace by,
# mkfs.ext4 /dev/pmem0
If the fs block size does not match kernel space memory page size (which
is possible on non-x86 platform), mount this Ext4 file system will fail,
# mount -o dax /dev/pmem0 /mnt
mount: /mnt: wrong fs type, bad option, bad superblock on /dev/pmem0,
missing codepage or helper program, or other error.
And from the dmesg output there is only the following information,
[ 307.853148] EXT4-fs (pmem0): DAX unsupported by block device.
The above information is quite confusing. Because definiately the pmem0
device supports dax operation, and the super block is consistent as how
it was created by mkfs.ext4.
Indeed the failure is from __generic_fsdax_supported() by the following
code piece,
if (blocksize != PAGE_SIZE) {
pr_debug("%s: error: unsupported blocksize for dax\n",
bdevname(bdev, buf));
return false;
}
It is because the Ext4 block size is 4KB and kernel page size is 8KB or
16KB.
It is not simple to make dax_supported() from struct dax_operations
or __generic_fsdax_supported() to return exact failure type right now.
So the simplest fix is to use pr_info() to print all the error messages
inside __generic_fsdax_supported(). Then users may find informative clue
from the kernel message at least.
Message printed by pr_debug() is very easy to be ignored by users. This
patch prints error message by pr_info() in __generic_fsdax_supported(),
when then mount fails, following lines can be found from dmesg output,
[ 2705.500885] pmem0: error: unsupported blocksize for dax
[ 2705.500888] EXT4-fs (pmem0): DAX unsupported by block device.
Now the users may have idea the mount failure is from pmem driver for
unsupported block size.
Reported-by: Michal Suchanek <msuchanek(a)suse.com>
Suggested-by: Jan Kara <jack(a)suse.com>
Signed-off-by: Coly Li <colyli(a)suse.de>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Anthony Iliopoulos <ailiopoulos(a)suse.com>
---
drivers/dax/super.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 8e32345be0f7..de0d02ec0347 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -80,14 +80,14 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
int err, id;
if (blocksize != PAGE_SIZE) {
- pr_debug("%s: error: unsupported blocksize for dax\n",
+ pr_info("%s: error: unsupported blocksize for dax\n",
bdevname(bdev, buf));
return false;
}
err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
if (err) {
- pr_debug("%s: error: unaligned partition for dax\n",
+ pr_info("%s: error: unaligned partition for dax\n",
bdevname(bdev, buf));
return false;
}
@@ -95,7 +95,7 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512;
err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
if (err) {
- pr_debug("%s: error: unaligned partition for dax\n",
+ pr_info("%s: error: unaligned partition for dax\n",
bdevname(bdev, buf));
return false;
}
@@ -106,7 +106,7 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
dax_read_unlock(id);
if (len < 1 || len2 < 1) {
- pr_debug("%s: error: dax access failed (%ld)\n",
+ pr_info("%s: error: dax access failed (%ld)\n",
bdevname(bdev, buf), len < 1 ? len : len2);
return false;
}
@@ -139,7 +139,7 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
}
if (!dax_enabled) {
- pr_debug("%s: error: dax support not enabled\n",
+ pr_info("%s: error: dax support not enabled\n",
bdevname(bdev, buf));
return false;
}
--
2.26.2
2 years, 1 month