Hi Vishal,
I was looking at acpi_nfit_find_poison() and if I'm reading this
right, I think it's throwing away some ARS results and re-running
an ARS unnecessarily. More comments below...
-- ljk
static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc,
struct nd_region_desc *ndr_desc)
{
struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus;
struct nd_cmd_ars_status *ars_status = NULL;
struct nd_cmd_ars_start *ars_start = NULL;
struct nd_cmd_ars_cap *ars_cap = NULL;
u64 start, len, cur, remaining;
int rc;
ars_cap = kzalloc(sizeof(*ars_cap), GFP_KERNEL);
if (!ars_cap)
return -ENOMEM;
start = ndr_desc->res->start;
len = ndr_desc->res->end - ndr_desc->res->start + 1;
rc = ars_get_cap(nd_desc, ars_cap, start, len);
if (rc)
goto out;
/*
* If ARS is unsupported, or if the 'Persistent Memory Scrub' flag in
* extended status is not set, skip this but continue initialization
*/
if ((ars_cap->status & 0xffff) ||
!(ars_cap->status >> 16 & ND_ARS_PERSISTENT)) {
dev_warn(acpi_desc->dev,
"ARS unsupported (status: 0x%x), won't create an error list\n",
ars_cap->status);
goto out;
}
/*
* Check if a full-range ARS has been run. If so, use those results
* without having to start a new ARS.
*/
ars_status = kzalloc(ars_cap->max_ars_out + sizeof(*ars_status),
GFP_KERNEL);
if (!ars_status) {
rc = -ENOMEM;
goto out;
}
rc = ars_get_status(nd_desc, ars_status);
if (rc)
goto out;
if (ars_status->address <= start &&
(ars_status->address + ars_status->length >= start + len)) {
rc = ars_status_process_records(nvdimm_bus, ars_status, start);
goto out;
}
The above code will process the records if the ARS ran to completion but
not if the ARS overflowed. It won't process partial results because it's
checking both the start and the length against the total range.
/*
* ARS_STATUS can overflow if the number of poison entries found is
* greater than the maximum buffer size (ars_cap->max_ars_out)
* To detect overflow, check if the length field of ars_status
* is less than the length we supplied. If so, process the
* error entries we got, adjust the start point, and start again
*/
This comment seems like the right idea but that's not what it's doing.
ars_start = kzalloc(sizeof(*ars_start), GFP_KERNEL);
if (!ars_start)
return -ENOMEM;
cur = start;
remaining = len;
If we get here, we're starting over at the beginning, losing the
previous results. Shouldn't we process the previous results and
then enter this loop using
cur = ars_status->address + ars_status->length;
remaining = len - ars_status->length;
?
Or restructure the loop so that the existing results, if any, are
processed before doing an ars_do_start()? Or did I miss something?
do {
u64 done, end;
rc = ars_do_start(nd_desc, ars_start, cur, remaining);
if (rc)
goto out;
rc = ars_get_status(nd_desc, ars_status);
if (rc)
goto out;
rc = ars_status_process_records(nvdimm_bus, ars_status, cur);
if (rc)
goto out;
end = min(cur + remaining,
ars_status->address + ars_status->length);
done = end - cur;
cur += done;
remaining -= done;
} while (remaining);
out:
kfree(ars_cap);
kfree(ars_start);
kfree(ars_status);
return rc;
}