diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 866853abebea..2532294bbd68 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -35,16 +35,6 @@ static bool force_enable_dimms;
 module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
 
-static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT;
-module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds");
-
-/* after three payloads of overflow, it's dead jim */
-static unsigned int scrub_overflow_abort = 3;
-module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_overflow_abort,
-		"Number of times we overflow ARS results before abort");
-
 static bool disable_vendor_specific;
 module_param(disable_vendor_specific, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_vendor_specific,
@@ -1251,7 +1241,7 @@ static ssize_t scrub_show(struct device *dev,
 
 		mutex_lock(&acpi_desc->init_mutex);
 		rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
-				work_busy(&acpi_desc->work)
+				work_busy(&acpi_desc->dwork.work)
 				&& !acpi_desc->cancel ? "+\n" : "\n");
 		mutex_unlock(&acpi_desc->init_mutex);
 	}
@@ -2452,7 +2442,8 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa
 	memset(&ars_start, 0, sizeof(ars_start));
 	ars_start.address = spa->address;
 	ars_start.length = spa->length;
-	ars_start.flags = acpi_desc->ars_start_flags;
+	if (test_bit(ARS_SHORT, &nfit_spa->ars_state))
+		ars_start.flags = ND_ARS_RETURN_PREV_DATA;
 	if (nfit_spa_type(spa) == NFIT_SPA_PM)
 		ars_start.type = ND_ARS_PERSISTENT;
 	else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
@@ -2500,6 +2491,52 @@ static int ars_get_status(struct acpi_nfit_desc *acpi_desc)
 	return cmd_rc;
 }
 
+static void ars_complete(struct acpi_nfit_desc *acpi_desc,
+		struct nfit_spa *nfit_spa)
+{
+	struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
+	struct acpi_nfit_system_address *spa = nfit_spa->spa;
+	struct nd_region *nd_region = nfit_spa->nd_region;
+	struct device *dev;
+
+	if ((ars_status->address >= spa->address && ars_status->address
+				< spa->address + spa->length)
+			|| (ars_status->address < spa->address)) {
+		/*
+		 * Assume that if a scrub starts at an offset from the
+		 * start of nfit_spa that we are in the continuation
+		 * case.
+		 *
+		 * Otherwise, if the scrub covers the spa range, mark
+		 * any pending request complete.
+		 */
+		if (ars_status->address + ars_status->length
+				>= spa->address + spa->length)
+			/* complete */;
+		else
+			return;
+	} else
+		return;
+
+	if (test_bit(ARS_DONE, &nfit_spa->ars_state))
+		return;
+
+	if (!test_and_clear_bit(ARS_REQ, &nfit_spa->ars_state))
+		return;
+
+	if (nd_region) {
+		dev = nd_region_dev(nd_region);
+		nvdimm_region_notify(nd_region, NVDIMM_REVALIDATE_POISON);
+	} else
+		dev = acpi_desc->dev;
+
+	dev_dbg(dev, "ARS: range %d %s complete\n", spa->range_index,
+			test_bit(ARS_SHORT, &nfit_spa->ars_state)
"short" : "long"); + clear_bit(ARS_SHORT, &nfit_spa->ars_state); + set_bit(ARS_DONE, &nfit_spa->ars_state); +} + static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc) { struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus; @@ -2764,6 +2801,7 @@ static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc) return -ENOMEM; rc = ars_get_status(acpi_desc); + if (rc < 0 && rc != -ENOSPC) return rc; @@ -2773,223 +2811,125 @@ static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc) return 0; } -static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc, - struct nfit_spa *nfit_spa) +static int ars_register(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa, + int *query_rc) { - struct acpi_nfit_system_address *spa = nfit_spa->spa; - unsigned int overflow_retry = scrub_overflow_abort; - u64 init_ars_start = 0, init_ars_len = 0; - struct device *dev = acpi_desc->dev; - unsigned int tmo = scrub_timeout; - int rc; + int rc = *query_rc; - if (!test_bit(ARS_REQ, &nfit_spa->ars_state) || !nfit_spa->nd_region) - return; - - rc = ars_start(acpi_desc, nfit_spa); - /* - * If we timed out the initial scan we'll still be busy here, - * and will wait another timeout before giving up permanently. - */ - if (rc < 0 && rc != -EBUSY) - return; - - do { - u64 ars_start, ars_len; - - if (acpi_desc->cancel) - break; - rc = acpi_nfit_query_poison(acpi_desc); - if (rc == -ENOTTY) - break; - if (rc == -EBUSY && !tmo) { - dev_warn(dev, "range %d ars timeout, aborting\n", - spa->range_index); - break; - } + set_bit(ARS_REQ, &nfit_spa->ars_state); + set_bit(ARS_SHORT, &nfit_spa->ars_state); + switch (rc) { + case 0: + case -EAGAIN: + rc = ars_start(acpi_desc, nfit_spa); if (rc == -EBUSY) { - /* - * Note, entries may be appended to the list - * while the lock is dropped, but the workqueue - * being active prevents entries being deleted / - * freed. - */ - mutex_unlock(&acpi_desc->init_mutex); - ssleep(1); - tmo--; - mutex_lock(&acpi_desc->init_mutex); - continue; - } - - /* we got some results, but there are more pending... 
-		if (rc == -ENOSPC && overflow_retry--) {
-			if (!init_ars_len) {
-				init_ars_len = acpi_desc->ars_status->length;
-				init_ars_start = acpi_desc->ars_status->address;
-			}
-			rc = ars_continue(acpi_desc);
-		}
-
-		if (rc < 0) {
-			dev_warn(dev, "range %d ars continuation failed\n",
-					spa->range_index);
+			*query_rc = rc;
 			break;
+		} else if (rc == 0) {
+			rc = acpi_nfit_query_poison(acpi_desc);
+		} else {
+			set_bit(ARS_FAILED, &nfit_spa->ars_state);
 			break;
 		}
-
-		if (init_ars_len) {
-			ars_start = init_ars_start;
-			ars_len = init_ars_len;
-		} else {
-			ars_start = acpi_desc->ars_status->address;
-			ars_len = acpi_desc->ars_status->length;
-		}
-		dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n",
-				spa->range_index, ars_start, ars_len);
-		/* notify the region about new poison entries */
-		nvdimm_region_notify(nfit_spa->nd_region,
-				NVDIMM_REVALIDATE_POISON);
+		if (rc == -EAGAIN)
+			clear_bit(ARS_SHORT, &nfit_spa->ars_state);
+		else if (rc == 0)
+			ars_complete(acpi_desc, nfit_spa);
 		break;
-	} while (1);
+	case -EBUSY:
+	case -ENOSPC:
+		break;
+	default:
+		set_bit(ARS_FAILED, &nfit_spa->ars_state);
+		break;
+	}
+
+	if (test_and_clear_bit(ARS_DONE, &nfit_spa->ars_state))
+		set_bit(ARS_REQ, &nfit_spa->ars_state);
+
+	return acpi_nfit_register_region(acpi_desc, nfit_spa);
+}
+
+static void ars_complete_all(struct acpi_nfit_desc *acpi_desc)
+{
+	struct nfit_spa *nfit_spa;
+
+	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
+			continue;
+		ars_complete(acpi_desc, nfit_spa);
+	}
+}
+
+static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc,
+		int query_rc)
+{
+	unsigned int tmo = acpi_desc->scrub_tmo;
+	struct device *dev = acpi_desc->dev;
+	struct nfit_spa *nfit_spa;
+
+	if (acpi_desc->cancel)
+		return 0;
+
+	if (query_rc == -EBUSY) {
+		dev_dbg(dev, "ARS: ARS busy\n");
+		return min(30U * 60U, tmo * 2);
+	}
+	if (query_rc == -ENOSPC) {
+		dev_dbg(dev, "ARS: ARS continue\n");
+		ars_continue(acpi_desc);
+		return 1;
+	}
+	if (query_rc && query_rc != -EAGAIN) {
+		unsigned long long addr, end;
+
+		addr = acpi_desc->ars_status->address;
+		end = addr + acpi_desc->ars_status->length;
+		dev_dbg(dev, "ARS: %llx-%llx failed (%d)\n", addr, end,
+				query_rc);
+	}
+
+	ars_complete_all(acpi_desc);
+	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
+			continue;
+		if (test_bit(ARS_REQ, &nfit_spa->ars_state)) {
+			int rc = ars_start(acpi_desc, nfit_spa);
+
+			clear_bit(ARS_DONE, &nfit_spa->ars_state);
+			dev = nd_region_dev(nfit_spa->nd_region);
+			dev_dbg(dev, "ARS: range %d ARS start (%d)\n",
+					nfit_spa->spa->range_index, rc);
+			if (rc == 0 || rc == -EBUSY)
+				return 1;
+			dev_err(dev, "ARS: range %d ARS failed (%d)\n",
+					nfit_spa->spa->range_index, rc);
+			set_bit(ARS_FAILED, &nfit_spa->ars_state);
+		}
+	}
+	return 0;
 }
 
 static void acpi_nfit_scrub(struct work_struct *work)
 {
-	struct device *dev;
-	u64 init_scrub_length = 0;
-	struct nfit_spa *nfit_spa;
-	u64 init_scrub_address = 0;
-	bool init_ars_done = false;
 	struct acpi_nfit_desc *acpi_desc;
-	unsigned int tmo = scrub_timeout;
-	unsigned int overflow_retry = scrub_overflow_abort;
+	unsigned int tmo;
+	int query_rc;
 
-	acpi_desc = container_of(work, typeof(*acpi_desc), work);
-	dev = acpi_desc->dev;
-
-	/*
-	 * We scrub in 2 phases. The first phase waits for any platform
-	 * firmware initiated scrubs to complete and then we go search for the
-	 * affected spa regions to mark them scanned. In the second phase we
-	 * initiate a directed scrub for every range that was not scrubbed in
-	 * phase 1. If we're called for a 'rescan', we harmlessly pass through
-	 * the first phase, but really only care about running phase 2, where
-	 * regions can be notified of new poison.
-	 */
-
-	/* process platform firmware initiated scrubs */
- retry:
+	acpi_desc = container_of(work, typeof(*acpi_desc), dwork.work);
 	mutex_lock(&acpi_desc->init_mutex);
-	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		struct nd_cmd_ars_status *ars_status;
-		struct acpi_nfit_system_address *spa;
-		u64 ars_start, ars_len;
-		int rc;
-
-		if (acpi_desc->cancel)
-			break;
-
-		if (nfit_spa->nd_region)
-			continue;
-
-		if (init_ars_done) {
-			/*
-			 * No need to re-query, we're now just
-			 * reconciling all the ranges covered by the
-			 * initial scrub
-			 */
-			rc = 0;
-		} else
-			rc = acpi_nfit_query_poison(acpi_desc);
-
-		if (rc == -ENOTTY) {
-			/* no ars capability, just register spa and move on */
-			acpi_nfit_register_region(acpi_desc, nfit_spa);
-			continue;
-		}
-
-		if (rc == -EBUSY && !tmo) {
-			/* fallthrough to directed scrub in phase 2 */
-			dev_warn(dev, "timeout awaiting ars results, continuing...\n");
-			break;
-		} else if (rc == -EBUSY) {
-			mutex_unlock(&acpi_desc->init_mutex);
-			ssleep(1);
-			tmo--;
-			goto retry;
-		}
-
-		/* we got some results, but there are more pending... */
-		if (rc == -ENOSPC && overflow_retry--) {
-			ars_status = acpi_desc->ars_status;
-			/*
-			 * Record the original scrub range, so that we
-			 * can recall all the ranges impacted by the
-			 * initial scrub.
-			 */
-			if (!init_scrub_length) {
-				init_scrub_length = ars_status->length;
-				init_scrub_address = ars_status->address;
-			}
-			rc = ars_continue(acpi_desc);
-			if (rc == 0) {
-				mutex_unlock(&acpi_desc->init_mutex);
-				goto retry;
-			}
-		}
-
-		if (rc < 0) {
-			/*
-			 * Initial scrub failed, we'll give it one more
-			 * try below...
-			 */
-			break;
-		}
-
-		/* We got some final results, record completed ranges */
-		ars_status = acpi_desc->ars_status;
-		if (init_scrub_length) {
-			ars_start = init_scrub_address;
-			ars_len = ars_start + init_scrub_length;
-		} else {
-			ars_start = ars_status->address;
-			ars_len = ars_status->length;
-		}
-		spa = nfit_spa->spa;
-
-		if (!init_ars_done) {
-			init_ars_done = true;
-			dev_dbg(dev, "init scrub %#llx + %#llx complete\n",
-					ars_start, ars_len);
-		}
-		if (ars_start <= spa->address && ars_start + ars_len
-				>= spa->address + spa->length)
-			acpi_nfit_register_region(acpi_desc, nfit_spa);
+	query_rc = acpi_nfit_query_poison(acpi_desc);
+	tmo = __acpi_nfit_scrub(acpi_desc, query_rc);
+	if (tmo) {
+		queue_delayed_work(nfit_wq, &acpi_desc->dwork, tmo * HZ);
+		acpi_desc->scrub_tmo = tmo;
+	} else {
+		acpi_desc->scrub_count++;
+		if (acpi_desc->scrub_count_state)
+			sysfs_notify_dirent(acpi_desc->scrub_count_state);
 	}
-
-	/*
-	 * For all the ranges not covered by an initial scrub we still
-	 * want to see if there are errors, but it's ok to discover them
-	 * asynchronously.
-	 */
-	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		/*
-		 * Flag all the ranges that still need scrubbing, but
-		 * register them now to make data available.
-		 */
-		if (!nfit_spa->nd_region) {
-			set_bit(ARS_REQ, &nfit_spa->ars_state);
-			acpi_nfit_register_region(acpi_desc, nfit_spa);
-		}
-	}
-	acpi_desc->init_complete = 1;
-
-	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
-		acpi_nfit_async_scrub(acpi_desc, nfit_spa);
-	acpi_desc->scrub_count++;
-	acpi_desc->ars_start_flags = 0;
-	if (acpi_desc->scrub_count_state)
-		sysfs_notify_dirent(acpi_desc->scrub_count_state);
+	memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
 	mutex_unlock(&acpi_desc->init_mutex);
 }
 
@@ -3015,33 +2955,61 @@ static void acpi_nfit_init_ars(struct acpi_nfit_desc *acpi_desc,
 	nfit_spa->max_ars = ars_cap.max_ars_out;
 	nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
 	acpi_desc->max_ars = max(nfit_spa->max_ars, acpi_desc->max_ars);
+	clear_bit(ARS_FAILED, &nfit_spa->ars_state);
+	set_bit(ARS_REQ, &nfit_spa->ars_state);
 }
 
-
 static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nfit_spa *nfit_spa;
+	int rc, query_rc;
 
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		int rc, type = nfit_spa_type(nfit_spa->spa);
-
-		/* PMEM and VMEM will be registered by the ARS workqueue */
-		if (type == NFIT_SPA_PM || type == NFIT_SPA_VOLATILE) {
+		set_bit(ARS_FAILED, &nfit_spa->ars_state);
+		switch (nfit_spa_type(nfit_spa->spa)) {
+		case NFIT_SPA_VOLATILE:
+		case NFIT_SPA_PM:
 			acpi_nfit_init_ars(acpi_desc, nfit_spa);
-			continue;
+			break;
 		}
-		/* BLK apertures belong to BLK region registration below */
-		if (type == NFIT_SPA_BDW)
-			continue;
-		/* BLK regions don't need to wait for ARS results */
-		rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
-		if (rc)
-			return rc;
 	}
 
-	acpi_desc->ars_start_flags = 0;
-	if (!acpi_desc->cancel)
-		queue_work(nfit_wq, &acpi_desc->work);
+	/*
+	 * Reap any results that might be pending before starting new
+	 * short requests.
+	 */
+	query_rc = acpi_nfit_query_poison(acpi_desc);
+	if (query_rc == 0)
+		ars_complete_all(acpi_desc);
+
+	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
+		switch (nfit_spa_type(nfit_spa->spa)) {
+		case NFIT_SPA_VOLATILE:
+		case NFIT_SPA_PM:
+			/* register regions and kick off initial ARS run */
+			rc = ars_register(acpi_desc, nfit_spa, &query_rc);
+			if (rc)
+				return rc;
+			break;
+		case NFIT_SPA_BDW:
+			/* nothing to register */
+			break;
+		case NFIT_SPA_DCR:
+		case NFIT_SPA_VDISK:
+		case NFIT_SPA_VCD:
+		case NFIT_SPA_PDISK:
+		case NFIT_SPA_PCD:
+			/* register known regions that don't support ARS */
+			rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
+			if (rc)
+				return rc;
+			break;
		default:
+			/* don't register unknown regions */
+			break;
+		}
+
+	queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
 	return 0;
 }
 
@@ -3176,49 +3144,20 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
 }
 EXPORT_SYMBOL_GPL(acpi_nfit_init);
 
-struct acpi_nfit_flush_work {
-	struct work_struct work;
-	struct completion cmp;
-};
-
-static void flush_probe(struct work_struct *work)
-{
-	struct acpi_nfit_flush_work *flush;
-
-	flush = container_of(work, typeof(*flush), work);
-	complete(&flush->cmp);
-}
-
 static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
 {
 	struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
 	struct device *dev = acpi_desc->dev;
-	struct acpi_nfit_flush_work flush;
-	int rc;
 
-	/* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
+	/* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
 	device_lock(dev);
 	device_unlock(dev);
 
-	/* bounce the init_mutex to make init_complete valid */
+	/* Bounce the init_mutex to complete initial registration */
 	mutex_lock(&acpi_desc->init_mutex);
-	if (acpi_desc->cancel || acpi_desc->init_complete) {
-		mutex_unlock(&acpi_desc->init_mutex);
-		return 0;
-	}
-
-	/*
-	 * Scrub work could take 10s of seconds, userspace may give up so we
-	 * need to be interruptible while waiting.
-	 */
-	INIT_WORK_ONSTACK(&flush.work, flush_probe);
-	init_completion(&flush.cmp);
-	queue_work(nfit_wq, &flush.work);
 	mutex_unlock(&acpi_desc->init_mutex);
 
-	rc = wait_for_completion_interruptible(&flush.cmp);
-	cancel_work_sync(&flush.work);
-	return rc;
+	return 0;
 }
 
 static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
@@ -3237,7 +3176,7 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
 	 * just needs guarantees that any ars it initiates are not
 	 * interrupted by any intervening start requests from userspace.
 	 */
-	if (work_busy(&acpi_desc->work))
+	if (work_busy(&acpi_desc->dwork.work))
 		return -EBUSY;
 
 	return 0;
@@ -3246,11 +3185,9 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
 int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
 {
 	struct device *dev = acpi_desc->dev;
+	int scheduled = 0, busy = 0;
 	struct nfit_spa *nfit_spa;
 
-	if (work_busy(&acpi_desc->work))
-		return -EBUSY;
-
 	mutex_lock(&acpi_desc->init_mutex);
 	if (acpi_desc->cancel) {
 		mutex_unlock(&acpi_desc->init_mutex);
@@ -3258,21 +3195,32 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
 	}
 
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		struct acpi_nfit_system_address *spa = nfit_spa->spa;
+		int type = nfit_spa_type(nfit_spa->spa);
 
-		if (nfit_spa_type(spa) != NFIT_SPA_PM)
+		if (type != NFIT_SPA_PM && type != NFIT_SPA_VOLATILE)
+			continue;
+		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
 			continue;
 
-		set_bit(ARS_REQ, &nfit_spa->ars_state);
+		if (test_and_set_bit(ARS_REQ, &nfit_spa->ars_state))
+			busy++;
+		else {
+			if (test_bit(ARS_SHORT, &flags))
+				set_bit(ARS_SHORT, &nfit_spa->ars_state);
+			scheduled++;
+		}
+	}
+	if (scheduled) {
+		queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
+		dev_dbg(dev, "ars_scan triggered\n");
 	}
-	acpi_desc->ars_start_flags = 0;
-	if (test_bit(ARS_SHORT, &flags))
-		acpi_desc->ars_start_flags |= ND_ARS_RETURN_PREV_DATA;
-	queue_work(nfit_wq, &acpi_desc->work);
-	dev_dbg(dev, "ars_scan triggered\n");
 	mutex_unlock(&acpi_desc->init_mutex);
 
-	return 0;
+	if (scheduled)
+		return 0;
+	if (busy)
+		return -EBUSY;
+	return -ENOTTY;
 }
 
 void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
@@ -3299,7 +3247,8 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
 	INIT_LIST_HEAD(&acpi_desc->dimms);
 	INIT_LIST_HEAD(&acpi_desc->list);
 	mutex_init(&acpi_desc->init_mutex);
-	INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
+	acpi_desc->scrub_tmo = 1;
+	INIT_DELAYED_WORK(&acpi_desc->dwork, acpi_nfit_scrub);
 }
 EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
 
@@ -3323,6 +3272,7 @@ void acpi_nfit_shutdown(void *data)
 
 	mutex_lock(&acpi_desc->init_mutex);
 	acpi_desc->cancel = 1;
+	cancel_delayed_work_sync(&acpi_desc->dwork);
 	mutex_unlock(&acpi_desc->init_mutex);
 
 	/*
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 45e7949986a8..7d15856a739f 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -197,18 +197,18 @@ struct acpi_nfit_desc {
 	struct device *dev;
 	u8 ars_start_flags;
 	struct nd_cmd_ars_status *ars_status;
-	struct work_struct work;
+	struct delayed_work dwork;
 	struct list_head list;
 	struct kernfs_node *scrub_count_state;
 	unsigned int max_ars;
 	unsigned int scrub_count;
 	unsigned int scrub_mode;
 	unsigned int cancel:1;
-	unsigned int init_complete:1;
 	unsigned long dimm_cmd_force_en;
 	unsigned long bus_cmd_force_en;
 	unsigned long bus_nfit_cmd_force_en;
 	unsigned int platform_cap;
+	unsigned int scrub_tmo;
 	int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
 			void *iobuf, u64 len, int rw);
 };
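The scheduling core of this rework is visible in __acpi_nfit_scrub() and acpi_nfit_scrub() above: while acpi_nfit_query_poison() keeps returning -EBUSY, the work item re-queues itself with queue_delayed_work(), doubling acpi_desc->scrub_tmo up to a 30-minute ceiling (min(30U * 60U, tmo * 2)). The standalone userspace sketch below only models that backoff schedule; it is not kernel code, and next_timeout() is a hypothetical stand-in for the kernel expression.

/* Sketch (userspace C): the poll cadence __acpi_nfit_scrub() produces
 * while the platform ARS engine stays busy. The delay doubles from 1s
 * and saturates at 30 minutes; any non-busy query result would end the
 * loop and bump scrub_count instead. All names are illustrative.
 */
#include <stdio.h>

static unsigned int next_timeout(unsigned int tmo)
{
	/* mirrors: return min(30U * 60U, tmo * 2); */
	unsigned int next = tmo * 2;
	return next < 30U * 60U ? next : 30U * 60U;
}

int main(void)
{
	unsigned int tmo = 1;	/* acpi_desc->scrub_tmo starts at 1 */
	unsigned int elapsed = 0;
	int polls = 0;

	/* pretend the firmware-initiated scrub stays busy for an hour */
	while (elapsed < 60U * 60U) {
		tmo = next_timeout(tmo);
		elapsed += tmo;
		polls++;
		printf("poll %2d: sleep %4us (t=%5us)\n", polls, tmo, elapsed);
	}
	return 0;
}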
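Similarly, the request/completion handshake now rides entirely on the nfit_spa->ars_state bits: acpi_nfit_ars_rescan() uses test_and_set_bit(ARS_REQ) so a second caller observes an in-flight request as busy, and ars_complete() consumes ARS_REQ and latches ARS_DONE until the next pass starts. A minimal sketch of that handshake follows, using plain non-atomic stand-ins for the kernel bitops; the bit names match the patch, but the enum values and helper are illustrative only.

/* Sketch (userspace C) of the ARS_REQ/ARS_DONE handshake. */
#include <stdbool.h>
#include <stdio.h>

enum { ARS_REQ, ARS_DONE, ARS_SHORT, ARS_FAILED };

/* non-atomic stand-in for the kernel's test_and_set_bit() */
static bool test_and_set(unsigned long *state, int bit)
{
	bool was = *state & (1UL << bit);
	*state |= 1UL << bit;
	return was;
}

int main(void)
{
	unsigned long state = 0;

	/* acpi_nfit_ars_rescan(): first caller schedules, second counts as busy */
	printf("first rescan busy? %d\n", test_and_set(&state, ARS_REQ));	/* 0 */
	printf("second rescan busy? %d\n", test_and_set(&state, ARS_REQ));	/* 1 */

	/* ars_complete(): REQ consumed, DONE latched until the next pass */
	state &= ~(1UL << ARS_REQ);
	state |= 1UL << ARS_DONE;
	printf("done latched? %d\n", !!(state & (1UL << ARS_DONE)));		/* 1 */
	return 0;
}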