From bc94638886ab21f8247d3f7f39573d3feb7d8284 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 13 Dec 2019 09:55:14 +0100 Subject: [PATCH 01/27] ACPI: processor: Export function to claim _CST control The intel_idle driver will be modified to use ACPI _CST subsequently and it will need to notify the platform firmware of that if acpi_gbl_FADT.cst_control is set, so add a routine for this purpose, acpi_processor_claim_cst_control(), to acpi_processor.c (so that it is always present which is required by intel_idle) and export it to allow the ACPI processor driver (which is modular) to call it. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_processor.c | 25 +++++++++++++++++++++++++ drivers/acpi/processor_idle.c | 12 ++++-------- include/linux/acpi.h | 6 ++++++ 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 2c4dda0787e8..8a53f3c5b70e 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -705,3 +705,28 @@ void __init acpi_processor_init(void) acpi_scan_add_handler_with_hotplug(&processor_handler, "processor"); acpi_scan_add_handler(&processor_container_handler); } + +#ifdef CONFIG_ACPI_PROCESSOR_CSTATE +/** + * acpi_processor_claim_cst_control - Request _CST control from the platform. + */ +bool acpi_processor_claim_cst_control(void) +{ + static bool cst_control_claimed; + acpi_status status; + + if (!acpi_gbl_FADT.cst_control || cst_control_claimed) + return true; + + status = acpi_os_write_port(acpi_gbl_FADT.smi_command, + acpi_gbl_FADT.cst_control, 8); + if (ACPI_FAILURE(status)) { + pr_warn("ACPI: Failed to claim processor _CST control\n"); + return false; + } + + cst_control_claimed = true; + return true; +} +EXPORT_SYMBOL_GPL(acpi_processor_claim_cst_control); +#endif /* CONFIG_ACPI_PROCESSOR_CSTATE */ diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 2ae95df2e74f..dd737d836c03 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -909,7 +909,6 @@ static int acpi_processor_setup_cstates(struct acpi_processor *pr) static inline void acpi_processor_cstate_first_run_checks(void) { - acpi_status status; static int first_run; if (first_run) @@ -921,13 +920,10 @@ static inline void acpi_processor_cstate_first_run_checks(void) max_cstate); first_run++; - if (acpi_gbl_FADT.cst_control && !nocst) { - status = acpi_os_write_port(acpi_gbl_FADT.smi_command, - acpi_gbl_FADT.cst_control, 8); - if (ACPI_FAILURE(status)) - ACPI_EXCEPTION((AE_INFO, status, - "Notifying BIOS of _CST ability failed")); - } + if (nocst) + return; + + acpi_processor_claim_cst_control(); } #else diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 0f37a7d5fa77..ee39b05e7f76 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -279,6 +279,12 @@ static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id) /* Validate the processor object's proc_id */ bool acpi_duplicate_processor_id(int proc_id); +/* Processor _CTS control */ +#ifdef CONFIG_ACPI_PROCESSOR_CSTATE +bool acpi_processor_claim_cst_control(void); +#else +static inline bool acpi_processor_claim_cst_control(void) { return false; } +#endif #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ From 987c785319b99e32602f7f86cfae3cf9b81e402b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 13 Dec 2019 09:55:24 +0100 Subject: [PATCH 02/27] ACPI: processor: Introduce acpi_processor_evaluate_cst() In order to separate the ACPI _CST evaluation from checks specific to the ACPI processor driver, move the majority of the acpi_processor_get_power_info_cst() function body to a new function, acpi_processor_evaluate_cst(), that will extract the C-states information from _CST output, and redefine acpi_processor_get_power_info_cst() as a wrapper around it. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_idle.c | 54 +++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index dd737d836c03..e92d0e6d4cd1 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -297,21 +297,17 @@ static int acpi_processor_get_power_info_default(struct acpi_processor *pr) return 0; } -static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) +static int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, + struct acpi_processor_power *info) { - acpi_status status; - u64 count; - int current_count; - int i, ret = 0; struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *cst; + acpi_status status; + u64 count; + int current_count = 0; + int i, ret = 0; - if (nocst) - return -ENODEV; - - current_count = 0; - - status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer); + status = acpi_evaluate_object(handle, "_CST", NULL, &buffer); if (ACPI_FAILURE(status)) { ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _CST, giving up\n")); return -ENODEV; @@ -335,9 +331,6 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) goto end; } - /* Tell driver that at least _CST is supported. */ - pr->flags.has_cst = 1; - for (i = 1; i <= count; i++) { union acpi_object *element; union acpi_object *obj; @@ -383,7 +376,7 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) cx.entry_method = ACPI_CSTATE_SYSTEMIO; if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) { if (acpi_processor_ffh_cstate_probe - (pr->id, &cx, reg) == 0) { + (cpu, &cx, reg) == 0) { cx.entry_method = ACPI_CSTATE_FFH; } else if (cx.type == ACPI_STATE_C1) { /* @@ -432,7 +425,7 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) continue; current_count++; - memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx)); + memcpy(&info->states[current_count], &cx, sizeof(cx)); /* * We support total ACPI_PROCESSOR_MAX_POWER - 1 @@ -446,12 +439,9 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) } } - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d power states\n", - current_count)); + acpi_handle_info(handle, "Found %d idle states\n", current_count); - /* Validate number of power states discovered */ - if (current_count < 2) - ret = -EFAULT; + info->count = current_count; end: kfree(buffer.pointer); @@ -459,6 +449,28 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) return ret; } +static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) +{ + int ret; + + if (nocst) + return -ENODEV; + + ret = acpi_processor_evaluate_cst(pr->handle, pr->id, &pr->power); + if (ret) + return ret; + + /* + * It is expected that there will be at least 2 states, C1 and + * something else (C2 or C3), so fail if that is not the case. + */ + if (pr->power.count < 2) + return -EFAULT; + + pr->flags.has_cst = 1; + return 0; +} + static void acpi_processor_power_verify_c3(struct acpi_processor *pr, struct acpi_processor_cx *cx) { From aa659a3fca79abd9a3ca3ddc535db631b01c9a02 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 13 Dec 2019 09:55:33 +0100 Subject: [PATCH 03/27] ACPI: processor: Clean up acpi_processor_evaluate_cst() Clean up acpi_processor_evaluate_cst() in multiple ways: * Rename current_count to last_index which matches the purpose of the variable better. * Consistently use acpi_handle_*() for printing messages and make the messages cleaner. * Drop redundant parens and braces. * Rewrite and clarify comments. * Rearrange checks and drop the redundant ones. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_idle.c | 114 ++++++++++++++++------------------ 1 file changed, 52 insertions(+), 62 deletions(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index e92d0e6d4cd1..7c2fe3b2ec31 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -304,29 +304,29 @@ static int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, union acpi_object *cst; acpi_status status; u64 count; - int current_count = 0; + int last_index = 0; int i, ret = 0; status = acpi_evaluate_object(handle, "_CST", NULL, &buffer); if (ACPI_FAILURE(status)) { - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _CST, giving up\n")); + acpi_handle_debug(handle, "No _CST\n"); return -ENODEV; } cst = buffer.pointer; - /* There must be at least 2 elements */ - if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) { - pr_err("not enough elements in _CST\n"); + /* There must be at least 2 elements. */ + if (!cst || cst->type != ACPI_TYPE_PACKAGE || cst->package.count < 2) { + acpi_handle_warn(handle, "Invalid _CST output\n"); ret = -EFAULT; goto end; } count = cst->package.elements[0].integer.value; - /* Validate number of power states. */ + /* Validate the number of C-states. */ if (count < 1 || count != cst->package.count - 1) { - pr_err("count given by _CST is not valid\n"); + acpi_handle_warn(handle, "Inconsistent _CST data\n"); ret = -EFAULT; goto end; } @@ -337,111 +337,101 @@ static int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, struct acpi_power_register *reg; struct acpi_processor_cx cx; + /* + * If there is not enough space for all C-states, skip the + * excess ones and log a warning. + */ + if (last_index >= ACPI_PROCESSOR_MAX_POWER - 1) { + acpi_handle_warn(handle, + "No room for more idle states (limit: %d)\n", + ACPI_PROCESSOR_MAX_POWER - 1); + break; + } + memset(&cx, 0, sizeof(cx)); - element = &(cst->package.elements[i]); + element = &cst->package.elements[i]; if (element->type != ACPI_TYPE_PACKAGE) continue; if (element->package.count != 4) continue; - obj = &(element->package.elements[0]); + obj = &element->package.elements[0]; if (obj->type != ACPI_TYPE_BUFFER) continue; reg = (struct acpi_power_register *)obj->buffer.pointer; - if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO && - (reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) - continue; - - /* There should be an easy way to extract an integer... */ - obj = &(element->package.elements[1]); + obj = &element->package.elements[1]; if (obj->type != ACPI_TYPE_INTEGER) continue; cx.type = obj->integer.value; /* - * Some buggy BIOSes won't list C1 in _CST - - * Let acpi_processor_get_power_info_default() handle them later + * There are known cases in which the _CST output does not + * contain C1, so if the type of the first state found is not + * C1, leave an empty slot for C1 to be filled in later. */ if (i == 1 && cx.type != ACPI_STATE_C1) - current_count++; + last_index = 1; cx.address = reg->address; - cx.index = current_count + 1; + cx.index = last_index + 1; - cx.entry_method = ACPI_CSTATE_SYSTEMIO; if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) { - if (acpi_processor_ffh_cstate_probe - (cpu, &cx, reg) == 0) { - cx.entry_method = ACPI_CSTATE_FFH; + if (!acpi_processor_ffh_cstate_probe(cpu, &cx, reg)) { + /* + * In the majority of cases _CST describes C1 as + * a FIXED_HARDWARE C-state, but if the command + * line forbids using MWAIT, use CSTATE_HALT for + * C1 regardless. + */ + if (cx.type == ACPI_STATE_C1 && + boot_option_idle_override == IDLE_NOMWAIT) { + cx.entry_method = ACPI_CSTATE_HALT; + snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT"); + } else { + cx.entry_method = ACPI_CSTATE_FFH; + } } else if (cx.type == ACPI_STATE_C1) { /* - * C1 is a special case where FIXED_HARDWARE - * can be handled in non-MWAIT way as well. - * In that case, save this _CST entry info. - * Otherwise, ignore this info and continue. + * In the special case of C1, FIXED_HARDWARE can + * be handled by executing the HLT instruction. */ cx.entry_method = ACPI_CSTATE_HALT; snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT"); } else { continue; } - if (cx.type == ACPI_STATE_C1 && - (boot_option_idle_override == IDLE_NOMWAIT)) { - /* - * In most cases the C1 space_id obtained from - * _CST object is FIXED_HARDWARE access mode. - * But when the option of idle=halt is added, - * the entry_method type should be changed from - * CSTATE_FFH to CSTATE_HALT. - * When the option of idle=nomwait is added, - * the C1 entry_method type should be - * CSTATE_HALT. - */ - cx.entry_method = ACPI_CSTATE_HALT; - snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT"); - } - } else { + } else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { + cx.entry_method = ACPI_CSTATE_SYSTEMIO; snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI IOPORT 0x%x", cx.address); + } else { + continue; } - if (cx.type == ACPI_STATE_C1) { + if (cx.type == ACPI_STATE_C1) cx.valid = 1; - } - obj = &(element->package.elements[2]); + obj = &element->package.elements[2]; if (obj->type != ACPI_TYPE_INTEGER) continue; cx.latency = obj->integer.value; - obj = &(element->package.elements[3]); + obj = &element->package.elements[3]; if (obj->type != ACPI_TYPE_INTEGER) continue; - current_count++; - memcpy(&info->states[current_count], &cx, sizeof(cx)); - - /* - * We support total ACPI_PROCESSOR_MAX_POWER - 1 - * (From 1 through ACPI_PROCESSOR_MAX_POWER - 1) - */ - if (current_count >= (ACPI_PROCESSOR_MAX_POWER - 1)) { - pr_warn("Limiting number of power states to max (%d)\n", - ACPI_PROCESSOR_MAX_POWER); - pr_warn("Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n"); - break; - } + memcpy(&info->states[++last_index], &cx, sizeof(cx)); } - acpi_handle_info(handle, "Found %d idle states\n", current_count); + acpi_handle_info(handle, "Found %d idle states\n", last_index); - info->count = current_count; + info->count = last_index; end: kfree(buffer.pointer); From 22c48a439d6ac1dbe2db1092b64052592a47b017 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sun, 15 Dec 2019 13:02:05 +0000 Subject: [PATCH 04/27] cpuidle: clps711x: convert to devm_platform_ioremap_resource() Use devm_platform_ioremap_resource() to simplify code. Signed-off-by: Yangtao Li Acked-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-clps711x.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/cpuidle/cpuidle-clps711x.c b/drivers/cpuidle/cpuidle-clps711x.c index 6e36740f5719..fc22c59b6c73 100644 --- a/drivers/cpuidle/cpuidle-clps711x.c +++ b/drivers/cpuidle/cpuidle-clps711x.c @@ -37,10 +37,7 @@ static struct cpuidle_driver clps711x_idle_driver = { static int __init clps711x_cpuidle_probe(struct platform_device *pdev) { - struct resource *res; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - clps711x_halt = devm_ioremap_resource(&pdev->dev, res); + clps711x_halt = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(clps711x_halt)) return PTR_ERR(clps711x_halt); From 85c3ebd4a0510c0700e4f3e205bbb46757a53feb Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sun, 15 Dec 2019 13:02:06 +0000 Subject: [PATCH 05/27] cpuidle: kirkwood: convert to devm_platform_ioremap_resource() Use devm_platform_ioremap_resource() to simplify code. Signed-off-by: Yangtao Li Reviewed-by: Andrew Lunn Acked-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-kirkwood.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/cpuidle/cpuidle-kirkwood.c b/drivers/cpuidle/cpuidle-kirkwood.c index d23d8f468c12..511c4f46027a 100644 --- a/drivers/cpuidle/cpuidle-kirkwood.c +++ b/drivers/cpuidle/cpuidle-kirkwood.c @@ -55,10 +55,7 @@ static struct cpuidle_driver kirkwood_idle_driver = { /* Initialize CPU idle by registering the idle states */ static int kirkwood_cpuidle_probe(struct platform_device *pdev) { - struct resource *res; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ddr_operation_base = devm_ioremap_resource(&pdev->dev, res); + ddr_operation_base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ddr_operation_base)) return PTR_ERR(ddr_operation_base); From 239ed06d0eefc4ee351af69bd64742ada56c4bdb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 16 Dec 2019 12:07:01 +0100 Subject: [PATCH 06/27] ACPI: processor: Make ACPI_PROCESSOR_CSTATE depend on ACPI_PROCESSOR To avoid build errors when CONFIG_ACPI_PROCESSOR_CSTATE is set and CONFIG_ACPI_PROCESSOR is not (that may appear in randconfig builds), make the former depend on the latter. Acked-by: Len Brown Signed-off-by: Rafael J. Wysocki --- drivers/acpi/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 002838d23b86..cc57bab146b5 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -241,6 +241,7 @@ config ACPI_CPU_FREQ_PSS config ACPI_PROCESSOR_CSTATE def_bool y + depends on ACPI_PROCESSOR depends on IA64 || X86 config ACPI_PROCESSOR_IDLE From 77fb4e0a559a960eb36d0b2c50c781c5492577eb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 13 Dec 2019 09:55:42 +0100 Subject: [PATCH 07/27] ACPI: processor: Export acpi_processor_evaluate_cst() The intel_idle driver will be modified to use ACPI _CST subsequently and it will need to call acpi_processor_evaluate_cst(), so move that function to acpi_processor.c so that it is always present (which is required by intel_idle) and export it to modules to allow the ACPI processor driver (which is modular) to call it. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_processor.c | 157 ++++++++++++++++++++++++++++++++++ drivers/acpi/processor_idle.c | 142 ------------------------------ include/linux/acpi.h | 9 ++ 3 files changed, 166 insertions(+), 142 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 8a53f3c5b70e..5379bc3f275d 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -729,4 +729,161 @@ bool acpi_processor_claim_cst_control(void) return true; } EXPORT_SYMBOL_GPL(acpi_processor_claim_cst_control); + +/** + * acpi_processor_evaluate_cst - Evaluate the processor _CST control method. + * @handle: ACPI handle of the processor object containing the _CST. + * @cpu: The numeric ID of the target CPU. + * @info: Object write the C-states information into. + * + * Extract the C-state information for the given CPU from the output of the _CST + * control method under the corresponding ACPI processor object (or processor + * device object) and populate @info with it. + * + * If any ACPI_ADR_SPACE_FIXED_HARDWARE C-states are found, invoke + * acpi_processor_ffh_cstate_probe() to verify them and update the + * cpu_cstate_entry data for @cpu. + */ +int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, + struct acpi_processor_power *info) +{ + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + union acpi_object *cst; + acpi_status status; + u64 count; + int last_index = 0; + int i, ret = 0; + + status = acpi_evaluate_object(handle, "_CST", NULL, &buffer); + if (ACPI_FAILURE(status)) { + acpi_handle_debug(handle, "No _CST\n"); + return -ENODEV; + } + + cst = buffer.pointer; + + /* There must be at least 2 elements. */ + if (!cst || cst->type != ACPI_TYPE_PACKAGE || cst->package.count < 2) { + acpi_handle_warn(handle, "Invalid _CST output\n"); + ret = -EFAULT; + goto end; + } + + count = cst->package.elements[0].integer.value; + + /* Validate the number of C-states. */ + if (count < 1 || count != cst->package.count - 1) { + acpi_handle_warn(handle, "Inconsistent _CST data\n"); + ret = -EFAULT; + goto end; + } + + for (i = 1; i <= count; i++) { + union acpi_object *element; + union acpi_object *obj; + struct acpi_power_register *reg; + struct acpi_processor_cx cx; + + /* + * If there is not enough space for all C-states, skip the + * excess ones and log a warning. + */ + if (last_index >= ACPI_PROCESSOR_MAX_POWER - 1) { + acpi_handle_warn(handle, + "No room for more idle states (limit: %d)\n", + ACPI_PROCESSOR_MAX_POWER - 1); + break; + } + + memset(&cx, 0, sizeof(cx)); + + element = &cst->package.elements[i]; + if (element->type != ACPI_TYPE_PACKAGE) + continue; + + if (element->package.count != 4) + continue; + + obj = &element->package.elements[0]; + + if (obj->type != ACPI_TYPE_BUFFER) + continue; + + reg = (struct acpi_power_register *)obj->buffer.pointer; + + obj = &element->package.elements[1]; + if (obj->type != ACPI_TYPE_INTEGER) + continue; + + cx.type = obj->integer.value; + /* + * There are known cases in which the _CST output does not + * contain C1, so if the type of the first state found is not + * C1, leave an empty slot for C1 to be filled in later. + */ + if (i == 1 && cx.type != ACPI_STATE_C1) + last_index = 1; + + cx.address = reg->address; + cx.index = last_index + 1; + + if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) { + if (!acpi_processor_ffh_cstate_probe(cpu, &cx, reg)) { + /* + * In the majority of cases _CST describes C1 as + * a FIXED_HARDWARE C-state, but if the command + * line forbids using MWAIT, use CSTATE_HALT for + * C1 regardless. + */ + if (cx.type == ACPI_STATE_C1 && + boot_option_idle_override == IDLE_NOMWAIT) { + cx.entry_method = ACPI_CSTATE_HALT; + snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT"); + } else { + cx.entry_method = ACPI_CSTATE_FFH; + } + } else if (cx.type == ACPI_STATE_C1) { + /* + * In the special case of C1, FIXED_HARDWARE can + * be handled by executing the HLT instruction. + */ + cx.entry_method = ACPI_CSTATE_HALT; + snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT"); + } else { + continue; + } + } else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { + cx.entry_method = ACPI_CSTATE_SYSTEMIO; + snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI IOPORT 0x%x", + cx.address); + } else { + continue; + } + + if (cx.type == ACPI_STATE_C1) + cx.valid = 1; + + obj = &element->package.elements[2]; + if (obj->type != ACPI_TYPE_INTEGER) + continue; + + cx.latency = obj->integer.value; + + obj = &element->package.elements[3]; + if (obj->type != ACPI_TYPE_INTEGER) + continue; + + memcpy(&info->states[++last_index], &cx, sizeof(cx)); + } + + acpi_handle_info(handle, "Found %d idle states\n", last_index); + + info->count = last_index; + + end: + kfree(buffer.pointer); + + return ret; +} +EXPORT_SYMBOL_GPL(acpi_processor_evaluate_cst); #endif /* CONFIG_ACPI_PROCESSOR_CSTATE */ diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 7c2fe3b2ec31..dcc289e30166 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -297,148 +297,6 @@ static int acpi_processor_get_power_info_default(struct acpi_processor *pr) return 0; } -static int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, - struct acpi_processor_power *info) -{ - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; - union acpi_object *cst; - acpi_status status; - u64 count; - int last_index = 0; - int i, ret = 0; - - status = acpi_evaluate_object(handle, "_CST", NULL, &buffer); - if (ACPI_FAILURE(status)) { - acpi_handle_debug(handle, "No _CST\n"); - return -ENODEV; - } - - cst = buffer.pointer; - - /* There must be at least 2 elements. */ - if (!cst || cst->type != ACPI_TYPE_PACKAGE || cst->package.count < 2) { - acpi_handle_warn(handle, "Invalid _CST output\n"); - ret = -EFAULT; - goto end; - } - - count = cst->package.elements[0].integer.value; - - /* Validate the number of C-states. */ - if (count < 1 || count != cst->package.count - 1) { - acpi_handle_warn(handle, "Inconsistent _CST data\n"); - ret = -EFAULT; - goto end; - } - - for (i = 1; i <= count; i++) { - union acpi_object *element; - union acpi_object *obj; - struct acpi_power_register *reg; - struct acpi_processor_cx cx; - - /* - * If there is not enough space for all C-states, skip the - * excess ones and log a warning. - */ - if (last_index >= ACPI_PROCESSOR_MAX_POWER - 1) { - acpi_handle_warn(handle, - "No room for more idle states (limit: %d)\n", - ACPI_PROCESSOR_MAX_POWER - 1); - break; - } - - memset(&cx, 0, sizeof(cx)); - - element = &cst->package.elements[i]; - if (element->type != ACPI_TYPE_PACKAGE) - continue; - - if (element->package.count != 4) - continue; - - obj = &element->package.elements[0]; - - if (obj->type != ACPI_TYPE_BUFFER) - continue; - - reg = (struct acpi_power_register *)obj->buffer.pointer; - - obj = &element->package.elements[1]; - if (obj->type != ACPI_TYPE_INTEGER) - continue; - - cx.type = obj->integer.value; - /* - * There are known cases in which the _CST output does not - * contain C1, so if the type of the first state found is not - * C1, leave an empty slot for C1 to be filled in later. - */ - if (i == 1 && cx.type != ACPI_STATE_C1) - last_index = 1; - - cx.address = reg->address; - cx.index = last_index + 1; - - if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) { - if (!acpi_processor_ffh_cstate_probe(cpu, &cx, reg)) { - /* - * In the majority of cases _CST describes C1 as - * a FIXED_HARDWARE C-state, but if the command - * line forbids using MWAIT, use CSTATE_HALT for - * C1 regardless. - */ - if (cx.type == ACPI_STATE_C1 && - boot_option_idle_override == IDLE_NOMWAIT) { - cx.entry_method = ACPI_CSTATE_HALT; - snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT"); - } else { - cx.entry_method = ACPI_CSTATE_FFH; - } - } else if (cx.type == ACPI_STATE_C1) { - /* - * In the special case of C1, FIXED_HARDWARE can - * be handled by executing the HLT instruction. - */ - cx.entry_method = ACPI_CSTATE_HALT; - snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT"); - } else { - continue; - } - } else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { - cx.entry_method = ACPI_CSTATE_SYSTEMIO; - snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI IOPORT 0x%x", - cx.address); - } else { - continue; - } - - if (cx.type == ACPI_STATE_C1) - cx.valid = 1; - - obj = &element->package.elements[2]; - if (obj->type != ACPI_TYPE_INTEGER) - continue; - - cx.latency = obj->integer.value; - - obj = &element->package.elements[3]; - if (obj->type != ACPI_TYPE_INTEGER) - continue; - - memcpy(&info->states[++last_index], &cx, sizeof(cx)); - } - - acpi_handle_info(handle, "Found %d idle states\n", last_index); - - info->count = last_index; - - end: - kfree(buffer.pointer); - - return ret; -} - static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) { int ret; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ee39b05e7f76..0f24d701fbdc 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -280,10 +280,19 @@ static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id) /* Validate the processor object's proc_id */ bool acpi_duplicate_processor_id(int proc_id); /* Processor _CTS control */ +struct acpi_processor_power; + #ifdef CONFIG_ACPI_PROCESSOR_CSTATE bool acpi_processor_claim_cst_control(void); +int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, + struct acpi_processor_power *info); #else static inline bool acpi_processor_claim_cst_control(void) { return false; } +static inline int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, + struct acpi_processor_power *info) +{ + return -ENODEV; +} #endif #ifdef CONFIG_ACPI_HOTPLUG_CPU From 9f3d6daf61e5156139cd05643f7f1c2a9b7b49b0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 13 Dec 2019 09:55:52 +0100 Subject: [PATCH 08/27] intel_idle: Refactor intel_idle_cpuidle_driver_init() Move the C-state verification and checks from intel_idle_cpuidle_driver_init() to a separate function, intel_idle_verify_cstate(), and make the former call it after checking the CPUIDLE_FLAG_UNUSABLE state flag. Also combine the drv->states[] updates with the incrementation of drv->state_count. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 49 ++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 75fd2a7b0842..47255d3cf51f 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -944,6 +944,22 @@ static void intel_idle_s2idle(struct cpuidle_device *dev, mwait_idle_with_hints(eax, ecx); } +static bool intel_idle_verify_cstate(unsigned int mwait_hint) +{ + unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; + unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) & + MWAIT_SUBSTATE_MASK; + + /* Ignore the C-state if there are NO sub-states in CPUID for it. */ + if (num_substates == 0) + return false; + + if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + mark_tsc_unstable("TSC halts in idle states deeper than C2"); + + return true; +} + static void __setup_broadcast_timer(bool on) { if (on) @@ -1332,10 +1348,10 @@ static void __init intel_idle_cpuidle_driver_init(void) drv->state_count = 1; for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { - int num_substates, mwait_hint, mwait_cstate; + unsigned int mwait_hint; - if ((cpuidle_state_table[cstate].enter == NULL) && - (cpuidle_state_table[cstate].enter_s2idle == NULL)) + if (!cpuidle_state_table[cstate].enter && + !cpuidle_state_table[cstate].enter_s2idle) break; if (cstate + 1 > max_cstate) { @@ -1343,34 +1359,19 @@ static void __init intel_idle_cpuidle_driver_init(void) break; } - mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); - mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint); - - /* number of sub-states for this state in CPUID.MWAIT */ - num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4)) - & MWAIT_SUBSTATE_MASK; - - /* if NO sub-states for this state in CPUID, skip it */ - if (num_substates == 0) - continue; - - /* if state marked as disabled, skip it */ + /* If marked as unusable, skip this state. */ if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) { pr_debug("state %s is disabled\n", cpuidle_state_table[cstate].name); continue; } + mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); + if (!intel_idle_verify_cstate(mwait_hint)) + continue; - if (((mwait_cstate + 1) > 2) && - !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) - mark_tsc_unstable("TSC halts in idle" - " states deeper than C2"); - - drv->states[drv->state_count] = /* structure copy */ - cpuidle_state_table[cstate]; - - drv->state_count += 1; + /* Structure copy. */ + drv->states[drv->state_count++] = cpuidle_state_table[cstate]; } if (icpu->byt_auto_demotion_disable_flag) { From 18734958e9bfbc055805d110a38dc76307eba742 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 13 Dec 2019 09:56:01 +0100 Subject: [PATCH 09/27] intel_idle: Use ACPI _CST for processor models without C-state tables Modify the intel_idle driver to get the C-states information from ACPI _CST if the processor model is not recognized by it. The processor is still required to support MWAIT and the information from ACPI _CST will only be used if all of the C-states listed by _CST are of the ACPI_CSTATE_FFH type (which means that they are expected to be entered via MWAIT). Moreover, the driver assumes that the _CST information is the same for all CPUs in the system, so it is sufficient to evaluate _CST for one of them and extract the common list of C-states from there. Also _CST is evaluated once at the system initialization time and the driver does not respond to _CST change notifications (that can be changed in the future). The main functional difference between intel_idle with this change and the ACPI processor driver is that the former sets the target residency to be equal to the exit latency (provided by _CST) for C1-type C-states and to 3 times the exit latency value for the other C-state types, whereas the latter obtains the target residency by multiplying the exit latency by the same number (2 by default) for all C-state types. Therefore it is expected that in general using the former instead of the latter on the same system will lead to improved energy-efficiency. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 190 ++++++++++++++++++++++++++++++++------ 1 file changed, 162 insertions(+), 28 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 47255d3cf51f..28812d93d59a 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -41,6 +41,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -1111,6 +1112,129 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { {} }; +#define INTEL_CPU_FAM6_MWAIT \ + { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_MWAIT, 0 } + +static const struct x86_cpu_id intel_mwait_ids[] __initconst = { + INTEL_CPU_FAM6_MWAIT, + {} +}; + +static bool intel_idle_max_cstate_reached(int cstate) +{ + if (cstate + 1 > max_cstate) { + pr_info("max_cstate %d reached\n", max_cstate); + return true; + } + return false; +} + +#ifdef CONFIG_ACPI_PROCESSOR_CSTATE +#include + +static struct acpi_processor_power acpi_state_table; + +/** + * intel_idle_cst_usable - Check if the _CST information can be used. + * + * Check if all of the C-states listed by _CST in the max_cstate range are + * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT. + */ +static bool intel_idle_cst_usable(void) +{ + int cstate, limit; + + limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1), + acpi_state_table.count); + + for (cstate = 1; cstate < limit; cstate++) { + struct acpi_processor_cx *cx = &acpi_state_table.states[cstate]; + + if (cx->entry_method != ACPI_CSTATE_FFH) + return false; + } + + return true; +} + +static bool intel_idle_acpi_cst_extract(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) { + struct acpi_processor *pr = per_cpu(processors, cpu); + + if (!pr) + continue; + + if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table)) + continue; + + acpi_state_table.count++; + + if (!intel_idle_cst_usable()) + continue; + + if (!acpi_processor_claim_cst_control()) { + acpi_state_table.count = 0; + return false; + } + + return true; + } + + pr_debug("ACPI _CST not found or not usable\n"); + return false; +} + +static void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) +{ + int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); + + /* + * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of + * the interesting states are ACPI_CSTATE_FFH. + */ + for (cstate = 1; cstate < limit; cstate++) { + struct acpi_processor_cx *cx; + struct cpuidle_state *state; + + if (intel_idle_max_cstate_reached(cstate)) + break; + + cx = &acpi_state_table.states[cstate]; + + state = &drv->states[drv->state_count++]; + + snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate); + strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); + state->exit_latency = cx->latency; + /* + * For C1-type C-states use the same number for both the exit + * latency and target residency, because that is the case for + * C1 in the majority of the static C-states tables above. + * For the other types of C-states, however, set the target + * residency to 3 times the exit latency which should lead to + * a reasonable balance between energy-efficiency and + * performance in the majority of interesting cases. + */ + state->target_residency = cx->latency; + if (cx->type > ACPI_STATE_C1) + state->target_residency *= 3; + + state->flags = MWAIT2flg(cx->address); + if (cx->type > ACPI_STATE_C2) + state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; + + state->enter = intel_idle; + state->enter_s2idle = intel_idle_s2idle; + } +} +#else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ +static inline bool intel_idle_acpi_cst_extract(void) { return false; } +static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } +#endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ + /* * intel_idle_probe() */ @@ -1125,17 +1249,15 @@ static int __init intel_idle_probe(void) } id = x86_match_cpu(intel_idle_ids); - if (!id) { - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 6) - pr_debug("does not run on family %d model %d\n", - boot_cpu_data.x86, boot_cpu_data.x86_model); - return -ENODEV; - } - - if (!boot_cpu_has(X86_FEATURE_MWAIT)) { - pr_debug("Please enable MWAIT in BIOS SETUP\n"); - return -ENODEV; + if (id) { + if (!boot_cpu_has(X86_FEATURE_MWAIT)) { + pr_debug("Please enable MWAIT in BIOS SETUP\n"); + return -ENODEV; + } + } else { + id = x86_match_cpu(intel_mwait_ids); + if (!id) + return -ENODEV; } if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) @@ -1151,7 +1273,10 @@ static int __init intel_idle_probe(void) pr_debug("MWAIT substates: 0x%x\n", mwait_substates); icpu = (const struct idle_cpu *)id->driver_data; - cpuidle_state_table = icpu->state_table; + if (icpu) + cpuidle_state_table = icpu->state_table; + else if (!intel_idle_acpi_cst_extract()) + return -ENODEV; pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", boot_cpu_data.x86_model); @@ -1333,32 +1458,20 @@ static void intel_idle_state_table_update(void) } } -/* - * intel_idle_cpuidle_driver_init() - * allocate, initialize cpuidle_states - */ -static void __init intel_idle_cpuidle_driver_init(void) +static void intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) { int cstate; - struct cpuidle_driver *drv = &intel_idle_driver; - - intel_idle_state_table_update(); - - cpuidle_poll_state_init(drv); - drv->state_count = 1; for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { unsigned int mwait_hint; + if (intel_idle_max_cstate_reached(cstate)) + break; + if (!cpuidle_state_table[cstate].enter && !cpuidle_state_table[cstate].enter_s2idle) break; - if (cstate + 1 > max_cstate) { - pr_info("max_cstate %d reached\n", max_cstate); - break; - } - /* If marked as unusable, skip this state. */ if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) { pr_debug("state %s is disabled\n", @@ -1380,6 +1493,24 @@ static void __init intel_idle_cpuidle_driver_init(void) } } +/* + * intel_idle_cpuidle_driver_init() + * allocate, initialize cpuidle_states + */ +static void __init intel_idle_cpuidle_driver_init(void) +{ + struct cpuidle_driver *drv = &intel_idle_driver; + + intel_idle_state_table_update(); + + cpuidle_poll_state_init(drv); + drv->state_count = 1; + + if (icpu) + intel_idle_init_cstates_icpu(drv); + else + intel_idle_init_cstates_acpi(drv); +} /* * intel_idle_cpu_init() @@ -1398,6 +1529,9 @@ static int intel_idle_cpu_init(unsigned int cpu) return -EIO; } + if (!icpu) + return 0; + if (icpu->auto_demotion_disable_flags) auto_demotion_disable(); From 75a80267410e38ab76c4ceb39753f96d72113781 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 13 Dec 2019 09:56:13 +0100 Subject: [PATCH 10/27] cpuidle: Allow idle states to be disabled by default In certain situations it may be useful to prevent some idle states from being used by default while allowing user space to enable them later on. For this purpose, introduce a new state flag, CPUIDLE_FLAG_OFF, to mark idle states that should be disabled by default, make the core set CPUIDLE_STATE_DISABLED_BY_USER for those states at the initialization time and add a new state attribute in sysfs, "default_status", to inform user space of the initial status of the given idle state ("disabled" if CPUIDLE_FLAG_OFF is set for it, "enabled" otherwise). Signed-off-by: Rafael J. Wysocki --- Documentation/ABI/testing/sysfs-devices-system-cpu | 6 ++++++ Documentation/admin-guide/pm/cpuidle.rst | 3 +++ drivers/cpuidle/cpuidle.c | 6 +++++- drivers/cpuidle/sysfs.c | 10 ++++++++++ include/linux/cpuidle.h | 1 + 5 files changed, 25 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index fc20cde63d1e..2e0e3b45d02a 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -196,6 +196,12 @@ Description: does not reflect it. Likewise, if one enables a deep state but a lighter state still is disabled, then this has no effect. +What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/default_status +Date: December 2019 +KernelVersion: v5.6 +Contact: Linux power management list +Description: + (RO) The default status of this state, "enabled" or "disabled". What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/residency Date: March 2014 diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst index e70b365dbc60..311cd7cc2b75 100644 --- a/Documentation/admin-guide/pm/cpuidle.rst +++ b/Documentation/admin-guide/pm/cpuidle.rst @@ -506,6 +506,9 @@ object corresponding to it, as follows: ``disable`` Whether or not this idle state is disabled. +``default_status`` + The default status of this state, "enabled" or "disabled". + ``latency`` Exit latency of the idle state in microseconds. diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 33d19c8eb027..a2af7bb8f0a5 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -572,10 +572,14 @@ static int __cpuidle_register_device(struct cpuidle_device *dev) if (!try_module_get(drv->owner)) return -EINVAL; - for (i = 0; i < drv->state_count; i++) + for (i = 0; i < drv->state_count; i++) { if (drv->states[i].flags & CPUIDLE_FLAG_UNUSABLE) dev->states_usage[i].disable |= CPUIDLE_STATE_DISABLED_BY_DRIVER; + if (drv->states[i].flags & CPUIDLE_FLAG_OFF) + dev->states_usage[i].disable |= CPUIDLE_STATE_DISABLED_BY_USER; + } + per_cpu(cpuidle_devices, dev->cpu) = dev; list_add(&dev->device_list, &cpuidle_detected_devices); diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 38ef770be90d..254d1560dc19 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -327,6 +327,14 @@ static ssize_t store_state_disable(struct cpuidle_state *state, return size; } +static ssize_t show_state_default_status(struct cpuidle_state *state, + struct cpuidle_state_usage *state_usage, + char *buf) +{ + return sprintf(buf, "%s\n", + state->flags & CPUIDLE_FLAG_OFF ? "disabled" : "enabled"); +} + define_one_state_ro(name, show_state_name); define_one_state_ro(desc, show_state_desc); define_one_state_ro(latency, show_state_exit_latency); @@ -337,6 +345,7 @@ define_one_state_ro(time, show_state_time); define_one_state_rw(disable, show_state_disable, store_state_disable); define_one_state_ro(above, show_state_above); define_one_state_ro(below, show_state_below); +define_one_state_ro(default_status, show_state_default_status); static struct attribute *cpuidle_state_default_attrs[] = { &attr_name.attr, @@ -349,6 +358,7 @@ static struct attribute *cpuidle_state_default_attrs[] = { &attr_disable.attr, &attr_above.attr, &attr_below.attr, + &attr_default_status.attr, NULL }; diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 1dabe36bd011..ebfb52b3ffbf 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -77,6 +77,7 @@ struct cpuidle_state { #define CPUIDLE_FLAG_COUPLED BIT(1) /* state applies to multiple cpus */ #define CPUIDLE_FLAG_TIMER_STOP BIT(2) /* timer is stopped on this state */ #define CPUIDLE_FLAG_UNUSABLE BIT(3) /* avoid using this state */ +#define CPUIDLE_FLAG_OFF BIT(4) /* disable this state by default */ struct cpuidle_device_kobj; struct cpuidle_state_kobj; From bff8e60a86f4960133f90ad9add9adeb082b8154 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 13 Dec 2019 09:56:21 +0100 Subject: [PATCH 11/27] intel_idle: Allow ACPI _CST to be used for selected known processors Update the intel_idle driver to get the C-states information from ACPI _CST in some cases in which the processor is known to the driver, as long as that information is available and the new use_acpi flag is set in the profile of the processor in question. In the cases when there is a specific table of C-states for the given processor in the driver, that table is used as the primary source of information on the available C-states, but if ACPI _CST is present, the C-states that are not listed by it will not be enabled by default (they still can be enabled later by user space via sysfs, though). The new CPUIDLE_FLAG_ALWAYS_ENABLE flag can be used for marking C-states that should be enabled by default even if they are not listed by ACPI _CST. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 45 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 28812d93d59a..a072b84d9595 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -80,6 +80,7 @@ struct idle_cpu { unsigned long auto_demotion_disable_flags; bool byt_auto_demotion_disable_flag; bool disable_promotion_to_c1e; + bool use_acpi; }; static const struct idle_cpu *icpu; @@ -90,6 +91,11 @@ static void intel_idle_s2idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index); static struct cpuidle_state *cpuidle_state_table; +/* + * Enable this state by default even if the ACPI _CST does not list it. + */ +#define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) + /* * Set this flag for states where the HW flushes the TLB for us * and so we don't need cross-calls to keep it consistent. @@ -1230,9 +1236,33 @@ static void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) state->enter_s2idle = intel_idle_s2idle; } } + +static bool intel_idle_off_by_default(u32 mwait_hint) +{ + int cstate, limit; + + /* + * If there are no _CST C-states, do not disable any C-states by + * default. + */ + if (!acpi_state_table.count) + return false; + + limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); + /* + * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of + * the interesting states are ACPI_CSTATE_FFH. + */ + for (cstate = 1; cstate < limit; cstate++) { + if (acpi_state_table.states[cstate].address == mwait_hint) + return false; + } + return true; +} #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ static inline bool intel_idle_acpi_cst_extract(void) { return false; } static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } +static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ /* @@ -1273,10 +1303,13 @@ static int __init intel_idle_probe(void) pr_debug("MWAIT substates: 0x%x\n", mwait_substates); icpu = (const struct idle_cpu *)id->driver_data; - if (icpu) + if (icpu) { cpuidle_state_table = icpu->state_table; - else if (!intel_idle_acpi_cst_extract()) + if (icpu->use_acpi) + intel_idle_acpi_cst_extract(); + } else if (!intel_idle_acpi_cst_extract()) { return -ENODEV; + } pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", boot_cpu_data.x86_model); @@ -1484,7 +1517,13 @@ static void intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) continue; /* Structure copy. */ - drv->states[drv->state_count++] = cpuidle_state_table[cstate]; + drv->states[drv->state_count] = cpuidle_state_table[cstate]; + + if (icpu->use_acpi && intel_idle_off_by_default(mwait_hint) && + !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)) + drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF; + + drv->state_count++; } if (icpu->byt_auto_demotion_disable_flag) { From 4ec32d9e8e5b6d6eb491eeee3938665d8a2388fa Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 13 Dec 2019 09:56:29 +0100 Subject: [PATCH 12/27] intel_idle: Add module parameter to prevent ACPI _CST from being used Add a new module parameter called "no_acpi" to the intel_idle driver to allow the driver to be prevented from using ACPI _CST via kernel command line. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index a072b84d9595..26fe383bb921 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1138,6 +1138,10 @@ static bool intel_idle_max_cstate_reached(int cstate) #ifdef CONFIG_ACPI_PROCESSOR_CSTATE #include +static bool no_acpi __read_mostly; +module_param(no_acpi, bool, 0444); +MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list"); + static struct acpi_processor_power acpi_state_table; /** @@ -1167,6 +1171,11 @@ static bool intel_idle_acpi_cst_extract(void) { unsigned int cpu; + if (no_acpi) { + pr_debug("Not allowed to use ACPI _CST\n"); + return false; + } + for_each_possible_cpu(cpu) { struct acpi_processor *pr = per_cpu(processors, cpu); From e6d4f08a677654385869ba0c39d7c9ceec47e5c5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 13 Dec 2019 09:56:38 +0100 Subject: [PATCH 13/27] intel_idle: Use ACPI _CST on server systems In many cases, especially on server systems, it is desirable to avoid enabling C-states that have been disabled in the platform firmware (BIOS) setup, except for C1E. As a rule, the C-states disabled this way are not listed by ACPI _CST, so if that is used by intel_idle along with the specific table of C-states that it has for the given processor, the C-states disabled through the platform firmware will not be enabled by default by intel_idle. Accordingly, set the use_acpi flag (introduced previously) in all server processor profiles defined in intel_idle (so as to make it use ACPI _CST to decide which C-states to enable by default) and set the CPUIDLE_FLAG_ALWAYS_ENABLE flag (also introduced previously) for C1E in all C-states tables in intel_idle that contain C1 too (so that C1E is enabled regardless of whether or not it is listed by ACPI _CST). Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 70 ++++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 20 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 26fe383bb921..1467490adfc3 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -131,7 +131,7 @@ static struct cpuidle_state nehalem_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, .enter = &intel_idle, @@ -168,7 +168,7 @@ static struct cpuidle_state snb_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, .enter = &intel_idle, @@ -303,7 +303,7 @@ static struct cpuidle_state ivb_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, .enter = &intel_idle, @@ -348,7 +348,7 @@ static struct cpuidle_state ivt_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 80, .enter = &intel_idle, @@ -385,7 +385,7 @@ static struct cpuidle_state ivt_cstates_4s[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 250, .enter = &intel_idle, @@ -422,7 +422,7 @@ static struct cpuidle_state ivt_cstates_8s[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 500, .enter = &intel_idle, @@ -459,7 +459,7 @@ static struct cpuidle_state hsw_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, .enter = &intel_idle, @@ -527,7 +527,7 @@ static struct cpuidle_state bdw_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, .enter = &intel_idle, @@ -596,7 +596,7 @@ static struct cpuidle_state skl_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, .enter = &intel_idle, @@ -665,7 +665,7 @@ static struct cpuidle_state skx_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, .enter = &intel_idle, @@ -815,7 +815,7 @@ static struct cpuidle_state bxt_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, .enter = &intel_idle, @@ -876,7 +876,7 @@ static struct cpuidle_state dnv_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, .enter = &intel_idle, @@ -998,6 +998,13 @@ static const struct idle_cpu idle_cpu_nehalem = { .disable_promotion_to_c1e = true, }; +static const struct idle_cpu idle_cpu_nhx = { + .state_table = nehalem_cstates, + .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct idle_cpu idle_cpu_atom = { .state_table = atom_cstates, }; @@ -1016,6 +1023,12 @@ static const struct idle_cpu idle_cpu_snb = { .disable_promotion_to_c1e = true, }; +static const struct idle_cpu idle_cpu_snx = { + .state_table = snb_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct idle_cpu idle_cpu_byt = { .state_table = byt_cstates, .disable_promotion_to_c1e = true, @@ -1036,6 +1049,7 @@ static const struct idle_cpu idle_cpu_ivb = { static const struct idle_cpu idle_cpu_ivt = { .state_table = ivt_cstates, .disable_promotion_to_c1e = true, + .use_acpi = true, }; static const struct idle_cpu idle_cpu_hsw = { @@ -1043,11 +1057,23 @@ static const struct idle_cpu idle_cpu_hsw = { .disable_promotion_to_c1e = true, }; +static const struct idle_cpu idle_cpu_hsx = { + .state_table = hsw_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct idle_cpu idle_cpu_bdw = { .state_table = bdw_cstates, .disable_promotion_to_c1e = true, }; +static const struct idle_cpu idle_cpu_bdx = { + .state_table = bdw_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct idle_cpu idle_cpu_skl = { .state_table = skl_cstates, .disable_promotion_to_c1e = true, @@ -1056,15 +1082,18 @@ static const struct idle_cpu idle_cpu_skl = { static const struct idle_cpu idle_cpu_skx = { .state_table = skx_cstates, .disable_promotion_to_c1e = true, + .use_acpi = true, }; static const struct idle_cpu idle_cpu_avn = { .state_table = avn_cstates, .disable_promotion_to_c1e = true, + .use_acpi = true, }; static const struct idle_cpu idle_cpu_knl = { .state_table = knl_cstates, + .use_acpi = true, }; static const struct idle_cpu idle_cpu_bxt = { @@ -1075,20 +1104,21 @@ static const struct idle_cpu idle_cpu_bxt = { static const struct idle_cpu idle_cpu_dnv = { .state_table = dnv_cstates, .disable_promotion_to_c1e = true, + .use_acpi = true, }; static const struct x86_cpu_id intel_idle_ids[] __initconst = { - INTEL_CPU_FAM6(NEHALEM_EP, idle_cpu_nehalem), + INTEL_CPU_FAM6(NEHALEM_EP, idle_cpu_nhx), INTEL_CPU_FAM6(NEHALEM, idle_cpu_nehalem), INTEL_CPU_FAM6(NEHALEM_G, idle_cpu_nehalem), INTEL_CPU_FAM6(WESTMERE, idle_cpu_nehalem), - INTEL_CPU_FAM6(WESTMERE_EP, idle_cpu_nehalem), - INTEL_CPU_FAM6(NEHALEM_EX, idle_cpu_nehalem), + INTEL_CPU_FAM6(WESTMERE_EP, idle_cpu_nhx), + INTEL_CPU_FAM6(NEHALEM_EX, idle_cpu_nhx), INTEL_CPU_FAM6(ATOM_BONNELL, idle_cpu_atom), INTEL_CPU_FAM6(ATOM_BONNELL_MID, idle_cpu_lincroft), - INTEL_CPU_FAM6(WESTMERE_EX, idle_cpu_nehalem), + INTEL_CPU_FAM6(WESTMERE_EX, idle_cpu_nhx), INTEL_CPU_FAM6(SANDYBRIDGE, idle_cpu_snb), - INTEL_CPU_FAM6(SANDYBRIDGE_X, idle_cpu_snb), + INTEL_CPU_FAM6(SANDYBRIDGE_X, idle_cpu_snx), INTEL_CPU_FAM6(ATOM_SALTWELL, idle_cpu_atom), INTEL_CPU_FAM6(ATOM_SILVERMONT, idle_cpu_byt), INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, idle_cpu_tangier), @@ -1096,14 +1126,14 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { INTEL_CPU_FAM6(IVYBRIDGE, idle_cpu_ivb), INTEL_CPU_FAM6(IVYBRIDGE_X, idle_cpu_ivt), INTEL_CPU_FAM6(HASWELL, idle_cpu_hsw), - INTEL_CPU_FAM6(HASWELL_X, idle_cpu_hsw), + INTEL_CPU_FAM6(HASWELL_X, idle_cpu_hsx), INTEL_CPU_FAM6(HASWELL_L, idle_cpu_hsw), INTEL_CPU_FAM6(HASWELL_G, idle_cpu_hsw), INTEL_CPU_FAM6(ATOM_SILVERMONT_D, idle_cpu_avn), INTEL_CPU_FAM6(BROADWELL, idle_cpu_bdw), INTEL_CPU_FAM6(BROADWELL_G, idle_cpu_bdw), - INTEL_CPU_FAM6(BROADWELL_X, idle_cpu_bdw), - INTEL_CPU_FAM6(BROADWELL_D, idle_cpu_bdw), + INTEL_CPU_FAM6(BROADWELL_X, idle_cpu_bdx), + INTEL_CPU_FAM6(BROADWELL_D, idle_cpu_bdx), INTEL_CPU_FAM6(SKYLAKE_L, idle_cpu_skl), INTEL_CPU_FAM6(SKYLAKE, idle_cpu_skl), INTEL_CPU_FAM6(KABYLAKE_L, idle_cpu_skl), From 577a2f41f4c7aced4fed41b20ee77cedd8c197cf Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 2 Jan 2020 21:42:16 +0100 Subject: [PATCH 14/27] cpuidle: Drop unused cpuidle_driver_ref/unref() functions The cpuidle_driver_ref() and cpuidle_driver_unref() functions are not used and the refcnt field in struct cpuidle_driver operated by them is not updated anywhere else (so it is permanently equal to 0), so drop both of them along with refcnt. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano --- drivers/cpuidle/driver.c | 46 ---------------------------------------- include/linux/cpuidle.h | 5 ----- 2 files changed, 51 deletions(-) diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index ce6a5f80fb83..4070e573bf43 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -155,8 +155,6 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv) { int i; - drv->refcnt = 0; - /* * Use all possible CPUs as the default, because if the kernel boots * with some CPUs offline and then we online one of them, the CPU @@ -240,9 +238,6 @@ static int __cpuidle_register_driver(struct cpuidle_driver *drv) */ static void __cpuidle_unregister_driver(struct cpuidle_driver *drv) { - if (WARN_ON(drv->refcnt > 0)) - return; - if (drv->bctimer) { drv->bctimer = 0; on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer, @@ -349,47 +344,6 @@ struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev) } EXPORT_SYMBOL_GPL(cpuidle_get_cpu_driver); -/** - * cpuidle_driver_ref - get a reference to the driver. - * - * Increment the reference counter of the cpuidle driver associated with - * the current CPU. - * - * Returns a pointer to the driver, or NULL if the current CPU has no driver. - */ -struct cpuidle_driver *cpuidle_driver_ref(void) -{ - struct cpuidle_driver *drv; - - spin_lock(&cpuidle_driver_lock); - - drv = cpuidle_get_driver(); - if (drv) - drv->refcnt++; - - spin_unlock(&cpuidle_driver_lock); - return drv; -} - -/** - * cpuidle_driver_unref - puts down the refcount for the driver - * - * Decrement the reference counter of the cpuidle driver associated with - * the current CPU. - */ -void cpuidle_driver_unref(void) -{ - struct cpuidle_driver *drv; - - spin_lock(&cpuidle_driver_lock); - - drv = cpuidle_get_driver(); - if (drv && !WARN_ON(drv->refcnt <= 0)) - drv->refcnt--; - - spin_unlock(&cpuidle_driver_lock); -} - /** * cpuidle_driver_state_disabled - Disable or enable an idle state * @drv: cpuidle driver owning the state diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 1dabe36bd011..23744d49bc22 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -115,7 +115,6 @@ DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev); struct cpuidle_driver { const char *name; struct module *owner; - int refcnt; /* used by the cpuidle framework to setup the broadcast timer */ unsigned int bctimer:1; @@ -147,8 +146,6 @@ extern u64 cpuidle_poll_time(struct cpuidle_driver *drv, extern int cpuidle_register_driver(struct cpuidle_driver *drv); extern struct cpuidle_driver *cpuidle_get_driver(void); -extern struct cpuidle_driver *cpuidle_driver_ref(void); -extern void cpuidle_driver_unref(void); extern void cpuidle_driver_state_disabled(struct cpuidle_driver *drv, int idx, bool disable); extern void cpuidle_unregister_driver(struct cpuidle_driver *drv); @@ -186,8 +183,6 @@ static inline u64 cpuidle_poll_time(struct cpuidle_driver *drv, static inline int cpuidle_register_driver(struct cpuidle_driver *drv) {return -ENODEV; } static inline struct cpuidle_driver *cpuidle_get_driver(void) {return NULL; } -static inline struct cpuidle_driver *cpuidle_driver_ref(void) {return NULL; } -static inline void cpuidle_driver_unref(void) {} static inline void cpuidle_driver_state_disabled(struct cpuidle_driver *drv, int idx, bool disable) { } static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { } From 53eb82b0977da8fbba472bd2275fa2ad27f50621 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 29 Dec 2019 19:09:12 +0100 Subject: [PATCH 15/27] cpuidle: arm: Enable compile testing for some of drivers Some of cpuidle drivers for ARMv7 can be compile tested on this architecture because they do not depend on mach-specific bits. Enable compile testing for big.LITTLE, Kirkwood, Zynq, AT91, Exynos and mvebu cpuidle drivers. Signed-off-by: Krzysztof Kozlowski Acked-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/Kconfig.arm | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm index a224d33dda7f..62272ecfa771 100644 --- a/drivers/cpuidle/Kconfig.arm +++ b/drivers/cpuidle/Kconfig.arm @@ -25,7 +25,7 @@ config ARM_PSCI_CPUIDLE config ARM_BIG_LITTLE_CPUIDLE bool "Support for ARM big.LITTLE processors" - depends on ARCH_VEXPRESS_TC2_PM || ARCH_EXYNOS + depends on ARCH_VEXPRESS_TC2_PM || ARCH_EXYNOS || COMPILE_TEST depends on MCPM && !ARM64 select ARM_CPU_SUSPEND select CPU_IDLE_MULTIPLE_DRIVERS @@ -51,13 +51,13 @@ config ARM_HIGHBANK_CPUIDLE config ARM_KIRKWOOD_CPUIDLE bool "CPU Idle Driver for Marvell Kirkwood SoCs" - depends on MACH_KIRKWOOD && !ARM64 + depends on (MACH_KIRKWOOD || COMPILE_TEST) && !ARM64 help This adds the CPU Idle driver for Marvell Kirkwood SoCs. config ARM_ZYNQ_CPUIDLE bool "CPU Idle Driver for Xilinx Zynq processors" - depends on ARCH_ZYNQ && !ARM64 + depends on (ARCH_ZYNQ || COMPILE_TEST) && !ARM64 help Select this to enable cpuidle on Xilinx Zynq processors. @@ -70,19 +70,19 @@ config ARM_U8500_CPUIDLE config ARM_AT91_CPUIDLE bool "Cpu Idle Driver for the AT91 processors" default y - depends on ARCH_AT91 && !ARM64 + depends on (ARCH_AT91 || COMPILE_TEST) && !ARM64 help Select this to enable cpuidle for AT91 processors. config ARM_EXYNOS_CPUIDLE bool "Cpu Idle Driver for the Exynos processors" - depends on ARCH_EXYNOS && !ARM64 + depends on (ARCH_EXYNOS || COMPILE_TEST) && !ARM64 select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP help Select this to enable cpuidle for Exynos processors. config ARM_MVEBU_V7_CPUIDLE bool "CPU Idle Driver for mvebu v7 family processors" - depends on ARCH_MVEBU && !ARM64 + depends on (ARCH_MVEBU || COMPILE_TEST) && !ARM64 help Select this to enable cpuidle on Armada 370, 38x and XP processors. From a3299182216397a0b943d2549d1997f4eba2bdd2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 15 Jan 2020 10:54:58 +0100 Subject: [PATCH 16/27] Documentation: admin-guide: PM: Add intel_idle document Add an admin-guide document for the intel_idle driver to describe how it works: how it enumerates idle states, what happens during the initialization of it, how it can be controlled via the kernel command line and so on. Signed-off-by: Rafael J. Wysocki Reviewed-by: Randy Dunlap --- Documentation/admin-guide/pm/intel_idle.rst | 246 ++++++++++++++++++ .../admin-guide/pm/working-state.rst | 1 + 2 files changed, 247 insertions(+) create mode 100644 Documentation/admin-guide/pm/intel_idle.rst diff --git a/Documentation/admin-guide/pm/intel_idle.rst b/Documentation/admin-guide/pm/intel_idle.rst new file mode 100644 index 000000000000..afbf778035f8 --- /dev/null +++ b/Documentation/admin-guide/pm/intel_idle.rst @@ -0,0 +1,246 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: + +============================================== +``intel_idle`` CPU Idle Time Management Driver +============================================== + +:Copyright: |copy| 2020 Intel Corporation + +:Author: Rafael J. Wysocki + + +General Information +=================== + +``intel_idle`` is a part of the +:doc:`CPU idle time management subsystem ` in the Linux kernel +(``CPUIdle``). It is the default CPU idle time management driver for the +Nehalem and later generations of Intel processors, but the level of support for +a particular processor model in it depends on whether or not it recognizes that +processor model and may also depend on information coming from the platform +firmware. [To understand ``intel_idle`` it is necessary to know how ``CPUIdle`` +works in general, so this is the time to get familiar with :doc:`cpuidle` if you +have not done that yet.] + +``intel_idle`` uses the ``MWAIT`` instruction to inform the processor that the +logical CPU executing it is idle and so it may be possible to put some of the +processor's functional blocks into low-power states. That instruction takes two +arguments (passed in the ``EAX`` and ``ECX`` registers of the target CPU), the +first of which, referred to as a *hint*, can be used by the processor to +determine what can be done (for details refer to Intel Software Developer’s +Manual [1]_). Accordingly, ``intel_idle`` refuses to work with processors in +which the support for the ``MWAIT`` instruction has been disabled (for example, +via the platform firmware configuration menu) or which do not support that +instruction at all. + +``intel_idle`` is not modular, so it cannot be unloaded, which means that the +only way to pass early-configuration-time parameters to it is via the kernel +command line. + + +.. _intel-idle-enumeration-of-states: + +Enumeration of Idle States +========================== + +Each ``MWAIT`` hint value is interpreted by the processor as a license to +reconfigure itself in a certain way in order to save energy. The processor +configurations (with reduced power draw) resulting from that are referred to +as C-states (in the ACPI terminology) or idle states. The list of meaningful +``MWAIT`` hint values and idle states (i.e. low-power configurations of the +processor) corresponding to them depends on the processor model and it may also +depend on the configuration of the platform. + +In order to create a list of available idle states required by the ``CPUIdle`` +subsystem (see :ref:`idle-states-representation` in :doc:`cpuidle`), +``intel_idle`` can use two sources of information: static tables of idle states +for different processor models included in the driver itself and the ACPI tables +of the system. The former are always used if the processor model at hand is +recognized by ``intel_idle`` and the latter are used if that is required for +the given processor model (which is the case for all server processor models +recognized by ``intel_idle``) or if the processor model is not recognized. + +If the ACPI tables are going to be used for building the list of available idle +states, ``intel_idle`` first looks for a ``_CST`` object under one of the ACPI +objects corresponding to the CPUs in the system (refer to the ACPI specification +[2]_ for the description of ``_CST`` and its output package). Because the +``CPUIdle`` subsystem expects that the list of idle states supplied by the +driver will be suitable for all of the CPUs handled by it and ``intel_idle`` is +registered as the ``CPUIdle`` driver for all of the CPUs in the system, the +driver looks for the first ``_CST`` object returning at least one valid idle +state description and such that all of the idle states included in its return +package are of the FFH (Functional Fixed Hardware) type, which means that the +``MWAIT`` instruction is expected to be used to tell the processor that it can +enter one of them. The return package of that ``_CST`` is then assumed to be +applicable to all of the other CPUs in the system and the idle state +descriptions extracted from it are stored in a preliminary list of idle states +coming from the ACPI tables. [This step is skipped if ``intel_idle`` is +configured to ignore the ACPI tables; see `below `_.] + +Next, the first (index 0) entry in the list of available idle states is +initialized to represent a "polling idle state" (a pseudo-idle state in which +the target CPU continuously fetches and executes instructions), and the +subsequent (real) idle state entries are populated as follows. + +If the processor model at hand is recognized by ``intel_idle``, there is a +(static) table of idle state descriptions for it in the driver. In that case, +the "internal" table is the primary source of information on idle states and the +information from it is copied to the final list of available idle states. If +using the ACPI tables for the enumeration of idle states is not required +(depending on the processor model), all of the listed idle state are enabled by +default (so all of them will be taken into consideration by ``CPUIdle`` +governors during CPU idle state selection). Otherwise, some of the listed idle +states may not be enabled by default if there are no matching entries in the +preliminary list of idle states coming from the ACPI tables. In that case user +space still can enable them later (on a per-CPU basis) with the help of +the ``disable`` idle state attribute in ``sysfs`` (see +:ref:`idle-states-representation` in :doc:`cpuidle`). This basically means that +the idle states "known" to the driver may not be enabled by default if they have +not been exposed by the platform firmware (through the ACPI tables). + +If the given processor model is not recognized by ``intel_idle``, but it +supports ``MWAIT``, the preliminary list of idle states coming from the ACPI +tables is used for building the final list that will be supplied to the +``CPUIdle`` core during driver registration. For each idle state in that list, +the description, ``MWAIT`` hint and exit latency are copied to the corresponding +entry in the final list of idle states. The name of the idle state represented +by it (to be returned by the ``name`` idle state attribute in ``sysfs``) is +"CX_ACPI", where X is the index of that idle state in the final list (note that +the minimum value of X is 1, because 0 is reserved for the "polling" state), and +its target residency is based on the exit latency value. Specifically, for +C1-type idle states the exit latency value is also used as the target residency +(for compatibility with the majority of the "internal" tables of idle states for +various processor models recognized by ``intel_idle``) and for the other idle +state types (C2 and C3) the target residency value is 3 times the exit latency +(again, that is because it reflects the target residency to exit latency ratio +in the majority of cases for the processor models recognized by ``intel_idle``). +All of the idle states in the final list are enabled by default in this case. + + +.. _intel-idle-initialization: + +Initialization +============== + +The initialization of ``intel_idle`` starts with checking if the kernel command +line options forbid the use of the ``MWAIT`` instruction. If that is the case, +an error code is returned right away. + +The next step is to check whether or not the processor model is known to the +driver, which determines the idle states enumeration method (see +`above `_), and whether or not the processor +supports ``MWAIT`` (the initialization fails if that is not the case). Then, +the ``MWAIT`` support in the processor is enumerated through ``CPUID`` and the +driver initialization fails if the level of support is not as expected (for +example, if the total number of ``MWAIT`` substates returned is 0). + +Next, if the driver is not configured to ignore the ACPI tables (see +`below `_), the idle states information provided by the +platform firmware is extracted from them. + +Then, ``CPUIdle`` device objects are allocated for all CPUs and the list of +available idle states is created as explained +`above `_. + +Finally, ``intel_idle`` is registered with the help of cpuidle_register_driver() +as the ``CPUIdle`` driver for all CPUs in the system and a CPU online callback +for configuring individual CPUs is registered via cpuhp_setup_state(), which +(among other things) causes the callback routine to be invoked for all of the +CPUs present in the system at that time (each CPU executes its own instance of +the callback routine). That routine registers a ``CPUIdle`` device for the CPU +running it (which enables the ``CPUIdle`` subsystem to operate that CPU) and +optionally performs some CPU-specific initialization actions that may be +required for the given processor model. + + +.. _intel-idle-parameters: + +Kernel Command Line Options and Module Parameters +================================================= + +The *x86* architecture support code recognizes three kernel command line +options related to CPU idle time management: ``idle=poll``, ``idle=halt``, +and ``idle=nomwait``. If any of them is present in the kernel command line, the +``MWAIT`` instruction is not allowed to be used, so the initialization of +``intel_idle`` will fail. + +Apart from that there are two module parameters recognized by ``intel_idle`` +itself that can be set via the kernel command line (they cannot be updated via +sysfs, so that is the only way to change their values). + +The ``max_cstate`` parameter value is the maximum idle state index in the list +of idle states supplied to the ``CPUIdle`` core during the registration of the +driver. It is also the maximum number of regular (non-polling) idle states that +can be used by ``intel_idle``, so the enumeration of idle states is terminated +after finding that number of usable idle states (the other idle states that +potentially might have been used if ``max_cstate`` had been greater are not +taken into consideration at all). Setting ``max_cstate`` can prevent +``intel_idle`` from exposing idle states that are regarded as "too deep" for +some reason to the ``CPUIdle`` core, but it does so by making them effectively +invisible until the system is shut down and started again which may not always +be desirable. In practice, it is only really necessary to do that if the idle +states in question cannot be enabled during system startup, because in the +working state of the system the CPU power management quality of service (PM +QoS) feature can be used to prevent ``CPUIdle`` from touching those idle states +even if they have been enumerated (see :ref:`cpu-pm-qos` in :doc:`cpuidle`). +Setting ``max_cstate`` to 0 causes the ``intel_idle`` initialization to fail. + +The ``noacpi`` module parameter (which is recognized by ``intel_idle`` if the +kernel has been configured with ACPI support), can be set to make the driver +ignore the system's ACPI tables entirely (it is unset by default). + + +.. _intel-idle-core-and-package-idle-states: + +Core and Package Levels of Idle States +====================================== + +Typically, in a processor supporting the ``MWAIT`` instruction there are (at +least) two levels of idle states (or C-states). One level, referred to as +"core C-states", covers individual cores in the processor, whereas the other +level, referred to as "package C-states", covers the entire processor package +and it may also involve other components of the system (GPUs, memory +controllers, I/O hubs etc.). + +Some of the ``MWAIT`` hint values allow the processor to use core C-states only +(most importantly, that is the case for the ``MWAIT`` hint value corresponding +to the ``C1`` idle state), but the majority of them give it a license to put +the target core (i.e. the core containing the logical CPU executing ``MWAIT`` +with the given hint value) into a specific core C-state and then (if possible) +to enter a specific package C-state at the deeper level. For example, the +``MWAIT`` hint value representing the ``C3`` idle state allows the processor to +put the target core into the low-power state referred to as "core ``C3``" (or +``CC3``), which happens if all of the logical CPUs (SMT siblings) in that core +have executed ``MWAIT`` with the ``C3`` hint value (or with a hint value +representing a deeper idle state), and in addition to that (in the majority of +cases) it gives the processor a license to put the entire package (possibly +including some non-CPU components such as a GPU or a memory controller) into the +low-power state referred to as "package ``C3``" (or ``PC3``), which happens if +all of the cores have gone into the ``CC3`` state and (possibly) some additional +conditions are satisfied (for instance, if the GPU is covered by ``PC3``, it may +be required to be in a certain GPU-specific low-power state for ``PC3`` to be +reachable). + +As a rule, there is no simple way to make the processor use core C-states only +if the conditions for entering the corresponding package C-states are met, so +the logical CPU executing ``MWAIT`` with a hint value that is not core-level +only (like for ``C1``) must always assume that this may cause the processor to +enter a package C-state. [That is why the exit latency and target residency +values corresponding to the majority of ``MWAIT`` hint values in the "internal" +tables of idle states in ``intel_idle`` reflect the properties of package +C-states.] If using package C-states is not desirable at all, either +:ref:`PM QoS ` or the ``max_cstate`` module parameter of +``intel_idle`` described `above `_ must be used to +restrict the range of permissible idle states to the ones with core-level only +``MWAIT`` hint values (like ``C1``). + + +References +========== + +.. [1] *Intel® 64 and IA-32 Architectures Software Developer’s Manual Volume 2B*, + https://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-vol-2b-manual.html + +.. [2] *Advanced Configuration and Power Interface (ACPI) Specification*, + https://uefi.org/specifications diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst index fc298eb1234b..88f717e59a42 100644 --- a/Documentation/admin-guide/pm/working-state.rst +++ b/Documentation/admin-guide/pm/working-state.rst @@ -8,6 +8,7 @@ Working-State Power Management :maxdepth: 2 cpuidle + intel_idle cpufreq intel_pstate intel_epb From 32014c86d4e1639b430d1f833f0848b99638ca39 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Mon, 20 Jan 2020 14:24:08 +0100 Subject: [PATCH 17/27] cpuidle: coupled: fix warnings when compiling with W=1 Fix warnings that show up when compiling with W=1 Signed-off-by: Benjamin Gaignard Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/coupled.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index b607278df25b..04003b90dc49 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -89,6 +89,7 @@ * @coupled_cpus: mask of cpus that are part of the coupled set * @requested_state: array of requested states for cpus in the coupled set * @ready_waiting_counts: combined count of cpus in ready or waiting loops + * @abort_barrier: synchronisation point for abort cases * @online_count: count of cpus that are online * @refcnt: reference count of cpuidle devices that are using this struct * @prevent: flag to prevent coupled idle while a cpu is hotplugging @@ -338,7 +339,7 @@ static void cpuidle_coupled_poke(int cpu) /** * cpuidle_coupled_poke_others - wake up all other cpus that may be waiting - * @dev: struct cpuidle_device for this cpu + * @this_cpu: target cpu * @coupled: the struct coupled that contains the current cpu * * Calls cpuidle_coupled_poke on all other online cpus. @@ -355,7 +356,7 @@ static void cpuidle_coupled_poke_others(int this_cpu, /** * cpuidle_coupled_set_waiting - mark this cpu as in the wait loop - * @dev: struct cpuidle_device for this cpu + * @cpu: target cpu * @coupled: the struct coupled that contains the current cpu * @next_state: the index in drv->states of the requested state for this cpu * @@ -376,7 +377,7 @@ static int cpuidle_coupled_set_waiting(int cpu, /** * cpuidle_coupled_set_not_waiting - mark this cpu as leaving the wait loop - * @dev: struct cpuidle_device for this cpu + * @cpu: target cpu * @coupled: the struct coupled that contains the current cpu * * Removes the requested idle state for the specified cpuidle device. @@ -412,7 +413,7 @@ static void cpuidle_coupled_set_done(int cpu, struct cpuidle_coupled *coupled) /** * cpuidle_coupled_clear_pokes - spin until the poke interrupt is processed - * @cpu - this cpu + * @cpu: this cpu * * Turns on interrupts and spins until any outstanding poke interrupts have * been processed and the poke bit has been cleared. From a09da3fbc17f36feac00fea37a225596ae302360 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Mon, 20 Jan 2020 14:33:59 +0100 Subject: [PATCH 18/27] cpuidle: sysfs: fix warnings when compiling with W=1 Fix kernel documentation comments to remove warnings when compiling with W=1. Signed-off-by: Benjamin Gaignard Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/sysfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 38ef770be90d..1909584e1e50 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -142,6 +142,7 @@ static struct attribute_group cpuidle_attr_group = { /** * cpuidle_add_interface - add CPU global sysfs attributes + * @dev: the target device */ int cpuidle_add_interface(struct device *dev) { @@ -153,6 +154,7 @@ int cpuidle_add_interface(struct device *dev) /** * cpuidle_remove_interface - remove CPU global sysfs attributes + * @dev: the target device */ void cpuidle_remove_interface(struct device *dev) { @@ -615,7 +617,7 @@ static struct kobj_type ktype_driver_cpuidle = { /** * cpuidle_add_driver_sysfs - adds the driver name sysfs attribute - * @device: the target device + * @dev: the target device */ static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev) { @@ -646,7 +648,7 @@ static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev) /** * cpuidle_remove_driver_sysfs - removes the driver name sysfs attribute - * @device: the target device + * @dev: the target device */ static void cpuidle_remove_driver_sysfs(struct cpuidle_device *dev) { From cefb9409ff995fcc98ce44a07b549ba1b827c31b Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Tue, 21 Jan 2020 09:27:58 +0100 Subject: [PATCH 19/27] cpuidle: fix cpuidle_find_deepest_state() kerneldoc warnings Fix cpuidle_find_deepest_state() kernel documentation to avoid warnings when compiling with W=1. Signed-off-by: Benjamin Gaignard Acked-by: Randy Dunlap Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 33d19c8eb027..ad064d84da5e 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -121,6 +121,9 @@ void cpuidle_use_deepest_state(u64 latency_limit_ns) * cpuidle_find_deepest_state - Find the deepest available idle state. * @drv: cpuidle driver for the given CPU. * @dev: cpuidle device for the given CPU. + * @latency_limit_ns: Idle state exit latency limit + * + * Return: the index of the deepest available idle state. */ int cpuidle_find_deepest_state(struct cpuidle_driver *drv, struct cpuidle_device *dev, From cbd2c4c25d7e0ba7afe42f2397865b164d1109eb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 10 Jan 2020 11:43:23 +0100 Subject: [PATCH 20/27] intel_idle: Eliminate __setup_broadcast_timer() The __setup_broadcast_timer() static function is only called in one place and "true" is passed to it as the argument in there, so effectively it is a wrapper arround tick_broadcast_enable(). To simplify the code, call tick_broadcast_enable() directly instead of __setup_broadcast_timer() and drop the latter. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 1467490adfc3..33e353bdd4b6 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -967,14 +967,6 @@ static bool intel_idle_verify_cstate(unsigned int mwait_hint) return true; } -static void __setup_broadcast_timer(bool on) -{ - if (on) - tick_broadcast_enable(); - else - tick_broadcast_disable(); -} - static void auto_demotion_disable(void) { unsigned long long msr_bits; @@ -1624,7 +1616,7 @@ static int intel_idle_cpu_online(unsigned int cpu) struct cpuidle_device *dev; if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) - __setup_broadcast_timer(true); + tick_broadcast_enable(); /* * Some systems can hotplug a cpu at runtime after From a6c86e336282c67a0b04f64b3b5574794249e3e1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 10 Jan 2020 11:44:58 +0100 Subject: [PATCH 21/27] intel_idle: Fold intel_idle_probe() into intel_idle_init() There is no particular reason why intel_idle_probe() needs to be a separate function and folding it into intel_idle_init() causes the code to be somewhat easier to follow, so do just that. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 97 +++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 55 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 33e353bdd4b6..690309c78dfb 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1296,58 +1296,6 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ -/* - * intel_idle_probe() - */ -static int __init intel_idle_probe(void) -{ - unsigned int eax, ebx, ecx; - const struct x86_cpu_id *id; - - if (max_cstate == 0) { - pr_debug("disabled\n"); - return -EPERM; - } - - id = x86_match_cpu(intel_idle_ids); - if (id) { - if (!boot_cpu_has(X86_FEATURE_MWAIT)) { - pr_debug("Please enable MWAIT in BIOS SETUP\n"); - return -ENODEV; - } - } else { - id = x86_match_cpu(intel_mwait_ids); - if (!id) - return -ENODEV; - } - - if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) - return -ENODEV; - - cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); - - if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || - !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || - !mwait_substates) - return -ENODEV; - - pr_debug("MWAIT substates: 0x%x\n", mwait_substates); - - icpu = (const struct idle_cpu *)id->driver_data; - if (icpu) { - cpuidle_state_table = icpu->state_table; - if (icpu->use_acpi) - intel_idle_acpi_cst_extract(); - } else if (!intel_idle_acpi_cst_extract()) { - return -ENODEV; - } - - pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", - boot_cpu_data.x86_model); - - return 0; -} - /* * intel_idle_cpuidle_devices_uninit() * Unregisters the cpuidle devices. @@ -1632,15 +1580,54 @@ static int intel_idle_cpu_online(unsigned int cpu) static int __init intel_idle_init(void) { + const struct x86_cpu_id *id; + unsigned int eax, ebx, ecx; int retval; /* Do not load intel_idle at all for now if idle= is passed */ if (boot_option_idle_override != IDLE_NO_OVERRIDE) return -ENODEV; - retval = intel_idle_probe(); - if (retval) - return retval; + if (max_cstate == 0) { + pr_debug("disabled\n"); + return -EPERM; + } + + id = x86_match_cpu(intel_idle_ids); + if (id) { + if (!boot_cpu_has(X86_FEATURE_MWAIT)) { + pr_debug("Please enable MWAIT in BIOS SETUP\n"); + return -ENODEV; + } + } else { + id = x86_match_cpu(intel_mwait_ids); + if (!id) + return -ENODEV; + } + + if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) + return -ENODEV; + + cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); + + if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || + !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || + !mwait_substates) + return -ENODEV; + + pr_debug("MWAIT substates: 0x%x\n", mwait_substates); + + icpu = (const struct idle_cpu *)id->driver_data; + if (icpu) { + cpuidle_state_table = icpu->state_table; + if (icpu->use_acpi) + intel_idle_acpi_cst_extract(); + } else if (!intel_idle_acpi_cst_extract()) { + return -ENODEV; + } + + pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", + boot_cpu_data.x86_model); intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); if (intel_idle_cpuidle_devices == NULL) From 533da74a8c8d75af7958f4aa1abc944f061d1d85 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 10 Jan 2020 11:45:49 +0100 Subject: [PATCH 22/27] intel_idle: Clean up NULL pointer check in intel_idle_init() Instead of comparing intel_idle_cpuidle_devices with NULL apply the "!" (not) operator to it when checking it against NULL. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 690309c78dfb..fddb72e5f6fa 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1630,7 +1630,7 @@ static int __init intel_idle_init(void) boot_cpu_data.x86_model); intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); - if (intel_idle_cpuidle_devices == NULL) + if (!intel_idle_cpuidle_devices) return -ENOMEM; intel_idle_cpuidle_driver_init(); From 3d3a1ae9b4bedc3e8c88ff4baeb0a195cb447fa1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 10 Jan 2020 11:48:25 +0100 Subject: [PATCH 23/27] intel_idle: Rearrange intel_idle_cpuidle_driver_init() Notice that intel_idle_state_table_update() only needs to be called if icpu is not NULL, so fold it into intel_idle_init_cstates_icpu(), and pass a pointer to the driver object to intel_idle_cpuidle_driver_init() as an argument instead of referencing it locally in there. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index fddb72e5f6fa..c535b0b55ea5 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1447,16 +1447,12 @@ static void sklh_idle_state_table_update(void) skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE; /* C8-SKL */ skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ } -/* - * intel_idle_state_table_update() - * - * Update the default state_table for this CPU-id - */ -static void intel_idle_state_table_update(void) +static void intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) { - switch (boot_cpu_data.x86_model) { + int cstate; + switch (boot_cpu_data.x86_model) { case INTEL_FAM6_IVYBRIDGE_X: ivt_idle_state_table_update(); break; @@ -1468,11 +1464,6 @@ static void intel_idle_state_table_update(void) sklh_idle_state_table_update(); break; } -} - -static void intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) -{ - int cstate; for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { unsigned int mwait_hint; @@ -1515,12 +1506,8 @@ static void intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) * intel_idle_cpuidle_driver_init() * allocate, initialize cpuidle_states */ -static void __init intel_idle_cpuidle_driver_init(void) +static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) { - struct cpuidle_driver *drv = &intel_idle_driver; - - intel_idle_state_table_update(); - cpuidle_poll_state_init(drv); drv->state_count = 1; @@ -1633,7 +1620,8 @@ static int __init intel_idle_init(void) if (!intel_idle_cpuidle_devices) return -ENOMEM; - intel_idle_cpuidle_driver_init(); + intel_idle_cpuidle_driver_init(&intel_idle_driver); + retval = cpuidle_register_driver(&intel_idle_driver); if (retval) { struct cpuidle_driver *drv = cpuidle_get_driver(); From 0755a9bd9963ec3d611129cf2cdbb3e041ccbd6a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 10 Jan 2020 11:49:58 +0100 Subject: [PATCH 24/27] intel_idle: Move and clean up intel_idle_cpuidle_devices_uninit() Move intel_idle_cpuidle_devices_uninit() closer to its caller, intel_idle_init(), add the __init modifier to its header, drop a redundant local variable from it and fix up its kerneldoc comment. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index c535b0b55ea5..9841046dfd32 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1296,21 +1296,6 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ -/* - * intel_idle_cpuidle_devices_uninit() - * Unregisters the cpuidle devices. - */ -static void intel_idle_cpuidle_devices_uninit(void) -{ - int i; - struct cpuidle_device *dev; - - for_each_online_cpu(i) { - dev = per_cpu_ptr(intel_idle_cpuidle_devices, i); - cpuidle_unregister_device(dev); - } -} - /* * ivt_idle_state_table_update(void) * @@ -1565,6 +1550,17 @@ static int intel_idle_cpu_online(unsigned int cpu) return 0; } +/** + * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices. + */ +static void __init intel_idle_cpuidle_devices_uninit(void) +{ + int i; + + for_each_online_cpu(i) + cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i)); +} + static int __init intel_idle_init(void) { const struct x86_cpu_id *id; From 095928ae484b9f765fb4967c6cf76a2f86fc9787 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 10 Jan 2020 11:51:22 +0100 Subject: [PATCH 25/27] intel_idle: Annotate initialization code and data structures Annotate the functions that are only used at the initialization time with __init and the data structures used by them with __initdata or __initconst. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 9841046dfd32..b796eaea8715 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1148,7 +1148,7 @@ static const struct x86_cpu_id intel_mwait_ids[] __initconst = { {} }; -static bool intel_idle_max_cstate_reached(int cstate) +static bool __init intel_idle_max_cstate_reached(int cstate) { if (cstate + 1 > max_cstate) { pr_info("max_cstate %d reached\n", max_cstate); @@ -1164,7 +1164,7 @@ static bool no_acpi __read_mostly; module_param(no_acpi, bool, 0444); MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list"); -static struct acpi_processor_power acpi_state_table; +static struct acpi_processor_power acpi_state_table __initdata; /** * intel_idle_cst_usable - Check if the _CST information can be used. @@ -1172,7 +1172,7 @@ static struct acpi_processor_power acpi_state_table; * Check if all of the C-states listed by _CST in the max_cstate range are * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT. */ -static bool intel_idle_cst_usable(void) +static bool __init intel_idle_cst_usable(void) { int cstate, limit; @@ -1189,7 +1189,7 @@ static bool intel_idle_cst_usable(void) return true; } -static bool intel_idle_acpi_cst_extract(void) +static bool __init intel_idle_acpi_cst_extract(void) { unsigned int cpu; @@ -1224,7 +1224,7 @@ static bool intel_idle_acpi_cst_extract(void) return false; } -static void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) +static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); @@ -1268,7 +1268,7 @@ static void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) } } -static bool intel_idle_off_by_default(u32 mwait_hint) +static bool __init intel_idle_off_by_default(u32 mwait_hint) { int cstate, limit; @@ -1302,7 +1302,7 @@ static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } * Tune IVT multi-socket targets * Assumption: num_sockets == (max_package_num + 1) */ -static void ivt_idle_state_table_update(void) +static void __init ivt_idle_state_table_update(void) { /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ int cpu, package_num, num_sockets = 1; @@ -1329,10 +1329,11 @@ static void ivt_idle_state_table_update(void) * Translate IRTL (Interrupt Response Time Limit) MSR to usec */ -static unsigned int irtl_ns_units[] = { - 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 }; +static const unsigned int irtl_ns_units[] __initconst = { + 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 +}; -static unsigned long long irtl_2_usec(unsigned long long irtl) +static unsigned long long __init irtl_2_usec(unsigned long long irtl) { unsigned long long ns; @@ -1349,7 +1350,7 @@ static unsigned long long irtl_2_usec(unsigned long long irtl) * On BXT, we trust the IRTL to show the definitive maximum latency * We use the same value for target_residency. */ -static void bxt_idle_state_table_update(void) +static void __init bxt_idle_state_table_update(void) { unsigned long long msr; unsigned int usec; @@ -1396,7 +1397,7 @@ static void bxt_idle_state_table_update(void) * On SKL-H (model 0x5e) disable C8 and C9 if: * C10 is enabled and SGX disabled */ -static void sklh_idle_state_table_update(void) +static void __init sklh_idle_state_table_update(void) { unsigned long long msr; unsigned int eax, ebx, ecx, edx; @@ -1433,7 +1434,7 @@ static void sklh_idle_state_table_update(void) skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ } -static void intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) +static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) { int cstate; From 1aefbd7aeb7695c1a9e6ece9a1612ee88e1ea3f8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 10 Jan 2020 11:52:32 +0100 Subject: [PATCH 26/27] intel_idle: Move 3 functions closer to their callers Move intel_idle_verify_cstate(), auto_demotion_disable() and c1e_promotion_disable() closer to their callers. While at it, annotate intel_idle_verify_cstate() with __init, as it is only used during the initialization of the driver. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 67 ++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index b796eaea8715..27c50fba26d5 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -951,39 +951,6 @@ static void intel_idle_s2idle(struct cpuidle_device *dev, mwait_idle_with_hints(eax, ecx); } -static bool intel_idle_verify_cstate(unsigned int mwait_hint) -{ - unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; - unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) & - MWAIT_SUBSTATE_MASK; - - /* Ignore the C-state if there are NO sub-states in CPUID for it. */ - if (num_substates == 0) - return false; - - if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) - mark_tsc_unstable("TSC halts in idle states deeper than C2"); - - return true; -} - -static void auto_demotion_disable(void) -{ - unsigned long long msr_bits; - - rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); - msr_bits &= ~(icpu->auto_demotion_disable_flags); - wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); -} -static void c1e_promotion_disable(void) -{ - unsigned long long msr_bits; - - rdmsrl(MSR_IA32_POWER_CTL, msr_bits); - msr_bits &= ~0x2; - wrmsrl(MSR_IA32_POWER_CTL, msr_bits); -} - static const struct idle_cpu idle_cpu_nehalem = { .state_table = nehalem_cstates, .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, @@ -1434,6 +1401,22 @@ static void __init sklh_idle_state_table_update(void) skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ } +static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) +{ + unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; + unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) & + MWAIT_SUBSTATE_MASK; + + /* Ignore the C-state if there are NO sub-states in CPUID for it. */ + if (num_substates == 0) + return false; + + if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + mark_tsc_unstable("TSC halts in idle states deeper than C2"); + + return true; +} + static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) { int cstate; @@ -1503,6 +1486,24 @@ static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) intel_idle_init_cstates_acpi(drv); } +static void auto_demotion_disable(void) +{ + unsigned long long msr_bits; + + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); + msr_bits &= ~(icpu->auto_demotion_disable_flags); + wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); +} + +static void c1e_promotion_disable(void) +{ + unsigned long long msr_bits; + + rdmsrl(MSR_IA32_POWER_CTL, msr_bits); + msr_bits &= ~0x2; + wrmsrl(MSR_IA32_POWER_CTL, msr_bits); +} + /* * intel_idle_cpu_init() * allocate, initialize, register cpuidle_devices From 86e9466ae620824dee861f0f6ed933fcd0b449f3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Jan 2020 11:46:24 +0100 Subject: [PATCH 27/27] intel_idle: Clean up irtl_2_usec() Move the irtl_ns_units[] definition into irtl_2_usec() which is the only user of it, use div_u64() for the division in there (as the divisor is small enough) and use the NSEC_PER_USEC symbol for the divisor. Also convert the irtl_2_usec() comment to a proper kerneldo one. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 27c50fba26d5..9f38ff02a7b8 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1292,16 +1292,17 @@ static void __init ivt_idle_state_table_update(void) /* else, 1 and 2 socket systems use default ivt_cstates */ } -/* - * Translate IRTL (Interrupt Response Time Limit) MSR to usec +/** + * irtl_2_usec - IRTL to microseconds conversion. + * @irtl: IRTL MSR value. + * + * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds. */ - -static const unsigned int irtl_ns_units[] __initconst = { - 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 -}; - static unsigned long long __init irtl_2_usec(unsigned long long irtl) { + static const unsigned int irtl_ns_units[] __initconst = { + 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 + }; unsigned long long ns; if (!irtl) @@ -1309,8 +1310,9 @@ static unsigned long long __init irtl_2_usec(unsigned long long irtl) ns = irtl_ns_units[(irtl >> 10) & 0x7]; - return div64_u64((irtl & 0x3FF) * ns, 1000); + return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC); } + /* * bxt_idle_state_table_update(void) *