From 6319aee10e530315689db7609a7d4c444124ff22 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 8 May 2019 15:19:13 +0530 Subject: [PATCH 01/57] opp: Attach genpds to devices from within OPP core The OPP core requires the virtual device pointers to set performance state on behalf of the device, for the multiple power domain case. The genpd API (dev_pm_domain_attach_by_name()) has evolved now to support even the single power domain case and that lets us add common code for handling both the cases more efficiently. The virtual device structure returned by dev_pm_domain_attach_by_name() isn't normally used by the cpufreq drivers as they don't manage power on/off of the domains and so is only useful for the OPP core. This patch moves all the complexity into the OPP core to make the end drivers simple. The earlier APIs dev_pm_opp_{set|put}_genpd_virt_dev() are reworked into dev_pm_opp_{attach|detach}_genpd(). The new helper dev_pm_opp_attach_genpd() accepts a NULL terminated array of strings which contains names of all the genpd's to attach. It then attaches all the domains and saves the pointers to the virtual devices. The other helper undo the work done by this helper. Tested-by: Niklas Cassel Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 134 ++++++++++++++++++++++++++--------------- include/linux/pm_opp.h | 8 +-- 2 files changed, 89 insertions(+), 53 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 0e7703fe733f..67d6b0caeab1 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1744,91 +1744,127 @@ void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table) } EXPORT_SYMBOL_GPL(dev_pm_opp_unregister_set_opp_helper); +static void _opp_detach_genpd(struct opp_table *opp_table) +{ + int index; + + for (index = 0; index < opp_table->required_opp_count; index++) { + if (!opp_table->genpd_virt_devs[index]) + continue; + + dev_pm_domain_detach(opp_table->genpd_virt_devs[index], false); + opp_table->genpd_virt_devs[index] = NULL; + } +} + /** - * dev_pm_opp_set_genpd_virt_dev - Set virtual genpd device for an index - * @dev: Consumer device for which the genpd device is getting set. - * @virt_dev: virtual genpd device. - * @index: index. + * dev_pm_opp_attach_genpd - Attach genpd(s) for the device and save virtual device pointer + * @dev: Consumer device for which the genpd is getting attached. + * @names: Null terminated array of pointers containing names of genpd to attach. * * Multiple generic power domains for a device are supported with the help of * virtual genpd devices, which are created for each consumer device - genpd * pair. These are the device structures which are attached to the power domain * and are required by the OPP core to set the performance state of the genpd. + * The same API also works for the case where single genpd is available and so + * we don't need to support that separately. * * This helper will normally be called by the consumer driver of the device - * "dev", as only that has details of the genpd devices. + * "dev", as only that has details of the genpd names. * - * This helper needs to be called once for each of those virtual devices, but - * only if multiple domains are available for a device. Otherwise the original - * device structure will be used instead by the OPP core. + * This helper needs to be called once with a list of all genpd to attach. + * Otherwise the original device structure will be used instead by the OPP core. */ -struct opp_table *dev_pm_opp_set_genpd_virt_dev(struct device *dev, - struct device *virt_dev, - int index) +struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names) { struct opp_table *opp_table; + struct device *virt_dev; + int index, ret = -EINVAL; + const char **name = names; opp_table = dev_pm_opp_get_opp_table(dev); if (!opp_table) return ERR_PTR(-ENOMEM); - mutex_lock(&opp_table->genpd_virt_dev_lock); - - if (unlikely(!opp_table->genpd_virt_devs || - index >= opp_table->required_opp_count || - opp_table->genpd_virt_devs[index])) { - - dev_err(dev, "Invalid request to set required device\n"); - dev_pm_opp_put_opp_table(opp_table); - mutex_unlock(&opp_table->genpd_virt_dev_lock); - - return ERR_PTR(-EINVAL); + /* + * If the genpd's OPP table isn't already initialized, parsing of the + * required-opps fail for dev. We should retry this after genpd's OPP + * table is added. + */ + if (!opp_table->required_opp_count) { + ret = -EPROBE_DEFER; + goto put_table; + } + + mutex_lock(&opp_table->genpd_virt_dev_lock); + + while (*name) { + index = of_property_match_string(dev->of_node, + "power-domain-names", *name); + if (index < 0) { + dev_err(dev, "Failed to find power domain: %s (%d)\n", + *name, index); + goto err; + } + + if (index >= opp_table->required_opp_count) { + dev_err(dev, "Index can't be greater than required-opp-count - 1, %s (%d : %d)\n", + *name, opp_table->required_opp_count, index); + goto err; + } + + if (opp_table->genpd_virt_devs[index]) { + dev_err(dev, "Genpd virtual device already set %s\n", + *name); + goto err; + } + + virt_dev = dev_pm_domain_attach_by_name(dev, *name); + if (IS_ERR(virt_dev)) { + ret = PTR_ERR(virt_dev); + dev_err(dev, "Couldn't attach to pm_domain: %d\n", ret); + goto err; + } + + opp_table->genpd_virt_devs[index] = virt_dev; + name++; } - opp_table->genpd_virt_devs[index] = virt_dev; mutex_unlock(&opp_table->genpd_virt_dev_lock); return opp_table; + +err: + _opp_detach_genpd(opp_table); + mutex_unlock(&opp_table->genpd_virt_dev_lock); + +put_table: + dev_pm_opp_put_opp_table(opp_table); + + return ERR_PTR(ret); } +EXPORT_SYMBOL_GPL(dev_pm_opp_attach_genpd); /** - * dev_pm_opp_put_genpd_virt_dev() - Releases resources blocked for genpd device. - * @opp_table: OPP table returned by dev_pm_opp_set_genpd_virt_dev(). - * @virt_dev: virtual genpd device. + * dev_pm_opp_detach_genpd() - Detach genpd(s) from the device. + * @opp_table: OPP table returned by dev_pm_opp_attach_genpd(). * - * This releases the resource previously acquired with a call to - * dev_pm_opp_set_genpd_virt_dev(). The consumer driver shall call this helper - * if it doesn't want OPP core to update performance state of a power domain - * anymore. + * This detaches the genpd(s), resets the virtual device pointers, and puts the + * OPP table. */ -void dev_pm_opp_put_genpd_virt_dev(struct opp_table *opp_table, - struct device *virt_dev) +void dev_pm_opp_detach_genpd(struct opp_table *opp_table) { - int i; - /* * Acquire genpd_virt_dev_lock to make sure virt_dev isn't getting * used in parallel. */ mutex_lock(&opp_table->genpd_virt_dev_lock); - - for (i = 0; i < opp_table->required_opp_count; i++) { - if (opp_table->genpd_virt_devs[i] != virt_dev) - continue; - - opp_table->genpd_virt_devs[i] = NULL; - dev_pm_opp_put_opp_table(opp_table); - - /* Drop the vote */ - dev_pm_genpd_set_performance_state(virt_dev, 0); - break; - } - + _opp_detach_genpd(opp_table); mutex_unlock(&opp_table->genpd_virt_dev_lock); - if (unlikely(i == opp_table->required_opp_count)) - dev_err(virt_dev, "Failed to find required device entry\n"); + dev_pm_opp_put_opp_table(opp_table); } +EXPORT_SYMBOL_GPL(dev_pm_opp_detach_genpd); /** * dev_pm_opp_xlate_performance_state() - Find required OPP's pstate for src_table. diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index b150fe97ce5a..be570761b77a 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -131,8 +131,8 @@ struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char * name); void dev_pm_opp_put_clkname(struct opp_table *opp_table); struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table); -struct opp_table *dev_pm_opp_set_genpd_virt_dev(struct device *dev, struct device *virt_dev, int index); -void dev_pm_opp_put_genpd_virt_dev(struct opp_table *opp_table, struct device *virt_dev); +struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names); +void dev_pm_opp_detach_genpd(struct opp_table *opp_table); int dev_pm_opp_xlate_performance_state(struct opp_table *src_table, struct opp_table *dst_table, unsigned int pstate); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask); @@ -295,12 +295,12 @@ static inline struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const static inline void dev_pm_opp_put_clkname(struct opp_table *opp_table) {} -static inline struct opp_table *dev_pm_opp_set_genpd_virt_dev(struct device *dev, struct device *virt_dev, int index) +static inline struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names) { return ERR_PTR(-ENOTSUPP); } -static inline void dev_pm_opp_put_genpd_virt_dev(struct opp_table *opp_table, struct device *virt_dev) {} +static inline void dev_pm_opp_detach_genpd(struct opp_table *opp_table) {} static inline int dev_pm_opp_xlate_performance_state(struct opp_table *src_table, struct opp_table *dst_table, unsigned int pstate) { From c0ab9e0812da8e2134dd63d030c8a8abd2112a5a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 8 May 2019 15:43:36 +0530 Subject: [PATCH 02/57] opp: Allocate genpd_virt_devs from dev_pm_opp_attach_genpd() Currently the space for the array of virtual devices is allocated along with the OPP table, but that isn't going to work well from now onwards. For single power domain case, a driver can either use the original device structure for setting the performance state (if genpd attached with dev_pm_domain_attach()) or use the virtual device structure (if genpd attached with dev_pm_domain_attach_by_name(), which returns the virtual device) and so we can't know in advance if we are going to need genpd_virt_devs array or not. Lets delay the allocation a bit and do it along with dev_pm_opp_attach_genpd() rather. The deallocation is done from dev_pm_opp_detach_genpd(). Tested-by: Niklas Cassel Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 10 ++++++++++ drivers/opp/of.c | 30 ++---------------------------- 2 files changed, 12 insertions(+), 28 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 67d6b0caeab1..764e05a2fa66 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1755,6 +1755,9 @@ static void _opp_detach_genpd(struct opp_table *opp_table) dev_pm_domain_detach(opp_table->genpd_virt_devs[index], false); opp_table->genpd_virt_devs[index] = NULL; } + + kfree(opp_table->genpd_virt_devs); + opp_table->genpd_virt_devs = NULL; } /** @@ -1798,6 +1801,12 @@ struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names mutex_lock(&opp_table->genpd_virt_dev_lock); + opp_table->genpd_virt_devs = kcalloc(opp_table->required_opp_count, + sizeof(*opp_table->genpd_virt_devs), + GFP_KERNEL); + if (!opp_table->genpd_virt_devs) + goto unlock; + while (*name) { index = of_property_match_string(dev->of_node, "power-domain-names", *name); @@ -1836,6 +1845,7 @@ struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names err: _opp_detach_genpd(opp_table); +unlock: mutex_unlock(&opp_table->genpd_virt_dev_lock); put_table: diff --git a/drivers/opp/of.c b/drivers/opp/of.c index c10c782d15aa..a637f30552a3 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -141,7 +141,6 @@ err: static void _opp_table_free_required_tables(struct opp_table *opp_table) { struct opp_table **required_opp_tables = opp_table->required_opp_tables; - struct device **genpd_virt_devs = opp_table->genpd_virt_devs; int i; if (!required_opp_tables) @@ -155,10 +154,8 @@ static void _opp_table_free_required_tables(struct opp_table *opp_table) } kfree(required_opp_tables); - kfree(genpd_virt_devs); opp_table->required_opp_count = 0; - opp_table->genpd_virt_devs = NULL; opp_table->required_opp_tables = NULL; } @@ -171,9 +168,8 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table, struct device_node *opp_np) { struct opp_table **required_opp_tables; - struct device **genpd_virt_devs = NULL; struct device_node *required_np, *np; - int count, count_pd, i; + int count, i; /* Traversing the first OPP node is all we need */ np = of_get_next_available_child(opp_np, NULL); @@ -186,33 +182,11 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table, if (!count) goto put_np; - /* - * Check the number of power-domains to know if we need to deal - * with virtual devices. In some cases we have devices with multiple - * power domains but with only one of them being scalable, hence - * 'count' could be 1, but we still have to deal with multiple genpds - * and virtual devices. - */ - count_pd = of_count_phandle_with_args(dev->of_node, "power-domains", - "#power-domain-cells"); - if (!count_pd) - goto put_np; - - if (count_pd > 1) { - genpd_virt_devs = kcalloc(count, sizeof(*genpd_virt_devs), - GFP_KERNEL); - if (!genpd_virt_devs) - goto put_np; - } - required_opp_tables = kcalloc(count, sizeof(*required_opp_tables), GFP_KERNEL); - if (!required_opp_tables) { - kfree(genpd_virt_devs); + if (!required_opp_tables) goto put_np; - } - opp_table->genpd_virt_devs = genpd_virt_devs; opp_table->required_opp_tables = required_opp_tables; opp_table->required_opp_count = count; From 4d28ba1d62c48d5242ca30fa0051ab3498bc5c5b Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Mon, 13 May 2019 11:01:38 +0000 Subject: [PATCH 03/57] cpufreq: Add imx-cpufreq-dt driver Right now in upstream imx8m cpufreq support just lists a common subset of OPPs because the higher ones should only be attempted after checking speed grading in fuses. Add a small driver which checks speed grading from nvmem cells before registering cpufreq-dt. This driver allows unlocking all frequencies for imx8mm and imx8mq and could be applied to other chips like imx7d Signed-off-by: Leonard Crestez Signed-off-by: Viresh Kumar --- drivers/cpufreq/Kconfig.arm | 9 +++ drivers/cpufreq/Makefile | 1 + drivers/cpufreq/cpufreq-dt-platdev.c | 3 + drivers/cpufreq/imx-cpufreq-dt.c | 96 ++++++++++++++++++++++++++++ drivers/soc/imx/soc-imx8.c | 3 + 5 files changed, 112 insertions(+) create mode 100644 drivers/cpufreq/imx-cpufreq-dt.c diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 179a1d302f48..982efdf9c7e5 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -92,6 +92,15 @@ config ARM_IMX6Q_CPUFREQ If in doubt, say N. +config ARM_IMX_CPUFREQ_DT + tristate "Freescale i.MX8M cpufreq support" + depends on ARCH_MXC && CPUFREQ_DT + help + This adds cpufreq driver support for Freescale i.MX8M series SoCs, + based on cpufreq-dt. + + If in doubt, say N. + config ARM_KIRKWOOD_CPUFREQ def_bool MACH_KIRKWOOD help diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 689b26c6f949..7bcda2273d0c 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_ACPI_CPPC_CPUFREQ) += cppc_cpufreq.o obj-$(CONFIG_ARCH_DAVINCI) += davinci-cpufreq.o obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ) += highbank-cpufreq.o obj-$(CONFIG_ARM_IMX6Q_CPUFREQ) += imx6q-cpufreq.o +obj-$(CONFIG_ARM_IMX_CPUFREQ_DT) += imx-cpufreq-dt.o obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ) += kirkwood-cpufreq.o obj-$(CONFIG_ARM_MEDIATEK_CPUFREQ) += mediatek-cpufreq.o obj-$(CONFIG_MACH_MVEBU_V7) += mvebu-cpufreq.o diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 47729a22c159..19c1aad57e26 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -108,6 +108,9 @@ static const struct of_device_id blacklist[] __initconst = { { .compatible = "calxeda,highbank", }, { .compatible = "calxeda,ecx-2000", }, + { .compatible = "fsl,imx8mq", }, + { .compatible = "fsl,imx8mm", }, + { .compatible = "marvell,armadaxp", }, { .compatible = "mediatek,mt2701", }, diff --git a/drivers/cpufreq/imx-cpufreq-dt.c b/drivers/cpufreq/imx-cpufreq-dt.c new file mode 100644 index 000000000000..e1aa346efa10 --- /dev/null +++ b/drivers/cpufreq/imx-cpufreq-dt.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 NXP + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define OCOTP_CFG3_SPEED_GRADE_SHIFT 8 +#define OCOTP_CFG3_SPEED_GRADE_MASK (0x3 << 8) +#define OCOTP_CFG3_MKT_SEGMENT_SHIFT 6 +#define OCOTP_CFG3_MKT_SEGMENT_MASK (0x3 << 6) + +static const struct of_device_id imx_cpufreq_dt_match_list[] = { + { .compatible = "fsl,imx8mm" }, + { .compatible = "fsl,imx8mq" }, + {} +}; + +/* cpufreq-dt device registered by imx-cpufreq-dt */ +static struct platform_device *cpufreq_dt_pdev; +static struct opp_table *cpufreq_opp_table; + +static int imx_cpufreq_dt_probe(struct platform_device *pdev) +{ + struct device *cpu_dev = get_cpu_device(0); + struct device_node *np; + const struct of_device_id *match; + u32 cell_value, supported_hw[2]; + int speed_grade, mkt_segment; + int ret; + + np = of_find_node_by_path("/"); + match = of_match_node(imx_cpufreq_dt_match_list, np); + of_node_put(np); + if (!match) + return -ENODEV; + + ret = nvmem_cell_read_u32(cpu_dev, "speed_grade", &cell_value); + if (ret) + return ret; + + speed_grade = (cell_value & OCOTP_CFG3_SPEED_GRADE_MASK) >> OCOTP_CFG3_SPEED_GRADE_SHIFT; + mkt_segment = (cell_value & OCOTP_CFG3_MKT_SEGMENT_MASK) >> OCOTP_CFG3_MKT_SEGMENT_SHIFT; + supported_hw[0] = BIT(speed_grade); + supported_hw[1] = BIT(mkt_segment); + dev_info(&pdev->dev, "cpu speed grade %d mkt segment %d supported-hw %#x %#x\n", + speed_grade, mkt_segment, supported_hw[0], supported_hw[1]); + + cpufreq_opp_table = dev_pm_opp_set_supported_hw(cpu_dev, supported_hw, 2); + if (IS_ERR(cpufreq_opp_table)) { + ret = PTR_ERR(cpufreq_opp_table); + dev_err(&pdev->dev, "Failed to set supported opp: %d\n", ret); + return ret; + } + + cpufreq_dt_pdev = platform_device_register_data( + &pdev->dev, "cpufreq-dt", -1, NULL, 0); + if (IS_ERR(cpufreq_dt_pdev)) { + dev_pm_opp_put_supported_hw(cpufreq_opp_table); + ret = PTR_ERR(cpufreq_dt_pdev); + dev_err(&pdev->dev, "Failed to register cpufreq-dt: %d\n", ret); + return ret; + } + + return 0; +} + +static int imx_cpufreq_dt_remove(struct platform_device *pdev) +{ + platform_device_unregister(cpufreq_dt_pdev); + dev_pm_opp_put_supported_hw(cpufreq_opp_table); + + return 0; +} + +static struct platform_driver imx_cpufreq_dt_driver = { + .probe = imx_cpufreq_dt_probe, + .remove = imx_cpufreq_dt_remove, + .driver = { + .name = "imx-cpufreq-dt", + }, +}; +module_platform_driver(imx_cpufreq_dt_driver); + +MODULE_ALIAS("platform:imx-cpufreq-dt"); +MODULE_DESCRIPTION("Freescale i.MX cpufreq speed grading driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/soc/imx/soc-imx8.c b/drivers/soc/imx/soc-imx8.c index fc6429f9170a..b1bd8e2543ac 100644 --- a/drivers/soc/imx/soc-imx8.c +++ b/drivers/soc/imx/soc-imx8.c @@ -103,6 +103,9 @@ static int __init imx8_soc_init(void) if (IS_ERR(soc_dev)) goto free_rev; + if (IS_ENABLED(CONFIG_ARM_IMX_CPUFREQ_DT)) + platform_device_register_simple("imx-cpufreq-dt", -1, NULL, 0); + return 0; free_rev: From a02177a39344b643dccfa3b18fa5c1fc4984e1e5 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Mon, 13 May 2019 11:01:39 +0000 Subject: [PATCH 04/57] dt-bindings: imx-cpufreq-dt: Document opp-supported-hw usage The interpretation of opp-supported-hw bits for imx-cpufreq-dt driver is not very obvious so attempt to explain it. There is no OF compat string associated. Reviewed-by: Rob Herring Signed-off-by: Leonard Crestez Signed-off-by: Viresh Kumar --- .../bindings/cpufreq/imx-cpufreq-dt.txt | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 Documentation/devicetree/bindings/cpufreq/imx-cpufreq-dt.txt diff --git a/Documentation/devicetree/bindings/cpufreq/imx-cpufreq-dt.txt b/Documentation/devicetree/bindings/cpufreq/imx-cpufreq-dt.txt new file mode 100644 index 000000000000..87bff5add3f9 --- /dev/null +++ b/Documentation/devicetree/bindings/cpufreq/imx-cpufreq-dt.txt @@ -0,0 +1,37 @@ +i.MX CPUFreq-DT OPP bindings +================================ + +Certain i.MX SoCs support different OPPs depending on the "market segment" and +"speed grading" value which are written in fuses. These bits are combined with +the opp-supported-hw values for each OPP to check if the OPP is allowed. + +Required properties: +-------------------- + +For each opp entry in 'operating-points-v2' table: +- opp-supported-hw: Two bitmaps indicating: + - Supported speed grade mask + - Supported market segment mask + 0: Consumer + 1: Extended Consumer + 2: Industrial + 3: Automotive + +Example: +-------- + +opp_table { + compatible = "operating-points-v2"; + opp-1000000000 { + opp-hz = /bits/ 64 <1000000000>; + /* grade >= 0, consumer only */ + opp-supported-hw = <0xf>, <0x3>; + }; + + opp-1300000000 { + opp-hz = /bits/ 64 <1300000000>; + opp-microvolt = <1000000>; + /* grade >= 1, all segments */ + opp-supported-hw = <0xe>, <0x7>; + }; +} From 7673896a40693506db49c1e9f9c3a4a8c7e357c5 Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Tue, 14 May 2019 10:53:57 -0700 Subject: [PATCH 05/57] Update to pm-graph 5.3 sleepgraph: - add support for parsing kernel issues from timeline dmesg logs - with -summary, generate a summary-issues.html for kernel issues found - with -summary, generate a summary-devices.html for device callback times - when recreating a timeline, use -o to set the output html filename - capture mcelog data when hardware errors occur and store in log - add -turbostat option to capture power data during freeze Signed-off-by: Todd Brandt Signed-off-by: Rafael J. Wysocki --- tools/power/pm-graph/sleepgraph.py | 496 ++++++++++++++++++++++------- 1 file changed, 379 insertions(+), 117 deletions(-) diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 52618f3444d4..41d28d63e7c9 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -61,6 +61,7 @@ import ConfigParser import gzip from threading import Thread from subprocess import call, Popen, PIPE +import base64 def pprint(msg): print(msg) @@ -74,7 +75,7 @@ def pprint(msg): # store system values and test parameters class SystemValues: title = 'SleepGraph' - version = '5.2' + version = '5.3' ansi = False rs = 0 display = '' @@ -199,6 +200,7 @@ class SystemValues: 'usleep_range': { 'args_x86_64': {'min':'%di:s32', 'max':'%si:s32'}, 'ub': 1 }, 'mutex_lock_slowpath': { 'func':'__mutex_lock_slowpath', 'ub': 1 }, 'acpi_os_stall': {'ub': 1}, + 'rt_mutex_slowlock': {'ub': 1}, # ACPI 'acpi_resume_power_resources': {}, 'acpi_ps_parse_aml': {}, @@ -344,10 +346,12 @@ class SystemValues: m = info['baseboard-manufacturer'] elif 'system-manufacturer' in info: m = info['system-manufacturer'] - if 'baseboard-product-name' in info: - p = info['baseboard-product-name'] - elif 'system-product-name' in info: + if 'system-product-name' in info: p = info['system-product-name'] + elif 'baseboard-product-name' in info: + p = info['baseboard-product-name'] + if m[:5].lower() == 'intel' and 'baseboard-product-name' in info: + p = info['baseboard-product-name'] if 'processor-version' in info: c = info['processor-version'] if 'bios-version' in info: @@ -688,7 +692,8 @@ class SystemValues: if self.bufsize > 0: tgtsize = self.bufsize elif self.usecallgraph or self.usedevsrc: - bmax = (1*1024*1024) if self.suspendmode == 'disk' else (3*1024*1024) + bmax = (1*1024*1024) if self.suspendmode in ['disk', 'command'] \ + else (3*1024*1024) tgtsize = min(self.memfree, bmax) else: tgtsize = 65536 @@ -776,6 +781,10 @@ class SystemValues: fw = test['fw'] if(fw): fp.write('# fwsuspend %u fwresume %u\n' % (fw[0], fw[1])) + if 'mcelog' in test: + fp.write('# mcelog %s\n' % test['mcelog']) + if 'turbo' in test: + fp.write('# turbostat %s\n' % test['turbo']) if 'bat' in test: (a1, c1), (a2, c2) = test['bat'] fp.write('# battery %s %d %s %d\n' % (a1, c1, a2, c2)) @@ -829,6 +838,56 @@ class SystemValues: if isgz: return gzip.open(filename, mode+'b') return open(filename, mode) + def mcelog(self, clear=False): + cmd = self.getExec('mcelog') + if not cmd: + return '' + if clear: + call(cmd+' > /dev/null 2>&1', shell=True) + return '' + fp = Popen([cmd], stdout=PIPE, stderr=PIPE).stdout + out = fp.read().strip() + fp.close() + if not out: + return '' + return base64.b64encode(out.encode('zlib')) + def haveTurbostat(self): + cmd = self.getExec('turbostat') + if not cmd: + return False + fp = Popen([cmd, '-v'], stdout=PIPE, stderr=PIPE).stderr + out = fp.read().strip() + fp.close() + return re.match('turbostat version [0-8.]* .*', out) + def turbostat(self): + cmd = self.getExec('turbostat') + if not cmd: + return 'missing turbostat executable' + outfile = '/tmp/pm-graph-turbostat.txt' + res = call('%s -o %s -q -S echo freeze > %s' % \ + (cmd, outfile, self.powerfile), shell=True) + if res != 0: + return 'turbosat returned %d' % res + if not os.path.exists(outfile): + return 'turbostat output missing' + fp = open(outfile, 'r') + text = [] + for line in fp: + if re.match('[0-9.]* sec', line): + continue + text.append(line.split()) + fp.close() + if len(text) < 2: + return 'turbostat output format error' + out = [] + for key in text[0]: + values = [] + idx = text[0].index(key) + for line in text[1:]: + if len(line) > idx: + values.append(line[idx]) + out.append('%s=%s' % (key, ','.join(values))) + return '|'.join(out) sysvals = SystemValues() switchvalues = ['enable', 'disable', 'on', 'off', 'true', 'false', '1', '0'] @@ -941,6 +1000,8 @@ class Data: self.outfile = '' self.kerror = False self.battery = 0 + self.turbostat = 0 + self.mcelog = 0 self.enterfail = '' self.currphase = '' self.pstl = dict() # process timeline @@ -975,8 +1036,38 @@ class Data: if len(plist) < 1: return '' return plist[-1] - def extractErrorInfo(self): - lf = sysvals.openlog(sysvals.dmesgfile, 'r') + def errorSummary(self, errinfo, msg): + found = False + for entry in errinfo: + if re.match(entry['match'], msg): + entry['count'] += 1 + if sysvals.hostname not in entry['urls']: + entry['urls'][sysvals.hostname] = sysvals.htmlfile + found = True + break + if found: + return + arr = msg.split() + for j in range(len(arr)): + if re.match('^[0-9\-\.]*$', arr[j]): + arr[j] = '[0-9\-\.]*' + else: + arr[j] = arr[j]\ + .replace(']', '\]').replace('[', '\[').replace('.', '\.')\ + .replace('+', '\+').replace('*', '\*').replace('(', '\(')\ + .replace(')', '\)') + mstr = ' '.join(arr) + entry = { + 'line': msg, + 'match': mstr, + 'count': 1, + 'urls': {sysvals.hostname: sysvals.htmlfile} + } + errinfo.append(entry) + def extractErrorInfo(self, issues=0): + lf = self.dmesgtext + if len(self.dmesgtext) < 1 and sysvals.dmesgfile: + lf = sysvals.openlog(sysvals.dmesgfile, 'r') i = 0 list = [] for line in lf: @@ -993,6 +1084,8 @@ class Data: if re.match(self.errlist[err], msg): list.append((err, dir, t, i, i)) self.kerror = True + if not isinstance(issues, int): + self.errorSummary(issues, msg) break for e in list: type, dir, t, idx1, idx2 = e @@ -1000,7 +1093,8 @@ class Data: self.errorinfo[dir].append((type, t, idx1, idx2)) if self.kerror: sysvals.dmesglog = True - lf.close() + if len(self.dmesgtext) < 1 and sysvals.dmesgfile: + lf.close() def setStart(self, time): self.start = time def setEnd(self, time): @@ -2358,6 +2452,8 @@ class TestProps: '(?P[0-9]{2})(?P[0-9]{2})(?P[0-9]{2})'+\ ' (?P.*) (?P.*) (?P.*)$' batteryfmt = '^# battery (?P\w*) (?P\d*) (?P\w*) (?P\d*)' + tstatfmt = '^# turbostat (?P\S*)' + mcelogfmt = '^# mcelog (?P\S*)' testerrfmt = '^# enter_sleep_error (?P.*)' sysinfofmt = '^# sysinfo .*' cmdlinefmt = '^# command \| (?P.*)' @@ -2380,6 +2476,8 @@ class TestProps: self.cmdline = '' self.kparams = '' self.testerror = [] + self.mcelog = [] + self.turbostat = [] self.battery = [] self.fwdata = [] self.ftrace_line_fmt = self.ftrace_line_fmt_nop @@ -2394,6 +2492,38 @@ class TestProps: self.ftrace_line_fmt = self.ftrace_line_fmt_nop else: doError('Invalid tracer format: [%s]' % tracer) + def decode(self, data): + try: + out = base64.b64decode(data).decode('zlib') + except: + out = data + return out + def stampInfo(self, line): + if re.match(self.stampfmt, line): + self.stamp = line + return True + elif re.match(self.sysinfofmt, line): + self.sysinfo = line + return True + elif re.match(self.cmdlinefmt, line): + self.cmdline = line + return True + elif re.match(self.mcelogfmt, line): + self.mcelog.append(line) + return True + elif re.match(self.tstatfmt, line): + self.turbostat.append(line) + return True + elif re.match(self.batteryfmt, line): + self.battery.append(line) + return True + elif re.match(self.testerrfmt, line): + self.testerror.append(line) + return True + elif re.match(self.firmwarefmt, line): + self.fwdata.append(line) + return True + return False def parseStamp(self, data, sv): # global test data m = re.match(self.stampfmt, self.stamp) @@ -2436,9 +2566,21 @@ class TestProps: sv.stamp = data.stamp # firmware data if sv.suspendmode == 'mem' and len(self.fwdata) > data.testnumber: - data.fwSuspend, data.fwResume = self.fwdata[data.testnumber] - if(data.fwSuspend > 0 or data.fwResume > 0): - data.fwValid = True + m = re.match(self.firmwarefmt, self.fwdata[data.testnumber]) + if m: + data.fwSuspend, data.fwResume = int(m.group('s')), int(m.group('r')) + if(data.fwSuspend > 0 or data.fwResume > 0): + data.fwValid = True + # mcelog data + if len(self.mcelog) > data.testnumber: + m = re.match(self.mcelogfmt, self.mcelog[data.testnumber]) + if m: + data.mcelog = self.decode(m.group('m')) + # turbostat data + if len(self.turbostat) > data.testnumber: + m = re.match(self.tstatfmt, self.turbostat[data.testnumber]) + if m: + data.turbostat = m.group('t') # battery data if len(self.battery) > data.testnumber: m = re.match(self.batteryfmt, self.battery[data.testnumber]) @@ -2564,21 +2706,7 @@ def appendIncompleteTraceLog(testruns): for line in tf: # remove any latent carriage returns line = line.replace('\r\n', '') - # grab the stamp and sysinfo - if re.match(tp.stampfmt, line): - tp.stamp = line - continue - elif re.match(tp.sysinfofmt, line): - tp.sysinfo = line - continue - elif re.match(tp.cmdlinefmt, line): - tp.cmdline = line - continue - elif re.match(tp.batteryfmt, line): - tp.battery.append(line) - continue - elif re.match(tp.testerrfmt, line): - tp.testerror.append(line) + if tp.stampInfo(line): continue # determine the trace data type (required for further parsing) m = re.match(tp.tracertypefmt, line) @@ -2701,26 +2829,7 @@ def parseTraceLog(live=False): for line in tf: # remove any latent carriage returns line = line.replace('\r\n', '') - # stamp and sysinfo lines - if re.match(tp.stampfmt, line): - tp.stamp = line - continue - elif re.match(tp.sysinfofmt, line): - tp.sysinfo = line - continue - elif re.match(tp.cmdlinefmt, line): - tp.cmdline = line - continue - elif re.match(tp.batteryfmt, line): - tp.battery.append(line) - continue - elif re.match(tp.testerrfmt, line): - tp.testerror.append(line) - continue - # firmware line: pull out any firmware data - m = re.match(tp.firmwarefmt, line) - if(m): - tp.fwdata.append((int(m.group('s')), int(m.group('r')))) + if tp.stampInfo(line): continue # tracer type line: determine the trace data type m = re.match(tp.tracertypefmt, line) @@ -3141,25 +3250,7 @@ def loadKernelLog(): idx = line.find('[') if idx > 1: line = line[idx:] - # grab the stamp and sysinfo - if re.match(tp.stampfmt, line): - tp.stamp = line - continue - elif re.match(tp.sysinfofmt, line): - tp.sysinfo = line - continue - elif re.match(tp.cmdlinefmt, line): - tp.cmdline = line - continue - elif re.match(tp.batteryfmt, line): - tp.battery.append(line) - continue - elif re.match(tp.testerrfmt, line): - tp.testerror.append(line) - continue - m = re.match(tp.firmwarefmt, line) - if(m): - tp.fwdata.append((int(m.group('s')), int(m.group('r')))) + if tp.stampInfo(line): continue m = re.match('[ \t]*(\[ *)(?P[0-9\.]*)(\]) (?P.*)', line) if(not m): @@ -3531,22 +3622,16 @@ def addCallgraphs(sv, hf, data): name+' → '+cg.name, color, dev['id']) hf.write('\n\n \n') -# Function: createHTMLSummarySimple -# Description: -# Create summary html file for a series of tests -# Arguments: -# testruns: array of Data objects from parseTraceLog -def createHTMLSummarySimple(testruns, htmlfile, title): - # write the html header first (html head, css code, up to body start) - html = '\n\n\n\ +def summaryCSS(title, center=True): + tdcenter = 'text-align:center;' if center else '' + out = '\n\n\n\ \n\ - SleepGraph Summary\n\ + '+title+'\n\ \n\n\n' + return out + +# Function: createHTMLSummarySimple +# Description: +# Create summary html file for a series of tests +# Arguments: +# testruns: array of Data objects from parseTraceLog +def createHTMLSummarySimple(testruns, htmlfile, title): + # write the html header first (html head, css code, up to body start) + html = summaryCSS('Summary - SleepGraph') # extract the test data into list list = dict() @@ -3579,17 +3674,20 @@ def createHTMLSummarySimple(testruns, htmlfile, title): tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [[], []] iMin, iMed, iMax = [0, 0], [0, 0], [0, 0] num = 0 + res = data['result'] tVal = [float(data['suspend']), float(data['resume'])] list[mode]['data'].append([data['host'], data['kernel'], - data['time'], tVal[0], tVal[1], data['url'], data['result'], + data['time'], tVal[0], tVal[1], data['url'], res, data['issues'], data['sus_worst'], data['sus_worsttime'], data['res_worst'], data['res_worsttime']]) idx = len(list[mode]['data']) - 1 - if data['result'] not in cnt: - cnt[data['result']] = 1 + if res.startswith('fail in'): + res = 'fail' + if res not in cnt: + cnt[res] = 1 else: - cnt[data['result']] += 1 - if data['result'] == 'pass': + cnt[res] += 1 + if res == 'pass': for i in range(2): tMed[i].append(tVal[i]) tAvg[i] += tVal[i] @@ -3623,7 +3721,7 @@ def createHTMLSummarySimple(testruns, htmlfile, title): tdlink = '\thtml\n' # table header - html += '\n\n' + th.format('#') +\ + html += '
\n\n' + th.format('#') +\ th.format('Mode') + th.format('Host') + th.format('Kernel') +\ th.format('Test Time') + th.format('Result') + th.format('Issues') +\ th.format('Suspend') + th.format('Resume') +\ @@ -3698,6 +3796,104 @@ def createHTMLSummarySimple(testruns, htmlfile, title): hf.write(html+'
\n\n\n') hf.close() +def createHTMLDeviceSummary(testruns, htmlfile, title): + html = summaryCSS('Device Summary - SleepGraph', False) + + # create global device list from all tests + devall = dict() + for data in testruns: + host, url, devlist = data['host'], data['url'], data['devlist'] + for type in devlist: + if type not in devall: + devall[type] = dict() + mdevlist, devlist = devall[type], data['devlist'][type] + for name in devlist: + length = devlist[name] + if name not in mdevlist: + mdevlist[name] = {'name': name, 'host': host, + 'worst': length, 'total': length, 'count': 1, + 'url': url} + else: + if length > mdevlist[name]['worst']: + mdevlist[name]['worst'] = length + mdevlist[name]['url'] = url + mdevlist[name]['host'] = host + mdevlist[name]['total'] += length + mdevlist[name]['count'] += 1 + + # generate the html + th = '\t{0}\n' + td = '\t{0}\n' + tdr = '\t{0}\n' + tdlink = '\thtml\n' + limit = 1 + for type in sorted(devall, reverse=True): + num = 0 + devlist = devall[type] + # table header + html += '
%s (%s devices > %d ms)
\n' % \ + (title, type.upper(), limit) + html += '\n' + '' +\ + th.format('Average Time') + th.format('Count') +\ + th.format('Worst Time') + th.format('Host (worst time)') +\ + th.format('Link (worst time)') + '\n' + for name in sorted(devlist, key=lambda k:devlist[k]['worst'], reverse=True): + data = devall[type][name] + data['average'] = data['total'] / data['count'] + if data['average'] < limit: + continue + # row classes - alternate row color + rcls = ['alt'] if num % 2 == 1 else [] + html += '\n' if len(rcls) > 0 else '\n' + html += tdr.format(data['name']) # name + html += td.format('%.3f ms' % data['average']) # average + html += td.format(data['count']) # count + html += td.format('%.3f ms' % data['worst']) # worst + html += td.format(data['host']) # host + html += tdlink.format(data['url']) # url + html += '\n' + num += 1 + html += '
Device Name
\n' + + # flush the data to file + hf = open(htmlfile, 'w') + hf.write(html+'\n\n') + hf.close() + return devall + +def createHTMLIssuesSummary(issues, htmlfile, title): + html = summaryCSS('Issues Summary - SleepGraph', False) + + # generate the html + th = '\t{0}\n' + td = '\t{1}\n' + tdlink = '{0}' + subtitle = '%d issues' % len(issues) if len(issues) > 0 else 'no issues' + html += '
%s (%s)
\n' % (title, subtitle) + html += '\n' + th.format('Count') + th.format('Issue') +\ + th.format('Hosts') + th.format('First Instance') + '\n' + + num = 0 + for e in sorted(issues, key=lambda v:v['count'], reverse=True): + links = [] + for host in sorted(e['urls']): + links.append(tdlink.format(host, e['urls'][host])) + # row classes - alternate row color + rcls = ['alt'] if num % 2 == 1 else [] + html += '\n' if len(rcls) > 0 else '\n' + html += td.format('center', e['count']) # count + html += td.format('left', e['line']) # issue + html += td.format('center', len(e['urls'])) # hosts + html += td.format('center nowrap', '
'.join(links)) # links + html += '\n' + num += 1 + + # flush the data to file + hf = open(htmlfile, 'w') + hf.write(html+'
\n\n\n') + hf.close() + return issues + def ordinal(value): suffix = 'th' if value < 10 or value > 19: @@ -4621,6 +4817,7 @@ def executeSuspend(): pprint('SUSPEND START') else: pprint('SUSPEND START (press a key to resume)') + sysvals.mcelog(True) bat1 = getBattery() if battery else False # set rtcwake if(sysvals.rtcwake): @@ -4652,13 +4849,21 @@ def executeSuspend(): pf = open(sysvals.diskpowerfile, 'w') pf.write(sysvals.diskmode) pf.close() - pf = open(sysvals.powerfile, 'w') - pf.write(mode) - # execution will pause here - try: - pf.close() - except Exception as e: - tdata['error'] = str(e) + if mode == 'freeze' and sysvals.haveTurbostat(): + # execution will pause here + turbo = sysvals.turbostat() + if '|' in turbo: + tdata['turbo'] = turbo + else: + tdata['error'] = turbo + else: + pf = open(sysvals.powerfile, 'w') + pf.write(mode) + # execution will pause here + try: + pf.close() + except Exception as e: + tdata['error'] = str(e) if(sysvals.rtcwake): sysvals.rtcWakeAlarmOff() # postdelay delay @@ -4672,6 +4877,9 @@ def executeSuspend(): sysvals.fsetVal('RESUME COMPLETE', 'trace_marker') if(sysvals.suspendmode == 'mem' or sysvals.suspendmode == 'command'): tdata['fw'] = getFPDT(False) + mcelog = sysvals.mcelog() + if mcelog: + tdata['mcelog'] = mcelog bat2 = getBattery() if battery else False if battery and bat1 and bat2: tdata['bat'] = (bat1, bat2) @@ -4694,6 +4902,7 @@ def executeSuspend(): op.close() sysvals.fsetVal('', 'trace') devProps() + return testdata def readFile(file): if os.path.islink(file): @@ -5398,6 +5607,12 @@ def processData(live=False): appendIncompleteTraceLog(testruns) sysvals.vprint('Command:\n %s' % sysvals.cmdline) for data in testruns: + if data.mcelog: + sysvals.vprint('MCELOG Data:') + for line in data.mcelog.split('\n'): + sysvals.vprint(' %s' % line) + if data.turbostat: + sysvals.vprint('Turbostat:\n %s' % data.turbostat.replace('|', ' ')) if data.battery: a1, c1, a2, c2 = data.battery s = 'Battery:\n Before - AC: %s, Charge: %d\n After - AC: %s, Charge: %d' % \ @@ -5431,7 +5646,10 @@ def rerunTest(): doesTraceLogHaveTraceEvents() if not sysvals.dmesgfile and not sysvals.usetraceevents: doError('recreating this html output requires a dmesg file') - sysvals.setOutputFile() + if sysvals.outdir: + sysvals.htmlfile = sysvals.outdir + else: + sysvals.setOutputFile() if os.path.exists(sysvals.htmlfile): if not os.path.isfile(sysvals.htmlfile): doError('a directory already exists with this name: %s' % sysvals.htmlfile) @@ -5450,14 +5668,18 @@ def runTest(n=0): sysvals.initTestOutput('suspend') # execute the test - executeSuspend() + testdata = executeSuspend() sysvals.cleanupFtrace() if sysvals.skiphtml: sysvals.sudoUserchown(sysvals.testdir) return - testruns, stamp = processData(True) - for data in testruns: - del data + if len(testdata) > 0 and not testdata[0]['error']: + testruns, stamp = processData(True) + for data in testruns: + del data + else: + stamp = testdata[0] + sysvals.sudoUserchown(sysvals.testdir) sysvals.outputResult(stamp, n) if 'error' in stamp: @@ -5487,8 +5709,13 @@ def find_in_html(html, start, end, firstonly=True): return '' return out -def data_from_html(file, outpath, devlist=False): - html = open(file, 'r').read() +def data_from_html(file, outpath, issues): + if '' not in file: + html = open(file, 'r').read() + sysvals.htmlfile = os.path.relpath(file, outpath) + else: + html = file + # extract general info suspend = find_in_html(html, 'Kernel Suspend', 'ms') resume = find_in_html(html, 'Kernel Resume', 'ms') line = find_in_html(html, '
', '
') @@ -5499,6 +5726,7 @@ def data_from_html(file, outpath, devlist=False): dt = datetime.strptime(' '.join(stmp[3:]), '%B %d %Y, %I:%M:%S %p') except: return False + sysvals.hostname = stmp[0] tstr = dt.strftime('%Y/%m/%d %H:%M:%S') error = find_in_html(html, '') if error: @@ -5509,13 +5737,39 @@ def data_from_html(file, outpath, devlist=False): result = 'fail' else: result = 'pass' + # extract error info ilist = [] - e = find_in_html(html, 'class="err"[\w=":;\.%\- ]*>', '→', False) - for i in list(set(e)): - ilist.append('%sx%d' % (i, e.count(i)) if e.count(i) > 1 else i) + log = find_in_html(html, '').strip() + if log: + d = Data(0) + d.end = 999999999 + d.dmesgtext = log.split('\n') + d.extractErrorInfo(issues) + elist = dict() + for dir in d.errorinfo: + for err in d.errorinfo[dir]: + if err[0] not in elist: + elist[err[0]] = 0 + elist[err[0]] += 1 + for i in elist: + ilist.append('%sx%d' % (i, elist[i]) if elist[i] > 1 else i) low = find_in_html(html, 'freeze time: ', ' ms') if low and '|' in low: - ilist.append('FREEZEx%d' % len(low.split('|'))) + issue = 'FREEZEx%d' % len(low.split('|')) + match = [i for i in issues if i['match'] == issue] + if len(match) > 0: + match[0]['count'] += 1 + if sysvals.hostname not in match[0]['urls']: + match[0]['urls'][sysvals.hostname] = sysvals.htmlfile + else: + issues.append({ + 'match': issue, 'count': 1, 'line': issue, + 'urls': {sysvals.hostname: sysvals.htmlfile}, + }) + ilist.append(issue) + + # extract device info devices = dict() for line in html.split('\n'): m = re.match(' *
.*)\" class=\"thread.*', line) @@ -5527,19 +5781,23 @@ def data_from_html(file, outpath, devlist=False): name, time, phase = m.group('n'), m.group('t'), m.group('p') if ' async' in name or ' sync' in name: name = ' '.join(name.split(' ')[:-1]) - d = phase.split('_')[0] + if phase.startswith('suspend'): + d = 'suspend' + elif phase.startswith('resume'): + d = 'resume' + else: + continue if d not in devices: devices[d] = dict() if name not in devices[d]: devices[d][name] = 0.0 devices[d][name] += float(time) - worst = {'suspend': {'name':'', 'time': 0.0}, - 'resume': {'name':'', 'time': 0.0}} - for d in devices: - if d not in worst: - worst[d] = dict() - dev = devices[d] - if len(dev.keys()) > 0: + # create worst device info + worst = dict() + for d in ['suspend', 'resume']: + worst[d] = {'name':'', 'time': 0.0} + dev = devices[d] if d in devices else 0 + if dev and len(dev.keys()) > 0: n = sorted(dev, key=dev.get, reverse=True)[0] worst[d]['name'], worst[d]['time'] = n, dev[n] data = { @@ -5551,14 +5809,13 @@ def data_from_html(file, outpath, devlist=False): 'issues': ' '.join(ilist), 'suspend': suspend, 'resume': resume, + 'devlist': devices, 'sus_worst': worst['suspend']['name'], 'sus_worsttime': worst['suspend']['time'], 'res_worst': worst['resume']['name'], 'res_worsttime': worst['resume']['time'], - 'url': os.path.relpath(file, outpath), + 'url': sysvals.htmlfile, } - if devlist: - data['devlist'] = devices return data # Function: runSummary @@ -5567,7 +5824,7 @@ def data_from_html(file, outpath, devlist=False): def runSummary(subdir, local=True, genhtml=False): inpath = os.path.abspath(subdir) outpath = os.path.abspath('.') if local else inpath - pprint('Generating a summary of folder "%s"' % inpath) + pprint('Generating a summary of folder:\n %s' % inpath) if genhtml: for dirname, dirnames, filenames in os.walk(subdir): sysvals.dmesgfile = sysvals.ftracefile = sysvals.htmlfile = '' @@ -5583,26 +5840,31 @@ def runSummary(subdir, local=True, genhtml=False): if sysvals.dmesgfile: pprint('DMESG : %s' % sysvals.dmesgfile) rerunTest() + issues = [] testruns = [] desc = {'host':[],'mode':[],'kernel':[]} for dirname, dirnames, filenames in os.walk(subdir): for filename in filenames: if(not re.match('.*.html', filename)): continue - data = data_from_html(os.path.join(dirname, filename), outpath) + data = data_from_html(os.path.join(dirname, filename), outpath, issues) if(not data): continue testruns.append(data) for key in desc: if data[key] not in desc[key]: desc[key].append(data[key]) - outfile = os.path.join(outpath, 'summary.html') - pprint('Summary file: %s' % outfile) + pprint('Summary files:') if len(desc['host']) == len(desc['mode']) == len(desc['kernel']) == 1: title = '%s %s %s' % (desc['host'][0], desc['kernel'][0], desc['mode'][0]) else: title = inpath - createHTMLSummarySimple(testruns, outfile, title) + createHTMLSummarySimple(testruns, os.path.join(outpath, 'summary.html'), title) + pprint(' summary.html - tabular list of test data found') + createHTMLDeviceSummary(testruns, os.path.join(outpath, 'summary-devices.html'), title) + pprint(' summary-devices.html - kernel device list sorted by total execution time') + createHTMLIssuesSummary(issues, os.path.join(outpath, 'summary-issues.html'), title) + pprint(' summary-issues.html - kernel issues found sorted by frequency') # Function: checkArgBool # Description: From 45dd0a42b90bed6cb8c63e7f88f62bf5662c2413 Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Tue, 14 May 2019 10:53:58 -0700 Subject: [PATCH 06/57] Update to pm-graph 5.4 bootgraph: - dmesg log format has changed, update parser in two places - fix prints in preparation for upgrade to python3 sleepgraph: - fix prints in preparation for upgrade to python3 - add new trace events and kprobes to cover freeze more completely - add new -ftop callgraph trace over suspend_devices_and_enter - add -wifi option to check if a wifi connection is active - add -skipkprobe option to suppress unwanted kprobes in dev mode - add kernel params and sysinfo to the log output - don't crash if /dev/mem is throwing IO errors, ignore FPDT and DMI - fix kprobe length calculation when calls are recursive - add several new kernel issue definitions for USB, ACPI, ATA, etc - enable turbostat output to be read from stdout instead of from file - add BIOS call data to the timeline from acpi_ps_execute_method kprobe Signed-off-by: Todd Brandt Signed-off-by: Rafael J. Wysocki --- tools/power/pm-graph/bootgraph.py | 8 +- tools/power/pm-graph/sleepgraph.py | 491 ++++++++++++++++++++--------- 2 files changed, 347 insertions(+), 152 deletions(-) diff --git a/tools/power/pm-graph/bootgraph.py b/tools/power/pm-graph/bootgraph.py index 6dae57041537..d7f4bd152bf1 100755 --- a/tools/power/pm-graph/bootgraph.py +++ b/tools/power/pm-graph/bootgraph.py @@ -333,9 +333,9 @@ def parseKernelLog(): if(not sysvals.stamp['kernel']): sysvals.stamp['kernel'] = sysvals.kernelVersion(msg) continue - m = re.match('.* setting system clock to (?P.*) UTC.*', msg) + m = re.match('.* setting system clock to (?P[0-9\-]*)[ A-Z](?P[0-9:]*) UTC.*', msg) if(m): - bt = datetime.strptime(m.group('t'), '%Y-%m-%d %H:%M:%S') + bt = datetime.strptime(m.group('d')+' '+m.group('t'), '%Y-%m-%d %H:%M:%S') bt = bt - timedelta(seconds=int(ktime)) data.boottime = bt.strftime('%Y-%m-%d_%H:%M:%S') sysvals.stamp['time'] = bt.strftime('%B %d %Y, %I:%M:%S %p') @@ -356,7 +356,7 @@ def parseKernelLog(): data.newAction(phase, f, pid, start, ktime, int(r), int(t)) del devtemp[f] continue - if(re.match('^Freeing unused kernel memory.*', msg)): + if(re.match('^Freeing unused kernel .*', msg)): data.tUserMode = ktime data.dmesg['kernel']['end'] = ktime data.dmesg['user']['start'] = ktime @@ -1016,7 +1016,7 @@ if __name__ == '__main__': updateKernelParams() elif cmd == 'flistall': for f in sysvals.getBootFtraceFilterFunctions(): - print f + print(f) elif cmd == 'checkbl': sysvals.getBootLoader() pprint('Boot Loader: %s\n%s' % (sysvals.bootloader, sysvals.blexec)) diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 41d28d63e7c9..ccd0f3917c51 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -17,9 +17,9 @@ # # Links: # Home Page -# https://01.org/suspendresume +# https://01.org/pm-graph # Source repo -# git@github.com:01org/pm-graph +# git@github.com:intel/pm-graph # # Description: # This tool is designed to assist kernel and OS developers in optimizing @@ -32,6 +32,7 @@ # viewed in firefox or chrome. # # The following kernel build options are required: +# CONFIG_DEVMEM=y # CONFIG_PM_DEBUG=y # CONFIG_PM_SLEEP_DEBUG=y # CONFIG_FTRACE=y @@ -75,7 +76,7 @@ def pprint(msg): # store system values and test parameters class SystemValues: title = 'SleepGraph' - version = '5.3' + version = '5.4' ansi = False rs = 0 display = '' @@ -83,8 +84,9 @@ class SystemValues: sync = False verbose = False testlog = True - dmesglog = False + dmesglog = True ftracelog = False + tstat = False mindevlen = 0.0 mincglen = 0.0 cgphase = '' @@ -108,6 +110,8 @@ class SystemValues: pmdpath = '/sys/power/pm_debug_messages' traceevents = [ 'suspend_resume', + 'wakeup_source_activate', + 'wakeup_source_deactivate', 'device_pm_callback_end', 'device_pm_callback_start' ] @@ -139,6 +143,8 @@ class SystemValues: x2delay = 0 skiphtml = False usecallgraph = False + ftopfunc = 'suspend_devices_and_enter' + ftop = False usetraceevents = False usetracemarkers = True usekprobes = True @@ -167,6 +173,13 @@ class SystemValues: 'acpi_hibernation_leave': {}, 'acpi_pm_freeze': {}, 'acpi_pm_thaw': {}, + 'acpi_s2idle_end': {}, + 'acpi_s2idle_sync': {}, + 'acpi_s2idle_begin': {}, + 'acpi_s2idle_prepare': {}, + 'acpi_s2idle_wake': {}, + 'acpi_s2idle_wakeup': {}, + 'acpi_s2idle_restore': {}, 'hibernate_preallocate_memory': {}, 'create_basic_memory_bitmaps': {}, 'swsusp_write': {}, @@ -203,7 +216,11 @@ class SystemValues: 'rt_mutex_slowlock': {'ub': 1}, # ACPI 'acpi_resume_power_resources': {}, - 'acpi_ps_parse_aml': {}, + 'acpi_ps_execute_method': { 'args_x86_64': { + 'fullpath':'+0(+40(%di)):string', + }}, + # mei_me + 'mei_reset': {}, # filesystem 'ext4_sync_fs': {}, # 80211 @@ -252,6 +269,7 @@ class SystemValues: timeformat = '%.3f' cmdline = '%s %s' % \ (os.path.basename(sys.argv[0]), ' '.join(sys.argv[1:])) + kparams = '' sudouser = '' def __init__(self): self.archargs = 'args_'+platform.machine() @@ -330,6 +348,7 @@ class SystemValues: args['date'] = n.strftime('%y%m%d') args['time'] = n.strftime('%H%M%S') args['hostname'] = args['host'] = self.hostname + args['mode'] = self.suspendmode return value.format(**args) def setOutputFile(self): if self.dmesgfile != '': @@ -341,7 +360,7 @@ class SystemValues: if(m): self.htmlfile = m.group('name')+'.html' def systemInfo(self, info): - p = c = m = b = '' + p = m = '' if 'baseboard-manufacturer' in info: m = info['baseboard-manufacturer'] elif 'system-manufacturer' in info: @@ -352,12 +371,17 @@ class SystemValues: p = info['baseboard-product-name'] if m[:5].lower() == 'intel' and 'baseboard-product-name' in info: p = info['baseboard-product-name'] - if 'processor-version' in info: - c = info['processor-version'] - if 'bios-version' in info: - b = info['bios-version'] - self.sysstamp = '# sysinfo | man:%s | plat:%s | cpu:%s | bios:%s | numcpu:%d | memsz:%d | memfr:%d' % \ - (m, p, c, b, self.cpucount, self.memtotal, self.memfree) + c = info['processor-version'] if 'processor-version' in info else '' + b = info['bios-version'] if 'bios-version' in info else '' + r = info['bios-release-date'] if 'bios-release-date' in info else '' + self.sysstamp = '# sysinfo | man:%s | plat:%s | cpu:%s | bios:%s | biosdate:%s | numcpu:%d | memsz:%d | memfr:%d' % \ + (m, p, c, b, r, self.cpucount, self.memtotal, self.memfree) + try: + kcmd = open('/proc/cmdline', 'r').read().strip() + except: + kcmd = '' + if kcmd: + self.sysstamp += '\n# kparams | %s' % kcmd def printSystemInfo(self, fatal=False): self.rootCheck(True) out = dmidecode(self.mempath, fatal) @@ -365,10 +389,10 @@ class SystemValues: return fmt = '%-24s: %s' for name in sorted(out): - print fmt % (name, out[name]) - print fmt % ('cpucount', ('%d' % self.cpucount)) - print fmt % ('memtotal', ('%d kB' % self.memtotal)) - print fmt % ('memfree', ('%d kB' % self.memfree)) + print(fmt % (name, out[name])) + print(fmt % ('cpucount', ('%d' % self.cpucount))) + print(fmt % ('memtotal', ('%d kB' % self.memtotal))) + print(fmt % ('memfree', ('%d kB' % self.memfree))) def cpuInfo(self): self.cpucount = 0 fp = open('/proc/cpuinfo', 'r') @@ -388,7 +412,7 @@ class SystemValues: def initTestOutput(self, name): self.prefix = self.hostname v = open('/proc/version', 'r').read().strip() - kver = string.split(v)[2] + kver = v.split()[2] fmt = name+'-%m%d%y-%H%M%S' testtime = datetime.now().strftime(fmt) self.teststamp = \ @@ -403,7 +427,7 @@ class SystemValues: self.htmlfile = \ self.testdir+'/'+self.prefix+'_'+self.suspendmode+'.html' if not os.path.isdir(self.testdir): - os.mkdir(self.testdir) + os.makedirs(self.testdir) def getValueList(self, value): out = [] for i in value.split(','): @@ -414,6 +438,12 @@ class SystemValues: self.devicefilter = self.getValueList(value) def setCallgraphFilter(self, value): self.cgfilter = self.getValueList(value) + def skipKprobes(self, value): + for k in self.getValueList(value): + if k in self.tracefuncs: + del self.tracefuncs[k] + if k in self.dev_tracefuncs: + del self.dev_tracefuncs[k] def setCallgraphBlacklist(self, file): self.cgblacklist = self.listFromFile(file) def rtcWakeAlarmOn(self): @@ -483,9 +513,9 @@ class SystemValues: if 'func' in self.tracefuncs[i]: i = self.tracefuncs[i]['func'] if i in master: - print i + print(i) else: - print self.colorText(i) + print(self.colorText(i)) def setFtraceFilterFunctions(self, list): master = self.listFromFile(self.tpath+'available_filter_functions') flist = '' @@ -728,7 +758,10 @@ class SystemValues: cf.append(self.tracefuncs[fn]['func']) else: cf.append(fn) - self.setFtraceFilterFunctions(cf) + if self.ftop: + self.setFtraceFilterFunctions([self.ftopfunc]) + else: + self.setFtraceFilterFunctions(cf) # initialize the kprobe trace elif self.usekprobes: for name in self.tracefuncs: @@ -788,6 +821,14 @@ class SystemValues: if 'bat' in test: (a1, c1), (a2, c2) = test['bat'] fp.write('# battery %s %d %s %d\n' % (a1, c1, a2, c2)) + if 'wifi' in test: + wstr = [] + for wifi in test['wifi']: + tmp = [] + for key in sorted(wifi): + tmp.append('%s:%s' % (key, wifi[key])) + wstr.append('|'.join(tmp)) + fp.write('# wifi %s\n' % (','.join(wstr))) if test['error'] or len(testdata) > 1: fp.write('# enter_sleep_error %s\n' % test['error']) return fp @@ -852,26 +893,22 @@ class SystemValues: return '' return base64.b64encode(out.encode('zlib')) def haveTurbostat(self): + if not self.tstat: + return False cmd = self.getExec('turbostat') if not cmd: return False fp = Popen([cmd, '-v'], stdout=PIPE, stderr=PIPE).stderr out = fp.read().strip() fp.close() - return re.match('turbostat version [0-8.]* .*', out) + return re.match('turbostat version [0-9\.]* .*', out) def turbostat(self): cmd = self.getExec('turbostat') if not cmd: return 'missing turbostat executable' - outfile = '/tmp/pm-graph-turbostat.txt' - res = call('%s -o %s -q -S echo freeze > %s' % \ - (cmd, outfile, self.powerfile), shell=True) - if res != 0: - return 'turbosat returned %d' % res - if not os.path.exists(outfile): - return 'turbostat output missing' - fp = open(outfile, 'r') text = [] + fullcmd = '%s -q -S echo freeze > %s' % (cmd, self.powerfile) + fp = Popen(['sh', '-c', fullcmd], stdout=PIPE, stderr=PIPE).stderr for line in fp: if re.match('[0-9.]* sec', line): continue @@ -888,6 +925,60 @@ class SystemValues: values.append(line[idx]) out.append('%s=%s' % (key, ','.join(values))) return '|'.join(out) + def checkWifi(self): + out = dict() + iwcmd, ifcmd = self.getExec('iwconfig'), self.getExec('ifconfig') + if not iwcmd or not ifcmd: + return out + fp = Popen(iwcmd, stdout=PIPE, stderr=PIPE).stdout + for line in fp: + m = re.match('(?P\S*) .* ESSID:(?P\S*)', line) + if not m: + continue + out['device'] = m.group('dev') + if '"' in m.group('ess'): + out['essid'] = m.group('ess').strip('"') + break + fp.close() + if 'device' in out: + fp = Popen([ifcmd, out['device']], stdout=PIPE, stderr=PIPE).stdout + for line in fp: + m = re.match('.* inet (?P[0-9\.]*)', line) + if m: + out['ip'] = m.group('ip') + break + fp.close() + return out + def errorSummary(self, errinfo, msg): + found = False + for entry in errinfo: + if re.match(entry['match'], msg): + entry['count'] += 1 + if self.hostname not in entry['urls']: + entry['urls'][self.hostname] = [self.htmlfile] + elif self.htmlfile not in entry['urls'][self.hostname]: + entry['urls'][self.hostname].append(self.htmlfile) + found = True + break + if found: + return + arr = msg.split() + for j in range(len(arr)): + if re.match('^[0-9,\-\.]*$', arr[j]): + arr[j] = '[0-9,\-\.]*' + else: + arr[j] = arr[j]\ + .replace('\\', '\\\\').replace(']', '\]').replace('[', '\[')\ + .replace('.', '\.').replace('+', '\+').replace('*', '\*')\ + .replace('(', '\(').replace(')', '\)') + mstr = ' '.join(arr) + entry = { + 'line': msg, + 'match': mstr, + 'count': 1, + 'urls': {self.hostname: [self.htmlfile]} + } + errinfo.append(entry) sysvals = SystemValues() switchvalues = ['enable', 'disable', 'on', 'off', 'true', 'false', '1', '0'] @@ -982,7 +1073,14 @@ class Data: 'ERROR' : '.*ERROR.*', 'WARNING' : '.*WARNING.*', 'IRQ' : '.*genirq: .*', - 'TASKFAIL': '.*Freezing of tasks failed.*', + 'TASKFAIL': '.*Freezing of tasks *.*', + 'ACPI' : '.*ACPI *(?P[A-Za-z]*) *Error[: ].*', + 'DEVFAIL' : '.* failed to (?P[a-z]*) async: .*', + 'DISKFULL': '.*No space left on device.*', + 'USBERR' : '.*usb .*device .*, error [0-9-]*', + 'ATAERR' : ' *ata[0-9\.]*: .*failed.*', + 'MEIERR' : ' *mei.*: .*failed.*', + 'TPMERR' : '(?i) *tpm *tpm[0-9]*: .*error.*', } def __init__(self, num): idchar = 'abcdefghij' @@ -1000,6 +1098,7 @@ class Data: self.outfile = '' self.kerror = False self.battery = 0 + self.wifi = 0 self.turbostat = 0 self.mcelog = 0 self.enterfail = '' @@ -1036,35 +1135,21 @@ class Data: if len(plist) < 1: return '' return plist[-1] - def errorSummary(self, errinfo, msg): - found = False - for entry in errinfo: - if re.match(entry['match'], msg): - entry['count'] += 1 - if sysvals.hostname not in entry['urls']: - entry['urls'][sysvals.hostname] = sysvals.htmlfile - found = True - break - if found: - return - arr = msg.split() - for j in range(len(arr)): - if re.match('^[0-9\-\.]*$', arr[j]): - arr[j] = '[0-9\-\.]*' - else: - arr[j] = arr[j]\ - .replace(']', '\]').replace('[', '\[').replace('.', '\.')\ - .replace('+', '\+').replace('*', '\*').replace('(', '\(')\ - .replace(')', '\)') - mstr = ' '.join(arr) - entry = { - 'line': msg, - 'match': mstr, - 'count': 1, - 'urls': {sysvals.hostname: sysvals.htmlfile} - } - errinfo.append(entry) - def extractErrorInfo(self, issues=0): + def turbostatInfo(self): + tp = TestProps() + out = {'syslpi':'N/A','pkgpc10':'N/A'} + for line in self.dmesgtext: + m = re.match(tp.tstatfmt, line) + if not m: + continue + for i in m.group('t').split('|'): + if 'SYS%LPI' in i: + out['syslpi'] = i.split('=')[-1]+'%' + elif 'pc10' in i: + out['pkgpc10'] = i.split('=')[-1]+'%' + break + return out + def extractErrorInfo(self): lf = self.dmesgtext if len(self.dmesgtext) < 1 and sysvals.dmesgfile: lf = sysvals.openlog(sysvals.dmesgfile, 'r') @@ -1082,19 +1167,19 @@ class Data: msg = m.group('msg') for err in self.errlist: if re.match(self.errlist[err], msg): - list.append((err, dir, t, i, i)) + list.append((msg, err, dir, t, i, i)) self.kerror = True - if not isinstance(issues, int): - self.errorSummary(issues, msg) break - for e in list: - type, dir, t, idx1, idx2 = e + msglist = [] + for msg, type, dir, t, idx1, idx2 in list: + msglist.append(msg) sysvals.vprint('kernel %s found in %s at %f' % (type, dir, t)) self.errorinfo[dir].append((type, t, idx1, idx2)) if self.kerror: sysvals.dmesglog = True if len(self.dmesgtext) < 1 and sysvals.dmesgfile: lf.close() + return msglist def setStart(self, time): self.start = time def setEnd(self, time): @@ -2147,7 +2232,7 @@ class FTraceCallGraph: if(data.dmesg[p]['start'] <= self.start and self.start <= data.dmesg[p]['end']): list = data.dmesg[p]['list'] - for devname in list: + for devname in sorted(list, key=lambda k:list[k]['start']): dev = list[devname] if(pid == dev['pid'] and self.start <= dev['start'] and @@ -2452,6 +2537,7 @@ class TestProps: '(?P[0-9]{2})(?P[0-9]{2})(?P[0-9]{2})'+\ ' (?P.*) (?P.*) (?P.*)$' batteryfmt = '^# battery (?P\w*) (?P\d*) (?P\w*) (?P\d*)' + wififmt = '^# wifi (?P.*)' tstatfmt = '^# turbostat (?P\S*)' mcelogfmt = '^# mcelog (?P\S*)' testerrfmt = '^# enter_sleep_error (?P.*)' @@ -2479,6 +2565,7 @@ class TestProps: self.mcelog = [] self.turbostat = [] self.battery = [] + self.wifi = [] self.fwdata = [] self.ftrace_line_fmt = self.ftrace_line_fmt_nop self.cgformat = False @@ -2505,6 +2592,9 @@ class TestProps: elif re.match(self.sysinfofmt, line): self.sysinfo = line return True + elif re.match(self.kparamsfmt, line): + self.kparams = line + return True elif re.match(self.cmdlinefmt, line): self.cmdline = line return True @@ -2517,6 +2607,9 @@ class TestProps: elif re.match(self.batteryfmt, line): self.battery.append(line) return True + elif re.match(self.wififmt, line): + self.wifi.append(line) + return True elif re.match(self.testerrfmt, line): self.testerror.append(line) return True @@ -2586,6 +2679,11 @@ class TestProps: m = re.match(self.batteryfmt, self.battery[data.testnumber]) if m: data.battery = m.groups() + # wifi data + if len(self.wifi) > data.testnumber: + m = re.match(self.wififmt, self.wifi[data.testnumber]) + if m: + data.wifi = m.group('w') # sleep mode enter errors if len(self.testerror) > data.testnumber: m = re.match(self.testerrfmt, self.testerror[data.testnumber]) @@ -2655,9 +2753,9 @@ class ProcessMonitor: # Quickly determine if the ftrace log has all of the trace events, # markers, and/or kprobes required for primary parsing. def doesTraceLogHaveTraceEvents(): - kpcheck = ['_cal: (', '_cpu_down()'] + kpcheck = ['_cal: (', '_ret: ('] techeck = ['suspend_resume', 'device_pm_callback'] - tmcheck = ['tracing_mark_write'] + tmcheck = ['SUSPEND START', 'RESUME COMPLETE'] sysvals.usekprobes = False fp = sysvals.openlog(sysvals.ftracefile, 'r') for line in fp: @@ -3042,7 +3140,7 @@ def parseTraceLog(live=False): tp.ktemp[key].append({ 'pid': pid, 'begin': t.time, - 'end': t.time, + 'end': -1, 'name': displayname, 'cdata': kprobedata, 'proc': m_proc, @@ -3053,12 +3151,11 @@ def parseTraceLog(live=False): elif(t.freturn): if(key not in tp.ktemp) or len(tp.ktemp[key]) < 1: continue - e = tp.ktemp[key][-1] - if e['begin'] < 0.0 or t.time - e['begin'] < 0.000001: - tp.ktemp[key].pop() - else: - e['end'] = t.time - e['rdata'] = kprobedata + e = next((x for x in reversed(tp.ktemp[key]) if x['end'] < 0), 0) + if not e: + continue + e['end'] = t.time + e['rdata'] = kprobedata # end of kernel resume if(phase != 'suspend_prepare' and kprobename in krescalls): if phase in data.dmesg: @@ -3080,8 +3177,10 @@ def parseTraceLog(live=False): if(res == -1): testrun.ftemp[key][-1].addLine(t) tf.close() + if len(testdata) < 1: + sysvals.vprint('WARNING: ftrace start marker is missing') if data and not data.devicegroups: - sysvals.vprint('WARNING: end marker is missing') + sysvals.vprint('WARNING: ftrace end marker is missing') data.handleEndMarker(t.time) if sysvals.suspendmode == 'command': @@ -3130,9 +3229,11 @@ def parseTraceLog(live=False): name, pid = key if name not in sysvals.tracefuncs: continue + if pid not in data.devpids: + data.devpids.append(pid) for e in tp.ktemp[key]: kb, ke = e['begin'], e['end'] - if kb == ke or tlb > kb or tle <= kb: + if ke - kb < 0.000001 or tlb > kb or tle <= kb: continue color = sysvals.kprobeColor(name) data.newActionGlobal(e['name'], kb, ke, pid, color) @@ -3144,7 +3245,7 @@ def parseTraceLog(live=False): continue for e in tp.ktemp[key]: kb, ke = e['begin'], e['end'] - if kb == ke or tlb > kb or tle <= kb: + if ke - kb < 0.000001 or tlb > kb or tle <= kb: continue data.addDeviceFunctionCall(e['name'], name, e['proc'], pid, kb, ke, e['cdata'], e['rdata']) @@ -3168,7 +3269,7 @@ def parseTraceLog(live=False): if not devname: sortkey = '%f%f%d' % (cg.start, cg.end, pid) sortlist[sortkey] = cg - elif len(cg.list) > 1000000: + elif len(cg.list) > 1000000 and cg.name != sysvals.ftopfunc: sysvals.vprint('WARNING: the callgraph for %s is massive (%d lines)' %\ (devname, len(cg.list))) # create blocks for orphan cg data @@ -3275,7 +3376,7 @@ def loadKernelLog(): if data: testruns.append(data) if len(testruns) < 1: - pprint('ERROR: dmesg log has no suspend/resume data: %s' \ + doError('dmesg log has no suspend/resume data: %s' \ % sysvals.dmesgfile) # fix lines with same timestamp/function with the call and return swapped @@ -3614,6 +3715,8 @@ def addCallgraphs(sv, hf, data): name += ' '+p if('ftrace' in dev): cg = dev['ftrace'] + if cg.name == sv.ftopfunc: + name = 'top level suspend/resume call' num = callgraphHTML(sv, hf, num, cg, name, color, dev['id']) if('ftraces' in dev): @@ -3653,9 +3756,10 @@ def createHTMLSummarySimple(testruns, htmlfile, title): # extract the test data into list list = dict() - tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [[], []] + tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [dict(), dict()] iMin, iMed, iMax = [0, 0], [0, 0], [0, 0] num = 0 + useturbo = False lastmode = '' cnt = dict() for data in sorted(testruns, key=lambda v:(v['mode'], v['host'], v['kernel'], v['time'])): @@ -3666,20 +3770,25 @@ def createHTMLSummarySimple(testruns, htmlfile, title): for i in range(2): s = sorted(tMed[i]) list[lastmode]['med'][i] = s[int(len(s)/2)] - iMed[i] = tMed[i].index(list[lastmode]['med'][i]) + iMed[i] = tMed[i][list[lastmode]['med'][i]] list[lastmode]['avg'] = [tAvg[0] / num, tAvg[1] / num] list[lastmode]['min'] = tMin list[lastmode]['max'] = tMax list[lastmode]['idx'] = (iMin, iMed, iMax) - tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [[], []] + tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [dict(), dict()] iMin, iMed, iMax = [0, 0], [0, 0], [0, 0] num = 0 + pkgpc10 = syslpi = '' + if 'pkgpc10' in data and 'syslpi' in data: + pkgpc10 = data['pkgpc10'] + syslpi = data['syslpi'] + useturbo = True res = data['result'] tVal = [float(data['suspend']), float(data['resume'])] list[mode]['data'].append([data['host'], data['kernel'], data['time'], tVal[0], tVal[1], data['url'], res, data['issues'], data['sus_worst'], data['sus_worsttime'], - data['res_worst'], data['res_worsttime']]) + data['res_worst'], data['res_worsttime'], pkgpc10, syslpi]) idx = len(list[mode]['data']) - 1 if res.startswith('fail in'): res = 'fail' @@ -3689,7 +3798,7 @@ def createHTMLSummarySimple(testruns, htmlfile, title): cnt[res] += 1 if res == 'pass': for i in range(2): - tMed[i].append(tVal[i]) + tMed[i][tVal[i]] = idx tAvg[i] += tVal[i] if tMin[i] == 0 or tVal[i] < tMin[i]: iMin[i] = idx @@ -3703,7 +3812,7 @@ def createHTMLSummarySimple(testruns, htmlfile, title): for i in range(2): s = sorted(tMed[i]) list[lastmode]['med'][i] = s[int(len(s)/2)] - iMed[i] = tMed[i].index(list[lastmode]['med'][i]) + iMed[i] = tMed[i][list[lastmode]['med'][i]] list[lastmode]['avg'] = [tAvg[0] / num, tAvg[1] / num] list[lastmode]['min'] = tMin list[lastmode]['max'] = tMax @@ -3719,6 +3828,7 @@ def createHTMLSummarySimple(testruns, htmlfile, title): td = '\t
\n' tdh = '\t{0}\n' tdlink = '\t\n' + colspan = '14' if useturbo else '12' # table header html += '
', '{0}html
\n\n' + th.format('#') +\ @@ -3726,12 +3836,13 @@ def createHTMLSummarySimple(testruns, htmlfile, title): th.format('Test Time') + th.format('Result') + th.format('Issues') +\ th.format('Suspend') + th.format('Resume') +\ th.format('Worst Suspend Device') + th.format('SD Time') +\ - th.format('Worst Resume Device') + th.format('RD Time') +\ - th.format('Detail') + '\n' - + th.format('Worst Resume Device') + th.format('RD Time') + if useturbo: + html += th.format('PkgPC10') + th.format('SysLPI') + html += th.format('Detail')+'\n' # export list into html head = ''+\ - ''+\ '\n' - headnone = '\n' + headnone = '\n' for mode in list: # header line for each suspend mode num = 0 @@ -3787,6 +3899,9 @@ def createHTMLSummarySimple(testruns, htmlfile, title): html += td.format('%.3f ms' % d[9]) if d[9] else td.format('') # sus_worst time html += td.format(d[10]) # res_worst html += td.format('%.3f ms' % d[11]) if d[11] else td.format('') # res_worst time + if useturbo: + html += td.format(d[12]) # pkg_pc10 + html += td.format(d[13]) # syslpi html += tdlink.format(d[5]) if d[5] else td.format('') # url html += '\n' num += 1 @@ -3861,8 +3976,10 @@ def createHTMLDeviceSummary(testruns, htmlfile, title): hf.close() return devall -def createHTMLIssuesSummary(issues, htmlfile, title): +def createHTMLIssuesSummary(testruns, issues, htmlfile, title, extra=''): + multihost = len([e for e in issues if len(e['urls']) > 1]) > 0 html = summaryCSS('Issues Summary - SleepGraph', False) + total = len(testruns) # generate the html th = '\t\n' @@ -3870,27 +3987,36 @@ def createHTMLIssuesSummary(issues, htmlfile, title): tdlink = '{0}' subtitle = '%d issues' % len(issues) if len(issues) > 0 else 'no issues' html += '
%s (%s)
{0}{1}Suspend Avg={2} '+\ + 'Suspend Avg={2} '+\ 'Min={3} '+\ 'Med={4} '+\ 'Max={5} '+\ @@ -3740,7 +3851,8 @@ def createHTMLSummarySimple(testruns, htmlfile, title): 'Med={8} '+\ 'Max={9}
{0}{1}
{0}{1}
{0}
\n' % (title, subtitle) - html += '\n' + th.format('Count') + th.format('Issue') +\ - th.format('Hosts') + th.format('First Instance') + '\n' + html += '\n' + th.format('Issue') + th.format('Count') + if multihost: + html += th.format('Hosts') + html += th.format('Tests') + th.format('Fail Rate') +\ + th.format('First Instance') + '\n' num = 0 for e in sorted(issues, key=lambda v:v['count'], reverse=True): + testtotal = 0 links = [] for host in sorted(e['urls']): - links.append(tdlink.format(host, e['urls'][host])) + links.append(tdlink.format(host, e['urls'][host][0])) + testtotal += len(e['urls'][host]) + rate = '%d/%d (%.2f%%)' % (testtotal, total, 100*float(testtotal)/float(total)) # row classes - alternate row color rcls = ['alt'] if num % 2 == 1 else [] html += '\n' if len(rcls) > 0 else '\n' - html += td.format('center', e['count']) # count html += td.format('left', e['line']) # issue - html += td.format('center', len(e['urls'])) # hosts + html += td.format('center', e['count']) # count + if multihost: + html += td.format('center', len(e['urls'])) # hosts + html += td.format('center', testtotal) # test count + html += td.format('center', rate) # test rate html += td.format('center nowrap', '
'.join(links)) # links html += '
\n' num += 1 # flush the data to file hf = open(htmlfile, 'w') - hf.write(html+'
\n\n\n') + hf.write(html+'\n'+extra+'\n\n') hf.close() return issues @@ -4195,7 +4321,7 @@ def createHTML(testruns, testfail): for word in phase.split('_'): id += word[0] order = '%.2f' % ((p['order'] * pdelta) + pmargin) - name = string.replace(phase, '_', '  ') + name = phase.replace('_', '  ') devtl.html += devtl.html_legend.format(order, p['color'], name, id) devtl.html += '\n' @@ -4784,6 +4910,7 @@ def setRuntimeSuspend(before=True): def executeSuspend(): pm = ProcessMonitor() tp = sysvals.tpath + wifi = sysvals.checkWifi() testdata = [] battery = True if getBattery() else False # run these commands to prepare the system for suspend @@ -4857,6 +4984,8 @@ def executeSuspend(): else: tdata['error'] = turbo else: + if sysvals.haveTurbostat(): + sysvals.vprint('WARNING: ignoring turbostat in mode "%s"' % mode) pf = open(sysvals.powerfile, 'w') pf.write(mode) # execution will pause here @@ -4883,6 +5012,8 @@ def executeSuspend(): bat2 = getBattery() if battery else False if battery and bat1 and bat2: tdata['bat'] = (bat1, bat2) + if 'device' in wifi and 'ip' in wifi: + tdata['wifi'] = (wifi, sysvals.checkWifi()) testdata.append(tdata) # stop ftrace if(sysvals.usecallgraph or sysvals.usetraceevents): @@ -4989,7 +5120,7 @@ def deviceInfo(output=''): ms2nice(power['runtime_active_time']), \ ms2nice(power['runtime_suspended_time'])) for i in sorted(lines): - print lines[i] + print(lines[i]) return res # Function: devProps @@ -5122,12 +5253,12 @@ def getModes(): modes = [] if(os.path.exists(sysvals.powerfile)): fp = open(sysvals.powerfile, 'r') - modes = string.split(fp.read()) + modes = fp.read().split() fp.close() if(os.path.exists(sysvals.mempowerfile)): deep = False fp = open(sysvals.mempowerfile, 'r') - for m in string.split(fp.read()): + for m in fp.read().split(): memmode = m.strip('[]') if memmode == 'deep': deep = True @@ -5138,7 +5269,7 @@ def getModes(): modes.remove('mem') if('disk' in modes and os.path.exists(sysvals.diskpowerfile)): fp = open(sysvals.diskpowerfile, 'r') - for m in string.split(fp.read()): + for m in fp.read().split(): modes.append('disk-%s' % m.strip('[]')) fp.close() return modes @@ -5201,14 +5332,15 @@ def dmidecode(mempath, fatal=False): continue # read in the memory for scanning - fp = open(mempath, 'rb') try: + fp = open(mempath, 'rb') fp.seek(memaddr) buf = fp.read(memsize) except: if(fatal): doError('DMI table is unreachable, sorry') else: + pprint('WARNING: /dev/mem is not readable, ignoring DMI data') return out fp.close() @@ -5231,14 +5363,15 @@ def dmidecode(mempath, fatal=False): return out # read in the SM or DMI table - fp = open(mempath, 'rb') try: + fp = open(mempath, 'rb') fp.seek(base) buf = fp.read(length) except: if(fatal): doError('DMI table is unreachable, sorry') else: + pprint('WARNING: /dev/mem is not readable, ignoring DMI data') return out fp.close() @@ -5382,7 +5515,11 @@ def getFPDT(output): i = 0 fwData = [0, 0] records = buf[36:] - fp = open(sysvals.mempath, 'rb') + try: + fp = open(sysvals.mempath, 'rb') + except: + pprint('WARNING: /dev/mem is not readable, ignoring the FPDT data') + return False while(i < len(records)): header = struct.unpack('HBB', records[i:i+4]) if(header[0] not in rectype): @@ -5499,13 +5636,14 @@ def statusCheck(probecheck=False): pprint(' is ftrace supported: %s' % res) # check if kprobes are available - res = sysvals.colorText('NO') - sysvals.usekprobes = sysvals.verifyKprobes() - if(sysvals.usekprobes): - res = 'YES' - else: - sysvals.usedevsrc = False - pprint(' are kprobes supported: %s' % res) + if sysvals.usekprobes: + res = sysvals.colorText('NO') + sysvals.usekprobes = sysvals.verifyKprobes() + if(sysvals.usekprobes): + res = 'YES' + else: + sysvals.usedevsrc = False + pprint(' are kprobes supported: %s' % res) # what data source are we using res = 'DMESG' @@ -5593,6 +5731,8 @@ def getArgFloat(name, args, min, max, main=True): def processData(live=False): pprint('PROCESSING DATA') + sysvals.vprint('usetraceevents=%s, usetracemarkers=%s, usekprobes=%s' % \ + (sysvals.usetraceevents, sysvals.usetracemarkers, sysvals.usekprobes)) error = '' if(sysvals.usetraceevents): testruns, error = parseTraceLog(live) @@ -5605,6 +5745,11 @@ def processData(live=False): parseKernelLog(data) if(sysvals.ftracefile and (sysvals.usecallgraph or sysvals.usetraceevents)): appendIncompleteTraceLog(testruns) + sysvals.vprint('System Info:') + for key in sorted(sysvals.stamp): + sysvals.vprint(' %-8s : %s' % (key.upper(), sysvals.stamp[key])) + if sysvals.kparams: + sysvals.vprint('Kparams:\n %s' % sysvals.kparams) sysvals.vprint('Command:\n %s' % sysvals.cmdline) for data in testruns: if data.mcelog: @@ -5612,12 +5757,24 @@ def processData(live=False): for line in data.mcelog.split('\n'): sysvals.vprint(' %s' % line) if data.turbostat: - sysvals.vprint('Turbostat:\n %s' % data.turbostat.replace('|', ' ')) + idx, s = 0, 'Turbostat:\n ' + for val in data.turbostat.split('|'): + idx += len(val) + 1 + if idx >= 80: + idx = 0 + s += '\n ' + s += val + ' ' + sysvals.vprint(s) if data.battery: a1, c1, a2, c2 = data.battery s = 'Battery:\n Before - AC: %s, Charge: %d\n After - AC: %s, Charge: %d' % \ (a1, int(c1), a2, int(c2)) sysvals.vprint(s) + if data.wifi: + w = data.wifi.replace('|', ' ').split(',') + s = 'Wifi:\n Before %s\n After %s' % \ + (w[0], w[1]) + sysvals.vprint(s) data.printDetails() if sysvals.cgdump: for data in testruns: @@ -5641,13 +5798,13 @@ def processData(live=False): # Function: rerunTest # Description: # generate an output from an existing set of ftrace/dmesg logs -def rerunTest(): +def rerunTest(htmlfile=''): if sysvals.ftracefile: doesTraceLogHaveTraceEvents() if not sysvals.dmesgfile and not sysvals.usetraceevents: doError('recreating this html output requires a dmesg file') - if sysvals.outdir: - sysvals.htmlfile = sysvals.outdir + if htmlfile: + sysvals.htmlfile = htmlfile else: sysvals.setOutputFile() if os.path.exists(sysvals.htmlfile): @@ -5673,7 +5830,7 @@ def runTest(n=0): if sysvals.skiphtml: sysvals.sudoUserchown(sysvals.testdir) return - if len(testdata) > 0 and not testdata[0]['error']: + if not testdata[0]['error']: testruns, stamp = processData(True) for data in testruns: del data @@ -5709,15 +5866,13 @@ def find_in_html(html, start, end, firstonly=True): return '' return out -def data_from_html(file, outpath, issues): - if '' not in file: - html = open(file, 'r').read() - sysvals.htmlfile = os.path.relpath(file, outpath) - else: - html = file +def data_from_html(file, outpath, issues, fulldetail=False): + html = open(file, 'r').read() + sysvals.htmlfile = os.path.relpath(file, outpath) # extract general info suspend = find_in_html(html, 'Kernel Suspend', 'ms') resume = find_in_html(html, 'Kernel Resume', 'ms') + sysinfo = find_in_html(html, '
', '
') line = find_in_html(html, '
', '
') stmp = line.split() if not suspend or not resume or len(stmp) != 8: @@ -5739,13 +5894,18 @@ def data_from_html(file, outpath, issues): result = 'pass' # extract error info ilist = [] + extra = dict() log = find_in_html(html, '').strip() if log: d = Data(0) d.end = 999999999 d.dmesgtext = log.split('\n') - d.extractErrorInfo(issues) + msglist = d.extractErrorInfo() + for msg in msglist: + sysvals.errorSummary(issues, msg) + if stmp[2] == 'freeze': + extra = d.turbostatInfo() elist = dict() for dir in d.errorinfo: for err in d.errorinfo[dir]: @@ -5761,14 +5921,15 @@ def data_from_html(file, outpath, issues): if len(match) > 0: match[0]['count'] += 1 if sysvals.hostname not in match[0]['urls']: - match[0]['urls'][sysvals.hostname] = sysvals.htmlfile + match[0]['urls'][sysvals.hostname] = [sysvals.htmlfile] + elif sysvals.htmlfile not in match[0]['urls'][sysvals.hostname]: + match[0]['urls'][sysvals.hostname].append(sysvals.htmlfile) else: issues.append({ 'match': issue, 'count': 1, 'line': issue, - 'urls': {sysvals.hostname: sysvals.htmlfile}, + 'urls': {sysvals.hostname: [sysvals.htmlfile]}, }) ilist.append(issue) - # extract device info devices = dict() for line in html.split('\n'): @@ -5804,6 +5965,7 @@ def data_from_html(file, outpath, issues): 'mode': stmp[2], 'host': stmp[0], 'kernel': stmp[1], + 'sysinfo': sysinfo, 'time': tstr, 'result': result, 'issues': ' '.join(ilist), @@ -5816,8 +5978,28 @@ def data_from_html(file, outpath, issues): 'res_worsttime': worst['resume']['time'], 'url': sysvals.htmlfile, } + for key in extra: + data[key] = extra[key] + if fulldetail: + data['funclist'] = find_in_html(html, '
Test xset by toggling the given mode (on/off/standby/suspend)\n'\ ' -sysinfo Print out system info extracted from BIOS\n'\ ' -devinfo Print out the pm settings of all devices which support runtime suspend\n'\ @@ -6158,7 +6330,7 @@ def printHelp(): ' [redo]\n'\ ' -ftrace ftracefile Create HTML output using ftrace input (used with -dmesg)\n'\ ' -dmesg dmesgfile Create HTML output using dmesg (used with -ftrace)\n'\ - '' % (sysvals.title, sysvals.version, sysvals.suspendmode)) + '' % (sysvals.title, sysvals.version, sysvals.suspendmode, sysvals.ftopfunc)) return True # ----------------- MAIN -------------------- @@ -6168,7 +6340,7 @@ if __name__ == '__main__': cmd = '' simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall', '-devinfo', '-status', '-battery', '-xon', '-xoff', '-xstandby', - '-xsuspend', '-xinit', '-xreset', '-xstat'] + '-xsuspend', '-xinit', '-xreset', '-xstat', '-wifi'] if '-f' in sys.argv: sysvals.cgskip = sysvals.configFile('cgskip.txt') # loop through the command line arguments @@ -6200,6 +6372,10 @@ if __name__ == '__main__': sysvals.postdelay = getArgInt('-postdelay', args, 0, 60000) elif(arg == '-f'): sysvals.usecallgraph = True + elif(arg == '-ftop'): + sysvals.usecallgraph = True + sysvals.ftop = True + sysvals.usekprobes = False elif(arg == '-skiphtml'): sysvals.skiphtml = True elif(arg == '-cgdump'): @@ -6210,10 +6386,16 @@ if __name__ == '__main__': genhtml = True elif(arg == '-addlogs'): sysvals.dmesglog = sysvals.ftracelog = True + elif(arg == '-nologs'): + sysvals.dmesglog = sysvals.ftracelog = False elif(arg == '-addlogdmesg'): sysvals.dmesglog = True elif(arg == '-addlogftrace'): sysvals.ftracelog = True + elif(arg == '-turbostat'): + sysvals.tstat = True + if not sysvals.haveTurbostat(): + doError('Turbostat command not found') elif(arg == '-verbose'): sysvals.verbose = True elif(arg == '-proc'): @@ -6283,6 +6465,12 @@ if __name__ == '__main__': except: doError('No callgraph functions supplied', True) sysvals.setCallgraphFilter(val) + elif(arg == '-skipkprobe'): + try: + val = args.next() + except: + doError('No kprobe functions supplied', True) + sysvals.skipKprobes(val) elif(arg == '-cgskip'): try: val = args.next() @@ -6421,7 +6609,7 @@ if __name__ == '__main__': elif(cmd == 'devinfo'): deviceInfo() elif(cmd == 'modes'): - print getModes() + pprint(getModes()) elif(cmd == 'flist'): sysvals.getFtraceFilterFunctions(True) elif(cmd == 'flistall'): @@ -6433,11 +6621,18 @@ if __name__ == '__main__': ret = displayControl(cmd[1:]) elif(cmd == 'xstat'): pprint('Display Status: %s' % displayControl('stat').upper()) + elif(cmd == 'wifi'): + out = sysvals.checkWifi() + if 'device' not in out: + pprint('WIFI interface not found') + else: + for key in sorted(out): + pprint('%6s: %s' % (key.upper(), out[key])) sys.exit(ret) # if instructed, re-analyze existing data files if(sysvals.notestrun): - stamp = rerunTest() + stamp = rerunTest(sysvals.outdir) sysvals.outputResult(stamp) sys.exit(0) @@ -6474,7 +6669,7 @@ if __name__ == '__main__': s = 'suspend-x%d' % sysvals.multitest['count'] sysvals.outdir = datetime.now().strftime(s+'-%y%m%d-%H%M%S') if not os.path.isdir(sysvals.outdir): - os.mkdir(sysvals.outdir) + os.makedirs(sysvals.outdir) for i in range(sysvals.multitest['count']): if(i != 0): pprint('Waiting %d seconds...' % (sysvals.multitest['delay'])) From d5a5e4ec5b415e63dee9dd76653377455572ac09 Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Tue, 14 May 2019 10:53:59 -0700 Subject: [PATCH 07/57] Add README and update pm-graph and sleepgraph docs Config/man page/README files: - include README in the pm-graph folder - add more detail to the example config to describe more options - update the sleepgraph man page to document the new arguments Signed-off-by: Todd Brandt [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki --- tools/power/pm-graph/README | 552 ++++++++++++++++++++++++ tools/power/pm-graph/config/example.cfg | 26 ++ tools/power/pm-graph/sleepgraph.8 | 16 +- 3 files changed, 592 insertions(+), 2 deletions(-) create mode 100644 tools/power/pm-graph/README diff --git a/tools/power/pm-graph/README b/tools/power/pm-graph/README new file mode 100644 index 000000000000..58a5591e3951 --- /dev/null +++ b/tools/power/pm-graph/README @@ -0,0 +1,552 @@ + p m - g r a p h + + pm-graph: suspend/resume/boot timing analysis tools + Version: 5.4 + Author: Todd Brandt + Home Page: https://01.org/pm-graph + + Report bugs/issues at bugzilla.kernel.org Tools/pm-graph + - https://bugzilla.kernel.org/buglist.cgi?component=pm-graph&product=Tools + + Full documentation available online & in man pages + - Getting Started: + https://01.org/pm-graph/documentation/getting-started + + - Config File Format: + https://01.org/pm-graph/documentation/3-config-file-format + + - upstream version in git: + https://github.com/intel/pm-graph/ + + Table of Contents + - Overview + - Setup + - Usage + - Basic Usage + - Dev Mode Usage + - Proc Mode Usage + - Configuration Files + - Usage Examples + - Config File Options + - Custom Timeline Entries + - Adding/Editing Timeline Functions + - Adding/Editing Dev Timeline Source Functions + - Verifying your Custom Functions + - Testing on consumer linux Operating Systems + - Android + +------------------------------------------------------------------ +| OVERVIEW | +------------------------------------------------------------------ + + This tool suite is designed to assist kernel and OS developers in optimizing + their linux stack's suspend/resume & boot time. Using a kernel image built + with a few extra options enabled, the tools will execute a suspend or boot, + and will capture dmesg and ftrace data. This data is transformed into a set of + timelines and a callgraph to give a quick and detailed view of which devices + and kernel processes are taking the most time in suspend/resume & boot. + +------------------------------------------------------------------ +| SETUP | +------------------------------------------------------------------ + + These packages are required to execute the scripts + - python + - python-requests + + Ubuntu: + sudo apt-get install python python-requests + + Fedora: + sudo dnf install python python-requests + + The tools can most easily be installed via git clone and make install + + $> git clone http://github.com/intel/pm-graph.git + $> cd pm-graph + $> sudo make install + $> man sleepgraph ; man bootgraph + + Setup involves some minor kernel configuration + + The following kernel build options are required for all kernels: + CONFIG_DEVMEM=y + CONFIG_PM_DEBUG=y + CONFIG_PM_SLEEP_DEBUG=y + CONFIG_FTRACE=y + CONFIG_FUNCTION_TRACER=y + CONFIG_FUNCTION_GRAPH_TRACER=y + CONFIG_KPROBES=y + CONFIG_KPROBES_ON_FTRACE=y + + In kernel 3.15.0, two patches were upstreamed which enable the + v3.0 behavior. These patches allow the tool to read all the + data from trace events instead of from dmesg. You can enable + this behavior on earlier kernels with these patches: + + (kernel/pre-3.15/enable_trace_events_suspend_resume.patch) + (kernel/pre-3.15/enable_trace_events_device_pm_callback.patch) + + If you're using a kernel older than 3.15.0, the following + additional kernel parameters are required: + (e.g. in file /etc/default/grub) + GRUB_CMDLINE_LINUX_DEFAULT="... initcall_debug log_buf_len=32M ..." + + If you're using a kernel older than 3.11-rc2, the following simple + patch must be applied to enable ftrace data: + in file: kernel/power/suspend.c + in function: int suspend_devices_and_enter(suspend_state_t state) + remove call to "ftrace_stop();" + remove call to "ftrace_start();" + + There is a patch which does this for kernel v3.8.0: + (kernel/pre-3.11-rc2/enable_ftrace_in_suspendresume.patch) + + + +------------------------------------------------------------------ +| USAGE | +------------------------------------------------------------------ + +Basic Usage +___________ + + 1) First configure a kernel using the instructions from the previous sections. + Then build, install, and boot with it. + 2) Open up a terminal window and execute the mode list command: + + %> sudo ./sleepgraph.py -modes + ['freeze', 'mem', 'disk'] + + Execute a test using one of the available power modes, e.g. mem (S3): + + %> sudo ./sleepgraph.py -m mem -rtcwake 15 + + or with a config file + + %> sudo ./sleepgraph.py -config config/suspend.cfg + + When the system comes back you'll see the script finishing up and + creating the output files in the test subdir. It generates output + files in subdirectory: suspend-mmddyy-HHMMSS. The ftrace file can + be used to regenerate the html timeline with different options + + HTML output: _.html + raw dmesg output: __dmesg.txt + raw ftrace output: __ftrace.txt + + View the html in firefox or chrome. + + +Dev Mode Usage +______________ + + Developer mode adds information on low level source calls to the timeline. + The tool sets kprobes on all delay and mutex calls to see which devices + are waiting for something and when. It also sets a suite of kprobes on + subsystem dependent calls to better fill out the timeline. + + The tool will also expose kernel threads that don't normally show up in the + timeline. This is useful in discovering dependent threads to get a better + idea of what each device is waiting for. For instance, the scsi_eh thread, + a.k.a. scsi resume error handler, is what each SATA disk device waits for + before it can continue resume. + + The timeline will be much larger if run with dev mode, so it can be useful + to set the -mindev option to clip out any device blocks that are too small + to see easily. The following command will give a nice dev mode run: + + %> sudo ./sleepgraph.py -m mem -rtcwake 15 -mindev 1 -dev + + or with a config file + + %> sudo ./sleepgraph.py -config config/suspend-dev.cfg + + +Proc Mode Usage +_______________ + + Proc mode adds user process info to the timeline. This is done in a manner + similar to the bootchart utility, which graphs init processes and their + execution as the system boots. This tool option does the same thing but for + the period before and after suspend/resume. + + In order to see any process info, there needs to be some delay before or + after resume since processes are frozen in suspend_prepare and thawed in + resume_complete. The predelay and postdelay args allow you to do this. It + can also be useful to run in x2 mode with an x2 delay, this way you can + see process activity before and after resume, and in between two + successive suspend/resumes. + + The command can be run like this: + + %> sudo ./sleepgraph.py -m mem -rtcwake 15 -x2 -x2delay 1000 -predelay 1000 -postdelay 1000 -proc + + or with a config file + + %> sudo ./sleepgraph.py -config config/suspend-proc.cfg + + +------------------------------------------------------------------ +| CONFIGURATION FILES | +------------------------------------------------------------------ + + Since 4.0 we've moved to using config files in lieu of command line options. + The config folder contains a collection of typical use cases. + There are corresponding configs for other power modes: + + Simple suspend/resume with basic timeline (mem/freeze/standby) + config/suspend.cfg + config/freeze.cfg + config/standby.cfg + + Dev mode suspend/resume with dev timeline (mem/freeze/standby) + config/suspend-dev.cfg + config/freeze-dev.cfg + config/standby-dev.cfg + + Simple suspend/resume with timeline and callgraph (mem/freeze/standby) + config/suspend-callgraph.cfg + config/freeze-callgraph.cfg + config/standby-callgraph.cfg + + Sample proc mode x2 run using mem suspend + config/suspend-x2-proc.cfg + + Sample for editing timeline funcs (moves internal functions into config) + config/custom-timeline-functions.cfg + + Sample debug config for serio subsystem + config/debug-serio-suspend.cfg + + +Usage Examples +______________ + + Run a simple mem suspend: + %> sudo ./sleepgraph.py -config config/suspend.cfg + + Run a mem suspend with callgraph data: + %> sudo ./sleepgraph.py -config config/suspend-callgraph.cfg + + Run a mem suspend with dev mode detail: + %> sudo ./sleepgraph.py -config config/suspend-dev.cfg + + +Config File Options +___________________ + + [Settings] + + # Verbosity: print verbose messages (def: false) + verbose: false + + # Suspend Mode: e.g. standby, mem, freeze, disk (def: mem) + mode: mem + + # Output Directory Format: {hostname}, {date}, {time} give current values + output-dir: suspend-{hostname}-{date}-{time} + + # Automatic Wakeup: use rtcwake to wakeup after X seconds (def: infinity) + rtcwake: 15 + + # Add Logs: add the dmesg and ftrace log to the html output (def: false) + addlogs: false + + # Sus/Res Gap: insert a gap between sus & res in the timeline (def: false) + srgap: false + + # Custom Command: Command to execute in lieu of suspend (def: "") + command: echo mem > /sys/power/state + + # Proc mode: graph user processes and cpu usage in the timeline (def: false) + proc: false + + # Dev mode: graph source functions in the timeline (def: false) + dev: false + + # Suspend/Resume x2: run 2 suspend/resumes back to back (def: false) + x2: false + + # x2 Suspend Delay: time delay between the two test runs in ms (def: 0 ms) + x2delay: 0 + + # Pre Suspend Delay: nclude an N ms delay before (1st) suspend (def: 0 ms) + predelay: 0 + + # Post Resume Delay: include an N ms delay after (last) resume (def: 0 ms) + postdelay: 0 + + # Min Device Length: graph only dev callbacks longer than min (def: 0.001 ms) + mindev: 0.001 + + # Callgraph: gather ftrace callgraph data on all timeline events (def: false) + callgraph: false + + # Expand Callgraph: pre-expand the callgraph treeviews in html (def: false) + expandcg: false + + # Min Callgraph Length: show callgraphs only if longer than min (def: 1 ms) + mincg: 1 + + # Timestamp Precision: number of sig digits in timestamps (0:S, [3:ms], 6:us) + timeprec: 3 + + # Device Filter: show only devs whose name/driver includes one of these strings + devicefilter: _cpu_up,_cpu_down,i915,usb + + # Override default timeline entries: + # Do not use the internal default functions for timeline entries (def: false) + # Set this to true if you intend to only use the ones defined in the config + override-timeline-functions: true + + # Override default dev timeline entries: + # Do not use the internal default functions for dev timeline entries (def: false) + # Set this to true if you intend to only use the ones defined in the config + override-dev-timeline-functions: true + + # Call Loop Max Gap (dev mode only) + # merge loops of the same call if each is less than maxgap apart (def: 100us) + callloop-maxgap: 0.0001 + + # Call Loop Max Length (dev mode only) + # merge loops of the same call if each is less than maxlen in length (def: 5ms) + callloop-maxlen: 0.005 + +------------------------------------------------------------------ +| CUSTOM TIMELINE ENTRIES | +------------------------------------------------------------------ + +Adding or Editing Timeline Functions +____________________________________ + + The tool uses an array of function names to fill out empty spaces in the + timeline where device callbacks don't appear. For instance, in suspend_prepare + the tool adds the sys_sync and freeze_processes calls as virtual device blocks + in the timeline to show you where the time is going. These calls should fill + the timeline with contiguous data so that most kernel execution is covered. + + It is possible to add new function calls to the timeline by adding them to + the config. It's also possible to copy the internal timeline functions into + the config so that you can override and edit them. Place them in the + timeline_functions_ARCH section with the name of your architecture appended. + i.e. for x86_64: [timeline_functions_x86_64] + + Use the override-timeline-functions option if you only want to use your + custom calls, or leave it false to append them to the internal ones. + + This section includes a list of functions (set using kprobes) which use both + symbol data and function arg data. The args are pulled directly from the + stack using this architecture's registers and stack formatting. Each entry + can include up to four pieces of info: The function name, a format string, + an argument list, and a color. But only a function name is required. + + For a full example config, see config/custom-timeline-functions.cfg. It pulls + all the internal timeline functions into the config and allows you to edit + them. + + Entry format: + + function: format{fn_arg1}_{fn_arg2} fn_arg1 fn_arg2 ... [color=purple] + + Required Arguments: + + function: The symbol name for the function you want probed, this is the + minimum required for an entry, it will show up as the function + name with no arguments. + + example: _cpu_up: + + Optional Arguments: + + format: The format to display the data on the timeline in. Use braces to + enclose the arg names. + + example: CPU_ON[{cpu}] + + color: The color of the entry block in the timeline. The default color is + transparent, so the entry shares the phase color. The color is an + html color string, either a word, or an RGB. + + example: [color=#CC00CC] + + arglist: A list of arguments from registers/stack addresses. See URL: + https://www.kernel.org/doc/Documentation/trace/kprobetrace.txt + + example: cpu=%di:s32 + + Here is a full example entry. It displays cpu resume calls in the timeline + in orange. They will appear as CPU_ON[0], CPU_ON[1], etc. + + [timeline_functions_x86_64] + _cpu_up: CPU_ON[{cpu}] cpu=%di:s32 [color=orange] + + +Adding or Editing Dev Mode Timeline Source Functions +____________________________________________________ + + In dev mode, the tool uses an array of function names to monitor source + execution within the timeline entries. + + The function calls are displayed inside the main device/call blocks in the + timeline. However, if a function call is not within a main timeline event, + it will spawn an entirely new event named after the caller's kernel thread. + These asynchronous kernel threads will populate in a separate section + beneath the main device/call section. + + The tool has a set of hard coded calls which focus on the most common use + cases: msleep, udelay, schedule_timeout, mutex_lock_slowpath, etc. These are + the functions that add a hardcoded time delay to the suspend/resume path. + The tool also includes some common functions native to important + subsystems: ata, i915, and ACPI, etc. + + It is possible to add new function calls to the dev timeline by adding them + to the config. It's also possible to copy the internal dev timeline + functions into the config so that you can override and edit them. Place them + in the dev_timeline_functions_ARCH section with the name of your architecture + appended. i.e. for x86_64: [dev_timeline_functions_x86_64] + + Use the override-dev-timeline-functions option if you only want to use your + custom calls, or leave it false to append them to the internal ones. + + The format is the same as the timeline_functions_x86_64 section. It's a + list of functions (set using kprobes) which use both symbol data and function + arg data. The args are pulled directly from the stack using this + architecture's registers and stack formatting. Each entry can include up + to four pieces of info: The function name, a format string, an argument list, + and a color. But only the function name is required. + + For a full example config, see config/custom-timeline-functions.cfg. It pulls + all the internal dev timeline functions into the config and allows you to edit + them. + + Here is a full example entry. It displays the ATA port reset calls as + ataN_port_reset in the timeline. This is where most of the SATA disk resume + time goes, so it can be helpful to see the low level call. + + [dev_timeline_functions_x86_64] + ata_eh_recover: ata{port}_port_reset port=+36(%di):s32 [color=#CC00CC] + + +Verifying your custom functions +_______________________________ + + Once you have a set of functions (kprobes) defined, it can be useful to + perform a quick check to see if you formatted them correctly and if the system + actually supports them. To do this, run the tool with your config file + and the -status option. The tool will go through all the kprobes (both + custom and internal if you haven't overridden them) and actually attempts + to set them in ftrace. It will then print out success or fail for you. + + Note that kprobes which don't actually exist in the kernel won't stop the + tool, they just wont show up. + + For example: + + sudo ./sleepgraph.py -config config/custom-timeline-functions.cfg -status + Checking this system (myhostname)... + have root access: YES + is sysfs mounted: YES + is "mem" a valid power mode: YES + is ftrace supported: YES + are kprobes supported: YES + timeline data source: FTRACE (all trace events found) + is rtcwake supported: YES + verifying timeline kprobes work: + _cpu_down: YES + _cpu_up: YES + acpi_pm_finish: YES + acpi_pm_prepare: YES + freeze_kernel_threads: YES + freeze_processes: YES + sys_sync: YES + thaw_processes: YES + verifying dev kprobes work: + __const_udelay: YES + __mutex_lock_slowpath: YES + acpi_os_stall: YES + acpi_ps_parse_aml: YES + intel_opregion_init: NO + intel_opregion_register: NO + intel_opregion_setup: NO + msleep: YES + schedule_timeout: YES + schedule_timeout_uninterruptible: YES + usleep_range: YES + + +------------------------------------------------------------------ +| TESTING ON CONSUMER LINUX OPERATING SYSTEMS | +------------------------------------------------------------------ + +Android +_______ + + The easiest way to execute on an android device is to run the android.sh + script on the device, then pull the ftrace log back to the host and run + sleepgraph.py on it. + + Here are the steps: + + [download and install the tool on the device] + + host%> wget https://raw.githubusercontent.com/intel/pm-graph/master/tools/android.sh + host%> adb connect 192.168.1.6 + host%> adb root + # push the script to a writeable location + host%> adb push android.sh /sdcard/ + + [check whether the tool will run on your device] + + host%> adb shell + dev%> cd /sdcard + dev%> sh android.sh status + host : asus_t100 + kernel : 3.14.0-i386-dirty + modes : freeze mem + rtcwake : supported + ftrace : supported + trace events { + suspend_resume: found + device_pm_callback_end: found + device_pm_callback_start: found + } + # the above is what you see on a system that's properly patched + + [execute the suspend] + + # NOTE: The suspend will only work if the screen isn't timed out, + # so you have to press some keys first to wake it up b4 suspend) + dev%> sh android.sh suspend mem + ------------------------------------ + Suspend/Resume timing test initiated + ------------------------------------ + hostname : asus_t100 + kernel : 3.14.0-i386-dirty + mode : mem + ftrace out : /mnt/shell/emulated/0/ftrace.txt + dmesg out : /mnt/shell/emulated/0/dmesg.txt + log file : /mnt/shell/emulated/0/log.txt + ------------------------------------ + INITIALIZING FTRACE........DONE + STARTING FTRACE + SUSPEND START @ 21:24:02 (rtcwake in 10 seconds) + + + [retrieve the data from the device] + + # I find that you have to actually kill the adb process and + # reconnect sometimes in order for the connection to work post-suspend + host%> adb connect 192.168.1.6 + # (required) get the ftrace data, this is the most important piece + host%> adb pull /sdcard/ftrace.txt + # (optional) get the dmesg data, this is for debugging + host%> adb pull /sdcard/dmesg.txt + # (optional) get the log, which just lists some test times for comparison + host%> adb pull /sdcard/log.txt + + [create an output html file using sleepgraph.py] + + host%> sleepgraph.py -ftrace ftrace.txt + + You should now have an output.html with the android data, enjoy! diff --git a/tools/power/pm-graph/config/example.cfg b/tools/power/pm-graph/config/example.cfg index 05b2efb9bb54..1ef3eb9383fa 100644 --- a/tools/power/pm-graph/config/example.cfg +++ b/tools/power/pm-graph/config/example.cfg @@ -98,12 +98,34 @@ postdelay: 0 # graph only devices longer than min in the timeline (default: 0.001 ms) mindev: 0.001 +# Call Loop Max Gap (dev mode only) +# merge loops of the same call if each is less than maxgap apart (def: 100us) +callloop-maxgap: 0.0001 + +# Call Loop Max Length (dev mode only) +# merge loops of the same call if each is less than maxlen in length (def: 5ms) +callloop-maxlen: 0.005 + +# Override default timeline entries: +# Do not use the internal default functions for timeline entries (def: false) +# Set this to true if you intend to only use the ones defined in the config +override-timeline-functions: true + +# Override default dev timeline entries: +# Do not use the internal default functions for dev timeline entries (def: false) +# Set this to true if you intend to only use the ones defined in the config +override-dev-timeline-functions: true + # ---- Debug Options ---- # Callgraph # gather detailed ftrace callgraph data on all timeline events (default: false) callgraph: false +# Max graph depth +# limit the callgraph trace to this depth (default: 0 = all) +maxdepth: 2 + # Callgraph phase filter # Only enable callgraphs for one phase, i.e. resume_noirq (default: all) cgphase: suspend @@ -131,3 +153,7 @@ timeprec: 6 # Add kprobe functions to the timeline # Add functions to the timeline from a text file (default: no-action) # fadd: file.txt + +# Ftrace buffer size +# Set trace buffer size to N kilo-bytes (default: all of free memory up to 3GB) +# bufsize: 1000 diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8 index 24a2e7d0ae63..9648be644d5f 100644 --- a/tools/power/pm-graph/sleepgraph.8 +++ b/tools/power/pm-graph/sleepgraph.8 @@ -53,6 +53,11 @@ disable rtcwake and require a user keypress to resume. Add the dmesg and ftrace logs to the html output. They will be viewable by clicking buttons in the timeline. .TP +\fB-turbostat\fR +Use turbostat to execute the command in freeze mode (default: disabled). This +will provide turbostat output in the log which will tell you which actual +power modes were entered. +.TP \fB-result \fIfile\fR Export a results table to a text file for parsing. .TP @@ -121,6 +126,10 @@ be created in a new subdirectory with a summary page: suspend-xN-{date}-{time}. Use ftrace to create device callgraphs (default: disabled). This can produce very large outputs, i.e. 10MB - 100MB. .TP +\fB-ftop\fR +Use ftrace on the top level call: "suspend_devices_and_enter" only (default: disabled). +This option implies -f and creates a single callgraph covering all of suspend/resume. +.TP \fB-maxdepth \fIlevel\fR limit the callgraph trace depth to \fIlevel\fR (default: 0=all). This is the best way to limit the output size when using callgraphs via -f. @@ -138,8 +147,8 @@ which are barely visible in the timeline. The value is a float: e.g. 0.001 represents 1 us. .TP \fB-cgfilter \fI"func1,func2,..."\fR -Reduce callgraph output in the timeline by limiting it to a list of calls. The -argument can be a single function name or a comma delimited list. +Reduce callgraph output in the timeline by limiting it certain devices. The +argument can be a single device name or a comma delimited list. (default: none) .TP \fB-cgskip \fIfile\fR @@ -183,6 +192,9 @@ Print out the contents of the ACPI Firmware Performance Data Table. \fB-battery\fR Print out battery status and current charge. .TP +\fB-wifi\fR +Print out wifi status and connection details. +.TP \fB-xon/-xoff/-xstandby/-xsuspend\fR Test xset by attempting to switch the display to the given mode. This is the same command which will be issued by \fB-display \fImode\fR. From 32865e3e010f0ea81e518878e9a6b1492ff07281 Mon Sep 17 00:00:00 2001 From: Lenny Szubowicz Date: Thu, 2 May 2019 16:00:52 -0400 Subject: [PATCH 08/57] ACPI / LPIT: Correct LPIT end address for lpit_process() Correct the LPIT end address which is passed into lpit_process() and the end address limit test in lpit_process(). The LPI state descriptor subtables follow the fixed sized acpi_lpit_header up to the end of the LPIT. The last LPI state descriptor can end at exactly the end of the LPIT. Note that this is a fix to a latent problem. Although incorrect, the unpatched version works because the passed in end address is just slightly beyond the actual end of the LPIT and the size of the ACPI LPIT header is smaller than the size of the only currently defined LPI state descriptor, acpi_lpit_native. Signed-off-by: Lenny Szubowicz Reviewed-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_lpit.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpi_lpit.c b/drivers/acpi/acpi_lpit.c index e43cb71b6972..8b170a07908a 100644 --- a/drivers/acpi/acpi_lpit.c +++ b/drivers/acpi/acpi_lpit.c @@ -137,7 +137,7 @@ static void lpit_update_residency(struct lpit_residency_info *info, static void lpit_process(u64 begin, u64 end) { - while (begin + sizeof(struct acpi_lpit_native) < end) { + while (begin + sizeof(struct acpi_lpit_native) <= end) { struct acpi_lpit_native *lpit_native = (struct acpi_lpit_native *)begin; if (!lpit_native->header.type && !lpit_native->header.flags) { @@ -156,7 +156,6 @@ static void lpit_process(u64 begin, u64 end) void acpi_init_lpit(void) { acpi_status status; - u64 lpit_begin; struct acpi_table_lpit *lpit; status = acpi_get_table(ACPI_SIG_LPIT, 0, (struct acpi_table_header **)&lpit); @@ -164,6 +163,6 @@ void acpi_init_lpit(void) if (ACPI_FAILURE(status)) return; - lpit_begin = (u64)lpit + sizeof(*lpit); - lpit_process(lpit_begin, lpit_begin + lpit->header.length); + lpit_process((u64)lpit + sizeof(*lpit), + (u64)lpit + lpit->header.length); } From 7e186d9de9290d57c90751ea252135f425f49e6e Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 27 May 2019 20:05:35 +0800 Subject: [PATCH 09/57] drivers: base: power: clock_ops: Use of_clk_get_parent_count() Use of_clk_get_parent_count() instead of open coding. Reviewed-by: Geert Uytterhoeven Signed-off-by: Kefeng Wang Signed-off-by: Rafael J. Wysocki --- drivers/base/power/clock_ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index 59d19dd64928..3e84e3085d43 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -195,8 +196,7 @@ int of_pm_clk_add_clks(struct device *dev) if (!dev || !dev->of_node) return -EINVAL; - count = of_count_phandle_with_args(dev->of_node, "clocks", - "#clock-cells"); + count = of_clk_get_parent_count(dev->of_node); if (count <= 0) return -ENODEV; From 338c993f9aa2078e03c909464e963a31e086df69 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 27 May 2019 14:27:51 +0200 Subject: [PATCH 10/57] PM / clk: Remove error message on out-of-memory condition There is no need to print an error message if kstrdup() fails, as the memory allocation core already takes care of that. Note that commit 59d84ca8c46a93ad ("PM / OPP / clk: Remove unnecessary OOM message") already removed similar error messages, but this one was forgotten. Signed-off-by: Geert Uytterhoeven Signed-off-by: Rafael J. Wysocki --- drivers/base/power/clock_ops.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index 3e84e3085d43..ced6863a16a5 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -93,8 +93,6 @@ static int __pm_clk_add(struct device *dev, const char *con_id, if (con_id) { ce->con_id = kstrdup(con_id, GFP_KERNEL); if (!ce->con_id) { - dev_err(dev, - "Not enough memory for clock connection ID.\n"); kfree(ce); return -ENOMEM; } From c2147585cce028e46a0ea8b24be3a11716ee6e4f Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Wed, 29 May 2019 11:52:08 +0000 Subject: [PATCH 11/57] cpufreq: imx-cpufreq-dt: Fix no OPPs available on unfused parts Early samples without fuses written report "0 0" which means consumer segment and minimum speed grading. According to datasheet the minimum speed grade is not supported for consumer parts so all OPPs are disabled which results in stack dumps later on. Fix by clamping minimum consumer speed grade to 1 on imx8mm and imx8mq. Fixes: 4d28ba1d62c4 ("cpufreq: Add imx-cpufreq-dt driver") Signed-off-by: Leonard Crestez [ Viresh: s/minumum/minimum/ in patch and log ] Signed-off-by: Viresh Kumar --- drivers/cpufreq/imx-cpufreq-dt.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/cpufreq/imx-cpufreq-dt.c b/drivers/cpufreq/imx-cpufreq-dt.c index e1aa346efa10..35b6717d7255 100644 --- a/drivers/cpufreq/imx-cpufreq-dt.c +++ b/drivers/cpufreq/imx-cpufreq-dt.c @@ -50,6 +50,21 @@ static int imx_cpufreq_dt_probe(struct platform_device *pdev) speed_grade = (cell_value & OCOTP_CFG3_SPEED_GRADE_MASK) >> OCOTP_CFG3_SPEED_GRADE_SHIFT; mkt_segment = (cell_value & OCOTP_CFG3_MKT_SEGMENT_MASK) >> OCOTP_CFG3_MKT_SEGMENT_SHIFT; + + /* + * Early samples without fuses written report "0 0" which means + * consumer segment and minimum speed grading. + * + * According to datasheet minimum speed grading is not supported for + * consumer parts so clamp to 1 to avoid warning for "no OPPs" + * + * Applies to 8mq and 8mm. + */ + if (mkt_segment == 0 && speed_grade == 0 && ( + !strcmp(match->compatible, "fsl,imx8mm") || + !strcmp(match->compatible, "fsl,imx8mq"))) + speed_grade = 1; + supported_hw[0] = BIT(speed_grade); supported_hw[1] = BIT(mkt_segment); dev_info(&pdev->dev, "cpu speed grade %d mkt segment %d supported-hw %#x %#x\n", From 036eb5c6d532a0fa1cea854814628be625809c4e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 1 Jun 2019 07:43:38 +0000 Subject: [PATCH 12/57] cpufreq: armada-37xx: Remove set but not used variable 'freq' Fixes gcc '-Wunused-but-set-variable' warning: drivers/cpufreq/armada-37xx-cpufreq.c: In function 'armada37xx_cpufreq_avs_setup': drivers/cpufreq/armada-37xx-cpufreq.c:260:28: warning: variable 'freq' set but not used [-Wunused-but-set-variable] It's never used since introduction in commit 1c3528232f4b ("cpufreq: armada-37xx: Add AVS support") Signed-off-by: YueHaibing Signed-off-by: Viresh Kumar --- drivers/cpufreq/armada-37xx-cpufreq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index 0df16eb1eb3c..aa0f06dec959 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -257,7 +257,7 @@ static void __init armada37xx_cpufreq_avs_configure(struct regmap *base, static void __init armada37xx_cpufreq_avs_setup(struct regmap *base, struct armada_37xx_dvfs *dvfs) { - unsigned int avs_val = 0, freq; + unsigned int avs_val = 0; int load_level = 0; if (base == NULL) @@ -275,8 +275,6 @@ static void __init armada37xx_cpufreq_avs_setup(struct regmap *base, for (load_level = 1; load_level < LOAD_LEVEL_NR; load_level++) { - freq = dvfs->cpu_freq_max / dvfs->divider[load_level]; - avs_val = dvfs->avs[load_level]; regmap_update_bits(base, ARMADA_37XX_AVS_VSET(load_level-1), ARMADA_37XX_AVS_VDD_MASK << ARMADA_37XX_AVS_HIGH_VDD_LIMIT | From 22a26cc6a51ef73dcfeb64c50513903f6b2d53d8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 22 May 2019 11:45:46 -0700 Subject: [PATCH 13/57] cpufreq: brcmstb-avs-cpufreq: Fix initial command check There is a logical error in brcm_avs_is_firmware_loaded() whereby if the firmware returns -EINVAL, we will be reporting this as an error. The comment is correct, the code was not. Fixes: de322e085995 ("cpufreq: brcmstb-avs-cpufreq: AVS CPUfreq driver for Broadcom STB SoCs") Signed-off-by: Florian Fainelli Acked-by: Markus Mayer Signed-off-by: Viresh Kumar --- drivers/cpufreq/brcmstb-avs-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index e6f9cbe5835f..6ed53ca8aa98 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -446,8 +446,8 @@ static bool brcm_avs_is_firmware_loaded(struct private_data *priv) rc = brcm_avs_get_pmap(priv, NULL); magic = readl(priv->base + AVS_MBOX_MAGIC); - return (magic == AVS_FIRMWARE_MAGIC) && (rc != -ENOTSUPP) && - (rc != -EINVAL); + return (magic == AVS_FIRMWARE_MAGIC) && ((rc != -ENOTSUPP) || + (rc != -EINVAL)); } static unsigned int brcm_avs_cpufreq_get(unsigned int cpu) From 4c5681fcc684c762b09435de3e82ffeee7769d21 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 22 May 2019 11:45:47 -0700 Subject: [PATCH 14/57] cpufreq: brcmstb-avs-cpufreq: Fix types for voltage/frequency What we read back from the register is going to be capped at 32-bits, and cpufreq_freq_table.frequency is an unsigned int. Avoid any possible value truncation by using the appropriate return value. Fixes: de322e085995 ("cpufreq: brcmstb-avs-cpufreq: AVS CPUfreq driver for Broadcom STB SoCs") Signed-off-by: Florian Fainelli Acked-by: Markus Mayer Signed-off-by: Viresh Kumar --- drivers/cpufreq/brcmstb-avs-cpufreq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index 6ed53ca8aa98..77b0e5d0fb13 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -384,12 +384,12 @@ static int brcm_avs_set_pstate(struct private_data *priv, unsigned int pstate) return __issue_avs_command(priv, AVS_CMD_SET_PSTATE, true, args); } -static unsigned long brcm_avs_get_voltage(void __iomem *base) +static u32 brcm_avs_get_voltage(void __iomem *base) { return readl(base + AVS_MBOX_VOLTAGE1); } -static unsigned long brcm_avs_get_frequency(void __iomem *base) +static u32 brcm_avs_get_frequency(void __iomem *base) { return readl(base + AVS_MBOX_FREQUENCY) * 1000; /* in kHz */ } @@ -653,14 +653,14 @@ static ssize_t show_brcm_avs_voltage(struct cpufreq_policy *policy, char *buf) { struct private_data *priv = policy->driver_data; - return sprintf(buf, "0x%08lx\n", brcm_avs_get_voltage(priv->base)); + return sprintf(buf, "0x%08x\n", brcm_avs_get_voltage(priv->base)); } static ssize_t show_brcm_avs_frequency(struct cpufreq_policy *policy, char *buf) { struct private_data *priv = policy->driver_data; - return sprintf(buf, "0x%08lx\n", brcm_avs_get_frequency(priv->base)); + return sprintf(buf, "0x%08x\n", brcm_avs_get_frequency(priv->base)); } cpufreq_freq_attr_ro(brcm_avs_pstate); From bd59ffb23b9de82d2ad3d2cd752520de245780c6 Mon Sep 17 00:00:00 2001 From: Nick Black Date: Thu, 18 Apr 2019 15:12:52 -0400 Subject: [PATCH 15/57] cpupower: correct spelling of interval Fix up multiple instances of "intervall" to correct "interval" (all save one Italian instance). Signed-off-by: Nick Black Signed-off-by: Shuah Khan --- tools/power/cpupower/man/cpupower-monitor.1 | 2 +- tools/power/cpupower/po/cs.po | 2 +- tools/power/cpupower/po/de.po | 2 +- tools/power/cpupower/po/fr.po | 2 +- tools/power/cpupower/po/it.po | 2 +- tools/power/cpupower/po/pt.po | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/power/cpupower/man/cpupower-monitor.1 b/tools/power/cpupower/man/cpupower-monitor.1 index 914cbb9d9cd0..70a56476f4b0 100644 --- a/tools/power/cpupower/man/cpupower-monitor.1 +++ b/tools/power/cpupower/man/cpupower-monitor.1 @@ -61,7 +61,7 @@ Only display specific monitors. Use the monitor string(s) provided by \-l option .PP \-i seconds .RS 4 -Measure intervall. +Measure interval. .RE .PP \-c diff --git a/tools/power/cpupower/po/cs.po b/tools/power/cpupower/po/cs.po index cb22c45c5069..bfc7e1702ec9 100644 --- a/tools/power/cpupower/po/cs.po +++ b/tools/power/cpupower/po/cs.po @@ -98,7 +98,7 @@ msgstr "" #: utils/idle_monitor/cpupower-monitor.c:74 #, c-format -msgid "\t -i: time intervall to measure for in seconds (default 1)\n" +msgid "\t -i: time interval to measure for in seconds (default 1)\n" msgstr "" #: utils/idle_monitor/cpupower-monitor.c:75 diff --git a/tools/power/cpupower/po/de.po b/tools/power/cpupower/po/de.po index 840c17cc450a..70887bb8ba95 100644 --- a/tools/power/cpupower/po/de.po +++ b/tools/power/cpupower/po/de.po @@ -95,7 +95,7 @@ msgstr "" #: utils/idle_monitor/cpupower-monitor.c:74 #, c-format -msgid "\t -i: time intervall to measure for in seconds (default 1)\n" +msgid "\t -i: time interval to measure for in seconds (default 1)\n" msgstr "" #: utils/idle_monitor/cpupower-monitor.c:75 diff --git a/tools/power/cpupower/po/fr.po b/tools/power/cpupower/po/fr.po index b46ca2548f86..b6e505b34e4a 100644 --- a/tools/power/cpupower/po/fr.po +++ b/tools/power/cpupower/po/fr.po @@ -95,7 +95,7 @@ msgstr "" #: utils/idle_monitor/cpupower-monitor.c:74 #, c-format -msgid "\t -i: time intervall to measure for in seconds (default 1)\n" +msgid "\t -i: time interval to measure for in seconds (default 1)\n" msgstr "" #: utils/idle_monitor/cpupower-monitor.c:75 diff --git a/tools/power/cpupower/po/it.po b/tools/power/cpupower/po/it.po index f80c4ddb9bda..a1deeb52c9e0 100644 --- a/tools/power/cpupower/po/it.po +++ b/tools/power/cpupower/po/it.po @@ -95,7 +95,7 @@ msgstr "" #: utils/idle_monitor/cpupower-monitor.c:74 #, c-format -msgid "\t -i: time intervall to measure for in seconds (default 1)\n" +msgid "\t -i: time interval to measure for in seconds (default 1)\n" msgstr "" #: utils/idle_monitor/cpupower-monitor.c:75 diff --git a/tools/power/cpupower/po/pt.po b/tools/power/cpupower/po/pt.po index 990f5267ffe8..902186585bb9 100644 --- a/tools/power/cpupower/po/pt.po +++ b/tools/power/cpupower/po/pt.po @@ -93,7 +93,7 @@ msgstr "" #: utils/idle_monitor/cpupower-monitor.c:74 #, c-format -msgid "\t -i: time intervall to measure for in seconds (default 1)\n" +msgid "\t -i: time interval to measure for in seconds (default 1)\n" msgstr "" #: utils/idle_monitor/cpupower-monitor.c:75 From 04507c0a9385cc8280f794a36bfff567c8cc1042 Mon Sep 17 00:00:00 2001 From: Abhishek Goel Date: Wed, 29 May 2019 04:30:33 -0500 Subject: [PATCH 16/57] cpupower : frequency-set -r option misses the last cpu in related cpu list To set frequency on specific cpus using cpupower, following syntax can be used : cpupower -c #i frequency-set -f #f -r While setting frequency using cpupower frequency-set command, if we use '-r' option, it is expected to set frequency for all cpus related to cpu #i. But it is observed to be missing the last cpu in related cpu list. This patch fixes the problem. Signed-off-by: Abhishek Goel Reviewed-by: Thomas Renninger Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/cpufreq-set.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c index f49bc4aa2a08..6ed82fba5aaa 100644 --- a/tools/power/cpupower/utils/cpufreq-set.c +++ b/tools/power/cpupower/utils/cpufreq-set.c @@ -305,6 +305,8 @@ int cmd_freq_set(int argc, char **argv) bitmask_setbit(cpus_chosen, cpus->cpu); cpus = cpus->next; } + /* Set the last cpu in related cpus list */ + bitmask_setbit(cpus_chosen, cpus->cpu); cpufreq_put_related_cpus(cpus); } } From 7d5f589a522889d3fd56ad0c43961b90d07b4a93 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Wed, 5 Jun 2019 13:37:05 +0300 Subject: [PATCH 17/57] cpufreq: imx-cpufreq-dt: Remove global platform match list This is not currently needed, instead a platform device is always created from SOC-specific code. We can use of_machine_is_compatible for per-SOC behavior instead. Suggested-by: Viresh Kumar Signed-off-by: Leonard Crestez Signed-off-by: Viresh Kumar --- drivers/cpufreq/imx-cpufreq-dt.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/drivers/cpufreq/imx-cpufreq-dt.c b/drivers/cpufreq/imx-cpufreq-dt.c index 35b6717d7255..b54fd26ea7df 100644 --- a/drivers/cpufreq/imx-cpufreq-dt.c +++ b/drivers/cpufreq/imx-cpufreq-dt.c @@ -19,12 +19,6 @@ #define OCOTP_CFG3_MKT_SEGMENT_SHIFT 6 #define OCOTP_CFG3_MKT_SEGMENT_MASK (0x3 << 6) -static const struct of_device_id imx_cpufreq_dt_match_list[] = { - { .compatible = "fsl,imx8mm" }, - { .compatible = "fsl,imx8mq" }, - {} -}; - /* cpufreq-dt device registered by imx-cpufreq-dt */ static struct platform_device *cpufreq_dt_pdev; static struct opp_table *cpufreq_opp_table; @@ -32,18 +26,10 @@ static struct opp_table *cpufreq_opp_table; static int imx_cpufreq_dt_probe(struct platform_device *pdev) { struct device *cpu_dev = get_cpu_device(0); - struct device_node *np; - const struct of_device_id *match; u32 cell_value, supported_hw[2]; int speed_grade, mkt_segment; int ret; - np = of_find_node_by_path("/"); - match = of_match_node(imx_cpufreq_dt_match_list, np); - of_node_put(np); - if (!match) - return -ENODEV; - ret = nvmem_cell_read_u32(cpu_dev, "speed_grade", &cell_value); if (ret) return ret; @@ -61,8 +47,8 @@ static int imx_cpufreq_dt_probe(struct platform_device *pdev) * Applies to 8mq and 8mm. */ if (mkt_segment == 0 && speed_grade == 0 && ( - !strcmp(match->compatible, "fsl,imx8mm") || - !strcmp(match->compatible, "fsl,imx8mq"))) + of_machine_is_compatible("fsl,imx8mm") || + of_machine_is_compatible("fsl,imx8mq"))) speed_grade = 1; supported_hw[0] = BIT(speed_grade); From e6abacabb5acb6315e0e9560b2cb50a4d55b6e09 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Wed, 5 Jun 2019 13:37:06 +0300 Subject: [PATCH 18/57] cpufreq: Switch imx7d to imx-cpufreq-dt for speed grading This driver can handle speed grading bits on imx7d just like on imx8mq and imx8mm. Signed-off-by: Leonard Crestez Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 19c1aad57e26..eb282dff9f2c 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -40,7 +40,6 @@ static const struct of_device_id whitelist[] __initconst = { { .compatible = "fsl,imx27", }, { .compatible = "fsl,imx51", }, { .compatible = "fsl,imx53", }, - { .compatible = "fsl,imx7d", }, { .compatible = "marvell,berlin", }, { .compatible = "marvell,pxa250", }, @@ -108,6 +107,7 @@ static const struct of_device_id blacklist[] __initconst = { { .compatible = "calxeda,highbank", }, { .compatible = "calxeda,ecx-2000", }, + { .compatible = "fsl,imx7d", }, { .compatible = "fsl,imx8mq", }, { .compatible = "fsl,imx8mm", }, From d3df18a97e586702920337056540267807b23f8e Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 12 Jun 2019 20:24:56 +0200 Subject: [PATCH 19/57] cpufreq: add driver for Raspberry Pi Raspberry Pi's firmware offers and interface though which update it's performance requirements. It allows us to request for specific runtime frequencies, which the firmware might or might not respect, depending on the firmware configuration and thermals. As the maximum and minimum frequencies are configurable in the firmware there is no way to know in advance their values. So the Raspberry Pi cpufreq driver queries them, builds an opp frequency table to then launch cpufreq-dt. Also, as the firmware interface might be configured as a module, making the cpu clock unavailable during init, this implements a full fledged driver, as opposed to most drivers registering cpufreq-dt, which only make use of an init routine. Signed-off-by: Nicolas Saenz Julienne Acked-by: Eric Anholt Reviewed-by: Stephen Boyd Acked-by: Stefan Wahren Signed-off-by: Viresh Kumar --- drivers/cpufreq/Kconfig.arm | 8 +++ drivers/cpufreq/Makefile | 1 + drivers/cpufreq/raspberrypi-cpufreq.c | 97 +++++++++++++++++++++++++++ 3 files changed, 106 insertions(+) create mode 100644 drivers/cpufreq/raspberrypi-cpufreq.c diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 982efdf9c7e5..2499d0bccb3a 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -141,6 +141,14 @@ config ARM_QCOM_CPUFREQ_HW The driver implements the cpufreq interface for this HW engine. Say Y if you want to support CPUFreq HW. +config ARM_RASPBERRYPI_CPUFREQ + tristate "Raspberry Pi cpufreq support" + depends on CLK_RASPBERRYPI || COMPILE_TEST + help + This adds the CPUFreq driver for Raspberry Pi + + If in doubt, say N. + config ARM_S3C_CPUFREQ bool help diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 7bcda2273d0c..5a6c70d26c98 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_ARM_PXA2xx_CPUFREQ) += pxa2xx-cpufreq.o obj-$(CONFIG_PXA3xx) += pxa3xx-cpufreq.o obj-$(CONFIG_ARM_QCOM_CPUFREQ_HW) += qcom-cpufreq-hw.o obj-$(CONFIG_ARM_QCOM_CPUFREQ_KRYO) += qcom-cpufreq-kryo.o +obj-$(CONFIG_ARM_RASPBERRYPI_CPUFREQ) += raspberrypi-cpufreq.o obj-$(CONFIG_ARM_S3C2410_CPUFREQ) += s3c2410-cpufreq.o obj-$(CONFIG_ARM_S3C2412_CPUFREQ) += s3c2412-cpufreq.o obj-$(CONFIG_ARM_S3C2416_CPUFREQ) += s3c2416-cpufreq.o diff --git a/drivers/cpufreq/raspberrypi-cpufreq.c b/drivers/cpufreq/raspberrypi-cpufreq.c new file mode 100644 index 000000000000..2bc7d9734272 --- /dev/null +++ b/drivers/cpufreq/raspberrypi-cpufreq.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Raspberry Pi cpufreq driver + * + * Copyright (C) 2019, Nicolas Saenz Julienne + */ + +#include +#include +#include +#include +#include +#include + +#define RASPBERRYPI_FREQ_INTERVAL 100000000 + +static struct platform_device *cpufreq_dt; + +static int raspberrypi_cpufreq_probe(struct platform_device *pdev) +{ + struct device *cpu_dev; + unsigned long min, max; + unsigned long rate; + struct clk *clk; + int ret; + + cpu_dev = get_cpu_device(0); + if (!cpu_dev) { + pr_err("Cannot get CPU for cpufreq driver\n"); + return -ENODEV; + } + + clk = clk_get(cpu_dev, NULL); + if (IS_ERR(clk)) { + dev_err(cpu_dev, "Cannot get clock for CPU0\n"); + return PTR_ERR(clk); + } + + /* + * The max and min frequencies are configurable in the Raspberry Pi + * firmware, so we query them at runtime. + */ + min = roundup(clk_round_rate(clk, 0), RASPBERRYPI_FREQ_INTERVAL); + max = roundup(clk_round_rate(clk, ULONG_MAX), RASPBERRYPI_FREQ_INTERVAL); + clk_put(clk); + + for (rate = min; rate <= max; rate += RASPBERRYPI_FREQ_INTERVAL) { + ret = dev_pm_opp_add(cpu_dev, rate, 0); + if (ret) + goto remove_opp; + } + + cpufreq_dt = platform_device_register_simple("cpufreq-dt", -1, NULL, 0); + ret = PTR_ERR_OR_ZERO(cpufreq_dt); + if (ret) { + dev_err(cpu_dev, "Failed to create platform device, %d\n", ret); + goto remove_opp; + } + + return 0; + +remove_opp: + dev_pm_opp_remove_all_dynamic(cpu_dev); + + return ret; +} + +static int raspberrypi_cpufreq_remove(struct platform_device *pdev) +{ + struct device *cpu_dev; + + cpu_dev = get_cpu_device(0); + if (cpu_dev) + dev_pm_opp_remove_all_dynamic(cpu_dev); + + platform_device_unregister(cpufreq_dt); + + return 0; +} + +/* + * Since the driver depends on clk-raspberrypi, which may return EPROBE_DEFER, + * all the activity is performed in the probe, which may be defered as well. + */ +static struct platform_driver raspberrypi_cpufreq_driver = { + .driver = { + .name = "raspberrypi-cpufreq", + }, + .probe = raspberrypi_cpufreq_probe, + .remove = raspberrypi_cpufreq_remove, +}; +module_platform_driver(raspberrypi_cpufreq_driver); + +MODULE_AUTHOR("Nicolas Saenz Julienne Date: Wed, 12 Jun 2019 13:07:02 +0300 Subject: [PATCH 20/57] ACPI / sleep: Switch to use acpi_dev_get_first_match_dev() Switch the acpi_pm_finish() to use acpi_dev_get_first_match_dev() instead of custom approach. Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sleep.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index e52f1238d2d6..e21a0f5fdadd 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -454,14 +454,6 @@ static int acpi_pm_prepare(void) return error; } -static int find_powerf_dev(struct device *dev, void *data) -{ - struct acpi_device *device = to_acpi_device(dev); - const char *hid = acpi_device_hid(device); - - return !strcmp(hid, ACPI_BUTTON_HID_POWERF); -} - /** * acpi_pm_finish - Instruct the platform to leave a sleep state. * @@ -470,7 +462,7 @@ static int find_powerf_dev(struct device *dev, void *data) */ static void acpi_pm_finish(void) { - struct device *pwr_btn_dev; + struct acpi_device *pwr_btn_adev; u32 acpi_state = acpi_target_sleep_state; acpi_ec_unblock_transactions(); @@ -501,11 +493,11 @@ static void acpi_pm_finish(void) return; pwr_btn_event_pending = false; - pwr_btn_dev = bus_find_device(&acpi_bus_type, NULL, NULL, - find_powerf_dev); - if (pwr_btn_dev) { - pm_wakeup_event(pwr_btn_dev, 0); - put_device(pwr_btn_dev); + pwr_btn_adev = acpi_dev_get_first_match_dev(ACPI_BUTTON_HID_POWERF, + NULL, -1); + if (pwr_btn_adev) { + pm_wakeup_event(&pwr_btn_adev->dev, 0); + acpi_dev_put(pwr_btn_adev); } } From 1ec0cd8286f35988134e05367ab5e66213b84e7c Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Fri, 24 May 2019 12:44:18 +0200 Subject: [PATCH 21/57] PM: hibernate: powerpc: Expose pfn_is_nosave() prototype The declaration for pfn_is_nosave is only available in kernel/power/power.h. Since this function can be override in arch, expose it globally. Having a prototype will make sure to avoid warning (sometime treated as error with W=1) such as: arch/powerpc/kernel/suspend.c:18:5: error: no previous prototype for 'pfn_is_nosave' [-Werror=missing-prototypes] This moves the declaration into a globally visible header file and add missing include to avoid a warning on powerpc. Also remove the duplicated prototypes since not required anymore. Signed-off-by: Mathieu Malaterre Acked-by: Michael Ellerman (powerpc) Signed-off-by: Rafael J. Wysocki --- arch/powerpc/kernel/suspend.c | 1 + arch/s390/kernel/entry.h | 1 - include/linux/suspend.h | 1 + kernel/power/power.h | 2 -- 4 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c index c612d50c9d18..b84992c10854 100644 --- a/arch/powerpc/kernel/suspend.c +++ b/arch/powerpc/kernel/suspend.c @@ -7,6 +7,7 @@ */ #include +#include #include #include diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 20420c2b8a14..b2956d49b6ad 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -63,7 +63,6 @@ void __init startup_init(void); void die(struct pt_regs *regs, const char *str); int setup_profiling_timer(unsigned int multiplier); void __init time_init(void); -int pfn_is_nosave(unsigned long); void s390_early_resume(void); unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip); diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 8594001e8be8..05645f726815 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -426,6 +426,7 @@ extern bool system_entering_hibernation(void); extern bool hibernation_available(void); asmlinkage int swsusp_save(void); extern struct pbe *restore_pblist; +int pfn_is_nosave(unsigned long pfn); #else /* CONFIG_HIBERNATION */ static inline void register_nosave_region(unsigned long b, unsigned long e) {} static inline void register_nosave_region_late(unsigned long b, unsigned long e) {} diff --git a/kernel/power/power.h b/kernel/power/power.h index 9e58bdc8a562..44bee462ff57 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -75,8 +75,6 @@ static inline void hibernate_reserved_size_init(void) {} static inline void hibernate_image_size_init(void) {} #endif /* !CONFIG_HIBERNATION */ -extern int pfn_is_nosave(unsigned long); - #define power_attr(_name) \ static struct kobj_attribute _name##_attr = { \ .attr = { \ From 3540d38dd38308b811c6ddaf2fde03e9948cc1c1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 5 Jun 2019 09:12:37 -0700 Subject: [PATCH 22/57] PM: sleep: Show how long dpm_suspend_start() and dpm_suspend_end() take When debugging device driver power management code it is convenient to know how much time is spent in the "suspend start" and "suspend end" phases. Hence log the time spent in these phases. Signed-off-by: Bart Van Assche Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index dcfc0a36c8f7..1e84b8aa220f 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1631,17 +1631,20 @@ int dpm_suspend_late(pm_message_t state) */ int dpm_suspend_end(pm_message_t state) { - int error = dpm_suspend_late(state); + ktime_t starttime = ktime_get(); + int error; + + error = dpm_suspend_late(state); if (error) - return error; + goto out; error = dpm_suspend_noirq(state); - if (error) { + if (error) dpm_resume_early(resume_event(state)); - return error; - } - return 0; +out: + dpm_show_time(starttime, state, error, "end"); + return error; } EXPORT_SYMBOL_GPL(dpm_suspend_end); @@ -2034,6 +2037,7 @@ int dpm_prepare(pm_message_t state) */ int dpm_suspend_start(pm_message_t state) { + ktime_t starttime = ktime_get(); int error; error = dpm_prepare(state); @@ -2042,6 +2046,7 @@ int dpm_suspend_start(pm_message_t state) dpm_save_failed_step(SUSPEND_PREPARE); } else error = dpm_suspend(state); + dpm_show_time(starttime, state, error, "start"); return error; } EXPORT_SYMBOL_GPL(dpm_suspend_start); From b3e3759ee4abd72bedbf4b109ff1749d3aea6f21 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 20 Mar 2019 15:19:08 +0530 Subject: [PATCH 23/57] opp: Don't overwrite rounded clk rate The OPP table normally contains 'fmax' values corresponding to the voltage or performance levels of each OPP, but we don't necessarily want all the devices to run at fmax all the time. Running at fmax makes sense for devices like CPU/GPU, which have a finite amount of work to do and since a specific amount of energy is consumed at an OPP, its better to run at the highest possible frequency for that voltage value. On the other hand, we have IO devices which need to run at specific frequencies only for their proper functioning, instead of maximum possible frequency. The OPP core currently roundup to the next possible OPP for a frequency and select the fmax value. To support the IO devices by the OPP core, lets do the roundup to fetch the voltage or performance state values, but not use the OPP frequency value. Rather use the value returned by clk_round_rate(). The current user, cpufreq, of dev_pm_opp_set_rate() already does the rounding to the next OPP before calling this routine and it won't have any side affects because of this change. Signed-off-by: Stephen Boyd Signed-off-by: Rajendra Nayak [ Viresh: Massaged changelog, added comment and use temp_opp variable instead ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 764e05a2fa66..bae94bfa1e96 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -751,13 +751,16 @@ static int _set_required_opps(struct device *dev, * @dev: device for which we do this operation * @target_freq: frequency to achieve * - * This configures the power-supplies and clock source to the levels specified - * by the OPP corresponding to the target_freq. + * This configures the power-supplies to the levels specified by the OPP + * corresponding to the target_freq, and programs the clock to a value <= + * target_freq, as rounded by clk_round_rate(). Device wanting to run at fmax + * provided by the opp, should have already rounded to the target OPP's + * frequency. */ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) { struct opp_table *opp_table; - unsigned long freq, old_freq; + unsigned long freq, old_freq, temp_freq; struct dev_pm_opp *old_opp, *opp; struct clk *clk; int ret; @@ -796,13 +799,15 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) goto put_opp_table; } - old_opp = _find_freq_ceil(opp_table, &old_freq); + temp_freq = old_freq; + old_opp = _find_freq_ceil(opp_table, &temp_freq); if (IS_ERR(old_opp)) { dev_err(dev, "%s: failed to find current OPP for freq %lu (%ld)\n", __func__, old_freq, PTR_ERR(old_opp)); } - opp = _find_freq_ceil(opp_table, &freq); + temp_freq = freq; + opp = _find_freq_ceil(opp_table, &temp_freq); if (IS_ERR(opp)) { ret = PTR_ERR(opp); dev_err(dev, "%s: failed to find OPP for freq %lu (%d)\n", From cd7ea582866f43324f3d60f0d4daac05e08f0f41 Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Wed, 20 Mar 2019 15:19:09 +0530 Subject: [PATCH 24/57] opp: Make dev_pm_opp_set_rate() handle freq = 0 to drop performance votes For devices with performance state, we use dev_pm_opp_set_rate() to set the appropriate clk rate and the performance state. We do need a way to remove the performance state vote when we idle the device and turn the clocks off. Use dev_pm_opp_set_rate() with freq = 0 to achieve this. Signed-off-by: Rajendra Nayak Signed-off-by: Stephen Boyd [ Viresh: Updated _set_required_opps() to handle the !opp case ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index bae94bfa1e96..9fda9a0ec016 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -711,7 +711,7 @@ static int _set_required_opps(struct device *dev, /* Single genpd case */ if (!genpd_virt_devs) { - pstate = opp->required_opps[0]->pstate; + pstate = likely(opp) ? opp->required_opps[0]->pstate : 0; ret = dev_pm_genpd_set_performance_state(dev, pstate); if (ret) { dev_err(dev, "Failed to set performance state of %s: %d (%d)\n", @@ -729,7 +729,7 @@ static int _set_required_opps(struct device *dev, mutex_lock(&opp_table->genpd_virt_dev_lock); for (i = 0; i < opp_table->required_opp_count; i++) { - pstate = opp->required_opps[i]->pstate; + pstate = likely(opp) ? opp->required_opps[i]->pstate : 0; if (!genpd_virt_devs[i]) continue; @@ -765,18 +765,23 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) struct clk *clk; int ret; - if (unlikely(!target_freq)) { - dev_err(dev, "%s: Invalid target frequency %lu\n", __func__, - target_freq); - return -EINVAL; - } - opp_table = _find_opp_table(dev); if (IS_ERR(opp_table)) { dev_err(dev, "%s: device opp doesn't exist\n", __func__); return PTR_ERR(opp_table); } + if (unlikely(!target_freq)) { + if (opp_table->required_opp_tables) { + ret = _set_required_opps(dev, opp_table, NULL); + } else { + dev_err(dev, "target frequency can't be 0\n"); + ret = -EINVAL; + } + + goto put_opp_table; + } + clk = opp_table->clk; if (IS_ERR(clk)) { dev_err(dev, "%s: No clock available for the device\n", From 2d4a79ae34048996906e585375c2e3e699e1e11a Mon Sep 17 00:00:00 2001 From: David Arcari Date: Thu, 6 Jun 2019 14:50:52 -0400 Subject: [PATCH 25/57] cpufreq: pcc-cpufreq: Fail initialization if driver cannot be registered Make pcc_cpufreq_init() return error codes when the driver cannot be registered. Otherwise the driver can shows up loaded via lsmod even though it failed initialization. This is confusing to the user. Signed-off-by: David Arcari Cc: "Rafael J. Wysocki" Cc: Viresh Kumar Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/pcc-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c index 1e5e64643c3a..fdc767fdbe6a 100644 --- a/drivers/cpufreq/pcc-cpufreq.c +++ b/drivers/cpufreq/pcc-cpufreq.c @@ -582,10 +582,10 @@ static int __init pcc_cpufreq_init(void) /* Skip initialization if another cpufreq driver is there. */ if (cpufreq_get_current_driver()) - return 0; + return -EEXIST; if (acpi_disabled) - return 0; + return -ENODEV; ret = pcc_cpufreq_probe(); if (ret) { From 234f223d63d8f7db64a682ccf02871d40d38db52 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 7 Jun 2019 00:30:58 +0200 Subject: [PATCH 26/57] PCI: PM: Avoid resuming devices in D3hot during system suspend The current code resumes devices in D3hot during system suspend if the target power state for them is D3cold, but that is not necessary in general. It only is necessary to do that if the platform firmware requires the device to be resumed, but that should be covered by the platform_pci_need_resume() check anyway, so rework pci_dev_keep_suspended() to avoid returning 'false' for devices in D3hot which need not be resumed due to platform firmware requirements. Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg --- drivers/pci/pci.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 8abc843b1615..324175509e94 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2474,10 +2474,20 @@ bool pci_dev_keep_suspended(struct pci_dev *pci_dev) { struct device *dev = &pci_dev->dev; bool wakeup = device_may_wakeup(dev); + pci_power_t target_state; - if (!pm_runtime_suspended(dev) - || pci_target_state(pci_dev, wakeup) != pci_dev->current_state - || platform_pci_need_resume(pci_dev)) + if (!pm_runtime_suspended(dev) || platform_pci_need_resume(pci_dev)) + return false; + + target_state = pci_target_state(pci_dev, wakeup); + + /* + * If the earlier platform check has not triggered, D3cold is just power + * removal on top of D3hot, so no need to resume the device in that + * case. + */ + if (target_state != pci_dev->current_state && + target_state != PCI_D3cold && pci_dev->current_state != PCI_D3hot) return false; /* From 0c7376ada9508141becec9b897d73b65ce66a15a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 7 Jun 2019 00:32:31 +0200 Subject: [PATCH 27/57] PCI: PM: Replace pci_dev_keep_suspended() with two functions The code in pci_dev_keep_suspended() is relatively hard to follow due to the negative checks in it and in its callers and the function has a possible side-effect (disabling the PME) which doesn't really match its role. For this reason, move the PME disabling from pci_dev_keep_suspended() to a separate function and change the semantics (and name) of the rest of it, so that 'true' is returned when the device needs to be resumed (and not the other way around). Change the callers of pci_dev_keep_suspended() accordingly. While at it, make the code flow in pci_pm_poweroff() reflect the pci_pm_suspend() more closely to avoid arbitrary differences between them. This is a cosmetic change with no intention to alter behavior. Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg --- drivers/pci/pci-driver.c | 22 +++++++++++++--- drivers/pci/pci.c | 55 ++++++++++++++++++++-------------------- drivers/pci/pci.h | 3 ++- 3 files changed, 48 insertions(+), 32 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 5eadbc3d0969..dd656cf19a12 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -679,6 +679,7 @@ static bool pci_has_legacy_pm_support(struct pci_dev *pci_dev) static int pci_pm_prepare(struct device *dev) { struct device_driver *drv = dev->driver; + struct pci_dev *pci_dev = to_pci_dev(dev); if (drv && drv->pm && drv->pm->prepare) { int error = drv->pm->prepare(dev); @@ -688,7 +689,15 @@ static int pci_pm_prepare(struct device *dev) if (!error && dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_PREPARE)) return 0; } - return pci_dev_keep_suspended(to_pci_dev(dev)); + if (pci_dev_need_resume(pci_dev)) + return 0; + + /* + * The PME setting needs to be adjusted here in case the direct-complete + * optimization is used with respect to this device. + */ + pci_dev_adjust_pme(pci_dev); + return 1; } static void pci_pm_complete(struct device *dev) @@ -758,9 +767,11 @@ static int pci_pm_suspend(struct device *dev) * better to resume the device from runtime suspend here. */ if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) || - !pci_dev_keep_suspended(pci_dev)) { + pci_dev_need_resume(pci_dev)) { pm_runtime_resume(dev); pci_dev->state_saved = false; + } else { + pci_dev_adjust_pme(pci_dev); } if (pm->suspend) { @@ -1108,10 +1119,13 @@ static int pci_pm_poweroff(struct device *dev) /* The reason to do that is the same as in pci_pm_suspend(). */ if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) || - !pci_dev_keep_suspended(pci_dev)) + pci_dev_need_resume(pci_dev)) { pm_runtime_resume(dev); + pci_dev->state_saved = false; + } else { + pci_dev_adjust_pme(pci_dev); + } - pci_dev->state_saved = false; if (pm->poweroff) { int error; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 324175509e94..24d86bd1bba1 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2459,55 +2459,56 @@ bool pci_dev_run_wake(struct pci_dev *dev) EXPORT_SYMBOL_GPL(pci_dev_run_wake); /** - * pci_dev_keep_suspended - Check if the device can stay in the suspended state. + * pci_dev_need_resume - Check if it is necessary to resume the device. * @pci_dev: Device to check. * - * Return 'true' if the device is runtime-suspended, it doesn't have to be + * Return 'true' if the device is not runtime-suspended or it has to be * reconfigured due to wakeup settings difference between system and runtime - * suspend and the current power state of it is suitable for the upcoming - * (system) transition. - * - * If the device is not configured for system wakeup, disable PME for it before - * returning 'true' to prevent it from waking up the system unnecessarily. + * suspend, or the current power state of it is not suitable for the upcoming + * (system-wide) transition. */ -bool pci_dev_keep_suspended(struct pci_dev *pci_dev) +bool pci_dev_need_resume(struct pci_dev *pci_dev) { struct device *dev = &pci_dev->dev; - bool wakeup = device_may_wakeup(dev); pci_power_t target_state; if (!pm_runtime_suspended(dev) || platform_pci_need_resume(pci_dev)) - return false; + return true; - target_state = pci_target_state(pci_dev, wakeup); + target_state = pci_target_state(pci_dev, device_may_wakeup(dev)); /* * If the earlier platform check has not triggered, D3cold is just power * removal on top of D3hot, so no need to resume the device in that * case. */ - if (target_state != pci_dev->current_state && - target_state != PCI_D3cold && pci_dev->current_state != PCI_D3hot) - return false; + return target_state != pci_dev->current_state && + target_state != PCI_D3cold && + pci_dev->current_state != PCI_D3hot; +} + +/** + * pci_dev_adjust_pme - Adjust PME setting for a suspended device. + * @pci_dev: Device to check. + * + * If the device is suspended and it is not configured for system wakeup, + * disable PME for it to prevent it from waking up the system unnecessarily. + * + * Note that if the device's power state is D3cold and the platform check in + * pci_dev_need_resume() has not triggered, the device's configuration need not + * be changed. + */ +void pci_dev_adjust_pme(struct pci_dev *pci_dev) +{ + struct device *dev = &pci_dev->dev; - /* - * At this point the device is good to go unless it's been configured - * to generate PME at the runtime suspend time, but it is not supposed - * to wake up the system. In that case, simply disable PME for it - * (it will have to be re-enabled on exit from system resume). - * - * If the device's power state is D3cold and the platform check above - * hasn't triggered, the device's configuration is suitable and we don't - * need to manipulate it at all. - */ spin_lock_irq(&dev->power.lock); - if (pm_runtime_suspended(dev) && pci_dev->current_state < PCI_D3cold && - !wakeup) + if (pm_runtime_suspended(dev) && !device_may_wakeup(dev) && + pci_dev->current_state < PCI_D3cold) __pci_pme_active(pci_dev, false); spin_unlock_irq(&dev->power.lock); - return true; } /** diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 9cb99380c61e..e04fa7fd3e2f 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -82,7 +82,8 @@ int pci_finish_runtime_suspend(struct pci_dev *dev); void pcie_clear_root_pme_status(struct pci_dev *dev); int __pci_pme_wakeup(struct pci_dev *dev, void *ign); void pci_pme_restore(struct pci_dev *dev); -bool pci_dev_keep_suspended(struct pci_dev *dev); +bool pci_dev_need_resume(struct pci_dev *dev); +void pci_dev_adjust_pme(struct pci_dev *dev); void pci_dev_complete_resume(struct pci_dev *pci_dev); void pci_config_pm_runtime_get(struct pci_dev *dev); void pci_config_pm_runtime_put(struct pci_dev *dev); From c2bf1fc212f7e6f25ace1af8f0b3ac061ea48ba5 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 12 Jun 2019 13:57:38 +0300 Subject: [PATCH 28/57] PCI: Add missing link delays required by the PCIe spec Currently Linux does not follow PCIe spec regarding the required delays after reset. A concrete example is a Thunderbolt add-in-card that consists of a PCIe switch and two PCIe endpoints: +-1b.0-[01-6b]----00.0-[02-6b]--+-00.0-[03]----00.0 TBT controller +-01.0-[04-36]-- DS hotplug port +-02.0-[37]----00.0 xHCI controller \-04.0-[38-6b]-- DS hotplug port The root port (1b.0) and the PCIe switch downstream ports are all PCIe gen3 so they support 8GT/s link speeds. We wait for the PCIe hierarchy to enter D3cold (runtime): pcieport 0000:00:1b.0: power state changed by ACPI to D3cold When it wakes up from D3cold, according to the PCIe 4.0 section 5.8 the PCIe switch is put to reset and its power is re-applied. This means that we must follow the rules in PCIe 4.0 section 6.6.1. For the PCIe gen3 ports we are dealing with here, the following applies: With a Downstream Port that supports Link speeds greater than 5.0 GT/s, software must wait a minimum of 100 ms after Link training completes before sending a Configuration Request to the device immediately below that Port. Software can determine when Link training completes by polling the Data Link Layer Link Active bit or by setting up an associated interrupt (see Section 6.7.3.3). Translating this into the above topology we would need to do this (DLLLA stands for Data Link Layer Link Active): pcieport 0000:00:1b.0: wait for 100ms after DLLLA is set before access to 0000:01:00.0 pcieport 0000:02:00.0: wait for 100ms after DLLLA is set before access to 0000:03:00.0 pcieport 0000:02:02.0: wait for 100ms after DLLLA is set before access to 0000:37:00.0 I've instrumented the kernel with additional logging so we can see the actual delays the kernel performs: pcieport 0000:00:1b.0: power state changed by ACPI to D0 pcieport 0000:00:1b.0: waiting for D3cold delay of 100 ms pcieport 0000:00:1b.0: waking up bus pcieport 0000:00:1b.0: waiting for D3hot delay of 10 ms pcieport 0000:00:1b.0: restoring config space at offset 0x2c (was 0x60, writing 0x60) ... pcieport 0000:00:1b.0: PME# disabled pcieport 0000:01:00.0: restoring config space at offset 0x3c (was 0x1ff, writing 0x201ff) ... pcieport 0000:01:00.0: PME# disabled pcieport 0000:02:00.0: restoring config space at offset 0x3c (was 0x1ff, writing 0x201ff) ... pcieport 0000:02:00.0: PME# disabled pcieport 0000:02:01.0: restoring config space at offset 0x3c (was 0x1ff, writing 0x201ff) ... pcieport 0000:02:01.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100407) pcieport 0000:02:01.0: PME# disabled pcieport 0000:02:02.0: restoring config space at offset 0x3c (was 0x1ff, writing 0x201ff) ... pcieport 0000:02:02.0: PME# disabled pcieport 0000:02:04.0: restoring config space at offset 0x3c (was 0x1ff, writing 0x201ff) ... pcieport 0000:02:04.0: PME# disabled pcieport 0000:02:01.0: PME# enabled pcieport 0000:02:01.0: waiting for D3hot delay of 10 ms pcieport 0000:02:04.0: PME# enabled pcieport 0000:02:04.0: waiting for D3hot delay of 10 ms thunderbolt 0000:03:00.0: restoring config space at offset 0x14 (was 0x0, writing 0x8a040000) ... thunderbolt 0000:03:00.0: PME# disabled xhci_hcd 0000:37:00.0: restoring config space at offset 0x10 (was 0x0, writing 0x73f00000) ... xhci_hcd 0000:37:00.0: PME# disabled For the switch upstream port (01:00.0) we wait for 100ms but not taking into account the DLLLA requirement. We then wait 10ms for D3hot -> D0 transition of the root port and the two downstream hotplug ports. This means that we deviate from what the spec requires. Performing the same check for system sleep (s2idle) transitions we can see following when resuming from s2idle: pcieport 0000:00:1b.0: power state changed by ACPI to D0 pcieport 0000:00:1b.0: restoring config space at offset 0x2c (was 0x60, writing 0x60) ... pcieport 0000:01:00.0: restoring config space at offset 0x3c (was 0x1ff, writing 0x201ff) ... pcieport 0000:02:02.0: restoring config space at offset 0x3c (was 0x1ff, writing 0x201ff) pcieport 0000:02:02.0: restoring config space at offset 0x2c (was 0x0, writing 0x0) pcieport 0000:02:01.0: restoring config space at offset 0x3c (was 0x1ff, writing 0x201ff) pcieport 0000:02:04.0: restoring config space at offset 0x3c (was 0x1ff, writing 0x201ff) pcieport 0000:02:02.0: restoring config space at offset 0x28 (was 0x0, writing 0x0) pcieport 0000:02:00.0: restoring config space at offset 0x3c (was 0x1ff, writing 0x201ff) pcieport 0000:02:02.0: restoring config space at offset 0x24 (was 0x10001, writing 0x1fff1) pcieport 0000:02:01.0: restoring config space at offset 0x2c (was 0x0, writing 0x60) pcieport 0000:02:02.0: restoring config space at offset 0x20 (was 0x0, writing 0x73f073f0) pcieport 0000:02:04.0: restoring config space at offset 0x2c (was 0x0, writing 0x60) pcieport 0000:02:01.0: restoring config space at offset 0x28 (was 0x0, writing 0x60) pcieport 0000:02:00.0: restoring config space at offset 0x2c (was 0x0, writing 0x0) pcieport 0000:02:02.0: restoring config space at offset 0x1c (was 0x101, writing 0x1f1) pcieport 0000:02:04.0: restoring config space at offset 0x28 (was 0x0, writing 0x60) pcieport 0000:02:01.0: restoring config space at offset 0x24 (was 0x10001, writing 0x1ff10001) pcieport 0000:02:00.0: restoring config space at offset 0x28 (was 0x0, writing 0x0) pcieport 0000:02:02.0: restoring config space at offset 0x18 (was 0x0, writing 0x373702) pcieport 0000:02:04.0: restoring config space at offset 0x24 (was 0x10001, writing 0x49f12001) pcieport 0000:02:01.0: restoring config space at offset 0x20 (was 0x0, writing 0x73e05c00) pcieport 0000:02:00.0: restoring config space at offset 0x24 (was 0x10001, writing 0x1fff1) pcieport 0000:02:04.0: restoring config space at offset 0x20 (was 0x0, writing 0x89f07400) pcieport 0000:02:01.0: restoring config space at offset 0x1c (was 0x101, writing 0x5151) pcieport 0000:02:00.0: restoring config space at offset 0x20 (was 0x0, writing 0x8a008a00) pcieport 0000:02:02.0: restoring config space at offset 0xc (was 0x10000, writing 0x10020) pcieport 0000:02:04.0: restoring config space at offset 0x1c (was 0x101, writing 0x6161) pcieport 0000:02:01.0: restoring config space at offset 0x18 (was 0x0, writing 0x360402) pcieport 0000:02:00.0: restoring config space at offset 0x1c (was 0x101, writing 0x1f1) pcieport 0000:02:04.0: restoring config space at offset 0x18 (was 0x0, writing 0x6b3802) pcieport 0000:02:02.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100407) pcieport 0000:02:00.0: restoring config space at offset 0x18 (was 0x0, writing 0x30302) pcieport 0000:02:01.0: restoring config space at offset 0xc (was 0x10000, writing 0x10020) pcieport 0000:02:04.0: restoring config space at offset 0xc (was 0x10000, writing 0x10020) pcieport 0000:02:00.0: restoring config space at offset 0xc (was 0x10000, writing 0x10020) pcieport 0000:02:01.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100407) pcieport 0000:02:04.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100407) pcieport 0000:02:00.0: restoring config space at offset 0x4 (was 0x100000, writing 0x100407) xhci_hcd 0000:37:00.0: restoring config space at offset 0x10 (was 0x0, writing 0x73f00000) ... thunderbolt 0000:03:00.0: restoring config space at offset 0x14 (was 0x0, writing 0x8a040000) This is even worse. None of the mandatory delays are performed. If this would be S3 instead of s2idle then according to PCI FW spec 3.2 section 4.6.8. there is a specific _DSM that allows the OS to skip the delays but this platform does not provide the _DSM and does not go to S3 anyway so no firmware is involved that could already handle these delays. In this particular Intel Coffee Lake platform these delays are not actually needed because there is an additional delay as part of the ACPI power resource that is used to turn on power to the hierarchy but since that additional delay is not required by any of standards (PCIe, ACPI) it is not present in the Intel Ice Lake, for example where missing the mandatory delays causes pciehp to start tearing down the stack too early (links are not yet trained). For this reason, change the PCIe portdrv PM resume hooks so that they perform the mandatory delays before the downstream component gets resumed. We perform the delays before port services are resumed because otherwise pciehp might find that the link is not up (even if it is just training) and tears-down the hierarchy. Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/pci/pci.c | 29 ++++++++++----- drivers/pci/pci.h | 1 + drivers/pci/pcie/portdrv_core.c | 66 +++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 10 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 24d86bd1bba1..9839d6f9bcb5 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1004,15 +1004,10 @@ static void __pci_start_power_transition(struct pci_dev *dev, pci_power_t state) if (state == PCI_D0) { pci_platform_power_transition(dev, PCI_D0); /* - * Mandatory power management transition delays, see - * PCI Express Base Specification Revision 2.0 Section - * 6.6.1: Conventional Reset. Do not delay for - * devices powered on/off by corresponding bridge, - * because have already delayed for the bridge. + * Mandatory power management transition delays are + * handled in the PCIe portdrv resume hooks. */ if (dev->runtime_d3cold) { - if (dev->d3cold_delay && !dev->imm_ready) - msleep(dev->d3cold_delay); /* * When powering on a bridge from D3cold, the * whole hierarchy may be powered on into @@ -4579,14 +4574,16 @@ static int pci_pm_reset(struct pci_dev *dev, int probe) return pci_dev_wait(dev, "PM D3->D0", PCIE_RESET_READY_POLL_MS); } + /** - * pcie_wait_for_link - Wait until link is active or inactive + * pcie_wait_for_link_delay - Wait until link is active or inactive * @pdev: Bridge device * @active: waiting for active or inactive? + * @delay: Delay to wait after link has become active (in ms) * * Use this to wait till link becomes active or inactive. */ -bool pcie_wait_for_link(struct pci_dev *pdev, bool active) +bool pcie_wait_for_link_delay(struct pci_dev *pdev, bool active, int delay) { int timeout = 1000; bool ret; @@ -4623,13 +4620,25 @@ bool pcie_wait_for_link(struct pci_dev *pdev, bool active) timeout -= 10; } if (active && ret) - msleep(100); + msleep(delay); else if (ret != active) pci_info(pdev, "Data Link Layer Link Active not %s in 1000 msec\n", active ? "set" : "cleared"); return ret == active; } +/** + * pcie_wait_for_link - Wait until link is active or inactive + * @pdev: Bridge device + * @active: waiting for active or inactive? + * + * Use this to wait till link becomes active or inactive. + */ +bool pcie_wait_for_link(struct pci_dev *pdev, bool active) +{ + return pcie_wait_for_link_delay(pdev, active, 100); +} + void pci_reset_secondary_bus(struct pci_dev *dev) { u16 ctrl; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index e04fa7fd3e2f..7f9fd93270ed 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -494,6 +494,7 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev) void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, u32 service); +bool pcie_wait_for_link_delay(struct pci_dev *pdev, bool active, int delay); bool pcie_wait_for_link(struct pci_dev *pdev, bool active); #ifdef CONFIG_PCIEASPM void pcie_aspm_init_link_state(struct pci_dev *pdev); diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 1b330129089f..308c3e0c4a34 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -378,6 +379,67 @@ static int pm_iter(struct device *dev, void *data) return 0; } +static int get_downstream_delay(struct pci_bus *bus) +{ + struct pci_dev *pdev; + int min_delay = 100; + int max_delay = 0; + + list_for_each_entry(pdev, &bus->devices, bus_list) { + if (!pdev->imm_ready) + min_delay = 0; + else if (pdev->d3cold_delay < min_delay) + min_delay = pdev->d3cold_delay; + if (pdev->d3cold_delay > max_delay) + max_delay = pdev->d3cold_delay; + } + + return max(min_delay, max_delay); +} + +/* + * wait_for_downstream_link - Wait for downstream link to establish + * @pdev: PCIe port whose downstream link is waited + * + * Handle delays according to PCIe 4.0 section 6.6.1 before configuration + * access to the downstream component is permitted. + * + * This blocks PCI core resume of the hierarchy below this port until the + * link is trained. Should be called before resuming port services to + * prevent pciehp from starting to tear-down the hierarchy too soon. + */ +static void wait_for_downstream_link(struct pci_dev *pdev) +{ + int delay; + + if (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT && + pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM) + return; + + if (pci_dev_is_disconnected(pdev)) + return; + + if (!pdev->subordinate || list_empty(&pdev->subordinate->devices) || + !pdev->bridge_d3) + return; + + delay = get_downstream_delay(pdev->subordinate); + if (!delay) + return; + + dev_dbg(&pdev->dev, "waiting downstream link for %d ms\n", delay); + + /* + * If downstream port does not support speeds greater than 5 GT/s + * need to wait 100ms. For higher speeds (gen3) we need to wait + * first for the data link layer to become active. + */ + if (pcie_get_speed_cap(pdev) <= PCIE_SPEED_5_0GT) + msleep(delay); + else + pcie_wait_for_link_delay(pdev, true, delay); +} + /** * pcie_port_device_suspend - suspend port services associated with a PCIe port * @dev: PCI Express port to handle @@ -391,6 +453,8 @@ int pcie_port_device_suspend(struct device *dev) int pcie_port_device_resume_noirq(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, resume_noirq); + + wait_for_downstream_link(to_pci_dev(dev)); return device_for_each_child(dev, &off, pm_iter); } @@ -421,6 +485,8 @@ int pcie_port_device_runtime_suspend(struct device *dev) int pcie_port_device_runtime_resume(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, runtime_resume); + + wait_for_downstream_link(to_pci_dev(dev)); return device_for_each_child(dev, &off, pm_iter); } #endif /* PM */ From 000dd5316e1c756a1c028f22e01d06a38249dd4d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 12 Jun 2019 13:57:39 +0300 Subject: [PATCH 29/57] PCI: Do not poll for PME if the device is in D3cold PME polling does not take into account that a device that is directly connected to the host bridge may go into D3cold as well. This leads to a situation where the PME poll thread reads from a config space of a device that is in D3cold and gets incorrect information because the config space is not accessible. Here is an example from Intel Ice Lake system where two PCIe root ports are in D3cold (I've instrumented the kernel to log the PMCSR register contents): [ 62.971442] pcieport 0000:00:07.1: Check PME status, PMCSR=0xffff [ 62.971504] pcieport 0000:00:07.0: Check PME status, PMCSR=0xffff Since 0xffff is interpreted so that PME is pending, the root ports will be runtime resumed. This repeats over and over again essentially blocking all runtime power management. Prevent this from happening by checking whether the device is in D3cold before its PME status is read. Fixes: 71a83bd727cc ("PCI/PM: add runtime PM support to PCIe port") Signed-off-by: Mika Westerberg Reviewed-by: Lukas Wunner Cc: 3.6+ # v3.6+ Signed-off-by: Rafael J. Wysocki --- drivers/pci/pci.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 9839d6f9bcb5..e34fb2b3c466 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2060,6 +2060,13 @@ static void pci_pme_list_scan(struct work_struct *work) */ if (bridge && bridge->current_state != PCI_D0) continue; + /* + * If the device is in D3cold it should not be + * polled either. + */ + if (pme_dev->dev->current_state == PCI_D3cold) + continue; + pci_pme_wakeup(pme_dev->dev, NULL); } else { list_del(&pme_dev->list); From 8eb835e4789acb7c306d56ae9069e0e1134aa5b2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 11 Jun 2019 20:05:47 +0200 Subject: [PATCH 30/57] power: avs: smartreflex: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. And even when not checking the return value, no need to cast away the call to (void), as these functions were never a "must check" type of a function, so remove that odd cast. Signed-off-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- drivers/power/avs/smartreflex.c | 41 +++++++++------------------------ 1 file changed, 11 insertions(+), 30 deletions(-) diff --git a/drivers/power/avs/smartreflex.c b/drivers/power/avs/smartreflex.c index c96c01e09740..4684e7df833a 100644 --- a/drivers/power/avs/smartreflex.c +++ b/drivers/power/avs/smartreflex.c @@ -899,38 +899,19 @@ static int omap_sr_probe(struct platform_device *pdev) } dev_info(&pdev->dev, "%s: SmartReflex driver initialized\n", __func__); - if (!sr_dbg_dir) { + if (!sr_dbg_dir) sr_dbg_dir = debugfs_create_dir("smartreflex", NULL); - if (IS_ERR_OR_NULL(sr_dbg_dir)) { - ret = PTR_ERR(sr_dbg_dir); - pr_err("%s:sr debugfs dir creation failed(%d)\n", - __func__, ret); - goto err_list_del; - } - } sr_info->dbg_dir = debugfs_create_dir(sr_info->name, sr_dbg_dir); - if (IS_ERR_OR_NULL(sr_info->dbg_dir)) { - dev_err(&pdev->dev, "%s: Unable to create debugfs directory\n", - __func__); - ret = PTR_ERR(sr_info->dbg_dir); - goto err_debugfs; - } - (void) debugfs_create_file("autocomp", S_IRUGO | S_IWUSR, - sr_info->dbg_dir, (void *)sr_info, &pm_sr_fops); - (void) debugfs_create_x32("errweight", S_IRUGO, sr_info->dbg_dir, - &sr_info->err_weight); - (void) debugfs_create_x32("errmaxlimit", S_IRUGO, sr_info->dbg_dir, - &sr_info->err_maxlimit); + debugfs_create_file("autocomp", S_IRUGO | S_IWUSR, sr_info->dbg_dir, + (void *)sr_info, &pm_sr_fops); + debugfs_create_x32("errweight", S_IRUGO, sr_info->dbg_dir, + &sr_info->err_weight); + debugfs_create_x32("errmaxlimit", S_IRUGO, sr_info->dbg_dir, + &sr_info->err_maxlimit); nvalue_dir = debugfs_create_dir("nvalue", sr_info->dbg_dir); - if (IS_ERR_OR_NULL(nvalue_dir)) { - dev_err(&pdev->dev, "%s: Unable to create debugfs directory for n-values\n", - __func__); - ret = PTR_ERR(nvalue_dir); - goto err_debugfs; - } if (sr_info->nvalue_count == 0 || !sr_info->nvalue_table) { dev_warn(&pdev->dev, "%s: %s: No Voltage table for the corresponding vdd. Cannot create debugfs entries for n-values\n", @@ -945,12 +926,12 @@ static int omap_sr_probe(struct platform_device *pdev) snprintf(name, sizeof(name), "volt_%lu", sr_info->nvalue_table[i].volt_nominal); - (void) debugfs_create_x32(name, S_IRUGO | S_IWUSR, nvalue_dir, - &(sr_info->nvalue_table[i].nvalue)); + debugfs_create_x32(name, S_IRUGO | S_IWUSR, nvalue_dir, + &(sr_info->nvalue_table[i].nvalue)); snprintf(name, sizeof(name), "errminlimit_%lu", sr_info->nvalue_table[i].volt_nominal); - (void) debugfs_create_x32(name, S_IRUGO | S_IWUSR, nvalue_dir, - &(sr_info->nvalue_table[i].errminlimit)); + debugfs_create_x32(name, S_IRUGO | S_IWUSR, nvalue_dir, + &(sr_info->nvalue_table[i].errminlimit)); } From 0b385a0c3bd3f6d1044728b732bfc7dfb01c9fb5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 18 Jun 2019 10:18:28 +0200 Subject: [PATCH 31/57] PM: suspend: Rename pm_suspend_via_s2idle() The name of pm_suspend_via_s2idle() is confusing, as it doesn't reflect the purpose of the function precisely enough and it is very similar to pm_suspend_via_firmware(), which has a different purpose, so rename it as pm_suspend_default_s2idle() and update its only caller, i8042_register_ports(), accordingly. Signed-off-by: Rafael J. Wysocki Acked-by: Dmitry Torokhov --- drivers/input/serio/i8042.c | 2 +- include/linux/suspend.h | 4 ++-- kernel/power/suspend.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 6462f1798fbb..8384abc41d7f 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -1410,7 +1410,7 @@ static void __init i8042_register_ports(void) * behavior on many platforms using suspend-to-RAM (ACPI S3) * by default. */ - if (pm_suspend_via_s2idle() && i == I8042_KBD_PORT_NO) + if (pm_suspend_default_s2idle() && i == I8042_KBD_PORT_NO) device_set_wakeup_enable(&serio->dev, true); } } diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 05645f726815..d07ae7fb9315 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -282,7 +282,7 @@ static inline bool idle_should_enter_s2idle(void) return unlikely(s2idle_state == S2IDLE_STATE_ENTER); } -extern bool pm_suspend_via_s2idle(void); +extern bool pm_suspend_default_s2idle(void); extern void __init pm_states_init(void); extern void s2idle_set_ops(const struct platform_s2idle_ops *ops); extern void s2idle_wake(void); @@ -314,7 +314,7 @@ static inline void pm_set_suspend_via_firmware(void) {} static inline void pm_set_resume_via_firmware(void) {} static inline bool pm_suspend_via_firmware(void) { return false; } static inline bool pm_resume_via_firmware(void) { return false; } -static inline bool pm_suspend_via_s2idle(void) { return false; } +static inline bool pm_suspend_default_s2idle(void) { return false; } static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 9505101ed2bc..8703b0ca4986 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -62,16 +62,16 @@ enum s2idle_states __read_mostly s2idle_state; static DEFINE_RAW_SPINLOCK(s2idle_lock); /** - * pm_suspend_via_s2idle - Check if suspend-to-idle is the default suspend. + * pm_suspend_default_s2idle - Check if suspend-to-idle is the default suspend. * * Return 'true' if suspend-to-idle has been selected as the default system * suspend method. */ -bool pm_suspend_via_s2idle(void) +bool pm_suspend_default_s2idle(void) { return mem_sleep_current == PM_SUSPEND_TO_IDLE; } -EXPORT_SYMBOL_GPL(pm_suspend_via_s2idle); +EXPORT_SYMBOL_GPL(pm_suspend_default_s2idle); void s2idle_set_ops(const struct platform_s2idle_ops *ops) { From 25fa4d9d4ca62dbf8fd1eeef413421c94183da3c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 18 Jun 2019 17:34:16 +0200 Subject: [PATCH 32/57] drivers: base: power: remove wakeup_sources_stats_dentry variable wakeup_sources_stats_dentry is assigned when the debugfs file is created, but then never used ever again. So no need for it at all, just remove it and call debugfs_create_file() on its own. Signed-off-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeup.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 5b2b6a05a4f3..ee31d4f8d856 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -968,8 +968,6 @@ void pm_wakep_autosleep_enabled(bool set) } #endif /* CONFIG_PM_AUTOSLEEP */ -static struct dentry *wakeup_sources_stats_dentry; - /** * print_wakeup_source_stats - Print wakeup source statistics information. * @m: seq_file to print the statistics into. @@ -1099,8 +1097,8 @@ static const struct file_operations wakeup_sources_stats_fops = { static int __init wakeup_sources_debugfs_init(void) { - wakeup_sources_stats_dentry = debugfs_create_file("wakeup_sources", - S_IRUGO, NULL, NULL, &wakeup_sources_stats_fops); + debugfs_create_file("wakeup_sources", S_IRUGO, NULL, NULL, + &wakeup_sources_stats_fops); return 0; } From e9bea8f98a539080070e3eff70a1731ce0ffdc8d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 19 Jun 2019 00:48:15 +0200 Subject: [PATCH 33/57] PM: sleep: Update struct wakeup_source documentation The kerneldoc comment for struct wakeup_source has become outdated, so fix that. Signed-off-by: Rafael J. Wysocki Reviewed-by: Greg Kroah-Hartman --- include/linux/pm_wakeup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index ce57771fab9b..91027602d137 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -36,7 +36,7 @@ struct wake_irq; * @expire_count: Number of times the wakeup source's timeout has expired. * @wakeup_count: Number of times the wakeup source might abort suspend. * @active: Status of the wakeup source. - * @has_timeout: The wakeup source has been activated with a timeout. + * @autosleep_enabled: Autosleep is active, so update @prevent_sleep_time. */ struct wakeup_source { const char *name; From f9020441dbc39133591ff72b420f21f51896afc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Chmiel?= Date: Fri, 21 Jun 2019 12:10:43 +0200 Subject: [PATCH 34/57] cpufreq: s5pv210: Don't flood kernel log after cpufreq change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit replaces printk with pr_debug, so we don't flood kernel log. Signed-off-by: Paweł Chmiel Acked-by: Krzysztof Kozlowski Signed-off-by: Viresh Kumar --- drivers/cpufreq/s5pv210-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index 5b4289460bc9..c7b7d1e65b08 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -481,7 +481,7 @@ static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index) arm_volt, arm_volt_max); } - printk(KERN_DEBUG "Perf changed[L%d]\n", index); + pr_debug("Perf changed[L%d]\n", index); exit: mutex_unlock(&set_freq_lock); From 560d1bcad715c215e7ffe5d7cffe045974b623d0 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 23 Jun 2019 20:50:53 +0300 Subject: [PATCH 35/57] opp: Don't use IS_ERR on invalid supplies _set_opp_custom() receives a set of OPP supplies as its arguments and the caller of it passes NULL when the supplies are not valid. But _set_opp_custom(), by mistake, checks for error by performing IS_ERR(old_supply) on it which will always evaluate to false. The problem was spotted during of testing of upcoming update for the NVIDIA Tegra CPUFreq driver. Cc: stable Fixes: 7e535993fa4f ("OPP: Separate out custom OPP handler specific code") Reported-by: Marc Dietrich Signed-off-by: Dmitry Osipenko [ Viresh: Massaged changelog ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 9fda9a0ec016..89ec6aa220cf 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -685,7 +685,7 @@ static int _set_opp_custom(const struct opp_table *opp_table, data->old_opp.rate = old_freq; size = sizeof(*old_supply) * opp_table->regulator_count; - if (IS_ERR(old_supply)) + if (!old_supply) memset(data->old_opp.supplies, 0, size); else memcpy(data->old_opp.supplies, old_supply, size); From bcc61569997b2188ba89db43b5b991da01ea2d18 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 25 Jun 2019 13:32:41 +0200 Subject: [PATCH 36/57] cpufreq: Move the IS_ENABLED(CPU_THERMAL) macro into a stub cpufreq_online() and cpufreq_offline() [un]register the driver as a cooling device. This is done if the driver is flagged as a cooling device in addition with an IS_ENABLED() check to compile out the branching code. Group this test in a stub function added in the cpufreq header instead of having the IS_ENABLED() in the code. Suggested-by: Rafael J. Wysocki Signed-off-by: Daniel Lezcano Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 6 ++---- include/linux/cpufreq.h | 6 ++++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 85ff958e01f1..aee024e42618 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1378,8 +1378,7 @@ static int cpufreq_online(unsigned int cpu) if (cpufreq_driver->ready) cpufreq_driver->ready(policy); - if (IS_ENABLED(CONFIG_CPU_THERMAL) && - cpufreq_driver->flags & CPUFREQ_IS_COOLING_DEV) + if (cpufreq_thermal_control_enabled(cpufreq_driver)) policy->cdev = of_cpufreq_cooling_register(policy); pr_debug("initialization complete\n"); @@ -1469,8 +1468,7 @@ static int cpufreq_offline(unsigned int cpu) goto unlock; } - if (IS_ENABLED(CONFIG_CPU_THERMAL) && - cpufreq_driver->flags & CPUFREQ_IS_COOLING_DEV) { + if (cpufreq_thermal_control_enabled(cpufreq_driver)) { cpufreq_cooling_unregister(policy->cdev); policy->cdev = NULL; } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d01a74fbc4db..a1467aa7f58b 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -409,6 +409,12 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); const char *cpufreq_get_current_driver(void); void *cpufreq_get_driver_data(void); +static inline int cpufreq_thermal_control_enabled(struct cpufreq_driver *drv) +{ + return IS_ENABLED(CONFIG_CPU_THERMAL) && + (drv->flags & CPUFREQ_IS_COOLING_DEV); +} + static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, unsigned int min, unsigned int max) { From 407d0fff22667598fe3602965f0e0bf78dabdca5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 20 Jun 2019 08:35:46 +0530 Subject: [PATCH 37/57] cpufreq: Remove redundant !setpolicy check cpufreq_start_governor() is only called for !setpolicy case, checking it again is not required. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index aee024e42618..8caec5211726 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2151,7 +2151,7 @@ static int cpufreq_start_governor(struct cpufreq_policy *policy) pr_debug("%s: for CPU %u\n", __func__, policy->cpu); - if (cpufreq_driver->get && !cpufreq_driver->setpolicy) + if (cpufreq_driver->get) cpufreq_update_current_freq(policy); if (policy->governor->start) { From 5ddc6d4e30f4e8701af661601ca07abdfc237996 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 20 Jun 2019 08:35:48 +0530 Subject: [PATCH 38/57] cpufreq: Use has_target() instead of !setpolicy For code consistency, use has_target() instead of !setpolicy everywhere, as it is already done at several places. Maybe we should also use "!has_target()" instead of "cpufreq_driver->setpolicy" where we need to check if the driver supports setpolicy, so to use only one expression for this kind of differentiation. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8caec5211726..555a62646491 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -634,7 +634,7 @@ static int cpufreq_parse_policy(char *str_governor, } /** - * cpufreq_parse_governor - parse a governor string only for !setpolicy + * cpufreq_parse_governor - parse a governor string only for has_target() */ static int cpufreq_parse_governor(char *str_governor, struct cpufreq_policy *policy) @@ -1303,7 +1303,7 @@ static int cpufreq_online(unsigned int cpu) policy->max = policy->user_policy.max; } - if (cpufreq_driver->get && !cpufreq_driver->setpolicy) { + if (cpufreq_driver->get && has_target()) { policy->cur = cpufreq_driver->get(policy->cpu); if (!policy->cur) { pr_err("%s: ->get() failed\n", __func__); @@ -2402,7 +2402,7 @@ void cpufreq_update_policy(unsigned int cpu) * BIOS might change freq behind our back * -> ask driver for current freq and notify governors about a change */ - if (cpufreq_driver->get && !cpufreq_driver->setpolicy && + if (cpufreq_driver->get && has_target() && (cpufreq_suspended || WARN_ON(!cpufreq_update_current_freq(policy)))) goto unlock; From 21ba237926227121dacccaf5d7863b0cb50f3eda Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 25 Jun 2019 14:04:45 +0200 Subject: [PATCH 39/57] ACPI: PM: Avoid evaluating _PS3 on transitions from D3hot to D3cold If the power state of a device with ACPI PM is changed from D3hot to D3cold, it merely is a matter of dropping references to additional power resources (specifically, those in the list returned by _PR3), and the _PS3 method should not be invoked for the device then (as it has already been evaluated during the previous transition to D3hot). Fixes: 20dacb71ad28 (ACPI / PM: Rework device power management to follow ACPI 6) Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg --- drivers/acpi/device_pm.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index b859d75eaf9f..3269a4e8b902 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -210,9 +210,15 @@ int acpi_device_set_power(struct acpi_device *device, int state) return -ENODEV; } - result = acpi_dev_pm_explicit_set(device, state); - if (result) - goto end; + /* + * If the device goes from D3hot to D3cold, _PS3 has been + * evaluated for it already, so skip it in that case. + */ + if (device->power.state < ACPI_STATE_D3_HOT) { + result = acpi_dev_pm_explicit_set(device, state); + if (result) + goto end; + } if (device->power.flags.power_resources) result = acpi_power_transition(device, target_state); From f850a48a07996bfd7bd1b2e52f57b5ee55125482 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 25 Jun 2019 14:06:13 +0200 Subject: [PATCH 40/57] ACPI: PM: Allow transitions to D0 to occur in special cases If a device with ACPI PM is left in D0 during a system-wide transition to the S3 (suspend-to-RAM) or S4 (hibernation) sleep state, the actual state of the device need not be D0 during resume from it, although its power.state value will still reflect D0 (that is, the power state from before the system-wide transition). In that case, the acpi_device_set_power() call made to ensure that the power state of the device will be D0 going forward has no effect, because the new state (D0) is equal to the one reflected by the device's power.state value. That does not affect power resources, which are taken care of by acpi_resume_power_resources() called from acpi_pm_finish() during resume from system-wide sleep states, but it still may be necessary to invoke _PS0 for the device on top of that in order to finalize its transition to D0. For this reason, modify acpi_device_set_power() to allow transitions to D0 to occur even if D0 is the current power state of the device according to its power.state value. That will not affect power resources, which are assumed to be in the right configuration already (as reflected by the current values of their reference counters), but it may cause _PS0 to be evaluated for the device. However, evaluating _PS0 for a device already in D0 may lead to confusion in general, so invoke _PSC (if present) to check the device's current power state upfront and only evaluate _PS0 for it if _PSC has returned a power state different from D0. [If _PSC is not present or the evaluation of it fails, the power state of the device is assumed to be D0 at this point.] Fixes: 20dacb71ad28 (ACPI / PM: Rework device power management to follow ACPI 6) Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg --- drivers/acpi/device_pm.c | 53 ++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 3269a4e8b902..94194c7e8a07 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -53,6 +53,19 @@ const char *acpi_power_state_string(int state) } } +static int acpi_dev_pm_explicit_get(struct acpi_device *device, int *state) +{ + unsigned long long psc; + acpi_status status; + + status = acpi_evaluate_integer(device->handle, "_PSC", NULL, &psc); + if (ACPI_FAILURE(status)) + return -ENODEV; + + *state = psc; + return 0; +} + /** * acpi_device_get_power - Get power state of an ACPI device. * @device: Device to get the power state of. @@ -65,6 +78,7 @@ const char *acpi_power_state_string(int state) int acpi_device_get_power(struct acpi_device *device, int *state) { int result = ACPI_STATE_UNKNOWN; + int error; if (!device || !state) return -EINVAL; @@ -81,18 +95,16 @@ int acpi_device_get_power(struct acpi_device *device, int *state) * if available. */ if (device->power.flags.power_resources) { - int error = acpi_power_get_inferred_state(device, &result); + error = acpi_power_get_inferred_state(device, &result); if (error) return error; } if (device->power.flags.explicit_get) { - acpi_handle handle = device->handle; - unsigned long long psc; - acpi_status status; + int psc; - status = acpi_evaluate_integer(handle, "_PSC", NULL, &psc); - if (ACPI_FAILURE(status)) - return -ENODEV; + error = acpi_dev_pm_explicit_get(device, &psc); + if (error) + return error; /* * The power resources settings may indicate a power state @@ -160,7 +172,8 @@ int acpi_device_set_power(struct acpi_device *device, int state) /* Make sure this is a valid target state */ - if (state == device->power.state) { + /* There is a special case for D0 addressed below. */ + if (state > ACPI_STATE_D0 && state == device->power.state) { ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device [%s] already in %s\n", device->pnp.bus_id, acpi_power_state_string(state))); @@ -228,6 +241,30 @@ int acpi_device_set_power(struct acpi_device *device, int state) if (result) goto end; } + + if (device->power.state == ACPI_STATE_D0) { + int psc; + + /* Nothing to do here if _PSC is not present. */ + if (!device->power.flags.explicit_get) + return 0; + + /* + * The power state of the device was set to D0 last + * time, but that might have happened before a + * system-wide transition involving the platform + * firmware, so it may be necessary to evaluate _PS0 + * for the device here. However, use extra care here + * and evaluate _PSC to check the device's current power + * state, and only invoke _PS0 if the evaluation of _PSC + * is successful and it returns a power state different + * from D0. + */ + result = acpi_dev_pm_explicit_get(device, &psc); + if (result || psc == ACPI_STATE_D0) + return 0; + } + result = acpi_dev_pm_explicit_set(device, ACPI_STATE_D0); } From 83a16e3f6d70da99896c7a2639c0b60fff13afb8 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 25 Jun 2019 13:29:40 +0300 Subject: [PATCH 41/57] PCI / ACPI: Use cached ACPI device state to get PCI device power state The ACPI power state returned by acpi_device_get_power() may depend on the configuration of ACPI power resources in the system which may change any time after acpi_device_get_power() has returned, unless the reference counters of the ACPI power resources in question are set to prevent that from happening. Thus it is invalid to use acpi_device_get_power() in acpi_pci_get_power_state() the way it is done now and the value of the ->power.state field in the corresponding struct acpi_device objects (which reflects the ACPI power resources reference counting, among other things) should be used instead. As an example where this becomes an issue is Intel Ice Lake where the Thunderbolt controller (NHI), two PCIe root ports (RP0 and RP1) and xHCI all share the same power resources. The following picture with power resources marked with [] shows the topology: Host bridge | +- RP0 ---\ +- RP1 ---|--+--> [TBT] +- NHI --/ | | | | v +- xHCI --> [D3C] Here TBT and D3C are the shared ACPI power resources. ACPI _PR3() method of the devices in question returns either TBT or D3C or both. Say we runtime suspend first the root ports RP0 and RP1, then NHI. Now since the TBT power resource is still on when the root ports are runtime suspended their dev->current_state is set to D3hot. When NHI is runtime suspended TBT is finally turned off but state of the root ports remain to be D3hot. Now when the xHCI is runtime suspended D3C gets also turned off. PCI core thus has power states of these devices cached in their dev->current_state as follows: RP0 -> D3hot RP1 -> D3hot NHI -> D3cold xHCI -> D3cold If the user now runs lspci for instance, the result is all 1's like in the below output (00:07.0 is the first root port, RP0): 00:07.0 PCI bridge: Intel Corporation Device 8a1d (rev ff) (prog-if ff) !!! Unknown header type 7f Kernel driver in use: pcieport In short the hardware state is not in sync with the software state anymore. The exact same thing happens with the PME polling thread which ends up bringing the root ports back into D0 after they are runtime suspended. For this reason, modify acpi_pci_get_power_state() so that it uses the ACPI device power state that was cached by the ACPI core. This makes the PCI device power state match the ACPI device power state regardless of state of the shared power resources which may still be on at this point. Link: https://lore.kernel.org/r/20190618161858.77834-2-mika.westerberg@linux.intel.com Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/pci/pci-acpi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 1897847ceb0c..b782acac26c5 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -685,7 +685,8 @@ static pci_power_t acpi_pci_get_power_state(struct pci_dev *dev) if (!adev || !acpi_device_power_manageable(adev)) return PCI_UNKNOWN; - if (acpi_device_get_power(adev, &state) || state == ACPI_STATE_UNKNOWN) + state = adev->power.state; + if (state == ACPI_STATE_UNKNOWN) return PCI_UNKNOWN; return state_conv[state]; From 4533771c1e53b921f66e580135ee64a76986a491 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 25 Jun 2019 13:29:41 +0300 Subject: [PATCH 42/57] ACPI / PM: Introduce concept of a _PR0 dependent device If there are shared power resources between otherwise unrelated devices turning them on causes the other devices sharing them to be powered up as well. In case of PCI devices go into D0uninitialized state meaning that if they were configured to trigger wake that configuration is lost at this point. For this reason introduce a concept of "_PR0 dependent device" that can be added to any ACPI device that has power resources. The dependent device will be included in a list of dependent devices for all power resources returned by the ACPI device's _PR0 (assuming it has one). Whenever a power resource having dependent devices is turned physically on (its _ON method is called) we runtime resume all of them to allow their driver or in case of PCI the PCI core to re-initialize the device and its wake configuration. This adds two functions that can be used to add and remove these dependent devices. Note the dependent device does not necessary need share power resources so this functionality can be used to add "software dependencies" as well if needed. Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/acpi/power.c | 135 ++++++++++++++++++++++++++++++++++++++++ include/acpi/acpi_bus.h | 4 ++ 2 files changed, 139 insertions(+) diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index a916417b9e70..fe1e7bc91a5e 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -42,6 +42,11 @@ ACPI_MODULE_NAME("power"); #define ACPI_POWER_RESOURCE_STATE_ON 0x01 #define ACPI_POWER_RESOURCE_STATE_UNKNOWN 0xFF +struct acpi_power_dependent_device { + struct device *dev; + struct list_head node; +}; + struct acpi_power_resource { struct acpi_device device; struct list_head list_node; @@ -51,6 +56,7 @@ struct acpi_power_resource { unsigned int ref_count; bool wakeup_enabled; struct mutex resource_lock; + struct list_head dependents; }; struct acpi_power_resource_entry { @@ -232,8 +238,121 @@ static int acpi_power_get_list_state(struct list_head *list, int *state) return 0; } +static int +acpi_power_resource_add_dependent(struct acpi_power_resource *resource, + struct device *dev) +{ + struct acpi_power_dependent_device *dep; + int ret = 0; + + mutex_lock(&resource->resource_lock); + list_for_each_entry(dep, &resource->dependents, node) { + /* Only add it once */ + if (dep->dev == dev) + goto unlock; + } + + dep = kzalloc(sizeof(*dep), GFP_KERNEL); + if (!dep) { + ret = -ENOMEM; + goto unlock; + } + + dep->dev = dev; + list_add_tail(&dep->node, &resource->dependents); + dev_dbg(dev, "added power dependency to [%s]\n", resource->name); + +unlock: + mutex_unlock(&resource->resource_lock); + return ret; +} + +static void +acpi_power_resource_remove_dependent(struct acpi_power_resource *resource, + struct device *dev) +{ + struct acpi_power_dependent_device *dep; + + mutex_lock(&resource->resource_lock); + list_for_each_entry(dep, &resource->dependents, node) { + if (dep->dev == dev) { + list_del(&dep->node); + kfree(dep); + dev_dbg(dev, "removed power dependency to [%s]\n", + resource->name); + break; + } + } + mutex_unlock(&resource->resource_lock); +} + +/** + * acpi_device_power_add_dependent - Add dependent device of this ACPI device + * @adev: ACPI device pointer + * @dev: Dependent device + * + * If @adev has non-empty _PR0 the @dev is added as dependent device to all + * power resources returned by it. This means that whenever these power + * resources are turned _ON the dependent devices get runtime resumed. This + * is needed for devices such as PCI to allow its driver to re-initialize + * it after it went to D0uninitialized. + * + * If @adev does not have _PR0 this does nothing. + * + * Returns %0 in case of success and negative errno otherwise. + */ +int acpi_device_power_add_dependent(struct acpi_device *adev, + struct device *dev) +{ + struct acpi_power_resource_entry *entry; + struct list_head *resources; + int ret; + + if (!adev->flags.power_manageable) + return 0; + + resources = &adev->power.states[ACPI_STATE_D0].resources; + list_for_each_entry(entry, resources, node) { + ret = acpi_power_resource_add_dependent(entry->resource, dev); + if (ret) + goto err; + } + + return 0; + +err: + list_for_each_entry(entry, resources, node) + acpi_power_resource_remove_dependent(entry->resource, dev); + + return ret; +} + +/** + * acpi_device_power_remove_dependent - Remove dependent device + * @adev: ACPI device pointer + * @dev: Dependent device + * + * Does the opposite of acpi_device_power_add_dependent() and removes the + * dependent device if it is found. Can be called to @adev that does not + * have _PR0 as well. + */ +void acpi_device_power_remove_dependent(struct acpi_device *adev, + struct device *dev) +{ + struct acpi_power_resource_entry *entry; + struct list_head *resources; + + if (!adev->flags.power_manageable) + return; + + resources = &adev->power.states[ACPI_STATE_D0].resources; + list_for_each_entry_reverse(entry, resources, node) + acpi_power_resource_remove_dependent(entry->resource, dev); +} + static int __acpi_power_on(struct acpi_power_resource *resource) { + struct acpi_power_dependent_device *dep; acpi_status status = AE_OK; status = acpi_evaluate_object(resource->device.handle, "_ON", NULL, NULL); @@ -243,6 +362,21 @@ static int __acpi_power_on(struct acpi_power_resource *resource) ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Power resource [%s] turned on\n", resource->name)); + /* + * If there are other dependents on this power resource we need to + * resume them now so that their drivers can re-initialize the + * hardware properly after it went back to D0. + */ + if (list_empty(&resource->dependents) || + list_is_singular(&resource->dependents)) + return 0; + + list_for_each_entry(dep, &resource->dependents, node) { + dev_dbg(dep->dev, "runtime resuming because [%s] turned on\n", + resource->name); + pm_request_resume(dep->dev); + } + return 0; } @@ -810,6 +944,7 @@ int acpi_add_power_resource(acpi_handle handle) ACPI_STA_DEFAULT); mutex_init(&resource->resource_lock); INIT_LIST_HEAD(&resource->list_node); + INIT_LIST_HEAD(&resource->dependents); resource->name = device->pnp.bus_id; strcpy(acpi_device_name(device), ACPI_POWER_DEVICE_NAME); strcpy(acpi_device_class(device), ACPI_POWER_CLASS); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 31b6c87d6240..4752ff0a9d9b 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -513,6 +513,10 @@ int acpi_device_fix_up_power(struct acpi_device *device); int acpi_bus_update_power(acpi_handle handle, int *state_p); int acpi_device_update_power(struct acpi_device *device, int *state_p); bool acpi_bus_power_manageable(acpi_handle handle); +int acpi_device_power_add_dependent(struct acpi_device *adev, + struct device *dev); +void acpi_device_power_remove_dependent(struct acpi_device *adev, + struct device *dev); #ifdef CONFIG_PM bool acpi_bus_can_wakeup(acpi_handle handle); From 53b22f900c2d282bf6499712930188cc02306e4e Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 25 Jun 2019 13:29:42 +0300 Subject: [PATCH 43/57] PCI / ACPI: Add _PR0 dependent devices If otherwise unrelated PCI devices share ACPI power resources turning them on causes the devices to enter D0uninitialized power state which may cause problems. For example in Intel Ice Lake two root ports (RP0 and RP1), Thunderbolt controller (NHI) and xHCI controller all share power resources as can be ween in the topology below where power resources are marked with []: Host bridge | +- RP0 ---\ +- RP1 ---|--+--> [TBT] +- NHI --/ | | | | v +- xHCI --> [D3C] In a situation where all devices sharing the power resources are in D3cold (the power resources are turned off) and for example the Thunderbolt controller is runtime resumed resulting that the power resources are turned on. This means that the other devices sharing them (RP0, RP1 and xHCI) are transitioned into D0uninitialized state. If they were configured to trigger wake (PME) on a certain event that configuration gets lost after reset so we would need to re-initialize them to get the wakeup working as expected again. To do so we would need to runtime resume all of them to make sure their registers get restored properly before we can runtime suspend them again. Since we just added concept of "_PR0 dependent device" we can solve this by calling the relevant add/remove functions when the PCI device is bind to its ACPI representation. If it has power resources the PCI device will be added as dependent device to them and runtime resumed whenever they are physically turned on. This should make sure PCI core can reconfigure wakes after the device is transitioned into D0uninitialized. Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/pci/pci-acpi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index b782acac26c5..2abe0eeafb53 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -902,6 +902,7 @@ static void pci_acpi_setup(struct device *dev) device_wakeup_enable(dev); acpi_pci_wakeup(pci_dev, false); + acpi_device_power_add_dependent(adev, dev); } static void pci_acpi_cleanup(struct device *dev) @@ -914,6 +915,7 @@ static void pci_acpi_cleanup(struct device *dev) pci_acpi_remove_pm_notifier(adev); if (adev->wakeup.flags.valid) { + acpi_device_power_remove_dependent(adev, dev); if (pci_dev->bridge_d3) device_wakeup_disable(dev); From b51033e06c2ebbad322370f4a35c84488e61b342 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 25 Jun 2019 14:09:12 +0200 Subject: [PATCH 44/57] PCI: PM/ACPI: Refresh all stale power state data in pci_pm_complete() In pci_pm_complete() there are checks to decide whether or not to resume devices that were left in runtime-suspend during the preceding system-wide transition into a sleep state. They involve checking the current power state of the device and comparing it with the power state of it set before the preceding system-wide transition, but the platform component of the device's power state is not handled correctly in there. Namely, on platforms with ACPI, the device power state information needs to be updated with care, so that the reference counters of power resources used by the device (if any) are set to ensure that the refreshed power state of it will be maintained going forward. To that end, introduce a new ->refresh_state() platform PM callback for PCI devices, for asking the platform to refresh the device power state data and ensure that the corresponding power state will be maintained going forward, make it invoke acpi_device_update_power() (for devices with ACPI PM) on platforms with ACPI and make pci_pm_complete() use it, through a new pci_refresh_power_state() wrapper function. Fixes: a0d2a959d3da (PCI: Avoid unnecessary resume after direct-complete) Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg --- drivers/pci/pci-acpi.c | 9 +++++++++ drivers/pci/pci-driver.c | 9 ++++++++- drivers/pci/pci.c | 21 +++++++++++++++++++++ drivers/pci/pci.h | 4 ++++ 4 files changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 2abe0eeafb53..45049f558860 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -692,6 +692,14 @@ static pci_power_t acpi_pci_get_power_state(struct pci_dev *dev) return state_conv[state]; } +static void acpi_pci_refresh_power_state(struct pci_dev *dev) +{ + struct acpi_device *adev = ACPI_COMPANION(&dev->dev); + + if (adev && acpi_device_power_manageable(adev)) + acpi_device_update_power(adev, NULL); +} + static int acpi_pci_propagate_wakeup(struct pci_bus *bus, bool enable) { while (bus->parent) { @@ -749,6 +757,7 @@ static const struct pci_platform_pm_ops acpi_pci_platform_pm = { .is_manageable = acpi_pci_power_manageable, .set_state = acpi_pci_set_power_state, .get_state = acpi_pci_get_power_state, + .refresh_state = acpi_pci_refresh_power_state, .choose_state = acpi_pci_choose_state, .set_wakeup = acpi_pci_wakeup, .need_resume = acpi_pci_need_resume, diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index bd097ea5925c..3149f11ca265 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -710,7 +710,14 @@ static void pci_pm_complete(struct device *dev) if (pm_runtime_suspended(dev) && pm_resume_via_firmware()) { pci_power_t pre_sleep_state = pci_dev->current_state; - pci_update_current_state(pci_dev, pci_dev->current_state); + pci_refresh_power_state(pci_dev); + /* + * On platforms with ACPI this check may also trigger for + * devices sharing power resources if one of those power + * resources has been activated as a result of a change of the + * power state of another device sharing it. However, in that + * case it is also better to resume the device, in general. + */ if (pci_dev->current_state < pre_sleep_state) pm_request_resume(dev); } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e34fb2b3c466..b1f563916036 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -777,6 +777,12 @@ static inline pci_power_t platform_pci_get_power_state(struct pci_dev *dev) return pci_platform_pm ? pci_platform_pm->get_state(dev) : PCI_UNKNOWN; } +static inline void platform_pci_refresh_power_state(struct pci_dev *dev) +{ + if (pci_platform_pm && pci_platform_pm->refresh_state) + pci_platform_pm->refresh_state(dev); +} + static inline pci_power_t platform_pci_choose_state(struct pci_dev *dev) { return pci_platform_pm ? @@ -937,6 +943,21 @@ void pci_update_current_state(struct pci_dev *dev, pci_power_t state) } } +/** + * pci_refresh_power_state - Refresh the given device's power state data + * @dev: Target PCI device. + * + * Ask the platform to refresh the devices power state information and invoke + * pci_update_current_state() to update its current PCI power state. + */ +void pci_refresh_power_state(struct pci_dev *dev) +{ + if (platform_pci_power_manageable(dev)) + platform_pci_refresh_power_state(dev); + + pci_update_current_state(dev, dev->current_state); +} + /** * pci_power_up - Put the given device into D0 forcibly * @dev: PCI device to power up diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 7f9fd93270ed..5db6f985f16d 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -51,6 +51,8 @@ int pci_bus_error_reset(struct pci_dev *dev); * * @get_state: queries the platform firmware for a device's current power state * + * @refresh_state: asks the platform to refresh the device's power state data + * * @choose_state: returns PCI power state of given device preferred by the * platform; to be used during system-wide transitions from a * sleeping state to the working state and vice versa @@ -69,6 +71,7 @@ struct pci_platform_pm_ops { bool (*is_manageable)(struct pci_dev *dev); int (*set_state)(struct pci_dev *dev, pci_power_t state); pci_power_t (*get_state)(struct pci_dev *dev); + void (*refresh_state)(struct pci_dev *dev); pci_power_t (*choose_state)(struct pci_dev *dev); int (*set_wakeup)(struct pci_dev *dev, bool enable); bool (*need_resume)(struct pci_dev *dev); @@ -76,6 +79,7 @@ struct pci_platform_pm_ops { int pci_set_platform_pm(const struct pci_platform_pm_ops *ops); void pci_update_current_state(struct pci_dev *dev, pci_power_t state); +void pci_refresh_power_state(struct pci_dev *dev); void pci_power_up(struct pci_dev *dev); void pci_disable_enabled_device(struct pci_dev *dev); int pci_finish_runtime_suspend(struct pci_dev *dev); From 9801522840cc1073f8064b4c979b7b6995c74bca Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 28 Jun 2019 10:46:55 +0530 Subject: [PATCH 45/57] cpufreq: Don't skip frequency validation for has_target() drivers CPUFREQ_CONST_LOOPS was introduced in a very old commit from pre-2.6 kernel release by commit 6a4a93f9c0d5 ("[CPUFREQ] Fix 'out of sync' issue"). Basically, that commit does two things: - It adds the frequency verification code (which is quite similar to what we have today as well). - And it sets the CPUFREQ_CONST_LOOPS flag only for setpolicy drivers, rightly so based on the code we had then. The idea was to avoid frequency validation for setpolicy drivers as the cpufreq core doesn't know what frequency the hardware is running at and so no point in doing frequency verification. The problem happened when we started to use the same CPUFREQ_CONST_LOOPS flag for constant loops-per-jiffy thing as well and many has_target() drivers started using the same flag and unknowingly skipped the verification of frequency. There is no logical reason behind skipping frequency validation because of the presence of CPUFREQ_CONST_LOOPS flag otherwise. Fix this issue by skipping frequency validation only for setpolicy drivers and always doing it for has_target() drivers irrespective of the presence or absence of CPUFREQ_CONST_LOOPS flag. cpufreq_notify_transition() is only called for has_target() type driver and not for set_policy type, and the check is simply redundant. Remove it as well. Also remove () around freq comparison statement as they aren't required and checkpatch also warns for them. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 555a62646491..2930065f78a4 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -359,12 +359,10 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy, * which is not equal to what the cpufreq core thinks is * "old frequency". */ - if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { - if (policy->cur && (policy->cur != freqs->old)) { - pr_debug("Warning: CPU frequency is %u, cpufreq assumed %u kHz\n", - freqs->old, policy->cur); - freqs->old = policy->cur; - } + if (policy->cur && policy->cur != freqs->old) { + pr_debug("Warning: CPU frequency is %u, cpufreq assumed %u kHz\n", + freqs->old, policy->cur); + freqs->old = policy->cur; } srcu_notifier_call_chain(&cpufreq_transition_notifier_list, @@ -1616,8 +1614,7 @@ static unsigned int __cpufreq_get(struct cpufreq_policy *policy) if (policy->fast_switch_enabled) return ret_freq; - if (ret_freq && policy->cur && - !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { + if (has_target() && ret_freq && policy->cur) { /* verify no discrepancy between actual and saved value exists */ if (unlikely(ret_freq != policy->cur)) { From 2f02a7ecd512288c40bd72bdd4d87ab4f01c1615 Mon Sep 17 00:00:00 2001 From: Fuqian Huang Date: Fri, 28 Jun 2019 10:50:35 +0800 Subject: [PATCH 46/57] kernel: power: swap: use kzalloc() instead of kmalloc() followed by memset() Use zeroing allocator instead of using allocator followed with memset with 0 Signed-off-by: Fuqian Huang Acked-by: Pavel Machek [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki --- kernel/power/swap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index e1912ad13bdc..ca0fcb5ced71 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -974,12 +974,11 @@ static int get_swap_reader(struct swap_map_handle *handle, last = handle->maps = NULL; offset = swsusp_header->image; while (offset) { - tmp = kmalloc(sizeof(*handle->maps), GFP_KERNEL); + tmp = kzalloc(sizeof(*handle->maps), GFP_KERNEL); if (!tmp) { release_swap_reader(handle); return -ENOMEM; } - memset(tmp, 0, sizeof(*tmp)); if (!handle->maps) handle->maps = tmp; if (last) From 5980752e6ef7079c0839576df10f8062d8a48883 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 20 Jun 2019 08:35:49 +0530 Subject: [PATCH 47/57] cpufreq: Consolidate cpufreq_update_current_freq() and __cpufreq_get() Their implementations are quite similar, so modify cpufreq_update_current_freq() somewhat and call it from __cpufreq_get(). Also rename cpufreq_update_current_freq() to cpufreq_verify_current_freq(), as that's what it is doing. Signed-off-by: Viresh Kumar [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 70 ++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 42 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 2930065f78a4..cba9c9e1bd0f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1545,6 +1545,30 @@ static void cpufreq_out_of_sync(struct cpufreq_policy *policy, cpufreq_freq_transition_end(policy, &freqs, 0); } +static unsigned int cpufreq_verify_current_freq(struct cpufreq_policy *policy, bool update) +{ + unsigned int new_freq; + + new_freq = cpufreq_driver->get(policy->cpu); + if (!new_freq) + return 0; + + /* + * If fast frequency switching is used with the given policy, the check + * against policy->cur is pointless, so skip it in that case. + */ + if (policy->fast_switch_enabled || !has_target()) + return new_freq; + + if (policy->cur != new_freq) { + cpufreq_out_of_sync(policy, new_freq); + if (update) + schedule_work(&policy->update); + } + + return new_freq; +} + /** * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur * @cpu: CPU number @@ -1600,30 +1624,10 @@ EXPORT_SYMBOL(cpufreq_quick_get_max); static unsigned int __cpufreq_get(struct cpufreq_policy *policy) { - unsigned int ret_freq = 0; - if (unlikely(policy_is_inactive(policy))) - return ret_freq; + return 0; - ret_freq = cpufreq_driver->get(policy->cpu); - - /* - * If fast frequency switching is used with the given policy, the check - * against policy->cur is pointless, so skip it in that case too. - */ - if (policy->fast_switch_enabled) - return ret_freq; - - if (has_target() && ret_freq && policy->cur) { - /* verify no discrepancy between actual and - saved value exists */ - if (unlikely(ret_freq != policy->cur)) { - cpufreq_out_of_sync(policy, ret_freq); - schedule_work(&policy->update); - } - } - - return ret_freq; + return cpufreq_verify_current_freq(policy, true); } /** @@ -1650,24 +1654,6 @@ unsigned int cpufreq_get(unsigned int cpu) } EXPORT_SYMBOL(cpufreq_get); -static unsigned int cpufreq_update_current_freq(struct cpufreq_policy *policy) -{ - unsigned int new_freq; - - new_freq = cpufreq_driver->get(policy->cpu); - if (!new_freq) - return 0; - - if (!policy->cur) { - pr_debug("cpufreq: Driver did not initialize current freq\n"); - policy->cur = new_freq; - } else if (policy->cur != new_freq && has_target()) { - cpufreq_out_of_sync(policy, new_freq); - } - - return new_freq; -} - static struct subsys_interface cpufreq_interface = { .name = "cpufreq", .subsys = &cpu_subsys, @@ -2149,7 +2135,7 @@ static int cpufreq_start_governor(struct cpufreq_policy *policy) pr_debug("%s: for CPU %u\n", __func__, policy->cpu); if (cpufreq_driver->get) - cpufreq_update_current_freq(policy); + cpufreq_verify_current_freq(policy, false); if (policy->governor->start) { ret = policy->governor->start(policy); @@ -2400,7 +2386,7 @@ void cpufreq_update_policy(unsigned int cpu) * -> ask driver for current freq and notify governors about a change */ if (cpufreq_driver->get && has_target() && - (cpufreq_suspended || WARN_ON(!cpufreq_update_current_freq(policy)))) + (cpufreq_suspended || WARN_ON(!cpufreq_verify_current_freq(policy, false)))) goto unlock; pr_debug("updating policy for CPU %u\n", cpu); From 70a59fde6e69d1d8579f84bf4555bfffb3ce452d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 20 Jun 2019 08:35:50 +0530 Subject: [PATCH 48/57] cpufreq: Avoid calling cpufreq_verify_current_freq() from handle_update() On some occasions cpufreq_verify_current_freq() schedules a work whose callback is handle_update(), which further calls cpufreq_update_policy() which may end up calling cpufreq_verify_current_freq() again. On the other hand, when cpufreq_update_policy() is called from handle_update(), the pointer to the cpufreq policy is already available, but cpufreq_cpu_acquire() is still called to get it in cpufreq_update_policy(), which should be avoided as well. To fix these issues, create a new helper, refresh_frequency_limits(), and make both handle_update() call it cpufreq_update_policy(). Signed-off-by: Viresh Kumar [ rjw: Rename reeval_frequency_limits() as refresh_frequency_limits() ] [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index cba9c9e1bd0f..ceb57af15ca0 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1115,13 +1115,25 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp return ret; } +static void refresh_frequency_limits(struct cpufreq_policy *policy) +{ + struct cpufreq_policy new_policy = *policy; + + pr_debug("updating policy for CPU %u\n", policy->cpu); + + new_policy.min = policy->user_policy.min; + new_policy.max = policy->user_policy.max; + + cpufreq_set_policy(policy, &new_policy); +} + static void handle_update(struct work_struct *work) { struct cpufreq_policy *policy = container_of(work, struct cpufreq_policy, update); - unsigned int cpu = policy->cpu; - pr_debug("handle_update for cpu %u called\n", cpu); - cpufreq_update_policy(cpu); + + pr_debug("handle_update for cpu %u called\n", policy->cpu); + refresh_frequency_limits(policy); } static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) @@ -2376,7 +2388,6 @@ int cpufreq_set_policy(struct cpufreq_policy *policy, void cpufreq_update_policy(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); - struct cpufreq_policy new_policy; if (!policy) return; @@ -2389,12 +2400,7 @@ void cpufreq_update_policy(unsigned int cpu) (cpufreq_suspended || WARN_ON(!cpufreq_verify_current_freq(policy, false)))) goto unlock; - pr_debug("updating policy for CPU %u\n", cpu); - memcpy(&new_policy, policy, sizeof(*policy)); - new_policy.min = policy->user_policy.min; - new_policy.max = policy->user_policy.max; - - cpufreq_set_policy(policy, &new_policy); + refresh_frequency_limits(policy); unlock: cpufreq_cpu_release(policy); From 501debd4aa5edc755037c39ea5a8fba23b41e580 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 1 Jul 2019 12:44:25 +0200 Subject: [PATCH 49/57] PM: ACPI/PCI: Resume all devices during hibernation Both the PCI bus type and the ACPI PM domain avoid resuming runtime-suspended devices with DPM_FLAG_SMART_SUSPEND set during hibernation (before creating the snapshot image of system memory), but that turns out to be a mistake. It leads to functional issues and adds complexity that's hard to justify. For this reason, resume all runtime-suspended PCI devices and all devices in the ACPI PM domains before creating a snapshot image of system memory during hibernation. Fixes: 05087360fd7a (ACPI / PM: Take SMART_SUSPEND driver flag into account) Fixes: c4b65157aeef (PCI / PM: Take SMART_SUSPEND driver flag into account) Link: https://lore.kernel.org/linux-acpi/917d4399-2e22-67b1-9d54-808561f9083f@uwyo.edu/T/#maf065fe6e4974f2a9d79f332ab99dfaba635f64c Reported-by: Robert R. Howell Tested-by: Robert R. Howell Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg Reviewed-by: Hans de Goede --- drivers/acpi/device_pm.c | 13 +++++++------ drivers/pci/pci-driver.c | 16 ++++++++-------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index e54956ae93d3..44172eb18d6e 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -1112,13 +1112,14 @@ EXPORT_SYMBOL_GPL(acpi_subsys_resume_early); int acpi_subsys_freeze(struct device *dev) { /* - * This used to be done in acpi_subsys_prepare() for all devices and - * some drivers may depend on it, so do it here. Ideally, however, - * runtime-suspended devices should not be touched during freeze/thaw - * transitions. + * Resume all runtime-suspended devices before creating a snapshot + * image of system memory, because the restore kernel generally cannot + * be expected to always handle them consistently and they need to be + * put into the runtime-active metastate during system resume anyway, + * so it is better to ensure that the state saved in the image will be + * always consistent with that. */ - if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND)) - pm_runtime_resume(dev); + pm_runtime_resume(dev); return pm_generic_freeze(dev); } diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 5eadbc3d0969..ce54fa3ed94a 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -972,15 +972,15 @@ static int pci_pm_freeze(struct device *dev) } /* - * This used to be done in pci_pm_prepare() for all devices and some - * drivers may depend on it, so do it here. Ideally, runtime-suspended - * devices should not be touched during freeze/thaw transitions, - * however. + * Resume all runtime-suspended devices before creating a snapshot + * image of system memory, because the restore kernel generally cannot + * be expected to always handle them consistently and they need to be + * put into the runtime-active metastate during system resume anyway, + * so it is better to ensure that the state saved in the image will be + * always consistent with that. */ - if (!dev_pm_smart_suspend_and_suspended(dev)) { - pm_runtime_resume(dev); - pci_dev->state_saved = false; - } + pm_runtime_resume(dev); + pci_dev->state_saved = false; if (pm->freeze) { int error; From a78ae45a795aa579efa4094729073bbdab02da25 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 1 Jul 2019 12:46:45 +0200 Subject: [PATCH 50/57] PCI: PM: Simplify bus-level hibernation callbacks After a previous change causing all runtime-suspended PCI devices to be resumed before creating a snapshot image of memory during hibernation, it is not necessary to worry about the case in which them might be left in runtime-suspend any more, so get rid of the code related to that from bus-level PCI hibernation callbacks. Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg Reviewed-by: Hans de Goede --- drivers/pci/pci-driver.c | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index ce54fa3ed94a..ac3e80baef0a 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -994,22 +994,11 @@ static int pci_pm_freeze(struct device *dev) return 0; } -static int pci_pm_freeze_late(struct device *dev) -{ - if (dev_pm_smart_suspend_and_suspended(dev)) - return 0; - - return pm_generic_freeze_late(dev); -} - static int pci_pm_freeze_noirq(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; - if (dev_pm_smart_suspend_and_suspended(dev)) - return 0; - if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend_late(dev, PMSG_FREEZE); @@ -1039,16 +1028,6 @@ static int pci_pm_thaw_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; - /* - * If the device is in runtime suspend, the code below may not work - * correctly with it, so skip that code and make the PM core skip all of - * the subsequent "thaw" callbacks for the device. - */ - if (dev_pm_smart_suspend_and_suspended(dev)) { - dev_pm_skip_next_resume_phases(dev); - return 0; - } - if (pcibios_pm_ops.thaw_noirq) { error = pcibios_pm_ops.thaw_noirq(dev); if (error) @@ -1183,10 +1162,6 @@ static int pci_pm_restore_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; - /* This is analogous to the pci_pm_resume_noirq() case. */ - if (dev_pm_smart_suspend_and_suspended(dev)) - pm_runtime_set_active(dev); - if (pcibios_pm_ops.restore_noirq) { error = pcibios_pm_ops.restore_noirq(dev); if (error) @@ -1235,7 +1210,6 @@ static int pci_pm_restore(struct device *dev) #else /* !CONFIG_HIBERNATE_CALLBACKS */ #define pci_pm_freeze NULL -#define pci_pm_freeze_late NULL #define pci_pm_freeze_noirq NULL #define pci_pm_thaw NULL #define pci_pm_thaw_noirq NULL @@ -1361,7 +1335,6 @@ static const struct dev_pm_ops pci_dev_pm_ops = { .suspend_late = pci_pm_suspend_late, .resume = pci_pm_resume, .freeze = pci_pm_freeze, - .freeze_late = pci_pm_freeze_late, .thaw = pci_pm_thaw, .poweroff = pci_pm_poweroff, .poweroff_late = pci_pm_poweroff_late, From 3cd7957e85e67120bb9f6bfb75d81dcc19af282b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 1 Jul 2019 12:54:10 +0200 Subject: [PATCH 51/57] ACPI: PM: Simplify and fix PM domain hibernation callbacks First, after a previous change causing all runtime-suspended devices in the ACPI PM domain (and ACPI LPSS devices) to be resumed before creating a snapshot image of memory during hibernation, it is not necessary to worry about the case in which them might be left in runtime-suspend any more, so get rid of the code related to that from ACPI PM domain and ACPI LPSS hibernation callbacks. Second, it is not correct to use pm_generic_resume_early() and acpi_subsys_resume_noirq() in hibernation "restore" callbacks (which currently happens in the ACPI PM domain and ACPI LPSS), so introduce proper _restore_late and _restore_noirq callbacks for the ACPI PM domain and ACPI LPSS. Fixes: 05087360fd7a (ACPI / PM: Take SMART_SUSPEND driver flag into account) Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg Reviewed-by: Hans de Goede --- drivers/acpi/acpi_lpss.c | 61 ++++++++++++++++++++++++++++++++++------ drivers/acpi/device_pm.c | 61 ++++++---------------------------------- include/linux/acpi.h | 10 ------- 3 files changed, 61 insertions(+), 71 deletions(-) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index cf768608437e..8ea836857691 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -1094,16 +1094,62 @@ static int acpi_lpss_resume_noirq(struct device *dev) struct lpss_private_data *pdata = acpi_driver_data(ACPI_COMPANION(dev)); int ret; - ret = acpi_subsys_resume_noirq(dev); + /* Follow acpi_subsys_resume_noirq(). */ + if (dev_pm_may_skip_resume(dev)) + return 0; + + if (dev_pm_smart_suspend_and_suspended(dev)) + pm_runtime_set_active(dev); + + ret = pm_generic_resume_noirq(dev); if (ret) return ret; - if (!dev_pm_may_skip_resume(dev) && pdata->dev_desc->resume_from_noirq) - ret = acpi_lpss_do_resume_early(dev); + if (!pdata->dev_desc->resume_from_noirq) + return 0; - return ret; + /* + * The driver's ->resume_early callback will be invoked by + * acpi_lpss_do_resume_early(), with the assumption that the driver + * really wanted to run that code in ->resume_noirq, but it could not + * run before acpi_dev_resume() and the driver expected the latter to be + * called in the "early" phase. + */ + return acpi_lpss_do_resume_early(dev); } +static int acpi_lpss_do_restore_early(struct device *dev) +{ + int ret = acpi_lpss_resume(dev); + + return ret ? ret : pm_generic_restore_early(dev); +} + +static int acpi_lpss_restore_early(struct device *dev) +{ + struct lpss_private_data *pdata = acpi_driver_data(ACPI_COMPANION(dev)); + + if (pdata->dev_desc->resume_from_noirq) + return 0; + + return acpi_lpss_do_restore_early(dev); +} + +static int acpi_lpss_restore_noirq(struct device *dev) +{ + struct lpss_private_data *pdata = acpi_driver_data(ACPI_COMPANION(dev)); + int ret; + + ret = pm_generic_restore_noirq(dev); + if (ret) + return ret; + + if (!pdata->dev_desc->resume_from_noirq) + return 0; + + /* This is analogous to what happens in acpi_lpss_resume_noirq(). */ + return acpi_lpss_do_restore_early(dev); +} #endif /* CONFIG_PM_SLEEP */ static int acpi_lpss_runtime_suspend(struct device *dev) @@ -1137,14 +1183,11 @@ static struct dev_pm_domain acpi_lpss_pm_domain = { .resume_noirq = acpi_lpss_resume_noirq, .resume_early = acpi_lpss_resume_early, .freeze = acpi_subsys_freeze, - .freeze_late = acpi_subsys_freeze_late, - .freeze_noirq = acpi_subsys_freeze_noirq, - .thaw_noirq = acpi_subsys_thaw_noirq, .poweroff = acpi_subsys_suspend, .poweroff_late = acpi_lpss_suspend_late, .poweroff_noirq = acpi_lpss_suspend_noirq, - .restore_noirq = acpi_lpss_resume_noirq, - .restore_early = acpi_lpss_resume_early, + .restore_noirq = acpi_lpss_restore_noirq, + .restore_early = acpi_lpss_restore_early, #endif .runtime_suspend = acpi_lpss_runtime_suspend, .runtime_resume = acpi_lpss_runtime_resume, diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 44172eb18d6e..52fc9042a107 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -1073,7 +1073,7 @@ EXPORT_SYMBOL_GPL(acpi_subsys_suspend_noirq); * acpi_subsys_resume_noirq - Run the device driver's "noirq" resume callback. * @dev: Device to handle. */ -int acpi_subsys_resume_noirq(struct device *dev) +static int acpi_subsys_resume_noirq(struct device *dev) { if (dev_pm_may_skip_resume(dev)) return 0; @@ -1088,7 +1088,6 @@ int acpi_subsys_resume_noirq(struct device *dev) return pm_generic_resume_noirq(dev); } -EXPORT_SYMBOL_GPL(acpi_subsys_resume_noirq); /** * acpi_subsys_resume_early - Resume device using ACPI. @@ -1098,12 +1097,11 @@ EXPORT_SYMBOL_GPL(acpi_subsys_resume_noirq); * generic early resume procedure for it during system transition into the * working state. */ -int acpi_subsys_resume_early(struct device *dev) +static int acpi_subsys_resume_early(struct device *dev) { int ret = acpi_dev_resume(dev); return ret ? ret : pm_generic_resume_early(dev); } -EXPORT_SYMBOL_GPL(acpi_subsys_resume_early); /** * acpi_subsys_freeze - Run the device driver's freeze callback. @@ -1126,52 +1124,15 @@ int acpi_subsys_freeze(struct device *dev) EXPORT_SYMBOL_GPL(acpi_subsys_freeze); /** - * acpi_subsys_freeze_late - Run the device driver's "late" freeze callback. - * @dev: Device to handle. + * acpi_subsys_restore_early - Restore device using ACPI. + * @dev: Device to restore. */ -int acpi_subsys_freeze_late(struct device *dev) +int acpi_subsys_restore_early(struct device *dev) { - - if (dev_pm_smart_suspend_and_suspended(dev)) - return 0; - - return pm_generic_freeze_late(dev); + int ret = acpi_dev_resume(dev); + return ret ? ret : pm_generic_restore_early(dev); } -EXPORT_SYMBOL_GPL(acpi_subsys_freeze_late); - -/** - * acpi_subsys_freeze_noirq - Run the device driver's "noirq" freeze callback. - * @dev: Device to handle. - */ -int acpi_subsys_freeze_noirq(struct device *dev) -{ - - if (dev_pm_smart_suspend_and_suspended(dev)) - return 0; - - return pm_generic_freeze_noirq(dev); -} -EXPORT_SYMBOL_GPL(acpi_subsys_freeze_noirq); - -/** - * acpi_subsys_thaw_noirq - Run the device driver's "noirq" thaw callback. - * @dev: Device to handle. - */ -int acpi_subsys_thaw_noirq(struct device *dev) -{ - /* - * If the device is in runtime suspend, the "thaw" code may not work - * correctly with it, so skip the driver callback and make the PM core - * skip all of the subsequent "thaw" callbacks for the device. - */ - if (dev_pm_smart_suspend_and_suspended(dev)) { - dev_pm_skip_next_resume_phases(dev); - return 0; - } - - return pm_generic_thaw_noirq(dev); -} -EXPORT_SYMBOL_GPL(acpi_subsys_thaw_noirq); +EXPORT_SYMBOL_GPL(acpi_subsys_restore_early); #endif /* CONFIG_PM_SLEEP */ static struct dev_pm_domain acpi_general_pm_domain = { @@ -1187,14 +1148,10 @@ static struct dev_pm_domain acpi_general_pm_domain = { .resume_noirq = acpi_subsys_resume_noirq, .resume_early = acpi_subsys_resume_early, .freeze = acpi_subsys_freeze, - .freeze_late = acpi_subsys_freeze_late, - .freeze_noirq = acpi_subsys_freeze_noirq, - .thaw_noirq = acpi_subsys_thaw_noirq, .poweroff = acpi_subsys_suspend, .poweroff_late = acpi_subsys_suspend_late, .poweroff_noirq = acpi_subsys_suspend_noirq, - .restore_noirq = acpi_subsys_resume_noirq, - .restore_early = acpi_subsys_resume_early, + .restore_early = acpi_subsys_restore_early, #endif }, }; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d315d86844e4..ea7415440901 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -918,26 +918,16 @@ int acpi_subsys_prepare(struct device *dev); void acpi_subsys_complete(struct device *dev); int acpi_subsys_suspend_late(struct device *dev); int acpi_subsys_suspend_noirq(struct device *dev); -int acpi_subsys_resume_noirq(struct device *dev); -int acpi_subsys_resume_early(struct device *dev); int acpi_subsys_suspend(struct device *dev); int acpi_subsys_freeze(struct device *dev); -int acpi_subsys_freeze_late(struct device *dev); -int acpi_subsys_freeze_noirq(struct device *dev); -int acpi_subsys_thaw_noirq(struct device *dev); #else static inline int acpi_dev_resume_early(struct device *dev) { return 0; } static inline int acpi_subsys_prepare(struct device *dev) { return 0; } static inline void acpi_subsys_complete(struct device *dev) {} static inline int acpi_subsys_suspend_late(struct device *dev) { return 0; } static inline int acpi_subsys_suspend_noirq(struct device *dev) { return 0; } -static inline int acpi_subsys_resume_noirq(struct device *dev) { return 0; } -static inline int acpi_subsys_resume_early(struct device *dev) { return 0; } static inline int acpi_subsys_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_freeze(struct device *dev) { return 0; } -static inline int acpi_subsys_freeze_late(struct device *dev) { return 0; } -static inline int acpi_subsys_freeze_noirq(struct device *dev) { return 0; } -static inline int acpi_subsys_thaw_noirq(struct device *dev) { return 0; } #endif #ifdef CONFIG_ACPI From c95b7595f85c688d5c569ddbbd6ab6a4bdae2f36 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 1 Jul 2019 12:54:29 +0200 Subject: [PATCH 52/57] ACPI: PM: Introduce "poweroff" callbacks for ACPI PM domain and LPSS In general, it is not correct to call pm_generic_suspend(), pm_generic_suspend_late() and pm_generic_suspend_noirq() during the hibernation's "poweroff" transition, because device drivers may provide special callbacks to be invoked then and the wrappers in question cause system suspend callbacks to be run. Unfortunately, that happens in the ACPI PM domain and ACPI LPSS. To address this potential issue, introduce "poweroff" callbacks for the ACPI PM and LPSS that will use pm_generic_poweroff(), pm_generic_poweroff_late() and pm_generic_poweroff_noirq() as appropriate. Fixes: 05087360fd7a (ACPI / PM: Take SMART_SUSPEND driver flag into account) Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg Reviewed-by: Hans de Goede --- drivers/acpi/acpi_lpss.c | 50 +++++++++++++++++++++++++++++++--- drivers/acpi/device_pm.c | 58 +++++++++++++++++++++++++++++++++++++--- include/linux/acpi.h | 2 ++ 3 files changed, 104 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 8ea836857691..a7396e18f168 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -1064,6 +1064,13 @@ static int acpi_lpss_suspend_noirq(struct device *dev) int ret; if (pdata->dev_desc->resume_from_noirq) { + /* + * The driver's ->suspend_late callback will be invoked by + * acpi_lpss_do_suspend_late(), with the assumption that the + * driver really wanted to run that code in ->suspend_noirq, but + * it could not run after acpi_dev_suspend() and the driver + * expected the latter to be called in the "late" phase. + */ ret = acpi_lpss_do_suspend_late(dev); if (ret) return ret; @@ -1150,6 +1157,43 @@ static int acpi_lpss_restore_noirq(struct device *dev) /* This is analogous to what happens in acpi_lpss_resume_noirq(). */ return acpi_lpss_do_restore_early(dev); } + +static int acpi_lpss_do_poweroff_late(struct device *dev) +{ + int ret = pm_generic_poweroff_late(dev); + + return ret ? ret : acpi_lpss_suspend(dev, device_may_wakeup(dev)); +} + +static int acpi_lpss_poweroff_late(struct device *dev) +{ + struct lpss_private_data *pdata = acpi_driver_data(ACPI_COMPANION(dev)); + + if (dev_pm_smart_suspend_and_suspended(dev)) + return 0; + + if (pdata->dev_desc->resume_from_noirq) + return 0; + + return acpi_lpss_do_poweroff_late(dev); +} + +static int acpi_lpss_poweroff_noirq(struct device *dev) +{ + struct lpss_private_data *pdata = acpi_driver_data(ACPI_COMPANION(dev)); + + if (dev_pm_smart_suspend_and_suspended(dev)) + return 0; + + if (pdata->dev_desc->resume_from_noirq) { + /* This is analogous to the acpi_lpss_suspend_noirq() case. */ + int ret = acpi_lpss_do_poweroff_late(dev); + if (ret) + return ret; + } + + return pm_generic_poweroff_noirq(dev); +} #endif /* CONFIG_PM_SLEEP */ static int acpi_lpss_runtime_suspend(struct device *dev) @@ -1183,9 +1227,9 @@ static struct dev_pm_domain acpi_lpss_pm_domain = { .resume_noirq = acpi_lpss_resume_noirq, .resume_early = acpi_lpss_resume_early, .freeze = acpi_subsys_freeze, - .poweroff = acpi_subsys_suspend, - .poweroff_late = acpi_lpss_suspend_late, - .poweroff_noirq = acpi_lpss_suspend_noirq, + .poweroff = acpi_subsys_poweroff, + .poweroff_late = acpi_lpss_poweroff_late, + .poweroff_noirq = acpi_lpss_poweroff_noirq, .restore_noirq = acpi_lpss_restore_noirq, .restore_early = acpi_lpss_restore_early, #endif diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 52fc9042a107..6a9d41c44b70 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -1133,6 +1133,58 @@ int acpi_subsys_restore_early(struct device *dev) return ret ? ret : pm_generic_restore_early(dev); } EXPORT_SYMBOL_GPL(acpi_subsys_restore_early); + +/** + * acpi_subsys_poweroff - Run the device driver's poweroff callback. + * @dev: Device to handle. + * + * Follow PCI and resume devices from runtime suspend before running their + * system poweroff callbacks, unless the driver can cope with runtime-suspended + * devices during system suspend and there are no ACPI-specific reasons for + * resuming them. + */ +int acpi_subsys_poweroff(struct device *dev) +{ + if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) || + acpi_dev_needs_resume(dev, ACPI_COMPANION(dev))) + pm_runtime_resume(dev); + + return pm_generic_poweroff(dev); +} +EXPORT_SYMBOL_GPL(acpi_subsys_poweroff); + +/** + * acpi_subsys_poweroff_late - Run the device driver's poweroff callback. + * @dev: Device to handle. + * + * Carry out the generic late poweroff procedure for @dev and use ACPI to put + * it into a low-power state during system transition into a sleep state. + */ +static int acpi_subsys_poweroff_late(struct device *dev) +{ + int ret; + + if (dev_pm_smart_suspend_and_suspended(dev)) + return 0; + + ret = pm_generic_poweroff_late(dev); + if (ret) + return ret; + + return acpi_dev_suspend(dev, device_may_wakeup(dev)); +} + +/** + * acpi_subsys_poweroff_noirq - Run the driver's "noirq" poweroff callback. + * @dev: Device to suspend. + */ +static int acpi_subsys_poweroff_noirq(struct device *dev) +{ + if (dev_pm_smart_suspend_and_suspended(dev)) + return 0; + + return pm_generic_poweroff_noirq(dev); +} #endif /* CONFIG_PM_SLEEP */ static struct dev_pm_domain acpi_general_pm_domain = { @@ -1148,9 +1200,9 @@ static struct dev_pm_domain acpi_general_pm_domain = { .resume_noirq = acpi_subsys_resume_noirq, .resume_early = acpi_subsys_resume_early, .freeze = acpi_subsys_freeze, - .poweroff = acpi_subsys_suspend, - .poweroff_late = acpi_subsys_suspend_late, - .poweroff_noirq = acpi_subsys_suspend_noirq, + .poweroff = acpi_subsys_poweroff, + .poweroff_late = acpi_subsys_poweroff_late, + .poweroff_noirq = acpi_subsys_poweroff_noirq, .restore_early = acpi_subsys_restore_early, #endif }, diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ea7415440901..22840633c28c 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -920,6 +920,7 @@ int acpi_subsys_suspend_late(struct device *dev); int acpi_subsys_suspend_noirq(struct device *dev); int acpi_subsys_suspend(struct device *dev); int acpi_subsys_freeze(struct device *dev); +int acpi_subsys_poweroff(struct device *dev); #else static inline int acpi_dev_resume_early(struct device *dev) { return 0; } static inline int acpi_subsys_prepare(struct device *dev) { return 0; } @@ -928,6 +929,7 @@ static inline int acpi_subsys_suspend_late(struct device *dev) { return 0; } static inline int acpi_subsys_suspend_noirq(struct device *dev) { return 0; } static inline int acpi_subsys_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_freeze(struct device *dev) { return 0; } +static inline int acpi_subsys_poweroff(struct device *dev) { return 0; } #endif #ifdef CONFIG_ACPI From 99465f12babd4f3d5659b6c147bb5b9976dfe033 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 1 Jul 2019 12:55:43 +0200 Subject: [PATCH 53/57] ACPI: PM: Drop unused function and function header Remove a leftover function header and a static inline stub with no users from the ACPI header file. Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg Reviewed-by: Hans de Goede --- include/linux/acpi.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 22840633c28c..cbbe45e408d9 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -913,7 +913,6 @@ static inline int acpi_dev_pm_attach(struct device *dev, bool power_on) #endif #if defined(CONFIG_ACPI) && defined(CONFIG_PM_SLEEP) -int acpi_dev_suspend_late(struct device *dev); int acpi_subsys_prepare(struct device *dev); void acpi_subsys_complete(struct device *dev); int acpi_subsys_suspend_late(struct device *dev); @@ -922,7 +921,6 @@ int acpi_subsys_suspend(struct device *dev); int acpi_subsys_freeze(struct device *dev); int acpi_subsys_poweroff(struct device *dev); #else -static inline int acpi_dev_resume_early(struct device *dev) { return 0; } static inline int acpi_subsys_prepare(struct device *dev) { return 0; } static inline void acpi_subsys_complete(struct device *dev) {} static inline int acpi_subsys_suspend_late(struct device *dev) { return 0; } From 5004efbb3611f0672af6c92823d17100603bcccd Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 1 Jul 2019 15:58:27 +0200 Subject: [PATCH 54/57] Documentation: ABI: power: Add missing newline at end of file "git diff" says: \ No newline at end of file after modifying the files. Signed-off-by: Geert Uytterhoeven Signed-off-by: Rafael J. Wysocki --- Documentation/ABI/testing/sysfs-power | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index 18b7dc929234..3c5130355011 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -300,4 +300,4 @@ Description: attempt. Using this sysfs file will override any values that were - set using the kernel command line for disk offset. \ No newline at end of file + set using the kernel command line for disk offset. From 9ed411c06dd1cdf6171b992f68c37bc2d66054f9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Jul 2019 01:02:49 +0200 Subject: [PATCH 55/57] ACPI: PM: Unexport acpi_device_get_power() Using acpi_device_get_power() outside of ACPI device initialization and ACPI sysfs is problematic due to the way in which power resources are handled by it, so unexport it and add a paragraph explaining the pitfalls to its kerneldoc comment. Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg --- drivers/acpi/device_pm.c | 6 +++++- drivers/acpi/internal.h | 7 +++++++ include/acpi/acpi_bus.h | 1 - 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index e54956ae93d3..81eb15a0cc42 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -53,6 +53,11 @@ const char *acpi_power_state_string(int state) * This function does not update the device's power.state field, but it may * update its parent's power.state field (when the parent's power state is * unknown and the device's power state turns out to be D0). + * + * Also, it does not update power resource reference counters to ensure that + * the power state returned by it will be persistent and it may return a power + * state shallower than previously set by acpi_device_set_power() for @device + * (if that power state depends on any power resources). */ int acpi_device_get_power(struct acpi_device *device, int *state) { @@ -118,7 +123,6 @@ int acpi_device_get_power(struct acpi_device *device, int *state) return 0; } -EXPORT_SYMBOL(acpi_device_get_power); static int acpi_dev_pm_explicit_set(struct acpi_device *adev, int state) { diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index f6157d4d637a..f4c2fe6be4f2 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -139,8 +139,15 @@ int acpi_power_get_inferred_state(struct acpi_device *device, int *state); int acpi_power_on_resources(struct acpi_device *device, int state); int acpi_power_transition(struct acpi_device *device, int state); +/* -------------------------------------------------------------------------- + Device Power Management + -------------------------------------------------------------------------- */ +int acpi_device_get_power(struct acpi_device *device, int *state); int acpi_wakeup_device_init(void); +/* -------------------------------------------------------------------------- + Processor + -------------------------------------------------------------------------- */ #ifdef CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC void acpi_early_processor_set_pdc(void); #else diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 4752ff0a9d9b..e85dc78f6875 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -506,7 +506,6 @@ int acpi_bus_get_status(struct acpi_device *device); int acpi_bus_set_power(acpi_handle handle, int state); const char *acpi_power_state_string(int state); -int acpi_device_get_power(struct acpi_device *device, int *state); int acpi_device_set_power(struct acpi_device *device, int state); int acpi_bus_init_power(struct acpi_device *device); int acpi_device_fix_up_power(struct acpi_device *device); From 02bd45a28bf32993e396fdcfd7d7c7cdc0847ed1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Jul 2019 01:05:38 +0200 Subject: [PATCH 56/57] PM: sleep: Drop dev_pm_skip_next_resume_phases() After recent hibernation-related changes, there are no more callers of dev_pm_skip_next_resume_phases() except for the PM core itself in which it is more straightforward to run the statements from that function directly, so do that and drop it. Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg --- drivers/base/power/main.c | 19 +++---------------- include/linux/pm.h | 1 - 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 1e84b8aa220f..7fb2c39bc725 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -529,21 +529,6 @@ static void dpm_watchdog_clear(struct dpm_watchdog *wd) /*------------------------- Resume routines -------------------------*/ -/** - * dev_pm_skip_next_resume_phases - Skip next system resume phases for device. - * @dev: Target device. - * - * Make the core skip the "early resume" and "resume" phases for @dev. - * - * This function can be called by middle-layer code during the "noirq" phase of - * system resume if necessary, but not by device drivers. - */ -void dev_pm_skip_next_resume_phases(struct device *dev) -{ - dev->power.is_late_suspended = false; - dev->power.is_suspended = false; -} - /** * suspend_event - Return a "suspend" message for given "resume" one. * @resume_msg: PM message representing a system-wide resume transition. @@ -681,6 +666,9 @@ Skip: dev->power.is_noirq_suspended = false; if (skip_resume) { + /* Make the next phases of resume skip the device. */ + dev->power.is_late_suspended = false; + dev->power.is_suspended = false; /* * The device is going to be left in suspend, but it might not * have been in runtime suspend before the system suspended, so @@ -689,7 +677,6 @@ Skip: * device again. */ pm_runtime_set_suspended(dev); - dev_pm_skip_next_resume_phases(dev); } Out: diff --git a/include/linux/pm.h b/include/linux/pm.h index 345d74a727e3..283fb3defe56 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -760,7 +760,6 @@ extern int pm_generic_poweroff_late(struct device *dev); extern int pm_generic_poweroff(struct device *dev); extern void pm_generic_complete(struct device *dev); -extern void dev_pm_skip_next_resume_phases(struct device *dev); extern bool dev_pm_may_skip_resume(struct device *dev); extern bool dev_pm_smart_suspend_and_suspended(struct device *dev); From ad5a449b707b909a91ed59109f421a1b965c6004 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 4 Jul 2019 02:43:32 +0000 Subject: [PATCH 57/57] ACPI: PM: Make acpi_sleep_state_supported() non-static With some upcoming patches to save/restore the Hyper-V drivers related states, a Linux VM running on Hyper-V will be able to hibernate. When a Linux VM hibernates, unluckily we must disable the memory hot-add/remove and balloon up/down capabilities in the hv_balloon driver (drivers/hv/hv_balloon.c), because these can not really work according to the design of the related back-end driver on the host. By default, Hyper-V does not enable the virtual ACPI S4 state for a VM; on recent Hyper-V hosts, the administrator is able to enable the virtual ACPI S4 state for a VM, so we hope to use the presence of the virtual ACPI S4 state as a hint for hv_balloon to disable the aforementioned capabilities. In this way, hibernation will work more reliably, from the user's perspective. By marking acpi_sleep_state_supported() non-static, we'll be able to implement a hv_is_hibernation_supported() API in the always-built-in module arch/x86/hyperv/hv_init.c, and the API will be called by hv_balloon. Signed-off-by: Dexuan Cui Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sleep.c | 2 +- include/acpi/acpi_bus.h | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index e21a0f5fdadd..a64b81719611 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -79,7 +79,7 @@ static int acpi_sleep_prepare(u32 acpi_state) return 0; } -static bool acpi_sleep_state_supported(u8 sleep_state) +bool acpi_sleep_state_supported(u8 sleep_state) { acpi_status status; u8 type_a, type_b; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 52d4375bde9d..37c0bac4ad6a 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -664,6 +664,12 @@ static inline int acpi_pm_set_bridge_wakeup(struct device *dev, bool enable) } #endif +#ifdef CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT +bool acpi_sleep_state_supported(u8 sleep_state); +#else +static inline bool acpi_sleep_state_supported(u8 sleep_state) { return false; } +#endif + #ifdef CONFIG_ACPI_SLEEP u32 acpi_target_system_state(void); #else