From eb0b3e78e6290e5a1bf970830939d5c8c9151892 Mon Sep 17 00:00:00 2001 From: Pan Xinhui Date: Tue, 7 Jul 2015 20:43:26 +0800 Subject: [PATCH 01/53] acpi-cpufreq: replace per_cpu with driver_data of policy Drivers can store their internal per-policy information in policy->driver_data, lets use it. we have benefits after this replacing. 1) memory saving. 2) policy is shared by several cpus, per_cpu seems not correct. using *driver_data* is more reasonable. 3) fix a memory leak in acpi_cpufreq_cpu_exit. as policy->cpu might change during cpu hotplug. So sometimes we cant't free *data*, use *driver_data* to fix it. 4) fix a zero return value of get_cur_freq_on_cpu. Only per_cpu of policy->cpu is set to *data*, if we try to get cpufreq on other cpus, we get zero instead of correct values. Use *driver_data* to fix it. Signed-off-by: Pan Xinhui Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 40 +++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 0136dfcdabf0..e7fcaa6eedbd 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -72,8 +72,6 @@ struct acpi_cpufreq_data { cpumask_var_t freqdomain_cpus; }; -static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); - /* acpi_perf_data is a pointer to percpu data. */ static struct acpi_processor_performance __percpu *acpi_perf_data; @@ -144,7 +142,7 @@ static int _store_boost(int val) static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf) { - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); + struct acpi_cpufreq_data *data = policy->driver_data; return cpufreq_show_cpus(data->freqdomain_cpus, buf); } @@ -327,7 +325,8 @@ static void drv_write(struct drv_cmd *cmd) put_cpu(); } -static u32 get_cur_val(const struct cpumask *mask) +static u32 +get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data) { struct acpi_processor_performance *perf; struct drv_cmd cmd; @@ -335,7 +334,7 @@ static u32 get_cur_val(const struct cpumask *mask) if (unlikely(cpumask_empty(mask))) return 0; - switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) { + switch (data->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: cmd.type = SYSTEM_INTEL_MSR_CAPABLE; cmd.addr.msr.reg = MSR_IA32_PERF_CTL; @@ -346,7 +345,7 @@ static u32 get_cur_val(const struct cpumask *mask) break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; - perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data; + perf = data->acpi_data; cmd.addr.io.port = perf->control_register.address; cmd.addr.io.bit_width = perf->control_register.bit_width; break; @@ -364,19 +363,24 @@ static u32 get_cur_val(const struct cpumask *mask) static unsigned int get_cur_freq_on_cpu(unsigned int cpu) { - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); + struct acpi_cpufreq_data *data; + struct cpufreq_policy *policy; unsigned int freq; unsigned int cached_freq; pr_debug("get_cur_freq_on_cpu (%d)\n", cpu); - if (unlikely(data == NULL || - data->acpi_data == NULL || data->freq_table == NULL)) { + policy = cpufreq_cpu_get(cpu); + if (unlikely(!policy)) + return 0; + + data = policy->driver_data; + cpufreq_cpu_put(policy); + if (unlikely(!data || !data->acpi_data || !data->freq_table)) return 0; - } cached_freq = data->freq_table[data->acpi_data->state].frequency; - freq = extract_freq(get_cur_val(cpumask_of(cpu)), data); + freq = extract_freq(get_cur_val(cpumask_of(cpu), data), data); if (freq != cached_freq) { /* * The dreaded BIOS frequency change behind our back. @@ -397,7 +401,7 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, unsigned int i; for (i = 0; i < 100; i++) { - cur_freq = extract_freq(get_cur_val(mask), data); + cur_freq = extract_freq(get_cur_val(mask, data), data); if (cur_freq == freq) return 1; udelay(10); @@ -408,7 +412,7 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, static int acpi_cpufreq_target(struct cpufreq_policy *policy, unsigned int index) { - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); + struct acpi_cpufreq_data *data = policy->driver_data; struct acpi_processor_performance *perf; struct drv_cmd cmd; unsigned int next_perf_state = 0; /* Index into perf table */ @@ -673,7 +677,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) } data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu); - per_cpu(acfreq_data, cpu) = data; + policy->driver_data = data; if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; @@ -843,19 +847,19 @@ err_free_mask: free_cpumask_var(data->freqdomain_cpus); err_free: kfree(data); - per_cpu(acfreq_data, cpu) = NULL; + policy->driver_data = NULL; return result; } static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) { - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); + struct acpi_cpufreq_data *data = policy->driver_data; pr_debug("acpi_cpufreq_cpu_exit\n"); if (data) { - per_cpu(acfreq_data, policy->cpu) = NULL; + policy->driver_data = NULL; acpi_processor_unregister_performance(data->acpi_data, policy->cpu); free_cpumask_var(data->freqdomain_cpus); @@ -868,7 +872,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) static int acpi_cpufreq_resume(struct cpufreq_policy *policy) { - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); + struct acpi_cpufreq_data *data = policy->driver_data; pr_debug("acpi_cpufreq_resume\n"); From 8101f99703048ceaa31c756abe1098d099249ad9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 8 Jul 2015 15:12:15 +0530 Subject: [PATCH 02/53] cpufreq: cpufreq_add_dev: name goto labels based on what they do These labels are are named in two ways normally: - Based on what caused to jump to such labels - Based on what we do under such labels We follow the first naming convention today and that leads to multiple labels for doing the same work. Fix it by switching to the second way of naming them. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 26063afb3eba..702777b1d645 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1288,7 +1288,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) recover_policy = false; policy = cpufreq_policy_alloc(dev); if (!policy) - goto nomem_out; + goto out_release_rwsem; } cpumask_copy(policy->cpus, cpumask_of(cpu)); @@ -1299,7 +1299,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) ret = cpufreq_driver->init(policy); if (ret) { pr_debug("initialization failed\n"); - goto err_set_policy_cpu; + goto out_free_policy; } down_write(&policy->rwsem); @@ -1327,7 +1327,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) policy->cur = cpufreq_driver->get(policy->cpu); if (!policy->cur) { pr_err("%s: ->get() failed\n", __func__); - goto err_get_freq; + goto out_exit_policy; } } @@ -1377,7 +1377,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) if (!recover_policy) { ret = cpufreq_add_dev_interface(policy, dev); if (ret) - goto err_out_unregister; + goto out_exit_policy; blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_CREATE_POLICY, policy); @@ -1406,15 +1406,14 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) return 0; -err_out_unregister: -err_get_freq: +out_exit_policy: up_write(&policy->rwsem); if (cpufreq_driver->exit) cpufreq_driver->exit(policy); -err_set_policy_cpu: +out_free_policy: cpufreq_policy_free(policy, recover_policy); -nomem_out: +out_release_rwsem: up_read(&cpufreq_rwsem); return ret; From 7f0fa40f5a587c2a026de776cc6a26373ac0f244 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 8 Jul 2015 15:12:16 +0530 Subject: [PATCH 03/53] cpufreq: Properly handle errors from cpufreq_init_policy() cpufreq_init_policy() can fail, and we don't do anything except a call to ->exit() on that. The policy should be freed if this happens. Do it properly. Reported-and-tested-by: "Jon Medhurst (Tixy)" Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 702777b1d645..a7b6ac6e048e 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1060,11 +1060,10 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy, return cpufreq_add_dev_symlink(policy); } -static void cpufreq_init_policy(struct cpufreq_policy *policy) +static int cpufreq_init_policy(struct cpufreq_policy *policy) { struct cpufreq_governor *gov = NULL; struct cpufreq_policy new_policy; - int ret = 0; memcpy(&new_policy, policy, sizeof(*policy)); @@ -1083,12 +1082,7 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy) cpufreq_parse_governor(gov->name, &new_policy.policy, NULL); /* set default policy */ - ret = cpufreq_set_policy(policy, &new_policy); - if (ret) { - pr_debug("setting policy failed\n"); - if (cpufreq_driver->exit) - cpufreq_driver->exit(policy); - } + return cpufreq_set_policy(policy, &new_policy); } static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, @@ -1386,7 +1380,12 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) write_unlock_irqrestore(&cpufreq_driver_lock, flags); } - cpufreq_init_policy(policy); + ret = cpufreq_init_policy(policy); + if (ret) { + pr_err("%s: Failed to initialize policy for cpu: %d (%d)\n", + __func__, cpu, ret); + goto out_remove_policy_notify; + } if (!recover_policy) { policy->user_policy.policy = policy->policy; @@ -1406,6 +1405,9 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) return 0; +out_remove_policy_notify: + /* cpufreq_policy_free() will notify based on this */ + recover_policy = true; out_exit_policy: up_write(&policy->rwsem); From 8cfcfd39000d54188e0df1e0fafe63f53897b62a Mon Sep 17 00:00:00 2001 From: Pan Xinhui Date: Fri, 10 Jul 2015 14:36:20 +0800 Subject: [PATCH 04/53] acpi-cpufreq: Fix an ACPI perf unregister issue As policy->cpu may not be same in acpi_cpufreq_cpu_init() and acpi_cpufreq_cpu_exit(). There is a risk that we use different CPU to un/register ACPI performance. So acpi_processor_unregister_performance() may not be able to do the cleanup work. That causes a memory leak. And if there will be another acpi_processor_register_performance() call, it may also fail thanks to the internal check of pr->performace. So add a new struct acpi_cpufreq_data field, acpi_perf_cpu, to fix this issue. Signed-off-by: Pan Xinhui Acked-by: Viresh Kumar [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index e7fcaa6eedbd..de54ce14eb39 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -69,6 +69,7 @@ struct acpi_cpufreq_data { struct cpufreq_frequency_table *freq_table; unsigned int resume; unsigned int cpu_feature; + unsigned int acpi_perf_cpu; cpumask_var_t freqdomain_cpus; }; @@ -677,6 +678,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) } data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu); + data->acpi_perf_cpu = cpu; policy->driver_data = data; if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) @@ -861,7 +863,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) if (data) { policy->driver_data = NULL; acpi_processor_unregister_performance(data->acpi_data, - policy->cpu); + data->acpi_perf_cpu); free_cpumask_var(data->freqdomain_cpus); kfree(data->freq_table); kfree(data); From 3a5f5b2e3b4ba165e342c4e969c7fa8d85be0d94 Mon Sep 17 00:00:00 2001 From: Cristian Ardelean Date: Fri, 10 Jul 2015 14:42:15 -0400 Subject: [PATCH 05/53] cpufreq: integrator: fixed coding style issues Fixed coding style issues found by checkpatch.pl tool. Changed space indentation to tab, removed unneccesary braces, removed space between MODULE macros and parentheses. REMARKS: failed to 'make' this file with error message 'fatal error: asm/mach-types.h: No such file or directory'. Signed-off-by: Cristian Ardelean Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/integrator-cpufreq.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/integrator-cpufreq.c b/drivers/cpufreq/integrator-cpufreq.c index 129e266f7621..2faa4216bf2a 100644 --- a/drivers/cpufreq/integrator-cpufreq.c +++ b/drivers/cpufreq/integrator-cpufreq.c @@ -98,11 +98,10 @@ static int integrator_set_target(struct cpufreq_policy *policy, /* get current setting */ cm_osc = __raw_readl(cm_base + INTEGRATOR_HDR_OSC_OFFSET); - if (machine_is_integrator()) { + if (machine_is_integrator()) vco.s = (cm_osc >> 8) & 7; - } else if (machine_is_cintegrator()) { + else if (machine_is_cintegrator()) vco.s = 1; - } vco.v = cm_osc & 255; vco.r = 22; freqs.old = icst_hz(&cclk_params, vco) / 1000; @@ -163,11 +162,10 @@ static unsigned int integrator_get(unsigned int cpu) /* detect memory etc. */ cm_osc = __raw_readl(cm_base + INTEGRATOR_HDR_OSC_OFFSET); - if (machine_is_integrator()) { + if (machine_is_integrator()) vco.s = (cm_osc >> 8) & 7; - } else { + else vco.s = 1; - } vco.v = cm_osc & 255; vco.r = 22; @@ -203,7 +201,7 @@ static int __init integrator_cpufreq_probe(struct platform_device *pdev) struct resource *res; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) + if (!res) return -ENODEV; cm_base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); @@ -234,6 +232,6 @@ static struct platform_driver integrator_cpufreq_driver = { module_platform_driver_probe(integrator_cpufreq_driver, integrator_cpufreq_probe); -MODULE_AUTHOR ("Russell M. King"); -MODULE_DESCRIPTION ("cpufreq driver for ARM Integrator CPUs"); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Russell M. King"); +MODULE_DESCRIPTION("cpufreq driver for ARM Integrator CPUs"); +MODULE_LICENSE("GPL"); From ba88d4338f226766f510e207911dde8c1875e072 Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Tue, 14 Jul 2015 09:46:23 -0700 Subject: [PATCH 06/53] intel_pstate: enable HWP per CPU HWP previously was only enabled at driver load time, on the boot CPU, however, HWP must be enabled per package. Move the code to enable HWP to the cpufreq driver init path so that it will be called per CPU. Signed-off-by: Kristen Carlson Accardi Tested-by: David Zhuang Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 15ada47bb720..763d8f34ab49 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -484,12 +484,11 @@ static void __init intel_pstate_sysfs_expose_params(void) } /************************** sysfs end ************************/ -static void intel_pstate_hwp_enable(void) +static void intel_pstate_hwp_enable(struct cpudata *cpudata) { - hwp_active++; pr_info("intel_pstate: HWP enabled\n"); - wrmsrl( MSR_PM_ENABLE, 0x1); + wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); } static int byt_get_min_pstate(void) @@ -932,6 +931,10 @@ static int intel_pstate_init_cpu(unsigned int cpunum) cpu = all_cpu_data[cpunum]; cpu->cpu = cpunum; + + if (hwp_active) + intel_pstate_hwp_enable(cpu); + intel_pstate_get_cpu_pstates(cpu); init_timer_deferrable(&cpu->timer); @@ -1245,7 +1248,7 @@ static int __init intel_pstate_init(void) return -ENOMEM; if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) - intel_pstate_hwp_enable(); + hwp_active++; if (!hwp_active && hwp_only) goto out; From 386d46e6d5238c9648399eb1e0c418d06f4126a2 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 19 Jun 2015 17:18:01 +0530 Subject: [PATCH 07/53] cpufreq: governor: Name delayed-work as dwork Delayed work was named as 'work' and to access work within it we do work.work. Not much readable. Rename delayed_work as 'dwork'. Reviewed-by: Preeti U Murthy Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 2 +- drivers/cpufreq/cpufreq_governor.c | 6 +++--- drivers/cpufreq/cpufreq_governor.h | 2 +- drivers/cpufreq/cpufreq_ondemand.c | 8 ++++---- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index c86a10c30912..0e3ec1d968d9 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -105,7 +105,7 @@ static void cs_check_cpu(int cpu, unsigned int load) static void cs_dbs_timer(struct work_struct *work) { struct cs_cpu_dbs_info_s *dbs_info = container_of(work, - struct cs_cpu_dbs_info_s, cdbs.work.work); + struct cs_cpu_dbs_info_s, cdbs.dwork.work); unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; struct cs_cpu_dbs_info_s *core_dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 57a39f8a92b7..6024bbc782c0 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -165,7 +165,7 @@ static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data, { struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); - mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay); + mod_delayed_work_on(cpu, system_wq, &cdbs->dwork, delay); } void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, @@ -204,7 +204,7 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data, for_each_cpu(i, policy->cpus) { cdbs = dbs_data->cdata->get_cpu_cdbs(i); - cancel_delayed_work_sync(&cdbs->work); + cancel_delayed_work_sync(&cdbs->dwork); } } @@ -367,7 +367,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; mutex_init(&j_cdbs->timer_mutex); - INIT_DEFERRABLE_WORK(&j_cdbs->work, cdata->gov_dbs_timer); + INIT_DEFERRABLE_WORK(&j_cdbs->dwork, cdata->gov_dbs_timer); } if (cdata->governor == GOV_CONSERVATIVE) { diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 34736f5e869d..352eecaae789 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -142,7 +142,7 @@ struct cpu_dbs_common_info { */ unsigned int prev_load; struct cpufreq_policy *cur_policy; - struct delayed_work work; + struct delayed_work dwork; /* * percpu mutex that serializes governor limit change with gov_dbs_timer * invocation. We do not want gov_dbs_timer to run when user is changing diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 3c1e10f2304c..830aef1db8c3 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -194,7 +194,7 @@ static void od_check_cpu(int cpu, unsigned int load) static void od_dbs_timer(struct work_struct *work) { struct od_cpu_dbs_info_s *dbs_info = - container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work); + container_of(work, struct od_cpu_dbs_info_s, cdbs.dwork.work); unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; struct od_cpu_dbs_info_s *core_dbs_info = &per_cpu(od_cpu_dbs_info, cpu); @@ -275,18 +275,18 @@ static void update_sampling_rate(struct dbs_data *dbs_data, mutex_lock(&dbs_info->cdbs.timer_mutex); - if (!delayed_work_pending(&dbs_info->cdbs.work)) { + if (!delayed_work_pending(&dbs_info->cdbs.dwork)) { mutex_unlock(&dbs_info->cdbs.timer_mutex); continue; } next_sampling = jiffies + usecs_to_jiffies(new_rate); - appointed_at = dbs_info->cdbs.work.timer.expires; + appointed_at = dbs_info->cdbs.dwork.timer.expires; if (time_before(next_sampling, appointed_at)) { mutex_unlock(&dbs_info->cdbs.timer_mutex); - cancel_delayed_work_sync(&dbs_info->cdbs.work); + cancel_delayed_work_sync(&dbs_info->cdbs.dwork); mutex_lock(&dbs_info->cdbs.timer_mutex); gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy, From d3574c851148266177ea9ecae10a317e6eae94de Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 19 Jun 2015 17:18:02 +0530 Subject: [PATCH 08/53] cpufreq: governor: Drop unused field 'cpu' Its not used at all, drop it. Reviewed-by: Preeti U Murthy Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 1 - drivers/cpufreq/cpufreq_governor.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 6024bbc782c0..2149ba7d32a8 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -353,7 +353,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, struct cpu_dbs_common_info *j_cdbs = cdata->get_cpu_cdbs(j); unsigned int prev_load; - j_cdbs->cpu = j; j_cdbs->cur_policy = policy; j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 352eecaae789..1bbf8c87fdd5 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -130,7 +130,6 @@ static void *get_cpu_dbs_info_s(int cpu) \ /* Per cpu structures */ struct cpu_dbs_common_info { - int cpu; u64 prev_cpu_idle; u64 prev_cpu_wall; u64 prev_cpu_nice; From 875b8508f9607b92e3ef4ece2fddf86d61351085 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 19 Jun 2015 17:18:03 +0530 Subject: [PATCH 09/53] cpufreq: governor: Rename 'cpu_dbs_common_info' to 'cpu_dbs_info' Its not common info to all CPUs, but a structure representing common type of cpu info to both governor types. Lets drop 'common_' from its name. Reviewed-by: Preeti U Murthy Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 19 +++++++++---------- drivers/cpufreq/cpufreq_governor.h | 13 ++++++------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 2149ba7d32a8..529f236f2d05 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -32,7 +32,7 @@ static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) { - struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; struct cpufreq_policy *policy; @@ -64,7 +64,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) /* Get Absolute Load */ for_each_cpu(j, policy->cpus) { - struct cpu_dbs_common_info *j_cdbs; + struct cpu_dbs_info *j_cdbs; u64 cur_wall_time, cur_idle_time; unsigned int idle_time, wall_time; unsigned int load; @@ -163,7 +163,7 @@ EXPORT_SYMBOL_GPL(dbs_check_cpu); static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data, unsigned int delay) { - struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); mod_delayed_work_on(cpu, system_wq, &cdbs->dwork, delay); } @@ -199,7 +199,7 @@ EXPORT_SYMBOL_GPL(gov_queue_work); static inline void gov_cancel_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy) { - struct cpu_dbs_common_info *cdbs; + struct cpu_dbs_info *cdbs; int i; for_each_cpu(i, policy->cpus) { @@ -209,8 +209,7 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data, } /* Will return if we need to evaluate cpu load again or not */ -bool need_load_eval(struct cpu_dbs_common_info *cdbs, - unsigned int sampling_rate) +bool need_load_eval(struct cpu_dbs_info *cdbs, unsigned int sampling_rate) { if (policy_is_shared(cdbs->cur_policy)) { ktime_t time_now = ktime_get(); @@ -330,7 +329,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, { struct common_dbs_data *cdata = dbs_data->cdata; unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; - struct cpu_dbs_common_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu); int io_busy = 0; if (!policy->cur) @@ -350,7 +349,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, } for_each_cpu(j, policy->cpus) { - struct cpu_dbs_common_info *j_cdbs = cdata->get_cpu_cdbs(j); + struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j); unsigned int prev_load; j_cdbs->cur_policy = policy; @@ -398,7 +397,7 @@ static void cpufreq_governor_stop(struct cpufreq_policy *policy, { struct common_dbs_data *cdata = dbs_data->cdata; unsigned int cpu = policy->cpu; - struct cpu_dbs_common_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu); if (cdata->governor == GOV_CONSERVATIVE) { struct cs_cpu_dbs_info_s *cs_dbs_info = @@ -418,7 +417,7 @@ static void cpufreq_governor_limits(struct cpufreq_policy *policy, { struct common_dbs_data *cdata = dbs_data->cdata; unsigned int cpu = policy->cpu; - struct cpu_dbs_common_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu); if (!cpu_cdbs->cur_policy) return; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 1bbf8c87fdd5..6b5e33f68064 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -109,7 +109,7 @@ store_one(_gov, file_name) /* create helper routines */ #define define_get_cpu_dbs_routines(_dbs_info) \ -static struct cpu_dbs_common_info *get_cpu_cdbs(int cpu) \ +static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \ { \ return &per_cpu(_dbs_info, cpu).cdbs; \ } \ @@ -129,7 +129,7 @@ static void *get_cpu_dbs_info_s(int cpu) \ */ /* Per cpu structures */ -struct cpu_dbs_common_info { +struct cpu_dbs_info { u64 prev_cpu_idle; u64 prev_cpu_wall; u64 prev_cpu_nice; @@ -152,7 +152,7 @@ struct cpu_dbs_common_info { }; struct od_cpu_dbs_info_s { - struct cpu_dbs_common_info cdbs; + struct cpu_dbs_info cdbs; struct cpufreq_frequency_table *freq_table; unsigned int freq_lo; unsigned int freq_lo_jiffies; @@ -162,7 +162,7 @@ struct od_cpu_dbs_info_s { }; struct cs_cpu_dbs_info_s { - struct cpu_dbs_common_info cdbs; + struct cpu_dbs_info cdbs; unsigned int down_skip; unsigned int requested_freq; unsigned int enable:1; @@ -203,7 +203,7 @@ struct common_dbs_data { */ struct dbs_data *gdbs_data; - struct cpu_dbs_common_info *(*get_cpu_cdbs)(int cpu); + struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); void *(*get_cpu_dbs_info_s)(int cpu); void (*gov_dbs_timer)(struct work_struct *work); void (*gov_check_cpu)(int cpu, unsigned int load); @@ -264,8 +264,7 @@ static ssize_t show_sampling_rate_min_gov_pol \ extern struct mutex cpufreq_governor_lock; void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); -bool need_load_eval(struct cpu_dbs_common_info *cdbs, - unsigned int sampling_rate); +bool need_load_eval(struct cpu_dbs_info *cdbs, unsigned int sampling_rate); int cpufreq_governor_dbs(struct cpufreq_policy *policy, struct common_dbs_data *cdata, unsigned int event); void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, From 49a9a40c1b48d24f0fd9a6b6be8a4038f47d13bf Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 19 Jun 2015 17:18:04 +0530 Subject: [PATCH 10/53] cpufreq: governor: name pointer to cpu_dbs_info as 'cdbs' It is called as 'cdbs' at most of the places and 'cpu_dbs' at others. Lets use 'cdbs' consistently for better readability. Reviewed-by: Preeti U Murthy Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 529f236f2d05..4ea13f182154 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -329,7 +329,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, { struct common_dbs_data *cdata = dbs_data->cdata; unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; - struct cpu_dbs_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); int io_busy = 0; if (!policy->cur) @@ -385,7 +385,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, } /* Initiate timer time stamp */ - cpu_cdbs->time_stamp = ktime_get(); + cdbs->time_stamp = ktime_get(); gov_queue_work(dbs_data, policy, delay_for_sampling_rate(sampling_rate), true); @@ -397,7 +397,7 @@ static void cpufreq_governor_stop(struct cpufreq_policy *policy, { struct common_dbs_data *cdata = dbs_data->cdata; unsigned int cpu = policy->cpu; - struct cpu_dbs_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); if (cdata->governor == GOV_CONSERVATIVE) { struct cs_cpu_dbs_info_s *cs_dbs_info = @@ -408,8 +408,8 @@ static void cpufreq_governor_stop(struct cpufreq_policy *policy, gov_cancel_work(dbs_data, policy); - mutex_destroy(&cpu_cdbs->timer_mutex); - cpu_cdbs->cur_policy = NULL; + mutex_destroy(&cdbs->timer_mutex); + cdbs->cur_policy = NULL; } static void cpufreq_governor_limits(struct cpufreq_policy *policy, @@ -417,20 +417,20 @@ static void cpufreq_governor_limits(struct cpufreq_policy *policy, { struct common_dbs_data *cdata = dbs_data->cdata; unsigned int cpu = policy->cpu; - struct cpu_dbs_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); - if (!cpu_cdbs->cur_policy) + if (!cdbs->cur_policy) return; - mutex_lock(&cpu_cdbs->timer_mutex); - if (policy->max < cpu_cdbs->cur_policy->cur) - __cpufreq_driver_target(cpu_cdbs->cur_policy, policy->max, + mutex_lock(&cdbs->timer_mutex); + if (policy->max < cdbs->cur_policy->cur) + __cpufreq_driver_target(cdbs->cur_policy, policy->max, CPUFREQ_RELATION_H); - else if (policy->min > cpu_cdbs->cur_policy->cur) - __cpufreq_driver_target(cpu_cdbs->cur_policy, policy->min, + else if (policy->min > cdbs->cur_policy->cur) + __cpufreq_driver_target(cdbs->cur_policy, policy->min, CPUFREQ_RELATION_L); dbs_check_cpu(dbs_data, cpu); - mutex_unlock(&cpu_cdbs->timer_mutex); + mutex_unlock(&cdbs->timer_mutex); } int cpufreq_governor_dbs(struct cpufreq_policy *policy, From 42994af63cd1aafc9289035cf621e501b08732e9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 19 Jun 2015 17:18:05 +0530 Subject: [PATCH 11/53] cpufreq: governor: rename cur_policy as policy Just call it 'policy', cur_policy is unnecessarily long and doesn't have any special meaning. Reviewed-by: Preeti U Murthy Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 10 +++++----- drivers/cpufreq/cpufreq_governor.c | 18 +++++++++--------- drivers/cpufreq/cpufreq_governor.h | 2 +- drivers/cpufreq/cpufreq_ondemand.c | 19 ++++++++++--------- 4 files changed, 25 insertions(+), 24 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 0e3ec1d968d9..af47d322679e 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -47,7 +47,7 @@ static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, static void cs_check_cpu(int cpu, unsigned int load) { struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); - struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy; + struct cpufreq_policy *policy = dbs_info->cdbs.policy; struct dbs_data *dbs_data = policy->governor_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; @@ -106,10 +106,10 @@ static void cs_dbs_timer(struct work_struct *work) { struct cs_cpu_dbs_info_s *dbs_info = container_of(work, struct cs_cpu_dbs_info_s, cdbs.dwork.work); - unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; + unsigned int cpu = dbs_info->cdbs.policy->cpu; struct cs_cpu_dbs_info_s *core_dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); - struct dbs_data *dbs_data = dbs_info->cdbs.cur_policy->governor_data; + struct dbs_data *dbs_data = dbs_info->cdbs.policy->governor_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; int delay = delay_for_sampling_rate(cs_tuners->sampling_rate); bool modify_all = true; @@ -120,7 +120,7 @@ static void cs_dbs_timer(struct work_struct *work) else dbs_check_cpu(dbs_data, cpu); - gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy, delay, modify_all); + gov_queue_work(dbs_data, dbs_info->cdbs.policy, delay, modify_all); mutex_unlock(&core_dbs_info->cdbs.timer_mutex); } @@ -135,7 +135,7 @@ static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, if (!dbs_info->enable) return 0; - policy = dbs_info->cdbs.cur_policy; + policy = dbs_info->cdbs.policy; /* * we only care if our internally tracked freq moves outside the 'valid' diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 4ea13f182154..c0566f86caed 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -60,7 +60,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) ignore_nice = cs_tuners->ignore_nice_load; } - policy = cdbs->cur_policy; + policy = cdbs->policy; /* Get Absolute Load */ for_each_cpu(j, policy->cpus) { @@ -211,7 +211,7 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data, /* Will return if we need to evaluate cpu load again or not */ bool need_load_eval(struct cpu_dbs_info *cdbs, unsigned int sampling_rate) { - if (policy_is_shared(cdbs->cur_policy)) { + if (policy_is_shared(cdbs->policy)) { ktime_t time_now = ktime_get(); s64 delta_us = ktime_us_delta(time_now, cdbs->time_stamp); @@ -352,7 +352,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j); unsigned int prev_load; - j_cdbs->cur_policy = policy; + j_cdbs->policy = policy; j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); @@ -409,7 +409,7 @@ static void cpufreq_governor_stop(struct cpufreq_policy *policy, gov_cancel_work(dbs_data, policy); mutex_destroy(&cdbs->timer_mutex); - cdbs->cur_policy = NULL; + cdbs->policy = NULL; } static void cpufreq_governor_limits(struct cpufreq_policy *policy, @@ -419,15 +419,15 @@ static void cpufreq_governor_limits(struct cpufreq_policy *policy, unsigned int cpu = policy->cpu; struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); - if (!cdbs->cur_policy) + if (!cdbs->policy) return; mutex_lock(&cdbs->timer_mutex); - if (policy->max < cdbs->cur_policy->cur) - __cpufreq_driver_target(cdbs->cur_policy, policy->max, + if (policy->max < cdbs->policy->cur) + __cpufreq_driver_target(cdbs->policy, policy->max, CPUFREQ_RELATION_H); - else if (policy->min > cdbs->cur_policy->cur) - __cpufreq_driver_target(cdbs->cur_policy, policy->min, + else if (policy->min > cdbs->policy->cur) + __cpufreq_driver_target(cdbs->policy, policy->min, CPUFREQ_RELATION_L); dbs_check_cpu(dbs_data, cpu); mutex_unlock(&cdbs->timer_mutex); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 6b5e33f68064..a0f8eb79ee6d 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -140,7 +140,7 @@ struct cpu_dbs_info { * wake-up from idle. */ unsigned int prev_load; - struct cpufreq_policy *cur_policy; + struct cpufreq_policy *policy; struct delayed_work dwork; /* * percpu mutex that serializes governor limit change with gov_dbs_timer diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 830aef1db8c3..d29c6f9c6e3e 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -155,7 +155,7 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) static void od_check_cpu(int cpu, unsigned int load) { struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy; + struct cpufreq_policy *policy = dbs_info->cdbs.policy; struct dbs_data *dbs_data = policy->governor_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; @@ -195,10 +195,10 @@ static void od_dbs_timer(struct work_struct *work) { struct od_cpu_dbs_info_s *dbs_info = container_of(work, struct od_cpu_dbs_info_s, cdbs.dwork.work); - unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; + unsigned int cpu = dbs_info->cdbs.policy->cpu; struct od_cpu_dbs_info_s *core_dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - struct dbs_data *dbs_data = dbs_info->cdbs.cur_policy->governor_data; + struct dbs_data *dbs_data = dbs_info->cdbs.policy->governor_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; int delay = 0, sample_type = core_dbs_info->sample_type; bool modify_all = true; @@ -213,8 +213,9 @@ static void od_dbs_timer(struct work_struct *work) core_dbs_info->sample_type = OD_NORMAL_SAMPLE; if (sample_type == OD_SUB_SAMPLE) { delay = core_dbs_info->freq_lo_jiffies; - __cpufreq_driver_target(core_dbs_info->cdbs.cur_policy, - core_dbs_info->freq_lo, CPUFREQ_RELATION_H); + __cpufreq_driver_target(core_dbs_info->cdbs.policy, + core_dbs_info->freq_lo, + CPUFREQ_RELATION_H); } else { dbs_check_cpu(dbs_data, cpu); if (core_dbs_info->freq_lo) { @@ -229,7 +230,7 @@ max_delay: delay = delay_for_sampling_rate(od_tuners->sampling_rate * core_dbs_info->rate_mult); - gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy, delay, modify_all); + gov_queue_work(dbs_data, dbs_info->cdbs.policy, delay, modify_all); mutex_unlock(&core_dbs_info->cdbs.timer_mutex); } @@ -289,8 +290,8 @@ static void update_sampling_rate(struct dbs_data *dbs_data, cancel_delayed_work_sync(&dbs_info->cdbs.dwork); mutex_lock(&dbs_info->cdbs.timer_mutex); - gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy, - usecs_to_jiffies(new_rate), true); + gov_queue_work(dbs_data, dbs_info->cdbs.policy, + usecs_to_jiffies(new_rate), true); } mutex_unlock(&dbs_info->cdbs.timer_mutex); @@ -559,7 +560,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias) if (cpumask_test_cpu(cpu, &done)) continue; - policy = per_cpu(od_cpu_dbs_info, cpu).cdbs.cur_policy; + policy = per_cpu(od_cpu_dbs_info, cpu).cdbs.policy; if (!policy) continue; From 44152cb82d1ad6ae6f8b47c5437f6f1e65ca82c4 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sat, 18 Jul 2015 11:30:59 +0530 Subject: [PATCH 12/53] cpufreq: governor: Keep single copy of information common to policy->cpus Some information is common to all CPUs belonging to a policy, but are kept on per-cpu basis. Lets keep that in another structure common to all policy->cpus. That will make updates/reads to that less complex and less error prone. The memory for cpu_common_dbs_info is allocated/freed at INIT/EXIT, so that it we don't reallocate it for STOP/START sequence. It will be also be used (in next patch) while the governor is stopped and so must not be freed that early. Reviewed-and-tested-by: Preeti U Murthy Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 18 ++--- drivers/cpufreq/cpufreq_governor.c | 92 +++++++++++++++++++------- drivers/cpufreq/cpufreq_governor.h | 24 ++++--- drivers/cpufreq/cpufreq_ondemand.c | 38 ++++++----- 4 files changed, 114 insertions(+), 58 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index af47d322679e..d21c3cff9056 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -47,7 +47,7 @@ static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, static void cs_check_cpu(int cpu, unsigned int load) { struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); - struct cpufreq_policy *policy = dbs_info->cdbs.policy; + struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; struct dbs_data *dbs_data = policy->governor_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; @@ -106,22 +106,24 @@ static void cs_dbs_timer(struct work_struct *work) { struct cs_cpu_dbs_info_s *dbs_info = container_of(work, struct cs_cpu_dbs_info_s, cdbs.dwork.work); - unsigned int cpu = dbs_info->cdbs.policy->cpu; + struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; + unsigned int cpu = policy->cpu; struct cs_cpu_dbs_info_s *core_dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); - struct dbs_data *dbs_data = dbs_info->cdbs.policy->governor_data; + struct dbs_data *dbs_data = policy->governor_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; int delay = delay_for_sampling_rate(cs_tuners->sampling_rate); bool modify_all = true; - mutex_lock(&core_dbs_info->cdbs.timer_mutex); - if (!need_load_eval(&core_dbs_info->cdbs, cs_tuners->sampling_rate)) + mutex_lock(&core_dbs_info->cdbs.shared->timer_mutex); + if (!need_load_eval(core_dbs_info->cdbs.shared, + cs_tuners->sampling_rate)) modify_all = false; else dbs_check_cpu(dbs_data, cpu); - gov_queue_work(dbs_data, dbs_info->cdbs.policy, delay, modify_all); - mutex_unlock(&core_dbs_info->cdbs.timer_mutex); + gov_queue_work(dbs_data, policy, delay, modify_all); + mutex_unlock(&core_dbs_info->cdbs.shared->timer_mutex); } static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, @@ -135,7 +137,7 @@ static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, if (!dbs_info->enable) return 0; - policy = dbs_info->cdbs.policy; + policy = dbs_info->cdbs.shared->policy; /* * we only care if our internally tracked freq moves outside the 'valid' diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index c0566f86caed..b01cb729104b 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -35,7 +35,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - struct cpufreq_policy *policy; + struct cpufreq_policy *policy = cdbs->shared->policy; unsigned int sampling_rate; unsigned int max_load = 0; unsigned int ignore_nice; @@ -60,8 +60,6 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) ignore_nice = cs_tuners->ignore_nice_load; } - policy = cdbs->policy; - /* Get Absolute Load */ for_each_cpu(j, policy->cpus) { struct cpu_dbs_info *j_cdbs; @@ -209,17 +207,18 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data, } /* Will return if we need to evaluate cpu load again or not */ -bool need_load_eval(struct cpu_dbs_info *cdbs, unsigned int sampling_rate) +bool need_load_eval(struct cpu_common_dbs_info *shared, + unsigned int sampling_rate) { - if (policy_is_shared(cdbs->policy)) { + if (policy_is_shared(shared->policy)) { ktime_t time_now = ktime_get(); - s64 delta_us = ktime_us_delta(time_now, cdbs->time_stamp); + s64 delta_us = ktime_us_delta(time_now, shared->time_stamp); /* Do nothing if we recently have sampled */ if (delta_us < (s64)(sampling_rate / 2)) return false; else - cdbs->time_stamp = time_now; + shared->time_stamp = time_now; } return true; @@ -238,6 +237,37 @@ static void set_sampling_rate(struct dbs_data *dbs_data, } } +static int alloc_common_dbs_info(struct cpufreq_policy *policy, + struct common_dbs_data *cdata) +{ + struct cpu_common_dbs_info *shared; + int j; + + /* Allocate memory for the common information for policy->cpus */ + shared = kzalloc(sizeof(*shared), GFP_KERNEL); + if (!shared) + return -ENOMEM; + + /* Set shared for all CPUs, online+offline */ + for_each_cpu(j, policy->related_cpus) + cdata->get_cpu_cdbs(j)->shared = shared; + + return 0; +} + +static void free_common_dbs_info(struct cpufreq_policy *policy, + struct common_dbs_data *cdata) +{ + struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); + struct cpu_common_dbs_info *shared = cdbs->shared; + int j; + + for_each_cpu(j, policy->cpus) + cdata->get_cpu_cdbs(j)->shared = NULL; + + kfree(shared); +} + static int cpufreq_governor_init(struct cpufreq_policy *policy, struct dbs_data *dbs_data, struct common_dbs_data *cdata) @@ -248,6 +278,11 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, if (dbs_data) { if (WARN_ON(have_governor_per_policy())) return -EINVAL; + + ret = alloc_common_dbs_info(policy, cdata); + if (ret) + return ret; + dbs_data->usage_count++; policy->governor_data = dbs_data; return 0; @@ -257,12 +292,16 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, if (!dbs_data) return -ENOMEM; + ret = alloc_common_dbs_info(policy, cdata); + if (ret) + goto free_dbs_data; + dbs_data->cdata = cdata; dbs_data->usage_count = 1; ret = cdata->init(dbs_data, !policy->governor->initialized); if (ret) - goto free_dbs_data; + goto free_common_dbs_info; /* policy latency is in ns. Convert it to us first */ latency = policy->cpuinfo.transition_latency / 1000; @@ -299,6 +338,8 @@ put_kobj: } cdata_exit: cdata->exit(dbs_data, !policy->governor->initialized); +free_common_dbs_info: + free_common_dbs_info(policy, cdata); free_dbs_data: kfree(dbs_data); return ret; @@ -322,6 +363,8 @@ static void cpufreq_governor_exit(struct cpufreq_policy *policy, cdata->exit(dbs_data, policy->governor->initialized == 1); kfree(dbs_data); } + + free_common_dbs_info(policy, cdata); } static int cpufreq_governor_start(struct cpufreq_policy *policy, @@ -330,6 +373,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, struct common_dbs_data *cdata = dbs_data->cdata; unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_common_dbs_info *shared = cdbs->shared; int io_busy = 0; if (!policy->cur) @@ -348,11 +392,14 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, io_busy = od_tuners->io_is_busy; } + shared->policy = policy; + shared->time_stamp = ktime_get(); + mutex_init(&shared->timer_mutex); + for_each_cpu(j, policy->cpus) { struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j); unsigned int prev_load; - j_cdbs->policy = policy; j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); @@ -364,7 +411,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - mutex_init(&j_cdbs->timer_mutex); INIT_DEFERRABLE_WORK(&j_cdbs->dwork, cdata->gov_dbs_timer); } @@ -384,9 +430,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, od_ops->powersave_bias_init_cpu(cpu); } - /* Initiate timer time stamp */ - cdbs->time_stamp = ktime_get(); - gov_queue_work(dbs_data, policy, delay_for_sampling_rate(sampling_rate), true); return 0; @@ -398,6 +441,9 @@ static void cpufreq_governor_stop(struct cpufreq_policy *policy, struct common_dbs_data *cdata = dbs_data->cdata; unsigned int cpu = policy->cpu; struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_common_dbs_info *shared = cdbs->shared; + + gov_cancel_work(dbs_data, policy); if (cdata->governor == GOV_CONSERVATIVE) { struct cs_cpu_dbs_info_s *cs_dbs_info = @@ -406,10 +452,8 @@ static void cpufreq_governor_stop(struct cpufreq_policy *policy, cs_dbs_info->enable = 0; } - gov_cancel_work(dbs_data, policy); - - mutex_destroy(&cdbs->timer_mutex); - cdbs->policy = NULL; + shared->policy = NULL; + mutex_destroy(&shared->timer_mutex); } static void cpufreq_governor_limits(struct cpufreq_policy *policy, @@ -419,18 +463,18 @@ static void cpufreq_governor_limits(struct cpufreq_policy *policy, unsigned int cpu = policy->cpu; struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); - if (!cdbs->policy) + if (!cdbs->shared || !cdbs->shared->policy) return; - mutex_lock(&cdbs->timer_mutex); - if (policy->max < cdbs->policy->cur) - __cpufreq_driver_target(cdbs->policy, policy->max, + mutex_lock(&cdbs->shared->timer_mutex); + if (policy->max < cdbs->shared->policy->cur) + __cpufreq_driver_target(cdbs->shared->policy, policy->max, CPUFREQ_RELATION_H); - else if (policy->min > cdbs->policy->cur) - __cpufreq_driver_target(cdbs->policy, policy->min, + else if (policy->min > cdbs->shared->policy->cur) + __cpufreq_driver_target(cdbs->shared->policy, policy->min, CPUFREQ_RELATION_L); dbs_check_cpu(dbs_data, cpu); - mutex_unlock(&cdbs->timer_mutex); + mutex_unlock(&cdbs->shared->timer_mutex); } int cpufreq_governor_dbs(struct cpufreq_policy *policy, diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index a0f8eb79ee6d..8e4a25f0730c 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -128,6 +128,18 @@ static void *get_cpu_dbs_info_s(int cpu) \ * cs_*: Conservative governor */ +/* Common to all CPUs of a policy */ +struct cpu_common_dbs_info { + struct cpufreq_policy *policy; + /* + * percpu mutex that serializes governor limit change with gov_dbs_timer + * invocation. We do not want gov_dbs_timer to run when user is changing + * the governor or limits. + */ + struct mutex timer_mutex; + ktime_t time_stamp; +}; + /* Per cpu structures */ struct cpu_dbs_info { u64 prev_cpu_idle; @@ -140,15 +152,8 @@ struct cpu_dbs_info { * wake-up from idle. */ unsigned int prev_load; - struct cpufreq_policy *policy; struct delayed_work dwork; - /* - * percpu mutex that serializes governor limit change with gov_dbs_timer - * invocation. We do not want gov_dbs_timer to run when user is changing - * the governor or limits. - */ - struct mutex timer_mutex; - ktime_t time_stamp; + struct cpu_common_dbs_info *shared; }; struct od_cpu_dbs_info_s { @@ -264,7 +269,8 @@ static ssize_t show_sampling_rate_min_gov_pol \ extern struct mutex cpufreq_governor_lock; void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); -bool need_load_eval(struct cpu_dbs_info *cdbs, unsigned int sampling_rate); +bool need_load_eval(struct cpu_common_dbs_info *shared, + unsigned int sampling_rate); int cpufreq_governor_dbs(struct cpufreq_policy *policy, struct common_dbs_data *cdata, unsigned int event); void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index d29c6f9c6e3e..14d7e86e7f94 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -155,7 +155,7 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) static void od_check_cpu(int cpu, unsigned int load) { struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - struct cpufreq_policy *policy = dbs_info->cdbs.policy; + struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; struct dbs_data *dbs_data = policy->governor_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; @@ -195,16 +195,18 @@ static void od_dbs_timer(struct work_struct *work) { struct od_cpu_dbs_info_s *dbs_info = container_of(work, struct od_cpu_dbs_info_s, cdbs.dwork.work); - unsigned int cpu = dbs_info->cdbs.policy->cpu; + struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; + unsigned int cpu = policy->cpu; struct od_cpu_dbs_info_s *core_dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - struct dbs_data *dbs_data = dbs_info->cdbs.policy->governor_data; + struct dbs_data *dbs_data = policy->governor_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; int delay = 0, sample_type = core_dbs_info->sample_type; bool modify_all = true; - mutex_lock(&core_dbs_info->cdbs.timer_mutex); - if (!need_load_eval(&core_dbs_info->cdbs, od_tuners->sampling_rate)) { + mutex_lock(&core_dbs_info->cdbs.shared->timer_mutex); + if (!need_load_eval(core_dbs_info->cdbs.shared, + od_tuners->sampling_rate)) { modify_all = false; goto max_delay; } @@ -213,8 +215,7 @@ static void od_dbs_timer(struct work_struct *work) core_dbs_info->sample_type = OD_NORMAL_SAMPLE; if (sample_type == OD_SUB_SAMPLE) { delay = core_dbs_info->freq_lo_jiffies; - __cpufreq_driver_target(core_dbs_info->cdbs.policy, - core_dbs_info->freq_lo, + __cpufreq_driver_target(policy, core_dbs_info->freq_lo, CPUFREQ_RELATION_H); } else { dbs_check_cpu(dbs_data, cpu); @@ -230,8 +231,8 @@ max_delay: delay = delay_for_sampling_rate(od_tuners->sampling_rate * core_dbs_info->rate_mult); - gov_queue_work(dbs_data, dbs_info->cdbs.policy, delay, modify_all); - mutex_unlock(&core_dbs_info->cdbs.timer_mutex); + gov_queue_work(dbs_data, policy, delay, modify_all); + mutex_unlock(&core_dbs_info->cdbs.shared->timer_mutex); } /************************** sysfs interface ************************/ @@ -274,10 +275,10 @@ static void update_sampling_rate(struct dbs_data *dbs_data, dbs_info = &per_cpu(od_cpu_dbs_info, cpu); cpufreq_cpu_put(policy); - mutex_lock(&dbs_info->cdbs.timer_mutex); + mutex_lock(&dbs_info->cdbs.shared->timer_mutex); if (!delayed_work_pending(&dbs_info->cdbs.dwork)) { - mutex_unlock(&dbs_info->cdbs.timer_mutex); + mutex_unlock(&dbs_info->cdbs.shared->timer_mutex); continue; } @@ -286,15 +287,15 @@ static void update_sampling_rate(struct dbs_data *dbs_data, if (time_before(next_sampling, appointed_at)) { - mutex_unlock(&dbs_info->cdbs.timer_mutex); + mutex_unlock(&dbs_info->cdbs.shared->timer_mutex); cancel_delayed_work_sync(&dbs_info->cdbs.dwork); - mutex_lock(&dbs_info->cdbs.timer_mutex); + mutex_lock(&dbs_info->cdbs.shared->timer_mutex); - gov_queue_work(dbs_data, dbs_info->cdbs.policy, + gov_queue_work(dbs_data, policy, usecs_to_jiffies(new_rate), true); } - mutex_unlock(&dbs_info->cdbs.timer_mutex); + mutex_unlock(&dbs_info->cdbs.shared->timer_mutex); } } @@ -557,13 +558,16 @@ static void od_set_powersave_bias(unsigned int powersave_bias) get_online_cpus(); for_each_online_cpu(cpu) { + struct cpu_common_dbs_info *shared; + if (cpumask_test_cpu(cpu, &done)) continue; - policy = per_cpu(od_cpu_dbs_info, cpu).cdbs.policy; - if (!policy) + shared = per_cpu(od_cpu_dbs_info, cpu).cdbs.shared; + if (!shared) continue; + policy = shared->policy; cpumask_or(&done, &done, policy->cpus); if (policy->governor != &cpufreq_gov_ondemand) From 43e0ee361e96229959c2ce1eda1ad9d6b3c191b2 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sat, 18 Jul 2015 11:31:00 +0530 Subject: [PATCH 13/53] cpufreq: governor: split out common part of {cs|od}_dbs_timer() Some part of cs_dbs_timer() and od_dbs_timer() is exactly same and is unnecessarily duplicated. Create the real work-handler in cpufreq_governor.c and put the common code in this routine (dbs_timer()). Shouldn't make any functional change. Reviewed-and-tested-by: Preeti U Murthy Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 23 ++++------------ drivers/cpufreq/cpufreq_governor.c | 38 +++++++++++++++++++++++--- drivers/cpufreq/cpufreq_governor.h | 10 +++---- drivers/cpufreq/cpufreq_ondemand.c | 36 ++++++++++-------------- 4 files changed, 58 insertions(+), 49 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index d21c3cff9056..84a1506950a7 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -102,28 +102,15 @@ static void cs_check_cpu(int cpu, unsigned int load) } } -static void cs_dbs_timer(struct work_struct *work) +static unsigned int cs_dbs_timer(struct cpu_dbs_info *cdbs, + struct dbs_data *dbs_data, bool modify_all) { - struct cs_cpu_dbs_info_s *dbs_info = container_of(work, - struct cs_cpu_dbs_info_s, cdbs.dwork.work); - struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; - unsigned int cpu = policy->cpu; - struct cs_cpu_dbs_info_s *core_dbs_info = &per_cpu(cs_cpu_dbs_info, - cpu); - struct dbs_data *dbs_data = policy->governor_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - int delay = delay_for_sampling_rate(cs_tuners->sampling_rate); - bool modify_all = true; - mutex_lock(&core_dbs_info->cdbs.shared->timer_mutex); - if (!need_load_eval(core_dbs_info->cdbs.shared, - cs_tuners->sampling_rate)) - modify_all = false; - else - dbs_check_cpu(dbs_data, cpu); + if (modify_all) + dbs_check_cpu(dbs_data, cdbs->shared->policy->cpu); - gov_queue_work(dbs_data, policy, delay, modify_all); - mutex_unlock(&core_dbs_info->cdbs.shared->timer_mutex); + return delay_for_sampling_rate(cs_tuners->sampling_rate); } static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index b01cb729104b..7ed0ec2ac853 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -207,8 +207,8 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data, } /* Will return if we need to evaluate cpu load again or not */ -bool need_load_eval(struct cpu_common_dbs_info *shared, - unsigned int sampling_rate) +static bool need_load_eval(struct cpu_common_dbs_info *shared, + unsigned int sampling_rate) { if (policy_is_shared(shared->policy)) { ktime_t time_now = ktime_get(); @@ -223,7 +223,37 @@ bool need_load_eval(struct cpu_common_dbs_info *shared, return true; } -EXPORT_SYMBOL_GPL(need_load_eval); + +static void dbs_timer(struct work_struct *work) +{ + struct cpu_dbs_info *cdbs = container_of(work, struct cpu_dbs_info, + dwork.work); + struct cpu_common_dbs_info *shared = cdbs->shared; + struct cpufreq_policy *policy = shared->policy; + struct dbs_data *dbs_data = policy->governor_data; + unsigned int sampling_rate, delay; + bool modify_all = true; + + mutex_lock(&shared->timer_mutex); + + if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + + sampling_rate = cs_tuners->sampling_rate; + } else { + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + + sampling_rate = od_tuners->sampling_rate; + } + + if (!need_load_eval(cdbs->shared, sampling_rate)) + modify_all = false; + + delay = dbs_data->cdata->gov_dbs_timer(cdbs, dbs_data, modify_all); + gov_queue_work(dbs_data, policy, delay, modify_all); + + mutex_unlock(&shared->timer_mutex); +} static void set_sampling_rate(struct dbs_data *dbs_data, unsigned int sampling_rate) @@ -411,7 +441,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - INIT_DEFERRABLE_WORK(&j_cdbs->dwork, cdata->gov_dbs_timer); + INIT_DEFERRABLE_WORK(&j_cdbs->dwork, dbs_timer); } if (cdata->governor == GOV_CONSERVATIVE) { diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 8e4a25f0730c..50f171796632 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -132,8 +132,8 @@ static void *get_cpu_dbs_info_s(int cpu) \ struct cpu_common_dbs_info { struct cpufreq_policy *policy; /* - * percpu mutex that serializes governor limit change with gov_dbs_timer - * invocation. We do not want gov_dbs_timer to run when user is changing + * percpu mutex that serializes governor limit change with dbs_timer + * invocation. We do not want dbs_timer to run when user is changing * the governor or limits. */ struct mutex timer_mutex; @@ -210,7 +210,9 @@ struct common_dbs_data { struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); void *(*get_cpu_dbs_info_s)(int cpu); - void (*gov_dbs_timer)(struct work_struct *work); + unsigned int (*gov_dbs_timer)(struct cpu_dbs_info *cdbs, + struct dbs_data *dbs_data, + bool modify_all); void (*gov_check_cpu)(int cpu, unsigned int load); int (*init)(struct dbs_data *dbs_data, bool notify); void (*exit)(struct dbs_data *dbs_data, bool notify); @@ -269,8 +271,6 @@ static ssize_t show_sampling_rate_min_gov_pol \ extern struct mutex cpufreq_governor_lock; void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); -bool need_load_eval(struct cpu_common_dbs_info *shared, - unsigned int sampling_rate); int cpufreq_governor_dbs(struct cpufreq_policy *policy, struct common_dbs_data *cdata, unsigned int event); void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 14d7e86e7f94..1fa9088c84a8 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -191,48 +191,40 @@ static void od_check_cpu(int cpu, unsigned int load) } } -static void od_dbs_timer(struct work_struct *work) +static unsigned int od_dbs_timer(struct cpu_dbs_info *cdbs, + struct dbs_data *dbs_data, bool modify_all) { - struct od_cpu_dbs_info_s *dbs_info = - container_of(work, struct od_cpu_dbs_info_s, cdbs.dwork.work); - struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; + struct cpufreq_policy *policy = cdbs->shared->policy; unsigned int cpu = policy->cpu; - struct od_cpu_dbs_info_s *core_dbs_info = &per_cpu(od_cpu_dbs_info, + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - struct dbs_data *dbs_data = policy->governor_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; - int delay = 0, sample_type = core_dbs_info->sample_type; - bool modify_all = true; + int delay = 0, sample_type = dbs_info->sample_type; - mutex_lock(&core_dbs_info->cdbs.shared->timer_mutex); - if (!need_load_eval(core_dbs_info->cdbs.shared, - od_tuners->sampling_rate)) { - modify_all = false; + if (!modify_all) goto max_delay; - } /* Common NORMAL_SAMPLE setup */ - core_dbs_info->sample_type = OD_NORMAL_SAMPLE; + dbs_info->sample_type = OD_NORMAL_SAMPLE; if (sample_type == OD_SUB_SAMPLE) { - delay = core_dbs_info->freq_lo_jiffies; - __cpufreq_driver_target(policy, core_dbs_info->freq_lo, + delay = dbs_info->freq_lo_jiffies; + __cpufreq_driver_target(policy, dbs_info->freq_lo, CPUFREQ_RELATION_H); } else { dbs_check_cpu(dbs_data, cpu); - if (core_dbs_info->freq_lo) { + if (dbs_info->freq_lo) { /* Setup timer for SUB_SAMPLE */ - core_dbs_info->sample_type = OD_SUB_SAMPLE; - delay = core_dbs_info->freq_hi_jiffies; + dbs_info->sample_type = OD_SUB_SAMPLE; + delay = dbs_info->freq_hi_jiffies; } } max_delay: if (!delay) delay = delay_for_sampling_rate(od_tuners->sampling_rate - * core_dbs_info->rate_mult); + * dbs_info->rate_mult); - gov_queue_work(dbs_data, policy, delay, modify_all); - mutex_unlock(&core_dbs_info->cdbs.shared->timer_mutex); + return delay; } /************************** sysfs interface ************************/ From a72c49590a1f9e5d26a71c3f807dbb8958c93513 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sat, 18 Jul 2015 11:31:01 +0530 Subject: [PATCH 14/53] cpufreq: governor: Avoid invalid states with additional checks There can be races where the request has come to a wrong state. For example INIT followed by STOP (instead of START) or START followed by EXIT (instead of STOP). Address these races by making sure the state-machine never gets into any invalid state. Also return an error if an invalid state-transition is requested. Reviewed-and-tested-by: Preeti U Murthy Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 46 +++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 7ed0ec2ac853..f225dc975415 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -305,6 +305,10 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, unsigned int latency; int ret; + /* State should be equivalent to EXIT */ + if (policy->governor_data) + return -EBUSY; + if (dbs_data) { if (WARN_ON(have_governor_per_policy())) return -EINVAL; @@ -375,10 +379,15 @@ free_dbs_data: return ret; } -static void cpufreq_governor_exit(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_exit(struct cpufreq_policy *policy, + struct dbs_data *dbs_data) { struct common_dbs_data *cdata = dbs_data->cdata; + struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); + + /* State should be equivalent to INIT */ + if (!cdbs->shared || cdbs->shared->policy) + return -EBUSY; policy->governor_data = NULL; if (!--dbs_data->usage_count) { @@ -395,6 +404,7 @@ static void cpufreq_governor_exit(struct cpufreq_policy *policy, } free_common_dbs_info(policy, cdata); + return 0; } static int cpufreq_governor_start(struct cpufreq_policy *policy, @@ -409,6 +419,10 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, if (!policy->cur) return -EINVAL; + /* State should be equivalent to INIT */ + if (!shared || shared->policy) + return -EBUSY; + if (cdata->governor == GOV_CONSERVATIVE) { struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; @@ -465,14 +479,18 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, return 0; } -static void cpufreq_governor_stop(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_stop(struct cpufreq_policy *policy, + struct dbs_data *dbs_data) { struct common_dbs_data *cdata = dbs_data->cdata; unsigned int cpu = policy->cpu; struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); struct cpu_common_dbs_info *shared = cdbs->shared; + /* State should be equivalent to START */ + if (!shared || !shared->policy) + return -EBUSY; + gov_cancel_work(dbs_data, policy); if (cdata->governor == GOV_CONSERVATIVE) { @@ -484,17 +502,19 @@ static void cpufreq_governor_stop(struct cpufreq_policy *policy, shared->policy = NULL; mutex_destroy(&shared->timer_mutex); + return 0; } -static void cpufreq_governor_limits(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_limits(struct cpufreq_policy *policy, + struct dbs_data *dbs_data) { struct common_dbs_data *cdata = dbs_data->cdata; unsigned int cpu = policy->cpu; struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); + /* State should be equivalent to START */ if (!cdbs->shared || !cdbs->shared->policy) - return; + return -EBUSY; mutex_lock(&cdbs->shared->timer_mutex); if (policy->max < cdbs->shared->policy->cur) @@ -505,13 +525,15 @@ static void cpufreq_governor_limits(struct cpufreq_policy *policy, CPUFREQ_RELATION_L); dbs_check_cpu(dbs_data, cpu); mutex_unlock(&cdbs->shared->timer_mutex); + + return 0; } int cpufreq_governor_dbs(struct cpufreq_policy *policy, struct common_dbs_data *cdata, unsigned int event) { struct dbs_data *dbs_data; - int ret = 0; + int ret; /* Lock governor to block concurrent initialization of governor */ mutex_lock(&cdata->mutex); @@ -531,17 +553,19 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, ret = cpufreq_governor_init(policy, dbs_data, cdata); break; case CPUFREQ_GOV_POLICY_EXIT: - cpufreq_governor_exit(policy, dbs_data); + ret = cpufreq_governor_exit(policy, dbs_data); break; case CPUFREQ_GOV_START: ret = cpufreq_governor_start(policy, dbs_data); break; case CPUFREQ_GOV_STOP: - cpufreq_governor_stop(policy, dbs_data); + ret = cpufreq_governor_stop(policy, dbs_data); break; case CPUFREQ_GOV_LIMITS: - cpufreq_governor_limits(policy, dbs_data); + ret = cpufreq_governor_limits(policy, dbs_data); break; + default: + ret = -EINVAL; } unlock: From 871ef3b53a2f4dd9be348c07b77df3c4bd74a37f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sat, 18 Jul 2015 11:31:02 +0530 Subject: [PATCH 15/53] cpufreq: governor: Don't WARN on invalid states With previous commit, governors have started to return errors on invalid state-transition requests. We already have a WARN for an invalid state-transition request in cpufreq_governor_dbs(). This does trigger today, as the sequence of events isn't guaranteed by cpufreq core. Lets stop warning on that for now, and make sure we don't enter an invalid state. Reviewed-and-tested-by: Preeti U Murthy Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index f225dc975415..939197ffa4ac 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -543,7 +543,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, else dbs_data = cdata->gdbs_data; - if (WARN_ON(!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT))) { + if (!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)) { ret = -EINVAL; goto unlock; } From 4bc384ae6299d3f3a948efabda2a423e2a293ee0 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sat, 18 Jul 2015 11:31:03 +0530 Subject: [PATCH 16/53] cpufreq: propagate errors returned from __cpufreq_governor() Return codes aren't honored properly in cpufreq_set_policy(). This can lead to two problems: - wrong errors propagated to sysfs - we try to do next state-change even if the previous one failed cpufreq_governor_dbs() now returns proper errors on all invalid state-transition requests and this code should honor that. Reviewed-and-tested-by: Preeti U Murthy Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a7b6ac6e048e..a3e8fb61cbcc 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2295,16 +2295,31 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, old_gov = policy->governor; /* end old governor */ if (old_gov) { - __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + if (ret) { + /* This can happen due to race with other operations */ + pr_debug("%s: Failed to Stop Governor: %s (%d)\n", + __func__, old_gov->name, ret); + return ret; + } + up_write(&policy->rwsem); - __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); down_write(&policy->rwsem); + + if (ret) { + pr_err("%s: Failed to Exit Governor: %s (%d)\n", + __func__, old_gov->name, ret); + return ret; + } } /* start new governor */ policy->governor = new_policy->governor; - if (!__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) { - if (!__cpufreq_governor(policy, CPUFREQ_GOV_START)) + ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); + if (!ret) { + ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + if (!ret) goto out; up_write(&policy->rwsem); @@ -2316,11 +2331,13 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, pr_debug("starting governor %s failed\n", policy->governor->name); if (old_gov) { policy->governor = old_gov; - __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); - __cpufreq_governor(policy, CPUFREQ_GOV_START); + if (__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) + policy->governor = NULL; + else + __cpufreq_governor(policy, CPUFREQ_GOV_START); } - return -EINVAL; + return ret; out: pr_debug("governor: change or update limits\n"); From b2f8dc4ce6626e25b164e29cf72b70230a1f1711 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 22 Jul 2015 22:11:16 +0200 Subject: [PATCH 17/53] ACPI / processor: Drop an unused argument of a cleanup routine acpi_processor_unregister_performance() actually doesn't use its first argument, so drop it and update the callers accordingly. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/acpi/processor_perflib.c | 4 +--- drivers/cpufreq/acpi-cpufreq.c | 5 ++--- drivers/cpufreq/e_powersaver.c | 2 +- drivers/cpufreq/ia64-acpi-cpufreq.c | 5 ++--- drivers/cpufreq/powernow-k7.c | 4 ++-- drivers/cpufreq/powernow-k8.c | 5 ++--- drivers/xen/xen-acpi-processor.c | 16 ++++++---------- include/acpi/processor.h | 5 +---- 8 files changed, 17 insertions(+), 29 deletions(-) diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index cfc8aba72f86..36b6da2918a6 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -784,9 +784,7 @@ acpi_processor_register_performance(struct acpi_processor_performance EXPORT_SYMBOL(acpi_processor_register_performance); -void -acpi_processor_unregister_performance(struct acpi_processor_performance - *performance, unsigned int cpu) +void acpi_processor_unregister_performance(unsigned int cpu) { struct acpi_processor *pr; diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index de54ce14eb39..ca9d0f6edf08 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -844,7 +844,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) err_freqfree: kfree(data->freq_table); err_unreg: - acpi_processor_unregister_performance(perf, cpu); + acpi_processor_unregister_performance(cpu); err_free_mask: free_cpumask_var(data->freqdomain_cpus); err_free: @@ -862,8 +862,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) if (data) { policy->driver_data = NULL; - acpi_processor_unregister_performance(data->acpi_data, - data->acpi_perf_cpu); + acpi_processor_unregister_performance(data->acpi_perf_cpu); free_cpumask_var(data->freqdomain_cpus); kfree(data->freq_table); kfree(data); diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c index a0d2a423cea9..4085244c8a67 100644 --- a/drivers/cpufreq/e_powersaver.c +++ b/drivers/cpufreq/e_powersaver.c @@ -78,7 +78,7 @@ static int eps_acpi_init(void) static int eps_acpi_exit(struct cpufreq_policy *policy) { if (eps_acpi_cpu_perf) { - acpi_processor_unregister_performance(eps_acpi_cpu_perf, 0); + acpi_processor_unregister_performance(0); free_cpumask_var(eps_acpi_cpu_perf->shared_cpu_map); kfree(eps_acpi_cpu_perf); eps_acpi_cpu_perf = NULL; diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c index c30aaa6a54e8..a9c193286ef4 100644 --- a/drivers/cpufreq/ia64-acpi-cpufreq.c +++ b/drivers/cpufreq/ia64-acpi-cpufreq.c @@ -313,7 +313,7 @@ acpi_cpufreq_cpu_init ( err_freqfree: kfree(data->freq_table); err_unreg: - acpi_processor_unregister_performance(&data->acpi_data, cpu); + acpi_processor_unregister_performance(cpu); err_free: kfree(data); acpi_io_data[cpu] = NULL; @@ -332,8 +332,7 @@ acpi_cpufreq_cpu_exit ( if (data) { acpi_io_data[policy->cpu] = NULL; - acpi_processor_unregister_performance(&data->acpi_data, - policy->cpu); + acpi_processor_unregister_performance(policy->cpu); kfree(data); } diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c index 37c5742482d8..c1ae1999770a 100644 --- a/drivers/cpufreq/powernow-k7.c +++ b/drivers/cpufreq/powernow-k7.c @@ -421,7 +421,7 @@ static int powernow_acpi_init(void) return 0; err2: - acpi_processor_unregister_performance(acpi_processor_perf, 0); + acpi_processor_unregister_performance(0); err1: free_cpumask_var(acpi_processor_perf->shared_cpu_map); err05: @@ -661,7 +661,7 @@ static int powernow_cpu_exit(struct cpufreq_policy *policy) { #ifdef CONFIG_X86_POWERNOW_K7_ACPI if (acpi_processor_perf) { - acpi_processor_unregister_performance(acpi_processor_perf, 0); + acpi_processor_unregister_performance(0); free_cpumask_var(acpi_processor_perf->shared_cpu_map); kfree(acpi_processor_perf); } diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 5c035d04d827..0b5bf135b090 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -795,7 +795,7 @@ err_out_mem: kfree(powernow_table); err_out: - acpi_processor_unregister_performance(&data->acpi_data, data->cpu); + acpi_processor_unregister_performance(data->cpu); /* data->acpi_data.state_count informs us at ->exit() * whether ACPI was used */ @@ -863,8 +863,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { if (data->acpi_data.state_count) - acpi_processor_unregister_performance(&data->acpi_data, - data->cpu); + acpi_processor_unregister_performance(data->cpu); free_cpumask_var(data->acpi_data.shared_cpu_map); } diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 59fc190f1e92..70fa438000af 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -560,11 +560,9 @@ static int __init xen_acpi_processor_init(void) return 0; err_unregister: - for_each_possible_cpu(i) { - struct acpi_processor_performance *perf; - perf = per_cpu_ptr(acpi_perf_data, i); - acpi_processor_unregister_performance(perf, i); - } + for_each_possible_cpu(i) + acpi_processor_unregister_performance(i); + err_out: /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */ free_acpi_perf_data(); @@ -579,11 +577,9 @@ static void __exit xen_acpi_processor_exit(void) kfree(acpi_ids_done); kfree(acpi_id_present); kfree(acpi_id_cst_present); - for_each_possible_cpu(i) { - struct acpi_processor_performance *perf; - perf = per_cpu_ptr(acpi_perf_data, i); - acpi_processor_unregister_performance(perf, i); - } + for_each_possible_cpu(i) + acpi_processor_unregister_performance(i); + free_acpi_perf_data(); } diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 4188a4d3b597..aad1f2a3cd2b 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -228,10 +228,7 @@ extern int acpi_processor_preregister_performance(struct extern int acpi_processor_register_performance(struct acpi_processor_performance *performance, unsigned int cpu); -extern void acpi_processor_unregister_performance(struct - acpi_processor_performance - *performance, - unsigned int cpu); +extern void acpi_processor_unregister_performance(unsigned int cpu); /* note: this locks both the calling module and the processor module if a _PPC object exists, rmmod is disallowed then */ From 3427616b2a5aa7f34716278aa6a140378a00a36e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 22 Jul 2015 22:11:56 +0200 Subject: [PATCH 18/53] cpufreq: acpi-cpufreq: Drop acpi_data from struct acpi_cpufreq_data After commit 8cfcfd39000d (acpi-cpufreq: Fix an ACPI perf unregister issue) we store both a pointer to per-CPU data of the first policy CPU and the number of that CPU which are redundant. Since the CPU number has to be stored anyway for the unregistration, the pointer to the CPU's per-CPU data may be dropped and we can access the data in question via per_cpu_ptr(). Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/acpi-cpufreq.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index ca9d0f6edf08..af2bb8883d96 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -65,7 +65,6 @@ enum { #define MSR_K7_HWCR_CPB_DIS (1ULL << 25) struct acpi_cpufreq_data { - struct acpi_processor_performance *acpi_data; struct cpufreq_frequency_table *freq_table; unsigned int resume; unsigned int cpu_feature; @@ -76,6 +75,11 @@ struct acpi_cpufreq_data { /* acpi_perf_data is a pointer to percpu data. */ static struct acpi_processor_performance __percpu *acpi_perf_data; +static inline struct acpi_processor_performance *to_perf_data(struct acpi_cpufreq_data *data) +{ + return per_cpu_ptr(acpi_perf_data, data->acpi_perf_cpu); +} + static struct cpufreq_driver acpi_cpufreq_driver; static unsigned int acpi_pstate_strict; @@ -201,7 +205,7 @@ static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) struct acpi_processor_performance *perf; int i; - perf = data->acpi_data; + perf = to_perf_data(data); for (i = 0; i < perf->state_count; i++) { if (value == perf->states[i].status) @@ -220,7 +224,7 @@ static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data) else msr &= INTEL_MSR_RANGE; - perf = data->acpi_data; + perf = to_perf_data(data); cpufreq_for_each_entry(pos, data->freq_table) if (msr == perf->states[pos->driver_data].status) @@ -346,7 +350,7 @@ get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data) break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; - perf = data->acpi_data; + perf = to_perf_data(data); cmd.addr.io.port = perf->control_register.address; cmd.addr.io.bit_width = perf->control_register.bit_width; break; @@ -377,10 +381,10 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) data = policy->driver_data; cpufreq_cpu_put(policy); - if (unlikely(!data || !data->acpi_data || !data->freq_table)) + if (unlikely(!data || !data->freq_table)) return 0; - cached_freq = data->freq_table[data->acpi_data->state].frequency; + cached_freq = data->freq_table[to_perf_data(data)->state].frequency; freq = extract_freq(get_cur_val(cpumask_of(cpu), data), data); if (freq != cached_freq) { /* @@ -419,12 +423,11 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, unsigned int next_perf_state = 0; /* Index into perf table */ int result = 0; - if (unlikely(data == NULL || - data->acpi_data == NULL || data->freq_table == NULL)) { + if (unlikely(data == NULL || data->freq_table == NULL)) { return -ENODEV; } - perf = data->acpi_data; + perf = to_perf_data(data); next_perf_state = data->freq_table[index].driver_data; if (perf->state == next_perf_state) { if (unlikely(data->resume)) { @@ -487,8 +490,9 @@ out: static unsigned long acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) { - struct acpi_processor_performance *perf = data->acpi_data; + struct acpi_processor_performance *perf; + perf = to_perf_data(data); if (cpu_khz) { /* search the closest match to cpu_khz */ unsigned int i; @@ -677,18 +681,17 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) goto err_free; } - data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu); + perf = per_cpu_ptr(acpi_perf_data, cpu); data->acpi_perf_cpu = cpu; policy->driver_data = data; if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; - result = acpi_processor_register_performance(data->acpi_data, cpu); + result = acpi_processor_register_performance(perf, cpu); if (result) goto err_free_mask; - perf = data->acpi_data; policy->shared_type = perf->shared_type; /* From f56c50e322eed07526a08edefa29fc8bab1e93df Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 22 Jul 2015 22:12:10 +0200 Subject: [PATCH 19/53] cpufreq: acpi-cpufreq: Fix up the handling of cpb sysfs attribute The cpb sysfs attribute is only exposed by the ACPI cpufreq driver after a runtime check. For this purpose, the driver keeps a NULL placeholder in its table of sysfs attributes and replaces the NULL with a pointer to an attribute structure if it decides to expose cpb. That is confusing, so make the driver set the pointer to the cpb attribute structure upfront and replace it with NULL if the attribute should not be exposed instead. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/acpi-cpufreq.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index af2bb8883d96..15b921a9248c 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -888,7 +888,9 @@ static int acpi_cpufreq_resume(struct cpufreq_policy *policy) static struct freq_attr *acpi_cpufreq_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, &freqdomain_cpus, - NULL, /* this is a placeholder for cpb, do not remove */ +#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB + &cpb, +#endif NULL, }; @@ -961,17 +963,16 @@ static int __init acpi_cpufreq_init(void) * only if configured. This is considered legacy code, which * will probably be removed at some point in the future. */ - if (check_amd_hwpstate_cpu(0)) { - struct freq_attr **iter; + if (!check_amd_hwpstate_cpu(0)) { + struct freq_attr **attr; - pr_debug("adding sysfs entry for cpb\n"); + pr_debug("CPB unsupported, do not expose it\n"); - for (iter = acpi_cpufreq_attr; *iter != NULL; iter++) - ; - - /* make sure there is a terminator behind it */ - if (iter[1] == NULL) - *iter = &cpb; + for (attr = acpi_cpufreq_attr; *attr; attr++) + if (*attr == &cpb) { + *attr = NULL; + break; + } } #endif acpi_cpufreq_boost_init(); From 946c14f812bfff18e6fd6357d06b6e8fa8793fec Mon Sep 17 00:00:00 2001 From: Pan Xinhui Date: Mon, 20 Jul 2015 14:22:46 +0800 Subject: [PATCH 20/53] cpufreq: ia64: remove redundant freq_table of acpi_cpufreq_data freq_table is now stored as policy->freq_table, so drop the redundant freq_table from struct cpufreq_acpi_io. Signed-off-by: Pan Xinhui Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/ia64-acpi-cpufreq.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c index a9c193286ef4..cab8ab675bbc 100644 --- a/drivers/cpufreq/ia64-acpi-cpufreq.c +++ b/drivers/cpufreq/ia64-acpi-cpufreq.c @@ -29,7 +29,6 @@ MODULE_LICENSE("GPL"); struct cpufreq_acpi_io { struct acpi_processor_performance acpi_data; - struct cpufreq_frequency_table *freq_table; unsigned int resume; }; @@ -221,6 +220,7 @@ acpi_cpufreq_cpu_init ( unsigned int cpu = policy->cpu; struct cpufreq_acpi_io *data; unsigned int result = 0; + struct cpufreq_frequency_table *freq_table; pr_debug("acpi_cpufreq_cpu_init\n"); @@ -254,10 +254,10 @@ acpi_cpufreq_cpu_init ( } /* alloc freq_table */ - data->freq_table = kzalloc(sizeof(*data->freq_table) * + freq_table = kzalloc(sizeof(*freq_table) * (data->acpi_data.state_count + 1), GFP_KERNEL); - if (!data->freq_table) { + if (!freq_table) { result = -ENOMEM; goto err_unreg; } @@ -276,14 +276,14 @@ acpi_cpufreq_cpu_init ( for (i = 0; i <= data->acpi_data.state_count; i++) { if (i < data->acpi_data.state_count) { - data->freq_table[i].frequency = + freq_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000; } else { - data->freq_table[i].frequency = CPUFREQ_TABLE_END; + freq_table[i].frequency = CPUFREQ_TABLE_END; } } - result = cpufreq_table_validate_and_show(policy, data->freq_table); + result = cpufreq_table_validate_and_show(policy, freq_table); if (result) { goto err_freqfree; } @@ -311,7 +311,7 @@ acpi_cpufreq_cpu_init ( return (result); err_freqfree: - kfree(data->freq_table); + kfree(freq_table); err_unreg: acpi_processor_unregister_performance(cpu); err_free: From 555f3fe957b5bd763d49719cc68c6435c9c8dcf1 Mon Sep 17 00:00:00 2001 From: Pan Xinhui Date: Mon, 20 Jul 2015 14:24:36 +0800 Subject: [PATCH 21/53] cpufreq: ia64: Fix a memory leak in acpi_cpufreq_cpu_exit() freq_table should be alloced in ->init and freed in ->exit, but it it is not freed. Fix this memory leak in acpi_cpufreq_cpu_exit(). Signed-off-by: Pan Xinhui Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/ia64-acpi-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c index cab8ab675bbc..0202429f1c5b 100644 --- a/drivers/cpufreq/ia64-acpi-cpufreq.c +++ b/drivers/cpufreq/ia64-acpi-cpufreq.c @@ -333,6 +333,7 @@ acpi_cpufreq_cpu_exit ( if (data) { acpi_io_data[policy->cpu] = NULL; acpi_processor_unregister_performance(policy->cpu); + kfree(policy->freq_table); kfree(data); } From 454d3a2500a4eb33be85dde3bfba9e5f6b5efadc Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 22 Jul 2015 17:59:11 +0200 Subject: [PATCH 22/53] cpufreq: Remove cpufreq_rwsem cpufreq_rwsem was introduced in commit 6eed9404ab3c4 ("cpufreq: Use rwsem for protecting critical sections) in order to replace try_module_get() on the cpu-freq driver. That try_module_get() worked well until the refcount was so heavily used that module removal became more or less impossible. Though when looking at the various (undocumented) protection mechanisms in that code, the randomly sprinkeled around cpufreq_rwsem locking sites are superfluous. The policy, which is acquired in cpufreq_cpu_get() and released in cpufreq_cpu_put() is sufficiently protected already. cpufreq_cpu_get(cpu) /* Protects against concurrent driver removal */ read_lock_irqsave(&cpufreq_driver_lock, flags); policy = per_cpu(cpufreq_cpu_data, cpu); kobject_get(&policy->kobj); read_unlock_irqrestore(&cpufreq_driver_lock, flags); The reference on the policy serializes versus module unload already: cpufreq_unregister_driver() subsys_interface_unregister() __cpufreq_remove_dev_finish() per_cpu(cpufreq_cpu_data) = NULL; cpufreq_policy_put_kobj() If there is a reference held on the policy, i.e. obtained prior to the unregister call, then cpufreq_policy_put_kobj() will wait until that reference is dropped. So once subsys_interface_unregister() returns there is no policy pointer in flight and no new reference can be obtained. So that rwsem protection is useless. The other usage of cpufreq_rwsem in show()/store() of the sysfs interface is redundant as well because sysfs already does the proper kobject_get()/put() pairs. That leaves CPU hotplug versus module removal. The current down_write() around the write_lock() in cpufreq_unregister_driver() is silly at best as it protects actually nothing. The trivial solution to this is to prevent hotplug across cpufreq_unregister_driver completely. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 41 +++------------------------------------ 1 file changed, 3 insertions(+), 38 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a3e8fb61cbcc..febda462681c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -112,12 +112,6 @@ static inline bool has_target(void) return cpufreq_driver->target_index || cpufreq_driver->target; } -/* - * rwsem to guarantee that cpufreq driver module doesn't unload during critical - * sections - */ -static DECLARE_RWSEM(cpufreq_rwsem); - /* internal prototypes */ static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); @@ -277,10 +271,6 @@ EXPORT_SYMBOL_GPL(cpufreq_generic_get); * If corresponding call cpufreq_cpu_put() isn't made, the policy wouldn't be * freed as that depends on the kobj count. * - * It also takes a read-lock of 'cpufreq_rwsem' and doesn't put it back if a - * valid policy is found. This is done to make sure the driver doesn't get - * unregistered while the policy is being used. - * * Return: A valid policy on success, otherwise NULL on failure. */ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) @@ -291,9 +281,6 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) if (WARN_ON(cpu >= nr_cpu_ids)) return NULL; - if (!down_read_trylock(&cpufreq_rwsem)) - return NULL; - /* get the cpufreq driver */ read_lock_irqsave(&cpufreq_driver_lock, flags); @@ -306,9 +293,6 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) read_unlock_irqrestore(&cpufreq_driver_lock, flags); - if (!policy) - up_read(&cpufreq_rwsem); - return policy; } EXPORT_SYMBOL_GPL(cpufreq_cpu_get); @@ -320,13 +304,10 @@ EXPORT_SYMBOL_GPL(cpufreq_cpu_get); * * This decrements the kobject reference count incremented earlier by calling * cpufreq_cpu_get(). - * - * It also drops the read-lock of 'cpufreq_rwsem' taken at cpufreq_cpu_get(). */ void cpufreq_cpu_put(struct cpufreq_policy *policy) { kobject_put(&policy->kobj); - up_read(&cpufreq_rwsem); } EXPORT_SYMBOL_GPL(cpufreq_cpu_put); @@ -851,9 +832,6 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) struct freq_attr *fattr = to_attr(attr); ssize_t ret; - if (!down_read_trylock(&cpufreq_rwsem)) - return -EINVAL; - down_read(&policy->rwsem); if (fattr->show) @@ -862,7 +840,6 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) ret = -EIO; up_read(&policy->rwsem); - up_read(&cpufreq_rwsem); return ret; } @@ -879,9 +856,6 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, if (!cpu_online(policy->cpu)) goto unlock; - if (!down_read_trylock(&cpufreq_rwsem)) - goto unlock; - down_write(&policy->rwsem); /* Updating inactive policies is invalid, so avoid doing that. */ @@ -897,8 +871,6 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, unlock_policy_rwsem: up_write(&policy->rwsem); - - up_read(&cpufreq_rwsem); unlock: put_online_cpus(); @@ -1261,15 +1233,11 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) if (cpu_is_offline(cpu)) return add_cpu_dev_symlink(per_cpu(cpufreq_cpu_data, cpu), cpu); - if (!down_read_trylock(&cpufreq_rwsem)) - return 0; - /* Check if this CPU already has a policy to manage it */ policy = per_cpu(cpufreq_cpu_data, cpu); if (policy && !policy_is_inactive(policy)) { WARN_ON(!cpumask_test_cpu(cpu, policy->related_cpus)); ret = cpufreq_add_policy_cpu(policy, cpu, dev); - up_read(&cpufreq_rwsem); return ret; } @@ -1395,8 +1363,6 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) kobject_uevent(&policy->kobj, KOBJ_ADD); - up_read(&cpufreq_rwsem); - /* Callback for handling stuff after policy is ready */ if (cpufreq_driver->ready) cpufreq_driver->ready(policy); @@ -1416,8 +1382,6 @@ out_exit_policy: out_free_policy: cpufreq_policy_free(policy, recover_policy); out_release_rwsem: - up_read(&cpufreq_rwsem); - return ret; } @@ -2604,19 +2568,20 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver) pr_debug("unregistering driver %s\n", driver->name); + /* Protect against concurrent cpu hotplug */ + get_online_cpus(); subsys_interface_unregister(&cpufreq_interface); if (cpufreq_boost_supported()) cpufreq_sysfs_remove_file(&boost.attr); unregister_hotcpu_notifier(&cpufreq_cpu_notifier); - down_write(&cpufreq_rwsem); write_lock_irqsave(&cpufreq_driver_lock, flags); cpufreq_driver = NULL; write_unlock_irqrestore(&cpufreq_driver_lock, flags); - up_write(&cpufreq_rwsem); + put_online_cpus(); return 0; } From 15c0b4d222f83672407419f9c9e167e996d8ad2b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 27 Jul 2015 23:11:09 +0200 Subject: [PATCH 23/53] cpufreq: Rework two functions related to CPU offline Since __cpufreq_remove_dev_prepare() and __cpufreq_remove_dev_finish() are about CPU offline rather than about CPU removal, rename them to cpufreq_offline_prepare() and cpufreq_offline_finish(), respectively. Also change their argument from a struct device pointer to a CPU number, because they use the CPU number only internally anyway and make them void as their return values are ignored. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 46251e8d30f2..29e3ec979266 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1398,10 +1398,8 @@ out_release_rwsem: return ret; } -static int __cpufreq_remove_dev_prepare(struct device *dev) +static void cpufreq_offline_prepare(unsigned int cpu) { - unsigned int cpu = dev->id; - int ret = 0; struct cpufreq_policy *policy; pr_debug("%s: unregistering CPU %u\n", __func__, cpu); @@ -1409,11 +1407,11 @@ static int __cpufreq_remove_dev_prepare(struct device *dev) policy = cpufreq_cpu_get_raw(cpu); if (!policy) { pr_debug("%s: No cpu_data found\n", __func__); - return -EINVAL; + return; } if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + int ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); if (ret) pr_err("%s: Failed to stop governor\n", __func__); } @@ -1434,7 +1432,7 @@ static int __cpufreq_remove_dev_prepare(struct device *dev) /* Start governor again for active policy */ if (!policy_is_inactive(policy)) { if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + int ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); if (!ret) ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); @@ -1444,28 +1442,24 @@ static int __cpufreq_remove_dev_prepare(struct device *dev) } else if (cpufreq_driver->stop_cpu) { cpufreq_driver->stop_cpu(policy); } - - return ret; } -static int __cpufreq_remove_dev_finish(struct device *dev) +static void cpufreq_offline_finish(unsigned int cpu) { - unsigned int cpu = dev->id; - int ret; struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); if (!policy) { pr_debug("%s: No cpu_data found\n", __func__); - return -EINVAL; + return; } /* Only proceed for inactive policies */ if (!policy_is_inactive(policy)) - return 0; + return; /* If cpu is last user of policy, free policy */ if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + int ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); if (ret) pr_err("%s: Failed to exit governor\n", __func__); } @@ -1477,8 +1471,6 @@ static int __cpufreq_remove_dev_finish(struct device *dev) */ if (cpufreq_driver->exit) cpufreq_driver->exit(policy); - - return 0; } /** @@ -1495,8 +1487,8 @@ static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) return 0; if (cpu_online(cpu)) { - __cpufreq_remove_dev_prepare(dev); - __cpufreq_remove_dev_finish(dev); + cpufreq_offline_prepare(cpu); + cpufreq_offline_finish(cpu); } cpumask_clear_cpu(cpu, policy->real_cpus); @@ -2379,11 +2371,11 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, break; case CPU_DOWN_PREPARE: - __cpufreq_remove_dev_prepare(dev); + cpufreq_offline_prepare(cpu); break; case CPU_POST_DEAD: - __cpufreq_remove_dev_finish(dev); + cpufreq_offline_finish(cpu); break; case CPU_DOWN_FAILED: From 11ce707e6c2aea05e1f54680fb89a8a44ded5db4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 27 Jul 2015 23:11:21 +0200 Subject: [PATCH 24/53] cpufreq: Drop cpufreq_policy_restore() Notice that when cpufreq_policy_restore() is called, its per-CPU cpufreq_cpu_data variable has been already dereferenced and if that variable is not NULL, the policy local pointer in cpufreq_add_dev() contains its value. Therefore it is not necessary to dereference it again and the policy pointer can be used directly. Moreover, if that pointer is not NULL, the policy is inactive (or the previous check would have made us return from cpufreq_add_dev()) so the restoration code from cpufreq_policy_restore() can be moved to that point in cpufreq_add_dev(). Do that and drop cpufreq_policy_restore(). Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 44 ++++++++++----------------------------- 1 file changed, 11 insertions(+), 33 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 29e3ec979266..3199b8dafd60 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1092,28 +1092,6 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, return 0; } -static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu) -{ - struct cpufreq_policy *policy; - unsigned long flags; - - read_lock_irqsave(&cpufreq_driver_lock, flags); - policy = per_cpu(cpufreq_cpu_data, cpu); - read_unlock_irqrestore(&cpufreq_driver_lock, flags); - - if (likely(policy)) { - /* Policy should be inactive here */ - WARN_ON(!policy_is_inactive(policy)); - - down_write(&policy->rwsem); - policy->cpu = cpu; - policy->governor = NULL; - up_write(&policy->rwsem); - } - - return policy; -} - static struct cpufreq_policy *cpufreq_policy_alloc(struct device *dev) { struct cpufreq_policy *policy; @@ -1226,7 +1204,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) int ret = -ENOMEM; struct cpufreq_policy *policy; unsigned long flags; - bool recover_policy = !sif; + bool recover_policy; pr_debug("adding CPU %u\n", cpu); @@ -1244,18 +1222,18 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) /* Check if this CPU already has a policy to manage it */ policy = per_cpu(cpufreq_cpu_data, cpu); - if (policy && !policy_is_inactive(policy)) { + if (policy) { WARN_ON(!cpumask_test_cpu(cpu, policy->related_cpus)); - ret = cpufreq_add_policy_cpu(policy, cpu, dev); - return ret; - } + if (!policy_is_inactive(policy)) + return cpufreq_add_policy_cpu(policy, cpu, dev); - /* - * Restore the saved policy when doing light-weight init and fall back - * to the full init if that fails. - */ - policy = recover_policy ? cpufreq_policy_restore(cpu) : NULL; - if (!policy) { + /* This is the only online CPU for the policy. Start over. */ + recover_policy = true; + down_write(&policy->rwsem); + policy->cpu = cpu; + policy->governor = NULL; + up_write(&policy->rwsem); + } else { recover_policy = false; policy = cpufreq_policy_alloc(dev); if (!policy) From d4d854d6c7706e6a5cda297e350e3626d55e9bc9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 27 Jul 2015 23:11:30 +0200 Subject: [PATCH 25/53] cpufreq: Drop unnecessary label from cpufreq_add_dev() The leftover out_release_rwsem label in cpufreq_add_dev() is not necessary any more and confusing, so drop it. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 3199b8dafd60..5c80502339a1 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1201,7 +1201,7 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy, bool notify) static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) { unsigned int j, cpu = dev->id; - int ret = -ENOMEM; + int ret; struct cpufreq_policy *policy; unsigned long flags; bool recover_policy; @@ -1237,7 +1237,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) recover_policy = false; policy = cpufreq_policy_alloc(dev); if (!policy) - goto out_release_rwsem; + return -ENOMEM; } cpumask_copy(policy->cpus, cpumask_of(cpu)); @@ -1372,7 +1372,6 @@ out_exit_policy: cpufreq_driver->exit(policy); out_free_policy: cpufreq_policy_free(policy, recover_policy); -out_release_rwsem: return ret; } From d9612a495b0bc93f5db0e0033fe4ee7abb7167c7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 27 Jul 2015 23:11:37 +0200 Subject: [PATCH 26/53] cpufreq: Drop unused dev argument from two functions The dev argument of cpufreq_add_policy_cpu() and cpufreq_add_dev_interface() is not used by any of them, so drop it. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5c80502339a1..9d8f8cd64e58 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -999,8 +999,7 @@ static void cpufreq_remove_dev_symlink(struct cpufreq_policy *policy) } } -static int cpufreq_add_dev_interface(struct cpufreq_policy *policy, - struct device *dev) +static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) { struct freq_attr **drv_attr; int ret = 0; @@ -1057,8 +1056,7 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy) return cpufreq_set_policy(policy, &new_policy); } -static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, - unsigned int cpu, struct device *dev) +static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) { int ret = 0; @@ -1225,7 +1223,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) if (policy) { WARN_ON(!cpumask_test_cpu(cpu, policy->related_cpus)); if (!policy_is_inactive(policy)) - return cpufreq_add_policy_cpu(policy, cpu, dev); + return cpufreq_add_policy_cpu(policy, cpu); /* This is the only online CPU for the policy. Start over. */ recover_policy = true; @@ -1328,7 +1326,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) CPUFREQ_START, policy); if (!recover_policy) { - ret = cpufreq_add_dev_interface(policy, dev); + ret = cpufreq_add_dev_interface(policy); if (ret) goto out_exit_policy; blocking_notifier_call_chain(&cpufreq_policy_notifier_list, From 4d1f3a5bcb489cc6f7cbc128e0c292fed7868d32 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 27 Jul 2015 23:11:44 +0200 Subject: [PATCH 27/53] cpufreq: Do not update related_cpus on every policy activation The related_cpus mask includes CPUs whose cpufreq_cpu_data per-CPU pointers have been set the the given policy. Since those pointers are only set at the policy creation time and unset when the policy is deleted, the related_cpus should not be updated between those two operations. For this reason, avoid updating it whenever the first of the "related" CPUs goes online. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 9d8f8cd64e58..0ea4bb723760 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1251,12 +1251,12 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) down_write(&policy->rwsem); - /* related cpus should atleast have policy->cpus */ - cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus); - - /* Remember which CPUs have been present at the policy creation time. */ - if (!recover_policy) + if (!recover_policy) { + /* related_cpus should at least include policy->cpus. */ + cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus); + /* Remember CPUs present at the policy creation time. */ cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask); + } /* * affected cpus must always be the one, which are online. We aren't From a34e63b14486e98cf78c99bf8ef141de7508dbc2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 27 Jul 2015 23:11:50 +0200 Subject: [PATCH 28/53] cpufreq: Pass CPU number to cpufreq_policy_alloc() Change cpufreq_policy_alloc() to take a CPU number instead of a CPU device pointer as its argument, as it is the only function called by cpufreq_add_dev() taking a device pointer argument at this point. That will allow us to split the CPU online part from cpufreq_add_dev() more cleanly going forward. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0ea4bb723760..0618522d4863 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1090,11 +1090,15 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp return 0; } -static struct cpufreq_policy *cpufreq_policy_alloc(struct device *dev) +static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) { + struct device *dev = get_cpu_device(cpu); struct cpufreq_policy *policy; int ret; + if (WARN_ON(!dev)) + return NULL; + policy = kzalloc(sizeof(*policy), GFP_KERNEL); if (!policy) return NULL; @@ -1122,10 +1126,10 @@ static struct cpufreq_policy *cpufreq_policy_alloc(struct device *dev) init_completion(&policy->kobj_unregister); INIT_WORK(&policy->update, handle_update); - policy->cpu = dev->id; + policy->cpu = cpu; /* Set this once on allocation */ - policy->kobj_cpu = dev->id; + policy->kobj_cpu = cpu; return policy; @@ -1233,7 +1237,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) up_write(&policy->rwsem); } else { recover_policy = false; - policy = cpufreq_policy_alloc(dev); + policy = cpufreq_policy_alloc(cpu); if (!policy) return -ENOMEM; } From 053819e0bf8407746cc5febf7a4947bee50377b4 Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Thu, 16 Jul 2015 13:34:18 +0530 Subject: [PATCH 29/53] cpufreq: powernv: Handle throttling due to Pmax capping at chip level The On-Chip-Controller(OCC) can throttle cpu frequency by reducing the max allowed frequency for that chip if the chip exceeds its power or temperature limits. As Pmax capping is a chip level condition report this throttling behavior at chip level and also do not set the global 'throttled' on Pmax capping instead set the per-chip throttled variable. Report unthrottling if Pmax is restored after throttling. This patch adds a structure to store chip id and throttled state of the chip. Signed-off-by: Shilpasri G Bhat Reviewed-by: Preeti U Murthy Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 59 ++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index ebef0d8279c7..d0c18c9ce1ff 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -42,6 +43,13 @@ static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; static bool rebooting, throttled; +static struct chip { + unsigned int id; + bool throttled; +} *chips; + +static int nr_chips; + /* * Note: The set of pstates consists of contiguous integers, the * smallest of which is indicated by powernv_pstate_info.min, the @@ -301,22 +309,33 @@ static inline unsigned int get_nominal_index(void) static void powernv_cpufreq_throttle_check(unsigned int cpu) { unsigned long pmsr; - int pmsr_pmax, pmsr_lp; + int pmsr_pmax, pmsr_lp, i; pmsr = get_pmspr(SPRN_PMSR); + for (i = 0; i < nr_chips; i++) + if (chips[i].id == cpu_to_chip_id(cpu)) + break; + /* Check for Pmax Capping */ pmsr_pmax = (s8)PMSR_MAX(pmsr); if (pmsr_pmax != powernv_pstate_info.max) { - throttled = true; - pr_info("CPU %d Pmax is reduced to %d\n", cpu, pmsr_pmax); - pr_info("Max allowed Pstate is capped\n"); + if (chips[i].throttled) + goto next; + chips[i].throttled = true; + pr_info("CPU %d on Chip %u has Pmax reduced to %d\n", cpu, + chips[i].id, pmsr_pmax); + } else if (chips[i].throttled) { + chips[i].throttled = false; + pr_info("CPU %d on Chip %u has Pmax restored to %d\n", cpu, + chips[i].id, pmsr_pmax); } /* * Check for Psafe by reading LocalPstate * or check if Psafe_mode_active is set in PMSR. */ +next: pmsr_lp = (s8)PMSR_LP(pmsr); if ((pmsr_lp < powernv_pstate_info.min) || (pmsr & PMSR_PSAFE_ENABLE)) { @@ -414,6 +433,33 @@ static struct cpufreq_driver powernv_cpufreq_driver = { .attr = powernv_cpu_freq_attr, }; +static int init_chip_info(void) +{ + unsigned int chip[256]; + unsigned int cpu, i; + unsigned int prev_chip_id = UINT_MAX; + + for_each_possible_cpu(cpu) { + unsigned int id = cpu_to_chip_id(cpu); + + if (prev_chip_id != id) { + prev_chip_id = id; + chip[nr_chips++] = id; + } + } + + chips = kmalloc_array(nr_chips, sizeof(struct chip), GFP_KERNEL); + if (!chips) + return -ENOMEM; + + for (i = 0; i < nr_chips; i++) { + chips[i].id = chip[i]; + chips[i].throttled = false; + } + + return 0; +} + static int __init powernv_cpufreq_init(void) { int rc = 0; @@ -429,6 +475,11 @@ static int __init powernv_cpufreq_init(void) return rc; } + /* Populate chip info */ + rc = init_chip_info(); + if (rc) + return rc; + register_reboot_notifier(&powernv_cpufreq_reboot_nb); return cpufreq_register_driver(&powernv_cpufreq_driver); } From 196ba2d514a13f6af1b3d78de71ce74ed2fc8bdc Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Thu, 16 Jul 2015 13:34:19 +0530 Subject: [PATCH 30/53] powerpc/powernv: Add definition of OPAL_MSG_OCC message type Add OPAL_MSG_OCC message definition to opal_message_type to receive OCC events like reset, load and throttled. Host performance can be affected when OCC is reset or OCC throttles the max Pstate. We can register to opal_message_notifier to receive OPAL_MSG_OCC type of message and report it to the userspace so as to keep the user informed about the reason for a performance drop in workloads. The reset and load OCC events are notified to kernel when FSP sends OCC_RESET and OCC_LOAD commands. Both reset and load messages are sent to kernel on successful completion of reset and load operation respectively. The throttle OCC event indicates that the Pmax of the chip is reduced. The chip_id and throttle reason for reducing Pmax is also queued along with the message. Signed-off-by: Shilpasri G Bhat Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- arch/powerpc/include/asm/opal-api.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index e9e4c52f3685..64dc9f547fb6 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -361,6 +361,7 @@ enum opal_msg_type { OPAL_MSG_HMI_EVT, OPAL_MSG_DPO, OPAL_MSG_PRD, + OPAL_MSG_OCC, OPAL_MSG_TYPE_MAX, }; @@ -700,6 +701,17 @@ struct opal_prd_msg_header { struct opal_prd_msg; +#define OCC_RESET 0 +#define OCC_LOAD 1 +#define OCC_THROTTLE 2 +#define OCC_MAX_THROTTLE_STATUS 5 + +struct opal_occ_msg { + __be64 type; + __be64 chip; + __be64 throttle_status; +}; + /* * SG entries * From cb166fa937a2fbc14badcafca86202354c34a213 Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Thu, 16 Jul 2015 13:34:20 +0530 Subject: [PATCH 31/53] cpufreq: powernv: Register for OCC related opal_message notification OCC is an On-Chip-Controller which takes care of power and thermal safety of the chip. During runtime due to power failure or overtemperature the OCC may throttle the frequencies of the CPUs to remain within the power budget. We want the cpufreq driver to be aware of such situations to be able to report the reason to the user. We register to opal_message_notifier to receive OCC messages from opal. powernv_cpufreq_throttle_check() reports any frequency throttling and this patch will report the reason or event that caused throttling. We can be throttled if OCC is reset or OCC limits Pmax due to power or thermal reasons. We are also notified of unthrottling after an OCC reset or if OCC restores Pmax on the chip. Signed-off-by: Shilpasri G Bhat Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 74 ++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index d0c18c9ce1ff..a634199052b6 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -33,6 +33,7 @@ #include #include #include /* Required for cpu_sibling_mask() in UP configs */ +#include #define POWERNV_MAX_PSTATES 256 #define PMSR_PSAFE_ENABLE (1UL << 30) @@ -41,7 +42,7 @@ #define PMSR_LP(x) ((x >> 48) & 0xFF) static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; -static bool rebooting, throttled; +static bool rebooting, throttled, occ_reset; static struct chip { unsigned int id; @@ -414,6 +415,74 @@ static struct notifier_block powernv_cpufreq_reboot_nb = { .notifier_call = powernv_cpufreq_reboot_notifier, }; +static char throttle_reason[][30] = { + "No throttling", + "Power Cap", + "Processor Over Temperature", + "Power Supply Failure", + "Over Current", + "OCC Reset" + }; + +static int powernv_cpufreq_occ_msg(struct notifier_block *nb, + unsigned long msg_type, void *_msg) +{ + struct opal_msg *msg = _msg; + struct opal_occ_msg omsg; + + if (msg_type != OPAL_MSG_OCC) + return 0; + + omsg.type = be64_to_cpu(msg->params[0]); + + switch (omsg.type) { + case OCC_RESET: + occ_reset = true; + /* + * powernv_cpufreq_throttle_check() is called in + * target() callback which can detect the throttle state + * for governors like ondemand. + * But static governors will not call target() often thus + * report throttling here. + */ + if (!throttled) { + throttled = true; + pr_crit("CPU Frequency is throttled\n"); + } + pr_info("OCC: Reset\n"); + break; + case OCC_LOAD: + pr_info("OCC: Loaded\n"); + break; + case OCC_THROTTLE: + omsg.chip = be64_to_cpu(msg->params[1]); + omsg.throttle_status = be64_to_cpu(msg->params[2]); + + if (occ_reset) { + occ_reset = false; + throttled = false; + pr_info("OCC: Active\n"); + return 0; + } + + if (omsg.throttle_status && + omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS) + pr_info("OCC: Chip %u Pmax reduced due to %s\n", + (unsigned int)omsg.chip, + throttle_reason[omsg.throttle_status]); + else if (!omsg.throttle_status) + pr_info("OCC: Chip %u %s\n", (unsigned int)omsg.chip, + throttle_reason[omsg.throttle_status]); + } + return 0; +} + +static struct notifier_block powernv_cpufreq_opal_nb = { + .notifier_call = powernv_cpufreq_occ_msg, + .next = NULL, + .priority = 0, +}; + static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy) { struct powernv_smp_call_data freq_data; @@ -481,6 +550,7 @@ static int __init powernv_cpufreq_init(void) return rc; register_reboot_notifier(&powernv_cpufreq_reboot_nb); + opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb); return cpufreq_register_driver(&powernv_cpufreq_driver); } module_init(powernv_cpufreq_init); @@ -488,6 +558,8 @@ module_init(powernv_cpufreq_init); static void __exit powernv_cpufreq_exit(void) { unregister_reboot_notifier(&powernv_cpufreq_reboot_nb); + opal_message_notifier_unregister(OPAL_MSG_OCC, + &powernv_cpufreq_opal_nb); cpufreq_unregister_driver(&powernv_cpufreq_driver); } module_exit(powernv_cpufreq_exit); From 735366fc407755626058218fc8d0430735a669ac Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Thu, 16 Jul 2015 13:34:21 +0530 Subject: [PATCH 32/53] cpufreq: powernv: Call throttle_check() on receiving OCC_THROTTLE Re-evaluate the chip's throttled state on recieving OCC_THROTTLE notification by executing *throttle_check() on any one of the cpu on the chip. This is a sanity check to verify if we were indeed throttled/unthrottled after receiving OCC_THROTTLE notification. We cannot call *throttle_check() directly from the notification handler because we could be handling chip1's notification in chip2. So initiate an smp_call to execute *throttle_check(). We are irq-disabled in the notification handler, so use a worker thread to smp_call throttle_check() on any of the cpu in the chipmask. Signed-off-by: Shilpasri G Bhat Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index a634199052b6..22f33ff2d77a 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -47,6 +47,8 @@ static bool rebooting, throttled, occ_reset; static struct chip { unsigned int id; bool throttled; + cpumask_t mask; + struct work_struct throttle; } *chips; static int nr_chips; @@ -307,8 +309,9 @@ static inline unsigned int get_nominal_index(void) return powernv_pstate_info.max - powernv_pstate_info.nominal; } -static void powernv_cpufreq_throttle_check(unsigned int cpu) +static void powernv_cpufreq_throttle_check(void *data) { + unsigned int cpu = smp_processor_id(); unsigned long pmsr; int pmsr_pmax, pmsr_lp, i; @@ -370,7 +373,7 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, return 0; if (!throttled) - powernv_cpufreq_throttle_check(smp_processor_id()); + powernv_cpufreq_throttle_check(NULL); freq_data.pstate_id = powernv_freqs[new_index].driver_data; @@ -415,6 +418,14 @@ static struct notifier_block powernv_cpufreq_reboot_nb = { .notifier_call = powernv_cpufreq_reboot_notifier, }; +void powernv_cpufreq_work_fn(struct work_struct *work) +{ + struct chip *chip = container_of(work, struct chip, throttle); + + smp_call_function_any(&chip->mask, + powernv_cpufreq_throttle_check, NULL, 0); +} + static char throttle_reason[][30] = { "No throttling", "Power Cap", @@ -429,6 +440,7 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb, { struct opal_msg *msg = _msg; struct opal_occ_msg omsg; + int i; if (msg_type != OPAL_MSG_OCC) return 0; @@ -462,6 +474,10 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb, occ_reset = false; throttled = false; pr_info("OCC: Active\n"); + + for (i = 0; i < nr_chips; i++) + schedule_work(&chips[i].throttle); + return 0; } @@ -473,6 +489,12 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb, else if (!omsg.throttle_status) pr_info("OCC: Chip %u %s\n", (unsigned int)omsg.chip, throttle_reason[omsg.throttle_status]); + else + return 0; + + for (i = 0; i < nr_chips; i++) + if (chips[i].id == omsg.chip) + schedule_work(&chips[i].throttle); } return 0; } @@ -524,6 +546,8 @@ static int init_chip_info(void) for (i = 0; i < nr_chips; i++) { chips[i].id = chip[i]; chips[i].throttled = false; + cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i])); + INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn); } return 0; From 3dd3ebe5bb3837aeac28a23f8f22b97cb84abab6 Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Thu, 16 Jul 2015 13:34:22 +0530 Subject: [PATCH 33/53] cpufreq: powernv: Report Psafe only if PMSR.psafe_mode_active bit is set On a reset cycle of OCC, although the system retires from safe frequency state the local pstate is not restored to Pmin or last requested pstate. Now if the cpufreq governor initiates a pstate change, the local pstate will be in Psafe and we will be reporting a false positive when we are not throttled. So in powernv_cpufreq_throttle_check() remove the condition which checks if local pstate is less than Pmin while checking for Psafe frequency. If the cpus are forced to Psafe then PMSR.psafe_mode_active bit will be set. So, when OCCs become active this bit will be cleared. Let us just rely on this bit for reporting throttling. Signed-off-by: Shilpasri G Bhat Reviewed-by: Preeti U Murthy Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 22f33ff2d77a..90b42938837e 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -39,7 +39,6 @@ #define PMSR_PSAFE_ENABLE (1UL << 30) #define PMSR_SPR_EM_DISABLE (1UL << 31) #define PMSR_MAX(x) ((x >> 32) & 0xFF) -#define PMSR_LP(x) ((x >> 48) & 0xFF) static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; static bool rebooting, throttled, occ_reset; @@ -313,7 +312,7 @@ static void powernv_cpufreq_throttle_check(void *data) { unsigned int cpu = smp_processor_id(); unsigned long pmsr; - int pmsr_pmax, pmsr_lp, i; + int pmsr_pmax, i; pmsr = get_pmspr(SPRN_PMSR); @@ -335,14 +334,9 @@ static void powernv_cpufreq_throttle_check(void *data) chips[i].id, pmsr_pmax); } - /* - * Check for Psafe by reading LocalPstate - * or check if Psafe_mode_active is set in PMSR. - */ + /* Check if Psafe_mode_active is set in PMSR. */ next: - pmsr_lp = (s8)PMSR_LP(pmsr); - if ((pmsr_lp < powernv_pstate_info.min) || - (pmsr & PMSR_PSAFE_ENABLE)) { + if (pmsr & PMSR_PSAFE_ENABLE) { throttled = true; pr_info("Pstate set to safe frequency\n"); } From 227942809b52f23cda414858b635c0285f11de00 Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Thu, 16 Jul 2015 13:34:23 +0530 Subject: [PATCH 34/53] cpufreq: powernv: Restore cpu frequency to policy->cur on unthrottling If frequency is throttled due to OCC reset then cpus will be in Psafe frequency, so restore the frequency on all cpus to policy->cur when OCCs are active again. And if frequency is throttled due to Pmax capping then restore the frequency of all the cpus in the chip on unthrottling. Signed-off-by: Shilpasri G Bhat Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 90b42938837e..546e056e416d 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -48,6 +48,7 @@ static struct chip { bool throttled; cpumask_t mask; struct work_struct throttle; + bool restore; } *chips; static int nr_chips; @@ -415,9 +416,29 @@ static struct notifier_block powernv_cpufreq_reboot_nb = { void powernv_cpufreq_work_fn(struct work_struct *work) { struct chip *chip = container_of(work, struct chip, throttle); + unsigned int cpu; + cpumask_var_t mask; smp_call_function_any(&chip->mask, powernv_cpufreq_throttle_check, NULL, 0); + + if (!chip->restore) + return; + + chip->restore = false; + cpumask_copy(mask, &chip->mask); + for_each_cpu_and(cpu, mask, cpu_online_mask) { + int index, tcpu; + struct cpufreq_policy policy; + + cpufreq_get_policy(&policy, cpu); + cpufreq_frequency_table_target(&policy, policy.freq_table, + policy.cur, + CPUFREQ_RELATION_C, &index); + powernv_cpufreq_target_index(&policy, index); + for_each_cpu(tcpu, policy.cpus) + cpumask_clear_cpu(tcpu, mask); + } } static char throttle_reason[][30] = { @@ -469,8 +490,10 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb, throttled = false; pr_info("OCC: Active\n"); - for (i = 0; i < nr_chips; i++) + for (i = 0; i < nr_chips; i++) { + chips[i].restore = true; schedule_work(&chips[i].throttle); + } return 0; } @@ -487,8 +510,11 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb, return 0; for (i = 0; i < nr_chips; i++) - if (chips[i].id == omsg.chip) + if (chips[i].id == omsg.chip) { + if (!omsg.throttle_status) + chips[i].restore = true; schedule_work(&chips[i].throttle); + } } return 0; } @@ -542,6 +568,7 @@ static int init_chip_info(void) chips[i].throttled = false; cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i])); INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn); + chips[i].restore = false; } return 0; From 0b275352872b2641ed5c94d0f0f8c7e907bf3e3f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 29 Jul 2015 03:03:44 +0200 Subject: [PATCH 35/53] cpufreq: Separate CPU device registration from CPU online To separate the CPU online interface from the CPU device registration, split cpufreq_online() out of cpufreq_add_dev() and make cpufreq_cpu_callback() call the former, while cpufreq_add_dev() itself will only be used as the CPU device addition subsystem interface callback. Signed-off-by: Rafael J. Wysocki Suggested-by: Russell King Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 90 ++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 43 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0618522d4863..0d46b557c016 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1191,36 +1191,15 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy, bool notify) kfree(policy); } -/** - * cpufreq_add_dev - add a CPU device - * - * Adds the cpufreq interface for a CPU device. - * - * The Oracle says: try running cpufreq registration/unregistration concurrently - * with with cpu hotplugging and all hell will break loose. Tried to clean this - * mess up, but more thorough testing is needed. - Mathieu - */ -static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) +static int cpufreq_online(unsigned int cpu) { - unsigned int j, cpu = dev->id; - int ret; struct cpufreq_policy *policy; - unsigned long flags; bool recover_policy; + unsigned long flags; + unsigned int j; + int ret; - pr_debug("adding CPU %u\n", cpu); - - if (cpu_is_offline(cpu)) { - /* - * Only possible if we are here from the subsys_interface add - * callback. A hotplug notifier will follow and we will handle - * it as CPU online then. For now, just create the sysfs link, - * unless there is no policy or the link is already present. - */ - policy = per_cpu(cpufreq_cpu_data, cpu); - return policy && !cpumask_test_and_set_cpu(cpu, policy->real_cpus) - ? add_cpu_dev_symlink(policy, cpu) : 0; - } + pr_debug("%s: bringing CPU%u online\n", __func__, cpu); /* Check if this CPU already has a policy to manage it */ policy = per_cpu(cpufreq_cpu_data, cpu); @@ -1377,6 +1356,35 @@ out_free_policy: return ret; } +/** + * cpufreq_add_dev - the cpufreq interface for a CPU device. + * @dev: CPU device. + * @sif: Subsystem interface structure pointer (not used) + */ +static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) +{ + unsigned cpu = dev->id; + int ret; + + dev_dbg(dev, "%s: adding CPU%u\n", __func__, cpu); + + if (cpu_online(cpu)) { + ret = cpufreq_online(cpu); + } else { + /* + * A hotplug notifier will follow and we will handle it as CPU + * online then. For now, just create the sysfs link, unless + * there is no policy or the link is already present. + */ + struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); + + ret = policy && !cpumask_test_and_set_cpu(cpu, policy->real_cpus) + ? add_cpu_dev_symlink(policy, cpu) : 0; + } + + return ret; +} + static void cpufreq_offline_prepare(unsigned int cpu) { struct cpufreq_policy *policy; @@ -2340,27 +2348,23 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct device *dev; - dev = get_cpu_device(cpu); - if (dev) { - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_ONLINE: - cpufreq_add_dev(dev, NULL); - break; + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + cpufreq_online(cpu); + break; - case CPU_DOWN_PREPARE: - cpufreq_offline_prepare(cpu); - break; + case CPU_DOWN_PREPARE: + cpufreq_offline_prepare(cpu); + break; - case CPU_POST_DEAD: - cpufreq_offline_finish(cpu); - break; + case CPU_POST_DEAD: + cpufreq_offline_finish(cpu); + break; - case CPU_DOWN_FAILED: - cpufreq_add_dev(dev, NULL); - break; - } + case CPU_DOWN_FAILED: + cpufreq_online(cpu); + break; } return NOTIFY_OK; } From 194d99c7e3ce54a8c7d8adf79fb1d8ad49a9bf9f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 29 Jul 2015 03:08:57 +0200 Subject: [PATCH 36/53] cpufreq: Replace recover_policy with new_policy in cpufreq_online() The recover_policy is unsed in cpufreq_online() to indicate whether a new policy object is created or an existing one is reinitialized. The "recover" part of the name is slightly confusing (it should be "reinitialization" rather than "recovery") and the logical not (!) operator is applied to it in almost all of the checks it is used in, so replace that variable with a new one called "new_policy" that will be true in the case of a new policy creation. While at it, drop one of the labels that is jumped to from only one spot. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0d46b557c016..24e4ba568e77 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1194,7 +1194,7 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy, bool notify) static int cpufreq_online(unsigned int cpu) { struct cpufreq_policy *policy; - bool recover_policy; + bool new_policy; unsigned long flags; unsigned int j; int ret; @@ -1209,13 +1209,13 @@ static int cpufreq_online(unsigned int cpu) return cpufreq_add_policy_cpu(policy, cpu); /* This is the only online CPU for the policy. Start over. */ - recover_policy = true; + new_policy = false; down_write(&policy->rwsem); policy->cpu = cpu; policy->governor = NULL; up_write(&policy->rwsem); } else { - recover_policy = false; + new_policy = true; policy = cpufreq_policy_alloc(cpu); if (!policy) return -ENOMEM; @@ -1234,7 +1234,7 @@ static int cpufreq_online(unsigned int cpu) down_write(&policy->rwsem); - if (!recover_policy) { + if (new_policy) { /* related_cpus should at least include policy->cpus. */ cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus); /* Remember CPUs present at the policy creation time. */ @@ -1247,7 +1247,7 @@ static int cpufreq_online(unsigned int cpu) */ cpumask_and(policy->cpus, policy->cpus, cpu_online_mask); - if (!recover_policy) { + if (new_policy) { policy->user_policy.min = policy->min; policy->user_policy.max = policy->max; @@ -1308,7 +1308,7 @@ static int cpufreq_online(unsigned int cpu) blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_START, policy); - if (!recover_policy) { + if (new_policy) { ret = cpufreq_add_dev_interface(policy); if (ret) goto out_exit_policy; @@ -1324,10 +1324,12 @@ static int cpufreq_online(unsigned int cpu) if (ret) { pr_err("%s: Failed to initialize policy for cpu: %d (%d)\n", __func__, cpu, ret); - goto out_remove_policy_notify; + /* cpufreq_policy_free() will notify based on this */ + new_policy = false; + goto out_exit_policy; } - if (!recover_policy) { + if (new_policy) { policy->user_policy.policy = policy->policy; policy->user_policy.governor = policy->governor; } @@ -1343,16 +1345,13 @@ static int cpufreq_online(unsigned int cpu) return 0; -out_remove_policy_notify: - /* cpufreq_policy_free() will notify based on this */ - recover_policy = true; out_exit_policy: up_write(&policy->rwsem); if (cpufreq_driver->exit) cpufreq_driver->exit(policy); out_free_policy: - cpufreq_policy_free(policy, recover_policy); + cpufreq_policy_free(policy, !new_policy); return ret; } From fdd320da84c63092fe6e155cb7b8d80f7f765fa9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 30 Jul 2015 01:45:07 +0200 Subject: [PATCH 37/53] cpufreq: Lock CPU online/offline in cpufreq_register_driver() To protect against races with concurrent CPU online/offline, call get_online_cpus() before registering a cpufreq driver. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 24e4ba568e77..0f4e96ff99e8 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2471,10 +2471,14 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) pr_debug("trying to register driver %s\n", driver_data->name); + /* Protect against concurrent CPU online/offline. */ + get_online_cpus(); + write_lock_irqsave(&cpufreq_driver_lock, flags); if (cpufreq_driver) { write_unlock_irqrestore(&cpufreq_driver_lock, flags); - return -EEXIST; + ret = -EEXIST; + goto out; } cpufreq_driver = driver_data; write_unlock_irqrestore(&cpufreq_driver_lock, flags); @@ -2513,7 +2517,10 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) register_hotcpu_notifier(&cpufreq_cpu_notifier); pr_debug("driver %s up and running\n", driver_data->name); - return 0; +out: + put_online_cpus(); + return ret; + err_if_unreg: subsys_interface_unregister(&cpufreq_interface); err_boost_unreg: @@ -2523,7 +2530,7 @@ err_null_driver: write_lock_irqsave(&cpufreq_driver_lock, flags); cpufreq_driver = NULL; write_unlock_irqrestore(&cpufreq_driver_lock, flags); - return ret; + goto out; } EXPORT_SYMBOL_GPL(cpufreq_register_driver); From fba9573b33f8bddd772195c202f6b8ec0751cedd Mon Sep 17 00:00:00 2001 From: Pan Xinhui Date: Thu, 30 Jul 2015 18:10:40 +0800 Subject: [PATCH 38/53] cpufreq: Correct a freq check in cpufreq_set_policy() This check was originally added by commit 9c9a43ed2734 ("[CPUFREQ] return error when failing to set minfreq").It attempt to return an error on obviously incorrect limits when we echo xxx >.../scaling_max,min_freq Actually we just need check if new_policy->min > new_policy->max. Because at least one of max/min is copied from cpufreq_get_policy(). For example, when we echo xxx > .../scaling_min_freq, new_policy is copied from policy in cpufreq_get_policy. new_policy->max is same with policy->max. new_policy->min is set to a new value. Let me explain it in deduction method, first statement in if (): new_policy->min > policy->max policy->max == new_policy->max ==> new_policy->min > new_policy->max second statement in if(): new_policy->max < policy->min policy->max < policy->min ==>new_policy->min > new_policy->max (induction method) So we have proved that we only need check if new_policy->min > new_policy->max. After apply this patch, we can also modify ->min and ->max at same time if new freq range is very much different from current freq range. For example, if current freq range is 480000-960000, then we want to set this range to 1120000-2240000, we would fail in the past because new_policy->min > policy->max. As long as the cpufreq range is valid, we has no reason to reject the user. So correct the check to avoid such case. Signed-off-by: Pan Xinhui Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0f4e96ff99e8..76a26609d96b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2190,7 +2190,11 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, memcpy(&new_policy->cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo)); - if (new_policy->min > policy->max || new_policy->max < policy->min) + /* + * This check works well when we store new min/max freq attributes, + * because new_policy is a copy of policy with one field updated. + */ + if (new_policy->min > new_policy->max) return -EINVAL; /* verify the cpu speed can be set within this limit */ From 144c8e172b5c388ddf41fa64e154f53384ec3448 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 29 Jul 2015 23:53:10 +0800 Subject: [PATCH 39/53] intel_pstate: Fix possible overflow complained by Coverity Coverity scanning performed on intel_pstate.c shows possible overflow when doing left shifting: val = pstate << 8; since pstate is of type integer, while val is of u64, left shifting pstate might lead to potential loss of upper bits. Say, if pstate equals 0x4000 0000, after pstate << 8 we will get zero assigned to val. Although pstate will not likely be that big, this patch cast the left operand to u64 before performing the left shift, to avoid complaining from Coverity. Reported-by: Coquard, Christophe Signed-off-by: Chen Yu Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7b2721fb861f..b9354b6f7149 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -521,7 +521,7 @@ static void byt_set_pstate(struct cpudata *cpudata, int pstate) int32_t vid_fp; u32 vid; - val = pstate << 8; + val = (u64)pstate << 8; if (limits.no_turbo && !limits.turbo_disabled) val |= (u64)1 << 32; @@ -610,7 +610,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate) { u64 val; - val = pstate << 8; + val = (u64)pstate << 8; if (limits.no_turbo && !limits.turbo_disabled) val |= (u64)1 << 32; From 1c9391238753ffb370f02743b5f43bac6dea304b Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Wed, 5 Aug 2015 12:47:14 -0700 Subject: [PATCH 40/53] intel_pstate: Add SKY-S support Whitelist the SKL-S processor Signed-off-by: Kristen Carlson Accardi Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b9354b6f7149..f05f0633b2a9 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -908,6 +908,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { ICPU(0x4c, byt_params), ICPU(0x4e, core_params), ICPU(0x4f, core_params), + ICPU(0x5e, core_params), ICPU(0x56, core_params), ICPU(0x57, knl_params), {} From 5aecc3c8a24a7c2db29ec4a927bbb08befcb508e Mon Sep 17 00:00:00 2001 From: Ethan Zhao Date: Wed, 5 Aug 2015 09:28:50 +0900 Subject: [PATCH 41/53] intel_pstate: append more Oracle OEM table id to vendor bypass list Append more Oracle X86 servers that have their own power management, SUN FIRE X4275 M3 SUN FIRE X4170 M3 and SUN FIRE X6-2 Signed-off-by: Ethan Zhao Acked-by: Viresh Kumar Acked-by: Kristen Carlson Accardi Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f05f0633b2a9..31d0548638e8 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1174,6 +1174,10 @@ static struct hw_vendor_info vendor_info[] = { {1, "ORACLE", "X4270M3 ", PPC}, {1, "ORACLE", "X4270M2 ", PPC}, {1, "ORACLE", "X4170M2 ", PPC}, + {1, "ORACLE", "X4170 M3", PPC}, + {1, "ORACLE", "X4275 M3", PPC}, + {1, "ORACLE", "X6-2 ", PPC}, + {1, "ORACLE", "Sudbury ", PPC}, {0, "", ""}, }; From c9c96ae2c57d91ea2b73ef447fdd44c760a96d97 Mon Sep 17 00:00:00 2001 From: Pi-Cheng Chen Date: Mon, 17 Aug 2015 17:24:23 +0800 Subject: [PATCH 42/53] dt-bindings: mediatek: Add MT8173 CPU DVFS clock bindings This patch adds the clock and regulator consumer properties part of document for CPU DVFS clocks on Mediatek MT8173 SoC. Signed-off-by: Pi-Cheng Chen Acked-by: Michael Turquette Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- .../bindings/clock/mt8173-cpu-dvfs.txt | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/mt8173-cpu-dvfs.txt diff --git a/Documentation/devicetree/bindings/clock/mt8173-cpu-dvfs.txt b/Documentation/devicetree/bindings/clock/mt8173-cpu-dvfs.txt new file mode 100644 index 000000000000..52b457c23eed --- /dev/null +++ b/Documentation/devicetree/bindings/clock/mt8173-cpu-dvfs.txt @@ -0,0 +1,83 @@ +Device Tree Clock bindins for CPU DVFS of Mediatek MT8173 SoC + +Required properties: +- clocks: A list of phandle + clock-specifier pairs for the clocks listed in clock names. +- clock-names: Should contain the following: + "cpu" - The multiplexer for clock input of CPU cluster. + "intermediate" - A parent of "cpu" clock which is used as "intermediate" clock + source (usually MAINPLL) when the original CPU PLL is under + transition and not stable yet. + Please refer to Documentation/devicetree/bindings/clk/clock-bindings.txt for + generic clock consumer properties. +- proc-supply: Regulator for Vproc of CPU cluster. + +Optional properties: +- sram-supply: Regulator for Vsram of CPU cluster. When present, the cpufreq driver + needs to do "voltage tracking" to step by step scale up/down Vproc and + Vsram to fit SoC specific needs. When absent, the voltage scaling + flow is handled by hardware, hence no software "voltage tracking" is + needed. + +Example: +-------- + cpu0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a53"; + reg = <0x000>; + enable-method = "psci"; + cpu-idle-states = <&CPU_SLEEP_0>; + clocks = <&infracfg CLK_INFRA_CA53SEL>, + <&apmixedsys CLK_APMIXED_MAINPLL>; + clock-names = "cpu", "intermediate"; + }; + + cpu1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a53"; + reg = <0x001>; + enable-method = "psci"; + cpu-idle-states = <&CPU_SLEEP_0>; + clocks = <&infracfg CLK_INFRA_CA53SEL>, + <&apmixedsys CLK_APMIXED_MAINPLL>; + clock-names = "cpu", "intermediate"; + }; + + cpu2: cpu@100 { + device_type = "cpu"; + compatible = "arm,cortex-a57"; + reg = <0x100>; + enable-method = "psci"; + cpu-idle-states = <&CPU_SLEEP_0>; + clocks = <&infracfg CLK_INFRA_CA57SEL>, + <&apmixedsys CLK_APMIXED_MAINPLL>; + clock-names = "cpu", "intermediate"; + }; + + cpu3: cpu@101 { + device_type = "cpu"; + compatible = "arm,cortex-a57"; + reg = <0x101>; + enable-method = "psci"; + cpu-idle-states = <&CPU_SLEEP_0>; + clocks = <&infracfg CLK_INFRA_CA57SEL>, + <&apmixedsys CLK_APMIXED_MAINPLL>; + clock-names = "cpu", "intermediate"; + }; + + &cpu0 { + proc-supply = <&mt6397_vpca15_reg>; + }; + + &cpu1 { + proc-supply = <&mt6397_vpca15_reg>; + }; + + &cpu2 { + proc-supply = <&da9211_vcpu_reg>; + sram-supply = <&mt6397_vsramca7_reg>; + }; + + &cpu3 { + proc-supply = <&da9211_vcpu_reg>; + sram-supply = <&mt6397_vsramca7_reg>; + }; From 1453863fb02a18900c9079fa2e4f02710bf46507 Mon Sep 17 00:00:00 2001 From: Pi-Cheng Chen Date: Wed, 19 Aug 2015 10:05:06 +0800 Subject: [PATCH 43/53] cpufreq: mediatek: Add MT8173 cpufreq driver Mediatek MT8173 is an ARMv8 based quad-core (2*Cortex-A53 and 2*Cortex-A72) SoC with duall clusters. For each cluster, two voltage inputs, Vproc and Vsram are supplied by two regulators. For the big cluster, two regulators come from different PMICs. In this case, when scaling voltage inputs of the cluster, the voltages of two regulator inputs need to be controlled by software explicitly under the SoC specific limitation: 100mV < Vsram - Vproc < 200mV which is called 'voltage tracking' mechanism. And when scaling the frequency of cluster clock input, the input MUX need to be parented to another "intermediate" stable PLL first and reparented to the original PLL once the original PLL is stable at the target frequency. This patch implements those mechanisms to enable CPU DVFS support for Mediatek MT8173 SoC. Signed-off-by: Pi-Cheng Chen Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 7 + drivers/cpufreq/Makefile | 1 + drivers/cpufreq/mt8173-cpufreq.c | 527 +++++++++++++++++++++++++++++++ 3 files changed, 535 insertions(+) create mode 100644 drivers/cpufreq/mt8173-cpufreq.c diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index cc8a71c267b8..2bacf24a19a9 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -130,6 +130,13 @@ config ARM_KIRKWOOD_CPUFREQ This adds the CPUFreq driver for Marvell Kirkwood SoCs. +config ARM_MT8173_CPUFREQ + bool "Mediatek MT8173 CPUFreq support" + depends on ARCH_MEDIATEK && REGULATOR + select PM_OPP + help + This adds the CPUFreq driver support for Mediatek MT8173 SoC. + config ARM_OMAP2PLUS_CPUFREQ bool "TI OMAP2+" depends on ARCH_OMAP2PLUS diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 2169bf792db7..9c75fafd2901 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_ARM_HISI_ACPU_CPUFREQ) += hisi-acpu-cpufreq.o obj-$(CONFIG_ARM_IMX6Q_CPUFREQ) += imx6q-cpufreq.o obj-$(CONFIG_ARM_INTEGRATOR) += integrator-cpufreq.o obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ) += kirkwood-cpufreq.o +obj-$(CONFIG_ARM_MT8173_CPUFREQ) += mt8173-cpufreq.o obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ) += omap-cpufreq.o obj-$(CONFIG_ARM_PXA2xx_CPUFREQ) += pxa2xx-cpufreq.o obj-$(CONFIG_PXA3xx) += pxa3xx-cpufreq.o diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c new file mode 100644 index 000000000000..49caed293a3b --- /dev/null +++ b/drivers/cpufreq/mt8173-cpufreq.c @@ -0,0 +1,527 @@ +/* + * Copyright (c) 2015 Linaro Ltd. + * Author: Pi-Cheng Chen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MIN_VOLT_SHIFT (100000) +#define MAX_VOLT_SHIFT (200000) +#define MAX_VOLT_LIMIT (1150000) +#define VOLT_TOL (10000) + +/* + * The struct mtk_cpu_dvfs_info holds necessary information for doing CPU DVFS + * on each CPU power/clock domain of Mediatek SoCs. Each CPU cluster in + * Mediatek SoCs has two voltage inputs, Vproc and Vsram. In some cases the two + * voltage inputs need to be controlled under a hardware limitation: + * 100mV < Vsram - Vproc < 200mV + * + * When scaling the clock frequency of a CPU clock domain, the clock source + * needs to be switched to another stable PLL clock temporarily until + * the original PLL becomes stable at target frequency. + */ +struct mtk_cpu_dvfs_info { + struct device *cpu_dev; + struct regulator *proc_reg; + struct regulator *sram_reg; + struct clk *cpu_clk; + struct clk *inter_clk; + struct thermal_cooling_device *cdev; + int intermediate_voltage; + bool need_voltage_tracking; +}; + +static int mtk_cpufreq_voltage_tracking(struct mtk_cpu_dvfs_info *info, + int new_vproc) +{ + struct regulator *proc_reg = info->proc_reg; + struct regulator *sram_reg = info->sram_reg; + int old_vproc, old_vsram, new_vsram, vsram, vproc, ret; + + old_vproc = regulator_get_voltage(proc_reg); + old_vsram = regulator_get_voltage(sram_reg); + /* Vsram should not exceed the maximum allowed voltage of SoC. */ + new_vsram = min(new_vproc + MIN_VOLT_SHIFT, MAX_VOLT_LIMIT); + + if (old_vproc < new_vproc) { + /* + * When scaling up voltages, Vsram and Vproc scale up step + * by step. At each step, set Vsram to (Vproc + 200mV) first, + * then set Vproc to (Vsram - 100mV). + * Keep doing it until Vsram and Vproc hit target voltages. + */ + do { + old_vsram = regulator_get_voltage(sram_reg); + old_vproc = regulator_get_voltage(proc_reg); + + vsram = min(new_vsram, old_vproc + MAX_VOLT_SHIFT); + + if (vsram + VOLT_TOL >= MAX_VOLT_LIMIT) { + vsram = MAX_VOLT_LIMIT; + + /* + * If the target Vsram hits the maximum voltage, + * try to set the exact voltage value first. + */ + ret = regulator_set_voltage(sram_reg, vsram, + vsram); + if (ret) + ret = regulator_set_voltage(sram_reg, + vsram - VOLT_TOL, + vsram); + + vproc = new_vproc; + } else { + ret = regulator_set_voltage(sram_reg, vsram, + vsram + VOLT_TOL); + + vproc = vsram - MIN_VOLT_SHIFT; + } + if (ret) + return ret; + + ret = regulator_set_voltage(proc_reg, vproc, + vproc + VOLT_TOL); + if (ret) { + regulator_set_voltage(sram_reg, old_vsram, + old_vsram); + return ret; + } + } while (vproc < new_vproc || vsram < new_vsram); + } else if (old_vproc > new_vproc) { + /* + * When scaling down voltages, Vsram and Vproc scale down step + * by step. At each step, set Vproc to (Vsram - 200mV) first, + * then set Vproc to (Vproc + 100mV). + * Keep doing it until Vsram and Vproc hit target voltages. + */ + do { + old_vproc = regulator_get_voltage(proc_reg); + old_vsram = regulator_get_voltage(sram_reg); + + vproc = max(new_vproc, old_vsram - MAX_VOLT_SHIFT); + ret = regulator_set_voltage(proc_reg, vproc, + vproc + VOLT_TOL); + if (ret) + return ret; + + if (vproc == new_vproc) + vsram = new_vsram; + else + vsram = max(new_vsram, vproc + MIN_VOLT_SHIFT); + + if (vsram + VOLT_TOL >= MAX_VOLT_LIMIT) { + vsram = MAX_VOLT_LIMIT; + + /* + * If the target Vsram hits the maximum voltage, + * try to set the exact voltage value first. + */ + ret = regulator_set_voltage(sram_reg, vsram, + vsram); + if (ret) + ret = regulator_set_voltage(sram_reg, + vsram - VOLT_TOL, + vsram); + } else { + ret = regulator_set_voltage(sram_reg, vsram, + vsram + VOLT_TOL); + } + + if (ret) { + regulator_set_voltage(proc_reg, old_vproc, + old_vproc); + return ret; + } + } while (vproc > new_vproc + VOLT_TOL || + vsram > new_vsram + VOLT_TOL); + } + + return 0; +} + +static int mtk_cpufreq_set_voltage(struct mtk_cpu_dvfs_info *info, int vproc) +{ + if (info->need_voltage_tracking) + return mtk_cpufreq_voltage_tracking(info, vproc); + else + return regulator_set_voltage(info->proc_reg, vproc, + vproc + VOLT_TOL); +} + +static int mtk_cpufreq_set_target(struct cpufreq_policy *policy, + unsigned int index) +{ + struct cpufreq_frequency_table *freq_table = policy->freq_table; + struct clk *cpu_clk = policy->clk; + struct clk *armpll = clk_get_parent(cpu_clk); + struct mtk_cpu_dvfs_info *info = policy->driver_data; + struct device *cpu_dev = info->cpu_dev; + struct dev_pm_opp *opp; + long freq_hz, old_freq_hz; + int vproc, old_vproc, inter_vproc, target_vproc, ret; + + inter_vproc = info->intermediate_voltage; + + old_freq_hz = clk_get_rate(cpu_clk); + old_vproc = regulator_get_voltage(info->proc_reg); + + freq_hz = freq_table[index].frequency * 1000; + + rcu_read_lock(); + opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_hz); + if (IS_ERR(opp)) { + rcu_read_unlock(); + pr_err("cpu%d: failed to find OPP for %ld\n", + policy->cpu, freq_hz); + return PTR_ERR(opp); + } + vproc = dev_pm_opp_get_voltage(opp); + rcu_read_unlock(); + + /* + * If the new voltage or the intermediate voltage is higher than the + * current voltage, scale up voltage first. + */ + target_vproc = (inter_vproc > vproc) ? inter_vproc : vproc; + if (old_vproc < target_vproc) { + ret = mtk_cpufreq_set_voltage(info, target_vproc); + if (ret) { + pr_err("cpu%d: failed to scale up voltage!\n", + policy->cpu); + mtk_cpufreq_set_voltage(info, old_vproc); + return ret; + } + } + + /* Reparent the CPU clock to intermediate clock. */ + ret = clk_set_parent(cpu_clk, info->inter_clk); + if (ret) { + pr_err("cpu%d: failed to re-parent cpu clock!\n", + policy->cpu); + mtk_cpufreq_set_voltage(info, old_vproc); + WARN_ON(1); + return ret; + } + + /* Set the original PLL to target rate. */ + ret = clk_set_rate(armpll, freq_hz); + if (ret) { + pr_err("cpu%d: failed to scale cpu clock rate!\n", + policy->cpu); + clk_set_parent(cpu_clk, armpll); + mtk_cpufreq_set_voltage(info, old_vproc); + return ret; + } + + /* Set parent of CPU clock back to the original PLL. */ + ret = clk_set_parent(cpu_clk, armpll); + if (ret) { + pr_err("cpu%d: failed to re-parent cpu clock!\n", + policy->cpu); + mtk_cpufreq_set_voltage(info, inter_vproc); + WARN_ON(1); + return ret; + } + + /* + * If the new voltage is lower than the intermediate voltage or the + * original voltage, scale down to the new voltage. + */ + if (vproc < inter_vproc || vproc < old_vproc) { + ret = mtk_cpufreq_set_voltage(info, vproc); + if (ret) { + pr_err("cpu%d: failed to scale down voltage!\n", + policy->cpu); + clk_set_parent(cpu_clk, info->inter_clk); + clk_set_rate(armpll, old_freq_hz); + clk_set_parent(cpu_clk, armpll); + return ret; + } + } + + return 0; +} + +static void mtk_cpufreq_ready(struct cpufreq_policy *policy) +{ + struct mtk_cpu_dvfs_info *info = policy->driver_data; + struct device_node *np = of_node_get(info->cpu_dev->of_node); + + if (WARN_ON(!np)) + return; + + if (of_find_property(np, "#cooling-cells", NULL)) { + info->cdev = of_cpufreq_cooling_register(np, + policy->related_cpus); + + if (IS_ERR(info->cdev)) { + dev_err(info->cpu_dev, + "running cpufreq without cooling device: %ld\n", + PTR_ERR(info->cdev)); + + info->cdev = NULL; + } + } + + of_node_put(np); +} + +static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) +{ + struct device *cpu_dev; + struct regulator *proc_reg = ERR_PTR(-ENODEV); + struct regulator *sram_reg = ERR_PTR(-ENODEV); + struct clk *cpu_clk = ERR_PTR(-ENODEV); + struct clk *inter_clk = ERR_PTR(-ENODEV); + struct dev_pm_opp *opp; + unsigned long rate; + int ret; + + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) { + pr_err("failed to get cpu%d device\n", cpu); + return -ENODEV; + } + + cpu_clk = clk_get(cpu_dev, "cpu"); + if (IS_ERR(cpu_clk)) { + if (PTR_ERR(cpu_clk) == -EPROBE_DEFER) + pr_warn("cpu clk for cpu%d not ready, retry.\n", cpu); + else + pr_err("failed to get cpu clk for cpu%d\n", cpu); + + ret = PTR_ERR(cpu_clk); + return ret; + } + + inter_clk = clk_get(cpu_dev, "intermediate"); + if (IS_ERR(inter_clk)) { + if (PTR_ERR(inter_clk) == -EPROBE_DEFER) + pr_warn("intermediate clk for cpu%d not ready, retry.\n", + cpu); + else + pr_err("failed to get intermediate clk for cpu%d\n", + cpu); + + ret = PTR_ERR(inter_clk); + goto out_free_resources; + } + + proc_reg = regulator_get_exclusive(cpu_dev, "proc"); + if (IS_ERR(proc_reg)) { + if (PTR_ERR(proc_reg) == -EPROBE_DEFER) + pr_warn("proc regulator for cpu%d not ready, retry.\n", + cpu); + else + pr_err("failed to get proc regulator for cpu%d\n", + cpu); + + ret = PTR_ERR(proc_reg); + goto out_free_resources; + } + + /* Both presence and absence of sram regulator are valid cases. */ + sram_reg = regulator_get_exclusive(cpu_dev, "sram"); + + ret = of_init_opp_table(cpu_dev); + if (ret) { + pr_warn("no OPP table for cpu%d\n", cpu); + goto out_free_resources; + } + + /* Search a safe voltage for intermediate frequency. */ + rate = clk_get_rate(inter_clk); + rcu_read_lock(); + opp = dev_pm_opp_find_freq_ceil(cpu_dev, &rate); + if (IS_ERR(opp)) { + rcu_read_unlock(); + pr_err("failed to get intermediate opp for cpu%d\n", cpu); + ret = PTR_ERR(opp); + goto out_free_opp_table; + } + info->intermediate_voltage = dev_pm_opp_get_voltage(opp); + rcu_read_unlock(); + + info->cpu_dev = cpu_dev; + info->proc_reg = proc_reg; + info->sram_reg = IS_ERR(sram_reg) ? NULL : sram_reg; + info->cpu_clk = cpu_clk; + info->inter_clk = inter_clk; + + /* + * If SRAM regulator is present, software "voltage tracking" is needed + * for this CPU power domain. + */ + info->need_voltage_tracking = !IS_ERR(sram_reg); + + return 0; + +out_free_opp_table: + of_free_opp_table(cpu_dev); + +out_free_resources: + if (!IS_ERR(proc_reg)) + regulator_put(proc_reg); + if (!IS_ERR(sram_reg)) + regulator_put(sram_reg); + if (!IS_ERR(cpu_clk)) + clk_put(cpu_clk); + if (!IS_ERR(inter_clk)) + clk_put(inter_clk); + + return ret; +} + +static void mtk_cpu_dvfs_info_release(struct mtk_cpu_dvfs_info *info) +{ + if (!IS_ERR(info->proc_reg)) + regulator_put(info->proc_reg); + if (!IS_ERR(info->sram_reg)) + regulator_put(info->sram_reg); + if (!IS_ERR(info->cpu_clk)) + clk_put(info->cpu_clk); + if (!IS_ERR(info->inter_clk)) + clk_put(info->inter_clk); + + of_free_opp_table(info->cpu_dev); +} + +static int mtk_cpufreq_init(struct cpufreq_policy *policy) +{ + struct mtk_cpu_dvfs_info *info; + struct cpufreq_frequency_table *freq_table; + int ret; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + ret = mtk_cpu_dvfs_info_init(info, policy->cpu); + if (ret) { + pr_err("%s failed to initialize dvfs info for cpu%d\n", + __func__, policy->cpu); + goto out_free_dvfs_info; + } + + ret = dev_pm_opp_init_cpufreq_table(info->cpu_dev, &freq_table); + if (ret) { + pr_err("failed to init cpufreq table for cpu%d: %d\n", + policy->cpu, ret); + goto out_release_dvfs_info; + } + + ret = cpufreq_table_validate_and_show(policy, freq_table); + if (ret) { + pr_err("%s: invalid frequency table: %d\n", __func__, ret); + goto out_free_cpufreq_table; + } + + /* CPUs in the same cluster share a clock and power domain. */ + cpumask_copy(policy->cpus, &cpu_topology[policy->cpu].core_sibling); + policy->driver_data = info; + policy->clk = info->cpu_clk; + + return 0; + +out_free_cpufreq_table: + dev_pm_opp_free_cpufreq_table(info->cpu_dev, &freq_table); + +out_release_dvfs_info: + mtk_cpu_dvfs_info_release(info); + +out_free_dvfs_info: + kfree(info); + + return ret; +} + +static int mtk_cpufreq_exit(struct cpufreq_policy *policy) +{ + struct mtk_cpu_dvfs_info *info = policy->driver_data; + + cpufreq_cooling_unregister(info->cdev); + dev_pm_opp_free_cpufreq_table(info->cpu_dev, &policy->freq_table); + mtk_cpu_dvfs_info_release(info); + kfree(info); + + return 0; +} + +static struct cpufreq_driver mt8173_cpufreq_driver = { + .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .verify = cpufreq_generic_frequency_table_verify, + .target_index = mtk_cpufreq_set_target, + .get = cpufreq_generic_get, + .init = mtk_cpufreq_init, + .exit = mtk_cpufreq_exit, + .ready = mtk_cpufreq_ready, + .name = "mtk-cpufreq", + .attr = cpufreq_generic_attr, +}; + +static int mt8173_cpufreq_probe(struct platform_device *pdev) +{ + int ret; + + ret = cpufreq_register_driver(&mt8173_cpufreq_driver); + if (ret) + pr_err("failed to register mtk cpufreq driver\n"); + + return ret; +} + +static struct platform_driver mt8173_cpufreq_platdrv = { + .driver = { + .name = "mt8173-cpufreq", + }, + .probe = mt8173_cpufreq_probe, +}; + +static int mt8173_cpufreq_driver_init(void) +{ + struct platform_device *pdev; + int err; + + if (!of_machine_is_compatible("mediatek,mt8173")) + return -ENODEV; + + err = platform_driver_register(&mt8173_cpufreq_platdrv); + if (err) + return err; + + /* + * Since there's no place to hold device registration code and no + * device tree based way to match cpufreq driver yet, both the driver + * and the device registration codes are put here to handle defer + * probing. + */ + pdev = platform_device_register_simple("mt8173-cpufreq", -1, NULL, 0); + if (IS_ERR(pdev)) { + pr_err("failed to register mtk-cpufreq platform device\n"); + return PTR_ERR(pdev); + } + + return 0; +} +device_initcall(mt8173_cpufreq_driver_init); From 6bfb7c7434f75d29241413dc7e784295ba56de98 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 3 Aug 2015 08:36:14 +0530 Subject: [PATCH 44/53] cpufreq: remove redundant CPUFREQ_INCOMPATIBLE notifier event What's being done from CPUFREQ_INCOMPATIBLE, can also be done with CPUFREQ_ADJUST. There is nothing special with CPUFREQ_INCOMPATIBLE notifier. Kill CPUFREQ_INCOMPATIBLE and fix its usage sites. This also updates the numbering of notifier events to remove holes. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/core.txt | 7 ++----- drivers/acpi/processor_perflib.c | 2 +- drivers/cpufreq/cpufreq.c | 4 ---- drivers/cpufreq/ppc_cbe_cpufreq_pmi.c | 4 ++-- drivers/video/fbdev/pxafb.c | 1 - drivers/video/fbdev/sa1100fb.c | 1 - include/linux/cpufreq.h | 9 ++++----- 7 files changed, 9 insertions(+), 19 deletions(-) diff --git a/Documentation/cpu-freq/core.txt b/Documentation/cpu-freq/core.txt index 70933eadc308..ba78e7c2a069 100644 --- a/Documentation/cpu-freq/core.txt +++ b/Documentation/cpu-freq/core.txt @@ -55,16 +55,13 @@ transition notifiers. ---------------------------- These are notified when a new policy is intended to be set. Each -CPUFreq policy notifier is called three times for a policy transition: +CPUFreq policy notifier is called twice for a policy transition: 1.) During CPUFREQ_ADJUST all CPUFreq notifiers may change the limit if they see a need for this - may it be thermal considerations or hardware limitations. -2.) During CPUFREQ_INCOMPATIBLE only changes may be done in order to avoid - hardware failure. - -3.) And during CPUFREQ_NOTIFY all notifiers are informed of the new policy +2.) And during CPUFREQ_NOTIFY all notifiers are informed of the new policy - if two hardware drivers failed to agree on a new policy before this stage, the incompatible hardware shall be shut down, and the user informed of this. diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 36b6da2918a6..91941f1cdecb 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -87,7 +87,7 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb, if (ignore_ppc) return 0; - if (event != CPUFREQ_INCOMPATIBLE) + if (event != CPUFREQ_ADJUST) return 0; mutex_lock(&performance_mutex); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 76a26609d96b..293f47b814bf 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2206,10 +2206,6 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_ADJUST, new_policy); - /* adjust if necessary - hardware incompatibility*/ - blocking_notifier_call_chain(&cpufreq_policy_notifier_list, - CPUFREQ_INCOMPATIBLE, new_policy); - /* * verify the cpu speed can be set within this limit, which might be * different to the first one diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c index d29e8da396a0..7969f7690498 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c @@ -97,8 +97,8 @@ static int pmi_notifier(struct notifier_block *nb, struct cpufreq_frequency_table *cbe_freqs; u8 node; - /* Should this really be called for CPUFREQ_ADJUST, CPUFREQ_INCOMPATIBLE - * and CPUFREQ_NOTIFY policy events?) + /* Should this really be called for CPUFREQ_ADJUST and CPUFREQ_NOTIFY + * policy events?) */ if (event == CPUFREQ_START) return 0; diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c index 7245611ec963..94813af97f09 100644 --- a/drivers/video/fbdev/pxafb.c +++ b/drivers/video/fbdev/pxafb.c @@ -1668,7 +1668,6 @@ pxafb_freq_policy(struct notifier_block *nb, unsigned long val, void *data) switch (val) { case CPUFREQ_ADJUST: - case CPUFREQ_INCOMPATIBLE: pr_debug("min dma period: %d ps, " "new clock %d kHz\n", pxafb_display_dma_period(var), policy->max); diff --git a/drivers/video/fbdev/sa1100fb.c b/drivers/video/fbdev/sa1100fb.c index 89dd7e02197f..dcf774c15889 100644 --- a/drivers/video/fbdev/sa1100fb.c +++ b/drivers/video/fbdev/sa1100fb.c @@ -1042,7 +1042,6 @@ sa1100fb_freq_policy(struct notifier_block *nb, unsigned long val, switch (val) { case CPUFREQ_ADJUST: - case CPUFREQ_INCOMPATIBLE: dev_dbg(fbi->dev, "min dma period: %d ps, " "new clock %d kHz\n", sa1100fb_min_dma_period(fbi), policy->max); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index bde1e567b3a9..bedcc90c0757 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -369,11 +369,10 @@ static inline void cpufreq_resume(void) {} /* Policy Notifiers */ #define CPUFREQ_ADJUST (0) -#define CPUFREQ_INCOMPATIBLE (1) -#define CPUFREQ_NOTIFY (2) -#define CPUFREQ_START (3) -#define CPUFREQ_CREATE_POLICY (4) -#define CPUFREQ_REMOVE_POLICY (5) +#define CPUFREQ_NOTIFY (1) +#define CPUFREQ_START (2) +#define CPUFREQ_CREATE_POLICY (3) +#define CPUFREQ_REMOVE_POLICY (4) #ifdef CONFIG_CPU_FREQ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list); From 8fa5b631f32238a16ae3db0db5b354f7b9eb20cb Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 3 Aug 2015 08:36:15 +0530 Subject: [PATCH 45/53] cpufreq: use memcpy() to copy policy cpufreq_get_policy() is useful if the pointer to policy isn't available in advance. But if it is available, then there is no need to call cpufreq_get_policy(). Directly use memcpy() to copy the policy. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 293f47b814bf..86d69416821b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -606,9 +606,7 @@ static ssize_t store_##file_name \ int ret, temp; \ struct cpufreq_policy new_policy; \ \ - ret = cpufreq_get_policy(&new_policy, policy->cpu); \ - if (ret) \ - return -EINVAL; \ + memcpy(&new_policy, policy, sizeof(*policy)); \ \ ret = sscanf(buf, "%u", &new_policy.object); \ if (ret != 1) \ @@ -662,9 +660,7 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, char str_governor[16]; struct cpufreq_policy new_policy; - ret = cpufreq_get_policy(&new_policy, policy->cpu); - if (ret) - return ret; + memcpy(&new_policy, policy, sizeof(*policy)); ret = sscanf(buf, "%15s", str_governor); if (ret != 1) From 14ca0bdfdd6b422027b9b733abb0bf151811eaa7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 3 Aug 2015 08:36:16 +0530 Subject: [PATCH 46/53] cpufreq: update user_policy.* on success 'user_policy' caches properties of a policy that are set by userspace. And these must be updated only if cpufreq core was successful in updating them based on request from user space. In store_scaling_governor(), we are updating user_policy.policy and user_policy.governor even if cpufreq_set_policy() failed. That's incorrect. Fix this by updating user_policy.* only if we were successful in updating the properties. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 86d69416821b..8e71d8e08439 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -671,14 +671,12 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, return -EINVAL; ret = cpufreq_set_policy(policy, &new_policy); + if (ret) + return ret; policy->user_policy.policy = policy->policy; policy->user_policy.governor = policy->governor; - - if (ret) - return ret; - else - return count; + return count; } /** From e27f8bd248756310a6df8b67f96d41d5a693642c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 3 Aug 2015 08:36:17 +0530 Subject: [PATCH 47/53] cpufreq: remove redundant 'governor' field from user_policy Its always same as policy->governor, and there is no need to keep another copy of it. Remove it. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 7 ++----- include/linux/cpufreq.h | 1 - 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8e71d8e08439..3033952391fe 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -675,7 +675,6 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, return ret; policy->user_policy.policy = policy->policy; - policy->user_policy.governor = policy->governor; return count; } @@ -1323,10 +1322,9 @@ static int cpufreq_online(unsigned int cpu) goto out_exit_policy; } - if (new_policy) { + if (new_policy) policy->user_policy.policy = policy->policy; - policy->user_policy.governor = policy->governor; - } + up_write(&policy->rwsem); kobject_uevent(&policy->kobj, KOBJ_ADD); @@ -2305,7 +2303,6 @@ int cpufreq_update_policy(unsigned int cpu) new_policy.min = policy->user_policy.min; new_policy.max = policy->user_policy.max; new_policy.policy = policy->user_policy.policy; - new_policy.governor = policy->user_policy.governor; /* * BIOS might change freq behind our back diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index bedcc90c0757..752bf8a5c314 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -55,7 +55,6 @@ struct cpufreq_real_policy { unsigned int min; /* in kHz */ unsigned int max; /* in kHz */ unsigned int policy; /* see above */ - struct cpufreq_governor *governor; /* see below */ }; struct cpufreq_policy { From 88dc4384958759510db248bf9158434ac783d407 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 3 Aug 2015 08:36:18 +0530 Subject: [PATCH 48/53] cpufreq: remove redundant 'policy' field from user_policy Its always same as policy->policy, and there is no need to keep another copy of it. Remove it. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 10 +--------- include/linux/cpufreq.h | 1 - 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 3033952391fe..a80bd68bbd74 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -671,11 +671,7 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, return -EINVAL; ret = cpufreq_set_policy(policy, &new_policy); - if (ret) - return ret; - - policy->user_policy.policy = policy->policy; - return count; + return ret ? ret : count; } /** @@ -1322,9 +1318,6 @@ static int cpufreq_online(unsigned int cpu) goto out_exit_policy; } - if (new_policy) - policy->user_policy.policy = policy->policy; - up_write(&policy->rwsem); kobject_uevent(&policy->kobj, KOBJ_ADD); @@ -2302,7 +2295,6 @@ int cpufreq_update_policy(unsigned int cpu) memcpy(&new_policy, policy, sizeof(*policy)); new_policy.min = policy->user_policy.min; new_policy.max = policy->user_policy.max; - new_policy.policy = policy->user_policy.policy; /* * BIOS might change freq behind our back diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 752bf8a5c314..54dbbff0a55e 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -54,7 +54,6 @@ struct cpufreq_cpuinfo { struct cpufreq_real_policy { unsigned int min; /* in kHz */ unsigned int max; /* in kHz */ - unsigned int policy; /* see above */ }; struct cpufreq_policy { From c7a7b418dd1991079dd7ef03fec7d1863ef96154 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 3 Aug 2015 08:36:19 +0530 Subject: [PATCH 49/53] cpufreq: rename cpufreq_real_policy as cpufreq_user_policy Its all about caching min/max freq requested by userspace, and the name 'cpufreq_real_policy' doesn't fit that well. Rename it to cpufreq_user_policy. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 54dbbff0a55e..6ff6a4d95eea 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -51,7 +51,7 @@ struct cpufreq_cpuinfo { unsigned int transition_latency; }; -struct cpufreq_real_policy { +struct cpufreq_user_policy { unsigned int min; /* in kHz */ unsigned int max; /* in kHz */ }; @@ -86,7 +86,7 @@ struct cpufreq_policy { struct work_struct update; /* if update_policy() needs to be * called, but you're in IRQ context */ - struct cpufreq_real_policy user_policy; + struct cpufreq_user_policy user_policy; struct cpufreq_frequency_table *freq_table; struct list_head policy_list; From 36dfef23cd26a6d3b71dd86509e34d311f1cd906 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 3 Aug 2015 08:36:20 +0530 Subject: [PATCH 50/53] cpufreq: drop !cpufreq_driver check from cpufreq_parse_governor() Driver is guaranteed to be present on a call to cpufreq_parse_governor() and there is no need to check for !cpufreq_driver. Drop it. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a80bd68bbd74..a05cc75cc45d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -520,9 +520,6 @@ static int cpufreq_parse_governor(char *str_governor, unsigned int *policy, { int err = -EINVAL; - if (!cpufreq_driver) - goto out; - if (cpufreq_driver->setpolicy) { if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) { *policy = CPUFREQ_POLICY_PERFORMANCE; @@ -557,7 +554,6 @@ static int cpufreq_parse_governor(char *str_governor, unsigned int *policy, mutex_unlock(&cpufreq_governor_mutex); } -out: return err; } From a482e5562e48d89ea50f41f9fc6ed3a9768de2ff Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 7 Aug 2015 09:59:17 +0200 Subject: [PATCH 51/53] cpufreq: sfi: use kmemdup rather than duplicating its implementation The patch was generated using fixed coccinelle semantic patch scripts/coccinelle/api/memdup.cocci [1]. [1]: http://permalink.gmane.org/gmane.linux.kernel/2014320 Signed-off-by: Andrzej Hajda Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/sfi-cpufreq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/cpufreq/sfi-cpufreq.c b/drivers/cpufreq/sfi-cpufreq.c index ffa3389e535b..992ce6f9abec 100644 --- a/drivers/cpufreq/sfi-cpufreq.c +++ b/drivers/cpufreq/sfi-cpufreq.c @@ -45,12 +45,10 @@ static int sfi_parse_freq(struct sfi_table_header *table) pentry = (struct sfi_freq_table_entry *)sb->pentry; totallen = num_freq_table_entries * sizeof(*pentry); - sfi_cpufreq_array = kzalloc(totallen, GFP_KERNEL); + sfi_cpufreq_array = kmemdup(pentry, totallen, GFP_KERNEL); if (!sfi_cpufreq_array) return -ENOMEM; - memcpy(sfi_cpufreq_array, pentry, totallen); - return 0; } From 309d0631cc329ca1051c631c89e0acc9b752cb4d Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Thu, 27 Aug 2015 14:41:44 +0530 Subject: [PATCH 52/53] cpufreq: powernv: Increase the verbosity of OCC console messages Modify the OCC reset/load/active event message to make it clearer for the user to understand the event and effect of the event. Suggested-by: Stewart Smith Signed-off-by: Shilpasri G Bhat Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 546e056e416d..64994e10638e 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -465,6 +465,7 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb, switch (omsg.type) { case OCC_RESET: occ_reset = true; + pr_info("OCC (On Chip Controller - enforces hard thermal/power limits) Resetting\n"); /* * powernv_cpufreq_throttle_check() is called in * target() callback which can detect the throttle state @@ -474,12 +475,12 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb, */ if (!throttled) { throttled = true; - pr_crit("CPU Frequency is throttled\n"); + pr_crit("CPU frequency is throttled for duration\n"); } - pr_info("OCC: Reset\n"); + break; case OCC_LOAD: - pr_info("OCC: Loaded\n"); + pr_info("OCC Loading, CPU frequency is throttled until OCC is started\n"); break; case OCC_THROTTLE: omsg.chip = be64_to_cpu(msg->params[1]); @@ -488,7 +489,7 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb, if (occ_reset) { occ_reset = false; throttled = false; - pr_info("OCC: Active\n"); + pr_info("OCC Active, CPU frequency is no longer throttled\n"); for (i = 0; i < nr_chips; i++) { chips[i].restore = true; From 72e624de6e6f0d5a638fbc23842aa76ae048e9e7 Mon Sep 17 00:00:00 2001 From: Abhilash Jindal Date: Tue, 11 Aug 2015 12:01:22 -0400 Subject: [PATCH 53/53] cpufreq: speedstep-lib: Use monotonic clock Wall time obtained from do_gettimeofday is susceptible to sudden jumps due to user setting the time or due to NTP. Monotonic time is constantly increasing time better suited for comparing two timestamps. Signed-off-by: Abhilash Jindal Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/speedstep-lib.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c index 4ab7a2156672..15d3214aaa00 100644 --- a/drivers/cpufreq/speedstep-lib.c +++ b/drivers/cpufreq/speedstep-lib.c @@ -386,7 +386,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor, unsigned int prev_speed; unsigned int ret = 0; unsigned long flags; - struct timeval tv1, tv2; + ktime_t tv1, tv2; if ((!processor) || (!low_speed) || (!high_speed) || (!set_state)) return -EINVAL; @@ -415,14 +415,14 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor, /* start latency measurement */ if (transition_latency) - do_gettimeofday(&tv1); + tv1 = ktime_get(); /* switch to high state */ set_state(SPEEDSTEP_HIGH); /* end latency measurement */ if (transition_latency) - do_gettimeofday(&tv2); + tv2 = ktime_get(); *high_speed = speedstep_get_frequency(processor); if (!*high_speed) { @@ -442,8 +442,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor, set_state(SPEEDSTEP_LOW); if (transition_latency) { - *transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC + - tv2.tv_usec - tv1.tv_usec; + *transition_latency = ktime_to_us(ktime_sub(tv2, tv1)); pr_debug("transition latency is %u uSec\n", *transition_latency); /* convert uSec to nSec and add 20% for safety reasons */